/*
Copyright (c) 2003-2013, Troy D. Hanson     http://uthash.sourceforge.net
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef UTHASH_H
#define UTHASH_H

#include <string.h>   /* memcmp,strlen */
#include <stddef.h>   /* ptrdiff_t */
#include <stdlib.h>   /* exit() */

/* These macros use decltype or the earlier __typeof GNU extension.
   As decltype is only available in newer compilers (VS2010 or gcc 4.3+
   when compiling c++ source) this code uses whatever method is needed
   or, for VS2008 where neither is available, uses casting workarounds. */
#ifdef _MSC_VER         /* MS compiler */
#if _MSC_VER >= 1600 && defined(__cplusplus)  /* VS2010 or newer in C++ mode */
#define DECLTYPE(x) (decltype(x))
#else                   /* VS2008 or older (or VS2010 in C mode) */
#define NO_DECLTYPE
#define DECLTYPE(x)
#endif
#else                   /* GNU, Sun and other compilers */
#define DECLTYPE(x) (__typeof(x))
#endif

/* Assign src to dst, converting src to dst's own pointer type. Without
 * decltype/typeof the store is done through a char** alias of dst. */
#ifdef NO_DECLTYPE
#define DECLTYPE_ASSIGN(dst,src) \
do { \
  char **_da_dst = (char**)(&(dst)); \
  *_da_dst = (char*)(src); \
} while(0)
#else
#define DECLTYPE_ASSIGN(dst,src) \
do { \
  (dst) = DECLTYPE(dst)(src); \
} while(0)
#endif

/* a number of the hash functions use uint32_t which isn't defined on win32 */
/* NOTE(review): HASH_SFH also refers to uint16_t, which the MSVC branch below
 * does not declare -- confirm before selecting HASH_SFH with that compiler. */
#ifdef _MSC_VER
typedef unsigned int uint32_t;
typedef unsigned char uint8_t;
#else
#include <inttypes.h>   /* uint32_t */
#endif

#define UTHASH_VERSION 1.9.7

#ifndef uthash_fatal
#define uthash_fatal(msg) exit(-1)        /* fatal error (out of memory,etc) */
#endif
#ifndef uthash_malloc
#define uthash_malloc(sz) malloc(sz)      /* malloc fcn */
#endif
#ifndef uthash_free
#define uthash_free(ptr,sz) free(ptr)     /* free fcn */
#endif

#ifndef uthash_noexpand_fyi
#define uthash_noexpand_fyi(tbl)          /* can be defined to log noexpand */
#endif
#ifndef uthash_expand_fyi
#define uthash_expand_fyi(tbl)            /* can be defined to log expands */
#endif

/* initial number of buckets */
#define HASH_INITIAL_NUM_BUCKETS 32       /* initial number of buckets */
#define HASH_INITIAL_NUM_BUCKETS_LOG2 5   /* lg2 of initial number of buckets */
#define HASH_BKT_CAPACITY_THRESH 10       /* expand when bucket count reaches */

/* calculate the element whose hash handle address is hhp */
#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho)))

/* Look up the element whose key equals the keylen bytes at keyptr.
 * out receives the element pointer, or NULL when head is NULL or no
 * element matches. */
#define HASH_FIND(hh,head,keyptr,keylen,out) \
do { \
  unsigned _hf_bkt,_hf_hashv; \
  (out)=NULL; \
  if (head) { \
    HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \
    if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \
      HASH_FIND_IN_BKT((head)->hh.tbl, hh, \
                       (head)->hh.tbl->buckets[ _hf_bkt ], \
                       keyptr,keylen,out); \
    } \
  } \
} while (0)

/* Optional bloom filter, enabled by defining HASH_BLOOM to the log2 of the
 * filter size in bits. When disabled every test reports "maybe present". */
#ifdef HASH_BLOOM
#define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM)
/* fully parenthesized so the value is safe inside larger expressions */
#define HASH_BLOOM_BYTELEN ((HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 1:0))
#define HASH_BLOOM_MAKE(tbl) \
do { \
  (tbl)->bloom_nbits = HASH_BLOOM; \
  (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \
  if (!((tbl)->bloom_bv))  { uthash_fatal( "out of memory"); } \
  memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \
  (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \
} while (0)

#define HASH_BLOOM_FREE(tbl) \
do { \
  uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \
} while (0)

#define HASH_BLOOM_BITSET(bv,idx) ((bv)[(idx)/8] |= (1U << ((idx)%8)))
#define HASH_BLOOM_BITTEST(bv,idx) ((bv)[(idx)/8] & (1U << ((idx)%8)))

#define HASH_BLOOM_ADD(tbl,hashv) \
  HASH_BLOOM_BITSET((tbl)->bloom_bv, ((hashv) & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))

#define HASH_BLOOM_TEST(tbl,hashv) \
  HASH_BLOOM_BITTEST((tbl)->bloom_bv, ((hashv) & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))

#else
#define HASH_BLOOM_MAKE(tbl)
#define HASH_BLOOM_FREE(tbl)
#define HASH_BLOOM_ADD(tbl,hashv)
#define HASH_BLOOM_TEST(tbl,hashv) (1)
#endif

/* Allocate and zero the shared UT_hash_table and its initial bucket array,
 * hanging it off head's hash handle. Allocation failure calls uthash_fatal. */
#define HASH_MAKE_TABLE(hh,head) \
do { \
  (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \
                  sizeof(UT_hash_table)); \
  if (!((head)->hh.tbl))  { uthash_fatal( "out of memory"); } \
  memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \
  (head)->hh.tbl->tail = &((head)->hh); \
  (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \
  (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \
  (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \
  (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \
          HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
  if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \
  memset((head)->hh.tbl->buckets, 0, \
          HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
  HASH_BLOOM_MAKE((head)->hh.tbl); \
  (head)->hh.tbl->signature = HASH_SIGNATURE; \
} while(0)

/* Add element "add", keyed by its own member "fieldname" of keylen_in bytes. */
#define HASH_ADD(hh,head,fieldname,keylen_in,add) \
        HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add)

/* Add element "add", keyed by the keylen_in bytes at keyptr. The key bytes
 * are referenced, not copied. The element is appended to the app-order list
 * and pushed onto its bucket chain; the table is created on first insert. */
#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \
do { \
 unsigned _ha_bkt; \
 (add)->hh.next = NULL; \
 /* parenthesized: keyptr may be an expression such as "base + n" */ \
 (add)->hh.key = (char*)(keyptr); \
 (add)->hh.keylen = (unsigned)(keylen_in); \
 if (!(head)) { \
    (head) = (add); \
    (head)->hh.prev = NULL; \
    HASH_MAKE_TABLE(hh,head); \
 } else { \
    (head)->hh.tbl->tail->next = (add); \
    (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \
    (head)->hh.tbl->tail = &((add)->hh); \
 } \
 (head)->hh.tbl->num_items++; \
 (add)->hh.tbl = (head)->hh.tbl; \
 HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \
         (add)->hh.hashv, _ha_bkt); \
 HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \
 HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \
 HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \
 HASH_FSCK(hh,head); \
} while(0)

/* map a hash value onto a bucket index; num_bkts must be a power of two */
#define HASH_TO_BKT( hashv, num_bkts, bkt ) \
do { \
  (bkt) = ((hashv) & ((num_bkts) - 1)); \
} while(0)

/* delete "delptr" from the hash table.
 * "the usual" patch-up process for the app-order doubly-linked-list.
 * The use of _hd_hh_del below deserves special explanation.
 * These used to be expressed using (delptr) but that led to a bug
 * if someone used the same symbol for the head and deletee, like
 *  HASH_DELETE(hh,users,users);
 * We want that to work, but by changing the head (users) below
 * we were forfeiting our ability to further refer to the deletee (users)
 * in the patch-up process. Solution: use scratch space to
 * copy the deletee pointer, then the latter references are via that
 * scratch pointer rather than through the repointed (users) symbol.
 *
 * Deleting the only element frees the bucket array, the bloom filter and
 * the table, and sets head to NULL. The element itself is never freed.
 */
#define HASH_DELETE(hh,head,delptr) \
do { \
    unsigned _hd_bkt; \
    struct UT_hash_handle *_hd_hh_del; \
    if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) )  { \
        uthash_free((head)->hh.tbl->buckets, \
                    (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
        HASH_BLOOM_FREE((head)->hh.tbl); \
        uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
        (head) = NULL; \
    } else { \
        _hd_hh_del = &((delptr)->hh); \
        if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \
            (head)->hh.tbl->tail = \
                (UT_hash_handle*)((char*)((delptr)->hh.prev) + \
                (head)->hh.tbl->hho); \
        } \
        if ((delptr)->hh.prev) { \
            ((UT_hash_handle*)((char*)((delptr)->hh.prev) + \
                    (head)->hh.tbl->hho))->next = (delptr)->hh.next; \
        } else { \
            DECLTYPE_ASSIGN(head,(delptr)->hh.next); \
        } \
        if (_hd_hh_del->next) { \
            ((UT_hash_handle*)((char*)_hd_hh_del->next + \
                    (head)->hh.tbl->hho))->prev = \
                    _hd_hh_del->prev; \
        } \
        HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \
        HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \
        (head)->hh.tbl->num_items--; \
    } \
    HASH_FSCK(hh,head); \
} while (0)


/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */
#define HASH_FIND_STR(head,findstr,out) \
    HASH_FIND(hh,head,findstr,strlen(findstr),out)
#define HASH_ADD_STR(head,strfield,add) \
    HASH_ADD(hh,head,strfield,strlen((add)->strfield),add)
#define HASH_FIND_INT(head,findint,out) \
    HASH_FIND(hh,head,findint,sizeof(int),out)
#define HASH_ADD_INT(head,intfield,add) \
    HASH_ADD(hh,head,intfield,sizeof(int),add)
#define HASH_FIND_PTR(head,findptr,out) \
    HASH_FIND(hh,head,findptr,sizeof(void *),out)
#define HASH_ADD_PTR(head,ptrfield,add) \
    HASH_ADD(hh,head,ptrfield,sizeof(void *),add)
#define HASH_DEL(head,delptr) \
    HASH_DELETE(hh,head,delptr)

/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined.
 * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined.
 */
#ifdef HASH_DEBUG
#include <stdio.h>   /* fprintf, stderr */
#define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0)
#define HASH_FSCK(hh,head) \
do { \
    unsigned _bkt_i; \
    unsigned _count, _bkt_count; \
    char *_prev; \
    struct UT_hash_handle *_thh; \
    if (head) { \
        _count = 0; \
        for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \
            _bkt_count = 0; \
            _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \
            _prev = NULL; \
            while (_thh) { \
               if (_prev != (char*)(_thh->hh_prev)) { \
                   HASH_OOPS("invalid hh_prev %p, actual %p\n", \
                    (void*)_thh->hh_prev, (void*)_prev ); \
               } \
               _bkt_count++; \
               _prev = (char*)(_thh); \
               _thh = _thh->hh_next; \
            } \
            _count += _bkt_count; \
            if ((head)->hh.tbl->buckets[_bkt_i].count !=  _bkt_count) { \
               HASH_OOPS("invalid bucket count %u, actual %u\n", \
                (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \
            } \
        } \
        if (_count != (head)->hh.tbl->num_items) { \
            HASH_OOPS("invalid hh item count %u, actual %u\n", \
                (head)->hh.tbl->num_items, _count ); \
        } \
        /* traverse hh in app order; check next/prev integrity, count */ \
        _count = 0; \
        _prev = NULL; \
        _thh =  &(head)->hh; \
        while (_thh) { \
           _count++; \
           if (_prev !=(char*)(_thh->prev)) { \
              HASH_OOPS("invalid prev %p, actual %p\n", \
                    (void*)_thh->prev, (void*)_prev ); \
           } \
           _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \
           _thh = ( _thh->next ?  (UT_hash_handle*)((char*)(_thh->next) + \
                                  (head)->hh.tbl->hho) : NULL ); \
        } \
        if (_count != (head)->hh.tbl->num_items) { \
            HASH_OOPS("invalid app item count %u, actual %u\n", \
                (head)->hh.tbl->num_items, _count ); \
        } \
    } \
} while (0)
#else
#define HASH_FSCK(hh,head)
#endif

/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
 * the descriptor to which this macro is defined for tuning the hash function.
 * The app can #include <unistd.h> to get the prototype for write(2). */
#ifdef HASH_EMIT_KEYS
#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \
do { \
    unsigned _klen = fieldlen; \
    write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \
    write(HASH_EMIT_KEYS, keyptr, fieldlen); \
} while (0)
#else
#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
#endif

/* default to Jenkins' hash unless overridden e.g. -DHASH_FUNCTION=HASH_SAX */
#ifdef HASH_FUNCTION
#define HASH_FCN HASH_FUNCTION
#else
#define HASH_FCN HASH_JEN
#endif

/* Every hash function below has the same shape:
 *   HASH_xxx(key, keylen, num_bkts, hashv, bkt)
 * It hashes the keylen bytes at key, stores the hash in hashv and the bucket
 * index (hashv masked by num_bkts-1) in bkt. Key bytes are read through a
 * plain char pointer, exactly as before, so hash values are unchanged. */

/* The Bernstein hash function, used in Perl prior to v5.6 */
#define HASH_BER(key,keylen,num_bkts,hashv,bkt) \
do { \
  unsigned _hb_keylen=(keylen); \
  char *_hb_key=(char*)(key); \
  (hashv) = 0; \
  while (_hb_keylen--)  { (hashv) = ((hashv) * 33) + *_hb_key++; } \
  bkt = (hashv) & ((num_bkts)-1); \
} while (0)


/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
 * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */
#define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \
do { \
  unsigned _sx_i; \
  char *_hs_key=(char*)(key); \
  hashv = 0; \
  for(_sx_i=0; _sx_i < (keylen); _sx_i++) \
      hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \
  bkt = hashv & ((num_bkts)-1); \
} while (0)

#define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \
do { \
  unsigned _fn_i; \
  char *_hf_key=(char*)(key); \
  hashv = 2166136261UL; \
  for(_fn_i=0; _fn_i < (keylen); _fn_i++) \
      hashv = (hashv * 16777619) ^ _hf_key[_fn_i]; \
  bkt = hashv & ((num_bkts)-1); \
} while(0)

#define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \
do { \
  unsigned _ho_i; \
  char *_ho_key=(char*)(key); \
  hashv = 0; \
  for(_ho_i=0; _ho_i < (keylen); _ho_i++) { \
      hashv += _ho_key[_ho_i]; \
      hashv += (hashv << 10); \
      hashv ^= (hashv >> 6); \
  } \
  hashv += (hashv << 3); \
  hashv ^= (hashv >> 11); \
  hashv += (hashv << 15); \
  bkt = hashv & ((num_bkts)-1); \
} while(0)

#define HASH_JEN_MIX(a,b,c) \
do { \
  a -= b; a -= c; a ^= ( c >> 13 ); \
  b -= c; b -= a; b ^= ( a << 8 ); \
  c -= a; c -= b; c ^= ( b >> 13 ); \
  a -= b; a -= c; a ^= ( c >> 12 ); \
  b -= c; b -= a; b ^= ( a << 16 ); \
  c -= a; c -= b; c ^= ( b >> 5 ); \
  a -= b; a -= c; a ^= ( c >> 3 ); \
  b -= c; b -= a; b ^= ( a << 10 ); \
  c -= a; c -= b; c ^= ( b >> 15 ); \
} while (0)

/* Jenkins hash (the default). Consumes 12 key bytes per round; the switch
 * folds in the remaining 0..11 bytes and every case deliberately falls
 * through to the ones below it. */
#define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \
do { \
  unsigned _hj_i,_hj_j,_hj_k; \
  char *_hj_key=(char*)(key); \
  hashv = 0xfeedbeef; \
  _hj_i = _hj_j = 0x9e3779b9; \
  _hj_k = (unsigned)(keylen); \
  while (_hj_k >= 12) { \
    _hj_i +=    (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \
        + ( (unsigned)_hj_key[2] << 16 ) \
        + ( (unsigned)_hj_key[3] << 24 ) ); \
    _hj_j +=    (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \
        + ( (unsigned)_hj_key[6] << 16 ) \
        + ( (unsigned)_hj_key[7] << 24 ) ); \
    hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \
        + ( (unsigned)_hj_key[10] << 16 ) \
        + ( (unsigned)_hj_key[11] << 24 ) ); \
 \
     HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
 \
     _hj_key += 12; \
     _hj_k -= 12; \
  } \
  hashv += (keylen); \
  switch ( _hj_k ) { \
     case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); \
     case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); \
     case 9:  hashv += ( (unsigned)_hj_key[8] << 8 ); \
     case 8:  _hj_j += ( (unsigned)_hj_key[7] << 24 ); \
     case 7:  _hj_j += ( (unsigned)_hj_key[6] << 16 ); \
     case 6:  _hj_j += ( (unsigned)_hj_key[5] << 8 ); \
     case 5:  _hj_j += _hj_key[4]; \
     case 4:  _hj_i += ( (unsigned)_hj_key[3] << 24 ); \
     case 3:  _hj_i += ( (unsigned)_hj_key[2] << 16 ); \
     case 2:  _hj_i += ( (unsigned)_hj_key[1] << 8 ); \
     case 1:  _hj_i += _hj_key[0]; \
  } \
  HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
  bkt = hashv & ((num_bkts)-1); \
} while(0)

/* The Paul Hsieh hash function */
#undef get16bits
#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
  || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
#define get16bits(d) (*((const uint16_t *) (d)))
#endif

#if !defined (get16bits)
#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \
                       +(uint32_t)(((const uint8_t *)(d))[0]) )
#endif
#define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \
do { \
  char *_sfh_key=(char*)(key); \
  uint32_t _sfh_tmp, _sfh_len = (keylen); \
 \
  int _sfh_rem = _sfh_len & 3; \
  _sfh_len >>= 2; \
  hashv = 0xcafebabe; \
 \
  /* Main loop */ \
  for (;_sfh_len > 0; _sfh_len--) { \
    hashv    += get16bits (_sfh_key); \
    _sfh_tmp  = (get16bits (_sfh_key+2) << 11) ^ hashv; \
    hashv     = (hashv << 16) ^ _sfh_tmp; \
    _sfh_key += 2*sizeof (uint16_t); \
    hashv    += hashv >> 11; \
  } \
 \
  /* Handle end cases */ \
  switch (_sfh_rem) { \
    case 3: hashv += get16bits (_sfh_key); \
            hashv ^= hashv << 16; \
            /* shift as unsigned: left-shifting a negative char is undefined */ \
            hashv ^= (uint32_t)_sfh_key[sizeof (uint16_t)] << 18; \
            hashv += hashv >> 11; \
            break; \
    case 2: hashv += get16bits (_sfh_key); \
            hashv ^= hashv << 11; \
            hashv += hashv >> 17; \
            break; \
    case 1: hashv += *_sfh_key; \
            hashv ^= hashv << 10; \
            hashv += hashv >> 1; \
  } \
 \
    /* Force "avalanching" of final 127 bits */ \
    hashv ^= hashv << 3; \
    hashv += hashv >> 5; \
    hashv ^= hashv << 4; \
    hashv += hashv >> 17; \
    hashv ^= hashv << 25; \
    hashv += hashv >> 6; \
    bkt = hashv & ((num_bkts)-1); \
} while(0)

#ifdef HASH_USING_NO_STRICT_ALIASING
/* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads.
 * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error.
 * MurmurHash uses the faster approach only on CPU's where we know it's safe.
 *
 * Note the preprocessor built-in defines can be emitted using:
 *
 *   gcc -m64 -dM -E - < /dev/null                  (on gcc)
 *   cc -## a.c (where a.c is a simple test file)   (Sun Studio)
 */
#if (defined(__i386__) || defined(__x86_64__)  || defined(_M_IX86))
#define MUR_GETBLOCK(p,i) ((p)[i])
#else /* non intel */
#define MUR_PLUS0_ALIGNED(p) (((unsigned long)(p) & 0x3) == 0)
#define MUR_PLUS1_ALIGNED(p) (((unsigned long)(p) & 0x3) == 1)
#define MUR_PLUS2_ALIGNED(p) (((unsigned long)(p) & 0x3) == 2)
#define MUR_PLUS3_ALIGNED(p) (((unsigned long)(p) & 0x3) == 3)
#define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL))
#if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__))
#define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24))
#define MUR_TWO_TWO(p)   ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16))
#define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >>  8))
#else /* assume little endian non-intel */
#define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24))
#define MUR_TWO_TWO(p)   ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16))
#define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) <<  8))
#endif
#define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \
                            (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \
                             (MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) : \
                                                      MUR_ONE_THREE(p))))
#endif
#define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
#define MUR_FMIX(_h) \
do { \
  _h ^= _h >> 16; \
  _h *= 0x85ebca6b; \
  _h ^= _h >> 13; \
  _h *= 0xc2b2ae35l; \
  _h ^= _h >> 16; \
} while(0)

#define HASH_MUR(key,keylen,num_bkts,hashv,bkt) \
do { \
  const uint8_t *_mur_data = (const uint8_t*)(key); \
  const int _mur_nblocks = (keylen) / 4; \
  uint32_t _mur_h1 = 0xf88D5353; \
  uint32_t _mur_c1 = 0xcc9e2d51; \
  uint32_t _mur_c2 = 0x1b873593; \
  uint32_t _mur_k1 = 0; \
  const uint8_t *_mur_tail; \
  const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+_mur_nblocks*4); \
  int _mur_i; \
  for(_mur_i = -_mur_nblocks; _mur_i; _mur_i++) { \
    _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \
    _mur_k1 *= _mur_c1; \
    _mur_k1 = MUR_ROTL32(_mur_k1,15); \
    _mur_k1 *= _mur_c2; \
 \
    _mur_h1 ^= _mur_k1; \
    _mur_h1 = MUR_ROTL32(_mur_h1,13); \
    _mur_h1 = _mur_h1*5+0xe6546b64; \
  } \
  _mur_tail = (const uint8_t*)(_mur_data + _mur_nblocks*4); \
  _mur_k1=0; \
  switch((keylen) & 3) { \
    case 3: _mur_k1 ^= _mur_tail[2] << 16; \
    case 2: _mur_k1 ^= _mur_tail[1] << 8; \
    case 1: _mur_k1 ^= _mur_tail[0]; \
    _mur_k1 *= _mur_c1; \
    _mur_k1 = MUR_ROTL32(_mur_k1,15); \
    _mur_k1 *= _mur_c2; \
    _mur_h1 ^= _mur_k1; \
  } \
  _mur_h1 ^= (keylen); \
  MUR_FMIX(_mur_h1); \
  hashv = _mur_h1; \
  bkt = hashv & ((num_bkts)-1); \
} while(0)
#endif  /* HASH_USING_NO_STRICT_ALIASING */

/* key comparison function; return 0 if keys equal */
#define HASH_KEYCMP(a,b,len) memcmp(a,b,len)

/* iterate over items in a known bucket to find desired item */
#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \
do { \
 if ((head).hh_head) { \
   DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,(head).hh_head)); \
 } else { \
   (out)=NULL; \
 } \
 while (out) { \
    if ((out)->hh.keylen == (keylen_in)) { \
        if ((HASH_KEYCMP((out)->hh.key,keyptr,keylen_in)) == 0) break; \
    } \
    if ((out)->hh.hh_next) { \
      DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,(out)->hh.hh_next)); \
    } else { \
      (out) = NULL; \
    } \
 } \
} while(0)

/* add an item to a bucket (pushed at the front of the chain); expands the
 * bucket array once the chain reaches its capacity threshold, unless
 * expansion has been inhibited for this table */
#define HASH_ADD_TO_BKT(head,addhh) \
do { \
 (head).count++; \
 (addhh)->hh_next = (head).hh_head; \
 (addhh)->hh_prev = NULL; \
 if ((head).hh_head) { (head).hh_head->hh_prev = (addhh); } \
 (head).hh_head=(addhh); \
 if ((head).count >= (((head).expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \
     && (addhh)->tbl->noexpand != 1) { \
       HASH_EXPAND_BUCKETS((addhh)->tbl); \
 } \
} while(0)

/* remove an item from a given bucket */
#define HASH_DEL_IN_BKT(hh,head,hh_del) \
do { \
    (head).count--; \
    if ((head).hh_head == (hh_del)) { \
      (head).hh_head = (hh_del)->hh_next; \
    } \
    if ((hh_del)->hh_prev) { \
        (hh_del)->hh_prev->hh_next = (hh_del)->hh_next; \
    } \
    if ((hh_del)->hh_next) { \
        (hh_del)->hh_next->hh_prev = (hh_del)->hh_prev; \
    } \
} while(0)

/* Bucket expansion has the effect of doubling the number of buckets
 * and redistributing the items into the new buckets. Ideally the
 * items will distribute more or less evenly into the new buckets
 * (the extent to which this is true is a measure of the quality of
 * the hash function as it applies to the key domain).
 *
 * With the items distributed into more buckets, the chain length
 * (item count) in each bucket is reduced. Thus by expanding buckets
 * the hash keeps a bound on the chain length. This bounded chain
 * length is the essence of how a hash provides constant time lookup.
 *
 * The calculation of tbl->ideal_chain_maxlen below deserves some
 * explanation. First, keep in mind that we're calculating the ideal
 * maximum chain length based on the *new* (doubled) bucket count.
 * In fractions this is just n/b (n=number of items,b=new num buckets).
 * Since the ideal chain length is an integer, we want to calculate
 * ceil(n/b). We don't depend on floating point arithmetic in this
 * hash, so to calculate ceil(n/b) with integers we could write
 *
 *      ceil(n/b) = (n/b) + ((n%b)?1:0)
 *
 * and in fact a previous version of this hash did just that.
 * But now we have improved things a bit by recognizing that b is
 * always a power of two. We keep its base 2 log handy (call it lb),
 * so now we can write this with a bit shift and logical AND:
 *
 *      ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
 *
 */
#define HASH_EXPAND_BUCKETS(tbl) \
do { \
    unsigned _he_bkt; \
    unsigned _he_bkt_i; \
    struct UT_hash_handle *_he_thh, *_he_hh_nxt; \
    UT_hash_bucket *_he_new_buckets, *_he_newbkt; \
    _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \
             2 * (tbl)->num_buckets * sizeof(struct UT_hash_bucket)); \
    if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \
    memset(_he_new_buckets, 0, \
            2 * (tbl)->num_buckets * sizeof(struct UT_hash_bucket)); \
    (tbl)->ideal_chain_maxlen = \
       ((tbl)->num_items >> ((tbl)->log2_num_buckets+1)) + \
       (((tbl)->num_items & (((tbl)->num_buckets*2)-1)) ? 1 : 0); \
    (tbl)->nonideal_items = 0; \
    for(_he_bkt_i = 0; _he_bkt_i < (tbl)->num_buckets; _he_bkt_i++) \
    { \
        _he_thh = (tbl)->buckets[ _he_bkt_i ].hh_head; \
        while (_he_thh) { \
           _he_hh_nxt = _he_thh->hh_next; \
           HASH_TO_BKT( _he_thh->hashv, (tbl)->num_buckets*2, _he_bkt); \
           _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \
           if (++(_he_newbkt->count) > (tbl)->ideal_chain_maxlen) { \
             (tbl)->nonideal_items++; \
             _he_newbkt->expand_mult = _he_newbkt->count / \
                                        (tbl)->ideal_chain_maxlen; \
           } \
           _he_thh->hh_prev = NULL; \
           _he_thh->hh_next = _he_newbkt->hh_head; \
           if (_he_newbkt->hh_head) { \
             _he_newbkt->hh_head->hh_prev = _he_thh; \
           } \
           _he_newbkt->hh_head = _he_thh; \
           _he_thh = _he_hh_nxt; \
        } \
    } \
    uthash_free( (tbl)->buckets, (tbl)->num_buckets*sizeof(struct UT_hash_bucket) ); \
    (tbl)->num_buckets *= 2; \
    (tbl)->log2_num_buckets++; \
    (tbl)->buckets = _he_new_buckets; \
    (tbl)->ineff_expands = ((tbl)->nonideal_items > ((tbl)->num_items >> 1)) ? \
        ((tbl)->ineff_expands+1) : 0; \
    if ((tbl)->ineff_expands > 1) { \
        (tbl)->noexpand=1; \
        uthash_noexpand_fyi(tbl); \
    } \
    uthash_expand_fyi(tbl); \
} while(0)


/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
/* Note that HASH_SORT assumes the hash handle name to be hh.
 * HASH_SRT was added to allow the hash handle name to be passed in.
 * cmpfcn(a,b) receives two element pointers; a result <= 0 keeps a first.
 * Only the app-order list is relinked; bucket chains are untouched. */
#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
#define HASH_SRT(hh,head,cmpfcn) \
do { \
  unsigned _hs_i; \
  unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \
  struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \
  if (head) { \
      _hs_insize = 1; \
      _hs_looping = 1; \
      _hs_list = &((head)->hh); \
      while (_hs_looping) { \
          _hs_p = _hs_list; \
          _hs_list = NULL; \
          _hs_tail = NULL; \
          _hs_nmerges = 0; \
          while (_hs_p) { \
              _hs_nmerges++; \
              _hs_q = _hs_p; \
              _hs_psize = 0; \
              for ( _hs_i = 0; _hs_i  < _hs_insize; _hs_i++ ) { \
                  _hs_psize++; \
                  _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
                          ((void*)((char*)(_hs_q->next) + \
                          (head)->hh.tbl->hho)) : NULL); \
                  if (! (_hs_q) ) break; \
              } \
              _hs_qsize = _hs_insize; \
              while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) { \
                  if (_hs_psize == 0) { \
                      _hs_e = _hs_q; \
                      _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
                              ((void*)((char*)(_hs_q->next) + \
                              (head)->hh.tbl->hho)) : NULL); \
                      _hs_qsize--; \
                  } else if ( (_hs_qsize == 0) || !(_hs_q) ) { \
                      _hs_e = _hs_p; \
                      _hs_p = (UT_hash_handle*)((_hs_p->next) ? \
                              ((void*)((char*)(_hs_p->next) + \
                              (head)->hh.tbl->hho)) : NULL); \
                      _hs_psize--; \
                  } else if (( \
                      cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \
                             DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \
                             ) <= 0) { \
                      _hs_e = _hs_p; \
                      _hs_p = (UT_hash_handle*)((_hs_p->next) ? \
                              ((void*)((char*)(_hs_p->next) + \
                              (head)->hh.tbl->hho)) : NULL); \
                      _hs_psize--; \
                  } else { \
                      _hs_e = _hs_q; \
                      _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
                              ((void*)((char*)(_hs_q->next) + \
                              (head)->hh.tbl->hho)) : NULL); \
                      _hs_qsize--; \
                  } \
                  if ( _hs_tail ) { \
                      _hs_tail->next = ((_hs_e) ? \
                            ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \
                  } else { \
                      _hs_list = _hs_e; \
                  } \
                  _hs_e->prev = ((_hs_tail) ? \
                     ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \
                  _hs_tail = _hs_e; \
              } \
              _hs_p = _hs_q; \
          } \
          _hs_tail->next = NULL; \
          if ( _hs_nmerges <= 1 ) { \
              _hs_looping=0; \
              (head)->hh.tbl->tail = _hs_tail; \
              DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \
          } \
          _hs_insize *= 2; \
      } \
      HASH_FSCK(hh,head); \
 } \
} while (0)

/* This function selects items from one hash into another hash.
 * The end result is that the selected items have dual presence
 * in both hashes. There is no copy of the items made; rather
 * they are added into the new hash through a secondary hash
 * handle that must be present in the structure.
 *
 * Selected items are appended after any items already in dst, and the
 * destination table's tail pointer and bloom filter are kept up to date so
 * that later HASH_ADD/HASH_FIND calls on dst work correctly. */
#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \
do { \
  unsigned _src_bkt, _dst_bkt; \
  void *_last_elt=NULL, *_elt; \
  UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \
  ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \
  if (dst) { \
    /* continue the existing app-order chain instead of starting a new one */ \
    _last_elt_hh = (dst)->hh_dst.tbl->tail; \
    _last_elt = ELMT_FROM_HH((dst)->hh_dst.tbl, _last_elt_hh); \
  } \
  if (src) { \
    for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \
      for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \
          _src_hh; \
          _src_hh = _src_hh->hh_next) { \
          _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \
          if (cond(_elt)) { \
            _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \
            _dst_hh->key = _src_hh->key; \
            _dst_hh->keylen = _src_hh->keylen; \
            _dst_hh->hashv = _src_hh->hashv; \
            _dst_hh->prev = _last_elt; \
            _dst_hh->next = NULL; \
            if (_last_elt_hh) { _last_elt_hh->next = _elt; } \
            if (!(dst)) { \
              DECLTYPE_ASSIGN(dst,_elt); \
              HASH_MAKE_TABLE(hh_dst,dst); \
            } else { \
              _dst_hh->tbl = (dst)->hh_dst.tbl; \
            } \
            HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \
            HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \
            HASH_BLOOM_ADD(_dst_hh->tbl, _dst_hh->hashv); \
            (dst)->hh_dst.tbl->num_items++; \
            /* keep the fast-append pointer on the true last element */ \
            (dst)->hh_dst.tbl->tail = _dst_hh; \
            _last_elt = _elt; \
            _last_elt_hh = _dst_hh; \
          } \
      } \
    } \
  } \
  HASH_FSCK(hh_dst,dst); \
} while (0)

/* Release the table's bookkeeping (buckets, bloom filter, table) and set
 * head to NULL. The elements themselves are not freed. */
#define HASH_CLEAR(hh,head) \
do { \
  if (head) { \
    uthash_free((head)->hh.tbl->buckets, \
                (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \
    HASH_BLOOM_FREE((head)->hh.tbl); \
    uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
    (head)=NULL; \
  } \
} while(0)

/* Deletion-safe iteration in app order: tmp always holds the successor of
 * el, so the loop body may delete el. */
#ifdef NO_DECLTYPE
#define HASH_ITER(hh,head,el,tmp) \
for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \
  el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL))
#else
#define HASH_ITER(hh,head,el,tmp) \
for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \
  el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL))
#endif

/* obtain a count of items in the hash */
#define HASH_COUNT(head) HASH_CNT(hh,head)
#define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0)

typedef struct UT_hash_bucket {
   struct UT_hash_handle *hh_head;
   unsigned count;

   /* expand_mult is normally set to 0. In this situation, the max chain length
    * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
    * the bucket's chain exceeds this length, bucket expansion is triggered).
    * However, setting expand_mult to a non-zero value delays bucket expansion
    * (that would be triggered by additions to this particular bucket)
    * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
    * (The multiplier is simply expand_mult+1). The whole idea of this
    * multiplier is to reduce bucket expansions, since they are expensive, in
    * situations where we know that a particular bucket tends to be overused.
    * It is better to let its chain length grow to a longer yet-still-bounded
    * value, than to do an O(n) bucket expansion too often.
    */
   unsigned expand_mult;

} UT_hash_bucket;

/* random signature used only to find hash tables in external analysis */
#define HASH_SIGNATURE 0xa0111fe1
#define HASH_BLOOM_SIGNATURE 0xb12220f2

typedef struct UT_hash_table {
   UT_hash_bucket *buckets;
   unsigned num_buckets, log2_num_buckets;
   unsigned num_items;
   struct UT_hash_handle *tail; /* tail hh in app order, for fast append    */
   ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element) */

   /* in an ideal situation (all buckets used equally), no bucket would have
    * more than ceil(#items/#buckets) items. that's the ideal chain length. */
   unsigned ideal_chain_maxlen;

   /* nonideal_items is the number of items in the hash whose chain position
    * exceeds the ideal chain maxlen. these items pay the penalty for an uneven
    * hash distribution; reaching them in a chain traversal takes >ideal steps */
   unsigned nonideal_items;

   /* ineffective expands occur when a bucket doubling was performed, but
    * afterward, more than half the items in the hash had nonideal chain
    * positions. If this happens on two consecutive expansions we inhibit any
    * further expansion, as it's not helping; this happens when the hash
    * function isn't a good fit for the key domain. When expansion is inhibited
    * the hash will still work, albeit no longer in constant time. */
   unsigned ineff_expands, noexpand;

   uint32_t signature; /* used only to find hash tables in external analysis */
#ifdef HASH_BLOOM
   uint32_t bloom_sig; /* used only to test bloom exists in external analysis */
   uint8_t *bloom_bv;
   char bloom_nbits;
#endif

} UT_hash_table;

typedef struct UT_hash_handle {
   struct UT_hash_table *tbl;
   void *prev;                       /* prev element in app order      */
   void *next;                       /* next element in app order      */
   struct UT_hash_handle *hh_prev;   /* previous hh in bucket order    */
   struct UT_hash_handle *hh_next;   /* next hh in bucket order        */
   void *key;                        /* ptr to enclosing struct's key  */
   unsigned keylen;                  /* enclosing struct's key len     */
   unsigned hashv;                   /* result of hash-fcn(key)        */
} UT_hash_handle;

#endif /* UTHASH_H */