/*-------------------------------------------------------------------------
 *
 * dshash.c
 *	  Concurrent hash tables backed by dynamic shared memory areas.
 *
 * This is an open hashing hash table, with a linked list at each table
 * entry.  It supports dynamic resizing, as required to prevent the linked
 * lists from growing too long on average.  Currently, only growing is
 * supported: the hash table never becomes smaller.
 *
 * To deal with concurrency, it has a fixed size set of partitions, each of
 * which is independently locked.  Each bucket maps to a partition; so insert,
 * find and iterate operations normally only acquire one lock.  Therefore,
 * good concurrency is achieved whenever such operations don't collide at the
 * lock partition level.  However, when a resize operation begins, all
 * partition locks must be acquired simultaneously for a brief period.  This
 * is only expected to happen a small number of times until a stable size is
 * found, since growth is geometric.
 *
 * Future versions may support iterators and incremental resizing; for now
 * the implementation is minimalist.
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/lib/dshash.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "lib/dshash.h"
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "utils/dsa.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"

/*
 * An item in the hash table.  This wraps the user's entry object in an
 * envelope that holds a pointer back to the bucket and a pointer to the next
 * item in the bucket.
 */
struct dshash_table_item
{
	/* The next item in the same bucket. */
	dsa_pointer next;
	/* The hashed key, to avoid having to recompute it. */
	dshash_hash hash;
	/* The user's entry object follows here.  See ENTRY_FROM_ITEM(item). */
};

/*
 * The number of partitions for locking purposes.  This is set to match
 * NUM_BUFFER_PARTITIONS for now, on the basis that whatever's good enough for
 * the buffer pool must be good enough for any other purpose.  This could
 * become a runtime parameter in future.
 */
#define DSHASH_NUM_PARTITIONS_LOG2 7
#define DSHASH_NUM_PARTITIONS (1 << DSHASH_NUM_PARTITIONS_LOG2)

/* A magic value used to identify our hash tables. */
#define DSHASH_MAGIC 0x75ff6a20

/*
 * Tracking information for each lock partition.  Initially, each partition
 * corresponds to one bucket, but each time the hash table grows, the buckets
 * covered by each partition split so the number of buckets covered doubles.
 *
 * We might want to add padding here so that each partition is on a different
 * cache line, but doing so would bloat this structure considerably.
 */
typedef struct dshash_partition
{
	LWLock		lock;			/* Protects all buckets in this partition. */
	size_t		count;			/* # of items in this partition's buckets */
} dshash_partition;

/*
 * The head object for a hash table.  This will be stored in dynamic shared
 * memory.
 */
typedef struct dshash_table_control
{
	dshash_table_handle handle;
	uint32		magic;
	dshash_partition partitions[DSHASH_NUM_PARTITIONS];
	int			lwlock_tranche_id;

	/*
	 * The following members are written to only when ALL partitions locks are
	 * held.  They can be read when any one partition lock is held.
	 */

	/* Number of buckets expressed as power of 2 (8 = 256 buckets). */
	size_t		size_log2;		/* log2(number of buckets) */
	dsa_pointer buckets;		/* current bucket array */
} dshash_table_control;

/*
 * Per-backend state for a dynamic hash table.
 */
struct dshash_table
{
	dsa_area   *area;			/* Backing dynamic shared memory area. */
	dshash_parameters params;	/* Parameters. */
	void	   *arg;			/* User-supplied data pointer. */
	dshash_table_control *control;	/* Control object in DSM. */
	dsa_pointer *buckets;		/* Current bucket pointers in DSM. */
	size_t		size_log2;		/* log2(number of buckets) */
	bool		find_locked;	/* Is any partition lock held by 'find'? */
	bool		find_exclusively_locked;	/* ... exclusively? */
};

/* Given a pointer to an item, find the entry (user data) it holds. */
#define ENTRY_FROM_ITEM(item) \
	((char *)(item) + MAXALIGN(sizeof(dshash_table_item)))

/* Given a pointer to an entry, find the item that holds it. */
#define ITEM_FROM_ENTRY(entry)											\
	((dshash_table_item *)((char *)(entry) -							\
						   MAXALIGN(sizeof(dshash_table_item))))

/* How many resize operations (bucket splits) have there been? */
#define NUM_SPLITS(size_log2)					\
	(size_log2 - DSHASH_NUM_PARTITIONS_LOG2)

/* How many buckets are there in each partition at a given size? */
#define BUCKETS_PER_PARTITION(size_log2)		\
	(((size_t) 1) << NUM_SPLITS(size_log2))

/* Max entries before we need to grow.  Half + quarter = 75% load factor. */
#define MAX_COUNT_PER_PARTITION(hash_table)				\
	(BUCKETS_PER_PARTITION(hash_table->size_log2) / 2 +	\
	 BUCKETS_PER_PARTITION(hash_table->size_log2) / 4)

/* Choose partition based on the highest order bits of the hash. */
#define PARTITION_FOR_HASH(hash)										\
	(hash >> ((sizeof(dshash_hash) * CHAR_BIT) - DSHASH_NUM_PARTITIONS_LOG2))

/*
 * Find the bucket index for a given hash and table size.  Each time the table
 * doubles in size, the appropriate bucket for a given hash value doubles and
 * possibly adds one, depending on the newly revealed bit, so that all buckets
 * are split.
 */
#define BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, size_log2)		\
	(hash >> ((sizeof(dshash_hash) * CHAR_BIT) - (size_log2)))

/* The index of the first bucket in a given partition. */
#define BUCKET_INDEX_FOR_PARTITION(partition, size_log2)	\
	((partition) << NUM_SPLITS(size_log2))

/* The head of the active bucket for a given hash value (lvalue). */
#define BUCKET_FOR_HASH(hash_table, hash)								\
	(hash_table->buckets[												\
		BUCKET_INDEX_FOR_HASH_AND_SIZE(hash,							\
									   hash_table->size_log2)])

static void delete_item(dshash_table *hash_table,
						dshash_table_item *item);
static void resize(dshash_table *hash_table, size_t new_size);
static inline void ensure_valid_bucket_pointers(dshash_table *hash_table);
static inline dshash_table_item *find_in_bucket(dshash_table *hash_table,
												const void *key,
												dsa_pointer item_pointer);
static void insert_item_into_bucket(dshash_table *hash_table,
									dsa_pointer item_pointer,
									dshash_table_item *item,
									dsa_pointer *bucket);
static dshash_table_item *insert_into_bucket(dshash_table *hash_table,
											 const void *key,
											 dsa_pointer *bucket);
static bool delete_key_from_bucket(dshash_table *hash_table,
								   const void *key,
								   dsa_pointer *bucket_head);
static bool delete_item_from_bucket(dshash_table *hash_table,
									dshash_table_item *item,
									dsa_pointer *bucket_head);
static inline dshash_hash hash_key(dshash_table *hash_table, const void *key);
static inline bool equal_keys(dshash_table *hash_table,
							  const void *a, const void *b);

#define PARTITION_LOCK(hash_table, i)			\
	(&(hash_table)->control->partitions[(i)].lock)

/*
 * Create a new hash table backed by the given dynamic shared area, with the
 * given parameters.  The returned object is allocated in backend-local memory
 * using the current MemoryContext.  'arg' will be passed through to the
 * compare and hash functions.
194 */ 195 dshash_table * 196 dshash_create(dsa_area *area, const dshash_parameters *params, void *arg) 197 { 198 dshash_table *hash_table; 199 dsa_pointer control; 200 201 /* Allocate the backend-local object representing the hash table. */ 202 hash_table = palloc(sizeof(dshash_table)); 203 204 /* Allocate the control object in shared memory. */ 205 control = dsa_allocate(area, sizeof(dshash_table_control)); 206 207 /* Set up the local and shared hash table structs. */ 208 hash_table->area = area; 209 hash_table->params = *params; 210 hash_table->arg = arg; 211 hash_table->control = dsa_get_address(area, control); 212 hash_table->control->handle = control; 213 hash_table->control->magic = DSHASH_MAGIC; 214 hash_table->control->lwlock_tranche_id = params->tranche_id; 215 216 /* Set up the array of lock partitions. */ 217 { 218 dshash_partition *partitions = hash_table->control->partitions; 219 int tranche_id = hash_table->control->lwlock_tranche_id; 220 int i; 221 222 for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i) 223 { 224 LWLockInitialize(&partitions[i].lock, tranche_id); 225 partitions[i].count = 0; 226 } 227 } 228 229 hash_table->find_locked = false; 230 hash_table->find_exclusively_locked = false; 231 232 /* 233 * Set up the initial array of buckets. Our initial size is the same as 234 * the number of partitions. 
235 */ 236 hash_table->control->size_log2 = DSHASH_NUM_PARTITIONS_LOG2; 237 hash_table->control->buckets = 238 dsa_allocate_extended(area, 239 sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS, 240 DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO); 241 if (!DsaPointerIsValid(hash_table->control->buckets)) 242 { 243 dsa_free(area, control); 244 ereport(ERROR, 245 (errcode(ERRCODE_OUT_OF_MEMORY), 246 errmsg("out of memory"), 247 errdetail("Failed on DSA request of size %zu.", 248 sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS))); 249 } 250 hash_table->buckets = dsa_get_address(area, 251 hash_table->control->buckets); 252 hash_table->size_log2 = hash_table->control->size_log2; 253 254 return hash_table; 255 } 256 257 /* 258 * Attach to an existing hash table using a handle. The returned object is 259 * allocated in backend-local memory using the current MemoryContext. 'arg' 260 * will be passed through to the compare and hash functions. 261 */ 262 dshash_table * 263 dshash_attach(dsa_area *area, const dshash_parameters *params, 264 dshash_table_handle handle, void *arg) 265 { 266 dshash_table *hash_table; 267 dsa_pointer control; 268 269 /* Allocate the backend-local object representing the hash table. */ 270 hash_table = palloc(sizeof(dshash_table)); 271 272 /* Find the control object in shared memory. */ 273 control = handle; 274 275 /* Set up the local hash table struct. */ 276 hash_table->area = area; 277 hash_table->params = *params; 278 hash_table->arg = arg; 279 hash_table->control = dsa_get_address(area, control); 280 hash_table->find_locked = false; 281 hash_table->find_exclusively_locked = false; 282 Assert(hash_table->control->magic == DSHASH_MAGIC); 283 284 /* 285 * These will later be set to the correct values by 286 * ensure_valid_bucket_pointers(), at which time we'll be holding a 287 * partition lock for interlocking against concurrent resizing. 
288 */ 289 hash_table->buckets = NULL; 290 hash_table->size_log2 = 0; 291 292 return hash_table; 293 } 294 295 /* 296 * Detach from a hash table. This frees backend-local resources associated 297 * with the hash table, but the hash table will continue to exist until it is 298 * either explicitly destroyed (by a backend that is still attached to it), or 299 * the area that backs it is returned to the operating system. 300 */ 301 void 302 dshash_detach(dshash_table *hash_table) 303 { 304 Assert(!hash_table->find_locked); 305 306 /* The hash table may have been destroyed. Just free local memory. */ 307 pfree(hash_table); 308 } 309 310 /* 311 * Destroy a hash table, returning all memory to the area. The caller must be 312 * certain that no other backend will attempt to access the hash table before 313 * calling this function. Other backend must explicitly call dshash_detach to 314 * free up backend-local memory associated with the hash table. The backend 315 * that calls dshash_destroy must not call dshash_detach. 316 */ 317 void 318 dshash_destroy(dshash_table *hash_table) 319 { 320 size_t size; 321 size_t i; 322 323 Assert(hash_table->control->magic == DSHASH_MAGIC); 324 ensure_valid_bucket_pointers(hash_table); 325 326 /* Free all the entries. */ 327 size = ((size_t) 1) << hash_table->size_log2; 328 for (i = 0; i < size; ++i) 329 { 330 dsa_pointer item_pointer = hash_table->buckets[i]; 331 332 while (DsaPointerIsValid(item_pointer)) 333 { 334 dshash_table_item *item; 335 dsa_pointer next_item_pointer; 336 337 item = dsa_get_address(hash_table->area, item_pointer); 338 next_item_pointer = item->next; 339 dsa_free(hash_table->area, item_pointer); 340 item_pointer = next_item_pointer; 341 } 342 } 343 344 /* 345 * Vandalize the control block to help catch programming errors where 346 * other backends access the memory formerly occupied by this hash table. 347 */ 348 hash_table->control->magic = 0; 349 350 /* Free the active table and control object. 
*/ 351 dsa_free(hash_table->area, hash_table->control->buckets); 352 dsa_free(hash_table->area, hash_table->control->handle); 353 354 pfree(hash_table); 355 } 356 357 /* 358 * Get a handle that can be used by other processes to attach to this hash 359 * table. 360 */ 361 dshash_table_handle 362 dshash_get_hash_table_handle(dshash_table *hash_table) 363 { 364 Assert(hash_table->control->magic == DSHASH_MAGIC); 365 366 return hash_table->control->handle; 367 } 368 369 /* 370 * Look up an entry, given a key. Returns a pointer to an entry if one can be 371 * found with the given key. Returns NULL if the key is not found. If a 372 * non-NULL value is returned, the entry is locked and must be released by 373 * calling dshash_release_lock. If an error is raised before 374 * dshash_release_lock is called, the lock will be released automatically, but 375 * the caller must take care to ensure that the entry is not left corrupted. 376 * The lock mode is either shared or exclusive depending on 'exclusive'. 377 * 378 * The caller must not hold a lock already. 379 * 380 * Note that the lock held is in fact an LWLock, so interrupts will be held on 381 * return from this function, and not resumed until dshash_release_lock is 382 * called. It is a very good idea for the caller to release the lock quickly. 383 */ 384 void * 385 dshash_find(dshash_table *hash_table, const void *key, bool exclusive) 386 { 387 dshash_hash hash; 388 size_t partition; 389 dshash_table_item *item; 390 391 hash = hash_key(hash_table, key); 392 partition = PARTITION_FOR_HASH(hash); 393 394 Assert(hash_table->control->magic == DSHASH_MAGIC); 395 Assert(!hash_table->find_locked); 396 397 LWLockAcquire(PARTITION_LOCK(hash_table, partition), 398 exclusive ? LW_EXCLUSIVE : LW_SHARED); 399 ensure_valid_bucket_pointers(hash_table); 400 401 /* Search the active bucket. */ 402 item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash)); 403 404 if (!item) 405 { 406 /* Not found. 
*/ 407 LWLockRelease(PARTITION_LOCK(hash_table, partition)); 408 return NULL; 409 } 410 else 411 { 412 /* The caller will free the lock by calling dshash_release. */ 413 hash_table->find_locked = true; 414 hash_table->find_exclusively_locked = exclusive; 415 return ENTRY_FROM_ITEM(item); 416 } 417 } 418 419 /* 420 * Returns a pointer to an exclusively locked item which must be released with 421 * dshash_release_lock. If the key is found in the hash table, 'found' is set 422 * to true and a pointer to the existing entry is returned. If the key is not 423 * found, 'found' is set to false, and a pointer to a newly created entry is 424 * returned. 425 * 426 * Notes above dshash_find() regarding locking and error handling equally 427 * apply here. 428 */ 429 void * 430 dshash_find_or_insert(dshash_table *hash_table, 431 const void *key, 432 bool *found) 433 { 434 dshash_hash hash; 435 size_t partition_index; 436 dshash_partition *partition; 437 dshash_table_item *item; 438 439 hash = hash_key(hash_table, key); 440 partition_index = PARTITION_FOR_HASH(hash); 441 partition = &hash_table->control->partitions[partition_index]; 442 443 Assert(hash_table->control->magic == DSHASH_MAGIC); 444 Assert(!hash_table->find_locked); 445 446 restart: 447 LWLockAcquire(PARTITION_LOCK(hash_table, partition_index), 448 LW_EXCLUSIVE); 449 ensure_valid_bucket_pointers(hash_table); 450 451 /* Search the active bucket. */ 452 item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash)); 453 454 if (item) 455 *found = true; 456 else 457 { 458 *found = false; 459 460 /* Check if we are getting too full. */ 461 if (partition->count > MAX_COUNT_PER_PARTITION(hash_table)) 462 { 463 /* 464 * The load factor (= keys / buckets) for all buckets protected by 465 * this partition is > 0.75. 
Presumably the same applies 466 * generally across the whole hash table (though we don't attempt 467 * to track that directly to avoid contention on some kind of 468 * central counter; we just assume that this partition is 469 * representative). This is a good time to resize. 470 * 471 * Give up our existing lock first, because resizing needs to 472 * reacquire all the locks in the right order to avoid deadlocks. 473 */ 474 LWLockRelease(PARTITION_LOCK(hash_table, partition_index)); 475 resize(hash_table, hash_table->size_log2 + 1); 476 477 goto restart; 478 } 479 480 /* Finally we can try to insert the new item. */ 481 item = insert_into_bucket(hash_table, key, 482 &BUCKET_FOR_HASH(hash_table, hash)); 483 item->hash = hash; 484 /* Adjust per-lock-partition counter for load factor knowledge. */ 485 ++partition->count; 486 } 487 488 /* The caller must release the lock with dshash_release_lock. */ 489 hash_table->find_locked = true; 490 hash_table->find_exclusively_locked = true; 491 return ENTRY_FROM_ITEM(item); 492 } 493 494 /* 495 * Remove an entry by key. Returns true if the key was found and the 496 * corresponding entry was removed. 497 * 498 * To delete an entry that you already have a pointer to, see 499 * dshash_delete_entry. 
500 */ 501 bool 502 dshash_delete_key(dshash_table *hash_table, const void *key) 503 { 504 dshash_hash hash; 505 size_t partition; 506 bool found; 507 508 Assert(hash_table->control->magic == DSHASH_MAGIC); 509 Assert(!hash_table->find_locked); 510 511 hash = hash_key(hash_table, key); 512 partition = PARTITION_FOR_HASH(hash); 513 514 LWLockAcquire(PARTITION_LOCK(hash_table, partition), LW_EXCLUSIVE); 515 ensure_valid_bucket_pointers(hash_table); 516 517 if (delete_key_from_bucket(hash_table, key, 518 &BUCKET_FOR_HASH(hash_table, hash))) 519 { 520 Assert(hash_table->control->partitions[partition].count > 0); 521 found = true; 522 --hash_table->control->partitions[partition].count; 523 } 524 else 525 found = false; 526 527 LWLockRelease(PARTITION_LOCK(hash_table, partition)); 528 529 return found; 530 } 531 532 /* 533 * Remove an entry. The entry must already be exclusively locked, and must 534 * have been obtained by dshash_find or dshash_find_or_insert. Note that this 535 * function releases the lock just like dshash_release_lock. 536 * 537 * To delete an entry by key, see dshash_delete_key. 538 */ 539 void 540 dshash_delete_entry(dshash_table *hash_table, void *entry) 541 { 542 dshash_table_item *item = ITEM_FROM_ENTRY(entry); 543 size_t partition = PARTITION_FOR_HASH(item->hash); 544 545 Assert(hash_table->control->magic == DSHASH_MAGIC); 546 Assert(hash_table->find_locked); 547 Assert(hash_table->find_exclusively_locked); 548 Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition), 549 LW_EXCLUSIVE)); 550 551 delete_item(hash_table, item); 552 hash_table->find_locked = false; 553 hash_table->find_exclusively_locked = false; 554 LWLockRelease(PARTITION_LOCK(hash_table, partition)); 555 } 556 557 /* 558 * Unlock an entry which was locked by dshash_find or dshash_find_or_insert. 
559 */ 560 void 561 dshash_release_lock(dshash_table *hash_table, void *entry) 562 { 563 dshash_table_item *item = ITEM_FROM_ENTRY(entry); 564 size_t partition_index = PARTITION_FOR_HASH(item->hash); 565 566 Assert(hash_table->control->magic == DSHASH_MAGIC); 567 Assert(hash_table->find_locked); 568 Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition_index), 569 hash_table->find_exclusively_locked 570 ? LW_EXCLUSIVE : LW_SHARED)); 571 572 hash_table->find_locked = false; 573 hash_table->find_exclusively_locked = false; 574 LWLockRelease(PARTITION_LOCK(hash_table, partition_index)); 575 } 576 577 /* 578 * A compare function that forwards to memcmp. 579 */ 580 int 581 dshash_memcmp(const void *a, const void *b, size_t size, void *arg) 582 { 583 return memcmp(a, b, size); 584 } 585 586 /* 587 * A hash function that forwards to tag_hash. 588 */ 589 dshash_hash 590 dshash_memhash(const void *v, size_t size, void *arg) 591 { 592 return tag_hash(v, size); 593 } 594 595 /* 596 * Print debugging information about the internal state of the hash table to 597 * stderr. The caller must hold no partition locks. 
598 */ 599 void 600 dshash_dump(dshash_table *hash_table) 601 { 602 size_t i; 603 size_t j; 604 605 Assert(hash_table->control->magic == DSHASH_MAGIC); 606 Assert(!hash_table->find_locked); 607 608 for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i) 609 { 610 Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i))); 611 LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_SHARED); 612 } 613 614 ensure_valid_bucket_pointers(hash_table); 615 616 fprintf(stderr, 617 "hash table size = %zu\n", (size_t) 1 << hash_table->size_log2); 618 for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i) 619 { 620 dshash_partition *partition = &hash_table->control->partitions[i]; 621 size_t begin = BUCKET_INDEX_FOR_PARTITION(i, hash_table->size_log2); 622 size_t end = BUCKET_INDEX_FOR_PARTITION(i + 1, hash_table->size_log2); 623 624 fprintf(stderr, " partition %zu\n", i); 625 fprintf(stderr, 626 " active buckets (key count = %zu)\n", partition->count); 627 628 for (j = begin; j < end; ++j) 629 { 630 size_t count = 0; 631 dsa_pointer bucket = hash_table->buckets[j]; 632 633 while (DsaPointerIsValid(bucket)) 634 { 635 dshash_table_item *item; 636 637 item = dsa_get_address(hash_table->area, bucket); 638 639 bucket = item->next; 640 ++count; 641 } 642 fprintf(stderr, " bucket %zu (key count = %zu)\n", j, count); 643 } 644 } 645 646 for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i) 647 LWLockRelease(PARTITION_LOCK(hash_table, i)); 648 } 649 650 /* 651 * Delete a locked item to which we have a pointer. 
652 */ 653 static void 654 delete_item(dshash_table *hash_table, dshash_table_item *item) 655 { 656 size_t hash = item->hash; 657 size_t partition = PARTITION_FOR_HASH(hash); 658 659 Assert(LWLockHeldByMe(PARTITION_LOCK(hash_table, partition))); 660 661 if (delete_item_from_bucket(hash_table, item, 662 &BUCKET_FOR_HASH(hash_table, hash))) 663 { 664 Assert(hash_table->control->partitions[partition].count > 0); 665 --hash_table->control->partitions[partition].count; 666 } 667 else 668 { 669 Assert(false); 670 } 671 } 672 673 /* 674 * Grow the hash table if necessary to the requested number of buckets. The 675 * requested size must be double some previously observed size. 676 * 677 * Must be called without any partition lock held. 678 */ 679 static void 680 resize(dshash_table *hash_table, size_t new_size_log2) 681 { 682 dsa_pointer old_buckets; 683 dsa_pointer new_buckets_shared; 684 dsa_pointer *new_buckets; 685 size_t size; 686 size_t new_size = ((size_t) 1) << new_size_log2; 687 size_t i; 688 689 /* 690 * Acquire the locks for all lock partitions. This is expensive, but we 691 * shouldn't have to do it many times. 692 */ 693 for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i) 694 { 695 Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i))); 696 697 LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_EXCLUSIVE); 698 if (i == 0 && hash_table->control->size_log2 >= new_size_log2) 699 { 700 /* 701 * Another backend has already increased the size; we can avoid 702 * obtaining all the locks and return early. 703 */ 704 LWLockRelease(PARTITION_LOCK(hash_table, 0)); 705 return; 706 } 707 } 708 709 Assert(new_size_log2 == hash_table->control->size_log2 + 1); 710 711 /* Allocate the space for the new table. 
*/ 712 new_buckets_shared = dsa_allocate0(hash_table->area, 713 sizeof(dsa_pointer) * new_size); 714 new_buckets = dsa_get_address(hash_table->area, new_buckets_shared); 715 716 /* 717 * We've allocated the new bucket array; all that remains to do now is to 718 * reinsert all items, which amounts to adjusting all the pointers. 719 */ 720 size = ((size_t) 1) << hash_table->control->size_log2; 721 for (i = 0; i < size; ++i) 722 { 723 dsa_pointer item_pointer = hash_table->buckets[i]; 724 725 while (DsaPointerIsValid(item_pointer)) 726 { 727 dshash_table_item *item; 728 dsa_pointer next_item_pointer; 729 730 item = dsa_get_address(hash_table->area, item_pointer); 731 next_item_pointer = item->next; 732 insert_item_into_bucket(hash_table, item_pointer, item, 733 &new_buckets[BUCKET_INDEX_FOR_HASH_AND_SIZE(item->hash, 734 new_size_log2)]); 735 item_pointer = next_item_pointer; 736 } 737 } 738 739 /* Swap the hash table into place and free the old one. */ 740 old_buckets = hash_table->control->buckets; 741 hash_table->control->buckets = new_buckets_shared; 742 hash_table->control->size_log2 = new_size_log2; 743 hash_table->buckets = new_buckets; 744 dsa_free(hash_table->area, old_buckets); 745 746 /* Release all the locks. */ 747 for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i) 748 LWLockRelease(PARTITION_LOCK(hash_table, i)); 749 } 750 751 /* 752 * Make sure that our backend-local bucket pointers are up to date. The 753 * caller must have locked one lock partition, which prevents resize() from 754 * running concurrently. 755 */ 756 static inline void 757 ensure_valid_bucket_pointers(dshash_table *hash_table) 758 { 759 if (hash_table->size_log2 != hash_table->control->size_log2) 760 { 761 hash_table->buckets = dsa_get_address(hash_table->area, 762 hash_table->control->buckets); 763 hash_table->size_log2 = hash_table->control->size_log2; 764 } 765 } 766 767 /* 768 * Scan a locked bucket for a match, using the provided compare function. 
769 */ 770 static inline dshash_table_item * 771 find_in_bucket(dshash_table *hash_table, const void *key, 772 dsa_pointer item_pointer) 773 { 774 while (DsaPointerIsValid(item_pointer)) 775 { 776 dshash_table_item *item; 777 778 item = dsa_get_address(hash_table->area, item_pointer); 779 if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item))) 780 return item; 781 item_pointer = item->next; 782 } 783 return NULL; 784 } 785 786 /* 787 * Insert an already-allocated item into a bucket. 788 */ 789 static void 790 insert_item_into_bucket(dshash_table *hash_table, 791 dsa_pointer item_pointer, 792 dshash_table_item *item, 793 dsa_pointer *bucket) 794 { 795 Assert(item == dsa_get_address(hash_table->area, item_pointer)); 796 797 item->next = *bucket; 798 *bucket = item_pointer; 799 } 800 801 /* 802 * Allocate space for an entry with the given key and insert it into the 803 * provided bucket. 804 */ 805 static dshash_table_item * 806 insert_into_bucket(dshash_table *hash_table, 807 const void *key, 808 dsa_pointer *bucket) 809 { 810 dsa_pointer item_pointer; 811 dshash_table_item *item; 812 813 item_pointer = dsa_allocate(hash_table->area, 814 hash_table->params.entry_size + 815 MAXALIGN(sizeof(dshash_table_item))); 816 item = dsa_get_address(hash_table->area, item_pointer); 817 memcpy(ENTRY_FROM_ITEM(item), key, hash_table->params.key_size); 818 insert_item_into_bucket(hash_table, item_pointer, item, bucket); 819 return item; 820 } 821 822 /* 823 * Search a bucket for a matching key and delete it. 
824 */ 825 static bool 826 delete_key_from_bucket(dshash_table *hash_table, 827 const void *key, 828 dsa_pointer *bucket_head) 829 { 830 while (DsaPointerIsValid(*bucket_head)) 831 { 832 dshash_table_item *item; 833 834 item = dsa_get_address(hash_table->area, *bucket_head); 835 836 if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item))) 837 { 838 dsa_pointer next; 839 840 next = item->next; 841 dsa_free(hash_table->area, *bucket_head); 842 *bucket_head = next; 843 844 return true; 845 } 846 bucket_head = &item->next; 847 } 848 return false; 849 } 850 851 /* 852 * Delete the specified item from the bucket. 853 */ 854 static bool 855 delete_item_from_bucket(dshash_table *hash_table, 856 dshash_table_item *item, 857 dsa_pointer *bucket_head) 858 { 859 while (DsaPointerIsValid(*bucket_head)) 860 { 861 dshash_table_item *bucket_item; 862 863 bucket_item = dsa_get_address(hash_table->area, *bucket_head); 864 865 if (bucket_item == item) 866 { 867 dsa_pointer next; 868 869 next = item->next; 870 dsa_free(hash_table->area, *bucket_head); 871 *bucket_head = next; 872 return true; 873 } 874 bucket_head = &bucket_item->next; 875 } 876 return false; 877 } 878 879 /* 880 * Compute the hash value for a key. 881 */ 882 static inline dshash_hash 883 hash_key(dshash_table *hash_table, const void *key) 884 { 885 return hash_table->params.hash_function(key, 886 hash_table->params.key_size, 887 hash_table->arg); 888 } 889 890 /* 891 * Check whether two keys compare equal. 892 */ 893 static inline bool 894 equal_keys(dshash_table *hash_table, const void *a, const void *b) 895 { 896 return hash_table->params.compare_function(a, b, 897 hash_table->params.key_size, 898 hash_table->arg) == 0; 899 } 900