1 /*------------------------------------------------------------------------- 2 * 3 * gistbuildbuffers.c 4 * node buffer management functions for GiST buffering build algorithm. 5 * 6 * 7 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group 8 * Portions Copyright (c) 1994, Regents of the University of California 9 * 10 * IDENTIFICATION 11 * src/backend/access/gist/gistbuildbuffers.c 12 * 13 *------------------------------------------------------------------------- 14 */ 15 #include "postgres.h" 16 17 #include "access/genam.h" 18 #include "access/gist_private.h" 19 #include "catalog/index.h" 20 #include "miscadmin.h" 21 #include "storage/buffile.h" 22 #include "storage/bufmgr.h" 23 #include "utils/memutils.h" 24 #include "utils/rel.h" 25 26 static GISTNodeBufferPage *gistAllocateNewPageBuffer(GISTBuildBuffers *gfbb); 27 static void gistAddLoadedBuffer(GISTBuildBuffers *gfbb, 28 GISTNodeBuffer *nodeBuffer); 29 static void gistLoadNodeBuffer(GISTBuildBuffers *gfbb, 30 GISTNodeBuffer *nodeBuffer); 31 static void gistUnloadNodeBuffer(GISTBuildBuffers *gfbb, 32 GISTNodeBuffer *nodeBuffer); 33 static void gistPlaceItupToPage(GISTNodeBufferPage *pageBuffer, 34 IndexTuple item); 35 static void gistGetItupFromPage(GISTNodeBufferPage *pageBuffer, 36 IndexTuple *item); 37 static long gistBuffersGetFreeBlock(GISTBuildBuffers *gfbb); 38 static void gistBuffersReleaseBlock(GISTBuildBuffers *gfbb, long blocknum); 39 40 static void ReadTempFileBlock(BufFile *file, long blknum, void *ptr); 41 static void WriteTempFileBlock(BufFile *file, long blknum, void *ptr); 42 43 44 /* 45 * Initialize GiST build buffers. 46 */ 47 GISTBuildBuffers * 48 gistInitBuildBuffers(int pagesPerBuffer, int levelStep, int maxLevel) 49 { 50 GISTBuildBuffers *gfbb; 51 HASHCTL hashCtl; 52 53 gfbb = palloc(sizeof(GISTBuildBuffers)); 54 gfbb->pagesPerBuffer = pagesPerBuffer; 55 gfbb->levelStep = levelStep; 56 57 /* 58 * Create a temporary file to hold buffer pages that are swapped out of 59 * memory. 60 */ 61 gfbb->pfile = BufFileCreateTemp(false); 62 gfbb->nFileBlocks = 0; 63 64 /* Initialize free page management. */ 65 gfbb->nFreeBlocks = 0; 66 gfbb->freeBlocksLen = 32; 67 gfbb->freeBlocks = (long *) palloc(gfbb->freeBlocksLen * sizeof(long)); 68 69 /* 70 * Current memory context will be used for all in-memory data structures 71 * of buffers which are persistent during buffering build. 72 */ 73 gfbb->context = CurrentMemoryContext; 74 75 /* 76 * nodeBuffersTab hash is association between index blocks and it's 77 * buffers. 78 */ 79 memset(&hashCtl, 0, sizeof(hashCtl)); 80 hashCtl.keysize = sizeof(BlockNumber); 81 hashCtl.entrysize = sizeof(GISTNodeBuffer); 82 hashCtl.hcxt = CurrentMemoryContext; 83 gfbb->nodeBuffersTab = hash_create("gistbuildbuffers", 84 1024, 85 &hashCtl, 86 HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); 87 88 gfbb->bufferEmptyingQueue = NIL; 89 90 /* 91 * Per-level node buffers lists for final buffers emptying process. Node 92 * buffers are inserted here when they are created. 93 */ 94 gfbb->buffersOnLevelsLen = 1; 95 gfbb->buffersOnLevels = (List **) palloc(sizeof(List *) * 96 gfbb->buffersOnLevelsLen); 97 gfbb->buffersOnLevels[0] = NIL; 98 99 /* 100 * Block numbers of node buffers which last pages are currently loaded 101 * into main memory. 102 */ 103 gfbb->loadedBuffersLen = 32; 104 gfbb->loadedBuffers = (GISTNodeBuffer **) palloc(gfbb->loadedBuffersLen * 105 sizeof(GISTNodeBuffer *)); 106 gfbb->loadedBuffersCount = 0; 107 108 gfbb->rootlevel = maxLevel; 109 110 return gfbb; 111 } 112 113 /* 114 * Returns a node buffer for given block. The buffer is created if it 115 * doesn't exist yet. 116 */ 117 GISTNodeBuffer * 118 gistGetNodeBuffer(GISTBuildBuffers *gfbb, GISTSTATE *giststate, 119 BlockNumber nodeBlocknum, int level) 120 { 121 GISTNodeBuffer *nodeBuffer; 122 bool found; 123 124 /* Find node buffer in hash table */ 125 nodeBuffer = (GISTNodeBuffer *) hash_search(gfbb->nodeBuffersTab, 126 (const void *) &nodeBlocknum, 127 HASH_ENTER, 128 &found); 129 if (!found) 130 { 131 /* 132 * Node buffer wasn't found. Initialize the new buffer as empty. 133 */ 134 MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context); 135 136 /* nodeBuffer->nodeBlocknum is the hash key and was filled in already */ 137 nodeBuffer->blocksCount = 0; 138 nodeBuffer->pageBlocknum = InvalidBlockNumber; 139 nodeBuffer->pageBuffer = NULL; 140 nodeBuffer->queuedForEmptying = false; 141 nodeBuffer->isTemp = false; 142 nodeBuffer->level = level; 143 144 /* 145 * Add this buffer to the list of buffers on this level. Enlarge 146 * buffersOnLevels array if needed. 147 */ 148 if (level >= gfbb->buffersOnLevelsLen) 149 { 150 int i; 151 152 gfbb->buffersOnLevels = 153 (List **) repalloc(gfbb->buffersOnLevels, 154 (level + 1) * sizeof(List *)); 155 156 /* initialize the enlarged portion */ 157 for (i = gfbb->buffersOnLevelsLen; i <= level; i++) 158 gfbb->buffersOnLevels[i] = NIL; 159 gfbb->buffersOnLevelsLen = level + 1; 160 } 161 162 /* 163 * Prepend the new buffer to the list of buffers on this level. It's 164 * not arbitrary that the new buffer is put to the beginning of the 165 * list: in the final emptying phase we loop through all buffers at 166 * each level, and flush them. If a page is split during the emptying, 167 * it's more efficient to flush the new splitted pages first, before 168 * moving on to pre-existing pages on the level. The buffers just 169 * created during the page split are likely still in cache, so 170 * flushing them immediately is more efficient than putting them to 171 * the end of the queue. 172 */ 173 gfbb->buffersOnLevels[level] = lcons(nodeBuffer, 174 gfbb->buffersOnLevels[level]); 175 176 MemoryContextSwitchTo(oldcxt); 177 } 178 179 return nodeBuffer; 180 } 181 182 /* 183 * Allocate memory for a buffer page. 184 */ 185 static GISTNodeBufferPage * 186 gistAllocateNewPageBuffer(GISTBuildBuffers *gfbb) 187 { 188 GISTNodeBufferPage *pageBuffer; 189 190 pageBuffer = (GISTNodeBufferPage *) MemoryContextAllocZero(gfbb->context, 191 BLCKSZ); 192 pageBuffer->prev = InvalidBlockNumber; 193 194 /* Set page free space */ 195 PAGE_FREE_SPACE(pageBuffer) = BLCKSZ - BUFFER_PAGE_DATA_OFFSET; 196 return pageBuffer; 197 } 198 199 /* 200 * Add specified buffer into loadedBuffers array. 201 */ 202 static void 203 gistAddLoadedBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer) 204 { 205 /* Never add a temporary buffer to the array */ 206 if (nodeBuffer->isTemp) 207 return; 208 209 /* Enlarge the array if needed */ 210 if (gfbb->loadedBuffersCount >= gfbb->loadedBuffersLen) 211 { 212 gfbb->loadedBuffersLen *= 2; 213 gfbb->loadedBuffers = (GISTNodeBuffer **) 214 repalloc(gfbb->loadedBuffers, 215 gfbb->loadedBuffersLen * sizeof(GISTNodeBuffer *)); 216 } 217 218 gfbb->loadedBuffers[gfbb->loadedBuffersCount] = nodeBuffer; 219 gfbb->loadedBuffersCount++; 220 } 221 222 /* 223 * Load last page of node buffer into main memory. 224 */ 225 static void 226 gistLoadNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer) 227 { 228 /* Check if we really should load something */ 229 if (!nodeBuffer->pageBuffer && nodeBuffer->blocksCount > 0) 230 { 231 /* Allocate memory for page */ 232 nodeBuffer->pageBuffer = gistAllocateNewPageBuffer(gfbb); 233 234 /* Read block from temporary file */ 235 ReadTempFileBlock(gfbb->pfile, nodeBuffer->pageBlocknum, 236 nodeBuffer->pageBuffer); 237 238 /* Mark file block as free */ 239 gistBuffersReleaseBlock(gfbb, nodeBuffer->pageBlocknum); 240 241 /* Mark node buffer as loaded */ 242 gistAddLoadedBuffer(gfbb, nodeBuffer); 243 nodeBuffer->pageBlocknum = InvalidBlockNumber; 244 } 245 } 246 247 /* 248 * Write last page of node buffer to the disk. 249 */ 250 static void 251 gistUnloadNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer) 252 { 253 /* Check if we have something to write */ 254 if (nodeBuffer->pageBuffer) 255 { 256 BlockNumber blkno; 257 258 /* Get free file block */ 259 blkno = gistBuffersGetFreeBlock(gfbb); 260 261 /* Write block to the temporary file */ 262 WriteTempFileBlock(gfbb->pfile, blkno, nodeBuffer->pageBuffer); 263 264 /* Free memory of that page */ 265 pfree(nodeBuffer->pageBuffer); 266 nodeBuffer->pageBuffer = NULL; 267 268 /* Save block number */ 269 nodeBuffer->pageBlocknum = blkno; 270 } 271 } 272 273 /* 274 * Write last pages of all node buffers to the disk. 275 */ 276 void 277 gistUnloadNodeBuffers(GISTBuildBuffers *gfbb) 278 { 279 int i; 280 281 /* Unload all the buffers that have a page loaded in memory. */ 282 for (i = 0; i < gfbb->loadedBuffersCount; i++) 283 gistUnloadNodeBuffer(gfbb, gfbb->loadedBuffers[i]); 284 285 /* Now there are no node buffers with loaded last page */ 286 gfbb->loadedBuffersCount = 0; 287 } 288 289 /* 290 * Add index tuple to buffer page. 291 */ 292 static void 293 gistPlaceItupToPage(GISTNodeBufferPage *pageBuffer, IndexTuple itup) 294 { 295 Size itupsz = IndexTupleSize(itup); 296 char *ptr; 297 298 /* There should be enough of space. */ 299 Assert(PAGE_FREE_SPACE(pageBuffer) >= MAXALIGN(itupsz)); 300 301 /* Reduce free space value of page to reserve a spot for the tuple. */ 302 PAGE_FREE_SPACE(pageBuffer) -= MAXALIGN(itupsz); 303 304 /* Get pointer to the spot we reserved (ie. end of free space). */ 305 ptr = (char *) pageBuffer + BUFFER_PAGE_DATA_OFFSET 306 + PAGE_FREE_SPACE(pageBuffer); 307 308 /* Copy the index tuple there. */ 309 memcpy(ptr, itup, itupsz); 310 } 311 312 /* 313 * Get last item from buffer page and remove it from page. 314 */ 315 static void 316 gistGetItupFromPage(GISTNodeBufferPage *pageBuffer, IndexTuple *itup) 317 { 318 IndexTuple ptr; 319 Size itupsz; 320 321 Assert(!PAGE_IS_EMPTY(pageBuffer)); /* Page shouldn't be empty */ 322 323 /* Get pointer to last index tuple */ 324 ptr = (IndexTuple) ((char *) pageBuffer 325 + BUFFER_PAGE_DATA_OFFSET 326 + PAGE_FREE_SPACE(pageBuffer)); 327 itupsz = IndexTupleSize(ptr); 328 329 /* Make a copy of the tuple */ 330 *itup = (IndexTuple) palloc(itupsz); 331 memcpy(*itup, ptr, itupsz); 332 333 /* Mark the space used by the tuple as free */ 334 PAGE_FREE_SPACE(pageBuffer) += MAXALIGN(itupsz); 335 } 336 337 /* 338 * Push an index tuple to node buffer. 339 */ 340 void 341 gistPushItupToNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer, 342 IndexTuple itup) 343 { 344 /* 345 * Most part of memory operations will be in buffering build persistent 346 * context. So, let's switch to it. 347 */ 348 MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context); 349 350 /* 351 * If the buffer is currently empty, create the first page. 352 */ 353 if (nodeBuffer->blocksCount == 0) 354 { 355 nodeBuffer->pageBuffer = gistAllocateNewPageBuffer(gfbb); 356 nodeBuffer->blocksCount = 1; 357 gistAddLoadedBuffer(gfbb, nodeBuffer); 358 } 359 360 /* Load last page of node buffer if it wasn't in memory already */ 361 if (!nodeBuffer->pageBuffer) 362 gistLoadNodeBuffer(gfbb, nodeBuffer); 363 364 /* 365 * Check if there is enough space on the last page for the tuple. 366 */ 367 if (PAGE_NO_SPACE(nodeBuffer->pageBuffer, itup)) 368 { 369 /* 370 * Nope. Swap previous block to disk and allocate a new one. 371 */ 372 BlockNumber blkno; 373 374 /* Write filled page to the disk */ 375 blkno = gistBuffersGetFreeBlock(gfbb); 376 WriteTempFileBlock(gfbb->pfile, blkno, nodeBuffer->pageBuffer); 377 378 /* 379 * Reset the in-memory page as empty, and link the previous block to 380 * the new page by storing its block number in the prev-link. 381 */ 382 PAGE_FREE_SPACE(nodeBuffer->pageBuffer) = 383 BLCKSZ - MAXALIGN(offsetof(GISTNodeBufferPage, tupledata)); 384 nodeBuffer->pageBuffer->prev = blkno; 385 386 /* We've just added one more page */ 387 nodeBuffer->blocksCount++; 388 } 389 390 gistPlaceItupToPage(nodeBuffer->pageBuffer, itup); 391 392 /* 393 * If the buffer just overflowed, add it to the emptying queue. 394 */ 395 if (BUFFER_HALF_FILLED(nodeBuffer, gfbb) && !nodeBuffer->queuedForEmptying) 396 { 397 gfbb->bufferEmptyingQueue = lcons(nodeBuffer, 398 gfbb->bufferEmptyingQueue); 399 nodeBuffer->queuedForEmptying = true; 400 } 401 402 /* Restore memory context */ 403 MemoryContextSwitchTo(oldcxt); 404 } 405 406 /* 407 * Removes one index tuple from node buffer. Returns true if success and false 408 * if node buffer is empty. 409 */ 410 bool 411 gistPopItupFromNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer, 412 IndexTuple *itup) 413 { 414 /* 415 * If node buffer is empty then return false. 416 */ 417 if (nodeBuffer->blocksCount <= 0) 418 return false; 419 420 /* Load last page of node buffer if needed */ 421 if (!nodeBuffer->pageBuffer) 422 gistLoadNodeBuffer(gfbb, nodeBuffer); 423 424 /* 425 * Get index tuple from last non-empty page. 426 */ 427 gistGetItupFromPage(nodeBuffer->pageBuffer, itup); 428 429 /* 430 * If we just removed the last tuple from the page, fetch previous page on 431 * this node buffer (if any). 432 */ 433 if (PAGE_IS_EMPTY(nodeBuffer->pageBuffer)) 434 { 435 BlockNumber prevblkno; 436 437 /* 438 * blocksCount includes the page in pageBuffer, so decrease it now. 439 */ 440 nodeBuffer->blocksCount--; 441 442 /* 443 * If there's more pages, fetch previous one. 444 */ 445 prevblkno = nodeBuffer->pageBuffer->prev; 446 if (prevblkno != InvalidBlockNumber) 447 { 448 /* There is a previous page. Fetch it. */ 449 Assert(nodeBuffer->blocksCount > 0); 450 ReadTempFileBlock(gfbb->pfile, prevblkno, nodeBuffer->pageBuffer); 451 452 /* 453 * Now that we've read the block in memory, we can release its 454 * on-disk block for reuse. 455 */ 456 gistBuffersReleaseBlock(gfbb, prevblkno); 457 } 458 else 459 { 460 /* No more pages. Free memory. */ 461 Assert(nodeBuffer->blocksCount == 0); 462 pfree(nodeBuffer->pageBuffer); 463 nodeBuffer->pageBuffer = NULL; 464 } 465 } 466 return true; 467 } 468 469 /* 470 * Select a currently unused block for writing to. 471 */ 472 static long 473 gistBuffersGetFreeBlock(GISTBuildBuffers *gfbb) 474 { 475 /* 476 * If there are multiple free blocks, we select the one appearing last in 477 * freeBlocks[]. If there are none, assign the next block at the end of 478 * the file (causing the file to be extended). 479 */ 480 if (gfbb->nFreeBlocks > 0) 481 return gfbb->freeBlocks[--gfbb->nFreeBlocks]; 482 else 483 return gfbb->nFileBlocks++; 484 } 485 486 /* 487 * Return a block# to the freelist. 488 */ 489 static void 490 gistBuffersReleaseBlock(GISTBuildBuffers *gfbb, long blocknum) 491 { 492 int ndx; 493 494 /* Enlarge freeBlocks array if full. */ 495 if (gfbb->nFreeBlocks >= gfbb->freeBlocksLen) 496 { 497 gfbb->freeBlocksLen *= 2; 498 gfbb->freeBlocks = (long *) repalloc(gfbb->freeBlocks, 499 gfbb->freeBlocksLen * 500 sizeof(long)); 501 } 502 503 /* Add blocknum to array */ 504 ndx = gfbb->nFreeBlocks++; 505 gfbb->freeBlocks[ndx] = blocknum; 506 } 507 508 /* 509 * Free buffering build data structure. 510 */ 511 void 512 gistFreeBuildBuffers(GISTBuildBuffers *gfbb) 513 { 514 /* Close buffers file. */ 515 BufFileClose(gfbb->pfile); 516 517 /* All other things will be freed on memory context release */ 518 } 519 520 /* 521 * Data structure representing information about node buffer for index tuples 522 * relocation from splitted node buffer. 523 */ 524 typedef struct 525 { 526 GISTENTRY entry[INDEX_MAX_KEYS]; 527 bool isnull[INDEX_MAX_KEYS]; 528 GISTPageSplitInfo *splitinfo; 529 GISTNodeBuffer *nodeBuffer; 530 } RelocationBufferInfo; 531 532 /* 533 * At page split, distribute tuples from the buffer of the split page to 534 * new buffers for the created page halves. This also adjusts the downlinks 535 * in 'splitinfo' to include the tuples in the buffers. 536 */ 537 void 538 gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate, 539 Relation r, int level, 540 Buffer buffer, List *splitinfo) 541 { 542 RelocationBufferInfo *relocationBuffersInfos; 543 bool found; 544 GISTNodeBuffer *nodeBuffer; 545 BlockNumber blocknum; 546 IndexTuple itup; 547 int splitPagesCount = 0, 548 i; 549 GISTENTRY entry[INDEX_MAX_KEYS]; 550 bool isnull[INDEX_MAX_KEYS]; 551 GISTNodeBuffer oldBuf; 552 ListCell *lc; 553 554 /* If the splitted page doesn't have buffers, we have nothing to do. */ 555 if (!LEVEL_HAS_BUFFERS(level, gfbb)) 556 return; 557 558 /* 559 * Get the node buffer of the splitted page. 560 */ 561 blocknum = BufferGetBlockNumber(buffer); 562 nodeBuffer = hash_search(gfbb->nodeBuffersTab, &blocknum, 563 HASH_FIND, &found); 564 if (!found) 565 { 566 /* The page has no buffer, so we have nothing to do. */ 567 return; 568 } 569 570 /* 571 * Make a copy of the old buffer, as we're going reuse it as the buffer 572 * for the new left page, which is on the same block as the old page. 573 * That's not true for the root page, but that's fine because we never 574 * have a buffer on the root page anyway. The original algorithm as 575 * described by Arge et al did, but it's of no use, as you might as well 576 * read the tuples straight from the heap instead of the root buffer. 577 */ 578 Assert(blocknum != GIST_ROOT_BLKNO); 579 memcpy(&oldBuf, nodeBuffer, sizeof(GISTNodeBuffer)); 580 oldBuf.isTemp = true; 581 582 /* Reset the old buffer, used for the new left page from now on */ 583 nodeBuffer->blocksCount = 0; 584 nodeBuffer->pageBuffer = NULL; 585 nodeBuffer->pageBlocknum = InvalidBlockNumber; 586 587 /* 588 * Allocate memory for information about relocation buffers. 589 */ 590 splitPagesCount = list_length(splitinfo); 591 relocationBuffersInfos = 592 (RelocationBufferInfo *) palloc(sizeof(RelocationBufferInfo) * 593 splitPagesCount); 594 595 /* 596 * Fill relocation buffers information for node buffers of pages produced 597 * by split. 598 */ 599 i = 0; 600 foreach(lc, splitinfo) 601 { 602 GISTPageSplitInfo *si = (GISTPageSplitInfo *) lfirst(lc); 603 GISTNodeBuffer *newNodeBuffer; 604 605 /* Decompress parent index tuple of node buffer page. */ 606 gistDeCompressAtt(giststate, r, 607 si->downlink, NULL, (OffsetNumber) 0, 608 relocationBuffersInfos[i].entry, 609 relocationBuffersInfos[i].isnull); 610 611 /* 612 * Create a node buffer for the page. The leftmost half is on the same 613 * block as the old page before split, so for the leftmost half this 614 * will return the original buffer. The tuples on the original buffer 615 * were relinked to the temporary buffer, so the original one is now 616 * empty. 617 */ 618 newNodeBuffer = gistGetNodeBuffer(gfbb, giststate, BufferGetBlockNumber(si->buf), level); 619 620 relocationBuffersInfos[i].nodeBuffer = newNodeBuffer; 621 relocationBuffersInfos[i].splitinfo = si; 622 623 i++; 624 } 625 626 /* 627 * Loop through all index tuples in the buffer of the page being split, 628 * moving them to buffers for the new pages. We try to move each tuple to 629 * the page that will result in the lowest penalty for the leading column 630 * or, in the case of a tie, the lowest penalty for the earliest column 631 * that is not tied. 632 * 633 * The page searching logic is very similar to gistchoose(). 634 */ 635 while (gistPopItupFromNodeBuffer(gfbb, &oldBuf, &itup)) 636 { 637 float best_penalty[INDEX_MAX_KEYS]; 638 int i, 639 which; 640 IndexTuple newtup; 641 RelocationBufferInfo *targetBufferInfo; 642 643 gistDeCompressAtt(giststate, r, 644 itup, NULL, (OffsetNumber) 0, entry, isnull); 645 646 /* default to using first page (shouldn't matter) */ 647 which = 0; 648 649 /* 650 * best_penalty[j] is the best penalty we have seen so far for column 651 * j, or -1 when we haven't yet examined column j. Array entries to 652 * the right of the first -1 are undefined. 653 */ 654 best_penalty[0] = -1; 655 656 /* 657 * Loop over possible target pages, looking for one to move this tuple 658 * to. 659 */ 660 for (i = 0; i < splitPagesCount; i++) 661 { 662 RelocationBufferInfo *splitPageInfo = &relocationBuffersInfos[i]; 663 bool zero_penalty; 664 int j; 665 666 zero_penalty = true; 667 668 /* Loop over index attributes. */ 669 for (j = 0; j < IndexRelationGetNumberOfKeyAttributes(r); j++) 670 { 671 float usize; 672 673 /* Compute penalty for this column. */ 674 usize = gistpenalty(giststate, j, 675 &splitPageInfo->entry[j], 676 splitPageInfo->isnull[j], 677 &entry[j], isnull[j]); 678 if (usize > 0) 679 zero_penalty = false; 680 681 if (best_penalty[j] < 0 || usize < best_penalty[j]) 682 { 683 /* 684 * New best penalty for column. Tentatively select this 685 * page as the target, and record the best penalty. Then 686 * reset the next column's penalty to "unknown" (and 687 * indirectly, the same for all the ones to its right). 688 * This will force us to adopt this page's penalty values 689 * as the best for all the remaining columns during 690 * subsequent loop iterations. 691 */ 692 which = i; 693 best_penalty[j] = usize; 694 695 if (j < IndexRelationGetNumberOfKeyAttributes(r) - 1) 696 best_penalty[j + 1] = -1; 697 } 698 else if (best_penalty[j] == usize) 699 { 700 /* 701 * The current page is exactly as good for this column as 702 * the best page seen so far. The next iteration of this 703 * loop will compare the next column. 704 */ 705 } 706 else 707 { 708 /* 709 * The current page is worse for this column than the best 710 * page seen so far. Skip the remaining columns and move 711 * on to the next page, if any. 712 */ 713 zero_penalty = false; /* so outer loop won't exit */ 714 break; 715 } 716 } 717 718 /* 719 * If we find a page with zero penalty for all columns, there's no 720 * need to examine remaining pages; just break out of the loop and 721 * return it. 722 */ 723 if (zero_penalty) 724 break; 725 } 726 727 /* OK, "which" is the page index to push the tuple to */ 728 targetBufferInfo = &relocationBuffersInfos[which]; 729 730 /* Push item to selected node buffer */ 731 gistPushItupToNodeBuffer(gfbb, targetBufferInfo->nodeBuffer, itup); 732 733 /* Adjust the downlink for this page, if needed. */ 734 newtup = gistgetadjusted(r, targetBufferInfo->splitinfo->downlink, 735 itup, giststate); 736 if (newtup) 737 { 738 gistDeCompressAtt(giststate, r, 739 newtup, NULL, (OffsetNumber) 0, 740 targetBufferInfo->entry, 741 targetBufferInfo->isnull); 742 743 targetBufferInfo->splitinfo->downlink = newtup; 744 } 745 } 746 747 pfree(relocationBuffersInfos); 748 } 749 750 751 /* 752 * Wrappers around BufFile operations. The main difference is that these 753 * wrappers report errors with ereport(), so that the callers don't need 754 * to check the return code. 755 */ 756 757 static void 758 ReadTempFileBlock(BufFile *file, long blknum, void *ptr) 759 { 760 size_t nread; 761 762 if (BufFileSeekBlock(file, blknum) != 0) 763 elog(ERROR, "could not seek to block %ld in temporary file", blknum); 764 nread = BufFileRead(file, ptr, BLCKSZ); 765 if (nread != BLCKSZ) 766 elog(ERROR, "could not read temporary file: read only %zu of %zu bytes", 767 nread, (size_t) BLCKSZ); 768 } 769 770 static void 771 WriteTempFileBlock(BufFile *file, long blknum, void *ptr) 772 { 773 if (BufFileSeekBlock(file, blknum) != 0) 774 elog(ERROR, "could not seek to block %ld in temporary file", blknum); 775 BufFileWrite(file, ptr, BLCKSZ); 776 } 777