1 /*------------------------------------------------------------------------- 2 * 3 * spgscan.c 4 * routines for scanning SP-GiST indexes 5 * 6 * 7 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group 8 * Portions Copyright (c) 1994, Regents of the University of California 9 * 10 * IDENTIFICATION 11 * src/backend/access/spgist/spgscan.c 12 * 13 *------------------------------------------------------------------------- ~S1S114 */ 15 16 #include "postgres.h" 17 18 #include "access/relscan.h" 19 #include "access/spgist_private.h" 20 #include "miscadmin.h" 21 #include "pgstat.h" 22 #include "storage/bufmgr.h" 23 #include "utils/datum.h" 24 #include "utils/memutils.h" 25 #include "utils/rel.h" 26 27 28 typedef void (*storeRes_func) (SpGistScanOpaque so, ItemPointer heapPtr, 29 Datum leafValue, bool isnull, bool recheck); 30 31 typedef struct ScanStackEntry 32 { 33 Datum reconstructedValue; /* value reconstructed from parent */ 34 void *traversalValue; /* opclass-specific traverse value */ 35 int level; /* level of items on this page */ 36 ItemPointerData ptr; /* block and offset to scan from */ 37 } ScanStackEntry; 38 39 40 /* Free a ScanStackEntry */ 41 static void 42 freeScanStackEntry(SpGistScanOpaque so, ScanStackEntry *stackEntry) 43 { 44 if (!so->state.attLeafType.attbyval && 45 DatumGetPointer(stackEntry->reconstructedValue) != NULL) 46 pfree(DatumGetPointer(stackEntry->reconstructedValue)); 47 if (stackEntry->traversalValue) 48 pfree(stackEntry->traversalValue); 49 50 pfree(stackEntry); 51 } 52 53 /* Free the entire stack */ 54 static void 55 freeScanStack(SpGistScanOpaque so) 56 { 57 ListCell *lc; 58 59 foreach(lc, so->scanStack) 60 { 61 freeScanStackEntry(so, (ScanStackEntry *) lfirst(lc)); 62 } 63 list_free(so->scanStack); 64 so->scanStack = NIL; 65 } 66 67 /* 68 * Initialize scanStack to search the root page, resetting 69 * any previously active scan 70 */ 71 static void 72 resetSpGistScanOpaque(SpGistScanOpaque so) 73 { 74 ScanStackEntry *startEntry; 75 76 freeScanStack(so); 77 78 /* 79 * clear traversal context before proceeding to the next scan; this must 80 * not happen before the freeScanStack above, else we get double-free 81 * crashes. 82 */ 83 MemoryContextReset(so->traversalCxt); 84 85 if (so->searchNulls) 86 { 87 /* Stack a work item to scan the null index entries */ 88 startEntry = (ScanStackEntry *) palloc0(sizeof(ScanStackEntry)); 89 ItemPointerSet(&startEntry->ptr, SPGIST_NULL_BLKNO, FirstOffsetNumber); 90 so->scanStack = lappend(so->scanStack, startEntry); 91 } 92 93 if (so->searchNonNulls) 94 { 95 /* Stack a work item to scan the non-null index entries */ 96 startEntry = (ScanStackEntry *) palloc0(sizeof(ScanStackEntry)); 97 ItemPointerSet(&startEntry->ptr, SPGIST_ROOT_BLKNO, FirstOffsetNumber); 98 so->scanStack = lappend(so->scanStack, startEntry); 99 } 100 101 if (so->want_itup) 102 { 103 /* Must pfree reconstructed tuples to avoid memory leak */ 104 int i; 105 106 for (i = 0; i < so->nPtrs; i++) 107 pfree(so->reconTups[i]); 108 } 109 so->iPtr = so->nPtrs = 0; 110 } 111 112 /* 113 * Prepare scan keys in SpGistScanOpaque from caller-given scan keys 114 * 115 * Sets searchNulls, searchNonNulls, numberOfKeys, keyData fields of *so. 116 * 117 * The point here is to eliminate null-related considerations from what the 118 * opclass consistent functions need to deal with. We assume all SPGiST- 119 * indexable operators are strict, so any null RHS value makes the scan 120 * condition unsatisfiable. We also pull out any IS NULL/IS NOT NULL 121 * conditions; their effect is reflected into searchNulls/searchNonNulls. 122 */ 123 static void 124 spgPrepareScanKeys(IndexScanDesc scan) 125 { 126 SpGistScanOpaque so = (SpGistScanOpaque) scan->opaque; 127 bool qual_ok; 128 bool haveIsNull; 129 bool haveNotNull; 130 int nkeys; 131 int i; 132 133 if (scan->numberOfKeys <= 0) 134 { 135 /* If no quals, whole-index scan is required */ 136 so->searchNulls = true; 137 so->searchNonNulls = true; 138 so->numberOfKeys = 0; 139 return; 140 } 141 142 /* Examine the given quals */ 143 qual_ok = true; 144 haveIsNull = haveNotNull = false; 145 nkeys = 0; 146 for (i = 0; i < scan->numberOfKeys; i++) 147 { 148 ScanKey skey = &scan->keyData[i]; 149 150 if (skey->sk_flags & SK_SEARCHNULL) 151 haveIsNull = true; 152 else if (skey->sk_flags & SK_SEARCHNOTNULL) 153 haveNotNull = true; 154 else if (skey->sk_flags & SK_ISNULL) 155 { 156 /* ordinary qual with null argument - unsatisfiable */ 157 qual_ok = false; 158 break; 159 } 160 else 161 { 162 /* ordinary qual, propagate into so->keyData */ 163 so->keyData[nkeys++] = *skey; 164 /* this effectively creates a not-null requirement */ 165 haveNotNull = true; 166 } 167 } 168 169 /* IS NULL in combination with something else is unsatisfiable */ 170 if (haveIsNull && haveNotNull) 171 qual_ok = false; 172 173 /* Emit results */ 174 if (qual_ok) 175 { 176 so->searchNulls = haveIsNull; 177 so->searchNonNulls = haveNotNull; 178 so->numberOfKeys = nkeys; 179 } 180 else 181 { 182 so->searchNulls = false; 183 so->searchNonNulls = false; 184 so->numberOfKeys = 0; 185 } 186 } 187 188 IndexScanDesc 189 spgbeginscan(Relation rel, int keysz, int orderbysz) 190 { 191 IndexScanDesc scan; 192 SpGistScanOpaque so; 193 194 scan = RelationGetIndexScan(rel, keysz, 0); 195 196 so = (SpGistScanOpaque) palloc0(sizeof(SpGistScanOpaqueData)); 197 if (keysz > 0) 198 so->keyData = (ScanKey) palloc(sizeof(ScanKeyData) * keysz); 199 else 200 so->keyData = NULL; 201 initSpGistState(&so->state, scan->indexRelation); 202 so->tempCxt = AllocSetContextCreate(CurrentMemoryContext, 203 "SP-GiST search temporary context", 204 ALLOCSET_DEFAULT_SIZES); 205 so->traversalCxt = AllocSetContextCreate(CurrentMemoryContext, 206 "SP-GiST traversal-value context", 207 ALLOCSET_DEFAULT_SIZES); 208 209 /* Set up indexTupDesc and xs_hitupdesc in case it's an index-only scan */ 210 so->indexTupDesc = scan->xs_hitupdesc = RelationGetDescr(rel); 211 212 scan->opaque = so; 213 214 return scan; 215 } 216 217 void 218 spgrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, 219 ScanKey orderbys, int norderbys) 220 { 221 SpGistScanOpaque so = (SpGistScanOpaque) scan->opaque; 222 223 /* copy scankeys into local storage */ 224 if (scankey && scan->numberOfKeys > 0) 225 { 226 memmove(scan->keyData, scankey, 227 scan->numberOfKeys * sizeof(ScanKeyData)); 228 } 229 230 /* preprocess scankeys, set up the representation in *so */ 231 spgPrepareScanKeys(scan); 232 233 /* set up starting stack entries */ 234 resetSpGistScanOpaque(so); 235 236 /* count an indexscan for stats */ 237 pgstat_count_index_scan(scan->indexRelation); 238 } 239 240 void 241 spgendscan(IndexScanDesc scan) 242 { 243 SpGistScanOpaque so = (SpGistScanOpaque) scan->opaque; 244 245 MemoryContextDelete(so->tempCxt); 246 MemoryContextDelete(so->traversalCxt); 247 248 if (so->keyData) 249 pfree(so->keyData); 250 251 if (so->state.deadTupleStorage) 252 pfree(so->state.deadTupleStorage); 253 254 pfree(so); 255 } 256 257 /* 258 * Test whether a leaf tuple satisfies all the scan keys 259 * 260 * *leafValue is set to the reconstructed datum, if provided 261 * *recheck is set true if any of the operators are lossy 262 */ 263 static bool 264 spgLeafTest(Relation index, SpGistScanOpaque so, 265 SpGistLeafTuple leafTuple, bool isnull, 266 int level, Datum reconstructedValue, 267 void *traversalValue, 268 Datum *leafValue, bool *recheck) 269 { 270 bool result; 271 Datum leafDatum; 272 spgLeafConsistentIn in; 273 spgLeafConsistentOut out; 274 FmgrInfo *procinfo; 275 MemoryContext oldCtx; 276 277 if (isnull) 278 { 279 /* Should not have arrived on a nulls page unless nulls are wanted */ 280 Assert(so->searchNulls); 281 *leafValue = (Datum) 0; 282 *recheck = false; 283 return true; 284 } 285 286 leafDatum = SGLTDATUM(leafTuple, &so->state); 287 288 /* use temp context for calling leaf_consistent */ 289 oldCtx = MemoryContextSwitchTo(so->tempCxt); 290 291 in.scankeys = so->keyData; 292 in.nkeys = so->numberOfKeys; 293 in.reconstructedValue = reconstructedValue; 294 in.traversalValue = traversalValue; 295 in.level = level; 296 in.returnData = so->want_itup; 297 in.leafDatum = leafDatum; 298 299 out.leafValue = (Datum) 0; 300 out.recheck = false; 301 302 procinfo = index_getprocinfo(index, 1, SPGIST_LEAF_CONSISTENT_PROC); 303 result = DatumGetBool(FunctionCall2Coll(procinfo, 304 index->rd_indcollation[0], 305 PointerGetDatum(&in), 306 PointerGetDatum(&out))); 307 308 *leafValue = out.leafValue; 309 *recheck = out.recheck; 310 311 MemoryContextSwitchTo(oldCtx); 312 313 return result; 314 } 315 316 /* 317 * Walk the tree and report all tuples passing the scan quals to the storeRes 318 * subroutine. 319 * 320 * If scanWholeIndex is true, we'll do just that. If not, we'll stop at the 321 * next page boundary once we have reported at least one tuple. 322 */ 323 static void 324 spgWalk(Relation index, SpGistScanOpaque so, bool scanWholeIndex, 325 storeRes_func storeRes, Snapshot snapshot) 326 { 327 Buffer buffer = InvalidBuffer; 328 bool reportedSome = false; 329 330 while (scanWholeIndex || !reportedSome) 331 { 332 ScanStackEntry *stackEntry; 333 BlockNumber blkno; 334 OffsetNumber offset; 335 Page page; 336 bool isnull; 337 338 /* Pull next to-do item from the list */ 339 if (so->scanStack == NIL) 340 break; /* there are no more pages to scan */ 341 342 stackEntry = (ScanStackEntry *) linitial(so->scanStack); 343 so->scanStack = list_delete_first(so->scanStack); 344 345 redirect: 346 /* Check for interrupts, just in case of infinite loop */ 347 CHECK_FOR_INTERRUPTS(); 348 349 blkno = ItemPointerGetBlockNumber(&stackEntry->ptr); 350 offset = ItemPointerGetOffsetNumber(&stackEntry->ptr); 351 352 if (buffer == InvalidBuffer) 353 { 354 buffer = ReadBuffer(index, blkno); 355 LockBuffer(buffer, BUFFER_LOCK_SHARE); 356 } 357 else if (blkno != BufferGetBlockNumber(buffer)) 358 { 359 UnlockReleaseBuffer(buffer); 360 buffer = ReadBuffer(index, blkno); 361 LockBuffer(buffer, BUFFER_LOCK_SHARE); 362 } 363 /* else new pointer points to the same page, no work needed */ 364 365 page = BufferGetPage(buffer); 366 TestForOldSnapshot(snapshot, index, page); 367 368 isnull = SpGistPageStoresNulls(page) ? true : false; 369 370 if (SpGistPageIsLeaf(page)) 371 { 372 SpGistLeafTuple leafTuple; 373 OffsetNumber max = PageGetMaxOffsetNumber(page); 374 Datum leafValue = (Datum) 0; 375 bool recheck = false; 376 377 if (SpGistBlockIsRoot(blkno)) 378 { 379 /* When root is a leaf, examine all its tuples */ 380 for (offset = FirstOffsetNumber; offset <= max; offset++) 381 { 382 leafTuple = (SpGistLeafTuple) 383 PageGetItem(page, PageGetItemId(page, offset)); 384 if (leafTuple->tupstate != SPGIST_LIVE) 385 { 386 /* all tuples on root should be live */ 387 elog(ERROR, "unexpected SPGiST tuple state: %d", 388 leafTuple->tupstate); 389 } 390 391 Assert(ItemPointerIsValid(&leafTuple->heapPtr)); 392 if (spgLeafTest(index, so, 393 leafTuple, isnull, 394 stackEntry->level, 395 stackEntry->reconstructedValue, 396 stackEntry->traversalValue, 397 &leafValue, 398 &recheck)) 399 { 400 storeRes(so, &leafTuple->heapPtr, 401 leafValue, isnull, recheck); 402 reportedSome = true; 403 } 404 } 405 } 406 else 407 { 408 /* Normal case: just examine the chain we arrived at */ 409 while (offset != InvalidOffsetNumber) 410 { 411 Assert(offset >= FirstOffsetNumber && offset <= max); 412 leafTuple = (SpGistLeafTuple) 413 PageGetItem(page, PageGetItemId(page, offset)); 414 if (leafTuple->tupstate != SPGIST_LIVE) 415 { 416 if (leafTuple->tupstate == SPGIST_REDIRECT) 417 { 418 /* redirection tuple should be first in chain */ 419 Assert(offset == ItemPointerGetOffsetNumber(&stackEntry->ptr)); 420 /* transfer attention to redirect point */ 421 stackEntry->ptr = ((SpGistDeadTuple) leafTuple)->pointer; 422 Assert(ItemPointerGetBlockNumber(&stackEntry->ptr) != SPGIST_METAPAGE_BLKNO); 423 goto redirect; 424 } 425 if (leafTuple->tupstate == SPGIST_DEAD) 426 { 427 /* dead tuple should be first in chain */ 428 Assert(offset == ItemPointerGetOffsetNumber(&stackEntry->ptr)); 429 /* No live entries on this page */ 430 Assert(leafTuple->nextOffset == InvalidOffsetNumber); 431 break; 432 } 433 /* We should not arrive at a placeholder */ 434 elog(ERROR, "unexpected SPGiST tuple state: %d", 435 leafTuple->tupstate); 436 } 437 438 Assert(ItemPointerIsValid(&leafTuple->heapPtr)); 439 if (spgLeafTest(index, so, 440 leafTuple, isnull, 441 stackEntry->level, 442 stackEntry->reconstructedValue, 443 stackEntry->traversalValue, 444 &leafValue, 445 &recheck)) 446 { 447 storeRes(so, &leafTuple->heapPtr, 448 leafValue, isnull, recheck); 449 reportedSome = true; 450 } 451 452 offset = leafTuple->nextOffset; 453 } 454 } 455 } 456 else /* page is inner */ 457 { 458 SpGistInnerTuple innerTuple; 459 spgInnerConsistentIn in; 460 spgInnerConsistentOut out; 461 FmgrInfo *procinfo; 462 SpGistNodeTuple *nodes; 463 SpGistNodeTuple node; 464 int i; 465 MemoryContext oldCtx; 466 467 innerTuple = (SpGistInnerTuple) PageGetItem(page, 468 PageGetItemId(page, offset)); 469 470 if (innerTuple->tupstate != SPGIST_LIVE) 471 { 472 if (innerTuple->tupstate == SPGIST_REDIRECT) 473 { 474 /* transfer attention to redirect point */ 475 stackEntry->ptr = ((SpGistDeadTuple) innerTuple)->pointer; 476 Assert(ItemPointerGetBlockNumber(&stackEntry->ptr) != SPGIST_METAPAGE_BLKNO); 477 goto redirect; 478 } 479 elog(ERROR, "unexpected SPGiST tuple state: %d", 480 innerTuple->tupstate); 481 } 482 483 /* use temp context for calling inner_consistent */ 484 oldCtx = MemoryContextSwitchTo(so->tempCxt); 485 486 in.scankeys = so->keyData; 487 in.nkeys = so->numberOfKeys; 488 in.reconstructedValue = stackEntry->reconstructedValue; 489 in.traversalMemoryContext = so->traversalCxt; 490 in.traversalValue = stackEntry->traversalValue; 491 in.level = stackEntry->level; 492 in.returnData = so->want_itup; 493 in.allTheSame = innerTuple->allTheSame; 494 in.hasPrefix = (innerTuple->prefixSize > 0); 495 in.prefixDatum = SGITDATUM(innerTuple, &so->state); 496 in.nNodes = innerTuple->nNodes; 497 in.nodeLabels = spgExtractNodeLabels(&so->state, innerTuple); 498 499 /* collect node pointers */ 500 nodes = (SpGistNodeTuple *) palloc(sizeof(SpGistNodeTuple) * in.nNodes); 501 SGITITERATE(innerTuple, i, node) 502 { 503 nodes[i] = node; 504 } 505 506 memset(&out, 0, sizeof(out)); 507 508 if (!isnull) 509 { 510 /* use user-defined inner consistent method */ 511 procinfo = index_getprocinfo(index, 1, SPGIST_INNER_CONSISTENT_PROC); 512 FunctionCall2Coll(procinfo, 513 index->rd_indcollation[0], 514 PointerGetDatum(&in), 515 PointerGetDatum(&out)); 516 } 517 else 518 { 519 /* force all children to be visited */ 520 out.nNodes = in.nNodes; 521 out.nodeNumbers = (int *) palloc(sizeof(int) * in.nNodes); 522 for (i = 0; i < in.nNodes; i++) 523 out.nodeNumbers[i] = i; 524 } 525 526 MemoryContextSwitchTo(oldCtx); 527 528 /* If allTheSame, they should all or none of 'em match */ 529 if (innerTuple->allTheSame) 530 if (out.nNodes != 0 && out.nNodes != in.nNodes) 531 elog(ERROR, "inconsistent inner_consistent results for allTheSame inner tuple"); 532 533 for (i = 0; i < out.nNodes; i++) 534 { 535 int nodeN = out.nodeNumbers[i]; 536 537 Assert(nodeN >= 0 && nodeN < in.nNodes); 538 if (ItemPointerIsValid(&nodes[nodeN]->t_tid)) 539 { 540 ScanStackEntry *newEntry; 541 542 /* Create new work item for this node */ 543 newEntry = palloc(sizeof(ScanStackEntry)); 544 newEntry->ptr = nodes[nodeN]->t_tid; 545 if (out.levelAdds) 546 newEntry->level = stackEntry->level + out.levelAdds[i]; 547 else 548 newEntry->level = stackEntry->level; 549 /* Must copy value out of temp context */ 550 if (out.reconstructedValues) 551 newEntry->reconstructedValue = 552 datumCopy(out.reconstructedValues[i], 553 so->state.attLeafType.attbyval, 554 so->state.attLeafType.attlen); 555 else 556 newEntry->reconstructedValue = (Datum) 0; 557 558 /* 559 * Elements of out.traversalValues should be allocated in 560 * in.traversalMemoryContext, which is actually a long 561 * lived context of index scan. 562 */ 563 newEntry->traversalValue = (out.traversalValues) ? 564 out.traversalValues[i] : NULL; 565 566 so->scanStack = lcons(newEntry, so->scanStack); 567 } 568 } 569 } 570 571 /* done with this scan stack entry */ 572 freeScanStackEntry(so, stackEntry); 573 /* clear temp context before proceeding to the next one */ 574 MemoryContextReset(so->tempCxt); 575 } 576 577 if (buffer != InvalidBuffer) 578 UnlockReleaseBuffer(buffer); 579 } 580 581 /* storeRes subroutine for getbitmap case */ 582 static void 583 storeBitmap(SpGistScanOpaque so, ItemPointer heapPtr, 584 Datum leafValue, bool isnull, bool recheck) 585 { 586 tbm_add_tuples(so->tbm, heapPtr, 1, recheck); 587 so->ntids++; 588 } 589 590 int64 591 spggetbitmap(IndexScanDesc scan, TIDBitmap *tbm) 592 { 593 SpGistScanOpaque so = (SpGistScanOpaque) scan->opaque; 594 595 /* Copy want_itup to *so so we don't need to pass it around separately */ 596 so->want_itup = false; 597 598 so->tbm = tbm; 599 so->ntids = 0; 600 601 spgWalk(scan->indexRelation, so, true, storeBitmap, scan->xs_snapshot); 602 603 return so->ntids; 604 } 605 606 /* storeRes subroutine for gettuple case */ 607 static void 608 storeGettuple(SpGistScanOpaque so, ItemPointer heapPtr, 609 Datum leafValue, bool isnull, bool recheck) 610 { 611 Assert(so->nPtrs < MaxIndexTuplesPerPage); 612 so->heapPtrs[so->nPtrs] = *heapPtr; 613 so->recheck[so->nPtrs] = recheck; 614 if (so->want_itup) 615 { 616 /* 617 * Reconstruct index data. We have to copy the datum out of the temp 618 * context anyway, so we may as well create the tuple here. 619 */ 620 so->reconTups[so->nPtrs] = heap_form_tuple(so->indexTupDesc, 621 &leafValue, 622 &isnull); 623 } 624 so->nPtrs++; 625 } 626 627 bool 628 spggettuple(IndexScanDesc scan, ScanDirection dir) 629 { 630 SpGistScanOpaque so = (SpGistScanOpaque) scan->opaque; 631 632 if (dir != ForwardScanDirection) 633 elog(ERROR, "SP-GiST only supports forward scan direction"); 634 635 /* Copy want_itup to *so so we don't need to pass it around separately */ 636 so->want_itup = scan->xs_want_itup; 637 638 for (;;) 639 { 640 if (so->iPtr < so->nPtrs) 641 { 642 /* continuing to return tuples from a leaf page */ 643 scan->xs_ctup.t_self = so->heapPtrs[so->iPtr]; 644 scan->xs_recheck = so->recheck[so->iPtr]; 645 scan->xs_hitup = so->reconTups[so->iPtr]; 646 so->iPtr++; 647 return true; 648 } 649 650 if (so->want_itup) 651 { 652 /* Must pfree reconstructed tuples to avoid memory leak */ 653 int i; 654 655 for (i = 0; i < so->nPtrs; i++) 656 pfree(so->reconTups[i]); 657 } 658 so->iPtr = so->nPtrs = 0; 659 660 spgWalk(scan->indexRelation, so, false, storeGettuple, 661 scan->xs_snapshot); 662 663 if (so->nPtrs == 0) 664 break; /* must have completed scan */ 665 } 666 667 return false; 668 } 669 670 bool 671 spgcanreturn(Relation index, int attno) 672 { 673 SpGistCache *cache; 674 675 /* We can do it if the opclass config function says so */ 676 cache = spgGetCache(index); 677 678 return cache->config.canReturnData; 679 } 680