1 /*-------------------------------------------------------------------------
2 *
3 * spgscan.c
4 * routines for scanning SP-GiST indexes
5 *
6 *
7 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * IDENTIFICATION
11 * src/backend/access/spgist/spgscan.c
12 *
13 *-------------------------------------------------------------------------
14 */
15
16 #include "postgres.h"
17
18 #include "access/relscan.h"
19 #include "access/spgist_private.h"
20 #include "miscadmin.h"
21 #include "pgstat.h"
22 #include "storage/bufmgr.h"
23 #include "utils/datum.h"
24 #include "utils/memutils.h"
25 #include "utils/rel.h"
26
27
28 typedef void (*storeRes_func) (SpGistScanOpaque so, ItemPointer heapPtr,
29 Datum leafValue, bool isnull, bool recheck);
30
31 typedef struct ScanStackEntry
32 {
33 Datum reconstructedValue; /* value reconstructed from parent */
34 void *traversalValue; /* opclass-specific traverse value */
35 int level; /* level of items on this page */
36 ItemPointerData ptr; /* block and offset to scan from */
37 } ScanStackEntry;
38
39
40 /* Free a ScanStackEntry */
41 static void
freeScanStackEntry(SpGistScanOpaque so,ScanStackEntry * stackEntry)42 freeScanStackEntry(SpGistScanOpaque so, ScanStackEntry *stackEntry)
43 {
44 if (!so->state.attType.attbyval &&
45 DatumGetPointer(stackEntry->reconstructedValue) != NULL)
46 pfree(DatumGetPointer(stackEntry->reconstructedValue));
47 if (stackEntry->traversalValue)
48 pfree(stackEntry->traversalValue);
49
50 pfree(stackEntry);
51 }
52
53 /* Free the entire stack */
54 static void
freeScanStack(SpGistScanOpaque so)55 freeScanStack(SpGistScanOpaque so)
56 {
57 ListCell *lc;
58
59 foreach(lc, so->scanStack)
60 {
61 freeScanStackEntry(so, (ScanStackEntry *) lfirst(lc));
62 }
63 list_free(so->scanStack);
64 so->scanStack = NIL;
65 }
66
67 /*
68 * Initialize scanStack to search the root page, resetting
69 * any previously active scan
70 */
71 static void
resetSpGistScanOpaque(SpGistScanOpaque so)72 resetSpGistScanOpaque(SpGistScanOpaque so)
73 {
74 ScanStackEntry *startEntry;
75
76 freeScanStack(so);
77
78 /*
79 * clear traversal context before proceeding to the next scan; this must
80 * not happen before the freeScanStack above, else we get double-free
81 * crashes.
82 */
83 MemoryContextReset(so->traversalCxt);
84
85 if (so->searchNulls)
86 {
87 /* Stack a work item to scan the null index entries */
88 startEntry = (ScanStackEntry *) palloc0(sizeof(ScanStackEntry));
89 ItemPointerSet(&startEntry->ptr, SPGIST_NULL_BLKNO, FirstOffsetNumber);
90 so->scanStack = lappend(so->scanStack, startEntry);
91 }
92
93 if (so->searchNonNulls)
94 {
95 /* Stack a work item to scan the non-null index entries */
96 startEntry = (ScanStackEntry *) palloc0(sizeof(ScanStackEntry));
97 ItemPointerSet(&startEntry->ptr, SPGIST_ROOT_BLKNO, FirstOffsetNumber);
98 so->scanStack = lappend(so->scanStack, startEntry);
99 }
100
101 if (so->want_itup)
102 {
103 /* Must pfree reconstructed tuples to avoid memory leak */
104 int i;
105
106 for (i = 0; i < so->nPtrs; i++)
107 pfree(so->reconTups[i]);
108 }
109 so->iPtr = so->nPtrs = 0;
110 }
111
112 /*
113 * Prepare scan keys in SpGistScanOpaque from caller-given scan keys
114 *
115 * Sets searchNulls, searchNonNulls, numberOfKeys, keyData fields of *so.
116 *
117 * The point here is to eliminate null-related considerations from what the
118 * opclass consistent functions need to deal with. We assume all SPGiST-
119 * indexable operators are strict, so any null RHS value makes the scan
120 * condition unsatisfiable. We also pull out any IS NULL/IS NOT NULL
121 * conditions; their effect is reflected into searchNulls/searchNonNulls.
122 */
123 static void
spgPrepareScanKeys(IndexScanDesc scan)124 spgPrepareScanKeys(IndexScanDesc scan)
125 {
126 SpGistScanOpaque so = (SpGistScanOpaque) scan->opaque;
127 bool qual_ok;
128 bool haveIsNull;
129 bool haveNotNull;
130 int nkeys;
131 int i;
132
133 if (scan->numberOfKeys <= 0)
134 {
135 /* If no quals, whole-index scan is required */
136 so->searchNulls = true;
137 so->searchNonNulls = true;
138 so->numberOfKeys = 0;
139 return;
140 }
141
142 /* Examine the given quals */
143 qual_ok = true;
144 haveIsNull = haveNotNull = false;
145 nkeys = 0;
146 for (i = 0; i < scan->numberOfKeys; i++)
147 {
148 ScanKey skey = &scan->keyData[i];
149
150 if (skey->sk_flags & SK_SEARCHNULL)
151 haveIsNull = true;
152 else if (skey->sk_flags & SK_SEARCHNOTNULL)
153 haveNotNull = true;
154 else if (skey->sk_flags & SK_ISNULL)
155 {
156 /* ordinary qual with null argument - unsatisfiable */
157 qual_ok = false;
158 break;
159 }
160 else
161 {
162 /* ordinary qual, propagate into so->keyData */
163 so->keyData[nkeys++] = *skey;
164 /* this effectively creates a not-null requirement */
165 haveNotNull = true;
166 }
167 }
168
169 /* IS NULL in combination with something else is unsatisfiable */
170 if (haveIsNull && haveNotNull)
171 qual_ok = false;
172
173 /* Emit results */
174 if (qual_ok)
175 {
176 so->searchNulls = haveIsNull;
177 so->searchNonNulls = haveNotNull;
178 so->numberOfKeys = nkeys;
179 }
180 else
181 {
182 so->searchNulls = false;
183 so->searchNonNulls = false;
184 so->numberOfKeys = 0;
185 }
186 }
187
188 IndexScanDesc
spgbeginscan(Relation rel,int keysz,int orderbysz)189 spgbeginscan(Relation rel, int keysz, int orderbysz)
190 {
191 IndexScanDesc scan;
192 SpGistScanOpaque so;
193
194 scan = RelationGetIndexScan(rel, keysz, 0);
195
196 so = (SpGistScanOpaque) palloc0(sizeof(SpGistScanOpaqueData));
197 if (keysz > 0)
198 so->keyData = (ScanKey) palloc(sizeof(ScanKeyData) * keysz);
199 else
200 so->keyData = NULL;
201 initSpGistState(&so->state, scan->indexRelation);
202 so->tempCxt = AllocSetContextCreate(CurrentMemoryContext,
203 "SP-GiST search temporary context",
204 ALLOCSET_DEFAULT_SIZES);
205 so->traversalCxt = AllocSetContextCreate(CurrentMemoryContext,
206 "SP-GiST traversal-value context",
207 ALLOCSET_DEFAULT_SIZES);
208
209 /* Set up indexTupDesc and xs_hitupdesc in case it's an index-only scan */
210 so->indexTupDesc = scan->xs_hitupdesc = RelationGetDescr(rel);
211
212 scan->opaque = so;
213
214 return scan;
215 }
216
217 void
spgrescan(IndexScanDesc scan,ScanKey scankey,int nscankeys,ScanKey orderbys,int norderbys)218 spgrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
219 ScanKey orderbys, int norderbys)
220 {
221 SpGistScanOpaque so = (SpGistScanOpaque) scan->opaque;
222
223 /* copy scankeys into local storage */
224 if (scankey && scan->numberOfKeys > 0)
225 {
226 memmove(scan->keyData, scankey,
227 scan->numberOfKeys * sizeof(ScanKeyData));
228 }
229
230 /* preprocess scankeys, set up the representation in *so */
231 spgPrepareScanKeys(scan);
232
233 /* set up starting stack entries */
234 resetSpGistScanOpaque(so);
235
236 /* count an indexscan for stats */
237 pgstat_count_index_scan(scan->indexRelation);
238 }
239
240 void
spgendscan(IndexScanDesc scan)241 spgendscan(IndexScanDesc scan)
242 {
243 SpGistScanOpaque so = (SpGistScanOpaque) scan->opaque;
244
245 MemoryContextDelete(so->tempCxt);
246 MemoryContextDelete(so->traversalCxt);
247
248 if (so->keyData)
249 pfree(so->keyData);
250
251 if (so->state.deadTupleStorage)
252 pfree(so->state.deadTupleStorage);
253
254 pfree(so);
255 }
256
257 /*
258 * Test whether a leaf tuple satisfies all the scan keys
259 *
260 * *leafValue is set to the reconstructed datum, if provided
261 * *recheck is set true if any of the operators are lossy
262 */
263 static bool
spgLeafTest(Relation index,SpGistScanOpaque so,SpGistLeafTuple leafTuple,bool isnull,int level,Datum reconstructedValue,void * traversalValue,Datum * leafValue,bool * recheck)264 spgLeafTest(Relation index, SpGistScanOpaque so,
265 SpGistLeafTuple leafTuple, bool isnull,
266 int level, Datum reconstructedValue,
267 void *traversalValue,
268 Datum *leafValue, bool *recheck)
269 {
270 bool result;
271 Datum leafDatum;
272 spgLeafConsistentIn in;
273 spgLeafConsistentOut out;
274 FmgrInfo *procinfo;
275 MemoryContext oldCtx;
276
277 if (isnull)
278 {
279 /* Should not have arrived on a nulls page unless nulls are wanted */
280 Assert(so->searchNulls);
281 *leafValue = (Datum) 0;
282 *recheck = false;
283 return true;
284 }
285
286 leafDatum = SGLTDATUM(leafTuple, &so->state);
287
288 /* use temp context for calling leaf_consistent */
289 oldCtx = MemoryContextSwitchTo(so->tempCxt);
290
291 in.scankeys = so->keyData;
292 in.nkeys = so->numberOfKeys;
293 in.reconstructedValue = reconstructedValue;
294 in.traversalValue = traversalValue;
295 in.level = level;
296 in.returnData = so->want_itup;
297 in.leafDatum = leafDatum;
298
299 out.leafValue = (Datum) 0;
300 out.recheck = false;
301
302 procinfo = index_getprocinfo(index, 1, SPGIST_LEAF_CONSISTENT_PROC);
303 result = DatumGetBool(FunctionCall2Coll(procinfo,
304 index->rd_indcollation[0],
305 PointerGetDatum(&in),
306 PointerGetDatum(&out)));
307
308 *leafValue = out.leafValue;
309 *recheck = out.recheck;
310
311 MemoryContextSwitchTo(oldCtx);
312
313 return result;
314 }
315
316 /*
317 * Walk the tree and report all tuples passing the scan quals to the storeRes
318 * subroutine.
319 *
320 * If scanWholeIndex is true, we'll do just that. If not, we'll stop at the
321 * next page boundary once we have reported at least one tuple.
322 */
323 static void
spgWalk(Relation index,SpGistScanOpaque so,bool scanWholeIndex,storeRes_func storeRes,Snapshot snapshot)324 spgWalk(Relation index, SpGistScanOpaque so, bool scanWholeIndex,
325 storeRes_func storeRes, Snapshot snapshot)
326 {
327 Buffer buffer = InvalidBuffer;
328 bool reportedSome = false;
329
330 while (scanWholeIndex || !reportedSome)
331 {
332 ScanStackEntry *stackEntry;
333 BlockNumber blkno;
334 OffsetNumber offset;
335 Page page;
336 bool isnull;
337
338 /* Pull next to-do item from the list */
339 if (so->scanStack == NIL)
340 break; /* there are no more pages to scan */
341
342 stackEntry = (ScanStackEntry *) linitial(so->scanStack);
343 so->scanStack = list_delete_first(so->scanStack);
344
345 redirect:
346 /* Check for interrupts, just in case of infinite loop */
347 CHECK_FOR_INTERRUPTS();
348
349 blkno = ItemPointerGetBlockNumber(&stackEntry->ptr);
350 offset = ItemPointerGetOffsetNumber(&stackEntry->ptr);
351
352 if (buffer == InvalidBuffer)
353 {
354 buffer = ReadBuffer(index, blkno);
355 LockBuffer(buffer, BUFFER_LOCK_SHARE);
356 }
357 else if (blkno != BufferGetBlockNumber(buffer))
358 {
359 UnlockReleaseBuffer(buffer);
360 buffer = ReadBuffer(index, blkno);
361 LockBuffer(buffer, BUFFER_LOCK_SHARE);
362 }
363 /* else new pointer points to the same page, no work needed */
364
365 page = BufferGetPage(buffer);
366 TestForOldSnapshot(snapshot, index, page);
367
368 isnull = SpGistPageStoresNulls(page) ? true : false;
369
370 if (SpGistPageIsLeaf(page))
371 {
372 SpGistLeafTuple leafTuple;
373 OffsetNumber max = PageGetMaxOffsetNumber(page);
374 Datum leafValue = (Datum) 0;
375 bool recheck = false;
376
377 if (SpGistBlockIsRoot(blkno))
378 {
379 /* When root is a leaf, examine all its tuples */
380 for (offset = FirstOffsetNumber; offset <= max; offset++)
381 {
382 leafTuple = (SpGistLeafTuple)
383 PageGetItem(page, PageGetItemId(page, offset));
384 if (leafTuple->tupstate != SPGIST_LIVE)
385 {
386 /* all tuples on root should be live */
387 elog(ERROR, "unexpected SPGiST tuple state: %d",
388 leafTuple->tupstate);
389 }
390
391 Assert(ItemPointerIsValid(&leafTuple->heapPtr));
392 if (spgLeafTest(index, so,
393 leafTuple, isnull,
394 stackEntry->level,
395 stackEntry->reconstructedValue,
396 stackEntry->traversalValue,
397 &leafValue,
398 &recheck))
399 {
400 storeRes(so, &leafTuple->heapPtr,
401 leafValue, isnull, recheck);
402 reportedSome = true;
403 }
404 }
405 }
406 else
407 {
408 /* Normal case: just examine the chain we arrived at */
409 while (offset != InvalidOffsetNumber)
410 {
411 Assert(offset >= FirstOffsetNumber && offset <= max);
412 leafTuple = (SpGistLeafTuple)
413 PageGetItem(page, PageGetItemId(page, offset));
414 if (leafTuple->tupstate != SPGIST_LIVE)
415 {
416 if (leafTuple->tupstate == SPGIST_REDIRECT)
417 {
418 /* redirection tuple should be first in chain */
419 Assert(offset == ItemPointerGetOffsetNumber(&stackEntry->ptr));
420 /* transfer attention to redirect point */
421 stackEntry->ptr = ((SpGistDeadTuple) leafTuple)->pointer;
422 Assert(ItemPointerGetBlockNumber(&stackEntry->ptr) != SPGIST_METAPAGE_BLKNO);
423 goto redirect;
424 }
425 if (leafTuple->tupstate == SPGIST_DEAD)
426 {
427 /* dead tuple should be first in chain */
428 Assert(offset == ItemPointerGetOffsetNumber(&stackEntry->ptr));
429 /* No live entries on this page */
430 Assert(leafTuple->nextOffset == InvalidOffsetNumber);
431 break;
432 }
433 /* We should not arrive at a placeholder */
434 elog(ERROR, "unexpected SPGiST tuple state: %d",
435 leafTuple->tupstate);
436 }
437
438 Assert(ItemPointerIsValid(&leafTuple->heapPtr));
439 if (spgLeafTest(index, so,
440 leafTuple, isnull,
441 stackEntry->level,
442 stackEntry->reconstructedValue,
443 stackEntry->traversalValue,
444 &leafValue,
445 &recheck))
446 {
447 storeRes(so, &leafTuple->heapPtr,
448 leafValue, isnull, recheck);
449 reportedSome = true;
450 }
451
452 offset = leafTuple->nextOffset;
453 }
454 }
455 }
456 else /* page is inner */
457 {
458 SpGistInnerTuple innerTuple;
459 spgInnerConsistentIn in;
460 spgInnerConsistentOut out;
461 FmgrInfo *procinfo;
462 SpGistNodeTuple *nodes;
463 SpGistNodeTuple node;
464 int i;
465 MemoryContext oldCtx;
466
467 innerTuple = (SpGistInnerTuple) PageGetItem(page,
468 PageGetItemId(page, offset));
469
470 if (innerTuple->tupstate != SPGIST_LIVE)
471 {
472 if (innerTuple->tupstate == SPGIST_REDIRECT)
473 {
474 /* transfer attention to redirect point */
475 stackEntry->ptr = ((SpGistDeadTuple) innerTuple)->pointer;
476 Assert(ItemPointerGetBlockNumber(&stackEntry->ptr) != SPGIST_METAPAGE_BLKNO);
477 goto redirect;
478 }
479 elog(ERROR, "unexpected SPGiST tuple state: %d",
480 innerTuple->tupstate);
481 }
482
483 /* use temp context for calling inner_consistent */
484 oldCtx = MemoryContextSwitchTo(so->tempCxt);
485
486 in.scankeys = so->keyData;
487 in.nkeys = so->numberOfKeys;
488 in.reconstructedValue = stackEntry->reconstructedValue;
489 in.traversalMemoryContext = so->traversalCxt;
490 in.traversalValue = stackEntry->traversalValue;
491 in.level = stackEntry->level;
492 in.returnData = so->want_itup;
493 in.allTheSame = innerTuple->allTheSame;
494 in.hasPrefix = (innerTuple->prefixSize > 0);
495 in.prefixDatum = SGITDATUM(innerTuple, &so->state);
496 in.nNodes = innerTuple->nNodes;
497 in.nodeLabels = spgExtractNodeLabels(&so->state, innerTuple);
498
499 /* collect node pointers */
500 nodes = (SpGistNodeTuple *) palloc(sizeof(SpGistNodeTuple) * in.nNodes);
501 SGITITERATE(innerTuple, i, node)
502 {
503 nodes[i] = node;
504 }
505
506 memset(&out, 0, sizeof(out));
507
508 if (!isnull)
509 {
510 /* use user-defined inner consistent method */
511 procinfo = index_getprocinfo(index, 1, SPGIST_INNER_CONSISTENT_PROC);
512 FunctionCall2Coll(procinfo,
513 index->rd_indcollation[0],
514 PointerGetDatum(&in),
515 PointerGetDatum(&out));
516 }
517 else
518 {
519 /* force all children to be visited */
520 out.nNodes = in.nNodes;
521 out.nodeNumbers = (int *) palloc(sizeof(int) * in.nNodes);
522 for (i = 0; i < in.nNodes; i++)
523 out.nodeNumbers[i] = i;
524 }
525
526 MemoryContextSwitchTo(oldCtx);
527
528 /* If allTheSame, they should all or none of 'em match */
529 if (innerTuple->allTheSame)
530 if (out.nNodes != 0 && out.nNodes != in.nNodes)
531 elog(ERROR, "inconsistent inner_consistent results for allTheSame inner tuple");
532
533 for (i = 0; i < out.nNodes; i++)
534 {
535 int nodeN = out.nodeNumbers[i];
536
537 Assert(nodeN >= 0 && nodeN < in.nNodes);
538 if (ItemPointerIsValid(&nodes[nodeN]->t_tid))
539 {
540 ScanStackEntry *newEntry;
541
542 /* Create new work item for this node */
543 newEntry = palloc(sizeof(ScanStackEntry));
544 newEntry->ptr = nodes[nodeN]->t_tid;
545 if (out.levelAdds)
546 newEntry->level = stackEntry->level + out.levelAdds[i];
547 else
548 newEntry->level = stackEntry->level;
549 /* Must copy value out of temp context */
550 if (out.reconstructedValues)
551 newEntry->reconstructedValue =
552 datumCopy(out.reconstructedValues[i],
553 so->state.attType.attbyval,
554 so->state.attType.attlen);
555 else
556 newEntry->reconstructedValue = (Datum) 0;
557
558 /*
559 * Elements of out.traversalValues should be allocated in
560 * in.traversalMemoryContext, which is actually a long
561 * lived context of index scan.
562 */
563 newEntry->traversalValue = (out.traversalValues) ?
564 out.traversalValues[i] : NULL;
565
566 so->scanStack = lcons(newEntry, so->scanStack);
567 }
568 }
569 }
570
571 /* done with this scan stack entry */
572 freeScanStackEntry(so, stackEntry);
573 /* clear temp context before proceeding to the next one */
574 MemoryContextReset(so->tempCxt);
575 }
576
577 if (buffer != InvalidBuffer)
578 UnlockReleaseBuffer(buffer);
579 }
580
581 /* storeRes subroutine for getbitmap case */
582 static void
storeBitmap(SpGistScanOpaque so,ItemPointer heapPtr,Datum leafValue,bool isnull,bool recheck)583 storeBitmap(SpGistScanOpaque so, ItemPointer heapPtr,
584 Datum leafValue, bool isnull, bool recheck)
585 {
586 tbm_add_tuples(so->tbm, heapPtr, 1, recheck);
587 so->ntids++;
588 }
589
590 int64
spggetbitmap(IndexScanDesc scan,TIDBitmap * tbm)591 spggetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
592 {
593 SpGistScanOpaque so = (SpGistScanOpaque) scan->opaque;
594
595 /* Copy want_itup to *so so we don't need to pass it around separately */
596 so->want_itup = false;
597
598 so->tbm = tbm;
599 so->ntids = 0;
600
601 spgWalk(scan->indexRelation, so, true, storeBitmap, scan->xs_snapshot);
602
603 return so->ntids;
604 }
605
606 /* storeRes subroutine for gettuple case */
607 static void
storeGettuple(SpGistScanOpaque so,ItemPointer heapPtr,Datum leafValue,bool isnull,bool recheck)608 storeGettuple(SpGistScanOpaque so, ItemPointer heapPtr,
609 Datum leafValue, bool isnull, bool recheck)
610 {
611 Assert(so->nPtrs < MaxIndexTuplesPerPage);
612 so->heapPtrs[so->nPtrs] = *heapPtr;
613 so->recheck[so->nPtrs] = recheck;
614 if (so->want_itup)
615 {
616 /*
617 * Reconstruct index data. We have to copy the datum out of the temp
618 * context anyway, so we may as well create the tuple here.
619 */
620 so->reconTups[so->nPtrs] = heap_form_tuple(so->indexTupDesc,
621 &leafValue,
622 &isnull);
623 }
624 so->nPtrs++;
625 }
626
627 bool
spggettuple(IndexScanDesc scan,ScanDirection dir)628 spggettuple(IndexScanDesc scan, ScanDirection dir)
629 {
630 SpGistScanOpaque so = (SpGistScanOpaque) scan->opaque;
631
632 if (dir != ForwardScanDirection)
633 elog(ERROR, "SP-GiST only supports forward scan direction");
634
635 /* Copy want_itup to *so so we don't need to pass it around separately */
636 so->want_itup = scan->xs_want_itup;
637
638 for (;;)
639 {
640 if (so->iPtr < so->nPtrs)
641 {
642 /* continuing to return tuples from a leaf page */
643 scan->xs_ctup.t_self = so->heapPtrs[so->iPtr];
644 scan->xs_recheck = so->recheck[so->iPtr];
645 scan->xs_hitup = so->reconTups[so->iPtr];
646 so->iPtr++;
647 return true;
648 }
649
650 if (so->want_itup)
651 {
652 /* Must pfree reconstructed tuples to avoid memory leak */
653 int i;
654
655 for (i = 0; i < so->nPtrs; i++)
656 pfree(so->reconTups[i]);
657 }
658 so->iPtr = so->nPtrs = 0;
659
660 spgWalk(scan->indexRelation, so, false, storeGettuple,
661 scan->xs_snapshot);
662
663 if (so->nPtrs == 0)
664 break; /* must have completed scan */
665 }
666
667 return false;
668 }
669
670 bool
spgcanreturn(Relation index,int attno)671 spgcanreturn(Relation index, int attno)
672 {
673 SpGistCache *cache;
674
675 /* We can do it if the opclass config function says so */
676 cache = spgGetCache(index);
677
678 return cache->config.canReturnData;
679 }
680