1 /*-------------------------------------------------------------------------
2 *
3 * nodeTidscan.c
4 * Routines to support direct tid scans of relations
5 *
6 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/executor/nodeTidscan.c
12 *
13 *-------------------------------------------------------------------------
14 */
15 /*
16 * INTERFACE ROUTINES
17 *
18 * ExecTidScan scans a relation using tids
19 * ExecInitTidScan creates and initializes state info.
20 * ExecReScanTidScan rescans the tid relation.
21 * ExecEndTidScan releases all storage.
22 */
23 #include "postgres.h"
24
25 #include "access/sysattr.h"
26 #include "catalog/pg_type.h"
27 #include "executor/execdebug.h"
28 #include "executor/nodeTidscan.h"
29 #include "miscadmin.h"
30 #include "optimizer/clauses.h"
31 #include "storage/bufmgr.h"
32 #include "utils/array.h"
33 #include "utils/rel.h"
34
35
36 #define IsCTIDVar(node) \
37 ((node) != NULL && \
38 IsA((node), Var) && \
39 ((Var *) (node))->varattno == SelfItemPointerAttributeNumber && \
40 ((Var *) (node))->varlevelsup == 0)
41
42 /* one element in tss_tidexprs */
43 typedef struct TidExpr
44 {
45 ExprState *exprstate; /* ExprState for a TID-yielding subexpr */
46 bool isarray; /* if true, it yields tid[] not just tid */
47 CurrentOfExpr *cexpr; /* alternatively, we can have CURRENT OF */
48 } TidExpr;
49
50 static void TidExprListCreate(TidScanState *tidstate);
51 static void TidListEval(TidScanState *tidstate);
52 static int itemptr_comparator(const void *a, const void *b);
53 static TupleTableSlot *TidNext(TidScanState *node);
54
55
56 /*
57 * Extract the qual subexpressions that yield TIDs to search for,
58 * and compile them into ExprStates if they're ordinary expressions.
59 *
60 * CURRENT OF is a special case that we can't compile usefully;
61 * just drop it into the TidExpr list as-is.
62 */
63 static void
TidExprListCreate(TidScanState * tidstate)64 TidExprListCreate(TidScanState *tidstate)
65 {
66 TidScan *node = (TidScan *) tidstate->ss.ps.plan;
67 ListCell *l;
68
69 tidstate->tss_tidexprs = NIL;
70 tidstate->tss_isCurrentOf = false;
71
72 foreach(l, node->tidquals)
73 {
74 Expr *expr = (Expr *) lfirst(l);
75 TidExpr *tidexpr = (TidExpr *) palloc0(sizeof(TidExpr));
76
77 if (is_opclause(expr))
78 {
79 Node *arg1;
80 Node *arg2;
81
82 arg1 = get_leftop(expr);
83 arg2 = get_rightop(expr);
84 if (IsCTIDVar(arg1))
85 tidexpr->exprstate = ExecInitExpr((Expr *) arg2,
86 &tidstate->ss.ps);
87 else if (IsCTIDVar(arg2))
88 tidexpr->exprstate = ExecInitExpr((Expr *) arg1,
89 &tidstate->ss.ps);
90 else
91 elog(ERROR, "could not identify CTID variable");
92 tidexpr->isarray = false;
93 }
94 else if (expr && IsA(expr, ScalarArrayOpExpr))
95 {
96 ScalarArrayOpExpr *saex = (ScalarArrayOpExpr *) expr;
97
98 Assert(IsCTIDVar(linitial(saex->args)));
99 tidexpr->exprstate = ExecInitExpr(lsecond(saex->args),
100 &tidstate->ss.ps);
101 tidexpr->isarray = true;
102 }
103 else if (expr && IsA(expr, CurrentOfExpr))
104 {
105 CurrentOfExpr *cexpr = (CurrentOfExpr *) expr;
106
107 tidexpr->cexpr = cexpr;
108 tidstate->tss_isCurrentOf = true;
109 }
110 else
111 elog(ERROR, "could not identify CTID expression");
112
113 tidstate->tss_tidexprs = lappend(tidstate->tss_tidexprs, tidexpr);
114 }
115
116 /* CurrentOfExpr could never appear OR'd with something else */
117 Assert(list_length(tidstate->tss_tidexprs) == 1 ||
118 !tidstate->tss_isCurrentOf);
119 }
120
121 /*
122 * Compute the list of TIDs to be visited, by evaluating the expressions
123 * for them.
124 *
125 * (The result is actually an array, not a list.)
126 */
127 static void
TidListEval(TidScanState * tidstate)128 TidListEval(TidScanState *tidstate)
129 {
130 ExprContext *econtext = tidstate->ss.ps.ps_ExprContext;
131 BlockNumber nblocks;
132 ItemPointerData *tidList;
133 int numAllocTids;
134 int numTids;
135 ListCell *l;
136
137 /*
138 * We silently discard any TIDs that are out of range at the time of scan
139 * start. (Since we hold at least AccessShareLock on the table, it won't
140 * be possible for someone to truncate away the blocks we intend to
141 * visit.)
142 */
143 nblocks = RelationGetNumberOfBlocks(tidstate->ss.ss_currentRelation);
144
145 /*
146 * We initialize the array with enough slots for the case that all quals
147 * are simple OpExprs or CurrentOfExprs. If there are any
148 * ScalarArrayOpExprs, we may have to enlarge the array.
149 */
150 numAllocTids = list_length(tidstate->tss_tidexprs);
151 tidList = (ItemPointerData *)
152 palloc(numAllocTids * sizeof(ItemPointerData));
153 numTids = 0;
154
155 foreach(l, tidstate->tss_tidexprs)
156 {
157 TidExpr *tidexpr = (TidExpr *) lfirst(l);
158 ItemPointer itemptr;
159 bool isNull;
160
161 if (tidexpr->exprstate && !tidexpr->isarray)
162 {
163 itemptr = (ItemPointer)
164 DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate,
165 econtext,
166 &isNull));
167 if (!isNull &&
168 ItemPointerIsValid(itemptr) &&
169 ItemPointerGetBlockNumber(itemptr) < nblocks)
170 {
171 if (numTids >= numAllocTids)
172 {
173 numAllocTids *= 2;
174 tidList = (ItemPointerData *)
175 repalloc(tidList,
176 numAllocTids * sizeof(ItemPointerData));
177 }
178 tidList[numTids++] = *itemptr;
179 }
180 }
181 else if (tidexpr->exprstate && tidexpr->isarray)
182 {
183 Datum arraydatum;
184 ArrayType *itemarray;
185 Datum *ipdatums;
186 bool *ipnulls;
187 int ndatums;
188 int i;
189
190 arraydatum = ExecEvalExprSwitchContext(tidexpr->exprstate,
191 econtext,
192 &isNull);
193 if (isNull)
194 continue;
195 itemarray = DatumGetArrayTypeP(arraydatum);
196 deconstruct_array(itemarray,
197 TIDOID, sizeof(ItemPointerData), false, 's',
198 &ipdatums, &ipnulls, &ndatums);
199 if (numTids + ndatums > numAllocTids)
200 {
201 numAllocTids = numTids + ndatums;
202 tidList = (ItemPointerData *)
203 repalloc(tidList,
204 numAllocTids * sizeof(ItemPointerData));
205 }
206 for (i = 0; i < ndatums; i++)
207 {
208 if (!ipnulls[i])
209 {
210 itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]);
211 if (ItemPointerIsValid(itemptr) &&
212 ItemPointerGetBlockNumber(itemptr) < nblocks)
213 tidList[numTids++] = *itemptr;
214 }
215 }
216 pfree(ipdatums);
217 pfree(ipnulls);
218 }
219 else
220 {
221 ItemPointerData cursor_tid;
222
223 Assert(tidexpr->cexpr);
224 if (execCurrentOf(tidexpr->cexpr, econtext,
225 RelationGetRelid(tidstate->ss.ss_currentRelation),
226 &cursor_tid))
227 {
228 if (numTids >= numAllocTids)
229 {
230 numAllocTids *= 2;
231 tidList = (ItemPointerData *)
232 repalloc(tidList,
233 numAllocTids * sizeof(ItemPointerData));
234 }
235 tidList[numTids++] = cursor_tid;
236 }
237 }
238 }
239
240 /*
241 * Sort the array of TIDs into order, and eliminate duplicates.
242 * Eliminating duplicates is necessary since we want OR semantics across
243 * the list. Sorting makes it easier to detect duplicates, and as a bonus
244 * ensures that we will visit the heap in the most efficient way.
245 */
246 if (numTids > 1)
247 {
248 int lastTid;
249 int i;
250
251 /* CurrentOfExpr could never appear OR'd with something else */
252 Assert(!tidstate->tss_isCurrentOf);
253
254 qsort((void *) tidList, numTids, sizeof(ItemPointerData),
255 itemptr_comparator);
256 lastTid = 0;
257 for (i = 1; i < numTids; i++)
258 {
259 if (!ItemPointerEquals(&tidList[lastTid], &tidList[i]))
260 tidList[++lastTid] = tidList[i];
261 }
262 numTids = lastTid + 1;
263 }
264
265 tidstate->tss_TidList = tidList;
266 tidstate->tss_NumTids = numTids;
267 tidstate->tss_TidPtr = -1;
268 }
269
270 /*
271 * qsort comparator for ItemPointerData items
272 */
273 static int
itemptr_comparator(const void * a,const void * b)274 itemptr_comparator(const void *a, const void *b)
275 {
276 const ItemPointerData *ipa = (const ItemPointerData *) a;
277 const ItemPointerData *ipb = (const ItemPointerData *) b;
278 BlockNumber ba = ItemPointerGetBlockNumber(ipa);
279 BlockNumber bb = ItemPointerGetBlockNumber(ipb);
280 OffsetNumber oa = ItemPointerGetOffsetNumber(ipa);
281 OffsetNumber ob = ItemPointerGetOffsetNumber(ipb);
282
283 if (ba < bb)
284 return -1;
285 if (ba > bb)
286 return 1;
287 if (oa < ob)
288 return -1;
289 if (oa > ob)
290 return 1;
291 return 0;
292 }
293
294 /* ----------------------------------------------------------------
295 * TidNext
296 *
297 * Retrieve a tuple from the TidScan node's currentRelation
298 * using the tids in the TidScanState information.
299 *
300 * ----------------------------------------------------------------
301 */
302 static TupleTableSlot *
TidNext(TidScanState * node)303 TidNext(TidScanState *node)
304 {
305 EState *estate;
306 ScanDirection direction;
307 Snapshot snapshot;
308 Relation heapRelation;
309 HeapTuple tuple;
310 TupleTableSlot *slot;
311 Buffer buffer = InvalidBuffer;
312 ItemPointerData *tidList;
313 int numTids;
314 bool bBackward;
315
316 /*
317 * extract necessary information from tid scan node
318 */
319 estate = node->ss.ps.state;
320 direction = estate->es_direction;
321 snapshot = estate->es_snapshot;
322 heapRelation = node->ss.ss_currentRelation;
323 slot = node->ss.ss_ScanTupleSlot;
324
325 /*
326 * First time through, compute the list of TIDs to be visited
327 */
328 if (node->tss_TidList == NULL)
329 TidListEval(node);
330
331 tidList = node->tss_TidList;
332 numTids = node->tss_NumTids;
333
334 /*
335 * We use node->tss_htup as the tuple pointer; note this can't just be a
336 * local variable here, as the scan tuple slot will keep a pointer to it.
337 */
338 tuple = &(node->tss_htup);
339
340 /*
341 * Initialize or advance scan position, depending on direction.
342 */
343 bBackward = ScanDirectionIsBackward(direction);
344 if (bBackward)
345 {
346 if (node->tss_TidPtr < 0)
347 {
348 /* initialize for backward scan */
349 node->tss_TidPtr = numTids - 1;
350 }
351 else
352 node->tss_TidPtr--;
353 }
354 else
355 {
356 if (node->tss_TidPtr < 0)
357 {
358 /* initialize for forward scan */
359 node->tss_TidPtr = 0;
360 }
361 else
362 node->tss_TidPtr++;
363 }
364
365 while (node->tss_TidPtr >= 0 && node->tss_TidPtr < numTids)
366 {
367 tuple->t_self = tidList[node->tss_TidPtr];
368
369 /*
370 * For WHERE CURRENT OF, the tuple retrieved from the cursor might
371 * since have been updated; if so, we should fetch the version that is
372 * current according to our snapshot.
373 */
374 if (node->tss_isCurrentOf)
375 heap_get_latest_tid(heapRelation, snapshot, &tuple->t_self);
376
377 if (heap_fetch(heapRelation, snapshot, tuple, &buffer, false, NULL))
378 {
379 /*
380 * store the scanned tuple in the scan tuple slot of the scan
381 * state. Eventually we will only do this and not return a tuple.
382 * Note: we pass 'false' because tuples returned by amgetnext are
383 * pointers onto disk pages and were not created with palloc() and
384 * so should not be pfree()'d.
385 */
386 ExecStoreTuple(tuple, /* tuple to store */
387 slot, /* slot to store in */
388 buffer, /* buffer associated with tuple */
389 false); /* don't pfree */
390
391 /*
392 * At this point we have an extra pin on the buffer, because
393 * ExecStoreTuple incremented the pin count. Drop our local pin.
394 */
395 ReleaseBuffer(buffer);
396
397 return slot;
398 }
399 /* Bad TID or failed snapshot qual; try next */
400 if (bBackward)
401 node->tss_TidPtr--;
402 else
403 node->tss_TidPtr++;
404
405 CHECK_FOR_INTERRUPTS();
406 }
407
408 /*
409 * if we get here it means the tid scan failed so we are at the end of the
410 * scan..
411 */
412 return ExecClearTuple(slot);
413 }
414
415 /*
416 * TidRecheck -- access method routine to recheck a tuple in EvalPlanQual
417 */
418 static bool
TidRecheck(TidScanState * node,TupleTableSlot * slot)419 TidRecheck(TidScanState *node, TupleTableSlot *slot)
420 {
421 /*
422 * XXX shouldn't we check here to make sure tuple matches TID list? In
423 * runtime-key case this is not certain, is it? However, in the WHERE
424 * CURRENT OF case it might not match anyway ...
425 */
426 return true;
427 }
428
429
430 /* ----------------------------------------------------------------
431 * ExecTidScan(node)
432 *
433 * Scans the relation using tids and returns
434 * the next qualifying tuple in the direction specified.
435 * We call the ExecScan() routine and pass it the appropriate
436 * access method functions.
437 *
438 * Conditions:
439 * -- the "cursor" maintained by the AMI is positioned at the tuple
440 * returned previously.
441 *
442 * Initial States:
443 * -- the relation indicated is opened for scanning so that the
444 * "cursor" is positioned before the first qualifying tuple.
445 * -- tidPtr is -1.
446 * ----------------------------------------------------------------
447 */
448 static TupleTableSlot *
ExecTidScan(PlanState * pstate)449 ExecTidScan(PlanState *pstate)
450 {
451 TidScanState *node = castNode(TidScanState, pstate);
452
453 return ExecScan(&node->ss,
454 (ExecScanAccessMtd) TidNext,
455 (ExecScanRecheckMtd) TidRecheck);
456 }
457
458 /* ----------------------------------------------------------------
459 * ExecReScanTidScan(node)
460 * ----------------------------------------------------------------
461 */
462 void
ExecReScanTidScan(TidScanState * node)463 ExecReScanTidScan(TidScanState *node)
464 {
465 if (node->tss_TidList)
466 pfree(node->tss_TidList);
467 node->tss_TidList = NULL;
468 node->tss_NumTids = 0;
469 node->tss_TidPtr = -1;
470
471 ExecScanReScan(&node->ss);
472 }
473
474 /* ----------------------------------------------------------------
475 * ExecEndTidScan
476 *
477 * Releases any storage allocated through C routines.
478 * Returns nothing.
479 * ----------------------------------------------------------------
480 */
481 void
ExecEndTidScan(TidScanState * node)482 ExecEndTidScan(TidScanState *node)
483 {
484 /*
485 * Free the exprcontext
486 */
487 ExecFreeExprContext(&node->ss.ps);
488
489 /*
490 * clear out tuple table slots
491 */
492 ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
493 ExecClearTuple(node->ss.ss_ScanTupleSlot);
494
495 /*
496 * close the heap relation.
497 */
498 ExecCloseScanRelation(node->ss.ss_currentRelation);
499 }
500
501 /* ----------------------------------------------------------------
502 * ExecInitTidScan
503 *
504 * Initializes the tid scan's state information, creates
505 * scan keys, and opens the base and tid relations.
506 *
507 * Parameters:
508 * node: TidNode node produced by the planner.
509 * estate: the execution state initialized in InitPlan.
510 * ----------------------------------------------------------------
511 */
512 TidScanState *
ExecInitTidScan(TidScan * node,EState * estate,int eflags)513 ExecInitTidScan(TidScan *node, EState *estate, int eflags)
514 {
515 TidScanState *tidstate;
516 Relation currentRelation;
517
518 /*
519 * create state structure
520 */
521 tidstate = makeNode(TidScanState);
522 tidstate->ss.ps.plan = (Plan *) node;
523 tidstate->ss.ps.state = estate;
524 tidstate->ss.ps.ExecProcNode = ExecTidScan;
525
526 /*
527 * Miscellaneous initialization
528 *
529 * create expression context for node
530 */
531 ExecAssignExprContext(estate, &tidstate->ss.ps);
532
533 /*
534 * initialize child expressions
535 */
536 tidstate->ss.ps.qual =
537 ExecInitQual(node->scan.plan.qual, (PlanState *) tidstate);
538
539 TidExprListCreate(tidstate);
540
541 /*
542 * tuple table initialization
543 */
544 ExecInitResultTupleSlot(estate, &tidstate->ss.ps);
545 ExecInitScanTupleSlot(estate, &tidstate->ss);
546
547 /*
548 * mark tid list as not computed yet
549 */
550 tidstate->tss_TidList = NULL;
551 tidstate->tss_NumTids = 0;
552 tidstate->tss_TidPtr = -1;
553
554 /*
555 * open the base relation and acquire appropriate lock on it.
556 */
557 currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
558
559 tidstate->ss.ss_currentRelation = currentRelation;
560 tidstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */
561
562 /*
563 * get the scan type from the relation descriptor.
564 */
565 ExecAssignScanType(&tidstate->ss, RelationGetDescr(currentRelation));
566
567 /*
568 * Initialize result tuple type and projection info.
569 */
570 ExecAssignResultTypeFromTL(&tidstate->ss.ps);
571 ExecAssignScanProjectionInfo(&tidstate->ss);
572
573 /*
574 * all done.
575 */
576 return tidstate;
577 }
578