1 /*-------------------------------------------------------------------------
2 *
3 * nodeTidscan.c
4 * Routines to support direct tid scans of relations
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/executor/nodeTidscan.c
12 *
13 *-------------------------------------------------------------------------
14 */
15 /*
16 * INTERFACE ROUTINES
17 *
18 * ExecTidScan scans a relation using tids
19 * ExecInitTidScan creates and initializes state info.
20 * ExecReScanTidScan rescans the tid relation.
21 * ExecEndTidScan releases all storage.
22 */
23 #include "postgres.h"
24
25 #include "access/sysattr.h"
26 #include "access/tableam.h"
27 #include "catalog/pg_type.h"
28 #include "executor/execdebug.h"
29 #include "executor/nodeTidscan.h"
30 #include "miscadmin.h"
31 #include "nodes/nodeFuncs.h"
32 #include "storage/bufmgr.h"
33 #include "utils/array.h"
34 #include "utils/rel.h"
35
36
37 #define IsCTIDVar(node) \
38 ((node) != NULL && \
39 IsA((node), Var) && \
40 ((Var *) (node))->varattno == SelfItemPointerAttributeNumber && \
41 ((Var *) (node))->varlevelsup == 0)
42
43 /* one element in tss_tidexprs */
44 typedef struct TidExpr
45 {
46 ExprState *exprstate; /* ExprState for a TID-yielding subexpr */
47 bool isarray; /* if true, it yields tid[] not just tid */
48 CurrentOfExpr *cexpr; /* alternatively, we can have CURRENT OF */
49 } TidExpr;
50
51 static void TidExprListCreate(TidScanState *tidstate);
52 static void TidListEval(TidScanState *tidstate);
53 static int itemptr_comparator(const void *a, const void *b);
54 static TupleTableSlot *TidNext(TidScanState *node);
55
56
57 /*
58 * Extract the qual subexpressions that yield TIDs to search for,
59 * and compile them into ExprStates if they're ordinary expressions.
60 *
61 * CURRENT OF is a special case that we can't compile usefully;
62 * just drop it into the TidExpr list as-is.
63 */
64 static void
TidExprListCreate(TidScanState * tidstate)65 TidExprListCreate(TidScanState *tidstate)
66 {
67 TidScan *node = (TidScan *) tidstate->ss.ps.plan;
68 ListCell *l;
69
70 tidstate->tss_tidexprs = NIL;
71 tidstate->tss_isCurrentOf = false;
72
73 foreach(l, node->tidquals)
74 {
75 Expr *expr = (Expr *) lfirst(l);
76 TidExpr *tidexpr = (TidExpr *) palloc0(sizeof(TidExpr));
77
78 if (is_opclause(expr))
79 {
80 Node *arg1;
81 Node *arg2;
82
83 arg1 = get_leftop(expr);
84 arg2 = get_rightop(expr);
85 if (IsCTIDVar(arg1))
86 tidexpr->exprstate = ExecInitExpr((Expr *) arg2,
87 &tidstate->ss.ps);
88 else if (IsCTIDVar(arg2))
89 tidexpr->exprstate = ExecInitExpr((Expr *) arg1,
90 &tidstate->ss.ps);
91 else
92 elog(ERROR, "could not identify CTID variable");
93 tidexpr->isarray = false;
94 }
95 else if (expr && IsA(expr, ScalarArrayOpExpr))
96 {
97 ScalarArrayOpExpr *saex = (ScalarArrayOpExpr *) expr;
98
99 Assert(IsCTIDVar(linitial(saex->args)));
100 tidexpr->exprstate = ExecInitExpr(lsecond(saex->args),
101 &tidstate->ss.ps);
102 tidexpr->isarray = true;
103 }
104 else if (expr && IsA(expr, CurrentOfExpr))
105 {
106 CurrentOfExpr *cexpr = (CurrentOfExpr *) expr;
107
108 tidexpr->cexpr = cexpr;
109 tidstate->tss_isCurrentOf = true;
110 }
111 else
112 elog(ERROR, "could not identify CTID expression");
113
114 tidstate->tss_tidexprs = lappend(tidstate->tss_tidexprs, tidexpr);
115 }
116
117 /* CurrentOfExpr could never appear OR'd with something else */
118 Assert(list_length(tidstate->tss_tidexprs) == 1 ||
119 !tidstate->tss_isCurrentOf);
120 }
121
122 /*
123 * Compute the list of TIDs to be visited, by evaluating the expressions
124 * for them.
125 *
126 * (The result is actually an array, not a list.)
127 */
128 static void
TidListEval(TidScanState * tidstate)129 TidListEval(TidScanState *tidstate)
130 {
131 ExprContext *econtext = tidstate->ss.ps.ps_ExprContext;
132 TableScanDesc scan;
133 ItemPointerData *tidList;
134 int numAllocTids;
135 int numTids;
136 ListCell *l;
137
138 /*
139 * Start scan on-demand - initializing a scan isn't free (e.g. heap stats
140 * the size of the table), so it makes sense to delay that until needed -
141 * the node might never get executed.
142 */
143 if (tidstate->ss.ss_currentScanDesc == NULL)
144 tidstate->ss.ss_currentScanDesc =
145 table_beginscan_tid(tidstate->ss.ss_currentRelation,
146 tidstate->ss.ps.state->es_snapshot);
147 scan = tidstate->ss.ss_currentScanDesc;
148
149 /*
150 * We initialize the array with enough slots for the case that all quals
151 * are simple OpExprs or CurrentOfExprs. If there are any
152 * ScalarArrayOpExprs, we may have to enlarge the array.
153 */
154 numAllocTids = list_length(tidstate->tss_tidexprs);
155 tidList = (ItemPointerData *)
156 palloc(numAllocTids * sizeof(ItemPointerData));
157 numTids = 0;
158
159 foreach(l, tidstate->tss_tidexprs)
160 {
161 TidExpr *tidexpr = (TidExpr *) lfirst(l);
162 ItemPointer itemptr;
163 bool isNull;
164
165 if (tidexpr->exprstate && !tidexpr->isarray)
166 {
167 itemptr = (ItemPointer)
168 DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate,
169 econtext,
170 &isNull));
171 if (isNull)
172 continue;
173
174 /*
175 * We silently discard any TIDs that the AM considers invalid
176 * (E.g. for heap, they could be out of range at the time of scan
177 * start. Since we hold at least AccessShareLock on the table, it
178 * won't be possible for someone to truncate away the blocks we
179 * intend to visit.).
180 */
181 if (!table_tuple_tid_valid(scan, itemptr))
182 continue;
183
184 if (numTids >= numAllocTids)
185 {
186 numAllocTids *= 2;
187 tidList = (ItemPointerData *)
188 repalloc(tidList,
189 numAllocTids * sizeof(ItemPointerData));
190 }
191 tidList[numTids++] = *itemptr;
192 }
193 else if (tidexpr->exprstate && tidexpr->isarray)
194 {
195 Datum arraydatum;
196 ArrayType *itemarray;
197 Datum *ipdatums;
198 bool *ipnulls;
199 int ndatums;
200 int i;
201
202 arraydatum = ExecEvalExprSwitchContext(tidexpr->exprstate,
203 econtext,
204 &isNull);
205 if (isNull)
206 continue;
207 itemarray = DatumGetArrayTypeP(arraydatum);
208 deconstruct_array(itemarray,
209 TIDOID, sizeof(ItemPointerData), false, 's',
210 &ipdatums, &ipnulls, &ndatums);
211 if (numTids + ndatums > numAllocTids)
212 {
213 numAllocTids = numTids + ndatums;
214 tidList = (ItemPointerData *)
215 repalloc(tidList,
216 numAllocTids * sizeof(ItemPointerData));
217 }
218 for (i = 0; i < ndatums; i++)
219 {
220 if (ipnulls[i])
221 continue;
222
223 itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]);
224
225 if (!table_tuple_tid_valid(scan, itemptr))
226 continue;
227
228 tidList[numTids++] = *itemptr;
229 }
230 pfree(ipdatums);
231 pfree(ipnulls);
232 }
233 else
234 {
235 ItemPointerData cursor_tid;
236
237 Assert(tidexpr->cexpr);
238 if (execCurrentOf(tidexpr->cexpr, econtext,
239 RelationGetRelid(tidstate->ss.ss_currentRelation),
240 &cursor_tid))
241 {
242 if (numTids >= numAllocTids)
243 {
244 numAllocTids *= 2;
245 tidList = (ItemPointerData *)
246 repalloc(tidList,
247 numAllocTids * sizeof(ItemPointerData));
248 }
249 tidList[numTids++] = cursor_tid;
250 }
251 }
252 }
253
254 /*
255 * Sort the array of TIDs into order, and eliminate duplicates.
256 * Eliminating duplicates is necessary since we want OR semantics across
257 * the list. Sorting makes it easier to detect duplicates, and as a bonus
258 * ensures that we will visit the heap in the most efficient way.
259 */
260 if (numTids > 1)
261 {
262 int lastTid;
263 int i;
264
265 /* CurrentOfExpr could never appear OR'd with something else */
266 Assert(!tidstate->tss_isCurrentOf);
267
268 qsort((void *) tidList, numTids, sizeof(ItemPointerData),
269 itemptr_comparator);
270 lastTid = 0;
271 for (i = 1; i < numTids; i++)
272 {
273 if (!ItemPointerEquals(&tidList[lastTid], &tidList[i]))
274 tidList[++lastTid] = tidList[i];
275 }
276 numTids = lastTid + 1;
277 }
278
279 tidstate->tss_TidList = tidList;
280 tidstate->tss_NumTids = numTids;
281 tidstate->tss_TidPtr = -1;
282 }
283
284 /*
285 * qsort comparator for ItemPointerData items
286 */
287 static int
itemptr_comparator(const void * a,const void * b)288 itemptr_comparator(const void *a, const void *b)
289 {
290 const ItemPointerData *ipa = (const ItemPointerData *) a;
291 const ItemPointerData *ipb = (const ItemPointerData *) b;
292 BlockNumber ba = ItemPointerGetBlockNumber(ipa);
293 BlockNumber bb = ItemPointerGetBlockNumber(ipb);
294 OffsetNumber oa = ItemPointerGetOffsetNumber(ipa);
295 OffsetNumber ob = ItemPointerGetOffsetNumber(ipb);
296
297 if (ba < bb)
298 return -1;
299 if (ba > bb)
300 return 1;
301 if (oa < ob)
302 return -1;
303 if (oa > ob)
304 return 1;
305 return 0;
306 }
307
308 /* ----------------------------------------------------------------
309 * TidNext
310 *
311 * Retrieve a tuple from the TidScan node's currentRelation
312 * using the tids in the TidScanState information.
313 *
314 * ----------------------------------------------------------------
315 */
316 static TupleTableSlot *
TidNext(TidScanState * node)317 TidNext(TidScanState *node)
318 {
319 EState *estate;
320 ScanDirection direction;
321 Snapshot snapshot;
322 TableScanDesc scan;
323 Relation heapRelation;
324 TupleTableSlot *slot;
325 ItemPointerData *tidList;
326 int numTids;
327 bool bBackward;
328
329 /*
330 * extract necessary information from tid scan node
331 */
332 estate = node->ss.ps.state;
333 direction = estate->es_direction;
334 snapshot = estate->es_snapshot;
335 heapRelation = node->ss.ss_currentRelation;
336 slot = node->ss.ss_ScanTupleSlot;
337
338 /*
339 * First time through, compute the list of TIDs to be visited
340 */
341 if (node->tss_TidList == NULL)
342 TidListEval(node);
343
344 scan = node->ss.ss_currentScanDesc;
345 tidList = node->tss_TidList;
346 numTids = node->tss_NumTids;
347
348 /*
349 * Initialize or advance scan position, depending on direction.
350 */
351 bBackward = ScanDirectionIsBackward(direction);
352 if (bBackward)
353 {
354 if (node->tss_TidPtr < 0)
355 {
356 /* initialize for backward scan */
357 node->tss_TidPtr = numTids - 1;
358 }
359 else
360 node->tss_TidPtr--;
361 }
362 else
363 {
364 if (node->tss_TidPtr < 0)
365 {
366 /* initialize for forward scan */
367 node->tss_TidPtr = 0;
368 }
369 else
370 node->tss_TidPtr++;
371 }
372
373 while (node->tss_TidPtr >= 0 && node->tss_TidPtr < numTids)
374 {
375 ItemPointerData tid = tidList[node->tss_TidPtr];
376
377 /*
378 * For WHERE CURRENT OF, the tuple retrieved from the cursor might
379 * since have been updated; if so, we should fetch the version that is
380 * current according to our snapshot.
381 */
382 if (node->tss_isCurrentOf)
383 table_tuple_get_latest_tid(scan, &tid);
384
385 if (table_tuple_fetch_row_version(heapRelation, &tid, snapshot, slot))
386 return slot;
387
388 /* Bad TID or failed snapshot qual; try next */
389 if (bBackward)
390 node->tss_TidPtr--;
391 else
392 node->tss_TidPtr++;
393
394 CHECK_FOR_INTERRUPTS();
395 }
396
397 /*
398 * if we get here it means the tid scan failed so we are at the end of the
399 * scan..
400 */
401 return ExecClearTuple(slot);
402 }
403
404 /*
405 * TidRecheck -- access method routine to recheck a tuple in EvalPlanQual
406 */
407 static bool
TidRecheck(TidScanState * node,TupleTableSlot * slot)408 TidRecheck(TidScanState *node, TupleTableSlot *slot)
409 {
410 /*
411 * XXX shouldn't we check here to make sure tuple matches TID list? In
412 * runtime-key case this is not certain, is it? However, in the WHERE
413 * CURRENT OF case it might not match anyway ...
414 */
415 return true;
416 }
417
418
419 /* ----------------------------------------------------------------
420 * ExecTidScan(node)
421 *
422 * Scans the relation using tids and returns
423 * the next qualifying tuple in the direction specified.
424 * We call the ExecScan() routine and pass it the appropriate
425 * access method functions.
426 *
427 * Conditions:
428 * -- the "cursor" maintained by the AMI is positioned at the tuple
429 * returned previously.
430 *
431 * Initial States:
432 * -- the relation indicated is opened for scanning so that the
433 * "cursor" is positioned before the first qualifying tuple.
434 * -- tidPtr is -1.
435 * ----------------------------------------------------------------
436 */
437 static TupleTableSlot *
ExecTidScan(PlanState * pstate)438 ExecTidScan(PlanState *pstate)
439 {
440 TidScanState *node = castNode(TidScanState, pstate);
441
442 return ExecScan(&node->ss,
443 (ExecScanAccessMtd) TidNext,
444 (ExecScanRecheckMtd) TidRecheck);
445 }
446
447 /* ----------------------------------------------------------------
448 * ExecReScanTidScan(node)
449 * ----------------------------------------------------------------
450 */
451 void
ExecReScanTidScan(TidScanState * node)452 ExecReScanTidScan(TidScanState *node)
453 {
454 if (node->tss_TidList)
455 pfree(node->tss_TidList);
456 node->tss_TidList = NULL;
457 node->tss_NumTids = 0;
458 node->tss_TidPtr = -1;
459
460 /* not really necessary, but seems good form */
461 if (node->ss.ss_currentScanDesc)
462 table_rescan(node->ss.ss_currentScanDesc, NULL);
463
464 ExecScanReScan(&node->ss);
465 }
466
467 /* ----------------------------------------------------------------
468 * ExecEndTidScan
469 *
470 * Releases any storage allocated through C routines.
471 * Returns nothing.
472 * ----------------------------------------------------------------
473 */
474 void
ExecEndTidScan(TidScanState * node)475 ExecEndTidScan(TidScanState *node)
476 {
477 if (node->ss.ss_currentScanDesc)
478 table_endscan(node->ss.ss_currentScanDesc);
479
480 /*
481 * Free the exprcontext
482 */
483 ExecFreeExprContext(&node->ss.ps);
484
485 /*
486 * clear out tuple table slots
487 */
488 if (node->ss.ps.ps_ResultTupleSlot)
489 ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
490 ExecClearTuple(node->ss.ss_ScanTupleSlot);
491 }
492
493 /* ----------------------------------------------------------------
494 * ExecInitTidScan
495 *
496 * Initializes the tid scan's state information, creates
497 * scan keys, and opens the base and tid relations.
498 *
499 * Parameters:
500 * node: TidNode node produced by the planner.
501 * estate: the execution state initialized in InitPlan.
502 * ----------------------------------------------------------------
503 */
504 TidScanState *
ExecInitTidScan(TidScan * node,EState * estate,int eflags)505 ExecInitTidScan(TidScan *node, EState *estate, int eflags)
506 {
507 TidScanState *tidstate;
508 Relation currentRelation;
509
510 /*
511 * create state structure
512 */
513 tidstate = makeNode(TidScanState);
514 tidstate->ss.ps.plan = (Plan *) node;
515 tidstate->ss.ps.state = estate;
516 tidstate->ss.ps.ExecProcNode = ExecTidScan;
517
518 /*
519 * Miscellaneous initialization
520 *
521 * create expression context for node
522 */
523 ExecAssignExprContext(estate, &tidstate->ss.ps);
524
525 /*
526 * mark tid list as not computed yet
527 */
528 tidstate->tss_TidList = NULL;
529 tidstate->tss_NumTids = 0;
530 tidstate->tss_TidPtr = -1;
531
532 /*
533 * open the scan relation
534 */
535 currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
536
537 tidstate->ss.ss_currentRelation = currentRelation;
538 tidstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */
539
540 /*
541 * get the scan type from the relation descriptor.
542 */
543 ExecInitScanTupleSlot(estate, &tidstate->ss,
544 RelationGetDescr(currentRelation),
545 table_slot_callbacks(currentRelation));
546
547 /*
548 * Initialize result type and projection.
549 */
550 ExecInitResultTypeTL(&tidstate->ss.ps);
551 ExecAssignScanProjectionInfo(&tidstate->ss);
552
553 /*
554 * initialize child expressions
555 */
556 tidstate->ss.ps.qual =
557 ExecInitQual(node->scan.plan.qual, (PlanState *) tidstate);
558
559 TidExprListCreate(tidstate);
560
561 /*
562 * all done.
563 */
564 return tidstate;
565 }
566