1 /*-------------------------------------------------------------------------
2  *
3  * nodeTidscan.c
4  *	  Routines to support direct tid scans of relations
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/executor/nodeTidscan.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * INTERFACE ROUTINES
17  *
18  *		ExecTidScan			scans a relation using tids
19  *		ExecInitTidScan		creates and initializes state info.
20  *		ExecReScanTidScan	rescans the tid relation.
21  *		ExecEndTidScan		releases all storage.
22  */
23 #include "postgres.h"
24 
25 #include "access/sysattr.h"
26 #include "access/tableam.h"
27 #include "catalog/pg_type.h"
28 #include "executor/execdebug.h"
29 #include "executor/nodeTidscan.h"
30 #include "miscadmin.h"
31 #include "nodes/nodeFuncs.h"
32 #include "storage/bufmgr.h"
33 #include "utils/array.h"
34 #include "utils/rel.h"
35 
36 
37 #define IsCTIDVar(node)  \
38 	((node) != NULL && \
39 	 IsA((node), Var) && \
40 	 ((Var *) (node))->varattno == SelfItemPointerAttributeNumber && \
41 	 ((Var *) (node))->varlevelsup == 0)
42 
43 /* one element in tss_tidexprs */
44 typedef struct TidExpr
45 {
46 	ExprState  *exprstate;		/* ExprState for a TID-yielding subexpr */
47 	bool		isarray;		/* if true, it yields tid[] not just tid */
48 	CurrentOfExpr *cexpr;		/* alternatively, we can have CURRENT OF */
49 } TidExpr;
50 
51 static void TidExprListCreate(TidScanState *tidstate);
52 static void TidListEval(TidScanState *tidstate);
53 static int	itemptr_comparator(const void *a, const void *b);
54 static TupleTableSlot *TidNext(TidScanState *node);
55 
56 
57 /*
58  * Extract the qual subexpressions that yield TIDs to search for,
59  * and compile them into ExprStates if they're ordinary expressions.
60  *
61  * CURRENT OF is a special case that we can't compile usefully;
62  * just drop it into the TidExpr list as-is.
63  */
64 static void
TidExprListCreate(TidScanState * tidstate)65 TidExprListCreate(TidScanState *tidstate)
66 {
67 	TidScan    *node = (TidScan *) tidstate->ss.ps.plan;
68 	ListCell   *l;
69 
70 	tidstate->tss_tidexprs = NIL;
71 	tidstate->tss_isCurrentOf = false;
72 
73 	foreach(l, node->tidquals)
74 	{
75 		Expr	   *expr = (Expr *) lfirst(l);
76 		TidExpr    *tidexpr = (TidExpr *) palloc0(sizeof(TidExpr));
77 
78 		if (is_opclause(expr))
79 		{
80 			Node	   *arg1;
81 			Node	   *arg2;
82 
83 			arg1 = get_leftop(expr);
84 			arg2 = get_rightop(expr);
85 			if (IsCTIDVar(arg1))
86 				tidexpr->exprstate = ExecInitExpr((Expr *) arg2,
87 												  &tidstate->ss.ps);
88 			else if (IsCTIDVar(arg2))
89 				tidexpr->exprstate = ExecInitExpr((Expr *) arg1,
90 												  &tidstate->ss.ps);
91 			else
92 				elog(ERROR, "could not identify CTID variable");
93 			tidexpr->isarray = false;
94 		}
95 		else if (expr && IsA(expr, ScalarArrayOpExpr))
96 		{
97 			ScalarArrayOpExpr *saex = (ScalarArrayOpExpr *) expr;
98 
99 			Assert(IsCTIDVar(linitial(saex->args)));
100 			tidexpr->exprstate = ExecInitExpr(lsecond(saex->args),
101 											  &tidstate->ss.ps);
102 			tidexpr->isarray = true;
103 		}
104 		else if (expr && IsA(expr, CurrentOfExpr))
105 		{
106 			CurrentOfExpr *cexpr = (CurrentOfExpr *) expr;
107 
108 			tidexpr->cexpr = cexpr;
109 			tidstate->tss_isCurrentOf = true;
110 		}
111 		else
112 			elog(ERROR, "could not identify CTID expression");
113 
114 		tidstate->tss_tidexprs = lappend(tidstate->tss_tidexprs, tidexpr);
115 	}
116 
117 	/* CurrentOfExpr could never appear OR'd with something else */
118 	Assert(list_length(tidstate->tss_tidexprs) == 1 ||
119 		   !tidstate->tss_isCurrentOf);
120 }
121 
122 /*
123  * Compute the list of TIDs to be visited, by evaluating the expressions
124  * for them.
125  *
126  * (The result is actually an array, not a list.)
127  */
128 static void
TidListEval(TidScanState * tidstate)129 TidListEval(TidScanState *tidstate)
130 {
131 	ExprContext *econtext = tidstate->ss.ps.ps_ExprContext;
132 	TableScanDesc scan;
133 	ItemPointerData *tidList;
134 	int			numAllocTids;
135 	int			numTids;
136 	ListCell   *l;
137 
138 	/*
139 	 * Start scan on-demand - initializing a scan isn't free (e.g. heap stats
140 	 * the size of the table), so it makes sense to delay that until needed -
141 	 * the node might never get executed.
142 	 */
143 	if (tidstate->ss.ss_currentScanDesc == NULL)
144 		tidstate->ss.ss_currentScanDesc =
145 			table_beginscan_tid(tidstate->ss.ss_currentRelation,
146 							tidstate->ss.ps.state->es_snapshot);
147 	scan = tidstate->ss.ss_currentScanDesc;
148 
149 	/*
150 	 * We initialize the array with enough slots for the case that all quals
151 	 * are simple OpExprs or CurrentOfExprs.  If there are any
152 	 * ScalarArrayOpExprs, we may have to enlarge the array.
153 	 */
154 	numAllocTids = list_length(tidstate->tss_tidexprs);
155 	tidList = (ItemPointerData *)
156 		palloc(numAllocTids * sizeof(ItemPointerData));
157 	numTids = 0;
158 
159 	foreach(l, tidstate->tss_tidexprs)
160 	{
161 		TidExpr    *tidexpr = (TidExpr *) lfirst(l);
162 		ItemPointer itemptr;
163 		bool		isNull;
164 
165 		if (tidexpr->exprstate && !tidexpr->isarray)
166 		{
167 			itemptr = (ItemPointer)
168 				DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate,
169 														  econtext,
170 														  &isNull));
171 			if (isNull)
172 				continue;
173 
174 			/*
175 			 * We silently discard any TIDs that the AM considers invalid
176 			 * (E.g. for heap, they could be out of range at the time of scan
177 			 * start.  Since we hold at least AccessShareLock on the table, it
178 			 * won't be possible for someone to truncate away the blocks we
179 			 * intend to visit.).
180 			 */
181 			if (!table_tuple_tid_valid(scan, itemptr))
182 				continue;
183 
184 			if (numTids >= numAllocTids)
185 			{
186 				numAllocTids *= 2;
187 				tidList = (ItemPointerData *)
188 					repalloc(tidList,
189 							 numAllocTids * sizeof(ItemPointerData));
190 			}
191 			tidList[numTids++] = *itemptr;
192 		}
193 		else if (tidexpr->exprstate && tidexpr->isarray)
194 		{
195 			Datum		arraydatum;
196 			ArrayType  *itemarray;
197 			Datum	   *ipdatums;
198 			bool	   *ipnulls;
199 			int			ndatums;
200 			int			i;
201 
202 			arraydatum = ExecEvalExprSwitchContext(tidexpr->exprstate,
203 												   econtext,
204 												   &isNull);
205 			if (isNull)
206 				continue;
207 			itemarray = DatumGetArrayTypeP(arraydatum);
208 			deconstruct_array(itemarray,
209 							  TIDOID, sizeof(ItemPointerData), false, 's',
210 							  &ipdatums, &ipnulls, &ndatums);
211 			if (numTids + ndatums > numAllocTids)
212 			{
213 				numAllocTids = numTids + ndatums;
214 				tidList = (ItemPointerData *)
215 					repalloc(tidList,
216 							 numAllocTids * sizeof(ItemPointerData));
217 			}
218 			for (i = 0; i < ndatums; i++)
219 			{
220 				if (ipnulls[i])
221 					continue;
222 
223 				itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]);
224 
225 				if (!table_tuple_tid_valid(scan, itemptr))
226 					continue;
227 
228 				tidList[numTids++] = *itemptr;
229 			}
230 			pfree(ipdatums);
231 			pfree(ipnulls);
232 		}
233 		else
234 		{
235 			ItemPointerData cursor_tid;
236 
237 			Assert(tidexpr->cexpr);
238 			if (execCurrentOf(tidexpr->cexpr, econtext,
239 							  RelationGetRelid(tidstate->ss.ss_currentRelation),
240 							  &cursor_tid))
241 			{
242 				if (numTids >= numAllocTids)
243 				{
244 					numAllocTids *= 2;
245 					tidList = (ItemPointerData *)
246 						repalloc(tidList,
247 								 numAllocTids * sizeof(ItemPointerData));
248 				}
249 				tidList[numTids++] = cursor_tid;
250 			}
251 		}
252 	}
253 
254 	/*
255 	 * Sort the array of TIDs into order, and eliminate duplicates.
256 	 * Eliminating duplicates is necessary since we want OR semantics across
257 	 * the list.  Sorting makes it easier to detect duplicates, and as a bonus
258 	 * ensures that we will visit the heap in the most efficient way.
259 	 */
260 	if (numTids > 1)
261 	{
262 		int			lastTid;
263 		int			i;
264 
265 		/* CurrentOfExpr could never appear OR'd with something else */
266 		Assert(!tidstate->tss_isCurrentOf);
267 
268 		qsort((void *) tidList, numTids, sizeof(ItemPointerData),
269 			  itemptr_comparator);
270 		lastTid = 0;
271 		for (i = 1; i < numTids; i++)
272 		{
273 			if (!ItemPointerEquals(&tidList[lastTid], &tidList[i]))
274 				tidList[++lastTid] = tidList[i];
275 		}
276 		numTids = lastTid + 1;
277 	}
278 
279 	tidstate->tss_TidList = tidList;
280 	tidstate->tss_NumTids = numTids;
281 	tidstate->tss_TidPtr = -1;
282 }
283 
284 /*
285  * qsort comparator for ItemPointerData items
286  */
287 static int
itemptr_comparator(const void * a,const void * b)288 itemptr_comparator(const void *a, const void *b)
289 {
290 	const ItemPointerData *ipa = (const ItemPointerData *) a;
291 	const ItemPointerData *ipb = (const ItemPointerData *) b;
292 	BlockNumber ba = ItemPointerGetBlockNumber(ipa);
293 	BlockNumber bb = ItemPointerGetBlockNumber(ipb);
294 	OffsetNumber oa = ItemPointerGetOffsetNumber(ipa);
295 	OffsetNumber ob = ItemPointerGetOffsetNumber(ipb);
296 
297 	if (ba < bb)
298 		return -1;
299 	if (ba > bb)
300 		return 1;
301 	if (oa < ob)
302 		return -1;
303 	if (oa > ob)
304 		return 1;
305 	return 0;
306 }
307 
308 /* ----------------------------------------------------------------
309  *		TidNext
310  *
311  *		Retrieve a tuple from the TidScan node's currentRelation
312  *		using the tids in the TidScanState information.
313  *
314  * ----------------------------------------------------------------
315  */
316 static TupleTableSlot *
TidNext(TidScanState * node)317 TidNext(TidScanState *node)
318 {
319 	EState	   *estate;
320 	ScanDirection direction;
321 	Snapshot	snapshot;
322 	TableScanDesc scan;
323 	Relation	heapRelation;
324 	TupleTableSlot *slot;
325 	ItemPointerData *tidList;
326 	int			numTids;
327 	bool		bBackward;
328 
329 	/*
330 	 * extract necessary information from tid scan node
331 	 */
332 	estate = node->ss.ps.state;
333 	direction = estate->es_direction;
334 	snapshot = estate->es_snapshot;
335 	heapRelation = node->ss.ss_currentRelation;
336 	slot = node->ss.ss_ScanTupleSlot;
337 
338 	/*
339 	 * First time through, compute the list of TIDs to be visited
340 	 */
341 	if (node->tss_TidList == NULL)
342 		TidListEval(node);
343 
344 	scan = node->ss.ss_currentScanDesc;
345 	tidList = node->tss_TidList;
346 	numTids = node->tss_NumTids;
347 
348 	/*
349 	 * Initialize or advance scan position, depending on direction.
350 	 */
351 	bBackward = ScanDirectionIsBackward(direction);
352 	if (bBackward)
353 	{
354 		if (node->tss_TidPtr < 0)
355 		{
356 			/* initialize for backward scan */
357 			node->tss_TidPtr = numTids - 1;
358 		}
359 		else
360 			node->tss_TidPtr--;
361 	}
362 	else
363 	{
364 		if (node->tss_TidPtr < 0)
365 		{
366 			/* initialize for forward scan */
367 			node->tss_TidPtr = 0;
368 		}
369 		else
370 			node->tss_TidPtr++;
371 	}
372 
373 	while (node->tss_TidPtr >= 0 && node->tss_TidPtr < numTids)
374 	{
375 		ItemPointerData tid = tidList[node->tss_TidPtr];
376 
377 		/*
378 		 * For WHERE CURRENT OF, the tuple retrieved from the cursor might
379 		 * since have been updated; if so, we should fetch the version that is
380 		 * current according to our snapshot.
381 		 */
382 		if (node->tss_isCurrentOf)
383 			table_tuple_get_latest_tid(scan, &tid);
384 
385 		if (table_tuple_fetch_row_version(heapRelation, &tid, snapshot, slot))
386 			return slot;
387 
388 		/* Bad TID or failed snapshot qual; try next */
389 		if (bBackward)
390 			node->tss_TidPtr--;
391 		else
392 			node->tss_TidPtr++;
393 
394 		CHECK_FOR_INTERRUPTS();
395 	}
396 
397 	/*
398 	 * if we get here it means the tid scan failed so we are at the end of the
399 	 * scan..
400 	 */
401 	return ExecClearTuple(slot);
402 }
403 
404 /*
405  * TidRecheck -- access method routine to recheck a tuple in EvalPlanQual
406  */
407 static bool
TidRecheck(TidScanState * node,TupleTableSlot * slot)408 TidRecheck(TidScanState *node, TupleTableSlot *slot)
409 {
410 	/*
411 	 * XXX shouldn't we check here to make sure tuple matches TID list? In
412 	 * runtime-key case this is not certain, is it?  However, in the WHERE
413 	 * CURRENT OF case it might not match anyway ...
414 	 */
415 	return true;
416 }
417 
418 
419 /* ----------------------------------------------------------------
420  *		ExecTidScan(node)
421  *
422  *		Scans the relation using tids and returns
423  *		   the next qualifying tuple in the direction specified.
424  *		We call the ExecScan() routine and pass it the appropriate
425  *		access method functions.
426  *
427  *		Conditions:
428  *		  -- the "cursor" maintained by the AMI is positioned at the tuple
429  *			 returned previously.
430  *
431  *		Initial States:
432  *		  -- the relation indicated is opened for scanning so that the
433  *			 "cursor" is positioned before the first qualifying tuple.
434  *		  -- tidPtr is -1.
435  * ----------------------------------------------------------------
436  */
437 static TupleTableSlot *
ExecTidScan(PlanState * pstate)438 ExecTidScan(PlanState *pstate)
439 {
440 	TidScanState *node = castNode(TidScanState, pstate);
441 
442 	return ExecScan(&node->ss,
443 					(ExecScanAccessMtd) TidNext,
444 					(ExecScanRecheckMtd) TidRecheck);
445 }
446 
447 /* ----------------------------------------------------------------
448  *		ExecReScanTidScan(node)
449  * ----------------------------------------------------------------
450  */
451 void
ExecReScanTidScan(TidScanState * node)452 ExecReScanTidScan(TidScanState *node)
453 {
454 	if (node->tss_TidList)
455 		pfree(node->tss_TidList);
456 	node->tss_TidList = NULL;
457 	node->tss_NumTids = 0;
458 	node->tss_TidPtr = -1;
459 
460 	/* not really necessary, but seems good form */
461 	if (node->ss.ss_currentScanDesc)
462 		table_rescan(node->ss.ss_currentScanDesc, NULL);
463 
464 	ExecScanReScan(&node->ss);
465 }
466 
467 /* ----------------------------------------------------------------
468  *		ExecEndTidScan
469  *
470  *		Releases any storage allocated through C routines.
471  *		Returns nothing.
472  * ----------------------------------------------------------------
473  */
474 void
ExecEndTidScan(TidScanState * node)475 ExecEndTidScan(TidScanState *node)
476 {
477 	if (node->ss.ss_currentScanDesc)
478 		table_endscan(node->ss.ss_currentScanDesc);
479 
480 	/*
481 	 * Free the exprcontext
482 	 */
483 	ExecFreeExprContext(&node->ss.ps);
484 
485 	/*
486 	 * clear out tuple table slots
487 	 */
488 	if (node->ss.ps.ps_ResultTupleSlot)
489 		ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
490 	ExecClearTuple(node->ss.ss_ScanTupleSlot);
491 }
492 
493 /* ----------------------------------------------------------------
494  *		ExecInitTidScan
495  *
496  *		Initializes the tid scan's state information, creates
497  *		scan keys, and opens the base and tid relations.
498  *
499  *		Parameters:
500  *		  node: TidNode node produced by the planner.
501  *		  estate: the execution state initialized in InitPlan.
502  * ----------------------------------------------------------------
503  */
504 TidScanState *
ExecInitTidScan(TidScan * node,EState * estate,int eflags)505 ExecInitTidScan(TidScan *node, EState *estate, int eflags)
506 {
507 	TidScanState *tidstate;
508 	Relation	currentRelation;
509 
510 	/*
511 	 * create state structure
512 	 */
513 	tidstate = makeNode(TidScanState);
514 	tidstate->ss.ps.plan = (Plan *) node;
515 	tidstate->ss.ps.state = estate;
516 	tidstate->ss.ps.ExecProcNode = ExecTidScan;
517 
518 	/*
519 	 * Miscellaneous initialization
520 	 *
521 	 * create expression context for node
522 	 */
523 	ExecAssignExprContext(estate, &tidstate->ss.ps);
524 
525 	/*
526 	 * mark tid list as not computed yet
527 	 */
528 	tidstate->tss_TidList = NULL;
529 	tidstate->tss_NumTids = 0;
530 	tidstate->tss_TidPtr = -1;
531 
532 	/*
533 	 * open the scan relation
534 	 */
535 	currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
536 
537 	tidstate->ss.ss_currentRelation = currentRelation;
538 	tidstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */
539 
540 	/*
541 	 * get the scan type from the relation descriptor.
542 	 */
543 	ExecInitScanTupleSlot(estate, &tidstate->ss,
544 						  RelationGetDescr(currentRelation),
545 						  table_slot_callbacks(currentRelation));
546 
547 	/*
548 	 * Initialize result type and projection.
549 	 */
550 	ExecInitResultTypeTL(&tidstate->ss.ps);
551 	ExecAssignScanProjectionInfo(&tidstate->ss);
552 
553 	/*
554 	 * initialize child expressions
555 	 */
556 	tidstate->ss.ps.qual =
557 		ExecInitQual(node->scan.plan.qual, (PlanState *) tidstate);
558 
559 	TidExprListCreate(tidstate);
560 
561 	/*
562 	 * all done.
563 	 */
564 	return tidstate;
565 }
566