1 /*-------------------------------------------------------------------------
2  *
3  * nodeTidscan.c
4  *	  Routines to support direct tid scans of relations
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/executor/nodeTidscan.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * INTERFACE ROUTINES
17  *
18  *		ExecTidScan			scans a relation using tids
19  *		ExecInitTidScan		creates and initializes state info.
20  *		ExecReScanTidScan	rescans the tid relation.
21  *		ExecEndTidScan		releases all storage.
22  */
23 #include "postgres.h"
24 
25 #include "access/sysattr.h"
26 #include "catalog/pg_type.h"
27 #include "executor/execdebug.h"
28 #include "executor/nodeTidscan.h"
29 #include "miscadmin.h"
30 #include "optimizer/clauses.h"
31 #include "storage/bufmgr.h"
32 #include "utils/array.h"
33 #include "utils/rel.h"
34 
35 
36 #define IsCTIDVar(node)  \
37 	((node) != NULL && \
38 	 IsA((node), Var) && \
39 	 ((Var *) (node))->varattno == SelfItemPointerAttributeNumber && \
40 	 ((Var *) (node))->varlevelsup == 0)
41 
42 /* one element in tss_tidexprs */
43 typedef struct TidExpr
44 {
45 	ExprState  *exprstate;		/* ExprState for a TID-yielding subexpr */
46 	bool		isarray;		/* if true, it yields tid[] not just tid */
47 	CurrentOfExpr *cexpr;		/* alternatively, we can have CURRENT OF */
48 } TidExpr;
49 
50 static void TidExprListCreate(TidScanState *tidstate);
51 static void TidListEval(TidScanState *tidstate);
52 static int	itemptr_comparator(const void *a, const void *b);
53 static TupleTableSlot *TidNext(TidScanState *node);
54 
55 
56 /*
57  * Extract the qual subexpressions that yield TIDs to search for,
58  * and compile them into ExprStates if they're ordinary expressions.
59  *
60  * CURRENT OF is a special case that we can't compile usefully;
61  * just drop it into the TidExpr list as-is.
62  */
63 static void
TidExprListCreate(TidScanState * tidstate)64 TidExprListCreate(TidScanState *tidstate)
65 {
66 	TidScan    *node = (TidScan *) tidstate->ss.ps.plan;
67 	ListCell   *l;
68 
69 	tidstate->tss_tidexprs = NIL;
70 	tidstate->tss_isCurrentOf = false;
71 
72 	foreach(l, node->tidquals)
73 	{
74 		Expr	   *expr = (Expr *) lfirst(l);
75 		TidExpr    *tidexpr = (TidExpr *) palloc0(sizeof(TidExpr));
76 
77 		if (is_opclause(expr))
78 		{
79 			Node	   *arg1;
80 			Node	   *arg2;
81 
82 			arg1 = get_leftop(expr);
83 			arg2 = get_rightop(expr);
84 			if (IsCTIDVar(arg1))
85 				tidexpr->exprstate = ExecInitExpr((Expr *) arg2,
86 												  &tidstate->ss.ps);
87 			else if (IsCTIDVar(arg2))
88 				tidexpr->exprstate = ExecInitExpr((Expr *) arg1,
89 												  &tidstate->ss.ps);
90 			else
91 				elog(ERROR, "could not identify CTID variable");
92 			tidexpr->isarray = false;
93 		}
94 		else if (expr && IsA(expr, ScalarArrayOpExpr))
95 		{
96 			ScalarArrayOpExpr *saex = (ScalarArrayOpExpr *) expr;
97 
98 			Assert(IsCTIDVar(linitial(saex->args)));
99 			tidexpr->exprstate = ExecInitExpr(lsecond(saex->args),
100 											  &tidstate->ss.ps);
101 			tidexpr->isarray = true;
102 		}
103 		else if (expr && IsA(expr, CurrentOfExpr))
104 		{
105 			CurrentOfExpr *cexpr = (CurrentOfExpr *) expr;
106 
107 			tidexpr->cexpr = cexpr;
108 			tidstate->tss_isCurrentOf = true;
109 		}
110 		else
111 			elog(ERROR, "could not identify CTID expression");
112 
113 		tidstate->tss_tidexprs = lappend(tidstate->tss_tidexprs, tidexpr);
114 	}
115 
116 	/* CurrentOfExpr could never appear OR'd with something else */
117 	Assert(list_length(tidstate->tss_tidexprs) == 1 ||
118 		   !tidstate->tss_isCurrentOf);
119 }
120 
121 /*
122  * Compute the list of TIDs to be visited, by evaluating the expressions
123  * for them.
124  *
125  * (The result is actually an array, not a list.)
126  */
127 static void
TidListEval(TidScanState * tidstate)128 TidListEval(TidScanState *tidstate)
129 {
130 	ExprContext *econtext = tidstate->ss.ps.ps_ExprContext;
131 	BlockNumber nblocks;
132 	ItemPointerData *tidList;
133 	int			numAllocTids;
134 	int			numTids;
135 	ListCell   *l;
136 
137 	/*
138 	 * We silently discard any TIDs that are out of range at the time of scan
139 	 * start.  (Since we hold at least AccessShareLock on the table, it won't
140 	 * be possible for someone to truncate away the blocks we intend to
141 	 * visit.)
142 	 */
143 	nblocks = RelationGetNumberOfBlocks(tidstate->ss.ss_currentRelation);
144 
145 	/*
146 	 * We initialize the array with enough slots for the case that all quals
147 	 * are simple OpExprs or CurrentOfExprs.  If there are any
148 	 * ScalarArrayOpExprs, we may have to enlarge the array.
149 	 */
150 	numAllocTids = list_length(tidstate->tss_tidexprs);
151 	tidList = (ItemPointerData *)
152 		palloc(numAllocTids * sizeof(ItemPointerData));
153 	numTids = 0;
154 
155 	foreach(l, tidstate->tss_tidexprs)
156 	{
157 		TidExpr    *tidexpr = (TidExpr *) lfirst(l);
158 		ItemPointer itemptr;
159 		bool		isNull;
160 
161 		if (tidexpr->exprstate && !tidexpr->isarray)
162 		{
163 			itemptr = (ItemPointer)
164 				DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate,
165 														  econtext,
166 														  &isNull));
167 			if (!isNull &&
168 				ItemPointerIsValid(itemptr) &&
169 				ItemPointerGetBlockNumber(itemptr) < nblocks)
170 			{
171 				if (numTids >= numAllocTids)
172 				{
173 					numAllocTids *= 2;
174 					tidList = (ItemPointerData *)
175 						repalloc(tidList,
176 								 numAllocTids * sizeof(ItemPointerData));
177 				}
178 				tidList[numTids++] = *itemptr;
179 			}
180 		}
181 		else if (tidexpr->exprstate && tidexpr->isarray)
182 		{
183 			Datum		arraydatum;
184 			ArrayType  *itemarray;
185 			Datum	   *ipdatums;
186 			bool	   *ipnulls;
187 			int			ndatums;
188 			int			i;
189 
190 			arraydatum = ExecEvalExprSwitchContext(tidexpr->exprstate,
191 												   econtext,
192 												   &isNull);
193 			if (isNull)
194 				continue;
195 			itemarray = DatumGetArrayTypeP(arraydatum);
196 			deconstruct_array(itemarray,
197 							  TIDOID, sizeof(ItemPointerData), false, 's',
198 							  &ipdatums, &ipnulls, &ndatums);
199 			if (numTids + ndatums > numAllocTids)
200 			{
201 				numAllocTids = numTids + ndatums;
202 				tidList = (ItemPointerData *)
203 					repalloc(tidList,
204 							 numAllocTids * sizeof(ItemPointerData));
205 			}
206 			for (i = 0; i < ndatums; i++)
207 			{
208 				if (!ipnulls[i])
209 				{
210 					itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]);
211 					if (ItemPointerIsValid(itemptr) &&
212 						ItemPointerGetBlockNumber(itemptr) < nblocks)
213 						tidList[numTids++] = *itemptr;
214 				}
215 			}
216 			pfree(ipdatums);
217 			pfree(ipnulls);
218 		}
219 		else
220 		{
221 			ItemPointerData cursor_tid;
222 
223 			Assert(tidexpr->cexpr);
224 			if (execCurrentOf(tidexpr->cexpr, econtext,
225 							  RelationGetRelid(tidstate->ss.ss_currentRelation),
226 							  &cursor_tid))
227 			{
228 				if (numTids >= numAllocTids)
229 				{
230 					numAllocTids *= 2;
231 					tidList = (ItemPointerData *)
232 						repalloc(tidList,
233 								 numAllocTids * sizeof(ItemPointerData));
234 				}
235 				tidList[numTids++] = cursor_tid;
236 			}
237 		}
238 	}
239 
240 	/*
241 	 * Sort the array of TIDs into order, and eliminate duplicates.
242 	 * Eliminating duplicates is necessary since we want OR semantics across
243 	 * the list.  Sorting makes it easier to detect duplicates, and as a bonus
244 	 * ensures that we will visit the heap in the most efficient way.
245 	 */
246 	if (numTids > 1)
247 	{
248 		int			lastTid;
249 		int			i;
250 
251 		/* CurrentOfExpr could never appear OR'd with something else */
252 		Assert(!tidstate->tss_isCurrentOf);
253 
254 		qsort((void *) tidList, numTids, sizeof(ItemPointerData),
255 			  itemptr_comparator);
256 		lastTid = 0;
257 		for (i = 1; i < numTids; i++)
258 		{
259 			if (!ItemPointerEquals(&tidList[lastTid], &tidList[i]))
260 				tidList[++lastTid] = tidList[i];
261 		}
262 		numTids = lastTid + 1;
263 	}
264 
265 	tidstate->tss_TidList = tidList;
266 	tidstate->tss_NumTids = numTids;
267 	tidstate->tss_TidPtr = -1;
268 }
269 
270 /*
271  * qsort comparator for ItemPointerData items
272  */
273 static int
itemptr_comparator(const void * a,const void * b)274 itemptr_comparator(const void *a, const void *b)
275 {
276 	const ItemPointerData *ipa = (const ItemPointerData *) a;
277 	const ItemPointerData *ipb = (const ItemPointerData *) b;
278 	BlockNumber ba = ItemPointerGetBlockNumber(ipa);
279 	BlockNumber bb = ItemPointerGetBlockNumber(ipb);
280 	OffsetNumber oa = ItemPointerGetOffsetNumber(ipa);
281 	OffsetNumber ob = ItemPointerGetOffsetNumber(ipb);
282 
283 	if (ba < bb)
284 		return -1;
285 	if (ba > bb)
286 		return 1;
287 	if (oa < ob)
288 		return -1;
289 	if (oa > ob)
290 		return 1;
291 	return 0;
292 }
293 
294 /* ----------------------------------------------------------------
295  *		TidNext
296  *
297  *		Retrieve a tuple from the TidScan node's currentRelation
298  *		using the tids in the TidScanState information.
299  *
300  * ----------------------------------------------------------------
301  */
302 static TupleTableSlot *
TidNext(TidScanState * node)303 TidNext(TidScanState *node)
304 {
305 	EState	   *estate;
306 	ScanDirection direction;
307 	Snapshot	snapshot;
308 	Relation	heapRelation;
309 	HeapTuple	tuple;
310 	TupleTableSlot *slot;
311 	Buffer		buffer = InvalidBuffer;
312 	ItemPointerData *tidList;
313 	int			numTids;
314 	bool		bBackward;
315 
316 	/*
317 	 * extract necessary information from tid scan node
318 	 */
319 	estate = node->ss.ps.state;
320 	direction = estate->es_direction;
321 	snapshot = estate->es_snapshot;
322 	heapRelation = node->ss.ss_currentRelation;
323 	slot = node->ss.ss_ScanTupleSlot;
324 
325 	/*
326 	 * First time through, compute the list of TIDs to be visited
327 	 */
328 	if (node->tss_TidList == NULL)
329 		TidListEval(node);
330 
331 	tidList = node->tss_TidList;
332 	numTids = node->tss_NumTids;
333 
334 	/*
335 	 * We use node->tss_htup as the tuple pointer; note this can't just be a
336 	 * local variable here, as the scan tuple slot will keep a pointer to it.
337 	 */
338 	tuple = &(node->tss_htup);
339 
340 	/*
341 	 * Initialize or advance scan position, depending on direction.
342 	 */
343 	bBackward = ScanDirectionIsBackward(direction);
344 	if (bBackward)
345 	{
346 		if (node->tss_TidPtr < 0)
347 		{
348 			/* initialize for backward scan */
349 			node->tss_TidPtr = numTids - 1;
350 		}
351 		else
352 			node->tss_TidPtr--;
353 	}
354 	else
355 	{
356 		if (node->tss_TidPtr < 0)
357 		{
358 			/* initialize for forward scan */
359 			node->tss_TidPtr = 0;
360 		}
361 		else
362 			node->tss_TidPtr++;
363 	}
364 
365 	while (node->tss_TidPtr >= 0 && node->tss_TidPtr < numTids)
366 	{
367 		tuple->t_self = tidList[node->tss_TidPtr];
368 
369 		/*
370 		 * For WHERE CURRENT OF, the tuple retrieved from the cursor might
371 		 * since have been updated; if so, we should fetch the version that is
372 		 * current according to our snapshot.
373 		 */
374 		if (node->tss_isCurrentOf)
375 			heap_get_latest_tid(heapRelation, snapshot, &tuple->t_self);
376 
377 		if (heap_fetch(heapRelation, snapshot, tuple, &buffer, false, NULL))
378 		{
379 			/*
380 			 * store the scanned tuple in the scan tuple slot of the scan
381 			 * state.  Eventually we will only do this and not return a tuple.
382 			 * Note: we pass 'false' because tuples returned by amgetnext are
383 			 * pointers onto disk pages and were not created with palloc() and
384 			 * so should not be pfree()'d.
385 			 */
386 			ExecStoreTuple(tuple,	/* tuple to store */
387 						   slot,	/* slot to store in */
388 						   buffer,	/* buffer associated with tuple  */
389 						   false);	/* don't pfree */
390 
391 			/*
392 			 * At this point we have an extra pin on the buffer, because
393 			 * ExecStoreTuple incremented the pin count. Drop our local pin.
394 			 */
395 			ReleaseBuffer(buffer);
396 
397 			return slot;
398 		}
399 		/* Bad TID or failed snapshot qual; try next */
400 		if (bBackward)
401 			node->tss_TidPtr--;
402 		else
403 			node->tss_TidPtr++;
404 
405 		CHECK_FOR_INTERRUPTS();
406 	}
407 
408 	/*
409 	 * if we get here it means the tid scan failed so we are at the end of the
410 	 * scan..
411 	 */
412 	return ExecClearTuple(slot);
413 }
414 
415 /*
416  * TidRecheck -- access method routine to recheck a tuple in EvalPlanQual
417  */
418 static bool
TidRecheck(TidScanState * node,TupleTableSlot * slot)419 TidRecheck(TidScanState *node, TupleTableSlot *slot)
420 {
421 	/*
422 	 * XXX shouldn't we check here to make sure tuple matches TID list? In
423 	 * runtime-key case this is not certain, is it?  However, in the WHERE
424 	 * CURRENT OF case it might not match anyway ...
425 	 */
426 	return true;
427 }
428 
429 
430 /* ----------------------------------------------------------------
431  *		ExecTidScan(node)
432  *
433  *		Scans the relation using tids and returns
434  *		   the next qualifying tuple in the direction specified.
435  *		We call the ExecScan() routine and pass it the appropriate
436  *		access method functions.
437  *
438  *		Conditions:
439  *		  -- the "cursor" maintained by the AMI is positioned at the tuple
440  *			 returned previously.
441  *
442  *		Initial States:
443  *		  -- the relation indicated is opened for scanning so that the
444  *			 "cursor" is positioned before the first qualifying tuple.
445  *		  -- tidPtr is -1.
446  * ----------------------------------------------------------------
447  */
448 static TupleTableSlot *
ExecTidScan(PlanState * pstate)449 ExecTidScan(PlanState *pstate)
450 {
451 	TidScanState *node = castNode(TidScanState, pstate);
452 
453 	return ExecScan(&node->ss,
454 					(ExecScanAccessMtd) TidNext,
455 					(ExecScanRecheckMtd) TidRecheck);
456 }
457 
458 /* ----------------------------------------------------------------
459  *		ExecReScanTidScan(node)
460  * ----------------------------------------------------------------
461  */
462 void
ExecReScanTidScan(TidScanState * node)463 ExecReScanTidScan(TidScanState *node)
464 {
465 	if (node->tss_TidList)
466 		pfree(node->tss_TidList);
467 	node->tss_TidList = NULL;
468 	node->tss_NumTids = 0;
469 	node->tss_TidPtr = -1;
470 
471 	ExecScanReScan(&node->ss);
472 }
473 
474 /* ----------------------------------------------------------------
475  *		ExecEndTidScan
476  *
477  *		Releases any storage allocated through C routines.
478  *		Returns nothing.
479  * ----------------------------------------------------------------
480  */
481 void
ExecEndTidScan(TidScanState * node)482 ExecEndTidScan(TidScanState *node)
483 {
484 	/*
485 	 * Free the exprcontext
486 	 */
487 	ExecFreeExprContext(&node->ss.ps);
488 
489 	/*
490 	 * clear out tuple table slots
491 	 */
492 	ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
493 	ExecClearTuple(node->ss.ss_ScanTupleSlot);
494 
495 	/*
496 	 * close the heap relation.
497 	 */
498 	ExecCloseScanRelation(node->ss.ss_currentRelation);
499 }
500 
501 /* ----------------------------------------------------------------
502  *		ExecInitTidScan
503  *
504  *		Initializes the tid scan's state information, creates
505  *		scan keys, and opens the base and tid relations.
506  *
507  *		Parameters:
508  *		  node: TidNode node produced by the planner.
509  *		  estate: the execution state initialized in InitPlan.
510  * ----------------------------------------------------------------
511  */
512 TidScanState *
ExecInitTidScan(TidScan * node,EState * estate,int eflags)513 ExecInitTidScan(TidScan *node, EState *estate, int eflags)
514 {
515 	TidScanState *tidstate;
516 	Relation	currentRelation;
517 
518 	/*
519 	 * create state structure
520 	 */
521 	tidstate = makeNode(TidScanState);
522 	tidstate->ss.ps.plan = (Plan *) node;
523 	tidstate->ss.ps.state = estate;
524 	tidstate->ss.ps.ExecProcNode = ExecTidScan;
525 
526 	/*
527 	 * Miscellaneous initialization
528 	 *
529 	 * create expression context for node
530 	 */
531 	ExecAssignExprContext(estate, &tidstate->ss.ps);
532 
533 	/*
534 	 * initialize child expressions
535 	 */
536 	tidstate->ss.ps.qual =
537 		ExecInitQual(node->scan.plan.qual, (PlanState *) tidstate);
538 
539 	TidExprListCreate(tidstate);
540 
541 	/*
542 	 * tuple table initialization
543 	 */
544 	ExecInitResultTupleSlot(estate, &tidstate->ss.ps);
545 	ExecInitScanTupleSlot(estate, &tidstate->ss);
546 
547 	/*
548 	 * mark tid list as not computed yet
549 	 */
550 	tidstate->tss_TidList = NULL;
551 	tidstate->tss_NumTids = 0;
552 	tidstate->tss_TidPtr = -1;
553 
554 	/*
555 	 * open the base relation and acquire appropriate lock on it.
556 	 */
557 	currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
558 
559 	tidstate->ss.ss_currentRelation = currentRelation;
560 	tidstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */
561 
562 	/*
563 	 * get the scan type from the relation descriptor.
564 	 */
565 	ExecAssignScanType(&tidstate->ss, RelationGetDescr(currentRelation));
566 
567 	/*
568 	 * Initialize result tuple type and projection info.
569 	 */
570 	ExecAssignResultTypeFromTL(&tidstate->ss.ps);
571 	ExecAssignScanProjectionInfo(&tidstate->ss);
572 
573 	/*
574 	 * all done.
575 	 */
576 	return tidstate;
577 }
578