1 /*-------------------------------------------------------------------------
2  *
3  * nodeSamplescan.c
4  *	  Support routines for sample scans of relations (table sampling).
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/executor/nodeSamplescan.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "access/relscan.h"
18 #include "access/tableam.h"
19 #include "access/tsmapi.h"
20 #include "executor/executor.h"
21 #include "executor/nodeSamplescan.h"
22 #include "miscadmin.h"
23 #include "pgstat.h"
24 #include "storage/bufmgr.h"
25 #include "storage/predicate.h"
26 #include "utils/builtins.h"
27 #include "utils/rel.h"
28 
29 static TupleTableSlot *SampleNext(SampleScanState *node);
30 static void tablesample_init(SampleScanState *scanstate);
31 static TupleTableSlot *tablesample_getnext(SampleScanState *scanstate);
32 
33 /* ----------------------------------------------------------------
34  *						Scan Support
35  * ----------------------------------------------------------------
36  */
37 
38 /* ----------------------------------------------------------------
39  *		SampleNext
40  *
41  *		This is a workhorse for ExecSampleScan
42  * ----------------------------------------------------------------
43  */
44 static TupleTableSlot *
45 SampleNext(SampleScanState *node)
46 {
47 	/*
48 	 * if this is first call within a scan, initialize
49 	 */
50 	if (!node->begun)
51 		tablesample_init(node);
52 
53 	/*
54 	 * get the next tuple, and store it in our result slot
55 	 */
56 	return tablesample_getnext(node);
57 }
58 
59 /*
60  * SampleRecheck -- access method routine to recheck a tuple in EvalPlanQual
61  */
62 static bool
63 SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
64 {
65 	/*
66 	 * No need to recheck for SampleScan, since like SeqScan we don't pass any
67 	 * checkable keys to heap_beginscan.
68 	 */
69 	return true;
70 }
71 
72 /* ----------------------------------------------------------------
73  *		ExecSampleScan(node)
74  *
75  *		Scans the relation using the sampling method and returns
76  *		the next qualifying tuple.
77  *		We call the ExecScan() routine and pass it the appropriate
78  *		access method functions.
79  * ----------------------------------------------------------------
80  */
81 static TupleTableSlot *
82 ExecSampleScan(PlanState *pstate)
83 {
84 	SampleScanState *node = castNode(SampleScanState, pstate);
85 
86 	return ExecScan(&node->ss,
87 					(ExecScanAccessMtd) SampleNext,
88 					(ExecScanRecheckMtd) SampleRecheck);
89 }
90 
91 /* ----------------------------------------------------------------
92  *		ExecInitSampleScan
93  * ----------------------------------------------------------------
94  */
95 SampleScanState *
96 ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
97 {
98 	SampleScanState *scanstate;
99 	TableSampleClause *tsc = node->tablesample;
100 	TsmRoutine *tsm;
101 
102 	Assert(outerPlan(node) == NULL);
103 	Assert(innerPlan(node) == NULL);
104 
105 	/*
106 	 * create state structure
107 	 */
108 	scanstate = makeNode(SampleScanState);
109 	scanstate->ss.ps.plan = (Plan *) node;
110 	scanstate->ss.ps.state = estate;
111 	scanstate->ss.ps.ExecProcNode = ExecSampleScan;
112 
113 	/*
114 	 * Miscellaneous initialization
115 	 *
116 	 * create expression context for node
117 	 */
118 	ExecAssignExprContext(estate, &scanstate->ss.ps);
119 
120 	/*
121 	 * open the scan relation
122 	 */
123 	scanstate->ss.ss_currentRelation =
124 		ExecOpenScanRelation(estate,
125 							 node->scan.scanrelid,
126 							 eflags);
127 
128 	/* we won't set up the HeapScanDesc till later */
129 	scanstate->ss.ss_currentScanDesc = NULL;
130 
131 	/* and create slot with appropriate rowtype */
132 	ExecInitScanTupleSlot(estate, &scanstate->ss,
133 						  RelationGetDescr(scanstate->ss.ss_currentRelation),
134 						  table_slot_callbacks(scanstate->ss.ss_currentRelation));
135 
136 	/*
137 	 * Initialize result type and projection.
138 	 */
139 	ExecInitResultTypeTL(&scanstate->ss.ps);
140 	ExecAssignScanProjectionInfo(&scanstate->ss);
141 
142 	/*
143 	 * initialize child expressions
144 	 */
145 	scanstate->ss.ps.qual =
146 		ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
147 
148 	scanstate->args = ExecInitExprList(tsc->args, (PlanState *) scanstate);
149 	scanstate->repeatable =
150 		ExecInitExpr(tsc->repeatable, (PlanState *) scanstate);
151 
152 	/*
153 	 * If we don't have a REPEATABLE clause, select a random seed.  We want to
154 	 * do this just once, since the seed shouldn't change over rescans.
155 	 */
156 	if (tsc->repeatable == NULL)
157 		scanstate->seed = random();
158 
159 	/*
160 	 * Finally, initialize the TABLESAMPLE method handler.
161 	 */
162 	tsm = GetTsmRoutine(tsc->tsmhandler);
163 	scanstate->tsmroutine = tsm;
164 	scanstate->tsm_state = NULL;
165 
166 	if (tsm->InitSampleScan)
167 		tsm->InitSampleScan(scanstate, eflags);
168 
169 	/* We'll do BeginSampleScan later; we can't evaluate params yet */
170 	scanstate->begun = false;
171 
172 	return scanstate;
173 }
174 
175 /* ----------------------------------------------------------------
176  *		ExecEndSampleScan
177  *
178  *		frees any storage allocated through C routines.
179  * ----------------------------------------------------------------
180  */
181 void
182 ExecEndSampleScan(SampleScanState *node)
183 {
184 	/*
185 	 * Tell sampling function that we finished the scan.
186 	 */
187 	if (node->tsmroutine->EndSampleScan)
188 		node->tsmroutine->EndSampleScan(node);
189 
190 	/*
191 	 * Free the exprcontext
192 	 */
193 	ExecFreeExprContext(&node->ss.ps);
194 
195 	/*
196 	 * clean out the tuple table
197 	 */
198 	if (node->ss.ps.ps_ResultTupleSlot)
199 		ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
200 	ExecClearTuple(node->ss.ss_ScanTupleSlot);
201 
202 	/*
203 	 * close heap scan
204 	 */
205 	if (node->ss.ss_currentScanDesc)
206 		table_endscan(node->ss.ss_currentScanDesc);
207 }
208 
209 /* ----------------------------------------------------------------
210  *		ExecReScanSampleScan
211  *
212  *		Rescans the relation.
213  *
214  * ----------------------------------------------------------------
215  */
216 void
217 ExecReScanSampleScan(SampleScanState *node)
218 {
219 	/* Remember we need to do BeginSampleScan again (if we did it at all) */
220 	node->begun = false;
221 	node->done = false;
222 	node->haveblock = false;
223 	node->donetuples = 0;
224 
225 	ExecScanReScan(&node->ss);
226 }
227 
228 
229 /*
230  * Initialize the TABLESAMPLE method: evaluate params and call BeginSampleScan.
231  */
232 static void
233 tablesample_init(SampleScanState *scanstate)
234 {
235 	TsmRoutine *tsm = scanstate->tsmroutine;
236 	ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
237 	Datum	   *params;
238 	Datum		datum;
239 	bool		isnull;
240 	uint32		seed;
241 	bool		allow_sync;
242 	int			i;
243 	ListCell   *arg;
244 
245 	scanstate->donetuples = 0;
246 	params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
247 
248 	i = 0;
249 	foreach(arg, scanstate->args)
250 	{
251 		ExprState  *argstate = (ExprState *) lfirst(arg);
252 
253 		params[i] = ExecEvalExprSwitchContext(argstate,
254 											  econtext,
255 											  &isnull);
256 		if (isnull)
257 			ereport(ERROR,
258 					(errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
259 					 errmsg("TABLESAMPLE parameter cannot be null")));
260 		i++;
261 	}
262 
263 	if (scanstate->repeatable)
264 	{
265 		datum = ExecEvalExprSwitchContext(scanstate->repeatable,
266 										  econtext,
267 										  &isnull);
268 		if (isnull)
269 			ereport(ERROR,
270 					(errcode(ERRCODE_INVALID_TABLESAMPLE_REPEAT),
271 					 errmsg("TABLESAMPLE REPEATABLE parameter cannot be null")));
272 
273 		/*
274 		 * The REPEATABLE parameter has been coerced to float8 by the parser.
275 		 * The reason for using float8 at the SQL level is that it will
276 		 * produce unsurprising results both for users used to databases that
277 		 * accept only integers in the REPEATABLE clause and for those who
278 		 * might expect that REPEATABLE works like setseed() (a float in the
279 		 * range from -1 to 1).
280 		 *
281 		 * We use hashfloat8() to convert the supplied value into a suitable
282 		 * seed.  For regression-testing purposes, that has the convenient
283 		 * property that REPEATABLE(0) gives a machine-independent result.
284 		 */
285 		seed = DatumGetUInt32(DirectFunctionCall1(hashfloat8, datum));
286 	}
287 	else
288 	{
289 		/* Use the seed selected by ExecInitSampleScan */
290 		seed = scanstate->seed;
291 	}
292 
293 	/* Set default values for params that BeginSampleScan can adjust */
294 	scanstate->use_bulkread = true;
295 	scanstate->use_pagemode = true;
296 
297 	/* Let tablesample method do its thing */
298 	tsm->BeginSampleScan(scanstate,
299 						 params,
300 						 list_length(scanstate->args),
301 						 seed);
302 
303 	/* We'll use syncscan if there's no NextSampleBlock function */
304 	allow_sync = (tsm->NextSampleBlock == NULL);
305 
306 	/* Now we can create or reset the HeapScanDesc */
307 	if (scanstate->ss.ss_currentScanDesc == NULL)
308 	{
309 		scanstate->ss.ss_currentScanDesc =
310 			table_beginscan_sampling(scanstate->ss.ss_currentRelation,
311 									 scanstate->ss.ps.state->es_snapshot,
312 									 0, NULL,
313 									 scanstate->use_bulkread,
314 									 allow_sync,
315 									 scanstate->use_pagemode);
316 	}
317 	else
318 	{
319 		table_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL,
320 								scanstate->use_bulkread,
321 								allow_sync,
322 								scanstate->use_pagemode);
323 	}
324 
325 	pfree(params);
326 
327 	/* And we're initialized. */
328 	scanstate->begun = true;
329 }
330 
331 /*
332  * Get next tuple from TABLESAMPLE method.
333  */
334 static TupleTableSlot *
335 tablesample_getnext(SampleScanState *scanstate)
336 {
337 	TableScanDesc scan = scanstate->ss.ss_currentScanDesc;
338 	TupleTableSlot *slot = scanstate->ss.ss_ScanTupleSlot;
339 
340 	ExecClearTuple(slot);
341 
342 	if (scanstate->done)
343 		return NULL;
344 
345 	for (;;)
346 	{
347 		if (!scanstate->haveblock)
348 		{
349 			if (!table_scan_sample_next_block(scan, scanstate))
350 			{
351 				scanstate->haveblock = false;
352 				scanstate->done = true;
353 
354 				/* exhausted relation */
355 				return NULL;
356 			}
357 
358 			scanstate->haveblock = true;
359 		}
360 
361 		if (!table_scan_sample_next_tuple(scan, scanstate, slot))
362 		{
363 			/*
364 			 * If we get here, it means we've exhausted the items on this page
365 			 * and it's time to move to the next.
366 			 */
367 			scanstate->haveblock = false;
368 			continue;
369 		}
370 
371 		/* Found visible tuple, return it. */
372 		break;
373 	}
374 
375 	scanstate->donetuples++;
376 
377 	return slot;
378 }
379