1 /*------------------------------------------------------------------------- 2 * 3 * nodeSamplescan.c 4 * Support routines for sample scans of relations (table sampling). 5 * 6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group 7 * Portions Copyright (c) 1994, Regents of the University of California 8 * 9 * 10 * IDENTIFICATION 11 * src/backend/executor/nodeSamplescan.c 12 * 13 *------------------------------------------------------------------------- 14 */ 15 #include "postgres.h" 16 17 #include "access/relscan.h" 18 #include "access/tableam.h" 19 #include "access/tsmapi.h" 20 #include "executor/executor.h" 21 #include "executor/nodeSamplescan.h" 22 #include "miscadmin.h" 23 #include "pgstat.h" 24 #include "storage/bufmgr.h" 25 #include "storage/predicate.h" 26 #include "utils/builtins.h" 27 #include "utils/rel.h" 28 29 static TupleTableSlot *SampleNext(SampleScanState *node); 30 static void tablesample_init(SampleScanState *scanstate); 31 static TupleTableSlot *tablesample_getnext(SampleScanState *scanstate); 32 33 /* ---------------------------------------------------------------- 34 * Scan Support 35 * ---------------------------------------------------------------- 36 */ 37 38 /* ---------------------------------------------------------------- 39 * SampleNext 40 * 41 * This is a workhorse for ExecSampleScan 42 * ---------------------------------------------------------------- 43 */ 44 static TupleTableSlot * 45 SampleNext(SampleScanState *node) 46 { 47 /* 48 * if this is first call within a scan, initialize 49 */ 50 if (!node->begun) 51 tablesample_init(node); 52 53 /* 54 * get the next tuple, and store it in our result slot 55 */ 56 return tablesample_getnext(node); 57 } 58 59 /* 60 * SampleRecheck -- access method routine to recheck a tuple in EvalPlanQual 61 */ 62 static bool 63 SampleRecheck(SampleScanState *node, TupleTableSlot *slot) 64 { 65 /* 66 * No need to recheck for SampleScan, since like SeqScan we don't pass any 67 * checkable keys to heap_beginscan. 68 */ 69 return true; 70 } 71 72 /* ---------------------------------------------------------------- 73 * ExecSampleScan(node) 74 * 75 * Scans the relation using the sampling method and returns 76 * the next qualifying tuple. 77 * We call the ExecScan() routine and pass it the appropriate 78 * access method functions. 79 * ---------------------------------------------------------------- 80 */ 81 static TupleTableSlot * 82 ExecSampleScan(PlanState *pstate) 83 { 84 SampleScanState *node = castNode(SampleScanState, pstate); 85 86 return ExecScan(&node->ss, 87 (ExecScanAccessMtd) SampleNext, 88 (ExecScanRecheckMtd) SampleRecheck); 89 } 90 91 /* ---------------------------------------------------------------- 92 * ExecInitSampleScan 93 * ---------------------------------------------------------------- 94 */ 95 SampleScanState * 96 ExecInitSampleScan(SampleScan *node, EState *estate, int eflags) 97 { 98 SampleScanState *scanstate; 99 TableSampleClause *tsc = node->tablesample; 100 TsmRoutine *tsm; 101 102 Assert(outerPlan(node) == NULL); 103 Assert(innerPlan(node) == NULL); 104 105 /* 106 * create state structure 107 */ 108 scanstate = makeNode(SampleScanState); 109 scanstate->ss.ps.plan = (Plan *) node; 110 scanstate->ss.ps.state = estate; 111 scanstate->ss.ps.ExecProcNode = ExecSampleScan; 112 113 /* 114 * Miscellaneous initialization 115 * 116 * create expression context for node 117 */ 118 ExecAssignExprContext(estate, &scanstate->ss.ps); 119 120 /* 121 * open the scan relation 122 */ 123 scanstate->ss.ss_currentRelation = 124 ExecOpenScanRelation(estate, 125 node->scan.scanrelid, 126 eflags); 127 128 /* we won't set up the HeapScanDesc till later */ 129 scanstate->ss.ss_currentScanDesc = NULL; 130 131 /* and create slot with appropriate rowtype */ 132 ExecInitScanTupleSlot(estate, &scanstate->ss, 133 RelationGetDescr(scanstate->ss.ss_currentRelation), 134 table_slot_callbacks(scanstate->ss.ss_currentRelation)); 135 136 /* 137 * Initialize result type and projection. 138 */ 139 ExecInitResultTypeTL(&scanstate->ss.ps); 140 ExecAssignScanProjectionInfo(&scanstate->ss); 141 142 /* 143 * initialize child expressions 144 */ 145 scanstate->ss.ps.qual = 146 ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate); 147 148 scanstate->args = ExecInitExprList(tsc->args, (PlanState *) scanstate); 149 scanstate->repeatable = 150 ExecInitExpr(tsc->repeatable, (PlanState *) scanstate); 151 152 /* 153 * If we don't have a REPEATABLE clause, select a random seed. We want to 154 * do this just once, since the seed shouldn't change over rescans. 155 */ 156 if (tsc->repeatable == NULL) 157 scanstate->seed = random(); 158 159 /* 160 * Finally, initialize the TABLESAMPLE method handler. 161 */ 162 tsm = GetTsmRoutine(tsc->tsmhandler); 163 scanstate->tsmroutine = tsm; 164 scanstate->tsm_state = NULL; 165 166 if (tsm->InitSampleScan) 167 tsm->InitSampleScan(scanstate, eflags); 168 169 /* We'll do BeginSampleScan later; we can't evaluate params yet */ 170 scanstate->begun = false; 171 172 return scanstate; 173 } 174 175 /* ---------------------------------------------------------------- 176 * ExecEndSampleScan 177 * 178 * frees any storage allocated through C routines. 179 * ---------------------------------------------------------------- 180 */ 181 void 182 ExecEndSampleScan(SampleScanState *node) 183 { 184 /* 185 * Tell sampling function that we finished the scan. 186 */ 187 if (node->tsmroutine->EndSampleScan) 188 node->tsmroutine->EndSampleScan(node); 189 190 /* 191 * Free the exprcontext 192 */ 193 ExecFreeExprContext(&node->ss.ps); 194 195 /* 196 * clean out the tuple table 197 */ 198 if (node->ss.ps.ps_ResultTupleSlot) 199 ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); 200 ExecClearTuple(node->ss.ss_ScanTupleSlot); 201 202 /* 203 * close heap scan 204 */ 205 if (node->ss.ss_currentScanDesc) 206 table_endscan(node->ss.ss_currentScanDesc); 207 } 208 209 /* ---------------------------------------------------------------- 210 * ExecReScanSampleScan 211 * 212 * Rescans the relation. 213 * 214 * ---------------------------------------------------------------- 215 */ 216 void 217 ExecReScanSampleScan(SampleScanState *node) 218 { 219 /* Remember we need to do BeginSampleScan again (if we did it at all) */ 220 node->begun = false; 221 node->done = false; 222 node->haveblock = false; 223 node->donetuples = 0; 224 225 ExecScanReScan(&node->ss); 226 } 227 228 229 /* 230 * Initialize the TABLESAMPLE method: evaluate params and call BeginSampleScan. 231 */ 232 static void 233 tablesample_init(SampleScanState *scanstate) 234 { 235 TsmRoutine *tsm = scanstate->tsmroutine; 236 ExprContext *econtext = scanstate->ss.ps.ps_ExprContext; 237 Datum *params; 238 Datum datum; 239 bool isnull; 240 uint32 seed; 241 bool allow_sync; 242 int i; 243 ListCell *arg; 244 245 scanstate->donetuples = 0; 246 params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum)); 247 248 i = 0; 249 foreach(arg, scanstate->args) 250 { 251 ExprState *argstate = (ExprState *) lfirst(arg); 252 253 params[i] = ExecEvalExprSwitchContext(argstate, 254 econtext, 255 &isnull); 256 if (isnull) 257 ereport(ERROR, 258 (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT), 259 errmsg("TABLESAMPLE parameter cannot be null"))); 260 i++; 261 } 262 263 if (scanstate->repeatable) 264 { 265 datum = ExecEvalExprSwitchContext(scanstate->repeatable, 266 econtext, 267 &isnull); 268 if (isnull) 269 ereport(ERROR, 270 (errcode(ERRCODE_INVALID_TABLESAMPLE_REPEAT), 271 errmsg("TABLESAMPLE REPEATABLE parameter cannot be null"))); 272 273 /* 274 * The REPEATABLE parameter has been coerced to float8 by the parser. 275 * The reason for using float8 at the SQL level is that it will 276 * produce unsurprising results both for users used to databases that 277 * accept only integers in the REPEATABLE clause and for those who 278 * might expect that REPEATABLE works like setseed() (a float in the 279 * range from -1 to 1). 280 * 281 * We use hashfloat8() to convert the supplied value into a suitable 282 * seed. For regression-testing purposes, that has the convenient 283 * property that REPEATABLE(0) gives a machine-independent result. 284 */ 285 seed = DatumGetUInt32(DirectFunctionCall1(hashfloat8, datum)); 286 } 287 else 288 { 289 /* Use the seed selected by ExecInitSampleScan */ 290 seed = scanstate->seed; 291 } 292 293 /* Set default values for params that BeginSampleScan can adjust */ 294 scanstate->use_bulkread = true; 295 scanstate->use_pagemode = true; 296 297 /* Let tablesample method do its thing */ 298 tsm->BeginSampleScan(scanstate, 299 params, 300 list_length(scanstate->args), 301 seed); 302 303 /* We'll use syncscan if there's no NextSampleBlock function */ 304 allow_sync = (tsm->NextSampleBlock == NULL); 305 306 /* Now we can create or reset the HeapScanDesc */ 307 if (scanstate->ss.ss_currentScanDesc == NULL) 308 { 309 scanstate->ss.ss_currentScanDesc = 310 table_beginscan_sampling(scanstate->ss.ss_currentRelation, 311 scanstate->ss.ps.state->es_snapshot, 312 0, NULL, 313 scanstate->use_bulkread, 314 allow_sync, 315 scanstate->use_pagemode); 316 } 317 else 318 { 319 table_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL, 320 scanstate->use_bulkread, 321 allow_sync, 322 scanstate->use_pagemode); 323 } 324 325 pfree(params); 326 327 /* And we're initialized. */ 328 scanstate->begun = true; 329 } 330 331 /* 332 * Get next tuple from TABLESAMPLE method. 333 */ 334 static TupleTableSlot * 335 tablesample_getnext(SampleScanState *scanstate) 336 { 337 TableScanDesc scan = scanstate->ss.ss_currentScanDesc; 338 TupleTableSlot *slot = scanstate->ss.ss_ScanTupleSlot; 339 340 ExecClearTuple(slot); 341 342 if (scanstate->done) 343 return NULL; 344 345 for (;;) 346 { 347 if (!scanstate->haveblock) 348 { 349 if (!table_scan_sample_next_block(scan, scanstate)) 350 { 351 scanstate->haveblock = false; 352 scanstate->done = true; 353 354 /* exhausted relation */ 355 return NULL; 356 } 357 358 scanstate->haveblock = true; 359 } 360 361 if (!table_scan_sample_next_tuple(scan, scanstate, slot)) 362 { 363 /* 364 * If we get here, it means we've exhausted the items on this page 365 * and it's time to move to the next. 366 */ 367 scanstate->haveblock = false; 368 continue; 369 } 370 371 /* Found visible tuple, return it. */ 372 break; 373 } 374 375 scanstate->donetuples++; 376 377 return slot; 378 } 379