1 /*-------------------------------------------------------------------------
2 *
3 * execnodes.h
4 * definitions for executor state nodes
5 *
6 *
7 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * src/include/nodes/execnodes.h
11 *
12 *-------------------------------------------------------------------------
13 */
14 #ifndef EXECNODES_H
15 #define EXECNODES_H
16
17 #include "access/tupconvert.h"
18 #include "executor/instrument.h"
19 #include "lib/pairingheap.h"
20 #include "nodes/params.h"
21 #include "nodes/plannodes.h"
22 #include "partitioning/partdefs.h"
23 #include "utils/hsearch.h"
24 #include "utils/queryenvironment.h"
25 #include "utils/reltrigger.h"
26 #include "utils/sharedtuplestore.h"
27 #include "utils/snapshot.h"
28 #include "utils/sortsupport.h"
29 #include "utils/tuplestore.h"
30 #include "utils/tuplesort.h"
31 #include "nodes/tidbitmap.h"
32 #include "storage/condition_variable.h"
33
34
/*
 * Incomplete struct declarations, so that the definitions below can hold
 * pointers to these types.
 */
struct PlanState;               /* forward references in this file */
struct PartitionRoutingInfo;
struct ParallelHashJoinState;
struct ExecRowMark;
struct ExprState;
struct ExprContext;
struct RangeTblEntry;           /* avoid including parsenodes.h here */
struct ExprEvalStep;            /* avoid including execExpr.h everywhere */
struct CopyMultiInsertBuffer;
44
45
46 /* ----------------
47 * ExprState node
48 *
49 * ExprState is the top-level node for expression evaluation.
50 * It contains instructions (in ->steps) to evaluate the expression.
51 * ----------------
52 */
53 typedef Datum (*ExprStateEvalFunc) (struct ExprState *expression,
54 struct ExprContext *econtext,
55 bool *isNull);
56
57 /* Bits in ExprState->flags (see also execExpr.h for private flag bits): */
58 /* expression is for use with ExecQual() */
59 #define EEO_FLAG_IS_QUAL (1 << 0)
60
61 typedef struct ExprState
62 {
63 Node tag;
64
65 uint8 flags; /* bitmask of EEO_FLAG_* bits, see above */
66
67 /*
68 * Storage for result value of a scalar expression, or for individual
69 * column results within expressions built by ExecBuildProjectionInfo().
70 */
71 #define FIELDNO_EXPRSTATE_RESNULL 2
72 bool resnull;
73 #define FIELDNO_EXPRSTATE_RESVALUE 3
74 Datum resvalue;
75
76 /*
77 * If projecting a tuple result, this slot holds the result; else NULL.
78 */
79 #define FIELDNO_EXPRSTATE_RESULTSLOT 4
80 TupleTableSlot *resultslot;
81
82 /*
83 * Instructions to compute expression's return value.
84 */
85 struct ExprEvalStep *steps;
86
87 /*
88 * Function that actually evaluates the expression. This can be set to
89 * different values depending on the complexity of the expression.
90 */
91 ExprStateEvalFunc evalfunc;
92
93 /* original expression tree, for debugging only */
94 Expr *expr;
95
96 /* private state for an evalfunc */
97 void *evalfunc_private;
98
99 /*
100 * XXX: following fields only needed during "compilation" (ExecInitExpr);
101 * could be thrown away afterwards.
102 */
103
104 int steps_len; /* number of steps currently */
105 int steps_alloc; /* allocated length of steps array */
106
107 struct PlanState *parent; /* parent PlanState node, if any */
108 ParamListInfo ext_params; /* for compiling PARAM_EXTERN nodes */
109
110 Datum *innermost_caseval;
111 bool *innermost_casenull;
112
113 Datum *innermost_domainval;
114 bool *innermost_domainnull;
115 } ExprState;
116
117
118 /* ----------------
119 * IndexInfo information
120 *
121 * this struct holds the information needed to construct new index
122 * entries for a particular index. Used for both index_build and
123 * retail creation of index entries.
124 *
125 * NumIndexAttrs total number of columns in this index
126 * NumIndexKeyAttrs number of key columns in index
127 * IndexAttrNumbers underlying-rel attribute numbers used as keys
128 * (zeroes indicate expressions). It also contains
129 * info about included columns.
130 * Expressions expr trees for expression entries, or NIL if none
131 * ExpressionsState exec state for expressions, or NIL if none
132 * Predicate partial-index predicate, or NIL if none
133 * PredicateState exec state for predicate, or NIL if none
134 * ExclusionOps Per-column exclusion operators, or NULL if none
135 * ExclusionProcs Underlying function OIDs for ExclusionOps
136 * ExclusionStrats Opclass strategy numbers for ExclusionOps
137 * UniqueOps These are like Exclusion*, but for unique indexes
138 * UniqueProcs
139 * UniqueStrats
140 * Unique is it a unique index?
141 * ReadyForInserts is it valid for inserts?
142 * Concurrent are we doing a concurrent index build?
143 * BrokenHotChain did we detect any broken HOT chains?
144 * ParallelWorkers # of workers requested (excludes leader)
145 * Am Oid of index AM
146 * AmCache private cache area for index AM
147 * Context memory context holding this IndexInfo
148 *
149 * ii_Concurrent, ii_BrokenHotChain, and ii_ParallelWorkers are used only
150 * during index build; they're conventionally zeroed otherwise.
151 * ----------------
152 */
153 typedef struct IndexInfo
154 {
155 NodeTag type;
156 int ii_NumIndexAttrs; /* total number of columns in index */
157 int ii_NumIndexKeyAttrs; /* number of key columns in index */
158 AttrNumber ii_IndexAttrNumbers[INDEX_MAX_KEYS];
159 List *ii_Expressions; /* list of Expr */
160 List *ii_ExpressionsState; /* list of ExprState */
161 List *ii_Predicate; /* list of Expr */
162 ExprState *ii_PredicateState;
163 Oid *ii_ExclusionOps; /* array with one entry per column */
164 Oid *ii_ExclusionProcs; /* array with one entry per column */
165 uint16 *ii_ExclusionStrats; /* array with one entry per column */
166 Oid *ii_UniqueOps; /* array with one entry per column */
167 Oid *ii_UniqueProcs; /* array with one entry per column */
168 uint16 *ii_UniqueStrats; /* array with one entry per column */
169 bool ii_Unique;
170 bool ii_ReadyForInserts;
171 bool ii_Concurrent;
172 bool ii_BrokenHotChain;
173 int ii_ParallelWorkers;
174 Oid ii_Am;
175 void *ii_AmCache;
176 MemoryContext ii_Context;
177 } IndexInfo;
178
179 /* ----------------
180 * ExprContext_CB
181 *
182 * List of callbacks to be called at ExprContext shutdown.
183 * ----------------
184 */
185 typedef void (*ExprContextCallbackFunction) (Datum arg);
SanitizeOptionsByTable(const DBOptions & db_opts,const std::vector<ColumnFamilyDescriptor> & column_families)186
187 typedef struct ExprContext_CB
188 {
189 struct ExprContext_CB *next;
190 ExprContextCallbackFunction function;
191 Datum arg;
192 } ExprContext_CB;
193
194 /* ----------------
195 * ExprContext
196 *
197 * This class holds the "current context" information
198 * needed to evaluate expressions for doing tuple qualifications
199 * and tuple projections. For example, if an expression refers
ValidateOptions(const DBOptions & db_options,const std::vector<ColumnFamilyDescriptor> & column_families)200 * to an attribute in the current inner tuple then we need to know
201 * what the current inner tuple is and so we look at the expression
202 * context.
203 *
204 * There are two memory contexts associated with an ExprContext:
205 * * ecxt_per_query_memory is a query-lifespan context, typically the same
206 * context the ExprContext node itself is allocated in. This context
207 * can be used for purposes such as storing function call cache info.
208 * * ecxt_per_tuple_memory is a short-term context for expression results.
209 * As the name suggests, it will typically be reset once per tuple,
210 * before we begin to evaluate expressions for that tuple. Each
211 * ExprContext normally has its very own per-tuple memory context.
212 *
213 * CurrentMemoryContext should be set to ecxt_per_tuple_memory before
214 * calling ExecEvalExpr() --- see ExecEvalExprSwitchContext().
215 * ----------------
216 */
217 typedef struct ExprContext
218 {
219 NodeTag type;
220
221 /* Tuples that Var nodes in expression may refer to */
222 #define FIELDNO_EXPRCONTEXT_SCANTUPLE 1
223 TupleTableSlot *ecxt_scantuple;
224 #define FIELDNO_EXPRCONTEXT_INNERTUPLE 2
225 TupleTableSlot *ecxt_innertuple;
226 #define FIELDNO_EXPRCONTEXT_OUTERTUPLE 3
227 TupleTableSlot *ecxt_outertuple;
228
229 /* Memory contexts for expression evaluation --- see notes above */
230 MemoryContext ecxt_per_query_memory;
231 MemoryContext ecxt_per_tuple_memory;
232
233 /* Values to substitute for Param nodes in expression */
234 ParamExecData *ecxt_param_exec_vals; /* for PARAM_EXEC params */
235 ParamListInfo ecxt_param_list_info; /* for other param types */
236
237 /*
238 * Values to substitute for Aggref nodes in the expressions of an Agg
239 * node, or for WindowFunc nodes within a WindowAgg node.
240 */
241 #define FIELDNO_EXPRCONTEXT_AGGVALUES 8
242 Datum *ecxt_aggvalues; /* precomputed values for aggs/windowfuncs */
243 #define FIELDNO_EXPRCONTEXT_AGGNULLS 9
244 bool *ecxt_aggnulls; /* null flags for aggs/windowfuncs */
245
246 /* Value to substitute for CaseTestExpr nodes in expression */
247 #define FIELDNO_EXPRCONTEXT_CASEDATUM 10
248 Datum caseValue_datum;
249 #define FIELDNO_EXPRCONTEXT_CASENULL 11
250 bool caseValue_isNull;
251
252 /* Value to substitute for CoerceToDomainValue nodes in expression */
253 #define FIELDNO_EXPRCONTEXT_DOMAINDATUM 12
254 Datum domainValue_datum;
255 #define FIELDNO_EXPRCONTEXT_DOMAINNULL 13
256 bool domainValue_isNull;
257
258 /* Link to containing EState (NULL if a standalone ExprContext) */
259 struct EState *ecxt_estate;
260
261 /* Functions to call back when ExprContext is shut down or rescanned */
262 ExprContext_CB *ecxt_callbacks;
263 } ExprContext;
264
/*
 * Set-result status used when evaluating functions potentially returning a
 * set.
 */
typedef enum
{
    ExprSingleResult,           /* expression does not return a set */
    ExprMultipleResult,         /* this result is an element of a set */
    ExprEndResult               /* there are no more elements in the set */
} ExprDoneCond;
275
/*
 * Return modes for functions returning sets.  Note values must be chosen
 * as separate bits so that a bitmask can be formed to indicate supported
 * modes.  SFRM_Materialize_Random and SFRM_Materialize_Preferred are
 * auxiliary flags about SFRM_Materialize mode, rather than separate modes.
 */
typedef enum
{
    SFRM_ValuePerCall = 0x01,           /* one value returned per call */
    SFRM_Materialize = 0x02,            /* result set instantiated in
                                         * Tuplestore */
    SFRM_Materialize_Random = 0x04,     /* Tuplestore needs randomAccess */
    SFRM_Materialize_Preferred = 0x08   /* caller prefers Tuplestore */
} SetFunctionReturnMode;
289
290 /*
291 * When calling a function that might return a set (multiple rows),
292 * a node of this type is passed as fcinfo->resultinfo to allow
293 * return status to be passed back. A function returning set should
294 * raise an error if no such resultinfo is provided.
295 */
296 typedef struct ReturnSetInfo
297 {
298 NodeTag type;
299 /* values set by caller: */
300 ExprContext *econtext; /* context function is being called in */
301 TupleDesc expectedDesc; /* tuple descriptor expected by caller */
302 int allowedModes; /* bitmask: return modes caller can handle */
303 /* result status from function (but pre-initialized by caller): */
CreateAndNewDirectory(Env * env,const std::string & dirname,std::unique_ptr<Directory> * directory)304 SetFunctionReturnMode returnMode; /* actual return mode */
305 ExprDoneCond isDone; /* status for ValuePerCall mode */
306 /* fields filled by function in Materialize return mode: */
307 Tuplestorestate *setResult; /* holds the complete returned tuple set */
308 TupleDesc setDesc; /* actual descriptor for returned tuples */
309 } ReturnSetInfo;
310
311 /* ----------------
312 * ProjectionInfo node information
313 *
314 * This is all the information needed to perform projections ---
315 * that is, form new tuples by evaluation of targetlist expressions.
316 * Nodes which need to do projections create one of these.
317 *
318 * The target tuple slot is kept in ProjectionInfo->pi_state.resultslot.
319 * ExecProject() evaluates the tlist, forms a tuple, and stores it
SetDirectories(Env * env,const std::string & dbname,const std::string & wal_dir,const std::vector<DbPath> & data_paths)320 * in the given slot. Note that the result will be a "virtual" tuple
321 * unless ExecMaterializeSlot() is then called to force it to be
322 * converted to a physical tuple. The slot must have a tupledesc
323 * that matches the output of the tlist!
324 * ----------------
325 */
326 typedef struct ProjectionInfo
327 {
328 NodeTag type;
329 /* instructions to evaluate projection */
330 ExprState pi_state;
331 /* expression context in which to evaluate expression */
332 ExprContext *pi_exprContext;
333 } ProjectionInfo;
334
335 /* ----------------
336 * JunkFilter
337 *
338 * This class is used to store information regarding junk attributes.
339 * A junk attribute is an attribute in a tuple that is needed only for
340 * storing intermediate information in the executor, and does not belong
341 * in emitted tuples. For example, when we do an UPDATE query,
342 * the planner adds a "junk" entry to the targetlist so that the tuples
343 * returned to ExecutePlan() contain an extra attribute: the ctid of
344 * the tuple to be updated. This is needed to do the update, but we
345 * don't want the ctid to be part of the stored new tuple! So, we
346 * apply a "junk filter" to remove the junk attributes and form the
347 * real output tuple. The junkfilter code also provides routines to
348 * extract the values of the junk attribute(s) from the input tuple.
349 *
350 * targetList: the original target list (including junk attributes).
351 * cleanTupType: the tuple descriptor for the "clean" tuple (with
Recover(const std::vector<ColumnFamilyDescriptor> & column_families,bool read_only,bool error_if_log_file_exist,bool error_if_data_exists_in_logs,uint64_t * recovered_seq)352 * junk attributes removed).
353 * cleanMap: A map with the correspondence between the non-junk
354 * attribute numbers of the "original" tuple and the
355 * attribute numbers of the "clean" tuple.
356 * resultSlot: tuple slot used to hold cleaned tuple.
357 * junkAttNo: not used by junkfilter code. Can be used by caller
358 * to remember the attno of a specific junk attribute
359 * (nodeModifyTable.c keeps the "ctid" or "wholerow"
360 * attno here).
361 * ----------------
362 */
363 typedef struct JunkFilter
364 {
365 NodeTag type;
366 List *jf_targetList;
367 TupleDesc jf_cleanTupType;
368 AttrNumber *jf_cleanMap;
369 TupleTableSlot *jf_resultSlot;
370 AttrNumber jf_junkAttNo;
371 } JunkFilter;
372
373 /*
374 * OnConflictSetState
375 *
376 * Executor state of an ON CONFLICT DO UPDATE operation.
377 */
378 typedef struct OnConflictSetState
379 {
380 NodeTag type;
381
382 TupleTableSlot *oc_Existing; /* slot to store existing target tuple in */
383 TupleTableSlot *oc_ProjSlot; /* CONFLICT ... SET ... projection target */
384 ProjectionInfo *oc_ProjInfo; /* for ON CONFLICT DO UPDATE SET */
385 ExprState *oc_WhereClause; /* state for the WHERE clause */
386 } OnConflictSetState;
387
388 /*
389 * ResultRelInfo
390 *
391 * Whenever we update an existing relation, we have to update indexes on the
392 * relation, and perhaps also fire triggers. ResultRelInfo holds all the
393 * information needed about a result relation, including indexes.
394 *
395 * Normally, a ResultRelInfo refers to a table that is in the query's range
396 * table; then ri_RangeTableIndex is the RT index and ri_RelationDesc is
397 * just a copy of the relevant es_relations[] entry. However, in some
398 * situations we create ResultRelInfos for relations that are not in the
399 * range table, namely for targets of tuple routing in a partitioned table,
400 * and when firing triggers in tables other than the target tables (See
401 * ExecGetTriggerResultRel). In these situations, ri_RangeTableIndex is 0
402 * and ri_RelationDesc is a separately-opened relcache pointer that needs to
403 * be separately closed.
404 */
405 typedef struct ResultRelInfo
406 {
407 NodeTag type;
408
409 /* result relation's range table index, or 0 if not in range table */
410 Index ri_RangeTableIndex;
411
412 /* relation descriptor for result relation */
413 Relation ri_RelationDesc;
414
415 /* # of indices existing on result relation */
416 int ri_NumIndices;
417
418 /* array of relation descriptors for indices */
419 RelationPtr ri_IndexRelationDescs;
420
421 /* array of key/attr info for indices */
422 IndexInfo **ri_IndexRelationInfo;
423
424 /* triggers to be fired, if any */
425 TriggerDesc *ri_TrigDesc;
426
427 /* cached lookup info for trigger functions */
428 FmgrInfo *ri_TrigFunctions;
429
430 /* array of trigger WHEN expr states */
431 ExprState **ri_TrigWhenExprs;
432
433 /* optional runtime measurements for triggers */
434 Instrumentation *ri_TrigInstrument;
435
436 /* On-demand created slots for triggers / returning processing */
437 TupleTableSlot *ri_ReturningSlot; /* for trigger output tuples */
438 TupleTableSlot *ri_TrigOldSlot; /* for a trigger's old tuple */
439 TupleTableSlot *ri_TrigNewSlot; /* for a trigger's new tuple */
440
441 /* FDW callback functions, if foreign table */
442 struct FdwRoutine *ri_FdwRoutine;
443
444 /* available to save private state of FDW */
445 void *ri_FdwState;
446
447 /* true when modifying foreign table directly */
448 bool ri_usesFdwDirectModify;
449
450 /* list of WithCheckOption's to be checked */
451 List *ri_WithCheckOptions;
452
453 /* list of WithCheckOption expr states */
454 List *ri_WithCheckOptionExprs;
455
456 /* array of constraint-checking expr states */
457 ExprState **ri_ConstraintExprs;
458
459 /* array of stored generated columns expr states */
460 ExprState **ri_GeneratedExprs;
461
462 /* for removing junk attributes from tuples */
463 JunkFilter *ri_junkFilter;
464
465 /* list of RETURNING expressions */
466 List *ri_returningList;
467
468 /* for computing a RETURNING list */
469 ProjectionInfo *ri_projectReturning;
470
471 /* list of arbiter indexes to use to check conflicts */
472 List *ri_onConflictArbiterIndexes;
473
474 /* ON CONFLICT evaluation state */
475 OnConflictSetState *ri_onConflict;
476
477 /* partition check expression */
478 List *ri_PartitionCheck;
479
480 /* partition check expression state */
481 ExprState *ri_PartitionCheckExpr;
482
483 /*
484 * RootResultRelInfo gives the target relation mentioned in the query, if
485 * it's a partitioned table. It is not set if the target relation
486 * mentioned in the query is an inherited table, nor when tuple routing is
487 * not needed.
488 */
489 struct ResultRelInfo *ri_RootResultRelInfo;
490
491 /* Additional information specific to partition tuple routing */
492 struct PartitionRoutingInfo *ri_PartitionInfo;
493
494 /* For use by copy.c when performing multi-inserts */
495 struct CopyMultiInsertBuffer *ri_CopyMultiInsertBuffer;
496 } ResultRelInfo;
497
498 /* ----------------
499 * EState information
500 *
501 * Master working state for an Executor invocation
502 * ----------------
503 */
504 typedef struct EState
505 {
506 NodeTag type;
507
508 /* Basic state for all query types: */
509 ScanDirection es_direction; /* current scan direction */
510 Snapshot es_snapshot; /* time qual to use */
511 Snapshot es_crosscheck_snapshot; /* crosscheck time qual for RI */
512 List *es_range_table; /* List of RangeTblEntry */
513 struct RangeTblEntry **es_range_table_array; /* equivalent array */
514 Index es_range_table_size; /* size of the range table arrays */
515 Relation *es_relations; /* Array of per-range-table-entry Relation
516 * pointers, or NULL if not yet opened */
517 struct ExecRowMark **es_rowmarks; /* Array of per-range-table-entry
518 * ExecRowMarks, or NULL if none */
519 PlannedStmt *es_plannedstmt; /* link to top of plan tree */
520 const char *es_sourceText; /* Source text from QueryDesc */
521
522 JunkFilter *es_junkFilter; /* top-level junk filter, if any */
523
524 /* If query can insert/delete tuples, the command ID to mark them with */
525 CommandId es_output_cid;
526
527 /* Info about target table(s) for insert/update/delete queries: */
528 ResultRelInfo *es_result_relations; /* array of ResultRelInfos */
529 int es_num_result_relations; /* length of array */
530 ResultRelInfo *es_result_relation_info; /* currently active array elt */
531
532 /*
533 * Info about the partition root table(s) for insert/update/delete queries
534 * targeting partitioned tables. Only leaf partitions are mentioned in
535 * es_result_relations, but we need access to the roots for firing
536 * triggers and for runtime tuple routing.
537 */
538 ResultRelInfo *es_root_result_relations; /* array of ResultRelInfos */
539 int es_num_root_result_relations; /* length of the array */
540 PartitionDirectory es_partition_directory; /* for PartitionDesc lookup */
541
542 /*
543 * The following list contains ResultRelInfos created by the tuple routing
544 * code for partitions that don't already have one.
545 */
546 List *es_tuple_routing_result_relations;
547
548 /* Stuff used for firing triggers: */
549 List *es_trig_target_relations; /* trigger-only ResultRelInfos */
550
551 /* Parameter info: */
552 ParamListInfo es_param_list_info; /* values of external params */
553 ParamExecData *es_param_exec_vals; /* values of internal params */
554
555 QueryEnvironment *es_queryEnv; /* query environment */
556
557 /* Other working state: */
558 MemoryContext es_query_cxt; /* per-query context in which EState lives */
559
560 List *es_tupleTable; /* List of TupleTableSlots */
561
562 uint64 es_processed; /* # of tuples processed */
563
564 int es_top_eflags; /* eflags passed to ExecutorStart */
565 int es_instrument; /* OR of InstrumentOption flags */
566 bool es_finished; /* true when ExecutorFinish is done */
567
568 List *es_exprcontexts; /* List of ExprContexts within EState */
569
570 List *es_subplanstates; /* List of PlanState for SubPlans */
571
572 List *es_auxmodifytables; /* List of secondary ModifyTableStates */
573
574 /*
575 * this ExprContext is for per-output-tuple operations, such as constraint
576 * checks and index-value computations. It will be reset for each output
577 * tuple. Note that it will be created only if needed.
578 */
579 ExprContext *es_per_tuple_exprcontext;
580
581 /*
582 * If not NULL, this is an EPQState's EState. This is a field in EState
583 * both to allow EvalPlanQual aware executor nodes to detect that they
584 * need to perform EPQ related work, and to provide necessary information
585 * to do so.
586 */
587 struct EPQState *es_epq_active;
588
589 bool es_use_parallel_mode; /* can we use parallel workers? */
PersistentStatsProcessFormatVersion()590
591 /* The per-query shared memory area to use for parallel execution. */
592 struct dsa_area *es_query_dsa;
593
594 /*
595 * JIT information. es_jit_flags indicates whether JIT should be performed
596 * and with which options. es_jit is created on-demand when JITing is
597 * performed.
598 *
599 * es_jit_combined_instr is the combined, on demand allocated,
600 * instrumentation from all workers. The leader's instrumentation is kept
601 * separate, and is combined on demand by ExplainPrintJITSummary().
602 */
603 int es_jit_flags;
604 struct JitContext *es_jit;
605 struct JitInstrumentation *es_jit_worker_instr;
606 } EState;
607
608
609 /*
610 * ExecRowMark -
611 * runtime representation of FOR [KEY] UPDATE/SHARE clauses
612 *
613 * When doing UPDATE, DELETE, or SELECT FOR [KEY] UPDATE/SHARE, we will have an
614 * ExecRowMark for each non-target relation in the query (except inheritance
615 * parent RTEs, which can be ignored at runtime). Virtual relations such as
616 * subqueries-in-FROM will have an ExecRowMark with relation == NULL. See
617 * PlanRowMark for details about most of the fields. In addition to fields
618 * directly derived from PlanRowMark, we store an activity flag (to denote
619 * inactive children of inheritance trees), curCtid, which is used by the
620 * WHERE CURRENT OF code, and ermExtra, which is available for use by the plan
621 * node that sources the relation (e.g., for a foreign table the FDW can use
622 * ermExtra to hold information).
623 *
624 * EState->es_rowmarks is an array of these structs, indexed by RT index,
625 * with NULLs for irrelevant RT indexes. es_rowmarks itself is NULL if
626 * there are no rowmarks.
627 */
628 typedef struct ExecRowMark
629 {
630 Relation relation; /* opened and suitably locked relation */
631 Oid relid; /* its OID (or InvalidOid, if subquery) */
632 Index rti; /* its range table index */
633 Index prti; /* parent range table index, if child */
634 Index rowmarkId; /* unique identifier for resjunk columns */
635 RowMarkType markType; /* see enum in nodes/plannodes.h */
636 LockClauseStrength strength; /* LockingClause's strength, or LCS_NONE */
637 LockWaitPolicy waitPolicy; /* NOWAIT and SKIP LOCKED */
638 bool ermActive; /* is this mark relevant for current tuple? */
639 ItemPointerData curCtid; /* ctid of currently locked tuple, if any */
640 void *ermExtra; /* available for use by relation source node */
641 } ExecRowMark;
642
643 /*
644 * ExecAuxRowMark -
645 * additional runtime representation of FOR [KEY] UPDATE/SHARE clauses
646 *
647 * Each LockRows and ModifyTable node keeps a list of the rowmarks it needs to
648 * deal with. In addition to a pointer to the related entry in es_rowmarks,
649 * this struct carries the column number(s) of the resjunk columns associated
650 * with the rowmark (see comments for PlanRowMark for more detail). In the
651 * case of ModifyTable, there has to be a separate ExecAuxRowMark list for
652 * each child plan, because the resjunk columns could be at different physical
653 * column positions in different subplans.
654 */
InitPersistStatsColumnFamily()655 typedef struct ExecAuxRowMark
656 {
657 ExecRowMark *rowmark; /* related entry in es_rowmarks */
658 AttrNumber ctidAttNo; /* resno of ctid junk attribute, if any */
659 AttrNumber toidAttNo; /* resno of tableoid junk attribute, if any */
660 AttrNumber wholeAttNo; /* resno of whole-row junk attribute, if any */
661 } ExecAuxRowMark;
662
663
664 /* ----------------------------------------------------------------
665 * Tuple Hash Tables
666 *
667 * All-in-memory tuple hash tables are used for a number of purposes.
668 *
669 * Note: tab_hash_funcs are for the key datatype(s) stored in the table,
670 * and tab_eq_funcs are non-cross-type equality operators for those types.
671 * Normally these are the only functions used, but FindTupleHashEntry()
672 * supports searching a hashtable using cross-data-type hashing. For that,
673 * the caller must supply hash functions for the LHS datatype as well as
674 * the cross-type equality operators to use. in_hash_funcs and cur_eq_func
675 * are set to point to the caller's function arrays while doing such a search.
676 * During LookupTupleHashEntry(), they point to tab_hash_funcs and
677 * tab_eq_func respectively.
678 * ----------------------------------------------------------------
679 */
680 typedef struct TupleHashEntryData *TupleHashEntry;
681 typedef struct TupleHashTableData *TupleHashTable;
682
RecoverLogFiles(const std::vector<uint64_t> & log_numbers,SequenceNumber * next_sequence,bool read_only,bool * corrupted_log_found)683 typedef struct TupleHashEntryData
684 {
685 MinimalTuple firstTuple; /* copy of first tuple in this group */
686 void *additional; /* user data */
687 uint32 status; /* hash status */
688 uint32 hash; /* hash value (cached) */
689 } TupleHashEntryData;
690
CorruptionROCKSDB_NAMESPACE::DBImpl::RecoverLogFiles::LogReporter691 /* define parameters necessary to generate the tuple hash table interface */
692 #define SH_PREFIX tuplehash
693 #define SH_ELEMENT_TYPE TupleHashEntryData
694 #define SH_KEY_TYPE MinimalTuple
695 #define SH_SCOPE extern
696 #define SH_DECLARE
697 #include "lib/simplehash.h"
698
699 typedef struct TupleHashTableData
700 {
701 tuplehash_hash *hashtab; /* underlying hash table */
702 int numCols; /* number of columns in lookup key */
703 AttrNumber *keyColIdx; /* attr numbers of key columns */
704 FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */
705 ExprState *tab_eq_func; /* comparator for table datatype(s) */
706 Oid *tab_collations; /* collations for hash and comparison */
707 MemoryContext tablecxt; /* memory context containing table */
708 MemoryContext tempcxt; /* context for function evaluations */
709 Size entrysize; /* actual size to make each hash entry */
710 TupleTableSlot *tableslot; /* slot for referencing table entries */
711 /* The following fields are set transiently for each table search: */
712 TupleTableSlot *inputslot; /* current input tuple's slot */
713 FmgrInfo *in_hash_funcs; /* hash functions for input datatype(s) */
714 ExprState *cur_eq_func; /* comparator for input vs. table */
715 uint32 hash_iv; /* hash-function IV */
716 ExprContext *exprcontext; /* expression context */
717 } TupleHashTableData;
718
719 typedef tuplehash_iterator TupleHashIterator;
720
/*
 * Use InitTupleHashIterator/TermTupleHashIterator for a read/write scan.
 * Use ResetTupleHashIterator if the table can be frozen (in this case no
 * explicit scan termination is needed).
 *
 * The htable argument is parenthesized before "->" is applied, so callers
 * may pass any pointer-valued expression (for example &table), not only a
 * plain identifier.
 */
#define InitTupleHashIterator(htable, iter) \
    tuplehash_start_iterate((htable)->hashtab, iter)
#define TermTupleHashIterator(iter) \
    ((void) 0)
#define ResetTupleHashIterator(htable, iter) \
    InitTupleHashIterator(htable, iter)
#define ScanTupleHashTable(htable, iter) \
    tuplehash_iterate((htable)->hashtab, iter)
734
735
736 /* ----------------------------------------------------------------
737 * Expression State Nodes
738 *
739 * Formerly, there was a separate executor expression state node corresponding
740 * to each node in a planned expression tree. That's no longer the case; for
741 * common expression node types, all the execution info is embedded into
742 * step(s) in a single ExprState node. But we still have a few executor state
743 * node types for selected expression node types, mostly those in which info
744 * has to be shared with other parts of the execution state tree.
745 * ----------------------------------------------------------------
746 */
747
748 /* ----------------
749 * AggrefExprState node
750 * ----------------
751 */
752 typedef struct AggrefExprState
753 {
754 NodeTag type;
755 Aggref *aggref; /* expression plan node */
756 int aggno; /* ID number for agg within its plan node */
757 } AggrefExprState;
758
759 /* ----------------
760 * WindowFuncExprState node
__anonb4a0c2ec0202() 761 * ----------------
762 */
763 typedef struct WindowFuncExprState
764 {
765 NodeTag type;
766 WindowFunc *wfunc; /* expression plan node */
767 List *args; /* ExprStates for argument expressions */
768 ExprState *aggfilter; /* FILTER expression */
769 int wfuncno; /* ID number for wfunc within its plan node */
770 } WindowFuncExprState;
771
772
773 /* ----------------
774 * SetExprState node
775 *
776 * State for evaluating a potentially set-returning expression (like FuncExpr
777 * or OpExpr). In some cases, like some of the expressions in ROWS FROM(...)
778 * the expression might not be a SRF, but nonetheless it uses the same
779 * machinery as SRFs; it will be treated as a SRF returning a single row.
780 * ----------------
781 */
782 typedef struct SetExprState
783 {
784 NodeTag type;
785 Expr *expr; /* expression plan node */
786 List *args; /* ExprStates for argument expressions */
787
788 /*
789 * In ROWS FROM, functions can be inlined, removing the FuncExpr normally
790 * inside. In such a case this is the compiled expression (which cannot
791 * return a set), which'll be evaluated using regular ExecEvalExpr().
792 */
793 ExprState *elidedFuncState;
794
795 /*
796 * Function manager's lookup info for the target function. If func.fn_oid
797 * is InvalidOid, we haven't initialized it yet (nor any of the following
798 * fields, except funcReturnsSet).
799 */
800 FmgrInfo func;
801
802 /*
803 * For a set-returning function (SRF) that returns a tuplestore, we keep
804 * the tuplestore here and dole out the result rows one at a time. The
805 * slot holds the row currently being returned.
806 */
807 Tuplestorestate *funcResultStore;
808 TupleTableSlot *funcResultSlot;
809
810 /*
811 * In some cases we need to compute a tuple descriptor for the function's
812 * output. If so, it's stored here.
813 */
814 TupleDesc funcResultDesc;
815 bool funcReturnsTuple; /* valid when funcResultDesc isn't NULL */
816
817 /*
818 * Remember whether the function is declared to return a set. This is set
819 * by ExecInitExpr, and is valid even before the FmgrInfo is set up.
820 */
821 bool funcReturnsSet;
822
823 /*
824 * setArgsValid is true when we are evaluating a set-returning function
825 * that uses value-per-call mode and we are in the middle of a call
826 * series; we want to pass the same argument values to the function again
827 * (and again, until it returns ExprEndResult). This indicates that
828 * fcinfo_data already contains valid argument data.
829 */
830 bool setArgsValid;
831
832 /*
833 * Flag to remember whether we have registered a shutdown callback for
834 * this SetExprState. We do so only if funcResultStore or setArgsValid
835 * has been set at least once (since all the callback is for is to release
836 * the tuplestore or clear setArgsValid).
837 */
838 bool shutdown_reg; /* a shutdown callback is registered */
839
840 /*
841 * Call parameter structure for the function. This has been initialized
842 * (by InitFunctionCallInfoData) if func.fn_oid is valid. It also saves
843 * argument values between calls, when setArgsValid is true.
844 */
845 FunctionCallInfo fcinfo;
846 } SetExprState;
847
848 /* ----------------
849 * SubPlanState node
850 * ----------------
851 */
852 typedef struct SubPlanState
853 {
854 NodeTag type;
855 SubPlan *subplan; /* expression plan node */
856 struct PlanState *planstate; /* subselect plan's state tree */
857 struct PlanState *parent; /* parent plan node's state tree */
858 ExprState *testexpr; /* state of combining expression */
859 List *args; /* states of argument expression(s) */
860 HeapTuple curTuple; /* copy of most recent tuple from subplan */
861 Datum curArray; /* most recent array from ARRAY() subplan */
862 /* these are used when hashing the subselect's output: */
863 TupleDesc descRight; /* subselect desc after projection */
864 ProjectionInfo *projLeft; /* for projecting lefthand exprs */
865 ProjectionInfo *projRight; /* for projecting subselect output */
866 TupleHashTable hashtable; /* hash table for no-nulls subselect rows */
867 TupleHashTable hashnulls; /* hash table for rows with null(s) */
868 bool havehashrows; /* true if hashtable is not empty */
869 bool havenullrows; /* true if hashnulls is not empty */
870 MemoryContext hashtablecxt; /* memory context containing hash tables */
871 MemoryContext hashtempcxt; /* temp memory context for hash tables */
872 ExprContext *innerecontext; /* econtext for computing inner tuples */
873 /* each of the following fields is an array of length numCols: */
874 AttrNumber *keyColIdx; /* control data for hash tables */
875 Oid *tab_eq_funcoids; /* equality func oids for table
876 * datatype(s) */
877 Oid *tab_collations; /* collations for hash and comparison */
878 FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */
879 FmgrInfo *tab_eq_funcs; /* equality functions for table datatype(s) */
880 FmgrInfo *lhs_hash_funcs; /* hash functions for lefthand datatype(s) */
881 FmgrInfo *cur_eq_funcs; /* equality functions for LHS vs. table */
882 ExprState *cur_eq_comp; /* equality comparator for LHS vs. table */
883 int numCols; /* number of columns being hashed */
884 } SubPlanState;
885
886 /* ----------------
887 * AlternativeSubPlanState node
888 * ----------------
889 */
890 typedef struct AlternativeSubPlanState
891 {
892 NodeTag type;
893 AlternativeSubPlan *subplan; /* expression plan node */
894 List *subplans; /* SubPlanStates of alternative subplans */
895 int active; /* list index of the one we're using */
896 } AlternativeSubPlanState;
897
/*
 * DomainConstraintState - one item to check during CoerceToDomain
 *
 * Note: we consider this to be part of an ExprState tree, so we give it
 * a name following the xxxState convention.  But there's no directly
 * associated plan-tree node.
 */
typedef enum DomainConstraintType
{
	DOM_CONSTRAINT_NOTNULL,
	DOM_CONSTRAINT_CHECK
} DomainConstraintType;
910
911 typedef struct DomainConstraintState
912 {
913 NodeTag type;
914 DomainConstraintType constrainttype; /* constraint type */
915 char *name; /* name of constraint (for error msgs) */
916 Expr *check_expr; /* for CHECK, a boolean expression */
917 ExprState *check_exprstate; /* check_expr's eval state, or NULL */
918 } DomainConstraintState;
919
920
/* ----------------------------------------------------------------
 *				 Executor State Trees
 *
 * An executing query has a PlanState tree paralleling the Plan tree
 * that describes the plan.
 * ----------------------------------------------------------------
 */
928
929 /* ----------------
930 * ExecProcNodeMtd
931 *
932 * This is the method called by ExecProcNode to return the next tuple
933 * from an executor node. It returns NULL, or an empty TupleTableSlot,
934 * if no more tuples are available.
935 * ----------------
936 */
937 typedef TupleTableSlot *(*ExecProcNodeMtd) (struct PlanState *pstate);
938
939 /* ----------------
940 * PlanState node
941 *
942 * We never actually instantiate any PlanState nodes; this is just the common
943 * abstract superclass for all PlanState-type nodes.
944 * ----------------
945 */
946 typedef struct PlanState
947 {
948 NodeTag type;
949
950 Plan *plan; /* associated Plan node */
951
952 EState *state; /* at execution time, states of individual
953 * nodes point to one EState for the whole
954 * top-level plan */
955
956 ExecProcNodeMtd ExecProcNode; /* function to return next tuple */
957 ExecProcNodeMtd ExecProcNodeReal; /* actual function, if above is a
958 * wrapper */
959
960 Instrumentation *instrument; /* Optional runtime stats for this node */
961 WorkerInstrumentation *worker_instrument; /* per-worker instrumentation */
962
963 /* Per-worker JIT instrumentation */
964 struct SharedJitInstrumentation *worker_jit_instrument;
965
966 /*
967 * Common structural data for all Plan types. These links to subsidiary
968 * state trees parallel links in the associated plan tree (except for the
969 * subPlan list, which does not exist in the plan tree).
970 */
971 ExprState *qual; /* boolean qual condition */
972 struct PlanState *lefttree; /* input plan tree(s) */
973 struct PlanState *righttree;
974
975 List *initPlan; /* Init SubPlanState nodes (un-correlated expr
976 * subselects) */
977 List *subPlan; /* SubPlanState nodes in my expressions */
978
979 /*
980 * State for management of parameter-change-driven rescanning
981 */
982 Bitmapset *chgParam; /* set of IDs of changed Params */
983
984 /*
985 * Other run-time state needed by most if not all node types.
986 */
987 TupleDesc ps_ResultTupleDesc; /* node's return type */
988 TupleTableSlot *ps_ResultTupleSlot; /* slot for my result tuples */
989 ExprContext *ps_ExprContext; /* node's expression-evaluation context */
990 ProjectionInfo *ps_ProjInfo; /* info for doing tuple projection */
991
992 /*
993 * Scanslot's descriptor if known. This is a bit of a hack, but otherwise
994 * it's hard for expression compilation to optimize based on the
995 * descriptor, without encoding knowledge about all executor nodes.
996 */
997 TupleDesc scandesc;
998
999 /*
1000 * Define the slot types for inner, outer and scanslots for expression
1001 * contexts with this state as a parent. If *opsset is set, then
1002 * *opsfixed indicates whether *ops is guaranteed to be the type of slot
1003 * used. That means that every slot in the corresponding
1004 * ExprContext.ecxt_*tuple will point to a slot of that type, while
1005 * evaluating the expression. If *opsfixed is false, but *ops is set,
1006 * that indicates the most likely type of slot.
1007 *
1008 * The scan* fields are set by ExecInitScanTupleSlot(). If that's not
1009 * called, nodes can initialize the fields themselves.
1010 *
1011 * If outer/inneropsset is false, the information is inferred on-demand
1012 * using ExecGetResultSlotOps() on ->righttree/lefttree, using the
1013 * corresponding node's resultops* fields.
1014 *
1015 * The result* fields are automatically set when ExecInitResultSlot is
1016 * used (be it directly or when the slot is created by
1017 * ExecAssignScanProjectionInfo() /
1018 * ExecConditionalAssignProjectionInfo()). If no projection is necessary
1019 * ExecConditionalAssignProjectionInfo() defaults those fields to the scan
1020 * operations.
1021 */
1022 const TupleTableSlotOps *scanops;
1023 const TupleTableSlotOps *outerops;
1024 const TupleTableSlotOps *innerops;
1025 const TupleTableSlotOps *resultops;
1026 bool scanopsfixed;
1027 bool outeropsfixed;
1028 bool inneropsfixed;
1029 bool resultopsfixed;
1030 bool scanopsset;
1031 bool outeropsset;
1032 bool inneropsset;
1033 bool resultopsset;
1034 } PlanState;
1035
/* ----------------
 *	these are defined to avoid confusion problems with "left"
 *	and "right" and "inner" and "outer".  The convention is that
 *	the "left" plan is the "outer" plan and the "right" plan is
 *	the inner plan, but these make the code more readable.
 *
 *	Both macros cast their argument to (PlanState *) and expand to an
 *	lvalue naming the corresponding child link.
 * ----------------
 */
#define innerPlanState(node)		(((PlanState *)(node))->righttree)
#define outerPlanState(node)		(((PlanState *)(node))->lefttree)
1045
/*
 * Macros for inline access to certain instrumentation counters.
 *
 * Each macro casts "node" to (PlanState *) and, only if that node has an
 * instrument struct attached, adds "delta" to the named counter.  When
 * ->instrument is NULL the macro does nothing.  Note that "node" is
 * evaluated twice when instrumentation is present.
 */
#define InstrCountTuples2(node, delta) \
	do { \
		if (((PlanState *)(node))->instrument) \
			((PlanState *)(node))->instrument->ntuples2 += (delta); \
	} while (0)
#define InstrCountFiltered1(node, delta) \
	do { \
		if (((PlanState *)(node))->instrument) \
			((PlanState *)(node))->instrument->nfiltered1 += (delta); \
	} while (0)
#define InstrCountFiltered2(node, delta) \
	do { \
		if (((PlanState *)(node))->instrument) \
			((PlanState *)(node))->instrument->nfiltered2 += (delta); \
	} while (0)
1062
1063 /*
1064 * EPQState is state for executing an EvalPlanQual recheck on a candidate
1065 * tuples e.g. in ModifyTable or LockRows.
1066 *
1067 * To execute EPQ a separate EState is created (stored in ->recheckestate),
1068 * which shares some resources, like the rangetable, with the main query's
1069 * EState (stored in ->parentestate). The (sub-)tree of the plan that needs to
1070 * be rechecked (in ->plan), is separately initialized (into
1071 * ->recheckplanstate), but shares plan nodes with the corresponding nodes in
1072 * the main query. The scan nodes in that separate executor tree are changed
1073 * to return only the current tuple of interest for the respective
1074 * table. Those tuples are either provided by the caller (using
1075 * EvalPlanQualSlot), and/or found using the rowmark mechanism (non-locking
1076 * rowmarks by the EPQ machinery itself, locking ones by the caller).
1077 *
1078 * While the plan to be checked may be changed using EvalPlanQualSetPlan() -
1079 * e.g. so all source plans for a ModifyTable node can be processed - all such
1080 * plans need to share the same EState.
1081 */
1082 typedef struct EPQState
1083 {
1084 /* Initialized at EvalPlanQualInit() time: */
1085
1086 EState *parentestate; /* main query's EState */
1087 int epqParam; /* ID of Param to force scan node re-eval */
1088
1089 /*
1090 * Tuples to be substituted by scan nodes. They need to set up, before
1091 * calling EvalPlanQual()/EvalPlanQualNext(), into the slot returned by
1092 * EvalPlanQualSlot(scanrelid). The array is indexed by scanrelid - 1.
1093 */
1094 List *tuple_table; /* tuple table for relsubs_slot */
1095 TupleTableSlot **relsubs_slot;
1096
1097 /*
1098 * Initialized by EvalPlanQualInit(), may be changed later with
1099 * EvalPlanQualSetPlan():
1100 */
1101
1102 Plan *plan; /* plan tree to be executed */
1103 List *arowMarks; /* ExecAuxRowMarks (non-locking only) */
1104
1105
1106 /*
1107 * The original output tuple to be rechecked. Set by
1108 * EvalPlanQualSetSlot(), before EvalPlanQualNext() or EvalPlanQual() may
1109 * be called.
1110 */
1111 TupleTableSlot *origslot;
1112
1113
1114 /* Initialized or reset by EvalPlanQualBegin(): */
1115
1116 EState *recheckestate; /* EState for EPQ execution, see above */
1117
1118 /*
1119 * Rowmarks that can be fetched on-demand using
1120 * EvalPlanQualFetchRowMark(), indexed by scanrelid - 1. Only non-locking
1121 * rowmarks.
1122 */
1123 ExecAuxRowMark **relsubs_rowmark;
1124
1125 /*
RestoreAliveLogFiles(const std::vector<uint64_t> & log_numbers)1126 * True if a relation's EPQ tuple has been fetched for relation, indexed
1127 * by scanrelid - 1.
1128 */
1129 bool *relsubs_done;
1130
1131 PlanState *recheckplanstate; /* EPQ specific exec nodes, for ->plan */
1132 } EPQState;
1133
1134
1135 /* ----------------
1136 * ResultState information
1137 * ----------------
1138 */
1139 typedef struct ResultState
1140 {
1141 PlanState ps; /* its first field is NodeTag */
1142 ExprState *resconstantqual;
1143 bool rs_done; /* are we done? */
1144 bool rs_checkqual; /* do we need to check the qual? */
1145 } ResultState;
1146
1147 /* ----------------
1148 * ProjectSetState information
1149 *
1150 * Note: at least one of the "elems" will be a SetExprState; the rest are
1151 * regular ExprStates.
1152 * ----------------
1153 */
1154 typedef struct ProjectSetState
1155 {
1156 PlanState ps; /* its first field is NodeTag */
1157 Node **elems; /* array of expression states */
1158 ExprDoneCond *elemdone; /* array of per-SRF is-done states */
1159 int nelems; /* length of elemdone[] array */
1160 bool pending_srf_tuples; /* still evaluating srfs in tlist? */
1161 MemoryContext argcontext; /* context for SRF arguments */
1162 } ProjectSetState;
1163
1164 /* ----------------
1165 * ModifyTableState information
1166 * ----------------
1167 */
1168 typedef struct ModifyTableState
1169 {
1170 PlanState ps; /* its first field is NodeTag */
1171 CmdType operation; /* INSERT, UPDATE, or DELETE */
1172 bool canSetTag; /* do we set the command tag/es_processed? */
1173 bool mt_done; /* are we done? */
1174 PlanState **mt_plans; /* subplans (one per target rel) */
1175 int mt_nplans; /* number of plans in the array */
1176 int mt_whichplan; /* which one is being executed (0..n-1) */
1177 TupleTableSlot **mt_scans; /* input tuple corresponding to underlying
1178 * plans */
1179 ResultRelInfo *resultRelInfo; /* per-subplan target relations */
1180 ResultRelInfo *rootResultRelInfo; /* root target relation (partitioned
WriteLevel0TableForRecovery(int job_id,ColumnFamilyData * cfd,MemTable * mem,VersionEdit * edit)1181 * table root) */
1182 List **mt_arowmarks; /* per-subplan ExecAuxRowMark lists */
1183 EPQState mt_epqstate; /* for evaluating EvalPlanQual rechecks */
1184 bool fireBSTriggers; /* do we need to fire stmt triggers? */
1185 List *mt_excludedtlist; /* the excluded pseudo relation's tlist */
1186
1187 /*
1188 * Slot for storing tuples in the root partitioned table's rowtype during
1189 * an UPDATE of a partitioned table.
1190 */
1191 TupleTableSlot *mt_root_tuple_slot;
1192
1193 /* Tuple-routing support info */
1194 struct PartitionTupleRouting *mt_partition_tuple_routing;
1195
1196 /* controls transition table population for specified operation */
1197 struct TransitionCaptureState *mt_transition_capture;
1198
1199 /* controls transition table population for INSERT...ON CONFLICT UPDATE */
1200 struct TransitionCaptureState *mt_oc_transition_capture;
1201
1202 /* Per plan map for tuple conversion from child to root */
1203 TupleConversionMap **mt_per_subplan_tupconv_maps;
1204 } ModifyTableState;
1205
1206 /* ----------------
1207 * AppendState information
1208 *
1209 * nplans how many plans are in the array
1210 * whichplan which plan is being executed (0 .. n-1), or a
1211 * special negative value. See nodeAppend.c.
1212 * prune_state details required to allow partitions to be
1213 * eliminated from the scan, or NULL if not possible.
1214 * valid_subplans for runtime pruning, valid appendplans indexes to
1215 * scan.
1216 * ----------------
1217 */
1218
1219 struct AppendState;
1220 typedef struct AppendState AppendState;
1221 struct ParallelAppendState;
1222 typedef struct ParallelAppendState ParallelAppendState;
1223 struct PartitionPruneState;
1224
1225 struct AppendState
1226 {
1227 PlanState ps; /* its first field is NodeTag */
1228 PlanState **appendplans; /* array of PlanStates for my inputs */
1229 int as_nplans;
1230 int as_whichplan;
1231 int as_first_partial_plan; /* Index of 'appendplans' containing
1232 * the first partial plan */
1233 ParallelAppendState *as_pstate; /* parallel coordination info */
1234 Size pstate_len; /* size of parallel coordination info */
1235 struct PartitionPruneState *as_prune_state;
1236 Bitmapset *as_valid_subplans;
1237 bool (*choose_next_subplan) (AppendState *);
1238 };
1239
1240 /* ----------------
1241 * MergeAppendState information
1242 *
1243 * nplans how many plans are in the array
1244 * nkeys number of sort key columns
1245 * sortkeys sort keys in SortSupport representation
1246 * slots current output tuple of each subplan
1247 * heap heap of active tuples
1248 * initialized true if we have fetched first tuple from each subplan
1249 * noopscan true if partition pruning proved that none of the
1250 * mergeplans can contain a record to satisfy this query.
1251 * prune_state details required to allow partitions to be
1252 * eliminated from the scan, or NULL if not possible.
1253 * valid_subplans for runtime pruning, valid mergeplans indexes to
1254 * scan.
1255 * ----------------
1256 */
1257 typedef struct MergeAppendState
1258 {
1259 PlanState ps; /* its first field is NodeTag */
1260 PlanState **mergeplans; /* array of PlanStates for my inputs */
1261 int ms_nplans;
1262 int ms_nkeys;
1263 SortSupport ms_sortkeys; /* array of length ms_nkeys */
1264 TupleTableSlot **ms_slots; /* array of length ms_nplans */
1265 struct binaryheap *ms_heap; /* binary heap of slot indices */
1266 bool ms_initialized; /* are subplans started? */
1267 bool ms_noopscan;
1268 struct PartitionPruneState *ms_prune_state;
1269 Bitmapset *ms_valid_subplans;
1270 } MergeAppendState;
1271
1272 /* ----------------
1273 * RecursiveUnionState information
1274 *
1275 * RecursiveUnionState is used for performing a recursive union.
1276 *
Open(const Options & options,const std::string & dbname,DB ** dbptr)1277 * recursing T when we're done scanning the non-recursive term
1278 * intermediate_empty T if intermediate_table is currently empty
1279 * working_table working table (to be scanned by recursive term)
1280 * intermediate_table current recursive output (next generation of WT)
1281 * ----------------
1282 */
1283 typedef struct RecursiveUnionState
1284 {
1285 PlanState ps; /* its first field is NodeTag */
1286 bool recursing;
1287 bool intermediate_empty;
1288 Tuplestorestate *working_table;
1289 Tuplestorestate *intermediate_table;
1290 /* Remaining fields are unused in UNION ALL case */
1291 Oid *eqfuncoids; /* per-grouping-field equality fns */
1292 FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
1293 MemoryContext tempContext; /* short-term context for comparisons */
1294 TupleHashTable hashtable; /* hash table for tuples already seen */
1295 MemoryContext tableContext; /* memory context containing hash table */
1296 } RecursiveUnionState;
1297
1298 /* ----------------
1299 * BitmapAndState information
1300 * ----------------
1301 */
1302 typedef struct BitmapAndState
1303 {
1304 PlanState ps; /* its first field is NodeTag */
Open(const DBOptions & db_options,const std::string & dbname,const std::vector<ColumnFamilyDescriptor> & column_families,std::vector<ColumnFamilyHandle * > * handles,DB ** dbptr)1305 PlanState **bitmapplans; /* array of PlanStates for my inputs */
1306 int nplans; /* number of input plans */
1307 } BitmapAndState;
1308
1309 /* ----------------
1310 * BitmapOrState information
1311 * ----------------
1312 */
1313 typedef struct BitmapOrState
CreateWAL(uint64_t log_file_num,uint64_t recycle_log_number,size_t preallocate_block_size,log::Writer ** new_log)1314 {
1315 PlanState ps; /* its first field is NodeTag */
1316 PlanState **bitmapplans; /* array of PlanStates for my inputs */
1317 int nplans; /* number of input plans */
1318 } BitmapOrState;
1319
/* ----------------------------------------------------------------
 *				 Scan State Information
 * ----------------------------------------------------------------
 */
1324
1325 /* ----------------
1326 * ScanState information
1327 *
1328 * ScanState extends PlanState for node types that represent
1329 * scans of an underlying relation. It can also be used for nodes
1330 * that scan the output of an underlying plan node --- in that case,
1331 * only ScanTupleSlot is actually useful, and it refers to the tuple
1332 * retrieved from the subplan.
1333 *
1334 * currentRelation relation being scanned (NULL if none)
1335 * currentScanDesc current scan descriptor for scan (NULL if none)
1336 * ScanTupleSlot pointer to slot in tuple table holding scan tuple
1337 * ----------------
1338 */
1339 typedef struct ScanState
1340 {
1341 PlanState ps; /* its first field is NodeTag */
1342 Relation ss_currentRelation;
1343 struct TableScanDescData *ss_currentScanDesc;
1344 TupleTableSlot *ss_ScanTupleSlot;
1345 } ScanState;
1346
1347 /* ----------------
1348 * SeqScanState information
1349 * ----------------
1350 */
1351 typedef struct SeqScanState
1352 {
1353 ScanState ss; /* its first field is NodeTag */
1354 Size pscan_len; /* size of parallel heap scan descriptor */
Open(const DBOptions & db_options,const std::string & dbname,const std::vector<ColumnFamilyDescriptor> & column_families,std::vector<ColumnFamilyHandle * > * handles,DB ** dbptr,const bool seq_per_batch,const bool batch_per_txn)1355 } SeqScanState;
1356
1357 /* ----------------
1358 * SampleScanState information
1359 * ----------------
1360 */
1361 typedef struct SampleScanState
1362 {
1363 ScanState ss;
1364 List *args; /* expr states for TABLESAMPLE params */
1365 ExprState *repeatable; /* expr state for REPEATABLE expr */
1366 /* use struct pointer to avoid including tsmapi.h here */
1367 struct TsmRoutine *tsmroutine; /* descriptor for tablesample method */
1368 void *tsm_state; /* tablesample method can keep state here */
1369 bool use_bulkread; /* use bulkread buffer access strategy? */
1370 bool use_pagemode; /* use page-at-a-time visibility checking? */
1371 bool begun; /* false means need to call BeginSampleScan */
1372 uint32 seed; /* random seed */
1373 int64 donetuples; /* number of tuples already returned */
1374 bool haveblock; /* has a block for sampling been determined */
1375 bool done; /* exhausted all tuples? */
1376 } SampleScanState;
1377
1378 /*
1379 * These structs store information about index quals that don't have simple
1380 * constant right-hand sides. See comments for ExecIndexBuildScanKeys()
1381 * for discussion.
1382 */
1383 typedef struct
1384 {
1385 struct ScanKeyData *scan_key; /* scankey to put value into */
1386 ExprState *key_expr; /* expr to evaluate to get value */
1387 bool key_toastable; /* is expr's result a toastable datatype? */
1388 } IndexRuntimeKeyInfo;
1389
1390 typedef struct
1391 {
1392 struct ScanKeyData *scan_key; /* scankey to put value into */
1393 ExprState *array_expr; /* expr to evaluate to get array value */
1394 int next_elem; /* next array element to use */
1395 int num_elems; /* number of elems in current array value */
1396 Datum *elem_values; /* array of num_elems Datums */
1397 bool *elem_nulls; /* array of num_elems is-null flags */
1398 } IndexArrayKeyInfo;
1399
1400 /* ----------------
1401 * IndexScanState information
1402 *
1403 * indexqualorig execution state for indexqualorig expressions
1404 * indexorderbyorig execution state for indexorderbyorig expressions
1405 * ScanKeys Skey structures for index quals
1406 * NumScanKeys number of ScanKeys
1407 * OrderByKeys Skey structures for index ordering operators
1408 * NumOrderByKeys number of OrderByKeys
1409 * RuntimeKeys info about Skeys that must be evaluated at runtime
1410 * NumRuntimeKeys number of RuntimeKeys
1411 * RuntimeKeysReady true if runtime Skeys have been computed
1412 * RuntimeContext expr context for evaling runtime Skeys
1413 * RelationDesc index relation descriptor
1414 * ScanDesc index scan descriptor
1415 *
1416 * ReorderQueue tuples that need reordering due to re-check
1417 * ReachedEnd have we fetched all tuples from index already?
1418 * OrderByValues values of ORDER BY exprs of last fetched tuple
1419 * OrderByNulls null flags for OrderByValues
1420 * SortSupport for reordering ORDER BY exprs
1421 * OrderByTypByVals is the datatype of order by expression pass-by-value?
1422 * OrderByTypLens typlens of the datatypes of order by expressions
1423 * PscanLen size of parallel index scan descriptor
1424 * ----------------
1425 */
1426 typedef struct IndexScanState
1427 {
1428 ScanState ss; /* its first field is NodeTag */
1429 ExprState *indexqualorig;
1430 List *indexorderbyorig;
1431 struct ScanKeyData *iss_ScanKeys;
1432 int iss_NumScanKeys;
1433 struct ScanKeyData *iss_OrderByKeys;
1434 int iss_NumOrderByKeys;
1435 IndexRuntimeKeyInfo *iss_RuntimeKeys;
1436 int iss_NumRuntimeKeys;
1437 bool iss_RuntimeKeysReady;
1438 ExprContext *iss_RuntimeContext;
1439 Relation iss_RelationDesc;
1440 struct IndexScanDescData *iss_ScanDesc;
1441
1442 /* These are needed for re-checking ORDER BY expr ordering */
1443 pairingheap *iss_ReorderQueue;
1444 bool iss_ReachedEnd;
1445 Datum *iss_OrderByValues;
1446 bool *iss_OrderByNulls;
1447 SortSupport iss_SortSupport;
1448 bool *iss_OrderByTypByVals;
1449 int16 *iss_OrderByTypLens;
1450 Size iss_PscanLen;
1451 } IndexScanState;
1452
1453 /* ----------------
1454 * IndexOnlyScanState information
1455 *
1456 * indexqual execution state for indexqual expressions
1457 * ScanKeys Skey structures for index quals
1458 * NumScanKeys number of ScanKeys
1459 * OrderByKeys Skey structures for index ordering operators
1460 * NumOrderByKeys number of OrderByKeys
1461 * RuntimeKeys info about Skeys that must be evaluated at runtime
1462 * NumRuntimeKeys number of RuntimeKeys
1463 * RuntimeKeysReady true if runtime Skeys have been computed
1464 * RuntimeContext expr context for evaling runtime Skeys
1465 * RelationDesc index relation descriptor
1466 * ScanDesc index scan descriptor
1467 * TableSlot slot for holding tuples fetched from the table
1468 * VMBuffer buffer in use for visibility map testing, if any
1469 * PscanLen size of parallel index-only scan descriptor
1470 * ----------------
1471 */
1472 typedef struct IndexOnlyScanState
1473 {
1474 ScanState ss; /* its first field is NodeTag */
1475 ExprState *indexqual;
1476 struct ScanKeyData *ioss_ScanKeys;
1477 int ioss_NumScanKeys;
1478 struct ScanKeyData *ioss_OrderByKeys;
1479 int ioss_NumOrderByKeys;
1480 IndexRuntimeKeyInfo *ioss_RuntimeKeys;
1481 int ioss_NumRuntimeKeys;
1482 bool ioss_RuntimeKeysReady;
1483 ExprContext *ioss_RuntimeContext;
1484 Relation ioss_RelationDesc;
1485 struct IndexScanDescData *ioss_ScanDesc;
1486 TupleTableSlot *ioss_TableSlot;
1487 Buffer ioss_VMBuffer;
1488 Size ioss_PscanLen;
1489 } IndexOnlyScanState;
1490
1491 /* ----------------
1492 * BitmapIndexScanState information
1493 *
1494 * result bitmap to return output into, or NULL
1495 * ScanKeys Skey structures for index quals
1496 * NumScanKeys number of ScanKeys
1497 * RuntimeKeys info about Skeys that must be evaluated at runtime
1498 * NumRuntimeKeys number of RuntimeKeys
1499 * ArrayKeys info about Skeys that come from ScalarArrayOpExprs
1500 * NumArrayKeys number of ArrayKeys
1501 * RuntimeKeysReady true if runtime Skeys have been computed
1502 * RuntimeContext expr context for evaling runtime Skeys
1503 * RelationDesc index relation descriptor
1504 * ScanDesc index scan descriptor
1505 * ----------------
1506 */
1507 typedef struct BitmapIndexScanState
1508 {
1509 ScanState ss; /* its first field is NodeTag */
1510 TIDBitmap *biss_result;
1511 struct ScanKeyData *biss_ScanKeys;
1512 int biss_NumScanKeys;
1513 IndexRuntimeKeyInfo *biss_RuntimeKeys;
1514 int biss_NumRuntimeKeys;
1515 IndexArrayKeyInfo *biss_ArrayKeys;
1516 int biss_NumArrayKeys;
1517 bool biss_RuntimeKeysReady;
1518 ExprContext *biss_RuntimeContext;
1519 Relation biss_RelationDesc;
1520 struct IndexScanDescData *biss_ScanDesc;
1521 } BitmapIndexScanState;
1522
/* ----------------
 *	 SharedBitmapState information
 *
 *		BM_INITIAL		TIDBitmap creation is not yet started, so first worker
 *						to see this state will set the state to BM_INPROGRESS
 *						and that process will be responsible for creating
 *						TIDBitmap.
 *		BM_INPROGRESS	TIDBitmap creation is in progress; workers need to
 *						sleep until it's finished.
 *		BM_FINISHED		TIDBitmap creation is done, so now all workers can
 *						proceed to iterate over TIDBitmap.
 * ----------------
 */
typedef enum
{
	BM_INITIAL,
	BM_INPROGRESS,
	BM_FINISHED
} SharedBitmapState;
1542
1543 /* ----------------
1544 * ParallelBitmapHeapState information
1545 * tbmiterator iterator for scanning current pages
1546 * prefetch_iterator iterator for prefetching ahead of current page
1547 * mutex mutual exclusion for the prefetching variable
1548 * and state
1549 * prefetch_pages # pages prefetch iterator is ahead of current
1550 * prefetch_target current target prefetch distance
1551 * state current state of the TIDBitmap
1552 * cv conditional wait variable
1553 * phs_snapshot_data snapshot data shared to workers
1554 * ----------------
1555 */
1556 typedef struct ParallelBitmapHeapState
1557 {
1558 dsa_pointer tbmiterator;
1559 dsa_pointer prefetch_iterator;
1560 slock_t mutex;
1561 int prefetch_pages;
1562 int prefetch_target;
1563 SharedBitmapState state;
1564 ConditionVariable cv;
1565 char phs_snapshot_data[FLEXIBLE_ARRAY_MEMBER];
1566 } ParallelBitmapHeapState;
1567
1568 /* ----------------
1569 * BitmapHeapScanState information
1570 *
1571 * bitmapqualorig execution state for bitmapqualorig expressions
1572 * tbm bitmap obtained from child index scan(s)
1573 * tbmiterator iterator for scanning current pages
1574 * tbmres current-page data
1575 * can_skip_fetch can we potentially skip tuple fetches in this scan?
1576 * return_empty_tuples number of empty tuples to return
1577 * vmbuffer buffer for visibility-map lookups
1578 * pvmbuffer ditto, for prefetched pages
1579 * exact_pages total number of exact pages retrieved
1580 * lossy_pages total number of lossy pages retrieved
1581 * prefetch_iterator iterator for prefetching ahead of current page
1582 * prefetch_pages # pages prefetch iterator is ahead of current
1583 * prefetch_target current target prefetch distance
1584 * prefetch_maximum maximum value for prefetch_target
1585 * pscan_len size of the shared memory for parallel bitmap
1586 * initialized is node is ready to iterate
1587 * shared_tbmiterator shared iterator
1588 * shared_prefetch_iterator shared iterator for prefetching
1589 * pstate shared state for parallel bitmap scan
1590 * ----------------
1591 */
1592 typedef struct BitmapHeapScanState
1593 {
1594 ScanState ss; /* its first field is NodeTag */
1595 ExprState *bitmapqualorig; /* execution state for bitmapqualorig expressions */
1596 TIDBitmap *tbm; /* bitmap obtained from child index scan(s) */
1597 TBMIterator *tbmiterator; /* iterator for scanning current pages */
1598 TBMIterateResult *tbmres; /* current-page data */
1599 bool can_skip_fetch; /* can we potentially skip tuple fetches in this scan? */
1600 int return_empty_tuples; /* number of empty tuples to return */
1601 Buffer vmbuffer; /* buffer for visibility-map lookups */
1602 Buffer pvmbuffer; /* buffer for visibility-map lookups of prefetched pages */
1603 long exact_pages; /* total number of exact pages retrieved */
1604 long lossy_pages; /* total number of lossy pages retrieved */
1605 TBMIterator *prefetch_iterator; /* iterator for prefetching ahead of current page */
1606 int prefetch_pages; /* # pages prefetch iterator is ahead of current */
1607 int prefetch_target; /* current target prefetch distance */
1608 int prefetch_maximum; /* maximum value for prefetch_target */
1609 Size pscan_len; /* size of the shared memory for parallel bitmap */
1610 bool initialized; /* is the node ready to iterate? */
1611 TBMSharedIterator *shared_tbmiterator; /* shared iterator */
1612 TBMSharedIterator *shared_prefetch_iterator; /* shared iterator for prefetching */
1613 ParallelBitmapHeapState *pstate; /* shared state for parallel bitmap scan */
1614 } BitmapHeapScanState;
1615
1616 /* ----------------
1617 * TidScanState information
1618 *
1619 * tidexprs list of TidExpr structs (see nodeTidscan.c)
1620 * isCurrentOf scan has a CurrentOfExpr qual
1621 * NumTids number of tids in this scan
1622 * TidPtr index of currently fetched tid
1623 * TidList evaluated item pointers (array of size NumTids)
1624 * htup currently-fetched tuple, if any
1625 * ----------------
1626 */
1627 typedef struct TidScanState
1628 {
1629 ScanState ss; /* its first field is NodeTag */
1630 List *tss_tidexprs; /* list of TidExpr structs (see nodeTidscan.c) */
1631 bool tss_isCurrentOf; /* scan has a CurrentOfExpr qual */
1632 int tss_NumTids; /* number of tids in this scan */
1633 int tss_TidPtr; /* index of currently fetched tid */
1634 ItemPointerData *tss_TidList; /* evaluated item pointers (array of size tss_NumTids) */
1635 HeapTupleData tss_htup; /* currently-fetched tuple, if any */
1636 } TidScanState;
1637
1638 /* ----------------
1639 * SubqueryScanState information
1640 *
1641 * SubqueryScanState is used for scanning a sub-query in the range table.
1642 * ScanTupleSlot references the current output tuple of the sub-query.
1643 * ----------------
1644 */
1645 typedef struct SubqueryScanState
1646 {
1647 ScanState ss; /* its first field is NodeTag */
1648 PlanState *subplan; /* NOTE(review): presumably the state tree of the scanned sub-query -- confirm */
1649 } SubqueryScanState;
1650
1651 /* ----------------
1652 * FunctionScanState information
1653 *
1654 * Function nodes are used to scan the results of a
1655 * function appearing in FROM (typically a function returning set).
1656 *
1657 * eflags node's capability flags
1658 * ordinality is this scan WITH ORDINALITY?
1659 * simple true if we have 1 function and no ordinality
1660 * ordinal current ordinal column value
1661 * nfuncs number of functions being executed
1662 * funcstates per-function execution states (private in
1663 * nodeFunctionscan.c)
1664 * argcontext memory context to evaluate function arguments in
1665 * ----------------
1666 */
1667 struct FunctionScanPerFuncState; /* forward declaration; only used via pointer below */
1668
1669 typedef struct FunctionScanState
1670 {
1671 ScanState ss; /* its first field is NodeTag */
1672 int eflags; /* node's capability flags */
1673 bool ordinality; /* is this scan WITH ORDINALITY? */
1674 bool simple; /* true if we have 1 function and no ordinality */
1675 int64 ordinal; /* current ordinal column value */
1676 int nfuncs; /* number of functions being executed */
1677 struct FunctionScanPerFuncState *funcstates; /* array of length nfuncs */
1678 MemoryContext argcontext; /* memory context to evaluate function arguments in */
1679 } FunctionScanState;
1680
1681 /* ----------------
1682 * ValuesScanState information
1683 *
1684 * ValuesScan nodes are used to scan the results of a VALUES list
1685 *
1686 * rowcontext per-expression-list context
1687 * exprlists array of expression lists being evaluated
1688 * exprstatelists array of expression state lists, for SubPlans only
1689 * array_len size of above arrays
1690 * curr_idx current array index (0-based)
1691 *
1692 * Note: ss.ps.ps_ExprContext is used to evaluate any qual or projection
1693 * expressions attached to the node. We create a second ExprContext,
1694 * rowcontext, in which to build the executor expression state for each
1695 * Values sublist. Resetting this context lets us get rid of expression
1696 * state for each row, avoiding major memory leakage over a long values list.
1697 * However, that doesn't work for sublists containing SubPlans, because a
1698 * SubPlan has to be connected up to the outer plan tree to work properly.
1699 * Therefore, for only those sublists containing SubPlans, we do expression
1700 * state construction at executor start, and store those pointers in
1701 * exprstatelists[]. NULL entries in that array correspond to simple
1702 * subexpressions that are handled as described above.
1703 * ----------------
1704 */
1705 typedef struct ValuesScanState
1706 {
1707 ScanState ss; /* its first field is NodeTag */
1708 ExprContext *rowcontext; /* per-expression-list context */
1709 List **exprlists; /* array of expression lists being evaluated */
1710 int array_len; /* size of the exprlists and exprstatelists arrays */
1711 int curr_idx; /* current array index (0-based) */
1712 /* in back branches, put this at the end to avoid ABI break: */
1713 List **exprstatelists; /* array of expression state lists, for SubPlans only; other entries are NULL */
1714 } ValuesScanState;
1715
1716 /* ----------------
1717 * TableFuncScanState node
1718 *
1719 * Used in table-expression functions like XMLTABLE.
1720 * ----------------
1721 */
1722 typedef struct TableFuncScanState
1723 {
1724 ScanState ss; /* its first field is NodeTag */
1725 ExprState *docexpr; /* state for document expression */
1726 ExprState *rowexpr; /* state for row-generating expression */
1727 List *colexprs; /* list of states for column-generating expressions */
1728 List *coldefexprs; /* list of states for column default expressions */
1729 List *ns_names; /* same as TableFunc.ns_names */
1730 List *ns_uris; /* list of states of namespace URI exprs */
1731 Bitmapset *notnulls; /* nullability flag for each output column */
1732 void *opaque; /* table builder private space */
1733 const struct TableFuncRoutine *routine; /* table builder methods */
1734 FmgrInfo *in_functions; /* input function for each column */
1735 Oid *typioparams; /* typioparam for each column */
1736 int64 ordinal; /* row number to be output next */
1737 MemoryContext perTableCxt; /* per-table context */
1738 Tuplestorestate *tupstore; /* output tuple store */
1739 } TableFuncScanState;
1740
1741 /* ----------------
1742 * CteScanState information
1743 *
1744 * CteScan nodes are used to scan a CommonTableExpr query.
1745 *
1746 * Multiple CteScan nodes can read out from the same CTE query. We use
1747 * a tuplestore to hold rows that have been read from the CTE query but
1748 * not yet consumed by all readers.
1749 * ----------------
1750 */
1751 typedef struct CteScanState
1752 {
1753 ScanState ss; /* its first field is NodeTag */
1754 int eflags; /* capability flags to pass to tuplestore */
1755 int readptr; /* index of this node's tuplestore read pointer */
1756 PlanState *cteplanstate; /* PlanState for the CTE query itself */
1757 /* Link to the "leader" CteScanState (possibly this same node) */
1758 struct CteScanState *leader;
1759 /* The remaining fields are only valid in the "leader" CteScanState */
1760 Tuplestorestate *cte_table; /* rows already read from the CTE query but not yet consumed by all readers */
1761 bool eof_cte; /* reached end of CTE query? */
1762 } CteScanState;
1763
1764 /* ----------------
1765 * NamedTuplestoreScanState information
1766 *
1767 * NamedTuplestoreScan nodes are used to scan a Tuplestore created and
1768 * named prior to execution of the query. An example is a transition
1769 * table for an AFTER trigger.
1770 *
1771 * Multiple NamedTuplestoreScan nodes can read out from the same Tuplestore.
1772 * ----------------
1773 */
1774 typedef struct NamedTuplestoreScanState
1775 {
1776 ScanState ss; /* its first field is NodeTag */
1777 int readptr; /* index of this node's tuplestore read pointer */
1778 TupleDesc tupdesc; /* format of the tuples in the tuplestore */
1779 Tuplestorestate *relation; /* the named tuplestore holding the rows */
1780 } NamedTuplestoreScanState;
1781
1782 /* ----------------
1783 * WorkTableScanState information
1784 *
1785 * WorkTableScan nodes are used to scan the work table created by
1786 * a RecursiveUnion node. We locate the RecursiveUnion node
1787 * during executor startup.
1788 * ----------------
1789 */
1790 typedef struct WorkTableScanState
1791 {
1792 ScanState ss; /* its first field is NodeTag */
1793 RecursiveUnionState *rustate; /* RecursiveUnion node whose work table is scanned; located during executor startup */
1794 } WorkTableScanState;
1795
1796 /* ----------------
1797 * ForeignScanState information
1798 *
1799 * ForeignScan nodes are used to scan foreign-data tables.
1800 * ----------------
1801 */
1802 typedef struct ForeignScanState
1803 {
1804 ScanState ss; /* its first field is NodeTag */
1805 ExprState *fdw_recheck_quals; /* original quals not in ss.ps.qual */
1806 Size pscan_len; /* size of parallel coordination information */
1807 /* use struct pointer to avoid including fdwapi.h here */
1808 struct FdwRoutine *fdwroutine; /* NOTE(review): presumably the wrapper's callback table (see fdwapi.h) -- confirm */
1809 void *fdw_state; /* foreign-data wrapper can keep state here */
1810 } ForeignScanState;
1811
1812 /* ----------------
1813 * CustomScanState information
1814 *
1815 * CustomScan nodes are used to execute custom code within executor.
1816 *
1817 * Core code must avoid assuming that the CustomScanState is only as large as
1818 * the structure declared here; providers are allowed to make it the first
1819 * element in a larger structure, and typically would need to do so. The
1820 * struct is actually allocated by the CreateCustomScanState method associated
1821 * with the plan node. Any additional fields can be initialized there, or in
1822 * the BeginCustomScan method.
1823 * ----------------
1824 */
1825 struct CustomExecMethods; /* forward declaration; only used via pointer below */
1826
1827 typedef struct CustomScanState
1828 {
1829 ScanState ss; /* its first field is NodeTag */
1830 uint32 flags; /* mask of CUSTOMPATH_* flags, see
1831 * nodes/extensible.h */
1832 List *custom_ps; /* list of child PlanState nodes, if any */
1833 Size pscan_len; /* size of parallel coordination information */
1834 const struct CustomExecMethods *methods; /* NOTE(review): presumably the provider's executor methods -- confirm */
1835 } CustomScanState;
1836
1837 /* ----------------------------------------------------------------
1838 * Join State Information
1839 * ----------------------------------------------------------------
1840 */
1841
1842 /* ----------------
1843 * JoinState information
1844 *
1845 * Superclass for state nodes of join plans.
1846 * ----------------
1847 */
1848 typedef struct JoinState
1849 {
1850 PlanState ps; /* its first field is NodeTag */
1851 JoinType jointype; /* kind of join (a JoinType value) */
1852 bool single_match; /* True if we should skip to next outer tuple
1853 * after finding one inner match */
1854 ExprState *joinqual; /* JOIN quals (in addition to ps.qual) */
1855 } JoinState;
1856
1857 /* ----------------
1858 * NestLoopState information
1859 *
1860 * NeedNewOuter true if need new outer tuple on next call
1861 * MatchedOuter true if found a join match for current outer tuple
1862 * NullInnerTupleSlot prepared null tuple for left outer joins
1863 * ----------------
1864 */
1865 typedef struct NestLoopState
1866 {
1867 JoinState js; /* its first field is NodeTag */
1868 bool nl_NeedNewOuter; /* true if need new outer tuple on next call */
1869 bool nl_MatchedOuter; /* true if found a join match for current outer tuple */
1870 TupleTableSlot *nl_NullInnerTupleSlot; /* prepared null tuple for left outer joins */
1871 } NestLoopState;
1872
1873 /* ----------------
1874 * MergeJoinState information
1875 *
1876 * NumClauses number of mergejoinable join clauses
1877 * Clauses info for each mergejoinable clause
1878 * JoinState current state of ExecMergeJoin state machine
1879 * SkipMarkRestore true if we may skip Mark and Restore operations
1880 * ExtraMarks true to issue extra Mark operations on inner scan
1881 * ConstFalseJoin true if we have a constant-false joinqual
1882 * FillOuter true if should emit unjoined outer tuples anyway
1883 * FillInner true if should emit unjoined inner tuples anyway
1884 * MatchedOuter true if found a join match for current outer tuple
1885 * MatchedInner true if found a join match for current inner tuple
1886 * OuterTupleSlot slot in tuple table for cur outer tuple
1887 * InnerTupleSlot slot in tuple table for cur inner tuple
1888 * MarkedTupleSlot slot in tuple table for marked tuple
1889 * NullOuterTupleSlot prepared null tuple for right outer joins
1890 * NullInnerTupleSlot prepared null tuple for left outer joins
1891 * OuterEContext workspace for computing outer tuple's join values
1892 * InnerEContext workspace for computing inner tuple's join values
1893 * ----------------
1894 */
1895 /* the pointed-to struct is private in nodeMergejoin.c: */
1896 typedef struct MergeJoinClauseData *MergeJoinClause;
1897
1898 typedef struct MergeJoinState
1899 {
1900 JoinState js; /* its first field is NodeTag */
1901 int mj_NumClauses; /* number of mergejoinable join clauses */
1902 MergeJoinClause mj_Clauses; /* array of length mj_NumClauses */
1903 int mj_JoinState; /* current state of ExecMergeJoin state machine */
1904 bool mj_SkipMarkRestore; /* true if we may skip Mark and Restore operations */
1905 bool mj_ExtraMarks; /* true to issue extra Mark operations on inner scan */
1906 bool mj_ConstFalseJoin; /* true if we have a constant-false joinqual */
1907 bool mj_FillOuter; /* true if should emit unjoined outer tuples anyway */
1908 bool mj_FillInner; /* true if should emit unjoined inner tuples anyway */
1909 bool mj_MatchedOuter; /* true if found a join match for current outer tuple */
1910 bool mj_MatchedInner; /* true if found a join match for current inner tuple */
1911 TupleTableSlot *mj_OuterTupleSlot; /* slot in tuple table for cur outer tuple */
1912 TupleTableSlot *mj_InnerTupleSlot; /* slot in tuple table for cur inner tuple */
1913 TupleTableSlot *mj_MarkedTupleSlot; /* slot in tuple table for marked tuple */
1914 TupleTableSlot *mj_NullOuterTupleSlot; /* prepared null tuple for right outer joins */
1915 TupleTableSlot *mj_NullInnerTupleSlot; /* prepared null tuple for left outer joins */
1916 ExprContext *mj_OuterEContext; /* workspace for computing outer tuple's join values */
1917 ExprContext *mj_InnerEContext; /* workspace for computing inner tuple's join values */
1918 } MergeJoinState;
1919
1920 /* ----------------
1921 * HashJoinState information
1922 *
1923 * hashclauses original form of the hashjoin condition
1924 * hj_OuterHashKeys the outer hash keys in the hashjoin condition
1925 * hj_HashOperators the join operators in the hashjoin condition
1926 * hj_HashTable hash table for the hashjoin
1927 * (NULL if table not built yet)
1928 * hj_CurHashValue hash value for current outer tuple
1929 * hj_CurBucketNo regular bucket# for current outer tuple
1930 * hj_CurSkewBucketNo skew bucket# for current outer tuple
1931 * hj_CurTuple last inner tuple matched to current outer
1932 * tuple, or NULL if starting search
1933 * (hj_CurXXX variables are undefined if
1934 * OuterTupleSlot is empty!)
1935 * hj_OuterTupleSlot tuple slot for outer tuples
1936 * hj_HashTupleSlot tuple slot for inner (hashed) tuples
1937 * hj_NullOuterTupleSlot prepared null tuple for right/full outer joins
1938 * hj_NullInnerTupleSlot prepared null tuple for left/full outer joins
1939 * hj_FirstOuterTupleSlot first tuple retrieved from outer plan
1940 * hj_JoinState current state of ExecHashJoin state machine
1941 * hj_MatchedOuter true if found a join match for current outer
1942 * hj_OuterNotEmpty true if outer relation known not empty
1943 * ----------------
1944 */
1945
1946 /* the pointed-to structs are defined in executor/hashjoin.h: */
1947 typedef struct HashJoinTupleData *HashJoinTuple;
1948 typedef struct HashJoinTableData *HashJoinTable;
1949
1950 typedef struct HashJoinState
1951 {
1952 JoinState js; /* its first field is NodeTag */
1953 ExprState *hashclauses; /* original form of the hashjoin condition */
1954 List *hj_OuterHashKeys; /* list of ExprState nodes */
1955 List *hj_HashOperators; /* list of operator OIDs */
1956 List *hj_Collations; /* NOTE(review): undocumented; presumably collations for the hash keys -- confirm */
1957 HashJoinTable hj_HashTable; /* hash table for the hashjoin (NULL if table not built yet) */
1958 uint32 hj_CurHashValue; /* hash value for current outer tuple */
1959 int hj_CurBucketNo; /* regular bucket# for current outer tuple */
1960 int hj_CurSkewBucketNo; /* skew bucket# for current outer tuple */
1961 HashJoinTuple hj_CurTuple; /* last inner tuple matched to current outer tuple, or NULL if starting search */
1962 TupleTableSlot *hj_OuterTupleSlot; /* tuple slot for outer tuples */
1963 TupleTableSlot *hj_HashTupleSlot; /* tuple slot for inner (hashed) tuples */
1964 TupleTableSlot *hj_NullOuterTupleSlot; /* prepared null tuple for right/full outer joins */
1965 TupleTableSlot *hj_NullInnerTupleSlot; /* prepared null tuple for left/full outer joins */
1966 TupleTableSlot *hj_FirstOuterTupleSlot; /* first tuple retrieved from outer plan */
1967 int hj_JoinState; /* current state of ExecHashJoin state machine */
1968 bool hj_MatchedOuter; /* true if found a join match for current outer */
1969 bool hj_OuterNotEmpty; /* true if outer relation known not empty */
1970 } HashJoinState;
1971
1972
1973 /* ----------------------------------------------------------------
1974 * Materialization State Information
1975 * ----------------------------------------------------------------
1976 */
1977
1978 /* ----------------
1979 * MaterialState information
1980 *
1981 * materialize nodes are used to materialize the results
1982 * of a subplan into a temporary file.
1983 *
1984 * ss.ss_ScanTupleSlot refers to output of underlying plan.
1985 * ----------------
1986 */
1987 typedef struct MaterialState
1988 {
1989 ScanState ss; /* its first field is NodeTag */
1990 int eflags; /* capability flags to pass to tuplestore */
1991 bool eof_underlying; /* reached end of underlying plan? */
1992 Tuplestorestate *tuplestorestate; /* NOTE(review): presumably holds the materialized subplan output -- confirm */
1993 } MaterialState;
1994
1995 /* ----------------
1996 * Shared memory container for per-worker sort information
1997 * ----------------
1998 */
1999 typedef struct SharedSortInfo
2000 {
2001 int num_workers; /* NOTE(review): presumably the number of sinstrument[] entries -- confirm */
2002 TuplesortInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER]; /* per-worker sort information; flexible array, must stay last */
2003 } SharedSortInfo;
2004
2005 /* ----------------
2006 * SortState information
2007 * ----------------
2008 */
2009 typedef struct SortState
2010 {
2011 ScanState ss; /* its first field is NodeTag */
2012 bool randomAccess; /* need random access to sort output? */
2013 bool bounded; /* is the result set bounded? */
2014 int64 bound; /* if bounded, how many tuples are needed */
2015 bool sort_Done; /* sort completed yet? */
2016 bool bounded_Done; /* value of "bounded" we did the sort with */
2017 int64 bound_Done; /* value of "bound" we did the sort with */
2018 void *tuplesortstate; /* private state of tuplesort.c */
2019 bool am_worker; /* are we a worker? */
2020 SharedSortInfo *shared_info; /* shared per-worker sort information, one entry per worker */
2021 } SortState;
2022
2023 /* ---------------------
2024 * GroupState information
2025 * ---------------------
2026 */
2027 typedef struct GroupState
2028 {
2029 ScanState ss; /* its first field is NodeTag */
2030 ExprState *eqfunction; /* equality function, held as an ExprState */
2031 bool grp_done; /* indicates completion of Group scan */
2032 } GroupState;
2033
2034 /* ---------------------
2035 * AggState information
2036 *
2037 * ss.ss_ScanTupleSlot refers to output of underlying plan.
2038 *
2039 * Note: ss.ps.ps_ExprContext contains ecxt_aggvalues and
2040 * ecxt_aggnulls arrays, which hold the computed agg values for the current
2041 * input group during evaluation of an Agg node's output tuple(s). We
2042 * create a second ExprContext, tmpcontext, in which to evaluate input
2043 * expressions and run the aggregate transition functions.
2044 * ---------------------
2045 */
2046 /* the structs behind these pointer typedefs are private in nodeAgg.c: */
2047 typedef struct AggStatePerAggData *AggStatePerAgg;
2048 typedef struct AggStatePerTransData *AggStatePerTrans;
2049 typedef struct AggStatePerGroupData *AggStatePerGroup;
2050 typedef struct AggStatePerPhaseData *AggStatePerPhase;
2051 typedef struct AggStatePerHashData *AggStatePerHash;
2052
2053 typedef struct AggState
2054 {
2055 ScanState ss; /* its first field is NodeTag */
2056 List *aggs; /* all Aggref nodes in targetlist & quals */
2057 int numaggs; /* length of list (could be zero!) */
2058 int numtrans; /* number of pertrans items */
2059 AggStrategy aggstrategy; /* strategy mode */
2060 AggSplit aggsplit; /* agg-splitting mode, see nodes.h */
2061 AggStatePerPhase phase; /* pointer to current phase data */
2062 int numphases; /* number of phases (including phase 0) */
2063 int current_phase; /* current phase number */
2064 AggStatePerAgg peragg; /* per-Aggref information */
2065 AggStatePerTrans pertrans; /* per-Trans state information */
2066 ExprContext *hashcontext; /* econtexts for long-lived data (hashtable) */
2067 ExprContext **aggcontexts; /* econtexts for long-lived data (per GS) */
2068 ExprContext *tmpcontext; /* econtext for input expressions */
2069 #define FIELDNO_AGGSTATE_CURAGGCONTEXT 14 /* 0-based member index of curaggcontext; update if members are added above */
2070 ExprContext *curaggcontext; /* currently active aggcontext */
2071 AggStatePerAgg curperagg; /* currently active aggregate, if any */
2072 #define FIELDNO_AGGSTATE_CURPERTRANS 16 /* 0-based member index of curpertrans */
2073 AggStatePerTrans curpertrans; /* currently active trans state, if any */
2074 bool input_done; /* indicates end of input */
2075 bool agg_done; /* indicates completion of Agg scan */
2076 int projected_set; /* The last projected grouping set */
2077 #define FIELDNO_AGGSTATE_CURRENT_SET 20 /* 0-based member index of current_set */
2078 int current_set; /* The current grouping set being evaluated */
2079 Bitmapset *grouped_cols; /* grouped cols in current projection */
2080 List *all_grouped_cols; /* list of all grouped cols in DESC order */
2081 /* These fields are for grouping set phase data */
2082 int maxsets; /* The max number of sets in any phase */
2083 AggStatePerPhase phases; /* array of all phases */
2084 Tuplesortstate *sort_in; /* sorted input to phases > 1 */
2085 Tuplesortstate *sort_out; /* input is copied here for next phase */
2086 TupleTableSlot *sort_slot; /* slot for sort results */
2087 /* these fields are used in AGG_PLAIN and AGG_SORTED modes: */
2088 AggStatePerGroup *pergroups; /* grouping set indexed array of per-group
2089 * pointers */
2090 HeapTuple grp_firstTuple; /* copy of first tuple of current group */
2091 /* these fields are used in AGG_HASHED and AGG_MIXED modes: */
2092 bool table_filled; /* hash table filled yet? */
2093 int num_hashes; /* NOTE(review): presumably the length of the perhash array -- confirm */
2094 AggStatePerHash perhash; /* array of per-hashtable data */
2095 AggStatePerGroup *hash_pergroup; /* grouping set indexed array of
2096 * per-group pointers */
2097
2098 /* support for evaluation of agg input expressions: */
2099 #define FIELDNO_AGGSTATE_ALL_PERGROUPS 34 /* 0-based member index of all_pergroups */
2100 AggStatePerGroup *all_pergroups; /* array of first ->pergroups, then
2101 * ->hash_pergroup */
2102 ProjectionInfo *combinedproj; /* projection machinery */
2103 } AggState;
2104
2105 /* ----------------
2106 * WindowAggState information
2107 * ----------------
2108 */
2109 /* the structs behind these pointer typedefs are private in nodeWindowAgg.c: */
2110 typedef struct WindowStatePerFuncData *WindowStatePerFunc;
2111 typedef struct WindowStatePerAggData *WindowStatePerAgg;
2112
2113 typedef struct WindowAggState
2114 {
2115 ScanState ss; /* its first field is NodeTag */
2116
2117 /* these fields are filled in by ExecInitExpr: */
2118 List *funcs; /* all WindowFunc nodes in targetlist */
2119 int numfuncs; /* total number of window functions */
2120 int numaggs; /* number that are plain aggregates */
2121
2122 WindowStatePerFunc perfunc; /* per-window-function information */
2123 WindowStatePerAgg peragg; /* per-plain-aggregate information */
2124 ExprState *partEqfunction; /* equality funcs for partition columns */
2125 ExprState *ordEqfunction; /* equality funcs for ordering columns */
2126 Tuplestorestate *buffer; /* stores rows of current partition */
2127 int current_ptr; /* read pointer # for current row */
2128 int framehead_ptr; /* read pointer # for frame head, if used */
2129 int frametail_ptr; /* read pointer # for frame tail, if used */
2130 int grouptail_ptr; /* read pointer # for group tail, if used */
2131 int64 spooled_rows; /* total # of rows in buffer */
2132 int64 currentpos; /* position of current row in partition */
2133 int64 frameheadpos; /* current frame head position */
2134 int64 frametailpos; /* current frame tail position (frame end+1) */
2135 /* use struct pointer to avoid including windowapi.h here */
2136 struct WindowObjectData *agg_winobj; /* winobj for aggregate fetches */
2137 int64 aggregatedbase; /* start row for current aggregates */
2138 int64 aggregatedupto; /* rows before this one are aggregated */
2139
2140 int frameOptions; /* frame_clause options, see WindowDef */
2141 ExprState *startOffset; /* expression for starting bound offset */
2142 ExprState *endOffset; /* expression for ending bound offset */
2143 Datum startOffsetValue; /* result of startOffset evaluation */
2144 Datum endOffsetValue; /* result of endOffset evaluation */
2145
2146 /* these fields are used with RANGE offset PRECEDING/FOLLOWING: */
2147 FmgrInfo startInRangeFunc; /* in_range function for startOffset */
2148 FmgrInfo endInRangeFunc; /* in_range function for endOffset */
2149 Oid inRangeColl; /* collation for in_range tests */
2150 bool inRangeAsc; /* use ASC sort order for in_range tests? */
2151 bool inRangeNullsFirst; /* nulls sort first for in_range tests? */
2152
2153 /* these fields are used in GROUPS mode: */
2154 int64 currentgroup; /* peer group # of current row in partition */
2155 int64 frameheadgroup; /* peer group # of frame head row */
2156 int64 frametailgroup; /* peer group # of frame tail row */
2157 int64 groupheadpos; /* current row's peer group head position */
2158 int64 grouptailpos; /* current row's peer group tail position (group end+1) */
2159
2160 MemoryContext partcontext; /* context for partition-lifespan data */
2161 MemoryContext aggcontext; /* shared context for aggregate working data */
2162 MemoryContext curaggcontext; /* current aggregate's working data */
2163 ExprContext *tmpcontext; /* short-term evaluation context */
2164
2165 bool all_first; /* true if the scan is starting */
2166 bool all_done; /* true if the scan is finished */
2167 bool partition_spooled; /* true if all tuples in current partition
2168 * have been spooled into tuplestore */
2169 bool more_partitions; /* true if there's more partitions after
2170 * this one */
2171 bool framehead_valid; /* true if frameheadpos is known up to
2172 * date for current row */
2173 bool frametail_valid; /* true if frametailpos is known up to
2174 * date for current row */
2175 bool grouptail_valid; /* true if grouptailpos is known up to
2176 * date for current row */
2177
2178 TupleTableSlot *first_part_slot; /* first tuple of current or next
2179 * partition */
2180 TupleTableSlot *framehead_slot; /* first tuple of current frame */
2181 TupleTableSlot *frametail_slot; /* first tuple after current frame */
2182
2183 /* temporary slots for tuples fetched back from tuplestore */
2184 TupleTableSlot *agg_row_slot;
2185 TupleTableSlot *temp_slot_1;
2186 TupleTableSlot *temp_slot_2;
2187 } WindowAggState;
2188
2189 /* ----------------
2190 * UniqueState information
2191 *
2192 * Unique nodes are used "on top of" sort nodes to discard
2193 * duplicate tuples returned from the sort phase. Basically
2194 * all it does is compare the current tuple from the subplan
2195 * with the previously fetched tuple (stored in its result slot).
2196 * If the two are identical in all interesting fields, then
2197 * we just fetch another tuple from the sort and try again.
2198 * ----------------
2199 */
2200 typedef struct UniqueState
2201 {
2202 PlanState ps; /* its first field is NodeTag */
2203 ExprState *eqfunction; /* tuple equality qual: current subplan tuple vs. previously fetched one */
2204 } UniqueState;
2205
2206 /* ----------------
2207 * GatherState information
2208 *
2209 * Gather nodes launch 1 or more parallel workers, run a subplan
2210 * in those workers, and collect the results.
2211 * ----------------
2212 */
2213 typedef struct GatherState
2214 {
2215 PlanState ps; /* its first field is NodeTag */
2216 bool initialized; /* workers launched? */
2217 bool need_to_scan_locally; /* need to read from local plan? */
2218 int64 tuples_needed; /* tuple bound, see ExecSetTupleBound */
2219 /* these fields are set up once: */
2220 TupleTableSlot *funnel_slot; /* NOTE(review): undocumented; presumably holds tuples read from workers -- confirm */
2221 struct ParallelExecutorInfo *pei; /* NOTE(review): parallel-execution bookkeeping, declared elsewhere -- confirm */
2222 /* all remaining fields are reinitialized during a rescan: */
2223 int nworkers_launched; /* original number of workers */
2224 int nreaders; /* number of still-active workers */
2225 int nextreader; /* next one to try to read from */
2226 struct TupleQueueReader **reader; /* array with nreaders active entries */
2227 } GatherState;
2228
2229 /* ----------------
2230 * GatherMergeState information
2231 *
2232 * Gather merge nodes launch 1 or more parallel workers, run a
2233 * subplan which produces sorted output in each worker, and then
2234 * merge the results into a single sorted stream.
2235 * ----------------
2236 */
2237 struct GMReaderTupleBuffer; /* private in nodeGatherMerge.c */
2238
2239 typedef struct GatherMergeState
2240 {
2241 PlanState ps; /* its first field is NodeTag */
2242 bool initialized; /* workers launched? */
2243 bool gm_initialized; /* gather_merge_init() done? */
2244 bool need_to_scan_locally; /* need to read from local plan? */
2245 int64 tuples_needed; /* tuple bound, see ExecSetTupleBound */
2246 /* these fields are set up once: */
2247 TupleDesc tupDesc; /* descriptor for subplan result tuples */
2248 int gm_nkeys; /* number of sort columns */
2249 SortSupport gm_sortkeys; /* array of length gm_nkeys */
2250 struct ParallelExecutorInfo *pei; /* NOTE(review): parallel-execution bookkeeping, declared elsewhere -- confirm */
2251 /* all remaining fields are reinitialized during a rescan */
2252 /* (but the arrays are not reallocated, just cleared) */
2253 int nworkers_launched; /* original number of workers */
2254 int nreaders; /* number of active workers */
2255 TupleTableSlot **gm_slots; /* array with nreaders+1 entries */
2256 struct TupleQueueReader **reader; /* array with nreaders active entries */
2257 struct GMReaderTupleBuffer *gm_tuple_buffers; /* nreaders tuple buffers */
2258 struct binaryheap *gm_heap; /* binary heap of slot indices */
2259 } GatherMergeState;
2260
2261 /* ----------------
2262 * Values displayed by EXPLAIN ANALYZE
2263 * ----------------
2264 */
2265 typedef struct HashInstrumentation
2266 {
2267 int nbuckets; /* number of buckets at end of execution */
2268 int nbuckets_original; /* planned number of buckets */
2269 int nbatch; /* number of batches at end of execution */
2270 int nbatch_original; /* planned number of batches */
2271 size_t space_peak; /* peak memory usage in bytes */
2272 } HashInstrumentation;
2273
2274 /* ----------------
2275 * Shared memory container for per-worker hash information
2276 * ----------------
2277 */
2278 typedef struct SharedHashInfo
2279 {
2280 int num_workers; /* NOTE(review): presumably the number of hinstrument[] entries -- confirm */
2281 HashInstrumentation hinstrument[FLEXIBLE_ARRAY_MEMBER]; /* per-worker hash information; flexible array, must stay last */
2282 } SharedHashInfo;
2283
2284 /* ----------------
2285 * HashState information
2286 * ----------------
2287 */
2288 typedef struct HashState
2289 {
2290 PlanState ps; /* its first field is NodeTag */
2291 HashJoinTable hashtable; /* hash table for the hashjoin */
2292 List *hashkeys; /* list of ExprState nodes */
2293
2294 SharedHashInfo *shared_info; /* shared per-worker hash information, one entry per worker */
2295 HashInstrumentation *hinstrument; /* this worker's entry */
2296
2297 /* Parallel hash state. */
2298 struct ParallelHashJoinState *parallel_state;
2299 } HashState;
2300
2301 /* ----------------
2302 * SetOpState information
2303 *
2304 * Even in "sorted" mode, SetOp nodes are more complex than a simple
2305 * Unique, since we have to count how many duplicates to return. But
2306 * we also support hashing, so this is really more like a cut-down
2307 * form of Agg.
2308 * ----------------
2309 */
2310 /* the struct behind this pointer typedef is private in nodeSetOp.c: */
2311 typedef struct SetOpStatePerGroupData *SetOpStatePerGroup;
2312
2313 typedef struct SetOpState
2314 {
2315 PlanState ps; /* its first field is NodeTag */
2316 ExprState *eqfunction; /* equality comparator */
2317 Oid *eqfuncoids; /* per-grouping-field equality function OIDs */
2318 FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
2319 bool setop_done; /* indicates completion of output scan */
2320 long numOutput; /* number of dups left to output */
2321 /* these fields are used in SETOP_SORTED mode: */
2322 SetOpStatePerGroup pergroup; /* per-group working state */
2323 HeapTuple grp_firstTuple; /* copy of first tuple of current group */
2324 /* these fields are used in SETOP_HASHED mode: */
2325 TupleHashTable hashtable; /* hash table with one entry per group */
2326 MemoryContext tableContext; /* memory context containing hash table */
2327 bool table_filled; /* hash table filled yet? */
2328 TupleHashIterator hashiter; /* for iterating through hash table */
2329 } SetOpState;
2330
2331 /* ----------------
2332 * LockRowsState information
2333 *
2334 * LockRows nodes are used to enforce FOR [KEY] UPDATE/SHARE locking.
2335 * ----------------
2336 */
2337 typedef struct LockRowsState
2338 {
2339 PlanState ps; /* its first field is NodeTag */
2340 List *lr_arowMarks; /* list of ExecAuxRowMarks */
2341 EPQState lr_epqstate; /* embedded state for evaluating EvalPlanQual rechecks */
2342 } LockRowsState;
2343
2344 /* ----------------
2345 * LimitState information
2346 *
2347 * Limit nodes are used to enforce LIMIT/OFFSET clauses.
2348 * They just select the desired subrange of their subplan's output.
2349 *
2350 * offset is the number of initial tuples to skip (0 does nothing).
2351 * count is the number of tuples to return after skipping the offset tuples.
2352 * If no limit count was specified, count is undefined and noCount is true.
2353 * When lstate == LIMIT_INITIAL, offset/count/noCount haven't been set yet.
2354 * ----------------
2355 */
2356 typedef enum
2357 {
2358 LIMIT_INITIAL, /* initial state for LIMIT node; offset/count/noCount not set yet */
2359 LIMIT_RESCAN, /* rescan after recomputing parameters */
2360 LIMIT_EMPTY, /* there are no returnable rows */
2361 LIMIT_INWINDOW, /* have returned a row in the window */
2362 LIMIT_SUBPLANEOF, /* at EOF of subplan (within window) */
2363 LIMIT_WINDOWEND, /* stepped off end of window */
2364 LIMIT_WINDOWSTART /* stepped off beginning of window */
2365 } LimitStateCond;
2366
2367 typedef struct LimitState
2368 {
2369 PlanState ps; /* its first field is NodeTag */
2370 ExprState *limitOffset; /* OFFSET parameter, or NULL if none */
2371 ExprState *limitCount; /* COUNT parameter, or NULL if none */
2372 int64 offset; /* current OFFSET value: initial tuples to skip (0 does nothing) */
2373 int64 count; /* current COUNT, if any; undefined when noCount is true */
2374 bool noCount; /* if true, ignore count (no limit count was specified) */
2375 LimitStateCond lstate; /* state machine status, as above */
2376 int64 position; /* 1-based index of last tuple returned */
2377 TupleTableSlot *subSlot; /* tuple last obtained from subplan */
2378 } LimitState;
2379
2380 #endif /* EXECNODES_H */
2381