1 /*-------------------------------------------------------------------------
2  *
3  * nodeAgg.h
4  *	  prototypes for nodeAgg.c
5  *
6  *
7  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/include/executor/nodeAgg.h
11  *
12  *-------------------------------------------------------------------------
13  */
14 #ifndef NODEAGG_H
15 #define NODEAGG_H
16 
17 #include "access/parallel.h"
18 #include "nodes/execnodes.h"
19 
20 
21 /*
22  * AggStatePerTransData - per aggregate state value information
23  *
24  * Working state for updating the aggregate's state value, by calling the
25  * transition function with an input row. This struct does not store the
26  * information needed to produce the final aggregate result from the transition
27  * state, that's stored in AggStatePerAggData instead. This separation allows
28  * multiple aggregate results to be produced from a single state value.
29  */
30 typedef struct AggStatePerTransData
31 {
32 	/*
33 	 * These values are set up during ExecInitAgg() and do not change
34 	 * thereafter:
35 	 */
36 
37 	/*
38 	 * Link to an Aggref expr this state value is for.
39 	 *
40 	 * There can be multiple Aggref's sharing the same state value, so long as
41 	 * the inputs and transition functions are identical and the final
42 	 * functions are not read-write.  This points to the first one of them.
43 	 */
44 	Aggref	   *aggref;
45 
46 	/*
47 	 * Is this state value actually being shared by more than one Aggref?
48 	 */
49 	bool		aggshared;
50 
51 	/*
52 	 * Number of aggregated input columns.  This includes ORDER BY expressions
53 	 * in both the plain-agg and ordered-set cases.  Ordered-set direct args
54 	 * are not counted, though.
55 	 */
56 	int			numInputs;
57 
58 	/*
59 	 * Number of aggregated input columns to pass to the transfn.  This
60 	 * includes the ORDER BY columns for ordered-set aggs, but not for plain
61 	 * aggs.  (This doesn't count the transition state value!)
62 	 */
63 	int			numTransInputs;
64 
65 	/* Oid of the state transition or combine function */
66 	Oid			transfn_oid;
67 
68 	/* Oid of the serialization function or InvalidOid */
69 	Oid			serialfn_oid;
70 
71 	/* Oid of the deserialization function or InvalidOid */
72 	Oid			deserialfn_oid;
73 
74 	/* Oid of state value's datatype */
75 	Oid			aggtranstype;
76 
77 	/*
78 	 * fmgr lookup data for transition function or combine function.  Note in
79 	 * particular that the fn_strict flag is kept here.
80 	 */
81 	FmgrInfo	transfn;
82 
83 	/* fmgr lookup data for serialization function */
84 	FmgrInfo	serialfn;
85 
86 	/* fmgr lookup data for deserialization function */
87 	FmgrInfo	deserialfn;
88 
89 	/* Input collation derived for aggregate */
90 	Oid			aggCollation;
91 
92 	/* number of sorting columns */
93 	int			numSortCols;
94 
95 	/* number of sorting columns to consider in DISTINCT comparisons */
96 	/* (this is either zero or the same as numSortCols) */
97 	int			numDistinctCols;
98 
99 	/* deconstructed sorting information (arrays of length numSortCols) */
100 	AttrNumber *sortColIdx;
101 	Oid		   *sortOperators;
102 	Oid		   *sortCollations;
103 	bool	   *sortNullsFirst;
104 
105 	/*
106 	 * Comparators for input columns --- only set/used when aggregate has
107 	 * DISTINCT flag. equalfnOne version is used for single-column
108 	 * comparisons, equalfnMulti for the case of multiple columns.
109 	 */
110 	FmgrInfo	equalfnOne;
111 	ExprState  *equalfnMulti;
112 
113 	/*
114 	 * initial value from pg_aggregate entry
115 	 */
116 	Datum		initValue;
117 	bool		initValueIsNull;
118 
119 	/*
120 	 * We need the len and byval info for the agg's input and transition data
121 	 * types in order to know how to copy/delete values.
122 	 *
123 	 * Note that the info for the input type is used only when handling
124 	 * DISTINCT aggs with just one argument, so there is only one input type.
125 	 */
126 	int16		inputtypeLen,
127 				transtypeLen;
128 	bool		inputtypeByVal,
129 				transtypeByVal;
130 
131 	/*
132 	 * Slots for holding the evaluated input arguments.  These are set up
133 	 * during ExecInitAgg() and then used for each input row requiring either
134 	 * FILTER or ORDER BY/DISTINCT processing.
135 	 */
136 	TupleTableSlot *sortslot;	/* current input tuple */
137 	TupleTableSlot *uniqslot;	/* used for multi-column DISTINCT */
138 	TupleDesc	sortdesc;		/* descriptor of input tuples */
139 
140 	/*
141 	 * These values are working state that is initialized at the start of an
142 	 * input tuple group and updated for each input tuple.
143 	 *
144 	 * For a simple (non DISTINCT/ORDER BY) aggregate, we just feed the input
145 	 * values straight to the transition function.  If it's DISTINCT or
146 	 * requires ORDER BY, we pass the input values into a Tuplesort object;
147 	 * then at completion of the input tuple group, we scan the sorted values,
148 	 * eliminate duplicates if needed, and run the transition function on the
149 	 * rest.
150 	 *
151 	 * We need a separate tuplesort for each grouping set.
152 	 */
153 
154 	Tuplesortstate **sortstates;	/* sort objects, if DISTINCT or ORDER BY */
155 
156 	/*
157 	 * This field is a pre-initialized FunctionCallInfo struct used for
158 	 * calling this aggregate's transfn.  We save a few cycles per row by not
159 	 * re-initializing the unchanging fields; which isn't much, but it seems
160 	 * worth the extra space consumption.
161 	 */
162 	FunctionCallInfo transfn_fcinfo;
163 
164 	/* Likewise for serialization and deserialization functions */
165 	FunctionCallInfo serialfn_fcinfo;
166 
167 	FunctionCallInfo deserialfn_fcinfo;
168 }			AggStatePerTransData;
169 
170 /*
171  * AggStatePerAggData - per-aggregate information
172  *
173  * This contains the information needed to call the final function, to produce
174  * a final aggregate result from the state value. If there are multiple
175  * identical Aggrefs in the query, they can all share the same per-agg data.
176  *
177  * These values are set up during ExecInitAgg() and do not change thereafter.
178  */
179 typedef struct AggStatePerAggData
180 {
181 	/*
182 	 * Link to an Aggref expr this state value is for.
183 	 *
184 	 * There can be multiple identical Aggref's sharing the same per-agg. This
185 	 * points to the first one of them.
186 	 */
187 	Aggref	   *aggref;
188 
189 	/* index to the state value which this agg should use */
190 	int			transno;
191 
192 	/* Optional Oid of final function (may be InvalidOid) */
193 	Oid			finalfn_oid;
194 
195 	/*
196 	 * fmgr lookup data for final function --- only valid when finalfn_oid is
197 	 * not InvalidOid.
198 	 */
199 	FmgrInfo	finalfn;
200 
201 	/*
202 	 * Number of arguments to pass to the finalfn.  This is always at least 1
203 	 * (the transition state value) plus any ordered-set direct args. If the
204 	 * finalfn wants extra args then we pass nulls corresponding to the
205 	 * aggregated input columns.
206 	 */
207 	int			numFinalArgs;
208 
209 	/* ExprStates for any direct-argument expressions */
210 	List	   *aggdirectargs;
211 
212 	/*
213 	 * We need the len and byval info for the agg's result data type in order
214 	 * to know how to copy/delete values.
215 	 */
216 	int16		resulttypeLen;
217 	bool		resulttypeByVal;
218 
219 	/*
220 	 * "shareable" is false if this agg cannot share state values with other
221 	 * aggregates because the final function is read-write.
222 	 */
223 	bool		shareable;
224 }			AggStatePerAggData;
225 
226 /*
227  * AggStatePerGroupData - per-aggregate-per-group working state
228  *
229  * These values are working state that is initialized at the start of
230  * an input tuple group and updated for each input tuple.
231  *
232  * In AGG_PLAIN and AGG_SORTED modes, we have a single array of these
233  * structs (pointed to by aggstate->pergroup); we re-use the array for
234  * each input group, if it's AGG_SORTED mode.  In AGG_HASHED mode, the
235  * hash table contains an array of these structs for each tuple group.
236  *
237  * Logically, the sortstate field belongs in this struct, but we do not
238  * keep it here for space reasons: we don't support DISTINCT aggregates
239  * in AGG_HASHED mode, so there's no reason to use up a pointer field
240  * in every entry of the hashtable.
241  */
242 typedef struct AggStatePerGroupData
243 {
244 #define FIELDNO_AGGSTATEPERGROUPDATA_TRANSVALUE 0
245 	Datum		transValue;		/* current transition value */
246 #define FIELDNO_AGGSTATEPERGROUPDATA_TRANSVALUEISNULL 1
247 	bool		transValueIsNull;
248 
249 #define FIELDNO_AGGSTATEPERGROUPDATA_NOTRANSVALUE 2
250 	bool		noTransValue;	/* true if transValue not set yet */
251 
252 	/*
253 	 * Note: noTransValue initially has the same value as transValueIsNull,
254 	 * and if true both are cleared to false at the same time.  They are not
255 	 * the same though: if transfn later returns a NULL, we want to keep that
256 	 * NULL and not auto-replace it with a later input value. Only the first
257 	 * non-NULL input will be auto-substituted.
258 	 */
259 }			AggStatePerGroupData;
260 
261 /*
262  * AggStatePerPhaseData - per-grouping-set-phase state
263  *
264  * Grouping sets are divided into "phases", where a single phase can be
265  * processed in one pass over the input. If there is more than one phase, then
266  * at the end of input from the current phase, state is reset and another pass
267  * taken over the data which has been re-sorted in the mean time.
268  *
269  * Accordingly, each phase specifies a list of grouping sets and group clause
270  * information, plus each phase after the first also has a sort order.
271  */
272 typedef struct AggStatePerPhaseData
273 {
274 	AggStrategy aggstrategy;	/* strategy for this phase */
275 	int			numsets;		/* number of grouping sets (or 0) */
276 	int		   *gset_lengths;	/* lengths of grouping sets */
277 	Bitmapset **grouped_cols;	/* column groupings for rollup */
278 	ExprState **eqfunctions;	/* expression returning equality, indexed by
279 								 * nr of cols to compare */
280 	Agg		   *aggnode;		/* Agg node for phase data */
281 	Sort	   *sortnode;		/* Sort node for input ordering for phase */
282 
283 	ExprState  *evaltrans;		/* evaluation of transition functions  */
284 
285 	/*----------
286 	 * Cached variants of the compiled expression.
287 	 * first subscript: 0: outerops; 1: TTSOpsMinimalTuple
288 	 * second subscript: 0: no NULL check; 1: with NULL check
289 	 *----------
290 	 */
291 	ExprState  *evaltrans_cache[2][2];
292 }			AggStatePerPhaseData;
293 
294 /*
295  * AggStatePerHashData - per-hashtable state
296  *
297  * When doing grouping sets with hashing, we have one of these for each
298  * grouping set. (When doing hashing without grouping sets, we have just one of
299  * them.)
300  */
301 typedef struct AggStatePerHashData
302 {
303 	TupleHashTable hashtable;	/* hash table with one entry per group */
304 	TupleHashIterator hashiter; /* for iterating through hash table */
305 	TupleTableSlot *hashslot;	/* slot for loading hash table */
306 	FmgrInfo   *hashfunctions;	/* per-grouping-field hash fns */
307 	Oid		   *eqfuncoids;		/* per-grouping-field equality fns */
308 	int			numCols;		/* number of hash key columns */
309 	int			numhashGrpCols; /* number of columns in hash table */
310 	int			largestGrpColIdx;	/* largest col required for hashing */
311 	AttrNumber *hashGrpColIdxInput; /* hash col indices in input slot */
312 	AttrNumber *hashGrpColIdxHash;	/* indices in hash table tuples */
313 	Agg		   *aggnode;		/* original Agg node, for numGroups etc. */
314 }			AggStatePerHashData;
315 
316 
317 extern AggState *ExecInitAgg(Agg *node, EState *estate, int eflags);
318 extern void ExecEndAgg(AggState *node);
319 extern void ExecReScanAgg(AggState *node);
320 
321 extern Size hash_agg_entry_size(int numTrans, Size tupleWidth,
322 								Size transitionSpace);
323 extern void hash_agg_set_limits(double hashentrysize, double input_groups,
324 								int used_bits, Size *mem_limit,
325 								uint64 *ngroups_limit, int *num_partitions);
326 
327 /* parallel instrumentation support */
328 extern void ExecAggEstimate(AggState *node, ParallelContext *pcxt);
329 extern void ExecAggInitializeDSM(AggState *node, ParallelContext *pcxt);
330 extern void ExecAggInitializeWorker(AggState *node, ParallelWorkerContext *pwcxt);
331 extern void ExecAggRetrieveInstrumentation(AggState *node);
332 
333 #endif							/* NODEAGG_H */
334