1 /*------------------------------------------------------------------------- 2 * 3 * nodeAgg.h 4 * prototypes for nodeAgg.c 5 * 6 * 7 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group 8 * Portions Copyright (c) 1994, Regents of the University of California 9 * 10 * src/include/executor/nodeAgg.h 11 * 12 *------------------------------------------------------------------------- 13 */ 14 #ifndef NODEAGG_H 15 #define NODEAGG_H 16 17 #include "access/parallel.h" 18 #include "nodes/execnodes.h" 19 20 21 /* 22 * AggStatePerTransData - per aggregate state value information 23 * 24 * Working state for updating the aggregate's state value, by calling the 25 * transition function with an input row. This struct does not store the 26 * information needed to produce the final aggregate result from the transition 27 * state, that's stored in AggStatePerAggData instead. This separation allows 28 * multiple aggregate results to be produced from a single state value. 29 */ 30 typedef struct AggStatePerTransData 31 { 32 /* 33 * These values are set up during ExecInitAgg() and do not change 34 * thereafter: 35 */ 36 37 /* 38 * Link to an Aggref expr this state value is for. 39 * 40 * There can be multiple Aggref's sharing the same state value, so long as 41 * the inputs and transition functions are identical and the final 42 * functions are not read-write. This points to the first one of them. 43 */ 44 Aggref *aggref; 45 46 /* 47 * Is this state value actually being shared by more than one Aggref? 48 */ 49 bool aggshared; 50 51 /* 52 * Number of aggregated input columns. This includes ORDER BY expressions 53 * in both the plain-agg and ordered-set cases. Ordered-set direct args 54 * are not counted, though. 55 */ 56 int numInputs; 57 58 /* 59 * Number of aggregated input columns to pass to the transfn. This 60 * includes the ORDER BY columns for ordered-set aggs, but not for plain 61 * aggs. (This doesn't count the transition state value!) 62 */ 63 int numTransInputs; 64 65 /* Oid of the state transition or combine function */ 66 Oid transfn_oid; 67 68 /* Oid of the serialization function or InvalidOid */ 69 Oid serialfn_oid; 70 71 /* Oid of the deserialization function or InvalidOid */ 72 Oid deserialfn_oid; 73 74 /* Oid of state value's datatype */ 75 Oid aggtranstype; 76 77 /* 78 * fmgr lookup data for transition function or combine function. Note in 79 * particular that the fn_strict flag is kept here. 80 */ 81 FmgrInfo transfn; 82 83 /* fmgr lookup data for serialization function */ 84 FmgrInfo serialfn; 85 86 /* fmgr lookup data for deserialization function */ 87 FmgrInfo deserialfn; 88 89 /* Input collation derived for aggregate */ 90 Oid aggCollation; 91 92 /* number of sorting columns */ 93 int numSortCols; 94 95 /* number of sorting columns to consider in DISTINCT comparisons */ 96 /* (this is either zero or the same as numSortCols) */ 97 int numDistinctCols; 98 99 /* deconstructed sorting information (arrays of length numSortCols) */ 100 AttrNumber *sortColIdx; 101 Oid *sortOperators; 102 Oid *sortCollations; 103 bool *sortNullsFirst; 104 105 /* 106 * Comparators for input columns --- only set/used when aggregate has 107 * DISTINCT flag. equalfnOne version is used for single-column 108 * comparisons, equalfnMulti for the case of multiple columns. 109 */ 110 FmgrInfo equalfnOne; 111 ExprState *equalfnMulti; 112 113 /* 114 * initial value from pg_aggregate entry 115 */ 116 Datum initValue; 117 bool initValueIsNull; 118 119 /* 120 * We need the len and byval info for the agg's input and transition data 121 * types in order to know how to copy/delete values. 122 * 123 * Note that the info for the input type is used only when handling 124 * DISTINCT aggs with just one argument, so there is only one input type. 125 */ 126 int16 inputtypeLen, 127 transtypeLen; 128 bool inputtypeByVal, 129 transtypeByVal; 130 131 /* 132 * Slots for holding the evaluated input arguments. These are set up 133 * during ExecInitAgg() and then used for each input row requiring either 134 * FILTER or ORDER BY/DISTINCT processing. 135 */ 136 TupleTableSlot *sortslot; /* current input tuple */ 137 TupleTableSlot *uniqslot; /* used for multi-column DISTINCT */ 138 TupleDesc sortdesc; /* descriptor of input tuples */ 139 140 /* 141 * These values are working state that is initialized at the start of an 142 * input tuple group and updated for each input tuple. 143 * 144 * For a simple (non DISTINCT/ORDER BY) aggregate, we just feed the input 145 * values straight to the transition function. If it's DISTINCT or 146 * requires ORDER BY, we pass the input values into a Tuplesort object; 147 * then at completion of the input tuple group, we scan the sorted values, 148 * eliminate duplicates if needed, and run the transition function on the 149 * rest. 150 * 151 * We need a separate tuplesort for each grouping set. 152 */ 153 154 Tuplesortstate **sortstates; /* sort objects, if DISTINCT or ORDER BY */ 155 156 /* 157 * This field is a pre-initialized FunctionCallInfo struct used for 158 * calling this aggregate's transfn. We save a few cycles per row by not 159 * re-initializing the unchanging fields; which isn't much, but it seems 160 * worth the extra space consumption. 161 */ 162 FunctionCallInfo transfn_fcinfo; 163 164 /* Likewise for serialization and deserialization functions */ 165 FunctionCallInfo serialfn_fcinfo; 166 167 FunctionCallInfo deserialfn_fcinfo; 168 } AggStatePerTransData; 169 170 /* 171 * AggStatePerAggData - per-aggregate information 172 * 173 * This contains the information needed to call the final function, to produce 174 * a final aggregate result from the state value. If there are multiple 175 * identical Aggrefs in the query, they can all share the same per-agg data. 176 * 177 * These values are set up during ExecInitAgg() and do not change thereafter. 178 */ 179 typedef struct AggStatePerAggData 180 { 181 /* 182 * Link to an Aggref expr this state value is for. 183 * 184 * There can be multiple identical Aggref's sharing the same per-agg. This 185 * points to the first one of them. 186 */ 187 Aggref *aggref; 188 189 /* index to the state value which this agg should use */ 190 int transno; 191 192 /* Optional Oid of final function (may be InvalidOid) */ 193 Oid finalfn_oid; 194 195 /* 196 * fmgr lookup data for final function --- only valid when finalfn_oid is 197 * not InvalidOid. 198 */ 199 FmgrInfo finalfn; 200 201 /* 202 * Number of arguments to pass to the finalfn. This is always at least 1 203 * (the transition state value) plus any ordered-set direct args. If the 204 * finalfn wants extra args then we pass nulls corresponding to the 205 * aggregated input columns. 206 */ 207 int numFinalArgs; 208 209 /* ExprStates for any direct-argument expressions */ 210 List *aggdirectargs; 211 212 /* 213 * We need the len and byval info for the agg's result data type in order 214 * to know how to copy/delete values. 215 */ 216 int16 resulttypeLen; 217 bool resulttypeByVal; 218 219 /* 220 * "shareable" is false if this agg cannot share state values with other 221 * aggregates because the final function is read-write. 222 */ 223 bool shareable; 224 } AggStatePerAggData; 225 226 /* 227 * AggStatePerGroupData - per-aggregate-per-group working state 228 * 229 * These values are working state that is initialized at the start of 230 * an input tuple group and updated for each input tuple. 231 * 232 * In AGG_PLAIN and AGG_SORTED modes, we have a single array of these 233 * structs (pointed to by aggstate->pergroup); we re-use the array for 234 * each input group, if it's AGG_SORTED mode. In AGG_HASHED mode, the 235 * hash table contains an array of these structs for each tuple group. 236 * 237 * Logically, the sortstate field belongs in this struct, but we do not 238 * keep it here for space reasons: we don't support DISTINCT aggregates 239 * in AGG_HASHED mode, so there's no reason to use up a pointer field 240 * in every entry of the hashtable. 241 */ 242 typedef struct AggStatePerGroupData 243 { 244 #define FIELDNO_AGGSTATEPERGROUPDATA_TRANSVALUE 0 245 Datum transValue; /* current transition value */ 246 #define FIELDNO_AGGSTATEPERGROUPDATA_TRANSVALUEISNULL 1 247 bool transValueIsNull; 248 249 #define FIELDNO_AGGSTATEPERGROUPDATA_NOTRANSVALUE 2 250 bool noTransValue; /* true if transValue not set yet */ 251 252 /* 253 * Note: noTransValue initially has the same value as transValueIsNull, 254 * and if true both are cleared to false at the same time. They are not 255 * the same though: if transfn later returns a NULL, we want to keep that 256 * NULL and not auto-replace it with a later input value. Only the first 257 * non-NULL input will be auto-substituted. 258 */ 259 } AggStatePerGroupData; 260 261 /* 262 * AggStatePerPhaseData - per-grouping-set-phase state 263 * 264 * Grouping sets are divided into "phases", where a single phase can be 265 * processed in one pass over the input. If there is more than one phase, then 266 * at the end of input from the current phase, state is reset and another pass 267 * taken over the data which has been re-sorted in the mean time. 268 * 269 * Accordingly, each phase specifies a list of grouping sets and group clause 270 * information, plus each phase after the first also has a sort order. 271 */ 272 typedef struct AggStatePerPhaseData 273 { 274 AggStrategy aggstrategy; /* strategy for this phase */ 275 int numsets; /* number of grouping sets (or 0) */ 276 int *gset_lengths; /* lengths of grouping sets */ 277 Bitmapset **grouped_cols; /* column groupings for rollup */ 278 ExprState **eqfunctions; /* expression returning equality, indexed by 279 * nr of cols to compare */ 280 Agg *aggnode; /* Agg node for phase data */ 281 Sort *sortnode; /* Sort node for input ordering for phase */ 282 283 ExprState *evaltrans; /* evaluation of transition functions */ 284 285 /*---------- 286 * Cached variants of the compiled expression. 287 * first subscript: 0: outerops; 1: TTSOpsMinimalTuple 288 * second subscript: 0: no NULL check; 1: with NULL check 289 *---------- 290 */ 291 ExprState *evaltrans_cache[2][2]; 292 } AggStatePerPhaseData; 293 294 /* 295 * AggStatePerHashData - per-hashtable state 296 * 297 * When doing grouping sets with hashing, we have one of these for each 298 * grouping set. (When doing hashing without grouping sets, we have just one of 299 * them.) 300 */ 301 typedef struct AggStatePerHashData 302 { 303 TupleHashTable hashtable; /* hash table with one entry per group */ 304 TupleHashIterator hashiter; /* for iterating through hash table */ 305 TupleTableSlot *hashslot; /* slot for loading hash table */ 306 FmgrInfo *hashfunctions; /* per-grouping-field hash fns */ 307 Oid *eqfuncoids; /* per-grouping-field equality fns */ 308 int numCols; /* number of hash key columns */ 309 int numhashGrpCols; /* number of columns in hash table */ 310 int largestGrpColIdx; /* largest col required for hashing */ 311 AttrNumber *hashGrpColIdxInput; /* hash col indices in input slot */ 312 AttrNumber *hashGrpColIdxHash; /* indices in hash table tuples */ 313 Agg *aggnode; /* original Agg node, for numGroups etc. */ 314 } AggStatePerHashData; 315 316 317 extern AggState *ExecInitAgg(Agg *node, EState *estate, int eflags); 318 extern void ExecEndAgg(AggState *node); 319 extern void ExecReScanAgg(AggState *node); 320 321 extern Size hash_agg_entry_size(int numTrans, Size tupleWidth, 322 Size transitionSpace); 323 extern void hash_agg_set_limits(double hashentrysize, double input_groups, 324 int used_bits, Size *mem_limit, 325 uint64 *ngroups_limit, int *num_partitions); 326 327 /* parallel instrumentation support */ 328 extern void ExecAggEstimate(AggState *node, ParallelContext *pcxt); 329 extern void ExecAggInitializeDSM(AggState *node, ParallelContext *pcxt); 330 extern void ExecAggInitializeWorker(AggState *node, ParallelWorkerContext *pwcxt); 331 extern void ExecAggRetrieveInstrumentation(AggState *node); 332 333 #endif /* NODEAGG_H */ 334