1 /*-------------------------------------------------------------------------
2  *
3  * selfuncs.h
4  *	  Selectivity functions for standard operators, and assorted
5  *	  infrastructure for selectivity and cost estimation.
6  *
7  *
8  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
9  * Portions Copyright (c) 1994, Regents of the University of California
10  *
11  * src/include/utils/selfuncs.h
12  *
13  *-------------------------------------------------------------------------
14  */
15 #ifndef SELFUNCS_H
16 #define SELFUNCS_H
17 
18 #include "access/htup.h"
19 #include "fmgr.h"
20 #include "nodes/pathnodes.h"
21 
22 
23 /*
24  * Note: the default selectivity estimates are not chosen entirely at random.
25  * We want them to be small enough to ensure that indexscans will be used if
26  * available, for typical table densities of ~100 tuples/page.  Thus, for
27  * example, 0.01 is not quite small enough, since that makes it appear that
28  * nearly all pages will be hit anyway.  Also, since we sometimes estimate
29  * eqsel as 1/num_distinct, we probably want DEFAULT_NUM_DISTINCT to equal
30  * 1/DEFAULT_EQ_SEL.
31  */
32 
/* fallback selectivity for an equality clause, e.g. "A = b" */
#define DEFAULT_EQ_SEL					0.005

/* fallback selectivity for a one-sided inequality, e.g. "A < b" */
#define DEFAULT_INEQ_SEL				0.3333333333333333

/* fallback selectivity for a range inequality pair, "A > b AND A < c" */
#define DEFAULT_RANGE_INEQ_SEL			0.005

/* fallback selectivity for multirange inequalities, "A > b AND A < c" */
#define DEFAULT_MULTIRANGE_INEQ_SEL		0.005

/* fallback selectivity for pattern-matching operators (LIKE and similar) */
#define DEFAULT_MATCH_SEL				0.005

/* fallback selectivity for the remaining matching operators */
#define DEFAULT_MATCHING_SEL			0.010

/*
 * fallback count of distinct values in a table; numerically this is
 * 1/DEFAULT_EQ_SEL, matching the note at the top of this section
 */
#define DEFAULT_NUM_DISTINCT			200

/*
 * fallback selectivities for boolean-test and null-test nodes: the "unknown"
 * case, and its complement
 */
#define DEFAULT_UNK_SEL					0.005
#define DEFAULT_NOT_UNK_SEL				(1.0 - DEFAULT_UNK_SEL)
57 
58 
/*
 * Clamp a computed probability estimate (which may suffer from roundoff or
 * estimation errors) to the valid range [0.0, 1.0].
 *
 * The argument must be a modifiable floating-point lvalue.  It is evaluated
 * more than once, so it must not have side effects.  Every use of the
 * parameter is parenthesized, per usual macro hygiene, so the expansion does
 * not depend on the precedence of operators inside the argument.
 *
 * A NaN argument is left unchanged, because both comparisons are false.
 */
#define CLAMP_PROBABILITY(p) \
	do { \
		if ((p) < 0.0) \
			(p) = 0.0; \
		else if ((p) > 1.0) \
			(p) = 1.0; \
	} while (0)
70 
/*
 * Flag bits that some selectivity estimation functions hand back to their
 * callers (in EstimationInfo.flags) to describe assumptions that were made
 * while estimating.
 *
 * SELFLAG_USED_DEFAULT
 *		The estimate fell back on one of the DEFAULT_* values defined above.
 */
#define SELFLAG_USED_DEFAULT		(1 << 0)
79 
80 typedef struct EstimationInfo
81 {
82 	uint32		flags;			/* Flags, as defined above to mark special
83 								 * properties of the estimation. */
84 } EstimationInfo;
85 
86 /* Return data from examine_variable and friends */
87 typedef struct VariableStatData
88 {
89 	Node	   *var;			/* the Var or expression tree */
90 	RelOptInfo *rel;			/* Relation, or NULL if not identifiable */
91 	HeapTuple	statsTuple;		/* pg_statistic tuple, or NULL if none */
92 	/* NB: if statsTuple!=NULL, it must be freed when caller is done */
93 	void		(*freefunc) (HeapTuple tuple);	/* how to free statsTuple */
94 	Oid			vartype;		/* exposed type of expression */
95 	Oid			atttype;		/* actual type (after stripping relabel) */
96 	int32		atttypmod;		/* actual typmod (after stripping relabel) */
97 	bool		isunique;		/* matches unique index or DISTINCT clause */
98 	bool		acl_ok;			/* result of ACL check on table or column */
99 } VariableStatData;
100 
/*
 * Release the pg_statistic tuple held by a VariableStatData, if there is
 * one, by calling the struct's own freefunc on it.  Nothing happens when
 * statsTuple is not a valid tuple.
 *
 * The argument is the struct itself (not a pointer to it) and is evaluated
 * more than once.
 */
#define ReleaseVariableStats(vardata) \
	do { \
		if (HeapTupleIsValid((vardata).statsTuple)) \
			(*(vardata).freefunc) ((vardata).statsTuple); \
	} while (0)
106 
107 
108 /*
109  * genericcostestimate is a general-purpose estimator that can be used for
110  * most index types.  In some cases we use genericcostestimate as the base
111  * code and then incorporate additional index-type-specific knowledge in
112  * the type-specific calling function.  To avoid code duplication, we make
113  * genericcostestimate return a number of intermediate values as well as
114  * its preliminary estimates of the output cost values.  The GenericCosts
115  * struct includes all these values.
116  *
117  * Callers should initialize all fields of GenericCosts to zero.  In addition,
118  * they can set numIndexTuples to some positive value if they have a better
119  * than default way of estimating the number of leaf index tuples visited.
120  */
121 typedef struct
122 {
123 	/* These are the values the cost estimator must return to the planner */
124 	Cost		indexStartupCost;	/* index-related startup cost */
125 	Cost		indexTotalCost; /* total index-related scan cost */
126 	Selectivity indexSelectivity;	/* selectivity of index */
127 	double		indexCorrelation;	/* order correlation of index */
128 
129 	/* Intermediate values we obtain along the way */
130 	double		numIndexPages;	/* number of leaf pages visited */
131 	double		numIndexTuples; /* number of leaf tuples visited */
132 	double		spc_random_page_cost;	/* relevant random_page_cost value */
133 	double		num_sa_scans;	/* # indexscans from ScalarArrayOpExprs */
134 } GenericCosts;
135 
136 /* Hooks for plugins to get control when we ask for stats */
137 typedef bool (*get_relation_stats_hook_type) (PlannerInfo *root,
138 											  RangeTblEntry *rte,
139 											  AttrNumber attnum,
140 											  VariableStatData *vardata);
141 extern PGDLLIMPORT get_relation_stats_hook_type get_relation_stats_hook;
142 typedef bool (*get_index_stats_hook_type) (PlannerInfo *root,
143 										   Oid indexOid,
144 										   AttrNumber indexattnum,
145 										   VariableStatData *vardata);
146 extern PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook;
147 
148 /* Functions in selfuncs.c */
149 
150 extern void examine_variable(PlannerInfo *root, Node *node, int varRelid,
151 							 VariableStatData *vardata);
152 extern bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid);
153 extern bool get_restriction_variable(PlannerInfo *root, List *args,
154 									 int varRelid,
155 									 VariableStatData *vardata, Node **other,
156 									 bool *varonleft);
157 extern void get_join_variables(PlannerInfo *root, List *args,
158 							   SpecialJoinInfo *sjinfo,
159 							   VariableStatData *vardata1,
160 							   VariableStatData *vardata2,
161 							   bool *join_is_reversed);
162 extern double get_variable_numdistinct(VariableStatData *vardata,
163 									   bool *isdefault);
164 extern double mcv_selectivity(VariableStatData *vardata,
165 							  FmgrInfo *opproc, Oid collation,
166 							  Datum constval, bool varonleft,
167 							  double *sumcommonp);
168 extern double histogram_selectivity(VariableStatData *vardata,
169 									FmgrInfo *opproc, Oid collation,
170 									Datum constval, bool varonleft,
171 									int min_hist_size, int n_skip,
172 									int *hist_size);
173 extern double generic_restriction_selectivity(PlannerInfo *root,
174 											  Oid oproid, Oid collation,
175 											  List *args, int varRelid,
176 											  double default_selectivity);
177 extern double ineq_histogram_selectivity(PlannerInfo *root,
178 										 VariableStatData *vardata,
179 										 Oid opoid, FmgrInfo *opproc,
180 										 bool isgt, bool iseq,
181 										 Oid collation,
182 										 Datum constval, Oid consttype);
183 extern double var_eq_const(VariableStatData *vardata,
184 						   Oid oproid, Oid collation,
185 						   Datum constval, bool constisnull,
186 						   bool varonleft, bool negate);
187 extern double var_eq_non_const(VariableStatData *vardata,
188 							   Oid oproid, Oid collation,
189 							   Node *other,
190 							   bool varonleft, bool negate);
191 
192 extern Selectivity boolvarsel(PlannerInfo *root, Node *arg, int varRelid);
193 extern Selectivity booltestsel(PlannerInfo *root, BoolTestType booltesttype,
194 							   Node *arg, int varRelid,
195 							   JoinType jointype, SpecialJoinInfo *sjinfo);
196 extern Selectivity nulltestsel(PlannerInfo *root, NullTestType nulltesttype,
197 							   Node *arg, int varRelid,
198 							   JoinType jointype, SpecialJoinInfo *sjinfo);
199 extern Selectivity scalararraysel(PlannerInfo *root,
200 								  ScalarArrayOpExpr *clause,
201 								  bool is_join_clause,
202 								  int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo);
203 extern int	estimate_array_length(Node *arrayexpr);
204 extern Selectivity rowcomparesel(PlannerInfo *root,
205 								 RowCompareExpr *clause,
206 								 int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo);
207 
208 extern void mergejoinscansel(PlannerInfo *root, Node *clause,
209 							 Oid opfamily, int strategy, bool nulls_first,
210 							 Selectivity *leftstart, Selectivity *leftend,
211 							 Selectivity *rightstart, Selectivity *rightend);
212 
213 extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
214 								  double input_rows, List **pgset,
215 								  EstimationInfo *estinfo);
216 
217 extern void estimate_hash_bucket_stats(PlannerInfo *root,
218 									   Node *hashkey, double nbuckets,
219 									   Selectivity *mcv_freq,
220 									   Selectivity *bucketsize_frac);
221 extern double estimate_hashagg_tablesize(PlannerInfo *root, Path *path,
222 										 const AggClauseCosts *agg_costs,
223 										 double dNumGroups);
224 
225 extern List *get_quals_from_indexclauses(List *indexclauses);
226 extern Cost index_other_operands_eval_cost(PlannerInfo *root,
227 										   List *indexquals);
228 extern List *add_predicate_to_index_quals(IndexOptInfo *index,
229 										  List *indexQuals);
230 extern void genericcostestimate(PlannerInfo *root, IndexPath *path,
231 								double loop_count,
232 								GenericCosts *costs);
233 
234 /* Functions in array_selfuncs.c */
235 
236 extern Selectivity scalararraysel_containment(PlannerInfo *root,
237 											  Node *leftop, Node *rightop,
238 											  Oid elemtype, bool isEquality, bool useOr,
239 											  int varRelid);
240 
241 #endif							/* SELFUNCS_H */
242