1 /*-------------------------------------------------------------------------
2  *
3  * selfuncs.h
4  *	  Selectivity functions for standard operators, and assorted
5  *	  infrastructure for selectivity and cost estimation.
6  *
7  *
8  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
9  * Portions Copyright (c) 1994, Regents of the University of California
10  *
11  * src/include/utils/selfuncs.h
12  *
13  *-------------------------------------------------------------------------
14  */
15 #ifndef SELFUNCS_H
16 #define SELFUNCS_H
17 
18 #include "access/htup.h"
19 #include "nodes/pathnodes.h"
20 
21 
22 /*
23  * Note: the default selectivity estimates are not chosen entirely at random.
24  * We want them to be small enough to ensure that indexscans will be used if
25  * available, for typical table densities of ~100 tuples/page.  Thus, for
26  * example, 0.01 is not quite small enough, since that makes it appear that
27  * nearly all pages will be hit anyway.  Also, since we sometimes estimate
28  * eqsel as 1/num_distinct, we probably want DEFAULT_NUM_DISTINCT to equal
29  * 1/DEFAULT_EQ_SEL.
30  */
31 
/* default selectivity estimate for equalities such as "A = b" */
#define DEFAULT_EQ_SEL	0.005

/* default selectivity estimate for inequalities such as "A < b" */
#define DEFAULT_INEQ_SEL  0.3333333333333333

/* default selectivity estimate for range inequalities "A > b AND A < c" */
#define DEFAULT_RANGE_INEQ_SEL	0.005

/* default selectivity estimate for pattern-match operators such as LIKE */
#define DEFAULT_MATCH_SEL	0.005

/*
 * default number of distinct values in a table
 *
 * 200 is the reciprocal of DEFAULT_EQ_SEL (1 / 0.005); keep the two in step
 * if either one is changed.
 */
#define DEFAULT_NUM_DISTINCT  200

/* default selectivity estimate for boolean and null test nodes */
#define DEFAULT_UNK_SEL			0.005
/* complement of DEFAULT_UNK_SEL, i.e. 0.995 with the current value */
#define DEFAULT_NOT_UNK_SEL		(1.0 - DEFAULT_UNK_SEL)
50 
51 
/*
 * Force a probability estimate back into the closed interval [0, 1].
 * Computed estimates can stray slightly outside that range because of
 * roundoff or estimation error.  Values already inside the interval are
 * left untouched.
 *
 * The argument must be a floating-point variable: the macro assigns to it
 * and mentions it more than once, so do not pass an expression with side
 * effects.
 */
#define CLAMP_PROBABILITY(p) \
	do { \
		if ((p) > 1.0) \
			(p) = 1.0; \
		else if ((p) < 0.0) \
			(p) = 0.0; \
	} while (0)
63 
64 
/*
 * Return data from examine_variable and friends.
 *
 * Ownership: whenever statsTuple is non-NULL, the caller is responsible for
 * freeing it when done, using the function stored in freefunc.  The
 * ReleaseVariableStats macro in this header performs that step.
 */
typedef struct VariableStatData
{
	Node	   *var;			/* the Var or expression tree */
	RelOptInfo *rel;			/* Relation, or NULL if not identifiable */
	HeapTuple	statsTuple;		/* pg_statistic tuple, or NULL if none */
	/* NB: if statsTuple!=NULL, it must be freed when caller is done */
	void		(*freefunc) (HeapTuple tuple);	/* how to free statsTuple */
	Oid			vartype;		/* exposed type of expression */
	Oid			atttype;		/* actual type (after stripping relabel) */
	int32		atttypmod;		/* actual typmod (after stripping relabel) */
	bool		isunique;		/* matches unique index or DISTINCT clause */
	bool		acl_ok;			/* result of ACL check on table or column */
} VariableStatData;
79 
/*
 * Release the statistics tuple held by a VariableStatData, if it has one.
 *
 * "vardata" is the struct itself (not a pointer to it).  When its
 * statsTuple passes HeapTupleIsValid, the tuple is handed to the struct's
 * freefunc; otherwise nothing is done.  No field of the struct is modified.
 */
#define ReleaseVariableStats(vardata)  \
	do { \
		if (HeapTupleIsValid((vardata).statsTuple)) \
			(*(vardata).freefunc) ((vardata).statsTuple); \
	} while (0)
85 
86 
87 /*
88  * genericcostestimate is a general-purpose estimator that can be used for
89  * most index types.  In some cases we use genericcostestimate as the base
90  * code and then incorporate additional index-type-specific knowledge in
91  * the type-specific calling function.  To avoid code duplication, we make
92  * genericcostestimate return a number of intermediate values as well as
93  * its preliminary estimates of the output cost values.  The GenericCosts
94  * struct includes all these values.
95  *
96  * Callers should initialize all fields of GenericCosts to zero.  In addition,
97  * they can set numIndexTuples to some positive value if they have a better
98  * than default way of estimating the number of leaf index tuples visited.
99  */
typedef struct
{
	/* These are the values the cost estimator must return to the planner */
	Cost		indexStartupCost;	/* index-related startup cost */
	Cost		indexTotalCost; /* total index-related scan cost */
	Selectivity indexSelectivity;	/* selectivity of index */
	double		indexCorrelation;	/* order correlation of index */

	/* Intermediate values we obtain along the way */
	double		numIndexPages;	/* number of leaf pages visited */

	/*
	 * numIndexTuples is the one field a caller may preset to a positive value
	 * (instead of zero) when it has a better-than-default estimate.
	 */
	double		numIndexTuples; /* number of leaf tuples visited */
	double		spc_random_page_cost;	/* relevant random_page_cost value */
	double		num_sa_scans;	/* # indexscans from ScalarArrayOps */
} GenericCosts;
114 
/*
 * Hooks for plugins to get control when we ask for stats.
 *
 * Each hook is a global function pointer declared extern with PGDLLIMPORT.
 * Both hook types take the planner context plus a VariableStatData pointer
 * and return bool.
 *
 * NOTE(review): presumably a true result means the hook has supplied the
 * statistics in *vardata -- confirm against the callers in selfuncs.c.
 */

/* Relation-level hook: keyed by range table entry and attribute number */
typedef bool (*get_relation_stats_hook_type) (PlannerInfo *root,
											  RangeTblEntry *rte,
											  AttrNumber attnum,
											  VariableStatData *vardata);
extern PGDLLIMPORT get_relation_stats_hook_type get_relation_stats_hook;

/* Index-level hook: keyed by index OID and index attribute number */
typedef bool (*get_index_stats_hook_type) (PlannerInfo *root,
										   Oid indexOid,
										   AttrNumber indexattnum,
										   VariableStatData *vardata);
extern PGDLLIMPORT get_index_stats_hook_type get_index_stats_hook;
126 
127 /* Functions in selfuncs.c */
128 
/*
 * Variable examination routines.  These work with VariableStatData, the
 * struct that carries the return data of examine_variable and friends.
 * Per that struct's contract, a non-NULL statsTuple left in a
 * VariableStatData must be freed by the caller when done (see
 * ReleaseVariableStats).
 *
 * NOTE(review): the non-const pointer parameters (other, varonleft,
 * join_is_reversed, isdefault) look like output parameters -- confirm
 * against the definitions in selfuncs.c.
 */
extern void examine_variable(PlannerInfo *root, Node *node, int varRelid,
							 VariableStatData *vardata);
extern bool statistic_proc_security_check(VariableStatData *vardata, Oid func_oid);
extern bool get_restriction_variable(PlannerInfo *root, List *args,
									 int varRelid,
									 VariableStatData *vardata, Node **other,
									 bool *varonleft);
extern void get_join_variables(PlannerInfo *root, List *args,
							   SpecialJoinInfo *sjinfo,
							   VariableStatData *vardata1,
							   VariableStatData *vardata2,
							   bool *join_is_reversed);
extern double get_variable_numdistinct(VariableStatData *vardata,
									   bool *isdefault);
/*
 * Estimators that take an already-examined variable (VariableStatData) and
 * return a double.
 *
 * Several of these come in pairs: foo() and foo_ext().  In every pair the
 * _ext variant has the same parameters as the plain one plus an additional
 * "Oid collation" argument.  var_eq_non_const has no _ext counterpart in
 * this header.
 *
 * NOTE(review): presumably the plain variants use some default collation --
 * confirm against selfuncs.c before relying on that.
 */
extern double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
							  Datum constval, bool varonleft,
							  double *sumcommonp);
extern double mcv_selectivity_ext(VariableStatData *vardata,
								  FmgrInfo *opproc, Oid collation,
								  Datum constval, bool varonleft,
								  double *sumcommonp);
extern double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
									Datum constval, bool varonleft,
									int min_hist_size, int n_skip,
									int *hist_size);
extern double histogram_selectivity_ext(VariableStatData *vardata,
										FmgrInfo *opproc, Oid collation,
										Datum constval, bool varonleft,
										int min_hist_size, int n_skip,
										int *hist_size);
extern double ineq_histogram_selectivity(PlannerInfo *root,
										 VariableStatData *vardata,
										 FmgrInfo *opproc, bool isgt, bool iseq,
										 Datum constval, Oid consttype);
extern double ineq_histogram_selectivity_ext(PlannerInfo *root,
											 VariableStatData *vardata,
											 FmgrInfo *opproc,
											 bool isgt, bool iseq,
											 Oid collation,
											 Datum constval, Oid consttype);
extern double var_eq_const(VariableStatData *vardata, Oid oproid,
						   Datum constval, bool constisnull,
						   bool varonleft, bool negate);
extern double var_eq_const_ext(VariableStatData *vardata,
							   Oid oproid, Oid collation,
							   Datum constval, bool constisnull,
							   bool varonleft, bool negate);
extern double var_eq_non_const(VariableStatData *vardata, Oid oproid,
							   Node *other,
							   bool varonleft, bool negate);
179 
/*
 * Estimators that work directly on expression nodes.  All of them return a
 * Selectivity except estimate_array_length, which returns an int.  Each
 * Selectivity-returning function takes the planner context and a varRelid;
 * all but boolvarsel also take the join type and SpecialJoinInfo.
 */
extern Selectivity boolvarsel(PlannerInfo *root, Node *arg, int varRelid);
extern Selectivity booltestsel(PlannerInfo *root, BoolTestType booltesttype,
							   Node *arg, int varRelid,
							   JoinType jointype, SpecialJoinInfo *sjinfo);
extern Selectivity nulltestsel(PlannerInfo *root, NullTestType nulltesttype,
							   Node *arg, int varRelid,
							   JoinType jointype, SpecialJoinInfo *sjinfo);
extern Selectivity scalararraysel(PlannerInfo *root,
								  ScalarArrayOpExpr *clause,
								  bool is_join_clause,
								  int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo);
extern int	estimate_array_length(Node *arrayexpr);
extern Selectivity rowcomparesel(PlannerInfo *root,
								 RowCompareExpr *clause,
								 int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo);
195 
/*
 * Merge join estimation.  Returns void and takes four Selectivity pointers
 * (leftstart, leftend, rightstart, rightend).
 * NOTE(review): those four pointers are presumably where the results are
 * stored -- confirm against selfuncs.c.
 */
extern void mergejoinscansel(PlannerInfo *root, Node *clause,
							 Oid opfamily, int strategy, bool nulls_first,
							 Selectivity *leftstart, Selectivity *leftend,
							 Selectivity *rightstart, Selectivity *rightend);

/* Grouping estimation */
extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
								  double input_rows, List **pgset);

/*
 * Hashing estimation.  estimate_hash_bucket_stats returns void and takes
 * two Selectivity pointers (mcv_freq, bucketsize_frac).
 * NOTE(review): presumably those are its outputs -- confirm.
 */
extern void estimate_hash_bucket_stats(PlannerInfo *root,
									   Node *hashkey, double nbuckets,
									   Selectivity *mcv_freq,
									   Selectivity *bucketsize_frac);
extern double estimate_hashagg_tablesize(Path *path,
										 const AggClauseCosts *agg_costs,
										 double dNumGroups);

/*
 * Index cost estimation support.  genericcostestimate works through the
 * caller-supplied GenericCosts struct; callers should zero that struct
 * first, optionally presetting numIndexTuples to a positive value.
 */
extern List *get_quals_from_indexclauses(List *indexclauses);
extern Cost index_other_operands_eval_cost(PlannerInfo *root,
										   List *indexquals);
extern List *add_predicate_to_index_quals(IndexOptInfo *index,
										  List *indexQuals);
extern void genericcostestimate(PlannerInfo *root, IndexPath *path,
								double loop_count,
								GenericCosts *costs);
220 
221 /* Functions in array_selfuncs.c */
222 
/*
 * Returns a Selectivity for a pair of operands (leftop, rightop) given an
 * element type OID and the isEquality / useOr flags.
 * NOTE(review): judging by the name this is the array-containment
 * counterpart of scalararraysel -- confirm against array_selfuncs.c.
 */
extern Selectivity scalararraysel_containment(PlannerInfo *root,
											  Node *leftop, Node *rightop,
											  Oid elemtype, bool isEquality, bool useOr,
											  int varRelid);
227 
228 #endif							/* SELFUNCS_H */
229