1 /*-------------------------------------------------------------------------
2  *
3  * selfuncs.c
4  *	  Selectivity functions and index cost estimation functions for
5  *	  standard operators and index access methods.
6  *
7  *	  Selectivity routines are registered in the pg_operator catalog
8  *	  in the "oprrest" and "oprjoin" attributes.
9  *
10  *	  Index cost functions are located via the index AM's API struct,
11  *	  which is obtained from the handler function registered in pg_am.
12  *
13  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
14  * Portions Copyright (c) 1994, Regents of the University of California
15  *
16  *
17  * IDENTIFICATION
18  *	  src/backend/utils/adt/selfuncs.c
19  *
20  *-------------------------------------------------------------------------
21  */
22 
23 /*----------
24  * Operator selectivity estimation functions are called to estimate the
25  * selectivity of WHERE clauses whose top-level operator is their operator.
26  * We divide the problem into two cases:
27  *		Restriction clause estimation: the clause involves vars of just
28  *			one relation.
29  *		Join clause estimation: the clause involves vars of multiple rels.
30  * Join selectivity estimation is far more difficult and usually less accurate
31  * than restriction estimation.
32  *
33  * When dealing with the inner scan of a nestloop join, we consider the
34  * join's joinclauses as restriction clauses for the inner relation, and
35  * treat vars of the outer relation as parameters (a/k/a constants of unknown
36  * values).  So, restriction estimators need to be able to accept an argument
37  * telling which relation is to be treated as the variable.
38  *
39  * The call convention for a restriction estimator (oprrest function) is
40  *
41  *		Selectivity oprrest (PlannerInfo *root,
42  *							 Oid operator,
43  *							 List *args,
44  *							 int varRelid);
45  *
46  * root: general information about the query (rtable and RelOptInfo lists
47  * are particularly important for the estimator).
48  * operator: OID of the specific operator in question.
49  * args: argument list from the operator clause.
50  * varRelid: if not zero, the relid (rtable index) of the relation to
51  * be treated as the variable relation.  May be zero if the args list
52  * is known to contain vars of only one relation.
53  *
54  * This is represented at the SQL level (in pg_proc) as
55  *
56  *		float8 oprrest (internal, oid, internal, int4);
57  *
58  * The result is a selectivity, that is, a fraction (0 to 1) of the rows
59  * of the relation that are expected to produce a TRUE result for the
60  * given operator.
61  *
62  * The call convention for a join estimator (oprjoin function) is similar
63  * except that varRelid is not needed, and instead join information is
64  * supplied:
65  *
66  *		Selectivity oprjoin (PlannerInfo *root,
67  *							 Oid operator,
68  *							 List *args,
69  *							 JoinType jointype,
70  *							 SpecialJoinInfo *sjinfo);
71  *
72  *		float8 oprjoin (internal, oid, internal, int2, internal);
73  *
74  * (Before Postgres 8.4, join estimators had only the first four of these
75  * parameters.  That signature is still allowed, but deprecated.)  The
76  * relationship between jointype and sjinfo is explained in the comments for
77  * clause_selectivity() --- the short version is that jointype is usually
78  * best ignored in favor of examining sjinfo.
79  *
80  * Join selectivity for regular inner and outer joins is defined as the
81  * fraction (0 to 1) of the cross product of the relations that is expected
82  * to produce a TRUE result for the given operator.  For both semi and anti
83  * joins, however, the selectivity is defined as the fraction of the left-hand
84  * side relation's rows that are expected to have a match (ie, at least one
85  * row with a TRUE result) in the right-hand side.
86  *
87  * For both oprrest and oprjoin functions, the operator's input collation OID
88  * (if any) is passed using the standard fmgr mechanism, so that the estimator
89  * function can fetch it with PG_GET_COLLATION().  Note, however, that all
90  * statistics in pg_statistic are currently built using the database's default
91  * collation.  Thus, in most cases where we are looking at statistics, we
92  * should ignore the actual operator collation and use DEFAULT_COLLATION_OID.
93  * We expect that the error induced by doing this is usually not large enough
94  * to justify complicating matters.
95  *----------
96  */
97 
98 #include "postgres.h"
99 
100 #include <ctype.h>
101 #include <float.h>
102 #include <math.h>
103 
104 #include "access/brin.h"
105 #include "access/brin_page.h"
106 #include "access/gin.h"
107 #include "access/htup_details.h"
108 #include "access/relscan.h"
109 #include "access/sysattr.h"
110 #include "access/visibilitymap.h"
111 #include "catalog/pg_am.h"
112 #include "catalog/pg_collation.h"
113 #include "catalog/pg_operator.h"
114 #include "catalog/pg_opfamily.h"
115 #include "catalog/pg_statistic.h"
116 #include "catalog/pg_statistic_ext.h"
117 #include "catalog/pg_type.h"
118 #include "mb/pg_wchar.h"
119 #include "miscadmin.h"
120 #include "nodes/makefuncs.h"
121 #include "nodes/nodeFuncs.h"
122 #include "optimizer/clauses.h"
123 #include "optimizer/cost.h"
124 #include "optimizer/pathnode.h"
125 #include "optimizer/paths.h"
126 #include "optimizer/plancat.h"
127 #include "optimizer/predtest.h"
128 #include "optimizer/restrictinfo.h"
129 #include "optimizer/var.h"
130 #include "parser/parse_clause.h"
131 #include "parser/parse_coerce.h"
132 #include "parser/parsetree.h"
133 #include "statistics/statistics.h"
134 #include "storage/bufmgr.h"
135 #include "utils/acl.h"
136 #include "utils/builtins.h"
137 #include "utils/bytea.h"
138 #include "utils/date.h"
139 #include "utils/datum.h"
140 #include "utils/fmgroids.h"
141 #include "utils/index_selfuncs.h"
142 #include "utils/lsyscache.h"
143 #include "utils/memutils.h"
144 #include "utils/nabstime.h"
145 #include "utils/pg_locale.h"
146 #include "utils/rel.h"
147 #include "utils/selfuncs.h"
148 #include "utils/snapmgr.h"
149 #include "utils/spccache.h"
150 #include "utils/syscache.h"
151 #include "utils/timestamp.h"
152 #include "utils/tqual.h"
153 #include "utils/typcache.h"
154 #include "utils/varlena.h"
155 
156 
/*
 * Hooks for plugins to get control when we ask for stats.  If set, these
 * are consulted when looking up statistics for a relation or an index
 * expression, respectively.
 */
get_relation_stats_hook_type get_relation_stats_hook = NULL;
get_index_stats_hook_type get_index_stats_hook = NULL;

/* Forward declarations for local (static) routines */

/* eqsel/neqsel guts and helpers */
static double eqsel_internal(PG_FUNCTION_ARGS, bool negate);
static double var_eq_const(VariableStatData *vardata, Oid operator,
			 Datum constval, bool constisnull,
			 bool varonleft, bool negate);
static double var_eq_non_const(VariableStatData *vardata, Oid operator,
				 Node *other,
				 bool varonleft, bool negate);
static double ineq_histogram_selectivity(PlannerInfo *root,
						   VariableStatData *vardata,
						   FmgrInfo *opproc, bool isgt, bool iseq,
						   Datum constval, Oid consttype);
/* join selectivity guts */
static double eqjoinsel_inner(Oid operator,
				VariableStatData *vardata1, VariableStatData *vardata2);
static double eqjoinsel_semi(Oid operator,
			   VariableStatData *vardata1, VariableStatData *vardata2,
			   RelOptInfo *inner_rel);
static bool estimate_multivariate_ndistinct(PlannerInfo *root,
								RelOptInfo *rel, List **varinfos, double *ndistinct);
/* datatype-to-double conversion machinery for histogram interpolation */
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
				  Datum lobound, Datum hibound, Oid boundstypid,
				  double *scaledlobound, double *scaledhibound);
static double convert_numeric_to_scalar(Datum value, Oid typid, bool *failure);
static void convert_string_to_scalar(char *value,
						 double *scaledvalue,
						 char *lobound,
						 double *scaledlobound,
						 char *hibound,
						 double *scaledhibound);
static void convert_bytea_to_scalar(Datum value,
						double *scaledvalue,
						Datum lobound,
						double *scaledlobound,
						Datum hibound,
						double *scaledhibound);
static double convert_one_string_to_scalar(char *value,
							 int rangelo, int rangehi);
static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
							int rangelo, int rangehi);
static char *convert_string_datum(Datum value, Oid typid, bool *failure);
static double convert_timevalue_to_scalar(Datum value, Oid typid,
							bool *failure);
/* variable examination and range probing */
static void examine_simple_variable(PlannerInfo *root, Var *var,
						VariableStatData *vardata);
static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
				   Oid sortop, Datum *min, Datum *max);
static bool get_actual_variable_range(PlannerInfo *root,
						  VariableStatData *vardata,
						  Oid sortop,
						  Datum *min, Datum *max);
static bool get_actual_variable_endpoint(Relation heapRel,
										 Relation indexRel,
										 ScanDirection indexscandir,
										 ScanKey scankeys,
										 int16 typLen,
										 bool typByVal,
										 MemoryContext outercontext,
										 Datum *endpointDatum);
static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
/* pattern-matching (LIKE/regex) selectivity helpers */
static Selectivity prefix_selectivity(PlannerInfo *root,
				   VariableStatData *vardata,
				   Oid vartype, Oid opfamily, Const *prefixcon);
static Selectivity like_selectivity(const char *patt, int pattlen,
				 bool case_insensitive);
static Selectivity regex_selectivity(const char *patt, int pattlen,
				  bool case_insensitive,
				  int fixed_prefix_len);
static Datum string_to_datum(const char *str, Oid datatype);
static Const *string_to_const(const char *str, Oid datatype);
static Const *string_to_bytea_const(const char *str, size_t str_len);
/* index cost estimation helper */
static List *add_predicate_to_quals(IndexOptInfo *index, List *indexQuals);
231 
232 
233 /*
234  *		eqsel			- Selectivity of "=" for any data types.
235  *
236  * Note: this routine is also used to estimate selectivity for some
237  * operators that are not "=" but have comparable selectivity behavior,
238  * such as "~=" (geometric approximate-match).  Even for "=", we must
239  * keep in mind that the left and right datatypes may differ.
240  */
241 Datum
242 eqsel(PG_FUNCTION_ARGS)
243 {
244 	PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, false));
245 }
246 
247 /*
248  * Common code for eqsel() and neqsel()
249  */
250 static double
251 eqsel_internal(PG_FUNCTION_ARGS, bool negate)
252 {
253 	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
254 	Oid			operator = PG_GETARG_OID(1);
255 	List	   *args = (List *) PG_GETARG_POINTER(2);
256 	int			varRelid = PG_GETARG_INT32(3);
257 	VariableStatData vardata;
258 	Node	   *other;
259 	bool		varonleft;
260 	double		selec;
261 
262 	/*
263 	 * When asked about <>, we do the estimation using the corresponding =
264 	 * operator, then convert to <> via "1.0 - eq_selectivity - nullfrac".
265 	 */
266 	if (negate)
267 	{
268 		operator = get_negator(operator);
269 		if (!OidIsValid(operator))
270 		{
271 			/* Use default selectivity (should we raise an error instead?) */
272 			return 1.0 - DEFAULT_EQ_SEL;
273 		}
274 	}
275 
276 	/*
277 	 * If expression is not variable = something or something = variable, then
278 	 * punt and return a default estimate.
279 	 */
280 	if (!get_restriction_variable(root, args, varRelid,
281 								  &vardata, &other, &varonleft))
282 		return negate ? (1.0 - DEFAULT_EQ_SEL) : DEFAULT_EQ_SEL;
283 
284 	/*
285 	 * We can do a lot better if the something is a constant.  (Note: the
286 	 * Const might result from estimation rather than being a simple constant
287 	 * in the query.)
288 	 */
289 	if (IsA(other, Const))
290 		selec = var_eq_const(&vardata, operator,
291 							 ((Const *) other)->constvalue,
292 							 ((Const *) other)->constisnull,
293 							 varonleft, negate);
294 	else
295 		selec = var_eq_non_const(&vardata, operator, other,
296 								 varonleft, negate);
297 
298 	ReleaseVariableStats(vardata);
299 
300 	return selec;
301 }
302 
303 /*
304  * var_eq_const --- eqsel for var = const case
305  *
306  * This is split out so that some other estimation functions can use it.
307  */
308 static double
309 var_eq_const(VariableStatData *vardata, Oid operator,
310 			 Datum constval, bool constisnull,
311 			 bool varonleft, bool negate)
312 {
313 	double		selec;
314 	double		nullfrac = 0.0;
315 	bool		isdefault;
316 	Oid			opfuncoid;
317 
318 	/*
319 	 * If the constant is NULL, assume operator is strict and return zero, ie,
320 	 * operator will never return TRUE.  (It's zero even for a negator op.)
321 	 */
322 	if (constisnull)
323 		return 0.0;
324 
325 	/*
326 	 * Grab the nullfrac for use below.  Note we allow use of nullfrac
327 	 * regardless of security check.
328 	 */
329 	if (HeapTupleIsValid(vardata->statsTuple))
330 	{
331 		Form_pg_statistic stats;
332 
333 		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
334 		nullfrac = stats->stanullfrac;
335 	}
336 
337 	/*
338 	 * If we matched the var to a unique index or DISTINCT clause, assume
339 	 * there is exactly one match regardless of anything else.  (This is
340 	 * slightly bogus, since the index or clause's equality operator might be
341 	 * different from ours, but it's much more likely to be right than
342 	 * ignoring the information.)
343 	 */
344 	if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
345 	{
346 		selec = 1.0 / vardata->rel->tuples;
347 	}
348 	else if (HeapTupleIsValid(vardata->statsTuple) &&
349 			 statistic_proc_security_check(vardata,
350 										   (opfuncoid = get_opcode(operator))))
351 	{
352 		AttStatsSlot sslot;
353 		bool		match = false;
354 		int			i;
355 
356 		/*
357 		 * Is the constant "=" to any of the column's most common values?
358 		 * (Although the given operator may not really be "=", we will assume
359 		 * that seeing whether it returns TRUE is an appropriate test.  If you
360 		 * don't like this, maybe you shouldn't be using eqsel for your
361 		 * operator...)
362 		 */
363 		if (get_attstatsslot(&sslot, vardata->statsTuple,
364 							 STATISTIC_KIND_MCV, InvalidOid,
365 							 ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
366 		{
367 			FmgrInfo	eqproc;
368 
369 			fmgr_info(opfuncoid, &eqproc);
370 
371 			for (i = 0; i < sslot.nvalues; i++)
372 			{
373 				/* be careful to apply operator right way 'round */
374 				if (varonleft)
375 					match = DatumGetBool(FunctionCall2Coll(&eqproc,
376 														   DEFAULT_COLLATION_OID,
377 														   sslot.values[i],
378 														   constval));
379 				else
380 					match = DatumGetBool(FunctionCall2Coll(&eqproc,
381 														   DEFAULT_COLLATION_OID,
382 														   constval,
383 														   sslot.values[i]));
384 				if (match)
385 					break;
386 			}
387 		}
388 		else
389 		{
390 			/* no most-common-value info available */
391 			i = 0;				/* keep compiler quiet */
392 		}
393 
394 		if (match)
395 		{
396 			/*
397 			 * Constant is "=" to this common value.  We know selectivity
398 			 * exactly (or as exactly as ANALYZE could calculate it, anyway).
399 			 */
400 			selec = sslot.numbers[i];
401 		}
402 		else
403 		{
404 			/*
405 			 * Comparison is against a constant that is neither NULL nor any
406 			 * of the common values.  Its selectivity cannot be more than
407 			 * this:
408 			 */
409 			double		sumcommon = 0.0;
410 			double		otherdistinct;
411 
412 			for (i = 0; i < sslot.nnumbers; i++)
413 				sumcommon += sslot.numbers[i];
414 			selec = 1.0 - sumcommon - nullfrac;
415 			CLAMP_PROBABILITY(selec);
416 
417 			/*
418 			 * and in fact it's probably a good deal less. We approximate that
419 			 * all the not-common values share this remaining fraction
420 			 * equally, so we divide by the number of other distinct values.
421 			 */
422 			otherdistinct = get_variable_numdistinct(vardata, &isdefault) -
423 				sslot.nnumbers;
424 			if (otherdistinct > 1)
425 				selec /= otherdistinct;
426 
427 			/*
428 			 * Another cross-check: selectivity shouldn't be estimated as more
429 			 * than the least common "most common value".
430 			 */
431 			if (sslot.nnumbers > 0 && selec > sslot.numbers[sslot.nnumbers - 1])
432 				selec = sslot.numbers[sslot.nnumbers - 1];
433 		}
434 
435 		free_attstatsslot(&sslot);
436 	}
437 	else
438 	{
439 		/*
440 		 * No ANALYZE stats available, so make a guess using estimated number
441 		 * of distinct values and assuming they are equally common. (The guess
442 		 * is unlikely to be very good, but we do know a few special cases.)
443 		 */
444 		selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
445 	}
446 
447 	/* now adjust if we wanted <> rather than = */
448 	if (negate)
449 		selec = 1.0 - selec - nullfrac;
450 
451 	/* result should be in range, but make sure... */
452 	CLAMP_PROBABILITY(selec);
453 
454 	return selec;
455 }
456 
457 /*
458  * var_eq_non_const --- eqsel for var = something-other-than-const case
459  */
460 static double
461 var_eq_non_const(VariableStatData *vardata, Oid operator,
462 				 Node *other,
463 				 bool varonleft, bool negate)
464 {
465 	double		selec;
466 	double		nullfrac = 0.0;
467 	bool		isdefault;
468 
469 	/*
470 	 * Grab the nullfrac for use below.
471 	 */
472 	if (HeapTupleIsValid(vardata->statsTuple))
473 	{
474 		Form_pg_statistic stats;
475 
476 		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
477 		nullfrac = stats->stanullfrac;
478 	}
479 
480 	/*
481 	 * If we matched the var to a unique index or DISTINCT clause, assume
482 	 * there is exactly one match regardless of anything else.  (This is
483 	 * slightly bogus, since the index or clause's equality operator might be
484 	 * different from ours, but it's much more likely to be right than
485 	 * ignoring the information.)
486 	 */
487 	if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
488 	{
489 		selec = 1.0 / vardata->rel->tuples;
490 	}
491 	else if (HeapTupleIsValid(vardata->statsTuple))
492 	{
493 		double		ndistinct;
494 		AttStatsSlot sslot;
495 
496 		/*
497 		 * Search is for a value that we do not know a priori, but we will
498 		 * assume it is not NULL.  Estimate the selectivity as non-null
499 		 * fraction divided by number of distinct values, so that we get a
500 		 * result averaged over all possible values whether common or
501 		 * uncommon.  (Essentially, we are assuming that the not-yet-known
502 		 * comparison value is equally likely to be any of the possible
503 		 * values, regardless of their frequency in the table.  Is that a good
504 		 * idea?)
505 		 */
506 		selec = 1.0 - nullfrac;
507 		ndistinct = get_variable_numdistinct(vardata, &isdefault);
508 		if (ndistinct > 1)
509 			selec /= ndistinct;
510 
511 		/*
512 		 * Cross-check: selectivity should never be estimated as more than the
513 		 * most common value's.
514 		 */
515 		if (get_attstatsslot(&sslot, vardata->statsTuple,
516 							 STATISTIC_KIND_MCV, InvalidOid,
517 							 ATTSTATSSLOT_NUMBERS))
518 		{
519 			if (sslot.nnumbers > 0 && selec > sslot.numbers[0])
520 				selec = sslot.numbers[0];
521 			free_attstatsslot(&sslot);
522 		}
523 	}
524 	else
525 	{
526 		/*
527 		 * No ANALYZE stats available, so make a guess using estimated number
528 		 * of distinct values and assuming they are equally common. (The guess
529 		 * is unlikely to be very good, but we do know a few special cases.)
530 		 */
531 		selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
532 	}
533 
534 	/* now adjust if we wanted <> rather than = */
535 	if (negate)
536 		selec = 1.0 - selec - nullfrac;
537 
538 	/* result should be in range, but make sure... */
539 	CLAMP_PROBABILITY(selec);
540 
541 	return selec;
542 }
543 
544 /*
545  *		neqsel			- Selectivity of "!=" for any data types.
546  *
547  * This routine is also used for some operators that are not "!="
548  * but have comparable selectivity behavior.  See above comments
549  * for eqsel().
550  */
551 Datum
552 neqsel(PG_FUNCTION_ARGS)
553 {
554 	PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, true));
555 }
556 
557 /*
558  *	scalarineqsel		- Selectivity of "<", "<=", ">", ">=" for scalars.
559  *
560  * This is the guts of scalarltsel/scalarlesel/scalargtsel/scalargesel.
561  * The isgt and iseq flags distinguish which of the four cases apply.
562  *
563  * The caller has commuted the clause, if necessary, so that we can treat
564  * the variable as being on the left.  The caller must also make sure that
565  * the other side of the clause is a non-null Const, and dissect that into
566  * a value and datatype.  (This definition simplifies some callers that
567  * want to estimate against a computed value instead of a Const node.)
568  *
569  * This routine works for any datatype (or pair of datatypes) known to
570  * convert_to_scalar().  If it is applied to some other datatype,
571  * it will return an approximate estimate based on assuming that the constant
572  * value falls in the middle of the bin identified by binary search.
573  */
574 static double
575 scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq,
576 			  VariableStatData *vardata, Datum constval, Oid consttype)
577 {
578 	Form_pg_statistic stats;
579 	FmgrInfo	opproc;
580 	double		mcv_selec,
581 				hist_selec,
582 				sumcommon;
583 	double		selec;
584 
585 	if (!HeapTupleIsValid(vardata->statsTuple))
586 	{
587 		/* no stats available, so default result */
588 		return DEFAULT_INEQ_SEL;
589 	}
590 	stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
591 
592 	fmgr_info(get_opcode(operator), &opproc);
593 
594 	/*
595 	 * If we have most-common-values info, add up the fractions of the MCV
596 	 * entries that satisfy MCV OP CONST.  These fractions contribute directly
597 	 * to the result selectivity.  Also add up the total fraction represented
598 	 * by MCV entries.
599 	 */
600 	mcv_selec = mcv_selectivity(vardata, &opproc, constval, true,
601 								&sumcommon);
602 
603 	/*
604 	 * If there is a histogram, determine which bin the constant falls in, and
605 	 * compute the resulting contribution to selectivity.
606 	 */
607 	hist_selec = ineq_histogram_selectivity(root, vardata,
608 											&opproc, isgt, iseq,
609 											constval, consttype);
610 
611 	/*
612 	 * Now merge the results from the MCV and histogram calculations,
613 	 * realizing that the histogram covers only the non-null values that are
614 	 * not listed in MCV.
615 	 */
616 	selec = 1.0 - stats->stanullfrac - sumcommon;
617 
618 	if (hist_selec >= 0.0)
619 		selec *= hist_selec;
620 	else
621 	{
622 		/*
623 		 * If no histogram but there are values not accounted for by MCV,
624 		 * arbitrarily assume half of them will match.
625 		 */
626 		selec *= 0.5;
627 	}
628 
629 	selec += mcv_selec;
630 
631 	/* result should be in range, but make sure... */
632 	CLAMP_PROBABILITY(selec);
633 
634 	return selec;
635 }
636 
637 /*
638  *	mcv_selectivity			- Examine the MCV list for selectivity estimates
639  *
640  * Determine the fraction of the variable's MCV population that satisfies
641  * the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft.  Also
642  * compute the fraction of the total column population represented by the MCV
643  * list.  This code will work for any boolean-returning predicate operator.
644  *
645  * The function result is the MCV selectivity, and the fraction of the
646  * total population is returned into *sumcommonp.  Zeroes are returned
647  * if there is no MCV list.
648  */
649 double
650 mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
651 				Datum constval, bool varonleft,
652 				double *sumcommonp)
653 {
654 	double		mcv_selec,
655 				sumcommon;
656 	AttStatsSlot sslot;
657 	int			i;
658 
659 	mcv_selec = 0.0;
660 	sumcommon = 0.0;
661 
662 	if (HeapTupleIsValid(vardata->statsTuple) &&
663 		statistic_proc_security_check(vardata, opproc->fn_oid) &&
664 		get_attstatsslot(&sslot, vardata->statsTuple,
665 						 STATISTIC_KIND_MCV, InvalidOid,
666 						 ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
667 	{
668 		for (i = 0; i < sslot.nvalues; i++)
669 		{
670 			if (varonleft ?
671 				DatumGetBool(FunctionCall2Coll(opproc,
672 											   DEFAULT_COLLATION_OID,
673 											   sslot.values[i],
674 											   constval)) :
675 				DatumGetBool(FunctionCall2Coll(opproc,
676 											   DEFAULT_COLLATION_OID,
677 											   constval,
678 											   sslot.values[i])))
679 				mcv_selec += sslot.numbers[i];
680 			sumcommon += sslot.numbers[i];
681 		}
682 		free_attstatsslot(&sslot);
683 	}
684 
685 	*sumcommonp = sumcommon;
686 	return mcv_selec;
687 }
688 
689 /*
690  *	histogram_selectivity	- Examine the histogram for selectivity estimates
691  *
692  * Determine the fraction of the variable's histogram entries that satisfy
693  * the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft.
694  *
695  * This code will work for any boolean-returning predicate operator, whether
696  * or not it has anything to do with the histogram sort operator.  We are
697  * essentially using the histogram just as a representative sample.  However,
698  * small histograms are unlikely to be all that representative, so the caller
699  * should be prepared to fall back on some other estimation approach when the
700  * histogram is missing or very small.  It may also be prudent to combine this
701  * approach with another one when the histogram is small.
702  *
703  * If the actual histogram size is not at least min_hist_size, we won't bother
704  * to do the calculation at all.  Also, if the n_skip parameter is > 0, we
705  * ignore the first and last n_skip histogram elements, on the grounds that
706  * they are outliers and hence not very representative.  Typical values for
707  * these parameters are 10 and 1.
708  *
709  * The function result is the selectivity, or -1 if there is no histogram
710  * or it's smaller than min_hist_size.
711  *
712  * The output parameter *hist_size receives the actual histogram size,
713  * or zero if no histogram.  Callers may use this number to decide how
714  * much faith to put in the function result.
715  *
716  * Note that the result disregards both the most-common-values (if any) and
717  * null entries.  The caller is expected to combine this result with
718  * statistics for those portions of the column population.  It may also be
719  * prudent to clamp the result range, ie, disbelieve exact 0 or 1 outputs.
720  */
721 double
722 histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
723 					  Datum constval, bool varonleft,
724 					  int min_hist_size, int n_skip,
725 					  int *hist_size)
726 {
727 	double		result;
728 	AttStatsSlot sslot;
729 
730 	/* check sanity of parameters */
731 	Assert(n_skip >= 0);
732 	Assert(min_hist_size > 2 * n_skip);
733 
734 	if (HeapTupleIsValid(vardata->statsTuple) &&
735 		statistic_proc_security_check(vardata, opproc->fn_oid) &&
736 		get_attstatsslot(&sslot, vardata->statsTuple,
737 						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
738 						 ATTSTATSSLOT_VALUES))
739 	{
740 		*hist_size = sslot.nvalues;
741 		if (sslot.nvalues >= min_hist_size)
742 		{
743 			int			nmatch = 0;
744 			int			i;
745 
746 			for (i = n_skip; i < sslot.nvalues - n_skip; i++)
747 			{
748 				if (varonleft ?
749 					DatumGetBool(FunctionCall2Coll(opproc,
750 												   DEFAULT_COLLATION_OID,
751 												   sslot.values[i],
752 												   constval)) :
753 					DatumGetBool(FunctionCall2Coll(opproc,
754 												   DEFAULT_COLLATION_OID,
755 												   constval,
756 												   sslot.values[i])))
757 					nmatch++;
758 			}
759 			result = ((double) nmatch) / ((double) (sslot.nvalues - 2 * n_skip));
760 		}
761 		else
762 			result = -1;
763 		free_attstatsslot(&sslot);
764 	}
765 	else
766 	{
767 		*hist_size = 0;
768 		result = -1;
769 	}
770 
771 	return result;
772 }
773 
774 /*
775  *	ineq_histogram_selectivity	- Examine the histogram for scalarineqsel
776  *
777  * Determine the fraction of the variable's histogram population that
778  * satisfies the inequality condition, ie, VAR < (or <=, >, >=) CONST.
779  * The isgt and iseq flags distinguish which of the four cases apply.
780  *
781  * Returns -1 if there is no histogram (valid results will always be >= 0).
782  *
783  * Note that the result disregards both the most-common-values (if any) and
784  * null entries.  The caller is expected to combine this result with
785  * statistics for those portions of the column population.
786  */
787 static double
788 ineq_histogram_selectivity(PlannerInfo *root,
789 						   VariableStatData *vardata,
790 						   FmgrInfo *opproc, bool isgt, bool iseq,
791 						   Datum constval, Oid consttype)
792 {
793 	double		hist_selec;
794 	AttStatsSlot sslot;
795 
796 	hist_selec = -1.0;
797 
798 	/*
799 	 * Someday, ANALYZE might store more than one histogram per rel/att,
800 	 * corresponding to more than one possible sort ordering defined for the
801 	 * column type.  However, to make that work we will need to figure out
802 	 * which staop to search for --- it's not necessarily the one we have at
803 	 * hand!  (For example, we might have a '<=' operator rather than the '<'
804 	 * operator that will appear in staop.)  For now, assume that whatever
805 	 * appears in pg_statistic is sorted the same way our operator sorts, or
806 	 * the reverse way if isgt is true.
807 	 */
808 	if (HeapTupleIsValid(vardata->statsTuple) &&
809 		statistic_proc_security_check(vardata, opproc->fn_oid) &&
810 		get_attstatsslot(&sslot, vardata->statsTuple,
811 						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
812 						 ATTSTATSSLOT_VALUES))
813 	{
814 		if (sslot.nvalues > 1)
815 		{
816 			/*
817 			 * Use binary search to find the desired location, namely the
818 			 * right end of the histogram bin containing the comparison value,
819 			 * which is the leftmost entry for which the comparison operator
820 			 * succeeds (if isgt) or fails (if !isgt).  (If the given operator
821 			 * isn't actually sort-compatible with the histogram, you'll get
822 			 * garbage results ... but probably not any more garbage-y than
823 			 * you would have from the old linear search.)
824 			 *
825 			 * In this loop, we pay no attention to whether the operator iseq
826 			 * or not; that detail will be mopped up below.  (We cannot tell,
827 			 * anyway, whether the operator thinks the values are equal.)
828 			 *
829 			 * If the binary search accesses the first or last histogram
830 			 * entry, we try to replace that endpoint with the true column min
831 			 * or max as found by get_actual_variable_range().  This
832 			 * ameliorates misestimates when the min or max is moving as a
833 			 * result of changes since the last ANALYZE.  Note that this could
834 			 * result in effectively including MCVs into the histogram that
835 			 * weren't there before, but we don't try to correct for that.
836 			 */
837 			double		histfrac;
838 			int			lobound = 0;	/* first possible slot to search */
839 			int			hibound = sslot.nvalues;	/* last+1 slot to search */
840 			bool		have_end = false;
841 
842 			/*
843 			 * If there are only two histogram entries, we'll want up-to-date
844 			 * values for both.  (If there are more than two, we need at most
845 			 * one of them to be updated, so we deal with that within the
846 			 * loop.)
847 			 */
848 			if (sslot.nvalues == 2)
849 				have_end = get_actual_variable_range(root,
850 													 vardata,
851 													 sslot.staop,
852 													 &sslot.values[0],
853 													 &sslot.values[1]);
854 
855 			while (lobound < hibound)
856 			{
857 				int			probe = (lobound + hibound) / 2;
858 				bool		ltcmp;
859 
860 				/*
861 				 * If we find ourselves about to compare to the first or last
862 				 * histogram entry, first try to replace it with the actual
863 				 * current min or max (unless we already did so above).
864 				 */
865 				if (probe == 0 && sslot.nvalues > 2)
866 					have_end = get_actual_variable_range(root,
867 														 vardata,
868 														 sslot.staop,
869 														 &sslot.values[0],
870 														 NULL);
871 				else if (probe == sslot.nvalues - 1 && sslot.nvalues > 2)
872 					have_end = get_actual_variable_range(root,
873 														 vardata,
874 														 sslot.staop,
875 														 NULL,
876 														 &sslot.values[probe]);
877 
878 				ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
879 													   DEFAULT_COLLATION_OID,
880 													   sslot.values[probe],
881 													   constval));
882 				if (isgt)
883 					ltcmp = !ltcmp;
884 				if (ltcmp)
885 					lobound = probe + 1;
886 				else
887 					hibound = probe;
888 			}
889 
890 			if (lobound <= 0)
891 			{
892 				/*
893 				 * Constant is below lower histogram boundary.  More
894 				 * precisely, we have found that no entry in the histogram
895 				 * satisfies the inequality clause (if !isgt) or they all do
896 				 * (if isgt).  We estimate that that's true of the entire
897 				 * table, so set histfrac to 0.0 (which we'll flip to 1.0
898 				 * below, if isgt).
899 				 */
900 				histfrac = 0.0;
901 			}
902 			else if (lobound >= sslot.nvalues)
903 			{
904 				/*
905 				 * Inverse case: constant is above upper histogram boundary.
906 				 */
907 				histfrac = 1.0;
908 			}
909 			else
910 			{
911 				/* We have values[i-1] <= constant <= values[i]. */
912 				int			i = lobound;
913 				double		eq_selec = 0;
914 				double		val,
915 							high,
916 							low;
917 				double		binfrac;
918 
919 				/*
920 				 * In the cases where we'll need it below, obtain an estimate
921 				 * of the selectivity of "x = constval".  We use a calculation
922 				 * similar to what var_eq_const() does for a non-MCV constant,
923 				 * ie, estimate that all distinct non-MCV values occur equally
924 				 * often.  But multiplication by "1.0 - sumcommon - nullfrac"
925 				 * will be done by our caller, so we shouldn't do that here.
926 				 * Therefore we can't try to clamp the estimate by reference
927 				 * to the least common MCV; the result would be too small.
928 				 *
929 				 * Note: since this is effectively assuming that constval
930 				 * isn't an MCV, it's logically dubious if constval in fact is
931 				 * one.  But we have to apply *some* correction for equality,
932 				 * and anyway we cannot tell if constval is an MCV, since we
933 				 * don't have a suitable equality operator at hand.
934 				 */
935 				if (i == 1 || isgt == iseq)
936 				{
937 					double		otherdistinct;
938 					bool		isdefault;
939 					AttStatsSlot mcvslot;
940 
941 					/* Get estimated number of distinct values */
942 					otherdistinct = get_variable_numdistinct(vardata,
943 															 &isdefault);
944 
945 					/* Subtract off the number of known MCVs */
946 					if (get_attstatsslot(&mcvslot, vardata->statsTuple,
947 										 STATISTIC_KIND_MCV, InvalidOid,
948 										 ATTSTATSSLOT_NUMBERS))
949 					{
950 						otherdistinct -= mcvslot.nnumbers;
951 						free_attstatsslot(&mcvslot);
952 					}
953 
954 					/* If result doesn't seem sane, leave eq_selec at 0 */
955 					if (otherdistinct > 1)
956 						eq_selec = 1.0 / otherdistinct;
957 				}
958 
959 				/*
960 				 * Convert the constant and the two nearest bin boundary
961 				 * values to a uniform comparison scale, and do a linear
962 				 * interpolation within this bin.
963 				 */
964 				if (convert_to_scalar(constval, consttype, &val,
965 									  sslot.values[i - 1], sslot.values[i],
966 									  vardata->vartype,
967 									  &low, &high))
968 				{
969 					if (high <= low)
970 					{
971 						/* cope if bin boundaries appear identical */
972 						binfrac = 0.5;
973 					}
974 					else if (val <= low)
975 						binfrac = 0.0;
976 					else if (val >= high)
977 						binfrac = 1.0;
978 					else
979 					{
980 						binfrac = (val - low) / (high - low);
981 
982 						/*
983 						 * Watch out for the possibility that we got a NaN or
984 						 * Infinity from the division.  This can happen
985 						 * despite the previous checks, if for example "low"
986 						 * is -Infinity.
987 						 */
988 						if (isnan(binfrac) ||
989 							binfrac < 0.0 || binfrac > 1.0)
990 							binfrac = 0.5;
991 					}
992 				}
993 				else
994 				{
995 					/*
996 					 * Ideally we'd produce an error here, on the grounds that
997 					 * the given operator shouldn't have scalarXXsel
998 					 * registered as its selectivity func unless we can deal
999 					 * with its operand types.  But currently, all manner of
1000 					 * stuff is invoking scalarXXsel, so give a default
1001 					 * estimate until that can be fixed.
1002 					 */
1003 					binfrac = 0.5;
1004 				}
1005 
1006 				/*
1007 				 * Now, compute the overall selectivity across the values
1008 				 * represented by the histogram.  We have i-1 full bins and
1009 				 * binfrac partial bin below the constant.
1010 				 */
1011 				histfrac = (double) (i - 1) + binfrac;
1012 				histfrac /= (double) (sslot.nvalues - 1);
1013 
1014 				/*
1015 				 * At this point, histfrac is an estimate of the fraction of
1016 				 * the population represented by the histogram that satisfies
1017 				 * "x <= constval".  Somewhat remarkably, this statement is
1018 				 * true regardless of which operator we were doing the probes
1019 				 * with, so long as convert_to_scalar() delivers reasonable
1020 				 * results.  If the probe constant is equal to some histogram
1021 				 * entry, we would have considered the bin to the left of that
1022 				 * entry if probing with "<" or ">=", or the bin to the right
1023 				 * if probing with "<=" or ">"; but binfrac would have come
1024 				 * out as 1.0 in the first case and 0.0 in the second, leading
1025 				 * to the same histfrac in either case.  For probe constants
1026 				 * between histogram entries, we find the same bin and get the
1027 				 * same estimate with any operator.
1028 				 *
1029 				 * The fact that the estimate corresponds to "x <= constval"
1030 				 * and not "x < constval" is because of the way that ANALYZE
1031 				 * constructs the histogram: each entry is, effectively, the
1032 				 * rightmost value in its sample bucket.  So selectivity
1033 				 * values that are exact multiples of 1/(histogram_size-1)
1034 				 * should be understood as estimates including a histogram
1035 				 * entry plus everything to its left.
1036 				 *
1037 				 * However, that breaks down for the first histogram entry,
1038 				 * which necessarily is the leftmost value in its sample
1039 				 * bucket.  That means the first histogram bin is slightly
1040 				 * narrower than the rest, by an amount equal to eq_selec.
1041 				 * Another way to say that is that we want "x <= leftmost" to
1042 				 * be estimated as eq_selec not zero.  So, if we're dealing
1043 				 * with the first bin (i==1), rescale to make that true while
1044 				 * adjusting the rest of that bin linearly.
1045 				 */
1046 				if (i == 1)
1047 					histfrac += eq_selec * (1.0 - binfrac);
1048 
1049 				/*
1050 				 * "x <= constval" is good if we want an estimate for "<=" or
1051 				 * ">", but if we are estimating for "<" or ">=", we now need
1052 				 * to decrease the estimate by eq_selec.
1053 				 */
1054 				if (isgt == iseq)
1055 					histfrac -= eq_selec;
1056 			}
1057 
1058 			/*
1059 			 * Now the estimate is finished for "<" and "<=" cases.  If we are
1060 			 * estimating for ">" or ">=", flip it.
1061 			 */
1062 			hist_selec = isgt ? (1.0 - histfrac) : histfrac;
1063 
1064 			/*
1065 			 * The histogram boundaries are only approximate to begin with,
1066 			 * and may well be out of date anyway.  Therefore, don't believe
1067 			 * extremely small or large selectivity estimates --- unless we
1068 			 * got actual current endpoint values from the table, in which
1069 			 * case just do the usual sanity clamp.  Somewhat arbitrarily, we
1070 			 * set the cutoff for other cases at a hundredth of the histogram
1071 			 * resolution.
1072 			 */
1073 			if (have_end)
1074 				CLAMP_PROBABILITY(hist_selec);
1075 			else
1076 			{
1077 				double		cutoff = 0.01 / (double) (sslot.nvalues - 1);
1078 
1079 				if (hist_selec < cutoff)
1080 					hist_selec = cutoff;
1081 				else if (hist_selec > 1.0 - cutoff)
1082 					hist_selec = 1.0 - cutoff;
1083 			}
1084 		}
1085 
1086 		free_attstatsslot(&sslot);
1087 	}
1088 
1089 	return hist_selec;
1090 }
1091 
1092 /*
1093  * Common wrapper function for the selectivity estimators that simply
1094  * invoke scalarineqsel().
1095  */
1096 static Datum
1097 scalarineqsel_wrapper(PG_FUNCTION_ARGS, bool isgt, bool iseq)
1098 {
1099 	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1100 	Oid			operator = PG_GETARG_OID(1);
1101 	List	   *args = (List *) PG_GETARG_POINTER(2);
1102 	int			varRelid = PG_GETARG_INT32(3);
1103 	VariableStatData vardata;
1104 	Node	   *other;
1105 	bool		varonleft;
1106 	Datum		constval;
1107 	Oid			consttype;
1108 	double		selec;
1109 
1110 	/*
1111 	 * If expression is not variable op something or something op variable,
1112 	 * then punt and return a default estimate.
1113 	 */
1114 	if (!get_restriction_variable(root, args, varRelid,
1115 								  &vardata, &other, &varonleft))
1116 		PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
1117 
1118 	/*
1119 	 * Can't do anything useful if the something is not a constant, either.
1120 	 */
1121 	if (!IsA(other, Const))
1122 	{
1123 		ReleaseVariableStats(vardata);
1124 		PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
1125 	}
1126 
1127 	/*
1128 	 * If the constant is NULL, assume operator is strict and return zero, ie,
1129 	 * operator will never return TRUE.
1130 	 */
1131 	if (((Const *) other)->constisnull)
1132 	{
1133 		ReleaseVariableStats(vardata);
1134 		PG_RETURN_FLOAT8(0.0);
1135 	}
1136 	constval = ((Const *) other)->constvalue;
1137 	consttype = ((Const *) other)->consttype;
1138 
1139 	/*
1140 	 * Force the var to be on the left to simplify logic in scalarineqsel.
1141 	 */
1142 	if (!varonleft)
1143 	{
1144 		operator = get_commutator(operator);
1145 		if (!operator)
1146 		{
1147 			/* Use default selectivity (should we raise an error instead?) */
1148 			ReleaseVariableStats(vardata);
1149 			PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
1150 		}
1151 		isgt = !isgt;
1152 	}
1153 
1154 	/* The rest of the work is done by scalarineqsel(). */
1155 	selec = scalarineqsel(root, operator, isgt, iseq,
1156 						  &vardata, constval, consttype);
1157 
1158 	ReleaseVariableStats(vardata);
1159 
1160 	PG_RETURN_FLOAT8((float8) selec);
1161 }
1162 
/*
 *		scalarltsel		- Selectivity of "<" for scalars.
 */
Datum
scalarltsel(PG_FUNCTION_ARGS)
{
	/* "<" : not greater-than (isgt=false), equality excluded (iseq=false) */
	return scalarineqsel_wrapper(fcinfo, false, false);
}
1171 
/*
 *		scalarlesel		- Selectivity of "<=" for scalars.
 */
Datum
scalarlesel(PG_FUNCTION_ARGS)
{
	/* "<=" : not greater-than (isgt=false), equality included (iseq=true) */
	return scalarineqsel_wrapper(fcinfo, false, true);
}
1180 
/*
 *		scalargtsel		- Selectivity of ">" for scalars.
 */
Datum
scalargtsel(PG_FUNCTION_ARGS)
{
	/* ">" : greater-than (isgt=true), equality excluded (iseq=false) */
	return scalarineqsel_wrapper(fcinfo, true, false);
}
1189 
/*
 *		scalargesel		- Selectivity of ">=" for scalars.
 */
Datum
scalargesel(PG_FUNCTION_ARGS)
{
	/* ">=" : greater-than (isgt=true), equality included (iseq=true) */
	return scalarineqsel_wrapper(fcinfo, true, true);
}
1198 
/*
 * patternsel			- Generic code for pattern-match selectivity.
 *
 * 'ptype' identifies the pattern flavor (LIKE, ILIKE, regex, prefix, ...)
 * and 'negate' is true for the NOT-match variants.  The fmgr arguments
 * follow the standard restriction-estimator convention: (root, operator,
 * args, varRelid), plus the pattern operator's input collation.
 *
 * Returns the estimated fraction of rows satisfying the (possibly negated)
 * pattern clause, clamped to a valid probability.
 */
static double
patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	Oid			operator = PG_GETARG_OID(1);
	List	   *args = (List *) PG_GETARG_POINTER(2);
	int			varRelid = PG_GETARG_INT32(3);
	Oid			collation = PG_GET_COLLATION();
	VariableStatData vardata;
	Node	   *other;
	bool		varonleft;
	Datum		constval;
	Oid			consttype;
	Oid			vartype;
	Oid			opfamily;
	Pattern_Prefix_Status pstatus;
	Const	   *patt;
	Const	   *prefix = NULL;	/* fixed prefix implied by the pattern, if any */
	Selectivity rest_selec = 0; /* selectivity of pattern after the prefix */
	double		nullfrac = 0.0;
	double		result;

	/*
	 * If this is for a NOT LIKE or similar operator, get the corresponding
	 * positive-match operator and work with that.  Set result to the correct
	 * default estimate, too.
	 */
	if (negate)
	{
		operator = get_negator(operator);
		if (!OidIsValid(operator))
			elog(ERROR, "patternsel called for operator without a negator");
		result = 1.0 - DEFAULT_MATCH_SEL;
	}
	else
	{
		result = DEFAULT_MATCH_SEL;
	}

	/*
	 * If expression is not variable op constant, then punt and return a
	 * default estimate.
	 */
	if (!get_restriction_variable(root, args, varRelid,
								  &vardata, &other, &varonleft))
		return result;
	if (!varonleft || !IsA(other, Const))
	{
		ReleaseVariableStats(vardata);
		return result;
	}

	/*
	 * If the constant is NULL, assume operator is strict and return zero, ie,
	 * operator will never return TRUE.  (It's zero even for a negator op.)
	 */
	if (((Const *) other)->constisnull)
	{
		ReleaseVariableStats(vardata);
		return 0.0;
	}
	constval = ((Const *) other)->constvalue;
	consttype = ((Const *) other)->consttype;

	/*
	 * The right-hand const is type text or bytea for all supported operators.
	 * We do not expect to see binary-compatible types here, since
	 * const-folding should have relabeled the const to exactly match the
	 * operator's declared type.
	 */
	if (consttype != TEXTOID && consttype != BYTEAOID)
	{
		ReleaseVariableStats(vardata);
		return result;
	}

	/*
	 * Similarly, the exposed type of the left-hand side should be one of
	 * those we know.  (Do not look at vardata.atttype, which might be
	 * something binary-compatible but different.)	We can use it to choose
	 * the index opfamily from which we must draw the comparison operators.
	 *
	 * NOTE: It would be more correct to use the PATTERN opfamilies than the
	 * simple ones, but at the moment ANALYZE will not generate statistics for
	 * the PATTERN operators.  But our results are so approximate anyway that
	 * it probably hardly matters.
	 */
	vartype = vardata.vartype;

	switch (vartype)
	{
		case TEXTOID:
			opfamily = TEXT_BTREE_FAM_OID;
			break;
		case BPCHAROID:
			opfamily = BPCHAR_BTREE_FAM_OID;
			break;
		case NAMEOID:
			opfamily = NAME_BTREE_FAM_OID;
			break;
		case BYTEAOID:
			opfamily = BYTEA_BTREE_FAM_OID;
			break;
		default:
			/* Unsupported variable type: fall back on the default estimate */
			ReleaseVariableStats(vardata);
			return result;
	}

	/*
	 * Grab the nullfrac for use below.
	 */
	if (HeapTupleIsValid(vardata.statsTuple))
	{
		Form_pg_statistic stats;

		stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
		nullfrac = stats->stanullfrac;
	}

	/*
	 * Pull out any fixed prefix implied by the pattern, and estimate the
	 * fractional selectivity of the remainder of the pattern.  Unlike many of
	 * the other functions in this file, we use the pattern operator's actual
	 * collation for this step.  This is not because we expect the collation
	 * to make a big difference in the selectivity estimate (it seldom would),
	 * but because we want to be sure we cache compiled regexps under the
	 * right cache key, so that they can be re-used at runtime.
	 */
	patt = (Const *) other;
	pstatus = pattern_fixed_prefix(patt, ptype, collation,
								   &prefix, &rest_selec);

	/*
	 * If necessary, coerce the prefix constant to the right type.
	 */
	if (prefix && prefix->consttype != vartype)
	{
		char	   *prefixstr;

		switch (prefix->consttype)
		{
			case TEXTOID:
				prefixstr = TextDatumGetCString(prefix->constvalue);
				break;
			case BYTEAOID:
				prefixstr = DatumGetCString(DirectFunctionCall1(byteaout,
																prefix->constvalue));
				break;
			default:
				elog(ERROR, "unrecognized consttype: %u",
					 prefix->consttype);
				/* not reached; just here to keep the compiler quiet */
				ReleaseVariableStats(vardata);
				return result;
		}
		prefix = string_to_const(prefixstr, vartype);
		pfree(prefixstr);
	}

	if (pstatus == Pattern_Prefix_Exact)
	{
		/*
		 * Pattern specifies an exact match, so pretend operator is '='
		 */
		Oid			eqopr = get_opfamily_member(opfamily, vartype, vartype,
												BTEqualStrategyNumber);

		if (eqopr == InvalidOid)
			elog(ERROR, "no = operator for opfamily %u", opfamily);
		result = var_eq_const(&vardata, eqopr, prefix->constvalue,
							  false, true, false);
	}
	else
	{
		/*
		 * Not exact-match pattern.  If we have a sufficiently large
		 * histogram, estimate selectivity for the histogram part of the
		 * population by counting matches in the histogram.  If not, estimate
		 * selectivity of the fixed prefix and remainder of pattern
		 * separately, then combine the two to get an estimate of the
		 * selectivity for the part of the column population represented by
		 * the histogram.  (For small histograms, we combine these
		 * approaches.)
		 *
		 * We then add up data for any most-common-values values; these are
		 * not in the histogram population, and we can get exact answers for
		 * them by applying the pattern operator, so there's no reason to
		 * approximate.  (If the MCVs cover a significant part of the total
		 * population, this gives us a big leg up in accuracy.)
		 */
		Selectivity selec;
		int			hist_size;
		FmgrInfo	opproc;
		double		mcv_selec,
					sumcommon;

		/* Try to use the histogram entries to get selectivity */
		fmgr_info(get_opcode(operator), &opproc);

		selec = histogram_selectivity(&vardata, &opproc, constval, true,
									  10, 1, &hist_size);

		/* If not at least 100 entries, use the heuristic method */
		if (hist_size < 100)
		{
			Selectivity heursel;
			Selectivity prefixsel;

			if (pstatus == Pattern_Prefix_Partial)
				prefixsel = prefix_selectivity(root, &vardata, vartype,
											   opfamily, prefix);
			else
				prefixsel = 1.0;
			heursel = prefixsel * rest_selec;

			if (selec < 0)		/* fewer than 10 histogram entries? */
				selec = heursel;
			else
			{
				/*
				 * For histogram sizes from 10 to 100, we combine the
				 * histogram and heuristic selectivities, putting increasingly
				 * more trust in the histogram for larger sizes.
				 */
				double		hist_weight = hist_size / 100.0;

				selec = selec * hist_weight + heursel * (1.0 - hist_weight);
			}
		}

		/* In any case, don't believe extremely small or large estimates. */
		if (selec < 0.0001)
			selec = 0.0001;
		else if (selec > 0.9999)
			selec = 0.9999;

		/*
		 * If we have most-common-values info, add up the fractions of the MCV
		 * entries that satisfy MCV OP PATTERN.  These fractions contribute
		 * directly to the result selectivity.  Also add up the total fraction
		 * represented by MCV entries.
		 */
		mcv_selec = mcv_selectivity(&vardata, &opproc, constval, true,
									&sumcommon);

		/*
		 * Now merge the results from the MCV and histogram calculations,
		 * realizing that the histogram covers only the non-null values that
		 * are not listed in MCV.
		 */
		selec *= 1.0 - nullfrac - sumcommon;
		selec += mcv_selec;
		result = selec;
	}

	/*
	 * Now adjust if we wanted not-match rather than match.  Note that NULLs
	 * satisfy neither the positive nor the negated clause, so nullfrac is
	 * subtracted out here as well.
	 */
	if (negate)
		result = 1.0 - result - nullfrac;

	/* result should be in range, but make sure... */
	CLAMP_PROBABILITY(result);

	if (prefix)
	{
		pfree(DatumGetPointer(prefix->constvalue));
		pfree(prefix);
	}

	ReleaseVariableStats(vardata);

	return result;
}
1473 
/*
 *		regexeqsel		- Selectivity of regular-expression pattern match.
 */
Datum
regexeqsel(PG_FUNCTION_ARGS)
{
	/* case-sensitive regex, positive match */
	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, false));
}
1482 
/*
 *		icregexeqsel	- Selectivity of case-insensitive regex match.
 */
Datum
icregexeqsel(PG_FUNCTION_ARGS)
{
	/* case-insensitive regex, positive match */
	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, false));
}
1491 
/*
 *		likesel			- Selectivity of LIKE pattern match.
 */
Datum
likesel(PG_FUNCTION_ARGS)
{
	/* case-sensitive LIKE, positive match */
	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, false));
}
1500 
/*
 *		prefixsel			- selectivity of prefix operator
 */
Datum
prefixsel(PG_FUNCTION_ARGS)
{
	/* prefix match, positive */
	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false));
}
1509 
/*
 *		iclikesel			- Selectivity of ILIKE pattern match.
 */
Datum
iclikesel(PG_FUNCTION_ARGS)
{
	/* case-insensitive LIKE, positive match */
	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, false));
}
1519 
/*
 *		regexnesel		- Selectivity of regular-expression pattern non-match.
 */
Datum
regexnesel(PG_FUNCTION_ARGS)
{
	/* case-sensitive regex, negated match */
	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, true));
}
1528 
/*
 *		icregexnesel	- Selectivity of case-insensitive regex non-match.
 */
Datum
icregexnesel(PG_FUNCTION_ARGS)
{
	/* case-insensitive regex, negated match */
	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, true));
}
1537 
/*
 *		nlikesel		- Selectivity of LIKE pattern non-match.
 */
Datum
nlikesel(PG_FUNCTION_ARGS)
{
	/* case-sensitive LIKE, negated match */
	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, true));
}
1546 
/*
 *		icnlikesel		- Selectivity of ILIKE pattern non-match.
 */
Datum
icnlikesel(PG_FUNCTION_ARGS)
{
	/* case-insensitive LIKE, negated match */
	PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true));
}
1555 
1556 /*
1557  *		boolvarsel		- Selectivity of Boolean variable.
1558  *
1559  * This can actually be called on any boolean-valued expression.  If it
1560  * involves only Vars of the specified relation, and if there are statistics
1561  * about the Var or expression (the latter is possible if it's indexed) then
1562  * we'll produce a real estimate; otherwise it's just a default.
1563  */
1564 Selectivity
1565 boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
1566 {
1567 	VariableStatData vardata;
1568 	double		selec;
1569 
1570 	examine_variable(root, arg, varRelid, &vardata);
1571 	if (HeapTupleIsValid(vardata.statsTuple))
1572 	{
1573 		/*
1574 		 * A boolean variable V is equivalent to the clause V = 't', so we
1575 		 * compute the selectivity as if that is what we have.
1576 		 */
1577 		selec = var_eq_const(&vardata, BooleanEqualOperator,
1578 							 BoolGetDatum(true), false, true, false);
1579 	}
1580 	else if (is_funcclause(arg))
1581 	{
1582 		/*
1583 		 * If we have no stats and it's a function call, estimate 0.3333333.
1584 		 * This seems a pretty unprincipled choice, but Postgres has been
1585 		 * using that estimate for function calls since 1992.  The hoariness
1586 		 * of this behavior suggests that we should not be in too much hurry
1587 		 * to use another value.
1588 		 */
1589 		selec = 0.3333333;
1590 	}
1591 	else
1592 	{
1593 		/* Otherwise, the default estimate is 0.5 */
1594 		selec = 0.5;
1595 	}
1596 	ReleaseVariableStats(vardata);
1597 	return selec;
1598 }
1599 
/*
 *		booltestsel		- Selectivity of BooleanTest Node.
 *
 * Estimates the fraction of rows satisfying "arg IS [NOT] TRUE/FALSE/UNKNOWN".
 * When statistics are available we use the column's MCV list and null
 * fraction; with no MCV info we still use stanullfrac and assume a 50-50
 * true/false split; with no stats at all we fall back on
 * clause_selectivity() or default constants.
 */
Selectivity
booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
			int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
{
	VariableStatData vardata;
	double		selec;

	examine_variable(root, arg, varRelid, &vardata);

	if (HeapTupleIsValid(vardata.statsTuple))
	{
		Form_pg_statistic stats;
		double		freq_null;
		AttStatsSlot sslot;

		stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
		freq_null = stats->stanullfrac;

		if (get_attstatsslot(&sslot, vardata.statsTuple,
							 STATISTIC_KIND_MCV, InvalidOid,
							 ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)
			&& sslot.nnumbers > 0)
		{
			double		freq_true;
			double		freq_false;

			/*
			 * Get first MCV frequency and derive frequency for true.  Since
			 * a boolean's non-null values can only be true or false, the
			 * first MCV frequency plus the null fraction determines the
			 * other frequency.
			 */
			if (DatumGetBool(sslot.values[0]))
				freq_true = sslot.numbers[0];
			else
				freq_true = 1.0 - sslot.numbers[0] - freq_null;

			/*
			 * Next derive frequency for false. Then use these as appropriate
			 * to derive frequency for each case.
			 */
			freq_false = 1.0 - freq_true - freq_null;

			switch (booltesttype)
			{
				case IS_UNKNOWN:
					/* select only NULL values */
					selec = freq_null;
					break;
				case IS_NOT_UNKNOWN:
					/* select non-NULL values */
					selec = 1.0 - freq_null;
					break;
				case IS_TRUE:
					/* select only TRUE values */
					selec = freq_true;
					break;
				case IS_NOT_TRUE:
					/* select non-TRUE values */
					selec = 1.0 - freq_true;
					break;
				case IS_FALSE:
					/* select only FALSE values */
					selec = freq_false;
					break;
				case IS_NOT_FALSE:
					/* select non-FALSE values */
					selec = 1.0 - freq_false;
					break;
				default:
					elog(ERROR, "unrecognized booltesttype: %d",
						 (int) booltesttype);
					selec = 0.0;	/* Keep compiler quiet */
					break;
			}

			free_attstatsslot(&sslot);
		}
		else
		{
			/*
			 * No most-common-value info available. Still have null fraction
			 * information, so use it for IS [NOT] UNKNOWN. Otherwise adjust
			 * for null fraction and assume a 50-50 split of TRUE and FALSE.
			 */
			switch (booltesttype)
			{
				case IS_UNKNOWN:
					/* select only NULL values */
					selec = freq_null;
					break;
				case IS_NOT_UNKNOWN:
					/* select non-NULL values */
					selec = 1.0 - freq_null;
					break;
				case IS_TRUE:
				case IS_FALSE:
					/* Assume we select half of the non-NULL values */
					selec = (1.0 - freq_null) / 2.0;
					break;
				case IS_NOT_TRUE:
				case IS_NOT_FALSE:
					/* Assume we select NULLs plus half of the non-NULLs */
					/* equiv. to freq_null + (1.0 - freq_null) / 2.0 */
					selec = (freq_null + 1.0) / 2.0;
					break;
				default:
					elog(ERROR, "unrecognized booltesttype: %d",
						 (int) booltesttype);
					selec = 0.0;	/* Keep compiler quiet */
					break;
			}
		}
	}
	else
	{
		/*
		 * If we can't get variable statistics for the argument, perhaps
		 * clause_selectivity can do something with it.  We ignore the
		 * possibility of a NULL value when using clause_selectivity, and just
		 * assume the value is either TRUE or FALSE.
		 */
		switch (booltesttype)
		{
			case IS_UNKNOWN:
				selec = DEFAULT_UNK_SEL;
				break;
			case IS_NOT_UNKNOWN:
				selec = DEFAULT_NOT_UNK_SEL;
				break;
			case IS_TRUE:
			case IS_NOT_FALSE:
				selec = (double) clause_selectivity(root, arg,
													varRelid,
													jointype, sjinfo);
				break;
			case IS_FALSE:
			case IS_NOT_TRUE:
				selec = 1.0 - (double) clause_selectivity(root, arg,
														  varRelid,
														  jointype, sjinfo);
				break;
			default:
				elog(ERROR, "unrecognized booltesttype: %d",
					 (int) booltesttype);
				selec = 0.0;	/* Keep compiler quiet */
				break;
		}
	}

	ReleaseVariableStats(vardata);

	/* result should be in range, but make sure... */
	CLAMP_PROBABILITY(selec);

	return (Selectivity) selec;
}
1757 
1758 /*
1759  *		nulltestsel		- Selectivity of NullTest Node.
1760  */
1761 Selectivity
1762 nulltestsel(PlannerInfo *root, NullTestType nulltesttype, Node *arg,
1763 			int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
1764 {
1765 	VariableStatData vardata;
1766 	double		selec;
1767 
1768 	examine_variable(root, arg, varRelid, &vardata);
1769 
1770 	if (HeapTupleIsValid(vardata.statsTuple))
1771 	{
1772 		Form_pg_statistic stats;
1773 		double		freq_null;
1774 
1775 		stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1776 		freq_null = stats->stanullfrac;
1777 
1778 		switch (nulltesttype)
1779 		{
1780 			case IS_NULL:
1781 
1782 				/*
1783 				 * Use freq_null directly.
1784 				 */
1785 				selec = freq_null;
1786 				break;
1787 			case IS_NOT_NULL:
1788 
1789 				/*
1790 				 * Select not unknown (not null) values. Calculate from
1791 				 * freq_null.
1792 				 */
1793 				selec = 1.0 - freq_null;
1794 				break;
1795 			default:
1796 				elog(ERROR, "unrecognized nulltesttype: %d",
1797 					 (int) nulltesttype);
1798 				return (Selectivity) 0; /* keep compiler quiet */
1799 		}
1800 	}
1801 	else
1802 	{
1803 		/*
1804 		 * No ANALYZE stats available, so make a guess
1805 		 */
1806 		switch (nulltesttype)
1807 		{
1808 			case IS_NULL:
1809 				selec = DEFAULT_UNK_SEL;
1810 				break;
1811 			case IS_NOT_NULL:
1812 				selec = DEFAULT_NOT_UNK_SEL;
1813 				break;
1814 			default:
1815 				elog(ERROR, "unrecognized nulltesttype: %d",
1816 					 (int) nulltesttype);
1817 				return (Selectivity) 0; /* keep compiler quiet */
1818 		}
1819 	}
1820 
1821 	ReleaseVariableStats(vardata);
1822 
1823 	/* result should be in range, but make sure... */
1824 	CLAMP_PROBABILITY(selec);
1825 
1826 	return (Selectivity) selec;
1827 }
1828 
1829 /*
1830  * strip_array_coercion - strip binary-compatible relabeling from an array expr
1831  *
1832  * For array values, the parser normally generates ArrayCoerceExpr conversions,
1833  * but it seems possible that RelabelType might show up.  Also, the planner
1834  * is not currently tense about collapsing stacked ArrayCoerceExpr nodes,
1835  * so we need to be ready to deal with more than one level.
1836  */
1837 static Node *
1838 strip_array_coercion(Node *node)
1839 {
1840 	for (;;)
1841 	{
1842 		if (node && IsA(node, ArrayCoerceExpr))
1843 		{
1844 			ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) node;
1845 
1846 			/*
1847 			 * If the per-element expression is just a RelabelType on top of
1848 			 * CaseTestExpr, then we know it's a binary-compatible relabeling.
1849 			 */
1850 			if (IsA(acoerce->elemexpr, RelabelType) &&
1851 				IsA(((RelabelType *) acoerce->elemexpr)->arg, CaseTestExpr))
1852 				node = (Node *) acoerce->arg;
1853 			else
1854 				break;
1855 		}
1856 		else if (node && IsA(node, RelabelType))
1857 		{
1858 			/* We don't really expect this case, but may as well cope */
1859 			node = (Node *) ((RelabelType *) node)->arg;
1860 		}
1861 		else
1862 			break;
1863 	}
1864 	return node;
1865 }
1866 
1867 /*
1868  *		scalararraysel		- Selectivity of ScalarArrayOpExpr Node.
1869  */
1870 Selectivity
1871 scalararraysel(PlannerInfo *root,
1872 			   ScalarArrayOpExpr *clause,
1873 			   bool is_join_clause,
1874 			   int varRelid,
1875 			   JoinType jointype,
1876 			   SpecialJoinInfo *sjinfo)
1877 {
1878 	Oid			operator = clause->opno;
1879 	bool		useOr = clause->useOr;
1880 	bool		isEquality = false;
1881 	bool		isInequality = false;
1882 	Node	   *leftop;
1883 	Node	   *rightop;
1884 	Oid			nominal_element_type;
1885 	Oid			nominal_element_collation;
1886 	TypeCacheEntry *typentry;
1887 	RegProcedure oprsel;
1888 	FmgrInfo	oprselproc;
1889 	Selectivity s1;
1890 	Selectivity s1disjoint;
1891 
1892 	/* First, deconstruct the expression */
1893 	Assert(list_length(clause->args) == 2);
1894 	leftop = (Node *) linitial(clause->args);
1895 	rightop = (Node *) lsecond(clause->args);
1896 
1897 	/* aggressively reduce both sides to constants */
1898 	leftop = estimate_expression_value(root, leftop);
1899 	rightop = estimate_expression_value(root, rightop);
1900 
1901 	/* get nominal (after relabeling) element type of rightop */
1902 	nominal_element_type = get_base_element_type(exprType(rightop));
1903 	if (!OidIsValid(nominal_element_type))
1904 		return (Selectivity) 0.5;	/* probably shouldn't happen */
1905 	/* get nominal collation, too, for generating constants */
1906 	nominal_element_collation = exprCollation(rightop);
1907 
1908 	/* look through any binary-compatible relabeling of rightop */
1909 	rightop = strip_array_coercion(rightop);
1910 
1911 	/*
1912 	 * Detect whether the operator is the default equality or inequality
1913 	 * operator of the array element type.
1914 	 */
1915 	typentry = lookup_type_cache(nominal_element_type, TYPECACHE_EQ_OPR);
1916 	if (OidIsValid(typentry->eq_opr))
1917 	{
1918 		if (operator == typentry->eq_opr)
1919 			isEquality = true;
1920 		else if (get_negator(operator) == typentry->eq_opr)
1921 			isInequality = true;
1922 	}
1923 
1924 	/*
1925 	 * If it is equality or inequality, we might be able to estimate this as a
1926 	 * form of array containment; for instance "const = ANY(column)" can be
1927 	 * treated as "ARRAY[const] <@ column".  scalararraysel_containment tries
1928 	 * that, and returns the selectivity estimate if successful, or -1 if not.
1929 	 */
1930 	if ((isEquality || isInequality) && !is_join_clause)
1931 	{
1932 		s1 = scalararraysel_containment(root, leftop, rightop,
1933 										nominal_element_type,
1934 										isEquality, useOr, varRelid);
1935 		if (s1 >= 0.0)
1936 			return s1;
1937 	}
1938 
1939 	/*
1940 	 * Look up the underlying operator's selectivity estimator. Punt if it
1941 	 * hasn't got one.
1942 	 */
1943 	if (is_join_clause)
1944 		oprsel = get_oprjoin(operator);
1945 	else
1946 		oprsel = get_oprrest(operator);
1947 	if (!oprsel)
1948 		return (Selectivity) 0.5;
1949 	fmgr_info(oprsel, &oprselproc);
1950 
1951 	/*
1952 	 * In the array-containment check above, we must only believe that an
1953 	 * operator is equality or inequality if it is the default btree equality
1954 	 * operator (or its negator) for the element type, since those are the
1955 	 * operators that array containment will use.  But in what follows, we can
1956 	 * be a little laxer, and also believe that any operators using eqsel() or
1957 	 * neqsel() as selectivity estimator act like equality or inequality.
1958 	 */
1959 	if (oprsel == F_EQSEL || oprsel == F_EQJOINSEL)
1960 		isEquality = true;
1961 	else if (oprsel == F_NEQSEL || oprsel == F_NEQJOINSEL)
1962 		isInequality = true;
1963 
1964 	/*
1965 	 * We consider three cases:
1966 	 *
1967 	 * 1. rightop is an Array constant: deconstruct the array, apply the
1968 	 * operator's selectivity function for each array element, and merge the
1969 	 * results in the same way that clausesel.c does for AND/OR combinations.
1970 	 *
1971 	 * 2. rightop is an ARRAY[] construct: apply the operator's selectivity
1972 	 * function for each element of the ARRAY[] construct, and merge.
1973 	 *
1974 	 * 3. otherwise, make a guess ...
1975 	 */
1976 	if (rightop && IsA(rightop, Const))
1977 	{
1978 		Datum		arraydatum = ((Const *) rightop)->constvalue;
1979 		bool		arrayisnull = ((Const *) rightop)->constisnull;
1980 		ArrayType  *arrayval;
1981 		int16		elmlen;
1982 		bool		elmbyval;
1983 		char		elmalign;
1984 		int			num_elems;
1985 		Datum	   *elem_values;
1986 		bool	   *elem_nulls;
1987 		int			i;
1988 
1989 		if (arrayisnull)		/* qual can't succeed if null array */
1990 			return (Selectivity) 0.0;
1991 		arrayval = DatumGetArrayTypeP(arraydatum);
1992 		get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
1993 							 &elmlen, &elmbyval, &elmalign);
1994 		deconstruct_array(arrayval,
1995 						  ARR_ELEMTYPE(arrayval),
1996 						  elmlen, elmbyval, elmalign,
1997 						  &elem_values, &elem_nulls, &num_elems);
1998 
1999 		/*
2000 		 * For generic operators, we assume the probability of success is
2001 		 * independent for each array element.  But for "= ANY" or "<> ALL",
2002 		 * if the array elements are distinct (which'd typically be the case)
2003 		 * then the probabilities are disjoint, and we should just sum them.
2004 		 *
2005 		 * If we were being really tense we would try to confirm that the
2006 		 * elements are all distinct, but that would be expensive and it
2007 		 * doesn't seem to be worth the cycles; it would amount to penalizing
2008 		 * well-written queries in favor of poorly-written ones.  However, we
2009 		 * do protect ourselves a little bit by checking whether the
2010 		 * disjointness assumption leads to an impossible (out of range)
2011 		 * probability; if so, we fall back to the normal calculation.
2012 		 */
2013 		s1 = s1disjoint = (useOr ? 0.0 : 1.0);
2014 
2015 		for (i = 0; i < num_elems; i++)
2016 		{
2017 			List	   *args;
2018 			Selectivity s2;
2019 
2020 			args = list_make2(leftop,
2021 							  makeConst(nominal_element_type,
2022 										-1,
2023 										nominal_element_collation,
2024 										elmlen,
2025 										elem_values[i],
2026 										elem_nulls[i],
2027 										elmbyval));
2028 			if (is_join_clause)
2029 				s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
2030 													  clause->inputcollid,
2031 													  PointerGetDatum(root),
2032 													  ObjectIdGetDatum(operator),
2033 													  PointerGetDatum(args),
2034 													  Int16GetDatum(jointype),
2035 													  PointerGetDatum(sjinfo)));
2036 			else
2037 				s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
2038 													  clause->inputcollid,
2039 													  PointerGetDatum(root),
2040 													  ObjectIdGetDatum(operator),
2041 													  PointerGetDatum(args),
2042 													  Int32GetDatum(varRelid)));
2043 
2044 			if (useOr)
2045 			{
2046 				s1 = s1 + s2 - s1 * s2;
2047 				if (isEquality)
2048 					s1disjoint += s2;
2049 			}
2050 			else
2051 			{
2052 				s1 = s1 * s2;
2053 				if (isInequality)
2054 					s1disjoint += s2 - 1.0;
2055 			}
2056 		}
2057 
2058 		/* accept disjoint-probability estimate if in range */
2059 		if ((useOr ? isEquality : isInequality) &&
2060 			s1disjoint >= 0.0 && s1disjoint <= 1.0)
2061 			s1 = s1disjoint;
2062 	}
2063 	else if (rightop && IsA(rightop, ArrayExpr) &&
2064 			 !((ArrayExpr *) rightop)->multidims)
2065 	{
2066 		ArrayExpr  *arrayexpr = (ArrayExpr *) rightop;
2067 		int16		elmlen;
2068 		bool		elmbyval;
2069 		ListCell   *l;
2070 
2071 		get_typlenbyval(arrayexpr->element_typeid,
2072 						&elmlen, &elmbyval);
2073 
2074 		/*
2075 		 * We use the assumption of disjoint probabilities here too, although
2076 		 * the odds of equal array elements are rather higher if the elements
2077 		 * are not all constants (which they won't be, else constant folding
2078 		 * would have reduced the ArrayExpr to a Const).  In this path it's
2079 		 * critical to have the sanity check on the s1disjoint estimate.
2080 		 */
2081 		s1 = s1disjoint = (useOr ? 0.0 : 1.0);
2082 
2083 		foreach(l, arrayexpr->elements)
2084 		{
2085 			Node	   *elem = (Node *) lfirst(l);
2086 			List	   *args;
2087 			Selectivity s2;
2088 
2089 			/*
2090 			 * Theoretically, if elem isn't of nominal_element_type we should
2091 			 * insert a RelabelType, but it seems unlikely that any operator
2092 			 * estimation function would really care ...
2093 			 */
2094 			args = list_make2(leftop, elem);
2095 			if (is_join_clause)
2096 				s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
2097 													  clause->inputcollid,
2098 													  PointerGetDatum(root),
2099 													  ObjectIdGetDatum(operator),
2100 													  PointerGetDatum(args),
2101 													  Int16GetDatum(jointype),
2102 													  PointerGetDatum(sjinfo)));
2103 			else
2104 				s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
2105 													  clause->inputcollid,
2106 													  PointerGetDatum(root),
2107 													  ObjectIdGetDatum(operator),
2108 													  PointerGetDatum(args),
2109 													  Int32GetDatum(varRelid)));
2110 
2111 			if (useOr)
2112 			{
2113 				s1 = s1 + s2 - s1 * s2;
2114 				if (isEquality)
2115 					s1disjoint += s2;
2116 			}
2117 			else
2118 			{
2119 				s1 = s1 * s2;
2120 				if (isInequality)
2121 					s1disjoint += s2 - 1.0;
2122 			}
2123 		}
2124 
2125 		/* accept disjoint-probability estimate if in range */
2126 		if ((useOr ? isEquality : isInequality) &&
2127 			s1disjoint >= 0.0 && s1disjoint <= 1.0)
2128 			s1 = s1disjoint;
2129 	}
2130 	else
2131 	{
2132 		CaseTestExpr *dummyexpr;
2133 		List	   *args;
2134 		Selectivity s2;
2135 		int			i;
2136 
2137 		/*
2138 		 * We need a dummy rightop to pass to the operator selectivity
2139 		 * routine.  It can be pretty much anything that doesn't look like a
2140 		 * constant; CaseTestExpr is a convenient choice.
2141 		 */
2142 		dummyexpr = makeNode(CaseTestExpr);
2143 		dummyexpr->typeId = nominal_element_type;
2144 		dummyexpr->typeMod = -1;
2145 		dummyexpr->collation = clause->inputcollid;
2146 		args = list_make2(leftop, dummyexpr);
2147 		if (is_join_clause)
2148 			s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
2149 												  clause->inputcollid,
2150 												  PointerGetDatum(root),
2151 												  ObjectIdGetDatum(operator),
2152 												  PointerGetDatum(args),
2153 												  Int16GetDatum(jointype),
2154 												  PointerGetDatum(sjinfo)));
2155 		else
2156 			s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
2157 												  clause->inputcollid,
2158 												  PointerGetDatum(root),
2159 												  ObjectIdGetDatum(operator),
2160 												  PointerGetDatum(args),
2161 												  Int32GetDatum(varRelid)));
2162 		s1 = useOr ? 0.0 : 1.0;
2163 
2164 		/*
2165 		 * Arbitrarily assume 10 elements in the eventual array value (see
2166 		 * also estimate_array_length).  We don't risk an assumption of
2167 		 * disjoint probabilities here.
2168 		 */
2169 		for (i = 0; i < 10; i++)
2170 		{
2171 			if (useOr)
2172 				s1 = s1 + s2 - s1 * s2;
2173 			else
2174 				s1 = s1 * s2;
2175 		}
2176 	}
2177 
2178 	/* result should be in range, but make sure... */
2179 	CLAMP_PROBABILITY(s1);
2180 
2181 	return s1;
2182 }
2183 
2184 /*
2185  * Estimate number of elements in the array yielded by an expression.
2186  *
2187  * It's important that this agree with scalararraysel.
2188  */
2189 int
2190 estimate_array_length(Node *arrayexpr)
2191 {
2192 	/* look through any binary-compatible relabeling of arrayexpr */
2193 	arrayexpr = strip_array_coercion(arrayexpr);
2194 
2195 	if (arrayexpr && IsA(arrayexpr, Const))
2196 	{
2197 		Datum		arraydatum = ((Const *) arrayexpr)->constvalue;
2198 		bool		arrayisnull = ((Const *) arrayexpr)->constisnull;
2199 		ArrayType  *arrayval;
2200 
2201 		if (arrayisnull)
2202 			return 0;
2203 		arrayval = DatumGetArrayTypeP(arraydatum);
2204 		return ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval));
2205 	}
2206 	else if (arrayexpr && IsA(arrayexpr, ArrayExpr) &&
2207 			 !((ArrayExpr *) arrayexpr)->multidims)
2208 	{
2209 		return list_length(((ArrayExpr *) arrayexpr)->elements);
2210 	}
2211 	else
2212 	{
2213 		/* default guess --- see also scalararraysel */
2214 		return 10;
2215 	}
2216 }
2217 
2218 /*
2219  *		rowcomparesel		- Selectivity of RowCompareExpr Node.
2220  *
2221  * We estimate RowCompare selectivity by considering just the first (high
2222  * order) columns, which makes it equivalent to an ordinary OpExpr.  While
2223  * this estimate could be refined by considering additional columns, it
2224  * seems unlikely that we could do a lot better without multi-column
2225  * statistics.
2226  */
2227 Selectivity
2228 rowcomparesel(PlannerInfo *root,
2229 			  RowCompareExpr *clause,
2230 			  int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
2231 {
2232 	Selectivity s1;
2233 	Oid			opno = linitial_oid(clause->opnos);
2234 	Oid			inputcollid = linitial_oid(clause->inputcollids);
2235 	List	   *opargs;
2236 	bool		is_join_clause;
2237 
2238 	/* Build equivalent arg list for single operator */
2239 	opargs = list_make2(linitial(clause->largs), linitial(clause->rargs));
2240 
2241 	/*
2242 	 * Decide if it's a join clause.  This should match clausesel.c's
2243 	 * treat_as_join_clause(), except that we intentionally consider only the
2244 	 * leading columns and not the rest of the clause.
2245 	 */
2246 	if (varRelid != 0)
2247 	{
2248 		/*
2249 		 * Caller is forcing restriction mode (eg, because we are examining an
2250 		 * inner indexscan qual).
2251 		 */
2252 		is_join_clause = false;
2253 	}
2254 	else if (sjinfo == NULL)
2255 	{
2256 		/*
2257 		 * It must be a restriction clause, since it's being evaluated at a
2258 		 * scan node.
2259 		 */
2260 		is_join_clause = false;
2261 	}
2262 	else
2263 	{
2264 		/*
2265 		 * Otherwise, it's a join if there's more than one relation used.
2266 		 */
2267 		is_join_clause = (NumRelids((Node *) opargs) > 1);
2268 	}
2269 
2270 	if (is_join_clause)
2271 	{
2272 		/* Estimate selectivity for a join clause. */
2273 		s1 = join_selectivity(root, opno,
2274 							  opargs,
2275 							  inputcollid,
2276 							  jointype,
2277 							  sjinfo);
2278 	}
2279 	else
2280 	{
2281 		/* Estimate selectivity for a restriction clause. */
2282 		s1 = restriction_selectivity(root, opno,
2283 									 opargs,
2284 									 inputcollid,
2285 									 varRelid);
2286 	}
2287 
2288 	return s1;
2289 }
2290 
2291 /*
2292  *		eqjoinsel		- Join selectivity of "="
2293  */
2294 Datum
2295 eqjoinsel(PG_FUNCTION_ARGS)
2296 {
2297 	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2298 	Oid			operator = PG_GETARG_OID(1);
2299 	List	   *args = (List *) PG_GETARG_POINTER(2);
2300 
2301 #ifdef NOT_USED
2302 	JoinType	jointype = (JoinType) PG_GETARG_INT16(3);
2303 #endif
2304 	SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
2305 	double		selec;
2306 	VariableStatData vardata1;
2307 	VariableStatData vardata2;
2308 	bool		join_is_reversed;
2309 	RelOptInfo *inner_rel;
2310 
2311 	get_join_variables(root, args, sjinfo,
2312 					   &vardata1, &vardata2, &join_is_reversed);
2313 
2314 	switch (sjinfo->jointype)
2315 	{
2316 		case JOIN_INNER:
2317 		case JOIN_LEFT:
2318 		case JOIN_FULL:
2319 			selec = eqjoinsel_inner(operator, &vardata1, &vardata2);
2320 			break;
2321 		case JOIN_SEMI:
2322 		case JOIN_ANTI:
2323 
2324 			/*
2325 			 * Look up the join's inner relation.  min_righthand is sufficient
2326 			 * information because neither SEMI nor ANTI joins permit any
2327 			 * reassociation into or out of their RHS, so the righthand will
2328 			 * always be exactly that set of rels.
2329 			 */
2330 			inner_rel = find_join_input_rel(root, sjinfo->min_righthand);
2331 
2332 			if (!join_is_reversed)
2333 				selec = eqjoinsel_semi(operator, &vardata1, &vardata2,
2334 									   inner_rel);
2335 			else
2336 				selec = eqjoinsel_semi(get_commutator(operator),
2337 									   &vardata2, &vardata1,
2338 									   inner_rel);
2339 			break;
2340 		default:
2341 			/* other values not expected here */
2342 			elog(ERROR, "unrecognized join type: %d",
2343 				 (int) sjinfo->jointype);
2344 			selec = 0;			/* keep compiler quiet */
2345 			break;
2346 	}
2347 
2348 	ReleaseVariableStats(vardata1);
2349 	ReleaseVariableStats(vardata2);
2350 
2351 	CLAMP_PROBABILITY(selec);
2352 
2353 	PG_RETURN_FLOAT8((float8) selec);
2354 }
2355 
2356 /*
2357  * eqjoinsel_inner --- eqjoinsel for normal inner join
2358  *
2359  * We also use this for LEFT/FULL outer joins; it's not presently clear
2360  * that it's worth trying to distinguish them here.
2361  */
2362 static double
2363 eqjoinsel_inner(Oid operator,
2364 				VariableStatData *vardata1, VariableStatData *vardata2)
2365 {
2366 	double		selec;
2367 	double		nd1;
2368 	double		nd2;
2369 	bool		isdefault1;
2370 	bool		isdefault2;
2371 	Oid			opfuncoid;
2372 	Form_pg_statistic stats1 = NULL;
2373 	Form_pg_statistic stats2 = NULL;
2374 	bool		have_mcvs1 = false;
2375 	bool		have_mcvs2 = false;
2376 	AttStatsSlot sslot1;
2377 	AttStatsSlot sslot2;
2378 
2379 	nd1 = get_variable_numdistinct(vardata1, &isdefault1);
2380 	nd2 = get_variable_numdistinct(vardata2, &isdefault2);
2381 
2382 	opfuncoid = get_opcode(operator);
2383 
2384 	memset(&sslot1, 0, sizeof(sslot1));
2385 	memset(&sslot2, 0, sizeof(sslot2));
2386 
2387 	if (HeapTupleIsValid(vardata1->statsTuple))
2388 	{
2389 		/* note we allow use of nullfrac regardless of security check */
2390 		stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
2391 		if (statistic_proc_security_check(vardata1, opfuncoid))
2392 			have_mcvs1 = get_attstatsslot(&sslot1, vardata1->statsTuple,
2393 										  STATISTIC_KIND_MCV, InvalidOid,
2394 										  ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
2395 	}
2396 
2397 	if (HeapTupleIsValid(vardata2->statsTuple))
2398 	{
2399 		/* note we allow use of nullfrac regardless of security check */
2400 		stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
2401 		if (statistic_proc_security_check(vardata2, opfuncoid))
2402 			have_mcvs2 = get_attstatsslot(&sslot2, vardata2->statsTuple,
2403 										  STATISTIC_KIND_MCV, InvalidOid,
2404 										  ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
2405 	}
2406 
2407 	if (have_mcvs1 && have_mcvs2)
2408 	{
2409 		/*
2410 		 * We have most-common-value lists for both relations.  Run through
2411 		 * the lists to see which MCVs actually join to each other with the
2412 		 * given operator.  This allows us to determine the exact join
2413 		 * selectivity for the portion of the relations represented by the MCV
2414 		 * lists.  We still have to estimate for the remaining population, but
2415 		 * in a skewed distribution this gives us a big leg up in accuracy.
2416 		 * For motivation see the analysis in Y. Ioannidis and S.
2417 		 * Christodoulakis, "On the propagation of errors in the size of join
2418 		 * results", Technical Report 1018, Computer Science Dept., University
2419 		 * of Wisconsin, Madison, March 1991 (available from ftp.cs.wisc.edu).
2420 		 */
2421 		FmgrInfo	eqproc;
2422 		bool	   *hasmatch1;
2423 		bool	   *hasmatch2;
2424 		double		nullfrac1 = stats1->stanullfrac;
2425 		double		nullfrac2 = stats2->stanullfrac;
2426 		double		matchprodfreq,
2427 					matchfreq1,
2428 					matchfreq2,
2429 					unmatchfreq1,
2430 					unmatchfreq2,
2431 					otherfreq1,
2432 					otherfreq2,
2433 					totalsel1,
2434 					totalsel2;
2435 		int			i,
2436 					nmatches;
2437 
2438 		fmgr_info(opfuncoid, &eqproc);
2439 		hasmatch1 = (bool *) palloc0(sslot1.nvalues * sizeof(bool));
2440 		hasmatch2 = (bool *) palloc0(sslot2.nvalues * sizeof(bool));
2441 
2442 		/*
2443 		 * Note we assume that each MCV will match at most one member of the
2444 		 * other MCV list.  If the operator isn't really equality, there could
2445 		 * be multiple matches --- but we don't look for them, both for speed
2446 		 * and because the math wouldn't add up...
2447 		 */
2448 		matchprodfreq = 0.0;
2449 		nmatches = 0;
2450 		for (i = 0; i < sslot1.nvalues; i++)
2451 		{
2452 			int			j;
2453 
2454 			for (j = 0; j < sslot2.nvalues; j++)
2455 			{
2456 				if (hasmatch2[j])
2457 					continue;
2458 				if (DatumGetBool(FunctionCall2Coll(&eqproc,
2459 												   DEFAULT_COLLATION_OID,
2460 												   sslot1.values[i],
2461 												   sslot2.values[j])))
2462 				{
2463 					hasmatch1[i] = hasmatch2[j] = true;
2464 					matchprodfreq += sslot1.numbers[i] * sslot2.numbers[j];
2465 					nmatches++;
2466 					break;
2467 				}
2468 			}
2469 		}
2470 		CLAMP_PROBABILITY(matchprodfreq);
2471 		/* Sum up frequencies of matched and unmatched MCVs */
2472 		matchfreq1 = unmatchfreq1 = 0.0;
2473 		for (i = 0; i < sslot1.nvalues; i++)
2474 		{
2475 			if (hasmatch1[i])
2476 				matchfreq1 += sslot1.numbers[i];
2477 			else
2478 				unmatchfreq1 += sslot1.numbers[i];
2479 		}
2480 		CLAMP_PROBABILITY(matchfreq1);
2481 		CLAMP_PROBABILITY(unmatchfreq1);
2482 		matchfreq2 = unmatchfreq2 = 0.0;
2483 		for (i = 0; i < sslot2.nvalues; i++)
2484 		{
2485 			if (hasmatch2[i])
2486 				matchfreq2 += sslot2.numbers[i];
2487 			else
2488 				unmatchfreq2 += sslot2.numbers[i];
2489 		}
2490 		CLAMP_PROBABILITY(matchfreq2);
2491 		CLAMP_PROBABILITY(unmatchfreq2);
2492 		pfree(hasmatch1);
2493 		pfree(hasmatch2);
2494 
2495 		/*
2496 		 * Compute total frequency of non-null values that are not in the MCV
2497 		 * lists.
2498 		 */
2499 		otherfreq1 = 1.0 - nullfrac1 - matchfreq1 - unmatchfreq1;
2500 		otherfreq2 = 1.0 - nullfrac2 - matchfreq2 - unmatchfreq2;
2501 		CLAMP_PROBABILITY(otherfreq1);
2502 		CLAMP_PROBABILITY(otherfreq2);
2503 
2504 		/*
2505 		 * We can estimate the total selectivity from the point of view of
2506 		 * relation 1 as: the known selectivity for matched MCVs, plus
2507 		 * unmatched MCVs that are assumed to match against random members of
2508 		 * relation 2's non-MCV population, plus non-MCV values that are
2509 		 * assumed to match against random members of relation 2's unmatched
2510 		 * MCVs plus non-MCV values.
2511 		 */
2512 		totalsel1 = matchprodfreq;
2513 		if (nd2 > sslot2.nvalues)
2514 			totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - sslot2.nvalues);
2515 		if (nd2 > nmatches)
2516 			totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) /
2517 				(nd2 - nmatches);
2518 		/* Same estimate from the point of view of relation 2. */
2519 		totalsel2 = matchprodfreq;
2520 		if (nd1 > sslot1.nvalues)
2521 			totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - sslot1.nvalues);
2522 		if (nd1 > nmatches)
2523 			totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) /
2524 				(nd1 - nmatches);
2525 
2526 		/*
2527 		 * Use the smaller of the two estimates.  This can be justified in
2528 		 * essentially the same terms as given below for the no-stats case: to
2529 		 * a first approximation, we are estimating from the point of view of
2530 		 * the relation with smaller nd.
2531 		 */
2532 		selec = (totalsel1 < totalsel2) ? totalsel1 : totalsel2;
2533 	}
2534 	else
2535 	{
2536 		/*
2537 		 * We do not have MCV lists for both sides.  Estimate the join
2538 		 * selectivity as MIN(1/nd1,1/nd2)*(1-nullfrac1)*(1-nullfrac2). This
2539 		 * is plausible if we assume that the join operator is strict and the
2540 		 * non-null values are about equally distributed: a given non-null
2541 		 * tuple of rel1 will join to either zero or N2*(1-nullfrac2)/nd2 rows
2542 		 * of rel2, so total join rows are at most
2543 		 * N1*(1-nullfrac1)*N2*(1-nullfrac2)/nd2 giving a join selectivity of
2544 		 * not more than (1-nullfrac1)*(1-nullfrac2)/nd2. By the same logic it
2545 		 * is not more than (1-nullfrac1)*(1-nullfrac2)/nd1, so the expression
2546 		 * with MIN() is an upper bound.  Using the MIN() means we estimate
2547 		 * from the point of view of the relation with smaller nd (since the
2548 		 * larger nd is determining the MIN).  It is reasonable to assume that
2549 		 * most tuples in this rel will have join partners, so the bound is
2550 		 * probably reasonably tight and should be taken as-is.
2551 		 *
2552 		 * XXX Can we be smarter if we have an MCV list for just one side? It
2553 		 * seems that if we assume equal distribution for the other side, we
2554 		 * end up with the same answer anyway.
2555 		 */
2556 		double		nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
2557 		double		nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
2558 
2559 		selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
2560 		if (nd1 > nd2)
2561 			selec /= nd1;
2562 		else
2563 			selec /= nd2;
2564 	}
2565 
2566 	free_attstatsslot(&sslot1);
2567 	free_attstatsslot(&sslot2);
2568 
2569 	return selec;
2570 }
2571 
2572 /*
2573  * eqjoinsel_semi --- eqjoinsel for semi join
2574  *
2575  * (Also used for anti join, which we are supposed to estimate the same way.)
2576  * Caller has ensured that vardata1 is the LHS variable.
2577  * Unlike eqjoinsel_inner, we have to cope with operator being InvalidOid.
2578  */
2579 static double
2580 eqjoinsel_semi(Oid operator,
2581 			   VariableStatData *vardata1, VariableStatData *vardata2,
2582 			   RelOptInfo *inner_rel)
2583 {
2584 	double		selec;
2585 	double		nd1;
2586 	double		nd2;
2587 	bool		isdefault1;
2588 	bool		isdefault2;
2589 	Oid			opfuncoid;
2590 	Form_pg_statistic stats1 = NULL;
2591 	bool		have_mcvs1 = false;
2592 	bool		have_mcvs2 = false;
2593 	AttStatsSlot sslot1;
2594 	AttStatsSlot sslot2;
2595 
2596 	nd1 = get_variable_numdistinct(vardata1, &isdefault1);
2597 	nd2 = get_variable_numdistinct(vardata2, &isdefault2);
2598 
2599 	opfuncoid = OidIsValid(operator) ? get_opcode(operator) : InvalidOid;
2600 
2601 	memset(&sslot1, 0, sizeof(sslot1));
2602 	memset(&sslot2, 0, sizeof(sslot2));
2603 
2604 	/*
2605 	 * We clamp nd2 to be not more than what we estimate the inner relation's
2606 	 * size to be.  This is intuitively somewhat reasonable since obviously
2607 	 * there can't be more than that many distinct values coming from the
2608 	 * inner rel.  The reason for the asymmetry (ie, that we don't clamp nd1
2609 	 * likewise) is that this is the only pathway by which restriction clauses
2610 	 * applied to the inner rel will affect the join result size estimate,
2611 	 * since set_joinrel_size_estimates will multiply SEMI/ANTI selectivity by
2612 	 * only the outer rel's size.  If we clamped nd1 we'd be double-counting
2613 	 * the selectivity of outer-rel restrictions.
2614 	 *
2615 	 * We can apply this clamping both with respect to the base relation from
2616 	 * which the join variable comes (if there is just one), and to the
2617 	 * immediate inner input relation of the current join.
2618 	 *
2619 	 * If we clamp, we can treat nd2 as being a non-default estimate; it's not
2620 	 * great, maybe, but it didn't come out of nowhere either.  This is most
2621 	 * helpful when the inner relation is empty and consequently has no stats.
2622 	 */
2623 	if (vardata2->rel)
2624 	{
2625 		if (nd2 >= vardata2->rel->rows)
2626 		{
2627 			nd2 = vardata2->rel->rows;
2628 			isdefault2 = false;
2629 		}
2630 	}
2631 	if (nd2 >= inner_rel->rows)
2632 	{
2633 		nd2 = inner_rel->rows;
2634 		isdefault2 = false;
2635 	}
2636 
2637 	if (HeapTupleIsValid(vardata1->statsTuple))
2638 	{
2639 		/* note we allow use of nullfrac regardless of security check */
2640 		stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
2641 		if (statistic_proc_security_check(vardata1, opfuncoid))
2642 			have_mcvs1 = get_attstatsslot(&sslot1, vardata1->statsTuple,
2643 										  STATISTIC_KIND_MCV, InvalidOid,
2644 										  ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
2645 	}
2646 
2647 	if (HeapTupleIsValid(vardata2->statsTuple) &&
2648 		statistic_proc_security_check(vardata2, opfuncoid))
2649 	{
2650 		have_mcvs2 = get_attstatsslot(&sslot2, vardata2->statsTuple,
2651 									  STATISTIC_KIND_MCV, InvalidOid,
2652 									  ATTSTATSSLOT_VALUES);
2653 		/* note: currently don't need stanumbers from RHS */
2654 	}
2655 
2656 	if (have_mcvs1 && have_mcvs2 && OidIsValid(operator))
2657 	{
2658 		/*
2659 		 * We have most-common-value lists for both relations.  Run through
2660 		 * the lists to see which MCVs actually join to each other with the
2661 		 * given operator.  This allows us to determine the exact join
2662 		 * selectivity for the portion of the relations represented by the MCV
2663 		 * lists.  We still have to estimate for the remaining population, but
2664 		 * in a skewed distribution this gives us a big leg up in accuracy.
2665 		 */
2666 		FmgrInfo	eqproc;
2667 		bool	   *hasmatch1;
2668 		bool	   *hasmatch2;
2669 		double		nullfrac1 = stats1->stanullfrac;
2670 		double		matchfreq1,
2671 					uncertainfrac,
2672 					uncertain;
2673 		int			i,
2674 					nmatches,
2675 					clamped_nvalues2;
2676 
2677 		/*
2678 		 * The clamping above could have resulted in nd2 being less than
2679 		 * sslot2.nvalues; in which case, we assume that precisely the nd2
2680 		 * most common values in the relation will appear in the join input,
2681 		 * and so compare to only the first nd2 members of the MCV list.  Of
2682 		 * course this is frequently wrong, but it's the best bet we can make.
2683 		 */
2684 		clamped_nvalues2 = Min(sslot2.nvalues, nd2);
2685 
2686 		fmgr_info(opfuncoid, &eqproc);
2687 		hasmatch1 = (bool *) palloc0(sslot1.nvalues * sizeof(bool));
2688 		hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool));
2689 
2690 		/*
2691 		 * Note we assume that each MCV will match at most one member of the
2692 		 * other MCV list.  If the operator isn't really equality, there could
2693 		 * be multiple matches --- but we don't look for them, both for speed
2694 		 * and because the math wouldn't add up...
2695 		 */
2696 		nmatches = 0;
2697 		for (i = 0; i < sslot1.nvalues; i++)
2698 		{
2699 			int			j;
2700 
2701 			for (j = 0; j < clamped_nvalues2; j++)
2702 			{
2703 				if (hasmatch2[j])
2704 					continue;
2705 				if (DatumGetBool(FunctionCall2Coll(&eqproc,
2706 												   DEFAULT_COLLATION_OID,
2707 												   sslot1.values[i],
2708 												   sslot2.values[j])))
2709 				{
2710 					hasmatch1[i] = hasmatch2[j] = true;
2711 					nmatches++;
2712 					break;
2713 				}
2714 			}
2715 		}
2716 		/* Sum up frequencies of matched MCVs */
2717 		matchfreq1 = 0.0;
2718 		for (i = 0; i < sslot1.nvalues; i++)
2719 		{
2720 			if (hasmatch1[i])
2721 				matchfreq1 += sslot1.numbers[i];
2722 		}
2723 		CLAMP_PROBABILITY(matchfreq1);
2724 		pfree(hasmatch1);
2725 		pfree(hasmatch2);
2726 
2727 		/*
2728 		 * Now we need to estimate the fraction of relation 1 that has at
2729 		 * least one join partner.  We know for certain that the matched MCVs
2730 		 * do, so that gives us a lower bound, but we're really in the dark
2731 		 * about everything else.  Our crude approach is: if nd1 <= nd2 then
2732 		 * assume all non-null rel1 rows have join partners, else assume for
2733 		 * the uncertain rows that a fraction nd2/nd1 have join partners. We
2734 		 * can discount the known-matched MCVs from the distinct-values counts
2735 		 * before doing the division.
2736 		 *
2737 		 * Crude as the above is, it's completely useless if we don't have
2738 		 * reliable ndistinct values for both sides.  Hence, if either nd1 or
2739 		 * nd2 is default, punt and assume half of the uncertain rows have
2740 		 * join partners.
2741 		 */
2742 		if (!isdefault1 && !isdefault2)
2743 		{
2744 			nd1 -= nmatches;
2745 			nd2 -= nmatches;
2746 			if (nd1 <= nd2 || nd2 < 0)
2747 				uncertainfrac = 1.0;
2748 			else
2749 				uncertainfrac = nd2 / nd1;
2750 		}
2751 		else
2752 			uncertainfrac = 0.5;
2753 		uncertain = 1.0 - matchfreq1 - nullfrac1;
2754 		CLAMP_PROBABILITY(uncertain);
2755 		selec = matchfreq1 + uncertainfrac * uncertain;
2756 	}
2757 	else
2758 	{
2759 		/*
2760 		 * Without MCV lists for both sides, we can only use the heuristic
2761 		 * about nd1 vs nd2.
2762 		 */
2763 		double		nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
2764 
2765 		if (!isdefault1 && !isdefault2)
2766 		{
2767 			if (nd1 <= nd2 || nd2 < 0)
2768 				selec = 1.0 - nullfrac1;
2769 			else
2770 				selec = (nd2 / nd1) * (1.0 - nullfrac1);
2771 		}
2772 		else
2773 			selec = 0.5 * (1.0 - nullfrac1);
2774 	}
2775 
2776 	free_attstatsslot(&sslot1);
2777 	free_attstatsslot(&sslot2);
2778 
2779 	return selec;
2780 }
2781 
2782 /*
2783  *		neqjoinsel		- Join selectivity of "!="
2784  */
2785 Datum
2786 neqjoinsel(PG_FUNCTION_ARGS)
2787 {
2788 	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2789 	Oid			operator = PG_GETARG_OID(1);
2790 	List	   *args = (List *) PG_GETARG_POINTER(2);
2791 	JoinType	jointype = (JoinType) PG_GETARG_INT16(3);
2792 	SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
2793 	float8		result;
2794 
2795 	if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
2796 	{
2797 		/*
2798 		 * For semi-joins, if there is more than one distinct value in the RHS
2799 		 * relation then every non-null LHS row must find a row to join since
2800 		 * it can only be equal to one of them.  We'll assume that there is
2801 		 * always more than one distinct RHS value for the sake of stability,
2802 		 * though in theory we could have special cases for empty RHS
2803 		 * (selectivity = 0) and single-distinct-value RHS (selectivity =
2804 		 * fraction of LHS that has the same value as the single RHS value).
2805 		 *
2806 		 * For anti-joins, if we use the same assumption that there is more
2807 		 * than one distinct key in the RHS relation, then every non-null LHS
2808 		 * row must be suppressed by the anti-join.
2809 		 *
2810 		 * So either way, the selectivity estimate should be 1 - nullfrac.
2811 		 */
2812 		VariableStatData leftvar;
2813 		VariableStatData rightvar;
2814 		bool		reversed;
2815 		HeapTuple	statsTuple;
2816 		double		nullfrac;
2817 
2818 		get_join_variables(root, args, sjinfo, &leftvar, &rightvar, &reversed);
2819 		statsTuple = reversed ? rightvar.statsTuple : leftvar.statsTuple;
2820 		if (HeapTupleIsValid(statsTuple))
2821 			nullfrac = ((Form_pg_statistic) GETSTRUCT(statsTuple))->stanullfrac;
2822 		else
2823 			nullfrac = 0.0;
2824 		ReleaseVariableStats(leftvar);
2825 		ReleaseVariableStats(rightvar);
2826 
2827 		result = 1.0 - nullfrac;
2828 	}
2829 	else
2830 	{
2831 		/*
2832 		 * We want 1 - eqjoinsel() where the equality operator is the one
2833 		 * associated with this != operator, that is, its negator.
2834 		 */
2835 		Oid			eqop = get_negator(operator);
2836 
2837 		if (eqop)
2838 		{
2839 			result = DatumGetFloat8(DirectFunctionCall5(eqjoinsel,
2840 														PointerGetDatum(root),
2841 														ObjectIdGetDatum(eqop),
2842 														PointerGetDatum(args),
2843 														Int16GetDatum(jointype),
2844 														PointerGetDatum(sjinfo)));
2845 		}
2846 		else
2847 		{
2848 			/* Use default selectivity (should we raise an error instead?) */
2849 			result = DEFAULT_EQ_SEL;
2850 		}
2851 		result = 1.0 - result;
2852 	}
2853 
2854 	PG_RETURN_FLOAT8(result);
2855 }
2856 
2857 /*
2858  *		scalarltjoinsel - Join selectivity of "<" for scalars
2859  */
2860 Datum
2861 scalarltjoinsel(PG_FUNCTION_ARGS)
2862 {
2863 	PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
2864 }
2865 
2866 /*
2867  *		scalarlejoinsel - Join selectivity of "<=" for scalars
2868  */
2869 Datum
2870 scalarlejoinsel(PG_FUNCTION_ARGS)
2871 {
2872 	PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
2873 }
2874 
2875 /*
2876  *		scalargtjoinsel - Join selectivity of ">" for scalars
2877  */
2878 Datum
2879 scalargtjoinsel(PG_FUNCTION_ARGS)
2880 {
2881 	PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
2882 }
2883 
2884 /*
2885  *		scalargejoinsel - Join selectivity of ">=" for scalars
2886  */
2887 Datum
2888 scalargejoinsel(PG_FUNCTION_ARGS)
2889 {
2890 	PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
2891 }
2892 
2893 /*
2894  * patternjoinsel		- Generic code for pattern-match join selectivity.
2895  */
2896 static double
2897 patternjoinsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
2898 {
2899 	/* For the moment we just punt. */
2900 	return negate ? (1.0 - DEFAULT_MATCH_SEL) : DEFAULT_MATCH_SEL;
2901 }
2902 
2903 /*
2904  *		regexeqjoinsel	- Join selectivity of regular-expression pattern match.
2905  */
2906 Datum
2907 regexeqjoinsel(PG_FUNCTION_ARGS)
2908 {
2909 	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, false));
2910 }
2911 
2912 /*
2913  *		icregexeqjoinsel	- Join selectivity of case-insensitive regex match.
2914  */
2915 Datum
2916 icregexeqjoinsel(PG_FUNCTION_ARGS)
2917 {
2918 	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, false));
2919 }
2920 
2921 /*
2922  *		likejoinsel			- Join selectivity of LIKE pattern match.
2923  */
2924 Datum
2925 likejoinsel(PG_FUNCTION_ARGS)
2926 {
2927 	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
2928 }
2929 
2930 /*
2931  *		prefixjoinsel			- Join selectivity of prefix operator
2932  */
2933 Datum
2934 prefixjoinsel(PG_FUNCTION_ARGS)
2935 {
2936 	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false));
2937 }
2938 
2939 /*
2940  *		iclikejoinsel			- Join selectivity of ILIKE pattern match.
2941  */
2942 Datum
2943 iclikejoinsel(PG_FUNCTION_ARGS)
2944 {
2945 	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, false));
2946 }
2947 
2948 /*
2949  *		regexnejoinsel	- Join selectivity of regex non-match.
2950  */
2951 Datum
2952 regexnejoinsel(PG_FUNCTION_ARGS)
2953 {
2954 	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, true));
2955 }
2956 
2957 /*
2958  *		icregexnejoinsel	- Join selectivity of case-insensitive regex non-match.
2959  */
2960 Datum
2961 icregexnejoinsel(PG_FUNCTION_ARGS)
2962 {
2963 	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, true));
2964 }
2965 
2966 /*
2967  *		nlikejoinsel		- Join selectivity of LIKE pattern non-match.
2968  */
2969 Datum
2970 nlikejoinsel(PG_FUNCTION_ARGS)
2971 {
2972 	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, true));
2973 }
2974 
2975 /*
2976  *		icnlikejoinsel		- Join selectivity of ILIKE pattern non-match.
2977  */
2978 Datum
2979 icnlikejoinsel(PG_FUNCTION_ARGS)
2980 {
2981 	PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, true));
2982 }
2983 
2984 /*
2985  * mergejoinscansel			- Scan selectivity of merge join.
2986  *
2987  * A merge join will stop as soon as it exhausts either input stream.
2988  * Therefore, if we can estimate the ranges of both input variables,
2989  * we can estimate how much of the input will actually be read.  This
2990  * can have a considerable impact on the cost when using indexscans.
2991  *
2992  * Also, we can estimate how much of each input has to be read before the
2993  * first join pair is found, which will affect the join's startup time.
2994  *
2995  * clause should be a clause already known to be mergejoinable.  opfamily,
2996  * strategy, and nulls_first specify the sort ordering being used.
2997  *
2998  * The outputs are:
2999  *		*leftstart is set to the fraction of the left-hand variable expected
3000  *		 to be scanned before the first join pair is found (0 to 1).
3001  *		*leftend is set to the fraction of the left-hand variable expected
3002  *		 to be scanned before the join terminates (0 to 1).
3003  *		*rightstart, *rightend similarly for the right-hand variable.
3004  */
void
mergejoinscansel(PlannerInfo *root, Node *clause,
				 Oid opfamily, int strategy, bool nulls_first,
				 Selectivity *leftstart, Selectivity *leftend,
				 Selectivity *rightstart, Selectivity *rightend)
{
	Node	   *left,
			   *right;
	VariableStatData leftvar,
				rightvar;
	int			op_strategy;
	Oid			op_lefttype;
	Oid			op_righttype;
	Oid			opno,			/* the merge clause's own operator */
				lsortop,		/* lefttype sort op (per chosen direction) */
				rsortop,		/* righttype sort op (per chosen direction) */
				lstatop,		/* lefttype "<" op, for histogram lookups */
				rstatop,		/* righttype "<" op, for histogram lookups */
				ltop,			/* lefttype-vs-righttype "<" (or ">") op */
				leop,			/* lefttype-vs-righttype "<=" (or ">=") op */
				revltop,		/* righttype-vs-lefttype "<" (or ">") op */
				revleop;		/* righttype-vs-lefttype "<=" (or ">=") op */
	bool		isgt;			/* true if sort order is descending */
	Datum		leftmin,		/* extreme values of the two inputs */
				leftmax,
				rightmin,
				rightmax;
	double		selec;

	/* Set default results if we can't figure anything out. */
	/* XXX should default "start" fraction be a bit more than 0? */
	*leftstart = *rightstart = 0.0;
	*leftend = *rightend = 1.0;

	/* Deconstruct the merge clause */
	if (!is_opclause(clause))
		return;					/* shouldn't happen */
	opno = ((OpExpr *) clause)->opno;
	left = get_leftop((Expr *) clause);
	right = get_rightop((Expr *) clause);
	if (!right)
		return;					/* shouldn't happen */

	/* Look for stats for the inputs */
	examine_variable(root, left, 0, &leftvar);
	examine_variable(root, right, 0, &rightvar);

	/* Extract the operator's declared left/right datatypes */
	get_op_opfamily_properties(opno, opfamily, false,
							   &op_strategy,
							   &op_lefttype,
							   &op_righttype);
	Assert(op_strategy == BTEqualStrategyNumber);

	/*
	 * Look up the various operators we need.  If we don't find them all, it
	 * probably means the opfamily is broken, but we just fail silently.
	 *
	 * Note: we expect that pg_statistic histograms will be sorted by the '<'
	 * operator, regardless of which sort direction we are considering.
	 */
	switch (strategy)
	{
		case BTLessStrategyNumber:
			isgt = false;
			if (op_lefttype == op_righttype)
			{
				/* easy case: both sides are the same type */
				ltop = get_opfamily_member(opfamily,
										   op_lefttype, op_righttype,
										   BTLessStrategyNumber);
				leop = get_opfamily_member(opfamily,
										   op_lefttype, op_righttype,
										   BTLessEqualStrategyNumber);
				lsortop = ltop;
				rsortop = ltop;
				lstatop = lsortop;
				rstatop = rsortop;
				revltop = ltop;
				revleop = leop;
			}
			else
			{
				/* cross-type clause: need each combination separately */
				ltop = get_opfamily_member(opfamily,
										   op_lefttype, op_righttype,
										   BTLessStrategyNumber);
				leop = get_opfamily_member(opfamily,
										   op_lefttype, op_righttype,
										   BTLessEqualStrategyNumber);
				lsortop = get_opfamily_member(opfamily,
											  op_lefttype, op_lefttype,
											  BTLessStrategyNumber);
				rsortop = get_opfamily_member(opfamily,
											  op_righttype, op_righttype,
											  BTLessStrategyNumber);
				lstatop = lsortop;
				rstatop = rsortop;
				revltop = get_opfamily_member(opfamily,
											  op_righttype, op_lefttype,
											  BTLessStrategyNumber);
				revleop = get_opfamily_member(opfamily,
											  op_righttype, op_lefttype,
											  BTLessEqualStrategyNumber);
			}
			break;
		case BTGreaterStrategyNumber:
			/* descending-order case */
			isgt = true;
			if (op_lefttype == op_righttype)
			{
				/* easy case: both sides are the same type */
				ltop = get_opfamily_member(opfamily,
										   op_lefttype, op_righttype,
										   BTGreaterStrategyNumber);
				leop = get_opfamily_member(opfamily,
										   op_lefttype, op_righttype,
										   BTGreaterEqualStrategyNumber);
				lsortop = ltop;
				rsortop = ltop;
				/* stat ops are still "<", per note above */
				lstatop = get_opfamily_member(opfamily,
											  op_lefttype, op_lefttype,
											  BTLessStrategyNumber);
				rstatop = lstatop;
				revltop = ltop;
				revleop = leop;
			}
			else
			{
				/* cross-type clause: need each combination separately */
				ltop = get_opfamily_member(opfamily,
										   op_lefttype, op_righttype,
										   BTGreaterStrategyNumber);
				leop = get_opfamily_member(opfamily,
										   op_lefttype, op_righttype,
										   BTGreaterEqualStrategyNumber);
				lsortop = get_opfamily_member(opfamily,
											  op_lefttype, op_lefttype,
											  BTGreaterStrategyNumber);
				rsortop = get_opfamily_member(opfamily,
											  op_righttype, op_righttype,
											  BTGreaterStrategyNumber);
				/* stat ops are still "<", per note above */
				lstatop = get_opfamily_member(opfamily,
											  op_lefttype, op_lefttype,
											  BTLessStrategyNumber);
				rstatop = get_opfamily_member(opfamily,
											  op_righttype, op_righttype,
											  BTLessStrategyNumber);
				revltop = get_opfamily_member(opfamily,
											  op_righttype, op_lefttype,
											  BTGreaterStrategyNumber);
				revleop = get_opfamily_member(opfamily,
											  op_righttype, op_lefttype,
											  BTGreaterEqualStrategyNumber);
			}
			break;
		default:
			goto fail;			/* shouldn't get here */
	}

	if (!OidIsValid(lsortop) ||
		!OidIsValid(rsortop) ||
		!OidIsValid(lstatop) ||
		!OidIsValid(rstatop) ||
		!OidIsValid(ltop) ||
		!OidIsValid(leop) ||
		!OidIsValid(revltop) ||
		!OidIsValid(revleop))
		goto fail;				/* insufficient info in catalogs */

	/* Try to get ranges of both inputs */
	if (!isgt)
	{
		if (!get_variable_range(root, &leftvar, lstatop,
								&leftmin, &leftmax))
			goto fail;			/* no range available from stats */
		if (!get_variable_range(root, &rightvar, rstatop,
								&rightmin, &rightmax))
			goto fail;			/* no range available from stats */
	}
	else
	{
		/* need to swap the max and min */
		if (!get_variable_range(root, &leftvar, lstatop,
								&leftmax, &leftmin))
			goto fail;			/* no range available from stats */
		if (!get_variable_range(root, &rightvar, rstatop,
								&rightmax, &rightmin))
			goto fail;			/* no range available from stats */
	}

	/*
	 * Now, the fraction of the left variable that will be scanned is the
	 * fraction that's <= the right-side maximum value.  But only believe
	 * non-default estimates, else stick with our 1.0.
	 */
	selec = scalarineqsel(root, leop, isgt, true, &leftvar,
						  rightmax, op_righttype);
	if (selec != DEFAULT_INEQ_SEL)
		*leftend = selec;

	/* And similarly for the right variable. */
	selec = scalarineqsel(root, revleop, isgt, true, &rightvar,
						  leftmax, op_lefttype);
	if (selec != DEFAULT_INEQ_SEL)
		*rightend = selec;

	/*
	 * Only one of the two "end" fractions can really be less than 1.0;
	 * believe the smaller estimate and reset the other one to exactly 1.0. If
	 * we get exactly equal estimates (as can easily happen with self-joins),
	 * believe neither.
	 */
	if (*leftend > *rightend)
		*leftend = 1.0;
	else if (*leftend < *rightend)
		*rightend = 1.0;
	else
		*leftend = *rightend = 1.0;

	/*
	 * Also, the fraction of the left variable that will be scanned before the
	 * first join pair is found is the fraction that's < the right-side
	 * minimum value.  But only believe non-default estimates, else stick with
	 * our own default.
	 */
	selec = scalarineqsel(root, ltop, isgt, false, &leftvar,
						  rightmin, op_righttype);
	if (selec != DEFAULT_INEQ_SEL)
		*leftstart = selec;

	/* And similarly for the right variable. */
	selec = scalarineqsel(root, revltop, isgt, false, &rightvar,
						  leftmin, op_lefttype);
	if (selec != DEFAULT_INEQ_SEL)
		*rightstart = selec;

	/*
	 * Only one of the two "start" fractions can really be more than zero;
	 * believe the larger estimate and reset the other one to exactly 0.0. If
	 * we get exactly equal estimates (as can easily happen with self-joins),
	 * believe neither.
	 */
	if (*leftstart < *rightstart)
		*leftstart = 0.0;
	else if (*leftstart > *rightstart)
		*rightstart = 0.0;
	else
		*leftstart = *rightstart = 0.0;

	/*
	 * If the sort order is nulls-first, we're going to have to skip over any
	 * nulls too.  These would not have been counted by scalarineqsel, and we
	 * can safely add in this fraction regardless of whether we believe
	 * scalarineqsel's results or not.  But be sure to clamp the sum to 1.0!
	 */
	if (nulls_first)
	{
		Form_pg_statistic stats;

		if (HeapTupleIsValid(leftvar.statsTuple))
		{
			stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
			*leftstart += stats->stanullfrac;
			CLAMP_PROBABILITY(*leftstart);
			*leftend += stats->stanullfrac;
			CLAMP_PROBABILITY(*leftend);
		}
		if (HeapTupleIsValid(rightvar.statsTuple))
		{
			stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple);
			*rightstart += stats->stanullfrac;
			CLAMP_PROBABILITY(*rightstart);
			*rightend += stats->stanullfrac;
			CLAMP_PROBABILITY(*rightend);
		}
	}

	/* Disbelieve start >= end, just in case that can happen */
	if (*leftstart >= *leftend)
	{
		*leftstart = 0.0;
		*leftend = 1.0;
	}
	if (*rightstart >= *rightend)
	{
		*rightstart = 0.0;
		*rightend = 1.0;
	}

	/* Common exit: release stats whether we succeeded or bailed out */
fail:
	ReleaseVariableStats(leftvar);
	ReleaseVariableStats(rightvar);
}
3297 
3298 
3299 /*
3300  * Helper routine for estimate_num_groups: add an item to a list of
3301  * GroupVarInfos, but only if it's not known equal to any of the existing
3302  * entries.
3303  */
/* Per-grouping-variable bookkeeping entry used by estimate_num_groups */
typedef struct
{
	Node	   *var;			/* might be an expression, not just a Var */
	RelOptInfo *rel;			/* relation it belongs to */
	double		ndistinct;		/* # distinct values */
} GroupVarInfo;
3310 
3311 static List *
3312 add_unique_group_var(PlannerInfo *root, List *varinfos,
3313 					 Node *var, VariableStatData *vardata)
3314 {
3315 	GroupVarInfo *varinfo;
3316 	double		ndistinct;
3317 	bool		isdefault;
3318 	ListCell   *lc;
3319 
3320 	ndistinct = get_variable_numdistinct(vardata, &isdefault);
3321 
3322 	/* cannot use foreach here because of possible list_delete */
3323 	lc = list_head(varinfos);
3324 	while (lc)
3325 	{
3326 		varinfo = (GroupVarInfo *) lfirst(lc);
3327 
3328 		/* must advance lc before list_delete possibly pfree's it */
3329 		lc = lnext(lc);
3330 
3331 		/* Drop exact duplicates */
3332 		if (equal(var, varinfo->var))
3333 			return varinfos;
3334 
3335 		/*
3336 		 * Drop known-equal vars, but only if they belong to different
3337 		 * relations (see comments for estimate_num_groups)
3338 		 */
3339 		if (vardata->rel != varinfo->rel &&
3340 			exprs_known_equal(root, var, varinfo->var))
3341 		{
3342 			if (varinfo->ndistinct <= ndistinct)
3343 			{
3344 				/* Keep older item, forget new one */
3345 				return varinfos;
3346 			}
3347 			else
3348 			{
3349 				/* Delete the older item */
3350 				varinfos = list_delete_ptr(varinfos, varinfo);
3351 			}
3352 		}
3353 	}
3354 
3355 	varinfo = (GroupVarInfo *) palloc(sizeof(GroupVarInfo));
3356 
3357 	varinfo->var = var;
3358 	varinfo->rel = vardata->rel;
3359 	varinfo->ndistinct = ndistinct;
3360 	varinfos = lappend(varinfos, varinfo);
3361 	return varinfos;
3362 }
3363 
3364 /*
3365  * estimate_num_groups		- Estimate number of groups in a grouped query
3366  *
3367  * Given a query having a GROUP BY clause, estimate how many groups there
3368  * will be --- ie, the number of distinct combinations of the GROUP BY
3369  * expressions.
3370  *
3371  * This routine is also used to estimate the number of rows emitted by
3372  * a DISTINCT filtering step; that is an isomorphic problem.  (Note:
3373  * actually, we only use it for DISTINCT when there's no grouping or
3374  * aggregation ahead of the DISTINCT.)
3375  *
3376  * Inputs:
3377  *	root - the query
3378  *	groupExprs - list of expressions being grouped by
3379  *	input_rows - number of rows estimated to arrive at the group/unique
3380  *		filter step
3381  *	pgset - NULL, or a List** pointing to a grouping set to filter the
3382  *		groupExprs against
3383  *
3384  * Given the lack of any cross-correlation statistics in the system, it's
3385  * impossible to do anything really trustworthy with GROUP BY conditions
3386  * involving multiple Vars.  We should however avoid assuming the worst
3387  * case (all possible cross-product terms actually appear as groups) since
3388  * very often the grouped-by Vars are highly correlated.  Our current approach
3389  * is as follows:
3390  *	1.  Expressions yielding boolean are assumed to contribute two groups,
3391  *		independently of their content, and are ignored in the subsequent
3392  *		steps.  This is mainly because tests like "col IS NULL" break the
3393  *		heuristic used in step 2 especially badly.
3394  *	2.  Reduce the given expressions to a list of unique Vars used.  For
3395  *		example, GROUP BY a, a + b is treated the same as GROUP BY a, b.
3396  *		It is clearly correct not to count the same Var more than once.
3397  *		It is also reasonable to treat f(x) the same as x: f() cannot
3398  *		increase the number of distinct values (unless it is volatile,
3399  *		which we consider unlikely for grouping), but it probably won't
3400  *		reduce the number of distinct values much either.
3401  *		As a special case, if a GROUP BY expression can be matched to an
3402  *		expressional index for which we have statistics, then we treat the
3403  *		whole expression as though it were just a Var.
3404  *	3.  If the list contains Vars of different relations that are known equal
3405  *		due to equivalence classes, then drop all but one of the Vars from each
3406  *		known-equal set, keeping the one with smallest estimated # of values
3407  *		(since the extra values of the others can't appear in joined rows).
3408  *		Note the reason we only consider Vars of different relations is that
3409  *		if we considered ones of the same rel, we'd be double-counting the
3410  *		restriction selectivity of the equality in the next step.
3411  *	4.  For Vars within a single source rel, we multiply together the numbers
3412  *		of values, clamp to the number of rows in the rel (divided by 10 if
3413  *		more than one Var), and then multiply by a factor based on the
3414  *		selectivity of the restriction clauses for that rel.  When there's
3415  *		more than one Var, the initial product is probably too high (it's the
3416  *		worst case) but clamping to a fraction of the rel's rows seems to be a
3417  *		helpful heuristic for not letting the estimate get out of hand.  (The
3418  *		factor of 10 is derived from pre-Postgres-7.4 practice.)  The factor
3419  *		we multiply by to adjust for the restriction selectivity assumes that
3420  *		the restriction clauses are independent of the grouping, which may not
3421  *		be a valid assumption, but it's hard to do better.
3422  *	5.  If there are Vars from multiple rels, we repeat step 4 for each such
3423  *		rel, and multiply the results together.
3424  * Note that rels not containing grouped Vars are ignored completely, as are
3425  * join clauses.  Such rels cannot increase the number of groups, and we
3426  * assume such clauses do not reduce the number either (somewhat bogus,
3427  * but we don't have the info to do better).
3428  */
3429 double
3430 estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
3431 					List **pgset)
3432 {
3433 	List	   *varinfos = NIL;
3434 	double		srf_multiplier = 1.0;
3435 	double		numdistinct;
3436 	ListCell   *l;
3437 	int			i;
3438 
3439 	/*
3440 	 * We don't ever want to return an estimate of zero groups, as that tends
3441 	 * to lead to division-by-zero and other unpleasantness.  The input_rows
3442 	 * estimate is usually already at least 1, but clamp it just in case it
3443 	 * isn't.
3444 	 */
3445 	input_rows = clamp_row_est(input_rows);
3446 
3447 	/*
3448 	 * If no grouping columns, there's exactly one group.  (This can't happen
3449 	 * for normal cases with GROUP BY or DISTINCT, but it is possible for
3450 	 * corner cases with set operations.)
3451 	 */
3452 	if (groupExprs == NIL || (pgset && list_length(*pgset) < 1))
3453 		return 1.0;
3454 
3455 	/*
3456 	 * Count groups derived from boolean grouping expressions.  For other
3457 	 * expressions, find the unique Vars used, treating an expression as a Var
3458 	 * if we can find stats for it.  For each one, record the statistical
3459 	 * estimate of number of distinct values (total in its table, without
3460 	 * regard for filtering).
3461 	 */
3462 	numdistinct = 1.0;
3463 
3464 	i = 0;
3465 	foreach(l, groupExprs)
3466 	{
3467 		Node	   *groupexpr = (Node *) lfirst(l);
3468 		double		this_srf_multiplier;
3469 		VariableStatData vardata;
3470 		List	   *varshere;
3471 		ListCell   *l2;
3472 
3473 		/* is expression in this grouping set? */
3474 		if (pgset && !list_member_int(*pgset, i++))
3475 			continue;
3476 
3477 		/*
3478 		 * Set-returning functions in grouping columns are a bit problematic.
3479 		 * The code below will effectively ignore their SRF nature and come up
3480 		 * with a numdistinct estimate as though they were scalar functions.
3481 		 * We compensate by scaling up the end result by the largest SRF
3482 		 * rowcount estimate.  (This will be an overestimate if the SRF
3483 		 * produces multiple copies of any output value, but it seems best to
3484 		 * assume the SRF's outputs are distinct.  In any case, it's probably
3485 		 * pointless to worry too much about this without much better
3486 		 * estimates for SRF output rowcounts than we have today.)
3487 		 */
3488 		this_srf_multiplier = expression_returns_set_rows(groupexpr);
3489 		if (srf_multiplier < this_srf_multiplier)
3490 			srf_multiplier = this_srf_multiplier;
3491 
3492 		/* Short-circuit for expressions returning boolean */
3493 		if (exprType(groupexpr) == BOOLOID)
3494 		{
3495 			numdistinct *= 2.0;
3496 			continue;
3497 		}
3498 
3499 		/*
3500 		 * If examine_variable is able to deduce anything about the GROUP BY
3501 		 * expression, treat it as a single variable even if it's really more
3502 		 * complicated.
3503 		 */
3504 		examine_variable(root, groupexpr, 0, &vardata);
3505 		if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique)
3506 		{
3507 			varinfos = add_unique_group_var(root, varinfos,
3508 											groupexpr, &vardata);
3509 			ReleaseVariableStats(vardata);
3510 			continue;
3511 		}
3512 		ReleaseVariableStats(vardata);
3513 
3514 		/*
3515 		 * Else pull out the component Vars.  Handle PlaceHolderVars by
3516 		 * recursing into their arguments (effectively assuming that the
3517 		 * PlaceHolderVar doesn't change the number of groups, which boils
3518 		 * down to ignoring the possible addition of nulls to the result set).
3519 		 */
3520 		varshere = pull_var_clause(groupexpr,
3521 								   PVC_RECURSE_AGGREGATES |
3522 								   PVC_RECURSE_WINDOWFUNCS |
3523 								   PVC_RECURSE_PLACEHOLDERS);
3524 
3525 		/*
3526 		 * If we find any variable-free GROUP BY item, then either it is a
3527 		 * constant (and we can ignore it) or it contains a volatile function;
3528 		 * in the latter case we punt and assume that each input row will
3529 		 * yield a distinct group.
3530 		 */
3531 		if (varshere == NIL)
3532 		{
3533 			if (contain_volatile_functions(groupexpr))
3534 				return input_rows;
3535 			continue;
3536 		}
3537 
3538 		/*
3539 		 * Else add variables to varinfos list
3540 		 */
3541 		foreach(l2, varshere)
3542 		{
3543 			Node	   *var = (Node *) lfirst(l2);
3544 
3545 			examine_variable(root, var, 0, &vardata);
3546 			varinfos = add_unique_group_var(root, varinfos, var, &vardata);
3547 			ReleaseVariableStats(vardata);
3548 		}
3549 	}
3550 
3551 	/*
3552 	 * If now no Vars, we must have an all-constant or all-boolean GROUP BY
3553 	 * list.
3554 	 */
3555 	if (varinfos == NIL)
3556 	{
3557 		/* Apply SRF multiplier as we would do in the long path */
3558 		numdistinct *= srf_multiplier;
3559 		/* Round off */
3560 		numdistinct = ceil(numdistinct);
3561 		/* Guard against out-of-range answers */
3562 		if (numdistinct > input_rows)
3563 			numdistinct = input_rows;
3564 		if (numdistinct < 1.0)
3565 			numdistinct = 1.0;
3566 		return numdistinct;
3567 	}
3568 
3569 	/*
3570 	 * Group Vars by relation and estimate total numdistinct.
3571 	 *
3572 	 * For each iteration of the outer loop, we process the frontmost Var in
3573 	 * varinfos, plus all other Vars in the same relation.  We remove these
3574 	 * Vars from the newvarinfos list for the next iteration. This is the
3575 	 * easiest way to group Vars of same rel together.
3576 	 */
3577 	do
3578 	{
3579 		GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
3580 		RelOptInfo *rel = varinfo1->rel;
3581 		double		reldistinct = 1;
3582 		double		relmaxndistinct = reldistinct;
3583 		int			relvarcount = 0;
3584 		List	   *newvarinfos = NIL;
3585 		List	   *relvarinfos = NIL;
3586 
3587 		/*
3588 		 * Split the list of varinfos in two - one for the current rel, one
3589 		 * for remaining Vars on other rels.
3590 		 */
3591 		relvarinfos = lcons(varinfo1, relvarinfos);
3592 		for_each_cell(l, lnext(list_head(varinfos)))
3593 		{
3594 			GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3595 
3596 			if (varinfo2->rel == varinfo1->rel)
3597 			{
3598 				/* varinfos on current rel */
3599 				relvarinfos = lcons(varinfo2, relvarinfos);
3600 			}
3601 			else
3602 			{
3603 				/* not time to process varinfo2 yet */
3604 				newvarinfos = lcons(varinfo2, newvarinfos);
3605 			}
3606 		}
3607 
3608 		/*
3609 		 * Get the numdistinct estimate for the Vars of this rel.  We
3610 		 * iteratively search for multivariate n-distinct with maximum number
3611 		 * of vars; assuming that each var group is independent of the others,
3612 		 * we multiply them together.  Any remaining relvarinfos after no more
3613 		 * multivariate matches are found are assumed independent too, so
3614 		 * their individual ndistinct estimates are multiplied also.
3615 		 *
3616 		 * While iterating, count how many separate numdistinct values we
3617 		 * apply.  We apply a fudge factor below, but only if we multiplied
3618 		 * more than one such values.
3619 		 */
3620 		while (relvarinfos)
3621 		{
3622 			double		mvndistinct;
3623 
3624 			if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
3625 												&mvndistinct))
3626 			{
3627 				reldistinct *= mvndistinct;
3628 				if (relmaxndistinct < mvndistinct)
3629 					relmaxndistinct = mvndistinct;
3630 				relvarcount++;
3631 			}
3632 			else
3633 			{
3634 				foreach(l, relvarinfos)
3635 				{
3636 					GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3637 
3638 					reldistinct *= varinfo2->ndistinct;
3639 					if (relmaxndistinct < varinfo2->ndistinct)
3640 						relmaxndistinct = varinfo2->ndistinct;
3641 					relvarcount++;
3642 				}
3643 
3644 				/* we're done with this relation */
3645 				relvarinfos = NIL;
3646 			}
3647 		}
3648 
3649 		/*
3650 		 * Sanity check --- don't divide by zero if empty relation.
3651 		 */
3652 		Assert(IS_SIMPLE_REL(rel));
3653 		if (rel->tuples > 0)
3654 		{
3655 			/*
3656 			 * Clamp to size of rel, or size of rel / 10 if multiple Vars. The
3657 			 * fudge factor is because the Vars are probably correlated but we
3658 			 * don't know by how much.  We should never clamp to less than the
3659 			 * largest ndistinct value for any of the Vars, though, since
3660 			 * there will surely be at least that many groups.
3661 			 */
3662 			double		clamp = rel->tuples;
3663 
3664 			if (relvarcount > 1)
3665 			{
3666 				clamp *= 0.1;
3667 				if (clamp < relmaxndistinct)
3668 				{
3669 					clamp = relmaxndistinct;
3670 					/* for sanity in case some ndistinct is too large: */
3671 					if (clamp > rel->tuples)
3672 						clamp = rel->tuples;
3673 				}
3674 			}
3675 			if (reldistinct > clamp)
3676 				reldistinct = clamp;
3677 
3678 			/*
3679 			 * Update the estimate based on the restriction selectivity,
3680 			 * guarding against division by zero when reldistinct is zero.
3681 			 * Also skip this if we know that we are returning all rows.
3682 			 */
3683 			if (reldistinct > 0 && rel->rows < rel->tuples)
3684 			{
3685 				/*
3686 				 * Given a table containing N rows with n distinct values in a
3687 				 * uniform distribution, if we select p rows at random then
3688 				 * the expected number of distinct values selected is
3689 				 *
3690 				 * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
3691 				 *
3692 				 * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
3693 				 *
3694 				 * See "Approximating block accesses in database
3695 				 * organizations", S. B. Yao, Communications of the ACM,
3696 				 * Volume 20 Issue 4, April 1977 Pages 260-261.
3697 				 *
3698 				 * Alternatively, re-arranging the terms from the factorials,
3699 				 * this may be written as
3700 				 *
3701 				 * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
3702 				 *
3703 				 * This form of the formula is more efficient to compute in
3704 				 * the common case where p is larger than N/n.  Additionally,
3705 				 * as pointed out by Dell'Era, if i << N for all terms in the
3706 				 * product, it can be approximated by
3707 				 *
3708 				 * n * (1 - ((N-p)/N)^(N/n))
3709 				 *
3710 				 * See "Expected distinct values when selecting from a bag
3711 				 * without replacement", Alberto Dell'Era,
3712 				 * http://www.adellera.it/investigations/distinct_balls/.
3713 				 *
3714 				 * The condition i << N is equivalent to n >> 1, so this is a
3715 				 * good approximation when the number of distinct values in
3716 				 * the table is large.  It turns out that this formula also
3717 				 * works well even when n is small.
3718 				 */
3719 				reldistinct *=
3720 					(1 - pow((rel->tuples - rel->rows) / rel->tuples,
3721 							 rel->tuples / reldistinct));
3722 			}
3723 			reldistinct = clamp_row_est(reldistinct);
3724 
3725 			/*
3726 			 * Update estimate of total distinct groups.
3727 			 */
3728 			numdistinct *= reldistinct;
3729 		}
3730 
3731 		varinfos = newvarinfos;
3732 	} while (varinfos != NIL);
3733 
3734 	/* Now we can account for the effects of any SRFs */
3735 	numdistinct *= srf_multiplier;
3736 
3737 	/* Round off */
3738 	numdistinct = ceil(numdistinct);
3739 
3740 	/* Guard against out-of-range answers */
3741 	if (numdistinct > input_rows)
3742 		numdistinct = input_rows;
3743 	if (numdistinct < 1.0)
3744 		numdistinct = 1.0;
3745 
3746 	return numdistinct;
3747 }
3748 
3749 /*
3750  * Estimate hash bucket statistics when the specified expression is used
3751  * as a hash key for the given number of buckets.
3752  *
3753  * This attempts to determine two values:
3754  *
3755  * 1. The frequency of the most common value of the expression (returns
3756  * zero into *mcv_freq if we can't get that).
3757  *
3758  * 2. The "bucketsize fraction", ie, average number of entries in a bucket
3759  * divided by total tuples in relation.
3760  *
3761  * XXX This is really pretty bogus since we're effectively assuming that the
3762  * distribution of hash keys will be the same after applying restriction
3763  * clauses as it was in the underlying relation.  However, we are not nearly
3764  * smart enough to figure out how the restrict clauses might change the
3765  * distribution, so this will have to do for now.
3766  *
3767  * We are passed the number of buckets the executor will use for the given
3768  * input relation.  If the data were perfectly distributed, with the same
3769  * number of tuples going into each available bucket, then the bucketsize
3770  * fraction would be 1/nbuckets.  But this happy state of affairs will occur
3771  * only if (a) there are at least nbuckets distinct data values, and (b)
3772  * we have a not-too-skewed data distribution.  Otherwise the buckets will
3773  * be nonuniformly occupied.  If the other relation in the join has a key
3774  * distribution similar to this one's, then the most-loaded buckets are
3775  * exactly those that will be probed most often.  Therefore, the "average"
3776  * bucket size for costing purposes should really be taken as something close
3777  * to the "worst case" bucket size.  We try to estimate this by adjusting the
3778  * fraction if there are too few distinct data values, and then scaling up
3779  * by the ratio of the most common value's frequency to the average frequency.
3780  *
3781  * If no statistics are available, use a default estimate of 0.1.  This will
3782  * discourage use of a hash rather strongly if the inner relation is large,
3783  * which is what we want.  We do not want to hash unless we know that the
3784  * inner rel is well-dispersed (or the alternatives seem much worse).
3785  *
3786  * The caller should also check that the mcv_freq is not so large that the
3787  * most common value would by itself require an impractically large bucket.
3788  * In a hash join, the executor can split buckets if they get too big, but
3789  * obviously that doesn't help for a bucket that contains many duplicates of
3790  * the same value.
3791  */
3792 void
3793 estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets,
3794 						   Selectivity *mcv_freq,
3795 						   Selectivity *bucketsize_frac)
3796 {
3797 	VariableStatData vardata;
3798 	double		estfract,
3799 				ndistinct,
3800 				stanullfrac,
3801 				avgfreq;
3802 	bool		isdefault;
3803 	AttStatsSlot sslot;
3804 
3805 	examine_variable(root, hashkey, 0, &vardata);
3806 
3807 	/* Look up the frequency of the most common value, if available */
3808 	*mcv_freq = 0.0;
3809 
3810 	if (HeapTupleIsValid(vardata.statsTuple))
3811 	{
3812 		if (get_attstatsslot(&sslot, vardata.statsTuple,
3813 							 STATISTIC_KIND_MCV, InvalidOid,
3814 							 ATTSTATSSLOT_NUMBERS))
3815 		{
3816 			/*
3817 			 * The first MCV stat is for the most common value.
3818 			 */
3819 			if (sslot.nnumbers > 0)
3820 				*mcv_freq = sslot.numbers[0];
3821 			free_attstatsslot(&sslot);
3822 		}
3823 	}
3824 
3825 	/* Get number of distinct values */
3826 	ndistinct = get_variable_numdistinct(&vardata, &isdefault);
3827 
3828 	/*
3829 	 * If ndistinct isn't real, punt.  We normally return 0.1, but if the
3830 	 * mcv_freq is known to be even higher than that, use it instead.
3831 	 */
3832 	if (isdefault)
3833 	{
3834 		*bucketsize_frac = (Selectivity) Max(0.1, *mcv_freq);
3835 		ReleaseVariableStats(vardata);
3836 		return;
3837 	}
3838 
3839 	/* Get fraction that are null */
3840 	if (HeapTupleIsValid(vardata.statsTuple))
3841 	{
3842 		Form_pg_statistic stats;
3843 
3844 		stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
3845 		stanullfrac = stats->stanullfrac;
3846 	}
3847 	else
3848 		stanullfrac = 0.0;
3849 
3850 	/* Compute avg freq of all distinct data values in raw relation */
3851 	avgfreq = (1.0 - stanullfrac) / ndistinct;
3852 
3853 	/*
3854 	 * Adjust ndistinct to account for restriction clauses.  Observe we are
3855 	 * assuming that the data distribution is affected uniformly by the
3856 	 * restriction clauses!
3857 	 *
3858 	 * XXX Possibly better way, but much more expensive: multiply by
3859 	 * selectivity of rel's restriction clauses that mention the target Var.
3860 	 */
3861 	if (vardata.rel && vardata.rel->tuples > 0)
3862 	{
3863 		ndistinct *= vardata.rel->rows / vardata.rel->tuples;
3864 		ndistinct = clamp_row_est(ndistinct);
3865 	}
3866 
3867 	/*
3868 	 * Initial estimate of bucketsize fraction is 1/nbuckets as long as the
3869 	 * number of buckets is less than the expected number of distinct values;
3870 	 * otherwise it is 1/ndistinct.
3871 	 */
3872 	if (ndistinct > nbuckets)
3873 		estfract = 1.0 / nbuckets;
3874 	else
3875 		estfract = 1.0 / ndistinct;
3876 
3877 	/*
3878 	 * Adjust estimated bucketsize upward to account for skewed distribution.
3879 	 */
3880 	if (avgfreq > 0.0 && *mcv_freq > avgfreq)
3881 		estfract *= *mcv_freq / avgfreq;
3882 
3883 	/*
3884 	 * Clamp bucketsize to sane range (the above adjustment could easily
3885 	 * produce an out-of-range result).  We set the lower bound a little above
3886 	 * zero, since zero isn't a very sane result.
3887 	 */
3888 	if (estfract < 1.0e-6)
3889 		estfract = 1.0e-6;
3890 	else if (estfract > 1.0)
3891 		estfract = 1.0;
3892 
3893 	*bucketsize_frac = (Selectivity) estfract;
3894 
3895 	ReleaseVariableStats(vardata);
3896 }
3897 
3898 
3899 /*-------------------------------------------------------------------------
3900  *
3901  * Support routines
3902  *
3903  *-------------------------------------------------------------------------
3904  */
3905 
3906 /*
3907  * Find applicable ndistinct statistics for the given list of VarInfos (which
3908  * must all belong to the given rel), and update *ndistinct to the estimate of
 * the MVNDistinctItem that best matches.  If a match is found, *varinfos is
3910  * updated to remove the list of matched varinfos.
3911  *
3912  * Varinfos that aren't for simple Vars are ignored.
3913  *
3914  * Return true if we're able to find a match, false otherwise.
3915  */
static bool
estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
								List **varinfos, double *ndistinct)
{
	ListCell   *lc;
	Bitmapset  *attnums = NULL;
	int			nmatches;
	Oid			statOid = InvalidOid;
	MVNDistinct *stats;
	Bitmapset  *matched = NULL;

	/* bail out immediately if the table has no extended statistics */
	if (!rel->statlist)
		return false;

	/* Determine the attnums we're looking for */
	foreach(lc, *varinfos)
	{
		GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
		AttrNumber	attnum;

		Assert(varinfo->rel == rel);

		/* extended statistics cover only plain column references */
		if (!IsA(varinfo->var, Var))
			continue;

		attnum = ((Var *) varinfo->var)->varattno;

		/* system columns cannot appear in extended statistics */
		if (!AttrNumberIsForUserDefinedAttr(attnum))
			continue;

		attnums = bms_add_member(attnums, attnum);
	}

	/* look for the ndistinct statistics matching the most vars */
	nmatches = 1;				/* we require at least two matches */
	foreach(lc, rel->statlist)
	{
		StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
		Bitmapset  *shared;
		int			nshared;

		/* skip statistics of other kinds */
		if (info->kind != STATS_EXT_NDISTINCT)
			continue;

		/* compute attnums shared by the vars and the statistics object */
		shared = bms_intersect(info->keys, attnums);
		nshared = bms_num_members(shared);

		/*
		 * Does this statistics object match more columns than the currently
		 * best object?  If so, use this one instead.
		 *
		 * XXX This should break ties using name of the object, or something
		 * like that, to make the outcome stable.
		 */
		if (nshared > nmatches)
		{
			statOid = info->statOid;
			nmatches = nshared;
			matched = shared;
		}
	}

	/* No match? */
	if (statOid == InvalidOid)
		return false;
	Assert(nmatches > 1 && matched != NULL);

	/* fetch the MVNDistinct data for the winning statistics object */
	stats = statext_ndistinct_load(statOid);

	/*
	 * If we have a match, search it for the specific item that matches (there
	 * must be one), and construct the output values.
	 */
	if (stats)
	{
		int			i;
		List	   *newlist = NIL;
		MVNDistinctItem *item = NULL;

		/* Find the specific item that exactly matches the combination */
		for (i = 0; i < stats->nitems; i++)
		{
			MVNDistinctItem *tmpitem = &stats->items[i];

			if (bms_subset_compare(tmpitem->attrs, matched) == BMS_EQUAL)
			{
				item = tmpitem;
				break;
			}
		}

		/* make sure we found an item */
		if (!item)
			elog(ERROR, "corrupt MVNDistinct entry");

		/* Form the output varinfo list, keeping only unmatched ones */
		foreach(lc, *varinfos)
		{
			GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
			AttrNumber	attnum;

			/* non-Var entries were never candidates; keep them */
			if (!IsA(varinfo->var, Var))
			{
				newlist = lappend(newlist, varinfo);
				continue;
			}

			attnum = ((Var *) varinfo->var)->varattno;

			/* drop entries covered by the matched statistics object */
			if (AttrNumberIsForUserDefinedAttr(attnum) &&
				bms_is_member(attnum, matched))
				continue;

			newlist = lappend(newlist, varinfo);
		}

		*varinfos = newlist;
		*ndistinct = item->ndistinct;
		return true;
	}

	return false;
}
4042 
4043 /*
4044  * convert_to_scalar
4045  *	  Convert non-NULL values of the indicated types to the comparison
4046  *	  scale needed by scalarineqsel().
4047  *	  Returns "true" if successful.
4048  *
4049  * XXX this routine is a hack: ideally we should look up the conversion
4050  * subroutines in pg_type.
4051  *
4052  * All numeric datatypes are simply converted to their equivalent
4053  * "double" values.  (NUMERIC values that are outside the range of "double"
4054  * are clamped to +/- HUGE_VAL.)
4055  *
4056  * String datatypes are converted by convert_string_to_scalar(),
4057  * which is explained below.  The reason why this routine deals with
4058  * three values at a time, not just one, is that we need it for strings.
4059  *
4060  * The bytea datatype is just enough different from strings that it has
4061  * to be treated separately.
4062  *
4063  * The several datatypes representing absolute times are all converted
4064  * to Timestamp, which is actually a double, and then we just use that
4065  * double value.  Note this will give correct results even for the "special"
4066  * values of Timestamp, since those are chosen to compare correctly;
4067  * see timestamp_cmp.
4068  *
4069  * The several datatypes representing relative times (intervals) are all
4070  * converted to measurements expressed in seconds.
4071  */
static bool
convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
				  Datum lobound, Datum hibound, Oid boundstypid,
				  double *scaledlobound, double *scaledhibound)
{
	/* set true by the conversion subroutines on unsupported input types */
	bool		failure = false;

	/*
	 * Both the valuetypid and the boundstypid should exactly match the
	 * declared input type(s) of the operator we are invoked for.  However,
	 * extensions might try to use scalarineqsel as estimator for operators
	 * with input type(s) we don't handle here; in such cases, we want to
	 * return false, not fail.  In any case, we mustn't assume that valuetypid
	 * and boundstypid are identical.
	 *
	 * XXX The histogram we are interpolating between points of could belong
	 * to a column that's only binary-compatible with the declared type. In
	 * essence we are assuming that the semantics of binary-compatible types
	 * are enough alike that we can use a histogram generated with one type's
	 * operators to estimate selectivity for the other's.  This is outright
	 * wrong in some cases --- in particular signed versus unsigned
	 * interpretation could trip us up.  But it's useful enough in the
	 * majority of cases that we do it anyway.  Should think about more
	 * rigorous ways to do it.
	 */
	switch (valuetypid)
	{
			/*
			 * Built-in numeric types
			 */
		case BOOLOID:
		case INT2OID:
		case INT4OID:
		case INT8OID:
		case FLOAT4OID:
		case FLOAT8OID:
		case NUMERICOID:
		case OIDOID:
		case REGPROCOID:
		case REGPROCEDUREOID:
		case REGOPEROID:
		case REGOPERATOROID:
		case REGCLASSOID:
		case REGTYPEOID:
		case REGCONFIGOID:
		case REGDICTIONARYOID:
		case REGROLEOID:
		case REGNAMESPACEOID:
			*scaledvalue = convert_numeric_to_scalar(value, valuetypid,
													 &failure);
			*scaledlobound = convert_numeric_to_scalar(lobound, boundstypid,
													   &failure);
			*scaledhibound = convert_numeric_to_scalar(hibound, boundstypid,
													   &failure);
			return !failure;

			/*
			 * Built-in string types
			 */
		case CHAROID:
		case BPCHAROID:
		case VARCHAROID:
		case TEXTOID:
		case NAMEOID:
			{
				char	   *valstr = convert_string_datum(value, valuetypid,
														  &failure);
				char	   *lostr = convert_string_datum(lobound, boundstypid,
														 &failure);
				char	   *histr = convert_string_datum(hibound, boundstypid,
														 &failure);

				/*
				 * Bail out if any of the values is not of string type.  We
				 * might leak converted strings for the other value(s), but
				 * that's not worth troubling over.
				 */
				if (failure)
					return false;

				convert_string_to_scalar(valstr, scaledvalue,
										 lostr, scaledlobound,
										 histr, scaledhibound);
				pfree(valstr);
				pfree(lostr);
				pfree(histr);
				return true;
			}

			/*
			 * Built-in bytea type
			 */
		case BYTEAOID:
			{
				/* We only support bytea vs bytea comparison */
				if (boundstypid != BYTEAOID)
					return false;
				convert_bytea_to_scalar(value, scaledvalue,
										lobound, scaledlobound,
										hibound, scaledhibound);
				return true;
			}

			/*
			 * Built-in time types
			 */
		case TIMESTAMPOID:
		case TIMESTAMPTZOID:
		case ABSTIMEOID:
		case DATEOID:
		case INTERVALOID:
		case RELTIMEOID:
		case TINTERVALOID:
		case TIMEOID:
		case TIMETZOID:
			*scaledvalue = convert_timevalue_to_scalar(value, valuetypid,
													   &failure);
			*scaledlobound = convert_timevalue_to_scalar(lobound, boundstypid,
														 &failure);
			*scaledhibound = convert_timevalue_to_scalar(hibound, boundstypid,
														 &failure);
			return !failure;

			/*
			 * Built-in network types
			 */
		case INETOID:
		case CIDROID:
		case MACADDROID:
		case MACADDR8OID:
			*scaledvalue = convert_network_to_scalar(value, valuetypid,
													 &failure);
			*scaledlobound = convert_network_to_scalar(lobound, boundstypid,
													   &failure);
			*scaledhibound = convert_network_to_scalar(hibound, boundstypid,
													   &failure);
			return !failure;
	}
	/* Don't know how to convert */
	*scaledvalue = *scaledlobound = *scaledhibound = 0;
	return false;
}
4214 
4215 /*
4216  * Do convert_to_scalar()'s work for any numeric data type.
4217  *
4218  * On failure (e.g., unsupported typid), set *failure to true;
4219  * otherwise, that variable is not changed.
4220  */
4221 static double
4222 convert_numeric_to_scalar(Datum value, Oid typid, bool *failure)
4223 {
4224 	switch (typid)
4225 	{
4226 		case BOOLOID:
4227 			return (double) DatumGetBool(value);
4228 		case INT2OID:
4229 			return (double) DatumGetInt16(value);
4230 		case INT4OID:
4231 			return (double) DatumGetInt32(value);
4232 		case INT8OID:
4233 			return (double) DatumGetInt64(value);
4234 		case FLOAT4OID:
4235 			return (double) DatumGetFloat4(value);
4236 		case FLOAT8OID:
4237 			return (double) DatumGetFloat8(value);
4238 		case NUMERICOID:
4239 			/* Note: out-of-range values will be clamped to +-HUGE_VAL */
4240 			return (double)
4241 				DatumGetFloat8(DirectFunctionCall1(numeric_float8_no_overflow,
4242 												   value));
4243 		case OIDOID:
4244 		case REGPROCOID:
4245 		case REGPROCEDUREOID:
4246 		case REGOPEROID:
4247 		case REGOPERATOROID:
4248 		case REGCLASSOID:
4249 		case REGTYPEOID:
4250 		case REGCONFIGOID:
4251 		case REGDICTIONARYOID:
4252 		case REGROLEOID:
4253 		case REGNAMESPACEOID:
4254 			/* we can treat OIDs as integers... */
4255 			return (double) DatumGetObjectId(value);
4256 	}
4257 
4258 	*failure = true;
4259 	return 0;
4260 }
4261 
4262 /*
4263  * Do convert_to_scalar()'s work for any character-string data type.
4264  *
4265  * String datatypes are converted to a scale that ranges from 0 to 1,
4266  * where we visualize the bytes of the string as fractional digits.
4267  *
4268  * We do not want the base to be 256, however, since that tends to
4269  * generate inflated selectivity estimates; few databases will have
4270  * occurrences of all 256 possible byte values at each position.
4271  * Instead, use the smallest and largest byte values seen in the bounds
4272  * as the estimated range for each byte, after some fudging to deal with
4273  * the fact that we probably aren't going to see the full range that way.
4274  *
4275  * An additional refinement is that we discard any common prefix of the
4276  * three strings before computing the scaled values.  This allows us to
4277  * "zoom in" when we encounter a narrow data range.  An example is a phone
4278  * number database where all the values begin with the same area code.
4279  * (Actually, the bounds will be adjacent histogram-bin-boundary values,
4280  * so this is more likely to happen than you might think.)
4281  */
static void
convert_string_to_scalar(char *value,
						 double *scaledvalue,
						 char *lobound,
						 double *scaledlobound,
						 char *hibound,
						 double *scaledhibound)
{
	int			rangelo,
				rangehi;
	char	   *bounds[2];
	int			b;

	/*
	 * Establish the byte range actually present in the two histogram
	 * bounds.  Seed the range with the first byte of hibound so it is never
	 * empty, then widen it by scanning both bound strings.
	 */
	rangelo = rangehi = (unsigned char) hibound[0];
	bounds[0] = lobound;
	bounds[1] = hibound;
	for (b = 0; b < 2; b++)
	{
		char	   *p;

		for (p = bounds[b]; *p; p++)
		{
			int			ch = (unsigned char) *p;

			if (ch < rangelo)
				rangelo = ch;
			if (ch > rangehi)
				rangehi = ch;
		}
	}

	/* If range includes any upper-case ASCII chars, make it include all */
	if (rangelo <= 'Z' && rangehi >= 'A')
	{
		if (rangelo > 'A')
			rangelo = 'A';
		if (rangehi < 'Z')
			rangehi = 'Z';
	}
	/* Ditto lower-case */
	if (rangelo <= 'z' && rangehi >= 'a')
	{
		if (rangelo > 'a')
			rangelo = 'a';
		if (rangehi < 'z')
			rangehi = 'z';
	}
	/* Ditto digits */
	if (rangelo <= '9' && rangehi >= '0')
	{
		if (rangelo > '0')
			rangelo = '0';
		if (rangehi < '9')
			rangehi = '9';
	}

	/*
	 * If range includes less than 10 chars, assume we have not got enough
	 * data, and make it include regular ASCII set.
	 */
	if (rangehi - rangelo < 9)
	{
		rangelo = ' ';
		rangehi = 127;
	}

	/*
	 * Skip over any prefix common to all three strings: this "zooms in" on
	 * the part of the data range where the values actually differ.
	 */
	while (*lobound && *lobound == *hibound && *lobound == *value)
	{
		lobound++;
		hibound++;
		value++;
	}

	/* Convert the (de-prefixed) strings within the detected byte range */
	*scaledvalue = convert_one_string_to_scalar(value, rangelo, rangehi);
	*scaledlobound = convert_one_string_to_scalar(lobound, rangelo, rangehi);
	*scaledhibound = convert_one_string_to_scalar(hibound, rangelo, rangehi);
}
4361 
static double
convert_one_string_to_scalar(char *value, int rangelo, int rangehi)
{
	int			slen = strlen(value);
	double		base,
				scale,
				result;
	int			i;

	/* An empty string scales to exactly zero */
	if (slen <= 0)
		return 0.0;

	/*
	 * Examining more than a dozen bytes buys nothing.  With base at least
	 * 10 that already yields ~12 decimal digits of nominal resolution, far
	 * more precision than this estimation technique has anyway (especially
	 * in non-C locales).  It also bounds the divisor: even at the maximum
	 * base of 256, 256^13 = 2.03e31 cannot overflow a double on any known
	 * machine.
	 */
	if (slen > 12)
		slen = 12;

	/*
	 * Treat the leading bytes as successive fractional digits in the given
	 * base.  Bytes falling outside [rangelo, rangehi] are clamped to just
	 * beyond the range ends.
	 */
	base = rangehi - rangelo + 1;
	scale = base;
	result = 0.0;
	for (i = 0; i < slen; i++)
	{
		int			ch = (unsigned char) value[i];

		if (ch < rangelo)
			ch = rangelo - 1;
		else if (ch > rangehi)
			ch = rangehi + 1;
		result += ((double) (ch - rangelo)) / scale;
		scale *= base;
	}

	return result;
}
4403 
4404 /*
4405  * Convert a string-type Datum into a palloc'd, null-terminated string.
4406  *
4407  * On failure (e.g., unsupported typid), set *failure to true;
4408  * otherwise, that variable is not changed.  (We'll return NULL on failure.)
4409  *
4410  * When using a non-C locale, we must pass the string through strxfrm()
4411  * before continuing, so as to generate correct locale-specific results.
4412  */
static char *
convert_string_datum(Datum value, Oid typid, bool *failure)
{
	char	   *val;

	switch (typid)
	{
		case CHAROID:
			/* "char" is a single byte; build a two-byte C string */
			val = (char *) palloc(2);
			val[0] = DatumGetChar(value);
			val[1] = '\0';
			break;
		case BPCHAROID:
		case VARCHAROID:
		case TEXTOID:
			val = TextDatumGetCString(value);
			break;
		case NAMEOID:
			{
				NameData   *nm = (NameData *) DatumGetPointer(value);

				val = pstrdup(NameStr(*nm));
				break;
			}
		default:
			/* not a string type we know how to handle */
			*failure = true;
			return NULL;
	}

	if (!lc_collate_is_c(DEFAULT_COLLATION_OID))
	{
		char	   *xfrmstr;
		size_t		xfrmlen;
		size_t		xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;

		/*
		 * XXX: We could guess at a suitable output buffer size and only call
		 * strxfrm twice if our guess is too small.
		 *
		 * XXX: strxfrm doesn't support UTF-8 encoding on Win32, it can return
		 * bogus data or set an error. This is not really a problem unless it
		 * crashes since it will only give an estimation error and nothing
		 * fatal.
		 */
#if _MSC_VER == 1400			/* VS.Net 2005 */

		/*
		 * VS2005's strxfrm misbehaves when passed a NULL buffer, so probe
		 * the needed length with a dummy one-byte buffer instead.  See
		 * http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=99694
		 */
		{
			char		x[1];

			xfrmlen = strxfrm(x, val, 0);
		}
#else
		/* ask strxfrm how much space the transformed string will need */
		xfrmlen = strxfrm(NULL, val, 0);
#endif
#ifdef WIN32

		/*
		 * On Windows, strxfrm returns INT_MAX when an error occurs. Instead
		 * of trying to allocate this much memory (and fail), just return the
		 * original string unmodified as if we were in the C locale.
		 */
		if (xfrmlen == INT_MAX)
			return val;
#endif
		xfrmstr = (char *) palloc(xfrmlen + 1);
		xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1);

		/*
		 * Some systems (e.g., glibc) can return a smaller value from the
		 * second call than the first; thus the Assert must be <= not ==.
		 */
		Assert(xfrmlen2 <= xfrmlen);
		pfree(val);
		val = xfrmstr;
	}

	return val;
}
4495 
4496 /*
4497  * Do convert_to_scalar()'s work for any bytea data type.
4498  *
4499  * Very similar to convert_string_to_scalar except we can't assume
4500  * null-termination and therefore pass explicit lengths around.
4501  *
4502  * Also, assumptions about likely "normal" ranges of characters have been
4503  * removed - a data range of 0..255 is always used, for now.  (Perhaps
4504  * someday we will add information about actual byte data range to
4505  * pg_statistic.)
4506  */
4507 static void
4508 convert_bytea_to_scalar(Datum value,
4509 						double *scaledvalue,
4510 						Datum lobound,
4511 						double *scaledlobound,
4512 						Datum hibound,
4513 						double *scaledhibound)
4514 {
4515 	bytea	   *valuep = DatumGetByteaPP(value);
4516 	bytea	   *loboundp = DatumGetByteaPP(lobound);
4517 	bytea	   *hiboundp = DatumGetByteaPP(hibound);
4518 	int			rangelo,
4519 				rangehi,
4520 				valuelen = VARSIZE_ANY_EXHDR(valuep),
4521 				loboundlen = VARSIZE_ANY_EXHDR(loboundp),
4522 				hiboundlen = VARSIZE_ANY_EXHDR(hiboundp),
4523 				i,
4524 				minlen;
4525 	unsigned char *valstr = (unsigned char *) VARDATA_ANY(valuep);
4526 	unsigned char *lostr = (unsigned char *) VARDATA_ANY(loboundp);
4527 	unsigned char *histr = (unsigned char *) VARDATA_ANY(hiboundp);
4528 
4529 	/*
4530 	 * Assume bytea data is uniformly distributed across all byte values.
4531 	 */
4532 	rangelo = 0;
4533 	rangehi = 255;
4534 
4535 	/*
4536 	 * Now strip any common prefix of the three strings.
4537 	 */
4538 	minlen = Min(Min(valuelen, loboundlen), hiboundlen);
4539 	for (i = 0; i < minlen; i++)
4540 	{
4541 		if (*lostr != *histr || *lostr != *valstr)
4542 			break;
4543 		lostr++, histr++, valstr++;
4544 		loboundlen--, hiboundlen--, valuelen--;
4545 	}
4546 
4547 	/*
4548 	 * Now we can do the conversions.
4549 	 */
4550 	*scaledvalue = convert_one_bytea_to_scalar(valstr, valuelen, rangelo, rangehi);
4551 	*scaledlobound = convert_one_bytea_to_scalar(lostr, loboundlen, rangelo, rangehi);
4552 	*scaledhibound = convert_one_bytea_to_scalar(histr, hiboundlen, rangelo, rangehi);
4553 }
4554 
static double
convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
							int rangelo, int rangehi)
{
	double		base,
				scale,
				result;
	int			i,
				nbytes;

	/* An empty value scales to exactly zero */
	if (valuelen <= 0)
		return 0.0;

	/*
	 * Since base is 256, about 10 bytes already exceed any useful precision
	 * (even this many seems like overkill), so cap the bytes examined.
	 */
	nbytes = (valuelen > 10) ? 10 : valuelen;

	/*
	 * Treat the leading bytes as successive fractional digits in the given
	 * base, clamping out-of-range bytes to just beyond the range ends.
	 */
	base = rangehi - rangelo + 1;
	scale = base;
	result = 0.0;
	for (i = 0; i < nbytes; i++)
	{
		int			ch = value[i];

		if (ch < rangelo)
			ch = rangelo - 1;
		else if (ch > rangehi)
			ch = rangehi + 1;
		result += ((double) (ch - rangelo)) / scale;
		scale *= base;
	}

	return result;
}
4591 
4592 /*
4593  * Do convert_to_scalar()'s work for any timevalue data type.
4594  *
4595  * On failure (e.g., unsupported typid), set *failure to true;
4596  * otherwise, that variable is not changed.
4597  */
static double
convert_timevalue_to_scalar(Datum value, Oid typid, bool *failure)
{
	switch (typid)
	{
		case TIMESTAMPOID:
			/* Timestamp is already on the desired scale */
			return DatumGetTimestamp(value);
		case TIMESTAMPTZOID:
			return DatumGetTimestampTz(value);
		case ABSTIMEOID:
			/* convert abstime to timestamp, then use that value */
			return DatumGetTimestamp(DirectFunctionCall1(abstime_timestamp,
														 value));
		case DATEOID:
			return date2timestamp_no_overflow(DatumGetDateADT(value));
		case INTERVALOID:
			{
				Interval   *interval = DatumGetIntervalP(value);

				/*
				 * Convert the month part of Interval to days using assumed
				 * average month length of 365.25/12.0 days.  Not too
				 * accurate, but plenty good enough for our purposes.
				 */
				return interval->time + interval->day * (double) USECS_PER_DAY +
					interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * USECS_PER_DAY);
			}
		case RELTIMEOID:
			/* reltime is in seconds; scale up to microseconds */
			return (DatumGetRelativeTime(value) * 1000000.0);
		case TINTERVALOID:
			{
				TimeInterval tinterval = DatumGetTimeInterval(value);

				/* use the interval's length in microseconds, when valid */
				if (tinterval->status != 0)
					return ((tinterval->data[1] - tinterval->data[0]) * 1000000.0);
				return 0;		/* for lack of a better idea */
			}
		case TIMEOID:
			return DatumGetTimeADT(value);
		case TIMETZOID:
			{
				TimeTzADT  *timetz = DatumGetTimeTzADTP(value);

				/* use GMT-equivalent time */
				return (double) (timetz->time + (timetz->zone * 1000000.0));
			}
	}

	/* not a time-related type we know how to handle */
	*failure = true;
	return 0;
}
4648 
4649 
4650 /*
4651  * get_restriction_variable
4652  *		Examine the args of a restriction clause to see if it's of the
4653  *		form (variable op pseudoconstant) or (pseudoconstant op variable),
4654  *		where "variable" could be either a Var or an expression in vars of a
4655  *		single relation.  If so, extract information about the variable,
4656  *		and also indicate which side it was on and the other argument.
4657  *
4658  * Inputs:
4659  *	root: the planner info
4660  *	args: clause argument list
4661  *	varRelid: see specs for restriction selectivity functions
4662  *
4663  * Outputs: (these are valid only if true is returned)
4664  *	*vardata: gets information about variable (see examine_variable)
4665  *	*other: gets other clause argument, aggressively reduced to a constant
4666  *	*varonleft: set true if variable is on the left, false if on the right
4667  *
4668  * Returns true if a variable is identified, otherwise false.
4669  *
4670  * Note: if there are Vars on both sides of the clause, we must fail, because
4671  * callers are expecting that the other side will act like a pseudoconstant.
4672  */
4673 bool
4674 get_restriction_variable(PlannerInfo *root, List *args, int varRelid,
4675 						 VariableStatData *vardata, Node **other,
4676 						 bool *varonleft)
4677 {
4678 	Node	   *left,
4679 			   *right;
4680 	VariableStatData rdata;
4681 
4682 	/* Fail if not a binary opclause (probably shouldn't happen) */
4683 	if (list_length(args) != 2)
4684 		return false;
4685 
4686 	left = (Node *) linitial(args);
4687 	right = (Node *) lsecond(args);
4688 
4689 	/*
4690 	 * Examine both sides.  Note that when varRelid is nonzero, Vars of other
4691 	 * relations will be treated as pseudoconstants.
4692 	 */
4693 	examine_variable(root, left, varRelid, vardata);
4694 	examine_variable(root, right, varRelid, &rdata);
4695 
4696 	/*
4697 	 * If one side is a variable and the other not, we win.
4698 	 */
4699 	if (vardata->rel && rdata.rel == NULL)
4700 	{
4701 		*varonleft = true;
4702 		*other = estimate_expression_value(root, rdata.var);
4703 		/* Assume we need no ReleaseVariableStats(rdata) here */
4704 		return true;
4705 	}
4706 
4707 	if (vardata->rel == NULL && rdata.rel)
4708 	{
4709 		*varonleft = false;
4710 		*other = estimate_expression_value(root, vardata->var);
4711 		/* Assume we need no ReleaseVariableStats(*vardata) here */
4712 		*vardata = rdata;
4713 		return true;
4714 	}
4715 
4716 	/* Oops, clause has wrong structure (probably var op var) */
4717 	ReleaseVariableStats(*vardata);
4718 	ReleaseVariableStats(rdata);
4719 
4720 	return false;
4721 }
4722 
4723 /*
4724  * get_join_variables
4725  *		Apply examine_variable() to each side of a join clause.
4726  *		Also, attempt to identify whether the join clause has the same
4727  *		or reversed sense compared to the SpecialJoinInfo.
4728  *
4729  * We consider the join clause "normal" if it is "lhs_var OP rhs_var",
4730  * or "reversed" if it is "rhs_var OP lhs_var".  In complicated cases
4731  * where we can't tell for sure, we default to assuming it's normal.
4732  */
4733 void
4734 get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo,
4735 				   VariableStatData *vardata1, VariableStatData *vardata2,
4736 				   bool *join_is_reversed)
4737 {
4738 	Node	   *left,
4739 			   *right;
4740 
4741 	if (list_length(args) != 2)
4742 		elog(ERROR, "join operator should take two arguments");
4743 
4744 	left = (Node *) linitial(args);
4745 	right = (Node *) lsecond(args);
4746 
4747 	examine_variable(root, left, 0, vardata1);
4748 	examine_variable(root, right, 0, vardata2);
4749 
4750 	if (vardata1->rel &&
4751 		bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
4752 		*join_is_reversed = true;	/* var1 is on RHS */
4753 	else if (vardata2->rel &&
4754 			 bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand))
4755 		*join_is_reversed = true;	/* var2 is on LHS */
4756 	else
4757 		*join_is_reversed = false;
4758 }
4759 
4760 /*
4761  * examine_variable
4762  *		Try to look up statistical data about an expression.
4763  *		Fill in a VariableStatData struct to describe the expression.
4764  *
4765  * Inputs:
4766  *	root: the planner info
4767  *	node: the expression tree to examine
4768  *	varRelid: see specs for restriction selectivity functions
4769  *
4770  * Outputs: *vardata is filled as follows:
4771  *	var: the input expression (with any binary relabeling stripped, if
4772  *		it is or contains a variable; but otherwise the type is preserved)
4773  *	rel: RelOptInfo for relation containing variable; NULL if expression
4774  *		contains no Vars (NOTE this could point to a RelOptInfo of a
4775  *		subquery, not one in the current query).
4776  *	statsTuple: the pg_statistic entry for the variable, if one exists;
4777  *		otherwise NULL.
4778  *	freefunc: pointer to a function to release statsTuple with.
4779  *	vartype: exposed type of the expression; this should always match
4780  *		the declared input type of the operator we are estimating for.
4781  *	atttype, atttypmod: actual type/typmod of the "var" expression.  This is
4782  *		commonly the same as the exposed type of the variable argument,
4783  *		but can be different in binary-compatible-type cases.
4784  *	isunique: true if we were able to match the var to a unique index or a
4785  *		single-column DISTINCT clause, implying its values are unique for
4786  *		this query.  (Caution: this should be trusted for statistical
4787  *		purposes only, since we do not check indimmediate nor verify that
4788  *		the exact same definition of equality applies.)
4789  *	acl_ok: true if current user has permission to read the column(s)
4790  *		underlying the pg_statistic entry.  This is consulted by
4791  *		statistic_proc_security_check().
4792  *
4793  * Caller is responsible for doing ReleaseVariableStats() before exiting.
4794  */
void
examine_variable(PlannerInfo *root, Node *node, int varRelid,
				 VariableStatData *vardata)
{
	Node	   *basenode;
	Relids		varnos;
	RelOptInfo *onerel;

	/* Make sure we don't return dangling pointers in vardata */
	MemSet(vardata, 0, sizeof(VariableStatData));

	/* Save the exposed type of the expression */
	vardata->vartype = exprType(node);

	/* Look inside any binary-compatible relabeling */

	if (IsA(node, RelabelType))
		basenode = (Node *) ((RelabelType *) node)->arg;
	else
		basenode = node;

	/* Fast path for a simple Var */

	if (IsA(basenode, Var) &&
		(varRelid == 0 || varRelid == ((Var *) basenode)->varno))
	{
		Var		   *var = (Var *) basenode;

		/* Set up result fields other than the stats tuple */
		vardata->var = basenode;	/* return Var without relabeling */
		vardata->rel = find_base_rel(root, var->varno);
		vardata->atttype = var->vartype;
		vardata->atttypmod = var->vartypmod;
		vardata->isunique = has_unique_index(vardata->rel, var->varattno);

		/* Try to locate some stats */
		examine_simple_variable(root, var, vardata);

		return;
	}

	/*
	 * Okay, it's a more complicated expression.  Determine variable
	 * membership.  Note that when varRelid isn't zero, only vars of that
	 * relation are considered "real" vars.
	 */
	varnos = pull_varnos(basenode);

	/* onerel is set only when the expression belongs to exactly one baserel */
	onerel = NULL;

	switch (bms_membership(varnos))
	{
		case BMS_EMPTY_SET:
			/* No Vars at all ... must be pseudo-constant clause */
			break;
		case BMS_SINGLETON:
			if (varRelid == 0 || bms_is_member(varRelid, varnos))
			{
				onerel = find_base_rel(root,
									   (varRelid ? varRelid : bms_singleton_member(varnos)));
				vardata->rel = onerel;
				node = basenode;	/* strip any relabeling */
			}
			/* else treat it as a constant */
			break;
		case BMS_MULTIPLE:
			if (varRelid == 0)
			{
				/* treat it as a variable of a join relation */
				vardata->rel = find_join_rel(root, varnos);
				node = basenode;	/* strip any relabeling */
			}
			else if (bms_is_member(varRelid, varnos))
			{
				/* ignore the vars belonging to other relations */
				vardata->rel = find_base_rel(root, varRelid);
				node = basenode;	/* strip any relabeling */
				/* note: no point in expressional-index search here */
			}
			/* else treat it as a constant */
			break;
	}

	bms_free(varnos);

	/* Record the (possibly relabel-stripped) expression and its real type */
	vardata->var = node;
	vardata->atttype = exprType(node);
	vardata->atttypmod = exprTypmod(node);

	if (onerel)
	{
		/*
		 * We have an expression in vars of a single relation.  Try to match
		 * it to expressional index columns, in hopes of finding some
		 * statistics.
		 *
		 * Note that we consider all index columns including INCLUDE columns,
		 * since there could be stats for such columns.  But the test for
		 * uniqueness needs to be warier.
		 *
		 * XXX it's conceivable that there are multiple matches with different
		 * index opfamilies; if so, we need to pick one that matches the
		 * operator we are estimating for.  FIXME later.
		 */
		ListCell   *ilist;

		foreach(ilist, onerel->indexlist)
		{
			IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
			ListCell   *indexpr_item;
			int			pos;

			indexpr_item = list_head(index->indexprs);
			if (indexpr_item == NULL)
				continue;		/* no expressions here... */

			/* Scan the index's columns looking for one matching our expr */
			for (pos = 0; pos < index->ncolumns; pos++)
			{
				/* indexkeys[pos] == 0 marks an expression column */
				if (index->indexkeys[pos] == 0)
				{
					Node	   *indexkey;

					if (indexpr_item == NULL)
						elog(ERROR, "too few entries in indexprs list");
					indexkey = (Node *) lfirst(indexpr_item);
					if (indexkey && IsA(indexkey, RelabelType))
						indexkey = (Node *) ((RelabelType *) indexkey)->arg;
					if (equal(node, indexkey))
					{
						/*
						 * Found a match ... is it a unique index? Tests here
						 * should match has_unique_index().
						 */
						if (index->unique &&
							index->nkeycolumns == 1 &&
							pos == 0 &&
							(index->indpred == NIL || index->predOK))
							vardata->isunique = true;

						/*
						 * Has it got stats?  We only consider stats for
						 * non-partial indexes, since partial indexes probably
						 * don't reflect whole-relation statistics; the above
						 * check for uniqueness is the only info we take from
						 * a partial index.
						 *
						 * An index stats hook, however, must make its own
						 * decisions about what to do with partial indexes.
						 */
						if (get_index_stats_hook &&
							(*get_index_stats_hook) (root, index->indexoid,
													 pos + 1, vardata))
						{
							/*
							 * The hook took control of acquiring a stats
							 * tuple.  If it did supply a tuple, it'd better
							 * have supplied a freefunc.
							 */
							if (HeapTupleIsValid(vardata->statsTuple) &&
								!vardata->freefunc)
								elog(ERROR, "no function provided to release variable stats with");
						}
						else if (index->indpred == NIL)
						{
							vardata->statsTuple =
								SearchSysCache3(STATRELATTINH,
												ObjectIdGetDatum(index->indexoid),
												Int16GetDatum(pos + 1),
												BoolGetDatum(false));
							vardata->freefunc = ReleaseSysCache;

							if (HeapTupleIsValid(vardata->statsTuple))
							{
								/* Get index's table for permission check */
								RangeTblEntry *rte;
								Oid			userid;

								rte = planner_rt_fetch(index->rel->relid, root);
								Assert(rte->rtekind == RTE_RELATION);

								/*
								 * Use checkAsUser if it's set, in case we're
								 * accessing the table via a view.
								 */
								userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();

								/*
								 * For simplicity, we insist on the whole
								 * table being selectable, rather than trying
								 * to identify which column(s) the index
								 * depends on.  Also require all rows to be
								 * selectable --- there must be no
								 * securityQuals from security barrier views
								 * or RLS policies.
								 */
								vardata->acl_ok =
									rte->securityQuals == NIL &&
									(pg_class_aclcheck(rte->relid, userid,
													   ACL_SELECT) == ACLCHECK_OK);

								/*
								 * If the user doesn't have permissions to
								 * access an inheritance child relation, check
								 * the permissions of the table actually
								 * mentioned in the query, since most likely
								 * the user does have that permission.  Note
								 * that whole-table select privilege on the
								 * parent doesn't quite guarantee that the
								 * user could read all columns of the child.
								 * But in practice it's unlikely that any
								 * interesting security violation could result
								 * from allowing access to the expression
								 * index's stats, so we allow it anyway.  See
								 * similar code in examine_simple_variable()
								 * for additional comments.
								 */
								if (!vardata->acl_ok &&
									root->append_rel_array != NULL)
								{
									AppendRelInfo *appinfo;
									Index		varno = index->rel->relid;

									/* Walk up to the root inheritance parent */
									appinfo = root->append_rel_array[varno];
									while (appinfo &&
										   planner_rt_fetch(appinfo->parent_relid,
															root)->rtekind == RTE_RELATION)
									{
										varno = appinfo->parent_relid;
										appinfo = root->append_rel_array[varno];
									}
									if (varno != index->rel->relid)
									{
										/* Repeat access check on this rel */
										rte = planner_rt_fetch(varno, root);
										Assert(rte->rtekind == RTE_RELATION);

										userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();

										vardata->acl_ok =
											rte->securityQuals == NIL &&
											(pg_class_aclcheck(rte->relid,
															   userid,
															   ACL_SELECT) == ACLCHECK_OK);
									}
								}
							}
							else
							{
								/* suppress leakproofness checks later */
								vardata->acl_ok = true;
							}
						}
						if (vardata->statsTuple)
							break;
					}
					/* advance to the next stored index expression */
					indexpr_item = lnext(indexpr_item);
				}
			}
			if (vardata->statsTuple)
				break;
		}
	}
}
5058 
5059 /*
5060  * examine_simple_variable
5061  *		Handle a simple Var for examine_variable
5062  *
5063  * This is split out as a subroutine so that we can recurse to deal with
5064  * Vars referencing subqueries.
5065  *
5066  * We already filled in all the fields of *vardata except for the stats tuple.
5067  */
static void
examine_simple_variable(PlannerInfo *root, Var *var,
						VariableStatData *vardata)
{
	RangeTblEntry *rte = root->simple_rte_array[var->varno];

	Assert(IsA(rte, RangeTblEntry));

	if (get_relation_stats_hook &&
		(*get_relation_stats_hook) (root, rte, var->varattno, vardata))
	{
		/*
		 * The hook took control of acquiring a stats tuple.  If it did supply
		 * a tuple, it'd better have supplied a freefunc.
		 */
		if (HeapTupleIsValid(vardata->statsTuple) &&
			!vardata->freefunc)
			elog(ERROR, "no function provided to release variable stats with");
	}
	else if (rte->rtekind == RTE_RELATION)
	{
		/*
		 * Plain table or parent of an inheritance appendrel, so look up the
		 * column in pg_statistic
		 */
		vardata->statsTuple = SearchSysCache3(STATRELATTINH,
											  ObjectIdGetDatum(rte->relid),
											  Int16GetDatum(var->varattno),
											  BoolGetDatum(rte->inh));
		vardata->freefunc = ReleaseSysCache;

		if (HeapTupleIsValid(vardata->statsTuple))
		{
			Oid			userid;

			/*
			 * Check if user has permission to read this column.  We require
			 * all rows to be accessible, so there must be no securityQuals
			 * from security barrier views or RLS policies.  Use checkAsUser
			 * if it's set, in case we're accessing the table via a view.
			 */
			userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();

			vardata->acl_ok =
				rte->securityQuals == NIL &&
				((pg_class_aclcheck(rte->relid, userid,
									ACL_SELECT) == ACLCHECK_OK) ||
				 (pg_attribute_aclcheck(rte->relid, var->varattno, userid,
										ACL_SELECT) == ACLCHECK_OK));

			/*
			 * If the user doesn't have permissions to access an inheritance
			 * child relation or specifically this attribute, check the
			 * permissions of the table/column actually mentioned in the
			 * query, since most likely the user does have that permission
			 * (else the query will fail at runtime), and if the user can read
			 * the column there then he can get the values of the child table
			 * too.  To do that, we must find out which of the root parent's
			 * attributes the child relation's attribute corresponds to.
			 */
			if (!vardata->acl_ok && var->varattno > 0 &&
				root->append_rel_array != NULL)
			{
				AppendRelInfo *appinfo;
				Index		varno = var->varno;
				int			varattno = var->varattno;
				bool		found = false;

				appinfo = root->append_rel_array[varno];

				/*
				 * Partitions are mapped to their immediate parent, not the
				 * root parent, so must be ready to walk up multiple
				 * AppendRelInfos.  But stop if we hit a parent that is not
				 * RTE_RELATION --- that's a flattened UNION ALL subquery, not
				 * an inheritance parent.
				 */
				while (appinfo &&
					   planner_rt_fetch(appinfo->parent_relid,
										root)->rtekind == RTE_RELATION)
				{
					int			parent_varattno;
					ListCell   *l;

					/* attnos are 1-based; scan translated_vars positionally */
					parent_varattno = 1;
					found = false;
					foreach(l, appinfo->translated_vars)
					{
						Var		   *childvar = lfirst_node(Var, l);

						/* Ignore dropped attributes of the parent. */
						if (childvar != NULL &&
							varattno == childvar->varattno)
						{
							found = true;
							break;
						}
						parent_varattno++;
					}

					if (!found)
						break;

					/* Translate to the parent's varno/varattno */
					varno = appinfo->parent_relid;
					varattno = parent_varattno;

					/* If the parent is itself a child, continue up. */
					appinfo = root->append_rel_array[varno];
				}

				/*
				 * In rare cases, the Var may be local to the child table, in
				 * which case, we've got to live with having no access to this
				 * column's stats.
				 */
				if (!found)
					return;

				/* Repeat the access check on this parent rel & column */
				rte = planner_rt_fetch(varno, root);
				Assert(rte->rtekind == RTE_RELATION);

				userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();

				vardata->acl_ok =
					rte->securityQuals == NIL &&
					((pg_class_aclcheck(rte->relid, userid,
										ACL_SELECT) == ACLCHECK_OK) ||
					 (pg_attribute_aclcheck(rte->relid, varattno, userid,
											ACL_SELECT) == ACLCHECK_OK));
			}
		}
		else
		{
			/* suppress any possible leakproofness checks later */
			vardata->acl_ok = true;
		}
	}
	else if (rte->rtekind == RTE_SUBQUERY && !rte->inh)
	{
		/*
		 * Plain subquery (not one that was converted to an appendrel).
		 */
		Query	   *subquery = rte->subquery;
		RelOptInfo *rel;
		TargetEntry *ste;

		/*
		 * Punt if it's a whole-row var rather than a plain column reference.
		 */
		if (var->varattno == InvalidAttrNumber)
			return;

		/*
		 * Punt if subquery uses set operations or GROUP BY, as these will
		 * mash underlying columns' stats beyond recognition.  (Set ops are
		 * particularly nasty; if we forged ahead, we would return stats
		 * relevant to only the leftmost subselect...)	DISTINCT is also
		 * problematic, but we check that later because there is a possibility
		 * of learning something even with it.
		 */
		if (subquery->setOperations ||
			subquery->groupClause ||
			subquery->groupingSets)
			return;

		/*
		 * OK, fetch RelOptInfo for subquery.  Note that we don't change the
		 * rel returned in vardata, since caller expects it to be a rel of the
		 * caller's query level.  Because we might already be recursing, we
		 * can't use that rel pointer either, but have to look up the Var's
		 * rel afresh.
		 */
		rel = find_base_rel(root, var->varno);

		/* If the subquery hasn't been planned yet, we have to punt */
		if (rel->subroot == NULL)
			return;
		Assert(IsA(rel->subroot, PlannerInfo));

		/*
		 * Switch our attention to the subquery as mangled by the planner. It
		 * was okay to look at the pre-planning version for the tests above,
		 * but now we need a Var that will refer to the subroot's live
		 * RelOptInfos.  For instance, if any subquery pullup happened during
		 * planning, Vars in the targetlist might have gotten replaced, and we
		 * need to see the replacement expressions.
		 */
		subquery = rel->subroot->parse;
		Assert(IsA(subquery, Query));

		/* Get the subquery output expression referenced by the upper Var */
		ste = get_tle_by_resno(subquery->targetList, var->varattno);
		if (ste == NULL || ste->resjunk)
			elog(ERROR, "subquery %s does not have attribute %d",
				 rte->eref->aliasname, var->varattno);
		var = (Var *) ste->expr;

		/*
		 * If subquery uses DISTINCT, we can't make use of any stats for the
		 * variable ... but, if it's the only DISTINCT column, we are entitled
		 * to consider it unique.  We do the test this way so that it works
		 * for cases involving DISTINCT ON.
		 */
		if (subquery->distinctClause)
		{
			if (list_length(subquery->distinctClause) == 1 &&
				targetIsInSortList(ste, InvalidOid, subquery->distinctClause))
				vardata->isunique = true;
			/* cannot go further */
			return;
		}

		/*
		 * If the sub-query originated from a view with the security_barrier
		 * attribute, we must not look at the variable's statistics, though it
		 * seems all right to notice the existence of a DISTINCT clause. So
		 * stop here.
		 *
		 * This is probably a harsher restriction than necessary; it's
		 * certainly OK for the selectivity estimator (which is a C function,
		 * and therefore omnipotent anyway) to look at the statistics.  But
		 * many selectivity estimators will happily *invoke the operator
		 * function* to try to work out a good estimate - and that's not OK.
		 * So for now, don't dig down for stats.
		 */
		if (rte->security_barrier)
			return;

		/* Can only handle a simple Var of subquery's query level */
		if (var && IsA(var, Var) &&
			var->varlevelsup == 0)
		{
			/*
			 * OK, recurse into the subquery.  Note that the original setting
			 * of vardata->isunique (which will surely be false) is left
			 * unchanged in this situation.  That's what we want, since even
			 * if the underlying column is unique, the subquery may have
			 * joined to other tables in a way that creates duplicates.
			 */
			examine_simple_variable(rel->subroot, var, vardata);
		}
	}
	else
	{
		/*
		 * Otherwise, the Var comes from a FUNCTION, VALUES, or CTE RTE.  (We
		 * won't see RTE_JOIN here because join alias Vars have already been
		 * flattened.)	There's not much we can do with function outputs, but
		 * maybe someday try to be smarter about VALUES and/or CTEs.
		 */
	}
}
5321 
5322 /*
5323  * Check whether it is permitted to call func_oid passing some of the
5324  * pg_statistic data in vardata.  We allow this either if the user has SELECT
5325  * privileges on the table or column underlying the pg_statistic data or if
5326  * the function is marked leak-proof.
5327  */
5328 bool
5329 statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
5330 {
5331 	if (vardata->acl_ok)
5332 		return true;
5333 
5334 	if (!OidIsValid(func_oid))
5335 		return false;
5336 
5337 	if (get_func_leakproof(func_oid))
5338 		return true;
5339 
5340 	ereport(DEBUG2,
5341 			(errmsg_internal("not using statistics because function \"%s\" is not leak-proof",
5342 							 get_func_name(func_oid))));
5343 	return false;
5344 }
5345 
5346 /*
5347  * get_variable_numdistinct
5348  *	  Estimate the number of distinct values of a variable.
5349  *
5350  * vardata: results of examine_variable
5351  * *isdefault: set to true if the result is a default rather than based on
5352  * anything meaningful.
5353  *
5354  * NB: be careful to produce a positive integral result, since callers may
5355  * compare the result to exact integer counts, or might divide by it.
5356  */
5357 double
5358 get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
5359 {
5360 	double		stadistinct;
5361 	double		stanullfrac = 0.0;
5362 	double		ntuples;
5363 
5364 	*isdefault = false;
5365 
5366 	/*
5367 	 * Determine the stadistinct value to use.  There are cases where we can
5368 	 * get an estimate even without a pg_statistic entry, or can get a better
5369 	 * value than is in pg_statistic.  Grab stanullfrac too if we can find it
5370 	 * (otherwise, assume no nulls, for lack of any better idea).
5371 	 */
5372 	if (HeapTupleIsValid(vardata->statsTuple))
5373 	{
5374 		/* Use the pg_statistic entry */
5375 		Form_pg_statistic stats;
5376 
5377 		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
5378 		stadistinct = stats->stadistinct;
5379 		stanullfrac = stats->stanullfrac;
5380 	}
5381 	else if (vardata->vartype == BOOLOID)
5382 	{
5383 		/*
5384 		 * Special-case boolean columns: presumably, two distinct values.
5385 		 *
5386 		 * Are there any other datatypes we should wire in special estimates
5387 		 * for?
5388 		 */
5389 		stadistinct = 2.0;
5390 	}
5391 	else if (vardata->rel && vardata->rel->rtekind == RTE_VALUES)
5392 	{
5393 		/*
5394 		 * If the Var represents a column of a VALUES RTE, assume it's unique.
5395 		 * This could of course be very wrong, but it should tend to be true
5396 		 * in well-written queries.  We could consider examining the VALUES'
5397 		 * contents to get some real statistics; but that only works if the
5398 		 * entries are all constants, and it would be pretty expensive anyway.
5399 		 */
5400 		stadistinct = -1.0;		/* unique (and all non null) */
5401 	}
5402 	else
5403 	{
5404 		/*
5405 		 * We don't keep statistics for system columns, but in some cases we
5406 		 * can infer distinctness anyway.
5407 		 */
5408 		if (vardata->var && IsA(vardata->var, Var))
5409 		{
5410 			switch (((Var *) vardata->var)->varattno)
5411 			{
5412 				case ObjectIdAttributeNumber:
5413 				case SelfItemPointerAttributeNumber:
5414 					stadistinct = -1.0; /* unique (and all non null) */
5415 					break;
5416 				case TableOidAttributeNumber:
5417 					stadistinct = 1.0;	/* only 1 value */
5418 					break;
5419 				default:
5420 					stadistinct = 0.0;	/* means "unknown" */
5421 					break;
5422 			}
5423 		}
5424 		else
5425 			stadistinct = 0.0;	/* means "unknown" */
5426 
5427 		/*
5428 		 * XXX consider using estimate_num_groups on expressions?
5429 		 */
5430 	}
5431 
5432 	/*
5433 	 * If there is a unique index or DISTINCT clause for the variable, assume
5434 	 * it is unique no matter what pg_statistic says; the statistics could be
5435 	 * out of date, or we might have found a partial unique index that proves
5436 	 * the var is unique for this query.  However, we'd better still believe
5437 	 * the null-fraction statistic.
5438 	 */
5439 	if (vardata->isunique)
5440 		stadistinct = -1.0 * (1.0 - stanullfrac);
5441 
5442 	/*
5443 	 * If we had an absolute estimate, use that.
5444 	 */
5445 	if (stadistinct > 0.0)
5446 		return clamp_row_est(stadistinct);
5447 
5448 	/*
5449 	 * Otherwise we need to get the relation size; punt if not available.
5450 	 */
5451 	if (vardata->rel == NULL)
5452 	{
5453 		*isdefault = true;
5454 		return DEFAULT_NUM_DISTINCT;
5455 	}
5456 	ntuples = vardata->rel->tuples;
5457 	if (ntuples <= 0.0)
5458 	{
5459 		*isdefault = true;
5460 		return DEFAULT_NUM_DISTINCT;
5461 	}
5462 
5463 	/*
5464 	 * If we had a relative estimate, use that.
5465 	 */
5466 	if (stadistinct < 0.0)
5467 		return clamp_row_est(-stadistinct * ntuples);
5468 
5469 	/*
5470 	 * With no data, estimate ndistinct = ntuples if the table is small, else
5471 	 * use default.  We use DEFAULT_NUM_DISTINCT as the cutoff for "small" so
5472 	 * that the behavior isn't discontinuous.
5473 	 */
5474 	if (ntuples < DEFAULT_NUM_DISTINCT)
5475 		return clamp_row_est(ntuples);
5476 
5477 	*isdefault = true;
5478 	return DEFAULT_NUM_DISTINCT;
5479 }
5480 
5481 /*
5482  * get_variable_range
5483  *		Estimate the minimum and maximum value of the specified variable.
5484  *		If successful, store values in *min and *max, and return true.
5485  *		If no data available, return false.
5486  *
5487  * sortop is the "<" comparison operator to use.  This should generally
5488  * be "<" not ">", as only the former is likely to be found in pg_statistic.
5489  */
static bool
get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
				   Datum *min, Datum *max)
{
	Datum		tmin = 0;
	Datum		tmax = 0;
	bool		have_data = false;
	int16		typLen;
	bool		typByVal;
	Oid			opfuncoid;
	AttStatsSlot sslot;
	int			i;

	/*
	 * XXX It's very tempting to try to use the actual column min and max, if
	 * we can get them relatively-cheaply with an index probe.  However, since
	 * this function is called many times during join planning, that could
	 * have unpleasant effects on planning speed.  Need more investigation
	 * before enabling this.
	 */
#ifdef NOT_USED
	if (get_actual_variable_range(root, vardata, sortop, min, max))
		return true;
#endif

	if (!HeapTupleIsValid(vardata->statsTuple))
	{
		/* no stats available, so default result */
		return false;
	}

	/*
	 * If we can't apply the sortop to the stats data, just fail.  In
	 * principle, if there's a histogram and no MCVs, we could return the
	 * histogram endpoints without ever applying the sortop ... but it's
	 * probably not worth trying, because whatever the caller wants to do with
	 * the endpoints would likely fail the security check too.
	 */
	if (!statistic_proc_security_check(vardata,
									   (opfuncoid = get_opcode(sortop))))
		return false;

	/* Need the column type's len/byval info to copy datums out of the slots */
	get_typlenbyval(vardata->atttype, &typLen, &typByVal);

	/*
	 * If there is a histogram, grab the first and last values.
	 *
	 * If there is a histogram that is sorted with some other operator than
	 * the one we want, fail --- this suggests that there is data we can't
	 * use.
	 */
	if (get_attstatsslot(&sslot, vardata->statsTuple,
						 STATISTIC_KIND_HISTOGRAM, sortop,
						 ATTSTATSSLOT_VALUES))
	{
		if (sslot.nvalues > 0)
		{
			/*
			 * Histogram entries are in sort order, so the endpoints are the
			 * first and last entries.  Copy them out, since the slot's data
			 * is released by free_attstatsslot below.
			 */
			tmin = datumCopy(sslot.values[0], typByVal, typLen);
			tmax = datumCopy(sslot.values[sslot.nvalues - 1], typByVal, typLen);
			have_data = true;
		}
		free_attstatsslot(&sslot);
	}
	else if (get_attstatsslot(&sslot, vardata->statsTuple,
							  STATISTIC_KIND_HISTOGRAM, InvalidOid,
							  0))
	{
		/* Histogram exists, but sorted by some other operator: unusable */
		free_attstatsslot(&sslot);
		return false;
	}

	/*
	 * If we have most-common-values info, look for extreme MCVs.  This is
	 * needed even if we also have a histogram, since the histogram excludes
	 * the MCVs.  However, if we *only* have MCVs and no histogram, we should
	 * be pretty wary of deciding that that is a full representation of the
	 * data.  Proceed only if the MCVs represent the whole table (to within
	 * roundoff error).
	 */
	if (get_attstatsslot(&sslot, vardata->statsTuple,
						 STATISTIC_KIND_MCV, InvalidOid,
						 have_data ? ATTSTATSSLOT_VALUES :
						 (ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)))
	{
		bool		use_mcvs = have_data;

		if (!have_data)
		{
			double		sumcommon = 0.0;
			double		nullfrac;
			int			i;		/* NB: shadows the outer i; both are scratch */

			/* Sum MCV frequencies to see how much of the table they cover */
			for (i = 0; i < sslot.nnumbers; i++)
				sumcommon += sslot.numbers[i];
			nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata->statsTuple))->stanullfrac;
			/* Trust MCVs only if they (plus nulls) cover ~the whole table */
			if (sumcommon + nullfrac > 0.99999)
				use_mcvs = true;
		}

		if (use_mcvs)
		{
			/*
			 * Usually the MCVs will not be the extreme values, so avoid
			 * unnecessary data copying.
			 */
			bool		tmin_is_mcv = false;
			bool		tmax_is_mcv = false;
			FmgrInfo	opproc;

			fmgr_info(opfuncoid, &opproc);

			for (i = 0; i < sslot.nvalues; i++)
			{
				if (!have_data)
				{
					/* first value seen becomes both min and max so far */
					tmin = tmax = sslot.values[i];
					tmin_is_mcv = tmax_is_mcv = have_data = true;
					continue;
				}
				/* new minimum if this value sorts below current tmin */
				if (DatumGetBool(FunctionCall2Coll(&opproc,
												   DEFAULT_COLLATION_OID,
												   sslot.values[i], tmin)))
				{
					tmin = sslot.values[i];
					tmin_is_mcv = true;
				}
				/* new maximum if current tmax sorts below this value */
				if (DatumGetBool(FunctionCall2Coll(&opproc,
												   DEFAULT_COLLATION_OID,
												   tmax, sslot.values[i])))
				{
					tmax = sslot.values[i];
					tmax_is_mcv = true;
				}
			}
			/* Copy MCV-sourced endpoints before the slot data is freed */
			if (tmin_is_mcv)
				tmin = datumCopy(tmin, typByVal, typLen);
			if (tmax_is_mcv)
				tmax = datumCopy(tmax, typByVal, typLen);
		}

		free_attstatsslot(&sslot);
	}

	*min = tmin;
	*max = tmax;
	return have_data;
}
5637 
5638 
5639 /*
5640  * get_actual_variable_range
5641  *		Attempt to identify the current *actual* minimum and/or maximum
5642  *		of the specified variable, by looking for a suitable btree index
5643  *		and fetching its low and/or high values.
5644  *		If successful, store values in *min and *max, and return true.
5645  *		(Either pointer can be NULL if that endpoint isn't needed.)
5646  *		If no data available, return false.
5647  *
5648  * sortop is the "<" comparison operator to use.
5649  */
static bool
get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
						  Oid sortop,
						  Datum *min, Datum *max)
{
	bool		have_data = false;
	RelOptInfo *rel = vardata->rel;
	RangeTblEntry *rte;
	ListCell   *lc;

	/* No hope if no relation or it doesn't have indexes */
	if (rel == NULL || rel->indexlist == NIL)
		return false;
	/* If it has indexes it must be a plain relation */
	rte = root->simple_rte_array[rel->relid];
	Assert(rte->rtekind == RTE_RELATION);

	/* Search through the indexes to see if any match our problem */
	foreach(lc, rel->indexlist)
	{
		IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);
		ScanDirection indexscandir;

		/* Ignore non-btree indexes */
		if (index->relam != BTREE_AM_OID)
			continue;

		/*
		 * Ignore partial indexes --- we only want stats that cover the entire
		 * relation.
		 */
		if (index->indpred != NIL)
			continue;

		/*
		 * The index list might include hypothetical indexes inserted by a
		 * get_relation_info hook --- don't try to access them.
		 */
		if (index->hypothetical)
			continue;

		/*
		 * The first index column must match the desired variable and sort
		 * operator --- but we can use a descending-order index.
		 */
		if (!match_index_to_operand(vardata->var, 0, index))
			continue;
		switch (get_op_opfamily_strategy(sortop, index->sortopfamily[0]))
		{
			case BTLessStrategyNumber:
				/* For a DESC index, smallest value is at the physical end */
				if (index->reverse_sort[0])
					indexscandir = BackwardScanDirection;
				else
					indexscandir = ForwardScanDirection;
				break;
			case BTGreaterStrategyNumber:
				if (index->reverse_sort[0])
					indexscandir = ForwardScanDirection;
				else
					indexscandir = BackwardScanDirection;
				break;
			default:
				/* index doesn't match the sortop */
				continue;
		}

		/*
		 * Found a suitable index to extract data from.  Set up some data that
		 * can be used by both invocations of get_actual_variable_endpoint.
		 */
		{
			MemoryContext tmpcontext;
			MemoryContext oldcontext;
			Relation	heapRel;
			Relation	indexRel;
			int16		typLen;
			bool		typByVal;
			ScanKeyData scankeys[1];

			/* Make sure any cruft gets recycled when we're done */
			tmpcontext = AllocSetContextCreate(CurrentMemoryContext,
											   "get_actual_variable_range workspace",
											   ALLOCSET_DEFAULT_SIZES);
			oldcontext = MemoryContextSwitchTo(tmpcontext);

			/*
			 * Open the table and index so we can read from them.  We should
			 * already have at least AccessShareLock on the table, but not
			 * necessarily on the index.
			 */
			heapRel = heap_open(rte->relid, NoLock);
			indexRel = index_open(index->indexoid, AccessShareLock);

			/* build some stuff needed for indexscan execution */
			get_typlenbyval(vardata->atttype, &typLen, &typByVal);

			/* set up an IS NOT NULL scan key so that we ignore nulls */
			ScanKeyEntryInitialize(&scankeys[0],
								   SK_ISNULL | SK_SEARCHNOTNULL,
								   1,	/* index col to scan */
								   InvalidStrategy, /* no strategy */
								   InvalidOid,	/* no strategy subtype */
								   InvalidOid,	/* no collation */
								   InvalidOid,	/* no reg proc for this */
								   (Datum) 0);	/* constant */

			/* If min is requested ... */
			if (min)
			{
				have_data = get_actual_variable_endpoint(heapRel,
														 indexRel,
														 indexscandir,
														 scankeys,
														 typLen,
														 typByVal,
														 oldcontext,
														 min);
			}
			else
			{
				/* If min not requested, assume index is nonempty */
				have_data = true;
			}

			/* If max is requested, and we didn't find the index is empty */
			if (max && have_data)
			{
				/* scan in the opposite direction; all else is the same */
				have_data = get_actual_variable_endpoint(heapRel,
														 indexRel,
														 -indexscandir,
														 scankeys,
														 typLen,
														 typByVal,
														 oldcontext,
														 max);
			}

			/* Clean everything up */
			index_close(indexRel, AccessShareLock);
			heap_close(heapRel, NoLock);

			MemoryContextSwitchTo(oldcontext);
			MemoryContextDelete(tmpcontext);

			/* And we're done */
			break;
		}
	}

	return have_data;
}
5802 
5803 /*
5804  * Get one endpoint datum (min or max depending on indexscandir) from the
5805  * specified index.  Return true if successful, false if index is empty.
5806  * On success, endpoint value is stored to *endpointDatum (and copied into
5807  * outercontext).
5808  *
5809  * scankeys is a 1-element scankey array set up to reject nulls.
5810  * typLen/typByVal describe the datatype of the index's first column.
5811  * (We could compute these values locally, but that would mean computing them
5812  * twice when get_actual_variable_range needs both the min and the max.)
5813  */
static bool
get_actual_variable_endpoint(Relation heapRel,
							 Relation indexRel,
							 ScanDirection indexscandir,
							 ScanKey scankeys,
							 int16 typLen,
							 bool typByVal,
							 MemoryContext outercontext,
							 Datum *endpointDatum)
{
	bool		have_data = false;
	SnapshotData SnapshotNonVacuumable;
	IndexScanDesc index_scan;
	Buffer		vmbuffer = InvalidBuffer;	/* reused across loop iterations */
	ItemPointer tid;
	Datum		values[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];
	MemoryContext oldcontext;

	/*
	 * We use the index-only-scan machinery for this.  With mostly-static
	 * tables that's a win because it avoids a heap visit.  It's also a win
	 * for dynamic data, but the reason is less obvious; read on for details.
	 *
	 * In principle, we should scan the index with our current active
	 * snapshot, which is the best approximation we've got to what the query
	 * will see when executed.  But that won't be exact if a new snap is taken
	 * before running the query, and it can be very expensive if a lot of
	 * recently-dead or uncommitted rows exist at the beginning or end of the
	 * index (because we'll laboriously fetch each one and reject it).
	 * Instead, we use SnapshotNonVacuumable.  That will accept recently-dead
	 * and uncommitted rows as well as normal visible rows.  On the other
	 * hand, it will reject known-dead rows, and thus not give a bogus answer
	 * when the extreme value has been deleted (unless the deletion was quite
	 * recent); that case motivates not using SnapshotAny here.
	 *
	 * A crucial point here is that SnapshotNonVacuumable, with
	 * RecentGlobalXmin as horizon, yields the inverse of the condition that
	 * the indexscan will use to decide that index entries are killable (see
	 * heap_hot_search_buffer()).  Therefore, if the snapshot rejects a tuple
	 * (or more precisely, all tuples of a HOT chain) and we have to continue
	 * scanning past it, we know that the indexscan will mark that index entry
	 * killed.  That means that the next get_actual_variable_endpoint() call
	 * will not have to re-consider that index entry.  In this way we avoid
	 * repetitive work when this function is used a lot during planning.
	 *
	 * But using SnapshotNonVacuumable creates a hazard of its own.  In a
	 * recently-created index, some index entries may point at "broken" HOT
	 * chains in which not all the tuple versions contain data matching the
	 * index entry.  The live tuple version(s) certainly do match the index,
	 * but SnapshotNonVacuumable can accept recently-dead tuple versions that
	 * don't match.  Hence, if we took data from the selected heap tuple, we
	 * might get a bogus answer that's not close to the index extremal value,
	 * or could even be NULL.  We avoid this hazard because we take the data
	 * from the index entry not the heap.
	 */
	InitNonVacuumableSnapshot(SnapshotNonVacuumable, RecentGlobalXmin);

	index_scan = index_beginscan(heapRel, indexRel,
								 &SnapshotNonVacuumable,
								 1, 0); /* 1 scankey, 0 orderbys */
	/* Set it up for index-only scan */
	index_scan->xs_want_itup = true;
	index_rescan(index_scan, scankeys, 1, NULL, 0);

	/* Fetch first/next tuple in specified direction */
	while ((tid = index_getnext_tid(index_scan, indexscandir)) != NULL)
	{
		/* Skip the heap visit when the whole page is known all-visible */
		if (!VM_ALL_VISIBLE(heapRel,
							ItemPointerGetBlockNumber(tid),
							&vmbuffer))
		{
			/* Rats, we have to visit the heap to check visibility */
			if (index_fetch_heap(index_scan) == NULL)
				continue;		/* no visible tuple, try next index entry */

			/*
			 * We don't care whether there's more than one visible tuple in
			 * the HOT chain; if any are visible, that's good enough.
			 */
		}

		/*
		 * We expect that btree will return data in IndexTuple not HeapTuple
		 * format.  It's not lossy either.
		 */
		if (!index_scan->xs_itup)
			elog(ERROR, "no data returned for index-only scan");
		if (index_scan->xs_recheck)
			elog(ERROR, "unexpected recheck indication from btree");

		/* OK to deconstruct the index tuple */
		index_deform_tuple(index_scan->xs_itup,
						   index_scan->xs_itupdesc,
						   values, isnull);

		/* Shouldn't have got a null, but be careful */
		if (isnull[0])
			elog(ERROR, "found unexpected null value in index \"%s\"",
				 RelationGetRelationName(indexRel));

		/* Copy the index column value out to caller's context */
		oldcontext = MemoryContextSwitchTo(outercontext);
		*endpointDatum = datumCopy(values[0], typByVal, typLen);
		MemoryContextSwitchTo(oldcontext);
		have_data = true;
		break;
	}

	if (vmbuffer != InvalidBuffer)
		ReleaseBuffer(vmbuffer);
	index_endscan(index_scan);

	return have_data;
}
5929 
5930 /*
5931  * find_join_input_rel
5932  *		Look up the input relation for a join.
5933  *
5934  * We assume that the input relation's RelOptInfo must have been constructed
5935  * already.
5936  */
5937 static RelOptInfo *
5938 find_join_input_rel(PlannerInfo *root, Relids relids)
5939 {
5940 	RelOptInfo *rel = NULL;
5941 
5942 	switch (bms_membership(relids))
5943 	{
5944 		case BMS_EMPTY_SET:
5945 			/* should not happen */
5946 			break;
5947 		case BMS_SINGLETON:
5948 			rel = find_base_rel(root, bms_singleton_member(relids));
5949 			break;
5950 		case BMS_MULTIPLE:
5951 			rel = find_join_rel(root, relids);
5952 			break;
5953 	}
5954 
5955 	if (rel == NULL)
5956 		elog(ERROR, "could not find RelOptInfo for given relids");
5957 
5958 	return rel;
5959 }
5960 
5961 
5962 /*-------------------------------------------------------------------------
5963  *
5964  * Pattern analysis functions
5965  *
5966  * These routines support analysis of LIKE and regular-expression patterns
5967  * by the planner/optimizer.  It's important that they agree with the
5968  * regular-expression code in backend/regex/ and the LIKE code in
5969  * backend/utils/adt/like.c.  Also, the computation of the fixed prefix
5970  * must be conservative: if we report a string longer than the true fixed
5971  * prefix, the query may produce actually wrong answers, rather than just
5972  * getting a bad selectivity estimate!
5973  *
5974  * Note that the prefix-analysis functions are called from
5975  * backend/optimizer/path/indxpath.c as well as from routines in this file.
5976  *
5977  *-------------------------------------------------------------------------
5978  */
5979 
5980 /*
5981  * Check whether char is a letter (and, hence, subject to case-folding)
5982  *
5983  * In multibyte character sets or with ICU, we can't use isalpha, and it does
5984  * not seem worth trying to convert to wchar_t to use iswalpha or u_isalpha.
5985  * Instead, just assume any non-ASCII char is potentially case-varying, and
5986  * hard-wire knowledge of which ASCII chars are letters.
5987  */
5988 static int
5989 pattern_char_isalpha(char c, bool is_multibyte,
5990 					 pg_locale_t locale, bool locale_is_c)
5991 {
5992 	if (locale_is_c)
5993 		return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
5994 	else if (is_multibyte && IS_HIGHBIT_SET(c))
5995 		return true;
5996 	else if (locale && locale->provider == COLLPROVIDER_ICU)
5997 		return IS_HIGHBIT_SET(c) ||
5998 			(c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
5999 #ifdef HAVE_LOCALE_T
6000 	else if (locale && locale->provider == COLLPROVIDER_LIBC)
6001 		return isalpha_l((unsigned char) c, locale->info.lt);
6002 #endif
6003 	else
6004 		return isalpha((unsigned char) c);
6005 }
6006 
6007 /*
6008  * Extract the fixed prefix, if any, for a pattern.
6009  *
6010  * *prefix is set to a palloc'd prefix string (in the form of a Const node),
6011  *	or to NULL if no fixed prefix exists for the pattern.
6012  * If rest_selec is not NULL, *rest_selec is set to an estimate of the
6013  *	selectivity of the remainder of the pattern (without any fixed prefix).
6014  * The prefix Const has the same type (TEXT or BYTEA) as the input pattern.
6015  *
6016  * The return value distinguishes no fixed prefix, a partial prefix,
6017  * or an exact-match-only pattern.
6018  */
6019 
/*
 * like_fixed_prefix
 *	  Implementation of pattern_fixed_prefix() for LIKE/ILIKE patterns;
 *	  see the interface comments above.
 */
static Pattern_Prefix_Status
like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
				  Const **prefix_const, Selectivity *rest_selec)
{
	char	   *match;
	char	   *patt;
	int			pattlen;
	Oid			typeid = patt_const->consttype;
	int			pos,
				match_pos;
	bool		is_multibyte = (pg_database_encoding_max_length() > 1);
	pg_locale_t locale = 0;
	bool		locale_is_c = false;

	/* the right-hand const is type text or bytea */
	Assert(typeid == BYTEAOID || typeid == TEXTOID);

	if (case_insensitive)
	{
		if (typeid == BYTEAOID)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("case insensitive matching not supported on type bytea")));

		/* If case-insensitive, we need locale info */
		if (lc_ctype_is_c(collation))
			locale_is_c = true;
		else if (collation != DEFAULT_COLLATION_OID)
		{
			if (!OidIsValid(collation))
			{
				/*
				 * This typically means that the parser could not resolve a
				 * conflict of implicit collations, so report it that way.
				 */
				ereport(ERROR,
						(errcode(ERRCODE_INDETERMINATE_COLLATION),
						 errmsg("could not determine which collation to use for ILIKE"),
						 errhint("Use the COLLATE clause to set the collation explicitly.")));
			}
			locale = pg_newlocale_from_collation(collation);
		}
	}

	if (typeid != BYTEAOID)
	{
		patt = TextDatumGetCString(patt_const->constvalue);
		pattlen = strlen(patt);
	}
	else
	{
		/* bytea: copy raw bytes; pattlen is the byte count, no terminator */
		bytea	   *bstr = DatumGetByteaPP(patt_const->constvalue);

		pattlen = VARSIZE_ANY_EXHDR(bstr);
		patt = (char *) palloc(pattlen);
		memcpy(patt, VARDATA_ANY(bstr), pattlen);
		Assert((Pointer) bstr == DatumGetPointer(patt_const->constvalue));
	}

	/* Worst case, the entire pattern is a literal prefix */
	match = palloc(pattlen + 1);
	match_pos = 0;
	for (pos = 0; pos < pattlen; pos++)
	{
		/* % and _ are wildcard characters in LIKE */
		if (patt[pos] == '%' ||
			patt[pos] == '_')
			break;

		/* Backslash escapes the next character */
		if (patt[pos] == '\\')
		{
			pos++;
			if (pos >= pattlen)
				break;
		}

		/* Stop if case-varying character (it's sort of a wildcard) */
		if (case_insensitive &&
			pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c))
			break;

		match[match_pos++] = patt[pos];
	}

	match[match_pos] = '\0';

	if (typeid != BYTEAOID)
		*prefix_const = string_to_const(match, typeid);
	else
		*prefix_const = string_to_bytea_const(match, match_pos);

	/* Estimate selectivity of whatever part of the pattern remains */
	if (rest_selec != NULL)
		*rest_selec = like_selectivity(&patt[pos], pattlen - pos,
									   case_insensitive);

	pfree(patt);
	pfree(match);

	/* in LIKE, an empty pattern is an exact match! */
	if (pos == pattlen)
		return Pattern_Prefix_Exact;	/* reached end of pattern, so exact */

	if (match_pos > 0)
		return Pattern_Prefix_Partial;

	return Pattern_Prefix_None;
}
6127 
6128 static Pattern_Prefix_Status
6129 regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
6130 				   Const **prefix_const, Selectivity *rest_selec)
6131 {
6132 	Oid			typeid = patt_const->consttype;
6133 	char	   *prefix;
6134 	bool		exact;
6135 
6136 	/*
6137 	 * Should be unnecessary, there are no bytea regex operators defined. As
6138 	 * such, it should be noted that the rest of this function has *not* been
6139 	 * made safe for binary (possibly NULL containing) strings.
6140 	 */
6141 	if (typeid == BYTEAOID)
6142 		ereport(ERROR,
6143 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
6144 				 errmsg("regular-expression matching not supported on type bytea")));
6145 
6146 	/* Use the regexp machinery to extract the prefix, if any */
6147 	prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
6148 								 case_insensitive, collation,
6149 								 &exact);
6150 
6151 	if (prefix == NULL)
6152 	{
6153 		*prefix_const = NULL;
6154 
6155 		if (rest_selec != NULL)
6156 		{
6157 			char	   *patt = TextDatumGetCString(patt_const->constvalue);
6158 
6159 			*rest_selec = regex_selectivity(patt, strlen(patt),
6160 											case_insensitive,
6161 											0);
6162 			pfree(patt);
6163 		}
6164 
6165 		return Pattern_Prefix_None;
6166 	}
6167 
6168 	*prefix_const = string_to_const(prefix, typeid);
6169 
6170 	if (rest_selec != NULL)
6171 	{
6172 		if (exact)
6173 		{
6174 			/* Exact match, so there's no additional selectivity */
6175 			*rest_selec = 1.0;
6176 		}
6177 		else
6178 		{
6179 			char	   *patt = TextDatumGetCString(patt_const->constvalue);
6180 
6181 			*rest_selec = regex_selectivity(patt, strlen(patt),
6182 											case_insensitive,
6183 											strlen(prefix));
6184 			pfree(patt);
6185 		}
6186 	}
6187 
6188 	pfree(prefix);
6189 
6190 	if (exact)
6191 		return Pattern_Prefix_Exact;	/* pattern specifies exact match */
6192 	else
6193 		return Pattern_Prefix_Partial;
6194 }
6195 
6196 Pattern_Prefix_Status
6197 pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
6198 					 Const **prefix, Selectivity *rest_selec)
6199 {
6200 	Pattern_Prefix_Status result;
6201 
6202 	switch (ptype)
6203 	{
6204 		case Pattern_Type_Like:
6205 			result = like_fixed_prefix(patt, false, collation,
6206 									   prefix, rest_selec);
6207 			break;
6208 		case Pattern_Type_Like_IC:
6209 			result = like_fixed_prefix(patt, true, collation,
6210 									   prefix, rest_selec);
6211 			break;
6212 		case Pattern_Type_Regex:
6213 			result = regex_fixed_prefix(patt, false, collation,
6214 										prefix, rest_selec);
6215 			break;
6216 		case Pattern_Type_Regex_IC:
6217 			result = regex_fixed_prefix(patt, true, collation,
6218 										prefix, rest_selec);
6219 			break;
6220 		case Pattern_Type_Prefix:
6221 			/* Prefix type work is trivial.  */
6222 			result = Pattern_Prefix_Partial;
6223 			*rest_selec = 1.0;	/* all */
6224 			*prefix = makeConst(patt->consttype,
6225 								patt->consttypmod,
6226 								patt->constcollid,
6227 								patt->constlen,
6228 								datumCopy(patt->constvalue,
6229 										  patt->constbyval,
6230 										  patt->constlen),
6231 								patt->constisnull,
6232 								patt->constbyval);
6233 			break;
6234 		default:
6235 			elog(ERROR, "unrecognized ptype: %d", (int) ptype);
6236 			result = Pattern_Prefix_None;	/* keep compiler quiet */
6237 			break;
6238 	}
6239 	return result;
6240 }
6241 
6242 /*
6243  * Estimate the selectivity of a fixed prefix for a pattern match.
6244  *
6245  * A fixed prefix "foo" is estimated as the selectivity of the expression
6246  * "variable >= 'foo' AND variable < 'fop'" (see also indxpath.c).
6247  *
6248  * The selectivity estimate is with respect to the portion of the column
6249  * population represented by the histogram --- the caller must fold this
6250  * together with info about MCVs and NULLs.
6251  *
6252  * We use the >= and < operators from the specified btree opfamily to do the
6253  * estimation.  The given variable and Const must be of the associated
6254  * datatype.
6255  *
6256  * XXX Note: we make use of the upper bound to estimate operator selectivity
6257  * even if the locale is such that we cannot rely on the upper-bound string.
6258  * The selectivity only needs to be approximately right anyway, so it seems
6259  * more useful to use the upper-bound code than not.
6260  */
static Selectivity
prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
				   Oid vartype, Oid opfamily, Const *prefixcon)
{
	Selectivity prefixsel;
	Oid			cmpopr;
	FmgrInfo	opproc;
	Const	   *greaterstrcon;
	Selectivity eq_sel;

	/* Estimate fraction of histogram satisfying "var >= prefix" */
	cmpopr = get_opfamily_member(opfamily, vartype, vartype,
								 BTGreaterEqualStrategyNumber);
	if (cmpopr == InvalidOid)
		elog(ERROR, "no >= operator for opfamily %u", opfamily);
	fmgr_info(get_opcode(cmpopr), &opproc);

	/* boolean flags select the inequality direction matching cmpopr */
	prefixsel = ineq_histogram_selectivity(root, vardata,
										   &opproc, true, true,
										   prefixcon->constvalue,
										   prefixcon->consttype);

	if (prefixsel < 0.0)
	{
		/* No histogram is present ... return a suitable default estimate */
		return DEFAULT_MATCH_SEL;
	}

	/*-------
	 * If we can create a string larger than the prefix, say
	 *	"x < greaterstr".
	 *-------
	 */
	cmpopr = get_opfamily_member(opfamily, vartype, vartype,
								 BTLessStrategyNumber);
	if (cmpopr == InvalidOid)
		elog(ERROR, "no < operator for opfamily %u", opfamily);
	fmgr_info(get_opcode(cmpopr), &opproc);
	greaterstrcon = make_greater_string(prefixcon, &opproc,
										DEFAULT_COLLATION_OID);
	if (greaterstrcon)
	{
		Selectivity topsel;

		/* Estimate fraction of histogram satisfying "var < greaterstr" */
		topsel = ineq_histogram_selectivity(root, vardata,
											&opproc, false, false,
											greaterstrcon->constvalue,
											greaterstrcon->consttype);

		/* ineq_histogram_selectivity worked before, it shouldn't fail now */
		Assert(topsel >= 0.0);

		/*
		 * Merge the two selectivities in the same way as for a range query
		 * (see clauselist_selectivity()).  Note that we don't need to worry
		 * about double-exclusion of nulls, since ineq_histogram_selectivity
		 * doesn't count those anyway.
		 */
		prefixsel = topsel + prefixsel - 1.0;
	}

	/*
	 * If the prefix is long then the two bounding values might be too close
	 * together for the histogram to distinguish them usefully, resulting in a
	 * zero estimate (plus or minus roundoff error). To avoid returning a
	 * ridiculously small estimate, compute the estimated selectivity for
	 * "variable = 'foo'", and clamp to that. (Obviously, the resultant
	 * estimate should be at least that.)
	 *
	 * We apply this even if we couldn't make a greater string.  That case
	 * suggests that the prefix is near the maximum possible, and thus
	 * probably off the end of the histogram, and thus we probably got a very
	 * small estimate from the >= condition; so we still need to clamp.
	 */
	cmpopr = get_opfamily_member(opfamily, vartype, vartype,
								 BTEqualStrategyNumber);
	if (cmpopr == InvalidOid)
		elog(ERROR, "no = operator for opfamily %u", opfamily);
	eq_sel = var_eq_const(vardata, cmpopr, prefixcon->constvalue,
						  false, true, false);

	prefixsel = Max(prefixsel, eq_sel);

	return prefixsel;
}
6345 
6346 
6347 /*
6348  * Estimate the selectivity of a pattern of the specified type.
6349  * Note that any fixed prefix of the pattern will have been removed already,
6350  * so actually we may be looking at just a fragment of the pattern.
6351  *
6352  * For now, we use a very simplistic approach: fixed characters reduce the
6353  * selectivity a good deal, character ranges reduce it a little,
6354  * wildcards (such as % for LIKE or .* for regex) increase it.
6355  */
6356 
6357 #define FIXED_CHAR_SEL	0.20	/* about 1/5 */
6358 #define CHAR_RANGE_SEL	0.25
6359 #define ANY_CHAR_SEL	0.9		/* not 1, since it won't match end-of-string */
6360 #define FULL_WILDCARD_SEL 5.0
6361 #define PARTIAL_WILDCARD_SEL 2.0
6362 
6363 static Selectivity
6364 like_selectivity(const char *patt, int pattlen, bool case_insensitive)
6365 {
6366 	Selectivity sel = 1.0;
6367 	int			pos;
6368 
6369 	/* Skip any leading wildcard; it's already factored into initial sel */
6370 	for (pos = 0; pos < pattlen; pos++)
6371 	{
6372 		if (patt[pos] != '%' && patt[pos] != '_')
6373 			break;
6374 	}
6375 
6376 	for (; pos < pattlen; pos++)
6377 	{
6378 		/* % and _ are wildcard characters in LIKE */
6379 		if (patt[pos] == '%')
6380 			sel *= FULL_WILDCARD_SEL;
6381 		else if (patt[pos] == '_')
6382 			sel *= ANY_CHAR_SEL;
6383 		else if (patt[pos] == '\\')
6384 		{
6385 			/* Backslash quotes the next character */
6386 			pos++;
6387 			if (pos >= pattlen)
6388 				break;
6389 			sel *= FIXED_CHAR_SEL;
6390 		}
6391 		else
6392 			sel *= FIXED_CHAR_SEL;
6393 	}
6394 	/* Could get sel > 1 if multiple wildcards */
6395 	if (sel > 1.0)
6396 		sel = 1.0;
6397 	return sel;
6398 }
6399 
/*
 * regex_selectivity_sub
 *	  Estimate selectivity of a regex fragment by scanning it once and
 *	  multiplying per-construct factors.  Parenthesized groups are handled
 *	  by recursing on the group's contents, and top-level alternation by
 *	  summing the selectivities of the branches.
 */
static Selectivity
regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
{
	Selectivity sel = 1.0;
	int			paren_depth = 0;
	int			paren_pos = 0;	/* dummy init to keep compiler quiet */
	int			pos;

	for (pos = 0; pos < pattlen; pos++)
	{
		if (patt[pos] == '(')
		{
			if (paren_depth == 0)
				paren_pos = pos;	/* remember start of parenthesized item */
			paren_depth++;
		}
		else if (patt[pos] == ')' && paren_depth > 0)
		{
			paren_depth--;
			/* Whole group closed: recurse on its contents as one factor */
			if (paren_depth == 0)
				sel *= regex_selectivity_sub(patt + (paren_pos + 1),
											 pos - (paren_pos + 1),
											 case_insensitive);
		}
		else if (patt[pos] == '|' && paren_depth == 0)
		{
			/*
			 * If unquoted | is present at paren level 0 in pattern, we have
			 * multiple alternatives; sum their probabilities.
			 */
			sel += regex_selectivity_sub(patt + (pos + 1),
										 pattlen - (pos + 1),
										 case_insensitive);
			break;				/* rest of pattern is now processed */
		}
		else if (patt[pos] == '[')
		{
			bool		negclass = false;

			if (patt[++pos] == '^')
			{
				negclass = true;
				pos++;
			}
			if (patt[pos] == ']')	/* ']' at start of class is not special */
				pos++;
			/* skip to end of the bracket class */
			while (pos < pattlen && patt[pos] != ']')
				pos++;
			/* inside parens the recursion accounts for it, so skip factor */
			if (paren_depth == 0)
				sel *= (negclass ? (1.0 - CHAR_RANGE_SEL) : CHAR_RANGE_SEL);
		}
		else if (patt[pos] == '.')
		{
			if (paren_depth == 0)
				sel *= ANY_CHAR_SEL;
		}
		else if (patt[pos] == '*' ||
				 patt[pos] == '?' ||
				 patt[pos] == '+')
		{
			/* Ought to be smarter about quantifiers... */
			if (paren_depth == 0)
				sel *= PARTIAL_WILDCARD_SEL;
		}
		else if (patt[pos] == '{')
		{
			/* treat a {m,n} bound like a generic quantifier */
			while (pos < pattlen && patt[pos] != '}')
				pos++;
			if (paren_depth == 0)
				sel *= PARTIAL_WILDCARD_SEL;
		}
		else if (patt[pos] == '\\')
		{
			/* backslash quotes the next character */
			pos++;
			if (pos >= pattlen)
				break;
			if (paren_depth == 0)
				sel *= FIXED_CHAR_SEL;
		}
		else
		{
			if (paren_depth == 0)
				sel *= FIXED_CHAR_SEL;
		}
	}
	/* Could get sel > 1 if multiple wildcards */
	if (sel > 1.0)
		sel = 1.0;
	return sel;
}
6491 
6492 static Selectivity
6493 regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
6494 				  int fixed_prefix_len)
6495 {
6496 	Selectivity sel;
6497 
6498 	/* If patt doesn't end with $, consider it to have a trailing wildcard */
6499 	if (pattlen > 0 && patt[pattlen - 1] == '$' &&
6500 		(pattlen == 1 || patt[pattlen - 2] != '\\'))
6501 	{
6502 		/* has trailing $ */
6503 		sel = regex_selectivity_sub(patt, pattlen - 1, case_insensitive);
6504 	}
6505 	else
6506 	{
6507 		/* no trailing $ */
6508 		sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
6509 		sel *= FULL_WILDCARD_SEL;
6510 	}
6511 
6512 	/*
6513 	 * If there's a fixed prefix, discount its selectivity.  We have to be
6514 	 * careful here since a very long prefix could result in pow's result
6515 	 * underflowing to zero (in which case "sel" probably has as well).
6516 	 */
6517 	if (fixed_prefix_len > 0)
6518 	{
6519 		double		prefixsel = pow(FIXED_CHAR_SEL, fixed_prefix_len);
6520 
6521 		if (prefixsel > 0.0)
6522 			sel /= prefixsel;
6523 	}
6524 
6525 	/* Make sure result stays in range */
6526 	CLAMP_PROBABILITY(sel);
6527 	return sel;
6528 }
6529 
6530 
6531 /*
6532  * For bytea, the increment function need only increment the current byte
6533  * (there are no multibyte characters to worry about).
6534  */
6535 static bool
6536 byte_increment(unsigned char *ptr, int len)
6537 {
6538 	if (*ptr >= 255)
6539 		return false;
6540 	(*ptr)++;
6541 	return true;
6542 }
6543 
6544 /*
6545  * Try to generate a string greater than the given string or any
6546  * string it is a prefix of.  If successful, return a palloc'd string
6547  * in the form of a Const node; else return NULL.
6548  *
6549  * The caller must provide the appropriate "less than" comparison function
6550  * for testing the strings, along with the collation to use.
6551  *
6552  * The key requirement here is that given a prefix string, say "foo",
6553  * we must be able to generate another string "fop" that is greater than
6554  * all strings "foobar" starting with "foo".  We can test that we have
6555  * generated a string greater than the prefix string, but in non-C collations
6556  * that is not a bulletproof guarantee that an extension of the string might
6557  * not sort after it; an example is that "foo " is less than "foo!", but it
6558  * is not clear that a "dictionary" sort ordering will consider "foo!" less
6559  * than "foo bar".  CAUTION: Therefore, this function should be used only for
6560  * estimation purposes when working in a non-C collation.
6561  *
6562  * To try to catch most cases where an extended string might otherwise sort
6563  * before the result value, we determine which of the strings "Z", "z", "y",
6564  * and "9" is seen as largest by the collation, and append that to the given
6565  * prefix before trying to find a string that compares as larger.
6566  *
6567  * To search for a greater string, we repeatedly "increment" the rightmost
6568  * character, using an encoding-specific character incrementer function.
6569  * When it's no longer possible to increment the last character, we truncate
6570  * off that character and start incrementing the next-to-rightmost.
6571  * For example, if "z" were the last character in the sort order, then we
6572  * could produce "foo" as a string greater than "fonz".
6573  *
6574  * This could be rather slow in the worst case, but in most cases we
6575  * won't have to try more than one or two strings before succeeding.
6576  *
6577  * Note that it's important for the character incrementer not to be too anal
6578  * about producing every possible character code, since in some cases the only
6579  * way to get a larger string is to increment a previous character position.
6580  * So we don't want to spend too much time trying every possible character
6581  * code at the last position.  A good rule of thumb is to be sure that we
6582  * don't try more than 256*K values for a K-byte character (and definitely
6583  * not 256^K, which is what an exhaustive search would approach).
6584  */
Const *
make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
{
	Oid			datatype = str_const->consttype;
	char	   *workstr;
	int			len;
	Datum		cmpstr;
	text	   *cmptxt = NULL;		/* non-NULL only if we built a suffixed copy */
	mbcharacter_incrementer charinc;

	/*
	 * Get a modifiable copy of the prefix string in C-string format, and set
	 * up the string we will compare to as a Datum.  In C locale this can just
	 * be the given prefix string, otherwise we need to add a suffix.  Types
	 * NAME and BYTEA sort bytewise so they don't need a suffix either.
	 */
	if (datatype == NAMEOID)
	{
		workstr = DatumGetCString(DirectFunctionCall1(nameout,
													  str_const->constvalue));
		len = strlen(workstr);
		cmpstr = str_const->constvalue;
	}
	else if (datatype == BYTEAOID)
	{
		bytea	   *bstr = DatumGetByteaPP(str_const->constvalue);

		/* bytea may contain embedded NULs, so track length explicitly */
		len = VARSIZE_ANY_EXHDR(bstr);
		workstr = (char *) palloc(len);
		memcpy(workstr, VARDATA_ANY(bstr), len);
		Assert((Pointer) bstr == DatumGetPointer(str_const->constvalue));
		cmpstr = str_const->constvalue;
	}
	else
	{
		/* Remaining cases are text-like types */
		workstr = TextDatumGetCString(str_const->constvalue);
		len = strlen(workstr);
		if (lc_collate_is_c(collation) || len == 0)
			cmpstr = str_const->constvalue;
		else
		{
			/* If first time through, determine the suffix to use */
			static char suffixchar = 0;
			static Oid	suffixcollation = 0;

			/*
			 * The chosen suffix character is cached across calls; it is
			 * recomputed only when the collation differs from the cached one.
			 */
			if (!suffixchar || suffixcollation != collation)
			{
				char	   *best;

				/* Pick whichever of "Z", "z", "y", "9" this collation sorts largest */
				best = "Z";
				if (varstr_cmp(best, 1, "z", 1, collation) < 0)
					best = "z";
				if (varstr_cmp(best, 1, "y", 1, collation) < 0)
					best = "y";
				if (varstr_cmp(best, 1, "9", 1, collation) < 0)
					best = "9";
				suffixchar = *best;
				suffixcollation = collation;
			}

			/* And build the string to compare to */
			cmptxt = (text *) palloc(VARHDRSZ + len + 1);
			SET_VARSIZE(cmptxt, VARHDRSZ + len + 1);
			memcpy(VARDATA(cmptxt), workstr, len);
			*(VARDATA(cmptxt) + len) = suffixchar;
			cmpstr = PointerGetDatum(cmptxt);
		}
	}

	/* Select appropriate character-incrementer function */
	if (datatype == BYTEAOID)
		charinc = byte_increment;
	else
		charinc = pg_database_encoding_character_incrementer();

	/* And search ... */
	while (len > 0)
	{
		int			charlen;
		unsigned char *lastchar;

		/* Identify the last character --- for bytea, just the last byte */
		if (datatype == BYTEAOID)
			charlen = 1;
		else
			charlen = len - pg_mbcliplen(workstr, len, len - 1);
		lastchar = (unsigned char *) (workstr + len - charlen);

		/*
		 * Try to generate a larger string by incrementing the last character
		 * (for BYTEA, we treat each byte as a character).
		 *
		 * Note: the incrementer function is expected to return true if it's
		 * generated a valid-per-the-encoding new character, otherwise false.
		 * The contents of the character on false return are unspecified.
		 */
		while (charinc(lastchar, charlen))
		{
			Const	   *workstr_const;

			if (datatype == BYTEAOID)
				workstr_const = string_to_bytea_const(workstr, len);
			else
				workstr_const = string_to_const(workstr, datatype);

			if (DatumGetBool(FunctionCall2Coll(ltproc,
											   collation,
											   cmpstr,
											   workstr_const->constvalue)))
			{
				/* Successfully made a string larger than cmpstr */
				if (cmptxt)
					pfree(cmptxt);
				pfree(workstr);
				return workstr_const;
			}

			/* No good, release unusable value and try again */
			pfree(DatumGetPointer(workstr_const->constvalue));
			pfree(workstr_const);
		}

		/*
		 * No luck here, so truncate off the last character and try to
		 * increment the next one.
		 */
		len -= charlen;
		workstr[len] = '\0';
	}

	/* Failed... */
	if (cmptxt)
		pfree(cmptxt);
	pfree(workstr);

	return NULL;
}
6722 
6723 /*
6724  * Generate a Datum of the appropriate type from a C string.
6725  * Note that all of the supported types are pass-by-ref, so the
6726  * returned value should be pfree'd if no longer needed.
6727  */
6728 static Datum
6729 string_to_datum(const char *str, Oid datatype)
6730 {
6731 	Assert(str != NULL);
6732 
6733 	/*
6734 	 * We cheat a little by assuming that CStringGetTextDatum() will do for
6735 	 * bpchar and varchar constants too...
6736 	 */
6737 	if (datatype == NAMEOID)
6738 		return DirectFunctionCall1(namein, CStringGetDatum(str));
6739 	else if (datatype == BYTEAOID)
6740 		return DirectFunctionCall1(byteain, CStringGetDatum(str));
6741 	else
6742 		return CStringGetTextDatum(str);
6743 }
6744 
6745 /*
6746  * Generate a Const node of the appropriate type from a C string.
6747  */
6748 static Const *
6749 string_to_const(const char *str, Oid datatype)
6750 {
6751 	Datum		conval = string_to_datum(str, datatype);
6752 	Oid			collation;
6753 	int			constlen;
6754 
6755 	/*
6756 	 * We only need to support a few datatypes here, so hard-wire properties
6757 	 * instead of incurring the expense of catalog lookups.
6758 	 */
6759 	switch (datatype)
6760 	{
6761 		case TEXTOID:
6762 		case VARCHAROID:
6763 		case BPCHAROID:
6764 			collation = DEFAULT_COLLATION_OID;
6765 			constlen = -1;
6766 			break;
6767 
6768 		case NAMEOID:
6769 			collation = InvalidOid;
6770 			constlen = NAMEDATALEN;
6771 			break;
6772 
6773 		case BYTEAOID:
6774 			collation = InvalidOid;
6775 			constlen = -1;
6776 			break;
6777 
6778 		default:
6779 			elog(ERROR, "unexpected datatype in string_to_const: %u",
6780 				 datatype);
6781 			return NULL;
6782 	}
6783 
6784 	return makeConst(datatype, -1, collation, constlen,
6785 					 conval, false, false);
6786 }
6787 
6788 /*
6789  * Generate a Const node of bytea type from a binary C string and a length.
6790  */
6791 static Const *
6792 string_to_bytea_const(const char *str, size_t str_len)
6793 {
6794 	bytea	   *bstr = palloc(VARHDRSZ + str_len);
6795 	Datum		conval;
6796 
6797 	memcpy(VARDATA(bstr), str, str_len);
6798 	SET_VARSIZE(bstr, VARHDRSZ + str_len);
6799 	conval = PointerGetDatum(bstr);
6800 
6801 	return makeConst(BYTEAOID, -1, InvalidOid, -1, conval, false, false);
6802 }
6803 
6804 /*-------------------------------------------------------------------------
6805  *
6806  * Index cost estimation functions
6807  *
6808  *-------------------------------------------------------------------------
6809  */
6810 
/*
 * Do preliminary analysis of an IndexPath's indexquals.
 *
 * For each indexqual, build an IndexQualInfo recording the RestrictInfo,
 * the index column it applies to, the clause's operator OID (InvalidOid
 * for a NullTest), which side the index column is on, and the non-index
 * ("other") operand.  Returns a list of palloc'd IndexQualInfo structs,
 * in the same order as path->indexquals.  Errors out on any clause type
 * other than OpExpr, RowCompareExpr, ScalarArrayOpExpr, or NullTest.
 */
List *
deconstruct_indexquals(IndexPath *path)
{
	List	   *result = NIL;
	IndexOptInfo *index = path->indexinfo;
	ListCell   *lcc,
			   *lci;

	/* indexquals and indexqualcols are parallel lists */
	forboth(lcc, path->indexquals, lci, path->indexqualcols)
	{
		RestrictInfo *rinfo = lfirst_node(RestrictInfo, lcc);
		int			indexcol = lfirst_int(lci);
		Expr	   *clause;
		Node	   *leftop,
				   *rightop;
		IndexQualInfo *qinfo;

		clause = rinfo->clause;

		qinfo = (IndexQualInfo *) palloc(sizeof(IndexQualInfo));
		qinfo->rinfo = rinfo;
		qinfo->indexcol = indexcol;

		if (IsA(clause, OpExpr))
		{
			/* Simple binary operator: index column on one side */
			qinfo->clause_op = ((OpExpr *) clause)->opno;
			leftop = get_leftop(clause);
			rightop = get_rightop(clause);
			if (match_index_to_operand(leftop, indexcol, index))
			{
				qinfo->varonleft = true;
				qinfo->other_operand = rightop;
			}
			else
			{
				Assert(match_index_to_operand(rightop, indexcol, index));
				qinfo->varonleft = false;
				qinfo->other_operand = leftop;
			}
		}
		else if (IsA(clause, RowCompareExpr))
		{
			RowCompareExpr *rc = (RowCompareExpr *) clause;

			/* Represent the row comparison by its first column's operator */
			qinfo->clause_op = linitial_oid(rc->opnos);
			/* Examine only first columns to determine left/right sides */
			if (match_index_to_operand((Node *) linitial(rc->largs),
									   indexcol, index))
			{
				qinfo->varonleft = true;
				qinfo->other_operand = (Node *) rc->rargs;
			}
			else
			{
				Assert(match_index_to_operand((Node *) linitial(rc->rargs),
											  indexcol, index));
				qinfo->varonleft = false;
				qinfo->other_operand = (Node *) rc->largs;
			}
		}
		else if (IsA(clause, ScalarArrayOpExpr))
		{
			ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;

			qinfo->clause_op = saop->opno;
			/* index column is always on the left in this case */
			Assert(match_index_to_operand((Node *) linitial(saop->args),
										  indexcol, index));
			qinfo->varonleft = true;
			qinfo->other_operand = (Node *) lsecond(saop->args);
		}
		else if (IsA(clause, NullTest))
		{
			/* IS [NOT] NULL has no operator and no other operand */
			qinfo->clause_op = InvalidOid;
			Assert(match_index_to_operand((Node *) ((NullTest *) clause)->arg,
										  indexcol, index));
			qinfo->varonleft = true;
			qinfo->other_operand = NULL;
		}
		else
		{
			elog(ERROR, "unsupported indexqual type: %d",
				 (int) nodeTag(clause));
		}

		result = lappend(result, qinfo);
	}
	return result;
}
6900 
6901 /*
6902  * Simple function to compute the total eval cost of the "other operands"
6903  * in an IndexQualInfo list.  Since we know these will be evaluated just
6904  * once per scan, there's no need to distinguish startup from per-row cost.
6905  */
6906 static Cost
6907 other_operands_eval_cost(PlannerInfo *root, List *qinfos)
6908 {
6909 	Cost		qual_arg_cost = 0;
6910 	ListCell   *lc;
6911 
6912 	foreach(lc, qinfos)
6913 	{
6914 		IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(lc);
6915 		QualCost	index_qual_cost;
6916 
6917 		cost_qual_eval_node(&index_qual_cost, qinfo->other_operand, root);
6918 		qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
6919 	}
6920 	return qual_arg_cost;
6921 }
6922 
6923 /*
6924  * Get other-operand eval cost for an index orderby list.
6925  *
6926  * Index orderby expressions aren't represented as RestrictInfos (since they
6927  * aren't boolean, usually).  So we can't apply deconstruct_indexquals to
6928  * them.  However, they are much simpler to deal with since they are always
6929  * OpExprs and the index column is always on the left.
6930  */
6931 static Cost
6932 orderby_operands_eval_cost(PlannerInfo *root, IndexPath *path)
6933 {
6934 	Cost		qual_arg_cost = 0;
6935 	ListCell   *lc;
6936 
6937 	foreach(lc, path->indexorderbys)
6938 	{
6939 		Expr	   *clause = (Expr *) lfirst(lc);
6940 		Node	   *other_operand;
6941 		QualCost	index_qual_cost;
6942 
6943 		if (IsA(clause, OpExpr))
6944 		{
6945 			other_operand = get_rightop(clause);
6946 		}
6947 		else
6948 		{
6949 			elog(ERROR, "unsupported indexorderby type: %d",
6950 				 (int) nodeTag(clause));
6951 			other_operand = NULL;	/* keep compiler quiet */
6952 		}
6953 
6954 		cost_qual_eval_node(&index_qual_cost, other_operand, root);
6955 		qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
6956 	}
6957 	return qual_arg_cost;
6958 }
6959 
/*
 * genericcostestimate --- general-purpose index cost estimator.
 *
 * Fills in the GenericCosts struct for the given IndexPath, estimating
 * startup cost, total cost, selectivity, correlation, pages and tuples
 * visited, tablespace random-page cost, and the number of scans implied
 * by ScalarArrayOpExpr quals.  The caller may pre-set
 * costs->numIndexTuples (> 0) to supply its own tuple-count estimate;
 * otherwise one is derived from the quals' selectivity.  "qinfos" is the
 * output of deconstruct_indexquals() for this path, and loop_count is
 * the expected number of repetitions of the scan (e.g. nestloop inner
 * side).
 */
void
genericcostestimate(PlannerInfo *root,
					IndexPath *path,
					double loop_count,
					List *qinfos,
					GenericCosts *costs)
{
	IndexOptInfo *index = path->indexinfo;
	List	   *indexQuals = path->indexquals;
	List	   *indexOrderBys = path->indexorderbys;
	Cost		indexStartupCost;
	Cost		indexTotalCost;
	Selectivity indexSelectivity;
	double		indexCorrelation;
	double		numIndexPages;
	double		numIndexTuples;
	double		spc_random_page_cost;
	double		num_sa_scans;
	double		num_outer_scans;
	double		num_scans;
	double		qual_op_cost;
	double		qual_arg_cost;
	List	   *selectivityQuals;
	ListCell   *l;

	/*
	 * If the index is partial, AND the index predicate with the explicitly
	 * given indexquals to produce a more accurate idea of the index
	 * selectivity.
	 */
	selectivityQuals = add_predicate_to_quals(index, indexQuals);

	/*
	 * Check for ScalarArrayOpExpr index quals, and estimate the number of
	 * index scans that will be performed.
	 */
	num_sa_scans = 1;
	foreach(l, indexQuals)
	{
		RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);

		if (IsA(rinfo->clause, ScalarArrayOpExpr))
		{
			ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause;
			int			alength = estimate_array_length(lsecond(saop->args));

			/* scans multiply across multiple SAOP quals */
			if (alength > 1)
				num_sa_scans *= alength;
		}
	}

	/* Estimate the fraction of main-table tuples that will be visited */
	indexSelectivity = clauselist_selectivity(root, selectivityQuals,
											  index->rel->relid,
											  JOIN_INNER,
											  NULL);

	/*
	 * If caller didn't give us an estimate, estimate the number of index
	 * tuples that will be visited.  We do it in this rather peculiar-looking
	 * way in order to get the right answer for partial indexes.
	 */
	numIndexTuples = costs->numIndexTuples;
	if (numIndexTuples <= 0.0)
	{
		numIndexTuples = indexSelectivity * index->rel->tuples;

		/*
		 * The above calculation counts all the tuples visited across all
		 * scans induced by ScalarArrayOpExpr nodes.  We want to consider the
		 * average per-indexscan number, so adjust.  This is a handy place to
		 * round to integer, too.  (If caller supplied tuple estimate, it's
		 * responsible for handling these considerations.)
		 */
		numIndexTuples = rint(numIndexTuples / num_sa_scans);
	}

	/*
	 * We can bound the number of tuples by the index size in any case. Also,
	 * always estimate at least one tuple is touched, even when
	 * indexSelectivity estimate is tiny.
	 */
	if (numIndexTuples > index->tuples)
		numIndexTuples = index->tuples;
	if (numIndexTuples < 1.0)
		numIndexTuples = 1.0;

	/*
	 * Estimate the number of index pages that will be retrieved.
	 *
	 * We use the simplistic method of taking a pro-rata fraction of the total
	 * number of index pages.  In effect, this counts only leaf pages and not
	 * any overhead such as index metapage or upper tree levels.
	 *
	 * In practice access to upper index levels is often nearly free because
	 * those tend to stay in cache under load; moreover, the cost involved is
	 * highly dependent on index type.  We therefore ignore such costs here
	 * and leave it to the caller to add a suitable charge if needed.
	 */
	if (index->pages > 1 && index->tuples > 1)
		numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
	else
		numIndexPages = 1.0;

	/* fetch estimated page cost for tablespace containing index */
	get_tablespace_page_costs(index->reltablespace,
							  &spc_random_page_cost,
							  NULL);

	/*
	 * Now compute the disk access costs.
	 *
	 * The above calculations are all per-index-scan.  However, if we are in a
	 * nestloop inner scan, we can expect the scan to be repeated (with
	 * different search keys) for each row of the outer relation.  Likewise,
	 * ScalarArrayOpExpr quals result in multiple index scans.  This creates
	 * the potential for cache effects to reduce the number of disk page
	 * fetches needed.  We want to estimate the average per-scan I/O cost in
	 * the presence of caching.
	 *
	 * We use the Mackert-Lohman formula (see costsize.c for details) to
	 * estimate the total number of page fetches that occur.  While this
	 * wasn't what it was designed for, it seems a reasonable model anyway.
	 * Note that we are counting pages not tuples anymore, so we take N = T =
	 * index size, as if there were one "tuple" per page.
	 */
	num_outer_scans = loop_count;
	num_scans = num_sa_scans * num_outer_scans;

	if (num_scans > 1)
	{
		double		pages_fetched;

		/* total page fetches ignoring cache effects */
		pages_fetched = numIndexPages * num_scans;

		/* use Mackert and Lohman formula to adjust for cache effects */
		pages_fetched = index_pages_fetched(pages_fetched,
											index->pages,
											(double) index->pages,
											root);

		/*
		 * Now compute the total disk access cost, and then report a pro-rated
		 * share for each outer scan.  (Don't pro-rate for ScalarArrayOpExpr,
		 * since that's internal to the indexscan.)
		 */
		indexTotalCost = (pages_fetched * spc_random_page_cost)
			/ num_outer_scans;
	}
	else
	{
		/*
		 * For a single index scan, we just charge spc_random_page_cost per
		 * page touched.
		 */
		indexTotalCost = numIndexPages * spc_random_page_cost;
	}

	/*
	 * CPU cost: any complex expressions in the indexquals will need to be
	 * evaluated once at the start of the scan to reduce them to runtime keys
	 * to pass to the index AM (see nodeIndexscan.c).  We model the per-tuple
	 * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per
	 * indexqual operator.  Because we have numIndexTuples as a per-scan
	 * number, we have to multiply by num_sa_scans to get the correct result
	 * for ScalarArrayOpExpr cases.  Similarly add in costs for any index
	 * ORDER BY expressions.
	 *
	 * Note: this neglects the possible costs of rechecking lossy operators.
	 * Detecting that that might be needed seems more expensive than it's
	 * worth, though, considering all the other inaccuracies here ...
	 */
	qual_arg_cost = other_operands_eval_cost(root, qinfos) +
		orderby_operands_eval_cost(root, path);
	qual_op_cost = cpu_operator_cost *
		(list_length(indexQuals) + list_length(indexOrderBys));

	indexStartupCost = qual_arg_cost;
	indexTotalCost += qual_arg_cost;
	indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);

	/*
	 * Generic assumption about index correlation: there isn't any.
	 */
	indexCorrelation = 0.0;

	/*
	 * Return everything to caller.
	 */
	costs->indexStartupCost = indexStartupCost;
	costs->indexTotalCost = indexTotalCost;
	costs->indexSelectivity = indexSelectivity;
	costs->indexCorrelation = indexCorrelation;
	costs->numIndexPages = numIndexPages;
	costs->numIndexTuples = numIndexTuples;
	costs->spc_random_page_cost = spc_random_page_cost;
	costs->num_sa_scans = num_sa_scans;
}
7159 
7160 /*
7161  * If the index is partial, add its predicate to the given qual list.
7162  *
7163  * ANDing the index predicate with the explicitly given indexquals produces
7164  * a more accurate idea of the index's selectivity.  However, we need to be
7165  * careful not to insert redundant clauses, because clauselist_selectivity()
7166  * is easily fooled into computing a too-low selectivity estimate.  Our
7167  * approach is to add only the predicate clause(s) that cannot be proven to
7168  * be implied by the given indexquals.  This successfully handles cases such
7169  * as a qual "x = 42" used with a partial index "WHERE x >= 40 AND x < 50".
7170  * There are many other cases where we won't detect redundancy, leading to a
7171  * too-low selectivity estimate, which will bias the system in favor of using
7172  * partial indexes where possible.  That is not necessarily bad though.
7173  *
7174  * Note that indexQuals contains RestrictInfo nodes while the indpred
7175  * does not, so the output list will be mixed.  This is OK for both
7176  * predicate_implied_by() and clauselist_selectivity(), but might be
7177  * problematic if the result were passed to other things.
7178  */
7179 static List *
7180 add_predicate_to_quals(IndexOptInfo *index, List *indexQuals)
7181 {
7182 	List	   *predExtraQuals = NIL;
7183 	ListCell   *lc;
7184 
7185 	if (index->indpred == NIL)
7186 		return indexQuals;
7187 
7188 	foreach(lc, index->indpred)
7189 	{
7190 		Node	   *predQual = (Node *) lfirst(lc);
7191 		List	   *oneQual = list_make1(predQual);
7192 
7193 		if (!predicate_implied_by(oneQual, indexQuals, false))
7194 			predExtraQuals = list_concat(predExtraQuals, oneQual);
7195 	}
7196 	/* list_concat avoids modifying the passed-in indexQuals list */
7197 	return list_concat(predExtraQuals, indexQuals);
7198 }
7199 
7200 
7201 void
7202 btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
7203 			   Cost *indexStartupCost, Cost *indexTotalCost,
7204 			   Selectivity *indexSelectivity, double *indexCorrelation,
7205 			   double *indexPages)
7206 {
7207 	IndexOptInfo *index = path->indexinfo;
7208 	List	   *qinfos;
7209 	GenericCosts costs;
7210 	Oid			relid;
7211 	AttrNumber	colnum;
7212 	VariableStatData vardata;
7213 	double		numIndexTuples;
7214 	Cost		descentCost;
7215 	List	   *indexBoundQuals;
7216 	int			indexcol;
7217 	bool		eqQualHere;
7218 	bool		found_saop;
7219 	bool		found_is_null_op;
7220 	double		num_sa_scans;
7221 	ListCell   *lc;
7222 
7223 	/* Do preliminary analysis of indexquals */
7224 	qinfos = deconstruct_indexquals(path);
7225 
7226 	/*
7227 	 * For a btree scan, only leading '=' quals plus inequality quals for the
7228 	 * immediately next attribute contribute to index selectivity (these are
7229 	 * the "boundary quals" that determine the starting and stopping points of
7230 	 * the index scan).  Additional quals can suppress visits to the heap, so
7231 	 * it's OK to count them in indexSelectivity, but they should not count
7232 	 * for estimating numIndexTuples.  So we must examine the given indexquals
7233 	 * to find out which ones count as boundary quals.  We rely on the
7234 	 * knowledge that they are given in index column order.
7235 	 *
7236 	 * For a RowCompareExpr, we consider only the first column, just as
7237 	 * rowcomparesel() does.
7238 	 *
7239 	 * If there's a ScalarArrayOpExpr in the quals, we'll actually perform N
7240 	 * index scans not one, but the ScalarArrayOpExpr's operator can be
7241 	 * considered to act the same as it normally does.
7242 	 */
7243 	indexBoundQuals = NIL;
7244 	indexcol = 0;
7245 	eqQualHere = false;
7246 	found_saop = false;
7247 	found_is_null_op = false;
7248 	num_sa_scans = 1;
7249 	foreach(lc, qinfos)
7250 	{
7251 		IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(lc);
7252 		RestrictInfo *rinfo = qinfo->rinfo;
7253 		Expr	   *clause = rinfo->clause;
7254 		Oid			clause_op;
7255 		int			op_strategy;
7256 
7257 		if (indexcol != qinfo->indexcol)
7258 		{
7259 			/* Beginning of a new column's quals */
7260 			if (!eqQualHere)
7261 				break;			/* done if no '=' qual for indexcol */
7262 			eqQualHere = false;
7263 			indexcol++;
7264 			if (indexcol != qinfo->indexcol)
7265 				break;			/* no quals at all for indexcol */
7266 		}
7267 
7268 		if (IsA(clause, ScalarArrayOpExpr))
7269 		{
7270 			int			alength = estimate_array_length(qinfo->other_operand);
7271 
7272 			found_saop = true;
7273 			/* count up number of SA scans induced by indexBoundQuals only */
7274 			if (alength > 1)
7275 				num_sa_scans *= alength;
7276 		}
7277 		else if (IsA(clause, NullTest))
7278 		{
7279 			NullTest   *nt = (NullTest *) clause;
7280 
7281 			if (nt->nulltesttype == IS_NULL)
7282 			{
7283 				found_is_null_op = true;
7284 				/* IS NULL is like = for selectivity determination purposes */
7285 				eqQualHere = true;
7286 			}
7287 		}
7288 
7289 		/*
7290 		 * We would need to commute the clause_op if not varonleft, except
7291 		 * that we only care if it's equality or not, so that refinement is
7292 		 * unnecessary.
7293 		 */
7294 		clause_op = qinfo->clause_op;
7295 
7296 		/* check for equality operator */
7297 		if (OidIsValid(clause_op))
7298 		{
7299 			op_strategy = get_op_opfamily_strategy(clause_op,
7300 												   index->opfamily[indexcol]);
7301 			Assert(op_strategy != 0);	/* not a member of opfamily?? */
7302 			if (op_strategy == BTEqualStrategyNumber)
7303 				eqQualHere = true;
7304 		}
7305 
7306 		indexBoundQuals = lappend(indexBoundQuals, rinfo);
7307 	}
7308 
7309 	/*
7310 	 * If index is unique and we found an '=' clause for each column, we can
7311 	 * just assume numIndexTuples = 1 and skip the expensive
7312 	 * clauselist_selectivity calculations.  However, a ScalarArrayOp or
7313 	 * NullTest invalidates that theory, even though it sets eqQualHere.
7314 	 */
7315 	if (index->unique &&
7316 		indexcol == index->nkeycolumns - 1 &&
7317 		eqQualHere &&
7318 		!found_saop &&
7319 		!found_is_null_op)
7320 		numIndexTuples = 1.0;
7321 	else
7322 	{
7323 		List	   *selectivityQuals;
7324 		Selectivity btreeSelectivity;
7325 
7326 		/*
7327 		 * If the index is partial, AND the index predicate with the
7328 		 * index-bound quals to produce a more accurate idea of the number of
7329 		 * rows covered by the bound conditions.
7330 		 */
7331 		selectivityQuals = add_predicate_to_quals(index, indexBoundQuals);
7332 
7333 		btreeSelectivity = clauselist_selectivity(root, selectivityQuals,
7334 												  index->rel->relid,
7335 												  JOIN_INNER,
7336 												  NULL);
7337 		numIndexTuples = btreeSelectivity * index->rel->tuples;
7338 
7339 		/*
7340 		 * As in genericcostestimate(), we have to adjust for any
7341 		 * ScalarArrayOpExpr quals included in indexBoundQuals, and then round
7342 		 * to integer.
7343 		 */
7344 		numIndexTuples = rint(numIndexTuples / num_sa_scans);
7345 	}
7346 
7347 	/*
7348 	 * Now do generic index cost estimation.
7349 	 */
7350 	MemSet(&costs, 0, sizeof(costs));
7351 	costs.numIndexTuples = numIndexTuples;
7352 
7353 	genericcostestimate(root, path, loop_count, qinfos, &costs);
7354 
7355 	/*
7356 	 * Add a CPU-cost component to represent the costs of initial btree
7357 	 * descent.  We don't charge any I/O cost for touching upper btree levels,
7358 	 * since they tend to stay in cache, but we still have to do about log2(N)
7359 	 * comparisons to descend a btree of N leaf tuples.  We charge one
7360 	 * cpu_operator_cost per comparison.
7361 	 *
7362 	 * If there are ScalarArrayOpExprs, charge this once per SA scan.  The
7363 	 * ones after the first one are not startup cost so far as the overall
7364 	 * plan is concerned, so add them only to "total" cost.
7365 	 */
7366 	if (index->tuples > 1)		/* avoid computing log(0) */
7367 	{
7368 		descentCost = ceil(log(index->tuples) / log(2.0)) * cpu_operator_cost;
7369 		costs.indexStartupCost += descentCost;
7370 		costs.indexTotalCost += costs.num_sa_scans * descentCost;
7371 	}
7372 
7373 	/*
7374 	 * Even though we're not charging I/O cost for touching upper btree pages,
7375 	 * it's still reasonable to charge some CPU cost per page descended
7376 	 * through.  Moreover, if we had no such charge at all, bloated indexes
7377 	 * would appear to have the same search cost as unbloated ones, at least
7378 	 * in cases where only a single leaf page is expected to be visited.  This
7379 	 * cost is somewhat arbitrarily set at 50x cpu_operator_cost per page
7380 	 * touched.  The number of such pages is btree tree height plus one (ie,
7381 	 * we charge for the leaf page too).  As above, charge once per SA scan.
7382 	 */
7383 	descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
7384 	costs.indexStartupCost += descentCost;
7385 	costs.indexTotalCost += costs.num_sa_scans * descentCost;
7386 
7387 	/*
7388 	 * If we can get an estimate of the first column's ordering correlation C
7389 	 * from pg_statistic, estimate the index correlation as C for a
7390 	 * single-column index, or C * 0.75 for multiple columns. (The idea here
7391 	 * is that multiple columns dilute the importance of the first column's
7392 	 * ordering, but don't negate it entirely.  Before 8.0 we divided the
7393 	 * correlation by the number of columns, but that seems too strong.)
7394 	 */
7395 	MemSet(&vardata, 0, sizeof(vardata));
7396 
7397 	if (index->indexkeys[0] != 0)
7398 	{
7399 		/* Simple variable --- look to stats for the underlying table */
7400 		RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root);
7401 
7402 		Assert(rte->rtekind == RTE_RELATION);
7403 		relid = rte->relid;
7404 		Assert(relid != InvalidOid);
7405 		colnum = index->indexkeys[0];
7406 
7407 		if (get_relation_stats_hook &&
7408 			(*get_relation_stats_hook) (root, rte, colnum, &vardata))
7409 		{
7410 			/*
7411 			 * The hook took control of acquiring a stats tuple.  If it did
7412 			 * supply a tuple, it'd better have supplied a freefunc.
7413 			 */
7414 			if (HeapTupleIsValid(vardata.statsTuple) &&
7415 				!vardata.freefunc)
7416 				elog(ERROR, "no function provided to release variable stats with");
7417 		}
7418 		else
7419 		{
7420 			vardata.statsTuple = SearchSysCache3(STATRELATTINH,
7421 												 ObjectIdGetDatum(relid),
7422 												 Int16GetDatum(colnum),
7423 												 BoolGetDatum(rte->inh));
7424 			vardata.freefunc = ReleaseSysCache;
7425 		}
7426 	}
7427 	else
7428 	{
7429 		/* Expression --- maybe there are stats for the index itself */
7430 		relid = index->indexoid;
7431 		colnum = 1;
7432 
7433 		if (get_index_stats_hook &&
7434 			(*get_index_stats_hook) (root, relid, colnum, &vardata))
7435 		{
7436 			/*
7437 			 * The hook took control of acquiring a stats tuple.  If it did
7438 			 * supply a tuple, it'd better have supplied a freefunc.
7439 			 */
7440 			if (HeapTupleIsValid(vardata.statsTuple) &&
7441 				!vardata.freefunc)
7442 				elog(ERROR, "no function provided to release variable stats with");
7443 		}
7444 		else
7445 		{
7446 			vardata.statsTuple = SearchSysCache3(STATRELATTINH,
7447 												 ObjectIdGetDatum(relid),
7448 												 Int16GetDatum(colnum),
7449 												 BoolGetDatum(false));
7450 			vardata.freefunc = ReleaseSysCache;
7451 		}
7452 	}
7453 
7454 	if (HeapTupleIsValid(vardata.statsTuple))
7455 	{
7456 		Oid			sortop;
7457 		AttStatsSlot sslot;
7458 
7459 		sortop = get_opfamily_member(index->opfamily[0],
7460 									 index->opcintype[0],
7461 									 index->opcintype[0],
7462 									 BTLessStrategyNumber);
7463 		if (OidIsValid(sortop) &&
7464 			get_attstatsslot(&sslot, vardata.statsTuple,
7465 							 STATISTIC_KIND_CORRELATION, sortop,
7466 							 ATTSTATSSLOT_NUMBERS))
7467 		{
7468 			double		varCorrelation;
7469 
7470 			Assert(sslot.nnumbers == 1);
7471 			varCorrelation = sslot.numbers[0];
7472 
7473 			if (index->reverse_sort[0])
7474 				varCorrelation = -varCorrelation;
7475 
7476 			if (index->nkeycolumns > 1)
7477 				costs.indexCorrelation = varCorrelation * 0.75;
7478 			else
7479 				costs.indexCorrelation = varCorrelation;
7480 
7481 			free_attstatsslot(&sslot);
7482 		}
7483 	}
7484 
7485 	ReleaseVariableStats(vardata);
7486 
7487 	*indexStartupCost = costs.indexStartupCost;
7488 	*indexTotalCost = costs.indexTotalCost;
7489 	*indexSelectivity = costs.indexSelectivity;
7490 	*indexCorrelation = costs.indexCorrelation;
7491 	*indexPages = costs.numIndexPages;
7492 }
7493 
7494 void
7495 hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
7496 				 Cost *indexStartupCost, Cost *indexTotalCost,
7497 				 Selectivity *indexSelectivity, double *indexCorrelation,
7498 				 double *indexPages)
7499 {
7500 	List	   *qinfos;
7501 	GenericCosts costs;
7502 
7503 	/* Do preliminary analysis of indexquals */
7504 	qinfos = deconstruct_indexquals(path);
7505 
7506 	MemSet(&costs, 0, sizeof(costs));
7507 
7508 	genericcostestimate(root, path, loop_count, qinfos, &costs);
7509 
7510 	/*
7511 	 * A hash index has no descent costs as such, since the index AM can go
7512 	 * directly to the target bucket after computing the hash value.  There
7513 	 * are a couple of other hash-specific costs that we could conceivably add
7514 	 * here, though:
7515 	 *
7516 	 * Ideally we'd charge spc_random_page_cost for each page in the target
7517 	 * bucket, not just the numIndexPages pages that genericcostestimate
7518 	 * thought we'd visit.  However in most cases we don't know which bucket
7519 	 * that will be.  There's no point in considering the average bucket size
7520 	 * because the hash AM makes sure that's always one page.
7521 	 *
7522 	 * Likewise, we could consider charging some CPU for each index tuple in
7523 	 * the bucket, if we knew how many there were.  But the per-tuple cost is
7524 	 * just a hash value comparison, not a general datatype-dependent
7525 	 * comparison, so any such charge ought to be quite a bit less than
7526 	 * cpu_operator_cost; which makes it probably not worth worrying about.
7527 	 *
7528 	 * A bigger issue is that chance hash-value collisions will result in
7529 	 * wasted probes into the heap.  We don't currently attempt to model this
7530 	 * cost on the grounds that it's rare, but maybe it's not rare enough.
7531 	 * (Any fix for this ought to consider the generic lossy-operator problem,
7532 	 * though; it's not entirely hash-specific.)
7533 	 */
7534 
7535 	*indexStartupCost = costs.indexStartupCost;
7536 	*indexTotalCost = costs.indexTotalCost;
7537 	*indexSelectivity = costs.indexSelectivity;
7538 	*indexCorrelation = costs.indexCorrelation;
7539 	*indexPages = costs.numIndexPages;
7540 }
7541 
7542 void
7543 gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
7544 				 Cost *indexStartupCost, Cost *indexTotalCost,
7545 				 Selectivity *indexSelectivity, double *indexCorrelation,
7546 				 double *indexPages)
7547 {
7548 	IndexOptInfo *index = path->indexinfo;
7549 	List	   *qinfos;
7550 	GenericCosts costs;
7551 	Cost		descentCost;
7552 
7553 	/* Do preliminary analysis of indexquals */
7554 	qinfos = deconstruct_indexquals(path);
7555 
7556 	MemSet(&costs, 0, sizeof(costs));
7557 
7558 	genericcostestimate(root, path, loop_count, qinfos, &costs);
7559 
7560 	/*
7561 	 * We model index descent costs similarly to those for btree, but to do
7562 	 * that we first need an idea of the tree height.  We somewhat arbitrarily
7563 	 * assume that the fanout is 100, meaning the tree height is at most
7564 	 * log100(index->pages).
7565 	 *
7566 	 * Although this computation isn't really expensive enough to require
7567 	 * caching, we might as well use index->tree_height to cache it.
7568 	 */
7569 	if (index->tree_height < 0) /* unknown? */
7570 	{
7571 		if (index->pages > 1)	/* avoid computing log(0) */
7572 			index->tree_height = (int) (log(index->pages) / log(100.0));
7573 		else
7574 			index->tree_height = 0;
7575 	}
7576 
7577 	/*
7578 	 * Add a CPU-cost component to represent the costs of initial descent. We
7579 	 * just use log(N) here not log2(N) since the branching factor isn't
7580 	 * necessarily two anyway.  As for btree, charge once per SA scan.
7581 	 */
7582 	if (index->tuples > 1)		/* avoid computing log(0) */
7583 	{
7584 		descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
7585 		costs.indexStartupCost += descentCost;
7586 		costs.indexTotalCost += costs.num_sa_scans * descentCost;
7587 	}
7588 
7589 	/*
7590 	 * Likewise add a per-page charge, calculated the same as for btrees.
7591 	 */
7592 	descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
7593 	costs.indexStartupCost += descentCost;
7594 	costs.indexTotalCost += costs.num_sa_scans * descentCost;
7595 
7596 	*indexStartupCost = costs.indexStartupCost;
7597 	*indexTotalCost = costs.indexTotalCost;
7598 	*indexSelectivity = costs.indexSelectivity;
7599 	*indexCorrelation = costs.indexCorrelation;
7600 	*indexPages = costs.numIndexPages;
7601 }
7602 
7603 void
7604 spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
7605 				Cost *indexStartupCost, Cost *indexTotalCost,
7606 				Selectivity *indexSelectivity, double *indexCorrelation,
7607 				double *indexPages)
7608 {
7609 	IndexOptInfo *index = path->indexinfo;
7610 	List	   *qinfos;
7611 	GenericCosts costs;
7612 	Cost		descentCost;
7613 
7614 	/* Do preliminary analysis of indexquals */
7615 	qinfos = deconstruct_indexquals(path);
7616 
7617 	MemSet(&costs, 0, sizeof(costs));
7618 
7619 	genericcostestimate(root, path, loop_count, qinfos, &costs);
7620 
7621 	/*
7622 	 * We model index descent costs similarly to those for btree, but to do
7623 	 * that we first need an idea of the tree height.  We somewhat arbitrarily
7624 	 * assume that the fanout is 100, meaning the tree height is at most
7625 	 * log100(index->pages).
7626 	 *
7627 	 * Although this computation isn't really expensive enough to require
7628 	 * caching, we might as well use index->tree_height to cache it.
7629 	 */
7630 	if (index->tree_height < 0) /* unknown? */
7631 	{
7632 		if (index->pages > 1)	/* avoid computing log(0) */
7633 			index->tree_height = (int) (log(index->pages) / log(100.0));
7634 		else
7635 			index->tree_height = 0;
7636 	}
7637 
7638 	/*
7639 	 * Add a CPU-cost component to represent the costs of initial descent. We
7640 	 * just use log(N) here not log2(N) since the branching factor isn't
7641 	 * necessarily two anyway.  As for btree, charge once per SA scan.
7642 	 */
7643 	if (index->tuples > 1)		/* avoid computing log(0) */
7644 	{
7645 		descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
7646 		costs.indexStartupCost += descentCost;
7647 		costs.indexTotalCost += costs.num_sa_scans * descentCost;
7648 	}
7649 
7650 	/*
7651 	 * Likewise add a per-page charge, calculated the same as for btrees.
7652 	 */
7653 	descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
7654 	costs.indexStartupCost += descentCost;
7655 	costs.indexTotalCost += costs.num_sa_scans * descentCost;
7656 
7657 	*indexStartupCost = costs.indexStartupCost;
7658 	*indexTotalCost = costs.indexTotalCost;
7659 	*indexSelectivity = costs.indexSelectivity;
7660 	*indexCorrelation = costs.indexCorrelation;
7661 	*indexPages = costs.numIndexPages;
7662 }
7663 
7664 
7665 /*
7666  * Support routines for gincostestimate
7667  */
7668 
/*
 * Workspace for accumulating the amount of entry-tree search work implied
 * by a set of GIN index quals; filled in by gincost_pattern and friends,
 * consumed by gincostestimate.
 */
typedef struct
{
	bool		haveFullScan;	/* did any qual require a full index scan? */
	double		partialEntries; /* est. # of entries visited by partial-match quals */
	double		exactEntries;	/* # of exact-match entry searches */
	double		searchEntries;	/* total # of entry-tree searches */
	double		arrayScans;		/* # of index scans forced by ScalarArrayOps
								 * (product of array lengths); 1 if none */
} GinQualCounts;
7677 
7678 /*
7679  * Estimate the number of index terms that need to be searched for while
7680  * testing the given GIN query, and increment the counts in *counts
7681  * appropriately.  If the query is unsatisfiable, return false.
7682  */
static bool
gincost_pattern(IndexOptInfo *index, int indexcol,
				Oid clause_op, Datum query,
				GinQualCounts *counts)
{
	Oid			extractProcOid;
	Oid			collation;
	int			strategy_op;
	Oid			lefttype,
				righttype;
	int32		nentries = 0;	/* filled in by extractQuery */
	bool	   *partial_matches = NULL; /* per-entry partial-match flags, or NULL */
	Pointer    *extra_data = NULL;	/* opclass-private data; unused here */
	bool	   *nullFlags = NULL;	/* per-entry null flags; unused here */
	int32		searchMode = GIN_SEARCH_MODE_DEFAULT;
	int32		i;

	Assert(indexcol < index->nkeycolumns);

	/*
	 * Get the operator's strategy number and declared input data types within
	 * the index opfamily.  (We don't need the latter, but we use
	 * get_op_opfamily_properties because it will throw error if it fails to
	 * find a matching pg_amop entry.)
	 */
	get_op_opfamily_properties(clause_op, index->opfamily[indexcol], false,
							   &strategy_op, &lefttype, &righttype);

	/*
	 * GIN always uses the "default" support functions, which are those with
	 * lefttype == righttype == the opclass' opcintype (see
	 * IndexSupportInitialize in relcache.c).
	 */
	extractProcOid = get_opfamily_proc(index->opfamily[indexcol],
									   index->opcintype[indexcol],
									   index->opcintype[indexcol],
									   GIN_EXTRACTQUERY_PROC);

	if (!OidIsValid(extractProcOid))
	{
		/* should not happen; throw same error as index_getprocinfo */
		elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",
			 GIN_EXTRACTQUERY_PROC, indexcol + 1,
			 get_rel_name(index->indexoid));
	}

	/*
	 * Choose collation to pass to extractProc (should match initGinState).
	 */
	if (OidIsValid(index->indexcollations[indexcol]))
		collation = index->indexcollations[indexcol];
	else
		collation = DEFAULT_COLLATION_OID;

	/*
	 * Invoke the opclass's extractQuery support function.  Its result value
	 * (an array of index keys) is discarded; we only care about the
	 * output parameters: nentries, partial_matches, and searchMode.
	 */
	OidFunctionCall7Coll(extractProcOid,
						 collation,
						 query,
						 PointerGetDatum(&nentries),
						 UInt16GetDatum(strategy_op),
						 PointerGetDatum(&partial_matches),
						 PointerGetDatum(&extra_data),
						 PointerGetDatum(&nullFlags),
						 PointerGetDatum(&searchMode));

	if (nentries <= 0 && searchMode == GIN_SEARCH_MODE_DEFAULT)
	{
		/* No match is possible */
		return false;
	}

	for (i = 0; i < nentries; i++)
	{
		/*
		 * For partial match we haven't any information to estimate number of
		 * matched entries in index, so, we just estimate it as 100
		 */
		if (partial_matches && partial_matches[i])
			counts->partialEntries += 100;
		else
			counts->exactEntries++;

		/* every extracted entry implies one entry-tree search */
		counts->searchEntries++;
	}

	if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY)
	{
		/* Treat "include empty" like an exact-match item */
		counts->exactEntries++;
		counts->searchEntries++;
	}
	else if (searchMode != GIN_SEARCH_MODE_DEFAULT)
	{
		/* It's GIN_SEARCH_MODE_ALL */
		counts->haveFullScan = true;
	}

	return true;
}
7781 
7782 /*
7783  * Estimate the number of index terms that need to be searched for while
7784  * testing the given GIN index clause, and increment the counts in *counts
7785  * appropriately.  If the query is unsatisfiable, return false.
7786  */
7787 static bool
7788 gincost_opexpr(PlannerInfo *root,
7789 			   IndexOptInfo *index,
7790 			   IndexQualInfo *qinfo,
7791 			   GinQualCounts *counts)
7792 {
7793 	int			indexcol = qinfo->indexcol;
7794 	Oid			clause_op = qinfo->clause_op;
7795 	Node	   *operand = qinfo->other_operand;
7796 
7797 	if (!qinfo->varonleft)
7798 	{
7799 		/* must commute the operator */
7800 		clause_op = get_commutator(clause_op);
7801 	}
7802 
7803 	/* aggressively reduce to a constant, and look through relabeling */
7804 	operand = estimate_expression_value(root, operand);
7805 
7806 	if (IsA(operand, RelabelType))
7807 		operand = (Node *) ((RelabelType *) operand)->arg;
7808 
7809 	/*
7810 	 * It's impossible to call extractQuery method for unknown operand. So
7811 	 * unless operand is a Const we can't do much; just assume there will be
7812 	 * one ordinary search entry from the operand at runtime.
7813 	 */
7814 	if (!IsA(operand, Const))
7815 	{
7816 		counts->exactEntries++;
7817 		counts->searchEntries++;
7818 		return true;
7819 	}
7820 
7821 	/* If Const is null, there can be no matches */
7822 	if (((Const *) operand)->constisnull)
7823 		return false;
7824 
7825 	/* Otherwise, apply extractQuery and get the actual term counts */
7826 	return gincost_pattern(index, indexcol, clause_op,
7827 						   ((Const *) operand)->constvalue,
7828 						   counts);
7829 }
7830 
7831 /*
7832  * Estimate the number of index terms that need to be searched for while
7833  * testing the given GIN index clause, and increment the counts in *counts
7834  * appropriately.  If the query is unsatisfiable, return false.
7835  *
7836  * A ScalarArrayOpExpr will give rise to N separate indexscans at runtime,
7837  * each of which involves one value from the RHS array, plus all the
7838  * non-array quals (if any).  To model this, we average the counts across
7839  * the RHS elements, and add the averages to the counts in *counts (which
7840  * correspond to per-indexscan costs).  We also multiply counts->arrayScans
7841  * by N, causing gincostestimate to scale up its estimates accordingly.
7842  */
static bool
gincost_scalararrayopexpr(PlannerInfo *root,
						  IndexOptInfo *index,
						  IndexQualInfo *qinfo,
						  double numIndexEntries,
						  GinQualCounts *counts)
{
	int			indexcol = qinfo->indexcol;
	Oid			clause_op = qinfo->clause_op;
	Node	   *rightop = qinfo->other_operand;
	ArrayType  *arrayval;
	int16		elmlen;			/* array element type properties */
	bool		elmbyval;
	char		elmalign;
	int			numElems;
	Datum	   *elemValues;
	bool	   *elemNulls;
	GinQualCounts arraycounts;	/* summed counts over satisfiable elements */
	int			numPossible = 0;	/* # of elements that could match */
	int			i;

	/* Only OR-semantics ("= ANY") SAOPs can use a GIN index */
	Assert(((ScalarArrayOpExpr *) qinfo->rinfo->clause)->useOr);

	/* aggressively reduce to a constant, and look through relabeling */
	rightop = estimate_expression_value(root, rightop);

	if (IsA(rightop, RelabelType))
		rightop = (Node *) ((RelabelType *) rightop)->arg;

	/*
	 * It's impossible to call extractQuery method for unknown operand. So
	 * unless operand is a Const we can't do much; just assume there will be
	 * one ordinary search entry from each array entry at runtime, and fall
	 * back on a probably-bad estimate of the number of array entries.
	 */
	if (!IsA(rightop, Const))
	{
		counts->exactEntries++;
		counts->searchEntries++;
		counts->arrayScans *= estimate_array_length(rightop);
		return true;
	}

	/* If Const is null, there can be no matches */
	if (((Const *) rightop)->constisnull)
		return false;

	/* Otherwise, extract the array elements and iterate over them */
	arrayval = DatumGetArrayTypeP(((Const *) rightop)->constvalue);
	get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
						 &elmlen, &elmbyval, &elmalign);
	deconstruct_array(arrayval,
					  ARR_ELEMTYPE(arrayval),
					  elmlen, elmbyval, elmalign,
					  &elemValues, &elemNulls, &numElems);

	memset(&arraycounts, 0, sizeof(arraycounts));

	for (i = 0; i < numElems; i++)
	{
		GinQualCounts elemcounts;	/* counts for this one element */

		/* NULL can't match anything, so ignore, as the executor will */
		if (elemNulls[i])
			continue;

		/* Otherwise, apply extractQuery and get the actual term counts */
		memset(&elemcounts, 0, sizeof(elemcounts));

		if (gincost_pattern(index, indexcol, clause_op, elemValues[i],
							&elemcounts))
		{
			/* We ignore array elements that are unsatisfiable patterns */
			numPossible++;

			if (elemcounts.haveFullScan)
			{
				/*
				 * Full index scan will be required.  We treat this as if
				 * every key in the index had been listed in the query; is
				 * that reasonable?
				 */
				elemcounts.partialEntries = 0;
				elemcounts.exactEntries = numIndexEntries;
				elemcounts.searchEntries = numIndexEntries;
			}
			arraycounts.partialEntries += elemcounts.partialEntries;
			arraycounts.exactEntries += elemcounts.exactEntries;
			arraycounts.searchEntries += elemcounts.searchEntries;
		}
	}

	if (numPossible == 0)
	{
		/* No satisfiable patterns in the array */
		return false;
	}

	/*
	 * Now add the averages to the global counts.  This will give us an
	 * estimate of the average number of terms searched for in each indexscan,
	 * including contributions from both array and non-array quals.
	 */
	counts->partialEntries += arraycounts.partialEntries / numPossible;
	counts->exactEntries += arraycounts.exactEntries / numPossible;
	counts->searchEntries += arraycounts.searchEntries / numPossible;

	/* Each satisfiable element implies one separate indexscan at runtime */
	counts->arrayScans *= numPossible;

	return true;
}
7954 
7955 /*
7956  * GIN has search behavior completely different from other index types
7957  */
7958 void
7959 gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
7960 				Cost *indexStartupCost, Cost *indexTotalCost,
7961 				Selectivity *indexSelectivity, double *indexCorrelation,
7962 				double *indexPages)
7963 {
7964 	IndexOptInfo *index = path->indexinfo;
7965 	List	   *indexQuals = path->indexquals;
7966 	List	   *indexOrderBys = path->indexorderbys;
7967 	List	   *qinfos;
7968 	ListCell   *l;
7969 	List	   *selectivityQuals;
7970 	double		numPages = index->pages,
7971 				numTuples = index->tuples;
7972 	double		numEntryPages,
7973 				numDataPages,
7974 				numPendingPages,
7975 				numEntries;
7976 	GinQualCounts counts;
7977 	bool		matchPossible;
7978 	double		partialScale;
7979 	double		entryPagesFetched,
7980 				dataPagesFetched,
7981 				dataPagesFetchedBySel;
7982 	double		qual_op_cost,
7983 				qual_arg_cost,
7984 				spc_random_page_cost,
7985 				outer_scans;
7986 	Relation	indexRel;
7987 	GinStatsData ginStats;
7988 
7989 	/* Do preliminary analysis of indexquals */
7990 	qinfos = deconstruct_indexquals(path);
7991 
7992 	/*
7993 	 * Obtain statistical information from the meta page, if possible.  Else
7994 	 * set ginStats to zeroes, and we'll cope below.
7995 	 */
7996 	if (!index->hypothetical)
7997 	{
7998 		indexRel = index_open(index->indexoid, AccessShareLock);
7999 		ginGetStats(indexRel, &ginStats);
8000 		index_close(indexRel, AccessShareLock);
8001 	}
8002 	else
8003 	{
8004 		memset(&ginStats, 0, sizeof(ginStats));
8005 	}
8006 
8007 	/*
8008 	 * Assuming we got valid (nonzero) stats at all, nPendingPages can be
8009 	 * trusted, but the other fields are data as of the last VACUUM.  We can
8010 	 * scale them up to account for growth since then, but that method only
8011 	 * goes so far; in the worst case, the stats might be for a completely
8012 	 * empty index, and scaling them will produce pretty bogus numbers.
8013 	 * Somewhat arbitrarily, set the cutoff for doing scaling at 4X growth; if
8014 	 * it's grown more than that, fall back to estimating things only from the
8015 	 * assumed-accurate index size.  But we'll trust nPendingPages in any case
8016 	 * so long as it's not clearly insane, ie, more than the index size.
8017 	 */
8018 	if (ginStats.nPendingPages < numPages)
8019 		numPendingPages = ginStats.nPendingPages;
8020 	else
8021 		numPendingPages = 0;
8022 
8023 	if (numPages > 0 && ginStats.nTotalPages <= numPages &&
8024 		ginStats.nTotalPages > numPages / 4 &&
8025 		ginStats.nEntryPages > 0 && ginStats.nEntries > 0)
8026 	{
8027 		/*
8028 		 * OK, the stats seem close enough to sane to be trusted.  But we
8029 		 * still need to scale them by the ratio numPages / nTotalPages to
8030 		 * account for growth since the last VACUUM.
8031 		 */
8032 		double		scale = numPages / ginStats.nTotalPages;
8033 
8034 		numEntryPages = ceil(ginStats.nEntryPages * scale);
8035 		numDataPages = ceil(ginStats.nDataPages * scale);
8036 		numEntries = ceil(ginStats.nEntries * scale);
8037 		/* ensure we didn't round up too much */
8038 		numEntryPages = Min(numEntryPages, numPages - numPendingPages);
8039 		numDataPages = Min(numDataPages,
8040 						   numPages - numPendingPages - numEntryPages);
8041 	}
8042 	else
8043 	{
8044 		/*
8045 		 * We might get here because it's a hypothetical index, or an index
8046 		 * created pre-9.1 and never vacuumed since upgrading (in which case
8047 		 * its stats would read as zeroes), or just because it's grown too
8048 		 * much since the last VACUUM for us to put our faith in scaling.
8049 		 *
8050 		 * Invent some plausible internal statistics based on the index page
8051 		 * count (and clamp that to at least 10 pages, just in case).  We
8052 		 * estimate that 90% of the index is entry pages, and the rest is data
8053 		 * pages.  Estimate 100 entries per entry page; this is rather bogus
8054 		 * since it'll depend on the size of the keys, but it's more robust
8055 		 * than trying to predict the number of entries per heap tuple.
8056 		 */
8057 		numPages = Max(numPages, 10);
8058 		numEntryPages = floor((numPages - numPendingPages) * 0.90);
8059 		numDataPages = numPages - numPendingPages - numEntryPages;
8060 		numEntries = floor(numEntryPages * 100);
8061 	}
8062 
8063 	/* In an empty index, numEntries could be zero.  Avoid divide-by-zero */
8064 	if (numEntries < 1)
8065 		numEntries = 1;
8066 
8067 	/*
8068 	 * Include predicate in selectivityQuals (should match
8069 	 * genericcostestimate)
8070 	 */
8071 	if (index->indpred != NIL)
8072 	{
8073 		List	   *predExtraQuals = NIL;
8074 
8075 		foreach(l, index->indpred)
8076 		{
8077 			Node	   *predQual = (Node *) lfirst(l);
8078 			List	   *oneQual = list_make1(predQual);
8079 
8080 			if (!predicate_implied_by(oneQual, indexQuals, false))
8081 				predExtraQuals = list_concat(predExtraQuals, oneQual);
8082 		}
8083 		/* list_concat avoids modifying the passed-in indexQuals list */
8084 		selectivityQuals = list_concat(predExtraQuals, indexQuals);
8085 	}
8086 	else
8087 		selectivityQuals = indexQuals;
8088 
8089 	/* Estimate the fraction of main-table tuples that will be visited */
8090 	*indexSelectivity = clauselist_selectivity(root, selectivityQuals,
8091 											   index->rel->relid,
8092 											   JOIN_INNER,
8093 											   NULL);
8094 
8095 	/* fetch estimated page cost for tablespace containing index */
8096 	get_tablespace_page_costs(index->reltablespace,
8097 							  &spc_random_page_cost,
8098 							  NULL);
8099 
8100 	/*
8101 	 * Generic assumption about index correlation: there isn't any.
8102 	 */
8103 	*indexCorrelation = 0.0;
8104 
8105 	/*
8106 	 * Examine quals to estimate number of search entries & partial matches
8107 	 */
8108 	memset(&counts, 0, sizeof(counts));
8109 	counts.arrayScans = 1;
8110 	matchPossible = true;
8111 
8112 	foreach(l, qinfos)
8113 	{
8114 		IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(l);
8115 		Expr	   *clause = qinfo->rinfo->clause;
8116 
8117 		if (IsA(clause, OpExpr))
8118 		{
8119 			matchPossible = gincost_opexpr(root,
8120 										   index,
8121 										   qinfo,
8122 										   &counts);
8123 			if (!matchPossible)
8124 				break;
8125 		}
8126 		else if (IsA(clause, ScalarArrayOpExpr))
8127 		{
8128 			matchPossible = gincost_scalararrayopexpr(root,
8129 													  index,
8130 													  qinfo,
8131 													  numEntries,
8132 													  &counts);
8133 			if (!matchPossible)
8134 				break;
8135 		}
8136 		else
8137 		{
8138 			/* shouldn't be anything else for a GIN index */
8139 			elog(ERROR, "unsupported GIN indexqual type: %d",
8140 				 (int) nodeTag(clause));
8141 		}
8142 	}
8143 
8144 	/* Fall out if there were any provably-unsatisfiable quals */
8145 	if (!matchPossible)
8146 	{
8147 		*indexStartupCost = 0;
8148 		*indexTotalCost = 0;
8149 		*indexSelectivity = 0;
8150 		return;
8151 	}
8152 
8153 	if (counts.haveFullScan || indexQuals == NIL)
8154 	{
8155 		/*
8156 		 * Full index scan will be required.  We treat this as if every key in
8157 		 * the index had been listed in the query; is that reasonable?
8158 		 */
8159 		counts.partialEntries = 0;
8160 		counts.exactEntries = numEntries;
8161 		counts.searchEntries = numEntries;
8162 	}
8163 
8164 	/* Will we have more than one iteration of a nestloop scan? */
8165 	outer_scans = loop_count;
8166 
8167 	/*
8168 	 * Compute cost to begin scan, first of all, pay attention to pending
8169 	 * list.
8170 	 */
8171 	entryPagesFetched = numPendingPages;
8172 
8173 	/*
8174 	 * Estimate number of entry pages read.  We need to do
8175 	 * counts.searchEntries searches.  Use a power function as it should be,
8176 	 * but tuples on leaf pages usually is much greater. Here we include all
8177 	 * searches in entry tree, including search of first entry in partial
8178 	 * match algorithm
8179 	 */
8180 	entryPagesFetched += ceil(counts.searchEntries * rint(pow(numEntryPages, 0.15)));
8181 
8182 	/*
8183 	 * Add an estimate of entry pages read by partial match algorithm. It's a
8184 	 * scan over leaf pages in entry tree.  We haven't any useful stats here,
8185 	 * so estimate it as proportion.  Because counts.partialEntries is really
8186 	 * pretty bogus (see code above), it's possible that it is more than
8187 	 * numEntries; clamp the proportion to ensure sanity.
8188 	 */
8189 	partialScale = counts.partialEntries / numEntries;
8190 	partialScale = Min(partialScale, 1.0);
8191 
8192 	entryPagesFetched += ceil(numEntryPages * partialScale);
8193 
8194 	/*
8195 	 * Partial match algorithm reads all data pages before doing actual scan,
8196 	 * so it's a startup cost.  Again, we haven't any useful stats here, so
8197 	 * estimate it as proportion.
8198 	 */
8199 	dataPagesFetched = ceil(numDataPages * partialScale);
8200 
8201 	/*
8202 	 * Calculate cache effects if more than one scan due to nestloops or array
8203 	 * quals.  The result is pro-rated per nestloop scan, but the array qual
8204 	 * factor shouldn't be pro-rated (compare genericcostestimate).
8205 	 */
8206 	if (outer_scans > 1 || counts.arrayScans > 1)
8207 	{
8208 		entryPagesFetched *= outer_scans * counts.arrayScans;
8209 		entryPagesFetched = index_pages_fetched(entryPagesFetched,
8210 												(BlockNumber) numEntryPages,
8211 												numEntryPages, root);
8212 		entryPagesFetched /= outer_scans;
8213 		dataPagesFetched *= outer_scans * counts.arrayScans;
8214 		dataPagesFetched = index_pages_fetched(dataPagesFetched,
8215 											   (BlockNumber) numDataPages,
8216 											   numDataPages, root);
8217 		dataPagesFetched /= outer_scans;
8218 	}
8219 
8220 	/*
8221 	 * Here we use random page cost because logically-close pages could be far
8222 	 * apart on disk.
8223 	 */
8224 	*indexStartupCost = (entryPagesFetched + dataPagesFetched) * spc_random_page_cost;
8225 
8226 	/*
8227 	 * Now compute the number of data pages fetched during the scan.
8228 	 *
8229 	 * We assume every entry to have the same number of items, and that there
8230 	 * is no overlap between them. (XXX: tsvector and array opclasses collect
8231 	 * statistics on the frequency of individual keys; it would be nice to use
8232 	 * those here.)
8233 	 */
8234 	dataPagesFetched = ceil(numDataPages * counts.exactEntries / numEntries);
8235 
8236 	/*
8237 	 * If there is a lot of overlap among the entries, in particular if one of
8238 	 * the entries is very frequent, the above calculation can grossly
8239 	 * under-estimate.  As a simple cross-check, calculate a lower bound based
8240 	 * on the overall selectivity of the quals.  At a minimum, we must read
8241 	 * one item pointer for each matching entry.
8242 	 *
8243 	 * The width of each item pointer varies, based on the level of
8244 	 * compression.  We don't have statistics on that, but an average of
8245 	 * around 3 bytes per item is fairly typical.
8246 	 */
8247 	dataPagesFetchedBySel = ceil(*indexSelectivity *
8248 								 (numTuples / (BLCKSZ / 3)));
8249 	if (dataPagesFetchedBySel > dataPagesFetched)
8250 		dataPagesFetched = dataPagesFetchedBySel;
8251 
8252 	/* Account for cache effects, the same as above */
8253 	if (outer_scans > 1 || counts.arrayScans > 1)
8254 	{
8255 		dataPagesFetched *= outer_scans * counts.arrayScans;
8256 		dataPagesFetched = index_pages_fetched(dataPagesFetched,
8257 											   (BlockNumber) numDataPages,
8258 											   numDataPages, root);
8259 		dataPagesFetched /= outer_scans;
8260 	}
8261 
8262 	/* And apply random_page_cost as the cost per page */
8263 	*indexTotalCost = *indexStartupCost +
8264 		dataPagesFetched * spc_random_page_cost;
8265 
8266 	/*
8267 	 * Add on index qual eval costs, much as in genericcostestimate
8268 	 */
8269 	qual_arg_cost = other_operands_eval_cost(root, qinfos) +
8270 		orderby_operands_eval_cost(root, path);
8271 	qual_op_cost = cpu_operator_cost *
8272 		(list_length(indexQuals) + list_length(indexOrderBys));
8273 
8274 	*indexStartupCost += qual_arg_cost;
8275 	*indexTotalCost += qual_arg_cost;
8276 	*indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
8277 	*indexPages = dataPagesFetched;
8278 }
8279 
8280 /*
8281  * BRIN has search behavior completely different from other index types
8282  */
8283 void
8284 brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
8285 				 Cost *indexStartupCost, Cost *indexTotalCost,
8286 				 Selectivity *indexSelectivity, double *indexCorrelation,
8287 				 double *indexPages)
8288 {
8289 	IndexOptInfo *index = path->indexinfo;
8290 	List	   *indexQuals = path->indexquals;
8291 	double		numPages = index->pages;
8292 	RelOptInfo *baserel = index->rel;
8293 	RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
8294 	List	   *qinfos;
8295 	Cost		spc_seq_page_cost;
8296 	Cost		spc_random_page_cost;
8297 	double		qual_arg_cost;
8298 	double		qualSelectivity;
8299 	BrinStatsData statsData;
8300 	double		indexRanges;
8301 	double		minimalRanges;
8302 	double		estimatedRanges;
8303 	double		selec;
8304 	Relation	indexRel;
8305 	ListCell   *l;
8306 	VariableStatData vardata;
8307 
8308 	Assert(rte->rtekind == RTE_RELATION);
8309 
8310 	/* fetch estimated page cost for the tablespace containing the index */
8311 	get_tablespace_page_costs(index->reltablespace,
8312 							  &spc_random_page_cost,
8313 							  &spc_seq_page_cost);
8314 
8315 	/*
8316 	 * Obtain some data from the index itself, if possible.  Otherwise invent
8317 	 * some plausible internal statistics based on the relation page count.
8318 	 */
8319 	if (!index->hypothetical)
8320 	{
8321 		indexRel = index_open(index->indexoid, AccessShareLock);
8322 		brinGetStats(indexRel, &statsData);
8323 		index_close(indexRel, AccessShareLock);
8324 
8325 		/* work out the actual number of ranges in the index */
8326 		indexRanges = Max(ceil((double) baserel->pages /
8327 							   statsData.pagesPerRange), 1.0);
8328 	}
8329 	else
8330 	{
8331 		/*
8332 		 * Assume default number of pages per range, and estimate the number
8333 		 * of ranges based on that.
8334 		 */
8335 		indexRanges = Max(ceil((double) baserel->pages /
8336 							   BRIN_DEFAULT_PAGES_PER_RANGE), 1.0);
8337 
8338 		statsData.pagesPerRange = BRIN_DEFAULT_PAGES_PER_RANGE;
8339 		statsData.revmapNumPages = (indexRanges / REVMAP_PAGE_MAXITEMS) + 1;
8340 	}
8341 
8342 	/*
8343 	 * Compute index correlation
8344 	 *
8345 	 * Because we can use all index quals equally when scanning, we can use
8346 	 * the largest correlation (in absolute value) among columns used by the
8347 	 * query.  Start at zero, the worst possible case.  If we cannot find any
8348 	 * correlation statistics, we will keep it as 0.
8349 	 */
8350 	*indexCorrelation = 0;
8351 
8352 	qinfos = deconstruct_indexquals(path);
8353 	foreach(l, qinfos)
8354 	{
8355 		IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(l);
8356 		AttrNumber	attnum = index->indexkeys[qinfo->indexcol];
8357 
8358 		/* attempt to lookup stats in relation for this index column */
8359 		if (attnum != 0)
8360 		{
8361 			/* Simple variable -- look to stats for the underlying table */
8362 			if (get_relation_stats_hook &&
8363 				(*get_relation_stats_hook) (root, rte, attnum, &vardata))
8364 			{
8365 				/*
8366 				 * The hook took control of acquiring a stats tuple.  If it
8367 				 * did supply a tuple, it'd better have supplied a freefunc.
8368 				 */
8369 				if (HeapTupleIsValid(vardata.statsTuple) && !vardata.freefunc)
8370 					elog(ERROR,
8371 						 "no function provided to release variable stats with");
8372 			}
8373 			else
8374 			{
8375 				vardata.statsTuple =
8376 					SearchSysCache3(STATRELATTINH,
8377 									ObjectIdGetDatum(rte->relid),
8378 									Int16GetDatum(attnum),
8379 									BoolGetDatum(false));
8380 				vardata.freefunc = ReleaseSysCache;
8381 			}
8382 		}
8383 		else
8384 		{
8385 			/*
8386 			 * Looks like we've found an expression column in the index. Let's
8387 			 * see if there's any stats for it.
8388 			 */
8389 
8390 			/* get the attnum from the 0-based index. */
8391 			attnum = qinfo->indexcol + 1;
8392 
8393 			if (get_index_stats_hook &&
8394 				(*get_index_stats_hook) (root, index->indexoid, attnum, &vardata))
8395 			{
8396 				/*
8397 				 * The hook took control of acquiring a stats tuple.  If it
8398 				 * did supply a tuple, it'd better have supplied a freefunc.
8399 				 */
8400 				if (HeapTupleIsValid(vardata.statsTuple) &&
8401 					!vardata.freefunc)
8402 					elog(ERROR, "no function provided to release variable stats with");
8403 			}
8404 			else
8405 			{
8406 				vardata.statsTuple = SearchSysCache3(STATRELATTINH,
8407 													 ObjectIdGetDatum(index->indexoid),
8408 													 Int16GetDatum(attnum),
8409 													 BoolGetDatum(false));
8410 				vardata.freefunc = ReleaseSysCache;
8411 			}
8412 		}
8413 
8414 		if (HeapTupleIsValid(vardata.statsTuple))
8415 		{
8416 			AttStatsSlot sslot;
8417 
8418 			if (get_attstatsslot(&sslot, vardata.statsTuple,
8419 								 STATISTIC_KIND_CORRELATION, InvalidOid,
8420 								 ATTSTATSSLOT_NUMBERS))
8421 			{
8422 				double		varCorrelation = 0.0;
8423 
8424 				if (sslot.nnumbers > 0)
8425 					varCorrelation = Abs(sslot.numbers[0]);
8426 
8427 				if (varCorrelation > *indexCorrelation)
8428 					*indexCorrelation = varCorrelation;
8429 
8430 				free_attstatsslot(&sslot);
8431 			}
8432 		}
8433 
8434 		ReleaseVariableStats(vardata);
8435 	}
8436 
8437 	qualSelectivity = clauselist_selectivity(root, indexQuals,
8438 											 baserel->relid,
8439 											 JOIN_INNER, NULL);
8440 
8441 	/*
8442 	 * Now calculate the minimum possible ranges we could match with if all of
8443 	 * the rows were in the perfect order in the table's heap.
8444 	 */
8445 	minimalRanges = ceil(indexRanges * qualSelectivity);
8446 
8447 	/*
8448 	 * Now estimate the number of ranges that we'll touch by using the
8449 	 * indexCorrelation from the stats. Careful not to divide by zero (note
8450 	 * we're using the absolute value of the correlation).
8451 	 */
8452 	if (*indexCorrelation < 1.0e-10)
8453 		estimatedRanges = indexRanges;
8454 	else
8455 		estimatedRanges = Min(minimalRanges / *indexCorrelation, indexRanges);
8456 
8457 	/* we expect to visit this portion of the table */
8458 	selec = estimatedRanges / indexRanges;
8459 
8460 	CLAMP_PROBABILITY(selec);
8461 
8462 	*indexSelectivity = selec;
8463 
8464 	/*
8465 	 * Compute the index qual costs, much as in genericcostestimate, to add to
8466 	 * the index costs.
8467 	 */
8468 	qual_arg_cost = other_operands_eval_cost(root, qinfos) +
8469 		orderby_operands_eval_cost(root, path);
8470 
8471 	/*
8472 	 * Compute the startup cost as the cost to read the whole revmap
8473 	 * sequentially, including the cost to execute the index quals.
8474 	 */
8475 	*indexStartupCost =
8476 		spc_seq_page_cost * statsData.revmapNumPages * loop_count;
8477 	*indexStartupCost += qual_arg_cost;
8478 
8479 	/*
8480 	 * To read a BRIN index there might be a bit of back and forth over
8481 	 * regular pages, as revmap might point to them out of sequential order;
8482 	 * calculate the total cost as reading the whole index in random order.
8483 	 */
8484 	*indexTotalCost = *indexStartupCost +
8485 		spc_random_page_cost * (numPages - statsData.revmapNumPages) * loop_count;
8486 
8487 	/*
8488 	 * Charge a small amount per range tuple which we expect to match to. This
8489 	 * is meant to reflect the costs of manipulating the bitmap. The BRIN scan
8490 	 * will set a bit for each page in the range when we find a matching
8491 	 * range, so we must multiply the charge by the number of pages in the
8492 	 * range.
8493 	 */
8494 	*indexTotalCost += 0.1 * cpu_operator_cost * estimatedRanges *
8495 		statsData.pagesPerRange;
8496 
8497 	*indexPages = index->pages;
8498 }
8499