1 /*-------------------------------------------------------------------------
2  *
3  * selfuncs.c
4  *	  Selectivity functions and index cost estimation functions for
5  *	  standard operators and index access methods.
6  *
7  *	  Selectivity routines are registered in the pg_operator catalog
8  *	  in the "oprrest" and "oprjoin" attributes.
9  *
10  *	  Index cost functions are located via the index AM's API struct,
11  *	  which is obtained from the handler function registered in pg_am.
12  *
13  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
14  * Portions Copyright (c) 1994, Regents of the University of California
15  *
16  *
17  * IDENTIFICATION
18  *	  src/backend/utils/adt/selfuncs.c
19  *
20  *-------------------------------------------------------------------------
21  */
22 
23 /*----------
24  * Operator selectivity estimation functions are called to estimate the
25  * selectivity of WHERE clauses whose top-level operator is their operator.
26  * We divide the problem into two cases:
27  *		Restriction clause estimation: the clause involves vars of just
28  *			one relation.
29  *		Join clause estimation: the clause involves vars of multiple rels.
30  * Join selectivity estimation is far more difficult and usually less accurate
31  * than restriction estimation.
32  *
33  * When dealing with the inner scan of a nestloop join, we consider the
34  * join's joinclauses as restriction clauses for the inner relation, and
35  * treat vars of the outer relation as parameters (a/k/a constants of unknown
36  * values).  So, restriction estimators need to be able to accept an argument
37  * telling which relation is to be treated as the variable.
38  *
39  * The call convention for a restriction estimator (oprrest function) is
40  *
41  *		Selectivity oprrest (PlannerInfo *root,
42  *							 Oid operator,
43  *							 List *args,
44  *							 int varRelid);
45  *
46  * root: general information about the query (rtable and RelOptInfo lists
47  * are particularly important for the estimator).
48  * operator: OID of the specific operator in question.
49  * args: argument list from the operator clause.
50  * varRelid: if not zero, the relid (rtable index) of the relation to
51  * be treated as the variable relation.  May be zero if the args list
52  * is known to contain vars of only one relation.
53  *
54  * This is represented at the SQL level (in pg_proc) as
55  *
56  *		float8 oprrest (internal, oid, internal, int4);
57  *
58  * The result is a selectivity, that is, a fraction (0 to 1) of the rows
59  * of the relation that are expected to produce a TRUE result for the
60  * given operator.
61  *
62  * The call convention for a join estimator (oprjoin function) is similar
63  * except that varRelid is not needed, and instead join information is
64  * supplied:
65  *
66  *		Selectivity oprjoin (PlannerInfo *root,
67  *							 Oid operator,
68  *							 List *args,
69  *							 JoinType jointype,
70  *							 SpecialJoinInfo *sjinfo);
71  *
72  *		float8 oprjoin (internal, oid, internal, int2, internal);
73  *
74  * (Before Postgres 8.4, join estimators had only the first four of these
75  * parameters.  That signature is still allowed, but deprecated.)  The
76  * relationship between jointype and sjinfo is explained in the comments for
77  * clause_selectivity() --- the short version is that jointype is usually
78  * best ignored in favor of examining sjinfo.
79  *
80  * Join selectivity for regular inner and outer joins is defined as the
81  * fraction (0 to 1) of the cross product of the relations that is expected
82  * to produce a TRUE result for the given operator.  For both semi and anti
83  * joins, however, the selectivity is defined as the fraction of the left-hand
84  * side relation's rows that are expected to have a match (ie, at least one
85  * row with a TRUE result) in the right-hand side.
86  *
87  * For both oprrest and oprjoin functions, the operator's input collation OID
88  * (if any) is passed using the standard fmgr mechanism, so that the estimator
89  * function can fetch it with PG_GET_COLLATION().  Note, however, that all
90  * statistics in pg_statistic are currently built using the relevant column's
91  * collation.
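 *
 * To make the numbers concrete (purely illustrative values): if a
 * restriction clause "x = 42" is estimated at selectivity 0.01 on a
 * table of 10,000 rows, the planner expects about 100 rows to pass;
 * a join clause estimated at 0.001 over a 1,000 x 1,000 cross product
 * likewise implies roughly 1,000 joined rows.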
92  *----------
93  */
94 
95 #include "postgres.h"
96 
97 #include <ctype.h>
98 #include <math.h>
99 
100 #include "access/brin.h"
101 #include "access/brin_page.h"
102 #include "access/gin.h"
103 #include "access/table.h"
104 #include "access/tableam.h"
105 #include "access/visibilitymap.h"
106 #include "catalog/pg_am.h"
107 #include "catalog/pg_collation.h"
108 #include "catalog/pg_operator.h"
109 #include "catalog/pg_statistic.h"
110 #include "catalog/pg_statistic_ext.h"
111 #include "executor/nodeAgg.h"
112 #include "miscadmin.h"
113 #include "nodes/makefuncs.h"
114 #include "nodes/nodeFuncs.h"
115 #include "optimizer/clauses.h"
116 #include "optimizer/cost.h"
117 #include "optimizer/optimizer.h"
118 #include "optimizer/pathnode.h"
119 #include "optimizer/paths.h"
120 #include "optimizer/plancat.h"
121 #include "parser/parse_clause.h"
122 #include "parser/parsetree.h"
123 #include "statistics/statistics.h"
124 #include "storage/bufmgr.h"
125 #include "utils/acl.h"
126 #include "utils/builtins.h"
127 #include "utils/date.h"
128 #include "utils/datum.h"
129 #include "utils/fmgroids.h"
130 #include "utils/index_selfuncs.h"
131 #include "utils/lsyscache.h"
132 #include "utils/memutils.h"
133 #include "utils/pg_locale.h"
134 #include "utils/rel.h"
135 #include "utils/selfuncs.h"
136 #include "utils/snapmgr.h"
137 #include "utils/spccache.h"
138 #include "utils/syscache.h"
139 #include "utils/timestamp.h"
140 #include "utils/typcache.h"
141 
142 
143 /* source-code-compatibility hacks for pull_varnos() API change */
144 #define pull_varnos(a,b) pull_varnos_new(a,b)
145 #define NumRelids(a,b) NumRelids_new(a,b)
146 
147 /* Hooks for plugins to get control when we ask for stats */
148 get_relation_stats_hook_type get_relation_stats_hook = NULL;
149 get_index_stats_hook_type get_index_stats_hook = NULL;
150 
151 static double eqsel_internal(PG_FUNCTION_ARGS, bool negate);
152 static double eqjoinsel_inner(Oid opfuncoid, Oid collation,
153 							  VariableStatData *vardata1, VariableStatData *vardata2,
154 							  double nd1, double nd2,
155 							  bool isdefault1, bool isdefault2,
156 							  AttStatsSlot *sslot1, AttStatsSlot *sslot2,
157 							  Form_pg_statistic stats1, Form_pg_statistic stats2,
158 							  bool have_mcvs1, bool have_mcvs2);
159 static double eqjoinsel_semi(Oid opfuncoid, Oid collation,
160 							 VariableStatData *vardata1, VariableStatData *vardata2,
161 							 double nd1, double nd2,
162 							 bool isdefault1, bool isdefault2,
163 							 AttStatsSlot *sslot1, AttStatsSlot *sslot2,
164 							 Form_pg_statistic stats1, Form_pg_statistic stats2,
165 							 bool have_mcvs1, bool have_mcvs2,
166 							 RelOptInfo *inner_rel);
167 static bool estimate_multivariate_ndistinct(PlannerInfo *root,
168 											RelOptInfo *rel, List **varinfos, double *ndistinct);
169 static bool convert_to_scalar(Datum value, Oid valuetypid, Oid collid,
170 							  double *scaledvalue,
171 							  Datum lobound, Datum hibound, Oid boundstypid,
172 							  double *scaledlobound, double *scaledhibound);
173 static double convert_numeric_to_scalar(Datum value, Oid typid, bool *failure);
174 static void convert_string_to_scalar(char *value,
175 									 double *scaledvalue,
176 									 char *lobound,
177 									 double *scaledlobound,
178 									 char *hibound,
179 									 double *scaledhibound);
180 static void convert_bytea_to_scalar(Datum value,
181 									double *scaledvalue,
182 									Datum lobound,
183 									double *scaledlobound,
184 									Datum hibound,
185 									double *scaledhibound);
186 static double convert_one_string_to_scalar(char *value,
187 										   int rangelo, int rangehi);
188 static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
189 										  int rangelo, int rangehi);
190 static char *convert_string_datum(Datum value, Oid typid, Oid collid,
191 								  bool *failure);
192 static double convert_timevalue_to_scalar(Datum value, Oid typid,
193 										  bool *failure);
194 static void examine_simple_variable(PlannerInfo *root, Var *var,
195 									VariableStatData *vardata);
196 static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
197 							   Oid sortop, Oid collation,
198 							   Datum *min, Datum *max);
199 static void get_stats_slot_range(AttStatsSlot *sslot,
200 								 Oid opfuncoid, FmgrInfo *opproc,
201 								 Oid collation, int16 typLen, bool typByVal,
202 								 Datum *min, Datum *max, bool *p_have_data);
203 static bool get_actual_variable_range(PlannerInfo *root,
204 									  VariableStatData *vardata,
205 									  Oid sortop, Oid collation,
206 									  Datum *min, Datum *max);
207 static bool get_actual_variable_endpoint(Relation heapRel,
208 										 Relation indexRel,
209 										 ScanDirection indexscandir,
210 										 ScanKey scankeys,
211 										 int16 typLen,
212 										 bool typByVal,
213 										 TupleTableSlot *tableslot,
214 										 MemoryContext outercontext,
215 										 Datum *endpointDatum);
216 static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
217 
218 
219 /*
220  *		eqsel			- Selectivity of "=" for any data types.
221  *
222  * Note: this routine is also used to estimate selectivity for some
223  * operators that are not "=" but have comparable selectivity behavior,
224  * such as "~=" (geometric approximate-match).  Even for "=", we must
225  * keep in mind that the left and right datatypes may differ.
226  */
227 Datum
228 eqsel(PG_FUNCTION_ARGS)
229 {
230 	PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, false));
231 }
232 
233 /*
234  * Common code for eqsel() and neqsel()
235  */
236 static double
237 eqsel_internal(PG_FUNCTION_ARGS, bool negate)
238 {
239 	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
240 	Oid			operator = PG_GETARG_OID(1);
241 	List	   *args = (List *) PG_GETARG_POINTER(2);
242 	int			varRelid = PG_GETARG_INT32(3);
243 	Oid			collation = PG_GET_COLLATION();
244 	VariableStatData vardata;
245 	Node	   *other;
246 	bool		varonleft;
247 	double		selec;
248 
249 	/*
250 	 * When asked about <>, we do the estimation using the corresponding =
251 	 * operator, then convert to <> via "1.0 - eq_selectivity - nullfrac".
252 	 */
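	/*
	 * For illustration (hypothetical numbers): if the "=" selectivity
	 * comes out as 0.01 and the column's null fraction is 0.1, then "<>"
	 * is estimated as 1.0 - 0.01 - 0.1 = 0.89, since NULLs satisfy
	 * neither "=" nor "<>".
	 */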
253 	if (negate)
254 	{
255 		operator = get_negator(operator);
256 		if (!OidIsValid(operator))
257 		{
258 			/* Use default selectivity (should we raise an error instead?) */
259 			return 1.0 - DEFAULT_EQ_SEL;
260 		}
261 	}
262 
263 	/*
264 	 * If expression is not variable = something or something = variable, then
265 	 * punt and return a default estimate.
266 	 */
267 	if (!get_restriction_variable(root, args, varRelid,
268 								  &vardata, &other, &varonleft))
269 		return negate ? (1.0 - DEFAULT_EQ_SEL) : DEFAULT_EQ_SEL;
270 
271 	/*
272 	 * We can do a lot better if the something is a constant.  (Note: the
273 	 * Const might result from estimation rather than being a simple constant
274 	 * in the query.)
275 	 */
276 	if (IsA(other, Const))
277 		selec = var_eq_const(&vardata, operator, collation,
278 							 ((Const *) other)->constvalue,
279 							 ((Const *) other)->constisnull,
280 							 varonleft, negate);
281 	else
282 		selec = var_eq_non_const(&vardata, operator, collation, other,
283 								 varonleft, negate);
284 
285 	ReleaseVariableStats(vardata);
286 
287 	return selec;
288 }
289 
290 /*
291  * var_eq_const --- eqsel for var = const case
292  *
293  * This is exported so that some other estimation functions can use it.
294  */
295 double
296 var_eq_const(VariableStatData *vardata, Oid operator, Oid collation,
297 			 Datum constval, bool constisnull,
298 			 bool varonleft, bool negate)
299 {
300 	double		selec;
301 	double		nullfrac = 0.0;
302 	bool		isdefault;
303 	Oid			opfuncoid;
304 
305 	/*
306 	 * If the constant is NULL, assume operator is strict and return zero, ie,
307 	 * operator will never return TRUE.  (It's zero even for a negator op.)
308 	 */
309 	if (constisnull)
310 		return 0.0;
311 
312 	/*
313 	 * Grab the nullfrac for use below.  Note we allow use of nullfrac
314 	 * regardless of security check.
315 	 */
316 	if (HeapTupleIsValid(vardata->statsTuple))
317 	{
318 		Form_pg_statistic stats;
319 
320 		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
321 		nullfrac = stats->stanullfrac;
322 	}
323 
324 	/*
325 	 * If we matched the var to a unique index or DISTINCT clause, assume
326 	 * there is exactly one match regardless of anything else.  (This is
327 	 * slightly bogus, since the index or clause's equality operator might be
328 	 * different from ours, but it's much more likely to be right than
329 	 * ignoring the information.)
330 	 */
331 	if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
332 	{
333 		selec = 1.0 / vardata->rel->tuples;
334 	}
335 	else if (HeapTupleIsValid(vardata->statsTuple) &&
336 			 statistic_proc_security_check(vardata,
337 										   (opfuncoid = get_opcode(operator))))
338 	{
339 		AttStatsSlot sslot;
340 		bool		match = false;
341 		int			i;
342 
343 		/*
344 		 * Is the constant "=" to any of the column's most common values?
345 		 * (Although the given operator may not really be "=", we will assume
346 		 * that seeing whether it returns TRUE is an appropriate test.  If you
347 		 * don't like this, maybe you shouldn't be using eqsel for your
348 		 * operator...)
349 		 */
350 		if (get_attstatsslot(&sslot, vardata->statsTuple,
351 							 STATISTIC_KIND_MCV, InvalidOid,
352 							 ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
353 		{
354 			LOCAL_FCINFO(fcinfo, 2);
355 			FmgrInfo	eqproc;
356 
357 			fmgr_info(opfuncoid, &eqproc);
358 
359 			/*
360 			 * Save a few cycles by setting up the fcinfo struct just once.
361 			 * Using FunctionCallInvoke directly also avoids failure if the
362 			 * eqproc returns NULL, though really equality functions should
363 			 * never do that.
364 			 */
365 			InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
366 									 NULL, NULL);
367 			fcinfo->args[0].isnull = false;
368 			fcinfo->args[1].isnull = false;
369 			/* be careful to apply operator right way 'round */
370 			if (varonleft)
371 				fcinfo->args[1].value = constval;
372 			else
373 				fcinfo->args[0].value = constval;
374 
375 			for (i = 0; i < sslot.nvalues; i++)
376 			{
377 				Datum		fresult;
378 
379 				if (varonleft)
380 					fcinfo->args[0].value = sslot.values[i];
381 				else
382 					fcinfo->args[1].value = sslot.values[i];
383 				fcinfo->isnull = false;
384 				fresult = FunctionCallInvoke(fcinfo);
385 				if (!fcinfo->isnull && DatumGetBool(fresult))
386 				{
387 					match = true;
388 					break;
389 				}
390 			}
391 		}
392 		else
393 		{
394 			/* no most-common-value info available */
395 			i = 0;				/* keep compiler quiet */
396 		}
397 
398 		if (match)
399 		{
400 			/*
401 			 * Constant is "=" to this common value.  We know selectivity
402 			 * exactly (or as exactly as ANALYZE could calculate it, anyway).
403 			 */
404 			selec = sslot.numbers[i];
405 		}
406 		else
407 		{
408 			/*
409 			 * Comparison is against a constant that is neither NULL nor any
410 			 * of the common values.  Its selectivity cannot be more than
411 			 * this:
412 			 */
413 			double		sumcommon = 0.0;
414 			double		otherdistinct;
415 
416 			for (i = 0; i < sslot.nnumbers; i++)
417 				sumcommon += sslot.numbers[i];
418 			selec = 1.0 - sumcommon - nullfrac;
419 			CLAMP_PROBABILITY(selec);
420 
421 			/*
422 			 * and in fact it's probably a good deal less. We approximate that
423 			 * all the not-common values share this remaining fraction
424 			 * equally, so we divide by the number of other distinct values.
425 			 */
426 			otherdistinct = get_variable_numdistinct(vardata, &isdefault) -
427 				sslot.nnumbers;
428 			if (otherdistinct > 1)
429 				selec /= otherdistinct;
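			/*
			 * Worked example (hypothetical stats): with ndistinct = 1000,
			 * 10 MCVs covering 40% of the rows, and nullfrac = 0, this
			 * gives selec = (1.0 - 0.4 - 0.0) / (1000 - 10), about 0.0006.
			 */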
430 
431 			/*
432 			 * Another cross-check: selectivity shouldn't be estimated as more
433 			 * than the least common "most common value".
434 			 */
435 			if (sslot.nnumbers > 0 && selec > sslot.numbers[sslot.nnumbers - 1])
436 				selec = sslot.numbers[sslot.nnumbers - 1];
437 		}
438 
439 		free_attstatsslot(&sslot);
440 	}
441 	else
442 	{
443 		/*
444 		 * No ANALYZE stats available, so make a guess using estimated number
445 		 * of distinct values and assuming they are equally common. (The guess
446 		 * is unlikely to be very good, but we do know a few special cases.)
447 		 */
448 		selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
449 	}
450 
451 	/* now adjust if we wanted <> rather than = */
452 	if (negate)
453 		selec = 1.0 - selec - nullfrac;
454 
455 	/* result should be in range, but make sure... */
456 	CLAMP_PROBABILITY(selec);
457 
458 	return selec;
459 }
460 
461 /*
462  * var_eq_non_const --- eqsel for var = something-other-than-const case
463  *
464  * This is exported so that some other estimation functions can use it.
465  */
466 double
467 var_eq_non_const(VariableStatData *vardata, Oid operator, Oid collation,
468 				 Node *other,
469 				 bool varonleft, bool negate)
470 {
471 	double		selec;
472 	double		nullfrac = 0.0;
473 	bool		isdefault;
474 
475 	/*
476 	 * Grab the nullfrac for use below.
477 	 */
478 	if (HeapTupleIsValid(vardata->statsTuple))
479 	{
480 		Form_pg_statistic stats;
481 
482 		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
483 		nullfrac = stats->stanullfrac;
484 	}
485 
486 	/*
487 	 * If we matched the var to a unique index or DISTINCT clause, assume
488 	 * there is exactly one match regardless of anything else.  (This is
489 	 * slightly bogus, since the index or clause's equality operator might be
490 	 * different from ours, but it's much more likely to be right than
491 	 * ignoring the information.)
492 	 */
493 	if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
494 	{
495 		selec = 1.0 / vardata->rel->tuples;
496 	}
497 	else if (HeapTupleIsValid(vardata->statsTuple))
498 	{
499 		double		ndistinct;
500 		AttStatsSlot sslot;
501 
502 		/*
503 		 * The search is for a value that we do not know a priori, but we will
504 		 * assume it is not NULL.  Estimate the selectivity as non-null
505 		 * fraction divided by number of distinct values, so that we get a
506 		 * result averaged over all possible values whether common or
507 		 * uncommon.  (Essentially, we are assuming that the not-yet-known
508 		 * comparison value is equally likely to be any of the possible
509 		 * values, regardless of their frequency in the table.  Is that a good
510 		 * idea?)
511 		 */
512 		selec = 1.0 - nullfrac;
513 		ndistinct = get_variable_numdistinct(vardata, &isdefault);
514 		if (ndistinct > 1)
515 			selec /= ndistinct;
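		/*
		 * E.g. (hypothetical stats): with nullfrac = 0.1 and ndistinct =
		 * 100, this yields selec = (1.0 - 0.1) / 100 = 0.009.
		 */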
516 
517 		/*
518 		 * Cross-check: selectivity should never be estimated as more than the
519 		 * most common value's.
520 		 */
521 		if (get_attstatsslot(&sslot, vardata->statsTuple,
522 							 STATISTIC_KIND_MCV, InvalidOid,
523 							 ATTSTATSSLOT_NUMBERS))
524 		{
525 			if (sslot.nnumbers > 0 && selec > sslot.numbers[0])
526 				selec = sslot.numbers[0];
527 			free_attstatsslot(&sslot);
528 		}
529 	}
530 	else
531 	{
532 		/*
533 		 * No ANALYZE stats available, so make a guess using estimated number
534 		 * of distinct values and assuming they are equally common. (The guess
535 		 * is unlikely to be very good, but we do know a few special cases.)
536 		 */
537 		selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
538 	}
539 
540 	/* now adjust if we wanted <> rather than = */
541 	if (negate)
542 		selec = 1.0 - selec - nullfrac;
543 
544 	/* result should be in range, but make sure... */
545 	CLAMP_PROBABILITY(selec);
546 
547 	return selec;
548 }
549 
550 /*
551  *		neqsel			- Selectivity of "!=" for any data types.
552  *
553  * This routine is also used for some operators that are not "!="
554  * but have comparable selectivity behavior.  See above comments
555  * for eqsel().
556  */
557 Datum
558 neqsel(PG_FUNCTION_ARGS)
559 {
560 	PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, true));
561 }
562 
563 /*
564  *	scalarineqsel		- Selectivity of "<", "<=", ">", ">=" for scalars.
565  *
566  * This is the guts of scalarltsel/scalarlesel/scalargtsel/scalargesel.
567  * The isgt and iseq flags distinguish which of the four cases apply.
568  *
569  * The caller has commuted the clause, if necessary, so that we can treat
570  * the variable as being on the left.  The caller must also make sure that
571  * the other side of the clause is a non-null Const, and dissect that into
572  * a value and datatype.  (This definition simplifies some callers that
573  * want to estimate against a computed value instead of a Const node.)
574  *
575  * This routine works for any datatype (or pair of datatypes) known to
576  * convert_to_scalar().  If it is applied to some other datatype,
577  * it will return an approximate estimate based on assuming that the constant
578  * value falls in the middle of the bin identified by binary search.
579  */
580 static double
581 scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq,
582 			  Oid collation,
583 			  VariableStatData *vardata, Datum constval, Oid consttype)
584 {
585 	Form_pg_statistic stats;
586 	FmgrInfo	opproc;
587 	double		mcv_selec,
588 				hist_selec,
589 				sumcommon;
590 	double		selec;
591 
592 	if (!HeapTupleIsValid(vardata->statsTuple))
593 	{
594 		/*
595 		 * No stats are available.  Typically this means we have to fall back
596 		 * on the default estimate; but if the variable is CTID then we can
597 		 * make an estimate based on comparing the constant to the table size.
598 		 */
599 		if (vardata->var && IsA(vardata->var, Var) &&
600 			((Var *) vardata->var)->varattno == SelfItemPointerAttributeNumber)
601 		{
602 			ItemPointer itemptr;
603 			double		block;
604 			double		density;
605 
606 			/*
607 			 * If the relation's empty, we're going to include all of it.
608 			 * (This is mostly to avoid divide-by-zero below.)
609 			 */
610 			if (vardata->rel->pages == 0)
611 				return 1.0;
612 
613 			itemptr = (ItemPointer) DatumGetPointer(constval);
614 			block = ItemPointerGetBlockNumberNoCheck(itemptr);
615 
616 			/*
617 			 * Determine the average number of tuples per page (density).
618 			 *
619 			 * Since the last page will, on average, be only half full, we can
620 			 * estimate it to have half as many tuples as earlier pages.  So
621 			 * give it half the weight of a regular page.
622 			 */
623 			density = vardata->rel->tuples / (vardata->rel->pages - 0.5);
624 
625 			/* If target is the last page, use half the density. */
626 			if (block >= vardata->rel->pages - 1)
627 				density *= 0.5;
628 
629 			/*
630 			 * Using the average tuples per page, calculate how far into the
631 			 * page the itemptr is likely to be and adjust block accordingly,
632 			 * by adding that fraction of a whole block (but never more than a
633 			 * whole block, no matter how high the itemptr's offset is).  Here
634 			 * we are ignoring the possibility of dead-tuple line pointers,
635 			 * which is fairly bogus, but we lack the info to do better.
636 			 */
637 			if (density > 0.0)
638 			{
639 				OffsetNumber offset = ItemPointerGetOffsetNumberNoCheck(itemptr);
640 
641 				block += Min(offset / density, 1.0);
642 			}
643 
644 			/*
645 			 * Convert relative block number to selectivity.  Again, the last
646 			 * page has only half weight.
647 			 */
648 			selec = block / (vardata->rel->pages - 0.5);
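
			/*
			 * Worked example (hypothetical numbers): with pages = 100 and
			 * tuples = 10000, density = 10000 / 99.5, about 100.5 tuples
			 * per page.  For the ctid constant (49,50), block becomes
			 * 49 + Min(50 / 100.5, 1.0), about 49.5, so selec is roughly
			 * 49.5 / 99.5 = 0.497 for the "<=" case.
			 */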
649 
650 			/*
651 			 * The calculation so far gave us a selectivity for the "<=" case.
652 			 * We'll have one less tuple for "<" and one additional tuple for
653 			 * ">=", the latter of which we'll reverse the selectivity for
654 			 * below, so we can simply subtract one tuple for both cases.  The
655 			 * cases that need this adjustment can be identified by iseq being
656 			 * equal to isgt.
657 			 */
658 			if (iseq == isgt && vardata->rel->tuples >= 1.0)
659 				selec -= (1.0 / vardata->rel->tuples);
660 
661 			/* Finally, reverse the selectivity for the ">", ">=" cases. */
662 			if (isgt)
663 				selec = 1.0 - selec;
664 
665 			CLAMP_PROBABILITY(selec);
666 			return selec;
667 		}
668 
669 		/* no stats available, so default result */
670 		return DEFAULT_INEQ_SEL;
671 	}
672 	stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
673 
674 	fmgr_info(get_opcode(operator), &opproc);
675 
676 	/*
677 	 * If we have most-common-values info, add up the fractions of the MCV
678 	 * entries that satisfy MCV OP CONST.  These fractions contribute directly
679 	 * to the result selectivity.  Also add up the total fraction represented
680 	 * by MCV entries.
681 	 */
682 	mcv_selec = mcv_selectivity(vardata, &opproc, collation, constval, true,
683 								&sumcommon);
684 
685 	/*
686 	 * If there is a histogram, determine which bin the constant falls in, and
687 	 * compute the resulting contribution to selectivity.
688 	 */
689 	hist_selec = ineq_histogram_selectivity(root, vardata,
690 											operator, &opproc, isgt, iseq,
691 											collation,
692 											constval, consttype);
693 
694 	/*
695 	 * Now merge the results from the MCV and histogram calculations,
696 	 * realizing that the histogram covers only the non-null values that are
697 	 * not listed in MCV.
698 	 */
699 	selec = 1.0 - stats->stanullfrac - sumcommon;
700 
701 	if (hist_selec >= 0.0)
702 		selec *= hist_selec;
703 	else
704 	{
705 		/*
706 		 * If no histogram but there are values not accounted for by MCV,
707 		 * arbitrarily assume half of them will match.
708 		 */
709 		selec *= 0.5;
710 	}
711 
712 	selec += mcv_selec;
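
	/*
	 * E.g. (hypothetical numbers): with stanullfrac = 0.1, sumcommon = 0.3,
	 * hist_selec = 0.5, and mcv_selec = 0.2, the result is
	 * (1.0 - 0.1 - 0.3) * 0.5 + 0.2 = 0.5.
	 */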
713 
714 	/* result should be in range, but make sure... */
715 	CLAMP_PROBABILITY(selec);
716 
717 	return selec;
718 }
719 
720 /*
721  *	mcv_selectivity			- Examine the MCV list for selectivity estimates
722  *
723  * Determine the fraction of the variable's MCV population that satisfies
724  * the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft.  Also
725  * compute the fraction of the total column population represented by the MCV
726  * list.  This code will work for any boolean-returning predicate operator.
727  *
728  * The function result is the MCV selectivity, and the fraction of the
729  * total population is returned into *sumcommonp.  Zeroes are returned
730  * if there is no MCV list.
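 *
 * For example (hypothetical stats): with an MCV list {x: 0.2, y: 0.1}
 * and the predicate VAR < y (where x sorts before y), the result would
 * be 0.2 and *sumcommonp would be set to 0.3.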
731  */
732 double
733 mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation,
734 				Datum constval, bool varonleft,
735 				double *sumcommonp)
736 {
737 	double		mcv_selec,
738 				sumcommon;
739 	AttStatsSlot sslot;
740 	int			i;
741 
742 	mcv_selec = 0.0;
743 	sumcommon = 0.0;
744 
745 	if (HeapTupleIsValid(vardata->statsTuple) &&
746 		statistic_proc_security_check(vardata, opproc->fn_oid) &&
747 		get_attstatsslot(&sslot, vardata->statsTuple,
748 						 STATISTIC_KIND_MCV, InvalidOid,
749 						 ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
750 	{
751 		LOCAL_FCINFO(fcinfo, 2);
752 
753 		/*
754 		 * We invoke the opproc "by hand" so that we won't fail on NULL
755 		 * results.  Such cases won't arise for normal comparison functions,
756 		 * but generic_restriction_selectivity could perhaps be used with
757 		 * operators that can return NULL.  A small side benefit is to not
758 		 * need to re-initialize the fcinfo struct from scratch each time.
759 		 */
760 		InitFunctionCallInfoData(*fcinfo, opproc, 2, collation,
761 								 NULL, NULL);
762 		fcinfo->args[0].isnull = false;
763 		fcinfo->args[1].isnull = false;
764 		/* be careful to apply operator right way 'round */
765 		if (varonleft)
766 			fcinfo->args[1].value = constval;
767 		else
768 			fcinfo->args[0].value = constval;
769 
770 		for (i = 0; i < sslot.nvalues; i++)
771 		{
772 			Datum		fresult;
773 
774 			if (varonleft)
775 				fcinfo->args[0].value = sslot.values[i];
776 			else
777 				fcinfo->args[1].value = sslot.values[i];
778 			fcinfo->isnull = false;
779 			fresult = FunctionCallInvoke(fcinfo);
780 			if (!fcinfo->isnull && DatumGetBool(fresult))
781 				mcv_selec += sslot.numbers[i];
782 			sumcommon += sslot.numbers[i];
783 		}
784 		free_attstatsslot(&sslot);
785 	}
786 
787 	*sumcommonp = sumcommon;
788 	return mcv_selec;
789 }
790 
791 /*
792  *	histogram_selectivity	- Examine the histogram for selectivity estimates
793  *
794  * Determine the fraction of the variable's histogram entries that satisfy
795  * the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft.
796  *
797  * This code will work for any boolean-returning predicate operator, whether
798  * or not it has anything to do with the histogram sort operator.  We are
799  * essentially using the histogram just as a representative sample.  However,
800  * small histograms are unlikely to be all that representative, so the caller
801  * should be prepared to fall back on some other estimation approach when the
802  * histogram is missing or very small.  It may also be prudent to combine this
803  * approach with another one when the histogram is small.
804  *
805  * If the actual histogram size is not at least min_hist_size, we won't bother
806  * to do the calculation at all.  Also, if the n_skip parameter is > 0, we
807  * ignore the first and last n_skip histogram elements, on the grounds that
808  * they are outliers and hence not very representative.  Typical values for
809  * these parameters are 10 and 1.
810  *
811  * The function result is the selectivity, or -1 if there is no histogram
812  * or it's smaller than min_hist_size.
813  *
814  * The output parameter *hist_size receives the actual histogram size,
815  * or zero if no histogram.  Callers may use this number to decide how
816  * much faith to put in the function result.
817  *
818  * Note that the result disregards both the most-common-values (if any) and
819  * null entries.  The caller is expected to combine this result with
820  * statistics for those portions of the column population.  It may also be
821  * prudent to clamp the result range, ie, disbelieve exact 0 or 1 outputs.
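 *
 * A typical call, mirroring the one in generic_restriction_selectivity()
 * below, looks like
 *
 *		selec = histogram_selectivity(&vardata, &opproc, collation,
 *									  constval, varonleft,
 *									  10, 1, &hist_size);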
822  */
823 double
824 histogram_selectivity(VariableStatData *vardata,
825 					  FmgrInfo *opproc, Oid collation,
826 					  Datum constval, bool varonleft,
827 					  int min_hist_size, int n_skip,
828 					  int *hist_size)
829 {
830 	double		result;
831 	AttStatsSlot sslot;
832 
833 	/* check sanity of parameters */
834 	Assert(n_skip >= 0);
835 	Assert(min_hist_size > 2 * n_skip);
836 
837 	if (HeapTupleIsValid(vardata->statsTuple) &&
838 		statistic_proc_security_check(vardata, opproc->fn_oid) &&
839 		get_attstatsslot(&sslot, vardata->statsTuple,
840 						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
841 						 ATTSTATSSLOT_VALUES))
842 	{
843 		*hist_size = sslot.nvalues;
844 		if (sslot.nvalues >= min_hist_size)
845 		{
846 			LOCAL_FCINFO(fcinfo, 2);
847 			int			nmatch = 0;
848 			int			i;
849 
850 			/*
851 			 * We invoke the opproc "by hand" so that we won't fail on NULL
852 			 * results.  Such cases won't arise for normal comparison
853 			 * functions, but generic_restriction_selectivity could perhaps be
854 			 * used with operators that can return NULL.  A small side benefit
855 			 * is to not need to re-initialize the fcinfo struct from scratch
856 			 * each time.
857 			 */
858 			InitFunctionCallInfoData(*fcinfo, opproc, 2, collation,
859 									 NULL, NULL);
860 			fcinfo->args[0].isnull = false;
861 			fcinfo->args[1].isnull = false;
862 			/* be careful to apply operator right way 'round */
863 			if (varonleft)
864 				fcinfo->args[1].value = constval;
865 			else
866 				fcinfo->args[0].value = constval;
867 
868 			for (i = n_skip; i < sslot.nvalues - n_skip; i++)
869 			{
870 				Datum		fresult;
871 
872 				if (varonleft)
873 					fcinfo->args[0].value = sslot.values[i];
874 				else
875 					fcinfo->args[1].value = sslot.values[i];
876 				fcinfo->isnull = false;
877 				fresult = FunctionCallInvoke(fcinfo);
878 				if (!fcinfo->isnull && DatumGetBool(fresult))
879 					nmatch++;
880 			}
881 			result = ((double) nmatch) / ((double) (sslot.nvalues - 2 * n_skip));
882 		}
883 		else
884 			result = -1;
885 		free_attstatsslot(&sslot);
886 	}
887 	else
888 	{
889 		*hist_size = 0;
890 		result = -1;
891 	}
892 
893 	return result;
894 }
895 
896 /*
897  *	generic_restriction_selectivity		- Selectivity for almost anything
898  *
899  * This function estimates selectivity for operators that we don't have any
900  * special knowledge about, but are on data types that we collect standard
901  * MCV and/or histogram statistics for.  (Additional assumptions are that
902  * the operator is strict and immutable, or at least stable.)
903  *
904  * If we have "VAR OP CONST" or "CONST OP VAR", selectivity is estimated by
905  * applying the operator to each element of the column's MCV and/or histogram
906  * stats, and merging the results using the assumption that the histogram is
907  * a reasonable random sample of the column's non-MCV population.  Note that
908  * if the operator's semantics are related to the histogram ordering, this
909  * might not be such a great assumption; other functions such as
910  * scalarineqsel() are probably a better match in such cases.
911  *
912  * Otherwise, fall back to the default selectivity provided by the caller.
913  */
914 double
915 generic_restriction_selectivity(PlannerInfo *root, Oid oproid, Oid collation,
916 								List *args, int varRelid,
917 								double default_selectivity)
918 {
919 	double		selec;
920 	VariableStatData vardata;
921 	Node	   *other;
922 	bool		varonleft;
923 
924 	/*
925 	 * If expression is not variable OP something or something OP variable,
926 	 * then punt and return the default estimate.
927 	 */
928 	if (!get_restriction_variable(root, args, varRelid,
929 								  &vardata, &other, &varonleft))
930 		return default_selectivity;
931 
932 	/*
933 	 * If the something is a NULL constant, assume operator is strict and
934 	 * return zero, ie, operator will never return TRUE.
935 	 */
936 	if (IsA(other, Const) &&
937 		((Const *) other)->constisnull)
938 	{
939 		ReleaseVariableStats(vardata);
940 		return 0.0;
941 	}
942 
943 	if (IsA(other, Const))
944 	{
945 		/* Variable is being compared to a known non-null constant */
946 		Datum		constval = ((Const *) other)->constvalue;
947 		FmgrInfo	opproc;
948 		double		mcvsum;
949 		double		mcvsel;
950 		double		nullfrac;
951 		int			hist_size;
952 
953 		fmgr_info(get_opcode(oproid), &opproc);
954 
955 		/*
956 		 * Calculate the selectivity for the column's most common values.
957 		 */
958 		mcvsel = mcv_selectivity(&vardata, &opproc, collation,
959 								 constval, varonleft,
960 								 &mcvsum);
961 
962 		/*
963 		 * If the histogram is large enough, see what fraction of it matches
964 		 * the query, and assume that's representative of the non-MCV
965 		 * population.  Otherwise use the default selectivity for the non-MCV
966 		 * population.
967 		 */
968 		selec = histogram_selectivity(&vardata, &opproc, collation,
969 									  constval, varonleft,
970 									  10, 1, &hist_size);
971 		if (selec < 0)
972 		{
973 			/* Nope, fall back on default */
974 			selec = default_selectivity;
975 		}
976 		else if (hist_size < 100)
977 		{
978 			/*
979 			 * For histogram sizes from 10 to 100, we combine the histogram
980 			 * and default selectivities, putting increasingly more trust in
981 			 * the histogram for larger sizes.
982 			 */
983 			double		hist_weight = hist_size / 100.0;
984 
985 			selec = selec * hist_weight +
986 				default_selectivity * (1.0 - hist_weight);
987 		}
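
			/*
			 * E.g., hist_size = 50 gives hist_weight = 0.5, i.e. a simple
			 * average of the histogram estimate and default_selectivity.
			 */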
988 
989 		/* In any case, don't believe extremely small or large estimates. */
990 		if (selec < 0.0001)
991 			selec = 0.0001;
992 		else if (selec > 0.9999)
993 			selec = 0.9999;
994 
995 		/* Don't forget to account for nulls. */
996 		if (HeapTupleIsValid(vardata.statsTuple))
997 			nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
998 		else
999 			nullfrac = 0.0;
1000 
1001 		/*
1002 		 * Now merge the results from the MCV and histogram calculations,
1003 		 * realizing that the histogram covers only the non-null values that
1004 		 * are not listed in MCV.
1005 		 */
1006 		selec *= 1.0 - nullfrac - mcvsum;
1007 		selec += mcvsel;
1008 	}
1009 	else
1010 	{
1011 		/* Comparison value is not constant, so we can't do anything */
1012 		selec = default_selectivity;
1013 	}
1014 
1015 	ReleaseVariableStats(vardata);
1016 
1017 	/* result should be in range, but make sure... */
1018 	CLAMP_PROBABILITY(selec);
1019 
1020 	return selec;
1021 }
1022 
1023 /*
1024  *	ineq_histogram_selectivity	- Examine the histogram for scalarineqsel
1025  *
1026  * Determine the fraction of the variable's histogram population that
1027  * satisfies the inequality condition, ie, VAR < (or <=, >, >=) CONST.
1028  * The isgt and iseq flags distinguish which of the four cases apply.
1029  *
1030  * While opproc could be looked up from the operator OID, common callers
1031  * also need to call it separately, so we make the caller pass both.
1032  *
1033  * Returns -1 if there is no histogram (valid results will always be >= 0).
1034  *
1035  * Note that the result disregards both the most-common-values (if any) and
1036  * null entries.  The caller is expected to combine this result with
1037  * statistics for those portions of the column population.
1038  *
1039  * This is exported so that some other estimation functions can use it.
1040  */
1041 double
1042 ineq_histogram_selectivity(PlannerInfo *root,
1043 						   VariableStatData *vardata,
1044 						   Oid opoid, FmgrInfo *opproc, bool isgt, bool iseq,
1045 						   Oid collation,
1046 						   Datum constval, Oid consttype)
1047 {
1048 	double		hist_selec;
1049 	AttStatsSlot sslot;
1050 
1051 	hist_selec = -1.0;
1052 
1053 	/*
1054 	 * Someday, ANALYZE might store more than one histogram per rel/att,
1055 	 * corresponding to more than one possible sort ordering defined for the
1056 	 * column type.  Right now, we know there is only one, so just grab it and
1057 	 * see if it matches the query.
1058 	 *
1059 	 * Note that we can't use opoid as search argument; the staop appearing in
1060 	 * pg_statistic will be for the relevant '<' operator, but what we have
1061 	 * might be some other inequality operator such as '>='.  (Even if opoid
1062 	 * is a '<' operator, it could be cross-type.)  Hence we must use
1063 	 * comparison_ops_are_compatible() to see if the operators match.
1064 	 */
1065 	if (HeapTupleIsValid(vardata->statsTuple) &&
1066 		statistic_proc_security_check(vardata, opproc->fn_oid) &&
1067 		get_attstatsslot(&sslot, vardata->statsTuple,
1068 						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
1069 						 ATTSTATSSLOT_VALUES))
1070 	{
1071 		if (sslot.nvalues > 1 &&
1072 			sslot.stacoll == collation &&
1073 			comparison_ops_are_compatible(sslot.staop, opoid))
1074 		{
1075 			/*
1076 			 * Use binary search to find the desired location, namely the
1077 			 * right end of the histogram bin containing the comparison value,
1078 			 * which is the leftmost entry for which the comparison operator
1079 			 * succeeds (if isgt) or fails (if !isgt).
1080 			 *
1081 			 * In this loop, we pay no attention to whether the operator iseq
1082 			 * or not; that detail will be mopped up below.  (We cannot tell,
1083 			 * anyway, whether the operator thinks the values are equal.)
1084 			 *
1085 			 * If the binary search accesses the first or last histogram
1086 			 * entry, we try to replace that endpoint with the true column min
1087 			 * or max as found by get_actual_variable_range().  This
1088 			 * ameliorates misestimates when the min or max is moving as a
1089 			 * result of changes since the last ANALYZE.  Note that this could
1090 			 * result in effectively including MCVs into the histogram that
1091 			 * weren't there before, but we don't try to correct for that.
1092 			 */
1093 			double		histfrac;
1094 			int			lobound = 0;	/* first possible slot to search */
1095 			int			hibound = sslot.nvalues;	/* last+1 slot to search */
1096 			bool		have_end = false;
1097 
1098 			/*
1099 			 * If there are only two histogram entries, we'll want up-to-date
1100 			 * values for both.  (If there are more than two, we need at most
1101 			 * one of them to be updated, so we deal with that within the
1102 			 * loop.)
1103 			 */
1104 			if (sslot.nvalues == 2)
1105 				have_end = get_actual_variable_range(root,
1106 													 vardata,
1107 													 sslot.staop,
1108 													 collation,
1109 													 &sslot.values[0],
1110 													 &sslot.values[1]);
1111 
1112 			while (lobound < hibound)
1113 			{
1114 				int			probe = (lobound + hibound) / 2;
1115 				bool		ltcmp;
1116 
1117 				/*
1118 				 * If we find ourselves about to compare to the first or last
1119 				 * histogram entry, first try to replace it with the actual
1120 				 * current min or max (unless we already did so above).
1121 				 */
1122 				if (probe == 0 && sslot.nvalues > 2)
1123 					have_end = get_actual_variable_range(root,
1124 														 vardata,
1125 														 sslot.staop,
1126 														 collation,
1127 														 &sslot.values[0],
1128 														 NULL);
1129 				else if (probe == sslot.nvalues - 1 && sslot.nvalues > 2)
1130 					have_end = get_actual_variable_range(root,
1131 														 vardata,
1132 														 sslot.staop,
1133 														 collation,
1134 														 NULL,
1135 														 &sslot.values[probe]);
1136 
1137 				ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
1138 													   collation,
1139 													   sslot.values[probe],
1140 													   constval));
1141 				if (isgt)
1142 					ltcmp = !ltcmp;
1143 				if (ltcmp)
1144 					lobound = probe + 1;
1145 				else
1146 					hibound = probe;
1147 			}
1148 
1149 			if (lobound <= 0)
1150 			{
1151 				/*
1152 				 * Constant is below lower histogram boundary.  More
1153 				 * precisely, we have found that no entry in the histogram
1154 				 * satisfies the inequality clause (if !isgt) or they all do
1155 				 * (if isgt).  We estimate that that's true of the entire
1156 				 * table, so set histfrac to 0.0 (which we'll flip to 1.0
1157 				 * below, if isgt).
1158 				 */
1159 				histfrac = 0.0;
1160 			}
1161 			else if (lobound >= sslot.nvalues)
1162 			{
1163 				/*
1164 				 * Inverse case: constant is above upper histogram boundary.
1165 				 */
1166 				histfrac = 1.0;
1167 			}
1168 			else
1169 			{
1170 				/* We have values[i-1] <= constant <= values[i]. */
1171 				int			i = lobound;
1172 				double		eq_selec = 0;
1173 				double		val,
1174 							high,
1175 							low;
1176 				double		binfrac;
1177 
1178 				/*
1179 				 * In the cases where we'll need it below, obtain an estimate
1180 				 * of the selectivity of "x = constval".  We use a calculation
1181 				 * similar to what var_eq_const() does for a non-MCV constant,
1182 				 * ie, estimate that all distinct non-MCV values occur equally
1183 				 * often.  But multiplication by "1.0 - sumcommon - nullfrac"
1184 				 * will be done by our caller, so we shouldn't do that here.
1185 				 * Therefore we can't try to clamp the estimate by reference
1186 				 * to the least common MCV; the result would be too small.
1187 				 *
1188 				 * Note: since this is effectively assuming that constval
1189 				 * isn't an MCV, it's logically dubious if constval in fact is
1190 				 * one.  But we have to apply *some* correction for equality,
1191 				 * and anyway we cannot tell if constval is an MCV, since we
1192 				 * don't have a suitable equality operator at hand.
1193 				 */
1194 				if (i == 1 || isgt == iseq)
1195 				{
1196 					double		otherdistinct;
1197 					bool		isdefault;
1198 					AttStatsSlot mcvslot;
1199 
1200 					/* Get estimated number of distinct values */
1201 					otherdistinct = get_variable_numdistinct(vardata,
1202 															 &isdefault);
1203 
1204 					/* Subtract off the number of known MCVs */
1205 					if (get_attstatsslot(&mcvslot, vardata->statsTuple,
1206 										 STATISTIC_KIND_MCV, InvalidOid,
1207 										 ATTSTATSSLOT_NUMBERS))
1208 					{
1209 						otherdistinct -= mcvslot.nnumbers;
1210 						free_attstatsslot(&mcvslot);
1211 					}
1212 
1213 					/* If result doesn't seem sane, leave eq_selec at 0 */
1214 					if (otherdistinct > 1)
1215 						eq_selec = 1.0 / otherdistinct;
1216 				}
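
				/*
				 * E.g. (hypothetical stats): with 1000 distinct values of
				 * which 10 are MCVs, eq_selec = 1.0 / 990, about 0.001.
				 */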
1217 
1218 				/*
1219 				 * Convert the constant and the two nearest bin boundary
1220 				 * values to a uniform comparison scale, and do a linear
1221 				 * interpolation within this bin.
1222 				 */
1223 				if (convert_to_scalar(constval, consttype, collation,
1224 									  &val,
1225 									  sslot.values[i - 1], sslot.values[i],
1226 									  vardata->vartype,
1227 									  &low, &high))
1228 				{
1229 					if (high <= low)
1230 					{
1231 						/* cope if bin boundaries appear identical */
1232 						binfrac = 0.5;
1233 					}
1234 					else if (val <= low)
1235 						binfrac = 0.0;
1236 					else if (val >= high)
1237 						binfrac = 1.0;
1238 					else
1239 					{
1240 						binfrac = (val - low) / (high - low);
1241 
1242 						/*
1243 						 * Watch out for the possibility that we got a NaN or
1244 						 * Infinity from the division.  This can happen
1245 						 * despite the previous checks, if for example "low"
1246 						 * is -Infinity.
1247 						 */
1248 						if (isnan(binfrac) ||
1249 							binfrac < 0.0 || binfrac > 1.0)
1250 							binfrac = 0.5;
1251 					}
1252 				}
1253 				else
1254 				{
1255 					/*
1256 					 * Ideally we'd produce an error here, on the grounds that
1257 					 * the given operator shouldn't have scalarXXsel
1258 					 * registered as its selectivity func unless we can deal
1259 					 * with its operand types.  But currently, all manner of
1260 					 * stuff is invoking scalarXXsel, so give a default
1261 					 * estimate until that can be fixed.
1262 					 */
1263 					binfrac = 0.5;
1264 				}
1265 
1266 				/*
1267 				 * Now, compute the overall selectivity across the values
1268 				 * represented by the histogram.  We have i-1 full bins plus
1269 				 * a binfrac fraction of one partial bin below the constant.
1270 				 */
1271 				histfrac = (double) (i - 1) + binfrac;
1272 				histfrac /= (double) (sslot.nvalues - 1);
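
				/*
				 * Worked example (hypothetical histogram): with 101
				 * histogram entries (100 bins), i = 11, and binfrac = 0.25,
				 * histfrac = (10 + 0.25) / 100 = 0.1025.
				 */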
1273 
1274 				/*
1275 				 * At this point, histfrac is an estimate of the fraction of
1276 				 * the population represented by the histogram that satisfies
1277 				 * "x <= constval".  Somewhat remarkably, this statement is
1278 				 * true regardless of which operator we were doing the probes
1279 				 * with, so long as convert_to_scalar() delivers reasonable
1280 				 * results.  If the probe constant is equal to some histogram
1281 				 * entry, we would have considered the bin to the left of that
1282 				 * entry if probing with "<" or ">=", or the bin to the right
1283 				 * if probing with "<=" or ">"; but binfrac would have come
1284 				 * out as 1.0 in the first case and 0.0 in the second, leading
1285 				 * to the same histfrac in either case.  For probe constants
1286 				 * between histogram entries, we find the same bin and get the
1287 				 * same estimate with any operator.
1288 				 *
1289 				 * The fact that the estimate corresponds to "x <= constval"
1290 				 * and not "x < constval" is because of the way that ANALYZE
1291 				 * constructs the histogram: each entry is, effectively, the
1292 				 * rightmost value in its sample bucket.  So selectivity
1293 				 * values that are exact multiples of 1/(histogram_size-1)
1294 				 * should be understood as estimates including a histogram
1295 				 * entry plus everything to its left.
1296 				 *
1297 				 * However, that breaks down for the first histogram entry,
1298 				 * which necessarily is the leftmost value in its sample
1299 				 * bucket.  That means the first histogram bin is slightly
1300 				 * narrower than the rest, by an amount equal to eq_selec.
1301 				 * Another way to say that is that we want "x <= leftmost" to
1302 				 * be estimated as eq_selec not zero.  So, if we're dealing
1303 				 * with the first bin (i==1), rescale to make that true while
1304 				 * adjusting the rest of that bin linearly.
1305 				 */
1306 				if (i == 1)
1307 					histfrac += eq_selec * (1.0 - binfrac);
1308 
1309 				/*
1310 				 * "x <= constval" is good if we want an estimate for "<=" or
1311 				 * ">", but if we are estimating for "<" or ">=", we now need
1312 				 * to decrease the estimate by eq_selec.
1313 				 */
1314 				if (isgt == iseq)
1315 					histfrac -= eq_selec;
1316 			}
1317 
1318 			/*
1319 			 * Now the estimate is finished for "<" and "<=" cases.  If we are
1320 			 * estimating for ">" or ">=", flip it.
1321 			 */
1322 			hist_selec = isgt ? (1.0 - histfrac) : histfrac;
1323 
1324 			/*
1325 			 * The histogram boundaries are only approximate to begin with,
1326 			 * and may well be out of date anyway.  Therefore, don't believe
1327 			 * extremely small or large selectivity estimates --- unless we
1328 			 * got actual current endpoint values from the table, in which
1329 			 * case just do the usual sanity clamp.  Somewhat arbitrarily, we
1330 			 * set the cutoff for other cases at a hundredth of the histogram
1331 			 * resolution.
1332 			 */
1333 			if (have_end)
1334 				CLAMP_PROBABILITY(hist_selec);
1335 			else
1336 			{
1337 				double		cutoff = 0.01 / (double) (sslot.nvalues - 1);
1338 
1339 				if (hist_selec < cutoff)
1340 					hist_selec = cutoff;
1341 				else if (hist_selec > 1.0 - cutoff)
1342 					hist_selec = 1.0 - cutoff;
1343 			}
1344 		}
1345 		else if (sslot.nvalues > 1)
1346 		{
1347 			/*
1348 			 * If we get here, we have a histogram but it's not sorted the way
1349 			 * we want.  Do a brute-force search to see how many of the
1350 			 * entries satisfy the comparison condition, and take that
1351 			 * fraction as our estimate.  (This is identical to the inner loop
1352 			 * of histogram_selectivity; maybe share code?)
1353 			 */
1354 			LOCAL_FCINFO(fcinfo, 2);
1355 			int			nmatch = 0;
1356 
1357 			InitFunctionCallInfoData(*fcinfo, opproc, 2, collation,
1358 									 NULL, NULL);
1359 			fcinfo->args[0].isnull = false;
1360 			fcinfo->args[1].isnull = false;
1361 			fcinfo->args[1].value = constval;
1362 			for (int i = 0; i < sslot.nvalues; i++)
1363 			{
1364 				Datum		fresult;
1365 
1366 				fcinfo->args[0].value = sslot.values[i];
1367 				fcinfo->isnull = false;
1368 				fresult = FunctionCallInvoke(fcinfo);
1369 				if (!fcinfo->isnull && DatumGetBool(fresult))
1370 					nmatch++;
1371 			}
1372 			hist_selec = ((double) nmatch) / ((double) sslot.nvalues);
1373 
1374 			/*
1375 			 * As above, clamp to a hundredth of the histogram resolution.
1376 			 * This case is surely even less trustworthy than the normal one,
1377 			 * so we shouldn't believe exact 0 or 1 selectivity.  (Maybe the
1378 			 * clamp should be more restrictive in this case?)
1379 			 */
1380 			{
1381 				double		cutoff = 0.01 / (double) (sslot.nvalues - 1);
1382 
1383 				if (hist_selec < cutoff)
1384 					hist_selec = cutoff;
1385 				else if (hist_selec > 1.0 - cutoff)
1386 					hist_selec = 1.0 - cutoff;
1387 			}
1388 		}
1389 
1390 		free_attstatsslot(&sslot);
1391 	}
1392 
1393 	return hist_selec;
1394 }
1395 
1396 /*
1397  * Common wrapper function for the selectivity estimators that simply
1398  * invoke scalarineqsel().
1399  */
1400 static Datum
1401 scalarineqsel_wrapper(PG_FUNCTION_ARGS, bool isgt, bool iseq)
1402 {
1403 	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1404 	Oid			operator = PG_GETARG_OID(1);
1405 	List	   *args = (List *) PG_GETARG_POINTER(2);
1406 	int			varRelid = PG_GETARG_INT32(3);
1407 	Oid			collation = PG_GET_COLLATION();
1408 	VariableStatData vardata;
1409 	Node	   *other;
1410 	bool		varonleft;
1411 	Datum		constval;
1412 	Oid			consttype;
1413 	double		selec;
1414 
1415 	/*
1416 	 * If expression is not variable op something or something op variable,
1417 	 * then punt and return a default estimate.
1418 	 */
1419 	if (!get_restriction_variable(root, args, varRelid,
1420 								  &vardata, &other, &varonleft))
1421 		PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
1422 
1423 	/*
1424 	 * Can't do anything useful if the something is not a constant, either.
1425 	 */
1426 	if (!IsA(other, Const))
1427 	{
1428 		ReleaseVariableStats(vardata);
1429 		PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
1430 	}
1431 
1432 	/*
1433 	 * If the constant is NULL, assume operator is strict and return zero, ie,
1434 	 * operator will never return TRUE.
1435 	 */
1436 	if (((Const *) other)->constisnull)
1437 	{
1438 		ReleaseVariableStats(vardata);
1439 		PG_RETURN_FLOAT8(0.0);
1440 	}
1441 	constval = ((Const *) other)->constvalue;
1442 	consttype = ((Const *) other)->consttype;
1443 
1444 	/*
1445 	 * Force the var to be on the left to simplify logic in scalarineqsel.
1446 	 */
1447 	if (!varonleft)
1448 	{
1449 		operator = get_commutator(operator);
1450 		if (!operator)
1451 		{
1452 			/* Use default selectivity (should we raise an error instead?) */
1453 			ReleaseVariableStats(vardata);
1454 			PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
1455 		}
1456 		isgt = !isgt;
1457 	}
1458 
1459 	/* The rest of the work is done by scalarineqsel(). */
1460 	selec = scalarineqsel(root, operator, isgt, iseq, collation,
1461 						  &vardata, constval, consttype);
1462 
1463 	ReleaseVariableStats(vardata);
1464 
1465 	PG_RETURN_FLOAT8((float8) selec);
1466 }
1467 
1468 /*
1469  *		scalarltsel		- Selectivity of "<" for scalars.
1470  */
1471 Datum
1472 scalarltsel(PG_FUNCTION_ARGS)
1473 {
1474 	return scalarineqsel_wrapper(fcinfo, false, false);
1475 }
1476 
1477 /*
1478  *		scalarlesel		- Selectivity of "<=" for scalars.
1479  */
1480 Datum
1481 scalarlesel(PG_FUNCTION_ARGS)
1482 {
1483 	return scalarineqsel_wrapper(fcinfo, false, true);
1484 }
1485 
1486 /*
1487  *		scalargtsel		- Selectivity of ">" for scalars.
1488  */
1489 Datum
1490 scalargtsel(PG_FUNCTION_ARGS)
1491 {
1492 	return scalarineqsel_wrapper(fcinfo, true, false);
1493 }
1494 
1495 /*
1496  *		scalargesel		- Selectivity of ">=" for scalars.
1497  */
1498 Datum
1499 scalargesel(PG_FUNCTION_ARGS)
1500 {
1501 	return scalarineqsel_wrapper(fcinfo, true, true);
1502 }
1503 
1504 /*
1505  *		boolvarsel		- Selectivity of Boolean variable.
1506  *
1507  * This can actually be called on any boolean-valued expression.  If it
1508  * involves only Vars of the specified relation, and if there are statistics
1509  * about the Var or expression (the latter is possible if it's indexed) then
1510  * we'll produce a real estimate; otherwise it's just a default.
1511  */
1512 Selectivity
1513 boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
1514 {
1515 	VariableStatData vardata;
1516 	double		selec;
1517 
1518 	examine_variable(root, arg, varRelid, &vardata);
1519 	if (HeapTupleIsValid(vardata.statsTuple))
1520 	{
1521 		/*
1522 		 * A boolean variable V is equivalent to the clause V = 't', so we
1523 		 * compute the selectivity as if that is what we have.
1524 		 */
1525 		selec = var_eq_const(&vardata, BooleanEqualOperator, InvalidOid,
1526 							 BoolGetDatum(true), false, true, false);
1527 	}
1528 	else
1529 	{
1530 		/* Otherwise, the default estimate is 0.5 */
1531 		selec = 0.5;
1532 	}
1533 	ReleaseVariableStats(vardata);
1534 	return selec;
1535 }
1536 
1537 /*
1538  *		booltestsel		- Selectivity of BooleanTest Node.
1539  */
1540 Selectivity
1541 booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
1542 			int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
1543 {
1544 	VariableStatData vardata;
1545 	double		selec;
1546 
1547 	examine_variable(root, arg, varRelid, &vardata);
1548 
1549 	if (HeapTupleIsValid(vardata.statsTuple))
1550 	{
1551 		Form_pg_statistic stats;
1552 		double		freq_null;
1553 		AttStatsSlot sslot;
1554 
1555 		stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1556 		freq_null = stats->stanullfrac;
1557 
1558 		if (get_attstatsslot(&sslot, vardata.statsTuple,
1559 							 STATISTIC_KIND_MCV, InvalidOid,
1560 							 ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)
1561 			&& sslot.nnumbers > 0)
1562 		{
1563 			double		freq_true;
1564 			double		freq_false;
1565 
1566 			/*
1567 			 * Get first MCV frequency and derive frequency for true.
1568 			 */
1569 			if (DatumGetBool(sslot.values[0]))
1570 				freq_true = sslot.numbers[0];
1571 			else
1572 				freq_true = 1.0 - sslot.numbers[0] - freq_null;
1573 
1574 			/*
1575 			 * Next derive the frequency for false.  Then use these frequencies
1576 			 * as appropriate to derive the selectivity for each test type.
1577 			 */
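			/*
			 * For example, if the first MCV is 'true' with frequency 0.6
			 * and stanullfrac is 0.1, we get freq_true = 0.6 and
			 * freq_false = 1.0 - 0.6 - 0.1 = 0.3.
			 */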
1578 			freq_false = 1.0 - freq_true - freq_null;
1579 
1580 			switch (booltesttype)
1581 			{
1582 				case IS_UNKNOWN:
1583 					/* select only NULL values */
1584 					selec = freq_null;
1585 					break;
1586 				case IS_NOT_UNKNOWN:
1587 					/* select non-NULL values */
1588 					selec = 1.0 - freq_null;
1589 					break;
1590 				case IS_TRUE:
1591 					/* select only TRUE values */
1592 					selec = freq_true;
1593 					break;
1594 				case IS_NOT_TRUE:
1595 					/* select non-TRUE values */
1596 					selec = 1.0 - freq_true;
1597 					break;
1598 				case IS_FALSE:
1599 					/* select only FALSE values */
1600 					selec = freq_false;
1601 					break;
1602 				case IS_NOT_FALSE:
1603 					/* select non-FALSE values */
1604 					selec = 1.0 - freq_false;
1605 					break;
1606 				default:
1607 					elog(ERROR, "unrecognized booltesttype: %d",
1608 						 (int) booltesttype);
1609 					selec = 0.0;	/* Keep compiler quiet */
1610 					break;
1611 			}
1612 
1613 			free_attstatsslot(&sslot);
1614 		}
1615 		else
1616 		{
1617 			/*
1618 			 * No most-common-value info available. Still have null fraction
1619 			 * information, so use it for IS [NOT] UNKNOWN. Otherwise adjust
1620 			 * for null fraction and assume a 50-50 split of TRUE and FALSE.
1621 			 */
1622 			switch (booltesttype)
1623 			{
1624 				case IS_UNKNOWN:
1625 					/* select only NULL values */
1626 					selec = freq_null;
1627 					break;
1628 				case IS_NOT_UNKNOWN:
1629 					/* select non-NULL values */
1630 					selec = 1.0 - freq_null;
1631 					break;
1632 				case IS_TRUE:
1633 				case IS_FALSE:
1634 					/* Assume we select half of the non-NULL values */
1635 					selec = (1.0 - freq_null) / 2.0;
1636 					break;
1637 				case IS_NOT_TRUE:
1638 				case IS_NOT_FALSE:
1639 					/* Assume we select NULLs plus half of the non-NULLs */
1640 					/* equiv. to freq_null + (1.0 - freq_null) / 2.0 */
1641 					selec = (freq_null + 1.0) / 2.0;
1642 					break;
1643 				default:
1644 					elog(ERROR, "unrecognized booltesttype: %d",
1645 						 (int) booltesttype);
1646 					selec = 0.0;	/* Keep compiler quiet */
1647 					break;
1648 			}
1649 		}
1650 	}
1651 	else
1652 	{
1653 		/*
1654 		 * If we can't get variable statistics for the argument, perhaps
1655 		 * clause_selectivity can do something with it.  We ignore the
1656 		 * possibility of a NULL value when using clause_selectivity, and just
1657 		 * assume the value is either TRUE or FALSE.
1658 		 */
1659 		switch (booltesttype)
1660 		{
1661 			case IS_UNKNOWN:
1662 				selec = DEFAULT_UNK_SEL;
1663 				break;
1664 			case IS_NOT_UNKNOWN:
1665 				selec = DEFAULT_NOT_UNK_SEL;
1666 				break;
1667 			case IS_TRUE:
1668 			case IS_NOT_FALSE:
1669 				selec = (double) clause_selectivity(root, arg,
1670 													varRelid,
1671 													jointype, sjinfo);
1672 				break;
1673 			case IS_FALSE:
1674 			case IS_NOT_TRUE:
1675 				selec = 1.0 - (double) clause_selectivity(root, arg,
1676 														  varRelid,
1677 														  jointype, sjinfo);
1678 				break;
1679 			default:
1680 				elog(ERROR, "unrecognized booltesttype: %d",
1681 					 (int) booltesttype);
1682 				selec = 0.0;	/* Keep compiler quiet */
1683 				break;
1684 		}
1685 	}
1686 
1687 	ReleaseVariableStats(vardata);
1688 
1689 	/* result should be in range, but make sure... */
1690 	CLAMP_PROBABILITY(selec);
1691 
1692 	return (Selectivity) selec;
1693 }
1694 
1695 /*
1696  *		nulltestsel		- Selectivity of NullTest Node.
1697  */
1698 Selectivity
1699 nulltestsel(PlannerInfo *root, NullTestType nulltesttype, Node *arg,
1700 			int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
1701 {
1702 	VariableStatData vardata;
1703 	double		selec;
1704 
1705 	examine_variable(root, arg, varRelid, &vardata);
1706 
1707 	if (HeapTupleIsValid(vardata.statsTuple))
1708 	{
1709 		Form_pg_statistic stats;
1710 		double		freq_null;
1711 
1712 		stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1713 		freq_null = stats->stanullfrac;
1714 
1715 		switch (nulltesttype)
1716 		{
1717 			case IS_NULL:
1718 
1719 				/*
1720 				 * Use freq_null directly.
1721 				 */
1722 				selec = freq_null;
1723 				break;
1724 			case IS_NOT_NULL:
1725 
1726 				/*
1727 				 * Select non-null values; this is just the complement of
1728 				 * freq_null.
1729 				 */
1730 				selec = 1.0 - freq_null;
1731 				break;
1732 			default:
1733 				elog(ERROR, "unrecognized nulltesttype: %d",
1734 					 (int) nulltesttype);
1735 				return (Selectivity) 0; /* keep compiler quiet */
1736 		}
1737 	}
1738 	else if (vardata.var && IsA(vardata.var, Var) &&
1739 			 ((Var *) vardata.var)->varattno < 0)
1740 	{
1741 		/*
1742 		 * There are no stats for system columns, but we know they are never
1743 		 * NULL.
1744 		 */
1745 		selec = (nulltesttype == IS_NULL) ? 0.0 : 1.0;
1746 	}
1747 	else
1748 	{
1749 		/*
1750 		 * No ANALYZE stats available, so make a guess
1751 		 */
1752 		switch (nulltesttype)
1753 		{
1754 			case IS_NULL:
1755 				selec = DEFAULT_UNK_SEL;
1756 				break;
1757 			case IS_NOT_NULL:
1758 				selec = DEFAULT_NOT_UNK_SEL;
1759 				break;
1760 			default:
1761 				elog(ERROR, "unrecognized nulltesttype: %d",
1762 					 (int) nulltesttype);
1763 				return (Selectivity) 0; /* keep compiler quiet */
1764 		}
1765 	}
1766 
1767 	ReleaseVariableStats(vardata);
1768 
1769 	/* result should be in range, but make sure... */
1770 	CLAMP_PROBABILITY(selec);
1771 
1772 	return (Selectivity) selec;
1773 }
1774 
1775 /*
1776  * strip_array_coercion - strip binary-compatible relabeling from an array expr
1777  *
1778  * For array values, the parser normally generates ArrayCoerceExpr conversions,
1779  * but it seems possible that RelabelType might show up.  Also, the planner
1780  * is not currently tense about collapsing stacked ArrayCoerceExpr nodes,
1781  * so we need to be ready to deal with more than one level.
1782  */
1783 static Node *
1784 strip_array_coercion(Node *node)
1785 {
1786 	for (;;)
1787 	{
1788 		if (node && IsA(node, ArrayCoerceExpr))
1789 		{
1790 			ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) node;
1791 
1792 			/*
1793 			 * If the per-element expression is just a RelabelType on top of
1794 			 * CaseTestExpr, then we know it's a binary-compatible relabeling.
1795 			 */
1796 			if (IsA(acoerce->elemexpr, RelabelType) &&
1797 				IsA(((RelabelType *) acoerce->elemexpr)->arg, CaseTestExpr))
1798 				node = (Node *) acoerce->arg;
1799 			else
1800 				break;
1801 		}
1802 		else if (node && IsA(node, RelabelType))
1803 		{
1804 			/* We don't really expect this case, but may as well cope */
1805 			node = (Node *) ((RelabelType *) node)->arg;
1806 		}
1807 		else
1808 			break;
1809 	}
1810 	return node;
1811 }
1812 
1813 /*
1814  *		scalararraysel		- Selectivity of ScalarArrayOpExpr Node.
1815  */
1816 Selectivity
1817 scalararraysel(PlannerInfo *root,
1818 			   ScalarArrayOpExpr *clause,
1819 			   bool is_join_clause,
1820 			   int varRelid,
1821 			   JoinType jointype,
1822 			   SpecialJoinInfo *sjinfo)
1823 {
1824 	Oid			operator = clause->opno;
1825 	bool		useOr = clause->useOr;
1826 	bool		isEquality = false;
1827 	bool		isInequality = false;
1828 	Node	   *leftop;
1829 	Node	   *rightop;
1830 	Oid			nominal_element_type;
1831 	Oid			nominal_element_collation;
1832 	TypeCacheEntry *typentry;
1833 	RegProcedure oprsel;
1834 	FmgrInfo	oprselproc;
1835 	Selectivity s1;
1836 	Selectivity s1disjoint;
1837 
1838 	/* First, deconstruct the expression */
1839 	Assert(list_length(clause->args) == 2);
1840 	leftop = (Node *) linitial(clause->args);
1841 	rightop = (Node *) lsecond(clause->args);
1842 
1843 	/* aggressively reduce both sides to constants */
1844 	leftop = estimate_expression_value(root, leftop);
1845 	rightop = estimate_expression_value(root, rightop);
1846 
1847 	/* get nominal (after relabeling) element type of rightop */
1848 	nominal_element_type = get_base_element_type(exprType(rightop));
1849 	if (!OidIsValid(nominal_element_type))
1850 		return (Selectivity) 0.5;	/* probably shouldn't happen */
1851 	/* get nominal collation, too, for generating constants */
1852 	nominal_element_collation = exprCollation(rightop);
1853 
1854 	/* look through any binary-compatible relabeling of rightop */
1855 	rightop = strip_array_coercion(rightop);
1856 
1857 	/*
1858 	 * Detect whether the operator is the default equality or inequality
1859 	 * operator of the array element type.
1860 	 */
1861 	typentry = lookup_type_cache(nominal_element_type, TYPECACHE_EQ_OPR);
1862 	if (OidIsValid(typentry->eq_opr))
1863 	{
1864 		if (operator == typentry->eq_opr)
1865 			isEquality = true;
1866 		else if (get_negator(operator) == typentry->eq_opr)
1867 			isInequality = true;
1868 	}
1869 
1870 	/*
1871 	 * If it is equality or inequality, we might be able to estimate this as a
1872 	 * form of array containment; for instance "const = ANY(column)" can be
1873 	 * treated as "ARRAY[const] <@ column".  scalararraysel_containment tries
1874 	 * that, and returns the selectivity estimate if successful, or -1 if not.
1875 	 */
1876 	if ((isEquality || isInequality) && !is_join_clause)
1877 	{
1878 		s1 = scalararraysel_containment(root, leftop, rightop,
1879 										nominal_element_type,
1880 										isEquality, useOr, varRelid);
1881 		if (s1 >= 0.0)
1882 			return s1;
1883 	}
1884 
1885 	/*
1886 	 * Look up the underlying operator's selectivity estimator. Punt if it
1887 	 * hasn't got one.
1888 	 */
1889 	if (is_join_clause)
1890 		oprsel = get_oprjoin(operator);
1891 	else
1892 		oprsel = get_oprrest(operator);
1893 	if (!oprsel)
1894 		return (Selectivity) 0.5;
1895 	fmgr_info(oprsel, &oprselproc);
1896 
1897 	/*
1898 	 * In the array-containment check above, we must only believe that an
1899 	 * operator is equality or inequality if it is the default btree equality
1900 	 * operator (or its negator) for the element type, since those are the
1901 	 * operators that array containment will use.  But in what follows, we can
1902 	 * be a little laxer, and also believe that any operators using eqsel() or
1903 	 * neqsel() as selectivity estimator act like equality or inequality.
1904 	 */
1905 	if (oprsel == F_EQSEL || oprsel == F_EQJOINSEL)
1906 		isEquality = true;
1907 	else if (oprsel == F_NEQSEL || oprsel == F_NEQJOINSEL)
1908 		isInequality = true;
1909 
1910 	/*
1911 	 * We consider three cases:
1912 	 *
1913 	 * 1. rightop is an Array constant: deconstruct the array, apply the
1914 	 * operator's selectivity function for each array element, and merge the
1915 	 * results in the same way that clausesel.c does for AND/OR combinations.
1916 	 *
1917 	 * 2. rightop is an ARRAY[] construct: apply the operator's selectivity
1918 	 * function for each element of the ARRAY[] construct, and merge.
1919 	 *
1920 	 * 3. otherwise, make a guess ...
1921 	 */
1922 	if (rightop && IsA(rightop, Const))
1923 	{
1924 		Datum		arraydatum = ((Const *) rightop)->constvalue;
1925 		bool		arrayisnull = ((Const *) rightop)->constisnull;
1926 		ArrayType  *arrayval;
1927 		int16		elmlen;
1928 		bool		elmbyval;
1929 		char		elmalign;
1930 		int			num_elems;
1931 		Datum	   *elem_values;
1932 		bool	   *elem_nulls;
1933 		int			i;
1934 
1935 		if (arrayisnull)		/* qual can't succeed if null array */
1936 			return (Selectivity) 0.0;
1937 		arrayval = DatumGetArrayTypeP(arraydatum);
1938 		get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
1939 							 &elmlen, &elmbyval, &elmalign);
1940 		deconstruct_array(arrayval,
1941 						  ARR_ELEMTYPE(arrayval),
1942 						  elmlen, elmbyval, elmalign,
1943 						  &elem_values, &elem_nulls, &num_elems);
1944 
1945 		/*
1946 		 * For generic operators, we assume the probability of success is
1947 		 * independent for each array element.  But for "= ANY" or "<> ALL",
1948 		 * if the array elements are distinct (which'd typically be the case)
1949 		 * then the probabilities are disjoint, and we should just sum them.
1950 		 *
1951 		 * If we were being really tense we would try to confirm that the
1952 		 * elements are all distinct, but that would be expensive and it
1953 		 * doesn't seem to be worth the cycles; it would amount to penalizing
1954 		 * well-written queries in favor of poorly-written ones.  However, we
1955 		 * do protect ourselves a little bit by checking whether the
1956 		 * disjointness assumption leads to an impossible (out of range)
1957 		 * probability; if so, we fall back to the normal calculation.
1958 		 */
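		/*
		 * Illustration with made-up numbers: for "x = ANY ('{a,b,c}')"
		 * where each element matches with selectivity 0.4, independence
		 * gives 1 - (1 - 0.4)^3 = 0.784, while disjointness gives 0.4 +
		 * 0.4 + 0.4 = 1.2, which is out of range; in such a case we keep
		 * the independence result.
		 */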
1959 		s1 = s1disjoint = (useOr ? 0.0 : 1.0);
1960 
1961 		for (i = 0; i < num_elems; i++)
1962 		{
1963 			List	   *args;
1964 			Selectivity s2;
1965 
1966 			args = list_make2(leftop,
1967 							  makeConst(nominal_element_type,
1968 										-1,
1969 										nominal_element_collation,
1970 										elmlen,
1971 										elem_values[i],
1972 										elem_nulls[i],
1973 										elmbyval));
1974 			if (is_join_clause)
1975 				s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1976 													  clause->inputcollid,
1977 													  PointerGetDatum(root),
1978 													  ObjectIdGetDatum(operator),
1979 													  PointerGetDatum(args),
1980 													  Int16GetDatum(jointype),
1981 													  PointerGetDatum(sjinfo)));
1982 			else
1983 				s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1984 													  clause->inputcollid,
1985 													  PointerGetDatum(root),
1986 													  ObjectIdGetDatum(operator),
1987 													  PointerGetDatum(args),
1988 													  Int32GetDatum(varRelid)));
1989 
1990 			if (useOr)
1991 			{
1992 				s1 = s1 + s2 - s1 * s2;
1993 				if (isEquality)
1994 					s1disjoint += s2;
1995 			}
1996 			else
1997 			{
1998 				s1 = s1 * s2;
1999 				if (isInequality)
2000 					s1disjoint += s2 - 1.0;
2001 			}
2002 		}
2003 
2004 		/* accept disjoint-probability estimate if in range */
2005 		if ((useOr ? isEquality : isInequality) &&
2006 			s1disjoint >= 0.0 && s1disjoint <= 1.0)
2007 			s1 = s1disjoint;
2008 	}
2009 	else if (rightop && IsA(rightop, ArrayExpr) &&
2010 			 !((ArrayExpr *) rightop)->multidims)
2011 	{
2012 		ArrayExpr  *arrayexpr = (ArrayExpr *) rightop;
2013 		int16		elmlen;
2014 		bool		elmbyval;
2015 		ListCell   *l;
2016 
2017 		get_typlenbyval(arrayexpr->element_typeid,
2018 						&elmlen, &elmbyval);
2019 
2020 		/*
2021 		 * We use the assumption of disjoint probabilities here too, although
2022 		 * the odds of equal array elements are rather higher if the elements
2023 		 * are not all constants (which they won't be, else constant folding
2024 		 * would have reduced the ArrayExpr to a Const).  In this path it's
2025 		 * critical to have the sanity check on the s1disjoint estimate.
2026 		 */
2027 		s1 = s1disjoint = (useOr ? 0.0 : 1.0);
2028 
2029 		foreach(l, arrayexpr->elements)
2030 		{
2031 			Node	   *elem = (Node *) lfirst(l);
2032 			List	   *args;
2033 			Selectivity s2;
2034 
2035 			/*
2036 			 * Theoretically, if elem isn't of nominal_element_type we should
2037 			 * insert a RelabelType, but it seems unlikely that any operator
2038 			 * estimation function would really care ...
2039 			 */
2040 			args = list_make2(leftop, elem);
2041 			if (is_join_clause)
2042 				s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
2043 													  clause->inputcollid,
2044 													  PointerGetDatum(root),
2045 													  ObjectIdGetDatum(operator),
2046 													  PointerGetDatum(args),
2047 													  Int16GetDatum(jointype),
2048 													  PointerGetDatum(sjinfo)));
2049 			else
2050 				s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
2051 													  clause->inputcollid,
2052 													  PointerGetDatum(root),
2053 													  ObjectIdGetDatum(operator),
2054 													  PointerGetDatum(args),
2055 													  Int32GetDatum(varRelid)));
2056 
2057 			if (useOr)
2058 			{
2059 				s1 = s1 + s2 - s1 * s2;
2060 				if (isEquality)
2061 					s1disjoint += s2;
2062 			}
2063 			else
2064 			{
2065 				s1 = s1 * s2;
2066 				if (isInequality)
2067 					s1disjoint += s2 - 1.0;
2068 			}
2069 		}
2070 
2071 		/* accept disjoint-probability estimate if in range */
2072 		if ((useOr ? isEquality : isInequality) &&
2073 			s1disjoint >= 0.0 && s1disjoint <= 1.0)
2074 			s1 = s1disjoint;
2075 	}
2076 	else
2077 	{
2078 		CaseTestExpr *dummyexpr;
2079 		List	   *args;
2080 		Selectivity s2;
2081 		int			i;
2082 
2083 		/*
2084 		 * We need a dummy rightop to pass to the operator selectivity
2085 		 * routine.  It can be pretty much anything that doesn't look like a
2086 		 * constant; CaseTestExpr is a convenient choice.
2087 		 */
2088 		dummyexpr = makeNode(CaseTestExpr);
2089 		dummyexpr->typeId = nominal_element_type;
2090 		dummyexpr->typeMod = -1;
2091 		dummyexpr->collation = clause->inputcollid;
2092 		args = list_make2(leftop, dummyexpr);
2093 		if (is_join_clause)
2094 			s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
2095 												  clause->inputcollid,
2096 												  PointerGetDatum(root),
2097 												  ObjectIdGetDatum(operator),
2098 												  PointerGetDatum(args),
2099 												  Int16GetDatum(jointype),
2100 												  PointerGetDatum(sjinfo)));
2101 		else
2102 			s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
2103 												  clause->inputcollid,
2104 												  PointerGetDatum(root),
2105 												  ObjectIdGetDatum(operator),
2106 												  PointerGetDatum(args),
2107 												  Int32GetDatum(varRelid)));
2108 		s1 = useOr ? 0.0 : 1.0;
2109 
2110 		/*
2111 		 * Arbitrarily assume 10 elements in the eventual array value (see
2112 		 * also estimate_array_length).  We don't risk an assumption of
2113 		 * disjoint probabilities here.
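		 *
		 * Applying the recurrences below ten times gives the closed forms
		 * s1 = 1 - (1 - s2)^10 for the OR case and s1 = s2^10 for the AND
		 * case.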
2114 		 */
2115 		for (i = 0; i < 10; i++)
2116 		{
2117 			if (useOr)
2118 				s1 = s1 + s2 - s1 * s2;
2119 			else
2120 				s1 = s1 * s2;
2121 		}
2122 	}
2123 
2124 	/* result should be in range, but make sure... */
2125 	CLAMP_PROBABILITY(s1);
2126 
2127 	return s1;
2128 }
2129 
2130 /*
2131  * Estimate number of elements in the array yielded by an expression.
2132  *
2133  * It's important that this agree with scalararraysel.
2134  */
2135 int
2136 estimate_array_length(Node *arrayexpr)
2137 {
2138 	/* look through any binary-compatible relabeling of arrayexpr */
2139 	arrayexpr = strip_array_coercion(arrayexpr);
2140 
2141 	if (arrayexpr && IsA(arrayexpr, Const))
2142 	{
2143 		Datum		arraydatum = ((Const *) arrayexpr)->constvalue;
2144 		bool		arrayisnull = ((Const *) arrayexpr)->constisnull;
2145 		ArrayType  *arrayval;
2146 
2147 		if (arrayisnull)
2148 			return 0;
2149 		arrayval = DatumGetArrayTypeP(arraydatum);
2150 		return ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval));
2151 	}
2152 	else if (arrayexpr && IsA(arrayexpr, ArrayExpr) &&
2153 			 !((ArrayExpr *) arrayexpr)->multidims)
2154 	{
2155 		return list_length(((ArrayExpr *) arrayexpr)->elements);
2156 	}
2157 	else
2158 	{
2159 		/* default guess --- see also scalararraysel */
2160 		return 10;
2161 	}
2162 }
2163 
2164 /*
2165  *		rowcomparesel		- Selectivity of RowCompareExpr Node.
2166  *
2167  * We estimate RowCompare selectivity by considering just the first (high
2168  * order) columns, which makes it equivalent to an ordinary OpExpr.  While
2169  * this estimate could be refined by considering additional columns, it
2170  * seems unlikely that we could do a lot better without multi-column
2171  * statistics.
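 *
 * For example, "(a, b) <= (5, 10)" is estimated as though it were
 * simply "a <= 5".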
2172  */
2173 Selectivity
2174 rowcomparesel(PlannerInfo *root,
2175 			  RowCompareExpr *clause,
2176 			  int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
2177 {
2178 	Selectivity s1;
2179 	Oid			opno = linitial_oid(clause->opnos);
2180 	Oid			inputcollid = linitial_oid(clause->inputcollids);
2181 	List	   *opargs;
2182 	bool		is_join_clause;
2183 
2184 	/* Build equivalent arg list for single operator */
2185 	opargs = list_make2(linitial(clause->largs), linitial(clause->rargs));
2186 
2187 	/*
2188 	 * Decide if it's a join clause.  This should match clausesel.c's
2189 	 * treat_as_join_clause(), except that we intentionally consider only the
2190 	 * leading columns and not the rest of the clause.
2191 	 */
2192 	if (varRelid != 0)
2193 	{
2194 		/*
2195 		 * Caller is forcing restriction mode (eg, because we are examining an
2196 		 * inner indexscan qual).
2197 		 */
2198 		is_join_clause = false;
2199 	}
2200 	else if (sjinfo == NULL)
2201 	{
2202 		/*
2203 		 * It must be a restriction clause, since it's being evaluated at a
2204 		 * scan node.
2205 		 */
2206 		is_join_clause = false;
2207 	}
2208 	else
2209 	{
2210 		/*
2211 		 * Otherwise, it's a join if there's more than one relation used.
2212 		 */
2213 		is_join_clause = (NumRelids(root, (Node *) opargs) > 1);
2214 	}
2215 
2216 	if (is_join_clause)
2217 	{
2218 		/* Estimate selectivity for a join clause. */
2219 		s1 = join_selectivity(root, opno,
2220 							  opargs,
2221 							  inputcollid,
2222 							  jointype,
2223 							  sjinfo);
2224 	}
2225 	else
2226 	{
2227 		/* Estimate selectivity for a restriction clause. */
2228 		s1 = restriction_selectivity(root, opno,
2229 									 opargs,
2230 									 inputcollid,
2231 									 varRelid);
2232 	}
2233 
2234 	return s1;
2235 }
2236 
2237 /*
2238  *		eqjoinsel		- Join selectivity of "="
2239  */
2240 Datum
2241 eqjoinsel(PG_FUNCTION_ARGS)
2242 {
2243 	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2244 	Oid			operator = PG_GETARG_OID(1);
2245 	List	   *args = (List *) PG_GETARG_POINTER(2);
2246 
2247 #ifdef NOT_USED
2248 	JoinType	jointype = (JoinType) PG_GETARG_INT16(3);
2249 #endif
2250 	SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
2251 	Oid			collation = PG_GET_COLLATION();
2252 	double		selec;
2253 	double		selec_inner;
2254 	VariableStatData vardata1;
2255 	VariableStatData vardata2;
2256 	double		nd1;
2257 	double		nd2;
2258 	bool		isdefault1;
2259 	bool		isdefault2;
2260 	Oid			opfuncoid;
2261 	AttStatsSlot sslot1;
2262 	AttStatsSlot sslot2;
2263 	Form_pg_statistic stats1 = NULL;
2264 	Form_pg_statistic stats2 = NULL;
2265 	bool		have_mcvs1 = false;
2266 	bool		have_mcvs2 = false;
2267 	bool		join_is_reversed;
2268 	RelOptInfo *inner_rel;
2269 
2270 	get_join_variables(root, args, sjinfo,
2271 					   &vardata1, &vardata2, &join_is_reversed);
2272 
2273 	nd1 = get_variable_numdistinct(&vardata1, &isdefault1);
2274 	nd2 = get_variable_numdistinct(&vardata2, &isdefault2);
2275 
2276 	opfuncoid = get_opcode(operator);
2277 
2278 	memset(&sslot1, 0, sizeof(sslot1));
2279 	memset(&sslot2, 0, sizeof(sslot2));
2280 
2281 	if (HeapTupleIsValid(vardata1.statsTuple))
2282 	{
2283 		/* note we allow use of nullfrac regardless of security check */
2284 		stats1 = (Form_pg_statistic) GETSTRUCT(vardata1.statsTuple);
2285 		if (statistic_proc_security_check(&vardata1, opfuncoid))
2286 			have_mcvs1 = get_attstatsslot(&sslot1, vardata1.statsTuple,
2287 										  STATISTIC_KIND_MCV, InvalidOid,
2288 										  ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
2289 	}
2290 
2291 	if (HeapTupleIsValid(vardata2.statsTuple))
2292 	{
2293 		/* note we allow use of nullfrac regardless of security check */
2294 		stats2 = (Form_pg_statistic) GETSTRUCT(vardata2.statsTuple);
2295 		if (statistic_proc_security_check(&vardata2, opfuncoid))
2296 			have_mcvs2 = get_attstatsslot(&sslot2, vardata2.statsTuple,
2297 										  STATISTIC_KIND_MCV, InvalidOid,
2298 										  ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
2299 	}
2300 
2301 	/* We need to compute the inner-join selectivity in all cases */
2302 	selec_inner = eqjoinsel_inner(opfuncoid, collation,
2303 								  &vardata1, &vardata2,
2304 								  nd1, nd2,
2305 								  isdefault1, isdefault2,
2306 								  &sslot1, &sslot2,
2307 								  stats1, stats2,
2308 								  have_mcvs1, have_mcvs2);
2309 
2310 	switch (sjinfo->jointype)
2311 	{
2312 		case JOIN_INNER:
2313 		case JOIN_LEFT:
2314 		case JOIN_FULL:
2315 			selec = selec_inner;
2316 			break;
2317 		case JOIN_SEMI:
2318 		case JOIN_ANTI:
2319 
2320 			/*
2321 			 * Look up the join's inner relation.  min_righthand is sufficient
2322 			 * information because neither SEMI nor ANTI joins permit any
2323 			 * reassociation into or out of their RHS, so the righthand will
2324 			 * always be exactly that set of rels.
2325 			 */
2326 			inner_rel = find_join_input_rel(root, sjinfo->min_righthand);
2327 
2328 			if (!join_is_reversed)
2329 				selec = eqjoinsel_semi(opfuncoid, collation,
2330 									   &vardata1, &vardata2,
2331 									   nd1, nd2,
2332 									   isdefault1, isdefault2,
2333 									   &sslot1, &sslot2,
2334 									   stats1, stats2,
2335 									   have_mcvs1, have_mcvs2,
2336 									   inner_rel);
2337 			else
2338 			{
2339 				Oid			commop = get_commutator(operator);
2340 				Oid			commopfuncoid = OidIsValid(commop) ? get_opcode(commop) : InvalidOid;
2341 
2342 				selec = eqjoinsel_semi(commopfuncoid, collation,
2343 									   &vardata2, &vardata1,
2344 									   nd2, nd1,
2345 									   isdefault2, isdefault1,
2346 									   &sslot2, &sslot1,
2347 									   stats2, stats1,
2348 									   have_mcvs2, have_mcvs1,
2349 									   inner_rel);
2350 			}
2351 
2352 			/*
2353 			 * We should never estimate the output of a semijoin to be more
2354 			 * rows than we estimate for an inner join with the same input
2355 			 * rels and join condition; it's obviously impossible for that to
2356 			 * happen.  The former estimate is N1 * Ssemi while the latter is
2357 			 * N1 * N2 * Sinner, so we may clamp Ssemi <= N2 * Sinner.  Doing
2358 			 * this is worthwhile because of the shakier estimation rules we
2359 			 * use in eqjoinsel_semi, particularly in cases where it has to
2360 			 * punt entirely.
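			 *
			 * For example, if the inner rel is estimated at 1000 rows and
			 * Sinner is 0.0001, Ssemi is clamped to at most 1000 * 0.0001
			 * = 0.1.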
2361 			 */
2362 			selec = Min(selec, inner_rel->rows * selec_inner);
2363 			break;
2364 		default:
2365 			/* other values not expected here */
2366 			elog(ERROR, "unrecognized join type: %d",
2367 				 (int) sjinfo->jointype);
2368 			selec = 0;			/* keep compiler quiet */
2369 			break;
2370 	}
2371 
2372 	free_attstatsslot(&sslot1);
2373 	free_attstatsslot(&sslot2);
2374 
2375 	ReleaseVariableStats(vardata1);
2376 	ReleaseVariableStats(vardata2);
2377 
2378 	CLAMP_PROBABILITY(selec);
2379 
2380 	PG_RETURN_FLOAT8((float8) selec);
2381 }
2382 
2383 /*
2384  * eqjoinsel_inner --- eqjoinsel for normal inner join
2385  *
2386  * We also use this for LEFT/FULL outer joins; it's not presently clear
2387  * that it's worth trying to distinguish them here.
2388  */
2389 static double
2390 eqjoinsel_inner(Oid opfuncoid, Oid collation,
2391 				VariableStatData *vardata1, VariableStatData *vardata2,
2392 				double nd1, double nd2,
2393 				bool isdefault1, bool isdefault2,
2394 				AttStatsSlot *sslot1, AttStatsSlot *sslot2,
2395 				Form_pg_statistic stats1, Form_pg_statistic stats2,
2396 				bool have_mcvs1, bool have_mcvs2)
2397 {
2398 	double		selec;
2399 
2400 	if (have_mcvs1 && have_mcvs2)
2401 	{
2402 		/*
2403 		 * We have most-common-value lists for both relations.  Run through
2404 		 * the lists to see which MCVs actually join to each other with the
2405 		 * given operator.  This allows us to determine the exact join
2406 		 * selectivity for the portion of the relations represented by the MCV
2407 		 * lists.  We still have to estimate for the remaining population, but
2408 		 * in a skewed distribution this gives us a big leg up in accuracy.
2409 		 * For motivation see the analysis in Y. Ioannidis and S.
2410 		 * Christodoulakis, "On the propagation of errors in the size of join
2411 		 * results", Technical Report 1018, Computer Science Dept., University
2412 		 * of Wisconsin, Madison, March 1991 (available from ftp.cs.wisc.edu).
2413 		 */
2414 		LOCAL_FCINFO(fcinfo, 2);
2415 		FmgrInfo	eqproc;
2416 		bool	   *hasmatch1;
2417 		bool	   *hasmatch2;
2418 		double		nullfrac1 = stats1->stanullfrac;
2419 		double		nullfrac2 = stats2->stanullfrac;
2420 		double		matchprodfreq,
2421 					matchfreq1,
2422 					matchfreq2,
2423 					unmatchfreq1,
2424 					unmatchfreq2,
2425 					otherfreq1,
2426 					otherfreq2,
2427 					totalsel1,
2428 					totalsel2;
2429 		int			i,
2430 					nmatches;
2431 
2432 		fmgr_info(opfuncoid, &eqproc);
2433 
2434 		/*
2435 		 * Save a few cycles by setting up the fcinfo struct just once. Using
2436 		 * FunctionCallInvoke directly also avoids failure if the eqproc
2437 		 * returns NULL, though really equality functions should never do
2438 		 * that.
2439 		 */
2440 		InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
2441 								 NULL, NULL);
2442 		fcinfo->args[0].isnull = false;
2443 		fcinfo->args[1].isnull = false;
2444 
2445 		hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
2446 		hasmatch2 = (bool *) palloc0(sslot2->nvalues * sizeof(bool));
2447 
2448 		/*
2449 		 * Note we assume that each MCV will match at most one member of the
2450 		 * other MCV list.  If the operator isn't really equality, there could
2451 		 * be multiple matches --- but we don't look for them, both for speed
2452 		 * and because the math wouldn't add up...
2453 		 */
2454 		matchprodfreq = 0.0;
2455 		nmatches = 0;
2456 		for (i = 0; i < sslot1->nvalues; i++)
2457 		{
2458 			int			j;
2459 
2460 			fcinfo->args[0].value = sslot1->values[i];
2461 
2462 			for (j = 0; j < sslot2->nvalues; j++)
2463 			{
2464 				Datum		fresult;
2465 
2466 				if (hasmatch2[j])
2467 					continue;
2468 				fcinfo->args[1].value = sslot2->values[j];
2469 				fcinfo->isnull = false;
2470 				fresult = FunctionCallInvoke(fcinfo);
2471 				if (!fcinfo->isnull && DatumGetBool(fresult))
2472 				{
2473 					hasmatch1[i] = hasmatch2[j] = true;
2474 					matchprodfreq += sslot1->numbers[i] * sslot2->numbers[j];
2475 					nmatches++;
2476 					break;
2477 				}
2478 			}
2479 		}
2480 		CLAMP_PROBABILITY(matchprodfreq);
2481 		/* Sum up frequencies of matched and unmatched MCVs */
2482 		matchfreq1 = unmatchfreq1 = 0.0;
2483 		for (i = 0; i < sslot1->nvalues; i++)
2484 		{
2485 			if (hasmatch1[i])
2486 				matchfreq1 += sslot1->numbers[i];
2487 			else
2488 				unmatchfreq1 += sslot1->numbers[i];
2489 		}
2490 		CLAMP_PROBABILITY(matchfreq1);
2491 		CLAMP_PROBABILITY(unmatchfreq1);
2492 		matchfreq2 = unmatchfreq2 = 0.0;
2493 		for (i = 0; i < sslot2->nvalues; i++)
2494 		{
2495 			if (hasmatch2[i])
2496 				matchfreq2 += sslot2->numbers[i];
2497 			else
2498 				unmatchfreq2 += sslot2->numbers[i];
2499 		}
2500 		CLAMP_PROBABILITY(matchfreq2);
2501 		CLAMP_PROBABILITY(unmatchfreq2);
2502 		pfree(hasmatch1);
2503 		pfree(hasmatch2);
2504 
2505 		/*
2506 		 * Compute total frequency of non-null values that are not in the MCV
2507 		 * lists.
2508 		 */
2509 		otherfreq1 = 1.0 - nullfrac1 - matchfreq1 - unmatchfreq1;
2510 		otherfreq2 = 1.0 - nullfrac2 - matchfreq2 - unmatchfreq2;
2511 		CLAMP_PROBABILITY(otherfreq1);
2512 		CLAMP_PROBABILITY(otherfreq2);
2513 
2514 		/*
2515 		 * We can estimate the total selectivity from the point of view of
2516 		 * relation 1 as: the known selectivity for matched MCVs, plus
2517 		 * unmatched MCVs that are assumed to match against random members of
2518 		 * relation 2's non-MCV population, plus non-MCV values that are
2519 		 * assumed to match against random members of relation 2's unmatched
2520 		 * MCVs plus non-MCV values.
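		 *
		 * Equivalently, each value in relation 1's unmatched-MCV or
		 * non-MCV populations is assumed to hit the frequency of one
		 * "average" distinct value in the corresponding part of relation
		 * 2, which is why the divisors below are (nd2 - sslot2->nvalues)
		 * and (nd2 - nmatches).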
2521 		 */
2522 		totalsel1 = matchprodfreq;
2523 		if (nd2 > sslot2->nvalues)
2524 			totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - sslot2->nvalues);
2525 		if (nd2 > nmatches)
2526 			totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) /
2527 				(nd2 - nmatches);
2528 		/* Same estimate from the point of view of relation 2. */
2529 		totalsel2 = matchprodfreq;
2530 		if (nd1 > sslot1->nvalues)
2531 			totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - sslot1->nvalues);
2532 		if (nd1 > nmatches)
2533 			totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) /
2534 				(nd1 - nmatches);
2535 
2536 		/*
2537 		 * Use the smaller of the two estimates.  This can be justified in
2538 		 * essentially the same terms as given below for the no-stats case: to
2539 		 * a first approximation, we are estimating from the point of view of
2540 		 * the relation with smaller nd.
2541 		 */
2542 		selec = (totalsel1 < totalsel2) ? totalsel1 : totalsel2;
2543 	}
2544 	else
2545 	{
2546 		/*
2547 		 * We do not have MCV lists for both sides.  Estimate the join
2548 		 * selectivity as MIN(1/nd1,1/nd2)*(1-nullfrac1)*(1-nullfrac2). This
2549 		 * is plausible if we assume that the join operator is strict and the
2550 		 * non-null values are about equally distributed: a given non-null
2551 		 * tuple of rel1 will join to either zero or N2*(1-nullfrac2)/nd2 rows
2552 		 * of rel2, so total join rows are at most
2553 		 * N1*(1-nullfrac1)*N2*(1-nullfrac2)/nd2 giving a join selectivity of
2554 		 * not more than (1-nullfrac1)*(1-nullfrac2)/nd2. By the same logic it
2555 		 * is not more than (1-nullfrac1)*(1-nullfrac2)/nd1, so the expression
2556 		 * with MIN() is an upper bound.  Using the MIN() means we estimate
2557 		 * from the point of view of the relation with smaller nd (since the
2558 		 * larger nd is determining the MIN).  It is reasonable to assume that
2559 		 * most tuples in this rel will have join partners, so the bound is
2560 		 * probably reasonably tight and should be taken as-is.
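		 *
		 * For example, with nd1 = 1000, nd2 = 500, and no nulls on either
		 * side, we get MIN(1/1000, 1/500) = 0.001.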
2561 		 *
2562 		 * XXX Can we be smarter if we have an MCV list for just one side? It
2563 		 * seems that if we assume equal distribution for the other side, we
2564 		 * end up with the same answer anyway.
2565 		 */
2566 		double		nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
2567 		double		nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
2568 
2569 		selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
2570 		if (nd1 > nd2)
2571 			selec /= nd1;
2572 		else
2573 			selec /= nd2;
2574 	}
2575 
2576 	return selec;
2577 }
2578 
2579 /*
2580  * eqjoinsel_semi --- eqjoinsel for semi join
2581  *
2582  * (Also used for anti join, which we are supposed to estimate the same way.)
2583  * Caller has ensured that vardata1 is the LHS variable.
2584  * Unlike eqjoinsel_inner, we have to cope with opfuncoid being InvalidOid.
2585  */
2586 static double
2587 eqjoinsel_semi(Oid opfuncoid, Oid collation,
2588 			   VariableStatData *vardata1, VariableStatData *vardata2,
2589 			   double nd1, double nd2,
2590 			   bool isdefault1, bool isdefault2,
2591 			   AttStatsSlot *sslot1, AttStatsSlot *sslot2,
2592 			   Form_pg_statistic stats1, Form_pg_statistic stats2,
2593 			   bool have_mcvs1, bool have_mcvs2,
2594 			   RelOptInfo *inner_rel)
2595 {
2596 	double		selec;
2597 
2598 	/*
2599 	 * We clamp nd2 to be not more than what we estimate the inner relation's
2600 	 * size to be.  This is intuitively somewhat reasonable since obviously
2601 	 * there can't be more than that many distinct values coming from the
2602 	 * inner rel.  The reason for the asymmetry (ie, that we don't clamp nd1
2603 	 * likewise) is that this is the only pathway by which restriction clauses
2604 	 * applied to the inner rel will affect the join result size estimate,
2605 	 * since set_joinrel_size_estimates will multiply SEMI/ANTI selectivity by
2606 	 * only the outer rel's size.  If we clamped nd1 we'd be double-counting
2607 	 * the selectivity of outer-rel restrictions.
2608 	 *
2609 	 * We can apply this clamping both with respect to the base relation from
2610 	 * which the join variable comes (if there is just one), and to the
2611 	 * immediate inner input relation of the current join.
2612 	 *
2613 	 * If we clamp, we can treat nd2 as being a non-default estimate; it's not
2614 	 * great, maybe, but it didn't come out of nowhere either.  This is most
2615 	 * helpful when the inner relation is empty and consequently has no stats.
2616 	 */
2617 	if (vardata2->rel)
2618 	{
2619 		if (nd2 >= vardata2->rel->rows)
2620 		{
2621 			nd2 = vardata2->rel->rows;
2622 			isdefault2 = false;
2623 		}
2624 	}
2625 	if (nd2 >= inner_rel->rows)
2626 	{
2627 		nd2 = inner_rel->rows;
2628 		isdefault2 = false;
2629 	}
2630 
2631 	if (have_mcvs1 && have_mcvs2 && OidIsValid(opfuncoid))
2632 	{
2633 		/*
2634 		 * We have most-common-value lists for both relations.  Run through
2635 		 * the lists to see which MCVs actually join to each other with the
2636 		 * given operator.  This allows us to determine the exact join
2637 		 * selectivity for the portion of the relations represented by the MCV
2638 		 * lists.  We still have to estimate for the remaining population, but
2639 		 * in a skewed distribution this gives us a big leg up in accuracy.
2640 		 */
2641 		LOCAL_FCINFO(fcinfo, 2);
2642 		FmgrInfo	eqproc;
2643 		bool	   *hasmatch1;
2644 		bool	   *hasmatch2;
2645 		double		nullfrac1 = stats1->stanullfrac;
2646 		double		matchfreq1,
2647 					uncertainfrac,
2648 					uncertain;
2649 		int			i,
2650 					nmatches,
2651 					clamped_nvalues2;
2652 
2653 		/*
2654 		 * The clamping above could have resulted in nd2 being less than
2655 		 * sslot2->nvalues; in which case, we assume that precisely the nd2
2656 		 * most common values in the relation will appear in the join input,
2657 		 * and so compare to only the first nd2 members of the MCV list.  Of
2658 		 * course this is frequently wrong, but it's the best bet we can make.
2659 		 */
2660 		clamped_nvalues2 = Min(sslot2->nvalues, nd2);
2661 
2662 		fmgr_info(opfuncoid, &eqproc);
2663 
2664 		/*
2665 		 * Save a few cycles by setting up the fcinfo struct just once. Using
2666 		 * FunctionCallInvoke directly also avoids failure if the eqproc
2667 		 * returns NULL, though really equality functions should never do
2668 		 * that.
2669 		 */
2670 		InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
2671 								 NULL, NULL);
2672 		fcinfo->args[0].isnull = false;
2673 		fcinfo->args[1].isnull = false;
2674 
2675 		hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
2676 		hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool));
2677 
2678 		/*
2679 		 * Note we assume that each MCV will match at most one member of the
2680 		 * other MCV list.  If the operator isn't really equality, there could
2681 		 * be multiple matches --- but we don't look for them, both for speed
2682 		 * and because the math wouldn't add up...
2683 		 */
2684 		nmatches = 0;
2685 		for (i = 0; i < sslot1->nvalues; i++)
2686 		{
2687 			int			j;
2688 
2689 			fcinfo->args[0].value = sslot1->values[i];
2690 
2691 			for (j = 0; j < clamped_nvalues2; j++)
2692 			{
2693 				Datum		fresult;
2694 
2695 				if (hasmatch2[j])
2696 					continue;
2697 				fcinfo->args[1].value = sslot2->values[j];
2698 				fcinfo->isnull = false;
2699 				fresult = FunctionCallInvoke(fcinfo);
2700 				if (!fcinfo->isnull && DatumGetBool(fresult))
2701 				{
2702 					hasmatch1[i] = hasmatch2[j] = true;
2703 					nmatches++;
2704 					break;
2705 				}
2706 			}
2707 		}
2708 		/* Sum up frequencies of matched MCVs */
2709 		matchfreq1 = 0.0;
2710 		for (i = 0; i < sslot1->nvalues; i++)
2711 		{
2712 			if (hasmatch1[i])
2713 				matchfreq1 += sslot1->numbers[i];
2714 		}
2715 		CLAMP_PROBABILITY(matchfreq1);
2716 		pfree(hasmatch1);
2717 		pfree(hasmatch2);
2718 
2719 		/*
2720 		 * Now we need to estimate the fraction of relation 1 that has at
2721 		 * least one join partner.  We know for certain that the matched MCVs
2722 		 * do, so that gives us a lower bound, but we're really in the dark
2723 		 * about everything else.  Our crude approach is: if nd1 <= nd2 then
2724 		 * assume all non-null rel1 rows have join partners, else assume for
2725 		 * the uncertain rows that a fraction nd2/nd1 have join partners. We
2726 		 * can discount the known-matched MCVs from the distinct-values counts
2727 		 * before doing the division.
2728 		 *
2729 		 * Crude as the above is, it's completely useless if we don't have
2730 		 * reliable ndistinct values for both sides.  Hence, if either nd1 or
2731 		 * nd2 is default, punt and assume half of the uncertain rows have
2732 		 * join partners.
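		 *
		 * Illustration with made-up numbers: with matchfreq1 = 0.4,
		 * nullfrac1 = 0.1, nmatches = 20, nd1 = 200 and nd2 = 100, the
		 * uncertain fraction is (100 - 20) / (200 - 20) = 4/9, so selec
		 * = 0.4 + (4/9) * 0.5, or about 0.62.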
2733 		 */
2734 		if (!isdefault1 && !isdefault2)
2735 		{
2736 			nd1 -= nmatches;
2737 			nd2 -= nmatches;
2738 			if (nd1 <= nd2 || nd2 < 0)
2739 				uncertainfrac = 1.0;
2740 			else
2741 				uncertainfrac = nd2 / nd1;
2742 		}
2743 		else
2744 			uncertainfrac = 0.5;
2745 		uncertain = 1.0 - matchfreq1 - nullfrac1;
2746 		CLAMP_PROBABILITY(uncertain);
2747 		selec = matchfreq1 + uncertainfrac * uncertain;
2748 	}
2749 	else
2750 	{
2751 		/*
2752 		 * Without MCV lists for both sides, we can only use the heuristic
2753 		 * about nd1 vs nd2.
2754 		 */
2755 		double		nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
2756 
2757 		if (!isdefault1 && !isdefault2)
2758 		{
2759 			if (nd1 <= nd2 || nd2 < 0)
2760 				selec = 1.0 - nullfrac1;
2761 			else
2762 				selec = (nd2 / nd1) * (1.0 - nullfrac1);
2763 		}
2764 		else
2765 			selec = 0.5 * (1.0 - nullfrac1);
2766 	}
2767 
2768 	return selec;
2769 }
2770 
2771 /*
2772  *		neqjoinsel		- Join selectivity of "!="
2773  */
2774 Datum
2775 neqjoinsel(PG_FUNCTION_ARGS)
2776 {
2777 	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2778 	Oid			operator = PG_GETARG_OID(1);
2779 	List	   *args = (List *) PG_GETARG_POINTER(2);
2780 	JoinType	jointype = (JoinType) PG_GETARG_INT16(3);
2781 	SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
2782 	Oid			collation = PG_GET_COLLATION();
2783 	float8		result;
2784 
2785 	if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
2786 	{
2787 		/*
2788 		 * For semi-joins, if there is more than one distinct value in the RHS
2789 		 * relation then every non-null LHS row must find a row to join since
2790 		 * it can only be equal to one of them.  We'll assume that there is
2791 		 * always more than one distinct RHS value for the sake of stability,
2792 		 * though in theory we could have special cases for empty RHS
2793 		 * (selectivity = 0) and single-distinct-value RHS (selectivity =
2794 		 * fraction of LHS that has the same value as the single RHS value).
2795 		 *
2796 		 * For anti-joins, if we use the same assumption that there is more
2797 		 * than one distinct key in the RHS relation, then every non-null LHS
2798 		 * row must be suppressed by the anti-join.
2799 		 *
2800 		 * So either way, the selectivity estimate should be 1 - nullfrac.
2801 		 */
2802 		VariableStatData leftvar;
2803 		VariableStatData rightvar;
2804 		bool		reversed;
2805 		HeapTuple	statsTuple;
2806 		double		nullfrac;
2807 
2808 		get_join_variables(root, args, sjinfo, &leftvar, &rightvar, &reversed);
2809 		statsTuple = reversed ? rightvar.statsTuple : leftvar.statsTuple;
2810 		if (HeapTupleIsValid(statsTuple))
2811 			nullfrac = ((Form_pg_statistic) GETSTRUCT(statsTuple))->stanullfrac;
2812 		else
2813 			nullfrac = 0.0;
2814 		ReleaseVariableStats(leftvar);
2815 		ReleaseVariableStats(rightvar);
2816 
2817 		result = 1.0 - nullfrac;
2818 	}
2819 	else
2820 	{
2821 		/*
2822 		 * We want 1 - eqjoinsel() where the equality operator is the one
2823 		 * associated with this != operator, that is, its negator.
2824 		 */
2825 		Oid			eqop = get_negator(operator);
2826 
2827 		if (eqop)
2828 		{
2829 			result =
2830 				DatumGetFloat8(DirectFunctionCall5Coll(eqjoinsel,
2831 													   collation,
2832 													   PointerGetDatum(root),
2833 													   ObjectIdGetDatum(eqop),
2834 													   PointerGetDatum(args),
2835 													   Int16GetDatum(jointype),
2836 													   PointerGetDatum(sjinfo)));
2837 		}
2838 		else
2839 		{
2840 			/* Use default selectivity (should we raise an error instead?) */
2841 			result = DEFAULT_EQ_SEL;
2842 		}
2843 		result = 1.0 - result;
2844 	}
2845 
2846 	PG_RETURN_FLOAT8(result);
2847 }
2848 
2849 /*
2850  *		scalarltjoinsel - Join selectivity of "<" for scalars
2851  */
2852 Datum
2853 scalarltjoinsel(PG_FUNCTION_ARGS)
2854 {
2855 	PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
2856 }
2857 
2858 /*
2859  *		scalarlejoinsel - Join selectivity of "<=" for scalars
2860  */
2861 Datum
2862 scalarlejoinsel(PG_FUNCTION_ARGS)
2863 {
2864 	PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
2865 }
2866 
2867 /*
2868  *		scalargtjoinsel - Join selectivity of ">" for scalars
2869  */
2870 Datum
2871 scalargtjoinsel(PG_FUNCTION_ARGS)
2872 {
2873 	PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
2874 }
2875 
2876 /*
2877  *		scalargejoinsel - Join selectivity of ">=" for scalars
2878  */
2879 Datum
2880 scalargejoinsel(PG_FUNCTION_ARGS)
2881 {
2882 	PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
2883 }
2884 
2885 
2886 /*
2887  * mergejoinscansel			- Scan selectivity of merge join.
2888  *
2889  * A merge join will stop as soon as it exhausts either input stream.
2890  * Therefore, if we can estimate the ranges of both input variables,
2891  * we can estimate how much of the input will actually be read.  This
2892  * can have a considerable impact on the cost when using indexscans.
2893  *
2894  * Also, we can estimate how much of each input has to be read before the
2895  * first join pair is found, which will affect the join's startup time.
2896  *
2897  * clause should be a clause already known to be mergejoinable.  opfamily,
2898  * strategy, and nulls_first specify the sort ordering being used.
2899  *
2900  * The outputs are:
2901  *		*leftstart is set to the fraction of the left-hand variable expected
2902  *		 to be scanned before the first join pair is found (0 to 1).
2903  *		*leftend is set to the fraction of the left-hand variable expected
2904  *		 to be scanned before the join terminates (0 to 1).
2905  *		*rightstart, *rightend similarly for the right-hand variable.
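 *
 * For example (assuming uniform distributions), if the left input ranges
 * over 1..100 and the right input over 40..60, we'd expect leftstart ~ 0.4
 * (the fraction of the left input below 40), leftend ~ 0.6 (the fraction
 * at or below 60), rightstart ~ 0, and rightend ~ 1 (the right input is
 * scanned in full).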
2906  */
2907 void
2908 mergejoinscansel(PlannerInfo *root, Node *clause,
2909 				 Oid opfamily, int strategy, bool nulls_first,
2910 				 Selectivity *leftstart, Selectivity *leftend,
2911 				 Selectivity *rightstart, Selectivity *rightend)
2912 {
2913 	Node	   *left,
2914 			   *right;
2915 	VariableStatData leftvar,
2916 				rightvar;
2917 	int			op_strategy;
2918 	Oid			op_lefttype;
2919 	Oid			op_righttype;
2920 	Oid			opno,
2921 				collation,
2922 				lsortop,
2923 				rsortop,
2924 				lstatop,
2925 				rstatop,
2926 				ltop,
2927 				leop,
2928 				revltop,
2929 				revleop;
2930 	bool		isgt;
2931 	Datum		leftmin,
2932 				leftmax,
2933 				rightmin,
2934 				rightmax;
2935 	double		selec;
2936 
2937 	/* Set default results if we can't figure anything out. */
2938 	/* XXX should default "start" fraction be a bit more than 0? */
2939 	*leftstart = *rightstart = 0.0;
2940 	*leftend = *rightend = 1.0;
2941 
2942 	/* Deconstruct the merge clause */
2943 	if (!is_opclause(clause))
2944 		return;					/* shouldn't happen */
2945 	opno = ((OpExpr *) clause)->opno;
2946 	collation = ((OpExpr *) clause)->inputcollid;
2947 	left = get_leftop((Expr *) clause);
2948 	right = get_rightop((Expr *) clause);
2949 	if (!right)
2950 		return;					/* shouldn't happen */
2951 
2952 	/* Look for stats for the inputs */
2953 	examine_variable(root, left, 0, &leftvar);
2954 	examine_variable(root, right, 0, &rightvar);
2955 
2956 	/* Extract the operator's declared left/right datatypes */
2957 	get_op_opfamily_properties(opno, opfamily, false,
2958 							   &op_strategy,
2959 							   &op_lefttype,
2960 							   &op_righttype);
2961 	Assert(op_strategy == BTEqualStrategyNumber);
2962 
2963 	/*
2964 	 * Look up the various operators we need.  If we don't find them all, it
2965 	 * probably means the opfamily is broken, but we just fail silently.
2966 	 *
2967 	 * Note: we expect that pg_statistic histograms will be sorted by the '<'
2968 	 * operator, regardless of which sort direction we are considering.
2969 	 */
2970 	switch (strategy)
2971 	{
2972 		case BTLessStrategyNumber:
2973 			isgt = false;
2974 			if (op_lefttype == op_righttype)
2975 			{
2976 				/* easy case */
2977 				ltop = get_opfamily_member(opfamily,
2978 										   op_lefttype, op_righttype,
2979 										   BTLessStrategyNumber);
2980 				leop = get_opfamily_member(opfamily,
2981 										   op_lefttype, op_righttype,
2982 										   BTLessEqualStrategyNumber);
2983 				lsortop = ltop;
2984 				rsortop = ltop;
2985 				lstatop = lsortop;
2986 				rstatop = rsortop;
2987 				revltop = ltop;
2988 				revleop = leop;
2989 			}
2990 			else
2991 			{
2992 				ltop = get_opfamily_member(opfamily,
2993 										   op_lefttype, op_righttype,
2994 										   BTLessStrategyNumber);
2995 				leop = get_opfamily_member(opfamily,
2996 										   op_lefttype, op_righttype,
2997 										   BTLessEqualStrategyNumber);
2998 				lsortop = get_opfamily_member(opfamily,
2999 											  op_lefttype, op_lefttype,
3000 											  BTLessStrategyNumber);
3001 				rsortop = get_opfamily_member(opfamily,
3002 											  op_righttype, op_righttype,
3003 											  BTLessStrategyNumber);
3004 				lstatop = lsortop;
3005 				rstatop = rsortop;
3006 				revltop = get_opfamily_member(opfamily,
3007 											  op_righttype, op_lefttype,
3008 											  BTLessStrategyNumber);
3009 				revleop = get_opfamily_member(opfamily,
3010 											  op_righttype, op_lefttype,
3011 											  BTLessEqualStrategyNumber);
3012 			}
3013 			break;
3014 		case BTGreaterStrategyNumber:
3015 			/* descending-order case */
3016 			isgt = true;
3017 			if (op_lefttype == op_righttype)
3018 			{
3019 				/* easy case */
3020 				ltop = get_opfamily_member(opfamily,
3021 										   op_lefttype, op_righttype,
3022 										   BTGreaterStrategyNumber);
3023 				leop = get_opfamily_member(opfamily,
3024 										   op_lefttype, op_righttype,
3025 										   BTGreaterEqualStrategyNumber);
3026 				lsortop = ltop;
3027 				rsortop = ltop;
3028 				lstatop = get_opfamily_member(opfamily,
3029 											  op_lefttype, op_lefttype,
3030 											  BTLessStrategyNumber);
3031 				rstatop = lstatop;
3032 				revltop = ltop;
3033 				revleop = leop;
3034 			}
3035 			else
3036 			{
3037 				ltop = get_opfamily_member(opfamily,
3038 										   op_lefttype, op_righttype,
3039 										   BTGreaterStrategyNumber);
3040 				leop = get_opfamily_member(opfamily,
3041 										   op_lefttype, op_righttype,
3042 										   BTGreaterEqualStrategyNumber);
3043 				lsortop = get_opfamily_member(opfamily,
3044 											  op_lefttype, op_lefttype,
3045 											  BTGreaterStrategyNumber);
3046 				rsortop = get_opfamily_member(opfamily,
3047 											  op_righttype, op_righttype,
3048 											  BTGreaterStrategyNumber);
3049 				lstatop = get_opfamily_member(opfamily,
3050 											  op_lefttype, op_lefttype,
3051 											  BTLessStrategyNumber);
3052 				rstatop = get_opfamily_member(opfamily,
3053 											  op_righttype, op_righttype,
3054 											  BTLessStrategyNumber);
3055 				revltop = get_opfamily_member(opfamily,
3056 											  op_righttype, op_lefttype,
3057 											  BTGreaterStrategyNumber);
3058 				revleop = get_opfamily_member(opfamily,
3059 											  op_righttype, op_lefttype,
3060 											  BTGreaterEqualStrategyNumber);
3061 			}
3062 			break;
3063 		default:
3064 			goto fail;			/* shouldn't get here */
3065 	}
3066 
3067 	if (!OidIsValid(lsortop) ||
3068 		!OidIsValid(rsortop) ||
3069 		!OidIsValid(lstatop) ||
3070 		!OidIsValid(rstatop) ||
3071 		!OidIsValid(ltop) ||
3072 		!OidIsValid(leop) ||
3073 		!OidIsValid(revltop) ||
3074 		!OidIsValid(revleop))
3075 		goto fail;				/* insufficient info in catalogs */
3076 
3077 	/* Try to get ranges of both inputs */
3078 	if (!isgt)
3079 	{
3080 		if (!get_variable_range(root, &leftvar, lstatop, collation,
3081 								&leftmin, &leftmax))
3082 			goto fail;			/* no range available from stats */
3083 		if (!get_variable_range(root, &rightvar, rstatop, collation,
3084 								&rightmin, &rightmax))
3085 			goto fail;			/* no range available from stats */
3086 	}
3087 	else
3088 	{
3089 		/* need to swap the max and min */
3090 		if (!get_variable_range(root, &leftvar, lstatop, collation,
3091 								&leftmax, &leftmin))
3092 			goto fail;			/* no range available from stats */
3093 		if (!get_variable_range(root, &rightvar, rstatop, collation,
3094 								&rightmax, &rightmin))
3095 			goto fail;			/* no range available from stats */
3096 	}
3097 
3098 	/*
3099 	 * Now, the fraction of the left variable that will be scanned is the
3100 	 * fraction that's <= the right-side maximum value.  But only believe
3101 	 * non-default estimates, else stick with our 1.0.
3102 	 */
3103 	selec = scalarineqsel(root, leop, isgt, true, collation, &leftvar,
3104 						  rightmax, op_righttype);
3105 	if (selec != DEFAULT_INEQ_SEL)
3106 		*leftend = selec;
3107 
3108 	/* And similarly for the right variable. */
3109 	selec = scalarineqsel(root, revleop, isgt, true, collation, &rightvar,
3110 						  leftmax, op_lefttype);
3111 	if (selec != DEFAULT_INEQ_SEL)
3112 		*rightend = selec;
3113 
3114 	/*
3115 	 * Only one of the two "end" fractions can really be less than 1.0;
3116 	 * believe the smaller estimate and reset the other one to exactly 1.0. If
3117 	 * we get exactly equal estimates (as can easily happen with self-joins),
3118 	 * believe neither.
3119 	 */
3120 	if (*leftend > *rightend)
3121 		*leftend = 1.0;
3122 	else if (*leftend < *rightend)
3123 		*rightend = 1.0;
3124 	else
3125 		*leftend = *rightend = 1.0;
3126 
3127 	/*
3128 	 * Also, the fraction of the left variable that will be scanned before the
3129 	 * first join pair is found is the fraction that's < the right-side
3130 	 * minimum value.  But only believe non-default estimates, else stick with
3131 	 * our own default.
3132 	 */
3133 	selec = scalarineqsel(root, ltop, isgt, false, collation, &leftvar,
3134 						  rightmin, op_righttype);
3135 	if (selec != DEFAULT_INEQ_SEL)
3136 		*leftstart = selec;
3137 
3138 	/* And similarly for the right variable. */
3139 	selec = scalarineqsel(root, revltop, isgt, false, collation, &rightvar,
3140 						  leftmin, op_lefttype);
3141 	if (selec != DEFAULT_INEQ_SEL)
3142 		*rightstart = selec;
3143 
3144 	/*
3145 	 * Only one of the two "start" fractions can really be more than zero;
3146 	 * believe the larger estimate and reset the other one to exactly 0.0. If
3147 	 * we get exactly equal estimates (as can easily happen with self-joins),
3148 	 * believe neither.
3149 	 */
3150 	if (*leftstart < *rightstart)
3151 		*leftstart = 0.0;
3152 	else if (*leftstart > *rightstart)
3153 		*rightstart = 0.0;
3154 	else
3155 		*leftstart = *rightstart = 0.0;
3156 
3157 	/*
3158 	 * If the sort order is nulls-first, we're going to have to skip over any
3159 	 * nulls too.  These would not have been counted by scalarineqsel, and we
3160 	 * can safely add in this fraction regardless of whether we believe
3161 	 * scalarineqsel's results or not.  But be sure to clamp the sum to 1.0!
3162 	 */
3163 	if (nulls_first)
3164 	{
3165 		Form_pg_statistic stats;
3166 
3167 		if (HeapTupleIsValid(leftvar.statsTuple))
3168 		{
3169 			stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
3170 			*leftstart += stats->stanullfrac;
3171 			CLAMP_PROBABILITY(*leftstart);
3172 			*leftend += stats->stanullfrac;
3173 			CLAMP_PROBABILITY(*leftend);
3174 		}
3175 		if (HeapTupleIsValid(rightvar.statsTuple))
3176 		{
3177 			stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple);
3178 			*rightstart += stats->stanullfrac;
3179 			CLAMP_PROBABILITY(*rightstart);
3180 			*rightend += stats->stanullfrac;
3181 			CLAMP_PROBABILITY(*rightend);
3182 		}
3183 	}
3184 
3185 	/* Disbelieve start >= end, just in case that can happen */
3186 	if (*leftstart >= *leftend)
3187 	{
3188 		*leftstart = 0.0;
3189 		*leftend = 1.0;
3190 	}
3191 	if (*rightstart >= *rightend)
3192 	{
3193 		*rightstart = 0.0;
3194 		*rightend = 1.0;
3195 	}
3196 
3197 fail:
3198 	ReleaseVariableStats(leftvar);
3199 	ReleaseVariableStats(rightvar);
3200 }
3201 
3202 
3203 /*
3204  *	matchingsel -- generic matching-operator selectivity support
3205  *
3206  * Use these for any operators that (a) are on data types for which we collect
3207  * standard statistics, and (b) have behavior for which the default estimate
3208  * (twice DEFAULT_EQ_SEL) is sane.  Typically that is good for match-like
3209  * operators.
3210  */
3211 
3212 Datum
3213 matchingsel(PG_FUNCTION_ARGS)
3214 {
3215 	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
3216 	Oid			operator = PG_GETARG_OID(1);
3217 	List	   *args = (List *) PG_GETARG_POINTER(2);
3218 	int			varRelid = PG_GETARG_INT32(3);
3219 	Oid			collation = PG_GET_COLLATION();
3220 	double		selec;
3221 
3222 	/* Use generic restriction selectivity logic. */
3223 	selec = generic_restriction_selectivity(root, operator, collation,
3224 											args, varRelid,
3225 											DEFAULT_MATCHING_SEL);
3226 
3227 	PG_RETURN_FLOAT8((float8) selec);
3228 }
3229 
3230 Datum
3231 matchingjoinsel(PG_FUNCTION_ARGS)
3232 {
3233 	/* Just punt, for the moment. */
3234 	PG_RETURN_FLOAT8(DEFAULT_MATCHING_SEL);
3235 }
3236 
3237 
3238 /*
3239  * Helper routine for estimate_num_groups: add an item to a list of
3240  * GroupVarInfos, but only if it's not known equal to any of the existing
3241  * entries.
3242  */
3243 typedef struct
3244 {
3245 	Node	   *var;			/* might be an expression, not just a Var */
3246 	RelOptInfo *rel;			/* relation it belongs to */
3247 	double		ndistinct;		/* # distinct values */
3248 } GroupVarInfo;
3249 
3250 static List *
3251 add_unique_group_var(PlannerInfo *root, List *varinfos,
3252 					 Node *var, VariableStatData *vardata)
3253 {
3254 	GroupVarInfo *varinfo;
3255 	double		ndistinct;
3256 	bool		isdefault;
3257 	ListCell   *lc;
3258 
3259 	ndistinct = get_variable_numdistinct(vardata, &isdefault);
3260 
3261 	foreach(lc, varinfos)
3262 	{
3263 		varinfo = (GroupVarInfo *) lfirst(lc);
3264 
3265 		/* Drop exact duplicates */
3266 		if (equal(var, varinfo->var))
3267 			return varinfos;
3268 
3269 		/*
3270 		 * Drop known-equal vars, but only if they belong to different
3271 		 * relations (see comments for estimate_num_groups)
3272 		 */
3273 		if (vardata->rel != varinfo->rel &&
3274 			exprs_known_equal(root, var, varinfo->var))
3275 		{
3276 			if (varinfo->ndistinct <= ndistinct)
3277 			{
3278 				/* Keep older item, forget new one */
3279 				return varinfos;
3280 			}
3281 			else
3282 			{
3283 				/* Delete the older item */
3284 				varinfos = foreach_delete_current(varinfos, lc);
3285 			}
3286 		}
3287 	}
3288 
3289 	varinfo = (GroupVarInfo *) palloc(sizeof(GroupVarInfo));
3290 
3291 	varinfo->var = var;
3292 	varinfo->rel = vardata->rel;
3293 	varinfo->ndistinct = ndistinct;
3294 	varinfos = lappend(varinfos, varinfo);
3295 	return varinfos;
3296 }
3297 
3298 /*
3299  * estimate_num_groups		- Estimate number of groups in a grouped query
3300  *
3301  * Given a query having a GROUP BY clause, estimate how many groups there
3302  * will be --- ie, the number of distinct combinations of the GROUP BY
3303  * expressions.
3304  *
3305  * This routine is also used to estimate the number of rows emitted by
3306  * a DISTINCT filtering step; that is an isomorphic problem.  (Note:
3307  * actually, we only use it for DISTINCT when there's no grouping or
3308  * aggregation ahead of the DISTINCT.)
3309  *
3310  * Inputs:
3311  *	root - the query
3312  *	groupExprs - list of expressions being grouped by
3313  *	input_rows - number of rows estimated to arrive at the group/unique
3314  *		filter step
3315  *	pgset - NULL, or a List** pointing to a grouping set to filter the
3316  *		groupExprs against
3317  *
3318  * Given the lack of any cross-correlation statistics in the system, it's
3319  * impossible to do anything really trustworthy with GROUP BY conditions
3320  * involving multiple Vars.  We should however avoid assuming the worst
3321  * case (all possible cross-product terms actually appear as groups) since
3322  * very often the grouped-by Vars are highly correlated.  Our current approach
3323  * is as follows:
3324  *	1.  Expressions yielding boolean are assumed to contribute two groups,
3325  *		independently of their content, and are ignored in the subsequent
3326  *		steps.  This is mainly because tests like "col IS NULL" break the
3327  *		heuristic used in step 2 especially badly.
3328  *	2.  Reduce the given expressions to a list of unique Vars used.  For
3329  *		example, GROUP BY a, a + b is treated the same as GROUP BY a, b.
3330  *		It is clearly correct not to count the same Var more than once.
3331  *		It is also reasonable to treat f(x) the same as x: f() cannot
3332  *		increase the number of distinct values (unless it is volatile,
3333  *		which we consider unlikely for grouping), but it probably won't
3334  *		reduce the number of distinct values much either.
3335  *		As a special case, if a GROUP BY expression can be matched to an
3336  *		expressional index for which we have statistics, then we treat the
3337  *		whole expression as though it were just a Var.
3338  *	3.  If the list contains Vars of different relations that are known equal
3339  *		due to equivalence classes, then drop all but one of the Vars from each
3340  *		known-equal set, keeping the one with smallest estimated # of values
3341  *		(since the extra values of the others can't appear in joined rows).
3342  *		Note the reason we only consider Vars of different relations is that
3343  *		if we considered ones of the same rel, we'd be double-counting the
3344  *		restriction selectivity of the equality in the next step.
3345  *	4.  For Vars within a single source rel, we multiply together the numbers
3346  *		of values, clamp to the number of rows in the rel (divided by 10 if
3347  *		more than one Var), and then multiply by a factor based on the
3348  *		selectivity of the restriction clauses for that rel.  When there's
3349  *		more than one Var, the initial product is probably too high (it's the
3350  *		worst case) but clamping to a fraction of the rel's rows seems to be a
3351  *		helpful heuristic for not letting the estimate get out of hand.  (The
3352  *		factor of 10 is derived from pre-Postgres-7.4 practice.)  The factor
3353  *		we multiply by to adjust for the restriction selectivity assumes that
3354  *		the restriction clauses are independent of the grouping, which may not
3355  *		be a valid assumption, but it's hard to do better.
3356  *	5.  If there are Vars from multiple rels, we repeat step 4 for each such
3357  *		rel, and multiply the results together.
3358  * Note that rels not containing grouped Vars are ignored completely, as are
3359  * join clauses.  Such rels cannot increase the number of groups, and we
3360  * assume such clauses do not reduce the number either (somewhat bogus,
3361  * but we don't have the info to do better).
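 *
 * A hedged worked example (hypothetical numbers): for GROUP BY a, b on a
 * single rel with 10000 tuples, ndistinct(a) = 100 and ndistinct(b) = 50,
 * step 4 starts from the product 5000, clamps it to 10000/10 = 1000 (but
 * never below max(100, 50) = 100), and then scales the result down
 * according to the rel's restriction selectivity.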
3362  */
3363 double
3364 estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
3365 					List **pgset)
3366 {
3367 	List	   *varinfos = NIL;
3368 	double		srf_multiplier = 1.0;
3369 	double		numdistinct;
3370 	ListCell   *l;
3371 	int			i;
3372 
3373 	/*
3374 	 * We don't ever want to return an estimate of zero groups, as that tends
3375 	 * to lead to division-by-zero and other unpleasantness.  The input_rows
3376 	 * estimate is usually already at least 1, but clamp it just in case it
3377 	 * isn't.
3378 	 */
3379 	input_rows = clamp_row_est(input_rows);
3380 
3381 	/*
3382 	 * If no grouping columns, there's exactly one group.  (This can't happen
3383 	 * for normal cases with GROUP BY or DISTINCT, but it is possible for
3384 	 * corner cases with set operations.)
3385 	 */
3386 	if (groupExprs == NIL || (pgset && list_length(*pgset) < 1))
3387 		return 1.0;
3388 
3389 	/*
3390 	 * Count groups derived from boolean grouping expressions.  For other
3391 	 * expressions, find the unique Vars used, treating an expression as a Var
3392 	 * if we can find stats for it.  For each one, record the statistical
3393 	 * estimate of number of distinct values (total in its table, without
3394 	 * regard for filtering).
3395 	 */
3396 	numdistinct = 1.0;
3397 
3398 	i = 0;
3399 	foreach(l, groupExprs)
3400 	{
3401 		Node	   *groupexpr = (Node *) lfirst(l);
3402 		double		this_srf_multiplier;
3403 		VariableStatData vardata;
3404 		List	   *varshere;
3405 		ListCell   *l2;
3406 
3407 		/* is expression in this grouping set? */
3408 		if (pgset && !list_member_int(*pgset, i++))
3409 			continue;
3410 
3411 		/*
3412 		 * Set-returning functions in grouping columns are a bit problematic.
3413 		 * The code below will effectively ignore their SRF nature and come up
3414 		 * with a numdistinct estimate as though they were scalar functions.
3415 		 * We compensate by scaling up the end result by the largest SRF
3416 		 * rowcount estimate.  (This will be an overestimate if the SRF
3417 		 * produces multiple copies of any output value, but it seems best to
3418 		 * assume the SRF's outputs are distinct.  In any case, it's probably
3419 		 * pointless to worry too much about this without much better
3420 		 * estimates for SRF output rowcounts than we have today.)
3421 		 */
3422 		this_srf_multiplier = expression_returns_set_rows(root, groupexpr);
3423 		if (srf_multiplier < this_srf_multiplier)
3424 			srf_multiplier = this_srf_multiplier;
3425 
3426 		/* Short-circuit for expressions returning boolean */
3427 		if (exprType(groupexpr) == BOOLOID)
3428 		{
3429 			numdistinct *= 2.0;
3430 			continue;
3431 		}
3432 
3433 		/*
3434 		 * If examine_variable is able to deduce anything about the GROUP BY
3435 		 * expression, treat it as a single variable even if it's really more
3436 		 * complicated.
3437 		 */
3438 		examine_variable(root, groupexpr, 0, &vardata);
3439 		if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique)
3440 		{
3441 			varinfos = add_unique_group_var(root, varinfos,
3442 											groupexpr, &vardata);
3443 			ReleaseVariableStats(vardata);
3444 			continue;
3445 		}
3446 		ReleaseVariableStats(vardata);
3447 
3448 		/*
3449 		 * Else pull out the component Vars.  Handle PlaceHolderVars by
3450 		 * recursing into their arguments (effectively assuming that the
3451 		 * PlaceHolderVar doesn't change the number of groups, which boils
3452 		 * down to ignoring the possible addition of nulls to the result set).
3453 		 */
3454 		varshere = pull_var_clause(groupexpr,
3455 								   PVC_RECURSE_AGGREGATES |
3456 								   PVC_RECURSE_WINDOWFUNCS |
3457 								   PVC_RECURSE_PLACEHOLDERS);
3458 
3459 		/*
3460 		 * If we find any variable-free GROUP BY item, then either it is a
3461 		 * constant (and we can ignore it) or it contains a volatile function;
3462 		 * in the latter case we punt and assume that each input row will
3463 		 * yield a distinct group.
3464 		 */
3465 		if (varshere == NIL)
3466 		{
3467 			if (contain_volatile_functions(groupexpr))
3468 				return input_rows;
3469 			continue;
3470 		}
3471 
3472 		/*
3473 		 * Else add variables to varinfos list
3474 		 */
3475 		foreach(l2, varshere)
3476 		{
3477 			Node	   *var = (Node *) lfirst(l2);
3478 
3479 			examine_variable(root, var, 0, &vardata);
3480 			varinfos = add_unique_group_var(root, varinfos, var, &vardata);
3481 			ReleaseVariableStats(vardata);
3482 		}
3483 	}
3484 
3485 	/*
	 * If we now have no Vars, the GROUP BY list must have been all-constant
	 * or all-boolean.
3488 	 */
3489 	if (varinfos == NIL)
3490 	{
3491 		/* Apply SRF multiplier as we would do in the long path */
3492 		numdistinct *= srf_multiplier;
3493 		/* Round off */
3494 		numdistinct = ceil(numdistinct);
3495 		/* Guard against out-of-range answers */
3496 		if (numdistinct > input_rows)
3497 			numdistinct = input_rows;
3498 		if (numdistinct < 1.0)
3499 			numdistinct = 1.0;
3500 		return numdistinct;
3501 	}
3502 
3503 	/*
3504 	 * Group Vars by relation and estimate total numdistinct.
3505 	 *
	 * For each iteration of the outer loop, we process the frontmost Var in
	 * varinfos, plus all other Vars in the same relation.  These Vars are
	 * omitted from the newvarinfos list used in the next iteration.  This
	 * is the easiest way to group Vars of the same rel together.
3510 	 */
3511 	do
3512 	{
3513 		GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
3514 		RelOptInfo *rel = varinfo1->rel;
3515 		double		reldistinct = 1;
3516 		double		relmaxndistinct = reldistinct;
3517 		int			relvarcount = 0;
3518 		List	   *newvarinfos = NIL;
3519 		List	   *relvarinfos = NIL;
3520 
3521 		/*
3522 		 * Split the list of varinfos in two - one for the current rel, one
3523 		 * for remaining Vars on other rels.
3524 		 */
3525 		relvarinfos = lappend(relvarinfos, varinfo1);
3526 		for_each_from(l, varinfos, 1)
3527 		{
3528 			GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3529 
3530 			if (varinfo2->rel == varinfo1->rel)
3531 			{
3532 				/* varinfos on current rel */
3533 				relvarinfos = lappend(relvarinfos, varinfo2);
3534 			}
3535 			else
3536 			{
3537 				/* not time to process varinfo2 yet */
3538 				newvarinfos = lappend(newvarinfos, varinfo2);
3539 			}
3540 		}
3541 
3542 		/*
	 * Get the numdistinct estimate for the Vars of this rel.  We
	 * iteratively search for multivariate n-distinct statistics covering
	 * the maximum number of vars; assuming that each matched group of vars
	 * is independent of the others, we multiply their estimates together.
	 * Any relvarinfos remaining after no more multivariate matches are
	 * found are assumed independent too, so their individual ndistinct
	 * estimates are multiplied in as well.
3549 		 *
3550 		 * While iterating, count how many separate numdistinct values we
3551 		 * apply.  We apply a fudge factor below, but only if we multiplied
	 * more than one such value.
3553 		 */
3554 		while (relvarinfos)
3555 		{
3556 			double		mvndistinct;
3557 
3558 			if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
3559 												&mvndistinct))
3560 			{
3561 				reldistinct *= mvndistinct;
3562 				if (relmaxndistinct < mvndistinct)
3563 					relmaxndistinct = mvndistinct;
3564 				relvarcount++;
3565 			}
3566 			else
3567 			{
3568 				foreach(l, relvarinfos)
3569 				{
3570 					GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3571 
3572 					reldistinct *= varinfo2->ndistinct;
3573 					if (relmaxndistinct < varinfo2->ndistinct)
3574 						relmaxndistinct = varinfo2->ndistinct;
3575 					relvarcount++;
3576 				}
3577 
3578 				/* we're done with this relation */
3579 				relvarinfos = NIL;
3580 			}
3581 		}
3582 
3583 		/*
3584 		 * Sanity check --- don't divide by zero if empty relation.
3585 		 */
3586 		Assert(IS_SIMPLE_REL(rel));
3587 		if (rel->tuples > 0)
3588 		{
3589 			/*
3590 			 * Clamp to size of rel, or size of rel / 10 if multiple Vars. The
3591 			 * fudge factor is because the Vars are probably correlated but we
3592 			 * don't know by how much.  We should never clamp to less than the
3593 			 * largest ndistinct value for any of the Vars, though, since
3594 			 * there will surely be at least that many groups.
3595 			 */
3596 			double		clamp = rel->tuples;
3597 
3598 			if (relvarcount > 1)
3599 			{
3600 				clamp *= 0.1;
3601 				if (clamp < relmaxndistinct)
3602 				{
3603 					clamp = relmaxndistinct;
3604 					/* for sanity in case some ndistinct is too large: */
3605 					if (clamp > rel->tuples)
3606 						clamp = rel->tuples;
3607 				}
3608 			}
3609 			if (reldistinct > clamp)
3610 				reldistinct = clamp;
3611 
3612 			/*
3613 			 * Update the estimate based on the restriction selectivity,
3614 			 * guarding against division by zero when reldistinct is zero.
3615 			 * Also skip this if we know that we are returning all rows.
3616 			 */
3617 			if (reldistinct > 0 && rel->rows < rel->tuples)
3618 			{
3619 				/*
3620 				 * Given a table containing N rows with n distinct values in a
3621 				 * uniform distribution, if we select p rows at random then
3622 				 * the expected number of distinct values selected is
3623 				 *
3624 				 * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
3625 				 *
3626 				 * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
3627 				 *
3628 				 * See "Approximating block accesses in database
3629 				 * organizations", S. B. Yao, Communications of the ACM,
3630 				 * Volume 20 Issue 4, April 1977 Pages 260-261.
3631 				 *
3632 				 * Alternatively, re-arranging the terms from the factorials,
3633 				 * this may be written as
3634 				 *
3635 				 * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
3636 				 *
3637 				 * This form of the formula is more efficient to compute in
3638 				 * the common case where p is larger than N/n.  Additionally,
3639 				 * as pointed out by Dell'Era, if i << N for all terms in the
3640 				 * product, it can be approximated by
3641 				 *
3642 				 * n * (1 - ((N-p)/N)^(N/n))
3643 				 *
3644 				 * See "Expected distinct values when selecting from a bag
3645 				 * without replacement", Alberto Dell'Era,
3646 				 * http://www.adellera.it/investigations/distinct_balls/.
3647 				 *
3648 				 * The condition i << N is equivalent to n >> 1, so this is a
3649 				 * good approximation when the number of distinct values in
3650 				 * the table is large.  It turns out that this formula also
3651 				 * works well even when n is small.
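				 *
				 * A hedged numeric illustration: with N = 10000, n = 1000,
				 * and p = 1000, we have N/n = 10 and (N-p)/N = 0.9, so the
				 * estimate is n * (1 - 0.9^10), about 651 distinct values
				 * expected among the selected rows.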
3652 				 */
3653 				reldistinct *=
3654 					(1 - pow((rel->tuples - rel->rows) / rel->tuples,
3655 							 rel->tuples / reldistinct));
3656 			}
3657 			reldistinct = clamp_row_est(reldistinct);
3658 
3659 			/*
3660 			 * Update estimate of total distinct groups.
3661 			 */
3662 			numdistinct *= reldistinct;
3663 		}
3664 
3665 		varinfos = newvarinfos;
3666 	} while (varinfos != NIL);
3667 
3668 	/* Now we can account for the effects of any SRFs */
3669 	numdistinct *= srf_multiplier;
3670 
3671 	/* Round off */
3672 	numdistinct = ceil(numdistinct);
3673 
3674 	/* Guard against out-of-range answers */
3675 	if (numdistinct > input_rows)
3676 		numdistinct = input_rows;
3677 	if (numdistinct < 1.0)
3678 		numdistinct = 1.0;
3679 
3680 	return numdistinct;
3681 }
3682 
3683 /*
3684  * Estimate hash bucket statistics when the specified expression is used
3685  * as a hash key for the given number of buckets.
3686  *
3687  * This attempts to determine two values:
3688  *
3689  * 1. The frequency of the most common value of the expression (returns
3690  * zero into *mcv_freq if we can't get that).
3691  *
3692  * 2. The "bucketsize fraction", ie, average number of entries in a bucket
3693  * divided by total tuples in relation.
3694  *
3695  * XXX This is really pretty bogus since we're effectively assuming that the
3696  * distribution of hash keys will be the same after applying restriction
3697  * clauses as it was in the underlying relation.  However, we are not nearly
3698  * smart enough to figure out how the restrict clauses might change the
3699  * distribution, so this will have to do for now.
3700  *
3701  * We are passed the number of buckets the executor will use for the given
3702  * input relation.  If the data were perfectly distributed, with the same
3703  * number of tuples going into each available bucket, then the bucketsize
3704  * fraction would be 1/nbuckets.  But this happy state of affairs will occur
3705  * only if (a) there are at least nbuckets distinct data values, and (b)
3706  * we have a not-too-skewed data distribution.  Otherwise the buckets will
3707  * be nonuniformly occupied.  If the other relation in the join has a key
3708  * distribution similar to this one's, then the most-loaded buckets are
3709  * exactly those that will be probed most often.  Therefore, the "average"
3710  * bucket size for costing purposes should really be taken as something close
3711  * to the "worst case" bucket size.  We try to estimate this by adjusting the
3712  * fraction if there are too few distinct data values, and then scaling up
3713  * by the ratio of the most common value's frequency to the average frequency.
3714  *
3715  * If no statistics are available, use a default estimate of 0.1.  This will
3716  * discourage use of a hash rather strongly if the inner relation is large,
3717  * which is what we want.  We do not want to hash unless we know that the
3718  * inner rel is well-dispersed (or the alternatives seem much worse).
3719  *
3720  * The caller should also check that the mcv_freq is not so large that the
3721  * most common value would by itself require an impractically large bucket.
3722  * In a hash join, the executor can split buckets if they get too big, but
3723  * obviously that doesn't help for a bucket that contains many duplicates of
3724  * the same value.
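 *
 * A hedged numeric sketch (illustrative values only): with nbuckets = 1024
 * and ndistinct = 100, the initial bucketsize fraction is 1/100; if the MCV
 * frequency is 0.05 against an average frequency of 0.01, the fraction is
 * scaled by 5 to 0.05, i.e. each probe is expected to examine about 5% of
 * the inner tuples.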
3725  */
3726 void
3727 estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets,
3728 						   Selectivity *mcv_freq,
3729 						   Selectivity *bucketsize_frac)
3730 {
3731 	VariableStatData vardata;
3732 	double		estfract,
3733 				ndistinct,
3734 				stanullfrac,
3735 				avgfreq;
3736 	bool		isdefault;
3737 	AttStatsSlot sslot;
3738 
3739 	examine_variable(root, hashkey, 0, &vardata);
3740 
3741 	/* Look up the frequency of the most common value, if available */
3742 	*mcv_freq = 0.0;
3743 
3744 	if (HeapTupleIsValid(vardata.statsTuple))
3745 	{
3746 		if (get_attstatsslot(&sslot, vardata.statsTuple,
3747 							 STATISTIC_KIND_MCV, InvalidOid,
3748 							 ATTSTATSSLOT_NUMBERS))
3749 		{
3750 			/*
3751 			 * The first MCV stat is for the most common value.
3752 			 */
3753 			if (sslot.nnumbers > 0)
3754 				*mcv_freq = sslot.numbers[0];
3755 			free_attstatsslot(&sslot);
3756 		}
3757 	}
3758 
3759 	/* Get number of distinct values */
3760 	ndistinct = get_variable_numdistinct(&vardata, &isdefault);
3761 
3762 	/*
3763 	 * If ndistinct isn't real, punt.  We normally return 0.1, but if the
3764 	 * mcv_freq is known to be even higher than that, use it instead.
3765 	 */
3766 	if (isdefault)
3767 	{
3768 		*bucketsize_frac = (Selectivity) Max(0.1, *mcv_freq);
3769 		ReleaseVariableStats(vardata);
3770 		return;
3771 	}
3772 
3773 	/* Get fraction that are null */
3774 	if (HeapTupleIsValid(vardata.statsTuple))
3775 	{
3776 		Form_pg_statistic stats;
3777 
3778 		stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
3779 		stanullfrac = stats->stanullfrac;
3780 	}
3781 	else
3782 		stanullfrac = 0.0;
3783 
3784 	/* Compute avg freq of all distinct data values in raw relation */
3785 	avgfreq = (1.0 - stanullfrac) / ndistinct;
3786 
3787 	/*
3788 	 * Adjust ndistinct to account for restriction clauses.  Observe we are
3789 	 * assuming that the data distribution is affected uniformly by the
3790 	 * restriction clauses!
3791 	 *
3792 	 * XXX Possibly better way, but much more expensive: multiply by
3793 	 * selectivity of rel's restriction clauses that mention the target Var.
3794 	 */
3795 	if (vardata.rel && vardata.rel->tuples > 0)
3796 	{
3797 		ndistinct *= vardata.rel->rows / vardata.rel->tuples;
3798 		ndistinct = clamp_row_est(ndistinct);
3799 	}
3800 
3801 	/*
3802 	 * Initial estimate of bucketsize fraction is 1/nbuckets as long as the
3803 	 * number of buckets is less than the expected number of distinct values;
3804 	 * otherwise it is 1/ndistinct.
3805 	 */
3806 	if (ndistinct > nbuckets)
3807 		estfract = 1.0 / nbuckets;
3808 	else
3809 		estfract = 1.0 / ndistinct;
3810 
3811 	/*
3812 	 * Adjust estimated bucketsize upward to account for skewed distribution.
3813 	 */
3814 	if (avgfreq > 0.0 && *mcv_freq > avgfreq)
3815 		estfract *= *mcv_freq / avgfreq;
3816 
3817 	/*
3818 	 * Clamp bucketsize to sane range (the above adjustment could easily
3819 	 * produce an out-of-range result).  We set the lower bound a little above
3820 	 * zero, since zero isn't a very sane result.
3821 	 */
3822 	if (estfract < 1.0e-6)
3823 		estfract = 1.0e-6;
3824 	else if (estfract > 1.0)
3825 		estfract = 1.0;
3826 
3827 	*bucketsize_frac = (Selectivity) estfract;
3828 
3829 	ReleaseVariableStats(vardata);
3830 }
3831 
3832 /*
3833  * estimate_hashagg_tablesize
3834  *	  estimate the number of bytes that a hash aggregate hashtable will
3835  *	  require based on the agg_costs, path width and number of groups.
3836  *
3837  * We return the result as "double" to forestall any possible overflow
3838  * problem in the multiplication by dNumGroups.
3839  *
3840  * XXX this may be over-estimating the size now that hashagg knows to omit
3841  * unneeded columns from the hashtable.  Also for mixed-mode grouping sets,
3842  * grouping columns not in the hashed set are counted here even though hashagg
3843  * won't store them.  Is this a problem?
3844  */
3845 double
3846 estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
3847 						   double dNumGroups)
3848 {
3849 	Size		hashentrysize = hash_agg_entry_size(agg_costs->numAggs,
3850 													path->pathtarget->width,
3851 													agg_costs->transitionSpace);
3852 
3853 	/*
3854 	 * Note that this disregards the effect of fill-factor and growth policy
3855 	 * of the hash table.  That's probably ok, given that the default
3856 	 * fill-factor is relatively high.  It'd be hard to meaningfully factor in
3857 	 * "double-in-size" growth policies here.
3858 	 */
3859 	return hashentrysize * dNumGroups;
3860 }
3861 
3862 
3863 /*-------------------------------------------------------------------------
3864  *
3865  * Support routines
3866  *
3867  *-------------------------------------------------------------------------
3868  */
3869 
3870 /*
3871  * Find applicable ndistinct statistics for the given list of VarInfos (which
3872  * must all belong to the given rel), and update *ndistinct to the estimate of
 * the MVNDistinctItem that best matches.  If a match is found, *varinfos is
 * updated to remove the matched varinfos.
3875  *
3876  * Varinfos that aren't for simple Vars are ignored.
3877  *
3878  * Return true if we're able to find a match, false otherwise.
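 *
 * For example (hypothetical statistics object): given CREATE STATISTICS
 * s (ndistinct) ON a, b FROM t, a GROUP BY a, b, c over t can consume the
 * (a, b) item from s, after which only c's per-column ndistinct estimate
 * remains to be multiplied in by the caller.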
3879  */
3880 static bool
3881 estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
3882 								List **varinfos, double *ndistinct)
3883 {
3884 	ListCell   *lc;
3885 	Bitmapset  *attnums = NULL;
3886 	int			nmatches;
3887 	Oid			statOid = InvalidOid;
3888 	MVNDistinct *stats;
3889 	Bitmapset  *matched = NULL;
3890 
3891 	/* bail out immediately if the table has no extended statistics */
3892 	if (!rel->statlist)
3893 		return false;
3894 
3895 	/* Determine the attnums we're looking for */
3896 	foreach(lc, *varinfos)
3897 	{
3898 		GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
3899 		AttrNumber	attnum;
3900 
3901 		Assert(varinfo->rel == rel);
3902 
3903 		if (!IsA(varinfo->var, Var))
3904 			continue;
3905 
3906 		attnum = ((Var *) varinfo->var)->varattno;
3907 
3908 		if (!AttrNumberIsForUserDefinedAttr(attnum))
3909 			continue;
3910 
3911 		attnums = bms_add_member(attnums, attnum);
3912 	}
3913 
3914 	/* look for the ndistinct statistics matching the most vars */
3915 	nmatches = 1;				/* we require at least two matches */
3916 	foreach(lc, rel->statlist)
3917 	{
3918 		StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
3919 		Bitmapset  *shared;
3920 		int			nshared;
3921 
3922 		/* skip statistics of other kinds */
3923 		if (info->kind != STATS_EXT_NDISTINCT)
3924 			continue;
3925 
3926 		/* compute attnums shared by the vars and the statistics object */
3927 		shared = bms_intersect(info->keys, attnums);
3928 		nshared = bms_num_members(shared);
3929 
3930 		/*
3931 		 * Does this statistics object match more columns than the currently
3932 		 * best object?  If so, use this one instead.
3933 		 *
		 * XXX This should break ties using the name of the object, or
		 * something like that, to make the outcome stable.
3936 		 */
3937 		if (nshared > nmatches)
3938 		{
3939 			statOid = info->statOid;
3940 			nmatches = nshared;
3941 			matched = shared;
3942 		}
3943 	}
3944 
3945 	/* No match? */
3946 	if (statOid == InvalidOid)
3947 		return false;
3948 	Assert(nmatches > 1 && matched != NULL);
3949 
3950 	stats = statext_ndistinct_load(statOid);
3951 
3952 	/*
3953 	 * If we have a match, search it for the specific item that matches (there
3954 	 * must be one), and construct the output values.
3955 	 */
3956 	if (stats)
3957 	{
3958 		int			i;
3959 		List	   *newlist = NIL;
3960 		MVNDistinctItem *item = NULL;
3961 
3962 		/* Find the specific item that exactly matches the combination */
3963 		for (i = 0; i < stats->nitems; i++)
3964 		{
3965 			MVNDistinctItem *tmpitem = &stats->items[i];
3966 
3967 			if (bms_subset_compare(tmpitem->attrs, matched) == BMS_EQUAL)
3968 			{
3969 				item = tmpitem;
3970 				break;
3971 			}
3972 		}
3973 
3974 		/* make sure we found an item */
3975 		if (!item)
3976 			elog(ERROR, "corrupt MVNDistinct entry");
3977 
3978 		/* Form the output varinfo list, keeping only unmatched ones */
3979 		foreach(lc, *varinfos)
3980 		{
3981 			GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
3982 			AttrNumber	attnum;
3983 
3984 			if (!IsA(varinfo->var, Var))
3985 			{
3986 				newlist = lappend(newlist, varinfo);
3987 				continue;
3988 			}
3989 
3990 			attnum = ((Var *) varinfo->var)->varattno;
3991 
3992 			if (AttrNumberIsForUserDefinedAttr(attnum) &&
3993 				bms_is_member(attnum, matched))
3994 				continue;
3995 
3996 			newlist = lappend(newlist, varinfo);
3997 		}
3998 
3999 		*varinfos = newlist;
4000 		*ndistinct = item->ndistinct;
4001 		return true;
4002 	}
4003 
4004 	return false;
4005 }
4006 
4007 /*
4008  * convert_to_scalar
4009  *	  Convert non-NULL values of the indicated types to the comparison
4010  *	  scale needed by scalarineqsel().
4011  *	  Returns "true" if successful.
4012  *
4013  * XXX this routine is a hack: ideally we should look up the conversion
4014  * subroutines in pg_type.
4015  *
4016  * All numeric datatypes are simply converted to their equivalent
4017  * "double" values.  (NUMERIC values that are outside the range of "double"
4018  * are clamped to +/- HUGE_VAL.)
4019  *
4020  * String datatypes are converted by convert_string_to_scalar(),
4021  * which is explained below.  The reason why this routine deals with
4022  * three values at a time, not just one, is that we need it for strings.
4023  *
4024  * The bytea datatype is just enough different from strings that it has
4025  * to be treated separately.
4026  *
4027  * The several datatypes representing absolute times are all converted
4028  * to Timestamp, which is actually a double, and then we just use that
4029  * double value.  Note this will give correct results even for the "special"
4030  * values of Timestamp, since those are chosen to compare correctly;
4031  * see timestamp_cmp.
4032  *
4033  * The several datatypes representing relative times (intervals) are all
4034  * converted to measurements expressed in seconds.
4035  */
4036 static bool
4037 convert_to_scalar(Datum value, Oid valuetypid, Oid collid, double *scaledvalue,
4038 				  Datum lobound, Datum hibound, Oid boundstypid,
4039 				  double *scaledlobound, double *scaledhibound)
4040 {
4041 	bool		failure = false;
4042 
4043 	/*
4044 	 * Both the valuetypid and the boundstypid should exactly match the
4045 	 * declared input type(s) of the operator we are invoked for.  However,
4046 	 * extensions might try to use scalarineqsel as estimator for operators
4047 	 * with input type(s) we don't handle here; in such cases, we want to
4048 	 * return false, not fail.  In any case, we mustn't assume that valuetypid
4049 	 * and boundstypid are identical.
4050 	 *
4051 	 * XXX The histogram we are interpolating between points of could belong
4052 	 * to a column that's only binary-compatible with the declared type. In
4053 	 * essence we are assuming that the semantics of binary-compatible types
4054 	 * are enough alike that we can use a histogram generated with one type's
4055 	 * operators to estimate selectivity for the other's.  This is outright
4056 	 * wrong in some cases --- in particular signed versus unsigned
4057 	 * interpretation could trip us up.  But it's useful enough in the
4058 	 * majority of cases that we do it anyway.  Should think about more
4059 	 * rigorous ways to do it.
4060 	 */
4061 	switch (valuetypid)
4062 	{
4063 			/*
4064 			 * Built-in numeric types
4065 			 */
4066 		case BOOLOID:
4067 		case INT2OID:
4068 		case INT4OID:
4069 		case INT8OID:
4070 		case FLOAT4OID:
4071 		case FLOAT8OID:
4072 		case NUMERICOID:
4073 		case OIDOID:
4074 		case REGPROCOID:
4075 		case REGPROCEDUREOID:
4076 		case REGOPEROID:
4077 		case REGOPERATOROID:
4078 		case REGCLASSOID:
4079 		case REGTYPEOID:
4080 		case REGCONFIGOID:
4081 		case REGDICTIONARYOID:
4082 		case REGROLEOID:
4083 		case REGNAMESPACEOID:
4084 			*scaledvalue = convert_numeric_to_scalar(value, valuetypid,
4085 													 &failure);
4086 			*scaledlobound = convert_numeric_to_scalar(lobound, boundstypid,
4087 													   &failure);
4088 			*scaledhibound = convert_numeric_to_scalar(hibound, boundstypid,
4089 													   &failure);
4090 			return !failure;
4091 
4092 			/*
4093 			 * Built-in string types
4094 			 */
4095 		case CHAROID:
4096 		case BPCHAROID:
4097 		case VARCHAROID:
4098 		case TEXTOID:
4099 		case NAMEOID:
4100 			{
4101 				char	   *valstr = convert_string_datum(value, valuetypid,
4102 														  collid, &failure);
4103 				char	   *lostr = convert_string_datum(lobound, boundstypid,
4104 														 collid, &failure);
4105 				char	   *histr = convert_string_datum(hibound, boundstypid,
4106 														 collid, &failure);
4107 
4108 				/*
4109 				 * Bail out if any of the values is not of string type.  We
4110 				 * might leak converted strings for the other value(s), but
4111 				 * that's not worth troubling over.
4112 				 */
4113 				if (failure)
4114 					return false;
4115 
4116 				convert_string_to_scalar(valstr, scaledvalue,
4117 										 lostr, scaledlobound,
4118 										 histr, scaledhibound);
4119 				pfree(valstr);
4120 				pfree(lostr);
4121 				pfree(histr);
4122 				return true;
4123 			}
4124 
4125 			/*
4126 			 * Built-in bytea type
4127 			 */
4128 		case BYTEAOID:
4129 			{
4130 				/* We only support bytea vs bytea comparison */
4131 				if (boundstypid != BYTEAOID)
4132 					return false;
4133 				convert_bytea_to_scalar(value, scaledvalue,
4134 										lobound, scaledlobound,
4135 										hibound, scaledhibound);
4136 				return true;
4137 			}
4138 
4139 			/*
4140 			 * Built-in time types
4141 			 */
4142 		case TIMESTAMPOID:
4143 		case TIMESTAMPTZOID:
4144 		case DATEOID:
4145 		case INTERVALOID:
4146 		case TIMEOID:
4147 		case TIMETZOID:
4148 			*scaledvalue = convert_timevalue_to_scalar(value, valuetypid,
4149 													   &failure);
4150 			*scaledlobound = convert_timevalue_to_scalar(lobound, boundstypid,
4151 														 &failure);
4152 			*scaledhibound = convert_timevalue_to_scalar(hibound, boundstypid,
4153 														 &failure);
4154 			return !failure;
4155 
4156 			/*
4157 			 * Built-in network types
4158 			 */
4159 		case INETOID:
4160 		case CIDROID:
4161 		case MACADDROID:
4162 		case MACADDR8OID:
4163 			*scaledvalue = convert_network_to_scalar(value, valuetypid,
4164 													 &failure);
4165 			*scaledlobound = convert_network_to_scalar(lobound, boundstypid,
4166 													   &failure);
4167 			*scaledhibound = convert_network_to_scalar(hibound, boundstypid,
4168 													   &failure);
4169 			return !failure;
4170 	}
4171 	/* Don't know how to convert */
4172 	*scaledvalue = *scaledlobound = *scaledhibound = 0;
4173 	return false;
4174 }
4175 
4176 /*
4177  * Do convert_to_scalar()'s work for any numeric data type.
4178  *
4179  * On failure (e.g., unsupported typid), set *failure to true;
4180  * otherwise, that variable is not changed.
4181  */
4182 static double
4183 convert_numeric_to_scalar(Datum value, Oid typid, bool *failure)
4184 {
4185 	switch (typid)
4186 	{
4187 		case BOOLOID:
4188 			return (double) DatumGetBool(value);
4189 		case INT2OID:
4190 			return (double) DatumGetInt16(value);
4191 		case INT4OID:
4192 			return (double) DatumGetInt32(value);
4193 		case INT8OID:
4194 			return (double) DatumGetInt64(value);
4195 		case FLOAT4OID:
4196 			return (double) DatumGetFloat4(value);
4197 		case FLOAT8OID:
4198 			return (double) DatumGetFloat8(value);
4199 		case NUMERICOID:
4200 			/* Note: out-of-range values will be clamped to +-HUGE_VAL */
4201 			return (double)
4202 				DatumGetFloat8(DirectFunctionCall1(numeric_float8_no_overflow,
4203 												   value));
4204 		case OIDOID:
4205 		case REGPROCOID:
4206 		case REGPROCEDUREOID:
4207 		case REGOPEROID:
4208 		case REGOPERATOROID:
4209 		case REGCLASSOID:
4210 		case REGTYPEOID:
4211 		case REGCONFIGOID:
4212 		case REGDICTIONARYOID:
4213 		case REGROLEOID:
4214 		case REGNAMESPACEOID:
4215 			/* we can treat OIDs as integers... */
4216 			return (double) DatumGetObjectId(value);
4217 	}
4218 
4219 	*failure = true;
4220 	return 0;
4221 }
4222 
4223 /*
4224  * Do convert_to_scalar()'s work for any character-string data type.
4225  *
4226  * String datatypes are converted to a scale that ranges from 0 to 1,
4227  * where we visualize the bytes of the string as fractional digits.
4228  *
4229  * We do not want the base to be 256, however, since that tends to
4230  * generate inflated selectivity estimates; few databases will have
4231  * occurrences of all 256 possible byte values at each position.
4232  * Instead, use the smallest and largest byte values seen in the bounds
4233  * as the estimated range for each byte, after some fudging to deal with
4234  * the fact that we probably aren't going to see the full range that way.
4235  *
4236  * An additional refinement is that we discard any common prefix of the
4237  * three strings before computing the scaled values.  This allows us to
4238  * "zoom in" when we encounter a narrow data range.  An example is a phone
4239  * number database where all the values begin with the same area code.
4240  * (Actually, the bounds will be adjacent histogram-bin-boundary values,
4241  * so this is more likely to happen than you might think.)
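 *
 * A hedged worked example: if the detected range is 'a'..'z' (base 26),
 * convert_one_string_to_scalar below maps "cat" to 2/26 + 0/26^2 + 19/26^3,
 * about 0.078 --- in effect the string is read as a base-26 fraction.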
4242  */
4243 static void
4244 convert_string_to_scalar(char *value,
4245 						 double *scaledvalue,
4246 						 char *lobound,
4247 						 double *scaledlobound,
4248 						 char *hibound,
4249 						 double *scaledhibound)
4250 {
4251 	int			rangelo,
4252 				rangehi;
4253 	char	   *sptr;
4254 
4255 	rangelo = rangehi = (unsigned char) hibound[0];
4256 	for (sptr = lobound; *sptr; sptr++)
4257 	{
4258 		if (rangelo > (unsigned char) *sptr)
4259 			rangelo = (unsigned char) *sptr;
4260 		if (rangehi < (unsigned char) *sptr)
4261 			rangehi = (unsigned char) *sptr;
4262 	}
4263 	for (sptr = hibound; *sptr; sptr++)
4264 	{
4265 		if (rangelo > (unsigned char) *sptr)
4266 			rangelo = (unsigned char) *sptr;
4267 		if (rangehi < (unsigned char) *sptr)
4268 			rangehi = (unsigned char) *sptr;
4269 	}
4270 	/* If range includes any upper-case ASCII chars, make it include all */
4271 	if (rangelo <= 'Z' && rangehi >= 'A')
4272 	{
4273 		if (rangelo > 'A')
4274 			rangelo = 'A';
4275 		if (rangehi < 'Z')
4276 			rangehi = 'Z';
4277 	}
4278 	/* Ditto lower-case */
4279 	if (rangelo <= 'z' && rangehi >= 'a')
4280 	{
4281 		if (rangelo > 'a')
4282 			rangelo = 'a';
4283 		if (rangehi < 'z')
4284 			rangehi = 'z';
4285 	}
4286 	/* Ditto digits */
4287 	if (rangelo <= '9' && rangehi >= '0')
4288 	{
4289 		if (rangelo > '0')
4290 			rangelo = '0';
4291 		if (rangehi < '9')
4292 			rangehi = '9';
4293 	}
4294 
4295 	/*
	 * If the range includes fewer than 10 chars, assume we have not got
	 * enough data, and make it include the regular ASCII set.
4298 	 */
4299 	if (rangehi - rangelo < 9)
4300 	{
4301 		rangelo = ' ';
4302 		rangehi = 127;
4303 	}
4304 
4305 	/*
4306 	 * Now strip any common prefix of the three strings.
4307 	 */
4308 	while (*lobound)
4309 	{
4310 		if (*lobound != *hibound || *lobound != *value)
4311 			break;
4312 		lobound++, hibound++, value++;
4313 	}
4314 
4315 	/*
4316 	 * Now we can do the conversions.
4317 	 */
4318 	*scaledvalue = convert_one_string_to_scalar(value, rangelo, rangehi);
4319 	*scaledlobound = convert_one_string_to_scalar(lobound, rangelo, rangehi);
4320 	*scaledhibound = convert_one_string_to_scalar(hibound, rangelo, rangehi);
4321 }
4322 
4323 static double
4324 convert_one_string_to_scalar(char *value, int rangelo, int rangehi)
4325 {
4326 	int			slen = strlen(value);
4327 	double		num,
4328 				denom,
4329 				base;
4330 
4331 	if (slen <= 0)
4332 		return 0.0;				/* empty string has scalar value 0 */
4333 
4334 	/*
4335 	 * There seems little point in considering more than a dozen bytes from
4336 	 * the string.  Since base is at least 10, that will give us nominal
4337 	 * resolution of at least 12 decimal digits, which is surely far more
4338 	 * precision than this estimation technique has got anyway (especially in
4339 	 * non-C locales).  Also, even with the maximum possible base of 256, this
4340 	 * ensures denom cannot grow larger than 256^13 = 2.03e31, which will not
4341 	 * overflow on any known machine.
4342 	 */
4343 	if (slen > 12)
4344 		slen = 12;
4345 
4346 	/* Convert initial characters to fraction */
4347 	base = rangehi - rangelo + 1;
4348 	num = 0.0;
4349 	denom = base;
4350 	while (slen-- > 0)
4351 	{
4352 		int			ch = (unsigned char) *value++;
4353 
4354 		if (ch < rangelo)
4355 			ch = rangelo - 1;
4356 		else if (ch > rangehi)
4357 			ch = rangehi + 1;
4358 		num += ((double) (ch - rangelo)) / denom;
4359 		denom *= base;
4360 	}
4361 
4362 	return num;
4363 }
4364 
4365 /*
4366  * Convert a string-type Datum into a palloc'd, null-terminated string.
4367  *
4368  * On failure (e.g., unsupported typid), set *failure to true;
4369  * otherwise, that variable is not changed.  (We'll return NULL on failure.)
4370  *
4371  * When using a non-C locale, we must pass the string through strxfrm()
4372  * before continuing, so as to generate correct locale-specific results.
4373  */
4374 static char *
4375 convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure)
4376 {
4377 	char	   *val;
4378 
4379 	switch (typid)
4380 	{
4381 		case CHAROID:
4382 			val = (char *) palloc(2);
4383 			val[0] = DatumGetChar(value);
4384 			val[1] = '\0';
4385 			break;
4386 		case BPCHAROID:
4387 		case VARCHAROID:
4388 		case TEXTOID:
4389 			val = TextDatumGetCString(value);
4390 			break;
4391 		case NAMEOID:
4392 			{
4393 				NameData   *nm = (NameData *) DatumGetPointer(value);
4394 
4395 				val = pstrdup(NameStr(*nm));
4396 				break;
4397 			}
4398 		default:
4399 			*failure = true;
4400 			return NULL;
4401 	}
4402 
4403 	if (!lc_collate_is_c(collid))
4404 	{
4405 		char	   *xfrmstr;
4406 		size_t		xfrmlen;
4407 		size_t		xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;
4408 
4409 		/*
4410 		 * XXX: We could guess at a suitable output buffer size and only call
4411 		 * strxfrm twice if our guess is too small.
4412 		 *
		 * XXX: strxfrm doesn't support UTF-8 encoding on Win32; it can return
		 * bogus data or set an error.  This is not really a problem unless it
		 * crashes, since it will only cause an estimation error, nothing
		 * fatal.
4417 		 */
4418 		xfrmlen = strxfrm(NULL, val, 0);
4419 #ifdef WIN32
4420 
4421 		/*
4422 		 * On Windows, strxfrm returns INT_MAX when an error occurs. Instead
4423 		 * of trying to allocate this much memory (and fail), just return the
4424 		 * original string unmodified as if we were in the C locale.
4425 		 */
4426 		if (xfrmlen == INT_MAX)
4427 			return val;
4428 #endif
4429 		xfrmstr = (char *) palloc(xfrmlen + 1);
4430 		xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1);
4431 
4432 		/*
4433 		 * Some systems (e.g., glibc) can return a smaller value from the
4434 		 * second call than the first; thus the Assert must be <= not ==.
4435 		 */
4436 		Assert(xfrmlen2 <= xfrmlen);
4437 		pfree(val);
4438 		val = xfrmstr;
4439 	}
4440 
4441 	return val;
4442 }
4443 
4444 /*
4445  * Do convert_to_scalar()'s work for any bytea data type.
4446  *
4447  * Very similar to convert_string_to_scalar except we can't assume
4448  * null-termination and therefore pass explicit lengths around.
4449  *
4450  * Also, assumptions about likely "normal" ranges of characters have been
 * removed --- a data range of 0..255 is always used, for now.  (Perhaps
4452  * someday we will add information about actual byte data range to
4453  * pg_statistic.)
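 *
 * A hedged worked example: with the fixed 0..255 range (base 256), a bytea
 * whose first byte is 0x80 scales to approximately 128/256 = 0.5, later
 * bytes contributing successively smaller base-256 fractional digits.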
4454  */
4455 static void
4456 convert_bytea_to_scalar(Datum value,
4457 						double *scaledvalue,
4458 						Datum lobound,
4459 						double *scaledlobound,
4460 						Datum hibound,
4461 						double *scaledhibound)
4462 {
4463 	bytea	   *valuep = DatumGetByteaPP(value);
4464 	bytea	   *loboundp = DatumGetByteaPP(lobound);
4465 	bytea	   *hiboundp = DatumGetByteaPP(hibound);
4466 	int			rangelo,
4467 				rangehi,
4468 				valuelen = VARSIZE_ANY_EXHDR(valuep),
4469 				loboundlen = VARSIZE_ANY_EXHDR(loboundp),
4470 				hiboundlen = VARSIZE_ANY_EXHDR(hiboundp),
4471 				i,
4472 				minlen;
4473 	unsigned char *valstr = (unsigned char *) VARDATA_ANY(valuep);
4474 	unsigned char *lostr = (unsigned char *) VARDATA_ANY(loboundp);
4475 	unsigned char *histr = (unsigned char *) VARDATA_ANY(hiboundp);
4476 
4477 	/*
4478 	 * Assume bytea data is uniformly distributed across all byte values.
4479 	 */
4480 	rangelo = 0;
4481 	rangehi = 255;
4482 
4483 	/*
4484 	 * Now strip any common prefix of the three strings.
4485 	 */
4486 	minlen = Min(Min(valuelen, loboundlen), hiboundlen);
4487 	for (i = 0; i < minlen; i++)
4488 	{
4489 		if (*lostr != *histr || *lostr != *valstr)
4490 			break;
4491 		lostr++, histr++, valstr++;
4492 		loboundlen--, hiboundlen--, valuelen--;
4493 	}
4494 
4495 	/*
4496 	 * Now we can do the conversions.
4497 	 */
4498 	*scaledvalue = convert_one_bytea_to_scalar(valstr, valuelen, rangelo, rangehi);
4499 	*scaledlobound = convert_one_bytea_to_scalar(lostr, loboundlen, rangelo, rangehi);
4500 	*scaledhibound = convert_one_bytea_to_scalar(histr, hiboundlen, rangelo, rangehi);
4501 }
4502 
4503 static double
4504 convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
4505 							int rangelo, int rangehi)
4506 {
4507 	double		num,
4508 				denom,
4509 				base;
4510 
4511 	if (valuelen <= 0)
4512 		return 0.0;				/* empty string has scalar value 0 */
4513 
4514 	/*
4515 	 * Since base is 256, need not consider more than about 10 chars (even
4516 	 * this many seems like overkill)
4517 	 */
4518 	if (valuelen > 10)
4519 		valuelen = 10;
4520 
4521 	/* Convert initial characters to fraction */
4522 	base = rangehi - rangelo + 1;
4523 	num = 0.0;
4524 	denom = base;
4525 	while (valuelen-- > 0)
4526 	{
4527 		int			ch = *value++;
4528 
4529 		if (ch < rangelo)
4530 			ch = rangelo - 1;
4531 		else if (ch > rangehi)
4532 			ch = rangehi + 1;
4533 		num += ((double) (ch - rangelo)) / denom;
4534 		denom *= base;
4535 	}
4536 
4537 	return num;
4538 }
4539 
4540 /*
4541  * Do convert_to_scalar()'s work for any timevalue data type.
4542  *
4543  * On failure (e.g., unsupported typid), set *failure to true;
4544  * otherwise, that variable is not changed.
4545  */
4546 static double
4547 convert_timevalue_to_scalar(Datum value, Oid typid, bool *failure)
4548 {
4549 	switch (typid)
4550 	{
4551 		case TIMESTAMPOID:
4552 			return DatumGetTimestamp(value);
4553 		case TIMESTAMPTZOID:
4554 			return DatumGetTimestampTz(value);
4555 		case DATEOID:
4556 			return date2timestamp_no_overflow(DatumGetDateADT(value));
4557 		case INTERVALOID:
4558 			{
4559 				Interval   *interval = DatumGetIntervalP(value);
4560 
4561 				/*
4562 				 * Convert the month part of Interval to days using assumed
4563 				 * average month length of 365.25/12.0 days.  Not too
4564 				 * accurate, but plenty good enough for our purposes.
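				 *
				 * For example (illustrative): '1 month' becomes
				 * 30.4375 * USECS_PER_DAY usec, i.e. about 2.63e12.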
4565 				 */
4566 				return interval->time + interval->day * (double) USECS_PER_DAY +
4567 					interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * USECS_PER_DAY);
4568 			}
4569 		case TIMEOID:
4570 			return DatumGetTimeADT(value);
4571 		case TIMETZOID:
4572 			{
4573 				TimeTzADT  *timetz = DatumGetTimeTzADTP(value);
4574 
4575 				/* use GMT-equivalent time */
4576 				return (double) (timetz->time + (timetz->zone * 1000000.0));
4577 			}
4578 	}
4579 
4580 	*failure = true;
4581 	return 0;
4582 }
4583 
4584 
4585 /*
4586  * get_restriction_variable
4587  *		Examine the args of a restriction clause to see if it's of the
4588  *		form (variable op pseudoconstant) or (pseudoconstant op variable),
4589  *		where "variable" could be either a Var or an expression in vars of a
4590  *		single relation.  If so, extract information about the variable,
4591  *		and also indicate which side it was on and the other argument.
4592  *
4593  * Inputs:
4594  *	root: the planner info
4595  *	args: clause argument list
4596  *	varRelid: see specs for restriction selectivity functions
4597  *
4598  * Outputs: (these are valid only if true is returned)
4599  *	*vardata: gets information about variable (see examine_variable)
4600  *	*other: gets other clause argument, aggressively reduced to a constant
4601  *	*varonleft: set true if variable is on the left, false if on the right
4602  *
4603  * Returns true if a variable is identified, otherwise false.
4604  *
4605  * Note: if there are Vars on both sides of the clause, we must fail, because
4606  * callers are expecting that the other side will act like a pseudoconstant.
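 *
 * A hedged illustration: for WHERE t.x < 42 with varRelid = 0, *vardata
 * describes t.x, *other becomes the Const 42, and *varonleft is set true;
 * for 42 > t.x the same data is returned with *varonleft = false.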
4607  */
4608 bool
4609 get_restriction_variable(PlannerInfo *root, List *args, int varRelid,
4610 						 VariableStatData *vardata, Node **other,
4611 						 bool *varonleft)
4612 {
4613 	Node	   *left,
4614 			   *right;
4615 	VariableStatData rdata;
4616 
4617 	/* Fail if not a binary opclause (probably shouldn't happen) */
4618 	if (list_length(args) != 2)
4619 		return false;
4620 
4621 	left = (Node *) linitial(args);
4622 	right = (Node *) lsecond(args);
4623 
4624 	/*
4625 	 * Examine both sides.  Note that when varRelid is nonzero, Vars of other
4626 	 * relations will be treated as pseudoconstants.
4627 	 */
4628 	examine_variable(root, left, varRelid, vardata);
4629 	examine_variable(root, right, varRelid, &rdata);
4630 
4631 	/*
4632 	 * If one side is a variable and the other not, we win.
4633 	 */
4634 	if (vardata->rel && rdata.rel == NULL)
4635 	{
4636 		*varonleft = true;
4637 		*other = estimate_expression_value(root, rdata.var);
4638 		/* Assume we need no ReleaseVariableStats(rdata) here */
4639 		return true;
4640 	}
4641 
4642 	if (vardata->rel == NULL && rdata.rel)
4643 	{
4644 		*varonleft = false;
4645 		*other = estimate_expression_value(root, vardata->var);
4646 		/* Assume we need no ReleaseVariableStats(*vardata) here */
4647 		*vardata = rdata;
4648 		return true;
4649 	}
4650 
4651 	/* Oops, clause has wrong structure (probably var op var) */
4652 	ReleaseVariableStats(*vardata);
4653 	ReleaseVariableStats(rdata);
4654 
4655 	return false;
4656 }
4657 
4658 /*
4659  * get_join_variables
4660  *		Apply examine_variable() to each side of a join clause.
4661  *		Also, attempt to identify whether the join clause has the same
4662  *		or reversed sense compared to the SpecialJoinInfo.
4663  *
4664  * We consider the join clause "normal" if it is "lhs_var OP rhs_var",
4665  * or "reversed" if it is "rhs_var OP lhs_var".  In complicated cases
4666  * where we can't tell for sure, we default to assuming it's normal.
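 *
 * A hedged illustration: for a.x = b.y where sjinfo's syn_lefthand is {a}
 * and syn_righthand is {b}, the clause is "normal"; written as b.y = a.x
 * it would be reported as reversed.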
4667  */
4668 void
4669 get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo,
4670 				   VariableStatData *vardata1, VariableStatData *vardata2,
4671 				   bool *join_is_reversed)
4672 {
4673 	Node	   *left,
4674 			   *right;
4675 
4676 	if (list_length(args) != 2)
4677 		elog(ERROR, "join operator should take two arguments");
4678 
4679 	left = (Node *) linitial(args);
4680 	right = (Node *) lsecond(args);
4681 
4682 	examine_variable(root, left, 0, vardata1);
4683 	examine_variable(root, right, 0, vardata2);
4684 
4685 	if (vardata1->rel &&
4686 		bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
4687 		*join_is_reversed = true;	/* var1 is on RHS */
4688 	else if (vardata2->rel &&
4689 			 bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand))
4690 		*join_is_reversed = true;	/* var2 is on LHS */
4691 	else
4692 		*join_is_reversed = false;
4693 }
4694 
4695 /*
4696  * examine_variable
4697  *		Try to look up statistical data about an expression.
4698  *		Fill in a VariableStatData struct to describe the expression.
4699  *
4700  * Inputs:
4701  *	root: the planner info
4702  *	node: the expression tree to examine
4703  *	varRelid: see specs for restriction selectivity functions
4704  *
4705  * Outputs: *vardata is filled as follows:
4706  *	var: the input expression (with any binary relabeling stripped, if
4707  *		it is or contains a variable; but otherwise the type is preserved)
4708  *	rel: RelOptInfo for relation containing variable; NULL if expression
4709  *		contains no Vars (NOTE this could point to a RelOptInfo of a
4710  *		subquery, not one in the current query).
4711  *	statsTuple: the pg_statistic entry for the variable, if one exists;
4712  *		otherwise NULL.
4713  *	freefunc: pointer to a function to release statsTuple with.
4714  *	vartype: exposed type of the expression; this should always match
4715  *		the declared input type of the operator we are estimating for.
4716  *	atttype, atttypmod: actual type/typmod of the "var" expression.  This is
4717  *		commonly the same as the exposed type of the variable argument,
4718  *		but can be different in binary-compatible-type cases.
4719  *	isunique: true if we were able to match the var to a unique index or a
4720  *		single-column DISTINCT clause, implying its values are unique for
4721  *		this query.  (Caution: this should be trusted for statistical
4722  *		purposes only, since we do not check indimmediate nor verify that
4723  *		the exact same definition of equality applies.)
4724  *	acl_ok: true if current user has permission to read the column(s)
4725  *		underlying the pg_statistic entry.  This is consulted by
4726  *		statistic_proc_security_check().
4727  *
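 * For example (illustrative): applied to the expression lower(name), this
 * routine can match an expression index on lower(name), if one exists, and
 * return that index's pg_statistic entry as though the expression were a
 * plain column.
 *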
4728  * Caller is responsible for doing ReleaseVariableStats() before exiting.
4729  */
4730 void
4731 examine_variable(PlannerInfo *root, Node *node, int varRelid,
4732 				 VariableStatData *vardata)
4733 {
4734 	Node	   *basenode;
4735 	Relids		varnos;
4736 	RelOptInfo *onerel;
4737 
4738 	/* Make sure we don't return dangling pointers in vardata */
4739 	MemSet(vardata, 0, sizeof(VariableStatData));
4740 
4741 	/* Save the exposed type of the expression */
4742 	vardata->vartype = exprType(node);
4743 
4744 	/* Look inside any binary-compatible relabeling */
4745 
4746 	if (IsA(node, RelabelType))
4747 		basenode = (Node *) ((RelabelType *) node)->arg;
4748 	else
4749 		basenode = node;
4750 
4751 	/* Fast path for a simple Var */
4752 
4753 	if (IsA(basenode, Var) &&
4754 		(varRelid == 0 || varRelid == ((Var *) basenode)->varno))
4755 	{
4756 		Var		   *var = (Var *) basenode;
4757 
4758 		/* Set up result fields other than the stats tuple */
4759 		vardata->var = basenode;	/* return Var without relabeling */
4760 		vardata->rel = find_base_rel(root, var->varno);
4761 		vardata->atttype = var->vartype;
4762 		vardata->atttypmod = var->vartypmod;
4763 		vardata->isunique = has_unique_index(vardata->rel, var->varattno);
4764 
4765 		/* Try to locate some stats */
4766 		examine_simple_variable(root, var, vardata);
4767 
4768 		return;
4769 	}
4770 
4771 	/*
4772 	 * Okay, it's a more complicated expression.  Determine variable
4773 	 * membership.  Note that when varRelid isn't zero, only vars of that
4774 	 * relation are considered "real" vars.
4775 	 */
4776 	varnos = pull_varnos(root, basenode);
4777 
4778 	onerel = NULL;
4779 
4780 	switch (bms_membership(varnos))
4781 	{
4782 		case BMS_EMPTY_SET:
4783 			/* No Vars at all ... must be pseudo-constant clause */
4784 			break;
4785 		case BMS_SINGLETON:
4786 			if (varRelid == 0 || bms_is_member(varRelid, varnos))
4787 			{
4788 				onerel = find_base_rel(root,
4789 									   (varRelid ? varRelid : bms_singleton_member(varnos)));
4790 				vardata->rel = onerel;
4791 				node = basenode;	/* strip any relabeling */
4792 			}
4793 			/* else treat it as a constant */
4794 			break;
4795 		case BMS_MULTIPLE:
4796 			if (varRelid == 0)
4797 			{
4798 				/* treat it as a variable of a join relation */
4799 				vardata->rel = find_join_rel(root, varnos);
4800 				node = basenode;	/* strip any relabeling */
4801 			}
4802 			else if (bms_is_member(varRelid, varnos))
4803 			{
4804 				/* ignore the vars belonging to other relations */
4805 				vardata->rel = find_base_rel(root, varRelid);
4806 				node = basenode;	/* strip any relabeling */
4807 				/* note: no point in expressional-index search here */
4808 			}
4809 			/* else treat it as a constant */
4810 			break;
4811 	}
4812 
4813 	bms_free(varnos);
4814 
4815 	vardata->var = node;
4816 	vardata->atttype = exprType(node);
4817 	vardata->atttypmod = exprTypmod(node);
4818 
4819 	if (onerel)
4820 	{
4821 		/*
4822 		 * We have an expression in vars of a single relation.  Try to match
4823 		 * it to expressional index columns, in hopes of finding some
4824 		 * statistics.
4825 		 *
4826 		 * Note that we consider all index columns including INCLUDE columns,
4827 		 * since there could be stats for such columns.  But the test for
4828 		 * uniqueness needs to be warier.
4829 		 *
4830 		 * XXX it's conceivable that there are multiple matches with different
4831 		 * index opfamilies; if so, we need to pick one that matches the
4832 		 * operator we are estimating for.  FIXME later.
4833 		 */
4834 		ListCell   *ilist;
4835 
4836 		foreach(ilist, onerel->indexlist)
4837 		{
4838 			IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
4839 			ListCell   *indexpr_item;
4840 			int			pos;
4841 
4842 			indexpr_item = list_head(index->indexprs);
4843 			if (indexpr_item == NULL)
4844 				continue;		/* no expressions here... */
4845 
4846 			for (pos = 0; pos < index->ncolumns; pos++)
4847 			{
4848 				if (index->indexkeys[pos] == 0)
4849 				{
4850 					Node	   *indexkey;
4851 
4852 					if (indexpr_item == NULL)
4853 						elog(ERROR, "too few entries in indexprs list");
4854 					indexkey = (Node *) lfirst(indexpr_item);
4855 					if (indexkey && IsA(indexkey, RelabelType))
4856 						indexkey = (Node *) ((RelabelType *) indexkey)->arg;
4857 					if (equal(node, indexkey))
4858 					{
4859 						/*
4860 						 * Found a match ... is it a unique index? Tests here
4861 						 * should match has_unique_index().
4862 						 */
4863 						if (index->unique &&
4864 							index->nkeycolumns == 1 &&
4865 							pos == 0 &&
4866 							(index->indpred == NIL || index->predOK))
4867 							vardata->isunique = true;
4868 
4869 						/*
4870 						 * Has it got stats?  We only consider stats for
4871 						 * non-partial indexes, since partial indexes probably
4872 						 * don't reflect whole-relation statistics; the above
4873 						 * check for uniqueness is the only info we take from
4874 						 * a partial index.
4875 						 *
4876 						 * An index stats hook, however, must make its own
4877 						 * decisions about what to do with partial indexes.
4878 						 */
4879 						if (get_index_stats_hook &&
4880 							(*get_index_stats_hook) (root, index->indexoid,
4881 													 pos + 1, vardata))
4882 						{
4883 							/*
4884 							 * The hook took control of acquiring a stats
4885 							 * tuple.  If it did supply a tuple, it'd better
4886 							 * have supplied a freefunc.
4887 							 */
4888 							if (HeapTupleIsValid(vardata->statsTuple) &&
4889 								!vardata->freefunc)
4890 								elog(ERROR, "no function provided to release variable stats with");
4891 						}
4892 						else if (index->indpred == NIL)
4893 						{
4894 							vardata->statsTuple =
4895 								SearchSysCache3(STATRELATTINH,
4896 												ObjectIdGetDatum(index->indexoid),
4897 												Int16GetDatum(pos + 1),
4898 												BoolGetDatum(false));
4899 							vardata->freefunc = ReleaseSysCache;
4900 
4901 							if (HeapTupleIsValid(vardata->statsTuple))
4902 							{
4903 								/* Get index's table for permission check */
4904 								RangeTblEntry *rte;
4905 								Oid			userid;
4906 
4907 								rte = planner_rt_fetch(index->rel->relid, root);
4908 								Assert(rte->rtekind == RTE_RELATION);
4909 
4910 								/*
4911 								 * Use checkAsUser if it's set, in case we're
4912 								 * accessing the table via a view.
4913 								 */
4914 								userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
4915 
4916 								/*
4917 								 * For simplicity, we insist on the whole
4918 								 * table being selectable, rather than trying
4919 								 * to identify which column(s) the index
4920 								 * depends on.  Also require all rows to be
4921 								 * selectable --- there must be no
4922 								 * securityQuals from security barrier views
4923 								 * or RLS policies.
4924 								 */
4925 								vardata->acl_ok =
4926 									rte->securityQuals == NIL &&
4927 									(pg_class_aclcheck(rte->relid, userid,
4928 													   ACL_SELECT) == ACLCHECK_OK);
4929 
4930 								/*
4931 								 * If the user doesn't have permissions to
4932 								 * access an inheritance child relation, check
4933 								 * the permissions of the table actually
4934 								 * mentioned in the query, since most likely
4935 								 * the user does have that permission.  Note
4936 								 * that whole-table select privilege on the
4937 								 * parent doesn't quite guarantee that the
4938 								 * user could read all columns of the child.
4939 								 * But in practice it's unlikely that any
4940 								 * interesting security violation could result
4941 								 * from allowing access to the expression
4942 								 * index's stats, so we allow it anyway.  See
4943 								 * similar code in examine_simple_variable()
4944 								 * for additional comments.
4945 								 */
4946 								if (!vardata->acl_ok &&
4947 									root->append_rel_array != NULL)
4948 								{
4949 									AppendRelInfo *appinfo;
4950 									Index		varno = index->rel->relid;
4951 
4952 									appinfo = root->append_rel_array[varno];
4953 									while (appinfo &&
4954 										   planner_rt_fetch(appinfo->parent_relid,
4955 															root)->rtekind == RTE_RELATION)
4956 									{
4957 										varno = appinfo->parent_relid;
4958 										appinfo = root->append_rel_array[varno];
4959 									}
4960 									if (varno != index->rel->relid)
4961 									{
4962 										/* Repeat access check on this rel */
4963 										rte = planner_rt_fetch(varno, root);
4964 										Assert(rte->rtekind == RTE_RELATION);
4965 
4966 										userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
4967 
4968 										vardata->acl_ok =
4969 											rte->securityQuals == NIL &&
4970 											(pg_class_aclcheck(rte->relid,
4971 															   userid,
4972 															   ACL_SELECT) == ACLCHECK_OK);
4973 									}
4974 								}
4975 							}
4976 							else
4977 							{
4978 								/* suppress leakproofness checks later */
4979 								vardata->acl_ok = true;
4980 							}
4981 						}
4982 						if (vardata->statsTuple)
4983 							break;
4984 					}
4985 					indexpr_item = lnext(index->indexprs, indexpr_item);
4986 				}
4987 			}
4988 			if (vardata->statsTuple)
4989 				break;
4990 		}
4991 	}
4992 }
4993 
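/*
 * Illustrative sketch (not part of the build): the typical life cycle of a
 * VariableStatData in a selectivity estimator.  The function name and the
 * use made of the result here are hypothetical; real estimators obtain
 * "node" from the operator clause's argument list and apply the stats in
 * operator-specific ways.
 */
#ifdef NOT_USED
static Selectivity
example_estimator(PlannerInfo *root, Node *node, int varRelid)
{
	VariableStatData vardata;
	double		ndistinct;
	bool		isdefault;

	/* Fill vardata; this may set statsTuple, isunique, acl_ok, etc. */
	examine_variable(root, node, varRelid, &vardata);

	/* Derive something from the stats (here, an ndistinct estimate) */
	ndistinct = get_variable_numdistinct(&vardata, &isdefault);

	/* Caller must always release the stats tuple, as noted above */
	ReleaseVariableStats(vardata);

	return isdefault ? DEFAULT_EQ_SEL : 1.0 / ndistinct;
}
#endif
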
4994 /*
4995  * examine_simple_variable
4996  *		Handle a simple Var for examine_variable
4997  *
4998  * This is split out as a subroutine so that we can recurse to deal with
4999  * Vars referencing subqueries.
5000  *
5001  * We already filled in all the fields of *vardata except for the stats tuple.
5002  */
5003 static void
5004 examine_simple_variable(PlannerInfo *root, Var *var,
5005 						VariableStatData *vardata)
5006 {
5007 	RangeTblEntry *rte = root->simple_rte_array[var->varno];
5008 
5009 	Assert(IsA(rte, RangeTblEntry));
5010 
5011 	if (get_relation_stats_hook &&
5012 		(*get_relation_stats_hook) (root, rte, var->varattno, vardata))
5013 	{
5014 		/*
5015 		 * The hook took control of acquiring a stats tuple.  If it did supply
5016 		 * a tuple, it'd better have supplied a freefunc.
5017 		 */
5018 		if (HeapTupleIsValid(vardata->statsTuple) &&
5019 			!vardata->freefunc)
5020 			elog(ERROR, "no function provided to release variable stats with");
5021 	}
5022 	else if (rte->rtekind == RTE_RELATION)
5023 	{
5024 		/*
5025 		 * Plain table or parent of an inheritance appendrel, so look up the
5026 		 * column in pg_statistic
5027 		 */
5028 		vardata->statsTuple = SearchSysCache3(STATRELATTINH,
5029 											  ObjectIdGetDatum(rte->relid),
5030 											  Int16GetDatum(var->varattno),
5031 											  BoolGetDatum(rte->inh));
5032 		vardata->freefunc = ReleaseSysCache;
5033 
5034 		if (HeapTupleIsValid(vardata->statsTuple))
5035 		{
5036 			Oid			userid;
5037 
5038 			/*
5039 			 * Check if user has permission to read this column.  We require
5040 			 * all rows to be accessible, so there must be no securityQuals
5041 			 * from security barrier views or RLS policies.  Use checkAsUser
5042 			 * if it's set, in case we're accessing the table via a view.
5043 			 */
5044 			userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
5045 
5046 			vardata->acl_ok =
5047 				rte->securityQuals == NIL &&
5048 				((pg_class_aclcheck(rte->relid, userid,
5049 									ACL_SELECT) == ACLCHECK_OK) ||
5050 				 (pg_attribute_aclcheck(rte->relid, var->varattno, userid,
5051 										ACL_SELECT) == ACLCHECK_OK));
5052 
5053 			/*
5054 			 * If the user doesn't have permissions to access an inheritance
5055 			 * child relation or specifically this attribute, check the
5056 			 * permissions of the table/column actually mentioned in the
5057 			 * query, since most likely the user does have that permission
5058 			 * (else the query will fail at runtime), and if the user can read
			 * the column there then they can get the values of the child table
5060 			 * too.  To do that, we must find out which of the root parent's
5061 			 * attributes the child relation's attribute corresponds to.
5062 			 */
5063 			if (!vardata->acl_ok && var->varattno > 0 &&
5064 				root->append_rel_array != NULL)
5065 			{
5066 				AppendRelInfo *appinfo;
5067 				Index		varno = var->varno;
5068 				int			varattno = var->varattno;
5069 				bool		found = false;
5070 
5071 				appinfo = root->append_rel_array[varno];
5072 
5073 				/*
5074 				 * Partitions are mapped to their immediate parent, not the
				 * root parent, so we must be ready to walk up multiple
5076 				 * AppendRelInfos.  But stop if we hit a parent that is not
5077 				 * RTE_RELATION --- that's a flattened UNION ALL subquery, not
5078 				 * an inheritance parent.
5079 				 */
5080 				while (appinfo &&
5081 					   planner_rt_fetch(appinfo->parent_relid,
5082 										root)->rtekind == RTE_RELATION)
5083 				{
5084 					int			parent_varattno;
5085 
5086 					found = false;
5087 					if (varattno <= 0 || varattno > appinfo->num_child_cols)
5088 						break;	/* safety check */
5089 					parent_varattno = appinfo->parent_colnos[varattno - 1];
5090 					if (parent_varattno == 0)
5091 						break;	/* Var is local to child */
5092 
5093 					varno = appinfo->parent_relid;
5094 					varattno = parent_varattno;
5095 					found = true;
5096 
5097 					/* If the parent is itself a child, continue up. */
5098 					appinfo = root->append_rel_array[varno];
5099 				}
5100 
5101 				/*
				 * In rare cases, the Var may be local to the child table, in
				 * which case we have to live with having no access to this
				 * column's stats.
5105 				 */
5106 				if (!found)
5107 					return;
5108 
5109 				/* Repeat the access check on this parent rel & column */
5110 				rte = planner_rt_fetch(varno, root);
5111 				Assert(rte->rtekind == RTE_RELATION);
5112 
5113 				userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
5114 
5115 				vardata->acl_ok =
5116 					rte->securityQuals == NIL &&
5117 					((pg_class_aclcheck(rte->relid, userid,
5118 										ACL_SELECT) == ACLCHECK_OK) ||
5119 					 (pg_attribute_aclcheck(rte->relid, varattno, userid,
5120 											ACL_SELECT) == ACLCHECK_OK));
5121 			}
5122 		}
5123 		else
5124 		{
5125 			/* suppress any possible leakproofness checks later */
5126 			vardata->acl_ok = true;
5127 		}
5128 	}
5129 	else if (rte->rtekind == RTE_SUBQUERY && !rte->inh)
5130 	{
5131 		/*
5132 		 * Plain subquery (not one that was converted to an appendrel).
5133 		 */
5134 		Query	   *subquery = rte->subquery;
5135 		RelOptInfo *rel;
5136 		TargetEntry *ste;
5137 
5138 		/*
5139 		 * Punt if it's a whole-row var rather than a plain column reference.
5140 		 */
5141 		if (var->varattno == InvalidAttrNumber)
5142 			return;
5143 
5144 		/*
5145 		 * Punt if subquery uses set operations or GROUP BY, as these will
5146 		 * mash underlying columns' stats beyond recognition.  (Set ops are
5147 		 * particularly nasty; if we forged ahead, we would return stats
5148 		 * relevant to only the leftmost subselect...)	DISTINCT is also
5149 		 * problematic, but we check that later because there is a possibility
5150 		 * of learning something even with it.
5151 		 */
5152 		if (subquery->setOperations ||
5153 			subquery->groupClause ||
5154 			subquery->groupingSets)
5155 			return;
5156 
5157 		/*
5158 		 * OK, fetch RelOptInfo for subquery.  Note that we don't change the
5159 		 * rel returned in vardata, since caller expects it to be a rel of the
5160 		 * caller's query level.  Because we might already be recursing, we
5161 		 * can't use that rel pointer either, but have to look up the Var's
5162 		 * rel afresh.
5163 		 */
5164 		rel = find_base_rel(root, var->varno);
5165 
5166 		/* If the subquery hasn't been planned yet, we have to punt */
5167 		if (rel->subroot == NULL)
5168 			return;
5169 		Assert(IsA(rel->subroot, PlannerInfo));
5170 
5171 		/*
5172 		 * Switch our attention to the subquery as mangled by the planner. It
5173 		 * was okay to look at the pre-planning version for the tests above,
5174 		 * but now we need a Var that will refer to the subroot's live
5175 		 * RelOptInfos.  For instance, if any subquery pullup happened during
5176 		 * planning, Vars in the targetlist might have gotten replaced, and we
5177 		 * need to see the replacement expressions.
5178 		 */
5179 		subquery = rel->subroot->parse;
5180 		Assert(IsA(subquery, Query));
5181 
5182 		/* Get the subquery output expression referenced by the upper Var */
5183 		ste = get_tle_by_resno(subquery->targetList, var->varattno);
5184 		if (ste == NULL || ste->resjunk)
5185 			elog(ERROR, "subquery %s does not have attribute %d",
5186 				 rte->eref->aliasname, var->varattno);
5187 		var = (Var *) ste->expr;
5188 
5189 		/*
5190 		 * If subquery uses DISTINCT, we can't make use of any stats for the
5191 		 * variable ... but, if it's the only DISTINCT column, we are entitled
5192 		 * to consider it unique.  We do the test this way so that it works
5193 		 * for cases involving DISTINCT ON.
5194 		 */
5195 		if (subquery->distinctClause)
5196 		{
5197 			if (list_length(subquery->distinctClause) == 1 &&
5198 				targetIsInSortList(ste, InvalidOid, subquery->distinctClause))
5199 				vardata->isunique = true;
5200 			/* cannot go further */
5201 			return;
5202 		}
5203 
5204 		/*
5205 		 * If the sub-query originated from a view with the security_barrier
5206 		 * attribute, we must not look at the variable's statistics, though it
5207 		 * seems all right to notice the existence of a DISTINCT clause. So
5208 		 * stop here.
5209 		 *
5210 		 * This is probably a harsher restriction than necessary; it's
5211 		 * certainly OK for the selectivity estimator (which is a C function,
5212 		 * and therefore omnipotent anyway) to look at the statistics.  But
5213 		 * many selectivity estimators will happily *invoke the operator
5214 		 * function* to try to work out a good estimate - and that's not OK.
5215 		 * So for now, don't dig down for stats.
5216 		 */
5217 		if (rte->security_barrier)
5218 			return;
5219 
5220 		/* Can only handle a simple Var of subquery's query level */
5221 		if (var && IsA(var, Var) &&
5222 			var->varlevelsup == 0)
5223 		{
5224 			/*
5225 			 * OK, recurse into the subquery.  Note that the original setting
5226 			 * of vardata->isunique (which will surely be false) is left
5227 			 * unchanged in this situation.  That's what we want, since even
5228 			 * if the underlying column is unique, the subquery may have
5229 			 * joined to other tables in a way that creates duplicates.
5230 			 */
5231 			examine_simple_variable(rel->subroot, var, vardata);
5232 		}
5233 	}
5234 	else
5235 	{
5236 		/*
5237 		 * Otherwise, the Var comes from a FUNCTION, VALUES, or CTE RTE.  (We
5238 		 * won't see RTE_JOIN here because join alias Vars have already been
5239 		 * flattened.)	There's not much we can do with function outputs, but
5240 		 * maybe someday try to be smarter about VALUES and/or CTEs.
5241 		 */
5242 	}
5243 }
5244 
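/*
 * Example of the subquery handling above (query shape is hypothetical):
 * given
 *		SELECT ... FROM (SELECT DISTINCT x FROM tab) ss WHERE ss.x = 42
 * we dare not use tab.x's statistics for ss.x, since DISTINCT changes the
 * distribution; but because x is the sole DISTINCT column we may mark the
 * variable unique, which get_variable_numdistinct() can then exploit.
 */
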
5245 /*
5246  * Check whether it is permitted to call func_oid passing some of the
5247  * pg_statistic data in vardata.  We allow this either if the user has SELECT
5248  * privileges on the table or column underlying the pg_statistic data or if
5249  * the function is marked leak-proof.
5250  */
5251 bool
5252 statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
5253 {
5254 	if (vardata->acl_ok)
5255 		return true;
5256 
5257 	if (!OidIsValid(func_oid))
5258 		return false;
5259 
5260 	if (get_func_leakproof(func_oid))
5261 		return true;
5262 
5263 	ereport(DEBUG2,
5264 			(errmsg_internal("not using statistics because function \"%s\" is not leak-proof",
5265 							 get_func_name(func_oid))));
5266 	return false;
5267 }
5268 
5269 /*
5270  * get_variable_numdistinct
5271  *	  Estimate the number of distinct values of a variable.
5272  *
5273  * vardata: results of examine_variable
5274  * *isdefault: set to true if the result is a default rather than based on
5275  * anything meaningful.
5276  *
5277  * NB: be careful to produce a positive integral result, since callers may
5278  * compare the result to exact integer counts, or might divide by it.
5279  */
5280 double
5281 get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
5282 {
5283 	double		stadistinct;
5284 	double		stanullfrac = 0.0;
5285 	double		ntuples;
5286 
5287 	*isdefault = false;
5288 
5289 	/*
5290 	 * Determine the stadistinct value to use.  There are cases where we can
5291 	 * get an estimate even without a pg_statistic entry, or can get a better
5292 	 * value than is in pg_statistic.  Grab stanullfrac too if we can find it
5293 	 * (otherwise, assume no nulls, for lack of any better idea).
5294 	 */
5295 	if (HeapTupleIsValid(vardata->statsTuple))
5296 	{
5297 		/* Use the pg_statistic entry */
5298 		Form_pg_statistic stats;
5299 
5300 		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
5301 		stadistinct = stats->stadistinct;
5302 		stanullfrac = stats->stanullfrac;
5303 	}
5304 	else if (vardata->vartype == BOOLOID)
5305 	{
5306 		/*
5307 		 * Special-case boolean columns: presumably, two distinct values.
5308 		 *
5309 		 * Are there any other datatypes we should wire in special estimates
5310 		 * for?
5311 		 */
5312 		stadistinct = 2.0;
5313 	}
5314 	else if (vardata->rel && vardata->rel->rtekind == RTE_VALUES)
5315 	{
5316 		/*
5317 		 * If the Var represents a column of a VALUES RTE, assume it's unique.
5318 		 * This could of course be very wrong, but it should tend to be true
5319 		 * in well-written queries.  We could consider examining the VALUES'
5320 		 * contents to get some real statistics; but that only works if the
5321 		 * entries are all constants, and it would be pretty expensive anyway.
5322 		 */
5323 		stadistinct = -1.0;		/* unique (and all non null) */
5324 	}
5325 	else
5326 	{
5327 		/*
5328 		 * We don't keep statistics for system columns, but in some cases we
5329 		 * can infer distinctness anyway.
5330 		 */
5331 		if (vardata->var && IsA(vardata->var, Var))
5332 		{
5333 			switch (((Var *) vardata->var)->varattno)
5334 			{
5335 				case SelfItemPointerAttributeNumber:
5336 					stadistinct = -1.0; /* unique (and all non null) */
5337 					break;
5338 				case TableOidAttributeNumber:
5339 					stadistinct = 1.0;	/* only 1 value */
5340 					break;
5341 				default:
5342 					stadistinct = 0.0;	/* means "unknown" */
5343 					break;
5344 			}
5345 		}
5346 		else
5347 			stadistinct = 0.0;	/* means "unknown" */
5348 
5349 		/*
5350 		 * XXX consider using estimate_num_groups on expressions?
5351 		 */
5352 	}
5353 
5354 	/*
5355 	 * If there is a unique index or DISTINCT clause for the variable, assume
5356 	 * it is unique no matter what pg_statistic says; the statistics could be
5357 	 * out of date, or we might have found a partial unique index that proves
5358 	 * the var is unique for this query.  However, we'd better still believe
5359 	 * the null-fraction statistic.
5360 	 */
5361 	if (vardata->isunique)
5362 		stadistinct = -1.0 * (1.0 - stanullfrac);
5363 
5364 	/*
5365 	 * If we had an absolute estimate, use that.
5366 	 */
5367 	if (stadistinct > 0.0)
5368 		return clamp_row_est(stadistinct);
5369 
5370 	/*
5371 	 * Otherwise we need to get the relation size; punt if not available.
5372 	 */
5373 	if (vardata->rel == NULL)
5374 	{
5375 		*isdefault = true;
5376 		return DEFAULT_NUM_DISTINCT;
5377 	}
5378 	ntuples = vardata->rel->tuples;
5379 	if (ntuples <= 0.0)
5380 	{
5381 		*isdefault = true;
5382 		return DEFAULT_NUM_DISTINCT;
5383 	}
5384 
5385 	/*
5386 	 * If we had a relative estimate, use that.
5387 	 */
5388 	if (stadistinct < 0.0)
5389 		return clamp_row_est(-stadistinct * ntuples);
5390 
5391 	/*
5392 	 * With no data, estimate ndistinct = ntuples if the table is small, else
5393 	 * use default.  We use DEFAULT_NUM_DISTINCT as the cutoff for "small" so
5394 	 * that the behavior isn't discontinuous.
5395 	 */
5396 	if (ntuples < DEFAULT_NUM_DISTINCT)
5397 		return clamp_row_est(ntuples);
5398 
5399 	*isdefault = true;
5400 	return DEFAULT_NUM_DISTINCT;
5401 }
5402 
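/*
 * Worked example of the stadistinct conventions used above (numbers are
 * hypothetical): with ntuples = 10000 and stadistinct = -0.2 (a relative
 * estimate), get_variable_numdistinct() returns 0.2 * 10000 = 2000.  If
 * vardata->isunique were set with stanullfrac = 0.1, stadistinct would be
 * forced to -0.9 and the result would be 9000, since only non-null rows
 * can be distinct.  A positive stadistinct, say 500, is returned as-is
 * (after clamping), independently of ntuples.
 */
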
5403 /*
5404  * get_variable_range
5405  *		Estimate the minimum and maximum value of the specified variable.
5406  *		If successful, store values in *min and *max, and return true.
5407  *		If no data available, return false.
5408  *
5409  * sortop is the "<" comparison operator to use.  This should generally
5410  * be "<" not ">", as only the former is likely to be found in pg_statistic.
5411  * The collation must be specified too.
5412  */
5413 static bool
5414 get_variable_range(PlannerInfo *root, VariableStatData *vardata,
5415 				   Oid sortop, Oid collation,
5416 				   Datum *min, Datum *max)
5417 {
5418 	Datum		tmin = 0;
5419 	Datum		tmax = 0;
5420 	bool		have_data = false;
5421 	int16		typLen;
5422 	bool		typByVal;
5423 	Oid			opfuncoid;
5424 	FmgrInfo	opproc;
5425 	AttStatsSlot sslot;
5426 
5427 	/*
5428 	 * XXX It's very tempting to try to use the actual column min and max, if
5429 	 * we can get them relatively-cheaply with an index probe.  However, since
5430 	 * this function is called many times during join planning, that could
5431 	 * have unpleasant effects on planning speed.  Need more investigation
5432 	 * before enabling this.
5433 	 */
5434 #ifdef NOT_USED
5435 	if (get_actual_variable_range(root, vardata, sortop, collation, min, max))
5436 		return true;
5437 #endif
5438 
5439 	if (!HeapTupleIsValid(vardata->statsTuple))
5440 	{
5441 		/* no stats available, so default result */
5442 		return false;
5443 	}
5444 
5445 	/*
5446 	 * If we can't apply the sortop to the stats data, just fail.  In
5447 	 * principle, if there's a histogram and no MCVs, we could return the
5448 	 * histogram endpoints without ever applying the sortop ... but it's
5449 	 * probably not worth trying, because whatever the caller wants to do with
5450 	 * the endpoints would likely fail the security check too.
5451 	 */
5452 	if (!statistic_proc_security_check(vardata,
5453 									   (opfuncoid = get_opcode(sortop))))
5454 		return false;
5455 
5456 	opproc.fn_oid = InvalidOid; /* mark this as not looked up yet */
5457 
5458 	get_typlenbyval(vardata->atttype, &typLen, &typByVal);
5459 
5460 	/*
5461 	 * If there is a histogram with the ordering we want, grab the first and
5462 	 * last values.
5463 	 */
5464 	if (get_attstatsslot(&sslot, vardata->statsTuple,
5465 						 STATISTIC_KIND_HISTOGRAM, sortop,
5466 						 ATTSTATSSLOT_VALUES))
5467 	{
5468 		if (sslot.stacoll == collation && sslot.nvalues > 0)
5469 		{
5470 			tmin = datumCopy(sslot.values[0], typByVal, typLen);
5471 			tmax = datumCopy(sslot.values[sslot.nvalues - 1], typByVal, typLen);
5472 			have_data = true;
5473 		}
5474 		free_attstatsslot(&sslot);
5475 	}
5476 
5477 	/*
5478 	 * Otherwise, if there is a histogram with some other ordering, scan it
5479 	 * and get the min and max values according to the ordering we want.  This
5480 	 * of course may not find values that are really extremal according to our
5481 	 * ordering, but it beats ignoring available data.
5482 	 */
5483 	if (!have_data &&
5484 		get_attstatsslot(&sslot, vardata->statsTuple,
5485 						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
5486 						 ATTSTATSSLOT_VALUES))
5487 	{
5488 		get_stats_slot_range(&sslot, opfuncoid, &opproc,
5489 							 collation, typLen, typByVal,
5490 							 &tmin, &tmax, &have_data);
5491 		free_attstatsslot(&sslot);
5492 	}
5493 
5494 	/*
5495 	 * If we have most-common-values info, look for extreme MCVs.  This is
5496 	 * needed even if we also have a histogram, since the histogram excludes
5497 	 * the MCVs.  However, if we *only* have MCVs and no histogram, we should
5498 	 * be pretty wary of deciding that that is a full representation of the
5499 	 * data.  Proceed only if the MCVs represent the whole table (to within
5500 	 * roundoff error).
5501 	 */
5502 	if (get_attstatsslot(&sslot, vardata->statsTuple,
5503 						 STATISTIC_KIND_MCV, InvalidOid,
5504 						 have_data ? ATTSTATSSLOT_VALUES :
5505 						 (ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)))
5506 	{
5507 		bool		use_mcvs = have_data;
5508 
5509 		if (!have_data)
5510 		{
5511 			double		sumcommon = 0.0;
5512 			double		nullfrac;
5513 			int			i;
5514 
5515 			for (i = 0; i < sslot.nnumbers; i++)
5516 				sumcommon += sslot.numbers[i];
5517 			nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata->statsTuple))->stanullfrac;
5518 			if (sumcommon + nullfrac > 0.99999)
5519 				use_mcvs = true;
5520 		}
5521 
5522 		if (use_mcvs)
5523 			get_stats_slot_range(&sslot, opfuncoid, &opproc,
5524 								 collation, typLen, typByVal,
5525 								 &tmin, &tmax, &have_data);
5526 		free_attstatsslot(&sslot);
5527 	}
5528 
5529 	*min = tmin;
5530 	*max = tmax;
5531 	return have_data;
5532 }
5533 
5534 /*
5535  * get_stats_slot_range: scan sslot for min/max values
5536  *
5537  * Subroutine for get_variable_range: update min/max/have_data according
5538  * to what we find in the statistics array.
5539  */
5540 static void
5541 get_stats_slot_range(AttStatsSlot *sslot, Oid opfuncoid, FmgrInfo *opproc,
5542 					 Oid collation, int16 typLen, bool typByVal,
5543 					 Datum *min, Datum *max, bool *p_have_data)
5544 {
5545 	Datum		tmin = *min;
5546 	Datum		tmax = *max;
5547 	bool		have_data = *p_have_data;
5548 	bool		found_tmin = false;
5549 	bool		found_tmax = false;
5550 
5551 	/* Look up the comparison function, if we didn't already do so */
5552 	if (opproc->fn_oid != opfuncoid)
5553 		fmgr_info(opfuncoid, opproc);
5554 
5555 	/* Scan all the slot's values */
5556 	for (int i = 0; i < sslot->nvalues; i++)
5557 	{
5558 		if (!have_data)
5559 		{
5560 			tmin = tmax = sslot->values[i];
5561 			found_tmin = found_tmax = true;
5562 			*p_have_data = have_data = true;
5563 			continue;
5564 		}
5565 		if (DatumGetBool(FunctionCall2Coll(opproc,
5566 										   collation,
5567 										   sslot->values[i], tmin)))
5568 		{
5569 			tmin = sslot->values[i];
5570 			found_tmin = true;
5571 		}
5572 		if (DatumGetBool(FunctionCall2Coll(opproc,
5573 										   collation,
5574 										   tmax, sslot->values[i])))
5575 		{
5576 			tmax = sslot->values[i];
5577 			found_tmax = true;
5578 		}
5579 	}
5580 
5581 	/*
5582 	 * Copy the slot's values, if we found new extreme values.
5583 	 */
5584 	if (found_tmin)
5585 		*min = datumCopy(tmin, typByVal, typLen);
5586 	if (found_tmax)
5587 		*max = datumCopy(tmax, typByVal, typLen);
5588 }
5589 
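/*
 * For example (hypothetical slot contents): scanning values {5, 1, 9}
 * with a "<" comparison function, starting with *p_have_data = false,
 * ends with tmin = 1 and tmax = 9; both found_tmin and found_tmax are
 * set, so both endpoints are datumCopy'd into the caller's context.
 */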
5590 
5591 /*
5592  * get_actual_variable_range
5593  *		Attempt to identify the current *actual* minimum and/or maximum
5594  *		of the specified variable, by looking for a suitable btree index
5595  *		and fetching its low and/or high values.
5596  *		If successful, store values in *min and *max, and return true.
5597  *		(Either pointer can be NULL if that endpoint isn't needed.)
5598  *		If no data available, return false.
5599  *
5600  * sortop is the "<" comparison operator to use.
5601  * collation is the required collation.
5602  */
5603 static bool
5604 get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
5605 						  Oid sortop, Oid collation,
5606 						  Datum *min, Datum *max)
5607 {
5608 	bool		have_data = false;
5609 	RelOptInfo *rel = vardata->rel;
5610 	RangeTblEntry *rte;
5611 	ListCell   *lc;
5612 
5613 	/* No hope if no relation or it doesn't have indexes */
5614 	if (rel == NULL || rel->indexlist == NIL)
5615 		return false;
5616 	/* If it has indexes it must be a plain relation */
5617 	rte = root->simple_rte_array[rel->relid];
5618 	Assert(rte->rtekind == RTE_RELATION);
5619 
5620 	/* Search through the indexes to see if any match our problem */
5621 	foreach(lc, rel->indexlist)
5622 	{
5623 		IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);
5624 		ScanDirection indexscandir;
5625 
5626 		/* Ignore non-btree indexes */
5627 		if (index->relam != BTREE_AM_OID)
5628 			continue;
5629 
5630 		/*
5631 		 * Ignore partial indexes --- we only want stats that cover the entire
5632 		 * relation.
5633 		 */
5634 		if (index->indpred != NIL)
5635 			continue;
5636 
5637 		/*
5638 		 * The index list might include hypothetical indexes inserted by a
5639 		 * get_relation_info hook --- don't try to access them.
5640 		 */
5641 		if (index->hypothetical)
5642 			continue;
5643 
5644 		/*
5645 		 * The first index column must match the desired variable, sortop, and
5646 		 * collation --- but we can use a descending-order index.
5647 		 */
5648 		if (collation != index->indexcollations[0])
5649 			continue;			/* test first 'cause it's cheapest */
5650 		if (!match_index_to_operand(vardata->var, 0, index))
5651 			continue;
5652 		switch (get_op_opfamily_strategy(sortop, index->sortopfamily[0]))
5653 		{
5654 			case BTLessStrategyNumber:
5655 				if (index->reverse_sort[0])
5656 					indexscandir = BackwardScanDirection;
5657 				else
5658 					indexscandir = ForwardScanDirection;
5659 				break;
5660 			case BTGreaterStrategyNumber:
5661 				if (index->reverse_sort[0])
5662 					indexscandir = ForwardScanDirection;
5663 				else
5664 					indexscandir = BackwardScanDirection;
5665 				break;
5666 			default:
5667 				/* index doesn't match the sortop */
5668 				continue;
5669 		}
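
		/*
		 * For example, to find the minimum (the '<' sortop case) in an
		 * index whose first column is declared DESC, the smallest value
		 * is stored at the index's end, so we must scan backward.
		 */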
5670 
5671 		/*
5672 		 * Found a suitable index to extract data from.  Set up some data that
5673 		 * can be used by both invocations of get_actual_variable_endpoint.
5674 		 */
5675 		{
5676 			MemoryContext tmpcontext;
5677 			MemoryContext oldcontext;
5678 			Relation	heapRel;
5679 			Relation	indexRel;
5680 			TupleTableSlot *slot;
5681 			int16		typLen;
5682 			bool		typByVal;
5683 			ScanKeyData scankeys[1];
5684 
5685 			/* Make sure any cruft gets recycled when we're done */
5686 			tmpcontext = AllocSetContextCreate(CurrentMemoryContext,
5687 											   "get_actual_variable_range workspace",
5688 											   ALLOCSET_DEFAULT_SIZES);
5689 			oldcontext = MemoryContextSwitchTo(tmpcontext);
5690 
5691 			/*
5692 			 * Open the table and index so we can read from them.  We should
5693 			 * already have some type of lock on each.
5694 			 */
5695 			heapRel = table_open(rte->relid, NoLock);
5696 			indexRel = index_open(index->indexoid, NoLock);
5697 
5698 			/* build some stuff needed for indexscan execution */
5699 			slot = table_slot_create(heapRel, NULL);
5700 			get_typlenbyval(vardata->atttype, &typLen, &typByVal);
5701 
5702 			/* set up an IS NOT NULL scan key so that we ignore nulls */
5703 			ScanKeyEntryInitialize(&scankeys[0],
5704 								   SK_ISNULL | SK_SEARCHNOTNULL,
5705 								   1,	/* index col to scan */
5706 								   InvalidStrategy, /* no strategy */
5707 								   InvalidOid,	/* no strategy subtype */
5708 								   InvalidOid,	/* no collation */
5709 								   InvalidOid,	/* no reg proc for this */
5710 								   (Datum) 0);	/* constant */
5711 
5712 			/* If min is requested ... */
5713 			if (min)
5714 			{
5715 				have_data = get_actual_variable_endpoint(heapRel,
5716 														 indexRel,
5717 														 indexscandir,
5718 														 scankeys,
5719 														 typLen,
5720 														 typByVal,
5721 														 slot,
5722 														 oldcontext,
5723 														 min);
5724 			}
5725 			else
5726 			{
5727 				/* If min not requested, assume index is nonempty */
5728 				have_data = true;
5729 			}
5730 
5731 			/* If max is requested, and we didn't find the index is empty */
5732 			if (max && have_data)
5733 			{
5734 				/* scan in the opposite direction; all else is the same */
5735 				have_data = get_actual_variable_endpoint(heapRel,
5736 														 indexRel,
5737 														 -indexscandir,
5738 														 scankeys,
5739 														 typLen,
5740 														 typByVal,
5741 														 slot,
5742 														 oldcontext,
5743 														 max);
5744 			}
5745 
5746 			/* Clean everything up */
5747 			ExecDropSingleTupleTableSlot(slot);
5748 
5749 			index_close(indexRel, NoLock);
5750 			table_close(heapRel, NoLock);
5751 
5752 			MemoryContextSwitchTo(oldcontext);
5753 			MemoryContextDelete(tmpcontext);
5754 
5755 			/* And we're done */
5756 			break;
5757 		}
5758 	}
5759 
5760 	return have_data;
5761 }
5762 
5763 /*
5764  * Get one endpoint datum (min or max depending on indexscandir) from the
5765  * specified index.  Return true if successful, false if index is empty.
5766  * On success, endpoint value is stored to *endpointDatum (and copied into
5767  * outercontext).
5768  *
5769  * scankeys is a 1-element scankey array set up to reject nulls.
5770  * typLen/typByVal describe the datatype of the index's first column.
5771  * tableslot is a slot suitable to hold table tuples, in case we need
5772  * to probe the heap.
5773  * (We could compute these values locally, but that would mean computing them
5774  * twice when get_actual_variable_range needs both the min and the max.)
5775  */
5776 static bool
5777 get_actual_variable_endpoint(Relation heapRel,
5778 							 Relation indexRel,
5779 							 ScanDirection indexscandir,
5780 							 ScanKey scankeys,
5781 							 int16 typLen,
5782 							 bool typByVal,
5783 							 TupleTableSlot *tableslot,
5784 							 MemoryContext outercontext,
5785 							 Datum *endpointDatum)
5786 {
5787 	bool		have_data = false;
5788 	SnapshotData SnapshotNonVacuumable;
5789 	IndexScanDesc index_scan;
5790 	Buffer		vmbuffer = InvalidBuffer;
5791 	ItemPointer tid;
5792 	Datum		values[INDEX_MAX_KEYS];
5793 	bool		isnull[INDEX_MAX_KEYS];
5794 	MemoryContext oldcontext;
5795 
5796 	/*
5797 	 * We use the index-only-scan machinery for this.  With mostly-static
5798 	 * tables that's a win because it avoids a heap visit.  It's also a win
5799 	 * for dynamic data, but the reason is less obvious; read on for details.
5800 	 *
5801 	 * In principle, we should scan the index with our current active
5802 	 * snapshot, which is the best approximation we've got to what the query
5803 	 * will see when executed.  But that won't be exact if a new snap is taken
5804 	 * before running the query, and it can be very expensive if a lot of
5805 	 * recently-dead or uncommitted rows exist at the beginning or end of the
5806 	 * index (because we'll laboriously fetch each one and reject it).
5807 	 * Instead, we use SnapshotNonVacuumable.  That will accept recently-dead
5808 	 * and uncommitted rows as well as normal visible rows.  On the other
5809 	 * hand, it will reject known-dead rows, and thus not give a bogus answer
5810 	 * when the extreme value has been deleted (unless the deletion was quite
5811 	 * recent); that case motivates not using SnapshotAny here.
5812 	 *
5813 	 * A crucial point here is that SnapshotNonVacuumable, with
5814 	 * RecentGlobalXmin as horizon, yields the inverse of the condition that
5815 	 * the indexscan will use to decide that index entries are killable (see
5816 	 * heap_hot_search_buffer()).  Therefore, if the snapshot rejects a tuple
5817 	 * (or more precisely, all tuples of a HOT chain) and we have to continue
5818 	 * scanning past it, we know that the indexscan will mark that index entry
5819 	 * killed.  That means that the next get_actual_variable_endpoint() call
5820 	 * will not have to re-consider that index entry.  In this way we avoid
5821 	 * repetitive work when this function is used a lot during planning.
5822 	 *
5823 	 * But using SnapshotNonVacuumable creates a hazard of its own.  In a
5824 	 * recently-created index, some index entries may point at "broken" HOT
5825 	 * chains in which not all the tuple versions contain data matching the
5826 	 * index entry.  The live tuple version(s) certainly do match the index,
5827 	 * but SnapshotNonVacuumable can accept recently-dead tuple versions that
5828 	 * don't match.  Hence, if we took data from the selected heap tuple, we
5829 	 * might get a bogus answer that's not close to the index extremal value,
5830 	 * or could even be NULL.  We avoid this hazard because we take the data
5831 	 * from the index entry not the heap.
5832 	 */
5833 	InitNonVacuumableSnapshot(SnapshotNonVacuumable, RecentGlobalXmin);
5834 
5835 	index_scan = index_beginscan(heapRel, indexRel,
5836 								 &SnapshotNonVacuumable,
5837 								 1, 0);
5838 	/* Set it up for index-only scan */
5839 	index_scan->xs_want_itup = true;
5840 	index_rescan(index_scan, scankeys, 1, NULL, 0);
5841 
5842 	/* Fetch first/next tuple in specified direction */
5843 	while ((tid = index_getnext_tid(index_scan, indexscandir)) != NULL)
5844 	{
5845 		if (!VM_ALL_VISIBLE(heapRel,
5846 							ItemPointerGetBlockNumber(tid),
5847 							&vmbuffer))
5848 		{
5849 			/* Rats, we have to visit the heap to check visibility */
5850 			if (!index_fetch_heap(index_scan, tableslot))
5851 				continue;		/* no visible tuple, try next index entry */
5852 
5853 			/* We don't actually need the heap tuple for anything */
5854 			ExecClearTuple(tableslot);
5855 
5856 			/*
5857 			 * We don't care whether there's more than one visible tuple in
5858 			 * the HOT chain; if any are visible, that's good enough.
5859 			 */
5860 		}
5861 
5862 		/*
5863 		 * We expect that btree will return data in IndexTuple not HeapTuple
5864 		 * format.  It's not lossy either.
5865 		 */
5866 		if (!index_scan->xs_itup)
5867 			elog(ERROR, "no data returned for index-only scan");
5868 		if (index_scan->xs_recheck)
5869 			elog(ERROR, "unexpected recheck indication from btree");
5870 
5871 		/* OK to deconstruct the index tuple */
5872 		index_deform_tuple(index_scan->xs_itup,
5873 						   index_scan->xs_itupdesc,
5874 						   values, isnull);
5875 
5876 		/* Shouldn't have got a null, but be careful */
5877 		if (isnull[0])
5878 			elog(ERROR, "found unexpected null value in index \"%s\"",
5879 				 RelationGetRelationName(indexRel));
5880 
5881 		/* Copy the index column value out to caller's context */
5882 		oldcontext = MemoryContextSwitchTo(outercontext);
5883 		*endpointDatum = datumCopy(values[0], typByVal, typLen);
5884 		MemoryContextSwitchTo(oldcontext);
5885 		have_data = true;
5886 		break;
5887 	}
5888 
5889 	if (vmbuffer != InvalidBuffer)
5890 		ReleaseBuffer(vmbuffer);
5891 	index_endscan(index_scan);
5892 
5893 	return have_data;
5894 }
5895 
5896 /*
5897  * find_join_input_rel
5898  *		Look up the input relation for a join.
5899  *
5900  * We assume that the input relation's RelOptInfo must have been constructed
5901  * already.
5902  */
5903 static RelOptInfo *
5904 find_join_input_rel(PlannerInfo *root, Relids relids)
5905 {
5906 	RelOptInfo *rel = NULL;
5907 
5908 	switch (bms_membership(relids))
5909 	{
5910 		case BMS_EMPTY_SET:
5911 			/* should not happen */
5912 			break;
5913 		case BMS_SINGLETON:
5914 			rel = find_base_rel(root, bms_singleton_member(relids));
5915 			break;
5916 		case BMS_MULTIPLE:
5917 			rel = find_join_rel(root, relids);
5918 			break;
5919 	}
5920 
5921 	if (rel == NULL)
5922 		elog(ERROR, "could not find RelOptInfo for given relids");
5923 
5924 	return rel;
5925 }
5926 
5927 
5928 /*-------------------------------------------------------------------------
5929  *
5930  * Index cost estimation functions
5931  *
5932  *-------------------------------------------------------------------------
5933  */
5934 
5935 /*
5936  * Extract the actual indexquals (as RestrictInfos) from an IndexClause list
5937  */
5938 List *
5939 get_quals_from_indexclauses(List *indexclauses)
5940 {
5941 	List	   *result = NIL;
5942 	ListCell   *lc;
5943 
5944 	foreach(lc, indexclauses)
5945 	{
5946 		IndexClause *iclause = lfirst_node(IndexClause, lc);
5947 		ListCell   *lc2;
5948 
5949 		foreach(lc2, iclause->indexquals)
5950 		{
5951 			RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
5952 
5953 			result = lappend(result, rinfo);
5954 		}
5955 	}
5956 	return result;
5957 }
5958 
5959 /*
5960  * Compute the total evaluation cost of the comparison operands in a list
5961  * of index qual expressions.  Since we know these will be evaluated just
5962  * once per scan, there's no need to distinguish startup from per-row cost.
5963  *
5964  * This can be used either on the result of get_quals_from_indexclauses(),
5965  * or directly on an indexorderbys list.  In both cases, we expect that the
5966  * index key expression is on the left side of binary clauses.
5967  */
5968 Cost
5969 index_other_operands_eval_cost(PlannerInfo *root, List *indexquals)
5970 {
5971 	Cost		qual_arg_cost = 0;
5972 	ListCell   *lc;
5973 
5974 	foreach(lc, indexquals)
5975 	{
5976 		Expr	   *clause = (Expr *) lfirst(lc);
5977 		Node	   *other_operand;
5978 		QualCost	index_qual_cost;
5979 
5980 		/*
5981 		 * Index quals will have RestrictInfos, indexorderbys won't.  Look
5982 		 * through RestrictInfo if present.
5983 		 */
5984 		if (IsA(clause, RestrictInfo))
5985 			clause = ((RestrictInfo *) clause)->clause;
5986 
5987 		if (IsA(clause, OpExpr))
5988 		{
5989 			OpExpr	   *op = (OpExpr *) clause;
5990 
5991 			other_operand = (Node *) lsecond(op->args);
5992 		}
5993 		else if (IsA(clause, RowCompareExpr))
5994 		{
5995 			RowCompareExpr *rc = (RowCompareExpr *) clause;
5996 
5997 			other_operand = (Node *) rc->rargs;
5998 		}
5999 		else if (IsA(clause, ScalarArrayOpExpr))
6000 		{
6001 			ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
6002 
6003 			other_operand = (Node *) lsecond(saop->args);
6004 		}
6005 		else if (IsA(clause, NullTest))
6006 		{
6007 			other_operand = NULL;
6008 		}
6009 		else
6010 		{
6011 			elog(ERROR, "unsupported indexqual type: %d",
6012 				 (int) nodeTag(clause));
6013 			other_operand = NULL;	/* keep compiler quiet */
6014 		}
6015 
6016 		cost_qual_eval_node(&index_qual_cost, other_operand, root);
6017 		qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
6018 	}
6019 	return qual_arg_cost;
6020 }
6021 
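/*
 * genericcostestimate is a general-purpose estimator that can be used for
 * most index types.  In some cases we use genericcostestimate as the base
 * code and then incorporate additional index-type-specific knowledge in
 * the type-specific calling function.  To avoid code duplication, we make
 * genericcostestimate return a number of intermediate values as well as
 * its preliminary estimates of the output cost values.  The GenericCosts
 * struct includes all these values.
 *
 * Callers should initialize all fields of GenericCosts to zero.  In
 * addition, they can set numIndexTuples to some positive value if they
 * have a better-than-default way of estimating the number of leaf index
 * tuples visited.
 */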
6022 void
6023 genericcostestimate(PlannerInfo *root,
6024 					IndexPath *path,
6025 					double loop_count,
6026 					GenericCosts *costs)
6027 {
6028 	IndexOptInfo *index = path->indexinfo;
6029 	List	   *indexQuals = get_quals_from_indexclauses(path->indexclauses);
6030 	List	   *indexOrderBys = path->indexorderbys;
6031 	Cost		indexStartupCost;
6032 	Cost		indexTotalCost;
6033 	Selectivity indexSelectivity;
6034 	double		indexCorrelation;
6035 	double		numIndexPages;
6036 	double		numIndexTuples;
6037 	double		spc_random_page_cost;
6038 	double		num_sa_scans;
6039 	double		num_outer_scans;
6040 	double		num_scans;
6041 	double		qual_op_cost;
6042 	double		qual_arg_cost;
6043 	List	   *selectivityQuals;
6044 	ListCell   *l;
6045 
6046 	/*
6047 	 * If the index is partial, AND the index predicate with the explicitly
6048 	 * given indexquals to produce a more accurate idea of the index
6049 	 * selectivity.
6050 	 */
6051 	selectivityQuals = add_predicate_to_index_quals(index, indexQuals);
6052 
6053 	/*
6054 	 * Check for ScalarArrayOpExpr index quals, and estimate the number of
6055 	 * index scans that will be performed.
6056 	 */
6057 	num_sa_scans = 1;
6058 	foreach(l, indexQuals)
6059 	{
6060 		RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
6061 
6062 		if (IsA(rinfo->clause, ScalarArrayOpExpr))
6063 		{
6064 			ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause;
6065 			int			alength = estimate_array_length(lsecond(saop->args));
6066 
6067 			if (alength > 1)
6068 				num_sa_scans *= alength;
6069 		}
6070 	}
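
	/*
	 * For example (hypothetical quals): x = ANY('{1,2,3}') AND
	 * y = ANY('{10,20}') yields num_sa_scans = 3 * 2 = 6 index scans.
	 */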
6071 
6072 	/* Estimate the fraction of main-table tuples that will be visited */
6073 	indexSelectivity = clauselist_selectivity(root, selectivityQuals,
6074 											  index->rel->relid,
6075 											  JOIN_INNER,
6076 											  NULL);
6077 
6078 	/*
6079 	 * If caller didn't give us an estimate, estimate the number of index
6080 	 * tuples that will be visited.  We do it in this rather peculiar-looking
6081 	 * way in order to get the right answer for partial indexes.
6082 	 */
6083 	numIndexTuples = costs->numIndexTuples;
6084 	if (numIndexTuples <= 0.0)
6085 	{
6086 		numIndexTuples = indexSelectivity * index->rel->tuples;
6087 
6088 		/*
6089 		 * The above calculation counts all the tuples visited across all
6090 		 * scans induced by ScalarArrayOpExpr nodes.  We want to consider the
6091 		 * average per-indexscan number, so adjust.  This is a handy place to
		 * round to integer, too.  (If the caller supplied a tuple estimate, it's
6093 		 * responsible for handling these considerations.)
6094 		 */
6095 		numIndexTuples = rint(numIndexTuples / num_sa_scans);
6096 	}
6097 
6098 	/*
6099 	 * We can bound the number of tuples by the index size in any case. Also,
	 * always estimate at least one tuple is touched, even when the
	 * indexSelectivity estimate is tiny.
6102 	 */
6103 	if (numIndexTuples > index->tuples)
6104 		numIndexTuples = index->tuples;
6105 	if (numIndexTuples < 1.0)
6106 		numIndexTuples = 1.0;
6107 
6108 	/*
6109 	 * Estimate the number of index pages that will be retrieved.
6110 	 *
6111 	 * We use the simplistic method of taking a pro-rata fraction of the total
6112 	 * number of index pages.  In effect, this counts only leaf pages and not
6113 	 * any overhead such as index metapage or upper tree levels.
6114 	 *
6115 	 * In practice access to upper index levels is often nearly free because
6116 	 * those tend to stay in cache under load; moreover, the cost involved is
6117 	 * highly dependent on index type.  We therefore ignore such costs here
6118 	 * and leave it to the caller to add a suitable charge if needed.
6119 	 */
6120 	if (index->pages > 1 && index->tuples > 1)
6121 		numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
6122 	else
6123 		numIndexPages = 1.0;
6124 
6125 	/* fetch estimated page cost for tablespace containing index */
6126 	get_tablespace_page_costs(index->reltablespace,
6127 							  &spc_random_page_cost,
6128 							  NULL);
6129 
6130 	/*
6131 	 * Now compute the disk access costs.
6132 	 *
6133 	 * The above calculations are all per-index-scan.  However, if we are in a
6134 	 * nestloop inner scan, we can expect the scan to be repeated (with
6135 	 * different search keys) for each row of the outer relation.  Likewise,
6136 	 * ScalarArrayOpExpr quals result in multiple index scans.  This creates
6137 	 * the potential for cache effects to reduce the number of disk page
6138 	 * fetches needed.  We want to estimate the average per-scan I/O cost in
6139 	 * the presence of caching.
6140 	 *
6141 	 * We use the Mackert-Lohman formula (see costsize.c for details) to
6142 	 * estimate the total number of page fetches that occur.  While this
6143 	 * wasn't what it was designed for, it seems a reasonable model anyway.
6144 	 * Note that we are counting pages not tuples anymore, so we take N = T =
6145 	 * index size, as if there were one "tuple" per page.
6146 	 */
6147 	num_outer_scans = loop_count;
6148 	num_scans = num_sa_scans * num_outer_scans;
6149 
6150 	if (num_scans > 1)
6151 	{
6152 		double		pages_fetched;
6153 
6154 		/* total page fetches ignoring cache effects */
6155 		pages_fetched = numIndexPages * num_scans;
6156 
6157 		/* use Mackert and Lohman formula to adjust for cache effects */
6158 		pages_fetched = index_pages_fetched(pages_fetched,
6159 											index->pages,
6160 											(double) index->pages,
6161 											root);
6162 
6163 		/*
6164 		 * Now compute the total disk access cost, and then report a pro-rated
6165 		 * share for each outer scan.  (Don't pro-rate for ScalarArrayOpExpr,
6166 		 * since that's internal to the indexscan.)
6167 		 */
6168 		indexTotalCost = (pages_fetched * spc_random_page_cost)
6169 			/ num_outer_scans;
6170 	}
6171 	else
6172 	{
6173 		/*
6174 		 * For a single index scan, we just charge spc_random_page_cost per
6175 		 * page touched.
6176 		 */
6177 		indexTotalCost = numIndexPages * spc_random_page_cost;
6178 	}
6179 
6180 	/*
6181 	 * CPU cost: any complex expressions in the indexquals will need to be
6182 	 * evaluated once at the start of the scan to reduce them to runtime keys
6183 	 * to pass to the index AM (see nodeIndexscan.c).  We model the per-tuple
6184 	 * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per
6185 	 * indexqual operator.  Because we have numIndexTuples as a per-scan
6186 	 * number, we have to multiply by num_sa_scans to get the correct result
6187 	 * for ScalarArrayOpExpr cases.  Similarly add in costs for any index
6188 	 * ORDER BY expressions.
6189 	 *
6190 	 * Note: this neglects the possible costs of rechecking lossy operators.
6191 	 * Detecting that that might be needed seems more expensive than it's
6192 	 * worth, though, considering all the other inaccuracies here ...
6193 	 */
6194 	qual_arg_cost = index_other_operands_eval_cost(root, indexQuals) +
6195 		index_other_operands_eval_cost(root, indexOrderBys);
6196 	qual_op_cost = cpu_operator_cost *
6197 		(list_length(indexQuals) + list_length(indexOrderBys));
6198 
6199 	indexStartupCost = qual_arg_cost;
6200 	indexTotalCost += qual_arg_cost;
6201 	indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
6202 
6203 	/*
6204 	 * Generic assumption about index correlation: there isn't any.
6205 	 */
6206 	indexCorrelation = 0.0;
6207 
6208 	/*
6209 	 * Return everything to caller.
6210 	 */
6211 	costs->indexStartupCost = indexStartupCost;
6212 	costs->indexTotalCost = indexTotalCost;
6213 	costs->indexSelectivity = indexSelectivity;
6214 	costs->indexCorrelation = indexCorrelation;
6215 	costs->numIndexPages = numIndexPages;
6216 	costs->numIndexTuples = numIndexTuples;
6217 	costs->spc_random_page_cost = spc_random_page_cost;
6218 	costs->num_sa_scans = num_sa_scans;
6219 }
6220 
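/*
 * Illustrative sketch (not part of the build): the minimal pattern an
 * index AM's cost estimator can follow when it has nothing type-specific
 * to add.  The function name is hypothetical; real estimators refine the
 * GenericCosts results further, as btcostestimate() does below.
 */
#ifdef NOT_USED
static void
examplecostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
					Cost *indexStartupCost, Cost *indexTotalCost,
					Selectivity *indexSelectivity, double *indexCorrelation,
					double *indexPages)
{
	GenericCosts costs;

	/* Callers of genericcostestimate must zero all fields first */
	MemSet(&costs, 0, sizeof(costs));

	genericcostestimate(root, path, loop_count, &costs);

	/* Hand back the preliminary estimates unchanged */
	*indexStartupCost = costs.indexStartupCost;
	*indexTotalCost = costs.indexTotalCost;
	*indexSelectivity = costs.indexSelectivity;
	*indexCorrelation = costs.indexCorrelation;
	*indexPages = costs.numIndexPages;
}
#endif
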
6221 /*
6222  * If the index is partial, add its predicate to the given qual list.
6223  *
6224  * ANDing the index predicate with the explicitly given indexquals produces
6225  * a more accurate idea of the index's selectivity.  However, we need to be
6226  * careful not to insert redundant clauses, because clauselist_selectivity()
6227  * is easily fooled into computing a too-low selectivity estimate.  Our
6228  * approach is to add only the predicate clause(s) that cannot be proven to
6229  * be implied by the given indexquals.  This successfully handles cases such
6230  * as a qual "x = 42" used with a partial index "WHERE x >= 40 AND x < 50".
6231  * There are many other cases where we won't detect redundancy, leading to a
6232  * too-low selectivity estimate, which will bias the system in favor of using
6233  * partial indexes where possible.  That is not necessarily bad though.
6234  *
6235  * Note that indexQuals contains RestrictInfo nodes while the indpred
6236  * does not, so the output list will be mixed.  This is OK for both
6237  * predicate_implied_by() and clauselist_selectivity(), but might be
6238  * problematic if the result were passed to other things.
6239  */
6240 List *
6241 add_predicate_to_index_quals(IndexOptInfo *index, List *indexQuals)
6242 {
6243 	List	   *predExtraQuals = NIL;
6244 	ListCell   *lc;
6245 
6246 	if (index->indpred == NIL)
6247 		return indexQuals;
6248 
6249 	foreach(lc, index->indpred)
6250 	{
6251 		Node	   *predQual = (Node *) lfirst(lc);
6252 		List	   *oneQual = list_make1(predQual);
6253 
6254 		if (!predicate_implied_by(oneQual, indexQuals, false))
6255 			predExtraQuals = list_concat(predExtraQuals, oneQual);
6256 	}
6257 	return list_concat(predExtraQuals, indexQuals);
6258 }
6259 
6260 
6261 void
6262 btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
6263 			   Cost *indexStartupCost, Cost *indexTotalCost,
6264 			   Selectivity *indexSelectivity, double *indexCorrelation,
6265 			   double *indexPages)
6266 {
6267 	IndexOptInfo *index = path->indexinfo;
6268 	GenericCosts costs;
6269 	Oid			relid;
6270 	AttrNumber	colnum;
6271 	VariableStatData vardata;
6272 	double		numIndexTuples;
6273 	Cost		descentCost;
6274 	List	   *indexBoundQuals;
6275 	int			indexcol;
6276 	bool		eqQualHere;
6277 	bool		found_saop;
6278 	bool		found_is_null_op;
6279 	double		num_sa_scans;
6280 	ListCell   *lc;
6281 
6282 	/*
6283 	 * For a btree scan, only leading '=' quals plus inequality quals for the
6284 	 * immediately next attribute contribute to index selectivity (these are
6285 	 * the "boundary quals" that determine the starting and stopping points of
6286 	 * the index scan).  Additional quals can suppress visits to the heap, so
6287 	 * it's OK to count them in indexSelectivity, but they should not count
6288 	 * for estimating numIndexTuples.  So we must examine the given indexquals
6289 	 * to find out which ones count as boundary quals.  We rely on the
6290 	 * knowledge that they are given in index column order.
6291 	 *
6292 	 * For a RowCompareExpr, we consider only the first column, just as
6293 	 * rowcomparesel() does.
6294 	 *
6295 	 * If there's a ScalarArrayOpExpr in the quals, we'll actually perform N
6296 	 * index scans not one, but the ScalarArrayOpExpr's operator can be
6297 	 * considered to act the same as it normally does.
6298 	 */
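	/*
	 * For example (hypothetical index and quals): with an index on
	 * (a, b, c) and quals a = 5 AND b < 10 AND c = 7, the boundary quals
	 * are a = 5 and b < 10 only; the loop below stops before the qual on
	 * c, because there is no '=' qual on b.
	 */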
6299 	indexBoundQuals = NIL;
6300 	indexcol = 0;
6301 	eqQualHere = false;
6302 	found_saop = false;
6303 	found_is_null_op = false;
6304 	num_sa_scans = 1;
6305 	foreach(lc, path->indexclauses)
6306 	{
6307 		IndexClause *iclause = lfirst_node(IndexClause, lc);
6308 		ListCell   *lc2;
6309 
6310 		if (indexcol != iclause->indexcol)
6311 		{
6312 			/* Beginning of a new column's quals */
6313 			if (!eqQualHere)
6314 				break;			/* done if no '=' qual for indexcol */
6315 			eqQualHere = false;
6316 			indexcol++;
6317 			if (indexcol != iclause->indexcol)
6318 				break;			/* no quals at all for indexcol */
6319 		}
6320 
6321 		/* Examine each indexqual associated with this index clause */
6322 		foreach(lc2, iclause->indexquals)
6323 		{
6324 			RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
6325 			Expr	   *clause = rinfo->clause;
6326 			Oid			clause_op = InvalidOid;
6327 			int			op_strategy;
6328 
6329 			if (IsA(clause, OpExpr))
6330 			{
6331 				OpExpr	   *op = (OpExpr *) clause;
6332 
6333 				clause_op = op->opno;
6334 			}
6335 			else if (IsA(clause, RowCompareExpr))
6336 			{
6337 				RowCompareExpr *rc = (RowCompareExpr *) clause;
6338 
6339 				clause_op = linitial_oid(rc->opnos);
6340 			}
6341 			else if (IsA(clause, ScalarArrayOpExpr))
6342 			{
6343 				ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
6344 				Node	   *other_operand = (Node *) lsecond(saop->args);
6345 				int			alength = estimate_array_length(other_operand);
6346 
6347 				clause_op = saop->opno;
6348 				found_saop = true;
6349 				/* count number of SA scans induced by indexBoundQuals only */
6350 				if (alength > 1)
6351 					num_sa_scans *= alength;
6352 			}
6353 			else if (IsA(clause, NullTest))
6354 			{
6355 				NullTest   *nt = (NullTest *) clause;
6356 
6357 				if (nt->nulltesttype == IS_NULL)
6358 				{
6359 					found_is_null_op = true;
6360 					/* IS NULL is like = for selectivity purposes */
6361 					eqQualHere = true;
6362 				}
6363 			}
6364 			else
6365 				elog(ERROR, "unsupported indexqual type: %d",
6366 					 (int) nodeTag(clause));
6367 
6368 			/* check for equality operator */
6369 			if (OidIsValid(clause_op))
6370 			{
6371 				op_strategy = get_op_opfamily_strategy(clause_op,
6372 													   index->opfamily[indexcol]);
6373 				Assert(op_strategy != 0);	/* not a member of opfamily?? */
6374 				if (op_strategy == BTEqualStrategyNumber)
6375 					eqQualHere = true;
6376 			}
6377 
6378 			indexBoundQuals = lappend(indexBoundQuals, rinfo);
6379 		}
6380 	}
6381 
6382 	/*
6383 	 * If index is unique and we found an '=' clause for each column, we can
6384 	 * just assume numIndexTuples = 1 and skip the expensive
6385 	 * clauselist_selectivity calculations.  However, a ScalarArrayOp or
6386 	 * NullTest invalidates that theory, even though it sets eqQualHere.
6387 	 */
6388 	if (index->unique &&
6389 		indexcol == index->nkeycolumns - 1 &&
6390 		eqQualHere &&
6391 		!found_saop &&
6392 		!found_is_null_op)
6393 		numIndexTuples = 1.0;
6394 	else
6395 	{
6396 		List	   *selectivityQuals;
6397 		Selectivity btreeSelectivity;
6398 
6399 		/*
6400 		 * If the index is partial, AND the index predicate with the
6401 		 * index-bound quals to produce a more accurate idea of the number of
6402 		 * rows covered by the bound conditions.
6403 		 */
6404 		selectivityQuals = add_predicate_to_index_quals(index, indexBoundQuals);
6405 
6406 		btreeSelectivity = clauselist_selectivity(root, selectivityQuals,
6407 												  index->rel->relid,
6408 												  JOIN_INNER,
6409 												  NULL);
6410 		numIndexTuples = btreeSelectivity * index->rel->tuples;
6411 
6412 		/*
6413 		 * As in genericcostestimate(), we have to adjust for any
6414 		 * ScalarArrayOpExpr quals included in indexBoundQuals, and then round
6415 		 * to integer.
6416 		 */
6417 		numIndexTuples = rint(numIndexTuples / num_sa_scans);
6418 	}
6419 
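	/*
	 * Illustration with hypothetical numbers (not taken from any particular
	 * index): given index->rel->tuples = 1e6 and a qual like
	 * "x = ANY('{1,...,10}')", btreeSelectivity covers all ten array values,
	 * say 1e-4, or 100 tuples in total; dividing by num_sa_scans = 10 and
	 * rounding yields numIndexTuples = 10 per individual index scan.
	 */
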
	/*
	 * Now do generic index cost estimation.
	 */
	MemSet(&costs, 0, sizeof(costs));
	costs.numIndexTuples = numIndexTuples;

	genericcostestimate(root, path, loop_count, &costs);

	/*
	 * Add a CPU-cost component to represent the costs of initial btree
	 * descent.  We don't charge any I/O cost for touching upper btree levels,
	 * since they tend to stay in cache, but we still have to do about log2(N)
	 * comparisons to descend a btree of N leaf tuples.  We charge one
	 * cpu_operator_cost per comparison.
	 *
	 * If there are ScalarArrayOpExprs, charge this once per SA scan.  The
	 * ones after the first one are not startup cost so far as the overall
	 * plan is concerned, so add them only to "total" cost.
	 */
	if (index->tuples > 1)		/* avoid computing log(0) */
	{
		descentCost = ceil(log(index->tuples) / log(2.0)) * cpu_operator_cost;
		costs.indexStartupCost += descentCost;
		costs.indexTotalCost += costs.num_sa_scans * descentCost;
	}

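	/*
	 * For illustration: with the default cpu_operator_cost of 0.0025 and an
	 * index of 1e6 leaf tuples, descentCost = ceil(log2(1e6)) * 0.0025 =
	 * 20 * 0.0025 = 0.05 per descent.
	 */
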
	/*
	 * Even though we're not charging I/O cost for touching upper btree pages,
	 * it's still reasonable to charge some CPU cost per page descended
	 * through.  Moreover, if we had no such charge at all, bloated indexes
	 * would appear to have the same search cost as unbloated ones, at least
	 * in cases where only a single leaf page is expected to be visited.  This
	 * cost is somewhat arbitrarily set at 50x cpu_operator_cost per page
	 * touched.  The number of such pages is btree tree height plus one (ie,
	 * we charge for the leaf page too).  As above, charge once per SA scan.
	 */
	descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
	costs.indexStartupCost += descentCost;
	costs.indexTotalCost += costs.num_sa_scans * descentCost;

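	/*
	 * For illustration: a btree with tree_height = 2 descends through three
	 * pages counting the leaf, so with the default cpu_operator_cost this
	 * charge is 3 * 50 * 0.0025 = 0.375 per descent.
	 */
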
	/*
	 * If we can get an estimate of the first column's ordering correlation C
	 * from pg_statistic, estimate the index correlation as C for a
	 * single-column index, or C * 0.75 for multiple columns. (The idea here
	 * is that multiple columns dilute the importance of the first column's
	 * ordering, but don't negate it entirely.  Before 8.0 we divided the
	 * correlation by the number of columns, but that seems too strong.)
	 */
	MemSet(&vardata, 0, sizeof(vardata));

	if (index->indexkeys[0] != 0)
	{
		/* Simple variable --- look to stats for the underlying table */
		RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root);

		Assert(rte->rtekind == RTE_RELATION);
		relid = rte->relid;
		Assert(relid != InvalidOid);
		colnum = index->indexkeys[0];

		if (get_relation_stats_hook &&
			(*get_relation_stats_hook) (root, rte, colnum, &vardata))
		{
			/*
			 * The hook took control of acquiring a stats tuple.  If it did
			 * supply a tuple, it'd better have supplied a freefunc.
			 */
			if (HeapTupleIsValid(vardata.statsTuple) &&
				!vardata.freefunc)
				elog(ERROR, "no function provided to release variable stats with");
		}
		else
		{
			vardata.statsTuple = SearchSysCache3(STATRELATTINH,
												 ObjectIdGetDatum(relid),
												 Int16GetDatum(colnum),
												 BoolGetDatum(rte->inh));
			vardata.freefunc = ReleaseSysCache;
		}
	}
	else
	{
		/* Expression --- maybe there are stats for the index itself */
		relid = index->indexoid;
		colnum = 1;

		if (get_index_stats_hook &&
			(*get_index_stats_hook) (root, relid, colnum, &vardata))
		{
			/*
			 * The hook took control of acquiring a stats tuple.  If it did
			 * supply a tuple, it'd better have supplied a freefunc.
			 */
			if (HeapTupleIsValid(vardata.statsTuple) &&
				!vardata.freefunc)
				elog(ERROR, "no function provided to release variable stats with");
		}
		else
		{
			vardata.statsTuple = SearchSysCache3(STATRELATTINH,
												 ObjectIdGetDatum(relid),
												 Int16GetDatum(colnum),
												 BoolGetDatum(false));
			vardata.freefunc = ReleaseSysCache;
		}
	}

	if (HeapTupleIsValid(vardata.statsTuple))
	{
		Oid			sortop;
		AttStatsSlot sslot;

		sortop = get_opfamily_member(index->opfamily[0],
									 index->opcintype[0],
									 index->opcintype[0],
									 BTLessStrategyNumber);
		if (OidIsValid(sortop) &&
			get_attstatsslot(&sslot, vardata.statsTuple,
							 STATISTIC_KIND_CORRELATION, sortop,
							 ATTSTATSSLOT_NUMBERS))
		{
			double		varCorrelation;

			Assert(sslot.nnumbers == 1);
			varCorrelation = sslot.numbers[0];

			if (index->reverse_sort[0])
				varCorrelation = -varCorrelation;

			if (index->nkeycolumns > 1)
				costs.indexCorrelation = varCorrelation * 0.75;
			else
				costs.indexCorrelation = varCorrelation;

			free_attstatsslot(&sslot);
		}
	}

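	/*
	 * For illustration: a stored correlation of -0.9 on a first column that
	 * the index sorts in reverse becomes +0.9; a two-column index would then
	 * report an indexCorrelation of 0.9 * 0.75 = 0.675.
	 */
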
	ReleaseVariableStats(vardata);

	*indexStartupCost = costs.indexStartupCost;
	*indexTotalCost = costs.indexTotalCost;
	*indexSelectivity = costs.indexSelectivity;
	*indexCorrelation = costs.indexCorrelation;
	*indexPages = costs.numIndexPages;
}

void
hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
				 Cost *indexStartupCost, Cost *indexTotalCost,
				 Selectivity *indexSelectivity, double *indexCorrelation,
				 double *indexPages)
{
	GenericCosts costs;

	MemSet(&costs, 0, sizeof(costs));

	genericcostestimate(root, path, loop_count, &costs);

	/*
	 * A hash index has no descent costs as such, since the index AM can go
	 * directly to the target bucket after computing the hash value.  There
	 * are a couple of other hash-specific costs that we could conceivably add
	 * here, though:
	 *
	 * Ideally we'd charge spc_random_page_cost for each page in the target
	 * bucket, not just the numIndexPages pages that genericcostestimate
	 * thought we'd visit.  However in most cases we don't know which bucket
	 * that will be.  There's no point in considering the average bucket size
	 * because the hash AM makes sure that's always one page.
	 *
	 * Likewise, we could consider charging some CPU for each index tuple in
	 * the bucket, if we knew how many there were.  But the per-tuple cost is
	 * just a hash value comparison, not a general datatype-dependent
	 * comparison, so any such charge ought to be quite a bit less than
	 * cpu_operator_cost; which makes it probably not worth worrying about.
	 *
	 * A bigger issue is that chance hash-value collisions will result in
	 * wasted probes into the heap.  We don't currently attempt to model this
	 * cost on the grounds that it's rare, but maybe it's not rare enough.
	 * (Any fix for this ought to consider the generic lossy-operator problem,
	 * though; it's not entirely hash-specific.)
	 */

	*indexStartupCost = costs.indexStartupCost;
	*indexTotalCost = costs.indexTotalCost;
	*indexSelectivity = costs.indexSelectivity;
	*indexCorrelation = costs.indexCorrelation;
	*indexPages = costs.numIndexPages;
}

void
gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
				 Cost *indexStartupCost, Cost *indexTotalCost,
				 Selectivity *indexSelectivity, double *indexCorrelation,
				 double *indexPages)
{
	IndexOptInfo *index = path->indexinfo;
	GenericCosts costs;
	Cost		descentCost;

	MemSet(&costs, 0, sizeof(costs));

	genericcostestimate(root, path, loop_count, &costs);

	/*
	 * We model index descent costs similarly to those for btree, but to do
	 * that we first need an idea of the tree height.  We somewhat arbitrarily
	 * assume that the fanout is 100, meaning the tree height is at most
	 * log100(index->pages).
	 *
	 * Although this computation isn't really expensive enough to require
	 * caching, we might as well use index->tree_height to cache it.
	 */
	if (index->tree_height < 0) /* unknown? */
	{
		if (index->pages > 1)	/* avoid computing log(0) */
			index->tree_height = (int) (log(index->pages) / log(100.0));
		else
			index->tree_height = 0;
	}

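	/*
	 * For illustration: an index of 10000 pages gets tree_height =
	 * (int) (log(10000) / log(100)) = 2 under the assumed fanout of 100.
	 */
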
	/*
	 * Add a CPU-cost component to represent the costs of initial descent. We
	 * just use log(N) here not log2(N) since the branching factor isn't
	 * necessarily two anyway.  As for btree, charge once per SA scan.
	 */
	if (index->tuples > 1)		/* avoid computing log(0) */
	{
		descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
		costs.indexStartupCost += descentCost;
		costs.indexTotalCost += costs.num_sa_scans * descentCost;
	}

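	/*
	 * For illustration: with 1e6 index tuples, descentCost =
	 * ceil(ln(1e6)) * cpu_operator_cost = 14 * 0.0025 = 0.035 per descent
	 * at the default cpu_operator_cost.
	 */
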
	/*
	 * Likewise add a per-page charge, calculated the same as for btrees.
	 */
	descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
	costs.indexStartupCost += descentCost;
	costs.indexTotalCost += costs.num_sa_scans * descentCost;

	*indexStartupCost = costs.indexStartupCost;
	*indexTotalCost = costs.indexTotalCost;
	*indexSelectivity = costs.indexSelectivity;
	*indexCorrelation = costs.indexCorrelation;
	*indexPages = costs.numIndexPages;
}

void
spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
				Cost *indexStartupCost, Cost *indexTotalCost,
				Selectivity *indexSelectivity, double *indexCorrelation,
				double *indexPages)
{
	IndexOptInfo *index = path->indexinfo;
	GenericCosts costs;
	Cost		descentCost;

	MemSet(&costs, 0, sizeof(costs));

	genericcostestimate(root, path, loop_count, &costs);

	/*
	 * We model index descent costs similarly to those for btree, but to do
	 * that we first need an idea of the tree height.  We somewhat arbitrarily
	 * assume that the fanout is 100, meaning the tree height is at most
	 * log100(index->pages).
	 *
	 * Although this computation isn't really expensive enough to require
	 * caching, we might as well use index->tree_height to cache it.
	 */
	if (index->tree_height < 0) /* unknown? */
	{
		if (index->pages > 1)	/* avoid computing log(0) */
			index->tree_height = (int) (log(index->pages) / log(100.0));
		else
			index->tree_height = 0;
	}

	/*
	 * Add a CPU-cost component to represent the costs of initial descent. We
	 * just use log(N) here not log2(N) since the branching factor isn't
	 * necessarily two anyway.  As for btree, charge once per SA scan.
	 */
	if (index->tuples > 1)		/* avoid computing log(0) */
	{
		descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
		costs.indexStartupCost += descentCost;
		costs.indexTotalCost += costs.num_sa_scans * descentCost;
	}

	/*
	 * Likewise add a per-page charge, calculated the same as for btrees.
	 */
	descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
	costs.indexStartupCost += descentCost;
	costs.indexTotalCost += costs.num_sa_scans * descentCost;

	*indexStartupCost = costs.indexStartupCost;
	*indexTotalCost = costs.indexTotalCost;
	*indexSelectivity = costs.indexSelectivity;
	*indexCorrelation = costs.indexCorrelation;
	*indexPages = costs.numIndexPages;
}


/*
 * Support routines for gincostestimate
 */

typedef struct
{
	bool		attHasFullScan[INDEX_MAX_KEYS];
	bool		attHasNormalScan[INDEX_MAX_KEYS];
	double		partialEntries;
	double		exactEntries;
	double		searchEntries;
	double		arrayScans;
} GinQualCounts;

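/*
 * In brief (summarizing how the fields are used below): partialEntries and
 * exactEntries count index terms expected to be searched via partial and
 * exact matching respectively, searchEntries counts entry-tree descents,
 * and arrayScans is the product of array sizes from ScalarArrayOpExpr
 * quals, i.e. the number of distinct index scans at runtime.
 */
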
/*
 * Estimate the number of index terms that need to be searched for while
 * testing the given GIN query, and increment the counts in *counts
 * appropriately.  If the query is unsatisfiable, return false.
 */
static bool
gincost_pattern(IndexOptInfo *index, int indexcol,
				Oid clause_op, Datum query,
				GinQualCounts *counts)
{
	FmgrInfo	flinfo;
	Oid			extractProcOid;
	Oid			collation;
	int			strategy_op;
	Oid			lefttype,
				righttype;
	int32		nentries = 0;
	bool	   *partial_matches = NULL;
	Pointer    *extra_data = NULL;
	bool	   *nullFlags = NULL;
	int32		searchMode = GIN_SEARCH_MODE_DEFAULT;
	int32		i;

	Assert(indexcol < index->nkeycolumns);

	/*
	 * Get the operator's strategy number and declared input data types within
	 * the index opfamily.  (We don't need the latter, but we use
	 * get_op_opfamily_properties because it will throw error if it fails to
	 * find a matching pg_amop entry.)
	 */
	get_op_opfamily_properties(clause_op, index->opfamily[indexcol], false,
							   &strategy_op, &lefttype, &righttype);

	/*
	 * GIN always uses the "default" support functions, which are those with
	 * lefttype == righttype == the opclass' opcintype (see
	 * IndexSupportInitialize in relcache.c).
	 */
	extractProcOid = get_opfamily_proc(index->opfamily[indexcol],
									   index->opcintype[indexcol],
									   index->opcintype[indexcol],
									   GIN_EXTRACTQUERY_PROC);

	if (!OidIsValid(extractProcOid))
	{
		/* should not happen; throw same error as index_getprocinfo */
		elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",
			 GIN_EXTRACTQUERY_PROC, indexcol + 1,
			 get_rel_name(index->indexoid));
	}

	/*
	 * Choose collation to pass to extractProc (should match initGinState).
	 */
	if (OidIsValid(index->indexcollations[indexcol]))
		collation = index->indexcollations[indexcol];
	else
		collation = DEFAULT_COLLATION_OID;

	fmgr_info(extractProcOid, &flinfo);

	set_fn_opclass_options(&flinfo, index->opclassoptions[indexcol]);

	FunctionCall7Coll(&flinfo,
					  collation,
					  query,
					  PointerGetDatum(&nentries),
					  UInt16GetDatum(strategy_op),
					  PointerGetDatum(&partial_matches),
					  PointerGetDatum(&extra_data),
					  PointerGetDatum(&nullFlags),
					  PointerGetDatum(&searchMode));

	if (nentries <= 0 && searchMode == GIN_SEARCH_MODE_DEFAULT)
	{
		/* No match is possible */
		return false;
	}

	for (i = 0; i < nentries; i++)
	{
		/*
		 * For a partial match we don't have any information with which to
		 * estimate the number of matched entries in the index, so we just
		 * estimate it as 100.
		 */
		if (partial_matches && partial_matches[i])
			counts->partialEntries += 100;
		else
			counts->exactEntries++;

		counts->searchEntries++;
	}

	if (searchMode == GIN_SEARCH_MODE_DEFAULT)
	{
		counts->attHasNormalScan[indexcol] = true;
	}
	else if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY)
	{
		/* Treat "include empty" like an exact-match item */
		counts->attHasNormalScan[indexcol] = true;
		counts->exactEntries++;
		counts->searchEntries++;
	}
	else
	{
		/* It's GIN_SEARCH_MODE_ALL */
		counts->attHasFullScan[indexcol] = true;
	}

	return true;
}

/*
 * Estimate the number of index terms that need to be searched for while
 * testing the given GIN index clause, and increment the counts in *counts
 * appropriately.  If the query is unsatisfiable, return false.
 */
static bool
gincost_opexpr(PlannerInfo *root,
			   IndexOptInfo *index,
			   int indexcol,
			   OpExpr *clause,
			   GinQualCounts *counts)
{
	Oid			clause_op = clause->opno;
	Node	   *operand = (Node *) lsecond(clause->args);

	/* aggressively reduce to a constant, and look through relabeling */
	operand = estimate_expression_value(root, operand);

	if (IsA(operand, RelabelType))
		operand = (Node *) ((RelabelType *) operand)->arg;

	/*
	 * We can't call the extractQuery method with an unknown operand.  So
	 * unless the operand is a Const, we can't do much; just assume there
	 * will be one ordinary search entry from the operand at runtime.
	 */
	if (!IsA(operand, Const))
	{
		counts->exactEntries++;
		counts->searchEntries++;
		return true;
	}

	/* If Const is null, there can be no matches */
	if (((Const *) operand)->constisnull)
		return false;

	/* Otherwise, apply extractQuery and get the actual term counts */
	return gincost_pattern(index, indexcol, clause_op,
						   ((Const *) operand)->constvalue,
						   counts);
}

/*
 * Estimate the number of index terms that need to be searched for while
 * testing the given GIN index clause, and increment the counts in *counts
 * appropriately.  If the query is unsatisfiable, return false.
 *
 * A ScalarArrayOpExpr will give rise to N separate indexscans at runtime,
 * each of which involves one value from the RHS array, plus all the
 * non-array quals (if any).  To model this, we average the counts across
 * the RHS elements, and add the averages to the counts in *counts (which
 * correspond to per-indexscan costs).  We also multiply counts->arrayScans
 * by N, causing gincostestimate to scale up its estimates accordingly.
 */
static bool
gincost_scalararrayopexpr(PlannerInfo *root,
						  IndexOptInfo *index,
						  int indexcol,
						  ScalarArrayOpExpr *clause,
						  double numIndexEntries,
						  GinQualCounts *counts)
{
	Oid			clause_op = clause->opno;
	Node	   *rightop = (Node *) lsecond(clause->args);
	ArrayType  *arrayval;
	int16		elmlen;
	bool		elmbyval;
	char		elmalign;
	int			numElems;
	Datum	   *elemValues;
	bool	   *elemNulls;
	GinQualCounts arraycounts;
	int			numPossible = 0;
	int			i;

	Assert(clause->useOr);

	/* aggressively reduce to a constant, and look through relabeling */
	rightop = estimate_expression_value(root, rightop);

	if (IsA(rightop, RelabelType))
		rightop = (Node *) ((RelabelType *) rightop)->arg;

	/*
	 * We can't call the extractQuery method with an unknown operand.  So
	 * unless the operand is a Const, we can't do much; just assume there
	 * will be one ordinary search entry from each array entry at runtime,
	 * and fall back on a probably-bad estimate of the number of array
	 * entries.
	 */
	if (!IsA(rightop, Const))
	{
		counts->exactEntries++;
		counts->searchEntries++;
		counts->arrayScans *= estimate_array_length(rightop);
		return true;
	}

	/* If Const is null, there can be no matches */
	if (((Const *) rightop)->constisnull)
		return false;

	/* Otherwise, extract the array elements and iterate over them */
	arrayval = DatumGetArrayTypeP(((Const *) rightop)->constvalue);
	get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
						 &elmlen, &elmbyval, &elmalign);
	deconstruct_array(arrayval,
					  ARR_ELEMTYPE(arrayval),
					  elmlen, elmbyval, elmalign,
					  &elemValues, &elemNulls, &numElems);

	memset(&arraycounts, 0, sizeof(arraycounts));

	for (i = 0; i < numElems; i++)
	{
		GinQualCounts elemcounts;

		/* NULL can't match anything, so ignore, as the executor will */
		if (elemNulls[i])
			continue;

		/* Otherwise, apply extractQuery and get the actual term counts */
		memset(&elemcounts, 0, sizeof(elemcounts));

		if (gincost_pattern(index, indexcol, clause_op, elemValues[i],
							&elemcounts))
		{
			/* We ignore array elements that are unsatisfiable patterns */
			numPossible++;

			if (elemcounts.attHasFullScan[indexcol] &&
				!elemcounts.attHasNormalScan[indexcol])
			{
				/*
				 * Full index scan will be required.  We treat this as if
				 * every key in the index had been listed in the query; is
				 * that reasonable?
				 */
				elemcounts.partialEntries = 0;
				elemcounts.exactEntries = numIndexEntries;
				elemcounts.searchEntries = numIndexEntries;
			}
			arraycounts.partialEntries += elemcounts.partialEntries;
			arraycounts.exactEntries += elemcounts.exactEntries;
			arraycounts.searchEntries += elemcounts.searchEntries;
		}
	}

	if (numPossible == 0)
	{
		/* No satisfiable patterns in the array */
		return false;
	}

	/*
	 * Now add the averages to the global counts.  This will give us an
	 * estimate of the average number of terms searched for in each indexscan,
	 * including contributions from both array and non-array quals.
	 */
	counts->partialEntries += arraycounts.partialEntries / numPossible;
	counts->exactEntries += arraycounts.exactEntries / numPossible;
	counts->searchEntries += arraycounts.searchEntries / numPossible;

	counts->arrayScans *= numPossible;

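	/*
	 * For illustration: "x = ANY(ARRAY[a,b,c])" with three satisfiable
	 * elements whose searchEntries total 6 adds 6/3 = 2 to
	 * counts->searchEntries and multiplies counts->arrayScans by 3.
	 */
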
	return true;
}

/*
 * GIN has search behavior completely different from other index types
 */
void
gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
				Cost *indexStartupCost, Cost *indexTotalCost,
				Selectivity *indexSelectivity, double *indexCorrelation,
				double *indexPages)
{
	IndexOptInfo *index = path->indexinfo;
	List	   *indexQuals = get_quals_from_indexclauses(path->indexclauses);
	List	   *selectivityQuals;
	double		numPages = index->pages,
				numTuples = index->tuples;
	double		numEntryPages,
				numDataPages,
				numPendingPages,
				numEntries;
	GinQualCounts counts;
	bool		matchPossible;
	bool		fullIndexScan;
	double		partialScale;
	double		entryPagesFetched,
				dataPagesFetched,
				dataPagesFetchedBySel;
	double		qual_op_cost,
				qual_arg_cost,
				spc_random_page_cost,
				outer_scans;
	Relation	indexRel;
	GinStatsData ginStats;
	ListCell   *lc;
	int			i;

	/*
	 * Obtain statistical information from the meta page, if possible.  Else
	 * set ginStats to zeroes, and we'll cope below.
	 */
	if (!index->hypothetical)
	{
		/* Lock should have already been obtained in plancat.c */
		indexRel = index_open(index->indexoid, NoLock);
		ginGetStats(indexRel, &ginStats);
		index_close(indexRel, NoLock);
	}
	else
	{
		memset(&ginStats, 0, sizeof(ginStats));
	}

	/*
	 * Assuming we got valid (nonzero) stats at all, nPendingPages can be
	 * trusted, but the other fields are data as of the last VACUUM.  We can
	 * scale them up to account for growth since then, but that method only
	 * goes so far; in the worst case, the stats might be for a completely
	 * empty index, and scaling them will produce pretty bogus numbers.
	 * Somewhat arbitrarily, set the cutoff for doing scaling at 4X growth; if
	 * it's grown more than that, fall back to estimating things only from the
	 * assumed-accurate index size.  But we'll trust nPendingPages in any case
	 * so long as it's not clearly insane, ie, more than the index size.
	 */
	if (ginStats.nPendingPages < numPages)
		numPendingPages = ginStats.nPendingPages;
	else
		numPendingPages = 0;

	if (numPages > 0 && ginStats.nTotalPages <= numPages &&
		ginStats.nTotalPages > numPages / 4 &&
		ginStats.nEntryPages > 0 && ginStats.nEntries > 0)
	{
		/*
		 * OK, the stats seem close enough to sane to be trusted.  But we
		 * still need to scale them by the ratio numPages / nTotalPages to
		 * account for growth since the last VACUUM.
		 */
		double		scale = numPages / ginStats.nTotalPages;

		numEntryPages = ceil(ginStats.nEntryPages * scale);
		numDataPages = ceil(ginStats.nDataPages * scale);
		numEntries = ceil(ginStats.nEntries * scale);
		/* ensure we didn't round up too much */
		numEntryPages = Min(numEntryPages, numPages - numPendingPages);
		numDataPages = Min(numDataPages,
						   numPages - numPendingPages - numEntryPages);
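		/*
		 * For illustration: if numPages = 1200 and the last VACUUM saw
		 * nTotalPages = 1000 with nEntryPages = 500, then scale = 1.2 and
		 * numEntryPages = ceil(500 * 1.2) = 600, subject to the clamps above.
		 */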
	}
	else
	{
		/*
		 * We might get here because it's a hypothetical index, or an index
		 * created pre-9.1 and never vacuumed since upgrading (in which case
		 * its stats would read as zeroes), or just because it's grown too
		 * much since the last VACUUM for us to put our faith in scaling.
		 *
		 * Invent some plausible internal statistics based on the index page
		 * count (and clamp that to at least 10 pages, just in case).  We
		 * estimate that 90% of the index is entry pages, and the rest is data
		 * pages.  Estimate 100 entries per entry page; this is rather bogus
		 * since it'll depend on the size of the keys, but it's more robust
		 * than trying to predict the number of entries per heap tuple.
		 */
		numPages = Max(numPages, 10);
		numEntryPages = floor((numPages - numPendingPages) * 0.90);
		numDataPages = numPages - numPendingPages - numEntryPages;
		numEntries = floor(numEntryPages * 100);
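		/*
		 * For illustration: a 100-page index with no pending pages would be
		 * treated as having 90 entry pages, 10 data pages, and 9000 entries.
		 */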
	}

	/* In an empty index, numEntries could be zero.  Avoid divide-by-zero */
	if (numEntries < 1)
		numEntries = 1;

	/*
	 * If the index is partial, AND the index predicate with the index-bound
	 * quals to produce a more accurate idea of the number of rows covered by
	 * the bound conditions.
	 */
	selectivityQuals = add_predicate_to_index_quals(index, indexQuals);

	/* Estimate the fraction of main-table tuples that will be visited */
	*indexSelectivity = clauselist_selectivity(root, selectivityQuals,
											   index->rel->relid,
											   JOIN_INNER,
											   NULL);

	/* fetch estimated page cost for tablespace containing index */
	get_tablespace_page_costs(index->reltablespace,
							  &spc_random_page_cost,
							  NULL);

	/*
	 * Generic assumption about index correlation: there isn't any.
	 */
	*indexCorrelation = 0.0;

	/*
	 * Examine quals to estimate number of search entries & partial matches
	 */
	memset(&counts, 0, sizeof(counts));
	counts.arrayScans = 1;
	matchPossible = true;

	foreach(lc, path->indexclauses)
	{
		IndexClause *iclause = lfirst_node(IndexClause, lc);
		ListCell   *lc2;

		foreach(lc2, iclause->indexquals)
		{
			RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
			Expr	   *clause = rinfo->clause;

			if (IsA(clause, OpExpr))
			{
				matchPossible = gincost_opexpr(root,
											   index,
											   iclause->indexcol,
											   (OpExpr *) clause,
											   &counts);
				if (!matchPossible)
					break;
			}
			else if (IsA(clause, ScalarArrayOpExpr))
			{
				matchPossible = gincost_scalararrayopexpr(root,
														  index,
														  iclause->indexcol,
														  (ScalarArrayOpExpr *) clause,
														  numEntries,
														  &counts);
				if (!matchPossible)
					break;
			}
			else
			{
				/* shouldn't be anything else for a GIN index */
				elog(ERROR, "unsupported GIN indexqual type: %d",
					 (int) nodeTag(clause));
			}
		}
	}

	/* Fall out if there were any provably-unsatisfiable quals */
	if (!matchPossible)
	{
		*indexStartupCost = 0;
		*indexTotalCost = 0;
		*indexSelectivity = 0;
		return;
	}

	/*
	 * If an attribute has a full scan and at the same time has no normal
	 * scan, then we'll have to scan all non-null entries of that attribute.
	 * Currently, we don't have per-attribute statistics for GIN.  Thus, we
	 * must assume the whole GIN index has to be scanned in this case.
	 */
	fullIndexScan = false;
	for (i = 0; i < index->nkeycolumns; i++)
	{
		if (counts.attHasFullScan[i] && !counts.attHasNormalScan[i])
		{
			fullIndexScan = true;
			break;
		}
	}

	if (fullIndexScan || indexQuals == NIL)
	{
		/*
		 * Full index scan will be required.  We treat this as if every key in
		 * the index had been listed in the query; is that reasonable?
		 */
		counts.partialEntries = 0;
		counts.exactEntries = numEntries;
		counts.searchEntries = numEntries;
	}

	/* Will we have more than one iteration of a nestloop scan? */
	outer_scans = loop_count;

	/*
	 * Compute the cost to begin the scan; first of all, account for the
	 * pending list.
	 */
	entryPagesFetched = numPendingPages;

	/*
	 * Estimate the number of entry pages read.  We need to do
	 * counts.searchEntries searches.  Use a power function for the pages
	 * read per search, as a tree descent calls for, though tuples on leaf
	 * pages are usually much more numerous than such a model assumes.  Here
	 * we include all searches in the entry tree, including the search for
	 * the first entry in the partial match algorithm.
	 */
	entryPagesFetched += ceil(counts.searchEntries * rint(pow(numEntryPages, 0.15)));

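	/*
	 * For illustration: with numEntryPages = 1000, rint(pow(1000, 0.15)) =
	 * rint(2.82) = 3 pages per search, so counts.searchEntries = 4 adds
	 * ceil(4 * 3) = 12 entry pages here.
	 */
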
	/*
	 * Add an estimate of entry pages read by the partial match algorithm.
	 * It's a scan over leaf pages in the entry tree.  We don't have any
	 * useful stats here, so estimate it as a proportion.  Because
	 * counts.partialEntries is really pretty bogus (see code above), it's
	 * possible that it is more than numEntries; clamp the proportion to
	 * ensure sanity.
	 */
	partialScale = counts.partialEntries / numEntries;
	partialScale = Min(partialScale, 1.0);

	entryPagesFetched += ceil(numEntryPages * partialScale);

	/*
	 * The partial match algorithm reads all data pages before doing the
	 * actual scan, so it's a startup cost.  Again, we don't have any useful
	 * stats here, so estimate it as a proportion.
	 */
	dataPagesFetched = ceil(numDataPages * partialScale);

	/*
	 * Calculate cache effects if more than one scan due to nestloops or array
	 * quals.  The result is pro-rated per nestloop scan, but the array qual
	 * factor shouldn't be pro-rated (compare genericcostestimate).
	 */
	if (outer_scans > 1 || counts.arrayScans > 1)
	{
		entryPagesFetched *= outer_scans * counts.arrayScans;
		entryPagesFetched = index_pages_fetched(entryPagesFetched,
												(BlockNumber) numEntryPages,
												numEntryPages, root);
		entryPagesFetched /= outer_scans;
		dataPagesFetched *= outer_scans * counts.arrayScans;
		dataPagesFetched = index_pages_fetched(dataPagesFetched,
											   (BlockNumber) numDataPages,
											   numDataPages, root);
		dataPagesFetched /= outer_scans;
	}

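	/*
	 * For illustration: with outer_scans = 10 and counts.arrayScans = 2,
	 * entryPagesFetched = 12 becomes 240 total fetches across all scans;
	 * index_pages_fetched() then discounts for cache hits, and dividing by
	 * outer_scans = 10 pro-rates the result back to a per-nestloop-scan
	 * figure, while the array-scan factor of 2 remains included.
	 */
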
	/*
	 * Here we use random page cost because logically-close pages could be far
	 * apart on disk.
	 */
	*indexStartupCost = (entryPagesFetched + dataPagesFetched) * spc_random_page_cost;

	/*
	 * Now compute the number of data pages fetched during the scan.
	 *
	 * We assume every entry to have the same number of items, and that there
	 * is no overlap between them. (XXX: tsvector and array opclasses collect
	 * statistics on the frequency of individual keys; it would be nice to use
	 * those here.)
	 */
	dataPagesFetched = ceil(numDataPages * counts.exactEntries / numEntries);

	/*
	 * If there is a lot of overlap among the entries, in particular if one of
	 * the entries is very frequent, the above calculation can grossly
	 * under-estimate.  As a simple cross-check, calculate a lower bound based
	 * on the overall selectivity of the quals.  At a minimum, we must read
	 * one item pointer for each matching entry.
	 *
	 * The width of each item pointer varies, based on the level of
	 * compression.  We don't have statistics on that, but an average of
	 * around 3 bytes per item is fairly typical.
	 */
	dataPagesFetchedBySel = ceil(*indexSelectivity *
								 (numTuples / (BLCKSZ / 3)));
	if (dataPagesFetchedBySel > dataPagesFetched)
		dataPagesFetched = dataPagesFetchedBySel;

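	/*
	 * For illustration: with the default BLCKSZ of 8192, that's 8192 / 3 =
	 * 2730 item pointers per page, so *indexSelectivity = 0.01 over
	 * numTuples = 1e6 yields a lower bound of ceil(10000 / 2730) = 4 data
	 * pages.
	 */
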
	/* Account for cache effects, the same as above */
	if (outer_scans > 1 || counts.arrayScans > 1)
	{
		dataPagesFetched *= outer_scans * counts.arrayScans;
		dataPagesFetched = index_pages_fetched(dataPagesFetched,
											   (BlockNumber) numDataPages,
											   numDataPages, root);
		dataPagesFetched /= outer_scans;
	}

	/* And apply random_page_cost as the cost per page */
	*indexTotalCost = *indexStartupCost +
		dataPagesFetched * spc_random_page_cost;

	/*
	 * Add on index qual eval costs, much as in genericcostestimate.  But we
	 * can disregard indexorderbys, since GIN doesn't support those.
	 */
	qual_arg_cost = index_other_operands_eval_cost(root, indexQuals);
	qual_op_cost = cpu_operator_cost * list_length(indexQuals);

	*indexStartupCost += qual_arg_cost;
	*indexTotalCost += qual_arg_cost;
	*indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
	*indexPages = dataPagesFetched;
}

/*
 * BRIN has search behavior completely different from other index types
 */
void
brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
				 Cost *indexStartupCost, Cost *indexTotalCost,
				 Selectivity *indexSelectivity, double *indexCorrelation,
				 double *indexPages)
{
	IndexOptInfo *index = path->indexinfo;
	List	   *indexQuals = get_quals_from_indexclauses(path->indexclauses);
	double		numPages = index->pages;
	RelOptInfo *baserel = index->rel;
	RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
	Cost		spc_seq_page_cost;
	Cost		spc_random_page_cost;
	double		qual_arg_cost;
	double		qualSelectivity;
	BrinStatsData statsData;
	double		indexRanges;
	double		minimalRanges;
	double		estimatedRanges;
	double		selec;
	Relation	indexRel;
	ListCell   *l;
	VariableStatData vardata;

	Assert(rte->rtekind == RTE_RELATION);

	/* fetch estimated page cost for the tablespace containing the index */
	get_tablespace_page_costs(index->reltablespace,
							  &spc_random_page_cost,
							  &spc_seq_page_cost);

	/*
	 * Obtain some data from the index itself, if possible.  Otherwise invent
	 * some plausible internal statistics based on the relation page count.
	 */
	if (!index->hypothetical)
	{
		/*
		 * A lock should have already been obtained on the index in plancat.c.
		 */
		indexRel = index_open(index->indexoid, NoLock);
		brinGetStats(indexRel, &statsData);
		index_close(indexRel, NoLock);

		/* work out the actual number of ranges in the index */
		indexRanges = Max(ceil((double) baserel->pages /
							   statsData.pagesPerRange), 1.0);
	}
	else
	{
		/*
		 * Assume default number of pages per range, and estimate the number
		 * of ranges based on that.
		 */
		indexRanges = Max(ceil((double) baserel->pages /
							   BRIN_DEFAULT_PAGES_PER_RANGE), 1.0);

		statsData.pagesPerRange = BRIN_DEFAULT_PAGES_PER_RANGE;
		statsData.revmapNumPages = (indexRanges / REVMAP_PAGE_MAXITEMS) + 1;
	}

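	/*
	 * For illustration: a 100000-page heap with the default
	 * BRIN_DEFAULT_PAGES_PER_RANGE of 128 yields indexRanges =
	 * ceil(100000 / 128) = 782 ranges.
	 */
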
	/*
	 * Compute index correlation
	 *
	 * Because we can use all index quals equally when scanning, we can use
	 * the largest correlation (in absolute value) among columns used by the
	 * query.  Start at zero, the worst possible case.  If we cannot find any
	 * correlation statistics, we will keep it as 0.
	 */
	*indexCorrelation = 0;

	foreach(l, path->indexclauses)
	{
		IndexClause *iclause = lfirst_node(IndexClause, l);
		AttrNumber	attnum = index->indexkeys[iclause->indexcol];

		/* attempt to lookup stats in relation for this index column */
		if (attnum != 0)
		{
			/* Simple variable -- look to stats for the underlying table */
			if (get_relation_stats_hook &&
				(*get_relation_stats_hook) (root, rte, attnum, &vardata))
			{
				/*
				 * The hook took control of acquiring a stats tuple.  If it
				 * did supply a tuple, it'd better have supplied a freefunc.
				 */
				if (HeapTupleIsValid(vardata.statsTuple) && !vardata.freefunc)
					elog(ERROR,
						 "no function provided to release variable stats with");
			}
			else
			{
				vardata.statsTuple =
					SearchSysCache3(STATRELATTINH,
									ObjectIdGetDatum(rte->relid),
									Int16GetDatum(attnum),
									BoolGetDatum(false));
				vardata.freefunc = ReleaseSysCache;
			}
		}
		else
		{
			/*
			 * Looks like we've found an expression column in the index.
			 * Let's see if there are any stats for it.
			 */

			/* derive the attnum from the 0-based column index */
			attnum = iclause->indexcol + 1;

			if (get_index_stats_hook &&
				(*get_index_stats_hook) (root, index->indexoid, attnum, &vardata))
			{
				/*
				 * The hook took control of acquiring a stats tuple.  If it
				 * did supply a tuple, it'd better have supplied a freefunc.
				 */
				if (HeapTupleIsValid(vardata.statsTuple) &&
					!vardata.freefunc)
					elog(ERROR, "no function provided to release variable stats with");
			}
			else
			{
				vardata.statsTuple = SearchSysCache3(STATRELATTINH,
													 ObjectIdGetDatum(index->indexoid),
													 Int16GetDatum(attnum),
													 BoolGetDatum(false));
				vardata.freefunc = ReleaseSysCache;
			}
		}

		if (HeapTupleIsValid(vardata.statsTuple))
		{
			AttStatsSlot sslot;

			if (get_attstatsslot(&sslot, vardata.statsTuple,
								 STATISTIC_KIND_CORRELATION, InvalidOid,
								 ATTSTATSSLOT_NUMBERS))
			{
				double		varCorrelation = 0.0;

				if (sslot.nnumbers > 0)
					varCorrelation = Abs(sslot.numbers[0]);

				if (varCorrelation > *indexCorrelation)
					*indexCorrelation = varCorrelation;

				free_attstatsslot(&sslot);
			}
		}

		ReleaseVariableStats(vardata);
	}

	qualSelectivity = clauselist_selectivity(root, indexQuals,
											 baserel->relid,
											 JOIN_INNER, NULL);

	/*
	 * Now calculate the minimum possible ranges we could match with if all of
	 * the rows were in the perfect order in the table's heap.
	 */
	minimalRanges = ceil(indexRanges * qualSelectivity);

	/*
	 * Now estimate the number of ranges that we'll touch by using the
	 * indexCorrelation from the stats. Careful not to divide by zero (note
	 * we're using the absolute value of the correlation).
	 */
	if (*indexCorrelation < 1.0e-10)
		estimatedRanges = indexRanges;
	else
		estimatedRanges = Min(minimalRanges / *indexCorrelation, indexRanges);

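	/*
	 * For illustration: with indexRanges = 782 and qualSelectivity = 0.01,
	 * minimalRanges = ceil(7.82) = 8; an indexCorrelation of 0.5 then gives
	 * estimatedRanges = Min(8 / 0.5, 782) = 16, i.e. poorer clustering
	 * doubles the ranges we expect to touch.
	 */
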
	/* we expect to visit this portion of the table */
	selec = estimatedRanges / indexRanges;

	CLAMP_PROBABILITY(selec);

	*indexSelectivity = selec;

	/*
	 * Compute the index qual costs, much as in genericcostestimate, to add to
	 * the index costs.  We can disregard indexorderbys, since BRIN doesn't
	 * support those.
	 */
	qual_arg_cost = index_other_operands_eval_cost(root, indexQuals);

	/*
	 * Compute the startup cost as the cost to read the whole revmap
	 * sequentially, including the cost to execute the index quals.
	 */
	*indexStartupCost =
		spc_seq_page_cost * statsData.revmapNumPages * loop_count;
	*indexStartupCost += qual_arg_cost;

	/*
	 * To read a BRIN index there might be a bit of back and forth over
	 * regular pages, as revmap might point to them out of sequential order;
	 * calculate the total cost as reading the whole index in random order.
	 */
	*indexTotalCost = *indexStartupCost +
		spc_random_page_cost * (numPages - statsData.revmapNumPages) * loop_count;

	/*
	 * Charge a small amount per range tuple that we expect to match.  This
	 * is meant to reflect the costs of manipulating the bitmap.  The BRIN
	 * scan will set a bit for each page in the range when we find a matching
	 * range, so we must multiply the charge by the number of pages in the
	 * range.
	 */
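	/*
	 * For illustration: estimatedRanges = 16 with pagesPerRange = 128 adds
	 * 0.1 * 0.0025 * 16 * 128 = 0.512 at the default cpu_operator_cost.
	 */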
	*indexTotalCost += 0.1 * cpu_operator_cost * estimatedRanges *
		statsData.pagesPerRange;

	*indexPages = index->pages;
}