1 /*-------------------------------------------------------------------------
2  *
3  * clausesel.c
4  *	  Routines to compute clause selectivities
5  *
6  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/optimizer/path/clausesel.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "nodes/makefuncs.h"
18 #include "nodes/nodeFuncs.h"
19 #include "optimizer/clauses.h"
20 #include "optimizer/cost.h"
21 #include "optimizer/optimizer.h"
22 #include "optimizer/pathnode.h"
23 #include "optimizer/plancat.h"
24 #include "statistics/statistics.h"
25 #include "utils/fmgroids.h"
26 #include "utils/lsyscache.h"
27 #include "utils/selfuncs.h"
28 
29 /*
30  * Data structure for accumulating info about possible range-query
31  * clause pairs in clauselist_selectivity.
32  */
33 typedef struct RangeQueryClause
34 {
35 	struct RangeQueryClause *next;	/* next in linked list */
36 	Node	   *var;			/* The common variable of the clauses */
37 	bool		have_lobound;	/* found a low-bound clause yet? */
38 	bool		have_hibound;	/* found a high-bound clause yet? */
39 	Selectivity lobound;		/* Selectivity of a var > something clause */
40 	Selectivity hibound;		/* Selectivity of a var < something clause */
41 } RangeQueryClause;
42 
43 static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
44 						   bool varonleft, bool isLTsel, Selectivity s2);
45 static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
46 											   List *clauses);
47 static Selectivity clauselist_selectivity_or(PlannerInfo *root,
48 											 List *clauses,
49 											 int varRelid,
50 											 JoinType jointype,
51 											 SpecialJoinInfo *sjinfo,
52 											 bool use_extended_stats);
53 
54 /****************************************************************************
55  *		ROUTINES TO COMPUTE SELECTIVITIES
56  ****************************************************************************/
57 
58 /*
59  * clauselist_selectivity -
60  *	  Compute the selectivity of an implicitly-ANDed list of boolean
61  *	  expression clauses.  The list can be empty, in which case 1.0
62  *	  must be returned.  List elements may be either RestrictInfos
63  *	  or bare expression clauses --- the former is preferred since
64  *	  it allows caching of results.
65  *
66  * See clause_selectivity() for the meaning of the additional parameters.
67  *
68  * The basic approach is to apply extended statistics first, on as many
69  * clauses as possible, in order to capture cross-column dependencies etc.
70  * The remaining clauses are then estimated by taking the product of their
71  * selectivities, but that's only right if they have independent
72  * probabilities, and in reality they are often NOT independent even if they
73  * only refer to a single column.  So, we want to be smarter where we can.
74  *
75  * We also recognize "range queries", such as "x > 34 AND x < 42".  Clauses
76  * are recognized as possible range query components if they are restriction
77  * opclauses whose operators have scalarltsel or a related function as their
78  * restriction selectivity estimator.  We pair up clauses of this form that
79  * refer to the same variable.  An unpairable clause of this kind is simply
80  * multiplied into the selectivity product in the normal way.  But when we
81  * find a pair, we know that the selectivities represent the relative
82  * positions of the low and high bounds within the column's range, so instead
83  * of figuring the selectivity as hisel * losel, we can figure it as hisel +
84  * losel - 1.  (To visualize this, see that hisel is the fraction of the range
85  * below the high bound, while losel is the fraction above the low bound; so
86  * hisel can be interpreted directly as a 0..1 value but we need to convert
87  * losel to 1-losel before interpreting it as a value.  Then the available
88  * range is 1-losel to hisel.  However, this calculation double-excludes
89  * nulls, so really we need hisel + losel + null_frac - 1.)
90  *
91  * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
92  * and instead use DEFAULT_RANGE_INEQ_SEL.  The same applies if the equation
93  * yields an impossible (negative) result.
94  *
95  * A free side-effect is that we can recognize redundant inequalities such
96  * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
97  *
98  * Of course this is all very dependent on the behavior of the inequality
99  * selectivity functions; perhaps some day we can generalize the approach.
100  */
101 Selectivity
clauselist_selectivity(PlannerInfo * root,List * clauses,int varRelid,JoinType jointype,SpecialJoinInfo * sjinfo)102 clauselist_selectivity(PlannerInfo *root,
103 					   List *clauses,
104 					   int varRelid,
105 					   JoinType jointype,
106 					   SpecialJoinInfo *sjinfo)
107 {
108 	return clauselist_selectivity_ext(root, clauses, varRelid,
109 									  jointype, sjinfo, true);
110 }
111 
112 /*
113  * clauselist_selectivity_ext -
114  *	  Extended version of clauselist_selectivity().  If "use_extended_stats"
115  *	  is false, all extended statistics will be ignored, and only per-column
116  *	  statistics will be used.
117  */
118 Selectivity
clauselist_selectivity_ext(PlannerInfo * root,List * clauses,int varRelid,JoinType jointype,SpecialJoinInfo * sjinfo,bool use_extended_stats)119 clauselist_selectivity_ext(PlannerInfo *root,
120 						   List *clauses,
121 						   int varRelid,
122 						   JoinType jointype,
123 						   SpecialJoinInfo *sjinfo,
124 						   bool use_extended_stats)
125 {
126 	Selectivity s1 = 1.0;
127 	RelOptInfo *rel;
128 	Bitmapset  *estimatedclauses = NULL;
129 	RangeQueryClause *rqlist = NULL;
130 	ListCell   *l;
131 	int			listidx;
132 
133 	/*
134 	 * If there's exactly one clause, just go directly to
135 	 * clause_selectivity_ext(). None of what we might do below is relevant.
136 	 */
137 	if (list_length(clauses) == 1)
138 		return clause_selectivity_ext(root, (Node *) linitial(clauses),
139 									  varRelid, jointype, sjinfo,
140 									  use_extended_stats);
141 
142 	/*
143 	 * Determine if these clauses reference a single relation.  If so, and if
144 	 * it has extended statistics, try to apply those.
145 	 */
146 	rel = find_single_rel_for_clauses(root, clauses);
147 	if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
148 	{
149 		/*
150 		 * Estimate as many clauses as possible using extended statistics.
151 		 *
152 		 * 'estimatedclauses' is populated with the 0-based list position
153 		 * index of clauses estimated here, and that should be ignored below.
154 		 */
155 		s1 = statext_clauselist_selectivity(root, clauses, varRelid,
156 											jointype, sjinfo, rel,
157 											&estimatedclauses, false);
158 	}
159 
160 	/*
161 	 * Apply normal selectivity estimates for remaining clauses. We'll be
162 	 * careful to skip any clauses which were already estimated above.
163 	 *
164 	 * Anything that doesn't look like a potential rangequery clause gets
165 	 * multiplied into s1 and forgotten. Anything that does gets inserted into
166 	 * an rqlist entry.
167 	 */
168 	listidx = -1;
169 	foreach(l, clauses)
170 	{
171 		Node	   *clause = (Node *) lfirst(l);
172 		RestrictInfo *rinfo;
173 		Selectivity s2;
174 
175 		listidx++;
176 
177 		/*
178 		 * Skip this clause if it's already been estimated by some other
179 		 * statistics above.
180 		 */
181 		if (bms_is_member(listidx, estimatedclauses))
182 			continue;
183 
184 		/* Compute the selectivity of this clause in isolation */
185 		s2 = clause_selectivity_ext(root, clause, varRelid, jointype, sjinfo,
186 									use_extended_stats);
187 
188 		/*
189 		 * Check for being passed a RestrictInfo.
190 		 *
191 		 * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
192 		 * 0.0; just use that rather than looking for range pairs.
193 		 */
194 		if (IsA(clause, RestrictInfo))
195 		{
196 			rinfo = (RestrictInfo *) clause;
197 			if (rinfo->pseudoconstant)
198 			{
199 				s1 = s1 * s2;
200 				continue;
201 			}
202 			clause = (Node *) rinfo->clause;
203 		}
204 		else
205 			rinfo = NULL;
206 
207 		/*
208 		 * See if it looks like a restriction clause with a pseudoconstant on
209 		 * one side.  (Anything more complicated than that might not behave in
210 		 * the simple way we are expecting.)  Most of the tests here can be
211 		 * done more efficiently with rinfo than without.
212 		 */
213 		if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
214 		{
215 			OpExpr	   *expr = (OpExpr *) clause;
216 			bool		varonleft = true;
217 			bool		ok;
218 
219 			if (rinfo)
220 			{
221 				ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&
222 					(is_pseudo_constant_clause_relids(lsecond(expr->args),
223 													  rinfo->right_relids) ||
224 					 (varonleft = false,
225 					  is_pseudo_constant_clause_relids(linitial(expr->args),
226 													   rinfo->left_relids)));
227 			}
228 			else
229 			{
230 				ok = (NumRelids(root, clause) == 1) &&
231 					(is_pseudo_constant_clause(lsecond(expr->args)) ||
232 					 (varonleft = false,
233 					  is_pseudo_constant_clause(linitial(expr->args))));
234 			}
235 
236 			if (ok)
237 			{
238 				/*
239 				 * If it's not a "<"/"<="/">"/">=" operator, just merge the
240 				 * selectivity in generically.  But if it's the right oprrest,
241 				 * add the clause to rqlist for later processing.
242 				 */
243 				switch (get_oprrest(expr->opno))
244 				{
245 					case F_SCALARLTSEL:
246 					case F_SCALARLESEL:
247 						addRangeClause(&rqlist, clause,
248 									   varonleft, true, s2);
249 						break;
250 					case F_SCALARGTSEL:
251 					case F_SCALARGESEL:
252 						addRangeClause(&rqlist, clause,
253 									   varonleft, false, s2);
254 						break;
255 					default:
256 						/* Just merge the selectivity in generically */
257 						s1 = s1 * s2;
258 						break;
259 				}
260 				continue;		/* drop to loop bottom */
261 			}
262 		}
263 
264 		/* Not the right form, so treat it generically. */
265 		s1 = s1 * s2;
266 	}
267 
268 	/*
269 	 * Now scan the rangequery pair list.
270 	 */
271 	while (rqlist != NULL)
272 	{
273 		RangeQueryClause *rqnext;
274 
275 		if (rqlist->have_lobound && rqlist->have_hibound)
276 		{
277 			/* Successfully matched a pair of range clauses */
278 			Selectivity s2;
279 
280 			/*
281 			 * Exact equality to the default value probably means the
282 			 * selectivity function punted.  This is not airtight but should
283 			 * be good enough.
284 			 */
285 			if (rqlist->hibound == DEFAULT_INEQ_SEL ||
286 				rqlist->lobound == DEFAULT_INEQ_SEL)
287 			{
288 				s2 = DEFAULT_RANGE_INEQ_SEL;
289 			}
290 			else
291 			{
292 				s2 = rqlist->hibound + rqlist->lobound - 1.0;
293 
294 				/* Adjust for double-exclusion of NULLs */
295 				s2 += nulltestsel(root, IS_NULL, rqlist->var,
296 								  varRelid, jointype, sjinfo);
297 
298 				/*
299 				 * A zero or slightly negative s2 should be converted into a
300 				 * small positive value; we probably are dealing with a very
301 				 * tight range and got a bogus result due to roundoff errors.
302 				 * However, if s2 is very negative, then we probably have
303 				 * default selectivity estimates on one or both sides of the
304 				 * range that we failed to recognize above for some reason.
305 				 */
306 				if (s2 <= 0.0)
307 				{
308 					if (s2 < -0.01)
309 					{
310 						/*
311 						 * No data available --- use a default estimate that
312 						 * is small, but not real small.
313 						 */
314 						s2 = DEFAULT_RANGE_INEQ_SEL;
315 					}
316 					else
317 					{
318 						/*
319 						 * It's just roundoff error; use a small positive
320 						 * value
321 						 */
322 						s2 = 1.0e-10;
323 					}
324 				}
325 			}
326 			/* Merge in the selectivity of the pair of clauses */
327 			s1 *= s2;
328 		}
329 		else
330 		{
331 			/* Only found one of a pair, merge it in generically */
332 			if (rqlist->have_lobound)
333 				s1 *= rqlist->lobound;
334 			else
335 				s1 *= rqlist->hibound;
336 		}
337 		/* release storage and advance */
338 		rqnext = rqlist->next;
339 		pfree(rqlist);
340 		rqlist = rqnext;
341 	}
342 
343 	return s1;
344 }
345 
346 /*
347  * clauselist_selectivity_or -
348  *	  Compute the selectivity of an implicitly-ORed list of boolean
349  *	  expression clauses.  The list can be empty, in which case 0.0
350  *	  must be returned.  List elements may be either RestrictInfos
351  *	  or bare expression clauses --- the former is preferred since
352  *	  it allows caching of results.
353  *
354  * See clause_selectivity() for the meaning of the additional parameters.
355  *
356  * The basic approach is to apply extended statistics first, on as many
357  * clauses as possible, in order to capture cross-column dependencies etc.
358  * The remaining clauses are then estimated as if they were independent.
359  */
360 static Selectivity
clauselist_selectivity_or(PlannerInfo * root,List * clauses,int varRelid,JoinType jointype,SpecialJoinInfo * sjinfo,bool use_extended_stats)361 clauselist_selectivity_or(PlannerInfo *root,
362 						  List *clauses,
363 						  int varRelid,
364 						  JoinType jointype,
365 						  SpecialJoinInfo *sjinfo,
366 						  bool use_extended_stats)
367 {
368 	Selectivity s1 = 0.0;
369 	RelOptInfo *rel;
370 	Bitmapset  *estimatedclauses = NULL;
371 	ListCell   *lc;
372 	int			listidx;
373 
374 	/*
375 	 * Determine if these clauses reference a single relation.  If so, and if
376 	 * it has extended statistics, try to apply those.
377 	 */
378 	rel = find_single_rel_for_clauses(root, clauses);
379 	if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
380 	{
381 		/*
382 		 * Estimate as many clauses as possible using extended statistics.
383 		 *
384 		 * 'estimatedclauses' is populated with the 0-based list position
385 		 * index of clauses estimated here, and that should be ignored below.
386 		 */
387 		s1 = statext_clauselist_selectivity(root, clauses, varRelid,
388 											jointype, sjinfo, rel,
389 											&estimatedclauses, true);
390 	}
391 
392 	/*
393 	 * Estimate the remaining clauses as if they were independent.
394 	 *
395 	 * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to account
396 	 * for the probable overlap of selected tuple sets.
397 	 *
398 	 * XXX is this too conservative?
399 	 */
400 	listidx = -1;
401 	foreach(lc, clauses)
402 	{
403 		Selectivity s2;
404 
405 		listidx++;
406 
407 		/*
408 		 * Skip this clause if it's already been estimated by some other
409 		 * statistics above.
410 		 */
411 		if (bms_is_member(listidx, estimatedclauses))
412 			continue;
413 
414 		s2 = clause_selectivity_ext(root, (Node *) lfirst(lc), varRelid,
415 									jointype, sjinfo, use_extended_stats);
416 
417 		s1 = s1 + s2 - s1 * s2;
418 	}
419 
420 	return s1;
421 }
422 
423 /*
424  * addRangeClause --- add a new range clause for clauselist_selectivity
425  *
426  * Here is where we try to match up pairs of range-query clauses
427  */
428 static void
addRangeClause(RangeQueryClause ** rqlist,Node * clause,bool varonleft,bool isLTsel,Selectivity s2)429 addRangeClause(RangeQueryClause **rqlist, Node *clause,
430 			   bool varonleft, bool isLTsel, Selectivity s2)
431 {
432 	RangeQueryClause *rqelem;
433 	Node	   *var;
434 	bool		is_lobound;
435 
436 	if (varonleft)
437 	{
438 		var = get_leftop((Expr *) clause);
439 		is_lobound = !isLTsel;	/* x < something is high bound */
440 	}
441 	else
442 	{
443 		var = get_rightop((Expr *) clause);
444 		is_lobound = isLTsel;	/* something < x is low bound */
445 	}
446 
447 	for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
448 	{
449 		/*
450 		 * We use full equal() here because the "var" might be a function of
451 		 * one or more attributes of the same relation...
452 		 */
453 		if (!equal(var, rqelem->var))
454 			continue;
455 		/* Found the right group to put this clause in */
456 		if (is_lobound)
457 		{
458 			if (!rqelem->have_lobound)
459 			{
460 				rqelem->have_lobound = true;
461 				rqelem->lobound = s2;
462 			}
463 			else
464 			{
465 
466 				/*------
467 				 * We have found two similar clauses, such as
468 				 * x < y AND x <= z.
469 				 * Keep only the more restrictive one.
470 				 *------
471 				 */
472 				if (rqelem->lobound > s2)
473 					rqelem->lobound = s2;
474 			}
475 		}
476 		else
477 		{
478 			if (!rqelem->have_hibound)
479 			{
480 				rqelem->have_hibound = true;
481 				rqelem->hibound = s2;
482 			}
483 			else
484 			{
485 
486 				/*------
487 				 * We have found two similar clauses, such as
488 				 * x > y AND x >= z.
489 				 * Keep only the more restrictive one.
490 				 *------
491 				 */
492 				if (rqelem->hibound > s2)
493 					rqelem->hibound = s2;
494 			}
495 		}
496 		return;
497 	}
498 
499 	/* No matching var found, so make a new clause-pair data structure */
500 	rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
501 	rqelem->var = var;
502 	if (is_lobound)
503 	{
504 		rqelem->have_lobound = true;
505 		rqelem->have_hibound = false;
506 		rqelem->lobound = s2;
507 	}
508 	else
509 	{
510 		rqelem->have_lobound = false;
511 		rqelem->have_hibound = true;
512 		rqelem->hibound = s2;
513 	}
514 	rqelem->next = *rqlist;
515 	*rqlist = rqelem;
516 }
517 
518 /*
519  * find_single_rel_for_clauses
520  *		Examine each clause in 'clauses' and determine if all clauses
521  *		reference only a single relation.  If so return that relation,
522  *		otherwise return NULL.
523  */
524 static RelOptInfo *
find_single_rel_for_clauses(PlannerInfo * root,List * clauses)525 find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
526 {
527 	int			lastrelid = 0;
528 	ListCell   *l;
529 
530 	foreach(l, clauses)
531 	{
532 		RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
533 		int			relid;
534 
535 		/*
536 		 * If we have a list of bare clauses rather than RestrictInfos, we
537 		 * could pull out their relids the hard way with pull_varnos().
538 		 * However, currently the extended-stats machinery won't do anything
539 		 * with non-RestrictInfo clauses anyway, so there's no point in
540 		 * spending extra cycles; just fail if that's what we have.
541 		 *
542 		 * An exception to that rule is if we have a bare BoolExpr AND clause.
543 		 * We treat this as a special case because the restrictinfo machinery
544 		 * doesn't build RestrictInfos on top of AND clauses.
545 		 */
546 		if (is_andclause(rinfo))
547 		{
548 			RelOptInfo *rel;
549 
550 			rel = find_single_rel_for_clauses(root,
551 											  ((BoolExpr *) rinfo)->args);
552 
553 			if (rel == NULL)
554 				return NULL;
555 			if (lastrelid == 0)
556 				lastrelid = rel->relid;
557 			else if (rel->relid != lastrelid)
558 				return NULL;
559 
560 			continue;
561 		}
562 
563 		if (!IsA(rinfo, RestrictInfo))
564 			return NULL;
565 
566 		if (bms_is_empty(rinfo->clause_relids))
567 			continue;			/* we can ignore variable-free clauses */
568 		if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
569 			return NULL;		/* multiple relations in this clause */
570 		if (lastrelid == 0)
571 			lastrelid = relid;	/* first clause referencing a relation */
572 		else if (relid != lastrelid)
573 			return NULL;		/* relation not same as last one */
574 	}
575 
576 	if (lastrelid != 0)
577 		return find_base_rel(root, lastrelid);
578 
579 	return NULL;				/* no clauses */
580 }
581 
582 /*
583  * bms_is_subset_singleton
584  *
585  * Same result as bms_is_subset(s, bms_make_singleton(x)),
586  * but a little faster and doesn't leak memory.
587  *
588  * Is this of use anywhere else?  If so move to bitmapset.c ...
589  */
590 static bool
bms_is_subset_singleton(const Bitmapset * s,int x)591 bms_is_subset_singleton(const Bitmapset *s, int x)
592 {
593 	switch (bms_membership(s))
594 	{
595 		case BMS_EMPTY_SET:
596 			return true;
597 		case BMS_SINGLETON:
598 			return bms_is_member(x, s);
599 		case BMS_MULTIPLE:
600 			return false;
601 	}
602 	/* can't get here... */
603 	return false;
604 }
605 
606 /*
607  * treat_as_join_clause -
608  *	  Decide whether an operator clause is to be handled by the
609  *	  restriction or join estimator.  Subroutine for clause_selectivity().
610  */
611 static inline bool
treat_as_join_clause(PlannerInfo * root,Node * clause,RestrictInfo * rinfo,int varRelid,SpecialJoinInfo * sjinfo)612 treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo,
613 					 int varRelid, SpecialJoinInfo *sjinfo)
614 {
615 	if (varRelid != 0)
616 	{
617 		/*
618 		 * Caller is forcing restriction mode (eg, because we are examining an
619 		 * inner indexscan qual).
620 		 */
621 		return false;
622 	}
623 	else if (sjinfo == NULL)
624 	{
625 		/*
626 		 * It must be a restriction clause, since it's being evaluated at a
627 		 * scan node.
628 		 */
629 		return false;
630 	}
631 	else
632 	{
633 		/*
634 		 * Otherwise, it's a join if there's more than one relation used. We
635 		 * can optimize this calculation if an rinfo was passed.
636 		 *
637 		 * XXX	Since we know the clause is being evaluated at a join, the
638 		 * only way it could be single-relation is if it was delayed by outer
639 		 * joins.  Although we can make use of the restriction qual estimators
640 		 * anyway, it seems likely that we ought to account for the
641 		 * probability of injected nulls somehow.
642 		 */
643 		if (rinfo)
644 			return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
645 		else
646 			return (NumRelids(root, clause) > 1);
647 	}
648 }
649 
650 
651 /*
652  * clause_selectivity -
653  *	  Compute the selectivity of a general boolean expression clause.
654  *
655  * The clause can be either a RestrictInfo or a plain expression.  If it's
656  * a RestrictInfo, we try to cache the selectivity for possible re-use,
657  * so passing RestrictInfos is preferred.
658  *
659  * varRelid is either 0 or a rangetable index.
660  *
661  * When varRelid is not 0, only variables belonging to that relation are
662  * considered in computing selectivity; other vars are treated as constants
663  * of unknown values.  This is appropriate for estimating the selectivity of
664  * a join clause that is being used as a restriction clause in a scan of a
665  * nestloop join's inner relation --- varRelid should then be the ID of the
666  * inner relation.
667  *
668  * When varRelid is 0, all variables are treated as variables.  This
669  * is appropriate for ordinary join clauses and restriction clauses.
670  *
671  * jointype is the join type, if the clause is a join clause.  Pass JOIN_INNER
672  * if the clause isn't a join clause.
673  *
674  * sjinfo is NULL for a non-join clause, otherwise it provides additional
675  * context information about the join being performed.  There are some
676  * special cases:
677  *	1. For a special (not INNER) join, sjinfo is always a member of
678  *	   root->join_info_list.
679  *	2. For an INNER join, sjinfo is just a transient struct, and only the
680  *	   relids and jointype fields in it can be trusted.
681  * It is possible for jointype to be different from sjinfo->jointype.
682  * This indicates we are considering a variant join: either with
683  * the LHS and RHS switched, or with one input unique-ified.
684  *
685  * Note: when passing nonzero varRelid, it's normally appropriate to set
686  * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
687  * join clause; because we aren't treating it as a join clause.
688  */
689 Selectivity
clause_selectivity(PlannerInfo * root,Node * clause,int varRelid,JoinType jointype,SpecialJoinInfo * sjinfo)690 clause_selectivity(PlannerInfo *root,
691 				   Node *clause,
692 				   int varRelid,
693 				   JoinType jointype,
694 				   SpecialJoinInfo *sjinfo)
695 {
696 	return clause_selectivity_ext(root, clause, varRelid,
697 								  jointype, sjinfo, true);
698 }
699 
700 /*
701  * clause_selectivity_ext -
702  *	  Extended version of clause_selectivity().  If "use_extended_stats" is
703  *	  false, all extended statistics will be ignored, and only per-column
704  *	  statistics will be used.
705  */
706 Selectivity
clause_selectivity_ext(PlannerInfo * root,Node * clause,int varRelid,JoinType jointype,SpecialJoinInfo * sjinfo,bool use_extended_stats)707 clause_selectivity_ext(PlannerInfo *root,
708 					   Node *clause,
709 					   int varRelid,
710 					   JoinType jointype,
711 					   SpecialJoinInfo *sjinfo,
712 					   bool use_extended_stats)
713 {
714 	Selectivity s1 = 0.5;		/* default for any unhandled clause type */
715 	RestrictInfo *rinfo = NULL;
716 	bool		cacheable = false;
717 
718 	if (clause == NULL)			/* can this still happen? */
719 		return s1;
720 
721 	if (IsA(clause, RestrictInfo))
722 	{
723 		rinfo = (RestrictInfo *) clause;
724 
725 		/*
726 		 * If the clause is marked pseudoconstant, then it will be used as a
727 		 * gating qual and should not affect selectivity estimates; hence
728 		 * return 1.0.  The only exception is that a constant FALSE may be
729 		 * taken as having selectivity 0.0, since it will surely mean no rows
730 		 * out of the plan.  This case is simple enough that we need not
731 		 * bother caching the result.
732 		 */
733 		if (rinfo->pseudoconstant)
734 		{
735 			if (!IsA(rinfo->clause, Const))
736 				return (Selectivity) 1.0;
737 		}
738 
739 		/*
740 		 * If the clause is marked redundant, always return 1.0.
741 		 */
742 		if (rinfo->norm_selec > 1)
743 			return (Selectivity) 1.0;
744 
745 		/*
746 		 * If possible, cache the result of the selectivity calculation for
747 		 * the clause.  We can cache if varRelid is zero or the clause
748 		 * contains only vars of that relid --- otherwise varRelid will affect
749 		 * the result, so mustn't cache.  Outer join quals might be examined
750 		 * with either their join's actual jointype or JOIN_INNER, so we need
751 		 * two cache variables to remember both cases.  Note: we assume the
752 		 * result won't change if we are switching the input relations or
753 		 * considering a unique-ified case, so we only need one cache variable
754 		 * for all non-JOIN_INNER cases.
755 		 */
756 		if (varRelid == 0 ||
757 			bms_is_subset_singleton(rinfo->clause_relids, varRelid))
758 		{
759 			/* Cacheable --- do we already have the result? */
760 			if (jointype == JOIN_INNER)
761 			{
762 				if (rinfo->norm_selec >= 0)
763 					return rinfo->norm_selec;
764 			}
765 			else
766 			{
767 				if (rinfo->outer_selec >= 0)
768 					return rinfo->outer_selec;
769 			}
770 			cacheable = true;
771 		}
772 
773 		/*
774 		 * Proceed with examination of contained clause.  If the clause is an
775 		 * OR-clause, we want to look at the variant with sub-RestrictInfos,
776 		 * so that per-subclause selectivities can be cached.
777 		 */
778 		if (rinfo->orclause)
779 			clause = (Node *) rinfo->orclause;
780 		else
781 			clause = (Node *) rinfo->clause;
782 	}
783 
784 	if (IsA(clause, Var))
785 	{
786 		Var		   *var = (Var *) clause;
787 
788 		/*
789 		 * We probably shouldn't ever see an uplevel Var here, but if we do,
790 		 * return the default selectivity...
791 		 */
792 		if (var->varlevelsup == 0 &&
793 			(varRelid == 0 || varRelid == (int) var->varno))
794 		{
795 			/* Use the restriction selectivity function for a bool Var */
796 			s1 = boolvarsel(root, (Node *) var, varRelid);
797 		}
798 	}
799 	else if (IsA(clause, Const))
800 	{
801 		/* bool constant is pretty easy... */
802 		Const	   *con = (Const *) clause;
803 
804 		s1 = con->constisnull ? 0.0 :
805 			DatumGetBool(con->constvalue) ? 1.0 : 0.0;
806 	}
807 	else if (IsA(clause, Param))
808 	{
809 		/* see if we can replace the Param */
810 		Node	   *subst = estimate_expression_value(root, clause);
811 
812 		if (IsA(subst, Const))
813 		{
814 			/* bool constant is pretty easy... */
815 			Const	   *con = (Const *) subst;
816 
817 			s1 = con->constisnull ? 0.0 :
818 				DatumGetBool(con->constvalue) ? 1.0 : 0.0;
819 		}
820 		else
821 		{
822 			/* XXX any way to do better than default? */
823 		}
824 	}
825 	else if (is_notclause(clause))
826 	{
827 		/* inverse of the selectivity of the underlying clause */
828 		s1 = 1.0 - clause_selectivity_ext(root,
829 										  (Node *) get_notclausearg((Expr *) clause),
830 										  varRelid,
831 										  jointype,
832 										  sjinfo,
833 										  use_extended_stats);
834 	}
835 	else if (is_andclause(clause))
836 	{
837 		/* share code with clauselist_selectivity() */
838 		s1 = clauselist_selectivity_ext(root,
839 										((BoolExpr *) clause)->args,
840 										varRelid,
841 										jointype,
842 										sjinfo,
843 										use_extended_stats);
844 	}
845 	else if (is_orclause(clause))
846 	{
847 		/*
848 		 * Almost the same thing as clauselist_selectivity, but with the
849 		 * clauses connected by OR.
850 		 */
851 		s1 = clauselist_selectivity_or(root,
852 									   ((BoolExpr *) clause)->args,
853 									   varRelid,
854 									   jointype,
855 									   sjinfo,
856 									   use_extended_stats);
857 	}
858 	else if (is_opclause(clause) || IsA(clause, DistinctExpr))
859 	{
860 		OpExpr	   *opclause = (OpExpr *) clause;
861 		Oid			opno = opclause->opno;
862 
863 		if (treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo))
864 		{
865 			/* Estimate selectivity for a join clause. */
866 			s1 = join_selectivity(root, opno,
867 								  opclause->args,
868 								  opclause->inputcollid,
869 								  jointype,
870 								  sjinfo);
871 		}
872 		else
873 		{
874 			/* Estimate selectivity for a restriction clause. */
875 			s1 = restriction_selectivity(root, opno,
876 										 opclause->args,
877 										 opclause->inputcollid,
878 										 varRelid);
879 		}
880 
881 		/*
882 		 * DistinctExpr has the same representation as OpExpr, but the
883 		 * contained operator is "=" not "<>", so we must negate the result.
884 		 * This estimation method doesn't give the right behavior for nulls,
885 		 * but it's better than doing nothing.
886 		 */
887 		if (IsA(clause, DistinctExpr))
888 			s1 = 1.0 - s1;
889 	}
890 	else if (is_funcclause(clause))
891 	{
892 		FuncExpr   *funcclause = (FuncExpr *) clause;
893 
894 		/* Try to get an estimate from the support function, if any */
895 		s1 = function_selectivity(root,
896 								  funcclause->funcid,
897 								  funcclause->args,
898 								  funcclause->inputcollid,
899 								  treat_as_join_clause(root, clause, rinfo,
900 													   varRelid, sjinfo),
901 								  varRelid,
902 								  jointype,
903 								  sjinfo);
904 	}
905 	else if (IsA(clause, ScalarArrayOpExpr))
906 	{
907 		/* Use node specific selectivity calculation function */
908 		s1 = scalararraysel(root,
909 							(ScalarArrayOpExpr *) clause,
910 							treat_as_join_clause(root, clause, rinfo,
911 												 varRelid, sjinfo),
912 							varRelid,
913 							jointype,
914 							sjinfo);
915 	}
916 	else if (IsA(clause, RowCompareExpr))
917 	{
918 		/* Use node specific selectivity calculation function */
919 		s1 = rowcomparesel(root,
920 						   (RowCompareExpr *) clause,
921 						   varRelid,
922 						   jointype,
923 						   sjinfo);
924 	}
925 	else if (IsA(clause, NullTest))
926 	{
927 		/* Use node specific selectivity calculation function */
928 		s1 = nulltestsel(root,
929 						 ((NullTest *) clause)->nulltesttype,
930 						 (Node *) ((NullTest *) clause)->arg,
931 						 varRelid,
932 						 jointype,
933 						 sjinfo);
934 	}
935 	else if (IsA(clause, BooleanTest))
936 	{
937 		/* Use node specific selectivity calculation function */
938 		s1 = booltestsel(root,
939 						 ((BooleanTest *) clause)->booltesttype,
940 						 (Node *) ((BooleanTest *) clause)->arg,
941 						 varRelid,
942 						 jointype,
943 						 sjinfo);
944 	}
945 	else if (IsA(clause, CurrentOfExpr))
946 	{
947 		/* CURRENT OF selects at most one row of its table */
948 		CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
949 		RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);
950 
951 		if (crel->tuples > 0)
952 			s1 = 1.0 / crel->tuples;
953 	}
954 	else if (IsA(clause, RelabelType))
955 	{
956 		/* Not sure this case is needed, but it can't hurt */
957 		s1 = clause_selectivity_ext(root,
958 									(Node *) ((RelabelType *) clause)->arg,
959 									varRelid,
960 									jointype,
961 									sjinfo,
962 									use_extended_stats);
963 	}
964 	else if (IsA(clause, CoerceToDomain))
965 	{
966 		/* Not sure this case is needed, but it can't hurt */
967 		s1 = clause_selectivity_ext(root,
968 									(Node *) ((CoerceToDomain *) clause)->arg,
969 									varRelid,
970 									jointype,
971 									sjinfo,
972 									use_extended_stats);
973 	}
974 	else
975 	{
976 		/*
977 		 * For anything else, see if we can consider it as a boolean variable.
978 		 * This only works if it's an immutable expression in Vars of a single
979 		 * relation; but there's no point in us checking that here because
980 		 * boolvarsel() will do it internally, and return a suitable default
981 		 * selectivity if not.
982 		 */
983 		s1 = boolvarsel(root, clause, varRelid);
984 	}
985 
986 	/* Cache the result if possible */
987 	if (cacheable)
988 	{
989 		if (jointype == JOIN_INNER)
990 			rinfo->norm_selec = s1;
991 		else
992 			rinfo->outer_selec = s1;
993 	}
994 
995 #ifdef SELECTIVITY_DEBUG
996 	elog(DEBUG4, "clause_selectivity: s1 %f", s1);
997 #endif							/* SELECTIVITY_DEBUG */
998 
999 	return s1;
1000 }
1001