1 /*-------------------------------------------------------------------------
2 *
3 * clausesel.c
4 * Routines to compute clause selectivities
5 *
6 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/optimizer/path/clausesel.c
12 *
13 *-------------------------------------------------------------------------
14 */
15 #include "postgres.h"
16
17 #include "nodes/makefuncs.h"
18 #include "nodes/nodeFuncs.h"
19 #include "optimizer/clauses.h"
20 #include "optimizer/cost.h"
21 #include "optimizer/optimizer.h"
22 #include "optimizer/pathnode.h"
23 #include "optimizer/plancat.h"
24 #include "statistics/statistics.h"
25 #include "utils/fmgroids.h"
26 #include "utils/lsyscache.h"
27 #include "utils/selfuncs.h"
28
29 /*
30 * Data structure for accumulating info about possible range-query
31 * clause pairs in clauselist_selectivity.
32 */
33 typedef struct RangeQueryClause
34 {
35 struct RangeQueryClause *next; /* next in linked list */
36 Node *var; /* The common variable of the clauses */
37 bool have_lobound; /* found a low-bound clause yet? */
38 bool have_hibound; /* found a high-bound clause yet? */
39 Selectivity lobound; /* Selectivity of a var > something clause */
40 Selectivity hibound; /* Selectivity of a var < something clause */
41 } RangeQueryClause;
42
43 static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
44 bool varonleft, bool isLTsel, Selectivity s2);
45 static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
46 List *clauses);
47 static Selectivity clauselist_selectivity_or(PlannerInfo *root,
48 List *clauses,
49 int varRelid,
50 JoinType jointype,
51 SpecialJoinInfo *sjinfo,
52 bool use_extended_stats);
53
54 /****************************************************************************
55 * ROUTINES TO COMPUTE SELECTIVITIES
56 ****************************************************************************/
57
58 /*
59 * clauselist_selectivity -
60 * Compute the selectivity of an implicitly-ANDed list of boolean
61 * expression clauses. The list can be empty, in which case 1.0
62 * must be returned. List elements may be either RestrictInfos
63 * or bare expression clauses --- the former is preferred since
64 * it allows caching of results.
65 *
66 * See clause_selectivity() for the meaning of the additional parameters.
67 *
68 * The basic approach is to apply extended statistics first, on as many
69 * clauses as possible, in order to capture cross-column dependencies etc.
70 * The remaining clauses are then estimated by taking the product of their
71 * selectivities, but that's only right if they have independent
72 * probabilities, and in reality they are often NOT independent even if they
73 * only refer to a single column. So, we want to be smarter where we can.
74 *
75 * We also recognize "range queries", such as "x > 34 AND x < 42". Clauses
76 * are recognized as possible range query components if they are restriction
77 * opclauses whose operators have scalarltsel or a related function as their
78 * restriction selectivity estimator. We pair up clauses of this form that
79 * refer to the same variable. An unpairable clause of this kind is simply
80 * multiplied into the selectivity product in the normal way. But when we
81 * find a pair, we know that the selectivities represent the relative
82 * positions of the low and high bounds within the column's range, so instead
83 * of figuring the selectivity as hisel * losel, we can figure it as hisel +
84 * losel - 1. (To visualize this, see that hisel is the fraction of the range
85 * below the high bound, while losel is the fraction above the low bound; so
86 * hisel can be interpreted directly as a 0..1 value but we need to convert
87 * losel to 1-losel before interpreting it as a value. Then the available
88 * range is 1-losel to hisel. However, this calculation double-excludes
89 * nulls, so really we need hisel + losel + null_frac - 1.)
90 *
91 * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
92 * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation
93 * yields an impossible (negative) result.
94 *
95 * A free side-effect is that we can recognize redundant inequalities such
96 * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
97 *
98 * Of course this is all very dependent on the behavior of the inequality
99 * selectivity functions; perhaps some day we can generalize the approach.
100 */
101 Selectivity
clauselist_selectivity(PlannerInfo * root,List * clauses,int varRelid,JoinType jointype,SpecialJoinInfo * sjinfo)102 clauselist_selectivity(PlannerInfo *root,
103 List *clauses,
104 int varRelid,
105 JoinType jointype,
106 SpecialJoinInfo *sjinfo)
107 {
108 return clauselist_selectivity_ext(root, clauses, varRelid,
109 jointype, sjinfo, true);
110 }
111
112 /*
113 * clauselist_selectivity_ext -
114 * Extended version of clauselist_selectivity(). If "use_extended_stats"
115 * is false, all extended statistics will be ignored, and only per-column
116 * statistics will be used.
117 */
118 Selectivity
clauselist_selectivity_ext(PlannerInfo * root,List * clauses,int varRelid,JoinType jointype,SpecialJoinInfo * sjinfo,bool use_extended_stats)119 clauselist_selectivity_ext(PlannerInfo *root,
120 List *clauses,
121 int varRelid,
122 JoinType jointype,
123 SpecialJoinInfo *sjinfo,
124 bool use_extended_stats)
125 {
126 Selectivity s1 = 1.0;
127 RelOptInfo *rel;
128 Bitmapset *estimatedclauses = NULL;
129 RangeQueryClause *rqlist = NULL;
130 ListCell *l;
131 int listidx;
132
133 /*
134 * If there's exactly one clause, just go directly to
135 * clause_selectivity_ext(). None of what we might do below is relevant.
136 */
137 if (list_length(clauses) == 1)
138 return clause_selectivity_ext(root, (Node *) linitial(clauses),
139 varRelid, jointype, sjinfo,
140 use_extended_stats);
141
142 /*
143 * Determine if these clauses reference a single relation. If so, and if
144 * it has extended statistics, try to apply those.
145 */
146 rel = find_single_rel_for_clauses(root, clauses);
147 if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
148 {
149 /*
150 * Estimate as many clauses as possible using extended statistics.
151 *
152 * 'estimatedclauses' is populated with the 0-based list position
153 * index of clauses estimated here, and that should be ignored below.
154 */
155 s1 = statext_clauselist_selectivity(root, clauses, varRelid,
156 jointype, sjinfo, rel,
157 &estimatedclauses, false);
158 }
159
160 /*
161 * Apply normal selectivity estimates for remaining clauses. We'll be
162 * careful to skip any clauses which were already estimated above.
163 *
164 * Anything that doesn't look like a potential rangequery clause gets
165 * multiplied into s1 and forgotten. Anything that does gets inserted into
166 * an rqlist entry.
167 */
168 listidx = -1;
169 foreach(l, clauses)
170 {
171 Node *clause = (Node *) lfirst(l);
172 RestrictInfo *rinfo;
173 Selectivity s2;
174
175 listidx++;
176
177 /*
178 * Skip this clause if it's already been estimated by some other
179 * statistics above.
180 */
181 if (bms_is_member(listidx, estimatedclauses))
182 continue;
183
184 /* Compute the selectivity of this clause in isolation */
185 s2 = clause_selectivity_ext(root, clause, varRelid, jointype, sjinfo,
186 use_extended_stats);
187
188 /*
189 * Check for being passed a RestrictInfo.
190 *
191 * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
192 * 0.0; just use that rather than looking for range pairs.
193 */
194 if (IsA(clause, RestrictInfo))
195 {
196 rinfo = (RestrictInfo *) clause;
197 if (rinfo->pseudoconstant)
198 {
199 s1 = s1 * s2;
200 continue;
201 }
202 clause = (Node *) rinfo->clause;
203 }
204 else
205 rinfo = NULL;
206
207 /*
208 * See if it looks like a restriction clause with a pseudoconstant on
209 * one side. (Anything more complicated than that might not behave in
210 * the simple way we are expecting.) Most of the tests here can be
211 * done more efficiently with rinfo than without.
212 */
213 if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
214 {
215 OpExpr *expr = (OpExpr *) clause;
216 bool varonleft = true;
217 bool ok;
218
219 if (rinfo)
220 {
221 ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&
222 (is_pseudo_constant_clause_relids(lsecond(expr->args),
223 rinfo->right_relids) ||
224 (varonleft = false,
225 is_pseudo_constant_clause_relids(linitial(expr->args),
226 rinfo->left_relids)));
227 }
228 else
229 {
230 ok = (NumRelids(root, clause) == 1) &&
231 (is_pseudo_constant_clause(lsecond(expr->args)) ||
232 (varonleft = false,
233 is_pseudo_constant_clause(linitial(expr->args))));
234 }
235
236 if (ok)
237 {
238 /*
239 * If it's not a "<"/"<="/">"/">=" operator, just merge the
240 * selectivity in generically. But if it's the right oprrest,
241 * add the clause to rqlist for later processing.
242 */
243 switch (get_oprrest(expr->opno))
244 {
245 case F_SCALARLTSEL:
246 case F_SCALARLESEL:
247 addRangeClause(&rqlist, clause,
248 varonleft, true, s2);
249 break;
250 case F_SCALARGTSEL:
251 case F_SCALARGESEL:
252 addRangeClause(&rqlist, clause,
253 varonleft, false, s2);
254 break;
255 default:
256 /* Just merge the selectivity in generically */
257 s1 = s1 * s2;
258 break;
259 }
260 continue; /* drop to loop bottom */
261 }
262 }
263
264 /* Not the right form, so treat it generically. */
265 s1 = s1 * s2;
266 }
267
268 /*
269 * Now scan the rangequery pair list.
270 */
271 while (rqlist != NULL)
272 {
273 RangeQueryClause *rqnext;
274
275 if (rqlist->have_lobound && rqlist->have_hibound)
276 {
277 /* Successfully matched a pair of range clauses */
278 Selectivity s2;
279
280 /*
281 * Exact equality to the default value probably means the
282 * selectivity function punted. This is not airtight but should
283 * be good enough.
284 */
285 if (rqlist->hibound == DEFAULT_INEQ_SEL ||
286 rqlist->lobound == DEFAULT_INEQ_SEL)
287 {
288 s2 = DEFAULT_RANGE_INEQ_SEL;
289 }
290 else
291 {
292 s2 = rqlist->hibound + rqlist->lobound - 1.0;
293
294 /* Adjust for double-exclusion of NULLs */
295 s2 += nulltestsel(root, IS_NULL, rqlist->var,
296 varRelid, jointype, sjinfo);
297
298 /*
299 * A zero or slightly negative s2 should be converted into a
300 * small positive value; we probably are dealing with a very
301 * tight range and got a bogus result due to roundoff errors.
302 * However, if s2 is very negative, then we probably have
303 * default selectivity estimates on one or both sides of the
304 * range that we failed to recognize above for some reason.
305 */
306 if (s2 <= 0.0)
307 {
308 if (s2 < -0.01)
309 {
310 /*
311 * No data available --- use a default estimate that
312 * is small, but not real small.
313 */
314 s2 = DEFAULT_RANGE_INEQ_SEL;
315 }
316 else
317 {
318 /*
319 * It's just roundoff error; use a small positive
320 * value
321 */
322 s2 = 1.0e-10;
323 }
324 }
325 }
326 /* Merge in the selectivity of the pair of clauses */
327 s1 *= s2;
328 }
329 else
330 {
331 /* Only found one of a pair, merge it in generically */
332 if (rqlist->have_lobound)
333 s1 *= rqlist->lobound;
334 else
335 s1 *= rqlist->hibound;
336 }
337 /* release storage and advance */
338 rqnext = rqlist->next;
339 pfree(rqlist);
340 rqlist = rqnext;
341 }
342
343 return s1;
344 }
345
346 /*
347 * clauselist_selectivity_or -
348 * Compute the selectivity of an implicitly-ORed list of boolean
349 * expression clauses. The list can be empty, in which case 0.0
350 * must be returned. List elements may be either RestrictInfos
351 * or bare expression clauses --- the former is preferred since
352 * it allows caching of results.
353 *
354 * See clause_selectivity() for the meaning of the additional parameters.
355 *
356 * The basic approach is to apply extended statistics first, on as many
357 * clauses as possible, in order to capture cross-column dependencies etc.
358 * The remaining clauses are then estimated as if they were independent.
359 */
360 static Selectivity
clauselist_selectivity_or(PlannerInfo * root,List * clauses,int varRelid,JoinType jointype,SpecialJoinInfo * sjinfo,bool use_extended_stats)361 clauselist_selectivity_or(PlannerInfo *root,
362 List *clauses,
363 int varRelid,
364 JoinType jointype,
365 SpecialJoinInfo *sjinfo,
366 bool use_extended_stats)
367 {
368 Selectivity s1 = 0.0;
369 RelOptInfo *rel;
370 Bitmapset *estimatedclauses = NULL;
371 ListCell *lc;
372 int listidx;
373
374 /*
375 * Determine if these clauses reference a single relation. If so, and if
376 * it has extended statistics, try to apply those.
377 */
378 rel = find_single_rel_for_clauses(root, clauses);
379 if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
380 {
381 /*
382 * Estimate as many clauses as possible using extended statistics.
383 *
384 * 'estimatedclauses' is populated with the 0-based list position
385 * index of clauses estimated here, and that should be ignored below.
386 */
387 s1 = statext_clauselist_selectivity(root, clauses, varRelid,
388 jointype, sjinfo, rel,
389 &estimatedclauses, true);
390 }
391
392 /*
393 * Estimate the remaining clauses as if they were independent.
394 *
395 * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to account
396 * for the probable overlap of selected tuple sets.
397 *
398 * XXX is this too conservative?
399 */
400 listidx = -1;
401 foreach(lc, clauses)
402 {
403 Selectivity s2;
404
405 listidx++;
406
407 /*
408 * Skip this clause if it's already been estimated by some other
409 * statistics above.
410 */
411 if (bms_is_member(listidx, estimatedclauses))
412 continue;
413
414 s2 = clause_selectivity_ext(root, (Node *) lfirst(lc), varRelid,
415 jointype, sjinfo, use_extended_stats);
416
417 s1 = s1 + s2 - s1 * s2;
418 }
419
420 return s1;
421 }
422
423 /*
424 * addRangeClause --- add a new range clause for clauselist_selectivity
425 *
426 * Here is where we try to match up pairs of range-query clauses
427 */
428 static void
addRangeClause(RangeQueryClause ** rqlist,Node * clause,bool varonleft,bool isLTsel,Selectivity s2)429 addRangeClause(RangeQueryClause **rqlist, Node *clause,
430 bool varonleft, bool isLTsel, Selectivity s2)
431 {
432 RangeQueryClause *rqelem;
433 Node *var;
434 bool is_lobound;
435
436 if (varonleft)
437 {
438 var = get_leftop((Expr *) clause);
439 is_lobound = !isLTsel; /* x < something is high bound */
440 }
441 else
442 {
443 var = get_rightop((Expr *) clause);
444 is_lobound = isLTsel; /* something < x is low bound */
445 }
446
447 for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
448 {
449 /*
450 * We use full equal() here because the "var" might be a function of
451 * one or more attributes of the same relation...
452 */
453 if (!equal(var, rqelem->var))
454 continue;
455 /* Found the right group to put this clause in */
456 if (is_lobound)
457 {
458 if (!rqelem->have_lobound)
459 {
460 rqelem->have_lobound = true;
461 rqelem->lobound = s2;
462 }
463 else
464 {
465
466 /*------
467 * We have found two similar clauses, such as
468 * x < y AND x <= z.
469 * Keep only the more restrictive one.
470 *------
471 */
472 if (rqelem->lobound > s2)
473 rqelem->lobound = s2;
474 }
475 }
476 else
477 {
478 if (!rqelem->have_hibound)
479 {
480 rqelem->have_hibound = true;
481 rqelem->hibound = s2;
482 }
483 else
484 {
485
486 /*------
487 * We have found two similar clauses, such as
488 * x > y AND x >= z.
489 * Keep only the more restrictive one.
490 *------
491 */
492 if (rqelem->hibound > s2)
493 rqelem->hibound = s2;
494 }
495 }
496 return;
497 }
498
499 /* No matching var found, so make a new clause-pair data structure */
500 rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
501 rqelem->var = var;
502 if (is_lobound)
503 {
504 rqelem->have_lobound = true;
505 rqelem->have_hibound = false;
506 rqelem->lobound = s2;
507 }
508 else
509 {
510 rqelem->have_lobound = false;
511 rqelem->have_hibound = true;
512 rqelem->hibound = s2;
513 }
514 rqelem->next = *rqlist;
515 *rqlist = rqelem;
516 }
517
518 /*
519 * find_single_rel_for_clauses
520 * Examine each clause in 'clauses' and determine if all clauses
521 * reference only a single relation. If so return that relation,
522 * otherwise return NULL.
523 */
524 static RelOptInfo *
find_single_rel_for_clauses(PlannerInfo * root,List * clauses)525 find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
526 {
527 int lastrelid = 0;
528 ListCell *l;
529
530 foreach(l, clauses)
531 {
532 RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
533 int relid;
534
535 /*
536 * If we have a list of bare clauses rather than RestrictInfos, we
537 * could pull out their relids the hard way with pull_varnos().
538 * However, currently the extended-stats machinery won't do anything
539 * with non-RestrictInfo clauses anyway, so there's no point in
540 * spending extra cycles; just fail if that's what we have.
541 *
542 * An exception to that rule is if we have a bare BoolExpr AND clause.
543 * We treat this as a special case because the restrictinfo machinery
544 * doesn't build RestrictInfos on top of AND clauses.
545 */
546 if (is_andclause(rinfo))
547 {
548 RelOptInfo *rel;
549
550 rel = find_single_rel_for_clauses(root,
551 ((BoolExpr *) rinfo)->args);
552
553 if (rel == NULL)
554 return NULL;
555 if (lastrelid == 0)
556 lastrelid = rel->relid;
557 else if (rel->relid != lastrelid)
558 return NULL;
559
560 continue;
561 }
562
563 if (!IsA(rinfo, RestrictInfo))
564 return NULL;
565
566 if (bms_is_empty(rinfo->clause_relids))
567 continue; /* we can ignore variable-free clauses */
568 if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
569 return NULL; /* multiple relations in this clause */
570 if (lastrelid == 0)
571 lastrelid = relid; /* first clause referencing a relation */
572 else if (relid != lastrelid)
573 return NULL; /* relation not same as last one */
574 }
575
576 if (lastrelid != 0)
577 return find_base_rel(root, lastrelid);
578
579 return NULL; /* no clauses */
580 }
581
582 /*
583 * bms_is_subset_singleton
584 *
585 * Same result as bms_is_subset(s, bms_make_singleton(x)),
586 * but a little faster and doesn't leak memory.
587 *
588 * Is this of use anywhere else? If so move to bitmapset.c ...
589 */
590 static bool
bms_is_subset_singleton(const Bitmapset * s,int x)591 bms_is_subset_singleton(const Bitmapset *s, int x)
592 {
593 switch (bms_membership(s))
594 {
595 case BMS_EMPTY_SET:
596 return true;
597 case BMS_SINGLETON:
598 return bms_is_member(x, s);
599 case BMS_MULTIPLE:
600 return false;
601 }
602 /* can't get here... */
603 return false;
604 }
605
606 /*
607 * treat_as_join_clause -
608 * Decide whether an operator clause is to be handled by the
609 * restriction or join estimator. Subroutine for clause_selectivity().
610 */
611 static inline bool
treat_as_join_clause(PlannerInfo * root,Node * clause,RestrictInfo * rinfo,int varRelid,SpecialJoinInfo * sjinfo)612 treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo,
613 int varRelid, SpecialJoinInfo *sjinfo)
614 {
615 if (varRelid != 0)
616 {
617 /*
618 * Caller is forcing restriction mode (eg, because we are examining an
619 * inner indexscan qual).
620 */
621 return false;
622 }
623 else if (sjinfo == NULL)
624 {
625 /*
626 * It must be a restriction clause, since it's being evaluated at a
627 * scan node.
628 */
629 return false;
630 }
631 else
632 {
633 /*
634 * Otherwise, it's a join if there's more than one relation used. We
635 * can optimize this calculation if an rinfo was passed.
636 *
637 * XXX Since we know the clause is being evaluated at a join, the
638 * only way it could be single-relation is if it was delayed by outer
639 * joins. Although we can make use of the restriction qual estimators
640 * anyway, it seems likely that we ought to account for the
641 * probability of injected nulls somehow.
642 */
643 if (rinfo)
644 return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
645 else
646 return (NumRelids(root, clause) > 1);
647 }
648 }
649
650
651 /*
652 * clause_selectivity -
653 * Compute the selectivity of a general boolean expression clause.
654 *
655 * The clause can be either a RestrictInfo or a plain expression. If it's
656 * a RestrictInfo, we try to cache the selectivity for possible re-use,
657 * so passing RestrictInfos is preferred.
658 *
659 * varRelid is either 0 or a rangetable index.
660 *
661 * When varRelid is not 0, only variables belonging to that relation are
662 * considered in computing selectivity; other vars are treated as constants
663 * of unknown values. This is appropriate for estimating the selectivity of
664 * a join clause that is being used as a restriction clause in a scan of a
665 * nestloop join's inner relation --- varRelid should then be the ID of the
666 * inner relation.
667 *
668 * When varRelid is 0, all variables are treated as variables. This
669 * is appropriate for ordinary join clauses and restriction clauses.
670 *
671 * jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
672 * if the clause isn't a join clause.
673 *
674 * sjinfo is NULL for a non-join clause, otherwise it provides additional
675 * context information about the join being performed. There are some
676 * special cases:
677 * 1. For a special (not INNER) join, sjinfo is always a member of
678 * root->join_info_list.
679 * 2. For an INNER join, sjinfo is just a transient struct, and only the
680 * relids and jointype fields in it can be trusted.
681 * It is possible for jointype to be different from sjinfo->jointype.
682 * This indicates we are considering a variant join: either with
683 * the LHS and RHS switched, or with one input unique-ified.
684 *
685 * Note: when passing nonzero varRelid, it's normally appropriate to set
686 * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
687 * join clause; because we aren't treating it as a join clause.
688 */
689 Selectivity
clause_selectivity(PlannerInfo * root,Node * clause,int varRelid,JoinType jointype,SpecialJoinInfo * sjinfo)690 clause_selectivity(PlannerInfo *root,
691 Node *clause,
692 int varRelid,
693 JoinType jointype,
694 SpecialJoinInfo *sjinfo)
695 {
696 return clause_selectivity_ext(root, clause, varRelid,
697 jointype, sjinfo, true);
698 }
699
700 /*
701 * clause_selectivity_ext -
702 * Extended version of clause_selectivity(). If "use_extended_stats" is
703 * false, all extended statistics will be ignored, and only per-column
704 * statistics will be used.
705 */
706 Selectivity
clause_selectivity_ext(PlannerInfo * root,Node * clause,int varRelid,JoinType jointype,SpecialJoinInfo * sjinfo,bool use_extended_stats)707 clause_selectivity_ext(PlannerInfo *root,
708 Node *clause,
709 int varRelid,
710 JoinType jointype,
711 SpecialJoinInfo *sjinfo,
712 bool use_extended_stats)
713 {
714 Selectivity s1 = 0.5; /* default for any unhandled clause type */
715 RestrictInfo *rinfo = NULL;
716 bool cacheable = false;
717
718 if (clause == NULL) /* can this still happen? */
719 return s1;
720
721 if (IsA(clause, RestrictInfo))
722 {
723 rinfo = (RestrictInfo *) clause;
724
725 /*
726 * If the clause is marked pseudoconstant, then it will be used as a
727 * gating qual and should not affect selectivity estimates; hence
728 * return 1.0. The only exception is that a constant FALSE may be
729 * taken as having selectivity 0.0, since it will surely mean no rows
730 * out of the plan. This case is simple enough that we need not
731 * bother caching the result.
732 */
733 if (rinfo->pseudoconstant)
734 {
735 if (!IsA(rinfo->clause, Const))
736 return (Selectivity) 1.0;
737 }
738
739 /*
740 * If the clause is marked redundant, always return 1.0.
741 */
742 if (rinfo->norm_selec > 1)
743 return (Selectivity) 1.0;
744
745 /*
746 * If possible, cache the result of the selectivity calculation for
747 * the clause. We can cache if varRelid is zero or the clause
748 * contains only vars of that relid --- otherwise varRelid will affect
749 * the result, so mustn't cache. Outer join quals might be examined
750 * with either their join's actual jointype or JOIN_INNER, so we need
751 * two cache variables to remember both cases. Note: we assume the
752 * result won't change if we are switching the input relations or
753 * considering a unique-ified case, so we only need one cache variable
754 * for all non-JOIN_INNER cases.
755 */
756 if (varRelid == 0 ||
757 bms_is_subset_singleton(rinfo->clause_relids, varRelid))
758 {
759 /* Cacheable --- do we already have the result? */
760 if (jointype == JOIN_INNER)
761 {
762 if (rinfo->norm_selec >= 0)
763 return rinfo->norm_selec;
764 }
765 else
766 {
767 if (rinfo->outer_selec >= 0)
768 return rinfo->outer_selec;
769 }
770 cacheable = true;
771 }
772
773 /*
774 * Proceed with examination of contained clause. If the clause is an
775 * OR-clause, we want to look at the variant with sub-RestrictInfos,
776 * so that per-subclause selectivities can be cached.
777 */
778 if (rinfo->orclause)
779 clause = (Node *) rinfo->orclause;
780 else
781 clause = (Node *) rinfo->clause;
782 }
783
784 if (IsA(clause, Var))
785 {
786 Var *var = (Var *) clause;
787
788 /*
789 * We probably shouldn't ever see an uplevel Var here, but if we do,
790 * return the default selectivity...
791 */
792 if (var->varlevelsup == 0 &&
793 (varRelid == 0 || varRelid == (int) var->varno))
794 {
795 /* Use the restriction selectivity function for a bool Var */
796 s1 = boolvarsel(root, (Node *) var, varRelid);
797 }
798 }
799 else if (IsA(clause, Const))
800 {
801 /* bool constant is pretty easy... */
802 Const *con = (Const *) clause;
803
804 s1 = con->constisnull ? 0.0 :
805 DatumGetBool(con->constvalue) ? 1.0 : 0.0;
806 }
807 else if (IsA(clause, Param))
808 {
809 /* see if we can replace the Param */
810 Node *subst = estimate_expression_value(root, clause);
811
812 if (IsA(subst, Const))
813 {
814 /* bool constant is pretty easy... */
815 Const *con = (Const *) subst;
816
817 s1 = con->constisnull ? 0.0 :
818 DatumGetBool(con->constvalue) ? 1.0 : 0.0;
819 }
820 else
821 {
822 /* XXX any way to do better than default? */
823 }
824 }
825 else if (is_notclause(clause))
826 {
827 /* inverse of the selectivity of the underlying clause */
828 s1 = 1.0 - clause_selectivity_ext(root,
829 (Node *) get_notclausearg((Expr *) clause),
830 varRelid,
831 jointype,
832 sjinfo,
833 use_extended_stats);
834 }
835 else if (is_andclause(clause))
836 {
837 /* share code with clauselist_selectivity() */
838 s1 = clauselist_selectivity_ext(root,
839 ((BoolExpr *) clause)->args,
840 varRelid,
841 jointype,
842 sjinfo,
843 use_extended_stats);
844 }
845 else if (is_orclause(clause))
846 {
847 /*
848 * Almost the same thing as clauselist_selectivity, but with the
849 * clauses connected by OR.
850 */
851 s1 = clauselist_selectivity_or(root,
852 ((BoolExpr *) clause)->args,
853 varRelid,
854 jointype,
855 sjinfo,
856 use_extended_stats);
857 }
858 else if (is_opclause(clause) || IsA(clause, DistinctExpr))
859 {
860 OpExpr *opclause = (OpExpr *) clause;
861 Oid opno = opclause->opno;
862
863 if (treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo))
864 {
865 /* Estimate selectivity for a join clause. */
866 s1 = join_selectivity(root, opno,
867 opclause->args,
868 opclause->inputcollid,
869 jointype,
870 sjinfo);
871 }
872 else
873 {
874 /* Estimate selectivity for a restriction clause. */
875 s1 = restriction_selectivity(root, opno,
876 opclause->args,
877 opclause->inputcollid,
878 varRelid);
879 }
880
881 /*
882 * DistinctExpr has the same representation as OpExpr, but the
883 * contained operator is "=" not "<>", so we must negate the result.
884 * This estimation method doesn't give the right behavior for nulls,
885 * but it's better than doing nothing.
886 */
887 if (IsA(clause, DistinctExpr))
888 s1 = 1.0 - s1;
889 }
890 else if (is_funcclause(clause))
891 {
892 FuncExpr *funcclause = (FuncExpr *) clause;
893
894 /* Try to get an estimate from the support function, if any */
895 s1 = function_selectivity(root,
896 funcclause->funcid,
897 funcclause->args,
898 funcclause->inputcollid,
899 treat_as_join_clause(root, clause, rinfo,
900 varRelid, sjinfo),
901 varRelid,
902 jointype,
903 sjinfo);
904 }
905 else if (IsA(clause, ScalarArrayOpExpr))
906 {
907 /* Use node specific selectivity calculation function */
908 s1 = scalararraysel(root,
909 (ScalarArrayOpExpr *) clause,
910 treat_as_join_clause(root, clause, rinfo,
911 varRelid, sjinfo),
912 varRelid,
913 jointype,
914 sjinfo);
915 }
916 else if (IsA(clause, RowCompareExpr))
917 {
918 /* Use node specific selectivity calculation function */
919 s1 = rowcomparesel(root,
920 (RowCompareExpr *) clause,
921 varRelid,
922 jointype,
923 sjinfo);
924 }
925 else if (IsA(clause, NullTest))
926 {
927 /* Use node specific selectivity calculation function */
928 s1 = nulltestsel(root,
929 ((NullTest *) clause)->nulltesttype,
930 (Node *) ((NullTest *) clause)->arg,
931 varRelid,
932 jointype,
933 sjinfo);
934 }
935 else if (IsA(clause, BooleanTest))
936 {
937 /* Use node specific selectivity calculation function */
938 s1 = booltestsel(root,
939 ((BooleanTest *) clause)->booltesttype,
940 (Node *) ((BooleanTest *) clause)->arg,
941 varRelid,
942 jointype,
943 sjinfo);
944 }
945 else if (IsA(clause, CurrentOfExpr))
946 {
947 /* CURRENT OF selects at most one row of its table */
948 CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
949 RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);
950
951 if (crel->tuples > 0)
952 s1 = 1.0 / crel->tuples;
953 }
954 else if (IsA(clause, RelabelType))
955 {
956 /* Not sure this case is needed, but it can't hurt */
957 s1 = clause_selectivity_ext(root,
958 (Node *) ((RelabelType *) clause)->arg,
959 varRelid,
960 jointype,
961 sjinfo,
962 use_extended_stats);
963 }
964 else if (IsA(clause, CoerceToDomain))
965 {
966 /* Not sure this case is needed, but it can't hurt */
967 s1 = clause_selectivity_ext(root,
968 (Node *) ((CoerceToDomain *) clause)->arg,
969 varRelid,
970 jointype,
971 sjinfo,
972 use_extended_stats);
973 }
974 else
975 {
976 /*
977 * For anything else, see if we can consider it as a boolean variable.
978 * This only works if it's an immutable expression in Vars of a single
979 * relation; but there's no point in us checking that here because
980 * boolvarsel() will do it internally, and return a suitable default
981 * selectivity if not.
982 */
983 s1 = boolvarsel(root, clause, varRelid);
984 }
985
986 /* Cache the result if possible */
987 if (cacheable)
988 {
989 if (jointype == JOIN_INNER)
990 rinfo->norm_selec = s1;
991 else
992 rinfo->outer_selec = s1;
993 }
994
995 #ifdef SELECTIVITY_DEBUG
996 elog(DEBUG4, "clause_selectivity: s1 %f", s1);
997 #endif /* SELECTIVITY_DEBUG */
998
999 return s1;
1000 }
1001