1 /*-------------------------------------------------------------------------
2 *
3 * clausesel.c
4 * Routines to compute clause selectivities
5 *
6 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/optimizer/path/clausesel.c
12 *
13 *-------------------------------------------------------------------------
14 */
15 #include "postgres.h"
16
17 #include "nodes/makefuncs.h"
18 #include "optimizer/clauses.h"
19 #include "optimizer/cost.h"
20 #include "optimizer/pathnode.h"
21 #include "optimizer/plancat.h"
22 #include "utils/fmgroids.h"
23 #include "utils/lsyscache.h"
24 #include "utils/selfuncs.h"
25 #include "statistics/statistics.h"
26
27
28 /*
29 * Data structure for accumulating info about possible range-query
30 * clause pairs in clauselist_selectivity.
31 */
32 typedef struct RangeQueryClause
33 {
34 struct RangeQueryClause *next; /* next in linked list */
35 Node *var; /* The common variable of the clauses */
36 bool have_lobound; /* found a low-bound clause yet? */
37 bool have_hibound; /* found a high-bound clause yet? */
38 Selectivity lobound; /* Selectivity of a var > something clause */
39 Selectivity hibound; /* Selectivity of a var < something clause */
40 } RangeQueryClause;
41
42 static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
43 bool varonleft, bool isLTsel, Selectivity s2);
44 static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
45 List *clauses);
46
47 /****************************************************************************
48 * ROUTINES TO COMPUTE SELECTIVITIES
49 ****************************************************************************/
50
51 /*
52 * clauselist_selectivity -
53 * Compute the selectivity of an implicitly-ANDed list of boolean
54 * expression clauses. The list can be empty, in which case 1.0
55 * must be returned. List elements may be either RestrictInfos
56 * or bare expression clauses --- the former is preferred since
57 * it allows caching of results.
58 *
59 * See clause_selectivity() for the meaning of the additional parameters.
60 *
61 * Our basic approach is to take the product of the selectivities of the
62 * subclauses. However, that's only right if the subclauses have independent
63 * probabilities, and in reality they are often NOT independent. So,
64 * we want to be smarter where we can.
65 *
66 * If the clauses taken together refer to just one relation, we'll try to
67 * apply selectivity estimates using any extended statistics for that rel.
68 * Currently we only have (soft) functional dependencies, so apply these in as
69 * many cases as possible, and fall back on normal estimates for remaining
70 * clauses.
71 *
72 * We also recognize "range queries", such as "x > 34 AND x < 42". Clauses
73 * are recognized as possible range query components if they are restriction
74 * opclauses whose operators have scalarltsel() or scalargtsel() as their
75 * restriction selectivity estimator. We pair up clauses of this form that
76 * refer to the same variable. An unpairable clause of this kind is simply
77 * multiplied into the selectivity product in the normal way. But when we
78 * find a pair, we know that the selectivities represent the relative
79 * positions of the low and high bounds within the column's range, so instead
80 * of figuring the selectivity as hisel * losel, we can figure it as hisel +
81 * losel - 1. (To visualize this, see that hisel is the fraction of the range
82 * below the high bound, while losel is the fraction above the low bound; so
83 * hisel can be interpreted directly as a 0..1 value but we need to convert
84 * losel to 1-losel before interpreting it as a value. Then the available
85 * range is 1-losel to hisel. However, this calculation double-excludes
86 * nulls, so really we need hisel + losel + null_frac - 1.)
87 *
88 * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
89 * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation
90 * yields an impossible (negative) result.
91 *
92 * A free side-effect is that we can recognize redundant inequalities such
93 * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
94 *
95 * Of course this is all very dependent on the behavior of
96 * scalarltsel/scalargtsel; perhaps some day we can generalize the approach.
97 */
98 Selectivity
clauselist_selectivity(PlannerInfo * root,List * clauses,int varRelid,JoinType jointype,SpecialJoinInfo * sjinfo)99 clauselist_selectivity(PlannerInfo *root,
100 List *clauses,
101 int varRelid,
102 JoinType jointype,
103 SpecialJoinInfo *sjinfo)
104 {
105 Selectivity s1 = 1.0;
106 RelOptInfo *rel;
107 Bitmapset *estimatedclauses = NULL;
108 RangeQueryClause *rqlist = NULL;
109 ListCell *l;
110 int listidx;
111
112 /*
113 * If there's exactly one clause, just go directly to
114 * clause_selectivity(). None of what we might do below is relevant.
115 */
116 if (list_length(clauses) == 1)
117 return clause_selectivity(root, (Node *) linitial(clauses),
118 varRelid, jointype, sjinfo);
119
120 /*
121 * Determine if these clauses reference a single relation. If so, and if
122 * it has extended statistics, try to apply those.
123 */
124 rel = find_single_rel_for_clauses(root, clauses);
125 if (rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
126 {
127 /*
128 * Perform selectivity estimations on any clauses found applicable by
129 * dependencies_clauselist_selectivity. 'estimatedclauses' will be
130 * filled with the 0-based list positions of clauses used that way, so
131 * that we can ignore them below.
132 */
133 s1 *= dependencies_clauselist_selectivity(root, clauses, varRelid,
134 jointype, sjinfo, rel,
135 &estimatedclauses);
136
137 /*
138 * This would be the place to apply any other types of extended
139 * statistics selectivity estimations for remaining clauses.
140 */
141 }
142
143 /*
144 * Apply normal selectivity estimates for remaining clauses. We'll be
145 * careful to skip any clauses which were already estimated above.
146 *
147 * Anything that doesn't look like a potential rangequery clause gets
148 * multiplied into s1 and forgotten. Anything that does gets inserted into
149 * an rqlist entry.
150 */
151 listidx = -1;
152 foreach(l, clauses)
153 {
154 Node *clause = (Node *) lfirst(l);
155 RestrictInfo *rinfo;
156 Selectivity s2;
157
158 listidx++;
159
160 /*
161 * Skip this clause if it's already been estimated by some other
162 * statistics above.
163 */
164 if (bms_is_member(listidx, estimatedclauses))
165 continue;
166
167 /* Always compute the selectivity using clause_selectivity */
168 s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo);
169
170 /*
171 * Check for being passed a RestrictInfo.
172 *
173 * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
174 * 0.0; just use that rather than looking for range pairs.
175 */
176 if (IsA(clause, RestrictInfo))
177 {
178 rinfo = (RestrictInfo *) clause;
179 if (rinfo->pseudoconstant)
180 {
181 s1 = s1 * s2;
182 continue;
183 }
184 clause = (Node *) rinfo->clause;
185 }
186 else
187 rinfo = NULL;
188
189 /*
190 * See if it looks like a restriction clause with a pseudoconstant on
191 * one side. (Anything more complicated than that might not behave in
192 * the simple way we are expecting.) Most of the tests here can be
193 * done more efficiently with rinfo than without.
194 */
195 if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
196 {
197 OpExpr *expr = (OpExpr *) clause;
198 bool varonleft = true;
199 bool ok;
200
201 if (rinfo)
202 {
203 ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&
204 (is_pseudo_constant_clause_relids(lsecond(expr->args),
205 rinfo->right_relids) ||
206 (varonleft = false,
207 is_pseudo_constant_clause_relids(linitial(expr->args),
208 rinfo->left_relids)));
209 }
210 else
211 {
212 ok = (NumRelids(clause) == 1) &&
213 (is_pseudo_constant_clause(lsecond(expr->args)) ||
214 (varonleft = false,
215 is_pseudo_constant_clause(linitial(expr->args))));
216 }
217
218 if (ok)
219 {
220 /*
221 * If it's not a "<" or ">" operator, just merge the
222 * selectivity in generically. But if it's the right oprrest,
223 * add the clause to rqlist for later processing.
224 */
225 switch (get_oprrest(expr->opno))
226 {
227 case F_SCALARLTSEL:
228 addRangeClause(&rqlist, clause,
229 varonleft, true, s2);
230 break;
231 case F_SCALARGTSEL:
232 addRangeClause(&rqlist, clause,
233 varonleft, false, s2);
234 break;
235 default:
236 /* Just merge the selectivity in generically */
237 s1 = s1 * s2;
238 break;
239 }
240 continue; /* drop to loop bottom */
241 }
242 }
243
244 /* Not the right form, so treat it generically. */
245 s1 = s1 * s2;
246 }
247
248 /*
249 * Now scan the rangequery pair list.
250 */
251 while (rqlist != NULL)
252 {
253 RangeQueryClause *rqnext;
254
255 if (rqlist->have_lobound && rqlist->have_hibound)
256 {
257 /* Successfully matched a pair of range clauses */
258 Selectivity s2;
259
260 /*
261 * Exact equality to the default value probably means the
262 * selectivity function punted. This is not airtight but should
263 * be good enough.
264 */
265 if (rqlist->hibound == DEFAULT_INEQ_SEL ||
266 rqlist->lobound == DEFAULT_INEQ_SEL)
267 {
268 s2 = DEFAULT_RANGE_INEQ_SEL;
269 }
270 else
271 {
272 s2 = rqlist->hibound + rqlist->lobound - 1.0;
273
274 /* Adjust for double-exclusion of NULLs */
275 s2 += nulltestsel(root, IS_NULL, rqlist->var,
276 varRelid, jointype, sjinfo);
277
278 /*
279 * A zero or slightly negative s2 should be converted into a
280 * small positive value; we probably are dealing with a very
281 * tight range and got a bogus result due to roundoff errors.
282 * However, if s2 is very negative, then we probably have
283 * default selectivity estimates on one or both sides of the
284 * range that we failed to recognize above for some reason.
285 */
286 if (s2 <= 0.0)
287 {
288 if (s2 < -0.01)
289 {
290 /*
291 * No data available --- use a default estimate that
292 * is small, but not real small.
293 */
294 s2 = DEFAULT_RANGE_INEQ_SEL;
295 }
296 else
297 {
298 /*
299 * It's just roundoff error; use a small positive
300 * value
301 */
302 s2 = 1.0e-10;
303 }
304 }
305 }
306 /* Merge in the selectivity of the pair of clauses */
307 s1 *= s2;
308 }
309 else
310 {
311 /* Only found one of a pair, merge it in generically */
312 if (rqlist->have_lobound)
313 s1 *= rqlist->lobound;
314 else
315 s1 *= rqlist->hibound;
316 }
317 /* release storage and advance */
318 rqnext = rqlist->next;
319 pfree(rqlist);
320 rqlist = rqnext;
321 }
322
323 return s1;
324 }
325
326 /*
327 * addRangeClause --- add a new range clause for clauselist_selectivity
328 *
329 * Here is where we try to match up pairs of range-query clauses
330 */
331 static void
addRangeClause(RangeQueryClause ** rqlist,Node * clause,bool varonleft,bool isLTsel,Selectivity s2)332 addRangeClause(RangeQueryClause **rqlist, Node *clause,
333 bool varonleft, bool isLTsel, Selectivity s2)
334 {
335 RangeQueryClause *rqelem;
336 Node *var;
337 bool is_lobound;
338
339 if (varonleft)
340 {
341 var = get_leftop((Expr *) clause);
342 is_lobound = !isLTsel; /* x < something is high bound */
343 }
344 else
345 {
346 var = get_rightop((Expr *) clause);
347 is_lobound = isLTsel; /* something < x is low bound */
348 }
349
350 for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
351 {
352 /*
353 * We use full equal() here because the "var" might be a function of
354 * one or more attributes of the same relation...
355 */
356 if (!equal(var, rqelem->var))
357 continue;
358 /* Found the right group to put this clause in */
359 if (is_lobound)
360 {
361 if (!rqelem->have_lobound)
362 {
363 rqelem->have_lobound = true;
364 rqelem->lobound = s2;
365 }
366 else
367 {
368
369 /*------
370 * We have found two similar clauses, such as
371 * x < y AND x < z.
372 * Keep only the more restrictive one.
373 *------
374 */
375 if (rqelem->lobound > s2)
376 rqelem->lobound = s2;
377 }
378 }
379 else
380 {
381 if (!rqelem->have_hibound)
382 {
383 rqelem->have_hibound = true;
384 rqelem->hibound = s2;
385 }
386 else
387 {
388
389 /*------
390 * We have found two similar clauses, such as
391 * x > y AND x > z.
392 * Keep only the more restrictive one.
393 *------
394 */
395 if (rqelem->hibound > s2)
396 rqelem->hibound = s2;
397 }
398 }
399 return;
400 }
401
402 /* No matching var found, so make a new clause-pair data structure */
403 rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
404 rqelem->var = var;
405 if (is_lobound)
406 {
407 rqelem->have_lobound = true;
408 rqelem->have_hibound = false;
409 rqelem->lobound = s2;
410 }
411 else
412 {
413 rqelem->have_lobound = false;
414 rqelem->have_hibound = true;
415 rqelem->hibound = s2;
416 }
417 rqelem->next = *rqlist;
418 *rqlist = rqelem;
419 }
420
421 /*
422 * find_single_rel_for_clauses
423 * Examine each clause in 'clauses' and determine if all clauses
424 * reference only a single relation. If so return that relation,
425 * otherwise return NULL.
426 */
427 static RelOptInfo *
find_single_rel_for_clauses(PlannerInfo * root,List * clauses)428 find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
429 {
430 int lastrelid = 0;
431 ListCell *l;
432
433 foreach(l, clauses)
434 {
435 RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
436 int relid;
437
438 /*
439 * If we have a list of bare clauses rather than RestrictInfos, we
440 * could pull out their relids the hard way with pull_varnos().
441 * However, currently the extended-stats machinery won't do anything
442 * with non-RestrictInfo clauses anyway, so there's no point in
443 * spending extra cycles; just fail if that's what we have.
444 */
445 if (!IsA(rinfo, RestrictInfo))
446 return NULL;
447
448 if (bms_is_empty(rinfo->clause_relids))
449 continue; /* we can ignore variable-free clauses */
450 if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
451 return NULL; /* multiple relations in this clause */
452 if (lastrelid == 0)
453 lastrelid = relid; /* first clause referencing a relation */
454 else if (relid != lastrelid)
455 return NULL; /* relation not same as last one */
456 }
457
458 if (lastrelid != 0)
459 return find_base_rel(root, lastrelid);
460
461 return NULL; /* no clauses */
462 }
463
464 /*
465 * bms_is_subset_singleton
466 *
467 * Same result as bms_is_subset(s, bms_make_singleton(x)),
468 * but a little faster and doesn't leak memory.
469 *
470 * Is this of use anywhere else? If so move to bitmapset.c ...
471 */
472 static bool
bms_is_subset_singleton(const Bitmapset * s,int x)473 bms_is_subset_singleton(const Bitmapset *s, int x)
474 {
475 switch (bms_membership(s))
476 {
477 case BMS_EMPTY_SET:
478 return true;
479 case BMS_SINGLETON:
480 return bms_is_member(x, s);
481 case BMS_MULTIPLE:
482 return false;
483 }
484 /* can't get here... */
485 return false;
486 }
487
488 /*
489 * treat_as_join_clause -
490 * Decide whether an operator clause is to be handled by the
491 * restriction or join estimator. Subroutine for clause_selectivity().
492 */
493 static inline bool
treat_as_join_clause(Node * clause,RestrictInfo * rinfo,int varRelid,SpecialJoinInfo * sjinfo)494 treat_as_join_clause(Node *clause, RestrictInfo *rinfo,
495 int varRelid, SpecialJoinInfo *sjinfo)
496 {
497 if (varRelid != 0)
498 {
499 /*
500 * Caller is forcing restriction mode (eg, because we are examining an
501 * inner indexscan qual).
502 */
503 return false;
504 }
505 else if (sjinfo == NULL)
506 {
507 /*
508 * It must be a restriction clause, since it's being evaluated at a
509 * scan node.
510 */
511 return false;
512 }
513 else
514 {
515 /*
516 * Otherwise, it's a join if there's more than one relation used. We
517 * can optimize this calculation if an rinfo was passed.
518 *
519 * XXX Since we know the clause is being evaluated at a join, the
520 * only way it could be single-relation is if it was delayed by outer
521 * joins. Although we can make use of the restriction qual estimators
522 * anyway, it seems likely that we ought to account for the
523 * probability of injected nulls somehow.
524 */
525 if (rinfo)
526 return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
527 else
528 return (NumRelids(clause) > 1);
529 }
530 }
531
532
533 /*
534 * clause_selectivity -
535 * Compute the selectivity of a general boolean expression clause.
536 *
537 * The clause can be either a RestrictInfo or a plain expression. If it's
538 * a RestrictInfo, we try to cache the selectivity for possible re-use,
539 * so passing RestrictInfos is preferred.
540 *
541 * varRelid is either 0 or a rangetable index.
542 *
543 * When varRelid is not 0, only variables belonging to that relation are
544 * considered in computing selectivity; other vars are treated as constants
545 * of unknown values. This is appropriate for estimating the selectivity of
546 * a join clause that is being used as a restriction clause in a scan of a
547 * nestloop join's inner relation --- varRelid should then be the ID of the
548 * inner relation.
549 *
550 * When varRelid is 0, all variables are treated as variables. This
551 * is appropriate for ordinary join clauses and restriction clauses.
552 *
553 * jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
554 * if the clause isn't a join clause.
555 *
556 * sjinfo is NULL for a non-join clause, otherwise it provides additional
557 * context information about the join being performed. There are some
558 * special cases:
559 * 1. For a special (not INNER) join, sjinfo is always a member of
560 * root->join_info_list.
561 * 2. For an INNER join, sjinfo is just a transient struct, and only the
562 * relids and jointype fields in it can be trusted.
563 * It is possible for jointype to be different from sjinfo->jointype.
564 * This indicates we are considering a variant join: either with
565 * the LHS and RHS switched, or with one input unique-ified.
566 *
567 * Note: when passing nonzero varRelid, it's normally appropriate to set
568 * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
569 * join clause; because we aren't treating it as a join clause.
570 */
571 Selectivity
clause_selectivity(PlannerInfo * root,Node * clause,int varRelid,JoinType jointype,SpecialJoinInfo * sjinfo)572 clause_selectivity(PlannerInfo *root,
573 Node *clause,
574 int varRelid,
575 JoinType jointype,
576 SpecialJoinInfo *sjinfo)
577 {
578 Selectivity s1 = 0.5; /* default for any unhandled clause type */
579 RestrictInfo *rinfo = NULL;
580 bool cacheable = false;
581
582 if (clause == NULL) /* can this still happen? */
583 return s1;
584
585 if (IsA(clause, RestrictInfo))
586 {
587 rinfo = (RestrictInfo *) clause;
588
589 /*
590 * If the clause is marked pseudoconstant, then it will be used as a
591 * gating qual and should not affect selectivity estimates; hence
592 * return 1.0. The only exception is that a constant FALSE may be
593 * taken as having selectivity 0.0, since it will surely mean no rows
594 * out of the plan. This case is simple enough that we need not
595 * bother caching the result.
596 */
597 if (rinfo->pseudoconstant)
598 {
599 if (!IsA(rinfo->clause, Const))
600 return (Selectivity) 1.0;
601 }
602
603 /*
604 * If the clause is marked redundant, always return 1.0.
605 */
606 if (rinfo->norm_selec > 1)
607 return (Selectivity) 1.0;
608
609 /*
610 * If possible, cache the result of the selectivity calculation for
611 * the clause. We can cache if varRelid is zero or the clause
612 * contains only vars of that relid --- otherwise varRelid will affect
613 * the result, so mustn't cache. Outer join quals might be examined
614 * with either their join's actual jointype or JOIN_INNER, so we need
615 * two cache variables to remember both cases. Note: we assume the
616 * result won't change if we are switching the input relations or
617 * considering a unique-ified case, so we only need one cache variable
618 * for all non-JOIN_INNER cases.
619 */
620 if (varRelid == 0 ||
621 bms_is_subset_singleton(rinfo->clause_relids, varRelid))
622 {
623 /* Cacheable --- do we already have the result? */
624 if (jointype == JOIN_INNER)
625 {
626 if (rinfo->norm_selec >= 0)
627 return rinfo->norm_selec;
628 }
629 else
630 {
631 if (rinfo->outer_selec >= 0)
632 return rinfo->outer_selec;
633 }
634 cacheable = true;
635 }
636
637 /*
638 * Proceed with examination of contained clause. If the clause is an
639 * OR-clause, we want to look at the variant with sub-RestrictInfos,
640 * so that per-subclause selectivities can be cached.
641 */
642 if (rinfo->orclause)
643 clause = (Node *) rinfo->orclause;
644 else
645 clause = (Node *) rinfo->clause;
646 }
647
648 if (IsA(clause, Var))
649 {
650 Var *var = (Var *) clause;
651
652 /*
653 * We probably shouldn't ever see an uplevel Var here, but if we do,
654 * return the default selectivity...
655 */
656 if (var->varlevelsup == 0 &&
657 (varRelid == 0 || varRelid == (int) var->varno))
658 {
659 /* Use the restriction selectivity function for a bool Var */
660 s1 = boolvarsel(root, (Node *) var, varRelid);
661 }
662 }
663 else if (IsA(clause, Const))
664 {
665 /* bool constant is pretty easy... */
666 Const *con = (Const *) clause;
667
668 s1 = con->constisnull ? 0.0 :
669 DatumGetBool(con->constvalue) ? 1.0 : 0.0;
670 }
671 else if (IsA(clause, Param))
672 {
673 /* see if we can replace the Param */
674 Node *subst = estimate_expression_value(root, clause);
675
676 if (IsA(subst, Const))
677 {
678 /* bool constant is pretty easy... */
679 Const *con = (Const *) subst;
680
681 s1 = con->constisnull ? 0.0 :
682 DatumGetBool(con->constvalue) ? 1.0 : 0.0;
683 }
684 else
685 {
686 /* XXX any way to do better than default? */
687 }
688 }
689 else if (not_clause(clause))
690 {
691 /* inverse of the selectivity of the underlying clause */
692 s1 = 1.0 - clause_selectivity(root,
693 (Node *) get_notclausearg((Expr *) clause),
694 varRelid,
695 jointype,
696 sjinfo);
697 }
698 else if (and_clause(clause))
699 {
700 /* share code with clauselist_selectivity() */
701 s1 = clauselist_selectivity(root,
702 ((BoolExpr *) clause)->args,
703 varRelid,
704 jointype,
705 sjinfo);
706 }
707 else if (or_clause(clause))
708 {
709 /*
710 * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to
711 * account for the probable overlap of selected tuple sets.
712 *
713 * XXX is this too conservative?
714 */
715 ListCell *arg;
716
717 s1 = 0.0;
718 foreach(arg, ((BoolExpr *) clause)->args)
719 {
720 Selectivity s2 = clause_selectivity(root,
721 (Node *) lfirst(arg),
722 varRelid,
723 jointype,
724 sjinfo);
725
726 s1 = s1 + s2 - s1 * s2;
727 }
728 }
729 else if (is_opclause(clause) || IsA(clause, DistinctExpr))
730 {
731 OpExpr *opclause = (OpExpr *) clause;
732 Oid opno = opclause->opno;
733
734 if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo))
735 {
736 /* Estimate selectivity for a join clause. */
737 s1 = join_selectivity(root, opno,
738 opclause->args,
739 opclause->inputcollid,
740 jointype,
741 sjinfo);
742 }
743 else
744 {
745 /* Estimate selectivity for a restriction clause. */
746 s1 = restriction_selectivity(root, opno,
747 opclause->args,
748 opclause->inputcollid,
749 varRelid);
750 }
751
752 /*
753 * DistinctExpr has the same representation as OpExpr, but the
754 * contained operator is "=" not "<>", so we must negate the result.
755 * This estimation method doesn't give the right behavior for nulls,
756 * but it's better than doing nothing.
757 */
758 if (IsA(clause, DistinctExpr))
759 s1 = 1.0 - s1;
760 }
761 else if (IsA(clause, ScalarArrayOpExpr))
762 {
763 /* Use node specific selectivity calculation function */
764 s1 = scalararraysel(root,
765 (ScalarArrayOpExpr *) clause,
766 treat_as_join_clause(clause, rinfo,
767 varRelid, sjinfo),
768 varRelid,
769 jointype,
770 sjinfo);
771 }
772 else if (IsA(clause, RowCompareExpr))
773 {
774 /* Use node specific selectivity calculation function */
775 s1 = rowcomparesel(root,
776 (RowCompareExpr *) clause,
777 varRelid,
778 jointype,
779 sjinfo);
780 }
781 else if (IsA(clause, NullTest))
782 {
783 /* Use node specific selectivity calculation function */
784 s1 = nulltestsel(root,
785 ((NullTest *) clause)->nulltesttype,
786 (Node *) ((NullTest *) clause)->arg,
787 varRelid,
788 jointype,
789 sjinfo);
790 }
791 else if (IsA(clause, BooleanTest))
792 {
793 /* Use node specific selectivity calculation function */
794 s1 = booltestsel(root,
795 ((BooleanTest *) clause)->booltesttype,
796 (Node *) ((BooleanTest *) clause)->arg,
797 varRelid,
798 jointype,
799 sjinfo);
800 }
801 else if (IsA(clause, CurrentOfExpr))
802 {
803 /* CURRENT OF selects at most one row of its table */
804 CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
805 RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);
806
807 if (crel->tuples > 0)
808 s1 = 1.0 / crel->tuples;
809 }
810 else if (IsA(clause, RelabelType))
811 {
812 /* Not sure this case is needed, but it can't hurt */
813 s1 = clause_selectivity(root,
814 (Node *) ((RelabelType *) clause)->arg,
815 varRelid,
816 jointype,
817 sjinfo);
818 }
819 else if (IsA(clause, CoerceToDomain))
820 {
821 /* Not sure this case is needed, but it can't hurt */
822 s1 = clause_selectivity(root,
823 (Node *) ((CoerceToDomain *) clause)->arg,
824 varRelid,
825 jointype,
826 sjinfo);
827 }
828 else
829 {
830 /*
831 * For anything else, see if we can consider it as a boolean variable.
832 * This only works if it's an immutable expression in Vars of a single
833 * relation; but there's no point in us checking that here because
834 * boolvarsel() will do it internally, and return a suitable default
835 * selectivity if not.
836 */
837 s1 = boolvarsel(root, clause, varRelid);
838 }
839
840 /* Cache the result if possible */
841 if (cacheable)
842 {
843 if (jointype == JOIN_INNER)
844 rinfo->norm_selec = s1;
845 else
846 rinfo->outer_selec = s1;
847 }
848
849 #ifdef SELECTIVITY_DEBUG
850 elog(DEBUG4, "clause_selectivity: s1 %f", s1);
851 #endif /* SELECTIVITY_DEBUG */
852
853 return s1;
854 }
855