1 /*-------------------------------------------------------------------------
2 *
3 * extended_stats.c
4 * POSTGRES extended statistics
5 *
6 * Generic code supporting statistics objects created via CREATE STATISTICS.
7 *
8 *
9 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
10 * Portions Copyright (c) 1994, Regents of the University of California
11 *
12 * IDENTIFICATION
13 * src/backend/statistics/extended_stats.c
14 *
15 *-------------------------------------------------------------------------
16 */
17 #include "postgres.h"
18
19 #include "access/detoast.h"
20 #include "access/genam.h"
21 #include "access/htup_details.h"
22 #include "access/table.h"
23 #include "catalog/indexing.h"
24 #include "catalog/pg_collation.h"
25 #include "catalog/pg_statistic_ext.h"
26 #include "catalog/pg_statistic_ext_data.h"
27 #include "commands/progress.h"
28 #include "miscadmin.h"
29 #include "nodes/nodeFuncs.h"
30 #include "optimizer/clauses.h"
31 #include "optimizer/optimizer.h"
32 #include "pgstat.h"
33 #include "postmaster/autovacuum.h"
34 #include "statistics/extended_stats_internal.h"
35 #include "statistics/statistics.h"
36 #include "utils/acl.h"
37 #include "utils/array.h"
38 #include "utils/builtins.h"
39 #include "utils/fmgroids.h"
40 #include "utils/lsyscache.h"
41 #include "utils/memutils.h"
42 #include "utils/rel.h"
43 #include "utils/selfuncs.h"
44 #include "utils/syscache.h"
45
46 /*
47 * To avoid consuming too much memory during analysis and/or too much space
48 * in the resulting pg_statistic rows, we ignore varlena datums that are wider
49 * than WIDTH_THRESHOLD (after detoasting!). This is legitimate for MCV
50 * and distinct-value calculations since a wide value is unlikely to be
51 * duplicated at all, much less be a most-common value. For the same reason,
52 * ignoring wide values will not affect our estimates of histogram bin
53 * boundaries very much.
54 */
55 #define WIDTH_THRESHOLD 1024
56
57 /*
58 * Used internally to refer to an individual statistics object, i.e.,
59 * a pg_statistic_ext entry.
60 */
61 typedef struct StatExtEntry
62 {
63 Oid statOid; /* OID of pg_statistic_ext entry */
64 char *schema; /* statistics object's schema */
65 char *name; /* statistics object's name */
66 Bitmapset *columns; /* attribute numbers covered by the object */
67 List *types; /* 'char' list of enabled statistics kinds */
68 int stattarget; /* statistics target (-1 for default) */
69 } StatExtEntry;
70
71
72 static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
73 static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
74 int nvacatts, VacAttrStats **vacatts);
75 static void statext_store(Oid relid,
76 MVNDistinct *ndistinct, MVDependencies *dependencies,
77 MCVList *mcv, VacAttrStats **stats);
78 static int statext_compute_stattarget(int stattarget,
79 int natts, VacAttrStats **stats);
80
81 /*
82 * Compute requested extended stats, using the rows sampled for the plain
83 * (single-column) stats.
84 *
85 * This fetches a list of stats types from pg_statistic_ext, computes the
86 * requested stats, and serializes them back into the catalog.
87 */
88 void
BuildRelationExtStatistics(Relation onerel,double totalrows,int numrows,HeapTuple * rows,int natts,VacAttrStats ** vacattrstats)89 BuildRelationExtStatistics(Relation onerel, double totalrows,
90 int numrows, HeapTuple *rows,
91 int natts, VacAttrStats **vacattrstats)
92 {
93 Relation pg_stext;
94 ListCell *lc;
95 List *stats;
96 MemoryContext cxt;
97 MemoryContext oldcxt;
98 int64 ext_cnt;
99
100 pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock);
101 stats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
102
103 /* memory context for building each statistics object */
104 cxt = AllocSetContextCreate(CurrentMemoryContext,
105 "BuildRelationExtStatistics",
106 ALLOCSET_DEFAULT_SIZES);
107 oldcxt = MemoryContextSwitchTo(cxt);
108
109 /* report this phase */
110 if (stats != NIL)
111 {
112 const int index[] = {
113 PROGRESS_ANALYZE_PHASE,
114 PROGRESS_ANALYZE_EXT_STATS_TOTAL
115 };
116 const int64 val[] = {
117 PROGRESS_ANALYZE_PHASE_COMPUTE_EXT_STATS,
118 list_length(stats)
119 };
120
121 pgstat_progress_update_multi_param(2, index, val);
122 }
123
124 ext_cnt = 0;
125 foreach(lc, stats)
126 {
127 StatExtEntry *stat = (StatExtEntry *) lfirst(lc);
128 MVNDistinct *ndistinct = NULL;
129 MVDependencies *dependencies = NULL;
130 MCVList *mcv = NULL;
131 VacAttrStats **stats;
132 ListCell *lc2;
133 int stattarget;
134
135 /*
136 * Check if we can build these stats based on the column analyzed. If
137 * not, report this fact (except in autovacuum) and move on.
138 */
139 stats = lookup_var_attr_stats(onerel, stat->columns,
140 natts, vacattrstats);
141 if (!stats)
142 {
143 if (!IsAutoVacuumWorkerProcess())
144 ereport(WARNING,
145 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
146 errmsg("statistics object \"%s.%s\" could not be computed for relation \"%s.%s\"",
147 stat->schema, stat->name,
148 get_namespace_name(onerel->rd_rel->relnamespace),
149 RelationGetRelationName(onerel)),
150 errtable(onerel)));
151 continue;
152 }
153
154 /* check allowed number of dimensions */
155 Assert(bms_num_members(stat->columns) >= 2 &&
156 bms_num_members(stat->columns) <= STATS_MAX_DIMENSIONS);
157
158 /* compute statistics target for this statistics */
159 stattarget = statext_compute_stattarget(stat->stattarget,
160 bms_num_members(stat->columns),
161 stats);
162
163 /*
164 * Don't rebuild statistics objects with statistics target set to 0
165 * (we just leave the existing values around, just like we do for
166 * regular per-column statistics).
167 */
168 if (stattarget == 0)
169 continue;
170
171 /* compute statistic of each requested type */
172 foreach(lc2, stat->types)
173 {
174 char t = (char) lfirst_int(lc2);
175
176 if (t == STATS_EXT_NDISTINCT)
177 ndistinct = statext_ndistinct_build(totalrows, numrows, rows,
178 stat->columns, stats);
179 else if (t == STATS_EXT_DEPENDENCIES)
180 dependencies = statext_dependencies_build(numrows, rows,
181 stat->columns, stats);
182 else if (t == STATS_EXT_MCV)
183 mcv = statext_mcv_build(numrows, rows, stat->columns, stats,
184 totalrows, stattarget);
185 }
186
187 /* store the statistics in the catalog */
188 statext_store(stat->statOid, ndistinct, dependencies, mcv, stats);
189
190 /* for reporting progress */
191 pgstat_progress_update_param(PROGRESS_ANALYZE_EXT_STATS_COMPUTED,
192 ++ext_cnt);
193
194 /* free the data used for building this statistics object */
195 MemoryContextReset(cxt);
196 }
197
198 MemoryContextSwitchTo(oldcxt);
199 MemoryContextDelete(cxt);
200
201 list_free(stats);
202
203 table_close(pg_stext, RowExclusiveLock);
204 }
205
206 /*
207 * ComputeExtStatisticsRows
208 * Compute number of rows required by extended statistics on a table.
209 *
210 * Computes number of rows we need to sample to build extended statistics on a
211 * table. This only looks at statistics we can actually build - for example
212 * when analyzing only some of the columns, this will skip statistics objects
213 * that would require additional columns.
214 *
215 * See statext_compute_stattarget for details about how we compute statistics
216 * target for a statistics objects (from the object target, attribute targets
217 * and default statistics target).
218 */
219 int
ComputeExtStatisticsRows(Relation onerel,int natts,VacAttrStats ** vacattrstats)220 ComputeExtStatisticsRows(Relation onerel,
221 int natts, VacAttrStats **vacattrstats)
222 {
223 Relation pg_stext;
224 ListCell *lc;
225 List *lstats;
226 MemoryContext cxt;
227 MemoryContext oldcxt;
228 int result = 0;
229
230 cxt = AllocSetContextCreate(CurrentMemoryContext,
231 "ComputeExtStatisticsRows",
232 ALLOCSET_DEFAULT_SIZES);
233 oldcxt = MemoryContextSwitchTo(cxt);
234
235 pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock);
236 lstats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
237
238 foreach(lc, lstats)
239 {
240 StatExtEntry *stat = (StatExtEntry *) lfirst(lc);
241 int stattarget = stat->stattarget;
242 VacAttrStats **stats;
243 int nattrs = bms_num_members(stat->columns);
244
245 /*
246 * Check if we can build this statistics object based on the columns
247 * analyzed. If not, ignore it (don't report anything, we'll do that
248 * during the actual build BuildRelationExtStatistics).
249 */
250 stats = lookup_var_attr_stats(onerel, stat->columns,
251 natts, vacattrstats);
252
253 if (!stats)
254 continue;
255
256 /*
257 * Compute statistics target, based on what's set for the statistic
258 * object itself, and for its attributes.
259 */
260 stattarget = statext_compute_stattarget(stat->stattarget,
261 nattrs, stats);
262
263 /* Use the largest value for all statistics objects. */
264 if (stattarget > result)
265 result = stattarget;
266 }
267
268 table_close(pg_stext, RowExclusiveLock);
269
270 MemoryContextSwitchTo(oldcxt);
271 MemoryContextDelete(cxt);
272
273 /* compute sample size based on the statistics target */
274 return (300 * result);
275 }
276
277 /*
278 * statext_compute_stattarget
279 * compute statistics target for an extended statistic
280 *
281 * When computing target for extended statistics objects, we consider three
282 * places where the target may be set - the statistics object itself,
283 * attributes the statistics is defined on, and then the default statistics
284 * target.
285 *
286 * First we look at what's set for the statistics object itself, using the
287 * ALTER STATISTICS ... SET STATISTICS command. If we find a valid value
288 * there (i.e. not -1) we're done. Otherwise we look at targets set for any
289 * of the attributes the statistic is defined on, and if there are columns
290 * with defined target, we use the maximum value. We do this mostly for
291 * backwards compatibility, because this is what we did before having
292 * statistics target for extended statistics.
293 *
294 * And finally, if we still don't have a statistics target, we use the value
295 * set in default_statistics_target.
296 */
297 static int
statext_compute_stattarget(int stattarget,int nattrs,VacAttrStats ** stats)298 statext_compute_stattarget(int stattarget, int nattrs, VacAttrStats **stats)
299 {
300 int i;
301
302 /*
303 * If there's statistics target set for the statistics object, use it. It
304 * may be set to 0 which disables building of that statistic.
305 */
306 if (stattarget >= 0)
307 return stattarget;
308
309 /*
310 * The target for the statistics object is set to -1, in which case we
311 * look at the maximum target set for any of the attributes the object is
312 * defined on.
313 */
314 for (i = 0; i < nattrs; i++)
315 {
316 /* keep the maximmum statistics target */
317 if (stats[i]->attr->attstattarget > stattarget)
318 stattarget = stats[i]->attr->attstattarget;
319 }
320
321 /*
322 * If the value is still negative (so neither the statistics object nor
323 * any of the columns have custom statistics target set), use the global
324 * default target.
325 */
326 if (stattarget < 0)
327 stattarget = default_statistics_target;
328
329 /* As this point we should have a valid statistics target. */
330 Assert((stattarget >= 0) && (stattarget <= 10000));
331
332 return stattarget;
333 }
334
335 /*
336 * statext_is_kind_built
337 * Is this stat kind built in the given pg_statistic_ext_data tuple?
338 */
339 bool
statext_is_kind_built(HeapTuple htup,char type)340 statext_is_kind_built(HeapTuple htup, char type)
341 {
342 AttrNumber attnum;
343
344 switch (type)
345 {
346 case STATS_EXT_NDISTINCT:
347 attnum = Anum_pg_statistic_ext_data_stxdndistinct;
348 break;
349
350 case STATS_EXT_DEPENDENCIES:
351 attnum = Anum_pg_statistic_ext_data_stxddependencies;
352 break;
353
354 case STATS_EXT_MCV:
355 attnum = Anum_pg_statistic_ext_data_stxdmcv;
356 break;
357
358 default:
359 elog(ERROR, "unexpected statistics type requested: %d", type);
360 }
361
362 return !heap_attisnull(htup, attnum, NULL);
363 }
364
365 /*
366 * Return a list (of StatExtEntry) of statistics objects for the given relation.
367 */
368 static List *
fetch_statentries_for_relation(Relation pg_statext,Oid relid)369 fetch_statentries_for_relation(Relation pg_statext, Oid relid)
370 {
371 SysScanDesc scan;
372 ScanKeyData skey;
373 HeapTuple htup;
374 List *result = NIL;
375
376 /*
377 * Prepare to scan pg_statistic_ext for entries having stxrelid = this
378 * rel.
379 */
380 ScanKeyInit(&skey,
381 Anum_pg_statistic_ext_stxrelid,
382 BTEqualStrategyNumber, F_OIDEQ,
383 ObjectIdGetDatum(relid));
384
385 scan = systable_beginscan(pg_statext, StatisticExtRelidIndexId, true,
386 NULL, 1, &skey);
387
388 while (HeapTupleIsValid(htup = systable_getnext(scan)))
389 {
390 StatExtEntry *entry;
391 Datum datum;
392 bool isnull;
393 int i;
394 ArrayType *arr;
395 char *enabled;
396 Form_pg_statistic_ext staForm;
397
398 entry = palloc0(sizeof(StatExtEntry));
399 staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
400 entry->statOid = staForm->oid;
401 entry->schema = get_namespace_name(staForm->stxnamespace);
402 entry->name = pstrdup(NameStr(staForm->stxname));
403 entry->stattarget = staForm->stxstattarget;
404 for (i = 0; i < staForm->stxkeys.dim1; i++)
405 {
406 entry->columns = bms_add_member(entry->columns,
407 staForm->stxkeys.values[i]);
408 }
409
410 /* decode the stxkind char array into a list of chars */
411 datum = SysCacheGetAttr(STATEXTOID, htup,
412 Anum_pg_statistic_ext_stxkind, &isnull);
413 Assert(!isnull);
414 arr = DatumGetArrayTypeP(datum);
415 if (ARR_NDIM(arr) != 1 ||
416 ARR_HASNULL(arr) ||
417 ARR_ELEMTYPE(arr) != CHAROID)
418 elog(ERROR, "stxkind is not a 1-D char array");
419 enabled = (char *) ARR_DATA_PTR(arr);
420 for (i = 0; i < ARR_DIMS(arr)[0]; i++)
421 {
422 Assert((enabled[i] == STATS_EXT_NDISTINCT) ||
423 (enabled[i] == STATS_EXT_DEPENDENCIES) ||
424 (enabled[i] == STATS_EXT_MCV));
425 entry->types = lappend_int(entry->types, (int) enabled[i]);
426 }
427
428 result = lappend(result, entry);
429 }
430
431 systable_endscan(scan);
432
433 return result;
434 }
435
436 /*
437 * Using 'vacatts' of size 'nvacatts' as input data, return a newly built
438 * VacAttrStats array which includes only the items corresponding to
439 * attributes indicated by 'stxkeys'. If we don't have all of the per column
440 * stats available to compute the extended stats, then we return NULL to indicate
441 * to the caller that the stats should not be built.
442 */
443 static VacAttrStats **
lookup_var_attr_stats(Relation rel,Bitmapset * attrs,int nvacatts,VacAttrStats ** vacatts)444 lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
445 int nvacatts, VacAttrStats **vacatts)
446 {
447 int i = 0;
448 int x = -1;
449 VacAttrStats **stats;
450
451 stats = (VacAttrStats **)
452 palloc(bms_num_members(attrs) * sizeof(VacAttrStats *));
453
454 /* lookup VacAttrStats info for the requested columns (same attnum) */
455 while ((x = bms_next_member(attrs, x)) >= 0)
456 {
457 int j;
458
459 stats[i] = NULL;
460 for (j = 0; j < nvacatts; j++)
461 {
462 if (x == vacatts[j]->tupattnum)
463 {
464 stats[i] = vacatts[j];
465 break;
466 }
467 }
468
469 if (!stats[i])
470 {
471 /*
472 * Looks like stats were not gathered for one of the columns
473 * required. We'll be unable to build the extended stats without
474 * this column.
475 */
476 pfree(stats);
477 return NULL;
478 }
479
480 /*
481 * Sanity check that the column is not dropped - stats should have
482 * been removed in this case.
483 */
484 Assert(!stats[i]->attr->attisdropped);
485
486 i++;
487 }
488
489 return stats;
490 }
491
492 /*
493 * statext_store
494 * Serializes the statistics and stores them into the pg_statistic_ext_data
495 * tuple.
496 */
497 static void
statext_store(Oid statOid,MVNDistinct * ndistinct,MVDependencies * dependencies,MCVList * mcv,VacAttrStats ** stats)498 statext_store(Oid statOid,
499 MVNDistinct *ndistinct, MVDependencies *dependencies,
500 MCVList *mcv, VacAttrStats **stats)
501 {
502 Relation pg_stextdata;
503 HeapTuple stup,
504 oldtup;
505 Datum values[Natts_pg_statistic_ext_data];
506 bool nulls[Natts_pg_statistic_ext_data];
507 bool replaces[Natts_pg_statistic_ext_data];
508
509 pg_stextdata = table_open(StatisticExtDataRelationId, RowExclusiveLock);
510
511 memset(nulls, true, sizeof(nulls));
512 memset(replaces, false, sizeof(replaces));
513 memset(values, 0, sizeof(values));
514
515 /*
516 * Construct a new pg_statistic_ext_data tuple, replacing the calculated
517 * stats.
518 */
519 if (ndistinct != NULL)
520 {
521 bytea *data = statext_ndistinct_serialize(ndistinct);
522
523 nulls[Anum_pg_statistic_ext_data_stxdndistinct - 1] = (data == NULL);
524 values[Anum_pg_statistic_ext_data_stxdndistinct - 1] = PointerGetDatum(data);
525 }
526
527 if (dependencies != NULL)
528 {
529 bytea *data = statext_dependencies_serialize(dependencies);
530
531 nulls[Anum_pg_statistic_ext_data_stxddependencies - 1] = (data == NULL);
532 values[Anum_pg_statistic_ext_data_stxddependencies - 1] = PointerGetDatum(data);
533 }
534 if (mcv != NULL)
535 {
536 bytea *data = statext_mcv_serialize(mcv, stats);
537
538 nulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = (data == NULL);
539 values[Anum_pg_statistic_ext_data_stxdmcv - 1] = PointerGetDatum(data);
540 }
541
542 /* always replace the value (either by bytea or NULL) */
543 replaces[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true;
544 replaces[Anum_pg_statistic_ext_data_stxddependencies - 1] = true;
545 replaces[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
546
547 /* there should already be a pg_statistic_ext_data tuple */
548 oldtup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(statOid));
549 if (!HeapTupleIsValid(oldtup))
550 elog(ERROR, "cache lookup failed for statistics object %u", statOid);
551
552 /* replace it */
553 stup = heap_modify_tuple(oldtup,
554 RelationGetDescr(pg_stextdata),
555 values,
556 nulls,
557 replaces);
558 ReleaseSysCache(oldtup);
559 CatalogTupleUpdate(pg_stextdata, &stup->t_self, stup);
560
561 heap_freetuple(stup);
562
563 table_close(pg_stextdata, RowExclusiveLock);
564 }
565
566 /* initialize multi-dimensional sort */
567 MultiSortSupport
multi_sort_init(int ndims)568 multi_sort_init(int ndims)
569 {
570 MultiSortSupport mss;
571
572 Assert(ndims >= 2);
573
574 mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup)
575 + sizeof(SortSupportData) * ndims);
576
577 mss->ndims = ndims;
578
579 return mss;
580 }
581
582 /*
583 * Prepare sort support info using the given sort operator and collation
584 * at the position 'sortdim'
585 */
586 void
multi_sort_add_dimension(MultiSortSupport mss,int sortdim,Oid oper,Oid collation)587 multi_sort_add_dimension(MultiSortSupport mss, int sortdim,
588 Oid oper, Oid collation)
589 {
590 SortSupport ssup = &mss->ssup[sortdim];
591
592 ssup->ssup_cxt = CurrentMemoryContext;
593 ssup->ssup_collation = collation;
594 ssup->ssup_nulls_first = false;
595
596 PrepareSortSupportFromOrderingOp(oper, ssup);
597 }
598
599 /* compare all the dimensions in the selected order */
600 int
multi_sort_compare(const void * a,const void * b,void * arg)601 multi_sort_compare(const void *a, const void *b, void *arg)
602 {
603 MultiSortSupport mss = (MultiSortSupport) arg;
604 SortItem *ia = (SortItem *) a;
605 SortItem *ib = (SortItem *) b;
606 int i;
607
608 for (i = 0; i < mss->ndims; i++)
609 {
610 int compare;
611
612 compare = ApplySortComparator(ia->values[i], ia->isnull[i],
613 ib->values[i], ib->isnull[i],
614 &mss->ssup[i]);
615
616 if (compare != 0)
617 return compare;
618 }
619
620 /* equal by default */
621 return 0;
622 }
623
624 /* compare selected dimension */
625 int
multi_sort_compare_dim(int dim,const SortItem * a,const SortItem * b,MultiSortSupport mss)626 multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b,
627 MultiSortSupport mss)
628 {
629 return ApplySortComparator(a->values[dim], a->isnull[dim],
630 b->values[dim], b->isnull[dim],
631 &mss->ssup[dim]);
632 }
633
634 int
multi_sort_compare_dims(int start,int end,const SortItem * a,const SortItem * b,MultiSortSupport mss)635 multi_sort_compare_dims(int start, int end,
636 const SortItem *a, const SortItem *b,
637 MultiSortSupport mss)
638 {
639 int dim;
640
641 for (dim = start; dim <= end; dim++)
642 {
643 int r = ApplySortComparator(a->values[dim], a->isnull[dim],
644 b->values[dim], b->isnull[dim],
645 &mss->ssup[dim]);
646
647 if (r != 0)
648 return r;
649 }
650
651 return 0;
652 }
653
654 int
compare_scalars_simple(const void * a,const void * b,void * arg)655 compare_scalars_simple(const void *a, const void *b, void *arg)
656 {
657 return compare_datums_simple(*(Datum *) a,
658 *(Datum *) b,
659 (SortSupport) arg);
660 }
661
662 int
compare_datums_simple(Datum a,Datum b,SortSupport ssup)663 compare_datums_simple(Datum a, Datum b, SortSupport ssup)
664 {
665 return ApplySortComparator(a, false, b, false, ssup);
666 }
667
668 /* simple counterpart to qsort_arg */
669 void *
bsearch_arg(const void * key,const void * base,size_t nmemb,size_t size,int (* compar)(const void *,const void *,void *),void * arg)670 bsearch_arg(const void *key, const void *base, size_t nmemb, size_t size,
671 int (*compar) (const void *, const void *, void *),
672 void *arg)
673 {
674 size_t l,
675 u,
676 idx;
677 const void *p;
678 int comparison;
679
680 l = 0;
681 u = nmemb;
682 while (l < u)
683 {
684 idx = (l + u) / 2;
685 p = (void *) (((const char *) base) + (idx * size));
686 comparison = (*compar) (key, p, arg);
687
688 if (comparison < 0)
689 u = idx;
690 else if (comparison > 0)
691 l = idx + 1;
692 else
693 return (void *) p;
694 }
695
696 return NULL;
697 }
698
699 /*
700 * build_attnums_array
701 * Transforms a bitmap into an array of AttrNumber values.
702 *
703 * This is used for extended statistics only, so all the attribute must be
704 * user-defined. That means offsetting by FirstLowInvalidHeapAttributeNumber
705 * is not necessary here (and when querying the bitmap).
706 */
707 AttrNumber *
build_attnums_array(Bitmapset * attrs,int * numattrs)708 build_attnums_array(Bitmapset *attrs, int *numattrs)
709 {
710 int i,
711 j;
712 AttrNumber *attnums;
713 int num = bms_num_members(attrs);
714
715 if (numattrs)
716 *numattrs = num;
717
718 /* build attnums from the bitmapset */
719 attnums = (AttrNumber *) palloc(sizeof(AttrNumber) * num);
720 i = 0;
721 j = -1;
722 while ((j = bms_next_member(attrs, j)) >= 0)
723 {
724 /*
725 * Make sure the bitmap contains only user-defined attributes. As
726 * bitmaps can't contain negative values, this can be violated in two
727 * ways. Firstly, the bitmap might contain 0 as a member, and secondly
728 * the integer value might be larger than MaxAttrNumber.
729 */
730 Assert(AttrNumberIsForUserDefinedAttr(j));
731 Assert(j <= MaxAttrNumber);
732
733 attnums[i++] = (AttrNumber) j;
734
735 /* protect against overflows */
736 Assert(i <= num);
737 }
738
739 return attnums;
740 }
741
742 /*
743 * build_sorted_items
744 * build a sorted array of SortItem with values from rows
745 *
746 * Note: All the memory is allocated in a single chunk, so that the caller
747 * can simply pfree the return value to release all of it.
748 */
749 SortItem *
build_sorted_items(int numrows,int * nitems,HeapTuple * rows,TupleDesc tdesc,MultiSortSupport mss,int numattrs,AttrNumber * attnums)750 build_sorted_items(int numrows, int *nitems, HeapTuple *rows, TupleDesc tdesc,
751 MultiSortSupport mss, int numattrs, AttrNumber *attnums)
752 {
753 int i,
754 j,
755 len,
756 idx;
757 int nvalues = numrows * numattrs;
758
759 SortItem *items;
760 Datum *values;
761 bool *isnull;
762 char *ptr;
763
764 /* Compute the total amount of memory we need (both items and values). */
765 len = numrows * sizeof(SortItem) + nvalues * (sizeof(Datum) + sizeof(bool));
766
767 /* Allocate the memory and split it into the pieces. */
768 ptr = palloc0(len);
769
770 /* items to sort */
771 items = (SortItem *) ptr;
772 ptr += numrows * sizeof(SortItem);
773
774 /* values and null flags */
775 values = (Datum *) ptr;
776 ptr += nvalues * sizeof(Datum);
777
778 isnull = (bool *) ptr;
779 ptr += nvalues * sizeof(bool);
780
781 /* make sure we consumed the whole buffer exactly */
782 Assert((ptr - (char *) items) == len);
783
784 /* fix the pointers to Datum and bool arrays */
785 idx = 0;
786 for (i = 0; i < numrows; i++)
787 {
788 bool toowide = false;
789
790 items[idx].values = &values[idx * numattrs];
791 items[idx].isnull = &isnull[idx * numattrs];
792
793 /* load the values/null flags from sample rows */
794 for (j = 0; j < numattrs; j++)
795 {
796 Datum value;
797 bool isnull;
798
799 value = heap_getattr(rows[i], attnums[j], tdesc, &isnull);
800
801 /*
802 * If this is a varlena value, check if it's too wide and if yes
803 * then skip the whole item. Otherwise detoast the value.
804 *
805 * XXX It may happen that we've already detoasted some preceding
806 * values for the current item. We don't bother to cleanup those
807 * on the assumption that those are small (below WIDTH_THRESHOLD)
808 * and will be discarded at the end of analyze.
809 */
810 if ((!isnull) &&
811 (TupleDescAttr(tdesc, attnums[j] - 1)->attlen == -1))
812 {
813 if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
814 {
815 toowide = true;
816 break;
817 }
818
819 value = PointerGetDatum(PG_DETOAST_DATUM(value));
820 }
821
822 items[idx].values[j] = value;
823 items[idx].isnull[j] = isnull;
824 }
825
826 if (toowide)
827 continue;
828
829 idx++;
830 }
831
832 /* store the actual number of items (ignoring the too-wide ones) */
833 *nitems = idx;
834
835 /* all items were too wide */
836 if (idx == 0)
837 {
838 /* everything is allocated as a single chunk */
839 pfree(items);
840 return NULL;
841 }
842
843 /* do the sort, using the multi-sort */
844 qsort_arg((void *) items, idx, sizeof(SortItem),
845 multi_sort_compare, mss);
846
847 return items;
848 }
849
850 /*
851 * has_stats_of_kind
852 * Check whether the list contains statistic of a given kind
853 */
854 bool
has_stats_of_kind(List * stats,char requiredkind)855 has_stats_of_kind(List *stats, char requiredkind)
856 {
857 ListCell *l;
858
859 foreach(l, stats)
860 {
861 StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
862
863 if (stat->kind == requiredkind)
864 return true;
865 }
866
867 return false;
868 }
869
870 /*
871 * choose_best_statistics
872 * Look for and return statistics with the specified 'requiredkind' which
873 * have keys that match at least two of the given attnums. Return NULL if
874 * there's no match.
875 *
876 * The current selection criteria is very simple - we choose the statistics
877 * object referencing the most attributes in covered (and still unestimated
878 * clauses), breaking ties in favor of objects with fewer keys overall.
879 *
880 * The clause_attnums is an array of bitmaps, storing attnums for individual
881 * clauses. A NULL element means the clause is either incompatible or already
882 * estimated.
883 *
884 * XXX If multiple statistics objects tie on both criteria, then which object
885 * is chosen depends on the order that they appear in the stats list. Perhaps
886 * further tiebreakers are needed.
887 */
888 StatisticExtInfo *
choose_best_statistics(List * stats,char requiredkind,Bitmapset ** clause_attnums,int nclauses)889 choose_best_statistics(List *stats, char requiredkind,
890 Bitmapset **clause_attnums, int nclauses)
891 {
892 ListCell *lc;
893 StatisticExtInfo *best_match = NULL;
894 int best_num_matched = 2; /* goal #1: maximize */
895 int best_match_keys = (STATS_MAX_DIMENSIONS + 1); /* goal #2: minimize */
896
897 foreach(lc, stats)
898 {
899 int i;
900 StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
901 Bitmapset *matched = NULL;
902 int num_matched;
903 int numkeys;
904
905 /* skip statistics that are not of the correct type */
906 if (info->kind != requiredkind)
907 continue;
908
909 /*
910 * Collect attributes in remaining (unestimated) clauses fully covered
911 * by this statistic object.
912 */
913 for (i = 0; i < nclauses; i++)
914 {
915 /* ignore incompatible/estimated clauses */
916 if (!clause_attnums[i])
917 continue;
918
919 /* ignore clauses that are not covered by this object */
920 if (!bms_is_subset(clause_attnums[i], info->keys))
921 continue;
922
923 matched = bms_add_members(matched, clause_attnums[i]);
924 }
925
926 num_matched = bms_num_members(matched);
927 bms_free(matched);
928
929 /*
930 * save the actual number of keys in the stats so that we can choose
931 * the narrowest stats with the most matching keys.
932 */
933 numkeys = bms_num_members(info->keys);
934
935 /*
936 * Use this object when it increases the number of matched clauses or
937 * when it matches the same number of attributes but these stats have
938 * fewer keys than any previous match.
939 */
940 if (num_matched > best_num_matched ||
941 (num_matched == best_num_matched && numkeys < best_match_keys))
942 {
943 best_match = info;
944 best_num_matched = num_matched;
945 best_match_keys = numkeys;
946 }
947 }
948
949 return best_match;
950 }
951
952 /*
953 * statext_is_compatible_clause_internal
954 * Determines if the clause is compatible with MCV lists.
955 *
956 * Does the heavy lifting of actually inspecting the clauses for
957 * statext_is_compatible_clause. It needs to be split like this because
958 * of recursion. The attnums bitmap is an input/output parameter collecting
959 * attribute numbers from all compatible clauses (recursively).
960 */
961 static bool
statext_is_compatible_clause_internal(PlannerInfo * root,Node * clause,Index relid,Bitmapset ** attnums)962 statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
963 Index relid, Bitmapset **attnums)
964 {
965 /* Look inside any binary-compatible relabeling (as in examine_variable) */
966 if (IsA(clause, RelabelType))
967 clause = (Node *) ((RelabelType *) clause)->arg;
968
969 /* plain Var references (boolean Vars or recursive checks) */
970 if (IsA(clause, Var))
971 {
972 Var *var = (Var *) clause;
973
974 /* Ensure var is from the correct relation */
975 if (var->varno != relid)
976 return false;
977
978 /* we also better ensure the Var is from the current level */
979 if (var->varlevelsup > 0)
980 return false;
981
982 /* Also skip system attributes (we don't allow stats on those). */
983 if (!AttrNumberIsForUserDefinedAttr(var->varattno))
984 return false;
985
986 *attnums = bms_add_member(*attnums, var->varattno);
987
988 return true;
989 }
990
991 /* (Var op Const) or (Const op Var) */
992 if (is_opclause(clause))
993 {
994 RangeTblEntry *rte = root->simple_rte_array[relid];
995 OpExpr *expr = (OpExpr *) clause;
996 Var *var;
997
998 /* Only expressions with two arguments are considered compatible. */
999 if (list_length(expr->args) != 2)
1000 return false;
1001
1002 /* Check if the expression has the right shape (one Var, one Const) */
1003 if (!examine_clause_args(expr->args, &var, NULL, NULL))
1004 return false;
1005
1006 /*
1007 * If it's not one of the supported operators ("=", "<", ">", etc.),
1008 * just ignore the clause, as it's not compatible with MCV lists.
1009 *
1010 * This uses the function for estimating selectivity, not the operator
1011 * directly (a bit awkward, but well ...).
1012 */
1013 switch (get_oprrest(expr->opno))
1014 {
1015 case F_EQSEL:
1016 case F_NEQSEL:
1017 case F_SCALARLTSEL:
1018 case F_SCALARLESEL:
1019 case F_SCALARGTSEL:
1020 case F_SCALARGESEL:
1021 /* supported, will continue with inspection of the Var */
1022 break;
1023
1024 default:
1025 /* other estimators are considered unknown/unsupported */
1026 return false;
1027 }
1028
1029 /*
1030 * If there are any securityQuals on the RTE from security barrier
1031 * views or RLS policies, then the user may not have access to all the
1032 * table's data, and we must check that the operator is leak-proof.
1033 *
1034 * If the operator is leaky, then we must ignore this clause for the
1035 * purposes of estimating with MCV lists, otherwise the operator might
1036 * reveal values from the MCV list that the user doesn't have
1037 * permission to see.
1038 */
1039 if (rte->securityQuals != NIL &&
1040 !get_func_leakproof(get_opcode(expr->opno)))
1041 return false;
1042
1043 return statext_is_compatible_clause_internal(root, (Node *) var,
1044 relid, attnums);
1045 }
1046
1047 /* Var IN Array */
1048 if (IsA(clause, ScalarArrayOpExpr))
1049 {
1050 RangeTblEntry *rte = root->simple_rte_array[relid];
1051 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1052 Var *var;
1053
1054 /* Only expressions with two arguments are considered compatible. */
1055 if (list_length(expr->args) != 2)
1056 return false;
1057
1058 /* Check if the expression has the right shape (one Var, one Const) */
1059 if (!examine_clause_args(expr->args, &var, NULL, NULL))
1060 return false;
1061
1062 /*
1063 * If it's not one of the supported operators ("=", "<", ">", etc.),
1064 * just ignore the clause, as it's not compatible with MCV lists.
1065 *
1066 * This uses the function for estimating selectivity, not the operator
1067 * directly (a bit awkward, but well ...).
1068 */
1069 switch (get_oprrest(expr->opno))
1070 {
1071 case F_EQSEL:
1072 case F_NEQSEL:
1073 case F_SCALARLTSEL:
1074 case F_SCALARLESEL:
1075 case F_SCALARGTSEL:
1076 case F_SCALARGESEL:
1077 /* supported, will continue with inspection of the Var */
1078 break;
1079
1080 default:
1081 /* other estimators are considered unknown/unsupported */
1082 return false;
1083 }
1084
1085 /*
1086 * If there are any securityQuals on the RTE from security barrier
1087 * views or RLS policies, then the user may not have access to all the
1088 * table's data, and we must check that the operator is leak-proof.
1089 *
1090 * If the operator is leaky, then we must ignore this clause for the
1091 * purposes of estimating with MCV lists, otherwise the operator might
1092 * reveal values from the MCV list that the user doesn't have
1093 * permission to see.
1094 */
1095 if (rte->securityQuals != NIL &&
1096 !get_func_leakproof(get_opcode(expr->opno)))
1097 return false;
1098
1099 return statext_is_compatible_clause_internal(root, (Node *) var,
1100 relid, attnums);
1101 }
1102
1103 /* AND/OR/NOT clause */
1104 if (is_andclause(clause) ||
1105 is_orclause(clause) ||
1106 is_notclause(clause))
1107 {
1108 /*
1109 * AND/OR/NOT-clauses are supported if all sub-clauses are supported
1110 *
1111 * Perhaps we could improve this by handling mixed cases, when some of
1112 * the clauses are supported and some are not. Selectivity for the
1113 * supported subclauses would be computed using extended statistics,
1114 * and the remaining clauses would be estimated using the traditional
1115 * algorithm (product of selectivities).
1116 *
1117 * It however seems overly complex, and in a way we already do that
1118 * because if we reject the whole clause as unsupported here, it will
1119 * be eventually passed to clauselist_selectivity() which does exactly
1120 * this (split into supported/unsupported clauses etc).
1121 */
1122 BoolExpr *expr = (BoolExpr *) clause;
1123 ListCell *lc;
1124
1125 foreach(lc, expr->args)
1126 {
1127 /*
1128 * Had we found incompatible clause in the arguments, treat the
1129 * whole clause as incompatible.
1130 */
1131 if (!statext_is_compatible_clause_internal(root,
1132 (Node *) lfirst(lc),
1133 relid, attnums))
1134 return false;
1135 }
1136
1137 return true;
1138 }
1139
1140 /* Var IS NULL */
1141 if (IsA(clause, NullTest))
1142 {
1143 NullTest *nt = (NullTest *) clause;
1144
1145 /*
1146 * Only simple (Var IS NULL) expressions supported for now. Maybe we
1147 * could use examine_variable to fix this?
1148 */
1149 if (!IsA(nt->arg, Var))
1150 return false;
1151
1152 return statext_is_compatible_clause_internal(root, (Node *) (nt->arg),
1153 relid, attnums);
1154 }
1155
1156 return false;
1157 }
1158
1159 /*
1160 * statext_is_compatible_clause
1161 * Determines if the clause is compatible with MCV lists.
1162 *
1163 * Currently, we only support three types of clauses:
1164 *
1165 * (a) OpExprs of the form (Var op Const), or (Const op Var), where the op
1166 * is one of ("=", "<", ">", ">=", "<=")
1167 *
1168 * (b) (Var IS [NOT] NULL)
1169 *
1170 * (c) combinations using AND/OR/NOT
1171 *
1172 * In the future, the range of supported clauses may be expanded to more
1173 * complex cases, for example (Var op Var).
1174 */
1175 static bool
statext_is_compatible_clause(PlannerInfo * root,Node * clause,Index relid,Bitmapset ** attnums)1176 statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
1177 Bitmapset **attnums)
1178 {
1179 RangeTblEntry *rte = root->simple_rte_array[relid];
1180 RestrictInfo *rinfo = (RestrictInfo *) clause;
1181 Oid userid;
1182
1183 if (!IsA(rinfo, RestrictInfo))
1184 return false;
1185
1186 /* Pseudoconstants are not really interesting here. */
1187 if (rinfo->pseudoconstant)
1188 return false;
1189
1190 /* clauses referencing multiple varnos are incompatible */
1191 if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
1192 return false;
1193
1194 /* Check the clause and determine what attributes it references. */
1195 if (!statext_is_compatible_clause_internal(root, (Node *) rinfo->clause,
1196 relid, attnums))
1197 return false;
1198
1199 /*
1200 * Check that the user has permission to read all these attributes. Use
1201 * checkAsUser if it's set, in case we're accessing the table via a view.
1202 */
1203 userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
1204
1205 if (pg_class_aclcheck(rte->relid, userid, ACL_SELECT) != ACLCHECK_OK)
1206 {
1207 /* Don't have table privilege, must check individual columns */
1208 if (bms_is_member(InvalidAttrNumber, *attnums))
1209 {
1210 /* Have a whole-row reference, must have access to all columns */
1211 if (pg_attribute_aclcheck_all(rte->relid, userid, ACL_SELECT,
1212 ACLMASK_ALL) != ACLCHECK_OK)
1213 return false;
1214 }
1215 else
1216 {
1217 /* Check the columns referenced by the clause */
1218 int attnum = -1;
1219
1220 while ((attnum = bms_next_member(*attnums, attnum)) >= 0)
1221 {
1222 if (pg_attribute_aclcheck(rte->relid, attnum, userid,
1223 ACL_SELECT) != ACLCHECK_OK)
1224 return false;
1225 }
1226 }
1227 }
1228
1229 /* If we reach here, the clause is OK */
1230 return true;
1231 }
1232
1233 /*
1234 * statext_mcv_clauselist_selectivity
1235 * Estimate clauses using the best multi-column statistics.
1236 *
1237 * Applies available extended (multi-column) statistics on a table. There may
1238 * be multiple applicable statistics (with respect to the clauses), in which
1239 * case we use greedy approach. In each round we select the best statistic on
1240 * a table (measured by the number of attributes extracted from the clauses
1241 * and covered by it), and compute the selectivity for the supplied clauses.
1242 * We repeat this process with the remaining clauses (if any), until none of
1243 * the available statistics can be used.
1244 *
1245 * One of the main challenges with using MCV lists is how to extrapolate the
1246 * estimate to the data not covered by the MCV list. To do that, we compute
1247 * not only the "MCV selectivity" (selectivities for MCV items matching the
1248 * supplied clauses), but also a couple of derived selectivities:
1249 *
1250 * - simple selectivity: Computed without extended statistic, i.e. as if the
1251 * columns/clauses were independent
1252 *
1253 * - base selectivity: Similar to simple selectivity, but is computed using
1254 * the extended statistic by adding up the base frequencies (that we compute
1255 * and store for each MCV item) of matching MCV items.
1256 *
1257 * - total selectivity: Selectivity covered by the whole MCV list.
1258 *
1259 * - other selectivity: A selectivity estimate for data not covered by the MCV
1260 * list (i.e. satisfying the clauses, but not common enough to make it into
1261 * the MCV list)
1262 *
1263 * Note: While simple and base selectivities are defined in a quite similar
1264 * way, the values are computed differently and are not therefore equal. The
1265 * simple selectivity is computed as a product of per-clause estimates, while
1266 * the base selectivity is computed by adding up base frequencies of matching
1267 * items of the multi-column MCV list. So the values may differ for two main
1268 * reasons - (a) the MCV list may not cover 100% of the data and (b) some of
1269 * the MCV items did not match the estimated clauses.
1270 *
1271 * As both (a) and (b) reduce the base selectivity value, it generally holds
1272 * that (simple_selectivity >= base_selectivity). If the MCV list covers all
1273 * the data, the values may be equal.
1274 *
1275 * So, (simple_selectivity - base_selectivity) is an estimate for the part
1276 * not covered by the MCV list, and (mcv_selectivity - base_selectivity) may
1277 * be seen as a correction for the part covered by the MCV list. Those two
1278 * statements are actually equivalent.
1279 *
1280 * Note: Due to rounding errors and minor differences in how the estimates
1281 * are computed, the inequality may not always hold. Which is why we clamp
1282 * the selectivities to prevent strange estimate (negative etc.).
1283 *
1284 * 'estimatedclauses' is an input/output parameter. We set bits for the
1285 * 0-based 'clauses' indexes we estimate for and also skip clause items that
1286 * already have a bit set.
1287 */
1288 static Selectivity
statext_mcv_clauselist_selectivity(PlannerInfo * root,List * clauses,int varRelid,JoinType jointype,SpecialJoinInfo * sjinfo,RelOptInfo * rel,Bitmapset ** estimatedclauses)1289 statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
1290 JoinType jointype, SpecialJoinInfo *sjinfo,
1291 RelOptInfo *rel, Bitmapset **estimatedclauses)
1292 {
1293 ListCell *l;
1294 Bitmapset **list_attnums;
1295 int listidx;
1296 Selectivity sel = 1.0;
1297
1298 /* check if there's any stats that might be useful for us. */
1299 if (!has_stats_of_kind(rel->statlist, STATS_EXT_MCV))
1300 return 1.0;
1301
1302 list_attnums = (Bitmapset **) palloc(sizeof(Bitmapset *) *
1303 list_length(clauses));
1304
1305 /*
1306 * Pre-process the clauses list to extract the attnums seen in each item.
1307 * We need to determine if there's any clauses which will be useful for
1308 * selectivity estimations with extended stats. Along the way we'll record
1309 * all of the attnums for each clause in a list which we'll reference
1310 * later so we don't need to repeat the same work again. We'll also keep
1311 * track of all attnums seen.
1312 *
1313 * We also skip clauses that we already estimated using different types of
1314 * statistics (we treat them as incompatible).
1315 */
1316 listidx = 0;
1317 foreach(l, clauses)
1318 {
1319 Node *clause = (Node *) lfirst(l);
1320 Bitmapset *attnums = NULL;
1321
1322 if (!bms_is_member(listidx, *estimatedclauses) &&
1323 statext_is_compatible_clause(root, clause, rel->relid, &attnums))
1324 list_attnums[listidx] = attnums;
1325 else
1326 list_attnums[listidx] = NULL;
1327
1328 listidx++;
1329 }
1330
1331 /* apply as many extended statistics as possible */
1332 while (true)
1333 {
1334 StatisticExtInfo *stat;
1335 List *stat_clauses;
1336 Selectivity simple_sel,
1337 mcv_sel,
1338 mcv_basesel,
1339 mcv_totalsel,
1340 other_sel,
1341 stat_sel;
1342
1343 /* find the best suited statistics object for these attnums */
1344 stat = choose_best_statistics(rel->statlist, STATS_EXT_MCV,
1345 list_attnums, list_length(clauses));
1346
1347 /*
1348 * if no (additional) matching stats could be found then we've nothing
1349 * to do
1350 */
1351 if (!stat)
1352 break;
1353
1354 /* Ensure choose_best_statistics produced an expected stats type. */
1355 Assert(stat->kind == STATS_EXT_MCV);
1356
1357 /* now filter the clauses to be estimated using the selected MCV */
1358 stat_clauses = NIL;
1359
1360 listidx = 0;
1361 foreach(l, clauses)
1362 {
1363 /*
1364 * If the clause is compatible with the selected statistics, mark
1365 * it as estimated and add it to the list to estimate.
1366 */
1367 if (list_attnums[listidx] != NULL &&
1368 bms_is_subset(list_attnums[listidx], stat->keys))
1369 {
1370 stat_clauses = lappend(stat_clauses, (Node *) lfirst(l));
1371 *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
1372
1373 bms_free(list_attnums[listidx]);
1374 list_attnums[listidx] = NULL;
1375 }
1376
1377 listidx++;
1378 }
1379
1380 /*
1381 * First compute "simple" selectivity, i.e. without the extended
1382 * statistics, and essentially assuming independence of the
1383 * columns/clauses. We'll then use the various selectivities computed
1384 * from MCV list to improve it.
1385 */
1386 simple_sel = clauselist_selectivity_simple(root, stat_clauses, varRelid,
1387 jointype, sjinfo, NULL);
1388
1389 /*
1390 * Now compute the multi-column estimate from the MCV list, along with
1391 * the other selectivities (base & total selectivity).
1392 */
1393 mcv_sel = mcv_clauselist_selectivity(root, stat, stat_clauses, varRelid,
1394 jointype, sjinfo, rel,
1395 &mcv_basesel, &mcv_totalsel);
1396
1397 /* Estimated selectivity of values not covered by MCV matches */
1398 other_sel = simple_sel - mcv_basesel;
1399 CLAMP_PROBABILITY(other_sel);
1400
1401 /* The non-MCV selectivity can't exceed the 1 - mcv_totalsel. */
1402 if (other_sel > 1.0 - mcv_totalsel)
1403 other_sel = 1.0 - mcv_totalsel;
1404
1405 /*
1406 * Overall selectivity is the combination of MCV and non-MCV
1407 * estimates.
1408 */
1409 stat_sel = mcv_sel + other_sel;
1410 CLAMP_PROBABILITY(stat_sel);
1411
1412 /* Factor the estimate from this MCV to the oveall estimate. */
1413 sel *= stat_sel;
1414 }
1415
1416 return sel;
1417 }
1418
1419 /*
1420 * statext_clauselist_selectivity
1421 * Estimate clauses using the best multi-column statistics.
1422 */
1423 Selectivity
statext_clauselist_selectivity(PlannerInfo * root,List * clauses,int varRelid,JoinType jointype,SpecialJoinInfo * sjinfo,RelOptInfo * rel,Bitmapset ** estimatedclauses)1424 statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
1425 JoinType jointype, SpecialJoinInfo *sjinfo,
1426 RelOptInfo *rel, Bitmapset **estimatedclauses)
1427 {
1428 Selectivity sel;
1429
1430 /* First, try estimating clauses using a multivariate MCV list. */
1431 sel = statext_mcv_clauselist_selectivity(root, clauses, varRelid, jointype,
1432 sjinfo, rel, estimatedclauses);
1433
1434 /*
1435 * Then, apply functional dependencies on the remaining clauses by calling
1436 * dependencies_clauselist_selectivity. Pass 'estimatedclauses' so the
1437 * function can properly skip clauses already estimated above.
1438 *
1439 * The reasoning for applying dependencies last is that the more complex
1440 * stats can track more complex correlations between the attributes, and
1441 * so may be considered more reliable.
1442 *
1443 * For example, MCV list can give us an exact selectivity for values in
1444 * two columns, while functional dependencies can only provide information
1445 * about the overall strength of the dependency.
1446 */
1447 sel *= dependencies_clauselist_selectivity(root, clauses, varRelid,
1448 jointype, sjinfo, rel,
1449 estimatedclauses);
1450
1451 return sel;
1452 }
1453
1454 /*
1455 * examine_opclause_expression
1456 * Split expression into Var and Const parts.
1457 *
1458 * Attempts to match the arguments to either (Var op Const) or (Const op Var),
1459 * possibly with a RelabelType on top. When the expression matches this form,
1460 * returns true, otherwise returns false.
1461 *
1462 * Optionally returns pointers to the extracted Var/Const nodes, when passed
1463 * non-null pointers (varp, cstp and varonleftp). The varonleftp flag specifies
1464 * on which side of the operator we found the Var node.
1465 */
1466 bool
examine_clause_args(List * args,Var ** varp,Const ** cstp,bool * varonleftp)1467 examine_clause_args(List *args, Var **varp, Const **cstp, bool *varonleftp)
1468 {
1469 Var *var;
1470 Const *cst;
1471 bool varonleft;
1472 Node *leftop,
1473 *rightop;
1474
1475 /* enforced by statext_is_compatible_clause_internal */
1476 Assert(list_length(args) == 2);
1477
1478 leftop = linitial(args);
1479 rightop = lsecond(args);
1480
1481 /* strip RelabelType from either side of the expression */
1482 if (IsA(leftop, RelabelType))
1483 leftop = (Node *) ((RelabelType *) leftop)->arg;
1484
1485 if (IsA(rightop, RelabelType))
1486 rightop = (Node *) ((RelabelType *) rightop)->arg;
1487
1488 if (IsA(leftop, Var) && IsA(rightop, Const))
1489 {
1490 var = (Var *) leftop;
1491 cst = (Const *) rightop;
1492 varonleft = true;
1493 }
1494 else if (IsA(leftop, Const) && IsA(rightop, Var))
1495 {
1496 var = (Var *) rightop;
1497 cst = (Const *) leftop;
1498 varonleft = false;
1499 }
1500 else
1501 return false;
1502
1503 /* return pointers to the extracted parts if requested */
1504 if (varp)
1505 *varp = var;
1506
1507 if (cstp)
1508 *cstp = cst;
1509
1510 if (varonleftp)
1511 *varonleftp = varonleft;
1512
1513 return true;
1514 }
1515