1 /*-------------------------------------------------------------------------
2 *
3 * mcv.c
4 * POSTGRES multivariate MCV lists
5 *
6 *
7 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * IDENTIFICATION
11 * src/backend/statistics/mcv.c
12 *
13 *-------------------------------------------------------------------------
14 */
15 #include "postgres.h"
16
17 #include <math.h>
18
19 #include "access/htup_details.h"
20 #include "catalog/pg_collation.h"
21 #include "catalog/pg_statistic_ext.h"
22 #include "catalog/pg_statistic_ext_data.h"
23 #include "fmgr.h"
24 #include "funcapi.h"
25 #include "nodes/nodeFuncs.h"
26 #include "optimizer/clauses.h"
27 #include "statistics/extended_stats_internal.h"
28 #include "statistics/statistics.h"
29 #include "utils/array.h"
30 #include "utils/builtins.h"
31 #include "utils/bytea.h"
32 #include "utils/fmgroids.h"
33 #include "utils/fmgrprotos.h"
34 #include "utils/lsyscache.h"
35 #include "utils/selfuncs.h"
36 #include "utils/syscache.h"
37 #include "utils/typcache.h"
38
39 /*
40 * Computes size of a serialized MCV item, depending on the number of
41 * dimensions (columns) the statistic is defined on. The datum values are
42 * stored in a separate array (deduplicated, to minimize the size), and
43 * so the serialized items only store uint16 indexes into that array.
44 *
45 * Each serialized item stores (in this order):
46 *
47 * - indexes to values (ndim * sizeof(uint16))
48 * - null flags (ndim * sizeof(bool))
49 * - frequency (sizeof(double))
50 * - base_frequency (sizeof(double))
51 *
52 * There is no alignment padding within an MCV item.
53 * So in total each MCV item requires this many bytes:
54 *
55 * ndim * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double)
56 */
57 #define ITEM_SIZE(ndims) \
58 ((ndims) * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double))
59
60 /*
61 * Used to compute size of serialized MCV list representation.
62 */
63 #define MinSizeOfMCVList \
64 (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))
65
66 /*
67 * Size of the serialized MCV list, excluding the space needed for
68 * deduplicated per-dimension values. The macro is meant to be used
69 * when it's not yet safe to access the serialized info about amount
70 * of data for each column.
71 */
72 #define SizeOfMCVList(ndims,nitems) \
73 ((MinSizeOfMCVList + sizeof(Oid) * (ndims)) + \
74 ((ndims) * sizeof(DimensionInfo)) + \
75 ((nitems) * ITEM_SIZE(ndims)))
76
77 static MultiSortSupport build_mss(StatsBuildData *data);
78
79 static SortItem *build_distinct_groups(int numrows, SortItem *items,
80 MultiSortSupport mss, int *ndistinct);
81
82 static SortItem **build_column_frequencies(SortItem *groups, int ngroups,
83 MultiSortSupport mss, int *ncounts);
84
85 static int count_distinct_groups(int numrows, SortItem *items,
86 MultiSortSupport mss);
87
88 /*
89 * Compute new value for bitmap item, considering whether it's used for
90 * clauses connected by AND/OR.
91 */
92 #define RESULT_MERGE(value, is_or, match) \
93 ((is_or) ? ((value) || (match)) : ((value) && (match)))
94
95 /*
96 * When processing a list of clauses, the bitmap item may get set to a value
97 * such that additional clauses can't change it. For example, when processing
98 * a list of clauses connected to AND, as soon as the item gets set to 'false'
99 * then it'll remain like that. Similarly clauses connected by OR and 'true'.
100 *
101 * Returns true when the value in the bitmap can't change no matter how the
102 * remaining clauses are evaluated.
103 */
104 #define RESULT_IS_FINAL(value, is_or) ((is_or) ? (value) : (!(value)))
105
106 /*
107 * get_mincount_for_mcv_list
108 * Determine the minimum number of times a value needs to appear in
109 * the sample for it to be included in the MCV list.
110 *
111 * We want to keep only values that appear sufficiently often in the
112 * sample that it is reasonable to extrapolate their sample frequencies to
113 * the entire table. We do this by placing an upper bound on the relative
114 * standard error of the sample frequency, so that any estimates the
115 * planner generates from the MCV statistics can be expected to be
116 * reasonably accurate.
117 *
118 * Since we are sampling without replacement, the sample frequency of a
119 * particular value is described by a hypergeometric distribution. A
120 * common rule of thumb when estimating errors in this situation is to
121 * require at least 10 instances of the value in the sample, in which case
122 * the distribution can be approximated by a normal distribution, and
123 * standard error analysis techniques can be applied. Given a sample size
124 * of n, a population size of N, and a sample frequency of p=cnt/n, the
125 * standard error of the proportion p is given by
126 * SE = sqrt(p*(1-p)/n) * sqrt((N-n)/(N-1))
127 * where the second term is the finite population correction. To get
128 * reasonably accurate planner estimates, we impose an upper bound on the
129 * relative standard error of 20% -- i.e., SE/p < 0.2. This 20% relative
130 * error bound is fairly arbitrary, but has been found empirically to work
131 * well. Rearranging this formula gives a lower bound on the number of
132 * instances of the value seen:
133 * cnt > n*(N-n) / (N-n+0.04*n*(N-1))
134 * This bound is at most 25, and approaches 0 as n approaches 0 or N. The
135 * case where n approaches 0 cannot happen in practice, since the sample
136 * size is at least 300. The case where n approaches N corresponds to
137 * sampling the whole the table, in which case it is reasonable to keep
138 * the whole MCV list (have no lower bound), so it makes sense to apply
139 * this formula for all inputs, even though the above derivation is
140 * technically only valid when the right hand side is at least around 10.
141 *
142 * An alternative way to look at this formula is as follows -- assume that
143 * the number of instances of the value seen scales up to the entire
144 * table, so that the population count is K=N*cnt/n. Then the distribution
145 * in the sample is a hypergeometric distribution parameterised by N, n
146 * and K, and the bound above is mathematically equivalent to demanding
147 * that the standard deviation of that distribution is less than 20% of
148 * its mean. Thus the relative errors in any planner estimates produced
149 * from the MCV statistics are likely to be not too large.
150 */
151 static double
get_mincount_for_mcv_list(int samplerows,double totalrows)152 get_mincount_for_mcv_list(int samplerows, double totalrows)
153 {
154 double n = samplerows;
155 double N = totalrows;
156 double numer,
157 denom;
158
159 numer = n * (N - n);
160 denom = N - n + 0.04 * n * (N - 1);
161
162 /* Guard against division by zero (possible if n = N = 1) */
163 if (denom == 0.0)
164 return 0.0;
165
166 return numer / denom;
167 }
168
169 /*
170 * Builds MCV list from the set of sampled rows.
171 *
172 * The algorithm is quite simple:
173 *
174 * (1) sort the data (default collation, '<' for the data type)
175 *
176 * (2) count distinct groups, decide how many to keep
177 *
178 * (3) build the MCV list using the threshold determined in (2)
179 *
180 * (4) remove rows represented by the MCV from the sample
181 *
182 */
183 MCVList *
statext_mcv_build(StatsBuildData * data,double totalrows,int stattarget)184 statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget)
185 {
186 int i,
187 numattrs,
188 numrows,
189 ngroups,
190 nitems;
191 double mincount;
192 SortItem *items;
193 SortItem *groups;
194 MCVList *mcvlist = NULL;
195 MultiSortSupport mss;
196
197 /* comparator for all the columns */
198 mss = build_mss(data);
199
200 /* sort the rows */
201 items = build_sorted_items(data, &nitems, mss,
202 data->nattnums, data->attnums);
203
204 if (!items)
205 return NULL;
206
207 /* for convenience */
208 numattrs = data->nattnums;
209 numrows = data->numrows;
210
211 /* transform the sorted rows into groups (sorted by frequency) */
212 groups = build_distinct_groups(nitems, items, mss, &ngroups);
213
214 /*
215 * The maximum number of MCV items to store, based on the statistics
216 * target we computed for the statistics object (from the target set for
217 * the object itself, attributes and the system default). In any case, we
218 * can't keep more groups than we have available.
219 */
220 nitems = stattarget;
221 if (nitems > ngroups)
222 nitems = ngroups;
223
224 /*
225 * Decide how many items to keep in the MCV list. We can't use the same
226 * algorithm as per-column MCV lists, because that only considers the
227 * actual group frequency - but we're primarily interested in how the
228 * actual frequency differs from the base frequency (product of simple
229 * per-column frequencies, as if the columns were independent).
230 *
231 * Using the same algorithm might exclude items that are close to the
232 * "average" frequency of the sample. But that does not say whether the
233 * observed frequency is close to the base frequency or not. We also need
234 * to consider unexpectedly uncommon items (again, compared to the base
235 * frequency), and the single-column algorithm does not have to.
236 *
237 * We simply decide how many items to keep by computing the minimum count
238 * using get_mincount_for_mcv_list() and then keep all items that seem to
239 * be more common than that.
240 */
241 mincount = get_mincount_for_mcv_list(numrows, totalrows);
242
243 /*
244 * Walk the groups until we find the first group with a count below the
245 * mincount threshold (the index of that group is the number of groups we
246 * want to keep).
247 */
248 for (i = 0; i < nitems; i++)
249 {
250 if (groups[i].count < mincount)
251 {
252 nitems = i;
253 break;
254 }
255 }
256
257 /*
258 * At this point, we know the number of items for the MCV list. There
259 * might be none (for uniform distribution with many groups), and in that
260 * case, there will be no MCV list. Otherwise, construct the MCV list.
261 */
262 if (nitems > 0)
263 {
264 int j;
265 SortItem key;
266 MultiSortSupport tmp;
267
268 /* frequencies for values in each attribute */
269 SortItem **freqs;
270 int *nfreqs;
271
272 /* used to search values */
273 tmp = (MultiSortSupport) palloc(offsetof(MultiSortSupportData, ssup)
274 + sizeof(SortSupportData));
275
276 /* compute frequencies for values in each column */
277 nfreqs = (int *) palloc0(sizeof(int) * numattrs);
278 freqs = build_column_frequencies(groups, ngroups, mss, nfreqs);
279
280 /*
281 * Allocate the MCV list structure, set the global parameters.
282 */
283 mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
284 sizeof(MCVItem) * nitems);
285
286 mcvlist->magic = STATS_MCV_MAGIC;
287 mcvlist->type = STATS_MCV_TYPE_BASIC;
288 mcvlist->ndimensions = numattrs;
289 mcvlist->nitems = nitems;
290
291 /* store info about data type OIDs */
292 for (i = 0; i < numattrs; i++)
293 mcvlist->types[i] = data->stats[i]->attrtypid;
294
295 /* Copy the first chunk of groups into the result. */
296 for (i = 0; i < nitems; i++)
297 {
298 /* just pointer to the proper place in the list */
299 MCVItem *item = &mcvlist->items[i];
300
301 item->values = (Datum *) palloc(sizeof(Datum) * numattrs);
302 item->isnull = (bool *) palloc(sizeof(bool) * numattrs);
303
304 /* copy values for the group */
305 memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
306 memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
307
308 /* groups should be sorted by frequency in descending order */
309 Assert((i == 0) || (groups[i - 1].count >= groups[i].count));
310
311 /* group frequency */
312 item->frequency = (double) groups[i].count / numrows;
313
314 /* base frequency, if the attributes were independent */
315 item->base_frequency = 1.0;
316 for (j = 0; j < numattrs; j++)
317 {
318 SortItem *freq;
319
320 /* single dimension */
321 tmp->ndims = 1;
322 tmp->ssup[0] = mss->ssup[j];
323
324 /* fill search key */
325 key.values = &groups[i].values[j];
326 key.isnull = &groups[i].isnull[j];
327
328 freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
329 sizeof(SortItem),
330 multi_sort_compare, tmp);
331
332 item->base_frequency *= ((double) freq->count) / numrows;
333 }
334 }
335
336 pfree(nfreqs);
337 pfree(freqs);
338 }
339
340 pfree(items);
341 pfree(groups);
342
343 return mcvlist;
344 }
345
346 /*
347 * build_mss
348 * Build a MultiSortSupport for the given StatsBuildData.
349 */
350 static MultiSortSupport
build_mss(StatsBuildData * data)351 build_mss(StatsBuildData *data)
352 {
353 int i;
354 int numattrs = data->nattnums;
355
356 /* Sort by multiple columns (using array of SortSupport) */
357 MultiSortSupport mss = multi_sort_init(numattrs);
358
359 /* prepare the sort functions for all the attributes */
360 for (i = 0; i < numattrs; i++)
361 {
362 VacAttrStats *colstat = data->stats[i];
363 TypeCacheEntry *type;
364
365 type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
366 if (type->lt_opr == InvalidOid) /* shouldn't happen */
367 elog(ERROR, "cache lookup failed for ordering operator for type %u",
368 colstat->attrtypid);
369
370 multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
371 }
372
373 return mss;
374 }
375
376 /*
377 * count_distinct_groups
378 * Count distinct combinations of SortItems in the array.
379 *
380 * The array is assumed to be sorted according to the MultiSortSupport.
381 */
382 static int
count_distinct_groups(int numrows,SortItem * items,MultiSortSupport mss)383 count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss)
384 {
385 int i;
386 int ndistinct;
387
388 ndistinct = 1;
389 for (i = 1; i < numrows; i++)
390 {
391 /* make sure the array really is sorted */
392 Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
393
394 if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
395 ndistinct += 1;
396 }
397
398 return ndistinct;
399 }
400
401 /*
402 * compare_sort_item_count
403 * Comparator for sorting items by count (frequencies) in descending
404 * order.
405 */
406 static int
compare_sort_item_count(const void * a,const void * b)407 compare_sort_item_count(const void *a, const void *b)
408 {
409 SortItem *ia = (SortItem *) a;
410 SortItem *ib = (SortItem *) b;
411
412 if (ia->count == ib->count)
413 return 0;
414 else if (ia->count > ib->count)
415 return -1;
416
417 return 1;
418 }
419
420 /*
421 * build_distinct_groups
422 * Build an array of SortItems for distinct groups and counts matching
423 * items.
424 *
425 * The 'items' array is assumed to be sorted.
426 */
427 static SortItem *
build_distinct_groups(int numrows,SortItem * items,MultiSortSupport mss,int * ndistinct)428 build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss,
429 int *ndistinct)
430 {
431 int i,
432 j;
433 int ngroups = count_distinct_groups(numrows, items, mss);
434
435 SortItem *groups = (SortItem *) palloc(ngroups * sizeof(SortItem));
436
437 j = 0;
438 groups[0] = items[0];
439 groups[0].count = 1;
440
441 for (i = 1; i < numrows; i++)
442 {
443 /* Assume sorted in ascending order. */
444 Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
445
446 /* New distinct group detected. */
447 if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
448 {
449 groups[++j] = items[i];
450 groups[j].count = 0;
451 }
452
453 groups[j].count++;
454 }
455
456 /* ensure we filled the expected number of distinct groups */
457 Assert(j + 1 == ngroups);
458
459 /* Sort the distinct groups by frequency (in descending order). */
460 pg_qsort((void *) groups, ngroups, sizeof(SortItem),
461 compare_sort_item_count);
462
463 *ndistinct = ngroups;
464 return groups;
465 }
466
467 /* compare sort items (single dimension) */
468 static int
sort_item_compare(const void * a,const void * b,void * arg)469 sort_item_compare(const void *a, const void *b, void *arg)
470 {
471 SortSupport ssup = (SortSupport) arg;
472 SortItem *ia = (SortItem *) a;
473 SortItem *ib = (SortItem *) b;
474
475 return ApplySortComparator(ia->values[0], ia->isnull[0],
476 ib->values[0], ib->isnull[0],
477 ssup);
478 }
479
480 /*
481 * build_column_frequencies
482 * Compute frequencies of values in each column.
483 *
484 * This returns an array of SortItems for each attribute the MCV is built
485 * on, with a frequency (number of occurrences) for each value. This is
486 * then used to compute "base" frequency of MCV items.
487 *
488 * All the memory is allocated in a single chunk, so that a single pfree
489 * is enough to release it. We do not allocate space for values/isnull
490 * arrays in the SortItems, because we can simply point into the input
491 * groups directly.
492 */
493 static SortItem **
build_column_frequencies(SortItem * groups,int ngroups,MultiSortSupport mss,int * ncounts)494 build_column_frequencies(SortItem *groups, int ngroups,
495 MultiSortSupport mss, int *ncounts)
496 {
497 int i,
498 dim;
499 SortItem **result;
500 char *ptr;
501
502 Assert(groups);
503 Assert(ncounts);
504
505 /* allocate arrays for all columns as a single chunk */
506 ptr = palloc(MAXALIGN(sizeof(SortItem *) * mss->ndims) +
507 mss->ndims * MAXALIGN(sizeof(SortItem) * ngroups));
508
509 /* initial array of pointers */
510 result = (SortItem **) ptr;
511 ptr += MAXALIGN(sizeof(SortItem *) * mss->ndims);
512
513 for (dim = 0; dim < mss->ndims; dim++)
514 {
515 SortSupport ssup = &mss->ssup[dim];
516
517 /* array of values for a single column */
518 result[dim] = (SortItem *) ptr;
519 ptr += MAXALIGN(sizeof(SortItem) * ngroups);
520
521 /* extract data for the dimension */
522 for (i = 0; i < ngroups; i++)
523 {
524 /* point into the input groups */
525 result[dim][i].values = &groups[i].values[dim];
526 result[dim][i].isnull = &groups[i].isnull[dim];
527 result[dim][i].count = groups[i].count;
528 }
529
530 /* sort the values, deduplicate */
531 qsort_arg((void *) result[dim], ngroups, sizeof(SortItem),
532 sort_item_compare, ssup);
533
534 /*
535 * Identify distinct values, compute frequency (there might be
536 * multiple MCV items containing this value, so we need to sum counts
537 * from all of them.
538 */
539 ncounts[dim] = 1;
540 for (i = 1; i < ngroups; i++)
541 {
542 if (sort_item_compare(&result[dim][i - 1], &result[dim][i], ssup) == 0)
543 {
544 result[dim][ncounts[dim] - 1].count += result[dim][i].count;
545 continue;
546 }
547
548 result[dim][ncounts[dim]] = result[dim][i];
549
550 ncounts[dim]++;
551 }
552 }
553
554 return result;
555 }
556
557 /*
558 * statext_mcv_load
559 * Load the MCV list for the indicated pg_statistic_ext tuple.
560 */
561 MCVList *
statext_mcv_load(Oid mvoid)562 statext_mcv_load(Oid mvoid)
563 {
564 MCVList *result;
565 bool isnull;
566 Datum mcvlist;
567 HeapTuple htup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(mvoid));
568
569 if (!HeapTupleIsValid(htup))
570 elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
571
572 mcvlist = SysCacheGetAttr(STATEXTDATASTXOID, htup,
573 Anum_pg_statistic_ext_data_stxdmcv, &isnull);
574
575 if (isnull)
576 elog(ERROR,
577 "requested statistics kind \"%c\" is not yet built for statistics object %u",
578 STATS_EXT_DEPENDENCIES, mvoid);
579
580 result = statext_mcv_deserialize(DatumGetByteaP(mcvlist));
581
582 ReleaseSysCache(htup);
583
584 return result;
585 }
586
587
588 /*
589 * statext_mcv_serialize
590 * Serialize MCV list into a pg_mcv_list value.
591 *
592 * The MCV items may include values of various data types, and it's reasonable
593 * to expect redundancy (values for a given attribute, repeated for multiple
594 * MCV list items). So we deduplicate the values into arrays, and then replace
595 * the values by indexes into those arrays.
596 *
597 * The overall structure of the serialized representation looks like this:
598 *
599 * +---------------+----------------+---------------------+-------+
600 * | header fields | dimension info | deduplicated values | items |
601 * +---------------+----------------+---------------------+-------+
602 *
603 * Where dimension info stores information about the type of the K-th
604 * attribute (e.g. typlen, typbyval and length of deduplicated values).
605 * Deduplicated values store deduplicated values for each attribute. And
606 * items store the actual MCV list items, with values replaced by indexes into
607 * the arrays.
608 *
609 * When serializing the items, we use uint16 indexes. The number of MCV items
610 * is limited by the statistics target (which is capped to 10k at the moment).
611 * We might increase this to 65k and still fit into uint16, so there's a bit of
612 * slack. Furthermore, this limit is on the number of distinct values per column,
613 * and we usually have few of those (and various combinations of them for the
614 * those MCV list). So uint16 seems fine for now.
615 *
616 * We don't really expect the serialization to save as much space as for
617 * histograms, as we are not doing any bucket splits (which is the source
618 * of high redundancy in histograms).
619 *
620 * TODO: Consider packing boolean flags (NULL) for each item into a single char
621 * (or a longer type) instead of using an array of bool items.
622 */
623 bytea *
statext_mcv_serialize(MCVList * mcvlist,VacAttrStats ** stats)624 statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)
625 {
626 int i;
627 int dim;
628 int ndims = mcvlist->ndimensions;
629
630 SortSupport ssup;
631 DimensionInfo *info;
632
633 Size total_length;
634
635 /* serialized items (indexes into arrays, etc.) */
636 bytea *raw;
637 char *ptr;
638 char *endptr PG_USED_FOR_ASSERTS_ONLY;
639
640 /* values per dimension (and number of non-NULL values) */
641 Datum **values = (Datum **) palloc0(sizeof(Datum *) * ndims);
642 int *counts = (int *) palloc0(sizeof(int) * ndims);
643
644 /*
645 * We'll include some rudimentary information about the attribute types
646 * (length, by-val flag), so that we don't have to look them up while
647 * deserializing the MCV list (we already have the type OID in the
648 * header). This is safe because when changing the type of the attribute
649 * the statistics gets dropped automatically. We need to store the info
650 * about the arrays of deduplicated values anyway.
651 */
652 info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);
653
654 /* sort support data for all attributes included in the MCV list */
655 ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims);
656
657 /* collect and deduplicate values for each dimension (attribute) */
658 for (dim = 0; dim < ndims; dim++)
659 {
660 int ndistinct;
661 TypeCacheEntry *typentry;
662
663 /*
664 * Lookup the LT operator (can't get it from stats extra_data, as we
665 * don't know how to interpret that - scalar vs. array etc.).
666 */
667 typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);
668
669 /* copy important info about the data type (length, by-value) */
670 info[dim].typlen = stats[dim]->attrtype->typlen;
671 info[dim].typbyval = stats[dim]->attrtype->typbyval;
672
673 /* allocate space for values in the attribute and collect them */
674 values[dim] = (Datum *) palloc0(sizeof(Datum) * mcvlist->nitems);
675
676 for (i = 0; i < mcvlist->nitems; i++)
677 {
678 /* skip NULL values - we don't need to deduplicate those */
679 if (mcvlist->items[i].isnull[dim])
680 continue;
681
682 /* append the value at the end */
683 values[dim][counts[dim]] = mcvlist->items[i].values[dim];
684 counts[dim] += 1;
685 }
686
687 /* if there are just NULL values in this dimension, we're done */
688 if (counts[dim] == 0)
689 continue;
690
691 /* sort and deduplicate the data */
692 ssup[dim].ssup_cxt = CurrentMemoryContext;
693 ssup[dim].ssup_collation = stats[dim]->attrcollid;
694 ssup[dim].ssup_nulls_first = false;
695
696 PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
697
698 qsort_arg(values[dim], counts[dim], sizeof(Datum),
699 compare_scalars_simple, &ssup[dim]);
700
701 /*
702 * Walk through the array and eliminate duplicate values, but keep the
703 * ordering (so that we can do a binary search later). We know there's
704 * at least one item as (counts[dim] != 0), so we can skip the first
705 * element.
706 */
707 ndistinct = 1; /* number of distinct values */
708 for (i = 1; i < counts[dim]; i++)
709 {
710 /* expect sorted array */
711 Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);
712
713 /* if the value is the same as the previous one, we can skip it */
714 if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))
715 continue;
716
717 values[dim][ndistinct] = values[dim][i];
718 ndistinct += 1;
719 }
720
721 /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
722 Assert(ndistinct <= PG_UINT16_MAX);
723
724 /*
725 * Store additional info about the attribute - number of deduplicated
726 * values, and also size of the serialized data. For fixed-length data
727 * types this is trivial to compute, for varwidth types we need to
728 * actually walk the array and sum the sizes.
729 */
730 info[dim].nvalues = ndistinct;
731
732 if (info[dim].typbyval) /* by-value data types */
733 {
734 info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
735
736 /*
737 * We copy the data into the MCV item during deserialization, so
738 * we don't need to allocate any extra space.
739 */
740 info[dim].nbytes_aligned = 0;
741 }
742 else if (info[dim].typlen > 0) /* fixed-length by-ref */
743 {
744 /*
745 * We don't care about alignment in the serialized data, so we
746 * pack the data as much as possible. But we also track how much
747 * data will be needed after deserialization, and in that case we
748 * need to account for alignment of each item.
749 *
750 * Note: As the items are fixed-length, we could easily compute
751 * this during deserialization, but we do it here anyway.
752 */
753 info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
754 info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);
755 }
756 else if (info[dim].typlen == -1) /* varlena */
757 {
758 info[dim].nbytes = 0;
759 info[dim].nbytes_aligned = 0;
760 for (i = 0; i < info[dim].nvalues; i++)
761 {
762 Size len;
763
764 /*
765 * For varlena values, we detoast the values and store the
766 * length and data separately. We don't bother with alignment
767 * here, which means that during deserialization we need to
768 * copy the fields and only access the copies.
769 */
770 values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));
771
772 /* serialized length (uint32 length + data) */
773 len = VARSIZE_ANY_EXHDR(values[dim][i]);
774 info[dim].nbytes += sizeof(uint32); /* length */
775 info[dim].nbytes += len; /* value (no header) */
776
777 /*
778 * During deserialization we'll build regular varlena values
779 * with full headers, and we need to align them properly.
780 */
781 info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);
782 }
783 }
784 else if (info[dim].typlen == -2) /* cstring */
785 {
786 info[dim].nbytes = 0;
787 info[dim].nbytes_aligned = 0;
788 for (i = 0; i < info[dim].nvalues; i++)
789 {
790 Size len;
791
792 /*
793 * cstring is handled similar to varlena - first we store the
794 * length as uint32 and then the data. We don't care about
795 * alignment, which means that during deserialization we need
796 * to copy the fields and only access the copies.
797 */
798
799 /* c-strings include terminator, so +1 byte */
800 len = strlen(DatumGetCString(values[dim][i])) + 1;
801 info[dim].nbytes += sizeof(uint32); /* length */
802 info[dim].nbytes += len; /* value */
803
804 /* space needed for properly aligned deserialized copies */
805 info[dim].nbytes_aligned += MAXALIGN(len);
806 }
807 }
808
809 /* we know (count>0) so there must be some data */
810 Assert(info[dim].nbytes > 0);
811 }
812
813 /*
814 * Now we can finally compute how much space we'll actually need for the
815 * whole serialized MCV list (varlena header, MCV header, dimension info
816 * for each attribute, deduplicated values and items).
817 */
818 total_length = (3 * sizeof(uint32)) /* magic + type + nitems */
819 + sizeof(AttrNumber) /* ndimensions */
820 + (ndims * sizeof(Oid)); /* attribute types */
821
822 /* dimension info */
823 total_length += ndims * sizeof(DimensionInfo);
824
825 /* add space for the arrays of deduplicated values */
826 for (i = 0; i < ndims; i++)
827 total_length += info[i].nbytes;
828
829 /*
830 * And finally account for the items (those are fixed-length, thanks to
831 * replacing values with uint16 indexes into the deduplicated arrays).
832 */
833 total_length += mcvlist->nitems * ITEM_SIZE(dim);
834
835 /*
836 * Allocate space for the whole serialized MCV list (we'll skip bytes, so
837 * we set them to zero to make the result more compressible).
838 */
839 raw = (bytea *) palloc0(VARHDRSZ + total_length);
840 SET_VARSIZE(raw, VARHDRSZ + total_length);
841
842 ptr = VARDATA(raw);
843 endptr = ptr + total_length;
844
845 /* copy the MCV list header fields, one by one */
846 memcpy(ptr, &mcvlist->magic, sizeof(uint32));
847 ptr += sizeof(uint32);
848
849 memcpy(ptr, &mcvlist->type, sizeof(uint32));
850 ptr += sizeof(uint32);
851
852 memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
853 ptr += sizeof(uint32);
854
855 memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
856 ptr += sizeof(AttrNumber);
857
858 memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
859 ptr += (sizeof(Oid) * ndims);
860
861 /* store information about the attributes (data amounts, ...) */
862 memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
863 ptr += sizeof(DimensionInfo) * ndims;
864
865 /* Copy the deduplicated values for all attributes to the output. */
866 for (dim = 0; dim < ndims; dim++)
867 {
868 /* remember the starting point for Asserts later */
869 char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
870
871 for (i = 0; i < info[dim].nvalues; i++)
872 {
873 Datum value = values[dim][i];
874
875 if (info[dim].typbyval) /* passed by value */
876 {
877 Datum tmp;
878
879 /*
880 * For byval types, we need to copy just the significant bytes
881 * - we can't use memcpy directly, as that assumes
882 * little-endian behavior. store_att_byval does almost what
883 * we need, but it requires a properly aligned buffer - the
884 * output buffer does not guarantee that. So we simply use a
885 * local Datum variable (which guarantees proper alignment),
886 * and then copy the value from it.
887 */
888 store_att_byval(&tmp, value, info[dim].typlen);
889
890 memcpy(ptr, &tmp, info[dim].typlen);
891 ptr += info[dim].typlen;
892 }
893 else if (info[dim].typlen > 0) /* passed by reference */
894 {
895 /* no special alignment needed, treated as char array */
896 memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
897 ptr += info[dim].typlen;
898 }
899 else if (info[dim].typlen == -1) /* varlena */
900 {
901 uint32 len = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
902
903 /* copy the length */
904 memcpy(ptr, &len, sizeof(uint32));
905 ptr += sizeof(uint32);
906
907 /* data from the varlena value (without the header) */
908 memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len);
909 ptr += len;
910 }
911 else if (info[dim].typlen == -2) /* cstring */
912 {
913 uint32 len = (uint32) strlen(DatumGetCString(value)) + 1;
914
915 /* copy the length */
916 memcpy(ptr, &len, sizeof(uint32));
917 ptr += sizeof(uint32);
918
919 /* value */
920 memcpy(ptr, DatumGetCString(value), len);
921 ptr += len;
922 }
923
924 /* no underflows or overflows */
925 Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));
926 }
927
928 /* we should get exactly nbytes of data for this dimension */
929 Assert((ptr - start) == info[dim].nbytes);
930 }
931
932 /* Serialize the items, with uint16 indexes instead of the values. */
933 for (i = 0; i < mcvlist->nitems; i++)
934 {
935 MCVItem *mcvitem = &mcvlist->items[i];
936
937 /* don't write beyond the allocated space */
938 Assert(ptr <= (endptr - ITEM_SIZE(dim)));
939
940 /* copy NULL and frequency flags into the serialized MCV */
941 memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);
942 ptr += sizeof(bool) * ndims;
943
944 memcpy(ptr, &mcvitem->frequency, sizeof(double));
945 ptr += sizeof(double);
946
947 memcpy(ptr, &mcvitem->base_frequency, sizeof(double));
948 ptr += sizeof(double);
949
950 /* store the indexes last */
951 for (dim = 0; dim < ndims; dim++)
952 {
953 uint16 index = 0;
954 Datum *value;
955
956 /* do the lookup only for non-NULL values */
957 if (!mcvitem->isnull[dim])
958 {
959 value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],
960 info[dim].nvalues, sizeof(Datum),
961 compare_scalars_simple, &ssup[dim]);
962
963 Assert(value != NULL); /* serialization or deduplication
964 * error */
965
966 /* compute index within the deduplicated array */
967 index = (uint16) (value - values[dim]);
968
969 /* check the index is within expected bounds */
970 Assert(index < info[dim].nvalues);
971 }
972
973 /* copy the index into the serialized MCV */
974 memcpy(ptr, &index, sizeof(uint16));
975 ptr += sizeof(uint16);
976 }
977
978 /* make sure we don't overflow the allocated value */
979 Assert(ptr <= endptr);
980 }
981
982 /* at this point we expect to match the total_length exactly */
983 Assert(ptr == endptr);
984
985 pfree(values);
986 pfree(counts);
987
988 return raw;
989 }
990
991 /*
992 * statext_mcv_deserialize
993 * Reads serialized MCV list into MCVList structure.
994 *
995 * All the memory needed by the MCV list is allocated as a single chunk, so
996 * it's possible to simply pfree() it at once.
997 */
998 MCVList *
statext_mcv_deserialize(bytea * data)999 statext_mcv_deserialize(bytea *data)
1000 {
1001 int dim,
1002 i;
1003 Size expected_size;
1004 MCVList *mcvlist;
1005 char *raw;
1006 char *ptr;
1007 char *endptr PG_USED_FOR_ASSERTS_ONLY;
1008
1009 int ndims,
1010 nitems;
1011 DimensionInfo *info = NULL;
1012
1013 /* local allocation buffer (used only for deserialization) */
1014 Datum **map = NULL;
1015
1016 /* MCV list */
1017 Size mcvlen;
1018
1019 /* buffer used for the result */
1020 Size datalen;
1021 char *dataptr;
1022 char *valuesptr;
1023 char *isnullptr;
1024
1025 if (data == NULL)
1026 return NULL;
1027
1028 /*
1029 * We can't possibly deserialize a MCV list if there's not even a complete
1030 * header. We need an explicit formula here, because we serialize the
1031 * header fields one by one, so we need to ignore struct alignment.
1032 */
1033 if (VARSIZE_ANY(data) < MinSizeOfMCVList)
1034 elog(ERROR, "invalid MCV size %zd (expected at least %zu)",
1035 VARSIZE_ANY(data), MinSizeOfMCVList);
1036
1037 /* read the MCV list header */
1038 mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
1039
1040 /* pointer to the data part (skip the varlena header) */
1041 raw = (char *) data;
1042 ptr = VARDATA_ANY(raw);
1043 endptr = (char *) raw + VARSIZE_ANY(data);
1044
1045 /* get the header and perform further sanity checks */
1046 memcpy(&mcvlist->magic, ptr, sizeof(uint32));
1047 ptr += sizeof(uint32);
1048
1049 memcpy(&mcvlist->type, ptr, sizeof(uint32));
1050 ptr += sizeof(uint32);
1051
1052 memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
1053 ptr += sizeof(uint32);
1054
1055 memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
1056 ptr += sizeof(AttrNumber);
1057
1058 if (mcvlist->magic != STATS_MCV_MAGIC)
1059 elog(ERROR, "invalid MCV magic %u (expected %u)",
1060 mcvlist->magic, STATS_MCV_MAGIC);
1061
1062 if (mcvlist->type != STATS_MCV_TYPE_BASIC)
1063 elog(ERROR, "invalid MCV type %u (expected %u)",
1064 mcvlist->type, STATS_MCV_TYPE_BASIC);
1065
1066 if (mcvlist->ndimensions == 0)
1067 elog(ERROR, "invalid zero-length dimension array in MCVList");
1068 else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||
1069 (mcvlist->ndimensions < 0))
1070 elog(ERROR, "invalid length (%d) dimension array in MCVList",
1071 mcvlist->ndimensions);
1072
1073 if (mcvlist->nitems == 0)
1074 elog(ERROR, "invalid zero-length item array in MCVList");
1075 else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
1076 elog(ERROR, "invalid length (%u) item array in MCVList",
1077 mcvlist->nitems);
1078
1079 nitems = mcvlist->nitems;
1080 ndims = mcvlist->ndimensions;
1081
1082 /*
1083 * Check amount of data including DimensionInfo for all dimensions and
1084 * also the serialized items (including uint16 indexes). Also, walk
1085 * through the dimension information and add it to the sum.
1086 */
1087 expected_size = SizeOfMCVList(ndims, nitems);
1088
1089 /*
1090 * Check that we have at least the dimension and info records, along with
1091 * the items. We don't know the size of the serialized values yet. We need
1092 * to do this check first, before accessing the dimension info.
1093 */
1094 if (VARSIZE_ANY(data) < expected_size)
1095 elog(ERROR, "invalid MCV size %zd (expected %zu)",
1096 VARSIZE_ANY(data), expected_size);
1097
1098 /* Now copy the array of type Oids. */
1099 memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);
1100 ptr += (sizeof(Oid) * ndims);
1101
1102 /* Now it's safe to access the dimension info. */
1103 info = palloc(ndims * sizeof(DimensionInfo));
1104
1105 memcpy(info, ptr, ndims * sizeof(DimensionInfo));
1106 ptr += (ndims * sizeof(DimensionInfo));
1107
1108 /* account for the value arrays */
1109 for (dim = 0; dim < ndims; dim++)
1110 {
1111 /*
1112 * XXX I wonder if we can/should rely on asserts here. Maybe those
1113 * checks should be done every time?
1114 */
1115 Assert(info[dim].nvalues >= 0);
1116 Assert(info[dim].nbytes >= 0);
1117
1118 expected_size += info[dim].nbytes;
1119 }
1120
1121 /*
1122 * Now we know the total expected MCV size, including all the pieces
1123 * (header, dimension info. items and deduplicated data). So do the final
1124 * check on size.
1125 */
1126 if (VARSIZE_ANY(data) != expected_size)
1127 elog(ERROR, "invalid MCV size %zd (expected %zu)",
1128 VARSIZE_ANY(data), expected_size);
1129
1130 /*
1131 * We need an array of Datum values for each dimension, so that we can
1132 * easily translate the uint16 indexes later. We also need a top-level
1133 * array of pointers to those per-dimension arrays.
1134 *
1135 * While allocating the arrays for dimensions, compute how much space we
1136 * need for a copy of the by-ref data, as we can't simply point to the
1137 * original values (it might go away).
1138 */
1139 datalen = 0; /* space for by-ref data */
1140 map = (Datum **) palloc(ndims * sizeof(Datum *));
1141
1142 for (dim = 0; dim < ndims; dim++)
1143 {
1144 map[dim] = (Datum *) palloc(sizeof(Datum) * info[dim].nvalues);
1145
1146 /* space needed for a copy of data for by-ref types */
1147 datalen += info[dim].nbytes_aligned;
1148 }
1149
1150 /*
1151 * Now resize the MCV list so that the allocation includes all the data.
1152 *
1153 * Allocate space for a copy of the data, as we can't simply reference the
1154 * serialized data - it's not aligned properly, and it may disappear while
1155 * we're still using the MCV list, e.g. due to catcache release.
1156 *
1157 * We do care about alignment here, because we will allocate all the
1158 * pieces at once, but then use pointers to different parts.
1159 */
1160 mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1161
1162 /* arrays of values and isnull flags for all MCV items */
1163 mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);
1164 mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);
1165
1166 /* we don't quite need to align this, but it makes some asserts easier */
1167 mcvlen += MAXALIGN(datalen);
1168
1169 /* now resize the deserialized MCV list, and compute pointers to parts */
1170 mcvlist = repalloc(mcvlist, mcvlen);
1171
1172 /* pointer to the beginning of values/isnull arrays */
1173 valuesptr = (char *) mcvlist
1174 + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1175
1176 isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));
1177
1178 dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));
1179
1180 /*
1181 * Build mapping (index => value) for translating the serialized data into
1182 * the in-memory representation.
1183 */
1184 for (dim = 0; dim < ndims; dim++)
1185 {
1186 /* remember start position in the input array */
1187 char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
1188
1189 if (info[dim].typbyval)
1190 {
1191 /* for by-val types we simply copy data into the mapping */
1192 for (i = 0; i < info[dim].nvalues; i++)
1193 {
1194 Datum v = 0;
1195
1196 memcpy(&v, ptr, info[dim].typlen);
1197 ptr += info[dim].typlen;
1198
1199 map[dim][i] = fetch_att(&v, true, info[dim].typlen);
1200
1201 /* no under/overflow of input array */
1202 Assert(ptr <= (start + info[dim].nbytes));
1203 }
1204 }
1205 else
1206 {
1207 /* for by-ref types we need to also make a copy of the data */
1208
1209 /* passed by reference, but fixed length (name, tid, ...) */
1210 if (info[dim].typlen > 0)
1211 {
1212 for (i = 0; i < info[dim].nvalues; i++)
1213 {
1214 memcpy(dataptr, ptr, info[dim].typlen);
1215 ptr += info[dim].typlen;
1216
1217 /* just point into the array */
1218 map[dim][i] = PointerGetDatum(dataptr);
1219 dataptr += MAXALIGN(info[dim].typlen);
1220 }
1221 }
1222 else if (info[dim].typlen == -1)
1223 {
1224 /* varlena */
1225 for (i = 0; i < info[dim].nvalues; i++)
1226 {
1227 uint32 len;
1228
1229 /* read the uint32 length */
1230 memcpy(&len, ptr, sizeof(uint32));
1231 ptr += sizeof(uint32);
1232
1233 /* the length is data-only */
1234 SET_VARSIZE(dataptr, len + VARHDRSZ);
1235 memcpy(VARDATA(dataptr), ptr, len);
1236 ptr += len;
1237
1238 /* just point into the array */
1239 map[dim][i] = PointerGetDatum(dataptr);
1240
1241 /* skip to place of the next deserialized value */
1242 dataptr += MAXALIGN(len + VARHDRSZ);
1243 }
1244 }
1245 else if (info[dim].typlen == -2)
1246 {
1247 /* cstring */
1248 for (i = 0; i < info[dim].nvalues; i++)
1249 {
1250 uint32 len;
1251
1252 memcpy(&len, ptr, sizeof(uint32));
1253 ptr += sizeof(uint32);
1254
1255 memcpy(dataptr, ptr, len);
1256 ptr += len;
1257
1258 /* just point into the array */
1259 map[dim][i] = PointerGetDatum(dataptr);
1260 dataptr += MAXALIGN(len);
1261 }
1262 }
1263
1264 /* no under/overflow of input array */
1265 Assert(ptr <= (start + info[dim].nbytes));
1266
1267 /* no overflow of the output mcv value */
1268 Assert(dataptr <= ((char *) mcvlist + mcvlen));
1269 }
1270
1271 /* check we consumed input data for this dimension exactly */
1272 Assert(ptr == (start + info[dim].nbytes));
1273 }
1274
1275 /* we should have also filled the MCV list exactly */
1276 Assert(dataptr == ((char *) mcvlist + mcvlen));
1277
1278 /* deserialize the MCV items and translate the indexes to Datums */
1279 for (i = 0; i < nitems; i++)
1280 {
1281 MCVItem *item = &mcvlist->items[i];
1282
1283 item->values = (Datum *) valuesptr;
1284 valuesptr += MAXALIGN(sizeof(Datum) * ndims);
1285
1286 item->isnull = (bool *) isnullptr;
1287 isnullptr += MAXALIGN(sizeof(bool) * ndims);
1288
1289 memcpy(item->isnull, ptr, sizeof(bool) * ndims);
1290 ptr += sizeof(bool) * ndims;
1291
1292 memcpy(&item->frequency, ptr, sizeof(double));
1293 ptr += sizeof(double);
1294
1295 memcpy(&item->base_frequency, ptr, sizeof(double));
1296 ptr += sizeof(double);
1297
1298 /* finally translate the indexes (for non-NULL only) */
1299 for (dim = 0; dim < ndims; dim++)
1300 {
1301 uint16 index;
1302
1303 memcpy(&index, ptr, sizeof(uint16));
1304 ptr += sizeof(uint16);
1305
1306 if (item->isnull[dim])
1307 continue;
1308
1309 item->values[dim] = map[dim][index];
1310 }
1311
1312 /* check we're not overflowing the input */
1313 Assert(ptr <= endptr);
1314 }
1315
1316 /* check that we processed all the data */
1317 Assert(ptr == endptr);
1318
1319 /* release the buffers used for mapping */
1320 for (dim = 0; dim < ndims; dim++)
1321 pfree(map[dim]);
1322
1323 pfree(map);
1324
1325 return mcvlist;
1326 }
1327
1328 /*
1329 * SRF with details about buckets of a histogram:
1330 *
1331 * - item ID (0...nitems)
1332 * - values (string array)
1333 * - nulls only (boolean array)
1334 * - frequency (double precision)
1335 * - base_frequency (double precision)
1336 *
1337 * The input is the OID of the statistics, and there are no rows returned if
1338 * the statistics contains no histogram.
1339 */
1340 Datum
pg_stats_ext_mcvlist_items(PG_FUNCTION_ARGS)1341 pg_stats_ext_mcvlist_items(PG_FUNCTION_ARGS)
1342 {
1343 FuncCallContext *funcctx;
1344
1345 /* stuff done only on the first call of the function */
1346 if (SRF_IS_FIRSTCALL())
1347 {
1348 MemoryContext oldcontext;
1349 MCVList *mcvlist;
1350 TupleDesc tupdesc;
1351
1352 /* create a function context for cross-call persistence */
1353 funcctx = SRF_FIRSTCALL_INIT();
1354
1355 /* switch to memory context appropriate for multiple function calls */
1356 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
1357
1358 mcvlist = statext_mcv_deserialize(PG_GETARG_BYTEA_P(0));
1359
1360 funcctx->user_fctx = mcvlist;
1361
1362 /* total number of tuples to be returned */
1363 funcctx->max_calls = 0;
1364 if (funcctx->user_fctx != NULL)
1365 funcctx->max_calls = mcvlist->nitems;
1366
1367 /* Build a tuple descriptor for our result type */
1368 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1369 ereport(ERROR,
1370 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1371 errmsg("function returning record called in context "
1372 "that cannot accept type record")));
1373 tupdesc = BlessTupleDesc(tupdesc);
1374
1375 /*
1376 * generate attribute metadata needed later to produce tuples from raw
1377 * C strings
1378 */
1379 funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
1380
1381 MemoryContextSwitchTo(oldcontext);
1382 }
1383
1384 /* stuff done on every call of the function */
1385 funcctx = SRF_PERCALL_SETUP();
1386
1387 if (funcctx->call_cntr < funcctx->max_calls) /* do when there is more
1388 * left to send */
1389 {
1390 Datum values[5];
1391 bool nulls[5];
1392 HeapTuple tuple;
1393 Datum result;
1394 ArrayBuildState *astate_values = NULL;
1395 ArrayBuildState *astate_nulls = NULL;
1396
1397 int i;
1398 MCVList *mcvlist;
1399 MCVItem *item;
1400
1401 mcvlist = (MCVList *) funcctx->user_fctx;
1402
1403 Assert(funcctx->call_cntr < mcvlist->nitems);
1404
1405 item = &mcvlist->items[funcctx->call_cntr];
1406
1407 for (i = 0; i < mcvlist->ndimensions; i++)
1408 {
1409
1410 astate_nulls = accumArrayResult(astate_nulls,
1411 BoolGetDatum(item->isnull[i]),
1412 false,
1413 BOOLOID,
1414 CurrentMemoryContext);
1415
1416 if (!item->isnull[i])
1417 {
1418 bool isvarlena;
1419 Oid outfunc;
1420 FmgrInfo fmgrinfo;
1421 Datum val;
1422 text *txt;
1423
1424 /* lookup output func for the type */
1425 getTypeOutputInfo(mcvlist->types[i], &outfunc, &isvarlena);
1426 fmgr_info(outfunc, &fmgrinfo);
1427
1428 val = FunctionCall1(&fmgrinfo, item->values[i]);
1429 txt = cstring_to_text(DatumGetPointer(val));
1430
1431 astate_values = accumArrayResult(astate_values,
1432 PointerGetDatum(txt),
1433 false,
1434 TEXTOID,
1435 CurrentMemoryContext);
1436 }
1437 else
1438 astate_values = accumArrayResult(astate_values,
1439 (Datum) 0,
1440 true,
1441 TEXTOID,
1442 CurrentMemoryContext);
1443 }
1444
1445 values[0] = Int32GetDatum(funcctx->call_cntr);
1446 values[1] = PointerGetDatum(makeArrayResult(astate_values, CurrentMemoryContext));
1447 values[2] = PointerGetDatum(makeArrayResult(astate_nulls, CurrentMemoryContext));
1448 values[3] = Float8GetDatum(item->frequency);
1449 values[4] = Float8GetDatum(item->base_frequency);
1450
1451 /* no NULLs in the tuple */
1452 memset(nulls, 0, sizeof(nulls));
1453
1454 /* build a tuple */
1455 tuple = heap_form_tuple(funcctx->attinmeta->tupdesc, values, nulls);
1456
1457 /* make the tuple into a datum */
1458 result = HeapTupleGetDatum(tuple);
1459
1460 SRF_RETURN_NEXT(funcctx, result);
1461 }
1462 else /* do when there is no more left */
1463 {
1464 SRF_RETURN_DONE(funcctx);
1465 }
1466 }
1467
1468 /*
1469 * pg_mcv_list_in - input routine for type pg_mcv_list.
1470 *
1471 * pg_mcv_list is real enough to be a table column, but it has no operations
1472 * of its own, and disallows input too
1473 */
1474 Datum
pg_mcv_list_in(PG_FUNCTION_ARGS)1475 pg_mcv_list_in(PG_FUNCTION_ARGS)
1476 {
1477 /*
1478 * pg_mcv_list stores the data in binary form and parsing text input is
1479 * not needed, so disallow this.
1480 */
1481 ereport(ERROR,
1482 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1483 errmsg("cannot accept a value of type %s", "pg_mcv_list")));
1484
1485 PG_RETURN_VOID(); /* keep compiler quiet */
1486 }
1487
1488
1489 /*
1490 * pg_mcv_list_out - output routine for type pg_mcv_list.
1491 *
1492 * MCV lists are serialized into a bytea value, so we simply call byteaout()
1493 * to serialize the value into text. But it'd be nice to serialize that into
1494 * a meaningful representation (e.g. for inspection by people).
1495 *
1496 * XXX This should probably return something meaningful, similar to what
1497 * pg_dependencies_out does. Not sure how to deal with the deduplicated
1498 * values, though - do we want to expand that or not?
1499 */
1500 Datum
pg_mcv_list_out(PG_FUNCTION_ARGS)1501 pg_mcv_list_out(PG_FUNCTION_ARGS)
1502 {
1503 return byteaout(fcinfo);
1504 }
1505
1506 /*
1507 * pg_mcv_list_recv - binary input routine for type pg_mcv_list.
1508 */
1509 Datum
pg_mcv_list_recv(PG_FUNCTION_ARGS)1510 pg_mcv_list_recv(PG_FUNCTION_ARGS)
1511 {
1512 ereport(ERROR,
1513 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1514 errmsg("cannot accept a value of type %s", "pg_mcv_list")));
1515
1516 PG_RETURN_VOID(); /* keep compiler quiet */
1517 }
1518
1519 /*
1520 * pg_mcv_list_send - binary output routine for type pg_mcv_list.
1521 *
1522 * MCV lists are serialized in a bytea value (although the type is named
1523 * differently), so let's just send that.
1524 */
1525 Datum
pg_mcv_list_send(PG_FUNCTION_ARGS)1526 pg_mcv_list_send(PG_FUNCTION_ARGS)
1527 {
1528 return byteasend(fcinfo);
1529 }
1530
1531 /*
1532 * match the attribute/expression to a dimension of the statistic
1533 *
1534 * Match the attribute/expression to statistics dimension. Optionally
1535 * determine the collation.
1536 */
1537 static int
mcv_match_expression(Node * expr,Bitmapset * keys,List * exprs,Oid * collid)1538 mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
1539 {
1540 int idx = -1;
1541
1542 if (IsA(expr, Var))
1543 {
1544 /* simple Var, so just lookup using varattno */
1545 Var *var = (Var *) expr;
1546
1547 if (collid)
1548 *collid = var->varcollid;
1549
1550 idx = bms_member_index(keys, var->varattno);
1551
1552 /* make sure the index is valid */
1553 Assert((idx >= 0) && (idx <= bms_num_members(keys)));
1554 }
1555 else
1556 {
1557 ListCell *lc;
1558
1559 /* expressions are stored after the simple columns */
1560 idx = bms_num_members(keys);
1561
1562 if (collid)
1563 *collid = exprCollation(expr);
1564
1565 /* expression - lookup in stats expressions */
1566 foreach(lc, exprs)
1567 {
1568 Node *stat_expr = (Node *) lfirst(lc);
1569
1570 if (equal(expr, stat_expr))
1571 break;
1572
1573 idx++;
1574 }
1575
1576 /* make sure the index is valid */
1577 Assert((idx >= bms_num_members(keys)) &&
1578 (idx <= bms_num_members(keys) + list_length(exprs)));
1579 }
1580
1581 Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
1582
1583 return idx;
1584 }
1585
1586 /*
1587 * mcv_get_match_bitmap
1588 * Evaluate clauses using the MCV list, and update the match bitmap.
1589 *
1590 * A match bitmap keeps match/mismatch status for each MCV item, and we
1591 * update it based on additional clauses. We also use it to skip items
1592 * that can't possibly match (e.g. item marked as "mismatch" can't change
1593 * to "match" when evaluating AND clause list).
1594 *
1595 * The function also returns a flag indicating whether there was an
1596 * equality condition for all attributes, the minimum frequency in the MCV
1597 * list, and a total MCV frequency (sum of frequencies for all items).
1598 *
1599 * XXX Currently the match bitmap uses a bool for each MCV item, which is
1600 * somewhat wasteful as we could do with just a single bit, thus reducing
1601 * the size to ~1/8. It would also allow us to combine bitmaps simply using
1602 * & and |, which should be faster than min/max. The bitmaps are fairly
1603 * small, though (thanks to the cap on the MCV list size).
1604 */
1605 static bool *
mcv_get_match_bitmap(PlannerInfo * root,List * clauses,Bitmapset * keys,List * exprs,MCVList * mcvlist,bool is_or)1606 mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
1607 Bitmapset *keys, List *exprs,
1608 MCVList *mcvlist, bool is_or)
1609 {
1610 int i;
1611 ListCell *l;
1612 bool *matches;
1613
1614 /* The bitmap may be partially built. */
1615 Assert(clauses != NIL);
1616 Assert(list_length(clauses) >= 1);
1617 Assert(mcvlist != NULL);
1618 Assert(mcvlist->nitems > 0);
1619 Assert(mcvlist->nitems <= STATS_MCVLIST_MAX_ITEMS);
1620
1621 matches = palloc(sizeof(bool) * mcvlist->nitems);
1622 memset(matches, (is_or) ? false : true,
1623 sizeof(bool) * mcvlist->nitems);
1624
1625 /*
1626 * Loop through the list of clauses, and for each of them evaluate all the
1627 * MCV items not yet eliminated by the preceding clauses.
1628 */
1629 foreach(l, clauses)
1630 {
1631 Node *clause = (Node *) lfirst(l);
1632
1633 /* if it's a RestrictInfo, then extract the clause */
1634 if (IsA(clause, RestrictInfo))
1635 clause = (Node *) ((RestrictInfo *) clause)->clause;
1636
1637 /*
1638 * Handle the various types of clauses - OpClause, NullTest and
1639 * AND/OR/NOT
1640 */
1641 if (is_opclause(clause))
1642 {
1643 OpExpr *expr = (OpExpr *) clause;
1644 FmgrInfo opproc;
1645
1646 /* valid only after examine_opclause_args returns true */
1647 Node *clause_expr;
1648 Const *cst;
1649 bool expronleft;
1650 int idx;
1651 Oid collid;
1652
1653 fmgr_info(get_opcode(expr->opno), &opproc);
1654
1655 /* extract the var/expr and const from the expression */
1656 if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
1657 elog(ERROR, "incompatible clause");
1658
1659 /* match the attribute/expression to a dimension of the statistic */
1660 idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
1661
1662 Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
1663
1664 /*
1665 * Walk through the MCV items and evaluate the current clause. We
1666 * can skip items that were already ruled out, and terminate if
1667 * there are no remaining MCV items that might possibly match.
1668 */
1669 for (i = 0; i < mcvlist->nitems; i++)
1670 {
1671 bool match = true;
1672 MCVItem *item = &mcvlist->items[i];
1673
1674 Assert(idx >= 0);
1675
1676 /*
1677 * When the MCV item or the Const value is NULL we can treat
1678 * this as a mismatch. We must not call the operator because
1679 * of strictness.
1680 */
1681 if (item->isnull[idx] || cst->constisnull)
1682 {
1683 matches[i] = RESULT_MERGE(matches[i], is_or, false);
1684 continue;
1685 }
1686
1687 /*
1688 * Skip MCV items that can't change result in the bitmap. Once
1689 * the value gets false for AND-lists, or true for OR-lists,
1690 * we don't need to look at more clauses.
1691 */
1692 if (RESULT_IS_FINAL(matches[i], is_or))
1693 continue;
1694
1695 /*
1696 * First check whether the constant is below the lower
1697 * boundary (in that case we can skip the bucket, because
1698 * there's no overlap).
1699 *
1700 * We don't store collations used to build the statistics, but
1701 * we can use the collation for the attribute itself, as
1702 * stored in varcollid. We do reset the statistics after a
1703 * type change (including collation change), so this is OK.
1704 * For expressions, we use the collation extracted from the
1705 * expression itself.
1706 */
1707 if (expronleft)
1708 match = DatumGetBool(FunctionCall2Coll(&opproc,
1709 collid,
1710 item->values[idx],
1711 cst->constvalue));
1712 else
1713 match = DatumGetBool(FunctionCall2Coll(&opproc,
1714 collid,
1715 cst->constvalue,
1716 item->values[idx]));
1717
1718 /* update the match bitmap with the result */
1719 matches[i] = RESULT_MERGE(matches[i], is_or, match);
1720 }
1721 }
1722 else if (IsA(clause, ScalarArrayOpExpr))
1723 {
1724 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1725 FmgrInfo opproc;
1726
1727 /* valid only after examine_opclause_args returns true */
1728 Node *clause_expr;
1729 Const *cst;
1730 bool expronleft;
1731 Oid collid;
1732 int idx;
1733
1734 /* array evaluation */
1735 ArrayType *arrayval;
1736 int16 elmlen;
1737 bool elmbyval;
1738 char elmalign;
1739 int num_elems;
1740 Datum *elem_values;
1741 bool *elem_nulls;
1742
1743 fmgr_info(get_opcode(expr->opno), &opproc);
1744
1745 /* extract the var/expr and const from the expression */
1746 if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
1747 elog(ERROR, "incompatible clause");
1748
1749 /* ScalarArrayOpExpr has the Var always on the left */
1750 Assert(expronleft);
1751
1752 /* XXX what if (cst->constisnull == NULL)? */
1753 if (!cst->constisnull)
1754 {
1755 arrayval = DatumGetArrayTypeP(cst->constvalue);
1756 get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
1757 &elmlen, &elmbyval, &elmalign);
1758 deconstruct_array(arrayval,
1759 ARR_ELEMTYPE(arrayval),
1760 elmlen, elmbyval, elmalign,
1761 &elem_values, &elem_nulls, &num_elems);
1762 }
1763
1764 /* match the attribute/expression to a dimension of the statistic */
1765 idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
1766
1767 /*
1768 * Walk through the MCV items and evaluate the current clause. We
1769 * can skip items that were already ruled out, and terminate if
1770 * there are no remaining MCV items that might possibly match.
1771 */
1772 for (i = 0; i < mcvlist->nitems; i++)
1773 {
1774 int j;
1775 bool match = (expr->useOr ? false : true);
1776 MCVItem *item = &mcvlist->items[i];
1777
1778 /*
1779 * When the MCV item or the Const value is NULL we can treat
1780 * this as a mismatch. We must not call the operator because
1781 * of strictness.
1782 */
1783 if (item->isnull[idx] || cst->constisnull)
1784 {
1785 matches[i] = RESULT_MERGE(matches[i], is_or, false);
1786 continue;
1787 }
1788
1789 /*
1790 * Skip MCV items that can't change result in the bitmap. Once
1791 * the value gets false for AND-lists, or true for OR-lists,
1792 * we don't need to look at more clauses.
1793 */
1794 if (RESULT_IS_FINAL(matches[i], is_or))
1795 continue;
1796
1797 for (j = 0; j < num_elems; j++)
1798 {
1799 Datum elem_value = elem_values[j];
1800 bool elem_isnull = elem_nulls[j];
1801 bool elem_match;
1802
1803 /* NULL values always evaluate as not matching. */
1804 if (elem_isnull)
1805 {
1806 match = RESULT_MERGE(match, expr->useOr, false);
1807 continue;
1808 }
1809
1810 /*
1811 * Stop evaluating the array elements once we reach a
1812 * matching value that can't change - ALL() is the same as
1813 * AND-list, ANY() is the same as OR-list.
1814 */
1815 if (RESULT_IS_FINAL(match, expr->useOr))
1816 break;
1817
1818 elem_match = DatumGetBool(FunctionCall2Coll(&opproc,
1819 collid,
1820 item->values[idx],
1821 elem_value));
1822
1823 match = RESULT_MERGE(match, expr->useOr, elem_match);
1824 }
1825
1826 /* update the match bitmap with the result */
1827 matches[i] = RESULT_MERGE(matches[i], is_or, match);
1828 }
1829 }
1830 else if (IsA(clause, NullTest))
1831 {
1832 NullTest *expr = (NullTest *) clause;
1833 Node *clause_expr = (Node *) (expr->arg);
1834
1835 /* match the attribute/expression to a dimension of the statistic */
1836 int idx = mcv_match_expression(clause_expr, keys, exprs, NULL);
1837
1838 /*
1839 * Walk through the MCV items and evaluate the current clause. We
1840 * can skip items that were already ruled out, and terminate if
1841 * there are no remaining MCV items that might possibly match.
1842 */
1843 for (i = 0; i < mcvlist->nitems; i++)
1844 {
1845 bool match = false; /* assume mismatch */
1846 MCVItem *item = &mcvlist->items[i];
1847
1848 /* if the clause mismatches the MCV item, update the bitmap */
1849 switch (expr->nulltesttype)
1850 {
1851 case IS_NULL:
1852 match = (item->isnull[idx]) ? true : match;
1853 break;
1854
1855 case IS_NOT_NULL:
1856 match = (!item->isnull[idx]) ? true : match;
1857 break;
1858 }
1859
1860 /* now, update the match bitmap, depending on OR/AND type */
1861 matches[i] = RESULT_MERGE(matches[i], is_or, match);
1862 }
1863 }
1864 else if (is_orclause(clause) || is_andclause(clause))
1865 {
1866 /* AND/OR clause, with all subclauses being compatible */
1867
1868 int i;
1869 BoolExpr *bool_clause = ((BoolExpr *) clause);
1870 List *bool_clauses = bool_clause->args;
1871
1872 /* match/mismatch bitmap for each MCV item */
1873 bool *bool_matches = NULL;
1874
1875 Assert(bool_clauses != NIL);
1876 Assert(list_length(bool_clauses) >= 2);
1877
1878 /* build the match bitmap for the OR-clauses */
1879 bool_matches = mcv_get_match_bitmap(root, bool_clauses, keys, exprs,
1880 mcvlist, is_orclause(clause));
1881
1882 /*
1883 * Merge the bitmap produced by mcv_get_match_bitmap into the
1884 * current one. We need to consider if we're evaluating AND or OR
1885 * condition when merging the results.
1886 */
1887 for (i = 0; i < mcvlist->nitems; i++)
1888 matches[i] = RESULT_MERGE(matches[i], is_or, bool_matches[i]);
1889
1890 pfree(bool_matches);
1891 }
1892 else if (is_notclause(clause))
1893 {
1894 /* NOT clause, with all subclauses compatible */
1895
1896 int i;
1897 BoolExpr *not_clause = ((BoolExpr *) clause);
1898 List *not_args = not_clause->args;
1899
1900 /* match/mismatch bitmap for each MCV item */
1901 bool *not_matches = NULL;
1902
1903 Assert(not_args != NIL);
1904 Assert(list_length(not_args) == 1);
1905
1906 /* build the match bitmap for the NOT-clause */
1907 not_matches = mcv_get_match_bitmap(root, not_args, keys, exprs,
1908 mcvlist, false);
1909
1910 /*
1911 * Merge the bitmap produced by mcv_get_match_bitmap into the
1912 * current one. We're handling a NOT clause, so invert the result
1913 * before merging it into the global bitmap.
1914 */
1915 for (i = 0; i < mcvlist->nitems; i++)
1916 matches[i] = RESULT_MERGE(matches[i], is_or, !not_matches[i]);
1917
1918 pfree(not_matches);
1919 }
1920 else if (IsA(clause, Var))
1921 {
1922 /* Var (has to be a boolean Var, possibly from below NOT) */
1923
1924 Var *var = (Var *) (clause);
1925
1926 /* match the attribute to a dimension of the statistic */
1927 int idx = bms_member_index(keys, var->varattno);
1928
1929 Assert(var->vartype == BOOLOID);
1930
1931 /*
1932 * Walk through the MCV items and evaluate the current clause. We
1933 * can skip items that were already ruled out, and terminate if
1934 * there are no remaining MCV items that might possibly match.
1935 */
1936 for (i = 0; i < mcvlist->nitems; i++)
1937 {
1938 MCVItem *item = &mcvlist->items[i];
1939 bool match = false;
1940
1941 /* if the item is NULL, it's a mismatch */
1942 if (!item->isnull[idx] && DatumGetBool(item->values[idx]))
1943 match = true;
1944
1945 /* update the result bitmap */
1946 matches[i] = RESULT_MERGE(matches[i], is_or, match);
1947 }
1948 }
1949 else
1950 elog(ERROR, "unknown clause type: %d", clause->type);
1951 }
1952
1953 return matches;
1954 }
1955
1956
1957 /*
1958 * mcv_combine_selectivities
1959 * Combine per-column and multi-column MCV selectivity estimates.
1960 *
1961 * simple_sel is a "simple" selectivity estimate (produced without using any
1962 * extended statistics, essentially assuming independence of columns/clauses).
1963 *
1964 * mcv_sel and mcv_basesel are sums of the frequencies and base frequencies of
1965 * all matching MCV items. The difference (mcv_sel - mcv_basesel) is then
1966 * essentially interpreted as a correction to be added to simple_sel, as
1967 * described below.
1968 *
1969 * mcv_totalsel is the sum of the frequencies of all MCV items (not just the
1970 * matching ones). This is used as an upper bound on the portion of the
1971 * selectivity estimates not covered by the MCV statistics.
1972 *
1973 * Note: While simple and base selectivities are defined in a quite similar
1974 * way, the values are computed differently and are not therefore equal. The
1975 * simple selectivity is computed as a product of per-clause estimates, while
1976 * the base selectivity is computed by adding up base frequencies of matching
1977 * items of the multi-column MCV list. So the values may differ for two main
1978 * reasons - (a) the MCV list may not cover 100% of the data and (b) some of
1979 * the MCV items did not match the estimated clauses.
1980 *
1981 * As both (a) and (b) reduce the base selectivity value, it generally holds
1982 * that (simple_sel >= mcv_basesel). If the MCV list covers all the data, the
1983 * values may be equal.
1984 *
1985 * So, other_sel = (simple_sel - mcv_basesel) is an estimate for the part not
1986 * covered by the MCV list, and (mcv_sel - mcv_basesel) may be seen as a
1987 * correction for the part covered by the MCV list. Those two statements are
1988 * actually equivalent.
1989 */
1990 Selectivity
mcv_combine_selectivities(Selectivity simple_sel,Selectivity mcv_sel,Selectivity mcv_basesel,Selectivity mcv_totalsel)1991 mcv_combine_selectivities(Selectivity simple_sel,
1992 Selectivity mcv_sel,
1993 Selectivity mcv_basesel,
1994 Selectivity mcv_totalsel)
1995 {
1996 Selectivity other_sel;
1997 Selectivity sel;
1998
1999 /* estimated selectivity of values not covered by MCV matches */
2000 other_sel = simple_sel - mcv_basesel;
2001 CLAMP_PROBABILITY(other_sel);
2002
2003 /* this non-MCV selectivity cannot exceed 1 - mcv_totalsel */
2004 if (other_sel > 1.0 - mcv_totalsel)
2005 other_sel = 1.0 - mcv_totalsel;
2006
2007 /* overall selectivity is the sum of the MCV and non-MCV parts */
2008 sel = mcv_sel + other_sel;
2009 CLAMP_PROBABILITY(sel);
2010
2011 return sel;
2012 }
2013
2014
2015 /*
2016 * mcv_clauselist_selectivity
2017 * Use MCV statistics to estimate the selectivity of an implicitly-ANDed
2018 * list of clauses.
2019 *
2020 * This determines which MCV items match every clause in the list and returns
2021 * the sum of the frequencies of those items.
2022 *
2023 * In addition, it returns the sum of the base frequencies of each of those
2024 * items (that is the sum of the selectivities that each item would have if
2025 * the columns were independent of one another), and the total selectivity of
2026 * all the MCV items (not just the matching ones). These are expected to be
2027 * used together with a "simple" selectivity estimate (one based only on
2028 * per-column statistics) to produce an overall selectivity estimate that
2029 * makes use of both per-column and multi-column statistics --- see
2030 * mcv_combine_selectivities().
2031 */
2032 Selectivity
mcv_clauselist_selectivity(PlannerInfo * root,StatisticExtInfo * stat,List * clauses,int varRelid,JoinType jointype,SpecialJoinInfo * sjinfo,RelOptInfo * rel,Selectivity * basesel,Selectivity * totalsel)2033 mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat,
2034 List *clauses, int varRelid,
2035 JoinType jointype, SpecialJoinInfo *sjinfo,
2036 RelOptInfo *rel,
2037 Selectivity *basesel, Selectivity *totalsel)
2038 {
2039 int i;
2040 MCVList *mcv;
2041 Selectivity s = 0.0;
2042
2043 /* match/mismatch bitmap for each MCV item */
2044 bool *matches = NULL;
2045
2046 /* load the MCV list stored in the statistics object */
2047 mcv = statext_mcv_load(stat->statOid);
2048
2049 /* build a match bitmap for the clauses */
2050 matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
2051 mcv, false);
2052
2053 /* sum frequencies for all the matching MCV items */
2054 *basesel = 0.0;
2055 *totalsel = 0.0;
2056 for (i = 0; i < mcv->nitems; i++)
2057 {
2058 *totalsel += mcv->items[i].frequency;
2059
2060 if (matches[i] != false)
2061 {
2062 *basesel += mcv->items[i].base_frequency;
2063 s += mcv->items[i].frequency;
2064 }
2065 }
2066
2067 return s;
2068 }
2069
2070
2071 /*
2072 * mcv_clause_selectivity_or
2073 * Use MCV statistics to estimate the selectivity of a clause that
2074 * appears in an ORed list of clauses.
2075 *
2076 * As with mcv_clauselist_selectivity() this determines which MCV items match
2077 * the clause and returns both the sum of the frequencies and the sum of the
2078 * base frequencies of those items, as well as the sum of the frequencies of
2079 * all MCV items (not just the matching ones) so that this information can be
2080 * used by mcv_combine_selectivities() to produce a selectivity estimate that
2081 * makes use of both per-column and multi-column statistics.
2082 *
2083 * Additionally, we return information to help compute the overall selectivity
2084 * of the ORed list of clauses assumed to contain this clause. This function
2085 * is intended to be called for each clause in the ORed list of clauses,
2086 * allowing the overall selectivity to be computed using the following
2087 * algorithm:
2088 *
2089 * Suppose P[n] = P(C[1] OR C[2] OR ... OR C[n]) is the combined selectivity
2090 * of the first n clauses in the list. Then the combined selectivity taking
2091 * into account the next clause C[n+1] can be written as
2092 *
2093 * P[n+1] = P[n] + P(C[n+1]) - P((C[1] OR ... OR C[n]) AND C[n+1])
2094 *
2095 * The final term above represents the overlap between the clauses examined so
2096 * far and the (n+1)'th clause. To estimate its selectivity, we track the
2097 * match bitmap for the ORed list of clauses examined so far and examine its
2098 * intersection with the match bitmap for the (n+1)'th clause.
2099 *
2100 * We then also return the sums of the MCV item frequencies and base
2101 * frequencies for the match bitmap intersection corresponding to the overlap
2102 * term above, so that they can be combined with a simple selectivity estimate
2103 * for that term.
2104 *
2105 * The parameter "or_matches" is an in/out parameter tracking the match bitmap
2106 * for the clauses examined so far. The caller is expected to set it to NULL
2107 * the first time it calls this function.
2108 */
2109 Selectivity
mcv_clause_selectivity_or(PlannerInfo * root,StatisticExtInfo * stat,MCVList * mcv,Node * clause,bool ** or_matches,Selectivity * basesel,Selectivity * overlap_mcvsel,Selectivity * overlap_basesel,Selectivity * totalsel)2110 mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat,
2111 MCVList *mcv, Node *clause, bool **or_matches,
2112 Selectivity *basesel, Selectivity *overlap_mcvsel,
2113 Selectivity *overlap_basesel, Selectivity *totalsel)
2114 {
2115 Selectivity s = 0.0;
2116 bool *new_matches;
2117 int i;
2118
2119 /* build the OR-matches bitmap, if not built already */
2120 if (*or_matches == NULL)
2121 *or_matches = palloc0(sizeof(bool) * mcv->nitems);
2122
2123 /* build the match bitmap for the new clause */
2124 new_matches = mcv_get_match_bitmap(root, list_make1(clause), stat->keys,
2125 stat->exprs, mcv, false);
2126
2127 /*
2128 * Sum the frequencies for all the MCV items matching this clause and also
2129 * those matching the overlap between this clause and any of the preceding
2130 * clauses as described above.
2131 */
2132 *basesel = 0.0;
2133 *overlap_mcvsel = 0.0;
2134 *overlap_basesel = 0.0;
2135 *totalsel = 0.0;
2136 for (i = 0; i < mcv->nitems; i++)
2137 {
2138 *totalsel += mcv->items[i].frequency;
2139
2140 if (new_matches[i])
2141 {
2142 s += mcv->items[i].frequency;
2143 *basesel += mcv->items[i].base_frequency;
2144
2145 if ((*or_matches)[i])
2146 {
2147 *overlap_mcvsel += mcv->items[i].frequency;
2148 *overlap_basesel += mcv->items[i].base_frequency;
2149 }
2150 }
2151
2152 /* update the OR-matches bitmap for the next clause */
2153 (*or_matches)[i] = (*or_matches)[i] || new_matches[i];
2154 }
2155
2156 pfree(new_matches);
2157
2158 return s;
2159 }
2160