1 /*-------------------------------------------------------------------------
2  *
3  * mcv.c
4  *	  POSTGRES multivariate MCV lists
5  *
6  *
7  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  *	  src/backend/statistics/mcv.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include <math.h>
18 
19 #include "access/htup_details.h"
20 #include "catalog/pg_collation.h"
21 #include "catalog/pg_statistic_ext.h"
22 #include "catalog/pg_statistic_ext_data.h"
23 #include "fmgr.h"
24 #include "funcapi.h"
25 #include "nodes/nodeFuncs.h"
26 #include "optimizer/clauses.h"
27 #include "statistics/extended_stats_internal.h"
28 #include "statistics/statistics.h"
29 #include "utils/array.h"
30 #include "utils/builtins.h"
31 #include "utils/bytea.h"
32 #include "utils/fmgroids.h"
33 #include "utils/fmgrprotos.h"
34 #include "utils/lsyscache.h"
35 #include "utils/selfuncs.h"
36 #include "utils/syscache.h"
37 #include "utils/typcache.h"
38 
39 /*
40  * Computes size of a serialized MCV item, depending on the number of
41  * dimensions (columns) the statistic is defined on. The datum values are
42  * stored in a separate array (deduplicated, to minimize the size), and
43  * so the serialized items only store uint16 indexes into that array.
44  *
45  * Each serialized item stores (in this order):
46  *
47  * - indexes to values	  (ndim * sizeof(uint16))
48  * - null flags			  (ndim * sizeof(bool))
49  * - frequency			  (sizeof(double))
50  * - base_frequency		  (sizeof(double))
51  *
52  * There is no alignment padding within an MCV item.
53  * So in total each MCV item requires this many bytes:
54  *
55  *	 ndim * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double)
56  */
57 #define ITEM_SIZE(ndims)	\
58 	((ndims) * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double))
59 
60 /*
61  * Used to compute size of serialized MCV list representation.
62  */
63 #define MinSizeOfMCVList		\
64 	(VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))
65 
66 /*
67  * Size of the serialized MCV list, excluding the space needed for
68  * deduplicated per-dimension values. The macro is meant to be used
69  * when it's not yet safe to access the serialized info about amount
70  * of data for each column.
71  */
72 #define SizeOfMCVList(ndims,nitems)	\
73 	((MinSizeOfMCVList + sizeof(Oid) * (ndims)) + \
74 	 ((ndims) * sizeof(DimensionInfo)) + \
75 	 ((nitems) * ITEM_SIZE(ndims)))
76 
77 static MultiSortSupport build_mss(StatsBuildData *data);
78 
79 static SortItem *build_distinct_groups(int numrows, SortItem *items,
80 									   MultiSortSupport mss, int *ndistinct);
81 
82 static SortItem **build_column_frequencies(SortItem *groups, int ngroups,
83 										   MultiSortSupport mss, int *ncounts);
84 
85 static int	count_distinct_groups(int numrows, SortItem *items,
86 								  MultiSortSupport mss);
87 
88 /*
89  * Compute new value for bitmap item, considering whether it's used for
90  * clauses connected by AND/OR.
91  */
92 #define RESULT_MERGE(value, is_or, match) \
93 	((is_or) ? ((value) || (match)) : ((value) && (match)))
94 
95 /*
96  * When processing a list of clauses, the bitmap item may get set to a value
97  * such that additional clauses can't change it. For example, when processing
98  * a list of clauses connected to AND, as soon as the item gets set to 'false'
99  * then it'll remain like that. Similarly clauses connected by OR and 'true'.
100  *
101  * Returns true when the value in the bitmap can't change no matter how the
102  * remaining clauses are evaluated.
103  */
104 #define RESULT_IS_FINAL(value, is_or)	((is_or) ? (value) : (!(value)))
105 
106 /*
107  * get_mincount_for_mcv_list
108  * 		Determine the minimum number of times a value needs to appear in
109  * 		the sample for it to be included in the MCV list.
110  *
111  * We want to keep only values that appear sufficiently often in the
112  * sample that it is reasonable to extrapolate their sample frequencies to
113  * the entire table.  We do this by placing an upper bound on the relative
114  * standard error of the sample frequency, so that any estimates the
115  * planner generates from the MCV statistics can be expected to be
116  * reasonably accurate.
117  *
118  * Since we are sampling without replacement, the sample frequency of a
119  * particular value is described by a hypergeometric distribution.  A
120  * common rule of thumb when estimating errors in this situation is to
121  * require at least 10 instances of the value in the sample, in which case
122  * the distribution can be approximated by a normal distribution, and
123  * standard error analysis techniques can be applied.  Given a sample size
124  * of n, a population size of N, and a sample frequency of p=cnt/n, the
125  * standard error of the proportion p is given by
126  *		SE = sqrt(p*(1-p)/n) * sqrt((N-n)/(N-1))
127  * where the second term is the finite population correction.  To get
128  * reasonably accurate planner estimates, we impose an upper bound on the
129  * relative standard error of 20% -- i.e., SE/p < 0.2.  This 20% relative
130  * error bound is fairly arbitrary, but has been found empirically to work
131  * well.  Rearranging this formula gives a lower bound on the number of
132  * instances of the value seen:
133  *		cnt > n*(N-n) / (N-n+0.04*n*(N-1))
134  * This bound is at most 25, and approaches 0 as n approaches 0 or N. The
135  * case where n approaches 0 cannot happen in practice, since the sample
136  * size is at least 300.  The case where n approaches N corresponds to
137  * sampling the whole the table, in which case it is reasonable to keep
138  * the whole MCV list (have no lower bound), so it makes sense to apply
139  * this formula for all inputs, even though the above derivation is
140  * technically only valid when the right hand side is at least around 10.
141  *
142  * An alternative way to look at this formula is as follows -- assume that
143  * the number of instances of the value seen scales up to the entire
144  * table, so that the population count is K=N*cnt/n. Then the distribution
145  * in the sample is a hypergeometric distribution parameterised by N, n
146  * and K, and the bound above is mathematically equivalent to demanding
147  * that the standard deviation of that distribution is less than 20% of
148  * its mean.  Thus the relative errors in any planner estimates produced
149  * from the MCV statistics are likely to be not too large.
150  */
151 static double
get_mincount_for_mcv_list(int samplerows,double totalrows)152 get_mincount_for_mcv_list(int samplerows, double totalrows)
153 {
154 	double		n = samplerows;
155 	double		N = totalrows;
156 	double		numer,
157 				denom;
158 
159 	numer = n * (N - n);
160 	denom = N - n + 0.04 * n * (N - 1);
161 
162 	/* Guard against division by zero (possible if n = N = 1) */
163 	if (denom == 0.0)
164 		return 0.0;
165 
166 	return numer / denom;
167 }
168 
169 /*
170  * Builds MCV list from the set of sampled rows.
171  *
172  * The algorithm is quite simple:
173  *
174  *	   (1) sort the data (default collation, '<' for the data type)
175  *
176  *	   (2) count distinct groups, decide how many to keep
177  *
178  *	   (3) build the MCV list using the threshold determined in (2)
179  *
180  *	   (4) remove rows represented by the MCV from the sample
181  *
182  */
183 MCVList *
statext_mcv_build(StatsBuildData * data,double totalrows,int stattarget)184 statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget)
185 {
186 	int			i,
187 				numattrs,
188 				numrows,
189 				ngroups,
190 				nitems;
191 	double		mincount;
192 	SortItem   *items;
193 	SortItem   *groups;
194 	MCVList    *mcvlist = NULL;
195 	MultiSortSupport mss;
196 
197 	/* comparator for all the columns */
198 	mss = build_mss(data);
199 
200 	/* sort the rows */
201 	items = build_sorted_items(data, &nitems, mss,
202 							   data->nattnums, data->attnums);
203 
204 	if (!items)
205 		return NULL;
206 
207 	/* for convenience */
208 	numattrs = data->nattnums;
209 	numrows = data->numrows;
210 
211 	/* transform the sorted rows into groups (sorted by frequency) */
212 	groups = build_distinct_groups(nitems, items, mss, &ngroups);
213 
214 	/*
215 	 * The maximum number of MCV items to store, based on the statistics
216 	 * target we computed for the statistics object (from the target set for
217 	 * the object itself, attributes and the system default). In any case, we
218 	 * can't keep more groups than we have available.
219 	 */
220 	nitems = stattarget;
221 	if (nitems > ngroups)
222 		nitems = ngroups;
223 
224 	/*
225 	 * Decide how many items to keep in the MCV list. We can't use the same
226 	 * algorithm as per-column MCV lists, because that only considers the
227 	 * actual group frequency - but we're primarily interested in how the
228 	 * actual frequency differs from the base frequency (product of simple
229 	 * per-column frequencies, as if the columns were independent).
230 	 *
231 	 * Using the same algorithm might exclude items that are close to the
232 	 * "average" frequency of the sample. But that does not say whether the
233 	 * observed frequency is close to the base frequency or not. We also need
234 	 * to consider unexpectedly uncommon items (again, compared to the base
235 	 * frequency), and the single-column algorithm does not have to.
236 	 *
237 	 * We simply decide how many items to keep by computing the minimum count
238 	 * using get_mincount_for_mcv_list() and then keep all items that seem to
239 	 * be more common than that.
240 	 */
241 	mincount = get_mincount_for_mcv_list(numrows, totalrows);
242 
243 	/*
244 	 * Walk the groups until we find the first group with a count below the
245 	 * mincount threshold (the index of that group is the number of groups we
246 	 * want to keep).
247 	 */
248 	for (i = 0; i < nitems; i++)
249 	{
250 		if (groups[i].count < mincount)
251 		{
252 			nitems = i;
253 			break;
254 		}
255 	}
256 
257 	/*
258 	 * At this point, we know the number of items for the MCV list. There
259 	 * might be none (for uniform distribution with many groups), and in that
260 	 * case, there will be no MCV list. Otherwise, construct the MCV list.
261 	 */
262 	if (nitems > 0)
263 	{
264 		int			j;
265 		SortItem	key;
266 		MultiSortSupport tmp;
267 
268 		/* frequencies for values in each attribute */
269 		SortItem  **freqs;
270 		int		   *nfreqs;
271 
272 		/* used to search values */
273 		tmp = (MultiSortSupport) palloc(offsetof(MultiSortSupportData, ssup)
274 										+ sizeof(SortSupportData));
275 
276 		/* compute frequencies for values in each column */
277 		nfreqs = (int *) palloc0(sizeof(int) * numattrs);
278 		freqs = build_column_frequencies(groups, ngroups, mss, nfreqs);
279 
280 		/*
281 		 * Allocate the MCV list structure, set the global parameters.
282 		 */
283 		mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
284 									  sizeof(MCVItem) * nitems);
285 
286 		mcvlist->magic = STATS_MCV_MAGIC;
287 		mcvlist->type = STATS_MCV_TYPE_BASIC;
288 		mcvlist->ndimensions = numattrs;
289 		mcvlist->nitems = nitems;
290 
291 		/* store info about data type OIDs */
292 		for (i = 0; i < numattrs; i++)
293 			mcvlist->types[i] = data->stats[i]->attrtypid;
294 
295 		/* Copy the first chunk of groups into the result. */
296 		for (i = 0; i < nitems; i++)
297 		{
298 			/* just pointer to the proper place in the list */
299 			MCVItem    *item = &mcvlist->items[i];
300 
301 			item->values = (Datum *) palloc(sizeof(Datum) * numattrs);
302 			item->isnull = (bool *) palloc(sizeof(bool) * numattrs);
303 
304 			/* copy values for the group */
305 			memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
306 			memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
307 
308 			/* groups should be sorted by frequency in descending order */
309 			Assert((i == 0) || (groups[i - 1].count >= groups[i].count));
310 
311 			/* group frequency */
312 			item->frequency = (double) groups[i].count / numrows;
313 
314 			/* base frequency, if the attributes were independent */
315 			item->base_frequency = 1.0;
316 			for (j = 0; j < numattrs; j++)
317 			{
318 				SortItem   *freq;
319 
320 				/* single dimension */
321 				tmp->ndims = 1;
322 				tmp->ssup[0] = mss->ssup[j];
323 
324 				/* fill search key */
325 				key.values = &groups[i].values[j];
326 				key.isnull = &groups[i].isnull[j];
327 
328 				freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
329 												sizeof(SortItem),
330 												multi_sort_compare, tmp);
331 
332 				item->base_frequency *= ((double) freq->count) / numrows;
333 			}
334 		}
335 
336 		pfree(nfreqs);
337 		pfree(freqs);
338 	}
339 
340 	pfree(items);
341 	pfree(groups);
342 
343 	return mcvlist;
344 }
345 
346 /*
347  * build_mss
348  *		Build a MultiSortSupport for the given StatsBuildData.
349  */
350 static MultiSortSupport
build_mss(StatsBuildData * data)351 build_mss(StatsBuildData *data)
352 {
353 	int			i;
354 	int			numattrs = data->nattnums;
355 
356 	/* Sort by multiple columns (using array of SortSupport) */
357 	MultiSortSupport mss = multi_sort_init(numattrs);
358 
359 	/* prepare the sort functions for all the attributes */
360 	for (i = 0; i < numattrs; i++)
361 	{
362 		VacAttrStats *colstat = data->stats[i];
363 		TypeCacheEntry *type;
364 
365 		type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
366 		if (type->lt_opr == InvalidOid) /* shouldn't happen */
367 			elog(ERROR, "cache lookup failed for ordering operator for type %u",
368 				 colstat->attrtypid);
369 
370 		multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
371 	}
372 
373 	return mss;
374 }
375 
376 /*
377  * count_distinct_groups
378  *		Count distinct combinations of SortItems in the array.
379  *
380  * The array is assumed to be sorted according to the MultiSortSupport.
381  */
382 static int
count_distinct_groups(int numrows,SortItem * items,MultiSortSupport mss)383 count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss)
384 {
385 	int			i;
386 	int			ndistinct;
387 
388 	ndistinct = 1;
389 	for (i = 1; i < numrows; i++)
390 	{
391 		/* make sure the array really is sorted */
392 		Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
393 
394 		if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
395 			ndistinct += 1;
396 	}
397 
398 	return ndistinct;
399 }
400 
401 /*
402  * compare_sort_item_count
403  *		Comparator for sorting items by count (frequencies) in descending
404  *		order.
405  */
406 static int
compare_sort_item_count(const void * a,const void * b)407 compare_sort_item_count(const void *a, const void *b)
408 {
409 	SortItem   *ia = (SortItem *) a;
410 	SortItem   *ib = (SortItem *) b;
411 
412 	if (ia->count == ib->count)
413 		return 0;
414 	else if (ia->count > ib->count)
415 		return -1;
416 
417 	return 1;
418 }
419 
420 /*
421  * build_distinct_groups
422  *		Build an array of SortItems for distinct groups and counts matching
423  *		items.
424  *
425  * The 'items' array is assumed to be sorted.
426  */
427 static SortItem *
build_distinct_groups(int numrows,SortItem * items,MultiSortSupport mss,int * ndistinct)428 build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss,
429 					  int *ndistinct)
430 {
431 	int			i,
432 				j;
433 	int			ngroups = count_distinct_groups(numrows, items, mss);
434 
435 	SortItem   *groups = (SortItem *) palloc(ngroups * sizeof(SortItem));
436 
437 	j = 0;
438 	groups[0] = items[0];
439 	groups[0].count = 1;
440 
441 	for (i = 1; i < numrows; i++)
442 	{
443 		/* Assume sorted in ascending order. */
444 		Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
445 
446 		/* New distinct group detected. */
447 		if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
448 		{
449 			groups[++j] = items[i];
450 			groups[j].count = 0;
451 		}
452 
453 		groups[j].count++;
454 	}
455 
456 	/* ensure we filled the expected number of distinct groups */
457 	Assert(j + 1 == ngroups);
458 
459 	/* Sort the distinct groups by frequency (in descending order). */
460 	pg_qsort((void *) groups, ngroups, sizeof(SortItem),
461 			 compare_sort_item_count);
462 
463 	*ndistinct = ngroups;
464 	return groups;
465 }
466 
467 /* compare sort items (single dimension) */
468 static int
sort_item_compare(const void * a,const void * b,void * arg)469 sort_item_compare(const void *a, const void *b, void *arg)
470 {
471 	SortSupport ssup = (SortSupport) arg;
472 	SortItem   *ia = (SortItem *) a;
473 	SortItem   *ib = (SortItem *) b;
474 
475 	return ApplySortComparator(ia->values[0], ia->isnull[0],
476 							   ib->values[0], ib->isnull[0],
477 							   ssup);
478 }
479 
480 /*
481  * build_column_frequencies
482  *		Compute frequencies of values in each column.
483  *
484  * This returns an array of SortItems for each attribute the MCV is built
485  * on, with a frequency (number of occurrences) for each value. This is
486  * then used to compute "base" frequency of MCV items.
487  *
488  * All the memory is allocated in a single chunk, so that a single pfree
489  * is enough to release it. We do not allocate space for values/isnull
490  * arrays in the SortItems, because we can simply point into the input
491  * groups directly.
492  */
493 static SortItem **
build_column_frequencies(SortItem * groups,int ngroups,MultiSortSupport mss,int * ncounts)494 build_column_frequencies(SortItem *groups, int ngroups,
495 						 MultiSortSupport mss, int *ncounts)
496 {
497 	int			i,
498 				dim;
499 	SortItem  **result;
500 	char	   *ptr;
501 
502 	Assert(groups);
503 	Assert(ncounts);
504 
505 	/* allocate arrays for all columns as a single chunk */
506 	ptr = palloc(MAXALIGN(sizeof(SortItem *) * mss->ndims) +
507 				 mss->ndims * MAXALIGN(sizeof(SortItem) * ngroups));
508 
509 	/* initial array of pointers */
510 	result = (SortItem **) ptr;
511 	ptr += MAXALIGN(sizeof(SortItem *) * mss->ndims);
512 
513 	for (dim = 0; dim < mss->ndims; dim++)
514 	{
515 		SortSupport ssup = &mss->ssup[dim];
516 
517 		/* array of values for a single column */
518 		result[dim] = (SortItem *) ptr;
519 		ptr += MAXALIGN(sizeof(SortItem) * ngroups);
520 
521 		/* extract data for the dimension */
522 		for (i = 0; i < ngroups; i++)
523 		{
524 			/* point into the input groups */
525 			result[dim][i].values = &groups[i].values[dim];
526 			result[dim][i].isnull = &groups[i].isnull[dim];
527 			result[dim][i].count = groups[i].count;
528 		}
529 
530 		/* sort the values, deduplicate */
531 		qsort_arg((void *) result[dim], ngroups, sizeof(SortItem),
532 				  sort_item_compare, ssup);
533 
534 		/*
535 		 * Identify distinct values, compute frequency (there might be
536 		 * multiple MCV items containing this value, so we need to sum counts
537 		 * from all of them.
538 		 */
539 		ncounts[dim] = 1;
540 		for (i = 1; i < ngroups; i++)
541 		{
542 			if (sort_item_compare(&result[dim][i - 1], &result[dim][i], ssup) == 0)
543 			{
544 				result[dim][ncounts[dim] - 1].count += result[dim][i].count;
545 				continue;
546 			}
547 
548 			result[dim][ncounts[dim]] = result[dim][i];
549 
550 			ncounts[dim]++;
551 		}
552 	}
553 
554 	return result;
555 }
556 
557 /*
558  * statext_mcv_load
559  *		Load the MCV list for the indicated pg_statistic_ext tuple.
560  */
561 MCVList *
statext_mcv_load(Oid mvoid)562 statext_mcv_load(Oid mvoid)
563 {
564 	MCVList    *result;
565 	bool		isnull;
566 	Datum		mcvlist;
567 	HeapTuple	htup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(mvoid));
568 
569 	if (!HeapTupleIsValid(htup))
570 		elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
571 
572 	mcvlist = SysCacheGetAttr(STATEXTDATASTXOID, htup,
573 							  Anum_pg_statistic_ext_data_stxdmcv, &isnull);
574 
575 	if (isnull)
576 		elog(ERROR,
577 			 "requested statistics kind \"%c\" is not yet built for statistics object %u",
578 			 STATS_EXT_DEPENDENCIES, mvoid);
579 
580 	result = statext_mcv_deserialize(DatumGetByteaP(mcvlist));
581 
582 	ReleaseSysCache(htup);
583 
584 	return result;
585 }
586 
587 
588 /*
589  * statext_mcv_serialize
590  *		Serialize MCV list into a pg_mcv_list value.
591  *
592  * The MCV items may include values of various data types, and it's reasonable
593  * to expect redundancy (values for a given attribute, repeated for multiple
594  * MCV list items). So we deduplicate the values into arrays, and then replace
595  * the values by indexes into those arrays.
596  *
597  * The overall structure of the serialized representation looks like this:
598  *
599  * +---------------+----------------+---------------------+-------+
600  * | header fields | dimension info | deduplicated values | items |
601  * +---------------+----------------+---------------------+-------+
602  *
603  * Where dimension info stores information about the type of the K-th
604  * attribute (e.g. typlen, typbyval and length of deduplicated values).
605  * Deduplicated values store deduplicated values for each attribute.  And
606  * items store the actual MCV list items, with values replaced by indexes into
607  * the arrays.
608  *
609  * When serializing the items, we use uint16 indexes. The number of MCV items
610  * is limited by the statistics target (which is capped to 10k at the moment).
611  * We might increase this to 65k and still fit into uint16, so there's a bit of
612  * slack. Furthermore, this limit is on the number of distinct values per column,
613  * and we usually have few of those (and various combinations of them for the
614  * those MCV list). So uint16 seems fine for now.
615  *
616  * We don't really expect the serialization to save as much space as for
617  * histograms, as we are not doing any bucket splits (which is the source
618  * of high redundancy in histograms).
619  *
620  * TODO: Consider packing boolean flags (NULL) for each item into a single char
621  * (or a longer type) instead of using an array of bool items.
622  */
623 bytea *
statext_mcv_serialize(MCVList * mcvlist,VacAttrStats ** stats)624 statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)
625 {
626 	int			i;
627 	int			dim;
628 	int			ndims = mcvlist->ndimensions;
629 
630 	SortSupport ssup;
631 	DimensionInfo *info;
632 
633 	Size		total_length;
634 
635 	/* serialized items (indexes into arrays, etc.) */
636 	bytea	   *raw;
637 	char	   *ptr;
638 	char	   *endptr PG_USED_FOR_ASSERTS_ONLY;
639 
640 	/* values per dimension (and number of non-NULL values) */
641 	Datum	  **values = (Datum **) palloc0(sizeof(Datum *) * ndims);
642 	int		   *counts = (int *) palloc0(sizeof(int) * ndims);
643 
644 	/*
645 	 * We'll include some rudimentary information about the attribute types
646 	 * (length, by-val flag), so that we don't have to look them up while
647 	 * deserializing the MCV list (we already have the type OID in the
648 	 * header).  This is safe because when changing the type of the attribute
649 	 * the statistics gets dropped automatically.  We need to store the info
650 	 * about the arrays of deduplicated values anyway.
651 	 */
652 	info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);
653 
654 	/* sort support data for all attributes included in the MCV list */
655 	ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims);
656 
657 	/* collect and deduplicate values for each dimension (attribute) */
658 	for (dim = 0; dim < ndims; dim++)
659 	{
660 		int			ndistinct;
661 		TypeCacheEntry *typentry;
662 
663 		/*
664 		 * Lookup the LT operator (can't get it from stats extra_data, as we
665 		 * don't know how to interpret that - scalar vs. array etc.).
666 		 */
667 		typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);
668 
669 		/* copy important info about the data type (length, by-value) */
670 		info[dim].typlen = stats[dim]->attrtype->typlen;
671 		info[dim].typbyval = stats[dim]->attrtype->typbyval;
672 
673 		/* allocate space for values in the attribute and collect them */
674 		values[dim] = (Datum *) palloc0(sizeof(Datum) * mcvlist->nitems);
675 
676 		for (i = 0; i < mcvlist->nitems; i++)
677 		{
678 			/* skip NULL values - we don't need to deduplicate those */
679 			if (mcvlist->items[i].isnull[dim])
680 				continue;
681 
682 			/* append the value at the end */
683 			values[dim][counts[dim]] = mcvlist->items[i].values[dim];
684 			counts[dim] += 1;
685 		}
686 
687 		/* if there are just NULL values in this dimension, we're done */
688 		if (counts[dim] == 0)
689 			continue;
690 
691 		/* sort and deduplicate the data */
692 		ssup[dim].ssup_cxt = CurrentMemoryContext;
693 		ssup[dim].ssup_collation = stats[dim]->attrcollid;
694 		ssup[dim].ssup_nulls_first = false;
695 
696 		PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
697 
698 		qsort_arg(values[dim], counts[dim], sizeof(Datum),
699 				  compare_scalars_simple, &ssup[dim]);
700 
701 		/*
702 		 * Walk through the array and eliminate duplicate values, but keep the
703 		 * ordering (so that we can do a binary search later). We know there's
704 		 * at least one item as (counts[dim] != 0), so we can skip the first
705 		 * element.
706 		 */
707 		ndistinct = 1;			/* number of distinct values */
708 		for (i = 1; i < counts[dim]; i++)
709 		{
710 			/* expect sorted array */
711 			Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);
712 
713 			/* if the value is the same as the previous one, we can skip it */
714 			if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))
715 				continue;
716 
717 			values[dim][ndistinct] = values[dim][i];
718 			ndistinct += 1;
719 		}
720 
721 		/* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
722 		Assert(ndistinct <= PG_UINT16_MAX);
723 
724 		/*
725 		 * Store additional info about the attribute - number of deduplicated
726 		 * values, and also size of the serialized data. For fixed-length data
727 		 * types this is trivial to compute, for varwidth types we need to
728 		 * actually walk the array and sum the sizes.
729 		 */
730 		info[dim].nvalues = ndistinct;
731 
732 		if (info[dim].typbyval) /* by-value data types */
733 		{
734 			info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
735 
736 			/*
737 			 * We copy the data into the MCV item during deserialization, so
738 			 * we don't need to allocate any extra space.
739 			 */
740 			info[dim].nbytes_aligned = 0;
741 		}
742 		else if (info[dim].typlen > 0)	/* fixed-length by-ref */
743 		{
744 			/*
745 			 * We don't care about alignment in the serialized data, so we
746 			 * pack the data as much as possible. But we also track how much
747 			 * data will be needed after deserialization, and in that case we
748 			 * need to account for alignment of each item.
749 			 *
750 			 * Note: As the items are fixed-length, we could easily compute
751 			 * this during deserialization, but we do it here anyway.
752 			 */
753 			info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
754 			info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);
755 		}
756 		else if (info[dim].typlen == -1)	/* varlena */
757 		{
758 			info[dim].nbytes = 0;
759 			info[dim].nbytes_aligned = 0;
760 			for (i = 0; i < info[dim].nvalues; i++)
761 			{
762 				Size		len;
763 
764 				/*
765 				 * For varlena values, we detoast the values and store the
766 				 * length and data separately. We don't bother with alignment
767 				 * here, which means that during deserialization we need to
768 				 * copy the fields and only access the copies.
769 				 */
770 				values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));
771 
772 				/* serialized length (uint32 length + data) */
773 				len = VARSIZE_ANY_EXHDR(values[dim][i]);
774 				info[dim].nbytes += sizeof(uint32); /* length */
775 				info[dim].nbytes += len;	/* value (no header) */
776 
777 				/*
778 				 * During deserialization we'll build regular varlena values
779 				 * with full headers, and we need to align them properly.
780 				 */
781 				info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);
782 			}
783 		}
784 		else if (info[dim].typlen == -2)	/* cstring */
785 		{
786 			info[dim].nbytes = 0;
787 			info[dim].nbytes_aligned = 0;
788 			for (i = 0; i < info[dim].nvalues; i++)
789 			{
790 				Size		len;
791 
792 				/*
793 				 * cstring is handled similar to varlena - first we store the
794 				 * length as uint32 and then the data. We don't care about
795 				 * alignment, which means that during deserialization we need
796 				 * to copy the fields and only access the copies.
797 				 */
798 
799 				/* c-strings include terminator, so +1 byte */
800 				len = strlen(DatumGetCString(values[dim][i])) + 1;
801 				info[dim].nbytes += sizeof(uint32); /* length */
802 				info[dim].nbytes += len;	/* value */
803 
804 				/* space needed for properly aligned deserialized copies */
805 				info[dim].nbytes_aligned += MAXALIGN(len);
806 			}
807 		}
808 
809 		/* we know (count>0) so there must be some data */
810 		Assert(info[dim].nbytes > 0);
811 	}
812 
813 	/*
814 	 * Now we can finally compute how much space we'll actually need for the
815 	 * whole serialized MCV list (varlena header, MCV header, dimension info
816 	 * for each attribute, deduplicated values and items).
817 	 */
818 	total_length = (3 * sizeof(uint32)) /* magic + type + nitems */
819 		+ sizeof(AttrNumber)	/* ndimensions */
820 		+ (ndims * sizeof(Oid));	/* attribute types */
821 
822 	/* dimension info */
823 	total_length += ndims * sizeof(DimensionInfo);
824 
825 	/* add space for the arrays of deduplicated values */
826 	for (i = 0; i < ndims; i++)
827 		total_length += info[i].nbytes;
828 
829 	/*
830 	 * And finally account for the items (those are fixed-length, thanks to
831 	 * replacing values with uint16 indexes into the deduplicated arrays).
832 	 */
833 	total_length += mcvlist->nitems * ITEM_SIZE(dim);
834 
835 	/*
836 	 * Allocate space for the whole serialized MCV list (we'll skip bytes, so
837 	 * we set them to zero to make the result more compressible).
838 	 */
839 	raw = (bytea *) palloc0(VARHDRSZ + total_length);
840 	SET_VARSIZE(raw, VARHDRSZ + total_length);
841 
842 	ptr = VARDATA(raw);
843 	endptr = ptr + total_length;
844 
845 	/* copy the MCV list header fields, one by one */
846 	memcpy(ptr, &mcvlist->magic, sizeof(uint32));
847 	ptr += sizeof(uint32);
848 
849 	memcpy(ptr, &mcvlist->type, sizeof(uint32));
850 	ptr += sizeof(uint32);
851 
852 	memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
853 	ptr += sizeof(uint32);
854 
855 	memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
856 	ptr += sizeof(AttrNumber);
857 
858 	memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
859 	ptr += (sizeof(Oid) * ndims);
860 
861 	/* store information about the attributes (data amounts, ...) */
862 	memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
863 	ptr += sizeof(DimensionInfo) * ndims;
864 
865 	/* Copy the deduplicated values for all attributes to the output. */
866 	for (dim = 0; dim < ndims; dim++)
867 	{
868 		/* remember the starting point for Asserts later */
869 		char	   *start PG_USED_FOR_ASSERTS_ONLY = ptr;
870 
871 		for (i = 0; i < info[dim].nvalues; i++)
872 		{
873 			Datum		value = values[dim][i];
874 
875 			if (info[dim].typbyval) /* passed by value */
876 			{
877 				Datum		tmp;
878 
879 				/*
880 				 * For byval types, we need to copy just the significant bytes
881 				 * - we can't use memcpy directly, as that assumes
882 				 * little-endian behavior.  store_att_byval does almost what
883 				 * we need, but it requires a properly aligned buffer - the
884 				 * output buffer does not guarantee that. So we simply use a
885 				 * local Datum variable (which guarantees proper alignment),
886 				 * and then copy the value from it.
887 				 */
888 				store_att_byval(&tmp, value, info[dim].typlen);
889 
890 				memcpy(ptr, &tmp, info[dim].typlen);
891 				ptr += info[dim].typlen;
892 			}
893 			else if (info[dim].typlen > 0)	/* passed by reference */
894 			{
895 				/* no special alignment needed, treated as char array */
896 				memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
897 				ptr += info[dim].typlen;
898 			}
899 			else if (info[dim].typlen == -1)	/* varlena */
900 			{
901 				uint32		len = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
902 
903 				/* copy the length */
904 				memcpy(ptr, &len, sizeof(uint32));
905 				ptr += sizeof(uint32);
906 
907 				/* data from the varlena value (without the header) */
908 				memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len);
909 				ptr += len;
910 			}
911 			else if (info[dim].typlen == -2)	/* cstring */
912 			{
913 				uint32		len = (uint32) strlen(DatumGetCString(value)) + 1;
914 
915 				/* copy the length */
916 				memcpy(ptr, &len, sizeof(uint32));
917 				ptr += sizeof(uint32);
918 
919 				/* value */
920 				memcpy(ptr, DatumGetCString(value), len);
921 				ptr += len;
922 			}
923 
924 			/* no underflows or overflows */
925 			Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));
926 		}
927 
928 		/* we should get exactly nbytes of data for this dimension */
929 		Assert((ptr - start) == info[dim].nbytes);
930 	}
931 
932 	/* Serialize the items, with uint16 indexes instead of the values. */
933 	for (i = 0; i < mcvlist->nitems; i++)
934 	{
935 		MCVItem    *mcvitem = &mcvlist->items[i];
936 
937 		/* don't write beyond the allocated space */
938 		Assert(ptr <= (endptr - ITEM_SIZE(dim)));
939 
940 		/* copy NULL and frequency flags into the serialized MCV */
941 		memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);
942 		ptr += sizeof(bool) * ndims;
943 
944 		memcpy(ptr, &mcvitem->frequency, sizeof(double));
945 		ptr += sizeof(double);
946 
947 		memcpy(ptr, &mcvitem->base_frequency, sizeof(double));
948 		ptr += sizeof(double);
949 
950 		/* store the indexes last */
951 		for (dim = 0; dim < ndims; dim++)
952 		{
953 			uint16		index = 0;
954 			Datum	   *value;
955 
956 			/* do the lookup only for non-NULL values */
957 			if (!mcvitem->isnull[dim])
958 			{
959 				value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],
960 											  info[dim].nvalues, sizeof(Datum),
961 											  compare_scalars_simple, &ssup[dim]);
962 
963 				Assert(value != NULL);	/* serialization or deduplication
964 										 * error */
965 
966 				/* compute index within the deduplicated array */
967 				index = (uint16) (value - values[dim]);
968 
969 				/* check the index is within expected bounds */
970 				Assert(index < info[dim].nvalues);
971 			}
972 
973 			/* copy the index into the serialized MCV */
974 			memcpy(ptr, &index, sizeof(uint16));
975 			ptr += sizeof(uint16);
976 		}
977 
978 		/* make sure we don't overflow the allocated value */
979 		Assert(ptr <= endptr);
980 	}
981 
982 	/* at this point we expect to match the total_length exactly */
983 	Assert(ptr == endptr);
984 
985 	pfree(values);
986 	pfree(counts);
987 
988 	return raw;
989 }
990 
991 /*
992  * statext_mcv_deserialize
993  *		Reads serialized MCV list into MCVList structure.
994  *
995  * All the memory needed by the MCV list is allocated as a single chunk, so
996  * it's possible to simply pfree() it at once.
997  */
998 MCVList *
statext_mcv_deserialize(bytea * data)999 statext_mcv_deserialize(bytea *data)
1000 {
1001 	int			dim,
1002 				i;
1003 	Size		expected_size;
1004 	MCVList    *mcvlist;
1005 	char	   *raw;
1006 	char	   *ptr;
1007 	char	   *endptr PG_USED_FOR_ASSERTS_ONLY;
1008 
1009 	int			ndims,
1010 				nitems;
1011 	DimensionInfo *info = NULL;
1012 
1013 	/* local allocation buffer (used only for deserialization) */
1014 	Datum	  **map = NULL;
1015 
1016 	/* MCV list */
1017 	Size		mcvlen;
1018 
1019 	/* buffer used for the result */
1020 	Size		datalen;
1021 	char	   *dataptr;
1022 	char	   *valuesptr;
1023 	char	   *isnullptr;
1024 
1025 	if (data == NULL)
1026 		return NULL;
1027 
1028 	/*
1029 	 * We can't possibly deserialize a MCV list if there's not even a complete
1030 	 * header. We need an explicit formula here, because we serialize the
1031 	 * header fields one by one, so we need to ignore struct alignment.
1032 	 */
1033 	if (VARSIZE_ANY(data) < MinSizeOfMCVList)
1034 		elog(ERROR, "invalid MCV size %zd (expected at least %zu)",
1035 			 VARSIZE_ANY(data), MinSizeOfMCVList);
1036 
1037 	/* read the MCV list header */
1038 	mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
1039 
1040 	/* pointer to the data part (skip the varlena header) */
1041 	raw = (char *) data;
1042 	ptr = VARDATA_ANY(raw);
1043 	endptr = (char *) raw + VARSIZE_ANY(data);
1044 
1045 	/* get the header and perform further sanity checks */
1046 	memcpy(&mcvlist->magic, ptr, sizeof(uint32));
1047 	ptr += sizeof(uint32);
1048 
1049 	memcpy(&mcvlist->type, ptr, sizeof(uint32));
1050 	ptr += sizeof(uint32);
1051 
1052 	memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
1053 	ptr += sizeof(uint32);
1054 
1055 	memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
1056 	ptr += sizeof(AttrNumber);
1057 
1058 	if (mcvlist->magic != STATS_MCV_MAGIC)
1059 		elog(ERROR, "invalid MCV magic %u (expected %u)",
1060 			 mcvlist->magic, STATS_MCV_MAGIC);
1061 
1062 	if (mcvlist->type != STATS_MCV_TYPE_BASIC)
1063 		elog(ERROR, "invalid MCV type %u (expected %u)",
1064 			 mcvlist->type, STATS_MCV_TYPE_BASIC);
1065 
1066 	if (mcvlist->ndimensions == 0)
1067 		elog(ERROR, "invalid zero-length dimension array in MCVList");
1068 	else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||
1069 			 (mcvlist->ndimensions < 0))
1070 		elog(ERROR, "invalid length (%d) dimension array in MCVList",
1071 			 mcvlist->ndimensions);
1072 
1073 	if (mcvlist->nitems == 0)
1074 		elog(ERROR, "invalid zero-length item array in MCVList");
1075 	else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
1076 		elog(ERROR, "invalid length (%u) item array in MCVList",
1077 			 mcvlist->nitems);
1078 
1079 	nitems = mcvlist->nitems;
1080 	ndims = mcvlist->ndimensions;
1081 
1082 	/*
1083 	 * Check amount of data including DimensionInfo for all dimensions and
1084 	 * also the serialized items (including uint16 indexes). Also, walk
1085 	 * through the dimension information and add it to the sum.
1086 	 */
1087 	expected_size = SizeOfMCVList(ndims, nitems);
1088 
1089 	/*
1090 	 * Check that we have at least the dimension and info records, along with
1091 	 * the items. We don't know the size of the serialized values yet. We need
1092 	 * to do this check first, before accessing the dimension info.
1093 	 */
1094 	if (VARSIZE_ANY(data) < expected_size)
1095 		elog(ERROR, "invalid MCV size %zd (expected %zu)",
1096 			 VARSIZE_ANY(data), expected_size);
1097 
1098 	/* Now copy the array of type Oids. */
1099 	memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);
1100 	ptr += (sizeof(Oid) * ndims);
1101 
1102 	/* Now it's safe to access the dimension info. */
1103 	info = palloc(ndims * sizeof(DimensionInfo));
1104 
1105 	memcpy(info, ptr, ndims * sizeof(DimensionInfo));
1106 	ptr += (ndims * sizeof(DimensionInfo));
1107 
1108 	/* account for the value arrays */
1109 	for (dim = 0; dim < ndims; dim++)
1110 	{
1111 		/*
1112 		 * XXX I wonder if we can/should rely on asserts here. Maybe those
1113 		 * checks should be done every time?
1114 		 */
1115 		Assert(info[dim].nvalues >= 0);
1116 		Assert(info[dim].nbytes >= 0);
1117 
1118 		expected_size += info[dim].nbytes;
1119 	}
1120 
1121 	/*
1122 	 * Now we know the total expected MCV size, including all the pieces
1123 	 * (header, dimension info. items and deduplicated data). So do the final
1124 	 * check on size.
1125 	 */
1126 	if (VARSIZE_ANY(data) != expected_size)
1127 		elog(ERROR, "invalid MCV size %zd (expected %zu)",
1128 			 VARSIZE_ANY(data), expected_size);
1129 
1130 	/*
1131 	 * We need an array of Datum values for each dimension, so that we can
1132 	 * easily translate the uint16 indexes later. We also need a top-level
1133 	 * array of pointers to those per-dimension arrays.
1134 	 *
1135 	 * While allocating the arrays for dimensions, compute how much space we
1136 	 * need for a copy of the by-ref data, as we can't simply point to the
1137 	 * original values (it might go away).
1138 	 */
1139 	datalen = 0;				/* space for by-ref data */
1140 	map = (Datum **) palloc(ndims * sizeof(Datum *));
1141 
1142 	for (dim = 0; dim < ndims; dim++)
1143 	{
1144 		map[dim] = (Datum *) palloc(sizeof(Datum) * info[dim].nvalues);
1145 
1146 		/* space needed for a copy of data for by-ref types */
1147 		datalen += info[dim].nbytes_aligned;
1148 	}
1149 
1150 	/*
1151 	 * Now resize the MCV list so that the allocation includes all the data.
1152 	 *
1153 	 * Allocate space for a copy of the data, as we can't simply reference the
1154 	 * serialized data - it's not aligned properly, and it may disappear while
1155 	 * we're still using the MCV list, e.g. due to catcache release.
1156 	 *
1157 	 * We do care about alignment here, because we will allocate all the
1158 	 * pieces at once, but then use pointers to different parts.
1159 	 */
1160 	mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1161 
1162 	/* arrays of values and isnull flags for all MCV items */
1163 	mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);
1164 	mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);
1165 
1166 	/* we don't quite need to align this, but it makes some asserts easier */
1167 	mcvlen += MAXALIGN(datalen);
1168 
1169 	/* now resize the deserialized MCV list, and compute pointers to parts */
1170 	mcvlist = repalloc(mcvlist, mcvlen);
1171 
1172 	/* pointer to the beginning of values/isnull arrays */
1173 	valuesptr = (char *) mcvlist
1174 		+ MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1175 
1176 	isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));
1177 
1178 	dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));
1179 
1180 	/*
1181 	 * Build mapping (index => value) for translating the serialized data into
1182 	 * the in-memory representation.
1183 	 */
1184 	for (dim = 0; dim < ndims; dim++)
1185 	{
1186 		/* remember start position in the input array */
1187 		char	   *start PG_USED_FOR_ASSERTS_ONLY = ptr;
1188 
1189 		if (info[dim].typbyval)
1190 		{
1191 			/* for by-val types we simply copy data into the mapping */
1192 			for (i = 0; i < info[dim].nvalues; i++)
1193 			{
1194 				Datum		v = 0;
1195 
1196 				memcpy(&v, ptr, info[dim].typlen);
1197 				ptr += info[dim].typlen;
1198 
1199 				map[dim][i] = fetch_att(&v, true, info[dim].typlen);
1200 
1201 				/* no under/overflow of input array */
1202 				Assert(ptr <= (start + info[dim].nbytes));
1203 			}
1204 		}
1205 		else
1206 		{
1207 			/* for by-ref types we need to also make a copy of the data */
1208 
1209 			/* passed by reference, but fixed length (name, tid, ...) */
1210 			if (info[dim].typlen > 0)
1211 			{
1212 				for (i = 0; i < info[dim].nvalues; i++)
1213 				{
1214 					memcpy(dataptr, ptr, info[dim].typlen);
1215 					ptr += info[dim].typlen;
1216 
1217 					/* just point into the array */
1218 					map[dim][i] = PointerGetDatum(dataptr);
1219 					dataptr += MAXALIGN(info[dim].typlen);
1220 				}
1221 			}
1222 			else if (info[dim].typlen == -1)
1223 			{
1224 				/* varlena */
1225 				for (i = 0; i < info[dim].nvalues; i++)
1226 				{
1227 					uint32		len;
1228 
1229 					/* read the uint32 length */
1230 					memcpy(&len, ptr, sizeof(uint32));
1231 					ptr += sizeof(uint32);
1232 
1233 					/* the length is data-only */
1234 					SET_VARSIZE(dataptr, len + VARHDRSZ);
1235 					memcpy(VARDATA(dataptr), ptr, len);
1236 					ptr += len;
1237 
1238 					/* just point into the array */
1239 					map[dim][i] = PointerGetDatum(dataptr);
1240 
1241 					/* skip to place of the next deserialized value */
1242 					dataptr += MAXALIGN(len + VARHDRSZ);
1243 				}
1244 			}
1245 			else if (info[dim].typlen == -2)
1246 			{
1247 				/* cstring */
1248 				for (i = 0; i < info[dim].nvalues; i++)
1249 				{
1250 					uint32		len;
1251 
1252 					memcpy(&len, ptr, sizeof(uint32));
1253 					ptr += sizeof(uint32);
1254 
1255 					memcpy(dataptr, ptr, len);
1256 					ptr += len;
1257 
1258 					/* just point into the array */
1259 					map[dim][i] = PointerGetDatum(dataptr);
1260 					dataptr += MAXALIGN(len);
1261 				}
1262 			}
1263 
1264 			/* no under/overflow of input array */
1265 			Assert(ptr <= (start + info[dim].nbytes));
1266 
1267 			/* no overflow of the output mcv value */
1268 			Assert(dataptr <= ((char *) mcvlist + mcvlen));
1269 		}
1270 
1271 		/* check we consumed input data for this dimension exactly */
1272 		Assert(ptr == (start + info[dim].nbytes));
1273 	}
1274 
1275 	/* we should have also filled the MCV list exactly */
1276 	Assert(dataptr == ((char *) mcvlist + mcvlen));
1277 
1278 	/* deserialize the MCV items and translate the indexes to Datums */
1279 	for (i = 0; i < nitems; i++)
1280 	{
1281 		MCVItem    *item = &mcvlist->items[i];
1282 
1283 		item->values = (Datum *) valuesptr;
1284 		valuesptr += MAXALIGN(sizeof(Datum) * ndims);
1285 
1286 		item->isnull = (bool *) isnullptr;
1287 		isnullptr += MAXALIGN(sizeof(bool) * ndims);
1288 
1289 		memcpy(item->isnull, ptr, sizeof(bool) * ndims);
1290 		ptr += sizeof(bool) * ndims;
1291 
1292 		memcpy(&item->frequency, ptr, sizeof(double));
1293 		ptr += sizeof(double);
1294 
1295 		memcpy(&item->base_frequency, ptr, sizeof(double));
1296 		ptr += sizeof(double);
1297 
1298 		/* finally translate the indexes (for non-NULL only) */
1299 		for (dim = 0; dim < ndims; dim++)
1300 		{
1301 			uint16		index;
1302 
1303 			memcpy(&index, ptr, sizeof(uint16));
1304 			ptr += sizeof(uint16);
1305 
1306 			if (item->isnull[dim])
1307 				continue;
1308 
1309 			item->values[dim] = map[dim][index];
1310 		}
1311 
1312 		/* check we're not overflowing the input */
1313 		Assert(ptr <= endptr);
1314 	}
1315 
1316 	/* check that we processed all the data */
1317 	Assert(ptr == endptr);
1318 
1319 	/* release the buffers used for mapping */
1320 	for (dim = 0; dim < ndims; dim++)
1321 		pfree(map[dim]);
1322 
1323 	pfree(map);
1324 
1325 	return mcvlist;
1326 }
1327 
1328 /*
1329  * SRF with details about buckets of a histogram:
1330  *
1331  * - item ID (0...nitems)
1332  * - values (string array)
1333  * - nulls only (boolean array)
1334  * - frequency (double precision)
1335  * - base_frequency (double precision)
1336  *
1337  * The input is the OID of the statistics, and there are no rows returned if
1338  * the statistics contains no histogram.
1339  */
1340 Datum
pg_stats_ext_mcvlist_items(PG_FUNCTION_ARGS)1341 pg_stats_ext_mcvlist_items(PG_FUNCTION_ARGS)
1342 {
1343 	FuncCallContext *funcctx;
1344 
1345 	/* stuff done only on the first call of the function */
1346 	if (SRF_IS_FIRSTCALL())
1347 	{
1348 		MemoryContext oldcontext;
1349 		MCVList    *mcvlist;
1350 		TupleDesc	tupdesc;
1351 
1352 		/* create a function context for cross-call persistence */
1353 		funcctx = SRF_FIRSTCALL_INIT();
1354 
1355 		/* switch to memory context appropriate for multiple function calls */
1356 		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
1357 
1358 		mcvlist = statext_mcv_deserialize(PG_GETARG_BYTEA_P(0));
1359 
1360 		funcctx->user_fctx = mcvlist;
1361 
1362 		/* total number of tuples to be returned */
1363 		funcctx->max_calls = 0;
1364 		if (funcctx->user_fctx != NULL)
1365 			funcctx->max_calls = mcvlist->nitems;
1366 
1367 		/* Build a tuple descriptor for our result type */
1368 		if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1369 			ereport(ERROR,
1370 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1371 					 errmsg("function returning record called in context "
1372 							"that cannot accept type record")));
1373 		tupdesc = BlessTupleDesc(tupdesc);
1374 
1375 		/*
1376 		 * generate attribute metadata needed later to produce tuples from raw
1377 		 * C strings
1378 		 */
1379 		funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
1380 
1381 		MemoryContextSwitchTo(oldcontext);
1382 	}
1383 
1384 	/* stuff done on every call of the function */
1385 	funcctx = SRF_PERCALL_SETUP();
1386 
1387 	if (funcctx->call_cntr < funcctx->max_calls)	/* do when there is more
1388 													 * left to send */
1389 	{
1390 		Datum		values[5];
1391 		bool		nulls[5];
1392 		HeapTuple	tuple;
1393 		Datum		result;
1394 		ArrayBuildState *astate_values = NULL;
1395 		ArrayBuildState *astate_nulls = NULL;
1396 
1397 		int			i;
1398 		MCVList    *mcvlist;
1399 		MCVItem    *item;
1400 
1401 		mcvlist = (MCVList *) funcctx->user_fctx;
1402 
1403 		Assert(funcctx->call_cntr < mcvlist->nitems);
1404 
1405 		item = &mcvlist->items[funcctx->call_cntr];
1406 
1407 		for (i = 0; i < mcvlist->ndimensions; i++)
1408 		{
1409 
1410 			astate_nulls = accumArrayResult(astate_nulls,
1411 											BoolGetDatum(item->isnull[i]),
1412 											false,
1413 											BOOLOID,
1414 											CurrentMemoryContext);
1415 
1416 			if (!item->isnull[i])
1417 			{
1418 				bool		isvarlena;
1419 				Oid			outfunc;
1420 				FmgrInfo	fmgrinfo;
1421 				Datum		val;
1422 				text	   *txt;
1423 
1424 				/* lookup output func for the type */
1425 				getTypeOutputInfo(mcvlist->types[i], &outfunc, &isvarlena);
1426 				fmgr_info(outfunc, &fmgrinfo);
1427 
1428 				val = FunctionCall1(&fmgrinfo, item->values[i]);
1429 				txt = cstring_to_text(DatumGetPointer(val));
1430 
1431 				astate_values = accumArrayResult(astate_values,
1432 												 PointerGetDatum(txt),
1433 												 false,
1434 												 TEXTOID,
1435 												 CurrentMemoryContext);
1436 			}
1437 			else
1438 				astate_values = accumArrayResult(astate_values,
1439 												 (Datum) 0,
1440 												 true,
1441 												 TEXTOID,
1442 												 CurrentMemoryContext);
1443 		}
1444 
1445 		values[0] = Int32GetDatum(funcctx->call_cntr);
1446 		values[1] = PointerGetDatum(makeArrayResult(astate_values, CurrentMemoryContext));
1447 		values[2] = PointerGetDatum(makeArrayResult(astate_nulls, CurrentMemoryContext));
1448 		values[3] = Float8GetDatum(item->frequency);
1449 		values[4] = Float8GetDatum(item->base_frequency);
1450 
1451 		/* no NULLs in the tuple */
1452 		memset(nulls, 0, sizeof(nulls));
1453 
1454 		/* build a tuple */
1455 		tuple = heap_form_tuple(funcctx->attinmeta->tupdesc, values, nulls);
1456 
1457 		/* make the tuple into a datum */
1458 		result = HeapTupleGetDatum(tuple);
1459 
1460 		SRF_RETURN_NEXT(funcctx, result);
1461 	}
1462 	else						/* do when there is no more left */
1463 	{
1464 		SRF_RETURN_DONE(funcctx);
1465 	}
1466 }
1467 
1468 /*
1469  * pg_mcv_list_in		- input routine for type pg_mcv_list.
1470  *
1471  * pg_mcv_list is real enough to be a table column, but it has no operations
1472  * of its own, and disallows input too
1473  */
1474 Datum
pg_mcv_list_in(PG_FUNCTION_ARGS)1475 pg_mcv_list_in(PG_FUNCTION_ARGS)
1476 {
1477 	/*
1478 	 * pg_mcv_list stores the data in binary form and parsing text input is
1479 	 * not needed, so disallow this.
1480 	 */
1481 	ereport(ERROR,
1482 			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1483 			 errmsg("cannot accept a value of type %s", "pg_mcv_list")));
1484 
1485 	PG_RETURN_VOID();			/* keep compiler quiet */
1486 }
1487 
1488 
1489 /*
1490  * pg_mcv_list_out		- output routine for type pg_mcv_list.
1491  *
1492  * MCV lists are serialized into a bytea value, so we simply call byteaout()
1493  * to serialize the value into text. But it'd be nice to serialize that into
1494  * a meaningful representation (e.g. for inspection by people).
1495  *
1496  * XXX This should probably return something meaningful, similar to what
1497  * pg_dependencies_out does. Not sure how to deal with the deduplicated
1498  * values, though - do we want to expand that or not?
1499  */
1500 Datum
pg_mcv_list_out(PG_FUNCTION_ARGS)1501 pg_mcv_list_out(PG_FUNCTION_ARGS)
1502 {
1503 	return byteaout(fcinfo);
1504 }
1505 
1506 /*
1507  * pg_mcv_list_recv		- binary input routine for type pg_mcv_list.
1508  */
1509 Datum
pg_mcv_list_recv(PG_FUNCTION_ARGS)1510 pg_mcv_list_recv(PG_FUNCTION_ARGS)
1511 {
1512 	ereport(ERROR,
1513 			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1514 			 errmsg("cannot accept a value of type %s", "pg_mcv_list")));
1515 
1516 	PG_RETURN_VOID();			/* keep compiler quiet */
1517 }
1518 
1519 /*
1520  * pg_mcv_list_send		- binary output routine for type pg_mcv_list.
1521  *
1522  * MCV lists are serialized in a bytea value (although the type is named
1523  * differently), so let's just send that.
1524  */
1525 Datum
pg_mcv_list_send(PG_FUNCTION_ARGS)1526 pg_mcv_list_send(PG_FUNCTION_ARGS)
1527 {
1528 	return byteasend(fcinfo);
1529 }
1530 
1531 /*
1532  * match the attribute/expression to a dimension of the statistic
1533  *
1534  * Match the attribute/expression to statistics dimension. Optionally
1535  * determine the collation.
1536  */
1537 static int
mcv_match_expression(Node * expr,Bitmapset * keys,List * exprs,Oid * collid)1538 mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
1539 {
1540 	int			idx = -1;
1541 
1542 	if (IsA(expr, Var))
1543 	{
1544 		/* simple Var, so just lookup using varattno */
1545 		Var		   *var = (Var *) expr;
1546 
1547 		if (collid)
1548 			*collid = var->varcollid;
1549 
1550 		idx = bms_member_index(keys, var->varattno);
1551 
1552 		/* make sure the index is valid */
1553 		Assert((idx >= 0) && (idx <= bms_num_members(keys)));
1554 	}
1555 	else
1556 	{
1557 		ListCell   *lc;
1558 
1559 		/* expressions are stored after the simple columns */
1560 		idx = bms_num_members(keys);
1561 
1562 		if (collid)
1563 			*collid = exprCollation(expr);
1564 
1565 		/* expression - lookup in stats expressions */
1566 		foreach(lc, exprs)
1567 		{
1568 			Node	   *stat_expr = (Node *) lfirst(lc);
1569 
1570 			if (equal(expr, stat_expr))
1571 				break;
1572 
1573 			idx++;
1574 		}
1575 
1576 		/* make sure the index is valid */
1577 		Assert((idx >= bms_num_members(keys)) &&
1578 			   (idx <= bms_num_members(keys) + list_length(exprs)));
1579 	}
1580 
1581 	Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
1582 
1583 	return idx;
1584 }
1585 
1586 /*
1587  * mcv_get_match_bitmap
1588  *	Evaluate clauses using the MCV list, and update the match bitmap.
1589  *
1590  * A match bitmap keeps match/mismatch status for each MCV item, and we
1591  * update it based on additional clauses. We also use it to skip items
1592  * that can't possibly match (e.g. item marked as "mismatch" can't change
1593  * to "match" when evaluating AND clause list).
1594  *
1595  * The function also returns a flag indicating whether there was an
1596  * equality condition for all attributes, the minimum frequency in the MCV
1597  * list, and a total MCV frequency (sum of frequencies for all items).
1598  *
1599  * XXX Currently the match bitmap uses a bool for each MCV item, which is
1600  * somewhat wasteful as we could do with just a single bit, thus reducing
1601  * the size to ~1/8. It would also allow us to combine bitmaps simply using
1602  * & and |, which should be faster than min/max. The bitmaps are fairly
1603  * small, though (thanks to the cap on the MCV list size).
1604  */
1605 static bool *
mcv_get_match_bitmap(PlannerInfo * root,List * clauses,Bitmapset * keys,List * exprs,MCVList * mcvlist,bool is_or)1606 mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
1607 					 Bitmapset *keys, List *exprs,
1608 					 MCVList *mcvlist, bool is_or)
1609 {
1610 	int			i;
1611 	ListCell   *l;
1612 	bool	   *matches;
1613 
1614 	/* The bitmap may be partially built. */
1615 	Assert(clauses != NIL);
1616 	Assert(list_length(clauses) >= 1);
1617 	Assert(mcvlist != NULL);
1618 	Assert(mcvlist->nitems > 0);
1619 	Assert(mcvlist->nitems <= STATS_MCVLIST_MAX_ITEMS);
1620 
1621 	matches = palloc(sizeof(bool) * mcvlist->nitems);
1622 	memset(matches, (is_or) ? false : true,
1623 		   sizeof(bool) * mcvlist->nitems);
1624 
1625 	/*
1626 	 * Loop through the list of clauses, and for each of them evaluate all the
1627 	 * MCV items not yet eliminated by the preceding clauses.
1628 	 */
1629 	foreach(l, clauses)
1630 	{
1631 		Node	   *clause = (Node *) lfirst(l);
1632 
1633 		/* if it's a RestrictInfo, then extract the clause */
1634 		if (IsA(clause, RestrictInfo))
1635 			clause = (Node *) ((RestrictInfo *) clause)->clause;
1636 
1637 		/*
1638 		 * Handle the various types of clauses - OpClause, NullTest and
1639 		 * AND/OR/NOT
1640 		 */
1641 		if (is_opclause(clause))
1642 		{
1643 			OpExpr	   *expr = (OpExpr *) clause;
1644 			FmgrInfo	opproc;
1645 
1646 			/* valid only after examine_opclause_args returns true */
1647 			Node	   *clause_expr;
1648 			Const	   *cst;
1649 			bool		expronleft;
1650 			int			idx;
1651 			Oid			collid;
1652 
1653 			fmgr_info(get_opcode(expr->opno), &opproc);
1654 
1655 			/* extract the var/expr and const from the expression */
1656 			if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
1657 				elog(ERROR, "incompatible clause");
1658 
1659 			/* match the attribute/expression to a dimension of the statistic */
1660 			idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
1661 
1662 			Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
1663 
1664 			/*
1665 			 * Walk through the MCV items and evaluate the current clause. We
1666 			 * can skip items that were already ruled out, and terminate if
1667 			 * there are no remaining MCV items that might possibly match.
1668 			 */
1669 			for (i = 0; i < mcvlist->nitems; i++)
1670 			{
1671 				bool		match = true;
1672 				MCVItem    *item = &mcvlist->items[i];
1673 
1674 				Assert(idx >= 0);
1675 
1676 				/*
1677 				 * When the MCV item or the Const value is NULL we can treat
1678 				 * this as a mismatch. We must not call the operator because
1679 				 * of strictness.
1680 				 */
1681 				if (item->isnull[idx] || cst->constisnull)
1682 				{
1683 					matches[i] = RESULT_MERGE(matches[i], is_or, false);
1684 					continue;
1685 				}
1686 
1687 				/*
1688 				 * Skip MCV items that can't change result in the bitmap. Once
1689 				 * the value gets false for AND-lists, or true for OR-lists,
1690 				 * we don't need to look at more clauses.
1691 				 */
1692 				if (RESULT_IS_FINAL(matches[i], is_or))
1693 					continue;
1694 
1695 				/*
1696 				 * First check whether the constant is below the lower
1697 				 * boundary (in that case we can skip the bucket, because
1698 				 * there's no overlap).
1699 				 *
1700 				 * We don't store collations used to build the statistics, but
1701 				 * we can use the collation for the attribute itself, as
1702 				 * stored in varcollid. We do reset the statistics after a
1703 				 * type change (including collation change), so this is OK.
1704 				 * For expressions, we use the collation extracted from the
1705 				 * expression itself.
1706 				 */
1707 				if (expronleft)
1708 					match = DatumGetBool(FunctionCall2Coll(&opproc,
1709 														   collid,
1710 														   item->values[idx],
1711 														   cst->constvalue));
1712 				else
1713 					match = DatumGetBool(FunctionCall2Coll(&opproc,
1714 														   collid,
1715 														   cst->constvalue,
1716 														   item->values[idx]));
1717 
1718 				/* update the match bitmap with the result */
1719 				matches[i] = RESULT_MERGE(matches[i], is_or, match);
1720 			}
1721 		}
1722 		else if (IsA(clause, ScalarArrayOpExpr))
1723 		{
1724 			ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1725 			FmgrInfo	opproc;
1726 
1727 			/* valid only after examine_opclause_args returns true */
1728 			Node	   *clause_expr;
1729 			Const	   *cst;
1730 			bool		expronleft;
1731 			Oid			collid;
1732 			int			idx;
1733 
1734 			/* array evaluation */
1735 			ArrayType  *arrayval;
1736 			int16		elmlen;
1737 			bool		elmbyval;
1738 			char		elmalign;
1739 			int			num_elems;
1740 			Datum	   *elem_values;
1741 			bool	   *elem_nulls;
1742 
1743 			fmgr_info(get_opcode(expr->opno), &opproc);
1744 
1745 			/* extract the var/expr and const from the expression */
1746 			if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
1747 				elog(ERROR, "incompatible clause");
1748 
1749 			/* ScalarArrayOpExpr has the Var always on the left */
1750 			Assert(expronleft);
1751 
1752 			/* XXX what if (cst->constisnull == NULL)? */
1753 			if (!cst->constisnull)
1754 			{
1755 				arrayval = DatumGetArrayTypeP(cst->constvalue);
1756 				get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
1757 									 &elmlen, &elmbyval, &elmalign);
1758 				deconstruct_array(arrayval,
1759 								  ARR_ELEMTYPE(arrayval),
1760 								  elmlen, elmbyval, elmalign,
1761 								  &elem_values, &elem_nulls, &num_elems);
1762 			}
1763 
1764 			/* match the attribute/expression to a dimension of the statistic */
1765 			idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
1766 
1767 			/*
1768 			 * Walk through the MCV items and evaluate the current clause. We
1769 			 * can skip items that were already ruled out, and terminate if
1770 			 * there are no remaining MCV items that might possibly match.
1771 			 */
1772 			for (i = 0; i < mcvlist->nitems; i++)
1773 			{
1774 				int			j;
1775 				bool		match = (expr->useOr ? false : true);
1776 				MCVItem    *item = &mcvlist->items[i];
1777 
1778 				/*
1779 				 * When the MCV item or the Const value is NULL we can treat
1780 				 * this as a mismatch. We must not call the operator because
1781 				 * of strictness.
1782 				 */
1783 				if (item->isnull[idx] || cst->constisnull)
1784 				{
1785 					matches[i] = RESULT_MERGE(matches[i], is_or, false);
1786 					continue;
1787 				}
1788 
1789 				/*
1790 				 * Skip MCV items that can't change result in the bitmap. Once
1791 				 * the value gets false for AND-lists, or true for OR-lists,
1792 				 * we don't need to look at more clauses.
1793 				 */
1794 				if (RESULT_IS_FINAL(matches[i], is_or))
1795 					continue;
1796 
1797 				for (j = 0; j < num_elems; j++)
1798 				{
1799 					Datum		elem_value = elem_values[j];
1800 					bool		elem_isnull = elem_nulls[j];
1801 					bool		elem_match;
1802 
1803 					/* NULL values always evaluate as not matching. */
1804 					if (elem_isnull)
1805 					{
1806 						match = RESULT_MERGE(match, expr->useOr, false);
1807 						continue;
1808 					}
1809 
1810 					/*
1811 					 * Stop evaluating the array elements once we reach a
1812 					 * matching value that can't change - ALL() is the same as
1813 					 * AND-list, ANY() is the same as OR-list.
1814 					 */
1815 					if (RESULT_IS_FINAL(match, expr->useOr))
1816 						break;
1817 
1818 					elem_match = DatumGetBool(FunctionCall2Coll(&opproc,
1819 																collid,
1820 																item->values[idx],
1821 																elem_value));
1822 
1823 					match = RESULT_MERGE(match, expr->useOr, elem_match);
1824 				}
1825 
1826 				/* update the match bitmap with the result */
1827 				matches[i] = RESULT_MERGE(matches[i], is_or, match);
1828 			}
1829 		}
1830 		else if (IsA(clause, NullTest))
1831 		{
1832 			NullTest   *expr = (NullTest *) clause;
1833 			Node	   *clause_expr = (Node *) (expr->arg);
1834 
1835 			/* match the attribute/expression to a dimension of the statistic */
1836 			int			idx = mcv_match_expression(clause_expr, keys, exprs, NULL);
1837 
1838 			/*
1839 			 * Walk through the MCV items and evaluate the current clause. We
1840 			 * can skip items that were already ruled out, and terminate if
1841 			 * there are no remaining MCV items that might possibly match.
1842 			 */
1843 			for (i = 0; i < mcvlist->nitems; i++)
1844 			{
1845 				bool		match = false;	/* assume mismatch */
1846 				MCVItem    *item = &mcvlist->items[i];
1847 
1848 				/* if the clause mismatches the MCV item, update the bitmap */
1849 				switch (expr->nulltesttype)
1850 				{
1851 					case IS_NULL:
1852 						match = (item->isnull[idx]) ? true : match;
1853 						break;
1854 
1855 					case IS_NOT_NULL:
1856 						match = (!item->isnull[idx]) ? true : match;
1857 						break;
1858 				}
1859 
1860 				/* now, update the match bitmap, depending on OR/AND type */
1861 				matches[i] = RESULT_MERGE(matches[i], is_or, match);
1862 			}
1863 		}
1864 		else if (is_orclause(clause) || is_andclause(clause))
1865 		{
1866 			/* AND/OR clause, with all subclauses being compatible */
1867 
1868 			int			i;
1869 			BoolExpr   *bool_clause = ((BoolExpr *) clause);
1870 			List	   *bool_clauses = bool_clause->args;
1871 
1872 			/* match/mismatch bitmap for each MCV item */
1873 			bool	   *bool_matches = NULL;
1874 
1875 			Assert(bool_clauses != NIL);
1876 			Assert(list_length(bool_clauses) >= 2);
1877 
1878 			/* build the match bitmap for the OR-clauses */
1879 			bool_matches = mcv_get_match_bitmap(root, bool_clauses, keys, exprs,
1880 												mcvlist, is_orclause(clause));
1881 
1882 			/*
1883 			 * Merge the bitmap produced by mcv_get_match_bitmap into the
1884 			 * current one. We need to consider if we're evaluating AND or OR
1885 			 * condition when merging the results.
1886 			 */
1887 			for (i = 0; i < mcvlist->nitems; i++)
1888 				matches[i] = RESULT_MERGE(matches[i], is_or, bool_matches[i]);
1889 
1890 			pfree(bool_matches);
1891 		}
1892 		else if (is_notclause(clause))
1893 		{
1894 			/* NOT clause, with all subclauses compatible */
1895 
1896 			int			i;
1897 			BoolExpr   *not_clause = ((BoolExpr *) clause);
1898 			List	   *not_args = not_clause->args;
1899 
1900 			/* match/mismatch bitmap for each MCV item */
1901 			bool	   *not_matches = NULL;
1902 
1903 			Assert(not_args != NIL);
1904 			Assert(list_length(not_args) == 1);
1905 
1906 			/* build the match bitmap for the NOT-clause */
1907 			not_matches = mcv_get_match_bitmap(root, not_args, keys, exprs,
1908 											   mcvlist, false);
1909 
1910 			/*
1911 			 * Merge the bitmap produced by mcv_get_match_bitmap into the
1912 			 * current one. We're handling a NOT clause, so invert the result
1913 			 * before merging it into the global bitmap.
1914 			 */
1915 			for (i = 0; i < mcvlist->nitems; i++)
1916 				matches[i] = RESULT_MERGE(matches[i], is_or, !not_matches[i]);
1917 
1918 			pfree(not_matches);
1919 		}
1920 		else if (IsA(clause, Var))
1921 		{
1922 			/* Var (has to be a boolean Var, possibly from below NOT) */
1923 
1924 			Var		   *var = (Var *) (clause);
1925 
1926 			/* match the attribute to a dimension of the statistic */
1927 			int			idx = bms_member_index(keys, var->varattno);
1928 
1929 			Assert(var->vartype == BOOLOID);
1930 
1931 			/*
1932 			 * Walk through the MCV items and evaluate the current clause. We
1933 			 * can skip items that were already ruled out, and terminate if
1934 			 * there are no remaining MCV items that might possibly match.
1935 			 */
1936 			for (i = 0; i < mcvlist->nitems; i++)
1937 			{
1938 				MCVItem    *item = &mcvlist->items[i];
1939 				bool		match = false;
1940 
1941 				/* if the item is NULL, it's a mismatch */
1942 				if (!item->isnull[idx] && DatumGetBool(item->values[idx]))
1943 					match = true;
1944 
1945 				/* update the result bitmap */
1946 				matches[i] = RESULT_MERGE(matches[i], is_or, match);
1947 			}
1948 		}
1949 		else
1950 			elog(ERROR, "unknown clause type: %d", clause->type);
1951 	}
1952 
1953 	return matches;
1954 }
1955 
1956 
1957 /*
1958  * mcv_combine_selectivities
1959  * 		Combine per-column and multi-column MCV selectivity estimates.
1960  *
1961  * simple_sel is a "simple" selectivity estimate (produced without using any
1962  * extended statistics, essentially assuming independence of columns/clauses).
1963  *
1964  * mcv_sel and mcv_basesel are sums of the frequencies and base frequencies of
1965  * all matching MCV items.  The difference (mcv_sel - mcv_basesel) is then
1966  * essentially interpreted as a correction to be added to simple_sel, as
1967  * described below.
1968  *
1969  * mcv_totalsel is the sum of the frequencies of all MCV items (not just the
1970  * matching ones).  This is used as an upper bound on the portion of the
1971  * selectivity estimates not covered by the MCV statistics.
1972  *
1973  * Note: While simple and base selectivities are defined in a quite similar
1974  * way, the values are computed differently and are not therefore equal. The
1975  * simple selectivity is computed as a product of per-clause estimates, while
1976  * the base selectivity is computed by adding up base frequencies of matching
1977  * items of the multi-column MCV list. So the values may differ for two main
1978  * reasons - (a) the MCV list may not cover 100% of the data and (b) some of
1979  * the MCV items did not match the estimated clauses.
1980  *
1981  * As both (a) and (b) reduce the base selectivity value, it generally holds
1982  * that (simple_sel >= mcv_basesel). If the MCV list covers all the data, the
1983  * values may be equal.
1984  *
1985  * So, other_sel = (simple_sel - mcv_basesel) is an estimate for the part not
1986  * covered by the MCV list, and (mcv_sel - mcv_basesel) may be seen as a
1987  * correction for the part covered by the MCV list. Those two statements are
1988  * actually equivalent.
1989  */
1990 Selectivity
mcv_combine_selectivities(Selectivity simple_sel,Selectivity mcv_sel,Selectivity mcv_basesel,Selectivity mcv_totalsel)1991 mcv_combine_selectivities(Selectivity simple_sel,
1992 						  Selectivity mcv_sel,
1993 						  Selectivity mcv_basesel,
1994 						  Selectivity mcv_totalsel)
1995 {
1996 	Selectivity other_sel;
1997 	Selectivity sel;
1998 
1999 	/* estimated selectivity of values not covered by MCV matches */
2000 	other_sel = simple_sel - mcv_basesel;
2001 	CLAMP_PROBABILITY(other_sel);
2002 
2003 	/* this non-MCV selectivity cannot exceed 1 - mcv_totalsel */
2004 	if (other_sel > 1.0 - mcv_totalsel)
2005 		other_sel = 1.0 - mcv_totalsel;
2006 
2007 	/* overall selectivity is the sum of the MCV and non-MCV parts */
2008 	sel = mcv_sel + other_sel;
2009 	CLAMP_PROBABILITY(sel);
2010 
2011 	return sel;
2012 }
2013 
2014 
2015 /*
2016  * mcv_clauselist_selectivity
2017  *		Use MCV statistics to estimate the selectivity of an implicitly-ANDed
2018  *		list of clauses.
2019  *
2020  * This determines which MCV items match every clause in the list and returns
2021  * the sum of the frequencies of those items.
2022  *
2023  * In addition, it returns the sum of the base frequencies of each of those
2024  * items (that is the sum of the selectivities that each item would have if
2025  * the columns were independent of one another), and the total selectivity of
2026  * all the MCV items (not just the matching ones).  These are expected to be
2027  * used together with a "simple" selectivity estimate (one based only on
2028  * per-column statistics) to produce an overall selectivity estimate that
2029  * makes use of both per-column and multi-column statistics --- see
2030  * mcv_combine_selectivities().
2031  */
2032 Selectivity
mcv_clauselist_selectivity(PlannerInfo * root,StatisticExtInfo * stat,List * clauses,int varRelid,JoinType jointype,SpecialJoinInfo * sjinfo,RelOptInfo * rel,Selectivity * basesel,Selectivity * totalsel)2033 mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat,
2034 						   List *clauses, int varRelid,
2035 						   JoinType jointype, SpecialJoinInfo *sjinfo,
2036 						   RelOptInfo *rel,
2037 						   Selectivity *basesel, Selectivity *totalsel)
2038 {
2039 	int			i;
2040 	MCVList    *mcv;
2041 	Selectivity s = 0.0;
2042 
2043 	/* match/mismatch bitmap for each MCV item */
2044 	bool	   *matches = NULL;
2045 
2046 	/* load the MCV list stored in the statistics object */
2047 	mcv = statext_mcv_load(stat->statOid);
2048 
2049 	/* build a match bitmap for the clauses */
2050 	matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
2051 								   mcv, false);
2052 
2053 	/* sum frequencies for all the matching MCV items */
2054 	*basesel = 0.0;
2055 	*totalsel = 0.0;
2056 	for (i = 0; i < mcv->nitems; i++)
2057 	{
2058 		*totalsel += mcv->items[i].frequency;
2059 
2060 		if (matches[i] != false)
2061 		{
2062 			*basesel += mcv->items[i].base_frequency;
2063 			s += mcv->items[i].frequency;
2064 		}
2065 	}
2066 
2067 	return s;
2068 }
2069 
2070 
2071 /*
2072  * mcv_clause_selectivity_or
2073  *		Use MCV statistics to estimate the selectivity of a clause that
2074  *		appears in an ORed list of clauses.
2075  *
2076  * As with mcv_clauselist_selectivity() this determines which MCV items match
2077  * the clause and returns both the sum of the frequencies and the sum of the
2078  * base frequencies of those items, as well as the sum of the frequencies of
2079  * all MCV items (not just the matching ones) so that this information can be
2080  * used by mcv_combine_selectivities() to produce a selectivity estimate that
2081  * makes use of both per-column and multi-column statistics.
2082  *
2083  * Additionally, we return information to help compute the overall selectivity
2084  * of the ORed list of clauses assumed to contain this clause.  This function
2085  * is intended to be called for each clause in the ORed list of clauses,
2086  * allowing the overall selectivity to be computed using the following
2087  * algorithm:
2088  *
2089  * Suppose P[n] = P(C[1] OR C[2] OR ... OR C[n]) is the combined selectivity
2090  * of the first n clauses in the list.  Then the combined selectivity taking
2091  * into account the next clause C[n+1] can be written as
2092  *
2093  *		P[n+1] = P[n] + P(C[n+1]) - P((C[1] OR ... OR C[n]) AND C[n+1])
2094  *
2095  * The final term above represents the overlap between the clauses examined so
2096  * far and the (n+1)'th clause.  To estimate its selectivity, we track the
2097  * match bitmap for the ORed list of clauses examined so far and examine its
2098  * intersection with the match bitmap for the (n+1)'th clause.
2099  *
2100  * We then also return the sums of the MCV item frequencies and base
2101  * frequencies for the match bitmap intersection corresponding to the overlap
2102  * term above, so that they can be combined with a simple selectivity estimate
2103  * for that term.
2104  *
2105  * The parameter "or_matches" is an in/out parameter tracking the match bitmap
2106  * for the clauses examined so far.  The caller is expected to set it to NULL
2107  * the first time it calls this function.
2108  */
2109 Selectivity
mcv_clause_selectivity_or(PlannerInfo * root,StatisticExtInfo * stat,MCVList * mcv,Node * clause,bool ** or_matches,Selectivity * basesel,Selectivity * overlap_mcvsel,Selectivity * overlap_basesel,Selectivity * totalsel)2110 mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat,
2111 						  MCVList *mcv, Node *clause, bool **or_matches,
2112 						  Selectivity *basesel, Selectivity *overlap_mcvsel,
2113 						  Selectivity *overlap_basesel, Selectivity *totalsel)
2114 {
2115 	Selectivity s = 0.0;
2116 	bool	   *new_matches;
2117 	int			i;
2118 
2119 	/* build the OR-matches bitmap, if not built already */
2120 	if (*or_matches == NULL)
2121 		*or_matches = palloc0(sizeof(bool) * mcv->nitems);
2122 
2123 	/* build the match bitmap for the new clause */
2124 	new_matches = mcv_get_match_bitmap(root, list_make1(clause), stat->keys,
2125 									   stat->exprs, mcv, false);
2126 
2127 	/*
2128 	 * Sum the frequencies for all the MCV items matching this clause and also
2129 	 * those matching the overlap between this clause and any of the preceding
2130 	 * clauses as described above.
2131 	 */
2132 	*basesel = 0.0;
2133 	*overlap_mcvsel = 0.0;
2134 	*overlap_basesel = 0.0;
2135 	*totalsel = 0.0;
2136 	for (i = 0; i < mcv->nitems; i++)
2137 	{
2138 		*totalsel += mcv->items[i].frequency;
2139 
2140 		if (new_matches[i])
2141 		{
2142 			s += mcv->items[i].frequency;
2143 			*basesel += mcv->items[i].base_frequency;
2144 
2145 			if ((*or_matches)[i])
2146 			{
2147 				*overlap_mcvsel += mcv->items[i].frequency;
2148 				*overlap_basesel += mcv->items[i].base_frequency;
2149 			}
2150 		}
2151 
2152 		/* update the OR-matches bitmap for the next clause */
2153 		(*or_matches)[i] = (*or_matches)[i] || new_matches[i];
2154 	}
2155 
2156 	pfree(new_matches);
2157 
2158 	return s;
2159 }
2160