1 /*-------------------------------------------------------------------------
2  *
3  * extended_stats.c
4  *	  POSTGRES extended statistics
5  *
6  * Generic code supporting statistics objects created via CREATE STATISTICS.
7  *
8  *
9  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
10  * Portions Copyright (c) 1994, Regents of the University of California
11  *
12  * IDENTIFICATION
13  *	  src/backend/statistics/extended_stats.c
14  *
15  *-------------------------------------------------------------------------
16  */
17 #include "postgres.h"
18 
19 #include "access/genam.h"
20 #include "access/htup_details.h"
21 #include "access/table.h"
22 #include "access/tuptoaster.h"
23 #include "catalog/indexing.h"
24 #include "catalog/pg_collation.h"
25 #include "catalog/pg_statistic_ext.h"
26 #include "catalog/pg_statistic_ext_data.h"
27 #include "miscadmin.h"
28 #include "nodes/nodeFuncs.h"
29 #include "optimizer/clauses.h"
30 #include "optimizer/optimizer.h"
31 #include "postmaster/autovacuum.h"
32 #include "statistics/extended_stats_internal.h"
33 #include "statistics/statistics.h"
34 #include "utils/builtins.h"
35 #include "utils/fmgroids.h"
36 #include "utils/lsyscache.h"
37 #include "utils/memutils.h"
38 #include "utils/rel.h"
39 #include "utils/selfuncs.h"
40 #include "utils/syscache.h"
41 
42 /*
43  * To avoid consuming too much memory during analysis and/or too much space
44  * in the resulting pg_statistic rows, we ignore varlena datums that are wider
45  * than WIDTH_THRESHOLD (after detoasting!).  This is legitimate for MCV
46  * and distinct-value calculations since a wide value is unlikely to be
47  * duplicated at all, much less be a most-common value.  For the same reason,
48  * ignoring wide values will not affect our estimates of histogram bin
49  * boundaries very much.
50  */
51 #define WIDTH_THRESHOLD  1024
52 
53 /*
54  * Used internally to refer to an individual statistics object, i.e.,
55  * a pg_statistic_ext entry.
56  */
57 typedef struct StatExtEntry
58 {
59 	Oid			statOid;		/* OID of pg_statistic_ext entry */
60 	char	   *schema;			/* statistics object's schema */
61 	char	   *name;			/* statistics object's name */
62 	Bitmapset  *columns;		/* attribute numbers covered by the object */
63 	List	   *types;			/* 'char' list of enabled statistics kinds */
64 } StatExtEntry;
65 
66 
67 static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
68 static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
69 											int nvacatts, VacAttrStats **vacatts);
70 static void statext_store(Oid relid,
71 						  MVNDistinct *ndistinct, MVDependencies *dependencies,
72 						  MCVList *mcv, VacAttrStats **stats);
73 
74 
75 /*
76  * Compute requested extended stats, using the rows sampled for the plain
77  * (single-column) stats.
78  *
79  * This fetches a list of stats types from pg_statistic_ext, computes the
80  * requested stats, and serializes them back into the catalog.
81  */
82 void
BuildRelationExtStatistics(Relation onerel,double totalrows,int numrows,HeapTuple * rows,int natts,VacAttrStats ** vacattrstats)83 BuildRelationExtStatistics(Relation onerel, double totalrows,
84 						   int numrows, HeapTuple *rows,
85 						   int natts, VacAttrStats **vacattrstats)
86 {
87 	Relation	pg_stext;
88 	ListCell   *lc;
89 	List	   *stats;
90 	MemoryContext cxt;
91 	MemoryContext oldcxt;
92 
93 	pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock);
94 	stats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
95 
96 	/* memory context for building each statistics object */
97 	cxt = AllocSetContextCreate(CurrentMemoryContext,
98 								"BuildRelationExtStatistics",
99 								ALLOCSET_DEFAULT_SIZES);
100 	oldcxt = MemoryContextSwitchTo(cxt);
101 
102 	foreach(lc, stats)
103 	{
104 		StatExtEntry *stat = (StatExtEntry *) lfirst(lc);
105 		MVNDistinct *ndistinct = NULL;
106 		MVDependencies *dependencies = NULL;
107 		MCVList    *mcv = NULL;
108 		VacAttrStats **stats;
109 		ListCell   *lc2;
110 
111 		/*
112 		 * Check if we can build these stats based on the column analyzed. If
113 		 * not, report this fact (except in autovacuum) and move on.
114 		 */
115 		stats = lookup_var_attr_stats(onerel, stat->columns,
116 									  natts, vacattrstats);
117 		if (!stats)
118 		{
119 			if (!IsAutoVacuumWorkerProcess())
120 				ereport(WARNING,
121 						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
122 						 errmsg("statistics object \"%s.%s\" could not be computed for relation \"%s.%s\"",
123 								stat->schema, stat->name,
124 								get_namespace_name(onerel->rd_rel->relnamespace),
125 								RelationGetRelationName(onerel)),
126 						 errtable(onerel)));
127 			continue;
128 		}
129 
130 		/* check allowed number of dimensions */
131 		Assert(bms_num_members(stat->columns) >= 2 &&
132 			   bms_num_members(stat->columns) <= STATS_MAX_DIMENSIONS);
133 
134 		/* compute statistic of each requested type */
135 		foreach(lc2, stat->types)
136 		{
137 			char		t = (char) lfirst_int(lc2);
138 
139 			if (t == STATS_EXT_NDISTINCT)
140 				ndistinct = statext_ndistinct_build(totalrows, numrows, rows,
141 													stat->columns, stats);
142 			else if (t == STATS_EXT_DEPENDENCIES)
143 				dependencies = statext_dependencies_build(numrows, rows,
144 														  stat->columns, stats);
145 			else if (t == STATS_EXT_MCV)
146 				mcv = statext_mcv_build(numrows, rows, stat->columns, stats,
147 										totalrows);
148 		}
149 
150 		/* store the statistics in the catalog */
151 		statext_store(stat->statOid, ndistinct, dependencies, mcv, stats);
152 
153 		/* free the data used for building this statistics object */
154 		MemoryContextReset(cxt);
155 	}
156 
157 	MemoryContextSwitchTo(oldcxt);
158 	MemoryContextDelete(cxt);
159 
160 	list_free(stats);
161 
162 	table_close(pg_stext, RowExclusiveLock);
163 }
164 
165 /*
166  * statext_is_kind_built
167  *		Is this stat kind built in the given pg_statistic_ext_data tuple?
168  */
169 bool
statext_is_kind_built(HeapTuple htup,char type)170 statext_is_kind_built(HeapTuple htup, char type)
171 {
172 	AttrNumber	attnum;
173 
174 	switch (type)
175 	{
176 		case STATS_EXT_NDISTINCT:
177 			attnum = Anum_pg_statistic_ext_data_stxdndistinct;
178 			break;
179 
180 		case STATS_EXT_DEPENDENCIES:
181 			attnum = Anum_pg_statistic_ext_data_stxddependencies;
182 			break;
183 
184 		case STATS_EXT_MCV:
185 			attnum = Anum_pg_statistic_ext_data_stxdmcv;
186 			break;
187 
188 		default:
189 			elog(ERROR, "unexpected statistics type requested: %d", type);
190 	}
191 
192 	return !heap_attisnull(htup, attnum, NULL);
193 }
194 
195 /*
196  * Return a list (of StatExtEntry) of statistics objects for the given relation.
197  */
198 static List *
fetch_statentries_for_relation(Relation pg_statext,Oid relid)199 fetch_statentries_for_relation(Relation pg_statext, Oid relid)
200 {
201 	SysScanDesc scan;
202 	ScanKeyData skey;
203 	HeapTuple	htup;
204 	List	   *result = NIL;
205 
206 	/*
207 	 * Prepare to scan pg_statistic_ext for entries having stxrelid = this
208 	 * rel.
209 	 */
210 	ScanKeyInit(&skey,
211 				Anum_pg_statistic_ext_stxrelid,
212 				BTEqualStrategyNumber, F_OIDEQ,
213 				ObjectIdGetDatum(relid));
214 
215 	scan = systable_beginscan(pg_statext, StatisticExtRelidIndexId, true,
216 							  NULL, 1, &skey);
217 
218 	while (HeapTupleIsValid(htup = systable_getnext(scan)))
219 	{
220 		StatExtEntry *entry;
221 		Datum		datum;
222 		bool		isnull;
223 		int			i;
224 		ArrayType  *arr;
225 		char	   *enabled;
226 		Form_pg_statistic_ext staForm;
227 
228 		entry = palloc0(sizeof(StatExtEntry));
229 		staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
230 		entry->statOid = staForm->oid;
231 		entry->schema = get_namespace_name(staForm->stxnamespace);
232 		entry->name = pstrdup(NameStr(staForm->stxname));
233 		for (i = 0; i < staForm->stxkeys.dim1; i++)
234 		{
235 			entry->columns = bms_add_member(entry->columns,
236 											staForm->stxkeys.values[i]);
237 		}
238 
239 		/* decode the stxkind char array into a list of chars */
240 		datum = SysCacheGetAttr(STATEXTOID, htup,
241 								Anum_pg_statistic_ext_stxkind, &isnull);
242 		Assert(!isnull);
243 		arr = DatumGetArrayTypeP(datum);
244 		if (ARR_NDIM(arr) != 1 ||
245 			ARR_HASNULL(arr) ||
246 			ARR_ELEMTYPE(arr) != CHAROID)
247 			elog(ERROR, "stxkind is not a 1-D char array");
248 		enabled = (char *) ARR_DATA_PTR(arr);
249 		for (i = 0; i < ARR_DIMS(arr)[0]; i++)
250 		{
251 			Assert((enabled[i] == STATS_EXT_NDISTINCT) ||
252 				   (enabled[i] == STATS_EXT_DEPENDENCIES) ||
253 				   (enabled[i] == STATS_EXT_MCV));
254 			entry->types = lappend_int(entry->types, (int) enabled[i]);
255 		}
256 
257 		result = lappend(result, entry);
258 	}
259 
260 	systable_endscan(scan);
261 
262 	return result;
263 }
264 
265 /*
266  * Using 'vacatts' of size 'nvacatts' as input data, return a newly built
267  * VacAttrStats array which includes only the items corresponding to
268  * attributes indicated by 'stxkeys'. If we don't have all of the per column
269  * stats available to compute the extended stats, then we return NULL to indicate
270  * to the caller that the stats should not be built.
271  */
272 static VacAttrStats **
lookup_var_attr_stats(Relation rel,Bitmapset * attrs,int nvacatts,VacAttrStats ** vacatts)273 lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
274 					  int nvacatts, VacAttrStats **vacatts)
275 {
276 	int			i = 0;
277 	int			x = -1;
278 	VacAttrStats **stats;
279 
280 	stats = (VacAttrStats **)
281 		palloc(bms_num_members(attrs) * sizeof(VacAttrStats *));
282 
283 	/* lookup VacAttrStats info for the requested columns (same attnum) */
284 	while ((x = bms_next_member(attrs, x)) >= 0)
285 	{
286 		int			j;
287 
288 		stats[i] = NULL;
289 		for (j = 0; j < nvacatts; j++)
290 		{
291 			if (x == vacatts[j]->tupattnum)
292 			{
293 				stats[i] = vacatts[j];
294 				break;
295 			}
296 		}
297 
298 		if (!stats[i])
299 		{
300 			/*
301 			 * Looks like stats were not gathered for one of the columns
302 			 * required. We'll be unable to build the extended stats without
303 			 * this column.
304 			 */
305 			pfree(stats);
306 			return NULL;
307 		}
308 
309 		/*
310 		 * Sanity check that the column is not dropped - stats should have
311 		 * been removed in this case.
312 		 */
313 		Assert(!stats[i]->attr->attisdropped);
314 
315 		i++;
316 	}
317 
318 	return stats;
319 }
320 
321 /*
322  * statext_store
323  *	Serializes the statistics and stores them into the pg_statistic_ext_data
324  *	tuple.
325  */
326 static void
statext_store(Oid statOid,MVNDistinct * ndistinct,MVDependencies * dependencies,MCVList * mcv,VacAttrStats ** stats)327 statext_store(Oid statOid,
328 			  MVNDistinct *ndistinct, MVDependencies *dependencies,
329 			  MCVList *mcv, VacAttrStats **stats)
330 {
331 	Relation	pg_stextdata;
332 	HeapTuple	stup,
333 				oldtup;
334 	Datum		values[Natts_pg_statistic_ext_data];
335 	bool		nulls[Natts_pg_statistic_ext_data];
336 	bool		replaces[Natts_pg_statistic_ext_data];
337 
338 	pg_stextdata = table_open(StatisticExtDataRelationId, RowExclusiveLock);
339 
340 	memset(nulls, true, sizeof(nulls));
341 	memset(replaces, false, sizeof(replaces));
342 	memset(values, 0, sizeof(values));
343 
344 	/*
345 	 * Construct a new pg_statistic_ext_data tuple, replacing the calculated
346 	 * stats.
347 	 */
348 	if (ndistinct != NULL)
349 	{
350 		bytea	   *data = statext_ndistinct_serialize(ndistinct);
351 
352 		nulls[Anum_pg_statistic_ext_data_stxdndistinct - 1] = (data == NULL);
353 		values[Anum_pg_statistic_ext_data_stxdndistinct - 1] = PointerGetDatum(data);
354 	}
355 
356 	if (dependencies != NULL)
357 	{
358 		bytea	   *data = statext_dependencies_serialize(dependencies);
359 
360 		nulls[Anum_pg_statistic_ext_data_stxddependencies - 1] = (data == NULL);
361 		values[Anum_pg_statistic_ext_data_stxddependencies - 1] = PointerGetDatum(data);
362 	}
363 	if (mcv != NULL)
364 	{
365 		bytea	   *data = statext_mcv_serialize(mcv, stats);
366 
367 		nulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = (data == NULL);
368 		values[Anum_pg_statistic_ext_data_stxdmcv - 1] = PointerGetDatum(data);
369 	}
370 
371 	/* always replace the value (either by bytea or NULL) */
372 	replaces[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true;
373 	replaces[Anum_pg_statistic_ext_data_stxddependencies - 1] = true;
374 	replaces[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
375 
376 	/* there should already be a pg_statistic_ext_data tuple */
377 	oldtup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(statOid));
378 	if (!HeapTupleIsValid(oldtup))
379 		elog(ERROR, "cache lookup failed for statistics object %u", statOid);
380 
381 	/* replace it */
382 	stup = heap_modify_tuple(oldtup,
383 							 RelationGetDescr(pg_stextdata),
384 							 values,
385 							 nulls,
386 							 replaces);
387 	ReleaseSysCache(oldtup);
388 	CatalogTupleUpdate(pg_stextdata, &stup->t_self, stup);
389 
390 	heap_freetuple(stup);
391 
392 	table_close(pg_stextdata, RowExclusiveLock);
393 }
394 
395 /* initialize multi-dimensional sort */
396 MultiSortSupport
multi_sort_init(int ndims)397 multi_sort_init(int ndims)
398 {
399 	MultiSortSupport mss;
400 
401 	Assert(ndims >= 2);
402 
403 	mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup)
404 									 + sizeof(SortSupportData) * ndims);
405 
406 	mss->ndims = ndims;
407 
408 	return mss;
409 }
410 
411 /*
412  * Prepare sort support info using the given sort operator and collation
413  * at the position 'sortdim'
414  */
415 void
multi_sort_add_dimension(MultiSortSupport mss,int sortdim,Oid oper,Oid collation)416 multi_sort_add_dimension(MultiSortSupport mss, int sortdim,
417 						 Oid oper, Oid collation)
418 {
419 	SortSupport ssup = &mss->ssup[sortdim];
420 
421 	ssup->ssup_cxt = CurrentMemoryContext;
422 	ssup->ssup_collation = collation;
423 	ssup->ssup_nulls_first = false;
424 
425 	PrepareSortSupportFromOrderingOp(oper, ssup);
426 }
427 
428 /* compare all the dimensions in the selected order */
429 int
multi_sort_compare(const void * a,const void * b,void * arg)430 multi_sort_compare(const void *a, const void *b, void *arg)
431 {
432 	MultiSortSupport mss = (MultiSortSupport) arg;
433 	SortItem   *ia = (SortItem *) a;
434 	SortItem   *ib = (SortItem *) b;
435 	int			i;
436 
437 	for (i = 0; i < mss->ndims; i++)
438 	{
439 		int			compare;
440 
441 		compare = ApplySortComparator(ia->values[i], ia->isnull[i],
442 									  ib->values[i], ib->isnull[i],
443 									  &mss->ssup[i]);
444 
445 		if (compare != 0)
446 			return compare;
447 	}
448 
449 	/* equal by default */
450 	return 0;
451 }
452 
453 /* compare selected dimension */
454 int
multi_sort_compare_dim(int dim,const SortItem * a,const SortItem * b,MultiSortSupport mss)455 multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b,
456 					   MultiSortSupport mss)
457 {
458 	return ApplySortComparator(a->values[dim], a->isnull[dim],
459 							   b->values[dim], b->isnull[dim],
460 							   &mss->ssup[dim]);
461 }
462 
463 int
multi_sort_compare_dims(int start,int end,const SortItem * a,const SortItem * b,MultiSortSupport mss)464 multi_sort_compare_dims(int start, int end,
465 						const SortItem *a, const SortItem *b,
466 						MultiSortSupport mss)
467 {
468 	int			dim;
469 
470 	for (dim = start; dim <= end; dim++)
471 	{
472 		int			r = ApplySortComparator(a->values[dim], a->isnull[dim],
473 											b->values[dim], b->isnull[dim],
474 											&mss->ssup[dim]);
475 
476 		if (r != 0)
477 			return r;
478 	}
479 
480 	return 0;
481 }
482 
483 int
compare_scalars_simple(const void * a,const void * b,void * arg)484 compare_scalars_simple(const void *a, const void *b, void *arg)
485 {
486 	return compare_datums_simple(*(Datum *) a,
487 								 *(Datum *) b,
488 								 (SortSupport) arg);
489 }
490 
491 int
compare_datums_simple(Datum a,Datum b,SortSupport ssup)492 compare_datums_simple(Datum a, Datum b, SortSupport ssup)
493 {
494 	return ApplySortComparator(a, false, b, false, ssup);
495 }
496 
497 /* simple counterpart to qsort_arg */
498 void *
bsearch_arg(const void * key,const void * base,size_t nmemb,size_t size,int (* compar)(const void *,const void *,void *),void * arg)499 bsearch_arg(const void *key, const void *base, size_t nmemb, size_t size,
500 			int (*compar) (const void *, const void *, void *),
501 			void *arg)
502 {
503 	size_t		l,
504 				u,
505 				idx;
506 	const void *p;
507 	int			comparison;
508 
509 	l = 0;
510 	u = nmemb;
511 	while (l < u)
512 	{
513 		idx = (l + u) / 2;
514 		p = (void *) (((const char *) base) + (idx * size));
515 		comparison = (*compar) (key, p, arg);
516 
517 		if (comparison < 0)
518 			u = idx;
519 		else if (comparison > 0)
520 			l = idx + 1;
521 		else
522 			return (void *) p;
523 	}
524 
525 	return NULL;
526 }
527 
528 /*
529  * build_attnums_array
530  *		Transforms a bitmap into an array of AttrNumber values.
531  *
532  * This is used for extended statistics only, so all the attribute must be
533  * user-defined. That means offsetting by FirstLowInvalidHeapAttributeNumber
534  * is not necessary here (and when querying the bitmap).
535  */
536 AttrNumber *
build_attnums_array(Bitmapset * attrs,int * numattrs)537 build_attnums_array(Bitmapset *attrs, int *numattrs)
538 {
539 	int			i,
540 				j;
541 	AttrNumber *attnums;
542 	int			num = bms_num_members(attrs);
543 
544 	if (numattrs)
545 		*numattrs = num;
546 
547 	/* build attnums from the bitmapset */
548 	attnums = (AttrNumber *) palloc(sizeof(AttrNumber) * num);
549 	i = 0;
550 	j = -1;
551 	while ((j = bms_next_member(attrs, j)) >= 0)
552 	{
553 		/*
554 		 * Make sure the bitmap contains only user-defined attributes. As
555 		 * bitmaps can't contain negative values, this can be violated in two
556 		 * ways. Firstly, the bitmap might contain 0 as a member, and secondly
557 		 * the integer value might be larger than MaxAttrNumber.
558 		 */
559 		Assert(AttrNumberIsForUserDefinedAttr(j));
560 		Assert(j <= MaxAttrNumber);
561 
562 		attnums[i++] = (AttrNumber) j;
563 
564 		/* protect against overflows */
565 		Assert(i <= num);
566 	}
567 
568 	return attnums;
569 }
570 
571 /*
572  * build_sorted_items
573  *		build a sorted array of SortItem with values from rows
574  *
575  * Note: All the memory is allocated in a single chunk, so that the caller
576  * can simply pfree the return value to release all of it.
577  */
578 SortItem *
build_sorted_items(int numrows,int * nitems,HeapTuple * rows,TupleDesc tdesc,MultiSortSupport mss,int numattrs,AttrNumber * attnums)579 build_sorted_items(int numrows, int *nitems, HeapTuple *rows, TupleDesc tdesc,
580 				   MultiSortSupport mss, int numattrs, AttrNumber *attnums)
581 {
582 	int			i,
583 				j,
584 				len,
585 				idx;
586 	int			nvalues = numrows * numattrs;
587 
588 	SortItem   *items;
589 	Datum	   *values;
590 	bool	   *isnull;
591 	char	   *ptr;
592 
593 	/* Compute the total amount of memory we need (both items and values). */
594 	len = numrows * sizeof(SortItem) + nvalues * (sizeof(Datum) + sizeof(bool));
595 
596 	/* Allocate the memory and split it into the pieces. */
597 	ptr = palloc0(len);
598 
599 	/* items to sort */
600 	items = (SortItem *) ptr;
601 	ptr += numrows * sizeof(SortItem);
602 
603 	/* values and null flags */
604 	values = (Datum *) ptr;
605 	ptr += nvalues * sizeof(Datum);
606 
607 	isnull = (bool *) ptr;
608 	ptr += nvalues * sizeof(bool);
609 
610 	/* make sure we consumed the whole buffer exactly */
611 	Assert((ptr - (char *) items) == len);
612 
613 	/* fix the pointers to Datum and bool arrays */
614 	idx = 0;
615 	for (i = 0; i < numrows; i++)
616 	{
617 		bool		toowide = false;
618 
619 		items[idx].values = &values[idx * numattrs];
620 		items[idx].isnull = &isnull[idx * numattrs];
621 
622 		/* load the values/null flags from sample rows */
623 		for (j = 0; j < numattrs; j++)
624 		{
625 			Datum		value;
626 			bool		isnull;
627 
628 			value = heap_getattr(rows[i], attnums[j], tdesc, &isnull);
629 
630 			/*
631 			 * If this is a varlena value, check if it's too wide and if yes
632 			 * then skip the whole item. Otherwise detoast the value.
633 			 *
634 			 * XXX It may happen that we've already detoasted some preceding
635 			 * values for the current item. We don't bother to cleanup those
636 			 * on the assumption that those are small (below WIDTH_THRESHOLD)
637 			 * and will be discarded at the end of analyze.
638 			 */
639 			if ((!isnull) &&
640 				(TupleDescAttr(tdesc, attnums[j] - 1)->attlen == -1))
641 			{
642 				if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
643 				{
644 					toowide = true;
645 					break;
646 				}
647 
648 				value = PointerGetDatum(PG_DETOAST_DATUM(value));
649 			}
650 
651 			items[idx].values[j] = value;
652 			items[idx].isnull[j] = isnull;
653 		}
654 
655 		if (toowide)
656 			continue;
657 
658 		idx++;
659 	}
660 
661 	/* store the actual number of items (ignoring the too-wide ones) */
662 	*nitems = idx;
663 
664 	/* all items were too wide */
665 	if (idx == 0)
666 	{
667 		/* everything is allocated as a single chunk */
668 		pfree(items);
669 		return NULL;
670 	}
671 
672 	/* do the sort, using the multi-sort */
673 	qsort_arg((void *) items, idx, sizeof(SortItem),
674 			  multi_sort_compare, mss);
675 
676 	return items;
677 }
678 
679 /*
680  * has_stats_of_kind
681  *		Check whether the list contains statistic of a given kind
682  */
683 bool
has_stats_of_kind(List * stats,char requiredkind)684 has_stats_of_kind(List *stats, char requiredkind)
685 {
686 	ListCell   *l;
687 
688 	foreach(l, stats)
689 	{
690 		StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
691 
692 		if (stat->kind == requiredkind)
693 			return true;
694 	}
695 
696 	return false;
697 }
698 
699 /*
700  * choose_best_statistics
701  *		Look for and return statistics with the specified 'requiredkind' which
702  *		have keys that match at least two of the given attnums.  Return NULL if
703  *		there's no match.
704  *
705  * The current selection criteria is very simple - we choose the statistics
706  * object referencing the most attributes in covered (and still unestimated
707  * clauses), breaking ties in favor of objects with fewer keys overall.
708  *
709  * The clause_attnums is an array of bitmaps, storing attnums for individual
710  * clauses. A NULL element means the clause is either incompatible or already
711  * estimated.
712  *
713  * XXX If multiple statistics objects tie on both criteria, then which object
714  * is chosen depends on the order that they appear in the stats list. Perhaps
715  * further tiebreakers are needed.
716  */
717 StatisticExtInfo *
choose_best_statistics(List * stats,char requiredkind,Bitmapset ** clause_attnums,int nclauses)718 choose_best_statistics(List *stats, char requiredkind,
719 					   Bitmapset **clause_attnums, int nclauses)
720 {
721 	ListCell   *lc;
722 	StatisticExtInfo *best_match = NULL;
723 	int			best_num_matched = 2;	/* goal #1: maximize */
724 	int			best_match_keys = (STATS_MAX_DIMENSIONS + 1);	/* goal #2: minimize */
725 
726 	foreach(lc, stats)
727 	{
728 		int			i;
729 		StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
730 		Bitmapset  *matched = NULL;
731 		int			num_matched;
732 		int			numkeys;
733 
734 		/* skip statistics that are not of the correct type */
735 		if (info->kind != requiredkind)
736 			continue;
737 
738 		/*
739 		 * Collect attributes in remaining (unestimated) clauses fully covered
740 		 * by this statistic object.
741 		 */
742 		for (i = 0; i < nclauses; i++)
743 		{
744 			/* ignore incompatible/estimated clauses */
745 			if (!clause_attnums[i])
746 				continue;
747 
748 			/* ignore clauses that are not covered by this object */
749 			if (!bms_is_subset(clause_attnums[i], info->keys))
750 				continue;
751 
752 			matched = bms_add_members(matched, clause_attnums[i]);
753 		}
754 
755 		num_matched = bms_num_members(matched);
756 		bms_free(matched);
757 
758 		/*
759 		 * save the actual number of keys in the stats so that we can choose
760 		 * the narrowest stats with the most matching keys.
761 		 */
762 		numkeys = bms_num_members(info->keys);
763 
764 		/*
765 		 * Use this object when it increases the number of matched clauses or
766 		 * when it matches the same number of attributes but these stats have
767 		 * fewer keys than any previous match.
768 		 */
769 		if (num_matched > best_num_matched ||
770 			(num_matched == best_num_matched && numkeys < best_match_keys))
771 		{
772 			best_match = info;
773 			best_num_matched = num_matched;
774 			best_match_keys = numkeys;
775 		}
776 	}
777 
778 	return best_match;
779 }
780 
781 /*
782  * statext_is_compatible_clause_internal
783  *		Determines if the clause is compatible with MCV lists.
784  *
785  * Does the heavy lifting of actually inspecting the clauses for
786  * statext_is_compatible_clause. It needs to be split like this because
787  * of recursion.  The attnums bitmap is an input/output parameter collecting
788  * attribute numbers from all compatible clauses (recursively).
789  */
790 static bool
statext_is_compatible_clause_internal(PlannerInfo * root,Node * clause,Index relid,Bitmapset ** attnums)791 statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
792 									  Index relid, Bitmapset **attnums)
793 {
794 	/* Look inside any binary-compatible relabeling (as in examine_variable) */
795 	if (IsA(clause, RelabelType))
796 		clause = (Node *) ((RelabelType *) clause)->arg;
797 
798 	/* plain Var references (boolean Vars or recursive checks) */
799 	if (IsA(clause, Var))
800 	{
801 		Var		   *var = (Var *) clause;
802 
803 		/* Ensure var is from the correct relation */
804 		if (var->varno != relid)
805 			return false;
806 
807 		/* we also better ensure the Var is from the current level */
808 		if (var->varlevelsup > 0)
809 			return false;
810 
811 		/* Also skip system attributes (we don't allow stats on those). */
812 		if (!AttrNumberIsForUserDefinedAttr(var->varattno))
813 			return false;
814 
815 		*attnums = bms_add_member(*attnums, var->varattno);
816 
817 		return true;
818 	}
819 
820 	/* (Var op Const) or (Const op Var) */
821 	if (is_opclause(clause))
822 	{
823 		RangeTblEntry *rte = root->simple_rte_array[relid];
824 		OpExpr	   *expr = (OpExpr *) clause;
825 		Var		   *var;
826 
827 		/* Only expressions with two arguments are considered compatible. */
828 		if (list_length(expr->args) != 2)
829 			return false;
830 
831 		/* Check if the expression the right shape (one Var, one Const) */
832 		if (!examine_opclause_expression(expr, &var, NULL, NULL))
833 			return false;
834 
835 		/*
836 		 * If it's not one of the supported operators ("=", "<", ">", etc.),
837 		 * just ignore the clause, as it's not compatible with MCV lists.
838 		 *
839 		 * This uses the function for estimating selectivity, not the operator
840 		 * directly (a bit awkward, but well ...).
841 		 */
842 		switch (get_oprrest(expr->opno))
843 		{
844 			case F_EQSEL:
845 			case F_NEQSEL:
846 			case F_SCALARLTSEL:
847 			case F_SCALARLESEL:
848 			case F_SCALARGTSEL:
849 			case F_SCALARGESEL:
850 				/* supported, will continue with inspection of the Var */
851 				break;
852 
853 			default:
854 				/* other estimators are considered unknown/unsupported */
855 				return false;
856 		}
857 
858 		/*
859 		 * If there are any securityQuals on the RTE from security barrier
860 		 * views or RLS policies, then the user may not have access to all the
861 		 * table's data, and we must check that the operator is leak-proof.
862 		 *
863 		 * If the operator is leaky, then we must ignore this clause for the
864 		 * purposes of estimating with MCV lists, otherwise the operator might
865 		 * reveal values from the MCV list that the user doesn't have
866 		 * permission to see.
867 		 */
868 		if (rte->securityQuals != NIL &&
869 			!get_func_leakproof(get_opcode(expr->opno)))
870 			return false;
871 
872 		return statext_is_compatible_clause_internal(root, (Node *) var,
873 													 relid, attnums);
874 	}
875 
876 	/* AND/OR/NOT clause */
877 	if (is_andclause(clause) ||
878 		is_orclause(clause) ||
879 		is_notclause(clause))
880 	{
881 		/*
882 		 * AND/OR/NOT-clauses are supported if all sub-clauses are supported
883 		 *
884 		 * Perhaps we could improve this by handling mixed cases, when some of
885 		 * the clauses are supported and some are not. Selectivity for the
886 		 * supported subclauses would be computed using extended statistics,
887 		 * and the remaining clauses would be estimated using the traditional
888 		 * algorithm (product of selectivities).
889 		 *
890 		 * It however seems overly complex, and in a way we already do that
891 		 * because if we reject the whole clause as unsupported here, it will
892 		 * be eventually passed to clauselist_selectivity() which does exactly
893 		 * this (split into supported/unsupported clauses etc).
894 		 */
895 		BoolExpr   *expr = (BoolExpr *) clause;
896 		ListCell   *lc;
897 
898 		foreach(lc, expr->args)
899 		{
900 			/*
901 			 * Had we found incompatible clause in the arguments, treat the
902 			 * whole clause as incompatible.
903 			 */
904 			if (!statext_is_compatible_clause_internal(root,
905 													   (Node *) lfirst(lc),
906 													   relid, attnums))
907 				return false;
908 		}
909 
910 		return true;
911 	}
912 
913 	/* Var IS NULL */
914 	if (IsA(clause, NullTest))
915 	{
916 		NullTest   *nt = (NullTest *) clause;
917 
918 		/*
919 		 * Only simple (Var IS NULL) expressions supported for now. Maybe we
920 		 * could use examine_variable to fix this?
921 		 */
922 		if (!IsA(nt->arg, Var))
923 			return false;
924 
925 		return statext_is_compatible_clause_internal(root, (Node *) (nt->arg),
926 													 relid, attnums);
927 	}
928 
929 	return false;
930 }
931 
932 /*
933  * statext_is_compatible_clause
934  *		Determines if the clause is compatible with MCV lists.
935  *
936  * Currently, we only support three types of clauses:
937  *
938  * (a) OpExprs of the form (Var op Const), or (Const op Var), where the op
939  * is one of ("=", "<", ">", ">=", "<=")
940  *
941  * (b) (Var IS [NOT] NULL)
942  *
943  * (c) combinations using AND/OR/NOT
944  *
945  * In the future, the range of supported clauses may be expanded to more
946  * complex cases, for example (Var op Var).
947  */
948 static bool
statext_is_compatible_clause(PlannerInfo * root,Node * clause,Index relid,Bitmapset ** attnums)949 statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
950 							 Bitmapset **attnums)
951 {
952 	RangeTblEntry *rte = root->simple_rte_array[relid];
953 	RestrictInfo *rinfo = (RestrictInfo *) clause;
954 	Oid			userid;
955 
956 	if (!IsA(rinfo, RestrictInfo))
957 		return false;
958 
959 	/* Pseudoconstants are not really interesting here. */
960 	if (rinfo->pseudoconstant)
961 		return false;
962 
963 	/* clauses referencing multiple varnos are incompatible */
964 	if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
965 		return false;
966 
967 	/* Check the clause and determine what attributes it references. */
968 	if (!statext_is_compatible_clause_internal(root, (Node *) rinfo->clause,
969 											   relid, attnums))
970 		return false;
971 
972 	/*
973 	 * Check that the user has permission to read all these attributes.  Use
974 	 * checkAsUser if it's set, in case we're accessing the table via a view.
975 	 */
976 	userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
977 
978 	if (pg_class_aclcheck(rte->relid, userid, ACL_SELECT) != ACLCHECK_OK)
979 	{
980 		/* Don't have table privilege, must check individual columns */
981 		if (bms_is_member(InvalidAttrNumber, *attnums))
982 		{
983 			/* Have a whole-row reference, must have access to all columns */
984 			if (pg_attribute_aclcheck_all(rte->relid, userid, ACL_SELECT,
985 										  ACLMASK_ALL) != ACLCHECK_OK)
986 				return false;
987 		}
988 		else
989 		{
990 			/* Check the columns referenced by the clause */
991 			int			attnum = -1;
992 
993 			while ((attnum = bms_next_member(*attnums, attnum)) >= 0)
994 			{
995 				if (pg_attribute_aclcheck(rte->relid, attnum, userid,
996 										  ACL_SELECT) != ACLCHECK_OK)
997 					return false;
998 			}
999 		}
1000 	}
1001 
1002 	/* If we reach here, the clause is OK */
1003 	return true;
1004 }
1005 
1006 /*
1007  * statext_mcv_clauselist_selectivity
1008  *		Estimate clauses using the best multi-column statistics.
1009  *
1010  * Selects the best extended (multi-column) statistic on a table (measured by
1011  * the number of attributes extracted from the clauses and covered by it), and
1012  * computes the selectivity for the supplied clauses.
1013  *
1014  * One of the main challenges with using MCV lists is how to extrapolate the
1015  * estimate to the data not covered by the MCV list. To do that, we compute
1016  * not only the "MCV selectivity" (selectivities for MCV items matching the
1017  * supplied clauses), but also a couple of derived selectivities:
1018  *
1019  * - simple selectivity:  Computed without extended statistic, i.e. as if the
1020  * columns/clauses were independent
1021  *
1022  * - base selectivity:  Similar to simple selectivity, but is computed using
1023  * the extended statistic by adding up the base frequencies (that we compute
1024  * and store for each MCV item) of matching MCV items.
1025  *
1026  * - total selectivity: Selectivity covered by the whole MCV list.
1027  *
1028  * - other selectivity: A selectivity estimate for data not covered by the MCV
1029  * list (i.e. satisfying the clauses, but not common enough to make it into
1030  * the MCV list)
1031  *
1032  * Note: While simple and base selectivities are defined in a quite similar
1033  * way, the values are computed differently and are not therefore equal. The
1034  * simple selectivity is computed as a product of per-clause estimates, while
1035  * the base selectivity is computed by adding up base frequencies of matching
1036  * items of the multi-column MCV list. So the values may differ for two main
1037  * reasons - (a) the MCV list may not cover 100% of the data and (b) some of
1038  * the MCV items did not match the estimated clauses.
1039  *
1040  * As both (a) and (b) reduce the base selectivity value, it generally holds
1041  * that (simple_selectivity >= base_selectivity). If the MCV list covers all
1042  * the data, the values may be equal.
1043  *
1044  * So, (simple_selectivity - base_selectivity) is an estimate for the part
1045  * not covered by the MCV list, and (mcv_selectivity - base_selectivity) may
1046  * be seen as a correction for the part covered by the MCV list. Those two
1047  * statements are actually equivalent.
1048  *
1049  * Note: Due to rounding errors and minor differences in how the estimates
1050  * are computed, the inequality may not always hold. Which is why we clamp
1051  * the selectivities to prevent strange estimate (negative etc.).
1052  *
1053  * 'estimatedclauses' is an input/output parameter.  We set bits for the
1054  * 0-based 'clauses' indexes we estimate for and also skip clause items that
1055  * already have a bit set.
1056  *
1057  * XXX If we were to use multiple statistics, this is where it would happen.
1058  * We would simply repeat this on a loop on the "remaining" clauses, possibly
1059  * using the already estimated clauses as conditions (and combining the values
1060  * using conditional probability formula).
1061  */
1062 static Selectivity
statext_mcv_clauselist_selectivity(PlannerInfo * root,List * clauses,int varRelid,JoinType jointype,SpecialJoinInfo * sjinfo,RelOptInfo * rel,Bitmapset ** estimatedclauses)1063 statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
1064 								   JoinType jointype, SpecialJoinInfo *sjinfo,
1065 								   RelOptInfo *rel, Bitmapset **estimatedclauses)
1066 {
1067 	ListCell   *l;
1068 	Bitmapset **list_attnums;
1069 	int			listidx;
1070 	StatisticExtInfo *stat;
1071 	List	   *stat_clauses;
1072 	Selectivity simple_sel,
1073 				mcv_sel,
1074 				mcv_basesel,
1075 				mcv_totalsel,
1076 				other_sel,
1077 				sel;
1078 
1079 	/* check if there's any stats that might be useful for us. */
1080 	if (!has_stats_of_kind(rel->statlist, STATS_EXT_MCV))
1081 		return 1.0;
1082 
1083 	list_attnums = (Bitmapset **) palloc(sizeof(Bitmapset *) *
1084 										 list_length(clauses));
1085 
1086 	/*
1087 	 * Pre-process the clauses list to extract the attnums seen in each item.
1088 	 * We need to determine if there's any clauses which will be useful for
1089 	 * selectivity estimations with extended stats. Along the way we'll record
1090 	 * all of the attnums for each clause in a list which we'll reference
1091 	 * later so we don't need to repeat the same work again. We'll also keep
1092 	 * track of all attnums seen.
1093 	 *
1094 	 * We also skip clauses that we already estimated using different types of
1095 	 * statistics (we treat them as incompatible).
1096 	 */
1097 	listidx = 0;
1098 	foreach(l, clauses)
1099 	{
1100 		Node	   *clause = (Node *) lfirst(l);
1101 		Bitmapset  *attnums = NULL;
1102 
1103 		if (!bms_is_member(listidx, *estimatedclauses) &&
1104 			statext_is_compatible_clause(root, clause, rel->relid, &attnums))
1105 			list_attnums[listidx] = attnums;
1106 		else
1107 			list_attnums[listidx] = NULL;
1108 
1109 		listidx++;
1110 	}
1111 
1112 	/* find the best suited statistics object for these attnums */
1113 	stat = choose_best_statistics(rel->statlist, STATS_EXT_MCV,
1114 								  list_attnums, list_length(clauses));
1115 
1116 	/* if no matching stats could be found then we've nothing to do */
1117 	if (!stat)
1118 		return 1.0;
1119 
1120 	/* Ensure choose_best_statistics produced an expected stats type. */
1121 	Assert(stat->kind == STATS_EXT_MCV);
1122 
1123 	/* now filter the clauses to be estimated using the selected MCV */
1124 	stat_clauses = NIL;
1125 
1126 	listidx = 0;
1127 	foreach(l, clauses)
1128 	{
1129 		/*
1130 		 * If the clause is compatible with the selected statistics, mark it
1131 		 * as estimated and add it to the list to estimate.
1132 		 */
1133 		if (list_attnums[listidx] != NULL &&
1134 			bms_is_subset(list_attnums[listidx], stat->keys))
1135 		{
1136 			stat_clauses = lappend(stat_clauses, (Node *) lfirst(l));
1137 			*estimatedclauses = bms_add_member(*estimatedclauses, listidx);
1138 		}
1139 
1140 		listidx++;
1141 	}
1142 
1143 	/*
1144 	 * First compute "simple" selectivity, i.e. without the extended
1145 	 * statistics, and essentially assuming independence of the
1146 	 * columns/clauses. We'll then use the various selectivities computed from
1147 	 * MCV list to improve it.
1148 	 */
1149 	simple_sel = clauselist_selectivity_simple(root, stat_clauses, varRelid,
1150 											   jointype, sjinfo, NULL);
1151 
1152 	/*
1153 	 * Now compute the multi-column estimate from the MCV list, along with the
1154 	 * other selectivities (base & total selectivity).
1155 	 */
1156 	mcv_sel = mcv_clauselist_selectivity(root, stat, stat_clauses, varRelid,
1157 										 jointype, sjinfo, rel,
1158 										 &mcv_basesel, &mcv_totalsel);
1159 
1160 	/* Estimated selectivity of values not covered by MCV matches */
1161 	other_sel = simple_sel - mcv_basesel;
1162 	CLAMP_PROBABILITY(other_sel);
1163 
1164 	/* The non-MCV selectivity can't exceed the 1 - mcv_totalsel. */
1165 	if (other_sel > 1.0 - mcv_totalsel)
1166 		other_sel = 1.0 - mcv_totalsel;
1167 
1168 	/* Overall selectivity is the combination of MCV and non-MCV estimates. */
1169 	sel = mcv_sel + other_sel;
1170 	CLAMP_PROBABILITY(sel);
1171 
1172 	return sel;
1173 }
1174 
1175 /*
1176  * statext_clauselist_selectivity
1177  *		Estimate clauses using the best multi-column statistics.
1178  */
1179 Selectivity
statext_clauselist_selectivity(PlannerInfo * root,List * clauses,int varRelid,JoinType jointype,SpecialJoinInfo * sjinfo,RelOptInfo * rel,Bitmapset ** estimatedclauses)1180 statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
1181 							   JoinType jointype, SpecialJoinInfo *sjinfo,
1182 							   RelOptInfo *rel, Bitmapset **estimatedclauses)
1183 {
1184 	Selectivity sel;
1185 
1186 	/* First, try estimating clauses using a multivariate MCV list. */
1187 	sel = statext_mcv_clauselist_selectivity(root, clauses, varRelid, jointype,
1188 											 sjinfo, rel, estimatedclauses);
1189 
1190 	/*
1191 	 * Then, apply functional dependencies on the remaining clauses by calling
1192 	 * dependencies_clauselist_selectivity.  Pass 'estimatedclauses' so the
1193 	 * function can properly skip clauses already estimated above.
1194 	 *
1195 	 * The reasoning for applying dependencies last is that the more complex
1196 	 * stats can track more complex correlations between the attributes, and
1197 	 * so may be considered more reliable.
1198 	 *
1199 	 * For example, MCV list can give us an exact selectivity for values in
1200 	 * two columns, while functional dependencies can only provide information
1201 	 * about the overall strength of the dependency.
1202 	 */
1203 	sel *= dependencies_clauselist_selectivity(root, clauses, varRelid,
1204 											   jointype, sjinfo, rel,
1205 											   estimatedclauses);
1206 
1207 	return sel;
1208 }
1209 
1210 /*
1211  * examine_operator_expression
1212  *		Split expression into Var and Const parts.
1213  *
1214  * Attempts to match the arguments to either (Var op Const) or (Const op Var),
1215  * possibly with a RelabelType on top. When the expression matches this form,
1216  * returns true, otherwise returns false.
1217  *
1218  * Optionally returns pointers to the extracted Var/Const nodes, when passed
1219  * non-null pointers (varp, cstp and varonleftp). The varonleftp flag specifies
1220  * on which side of the operator we found the Var node.
1221  */
1222 bool
examine_opclause_expression(OpExpr * expr,Var ** varp,Const ** cstp,bool * varonleftp)1223 examine_opclause_expression(OpExpr *expr, Var **varp, Const **cstp, bool *varonleftp)
1224 {
1225 	Var	   *var;
1226 	Const  *cst;
1227 	bool	varonleft;
1228 	Node   *leftop,
1229 		   *rightop;
1230 
1231 	/* enforced by statext_is_compatible_clause_internal */
1232 	Assert(list_length(expr->args) == 2);
1233 
1234 	leftop = linitial(expr->args);
1235 	rightop = lsecond(expr->args);
1236 
1237 	/* strip RelabelType from either side of the expression */
1238 	if (IsA(leftop, RelabelType))
1239 		leftop = (Node *) ((RelabelType *) leftop)->arg;
1240 
1241 	if (IsA(rightop, RelabelType))
1242 		rightop = (Node *) ((RelabelType *) rightop)->arg;
1243 
1244 	if (IsA(leftop, Var) && IsA(rightop, Const))
1245 	{
1246 		var = (Var *) leftop;
1247 		cst = (Const *) rightop;
1248 		varonleft = true;
1249 	}
1250 	else if (IsA(leftop, Const) && IsA(rightop, Var))
1251 	{
1252 		var = (Var *) rightop;
1253 		cst = (Const *) leftop;
1254 		varonleft = false;
1255 	}
1256 	else
1257 		return false;
1258 
1259 	/* return pointers to the extracted parts if requested */
1260 	if (varp)
1261 		*varp = var;
1262 
1263 	if (cstp)
1264 		*cstp = cst;
1265 
1266 	if (varonleftp)
1267 		*varonleftp = varonleft;
1268 
1269 	return true;
1270 }
1271