1 /*-------------------------------------------------------------------------
2  *
3  * extended_stats.c
4  *	  POSTGRES extended statistics
5  *
6  * Generic code supporting statistics objects created via CREATE STATISTICS.
7  *
8  *
9  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
10  * Portions Copyright (c) 1994, Regents of the University of California
11  *
12  * IDENTIFICATION
13  *	  src/backend/statistics/extended_stats.c
14  *
15  *-------------------------------------------------------------------------
16  */
17 #include "postgres.h"
18 
19 #include "access/genam.h"
20 #include "access/heapam.h"
21 #include "access/htup_details.h"
22 #include "catalog/indexing.h"
23 #include "catalog/pg_collation.h"
24 #include "catalog/pg_statistic_ext.h"
25 #include "nodes/relation.h"
26 #include "postmaster/autovacuum.h"
27 #include "statistics/extended_stats_internal.h"
28 #include "statistics/statistics.h"
29 #include "utils/builtins.h"
30 #include "utils/fmgroids.h"
31 #include "utils/lsyscache.h"
32 #include "utils/memutils.h"
33 #include "utils/rel.h"
34 #include "utils/syscache.h"
35 
36 
37 /*
38  * Used internally to refer to an individual statistics object, i.e.,
39  * a pg_statistic_ext entry.
40  */
41 typedef struct StatExtEntry
42 {
43 	Oid			statOid;		/* OID of pg_statistic_ext entry */
44 	char	   *schema;			/* statistics object's schema */
45 	char	   *name;			/* statistics object's name */
46 	Bitmapset  *columns;		/* attribute numbers covered by the object */
47 	List	   *types;			/* 'char' list of enabled statistics kinds */
48 } StatExtEntry;
49 
50 
51 static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
52 static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
53 					  int nvacatts, VacAttrStats **vacatts);
54 static void statext_store(Relation pg_stext, Oid relid,
55 			  MVNDistinct *ndistinct, MVDependencies *dependencies,
56 			  VacAttrStats **stats);
57 
58 
59 /*
60  * Compute requested extended stats, using the rows sampled for the plain
61  * (single-column) stats.
62  *
63  * This fetches a list of stats types from pg_statistic_ext, computes the
64  * requested stats, and serializes them back into the catalog.
65  */
66 void
BuildRelationExtStatistics(Relation onerel,double totalrows,int numrows,HeapTuple * rows,int natts,VacAttrStats ** vacattrstats)67 BuildRelationExtStatistics(Relation onerel, double totalrows,
68 						   int numrows, HeapTuple *rows,
69 						   int natts, VacAttrStats **vacattrstats)
70 {
71 	Relation	pg_stext;
72 	ListCell   *lc;
73 	List	   *stats;
74 	MemoryContext cxt;
75 	MemoryContext oldcxt;
76 
77 	pg_stext = heap_open(StatisticExtRelationId, RowExclusiveLock);
78 	stats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
79 
80 	/* memory context for building each statistics object */
81 	cxt = AllocSetContextCreate(CurrentMemoryContext,
82 								"BuildRelationExtStatistics",
83 								ALLOCSET_DEFAULT_SIZES);
84 	oldcxt = MemoryContextSwitchTo(cxt);
85 
86 	foreach(lc, stats)
87 	{
88 		StatExtEntry *stat = (StatExtEntry *) lfirst(lc);
89 		MVNDistinct *ndistinct = NULL;
90 		MVDependencies *dependencies = NULL;
91 		VacAttrStats **stats;
92 		ListCell   *lc2;
93 
94 		/*
95 		 * Check if we can build these stats based on the column analyzed. If
96 		 * not, report this fact (except in autovacuum) and move on.
97 		 */
98 		stats = lookup_var_attr_stats(onerel, stat->columns,
99 									  natts, vacattrstats);
100 		if (!stats)
101 		{
102 			if (!IsAutoVacuumWorkerProcess())
103 				ereport(WARNING,
104 						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
105 						 errmsg("statistics object \"%s.%s\" could not be computed for relation \"%s.%s\"",
106 								stat->schema, stat->name,
107 								get_namespace_name(onerel->rd_rel->relnamespace),
108 								RelationGetRelationName(onerel)),
109 						 errtable(onerel)));
110 			continue;
111 		}
112 
113 		/* check allowed number of dimensions */
114 		Assert(bms_num_members(stat->columns) >= 2 &&
115 			   bms_num_members(stat->columns) <= STATS_MAX_DIMENSIONS);
116 
117 		/* compute statistic of each requested type */
118 		foreach(lc2, stat->types)
119 		{
120 			char		t = (char) lfirst_int(lc2);
121 
122 			if (t == STATS_EXT_NDISTINCT)
123 				ndistinct = statext_ndistinct_build(totalrows, numrows, rows,
124 													stat->columns, stats);
125 			else if (t == STATS_EXT_DEPENDENCIES)
126 				dependencies = statext_dependencies_build(numrows, rows,
127 														  stat->columns, stats);
128 		}
129 
130 		/* store the statistics in the catalog */
131 		statext_store(pg_stext, stat->statOid, ndistinct, dependencies, stats);
132 
133 		/* free the data used for building this statistics object */
134 		MemoryContextReset(cxt);
135 	}
136 
137 	MemoryContextSwitchTo(oldcxt);
138 	MemoryContextDelete(cxt);
139 
140 	list_free(stats);
141 
142 	heap_close(pg_stext, RowExclusiveLock);
143 }
144 
145 /*
146  * statext_is_kind_built
147  *		Is this stat kind built in the given pg_statistic_ext tuple?
148  */
149 bool
statext_is_kind_built(HeapTuple htup,char type)150 statext_is_kind_built(HeapTuple htup, char type)
151 {
152 	AttrNumber	attnum;
153 
154 	switch (type)
155 	{
156 		case STATS_EXT_NDISTINCT:
157 			attnum = Anum_pg_statistic_ext_stxndistinct;
158 			break;
159 
160 		case STATS_EXT_DEPENDENCIES:
161 			attnum = Anum_pg_statistic_ext_stxdependencies;
162 			break;
163 
164 		default:
165 			elog(ERROR, "unexpected statistics type requested: %d", type);
166 	}
167 
168 	return !heap_attisnull(htup, attnum, NULL);
169 }
170 
171 /*
172  * Return a list (of StatExtEntry) of statistics objects for the given relation.
173  */
174 static List *
fetch_statentries_for_relation(Relation pg_statext,Oid relid)175 fetch_statentries_for_relation(Relation pg_statext, Oid relid)
176 {
177 	SysScanDesc scan;
178 	ScanKeyData skey;
179 	HeapTuple	htup;
180 	List	   *result = NIL;
181 
182 	/*
183 	 * Prepare to scan pg_statistic_ext for entries having stxrelid = this
184 	 * rel.
185 	 */
186 	ScanKeyInit(&skey,
187 				Anum_pg_statistic_ext_stxrelid,
188 				BTEqualStrategyNumber, F_OIDEQ,
189 				ObjectIdGetDatum(relid));
190 
191 	scan = systable_beginscan(pg_statext, StatisticExtRelidIndexId, true,
192 							  NULL, 1, &skey);
193 
194 	while (HeapTupleIsValid(htup = systable_getnext(scan)))
195 	{
196 		StatExtEntry *entry;
197 		Datum		datum;
198 		bool		isnull;
199 		int			i;
200 		ArrayType  *arr;
201 		char	   *enabled;
202 		Form_pg_statistic_ext staForm;
203 
204 		entry = palloc0(sizeof(StatExtEntry));
205 		entry->statOid = HeapTupleGetOid(htup);
206 		staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
207 		entry->schema = get_namespace_name(staForm->stxnamespace);
208 		entry->name = pstrdup(NameStr(staForm->stxname));
209 		for (i = 0; i < staForm->stxkeys.dim1; i++)
210 		{
211 			entry->columns = bms_add_member(entry->columns,
212 											staForm->stxkeys.values[i]);
213 		}
214 
215 		/* decode the stxkind char array into a list of chars */
216 		datum = SysCacheGetAttr(STATEXTOID, htup,
217 								Anum_pg_statistic_ext_stxkind, &isnull);
218 		Assert(!isnull);
219 		arr = DatumGetArrayTypeP(datum);
220 		if (ARR_NDIM(arr) != 1 ||
221 			ARR_HASNULL(arr) ||
222 			ARR_ELEMTYPE(arr) != CHAROID)
223 			elog(ERROR, "stxkind is not a 1-D char array");
224 		enabled = (char *) ARR_DATA_PTR(arr);
225 		for (i = 0; i < ARR_DIMS(arr)[0]; i++)
226 		{
227 			Assert((enabled[i] == STATS_EXT_NDISTINCT) ||
228 				   (enabled[i] == STATS_EXT_DEPENDENCIES));
229 			entry->types = lappend_int(entry->types, (int) enabled[i]);
230 		}
231 
232 		result = lappend(result, entry);
233 	}
234 
235 	systable_endscan(scan);
236 
237 	return result;
238 }
239 
240 /*
241  * Using 'vacatts' of size 'nvacatts' as input data, return a newly built
242  * VacAttrStats array which includes only the items corresponding to
243  * attributes indicated by 'stxkeys'. If we don't have all of the per column
244  * stats available to compute the extended stats, then we return NULL to indicate
245  * to the caller that the stats should not be built.
246  */
247 static VacAttrStats **
lookup_var_attr_stats(Relation rel,Bitmapset * attrs,int nvacatts,VacAttrStats ** vacatts)248 lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
249 					  int nvacatts, VacAttrStats **vacatts)
250 {
251 	int			i = 0;
252 	int			x = -1;
253 	VacAttrStats **stats;
254 
255 	stats = (VacAttrStats **)
256 		palloc(bms_num_members(attrs) * sizeof(VacAttrStats *));
257 
258 	/* lookup VacAttrStats info for the requested columns (same attnum) */
259 	while ((x = bms_next_member(attrs, x)) >= 0)
260 	{
261 		int			j;
262 
263 		stats[i] = NULL;
264 		for (j = 0; j < nvacatts; j++)
265 		{
266 			if (x == vacatts[j]->tupattnum)
267 			{
268 				stats[i] = vacatts[j];
269 				break;
270 			}
271 		}
272 
273 		if (!stats[i])
274 		{
275 			/*
276 			 * Looks like stats were not gathered for one of the columns
277 			 * required. We'll be unable to build the extended stats without
278 			 * this column.
279 			 */
280 			pfree(stats);
281 			return NULL;
282 		}
283 
284 		/*
285 		 * Sanity check that the column is not dropped - stats should have
286 		 * been removed in this case.
287 		 */
288 		Assert(!stats[i]->attr->attisdropped);
289 
290 		i++;
291 	}
292 
293 	return stats;
294 }
295 
296 /*
297  * statext_store
298  *	Serializes the statistics and stores them into the pg_statistic_ext tuple.
299  */
300 static void
statext_store(Relation pg_stext,Oid statOid,MVNDistinct * ndistinct,MVDependencies * dependencies,VacAttrStats ** stats)301 statext_store(Relation pg_stext, Oid statOid,
302 			  MVNDistinct *ndistinct, MVDependencies *dependencies,
303 			  VacAttrStats **stats)
304 {
305 	HeapTuple	stup,
306 				oldtup;
307 	Datum		values[Natts_pg_statistic_ext];
308 	bool		nulls[Natts_pg_statistic_ext];
309 	bool		replaces[Natts_pg_statistic_ext];
310 
311 	memset(nulls, true, sizeof(nulls));
312 	memset(replaces, false, sizeof(replaces));
313 	memset(values, 0, sizeof(values));
314 
315 	/*
316 	 * Construct a new pg_statistic_ext tuple, replacing the calculated stats.
317 	 */
318 	if (ndistinct != NULL)
319 	{
320 		bytea	   *data = statext_ndistinct_serialize(ndistinct);
321 
322 		nulls[Anum_pg_statistic_ext_stxndistinct - 1] = (data == NULL);
323 		values[Anum_pg_statistic_ext_stxndistinct - 1] = PointerGetDatum(data);
324 	}
325 
326 	if (dependencies != NULL)
327 	{
328 		bytea	   *data = statext_dependencies_serialize(dependencies);
329 
330 		nulls[Anum_pg_statistic_ext_stxdependencies - 1] = (data == NULL);
331 		values[Anum_pg_statistic_ext_stxdependencies - 1] = PointerGetDatum(data);
332 	}
333 
334 	/* always replace the value (either by bytea or NULL) */
335 	replaces[Anum_pg_statistic_ext_stxndistinct - 1] = true;
336 	replaces[Anum_pg_statistic_ext_stxdependencies - 1] = true;
337 
338 	/* there should already be a pg_statistic_ext tuple */
339 	oldtup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid));
340 	if (!HeapTupleIsValid(oldtup))
341 		elog(ERROR, "cache lookup failed for statistics object %u", statOid);
342 
343 	/* replace it */
344 	stup = heap_modify_tuple(oldtup,
345 							 RelationGetDescr(pg_stext),
346 							 values,
347 							 nulls,
348 							 replaces);
349 	ReleaseSysCache(oldtup);
350 	CatalogTupleUpdate(pg_stext, &stup->t_self, stup);
351 
352 	heap_freetuple(stup);
353 }
354 
355 /* initialize multi-dimensional sort */
356 MultiSortSupport
multi_sort_init(int ndims)357 multi_sort_init(int ndims)
358 {
359 	MultiSortSupport mss;
360 
361 	Assert(ndims >= 2);
362 
363 	mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup)
364 									 + sizeof(SortSupportData) * ndims);
365 
366 	mss->ndims = ndims;
367 
368 	return mss;
369 }
370 
371 /*
372  * Prepare sort support info using the given sort operator
373  * at the position 'sortdim'
374  */
375 void
multi_sort_add_dimension(MultiSortSupport mss,int sortdim,Oid oper)376 multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper)
377 {
378 	SortSupport ssup = &mss->ssup[sortdim];
379 
380 	ssup->ssup_cxt = CurrentMemoryContext;
381 	ssup->ssup_collation = DEFAULT_COLLATION_OID;
382 	ssup->ssup_nulls_first = false;
383 	ssup->ssup_cxt = CurrentMemoryContext;
384 
385 	PrepareSortSupportFromOrderingOp(oper, ssup);
386 }
387 
388 /* compare all the dimensions in the selected order */
389 int
multi_sort_compare(const void * a,const void * b,void * arg)390 multi_sort_compare(const void *a, const void *b, void *arg)
391 {
392 	MultiSortSupport mss = (MultiSortSupport) arg;
393 	SortItem   *ia = (SortItem *) a;
394 	SortItem   *ib = (SortItem *) b;
395 	int			i;
396 
397 	for (i = 0; i < mss->ndims; i++)
398 	{
399 		int			compare;
400 
401 		compare = ApplySortComparator(ia->values[i], ia->isnull[i],
402 									  ib->values[i], ib->isnull[i],
403 									  &mss->ssup[i]);
404 
405 		if (compare != 0)
406 			return compare;
407 	}
408 
409 	/* equal by default */
410 	return 0;
411 }
412 
413 /* compare selected dimension */
414 int
multi_sort_compare_dim(int dim,const SortItem * a,const SortItem * b,MultiSortSupport mss)415 multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b,
416 					   MultiSortSupport mss)
417 {
418 	return ApplySortComparator(a->values[dim], a->isnull[dim],
419 							   b->values[dim], b->isnull[dim],
420 							   &mss->ssup[dim]);
421 }
422 
423 int
multi_sort_compare_dims(int start,int end,const SortItem * a,const SortItem * b,MultiSortSupport mss)424 multi_sort_compare_dims(int start, int end,
425 						const SortItem *a, const SortItem *b,
426 						MultiSortSupport mss)
427 {
428 	int			dim;
429 
430 	for (dim = start; dim <= end; dim++)
431 	{
432 		int			r = ApplySortComparator(a->values[dim], a->isnull[dim],
433 											b->values[dim], b->isnull[dim],
434 											&mss->ssup[dim]);
435 
436 		if (r != 0)
437 			return r;
438 	}
439 
440 	return 0;
441 }
442 
443 /*
444  * has_stats_of_kind
445  *		Check whether the list contains statistic of a given kind
446  */
447 bool
has_stats_of_kind(List * stats,char requiredkind)448 has_stats_of_kind(List *stats, char requiredkind)
449 {
450 	ListCell   *l;
451 
452 	foreach(l, stats)
453 	{
454 		StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
455 
456 		if (stat->kind == requiredkind)
457 			return true;
458 	}
459 
460 	return false;
461 }
462 
463 /*
464  * choose_best_statistics
465  *		Look for and return statistics with the specified 'requiredkind' which
466  *		have keys that match at least two of the given attnums.  Return NULL if
467  *		there's no match.
468  *
469  * The current selection criteria is very simple - we choose the statistics
470  * object referencing the most of the requested attributes, breaking ties
471  * in favor of objects with fewer keys overall.
472  *
473  * XXX if multiple statistics objects tie on both criteria, then which object
474  * is chosen depends on the order that they appear in the stats list. Perhaps
475  * further tiebreakers are needed.
476  */
477 StatisticExtInfo *
choose_best_statistics(List * stats,Bitmapset * attnums,char requiredkind)478 choose_best_statistics(List *stats, Bitmapset *attnums, char requiredkind)
479 {
480 	ListCell   *lc;
481 	StatisticExtInfo *best_match = NULL;
482 	int			best_num_matched = 2;	/* goal #1: maximize */
483 	int			best_match_keys = (STATS_MAX_DIMENSIONS + 1);	/* goal #2: minimize */
484 
485 	foreach(lc, stats)
486 	{
487 		StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
488 		int			num_matched;
489 		int			numkeys;
490 		Bitmapset  *matched;
491 
492 		/* skip statistics that are not of the correct type */
493 		if (info->kind != requiredkind)
494 			continue;
495 
496 		/* determine how many attributes of these stats can be matched to */
497 		matched = bms_intersect(attnums, info->keys);
498 		num_matched = bms_num_members(matched);
499 		bms_free(matched);
500 
501 		/*
502 		 * save the actual number of keys in the stats so that we can choose
503 		 * the narrowest stats with the most matching keys.
504 		 */
505 		numkeys = bms_num_members(info->keys);
506 
507 		/*
508 		 * Use this object when it increases the number of matched clauses or
509 		 * when it matches the same number of attributes but these stats have
510 		 * fewer keys than any previous match.
511 		 */
512 		if (num_matched > best_num_matched ||
513 			(num_matched == best_num_matched && numkeys < best_match_keys))
514 		{
515 			best_match = info;
516 			best_num_matched = num_matched;
517 			best_match_keys = numkeys;
518 		}
519 	}
520 
521 	return best_match;
522 }
523