1 /*-------------------------------------------------------------------------
2 *
3 * extended_stats.c
4 * POSTGRES extended statistics
5 *
6 * Generic code supporting statistics objects created via CREATE STATISTICS.
7 *
8 *
9 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
10 * Portions Copyright (c) 1994, Regents of the University of California
11 *
12 * IDENTIFICATION
13 * src/backend/statistics/extended_stats.c
14 *
15 *-------------------------------------------------------------------------
16 */
17 #include "postgres.h"
18
19 #include "access/genam.h"
20 #include "access/heapam.h"
21 #include "access/htup_details.h"
22 #include "catalog/indexing.h"
23 #include "catalog/pg_collation.h"
24 #include "catalog/pg_statistic_ext.h"
25 #include "nodes/relation.h"
26 #include "postmaster/autovacuum.h"
27 #include "statistics/extended_stats_internal.h"
28 #include "statistics/statistics.h"
29 #include "utils/builtins.h"
30 #include "utils/fmgroids.h"
31 #include "utils/lsyscache.h"
32 #include "utils/memutils.h"
33 #include "utils/rel.h"
34 #include "utils/syscache.h"
35
36
37 /*
38 * Used internally to refer to an individual statistics object, i.e.,
39 * a pg_statistic_ext entry.
40 */
41 typedef struct StatExtEntry
42 {
43 Oid statOid; /* OID of pg_statistic_ext entry */
44 char *schema; /* statistics object's schema */
45 char *name; /* statistics object's name */
46 Bitmapset *columns; /* attribute numbers covered by the object */
47 List *types; /* 'char' list of enabled statistics kinds */
48 } StatExtEntry;
49
50
51 static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
52 static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
53 int nvacatts, VacAttrStats **vacatts);
54 static void statext_store(Relation pg_stext, Oid relid,
55 MVNDistinct *ndistinct, MVDependencies *dependencies,
56 VacAttrStats **stats);
57
58
59 /*
60 * Compute requested extended stats, using the rows sampled for the plain
61 * (single-column) stats.
62 *
63 * This fetches a list of stats types from pg_statistic_ext, computes the
64 * requested stats, and serializes them back into the catalog.
65 */
66 void
BuildRelationExtStatistics(Relation onerel,double totalrows,int numrows,HeapTuple * rows,int natts,VacAttrStats ** vacattrstats)67 BuildRelationExtStatistics(Relation onerel, double totalrows,
68 int numrows, HeapTuple *rows,
69 int natts, VacAttrStats **vacattrstats)
70 {
71 Relation pg_stext;
72 ListCell *lc;
73 List *stats;
74 MemoryContext cxt;
75 MemoryContext oldcxt;
76
77 pg_stext = heap_open(StatisticExtRelationId, RowExclusiveLock);
78 stats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
79
80 /* memory context for building each statistics object */
81 cxt = AllocSetContextCreate(CurrentMemoryContext,
82 "BuildRelationExtStatistics",
83 ALLOCSET_DEFAULT_SIZES);
84 oldcxt = MemoryContextSwitchTo(cxt);
85
86 foreach(lc, stats)
87 {
88 StatExtEntry *stat = (StatExtEntry *) lfirst(lc);
89 MVNDistinct *ndistinct = NULL;
90 MVDependencies *dependencies = NULL;
91 VacAttrStats **stats;
92 ListCell *lc2;
93
94 /*
95 * Check if we can build these stats based on the column analyzed. If
96 * not, report this fact (except in autovacuum) and move on.
97 */
98 stats = lookup_var_attr_stats(onerel, stat->columns,
99 natts, vacattrstats);
100 if (!stats)
101 {
102 if (!IsAutoVacuumWorkerProcess())
103 ereport(WARNING,
104 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
105 errmsg("statistics object \"%s.%s\" could not be computed for relation \"%s.%s\"",
106 stat->schema, stat->name,
107 get_namespace_name(onerel->rd_rel->relnamespace),
108 RelationGetRelationName(onerel)),
109 errtable(onerel)));
110 continue;
111 }
112
113 /* check allowed number of dimensions */
114 Assert(bms_num_members(stat->columns) >= 2 &&
115 bms_num_members(stat->columns) <= STATS_MAX_DIMENSIONS);
116
117 /* compute statistic of each requested type */
118 foreach(lc2, stat->types)
119 {
120 char t = (char) lfirst_int(lc2);
121
122 if (t == STATS_EXT_NDISTINCT)
123 ndistinct = statext_ndistinct_build(totalrows, numrows, rows,
124 stat->columns, stats);
125 else if (t == STATS_EXT_DEPENDENCIES)
126 dependencies = statext_dependencies_build(numrows, rows,
127 stat->columns, stats);
128 }
129
130 /* store the statistics in the catalog */
131 statext_store(pg_stext, stat->statOid, ndistinct, dependencies, stats);
132
133 /* free the data used for building this statistics object */
134 MemoryContextReset(cxt);
135 }
136
137 MemoryContextSwitchTo(oldcxt);
138 MemoryContextDelete(cxt);
139
140 list_free(stats);
141
142 heap_close(pg_stext, RowExclusiveLock);
143 }
144
145 /*
146 * statext_is_kind_built
147 * Is this stat kind built in the given pg_statistic_ext tuple?
148 */
149 bool
statext_is_kind_built(HeapTuple htup,char type)150 statext_is_kind_built(HeapTuple htup, char type)
151 {
152 AttrNumber attnum;
153
154 switch (type)
155 {
156 case STATS_EXT_NDISTINCT:
157 attnum = Anum_pg_statistic_ext_stxndistinct;
158 break;
159
160 case STATS_EXT_DEPENDENCIES:
161 attnum = Anum_pg_statistic_ext_stxdependencies;
162 break;
163
164 default:
165 elog(ERROR, "unexpected statistics type requested: %d", type);
166 }
167
168 return !heap_attisnull(htup, attnum, NULL);
169 }
170
171 /*
172 * Return a list (of StatExtEntry) of statistics objects for the given relation.
173 */
174 static List *
fetch_statentries_for_relation(Relation pg_statext,Oid relid)175 fetch_statentries_for_relation(Relation pg_statext, Oid relid)
176 {
177 SysScanDesc scan;
178 ScanKeyData skey;
179 HeapTuple htup;
180 List *result = NIL;
181
182 /*
183 * Prepare to scan pg_statistic_ext for entries having stxrelid = this
184 * rel.
185 */
186 ScanKeyInit(&skey,
187 Anum_pg_statistic_ext_stxrelid,
188 BTEqualStrategyNumber, F_OIDEQ,
189 ObjectIdGetDatum(relid));
190
191 scan = systable_beginscan(pg_statext, StatisticExtRelidIndexId, true,
192 NULL, 1, &skey);
193
194 while (HeapTupleIsValid(htup = systable_getnext(scan)))
195 {
196 StatExtEntry *entry;
197 Datum datum;
198 bool isnull;
199 int i;
200 ArrayType *arr;
201 char *enabled;
202 Form_pg_statistic_ext staForm;
203
204 entry = palloc0(sizeof(StatExtEntry));
205 entry->statOid = HeapTupleGetOid(htup);
206 staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
207 entry->schema = get_namespace_name(staForm->stxnamespace);
208 entry->name = pstrdup(NameStr(staForm->stxname));
209 for (i = 0; i < staForm->stxkeys.dim1; i++)
210 {
211 entry->columns = bms_add_member(entry->columns,
212 staForm->stxkeys.values[i]);
213 }
214
215 /* decode the stxkind char array into a list of chars */
216 datum = SysCacheGetAttr(STATEXTOID, htup,
217 Anum_pg_statistic_ext_stxkind, &isnull);
218 Assert(!isnull);
219 arr = DatumGetArrayTypeP(datum);
220 if (ARR_NDIM(arr) != 1 ||
221 ARR_HASNULL(arr) ||
222 ARR_ELEMTYPE(arr) != CHAROID)
223 elog(ERROR, "stxkind is not a 1-D char array");
224 enabled = (char *) ARR_DATA_PTR(arr);
225 for (i = 0; i < ARR_DIMS(arr)[0]; i++)
226 {
227 Assert((enabled[i] == STATS_EXT_NDISTINCT) ||
228 (enabled[i] == STATS_EXT_DEPENDENCIES));
229 entry->types = lappend_int(entry->types, (int) enabled[i]);
230 }
231
232 result = lappend(result, entry);
233 }
234
235 systable_endscan(scan);
236
237 return result;
238 }
239
240 /*
241 * Using 'vacatts' of size 'nvacatts' as input data, return a newly built
242 * VacAttrStats array which includes only the items corresponding to
243 * attributes indicated by 'stxkeys'. If we don't have all of the per column
244 * stats available to compute the extended stats, then we return NULL to indicate
245 * to the caller that the stats should not be built.
246 */
247 static VacAttrStats **
lookup_var_attr_stats(Relation rel,Bitmapset * attrs,int nvacatts,VacAttrStats ** vacatts)248 lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
249 int nvacatts, VacAttrStats **vacatts)
250 {
251 int i = 0;
252 int x = -1;
253 VacAttrStats **stats;
254
255 stats = (VacAttrStats **)
256 palloc(bms_num_members(attrs) * sizeof(VacAttrStats *));
257
258 /* lookup VacAttrStats info for the requested columns (same attnum) */
259 while ((x = bms_next_member(attrs, x)) >= 0)
260 {
261 int j;
262
263 stats[i] = NULL;
264 for (j = 0; j < nvacatts; j++)
265 {
266 if (x == vacatts[j]->tupattnum)
267 {
268 stats[i] = vacatts[j];
269 break;
270 }
271 }
272
273 if (!stats[i])
274 {
275 /*
276 * Looks like stats were not gathered for one of the columns
277 * required. We'll be unable to build the extended stats without
278 * this column.
279 */
280 pfree(stats);
281 return NULL;
282 }
283
284 /*
285 * Sanity check that the column is not dropped - stats should have
286 * been removed in this case.
287 */
288 Assert(!stats[i]->attr->attisdropped);
289
290 i++;
291 }
292
293 return stats;
294 }
295
296 /*
297 * statext_store
298 * Serializes the statistics and stores them into the pg_statistic_ext tuple.
299 */
300 static void
statext_store(Relation pg_stext,Oid statOid,MVNDistinct * ndistinct,MVDependencies * dependencies,VacAttrStats ** stats)301 statext_store(Relation pg_stext, Oid statOid,
302 MVNDistinct *ndistinct, MVDependencies *dependencies,
303 VacAttrStats **stats)
304 {
305 HeapTuple stup,
306 oldtup;
307 Datum values[Natts_pg_statistic_ext];
308 bool nulls[Natts_pg_statistic_ext];
309 bool replaces[Natts_pg_statistic_ext];
310
311 memset(nulls, true, sizeof(nulls));
312 memset(replaces, false, sizeof(replaces));
313 memset(values, 0, sizeof(values));
314
315 /*
316 * Construct a new pg_statistic_ext tuple, replacing the calculated stats.
317 */
318 if (ndistinct != NULL)
319 {
320 bytea *data = statext_ndistinct_serialize(ndistinct);
321
322 nulls[Anum_pg_statistic_ext_stxndistinct - 1] = (data == NULL);
323 values[Anum_pg_statistic_ext_stxndistinct - 1] = PointerGetDatum(data);
324 }
325
326 if (dependencies != NULL)
327 {
328 bytea *data = statext_dependencies_serialize(dependencies);
329
330 nulls[Anum_pg_statistic_ext_stxdependencies - 1] = (data == NULL);
331 values[Anum_pg_statistic_ext_stxdependencies - 1] = PointerGetDatum(data);
332 }
333
334 /* always replace the value (either by bytea or NULL) */
335 replaces[Anum_pg_statistic_ext_stxndistinct - 1] = true;
336 replaces[Anum_pg_statistic_ext_stxdependencies - 1] = true;
337
338 /* there should already be a pg_statistic_ext tuple */
339 oldtup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid));
340 if (!HeapTupleIsValid(oldtup))
341 elog(ERROR, "cache lookup failed for statistics object %u", statOid);
342
343 /* replace it */
344 stup = heap_modify_tuple(oldtup,
345 RelationGetDescr(pg_stext),
346 values,
347 nulls,
348 replaces);
349 ReleaseSysCache(oldtup);
350 CatalogTupleUpdate(pg_stext, &stup->t_self, stup);
351
352 heap_freetuple(stup);
353 }
354
355 /* initialize multi-dimensional sort */
356 MultiSortSupport
multi_sort_init(int ndims)357 multi_sort_init(int ndims)
358 {
359 MultiSortSupport mss;
360
361 Assert(ndims >= 2);
362
363 mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup)
364 + sizeof(SortSupportData) * ndims);
365
366 mss->ndims = ndims;
367
368 return mss;
369 }
370
371 /*
372 * Prepare sort support info using the given sort operator
373 * at the position 'sortdim'
374 */
375 void
multi_sort_add_dimension(MultiSortSupport mss,int sortdim,Oid oper)376 multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper)
377 {
378 SortSupport ssup = &mss->ssup[sortdim];
379
380 ssup->ssup_cxt = CurrentMemoryContext;
381 ssup->ssup_collation = DEFAULT_COLLATION_OID;
382 ssup->ssup_nulls_first = false;
383 ssup->ssup_cxt = CurrentMemoryContext;
384
385 PrepareSortSupportFromOrderingOp(oper, ssup);
386 }
387
388 /* compare all the dimensions in the selected order */
389 int
multi_sort_compare(const void * a,const void * b,void * arg)390 multi_sort_compare(const void *a, const void *b, void *arg)
391 {
392 MultiSortSupport mss = (MultiSortSupport) arg;
393 SortItem *ia = (SortItem *) a;
394 SortItem *ib = (SortItem *) b;
395 int i;
396
397 for (i = 0; i < mss->ndims; i++)
398 {
399 int compare;
400
401 compare = ApplySortComparator(ia->values[i], ia->isnull[i],
402 ib->values[i], ib->isnull[i],
403 &mss->ssup[i]);
404
405 if (compare != 0)
406 return compare;
407 }
408
409 /* equal by default */
410 return 0;
411 }
412
413 /* compare selected dimension */
414 int
multi_sort_compare_dim(int dim,const SortItem * a,const SortItem * b,MultiSortSupport mss)415 multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b,
416 MultiSortSupport mss)
417 {
418 return ApplySortComparator(a->values[dim], a->isnull[dim],
419 b->values[dim], b->isnull[dim],
420 &mss->ssup[dim]);
421 }
422
423 int
multi_sort_compare_dims(int start,int end,const SortItem * a,const SortItem * b,MultiSortSupport mss)424 multi_sort_compare_dims(int start, int end,
425 const SortItem *a, const SortItem *b,
426 MultiSortSupport mss)
427 {
428 int dim;
429
430 for (dim = start; dim <= end; dim++)
431 {
432 int r = ApplySortComparator(a->values[dim], a->isnull[dim],
433 b->values[dim], b->isnull[dim],
434 &mss->ssup[dim]);
435
436 if (r != 0)
437 return r;
438 }
439
440 return 0;
441 }
442
443 /*
444 * has_stats_of_kind
445 * Check whether the list contains statistic of a given kind
446 */
447 bool
has_stats_of_kind(List * stats,char requiredkind)448 has_stats_of_kind(List *stats, char requiredkind)
449 {
450 ListCell *l;
451
452 foreach(l, stats)
453 {
454 StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
455
456 if (stat->kind == requiredkind)
457 return true;
458 }
459
460 return false;
461 }
462
463 /*
464 * choose_best_statistics
465 * Look for and return statistics with the specified 'requiredkind' which
466 * have keys that match at least two of the given attnums. Return NULL if
467 * there's no match.
468 *
469 * The current selection criteria is very simple - we choose the statistics
470 * object referencing the most of the requested attributes, breaking ties
471 * in favor of objects with fewer keys overall.
472 *
473 * XXX if multiple statistics objects tie on both criteria, then which object
474 * is chosen depends on the order that they appear in the stats list. Perhaps
475 * further tiebreakers are needed.
476 */
477 StatisticExtInfo *
choose_best_statistics(List * stats,Bitmapset * attnums,char requiredkind)478 choose_best_statistics(List *stats, Bitmapset *attnums, char requiredkind)
479 {
480 ListCell *lc;
481 StatisticExtInfo *best_match = NULL;
482 int best_num_matched = 2; /* goal #1: maximize */
483 int best_match_keys = (STATS_MAX_DIMENSIONS + 1); /* goal #2: minimize */
484
485 foreach(lc, stats)
486 {
487 StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
488 int num_matched;
489 int numkeys;
490 Bitmapset *matched;
491
492 /* skip statistics that are not of the correct type */
493 if (info->kind != requiredkind)
494 continue;
495
496 /* determine how many attributes of these stats can be matched to */
497 matched = bms_intersect(attnums, info->keys);
498 num_matched = bms_num_members(matched);
499 bms_free(matched);
500
501 /*
502 * save the actual number of keys in the stats so that we can choose
503 * the narrowest stats with the most matching keys.
504 */
505 numkeys = bms_num_members(info->keys);
506
507 /*
508 * Use this object when it increases the number of matched clauses or
509 * when it matches the same number of attributes but these stats have
510 * fewer keys than any previous match.
511 */
512 if (num_matched > best_num_matched ||
513 (num_matched == best_num_matched && numkeys < best_match_keys))
514 {
515 best_match = info;
516 best_num_matched = num_matched;
517 best_match_keys = numkeys;
518 }
519 }
520
521 return best_match;
522 }
523