/*-------------------------------------------------------------------------
 *
 * statscmds.c
 *	  Commands for creating and altering extended statistics objects
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/commands/statscmds.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/relation.h"
#include "access/relscan.h"
#include "access/table.h"
#include "catalog/catalog.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_statistic_ext_data.h"
#include "commands/comment.h"
#include "commands/defrem.h"
#include "miscadmin.h"
#include "statistics/statistics.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
37
38
39 static char *ChooseExtendedStatisticName(const char *name1, const char *name2,
40 const char *label, Oid namespaceid);
41 static char *ChooseExtendedStatisticNameAddition(List *exprs);
42
43
/* qsort comparator for the attnums in CreateStatistics */
45 static int
compare_int16(const void * a,const void * b)46 compare_int16(const void *a, const void *b)
47 {
48 int av = *(const int16 *) a;
49 int bv = *(const int16 *) b;
50
51 /* this can't overflow if int is wider than int16 */
52 return (av - bv);
53 }
54
/*
 *		CREATE STATISTICS
 */
58 ObjectAddress
CreateStatistics(CreateStatsStmt * stmt)59 CreateStatistics(CreateStatsStmt *stmt)
60 {
61 int16 attnums[STATS_MAX_DIMENSIONS];
62 int numcols = 0;
63 char *namestr;
64 NameData stxname;
65 Oid statoid;
66 Oid namespaceId;
67 Oid stxowner = GetUserId();
68 HeapTuple htup;
69 Datum values[Natts_pg_statistic_ext];
70 bool nulls[Natts_pg_statistic_ext];
71 Datum datavalues[Natts_pg_statistic_ext_data];
72 bool datanulls[Natts_pg_statistic_ext_data];
73 int2vector *stxkeys;
74 Relation statrel;
75 Relation datarel;
76 Relation rel = NULL;
77 Oid relid;
78 ObjectAddress parentobject,
79 myself;
80 Datum types[3]; /* one for each possible type of statistic */
81 int ntypes;
82 ArrayType *stxkind;
83 bool build_ndistinct;
84 bool build_dependencies;
85 bool build_mcv;
86 bool requested_type = false;
87 int i;
88 ListCell *cell;
89
90 Assert(IsA(stmt, CreateStatsStmt));
91
92 /*
93 * Examine the FROM clause. Currently, we only allow it to be a single
94 * simple table, but later we'll probably allow multiple tables and JOIN
95 * syntax. The grammar is already prepared for that, so we have to check
96 * here that what we got is what we can support.
97 */
98 if (list_length(stmt->relations) != 1)
99 ereport(ERROR,
100 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
101 errmsg("only a single relation is allowed in CREATE STATISTICS")));
102
103 foreach(cell, stmt->relations)
104 {
105 Node *rln = (Node *) lfirst(cell);
106
107 if (!IsA(rln, RangeVar))
108 ereport(ERROR,
109 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
110 errmsg("only a single relation is allowed in CREATE STATISTICS")));
111
112 /*
113 * CREATE STATISTICS will influence future execution plans but does
114 * not interfere with currently executing plans. So it should be
115 * enough to take only ShareUpdateExclusiveLock on relation,
116 * conflicting with ANALYZE and other DDL that sets statistical
117 * information, but not with normal queries.
118 */
119 rel = relation_openrv((RangeVar *) rln, ShareUpdateExclusiveLock);
120
121 /* Restrict to allowed relation types */
122 if (rel->rd_rel->relkind != RELKIND_RELATION &&
123 rel->rd_rel->relkind != RELKIND_MATVIEW &&
124 rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
125 rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
126 ereport(ERROR,
127 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
128 errmsg("relation \"%s\" is not a table, foreign table, or materialized view",
129 RelationGetRelationName(rel))));
130
131 /* You must own the relation to create stats on it */
132 if (!pg_class_ownercheck(RelationGetRelid(rel), stxowner))
133 aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(rel->rd_rel->relkind),
134 RelationGetRelationName(rel));
135
136 /* Creating statistics on system catalogs is not allowed */
137 if (!allowSystemTableMods && IsSystemRelation(rel))
138 ereport(ERROR,
139 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
140 errmsg("permission denied: \"%s\" is a system catalog",
141 RelationGetRelationName(rel))));
142 }
143
144 Assert(rel);
145 relid = RelationGetRelid(rel);
146
147 /*
148 * If the node has a name, split it up and determine creation namespace.
149 * If not (a possibility not considered by the grammar, but one which can
150 * occur via the "CREATE TABLE ... (LIKE)" command), then we put the
151 * object in the same namespace as the relation, and cons up a name for
152 * it.
153 */
154 if (stmt->defnames)
155 namespaceId = QualifiedNameGetCreationNamespace(stmt->defnames,
156 &namestr);
157 else
158 {
159 namespaceId = RelationGetNamespace(rel);
160 namestr = ChooseExtendedStatisticName(RelationGetRelationName(rel),
161 ChooseExtendedStatisticNameAddition(stmt->exprs),
162 "stat",
163 namespaceId);
164 }
165 namestrcpy(&stxname, namestr);
166
167 /*
168 * Deal with the possibility that the statistics object already exists.
169 */
170 if (SearchSysCacheExists2(STATEXTNAMENSP,
171 CStringGetDatum(namestr),
172 ObjectIdGetDatum(namespaceId)))
173 {
174 if (stmt->if_not_exists)
175 {
176 ereport(NOTICE,
177 (errcode(ERRCODE_DUPLICATE_OBJECT),
178 errmsg("statistics object \"%s\" already exists, skipping",
179 namestr)));
180 relation_close(rel, NoLock);
181 return InvalidObjectAddress;
182 }
183
184 ereport(ERROR,
185 (errcode(ERRCODE_DUPLICATE_OBJECT),
186 errmsg("statistics object \"%s\" already exists", namestr)));
187 }
188
189 /*
190 * Currently, we only allow simple column references in the expression
191 * list. That will change someday, and again the grammar already supports
192 * it so we have to enforce restrictions here. For now, we can convert
193 * the expression list to a simple array of attnums. While at it, enforce
194 * some constraints.
195 */
196 foreach(cell, stmt->exprs)
197 {
198 Node *expr = (Node *) lfirst(cell);
199 ColumnRef *cref;
200 char *attname;
201 HeapTuple atttuple;
202 Form_pg_attribute attForm;
203 TypeCacheEntry *type;
204
205 if (!IsA(expr, ColumnRef))
206 ereport(ERROR,
207 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
208 errmsg("only simple column references are allowed in CREATE STATISTICS")));
209 cref = (ColumnRef *) expr;
210
211 if (list_length(cref->fields) != 1)
212 ereport(ERROR,
213 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
214 errmsg("only simple column references are allowed in CREATE STATISTICS")));
215 attname = strVal((Value *) linitial(cref->fields));
216
217 atttuple = SearchSysCacheAttName(relid, attname);
218 if (!HeapTupleIsValid(atttuple))
219 ereport(ERROR,
220 (errcode(ERRCODE_UNDEFINED_COLUMN),
221 errmsg("column \"%s\" does not exist",
222 attname)));
223 attForm = (Form_pg_attribute) GETSTRUCT(atttuple);
224
225 /* Disallow use of system attributes in extended stats */
226 if (attForm->attnum <= 0)
227 ereport(ERROR,
228 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
229 errmsg("statistics creation on system columns is not supported")));
230
231 /* Disallow data types without a less-than operator */
232 type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
233 if (type->lt_opr == InvalidOid)
234 ereport(ERROR,
235 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
236 errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
237 attname, format_type_be(attForm->atttypid))));
238
239 /* Make sure no more than STATS_MAX_DIMENSIONS columns are used */
240 if (numcols >= STATS_MAX_DIMENSIONS)
241 ereport(ERROR,
242 (errcode(ERRCODE_TOO_MANY_COLUMNS),
243 errmsg("cannot have more than %d columns in statistics",
244 STATS_MAX_DIMENSIONS)));
245
246 attnums[numcols] = attForm->attnum;
247 numcols++;
248 ReleaseSysCache(atttuple);
249 }
250
251 /*
252 * Check that at least two columns were specified in the statement. The
253 * upper bound was already checked in the loop above.
254 */
255 if (numcols < 2)
256 ereport(ERROR,
257 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
258 errmsg("extended statistics require at least 2 columns")));
259
260 /*
261 * Sort the attnums, which makes detecting duplicates somewhat easier, and
262 * it does not hurt (it does not affect the efficiency, unlike for
263 * indexes, for example).
264 */
265 qsort(attnums, numcols, sizeof(int16), compare_int16);
266
267 /*
268 * Check for duplicates in the list of columns. The attnums are sorted so
269 * just check consecutive elements.
270 */
271 for (i = 1; i < numcols; i++)
272 {
273 if (attnums[i] == attnums[i - 1])
274 ereport(ERROR,
275 (errcode(ERRCODE_DUPLICATE_COLUMN),
276 errmsg("duplicate column name in statistics definition")));
277 }
278
279 /* Form an int2vector representation of the sorted column list */
280 stxkeys = buildint2vector(attnums, numcols);
281
282 /*
283 * Parse the statistics kinds.
284 */
285 build_ndistinct = false;
286 build_dependencies = false;
287 build_mcv = false;
288 foreach(cell, stmt->stat_types)
289 {
290 char *type = strVal((Value *) lfirst(cell));
291
292 if (strcmp(type, "ndistinct") == 0)
293 {
294 build_ndistinct = true;
295 requested_type = true;
296 }
297 else if (strcmp(type, "dependencies") == 0)
298 {
299 build_dependencies = true;
300 requested_type = true;
301 }
302 else if (strcmp(type, "mcv") == 0)
303 {
304 build_mcv = true;
305 requested_type = true;
306 }
307 else
308 ereport(ERROR,
309 (errcode(ERRCODE_SYNTAX_ERROR),
310 errmsg("unrecognized statistics kind \"%s\"",
311 type)));
312 }
313 /* If no statistic type was specified, build them all. */
314 if (!requested_type)
315 {
316 build_ndistinct = true;
317 build_dependencies = true;
318 build_mcv = true;
319 }
320
321 /* construct the char array of enabled statistic types */
322 ntypes = 0;
323 if (build_ndistinct)
324 types[ntypes++] = CharGetDatum(STATS_EXT_NDISTINCT);
325 if (build_dependencies)
326 types[ntypes++] = CharGetDatum(STATS_EXT_DEPENDENCIES);
327 if (build_mcv)
328 types[ntypes++] = CharGetDatum(STATS_EXT_MCV);
329 Assert(ntypes > 0 && ntypes <= lengthof(types));
330 stxkind = construct_array(types, ntypes, CHAROID, 1, true, 'c');
331
332 statrel = table_open(StatisticExtRelationId, RowExclusiveLock);
333
334 /*
335 * Everything seems fine, so let's build the pg_statistic_ext tuple.
336 */
337 memset(values, 0, sizeof(values));
338 memset(nulls, false, sizeof(nulls));
339
340 statoid = GetNewOidWithIndex(statrel, StatisticExtOidIndexId,
341 Anum_pg_statistic_ext_oid);
342 values[Anum_pg_statistic_ext_oid - 1] = ObjectIdGetDatum(statoid);
343 values[Anum_pg_statistic_ext_stxrelid - 1] = ObjectIdGetDatum(relid);
344 values[Anum_pg_statistic_ext_stxname - 1] = NameGetDatum(&stxname);
345 values[Anum_pg_statistic_ext_stxnamespace - 1] = ObjectIdGetDatum(namespaceId);
346 values[Anum_pg_statistic_ext_stxowner - 1] = ObjectIdGetDatum(stxowner);
347 values[Anum_pg_statistic_ext_stxkeys - 1] = PointerGetDatum(stxkeys);
348 values[Anum_pg_statistic_ext_stxkind - 1] = PointerGetDatum(stxkind);
349
350 /* insert it into pg_statistic_ext */
351 htup = heap_form_tuple(statrel->rd_att, values, nulls);
352 CatalogTupleInsert(statrel, htup);
353 heap_freetuple(htup);
354
355 relation_close(statrel, RowExclusiveLock);
356
357 /*
358 * Also build the pg_statistic_ext_data tuple, to hold the actual
359 * statistics data.
360 */
361 datarel = table_open(StatisticExtDataRelationId, RowExclusiveLock);
362
363 memset(datavalues, 0, sizeof(datavalues));
364 memset(datanulls, false, sizeof(datanulls));
365
366 datavalues[Anum_pg_statistic_ext_data_stxoid - 1] = ObjectIdGetDatum(statoid);
367
368 /* no statistics built yet */
369 datanulls[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true;
370 datanulls[Anum_pg_statistic_ext_data_stxddependencies - 1] = true;
371 datanulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
372
373 /* insert it into pg_statistic_ext_data */
374 htup = heap_form_tuple(datarel->rd_att, datavalues, datanulls);
375 CatalogTupleInsert(datarel, htup);
376 heap_freetuple(htup);
377
378 relation_close(datarel, RowExclusiveLock);
379
380 /*
381 * Invalidate relcache so that others see the new statistics object.
382 */
383 CacheInvalidateRelcache(rel);
384
385 relation_close(rel, NoLock);
386
387 /*
388 * Add an AUTO dependency on each column used in the stats, so that the
389 * stats object goes away if any or all of them get dropped.
390 */
391 ObjectAddressSet(myself, StatisticExtRelationId, statoid);
392
393 for (i = 0; i < numcols; i++)
394 {
395 ObjectAddressSubSet(parentobject, RelationRelationId, relid, attnums[i]);
396 recordDependencyOn(&myself, &parentobject, DEPENDENCY_AUTO);
397 }
398
399 /*
400 * Also add dependencies on namespace and owner. These are required
401 * because the stats object might have a different namespace and/or owner
402 * than the underlying table(s).
403 */
404 ObjectAddressSet(parentobject, NamespaceRelationId, namespaceId);
405 recordDependencyOn(&myself, &parentobject, DEPENDENCY_NORMAL);
406
407 recordDependencyOnOwner(StatisticExtRelationId, statoid, stxowner);
408
409 /*
410 * XXX probably there should be a recordDependencyOnCurrentExtension call
411 * here too, but we'd have to add support for ALTER EXTENSION ADD/DROP
412 * STATISTICS, which is more work than it seems worth.
413 */
414
415 /* Add any requested comment */
416 if (stmt->stxcomment != NULL)
417 CreateComments(statoid, StatisticExtRelationId, 0,
418 stmt->stxcomment);
419
420 /* Return stats object's address */
421 return myself;
422 }
423
/*
 * Guts of statistics object deletion.
 */
427 void
RemoveStatisticsById(Oid statsOid)428 RemoveStatisticsById(Oid statsOid)
429 {
430 Relation relation;
431 HeapTuple tup;
432 Form_pg_statistic_ext statext;
433 Oid relid;
434
435 /*
436 * First delete the pg_statistic_ext_data tuple holding the actual
437 * statistical data.
438 */
439 relation = table_open(StatisticExtDataRelationId, RowExclusiveLock);
440
441 tup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(statsOid));
442
443 if (!HeapTupleIsValid(tup)) /* should not happen */
444 elog(ERROR, "cache lookup failed for statistics data %u", statsOid);
445
446 CatalogTupleDelete(relation, &tup->t_self);
447
448 ReleaseSysCache(tup);
449
450 table_close(relation, RowExclusiveLock);
451
452 /*
453 * Delete the pg_statistic_ext tuple. Also send out a cache inval on the
454 * associated table, so that dependent plans will be rebuilt.
455 */
456 relation = table_open(StatisticExtRelationId, RowExclusiveLock);
457
458 tup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statsOid));
459
460 if (!HeapTupleIsValid(tup)) /* should not happen */
461 elog(ERROR, "cache lookup failed for statistics object %u", statsOid);
462
463 statext = (Form_pg_statistic_ext) GETSTRUCT(tup);
464 relid = statext->stxrelid;
465
466 CacheInvalidateRelcacheByRelid(relid);
467
468 CatalogTupleDelete(relation, &tup->t_self);
469
470 ReleaseSysCache(tup);
471
472 table_close(relation, RowExclusiveLock);
473 }
474
/*
 * Update a statistics object for ALTER COLUMN TYPE on a source column.
 *
 * This could throw an error if the type change can't be supported.
 * If it can be supported, but the stats must be recomputed, a likely choice
 * would be to set the relevant column(s) of the pg_statistic_ext_data tuple
 * to null until the next ANALYZE.  (Note that the type change hasn't actually
 * happened yet, so one option that's *not* on the table is to recompute
 * immediately.)
 *
 * For both ndistinct and functional-dependencies stats, the on-disk
 * representation is independent of the source column data types, and it is
 * plausible to assume that the old statistic values will still be good for
 * the new column contents.  (Obviously, if the ALTER COLUMN TYPE has a USING
 * expression that substantially alters the semantic meaning of the column
 * values, this assumption could fail.  But that seems like a corner case
 * that doesn't justify zapping the stats in common cases.)
 *
 * For MCV lists that's not the case, as those statistics store the datums
 * internally.  In this case we simply reset the statistics value to NULL.
 *
 * Note that "type change" includes collation change, which means we can rely
 * on the MCV list being consistent with the collation info in pg_attribute
 * during estimation.
 */
500 void
UpdateStatisticsForTypeChange(Oid statsOid,Oid relationOid,int attnum,Oid oldColumnType,Oid newColumnType)501 UpdateStatisticsForTypeChange(Oid statsOid, Oid relationOid, int attnum,
502 Oid oldColumnType, Oid newColumnType)
503 {
504 HeapTuple stup,
505 oldtup;
506
507 Relation rel;
508
509 Datum values[Natts_pg_statistic_ext_data];
510 bool nulls[Natts_pg_statistic_ext_data];
511 bool replaces[Natts_pg_statistic_ext_data];
512
513 oldtup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(statsOid));
514 if (!HeapTupleIsValid(oldtup))
515 elog(ERROR, "cache lookup failed for statistics object %u", statsOid);
516
517 /*
518 * When none of the defined statistics types contain datum values from the
519 * table's columns then there's no need to reset the stats. Functional
520 * dependencies and ndistinct stats should still hold true.
521 */
522 if (!statext_is_kind_built(oldtup, STATS_EXT_MCV))
523 {
524 ReleaseSysCache(oldtup);
525 return;
526 }
527
528 /*
529 * OK, we need to reset some statistics. So let's build the new tuple,
530 * replacing the affected statistics types with NULL.
531 */
532 memset(nulls, 0, Natts_pg_statistic_ext_data * sizeof(bool));
533 memset(replaces, 0, Natts_pg_statistic_ext_data * sizeof(bool));
534 memset(values, 0, Natts_pg_statistic_ext_data * sizeof(Datum));
535
536 replaces[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
537 nulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
538
539 rel = heap_open(StatisticExtDataRelationId, RowExclusiveLock);
540
541 /* replace the old tuple */
542 stup = heap_modify_tuple(oldtup,
543 RelationGetDescr(rel),
544 values,
545 nulls,
546 replaces);
547
548 ReleaseSysCache(oldtup);
549 CatalogTupleUpdate(rel, &stup->t_self, stup);
550
551 heap_freetuple(stup);
552
553 heap_close(rel, RowExclusiveLock);
554 }
555
/*
 * Select a nonconflicting name for a new statistics object.
 *
 * name1, name2, and label are used the same way as for makeObjectName(),
 * except that the label can't be NULL; digits will be appended to the label
 * if needed to create a name that is unique within the specified namespace.
 *
 * Returns a palloc'd string.
 *
 * Note: it is theoretically possible to get a collision anyway, if someone
 * else chooses the same name concurrently.  This is fairly unlikely to be
 * a problem in practice, especially if one is holding a share update
 * exclusive lock on the relation identified by name1.  However, if choosing
 * multiple names within a single command, you'd better create the new object
 * and do CommandCounterIncrement before choosing the next one!
 */
572 static char *
ChooseExtendedStatisticName(const char * name1,const char * name2,const char * label,Oid namespaceid)573 ChooseExtendedStatisticName(const char *name1, const char *name2,
574 const char *label, Oid namespaceid)
575 {
576 int pass = 0;
577 char *stxname = NULL;
578 char modlabel[NAMEDATALEN];
579
580 /* try the unmodified label first */
581 StrNCpy(modlabel, label, sizeof(modlabel));
582
583 for (;;)
584 {
585 Oid existingstats;
586
587 stxname = makeObjectName(name1, name2, modlabel);
588
589 existingstats = GetSysCacheOid2(STATEXTNAMENSP, Anum_pg_statistic_ext_oid,
590 PointerGetDatum(stxname),
591 ObjectIdGetDatum(namespaceid));
592 if (!OidIsValid(existingstats))
593 break;
594
595 /* found a conflict, so try a new name component */
596 pfree(stxname);
597 snprintf(modlabel, sizeof(modlabel), "%s%d", label, ++pass);
598 }
599
600 return stxname;
601 }
602
/*
 * Generate "name2" for a new statistics object given the list of column
 * names for it.  This will be passed to ChooseExtendedStatisticName along
 * with the parent table name and a suitable label.
 *
 * We know that fewer than NAMEDATALEN characters will actually be used,
 * so we can truncate the result once we've generated that many.
 *
 * XXX see also ChooseForeignKeyConstraintNameAddition and
 * ChooseIndexNameAddition.
 */
614 static char *
ChooseExtendedStatisticNameAddition(List * exprs)615 ChooseExtendedStatisticNameAddition(List *exprs)
616 {
617 char buf[NAMEDATALEN * 2];
618 int buflen = 0;
619 ListCell *lc;
620
621 buf[0] = '\0';
622 foreach(lc, exprs)
623 {
624 ColumnRef *cref = (ColumnRef *) lfirst(lc);
625 const char *name;
626
627 /* It should be one of these, but just skip if it happens not to be */
628 if (!IsA(cref, ColumnRef))
629 continue;
630
631 name = strVal((Value *) linitial(cref->fields));
632
633 if (buflen > 0)
634 buf[buflen++] = '_'; /* insert _ between names */
635
636 /*
637 * At this point we have buflen <= NAMEDATALEN. name should be less
638 * than NAMEDATALEN already, but use strlcpy for paranoia.
639 */
640 strlcpy(buf + buflen, name, NAMEDATALEN);
641 buflen += strlen(buf + buflen);
642 if (buflen >= NAMEDATALEN)
643 break;
644 }
645 return pstrdup(buf);
646 }
647