1 /*-------------------------------------------------------------------------
2  *
3  * statscmds.c
4  *	  Commands for creating and altering extended statistics objects
5  *
6  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/commands/statscmds.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "access/heapam.h"
18 #include "access/relation.h"
19 #include "access/relscan.h"
20 #include "access/table.h"
21 #include "catalog/catalog.h"
22 #include "catalog/dependency.h"
23 #include "catalog/indexing.h"
24 #include "catalog/namespace.h"
25 #include "catalog/objectaccess.h"
26 #include "catalog/pg_namespace.h"
27 #include "catalog/pg_statistic_ext.h"
28 #include "catalog/pg_statistic_ext_data.h"
29 #include "commands/comment.h"
30 #include "commands/defrem.h"
31 #include "miscadmin.h"
32 #include "statistics/statistics.h"
33 #include "utils/builtins.h"
34 #include "utils/fmgroids.h"
35 #include "utils/inval.h"
36 #include "utils/memutils.h"
37 #include "utils/rel.h"
38 #include "utils/syscache.h"
39 #include "utils/typcache.h"
40 
41 
42 static char *ChooseExtendedStatisticName(const char *name1, const char *name2,
43 										 const char *label, Oid namespaceid);
44 static char *ChooseExtendedStatisticNameAddition(List *exprs);
45 
46 
/*
 * qsort comparator ordering int16 attribute numbers ascending; used to sort
 * the attnums array in CreateStatistics.
 *
 * Returns a negative, zero, or positive value as *a is less than, equal to,
 * or greater than *b.
 */
static int
compare_int16(const void *a, const void *b)
{
	const int16 *lhs = (const int16 *) a;
	const int16 *rhs = (const int16 *) b;

	/*
	 * Both operands are widened to int before subtracting, so the difference
	 * cannot overflow as long as int is wider than int16.
	 */
	return (int) *lhs - (int) *rhs;
}
57 
58 /*
59  *		CREATE STATISTICS
60  */
61 ObjectAddress
CreateStatistics(CreateStatsStmt * stmt)62 CreateStatistics(CreateStatsStmt *stmt)
63 {
64 	int16		attnums[STATS_MAX_DIMENSIONS];
65 	int			numcols = 0;
66 	char	   *namestr;
67 	NameData	stxname;
68 	Oid			statoid;
69 	Oid			namespaceId;
70 	Oid			stxowner = GetUserId();
71 	HeapTuple	htup;
72 	Datum		values[Natts_pg_statistic_ext];
73 	bool		nulls[Natts_pg_statistic_ext];
74 	Datum		datavalues[Natts_pg_statistic_ext_data];
75 	bool		datanulls[Natts_pg_statistic_ext_data];
76 	int2vector *stxkeys;
77 	Relation	statrel;
78 	Relation	datarel;
79 	Relation	rel = NULL;
80 	Oid			relid;
81 	ObjectAddress parentobject,
82 				myself;
83 	Datum		types[3];		/* one for each possible type of statistic */
84 	int			ntypes;
85 	ArrayType  *stxkind;
86 	bool		build_ndistinct;
87 	bool		build_dependencies;
88 	bool		build_mcv;
89 	bool		requested_type = false;
90 	int			i;
91 	ListCell   *cell;
92 
93 	Assert(IsA(stmt, CreateStatsStmt));
94 
95 	/*
96 	 * Examine the FROM clause.  Currently, we only allow it to be a single
97 	 * simple table, but later we'll probably allow multiple tables and JOIN
98 	 * syntax.  The grammar is already prepared for that, so we have to check
99 	 * here that what we got is what we can support.
100 	 */
101 	if (list_length(stmt->relations) != 1)
102 		ereport(ERROR,
103 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
104 				 errmsg("only a single relation is allowed in CREATE STATISTICS")));
105 
106 	foreach(cell, stmt->relations)
107 	{
108 		Node	   *rln = (Node *) lfirst(cell);
109 
110 		if (!IsA(rln, RangeVar))
111 			ereport(ERROR,
112 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
113 					 errmsg("only a single relation is allowed in CREATE STATISTICS")));
114 
115 		/*
116 		 * CREATE STATISTICS will influence future execution plans but does
117 		 * not interfere with currently executing plans.  So it should be
118 		 * enough to take only ShareUpdateExclusiveLock on relation,
119 		 * conflicting with ANALYZE and other DDL that sets statistical
120 		 * information, but not with normal queries.
121 		 */
122 		rel = relation_openrv((RangeVar *) rln, ShareUpdateExclusiveLock);
123 
124 		/* Restrict to allowed relation types */
125 		if (rel->rd_rel->relkind != RELKIND_RELATION &&
126 			rel->rd_rel->relkind != RELKIND_MATVIEW &&
127 			rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
128 			rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
129 			ereport(ERROR,
130 					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
131 					 errmsg("relation \"%s\" is not a table, foreign table, or materialized view",
132 							RelationGetRelationName(rel))));
133 
134 		/* You must own the relation to create stats on it */
135 		if (!pg_class_ownercheck(RelationGetRelid(rel), stxowner))
136 			aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(rel->rd_rel->relkind),
137 						   RelationGetRelationName(rel));
138 
139 		/* Creating statistics on system catalogs is not allowed */
140 		if (!allowSystemTableMods && IsSystemRelation(rel))
141 			ereport(ERROR,
142 					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
143 					 errmsg("permission denied: \"%s\" is a system catalog",
144 							RelationGetRelationName(rel))));
145 	}
146 
147 	Assert(rel);
148 	relid = RelationGetRelid(rel);
149 
150 	/*
151 	 * If the node has a name, split it up and determine creation namespace.
152 	 * If not (a possibility not considered by the grammar, but one which can
153 	 * occur via the "CREATE TABLE ... (LIKE)" command), then we put the
154 	 * object in the same namespace as the relation, and cons up a name for
155 	 * it.
156 	 */
157 	if (stmt->defnames)
158 		namespaceId = QualifiedNameGetCreationNamespace(stmt->defnames,
159 														&namestr);
160 	else
161 	{
162 		namespaceId = RelationGetNamespace(rel);
163 		namestr = ChooseExtendedStatisticName(RelationGetRelationName(rel),
164 											  ChooseExtendedStatisticNameAddition(stmt->exprs),
165 											  "stat",
166 											  namespaceId);
167 	}
168 	namestrcpy(&stxname, namestr);
169 
170 	/*
171 	 * Deal with the possibility that the statistics object already exists.
172 	 */
173 	if (SearchSysCacheExists2(STATEXTNAMENSP,
174 							  CStringGetDatum(namestr),
175 							  ObjectIdGetDatum(namespaceId)))
176 	{
177 		if (stmt->if_not_exists)
178 		{
179 			ereport(NOTICE,
180 					(errcode(ERRCODE_DUPLICATE_OBJECT),
181 					 errmsg("statistics object \"%s\" already exists, skipping",
182 							namestr)));
183 			relation_close(rel, NoLock);
184 			return InvalidObjectAddress;
185 		}
186 
187 		ereport(ERROR,
188 				(errcode(ERRCODE_DUPLICATE_OBJECT),
189 				 errmsg("statistics object \"%s\" already exists", namestr)));
190 	}
191 
192 	/*
193 	 * Currently, we only allow simple column references in the expression
194 	 * list.  That will change someday, and again the grammar already supports
195 	 * it so we have to enforce restrictions here.  For now, we can convert
196 	 * the expression list to a simple array of attnums.  While at it, enforce
197 	 * some constraints.
198 	 */
199 	foreach(cell, stmt->exprs)
200 	{
201 		Node	   *expr = (Node *) lfirst(cell);
202 		ColumnRef  *cref;
203 		char	   *attname;
204 		HeapTuple	atttuple;
205 		Form_pg_attribute attForm;
206 		TypeCacheEntry *type;
207 
208 		if (!IsA(expr, ColumnRef))
209 			ereport(ERROR,
210 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
211 					 errmsg("only simple column references are allowed in CREATE STATISTICS")));
212 		cref = (ColumnRef *) expr;
213 
214 		if (list_length(cref->fields) != 1)
215 			ereport(ERROR,
216 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
217 					 errmsg("only simple column references are allowed in CREATE STATISTICS")));
218 		attname = strVal((Value *) linitial(cref->fields));
219 
220 		atttuple = SearchSysCacheAttName(relid, attname);
221 		if (!HeapTupleIsValid(atttuple))
222 			ereport(ERROR,
223 					(errcode(ERRCODE_UNDEFINED_COLUMN),
224 					 errmsg("column \"%s\" does not exist",
225 							attname)));
226 		attForm = (Form_pg_attribute) GETSTRUCT(atttuple);
227 
228 		/* Disallow use of system attributes in extended stats */
229 		if (attForm->attnum <= 0)
230 			ereport(ERROR,
231 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
232 					 errmsg("statistics creation on system columns is not supported")));
233 
234 		/* Disallow data types without a less-than operator */
235 		type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
236 		if (type->lt_opr == InvalidOid)
237 			ereport(ERROR,
238 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
239 					 errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
240 							attname, format_type_be(attForm->atttypid))));
241 
242 		/* Make sure no more than STATS_MAX_DIMENSIONS columns are used */
243 		if (numcols >= STATS_MAX_DIMENSIONS)
244 			ereport(ERROR,
245 					(errcode(ERRCODE_TOO_MANY_COLUMNS),
246 					 errmsg("cannot have more than %d columns in statistics",
247 							STATS_MAX_DIMENSIONS)));
248 
249 		attnums[numcols] = attForm->attnum;
250 		numcols++;
251 		ReleaseSysCache(atttuple);
252 	}
253 
254 	/*
255 	 * Check that at least two columns were specified in the statement. The
256 	 * upper bound was already checked in the loop above.
257 	 */
258 	if (numcols < 2)
259 		ereport(ERROR,
260 				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
261 				 errmsg("extended statistics require at least 2 columns")));
262 
263 	/*
264 	 * Sort the attnums, which makes detecting duplicates somewhat easier, and
265 	 * it does not hurt (it does not affect the efficiency, unlike for
266 	 * indexes, for example).
267 	 */
268 	qsort(attnums, numcols, sizeof(int16), compare_int16);
269 
270 	/*
271 	 * Check for duplicates in the list of columns. The attnums are sorted so
272 	 * just check consecutive elements.
273 	 */
274 	for (i = 1; i < numcols; i++)
275 	{
276 		if (attnums[i] == attnums[i - 1])
277 			ereport(ERROR,
278 					(errcode(ERRCODE_DUPLICATE_COLUMN),
279 					 errmsg("duplicate column name in statistics definition")));
280 	}
281 
282 	/* Form an int2vector representation of the sorted column list */
283 	stxkeys = buildint2vector(attnums, numcols);
284 
285 	/*
286 	 * Parse the statistics kinds.
287 	 */
288 	build_ndistinct = false;
289 	build_dependencies = false;
290 	build_mcv = false;
291 	foreach(cell, stmt->stat_types)
292 	{
293 		char	   *type = strVal((Value *) lfirst(cell));
294 
295 		if (strcmp(type, "ndistinct") == 0)
296 		{
297 			build_ndistinct = true;
298 			requested_type = true;
299 		}
300 		else if (strcmp(type, "dependencies") == 0)
301 		{
302 			build_dependencies = true;
303 			requested_type = true;
304 		}
305 		else if (strcmp(type, "mcv") == 0)
306 		{
307 			build_mcv = true;
308 			requested_type = true;
309 		}
310 		else
311 			ereport(ERROR,
312 					(errcode(ERRCODE_SYNTAX_ERROR),
313 					 errmsg("unrecognized statistics kind \"%s\"",
314 							type)));
315 	}
316 	/* If no statistic type was specified, build them all. */
317 	if (!requested_type)
318 	{
319 		build_ndistinct = true;
320 		build_dependencies = true;
321 		build_mcv = true;
322 	}
323 
324 	/* construct the char array of enabled statistic types */
325 	ntypes = 0;
326 	if (build_ndistinct)
327 		types[ntypes++] = CharGetDatum(STATS_EXT_NDISTINCT);
328 	if (build_dependencies)
329 		types[ntypes++] = CharGetDatum(STATS_EXT_DEPENDENCIES);
330 	if (build_mcv)
331 		types[ntypes++] = CharGetDatum(STATS_EXT_MCV);
332 	Assert(ntypes > 0 && ntypes <= lengthof(types));
333 	stxkind = construct_array(types, ntypes, CHAROID, 1, true, TYPALIGN_CHAR);
334 
335 	statrel = table_open(StatisticExtRelationId, RowExclusiveLock);
336 
337 	/*
338 	 * Everything seems fine, so let's build the pg_statistic_ext tuple.
339 	 */
340 	memset(values, 0, sizeof(values));
341 	memset(nulls, false, sizeof(nulls));
342 
343 	statoid = GetNewOidWithIndex(statrel, StatisticExtOidIndexId,
344 								 Anum_pg_statistic_ext_oid);
345 	values[Anum_pg_statistic_ext_oid - 1] = ObjectIdGetDatum(statoid);
346 	values[Anum_pg_statistic_ext_stxrelid - 1] = ObjectIdGetDatum(relid);
347 	values[Anum_pg_statistic_ext_stxname - 1] = NameGetDatum(&stxname);
348 	values[Anum_pg_statistic_ext_stxnamespace - 1] = ObjectIdGetDatum(namespaceId);
349 	values[Anum_pg_statistic_ext_stxstattarget - 1] = Int32GetDatum(-1);
350 	values[Anum_pg_statistic_ext_stxowner - 1] = ObjectIdGetDatum(stxowner);
351 	values[Anum_pg_statistic_ext_stxkeys - 1] = PointerGetDatum(stxkeys);
352 	values[Anum_pg_statistic_ext_stxkind - 1] = PointerGetDatum(stxkind);
353 
354 	/* insert it into pg_statistic_ext */
355 	htup = heap_form_tuple(statrel->rd_att, values, nulls);
356 	CatalogTupleInsert(statrel, htup);
357 	heap_freetuple(htup);
358 
359 	relation_close(statrel, RowExclusiveLock);
360 
361 	/*
362 	 * Also build the pg_statistic_ext_data tuple, to hold the actual
363 	 * statistics data.
364 	 */
365 	datarel = table_open(StatisticExtDataRelationId, RowExclusiveLock);
366 
367 	memset(datavalues, 0, sizeof(datavalues));
368 	memset(datanulls, false, sizeof(datanulls));
369 
370 	datavalues[Anum_pg_statistic_ext_data_stxoid - 1] = ObjectIdGetDatum(statoid);
371 
372 	/* no statistics built yet */
373 	datanulls[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true;
374 	datanulls[Anum_pg_statistic_ext_data_stxddependencies - 1] = true;
375 	datanulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
376 
377 	/* insert it into pg_statistic_ext_data */
378 	htup = heap_form_tuple(datarel->rd_att, datavalues, datanulls);
379 	CatalogTupleInsert(datarel, htup);
380 	heap_freetuple(htup);
381 
382 	relation_close(datarel, RowExclusiveLock);
383 
384 	InvokeObjectPostCreateHook(StatisticExtRelationId, statoid, 0);
385 
386 	/*
387 	 * Invalidate relcache so that others see the new statistics object.
388 	 */
389 	CacheInvalidateRelcache(rel);
390 
391 	relation_close(rel, NoLock);
392 
393 	/*
394 	 * Add an AUTO dependency on each column used in the stats, so that the
395 	 * stats object goes away if any or all of them get dropped.
396 	 */
397 	ObjectAddressSet(myself, StatisticExtRelationId, statoid);
398 
399 	for (i = 0; i < numcols; i++)
400 	{
401 		ObjectAddressSubSet(parentobject, RelationRelationId, relid, attnums[i]);
402 		recordDependencyOn(&myself, &parentobject, DEPENDENCY_AUTO);
403 	}
404 
405 	/*
406 	 * Also add dependencies on namespace and owner.  These are required
407 	 * because the stats object might have a different namespace and/or owner
408 	 * than the underlying table(s).
409 	 */
410 	ObjectAddressSet(parentobject, NamespaceRelationId, namespaceId);
411 	recordDependencyOn(&myself, &parentobject, DEPENDENCY_NORMAL);
412 
413 	recordDependencyOnOwner(StatisticExtRelationId, statoid, stxowner);
414 
415 	/*
416 	 * XXX probably there should be a recordDependencyOnCurrentExtension call
417 	 * here too, but we'd have to add support for ALTER EXTENSION ADD/DROP
418 	 * STATISTICS, which is more work than it seems worth.
419 	 */
420 
421 	/* Add any requested comment */
422 	if (stmt->stxcomment != NULL)
423 		CreateComments(statoid, StatisticExtRelationId, 0,
424 					   stmt->stxcomment);
425 
426 	/* Return stats object's address */
427 	return myself;
428 }
429 
430 /*
431  *		ALTER STATISTICS
432  */
433 ObjectAddress
AlterStatistics(AlterStatsStmt * stmt)434 AlterStatistics(AlterStatsStmt *stmt)
435 {
436 	Relation	rel;
437 	Oid			stxoid;
438 	HeapTuple	oldtup;
439 	HeapTuple	newtup;
440 	Datum		repl_val[Natts_pg_statistic_ext];
441 	bool		repl_null[Natts_pg_statistic_ext];
442 	bool		repl_repl[Natts_pg_statistic_ext];
443 	ObjectAddress address;
444 	int			newtarget = stmt->stxstattarget;
445 
446 	/* Limit statistics target to a sane range */
447 	if (newtarget < -1)
448 	{
449 		ereport(ERROR,
450 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
451 				 errmsg("statistics target %d is too low",
452 						newtarget)));
453 	}
454 	else if (newtarget > 10000)
455 	{
456 		newtarget = 10000;
457 		ereport(WARNING,
458 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
459 				 errmsg("lowering statistics target to %d",
460 						newtarget)));
461 	}
462 
463 	/* lookup OID of the statistics object */
464 	stxoid = get_statistics_object_oid(stmt->defnames, stmt->missing_ok);
465 
466 	/*
467 	 * If we got here and the OID is not valid, it means the statistics does
468 	 * not exist, but the command specified IF EXISTS. So report this as a
469 	 * simple NOTICE and we're done.
470 	 */
471 	if (!OidIsValid(stxoid))
472 	{
473 		char	   *schemaname;
474 		char	   *statname;
475 
476 		Assert(stmt->missing_ok);
477 
478 		DeconstructQualifiedName(stmt->defnames, &schemaname, &statname);
479 
480 		if (schemaname)
481 			ereport(NOTICE,
482 					(errmsg("statistics object \"%s.%s\" does not exist, skipping",
483 							schemaname, statname)));
484 		else
485 			ereport(NOTICE,
486 					(errmsg("statistics object \"%s\" does not exist, skipping",
487 							statname)));
488 
489 		return InvalidObjectAddress;
490 	}
491 
492 	/* Search pg_statistic_ext */
493 	rel = table_open(StatisticExtRelationId, RowExclusiveLock);
494 
495 	oldtup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(stxoid));
496 
497 	/* Must be owner of the existing statistics object */
498 	if (!pg_statistics_object_ownercheck(stxoid, GetUserId()))
499 		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_STATISTIC_EXT,
500 					   NameListToString(stmt->defnames));
501 
502 	/* Build new tuple. */
503 	memset(repl_val, 0, sizeof(repl_val));
504 	memset(repl_null, false, sizeof(repl_null));
505 	memset(repl_repl, false, sizeof(repl_repl));
506 
507 	/* replace the stxstattarget column */
508 	repl_repl[Anum_pg_statistic_ext_stxstattarget - 1] = true;
509 	repl_val[Anum_pg_statistic_ext_stxstattarget - 1] = Int32GetDatum(newtarget);
510 
511 	newtup = heap_modify_tuple(oldtup, RelationGetDescr(rel),
512 							   repl_val, repl_null, repl_repl);
513 
514 	/* Update system catalog. */
515 	CatalogTupleUpdate(rel, &newtup->t_self, newtup);
516 
517 	InvokeObjectPostAlterHook(StatisticExtRelationId, stxoid, 0);
518 
519 	ObjectAddressSet(address, StatisticExtRelationId, stxoid);
520 
521 	/*
522 	 * NOTE: because we only support altering the statistics target, not the
523 	 * other fields, there is no need to update dependencies.
524 	 */
525 
526 	heap_freetuple(newtup);
527 	ReleaseSysCache(oldtup);
528 
529 	table_close(rel, RowExclusiveLock);
530 
531 	return address;
532 }
533 
534 /*
535  * Guts of statistics object deletion.
536  */
537 void
RemoveStatisticsById(Oid statsOid)538 RemoveStatisticsById(Oid statsOid)
539 {
540 	Relation	relation;
541 	HeapTuple	tup;
542 	Form_pg_statistic_ext statext;
543 	Oid			relid;
544 
545 	/*
546 	 * First delete the pg_statistic_ext_data tuple holding the actual
547 	 * statistical data.
548 	 */
549 	relation = table_open(StatisticExtDataRelationId, RowExclusiveLock);
550 
551 	tup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(statsOid));
552 
553 	if (!HeapTupleIsValid(tup)) /* should not happen */
554 		elog(ERROR, "cache lookup failed for statistics data %u", statsOid);
555 
556 	CatalogTupleDelete(relation, &tup->t_self);
557 
558 	ReleaseSysCache(tup);
559 
560 	table_close(relation, RowExclusiveLock);
561 
562 	/*
563 	 * Delete the pg_statistic_ext tuple.  Also send out a cache inval on the
564 	 * associated table, so that dependent plans will be rebuilt.
565 	 */
566 	relation = table_open(StatisticExtRelationId, RowExclusiveLock);
567 
568 	tup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statsOid));
569 
570 	if (!HeapTupleIsValid(tup)) /* should not happen */
571 		elog(ERROR, "cache lookup failed for statistics object %u", statsOid);
572 
573 	statext = (Form_pg_statistic_ext) GETSTRUCT(tup);
574 	relid = statext->stxrelid;
575 
576 	CacheInvalidateRelcacheByRelid(relid);
577 
578 	CatalogTupleDelete(relation, &tup->t_self);
579 
580 	ReleaseSysCache(tup);
581 
582 	table_close(relation, RowExclusiveLock);
583 }
584 
585 /*
586  * Update a statistics object for ALTER COLUMN TYPE on a source column.
587  *
588  * This could throw an error if the type change can't be supported.
589  * If it can be supported, but the stats must be recomputed, a likely choice
590  * would be to set the relevant column(s) of the pg_statistic_ext_data tuple
591  * to null until the next ANALYZE.  (Note that the type change hasn't actually
592  * happened yet, so one option that's *not* on the table is to recompute
593  * immediately.)
594  *
595  * For both ndistinct and functional-dependencies stats, the on-disk
596  * representation is independent of the source column data types, and it is
597  * plausible to assume that the old statistic values will still be good for
598  * the new column contents.  (Obviously, if the ALTER COLUMN TYPE has a USING
599  * expression that substantially alters the semantic meaning of the column
600  * values, this assumption could fail.  But that seems like a corner case
601  * that doesn't justify zapping the stats in common cases.)
602  *
603  * For MCV lists that's not the case, as those statistics store the datums
604  * internally. In this case we simply reset the statistics value to NULL.
605  *
606  * Note that "type change" includes collation change, which means we can rely
607  * on the MCV list being consistent with the collation info in pg_attribute
608  * during estimation.
609  */
610 void
UpdateStatisticsForTypeChange(Oid statsOid,Oid relationOid,int attnum,Oid oldColumnType,Oid newColumnType)611 UpdateStatisticsForTypeChange(Oid statsOid, Oid relationOid, int attnum,
612 							  Oid oldColumnType, Oid newColumnType)
613 {
614 	HeapTuple	stup,
615 				oldtup;
616 
617 	Relation	rel;
618 
619 	Datum		values[Natts_pg_statistic_ext_data];
620 	bool		nulls[Natts_pg_statistic_ext_data];
621 	bool		replaces[Natts_pg_statistic_ext_data];
622 
623 	oldtup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(statsOid));
624 	if (!HeapTupleIsValid(oldtup))
625 		elog(ERROR, "cache lookup failed for statistics object %u", statsOid);
626 
627 	/*
628 	 * When none of the defined statistics types contain datum values from the
629 	 * table's columns then there's no need to reset the stats. Functional
630 	 * dependencies and ndistinct stats should still hold true.
631 	 */
632 	if (!statext_is_kind_built(oldtup, STATS_EXT_MCV))
633 	{
634 		ReleaseSysCache(oldtup);
635 		return;
636 	}
637 
638 	/*
639 	 * OK, we need to reset some statistics. So let's build the new tuple,
640 	 * replacing the affected statistics types with NULL.
641 	 */
642 	memset(nulls, 0, Natts_pg_statistic_ext_data * sizeof(bool));
643 	memset(replaces, 0, Natts_pg_statistic_ext_data * sizeof(bool));
644 	memset(values, 0, Natts_pg_statistic_ext_data * sizeof(Datum));
645 
646 	replaces[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
647 	nulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
648 
649 	rel = table_open(StatisticExtDataRelationId, RowExclusiveLock);
650 
651 	/* replace the old tuple */
652 	stup = heap_modify_tuple(oldtup,
653 							 RelationGetDescr(rel),
654 							 values,
655 							 nulls,
656 							 replaces);
657 
658 	ReleaseSysCache(oldtup);
659 	CatalogTupleUpdate(rel, &stup->t_self, stup);
660 
661 	heap_freetuple(stup);
662 
663 	table_close(rel, RowExclusiveLock);
664 }
665 
666 /*
667  * Select a nonconflicting name for a new statistics.
668  *
669  * name1, name2, and label are used the same way as for makeObjectName(),
670  * except that the label can't be NULL; digits will be appended to the label
671  * if needed to create a name that is unique within the specified namespace.
672  *
673  * Returns a palloc'd string.
674  *
675  * Note: it is theoretically possible to get a collision anyway, if someone
676  * else chooses the same name concurrently.  This is fairly unlikely to be
677  * a problem in practice, especially if one is holding a share update
678  * exclusive lock on the relation identified by name1.  However, if choosing
679  * multiple names within a single command, you'd better create the new object
680  * and do CommandCounterIncrement before choosing the next one!
681  */
682 static char *
ChooseExtendedStatisticName(const char * name1,const char * name2,const char * label,Oid namespaceid)683 ChooseExtendedStatisticName(const char *name1, const char *name2,
684 							const char *label, Oid namespaceid)
685 {
686 	int			pass = 0;
687 	char	   *stxname = NULL;
688 	char		modlabel[NAMEDATALEN];
689 
690 	/* try the unmodified label first */
691 	StrNCpy(modlabel, label, sizeof(modlabel));
692 
693 	for (;;)
694 	{
695 		Oid			existingstats;
696 
697 		stxname = makeObjectName(name1, name2, modlabel);
698 
699 		existingstats = GetSysCacheOid2(STATEXTNAMENSP, Anum_pg_statistic_ext_oid,
700 										PointerGetDatum(stxname),
701 										ObjectIdGetDatum(namespaceid));
702 		if (!OidIsValid(existingstats))
703 			break;
704 
705 		/* found a conflict, so try a new name component */
706 		pfree(stxname);
707 		snprintf(modlabel, sizeof(modlabel), "%s%d", label, ++pass);
708 	}
709 
710 	return stxname;
711 }
712 
713 /*
714  * Generate "name2" for a new statistics given the list of column names for it
715  * This will be passed to ChooseExtendedStatisticName along with the parent
716  * table name and a suitable label.
717  *
718  * We know that less than NAMEDATALEN characters will actually be used,
719  * so we can truncate the result once we've generated that many.
720  *
721  * XXX see also ChooseForeignKeyConstraintNameAddition and
722  * ChooseIndexNameAddition.
723  */
724 static char *
ChooseExtendedStatisticNameAddition(List * exprs)725 ChooseExtendedStatisticNameAddition(List *exprs)
726 {
727 	char		buf[NAMEDATALEN * 2];
728 	int			buflen = 0;
729 	ListCell   *lc;
730 
731 	buf[0] = '\0';
732 	foreach(lc, exprs)
733 	{
734 		ColumnRef  *cref = (ColumnRef *) lfirst(lc);
735 		const char *name;
736 
737 		/* It should be one of these, but just skip if it happens not to be */
738 		if (!IsA(cref, ColumnRef))
739 			continue;
740 
741 		name = strVal((Value *) linitial(cref->fields));
742 
743 		if (buflen > 0)
744 			buf[buflen++] = '_';	/* insert _ between names */
745 
746 		/*
747 		 * At this point we have buflen <= NAMEDATALEN.  name should be less
748 		 * than NAMEDATALEN already, but use strlcpy for paranoia.
749 		 */
750 		strlcpy(buf + buflen, name, NAMEDATALEN);
751 		buflen += strlen(buf + buflen);
752 		if (buflen >= NAMEDATALEN)
753 			break;
754 	}
755 	return pstrdup(buf);
756 }
757