/*-------------------------------------------------------------------------
 *
 * analyze.c
 *	  the Postgres statistics generator
 *
 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/commands/analyze.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <math.h>

#include "access/multixact.h"
#include "access/transam.h"
#include "access/tupconvert.h"
#include "access/tuptoaster.h"
#include "access/visibilitymap.h"
#include "access/xact.h"
#include "catalog/catalog.h"
#include "catalog/index.h"
#include "catalog/indexing.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_inherits_fn.h"
#include "catalog/pg_namespace.h"
#include "commands/dbcommands.h"
#include "commands/tablecmds.h"
#include "commands/vacuum.h"
#include "executor/executor.h"
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
#include "parser/parse_oper.h"
#include "parser/parse_relation.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "utils/acl.h"
#include "utils/attoptcache.h"
#include "utils/datum.h"
#include "utils/guc.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/pg_rusage.h"
#include "utils/sampling.h"
#include "utils/sortsupport.h"
#include "utils/syscache.h"
#include "utils/timestamp.h"
#include "utils/tqual.h"


/* Per-index data for ANALYZE */
typedef struct AnlIndexData
{
    IndexInfo  *indexInfo;      /* BuildIndexInfo result */
    double      tupleFract;     /* fraction of rows for partial index */
    VacAttrStats **vacattrstats;    /* index attrs to analyze */
    int         attr_cnt;
} AnlIndexData;


/* Default statistics target (GUC parameter) */
int         default_statistics_target = 100;

/* A few variables that don't seem worth passing around as parameters */
static MemoryContext anl_context = NULL;
static BufferAccessStrategy vac_strategy;


static void do_analyze_rel(Relation onerel, int options,
               VacuumParams *params, List *va_cols,
               AcquireSampleRowsFunc acquirefunc, BlockNumber relpages,
               bool inh, bool in_outer_xact, int elevel);
static void compute_index_stats(Relation onerel, double totalrows,
                    AnlIndexData *indexdata, int nindexes,
                    HeapTuple *rows, int numrows,
                    MemoryContext col_context);
static VacAttrStats *examine_attribute(Relation onerel, int attnum,
                  Node *index_expr);
static int acquire_sample_rows(Relation onerel, int elevel,
                    HeapTuple *rows, int targrows,
                    double *totalrows, double *totaldeadrows);
static int  compare_rows(const void *a, const void *b);
static int acquire_inherited_sample_rows(Relation onerel, int elevel,
                              HeapTuple *rows, int targrows,
                              double *totalrows, double *totaldeadrows);
static void update_attstats(Oid relid, bool inh,
                int natts, VacAttrStats **vacattrstats);
static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);


/*
 * analyze_rel() -- analyze one relation
 */
void
analyze_rel(Oid relid, RangeVar *relation, int options,
            VacuumParams *params, List *va_cols, bool in_outer_xact,
            BufferAccessStrategy bstrategy)
{
    Relation    onerel;
    int         elevel;
    AcquireSampleRowsFunc acquirefunc = NULL;
    BlockNumber relpages = 0;

    /* Select logging level */
    if (options & VACOPT_VERBOSE)
        elevel = INFO;
    else
        elevel = DEBUG2;

    /* Set up static variables */
    vac_strategy = bstrategy;

    /*
     * Check for user-requested abort.
     */
    CHECK_FOR_INTERRUPTS();

    /*
     * Open the relation, getting ShareUpdateExclusiveLock to ensure that two
     * ANALYZEs don't run on it concurrently.  (This also locks out a
     * concurrent VACUUM, which doesn't matter much at the moment but might
     * matter if we ever try to accumulate stats on dead tuples.)  If the rel
     * has been dropped since we last saw it, we don't need to process it.
     */
    if (!(options & VACOPT_NOWAIT))
        onerel = try_relation_open(relid, ShareUpdateExclusiveLock);
    else if (ConditionalLockRelationOid(relid, ShareUpdateExclusiveLock))
        onerel = try_relation_open(relid, NoLock);
    else
    {
        onerel = NULL;
        if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
            ereport(LOG,
                    (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
                     errmsg("skipping analyze of \"%s\" --- lock not available",
                            relation->relname)));
    }
    if (!onerel)
        return;

    /*
     * Check permissions --- this should match vacuum's check!
     */
    if (!(pg_class_ownercheck(RelationGetRelid(onerel), GetUserId()) ||
          (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !onerel->rd_rel->relisshared)))
    {
        /* No need for a WARNING if we already complained during VACUUM */
        if (!(options & VACOPT_VACUUM))
        {
            if (onerel->rd_rel->relisshared)
                ereport(WARNING,
                        (errmsg("skipping \"%s\" --- only superuser can analyze it",
                                RelationGetRelationName(onerel))));
            else if (onerel->rd_rel->relnamespace == PG_CATALOG_NAMESPACE)
                ereport(WARNING,
                        (errmsg("skipping \"%s\" --- only superuser or database owner can analyze it",
                                RelationGetRelationName(onerel))));
            else
                ereport(WARNING,
                        (errmsg("skipping \"%s\" --- only table or database owner can analyze it",
                                RelationGetRelationName(onerel))));
        }
        relation_close(onerel, ShareUpdateExclusiveLock);
        return;
    }

    /*
     * Silently ignore tables that are temp tables of other backends ---
     * trying to analyze these is rather pointless, since their contents are
     * probably not up-to-date on disk.  (We don't throw a warning here; it
     * would just lead to chatter during a database-wide ANALYZE.)
     */
    if (RELATION_IS_OTHER_TEMP(onerel))
    {
        relation_close(onerel, ShareUpdateExclusiveLock);
        return;
    }

    /*
     * We can ANALYZE any table except pg_statistic.  See update_attstats.
     */
    if (RelationGetRelid(onerel) == StatisticRelationId)
    {
        relation_close(onerel, ShareUpdateExclusiveLock);
        return;
    }

    /*
     * Check that it's a plain table, materialized view, or foreign table; we
     * used to do this in get_rel_oids(), but it seems safer to check after
     * we've locked the relation.
     */
    if (onerel->rd_rel->relkind == RELKIND_RELATION ||
        onerel->rd_rel->relkind == RELKIND_MATVIEW)
    {
        /* Regular table, so we'll use the regular row acquisition function */
        acquirefunc = acquire_sample_rows;
        /* Also get regular table's size */
        relpages = RelationGetNumberOfBlocks(onerel);
    }
    else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
    {
        /*
         * For a foreign table, call the FDW's hook function to see whether it
         * supports analysis.
         */
        FdwRoutine *fdwroutine;
        bool        ok = false;

        fdwroutine = GetFdwRoutineForRelation(onerel, false);

        if (fdwroutine->AnalyzeForeignTable != NULL)
            ok = fdwroutine->AnalyzeForeignTable(onerel,
                                                 &acquirefunc,
                                                 &relpages);

        if (!ok)
        {
            ereport(WARNING,
                    (errmsg("skipping \"%s\" --- cannot analyze this foreign table",
                            RelationGetRelationName(onerel))));
            relation_close(onerel, ShareUpdateExclusiveLock);
            return;
        }
    }
    else
    {
        /* No need for a WARNING if we already complained during VACUUM */
        if (!(options & VACOPT_VACUUM))
            ereport(WARNING,
                    (errmsg("skipping \"%s\" --- cannot analyze non-tables or special system tables",
                            RelationGetRelationName(onerel))));
        relation_close(onerel, ShareUpdateExclusiveLock);
        return;
    }

    /*
     * OK, let's do it.  First let other backends know I'm in ANALYZE.
     */
    LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
    MyPgXact->vacuumFlags |= PROC_IN_ANALYZE;
    LWLockRelease(ProcArrayLock);

    /*
     * Do the normal non-recursive ANALYZE.
     */
    do_analyze_rel(onerel, options, params, va_cols, acquirefunc, relpages,
                   false, in_outer_xact, elevel);

    /*
     * If there are child tables, do recursive ANALYZE.
     */
    if (onerel->rd_rel->relhassubclass)
        do_analyze_rel(onerel, options, params, va_cols, acquirefunc, relpages,
                       true, in_outer_xact, elevel);

    /*
     * Close source relation now, but keep lock so that no one deletes it
     * before we commit.  (If someone did, they'd fail to clean up the entries
     * we made in pg_statistic.  Also, releasing the lock before commit would
     * expose us to concurrent-update failures in update_attstats.)
     */
    relation_close(onerel, NoLock);

    /*
     * Reset my PGXACT flag.  Note: we need this here, and not in vacuum_rel,
     * because the vacuum flag is cleared by the end-of-xact code.
     */
    LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
    MyPgXact->vacuumFlags &= ~PROC_IN_ANALYZE;
    LWLockRelease(ProcArrayLock);
}

/*
 * do_analyze_rel() -- analyze one relation, recursively or not
 *
 * Note that "acquirefunc" is only relevant for the non-inherited case.
 * For the inherited case, acquire_inherited_sample_rows() determines the
 * appropriate acquirefunc for each child table.
 */
static void
do_analyze_rel(Relation onerel, int options, VacuumParams *params,
               List *va_cols, AcquireSampleRowsFunc acquirefunc,
               BlockNumber relpages, bool inh, bool in_outer_xact,
               int elevel)
{
    int         attr_cnt,
                tcnt,
                i,
                ind;
    Relation   *Irel;
    int         nindexes;
    bool        hasindex;
    VacAttrStats **vacattrstats;
    AnlIndexData *indexdata;
    int         targrows,
                numrows;
    double      totalrows,
                totaldeadrows;
    HeapTuple  *rows;
    PGRUsage    ru0;
    TimestampTz starttime = 0;
    MemoryContext caller_context;
    Oid         save_userid;
    int         save_sec_context;
    int         save_nestlevel;

    if (inh)
        ereport(elevel,
                (errmsg("analyzing \"%s.%s\" inheritance tree",
                        get_namespace_name(RelationGetNamespace(onerel)),
                        RelationGetRelationName(onerel))));
    else
        ereport(elevel,
                (errmsg("analyzing \"%s.%s\"",
                        get_namespace_name(RelationGetNamespace(onerel)),
                        RelationGetRelationName(onerel))));

    /*
     * Set up a working context so that we can easily free whatever junk gets
     * created.
     */
    anl_context = AllocSetContextCreate(CurrentMemoryContext,
                                        "Analyze",
                                        ALLOCSET_DEFAULT_SIZES);
    caller_context = MemoryContextSwitchTo(anl_context);

    /*
     * Switch to the table owner's userid, so that any index functions are run
     * as that user.  Also lock down security-restricted operations and
     * arrange to make GUC variable changes local to this command.
     */
    GetUserIdAndSecContext(&save_userid, &save_sec_context);
    SetUserIdAndSecContext(onerel->rd_rel->relowner,
                           save_sec_context | SECURITY_RESTRICTED_OPERATION);
    save_nestlevel = NewGUCNestLevel();

    /* measure elapsed time iff autovacuum logging requires it */
    if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
    {
        pg_rusage_init(&ru0);
        if (params->log_min_duration > 0)
            starttime = GetCurrentTimestamp();
    }

    /*
     * Determine which columns to analyze
     *
     * Note that system attributes are never analyzed, so we just reject them
     * at the lookup stage.  We also reject duplicate column mentions.  (We
     * could alternatively ignore duplicates, but analyzing a column twice
     * won't work; we'd end up making a conflicting update in pg_statistic.)
     */
    if (va_cols != NIL)
    {
        Bitmapset  *unique_cols = NULL;
        ListCell   *le;

        vacattrstats = (VacAttrStats **) palloc(list_length(va_cols) *
                                                sizeof(VacAttrStats *));
        tcnt = 0;
        foreach(le, va_cols)
        {
            char       *col = strVal(lfirst(le));

            i = attnameAttNum(onerel, col, false);
            if (i == InvalidAttrNumber)
                ereport(ERROR,
                        (errcode(ERRCODE_UNDEFINED_COLUMN),
                         errmsg("column \"%s\" of relation \"%s\" does not exist",
                                col, RelationGetRelationName(onerel))));
            if (bms_is_member(i, unique_cols))
                ereport(ERROR,
                        (errcode(ERRCODE_DUPLICATE_COLUMN),
                         errmsg("column \"%s\" of relation \"%s\" appears more than once",
                                col, RelationGetRelationName(onerel))));
            unique_cols = bms_add_member(unique_cols, i);

            vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
            if (vacattrstats[tcnt] != NULL)
                tcnt++;
        }
        attr_cnt = tcnt;
    }
    else
    {
        attr_cnt = onerel->rd_att->natts;
        vacattrstats = (VacAttrStats **)
            palloc(attr_cnt * sizeof(VacAttrStats *));
        tcnt = 0;
        for (i = 1; i <= attr_cnt; i++)
        {
            vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
            if (vacattrstats[tcnt] != NULL)
                tcnt++;
        }
        attr_cnt = tcnt;
    }

    /*
     * Open all indexes of the relation, and see if there are any analyzable
     * columns in the indexes.  We do not analyze index columns if there was
     * an explicit column list in the ANALYZE command, however.  If we are
     * doing a recursive scan, we don't want to touch the parent's indexes at
     * all.
     */
    if (!inh)
        vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel);
    else
    {
        Irel = NULL;
        nindexes = 0;
    }
    hasindex = (nindexes > 0);
    indexdata = NULL;
    if (hasindex)
    {
        indexdata = (AnlIndexData *) palloc0(nindexes * sizeof(AnlIndexData));
        for (ind = 0; ind < nindexes; ind++)
        {
            AnlIndexData *thisdata = &indexdata[ind];
            IndexInfo  *indexInfo;

            thisdata->indexInfo = indexInfo = BuildIndexInfo(Irel[ind]);
            thisdata->tupleFract = 1.0; /* fix later if partial */
            if (indexInfo->ii_Expressions != NIL && va_cols == NIL)
            {
                ListCell   *indexpr_item = list_head(indexInfo->ii_Expressions);

                thisdata->vacattrstats = (VacAttrStats **)
                    palloc(indexInfo->ii_NumIndexAttrs * sizeof(VacAttrStats *));
                tcnt = 0;
                for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
                {
                    int         keycol = indexInfo->ii_KeyAttrNumbers[i];

                    if (keycol == 0)
                    {
                        /* Found an index expression */
                        Node       *indexkey;

                        if (indexpr_item == NULL)   /* shouldn't happen */
                            elog(ERROR, "too few entries in indexprs list");
                        indexkey = (Node *) lfirst(indexpr_item);
                        indexpr_item = lnext(indexpr_item);
                        thisdata->vacattrstats[tcnt] =
                            examine_attribute(Irel[ind], i + 1, indexkey);
                        if (thisdata->vacattrstats[tcnt] != NULL)
                            tcnt++;
                    }
                }
                thisdata->attr_cnt = tcnt;
            }
        }
    }

    /*
     * Determine how many rows we need to sample, using the worst case from
     * all analyzable columns.  We use a lower bound of 100 rows to avoid
     * possible overflow in Vitter's algorithm.  (Note: that will also be the
     * target in the corner case where there are no analyzable columns.)
     */
    targrows = 100;
    for (i = 0; i < attr_cnt; i++)
    {
        if (targrows < vacattrstats[i]->minrows)
            targrows = vacattrstats[i]->minrows;
    }
    for (ind = 0; ind < nindexes; ind++)
    {
        AnlIndexData *thisdata = &indexdata[ind];

        for (i = 0; i < thisdata->attr_cnt; i++)
        {
            if (targrows < thisdata->vacattrstats[i]->minrows)
                targrows = thisdata->vacattrstats[i]->minrows;
        }
    }
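
    /*
     * Worked example (editorial note, not part of the original comments):
     * with the default_statistics_target of 100, std_typanalyze() below
     * requests minrows = 300 * 100 = 30000, so a typical ANALYZE samples
     * 30000 rows.
     */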

    /*
     * Acquire the sample rows
     */
    rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
    if (inh)
        numrows = acquire_inherited_sample_rows(onerel, elevel,
                                                rows, targrows,
                                                &totalrows, &totaldeadrows);
    else
        numrows = (*acquirefunc) (onerel, elevel,
                                  rows, targrows,
                                  &totalrows, &totaldeadrows);

    /*
     * Compute the statistics.  Temporary results during the calculations for
     * each column are stored in a child context.  The calc routines are
     * responsible for making sure that whatever they store into the
     * VacAttrStats structure is allocated in anl_context.
     */
    if (numrows > 0)
    {
        MemoryContext col_context,
                    old_context;

        col_context = AllocSetContextCreate(anl_context,
                                            "Analyze Column",
                                            ALLOCSET_DEFAULT_SIZES);
        old_context = MemoryContextSwitchTo(col_context);

        for (i = 0; i < attr_cnt; i++)
        {
            VacAttrStats *stats = vacattrstats[i];
            AttributeOpts *aopt;

            stats->rows = rows;
            stats->tupDesc = onerel->rd_att;
            (*stats->compute_stats) (stats,
                                     std_fetch_func,
                                     numrows,
                                     totalrows);

            /*
             * If the appropriate flavor of the n_distinct option is
             * specified, override with the corresponding value.
             */
            aopt = get_attribute_options(onerel->rd_id, stats->attr->attnum);
            if (aopt != NULL)
            {
                float8      n_distinct;

                n_distinct = inh ? aopt->n_distinct_inherited : aopt->n_distinct;
                if (n_distinct != 0.0)
                    stats->stadistinct = n_distinct;
            }
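
            /*
             * (Editorial note: the per-column override is set with, e.g.,
             * ALTER TABLE tab ALTER COLUMN col SET (n_distinct = -0.5);
             * negative values are interpreted as a fraction of the row
             * count, positive values as an absolute count.)
             */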

            MemoryContextResetAndDeleteChildren(col_context);
        }

        if (hasindex)
            compute_index_stats(onerel, totalrows,
                                indexdata, nindexes,
                                rows, numrows,
                                col_context);

        MemoryContextSwitchTo(old_context);
        MemoryContextDelete(col_context);

        /*
         * Emit the completed stats rows into pg_statistic, replacing any
         * previous statistics for the target columns.  (If there are stats in
         * pg_statistic for columns we didn't process, we leave them alone.)
         */
        update_attstats(RelationGetRelid(onerel), inh,
                        attr_cnt, vacattrstats);

        for (ind = 0; ind < nindexes; ind++)
        {
            AnlIndexData *thisdata = &indexdata[ind];

            update_attstats(RelationGetRelid(Irel[ind]), false,
                            thisdata->attr_cnt, thisdata->vacattrstats);
        }
    }

    /*
     * Update pages/tuples stats in pg_class ... but not if we're doing
     * inherited stats.
     */
    if (!inh)
    {
        BlockNumber relallvisible;

        visibilitymap_count(onerel, &relallvisible, NULL);

        vac_update_relstats(onerel,
                            relpages,
                            totalrows,
                            relallvisible,
                            hasindex,
                            InvalidTransactionId,
                            InvalidMultiXactId,
                            in_outer_xact);
    }

    /*
     * Same for indexes.  Vacuum always scans all indexes, so if we're part of
     * VACUUM ANALYZE, don't overwrite the accurate count already inserted by
     * VACUUM.
     */
    if (!inh && !(options & VACOPT_VACUUM))
    {
        for (ind = 0; ind < nindexes; ind++)
        {
            AnlIndexData *thisdata = &indexdata[ind];
            double      totalindexrows;

            totalindexrows = ceil(thisdata->tupleFract * totalrows);
            vac_update_relstats(Irel[ind],
                                RelationGetNumberOfBlocks(Irel[ind]),
                                totalindexrows,
                                0,
                                false,
                                InvalidTransactionId,
                                InvalidMultiXactId,
                                in_outer_xact);
        }
    }

    /*
     * Report ANALYZE to the stats collector, too.  However, if doing
     * inherited stats we shouldn't report, because the stats collector only
     * tracks per-table stats.  Reset the changes_since_analyze counter only
     * if we analyzed all columns; otherwise, there is still work for
     * auto-analyze to do.
     */
    if (!inh)
        pgstat_report_analyze(onerel, totalrows, totaldeadrows,
                              (va_cols == NIL));

    /* If this isn't part of VACUUM ANALYZE, let index AMs do cleanup */
    if (!(options & VACOPT_VACUUM))
    {
        for (ind = 0; ind < nindexes; ind++)
        {
            IndexBulkDeleteResult *stats;
            IndexVacuumInfo ivinfo;

            ivinfo.index = Irel[ind];
            ivinfo.analyze_only = true;
            ivinfo.estimated_count = true;
            ivinfo.message_level = elevel;
            ivinfo.num_heap_tuples = onerel->rd_rel->reltuples;
            ivinfo.strategy = vac_strategy;

            stats = index_vacuum_cleanup(&ivinfo, NULL);

            if (stats)
                pfree(stats);
        }
    }

    /* Done with indexes */
    vac_close_indexes(nindexes, Irel, NoLock);

    /* Log the action if appropriate */
    if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
    {
        if (params->log_min_duration == 0 ||
            TimestampDifferenceExceeds(starttime, GetCurrentTimestamp(),
                                       params->log_min_duration))
            ereport(LOG,
                    (errmsg("automatic analyze of table \"%s.%s.%s\" system usage: %s",
                            get_database_name(MyDatabaseId),
                            get_namespace_name(RelationGetNamespace(onerel)),
                            RelationGetRelationName(onerel),
                            pg_rusage_show(&ru0))));
    }

    /* Roll back any GUC changes executed by index functions */
    AtEOXact_GUC(false, save_nestlevel);

    /* Restore userid and security context */
    SetUserIdAndSecContext(save_userid, save_sec_context);

    /* Restore current context and release memory */
    MemoryContextSwitchTo(caller_context);
    MemoryContextDelete(anl_context);
    anl_context = NULL;
}

/*
 * Compute statistics about indexes of a relation
 */
static void
compute_index_stats(Relation onerel, double totalrows,
                    AnlIndexData *indexdata, int nindexes,
                    HeapTuple *rows, int numrows,
                    MemoryContext col_context)
{
    MemoryContext ind_context,
                old_context;
    Datum       values[INDEX_MAX_KEYS];
    bool        isnull[INDEX_MAX_KEYS];
    int         ind,
                i;

    ind_context = AllocSetContextCreate(anl_context,
                                        "Analyze Index",
                                        ALLOCSET_DEFAULT_SIZES);
    old_context = MemoryContextSwitchTo(ind_context);

    for (ind = 0; ind < nindexes; ind++)
    {
        AnlIndexData *thisdata = &indexdata[ind];
        IndexInfo  *indexInfo = thisdata->indexInfo;
        int         attr_cnt = thisdata->attr_cnt;
        TupleTableSlot *slot;
        EState     *estate;
        ExprContext *econtext;
        List       *predicate;
        Datum      *exprvals;
        bool       *exprnulls;
        int         numindexrows,
                    tcnt,
                    rowno;
        double      totalindexrows;

        /* Ignore index if no columns to analyze and not partial */
        if (attr_cnt == 0 && indexInfo->ii_Predicate == NIL)
            continue;

        /*
         * Need an EState for evaluation of index expressions and
         * partial-index predicates.  Create it in the per-index context to be
         * sure it gets cleaned up at the bottom of the loop.
         */
        estate = CreateExecutorState();
        econtext = GetPerTupleExprContext(estate);
        /* Need a slot to hold the current heap tuple, too */
        slot = MakeSingleTupleTableSlot(RelationGetDescr(onerel));

        /* Arrange for econtext's scan tuple to be the tuple under test */
        econtext->ecxt_scantuple = slot;

        /* Set up execution state for predicate. */
        predicate = (List *)
            ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
                            estate);

        /* Compute and save index expression values */
        exprvals = (Datum *) palloc(numrows * attr_cnt * sizeof(Datum));
        exprnulls = (bool *) palloc(numrows * attr_cnt * sizeof(bool));
        numindexrows = 0;
        tcnt = 0;
        for (rowno = 0; rowno < numrows; rowno++)
        {
            HeapTuple   heapTuple = rows[rowno];

            vacuum_delay_point();

            /*
             * Reset the per-tuple context each time, to reclaim any cruft
             * left behind by evaluating the predicate or index expressions.
             */
            ResetExprContext(econtext);

            /* Set up for predicate or expression evaluation */
            ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);

            /* If index is partial, check predicate */
            if (predicate != NIL)
            {
                if (!ExecQual(predicate, econtext, false))
                    continue;
            }
            numindexrows++;

            if (attr_cnt > 0)
            {
                /*
                 * Evaluate the index row to compute expression values.  We
                 * could do this by hand, but FormIndexDatum is convenient.
                 */
                FormIndexDatum(indexInfo,
                               slot,
                               estate,
                               values,
                               isnull);

                /*
                 * Save just the columns we care about.  We copy the values
                 * into ind_context from the estate's per-tuple context.
                 */
                for (i = 0; i < attr_cnt; i++)
                {
                    VacAttrStats *stats = thisdata->vacattrstats[i];
                    int         attnum = stats->attr->attnum;

                    if (isnull[attnum - 1])
                    {
                        exprvals[tcnt] = (Datum) 0;
                        exprnulls[tcnt] = true;
                    }
                    else
                    {
                        exprvals[tcnt] = datumCopy(values[attnum - 1],
                                                   stats->attrtype->typbyval,
                                                   stats->attrtype->typlen);
                        exprnulls[tcnt] = false;
                    }
                    tcnt++;
                }
            }
        }

        /*
         * Having counted the number of rows that pass the predicate in the
         * sample, we can estimate the total number of rows in the index.
         */
        thisdata->tupleFract = (double) numindexrows / (double) numrows;
        totalindexrows = ceil(thisdata->tupleFract * totalrows);

        /*
         * Now we can compute the statistics for the expression columns.
         */
        if (numindexrows > 0)
        {
            MemoryContextSwitchTo(col_context);
            for (i = 0; i < attr_cnt; i++)
            {
                VacAttrStats *stats = thisdata->vacattrstats[i];
                AttributeOpts *aopt =
                    get_attribute_options(stats->attr->attrelid,
                                          stats->attr->attnum);

                stats->exprvals = exprvals + i;
                stats->exprnulls = exprnulls + i;
                stats->rowstride = attr_cnt;
                (*stats->compute_stats) (stats,
                                         ind_fetch_func,
                                         numindexrows,
                                         totalindexrows);

                /*
                 * If the n_distinct option is specified, it overrides the
                 * above computation.  For indices, we always use just
                 * n_distinct, not n_distinct_inherited.
                 */
                if (aopt != NULL && aopt->n_distinct != 0.0)
                    stats->stadistinct = aopt->n_distinct;

                MemoryContextResetAndDeleteChildren(col_context);
            }
        }

        /* And clean up */
        MemoryContextSwitchTo(ind_context);

        ExecDropSingleTupleTableSlot(slot);
        FreeExecutorState(estate);
        MemoryContextResetAndDeleteChildren(ind_context);
    }

    MemoryContextSwitchTo(old_context);
    MemoryContextDelete(ind_context);
}

/*
 * examine_attribute -- pre-analysis of a single column
 *
 * Determine whether the column is analyzable; if so, create and initialize
 * a VacAttrStats struct for it.  If not, return NULL.
 *
 * If index_expr isn't NULL, then we're trying to analyze an expression index,
 * and index_expr is the expression tree representing the column's data.
 */
static VacAttrStats *
examine_attribute(Relation onerel, int attnum, Node *index_expr)
{
    Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1];
    HeapTuple   typtuple;
    VacAttrStats *stats;
    int         i;
    bool        ok;

    /* Never analyze dropped columns */
    if (attr->attisdropped)
        return NULL;

    /* Don't analyze column if user has specified not to */
    if (attr->attstattarget == 0)
        return NULL;
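
    /*
     * (Editorial note: a zero target is what "ALTER TABLE ... ALTER COLUMN
     * ... SET STATISTICS 0" stores in pg_attribute.attstattarget.)
     */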

    /*
     * Create the VacAttrStats struct.  Note that we only have a copy of the
     * fixed fields of the pg_attribute tuple.
     */
    stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
    stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE);
    memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE);

    /*
     * When analyzing an expression index, believe the expression tree's type
     * not the column datatype --- the latter might be the opckeytype storage
     * type of the opclass, which is not interesting for our purposes.  (Note:
     * if we did anything with non-expression index columns, we'd need to
     * figure out where to get the correct type info from, but for now that's
     * not a problem.)  It's not clear whether anyone will care about the
     * typmod, but we store that too just in case.
     */
    if (index_expr)
    {
        stats->attrtypid = exprType(index_expr);
        stats->attrtypmod = exprTypmod(index_expr);
    }
    else
    {
        stats->attrtypid = attr->atttypid;
        stats->attrtypmod = attr->atttypmod;
    }

    typtuple = SearchSysCacheCopy1(TYPEOID,
                                   ObjectIdGetDatum(stats->attrtypid));
    if (!HeapTupleIsValid(typtuple))
        elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
    stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
    stats->anl_context = anl_context;
    stats->tupattnum = attnum;

    /*
     * The fields describing the stats->stavalues[n] element types default to
     * the type of the data being analyzed, but the type-specific typanalyze
     * function can change them if it wants to store something else.
     */
    for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
    {
        stats->statypid[i] = stats->attrtypid;
        stats->statyplen[i] = stats->attrtype->typlen;
        stats->statypbyval[i] = stats->attrtype->typbyval;
        stats->statypalign[i] = stats->attrtype->typalign;
    }

    /*
     * Call the type-specific typanalyze function.  If none is specified, use
     * std_typanalyze().
     */
    if (OidIsValid(stats->attrtype->typanalyze))
        ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze,
                                           PointerGetDatum(stats)));
    else
        ok = std_typanalyze(stats);

    if (!ok || stats->compute_stats == NULL || stats->minrows <= 0)
    {
        heap_freetuple(typtuple);
        pfree(stats->attr);
        pfree(stats);
        return NULL;
    }

    return stats;
}

/*
 * acquire_sample_rows -- acquire a random sample of rows from the table
 *
 * Selected rows are returned in the caller-allocated array rows[], which
 * must have at least targrows entries.
 * The actual number of rows selected is returned as the function result.
 * We also estimate the total numbers of live and dead rows in the table,
 * and return them into *totalrows and *totaldeadrows, respectively.
 *
 * The returned list of tuples is in order by physical position in the table.
 * (We will rely on this later to derive correlation estimates.)
 *
 * As of May 2004 we use a new two-stage method:  Stage one selects up
 * to targrows random blocks (or all blocks, if there aren't so many).
 * Stage two scans these blocks and uses the Vitter algorithm to create
 * a random sample of targrows rows (or fewer, if there are fewer in the
 * sample of blocks).  The two stages are executed simultaneously: each
 * block is processed as soon as stage one returns its number and while
 * the rows are read stage two controls which ones are to be inserted
 * into the sample.
 *
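 * As a concrete illustration (editorial example): with targrows = 30000
 * against a million-block table, stage one draws 30000 random block
 * numbers; against a 10000-block table it returns every block, and stage
 * two alone thins the rows down to the 30000-row reservoir.
 *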
 * Although every row has an equal chance of ending up in the final
 * sample, this sampling method is not perfect: not every possible
 * sample has an equal chance of being selected.  For large relations
 * the number of different blocks represented by the sample tends to be
 * too small.  We can live with that for now.  Improvements are welcome.
 *
 * An important property of this sampling method is that because we do
 * look at a statistically unbiased set of blocks, we should get
 * unbiased estimates of the average numbers of live and dead rows per
 * block.  The previous sampling method put too much credence in the row
 * density near the start of the table.
 */
static int
acquire_sample_rows(Relation onerel, int elevel,
                    HeapTuple *rows, int targrows,
                    double *totalrows, double *totaldeadrows)
{
    int         numrows = 0;    /* # rows now in reservoir */
    double      samplerows = 0; /* total # rows collected */
    double      liverows = 0;   /* # live rows seen */
    double      deadrows = 0;   /* # dead rows seen */
    double      rowstoskip = -1;    /* -1 means not set yet */
    BlockNumber totalblocks;
    TransactionId OldestXmin;
    BlockSamplerData bs;
    ReservoirStateData rstate;

    Assert(targrows > 0);

    totalblocks = RelationGetNumberOfBlocks(onerel);

    /* Need a cutoff xmin for HeapTupleSatisfiesVacuum */
    OldestXmin = GetOldestXmin(onerel, true);

    /* Prepare for sampling block numbers */
    BlockSampler_Init(&bs, totalblocks, targrows, random());
    /* Prepare for sampling rows */
    reservoir_init_selection_state(&rstate, targrows);

    /* Outer loop over blocks to sample */
    while (BlockSampler_HasMore(&bs))
    {
        BlockNumber targblock = BlockSampler_Next(&bs);
        Buffer      targbuffer;
        Page        targpage;
        OffsetNumber targoffset,
                    maxoffset;

        vacuum_delay_point();

        /*
         * We must maintain a pin on the target page's buffer to ensure that
         * the maxoffset value stays good (else concurrent VACUUM might delete
         * tuples out from under us).  Hence, pin the page until we are done
         * looking at it.  We also choose to hold sharelock on the buffer
         * throughout --- we could release and re-acquire sharelock for each
         * tuple, but since we aren't doing much work per tuple, the extra
         * lock traffic is probably better avoided.
         */
        targbuffer = ReadBufferExtended(onerel, MAIN_FORKNUM, targblock,
                                        RBM_NORMAL, vac_strategy);
        LockBuffer(targbuffer, BUFFER_LOCK_SHARE);
        targpage = BufferGetPage(targbuffer);
        maxoffset = PageGetMaxOffsetNumber(targpage);

        /* Inner loop over all tuples on the selected page */
        for (targoffset = FirstOffsetNumber; targoffset <= maxoffset; targoffset++)
        {
            ItemId      itemid;
            HeapTupleData targtuple;
            bool        sample_it = false;

            itemid = PageGetItemId(targpage, targoffset);

            /*
             * We ignore unused and redirect line pointers.  DEAD line
             * pointers should be counted as dead, because we need vacuum to
             * run to get rid of them.  Note that this rule agrees with the
             * way that heap_page_prune() counts things.
             */
            if (!ItemIdIsNormal(itemid))
            {
                if (ItemIdIsDead(itemid))
                    deadrows += 1;
                continue;
            }

            ItemPointerSet(&targtuple.t_self, targblock, targoffset);

            targtuple.t_tableOid = RelationGetRelid(onerel);
            targtuple.t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
            targtuple.t_len = ItemIdGetLength(itemid);

            switch (HeapTupleSatisfiesVacuum(&targtuple,
                                             OldestXmin,
                                             targbuffer))
            {
                case HEAPTUPLE_LIVE:
                    sample_it = true;
                    liverows += 1;
                    break;

                case HEAPTUPLE_DEAD:
                case HEAPTUPLE_RECENTLY_DEAD:
                    /* Count dead and recently-dead rows */
                    deadrows += 1;
                    break;

                case HEAPTUPLE_INSERT_IN_PROGRESS:

                    /*
                     * Insert-in-progress rows are not counted.  We assume
                     * that when the inserting transaction commits or aborts,
                     * it will send a stats message to increment the proper
                     * count.  This works right only if that transaction ends
                     * after we finish analyzing the table; if things happen
                     * in the other order, its stats update will be
                     * overwritten by ours.  However, the error will be large
                     * only if the other transaction runs long enough to
                     * insert many tuples, so assuming it will finish after us
                     * is the safer option.
                     *
                     * A special case is that the inserting transaction might
                     * be our own.  In this case we should count and sample
                     * the row, to accommodate users who load a table and
                     * analyze it in one transaction.  (pgstat_report_analyze
                     * has to adjust the numbers we send to the stats
                     * collector to make this come out right.)
                     */
                    if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple.t_data)))
                    {
                        sample_it = true;
                        liverows += 1;
                    }
                    break;

                case HEAPTUPLE_DELETE_IN_PROGRESS:

                    /*
                     * We count and sample delete-in-progress rows the same as
                     * live ones, so that the stats counters come out right if
                     * the deleting transaction commits after us, per the same
                     * reasoning given above.
                     *
                     * If the delete was done by our own transaction, however,
                     * we must count the row as dead to make
                     * pgstat_report_analyze's stats adjustments come out
                     * right.  (Note: this works out properly when the row was
                     * both inserted and deleted in our xact.)
                     *
                     * The net effect of these choices is that we act as
                     * though an IN_PROGRESS transaction hasn't happened yet,
                     * except if it is our own transaction, which we assume
                     * has happened.
                     *
                     * This approach ensures that we behave sanely if we see
                     * both the pre-image and post-image rows for a row being
                     * updated by a concurrent transaction: we will sample the
                     * pre-image but not the post-image.  We also get sane
                     * results if the concurrent transaction never commits.
                     */
                    if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple.t_data)))
                        deadrows += 1;
                    else
                    {
                        sample_it = true;
                        liverows += 1;
                    }
                    break;

                default:
                    elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
                    break;
            }

            if (sample_it)
            {
                /*
                 * The first targrows sample rows are simply copied into the
                 * reservoir.  Then we start replacing tuples in the sample
                 * until we reach the end of the relation.  This algorithm is
                 * from Jeff Vitter's paper (see full citation in
                 * utils/misc/sampling.c).  It works by repeatedly computing
                 * the number of tuples to skip before selecting a tuple,
                 * which replaces a randomly chosen element of the reservoir
                 * (current set of tuples).  At all times the reservoir is a
                 * true random sample of the tuples we've passed over so far,
                 * so when we fall off the end of the relation we're done.
                 */
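                /*
                 * (Editorial note: concretely, if reservoir_get_next_S()
                 * returns S, the next S candidate rows are passed over and
                 * the one after that replaces a uniformly chosen reservoir
                 * slot; the rowstoskip countdown below implements exactly
                 * that.)
                 */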
                if (numrows < targrows)
                    rows[numrows++] = heap_copytuple(&targtuple);
                else
                {
                    /*
                     * t in Vitter's paper is the number of records already
                     * processed.  If we need to compute a new S value, we
                     * must use the not-yet-incremented value of samplerows as
                     * t.
                     */
                    if (rowstoskip < 0)
                        rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows);

                    if (rowstoskip <= 0)
                    {
                        /*
                         * Found a suitable tuple, so save it, replacing one
                         * old tuple at random
                         */
                        int         k = (int) (targrows * sampler_random_fract(rstate.randstate));

                        Assert(k >= 0 && k < targrows);
                        heap_freetuple(rows[k]);
                        rows[k] = heap_copytuple(&targtuple);
                    }

                    rowstoskip -= 1;
                }

                samplerows += 1;
            }
        }

        /* Now release the lock and pin on the page */
        UnlockReleaseBuffer(targbuffer);
    }

    /*
     * If we didn't find as many tuples as we wanted then we're done.  No sort
     * is needed, since they're already in order.
     *
     * Otherwise we need to sort the collected tuples by position
     * (itempointer).  It's not worth worrying about corner cases where the
     * tuples are already sorted.
     */
    if (numrows == targrows)
        qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);

    /*
     * Estimate total numbers of live and dead rows in relation, extrapolating
     * on the assumption that the average tuple density in pages we didn't
     * scan is the same as in the pages we did scan.  Since what we scanned is
     * a random sample of the pages in the relation, this should be a good
     * assumption.
     */
    if (bs.m > 0)
    {
        *totalrows = floor((liverows / bs.m) * totalblocks + 0.5);
        *totaldeadrows = floor((deadrows / bs.m) * totalblocks + 0.5);
    }
    else
    {
        *totalrows = 0.0;
        *totaldeadrows = 0.0;
    }

    /*
     * Emit some interesting relation info
     */
    ereport(elevel,
            (errmsg("\"%s\": scanned %d of %u pages, "
                    "containing %.0f live rows and %.0f dead rows; "
                    "%d rows in sample, %.0f estimated total rows",
                    RelationGetRelationName(onerel),
                    bs.m, totalblocks,
                    liverows, deadrows,
                    numrows, *totalrows)));

    return numrows;
}

/*
 * qsort comparator for sorting rows[] array
 */
static int
compare_rows(const void *a, const void *b)
{
    HeapTuple   ha = *(const HeapTuple *) a;
    HeapTuple   hb = *(const HeapTuple *) b;
    BlockNumber ba = ItemPointerGetBlockNumber(&ha->t_self);
    OffsetNumber oa = ItemPointerGetOffsetNumber(&ha->t_self);
    BlockNumber bb = ItemPointerGetBlockNumber(&hb->t_self);
    OffsetNumber ob = ItemPointerGetOffsetNumber(&hb->t_self);

    if (ba < bb)
        return -1;
    if (ba > bb)
        return 1;
    if (oa < ob)
        return -1;
    if (oa > ob)
        return 1;
    return 0;
}


/*
 * acquire_inherited_sample_rows -- acquire sample rows from inheritance tree
 *
 * This has the same API as acquire_sample_rows, except that rows are
 * collected from all inheritance children as well as the specified table.
 * We fail and return zero if there are no inheritance children, or if all
 * children are foreign tables that don't support ANALYZE.
 */
static int
acquire_inherited_sample_rows(Relation onerel, int elevel,
                              HeapTuple *rows, int targrows,
                              double *totalrows, double *totaldeadrows)
{
    List       *tableOIDs;
    Relation   *rels;
    AcquireSampleRowsFunc *acquirefuncs;
    double     *relblocks;
    double      totalblocks;
    int         numrows,
                nrels,
                i;
    ListCell   *lc;

    /*
     * Find all members of inheritance set.  We only need AccessShareLock on
     * the children.
     */
    tableOIDs =
        find_all_inheritors(RelationGetRelid(onerel), AccessShareLock, NULL);

    /*
     * Check that there's at least one descendant, else fail.  This could
     * happen despite analyze_rel's relhassubclass check, if table once had a
     * child but no longer does.  In that case, we can clear the
     * relhassubclass field so as not to make the same mistake again later.
     * (This is safe because we hold ShareUpdateExclusiveLock.)
     */
    if (list_length(tableOIDs) < 2)
    {
        /* CCI because we already updated the pg_class row in this command */
        CommandCounterIncrement();
        SetRelationHasSubclass(RelationGetRelid(onerel), false);
        ereport(elevel,
                (errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no child tables",
                        get_namespace_name(RelationGetNamespace(onerel)),
                        RelationGetRelationName(onerel))));
        return 0;
    }

    /*
     * Identify acquirefuncs to use, and count blocks in all the relations.
     * The result could overflow BlockNumber, so we use double arithmetic.
     */
    rels = (Relation *) palloc(list_length(tableOIDs) * sizeof(Relation));
    acquirefuncs = (AcquireSampleRowsFunc *)
        palloc(list_length(tableOIDs) * sizeof(AcquireSampleRowsFunc));
    relblocks = (double *) palloc(list_length(tableOIDs) * sizeof(double));
    totalblocks = 0;
    nrels = 0;
    foreach(lc, tableOIDs)
    {
        Oid         childOID = lfirst_oid(lc);
        Relation    childrel;
        AcquireSampleRowsFunc acquirefunc = NULL;
        BlockNumber relpages = 0;

        /* We already got the needed lock */
        childrel = heap_open(childOID, NoLock);

        /* Ignore if temp table of another backend */
        if (RELATION_IS_OTHER_TEMP(childrel))
        {
            /* ... but release the lock on it */
            Assert(childrel != onerel);
            heap_close(childrel, AccessShareLock);
            continue;
        }

        /* Check table type (MATVIEW can't happen, but might as well allow) */
        if (childrel->rd_rel->relkind == RELKIND_RELATION ||
            childrel->rd_rel->relkind == RELKIND_MATVIEW)
        {
            /* Regular table, so use the regular row acquisition function */
            acquirefunc = acquire_sample_rows;
            relpages = RelationGetNumberOfBlocks(childrel);
        }
        else if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
        {
            /*
             * For a foreign table, call the FDW's hook function to see
             * whether it supports analysis.
             */
            FdwRoutine *fdwroutine;
            bool        ok = false;

            fdwroutine = GetFdwRoutineForRelation(childrel, false);

            if (fdwroutine->AnalyzeForeignTable != NULL)
                ok = fdwroutine->AnalyzeForeignTable(childrel,
                                                     &acquirefunc,
                                                     &relpages);

            if (!ok)
            {
                /* ignore, but release the lock on it */
                Assert(childrel != onerel);
                heap_close(childrel, AccessShareLock);
                continue;
            }
        }
        else
        {
            /* ignore, but release the lock on it */
            Assert(childrel != onerel);
            heap_close(childrel, AccessShareLock);
            continue;
        }

        /* OK, we'll process this child */
        rels[nrels] = childrel;
        acquirefuncs[nrels] = acquirefunc;
        relblocks[nrels] = (double) relpages;
        totalblocks += (double) relpages;
        nrels++;
    }

    /*
     * If we don't have at least two tables to consider, fail.
     */
    if (nrels < 2)
    {
        ereport(elevel,
                (errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no analyzable child tables",
                        get_namespace_name(RelationGetNamespace(onerel)),
                        RelationGetRelationName(onerel))));
        return 0;
    }

    /*
     * Now sample rows from each relation, proportionally to its fraction of
     * the total block count.  (This might be less than desirable if the child
     * rels have radically different free-space percentages, but it's not
     * clear that it's worth working harder.)
     */
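
    /*
     * (Worked example, editorial: with targrows = 30000 and two children of
     * 800 and 200 blocks, the children are asked for roughly 24000 and 6000
     * rows respectively.)
     */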
    numrows = 0;
    *totalrows = 0;
    *totaldeadrows = 0;
    for (i = 0; i < nrels; i++)
    {
        Relation    childrel = rels[i];
        AcquireSampleRowsFunc acquirefunc = acquirefuncs[i];
        double      childblocks = relblocks[i];

        if (childblocks > 0)
        {
            int         childtargrows;

            childtargrows = (int) rint(targrows * childblocks / totalblocks);
            /* Make sure we don't overrun due to roundoff error */
            childtargrows = Min(childtargrows, targrows - numrows);
            if (childtargrows > 0)
            {
                int         childrows;
                double      trows,
                            tdrows;

                /* Fetch a random sample of the child's rows */
                childrows = (*acquirefunc) (childrel, elevel,
                                            rows + numrows, childtargrows,
                                            &trows, &tdrows);

                /* We may need to convert from child's rowtype to parent's */
                if (childrows > 0 &&
                    !equalTupleDescs(RelationGetDescr(childrel),
                                     RelationGetDescr(onerel)))
                {
                    TupleConversionMap *map;

                    map = convert_tuples_by_name(RelationGetDescr(childrel),
                                                 RelationGetDescr(onerel),
                                                 gettext_noop("could not convert row type"));
                    if (map != NULL)
                    {
                        int         j;

                        for (j = 0; j < childrows; j++)
                        {
                            HeapTuple   newtup;

                            newtup = do_convert_tuple(rows[numrows + j], map);
                            heap_freetuple(rows[numrows + j]);
                            rows[numrows + j] = newtup;
                        }
                        free_conversion_map(map);
                    }
                }

                /* And add to counts */
                numrows += childrows;
                *totalrows += trows;
                *totaldeadrows += tdrows;
            }
        }

        /*
         * Note: we cannot release the child-table locks, since we may have
         * pointers to their TOAST tables in the sampled rows.
         */
        heap_close(childrel, NoLock);
    }

    return numrows;
}


/*
 * update_attstats() -- update attribute statistics for one relation
 *
 * Statistics are stored in several places: the pg_class row for the
 * relation has stats about the whole relation, and there is a
 * pg_statistic row for each (non-system) attribute that has ever
 * been analyzed.  The pg_class values are updated by VACUUM, not here.
 *
 * pg_statistic rows are just added or updated normally.  This means
 * that pg_statistic will probably contain some deleted rows at the
 * completion of a vacuum cycle, unless it happens to get vacuumed last.
 *
 * To keep things simple, we punt for pg_statistic, and don't try
 * to compute or store rows for pg_statistic itself in pg_statistic.
 * This could possibly be made to work, but it's not worth the trouble.
 * Note analyze_rel() has seen to it that we won't come here when
 * vacuuming pg_statistic itself.
 *
 * Note: there would be a race condition here if two backends could
 * ANALYZE the same table concurrently.  Presently, we lock that out
 * by taking a self-exclusive lock on the relation in analyze_rel().
 */
static void
update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
{
    Relation    sd;
    int         attno;

    if (natts <= 0)
        return;                 /* nothing to do */

    sd = heap_open(StatisticRelationId, RowExclusiveLock);

    for (attno = 0; attno < natts; attno++)
    {
        VacAttrStats *stats = vacattrstats[attno];
        HeapTuple   stup,
                    oldtup;
        int         i,
                    k,
                    n;
        Datum       values[Natts_pg_statistic];
        bool        nulls[Natts_pg_statistic];
        bool        replaces[Natts_pg_statistic];

        /* Ignore attr if we weren't able to collect stats */
        if (!stats->stats_valid)
            continue;

        /*
         * Construct a new pg_statistic tuple
         */
        for (i = 0; i < Natts_pg_statistic; ++i)
        {
            nulls[i] = false;
            replaces[i] = true;
        }

        values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(relid);
        values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(stats->attr->attnum);
        values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inh);
        values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac);
        values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth);
        values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct);
        i = Anum_pg_statistic_stakind1 - 1;
        for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
        {
            values[i++] = Int16GetDatum(stats->stakind[k]);     /* stakindN */
        }
        i = Anum_pg_statistic_staop1 - 1;
        for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
        {
            values[i++] = ObjectIdGetDatum(stats->staop[k]);    /* staopN */
        }
        i = Anum_pg_statistic_stanumbers1 - 1;
        for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
        {
            int         nnum = stats->numnumbers[k];

            if (nnum > 0)
            {
                Datum      *numdatums = (Datum *) palloc(nnum * sizeof(Datum));
                ArrayType  *arry;

                for (n = 0; n < nnum; n++)
                    numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
                /* XXX knows more than it should about type float4: */
                arry = construct_array(numdatums, nnum,
                                       FLOAT4OID,
                                       sizeof(float4), FLOAT4PASSBYVAL, 'i');
                values[i++] = PointerGetDatum(arry);    /* stanumbersN */
            }
            else
            {
                nulls[i] = true;
                values[i++] = (Datum) 0;
            }
        }
        i = Anum_pg_statistic_stavalues1 - 1;
        for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
        {
            if (stats->numvalues[k] > 0)
            {
                ArrayType  *arry;

                arry = construct_array(stats->stavalues[k],
                                       stats->numvalues[k],
                                       stats->statypid[k],
                                       stats->statyplen[k],
                                       stats->statypbyval[k],
                                       stats->statypalign[k]);
                values[i++] = PointerGetDatum(arry);    /* stavaluesN */
            }
            else
            {
                nulls[i] = true;
                values[i++] = (Datum) 0;
            }
        }

        /* Is there already a pg_statistic tuple for this attribute? */
        oldtup = SearchSysCache3(STATRELATTINH,
                                 ObjectIdGetDatum(relid),
                                 Int16GetDatum(stats->attr->attnum),
                                 BoolGetDatum(inh));

        if (HeapTupleIsValid(oldtup))
        {
            /* Yes, replace it */
            stup = heap_modify_tuple(oldtup,
                                     RelationGetDescr(sd),
                                     values,
                                     nulls,
                                     replaces);
            ReleaseSysCache(oldtup);
            simple_heap_update(sd, &stup->t_self, stup);
        }
        else
        {
            /* No, insert new tuple */
            stup = heap_form_tuple(RelationGetDescr(sd), values, nulls);
            simple_heap_insert(sd, stup);
        }

        /* update indexes too */
        CatalogUpdateIndexes(sd, stup);

        heap_freetuple(stup);
    }

    heap_close(sd, RowExclusiveLock);
}

/*
 * Standard fetch function for use by compute_stats subroutines.
 *
 * This exists to provide some insulation between compute_stats routines
 * and the actual storage of the sample data.
 */
static Datum
std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
{
    int         attnum = stats->tupattnum;
    HeapTuple   tuple = stats->rows[rownum];
    TupleDesc   tupDesc = stats->tupDesc;

    return heap_getattr(tuple, attnum, tupDesc, isNull);
}

/*
 * Fetch function for analyzing index expressions.
 *
 * We have not bothered to construct index tuples; instead the data is
 * just in Datum arrays.
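 * The arrays are row-major with stride rowstride (== attr_cnt), and
 * exprvals/exprnulls were pre-offset to this column by compute_index_stats,
 * so sample row rownum's entry lives at index rownum * rowstride.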
 */
static Datum
ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
{
    int         i;

    /* exprvals and exprnulls are already offset for proper column */
    i = rownum * stats->rowstride;
    *isNull = stats->exprnulls[i];
    return stats->exprvals[i];
}


/*==========================================================================
 *
 * Code below this point represents the "standard" type-specific statistics
 * analysis algorithms.  This code can be replaced on a per-data-type basis
 * by setting a nonzero value in pg_type.typanalyze.
 *
 *==========================================================================
 */
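
/*
 * (Editorial note: in core PostgreSQL, tsvector sets pg_type.typanalyze to
 * ts_typanalyze and array types use array_typanalyze, both of which collect
 * type-specific statistics such as most-common elements.)
 */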


/*
 * To avoid consuming too much memory during analysis and/or too much space
 * in the resulting pg_statistic rows, we ignore varlena datums that are wider
 * than WIDTH_THRESHOLD (after detoasting!).  This is legitimate for MCV
 * and distinct-value calculations since a wide value is unlikely to be
 * duplicated at all, much less be a most-common value.  For the same reason,
 * ignoring wide values will not affect our estimates of histogram bin
 * boundaries very much.
 */
#define WIDTH_THRESHOLD  1024

#define swapInt(a,b)    do {int _tmp; _tmp=a; a=b; b=_tmp;} while(0)
#define swapDatum(a,b)  do {Datum _tmp; _tmp=a; a=b; b=_tmp;} while(0)

/*
 * Extra information used by the default analysis routines
 */
typedef struct
{
    Oid         eqopr;          /* '=' operator for datatype, if any */
    Oid         eqfunc;         /* and associated function */
    Oid         ltopr;          /* '<' operator for datatype, if any */
} StdAnalyzeData;

typedef struct
{
    Datum       value;          /* a data value */
    int         tupno;          /* position index for tuple it came from */
} ScalarItem;

typedef struct
{
    int         count;          /* # of duplicates */
    int         first;          /* values[] index of first occurrence */
} ScalarMCVItem;

typedef struct
{
    SortSupport ssup;
    int        *tupnoLink;
} CompareScalarsContext;


static void compute_trivial_stats(VacAttrStatsP stats,
                      AnalyzeAttrFetchFunc fetchfunc,
                      int samplerows,
                      double totalrows);
static void compute_distinct_stats(VacAttrStatsP stats,
                       AnalyzeAttrFetchFunc fetchfunc,
                       int samplerows,
                       double totalrows);
static void compute_scalar_stats(VacAttrStatsP stats,
                     AnalyzeAttrFetchFunc fetchfunc,
                     int samplerows,
                     double totalrows);
static int  compare_scalars(const void *a, const void *b, void *arg);
static int  compare_mcvs(const void *a, const void *b);


/*
 * std_typanalyze -- the default type-specific typanalyze function
 */
bool
std_typanalyze(VacAttrStats *stats)
{
    Form_pg_attribute attr = stats->attr;
    Oid         ltopr;
    Oid         eqopr;
    StdAnalyzeData *mystats;

    /* If the attstattarget column is negative, use the default value */
    /* NB: it is okay to scribble on stats->attr since it's a copy */
    if (attr->attstattarget < 0)
        attr->attstattarget = default_statistics_target;

    /* Look for default "<" and "=" operators for column's type */
    get_sort_group_operators(stats->attrtypid,
                             false, false, false,
                             &ltopr, &eqopr, NULL,
                             NULL);

    /* Save the operator info for compute_stats routines */
    mystats = (StdAnalyzeData *) palloc(sizeof(StdAnalyzeData));
    mystats->eqopr = eqopr;
    mystats->eqfunc = OidIsValid(eqopr) ? get_opcode(eqopr) : InvalidOid;
    mystats->ltopr = ltopr;
    stats->extra_data = mystats;

    /*
     * Determine which standard statistics algorithm to use
     */
    if (OidIsValid(eqopr) && OidIsValid(ltopr))
    {
        /* Seems to be a scalar datatype */
        stats->compute_stats = compute_scalar_stats;
        /*--------------------
         * The following choice of minrows is based on the paper
         * "Random sampling for histogram construction: how much is enough?"
         * by Surajit Chaudhuri, Rajeev Motwani and Vivek Narasayya, in
         * Proceedings of ACM SIGMOD International Conference on Management
         * of Data, 1998, Pages 436-447.  Their Corollary 1 to Theorem 5
         * says that for table size n, histogram size k, maximum relative
         * error in bin size f, and error probability gamma, the minimum
         * random sample size is
         *      r = 4 * k * ln(2*n/gamma) / f^2
         * Taking f = 0.5, gamma = 0.01, n = 10^6 rows, we obtain
         *      r = 305.82 * k
         * Note that because of the log function, the dependence on n is
         * quite weak; even at n = 10^12, a 300*k sample gives <= 0.66
         * bin size error with probability 0.99.  So there's no real need to
         * scale for n, which is a good thing because we don't necessarily
         * know it at this point.
1793 *--------------------
1794 */
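        /*
         * A quick check of that arithmetic (a sketch, not from the paper):
         * with n = 10^6, gamma = 0.01 and f = 0.5 we have ln(2*n/gamma) =
         * ln(2e8) ~= 19.11, so r = 4 * k * 19.11 / 0.25 ~= 305.8 * k,
         * which rounds down to the 300 * k used below.
         */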
        stats->minrows = 300 * attr->attstattarget;
    }
    else if (OidIsValid(eqopr))
    {
        /* We can still recognize distinct values */
        stats->compute_stats = compute_distinct_stats;
        /* Might as well use the same minrows as above */
        stats->minrows = 300 * attr->attstattarget;
    }
    else
    {
        /* Can't do much but the trivial stuff */
        stats->compute_stats = compute_trivial_stats;
        /* Might as well use the same minrows as above */
        stats->minrows = 300 * attr->attstattarget;
    }

    return true;
}


/*
 * compute_trivial_stats() -- compute very basic column statistics
 *
 * We use this when we can find neither an "=" operator nor a "<" operator
 * for the datatype.
 *
 * We determine the fraction of non-null rows and the average datum width.
 */
static void
compute_trivial_stats(VacAttrStatsP stats,
                      AnalyzeAttrFetchFunc fetchfunc,
                      int samplerows,
                      double totalrows)
{
    int         i;
    int         null_cnt = 0;
    int         nonnull_cnt = 0;
    double      total_width = 0;
    bool        is_varlena = (!stats->attrtype->typbyval &&
                              stats->attrtype->typlen == -1);
    bool        is_varwidth = (!stats->attrtype->typbyval &&
                               stats->attrtype->typlen < 0);

    for (i = 0; i < samplerows; i++)
    {
        Datum       value;
        bool        isnull;

        vacuum_delay_point();

        value = fetchfunc(stats, i, &isnull);

        /* Check for null/nonnull */
        if (isnull)
        {
            null_cnt++;
            continue;
        }
        nonnull_cnt++;

        /*
         * If it's a variable-width field, add up widths for average width
         * calculation.  Note that if the value is toasted, we use the toasted
         * width.  We don't bother with this calculation if it's a fixed-width
         * type.
         */
        if (is_varlena)
        {
            total_width += VARSIZE_ANY(DatumGetPointer(value));
        }
        else if (is_varwidth)
        {
            /* must be cstring */
            total_width += strlen(DatumGetCString(value)) + 1;
        }
    }

    /* We can only compute average width if we found some non-null values. */
    if (nonnull_cnt > 0)
    {
        stats->stats_valid = true;
        /* Do the simple null-frac and width stats */
        stats->stanullfrac = (double) null_cnt / (double) samplerows;
        if (is_varwidth)
            stats->stawidth = total_width / (double) nonnull_cnt;
        else
            stats->stawidth = stats->attrtype->typlen;
        stats->stadistinct = 0.0;   /* "unknown" */
    }
    else if (null_cnt > 0)
    {
        /* We found only nulls; assume the column is entirely null */
        stats->stats_valid = true;
        stats->stanullfrac = 1.0;
        if (is_varwidth)
            stats->stawidth = 0;    /* "unknown" */
        else
            stats->stawidth = stats->attrtype->typlen;
        stats->stadistinct = 0.0;   /* "unknown" */
    }
}


/*
 * compute_distinct_stats() -- compute column statistics including ndistinct
 *
 * We use this when we can find only an "=" operator for the datatype.
 *
 * We determine the fraction of non-null rows, the average width, the
 * most common values, and the (estimated) number of distinct values.
 *
 * The most common values are determined by brute force: we keep a list
 * of previously seen values, ordered by number of times seen, as we scan
 * the samples.  A newly seen value is inserted just after the last
 * multiply-seen value, causing the bottommost (oldest) singly-seen value
 * to drop off the list.  The accuracy of this method, and also its cost,
 * depend mainly on the length of the list we are willing to keep.
 */
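/*
 * For illustration (hypothetical input, track list of length 3): scanning
 * the values A, B, A, C leaves the list as [A:2, C:1, B:1], since C is
 * inserted just after the last multiply-seen value (A).  A further new
 * value D would then produce [A:2, D:1, C:1], dropping the bottommost
 * singly-seen value (B) off the full list.
 */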
static void
compute_distinct_stats(VacAttrStatsP stats,
                       AnalyzeAttrFetchFunc fetchfunc,
                       int samplerows,
                       double totalrows)
{
    int         i;
    int         null_cnt = 0;
    int         nonnull_cnt = 0;
    int         toowide_cnt = 0;
    double      total_width = 0;
    bool        is_varlena = (!stats->attrtype->typbyval &&
                              stats->attrtype->typlen == -1);
    bool        is_varwidth = (!stats->attrtype->typbyval &&
                               stats->attrtype->typlen < 0);
    FmgrInfo    f_cmpeq;
    typedef struct
    {
        Datum       value;
        int         count;
    } TrackItem;
    TrackItem  *track;
    int         track_cnt,
                track_max;
    int         num_mcv = stats->attr->attstattarget;
    StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;

    /*
     * We track up to 2*n values for an n-element MCV list; but at least 10
     */
    track_max = 2 * num_mcv;
    if (track_max < 10)
        track_max = 10;
    track = (TrackItem *) palloc(track_max * sizeof(TrackItem));
    track_cnt = 0;

    fmgr_info(mystats->eqfunc, &f_cmpeq);

    for (i = 0; i < samplerows; i++)
    {
        Datum       value;
        bool        isnull;
        bool        match;
        int         firstcount1,
                    j;

        vacuum_delay_point();

        value = fetchfunc(stats, i, &isnull);

        /* Check for null/nonnull */
        if (isnull)
        {
            null_cnt++;
            continue;
        }
        nonnull_cnt++;

        /*
         * If it's a variable-width field, add up widths for average width
         * calculation.  Note that if the value is toasted, we use the toasted
         * width.  We don't bother with this calculation if it's a fixed-width
         * type.
         */
        if (is_varlena)
        {
            total_width += VARSIZE_ANY(DatumGetPointer(value));

            /*
             * If the value is toasted, we want to detoast it just once to
             * avoid repeated detoastings and resultant excess memory usage
             * during the comparisons.  Also, check to see if the value is
             * excessively wide, and if so don't detoast at all --- just
             * ignore the value.
             */
            if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
            {
                toowide_cnt++;
                continue;
            }
            value = PointerGetDatum(PG_DETOAST_DATUM(value));
        }
        else if (is_varwidth)
        {
            /* must be cstring */
            total_width += strlen(DatumGetCString(value)) + 1;
        }

        /*
         * See if the value matches anything we're already tracking.
         */
        match = false;
        firstcount1 = track_cnt;
        for (j = 0; j < track_cnt; j++)
        {
            /* We always use the default collation for statistics */
            if (DatumGetBool(FunctionCall2Coll(&f_cmpeq,
                                               DEFAULT_COLLATION_OID,
                                               value, track[j].value)))
            {
                match = true;
                break;
            }
            if (j < firstcount1 && track[j].count == 1)
                firstcount1 = j;
        }

        if (match)
        {
            /* Found a match */
            track[j].count++;
            /* This value may now need to "bubble up" in the track list */
            while (j > 0 && track[j].count > track[j - 1].count)
            {
                swapDatum(track[j].value, track[j - 1].value);
                swapInt(track[j].count, track[j - 1].count);
                j--;
            }
        }
        else
        {
            /* No match.  Insert at head of count-1 list */
            if (track_cnt < track_max)
                track_cnt++;
            for (j = track_cnt - 1; j > firstcount1; j--)
            {
                track[j].value = track[j - 1].value;
                track[j].count = track[j - 1].count;
            }
            if (firstcount1 < track_cnt)
            {
                track[firstcount1].value = value;
                track[firstcount1].count = 1;
            }
        }
    }

    /* We can only compute real stats if we found some non-null values. */
    if (nonnull_cnt > 0)
    {
        int         nmultiple,
                    summultiple;

        stats->stats_valid = true;
        /* Do the simple null-frac and width stats */
        stats->stanullfrac = (double) null_cnt / (double) samplerows;
        if (is_varwidth)
            stats->stawidth = total_width / (double) nonnull_cnt;
        else
            stats->stawidth = stats->attrtype->typlen;

        /* Count the number of values we found multiple times */
        summultiple = 0;
        for (nmultiple = 0; nmultiple < track_cnt; nmultiple++)
        {
            if (track[nmultiple].count == 1)
                break;
            summultiple += track[nmultiple].count;
        }

        if (nmultiple == 0)
        {
            /*
             * If we found no repeated non-null values, assume it's a unique
             * column; but be sure to discount for any nulls we found.
             */
            stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
        }
        else if (track_cnt < track_max && toowide_cnt == 0 &&
                 nmultiple == track_cnt)
        {
            /*
             * Our track list includes every value in the sample, and every
             * value appeared more than once.  Assume the column has just
             * these values.  (This case is meant to address columns with
             * small, fixed sets of possible values, such as boolean or enum
             * columns.  If there are any values that appear just once in the
             * sample, including too-wide values, we should assume that that's
             * not what we're dealing with.)
             */
            stats->stadistinct = track_cnt;
        }
        else
        {
            /*----------
             * Estimate the number of distinct values using the estimator
             * proposed by Haas and Stokes in IBM Research Report RJ 10025:
             *      n*d / (n - f1 + f1*n/N)
             * where f1 is the number of distinct values that occurred
             * exactly once in our sample of n rows (from a total of N),
             * and d is the total number of distinct values in the sample.
             * This is their Duj1 estimator; the other estimators they
             * recommend are considerably more complex, and are numerically
             * very unstable when n is much smaller than N.
             *
             * In this calculation, we consider only non-nulls.  We used to
             * include rows with null values in the n and N counts, but that
             * leads to inaccurate answers in columns with many nulls, and
             * it's intuitively bogus anyway considering the desired result is
             * the number of distinct non-null values.
             *
             * We assume (not very reliably!) that all the multiply-occurring
             * values are reflected in the final track[] list, and the other
             * nonnull values all appeared but once.  (XXX this usually
             * results in a drastic overestimate of ndistinct.  Can we do
             * any better?)
             *----------
             */
            int         f1 = nonnull_cnt - summultiple;
            int         d = f1 + nmultiple;
            double      n = samplerows - null_cnt;
            double      N = totalrows * (1.0 - stats->stanullfrac);
            double      stadistinct;

            /* N == 0 shouldn't happen, but just in case ... */
            if (N > 0)
                stadistinct = (n * d) / ((n - f1) + f1 * n / N);
            else
                stadistinct = 0;

            /* Clamp to sane range in case of roundoff error */
            if (stadistinct < d)
                stadistinct = d;
            if (stadistinct > N)
                stadistinct = N;
            /* And round to integer */
            stats->stadistinct = floor(stadistinct + 0.5);
        }
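
        /*
         * A worked instance of the Duj1 formula (invented numbers, for
         * illustration only): with n = 30000 nonnull sample rows holding
         * d = 1000 distinct values, of which f1 = 800 occurred exactly once,
         * and N = 1000000 nonnull rows in the table, the estimate is
         * 30000*1000 / (30000 - 800 + 800*30000/1000000) = 3.0e7 / 29224,
         * or about 1027 distinct values.
         */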

        /*
         * If we estimated the number of distinct values at more than 10% of
         * the total row count (a very arbitrary limit), then assume that
         * stadistinct should scale with the row count rather than be a fixed
         * value.
         */
        if (stats->stadistinct > 0.1 * totalrows)
            stats->stadistinct = -(stats->stadistinct / totalrows);

        /*
         * Decide how many values are worth storing as most-common values. If
         * we are able to generate a complete MCV list (all the values in the
         * sample will fit, and we think these are all the ones in the table),
         * then do so.  Otherwise, store only those values that are
         * significantly more common than the (estimated) average.  We set the
         * threshold rather arbitrarily at 25% more than average, with at
         * least 2 instances in the sample.
         *
         * Note: the first of these cases is meant to address columns with
         * small, fixed sets of possible values, such as boolean or enum
         * columns.  If we can *completely* represent the column population by
         * an MCV list that will fit into the stats target, then we should do
         * so and thus provide the planner with complete information.  But if
         * the MCV list is not complete, it's generally worth being more
         * selective, and not just filling it all the way up to the stats
         * target.  So for an incomplete list, we try to take only MCVs that
         * are significantly more common than average.
         */
        if (track_cnt < track_max && toowide_cnt == 0 &&
            stats->stadistinct > 0 &&
            track_cnt <= num_mcv)
        {
            /* Track list includes all values seen, and all will fit */
            num_mcv = track_cnt;
        }
        else
        {
            double      ndistinct_table = stats->stadistinct;
            double      avgcount,
                        mincount;

            /* Re-extract estimate of # distinct nonnull values in table */
            if (ndistinct_table < 0)
                ndistinct_table = -ndistinct_table * totalrows;
            /* estimate # occurrences in sample of a typical nonnull value */
            avgcount = (double) nonnull_cnt / ndistinct_table;
            /* set minimum threshold count to store a value */
            mincount = avgcount * 1.25;
            if (mincount < 2)
                mincount = 2;
            if (num_mcv > track_cnt)
                num_mcv = track_cnt;
            for (i = 0; i < num_mcv; i++)
            {
                if (track[i].count < mincount)
                {
                    num_mcv = i;
                    break;
                }
            }
        }

        /* Generate MCV slot entry */
        if (num_mcv > 0)
        {
            MemoryContext old_context;
            Datum      *mcv_values;
            float4     *mcv_freqs;

            /* Must copy the target values into anl_context */
            old_context = MemoryContextSwitchTo(stats->anl_context);
            mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
            mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
            for (i = 0; i < num_mcv; i++)
            {
                mcv_values[i] = datumCopy(track[i].value,
                                          stats->attrtype->typbyval,
                                          stats->attrtype->typlen);
                mcv_freqs[i] = (double) track[i].count / (double) samplerows;
            }
            MemoryContextSwitchTo(old_context);

            stats->stakind[0] = STATISTIC_KIND_MCV;
            stats->staop[0] = mystats->eqopr;
            stats->stanumbers[0] = mcv_freqs;
            stats->numnumbers[0] = num_mcv;
            stats->stavalues[0] = mcv_values;
            stats->numvalues[0] = num_mcv;

            /*
             * Accept the defaults for stats->statypid and others. They have
             * been set before we were called (see vacuum.h)
             */
        }
    }
    else if (null_cnt > 0)
    {
        /* We found only nulls; assume the column is entirely null */
        stats->stats_valid = true;
        stats->stanullfrac = 1.0;
        if (is_varwidth)
            stats->stawidth = 0;    /* "unknown" */
        else
            stats->stawidth = stats->attrtype->typlen;
        stats->stadistinct = 0.0;   /* "unknown" */
    }

    /* We don't need to bother cleaning up any of our temporary palloc's */
}


/*
 * compute_scalar_stats() -- compute column statistics
 *
 * We use this when we can find "=" and "<" operators for the datatype.
 *
 * We determine the fraction of non-null rows, the average width, the
 * most common values, the (estimated) number of distinct values, the
 * distribution histogram, and the correlation of physical to logical order.
 *
 * The desired stats can be determined fairly easily after sorting the
 * data values into order.
 */
static void
compute_scalar_stats(VacAttrStatsP stats,
                     AnalyzeAttrFetchFunc fetchfunc,
                     int samplerows,
                     double totalrows)
{
    int         i;
    int         null_cnt = 0;
    int         nonnull_cnt = 0;
    int         toowide_cnt = 0;
    double      total_width = 0;
    bool        is_varlena = (!stats->attrtype->typbyval &&
                              stats->attrtype->typlen == -1);
    bool        is_varwidth = (!stats->attrtype->typbyval &&
                               stats->attrtype->typlen < 0);
    double      corr_xysum;
    SortSupportData ssup;
    ScalarItem *values;
    int         values_cnt = 0;
    int        *tupnoLink;
    ScalarMCVItem *track;
    int         track_cnt = 0;
    int         num_mcv = stats->attr->attstattarget;
    int         num_bins = stats->attr->attstattarget;
    StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;

    values = (ScalarItem *) palloc(samplerows * sizeof(ScalarItem));
    tupnoLink = (int *) palloc(samplerows * sizeof(int));
    track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem));

    memset(&ssup, 0, sizeof(ssup));
    ssup.ssup_cxt = CurrentMemoryContext;
    /* We always use the default collation for statistics */
    ssup.ssup_collation = DEFAULT_COLLATION_OID;
    ssup.ssup_nulls_first = false;

    /*
     * For now, don't perform abbreviated key conversion, because full values
     * are required for MCV slot generation.  Supporting that optimization
     * would necessitate teaching compare_scalars() to call a tie-breaker.
     */
    ssup.abbreviate = false;

    PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup);

    /* Initial scan to find sortable values */
    for (i = 0; i < samplerows; i++)
    {
        Datum       value;
        bool        isnull;

        vacuum_delay_point();

        value = fetchfunc(stats, i, &isnull);

        /* Check for null/nonnull */
        if (isnull)
        {
            null_cnt++;
            continue;
        }
        nonnull_cnt++;

        /*
         * If it's a variable-width field, add up widths for average width
         * calculation.  Note that if the value is toasted, we use the toasted
         * width.  We don't bother with this calculation if it's a fixed-width
         * type.
         */
        if (is_varlena)
        {
            total_width += VARSIZE_ANY(DatumGetPointer(value));

            /*
             * If the value is toasted, we want to detoast it just once to
             * avoid repeated detoastings and resultant excess memory usage
             * during the comparisons.  Also, check to see if the value is
             * excessively wide, and if so don't detoast at all --- just
             * ignore the value.
             */
            if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
            {
                toowide_cnt++;
                continue;
            }
            value = PointerGetDatum(PG_DETOAST_DATUM(value));
        }
        else if (is_varwidth)
        {
            /* must be cstring */
            total_width += strlen(DatumGetCString(value)) + 1;
        }

        /* Add it to the list to be sorted */
        values[values_cnt].value = value;
        values[values_cnt].tupno = values_cnt;
        tupnoLink[values_cnt] = values_cnt;
        values_cnt++;
    }

    /* We can only compute real stats if we found some sortable values. */
    if (values_cnt > 0)
    {
        int         ndistinct,  /* # distinct values in sample */
                    nmultiple,  /* # that appear multiple times */
                    num_hist,
                    dups_cnt;
        int         slot_idx = 0;
        CompareScalarsContext cxt;

        /* Sort the collected values */
        cxt.ssup = &ssup;
        cxt.tupnoLink = tupnoLink;
        qsort_arg((void *) values, values_cnt, sizeof(ScalarItem),
                  compare_scalars, (void *) &cxt);

        /*
         * Now scan the values in order, find the most common ones, and also
         * accumulate ordering-correlation statistics.
         *
         * To determine which are most common, we first have to count the
         * number of duplicates of each value.  The duplicates are adjacent in
         * the sorted list, so a brute-force approach is to compare successive
         * datum values until we find two that are not equal.  However, that
         * requires N-1 invocations of the datum comparison routine, which are
         * completely redundant with work that was done during the sort.  (The
         * sort algorithm must at some point have compared each pair of items
         * that are adjacent in the sorted order; otherwise it could not know
         * that it's ordered the pair correctly.)  We exploit this by having
         * compare_scalars remember the highest tupno index that each
         * ScalarItem has been found equal to.  At the end of the sort, a
         * ScalarItem's tupnoLink will still point to itself if and only if it
         * is the last item of its group of duplicates (since the group will
         * be ordered by tupno).
         */
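
        /*
         * A small example of the tupnoLink convention (hypothetical data):
         * if the sorted sample is {A, A, B} with tupnos {0, 1, 2}, the sort
         * leaves tupnoLink = {1, 1, 2}.  Only the last duplicate of each
         * value (tupnos 1 and 2) satisfies tupnoLink[tupno] == tupno, so the
         * loop below counts ndistinct = 2 and nmultiple = 1.
         */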
        corr_xysum = 0;
        ndistinct = 0;
        nmultiple = 0;
        dups_cnt = 0;
        for (i = 0; i < values_cnt; i++)
        {
            int         tupno = values[i].tupno;

            corr_xysum += ((double) i) * ((double) tupno);
            dups_cnt++;
            if (tupnoLink[tupno] == tupno)
            {
                /* Reached end of duplicates of this value */
                ndistinct++;
                if (dups_cnt > 1)
                {
                    nmultiple++;
                    if (track_cnt < num_mcv ||
                        dups_cnt > track[track_cnt - 1].count)
                    {
                        /*
                         * Found a new item for the mcv list; find its
                         * position, bubbling down old items if needed. Loop
                         * invariant is that j points at an empty/replaceable
                         * slot.
                         */
                        int         j;

                        if (track_cnt < num_mcv)
                            track_cnt++;
                        for (j = track_cnt - 1; j > 0; j--)
                        {
                            if (dups_cnt <= track[j - 1].count)
                                break;
                            track[j].count = track[j - 1].count;
                            track[j].first = track[j - 1].first;
                        }
                        track[j].count = dups_cnt;
                        track[j].first = i + 1 - dups_cnt;
                    }
                }
                dups_cnt = 0;
            }
        }

        stats->stats_valid = true;
        /* Do the simple null-frac and width stats */
        stats->stanullfrac = (double) null_cnt / (double) samplerows;
        if (is_varwidth)
            stats->stawidth = total_width / (double) nonnull_cnt;
        else
            stats->stawidth = stats->attrtype->typlen;

        if (nmultiple == 0)
        {
            /*
             * If we found no repeated non-null values, assume it's a unique
             * column; but be sure to discount for any nulls we found.
             */
            stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
        }
        else if (toowide_cnt == 0 && nmultiple == ndistinct)
        {
            /*
             * Every value in the sample appeared more than once.  Assume the
             * column has just these values.  (This case is meant to address
             * columns with small, fixed sets of possible values, such as
             * boolean or enum columns.  If there are any values that appear
             * just once in the sample, including too-wide values, we should
             * assume that that's not what we're dealing with.)
             */
            stats->stadistinct = ndistinct;
        }
        else
        {
            /*----------
             * Estimate the number of distinct values using the estimator
             * proposed by Haas and Stokes in IBM Research Report RJ 10025:
             *      n*d / (n - f1 + f1*n/N)
             * where f1 is the number of distinct values that occurred
             * exactly once in our sample of n rows (from a total of N),
             * and d is the total number of distinct values in the sample.
             * This is their Duj1 estimator; the other estimators they
             * recommend are considerably more complex, and are numerically
             * very unstable when n is much smaller than N.
             *
             * In this calculation, we consider only non-nulls.  We used to
             * include rows with null values in the n and N counts, but that
             * leads to inaccurate answers in columns with many nulls, and
             * it's intuitively bogus anyway considering the desired result is
             * the number of distinct non-null values.
             *
             * Overwidth values are assumed to have been distinct.
             *----------
             */
            int         f1 = ndistinct - nmultiple + toowide_cnt;
            int         d = f1 + nmultiple;
            double      n = samplerows - null_cnt;
            double      N = totalrows * (1.0 - stats->stanullfrac);
            double      stadistinct;

            /* N == 0 shouldn't happen, but just in case ... */
            if (N > 0)
                stadistinct = (n * d) / ((n - f1) + f1 * n / N);
            else
                stadistinct = 0;

            /* Clamp to sane range in case of roundoff error */
            if (stadistinct < d)
                stadistinct = d;
            if (stadistinct > N)
                stadistinct = N;
            /* And round to integer */
            stats->stadistinct = floor(stadistinct + 0.5);
        }

        /*
         * If we estimated the number of distinct values at more than 10% of
         * the total row count (a very arbitrary limit), then assume that
         * stadistinct should scale with the row count rather than be a fixed
         * value.
         */
        if (stats->stadistinct > 0.1 * totalrows)
            stats->stadistinct = -(stats->stadistinct / totalrows);

        /*
         * Decide how many values are worth storing as most-common values. If
         * we are able to generate a complete MCV list (all the values in the
         * sample will fit, and we think these are all the ones in the table),
         * then do so.  Otherwise, store only those values that are
         * significantly more common than the (estimated) average.  We set the
         * threshold rather arbitrarily at 25% more than average, with at
         * least 2 instances in the sample.  Also, we won't suppress values
         * that have a frequency of at least 1/K where K is the intended
         * number of histogram bins; such values might otherwise cause us to
         * emit duplicate histogram bin boundaries.  (We might end up with
         * duplicate histogram entries anyway, if the distribution is skewed;
         * but we prefer to treat such values as MCVs if at all possible.)
         *
         * Note: the first of these cases is meant to address columns with
         * small, fixed sets of possible values, such as boolean or enum
         * columns.  If we can *completely* represent the column population by
         * an MCV list that will fit into the stats target, then we should do
         * so and thus provide the planner with complete information.  But if
         * the MCV list is not complete, it's generally worth being more
         * selective, and not just filling it all the way up to the stats
         * target.  So for an incomplete list, we try to take only MCVs that
         * are significantly more common than average.
         */
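
        /*
         * Example with invented numbers: if values_cnt = 30000 (no too-wide
         * values) and we estimate 200 distinct values in the table, then
         * avgcount = 150 and the initial threshold is mincount = 187.5;
         * with num_bins = 100 the cap is maxmincount = 30000/100 = 300,
         * so the 187.5 threshold stands.
         */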
        if (track_cnt == ndistinct && toowide_cnt == 0 &&
            stats->stadistinct > 0 &&
            track_cnt <= num_mcv)
        {
            /* Track list includes all values seen, and all will fit */
            num_mcv = track_cnt;
        }
        else
        {
            double      ndistinct_table = stats->stadistinct;
            double      avgcount,
                        mincount,
                        maxmincount;

            /* Re-extract estimate of # distinct nonnull values in table */
            if (ndistinct_table < 0)
                ndistinct_table = -ndistinct_table * totalrows;
            /* estimate # occurrences in sample of a typical nonnull value */
            avgcount = (double) nonnull_cnt / ndistinct_table;
            /* set minimum threshold count to store a value */
            mincount = avgcount * 1.25;
            if (mincount < 2)
                mincount = 2;
            /* don't let threshold exceed 1/K, however */
            maxmincount = (double) values_cnt / (double) num_bins;
            if (mincount > maxmincount)
                mincount = maxmincount;
            if (num_mcv > track_cnt)
                num_mcv = track_cnt;
            for (i = 0; i < num_mcv; i++)
            {
                if (track[i].count < mincount)
                {
                    num_mcv = i;
                    break;
                }
            }
        }

        /* Generate MCV slot entry */
        if (num_mcv > 0)
        {
            MemoryContext old_context;
            Datum      *mcv_values;
            float4     *mcv_freqs;

            /* Must copy the target values into anl_context */
            old_context = MemoryContextSwitchTo(stats->anl_context);
            mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
            mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
            for (i = 0; i < num_mcv; i++)
            {
                mcv_values[i] = datumCopy(values[track[i].first].value,
                                          stats->attrtype->typbyval,
                                          stats->attrtype->typlen);
                mcv_freqs[i] = (double) track[i].count / (double) samplerows;
            }
            MemoryContextSwitchTo(old_context);

            stats->stakind[slot_idx] = STATISTIC_KIND_MCV;
            stats->staop[slot_idx] = mystats->eqopr;
            stats->stanumbers[slot_idx] = mcv_freqs;
            stats->numnumbers[slot_idx] = num_mcv;
            stats->stavalues[slot_idx] = mcv_values;
            stats->numvalues[slot_idx] = num_mcv;

            /*
             * Accept the defaults for stats->statypid and others. They have
             * been set before we were called (see vacuum.h)
             */
            slot_idx++;
        }

        /*
         * Generate a histogram slot entry if there are at least two distinct
         * values not accounted for in the MCV list.  (This ensures the
         * histogram won't collapse to empty or a singleton.)
         */
        num_hist = ndistinct - num_mcv;
        if (num_hist > num_bins)
            num_hist = num_bins + 1;
        if (num_hist >= 2)
        {
            MemoryContext old_context;
            Datum      *hist_values;
            int         nvals;
            int         pos,
                        posfrac,
                        delta,
                        deltafrac;

            /* Sort the MCV items into position order to speed next loop */
            qsort((void *) track, num_mcv,
                  sizeof(ScalarMCVItem), compare_mcvs);

            /*
             * Collapse out the MCV items from the values[] array.
             *
             * Note we destroy the values[] array here... but we don't need it
             * for anything more.  We do, however, still need values_cnt.
             * nvals will be the number of remaining entries in values[].
             */
            if (num_mcv > 0)
            {
                int         src,
                            dest;
                int         j;

                src = dest = 0;
                j = 0;          /* index of next interesting MCV item */
                while (src < values_cnt)
                {
                    int         ncopy;

                    if (j < num_mcv)
                    {
                        int         first = track[j].first;

                        if (src >= first)
                        {
                            /* advance past this MCV item */
                            src = first + track[j].count;
                            j++;
                            continue;
                        }
                        ncopy = first - src;
                    }
                    else
                        ncopy = values_cnt - src;
                    memmove(&values[dest], &values[src],
                            ncopy * sizeof(ScalarItem));
                    src += ncopy;
                    dest += ncopy;
                }
                nvals = dest;
            }
            else
                nvals = values_cnt;
            Assert(nvals >= num_hist);

            /* Must copy the target values into anl_context */
            old_context = MemoryContextSwitchTo(stats->anl_context);
            hist_values = (Datum *) palloc(num_hist * sizeof(Datum));

            /*
             * The object of this loop is to copy the first and last values[]
             * entries along with evenly-spaced values in between.  So the
             * i'th value is values[(i * (nvals - 1)) / (num_hist - 1)].  But
             * computing that subscript directly risks integer overflow when
             * the stats target is more than a couple thousand.  Instead we
             * add (nvals - 1) / (num_hist - 1) to pos at each step, tracking
             * the integral and fractional parts of the sum separately.
             */
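
            /*
             * For example (hypothetical sizes): nvals = 11 and num_hist = 5
             * give delta = 2 and deltafrac = 2, and the loop below selects
             * positions 0, 2, 5, 7, 10 -- exactly (i * 10) / 4 for
             * i = 0 .. 4, without ever forming the large product.
             */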
            delta = (nvals - 1) / (num_hist - 1);
            deltafrac = (nvals - 1) % (num_hist - 1);
            pos = posfrac = 0;

            for (i = 0; i < num_hist; i++)
            {
                hist_values[i] = datumCopy(values[pos].value,
                                           stats->attrtype->typbyval,
                                           stats->attrtype->typlen);
                pos += delta;
                posfrac += deltafrac;
                if (posfrac >= (num_hist - 1))
                {
                    /* fractional part exceeds 1, carry to integer part */
                    pos++;
                    posfrac -= (num_hist - 1);
                }
            }

            MemoryContextSwitchTo(old_context);

            stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM;
            stats->staop[slot_idx] = mystats->ltopr;
            stats->stavalues[slot_idx] = hist_values;
            stats->numvalues[slot_idx] = num_hist;

            /*
             * Accept the defaults for stats->statypid and others. They have
             * been set before we were called (see vacuum.h)
             */
            slot_idx++;
        }

        /* Generate a correlation entry if there are multiple values */
        if (values_cnt > 1)
        {
            MemoryContext old_context;
            float4     *corrs;
            double      corr_xsum,
                        corr_x2sum;

            /* Must copy the target values into anl_context */
            old_context = MemoryContextSwitchTo(stats->anl_context);
            corrs = (float4 *) palloc(sizeof(float4));
            MemoryContextSwitchTo(old_context);

            /*----------
             * Since we know the x and y value sets are both
             *      0, 1, ..., values_cnt-1
             * we have sum(x) = sum(y) =
             *      (values_cnt-1)*values_cnt / 2
             * and sum(x^2) = sum(y^2) =
             *      (values_cnt-1)*values_cnt*(2*values_cnt-1) / 6.
             *----------
             */
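
            /*----------
             * These drop into the standard Pearson correlation
             *      corr = (n*sum(xy) - sum(x)*sum(y)) /
             *             sqrt((n*sum(x^2) - sum(x)^2) *
             *                  (n*sum(y^2) - sum(y)^2))
             * and, because the x and y sums are identical, the square root
             * collapses to n*sum(x^2) - sum(x)^2, giving the expression
             * computed below.
             *----------
             */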
            corr_xsum = ((double) (values_cnt - 1)) *
                ((double) values_cnt) / 2.0;
            corr_x2sum = ((double) (values_cnt - 1)) *
                ((double) values_cnt) * (double) (2 * values_cnt - 1) / 6.0;

            /* And the correlation coefficient reduces to */
            corrs[0] = (values_cnt * corr_xysum - corr_xsum * corr_xsum) /
                (values_cnt * corr_x2sum - corr_xsum * corr_xsum);

            stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION;
            stats->staop[slot_idx] = mystats->ltopr;
            stats->stanumbers[slot_idx] = corrs;
            stats->numnumbers[slot_idx] = 1;
            slot_idx++;
        }
    }
    else if (nonnull_cnt > 0)
    {
        /* We found some non-null values, but they were all too wide */
        Assert(nonnull_cnt == toowide_cnt);
        stats->stats_valid = true;
        /* Do the simple null-frac and width stats */
        stats->stanullfrac = (double) null_cnt / (double) samplerows;
        if (is_varwidth)
            stats->stawidth = total_width / (double) nonnull_cnt;
        else
            stats->stawidth = stats->attrtype->typlen;
        /* Assume all too-wide values are distinct, so it's a unique column */
        stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
    }
    else if (null_cnt > 0)
    {
        /* We found only nulls; assume the column is entirely null */
        stats->stats_valid = true;
        stats->stanullfrac = 1.0;
        if (is_varwidth)
            stats->stawidth = 0;    /* "unknown" */
        else
            stats->stawidth = stats->attrtype->typlen;
        stats->stadistinct = 0.0;   /* "unknown" */
    }

    /* We don't need to bother cleaning up any of our temporary palloc's */
}

/*
 * qsort_arg comparator for sorting ScalarItems
 *
 * Aside from sorting the items, we update the tupnoLink[] array
 * whenever two ScalarItems are found to contain equal datums.  The array
 * is indexed by tupno; for each ScalarItem, it contains the highest
 * tupno that that item's datum has been found to be equal to.  This allows
 * us to avoid additional comparisons in compute_scalar_stats().
 */
static int
compare_scalars(const void *a, const void *b, void *arg)
{
    Datum       da = ((const ScalarItem *) a)->value;
    int         ta = ((const ScalarItem *) a)->tupno;
    Datum       db = ((const ScalarItem *) b)->value;
    int         tb = ((const ScalarItem *) b)->tupno;
    CompareScalarsContext *cxt = (CompareScalarsContext *) arg;
    int         compare;

    compare = ApplySortComparator(da, false, db, false, cxt->ssup);
    if (compare != 0)
        return compare;

    /*
     * The two datums are equal, so update cxt->tupnoLink[].
     */
    if (cxt->tupnoLink[ta] < tb)
        cxt->tupnoLink[ta] = tb;
    if (cxt->tupnoLink[tb] < ta)
        cxt->tupnoLink[tb] = ta;

    /*
     * For equal datums, sort by tupno
     */
    return ta - tb;
}

/*
 * qsort comparator for sorting ScalarMCVItems by position
 */
static int
compare_mcvs(const void *a, const void *b)
{
    int         da = ((const ScalarMCVItem *) a)->first;
    int         db = ((const ScalarMCVItem *) b)->first;

    return da - db;
}