1 /*-------------------------------------------------------------------------
2 *
3 * vacuum.c
4 * The postgres vacuum cleaner.
5 *
6 * This file now includes only control and dispatch code for VACUUM and
7 * ANALYZE commands. Regular VACUUM is implemented in vacuumlazy.c,
8 * ANALYZE in analyze.c, and VACUUM FULL is a variant of CLUSTER, handled
9 * in cluster.c.
10 *
11 *
12 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
13 * Portions Copyright (c) 1994, Regents of the University of California
14 *
15 *
16 * IDENTIFICATION
17 * src/backend/commands/vacuum.c
18 *
19 *-------------------------------------------------------------------------
20 */
21 #include "postgres.h"
22
23 #include <math.h>
24
25 #include "access/clog.h"
26 #include "access/commit_ts.h"
27 #include "access/genam.h"
28 #include "access/heapam.h"
29 #include "access/htup_details.h"
30 #include "access/multixact.h"
31 #include "access/transam.h"
32 #include "access/xact.h"
33 #include "catalog/namespace.h"
34 #include "catalog/pg_database.h"
35 #include "catalog/pg_inherits.h"
36 #include "catalog/pg_namespace.h"
37 #include "commands/cluster.h"
38 #include "commands/vacuum.h"
39 #include "miscadmin.h"
40 #include "nodes/makefuncs.h"
41 #include "pgstat.h"
42 #include "postmaster/autovacuum.h"
43 #include "storage/bufmgr.h"
44 #include "storage/lmgr.h"
45 #include "storage/proc.h"
46 #include "storage/procarray.h"
47 #include "utils/acl.h"
48 #include "utils/fmgroids.h"
49 #include "utils/guc.h"
50 #include "utils/memutils.h"
51 #include "utils/snapmgr.h"
52 #include "utils/syscache.h"
53 #include "utils/tqual.h"
54
55
56 /*
57 * GUC parameters
58 */
59 int vacuum_freeze_min_age;
60 int vacuum_freeze_table_age;
61 int vacuum_multixact_freeze_min_age;
62 int vacuum_multixact_freeze_table_age;
63
64
65 /* A few variables that don't seem worth passing around as parameters */
66 static MemoryContext vac_context = NULL;
67 static BufferAccessStrategy vac_strategy;
68
69
70 /* non-export function prototypes */
71 static List *expand_vacuum_rel(VacuumRelation *vrel);
72 static List *get_all_vacuum_rels(void);
73 static void vac_truncate_clog(TransactionId frozenXID,
74 MultiXactId minMulti,
75 TransactionId lastSaneFrozenXid,
76 MultiXactId lastSaneMinMulti);
77 static bool vacuum_rel(Oid relid, RangeVar *relation, int options,
78 VacuumParams *params);
79
80 /*
81 * Primary entry point for manual VACUUM and ANALYZE commands
82 *
83 * This is mainly a preparation wrapper for the real operations that will
84 * happen in vacuum().
85 */
86 void
ExecVacuum(VacuumStmt * vacstmt,bool isTopLevel)87 ExecVacuum(VacuumStmt *vacstmt, bool isTopLevel)
88 {
89 VacuumParams params;
90
91 /* sanity checks on options */
92 Assert(vacstmt->options & (VACOPT_VACUUM | VACOPT_ANALYZE));
93 Assert((vacstmt->options & VACOPT_VACUUM) ||
94 !(vacstmt->options & (VACOPT_FULL | VACOPT_FREEZE)));
95 Assert(!(vacstmt->options & VACOPT_SKIPTOAST));
96
97 /*
98 * Make sure VACOPT_ANALYZE is specified if any column lists are present.
99 */
100 if (!(vacstmt->options & VACOPT_ANALYZE))
101 {
102 ListCell *lc;
103
104 foreach(lc, vacstmt->rels)
105 {
106 VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
107
108 if (vrel->va_cols != NIL)
109 ereport(ERROR,
110 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
111 errmsg("ANALYZE option must be specified when a column list is provided")));
112 }
113 }
114
115 /*
116 * All freeze ages are zero if the FREEZE option is given; otherwise pass
117 * them as -1 which means to use the default values.
118 */
119 if (vacstmt->options & VACOPT_FREEZE)
120 {
121 params.freeze_min_age = 0;
122 params.freeze_table_age = 0;
123 params.multixact_freeze_min_age = 0;
124 params.multixact_freeze_table_age = 0;
125 }
126 else
127 {
128 params.freeze_min_age = -1;
129 params.freeze_table_age = -1;
130 params.multixact_freeze_min_age = -1;
131 params.multixact_freeze_table_age = -1;
132 }
133
134 /* user-invoked vacuum is never "for wraparound" */
135 params.is_wraparound = false;
136
137 /* user-invoked vacuum never uses this parameter */
138 params.log_min_duration = -1;
139
140 /* Now go through the common routine */
141 vacuum(vacstmt->options, vacstmt->rels, ¶ms, NULL, isTopLevel);
142 }
143
144 /*
145 * Internal entry point for VACUUM and ANALYZE commands.
146 *
147 * options is a bitmask of VacuumOption flags, indicating what to do.
148 *
149 * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
150 * we process all relevant tables in the database. For each VacuumRelation,
151 * if a valid OID is supplied, the table with that OID is what to process;
152 * otherwise, the VacuumRelation's RangeVar indicates what to process.
153 *
154 * params contains a set of parameters that can be used to customize the
155 * behavior.
156 *
157 * bstrategy is normally given as NULL, but in autovacuum it can be passed
158 * in to use the same buffer strategy object across multiple vacuum() calls.
159 *
160 * isTopLevel should be passed down from ProcessUtility.
161 *
162 * It is the caller's responsibility that all parameters are allocated in a
163 * memory context that will not disappear at transaction commit.
164 */
165 void
vacuum(int options,List * relations,VacuumParams * params,BufferAccessStrategy bstrategy,bool isTopLevel)166 vacuum(int options, List *relations, VacuumParams *params,
167 BufferAccessStrategy bstrategy, bool isTopLevel)
168 {
169 static bool in_vacuum = false;
170
171 const char *stmttype;
172 volatile bool in_outer_xact,
173 use_own_xacts;
174
175 Assert(params != NULL);
176
177 stmttype = (options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
178
179 /*
180 * We cannot run VACUUM inside a user transaction block; if we were inside
181 * a transaction, then our commit- and start-transaction-command calls
182 * would not have the intended effect! There are numerous other subtle
183 * dependencies on this, too.
184 *
185 * ANALYZE (without VACUUM) can run either way.
186 */
187 if (options & VACOPT_VACUUM)
188 {
189 PreventInTransactionBlock(isTopLevel, stmttype);
190 in_outer_xact = false;
191 }
192 else
193 in_outer_xact = IsInTransactionBlock(isTopLevel);
194
195 /*
196 * Due to static variables vac_context, anl_context and vac_strategy,
197 * vacuum() is not reentrant. This matters when VACUUM FULL or ANALYZE
198 * calls a hostile index expression that itself calls ANALYZE.
199 */
200 if (in_vacuum)
201 ereport(ERROR,
202 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
203 errmsg("%s cannot be executed from VACUUM or ANALYZE",
204 stmttype)));
205
206 /*
207 * Sanity check DISABLE_PAGE_SKIPPING option.
208 */
209 if ((options & VACOPT_FULL) != 0 &&
210 (options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
211 ereport(ERROR,
212 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
213 errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
214
215 /*
216 * Send info about dead objects to the statistics collector, unless we are
217 * in autovacuum --- autovacuum.c does this for itself.
218 */
219 if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
220 pgstat_vacuum_stat();
221
222 /*
223 * Create special memory context for cross-transaction storage.
224 *
225 * Since it is a child of PortalContext, it will go away eventually even
226 * if we suffer an error; there's no need for special abort cleanup logic.
227 */
228 vac_context = AllocSetContextCreate(PortalContext,
229 "Vacuum",
230 ALLOCSET_DEFAULT_SIZES);
231
232 /*
233 * If caller didn't give us a buffer strategy object, make one in the
234 * cross-transaction memory context.
235 */
236 if (bstrategy == NULL)
237 {
238 MemoryContext old_context = MemoryContextSwitchTo(vac_context);
239
240 bstrategy = GetAccessStrategy(BAS_VACUUM);
241 MemoryContextSwitchTo(old_context);
242 }
243 vac_strategy = bstrategy;
244
245 /*
246 * Build list of relation(s) to process, putting any new data in
247 * vac_context for safekeeping.
248 */
249 if (relations != NIL)
250 {
251 List *newrels = NIL;
252 ListCell *lc;
253
254 foreach(lc, relations)
255 {
256 VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
257 List *sublist;
258 MemoryContext old_context;
259
260 sublist = expand_vacuum_rel(vrel);
261 old_context = MemoryContextSwitchTo(vac_context);
262 newrels = list_concat(newrels, sublist);
263 MemoryContextSwitchTo(old_context);
264 }
265 relations = newrels;
266 }
267 else
268 relations = get_all_vacuum_rels();
269
270 /*
271 * Decide whether we need to start/commit our own transactions.
272 *
273 * For VACUUM (with or without ANALYZE): always do so, so that we can
274 * release locks as soon as possible. (We could possibly use the outer
275 * transaction for a one-table VACUUM, but handling TOAST tables would be
276 * problematic.)
277 *
278 * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
279 * start/commit our own transactions. Also, there's no need to do so if
280 * only processing one relation. For multiple relations when not within a
281 * transaction block, and also in an autovacuum worker, use own
282 * transactions so we can release locks sooner.
283 */
284 if (options & VACOPT_VACUUM)
285 use_own_xacts = true;
286 else
287 {
288 Assert(options & VACOPT_ANALYZE);
289 if (IsAutoVacuumWorkerProcess())
290 use_own_xacts = true;
291 else if (in_outer_xact)
292 use_own_xacts = false;
293 else if (list_length(relations) > 1)
294 use_own_xacts = true;
295 else
296 use_own_xacts = false;
297 }
298
299 /*
300 * vacuum_rel expects to be entered with no transaction active; it will
301 * start and commit its own transaction. But we are called by an SQL
302 * command, and so we are executing inside a transaction already. We
303 * commit the transaction started in PostgresMain() here, and start
304 * another one before exiting to match the commit waiting for us back in
305 * PostgresMain().
306 */
307 if (use_own_xacts)
308 {
309 Assert(!in_outer_xact);
310
311 /* ActiveSnapshot is not set by autovacuum */
312 if (ActiveSnapshotSet())
313 PopActiveSnapshot();
314
315 /* matches the StartTransaction in PostgresMain() */
316 CommitTransactionCommand();
317 }
318
319 /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */
320 PG_TRY();
321 {
322 ListCell *cur;
323
324 in_vacuum = true;
325 VacuumCostActive = (VacuumCostDelay > 0);
326 VacuumCostBalance = 0;
327 VacuumPageHit = 0;
328 VacuumPageMiss = 0;
329 VacuumPageDirty = 0;
330
331 /*
332 * Loop to process each selected relation.
333 */
334 foreach(cur, relations)
335 {
336 VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
337
338 if (options & VACOPT_VACUUM)
339 {
340 if (!vacuum_rel(vrel->oid, vrel->relation, options, params))
341 continue;
342 }
343
344 if (options & VACOPT_ANALYZE)
345 {
346 /*
347 * If using separate xacts, start one for analyze. Otherwise,
348 * we can use the outer transaction.
349 */
350 if (use_own_xacts)
351 {
352 StartTransactionCommand();
353 /* functions in indexes may want a snapshot set */
354 PushActiveSnapshot(GetTransactionSnapshot());
355 }
356
357 analyze_rel(vrel->oid, vrel->relation, options, params,
358 vrel->va_cols, in_outer_xact, vac_strategy);
359
360 if (use_own_xacts)
361 {
362 PopActiveSnapshot();
363 CommitTransactionCommand();
364 }
365 else
366 {
367 /*
368 * If we're not using separate xacts, better separate the
369 * ANALYZE actions with CCIs. This avoids trouble if user
370 * says "ANALYZE t, t".
371 */
372 CommandCounterIncrement();
373 }
374 }
375 }
376 }
377 PG_CATCH();
378 {
379 in_vacuum = false;
380 VacuumCostActive = false;
381 PG_RE_THROW();
382 }
383 PG_END_TRY();
384
385 in_vacuum = false;
386 VacuumCostActive = false;
387
388 /*
389 * Finish up processing.
390 */
391 if (use_own_xacts)
392 {
393 /* here, we are not in a transaction */
394
395 /*
396 * This matches the CommitTransaction waiting for us in
397 * PostgresMain().
398 */
399 StartTransactionCommand();
400 }
401
402 if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
403 {
404 /*
405 * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
406 * (autovacuum.c does this for itself.)
407 */
408 vac_update_datfrozenxid();
409 }
410
411 /*
412 * Clean up working storage --- note we must do this after
413 * StartTransactionCommand, else we might be trying to delete the active
414 * context!
415 */
416 MemoryContextDelete(vac_context);
417 vac_context = NULL;
418 }
419
420 /*
421 * Given a VacuumRelation, fill in the table OID if it wasn't specified,
422 * and optionally add VacuumRelations for partitions of the table.
423 *
424 * If a VacuumRelation does not have an OID supplied and is a partitioned
425 * table, an extra entry will be added to the output for each partition.
426 * Presently, only autovacuum supplies OIDs when calling vacuum(), and
427 * it does not want us to expand partitioned tables.
428 *
429 * We take care not to modify the input data structure, but instead build
430 * new VacuumRelation(s) to return. (But note that they will reference
431 * unmodified parts of the input, eg column lists.) New data structures
432 * are made in vac_context.
433 */
434 static List *
expand_vacuum_rel(VacuumRelation * vrel)435 expand_vacuum_rel(VacuumRelation *vrel)
436 {
437 List *vacrels = NIL;
438 MemoryContext oldcontext;
439
440 /* If caller supplied OID, there's nothing we need do here. */
441 if (OidIsValid(vrel->oid))
442 {
443 oldcontext = MemoryContextSwitchTo(vac_context);
444 vacrels = lappend(vacrels, vrel);
445 MemoryContextSwitchTo(oldcontext);
446 }
447 else
448 {
449 /* Process a specific relation, and possibly partitions thereof */
450 Oid relid;
451 HeapTuple tuple;
452 Form_pg_class classForm;
453 bool include_parts;
454
455 /*
456 * We transiently take AccessShareLock to protect the syscache lookup
457 * below, as well as find_all_inheritors's expectation that the caller
458 * holds some lock on the starting relation.
459 */
460 relid = RangeVarGetRelid(vrel->relation, AccessShareLock, false);
461
462 /*
463 * Make a returnable VacuumRelation for this rel.
464 */
465 oldcontext = MemoryContextSwitchTo(vac_context);
466 vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
467 relid,
468 vrel->va_cols));
469 MemoryContextSwitchTo(oldcontext);
470
471 /*
472 * To check whether the relation is a partitioned table, fetch its
473 * syscache entry.
474 */
475 tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
476 if (!HeapTupleIsValid(tuple))
477 elog(ERROR, "cache lookup failed for relation %u", relid);
478 classForm = (Form_pg_class) GETSTRUCT(tuple);
479 include_parts = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
480 ReleaseSysCache(tuple);
481
482 /*
483 * If it is, make relation list entries for its partitions. Note that
484 * the list returned by find_all_inheritors() includes the passed-in
485 * OID, so we have to skip that. There's no point in taking locks on
486 * the individual partitions yet, and doing so would just add
487 * unnecessary deadlock risk.
488 */
489 if (include_parts)
490 {
491 List *part_oids = find_all_inheritors(relid, NoLock, NULL);
492 ListCell *part_lc;
493
494 foreach(part_lc, part_oids)
495 {
496 Oid part_oid = lfirst_oid(part_lc);
497
498 if (part_oid == relid)
499 continue; /* ignore original table */
500
501 /*
502 * We omit a RangeVar since it wouldn't be appropriate to
503 * complain about failure to open one of these relations
504 * later.
505 */
506 oldcontext = MemoryContextSwitchTo(vac_context);
507 vacrels = lappend(vacrels, makeVacuumRelation(NULL,
508 part_oid,
509 vrel->va_cols));
510 MemoryContextSwitchTo(oldcontext);
511 }
512 }
513
514 /*
515 * Release lock again. This means that by the time we actually try to
516 * process the table, it might be gone or renamed. In the former case
517 * we'll silently ignore it; in the latter case we'll process it
518 * anyway, but we must beware that the RangeVar doesn't necessarily
519 * identify it anymore. This isn't ideal, perhaps, but there's little
520 * practical alternative, since we're typically going to commit this
521 * transaction and begin a new one between now and then. Moreover,
522 * holding locks on multiple relations would create significant risk
523 * of deadlock.
524 */
525 UnlockRelationOid(relid, AccessShareLock);
526 }
527
528 return vacrels;
529 }
530
531 /*
532 * Construct a list of VacuumRelations for all vacuumable rels in
533 * the current database. The list is built in vac_context.
534 */
535 static List *
get_all_vacuum_rels(void)536 get_all_vacuum_rels(void)
537 {
538 List *vacrels = NIL;
539 Relation pgclass;
540 HeapScanDesc scan;
541 HeapTuple tuple;
542
543 pgclass = heap_open(RelationRelationId, AccessShareLock);
544
545 scan = heap_beginscan_catalog(pgclass, 0, NULL);
546
547 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
548 {
549 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
550 MemoryContext oldcontext;
551
552 /*
553 * We include partitioned tables here; depending on which operation is
554 * to be performed, caller will decide whether to process or ignore
555 * them.
556 */
557 if (classForm->relkind != RELKIND_RELATION &&
558 classForm->relkind != RELKIND_MATVIEW &&
559 classForm->relkind != RELKIND_PARTITIONED_TABLE)
560 continue;
561
562 /*
563 * Build VacuumRelation(s) specifying the table OIDs to be processed.
564 * We omit a RangeVar since it wouldn't be appropriate to complain
565 * about failure to open one of these relations later.
566 */
567 oldcontext = MemoryContextSwitchTo(vac_context);
568 vacrels = lappend(vacrels, makeVacuumRelation(NULL,
569 HeapTupleGetOid(tuple),
570 NIL));
571 MemoryContextSwitchTo(oldcontext);
572 }
573
574 heap_endscan(scan);
575 heap_close(pgclass, AccessShareLock);
576
577 return vacrels;
578 }
579
580 /*
581 * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
582 *
583 * The output parameters are:
584 * - oldestXmin is the cutoff value used to distinguish whether tuples are
585 * DEAD or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
586 * - freezeLimit is the Xid below which all Xids are replaced by
587 * FrozenTransactionId during vacuum.
588 * - xidFullScanLimit (computed from table_freeze_age parameter)
589 * represents a minimum Xid value; a table whose relfrozenxid is older than
590 * this will have a full-table vacuum applied to it, to freeze tuples across
591 * the whole table. Vacuuming a table younger than this value can use a
592 * partial scan.
593 * - multiXactCutoff is the value below which all MultiXactIds are removed from
594 * Xmax.
595 * - mxactFullScanLimit is a value against which a table's relminmxid value is
596 * compared to produce a full-table vacuum, as with xidFullScanLimit.
597 *
598 * xidFullScanLimit and mxactFullScanLimit can be passed as NULL if caller is
599 * not interested.
600 */
601 void
vacuum_set_xid_limits(Relation rel,int freeze_min_age,int freeze_table_age,int multixact_freeze_min_age,int multixact_freeze_table_age,TransactionId * oldestXmin,TransactionId * freezeLimit,TransactionId * xidFullScanLimit,MultiXactId * multiXactCutoff,MultiXactId * mxactFullScanLimit)602 vacuum_set_xid_limits(Relation rel,
603 int freeze_min_age,
604 int freeze_table_age,
605 int multixact_freeze_min_age,
606 int multixact_freeze_table_age,
607 TransactionId *oldestXmin,
608 TransactionId *freezeLimit,
609 TransactionId *xidFullScanLimit,
610 MultiXactId *multiXactCutoff,
611 MultiXactId *mxactFullScanLimit)
612 {
613 int freezemin;
614 int mxid_freezemin;
615 int effective_multixact_freeze_max_age;
616 TransactionId limit;
617 TransactionId safeLimit;
618 MultiXactId oldestMxact;
619 MultiXactId mxactLimit;
620 MultiXactId safeMxactLimit;
621
622 /*
623 * We can always ignore processes running lazy vacuum. This is because we
624 * use these values only for deciding which tuples we must keep in the
625 * tables. Since lazy vacuum doesn't write its XID anywhere, it's safe to
626 * ignore it. In theory it could be problematic to ignore lazy vacuums in
627 * a full vacuum, but keep in mind that only one vacuum process can be
628 * working on a particular table at any time, and that each vacuum is
629 * always an independent transaction.
630 */
631 *oldestXmin =
632 TransactionIdLimitedForOldSnapshots(GetOldestXmin(rel, PROCARRAY_FLAGS_VACUUM), rel);
633
634 Assert(TransactionIdIsNormal(*oldestXmin));
635
636 /*
637 * Determine the minimum freeze age to use: as specified by the caller, or
638 * vacuum_freeze_min_age, but in any case not more than half
639 * autovacuum_freeze_max_age, so that autovacuums to prevent XID
640 * wraparound won't occur too frequently.
641 */
642 freezemin = freeze_min_age;
643 if (freezemin < 0)
644 freezemin = vacuum_freeze_min_age;
645 freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
646 Assert(freezemin >= 0);
647
648 /*
649 * Compute the cutoff XID, being careful not to generate a "permanent" XID
650 */
651 limit = *oldestXmin - freezemin;
652 if (!TransactionIdIsNormal(limit))
653 limit = FirstNormalTransactionId;
654
655 /*
656 * If oldestXmin is very far back (in practice, more than
657 * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
658 * freeze age of zero.
659 */
660 safeLimit = ReadNewTransactionId() - autovacuum_freeze_max_age;
661 if (!TransactionIdIsNormal(safeLimit))
662 safeLimit = FirstNormalTransactionId;
663
664 if (TransactionIdPrecedes(limit, safeLimit))
665 {
666 ereport(WARNING,
667 (errmsg("oldest xmin is far in the past"),
668 errhint("Close open transactions soon to avoid wraparound problems.\n"
669 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
670 limit = *oldestXmin;
671 }
672
673 *freezeLimit = limit;
674
675 /*
676 * Compute the multixact age for which freezing is urgent. This is
677 * normally autovacuum_multixact_freeze_max_age, but may be less if we are
678 * short of multixact member space.
679 */
680 effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
681
682 /*
683 * Determine the minimum multixact freeze age to use: as specified by
684 * caller, or vacuum_multixact_freeze_min_age, but in any case not more
685 * than half effective_multixact_freeze_max_age, so that autovacuums to
686 * prevent MultiXact wraparound won't occur too frequently.
687 */
688 mxid_freezemin = multixact_freeze_min_age;
689 if (mxid_freezemin < 0)
690 mxid_freezemin = vacuum_multixact_freeze_min_age;
691 mxid_freezemin = Min(mxid_freezemin,
692 effective_multixact_freeze_max_age / 2);
693 Assert(mxid_freezemin >= 0);
694
695 /* compute the cutoff multi, being careful to generate a valid value */
696 oldestMxact = GetOldestMultiXactId();
697 mxactLimit = oldestMxact - mxid_freezemin;
698 if (mxactLimit < FirstMultiXactId)
699 mxactLimit = FirstMultiXactId;
700
701 safeMxactLimit =
702 ReadNextMultiXactId() - effective_multixact_freeze_max_age;
703 if (safeMxactLimit < FirstMultiXactId)
704 safeMxactLimit = FirstMultiXactId;
705
706 if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit))
707 {
708 ereport(WARNING,
709 (errmsg("oldest multixact is far in the past"),
710 errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
711 /* Use the safe limit, unless an older mxact is still running */
712 if (MultiXactIdPrecedes(oldestMxact, safeMxactLimit))
713 mxactLimit = oldestMxact;
714 else
715 mxactLimit = safeMxactLimit;
716 }
717
718 *multiXactCutoff = mxactLimit;
719
720 if (xidFullScanLimit != NULL)
721 {
722 int freezetable;
723
724 Assert(mxactFullScanLimit != NULL);
725
726 /*
727 * Determine the table freeze age to use: as specified by the caller,
728 * or vacuum_freeze_table_age, but in any case not more than
729 * autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
730 * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
731 * before anti-wraparound autovacuum is launched.
732 */
733 freezetable = freeze_table_age;
734 if (freezetable < 0)
735 freezetable = vacuum_freeze_table_age;
736 freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
737 Assert(freezetable >= 0);
738
739 /*
740 * Compute XID limit causing a full-table vacuum, being careful not to
741 * generate a "permanent" XID.
742 */
743 limit = ReadNewTransactionId() - freezetable;
744 if (!TransactionIdIsNormal(limit))
745 limit = FirstNormalTransactionId;
746
747 *xidFullScanLimit = limit;
748
749 /*
750 * Similar to the above, determine the table freeze age to use for
751 * multixacts: as specified by the caller, or
752 * vacuum_multixact_freeze_table_age, but in any case not more than
753 * autovacuum_multixact_freeze_table_age * 0.95, so that if you have
754 * e.g. nightly VACUUM schedule, the nightly VACUUM gets a chance to
755 * freeze multixacts before anti-wraparound autovacuum is launched.
756 */
757 freezetable = multixact_freeze_table_age;
758 if (freezetable < 0)
759 freezetable = vacuum_multixact_freeze_table_age;
760 freezetable = Min(freezetable,
761 effective_multixact_freeze_max_age * 0.95);
762 Assert(freezetable >= 0);
763
764 /*
765 * Compute MultiXact limit causing a full-table vacuum, being careful
766 * to generate a valid MultiXact value.
767 */
768 mxactLimit = ReadNextMultiXactId() - freezetable;
769 if (mxactLimit < FirstMultiXactId)
770 mxactLimit = FirstMultiXactId;
771
772 *mxactFullScanLimit = mxactLimit;
773 }
774 else
775 {
776 Assert(mxactFullScanLimit == NULL);
777 }
778 }
779
780 /*
781 * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
782 *
783 * If we scanned the whole relation then we should just use the count of
784 * live tuples seen; but if we did not, we should not blindly extrapolate
785 * from that number, since VACUUM may have scanned a quite nonrandom
786 * subset of the table. When we have only partial information, we take
787 * the old value of pg_class.reltuples as a measurement of the
788 * tuple density in the unscanned pages.
789 *
790 * Note: scanned_tuples should count only *live* tuples, since
791 * pg_class.reltuples is defined that way.
792 */
793 double
vac_estimate_reltuples(Relation relation,BlockNumber total_pages,BlockNumber scanned_pages,double scanned_tuples)794 vac_estimate_reltuples(Relation relation,
795 BlockNumber total_pages,
796 BlockNumber scanned_pages,
797 double scanned_tuples)
798 {
799 BlockNumber old_rel_pages = relation->rd_rel->relpages;
800 double old_rel_tuples = relation->rd_rel->reltuples;
801 double old_density;
802 double unscanned_pages;
803 double total_tuples;
804
805 /* If we did scan the whole table, just use the count as-is */
806 if (scanned_pages >= total_pages)
807 return scanned_tuples;
808
809 /*
810 * If scanned_pages is zero but total_pages isn't, keep the existing value
811 * of reltuples. (Note: callers should avoid updating the pg_class
812 * statistics in this situation, since no new information has been
813 * provided.)
814 */
815 if (scanned_pages == 0)
816 return old_rel_tuples;
817
818 /*
819 * If old value of relpages is zero, old density is indeterminate; we
820 * can't do much except scale up scanned_tuples to match total_pages.
821 */
822 if (old_rel_pages == 0)
823 return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
824
825 /*
826 * Okay, we've covered the corner cases. The normal calculation is to
827 * convert the old measurement to a density (tuples per page), then
828 * estimate the number of tuples in the unscanned pages using that figure,
829 * and finally add on the number of tuples in the scanned pages.
830 */
831 old_density = old_rel_tuples / old_rel_pages;
832 unscanned_pages = (double) total_pages - (double) scanned_pages;
833 total_tuples = old_density * unscanned_pages + scanned_tuples;
834 return floor(total_tuples + 0.5);
835 }
836
837
838 /*
839 * vac_update_relstats() -- update statistics for one relation
840 *
841 * Update the whole-relation statistics that are kept in its pg_class
842 * row. There are additional stats that will be updated if we are
843 * doing ANALYZE, but we always update these stats. This routine works
844 * for both index and heap relation entries in pg_class.
845 *
846 * We violate transaction semantics here by overwriting the rel's
847 * existing pg_class tuple with the new values. This is reasonably
848 * safe as long as we're sure that the new values are correct whether or
849 * not this transaction commits. The reason for doing this is that if
850 * we updated these tuples in the usual way, vacuuming pg_class itself
851 * wouldn't work very well --- by the time we got done with a vacuum
852 * cycle, most of the tuples in pg_class would've been obsoleted. Of
853 * course, this only works for fixed-size not-null columns, but these are.
854 *
855 * Another reason for doing it this way is that when we are in a lazy
856 * VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
857 * Somebody vacuuming pg_class might think they could delete a tuple
858 * marked with xmin = our xid.
859 *
860 * In addition to fundamentally nontransactional statistics such as
861 * relpages and relallvisible, we try to maintain certain lazily-updated
862 * DDL flags such as relhasindex, by clearing them if no longer correct.
863 * It's safe to do this in VACUUM, which can't run in parallel with
864 * CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
865 * However, it's *not* safe to do it in an ANALYZE that's within an
866 * outer transaction, because for example the current transaction might
867 * have dropped the last index; then we'd think relhasindex should be
868 * cleared, but if the transaction later rolls back this would be wrong.
869 * So we refrain from updating the DDL flags if we're inside an outer
870 * transaction. This is OK since postponing the flag maintenance is
871 * always allowable.
872 *
873 * Note: num_tuples should count only *live* tuples, since
874 * pg_class.reltuples is defined that way.
875 *
876 * This routine is shared by VACUUM and ANALYZE.
877 */
878 void
vac_update_relstats(Relation relation,BlockNumber num_pages,double num_tuples,BlockNumber num_all_visible_pages,bool hasindex,TransactionId frozenxid,MultiXactId minmulti,bool in_outer_xact)879 vac_update_relstats(Relation relation,
880 BlockNumber num_pages, double num_tuples,
881 BlockNumber num_all_visible_pages,
882 bool hasindex, TransactionId frozenxid,
883 MultiXactId minmulti,
884 bool in_outer_xact)
885 {
886 Oid relid = RelationGetRelid(relation);
887 Relation rd;
888 HeapTuple ctup;
889 Form_pg_class pgcform;
890 bool dirty;
891
892 rd = heap_open(RelationRelationId, RowExclusiveLock);
893
894 /* Fetch a copy of the tuple to scribble on */
895 ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
896 if (!HeapTupleIsValid(ctup))
897 elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
898 relid);
899 pgcform = (Form_pg_class) GETSTRUCT(ctup);
900
901 /* Apply statistical updates, if any, to copied tuple */
902
903 dirty = false;
904 if (pgcform->relpages != (int32) num_pages)
905 {
906 pgcform->relpages = (int32) num_pages;
907 dirty = true;
908 }
909 if (pgcform->reltuples != (float4) num_tuples)
910 {
911 pgcform->reltuples = (float4) num_tuples;
912 dirty = true;
913 }
914 if (pgcform->relallvisible != (int32) num_all_visible_pages)
915 {
916 pgcform->relallvisible = (int32) num_all_visible_pages;
917 dirty = true;
918 }
919
920 /* Apply DDL updates, but not inside an outer transaction (see above) */
921
922 if (!in_outer_xact)
923 {
924 /*
925 * If we didn't find any indexes, reset relhasindex.
926 */
927 if (pgcform->relhasindex && !hasindex)
928 {
929 pgcform->relhasindex = false;
930 dirty = true;
931 }
932
933 /* We also clear relhasrules and relhastriggers if needed */
934 if (pgcform->relhasrules && relation->rd_rules == NULL)
935 {
936 pgcform->relhasrules = false;
937 dirty = true;
938 }
939 if (pgcform->relhastriggers && relation->trigdesc == NULL)
940 {
941 pgcform->relhastriggers = false;
942 dirty = true;
943 }
944 }
945
946 /*
947 * Update relfrozenxid, unless caller passed InvalidTransactionId
948 * indicating it has no new data.
949 *
950 * Ordinarily, we don't let relfrozenxid go backwards: if things are
951 * working correctly, the only way the new frozenxid could be older would
952 * be if a previous VACUUM was done with a tighter freeze_min_age, in
953 * which case we don't want to forget the work it already did. However,
954 * if the stored relfrozenxid is "in the future", then it must be corrupt
955 * and it seems best to overwrite it with the cutoff we used this time.
956 * This should match vac_update_datfrozenxid() concerning what we consider
957 * to be "in the future".
958 */
959 if (TransactionIdIsNormal(frozenxid) &&
960 pgcform->relfrozenxid != frozenxid &&
961 (TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid) ||
962 TransactionIdPrecedes(ReadNewTransactionId(),
963 pgcform->relfrozenxid)))
964 {
965 pgcform->relfrozenxid = frozenxid;
966 dirty = true;
967 }
968
969 /* Similarly for relminmxid */
970 if (MultiXactIdIsValid(minmulti) &&
971 pgcform->relminmxid != minmulti &&
972 (MultiXactIdPrecedes(pgcform->relminmxid, minmulti) ||
973 MultiXactIdPrecedes(ReadNextMultiXactId(), pgcform->relminmxid)))
974 {
975 pgcform->relminmxid = minmulti;
976 dirty = true;
977 }
978
979 /* If anything changed, write out the tuple. */
980 if (dirty)
981 heap_inplace_update(rd, ctup);
982
983 heap_close(rd, RowExclusiveLock);
984 }
985
986
987 /*
988 * vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
989 *
990 * Update pg_database's datfrozenxid entry for our database to be the
991 * minimum of the pg_class.relfrozenxid values.
992 *
993 * Similarly, update our datminmxid to be the minimum of the
994 * pg_class.relminmxid values.
995 *
996 * If we are able to advance either pg_database value, also try to
997 * truncate pg_xact and pg_multixact.
998 *
999 * We violate transaction semantics here by overwriting the database's
1000 * existing pg_database tuple with the new values. This is reasonably
1001 * safe since the new values are correct whether or not this transaction
1002 * commits. As with vac_update_relstats, this avoids leaving dead tuples
1003 * behind after a VACUUM.
1004 */
1005 void
vac_update_datfrozenxid(void)1006 vac_update_datfrozenxid(void)
1007 {
1008 HeapTuple tuple;
1009 Form_pg_database dbform;
1010 Relation relation;
1011 SysScanDesc scan;
1012 HeapTuple classTup;
1013 TransactionId newFrozenXid;
1014 MultiXactId newMinMulti;
1015 TransactionId lastSaneFrozenXid;
1016 MultiXactId lastSaneMinMulti;
1017 bool bogus = false;
1018 bool dirty = false;
1019
1020 /*
1021 * Restrict this task to one backend per database. This avoids race
1022 * conditions that would move datfrozenxid or datminmxid backward. It
1023 * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
1024 * datfrozenxid passed to an earlier vac_truncate_clog() call.
1025 */
1026 LockDatabaseFrozenIds(ExclusiveLock);
1027
1028 /*
1029 * Initialize the "min" calculation with GetOldestXmin, which is a
1030 * reasonable approximation to the minimum relfrozenxid for not-yet-
1031 * committed pg_class entries for new tables; see AddNewRelationTuple().
1032 * So we cannot produce a wrong minimum by starting with this.
1033 */
1034 newFrozenXid = GetOldestXmin(NULL, PROCARRAY_FLAGS_VACUUM);
1035
1036 /*
1037 * Similarly, initialize the MultiXact "min" with the value that would be
1038 * used on pg_class for new tables. See AddNewRelationTuple().
1039 */
1040 newMinMulti = GetOldestMultiXactId();
1041
1042 /*
1043 * Identify the latest relfrozenxid and relminmxid values that we could
1044 * validly see during the scan. These are conservative values, but it's
1045 * not really worth trying to be more exact.
1046 */
1047 lastSaneFrozenXid = ReadNewTransactionId();
1048 lastSaneMinMulti = ReadNextMultiXactId();
1049
1050 /*
1051 * We must seqscan pg_class to find the minimum Xid, because there is no
1052 * index that can help us here.
1053 */
1054 relation = heap_open(RelationRelationId, AccessShareLock);
1055
1056 scan = systable_beginscan(relation, InvalidOid, false,
1057 NULL, 0, NULL);
1058
1059 while ((classTup = systable_getnext(scan)) != NULL)
1060 {
1061 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
1062
1063 /*
1064 * Only consider relations able to hold unfrozen XIDs (anything else
1065 * should have InvalidTransactionId in relfrozenxid anyway.)
1066 */
1067 if (classForm->relkind != RELKIND_RELATION &&
1068 classForm->relkind != RELKIND_MATVIEW &&
1069 classForm->relkind != RELKIND_TOASTVALUE)
1070 continue;
1071
1072 Assert(TransactionIdIsNormal(classForm->relfrozenxid));
1073 Assert(MultiXactIdIsValid(classForm->relminmxid));
1074
1075 /*
1076 * If things are working properly, no relation should have a
1077 * relfrozenxid or relminmxid that is "in the future". However, such
1078 * cases have been known to arise due to bugs in pg_upgrade. If we
1079 * see any entries that are "in the future", chicken out and don't do
1080 * anything. This ensures we won't truncate clog before those
1081 * relations have been scanned and cleaned up.
1082 */
1083 if (TransactionIdPrecedes(lastSaneFrozenXid, classForm->relfrozenxid) ||
1084 MultiXactIdPrecedes(lastSaneMinMulti, classForm->relminmxid))
1085 {
1086 bogus = true;
1087 break;
1088 }
1089
1090 if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
1091 newFrozenXid = classForm->relfrozenxid;
1092
1093 if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
1094 newMinMulti = classForm->relminmxid;
1095 }
1096
1097 /* we're done with pg_class */
1098 systable_endscan(scan);
1099 heap_close(relation, AccessShareLock);
1100
1101 /* chicken out if bogus data found */
1102 if (bogus)
1103 return;
1104
1105 Assert(TransactionIdIsNormal(newFrozenXid));
1106 Assert(MultiXactIdIsValid(newMinMulti));
1107
1108 /* Now fetch the pg_database tuple we need to update. */
1109 relation = heap_open(DatabaseRelationId, RowExclusiveLock);
1110
1111 /* Fetch a copy of the tuple to scribble on */
1112 tuple = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
1113 if (!HeapTupleIsValid(tuple))
1114 elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
1115 dbform = (Form_pg_database) GETSTRUCT(tuple);
1116
1117 /*
1118 * As in vac_update_relstats(), we ordinarily don't want to let
1119 * datfrozenxid go backward; but if it's "in the future" then it must be
1120 * corrupt and it seems best to overwrite it.
1121 */
1122 if (dbform->datfrozenxid != newFrozenXid &&
1123 (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
1124 TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
1125 {
1126 dbform->datfrozenxid = newFrozenXid;
1127 dirty = true;
1128 }
1129 else
1130 newFrozenXid = dbform->datfrozenxid;
1131
1132 /* Ditto for datminmxid */
1133 if (dbform->datminmxid != newMinMulti &&
1134 (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1135 MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1136 {
1137 dbform->datminmxid = newMinMulti;
1138 dirty = true;
1139 }
1140 else
1141 newMinMulti = dbform->datminmxid;
1142
1143 if (dirty)
1144 heap_inplace_update(relation, tuple);
1145
1146 heap_freetuple(tuple);
1147 heap_close(relation, RowExclusiveLock);
1148
1149 /*
1150 * If we were able to advance datfrozenxid or datminmxid, see if we can
1151 * truncate pg_xact and/or pg_multixact. Also do it if the shared
1152 * XID-wrap-limit info is stale, since this action will update that too.
1153 */
1154 if (dirty || ForceTransactionIdLimitUpdate())
1155 vac_truncate_clog(newFrozenXid, newMinMulti,
1156 lastSaneFrozenXid, lastSaneMinMulti);
1157 }
1158
1159
1160 /*
1161 * vac_truncate_clog() -- attempt to truncate the commit log
1162 *
1163 * Scan pg_database to determine the system-wide oldest datfrozenxid,
1164 * and use it to truncate the transaction commit log (pg_xact).
1165 * Also update the XID wrap limit info maintained by varsup.c.
1166 * Likewise for datminmxid.
1167 *
1168 * The passed frozenXID and minMulti are the updated values for my own
1169 * pg_database entry. They're used to initialize the "min" calculations.
1170 * The caller also passes the "last sane" XID and MXID, since it has
1171 * those at hand already.
1172 *
1173 * This routine is only invoked when we've managed to change our
1174 * DB's datfrozenxid/datminmxid values, or we found that the shared
1175 * XID-wrap-limit info is stale.
1176 */
1177 static void
vac_truncate_clog(TransactionId frozenXID,MultiXactId minMulti,TransactionId lastSaneFrozenXid,MultiXactId lastSaneMinMulti)1178 vac_truncate_clog(TransactionId frozenXID,
1179 MultiXactId minMulti,
1180 TransactionId lastSaneFrozenXid,
1181 MultiXactId lastSaneMinMulti)
1182 {
1183 TransactionId nextXID = ReadNewTransactionId();
1184 Relation relation;
1185 HeapScanDesc scan;
1186 HeapTuple tuple;
1187 Oid oldestxid_datoid;
1188 Oid minmulti_datoid;
1189 bool bogus = false;
1190 bool frozenAlreadyWrapped = false;
1191
1192 /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
1193 LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
1194
1195 /* init oldest datoids to sync with my frozenXID/minMulti values */
1196 oldestxid_datoid = MyDatabaseId;
1197 minmulti_datoid = MyDatabaseId;
1198
1199 /*
1200 * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1201 *
1202 * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1203 * the values could change while we look at them. Fetch each one just
1204 * once to ensure sane behavior of the comparison logic. (Here, as in
1205 * many other places, we assume that fetching or updating an XID in shared
1206 * storage is atomic.)
1207 *
1208 * Note: we need not worry about a race condition with new entries being
1209 * inserted by CREATE DATABASE. Any such entry will have a copy of some
1210 * existing DB's datfrozenxid, and that source DB cannot be ours because
1211 * of the interlock against copying a DB containing an active backend.
1212 * Hence the new entry will not reduce the minimum. Also, if two VACUUMs
1213 * concurrently modify the datfrozenxid's of different databases, the
1214 * worst possible outcome is that pg_xact is not truncated as aggressively
1215 * as it could be.
1216 */
1217 relation = heap_open(DatabaseRelationId, AccessShareLock);
1218
1219 scan = heap_beginscan_catalog(relation, 0, NULL);
1220
1221 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1222 {
1223 volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1224 TransactionId datfrozenxid = dbform->datfrozenxid;
1225 TransactionId datminmxid = dbform->datminmxid;
1226
1227 Assert(TransactionIdIsNormal(datfrozenxid));
1228 Assert(MultiXactIdIsValid(datminmxid));
1229
1230 /*
1231 * If things are working properly, no database should have a
1232 * datfrozenxid or datminmxid that is "in the future". However, such
1233 * cases have been known to arise due to bugs in pg_upgrade. If we
1234 * see any entries that are "in the future", chicken out and don't do
1235 * anything. This ensures we won't truncate clog before those
1236 * databases have been scanned and cleaned up. (We will issue the
1237 * "already wrapped" warning if appropriate, though.)
1238 */
1239 if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1240 MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1241 bogus = true;
1242
1243 if (TransactionIdPrecedes(nextXID, datfrozenxid))
1244 frozenAlreadyWrapped = true;
1245 else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1246 {
1247 frozenXID = datfrozenxid;
1248 oldestxid_datoid = HeapTupleGetOid(tuple);
1249 }
1250
1251 if (MultiXactIdPrecedes(datminmxid, minMulti))
1252 {
1253 minMulti = datminmxid;
1254 minmulti_datoid = HeapTupleGetOid(tuple);
1255 }
1256 }
1257
1258 heap_endscan(scan);
1259
1260 heap_close(relation, AccessShareLock);
1261
1262 /*
1263 * Do not truncate CLOG if we seem to have suffered wraparound already;
1264 * the computed minimum XID might be bogus. This case should now be
1265 * impossible due to the defenses in GetNewTransactionId, but we keep the
1266 * test anyway.
1267 */
1268 if (frozenAlreadyWrapped)
1269 {
1270 ereport(WARNING,
1271 (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1272 errdetail("You might have already suffered transaction-wraparound data loss.")));
1273 return;
1274 }
1275
1276 /* chicken out if data is bogus in any other way */
1277 if (bogus)
1278 return;
1279
1280 /*
1281 * Advance the oldest value for commit timestamps before truncating, so
1282 * that if a user requests a timestamp for a transaction we're truncating
1283 * away right after this point, they get NULL instead of an ugly "file not
1284 * found" error from slru.c. This doesn't matter for xact/multixact
1285 * because they are not subject to arbitrary lookups from users.
1286 */
1287 AdvanceOldestCommitTsXid(frozenXID);
1288
1289 /*
1290 * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1291 */
1292 TruncateCLOG(frozenXID, oldestxid_datoid);
1293 TruncateCommitTs(frozenXID);
1294 TruncateMultiXact(minMulti, minmulti_datoid);
1295
1296 /*
1297 * Update the wrap limit for GetNewTransactionId and creation of new
1298 * MultiXactIds. Note: these functions will also signal the postmaster
1299 * for an(other) autovac cycle if needed. XXX should we avoid possibly
1300 * signalling twice?
1301 */
1302 SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1303 SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
1304
1305 LWLockRelease(WrapLimitsVacuumLock);
1306 }
1307
1308
1309 /*
1310 * vacuum_rel() -- vacuum one heap relation
1311 *
1312 * relid identifies the relation to vacuum. If relation is supplied,
1313 * use the name therein for reporting any failure to open/lock the rel;
1314 * do not use it once we've successfully opened the rel, since it might
1315 * be stale.
1316 *
1317 * Returns true if it's okay to proceed with a requested ANALYZE
1318 * operation on this table.
1319 *
1320 * Doing one heap at a time incurs extra overhead, since we need to
1321 * check that the heap exists again just before we vacuum it. The
1322 * reason that we do this is so that vacuuming can be spread across
1323 * many small transactions. Otherwise, two-phase locking would require
1324 * us to lock the entire database during one pass of the vacuum cleaner.
1325 *
1326 * At entry and exit, we are not inside a transaction.
1327 */
1328 static bool
vacuum_rel(Oid relid,RangeVar * relation,int options,VacuumParams * params)1329 vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params)
1330 {
1331 LOCKMODE lmode;
1332 Relation onerel;
1333 LockRelId onerelid;
1334 Oid toast_relid;
1335 Oid save_userid;
1336 int save_sec_context;
1337 int save_nestlevel;
1338 bool rel_lock = true;
1339
1340 Assert(params != NULL);
1341
1342 /* Begin a transaction for vacuuming this relation */
1343 StartTransactionCommand();
1344
1345 /*
1346 * Functions in indexes may want a snapshot set. Also, setting a snapshot
1347 * ensures that RecentGlobalXmin is kept truly recent.
1348 */
1349 PushActiveSnapshot(GetTransactionSnapshot());
1350
1351 if (!(options & VACOPT_FULL))
1352 {
1353 /*
1354 * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
1355 * other concurrent VACUUMs know that they can ignore this one while
1356 * determining their OldestXmin. (The reason we don't set it during a
1357 * full VACUUM is exactly that we may have to run user-defined
1358 * functions for functional indexes, and we want to make sure that if
1359 * they use the snapshot set above, any tuples it requires can't get
1360 * removed from other tables. An index function that depends on the
1361 * contents of other tables is arguably broken, but we won't break it
1362 * here by violating transaction semantics.)
1363 *
1364 * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
1365 * autovacuum; it's used to avoid canceling a vacuum that was invoked
1366 * in an emergency.
1367 *
1368 * Note: these flags remain set until CommitTransaction or
1369 * AbortTransaction. We don't want to clear them until we reset
1370 * MyPgXact->xid/xmin, else OldestXmin might appear to go backwards,
1371 * which is probably Not Good.
1372 */
1373 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1374 MyPgXact->vacuumFlags |= PROC_IN_VACUUM;
1375 if (params->is_wraparound)
1376 MyPgXact->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND;
1377 LWLockRelease(ProcArrayLock);
1378 }
1379
1380 /*
1381 * Check for user-requested abort. Note we want this to be inside a
1382 * transaction, so xact.c doesn't issue useless WARNING.
1383 */
1384 CHECK_FOR_INTERRUPTS();
1385
1386 /*
1387 * Determine the type of lock we want --- hard exclusive lock for a FULL
1388 * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
1389 * way, we can be sure that no other backend is vacuuming the same table.
1390 */
1391 lmode = (options & VACOPT_FULL) ? AccessExclusiveLock : ShareUpdateExclusiveLock;
1392
1393 /*
1394 * Open the relation and get the appropriate lock on it.
1395 *
1396 * There's a race condition here: the rel may have gone away since the
1397 * last time we saw it. If so, we don't need to vacuum it.
1398 *
1399 * If we've been asked not to wait for the relation lock, acquire it first
1400 * in non-blocking mode, before calling try_relation_open().
1401 */
1402 if (!(options & VACOPT_NOWAIT))
1403 onerel = try_relation_open(relid, lmode);
1404 else if (ConditionalLockRelationOid(relid, lmode))
1405 onerel = try_relation_open(relid, NoLock);
1406 else
1407 {
1408 onerel = NULL;
1409 rel_lock = false;
1410 }
1411
1412 /*
1413 * If we failed to open or lock the relation, emit a log message before
1414 * exiting.
1415 */
1416 if (!onerel)
1417 {
1418 int elevel = 0;
1419
1420 /*
1421 * Determine the log level.
1422 *
1423 * If the RangeVar is not defined, we do not have enough information
1424 * to provide a meaningful log statement. Chances are that
1425 * vacuum_rel's caller has intentionally not provided this information
1426 * so that this logging is skipped, anyway.
1427 *
1428 * Otherwise, for autovacuum logs, we emit a LOG if
1429 * log_autovacuum_min_duration is not disabled. For manual VACUUM, we
1430 * emit a WARNING to match the log statements in the permission
1431 * checks.
1432 */
1433 if (relation != NULL)
1434 {
1435 if (!IsAutoVacuumWorkerProcess())
1436 elevel = WARNING;
1437 else if (params->log_min_duration >= 0)
1438 elevel = LOG;
1439 }
1440
1441 if (elevel != 0)
1442 {
1443 if (!rel_lock)
1444 ereport(elevel,
1445 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
1446 errmsg("skipping vacuum of \"%s\" --- lock not available",
1447 relation->relname)));
1448 else
1449 ereport(elevel,
1450 (errcode(ERRCODE_UNDEFINED_TABLE),
1451 errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
1452 relation->relname)));
1453 }
1454
1455 PopActiveSnapshot();
1456 CommitTransactionCommand();
1457 return false;
1458 }
1459
1460 /*
1461 * Check permissions.
1462 *
1463 * We allow the user to vacuum a table if he is superuser, the table
1464 * owner, or the database owner (but in the latter case, only if it's not
1465 * a shared relation). pg_class_ownercheck includes the superuser case.
1466 *
1467 * Note we choose to treat permissions failure as a WARNING and keep
1468 * trying to vacuum the rest of the DB --- is this appropriate?
1469 */
1470 if (!(pg_class_ownercheck(RelationGetRelid(onerel), GetUserId()) ||
1471 (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !onerel->rd_rel->relisshared)))
1472 {
1473 if (onerel->rd_rel->relisshared)
1474 ereport(WARNING,
1475 (errmsg("skipping \"%s\" --- only superuser can vacuum it",
1476 RelationGetRelationName(onerel))));
1477 else if (onerel->rd_rel->relnamespace == PG_CATALOG_NAMESPACE)
1478 ereport(WARNING,
1479 (errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it",
1480 RelationGetRelationName(onerel))));
1481 else
1482 ereport(WARNING,
1483 (errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
1484 RelationGetRelationName(onerel))));
1485 relation_close(onerel, lmode);
1486 PopActiveSnapshot();
1487 CommitTransactionCommand();
1488 return false;
1489 }
1490
1491 /*
1492 * Check that it's of a vacuumable relkind.
1493 */
1494 if (onerel->rd_rel->relkind != RELKIND_RELATION &&
1495 onerel->rd_rel->relkind != RELKIND_MATVIEW &&
1496 onerel->rd_rel->relkind != RELKIND_TOASTVALUE &&
1497 onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
1498 {
1499 ereport(WARNING,
1500 (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
1501 RelationGetRelationName(onerel))));
1502 relation_close(onerel, lmode);
1503 PopActiveSnapshot();
1504 CommitTransactionCommand();
1505 return false;
1506 }
1507
1508 /*
1509 * Silently ignore tables that are temp tables of other backends ---
1510 * trying to vacuum these will lead to great unhappiness, since their
1511 * contents are probably not up-to-date on disk. (We don't throw a
1512 * warning here; it would just lead to chatter during a database-wide
1513 * VACUUM.)
1514 */
1515 if (RELATION_IS_OTHER_TEMP(onerel))
1516 {
1517 relation_close(onerel, lmode);
1518 PopActiveSnapshot();
1519 CommitTransactionCommand();
1520 return false;
1521 }
1522
1523 /*
1524 * Silently ignore partitioned tables as there is no work to be done. The
1525 * useful work is on their child partitions, which have been queued up for
1526 * us separately.
1527 */
1528 if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1529 {
1530 relation_close(onerel, lmode);
1531 PopActiveSnapshot();
1532 CommitTransactionCommand();
1533 /* It's OK to proceed with ANALYZE on this table */
1534 return true;
1535 }
1536
1537 /*
1538 * Get a session-level lock too. This will protect our access to the
1539 * relation across multiple transactions, so that we can vacuum the
1540 * relation's TOAST table (if any) secure in the knowledge that no one is
1541 * deleting the parent relation.
1542 *
1543 * NOTE: this cannot block, even if someone else is waiting for access,
1544 * because the lock manager knows that both lock requests are from the
1545 * same process.
1546 */
1547 onerelid = onerel->rd_lockInfo.lockRelId;
1548 LockRelationIdForSession(&onerelid, lmode);
1549
1550 /*
1551 * Remember the relation's TOAST relation for later, if the caller asked
1552 * us to process it. In VACUUM FULL, though, the toast table is
1553 * automatically rebuilt by cluster_rel so we shouldn't recurse to it.
1554 */
1555 if (!(options & VACOPT_SKIPTOAST) && !(options & VACOPT_FULL))
1556 toast_relid = onerel->rd_rel->reltoastrelid;
1557 else
1558 toast_relid = InvalidOid;
1559
1560 /*
1561 * Switch to the table owner's userid, so that any index functions are run
1562 * as that user. Also lock down security-restricted operations and
1563 * arrange to make GUC variable changes local to this command. (This is
1564 * unnecessary, but harmless, for lazy VACUUM.)
1565 */
1566 GetUserIdAndSecContext(&save_userid, &save_sec_context);
1567 SetUserIdAndSecContext(onerel->rd_rel->relowner,
1568 save_sec_context | SECURITY_RESTRICTED_OPERATION);
1569 save_nestlevel = NewGUCNestLevel();
1570
1571 /*
1572 * Do the actual work --- either FULL or "lazy" vacuum
1573 */
1574 if (options & VACOPT_FULL)
1575 {
1576 /* close relation before vacuuming, but hold lock until commit */
1577 relation_close(onerel, NoLock);
1578 onerel = NULL;
1579
1580 /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
1581 cluster_rel(relid, InvalidOid, false,
1582 (options & VACOPT_VERBOSE) != 0);
1583 }
1584 else
1585 lazy_vacuum_rel(onerel, options, params, vac_strategy);
1586
1587 /* Roll back any GUC changes executed by index functions */
1588 AtEOXact_GUC(false, save_nestlevel);
1589
1590 /* Restore userid and security context */
1591 SetUserIdAndSecContext(save_userid, save_sec_context);
1592
1593 /* all done with this class, but hold lock until commit */
1594 if (onerel)
1595 relation_close(onerel, NoLock);
1596
1597 /*
1598 * Complete the transaction and free all temporary memory used.
1599 */
1600 PopActiveSnapshot();
1601 CommitTransactionCommand();
1602
1603 /*
1604 * If the relation has a secondary toast rel, vacuum that too while we
1605 * still hold the session lock on the master table. Note however that
1606 * "analyze" will not get done on the toast table. This is good, because
1607 * the toaster always uses hardcoded index access and statistics are
1608 * totally unimportant for toast relations.
1609 */
1610 if (toast_relid != InvalidOid)
1611 vacuum_rel(toast_relid, NULL, options, params);
1612
1613 /*
1614 * Now release the session-level lock on the master table.
1615 */
1616 UnlockRelationIdForSession(&onerelid, lmode);
1617
1618 /* Report that we really did it. */
1619 return true;
1620 }
1621
1622
1623 /*
1624 * Open all the vacuumable indexes of the given relation, obtaining the
1625 * specified kind of lock on each. Return an array of Relation pointers for
1626 * the indexes into *Irel, and the number of indexes into *nindexes.
1627 *
1628 * We consider an index vacuumable if it is marked insertable (IndexIsReady).
1629 * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
1630 * execution, and what we have is too corrupt to be processable. We will
1631 * vacuum even if the index isn't indisvalid; this is important because in a
1632 * unique index, uniqueness checks will be performed anyway and had better not
1633 * hit dangling index pointers.
1634 */
1635 void
vac_open_indexes(Relation relation,LOCKMODE lockmode,int * nindexes,Relation ** Irel)1636 vac_open_indexes(Relation relation, LOCKMODE lockmode,
1637 int *nindexes, Relation **Irel)
1638 {
1639 List *indexoidlist;
1640 ListCell *indexoidscan;
1641 int i;
1642
1643 Assert(lockmode != NoLock);
1644
1645 indexoidlist = RelationGetIndexList(relation);
1646
1647 /* allocate enough memory for all indexes */
1648 i = list_length(indexoidlist);
1649
1650 if (i > 0)
1651 *Irel = (Relation *) palloc(i * sizeof(Relation));
1652 else
1653 *Irel = NULL;
1654
1655 /* collect just the ready indexes */
1656 i = 0;
1657 foreach(indexoidscan, indexoidlist)
1658 {
1659 Oid indexoid = lfirst_oid(indexoidscan);
1660 Relation indrel;
1661
1662 indrel = index_open(indexoid, lockmode);
1663 if (IndexIsReady(indrel->rd_index))
1664 (*Irel)[i++] = indrel;
1665 else
1666 index_close(indrel, lockmode);
1667 }
1668
1669 *nindexes = i;
1670
1671 list_free(indexoidlist);
1672 }
1673
1674 /*
1675 * Release the resources acquired by vac_open_indexes. Optionally release
1676 * the locks (say NoLock to keep 'em).
1677 */
1678 void
vac_close_indexes(int nindexes,Relation * Irel,LOCKMODE lockmode)1679 vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
1680 {
1681 if (Irel == NULL)
1682 return;
1683
1684 while (nindexes--)
1685 {
1686 Relation ind = Irel[nindexes];
1687
1688 index_close(ind, lockmode);
1689 }
1690 pfree(Irel);
1691 }
1692
1693 /*
1694 * vacuum_delay_point --- check for interrupts and cost-based delay.
1695 *
1696 * This should be called in each major loop of VACUUM processing,
1697 * typically once per page processed.
1698 */
1699 void
vacuum_delay_point(void)1700 vacuum_delay_point(void)
1701 {
1702 /* Always check for interrupts */
1703 CHECK_FOR_INTERRUPTS();
1704
1705 /* Nap if appropriate */
1706 if (VacuumCostActive && !InterruptPending &&
1707 VacuumCostBalance >= VacuumCostLimit)
1708 {
1709 int msec;
1710
1711 msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit;
1712 if (msec > VacuumCostDelay * 4)
1713 msec = VacuumCostDelay * 4;
1714
1715 pg_usleep(msec * 1000L);
1716
1717 VacuumCostBalance = 0;
1718
1719 /* update balance values for workers */
1720 AutoVacuumUpdateDelay();
1721
1722 /* Might have gotten an interrupt while sleeping */
1723 CHECK_FOR_INTERRUPTS();
1724 }
1725 }
1726