1 /*-------------------------------------------------------------------------
2  *
3  * vacuum.c
4  *	  The postgres vacuum cleaner.
5  *
6  * This file now includes only control and dispatch code for VACUUM and
7  * ANALYZE commands.  Regular VACUUM is implemented in vacuumlazy.c,
8  * ANALYZE in analyze.c, and VACUUM FULL is a variant of CLUSTER, handled
9  * in cluster.c.
10  *
11  *
12  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
13  * Portions Copyright (c) 1994, Regents of the University of California
14  *
15  *
16  * IDENTIFICATION
17  *	  src/backend/commands/vacuum.c
18  *
19  *-------------------------------------------------------------------------
20  */
21 #include "postgres.h"
22 
23 #include <math.h>
24 
25 #include "access/clog.h"
26 #include "access/commit_ts.h"
27 #include "access/genam.h"
28 #include "access/heapam.h"
29 #include "access/htup_details.h"
30 #include "access/multixact.h"
31 #include "access/transam.h"
32 #include "access/xact.h"
33 #include "catalog/namespace.h"
34 #include "catalog/pg_database.h"
35 #include "catalog/pg_inherits.h"
36 #include "catalog/pg_namespace.h"
37 #include "commands/cluster.h"
38 #include "commands/vacuum.h"
39 #include "miscadmin.h"
40 #include "nodes/makefuncs.h"
41 #include "pgstat.h"
42 #include "postmaster/autovacuum.h"
43 #include "storage/bufmgr.h"
44 #include "storage/lmgr.h"
45 #include "storage/proc.h"
46 #include "storage/procarray.h"
47 #include "utils/acl.h"
48 #include "utils/fmgroids.h"
49 #include "utils/guc.h"
50 #include "utils/memutils.h"
51 #include "utils/snapmgr.h"
52 #include "utils/syscache.h"
53 #include "utils/tqual.h"
54 
55 
/*
 * GUC parameters
 *
 * vacuum_set_xid_limits() falls back to these values whenever its caller
 * passes a negative number for the corresponding per-call setting.
 */
int			vacuum_freeze_min_age;
int			vacuum_freeze_table_age;
int			vacuum_multixact_freeze_min_age;
int			vacuum_multixact_freeze_table_age;
63 
64 
/*
 * A few variables that don't seem worth passing around as parameters.
 *
 * vac_context is created near the start of each vacuum() call and is deleted
 * (and reset to NULL) just before vacuum() returns.  vac_strategy holds the
 * buffer access strategy chosen for the current vacuum() call: either the
 * one supplied by the caller or one allocated in vac_context.
 */
static MemoryContext vac_context = NULL;
static BufferAccessStrategy vac_strategy;
68 
69 
70 /* non-export function prototypes */
71 static List *expand_vacuum_rel(VacuumRelation *vrel);
72 static List *get_all_vacuum_rels(void);
73 static void vac_truncate_clog(TransactionId frozenXID,
74 				  MultiXactId minMulti,
75 				  TransactionId lastSaneFrozenXid,
76 				  MultiXactId lastSaneMinMulti);
77 static bool vacuum_rel(Oid relid, RangeVar *relation, int options,
78 		   VacuumParams *params);
79 
80 /*
81  * Primary entry point for manual VACUUM and ANALYZE commands
82  *
83  * This is mainly a preparation wrapper for the real operations that will
84  * happen in vacuum().
85  */
86 void
ExecVacuum(VacuumStmt * vacstmt,bool isTopLevel)87 ExecVacuum(VacuumStmt *vacstmt, bool isTopLevel)
88 {
89 	VacuumParams params;
90 
91 	/* sanity checks on options */
92 	Assert(vacstmt->options & (VACOPT_VACUUM | VACOPT_ANALYZE));
93 	Assert((vacstmt->options & VACOPT_VACUUM) ||
94 		   !(vacstmt->options & (VACOPT_FULL | VACOPT_FREEZE)));
95 	Assert(!(vacstmt->options & VACOPT_SKIPTOAST));
96 
97 	/*
98 	 * Make sure VACOPT_ANALYZE is specified if any column lists are present.
99 	 */
100 	if (!(vacstmt->options & VACOPT_ANALYZE))
101 	{
102 		ListCell   *lc;
103 
104 		foreach(lc, vacstmt->rels)
105 		{
106 			VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
107 
108 			if (vrel->va_cols != NIL)
109 				ereport(ERROR,
110 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
111 						 errmsg("ANALYZE option must be specified when a column list is provided")));
112 		}
113 	}
114 
115 	/*
116 	 * All freeze ages are zero if the FREEZE option is given; otherwise pass
117 	 * them as -1 which means to use the default values.
118 	 */
119 	if (vacstmt->options & VACOPT_FREEZE)
120 	{
121 		params.freeze_min_age = 0;
122 		params.freeze_table_age = 0;
123 		params.multixact_freeze_min_age = 0;
124 		params.multixact_freeze_table_age = 0;
125 	}
126 	else
127 	{
128 		params.freeze_min_age = -1;
129 		params.freeze_table_age = -1;
130 		params.multixact_freeze_min_age = -1;
131 		params.multixact_freeze_table_age = -1;
132 	}
133 
134 	/* user-invoked vacuum is never "for wraparound" */
135 	params.is_wraparound = false;
136 
137 	/* user-invoked vacuum never uses this parameter */
138 	params.log_min_duration = -1;
139 
140 	/* Now go through the common routine */
141 	vacuum(vacstmt->options, vacstmt->rels, &params, NULL, isTopLevel);
142 }
143 
/*
 * Internal entry point for VACUUM and ANALYZE commands.
 *
 * options is a bitmask of VacuumOption flags, indicating what to do.
 *
 * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
 * we process all relevant tables in the database.  For each VacuumRelation,
 * if a valid OID is supplied, the table with that OID is what to process;
 * otherwise, the VacuumRelation's RangeVar indicates what to process.
 *
 * params contains a set of parameters that can be used to customize the
 * behavior.  It must not be NULL.
 *
 * bstrategy is normally given as NULL, but in autovacuum it can be passed
 * in to use the same buffer strategy object across multiple vacuum() calls.
 *
 * isTopLevel should be passed down from ProcessUtility.
 *
 * It is the caller's responsibility that all parameters are allocated in a
 * memory context that will not disappear at transaction commit.
 *
 * This function is not reentrant: a nested call made while an outer call is
 * inside its processing loop is rejected with an error (see in_vacuum).
 */
void
vacuum(int options, List *relations, VacuumParams *params,
	   BufferAccessStrategy bstrategy, bool isTopLevel)
{
	/* true while some call of vacuum() is inside the PG_TRY block below */
	static bool in_vacuum = false;

	/* command name, used only when reporting errors */
	const char *stmttype;

	/*
	 * NOTE(review): presumably declared volatile because they are set before
	 * PG_TRY and read again after it --- confirm against the PG_TRY rules.
	 */
	volatile bool in_outer_xact,
				use_own_xacts;

	Assert(params != NULL);

	stmttype = (options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";

	/*
	 * We cannot run VACUUM inside a user transaction block; if we were inside
	 * a transaction, then our commit- and start-transaction-command calls
	 * would not have the intended effect!  There are numerous other subtle
	 * dependencies on this, too.
	 *
	 * ANALYZE (without VACUUM) can run either way.
	 */
	if (options & VACOPT_VACUUM)
	{
		PreventInTransactionBlock(isTopLevel, stmttype);
		in_outer_xact = false;
	}
	else
		in_outer_xact = IsInTransactionBlock(isTopLevel);

	/*
	 * Due to static variables vac_context, anl_context and vac_strategy,
	 * vacuum() is not reentrant.  This matters when VACUUM FULL or ANALYZE
	 * calls a hostile index expression that itself calls ANALYZE.
	 */
	if (in_vacuum)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("%s cannot be executed from VACUUM or ANALYZE",
						stmttype)));

	/*
	 * Sanity check DISABLE_PAGE_SKIPPING option.
	 */
	if ((options & VACOPT_FULL) != 0 &&
		(options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));

	/*
	 * Send info about dead objects to the statistics collector, unless we are
	 * in autovacuum --- autovacuum.c does this for itself.
	 */
	if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
		pgstat_vacuum_stat();

	/*
	 * Create special memory context for cross-transaction storage.
	 *
	 * Since it is a child of PortalContext, it will go away eventually even
	 * if we suffer an error; there's no need for special abort cleanup logic.
	 */
	vac_context = AllocSetContextCreate(PortalContext,
										"Vacuum",
										ALLOCSET_DEFAULT_SIZES);

	/*
	 * If caller didn't give us a buffer strategy object, make one in the
	 * cross-transaction memory context.
	 */
	if (bstrategy == NULL)
	{
		MemoryContext old_context = MemoryContextSwitchTo(vac_context);

		bstrategy = GetAccessStrategy(BAS_VACUUM);
		MemoryContextSwitchTo(old_context);
	}
	vac_strategy = bstrategy;

	/*
	 * Build list of relation(s) to process, putting any new data in
	 * vac_context for safekeeping.
	 */
	if (relations != NIL)
	{
		List	   *newrels = NIL;
		ListCell   *lc;

		foreach(lc, relations)
		{
			VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
			List	   *sublist;
			MemoryContext old_context;

			/* one input entry may expand into several output entries */
			sublist = expand_vacuum_rel(vrel);
			old_context = MemoryContextSwitchTo(vac_context);
			newrels = list_concat(newrels, sublist);
			MemoryContextSwitchTo(old_context);
		}
		relations = newrels;
	}
	else
		relations = get_all_vacuum_rels();

	/*
	 * Decide whether we need to start/commit our own transactions.
	 *
	 * For VACUUM (with or without ANALYZE): always do so, so that we can
	 * release locks as soon as possible.  (We could possibly use the outer
	 * transaction for a one-table VACUUM, but handling TOAST tables would be
	 * problematic.)
	 *
	 * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
	 * start/commit our own transactions.  Also, there's no need to do so if
	 * only processing one relation.  For multiple relations when not within a
	 * transaction block, and also in an autovacuum worker, use own
	 * transactions so we can release locks sooner.
	 */
	if (options & VACOPT_VACUUM)
		use_own_xacts = true;
	else
	{
		Assert(options & VACOPT_ANALYZE);
		if (IsAutoVacuumWorkerProcess())
			use_own_xacts = true;
		else if (in_outer_xact)
			use_own_xacts = false;
		else if (list_length(relations) > 1)
			use_own_xacts = true;
		else
			use_own_xacts = false;
	}

	/*
	 * vacuum_rel expects to be entered with no transaction active; it will
	 * start and commit its own transaction.  But we are called by an SQL
	 * command, and so we are executing inside a transaction already. We
	 * commit the transaction started in PostgresMain() here, and start
	 * another one before exiting to match the commit waiting for us back in
	 * PostgresMain().
	 */
	if (use_own_xacts)
	{
		Assert(!in_outer_xact);

		/* ActiveSnapshot is not set by autovacuum */
		if (ActiveSnapshotSet())
			PopActiveSnapshot();

		/* matches the StartTransaction in PostgresMain() */
		CommitTransactionCommand();
	}

	/* Turn vacuum cost accounting on or off, and set/clear in_vacuum */
	PG_TRY();
	{
		ListCell   *cur;

		in_vacuum = true;
		VacuumCostActive = (VacuumCostDelay > 0);
		/* start every run with zeroed cost-accounting counters */
		VacuumCostBalance = 0;
		VacuumPageHit = 0;
		VacuumPageMiss = 0;
		VacuumPageDirty = 0;

		/*
		 * Loop to process each selected relation.
		 */
		foreach(cur, relations)
		{
			VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);

			if (options & VACOPT_VACUUM)
			{
				/*
				 * When vacuum_rel() returns false, skip the ANALYZE step for
				 * this relation as well and move on to the next one.
				 */
				if (!vacuum_rel(vrel->oid, vrel->relation, options, params))
					continue;
			}

			if (options & VACOPT_ANALYZE)
			{
				/*
				 * If using separate xacts, start one for analyze. Otherwise,
				 * we can use the outer transaction.
				 */
				if (use_own_xacts)
				{
					StartTransactionCommand();
					/* functions in indexes may want a snapshot set */
					PushActiveSnapshot(GetTransactionSnapshot());
				}

				analyze_rel(vrel->oid, vrel->relation, options, params,
							vrel->va_cols, in_outer_xact, vac_strategy);

				if (use_own_xacts)
				{
					PopActiveSnapshot();
					CommitTransactionCommand();
				}
				else
				{
					/*
					 * If we're not using separate xacts, better separate the
					 * ANALYZE actions with CCIs.  This avoids trouble if user
					 * says "ANALYZE t, t".
					 */
					CommandCounterIncrement();
				}
			}
		}
	}
	PG_CATCH();
	{
		/* error exit: undo the flags set at the top of the PG_TRY block */
		in_vacuum = false;
		VacuumCostActive = false;
		PG_RE_THROW();
	}
	PG_END_TRY();

	/* normal exit: same flag reset as in the error path above */
	in_vacuum = false;
	VacuumCostActive = false;

	/*
	 * Finish up processing.
	 */
	if (use_own_xacts)
	{
		/* here, we are not in a transaction */

		/*
		 * This matches the CommitTransaction waiting for us in
		 * PostgresMain().
		 */
		StartTransactionCommand();
	}

	if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
	{
		/*
		 * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
		 * (autovacuum.c does this for itself.)
		 */
		vac_update_datfrozenxid();
	}

	/*
	 * Clean up working storage --- note we must do this after
	 * StartTransactionCommand, else we might be trying to delete the active
	 * context!
	 */
	MemoryContextDelete(vac_context);
	vac_context = NULL;
}
419 
420 /*
421  * Given a VacuumRelation, fill in the table OID if it wasn't specified,
422  * and optionally add VacuumRelations for partitions of the table.
423  *
424  * If a VacuumRelation does not have an OID supplied and is a partitioned
425  * table, an extra entry will be added to the output for each partition.
426  * Presently, only autovacuum supplies OIDs when calling vacuum(), and
427  * it does not want us to expand partitioned tables.
428  *
429  * We take care not to modify the input data structure, but instead build
430  * new VacuumRelation(s) to return.  (But note that they will reference
431  * unmodified parts of the input, eg column lists.)  New data structures
432  * are made in vac_context.
433  */
434 static List *
expand_vacuum_rel(VacuumRelation * vrel)435 expand_vacuum_rel(VacuumRelation *vrel)
436 {
437 	List	   *vacrels = NIL;
438 	MemoryContext oldcontext;
439 
440 	/* If caller supplied OID, there's nothing we need do here. */
441 	if (OidIsValid(vrel->oid))
442 	{
443 		oldcontext = MemoryContextSwitchTo(vac_context);
444 		vacrels = lappend(vacrels, vrel);
445 		MemoryContextSwitchTo(oldcontext);
446 	}
447 	else
448 	{
449 		/* Process a specific relation, and possibly partitions thereof */
450 		Oid			relid;
451 		HeapTuple	tuple;
452 		Form_pg_class classForm;
453 		bool		include_parts;
454 
455 		/*
456 		 * We transiently take AccessShareLock to protect the syscache lookup
457 		 * below, as well as find_all_inheritors's expectation that the caller
458 		 * holds some lock on the starting relation.
459 		 */
460 		relid = RangeVarGetRelid(vrel->relation, AccessShareLock, false);
461 
462 		/*
463 		 * Make a returnable VacuumRelation for this rel.
464 		 */
465 		oldcontext = MemoryContextSwitchTo(vac_context);
466 		vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
467 													  relid,
468 													  vrel->va_cols));
469 		MemoryContextSwitchTo(oldcontext);
470 
471 		/*
472 		 * To check whether the relation is a partitioned table, fetch its
473 		 * syscache entry.
474 		 */
475 		tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
476 		if (!HeapTupleIsValid(tuple))
477 			elog(ERROR, "cache lookup failed for relation %u", relid);
478 		classForm = (Form_pg_class) GETSTRUCT(tuple);
479 		include_parts = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
480 		ReleaseSysCache(tuple);
481 
482 		/*
483 		 * If it is, make relation list entries for its partitions.  Note that
484 		 * the list returned by find_all_inheritors() includes the passed-in
485 		 * OID, so we have to skip that.  There's no point in taking locks on
486 		 * the individual partitions yet, and doing so would just add
487 		 * unnecessary deadlock risk.
488 		 */
489 		if (include_parts)
490 		{
491 			List	   *part_oids = find_all_inheritors(relid, NoLock, NULL);
492 			ListCell   *part_lc;
493 
494 			foreach(part_lc, part_oids)
495 			{
496 				Oid			part_oid = lfirst_oid(part_lc);
497 
498 				if (part_oid == relid)
499 					continue;	/* ignore original table */
500 
501 				/*
502 				 * We omit a RangeVar since it wouldn't be appropriate to
503 				 * complain about failure to open one of these relations
504 				 * later.
505 				 */
506 				oldcontext = MemoryContextSwitchTo(vac_context);
507 				vacrels = lappend(vacrels, makeVacuumRelation(NULL,
508 															  part_oid,
509 															  vrel->va_cols));
510 				MemoryContextSwitchTo(oldcontext);
511 			}
512 		}
513 
514 		/*
515 		 * Release lock again.  This means that by the time we actually try to
516 		 * process the table, it might be gone or renamed.  In the former case
517 		 * we'll silently ignore it; in the latter case we'll process it
518 		 * anyway, but we must beware that the RangeVar doesn't necessarily
519 		 * identify it anymore.  This isn't ideal, perhaps, but there's little
520 		 * practical alternative, since we're typically going to commit this
521 		 * transaction and begin a new one between now and then.  Moreover,
522 		 * holding locks on multiple relations would create significant risk
523 		 * of deadlock.
524 		 */
525 		UnlockRelationOid(relid, AccessShareLock);
526 	}
527 
528 	return vacrels;
529 }
530 
531 /*
532  * Construct a list of VacuumRelations for all vacuumable rels in
533  * the current database.  The list is built in vac_context.
534  */
535 static List *
get_all_vacuum_rels(void)536 get_all_vacuum_rels(void)
537 {
538 	List	   *vacrels = NIL;
539 	Relation	pgclass;
540 	HeapScanDesc scan;
541 	HeapTuple	tuple;
542 
543 	pgclass = heap_open(RelationRelationId, AccessShareLock);
544 
545 	scan = heap_beginscan_catalog(pgclass, 0, NULL);
546 
547 	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
548 	{
549 		Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
550 		MemoryContext oldcontext;
551 
552 		/*
553 		 * We include partitioned tables here; depending on which operation is
554 		 * to be performed, caller will decide whether to process or ignore
555 		 * them.
556 		 */
557 		if (classForm->relkind != RELKIND_RELATION &&
558 			classForm->relkind != RELKIND_MATVIEW &&
559 			classForm->relkind != RELKIND_PARTITIONED_TABLE)
560 			continue;
561 
562 		/*
563 		 * Build VacuumRelation(s) specifying the table OIDs to be processed.
564 		 * We omit a RangeVar since it wouldn't be appropriate to complain
565 		 * about failure to open one of these relations later.
566 		 */
567 		oldcontext = MemoryContextSwitchTo(vac_context);
568 		vacrels = lappend(vacrels, makeVacuumRelation(NULL,
569 													  HeapTupleGetOid(tuple),
570 													  NIL));
571 		MemoryContextSwitchTo(oldcontext);
572 	}
573 
574 	heap_endscan(scan);
575 	heap_close(pgclass, AccessShareLock);
576 
577 	return vacrels;
578 }
579 
/*
 * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
 *
 * The input parameters are:
 * - rel is the relation being processed; it is passed to GetOldestXmin()
 *	 and TransactionIdLimitedForOldSnapshots().
 * - freeze_min_age, freeze_table_age, multixact_freeze_min_age and
 *	 multixact_freeze_table_age are the per-call settings.  A negative value
 *	 means "use the GUC variable of the same name with a vacuum_ prefix".
 *
 * The output parameters are:
 * - oldestXmin is the cutoff value used to distinguish whether tuples are
 *	 DEAD or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
 * - freezeLimit is the Xid below which all Xids are replaced by
 *	 FrozenTransactionId during vacuum.
 * - xidFullScanLimit (computed from table_freeze_age parameter)
 *	 represents a minimum Xid value; a table whose relfrozenxid is older than
 *	 this will have a full-table vacuum applied to it, to freeze tuples across
 *	 the whole table.  Vacuuming a table younger than this value can use a
 *	 partial scan.
 * - multiXactCutoff is the value below which all MultiXactIds are removed from
 *	 Xmax.
 * - mxactFullScanLimit is a value against which a table's relminmxid value is
 *	 compared to produce a full-table vacuum, as with xidFullScanLimit.
 *
 * xidFullScanLimit and mxactFullScanLimit can be passed as NULL if caller is
 * not interested.  They must be either both NULL or both non-NULL; this is
 * checked with assertions.
 */
void
vacuum_set_xid_limits(Relation rel,
					  int freeze_min_age,
					  int freeze_table_age,
					  int multixact_freeze_min_age,
					  int multixact_freeze_table_age,
					  TransactionId *oldestXmin,
					  TransactionId *freezeLimit,
					  TransactionId *xidFullScanLimit,
					  MultiXactId *multiXactCutoff,
					  MultiXactId *mxactFullScanLimit)
{
	int			freezemin;
	int			mxid_freezemin;
	int			effective_multixact_freeze_max_age;
	TransactionId limit;
	TransactionId safeLimit;
	MultiXactId oldestMxact;
	MultiXactId mxactLimit;
	MultiXactId safeMxactLimit;

	/*
	 * We can always ignore processes running lazy vacuum.  This is because we
	 * use these values only for deciding which tuples we must keep in the
	 * tables.  Since lazy vacuum doesn't write its XID anywhere, it's safe to
	 * ignore it.  In theory it could be problematic to ignore lazy vacuums in
	 * a full vacuum, but keep in mind that only one vacuum process can be
	 * working on a particular table at any time, and that each vacuum is
	 * always an independent transaction.
	 */
	*oldestXmin =
		TransactionIdLimitedForOldSnapshots(GetOldestXmin(rel, PROCARRAY_FLAGS_VACUUM), rel);

	Assert(TransactionIdIsNormal(*oldestXmin));

	/*
	 * Determine the minimum freeze age to use: as specified by the caller, or
	 * vacuum_freeze_min_age, but in any case not more than half
	 * autovacuum_freeze_max_age, so that autovacuums to prevent XID
	 * wraparound won't occur too frequently.
	 */
	freezemin = freeze_min_age;
	if (freezemin < 0)
		freezemin = vacuum_freeze_min_age;
	freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
	Assert(freezemin >= 0);

	/*
	 * Compute the cutoff XID, being careful not to generate a "permanent" XID
	 */
	limit = *oldestXmin - freezemin;
	if (!TransactionIdIsNormal(limit))
		limit = FirstNormalTransactionId;

	/*
	 * If oldestXmin is very far back (in practice, more than
	 * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
	 * freeze age of zero.
	 */
	safeLimit = ReadNewTransactionId() - autovacuum_freeze_max_age;
	if (!TransactionIdIsNormal(safeLimit))
		safeLimit = FirstNormalTransactionId;

	if (TransactionIdPrecedes(limit, safeLimit))
	{
		ereport(WARNING,
				(errmsg("oldest xmin is far in the past"),
				 errhint("Close open transactions soon to avoid wraparound problems.\n"
						 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
		/* a freeze age of zero means the cutoff is oldestXmin itself */
		limit = *oldestXmin;
	}

	*freezeLimit = limit;

	/*
	 * Compute the multixact age for which freezing is urgent.  This is
	 * normally autovacuum_multixact_freeze_max_age, but may be less if we are
	 * short of multixact member space.
	 */
	effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();

	/*
	 * Determine the minimum multixact freeze age to use: as specified by
	 * caller, or vacuum_multixact_freeze_min_age, but in any case not more
	 * than half effective_multixact_freeze_max_age, so that autovacuums to
	 * prevent MultiXact wraparound won't occur too frequently.
	 */
	mxid_freezemin = multixact_freeze_min_age;
	if (mxid_freezemin < 0)
		mxid_freezemin = vacuum_multixact_freeze_min_age;
	mxid_freezemin = Min(mxid_freezemin,
						 effective_multixact_freeze_max_age / 2);
	Assert(mxid_freezemin >= 0);

	/* compute the cutoff multi, being careful to generate a valid value */
	oldestMxact = GetOldestMultiXactId();
	mxactLimit = oldestMxact - mxid_freezemin;
	if (mxactLimit < FirstMultiXactId)
		mxactLimit = FirstMultiXactId;

	/*
	 * As for XIDs above, compute the oldest multixact cutoff we consider
	 * safe, clamped to a valid MultiXactId.
	 */
	safeMxactLimit =
		ReadNextMultiXactId() - effective_multixact_freeze_max_age;
	if (safeMxactLimit < FirstMultiXactId)
		safeMxactLimit = FirstMultiXactId;

	if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit))
	{
		ereport(WARNING,
				(errmsg("oldest multixact is far in the past"),
				 errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
		/* Use the safe limit, unless an older mxact is still running */
		if (MultiXactIdPrecedes(oldestMxact, safeMxactLimit))
			mxactLimit = oldestMxact;
		else
			mxactLimit = safeMxactLimit;
	}

	*multiXactCutoff = mxactLimit;

	if (xidFullScanLimit != NULL)
	{
		int			freezetable;

		Assert(mxactFullScanLimit != NULL);

		/*
		 * Determine the table freeze age to use: as specified by the caller,
		 * or vacuum_freeze_table_age, but in any case not more than
		 * autovacuum_freeze_max_age * 0.95, so that if you have e.g. nightly
		 * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
		 * before anti-wraparound autovacuum is launched.
		 *
		 * The product with 0.95 is a double; the chosen value is converted
		 * back to int by the assignment.
		 */
		freezetable = freeze_table_age;
		if (freezetable < 0)
			freezetable = vacuum_freeze_table_age;
		freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
		Assert(freezetable >= 0);

		/*
		 * Compute XID limit causing a full-table vacuum, being careful not to
		 * generate a "permanent" XID.
		 */
		limit = ReadNewTransactionId() - freezetable;
		if (!TransactionIdIsNormal(limit))
			limit = FirstNormalTransactionId;

		*xidFullScanLimit = limit;

		/*
		 * Similar to the above, determine the table freeze age to use for
		 * multixacts: as specified by the caller, or
		 * vacuum_multixact_freeze_table_age, but in any case not more than
		 * effective_multixact_freeze_max_age * 0.95, so that if you have
		 * e.g. nightly VACUUM schedule, the nightly VACUUM gets a chance to
		 * freeze multixacts before anti-wraparound autovacuum is launched.
		 */
		freezetable = multixact_freeze_table_age;
		if (freezetable < 0)
			freezetable = vacuum_multixact_freeze_table_age;
		freezetable = Min(freezetable,
						  effective_multixact_freeze_max_age * 0.95);
		Assert(freezetable >= 0);

		/*
		 * Compute MultiXact limit causing a full-table vacuum, being careful
		 * to generate a valid MultiXact value.
		 */
		mxactLimit = ReadNextMultiXactId() - freezetable;
		if (mxactLimit < FirstMultiXactId)
			mxactLimit = FirstMultiXactId;

		*mxactFullScanLimit = mxactLimit;
	}
	else
	{
		/* the two full-scan outputs must be requested together */
		Assert(mxactFullScanLimit == NULL);
	}
}
779 
780 /*
781  * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
782  *
783  *		If we scanned the whole relation then we should just use the count of
784  *		live tuples seen; but if we did not, we should not blindly extrapolate
785  *		from that number, since VACUUM may have scanned a quite nonrandom
786  *		subset of the table.  When we have only partial information, we take
787  *		the old value of pg_class.reltuples as a measurement of the
788  *		tuple density in the unscanned pages.
789  *
790  *		Note: scanned_tuples should count only *live* tuples, since
791  *		pg_class.reltuples is defined that way.
792  */
793 double
vac_estimate_reltuples(Relation relation,BlockNumber total_pages,BlockNumber scanned_pages,double scanned_tuples)794 vac_estimate_reltuples(Relation relation,
795 					   BlockNumber total_pages,
796 					   BlockNumber scanned_pages,
797 					   double scanned_tuples)
798 {
799 	BlockNumber old_rel_pages = relation->rd_rel->relpages;
800 	double		old_rel_tuples = relation->rd_rel->reltuples;
801 	double		old_density;
802 	double		unscanned_pages;
803 	double		total_tuples;
804 
805 	/* If we did scan the whole table, just use the count as-is */
806 	if (scanned_pages >= total_pages)
807 		return scanned_tuples;
808 
809 	/*
810 	 * If scanned_pages is zero but total_pages isn't, keep the existing value
811 	 * of reltuples.  (Note: callers should avoid updating the pg_class
812 	 * statistics in this situation, since no new information has been
813 	 * provided.)
814 	 */
815 	if (scanned_pages == 0)
816 		return old_rel_tuples;
817 
818 	/*
819 	 * If old value of relpages is zero, old density is indeterminate; we
820 	 * can't do much except scale up scanned_tuples to match total_pages.
821 	 */
822 	if (old_rel_pages == 0)
823 		return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
824 
825 	/*
826 	 * Okay, we've covered the corner cases.  The normal calculation is to
827 	 * convert the old measurement to a density (tuples per page), then
828 	 * estimate the number of tuples in the unscanned pages using that figure,
829 	 * and finally add on the number of tuples in the scanned pages.
830 	 */
831 	old_density = old_rel_tuples / old_rel_pages;
832 	unscanned_pages = (double) total_pages - (double) scanned_pages;
833 	total_tuples = old_density * unscanned_pages + scanned_tuples;
834 	return floor(total_tuples + 0.5);
835 }
836 
837 
838 /*
839  *	vac_update_relstats() -- update statistics for one relation
840  *
841  *		Update the whole-relation statistics that are kept in its pg_class
842  *		row.  There are additional stats that will be updated if we are
843  *		doing ANALYZE, but we always update these stats.  This routine works
844  *		for both index and heap relation entries in pg_class.
845  *
846  *		We violate transaction semantics here by overwriting the rel's
847  *		existing pg_class tuple with the new values.  This is reasonably
848  *		safe as long as we're sure that the new values are correct whether or
849  *		not this transaction commits.  The reason for doing this is that if
850  *		we updated these tuples in the usual way, vacuuming pg_class itself
851  *		wouldn't work very well --- by the time we got done with a vacuum
852  *		cycle, most of the tuples in pg_class would've been obsoleted.  Of
853  *		course, this only works for fixed-size not-null columns, but these are.
854  *
855  *		Another reason for doing it this way is that when we are in a lazy
856  *		VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
857  *		Somebody vacuuming pg_class might think they could delete a tuple
858  *		marked with xmin = our xid.
859  *
860  *		In addition to fundamentally nontransactional statistics such as
861  *		relpages and relallvisible, we try to maintain certain lazily-updated
862  *		DDL flags such as relhasindex, by clearing them if no longer correct.
863  *		It's safe to do this in VACUUM, which can't run in parallel with
864  *		CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
865  *		However, it's *not* safe to do it in an ANALYZE that's within an
866  *		outer transaction, because for example the current transaction might
867  *		have dropped the last index; then we'd think relhasindex should be
868  *		cleared, but if the transaction later rolls back this would be wrong.
869  *		So we refrain from updating the DDL flags if we're inside an outer
870  *		transaction.  This is OK since postponing the flag maintenance is
871  *		always allowable.
872  *
873  *		Note: num_tuples should count only *live* tuples, since
874  *		pg_class.reltuples is defined that way.
875  *
876  *		This routine is shared by VACUUM and ANALYZE.
877  */
878 void
vac_update_relstats(Relation relation,BlockNumber num_pages,double num_tuples,BlockNumber num_all_visible_pages,bool hasindex,TransactionId frozenxid,MultiXactId minmulti,bool in_outer_xact)879 vac_update_relstats(Relation relation,
880 					BlockNumber num_pages, double num_tuples,
881 					BlockNumber num_all_visible_pages,
882 					bool hasindex, TransactionId frozenxid,
883 					MultiXactId minmulti,
884 					bool in_outer_xact)
885 {
886 	Oid			relid = RelationGetRelid(relation);
887 	Relation	rd;
888 	HeapTuple	ctup;
889 	Form_pg_class pgcform;
890 	bool		dirty;
891 
892 	rd = heap_open(RelationRelationId, RowExclusiveLock);
893 
894 	/* Fetch a copy of the tuple to scribble on */
895 	ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
896 	if (!HeapTupleIsValid(ctup))
897 		elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
898 			 relid);
899 	pgcform = (Form_pg_class) GETSTRUCT(ctup);
900 
901 	/* Apply statistical updates, if any, to copied tuple */
902 
903 	dirty = false;
904 	if (pgcform->relpages != (int32) num_pages)
905 	{
906 		pgcform->relpages = (int32) num_pages;
907 		dirty = true;
908 	}
909 	if (pgcform->reltuples != (float4) num_tuples)
910 	{
911 		pgcform->reltuples = (float4) num_tuples;
912 		dirty = true;
913 	}
914 	if (pgcform->relallvisible != (int32) num_all_visible_pages)
915 	{
916 		pgcform->relallvisible = (int32) num_all_visible_pages;
917 		dirty = true;
918 	}
919 
920 	/* Apply DDL updates, but not inside an outer transaction (see above) */
921 
922 	if (!in_outer_xact)
923 	{
924 		/*
925 		 * If we didn't find any indexes, reset relhasindex.
926 		 */
927 		if (pgcform->relhasindex && !hasindex)
928 		{
929 			pgcform->relhasindex = false;
930 			dirty = true;
931 		}
932 
933 		/* We also clear relhasrules and relhastriggers if needed */
934 		if (pgcform->relhasrules && relation->rd_rules == NULL)
935 		{
936 			pgcform->relhasrules = false;
937 			dirty = true;
938 		}
939 		if (pgcform->relhastriggers && relation->trigdesc == NULL)
940 		{
941 			pgcform->relhastriggers = false;
942 			dirty = true;
943 		}
944 	}
945 
946 	/*
947 	 * Update relfrozenxid, unless caller passed InvalidTransactionId
948 	 * indicating it has no new data.
949 	 *
950 	 * Ordinarily, we don't let relfrozenxid go backwards: if things are
951 	 * working correctly, the only way the new frozenxid could be older would
952 	 * be if a previous VACUUM was done with a tighter freeze_min_age, in
953 	 * which case we don't want to forget the work it already did.  However,
954 	 * if the stored relfrozenxid is "in the future", then it must be corrupt
955 	 * and it seems best to overwrite it with the cutoff we used this time.
956 	 * This should match vac_update_datfrozenxid() concerning what we consider
957 	 * to be "in the future".
958 	 */
959 	if (TransactionIdIsNormal(frozenxid) &&
960 		pgcform->relfrozenxid != frozenxid &&
961 		(TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid) ||
962 		 TransactionIdPrecedes(ReadNewTransactionId(),
963 							   pgcform->relfrozenxid)))
964 	{
965 		pgcform->relfrozenxid = frozenxid;
966 		dirty = true;
967 	}
968 
969 	/* Similarly for relminmxid */
970 	if (MultiXactIdIsValid(minmulti) &&
971 		pgcform->relminmxid != minmulti &&
972 		(MultiXactIdPrecedes(pgcform->relminmxid, minmulti) ||
973 		 MultiXactIdPrecedes(ReadNextMultiXactId(), pgcform->relminmxid)))
974 	{
975 		pgcform->relminmxid = minmulti;
976 		dirty = true;
977 	}
978 
979 	/* If anything changed, write out the tuple. */
980 	if (dirty)
981 		heap_inplace_update(rd, ctup);
982 
983 	heap_close(rd, RowExclusiveLock);
984 }
985 
986 
987 /*
988  *	vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
989  *
990  *		Update pg_database's datfrozenxid entry for our database to be the
991  *		minimum of the pg_class.relfrozenxid values.
992  *
993  *		Similarly, update our datminmxid to be the minimum of the
994  *		pg_class.relminmxid values.
995  *
996  *		If we are able to advance either pg_database value, also try to
997  *		truncate pg_xact and pg_multixact.
998  *
999  *		We violate transaction semantics here by overwriting the database's
1000  *		existing pg_database tuple with the new values.  This is reasonably
1001  *		safe since the new values are correct whether or not this transaction
1002  *		commits.  As with vac_update_relstats, this avoids leaving dead tuples
1003  *		behind after a VACUUM.
1004  */
1005 void
vac_update_datfrozenxid(void)1006 vac_update_datfrozenxid(void)
1007 {
1008 	HeapTuple	tuple;
1009 	Form_pg_database dbform;
1010 	Relation	relation;
1011 	SysScanDesc scan;
1012 	HeapTuple	classTup;
1013 	TransactionId newFrozenXid;
1014 	MultiXactId newMinMulti;
1015 	TransactionId lastSaneFrozenXid;
1016 	MultiXactId lastSaneMinMulti;
1017 	bool		bogus = false;
1018 	bool		dirty = false;
1019 
1020 	/*
1021 	 * Restrict this task to one backend per database.  This avoids race
1022 	 * conditions that would move datfrozenxid or datminmxid backward.  It
1023 	 * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
1024 	 * datfrozenxid passed to an earlier vac_truncate_clog() call.
1025 	 */
1026 	LockDatabaseFrozenIds(ExclusiveLock);
1027 
1028 	/*
1029 	 * Initialize the "min" calculation with GetOldestXmin, which is a
1030 	 * reasonable approximation to the minimum relfrozenxid for not-yet-
1031 	 * committed pg_class entries for new tables; see AddNewRelationTuple().
1032 	 * So we cannot produce a wrong minimum by starting with this.
1033 	 */
1034 	newFrozenXid = GetOldestXmin(NULL, PROCARRAY_FLAGS_VACUUM);
1035 
1036 	/*
1037 	 * Similarly, initialize the MultiXact "min" with the value that would be
1038 	 * used on pg_class for new tables.  See AddNewRelationTuple().
1039 	 */
1040 	newMinMulti = GetOldestMultiXactId();
1041 
1042 	/*
1043 	 * Identify the latest relfrozenxid and relminmxid values that we could
1044 	 * validly see during the scan.  These are conservative values, but it's
1045 	 * not really worth trying to be more exact.
1046 	 */
1047 	lastSaneFrozenXid = ReadNewTransactionId();
1048 	lastSaneMinMulti = ReadNextMultiXactId();
1049 
1050 	/*
1051 	 * We must seqscan pg_class to find the minimum Xid, because there is no
1052 	 * index that can help us here.
1053 	 */
1054 	relation = heap_open(RelationRelationId, AccessShareLock);
1055 
1056 	scan = systable_beginscan(relation, InvalidOid, false,
1057 							  NULL, 0, NULL);
1058 
1059 	while ((classTup = systable_getnext(scan)) != NULL)
1060 	{
1061 		Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
1062 
1063 		/*
1064 		 * Only consider relations able to hold unfrozen XIDs (anything else
1065 		 * should have InvalidTransactionId in relfrozenxid anyway.)
1066 		 */
1067 		if (classForm->relkind != RELKIND_RELATION &&
1068 			classForm->relkind != RELKIND_MATVIEW &&
1069 			classForm->relkind != RELKIND_TOASTVALUE)
1070 			continue;
1071 
1072 		Assert(TransactionIdIsNormal(classForm->relfrozenxid));
1073 		Assert(MultiXactIdIsValid(classForm->relminmxid));
1074 
1075 		/*
1076 		 * If things are working properly, no relation should have a
1077 		 * relfrozenxid or relminmxid that is "in the future".  However, such
1078 		 * cases have been known to arise due to bugs in pg_upgrade.  If we
1079 		 * see any entries that are "in the future", chicken out and don't do
1080 		 * anything.  This ensures we won't truncate clog before those
1081 		 * relations have been scanned and cleaned up.
1082 		 */
1083 		if (TransactionIdPrecedes(lastSaneFrozenXid, classForm->relfrozenxid) ||
1084 			MultiXactIdPrecedes(lastSaneMinMulti, classForm->relminmxid))
1085 		{
1086 			bogus = true;
1087 			break;
1088 		}
1089 
1090 		if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
1091 			newFrozenXid = classForm->relfrozenxid;
1092 
1093 		if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
1094 			newMinMulti = classForm->relminmxid;
1095 	}
1096 
1097 	/* we're done with pg_class */
1098 	systable_endscan(scan);
1099 	heap_close(relation, AccessShareLock);
1100 
1101 	/* chicken out if bogus data found */
1102 	if (bogus)
1103 		return;
1104 
1105 	Assert(TransactionIdIsNormal(newFrozenXid));
1106 	Assert(MultiXactIdIsValid(newMinMulti));
1107 
1108 	/* Now fetch the pg_database tuple we need to update. */
1109 	relation = heap_open(DatabaseRelationId, RowExclusiveLock);
1110 
1111 	/* Fetch a copy of the tuple to scribble on */
1112 	tuple = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
1113 	if (!HeapTupleIsValid(tuple))
1114 		elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
1115 	dbform = (Form_pg_database) GETSTRUCT(tuple);
1116 
1117 	/*
1118 	 * As in vac_update_relstats(), we ordinarily don't want to let
1119 	 * datfrozenxid go backward; but if it's "in the future" then it must be
1120 	 * corrupt and it seems best to overwrite it.
1121 	 */
1122 	if (dbform->datfrozenxid != newFrozenXid &&
1123 		(TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
1124 		 TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
1125 	{
1126 		dbform->datfrozenxid = newFrozenXid;
1127 		dirty = true;
1128 	}
1129 	else
1130 		newFrozenXid = dbform->datfrozenxid;
1131 
1132 	/* Ditto for datminmxid */
1133 	if (dbform->datminmxid != newMinMulti &&
1134 		(MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1135 		 MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1136 	{
1137 		dbform->datminmxid = newMinMulti;
1138 		dirty = true;
1139 	}
1140 	else
1141 		newMinMulti = dbform->datminmxid;
1142 
1143 	if (dirty)
1144 		heap_inplace_update(relation, tuple);
1145 
1146 	heap_freetuple(tuple);
1147 	heap_close(relation, RowExclusiveLock);
1148 
1149 	/*
1150 	 * If we were able to advance datfrozenxid or datminmxid, see if we can
1151 	 * truncate pg_xact and/or pg_multixact.  Also do it if the shared
1152 	 * XID-wrap-limit info is stale, since this action will update that too.
1153 	 */
1154 	if (dirty || ForceTransactionIdLimitUpdate())
1155 		vac_truncate_clog(newFrozenXid, newMinMulti,
1156 						  lastSaneFrozenXid, lastSaneMinMulti);
1157 }
1158 
1159 
1160 /*
1161  *	vac_truncate_clog() -- attempt to truncate the commit log
1162  *
1163  *		Scan pg_database to determine the system-wide oldest datfrozenxid,
1164  *		and use it to truncate the transaction commit log (pg_xact).
1165  *		Also update the XID wrap limit info maintained by varsup.c.
1166  *		Likewise for datminmxid.
1167  *
1168  *		The passed frozenXID and minMulti are the updated values for my own
1169  *		pg_database entry. They're used to initialize the "min" calculations.
1170  *		The caller also passes the "last sane" XID and MXID, since it has
1171  *		those at hand already.
1172  *
1173  *		This routine is only invoked when we've managed to change our
1174  *		DB's datfrozenxid/datminmxid values, or we found that the shared
1175  *		XID-wrap-limit info is stale.
1176  */
1177 static void
vac_truncate_clog(TransactionId frozenXID,MultiXactId minMulti,TransactionId lastSaneFrozenXid,MultiXactId lastSaneMinMulti)1178 vac_truncate_clog(TransactionId frozenXID,
1179 				  MultiXactId minMulti,
1180 				  TransactionId lastSaneFrozenXid,
1181 				  MultiXactId lastSaneMinMulti)
1182 {
1183 	TransactionId nextXID = ReadNewTransactionId();
1184 	Relation	relation;
1185 	HeapScanDesc scan;
1186 	HeapTuple	tuple;
1187 	Oid			oldestxid_datoid;
1188 	Oid			minmulti_datoid;
1189 	bool		bogus = false;
1190 	bool		frozenAlreadyWrapped = false;
1191 
1192 	/* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
1193 	LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
1194 
1195 	/* init oldest datoids to sync with my frozenXID/minMulti values */
1196 	oldestxid_datoid = MyDatabaseId;
1197 	minmulti_datoid = MyDatabaseId;
1198 
1199 	/*
1200 	 * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1201 	 *
1202 	 * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1203 	 * the values could change while we look at them.  Fetch each one just
1204 	 * once to ensure sane behavior of the comparison logic.  (Here, as in
1205 	 * many other places, we assume that fetching or updating an XID in shared
1206 	 * storage is atomic.)
1207 	 *
1208 	 * Note: we need not worry about a race condition with new entries being
1209 	 * inserted by CREATE DATABASE.  Any such entry will have a copy of some
1210 	 * existing DB's datfrozenxid, and that source DB cannot be ours because
1211 	 * of the interlock against copying a DB containing an active backend.
1212 	 * Hence the new entry will not reduce the minimum.  Also, if two VACUUMs
1213 	 * concurrently modify the datfrozenxid's of different databases, the
1214 	 * worst possible outcome is that pg_xact is not truncated as aggressively
1215 	 * as it could be.
1216 	 */
1217 	relation = heap_open(DatabaseRelationId, AccessShareLock);
1218 
1219 	scan = heap_beginscan_catalog(relation, 0, NULL);
1220 
1221 	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1222 	{
1223 		volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1224 		TransactionId datfrozenxid = dbform->datfrozenxid;
1225 		TransactionId datminmxid = dbform->datminmxid;
1226 
1227 		Assert(TransactionIdIsNormal(datfrozenxid));
1228 		Assert(MultiXactIdIsValid(datminmxid));
1229 
1230 		/*
1231 		 * If things are working properly, no database should have a
1232 		 * datfrozenxid or datminmxid that is "in the future".  However, such
1233 		 * cases have been known to arise due to bugs in pg_upgrade.  If we
1234 		 * see any entries that are "in the future", chicken out and don't do
1235 		 * anything.  This ensures we won't truncate clog before those
1236 		 * databases have been scanned and cleaned up.  (We will issue the
1237 		 * "already wrapped" warning if appropriate, though.)
1238 		 */
1239 		if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1240 			MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1241 			bogus = true;
1242 
1243 		if (TransactionIdPrecedes(nextXID, datfrozenxid))
1244 			frozenAlreadyWrapped = true;
1245 		else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1246 		{
1247 			frozenXID = datfrozenxid;
1248 			oldestxid_datoid = HeapTupleGetOid(tuple);
1249 		}
1250 
1251 		if (MultiXactIdPrecedes(datminmxid, minMulti))
1252 		{
1253 			minMulti = datminmxid;
1254 			minmulti_datoid = HeapTupleGetOid(tuple);
1255 		}
1256 	}
1257 
1258 	heap_endscan(scan);
1259 
1260 	heap_close(relation, AccessShareLock);
1261 
1262 	/*
1263 	 * Do not truncate CLOG if we seem to have suffered wraparound already;
1264 	 * the computed minimum XID might be bogus.  This case should now be
1265 	 * impossible due to the defenses in GetNewTransactionId, but we keep the
1266 	 * test anyway.
1267 	 */
1268 	if (frozenAlreadyWrapped)
1269 	{
1270 		ereport(WARNING,
1271 				(errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1272 				 errdetail("You might have already suffered transaction-wraparound data loss.")));
1273 		return;
1274 	}
1275 
1276 	/* chicken out if data is bogus in any other way */
1277 	if (bogus)
1278 		return;
1279 
1280 	/*
1281 	 * Advance the oldest value for commit timestamps before truncating, so
1282 	 * that if a user requests a timestamp for a transaction we're truncating
1283 	 * away right after this point, they get NULL instead of an ugly "file not
1284 	 * found" error from slru.c.  This doesn't matter for xact/multixact
1285 	 * because they are not subject to arbitrary lookups from users.
1286 	 */
1287 	AdvanceOldestCommitTsXid(frozenXID);
1288 
1289 	/*
1290 	 * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1291 	 */
1292 	TruncateCLOG(frozenXID, oldestxid_datoid);
1293 	TruncateCommitTs(frozenXID);
1294 	TruncateMultiXact(minMulti, minmulti_datoid);
1295 
1296 	/*
1297 	 * Update the wrap limit for GetNewTransactionId and creation of new
1298 	 * MultiXactIds.  Note: these functions will also signal the postmaster
1299 	 * for an(other) autovac cycle if needed.   XXX should we avoid possibly
1300 	 * signalling twice?
1301 	 */
1302 	SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1303 	SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
1304 
1305 	LWLockRelease(WrapLimitsVacuumLock);
1306 }
1307 
1308 
1309 /*
1310  *	vacuum_rel() -- vacuum one heap relation
1311  *
1312  *		relid identifies the relation to vacuum.  If relation is supplied,
1313  *		use the name therein for reporting any failure to open/lock the rel;
1314  *		do not use it once we've successfully opened the rel, since it might
1315  *		be stale.
1316  *
1317  *		Returns true if it's okay to proceed with a requested ANALYZE
1318  *		operation on this table.
1319  *
1320  *		Doing one heap at a time incurs extra overhead, since we need to
1321  *		check that the heap exists again just before we vacuum it.  The
1322  *		reason that we do this is so that vacuuming can be spread across
1323  *		many small transactions.  Otherwise, two-phase locking would require
1324  *		us to lock the entire database during one pass of the vacuum cleaner.
1325  *
1326  *		At entry and exit, we are not inside a transaction.
1327  */
1328 static bool
vacuum_rel(Oid relid,RangeVar * relation,int options,VacuumParams * params)1329 vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params)
1330 {
1331 	LOCKMODE	lmode;
1332 	Relation	onerel;
1333 	LockRelId	onerelid;
1334 	Oid			toast_relid;
1335 	Oid			save_userid;
1336 	int			save_sec_context;
1337 	int			save_nestlevel;
1338 	bool		rel_lock = true;
1339 
1340 	Assert(params != NULL);
1341 
1342 	/* Begin a transaction for vacuuming this relation */
1343 	StartTransactionCommand();
1344 
1345 	/*
1346 	 * Functions in indexes may want a snapshot set.  Also, setting a snapshot
1347 	 * ensures that RecentGlobalXmin is kept truly recent.
1348 	 */
1349 	PushActiveSnapshot(GetTransactionSnapshot());
1350 
1351 	if (!(options & VACOPT_FULL))
1352 	{
1353 		/*
1354 		 * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
1355 		 * other concurrent VACUUMs know that they can ignore this one while
1356 		 * determining their OldestXmin.  (The reason we don't set it during a
1357 		 * full VACUUM is exactly that we may have to run user-defined
1358 		 * functions for functional indexes, and we want to make sure that if
1359 		 * they use the snapshot set above, any tuples it requires can't get
1360 		 * removed from other tables.  An index function that depends on the
1361 		 * contents of other tables is arguably broken, but we won't break it
1362 		 * here by violating transaction semantics.)
1363 		 *
1364 		 * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
1365 		 * autovacuum; it's used to avoid canceling a vacuum that was invoked
1366 		 * in an emergency.
1367 		 *
1368 		 * Note: these flags remain set until CommitTransaction or
1369 		 * AbortTransaction.  We don't want to clear them until we reset
1370 		 * MyPgXact->xid/xmin, else OldestXmin might appear to go backwards,
1371 		 * which is probably Not Good.
1372 		 */
1373 		LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1374 		MyPgXact->vacuumFlags |= PROC_IN_VACUUM;
1375 		if (params->is_wraparound)
1376 			MyPgXact->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND;
1377 		LWLockRelease(ProcArrayLock);
1378 	}
1379 
1380 	/*
1381 	 * Check for user-requested abort.  Note we want this to be inside a
1382 	 * transaction, so xact.c doesn't issue useless WARNING.
1383 	 */
1384 	CHECK_FOR_INTERRUPTS();
1385 
1386 	/*
1387 	 * Determine the type of lock we want --- hard exclusive lock for a FULL
1388 	 * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
1389 	 * way, we can be sure that no other backend is vacuuming the same table.
1390 	 */
1391 	lmode = (options & VACOPT_FULL) ? AccessExclusiveLock : ShareUpdateExclusiveLock;
1392 
1393 	/*
1394 	 * Open the relation and get the appropriate lock on it.
1395 	 *
1396 	 * There's a race condition here: the rel may have gone away since the
1397 	 * last time we saw it.  If so, we don't need to vacuum it.
1398 	 *
1399 	 * If we've been asked not to wait for the relation lock, acquire it first
1400 	 * in non-blocking mode, before calling try_relation_open().
1401 	 */
1402 	if (!(options & VACOPT_NOWAIT))
1403 		onerel = try_relation_open(relid, lmode);
1404 	else if (ConditionalLockRelationOid(relid, lmode))
1405 		onerel = try_relation_open(relid, NoLock);
1406 	else
1407 	{
1408 		onerel = NULL;
1409 		rel_lock = false;
1410 	}
1411 
1412 	/*
1413 	 * If we failed to open or lock the relation, emit a log message before
1414 	 * exiting.
1415 	 */
1416 	if (!onerel)
1417 	{
1418 		int			elevel = 0;
1419 
1420 		/*
1421 		 * Determine the log level.
1422 		 *
1423 		 * If the RangeVar is not defined, we do not have enough information
1424 		 * to provide a meaningful log statement.  Chances are that
1425 		 * vacuum_rel's caller has intentionally not provided this information
1426 		 * so that this logging is skipped, anyway.
1427 		 *
1428 		 * Otherwise, for autovacuum logs, we emit a LOG if
1429 		 * log_autovacuum_min_duration is not disabled.  For manual VACUUM, we
1430 		 * emit a WARNING to match the log statements in the permission
1431 		 * checks.
1432 		 */
1433 		if (relation != NULL)
1434 		{
1435 			if (!IsAutoVacuumWorkerProcess())
1436 				elevel = WARNING;
1437 			else if (params->log_min_duration >= 0)
1438 				elevel = LOG;
1439 		}
1440 
1441 		if (elevel != 0)
1442 		{
1443 			if (!rel_lock)
1444 				ereport(elevel,
1445 						(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
1446 						 errmsg("skipping vacuum of \"%s\" --- lock not available",
1447 								relation->relname)));
1448 			else
1449 				ereport(elevel,
1450 						(errcode(ERRCODE_UNDEFINED_TABLE),
1451 						 errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
1452 								relation->relname)));
1453 		}
1454 
1455 		PopActiveSnapshot();
1456 		CommitTransactionCommand();
1457 		return false;
1458 	}
1459 
1460 	/*
1461 	 * Check permissions.
1462 	 *
1463 	 * We allow the user to vacuum a table if he is superuser, the table
1464 	 * owner, or the database owner (but in the latter case, only if it's not
1465 	 * a shared relation).  pg_class_ownercheck includes the superuser case.
1466 	 *
1467 	 * Note we choose to treat permissions failure as a WARNING and keep
1468 	 * trying to vacuum the rest of the DB --- is this appropriate?
1469 	 */
1470 	if (!(pg_class_ownercheck(RelationGetRelid(onerel), GetUserId()) ||
1471 		  (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !onerel->rd_rel->relisshared)))
1472 	{
1473 		if (onerel->rd_rel->relisshared)
1474 			ereport(WARNING,
1475 					(errmsg("skipping \"%s\" --- only superuser can vacuum it",
1476 							RelationGetRelationName(onerel))));
1477 		else if (onerel->rd_rel->relnamespace == PG_CATALOG_NAMESPACE)
1478 			ereport(WARNING,
1479 					(errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it",
1480 							RelationGetRelationName(onerel))));
1481 		else
1482 			ereport(WARNING,
1483 					(errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
1484 							RelationGetRelationName(onerel))));
1485 		relation_close(onerel, lmode);
1486 		PopActiveSnapshot();
1487 		CommitTransactionCommand();
1488 		return false;
1489 	}
1490 
1491 	/*
1492 	 * Check that it's of a vacuumable relkind.
1493 	 */
1494 	if (onerel->rd_rel->relkind != RELKIND_RELATION &&
1495 		onerel->rd_rel->relkind != RELKIND_MATVIEW &&
1496 		onerel->rd_rel->relkind != RELKIND_TOASTVALUE &&
1497 		onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
1498 	{
1499 		ereport(WARNING,
1500 				(errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
1501 						RelationGetRelationName(onerel))));
1502 		relation_close(onerel, lmode);
1503 		PopActiveSnapshot();
1504 		CommitTransactionCommand();
1505 		return false;
1506 	}
1507 
1508 	/*
1509 	 * Silently ignore tables that are temp tables of other backends ---
1510 	 * trying to vacuum these will lead to great unhappiness, since their
1511 	 * contents are probably not up-to-date on disk.  (We don't throw a
1512 	 * warning here; it would just lead to chatter during a database-wide
1513 	 * VACUUM.)
1514 	 */
1515 	if (RELATION_IS_OTHER_TEMP(onerel))
1516 	{
1517 		relation_close(onerel, lmode);
1518 		PopActiveSnapshot();
1519 		CommitTransactionCommand();
1520 		return false;
1521 	}
1522 
1523 	/*
1524 	 * Silently ignore partitioned tables as there is no work to be done.  The
1525 	 * useful work is on their child partitions, which have been queued up for
1526 	 * us separately.
1527 	 */
1528 	if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1529 	{
1530 		relation_close(onerel, lmode);
1531 		PopActiveSnapshot();
1532 		CommitTransactionCommand();
1533 		/* It's OK to proceed with ANALYZE on this table */
1534 		return true;
1535 	}
1536 
1537 	/*
1538 	 * Get a session-level lock too. This will protect our access to the
1539 	 * relation across multiple transactions, so that we can vacuum the
1540 	 * relation's TOAST table (if any) secure in the knowledge that no one is
1541 	 * deleting the parent relation.
1542 	 *
1543 	 * NOTE: this cannot block, even if someone else is waiting for access,
1544 	 * because the lock manager knows that both lock requests are from the
1545 	 * same process.
1546 	 */
1547 	onerelid = onerel->rd_lockInfo.lockRelId;
1548 	LockRelationIdForSession(&onerelid, lmode);
1549 
1550 	/*
1551 	 * Remember the relation's TOAST relation for later, if the caller asked
1552 	 * us to process it.  In VACUUM FULL, though, the toast table is
1553 	 * automatically rebuilt by cluster_rel so we shouldn't recurse to it.
1554 	 */
1555 	if (!(options & VACOPT_SKIPTOAST) && !(options & VACOPT_FULL))
1556 		toast_relid = onerel->rd_rel->reltoastrelid;
1557 	else
1558 		toast_relid = InvalidOid;
1559 
1560 	/*
1561 	 * Switch to the table owner's userid, so that any index functions are run
1562 	 * as that user.  Also lock down security-restricted operations and
1563 	 * arrange to make GUC variable changes local to this command. (This is
1564 	 * unnecessary, but harmless, for lazy VACUUM.)
1565 	 */
1566 	GetUserIdAndSecContext(&save_userid, &save_sec_context);
1567 	SetUserIdAndSecContext(onerel->rd_rel->relowner,
1568 						   save_sec_context | SECURITY_RESTRICTED_OPERATION);
1569 	save_nestlevel = NewGUCNestLevel();
1570 
1571 	/*
1572 	 * Do the actual work --- either FULL or "lazy" vacuum
1573 	 */
1574 	if (options & VACOPT_FULL)
1575 	{
1576 		/* close relation before vacuuming, but hold lock until commit */
1577 		relation_close(onerel, NoLock);
1578 		onerel = NULL;
1579 
1580 		/* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
1581 		cluster_rel(relid, InvalidOid, false,
1582 					(options & VACOPT_VERBOSE) != 0);
1583 	}
1584 	else
1585 		lazy_vacuum_rel(onerel, options, params, vac_strategy);
1586 
1587 	/* Roll back any GUC changes executed by index functions */
1588 	AtEOXact_GUC(false, save_nestlevel);
1589 
1590 	/* Restore userid and security context */
1591 	SetUserIdAndSecContext(save_userid, save_sec_context);
1592 
1593 	/* all done with this class, but hold lock until commit */
1594 	if (onerel)
1595 		relation_close(onerel, NoLock);
1596 
1597 	/*
1598 	 * Complete the transaction and free all temporary memory used.
1599 	 */
1600 	PopActiveSnapshot();
1601 	CommitTransactionCommand();
1602 
1603 	/*
1604 	 * If the relation has a secondary toast rel, vacuum that too while we
1605 	 * still hold the session lock on the master table.  Note however that
1606 	 * "analyze" will not get done on the toast table.  This is good, because
1607 	 * the toaster always uses hardcoded index access and statistics are
1608 	 * totally unimportant for toast relations.
1609 	 */
1610 	if (toast_relid != InvalidOid)
1611 		vacuum_rel(toast_relid, NULL, options, params);
1612 
1613 	/*
1614 	 * Now release the session-level lock on the master table.
1615 	 */
1616 	UnlockRelationIdForSession(&onerelid, lmode);
1617 
1618 	/* Report that we really did it. */
1619 	return true;
1620 }
1621 
1622 
1623 /*
1624  * Open all the vacuumable indexes of the given relation, obtaining the
1625  * specified kind of lock on each.  Return an array of Relation pointers for
1626  * the indexes into *Irel, and the number of indexes into *nindexes.
1627  *
1628  * We consider an index vacuumable if it is marked insertable (IndexIsReady).
1629  * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
1630  * execution, and what we have is too corrupt to be processable.  We will
1631  * vacuum even if the index isn't indisvalid; this is important because in a
1632  * unique index, uniqueness checks will be performed anyway and had better not
1633  * hit dangling index pointers.
1634  */
1635 void
vac_open_indexes(Relation relation,LOCKMODE lockmode,int * nindexes,Relation ** Irel)1636 vac_open_indexes(Relation relation, LOCKMODE lockmode,
1637 				 int *nindexes, Relation **Irel)
1638 {
1639 	List	   *indexoidlist;
1640 	ListCell   *indexoidscan;
1641 	int			i;
1642 
1643 	Assert(lockmode != NoLock);
1644 
1645 	indexoidlist = RelationGetIndexList(relation);
1646 
1647 	/* allocate enough memory for all indexes */
1648 	i = list_length(indexoidlist);
1649 
1650 	if (i > 0)
1651 		*Irel = (Relation *) palloc(i * sizeof(Relation));
1652 	else
1653 		*Irel = NULL;
1654 
1655 	/* collect just the ready indexes */
1656 	i = 0;
1657 	foreach(indexoidscan, indexoidlist)
1658 	{
1659 		Oid			indexoid = lfirst_oid(indexoidscan);
1660 		Relation	indrel;
1661 
1662 		indrel = index_open(indexoid, lockmode);
1663 		if (IndexIsReady(indrel->rd_index))
1664 			(*Irel)[i++] = indrel;
1665 		else
1666 			index_close(indrel, lockmode);
1667 	}
1668 
1669 	*nindexes = i;
1670 
1671 	list_free(indexoidlist);
1672 }
1673 
1674 /*
1675  * Release the resources acquired by vac_open_indexes.  Optionally release
1676  * the locks (say NoLock to keep 'em).
1677  */
1678 void
vac_close_indexes(int nindexes,Relation * Irel,LOCKMODE lockmode)1679 vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
1680 {
1681 	if (Irel == NULL)
1682 		return;
1683 
1684 	while (nindexes--)
1685 	{
1686 		Relation	ind = Irel[nindexes];
1687 
1688 		index_close(ind, lockmode);
1689 	}
1690 	pfree(Irel);
1691 }
1692 
1693 /*
1694  * vacuum_delay_point --- check for interrupts and cost-based delay.
1695  *
1696  * This should be called in each major loop of VACUUM processing,
1697  * typically once per page processed.
1698  */
1699 void
vacuum_delay_point(void)1700 vacuum_delay_point(void)
1701 {
1702 	/* Always check for interrupts */
1703 	CHECK_FOR_INTERRUPTS();
1704 
1705 	/* Nap if appropriate */
1706 	if (VacuumCostActive && !InterruptPending &&
1707 		VacuumCostBalance >= VacuumCostLimit)
1708 	{
1709 		int			msec;
1710 
1711 		msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit;
1712 		if (msec > VacuumCostDelay * 4)
1713 			msec = VacuumCostDelay * 4;
1714 
1715 		pg_usleep(msec * 1000L);
1716 
1717 		VacuumCostBalance = 0;
1718 
1719 		/* update balance values for workers */
1720 		AutoVacuumUpdateDelay();
1721 
1722 		/* Might have gotten an interrupt while sleeping */
1723 		CHECK_FOR_INTERRUPTS();
1724 	}
1725 }
1726