1 /*-------------------------------------------------------------------------
2  *
3  * vacuum.c
4  *	  The postgres vacuum cleaner.
5  *
6  * This file now includes only control and dispatch code for VACUUM and
7  * ANALYZE commands.  Regular VACUUM is implemented in vacuumlazy.c,
8  * ANALYZE in analyze.c, and VACUUM FULL is a variant of CLUSTER, handled
9  * in cluster.c.
10  *
11  *
12  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
13  * Portions Copyright (c) 1994, Regents of the University of California
14  *
15  *
16  * IDENTIFICATION
17  *	  src/backend/commands/vacuum.c
18  *
19  *-------------------------------------------------------------------------
20  */
21 #include "postgres.h"
22 
23 #include <math.h>
24 
25 #include "access/clog.h"
26 #include "access/commit_ts.h"
27 #include "access/genam.h"
28 #include "access/heapam.h"
29 #include "access/htup_details.h"
30 #include "access/multixact.h"
31 #include "access/transam.h"
32 #include "access/xact.h"
33 #include "catalog/namespace.h"
34 #include "catalog/pg_database.h"
35 #include "catalog/pg_inherits_fn.h"
36 #include "catalog/pg_namespace.h"
37 #include "commands/cluster.h"
38 #include "commands/vacuum.h"
39 #include "miscadmin.h"
40 #include "pgstat.h"
41 #include "postmaster/autovacuum.h"
42 #include "storage/bufmgr.h"
43 #include "storage/lmgr.h"
44 #include "storage/proc.h"
45 #include "storage/procarray.h"
46 #include "utils/acl.h"
47 #include "utils/fmgroids.h"
48 #include "utils/guc.h"
49 #include "utils/memutils.h"
50 #include "utils/snapmgr.h"
51 #include "utils/syscache.h"
52 #include "utils/tqual.h"
53 
54 
55 /*
56  * GUC parameters
57  */
58 int			vacuum_freeze_min_age;
59 int			vacuum_freeze_table_age;
60 int			vacuum_multixact_freeze_min_age;
61 int			vacuum_multixact_freeze_table_age;
62 
63 
64 /* A few variables that don't seem worth passing around as parameters */
65 static MemoryContext vac_context = NULL;
66 static BufferAccessStrategy vac_strategy;
67 
68 
69 /* non-export function prototypes */
70 static List *get_rel_oids(Oid relid, const RangeVar *vacrel);
71 static void vac_truncate_clog(TransactionId frozenXID,
72 				  MultiXactId minMulti,
73 				  TransactionId lastSaneFrozenXid,
74 				  MultiXactId lastSaneMinMulti);
75 static bool vacuum_rel(Oid relid, RangeVar *relation, int options,
76 		   VacuumParams *params);
77 
78 /*
79  * Primary entry point for manual VACUUM and ANALYZE commands
80  *
81  * This is mainly a preparation wrapper for the real operations that will
82  * happen in vacuum().
83  */
84 void
ExecVacuum(VacuumStmt * vacstmt,bool isTopLevel)85 ExecVacuum(VacuumStmt *vacstmt, bool isTopLevel)
86 {
87 	VacuumParams params;
88 
89 	/* sanity checks on options */
90 	Assert(vacstmt->options & (VACOPT_VACUUM | VACOPT_ANALYZE));
91 	Assert((vacstmt->options & VACOPT_VACUUM) ||
92 		   !(vacstmt->options & (VACOPT_FULL | VACOPT_FREEZE)));
93 	Assert((vacstmt->options & VACOPT_ANALYZE) || vacstmt->va_cols == NIL);
94 	Assert(!(vacstmt->options & VACOPT_SKIPTOAST));
95 
96 	/*
97 	 * All freeze ages are zero if the FREEZE option is given; otherwise pass
98 	 * them as -1 which means to use the default values.
99 	 */
100 	if (vacstmt->options & VACOPT_FREEZE)
101 	{
102 		params.freeze_min_age = 0;
103 		params.freeze_table_age = 0;
104 		params.multixact_freeze_min_age = 0;
105 		params.multixact_freeze_table_age = 0;
106 	}
107 	else
108 	{
109 		params.freeze_min_age = -1;
110 		params.freeze_table_age = -1;
111 		params.multixact_freeze_min_age = -1;
112 		params.multixact_freeze_table_age = -1;
113 	}
114 
115 	/* user-invoked vacuum is never "for wraparound" */
116 	params.is_wraparound = false;
117 
118 	/* user-invoked vacuum never uses this parameter */
119 	params.log_min_duration = -1;
120 
121 	/* Now go through the common routine */
122 	vacuum(vacstmt->options, vacstmt->relation, InvalidOid, &params,
123 		   vacstmt->va_cols, NULL, isTopLevel);
124 }
125 
126 /*
127  * Primary entry point for VACUUM and ANALYZE commands.
128  *
129  * options is a bitmask of VacuumOption flags, indicating what to do.
130  *
131  * relid, if not InvalidOid, indicates the relation to process; otherwise,
132  * if a RangeVar is supplied, that's what to process; otherwise, we process
133  * all relevant tables in the database.  (If both relid and a RangeVar are
134  * supplied, the relid is what is processed, but we use the RangeVar's name
135  * to report any open/lock failure.)
136  *
137  * params contains a set of parameters that can be used to customize the
138  * behavior.
139  *
140  * va_cols is a list of columns to analyze, or NIL to process them all.
141  *
142  * bstrategy is normally given as NULL, but in autovacuum it can be passed
143  * in to use the same buffer strategy object across multiple vacuum() calls.
144  *
145  * isTopLevel should be passed down from ProcessUtility.
146  *
147  * It is the caller's responsibility that all parameters are allocated in a
148  * memory context that will not disappear at transaction commit.
149  */
150 void
vacuum(int options,RangeVar * relation,Oid relid,VacuumParams * params,List * va_cols,BufferAccessStrategy bstrategy,bool isTopLevel)151 vacuum(int options, RangeVar *relation, Oid relid, VacuumParams *params,
152 	   List *va_cols, BufferAccessStrategy bstrategy, bool isTopLevel)
153 {
154 	const char *stmttype;
155 	volatile bool in_outer_xact,
156 				use_own_xacts;
157 	List	   *relations;
158 	static bool in_vacuum = false;
159 
160 	Assert(params != NULL);
161 
162 	stmttype = (options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
163 
164 	/*
165 	 * We cannot run VACUUM inside a user transaction block; if we were inside
166 	 * a transaction, then our commit- and start-transaction-command calls
167 	 * would not have the intended effect!	There are numerous other subtle
168 	 * dependencies on this, too.
169 	 *
170 	 * ANALYZE (without VACUUM) can run either way.
171 	 */
172 	if (options & VACOPT_VACUUM)
173 	{
174 		PreventTransactionChain(isTopLevel, stmttype);
175 		in_outer_xact = false;
176 	}
177 	else
178 		in_outer_xact = IsInTransactionChain(isTopLevel);
179 
180 	/*
181 	 * Due to static variables vac_context, anl_context and vac_strategy,
182 	 * vacuum() is not reentrant.  This matters when VACUUM FULL or ANALYZE
183 	 * calls a hostile index expression that itself calls ANALYZE.
184 	 */
185 	if (in_vacuum)
186 		ereport(ERROR,
187 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
188 				 errmsg("%s cannot be executed from VACUUM or ANALYZE",
189 						stmttype)));
190 
191 	/*
192 	 * Sanity check DISABLE_PAGE_SKIPPING option.
193 	 */
194 	if ((options & VACOPT_FULL) != 0 &&
195 		(options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
196 		ereport(ERROR,
197 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
198 				 errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
199 
200 	/*
201 	 * Send info about dead objects to the statistics collector, unless we are
202 	 * in autovacuum --- autovacuum.c does this for itself.
203 	 */
204 	if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
205 		pgstat_vacuum_stat();
206 
207 	/*
208 	 * Create special memory context for cross-transaction storage.
209 	 *
210 	 * Since it is a child of PortalContext, it will go away eventually even
211 	 * if we suffer an error; there's no need for special abort cleanup logic.
212 	 */
213 	vac_context = AllocSetContextCreate(PortalContext,
214 										"Vacuum",
215 										ALLOCSET_DEFAULT_SIZES);
216 
217 	/*
218 	 * If caller didn't give us a buffer strategy object, make one in the
219 	 * cross-transaction memory context.
220 	 */
221 	if (bstrategy == NULL)
222 	{
223 		MemoryContext old_context = MemoryContextSwitchTo(vac_context);
224 
225 		bstrategy = GetAccessStrategy(BAS_VACUUM);
226 		MemoryContextSwitchTo(old_context);
227 	}
228 	vac_strategy = bstrategy;
229 
230 	/*
231 	 * Build list of relation OID(s) to process, putting it in vac_context for
232 	 * safekeeping.
233 	 */
234 	relations = get_rel_oids(relid, relation);
235 
236 	/*
237 	 * Decide whether we need to start/commit our own transactions.
238 	 *
239 	 * For VACUUM (with or without ANALYZE): always do so, so that we can
240 	 * release locks as soon as possible.  (We could possibly use the outer
241 	 * transaction for a one-table VACUUM, but handling TOAST tables would be
242 	 * problematic.)
243 	 *
244 	 * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
245 	 * start/commit our own transactions.  Also, there's no need to do so if
246 	 * only processing one relation.  For multiple relations when not within a
247 	 * transaction block, and also in an autovacuum worker, use own
248 	 * transactions so we can release locks sooner.
249 	 */
250 	if (options & VACOPT_VACUUM)
251 		use_own_xacts = true;
252 	else
253 	{
254 		Assert(options & VACOPT_ANALYZE);
255 		if (IsAutoVacuumWorkerProcess())
256 			use_own_xacts = true;
257 		else if (in_outer_xact)
258 			use_own_xacts = false;
259 		else if (list_length(relations) > 1)
260 			use_own_xacts = true;
261 		else
262 			use_own_xacts = false;
263 	}
264 
265 	/*
266 	 * vacuum_rel expects to be entered with no transaction active; it will
267 	 * start and commit its own transaction.  But we are called by an SQL
268 	 * command, and so we are executing inside a transaction already. We
269 	 * commit the transaction started in PostgresMain() here, and start
270 	 * another one before exiting to match the commit waiting for us back in
271 	 * PostgresMain().
272 	 */
273 	if (use_own_xacts)
274 	{
275 		Assert(!in_outer_xact);
276 
277 		/* ActiveSnapshot is not set by autovacuum */
278 		if (ActiveSnapshotSet())
279 			PopActiveSnapshot();
280 
281 		/* matches the StartTransaction in PostgresMain() */
282 		CommitTransactionCommand();
283 	}
284 
285 	/* Turn vacuum cost accounting on or off */
286 	PG_TRY();
287 	{
288 		ListCell   *cur;
289 
290 		in_vacuum = true;
291 		VacuumCostActive = (VacuumCostDelay > 0);
292 		VacuumCostBalance = 0;
293 		VacuumPageHit = 0;
294 		VacuumPageMiss = 0;
295 		VacuumPageDirty = 0;
296 
297 		/*
298 		 * Loop to process each selected relation.
299 		 */
300 		foreach(cur, relations)
301 		{
302 			Oid			relid = lfirst_oid(cur);
303 
304 			if (options & VACOPT_VACUUM)
305 			{
306 				if (!vacuum_rel(relid, relation, options, params))
307 					continue;
308 			}
309 
310 			if (options & VACOPT_ANALYZE)
311 			{
312 				/*
313 				 * If using separate xacts, start one for analyze. Otherwise,
314 				 * we can use the outer transaction.
315 				 */
316 				if (use_own_xacts)
317 				{
318 					StartTransactionCommand();
319 					/* functions in indexes may want a snapshot set */
320 					PushActiveSnapshot(GetTransactionSnapshot());
321 				}
322 
323 				analyze_rel(relid, relation, options, params,
324 							va_cols, in_outer_xact, vac_strategy);
325 
326 				if (use_own_xacts)
327 				{
328 					PopActiveSnapshot();
329 					CommitTransactionCommand();
330 				}
331 			}
332 		}
333 	}
334 	PG_CATCH();
335 	{
336 		in_vacuum = false;
337 		VacuumCostActive = false;
338 		PG_RE_THROW();
339 	}
340 	PG_END_TRY();
341 
342 	in_vacuum = false;
343 	VacuumCostActive = false;
344 
345 	/*
346 	 * Finish up processing.
347 	 */
348 	if (use_own_xacts)
349 	{
350 		/* here, we are not in a transaction */
351 
352 		/*
353 		 * This matches the CommitTransaction waiting for us in
354 		 * PostgresMain().
355 		 */
356 		StartTransactionCommand();
357 	}
358 
359 	if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
360 	{
361 		/*
362 		 * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
363 		 * (autovacuum.c does this for itself.)
364 		 */
365 		vac_update_datfrozenxid();
366 	}
367 
368 	/*
369 	 * Clean up working storage --- note we must do this after
370 	 * StartTransactionCommand, else we might be trying to delete the active
371 	 * context!
372 	 */
373 	MemoryContextDelete(vac_context);
374 	vac_context = NULL;
375 }
376 
377 /*
378  * Build a list of Oids for each relation to be processed
379  *
380  * The list is built in vac_context so that it will survive across our
381  * per-relation transactions.
382  */
383 static List *
get_rel_oids(Oid relid,const RangeVar * vacrel)384 get_rel_oids(Oid relid, const RangeVar *vacrel)
385 {
386 	List	   *oid_list = NIL;
387 	MemoryContext oldcontext;
388 
389 	/* OID supplied by VACUUM's caller? */
390 	if (OidIsValid(relid))
391 	{
392 		oldcontext = MemoryContextSwitchTo(vac_context);
393 		oid_list = lappend_oid(oid_list, relid);
394 		MemoryContextSwitchTo(oldcontext);
395 	}
396 	else if (vacrel)
397 	{
398 		/* Process a specific relation, and possibly partitions thereof */
399 		Oid			relid;
400 		HeapTuple	tuple;
401 		Form_pg_class classForm;
402 		bool		include_parts;
403 
404 		/*
405 		 * We transiently take AccessShareLock to protect the syscache lookup
406 		 * below, as well as find_all_inheritors's expectation that the caller
407 		 * holds some lock on the starting relation.
408 		 */
409 		relid = RangeVarGetRelid(vacrel, AccessShareLock, false);
410 
411 		/*
412 		 * To check whether the relation is a partitioned table, fetch its
413 		 * syscache entry.
414 		 */
415 		tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
416 		if (!HeapTupleIsValid(tuple))
417 			elog(ERROR, "cache lookup failed for relation %u", relid);
418 		classForm = (Form_pg_class) GETSTRUCT(tuple);
419 		include_parts = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
420 		ReleaseSysCache(tuple);
421 
422 		/*
423 		 * Make relation list entries for this rel and its partitions, if any.
424 		 * Note that the list returned by find_all_inheritors() includes the
425 		 * passed-in OID at its head.  There's no point in taking locks on the
426 		 * individual partitions yet, and doing so would just add unnecessary
427 		 * deadlock risk.
428 		 */
429 		oldcontext = MemoryContextSwitchTo(vac_context);
430 		if (include_parts)
431 			oid_list = list_concat(oid_list,
432 								   find_all_inheritors(relid, NoLock, NULL));
433 		else
434 			oid_list = lappend_oid(oid_list, relid);
435 		MemoryContextSwitchTo(oldcontext);
436 
437 		/*
438 		 * Release lock again.  This means that by the time we actually try to
439 		 * process the table, it might be gone or renamed.  In the former case
440 		 * we'll silently ignore it; in the latter case we'll process it
441 		 * anyway, but we must beware that the RangeVar doesn't necessarily
442 		 * identify it anymore.  This isn't ideal, perhaps, but there's little
443 		 * practical alternative, since we're typically going to commit this
444 		 * transaction and begin a new one between now and then.  Moreover,
445 		 * holding locks on multiple relations would create significant risk
446 		 * of deadlock.
447 		 */
448 		UnlockRelationOid(relid, AccessShareLock);
449 	}
450 	else
451 	{
452 		/*
453 		 * Process all plain relations and materialized views listed in
454 		 * pg_class
455 		 */
456 		Relation	pgclass;
457 		HeapScanDesc scan;
458 		HeapTuple	tuple;
459 
460 		pgclass = heap_open(RelationRelationId, AccessShareLock);
461 
462 		scan = heap_beginscan_catalog(pgclass, 0, NULL);
463 
464 		while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
465 		{
466 			Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
467 
468 			/*
469 			 * We include partitioned tables here; depending on which
470 			 * operation is to be performed, caller will decide whether to
471 			 * process or ignore them.
472 			 */
473 			if (classForm->relkind != RELKIND_RELATION &&
474 				classForm->relkind != RELKIND_MATVIEW &&
475 				classForm->relkind != RELKIND_PARTITIONED_TABLE)
476 				continue;
477 
478 			/* Make a relation list entry for this rel */
479 			oldcontext = MemoryContextSwitchTo(vac_context);
480 			oid_list = lappend_oid(oid_list, HeapTupleGetOid(tuple));
481 			MemoryContextSwitchTo(oldcontext);
482 		}
483 
484 		heap_endscan(scan);
485 		heap_close(pgclass, AccessShareLock);
486 	}
487 
488 	return oid_list;
489 }
490 
491 /*
492  * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
493  *
494  * The output parameters are:
495  * - oldestXmin is the cutoff value used to distinguish whether tuples are
496  *	 DEAD or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
497  * - freezeLimit is the Xid below which all Xids are replaced by
498  *	 FrozenTransactionId during vacuum.
499  * - xidFullScanLimit (computed from table_freeze_age parameter)
500  *	 represents a minimum Xid value; a table whose relfrozenxid is older than
501  *	 this will have a full-table vacuum applied to it, to freeze tuples across
502  *	 the whole table.  Vacuuming a table younger than this value can use a
503  *	 partial scan.
504  * - multiXactCutoff is the value below which all MultiXactIds are removed from
505  *	 Xmax.
506  * - mxactFullScanLimit is a value against which a table's relminmxid value is
507  *	 compared to produce a full-table vacuum, as with xidFullScanLimit.
508  *
509  * xidFullScanLimit and mxactFullScanLimit can be passed as NULL if caller is
510  * not interested.
511  */
512 void
vacuum_set_xid_limits(Relation rel,int freeze_min_age,int freeze_table_age,int multixact_freeze_min_age,int multixact_freeze_table_age,TransactionId * oldestXmin,TransactionId * freezeLimit,TransactionId * xidFullScanLimit,MultiXactId * multiXactCutoff,MultiXactId * mxactFullScanLimit)513 vacuum_set_xid_limits(Relation rel,
514 					  int freeze_min_age,
515 					  int freeze_table_age,
516 					  int multixact_freeze_min_age,
517 					  int multixact_freeze_table_age,
518 					  TransactionId *oldestXmin,
519 					  TransactionId *freezeLimit,
520 					  TransactionId *xidFullScanLimit,
521 					  MultiXactId *multiXactCutoff,
522 					  MultiXactId *mxactFullScanLimit)
523 {
524 	int			freezemin;
525 	int			mxid_freezemin;
526 	int			effective_multixact_freeze_max_age;
527 	TransactionId limit;
528 	TransactionId safeLimit;
529 	MultiXactId oldestMxact;
530 	MultiXactId mxactLimit;
531 	MultiXactId safeMxactLimit;
532 
533 	/*
534 	 * We can always ignore processes running lazy vacuum.  This is because we
535 	 * use these values only for deciding which tuples we must keep in the
536 	 * tables.  Since lazy vacuum doesn't write its XID anywhere, it's safe to
537 	 * ignore it.  In theory it could be problematic to ignore lazy vacuums in
538 	 * a full vacuum, but keep in mind that only one vacuum process can be
539 	 * working on a particular table at any time, and that each vacuum is
540 	 * always an independent transaction.
541 	 */
542 	*oldestXmin =
543 		TransactionIdLimitedForOldSnapshots(GetOldestXmin(rel, PROCARRAY_FLAGS_VACUUM), rel);
544 
545 	Assert(TransactionIdIsNormal(*oldestXmin));
546 
547 	/*
548 	 * Determine the minimum freeze age to use: as specified by the caller, or
549 	 * vacuum_freeze_min_age, but in any case not more than half
550 	 * autovacuum_freeze_max_age, so that autovacuums to prevent XID
551 	 * wraparound won't occur too frequently.
552 	 */
553 	freezemin = freeze_min_age;
554 	if (freezemin < 0)
555 		freezemin = vacuum_freeze_min_age;
556 	freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
557 	Assert(freezemin >= 0);
558 
559 	/*
560 	 * Compute the cutoff XID, being careful not to generate a "permanent" XID
561 	 */
562 	limit = *oldestXmin - freezemin;
563 	if (!TransactionIdIsNormal(limit))
564 		limit = FirstNormalTransactionId;
565 
566 	/*
567 	 * If oldestXmin is very far back (in practice, more than
568 	 * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
569 	 * freeze age of zero.
570 	 */
571 	safeLimit = ReadNewTransactionId() - autovacuum_freeze_max_age;
572 	if (!TransactionIdIsNormal(safeLimit))
573 		safeLimit = FirstNormalTransactionId;
574 
575 	if (TransactionIdPrecedes(limit, safeLimit))
576 	{
577 		ereport(WARNING,
578 				(errmsg("oldest xmin is far in the past"),
579 				 errhint("Close open transactions soon to avoid wraparound problems.")));
580 		limit = *oldestXmin;
581 	}
582 
583 	*freezeLimit = limit;
584 
585 	/*
586 	 * Compute the multixact age for which freezing is urgent.  This is
587 	 * normally autovacuum_multixact_freeze_max_age, but may be less if we are
588 	 * short of multixact member space.
589 	 */
590 	effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
591 
592 	/*
593 	 * Determine the minimum multixact freeze age to use: as specified by
594 	 * caller, or vacuum_multixact_freeze_min_age, but in any case not more
595 	 * than half effective_multixact_freeze_max_age, so that autovacuums to
596 	 * prevent MultiXact wraparound won't occur too frequently.
597 	 */
598 	mxid_freezemin = multixact_freeze_min_age;
599 	if (mxid_freezemin < 0)
600 		mxid_freezemin = vacuum_multixact_freeze_min_age;
601 	mxid_freezemin = Min(mxid_freezemin,
602 						 effective_multixact_freeze_max_age / 2);
603 	Assert(mxid_freezemin >= 0);
604 
605 	/* compute the cutoff multi, being careful to generate a valid value */
606 	oldestMxact = GetOldestMultiXactId();
607 	mxactLimit = oldestMxact - mxid_freezemin;
608 	if (mxactLimit < FirstMultiXactId)
609 		mxactLimit = FirstMultiXactId;
610 
611 	safeMxactLimit =
612 		ReadNextMultiXactId() - effective_multixact_freeze_max_age;
613 	if (safeMxactLimit < FirstMultiXactId)
614 		safeMxactLimit = FirstMultiXactId;
615 
616 	if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit))
617 	{
618 		ereport(WARNING,
619 				(errmsg("oldest multixact is far in the past"),
620 				 errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
621 		/* Use the safe limit, unless an older mxact is still running */
622 		if (MultiXactIdPrecedes(oldestMxact, safeMxactLimit))
623 			mxactLimit = oldestMxact;
624 		else
625 			mxactLimit = safeMxactLimit;
626 	}
627 
628 	*multiXactCutoff = mxactLimit;
629 
630 	if (xidFullScanLimit != NULL)
631 	{
632 		int			freezetable;
633 
634 		Assert(mxactFullScanLimit != NULL);
635 
636 		/*
637 		 * Determine the table freeze age to use: as specified by the caller,
638 		 * or vacuum_freeze_table_age, but in any case not more than
639 		 * autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
640 		 * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
641 		 * before anti-wraparound autovacuum is launched.
642 		 */
643 		freezetable = freeze_table_age;
644 		if (freezetable < 0)
645 			freezetable = vacuum_freeze_table_age;
646 		freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
647 		Assert(freezetable >= 0);
648 
649 		/*
650 		 * Compute XID limit causing a full-table vacuum, being careful not to
651 		 * generate a "permanent" XID.
652 		 */
653 		limit = ReadNewTransactionId() - freezetable;
654 		if (!TransactionIdIsNormal(limit))
655 			limit = FirstNormalTransactionId;
656 
657 		*xidFullScanLimit = limit;
658 
659 		/*
660 		 * Similar to the above, determine the table freeze age to use for
661 		 * multixacts: as specified by the caller, or
662 		 * vacuum_multixact_freeze_table_age, but in any case not more than
663 		 * autovacuum_multixact_freeze_table_age * 0.95, so that if you have
664 		 * e.g. nightly VACUUM schedule, the nightly VACUUM gets a chance to
665 		 * freeze multixacts before anti-wraparound autovacuum is launched.
666 		 */
667 		freezetable = multixact_freeze_table_age;
668 		if (freezetable < 0)
669 			freezetable = vacuum_multixact_freeze_table_age;
670 		freezetable = Min(freezetable,
671 						  effective_multixact_freeze_max_age * 0.95);
672 		Assert(freezetable >= 0);
673 
674 		/*
675 		 * Compute MultiXact limit causing a full-table vacuum, being careful
676 		 * to generate a valid MultiXact value.
677 		 */
678 		mxactLimit = ReadNextMultiXactId() - freezetable;
679 		if (mxactLimit < FirstMultiXactId)
680 			mxactLimit = FirstMultiXactId;
681 
682 		*mxactFullScanLimit = mxactLimit;
683 	}
684 	else
685 	{
686 		Assert(mxactFullScanLimit == NULL);
687 	}
688 }
689 
690 /*
691  * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
692  *
693  *		If we scanned the whole relation then we should just use the count of
694  *		live tuples seen; but if we did not, we should not blindly extrapolate
695  *		from that number, since VACUUM may have scanned a quite nonrandom
696  *		subset of the table.  When we have only partial information, we take
697  *		the old value of pg_class.reltuples as a measurement of the
698  *		tuple density in the unscanned pages.
699  *
700  *		The is_analyze argument is historical.
701  */
702 double
vac_estimate_reltuples(Relation relation,bool is_analyze,BlockNumber total_pages,BlockNumber scanned_pages,double scanned_tuples)703 vac_estimate_reltuples(Relation relation, bool is_analyze,
704 					   BlockNumber total_pages,
705 					   BlockNumber scanned_pages,
706 					   double scanned_tuples)
707 {
708 	BlockNumber old_rel_pages = relation->rd_rel->relpages;
709 	double		old_rel_tuples = relation->rd_rel->reltuples;
710 	double		old_density;
711 	double		unscanned_pages;
712 	double		total_tuples;
713 
714 	/* If we did scan the whole table, just use the count as-is */
715 	if (scanned_pages >= total_pages)
716 		return scanned_tuples;
717 
718 	/*
719 	 * If scanned_pages is zero but total_pages isn't, keep the existing value
720 	 * of reltuples.  (Note: callers should avoid updating the pg_class
721 	 * statistics in this situation, since no new information has been
722 	 * provided.)
723 	 */
724 	if (scanned_pages == 0)
725 		return old_rel_tuples;
726 
727 	/*
728 	 * If old value of relpages is zero, old density is indeterminate; we
729 	 * can't do much except scale up scanned_tuples to match total_pages.
730 	 */
731 	if (old_rel_pages == 0)
732 		return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
733 
734 	/*
735 	 * Okay, we've covered the corner cases.  The normal calculation is to
736 	 * convert the old measurement to a density (tuples per page), then
737 	 * estimate the number of tuples in the unscanned pages using that figure,
738 	 * and finally add on the number of tuples in the scanned pages.
739 	 */
740 	old_density = old_rel_tuples / old_rel_pages;
741 	unscanned_pages = (double) total_pages - (double) scanned_pages;
742 	total_tuples = old_density * unscanned_pages + scanned_tuples;
743 	return floor(total_tuples + 0.5);
744 }
745 
746 
747 /*
748  *	vac_update_relstats() -- update statistics for one relation
749  *
750  *		Update the whole-relation statistics that are kept in its pg_class
751  *		row.  There are additional stats that will be updated if we are
752  *		doing ANALYZE, but we always update these stats.  This routine works
753  *		for both index and heap relation entries in pg_class.
754  *
755  *		We violate transaction semantics here by overwriting the rel's
756  *		existing pg_class tuple with the new values.  This is reasonably
757  *		safe as long as we're sure that the new values are correct whether or
758  *		not this transaction commits.  The reason for doing this is that if
759  *		we updated these tuples in the usual way, vacuuming pg_class itself
760  *		wouldn't work very well --- by the time we got done with a vacuum
761  *		cycle, most of the tuples in pg_class would've been obsoleted.  Of
762  *		course, this only works for fixed-size not-null columns, but these are.
763  *
764  *		Another reason for doing it this way is that when we are in a lazy
765  *		VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
766  *		Somebody vacuuming pg_class might think they could delete a tuple
767  *		marked with xmin = our xid.
768  *
769  *		In addition to fundamentally nontransactional statistics such as
770  *		relpages and relallvisible, we try to maintain certain lazily-updated
771  *		DDL flags such as relhasindex, by clearing them if no longer correct.
772  *		It's safe to do this in VACUUM, which can't run in parallel with
773  *		CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
774  *		However, it's *not* safe to do it in an ANALYZE that's within an
775  *		outer transaction, because for example the current transaction might
776  *		have dropped the last index; then we'd think relhasindex should be
777  *		cleared, but if the transaction later rolls back this would be wrong.
778  *		So we refrain from updating the DDL flags if we're inside an outer
779  *		transaction.  This is OK since postponing the flag maintenance is
780  *		always allowable.
781  *
782  *		This routine is shared by VACUUM and ANALYZE.
783  */
784 void
vac_update_relstats(Relation relation,BlockNumber num_pages,double num_tuples,BlockNumber num_all_visible_pages,bool hasindex,TransactionId frozenxid,MultiXactId minmulti,bool in_outer_xact)785 vac_update_relstats(Relation relation,
786 					BlockNumber num_pages, double num_tuples,
787 					BlockNumber num_all_visible_pages,
788 					bool hasindex, TransactionId frozenxid,
789 					MultiXactId minmulti,
790 					bool in_outer_xact)
791 {
792 	Oid			relid = RelationGetRelid(relation);
793 	Relation	rd;
794 	HeapTuple	ctup;
795 	Form_pg_class pgcform;
796 	bool		dirty;
797 
798 	rd = heap_open(RelationRelationId, RowExclusiveLock);
799 
800 	/* Fetch a copy of the tuple to scribble on */
801 	ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
802 	if (!HeapTupleIsValid(ctup))
803 		elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
804 			 relid);
805 	pgcform = (Form_pg_class) GETSTRUCT(ctup);
806 
807 	/* Apply statistical updates, if any, to copied tuple */
808 
809 	dirty = false;
810 	if (pgcform->relpages != (int32) num_pages)
811 	{
812 		pgcform->relpages = (int32) num_pages;
813 		dirty = true;
814 	}
815 	if (pgcform->reltuples != (float4) num_tuples)
816 	{
817 		pgcform->reltuples = (float4) num_tuples;
818 		dirty = true;
819 	}
820 	if (pgcform->relallvisible != (int32) num_all_visible_pages)
821 	{
822 		pgcform->relallvisible = (int32) num_all_visible_pages;
823 		dirty = true;
824 	}
825 
826 	/* Apply DDL updates, but not inside an outer transaction (see above) */
827 
828 	if (!in_outer_xact)
829 	{
830 		/*
831 		 * If we didn't find any indexes, reset relhasindex.
832 		 */
833 		if (pgcform->relhasindex && !hasindex)
834 		{
835 			pgcform->relhasindex = false;
836 			dirty = true;
837 		}
838 
839 		/*
840 		 * If we have discovered that there are no indexes, then there's no
841 		 * primary key either.  This could be done more thoroughly...
842 		 */
843 		if (pgcform->relhaspkey && !hasindex)
844 		{
845 			pgcform->relhaspkey = false;
846 			dirty = true;
847 		}
848 
849 		/* We also clear relhasrules and relhastriggers if needed */
850 		if (pgcform->relhasrules && relation->rd_rules == NULL)
851 		{
852 			pgcform->relhasrules = false;
853 			dirty = true;
854 		}
855 		if (pgcform->relhastriggers && relation->trigdesc == NULL)
856 		{
857 			pgcform->relhastriggers = false;
858 			dirty = true;
859 		}
860 	}
861 
862 	/*
863 	 * Update relfrozenxid, unless caller passed InvalidTransactionId
864 	 * indicating it has no new data.
865 	 *
866 	 * Ordinarily, we don't let relfrozenxid go backwards: if things are
867 	 * working correctly, the only way the new frozenxid could be older would
868 	 * be if a previous VACUUM was done with a tighter freeze_min_age, in
869 	 * which case we don't want to forget the work it already did.  However,
870 	 * if the stored relfrozenxid is "in the future", then it must be corrupt
871 	 * and it seems best to overwrite it with the cutoff we used this time.
872 	 * This should match vac_update_datfrozenxid() concerning what we consider
873 	 * to be "in the future".
874 	 */
875 	if (TransactionIdIsNormal(frozenxid) &&
876 		pgcform->relfrozenxid != frozenxid &&
877 		(TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid) ||
878 		 TransactionIdPrecedes(ReadNewTransactionId(),
879 							   pgcform->relfrozenxid)))
880 	{
881 		pgcform->relfrozenxid = frozenxid;
882 		dirty = true;
883 	}
884 
885 	/* Similarly for relminmxid */
886 	if (MultiXactIdIsValid(minmulti) &&
887 		pgcform->relminmxid != minmulti &&
888 		(MultiXactIdPrecedes(pgcform->relminmxid, minmulti) ||
889 		 MultiXactIdPrecedes(ReadNextMultiXactId(), pgcform->relminmxid)))
890 	{
891 		pgcform->relminmxid = minmulti;
892 		dirty = true;
893 	}
894 
895 	/* If anything changed, write out the tuple. */
896 	if (dirty)
897 		heap_inplace_update(rd, ctup);
898 
899 	heap_close(rd, RowExclusiveLock);
900 }
901 
902 
903 /*
904  *	vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
905  *
906  *		Update pg_database's datfrozenxid entry for our database to be the
907  *		minimum of the pg_class.relfrozenxid values.
908  *
909  *		Similarly, update our datminmxid to be the minimum of the
910  *		pg_class.relminmxid values.
911  *
912  *		If we are able to advance either pg_database value, also try to
913  *		truncate pg_xact and pg_multixact.
914  *
915  *		We violate transaction semantics here by overwriting the database's
916  *		existing pg_database tuple with the new values.  This is reasonably
917  *		safe since the new values are correct whether or not this transaction
918  *		commits.  As with vac_update_relstats, this avoids leaving dead tuples
919  *		behind after a VACUUM.
920  */
921 void
vac_update_datfrozenxid(void)922 vac_update_datfrozenxid(void)
923 {
924 	HeapTuple	tuple;
925 	Form_pg_database dbform;
926 	Relation	relation;
927 	SysScanDesc scan;
928 	HeapTuple	classTup;
929 	TransactionId newFrozenXid;
930 	MultiXactId newMinMulti;
931 	TransactionId lastSaneFrozenXid;
932 	MultiXactId lastSaneMinMulti;
933 	bool		bogus = false;
934 	bool		dirty = false;
935 
936 	/*
937 	 * Restrict this task to one backend per database.  This avoids race
938 	 * conditions that would move datfrozenxid or datminmxid backward.  It
939 	 * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
940 	 * datfrozenxid passed to an earlier vac_truncate_clog() call.
941 	 */
942 	LockDatabaseFrozenIds(ExclusiveLock);
943 
944 	/*
945 	 * Initialize the "min" calculation with GetOldestXmin, which is a
946 	 * reasonable approximation to the minimum relfrozenxid for not-yet-
947 	 * committed pg_class entries for new tables; see AddNewRelationTuple().
948 	 * So we cannot produce a wrong minimum by starting with this.
949 	 */
950 	newFrozenXid = GetOldestXmin(NULL, PROCARRAY_FLAGS_VACUUM);
951 
952 	/*
953 	 * Similarly, initialize the MultiXact "min" with the value that would be
954 	 * used on pg_class for new tables.  See AddNewRelationTuple().
955 	 */
956 	newMinMulti = GetOldestMultiXactId();
957 
958 	/*
959 	 * Identify the latest relfrozenxid and relminmxid values that we could
960 	 * validly see during the scan.  These are conservative values, but it's
961 	 * not really worth trying to be more exact.
962 	 */
963 	lastSaneFrozenXid = ReadNewTransactionId();
964 	lastSaneMinMulti = ReadNextMultiXactId();
965 
966 	/*
967 	 * We must seqscan pg_class to find the minimum Xid, because there is no
968 	 * index that can help us here.
969 	 */
970 	relation = heap_open(RelationRelationId, AccessShareLock);
971 
972 	scan = systable_beginscan(relation, InvalidOid, false,
973 							  NULL, 0, NULL);
974 
975 	while ((classTup = systable_getnext(scan)) != NULL)
976 	{
977 		Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
978 
979 		/*
980 		 * Only consider relations able to hold unfrozen XIDs (anything else
981 		 * should have InvalidTransactionId in relfrozenxid anyway.)
982 		 */
983 		if (classForm->relkind != RELKIND_RELATION &&
984 			classForm->relkind != RELKIND_MATVIEW &&
985 			classForm->relkind != RELKIND_TOASTVALUE)
986 			continue;
987 
988 		Assert(TransactionIdIsNormal(classForm->relfrozenxid));
989 		Assert(MultiXactIdIsValid(classForm->relminmxid));
990 
991 		/*
992 		 * If things are working properly, no relation should have a
993 		 * relfrozenxid or relminmxid that is "in the future".  However, such
994 		 * cases have been known to arise due to bugs in pg_upgrade.  If we
995 		 * see any entries that are "in the future", chicken out and don't do
996 		 * anything.  This ensures we won't truncate clog before those
997 		 * relations have been scanned and cleaned up.
998 		 */
999 		if (TransactionIdPrecedes(lastSaneFrozenXid, classForm->relfrozenxid) ||
1000 			MultiXactIdPrecedes(lastSaneMinMulti, classForm->relminmxid))
1001 		{
1002 			bogus = true;
1003 			break;
1004 		}
1005 
1006 		if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
1007 			newFrozenXid = classForm->relfrozenxid;
1008 
1009 		if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
1010 			newMinMulti = classForm->relminmxid;
1011 	}
1012 
1013 	/* we're done with pg_class */
1014 	systable_endscan(scan);
1015 	heap_close(relation, AccessShareLock);
1016 
1017 	/* chicken out if bogus data found */
1018 	if (bogus)
1019 		return;
1020 
1021 	Assert(TransactionIdIsNormal(newFrozenXid));
1022 	Assert(MultiXactIdIsValid(newMinMulti));
1023 
1024 	/* Now fetch the pg_database tuple we need to update. */
1025 	relation = heap_open(DatabaseRelationId, RowExclusiveLock);
1026 
1027 	/* Fetch a copy of the tuple to scribble on */
1028 	tuple = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
1029 	if (!HeapTupleIsValid(tuple))
1030 		elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
1031 	dbform = (Form_pg_database) GETSTRUCT(tuple);
1032 
1033 	/*
1034 	 * As in vac_update_relstats(), we ordinarily don't want to let
1035 	 * datfrozenxid go backward; but if it's "in the future" then it must be
1036 	 * corrupt and it seems best to overwrite it.
1037 	 */
1038 	if (dbform->datfrozenxid != newFrozenXid &&
1039 		(TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
1040 		 TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
1041 	{
1042 		dbform->datfrozenxid = newFrozenXid;
1043 		dirty = true;
1044 	}
1045 	else
1046 		newFrozenXid = dbform->datfrozenxid;
1047 
1048 	/* Ditto for datminmxid */
1049 	if (dbform->datminmxid != newMinMulti &&
1050 		(MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1051 		 MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1052 	{
1053 		dbform->datminmxid = newMinMulti;
1054 		dirty = true;
1055 	}
1056 	else
1057 		newMinMulti = dbform->datminmxid;
1058 
1059 	if (dirty)
1060 		heap_inplace_update(relation, tuple);
1061 
1062 	heap_freetuple(tuple);
1063 	heap_close(relation, RowExclusiveLock);
1064 
1065 	/*
1066 	 * If we were able to advance datfrozenxid or datminmxid, see if we can
1067 	 * truncate pg_xact and/or pg_multixact.  Also do it if the shared
1068 	 * XID-wrap-limit info is stale, since this action will update that too.
1069 	 */
1070 	if (dirty || ForceTransactionIdLimitUpdate())
1071 		vac_truncate_clog(newFrozenXid, newMinMulti,
1072 						  lastSaneFrozenXid, lastSaneMinMulti);
1073 }
1074 
1075 
1076 /*
1077  *	vac_truncate_clog() -- attempt to truncate the commit log
1078  *
1079  *		Scan pg_database to determine the system-wide oldest datfrozenxid,
1080  *		and use it to truncate the transaction commit log (pg_xact).
1081  *		Also update the XID wrap limit info maintained by varsup.c.
1082  *		Likewise for datminmxid.
1083  *
1084  *		The passed frozenXID and minMulti are the updated values for my own
1085  *		pg_database entry. They're used to initialize the "min" calculations.
1086  *		The caller also passes the "last sane" XID and MXID, since it has
1087  *		those at hand already.
1088  *
1089  *		This routine is only invoked when we've managed to change our
1090  *		DB's datfrozenxid/datminmxid values, or we found that the shared
1091  *		XID-wrap-limit info is stale.
1092  */
1093 static void
vac_truncate_clog(TransactionId frozenXID,MultiXactId minMulti,TransactionId lastSaneFrozenXid,MultiXactId lastSaneMinMulti)1094 vac_truncate_clog(TransactionId frozenXID,
1095 				  MultiXactId minMulti,
1096 				  TransactionId lastSaneFrozenXid,
1097 				  MultiXactId lastSaneMinMulti)
1098 {
1099 	TransactionId nextXID = ReadNewTransactionId();
1100 	Relation	relation;
1101 	HeapScanDesc scan;
1102 	HeapTuple	tuple;
1103 	Oid			oldestxid_datoid;
1104 	Oid			minmulti_datoid;
1105 	bool		bogus = false;
1106 	bool		frozenAlreadyWrapped = false;
1107 
1108 	/* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
1109 	LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
1110 
1111 	/* init oldest datoids to sync with my frozenXID/minMulti values */
1112 	oldestxid_datoid = MyDatabaseId;
1113 	minmulti_datoid = MyDatabaseId;
1114 
1115 	/*
1116 	 * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1117 	 *
1118 	 * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1119 	 * the values could change while we look at them.  Fetch each one just
1120 	 * once to ensure sane behavior of the comparison logic.  (Here, as in
1121 	 * many other places, we assume that fetching or updating an XID in shared
1122 	 * storage is atomic.)
1123 	 *
1124 	 * Note: we need not worry about a race condition with new entries being
1125 	 * inserted by CREATE DATABASE.  Any such entry will have a copy of some
1126 	 * existing DB's datfrozenxid, and that source DB cannot be ours because
1127 	 * of the interlock against copying a DB containing an active backend.
1128 	 * Hence the new entry will not reduce the minimum.  Also, if two VACUUMs
1129 	 * concurrently modify the datfrozenxid's of different databases, the
1130 	 * worst possible outcome is that pg_xact is not truncated as aggressively
1131 	 * as it could be.
1132 	 */
1133 	relation = heap_open(DatabaseRelationId, AccessShareLock);
1134 
1135 	scan = heap_beginscan_catalog(relation, 0, NULL);
1136 
1137 	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1138 	{
1139 		volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1140 		TransactionId datfrozenxid = dbform->datfrozenxid;
1141 		TransactionId datminmxid = dbform->datminmxid;
1142 
1143 		Assert(TransactionIdIsNormal(datfrozenxid));
1144 		Assert(MultiXactIdIsValid(datminmxid));
1145 
1146 		/*
1147 		 * If things are working properly, no database should have a
1148 		 * datfrozenxid or datminmxid that is "in the future".  However, such
1149 		 * cases have been known to arise due to bugs in pg_upgrade.  If we
1150 		 * see any entries that are "in the future", chicken out and don't do
1151 		 * anything.  This ensures we won't truncate clog before those
1152 		 * databases have been scanned and cleaned up.  (We will issue the
1153 		 * "already wrapped" warning if appropriate, though.)
1154 		 */
1155 		if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1156 			MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1157 			bogus = true;
1158 
1159 		if (TransactionIdPrecedes(nextXID, datfrozenxid))
1160 			frozenAlreadyWrapped = true;
1161 		else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1162 		{
1163 			frozenXID = datfrozenxid;
1164 			oldestxid_datoid = HeapTupleGetOid(tuple);
1165 		}
1166 
1167 		if (MultiXactIdPrecedes(datminmxid, minMulti))
1168 		{
1169 			minMulti = datminmxid;
1170 			minmulti_datoid = HeapTupleGetOid(tuple);
1171 		}
1172 	}
1173 
1174 	heap_endscan(scan);
1175 
1176 	heap_close(relation, AccessShareLock);
1177 
1178 	/*
1179 	 * Do not truncate CLOG if we seem to have suffered wraparound already;
1180 	 * the computed minimum XID might be bogus.  This case should now be
1181 	 * impossible due to the defenses in GetNewTransactionId, but we keep the
1182 	 * test anyway.
1183 	 */
1184 	if (frozenAlreadyWrapped)
1185 	{
1186 		ereport(WARNING,
1187 				(errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1188 				 errdetail("You might have already suffered transaction-wraparound data loss.")));
1189 		return;
1190 	}
1191 
1192 	/* chicken out if data is bogus in any other way */
1193 	if (bogus)
1194 		return;
1195 
1196 	/*
1197 	 * Advance the oldest value for commit timestamps before truncating, so
1198 	 * that if a user requests a timestamp for a transaction we're truncating
1199 	 * away right after this point, they get NULL instead of an ugly "file not
1200 	 * found" error from slru.c.  This doesn't matter for xact/multixact
1201 	 * because they are not subject to arbitrary lookups from users.
1202 	 */
1203 	AdvanceOldestCommitTsXid(frozenXID);
1204 
1205 	/*
1206 	 * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1207 	 */
1208 	TruncateCLOG(frozenXID, oldestxid_datoid);
1209 	TruncateCommitTs(frozenXID);
1210 	TruncateMultiXact(minMulti, minmulti_datoid);
1211 
1212 	/*
1213 	 * Update the wrap limit for GetNewTransactionId and creation of new
1214 	 * MultiXactIds.  Note: these functions will also signal the postmaster
1215 	 * for an(other) autovac cycle if needed.   XXX should we avoid possibly
1216 	 * signalling twice?
1217 	 */
1218 	SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1219 	SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
1220 
1221 	LWLockRelease(WrapLimitsVacuumLock);
1222 }
1223 
1224 
1225 /*
1226  *	vacuum_rel() -- vacuum one heap relation
1227  *
1228  *		relid identifies the relation to vacuum.  If relation is supplied,
1229  *		use the name therein for reporting any failure to open/lock the rel;
1230  *		do not use it once we've successfully opened the rel, since it might
1231  *		be stale.
1232  *
1233  *		Doing one heap at a time incurs extra overhead, since we need to
1234  *		check that the heap exists again just before we vacuum it.  The
1235  *		reason that we do this is so that vacuuming can be spread across
1236  *		many small transactions.  Otherwise, two-phase locking would require
1237  *		us to lock the entire database during one pass of the vacuum cleaner.
1238  *
1239  *		At entry and exit, we are not inside a transaction.
1240  */
1241 static bool
vacuum_rel(Oid relid,RangeVar * relation,int options,VacuumParams * params)1242 vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params)
1243 {
1244 	LOCKMODE	lmode;
1245 	Relation	onerel;
1246 	LockRelId	onerelid;
1247 	Oid			toast_relid;
1248 	Oid			save_userid;
1249 	int			save_sec_context;
1250 	int			save_nestlevel;
1251 
1252 	Assert(params != NULL);
1253 
1254 	/* Begin a transaction for vacuuming this relation */
1255 	StartTransactionCommand();
1256 
1257 	/*
1258 	 * Functions in indexes may want a snapshot set.  Also, setting a snapshot
1259 	 * ensures that RecentGlobalXmin is kept truly recent.
1260 	 */
1261 	PushActiveSnapshot(GetTransactionSnapshot());
1262 
1263 	if (!(options & VACOPT_FULL))
1264 	{
1265 		/*
1266 		 * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
1267 		 * other concurrent VACUUMs know that they can ignore this one while
1268 		 * determining their OldestXmin.  (The reason we don't set it during a
1269 		 * full VACUUM is exactly that we may have to run user-defined
1270 		 * functions for functional indexes, and we want to make sure that if
1271 		 * they use the snapshot set above, any tuples it requires can't get
1272 		 * removed from other tables.  An index function that depends on the
1273 		 * contents of other tables is arguably broken, but we won't break it
1274 		 * here by violating transaction semantics.)
1275 		 *
1276 		 * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
1277 		 * autovacuum; it's used to avoid canceling a vacuum that was invoked
1278 		 * in an emergency.
1279 		 *
1280 		 * Note: these flags remain set until CommitTransaction or
1281 		 * AbortTransaction.  We don't want to clear them until we reset
1282 		 * MyPgXact->xid/xmin, else OldestXmin might appear to go backwards,
1283 		 * which is probably Not Good.
1284 		 */
1285 		LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1286 		MyPgXact->vacuumFlags |= PROC_IN_VACUUM;
1287 		if (params->is_wraparound)
1288 			MyPgXact->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND;
1289 		LWLockRelease(ProcArrayLock);
1290 	}
1291 
1292 	/*
1293 	 * Check for user-requested abort.  Note we want this to be inside a
1294 	 * transaction, so xact.c doesn't issue useless WARNING.
1295 	 */
1296 	CHECK_FOR_INTERRUPTS();
1297 
1298 	/*
1299 	 * Determine the type of lock we want --- hard exclusive lock for a FULL
1300 	 * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
1301 	 * way, we can be sure that no other backend is vacuuming the same table.
1302 	 */
1303 	lmode = (options & VACOPT_FULL) ? AccessExclusiveLock : ShareUpdateExclusiveLock;
1304 
1305 	/*
1306 	 * Open the relation and get the appropriate lock on it.
1307 	 *
1308 	 * There's a race condition here: the rel may have gone away since the
1309 	 * last time we saw it.  If so, we don't need to vacuum it.
1310 	 *
1311 	 * If we've been asked not to wait for the relation lock, acquire it first
1312 	 * in non-blocking mode, before calling try_relation_open().
1313 	 */
1314 	if (!(options & VACOPT_NOWAIT))
1315 		onerel = try_relation_open(relid, lmode);
1316 	else if (ConditionalLockRelationOid(relid, lmode))
1317 		onerel = try_relation_open(relid, NoLock);
1318 	else
1319 	{
1320 		onerel = NULL;
1321 		if (relation &&
1322 			IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
1323 			ereport(LOG,
1324 					(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
1325 					 errmsg("skipping vacuum of \"%s\" --- lock not available",
1326 							relation->relname)));
1327 	}
1328 
1329 	if (!onerel)
1330 	{
1331 		PopActiveSnapshot();
1332 		CommitTransactionCommand();
1333 		return false;
1334 	}
1335 
1336 	/*
1337 	 * Check permissions.
1338 	 *
1339 	 * We allow the user to vacuum a table if he is superuser, the table
1340 	 * owner, or the database owner (but in the latter case, only if it's not
1341 	 * a shared relation).  pg_class_ownercheck includes the superuser case.
1342 	 *
1343 	 * Note we choose to treat permissions failure as a WARNING and keep
1344 	 * trying to vacuum the rest of the DB --- is this appropriate?
1345 	 */
1346 	if (!(pg_class_ownercheck(RelationGetRelid(onerel), GetUserId()) ||
1347 		  (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !onerel->rd_rel->relisshared)))
1348 	{
1349 		if (onerel->rd_rel->relisshared)
1350 			ereport(WARNING,
1351 					(errmsg("skipping \"%s\" --- only superuser can vacuum it",
1352 							RelationGetRelationName(onerel))));
1353 		else if (onerel->rd_rel->relnamespace == PG_CATALOG_NAMESPACE)
1354 			ereport(WARNING,
1355 					(errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it",
1356 							RelationGetRelationName(onerel))));
1357 		else
1358 			ereport(WARNING,
1359 					(errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
1360 							RelationGetRelationName(onerel))));
1361 		relation_close(onerel, lmode);
1362 		PopActiveSnapshot();
1363 		CommitTransactionCommand();
1364 		return false;
1365 	}
1366 
1367 	/*
1368 	 * Check that it's a vacuumable relation; we used to do this in
1369 	 * get_rel_oids() but seems safer to check after we've locked the
1370 	 * relation.
1371 	 */
1372 	if (onerel->rd_rel->relkind != RELKIND_RELATION &&
1373 		onerel->rd_rel->relkind != RELKIND_MATVIEW &&
1374 		onerel->rd_rel->relkind != RELKIND_TOASTVALUE &&
1375 		onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
1376 	{
1377 		ereport(WARNING,
1378 				(errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
1379 						RelationGetRelationName(onerel))));
1380 		relation_close(onerel, lmode);
1381 		PopActiveSnapshot();
1382 		CommitTransactionCommand();
1383 		return false;
1384 	}
1385 
1386 	/*
1387 	 * Silently ignore tables that are temp tables of other backends ---
1388 	 * trying to vacuum these will lead to great unhappiness, since their
1389 	 * contents are probably not up-to-date on disk.  (We don't throw a
1390 	 * warning here; it would just lead to chatter during a database-wide
1391 	 * VACUUM.)
1392 	 */
1393 	if (RELATION_IS_OTHER_TEMP(onerel))
1394 	{
1395 		relation_close(onerel, lmode);
1396 		PopActiveSnapshot();
1397 		CommitTransactionCommand();
1398 		return false;
1399 	}
1400 
1401 	/*
1402 	 * Ignore partitioned tables as there is no work to be done.  Since we
1403 	 * release the lock here, it's possible that any partitions added from
1404 	 * this point on will not get processed, but that seems harmless.
1405 	 */
1406 	if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1407 	{
1408 		relation_close(onerel, lmode);
1409 		PopActiveSnapshot();
1410 		CommitTransactionCommand();
1411 
1412 		/* It's OK for other commands to look at this table */
1413 		return true;
1414 	}
1415 
1416 	/*
1417 	 * Get a session-level lock too. This will protect our access to the
1418 	 * relation across multiple transactions, so that we can vacuum the
1419 	 * relation's TOAST table (if any) secure in the knowledge that no one is
1420 	 * deleting the parent relation.
1421 	 *
1422 	 * NOTE: this cannot block, even if someone else is waiting for access,
1423 	 * because the lock manager knows that both lock requests are from the
1424 	 * same process.
1425 	 */
1426 	onerelid = onerel->rd_lockInfo.lockRelId;
1427 	LockRelationIdForSession(&onerelid, lmode);
1428 
1429 	/*
1430 	 * Remember the relation's TOAST relation for later, if the caller asked
1431 	 * us to process it.  In VACUUM FULL, though, the toast table is
1432 	 * automatically rebuilt by cluster_rel so we shouldn't recurse to it.
1433 	 */
1434 	if (!(options & VACOPT_SKIPTOAST) && !(options & VACOPT_FULL))
1435 		toast_relid = onerel->rd_rel->reltoastrelid;
1436 	else
1437 		toast_relid = InvalidOid;
1438 
1439 	/*
1440 	 * Switch to the table owner's userid, so that any index functions are run
1441 	 * as that user.  Also lock down security-restricted operations and
1442 	 * arrange to make GUC variable changes local to this command. (This is
1443 	 * unnecessary, but harmless, for lazy VACUUM.)
1444 	 */
1445 	GetUserIdAndSecContext(&save_userid, &save_sec_context);
1446 	SetUserIdAndSecContext(onerel->rd_rel->relowner,
1447 						   save_sec_context | SECURITY_RESTRICTED_OPERATION);
1448 	save_nestlevel = NewGUCNestLevel();
1449 
1450 	/*
1451 	 * Do the actual work --- either FULL or "lazy" vacuum
1452 	 */
1453 	if (options & VACOPT_FULL)
1454 	{
1455 		/* close relation before vacuuming, but hold lock until commit */
1456 		relation_close(onerel, NoLock);
1457 		onerel = NULL;
1458 
1459 		/* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
1460 		cluster_rel(relid, InvalidOid, false,
1461 					(options & VACOPT_VERBOSE) != 0);
1462 	}
1463 	else
1464 		lazy_vacuum_rel(onerel, options, params, vac_strategy);
1465 
1466 	/* Roll back any GUC changes executed by index functions */
1467 	AtEOXact_GUC(false, save_nestlevel);
1468 
1469 	/* Restore userid and security context */
1470 	SetUserIdAndSecContext(save_userid, save_sec_context);
1471 
1472 	/* all done with this class, but hold lock until commit */
1473 	if (onerel)
1474 		relation_close(onerel, NoLock);
1475 
1476 	/*
1477 	 * Complete the transaction and free all temporary memory used.
1478 	 */
1479 	PopActiveSnapshot();
1480 	CommitTransactionCommand();
1481 
1482 	/*
1483 	 * If the relation has a secondary toast rel, vacuum that too while we
1484 	 * still hold the session lock on the master table.  Note however that
1485 	 * "analyze" will not get done on the toast table.  This is good, because
1486 	 * the toaster always uses hardcoded index access and statistics are
1487 	 * totally unimportant for toast relations.
1488 	 */
1489 	if (toast_relid != InvalidOid)
1490 		vacuum_rel(toast_relid, NULL, options, params);
1491 
1492 	/*
1493 	 * Now release the session-level lock on the master table.
1494 	 */
1495 	UnlockRelationIdForSession(&onerelid, lmode);
1496 
1497 	/* Report that we really did it. */
1498 	return true;
1499 }
1500 
1501 
1502 /*
1503  * Open all the vacuumable indexes of the given relation, obtaining the
1504  * specified kind of lock on each.  Return an array of Relation pointers for
1505  * the indexes into *Irel, and the number of indexes into *nindexes.
1506  *
1507  * We consider an index vacuumable if it is marked insertable (IndexIsReady).
1508  * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
1509  * execution, and what we have is too corrupt to be processable.  We will
1510  * vacuum even if the index isn't indisvalid; this is important because in a
1511  * unique index, uniqueness checks will be performed anyway and had better not
1512  * hit dangling index pointers.
1513  */
1514 void
vac_open_indexes(Relation relation,LOCKMODE lockmode,int * nindexes,Relation ** Irel)1515 vac_open_indexes(Relation relation, LOCKMODE lockmode,
1516 				 int *nindexes, Relation **Irel)
1517 {
1518 	List	   *indexoidlist;
1519 	ListCell   *indexoidscan;
1520 	int			i;
1521 
1522 	Assert(lockmode != NoLock);
1523 
1524 	indexoidlist = RelationGetIndexList(relation);
1525 
1526 	/* allocate enough memory for all indexes */
1527 	i = list_length(indexoidlist);
1528 
1529 	if (i > 0)
1530 		*Irel = (Relation *) palloc(i * sizeof(Relation));
1531 	else
1532 		*Irel = NULL;
1533 
1534 	/* collect just the ready indexes */
1535 	i = 0;
1536 	foreach(indexoidscan, indexoidlist)
1537 	{
1538 		Oid			indexoid = lfirst_oid(indexoidscan);
1539 		Relation	indrel;
1540 
1541 		indrel = index_open(indexoid, lockmode);
1542 		if (IndexIsReady(indrel->rd_index))
1543 			(*Irel)[i++] = indrel;
1544 		else
1545 			index_close(indrel, lockmode);
1546 	}
1547 
1548 	*nindexes = i;
1549 
1550 	list_free(indexoidlist);
1551 }
1552 
1553 /*
1554  * Release the resources acquired by vac_open_indexes.  Optionally release
1555  * the locks (say NoLock to keep 'em).
1556  */
1557 void
vac_close_indexes(int nindexes,Relation * Irel,LOCKMODE lockmode)1558 vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
1559 {
1560 	if (Irel == NULL)
1561 		return;
1562 
1563 	while (nindexes--)
1564 	{
1565 		Relation	ind = Irel[nindexes];
1566 
1567 		index_close(ind, lockmode);
1568 	}
1569 	pfree(Irel);
1570 }
1571 
1572 /*
1573  * vacuum_delay_point --- check for interrupts and cost-based delay.
1574  *
1575  * This should be called in each major loop of VACUUM processing,
1576  * typically once per page processed.
1577  */
1578 void
vacuum_delay_point(void)1579 vacuum_delay_point(void)
1580 {
1581 	/* Always check for interrupts */
1582 	CHECK_FOR_INTERRUPTS();
1583 
1584 	/* Nap if appropriate */
1585 	if (VacuumCostActive && !InterruptPending &&
1586 		VacuumCostBalance >= VacuumCostLimit)
1587 	{
1588 		int			msec;
1589 
1590 		msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit;
1591 		if (msec > VacuumCostDelay * 4)
1592 			msec = VacuumCostDelay * 4;
1593 
1594 		pg_usleep(msec * 1000L);
1595 
1596 		VacuumCostBalance = 0;
1597 
1598 		/* update balance values for workers */
1599 		AutoVacuumUpdateDelay();
1600 
1601 		/* Might have gotten an interrupt while sleeping */
1602 		CHECK_FOR_INTERRUPTS();
1603 	}
1604 }
1605