1 /*-------------------------------------------------------------------------
2  *
3  * vacuum.c
4  *	  The postgres vacuum cleaner.
5  *
6  * This file now includes only control and dispatch code for VACUUM and
7  * ANALYZE commands.  Regular VACUUM is implemented in vacuumlazy.c,
8  * ANALYZE in analyze.c, and VACUUM FULL is a variant of CLUSTER, handled
9  * in cluster.c.
10  *
11  *
12  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
13  * Portions Copyright (c) 1994, Regents of the University of California
14  *
15  *
16  * IDENTIFICATION
17  *	  src/backend/commands/vacuum.c
18  *
19  *-------------------------------------------------------------------------
20  */
21 #include "postgres.h"
22 
23 #include <math.h>
24 
25 #include "access/clog.h"
26 #include "access/commit_ts.h"
27 #include "access/genam.h"
28 #include "access/heapam.h"
29 #include "access/htup_details.h"
30 #include "access/multixact.h"
31 #include "access/transam.h"
32 #include "access/xact.h"
33 #include "catalog/namespace.h"
34 #include "catalog/pg_database.h"
35 #include "catalog/pg_namespace.h"
36 #include "commands/cluster.h"
37 #include "commands/vacuum.h"
38 #include "miscadmin.h"
39 #include "pgstat.h"
40 #include "postmaster/autovacuum.h"
41 #include "storage/bufmgr.h"
42 #include "storage/lmgr.h"
43 #include "storage/proc.h"
44 #include "storage/procarray.h"
45 #include "utils/acl.h"
46 #include "utils/fmgroids.h"
47 #include "utils/guc.h"
48 #include "utils/memutils.h"
49 #include "utils/snapmgr.h"
50 #include "utils/syscache.h"
51 #include "utils/tqual.h"
52 
53 
/*
 * GUC parameters
 *
 * vacuum_set_xid_limits() falls back to these values whenever the
 * corresponding caller-supplied age is negative.
 */
int			vacuum_freeze_min_age;
int			vacuum_freeze_table_age;
int			vacuum_multixact_freeze_min_age;
int			vacuum_multixact_freeze_table_age;


/*
 * A few variables that don't seem worth passing around as parameters.
 *
 * vac_context is the cross-transaction memory context that vacuum() creates
 * on entry and deletes (resetting this pointer to NULL) just before it
 * returns.  vac_strategy is the buffer access strategy chosen for the
 * current vacuum() call.  Because both are static, vacuum() is not
 * reentrant.
 */
static MemoryContext vac_context = NULL;
static BufferAccessStrategy vac_strategy;


/* non-export function prototypes */
static List *get_rel_oids(Oid relid, const RangeVar *vacrel);
static void vac_truncate_clog(TransactionId frozenXID,
				  MultiXactId minMulti,
				  TransactionId lastSaneFrozenXid,
				  MultiXactId lastSaneMinMulti);
static bool vacuum_rel(Oid relid, RangeVar *relation, int options,
		   VacuumParams *params);
76 
77 /*
78  * Primary entry point for manual VACUUM and ANALYZE commands
79  *
80  * This is mainly a preparation wrapper for the real operations that will
81  * happen in vacuum().
82  */
83 void
ExecVacuum(VacuumStmt * vacstmt,bool isTopLevel)84 ExecVacuum(VacuumStmt *vacstmt, bool isTopLevel)
85 {
86 	VacuumParams params;
87 
88 	/* sanity checks on options */
89 	Assert(vacstmt->options & (VACOPT_VACUUM | VACOPT_ANALYZE));
90 	Assert((vacstmt->options & VACOPT_VACUUM) ||
91 		   !(vacstmt->options & (VACOPT_FULL | VACOPT_FREEZE)));
92 	Assert((vacstmt->options & VACOPT_ANALYZE) || vacstmt->va_cols == NIL);
93 	Assert(!(vacstmt->options & VACOPT_SKIPTOAST));
94 
95 	/*
96 	 * All freeze ages are zero if the FREEZE option is given; otherwise pass
97 	 * them as -1 which means to use the default values.
98 	 */
99 	if (vacstmt->options & VACOPT_FREEZE)
100 	{
101 		params.freeze_min_age = 0;
102 		params.freeze_table_age = 0;
103 		params.multixact_freeze_min_age = 0;
104 		params.multixact_freeze_table_age = 0;
105 	}
106 	else
107 	{
108 		params.freeze_min_age = -1;
109 		params.freeze_table_age = -1;
110 		params.multixact_freeze_min_age = -1;
111 		params.multixact_freeze_table_age = -1;
112 	}
113 
114 	/* user-invoked vacuum is never "for wraparound" */
115 	params.is_wraparound = false;
116 
117 	/* user-invoked vacuum never uses this parameter */
118 	params.log_min_duration = -1;
119 
120 	/* Now go through the common routine */
121 	vacuum(vacstmt->options, vacstmt->relation, InvalidOid, &params,
122 		   vacstmt->va_cols, NULL, isTopLevel);
123 }
124 
125 /*
126  * Primary entry point for VACUUM and ANALYZE commands.
127  *
128  * options is a bitmask of VacuumOption flags, indicating what to do.
129  *
130  * relid, if not InvalidOid, indicate the relation to process; otherwise,
131  * the RangeVar is used.  (The latter must always be passed, because it's
132  * used for error messages.)
133  *
134  * params contains a set of parameters that can be used to customize the
135  * behavior.
136  *
137  * va_cols is a list of columns to analyze, or NIL to process them all.
138  *
139  * bstrategy is normally given as NULL, but in autovacuum it can be passed
140  * in to use the same buffer strategy object across multiple vacuum() calls.
141  *
142  * isTopLevel should be passed down from ProcessUtility.
143  *
144  * It is the caller's responsibility that all parameters are allocated in a
145  * memory context that will not disappear at transaction commit.
146  */
147 void
vacuum(int options,RangeVar * relation,Oid relid,VacuumParams * params,List * va_cols,BufferAccessStrategy bstrategy,bool isTopLevel)148 vacuum(int options, RangeVar *relation, Oid relid, VacuumParams *params,
149 	   List *va_cols, BufferAccessStrategy bstrategy, bool isTopLevel)
150 {
151 	const char *stmttype;
152 	volatile bool in_outer_xact,
153 				use_own_xacts;
154 	List	   *relations;
155 	static bool in_vacuum = false;
156 
157 	Assert(params != NULL);
158 
159 	stmttype = (options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
160 
161 	/*
162 	 * We cannot run VACUUM inside a user transaction block; if we were inside
163 	 * a transaction, then our commit- and start-transaction-command calls
164 	 * would not have the intended effect!	There are numerous other subtle
165 	 * dependencies on this, too.
166 	 *
167 	 * ANALYZE (without VACUUM) can run either way.
168 	 */
169 	if (options & VACOPT_VACUUM)
170 	{
171 		PreventTransactionChain(isTopLevel, stmttype);
172 		in_outer_xact = false;
173 	}
174 	else
175 		in_outer_xact = IsInTransactionChain(isTopLevel);
176 
177 	/*
178 	 * Due to static variables vac_context, anl_context and vac_strategy,
179 	 * vacuum() is not reentrant.  This matters when VACUUM FULL or ANALYZE
180 	 * calls a hostile index expression that itself calls ANALYZE.
181 	 */
182 	if (in_vacuum)
183 		ereport(ERROR,
184 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
185 				 errmsg("%s cannot be executed from VACUUM or ANALYZE",
186 						stmttype)));
187 
188 	/*
189 	 * Sanity check DISABLE_PAGE_SKIPPING option.
190 	 */
191 	if ((options & VACOPT_FULL) != 0 &&
192 		(options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
193 		ereport(ERROR,
194 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
195 				 errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
196 
197 	/*
198 	 * Send info about dead objects to the statistics collector, unless we are
199 	 * in autovacuum --- autovacuum.c does this for itself.
200 	 */
201 	if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
202 		pgstat_vacuum_stat();
203 
204 	/*
205 	 * Create special memory context for cross-transaction storage.
206 	 *
207 	 * Since it is a child of PortalContext, it will go away eventually even
208 	 * if we suffer an error; there's no need for special abort cleanup logic.
209 	 */
210 	vac_context = AllocSetContextCreate(PortalContext,
211 										"Vacuum",
212 										ALLOCSET_DEFAULT_SIZES);
213 
214 	/*
215 	 * If caller didn't give us a buffer strategy object, make one in the
216 	 * cross-transaction memory context.
217 	 */
218 	if (bstrategy == NULL)
219 	{
220 		MemoryContext old_context = MemoryContextSwitchTo(vac_context);
221 
222 		bstrategy = GetAccessStrategy(BAS_VACUUM);
223 		MemoryContextSwitchTo(old_context);
224 	}
225 	vac_strategy = bstrategy;
226 
227 	/*
228 	 * Build list of relations to process, unless caller gave us one. (If we
229 	 * build one, we put it in vac_context for safekeeping.)
230 	 */
231 	relations = get_rel_oids(relid, relation);
232 
233 	/*
234 	 * Decide whether we need to start/commit our own transactions.
235 	 *
236 	 * For VACUUM (with or without ANALYZE): always do so, so that we can
237 	 * release locks as soon as possible.  (We could possibly use the outer
238 	 * transaction for a one-table VACUUM, but handling TOAST tables would be
239 	 * problematic.)
240 	 *
241 	 * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
242 	 * start/commit our own transactions.  Also, there's no need to do so if
243 	 * only processing one relation.  For multiple relations when not within a
244 	 * transaction block, and also in an autovacuum worker, use own
245 	 * transactions so we can release locks sooner.
246 	 */
247 	if (options & VACOPT_VACUUM)
248 		use_own_xacts = true;
249 	else
250 	{
251 		Assert(options & VACOPT_ANALYZE);
252 		if (IsAutoVacuumWorkerProcess())
253 			use_own_xacts = true;
254 		else if (in_outer_xact)
255 			use_own_xacts = false;
256 		else if (list_length(relations) > 1)
257 			use_own_xacts = true;
258 		else
259 			use_own_xacts = false;
260 	}
261 
262 	/*
263 	 * vacuum_rel expects to be entered with no transaction active; it will
264 	 * start and commit its own transaction.  But we are called by an SQL
265 	 * command, and so we are executing inside a transaction already. We
266 	 * commit the transaction started in PostgresMain() here, and start
267 	 * another one before exiting to match the commit waiting for us back in
268 	 * PostgresMain().
269 	 */
270 	if (use_own_xacts)
271 	{
272 		Assert(!in_outer_xact);
273 
274 		/* ActiveSnapshot is not set by autovacuum */
275 		if (ActiveSnapshotSet())
276 			PopActiveSnapshot();
277 
278 		/* matches the StartTransaction in PostgresMain() */
279 		CommitTransactionCommand();
280 	}
281 
282 	/* Turn vacuum cost accounting on or off */
283 	PG_TRY();
284 	{
285 		ListCell   *cur;
286 
287 		in_vacuum = true;
288 		VacuumCostActive = (VacuumCostDelay > 0);
289 		VacuumCostBalance = 0;
290 		VacuumPageHit = 0;
291 		VacuumPageMiss = 0;
292 		VacuumPageDirty = 0;
293 
294 		/*
295 		 * Loop to process each selected relation.
296 		 */
297 		foreach(cur, relations)
298 		{
299 			Oid			relid = lfirst_oid(cur);
300 
301 			if (options & VACOPT_VACUUM)
302 			{
303 				if (!vacuum_rel(relid, relation, options, params))
304 					continue;
305 			}
306 
307 			if (options & VACOPT_ANALYZE)
308 			{
309 				/*
310 				 * If using separate xacts, start one for analyze. Otherwise,
311 				 * we can use the outer transaction.
312 				 */
313 				if (use_own_xacts)
314 				{
315 					StartTransactionCommand();
316 					/* functions in indexes may want a snapshot set */
317 					PushActiveSnapshot(GetTransactionSnapshot());
318 				}
319 
320 				analyze_rel(relid, relation, options, params,
321 							va_cols, in_outer_xact, vac_strategy);
322 
323 				if (use_own_xacts)
324 				{
325 					PopActiveSnapshot();
326 					CommitTransactionCommand();
327 				}
328 			}
329 		}
330 	}
331 	PG_CATCH();
332 	{
333 		in_vacuum = false;
334 		VacuumCostActive = false;
335 		PG_RE_THROW();
336 	}
337 	PG_END_TRY();
338 
339 	in_vacuum = false;
340 	VacuumCostActive = false;
341 
342 	/*
343 	 * Finish up processing.
344 	 */
345 	if (use_own_xacts)
346 	{
347 		/* here, we are not in a transaction */
348 
349 		/*
350 		 * This matches the CommitTransaction waiting for us in
351 		 * PostgresMain().
352 		 */
353 		StartTransactionCommand();
354 	}
355 
356 	if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
357 	{
358 		/*
359 		 * Update pg_database.datfrozenxid, and truncate pg_clog if possible.
360 		 * (autovacuum.c does this for itself.)
361 		 */
362 		vac_update_datfrozenxid();
363 	}
364 
365 	/*
366 	 * Clean up working storage --- note we must do this after
367 	 * StartTransactionCommand, else we might be trying to delete the active
368 	 * context!
369 	 */
370 	MemoryContextDelete(vac_context);
371 	vac_context = NULL;
372 }
373 
374 /*
375  * Build a list of Oids for each relation to be processed
376  *
377  * The list is built in vac_context so that it will survive across our
378  * per-relation transactions.
379  */
380 static List *
get_rel_oids(Oid relid,const RangeVar * vacrel)381 get_rel_oids(Oid relid, const RangeVar *vacrel)
382 {
383 	List	   *oid_list = NIL;
384 	MemoryContext oldcontext;
385 
386 	/* OID supplied by VACUUM's caller? */
387 	if (OidIsValid(relid))
388 	{
389 		oldcontext = MemoryContextSwitchTo(vac_context);
390 		oid_list = lappend_oid(oid_list, relid);
391 		MemoryContextSwitchTo(oldcontext);
392 	}
393 	else if (vacrel)
394 	{
395 		/* Process a specific relation */
396 		Oid			relid;
397 
398 		/*
399 		 * Since we don't take a lock here, the relation might be gone, or the
400 		 * RangeVar might no longer refer to the OID we look up here.  In the
401 		 * former case, VACUUM will do nothing; in the latter case, it will
402 		 * process the OID we looked up here, rather than the new one. Neither
403 		 * is ideal, but there's little practical alternative, since we're
404 		 * going to commit this transaction and begin a new one between now
405 		 * and then.
406 		 */
407 		relid = RangeVarGetRelid(vacrel, NoLock, false);
408 
409 		/* Make a relation list entry for this guy */
410 		oldcontext = MemoryContextSwitchTo(vac_context);
411 		oid_list = lappend_oid(oid_list, relid);
412 		MemoryContextSwitchTo(oldcontext);
413 	}
414 	else
415 	{
416 		/*
417 		 * Process all plain relations and materialized views listed in
418 		 * pg_class
419 		 */
420 		Relation	pgclass;
421 		HeapScanDesc scan;
422 		HeapTuple	tuple;
423 
424 		pgclass = heap_open(RelationRelationId, AccessShareLock);
425 
426 		scan = heap_beginscan_catalog(pgclass, 0, NULL);
427 
428 		while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
429 		{
430 			Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
431 
432 			if (classForm->relkind != RELKIND_RELATION &&
433 				classForm->relkind != RELKIND_MATVIEW)
434 				continue;
435 
436 			/* Make a relation list entry for this guy */
437 			oldcontext = MemoryContextSwitchTo(vac_context);
438 			oid_list = lappend_oid(oid_list, HeapTupleGetOid(tuple));
439 			MemoryContextSwitchTo(oldcontext);
440 		}
441 
442 		heap_endscan(scan);
443 		heap_close(pgclass, AccessShareLock);
444 	}
445 
446 	return oid_list;
447 }
448 
449 /*
450  * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
451  *
452  * The output parameters are:
453  * - oldestXmin is the cutoff value used to distinguish whether tuples are
454  *	 DEAD or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
455  * - freezeLimit is the Xid below which all Xids are replaced by
456  *	 FrozenTransactionId during vacuum.
457  * - xidFullScanLimit (computed from table_freeze_age parameter)
458  *	 represents a minimum Xid value; a table whose relfrozenxid is older than
459  *	 this will have a full-table vacuum applied to it, to freeze tuples across
460  *	 the whole table.  Vacuuming a table younger than this value can use a
461  *	 partial scan.
462  * - multiXactCutoff is the value below which all MultiXactIds are removed from
463  *	 Xmax.
464  * - mxactFullScanLimit is a value against which a table's relminmxid value is
465  *	 compared to produce a full-table vacuum, as with xidFullScanLimit.
466  *
467  * xidFullScanLimit and mxactFullScanLimit can be passed as NULL if caller is
468  * not interested.
469  */
470 void
vacuum_set_xid_limits(Relation rel,int freeze_min_age,int freeze_table_age,int multixact_freeze_min_age,int multixact_freeze_table_age,TransactionId * oldestXmin,TransactionId * freezeLimit,TransactionId * xidFullScanLimit,MultiXactId * multiXactCutoff,MultiXactId * mxactFullScanLimit)471 vacuum_set_xid_limits(Relation rel,
472 					  int freeze_min_age,
473 					  int freeze_table_age,
474 					  int multixact_freeze_min_age,
475 					  int multixact_freeze_table_age,
476 					  TransactionId *oldestXmin,
477 					  TransactionId *freezeLimit,
478 					  TransactionId *xidFullScanLimit,
479 					  MultiXactId *multiXactCutoff,
480 					  MultiXactId *mxactFullScanLimit)
481 {
482 	int			freezemin;
483 	int			mxid_freezemin;
484 	int			effective_multixact_freeze_max_age;
485 	TransactionId limit;
486 	TransactionId safeLimit;
487 	MultiXactId oldestMxact;
488 	MultiXactId mxactLimit;
489 	MultiXactId safeMxactLimit;
490 
491 	/*
492 	 * We can always ignore processes running lazy vacuum.  This is because we
493 	 * use these values only for deciding which tuples we must keep in the
494 	 * tables.  Since lazy vacuum doesn't write its XID anywhere, it's safe to
495 	 * ignore it.  In theory it could be problematic to ignore lazy vacuums in
496 	 * a full vacuum, but keep in mind that only one vacuum process can be
497 	 * working on a particular table at any time, and that each vacuum is
498 	 * always an independent transaction.
499 	 */
500 	*oldestXmin =
501 		TransactionIdLimitedForOldSnapshots(GetOldestXmin(rel, true), rel);
502 
503 	Assert(TransactionIdIsNormal(*oldestXmin));
504 
505 	/*
506 	 * Determine the minimum freeze age to use: as specified by the caller, or
507 	 * vacuum_freeze_min_age, but in any case not more than half
508 	 * autovacuum_freeze_max_age, so that autovacuums to prevent XID
509 	 * wraparound won't occur too frequently.
510 	 */
511 	freezemin = freeze_min_age;
512 	if (freezemin < 0)
513 		freezemin = vacuum_freeze_min_age;
514 	freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
515 	Assert(freezemin >= 0);
516 
517 	/*
518 	 * Compute the cutoff XID, being careful not to generate a "permanent" XID
519 	 */
520 	limit = *oldestXmin - freezemin;
521 	if (!TransactionIdIsNormal(limit))
522 		limit = FirstNormalTransactionId;
523 
524 	/*
525 	 * If oldestXmin is very far back (in practice, more than
526 	 * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
527 	 * freeze age of zero.
528 	 */
529 	safeLimit = ReadNewTransactionId() - autovacuum_freeze_max_age;
530 	if (!TransactionIdIsNormal(safeLimit))
531 		safeLimit = FirstNormalTransactionId;
532 
533 	if (TransactionIdPrecedes(limit, safeLimit))
534 	{
535 		ereport(WARNING,
536 				(errmsg("oldest xmin is far in the past"),
537 				 errhint("Close open transactions soon to avoid wraparound problems.")));
538 		limit = *oldestXmin;
539 	}
540 
541 	*freezeLimit = limit;
542 
543 	/*
544 	 * Compute the multixact age for which freezing is urgent.  This is
545 	 * normally autovacuum_multixact_freeze_max_age, but may be less if we are
546 	 * short of multixact member space.
547 	 */
548 	effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
549 
550 	/*
551 	 * Determine the minimum multixact freeze age to use: as specified by
552 	 * caller, or vacuum_multixact_freeze_min_age, but in any case not more
553 	 * than half effective_multixact_freeze_max_age, so that autovacuums to
554 	 * prevent MultiXact wraparound won't occur too frequently.
555 	 */
556 	mxid_freezemin = multixact_freeze_min_age;
557 	if (mxid_freezemin < 0)
558 		mxid_freezemin = vacuum_multixact_freeze_min_age;
559 	mxid_freezemin = Min(mxid_freezemin,
560 						 effective_multixact_freeze_max_age / 2);
561 	Assert(mxid_freezemin >= 0);
562 
563 	/* compute the cutoff multi, being careful to generate a valid value */
564 	oldestMxact = GetOldestMultiXactId();
565 	mxactLimit = oldestMxact - mxid_freezemin;
566 	if (mxactLimit < FirstMultiXactId)
567 		mxactLimit = FirstMultiXactId;
568 
569 	safeMxactLimit =
570 		ReadNextMultiXactId() - effective_multixact_freeze_max_age;
571 	if (safeMxactLimit < FirstMultiXactId)
572 		safeMxactLimit = FirstMultiXactId;
573 
574 	if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit))
575 	{
576 		ereport(WARNING,
577 				(errmsg("oldest multixact is far in the past"),
578 				 errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
579 		/* Use the safe limit, unless an older mxact is still running */
580 		if (MultiXactIdPrecedes(oldestMxact, safeMxactLimit))
581 			mxactLimit = oldestMxact;
582 		else
583 			mxactLimit = safeMxactLimit;
584 	}
585 
586 	*multiXactCutoff = mxactLimit;
587 
588 	if (xidFullScanLimit != NULL)
589 	{
590 		int			freezetable;
591 
592 		Assert(mxactFullScanLimit != NULL);
593 
594 		/*
595 		 * Determine the table freeze age to use: as specified by the caller,
596 		 * or vacuum_freeze_table_age, but in any case not more than
597 		 * autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
598 		 * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
599 		 * before anti-wraparound autovacuum is launched.
600 		 */
601 		freezetable = freeze_table_age;
602 		if (freezetable < 0)
603 			freezetable = vacuum_freeze_table_age;
604 		freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
605 		Assert(freezetable >= 0);
606 
607 		/*
608 		 * Compute XID limit causing a full-table vacuum, being careful not to
609 		 * generate a "permanent" XID.
610 		 */
611 		limit = ReadNewTransactionId() - freezetable;
612 		if (!TransactionIdIsNormal(limit))
613 			limit = FirstNormalTransactionId;
614 
615 		*xidFullScanLimit = limit;
616 
617 		/*
618 		 * Similar to the above, determine the table freeze age to use for
619 		 * multixacts: as specified by the caller, or
620 		 * vacuum_multixact_freeze_table_age, but in any case not more than
621 		 * autovacuum_multixact_freeze_table_age * 0.95, so that if you have
622 		 * e.g. nightly VACUUM schedule, the nightly VACUUM gets a chance to
623 		 * freeze multixacts before anti-wraparound autovacuum is launched.
624 		 */
625 		freezetable = multixact_freeze_table_age;
626 		if (freezetable < 0)
627 			freezetable = vacuum_multixact_freeze_table_age;
628 		freezetable = Min(freezetable,
629 						  effective_multixact_freeze_max_age * 0.95);
630 		Assert(freezetable >= 0);
631 
632 		/*
633 		 * Compute MultiXact limit causing a full-table vacuum, being careful
634 		 * to generate a valid MultiXact value.
635 		 */
636 		mxactLimit = ReadNextMultiXactId() - freezetable;
637 		if (mxactLimit < FirstMultiXactId)
638 			mxactLimit = FirstMultiXactId;
639 
640 		*mxactFullScanLimit = mxactLimit;
641 	}
642 	else
643 	{
644 		Assert(mxactFullScanLimit == NULL);
645 	}
646 }
647 
648 /*
649  * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
650  *
651  *		If we scanned the whole relation then we should just use the count of
652  *		live tuples seen; but if we did not, we should not blindly extrapolate
653  *		from that number, since VACUUM may have scanned a quite nonrandom
654  *		subset of the table.  When we have only partial information, we take
655  *		the old value of pg_class.reltuples as a measurement of the
656  *		tuple density in the unscanned pages.
657  *
658  *		The is_analyze argument is historical.
659  */
660 double
vac_estimate_reltuples(Relation relation,bool is_analyze,BlockNumber total_pages,BlockNumber scanned_pages,double scanned_tuples)661 vac_estimate_reltuples(Relation relation, bool is_analyze,
662 					   BlockNumber total_pages,
663 					   BlockNumber scanned_pages,
664 					   double scanned_tuples)
665 {
666 	BlockNumber old_rel_pages = relation->rd_rel->relpages;
667 	double		old_rel_tuples = relation->rd_rel->reltuples;
668 	double		old_density;
669 	double		unscanned_pages;
670 	double		total_tuples;
671 
672 	/* If we did scan the whole table, just use the count as-is */
673 	if (scanned_pages >= total_pages)
674 		return scanned_tuples;
675 
676 	/*
677 	 * If scanned_pages is zero but total_pages isn't, keep the existing value
678 	 * of reltuples.  (Note: callers should avoid updating the pg_class
679 	 * statistics in this situation, since no new information has been
680 	 * provided.)
681 	 */
682 	if (scanned_pages == 0)
683 		return old_rel_tuples;
684 
685 	/*
686 	 * If old value of relpages is zero, old density is indeterminate; we
687 	 * can't do much except scale up scanned_tuples to match total_pages.
688 	 */
689 	if (old_rel_pages == 0)
690 		return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
691 
692 	/*
693 	 * Okay, we've covered the corner cases.  The normal calculation is to
694 	 * convert the old measurement to a density (tuples per page), then
695 	 * estimate the number of tuples in the unscanned pages using that figure,
696 	 * and finally add on the number of tuples in the scanned pages.
697 	 */
698 	old_density = old_rel_tuples / old_rel_pages;
699 	unscanned_pages = (double) total_pages - (double) scanned_pages;
700 	total_tuples = old_density * unscanned_pages + scanned_tuples;
701 	return floor(total_tuples + 0.5);
702 }
703 
704 
705 /*
706  *	vac_update_relstats() -- update statistics for one relation
707  *
708  *		Update the whole-relation statistics that are kept in its pg_class
709  *		row.  There are additional stats that will be updated if we are
710  *		doing ANALYZE, but we always update these stats.  This routine works
711  *		for both index and heap relation entries in pg_class.
712  *
713  *		We violate transaction semantics here by overwriting the rel's
714  *		existing pg_class tuple with the new values.  This is reasonably
715  *		safe as long as we're sure that the new values are correct whether or
716  *		not this transaction commits.  The reason for doing this is that if
717  *		we updated these tuples in the usual way, vacuuming pg_class itself
718  *		wouldn't work very well --- by the time we got done with a vacuum
719  *		cycle, most of the tuples in pg_class would've been obsoleted.  Of
720  *		course, this only works for fixed-size not-null columns, but these are.
721  *
722  *		Another reason for doing it this way is that when we are in a lazy
723  *		VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
724  *		Somebody vacuuming pg_class might think they could delete a tuple
725  *		marked with xmin = our xid.
726  *
727  *		In addition to fundamentally nontransactional statistics such as
728  *		relpages and relallvisible, we try to maintain certain lazily-updated
729  *		DDL flags such as relhasindex, by clearing them if no longer correct.
730  *		It's safe to do this in VACUUM, which can't run in parallel with
731  *		CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
732  *		However, it's *not* safe to do it in an ANALYZE that's within an
733  *		outer transaction, because for example the current transaction might
734  *		have dropped the last index; then we'd think relhasindex should be
735  *		cleared, but if the transaction later rolls back this would be wrong.
736  *		So we refrain from updating the DDL flags if we're inside an outer
737  *		transaction.  This is OK since postponing the flag maintenance is
738  *		always allowable.
739  *
740  *		This routine is shared by VACUUM and ANALYZE.
741  */
742 void
vac_update_relstats(Relation relation,BlockNumber num_pages,double num_tuples,BlockNumber num_all_visible_pages,bool hasindex,TransactionId frozenxid,MultiXactId minmulti,bool in_outer_xact)743 vac_update_relstats(Relation relation,
744 					BlockNumber num_pages, double num_tuples,
745 					BlockNumber num_all_visible_pages,
746 					bool hasindex, TransactionId frozenxid,
747 					MultiXactId minmulti,
748 					bool in_outer_xact)
749 {
750 	Oid			relid = RelationGetRelid(relation);
751 	Relation	rd;
752 	HeapTuple	ctup;
753 	Form_pg_class pgcform;
754 	bool		dirty;
755 
756 	rd = heap_open(RelationRelationId, RowExclusiveLock);
757 
758 	/* Fetch a copy of the tuple to scribble on */
759 	ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
760 	if (!HeapTupleIsValid(ctup))
761 		elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
762 			 relid);
763 	pgcform = (Form_pg_class) GETSTRUCT(ctup);
764 
765 	/* Apply statistical updates, if any, to copied tuple */
766 
767 	dirty = false;
768 	if (pgcform->relpages != (int32) num_pages)
769 	{
770 		pgcform->relpages = (int32) num_pages;
771 		dirty = true;
772 	}
773 	if (pgcform->reltuples != (float4) num_tuples)
774 	{
775 		pgcform->reltuples = (float4) num_tuples;
776 		dirty = true;
777 	}
778 	if (pgcform->relallvisible != (int32) num_all_visible_pages)
779 	{
780 		pgcform->relallvisible = (int32) num_all_visible_pages;
781 		dirty = true;
782 	}
783 
784 	/* Apply DDL updates, but not inside an outer transaction (see above) */
785 
786 	if (!in_outer_xact)
787 	{
788 		/*
789 		 * If we didn't find any indexes, reset relhasindex.
790 		 */
791 		if (pgcform->relhasindex && !hasindex)
792 		{
793 			pgcform->relhasindex = false;
794 			dirty = true;
795 		}
796 
797 		/*
798 		 * If we have discovered that there are no indexes, then there's no
799 		 * primary key either.  This could be done more thoroughly...
800 		 */
801 		if (pgcform->relhaspkey && !hasindex)
802 		{
803 			pgcform->relhaspkey = false;
804 			dirty = true;
805 		}
806 
807 		/* We also clear relhasrules and relhastriggers if needed */
808 		if (pgcform->relhasrules && relation->rd_rules == NULL)
809 		{
810 			pgcform->relhasrules = false;
811 			dirty = true;
812 		}
813 		if (pgcform->relhastriggers && relation->trigdesc == NULL)
814 		{
815 			pgcform->relhastriggers = false;
816 			dirty = true;
817 		}
818 	}
819 
820 	/*
821 	 * Update relfrozenxid, unless caller passed InvalidTransactionId
822 	 * indicating it has no new data.
823 	 *
824 	 * Ordinarily, we don't let relfrozenxid go backwards: if things are
825 	 * working correctly, the only way the new frozenxid could be older would
826 	 * be if a previous VACUUM was done with a tighter freeze_min_age, in
827 	 * which case we don't want to forget the work it already did.  However,
828 	 * if the stored relfrozenxid is "in the future", then it must be corrupt
829 	 * and it seems best to overwrite it with the cutoff we used this time.
830 	 * This should match vac_update_datfrozenxid() concerning what we consider
831 	 * to be "in the future".
832 	 */
833 	if (TransactionIdIsNormal(frozenxid) &&
834 		pgcform->relfrozenxid != frozenxid &&
835 		(TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid) ||
836 		 TransactionIdPrecedes(ReadNewTransactionId(),
837 							   pgcform->relfrozenxid)))
838 	{
839 		pgcform->relfrozenxid = frozenxid;
840 		dirty = true;
841 	}
842 
843 	/* Similarly for relminmxid */
844 	if (MultiXactIdIsValid(minmulti) &&
845 		pgcform->relminmxid != minmulti &&
846 		(MultiXactIdPrecedes(pgcform->relminmxid, minmulti) ||
847 		 MultiXactIdPrecedes(ReadNextMultiXactId(), pgcform->relminmxid)))
848 	{
849 		pgcform->relminmxid = minmulti;
850 		dirty = true;
851 	}
852 
853 	/* If anything changed, write out the tuple. */
854 	if (dirty)
855 		heap_inplace_update(rd, ctup);
856 
857 	heap_close(rd, RowExclusiveLock);
858 }
859 
860 
861 /*
862  *	vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
863  *
864  *		Update pg_database's datfrozenxid entry for our database to be the
865  *		minimum of the pg_class.relfrozenxid values.
866  *
867  *		Similarly, update our datminmxid to be the minimum of the
868  *		pg_class.relminmxid values.
869  *
870  *		If we are able to advance either pg_database value, also try to
871  *		truncate pg_clog and pg_multixact.
872  *
873  *		We violate transaction semantics here by overwriting the database's
874  *		existing pg_database tuple with the new values.  This is reasonably
875  *		safe since the new values are correct whether or not this transaction
876  *		commits.  As with vac_update_relstats, this avoids leaving dead tuples
877  *		behind after a VACUUM.
878  */
879 void
vac_update_datfrozenxid(void)880 vac_update_datfrozenxid(void)
881 {
882 	HeapTuple	tuple;
883 	Form_pg_database dbform;
884 	Relation	relation;
885 	SysScanDesc scan;
886 	HeapTuple	classTup;
887 	TransactionId newFrozenXid;
888 	MultiXactId newMinMulti;
889 	TransactionId lastSaneFrozenXid;
890 	MultiXactId lastSaneMinMulti;
891 	bool		bogus = false;
892 	bool		dirty = false;
893 
894 	/*
895 	 * Restrict this task to one backend per database.  This avoids race
896 	 * conditions that would move datfrozenxid or datminmxid backward.  It
897 	 * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
898 	 * datfrozenxid passed to an earlier vac_truncate_clog() call.
899 	 */
900 	LockDatabaseFrozenIds(ExclusiveLock);
901 
902 	/*
903 	 * Initialize the "min" calculation with GetOldestXmin, which is a
904 	 * reasonable approximation to the minimum relfrozenxid for not-yet-
905 	 * committed pg_class entries for new tables; see AddNewRelationTuple().
906 	 * So we cannot produce a wrong minimum by starting with this.
907 	 */
908 	newFrozenXid = GetOldestXmin(NULL, true);
909 
910 	/*
911 	 * Similarly, initialize the MultiXact "min" with the value that would be
912 	 * used on pg_class for new tables.  See AddNewRelationTuple().
913 	 */
914 	newMinMulti = GetOldestMultiXactId();
915 
916 	/*
917 	 * Identify the latest relfrozenxid and relminmxid values that we could
918 	 * validly see during the scan.  These are conservative values, but it's
919 	 * not really worth trying to be more exact.
920 	 */
921 	lastSaneFrozenXid = ReadNewTransactionId();
922 	lastSaneMinMulti = ReadNextMultiXactId();
923 
924 	/*
925 	 * We must seqscan pg_class to find the minimum Xid, because there is no
926 	 * index that can help us here.
927 	 */
928 	relation = heap_open(RelationRelationId, AccessShareLock);
929 
930 	scan = systable_beginscan(relation, InvalidOid, false,
931 							  NULL, 0, NULL);
932 
933 	while ((classTup = systable_getnext(scan)) != NULL)
934 	{
935 		Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
936 
937 		/*
938 		 * Only consider relations able to hold unfrozen XIDs (anything else
939 		 * should have InvalidTransactionId in relfrozenxid anyway.)
940 		 */
941 		if (classForm->relkind != RELKIND_RELATION &&
942 			classForm->relkind != RELKIND_MATVIEW &&
943 			classForm->relkind != RELKIND_TOASTVALUE)
944 			continue;
945 
946 		Assert(TransactionIdIsNormal(classForm->relfrozenxid));
947 		Assert(MultiXactIdIsValid(classForm->relminmxid));
948 
949 		/*
950 		 * If things are working properly, no relation should have a
951 		 * relfrozenxid or relminmxid that is "in the future".  However, such
952 		 * cases have been known to arise due to bugs in pg_upgrade.  If we
953 		 * see any entries that are "in the future", chicken out and don't do
954 		 * anything.  This ensures we won't truncate clog before those
955 		 * relations have been scanned and cleaned up.
956 		 */
957 		if (TransactionIdPrecedes(lastSaneFrozenXid, classForm->relfrozenxid) ||
958 			MultiXactIdPrecedes(lastSaneMinMulti, classForm->relminmxid))
959 		{
960 			bogus = true;
961 			break;
962 		}
963 
964 		if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
965 			newFrozenXid = classForm->relfrozenxid;
966 
967 		if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
968 			newMinMulti = classForm->relminmxid;
969 	}
970 
971 	/* we're done with pg_class */
972 	systable_endscan(scan);
973 	heap_close(relation, AccessShareLock);
974 
975 	/* chicken out if bogus data found */
976 	if (bogus)
977 		return;
978 
979 	Assert(TransactionIdIsNormal(newFrozenXid));
980 	Assert(MultiXactIdIsValid(newMinMulti));
981 
982 	/* Now fetch the pg_database tuple we need to update. */
983 	relation = heap_open(DatabaseRelationId, RowExclusiveLock);
984 
985 	/* Fetch a copy of the tuple to scribble on */
986 	tuple = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
987 	if (!HeapTupleIsValid(tuple))
988 		elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
989 	dbform = (Form_pg_database) GETSTRUCT(tuple);
990 
991 	/*
992 	 * As in vac_update_relstats(), we ordinarily don't want to let
993 	 * datfrozenxid go backward; but if it's "in the future" then it must be
994 	 * corrupt and it seems best to overwrite it.
995 	 */
996 	if (dbform->datfrozenxid != newFrozenXid &&
997 		(TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
998 		 TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
999 	{
1000 		dbform->datfrozenxid = newFrozenXid;
1001 		dirty = true;
1002 	}
1003 	else
1004 		newFrozenXid = dbform->datfrozenxid;
1005 
1006 	/* Ditto for datminmxid */
1007 	if (dbform->datminmxid != newMinMulti &&
1008 		(MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1009 		 MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1010 	{
1011 		dbform->datminmxid = newMinMulti;
1012 		dirty = true;
1013 	}
1014 	else
1015 		newMinMulti = dbform->datminmxid;
1016 
1017 	if (dirty)
1018 		heap_inplace_update(relation, tuple);
1019 
1020 	heap_freetuple(tuple);
1021 	heap_close(relation, RowExclusiveLock);
1022 
1023 	/*
1024 	 * If we were able to advance datfrozenxid or datminmxid, see if we can
1025 	 * truncate pg_clog and/or pg_multixact.  Also do it if the shared
1026 	 * XID-wrap-limit info is stale, since this action will update that too.
1027 	 */
1028 	if (dirty || ForceTransactionIdLimitUpdate())
1029 		vac_truncate_clog(newFrozenXid, newMinMulti,
1030 						  lastSaneFrozenXid, lastSaneMinMulti);
1031 }
1032 
1033 
1034 /*
1035  *	vac_truncate_clog() -- attempt to truncate the commit log
1036  *
1037  *		Scan pg_database to determine the system-wide oldest datfrozenxid,
1038  *		and use it to truncate the transaction commit log (pg_clog).
1039  *		Also update the XID wrap limit info maintained by varsup.c.
1040  *		Likewise for datminmxid.
1041  *
1042  *		The passed frozenXID and minMulti are the updated values for my own
1043  *		pg_database entry. They're used to initialize the "min" calculations.
1044  *		The caller also passes the "last sane" XID and MXID, since it has
1045  *		those at hand already.
1046  *
1047  *		This routine is only invoked when we've managed to change our
1048  *		DB's datfrozenxid/datminmxid values, or we found that the shared
1049  *		XID-wrap-limit info is stale.
1050  */
1051 static void
vac_truncate_clog(TransactionId frozenXID,MultiXactId minMulti,TransactionId lastSaneFrozenXid,MultiXactId lastSaneMinMulti)1052 vac_truncate_clog(TransactionId frozenXID,
1053 				  MultiXactId minMulti,
1054 				  TransactionId lastSaneFrozenXid,
1055 				  MultiXactId lastSaneMinMulti)
1056 {
1057 	TransactionId nextXID = ReadNewTransactionId();
1058 	Relation	relation;
1059 	HeapScanDesc scan;
1060 	HeapTuple	tuple;
1061 	Oid			oldestxid_datoid;
1062 	Oid			minmulti_datoid;
1063 	bool		bogus = false;
1064 	bool		frozenAlreadyWrapped = false;
1065 
1066 	/* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
1067 	LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
1068 
1069 	/* init oldest datoids to sync with my frozenXID/minMulti values */
1070 	oldestxid_datoid = MyDatabaseId;
1071 	minmulti_datoid = MyDatabaseId;
1072 
1073 	/*
1074 	 * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1075 	 *
1076 	 * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1077 	 * the values could change while we look at them.  Fetch each one just
1078 	 * once to ensure sane behavior of the comparison logic.  (Here, as in
1079 	 * many other places, we assume that fetching or updating an XID in shared
1080 	 * storage is atomic.)
1081 	 *
1082 	 * Note: we need not worry about a race condition with new entries being
1083 	 * inserted by CREATE DATABASE.  Any such entry will have a copy of some
1084 	 * existing DB's datfrozenxid, and that source DB cannot be ours because
1085 	 * of the interlock against copying a DB containing an active backend.
1086 	 * Hence the new entry will not reduce the minimum.  Also, if two VACUUMs
1087 	 * concurrently modify the datfrozenxid's of different databases, the
1088 	 * worst possible outcome is that pg_clog is not truncated as aggressively
1089 	 * as it could be.
1090 	 */
1091 	relation = heap_open(DatabaseRelationId, AccessShareLock);
1092 
1093 	scan = heap_beginscan_catalog(relation, 0, NULL);
1094 
1095 	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1096 	{
1097 		volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1098 		TransactionId datfrozenxid = dbform->datfrozenxid;
1099 		TransactionId datminmxid = dbform->datminmxid;
1100 
1101 		Assert(TransactionIdIsNormal(datfrozenxid));
1102 		Assert(MultiXactIdIsValid(datminmxid));
1103 
1104 		/*
1105 		 * If things are working properly, no database should have a
1106 		 * datfrozenxid or datminmxid that is "in the future".  However, such
1107 		 * cases have been known to arise due to bugs in pg_upgrade.  If we
1108 		 * see any entries that are "in the future", chicken out and don't do
1109 		 * anything.  This ensures we won't truncate clog before those
1110 		 * databases have been scanned and cleaned up.  (We will issue the
1111 		 * "already wrapped" warning if appropriate, though.)
1112 		 */
1113 		if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1114 			MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1115 			bogus = true;
1116 
1117 		if (TransactionIdPrecedes(nextXID, datfrozenxid))
1118 			frozenAlreadyWrapped = true;
1119 		else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1120 		{
1121 			frozenXID = datfrozenxid;
1122 			oldestxid_datoid = HeapTupleGetOid(tuple);
1123 		}
1124 
1125 		if (MultiXactIdPrecedes(datminmxid, minMulti))
1126 		{
1127 			minMulti = datminmxid;
1128 			minmulti_datoid = HeapTupleGetOid(tuple);
1129 		}
1130 	}
1131 
1132 	heap_endscan(scan);
1133 
1134 	heap_close(relation, AccessShareLock);
1135 
1136 	/*
1137 	 * Do not truncate CLOG if we seem to have suffered wraparound already;
1138 	 * the computed minimum XID might be bogus.  This case should now be
1139 	 * impossible due to the defenses in GetNewTransactionId, but we keep the
1140 	 * test anyway.
1141 	 */
1142 	if (frozenAlreadyWrapped)
1143 	{
1144 		ereport(WARNING,
1145 				(errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1146 				 errdetail("You might have already suffered transaction-wraparound data loss.")));
1147 		return;
1148 	}
1149 
1150 	/* chicken out if data is bogus in any other way */
1151 	if (bogus)
1152 		return;
1153 
1154 	/*
1155 	 * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1156 	 */
1157 	TruncateCLOG(frozenXID);
1158 	TruncateCommitTs(frozenXID);
1159 	TruncateMultiXact(minMulti, minmulti_datoid);
1160 
1161 	/*
1162 	 * Update the wrap limit for GetNewTransactionId and creation of new
1163 	 * MultiXactIds.  Note: these functions will also signal the postmaster
1164 	 * for an(other) autovac cycle if needed.   XXX should we avoid possibly
1165 	 * signalling twice?
1166 	 */
1167 	SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1168 	SetMultiXactIdLimit(minMulti, minmulti_datoid);
1169 	AdvanceOldestCommitTsXid(frozenXID);
1170 
1171 	LWLockRelease(WrapLimitsVacuumLock);
1172 }
1173 
1174 
/*
 *	vacuum_rel() -- vacuum one heap relation
 *
 *		Doing one heap at a time incurs extra overhead, since we need to
 *		check that the heap exists again just before we vacuum it.  The
 *		reason that we do this is so that vacuuming can be spread across
 *		many small transactions.  Otherwise, two-phase locking would require
 *		us to lock the entire database during one pass of the vacuum cleaner.
 *
 *		At entry and exit, we are not inside a transaction.
 *
 *		Arguments:
 *			relid		OID of the relation to process.
 *			relation	RangeVar for the relation; within this function it is
 *						used only to name the relation in the "lock not
 *						available" log message, and is handed on unchanged to
 *						the recursive call for the TOAST table.
 *			options		bitmask of VACOPT_* flags; FULL, NOWAIT, SKIPTOAST
 *						and VERBOSE are examined here.
 *			params		must not be NULL; is_wraparound and log_min_duration
 *						are read here, and the struct is passed through to
 *						lazy_vacuum_rel().
 *
 *		Returns true if the relation was actually processed.  Returns false
 *		if it was skipped: it could not be opened or locked, the caller lacks
 *		permission, it is not a vacuumable relkind, or it is another
 *		backend's temp table.
 */
static bool
vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params)
{
	LOCKMODE	lmode;
	Relation	onerel;
	LockRelId	onerelid;
	Oid			toast_relid;
	Oid			save_userid;
	int			save_sec_context;
	int			save_nestlevel;

	Assert(params != NULL);

	/* Begin a transaction for vacuuming this relation */
	StartTransactionCommand();

	/*
	 * Functions in indexes may want a snapshot set.  Also, setting a snapshot
	 * ensures that RecentGlobalXmin is kept truly recent.
	 */
	PushActiveSnapshot(GetTransactionSnapshot());

	if (!(options & VACOPT_FULL))
	{
		/*
		 * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
		 * other concurrent VACUUMs know that they can ignore this one while
		 * determining their OldestXmin.  (The reason we don't set it during a
		 * full VACUUM is exactly that we may have to run user-defined
		 * functions for functional indexes, and we want to make sure that if
		 * they use the snapshot set above, any tuples it requires can't get
		 * removed from other tables.  An index function that depends on the
		 * contents of other tables is arguably broken, but we won't break it
		 * here by violating transaction semantics.)
		 *
		 * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
		 * autovacuum; it's used to avoid canceling a vacuum that was invoked
		 * in an emergency.
		 *
		 * Note: these flags remain set until CommitTransaction or
		 * AbortTransaction.  We don't want to clear them until we reset
		 * MyPgXact->xid/xmin, else OldestXmin might appear to go backwards,
		 * which is probably Not Good.
		 */
		LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
		MyPgXact->vacuumFlags |= PROC_IN_VACUUM;
		if (params->is_wraparound)
			MyPgXact->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND;
		LWLockRelease(ProcArrayLock);
	}

	/*
	 * Check for user-requested abort.  Note we want this to be inside a
	 * transaction, so xact.c doesn't issue useless WARNING.
	 */
	CHECK_FOR_INTERRUPTS();

	/*
	 * Determine the type of lock we want --- hard exclusive lock for a FULL
	 * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
	 * way, we can be sure that no other backend is vacuuming the same table.
	 */
	lmode = (options & VACOPT_FULL) ? AccessExclusiveLock : ShareUpdateExclusiveLock;

	/*
	 * Open the relation and get the appropriate lock on it.
	 *
	 * There's a race condition here: the rel may have gone away since the
	 * last time we saw it.  If so, we don't need to vacuum it.
	 *
	 * If we've been asked not to wait for the relation lock, acquire it first
	 * in non-blocking mode, before calling try_relation_open().
	 */
	if (!(options & VACOPT_NOWAIT))
		onerel = try_relation_open(relid, lmode);
	else if (ConditionalLockRelationOid(relid, lmode))
		onerel = try_relation_open(relid, NoLock);
	else
	{
		/*
		 * Lock not immediately available.  Only autovacuum workers log the
		 * skip, and only when duration logging is enabled for them.
		 */
		onerel = NULL;
		if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
			ereport(LOG,
					(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
				   errmsg("skipping vacuum of \"%s\" --- lock not available",
						  relation->relname)));
	}

	/*
	 * Nothing was opened (relation vanished or lock unavailable), so there
	 * is nothing to close: just end the transaction and report a skip.
	 */
	if (!onerel)
	{
		PopActiveSnapshot();
		CommitTransactionCommand();
		return false;
	}

	/*
	 * Check permissions.
	 *
	 * We allow the user to vacuum a table if he is superuser, the table
	 * owner, or the database owner (but in the latter case, only if it's not
	 * a shared relation).  pg_class_ownercheck includes the superuser case.
	 *
	 * Note we choose to treat permissions failure as a WARNING and keep
	 * trying to vacuum the rest of the DB --- is this appropriate?
	 */
	if (!(pg_class_ownercheck(RelationGetRelid(onerel), GetUserId()) ||
		  (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !onerel->rd_rel->relisshared)))
	{
		/* Pick the message that matches who would have been allowed. */
		if (onerel->rd_rel->relisshared)
			ereport(WARNING,
				  (errmsg("skipping \"%s\" --- only superuser can vacuum it",
						  RelationGetRelationName(onerel))));
		else if (onerel->rd_rel->relnamespace == PG_CATALOG_NAMESPACE)
			ereport(WARNING,
					(errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it",
							RelationGetRelationName(onerel))));
		else
			ereport(WARNING,
					(errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
							RelationGetRelationName(onerel))));
		relation_close(onerel, lmode);
		PopActiveSnapshot();
		CommitTransactionCommand();
		return false;
	}

	/*
	 * Check that it's a vacuumable relation; we used to do this in
	 * get_rel_oids() but seems safer to check after we've locked the
	 * relation.
	 */
	if (onerel->rd_rel->relkind != RELKIND_RELATION &&
		onerel->rd_rel->relkind != RELKIND_MATVIEW &&
		onerel->rd_rel->relkind != RELKIND_TOASTVALUE)
	{
		ereport(WARNING,
				(errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
						RelationGetRelationName(onerel))));
		relation_close(onerel, lmode);
		PopActiveSnapshot();
		CommitTransactionCommand();
		return false;
	}

	/*
	 * Silently ignore tables that are temp tables of other backends ---
	 * trying to vacuum these will lead to great unhappiness, since their
	 * contents are probably not up-to-date on disk.  (We don't throw a
	 * warning here; it would just lead to chatter during a database-wide
	 * VACUUM.)
	 */
	if (RELATION_IS_OTHER_TEMP(onerel))
	{
		relation_close(onerel, lmode);
		PopActiveSnapshot();
		CommitTransactionCommand();
		return false;
	}

	/*
	 * Get a session-level lock too. This will protect our access to the
	 * relation across multiple transactions, so that we can vacuum the
	 * relation's TOAST table (if any) secure in the knowledge that no one is
	 * deleting the parent relation.
	 *
	 * NOTE: this cannot block, even if someone else is waiting for access,
	 * because the lock manager knows that both lock requests are from the
	 * same process.
	 */
	onerelid = onerel->rd_lockInfo.lockRelId;
	LockRelationIdForSession(&onerelid, lmode);

	/*
	 * Remember the relation's TOAST relation for later, if the caller asked
	 * us to process it.  In VACUUM FULL, though, the toast table is
	 * automatically rebuilt by cluster_rel so we shouldn't recurse to it.
	 */
	if (!(options & VACOPT_SKIPTOAST) && !(options & VACOPT_FULL))
		toast_relid = onerel->rd_rel->reltoastrelid;
	else
		toast_relid = InvalidOid;

	/*
	 * Switch to the table owner's userid, so that any index functions are run
	 * as that user.  Also lock down security-restricted operations and
	 * arrange to make GUC variable changes local to this command. (This is
	 * unnecessary, but harmless, for lazy VACUUM.)
	 */
	GetUserIdAndSecContext(&save_userid, &save_sec_context);
	SetUserIdAndSecContext(onerel->rd_rel->relowner,
						   save_sec_context | SECURITY_RESTRICTED_OPERATION);
	save_nestlevel = NewGUCNestLevel();

	/*
	 * Do the actual work --- either FULL or "lazy" vacuum
	 */
	if (options & VACOPT_FULL)
	{
		/* close relation before vacuuming, but hold lock until commit */
		relation_close(onerel, NoLock);
		/* mark it closed so the cleanup below doesn't close it twice */
		onerel = NULL;

		/* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
		cluster_rel(relid, InvalidOid, false,
					(options & VACOPT_VERBOSE) != 0);
	}
	else
		lazy_vacuum_rel(onerel, options, params, vac_strategy);

	/* Roll back any GUC changes executed by index functions */
	AtEOXact_GUC(false, save_nestlevel);

	/* Restore userid and security context */
	SetUserIdAndSecContext(save_userid, save_sec_context);

	/* all done with this class, but hold lock until commit */
	if (onerel)
		relation_close(onerel, NoLock);

	/*
	 * Complete the transaction and free all temporary memory used.
	 */
	PopActiveSnapshot();
	CommitTransactionCommand();

	/*
	 * If the relation has a secondary toast rel, vacuum that too while we
	 * still hold the session lock on the master table.  Note however that
	 * "analyze" will not get done on the toast table.  This is good, because
	 * the toaster always uses hardcoded index access and statistics are
	 * totally unimportant for toast relations.
	 *
	 * The recursive call's result is ignored: we report success for the
	 * parent whether or not the TOAST table was processed.
	 *
	 * NOTE(review): the parent's RangeVar is passed along, so a "lock not
	 * available" message emitted by the recursive call would name the parent
	 * rather than the TOAST table --- confirm that is intended.
	 */
	if (toast_relid != InvalidOid)
		vacuum_rel(toast_relid, relation, options, params);

	/*
	 * Now release the session-level lock on the master table.
	 */
	UnlockRelationIdForSession(&onerelid, lmode);

	/* Report that we really did it. */
	return true;
}
1428 
1429 
1430 /*
1431  * Open all the vacuumable indexes of the given relation, obtaining the
1432  * specified kind of lock on each.  Return an array of Relation pointers for
1433  * the indexes into *Irel, and the number of indexes into *nindexes.
1434  *
1435  * We consider an index vacuumable if it is marked insertable (IndexIsReady).
1436  * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
1437  * execution, and what we have is too corrupt to be processable.  We will
1438  * vacuum even if the index isn't indisvalid; this is important because in a
1439  * unique index, uniqueness checks will be performed anyway and had better not
1440  * hit dangling index pointers.
1441  */
1442 void
vac_open_indexes(Relation relation,LOCKMODE lockmode,int * nindexes,Relation ** Irel)1443 vac_open_indexes(Relation relation, LOCKMODE lockmode,
1444 				 int *nindexes, Relation **Irel)
1445 {
1446 	List	   *indexoidlist;
1447 	ListCell   *indexoidscan;
1448 	int			i;
1449 
1450 	Assert(lockmode != NoLock);
1451 
1452 	indexoidlist = RelationGetIndexList(relation);
1453 
1454 	/* allocate enough memory for all indexes */
1455 	i = list_length(indexoidlist);
1456 
1457 	if (i > 0)
1458 		*Irel = (Relation *) palloc(i * sizeof(Relation));
1459 	else
1460 		*Irel = NULL;
1461 
1462 	/* collect just the ready indexes */
1463 	i = 0;
1464 	foreach(indexoidscan, indexoidlist)
1465 	{
1466 		Oid			indexoid = lfirst_oid(indexoidscan);
1467 		Relation	indrel;
1468 
1469 		indrel = index_open(indexoid, lockmode);
1470 		if (IndexIsReady(indrel->rd_index))
1471 			(*Irel)[i++] = indrel;
1472 		else
1473 			index_close(indrel, lockmode);
1474 	}
1475 
1476 	*nindexes = i;
1477 
1478 	list_free(indexoidlist);
1479 }
1480 
1481 /*
1482  * Release the resources acquired by vac_open_indexes.  Optionally release
1483  * the locks (say NoLock to keep 'em).
1484  */
1485 void
vac_close_indexes(int nindexes,Relation * Irel,LOCKMODE lockmode)1486 vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
1487 {
1488 	if (Irel == NULL)
1489 		return;
1490 
1491 	while (nindexes--)
1492 	{
1493 		Relation	ind = Irel[nindexes];
1494 
1495 		index_close(ind, lockmode);
1496 	}
1497 	pfree(Irel);
1498 }
1499 
1500 /*
1501  * vacuum_delay_point --- check for interrupts and cost-based delay.
1502  *
1503  * This should be called in each major loop of VACUUM processing,
1504  * typically once per page processed.
1505  */
1506 void
vacuum_delay_point(void)1507 vacuum_delay_point(void)
1508 {
1509 	/* Always check for interrupts */
1510 	CHECK_FOR_INTERRUPTS();
1511 
1512 	/* Nap if appropriate */
1513 	if (VacuumCostActive && !InterruptPending &&
1514 		VacuumCostBalance >= VacuumCostLimit)
1515 	{
1516 		int			msec;
1517 
1518 		msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit;
1519 		if (msec > VacuumCostDelay * 4)
1520 			msec = VacuumCostDelay * 4;
1521 
1522 		pg_usleep(msec * 1000L);
1523 
1524 		VacuumCostBalance = 0;
1525 
1526 		/* update balance values for workers */
1527 		AutoVacuumUpdateDelay();
1528 
1529 		/* Might have gotten an interrupt while sleeping */
1530 		CHECK_FOR_INTERRUPTS();
1531 	}
1532 }
1533