1 /*-------------------------------------------------------------------------
2 *
3 * vacuum.c
4 * The postgres vacuum cleaner.
5 *
6 * This file now includes only control and dispatch code for VACUUM and
7 * ANALYZE commands. Regular VACUUM is implemented in vacuumlazy.c,
8 * ANALYZE in analyze.c, and VACUUM FULL is a variant of CLUSTER, handled
9 * in cluster.c.
10 *
11 *
12 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
13 * Portions Copyright (c) 1994, Regents of the University of California
14 *
15 *
16 * IDENTIFICATION
17 * src/backend/commands/vacuum.c
18 *
19 *-------------------------------------------------------------------------
20 */
21 #include "postgres.h"
22
23 #include <math.h>
24
25 #include "access/clog.h"
26 #include "access/commit_ts.h"
27 #include "access/genam.h"
28 #include "access/heapam.h"
29 #include "access/htup_details.h"
30 #include "access/multixact.h"
31 #include "access/transam.h"
32 #include "access/xact.h"
33 #include "catalog/namespace.h"
34 #include "catalog/pg_database.h"
35 #include "catalog/pg_inherits_fn.h"
36 #include "catalog/pg_namespace.h"
37 #include "commands/cluster.h"
38 #include "commands/vacuum.h"
39 #include "miscadmin.h"
40 #include "pgstat.h"
41 #include "postmaster/autovacuum.h"
42 #include "storage/bufmgr.h"
43 #include "storage/lmgr.h"
44 #include "storage/proc.h"
45 #include "storage/procarray.h"
46 #include "utils/acl.h"
47 #include "utils/fmgroids.h"
48 #include "utils/guc.h"
49 #include "utils/memutils.h"
50 #include "utils/snapmgr.h"
51 #include "utils/syscache.h"
52 #include "utils/tqual.h"
53
54
/*
 * GUC parameters
 *
 * vacuum_set_xid_limits() falls back to these whenever its caller passes a
 * negative value for the corresponding age argument.
 */
int vacuum_freeze_min_age;
int vacuum_freeze_table_age;
int vacuum_multixact_freeze_min_age;
int vacuum_multixact_freeze_table_age;


/*
 * A few variables that don't seem worth passing around as parameters.
 *
 * vacuum() assigns both on every call: vac_context is the cross-transaction
 * memory context it creates (and deletes again before returning normally),
 * and vac_strategy is the buffer access strategy used for that call.
 */
static MemoryContext vac_context = NULL;
static BufferAccessStrategy vac_strategy;


/* non-export function prototypes */
static List *get_rel_oids(Oid relid, const RangeVar *vacrel);
static void vac_truncate_clog(TransactionId frozenXID,
				  MultiXactId minMulti,
				  TransactionId lastSaneFrozenXid,
				  MultiXactId lastSaneMinMulti);
static bool vacuum_rel(Oid relid, RangeVar *relation, int options,
		   VacuumParams *params);
77
78 /*
79 * Primary entry point for manual VACUUM and ANALYZE commands
80 *
81 * This is mainly a preparation wrapper for the real operations that will
82 * happen in vacuum().
83 */
84 void
ExecVacuum(VacuumStmt * vacstmt,bool isTopLevel)85 ExecVacuum(VacuumStmt *vacstmt, bool isTopLevel)
86 {
87 VacuumParams params;
88
89 /* sanity checks on options */
90 Assert(vacstmt->options & (VACOPT_VACUUM | VACOPT_ANALYZE));
91 Assert((vacstmt->options & VACOPT_VACUUM) ||
92 !(vacstmt->options & (VACOPT_FULL | VACOPT_FREEZE)));
93 Assert((vacstmt->options & VACOPT_ANALYZE) || vacstmt->va_cols == NIL);
94 Assert(!(vacstmt->options & VACOPT_SKIPTOAST));
95
96 /*
97 * All freeze ages are zero if the FREEZE option is given; otherwise pass
98 * them as -1 which means to use the default values.
99 */
100 if (vacstmt->options & VACOPT_FREEZE)
101 {
102 params.freeze_min_age = 0;
103 params.freeze_table_age = 0;
104 params.multixact_freeze_min_age = 0;
105 params.multixact_freeze_table_age = 0;
106 }
107 else
108 {
109 params.freeze_min_age = -1;
110 params.freeze_table_age = -1;
111 params.multixact_freeze_min_age = -1;
112 params.multixact_freeze_table_age = -1;
113 }
114
115 /* user-invoked vacuum is never "for wraparound" */
116 params.is_wraparound = false;
117
118 /* user-invoked vacuum never uses this parameter */
119 params.log_min_duration = -1;
120
121 /* Now go through the common routine */
122 vacuum(vacstmt->options, vacstmt->relation, InvalidOid, ¶ms,
123 vacstmt->va_cols, NULL, isTopLevel);
124 }
125
126 /*
127 * Primary entry point for VACUUM and ANALYZE commands.
128 *
129 * options is a bitmask of VacuumOption flags, indicating what to do.
130 *
131 * relid, if not InvalidOid, indicates the relation to process; otherwise,
132 * if a RangeVar is supplied, that's what to process; otherwise, we process
133 * all relevant tables in the database. (If both relid and a RangeVar are
134 * supplied, the relid is what is processed, but we use the RangeVar's name
135 * to report any open/lock failure.)
136 *
137 * params contains a set of parameters that can be used to customize the
138 * behavior.
139 *
140 * va_cols is a list of columns to analyze, or NIL to process them all.
141 *
142 * bstrategy is normally given as NULL, but in autovacuum it can be passed
143 * in to use the same buffer strategy object across multiple vacuum() calls.
144 *
145 * isTopLevel should be passed down from ProcessUtility.
146 *
147 * It is the caller's responsibility that all parameters are allocated in a
148 * memory context that will not disappear at transaction commit.
149 */
150 void
vacuum(int options,RangeVar * relation,Oid relid,VacuumParams * params,List * va_cols,BufferAccessStrategy bstrategy,bool isTopLevel)151 vacuum(int options, RangeVar *relation, Oid relid, VacuumParams *params,
152 List *va_cols, BufferAccessStrategy bstrategy, bool isTopLevel)
153 {
154 const char *stmttype;
155 volatile bool in_outer_xact,
156 use_own_xacts;
157 List *relations;
158 static bool in_vacuum = false;
159
160 Assert(params != NULL);
161
162 stmttype = (options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
163
164 /*
165 * We cannot run VACUUM inside a user transaction block; if we were inside
166 * a transaction, then our commit- and start-transaction-command calls
167 * would not have the intended effect! There are numerous other subtle
168 * dependencies on this, too.
169 *
170 * ANALYZE (without VACUUM) can run either way.
171 */
172 if (options & VACOPT_VACUUM)
173 {
174 PreventTransactionChain(isTopLevel, stmttype);
175 in_outer_xact = false;
176 }
177 else
178 in_outer_xact = IsInTransactionChain(isTopLevel);
179
180 /*
181 * Due to static variables vac_context, anl_context and vac_strategy,
182 * vacuum() is not reentrant. This matters when VACUUM FULL or ANALYZE
183 * calls a hostile index expression that itself calls ANALYZE.
184 */
185 if (in_vacuum)
186 ereport(ERROR,
187 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
188 errmsg("%s cannot be executed from VACUUM or ANALYZE",
189 stmttype)));
190
191 /*
192 * Sanity check DISABLE_PAGE_SKIPPING option.
193 */
194 if ((options & VACOPT_FULL) != 0 &&
195 (options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
196 ereport(ERROR,
197 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
198 errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
199
200 /*
201 * Send info about dead objects to the statistics collector, unless we are
202 * in autovacuum --- autovacuum.c does this for itself.
203 */
204 if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
205 pgstat_vacuum_stat();
206
207 /*
208 * Create special memory context for cross-transaction storage.
209 *
210 * Since it is a child of PortalContext, it will go away eventually even
211 * if we suffer an error; there's no need for special abort cleanup logic.
212 */
213 vac_context = AllocSetContextCreate(PortalContext,
214 "Vacuum",
215 ALLOCSET_DEFAULT_SIZES);
216
217 /*
218 * If caller didn't give us a buffer strategy object, make one in the
219 * cross-transaction memory context.
220 */
221 if (bstrategy == NULL)
222 {
223 MemoryContext old_context = MemoryContextSwitchTo(vac_context);
224
225 bstrategy = GetAccessStrategy(BAS_VACUUM);
226 MemoryContextSwitchTo(old_context);
227 }
228 vac_strategy = bstrategy;
229
230 /*
231 * Build list of relation OID(s) to process, putting it in vac_context for
232 * safekeeping.
233 */
234 relations = get_rel_oids(relid, relation);
235
236 /*
237 * Decide whether we need to start/commit our own transactions.
238 *
239 * For VACUUM (with or without ANALYZE): always do so, so that we can
240 * release locks as soon as possible. (We could possibly use the outer
241 * transaction for a one-table VACUUM, but handling TOAST tables would be
242 * problematic.)
243 *
244 * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
245 * start/commit our own transactions. Also, there's no need to do so if
246 * only processing one relation. For multiple relations when not within a
247 * transaction block, and also in an autovacuum worker, use own
248 * transactions so we can release locks sooner.
249 */
250 if (options & VACOPT_VACUUM)
251 use_own_xacts = true;
252 else
253 {
254 Assert(options & VACOPT_ANALYZE);
255 if (IsAutoVacuumWorkerProcess())
256 use_own_xacts = true;
257 else if (in_outer_xact)
258 use_own_xacts = false;
259 else if (list_length(relations) > 1)
260 use_own_xacts = true;
261 else
262 use_own_xacts = false;
263 }
264
265 /*
266 * vacuum_rel expects to be entered with no transaction active; it will
267 * start and commit its own transaction. But we are called by an SQL
268 * command, and so we are executing inside a transaction already. We
269 * commit the transaction started in PostgresMain() here, and start
270 * another one before exiting to match the commit waiting for us back in
271 * PostgresMain().
272 */
273 if (use_own_xacts)
274 {
275 Assert(!in_outer_xact);
276
277 /* ActiveSnapshot is not set by autovacuum */
278 if (ActiveSnapshotSet())
279 PopActiveSnapshot();
280
281 /* matches the StartTransaction in PostgresMain() */
282 CommitTransactionCommand();
283 }
284
285 /* Turn vacuum cost accounting on or off */
286 PG_TRY();
287 {
288 ListCell *cur;
289
290 in_vacuum = true;
291 VacuumCostActive = (VacuumCostDelay > 0);
292 VacuumCostBalance = 0;
293 VacuumPageHit = 0;
294 VacuumPageMiss = 0;
295 VacuumPageDirty = 0;
296
297 /*
298 * Loop to process each selected relation.
299 */
300 foreach(cur, relations)
301 {
302 Oid relid = lfirst_oid(cur);
303
304 if (options & VACOPT_VACUUM)
305 {
306 if (!vacuum_rel(relid, relation, options, params))
307 continue;
308 }
309
310 if (options & VACOPT_ANALYZE)
311 {
312 /*
313 * If using separate xacts, start one for analyze. Otherwise,
314 * we can use the outer transaction.
315 */
316 if (use_own_xacts)
317 {
318 StartTransactionCommand();
319 /* functions in indexes may want a snapshot set */
320 PushActiveSnapshot(GetTransactionSnapshot());
321 }
322
323 analyze_rel(relid, relation, options, params,
324 va_cols, in_outer_xact, vac_strategy);
325
326 if (use_own_xacts)
327 {
328 PopActiveSnapshot();
329 CommitTransactionCommand();
330 }
331 }
332 }
333 }
334 PG_CATCH();
335 {
336 in_vacuum = false;
337 VacuumCostActive = false;
338 PG_RE_THROW();
339 }
340 PG_END_TRY();
341
342 in_vacuum = false;
343 VacuumCostActive = false;
344
345 /*
346 * Finish up processing.
347 */
348 if (use_own_xacts)
349 {
350 /* here, we are not in a transaction */
351
352 /*
353 * This matches the CommitTransaction waiting for us in
354 * PostgresMain().
355 */
356 StartTransactionCommand();
357 }
358
359 if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
360 {
361 /*
362 * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
363 * (autovacuum.c does this for itself.)
364 */
365 vac_update_datfrozenxid();
366 }
367
368 /*
369 * Clean up working storage --- note we must do this after
370 * StartTransactionCommand, else we might be trying to delete the active
371 * context!
372 */
373 MemoryContextDelete(vac_context);
374 vac_context = NULL;
375 }
376
377 /*
378 * Build a list of Oids for each relation to be processed
379 *
380 * The list is built in vac_context so that it will survive across our
381 * per-relation transactions.
382 */
383 static List *
get_rel_oids(Oid relid,const RangeVar * vacrel)384 get_rel_oids(Oid relid, const RangeVar *vacrel)
385 {
386 List *oid_list = NIL;
387 MemoryContext oldcontext;
388
389 /* OID supplied by VACUUM's caller? */
390 if (OidIsValid(relid))
391 {
392 oldcontext = MemoryContextSwitchTo(vac_context);
393 oid_list = lappend_oid(oid_list, relid);
394 MemoryContextSwitchTo(oldcontext);
395 }
396 else if (vacrel)
397 {
398 /* Process a specific relation, and possibly partitions thereof */
399 Oid relid;
400 HeapTuple tuple;
401 Form_pg_class classForm;
402 bool include_parts;
403
404 /*
405 * We transiently take AccessShareLock to protect the syscache lookup
406 * below, as well as find_all_inheritors's expectation that the caller
407 * holds some lock on the starting relation.
408 */
409 relid = RangeVarGetRelid(vacrel, AccessShareLock, false);
410
411 /*
412 * To check whether the relation is a partitioned table, fetch its
413 * syscache entry.
414 */
415 tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
416 if (!HeapTupleIsValid(tuple))
417 elog(ERROR, "cache lookup failed for relation %u", relid);
418 classForm = (Form_pg_class) GETSTRUCT(tuple);
419 include_parts = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
420 ReleaseSysCache(tuple);
421
422 /*
423 * Make relation list entries for this rel and its partitions, if any.
424 * Note that the list returned by find_all_inheritors() includes the
425 * passed-in OID at its head. There's no point in taking locks on the
426 * individual partitions yet, and doing so would just add unnecessary
427 * deadlock risk.
428 */
429 oldcontext = MemoryContextSwitchTo(vac_context);
430 if (include_parts)
431 oid_list = list_concat(oid_list,
432 find_all_inheritors(relid, NoLock, NULL));
433 else
434 oid_list = lappend_oid(oid_list, relid);
435 MemoryContextSwitchTo(oldcontext);
436
437 /*
438 * Release lock again. This means that by the time we actually try to
439 * process the table, it might be gone or renamed. In the former case
440 * we'll silently ignore it; in the latter case we'll process it
441 * anyway, but we must beware that the RangeVar doesn't necessarily
442 * identify it anymore. This isn't ideal, perhaps, but there's little
443 * practical alternative, since we're typically going to commit this
444 * transaction and begin a new one between now and then. Moreover,
445 * holding locks on multiple relations would create significant risk
446 * of deadlock.
447 */
448 UnlockRelationOid(relid, AccessShareLock);
449 }
450 else
451 {
452 /*
453 * Process all plain relations and materialized views listed in
454 * pg_class
455 */
456 Relation pgclass;
457 HeapScanDesc scan;
458 HeapTuple tuple;
459
460 pgclass = heap_open(RelationRelationId, AccessShareLock);
461
462 scan = heap_beginscan_catalog(pgclass, 0, NULL);
463
464 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
465 {
466 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
467
468 /*
469 * We include partitioned tables here; depending on which
470 * operation is to be performed, caller will decide whether to
471 * process or ignore them.
472 */
473 if (classForm->relkind != RELKIND_RELATION &&
474 classForm->relkind != RELKIND_MATVIEW &&
475 classForm->relkind != RELKIND_PARTITIONED_TABLE)
476 continue;
477
478 /* Make a relation list entry for this rel */
479 oldcontext = MemoryContextSwitchTo(vac_context);
480 oid_list = lappend_oid(oid_list, HeapTupleGetOid(tuple));
481 MemoryContextSwitchTo(oldcontext);
482 }
483
484 heap_endscan(scan);
485 heap_close(pgclass, AccessShareLock);
486 }
487
488 return oid_list;
489 }
490
491 /*
492 * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
493 *
494 * The output parameters are:
495 * - oldestXmin is the cutoff value used to distinguish whether tuples are
496 * DEAD or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
497 * - freezeLimit is the Xid below which all Xids are replaced by
498 * FrozenTransactionId during vacuum.
499 * - xidFullScanLimit (computed from freeze_table_age parameter)
500 * represents a minimum Xid value; a table whose relfrozenxid is older than
501 * this will have a full-table vacuum applied to it, to freeze tuples across
502 * the whole table. Vacuuming a table younger than this value can use a
503 * partial scan.
504 * - multiXactCutoff is the value below which all MultiXactIds are removed from
505 * Xmax.
506 * - mxactFullScanLimit is a value against which a table's relminmxid value is
507 * compared to produce a full-table vacuum, as with xidFullScanLimit.
508 *
509 * xidFullScanLimit and mxactFullScanLimit can be passed as NULL if caller is
510 * not interested.
511 */
512 void
vacuum_set_xid_limits(Relation rel,int freeze_min_age,int freeze_table_age,int multixact_freeze_min_age,int multixact_freeze_table_age,TransactionId * oldestXmin,TransactionId * freezeLimit,TransactionId * xidFullScanLimit,MultiXactId * multiXactCutoff,MultiXactId * mxactFullScanLimit)513 vacuum_set_xid_limits(Relation rel,
514 int freeze_min_age,
515 int freeze_table_age,
516 int multixact_freeze_min_age,
517 int multixact_freeze_table_age,
518 TransactionId *oldestXmin,
519 TransactionId *freezeLimit,
520 TransactionId *xidFullScanLimit,
521 MultiXactId *multiXactCutoff,
522 MultiXactId *mxactFullScanLimit)
523 {
524 int freezemin;
525 int mxid_freezemin;
526 int effective_multixact_freeze_max_age;
527 TransactionId limit;
528 TransactionId safeLimit;
529 MultiXactId oldestMxact;
530 MultiXactId mxactLimit;
531 MultiXactId safeMxactLimit;
532
533 /*
534 * We can always ignore processes running lazy vacuum. This is because we
535 * use these values only for deciding which tuples we must keep in the
536 * tables. Since lazy vacuum doesn't write its XID anywhere, it's safe to
537 * ignore it. In theory it could be problematic to ignore lazy vacuums in
538 * a full vacuum, but keep in mind that only one vacuum process can be
539 * working on a particular table at any time, and that each vacuum is
540 * always an independent transaction.
541 */
542 *oldestXmin =
543 TransactionIdLimitedForOldSnapshots(GetOldestXmin(rel, PROCARRAY_FLAGS_VACUUM), rel);
544
545 Assert(TransactionIdIsNormal(*oldestXmin));
546
547 /*
548 * Determine the minimum freeze age to use: as specified by the caller, or
549 * vacuum_freeze_min_age, but in any case not more than half
550 * autovacuum_freeze_max_age, so that autovacuums to prevent XID
551 * wraparound won't occur too frequently.
552 */
553 freezemin = freeze_min_age;
554 if (freezemin < 0)
555 freezemin = vacuum_freeze_min_age;
556 freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
557 Assert(freezemin >= 0);
558
559 /*
560 * Compute the cutoff XID, being careful not to generate a "permanent" XID
561 */
562 limit = *oldestXmin - freezemin;
563 if (!TransactionIdIsNormal(limit))
564 limit = FirstNormalTransactionId;
565
566 /*
567 * If oldestXmin is very far back (in practice, more than
568 * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
569 * freeze age of zero.
570 */
571 safeLimit = ReadNewTransactionId() - autovacuum_freeze_max_age;
572 if (!TransactionIdIsNormal(safeLimit))
573 safeLimit = FirstNormalTransactionId;
574
575 if (TransactionIdPrecedes(limit, safeLimit))
576 {
577 ereport(WARNING,
578 (errmsg("oldest xmin is far in the past"),
579 errhint("Close open transactions soon to avoid wraparound problems.")));
580 limit = *oldestXmin;
581 }
582
583 *freezeLimit = limit;
584
585 /*
586 * Compute the multixact age for which freezing is urgent. This is
587 * normally autovacuum_multixact_freeze_max_age, but may be less if we are
588 * short of multixact member space.
589 */
590 effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
591
592 /*
593 * Determine the minimum multixact freeze age to use: as specified by
594 * caller, or vacuum_multixact_freeze_min_age, but in any case not more
595 * than half effective_multixact_freeze_max_age, so that autovacuums to
596 * prevent MultiXact wraparound won't occur too frequently.
597 */
598 mxid_freezemin = multixact_freeze_min_age;
599 if (mxid_freezemin < 0)
600 mxid_freezemin = vacuum_multixact_freeze_min_age;
601 mxid_freezemin = Min(mxid_freezemin,
602 effective_multixact_freeze_max_age / 2);
603 Assert(mxid_freezemin >= 0);
604
605 /* compute the cutoff multi, being careful to generate a valid value */
606 oldestMxact = GetOldestMultiXactId();
607 mxactLimit = oldestMxact - mxid_freezemin;
608 if (mxactLimit < FirstMultiXactId)
609 mxactLimit = FirstMultiXactId;
610
611 safeMxactLimit =
612 ReadNextMultiXactId() - effective_multixact_freeze_max_age;
613 if (safeMxactLimit < FirstMultiXactId)
614 safeMxactLimit = FirstMultiXactId;
615
616 if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit))
617 {
618 ereport(WARNING,
619 (errmsg("oldest multixact is far in the past"),
620 errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
621 /* Use the safe limit, unless an older mxact is still running */
622 if (MultiXactIdPrecedes(oldestMxact, safeMxactLimit))
623 mxactLimit = oldestMxact;
624 else
625 mxactLimit = safeMxactLimit;
626 }
627
628 *multiXactCutoff = mxactLimit;
629
630 if (xidFullScanLimit != NULL)
631 {
632 int freezetable;
633
634 Assert(mxactFullScanLimit != NULL);
635
636 /*
637 * Determine the table freeze age to use: as specified by the caller,
638 * or vacuum_freeze_table_age, but in any case not more than
639 * autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
640 * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
641 * before anti-wraparound autovacuum is launched.
642 */
643 freezetable = freeze_table_age;
644 if (freezetable < 0)
645 freezetable = vacuum_freeze_table_age;
646 freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
647 Assert(freezetable >= 0);
648
649 /*
650 * Compute XID limit causing a full-table vacuum, being careful not to
651 * generate a "permanent" XID.
652 */
653 limit = ReadNewTransactionId() - freezetable;
654 if (!TransactionIdIsNormal(limit))
655 limit = FirstNormalTransactionId;
656
657 *xidFullScanLimit = limit;
658
659 /*
660 * Similar to the above, determine the table freeze age to use for
661 * multixacts: as specified by the caller, or
662 * vacuum_multixact_freeze_table_age, but in any case not more than
663 * autovacuum_multixact_freeze_table_age * 0.95, so that if you have
664 * e.g. nightly VACUUM schedule, the nightly VACUUM gets a chance to
665 * freeze multixacts before anti-wraparound autovacuum is launched.
666 */
667 freezetable = multixact_freeze_table_age;
668 if (freezetable < 0)
669 freezetable = vacuum_multixact_freeze_table_age;
670 freezetable = Min(freezetable,
671 effective_multixact_freeze_max_age * 0.95);
672 Assert(freezetable >= 0);
673
674 /*
675 * Compute MultiXact limit causing a full-table vacuum, being careful
676 * to generate a valid MultiXact value.
677 */
678 mxactLimit = ReadNextMultiXactId() - freezetable;
679 if (mxactLimit < FirstMultiXactId)
680 mxactLimit = FirstMultiXactId;
681
682 *mxactFullScanLimit = mxactLimit;
683 }
684 else
685 {
686 Assert(mxactFullScanLimit == NULL);
687 }
688 }
689
690 /*
691 * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
692 *
693 * If we scanned the whole relation then we should just use the count of
694 * live tuples seen; but if we did not, we should not blindly extrapolate
695 * from that number, since VACUUM may have scanned a quite nonrandom
696 * subset of the table. When we have only partial information, we take
697 * the old value of pg_class.reltuples as a measurement of the
698 * tuple density in the unscanned pages.
699 *
700 * The is_analyze argument is historical.
701 */
702 double
vac_estimate_reltuples(Relation relation,bool is_analyze,BlockNumber total_pages,BlockNumber scanned_pages,double scanned_tuples)703 vac_estimate_reltuples(Relation relation, bool is_analyze,
704 BlockNumber total_pages,
705 BlockNumber scanned_pages,
706 double scanned_tuples)
707 {
708 BlockNumber old_rel_pages = relation->rd_rel->relpages;
709 double old_rel_tuples = relation->rd_rel->reltuples;
710 double old_density;
711 double unscanned_pages;
712 double total_tuples;
713
714 /* If we did scan the whole table, just use the count as-is */
715 if (scanned_pages >= total_pages)
716 return scanned_tuples;
717
718 /*
719 * If scanned_pages is zero but total_pages isn't, keep the existing value
720 * of reltuples. (Note: callers should avoid updating the pg_class
721 * statistics in this situation, since no new information has been
722 * provided.)
723 */
724 if (scanned_pages == 0)
725 return old_rel_tuples;
726
727 /*
728 * If old value of relpages is zero, old density is indeterminate; we
729 * can't do much except scale up scanned_tuples to match total_pages.
730 */
731 if (old_rel_pages == 0)
732 return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
733
734 /*
735 * Okay, we've covered the corner cases. The normal calculation is to
736 * convert the old measurement to a density (tuples per page), then
737 * estimate the number of tuples in the unscanned pages using that figure,
738 * and finally add on the number of tuples in the scanned pages.
739 */
740 old_density = old_rel_tuples / old_rel_pages;
741 unscanned_pages = (double) total_pages - (double) scanned_pages;
742 total_tuples = old_density * unscanned_pages + scanned_tuples;
743 return floor(total_tuples + 0.5);
744 }
745
746
747 /*
748 * vac_update_relstats() -- update statistics for one relation
749 *
750 * Update the whole-relation statistics that are kept in its pg_class
751 * row. There are additional stats that will be updated if we are
752 * doing ANALYZE, but we always update these stats. This routine works
753 * for both index and heap relation entries in pg_class.
754 *
755 * We violate transaction semantics here by overwriting the rel's
756 * existing pg_class tuple with the new values. This is reasonably
757 * safe as long as we're sure that the new values are correct whether or
758 * not this transaction commits. The reason for doing this is that if
759 * we updated these tuples in the usual way, vacuuming pg_class itself
760 * wouldn't work very well --- by the time we got done with a vacuum
761 * cycle, most of the tuples in pg_class would've been obsoleted. Of
762 * course, this only works for fixed-size not-null columns, but these are.
763 *
764 * Another reason for doing it this way is that when we are in a lazy
765 * VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
766 * Somebody vacuuming pg_class might think they could delete a tuple
767 * marked with xmin = our xid.
768 *
769 * In addition to fundamentally nontransactional statistics such as
770 * relpages and relallvisible, we try to maintain certain lazily-updated
771 * DDL flags such as relhasindex, by clearing them if no longer correct.
772 * It's safe to do this in VACUUM, which can't run in parallel with
773 * CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
774 * However, it's *not* safe to do it in an ANALYZE that's within an
775 * outer transaction, because for example the current transaction might
776 * have dropped the last index; then we'd think relhasindex should be
777 * cleared, but if the transaction later rolls back this would be wrong.
778 * So we refrain from updating the DDL flags if we're inside an outer
779 * transaction. This is OK since postponing the flag maintenance is
780 * always allowable.
781 *
782 * This routine is shared by VACUUM and ANALYZE.
783 */
784 void
vac_update_relstats(Relation relation,BlockNumber num_pages,double num_tuples,BlockNumber num_all_visible_pages,bool hasindex,TransactionId frozenxid,MultiXactId minmulti,bool in_outer_xact)785 vac_update_relstats(Relation relation,
786 BlockNumber num_pages, double num_tuples,
787 BlockNumber num_all_visible_pages,
788 bool hasindex, TransactionId frozenxid,
789 MultiXactId minmulti,
790 bool in_outer_xact)
791 {
792 Oid relid = RelationGetRelid(relation);
793 Relation rd;
794 HeapTuple ctup;
795 Form_pg_class pgcform;
796 bool dirty;
797
798 rd = heap_open(RelationRelationId, RowExclusiveLock);
799
800 /* Fetch a copy of the tuple to scribble on */
801 ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
802 if (!HeapTupleIsValid(ctup))
803 elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
804 relid);
805 pgcform = (Form_pg_class) GETSTRUCT(ctup);
806
807 /* Apply statistical updates, if any, to copied tuple */
808
809 dirty = false;
810 if (pgcform->relpages != (int32) num_pages)
811 {
812 pgcform->relpages = (int32) num_pages;
813 dirty = true;
814 }
815 if (pgcform->reltuples != (float4) num_tuples)
816 {
817 pgcform->reltuples = (float4) num_tuples;
818 dirty = true;
819 }
820 if (pgcform->relallvisible != (int32) num_all_visible_pages)
821 {
822 pgcform->relallvisible = (int32) num_all_visible_pages;
823 dirty = true;
824 }
825
826 /* Apply DDL updates, but not inside an outer transaction (see above) */
827
828 if (!in_outer_xact)
829 {
830 /*
831 * If we didn't find any indexes, reset relhasindex.
832 */
833 if (pgcform->relhasindex && !hasindex)
834 {
835 pgcform->relhasindex = false;
836 dirty = true;
837 }
838
839 /*
840 * If we have discovered that there are no indexes, then there's no
841 * primary key either. This could be done more thoroughly...
842 */
843 if (pgcform->relhaspkey && !hasindex)
844 {
845 pgcform->relhaspkey = false;
846 dirty = true;
847 }
848
849 /* We also clear relhasrules and relhastriggers if needed */
850 if (pgcform->relhasrules && relation->rd_rules == NULL)
851 {
852 pgcform->relhasrules = false;
853 dirty = true;
854 }
855 if (pgcform->relhastriggers && relation->trigdesc == NULL)
856 {
857 pgcform->relhastriggers = false;
858 dirty = true;
859 }
860 }
861
862 /*
863 * Update relfrozenxid, unless caller passed InvalidTransactionId
864 * indicating it has no new data.
865 *
866 * Ordinarily, we don't let relfrozenxid go backwards: if things are
867 * working correctly, the only way the new frozenxid could be older would
868 * be if a previous VACUUM was done with a tighter freeze_min_age, in
869 * which case we don't want to forget the work it already did. However,
870 * if the stored relfrozenxid is "in the future", then it must be corrupt
871 * and it seems best to overwrite it with the cutoff we used this time.
872 * This should match vac_update_datfrozenxid() concerning what we consider
873 * to be "in the future".
874 */
875 if (TransactionIdIsNormal(frozenxid) &&
876 pgcform->relfrozenxid != frozenxid &&
877 (TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid) ||
878 TransactionIdPrecedes(ReadNewTransactionId(),
879 pgcform->relfrozenxid)))
880 {
881 pgcform->relfrozenxid = frozenxid;
882 dirty = true;
883 }
884
885 /* Similarly for relminmxid */
886 if (MultiXactIdIsValid(minmulti) &&
887 pgcform->relminmxid != minmulti &&
888 (MultiXactIdPrecedes(pgcform->relminmxid, minmulti) ||
889 MultiXactIdPrecedes(ReadNextMultiXactId(), pgcform->relminmxid)))
890 {
891 pgcform->relminmxid = minmulti;
892 dirty = true;
893 }
894
895 /* If anything changed, write out the tuple. */
896 if (dirty)
897 heap_inplace_update(rd, ctup);
898
899 heap_close(rd, RowExclusiveLock);
900 }
901
902
903 /*
904 * vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
905 *
906 * Update pg_database's datfrozenxid entry for our database to be the
907 * minimum of the pg_class.relfrozenxid values.
908 *
909 * Similarly, update our datminmxid to be the minimum of the
910 * pg_class.relminmxid values.
911 *
912 * If we are able to advance either pg_database value, also try to
913 * truncate pg_xact and pg_multixact.
914 *
915 * We violate transaction semantics here by overwriting the database's
916 * existing pg_database tuple with the new values. This is reasonably
917 * safe since the new values are correct whether or not this transaction
918 * commits. As with vac_update_relstats, this avoids leaving dead tuples
919 * behind after a VACUUM.
920 */
921 void
vac_update_datfrozenxid(void)922 vac_update_datfrozenxid(void)
923 {
924 HeapTuple tuple;
925 Form_pg_database dbform;
926 Relation relation;
927 SysScanDesc scan;
928 HeapTuple classTup;
929 TransactionId newFrozenXid;
930 MultiXactId newMinMulti;
931 TransactionId lastSaneFrozenXid;
932 MultiXactId lastSaneMinMulti;
933 bool bogus = false;
934 bool dirty = false;
935
936 /*
937 * Restrict this task to one backend per database. This avoids race
938 * conditions that would move datfrozenxid or datminmxid backward. It
939 * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
940 * datfrozenxid passed to an earlier vac_truncate_clog() call.
941 */
942 LockDatabaseFrozenIds(ExclusiveLock);
943
944 /*
945 * Initialize the "min" calculation with GetOldestXmin, which is a
946 * reasonable approximation to the minimum relfrozenxid for not-yet-
947 * committed pg_class entries for new tables; see AddNewRelationTuple().
948 * So we cannot produce a wrong minimum by starting with this.
949 */
950 newFrozenXid = GetOldestXmin(NULL, PROCARRAY_FLAGS_VACUUM);
951
952 /*
953 * Similarly, initialize the MultiXact "min" with the value that would be
954 * used on pg_class for new tables. See AddNewRelationTuple().
955 */
956 newMinMulti = GetOldestMultiXactId();
957
958 /*
959 * Identify the latest relfrozenxid and relminmxid values that we could
960 * validly see during the scan. These are conservative values, but it's
961 * not really worth trying to be more exact.
962 */
963 lastSaneFrozenXid = ReadNewTransactionId();
964 lastSaneMinMulti = ReadNextMultiXactId();
965
966 /*
967 * We must seqscan pg_class to find the minimum Xid, because there is no
968 * index that can help us here.
969 */
970 relation = heap_open(RelationRelationId, AccessShareLock);
971
972 scan = systable_beginscan(relation, InvalidOid, false,
973 NULL, 0, NULL);
974
975 while ((classTup = systable_getnext(scan)) != NULL)
976 {
977 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
978
979 /*
980 * Only consider relations able to hold unfrozen XIDs (anything else
981 * should have InvalidTransactionId in relfrozenxid anyway.)
982 */
983 if (classForm->relkind != RELKIND_RELATION &&
984 classForm->relkind != RELKIND_MATVIEW &&
985 classForm->relkind != RELKIND_TOASTVALUE)
986 continue;
987
988 Assert(TransactionIdIsNormal(classForm->relfrozenxid));
989 Assert(MultiXactIdIsValid(classForm->relminmxid));
990
991 /*
992 * If things are working properly, no relation should have a
993 * relfrozenxid or relminmxid that is "in the future". However, such
994 * cases have been known to arise due to bugs in pg_upgrade. If we
995 * see any entries that are "in the future", chicken out and don't do
996 * anything. This ensures we won't truncate clog before those
997 * relations have been scanned and cleaned up.
998 */
999 if (TransactionIdPrecedes(lastSaneFrozenXid, classForm->relfrozenxid) ||
1000 MultiXactIdPrecedes(lastSaneMinMulti, classForm->relminmxid))
1001 {
1002 bogus = true;
1003 break;
1004 }
1005
1006 if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
1007 newFrozenXid = classForm->relfrozenxid;
1008
1009 if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
1010 newMinMulti = classForm->relminmxid;
1011 }
1012
1013 /* we're done with pg_class */
1014 systable_endscan(scan);
1015 heap_close(relation, AccessShareLock);
1016
1017 /* chicken out if bogus data found */
1018 if (bogus)
1019 return;
1020
1021 Assert(TransactionIdIsNormal(newFrozenXid));
1022 Assert(MultiXactIdIsValid(newMinMulti));
1023
1024 /* Now fetch the pg_database tuple we need to update. */
1025 relation = heap_open(DatabaseRelationId, RowExclusiveLock);
1026
1027 /* Fetch a copy of the tuple to scribble on */
1028 tuple = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
1029 if (!HeapTupleIsValid(tuple))
1030 elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
1031 dbform = (Form_pg_database) GETSTRUCT(tuple);
1032
1033 /*
1034 * As in vac_update_relstats(), we ordinarily don't want to let
1035 * datfrozenxid go backward; but if it's "in the future" then it must be
1036 * corrupt and it seems best to overwrite it.
1037 */
1038 if (dbform->datfrozenxid != newFrozenXid &&
1039 (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
1040 TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
1041 {
1042 dbform->datfrozenxid = newFrozenXid;
1043 dirty = true;
1044 }
1045 else
1046 newFrozenXid = dbform->datfrozenxid;
1047
1048 /* Ditto for datminmxid */
1049 if (dbform->datminmxid != newMinMulti &&
1050 (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1051 MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1052 {
1053 dbform->datminmxid = newMinMulti;
1054 dirty = true;
1055 }
1056 else
1057 newMinMulti = dbform->datminmxid;
1058
1059 if (dirty)
1060 heap_inplace_update(relation, tuple);
1061
1062 heap_freetuple(tuple);
1063 heap_close(relation, RowExclusiveLock);
1064
1065 /*
1066 * If we were able to advance datfrozenxid or datminmxid, see if we can
1067 * truncate pg_xact and/or pg_multixact. Also do it if the shared
1068 * XID-wrap-limit info is stale, since this action will update that too.
1069 */
1070 if (dirty || ForceTransactionIdLimitUpdate())
1071 vac_truncate_clog(newFrozenXid, newMinMulti,
1072 lastSaneFrozenXid, lastSaneMinMulti);
1073 }
1074
1075
1076 /*
1077 * vac_truncate_clog() -- attempt to truncate the commit log
1078 *
1079 * Scan pg_database to determine the system-wide oldest datfrozenxid,
1080 * and use it to truncate the transaction commit log (pg_xact).
1081 * Also update the XID wrap limit info maintained by varsup.c.
1082 * Likewise for datminmxid.
1083 *
1084 * The passed frozenXID and minMulti are the updated values for my own
1085 * pg_database entry. They're used to initialize the "min" calculations.
1086 * The caller also passes the "last sane" XID and MXID, since it has
1087 * those at hand already.
1088 *
1089 * This routine is only invoked when we've managed to change our
1090 * DB's datfrozenxid/datminmxid values, or we found that the shared
1091 * XID-wrap-limit info is stale.
1092 */
1093 static void
vac_truncate_clog(TransactionId frozenXID,MultiXactId minMulti,TransactionId lastSaneFrozenXid,MultiXactId lastSaneMinMulti)1094 vac_truncate_clog(TransactionId frozenXID,
1095 MultiXactId minMulti,
1096 TransactionId lastSaneFrozenXid,
1097 MultiXactId lastSaneMinMulti)
1098 {
1099 TransactionId nextXID = ReadNewTransactionId();
1100 Relation relation;
1101 HeapScanDesc scan;
1102 HeapTuple tuple;
1103 Oid oldestxid_datoid;
1104 Oid minmulti_datoid;
1105 bool bogus = false;
1106 bool frozenAlreadyWrapped = false;
1107
1108 /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
1109 LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
1110
1111 /* init oldest datoids to sync with my frozenXID/minMulti values */
1112 oldestxid_datoid = MyDatabaseId;
1113 minmulti_datoid = MyDatabaseId;
1114
1115 /*
1116 * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1117 *
1118 * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1119 * the values could change while we look at them. Fetch each one just
1120 * once to ensure sane behavior of the comparison logic. (Here, as in
1121 * many other places, we assume that fetching or updating an XID in shared
1122 * storage is atomic.)
1123 *
1124 * Note: we need not worry about a race condition with new entries being
1125 * inserted by CREATE DATABASE. Any such entry will have a copy of some
1126 * existing DB's datfrozenxid, and that source DB cannot be ours because
1127 * of the interlock against copying a DB containing an active backend.
1128 * Hence the new entry will not reduce the minimum. Also, if two VACUUMs
1129 * concurrently modify the datfrozenxid's of different databases, the
1130 * worst possible outcome is that pg_xact is not truncated as aggressively
1131 * as it could be.
1132 */
1133 relation = heap_open(DatabaseRelationId, AccessShareLock);
1134
1135 scan = heap_beginscan_catalog(relation, 0, NULL);
1136
1137 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1138 {
1139 volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1140 TransactionId datfrozenxid = dbform->datfrozenxid;
1141 TransactionId datminmxid = dbform->datminmxid;
1142
1143 Assert(TransactionIdIsNormal(datfrozenxid));
1144 Assert(MultiXactIdIsValid(datminmxid));
1145
1146 /*
1147 * If things are working properly, no database should have a
1148 * datfrozenxid or datminmxid that is "in the future". However, such
1149 * cases have been known to arise due to bugs in pg_upgrade. If we
1150 * see any entries that are "in the future", chicken out and don't do
1151 * anything. This ensures we won't truncate clog before those
1152 * databases have been scanned and cleaned up. (We will issue the
1153 * "already wrapped" warning if appropriate, though.)
1154 */
1155 if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1156 MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1157 bogus = true;
1158
1159 if (TransactionIdPrecedes(nextXID, datfrozenxid))
1160 frozenAlreadyWrapped = true;
1161 else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1162 {
1163 frozenXID = datfrozenxid;
1164 oldestxid_datoid = HeapTupleGetOid(tuple);
1165 }
1166
1167 if (MultiXactIdPrecedes(datminmxid, minMulti))
1168 {
1169 minMulti = datminmxid;
1170 minmulti_datoid = HeapTupleGetOid(tuple);
1171 }
1172 }
1173
1174 heap_endscan(scan);
1175
1176 heap_close(relation, AccessShareLock);
1177
1178 /*
1179 * Do not truncate CLOG if we seem to have suffered wraparound already;
1180 * the computed minimum XID might be bogus. This case should now be
1181 * impossible due to the defenses in GetNewTransactionId, but we keep the
1182 * test anyway.
1183 */
1184 if (frozenAlreadyWrapped)
1185 {
1186 ereport(WARNING,
1187 (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1188 errdetail("You might have already suffered transaction-wraparound data loss.")));
1189 return;
1190 }
1191
1192 /* chicken out if data is bogus in any other way */
1193 if (bogus)
1194 return;
1195
1196 /*
1197 * Advance the oldest value for commit timestamps before truncating, so
1198 * that if a user requests a timestamp for a transaction we're truncating
1199 * away right after this point, they get NULL instead of an ugly "file not
1200 * found" error from slru.c. This doesn't matter for xact/multixact
1201 * because they are not subject to arbitrary lookups from users.
1202 */
1203 AdvanceOldestCommitTsXid(frozenXID);
1204
1205 /*
1206 * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1207 */
1208 TruncateCLOG(frozenXID, oldestxid_datoid);
1209 TruncateCommitTs(frozenXID);
1210 TruncateMultiXact(minMulti, minmulti_datoid);
1211
1212 /*
1213 * Update the wrap limit for GetNewTransactionId and creation of new
1214 * MultiXactIds. Note: these functions will also signal the postmaster
1215 * for an(other) autovac cycle if needed. XXX should we avoid possibly
1216 * signalling twice?
1217 */
1218 SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1219 SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
1220
1221 LWLockRelease(WrapLimitsVacuumLock);
1222 }
1223
1224
1225 /*
1226 * vacuum_rel() -- vacuum one heap relation
1227 *
1228 * relid identifies the relation to vacuum. If relation is supplied,
1229 * use the name therein for reporting any failure to open/lock the rel;
1230 * do not use it once we've successfully opened the rel, since it might
1231 * be stale.
1232 *
1233 * Doing one heap at a time incurs extra overhead, since we need to
1234 * check that the heap exists again just before we vacuum it. The
1235 * reason that we do this is so that vacuuming can be spread across
1236 * many small transactions. Otherwise, two-phase locking would require
1237 * us to lock the entire database during one pass of the vacuum cleaner.
1238 *
1239 * At entry and exit, we are not inside a transaction.
1240 */
1241 static bool
vacuum_rel(Oid relid,RangeVar * relation,int options,VacuumParams * params)1242 vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params)
1243 {
1244 LOCKMODE lmode;
1245 Relation onerel;
1246 LockRelId onerelid;
1247 Oid toast_relid;
1248 Oid save_userid;
1249 int save_sec_context;
1250 int save_nestlevel;
1251
1252 Assert(params != NULL);
1253
1254 /* Begin a transaction for vacuuming this relation */
1255 StartTransactionCommand();
1256
1257 /*
1258 * Functions in indexes may want a snapshot set. Also, setting a snapshot
1259 * ensures that RecentGlobalXmin is kept truly recent.
1260 */
1261 PushActiveSnapshot(GetTransactionSnapshot());
1262
1263 if (!(options & VACOPT_FULL))
1264 {
1265 /*
1266 * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
1267 * other concurrent VACUUMs know that they can ignore this one while
1268 * determining their OldestXmin. (The reason we don't set it during a
1269 * full VACUUM is exactly that we may have to run user-defined
1270 * functions for functional indexes, and we want to make sure that if
1271 * they use the snapshot set above, any tuples it requires can't get
1272 * removed from other tables. An index function that depends on the
1273 * contents of other tables is arguably broken, but we won't break it
1274 * here by violating transaction semantics.)
1275 *
1276 * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
1277 * autovacuum; it's used to avoid canceling a vacuum that was invoked
1278 * in an emergency.
1279 *
1280 * Note: these flags remain set until CommitTransaction or
1281 * AbortTransaction. We don't want to clear them until we reset
1282 * MyPgXact->xid/xmin, else OldestXmin might appear to go backwards,
1283 * which is probably Not Good.
1284 */
1285 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1286 MyPgXact->vacuumFlags |= PROC_IN_VACUUM;
1287 if (params->is_wraparound)
1288 MyPgXact->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND;
1289 LWLockRelease(ProcArrayLock);
1290 }
1291
1292 /*
1293 * Check for user-requested abort. Note we want this to be inside a
1294 * transaction, so xact.c doesn't issue useless WARNING.
1295 */
1296 CHECK_FOR_INTERRUPTS();
1297
1298 /*
1299 * Determine the type of lock we want --- hard exclusive lock for a FULL
1300 * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
1301 * way, we can be sure that no other backend is vacuuming the same table.
1302 */
1303 lmode = (options & VACOPT_FULL) ? AccessExclusiveLock : ShareUpdateExclusiveLock;
1304
1305 /*
1306 * Open the relation and get the appropriate lock on it.
1307 *
1308 * There's a race condition here: the rel may have gone away since the
1309 * last time we saw it. If so, we don't need to vacuum it.
1310 *
1311 * If we've been asked not to wait for the relation lock, acquire it first
1312 * in non-blocking mode, before calling try_relation_open().
1313 */
1314 if (!(options & VACOPT_NOWAIT))
1315 onerel = try_relation_open(relid, lmode);
1316 else if (ConditionalLockRelationOid(relid, lmode))
1317 onerel = try_relation_open(relid, NoLock);
1318 else
1319 {
1320 onerel = NULL;
1321 if (relation &&
1322 IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
1323 ereport(LOG,
1324 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
1325 errmsg("skipping vacuum of \"%s\" --- lock not available",
1326 relation->relname)));
1327 }
1328
1329 if (!onerel)
1330 {
1331 PopActiveSnapshot();
1332 CommitTransactionCommand();
1333 return false;
1334 }
1335
1336 /*
1337 * Check permissions.
1338 *
1339 * We allow the user to vacuum a table if he is superuser, the table
1340 * owner, or the database owner (but in the latter case, only if it's not
1341 * a shared relation). pg_class_ownercheck includes the superuser case.
1342 *
1343 * Note we choose to treat permissions failure as a WARNING and keep
1344 * trying to vacuum the rest of the DB --- is this appropriate?
1345 */
1346 if (!(pg_class_ownercheck(RelationGetRelid(onerel), GetUserId()) ||
1347 (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !onerel->rd_rel->relisshared)))
1348 {
1349 if (onerel->rd_rel->relisshared)
1350 ereport(WARNING,
1351 (errmsg("skipping \"%s\" --- only superuser can vacuum it",
1352 RelationGetRelationName(onerel))));
1353 else if (onerel->rd_rel->relnamespace == PG_CATALOG_NAMESPACE)
1354 ereport(WARNING,
1355 (errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it",
1356 RelationGetRelationName(onerel))));
1357 else
1358 ereport(WARNING,
1359 (errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
1360 RelationGetRelationName(onerel))));
1361 relation_close(onerel, lmode);
1362 PopActiveSnapshot();
1363 CommitTransactionCommand();
1364 return false;
1365 }
1366
1367 /*
1368 * Check that it's a vacuumable relation; we used to do this in
1369 * get_rel_oids() but seems safer to check after we've locked the
1370 * relation.
1371 */
1372 if (onerel->rd_rel->relkind != RELKIND_RELATION &&
1373 onerel->rd_rel->relkind != RELKIND_MATVIEW &&
1374 onerel->rd_rel->relkind != RELKIND_TOASTVALUE &&
1375 onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
1376 {
1377 ereport(WARNING,
1378 (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
1379 RelationGetRelationName(onerel))));
1380 relation_close(onerel, lmode);
1381 PopActiveSnapshot();
1382 CommitTransactionCommand();
1383 return false;
1384 }
1385
1386 /*
1387 * Silently ignore tables that are temp tables of other backends ---
1388 * trying to vacuum these will lead to great unhappiness, since their
1389 * contents are probably not up-to-date on disk. (We don't throw a
1390 * warning here; it would just lead to chatter during a database-wide
1391 * VACUUM.)
1392 */
1393 if (RELATION_IS_OTHER_TEMP(onerel))
1394 {
1395 relation_close(onerel, lmode);
1396 PopActiveSnapshot();
1397 CommitTransactionCommand();
1398 return false;
1399 }
1400
1401 /*
1402 * Ignore partitioned tables as there is no work to be done. Since we
1403 * release the lock here, it's possible that any partitions added from
1404 * this point on will not get processed, but that seems harmless.
1405 */
1406 if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1407 {
1408 relation_close(onerel, lmode);
1409 PopActiveSnapshot();
1410 CommitTransactionCommand();
1411
1412 /* It's OK for other commands to look at this table */
1413 return true;
1414 }
1415
1416 /*
1417 * Get a session-level lock too. This will protect our access to the
1418 * relation across multiple transactions, so that we can vacuum the
1419 * relation's TOAST table (if any) secure in the knowledge that no one is
1420 * deleting the parent relation.
1421 *
1422 * NOTE: this cannot block, even if someone else is waiting for access,
1423 * because the lock manager knows that both lock requests are from the
1424 * same process.
1425 */
1426 onerelid = onerel->rd_lockInfo.lockRelId;
1427 LockRelationIdForSession(&onerelid, lmode);
1428
1429 /*
1430 * Remember the relation's TOAST relation for later, if the caller asked
1431 * us to process it. In VACUUM FULL, though, the toast table is
1432 * automatically rebuilt by cluster_rel so we shouldn't recurse to it.
1433 */
1434 if (!(options & VACOPT_SKIPTOAST) && !(options & VACOPT_FULL))
1435 toast_relid = onerel->rd_rel->reltoastrelid;
1436 else
1437 toast_relid = InvalidOid;
1438
1439 /*
1440 * Switch to the table owner's userid, so that any index functions are run
1441 * as that user. Also lock down security-restricted operations and
1442 * arrange to make GUC variable changes local to this command. (This is
1443 * unnecessary, but harmless, for lazy VACUUM.)
1444 */
1445 GetUserIdAndSecContext(&save_userid, &save_sec_context);
1446 SetUserIdAndSecContext(onerel->rd_rel->relowner,
1447 save_sec_context | SECURITY_RESTRICTED_OPERATION);
1448 save_nestlevel = NewGUCNestLevel();
1449
1450 /*
1451 * Do the actual work --- either FULL or "lazy" vacuum
1452 */
1453 if (options & VACOPT_FULL)
1454 {
1455 /* close relation before vacuuming, but hold lock until commit */
1456 relation_close(onerel, NoLock);
1457 onerel = NULL;
1458
1459 /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
1460 cluster_rel(relid, InvalidOid, false,
1461 (options & VACOPT_VERBOSE) != 0);
1462 }
1463 else
1464 lazy_vacuum_rel(onerel, options, params, vac_strategy);
1465
1466 /* Roll back any GUC changes executed by index functions */
1467 AtEOXact_GUC(false, save_nestlevel);
1468
1469 /* Restore userid and security context */
1470 SetUserIdAndSecContext(save_userid, save_sec_context);
1471
1472 /* all done with this class, but hold lock until commit */
1473 if (onerel)
1474 relation_close(onerel, NoLock);
1475
1476 /*
1477 * Complete the transaction and free all temporary memory used.
1478 */
1479 PopActiveSnapshot();
1480 CommitTransactionCommand();
1481
1482 /*
1483 * If the relation has a secondary toast rel, vacuum that too while we
1484 * still hold the session lock on the master table. Note however that
1485 * "analyze" will not get done on the toast table. This is good, because
1486 * the toaster always uses hardcoded index access and statistics are
1487 * totally unimportant for toast relations.
1488 */
1489 if (toast_relid != InvalidOid)
1490 vacuum_rel(toast_relid, NULL, options, params);
1491
1492 /*
1493 * Now release the session-level lock on the master table.
1494 */
1495 UnlockRelationIdForSession(&onerelid, lmode);
1496
1497 /* Report that we really did it. */
1498 return true;
1499 }
1500
1501
1502 /*
1503 * Open all the vacuumable indexes of the given relation, obtaining the
1504 * specified kind of lock on each. Return an array of Relation pointers for
1505 * the indexes into *Irel, and the number of indexes into *nindexes.
1506 *
1507 * We consider an index vacuumable if it is marked insertable (IndexIsReady).
1508 * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
1509 * execution, and what we have is too corrupt to be processable. We will
1510 * vacuum even if the index isn't indisvalid; this is important because in a
1511 * unique index, uniqueness checks will be performed anyway and had better not
1512 * hit dangling index pointers.
1513 */
1514 void
vac_open_indexes(Relation relation,LOCKMODE lockmode,int * nindexes,Relation ** Irel)1515 vac_open_indexes(Relation relation, LOCKMODE lockmode,
1516 int *nindexes, Relation **Irel)
1517 {
1518 List *indexoidlist;
1519 ListCell *indexoidscan;
1520 int i;
1521
1522 Assert(lockmode != NoLock);
1523
1524 indexoidlist = RelationGetIndexList(relation);
1525
1526 /* allocate enough memory for all indexes */
1527 i = list_length(indexoidlist);
1528
1529 if (i > 0)
1530 *Irel = (Relation *) palloc(i * sizeof(Relation));
1531 else
1532 *Irel = NULL;
1533
1534 /* collect just the ready indexes */
1535 i = 0;
1536 foreach(indexoidscan, indexoidlist)
1537 {
1538 Oid indexoid = lfirst_oid(indexoidscan);
1539 Relation indrel;
1540
1541 indrel = index_open(indexoid, lockmode);
1542 if (IndexIsReady(indrel->rd_index))
1543 (*Irel)[i++] = indrel;
1544 else
1545 index_close(indrel, lockmode);
1546 }
1547
1548 *nindexes = i;
1549
1550 list_free(indexoidlist);
1551 }
1552
1553 /*
1554 * Release the resources acquired by vac_open_indexes. Optionally release
1555 * the locks (say NoLock to keep 'em).
1556 */
1557 void
vac_close_indexes(int nindexes,Relation * Irel,LOCKMODE lockmode)1558 vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
1559 {
1560 if (Irel == NULL)
1561 return;
1562
1563 while (nindexes--)
1564 {
1565 Relation ind = Irel[nindexes];
1566
1567 index_close(ind, lockmode);
1568 }
1569 pfree(Irel);
1570 }
1571
1572 /*
1573 * vacuum_delay_point --- check for interrupts and cost-based delay.
1574 *
1575 * This should be called in each major loop of VACUUM processing,
1576 * typically once per page processed.
1577 */
1578 void
vacuum_delay_point(void)1579 vacuum_delay_point(void)
1580 {
1581 /* Always check for interrupts */
1582 CHECK_FOR_INTERRUPTS();
1583
1584 /* Nap if appropriate */
1585 if (VacuumCostActive && !InterruptPending &&
1586 VacuumCostBalance >= VacuumCostLimit)
1587 {
1588 int msec;
1589
1590 msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit;
1591 if (msec > VacuumCostDelay * 4)
1592 msec = VacuumCostDelay * 4;
1593
1594 pg_usleep(msec * 1000L);
1595
1596 VacuumCostBalance = 0;
1597
1598 /* update balance values for workers */
1599 AutoVacuumUpdateDelay();
1600
1601 /* Might have gotten an interrupt while sleeping */
1602 CHECK_FOR_INTERRUPTS();
1603 }
1604 }
1605