1 /*-------------------------------------------------------------------------
2 *
3 * vacuum.c
4 * The postgres vacuum cleaner.
5 *
6 * This file now includes only control and dispatch code for VACUUM and
7 * ANALYZE commands. Regular VACUUM is implemented in vacuumlazy.c,
8 * ANALYZE in analyze.c, and VACUUM FULL is a variant of CLUSTER, handled
9 * in cluster.c.
10 *
11 *
12 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
13 * Portions Copyright (c) 1994, Regents of the University of California
14 *
15 *
16 * IDENTIFICATION
17 * src/backend/commands/vacuum.c
18 *
19 *-------------------------------------------------------------------------
20 */
21 #include "postgres.h"
22
23 #include <math.h>
24
25 #include "access/clog.h"
26 #include "access/commit_ts.h"
27 #include "access/genam.h"
28 #include "access/heapam.h"
29 #include "access/htup_details.h"
30 #include "access/multixact.h"
31 #include "access/transam.h"
32 #include "access/xact.h"
33 #include "catalog/namespace.h"
34 #include "catalog/pg_database.h"
35 #include "catalog/pg_namespace.h"
36 #include "commands/cluster.h"
37 #include "commands/vacuum.h"
38 #include "miscadmin.h"
39 #include "pgstat.h"
40 #include "postmaster/autovacuum.h"
41 #include "storage/bufmgr.h"
42 #include "storage/lmgr.h"
43 #include "storage/proc.h"
44 #include "storage/procarray.h"
45 #include "utils/acl.h"
46 #include "utils/fmgroids.h"
47 #include "utils/guc.h"
48 #include "utils/memutils.h"
49 #include "utils/snapmgr.h"
50 #include "utils/syscache.h"
51 #include "utils/tqual.h"
52
53
54 /*
55 * GUC parameters
56 */
57 int vacuum_freeze_min_age;
58 int vacuum_freeze_table_age;
59 int vacuum_multixact_freeze_min_age;
60 int vacuum_multixact_freeze_table_age;
61
62
63 /* A few variables that don't seem worth passing around as parameters */
64 static MemoryContext vac_context = NULL;
65 static BufferAccessStrategy vac_strategy;
66
67
68 /* non-export function prototypes */
69 static List *get_rel_oids(Oid relid, const RangeVar *vacrel);
70 static void vac_truncate_clog(TransactionId frozenXID,
71 MultiXactId minMulti,
72 TransactionId lastSaneFrozenXid,
73 MultiXactId lastSaneMinMulti);
74 static bool vacuum_rel(Oid relid, RangeVar *relation, int options,
75 VacuumParams *params);
76
77 /*
78 * Primary entry point for manual VACUUM and ANALYZE commands
79 *
80 * This is mainly a preparation wrapper for the real operations that will
81 * happen in vacuum().
82 */
83 void
ExecVacuum(VacuumStmt * vacstmt,bool isTopLevel)84 ExecVacuum(VacuumStmt *vacstmt, bool isTopLevel)
85 {
86 VacuumParams params;
87
88 /* sanity checks on options */
89 Assert(vacstmt->options & (VACOPT_VACUUM | VACOPT_ANALYZE));
90 Assert((vacstmt->options & VACOPT_VACUUM) ||
91 !(vacstmt->options & (VACOPT_FULL | VACOPT_FREEZE)));
92 Assert((vacstmt->options & VACOPT_ANALYZE) || vacstmt->va_cols == NIL);
93 Assert(!(vacstmt->options & VACOPT_SKIPTOAST));
94
95 /*
96 * All freeze ages are zero if the FREEZE option is given; otherwise pass
97 * them as -1 which means to use the default values.
98 */
99 if (vacstmt->options & VACOPT_FREEZE)
100 {
101 params.freeze_min_age = 0;
102 params.freeze_table_age = 0;
103 params.multixact_freeze_min_age = 0;
104 params.multixact_freeze_table_age = 0;
105 }
106 else
107 {
108 params.freeze_min_age = -1;
109 params.freeze_table_age = -1;
110 params.multixact_freeze_min_age = -1;
111 params.multixact_freeze_table_age = -1;
112 }
113
114 /* user-invoked vacuum is never "for wraparound" */
115 params.is_wraparound = false;
116
117 /* user-invoked vacuum never uses this parameter */
118 params.log_min_duration = -1;
119
120 /* Now go through the common routine */
121 vacuum(vacstmt->options, vacstmt->relation, InvalidOid, ¶ms,
122 vacstmt->va_cols, NULL, isTopLevel);
123 }
124
125 /*
126 * Primary entry point for VACUUM and ANALYZE commands.
127 *
128 * options is a bitmask of VacuumOption flags, indicating what to do.
129 *
130 * relid, if not InvalidOid, indicate the relation to process; otherwise,
131 * the RangeVar is used. (The latter must always be passed, because it's
132 * used for error messages.)
133 *
134 * params contains a set of parameters that can be used to customize the
135 * behavior.
136 *
137 * va_cols is a list of columns to analyze, or NIL to process them all.
138 *
139 * bstrategy is normally given as NULL, but in autovacuum it can be passed
140 * in to use the same buffer strategy object across multiple vacuum() calls.
141 *
142 * isTopLevel should be passed down from ProcessUtility.
143 *
144 * It is the caller's responsibility that all parameters are allocated in a
145 * memory context that will not disappear at transaction commit.
146 */
147 void
vacuum(int options,RangeVar * relation,Oid relid,VacuumParams * params,List * va_cols,BufferAccessStrategy bstrategy,bool isTopLevel)148 vacuum(int options, RangeVar *relation, Oid relid, VacuumParams *params,
149 List *va_cols, BufferAccessStrategy bstrategy, bool isTopLevel)
150 {
151 const char *stmttype;
152 volatile bool in_outer_xact,
153 use_own_xacts;
154 List *relations;
155 static bool in_vacuum = false;
156
157 Assert(params != NULL);
158
159 stmttype = (options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
160
161 /*
162 * We cannot run VACUUM inside a user transaction block; if we were inside
163 * a transaction, then our commit- and start-transaction-command calls
164 * would not have the intended effect! There are numerous other subtle
165 * dependencies on this, too.
166 *
167 * ANALYZE (without VACUUM) can run either way.
168 */
169 if (options & VACOPT_VACUUM)
170 {
171 PreventTransactionChain(isTopLevel, stmttype);
172 in_outer_xact = false;
173 }
174 else
175 in_outer_xact = IsInTransactionChain(isTopLevel);
176
177 /*
178 * Due to static variables vac_context, anl_context and vac_strategy,
179 * vacuum() is not reentrant. This matters when VACUUM FULL or ANALYZE
180 * calls a hostile index expression that itself calls ANALYZE.
181 */
182 if (in_vacuum)
183 ereport(ERROR,
184 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
185 errmsg("%s cannot be executed from VACUUM or ANALYZE",
186 stmttype)));
187
188 /*
189 * Sanity check DISABLE_PAGE_SKIPPING option.
190 */
191 if ((options & VACOPT_FULL) != 0 &&
192 (options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
193 ereport(ERROR,
194 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
195 errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
196
197 /*
198 * Send info about dead objects to the statistics collector, unless we are
199 * in autovacuum --- autovacuum.c does this for itself.
200 */
201 if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
202 pgstat_vacuum_stat();
203
204 /*
205 * Create special memory context for cross-transaction storage.
206 *
207 * Since it is a child of PortalContext, it will go away eventually even
208 * if we suffer an error; there's no need for special abort cleanup logic.
209 */
210 vac_context = AllocSetContextCreate(PortalContext,
211 "Vacuum",
212 ALLOCSET_DEFAULT_SIZES);
213
214 /*
215 * If caller didn't give us a buffer strategy object, make one in the
216 * cross-transaction memory context.
217 */
218 if (bstrategy == NULL)
219 {
220 MemoryContext old_context = MemoryContextSwitchTo(vac_context);
221
222 bstrategy = GetAccessStrategy(BAS_VACUUM);
223 MemoryContextSwitchTo(old_context);
224 }
225 vac_strategy = bstrategy;
226
227 /*
228 * Build list of relations to process, unless caller gave us one. (If we
229 * build one, we put it in vac_context for safekeeping.)
230 */
231 relations = get_rel_oids(relid, relation);
232
233 /*
234 * Decide whether we need to start/commit our own transactions.
235 *
236 * For VACUUM (with or without ANALYZE): always do so, so that we can
237 * release locks as soon as possible. (We could possibly use the outer
238 * transaction for a one-table VACUUM, but handling TOAST tables would be
239 * problematic.)
240 *
241 * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
242 * start/commit our own transactions. Also, there's no need to do so if
243 * only processing one relation. For multiple relations when not within a
244 * transaction block, and also in an autovacuum worker, use own
245 * transactions so we can release locks sooner.
246 */
247 if (options & VACOPT_VACUUM)
248 use_own_xacts = true;
249 else
250 {
251 Assert(options & VACOPT_ANALYZE);
252 if (IsAutoVacuumWorkerProcess())
253 use_own_xacts = true;
254 else if (in_outer_xact)
255 use_own_xacts = false;
256 else if (list_length(relations) > 1)
257 use_own_xacts = true;
258 else
259 use_own_xacts = false;
260 }
261
262 /*
263 * vacuum_rel expects to be entered with no transaction active; it will
264 * start and commit its own transaction. But we are called by an SQL
265 * command, and so we are executing inside a transaction already. We
266 * commit the transaction started in PostgresMain() here, and start
267 * another one before exiting to match the commit waiting for us back in
268 * PostgresMain().
269 */
270 if (use_own_xacts)
271 {
272 Assert(!in_outer_xact);
273
274 /* ActiveSnapshot is not set by autovacuum */
275 if (ActiveSnapshotSet())
276 PopActiveSnapshot();
277
278 /* matches the StartTransaction in PostgresMain() */
279 CommitTransactionCommand();
280 }
281
282 /* Turn vacuum cost accounting on or off */
283 PG_TRY();
284 {
285 ListCell *cur;
286
287 in_vacuum = true;
288 VacuumCostActive = (VacuumCostDelay > 0);
289 VacuumCostBalance = 0;
290 VacuumPageHit = 0;
291 VacuumPageMiss = 0;
292 VacuumPageDirty = 0;
293
294 /*
295 * Loop to process each selected relation.
296 */
297 foreach(cur, relations)
298 {
299 Oid relid = lfirst_oid(cur);
300
301 if (options & VACOPT_VACUUM)
302 {
303 if (!vacuum_rel(relid, relation, options, params))
304 continue;
305 }
306
307 if (options & VACOPT_ANALYZE)
308 {
309 /*
310 * If using separate xacts, start one for analyze. Otherwise,
311 * we can use the outer transaction.
312 */
313 if (use_own_xacts)
314 {
315 StartTransactionCommand();
316 /* functions in indexes may want a snapshot set */
317 PushActiveSnapshot(GetTransactionSnapshot());
318 }
319
320 analyze_rel(relid, relation, options, params,
321 va_cols, in_outer_xact, vac_strategy);
322
323 if (use_own_xacts)
324 {
325 PopActiveSnapshot();
326 CommitTransactionCommand();
327 }
328 }
329 }
330 }
331 PG_CATCH();
332 {
333 in_vacuum = false;
334 VacuumCostActive = false;
335 PG_RE_THROW();
336 }
337 PG_END_TRY();
338
339 in_vacuum = false;
340 VacuumCostActive = false;
341
342 /*
343 * Finish up processing.
344 */
345 if (use_own_xacts)
346 {
347 /* here, we are not in a transaction */
348
349 /*
350 * This matches the CommitTransaction waiting for us in
351 * PostgresMain().
352 */
353 StartTransactionCommand();
354 }
355
356 if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
357 {
358 /*
359 * Update pg_database.datfrozenxid, and truncate pg_clog if possible.
360 * (autovacuum.c does this for itself.)
361 */
362 vac_update_datfrozenxid();
363 }
364
365 /*
366 * Clean up working storage --- note we must do this after
367 * StartTransactionCommand, else we might be trying to delete the active
368 * context!
369 */
370 MemoryContextDelete(vac_context);
371 vac_context = NULL;
372 }
373
374 /*
375 * Build a list of Oids for each relation to be processed
376 *
377 * The list is built in vac_context so that it will survive across our
378 * per-relation transactions.
379 */
380 static List *
get_rel_oids(Oid relid,const RangeVar * vacrel)381 get_rel_oids(Oid relid, const RangeVar *vacrel)
382 {
383 List *oid_list = NIL;
384 MemoryContext oldcontext;
385
386 /* OID supplied by VACUUM's caller? */
387 if (OidIsValid(relid))
388 {
389 oldcontext = MemoryContextSwitchTo(vac_context);
390 oid_list = lappend_oid(oid_list, relid);
391 MemoryContextSwitchTo(oldcontext);
392 }
393 else if (vacrel)
394 {
395 /* Process a specific relation */
396 Oid relid;
397
398 /*
399 * Since we don't take a lock here, the relation might be gone, or the
400 * RangeVar might no longer refer to the OID we look up here. In the
401 * former case, VACUUM will do nothing; in the latter case, it will
402 * process the OID we looked up here, rather than the new one. Neither
403 * is ideal, but there's little practical alternative, since we're
404 * going to commit this transaction and begin a new one between now
405 * and then.
406 */
407 relid = RangeVarGetRelid(vacrel, NoLock, false);
408
409 /* Make a relation list entry for this guy */
410 oldcontext = MemoryContextSwitchTo(vac_context);
411 oid_list = lappend_oid(oid_list, relid);
412 MemoryContextSwitchTo(oldcontext);
413 }
414 else
415 {
416 /*
417 * Process all plain relations and materialized views listed in
418 * pg_class
419 */
420 Relation pgclass;
421 HeapScanDesc scan;
422 HeapTuple tuple;
423
424 pgclass = heap_open(RelationRelationId, AccessShareLock);
425
426 scan = heap_beginscan_catalog(pgclass, 0, NULL);
427
428 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
429 {
430 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
431
432 if (classForm->relkind != RELKIND_RELATION &&
433 classForm->relkind != RELKIND_MATVIEW)
434 continue;
435
436 /* Make a relation list entry for this guy */
437 oldcontext = MemoryContextSwitchTo(vac_context);
438 oid_list = lappend_oid(oid_list, HeapTupleGetOid(tuple));
439 MemoryContextSwitchTo(oldcontext);
440 }
441
442 heap_endscan(scan);
443 heap_close(pgclass, AccessShareLock);
444 }
445
446 return oid_list;
447 }
448
449 /*
450 * vacuum_set_xid_limits() -- compute oldest-Xmin and freeze cutoff points
451 *
452 * The output parameters are:
453 * - oldestXmin is the cutoff value used to distinguish whether tuples are
454 * DEAD or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
455 * - freezeLimit is the Xid below which all Xids are replaced by
456 * FrozenTransactionId during vacuum.
457 * - xidFullScanLimit (computed from table_freeze_age parameter)
458 * represents a minimum Xid value; a table whose relfrozenxid is older than
459 * this will have a full-table vacuum applied to it, to freeze tuples across
460 * the whole table. Vacuuming a table younger than this value can use a
461 * partial scan.
462 * - multiXactCutoff is the value below which all MultiXactIds are removed from
463 * Xmax.
464 * - mxactFullScanLimit is a value against which a table's relminmxid value is
465 * compared to produce a full-table vacuum, as with xidFullScanLimit.
466 *
467 * xidFullScanLimit and mxactFullScanLimit can be passed as NULL if caller is
468 * not interested.
469 */
470 void
vacuum_set_xid_limits(Relation rel,int freeze_min_age,int freeze_table_age,int multixact_freeze_min_age,int multixact_freeze_table_age,TransactionId * oldestXmin,TransactionId * freezeLimit,TransactionId * xidFullScanLimit,MultiXactId * multiXactCutoff,MultiXactId * mxactFullScanLimit)471 vacuum_set_xid_limits(Relation rel,
472 int freeze_min_age,
473 int freeze_table_age,
474 int multixact_freeze_min_age,
475 int multixact_freeze_table_age,
476 TransactionId *oldestXmin,
477 TransactionId *freezeLimit,
478 TransactionId *xidFullScanLimit,
479 MultiXactId *multiXactCutoff,
480 MultiXactId *mxactFullScanLimit)
481 {
482 int freezemin;
483 int mxid_freezemin;
484 int effective_multixact_freeze_max_age;
485 TransactionId limit;
486 TransactionId safeLimit;
487 MultiXactId oldestMxact;
488 MultiXactId mxactLimit;
489 MultiXactId safeMxactLimit;
490
491 /*
492 * We can always ignore processes running lazy vacuum. This is because we
493 * use these values only for deciding which tuples we must keep in the
494 * tables. Since lazy vacuum doesn't write its XID anywhere, it's safe to
495 * ignore it. In theory it could be problematic to ignore lazy vacuums in
496 * a full vacuum, but keep in mind that only one vacuum process can be
497 * working on a particular table at any time, and that each vacuum is
498 * always an independent transaction.
499 */
500 *oldestXmin =
501 TransactionIdLimitedForOldSnapshots(GetOldestXmin(rel, true), rel);
502
503 Assert(TransactionIdIsNormal(*oldestXmin));
504
505 /*
506 * Determine the minimum freeze age to use: as specified by the caller, or
507 * vacuum_freeze_min_age, but in any case not more than half
508 * autovacuum_freeze_max_age, so that autovacuums to prevent XID
509 * wraparound won't occur too frequently.
510 */
511 freezemin = freeze_min_age;
512 if (freezemin < 0)
513 freezemin = vacuum_freeze_min_age;
514 freezemin = Min(freezemin, autovacuum_freeze_max_age / 2);
515 Assert(freezemin >= 0);
516
517 /*
518 * Compute the cutoff XID, being careful not to generate a "permanent" XID
519 */
520 limit = *oldestXmin - freezemin;
521 if (!TransactionIdIsNormal(limit))
522 limit = FirstNormalTransactionId;
523
524 /*
525 * If oldestXmin is very far back (in practice, more than
526 * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum
527 * freeze age of zero.
528 */
529 safeLimit = ReadNewTransactionId() - autovacuum_freeze_max_age;
530 if (!TransactionIdIsNormal(safeLimit))
531 safeLimit = FirstNormalTransactionId;
532
533 if (TransactionIdPrecedes(limit, safeLimit))
534 {
535 ereport(WARNING,
536 (errmsg("oldest xmin is far in the past"),
537 errhint("Close open transactions soon to avoid wraparound problems.")));
538 limit = *oldestXmin;
539 }
540
541 *freezeLimit = limit;
542
543 /*
544 * Compute the multixact age for which freezing is urgent. This is
545 * normally autovacuum_multixact_freeze_max_age, but may be less if we are
546 * short of multixact member space.
547 */
548 effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
549
550 /*
551 * Determine the minimum multixact freeze age to use: as specified by
552 * caller, or vacuum_multixact_freeze_min_age, but in any case not more
553 * than half effective_multixact_freeze_max_age, so that autovacuums to
554 * prevent MultiXact wraparound won't occur too frequently.
555 */
556 mxid_freezemin = multixact_freeze_min_age;
557 if (mxid_freezemin < 0)
558 mxid_freezemin = vacuum_multixact_freeze_min_age;
559 mxid_freezemin = Min(mxid_freezemin,
560 effective_multixact_freeze_max_age / 2);
561 Assert(mxid_freezemin >= 0);
562
563 /* compute the cutoff multi, being careful to generate a valid value */
564 oldestMxact = GetOldestMultiXactId();
565 mxactLimit = oldestMxact - mxid_freezemin;
566 if (mxactLimit < FirstMultiXactId)
567 mxactLimit = FirstMultiXactId;
568
569 safeMxactLimit =
570 ReadNextMultiXactId() - effective_multixact_freeze_max_age;
571 if (safeMxactLimit < FirstMultiXactId)
572 safeMxactLimit = FirstMultiXactId;
573
574 if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit))
575 {
576 ereport(WARNING,
577 (errmsg("oldest multixact is far in the past"),
578 errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
579 /* Use the safe limit, unless an older mxact is still running */
580 if (MultiXactIdPrecedes(oldestMxact, safeMxactLimit))
581 mxactLimit = oldestMxact;
582 else
583 mxactLimit = safeMxactLimit;
584 }
585
586 *multiXactCutoff = mxactLimit;
587
588 if (xidFullScanLimit != NULL)
589 {
590 int freezetable;
591
592 Assert(mxactFullScanLimit != NULL);
593
594 /*
595 * Determine the table freeze age to use: as specified by the caller,
596 * or vacuum_freeze_table_age, but in any case not more than
597 * autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
598 * VACUUM schedule, the nightly VACUUM gets a chance to freeze tuples
599 * before anti-wraparound autovacuum is launched.
600 */
601 freezetable = freeze_table_age;
602 if (freezetable < 0)
603 freezetable = vacuum_freeze_table_age;
604 freezetable = Min(freezetable, autovacuum_freeze_max_age * 0.95);
605 Assert(freezetable >= 0);
606
607 /*
608 * Compute XID limit causing a full-table vacuum, being careful not to
609 * generate a "permanent" XID.
610 */
611 limit = ReadNewTransactionId() - freezetable;
612 if (!TransactionIdIsNormal(limit))
613 limit = FirstNormalTransactionId;
614
615 *xidFullScanLimit = limit;
616
617 /*
618 * Similar to the above, determine the table freeze age to use for
619 * multixacts: as specified by the caller, or
620 * vacuum_multixact_freeze_table_age, but in any case not more than
621 * autovacuum_multixact_freeze_table_age * 0.95, so that if you have
622 * e.g. nightly VACUUM schedule, the nightly VACUUM gets a chance to
623 * freeze multixacts before anti-wraparound autovacuum is launched.
624 */
625 freezetable = multixact_freeze_table_age;
626 if (freezetable < 0)
627 freezetable = vacuum_multixact_freeze_table_age;
628 freezetable = Min(freezetable,
629 effective_multixact_freeze_max_age * 0.95);
630 Assert(freezetable >= 0);
631
632 /*
633 * Compute MultiXact limit causing a full-table vacuum, being careful
634 * to generate a valid MultiXact value.
635 */
636 mxactLimit = ReadNextMultiXactId() - freezetable;
637 if (mxactLimit < FirstMultiXactId)
638 mxactLimit = FirstMultiXactId;
639
640 *mxactFullScanLimit = mxactLimit;
641 }
642 else
643 {
644 Assert(mxactFullScanLimit == NULL);
645 }
646 }
647
648 /*
649 * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
650 *
651 * If we scanned the whole relation then we should just use the count of
652 * live tuples seen; but if we did not, we should not blindly extrapolate
653 * from that number, since VACUUM may have scanned a quite nonrandom
654 * subset of the table. When we have only partial information, we take
655 * the old value of pg_class.reltuples as a measurement of the
656 * tuple density in the unscanned pages.
657 *
658 * The is_analyze argument is historical.
659 */
660 double
vac_estimate_reltuples(Relation relation,bool is_analyze,BlockNumber total_pages,BlockNumber scanned_pages,double scanned_tuples)661 vac_estimate_reltuples(Relation relation, bool is_analyze,
662 BlockNumber total_pages,
663 BlockNumber scanned_pages,
664 double scanned_tuples)
665 {
666 BlockNumber old_rel_pages = relation->rd_rel->relpages;
667 double old_rel_tuples = relation->rd_rel->reltuples;
668 double old_density;
669 double unscanned_pages;
670 double total_tuples;
671
672 /* If we did scan the whole table, just use the count as-is */
673 if (scanned_pages >= total_pages)
674 return scanned_tuples;
675
676 /*
677 * If scanned_pages is zero but total_pages isn't, keep the existing value
678 * of reltuples. (Note: callers should avoid updating the pg_class
679 * statistics in this situation, since no new information has been
680 * provided.)
681 */
682 if (scanned_pages == 0)
683 return old_rel_tuples;
684
685 /*
686 * If old value of relpages is zero, old density is indeterminate; we
687 * can't do much except scale up scanned_tuples to match total_pages.
688 */
689 if (old_rel_pages == 0)
690 return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
691
692 /*
693 * Okay, we've covered the corner cases. The normal calculation is to
694 * convert the old measurement to a density (tuples per page), then
695 * estimate the number of tuples in the unscanned pages using that figure,
696 * and finally add on the number of tuples in the scanned pages.
697 */
698 old_density = old_rel_tuples / old_rel_pages;
699 unscanned_pages = (double) total_pages - (double) scanned_pages;
700 total_tuples = old_density * unscanned_pages + scanned_tuples;
701 return floor(total_tuples + 0.5);
702 }
703
704
705 /*
706 * vac_update_relstats() -- update statistics for one relation
707 *
708 * Update the whole-relation statistics that are kept in its pg_class
709 * row. There are additional stats that will be updated if we are
710 * doing ANALYZE, but we always update these stats. This routine works
711 * for both index and heap relation entries in pg_class.
712 *
713 * We violate transaction semantics here by overwriting the rel's
714 * existing pg_class tuple with the new values. This is reasonably
715 * safe as long as we're sure that the new values are correct whether or
716 * not this transaction commits. The reason for doing this is that if
717 * we updated these tuples in the usual way, vacuuming pg_class itself
718 * wouldn't work very well --- by the time we got done with a vacuum
719 * cycle, most of the tuples in pg_class would've been obsoleted. Of
720 * course, this only works for fixed-size not-null columns, but these are.
721 *
722 * Another reason for doing it this way is that when we are in a lazy
723 * VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
724 * Somebody vacuuming pg_class might think they could delete a tuple
725 * marked with xmin = our xid.
726 *
727 * In addition to fundamentally nontransactional statistics such as
728 * relpages and relallvisible, we try to maintain certain lazily-updated
729 * DDL flags such as relhasindex, by clearing them if no longer correct.
730 * It's safe to do this in VACUUM, which can't run in parallel with
731 * CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
732 * However, it's *not* safe to do it in an ANALYZE that's within an
733 * outer transaction, because for example the current transaction might
734 * have dropped the last index; then we'd think relhasindex should be
735 * cleared, but if the transaction later rolls back this would be wrong.
736 * So we refrain from updating the DDL flags if we're inside an outer
737 * transaction. This is OK since postponing the flag maintenance is
738 * always allowable.
739 *
740 * This routine is shared by VACUUM and ANALYZE.
741 */
742 void
vac_update_relstats(Relation relation,BlockNumber num_pages,double num_tuples,BlockNumber num_all_visible_pages,bool hasindex,TransactionId frozenxid,MultiXactId minmulti,bool in_outer_xact)743 vac_update_relstats(Relation relation,
744 BlockNumber num_pages, double num_tuples,
745 BlockNumber num_all_visible_pages,
746 bool hasindex, TransactionId frozenxid,
747 MultiXactId minmulti,
748 bool in_outer_xact)
749 {
750 Oid relid = RelationGetRelid(relation);
751 Relation rd;
752 HeapTuple ctup;
753 Form_pg_class pgcform;
754 bool dirty;
755
756 rd = heap_open(RelationRelationId, RowExclusiveLock);
757
758 /* Fetch a copy of the tuple to scribble on */
759 ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
760 if (!HeapTupleIsValid(ctup))
761 elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
762 relid);
763 pgcform = (Form_pg_class) GETSTRUCT(ctup);
764
765 /* Apply statistical updates, if any, to copied tuple */
766
767 dirty = false;
768 if (pgcform->relpages != (int32) num_pages)
769 {
770 pgcform->relpages = (int32) num_pages;
771 dirty = true;
772 }
773 if (pgcform->reltuples != (float4) num_tuples)
774 {
775 pgcform->reltuples = (float4) num_tuples;
776 dirty = true;
777 }
778 if (pgcform->relallvisible != (int32) num_all_visible_pages)
779 {
780 pgcform->relallvisible = (int32) num_all_visible_pages;
781 dirty = true;
782 }
783
784 /* Apply DDL updates, but not inside an outer transaction (see above) */
785
786 if (!in_outer_xact)
787 {
788 /*
789 * If we didn't find any indexes, reset relhasindex.
790 */
791 if (pgcform->relhasindex && !hasindex)
792 {
793 pgcform->relhasindex = false;
794 dirty = true;
795 }
796
797 /*
798 * If we have discovered that there are no indexes, then there's no
799 * primary key either. This could be done more thoroughly...
800 */
801 if (pgcform->relhaspkey && !hasindex)
802 {
803 pgcform->relhaspkey = false;
804 dirty = true;
805 }
806
807 /* We also clear relhasrules and relhastriggers if needed */
808 if (pgcform->relhasrules && relation->rd_rules == NULL)
809 {
810 pgcform->relhasrules = false;
811 dirty = true;
812 }
813 if (pgcform->relhastriggers && relation->trigdesc == NULL)
814 {
815 pgcform->relhastriggers = false;
816 dirty = true;
817 }
818 }
819
820 /*
821 * Update relfrozenxid, unless caller passed InvalidTransactionId
822 * indicating it has no new data.
823 *
824 * Ordinarily, we don't let relfrozenxid go backwards: if things are
825 * working correctly, the only way the new frozenxid could be older would
826 * be if a previous VACUUM was done with a tighter freeze_min_age, in
827 * which case we don't want to forget the work it already did. However,
828 * if the stored relfrozenxid is "in the future", then it must be corrupt
829 * and it seems best to overwrite it with the cutoff we used this time.
830 * This should match vac_update_datfrozenxid() concerning what we consider
831 * to be "in the future".
832 */
833 if (TransactionIdIsNormal(frozenxid) &&
834 pgcform->relfrozenxid != frozenxid &&
835 (TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid) ||
836 TransactionIdPrecedes(ReadNewTransactionId(),
837 pgcform->relfrozenxid)))
838 {
839 pgcform->relfrozenxid = frozenxid;
840 dirty = true;
841 }
842
843 /* Similarly for relminmxid */
844 if (MultiXactIdIsValid(minmulti) &&
845 pgcform->relminmxid != minmulti &&
846 (MultiXactIdPrecedes(pgcform->relminmxid, minmulti) ||
847 MultiXactIdPrecedes(ReadNextMultiXactId(), pgcform->relminmxid)))
848 {
849 pgcform->relminmxid = minmulti;
850 dirty = true;
851 }
852
853 /* If anything changed, write out the tuple. */
854 if (dirty)
855 heap_inplace_update(rd, ctup);
856
857 heap_close(rd, RowExclusiveLock);
858 }
859
860
861 /*
862 * vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
863 *
864 * Update pg_database's datfrozenxid entry for our database to be the
865 * minimum of the pg_class.relfrozenxid values.
866 *
867 * Similarly, update our datminmxid to be the minimum of the
868 * pg_class.relminmxid values.
869 *
870 * If we are able to advance either pg_database value, also try to
871 * truncate pg_clog and pg_multixact.
872 *
873 * We violate transaction semantics here by overwriting the database's
874 * existing pg_database tuple with the new values. This is reasonably
875 * safe since the new values are correct whether or not this transaction
876 * commits. As with vac_update_relstats, this avoids leaving dead tuples
877 * behind after a VACUUM.
878 */
879 void
vac_update_datfrozenxid(void)880 vac_update_datfrozenxid(void)
881 {
882 HeapTuple tuple;
883 Form_pg_database dbform;
884 Relation relation;
885 SysScanDesc scan;
886 HeapTuple classTup;
887 TransactionId newFrozenXid;
888 MultiXactId newMinMulti;
889 TransactionId lastSaneFrozenXid;
890 MultiXactId lastSaneMinMulti;
891 bool bogus = false;
892 bool dirty = false;
893
894 /*
895 * Restrict this task to one backend per database. This avoids race
896 * conditions that would move datfrozenxid or datminmxid backward. It
897 * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
898 * datfrozenxid passed to an earlier vac_truncate_clog() call.
899 */
900 LockDatabaseFrozenIds(ExclusiveLock);
901
902 /*
903 * Initialize the "min" calculation with GetOldestXmin, which is a
904 * reasonable approximation to the minimum relfrozenxid for not-yet-
905 * committed pg_class entries for new tables; see AddNewRelationTuple().
906 * So we cannot produce a wrong minimum by starting with this.
907 */
908 newFrozenXid = GetOldestXmin(NULL, true);
909
910 /*
911 * Similarly, initialize the MultiXact "min" with the value that would be
912 * used on pg_class for new tables. See AddNewRelationTuple().
913 */
914 newMinMulti = GetOldestMultiXactId();
915
916 /*
917 * Identify the latest relfrozenxid and relminmxid values that we could
918 * validly see during the scan. These are conservative values, but it's
919 * not really worth trying to be more exact.
920 */
921 lastSaneFrozenXid = ReadNewTransactionId();
922 lastSaneMinMulti = ReadNextMultiXactId();
923
924 /*
925 * We must seqscan pg_class to find the minimum Xid, because there is no
926 * index that can help us here.
927 */
928 relation = heap_open(RelationRelationId, AccessShareLock);
929
930 scan = systable_beginscan(relation, InvalidOid, false,
931 NULL, 0, NULL);
932
933 while ((classTup = systable_getnext(scan)) != NULL)
934 {
935 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
936
937 /*
938 * Only consider relations able to hold unfrozen XIDs (anything else
939 * should have InvalidTransactionId in relfrozenxid anyway.)
940 */
941 if (classForm->relkind != RELKIND_RELATION &&
942 classForm->relkind != RELKIND_MATVIEW &&
943 classForm->relkind != RELKIND_TOASTVALUE)
944 continue;
945
946 Assert(TransactionIdIsNormal(classForm->relfrozenxid));
947 Assert(MultiXactIdIsValid(classForm->relminmxid));
948
949 /*
950 * If things are working properly, no relation should have a
951 * relfrozenxid or relminmxid that is "in the future". However, such
952 * cases have been known to arise due to bugs in pg_upgrade. If we
953 * see any entries that are "in the future", chicken out and don't do
954 * anything. This ensures we won't truncate clog before those
955 * relations have been scanned and cleaned up.
956 */
957 if (TransactionIdPrecedes(lastSaneFrozenXid, classForm->relfrozenxid) ||
958 MultiXactIdPrecedes(lastSaneMinMulti, classForm->relminmxid))
959 {
960 bogus = true;
961 break;
962 }
963
964 if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
965 newFrozenXid = classForm->relfrozenxid;
966
967 if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
968 newMinMulti = classForm->relminmxid;
969 }
970
971 /* we're done with pg_class */
972 systable_endscan(scan);
973 heap_close(relation, AccessShareLock);
974
975 /* chicken out if bogus data found */
976 if (bogus)
977 return;
978
979 Assert(TransactionIdIsNormal(newFrozenXid));
980 Assert(MultiXactIdIsValid(newMinMulti));
981
982 /* Now fetch the pg_database tuple we need to update. */
983 relation = heap_open(DatabaseRelationId, RowExclusiveLock);
984
985 /* Fetch a copy of the tuple to scribble on */
986 tuple = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
987 if (!HeapTupleIsValid(tuple))
988 elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
989 dbform = (Form_pg_database) GETSTRUCT(tuple);
990
991 /*
992 * As in vac_update_relstats(), we ordinarily don't want to let
993 * datfrozenxid go backward; but if it's "in the future" then it must be
994 * corrupt and it seems best to overwrite it.
995 */
996 if (dbform->datfrozenxid != newFrozenXid &&
997 (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
998 TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
999 {
1000 dbform->datfrozenxid = newFrozenXid;
1001 dirty = true;
1002 }
1003 else
1004 newFrozenXid = dbform->datfrozenxid;
1005
1006 /* Ditto for datminmxid */
1007 if (dbform->datminmxid != newMinMulti &&
1008 (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1009 MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1010 {
1011 dbform->datminmxid = newMinMulti;
1012 dirty = true;
1013 }
1014 else
1015 newMinMulti = dbform->datminmxid;
1016
1017 if (dirty)
1018 heap_inplace_update(relation, tuple);
1019
1020 heap_freetuple(tuple);
1021 heap_close(relation, RowExclusiveLock);
1022
1023 /*
1024 * If we were able to advance datfrozenxid or datminmxid, see if we can
1025 * truncate pg_clog and/or pg_multixact. Also do it if the shared
1026 * XID-wrap-limit info is stale, since this action will update that too.
1027 */
1028 if (dirty || ForceTransactionIdLimitUpdate())
1029 vac_truncate_clog(newFrozenXid, newMinMulti,
1030 lastSaneFrozenXid, lastSaneMinMulti);
1031 }
1032
1033
1034 /*
1035 * vac_truncate_clog() -- attempt to truncate the commit log
1036 *
1037 * Scan pg_database to determine the system-wide oldest datfrozenxid,
1038 * and use it to truncate the transaction commit log (pg_clog).
1039 * Also update the XID wrap limit info maintained by varsup.c.
1040 * Likewise for datminmxid.
1041 *
1042 * The passed frozenXID and minMulti are the updated values for my own
1043 * pg_database entry. They're used to initialize the "min" calculations.
1044 * The caller also passes the "last sane" XID and MXID, since it has
1045 * those at hand already.
1046 *
1047 * This routine is only invoked when we've managed to change our
1048 * DB's datfrozenxid/datminmxid values, or we found that the shared
1049 * XID-wrap-limit info is stale.
1050 */
1051 static void
vac_truncate_clog(TransactionId frozenXID,MultiXactId minMulti,TransactionId lastSaneFrozenXid,MultiXactId lastSaneMinMulti)1052 vac_truncate_clog(TransactionId frozenXID,
1053 MultiXactId minMulti,
1054 TransactionId lastSaneFrozenXid,
1055 MultiXactId lastSaneMinMulti)
1056 {
1057 TransactionId nextXID = ReadNewTransactionId();
1058 Relation relation;
1059 HeapScanDesc scan;
1060 HeapTuple tuple;
1061 Oid oldestxid_datoid;
1062 Oid minmulti_datoid;
1063 bool bogus = false;
1064 bool frozenAlreadyWrapped = false;
1065
1066 /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
1067 LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
1068
1069 /* init oldest datoids to sync with my frozenXID/minMulti values */
1070 oldestxid_datoid = MyDatabaseId;
1071 minmulti_datoid = MyDatabaseId;
1072
1073 /*
1074 * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1075 *
1076 * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1077 * the values could change while we look at them. Fetch each one just
1078 * once to ensure sane behavior of the comparison logic. (Here, as in
1079 * many other places, we assume that fetching or updating an XID in shared
1080 * storage is atomic.)
1081 *
1082 * Note: we need not worry about a race condition with new entries being
1083 * inserted by CREATE DATABASE. Any such entry will have a copy of some
1084 * existing DB's datfrozenxid, and that source DB cannot be ours because
1085 * of the interlock against copying a DB containing an active backend.
1086 * Hence the new entry will not reduce the minimum. Also, if two VACUUMs
1087 * concurrently modify the datfrozenxid's of different databases, the
1088 * worst possible outcome is that pg_clog is not truncated as aggressively
1089 * as it could be.
1090 */
1091 relation = heap_open(DatabaseRelationId, AccessShareLock);
1092
1093 scan = heap_beginscan_catalog(relation, 0, NULL);
1094
1095 while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1096 {
1097 volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1098 TransactionId datfrozenxid = dbform->datfrozenxid;
1099 TransactionId datminmxid = dbform->datminmxid;
1100
1101 Assert(TransactionIdIsNormal(datfrozenxid));
1102 Assert(MultiXactIdIsValid(datminmxid));
1103
1104 /*
1105 * If things are working properly, no database should have a
1106 * datfrozenxid or datminmxid that is "in the future". However, such
1107 * cases have been known to arise due to bugs in pg_upgrade. If we
1108 * see any entries that are "in the future", chicken out and don't do
1109 * anything. This ensures we won't truncate clog before those
1110 * databases have been scanned and cleaned up. (We will issue the
1111 * "already wrapped" warning if appropriate, though.)
1112 */
1113 if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1114 MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1115 bogus = true;
1116
1117 if (TransactionIdPrecedes(nextXID, datfrozenxid))
1118 frozenAlreadyWrapped = true;
1119 else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1120 {
1121 frozenXID = datfrozenxid;
1122 oldestxid_datoid = HeapTupleGetOid(tuple);
1123 }
1124
1125 if (MultiXactIdPrecedes(datminmxid, minMulti))
1126 {
1127 minMulti = datminmxid;
1128 minmulti_datoid = HeapTupleGetOid(tuple);
1129 }
1130 }
1131
1132 heap_endscan(scan);
1133
1134 heap_close(relation, AccessShareLock);
1135
1136 /*
1137 * Do not truncate CLOG if we seem to have suffered wraparound already;
1138 * the computed minimum XID might be bogus. This case should now be
1139 * impossible due to the defenses in GetNewTransactionId, but we keep the
1140 * test anyway.
1141 */
1142 if (frozenAlreadyWrapped)
1143 {
1144 ereport(WARNING,
1145 (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1146 errdetail("You might have already suffered transaction-wraparound data loss.")));
1147 return;
1148 }
1149
1150 /* chicken out if data is bogus in any other way */
1151 if (bogus)
1152 return;
1153
1154 /*
1155 * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1156 */
1157 TruncateCLOG(frozenXID);
1158 TruncateCommitTs(frozenXID);
1159 TruncateMultiXact(minMulti, minmulti_datoid);
1160
1161 /*
1162 * Update the wrap limit for GetNewTransactionId and creation of new
1163 * MultiXactIds. Note: these functions will also signal the postmaster
1164 * for an(other) autovac cycle if needed. XXX should we avoid possibly
1165 * signalling twice?
1166 */
1167 SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1168 SetMultiXactIdLimit(minMulti, minmulti_datoid);
1169 AdvanceOldestCommitTsXid(frozenXID);
1170
1171 LWLockRelease(WrapLimitsVacuumLock);
1172 }
1173
1174
1175 /*
1176 * vacuum_rel() -- vacuum one heap relation
1177 *
1178 * Doing one heap at a time incurs extra overhead, since we need to
1179 * check that the heap exists again just before we vacuum it. The
1180 * reason that we do this is so that vacuuming can be spread across
1181 * many small transactions. Otherwise, two-phase locking would require
1182 * us to lock the entire database during one pass of the vacuum cleaner.
1183 *
1184 * At entry and exit, we are not inside a transaction.
1185 */
1186 static bool
vacuum_rel(Oid relid,RangeVar * relation,int options,VacuumParams * params)1187 vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params)
1188 {
1189 LOCKMODE lmode;
1190 Relation onerel;
1191 LockRelId onerelid;
1192 Oid toast_relid;
1193 Oid save_userid;
1194 int save_sec_context;
1195 int save_nestlevel;
1196
1197 Assert(params != NULL);
1198
1199 /* Begin a transaction for vacuuming this relation */
1200 StartTransactionCommand();
1201
1202 /*
1203 * Functions in indexes may want a snapshot set. Also, setting a snapshot
1204 * ensures that RecentGlobalXmin is kept truly recent.
1205 */
1206 PushActiveSnapshot(GetTransactionSnapshot());
1207
1208 if (!(options & VACOPT_FULL))
1209 {
1210 /*
1211 * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
1212 * other concurrent VACUUMs know that they can ignore this one while
1213 * determining their OldestXmin. (The reason we don't set it during a
1214 * full VACUUM is exactly that we may have to run user-defined
1215 * functions for functional indexes, and we want to make sure that if
1216 * they use the snapshot set above, any tuples it requires can't get
1217 * removed from other tables. An index function that depends on the
1218 * contents of other tables is arguably broken, but we won't break it
1219 * here by violating transaction semantics.)
1220 *
1221 * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
1222 * autovacuum; it's used to avoid canceling a vacuum that was invoked
1223 * in an emergency.
1224 *
1225 * Note: these flags remain set until CommitTransaction or
1226 * AbortTransaction. We don't want to clear them until we reset
1227 * MyPgXact->xid/xmin, else OldestXmin might appear to go backwards,
1228 * which is probably Not Good.
1229 */
1230 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1231 MyPgXact->vacuumFlags |= PROC_IN_VACUUM;
1232 if (params->is_wraparound)
1233 MyPgXact->vacuumFlags |= PROC_VACUUM_FOR_WRAPAROUND;
1234 LWLockRelease(ProcArrayLock);
1235 }
1236
1237 /*
1238 * Check for user-requested abort. Note we want this to be inside a
1239 * transaction, so xact.c doesn't issue useless WARNING.
1240 */
1241 CHECK_FOR_INTERRUPTS();
1242
1243 /*
1244 * Determine the type of lock we want --- hard exclusive lock for a FULL
1245 * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
1246 * way, we can be sure that no other backend is vacuuming the same table.
1247 */
1248 lmode = (options & VACOPT_FULL) ? AccessExclusiveLock : ShareUpdateExclusiveLock;
1249
1250 /*
1251 * Open the relation and get the appropriate lock on it.
1252 *
1253 * There's a race condition here: the rel may have gone away since the
1254 * last time we saw it. If so, we don't need to vacuum it.
1255 *
1256 * If we've been asked not to wait for the relation lock, acquire it first
1257 * in non-blocking mode, before calling try_relation_open().
1258 */
1259 if (!(options & VACOPT_NOWAIT))
1260 onerel = try_relation_open(relid, lmode);
1261 else if (ConditionalLockRelationOid(relid, lmode))
1262 onerel = try_relation_open(relid, NoLock);
1263 else
1264 {
1265 onerel = NULL;
1266 if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
1267 ereport(LOG,
1268 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
1269 errmsg("skipping vacuum of \"%s\" --- lock not available",
1270 relation->relname)));
1271 }
1272
1273 if (!onerel)
1274 {
1275 PopActiveSnapshot();
1276 CommitTransactionCommand();
1277 return false;
1278 }
1279
1280 /*
1281 * Check permissions.
1282 *
1283 * We allow the user to vacuum a table if he is superuser, the table
1284 * owner, or the database owner (but in the latter case, only if it's not
1285 * a shared relation). pg_class_ownercheck includes the superuser case.
1286 *
1287 * Note we choose to treat permissions failure as a WARNING and keep
1288 * trying to vacuum the rest of the DB --- is this appropriate?
1289 */
1290 if (!(pg_class_ownercheck(RelationGetRelid(onerel), GetUserId()) ||
1291 (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !onerel->rd_rel->relisshared)))
1292 {
1293 if (onerel->rd_rel->relisshared)
1294 ereport(WARNING,
1295 (errmsg("skipping \"%s\" --- only superuser can vacuum it",
1296 RelationGetRelationName(onerel))));
1297 else if (onerel->rd_rel->relnamespace == PG_CATALOG_NAMESPACE)
1298 ereport(WARNING,
1299 (errmsg("skipping \"%s\" --- only superuser or database owner can vacuum it",
1300 RelationGetRelationName(onerel))));
1301 else
1302 ereport(WARNING,
1303 (errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
1304 RelationGetRelationName(onerel))));
1305 relation_close(onerel, lmode);
1306 PopActiveSnapshot();
1307 CommitTransactionCommand();
1308 return false;
1309 }
1310
1311 /*
1312 * Check that it's a vacuumable relation; we used to do this in
1313 * get_rel_oids() but seems safer to check after we've locked the
1314 * relation.
1315 */
1316 if (onerel->rd_rel->relkind != RELKIND_RELATION &&
1317 onerel->rd_rel->relkind != RELKIND_MATVIEW &&
1318 onerel->rd_rel->relkind != RELKIND_TOASTVALUE)
1319 {
1320 ereport(WARNING,
1321 (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
1322 RelationGetRelationName(onerel))));
1323 relation_close(onerel, lmode);
1324 PopActiveSnapshot();
1325 CommitTransactionCommand();
1326 return false;
1327 }
1328
1329 /*
1330 * Silently ignore tables that are temp tables of other backends ---
1331 * trying to vacuum these will lead to great unhappiness, since their
1332 * contents are probably not up-to-date on disk. (We don't throw a
1333 * warning here; it would just lead to chatter during a database-wide
1334 * VACUUM.)
1335 */
1336 if (RELATION_IS_OTHER_TEMP(onerel))
1337 {
1338 relation_close(onerel, lmode);
1339 PopActiveSnapshot();
1340 CommitTransactionCommand();
1341 return false;
1342 }
1343
1344 /*
1345 * Get a session-level lock too. This will protect our access to the
1346 * relation across multiple transactions, so that we can vacuum the
1347 * relation's TOAST table (if any) secure in the knowledge that no one is
1348 * deleting the parent relation.
1349 *
1350 * NOTE: this cannot block, even if someone else is waiting for access,
1351 * because the lock manager knows that both lock requests are from the
1352 * same process.
1353 */
1354 onerelid = onerel->rd_lockInfo.lockRelId;
1355 LockRelationIdForSession(&onerelid, lmode);
1356
1357 /*
1358 * Remember the relation's TOAST relation for later, if the caller asked
1359 * us to process it. In VACUUM FULL, though, the toast table is
1360 * automatically rebuilt by cluster_rel so we shouldn't recurse to it.
1361 */
1362 if (!(options & VACOPT_SKIPTOAST) && !(options & VACOPT_FULL))
1363 toast_relid = onerel->rd_rel->reltoastrelid;
1364 else
1365 toast_relid = InvalidOid;
1366
1367 /*
1368 * Switch to the table owner's userid, so that any index functions are run
1369 * as that user. Also lock down security-restricted operations and
1370 * arrange to make GUC variable changes local to this command. (This is
1371 * unnecessary, but harmless, for lazy VACUUM.)
1372 */
1373 GetUserIdAndSecContext(&save_userid, &save_sec_context);
1374 SetUserIdAndSecContext(onerel->rd_rel->relowner,
1375 save_sec_context | SECURITY_RESTRICTED_OPERATION);
1376 save_nestlevel = NewGUCNestLevel();
1377
1378 /*
1379 * Do the actual work --- either FULL or "lazy" vacuum
1380 */
1381 if (options & VACOPT_FULL)
1382 {
1383 /* close relation before vacuuming, but hold lock until commit */
1384 relation_close(onerel, NoLock);
1385 onerel = NULL;
1386
1387 /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
1388 cluster_rel(relid, InvalidOid, false,
1389 (options & VACOPT_VERBOSE) != 0);
1390 }
1391 else
1392 lazy_vacuum_rel(onerel, options, params, vac_strategy);
1393
1394 /* Roll back any GUC changes executed by index functions */
1395 AtEOXact_GUC(false, save_nestlevel);
1396
1397 /* Restore userid and security context */
1398 SetUserIdAndSecContext(save_userid, save_sec_context);
1399
1400 /* all done with this class, but hold lock until commit */
1401 if (onerel)
1402 relation_close(onerel, NoLock);
1403
1404 /*
1405 * Complete the transaction and free all temporary memory used.
1406 */
1407 PopActiveSnapshot();
1408 CommitTransactionCommand();
1409
1410 /*
1411 * If the relation has a secondary toast rel, vacuum that too while we
1412 * still hold the session lock on the master table. Note however that
1413 * "analyze" will not get done on the toast table. This is good, because
1414 * the toaster always uses hardcoded index access and statistics are
1415 * totally unimportant for toast relations.
1416 */
1417 if (toast_relid != InvalidOid)
1418 vacuum_rel(toast_relid, relation, options, params);
1419
1420 /*
1421 * Now release the session-level lock on the master table.
1422 */
1423 UnlockRelationIdForSession(&onerelid, lmode);
1424
1425 /* Report that we really did it. */
1426 return true;
1427 }
1428
1429
1430 /*
1431 * Open all the vacuumable indexes of the given relation, obtaining the
1432 * specified kind of lock on each. Return an array of Relation pointers for
1433 * the indexes into *Irel, and the number of indexes into *nindexes.
1434 *
1435 * We consider an index vacuumable if it is marked insertable (IndexIsReady).
1436 * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
1437 * execution, and what we have is too corrupt to be processable. We will
1438 * vacuum even if the index isn't indisvalid; this is important because in a
1439 * unique index, uniqueness checks will be performed anyway and had better not
1440 * hit dangling index pointers.
1441 */
1442 void
vac_open_indexes(Relation relation,LOCKMODE lockmode,int * nindexes,Relation ** Irel)1443 vac_open_indexes(Relation relation, LOCKMODE lockmode,
1444 int *nindexes, Relation **Irel)
1445 {
1446 List *indexoidlist;
1447 ListCell *indexoidscan;
1448 int i;
1449
1450 Assert(lockmode != NoLock);
1451
1452 indexoidlist = RelationGetIndexList(relation);
1453
1454 /* allocate enough memory for all indexes */
1455 i = list_length(indexoidlist);
1456
1457 if (i > 0)
1458 *Irel = (Relation *) palloc(i * sizeof(Relation));
1459 else
1460 *Irel = NULL;
1461
1462 /* collect just the ready indexes */
1463 i = 0;
1464 foreach(indexoidscan, indexoidlist)
1465 {
1466 Oid indexoid = lfirst_oid(indexoidscan);
1467 Relation indrel;
1468
1469 indrel = index_open(indexoid, lockmode);
1470 if (IndexIsReady(indrel->rd_index))
1471 (*Irel)[i++] = indrel;
1472 else
1473 index_close(indrel, lockmode);
1474 }
1475
1476 *nindexes = i;
1477
1478 list_free(indexoidlist);
1479 }
1480
1481 /*
1482 * Release the resources acquired by vac_open_indexes. Optionally release
1483 * the locks (say NoLock to keep 'em).
1484 */
1485 void
vac_close_indexes(int nindexes,Relation * Irel,LOCKMODE lockmode)1486 vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
1487 {
1488 if (Irel == NULL)
1489 return;
1490
1491 while (nindexes--)
1492 {
1493 Relation ind = Irel[nindexes];
1494
1495 index_close(ind, lockmode);
1496 }
1497 pfree(Irel);
1498 }
1499
1500 /*
1501 * vacuum_delay_point --- check for interrupts and cost-based delay.
1502 *
1503 * This should be called in each major loop of VACUUM processing,
1504 * typically once per page processed.
1505 */
1506 void
vacuum_delay_point(void)1507 vacuum_delay_point(void)
1508 {
1509 /* Always check for interrupts */
1510 CHECK_FOR_INTERRUPTS();
1511
1512 /* Nap if appropriate */
1513 if (VacuumCostActive && !InterruptPending &&
1514 VacuumCostBalance >= VacuumCostLimit)
1515 {
1516 int msec;
1517
1518 msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit;
1519 if (msec > VacuumCostDelay * 4)
1520 msec = VacuumCostDelay * 4;
1521
1522 pg_usleep(msec * 1000L);
1523
1524 VacuumCostBalance = 0;
1525
1526 /* update balance values for workers */
1527 AutoVacuumUpdateDelay();
1528
1529 /* Might have gotten an interrupt while sleeping */
1530 CHECK_FOR_INTERRUPTS();
1531 }
1532 }
1533