1 /*-------------------------------------------------------------------------
2 *
3 * autovacuum.c
4 *
5 * PostgreSQL Integrated Autovacuum Daemon
6 *
7 * The autovacuum system is structured in two different kinds of processes: the
8 * autovacuum launcher and the autovacuum worker. The launcher is an
9 * always-running process, started by the postmaster when the autovacuum GUC
10 * parameter is set. The launcher schedules autovacuum workers to be started
11 * when appropriate. The workers are the processes which execute the actual
12 * vacuuming; they connect to a database as determined in the launcher, and
13 * once connected they examine the catalogs to select the tables to vacuum.
14 *
15 * The autovacuum launcher cannot start the worker processes by itself,
16 * because doing so would cause robustness issues (namely, failure to shut
17 * them down on exceptional conditions, and also, since the launcher is
18 * connected to shared memory and is thus subject to corruption there, it is
19 * not as robust as the postmaster). So it leaves that task to the postmaster.
20 *
 * There is an autovacuum shared memory area, where the launcher stores
 * information about the database it wants vacuumed.  When it wants a new
 * worker to start, it sets a flag in shared memory and sends a signal to the
 * postmaster.  The postmaster knows nothing more than that it must start a
 * worker; so it forks a new child, which turns into a worker.  This new
 * process connects to shared memory, and there it can inspect the information
 * that the launcher has set up.
28 *
29 * If the fork() call fails in the postmaster, it sets a flag in the shared
30 * memory area, and sends a signal to the launcher. The launcher, upon
31 * noticing the flag, can try starting the worker again by resending the
32 * signal. Note that the failure can only be transient (fork failure due to
33 * high load, memory pressure, too many processes, etc); more permanent
34 * problems, like failure to connect to a database, are detected later in the
35 * worker and dealt with just by having the worker exit normally. The launcher
36 * will launch a new worker again later, per schedule.
37 *
38 * When the worker is done vacuuming it sends SIGUSR2 to the launcher. The
39 * launcher then wakes up and is able to launch another worker, if the schedule
40 * is so tight that a new worker is needed immediately. At this time the
41 * launcher can also balance the settings for the various remaining workers'
42 * cost-based vacuum delay feature.
43 *
 * Note that there can be more than one worker in a database concurrently.
 * They will store the table they are currently vacuuming in shared memory, so
 * that other workers avoid being blocked waiting for the vacuum lock for that
 * table.  They will also reload the pgstats data just before vacuuming each
 * table, to avoid vacuuming a table that was just finished being vacuumed by
 * another worker and thus is no longer noted in shared memory.  However,
 * there is a window (caused by pgstat delay) during which a worker may choose
 * a table that was already vacuumed; this is a bug in the current design.
52 *
53 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
54 * Portions Copyright (c) 1994, Regents of the University of California
55 *
56 *
57 * IDENTIFICATION
58 * src/backend/postmaster/autovacuum.c
59 *
60 *-------------------------------------------------------------------------
61 */
62 #include "postgres.h"
63
64 #include <signal.h>
65 #include <sys/time.h>
66 #include <unistd.h>
67
68 #include "access/heapam.h"
69 #include "access/htup_details.h"
70 #include "access/multixact.h"
71 #include "access/reloptions.h"
72 #include "access/transam.h"
73 #include "access/xact.h"
74 #include "catalog/dependency.h"
75 #include "catalog/namespace.h"
76 #include "catalog/pg_database.h"
77 #include "commands/dbcommands.h"
78 #include "commands/vacuum.h"
79 #include "lib/ilist.h"
80 #include "libpq/pqsignal.h"
81 #include "miscadmin.h"
82 #include "pgstat.h"
83 #include "postmaster/autovacuum.h"
84 #include "postmaster/fork_process.h"
85 #include "postmaster/postmaster.h"
86 #include "storage/bufmgr.h"
87 #include "storage/ipc.h"
88 #include "storage/latch.h"
89 #include "storage/lmgr.h"
90 #include "storage/pmsignal.h"
91 #include "storage/proc.h"
92 #include "storage/procsignal.h"
93 #include "storage/sinvaladt.h"
94 #include "storage/smgr.h"
95 #include "tcop/tcopprot.h"
96 #include "utils/fmgroids.h"
97 #include "utils/fmgrprotos.h"
98 #include "utils/lsyscache.h"
99 #include "utils/memutils.h"
100 #include "utils/ps_status.h"
101 #include "utils/rel.h"
102 #include "utils/snapmgr.h"
103 #include "utils/syscache.h"
104 #include "utils/timeout.h"
105 #include "utils/timestamp.h"
106 #include "utils/tqual.h"
107
108
/*
 * GUC parameters
 *
 * All of these are ordinary (non-static) globals, so code outside this file
 * can reference them.  Only two carry a non-zero initializer here
 * (autovacuum_work_mem and Log_autovacuum_min_duration, both -1); the rest
 * start out zero/false until assigned elsewhere.
 */

/* daemon on/off switch */
bool		autovacuum_start_daemon = false;

/* the shared WorkerInfo array is sized according to this */
int			autovacuum_max_workers;

int			autovacuum_work_mem = -1;

/* used in this file as a number of seconds (launcher nap length) */
int			autovacuum_naptime;

/* vacuum / analyze thresholds and scale factors */
int			autovacuum_vac_thresh;
double		autovacuum_vac_scale;
int			autovacuum_anl_thresh;
double		autovacuum_anl_scale;

/* freeze-age limits */
int			autovacuum_freeze_max_age;
int			autovacuum_multixact_freeze_max_age;

/* cost-based vacuum delay settings */
int			autovacuum_vac_cost_delay;
int			autovacuum_vac_cost_limit;

int			Log_autovacuum_min_duration = -1;
127
128 /* how long to keep pgstat data in the launcher, in milliseconds */
129 #define STATS_READ_DELAY 1000
130
131 /* the minimum allowed time between two awakenings of the launcher */
132 #define MIN_AUTOVAC_SLEEPTIME 100.0 /* milliseconds */
133 #define MAX_AUTOVAC_SLEEPTIME 300 /* seconds */
134
135 /* Flags to tell if we are in an autovacuum process */
136 static bool am_autovacuum_launcher = false;
137 static bool am_autovacuum_worker = false;
138
139 /* Flags set by signal handlers */
140 static volatile sig_atomic_t got_SIGHUP = false;
141 static volatile sig_atomic_t got_SIGUSR2 = false;
142 static volatile sig_atomic_t got_SIGTERM = false;
143
144 /* Comparison points for determining whether freeze_max_age is exceeded */
145 static TransactionId recentXid;
146 static MultiXactId recentMulti;
147
148 /* Default freeze ages to use for autovacuum (varies by database) */
149 static int default_freeze_min_age;
150 static int default_freeze_table_age;
151 static int default_multixact_freeze_min_age;
152 static int default_multixact_freeze_table_age;
153
154 /* Memory context for long-lived data */
155 static MemoryContext AutovacMemCxt;
156
157 /* struct to keep track of databases in launcher */
158 typedef struct avl_dbase
159 {
160 Oid adl_datid; /* hash key -- must be first */
161 TimestampTz adl_next_worker;
162 int adl_score;
163 dlist_node adl_node;
164 } avl_dbase;
165
166 /* struct to keep track of databases in worker */
167 typedef struct avw_dbase
168 {
169 Oid adw_datid;
170 char *adw_name;
171 TransactionId adw_frozenxid;
172 MultiXactId adw_minmulti;
173 PgStat_StatDBEntry *adw_entry;
174 } avw_dbase;
175
176 /* struct to keep track of tables to vacuum and/or analyze, in 1st pass */
177 typedef struct av_relation
178 {
179 Oid ar_toastrelid; /* hash key - must be first */
180 Oid ar_relid;
181 bool ar_hasrelopts;
182 AutoVacOpts ar_reloptions; /* copy of AutoVacOpts from the main table's
183 * reloptions, or NULL if none */
184 } av_relation;
185
186 /* struct to keep track of tables to vacuum and/or analyze, after rechecking */
187 typedef struct autovac_table
188 {
189 Oid at_relid;
190 int at_vacoptions; /* bitmask of VacuumOption */
191 VacuumParams at_params;
192 int at_vacuum_cost_delay;
193 int at_vacuum_cost_limit;
194 bool at_dobalance;
195 bool at_sharedrel;
196 char *at_relname;
197 char *at_nspname;
198 char *at_datname;
199 } autovac_table;
200
201 /*-------------
202 * This struct holds information about a single worker's whereabouts. We keep
203 * an array of these in shared memory, sized according to
204 * autovacuum_max_workers.
205 *
206 * wi_links entry into free list or running list
207 * wi_dboid OID of the database this worker is supposed to work on
208 * wi_tableoid OID of the table currently being vacuumed, if any
209 * wi_sharedrel flag indicating whether table is marked relisshared
210 * wi_proc pointer to PGPROC of the running worker, NULL if not started
211 * wi_launchtime Time at which this worker was launched
212 * wi_cost_* Vacuum cost-based delay parameters current in this worker
213 *
214 * All fields are protected by AutovacuumLock, except for wi_tableoid and
215 * wi_sharedrel which are protected by AutovacuumScheduleLock (note these
216 * two fields are read-only for everyone except that worker itself).
217 *-------------
218 */
219 typedef struct WorkerInfoData
220 {
221 dlist_node wi_links;
222 Oid wi_dboid;
223 Oid wi_tableoid;
224 PGPROC *wi_proc;
225 TimestampTz wi_launchtime;
226 bool wi_dobalance;
227 bool wi_sharedrel;
228 int wi_cost_delay;
229 int wi_cost_limit;
230 int wi_cost_limit_base;
231 } WorkerInfoData;
232
233 typedef struct WorkerInfoData *WorkerInfo;
234
/*
 * Conditions that other processes can report to the launcher.  Each value
 * indexes a slot of the av_signal[] array in shared memory; the slots are
 * stored atomically so that they can be set without taking a lock.
 */
typedef enum AutoVacuumSignal
{
	AutoVacForkFailed,			/* failed trying to start a worker */
	AutoVacRebalance,			/* rebalance the cost limits */

	AutoVacNumSignals			/* number of signals (sizes av_signal[]);
								 * must be last */
} AutoVacuumSignal;
246
247 /*
248 * Autovacuum workitem array, stored in AutoVacuumShmem->av_workItems. This
249 * list is mostly protected by AutovacuumLock, except that if an item is
250 * marked 'active' other processes must not modify the work-identifying
251 * members.
252 */
253 typedef struct AutoVacuumWorkItem
254 {
255 AutoVacuumWorkItemType avw_type;
256 bool avw_used; /* below data is valid */
257 bool avw_active; /* being processed */
258 Oid avw_database;
259 Oid avw_relation;
260 BlockNumber avw_blockNumber;
261 } AutoVacuumWorkItem;
262
263 #define NUM_WORKITEMS 256
264
265 /*-------------
266 * The main autovacuum shmem struct. On shared memory we store this main
267 * struct and the array of WorkerInfo structs. This struct keeps:
268 *
269 * av_signal set by other processes to indicate various conditions
270 * av_launcherpid the PID of the autovacuum launcher
271 * av_freeWorkers the WorkerInfo freelist
272 * av_runningWorkers the WorkerInfo non-free queue
273 * av_startingWorker pointer to WorkerInfo currently being started (cleared by
274 * the worker itself as soon as it's up and running)
275 * av_workItems work item array
276 *
277 * This struct is protected by AutovacuumLock, except for av_signal and parts
278 * of the worker list (see above).
279 *-------------
280 */
281 typedef struct
282 {
283 sig_atomic_t av_signal[AutoVacNumSignals];
284 pid_t av_launcherpid;
285 dlist_head av_freeWorkers;
286 dlist_head av_runningWorkers;
287 WorkerInfo av_startingWorker;
288 AutoVacuumWorkItem av_workItems[NUM_WORKITEMS];
289 } AutoVacuumShmemStruct;
290
291 static AutoVacuumShmemStruct *AutoVacuumShmem;
292
293 /*
294 * the database list (of avl_dbase elements) in the launcher, and the context
295 * that contains it
296 */
297 static dlist_head DatabaseList = DLIST_STATIC_INIT(DatabaseList);
298 static MemoryContext DatabaseListCxt = NULL;
299
300 /* Pointer to my own WorkerInfo, valid on each worker */
301 static WorkerInfo MyWorkerInfo = NULL;
302
303 /* PID of launcher, valid only in worker while shutting down */
304 int AutovacuumLauncherPid = 0;
305
306 #ifdef EXEC_BACKEND
307 static pid_t avlauncher_forkexec(void);
308 static pid_t avworker_forkexec(void);
309 #endif
310 NON_EXEC_STATIC void AutoVacWorkerMain(int argc, char *argv[]) pg_attribute_noreturn();
311 NON_EXEC_STATIC void AutoVacLauncherMain(int argc, char *argv[]) pg_attribute_noreturn();
312
313 static Oid do_start_worker(void);
314 static void launcher_determine_sleep(bool canlaunch, bool recursing,
315 struct timeval *nap);
316 static void launch_worker(TimestampTz now);
317 static List *get_database_list(void);
318 static void rebuild_database_list(Oid newdb);
319 static int db_comparator(const void *a, const void *b);
320 static void autovac_balance_cost(void);
321
322 static void do_autovacuum(void);
323 static void FreeWorkerInfo(int code, Datum arg);
324
325 static autovac_table *table_recheck_autovac(Oid relid, HTAB *table_toast_map,
326 TupleDesc pg_class_desc,
327 int effective_multixact_freeze_max_age);
328 static void relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts,
329 Form_pg_class classForm,
330 PgStat_StatTabEntry *tabentry,
331 int effective_multixact_freeze_max_age,
332 bool *dovacuum, bool *doanalyze, bool *wraparound);
333
334 static void autovacuum_do_vac_analyze(autovac_table *tab,
335 BufferAccessStrategy bstrategy);
336 static AutoVacOpts *extract_autovac_opts(HeapTuple tup,
337 TupleDesc pg_class_desc);
338 static PgStat_StatTabEntry *get_pgstat_tabentry_relid(Oid relid, bool isshared,
339 PgStat_StatDBEntry *shared,
340 PgStat_StatDBEntry *dbentry);
341 static void perform_work_item(AutoVacuumWorkItem *workitem);
342 static void autovac_report_activity(autovac_table *tab);
343 static void autovac_report_workitem(AutoVacuumWorkItem *workitem,
344 const char *nspname, const char *relname);
345 static void av_sighup_handler(SIGNAL_ARGS);
346 static void avl_sigusr2_handler(SIGNAL_ARGS);
347 static void avl_sigterm_handler(SIGNAL_ARGS);
348 static void autovac_refresh_stats(void);
349
350
351
352 /********************************************************************
353 * AUTOVACUUM LAUNCHER CODE
354 ********************************************************************/
355
356 #ifdef EXEC_BACKEND
/*
 * avlauncher_forkexec
 *		EXEC_BACKEND only: build the argument vector for the autovacuum
 *		launcher and hand it to postmaster_forkexec().
 *
 * Returns whatever postmaster_forkexec() returns.
 */
static pid_t
avlauncher_forkexec(void)
{
	char	   *argvec[10];
	int			nargs;

	argvec[0] = "postgres";
	argvec[1] = "--forkavlauncher";
	argvec[2] = NULL;			/* filled in by postmaster_forkexec */
	nargs = 3;

	/* terminate the vector; the terminator is not counted in nargs */
	argvec[nargs] = NULL;

	Assert(nargs < lengthof(argvec));

	return postmaster_forkexec(nargs, argvec);
}
377
378 /*
379 * We need this set from the outside, before InitProcess is called
380 */
381 void
AutovacuumLauncherIAm(void)382 AutovacuumLauncherIAm(void)
383 {
384 am_autovacuum_launcher = true;
385 }
386 #endif
387
388 /*
389 * Main entry point for autovacuum launcher process, to be called from the
390 * postmaster.
391 */
392 int
StartAutoVacLauncher(void)393 StartAutoVacLauncher(void)
394 {
395 pid_t AutoVacPID;
396
397 #ifdef EXEC_BACKEND
398 switch ((AutoVacPID = avlauncher_forkexec()))
399 #else
400 switch ((AutoVacPID = fork_process()))
401 #endif
402 {
403 case -1:
404 ereport(LOG,
405 (errmsg("could not fork autovacuum launcher process: %m")));
406 return 0;
407
408 #ifndef EXEC_BACKEND
409 case 0:
410 /* in postmaster child ... */
411 InitPostmasterChild();
412
413 /* Close the postmaster's sockets */
414 ClosePostmasterPorts(false);
415
416 AutoVacLauncherMain(0, NULL);
417 break;
418 #endif
419 default:
420 return (int) AutoVacPID;
421 }
422
423 /* shouldn't get here */
424 return 0;
425 }
426
427 /*
428 * Main loop for the autovacuum launcher process.
429 */
430 NON_EXEC_STATIC void
AutoVacLauncherMain(int argc,char * argv[])431 AutoVacLauncherMain(int argc, char *argv[])
432 {
433 sigjmp_buf local_sigjmp_buf;
434
435 am_autovacuum_launcher = true;
436
437 /* Identify myself via ps */
438 init_ps_display("autovacuum launcher process", "", "", "");
439
440 ereport(DEBUG1,
441 (errmsg("autovacuum launcher started")));
442
443 if (PostAuthDelay)
444 pg_usleep(PostAuthDelay * 1000000L);
445
446 SetProcessingMode(InitProcessing);
447
448 /*
449 * Set up signal handlers. We operate on databases much like a regular
450 * backend, so we use the same signal handling. See equivalent code in
451 * tcop/postgres.c.
452 */
453 pqsignal(SIGHUP, av_sighup_handler);
454 pqsignal(SIGINT, StatementCancelHandler);
455 pqsignal(SIGTERM, avl_sigterm_handler);
456
457 pqsignal(SIGQUIT, quickdie);
458 InitializeTimeouts(); /* establishes SIGALRM handler */
459
460 pqsignal(SIGPIPE, SIG_IGN);
461 pqsignal(SIGUSR1, procsignal_sigusr1_handler);
462 pqsignal(SIGUSR2, avl_sigusr2_handler);
463 pqsignal(SIGFPE, FloatExceptionHandler);
464 pqsignal(SIGCHLD, SIG_DFL);
465
466 /* Early initialization */
467 BaseInit();
468
469 /*
470 * Create a per-backend PGPROC struct in shared memory, except in the
471 * EXEC_BACKEND case where this was done in SubPostmasterMain. We must do
472 * this before we can use LWLocks (and in the EXEC_BACKEND case we already
473 * had to do some stuff with LWLocks).
474 */
475 #ifndef EXEC_BACKEND
476 InitProcess();
477 #endif
478
479 InitPostgres(NULL, InvalidOid, NULL, InvalidOid, NULL);
480
481 SetProcessingMode(NormalProcessing);
482
483 /*
484 * Create a memory context that we will do all our work in. We do this so
485 * that we can reset the context during error recovery and thereby avoid
486 * possible memory leaks.
487 */
488 AutovacMemCxt = AllocSetContextCreate(TopMemoryContext,
489 "Autovacuum Launcher",
490 ALLOCSET_DEFAULT_SIZES);
491 MemoryContextSwitchTo(AutovacMemCxt);
492
493 /*
494 * If an exception is encountered, processing resumes here.
495 *
496 * This code is a stripped down version of PostgresMain error recovery.
497 */
498 if (sigsetjmp(local_sigjmp_buf, 1) != 0)
499 {
500 /* since not using PG_TRY, must reset error stack by hand */
501 error_context_stack = NULL;
502
503 /* Prevents interrupts while cleaning up */
504 HOLD_INTERRUPTS();
505
506 /* Forget any pending QueryCancel or timeout request */
507 disable_all_timeouts(false);
508 QueryCancelPending = false; /* second to avoid race condition */
509
510 /* Report the error to the server log */
511 EmitErrorReport();
512
513 /* Abort the current transaction in order to recover */
514 AbortCurrentTransaction();
515
516 /*
517 * Release any other resources, for the case where we were not in a
518 * transaction.
519 */
520 LWLockReleaseAll();
521 pgstat_report_wait_end();
522 AbortBufferIO();
523 UnlockBuffers();
524 if (CurrentResourceOwner)
525 {
526 ResourceOwnerRelease(CurrentResourceOwner,
527 RESOURCE_RELEASE_BEFORE_LOCKS,
528 false, true);
529 /* we needn't bother with the other ResourceOwnerRelease phases */
530 }
531 AtEOXact_Buffers(false);
532 AtEOXact_SMgr();
533 AtEOXact_Files();
534 AtEOXact_HashTables(false);
535
536 /*
537 * Now return to normal top-level context and clear ErrorContext for
538 * next time.
539 */
540 MemoryContextSwitchTo(AutovacMemCxt);
541 FlushErrorState();
542
543 /* Flush any leaked data in the top-level context */
544 MemoryContextResetAndDeleteChildren(AutovacMemCxt);
545
546 /* don't leave dangling pointers to freed memory */
547 DatabaseListCxt = NULL;
548 dlist_init(&DatabaseList);
549
550 /*
551 * Make sure pgstat also considers our stat data as gone. Note: we
552 * mustn't use autovac_refresh_stats here.
553 */
554 pgstat_clear_snapshot();
555
556 /* Now we can allow interrupts again */
557 RESUME_INTERRUPTS();
558
559 /* if in shutdown mode, no need for anything further; just go away */
560 if (got_SIGTERM)
561 goto shutdown;
562
563 /*
564 * Sleep at least 1 second after any error. We don't want to be
565 * filling the error logs as fast as we can.
566 */
567 pg_usleep(1000000L);
568 }
569
570 /* We can now handle ereport(ERROR) */
571 PG_exception_stack = &local_sigjmp_buf;
572
573 /* must unblock signals before calling rebuild_database_list */
574 PG_SETMASK(&UnBlockSig);
575
576 /*
577 * Set always-secure search path. Launcher doesn't connect to a database,
578 * so this has no effect.
579 */
580 SetConfigOption("search_path", "", PGC_SUSET, PGC_S_OVERRIDE);
581
582 /*
583 * Force zero_damaged_pages OFF in the autovac process, even if it is set
584 * in postgresql.conf. We don't really want such a dangerous option being
585 * applied non-interactively.
586 */
587 SetConfigOption("zero_damaged_pages", "false", PGC_SUSET, PGC_S_OVERRIDE);
588
589 /*
590 * Force settable timeouts off to avoid letting these settings prevent
591 * regular maintenance from being executed.
592 */
593 SetConfigOption("statement_timeout", "0", PGC_SUSET, PGC_S_OVERRIDE);
594 SetConfigOption("lock_timeout", "0", PGC_SUSET, PGC_S_OVERRIDE);
595 SetConfigOption("idle_in_transaction_session_timeout", "0",
596 PGC_SUSET, PGC_S_OVERRIDE);
597
598 /*
599 * Force default_transaction_isolation to READ COMMITTED. We don't want
600 * to pay the overhead of serializable mode, nor add any risk of causing
601 * deadlocks or delaying other transactions.
602 */
603 SetConfigOption("default_transaction_isolation", "read committed",
604 PGC_SUSET, PGC_S_OVERRIDE);
605
606 /*
607 * In emergency mode, just start a worker (unless shutdown was requested)
608 * and go away.
609 */
610 if (!AutoVacuumingActive())
611 {
612 if (!got_SIGTERM)
613 do_start_worker();
614 proc_exit(0); /* done */
615 }
616
617 AutoVacuumShmem->av_launcherpid = MyProcPid;
618
619 /*
620 * Create the initial database list. The invariant we want this list to
621 * keep is that it's ordered by decreasing next_time. As soon as an entry
622 * is updated to a higher time, it will be moved to the front (which is
623 * correct because the only operation is to add autovacuum_naptime to the
624 * entry, and time always increases).
625 */
626 rebuild_database_list(InvalidOid);
627
628 /* loop until shutdown request */
629 while (!got_SIGTERM)
630 {
631 struct timeval nap;
632 TimestampTz current_time = 0;
633 bool can_launch;
634 int rc;
635
636 /*
637 * This loop is a bit different from the normal use of WaitLatch,
638 * because we'd like to sleep before the first launch of a child
639 * process. So it's WaitLatch, then ResetLatch, then check for
640 * wakening conditions.
641 */
642
643 launcher_determine_sleep(!dlist_is_empty(&AutoVacuumShmem->av_freeWorkers),
644 false, &nap);
645
646 /*
647 * Wait until naptime expires or we get some type of signal (all the
648 * signal handlers will wake us by calling SetLatch).
649 */
650 rc = WaitLatch(MyLatch,
651 WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
652 (nap.tv_sec * 1000L) + (nap.tv_usec / 1000L),
653 WAIT_EVENT_AUTOVACUUM_MAIN);
654
655 ResetLatch(MyLatch);
656
657 /* Process sinval catchup interrupts that happened while sleeping */
658 ProcessCatchupInterrupt();
659
660 /*
661 * Emergency bailout if postmaster has died. This is to avoid the
662 * necessity for manual cleanup of all postmaster children.
663 */
664 if (rc & WL_POSTMASTER_DEATH)
665 proc_exit(1);
666
667 /* the normal shutdown case */
668 if (got_SIGTERM)
669 break;
670
671 if (got_SIGHUP)
672 {
673 got_SIGHUP = false;
674 ProcessConfigFile(PGC_SIGHUP);
675
676 /* shutdown requested in config file? */
677 if (!AutoVacuumingActive())
678 break;
679
680 /* rebalance in case the default cost parameters changed */
681 LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
682 autovac_balance_cost();
683 LWLockRelease(AutovacuumLock);
684
685 /* rebuild the list in case the naptime changed */
686 rebuild_database_list(InvalidOid);
687 }
688
689 /*
690 * a worker finished, or postmaster signalled failure to start a
691 * worker
692 */
693 if (got_SIGUSR2)
694 {
695 got_SIGUSR2 = false;
696
697 /* rebalance cost limits, if needed */
698 if (AutoVacuumShmem->av_signal[AutoVacRebalance])
699 {
700 LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
701 AutoVacuumShmem->av_signal[AutoVacRebalance] = false;
702 autovac_balance_cost();
703 LWLockRelease(AutovacuumLock);
704 }
705
706 if (AutoVacuumShmem->av_signal[AutoVacForkFailed])
707 {
708 /*
709 * If the postmaster failed to start a new worker, we sleep
710 * for a little while and resend the signal. The new worker's
711 * state is still in memory, so this is sufficient. After
712 * that, we restart the main loop.
713 *
714 * XXX should we put a limit to the number of times we retry?
715 * I don't think it makes much sense, because a future start
716 * of a worker will continue to fail in the same way.
717 */
718 AutoVacuumShmem->av_signal[AutoVacForkFailed] = false;
719 pg_usleep(1000000L); /* 1s */
720 SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER);
721 continue;
722 }
723 }
724
725 /*
726 * There are some conditions that we need to check before trying to
727 * start a worker. First, we need to make sure that there is a worker
728 * slot available. Second, we need to make sure that no other worker
729 * failed while starting up.
730 */
731
732 current_time = GetCurrentTimestamp();
733 LWLockAcquire(AutovacuumLock, LW_SHARED);
734
735 can_launch = !dlist_is_empty(&AutoVacuumShmem->av_freeWorkers);
736
737 if (AutoVacuumShmem->av_startingWorker != NULL)
738 {
739 int waittime;
740 WorkerInfo worker = AutoVacuumShmem->av_startingWorker;
741
742 /*
743 * We can't launch another worker when another one is still
744 * starting up (or failed while doing so), so just sleep for a bit
745 * more; that worker will wake us up again as soon as it's ready.
746 * We will only wait autovacuum_naptime seconds (up to a maximum
747 * of 60 seconds) for this to happen however. Note that failure
748 * to connect to a particular database is not a problem here,
749 * because the worker removes itself from the startingWorker
750 * pointer before trying to connect. Problems detected by the
751 * postmaster (like fork() failure) are also reported and handled
752 * differently. The only problems that may cause this code to
753 * fire are errors in the earlier sections of AutoVacWorkerMain,
754 * before the worker removes the WorkerInfo from the
755 * startingWorker pointer.
756 */
757 waittime = Min(autovacuum_naptime, 60) * 1000;
758 if (TimestampDifferenceExceeds(worker->wi_launchtime, current_time,
759 waittime))
760 {
761 LWLockRelease(AutovacuumLock);
762 LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
763
764 /*
765 * No other process can put a worker in starting mode, so if
766 * startingWorker is still INVALID after exchanging our lock,
767 * we assume it's the same one we saw above (so we don't
768 * recheck the launch time).
769 */
770 if (AutoVacuumShmem->av_startingWorker != NULL)
771 {
772 worker = AutoVacuumShmem->av_startingWorker;
773 worker->wi_dboid = InvalidOid;
774 worker->wi_tableoid = InvalidOid;
775 worker->wi_sharedrel = false;
776 worker->wi_proc = NULL;
777 worker->wi_launchtime = 0;
778 dlist_push_head(&AutoVacuumShmem->av_freeWorkers,
779 &worker->wi_links);
780 AutoVacuumShmem->av_startingWorker = NULL;
781 elog(WARNING, "worker took too long to start; canceled");
782 }
783 }
784 else
785 can_launch = false;
786 }
787 LWLockRelease(AutovacuumLock); /* either shared or exclusive */
788
789 /* if we can't do anything, just go back to sleep */
790 if (!can_launch)
791 continue;
792
793 /* We're OK to start a new worker */
794
795 if (dlist_is_empty(&DatabaseList))
796 {
797 /*
798 * Special case when the list is empty: start a worker right away.
799 * This covers the initial case, when no database is in pgstats
800 * (thus the list is empty). Note that the constraints in
801 * launcher_determine_sleep keep us from starting workers too
802 * quickly (at most once every autovacuum_naptime when the list is
803 * empty).
804 */
805 launch_worker(current_time);
806 }
807 else
808 {
809 /*
810 * because rebuild_database_list constructs a list with most
811 * distant adl_next_worker first, we obtain our database from the
812 * tail of the list.
813 */
814 avl_dbase *avdb;
815
816 avdb = dlist_tail_element(avl_dbase, adl_node, &DatabaseList);
817
818 /*
819 * launch a worker if next_worker is right now or it is in the
820 * past
821 */
822 if (TimestampDifferenceExceeds(avdb->adl_next_worker,
823 current_time, 0))
824 launch_worker(current_time);
825 }
826 }
827
828 /* Normal exit from the autovac launcher is here */
829 shutdown:
830 ereport(DEBUG1,
831 (errmsg("autovacuum launcher shutting down")));
832 AutoVacuumShmem->av_launcherpid = 0;
833
834 proc_exit(0); /* done */
835 }
836
837 /*
838 * Determine the time to sleep, based on the database list.
839 *
840 * The "canlaunch" parameter indicates whether we can start a worker right now,
841 * for example due to the workers being all busy. If this is false, we will
842 * cause a long sleep, which will be interrupted when a worker exits.
843 */
844 static void
launcher_determine_sleep(bool canlaunch,bool recursing,struct timeval * nap)845 launcher_determine_sleep(bool canlaunch, bool recursing, struct timeval *nap)
846 {
847 /*
848 * We sleep until the next scheduled vacuum. We trust that when the
849 * database list was built, care was taken so that no entries have times
850 * in the past; if the first entry has too close a next_worker value, or a
851 * time in the past, we will sleep a small nominal time.
852 */
853 if (!canlaunch)
854 {
855 nap->tv_sec = autovacuum_naptime;
856 nap->tv_usec = 0;
857 }
858 else if (!dlist_is_empty(&DatabaseList))
859 {
860 TimestampTz current_time = GetCurrentTimestamp();
861 TimestampTz next_wakeup;
862 avl_dbase *avdb;
863 long secs;
864 int usecs;
865
866 avdb = dlist_tail_element(avl_dbase, adl_node, &DatabaseList);
867
868 next_wakeup = avdb->adl_next_worker;
869 TimestampDifference(current_time, next_wakeup, &secs, &usecs);
870
871 nap->tv_sec = secs;
872 nap->tv_usec = usecs;
873 }
874 else
875 {
876 /* list is empty, sleep for whole autovacuum_naptime seconds */
877 nap->tv_sec = autovacuum_naptime;
878 nap->tv_usec = 0;
879 }
880
881 /*
882 * If the result is exactly zero, it means a database had an entry with
883 * time in the past. Rebuild the list so that the databases are evenly
884 * distributed again, and recalculate the time to sleep. This can happen
885 * if there are more tables needing vacuum than workers, and they all take
886 * longer to vacuum than autovacuum_naptime.
887 *
888 * We only recurse once. rebuild_database_list should always return times
889 * in the future, but it seems best not to trust too much on that.
890 */
891 if (nap->tv_sec == 0 && nap->tv_usec == 0 && !recursing)
892 {
893 rebuild_database_list(InvalidOid);
894 launcher_determine_sleep(canlaunch, true, nap);
895 return;
896 }
897
898 /* The smallest time we'll allow the launcher to sleep. */
899 if (nap->tv_sec <= 0 && nap->tv_usec <= MIN_AUTOVAC_SLEEPTIME * 1000)
900 {
901 nap->tv_sec = 0;
902 nap->tv_usec = MIN_AUTOVAC_SLEEPTIME * 1000;
903 }
904
905 /*
906 * If the sleep time is too large, clamp it to an arbitrary maximum (plus
907 * any fractional seconds, for simplicity). This avoids an essentially
908 * infinite sleep in strange cases like the system clock going backwards a
909 * few years.
910 */
911 if (nap->tv_sec > MAX_AUTOVAC_SLEEPTIME)
912 nap->tv_sec = MAX_AUTOVAC_SLEEPTIME;
913 }
914
915 /*
916 * Build an updated DatabaseList. It must only contain databases that appear
917 * in pgstats, and must be sorted by next_worker from highest to lowest,
918 * distributed regularly across the next autovacuum_naptime interval.
919 *
920 * Receives the Oid of the database that made this list be generated (we call
921 * this the "new" database, because when the database was already present on
922 * the list, we expect that this function is not called at all). The
923 * preexisting list, if any, will be used to preserve the order of the
924 * databases in the autovacuum_naptime period. The new database is put at the
925 * end of the interval. The actual values are not saved, which should not be
926 * much of a problem.
927 */
928 static void
rebuild_database_list(Oid newdb)929 rebuild_database_list(Oid newdb)
930 {
931 List *dblist;
932 ListCell *cell;
933 MemoryContext newcxt;
934 MemoryContext oldcxt;
935 MemoryContext tmpcxt;
936 HASHCTL hctl;
937 int score;
938 int nelems;
939 HTAB *dbhash;
940 dlist_iter iter;
941
942 /* use fresh stats */
943 autovac_refresh_stats();
944
945 newcxt = AllocSetContextCreate(AutovacMemCxt,
946 "AV dblist",
947 ALLOCSET_DEFAULT_SIZES);
948 tmpcxt = AllocSetContextCreate(newcxt,
949 "tmp AV dblist",
950 ALLOCSET_DEFAULT_SIZES);
951 oldcxt = MemoryContextSwitchTo(tmpcxt);
952
953 /*
954 * Implementing this is not as simple as it sounds, because we need to put
955 * the new database at the end of the list; next the databases that were
956 * already on the list, and finally (at the tail of the list) all the
957 * other databases that are not on the existing list.
958 *
959 * To do this, we build an empty hash table of scored databases. We will
960 * start with the lowest score (zero) for the new database, then
961 * increasing scores for the databases in the existing list, in order, and
962 * lastly increasing scores for all databases gotten via
963 * get_database_list() that are not already on the hash.
964 *
965 * Then we will put all the hash elements into an array, sort the array by
966 * score, and finally put the array elements into the new doubly linked
967 * list.
968 */
969 hctl.keysize = sizeof(Oid);
970 hctl.entrysize = sizeof(avl_dbase);
971 hctl.hcxt = tmpcxt;
972 dbhash = hash_create("db hash", 20, &hctl, /* magic number here FIXME */
973 HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
974
975 /* start by inserting the new database */
976 score = 0;
977 if (OidIsValid(newdb))
978 {
979 avl_dbase *db;
980 PgStat_StatDBEntry *entry;
981
982 /* only consider this database if it has a pgstat entry */
983 entry = pgstat_fetch_stat_dbentry(newdb);
984 if (entry != NULL)
985 {
986 /* we assume it isn't found because the hash was just created */
987 db = hash_search(dbhash, &newdb, HASH_ENTER, NULL);
988
989 /* hash_search already filled in the key */
990 db->adl_score = score++;
991 /* next_worker is filled in later */
992 }
993 }
994
995 /* Now insert the databases from the existing list */
996 dlist_foreach(iter, &DatabaseList)
997 {
998 avl_dbase *avdb = dlist_container(avl_dbase, adl_node, iter.cur);
999 avl_dbase *db;
1000 bool found;
1001 PgStat_StatDBEntry *entry;
1002
1003 /*
1004 * skip databases with no stat entries -- in particular, this gets rid
1005 * of dropped databases
1006 */
1007 entry = pgstat_fetch_stat_dbentry(avdb->adl_datid);
1008 if (entry == NULL)
1009 continue;
1010
1011 db = hash_search(dbhash, &(avdb->adl_datid), HASH_ENTER, &found);
1012
1013 if (!found)
1014 {
1015 /* hash_search already filled in the key */
1016 db->adl_score = score++;
1017 /* next_worker is filled in later */
1018 }
1019 }
1020
1021 /* finally, insert all qualifying databases not previously inserted */
1022 dblist = get_database_list();
1023 foreach(cell, dblist)
1024 {
1025 avw_dbase *avdb = lfirst(cell);
1026 avl_dbase *db;
1027 bool found;
1028 PgStat_StatDBEntry *entry;
1029
1030 /* only consider databases with a pgstat entry */
1031 entry = pgstat_fetch_stat_dbentry(avdb->adw_datid);
1032 if (entry == NULL)
1033 continue;
1034
1035 db = hash_search(dbhash, &(avdb->adw_datid), HASH_ENTER, &found);
1036 /* only update the score if the database was not already on the hash */
1037 if (!found)
1038 {
1039 /* hash_search already filled in the key */
1040 db->adl_score = score++;
1041 /* next_worker is filled in later */
1042 }
1043 }
1044 nelems = score;
1045
1046 /* from here on, the allocated memory belongs to the new list */
1047 MemoryContextSwitchTo(newcxt);
1048 dlist_init(&DatabaseList);
1049
1050 if (nelems > 0)
1051 {
1052 TimestampTz current_time;
1053 int millis_increment;
1054 avl_dbase *dbary;
1055 avl_dbase *db;
1056 HASH_SEQ_STATUS seq;
1057 int i;
1058
1059 /* put all the hash elements into an array */
1060 dbary = palloc(nelems * sizeof(avl_dbase));
1061
1062 i = 0;
1063 hash_seq_init(&seq, dbhash);
1064 while ((db = hash_seq_search(&seq)) != NULL)
1065 memcpy(&(dbary[i++]), db, sizeof(avl_dbase));
1066
1067 /* sort the array */
1068 qsort(dbary, nelems, sizeof(avl_dbase), db_comparator);
1069
1070 /*
1071 * Determine the time interval between databases in the schedule. If
1072 * we see that the configured naptime would take us to sleep times
1073 * lower than our min sleep time (which launcher_determine_sleep is
1074 * coded not to allow), silently use a larger naptime (but don't touch
1075 * the GUC variable).
1076 */
1077 millis_increment = 1000.0 * autovacuum_naptime / nelems;
1078 if (millis_increment <= MIN_AUTOVAC_SLEEPTIME)
1079 millis_increment = MIN_AUTOVAC_SLEEPTIME * 1.1;
1080
1081 current_time = GetCurrentTimestamp();
1082
1083 /*
1084 * move the elements from the array into the dllist, setting the
1085 * next_worker while walking the array
1086 */
1087 for (i = 0; i < nelems; i++)
1088 {
1089 avl_dbase *db = &(dbary[i]);
1090
1091 current_time = TimestampTzPlusMilliseconds(current_time,
1092 millis_increment);
1093 db->adl_next_worker = current_time;
1094
1095 /* later elements should go closer to the head of the list */
1096 dlist_push_head(&DatabaseList, &db->adl_node);
1097 }
1098 }
1099
1100 /* all done, clean up memory */
1101 if (DatabaseListCxt != NULL)
1102 MemoryContextDelete(DatabaseListCxt);
1103 MemoryContextDelete(tmpcxt);
1104 DatabaseListCxt = newcxt;
1105 MemoryContextSwitchTo(oldcxt);
1106 }
1107
1108 /* qsort comparator for avl_dbase, using adl_score */
1109 static int
db_comparator(const void * a,const void * b)1110 db_comparator(const void *a, const void *b)
1111 {
1112 if (((const avl_dbase *) a)->adl_score == ((const avl_dbase *) b)->adl_score)
1113 return 0;
1114 else
1115 return (((const avl_dbase *) a)->adl_score < ((const avl_dbase *) b)->adl_score) ? 1 : -1;
1116 }
1117
1118 /*
1119 * do_start_worker
1120 *
1121 * Bare-bones procedure for starting an autovacuum worker from the launcher.
1122 * It determines what database to work on, sets up shared memory stuff and
1123 * signals postmaster to start the worker. It fails gracefully if invoked when
1124 * autovacuum_workers are already active.
1125 *
1126 * Return value is the OID of the database that the worker is going to process,
1127 * or InvalidOid if no worker was actually started.
1128 */
1129 static Oid
do_start_worker(void)1130 do_start_worker(void)
1131 {
1132 List *dblist;
1133 ListCell *cell;
1134 TransactionId xidForceLimit;
1135 MultiXactId multiForceLimit;
1136 bool for_xid_wrap;
1137 bool for_multi_wrap;
1138 avw_dbase *avdb;
1139 TimestampTz current_time;
1140 bool skipit = false;
1141 Oid retval = InvalidOid;
1142 MemoryContext tmpcxt,
1143 oldcxt;
1144
1145 /* return quickly when there are no free workers */
1146 LWLockAcquire(AutovacuumLock, LW_SHARED);
1147 if (dlist_is_empty(&AutoVacuumShmem->av_freeWorkers))
1148 {
1149 LWLockRelease(AutovacuumLock);
1150 return InvalidOid;
1151 }
1152 LWLockRelease(AutovacuumLock);
1153
1154 /*
1155 * Create and switch to a temporary context to avoid leaking the memory
1156 * allocated for the database list.
1157 */
1158 tmpcxt = AllocSetContextCreate(CurrentMemoryContext,
1159 "Start worker tmp cxt",
1160 ALLOCSET_DEFAULT_SIZES);
1161 oldcxt = MemoryContextSwitchTo(tmpcxt);
1162
1163 /* use fresh stats */
1164 autovac_refresh_stats();
1165
1166 /* Get a list of databases */
1167 dblist = get_database_list();
1168
1169 /*
1170 * Determine the oldest datfrozenxid/relfrozenxid that we will allow to
1171 * pass without forcing a vacuum. (This limit can be tightened for
1172 * particular tables, but not loosened.)
1173 */
1174 recentXid = ReadNewTransactionId();
1175 xidForceLimit = recentXid - autovacuum_freeze_max_age;
1176 /* ensure it's a "normal" XID, else TransactionIdPrecedes misbehaves */
1177 /* this can cause the limit to go backwards by 3, but that's OK */
1178 if (xidForceLimit < FirstNormalTransactionId)
1179 xidForceLimit -= FirstNormalTransactionId;
1180
1181 /* Also determine the oldest datminmxid we will consider. */
1182 recentMulti = ReadNextMultiXactId();
1183 multiForceLimit = recentMulti - MultiXactMemberFreezeThreshold();
1184 if (multiForceLimit < FirstMultiXactId)
1185 multiForceLimit -= FirstMultiXactId;
1186
1187 /*
1188 * Choose a database to connect to. We pick the database that was least
1189 * recently auto-vacuumed, or one that needs vacuuming to prevent Xid
1190 * wraparound-related data loss. If any db at risk of Xid wraparound is
1191 * found, we pick the one with oldest datfrozenxid, independently of
1192 * autovacuum times; similarly we pick the one with the oldest datminmxid
1193 * if any is in MultiXactId wraparound. Note that those in Xid wraparound
1194 * danger are given more priority than those in multi wraparound danger.
1195 *
1196 * Note that a database with no stats entry is not considered, except for
1197 * Xid wraparound purposes. The theory is that if no one has ever
1198 * connected to it since the stats were last initialized, it doesn't need
1199 * vacuuming.
1200 *
1201 * XXX This could be improved if we had more info about whether it needs
1202 * vacuuming before connecting to it. Perhaps look through the pgstats
1203 * data for the database's tables? One idea is to keep track of the
1204 * number of new and dead tuples per database in pgstats. However it
1205 * isn't clear how to construct a metric that measures that and not cause
1206 * starvation for less busy databases.
1207 */
1208 avdb = NULL;
1209 for_xid_wrap = false;
1210 for_multi_wrap = false;
1211 current_time = GetCurrentTimestamp();
1212 foreach(cell, dblist)
1213 {
1214 avw_dbase *tmp = lfirst(cell);
1215 dlist_iter iter;
1216
1217 /* Check to see if this one is at risk of wraparound */
1218 if (TransactionIdPrecedes(tmp->adw_frozenxid, xidForceLimit))
1219 {
1220 if (avdb == NULL ||
1221 TransactionIdPrecedes(tmp->adw_frozenxid,
1222 avdb->adw_frozenxid))
1223 avdb = tmp;
1224 for_xid_wrap = true;
1225 continue;
1226 }
1227 else if (for_xid_wrap)
1228 continue; /* ignore not-at-risk DBs */
1229 else if (MultiXactIdPrecedes(tmp->adw_minmulti, multiForceLimit))
1230 {
1231 if (avdb == NULL ||
1232 MultiXactIdPrecedes(tmp->adw_minmulti, avdb->adw_minmulti))
1233 avdb = tmp;
1234 for_multi_wrap = true;
1235 continue;
1236 }
1237 else if (for_multi_wrap)
1238 continue; /* ignore not-at-risk DBs */
1239
1240 /* Find pgstat entry if any */
1241 tmp->adw_entry = pgstat_fetch_stat_dbentry(tmp->adw_datid);
1242
1243 /*
1244 * Skip a database with no pgstat entry; it means it hasn't seen any
1245 * activity.
1246 */
1247 if (!tmp->adw_entry)
1248 continue;
1249
1250 /*
1251 * Also, skip a database that appears on the database list as having
1252 * been processed recently (less than autovacuum_naptime seconds ago).
1253 * We do this so that we don't select a database which we just
1254 * selected, but that pgstat hasn't gotten around to updating the last
1255 * autovacuum time yet.
1256 */
1257 skipit = false;
1258
1259 dlist_reverse_foreach(iter, &DatabaseList)
1260 {
1261 avl_dbase *dbp = dlist_container(avl_dbase, adl_node, iter.cur);
1262
1263 if (dbp->adl_datid == tmp->adw_datid)
1264 {
1265 /*
1266 * Skip this database if its next_worker value falls between
1267 * the current time and the current time plus naptime.
1268 */
1269 if (!TimestampDifferenceExceeds(dbp->adl_next_worker,
1270 current_time, 0) &&
1271 !TimestampDifferenceExceeds(current_time,
1272 dbp->adl_next_worker,
1273 autovacuum_naptime * 1000))
1274 skipit = true;
1275
1276 break;
1277 }
1278 }
1279 if (skipit)
1280 continue;
1281
1282 /*
1283 * Remember the db with oldest autovac time. (If we are here, both
1284 * tmp->entry and db->entry must be non-null.)
1285 */
1286 if (avdb == NULL ||
1287 tmp->adw_entry->last_autovac_time < avdb->adw_entry->last_autovac_time)
1288 avdb = tmp;
1289 }
1290
1291 /* Found a database -- process it */
1292 if (avdb != NULL)
1293 {
1294 WorkerInfo worker;
1295 dlist_node *wptr;
1296
1297 LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
1298
1299 /*
1300 * Get a worker entry from the freelist. We checked above, so there
1301 * really should be a free slot.
1302 */
1303 wptr = dlist_pop_head_node(&AutoVacuumShmem->av_freeWorkers);
1304
1305 worker = dlist_container(WorkerInfoData, wi_links, wptr);
1306 worker->wi_dboid = avdb->adw_datid;
1307 worker->wi_proc = NULL;
1308 worker->wi_launchtime = GetCurrentTimestamp();
1309
1310 AutoVacuumShmem->av_startingWorker = worker;
1311
1312 LWLockRelease(AutovacuumLock);
1313
1314 SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER);
1315
1316 retval = avdb->adw_datid;
1317 }
1318 else if (skipit)
1319 {
1320 /*
1321 * If we skipped all databases on the list, rebuild it, because it
1322 * probably contains a dropped database.
1323 */
1324 rebuild_database_list(InvalidOid);
1325 }
1326
1327 MemoryContextSwitchTo(oldcxt);
1328 MemoryContextDelete(tmpcxt);
1329
1330 return retval;
1331 }
1332
1333 /*
1334 * launch_worker
1335 *
1336 * Wrapper for starting a worker from the launcher. Besides actually starting
1337 * it, update the database list to reflect the next time that another one will
1338 * need to be started on the selected database. The actual database choice is
1339 * left to do_start_worker.
1340 *
1341 * This routine is also expected to insert an entry into the database list if
1342 * the selected database was previously absent from the list.
1343 */
1344 static void
launch_worker(TimestampTz now)1345 launch_worker(TimestampTz now)
1346 {
1347 Oid dbid;
1348 dlist_iter iter;
1349
1350 dbid = do_start_worker();
1351 if (OidIsValid(dbid))
1352 {
1353 bool found = false;
1354
1355 /*
1356 * Walk the database list and update the corresponding entry. If the
1357 * database is not on the list, we'll recreate the list.
1358 */
1359 dlist_foreach(iter, &DatabaseList)
1360 {
1361 avl_dbase *avdb = dlist_container(avl_dbase, adl_node, iter.cur);
1362
1363 if (avdb->adl_datid == dbid)
1364 {
1365 found = true;
1366
1367 /*
1368 * add autovacuum_naptime seconds to the current time, and use
1369 * that as the new "next_worker" field for this database.
1370 */
1371 avdb->adl_next_worker =
1372 TimestampTzPlusMilliseconds(now, autovacuum_naptime * 1000);
1373
1374 dlist_move_head(&DatabaseList, iter.cur);
1375 break;
1376 }
1377 }
1378
1379 /*
1380 * If the database was not present in the database list, we rebuild
1381 * the list. It's possible that the database does not get into the
1382 * list anyway, for example if it's a database that doesn't have a
1383 * pgstat entry, but this is not a problem because we don't want to
1384 * schedule workers regularly into those in any case.
1385 */
1386 if (!found)
1387 rebuild_database_list(dbid);
1388 }
1389 }
1390
1391 /*
1392 * Called from postmaster to signal a failure to fork a process to become
1393 * worker. The postmaster should kill(SIGUSR2) the launcher shortly
1394 * after calling this function.
1395 */
1396 void
AutoVacWorkerFailed(void)1397 AutoVacWorkerFailed(void)
1398 {
1399 AutoVacuumShmem->av_signal[AutoVacForkFailed] = true;
1400 }
1401
1402 /* SIGHUP: set flag to re-read config file at next convenient time */
1403 static void
av_sighup_handler(SIGNAL_ARGS)1404 av_sighup_handler(SIGNAL_ARGS)
1405 {
1406 int save_errno = errno;
1407
1408 got_SIGHUP = true;
1409 SetLatch(MyLatch);
1410
1411 errno = save_errno;
1412 }
1413
1414 /* SIGUSR2: a worker is up and running, or just finished, or failed to fork */
1415 static void
avl_sigusr2_handler(SIGNAL_ARGS)1416 avl_sigusr2_handler(SIGNAL_ARGS)
1417 {
1418 int save_errno = errno;
1419
1420 got_SIGUSR2 = true;
1421 SetLatch(MyLatch);
1422
1423 errno = save_errno;
1424 }
1425
1426 /* SIGTERM: time to die */
1427 static void
avl_sigterm_handler(SIGNAL_ARGS)1428 avl_sigterm_handler(SIGNAL_ARGS)
1429 {
1430 int save_errno = errno;
1431
1432 got_SIGTERM = true;
1433 SetLatch(MyLatch);
1434
1435 errno = save_errno;
1436 }
1437
1438
1439 /********************************************************************
1440 * AUTOVACUUM WORKER CODE
1441 ********************************************************************/
1442
1443 #ifdef EXEC_BACKEND
1444 /*
1445 * forkexec routines for the autovacuum worker.
1446 *
1447 * Format up the arglist, then fork and exec.
1448 */
1449 static pid_t
avworker_forkexec(void)1450 avworker_forkexec(void)
1451 {
1452 char *av[10];
1453 int ac = 0;
1454
1455 av[ac++] = "postgres";
1456 av[ac++] = "--forkavworker";
1457 av[ac++] = NULL; /* filled in by postmaster_forkexec */
1458 av[ac] = NULL;
1459
1460 Assert(ac < lengthof(av));
1461
1462 return postmaster_forkexec(ac, av);
1463 }
1464
1465 /*
1466 * We need this set from the outside, before InitProcess is called
1467 */
1468 void
AutovacuumWorkerIAm(void)1469 AutovacuumWorkerIAm(void)
1470 {
1471 am_autovacuum_worker = true;
1472 }
1473 #endif
1474
1475 /*
1476 * Main entry point for autovacuum worker process.
1477 *
1478 * This code is heavily based on pgarch.c, q.v.
1479 */
1480 int
StartAutoVacWorker(void)1481 StartAutoVacWorker(void)
1482 {
1483 pid_t worker_pid;
1484
1485 #ifdef EXEC_BACKEND
1486 switch ((worker_pid = avworker_forkexec()))
1487 #else
1488 switch ((worker_pid = fork_process()))
1489 #endif
1490 {
1491 case -1:
1492 ereport(LOG,
1493 (errmsg("could not fork autovacuum worker process: %m")));
1494 return 0;
1495
1496 #ifndef EXEC_BACKEND
1497 case 0:
1498 /* in postmaster child ... */
1499 InitPostmasterChild();
1500
1501 /* Close the postmaster's sockets */
1502 ClosePostmasterPorts(false);
1503
1504 AutoVacWorkerMain(0, NULL);
1505 break;
1506 #endif
1507 default:
1508 return (int) worker_pid;
1509 }
1510
1511 /* shouldn't get here */
1512 return 0;
1513 }
1514
1515 /*
1516 * AutoVacWorkerMain
1517 */
1518 NON_EXEC_STATIC void
AutoVacWorkerMain(int argc,char * argv[])1519 AutoVacWorkerMain(int argc, char *argv[])
1520 {
1521 sigjmp_buf local_sigjmp_buf;
1522 Oid dbid;
1523
1524 am_autovacuum_worker = true;
1525
1526 /* Identify myself via ps */
1527 init_ps_display("autovacuum worker process", "", "", "");
1528
1529 SetProcessingMode(InitProcessing);
1530
1531 /*
1532 * Set up signal handlers. We operate on databases much like a regular
1533 * backend, so we use the same signal handling. See equivalent code in
1534 * tcop/postgres.c.
1535 */
1536 pqsignal(SIGHUP, av_sighup_handler);
1537
1538 /*
1539 * SIGINT is used to signal canceling the current table's vacuum; SIGTERM
1540 * means abort and exit cleanly, and SIGQUIT means abandon ship.
1541 */
1542 pqsignal(SIGINT, StatementCancelHandler);
1543 pqsignal(SIGTERM, die);
1544 pqsignal(SIGQUIT, quickdie);
1545 InitializeTimeouts(); /* establishes SIGALRM handler */
1546
1547 pqsignal(SIGPIPE, SIG_IGN);
1548 pqsignal(SIGUSR1, procsignal_sigusr1_handler);
1549 pqsignal(SIGUSR2, SIG_IGN);
1550 pqsignal(SIGFPE, FloatExceptionHandler);
1551 pqsignal(SIGCHLD, SIG_DFL);
1552
1553 /* Early initialization */
1554 BaseInit();
1555
1556 /*
1557 * Create a per-backend PGPROC struct in shared memory, except in the
1558 * EXEC_BACKEND case where this was done in SubPostmasterMain. We must do
1559 * this before we can use LWLocks (and in the EXEC_BACKEND case we already
1560 * had to do some stuff with LWLocks).
1561 */
1562 #ifndef EXEC_BACKEND
1563 InitProcess();
1564 #endif
1565
1566 /*
1567 * If an exception is encountered, processing resumes here.
1568 *
1569 * See notes in postgres.c about the design of this coding.
1570 */
1571 if (sigsetjmp(local_sigjmp_buf, 1) != 0)
1572 {
1573 /* since not using PG_TRY, must reset error stack by hand */
1574 error_context_stack = NULL;
1575
1576 /* Prevents interrupts while cleaning up */
1577 HOLD_INTERRUPTS();
1578
1579 /* Report the error to the server log */
1580 EmitErrorReport();
1581
1582 /*
1583 * We can now go away. Note that because we called InitProcess, a
1584 * callback was registered to do ProcKill, which will clean up
1585 * necessary state.
1586 */
1587 proc_exit(0);
1588 }
1589
1590 /* We can now handle ereport(ERROR) */
1591 PG_exception_stack = &local_sigjmp_buf;
1592
1593 PG_SETMASK(&UnBlockSig);
1594
1595 /*
1596 * Set always-secure search path, so malicious users can't redirect user
1597 * code (e.g. pg_index.indexprs). (That code runs in a
1598 * SECURITY_RESTRICTED_OPERATION sandbox, so malicious users could not
1599 * take control of the entire autovacuum worker in any case.)
1600 */
1601 SetConfigOption("search_path", "", PGC_SUSET, PGC_S_OVERRIDE);
1602
1603 /*
1604 * Force zero_damaged_pages OFF in the autovac process, even if it is set
1605 * in postgresql.conf. We don't really want such a dangerous option being
1606 * applied non-interactively.
1607 */
1608 SetConfigOption("zero_damaged_pages", "false", PGC_SUSET, PGC_S_OVERRIDE);
1609
1610 /*
1611 * Force settable timeouts off to avoid letting these settings prevent
1612 * regular maintenance from being executed.
1613 */
1614 SetConfigOption("statement_timeout", "0", PGC_SUSET, PGC_S_OVERRIDE);
1615 SetConfigOption("lock_timeout", "0", PGC_SUSET, PGC_S_OVERRIDE);
1616 SetConfigOption("idle_in_transaction_session_timeout", "0",
1617 PGC_SUSET, PGC_S_OVERRIDE);
1618
1619 /*
1620 * Force default_transaction_isolation to READ COMMITTED. We don't want
1621 * to pay the overhead of serializable mode, nor add any risk of causing
1622 * deadlocks or delaying other transactions.
1623 */
1624 SetConfigOption("default_transaction_isolation", "read committed",
1625 PGC_SUSET, PGC_S_OVERRIDE);
1626
1627 /*
1628 * Force synchronous replication off to allow regular maintenance even if
1629 * we are waiting for standbys to connect. This is important to ensure we
1630 * aren't blocked from performing anti-wraparound tasks.
1631 */
1632 if (synchronous_commit > SYNCHRONOUS_COMMIT_LOCAL_FLUSH)
1633 SetConfigOption("synchronous_commit", "local",
1634 PGC_SUSET, PGC_S_OVERRIDE);
1635
1636 /*
1637 * Get the info about the database we're going to work on.
1638 */
1639 LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
1640
1641 /*
1642 * beware of startingWorker being INVALID; this should normally not
1643 * happen, but if a worker fails after forking and before this, the
1644 * launcher might have decided to remove it from the queue and start
1645 * again.
1646 */
1647 if (AutoVacuumShmem->av_startingWorker != NULL)
1648 {
1649 MyWorkerInfo = AutoVacuumShmem->av_startingWorker;
1650 dbid = MyWorkerInfo->wi_dboid;
1651 MyWorkerInfo->wi_proc = MyProc;
1652
1653 /* insert into the running list */
1654 dlist_push_head(&AutoVacuumShmem->av_runningWorkers,
1655 &MyWorkerInfo->wi_links);
1656
1657 /*
1658 * remove from the "starting" pointer, so that the launcher can start
1659 * a new worker if required
1660 */
1661 AutoVacuumShmem->av_startingWorker = NULL;
1662 LWLockRelease(AutovacuumLock);
1663
1664 on_shmem_exit(FreeWorkerInfo, 0);
1665
1666 /* wake up the launcher */
1667 if (AutoVacuumShmem->av_launcherpid != 0)
1668 kill(AutoVacuumShmem->av_launcherpid, SIGUSR2);
1669 }
1670 else
1671 {
1672 /* no worker entry for me, go away */
1673 elog(WARNING, "autovacuum worker started without a worker entry");
1674 dbid = InvalidOid;
1675 LWLockRelease(AutovacuumLock);
1676 }
1677
1678 if (OidIsValid(dbid))
1679 {
1680 char dbname[NAMEDATALEN];
1681
1682 /*
1683 * Report autovac startup to the stats collector. We deliberately do
1684 * this before InitPostgres, so that the last_autovac_time will get
1685 * updated even if the connection attempt fails. This is to prevent
1686 * autovac from getting "stuck" repeatedly selecting an unopenable
1687 * database, rather than making any progress on stuff it can connect
1688 * to.
1689 */
1690 pgstat_report_autovac(dbid);
1691
1692 /*
1693 * Connect to the selected database
1694 *
1695 * Note: if we have selected a just-deleted database (due to using
1696 * stale stats info), we'll fail and exit here.
1697 */
1698 InitPostgres(NULL, dbid, NULL, InvalidOid, dbname);
1699 SetProcessingMode(NormalProcessing);
1700 set_ps_display(dbname, false);
1701 ereport(DEBUG1,
1702 (errmsg("autovacuum: processing database \"%s\"", dbname)));
1703
1704 if (PostAuthDelay)
1705 pg_usleep(PostAuthDelay * 1000000L);
1706
1707 /* And do an appropriate amount of work */
1708 recentXid = ReadNewTransactionId();
1709 recentMulti = ReadNextMultiXactId();
1710 do_autovacuum();
1711 }
1712
1713 /*
1714 * The launcher will be notified of my death in ProcKill, *if* we managed
1715 * to get a worker slot at all
1716 */
1717
1718 /* All done, go away */
1719 proc_exit(0);
1720 }
1721
1722 /*
1723 * Return a WorkerInfo to the free list
1724 */
1725 static void
FreeWorkerInfo(int code,Datum arg)1726 FreeWorkerInfo(int code, Datum arg)
1727 {
1728 if (MyWorkerInfo != NULL)
1729 {
1730 LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
1731
1732 /*
1733 * Wake the launcher up so that he can launch a new worker immediately
1734 * if required. We only save the launcher's PID in local memory here;
1735 * the actual signal will be sent when the PGPROC is recycled. Note
1736 * that we always do this, so that the launcher can rebalance the cost
1737 * limit setting of the remaining workers.
1738 *
1739 * We somewhat ignore the risk that the launcher changes its PID
1740 * between us reading it and the actual kill; we expect ProcKill to be
1741 * called shortly after us, and we assume that PIDs are not reused too
1742 * quickly after a process exits.
1743 */
1744 AutovacuumLauncherPid = AutoVacuumShmem->av_launcherpid;
1745
1746 dlist_delete(&MyWorkerInfo->wi_links);
1747 MyWorkerInfo->wi_dboid = InvalidOid;
1748 MyWorkerInfo->wi_tableoid = InvalidOid;
1749 MyWorkerInfo->wi_sharedrel = false;
1750 MyWorkerInfo->wi_proc = NULL;
1751 MyWorkerInfo->wi_launchtime = 0;
1752 MyWorkerInfo->wi_dobalance = false;
1753 MyWorkerInfo->wi_cost_delay = 0;
1754 MyWorkerInfo->wi_cost_limit = 0;
1755 MyWorkerInfo->wi_cost_limit_base = 0;
1756 dlist_push_head(&AutoVacuumShmem->av_freeWorkers,
1757 &MyWorkerInfo->wi_links);
1758 /* not mine anymore */
1759 MyWorkerInfo = NULL;
1760
1761 /*
1762 * now that we're inactive, cause a rebalancing of the surviving
1763 * workers
1764 */
1765 AutoVacuumShmem->av_signal[AutoVacRebalance] = true;
1766 LWLockRelease(AutovacuumLock);
1767 }
1768 }
1769
1770 /*
1771 * Update the cost-based delay parameters, so that multiple workers consume
1772 * each a fraction of the total available I/O.
1773 */
1774 void
AutoVacuumUpdateDelay(void)1775 AutoVacuumUpdateDelay(void)
1776 {
1777 if (MyWorkerInfo)
1778 {
1779 VacuumCostDelay = MyWorkerInfo->wi_cost_delay;
1780 VacuumCostLimit = MyWorkerInfo->wi_cost_limit;
1781 }
1782 }
1783
1784 /*
1785 * autovac_balance_cost
1786 * Recalculate the cost limit setting for each active worker.
1787 *
1788 * Caller must hold the AutovacuumLock in exclusive mode.
1789 */
1790 static void
autovac_balance_cost(void)1791 autovac_balance_cost(void)
1792 {
1793 /*
1794 * The idea here is that we ration out I/O equally. The amount of I/O
1795 * that a worker can consume is determined by cost_limit/cost_delay, so we
1796 * try to equalize those ratios rather than the raw limit settings.
1797 *
1798 * note: in cost_limit, zero also means use value from elsewhere, because
1799 * zero is not a valid value.
1800 */
1801 int vac_cost_limit = (autovacuum_vac_cost_limit > 0 ?
1802 autovacuum_vac_cost_limit : VacuumCostLimit);
1803 int vac_cost_delay = (autovacuum_vac_cost_delay >= 0 ?
1804 autovacuum_vac_cost_delay : VacuumCostDelay);
1805 double cost_total;
1806 double cost_avail;
1807 dlist_iter iter;
1808
1809 /* not set? nothing to do */
1810 if (vac_cost_limit <= 0 || vac_cost_delay <= 0)
1811 return;
1812
1813 /* calculate the total base cost limit of participating active workers */
1814 cost_total = 0.0;
1815 dlist_foreach(iter, &AutoVacuumShmem->av_runningWorkers)
1816 {
1817 WorkerInfo worker = dlist_container(WorkerInfoData, wi_links, iter.cur);
1818
1819 if (worker->wi_proc != NULL &&
1820 worker->wi_dobalance &&
1821 worker->wi_cost_limit_base > 0 && worker->wi_cost_delay > 0)
1822 cost_total +=
1823 (double) worker->wi_cost_limit_base / worker->wi_cost_delay;
1824 }
1825
1826 /* there are no cost limits -- nothing to do */
1827 if (cost_total <= 0)
1828 return;
1829
1830 /*
1831 * Adjust cost limit of each active worker to balance the total of cost
1832 * limit to autovacuum_vacuum_cost_limit.
1833 */
1834 cost_avail = (double) vac_cost_limit / vac_cost_delay;
1835 dlist_foreach(iter, &AutoVacuumShmem->av_runningWorkers)
1836 {
1837 WorkerInfo worker = dlist_container(WorkerInfoData, wi_links, iter.cur);
1838
1839 if (worker->wi_proc != NULL &&
1840 worker->wi_dobalance &&
1841 worker->wi_cost_limit_base > 0 && worker->wi_cost_delay > 0)
1842 {
1843 int limit = (int)
1844 (cost_avail * worker->wi_cost_limit_base / cost_total);
1845
1846 /*
1847 * We put a lower bound of 1 on the cost_limit, to avoid division-
1848 * by-zero in the vacuum code. Also, in case of roundoff trouble
1849 * in these calculations, let's be sure we don't ever set
1850 * cost_limit to more than the base value.
1851 */
1852 worker->wi_cost_limit = Max(Min(limit,
1853 worker->wi_cost_limit_base),
1854 1);
1855 }
1856
1857 if (worker->wi_proc != NULL)
1858 elog(DEBUG2, "autovac_balance_cost(pid=%u db=%u, rel=%u, dobalance=%s cost_limit=%d, cost_limit_base=%d, cost_delay=%d)",
1859 worker->wi_proc->pid, worker->wi_dboid, worker->wi_tableoid,
1860 worker->wi_dobalance ? "yes" : "no",
1861 worker->wi_cost_limit, worker->wi_cost_limit_base,
1862 worker->wi_cost_delay);
1863 }
1864 }
1865
1866 /*
1867 * get_database_list
1868 * Return a list of all databases found in pg_database.
1869 *
1870 * The list and associated data is allocated in the caller's memory context,
1871 * which is in charge of ensuring that it's properly cleaned up afterwards.
1872 *
1873 * Note: this is the only function in which the autovacuum launcher uses a
1874 * transaction. Although we aren't attached to any particular database and
1875 * therefore can't access most catalogs, we do have enough infrastructure
1876 * to do a seqscan on pg_database.
1877 */
1878 static List *
get_database_list(void)1879 get_database_list(void)
1880 {
1881 List *dblist = NIL;
1882 Relation rel;
1883 HeapScanDesc scan;
1884 HeapTuple tup;
1885 MemoryContext resultcxt;
1886
1887 /* This is the context that we will allocate our output data in */
1888 resultcxt = CurrentMemoryContext;
1889
1890 /*
1891 * Start a transaction so we can access pg_database, and get a snapshot.
1892 * We don't have a use for the snapshot itself, but we're interested in
1893 * the secondary effect that it sets RecentGlobalXmin. (This is critical
1894 * for anything that reads heap pages, because HOT may decide to prune
1895 * them even if the process doesn't attempt to modify any tuples.)
1896 */
1897 StartTransactionCommand();
1898 (void) GetTransactionSnapshot();
1899
1900 rel = heap_open(DatabaseRelationId, AccessShareLock);
1901 scan = heap_beginscan_catalog(rel, 0, NULL);
1902
1903 while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection)))
1904 {
1905 Form_pg_database pgdatabase = (Form_pg_database) GETSTRUCT(tup);
1906 avw_dbase *avdb;
1907 MemoryContext oldcxt;
1908
1909 /*
1910 * Allocate our results in the caller's context, not the
1911 * transaction's. We do this inside the loop, and restore the original
1912 * context at the end, so that leaky things like heap_getnext() are
1913 * not called in a potentially long-lived context.
1914 */
1915 oldcxt = MemoryContextSwitchTo(resultcxt);
1916
1917 avdb = (avw_dbase *) palloc(sizeof(avw_dbase));
1918
1919 avdb->adw_datid = HeapTupleGetOid(tup);
1920 avdb->adw_name = pstrdup(NameStr(pgdatabase->datname));
1921 avdb->adw_frozenxid = pgdatabase->datfrozenxid;
1922 avdb->adw_minmulti = pgdatabase->datminmxid;
1923 /* this gets set later: */
1924 avdb->adw_entry = NULL;
1925
1926 dblist = lappend(dblist, avdb);
1927 MemoryContextSwitchTo(oldcxt);
1928 }
1929
1930 heap_endscan(scan);
1931 heap_close(rel, AccessShareLock);
1932
1933 CommitTransactionCommand();
1934
1935 return dblist;
1936 }
1937
1938 /*
1939 * Process a database table-by-table
1940 *
1941 * Note that CHECK_FOR_INTERRUPTS is supposed to be used in certain spots in
1942 * order not to ignore shutdown commands for too long.
1943 */
1944 static void
do_autovacuum(void)1945 do_autovacuum(void)
1946 {
1947 Relation classRel;
1948 HeapTuple tuple;
1949 HeapScanDesc relScan;
1950 Form_pg_database dbForm;
1951 List *table_oids = NIL;
1952 List *orphan_oids = NIL;
1953 HASHCTL ctl;
1954 HTAB *table_toast_map;
1955 ListCell *volatile cell;
1956 PgStat_StatDBEntry *shared;
1957 PgStat_StatDBEntry *dbentry;
1958 BufferAccessStrategy bstrategy;
1959 ScanKeyData key;
1960 TupleDesc pg_class_desc;
1961 int effective_multixact_freeze_max_age;
1962 bool did_vacuum = false;
1963 bool found_concurrent_worker = false;
1964 int i;
1965
1966 /*
1967 * StartTransactionCommand and CommitTransactionCommand will automatically
1968 * switch to other contexts. We need this one to keep the list of
1969 * relations to vacuum/analyze across transactions.
1970 */
1971 AutovacMemCxt = AllocSetContextCreate(TopMemoryContext,
1972 "AV worker",
1973 ALLOCSET_DEFAULT_SIZES);
1974 MemoryContextSwitchTo(AutovacMemCxt);
1975
1976 /*
1977 * may be NULL if we couldn't find an entry (only happens if we are
1978 * forcing a vacuum for anti-wrap purposes).
1979 */
1980 dbentry = pgstat_fetch_stat_dbentry(MyDatabaseId);
1981
1982 /* Start a transaction so our commands have one to play into. */
1983 StartTransactionCommand();
1984
1985 /*
1986 * Clean up any dead statistics collector entries for this DB. We always
1987 * want to do this exactly once per DB-processing cycle, even if we find
1988 * nothing worth vacuuming in the database.
1989 */
1990 pgstat_vacuum_stat();
1991
1992 /*
1993 * Compute the multixact age for which freezing is urgent. This is
1994 * normally autovacuum_multixact_freeze_max_age, but may be less if we are
1995 * short of multixact member space.
1996 */
1997 effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
1998
1999 /*
2000 * Find the pg_database entry and select the default freeze ages. We use
2001 * zero in template and nonconnectable databases, else the system-wide
2002 * default.
2003 */
2004 tuple = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
2005 if (!HeapTupleIsValid(tuple))
2006 elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
2007 dbForm = (Form_pg_database) GETSTRUCT(tuple);
2008
2009 if (dbForm->datistemplate || !dbForm->datallowconn)
2010 {
2011 default_freeze_min_age = 0;
2012 default_freeze_table_age = 0;
2013 default_multixact_freeze_min_age = 0;
2014 default_multixact_freeze_table_age = 0;
2015 }
2016 else
2017 {
2018 default_freeze_min_age = vacuum_freeze_min_age;
2019 default_freeze_table_age = vacuum_freeze_table_age;
2020 default_multixact_freeze_min_age = vacuum_multixact_freeze_min_age;
2021 default_multixact_freeze_table_age = vacuum_multixact_freeze_table_age;
2022 }
2023
2024 ReleaseSysCache(tuple);
2025
2026 /* StartTransactionCommand changed elsewhere */
2027 MemoryContextSwitchTo(AutovacMemCxt);
2028
2029 /* The database hash where pgstat keeps shared relations */
2030 shared = pgstat_fetch_stat_dbentry(InvalidOid);
2031
2032 classRel = heap_open(RelationRelationId, AccessShareLock);
2033
2034 /* create a copy so we can use it after closing pg_class */
2035 pg_class_desc = CreateTupleDescCopy(RelationGetDescr(classRel));
2036
2037 /* create hash table for toast <-> main relid mapping */
2038 MemSet(&ctl, 0, sizeof(ctl));
2039 ctl.keysize = sizeof(Oid);
2040 ctl.entrysize = sizeof(av_relation);
2041
2042 table_toast_map = hash_create("TOAST to main relid map",
2043 100,
2044 &ctl,
2045 HASH_ELEM | HASH_BLOBS);
2046
2047 /*
2048 * Scan pg_class to determine which tables to vacuum.
2049 *
2050 * We do this in two passes: on the first one we collect the list of plain
2051 * relations and materialized views, and on the second one we collect
2052 * TOAST tables. The reason for doing the second pass is that during it we
2053 * want to use the main relation's pg_class.reloptions entry if the TOAST
2054 * table does not have any, and we cannot obtain it unless we know
2055 * beforehand what's the main table OID.
2056 *
2057 * We need to check TOAST tables separately because in cases with short,
2058 * wide tables there might be proportionally much more activity in the
2059 * TOAST table than in its parent.
2060 */
2061 relScan = heap_beginscan_catalog(classRel, 0, NULL);
2062
2063 /*
2064 * On the first pass, we collect main tables to vacuum, and also the main
2065 * table relid to TOAST relid mapping.
2066 */
2067 while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL)
2068 {
2069 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
2070 PgStat_StatTabEntry *tabentry;
2071 AutoVacOpts *relopts;
2072 Oid relid;
2073 bool dovacuum;
2074 bool doanalyze;
2075 bool wraparound;
2076
2077 if (classForm->relkind != RELKIND_RELATION &&
2078 classForm->relkind != RELKIND_MATVIEW)
2079 continue;
2080
2081 relid = HeapTupleGetOid(tuple);
2082
2083 /*
2084 * Check if it is a temp table (presumably, of some other backend's).
2085 * We cannot safely process other backends' temp tables.
2086 */
2087 if (classForm->relpersistence == RELPERSISTENCE_TEMP)
2088 {
2089 int backendID;
2090 PGPROC *proc;
2091
2092 backendID = GetTempNamespaceBackendId(classForm->relnamespace);
2093
2094 /*
2095 * We just ignore it if the owning backend is still active in the
2096 * same database.
2097 */
2098 if (backendID != InvalidBackendId &&
2099 (backendID == MyBackendId ||
2100 (proc = BackendIdGetProc(backendID)) == NULL ||
2101 proc->databaseId != MyDatabaseId))
2102 {
2103 /*
2104 * The table seems to be orphaned -- although it might be that
2105 * the owning backend has already deleted it and exited; our
2106 * pg_class scan snapshot is not necessarily up-to-date
2107 * anymore, so we could be looking at a committed-dead entry.
2108 * Remember it so we can try to delete it later.
2109 */
2110 orphan_oids = lappend_oid(orphan_oids, relid);
2111 }
2112 continue;
2113 }
2114
2115 /* Fetch reloptions and the pgstat entry for this table */
2116 relopts = extract_autovac_opts(tuple, pg_class_desc);
2117 tabentry = get_pgstat_tabentry_relid(relid, classForm->relisshared,
2118 shared, dbentry);
2119
2120 /* Check if it needs vacuum or analyze */
2121 relation_needs_vacanalyze(relid, relopts, classForm, tabentry,
2122 effective_multixact_freeze_max_age,
2123 &dovacuum, &doanalyze, &wraparound);
2124
2125 /* Relations that need work are added to table_oids */
2126 if (dovacuum || doanalyze)
2127 table_oids = lappend_oid(table_oids, relid);
2128
2129 /*
2130 * Remember TOAST associations for the second pass. Note: we must do
2131 * this whether or not the table is going to be vacuumed, because we
2132 * don't automatically vacuum toast tables along the parent table.
2133 */
2134 if (OidIsValid(classForm->reltoastrelid))
2135 {
2136 av_relation *hentry;
2137 bool found;
2138
2139 hentry = hash_search(table_toast_map,
2140 &classForm->reltoastrelid,
2141 HASH_ENTER, &found);
2142
2143 if (!found)
2144 {
2145 /* hash_search already filled in the key */
2146 hentry->ar_relid = relid;
2147 hentry->ar_hasrelopts = false;
2148 if (relopts != NULL)
2149 {
2150 hentry->ar_hasrelopts = true;
2151 memcpy(&hentry->ar_reloptions, relopts,
2152 sizeof(AutoVacOpts));
2153 }
2154 }
2155 }
2156 }
2157
2158 heap_endscan(relScan);
2159
2160 /* second pass: check TOAST tables */
2161 ScanKeyInit(&key,
2162 Anum_pg_class_relkind,
2163 BTEqualStrategyNumber, F_CHAREQ,
2164 CharGetDatum(RELKIND_TOASTVALUE));
2165
2166 relScan = heap_beginscan_catalog(classRel, 1, &key);
2167 while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL)
2168 {
2169 Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
2170 PgStat_StatTabEntry *tabentry;
2171 Oid relid;
2172 AutoVacOpts *relopts = NULL;
2173 bool dovacuum;
2174 bool doanalyze;
2175 bool wraparound;
2176
2177 /*
2178 * We cannot safely process other backends' temp tables, so skip 'em.
2179 */
2180 if (classForm->relpersistence == RELPERSISTENCE_TEMP)
2181 continue;
2182
2183 relid = HeapTupleGetOid(tuple);
2184
2185 /*
2186 * fetch reloptions -- if this toast table does not have them, try the
2187 * main rel
2188 */
2189 relopts = extract_autovac_opts(tuple, pg_class_desc);
2190 if (relopts == NULL)
2191 {
2192 av_relation *hentry;
2193 bool found;
2194
2195 hentry = hash_search(table_toast_map, &relid, HASH_FIND, &found);
2196 if (found && hentry->ar_hasrelopts)
2197 relopts = &hentry->ar_reloptions;
2198 }
2199
2200 /* Fetch the pgstat entry for this table */
2201 tabentry = get_pgstat_tabentry_relid(relid, classForm->relisshared,
2202 shared, dbentry);
2203
2204 relation_needs_vacanalyze(relid, relopts, classForm, tabentry,
2205 effective_multixact_freeze_max_age,
2206 &dovacuum, &doanalyze, &wraparound);
2207
2208 /* ignore analyze for toast tables */
2209 if (dovacuum)
2210 table_oids = lappend_oid(table_oids, relid);
2211 }
2212
2213 heap_endscan(relScan);
2214 heap_close(classRel, AccessShareLock);
2215
2216 /*
2217 * Recheck orphan temporary tables, and if they still seem orphaned, drop
2218 * them. We'll eat a transaction per dropped table, which might seem
2219 * excessive, but we should only need to do anything as a result of a
2220 * previous backend crash, so this should not happen often enough to
2221 * justify "optimizing". Using separate transactions ensures that we
2222 * don't bloat the lock table if there are many temp tables to be dropped,
2223 * and it ensures that we don't lose work if a deletion attempt fails.
2224 */
2225 foreach(cell, orphan_oids)
2226 {
2227 Oid relid = lfirst_oid(cell);
2228 Form_pg_class classForm;
2229 int backendID;
2230 ObjectAddress object;
2231
2232 /*
2233 * Check for user-requested abort.
2234 */
2235 CHECK_FOR_INTERRUPTS();
2236
2237 /*
2238 * Try to lock the table. If we can't get the lock immediately,
2239 * somebody else is using (or dropping) the table, so it's not our
2240 * concern anymore. Having the lock prevents race conditions below.
2241 */
2242 if (!ConditionalLockRelationOid(relid, AccessExclusiveLock))
2243 continue;
2244
2245 /*
2246 * Re-fetch the pg_class tuple and re-check whether it still seems to
2247 * be an orphaned temp table. If it's not there or no longer the same
2248 * relation, ignore it.
2249 */
2250 tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
2251 if (!HeapTupleIsValid(tuple))
2252 {
2253 /* be sure to drop useless lock so we don't bloat lock table */
2254 UnlockRelationOid(relid, AccessExclusiveLock);
2255 continue;
2256 }
2257 classForm = (Form_pg_class) GETSTRUCT(tuple);
2258
2259 /*
2260 * Make all the same tests made in the loop above. In event of OID
2261 * counter wraparound, the pg_class entry we have now might be
2262 * completely unrelated to the one we saw before.
2263 */
2264 if (!((classForm->relkind == RELKIND_RELATION ||
2265 classForm->relkind == RELKIND_MATVIEW) &&
2266 classForm->relpersistence == RELPERSISTENCE_TEMP))
2267 {
2268 UnlockRelationOid(relid, AccessExclusiveLock);
2269 continue;
2270 }
2271 backendID = GetTempNamespaceBackendId(classForm->relnamespace);
2272 if (!(backendID != InvalidBackendId &&
2273 (backendID == MyBackendId ||
2274 BackendIdGetProc(backendID) == NULL)))
2275 {
2276 UnlockRelationOid(relid, AccessExclusiveLock);
2277 continue;
2278 }
2279
2280 /* OK, let's delete it */
2281 ereport(LOG,
2282 (errmsg("autovacuum: dropping orphan temp table \"%s.%s.%s\"",
2283 get_database_name(MyDatabaseId),
2284 get_namespace_name(classForm->relnamespace),
2285 NameStr(classForm->relname))));
2286
2287 object.classId = RelationRelationId;
2288 object.objectId = relid;
2289 object.objectSubId = 0;
2290 performDeletion(&object, DROP_CASCADE,
2291 PERFORM_DELETION_INTERNAL |
2292 PERFORM_DELETION_QUIETLY |
2293 PERFORM_DELETION_SKIP_EXTENSIONS);
2294
2295 /*
2296 * To commit the deletion, end current transaction and start a new
2297 * one. Note this also releases the lock we took.
2298 */
2299 CommitTransactionCommand();
2300 StartTransactionCommand();
2301
2302 /* StartTransactionCommand changed current memory context */
2303 MemoryContextSwitchTo(AutovacMemCxt);
2304 }
2305
2306 /*
2307 * Create a buffer access strategy object for VACUUM to use. We want to
2308 * use the same one across all the vacuum operations we perform, since the
2309 * point is for VACUUM not to blow out the shared cache.
2310 */
2311 bstrategy = GetAccessStrategy(BAS_VACUUM);
2312
2313 /*
2314 * create a memory context to act as fake PortalContext, so that the
2315 * contexts created in the vacuum code are cleaned up for each table.
2316 */
2317 PortalContext = AllocSetContextCreate(AutovacMemCxt,
2318 "Autovacuum Portal",
2319 ALLOCSET_DEFAULT_SIZES);
2320
2321 /*
2322 * Perform operations on collected tables.
2323 */
2324 foreach(cell, table_oids)
2325 {
2326 Oid relid = lfirst_oid(cell);
2327 HeapTuple classTup;
2328 autovac_table *tab;
2329 bool isshared;
2330 bool skipit;
2331 int stdVacuumCostDelay;
2332 int stdVacuumCostLimit;
2333 dlist_iter iter;
2334
2335 CHECK_FOR_INTERRUPTS();
2336
2337 /*
2338 * Check for config changes before processing each collected table.
2339 */
2340 if (got_SIGHUP)
2341 {
2342 got_SIGHUP = false;
2343 ProcessConfigFile(PGC_SIGHUP);
2344
2345 /*
2346 * You might be tempted to bail out if we see autovacuum is now
2347 * disabled. Must resist that temptation -- this might be a
2348 * for-wraparound emergency worker, in which case that would be
2349 * entirely inappropriate.
2350 */
2351 }
2352
2353 /*
2354 * Find out whether the table is shared or not. (It's slightly
2355 * annoying to fetch the syscache entry just for this, but in typical
2356 * cases it adds little cost because table_recheck_autovac would
2357 * refetch the entry anyway. We could buy that back by copying the
2358 * tuple here and passing it to table_recheck_autovac, but that
2359 * increases the odds of that function working with stale data.)
2360 */
2361 classTup = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
2362 if (!HeapTupleIsValid(classTup))
2363 continue; /* somebody deleted the rel, forget it */
2364 isshared = ((Form_pg_class) GETSTRUCT(classTup))->relisshared;
2365 ReleaseSysCache(classTup);
2366
2367 /*
2368 * Hold schedule lock from here until we've claimed the table. We
2369 * also need the AutovacuumLock to walk the worker array, but that one
2370 * can just be a shared lock.
2371 */
2372 LWLockAcquire(AutovacuumScheduleLock, LW_EXCLUSIVE);
2373 LWLockAcquire(AutovacuumLock, LW_SHARED);
2374
2375 /*
2376 * Check whether the table is being vacuumed concurrently by another
2377 * worker.
2378 */
2379 skipit = false;
2380 dlist_foreach(iter, &AutoVacuumShmem->av_runningWorkers)
2381 {
2382 WorkerInfo worker = dlist_container(WorkerInfoData, wi_links, iter.cur);
2383
2384 /* ignore myself */
2385 if (worker == MyWorkerInfo)
2386 continue;
2387
2388 /* ignore workers in other databases (unless table is shared) */
2389 if (!worker->wi_sharedrel && worker->wi_dboid != MyDatabaseId)
2390 continue;
2391
2392 if (worker->wi_tableoid == relid)
2393 {
2394 skipit = true;
2395 found_concurrent_worker = true;
2396 break;
2397 }
2398 }
2399 LWLockRelease(AutovacuumLock);
2400 if (skipit)
2401 {
2402 LWLockRelease(AutovacuumScheduleLock);
2403 continue;
2404 }
2405
2406 /*
2407 * Store the table's OID in shared memory before releasing the
2408 * schedule lock, so that other workers don't try to vacuum it
2409 * concurrently. (We claim it here so as not to hold
2410 * AutovacuumScheduleLock while rechecking the stats.)
2411 */
2412 MyWorkerInfo->wi_tableoid = relid;
2413 MyWorkerInfo->wi_sharedrel = isshared;
2414 LWLockRelease(AutovacuumScheduleLock);
2415
2416 /*
2417 * Check whether pgstat data still says we need to vacuum this table.
2418 * It could have changed if something else processed the table while
2419 * we weren't looking.
2420 *
2421 * Note: we have a special case in pgstat code to ensure that the
2422 * stats we read are as up-to-date as possible, to avoid the problem
2423 * that somebody just finished vacuuming this table. The window to
2424 * the race condition is not closed but it is very small.
2425 */
2426 MemoryContextSwitchTo(AutovacMemCxt);
2427 tab = table_recheck_autovac(relid, table_toast_map, pg_class_desc,
2428 effective_multixact_freeze_max_age);
2429 if (tab == NULL)
2430 {
2431 /* someone else vacuumed the table, or it went away */
2432 LWLockAcquire(AutovacuumScheduleLock, LW_EXCLUSIVE);
2433 MyWorkerInfo->wi_tableoid = InvalidOid;
2434 MyWorkerInfo->wi_sharedrel = false;
2435 LWLockRelease(AutovacuumScheduleLock);
2436 continue;
2437 }
2438
2439 /*
2440 * Remember the prevailing values of the vacuum cost GUCs. We have to
2441 * restore these at the bottom of the loop, else we'll compute wrong
2442 * values in the next iteration of autovac_balance_cost().
2443 */
2444 stdVacuumCostDelay = VacuumCostDelay;
2445 stdVacuumCostLimit = VacuumCostLimit;
2446
2447 /* Must hold AutovacuumLock while mucking with cost balance info */
2448 LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
2449
2450 /* advertise my cost delay parameters for the balancing algorithm */
2451 MyWorkerInfo->wi_dobalance = tab->at_dobalance;
2452 MyWorkerInfo->wi_cost_delay = tab->at_vacuum_cost_delay;
2453 MyWorkerInfo->wi_cost_limit = tab->at_vacuum_cost_limit;
2454 MyWorkerInfo->wi_cost_limit_base = tab->at_vacuum_cost_limit;
2455
2456 /* do a balance */
2457 autovac_balance_cost();
2458
2459 /* set the active cost parameters from the result of that */
2460 AutoVacuumUpdateDelay();
2461
2462 /* done */
2463 LWLockRelease(AutovacuumLock);
2464
2465 /* clean up memory before each iteration */
2466 MemoryContextResetAndDeleteChildren(PortalContext);
2467
2468 /*
2469 * Save the relation name for a possible error message, to avoid a
2470 * catalog lookup in case of an error. If any of these return NULL,
2471 * then the relation has been dropped since last we checked; skip it.
2472 * Note: they must live in a long-lived memory context because we call
2473 * vacuum and analyze in different transactions.
2474 */
2475
2476 tab->at_relname = get_rel_name(tab->at_relid);
2477 tab->at_nspname = get_namespace_name(get_rel_namespace(tab->at_relid));
2478 tab->at_datname = get_database_name(MyDatabaseId);
2479 if (!tab->at_relname || !tab->at_nspname || !tab->at_datname)
2480 goto deleted;
2481
2482 /*
2483 * We will abort vacuuming the current table if something errors out,
2484 * and continue with the next one in schedule; in particular, this
2485 * happens if we are interrupted with SIGINT.
2486 */
2487 PG_TRY();
2488 {
2489 /* Use PortalContext for any per-table allocations */
2490 MemoryContextSwitchTo(PortalContext);
2491
2492 /* have at it */
2493 autovacuum_do_vac_analyze(tab, bstrategy);
2494
2495 /*
2496 * Clear a possible query-cancel signal, to avoid a late reaction
2497 * to an automatically-sent signal because of vacuuming the
2498 * current table (we're done with it, so it would make no sense to
2499 * cancel at this point.)
2500 */
2501 QueryCancelPending = false;
2502 }
2503 PG_CATCH();
2504 {
2505 /*
2506 * Abort the transaction, start a new one, and proceed with the
2507 * next table in our list.
2508 */
2509 HOLD_INTERRUPTS();
2510 if (tab->at_vacoptions & VACOPT_VACUUM)
2511 errcontext("automatic vacuum of table \"%s.%s.%s\"",
2512 tab->at_datname, tab->at_nspname, tab->at_relname);
2513 else
2514 errcontext("automatic analyze of table \"%s.%s.%s\"",
2515 tab->at_datname, tab->at_nspname, tab->at_relname);
2516 EmitErrorReport();
2517
2518 /* this resets the PGXACT flags too */
2519 AbortOutOfAnyTransaction();
2520 FlushErrorState();
2521 MemoryContextResetAndDeleteChildren(PortalContext);
2522
2523 /* restart our transaction for the following operations */
2524 StartTransactionCommand();
2525 RESUME_INTERRUPTS();
2526 }
2527 PG_END_TRY();
2528
2529 /* Make sure we're back in AutovacMemCxt */
2530 MemoryContextSwitchTo(AutovacMemCxt);
2531
2532 did_vacuum = true;
2533
2534 /* the PGXACT flags are reset at the next end of transaction */
2535
2536 /* be tidy */
2537 deleted:
2538 if (tab->at_datname != NULL)
2539 pfree(tab->at_datname);
2540 if (tab->at_nspname != NULL)
2541 pfree(tab->at_nspname);
2542 if (tab->at_relname != NULL)
2543 pfree(tab->at_relname);
2544 pfree(tab);
2545
2546 /*
2547 * Remove my info from shared memory. We could, but intentionally
2548 * don't, clear wi_cost_limit and friends --- this is on the
2549 * assumption that we probably have more to do with similar cost
2550 * settings, so we don't want to give up our share of I/O for a very
2551 * short interval and thereby thrash the global balance.
2552 */
2553 LWLockAcquire(AutovacuumScheduleLock, LW_EXCLUSIVE);
2554 MyWorkerInfo->wi_tableoid = InvalidOid;
2555 MyWorkerInfo->wi_sharedrel = false;
2556 LWLockRelease(AutovacuumScheduleLock);
2557
2558 /* restore vacuum cost GUCs for the next iteration */
2559 VacuumCostDelay = stdVacuumCostDelay;
2560 VacuumCostLimit = stdVacuumCostLimit;
2561 }
2562
2563 /*
2564 * Perform additional work items, as requested by backends.
2565 */
2566 LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
2567 for (i = 0; i < NUM_WORKITEMS; i++)
2568 {
2569 AutoVacuumWorkItem *workitem = &AutoVacuumShmem->av_workItems[i];
2570
2571 if (!workitem->avw_used)
2572 continue;
2573 if (workitem->avw_active)
2574 continue;
2575 if (workitem->avw_database != MyDatabaseId)
2576 continue;
2577
2578 /* claim this one, and release lock while performing it */
2579 workitem->avw_active = true;
2580 LWLockRelease(AutovacuumLock);
2581
2582 perform_work_item(workitem);
2583
2584 /*
2585 * Check for config changes before acquiring lock for further jobs.
2586 */
2587 CHECK_FOR_INTERRUPTS();
2588 if (got_SIGHUP)
2589 {
2590 got_SIGHUP = false;
2591 ProcessConfigFile(PGC_SIGHUP);
2592 }
2593
2594 LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
2595
2596 /* and mark it done */
2597 workitem->avw_active = false;
2598 workitem->avw_used = false;
2599 }
2600 LWLockRelease(AutovacuumLock);
2601
2602 /*
2603 * We leak table_toast_map here (among other things), but since we're
2604 * going away soon, it's not a problem.
2605 */
2606
2607 /*
2608 * Update pg_database.datfrozenxid, and truncate pg_xact if possible. We
2609 * only need to do this once, not after each table.
2610 *
2611 * Even if we didn't vacuum anything, it may still be important to do
2612 * this, because one indirect effect of vac_update_datfrozenxid() is to
2613 * update ShmemVariableCache->xidVacLimit. That might need to be done
2614 * even if we haven't vacuumed anything, because relations with older
2615 * relfrozenxid values or other databases with older datfrozenxid values
2616 * might have been dropped, allowing xidVacLimit to advance.
2617 *
2618 * However, it's also important not to do this blindly in all cases,
2619 * because when autovacuum=off this will restart the autovacuum launcher.
2620 * If we're not careful, an infinite loop can result, where workers find
2621 * no work to do and restart the launcher, which starts another worker in
2622 * the same database that finds no work to do. To prevent that, we skip
2623 * this if (1) we found no work to do and (2) we skipped at least one
2624 * table due to concurrent autovacuum activity. In that case, the other
2625 * worker has already done it, or will do so when it finishes.
2626 */
2627 if (did_vacuum || !found_concurrent_worker)
2628 vac_update_datfrozenxid();
2629
2630 /* Finally close out the last transaction. */
2631 CommitTransactionCommand();
2632 }
2633
2634 /*
2635 * Execute a previously registered work item.
2636 */
2637 static void
perform_work_item(AutoVacuumWorkItem * workitem)2638 perform_work_item(AutoVacuumWorkItem *workitem)
2639 {
2640 char *cur_datname = NULL;
2641 char *cur_nspname = NULL;
2642 char *cur_relname = NULL;
2643
2644 /*
2645 * Note we do not store table info in MyWorkerInfo, since this is not
2646 * vacuuming proper.
2647 */
2648
2649 /*
2650 * Save the relation name for a possible error message, to avoid a catalog
2651 * lookup in case of an error. If any of these return NULL, then the
2652 * relation has been dropped since last we checked; skip it.
2653 */
2654 Assert(CurrentMemoryContext == AutovacMemCxt);
2655
2656 cur_relname = get_rel_name(workitem->avw_relation);
2657 cur_nspname = get_namespace_name(get_rel_namespace(workitem->avw_relation));
2658 cur_datname = get_database_name(MyDatabaseId);
2659 if (!cur_relname || !cur_nspname || !cur_datname)
2660 goto deleted2;
2661
2662 autovac_report_workitem(workitem, cur_nspname, cur_relname);
2663
2664 /* clean up memory before each work item */
2665 MemoryContextResetAndDeleteChildren(PortalContext);
2666
2667 /*
2668 * We will abort the current work item if something errors out, and
2669 * continue with the next one; in particular, this happens if we are
2670 * interrupted with SIGINT. Note that this means that the work item list
2671 * can be lossy.
2672 */
2673 PG_TRY();
2674 {
2675 /* Use PortalContext for any per-work-item allocations */
2676 MemoryContextSwitchTo(PortalContext);
2677
2678 /* have at it */
2679 switch (workitem->avw_type)
2680 {
2681 case AVW_BRINSummarizeRange:
2682 DirectFunctionCall2(brin_summarize_range,
2683 ObjectIdGetDatum(workitem->avw_relation),
2684 Int64GetDatum((int64) workitem->avw_blockNumber));
2685 break;
2686 default:
2687 elog(WARNING, "unrecognized work item found: type %d",
2688 workitem->avw_type);
2689 break;
2690 }
2691
2692 /*
2693 * Clear a possible query-cancel signal, to avoid a late reaction to
2694 * an automatically-sent signal because of vacuuming the current table
2695 * (we're done with it, so it would make no sense to cancel at this
2696 * point.)
2697 */
2698 QueryCancelPending = false;
2699 }
2700 PG_CATCH();
2701 {
2702 /*
2703 * Abort the transaction, start a new one, and proceed with the next
2704 * table in our list.
2705 */
2706 HOLD_INTERRUPTS();
2707 errcontext("processing work entry for relation \"%s.%s.%s\"",
2708 cur_datname, cur_nspname, cur_relname);
2709 EmitErrorReport();
2710
2711 /* this resets the PGXACT flags too */
2712 AbortOutOfAnyTransaction();
2713 FlushErrorState();
2714 MemoryContextResetAndDeleteChildren(PortalContext);
2715
2716 /* restart our transaction for the following operations */
2717 StartTransactionCommand();
2718 RESUME_INTERRUPTS();
2719 }
2720 PG_END_TRY();
2721
2722 /* Make sure we're back in AutovacMemCxt */
2723 MemoryContextSwitchTo(AutovacMemCxt);
2724
2725 /* We intentionally do not set did_vacuum here */
2726
2727 /* be tidy */
2728 deleted2:
2729 if (cur_datname)
2730 pfree(cur_datname);
2731 if (cur_nspname)
2732 pfree(cur_nspname);
2733 if (cur_relname)
2734 pfree(cur_relname);
2735 }
2736
2737 /*
2738 * extract_autovac_opts
2739 *
2740 * Given a relation's pg_class tuple, return the AutoVacOpts portion of
2741 * reloptions, if set; otherwise, return NULL.
2742 */
2743 static AutoVacOpts *
extract_autovac_opts(HeapTuple tup,TupleDesc pg_class_desc)2744 extract_autovac_opts(HeapTuple tup, TupleDesc pg_class_desc)
2745 {
2746 bytea *relopts;
2747 AutoVacOpts *av;
2748
2749 Assert(((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_RELATION ||
2750 ((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_MATVIEW ||
2751 ((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_TOASTVALUE);
2752
2753 relopts = extractRelOptions(tup, pg_class_desc, NULL);
2754 if (relopts == NULL)
2755 return NULL;
2756
2757 av = palloc(sizeof(AutoVacOpts));
2758 memcpy(av, &(((StdRdOptions *) relopts)->autovacuum), sizeof(AutoVacOpts));
2759 pfree(relopts);
2760
2761 return av;
2762 }
2763
2764 /*
2765 * get_pgstat_tabentry_relid
2766 *
2767 * Fetch the pgstat entry of a table, either local to a database or shared.
2768 */
2769 static PgStat_StatTabEntry *
get_pgstat_tabentry_relid(Oid relid,bool isshared,PgStat_StatDBEntry * shared,PgStat_StatDBEntry * dbentry)2770 get_pgstat_tabentry_relid(Oid relid, bool isshared, PgStat_StatDBEntry *shared,
2771 PgStat_StatDBEntry *dbentry)
2772 {
2773 PgStat_StatTabEntry *tabentry = NULL;
2774
2775 if (isshared)
2776 {
2777 if (PointerIsValid(shared))
2778 tabentry = hash_search(shared->tables, &relid,
2779 HASH_FIND, NULL);
2780 }
2781 else if (PointerIsValid(dbentry))
2782 tabentry = hash_search(dbentry->tables, &relid,
2783 HASH_FIND, NULL);
2784
2785 return tabentry;
2786 }
2787
2788 /*
2789 * table_recheck_autovac
2790 *
2791 * Recheck whether a table still needs vacuum or analyze. Return value is a
2792 * valid autovac_table pointer if it does, NULL otherwise.
2793 *
2794 * Note that the returned autovac_table does not have the name fields set.
2795 */
2796 static autovac_table *
table_recheck_autovac(Oid relid,HTAB * table_toast_map,TupleDesc pg_class_desc,int effective_multixact_freeze_max_age)2797 table_recheck_autovac(Oid relid, HTAB *table_toast_map,
2798 TupleDesc pg_class_desc,
2799 int effective_multixact_freeze_max_age)
2800 {
2801 Form_pg_class classForm;
2802 HeapTuple classTup;
2803 bool dovacuum;
2804 bool doanalyze;
2805 autovac_table *tab = NULL;
2806 PgStat_StatTabEntry *tabentry;
2807 PgStat_StatDBEntry *shared;
2808 PgStat_StatDBEntry *dbentry;
2809 bool wraparound;
2810 AutoVacOpts *avopts;
2811
2812 /* use fresh stats */
2813 autovac_refresh_stats();
2814
2815 shared = pgstat_fetch_stat_dbentry(InvalidOid);
2816 dbentry = pgstat_fetch_stat_dbentry(MyDatabaseId);
2817
2818 /* fetch the relation's relcache entry */
2819 classTup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
2820 if (!HeapTupleIsValid(classTup))
2821 return NULL;
2822 classForm = (Form_pg_class) GETSTRUCT(classTup);
2823
2824 /*
2825 * Get the applicable reloptions. If it is a TOAST table, try to get the
2826 * main table reloptions if the toast table itself doesn't have.
2827 */
2828 avopts = extract_autovac_opts(classTup, pg_class_desc);
2829 if (classForm->relkind == RELKIND_TOASTVALUE &&
2830 avopts == NULL && table_toast_map != NULL)
2831 {
2832 av_relation *hentry;
2833 bool found;
2834
2835 hentry = hash_search(table_toast_map, &relid, HASH_FIND, &found);
2836 if (found && hentry->ar_hasrelopts)
2837 avopts = &hentry->ar_reloptions;
2838 }
2839
2840 /* fetch the pgstat table entry */
2841 tabentry = get_pgstat_tabentry_relid(relid, classForm->relisshared,
2842 shared, dbentry);
2843
2844 relation_needs_vacanalyze(relid, avopts, classForm, tabentry,
2845 effective_multixact_freeze_max_age,
2846 &dovacuum, &doanalyze, &wraparound);
2847
2848 /* ignore ANALYZE for toast tables */
2849 if (classForm->relkind == RELKIND_TOASTVALUE)
2850 doanalyze = false;
2851
2852 /* OK, it needs something done */
2853 if (doanalyze || dovacuum)
2854 {
2855 int freeze_min_age;
2856 int freeze_table_age;
2857 int multixact_freeze_min_age;
2858 int multixact_freeze_table_age;
2859 int vac_cost_limit;
2860 int vac_cost_delay;
2861 int log_min_duration;
2862
2863 /*
2864 * Calculate the vacuum cost parameters and the freeze ages. If there
2865 * are options set in pg_class.reloptions, use them; in the case of a
2866 * toast table, try the main table too. Otherwise use the GUC
2867 * defaults, autovacuum's own first and plain vacuum second.
2868 */
2869
2870 /* -1 in autovac setting means use plain vacuum_cost_delay */
2871 vac_cost_delay = (avopts && avopts->vacuum_cost_delay >= 0)
2872 ? avopts->vacuum_cost_delay
2873 : (autovacuum_vac_cost_delay >= 0)
2874 ? autovacuum_vac_cost_delay
2875 : VacuumCostDelay;
2876
2877 /* 0 or -1 in autovac setting means use plain vacuum_cost_limit */
2878 vac_cost_limit = (avopts && avopts->vacuum_cost_limit > 0)
2879 ? avopts->vacuum_cost_limit
2880 : (autovacuum_vac_cost_limit > 0)
2881 ? autovacuum_vac_cost_limit
2882 : VacuumCostLimit;
2883
2884 /* -1 in autovac setting means use log_autovacuum_min_duration */
2885 log_min_duration = (avopts && avopts->log_min_duration >= 0)
2886 ? avopts->log_min_duration
2887 : Log_autovacuum_min_duration;
2888
2889 /* these do not have autovacuum-specific settings */
2890 freeze_min_age = (avopts && avopts->freeze_min_age >= 0)
2891 ? avopts->freeze_min_age
2892 : default_freeze_min_age;
2893
2894 freeze_table_age = (avopts && avopts->freeze_table_age >= 0)
2895 ? avopts->freeze_table_age
2896 : default_freeze_table_age;
2897
2898 multixact_freeze_min_age = (avopts &&
2899 avopts->multixact_freeze_min_age >= 0)
2900 ? avopts->multixact_freeze_min_age
2901 : default_multixact_freeze_min_age;
2902
2903 multixact_freeze_table_age = (avopts &&
2904 avopts->multixact_freeze_table_age >= 0)
2905 ? avopts->multixact_freeze_table_age
2906 : default_multixact_freeze_table_age;
2907
2908 tab = palloc(sizeof(autovac_table));
2909 tab->at_relid = relid;
2910 tab->at_sharedrel = classForm->relisshared;
2911 tab->at_vacoptions = VACOPT_SKIPTOAST |
2912 (dovacuum ? VACOPT_VACUUM : 0) |
2913 (doanalyze ? VACOPT_ANALYZE : 0) |
2914 (!wraparound ? VACOPT_NOWAIT : 0);
2915 tab->at_params.freeze_min_age = freeze_min_age;
2916 tab->at_params.freeze_table_age = freeze_table_age;
2917 tab->at_params.multixact_freeze_min_age = multixact_freeze_min_age;
2918 tab->at_params.multixact_freeze_table_age = multixact_freeze_table_age;
2919 tab->at_params.is_wraparound = wraparound;
2920 tab->at_params.log_min_duration = log_min_duration;
2921 tab->at_vacuum_cost_limit = vac_cost_limit;
2922 tab->at_vacuum_cost_delay = vac_cost_delay;
2923 tab->at_relname = NULL;
2924 tab->at_nspname = NULL;
2925 tab->at_datname = NULL;
2926
2927 /*
2928 * If any of the cost delay parameters has been set individually for
2929 * this table, disable the balancing algorithm.
2930 */
2931 tab->at_dobalance =
2932 !(avopts && (avopts->vacuum_cost_limit > 0 ||
2933 avopts->vacuum_cost_delay > 0));
2934 }
2935
2936 heap_freetuple(classTup);
2937
2938 return tab;
2939 }
2940
2941 /*
2942 * relation_needs_vacanalyze
2943 *
2944 * Check whether a relation needs to be vacuumed or analyzed; return each into
2945 * "dovacuum" and "doanalyze", respectively. Also return whether the vacuum is
2946 * being forced because of Xid or multixact wraparound.
2947 *
2948 * relopts is a pointer to the AutoVacOpts options (either for itself in the
2949 * case of a plain table, or for either itself or its parent table in the case
2950 * of a TOAST table), NULL if none; tabentry is the pgstats entry, which can be
2951 * NULL.
2952 *
2953 * A table needs to be vacuumed if the number of dead tuples exceeds a
2954 * threshold. This threshold is calculated as
2955 *
2956 * threshold = vac_base_thresh + vac_scale_factor * reltuples
2957 *
2958 * For analyze, the analysis done is that the number of tuples inserted,
2959 * deleted and updated since the last analyze exceeds a threshold calculated
2960 * in the same fashion as above. Note that the collector actually stores
2961 * the number of tuples (both live and dead) that there were as of the last
2962 * analyze. This is asymmetric to the VACUUM case.
2963 *
2964 * We also force vacuum if the table's relfrozenxid is more than freeze_max_age
2965 * transactions back, and if its relminmxid is more than
2966 * multixact_freeze_max_age multixacts back.
2967 *
2968 * A table whose autovacuum_enabled option is false is
2969 * automatically skipped (unless we have to vacuum it due to freeze_max_age).
2970 * Thus autovacuum can be disabled for specific tables. Also, when the stats
2971 * collector does not have data about a table, it will be skipped.
2972 *
2973 * A table whose vac_base_thresh value is < 0 takes the base value from the
2974 * autovacuum_vacuum_threshold GUC variable. Similarly, a vac_scale_factor
2975 * value < 0 is substituted with the value of
2976 * autovacuum_vacuum_scale_factor GUC variable. Ditto for analyze.
2977 */
2978 static void
relation_needs_vacanalyze(Oid relid,AutoVacOpts * relopts,Form_pg_class classForm,PgStat_StatTabEntry * tabentry,int effective_multixact_freeze_max_age,bool * dovacuum,bool * doanalyze,bool * wraparound)2979 relation_needs_vacanalyze(Oid relid,
2980 AutoVacOpts *relopts,
2981 Form_pg_class classForm,
2982 PgStat_StatTabEntry *tabentry,
2983 int effective_multixact_freeze_max_age,
2984 /* output params below */
2985 bool *dovacuum,
2986 bool *doanalyze,
2987 bool *wraparound)
2988 {
2989 bool force_vacuum;
2990 bool av_enabled;
2991 float4 reltuples; /* pg_class.reltuples */
2992
2993 /* constants from reloptions or GUC variables */
2994 int vac_base_thresh,
2995 anl_base_thresh;
2996 float4 vac_scale_factor,
2997 anl_scale_factor;
2998
2999 /* thresholds calculated from above constants */
3000 float4 vacthresh,
3001 anlthresh;
3002
3003 /* number of vacuum (resp. analyze) tuples at this time */
3004 float4 vactuples,
3005 anltuples;
3006
3007 /* freeze parameters */
3008 int freeze_max_age;
3009 int multixact_freeze_max_age;
3010 TransactionId xidForceLimit;
3011 MultiXactId multiForceLimit;
3012
3013 AssertArg(classForm != NULL);
3014 AssertArg(OidIsValid(relid));
3015
3016 /*
3017 * Determine vacuum/analyze equation parameters. We have two possible
3018 * sources: the passed reloptions (which could be a main table or a toast
3019 * table), or the autovacuum GUC variables.
3020 */
3021
3022 /* -1 in autovac setting means use plain vacuum_cost_delay */
3023 vac_scale_factor = (relopts && relopts->vacuum_scale_factor >= 0)
3024 ? relopts->vacuum_scale_factor
3025 : autovacuum_vac_scale;
3026
3027 vac_base_thresh = (relopts && relopts->vacuum_threshold >= 0)
3028 ? relopts->vacuum_threshold
3029 : autovacuum_vac_thresh;
3030
3031 anl_scale_factor = (relopts && relopts->analyze_scale_factor >= 0)
3032 ? relopts->analyze_scale_factor
3033 : autovacuum_anl_scale;
3034
3035 anl_base_thresh = (relopts && relopts->analyze_threshold >= 0)
3036 ? relopts->analyze_threshold
3037 : autovacuum_anl_thresh;
3038
3039 freeze_max_age = (relopts && relopts->freeze_max_age >= 0)
3040 ? Min(relopts->freeze_max_age, autovacuum_freeze_max_age)
3041 : autovacuum_freeze_max_age;
3042
3043 multixact_freeze_max_age = (relopts && relopts->multixact_freeze_max_age >= 0)
3044 ? Min(relopts->multixact_freeze_max_age, effective_multixact_freeze_max_age)
3045 : effective_multixact_freeze_max_age;
3046
3047 av_enabled = (relopts ? relopts->enabled : true);
3048
3049 /* Force vacuum if table is at risk of wraparound */
3050 xidForceLimit = recentXid - freeze_max_age;
3051 if (xidForceLimit < FirstNormalTransactionId)
3052 xidForceLimit -= FirstNormalTransactionId;
3053 force_vacuum = (TransactionIdIsNormal(classForm->relfrozenxid) &&
3054 TransactionIdPrecedes(classForm->relfrozenxid,
3055 xidForceLimit));
3056 if (!force_vacuum)
3057 {
3058 multiForceLimit = recentMulti - multixact_freeze_max_age;
3059 if (multiForceLimit < FirstMultiXactId)
3060 multiForceLimit -= FirstMultiXactId;
3061 force_vacuum = MultiXactIdPrecedes(classForm->relminmxid,
3062 multiForceLimit);
3063 }
3064 *wraparound = force_vacuum;
3065
3066 /* User disabled it in pg_class.reloptions? (But ignore if at risk) */
3067 if (!av_enabled && !force_vacuum)
3068 {
3069 *doanalyze = false;
3070 *dovacuum = false;
3071 return;
3072 }
3073
3074 /*
3075 * If we found the table in the stats hash, and autovacuum is currently
3076 * enabled, make a threshold-based decision whether to vacuum and/or
3077 * analyze. If autovacuum is currently disabled, we must be here for
3078 * anti-wraparound vacuuming only, so don't vacuum (or analyze) anything
3079 * that's not being forced.
3080 */
3081 if (PointerIsValid(tabentry) && AutoVacuumingActive())
3082 {
3083 reltuples = classForm->reltuples;
3084 vactuples = tabentry->n_dead_tuples;
3085 anltuples = tabentry->changes_since_analyze;
3086
3087 vacthresh = (float4) vac_base_thresh + vac_scale_factor * reltuples;
3088 anlthresh = (float4) anl_base_thresh + anl_scale_factor * reltuples;
3089
3090 /*
3091 * Note that we don't need to take special consideration for stat
3092 * reset, because if that happens, the last vacuum and analyze counts
3093 * will be reset too.
3094 */
3095 elog(DEBUG3, "%s: vac: %.0f (threshold %.0f), anl: %.0f (threshold %.0f)",
3096 NameStr(classForm->relname),
3097 vactuples, vacthresh, anltuples, anlthresh);
3098
3099 /* Determine if this table needs vacuum or analyze. */
3100 *dovacuum = force_vacuum || (vactuples > vacthresh);
3101 *doanalyze = (anltuples > anlthresh);
3102 }
3103 else
3104 {
3105 /*
3106 * Skip a table not found in stat hash, unless we have to force vacuum
3107 * for anti-wrap purposes. If it's not acted upon, there's no need to
3108 * vacuum it.
3109 */
3110 *dovacuum = force_vacuum;
3111 *doanalyze = false;
3112 }
3113
3114 /* ANALYZE refuses to work with pg_statistic */
3115 if (relid == StatisticRelationId)
3116 *doanalyze = false;
3117 }
3118
3119 /*
3120 * autovacuum_do_vac_analyze
3121 * Vacuum and/or analyze the specified table
3122 */
3123 static void
autovacuum_do_vac_analyze(autovac_table * tab,BufferAccessStrategy bstrategy)3124 autovacuum_do_vac_analyze(autovac_table *tab, BufferAccessStrategy bstrategy)
3125 {
3126 RangeVar rangevar;
3127
3128 /* Set up command parameters --- use local variables instead of palloc */
3129 MemSet(&rangevar, 0, sizeof(rangevar));
3130
3131 rangevar.schemaname = tab->at_nspname;
3132 rangevar.relname = tab->at_relname;
3133 rangevar.location = -1;
3134
3135 /* Let pgstat know what we're doing */
3136 autovac_report_activity(tab);
3137
3138 vacuum(tab->at_vacoptions, &rangevar, tab->at_relid, &tab->at_params, NIL,
3139 bstrategy, true);
3140 }
3141
3142 /*
3143 * autovac_report_activity
3144 * Report to pgstat what autovacuum is doing
3145 *
3146 * We send a SQL string corresponding to what the user would see if the
3147 * equivalent command was to be issued manually.
3148 *
3149 * Note we assume that we are going to report the next command as soon as we're
3150 * done with the current one, and exit right after the last one, so we don't
3151 * bother to report "<IDLE>" or some such.
3152 */
3153 static void
autovac_report_activity(autovac_table * tab)3154 autovac_report_activity(autovac_table *tab)
3155 {
3156 #define MAX_AUTOVAC_ACTIV_LEN (NAMEDATALEN * 2 + 56)
3157 char activity[MAX_AUTOVAC_ACTIV_LEN];
3158 int len;
3159
3160 /* Report the command and possible options */
3161 if (tab->at_vacoptions & VACOPT_VACUUM)
3162 snprintf(activity, MAX_AUTOVAC_ACTIV_LEN,
3163 "autovacuum: VACUUM%s",
3164 tab->at_vacoptions & VACOPT_ANALYZE ? " ANALYZE" : "");
3165 else
3166 snprintf(activity, MAX_AUTOVAC_ACTIV_LEN,
3167 "autovacuum: ANALYZE");
3168
3169 /*
3170 * Report the qualified name of the relation.
3171 */
3172 len = strlen(activity);
3173
3174 snprintf(activity + len, MAX_AUTOVAC_ACTIV_LEN - len,
3175 " %s.%s%s", tab->at_nspname, tab->at_relname,
3176 tab->at_params.is_wraparound ? " (to prevent wraparound)" : "");
3177
3178 /* Set statement_timestamp() to current time for pg_stat_activity */
3179 SetCurrentStatementStartTimestamp();
3180
3181 pgstat_report_activity(STATE_RUNNING, activity);
3182 }
3183
3184 /*
3185 * autovac_report_workitem
3186 * Report to pgstat that autovacuum is processing a work item
3187 */
3188 static void
autovac_report_workitem(AutoVacuumWorkItem * workitem,const char * nspname,const char * relname)3189 autovac_report_workitem(AutoVacuumWorkItem *workitem,
3190 const char *nspname, const char *relname)
3191 {
3192 char activity[MAX_AUTOVAC_ACTIV_LEN + 12 + 2];
3193 char blk[12 + 2];
3194 int len;
3195
3196 switch (workitem->avw_type)
3197 {
3198 case AVW_BRINSummarizeRange:
3199 snprintf(activity, MAX_AUTOVAC_ACTIV_LEN,
3200 "autovacuum: BRIN summarize");
3201 break;
3202 }
3203
3204 /*
3205 * Report the qualified name of the relation, and the block number if any
3206 */
3207 len = strlen(activity);
3208
3209 if (BlockNumberIsValid(workitem->avw_blockNumber))
3210 snprintf(blk, sizeof(blk), " %u", workitem->avw_blockNumber);
3211 else
3212 blk[0] = '\0';
3213
3214 snprintf(activity + len, MAX_AUTOVAC_ACTIV_LEN - len,
3215 " %s.%s%s", nspname, relname, blk);
3216
3217 /* Set statement_timestamp() to current time for pg_stat_activity */
3218 SetCurrentStatementStartTimestamp();
3219
3220 pgstat_report_activity(STATE_RUNNING, activity);
3221 }
3222
3223 /*
3224 * AutoVacuumingActive
3225 * Check GUC vars and report whether the autovacuum process should be
3226 * running.
3227 */
3228 bool
AutoVacuumingActive(void)3229 AutoVacuumingActive(void)
3230 {
3231 if (!autovacuum_start_daemon || !pgstat_track_counts)
3232 return false;
3233 return true;
3234 }
3235
3236 /*
3237 * Request one work item to the next autovacuum run processing our database.
3238 * Return false if the request can't be recorded.
3239 */
3240 bool
AutoVacuumRequestWork(AutoVacuumWorkItemType type,Oid relationId,BlockNumber blkno)3241 AutoVacuumRequestWork(AutoVacuumWorkItemType type, Oid relationId,
3242 BlockNumber blkno)
3243 {
3244 int i;
3245 bool result = false;
3246
3247 LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
3248
3249 /*
3250 * Locate an unused work item and fill it with the given data.
3251 */
3252 for (i = 0; i < NUM_WORKITEMS; i++)
3253 {
3254 AutoVacuumWorkItem *workitem = &AutoVacuumShmem->av_workItems[i];
3255
3256 if (workitem->avw_used)
3257 continue;
3258
3259 workitem->avw_used = true;
3260 workitem->avw_active = false;
3261 workitem->avw_type = type;
3262 workitem->avw_database = MyDatabaseId;
3263 workitem->avw_relation = relationId;
3264 workitem->avw_blockNumber = blkno;
3265 result = true;
3266
3267 /* done */
3268 break;
3269 }
3270
3271 LWLockRelease(AutovacuumLock);
3272
3273 return result;
3274 }
3275
3276 /*
3277 * autovac_init
3278 * This is called at postmaster initialization.
3279 *
3280 * All we do here is annoy the user if he got it wrong.
3281 */
3282 void
autovac_init(void)3283 autovac_init(void)
3284 {
3285 if (autovacuum_start_daemon && !pgstat_track_counts)
3286 ereport(WARNING,
3287 (errmsg("autovacuum not started because of misconfiguration"),
3288 errhint("Enable the \"track_counts\" option.")));
3289 }
3290
3291 /*
3292 * IsAutoVacuum functions
3293 * Return whether this is either a launcher autovacuum process or a worker
3294 * process.
3295 */
3296 bool
IsAutoVacuumLauncherProcess(void)3297 IsAutoVacuumLauncherProcess(void)
3298 {
3299 return am_autovacuum_launcher;
3300 }
3301
3302 bool
IsAutoVacuumWorkerProcess(void)3303 IsAutoVacuumWorkerProcess(void)
3304 {
3305 return am_autovacuum_worker;
3306 }
3307
3308
3309 /*
3310 * AutoVacuumShmemSize
3311 * Compute space needed for autovacuum-related shared memory
3312 */
3313 Size
AutoVacuumShmemSize(void)3314 AutoVacuumShmemSize(void)
3315 {
3316 Size size;
3317
3318 /*
3319 * Need the fixed struct and the array of WorkerInfoData.
3320 */
3321 size = sizeof(AutoVacuumShmemStruct);
3322 size = MAXALIGN(size);
3323 size = add_size(size, mul_size(autovacuum_max_workers,
3324 sizeof(WorkerInfoData)));
3325 return size;
3326 }
3327
3328 /*
3329 * AutoVacuumShmemInit
3330 * Allocate and initialize autovacuum-related shared memory
3331 */
3332 void
AutoVacuumShmemInit(void)3333 AutoVacuumShmemInit(void)
3334 {
3335 bool found;
3336
3337 AutoVacuumShmem = (AutoVacuumShmemStruct *)
3338 ShmemInitStruct("AutoVacuum Data",
3339 AutoVacuumShmemSize(),
3340 &found);
3341
3342 if (!IsUnderPostmaster)
3343 {
3344 WorkerInfo worker;
3345 int i;
3346
3347 Assert(!found);
3348
3349 AutoVacuumShmem->av_launcherpid = 0;
3350 dlist_init(&AutoVacuumShmem->av_freeWorkers);
3351 dlist_init(&AutoVacuumShmem->av_runningWorkers);
3352 AutoVacuumShmem->av_startingWorker = NULL;
3353 memset(AutoVacuumShmem->av_workItems, 0,
3354 sizeof(AutoVacuumWorkItem) * NUM_WORKITEMS);
3355
3356 worker = (WorkerInfo) ((char *) AutoVacuumShmem +
3357 MAXALIGN(sizeof(AutoVacuumShmemStruct)));
3358
3359 /* initialize the WorkerInfo free list */
3360 for (i = 0; i < autovacuum_max_workers; i++)
3361 dlist_push_head(&AutoVacuumShmem->av_freeWorkers,
3362 &worker[i].wi_links);
3363 }
3364 else
3365 Assert(found);
3366 }
3367
3368 /*
3369 * autovac_refresh_stats
3370 * Refresh pgstats data for an autovacuum process
3371 *
3372 * Cause the next pgstats read operation to obtain fresh data, but throttle
3373 * such refreshing in the autovacuum launcher. This is mostly to avoid
3374 * rereading the pgstats files too many times in quick succession when there
3375 * are many databases.
3376 *
3377 * Note: we avoid throttling in the autovac worker, as it would be
3378 * counterproductive in the recheck logic.
3379 */
3380 static void
autovac_refresh_stats(void)3381 autovac_refresh_stats(void)
3382 {
3383 if (IsAutoVacuumLauncherProcess())
3384 {
3385 static TimestampTz last_read = 0;
3386 TimestampTz current_time;
3387
3388 current_time = GetCurrentTimestamp();
3389
3390 if (!TimestampDifferenceExceeds(last_read, current_time,
3391 STATS_READ_DELAY))
3392 return;
3393
3394 last_read = current_time;
3395 }
3396
3397 pgstat_clear_snapshot();
3398 }
3399