1 /*-------------------------------------------------------------------------
2  *
3  * pgarch.c
4  *
5  *	PostgreSQL WAL archiver
6  *
7  *	All functions relating to archiver are included here
8  *
9  *	- All functions executed by archiver process
10  *
11  *	- archiver is forked from postmaster, and the two
12  *	processes then communicate using signals. All functions
13  *	executed by postmaster are included in this file.
14  *
15  *	Initial author: Simon Riggs		simon@2ndquadrant.com
16  *
17  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
18  * Portions Copyright (c) 1994, Regents of the University of California
19  *
20  *
21  * IDENTIFICATION
22  *	  src/backend/postmaster/pgarch.c
23  *
24  *-------------------------------------------------------------------------
25  */
26 #include "postgres.h"
27 
28 #include <fcntl.h>
29 #include <signal.h>
30 #include <time.h>
31 #include <sys/stat.h>
32 #include <sys/time.h>
33 #include <sys/wait.h>
34 #include <unistd.h>
35 
36 #include "access/xlog.h"
37 #include "access/xlog_internal.h"
38 #include "libpq/pqsignal.h"
39 #include "miscadmin.h"
40 #include "pgstat.h"
41 #include "postmaster/interrupt.h"
42 #include "postmaster/pgarch.h"
43 #include "storage/fd.h"
44 #include "storage/ipc.h"
45 #include "storage/latch.h"
46 #include "storage/pmsignal.h"
47 #include "storage/proc.h"
48 #include "storage/procsignal.h"
49 #include "storage/shmem.h"
50 #include "utils/guc.h"
51 #include "utils/ps_status.h"
52 
53 
/* ----------
 * Timer definitions.
 * ----------
 */
#define PGARCH_AUTOWAKE_INTERVAL 60 /* How often to force a poll of the
									 * archive status directory; in seconds.
									 * pgarch_MainLoop derives its WaitLatch
									 * timeout from this value. */
#define PGARCH_RESTART_INTERVAL 10	/* How often to attempt to restart a
									 * failed archiver; in seconds.  Enforced
									 * by PgArchCanRestart. */

/*
 * Maximum number of retries allowed when attempting to archive a WAL
 * file.  Once this many attempts on the same file have failed,
 * pgarch_ArchiverCopyLoop gives up and returns to its caller.
 */
#define NUM_ARCHIVE_RETRIES 3

/*
 * Maximum number of retries allowed when attempting to remove an
 * orphan archive status file.  Once this many unlink attempts on the same
 * status file have failed, pgarch_ArchiverCopyLoop gives up and returns to
 * its caller.
 */
#define NUM_ORPHAN_CLEANUP_RETRIES 3
74 
/*
 * Shared memory area for archiver process.
 *
 * The area is created and initialized by PgArchShmemInit().  The archiver
 * stores its own pgprocno here at startup (see PgArchiverMain) and resets
 * the field to INVALID_PGPROCNO from its exit callback (see pgarch_die).
 * PgArchWakeup() reads the field to find the latch it should set.
 */
typedef struct PgArchData
{
	int			pgprocno;		/* pgprocno of archiver process, or
								 * INVALID_PGPROCNO when none is advertised */
} PgArchData;
80 
81 
/* ----------
 * Local data
 * ----------
 */

/*
 * Time at which pgarch_MainLoop first noticed a pending shutdown request;
 * zero means no shutdown request has been noticed yet.
 */
static time_t last_sigterm_time = 0;

/* Archiver shared memory area; pointer is set by PgArchShmemInit */
static PgArchData *PgArch = NULL;

/*
 * Flags set by interrupt handlers for later service in the main loop.
 *
 * ready_to_stop is raised by pgarch_waken_stop (the SIGUSR2 handler) and
 * sampled by pgarch_MainLoop at the top of every iteration.
 */
static volatile sig_atomic_t ready_to_stop = false;

/* ----------
 * Local function forward declarations
 * ----------
 */
static void pgarch_waken_stop(SIGNAL_ARGS);
static void pgarch_MainLoop(void);
static void pgarch_ArchiverCopyLoop(void);
static bool pgarch_archiveXlog(char *xlog);
static bool pgarch_readyXlog(char *xlog);
static void pgarch_archiveDone(char *xlog);
static void pgarch_die(int code, Datum arg);
static void HandlePgArchInterrupts(void);
106 
107 /* Report shared memory space needed by PgArchShmemInit */
108 Size
PgArchShmemSize(void)109 PgArchShmemSize(void)
110 {
111 	Size		size = 0;
112 
113 	size = add_size(size, sizeof(PgArchData));
114 
115 	return size;
116 }
117 
118 /* Allocate and initialize archiver-related shared memory */
119 void
PgArchShmemInit(void)120 PgArchShmemInit(void)
121 {
122 	bool		found;
123 
124 	PgArch = (PgArchData *)
125 		ShmemInitStruct("Archiver Data", PgArchShmemSize(), &found);
126 
127 	if (!found)
128 	{
129 		/* First time through, so initialize */
130 		MemSet(PgArch, 0, PgArchShmemSize());
131 		PgArch->pgprocno = INVALID_PGPROCNO;
132 	}
133 }
134 
135 /*
136  * PgArchCanRestart
137  *
138  * Return true and archiver is allowed to restart if enough time has
139  * passed since it was launched last to reach PGARCH_RESTART_INTERVAL.
140  * Otherwise return false.
141  *
142  * This is a safety valve to protect against continuous respawn attempts if the
143  * archiver is dying immediately at launch. Note that since we will retry to
144  * launch the archiver from the postmaster main loop, we will get another
145  * chance later.
146  */
147 bool
PgArchCanRestart(void)148 PgArchCanRestart(void)
149 {
150 	static time_t last_pgarch_start_time = 0;
151 	time_t		curtime = time(NULL);
152 
153 	/*
154 	 * Return false and don't restart archiver if too soon since last archiver
155 	 * start.
156 	 */
157 	if ((unsigned int) (curtime - last_pgarch_start_time) <
158 		(unsigned int) PGARCH_RESTART_INTERVAL)
159 		return false;
160 
161 	last_pgarch_start_time = curtime;
162 	return true;
163 }
164 
165 
/*
 * PgArchiverMain
 *
 * Main entry point for archiver process.
 *
 * Installs the archiver's signal dispositions, unblocks signals, registers
 * the exit-time cleanup callback, advertises this process's pgprocno in
 * shared memory, and then runs pgarch_MainLoop().  When the main loop
 * returns the process exits with status 0; this function does not return.
 */
void
PgArchiverMain(void)
{
	/*
	 * Ignore all signals usually bound to some action in the postmaster,
	 * except for SIGHUP, SIGTERM, SIGUSR1, SIGUSR2, and SIGQUIT.
	 *
	 * SIGUSR2 is routed to pgarch_waken_stop, which asks the main loop to
	 * perform one final archive cycle and then exit.
	 */
	pqsignal(SIGHUP, SignalHandlerForConfigReload);
	pqsignal(SIGINT, SIG_IGN);
	pqsignal(SIGTERM, SignalHandlerForShutdownRequest);
	/* SIGQUIT handler was already set up by InitPostmasterChild */
	pqsignal(SIGALRM, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGUSR1, procsignal_sigusr1_handler);
	pqsignal(SIGUSR2, pgarch_waken_stop);

	/* Reset some signals that are accepted by postmaster but not here */
	pqsignal(SIGCHLD, SIG_DFL);

	/* Unblock signals (they were blocked when the postmaster forked us) */
	PG_SETMASK(&UnBlockSig);

	/* We shouldn't be launched unnecessarily. */
	Assert(XLogArchivingActive());

	/*
	 * Arrange to clean up at archiver exit.  The callback (pgarch_die)
	 * withdraws the pgprocno advertised just below.
	 */
	on_shmem_exit(pgarch_die, 0);

	/*
	 * Advertise our pgprocno so that backends can use our latch to wake us up
	 * while we're sleeping (see PgArchWakeup).
	 */
	PgArch->pgprocno = MyProc->pgprocno;

	pgarch_MainLoop();

	/* Main loop only returns when it is time to shut down */
	proc_exit(0);
}
205 
206 /*
207  * Wake up the archiver
208  */
209 void
PgArchWakeup(void)210 PgArchWakeup(void)
211 {
212 	int			arch_pgprocno = PgArch->pgprocno;
213 
214 	/*
215 	 * We don't acquire ProcArrayLock here.  It's actually fine because
216 	 * procLatch isn't ever freed, so we just can potentially set the wrong
217 	 * process' (or no process') latch.  Even in that case the archiver will
218 	 * be relaunched shortly and will start archiving.
219 	 */
220 	if (arch_pgprocno != INVALID_PGPROCNO)
221 		SetLatch(&ProcGlobal->allProcs[arch_pgprocno].procLatch);
222 }
223 
224 
225 /* SIGUSR2 signal handler for archiver process */
226 static void
pgarch_waken_stop(SIGNAL_ARGS)227 pgarch_waken_stop(SIGNAL_ARGS)
228 {
229 	int			save_errno = errno;
230 
231 	/* set flag to do a final cycle and shut down afterwards */
232 	ready_to_stop = true;
233 	SetLatch(MyLatch);
234 
235 	errno = save_errno;
236 }
237 
238 /*
239  * pgarch_MainLoop
240  *
241  * Main loop for archiver
242  */
243 static void
pgarch_MainLoop(void)244 pgarch_MainLoop(void)
245 {
246 	pg_time_t	last_copy_time = 0;
247 	bool		time_to_stop;
248 
249 	/*
250 	 * There shouldn't be anything for the archiver to do except to wait for a
251 	 * signal ... however, the archiver exists to protect our data, so she
252 	 * wakes up occasionally to allow herself to be proactive.
253 	 */
254 	do
255 	{
256 		ResetLatch(MyLatch);
257 
258 		/* When we get SIGUSR2, we do one more archive cycle, then exit */
259 		time_to_stop = ready_to_stop;
260 
261 		/* Check for barrier events and config update */
262 		HandlePgArchInterrupts();
263 
264 		/*
265 		 * If we've gotten SIGTERM, we normally just sit and do nothing until
266 		 * SIGUSR2 arrives.  However, that means a random SIGTERM would
267 		 * disable archiving indefinitely, which doesn't seem like a good
268 		 * idea.  If more than 60 seconds pass since SIGTERM, exit anyway, so
269 		 * that the postmaster can start a new archiver if needed.
270 		 */
271 		if (ShutdownRequestPending)
272 		{
273 			time_t		curtime = time(NULL);
274 
275 			if (last_sigterm_time == 0)
276 				last_sigterm_time = curtime;
277 			else if ((unsigned int) (curtime - last_sigterm_time) >=
278 					 (unsigned int) 60)
279 				break;
280 		}
281 
282 		/* Do what we're here for */
283 		pgarch_ArchiverCopyLoop();
284 		last_copy_time = time(NULL);
285 
286 		/*
287 		 * Sleep until a signal is received, or until a poll is forced by
288 		 * PGARCH_AUTOWAKE_INTERVAL having passed since last_copy_time, or
289 		 * until postmaster dies.
290 		 */
291 		if (!time_to_stop)		/* Don't wait during last iteration */
292 		{
293 			pg_time_t	curtime = (pg_time_t) time(NULL);
294 			int			timeout;
295 
296 			timeout = PGARCH_AUTOWAKE_INTERVAL - (curtime - last_copy_time);
297 			if (timeout > 0)
298 			{
299 				int			rc;
300 
301 				rc = WaitLatch(MyLatch,
302 							   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
303 							   timeout * 1000L,
304 							   WAIT_EVENT_ARCHIVER_MAIN);
305 				if (rc & WL_POSTMASTER_DEATH)
306 					time_to_stop = true;
307 			}
308 		}
309 
310 		/*
311 		 * The archiver quits either when the postmaster dies (not expected)
312 		 * or after completing one more archiving cycle after receiving
313 		 * SIGUSR2.
314 		 */
315 	} while (!time_to_stop);
316 }
317 
318 /*
319  * pgarch_ArchiverCopyLoop
320  *
321  * Archives all outstanding xlogs then returns
322  */
323 static void
pgarch_ArchiverCopyLoop(void)324 pgarch_ArchiverCopyLoop(void)
325 {
326 	char		xlog[MAX_XFN_CHARS + 1];
327 
328 	/*
329 	 * loop through all xlogs with archive_status of .ready and archive
330 	 * them...mostly we expect this to be a single file, though it is possible
331 	 * some backend will add files onto the list of those that need archiving
332 	 * while we are still copying earlier archives
333 	 */
334 	while (pgarch_readyXlog(xlog))
335 	{
336 		int			failures = 0;
337 		int			failures_orphan = 0;
338 
339 		for (;;)
340 		{
341 			struct stat stat_buf;
342 			char		pathname[MAXPGPATH];
343 
344 			/*
345 			 * Do not initiate any more archive commands after receiving
346 			 * SIGTERM, nor after the postmaster has died unexpectedly. The
347 			 * first condition is to try to keep from having init SIGKILL the
348 			 * command, and the second is to avoid conflicts with another
349 			 * archiver spawned by a newer postmaster.
350 			 */
351 			if (ShutdownRequestPending || !PostmasterIsAlive())
352 				return;
353 
354 			/*
355 			 * Check for barrier events and config update.  This is so that
356 			 * we'll adopt a new setting for archive_command as soon as
357 			 * possible, even if there is a backlog of files to be archived.
358 			 */
359 			HandlePgArchInterrupts();
360 
361 			/* can't do anything if no command ... */
362 			if (!XLogArchiveCommandSet())
363 			{
364 				ereport(WARNING,
365 						(errmsg("archive_mode enabled, yet archive_command is not set")));
366 				return;
367 			}
368 
369 			/*
370 			 * Since archive status files are not removed in a durable manner,
371 			 * a system crash could leave behind .ready files for WAL segments
372 			 * that have already been recycled or removed.  In this case,
373 			 * simply remove the orphan status file and move on.  unlink() is
374 			 * used here as even on subsequent crashes the same orphan files
375 			 * would get removed, so there is no need to worry about
376 			 * durability.
377 			 */
378 			snprintf(pathname, MAXPGPATH, XLOGDIR "/%s", xlog);
379 			if (stat(pathname, &stat_buf) != 0 && errno == ENOENT)
380 			{
381 				char		xlogready[MAXPGPATH];
382 
383 				StatusFilePath(xlogready, xlog, ".ready");
384 				if (unlink(xlogready) == 0)
385 				{
386 					ereport(WARNING,
387 							(errmsg("removed orphan archive status file \"%s\"",
388 									xlogready)));
389 
390 					/* leave loop and move to the next status file */
391 					break;
392 				}
393 
394 				if (++failures_orphan >= NUM_ORPHAN_CLEANUP_RETRIES)
395 				{
396 					ereport(WARNING,
397 							(errmsg("removal of orphan archive status file \"%s\" failed too many times, will try again later",
398 									xlogready)));
399 
400 					/* give up cleanup of orphan status files */
401 					return;
402 				}
403 
404 				/* wait a bit before retrying */
405 				pg_usleep(1000000L);
406 				continue;
407 			}
408 
409 			if (pgarch_archiveXlog(xlog))
410 			{
411 				/* successful */
412 				pgarch_archiveDone(xlog);
413 
414 				/*
415 				 * Tell the collector about the WAL file that we successfully
416 				 * archived
417 				 */
418 				pgstat_send_archiver(xlog, false);
419 
420 				break;			/* out of inner retry loop */
421 			}
422 			else
423 			{
424 				/*
425 				 * Tell the collector about the WAL file that we failed to
426 				 * archive
427 				 */
428 				pgstat_send_archiver(xlog, true);
429 
430 				if (++failures >= NUM_ARCHIVE_RETRIES)
431 				{
432 					ereport(WARNING,
433 							(errmsg("archiving write-ahead log file \"%s\" failed too many times, will try again later",
434 									xlog)));
435 					return;		/* give up archiving for now */
436 				}
437 				pg_usleep(1000000L);	/* wait a bit before retrying */
438 			}
439 		}
440 	}
441 }
442 
443 /*
444  * pgarch_archiveXlog
445  *
446  * Invokes system(3) to copy one archive file to wherever it should go
447  *
448  * Returns true if successful
449  */
450 static bool
pgarch_archiveXlog(char * xlog)451 pgarch_archiveXlog(char *xlog)
452 {
453 	char		xlogarchcmd[MAXPGPATH];
454 	char		pathname[MAXPGPATH];
455 	char		activitymsg[MAXFNAMELEN + 16];
456 	char	   *dp;
457 	char	   *endp;
458 	const char *sp;
459 	int			rc;
460 
461 	snprintf(pathname, MAXPGPATH, XLOGDIR "/%s", xlog);
462 
463 	/*
464 	 * construct the command to be executed
465 	 */
466 	dp = xlogarchcmd;
467 	endp = xlogarchcmd + MAXPGPATH - 1;
468 	*endp = '\0';
469 
470 	for (sp = XLogArchiveCommand; *sp; sp++)
471 	{
472 		if (*sp == '%')
473 		{
474 			switch (sp[1])
475 			{
476 				case 'p':
477 					/* %p: relative path of source file */
478 					sp++;
479 					strlcpy(dp, pathname, endp - dp);
480 					make_native_path(dp);
481 					dp += strlen(dp);
482 					break;
483 				case 'f':
484 					/* %f: filename of source file */
485 					sp++;
486 					strlcpy(dp, xlog, endp - dp);
487 					dp += strlen(dp);
488 					break;
489 				case '%':
490 					/* convert %% to a single % */
491 					sp++;
492 					if (dp < endp)
493 						*dp++ = *sp;
494 					break;
495 				default:
496 					/* otherwise treat the % as not special */
497 					if (dp < endp)
498 						*dp++ = *sp;
499 					break;
500 			}
501 		}
502 		else
503 		{
504 			if (dp < endp)
505 				*dp++ = *sp;
506 		}
507 	}
508 	*dp = '\0';
509 
510 	ereport(DEBUG3,
511 			(errmsg_internal("executing archive command \"%s\"",
512 							 xlogarchcmd)));
513 
514 	/* Report archive activity in PS display */
515 	snprintf(activitymsg, sizeof(activitymsg), "archiving %s", xlog);
516 	set_ps_display(activitymsg);
517 
518 	rc = system(xlogarchcmd);
519 	if (rc != 0)
520 	{
521 		/*
522 		 * If either the shell itself, or a called command, died on a signal,
523 		 * abort the archiver.  We do this because system() ignores SIGINT and
524 		 * SIGQUIT while waiting; so a signal is very likely something that
525 		 * should have interrupted us too.  Also die if the shell got a hard
526 		 * "command not found" type of error.  If we overreact it's no big
527 		 * deal, the postmaster will just start the archiver again.
528 		 */
529 		int			lev = wait_result_is_any_signal(rc, true) ? FATAL : LOG;
530 
531 		if (WIFEXITED(rc))
532 		{
533 			ereport(lev,
534 					(errmsg("archive command failed with exit code %d",
535 							WEXITSTATUS(rc)),
536 					 errdetail("The failed archive command was: %s",
537 							   xlogarchcmd)));
538 		}
539 		else if (WIFSIGNALED(rc))
540 		{
541 #if defined(WIN32)
542 			ereport(lev,
543 					(errmsg("archive command was terminated by exception 0x%X",
544 							WTERMSIG(rc)),
545 					 errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
546 					 errdetail("The failed archive command was: %s",
547 							   xlogarchcmd)));
548 #else
549 			ereport(lev,
550 					(errmsg("archive command was terminated by signal %d: %s",
551 							WTERMSIG(rc), pg_strsignal(WTERMSIG(rc))),
552 					 errdetail("The failed archive command was: %s",
553 							   xlogarchcmd)));
554 #endif
555 		}
556 		else
557 		{
558 			ereport(lev,
559 					(errmsg("archive command exited with unrecognized status %d",
560 							rc),
561 					 errdetail("The failed archive command was: %s",
562 							   xlogarchcmd)));
563 		}
564 
565 		snprintf(activitymsg, sizeof(activitymsg), "failed on %s", xlog);
566 		set_ps_display(activitymsg);
567 
568 		return false;
569 	}
570 	elog(DEBUG1, "archived write-ahead log file \"%s\"", xlog);
571 
572 	snprintf(activitymsg, sizeof(activitymsg), "last was %s", xlog);
573 	set_ps_display(activitymsg);
574 
575 	return true;
576 }
577 
578 /*
579  * pgarch_readyXlog
580  *
581  * Return name of the oldest xlog file that has not yet been archived.
582  * No notification is set that file archiving is now in progress, so
583  * this would need to be extended if multiple concurrent archival
584  * tasks were created. If a failure occurs, we will completely
585  * re-copy the file at the next available opportunity.
586  *
587  * It is important that we return the oldest, so that we archive xlogs
588  * in order that they were written, for two reasons:
589  * 1) to maintain the sequential chain of xlogs required for recovery
590  * 2) because the oldest ones will sooner become candidates for
591  * recycling at time of checkpoint
592  *
593  * NOTE: the "oldest" comparison will consider any .history file to be older
594  * than any other file except another .history file.  Segments on a timeline
595  * with a smaller ID will be older than all segments on a timeline with a
596  * larger ID; the net result being that past timelines are given higher
597  * priority for archiving.  This seems okay, or at least not obviously worth
598  * changing.
599  */
600 static bool
pgarch_readyXlog(char * xlog)601 pgarch_readyXlog(char *xlog)
602 {
603 	/*
604 	 * open xlog status directory and read through list of xlogs that have the
605 	 * .ready suffix, looking for earliest file. It is possible to optimise
606 	 * this code, though only a single file is expected on the vast majority
607 	 * of calls, so....
608 	 */
609 	char		XLogArchiveStatusDir[MAXPGPATH];
610 	DIR		   *rldir;
611 	struct dirent *rlde;
612 	bool		found = false;
613 	bool		historyFound = false;
614 
615 	snprintf(XLogArchiveStatusDir, MAXPGPATH, XLOGDIR "/archive_status");
616 	rldir = AllocateDir(XLogArchiveStatusDir);
617 
618 	while ((rlde = ReadDir(rldir, XLogArchiveStatusDir)) != NULL)
619 	{
620 		int			basenamelen = (int) strlen(rlde->d_name) - 6;
621 		char		basename[MAX_XFN_CHARS + 1];
622 		bool		ishistory;
623 
624 		/* Ignore entries with unexpected number of characters */
625 		if (basenamelen < MIN_XFN_CHARS ||
626 			basenamelen > MAX_XFN_CHARS)
627 			continue;
628 
629 		/* Ignore entries with unexpected characters */
630 		if (strspn(rlde->d_name, VALID_XFN_CHARS) < basenamelen)
631 			continue;
632 
633 		/* Ignore anything not suffixed with .ready */
634 		if (strcmp(rlde->d_name + basenamelen, ".ready") != 0)
635 			continue;
636 
637 		/* Truncate off the .ready */
638 		memcpy(basename, rlde->d_name, basenamelen);
639 		basename[basenamelen] = '\0';
640 
641 		/* Is this a history file? */
642 		ishistory = IsTLHistoryFileName(basename);
643 
644 		/*
645 		 * Consume the file to archive.  History files have the highest
646 		 * priority.  If this is the first file or the first history file
647 		 * ever, copy it.  In the presence of a history file already chosen as
648 		 * target, ignore all other files except history files which have been
649 		 * generated for an older timeline than what is already chosen as
650 		 * target to archive.
651 		 */
652 		if (!found || (ishistory && !historyFound))
653 		{
654 			strcpy(xlog, basename);
655 			found = true;
656 			historyFound = ishistory;
657 		}
658 		else if (ishistory || !historyFound)
659 		{
660 			if (strcmp(basename, xlog) < 0)
661 				strcpy(xlog, basename);
662 		}
663 	}
664 	FreeDir(rldir);
665 
666 	return found;
667 }
668 
669 /*
670  * pgarch_archiveDone
671  *
672  * Emit notification that an xlog file has been successfully archived.
673  * We do this by renaming the status file from NNN.ready to NNN.done.
674  * Eventually, a checkpoint process will notice this and delete both the
675  * NNN.done file and the xlog file itself.
676  */
677 static void
pgarch_archiveDone(char * xlog)678 pgarch_archiveDone(char *xlog)
679 {
680 	char		rlogready[MAXPGPATH];
681 	char		rlogdone[MAXPGPATH];
682 
683 	StatusFilePath(rlogready, xlog, ".ready");
684 	StatusFilePath(rlogdone, xlog, ".done");
685 	(void) durable_rename(rlogready, rlogdone, WARNING);
686 }
687 
688 
689 /*
690  * pgarch_die
691  *
692  * Exit-time cleanup handler
693  */
694 static void
pgarch_die(int code,Datum arg)695 pgarch_die(int code, Datum arg)
696 {
697 	PgArch->pgprocno = INVALID_PGPROCNO;
698 }
699 
700 /*
701  * Interrupt handler for WAL archiver process.
702  *
703  * This is called in the loops pgarch_MainLoop and pgarch_ArchiverCopyLoop.
704  * It checks for barrier events and config update, but not shutdown request
705  * because how to handle shutdown request is different between those loops.
706  */
707 static void
HandlePgArchInterrupts(void)708 HandlePgArchInterrupts(void)
709 {
710 	if (ProcSignalBarrierPending)
711 		ProcessProcSignalBarrier();
712 
713 	if (ConfigReloadPending)
714 	{
715 		ConfigReloadPending = false;
716 		ProcessConfigFile(PGC_SIGHUP);
717 	}
718 }
719