1 /*
2  * contrib/pg_standby/pg_standby.c
3  *
4  *
5  * pg_standby.c
6  *
7  * Production-ready example of how to create a Warm Standby
8  * database server using continuous archiving as a
9  * replication mechanism
10  *
11  * We separate the parameters for archive and nextWALfile
12  * so that we can check the archive exists, even if the
13  * WAL file doesn't (yet).
14  *
15  * This program will be executed once in full for each file
16  * requested by the warm standby server.
17  *
 * It is designed to cater to a variety of needs, as well as
 * providing a customizable section.
20  *
21  * Original author:		Simon Riggs  simon@2ndquadrant.com
22  * Current maintainer:	Simon Riggs
23  */
24 #include "postgres_fe.h"
25 
26 #include <ctype.h>
27 #include <dirent.h>
28 #include <sys/stat.h>
29 #include <fcntl.h>
30 #include <signal.h>
31 #include <sys/time.h>
32 
33 #include "access/xlog_internal.h"
34 #include "pg_getopt.h"
35 
const char *progname;			/* program name, derived from argv[0] in main() */

int			WalSegSz = -1;		/* WAL segment size in bytes; -1 until
								 * SetWALSegSize() reads it from a segment's
								 * long page header */

/* Options and defaults */
int			sleeptime = 5;		/* amount of time to sleep between file checks */
int			waittime = -1;		/* how long we have been waiting, -1 no wait
								 * yet */
int			maxwaittime = 0;	/* how long are we prepared to wait for? */
int			keepfiles = 0;		/* number of WAL files to keep, 0 keep all */
int			maxretries = 3;		/* number of retries on restore command */
bool		debug = false;		/* are we debugging? */
bool		need_cleanup = false;	/* do we need to remove files from
									 * archive? */

#ifndef WIN32
/* Set by sighandler(); polled by the main wait loop to start fast failover */
static volatile sig_atomic_t signaled = false;
#endif

char	   *archiveLocation;	/* where to find the archive? */
char	   *triggerPath;		/* where to find the trigger file? */
char	   *xlogFilePath;		/* where we are going to restore to */
char	   *nextWALFileName;	/* the file we need to get from archive */
char	   *restartWALFileName; /* the file from which we can restart restore */
char		WALFilePath[MAXPGPATH * 2]; /* the file path including archive */
char		restoreCommand[MAXPGPATH];	/* run this to restore */
char		exclusiveCleanupFileName[MAXFNAMELEN];	/* cleanup cutoff: archived
													 * segments that sort
													 * before this name are
													 * removed */

/*
 * Two types of failover are supported (smart and fast failover).
 *
 * The content of the trigger file determines the type of failover. If the
 * trigger file contains the word "smart" (or the file is empty), smart
 * failover is chosen: pg_standby acts as cp or ln command itself, on
 * successful completion all the available WAL records will be applied
 * resulting in zero data loss. But, it might take a long time to finish
 * recovery if there's a lot of unapplied WAL.
 *
 * On the other hand, if the trigger file contains the word "fast", the
 * recovery is finished immediately even if unapplied WAL files remain. Any
 * transactions in the unapplied WAL files are lost.
 *
 * An empty trigger file performs smart failover. SIGUSR1 or SIGINT triggers
 * fast failover. A timeout causes fast failover (smart failover would have
 * the same effect, since if the timeout is reached there is no unapplied WAL).
 */
#define NoFailover		0
#define SmartFailover	1
#define FastFailover	2

static int	Failover = NoFailover;

/* How the file is brought over from the archive (see CustomizableInitialize) */
#define RESTORE_COMMAND_COPY 0
#define RESTORE_COMMAND_LINK 1
int			restoreCommandType;

/* Kind of file being requested: a WAL data segment or a timeline history file */
#define XLOG_DATA			 0
#define XLOG_HISTORY		 1
int			nextWALFileType;

/*
 * Format the shell command into restoreCommand as: cmd "arg1" "arg2".
 * cmd must be a string literal, since it is pasted onto the format string.
 */
#define SET_RESTORE_COMMAND(cmd, arg1, arg2) \
	snprintf(restoreCommand, MAXPGPATH, cmd " \"%s\" \"%s\"", arg1, arg2)

struct stat stat_buf;			/* scratch buffer shared by all stat() calls */

static bool SetWALFileNameForCleanup(void);
static bool SetWALSegSize(void);
104 
105 
106 /* =====================================================================
107  *
108  *		  Customizable section
109  *
110  * =====================================================================
111  *
112  *	Currently, this section assumes that the Archive is a locally
113  *	accessible directory. If you want to make other assumptions,
114  *	such as using a vendor-specific archive and access API, these
115  *	routines are the ones you'll need to change. You're
116  *	encouraged to submit any changes to pgsql-hackers@lists.postgresql.org
117  *	or personally to the current maintainer. Those changes may be
118  *	folded in to later versions of this program.
119  */
120 
121 /*
122  *	Initialize allows customized commands into the warm standby program.
123  *
124  *	As an example, and probably the common case, we use either
125  *	cp/ln commands on *nix, or copy/move command on Windows.
126  */
127 static void
CustomizableInitialize(void)128 CustomizableInitialize(void)
129 {
130 #ifdef WIN32
131 	snprintf(WALFilePath, MAXPGPATH, "%s\\%s", archiveLocation, nextWALFileName);
132 	switch (restoreCommandType)
133 	{
134 		case RESTORE_COMMAND_LINK:
135 			SET_RESTORE_COMMAND("mklink", WALFilePath, xlogFilePath);
136 			break;
137 		case RESTORE_COMMAND_COPY:
138 		default:
139 			SET_RESTORE_COMMAND("copy", WALFilePath, xlogFilePath);
140 			break;
141 	}
142 #else
143 	snprintf(WALFilePath, MAXPGPATH, "%s/%s", archiveLocation, nextWALFileName);
144 	switch (restoreCommandType)
145 	{
146 		case RESTORE_COMMAND_LINK:
147 			SET_RESTORE_COMMAND("ln -s -f", WALFilePath, xlogFilePath);
148 			break;
149 		case RESTORE_COMMAND_COPY:
150 		default:
151 			SET_RESTORE_COMMAND("cp", WALFilePath, xlogFilePath);
152 			break;
153 	}
154 #endif
155 
156 	/*
157 	 * This code assumes that archiveLocation is a directory You may wish to
158 	 * add code to check for tape libraries, etc.. So, since it is a
159 	 * directory, we use stat to test if it's accessible
160 	 */
161 	if (stat(archiveLocation, &stat_buf) != 0)
162 	{
163 		fprintf(stderr, "%s: archive location \"%s\" does not exist\n", progname, archiveLocation);
164 		fflush(stderr);
165 		exit(2);
166 	}
167 }
168 
169 /*
170  * CustomizableNextWALFileReady()
171  *
172  *	  Is the requested file ready yet?
173  */
174 static bool
CustomizableNextWALFileReady(void)175 CustomizableNextWALFileReady(void)
176 {
177 	if (stat(WALFilePath, &stat_buf) == 0)
178 	{
179 		/*
180 		 * If we've not seen any WAL segments, we don't know the WAL segment
181 		 * size, which we need. If it looks like a WAL segment, determine size
182 		 * of segments for the cluster.
183 		 */
184 		if (WalSegSz == -1 && IsXLogFileName(nextWALFileName))
185 		{
186 			if (SetWALSegSize())
187 			{
188 				/*
189 				 * Successfully determined WAL segment size. Can compute
190 				 * cleanup cutoff now.
191 				 */
192 				need_cleanup = SetWALFileNameForCleanup();
193 				if (debug)
194 				{
195 					fprintf(stderr,
196 							_("WAL segment size:     %d \n"), WalSegSz);
197 					fprintf(stderr, "Keep archive history: ");
198 
199 					if (need_cleanup)
200 						fprintf(stderr, "%s and later\n",
201 								exclusiveCleanupFileName);
202 					else
203 						fprintf(stderr, "no cleanup required\n");
204 				}
205 			}
206 		}
207 
208 		/*
209 		 * Return only if it's the right size already.
210 		 */
211 		if (WalSegSz > 0 && stat_buf.st_size == WalSegSz)
212 		{
213 #ifdef WIN32
214 
215 			/*
216 			 * Windows 'cp' sets the final file size before the copy is
217 			 * complete, and not yet ready to be opened by pg_standby. So we
218 			 * wait for sleeptime secs before attempting to restore. If that
219 			 * is not enough, we will rely on the retry/holdoff mechanism.
220 			 * GNUWin32's cp does not have this problem.
221 			 */
222 			pg_usleep(sleeptime * 1000000L);
223 #endif
224 			nextWALFileType = XLOG_DATA;
225 			return true;
226 		}
227 
228 		/*
229 		 * If still too small, wait until it is the correct size
230 		 */
231 		if (WalSegSz > 0 && stat_buf.st_size > WalSegSz)
232 		{
233 			if (debug)
234 			{
235 				fprintf(stderr, "file size greater than expected\n");
236 				fflush(stderr);
237 			}
238 			exit(3);
239 		}
240 	}
241 
242 	return false;
243 }
244 
245 static void
CustomizableCleanupPriorWALFiles(void)246 CustomizableCleanupPriorWALFiles(void)
247 {
248 	/*
249 	 * Work out name of prior file from current filename
250 	 */
251 	if (nextWALFileType == XLOG_DATA)
252 	{
253 		int			rc;
254 		DIR		   *xldir;
255 		struct dirent *xlde;
256 
257 		/*
258 		 * Assume it's OK to keep failing. The failure situation may change
259 		 * over time, so we'd rather keep going on the main processing than
260 		 * fail because we couldn't clean up yet.
261 		 */
262 		if ((xldir = opendir(archiveLocation)) != NULL)
263 		{
264 			while (errno = 0, (xlde = readdir(xldir)) != NULL)
265 			{
266 				/*
267 				 * We ignore the timeline part of the XLOG segment identifiers
268 				 * in deciding whether a segment is still needed.  This
269 				 * ensures that we won't prematurely remove a segment from a
270 				 * parent timeline. We could probably be a little more
271 				 * proactive about removing segments of non-parent timelines,
272 				 * but that would be a whole lot more complicated.
273 				 *
274 				 * We use the alphanumeric sorting property of the filenames
275 				 * to decide which ones are earlier than the
276 				 * exclusiveCleanupFileName file. Note that this means files
277 				 * are not removed in the order they were originally written,
278 				 * in case this worries you.
279 				 */
280 				if (IsXLogFileName(xlde->d_name) &&
281 					strcmp(xlde->d_name + 8, exclusiveCleanupFileName + 8) < 0)
282 				{
283 #ifdef WIN32
284 					snprintf(WALFilePath, sizeof(WALFilePath), "%s\\%s", archiveLocation, xlde->d_name);
285 #else
286 					snprintf(WALFilePath, sizeof(WALFilePath), "%s/%s", archiveLocation, xlde->d_name);
287 #endif
288 
289 					if (debug)
290 						fprintf(stderr, "\nremoving file \"%s\"", WALFilePath);
291 
292 					rc = unlink(WALFilePath);
293 					if (rc != 0)
294 					{
295 						fprintf(stderr, "\n%s: ERROR: could not remove file \"%s\": %s\n",
296 								progname, WALFilePath, strerror(errno));
297 						break;
298 					}
299 				}
300 			}
301 
302 			if (errno)
303 				fprintf(stderr, "%s: could not read archive location \"%s\": %s\n",
304 						progname, archiveLocation, strerror(errno));
305 			if (debug)
306 				fprintf(stderr, "\n");
307 		}
308 		else
309 			fprintf(stderr, "%s: could not open archive location \"%s\": %s\n",
310 					progname, archiveLocation, strerror(errno));
311 
312 		if (closedir(xldir))
313 			fprintf(stderr, "%s: could not close archive location \"%s\": %s\n",
314 					progname, archiveLocation, strerror(errno));
315 
316 		fflush(stderr);
317 	}
318 }
319 
320 /* =====================================================================
321  *		  End of Customizable section
322  * =====================================================================
323  */
324 
325 /*
326  * SetWALFileNameForCleanup()
327  *
328  *	  Set the earliest WAL filename that we want to keep on the archive
329  *	  and decide whether we need_cleanup
330  */
331 static bool
SetWALFileNameForCleanup(void)332 SetWALFileNameForCleanup(void)
333 {
334 	uint32		tli = 1,
335 				log = 0,
336 				seg = 0;
337 	uint32		log_diff = 0,
338 				seg_diff = 0;
339 	bool		cleanup = false;
340 	int			max_segments_per_logfile = (0xFFFFFFFF / WalSegSz);
341 
342 	if (restartWALFileName)
343 	{
344 		/*
345 		 * Don't do cleanup if the restartWALFileName provided is later than
346 		 * the xlog file requested. This is an error and we must not remove
347 		 * these files from archive. This shouldn't happen, but better safe
348 		 * than sorry.
349 		 */
350 		if (strcmp(restartWALFileName, nextWALFileName) > 0)
351 			return false;
352 
353 		strlcpy(exclusiveCleanupFileName, restartWALFileName, sizeof(exclusiveCleanupFileName));
354 		return true;
355 	}
356 
357 	if (keepfiles > 0)
358 	{
359 		sscanf(nextWALFileName, "%08X%08X%08X", &tli, &log, &seg);
360 		if (tli > 0 && seg > 0)
361 		{
362 			log_diff = keepfiles / max_segments_per_logfile;
363 			seg_diff = keepfiles % max_segments_per_logfile;
364 			if (seg_diff > seg)
365 			{
366 				log_diff++;
367 				seg = max_segments_per_logfile - (seg_diff - seg);
368 			}
369 			else
370 				seg -= seg_diff;
371 
372 			if (log >= log_diff)
373 			{
374 				log -= log_diff;
375 				cleanup = true;
376 			}
377 			else
378 			{
379 				log = 0;
380 				seg = 0;
381 			}
382 		}
383 	}
384 
385 	XLogFileNameById(exclusiveCleanupFileName, tli, log, seg);
386 
387 	return cleanup;
388 }
389 
390 /*
391  * Try to set the wal segment size from the WAL file specified by WALFilePath.
392  *
393  * Return true if size could be determined, false otherwise.
394  */
395 static bool
SetWALSegSize(void)396 SetWALSegSize(void)
397 {
398 	bool		ret_val = false;
399 	int			fd;
400 	PGAlignedXLogBlock buf;
401 
402 	Assert(WalSegSz == -1);
403 
404 	if ((fd = open(WALFilePath, O_RDWR, 0)) < 0)
405 	{
406 		fprintf(stderr, "%s: could not open WAL file \"%s\": %s\n",
407 				progname, WALFilePath, strerror(errno));
408 		return false;
409 	}
410 
411 	errno = 0;
412 	if (read(fd, buf.data, XLOG_BLCKSZ) == XLOG_BLCKSZ)
413 	{
414 		XLogLongPageHeader longhdr = (XLogLongPageHeader) buf.data;
415 
416 		WalSegSz = longhdr->xlp_seg_size;
417 
418 		if (IsValidWalSegSize(WalSegSz))
419 		{
420 			/* successfully retrieved WAL segment size */
421 			ret_val = true;
422 		}
423 		else
424 			fprintf(stderr,
425 					"%s: WAL segment size must be a power of two between 1MB and 1GB, but the WAL file header specifies %d bytes\n",
426 					progname, WalSegSz);
427 	}
428 	else
429 	{
430 		/*
431 		 * Don't complain loudly, this is to be expected for segments being
432 		 * created.
433 		 */
434 		if (errno != 0)
435 		{
436 			if (debug)
437 				fprintf(stderr, "could not read file \"%s\": %s\n",
438 						WALFilePath, strerror(errno));
439 		}
440 		else
441 		{
442 			if (debug)
443 				fprintf(stderr, "not enough data in file \"%s\"\n",
444 						WALFilePath);
445 		}
446 	}
447 
448 	fflush(stderr);
449 
450 	close(fd);
451 	return ret_val;
452 }
453 
454 /*
455  * CheckForExternalTrigger()
456  *
457  *	  Is there a trigger file? Sets global 'Failover' variable to indicate
458  *	  what kind of a trigger file it was. A "fast" trigger file is turned
459  *	  into a "smart" file as a side-effect.
460  */
461 static void
CheckForExternalTrigger(void)462 CheckForExternalTrigger(void)
463 {
464 	char		buf[32];
465 	int			fd;
466 	int			len;
467 
468 	/*
469 	 * Look for a trigger file, if that option has been selected
470 	 *
471 	 * We use stat() here because triggerPath is always a file rather than
472 	 * potentially being in an archive
473 	 */
474 	if (!triggerPath || stat(triggerPath, &stat_buf) != 0)
475 		return;
476 
477 	/*
478 	 * An empty trigger file performs smart failover. There's a little race
479 	 * condition here: if the writer of the trigger file has just created the
480 	 * file, but not yet written anything to it, we'll treat that as smart
481 	 * shutdown even if the other process was just about to write "fast" to
482 	 * it. But that's fine: we'll restore one more WAL file, and when we're
483 	 * invoked next time, we'll see the word "fast" and fail over immediately.
484 	 */
485 	if (stat_buf.st_size == 0)
486 	{
487 		Failover = SmartFailover;
488 		fprintf(stderr, "trigger file found: smart failover\n");
489 		fflush(stderr);
490 		return;
491 	}
492 
493 	if ((fd = open(triggerPath, O_RDWR, 0)) < 0)
494 	{
495 		fprintf(stderr, "WARNING: could not open \"%s\": %s\n",
496 				triggerPath, strerror(errno));
497 		fflush(stderr);
498 		return;
499 	}
500 
501 	if ((len = read(fd, buf, sizeof(buf) - 1)) < 0)
502 	{
503 		fprintf(stderr, "WARNING: could not read \"%s\": %s\n",
504 				triggerPath, strerror(errno));
505 		fflush(stderr);
506 		close(fd);
507 		return;
508 	}
509 	buf[len] = '\0';
510 
511 	if (strncmp(buf, "smart", 5) == 0)
512 	{
513 		Failover = SmartFailover;
514 		fprintf(stderr, "trigger file found: smart failover\n");
515 		fflush(stderr);
516 		close(fd);
517 		return;
518 	}
519 
520 	if (strncmp(buf, "fast", 4) == 0)
521 	{
522 		Failover = FastFailover;
523 
524 		fprintf(stderr, "trigger file found: fast failover\n");
525 		fflush(stderr);
526 
527 		/*
528 		 * Turn it into a "smart" trigger by truncating the file. Otherwise if
529 		 * the server asks us again to restore a segment that was restored
530 		 * already, we would return "not found" and upset the server.
531 		 */
532 		if (ftruncate(fd, 0) < 0)
533 		{
534 			fprintf(stderr, "WARNING: could not read \"%s\": %s\n",
535 					triggerPath, strerror(errno));
536 			fflush(stderr);
537 		}
538 		close(fd);
539 
540 		return;
541 	}
542 	close(fd);
543 
544 	fprintf(stderr, "WARNING: invalid content in \"%s\"\n", triggerPath);
545 	fflush(stderr);
546 }
547 
548 /*
549  * RestoreWALFileForRecovery()
550  *
551  *	  Perform the action required to restore the file from archive
552  */
553 static bool
RestoreWALFileForRecovery(void)554 RestoreWALFileForRecovery(void)
555 {
556 	int			rc = 0;
557 	int			numretries = 0;
558 
559 	if (debug)
560 	{
561 		fprintf(stderr, "running restore:      ");
562 		fflush(stderr);
563 	}
564 
565 	while (numretries <= maxretries)
566 	{
567 		rc = system(restoreCommand);
568 		if (rc == 0)
569 		{
570 			if (debug)
571 			{
572 				fprintf(stderr, "OK\n");
573 				fflush(stderr);
574 			}
575 			return true;
576 		}
577 		pg_usleep(numretries++ * sleeptime * 1000000L);
578 	}
579 
580 	/*
581 	 * Allow caller to add additional info
582 	 */
583 	if (debug)
584 		fprintf(stderr, "not restored\n");
585 	return false;
586 }
587 
588 static void
usage(void)589 usage(void)
590 {
591 	printf("%s allows PostgreSQL warm standby servers to be configured.\n\n", progname);
592 	printf("Usage:\n");
593 	printf("  %s [OPTION]... ARCHIVELOCATION NEXTWALFILE XLOGFILEPATH [RESTARTWALFILE]\n", progname);
594 	printf("\nOptions:\n");
595 	printf("  -c                 copy file from archive (default)\n");
596 	printf("  -d                 generate lots of debugging output (testing only)\n");
597 	printf("  -k NUMFILESTOKEEP  if RESTARTWALFILE is not used, remove files prior to limit\n"
598 		   "                     (0 keeps all)\n");
599 	printf("  -l                 does nothing; use of link is now deprecated\n");
600 	printf("  -r MAXRETRIES      max number of times to retry, with progressive wait\n"
601 		   "                     (default=3)\n");
602 	printf("  -s SLEEPTIME       seconds to wait between file checks (min=1, max=60,\n"
603 		   "                     default=5)\n");
604 	printf("  -t TRIGGERFILE     trigger file to initiate failover (no default)\n");
605 	printf("  -V, --version      output version information, then exit\n");
606 	printf("  -w MAXWAITTIME     max seconds to wait for a file (0=no limit) (default=0)\n");
607 	printf("  -?, --help         show this help, then exit\n");
608 	printf("\n"
609 		   "Main intended use as restore_command in postgresql.conf:\n"
610 		   "  restore_command = 'pg_standby [OPTION]... ARCHIVELOCATION %%f %%p %%r'\n"
611 		   "e.g.\n"
612 		   "  restore_command = 'pg_standby /mnt/server/archiverdir %%f %%p %%r'\n");
613 	printf("\nReport bugs to <%s>.\n", PACKAGE_BUGREPORT);
614 	printf("%s home page: <%s>\n", PACKAGE_NAME, PACKAGE_URL);
615 }
616 
617 #ifndef WIN32
/*
 * Signal handler installed by main() for SIGUSR1 and SIGINT.
 *
 * It only records that a signal arrived; the main wait loop checks the
 * flag and switches to fast failover.  The signal number is not used.
 */
static void
sighandler(int sig)
{
	signaled = true;
}
623 
/*
 * We don't want SIGQUIT to core dump.
 *
 * Installed by main() for SIGQUIT: put SIGINT back to its default
 * disposition and send SIGINT to ourselves.  The signal number is not used.
 */
static void
sigquit_handler(int sig)
{
	pqsignal(SIGINT, SIG_DFL);
	kill(getpid(), SIGINT);
}
631 #endif
632 
/*------------ MAIN ----------------------------------------*/
/*
 * Parse options and the positional arguments (archive location, WAL file
 * name, restore destination, optional restart WAL file name), then wait for
 * the requested file to appear in the archive and restore it.
 *
 * Exit statuses used here: 0 when the file was restored, 1 when it was not
 * (restore failed, or failover was triggered), 2 for command-line errors.
 */
int
main(int argc, char **argv)
{
	int			c;

	progname = get_progname(argv[0]);

	/* --help and --version are only recognized as the first argument */
	if (argc > 1)
	{
		if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
		{
			usage();
			exit(0);
		}
		if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
		{
			puts("pg_standby (PostgreSQL) " PG_VERSION);
			exit(0);
		}
	}

#ifndef WIN32

	/*
	 * You can send SIGUSR1 to trigger failover.
	 *
	 * Postmaster uses SIGQUIT to request immediate shutdown. The default
	 * action is to core dump, but we don't want that, so trap it and commit
	 * suicide without core dump.
	 *
	 * We used to use SIGINT and SIGQUIT to trigger failover, but that turned
	 * out to be a bad idea because postmaster uses SIGQUIT to request
	 * immediate shutdown. We still trap SIGINT, but that may change in a
	 * future release.
	 *
	 * There's no way to trigger failover via signal on Windows.
	 */
	(void) pqsignal(SIGUSR1, sighandler);
	(void) pqsignal(SIGINT, sighandler);	/* deprecated, use SIGUSR1 */
	(void) pqsignal(SIGQUIT, sigquit_handler);
#endif

	while ((c = getopt(argc, argv, "cdk:lr:s:t:w:")) != -1)
	{
		switch (c)
		{
			case 'c':			/* Use copy */
				restoreCommandType = RESTORE_COMMAND_COPY;
				break;
			case 'd':			/* Debug mode */
				debug = true;
				break;
			case 'k':			/* keepfiles */
				/* atoi() yields 0 for non-numeric text, i.e. "keep all" */
				keepfiles = atoi(optarg);
				if (keepfiles < 0)
				{
					fprintf(stderr, "%s: -k keepfiles must be >= 0\n", progname);
					exit(2);
				}
				break;
			case 'l':			/* Use link */

				/*
				 * Link feature disabled, possibly permanently. Linking causes
				 * a problem after recovery ends that is not currently
				 * resolved by PostgreSQL. 25 Jun 2009
				 */
#ifdef NOT_USED
				restoreCommandType = RESTORE_COMMAND_LINK;
#endif
				break;
			case 'r':			/* Retries */
				maxretries = atoi(optarg);
				if (maxretries < 0)
				{
					fprintf(stderr, "%s: -r maxretries must be >= 0\n", progname);
					exit(2);
				}
				break;
			case 's':			/* Sleep time */
				sleeptime = atoi(optarg);
				if (sleeptime <= 0 || sleeptime > 60)
				{
					fprintf(stderr, "%s: -s sleeptime incorrectly set\n", progname);
					exit(2);
				}
				break;
			case 't':			/* Trigger file */
				triggerPath = pg_strdup(optarg);
				break;
			case 'w':			/* Max wait time */
				maxwaittime = atoi(optarg);
				if (maxwaittime < 0)
				{
					fprintf(stderr, "%s: -w maxwaittime incorrectly set\n", progname);
					exit(2);
				}
				break;
			default:
				fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
				exit(2);
				break;
		}
	}

	/*
	 * Parameter checking - after checking to see if trigger file present
	 */
	if (argc == 1)
	{
		fprintf(stderr, "%s: not enough command-line arguments\n", progname);
		exit(2);
	}

	/*
	 * We will go to the archiveLocation to get nextWALFileName.
	 * nextWALFileName may not exist yet, which would not be an error, so we
	 * separate the archiveLocation and nextWALFileName so we can check
	 * separately whether archiveLocation exists, if not that is an error
	 */
	if (optind < argc)
	{
		archiveLocation = argv[optind];
		optind++;
	}
	else
	{
		fprintf(stderr, "%s: must specify archive location\n", progname);
		fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
		exit(2);
	}

	if (optind < argc)
	{
		nextWALFileName = argv[optind];
		optind++;
	}
	else
	{
		fprintf(stderr, "%s: must specify WAL file name as second non-option argument (use \"%%f\")\n", progname);
		fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
		exit(2);
	}

	if (optind < argc)
	{
		xlogFilePath = argv[optind];
		optind++;
	}
	else
	{
		fprintf(stderr, "%s: must specify xlog destination as third non-option argument (use \"%%p\")\n", progname);
		fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
		exit(2);
	}

	/* The fourth positional argument (restart WAL file) is optional */
	if (optind < argc)
	{
		restartWALFileName = argv[optind];
		optind++;
	}

	/* Builds WALFilePath and restoreCommand; exits if the archive is missing */
	CustomizableInitialize();

	if (debug)
	{
		fprintf(stderr, "Trigger file:         %s\n", triggerPath ? triggerPath : "<not set>");
		fprintf(stderr, "Waiting for WAL file: %s\n", nextWALFileName);
		fprintf(stderr, "WAL file path:        %s\n", WALFilePath);
		fprintf(stderr, "Restoring to:         %s\n", xlogFilePath);
		fprintf(stderr, "Sleep interval:       %d second%s\n",
				sleeptime, (sleeptime > 1 ? "s" : " "));
		fprintf(stderr, "Max wait interval:    %d %s\n",
				maxwaittime, (maxwaittime > 0 ? "seconds" : "forever"));
		fprintf(stderr, "Command for restore:  %s\n", restoreCommand);
		fflush(stderr);
	}

	/*
	 * Check for initial history file: always the first file to be requested
	 * It's OK if the file isn't there - all other files need to wait
	 */
	if (IsTLHistoryFileName(nextWALFileName))
	{
		nextWALFileType = XLOG_HISTORY;
		if (RestoreWALFileForRecovery())
			exit(0);
		else
		{
			if (debug)
			{
				fprintf(stderr, "history file not found\n");
				fflush(stderr);
			}
			exit(1);
		}
	}

	/*
	 * Main wait loop
	 */
	for (;;)
	{
		/* Check for trigger file or signal first */
		CheckForExternalTrigger();
#ifndef WIN32
		if (signaled)
		{
			Failover = FastFailover;
			if (debug)
			{
				fprintf(stderr, "signaled to exit: fast failover\n");
				fflush(stderr);
			}
		}
#endif

		/*
		 * Check for fast failover immediately, before checking if the
		 * requested WAL file is available
		 */
		if (Failover == FastFailover)
			exit(1);

		if (CustomizableNextWALFileReady())
		{
			/*
			 * Once we have restored this file successfully we can remove some
			 * prior WAL files. If this restore fails we mustn't remove any
			 * file because some of them will be requested again immediately
			 * after the failed restore, or when we restart recovery.
			 */
			if (RestoreWALFileForRecovery())
			{
				if (need_cleanup)
					CustomizableCleanupPriorWALFiles();

				exit(0);
			}
			else
			{
				/* Something went wrong in copying the file */
				exit(1);
			}
		}

		/* Check for smart failover if the next WAL file was not available */
		if (Failover == SmartFailover)
			exit(1);

		/* Holds for the default of 5 and for any value accepted by -s */
		if (sleeptime <= 60)
			pg_usleep(sleeptime * 1000000L);

		/*
		 * waittime starts out at -1, so the running total is one less than
		 * the number of seconds slept so far.  A timeout takes effect at the
		 * top of the next iteration, where fast failover is checked.
		 */
		waittime += sleeptime;
		if (waittime >= maxwaittime && maxwaittime > 0)
		{
			Failover = FastFailover;
			if (debug)
			{
				fprintf(stderr, "Timed out after %d seconds: fast failover\n",
						waittime);
				fflush(stderr);
			}
		}
		if (debug)
		{
			fprintf(stderr, "WAL file not present yet.");
			if (triggerPath)
				fprintf(stderr, " Checking for trigger file...");
			fprintf(stderr, "\n");
			fflush(stderr);
		}
	}
}
908