1 /*
2 * contrib/pg_standby/pg_standby.c
3 *
4 *
5 * pg_standby.c
6 *
7 * Production-ready example of how to create a Warm Standby
8 * database server using continuous archiving as a
9 * replication mechanism
10 *
11 * We separate the parameters for archive and nextWALfile
12 * so that we can check the archive exists, even if the
13 * WAL file doesn't (yet).
14 *
15 * This program will be executed once in full for each file
16 * requested by the warm standby server.
17 *
 * It is designed to cater to a variety of needs, as well as
 * providing a customizable section.
20 *
21 * Original author: Simon Riggs simon@2ndquadrant.com
22 * Current maintainer: Simon Riggs
23 */
24 #include "postgres_fe.h"
25
26 #include <ctype.h>
27 #include <dirent.h>
28 #include <sys/stat.h>
29 #include <fcntl.h>
30 #include <signal.h>
31 #include <sys/time.h>
32
33 #include "pg_getopt.h"
34
35 #include "access/xlog_internal.h"
36
/* Program name used as a prefix in messages; set in main() via get_progname() */
const char *progname;

/*
 * WAL segment size in bytes.  Stays at -1 until SetWALSegSize() has read it
 * from the header of a WAL file found in the archive.
 */
int			WalSegSz = -1;

/* Options and defaults */
int			sleeptime = 5;		/* amount of time to sleep between file checks
								 * (seconds) */
int			waittime = -1;		/* how long we have been waiting, -1 no wait
								 * yet */
int			maxwaittime = 0;	/* how long are we prepared to wait for? (0
								 * means no limit) */
int			keepfiles = 0;		/* number of WAL files to keep, 0 keep all */
int			maxretries = 3;		/* number of retries on restore command */
bool		debug = false;		/* are we debugging? */
bool		need_cleanup = false;	/* do we need to remove files from
									 * archive? */

#ifndef WIN32
/* Set by sighandler(); the main wait loop treats it as a fast failover request */
static volatile sig_atomic_t signaled = false;
#endif

char	   *archiveLocation;	/* where to find the archive? */
char	   *triggerPath;		/* where to find the trigger file? */
char	   *xlogFilePath;		/* where we are going to restore to */
char	   *nextWALFileName;	/* the file we need to get from archive */
char	   *restartWALFileName; /* the file from which we can restart restore */
char	   *priorWALFileName;	/* NOTE(review): not referenced by any code
								 * visible in this file; possibly a leftover */
char		WALFilePath[MAXPGPATH * 2]; /* the file path including archive */
char		restoreCommand[MAXPGPATH];	/* run this to restore */
char		exclusiveCleanupFileName[MAXFNAMELEN];	/* cleanup cutoff: archived
													 * WAL files sorting
													 * before this name are
													 * removed */
66
67 /*
68 * Two types of failover are supported (smart and fast failover).
69 *
70 * The content of the trigger file determines the type of failover. If the
71 * trigger file contains the word "smart" (or the file is empty), smart
72 * failover is chosen: pg_standby acts as cp or ln command itself, on
73 * successful completion all the available WAL records will be applied
74 * resulting in zero data loss. But, it might take a long time to finish
75 * recovery if there's a lot of unapplied WAL.
76 *
77 * On the other hand, if the trigger file contains the word "fast", the
78 * recovery is finished immediately even if unapplied WAL files remain. Any
79 * transactions in the unapplied WAL files are lost.
80 *
 * An empty trigger file performs smart failover. SIGUSR1 or SIGINT triggers
82 * fast failover. A timeout causes fast failover (smart failover would have
83 * the same effect, since if the timeout is reached there is no unapplied WAL).
84 */
/* Possible values of Failover */
#define NoFailover		0
#define SmartFailover	1
#define FastFailover	2

/*
 * Failover mode requested so far.  Set from the trigger file contents, from
 * a signal, or when the maximum wait time is exceeded.
 */
static int	Failover = NoFailover;

/* Possible values of restoreCommandType */
#define RESTORE_COMMAND_COPY 0
#define RESTORE_COMMAND_LINK 1
int			restoreCommandType;

/* Possible values of nextWALFileType */
#define XLOG_DATA			  0
#define XLOG_HISTORY		  1
#define XLOG_BACKUP_LABEL	  2
int			nextWALFileType;

/*
 * Format restoreCommand as: cmd "arg1" "arg2".  cmd must be a string literal
 * because it is pasted onto the format string at compile time.  Output is
 * limited to MAXPGPATH bytes by snprintf().
 */
#define SET_RESTORE_COMMAND(cmd, arg1, arg2) \
	snprintf(restoreCommand, MAXPGPATH, cmd " \"%s\" \"%s\"", arg1, arg2)

/* Shared result buffer for the stat() calls made throughout this file */
struct stat stat_buf;

static bool SetWALFileNameForCleanup(void);
static bool SetWALSegSize(void);
107
108
109 /* =====================================================================
110 *
111 * Customizable section
112 *
113 * =====================================================================
114 *
115 * Currently, this section assumes that the Archive is a locally
116 * accessible directory. If you want to make other assumptions,
117 * such as using a vendor-specific archive and access API, these
118 * routines are the ones you'll need to change. You're
119 * encouraged to submit any changes to pgsql-hackers@postgresql.org
120 * or personally to the current maintainer. Those changes may be
121 * folded in to later versions of this program.
122 */
123
124 /*
125 * Initialize allows customized commands into the warm standby program.
126 *
127 * As an example, and probably the common case, we use either
128 * cp/ln commands on *nix, or copy/move command on Windows.
129 */
130 static void
CustomizableInitialize(void)131 CustomizableInitialize(void)
132 {
133 #ifdef WIN32
134 snprintf(WALFilePath, MAXPGPATH, "%s\\%s", archiveLocation, nextWALFileName);
135 switch (restoreCommandType)
136 {
137 case RESTORE_COMMAND_LINK:
138 SET_RESTORE_COMMAND("mklink", WALFilePath, xlogFilePath);
139 break;
140 case RESTORE_COMMAND_COPY:
141 default:
142 SET_RESTORE_COMMAND("copy", WALFilePath, xlogFilePath);
143 break;
144 }
145 #else
146 snprintf(WALFilePath, MAXPGPATH, "%s/%s", archiveLocation, nextWALFileName);
147 switch (restoreCommandType)
148 {
149 case RESTORE_COMMAND_LINK:
150 #if HAVE_WORKING_LINK
151 SET_RESTORE_COMMAND("ln -s -f", WALFilePath, xlogFilePath);
152 break;
153 #endif
154 case RESTORE_COMMAND_COPY:
155 default:
156 SET_RESTORE_COMMAND("cp", WALFilePath, xlogFilePath);
157 break;
158 }
159 #endif
160
161 /*
162 * This code assumes that archiveLocation is a directory You may wish to
163 * add code to check for tape libraries, etc.. So, since it is a
164 * directory, we use stat to test if it's accessible
165 */
166 if (stat(archiveLocation, &stat_buf) != 0)
167 {
168 fprintf(stderr, "%s: archive location \"%s\" does not exist\n", progname, archiveLocation);
169 fflush(stderr);
170 exit(2);
171 }
172 }
173
174 /*
175 * CustomizableNextWALFileReady()
176 *
177 * Is the requested file ready yet?
178 */
179 static bool
CustomizableNextWALFileReady(void)180 CustomizableNextWALFileReady(void)
181 {
182 if (stat(WALFilePath, &stat_buf) == 0)
183 {
184 /*
185 * If we've not seen any WAL segments, we don't know the WAL segment
186 * size, which we need. If it looks like a WAL segment, determine size
187 * of segments for the cluster.
188 */
189 if (WalSegSz == -1 && IsXLogFileName(nextWALFileName))
190 {
191 if (SetWALSegSize())
192 {
193 /*
194 * Successfully determined WAL segment size. Can compute
195 * cleanup cutoff now.
196 */
197 need_cleanup = SetWALFileNameForCleanup();
198 if (debug)
199 {
200 fprintf(stderr,
201 _("WAL segment size: %d \n"), WalSegSz);
202 fprintf(stderr, "Keep archive history: ");
203
204 if (need_cleanup)
205 fprintf(stderr, "%s and later\n",
206 exclusiveCleanupFileName);
207 else
208 fprintf(stderr, "no cleanup required\n");
209 }
210 }
211 }
212
213 /*
214 * If it's a backup file, return immediately. If it's a regular file
215 * return only if it's the right size already.
216 */
217 if (IsBackupHistoryFileName(nextWALFileName))
218 {
219 nextWALFileType = XLOG_BACKUP_LABEL;
220 return true;
221 }
222 else if (WalSegSz > 0 && stat_buf.st_size == WalSegSz)
223 {
224 #ifdef WIN32
225
226 /*
227 * Windows 'cp' sets the final file size before the copy is
228 * complete, and not yet ready to be opened by pg_standby. So we
229 * wait for sleeptime secs before attempting to restore. If that
230 * is not enough, we will rely on the retry/holdoff mechanism.
231 * GNUWin32's cp does not have this problem.
232 */
233 pg_usleep(sleeptime * 1000000L);
234 #endif
235 nextWALFileType = XLOG_DATA;
236 return true;
237 }
238
239 /*
240 * If still too small, wait until it is the correct size
241 */
242 if (WalSegSz > 0 && stat_buf.st_size > WalSegSz)
243 {
244 if (debug)
245 {
246 fprintf(stderr, "file size greater than expected\n");
247 fflush(stderr);
248 }
249 exit(3);
250 }
251 }
252
253 return false;
254 }
255
256 static void
CustomizableCleanupPriorWALFiles(void)257 CustomizableCleanupPriorWALFiles(void)
258 {
259 /*
260 * Work out name of prior file from current filename
261 */
262 if (nextWALFileType == XLOG_DATA)
263 {
264 int rc;
265 DIR *xldir;
266 struct dirent *xlde;
267
268 /*
269 * Assume it's OK to keep failing. The failure situation may change
270 * over time, so we'd rather keep going on the main processing than
271 * fail because we couldn't clean up yet.
272 */
273 if ((xldir = opendir(archiveLocation)) != NULL)
274 {
275 while (errno = 0, (xlde = readdir(xldir)) != NULL)
276 {
277 /*
278 * We ignore the timeline part of the XLOG segment identifiers
279 * in deciding whether a segment is still needed. This
280 * ensures that we won't prematurely remove a segment from a
281 * parent timeline. We could probably be a little more
282 * proactive about removing segments of non-parent timelines,
283 * but that would be a whole lot more complicated.
284 *
285 * We use the alphanumeric sorting property of the filenames
286 * to decide which ones are earlier than the
287 * exclusiveCleanupFileName file. Note that this means files
288 * are not removed in the order they were originally written,
289 * in case this worries you.
290 */
291 if (IsXLogFileName(xlde->d_name) &&
292 strcmp(xlde->d_name + 8, exclusiveCleanupFileName + 8) < 0)
293 {
294 #ifdef WIN32
295 snprintf(WALFilePath, sizeof(WALFilePath), "%s\\%s", archiveLocation, xlde->d_name);
296 #else
297 snprintf(WALFilePath, sizeof(WALFilePath), "%s/%s", archiveLocation, xlde->d_name);
298 #endif
299
300 if (debug)
301 fprintf(stderr, "\nremoving file \"%s\"", WALFilePath);
302
303 rc = unlink(WALFilePath);
304 if (rc != 0)
305 {
306 fprintf(stderr, "\n%s: ERROR: could not remove file \"%s\": %s\n",
307 progname, WALFilePath, strerror(errno));
308 break;
309 }
310 }
311 }
312
313 if (errno)
314 fprintf(stderr, "%s: could not read archive location \"%s\": %s\n",
315 progname, archiveLocation, strerror(errno));
316 if (debug)
317 fprintf(stderr, "\n");
318 }
319 else
320 fprintf(stderr, "%s: could not open archive location \"%s\": %s\n",
321 progname, archiveLocation, strerror(errno));
322
323 if (closedir(xldir))
324 fprintf(stderr, "%s: could not close archive location \"%s\": %s\n",
325 progname, archiveLocation, strerror(errno));
326
327 fflush(stderr);
328 }
329 }
330
331 /* =====================================================================
332 * End of Customizable section
333 * =====================================================================
334 */
335
336 /*
337 * SetWALFileNameForCleanup()
338 *
339 * Set the earliest WAL filename that we want to keep on the archive
340 * and decide whether we need_cleanup
341 */
342 static bool
SetWALFileNameForCleanup(void)343 SetWALFileNameForCleanup(void)
344 {
345 uint32 tli = 1,
346 log = 0,
347 seg = 0;
348 uint32 log_diff = 0,
349 seg_diff = 0;
350 bool cleanup = false;
351 int max_segments_per_logfile = (0xFFFFFFFF / WalSegSz);
352
353 if (restartWALFileName)
354 {
355 /*
356 * Don't do cleanup if the restartWALFileName provided is later than
357 * the xlog file requested. This is an error and we must not remove
358 * these files from archive. This shouldn't happen, but better safe
359 * than sorry.
360 */
361 if (strcmp(restartWALFileName, nextWALFileName) > 0)
362 return false;
363
364 strlcpy(exclusiveCleanupFileName, restartWALFileName, sizeof(exclusiveCleanupFileName));
365 return true;
366 }
367
368 if (keepfiles > 0)
369 {
370 sscanf(nextWALFileName, "%08X%08X%08X", &tli, &log, &seg);
371 if (tli > 0 && seg > 0)
372 {
373 log_diff = keepfiles / max_segments_per_logfile;
374 seg_diff = keepfiles % max_segments_per_logfile;
375 if (seg_diff > seg)
376 {
377 log_diff++;
378 seg = max_segments_per_logfile - (seg_diff - seg);
379 }
380 else
381 seg -= seg_diff;
382
383 if (log >= log_diff)
384 {
385 log -= log_diff;
386 cleanup = true;
387 }
388 else
389 {
390 log = 0;
391 seg = 0;
392 }
393 }
394 }
395
396 XLogFileNameById(exclusiveCleanupFileName, tli, log, seg);
397
398 return cleanup;
399 }
400
401 /*
402 * Try to set the wal segment size from the WAL file specified by WALFilePath.
403 *
404 * Return true if size could be determined, false otherwise.
405 */
406 static bool
SetWALSegSize(void)407 SetWALSegSize(void)
408 {
409 bool ret_val = false;
410 int fd;
411 PGAlignedXLogBlock buf;
412
413 Assert(WalSegSz == -1);
414
415 if ((fd = open(WALFilePath, O_RDWR, 0)) < 0)
416 {
417 fprintf(stderr, "%s: could not open WAL file \"%s\": %s\n",
418 progname, WALFilePath, strerror(errno));
419 return false;
420 }
421
422 errno = 0;
423 if (read(fd, buf.data, XLOG_BLCKSZ) == XLOG_BLCKSZ)
424 {
425 XLogLongPageHeader longhdr = (XLogLongPageHeader) buf.data;
426
427 WalSegSz = longhdr->xlp_seg_size;
428
429 if (IsValidWalSegSize(WalSegSz))
430 {
431 /* successfully retrieved WAL segment size */
432 ret_val = true;
433 }
434 else
435 fprintf(stderr,
436 "%s: WAL segment size must be a power of two between 1MB and 1GB, but the WAL file header specifies %d bytes\n",
437 progname, WalSegSz);
438 }
439 else
440 {
441 /*
442 * Don't complain loudly, this is to be expected for segments being
443 * created.
444 */
445 if (errno != 0)
446 {
447 if (debug)
448 fprintf(stderr, "could not read file \"%s\": %s\n",
449 WALFilePath, strerror(errno));
450 }
451 else
452 {
453 if (debug)
454 fprintf(stderr, "not enough data in file \"%s\"\n",
455 WALFilePath);
456 }
457 }
458
459 fflush(stderr);
460
461 close(fd);
462 return ret_val;
463 }
464
465 /*
466 * CheckForExternalTrigger()
467 *
468 * Is there a trigger file? Sets global 'Failover' variable to indicate
469 * what kind of a trigger file it was. A "fast" trigger file is turned
470 * into a "smart" file as a side-effect.
471 */
472 static void
CheckForExternalTrigger(void)473 CheckForExternalTrigger(void)
474 {
475 char buf[32];
476 int fd;
477 int len;
478
479 /*
480 * Look for a trigger file, if that option has been selected
481 *
482 * We use stat() here because triggerPath is always a file rather than
483 * potentially being in an archive
484 */
485 if (!triggerPath || stat(triggerPath, &stat_buf) != 0)
486 return;
487
488 /*
489 * An empty trigger file performs smart failover. There's a little race
490 * condition here: if the writer of the trigger file has just created the
491 * file, but not yet written anything to it, we'll treat that as smart
492 * shutdown even if the other process was just about to write "fast" to
493 * it. But that's fine: we'll restore one more WAL file, and when we're
494 * invoked next time, we'll see the word "fast" and fail over immediately.
495 */
496 if (stat_buf.st_size == 0)
497 {
498 Failover = SmartFailover;
499 fprintf(stderr, "trigger file found: smart failover\n");
500 fflush(stderr);
501 return;
502 }
503
504 if ((fd = open(triggerPath, O_RDWR, 0)) < 0)
505 {
506 fprintf(stderr, "WARNING: could not open \"%s\": %s\n",
507 triggerPath, strerror(errno));
508 fflush(stderr);
509 return;
510 }
511
512 if ((len = read(fd, buf, sizeof(buf) - 1)) < 0)
513 {
514 fprintf(stderr, "WARNING: could not read \"%s\": %s\n",
515 triggerPath, strerror(errno));
516 fflush(stderr);
517 close(fd);
518 return;
519 }
520 buf[len] = '\0';
521
522 if (strncmp(buf, "smart", 5) == 0)
523 {
524 Failover = SmartFailover;
525 fprintf(stderr, "trigger file found: smart failover\n");
526 fflush(stderr);
527 close(fd);
528 return;
529 }
530
531 if (strncmp(buf, "fast", 4) == 0)
532 {
533 Failover = FastFailover;
534
535 fprintf(stderr, "trigger file found: fast failover\n");
536 fflush(stderr);
537
538 /*
539 * Turn it into a "smart" trigger by truncating the file. Otherwise if
540 * the server asks us again to restore a segment that was restored
541 * already, we would return "not found" and upset the server.
542 */
543 if (ftruncate(fd, 0) < 0)
544 {
545 fprintf(stderr, "WARNING: could not read \"%s\": %s\n",
546 triggerPath, strerror(errno));
547 fflush(stderr);
548 }
549 close(fd);
550
551 return;
552 }
553 close(fd);
554
555 fprintf(stderr, "WARNING: invalid content in \"%s\"\n", triggerPath);
556 fflush(stderr);
557 return;
558 }
559
560 /*
561 * RestoreWALFileForRecovery()
562 *
563 * Perform the action required to restore the file from archive
564 */
565 static bool
RestoreWALFileForRecovery(void)566 RestoreWALFileForRecovery(void)
567 {
568 int rc = 0;
569 int numretries = 0;
570
571 if (debug)
572 {
573 fprintf(stderr, "running restore: ");
574 fflush(stderr);
575 }
576
577 while (numretries <= maxretries)
578 {
579 rc = system(restoreCommand);
580 if (rc == 0)
581 {
582 if (debug)
583 {
584 fprintf(stderr, "OK\n");
585 fflush(stderr);
586 }
587 return true;
588 }
589 pg_usleep(numretries++ * sleeptime * 1000000L);
590 }
591
592 /*
593 * Allow caller to add additional info
594 */
595 if (debug)
596 fprintf(stderr, "not restored\n");
597 return false;
598 }
599
600 static void
usage(void)601 usage(void)
602 {
603 printf("%s allows PostgreSQL warm standby servers to be configured.\n\n", progname);
604 printf("Usage:\n");
605 printf(" %s [OPTION]... ARCHIVELOCATION NEXTWALFILE XLOGFILEPATH [RESTARTWALFILE]\n", progname);
606 printf("\nOptions:\n");
607 printf(" -c copy file from archive (default)\n");
608 printf(" -d generate lots of debugging output (testing only)\n");
609 printf(" -k NUMFILESTOKEEP if RESTARTWALFILE is not used, remove files prior to limit\n"
610 " (0 keeps all)\n");
611 printf(" -l does nothing; use of link is now deprecated\n");
612 printf(" -r MAXRETRIES max number of times to retry, with progressive wait\n"
613 " (default=3)\n");
614 printf(" -s SLEEPTIME seconds to wait between file checks (min=1, max=60,\n"
615 " default=5)\n");
616 printf(" -t TRIGGERFILE trigger file to initiate failover (no default)\n");
617 printf(" -V, --version output version information, then exit\n");
618 printf(" -w MAXWAITTIME max seconds to wait for a file (0=no limit) (default=0)\n");
619 printf(" -?, --help show this help, then exit\n");
620 printf("\n"
621 "Main intended use as restore_command in recovery.conf:\n"
622 " restore_command = 'pg_standby [OPTION]... ARCHIVELOCATION %%f %%p %%r'\n"
623 "e.g.\n"
624 " restore_command = 'pg_standby /mnt/server/archiverdir %%f %%p %%r'\n");
625 printf("\nReport bugs to <pgsql-bugs@postgresql.org>.\n");
626 }
627
628 #ifndef WIN32
629 static void
sighandler(int sig)630 sighandler(int sig)
631 {
632 signaled = true;
633 }
634
/*
 * Signal handler installed by main() for SIGQUIT.  We don't want SIGQUIT to
 * core dump, so put SIGINT back to its default action and send SIGINT to
 * ourselves instead.
 */
static void
sigquit_handler(int sig)
{
	(void) sig;					/* unused */

	pqsignal(SIGINT, SIG_DFL);
	kill(getpid(), SIGINT);
}
642 #endif
643
644 /*------------ MAIN ----------------------------------------*/
645 int
main(int argc,char ** argv)646 main(int argc, char **argv)
647 {
648 int c;
649
650 progname = get_progname(argv[0]);
651
652 if (argc > 1)
653 {
654 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
655 {
656 usage();
657 exit(0);
658 }
659 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
660 {
661 puts("pg_standby (PostgreSQL) " PG_VERSION);
662 exit(0);
663 }
664 }
665
666 #ifndef WIN32
667
668 /*
669 * You can send SIGUSR1 to trigger failover.
670 *
671 * Postmaster uses SIGQUIT to request immediate shutdown. The default
672 * action is to core dump, but we don't want that, so trap it and commit
673 * suicide without core dump.
674 *
675 * We used to use SIGINT and SIGQUIT to trigger failover, but that turned
676 * out to be a bad idea because postmaster uses SIGQUIT to request
677 * immediate shutdown. We still trap SIGINT, but that may change in a
678 * future release.
679 *
680 * There's no way to trigger failover via signal on Windows.
681 */
682 (void) pqsignal(SIGUSR1, sighandler);
683 (void) pqsignal(SIGINT, sighandler); /* deprecated, use SIGUSR1 */
684 (void) pqsignal(SIGQUIT, sigquit_handler);
685 #endif
686
687 while ((c = getopt(argc, argv, "cdk:lr:s:t:w:")) != -1)
688 {
689 switch (c)
690 {
691 case 'c': /* Use copy */
692 restoreCommandType = RESTORE_COMMAND_COPY;
693 break;
694 case 'd': /* Debug mode */
695 debug = true;
696 break;
697 case 'k': /* keepfiles */
698 keepfiles = atoi(optarg);
699 if (keepfiles < 0)
700 {
701 fprintf(stderr, "%s: -k keepfiles must be >= 0\n", progname);
702 exit(2);
703 }
704 break;
705 case 'l': /* Use link */
706
707 /*
708 * Link feature disabled, possibly permanently. Linking causes
709 * a problem after recovery ends that is not currently
710 * resolved by PostgreSQL. 25 Jun 2009
711 */
712 #ifdef NOT_USED
713 restoreCommandType = RESTORE_COMMAND_LINK;
714 #endif
715 break;
716 case 'r': /* Retries */
717 maxretries = atoi(optarg);
718 if (maxretries < 0)
719 {
720 fprintf(stderr, "%s: -r maxretries must be >= 0\n", progname);
721 exit(2);
722 }
723 break;
724 case 's': /* Sleep time */
725 sleeptime = atoi(optarg);
726 if (sleeptime <= 0 || sleeptime > 60)
727 {
728 fprintf(stderr, "%s: -s sleeptime incorrectly set\n", progname);
729 exit(2);
730 }
731 break;
732 case 't': /* Trigger file */
733 triggerPath = pg_strdup(optarg);
734 break;
735 case 'w': /* Max wait time */
736 maxwaittime = atoi(optarg);
737 if (maxwaittime < 0)
738 {
739 fprintf(stderr, "%s: -w maxwaittime incorrectly set\n", progname);
740 exit(2);
741 }
742 break;
743 default:
744 fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
745 exit(2);
746 break;
747 }
748 }
749
750 /*
751 * Parameter checking - after checking to see if trigger file present
752 */
753 if (argc == 1)
754 {
755 fprintf(stderr, "%s: not enough command-line arguments\n", progname);
756 exit(2);
757 }
758
759 /*
760 * We will go to the archiveLocation to get nextWALFileName.
761 * nextWALFileName may not exist yet, which would not be an error, so we
762 * separate the archiveLocation and nextWALFileName so we can check
763 * separately whether archiveLocation exists, if not that is an error
764 */
765 if (optind < argc)
766 {
767 archiveLocation = argv[optind];
768 optind++;
769 }
770 else
771 {
772 fprintf(stderr, "%s: must specify archive location\n", progname);
773 fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
774 exit(2);
775 }
776
777 if (optind < argc)
778 {
779 nextWALFileName = argv[optind];
780 optind++;
781 }
782 else
783 {
784 fprintf(stderr, "%s: must specify WAL file name as second non-option argument (use \"%%f\")\n", progname);
785 fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
786 exit(2);
787 }
788
789 if (optind < argc)
790 {
791 xlogFilePath = argv[optind];
792 optind++;
793 }
794 else
795 {
796 fprintf(stderr, "%s: must specify xlog destination as third non-option argument (use \"%%p\")\n", progname);
797 fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
798 exit(2);
799 }
800
801 if (optind < argc)
802 {
803 restartWALFileName = argv[optind];
804 optind++;
805 }
806
807 CustomizableInitialize();
808
809 if (debug)
810 {
811 fprintf(stderr, "Trigger file: %s\n", triggerPath ? triggerPath : "<not set>");
812 fprintf(stderr, "Waiting for WAL file: %s\n", nextWALFileName);
813 fprintf(stderr, "WAL file path: %s\n", WALFilePath);
814 fprintf(stderr, "Restoring to: %s\n", xlogFilePath);
815 fprintf(stderr, "Sleep interval: %d second%s\n",
816 sleeptime, (sleeptime > 1 ? "s" : " "));
817 fprintf(stderr, "Max wait interval: %d %s\n",
818 maxwaittime, (maxwaittime > 0 ? "seconds" : "forever"));
819 fprintf(stderr, "Command for restore: %s\n", restoreCommand);
820 fflush(stderr);
821 }
822
823 /*
824 * Check for initial history file: always the first file to be requested
825 * It's OK if the file isn't there - all other files need to wait
826 */
827 if (IsTLHistoryFileName(nextWALFileName))
828 {
829 nextWALFileType = XLOG_HISTORY;
830 if (RestoreWALFileForRecovery())
831 exit(0);
832 else
833 {
834 if (debug)
835 {
836 fprintf(stderr, "history file not found\n");
837 fflush(stderr);
838 }
839 exit(1);
840 }
841 }
842
843 /*
844 * Main wait loop
845 */
846 for (;;)
847 {
848 /* Check for trigger file or signal first */
849 CheckForExternalTrigger();
850 #ifndef WIN32
851 if (signaled)
852 {
853 Failover = FastFailover;
854 if (debug)
855 {
856 fprintf(stderr, "signaled to exit: fast failover\n");
857 fflush(stderr);
858 }
859 }
860 #endif
861
862 /*
863 * Check for fast failover immediately, before checking if the
864 * requested WAL file is available
865 */
866 if (Failover == FastFailover)
867 exit(1);
868
869 if (CustomizableNextWALFileReady())
870 {
871 /*
872 * Once we have restored this file successfully we can remove some
873 * prior WAL files. If this restore fails we mustn't remove any
874 * file because some of them will be requested again immediately
875 * after the failed restore, or when we restart recovery.
876 */
877 if (RestoreWALFileForRecovery())
878 {
879 if (need_cleanup)
880 CustomizableCleanupPriorWALFiles();
881
882 exit(0);
883 }
884 else
885 {
886 /* Something went wrong in copying the file */
887 exit(1);
888 }
889 }
890
891 /* Check for smart failover if the next WAL file was not available */
892 if (Failover == SmartFailover)
893 exit(1);
894
895 if (sleeptime <= 60)
896 pg_usleep(sleeptime * 1000000L);
897
898 waittime += sleeptime;
899 if (waittime >= maxwaittime && maxwaittime > 0)
900 {
901 Failover = FastFailover;
902 if (debug)
903 {
904 fprintf(stderr, "Timed out after %d seconds: fast failover\n",
905 waittime);
906 fflush(stderr);
907 }
908 }
909 if (debug)
910 {
911 fprintf(stderr, "WAL file not present yet.");
912 if (triggerPath)
913 fprintf(stderr, " Checking for trigger file...");
914 fprintf(stderr, "\n");
915 fflush(stderr);
916 }
917 }
918 }
919