1 /*
2 * contrib/pg_standby/pg_standby.c
3 *
4 *
5 * pg_standby.c
6 *
7 * Production-ready example of how to create a Warm Standby
8 * database server using continuous archiving as a
9 * replication mechanism
10 *
11 * We separate the parameters for archive and nextWALfile
12 * so that we can check the archive exists, even if the
13 * WAL file doesn't (yet).
14 *
15 * This program will be executed once in full for each file
16 * requested by the warm standby server.
17 *
 * It is designed to cater to a variety of needs, as well as
 * providing a customizable section.
20 *
21 * Original author: Simon Riggs simon@2ndquadrant.com
22 * Current maintainer: Simon Riggs
23 */
24 #include "postgres_fe.h"
25
26 #include <ctype.h>
27 #include <dirent.h>
28 #include <sys/stat.h>
29 #include <fcntl.h>
30 #include <signal.h>
31 #include <sys/time.h>
32
33 #include "access/xlog_internal.h"
34 #include "pg_getopt.h"
35
const char *progname;			/* program name, used as a prefix in messages */

/*
 * WAL segment size in bytes. Stays at -1 until SetWALSegSize() has read it
 * from the header of an archived WAL segment.
 */
int			WalSegSz = -1;

/* Options and defaults */
int			sleeptime = 5;		/* amount of time to sleep between file checks */
int			waittime = -1;		/* how long we have been waiting, -1 no wait
								 * yet */
int			maxwaittime = 0;	/* how long are we prepared to wait for? */
int			keepfiles = 0;		/* number of WAL files to keep, 0 keep all */
int			maxretries = 3;		/* number of retries on restore command */
bool		debug = false;		/* are we debugging? */
bool		need_cleanup = false;	/* do we need to remove files from
									 * archive? */

#ifndef WIN32
/* set by the signal handler; polled in the main wait loop */
static volatile sig_atomic_t signaled = false;
#endif

char	   *archiveLocation;	/* where to find the archive? */
char	   *triggerPath;		/* where to find the trigger file? */
char	   *xlogFilePath;		/* where we are going to restore to */
char	   *nextWALFileName;	/* the file we need to get from archive */
char	   *restartWALFileName; /* the file from which we can restart restore */
char		WALFilePath[MAXPGPATH * 2]; /* the file path including archive;
										 * also reused as scratch space for
										 * the path of each file removed
										 * during cleanup */
char		restoreCommand[MAXPGPATH];	/* run this to restore */
char		exclusiveCleanupFileName[MAXFNAMELEN];	/* cleanup cutoff: archived
													 * segments that sort
													 * before this name are
													 * removed */
64
65 /*
66 * Two types of failover are supported (smart and fast failover).
67 *
68 * The content of the trigger file determines the type of failover. If the
69 * trigger file contains the word "smart" (or the file is empty), smart
70 * failover is chosen: pg_standby acts as cp or ln command itself, on
71 * successful completion all the available WAL records will be applied
72 * resulting in zero data loss. But, it might take a long time to finish
73 * recovery if there's a lot of unapplied WAL.
74 *
75 * On the other hand, if the trigger file contains the word "fast", the
76 * recovery is finished immediately even if unapplied WAL files remain. Any
77 * transactions in the unapplied WAL files are lost.
78 *
 * An empty trigger file performs smart failover. SIGUSR1 or SIGINT triggers
80 * fast failover. A timeout causes fast failover (smart failover would have
81 * the same effect, since if the timeout is reached there is no unapplied WAL).
82 */
/* Values taken by the 'Failover' state variable below */
#define NoFailover		0
#define SmartFailover	1
#define FastFailover	2

/* Current failover request; set from the trigger file, a signal or a timeout */
static int	Failover = NoFailover;

/* How the file is brought over from the archive (see restoreCommandType) */
#define RESTORE_COMMAND_COPY 0
#define RESTORE_COMMAND_LINK 1
int			restoreCommandType;

/* Kind of file currently being requested (see nextWALFileType) */
#define XLOG_DATA 0
#define XLOG_HISTORY 1
int			nextWALFileType;

/*
 * Build the restore command line into restoreCommand as
 *		cmd "arg1" "arg2"
 * 'cmd' must be a string literal, because it is pasted onto the format
 * string by literal concatenation.
 */
#define SET_RESTORE_COMMAND(cmd, arg1, arg2) \
	snprintf(restoreCommand, MAXPGPATH, cmd " \"%s\" \"%s\"", arg1, arg2)

/* Scratch buffer shared by every stat() call in this file */
struct stat stat_buf;

static bool SetWALFileNameForCleanup(void);
static bool SetWALSegSize(void);
105
106 /* =====================================================================
107 *
108 * Customizable section
109 *
110 * =====================================================================
111 *
112 * Currently, this section assumes that the Archive is a locally
113 * accessible directory. If you want to make other assumptions,
114 * such as using a vendor-specific archive and access API, these
115 * routines are the ones you'll need to change. You're
116 * encouraged to submit any changes to pgsql-hackers@lists.postgresql.org
117 * or personally to the current maintainer. Those changes may be
118 * folded in to later versions of this program.
119 */
120
121 /*
122 * Initialize allows customized commands into the warm standby program.
123 *
124 * As an example, and probably the common case, we use either
125 * cp/ln commands on *nix, or copy/move command on Windows.
126 */
127 static void
CustomizableInitialize(void)128 CustomizableInitialize(void)
129 {
130 #ifdef WIN32
131 snprintf(WALFilePath, MAXPGPATH, "%s\\%s", archiveLocation, nextWALFileName);
132 switch (restoreCommandType)
133 {
134 case RESTORE_COMMAND_LINK:
135 SET_RESTORE_COMMAND("mklink", WALFilePath, xlogFilePath);
136 break;
137 case RESTORE_COMMAND_COPY:
138 default:
139 SET_RESTORE_COMMAND("copy", WALFilePath, xlogFilePath);
140 break;
141 }
142 #else
143 snprintf(WALFilePath, MAXPGPATH, "%s/%s", archiveLocation, nextWALFileName);
144 switch (restoreCommandType)
145 {
146 case RESTORE_COMMAND_LINK:
147 SET_RESTORE_COMMAND("ln -s -f", WALFilePath, xlogFilePath);
148 break;
149 case RESTORE_COMMAND_COPY:
150 default:
151 SET_RESTORE_COMMAND("cp", WALFilePath, xlogFilePath);
152 break;
153 }
154 #endif
155
156 /*
157 * This code assumes that archiveLocation is a directory You may wish to
158 * add code to check for tape libraries, etc.. So, since it is a
159 * directory, we use stat to test if it's accessible
160 */
161 if (stat(archiveLocation, &stat_buf) != 0)
162 {
163 fprintf(stderr, "%s: archive location \"%s\" does not exist\n", progname, archiveLocation);
164 fflush(stderr);
165 exit(2);
166 }
167 }
168
169 /*
170 * CustomizableNextWALFileReady()
171 *
172 * Is the requested file ready yet?
173 */
174 static bool
CustomizableNextWALFileReady(void)175 CustomizableNextWALFileReady(void)
176 {
177 if (stat(WALFilePath, &stat_buf) == 0)
178 {
179 /*
180 * If we've not seen any WAL segments, we don't know the WAL segment
181 * size, which we need. If it looks like a WAL segment, determine size
182 * of segments for the cluster.
183 */
184 if (WalSegSz == -1 && IsXLogFileName(nextWALFileName))
185 {
186 if (SetWALSegSize())
187 {
188 /*
189 * Successfully determined WAL segment size. Can compute
190 * cleanup cutoff now.
191 */
192 need_cleanup = SetWALFileNameForCleanup();
193 if (debug)
194 {
195 fprintf(stderr,
196 _("WAL segment size: %d \n"), WalSegSz);
197 fprintf(stderr, "Keep archive history: ");
198
199 if (need_cleanup)
200 fprintf(stderr, "%s and later\n",
201 exclusiveCleanupFileName);
202 else
203 fprintf(stderr, "no cleanup required\n");
204 }
205 }
206 }
207
208 /*
209 * Return only if it's the right size already.
210 */
211 if (WalSegSz > 0 && stat_buf.st_size == WalSegSz)
212 {
213 #ifdef WIN32
214
215 /*
216 * Windows 'cp' sets the final file size before the copy is
217 * complete, and not yet ready to be opened by pg_standby. So we
218 * wait for sleeptime secs before attempting to restore. If that
219 * is not enough, we will rely on the retry/holdoff mechanism.
220 * GNUWin32's cp does not have this problem.
221 */
222 pg_usleep(sleeptime * 1000000L);
223 #endif
224 nextWALFileType = XLOG_DATA;
225 return true;
226 }
227
228 /*
229 * If still too small, wait until it is the correct size
230 */
231 if (WalSegSz > 0 && stat_buf.st_size > WalSegSz)
232 {
233 if (debug)
234 {
235 fprintf(stderr, "file size greater than expected\n");
236 fflush(stderr);
237 }
238 exit(3);
239 }
240 }
241
242 return false;
243 }
244
245 static void
CustomizableCleanupPriorWALFiles(void)246 CustomizableCleanupPriorWALFiles(void)
247 {
248 /*
249 * Work out name of prior file from current filename
250 */
251 if (nextWALFileType == XLOG_DATA)
252 {
253 int rc;
254 DIR *xldir;
255 struct dirent *xlde;
256
257 /*
258 * Assume it's OK to keep failing. The failure situation may change
259 * over time, so we'd rather keep going on the main processing than
260 * fail because we couldn't clean up yet.
261 */
262 if ((xldir = opendir(archiveLocation)) != NULL)
263 {
264 while (errno = 0, (xlde = readdir(xldir)) != NULL)
265 {
266 /*
267 * We ignore the timeline part of the XLOG segment identifiers
268 * in deciding whether a segment is still needed. This
269 * ensures that we won't prematurely remove a segment from a
270 * parent timeline. We could probably be a little more
271 * proactive about removing segments of non-parent timelines,
272 * but that would be a whole lot more complicated.
273 *
274 * We use the alphanumeric sorting property of the filenames
275 * to decide which ones are earlier than the
276 * exclusiveCleanupFileName file. Note that this means files
277 * are not removed in the order they were originally written,
278 * in case this worries you.
279 */
280 if (IsXLogFileName(xlde->d_name) &&
281 strcmp(xlde->d_name + 8, exclusiveCleanupFileName + 8) < 0)
282 {
283 #ifdef WIN32
284 snprintf(WALFilePath, sizeof(WALFilePath), "%s\\%s", archiveLocation, xlde->d_name);
285 #else
286 snprintf(WALFilePath, sizeof(WALFilePath), "%s/%s", archiveLocation, xlde->d_name);
287 #endif
288
289 if (debug)
290 fprintf(stderr, "\nremoving file \"%s\"", WALFilePath);
291
292 rc = unlink(WALFilePath);
293 if (rc != 0)
294 {
295 fprintf(stderr, "\n%s: ERROR: could not remove file \"%s\": %s\n",
296 progname, WALFilePath, strerror(errno));
297 break;
298 }
299 }
300 }
301
302 if (errno)
303 fprintf(stderr, "%s: could not read archive location \"%s\": %s\n",
304 progname, archiveLocation, strerror(errno));
305 if (debug)
306 fprintf(stderr, "\n");
307 }
308 else
309 fprintf(stderr, "%s: could not open archive location \"%s\": %s\n",
310 progname, archiveLocation, strerror(errno));
311
312 if (closedir(xldir))
313 fprintf(stderr, "%s: could not close archive location \"%s\": %s\n",
314 progname, archiveLocation, strerror(errno));
315
316 fflush(stderr);
317 }
318 }
319
320 /* =====================================================================
321 * End of Customizable section
322 * =====================================================================
323 */
324
325 /*
326 * SetWALFileNameForCleanup()
327 *
328 * Set the earliest WAL filename that we want to keep on the archive
329 * and decide whether we need_cleanup
330 */
331 static bool
SetWALFileNameForCleanup(void)332 SetWALFileNameForCleanup(void)
333 {
334 uint32 tli = 1,
335 log = 0,
336 seg = 0;
337 uint32 log_diff = 0,
338 seg_diff = 0;
339 bool cleanup = false;
340 int max_segments_per_logfile = (0xFFFFFFFF / WalSegSz);
341
342 if (restartWALFileName)
343 {
344 /*
345 * Don't do cleanup if the restartWALFileName provided is later than
346 * the xlog file requested. This is an error and we must not remove
347 * these files from archive. This shouldn't happen, but better safe
348 * than sorry.
349 */
350 if (strcmp(restartWALFileName, nextWALFileName) > 0)
351 return false;
352
353 strlcpy(exclusiveCleanupFileName, restartWALFileName, sizeof(exclusiveCleanupFileName));
354 return true;
355 }
356
357 if (keepfiles > 0)
358 {
359 sscanf(nextWALFileName, "%08X%08X%08X", &tli, &log, &seg);
360 if (tli > 0 && seg > 0)
361 {
362 log_diff = keepfiles / max_segments_per_logfile;
363 seg_diff = keepfiles % max_segments_per_logfile;
364 if (seg_diff > seg)
365 {
366 log_diff++;
367 seg = max_segments_per_logfile - (seg_diff - seg);
368 }
369 else
370 seg -= seg_diff;
371
372 if (log >= log_diff)
373 {
374 log -= log_diff;
375 cleanup = true;
376 }
377 else
378 {
379 log = 0;
380 seg = 0;
381 }
382 }
383 }
384
385 XLogFileNameById(exclusiveCleanupFileName, tli, log, seg);
386
387 return cleanup;
388 }
389
390 /*
391 * Try to set the wal segment size from the WAL file specified by WALFilePath.
392 *
393 * Return true if size could be determined, false otherwise.
394 */
395 static bool
SetWALSegSize(void)396 SetWALSegSize(void)
397 {
398 bool ret_val = false;
399 int fd;
400 PGAlignedXLogBlock buf;
401
402 Assert(WalSegSz == -1);
403
404 if ((fd = open(WALFilePath, O_RDWR, 0)) < 0)
405 {
406 fprintf(stderr, "%s: could not open WAL file \"%s\": %s\n",
407 progname, WALFilePath, strerror(errno));
408 return false;
409 }
410
411 errno = 0;
412 if (read(fd, buf.data, XLOG_BLCKSZ) == XLOG_BLCKSZ)
413 {
414 XLogLongPageHeader longhdr = (XLogLongPageHeader) buf.data;
415
416 WalSegSz = longhdr->xlp_seg_size;
417
418 if (IsValidWalSegSize(WalSegSz))
419 {
420 /* successfully retrieved WAL segment size */
421 ret_val = true;
422 }
423 else
424 fprintf(stderr,
425 "%s: WAL segment size must be a power of two between 1MB and 1GB, but the WAL file header specifies %d bytes\n",
426 progname, WalSegSz);
427 }
428 else
429 {
430 /*
431 * Don't complain loudly, this is to be expected for segments being
432 * created.
433 */
434 if (errno != 0)
435 {
436 if (debug)
437 fprintf(stderr, "could not read file \"%s\": %s\n",
438 WALFilePath, strerror(errno));
439 }
440 else
441 {
442 if (debug)
443 fprintf(stderr, "not enough data in file \"%s\"\n",
444 WALFilePath);
445 }
446 }
447
448 fflush(stderr);
449
450 close(fd);
451 return ret_val;
452 }
453
454 /*
455 * CheckForExternalTrigger()
456 *
457 * Is there a trigger file? Sets global 'Failover' variable to indicate
458 * what kind of a trigger file it was. A "fast" trigger file is turned
459 * into a "smart" file as a side-effect.
460 */
461 static void
CheckForExternalTrigger(void)462 CheckForExternalTrigger(void)
463 {
464 char buf[32];
465 int fd;
466 int len;
467
468 /*
469 * Look for a trigger file, if that option has been selected
470 *
471 * We use stat() here because triggerPath is always a file rather than
472 * potentially being in an archive
473 */
474 if (!triggerPath || stat(triggerPath, &stat_buf) != 0)
475 return;
476
477 /*
478 * An empty trigger file performs smart failover. There's a little race
479 * condition here: if the writer of the trigger file has just created the
480 * file, but not yet written anything to it, we'll treat that as smart
481 * shutdown even if the other process was just about to write "fast" to
482 * it. But that's fine: we'll restore one more WAL file, and when we're
483 * invoked next time, we'll see the word "fast" and fail over immediately.
484 */
485 if (stat_buf.st_size == 0)
486 {
487 Failover = SmartFailover;
488 fprintf(stderr, "trigger file found: smart failover\n");
489 fflush(stderr);
490 return;
491 }
492
493 if ((fd = open(triggerPath, O_RDWR, 0)) < 0)
494 {
495 fprintf(stderr, "WARNING: could not open \"%s\": %s\n",
496 triggerPath, strerror(errno));
497 fflush(stderr);
498 return;
499 }
500
501 if ((len = read(fd, buf, sizeof(buf) - 1)) < 0)
502 {
503 fprintf(stderr, "WARNING: could not read \"%s\": %s\n",
504 triggerPath, strerror(errno));
505 fflush(stderr);
506 close(fd);
507 return;
508 }
509 buf[len] = '\0';
510
511 if (strncmp(buf, "smart", 5) == 0)
512 {
513 Failover = SmartFailover;
514 fprintf(stderr, "trigger file found: smart failover\n");
515 fflush(stderr);
516 close(fd);
517 return;
518 }
519
520 if (strncmp(buf, "fast", 4) == 0)
521 {
522 Failover = FastFailover;
523
524 fprintf(stderr, "trigger file found: fast failover\n");
525 fflush(stderr);
526
527 /*
528 * Turn it into a "smart" trigger by truncating the file. Otherwise if
529 * the server asks us again to restore a segment that was restored
530 * already, we would return "not found" and upset the server.
531 */
532 if (ftruncate(fd, 0) < 0)
533 {
534 fprintf(stderr, "WARNING: could not read \"%s\": %s\n",
535 triggerPath, strerror(errno));
536 fflush(stderr);
537 }
538 close(fd);
539
540 return;
541 }
542 close(fd);
543
544 fprintf(stderr, "WARNING: invalid content in \"%s\"\n", triggerPath);
545 fflush(stderr);
546 }
547
548 /*
549 * RestoreWALFileForRecovery()
550 *
551 * Perform the action required to restore the file from archive
552 */
553 static bool
RestoreWALFileForRecovery(void)554 RestoreWALFileForRecovery(void)
555 {
556 int rc = 0;
557 int numretries = 0;
558
559 if (debug)
560 {
561 fprintf(stderr, "running restore: ");
562 fflush(stderr);
563 }
564
565 while (numretries <= maxretries)
566 {
567 rc = system(restoreCommand);
568 if (rc == 0)
569 {
570 if (debug)
571 {
572 fprintf(stderr, "OK\n");
573 fflush(stderr);
574 }
575 return true;
576 }
577 pg_usleep(numretries++ * sleeptime * 1000000L);
578 }
579
580 /*
581 * Allow caller to add additional info
582 */
583 if (debug)
584 fprintf(stderr, "not restored\n");
585 return false;
586 }
587
588 static void
usage(void)589 usage(void)
590 {
591 printf("%s allows PostgreSQL warm standby servers to be configured.\n\n", progname);
592 printf("Usage:\n");
593 printf(" %s [OPTION]... ARCHIVELOCATION NEXTWALFILE XLOGFILEPATH [RESTARTWALFILE]\n", progname);
594 printf("\nOptions:\n");
595 printf(" -c copy file from archive (default)\n");
596 printf(" -d generate lots of debugging output (testing only)\n");
597 printf(" -k NUMFILESTOKEEP if RESTARTWALFILE is not used, remove files prior to limit\n"
598 " (0 keeps all)\n");
599 printf(" -l does nothing; use of link is now deprecated\n");
600 printf(" -r MAXRETRIES max number of times to retry, with progressive wait\n"
601 " (default=3)\n");
602 printf(" -s SLEEPTIME seconds to wait between file checks (min=1, max=60,\n"
603 " default=5)\n");
604 printf(" -t TRIGGERFILE trigger file to initiate failover (no default)\n");
605 printf(" -V, --version output version information, then exit\n");
606 printf(" -w MAXWAITTIME max seconds to wait for a file (0=no limit) (default=0)\n");
607 printf(" -?, --help show this help, then exit\n");
608 printf("\n"
609 "Main intended use as restore_command in postgresql.conf:\n"
610 " restore_command = 'pg_standby [OPTION]... ARCHIVELOCATION %%f %%p %%r'\n"
611 "e.g.\n"
612 " restore_command = 'pg_standby /mnt/server/archiverdir %%f %%p %%r'\n");
613 printf("\nReport bugs to <%s>.\n", PACKAGE_BUGREPORT);
614 printf("%s home page: <%s>\n", PACKAGE_NAME, PACKAGE_URL);
615 }
616
617 #ifndef WIN32
618 static void
sighandler(int sig)619 sighandler(int sig)
620 {
621 signaled = true;
622 }
623
/*
 * sigquit_handler()
 *
 * We don't want SIGQUIT to core dump. Instead, put SIGINT back to its
 * default action and send SIGINT to ourselves.
 */
static void
sigquit_handler(int sig)
{
	(void) sig;					/* unused */

	pqsignal(SIGINT, SIG_DFL);
	kill(getpid(), SIGINT);
}
631 #endif
632
633 /*------------ MAIN ----------------------------------------*/
634 int
main(int argc,char ** argv)635 main(int argc, char **argv)
636 {
637 int c;
638
639 progname = get_progname(argv[0]);
640
641 if (argc > 1)
642 {
643 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
644 {
645 usage();
646 exit(0);
647 }
648 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
649 {
650 puts("pg_standby (PostgreSQL) " PG_VERSION);
651 exit(0);
652 }
653 }
654
655 #ifndef WIN32
656
657 /*
658 * You can send SIGUSR1 to trigger failover.
659 *
660 * Postmaster uses SIGQUIT to request immediate shutdown. The default
661 * action is to core dump, but we don't want that, so trap it and commit
662 * suicide without core dump.
663 *
664 * We used to use SIGINT and SIGQUIT to trigger failover, but that turned
665 * out to be a bad idea because postmaster uses SIGQUIT to request
666 * immediate shutdown. We still trap SIGINT, but that may change in a
667 * future release.
668 *
669 * There's no way to trigger failover via signal on Windows.
670 */
671 (void) pqsignal(SIGUSR1, sighandler);
672 (void) pqsignal(SIGINT, sighandler); /* deprecated, use SIGUSR1 */
673 (void) pqsignal(SIGQUIT, sigquit_handler);
674 #endif
675
676 while ((c = getopt(argc, argv, "cdk:lr:s:t:w:")) != -1)
677 {
678 switch (c)
679 {
680 case 'c': /* Use copy */
681 restoreCommandType = RESTORE_COMMAND_COPY;
682 break;
683 case 'd': /* Debug mode */
684 debug = true;
685 break;
686 case 'k': /* keepfiles */
687 keepfiles = atoi(optarg);
688 if (keepfiles < 0)
689 {
690 fprintf(stderr, "%s: -k keepfiles must be >= 0\n", progname);
691 exit(2);
692 }
693 break;
694 case 'l': /* Use link */
695
696 /*
697 * Link feature disabled, possibly permanently. Linking causes
698 * a problem after recovery ends that is not currently
699 * resolved by PostgreSQL. 25 Jun 2009
700 */
701 #ifdef NOT_USED
702 restoreCommandType = RESTORE_COMMAND_LINK;
703 #endif
704 break;
705 case 'r': /* Retries */
706 maxretries = atoi(optarg);
707 if (maxretries < 0)
708 {
709 fprintf(stderr, "%s: -r maxretries must be >= 0\n", progname);
710 exit(2);
711 }
712 break;
713 case 's': /* Sleep time */
714 sleeptime = atoi(optarg);
715 if (sleeptime <= 0 || sleeptime > 60)
716 {
717 fprintf(stderr, "%s: -s sleeptime incorrectly set\n", progname);
718 exit(2);
719 }
720 break;
721 case 't': /* Trigger file */
722 triggerPath = pg_strdup(optarg);
723 break;
724 case 'w': /* Max wait time */
725 maxwaittime = atoi(optarg);
726 if (maxwaittime < 0)
727 {
728 fprintf(stderr, "%s: -w maxwaittime incorrectly set\n", progname);
729 exit(2);
730 }
731 break;
732 default:
733 fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
734 exit(2);
735 break;
736 }
737 }
738
739 /*
740 * Parameter checking - after checking to see if trigger file present
741 */
742 if (argc == 1)
743 {
744 fprintf(stderr, "%s: not enough command-line arguments\n", progname);
745 exit(2);
746 }
747
748 /*
749 * We will go to the archiveLocation to get nextWALFileName.
750 * nextWALFileName may not exist yet, which would not be an error, so we
751 * separate the archiveLocation and nextWALFileName so we can check
752 * separately whether archiveLocation exists, if not that is an error
753 */
754 if (optind < argc)
755 {
756 archiveLocation = argv[optind];
757 optind++;
758 }
759 else
760 {
761 fprintf(stderr, "%s: must specify archive location\n", progname);
762 fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
763 exit(2);
764 }
765
766 if (optind < argc)
767 {
768 nextWALFileName = argv[optind];
769 optind++;
770 }
771 else
772 {
773 fprintf(stderr, "%s: must specify WAL file name as second non-option argument (use \"%%f\")\n", progname);
774 fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
775 exit(2);
776 }
777
778 if (optind < argc)
779 {
780 xlogFilePath = argv[optind];
781 optind++;
782 }
783 else
784 {
785 fprintf(stderr, "%s: must specify xlog destination as third non-option argument (use \"%%p\")\n", progname);
786 fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
787 exit(2);
788 }
789
790 if (optind < argc)
791 {
792 restartWALFileName = argv[optind];
793 optind++;
794 }
795
796 CustomizableInitialize();
797
798 if (debug)
799 {
800 fprintf(stderr, "Trigger file: %s\n", triggerPath ? triggerPath : "<not set>");
801 fprintf(stderr, "Waiting for WAL file: %s\n", nextWALFileName);
802 fprintf(stderr, "WAL file path: %s\n", WALFilePath);
803 fprintf(stderr, "Restoring to: %s\n", xlogFilePath);
804 fprintf(stderr, "Sleep interval: %d second%s\n",
805 sleeptime, (sleeptime > 1 ? "s" : " "));
806 fprintf(stderr, "Max wait interval: %d %s\n",
807 maxwaittime, (maxwaittime > 0 ? "seconds" : "forever"));
808 fprintf(stderr, "Command for restore: %s\n", restoreCommand);
809 fflush(stderr);
810 }
811
812 /*
813 * Check for initial history file: always the first file to be requested
814 * It's OK if the file isn't there - all other files need to wait
815 */
816 if (IsTLHistoryFileName(nextWALFileName))
817 {
818 nextWALFileType = XLOG_HISTORY;
819 if (RestoreWALFileForRecovery())
820 exit(0);
821 else
822 {
823 if (debug)
824 {
825 fprintf(stderr, "history file not found\n");
826 fflush(stderr);
827 }
828 exit(1);
829 }
830 }
831
832 /*
833 * Main wait loop
834 */
835 for (;;)
836 {
837 /* Check for trigger file or signal first */
838 CheckForExternalTrigger();
839 #ifndef WIN32
840 if (signaled)
841 {
842 Failover = FastFailover;
843 if (debug)
844 {
845 fprintf(stderr, "signaled to exit: fast failover\n");
846 fflush(stderr);
847 }
848 }
849 #endif
850
851 /*
852 * Check for fast failover immediately, before checking if the
853 * requested WAL file is available
854 */
855 if (Failover == FastFailover)
856 exit(1);
857
858 if (CustomizableNextWALFileReady())
859 {
860 /*
861 * Once we have restored this file successfully we can remove some
862 * prior WAL files. If this restore fails we mustn't remove any
863 * file because some of them will be requested again immediately
864 * after the failed restore, or when we restart recovery.
865 */
866 if (RestoreWALFileForRecovery())
867 {
868 if (need_cleanup)
869 CustomizableCleanupPriorWALFiles();
870
871 exit(0);
872 }
873 else
874 {
875 /* Something went wrong in copying the file */
876 exit(1);
877 }
878 }
879
880 /* Check for smart failover if the next WAL file was not available */
881 if (Failover == SmartFailover)
882 exit(1);
883
884 if (sleeptime <= 60)
885 pg_usleep(sleeptime * 1000000L);
886
887 waittime += sleeptime;
888 if (waittime >= maxwaittime && maxwaittime > 0)
889 {
890 Failover = FastFailover;
891 if (debug)
892 {
893 fprintf(stderr, "Timed out after %d seconds: fast failover\n",
894 waittime);
895 fflush(stderr);
896 }
897 }
898 if (debug)
899 {
900 fprintf(stderr, "WAL file not present yet.");
901 if (triggerPath)
902 fprintf(stderr, " Checking for trigger file...");
903 fprintf(stderr, "\n");
904 fflush(stderr);
905 }
906 }
907 }
908