1 /* 2 * contrib/pg_standby/pg_standby.c 3 * 4 * 5 * pg_standby.c 6 * 7 * Production-ready example of how to create a Warm Standby 8 * database server using continuous archiving as a 9 * replication mechanism 10 * 11 * We separate the parameters for archive and nextWALfile 12 * so that we can check the archive exists, even if the 13 * WAL file doesn't (yet). 14 * 15 * This program will be executed once in full for each file 16 * requested by the warm standby server. 17 * 18 * It is designed to cater to a variety of needs, as well 19 * providing a customizable section. 20 * 21 * Original author: Simon Riggs simon@2ndquadrant.com 22 * Current maintainer: Simon Riggs 23 */ 24 #include "postgres_fe.h" 25 26 #include <ctype.h> 27 #include <dirent.h> 28 #include <sys/stat.h> 29 #include <fcntl.h> 30 #include <signal.h> 31 #include <sys/time.h> 32 33 #include "pg_getopt.h" 34 35 #include "access/xlog_internal.h" 36 37 const char *progname; 38 39 int WalSegSz = -1; 40 41 /* Options and defaults */ 42 int sleeptime = 5; /* amount of time to sleep between file checks */ 43 int waittime = -1; /* how long we have been waiting, -1 no wait 44 * yet */ 45 int maxwaittime = 0; /* how long are we prepared to wait for? */ 46 int keepfiles = 0; /* number of WAL files to keep, 0 keep all */ 47 int maxretries = 3; /* number of retries on restore command */ 48 bool debug = false; /* are we debugging? */ 49 bool need_cleanup = false; /* do we need to remove files from 50 * archive? */ 51 52 #ifndef WIN32 53 static volatile sig_atomic_t signaled = false; 54 #endif 55 56 char *archiveLocation; /* where to find the archive? */ 57 char *triggerPath; /* where to find the trigger file? */ 58 char *xlogFilePath; /* where we are going to restore to */ 59 char *nextWALFileName; /* the file we need to get from archive */ 60 char *restartWALFileName; /* the file from which we can restart restore */ 61 char *priorWALFileName; /* the file we need to get from archive */ 62 char WALFilePath[MAXPGPATH * 2]; /* the file path including archive */ 63 char restoreCommand[MAXPGPATH]; /* run this to restore */ 64 char exclusiveCleanupFileName[MAXFNAMELEN]; /* the file we need to get 65 * from archive */ 66 67 /* 68 * Two types of failover are supported (smart and fast failover). 69 * 70 * The content of the trigger file determines the type of failover. If the 71 * trigger file contains the word "smart" (or the file is empty), smart 72 * failover is chosen: pg_standby acts as cp or ln command itself, on 73 * successful completion all the available WAL records will be applied 74 * resulting in zero data loss. But, it might take a long time to finish 75 * recovery if there's a lot of unapplied WAL. 76 * 77 * On the other hand, if the trigger file contains the word "fast", the 78 * recovery is finished immediately even if unapplied WAL files remain. Any 79 * transactions in the unapplied WAL files are lost. 80 * 81 * An empty trigger file performs smart failover. SIGUSR or SIGINT triggers 82 * fast failover. A timeout causes fast failover (smart failover would have 83 * the same effect, since if the timeout is reached there is no unapplied WAL). 84 */ 85 #define NoFailover 0 86 #define SmartFailover 1 87 #define FastFailover 2 88 89 static int Failover = NoFailover; 90 91 #define RESTORE_COMMAND_COPY 0 92 #define RESTORE_COMMAND_LINK 1 93 int restoreCommandType; 94 95 #define XLOG_DATA 0 96 #define XLOG_HISTORY 1 97 #define XLOG_BACKUP_LABEL 2 98 int nextWALFileType; 99 100 #define SET_RESTORE_COMMAND(cmd, arg1, arg2) \ 101 snprintf(restoreCommand, MAXPGPATH, cmd " \"%s\" \"%s\"", arg1, arg2) 102 103 struct stat stat_buf; 104 105 static bool SetWALFileNameForCleanup(void); 106 static bool SetWALSegSize(void); 107 108 109 /* ===================================================================== 110 * 111 * Customizable section 112 * 113 * ===================================================================== 114 * 115 * Currently, this section assumes that the Archive is a locally 116 * accessible directory. If you want to make other assumptions, 117 * such as using a vendor-specific archive and access API, these 118 * routines are the ones you'll need to change. You're 119 * encouraged to submit any changes to pgsql-hackers@postgresql.org 120 * or personally to the current maintainer. Those changes may be 121 * folded in to later versions of this program. 122 */ 123 124 /* 125 * Initialize allows customized commands into the warm standby program. 126 * 127 * As an example, and probably the common case, we use either 128 * cp/ln commands on *nix, or copy/move command on Windows. 129 */ 130 static void 131 CustomizableInitialize(void) 132 { 133 #ifdef WIN32 134 snprintf(WALFilePath, MAXPGPATH, "%s\\%s", archiveLocation, nextWALFileName); 135 switch (restoreCommandType) 136 { 137 case RESTORE_COMMAND_LINK: 138 SET_RESTORE_COMMAND("mklink", WALFilePath, xlogFilePath); 139 break; 140 case RESTORE_COMMAND_COPY: 141 default: 142 SET_RESTORE_COMMAND("copy", WALFilePath, xlogFilePath); 143 break; 144 } 145 #else 146 snprintf(WALFilePath, MAXPGPATH, "%s/%s", archiveLocation, nextWALFileName); 147 switch (restoreCommandType) 148 { 149 case RESTORE_COMMAND_LINK: 150 #if HAVE_WORKING_LINK 151 SET_RESTORE_COMMAND("ln -s -f", WALFilePath, xlogFilePath); 152 break; 153 #endif 154 case RESTORE_COMMAND_COPY: 155 default: 156 SET_RESTORE_COMMAND("cp", WALFilePath, xlogFilePath); 157 break; 158 } 159 #endif 160 161 /* 162 * This code assumes that archiveLocation is a directory You may wish to 163 * add code to check for tape libraries, etc.. So, since it is a 164 * directory, we use stat to test if it's accessible 165 */ 166 if (stat(archiveLocation, &stat_buf) != 0) 167 { 168 fprintf(stderr, "%s: archive location \"%s\" does not exist\n", progname, archiveLocation); 169 fflush(stderr); 170 exit(2); 171 } 172 } 173 174 /* 175 * CustomizableNextWALFileReady() 176 * 177 * Is the requested file ready yet? 178 */ 179 static bool 180 CustomizableNextWALFileReady(void) 181 { 182 if (stat(WALFilePath, &stat_buf) == 0) 183 { 184 /* 185 * If we've not seen any WAL segments, we don't know the WAL segment 186 * size, which we need. If it looks like a WAL segment, determine size 187 * of segments for the cluster. 188 */ 189 if (WalSegSz == -1 && IsXLogFileName(nextWALFileName)) 190 { 191 if (SetWALSegSize()) 192 { 193 /* 194 * Successfully determined WAL segment size. Can compute 195 * cleanup cutoff now. 196 */ 197 need_cleanup = SetWALFileNameForCleanup(); 198 if (debug) 199 { 200 fprintf(stderr, 201 _("WAL segment size: %d \n"), WalSegSz); 202 fprintf(stderr, "Keep archive history: "); 203 204 if (need_cleanup) 205 fprintf(stderr, "%s and later\n", 206 exclusiveCleanupFileName); 207 else 208 fprintf(stderr, "no cleanup required\n"); 209 } 210 } 211 } 212 213 /* 214 * If it's a backup file, return immediately. If it's a regular file 215 * return only if it's the right size already. 216 */ 217 if (IsBackupHistoryFileName(nextWALFileName)) 218 { 219 nextWALFileType = XLOG_BACKUP_LABEL; 220 return true; 221 } 222 else if (WalSegSz > 0 && stat_buf.st_size == WalSegSz) 223 { 224 #ifdef WIN32 225 226 /* 227 * Windows 'cp' sets the final file size before the copy is 228 * complete, and not yet ready to be opened by pg_standby. So we 229 * wait for sleeptime secs before attempting to restore. If that 230 * is not enough, we will rely on the retry/holdoff mechanism. 231 * GNUWin32's cp does not have this problem. 232 */ 233 pg_usleep(sleeptime * 1000000L); 234 #endif 235 nextWALFileType = XLOG_DATA; 236 return true; 237 } 238 239 /* 240 * If still too small, wait until it is the correct size 241 */ 242 if (WalSegSz > 0 && stat_buf.st_size > WalSegSz) 243 { 244 if (debug) 245 { 246 fprintf(stderr, "file size greater than expected\n"); 247 fflush(stderr); 248 } 249 exit(3); 250 } 251 } 252 253 return false; 254 } 255 256 static void 257 CustomizableCleanupPriorWALFiles(void) 258 { 259 /* 260 * Work out name of prior file from current filename 261 */ 262 if (nextWALFileType == XLOG_DATA) 263 { 264 int rc; 265 DIR *xldir; 266 struct dirent *xlde; 267 268 /* 269 * Assume it's OK to keep failing. The failure situation may change 270 * over time, so we'd rather keep going on the main processing than 271 * fail because we couldn't clean up yet. 272 */ 273 if ((xldir = opendir(archiveLocation)) != NULL) 274 { 275 while (errno = 0, (xlde = readdir(xldir)) != NULL) 276 { 277 /* 278 * We ignore the timeline part of the XLOG segment identifiers 279 * in deciding whether a segment is still needed. This 280 * ensures that we won't prematurely remove a segment from a 281 * parent timeline. We could probably be a little more 282 * proactive about removing segments of non-parent timelines, 283 * but that would be a whole lot more complicated. 284 * 285 * We use the alphanumeric sorting property of the filenames 286 * to decide which ones are earlier than the 287 * exclusiveCleanupFileName file. Note that this means files 288 * are not removed in the order they were originally written, 289 * in case this worries you. 290 */ 291 if (IsXLogFileName(xlde->d_name) && 292 strcmp(xlde->d_name + 8, exclusiveCleanupFileName + 8) < 0) 293 { 294 #ifdef WIN32 295 snprintf(WALFilePath, sizeof(WALFilePath), "%s\\%s", archiveLocation, xlde->d_name); 296 #else 297 snprintf(WALFilePath, sizeof(WALFilePath), "%s/%s", archiveLocation, xlde->d_name); 298 #endif 299 300 if (debug) 301 fprintf(stderr, "\nremoving file \"%s\"", WALFilePath); 302 303 rc = unlink(WALFilePath); 304 if (rc != 0) 305 { 306 fprintf(stderr, "\n%s: ERROR: could not remove file \"%s\": %s\n", 307 progname, WALFilePath, strerror(errno)); 308 break; 309 } 310 } 311 } 312 313 if (errno) 314 fprintf(stderr, "%s: could not read archive location \"%s\": %s\n", 315 progname, archiveLocation, strerror(errno)); 316 if (debug) 317 fprintf(stderr, "\n"); 318 } 319 else 320 fprintf(stderr, "%s: could not open archive location \"%s\": %s\n", 321 progname, archiveLocation, strerror(errno)); 322 323 if (closedir(xldir)) 324 fprintf(stderr, "%s: could not close archive location \"%s\": %s\n", 325 progname, archiveLocation, strerror(errno)); 326 327 fflush(stderr); 328 } 329 } 330 331 /* ===================================================================== 332 * End of Customizable section 333 * ===================================================================== 334 */ 335 336 /* 337 * SetWALFileNameForCleanup() 338 * 339 * Set the earliest WAL filename that we want to keep on the archive 340 * and decide whether we need_cleanup 341 */ 342 static bool 343 SetWALFileNameForCleanup(void) 344 { 345 uint32 tli = 1, 346 log = 0, 347 seg = 0; 348 uint32 log_diff = 0, 349 seg_diff = 0; 350 bool cleanup = false; 351 int max_segments_per_logfile = (0xFFFFFFFF / WalSegSz); 352 353 if (restartWALFileName) 354 { 355 /* 356 * Don't do cleanup if the restartWALFileName provided is later than 357 * the xlog file requested. This is an error and we must not remove 358 * these files from archive. This shouldn't happen, but better safe 359 * than sorry. 360 */ 361 if (strcmp(restartWALFileName, nextWALFileName) > 0) 362 return false; 363 364 strlcpy(exclusiveCleanupFileName, restartWALFileName, sizeof(exclusiveCleanupFileName)); 365 return true; 366 } 367 368 if (keepfiles > 0) 369 { 370 sscanf(nextWALFileName, "%08X%08X%08X", &tli, &log, &seg); 371 if (tli > 0 && seg > 0) 372 { 373 log_diff = keepfiles / max_segments_per_logfile; 374 seg_diff = keepfiles % max_segments_per_logfile; 375 if (seg_diff > seg) 376 { 377 log_diff++; 378 seg = max_segments_per_logfile - (seg_diff - seg); 379 } 380 else 381 seg -= seg_diff; 382 383 if (log >= log_diff) 384 { 385 log -= log_diff; 386 cleanup = true; 387 } 388 else 389 { 390 log = 0; 391 seg = 0; 392 } 393 } 394 } 395 396 XLogFileNameById(exclusiveCleanupFileName, tli, log, seg); 397 398 return cleanup; 399 } 400 401 /* 402 * Try to set the wal segment size from the WAL file specified by WALFilePath. 403 * 404 * Return true if size could be determined, false otherwise. 405 */ 406 static bool 407 SetWALSegSize(void) 408 { 409 bool ret_val = false; 410 int fd; 411 PGAlignedXLogBlock buf; 412 413 Assert(WalSegSz == -1); 414 415 if ((fd = open(WALFilePath, O_RDWR, 0)) < 0) 416 { 417 fprintf(stderr, "%s: could not open WAL file \"%s\": %s\n", 418 progname, WALFilePath, strerror(errno)); 419 return false; 420 } 421 422 errno = 0; 423 if (read(fd, buf.data, XLOG_BLCKSZ) == XLOG_BLCKSZ) 424 { 425 XLogLongPageHeader longhdr = (XLogLongPageHeader) buf.data; 426 427 WalSegSz = longhdr->xlp_seg_size; 428 429 if (IsValidWalSegSize(WalSegSz)) 430 { 431 /* successfully retrieved WAL segment size */ 432 ret_val = true; 433 } 434 else 435 fprintf(stderr, 436 "%s: WAL segment size must be a power of two between 1MB and 1GB, but the WAL file header specifies %d bytes\n", 437 progname, WalSegSz); 438 } 439 else 440 { 441 /* 442 * Don't complain loudly, this is to be expected for segments being 443 * created. 444 */ 445 if (errno != 0) 446 { 447 if (debug) 448 fprintf(stderr, "could not read file \"%s\": %s\n", 449 WALFilePath, strerror(errno)); 450 } 451 else 452 { 453 if (debug) 454 fprintf(stderr, "not enough data in file \"%s\"\n", 455 WALFilePath); 456 } 457 } 458 459 fflush(stderr); 460 461 close(fd); 462 return ret_val; 463 } 464 465 /* 466 * CheckForExternalTrigger() 467 * 468 * Is there a trigger file? Sets global 'Failover' variable to indicate 469 * what kind of a trigger file it was. A "fast" trigger file is turned 470 * into a "smart" file as a side-effect. 471 */ 472 static void 473 CheckForExternalTrigger(void) 474 { 475 char buf[32]; 476 int fd; 477 int len; 478 479 /* 480 * Look for a trigger file, if that option has been selected 481 * 482 * We use stat() here because triggerPath is always a file rather than 483 * potentially being in an archive 484 */ 485 if (!triggerPath || stat(triggerPath, &stat_buf) != 0) 486 return; 487 488 /* 489 * An empty trigger file performs smart failover. There's a little race 490 * condition here: if the writer of the trigger file has just created the 491 * file, but not yet written anything to it, we'll treat that as smart 492 * shutdown even if the other process was just about to write "fast" to 493 * it. But that's fine: we'll restore one more WAL file, and when we're 494 * invoked next time, we'll see the word "fast" and fail over immediately. 495 */ 496 if (stat_buf.st_size == 0) 497 { 498 Failover = SmartFailover; 499 fprintf(stderr, "trigger file found: smart failover\n"); 500 fflush(stderr); 501 return; 502 } 503 504 if ((fd = open(triggerPath, O_RDWR, 0)) < 0) 505 { 506 fprintf(stderr, "WARNING: could not open \"%s\": %s\n", 507 triggerPath, strerror(errno)); 508 fflush(stderr); 509 return; 510 } 511 512 if ((len = read(fd, buf, sizeof(buf) - 1)) < 0) 513 { 514 fprintf(stderr, "WARNING: could not read \"%s\": %s\n", 515 triggerPath, strerror(errno)); 516 fflush(stderr); 517 close(fd); 518 return; 519 } 520 buf[len] = '\0'; 521 522 if (strncmp(buf, "smart", 5) == 0) 523 { 524 Failover = SmartFailover; 525 fprintf(stderr, "trigger file found: smart failover\n"); 526 fflush(stderr); 527 close(fd); 528 return; 529 } 530 531 if (strncmp(buf, "fast", 4) == 0) 532 { 533 Failover = FastFailover; 534 535 fprintf(stderr, "trigger file found: fast failover\n"); 536 fflush(stderr); 537 538 /* 539 * Turn it into a "smart" trigger by truncating the file. Otherwise if 540 * the server asks us again to restore a segment that was restored 541 * already, we would return "not found" and upset the server. 542 */ 543 if (ftruncate(fd, 0) < 0) 544 { 545 fprintf(stderr, "WARNING: could not read \"%s\": %s\n", 546 triggerPath, strerror(errno)); 547 fflush(stderr); 548 } 549 close(fd); 550 551 return; 552 } 553 close(fd); 554 555 fprintf(stderr, "WARNING: invalid content in \"%s\"\n", triggerPath); 556 fflush(stderr); 557 return; 558 } 559 560 /* 561 * RestoreWALFileForRecovery() 562 * 563 * Perform the action required to restore the file from archive 564 */ 565 static bool 566 RestoreWALFileForRecovery(void) 567 { 568 int rc = 0; 569 int numretries = 0; 570 571 if (debug) 572 { 573 fprintf(stderr, "running restore: "); 574 fflush(stderr); 575 } 576 577 while (numretries <= maxretries) 578 { 579 rc = system(restoreCommand); 580 if (rc == 0) 581 { 582 if (debug) 583 { 584 fprintf(stderr, "OK\n"); 585 fflush(stderr); 586 } 587 return true; 588 } 589 pg_usleep(numretries++ * sleeptime * 1000000L); 590 } 591 592 /* 593 * Allow caller to add additional info 594 */ 595 if (debug) 596 fprintf(stderr, "not restored\n"); 597 return false; 598 } 599 600 static void 601 usage(void) 602 { 603 printf("%s allows PostgreSQL warm standby servers to be configured.\n\n", progname); 604 printf("Usage:\n"); 605 printf(" %s [OPTION]... ARCHIVELOCATION NEXTWALFILE XLOGFILEPATH [RESTARTWALFILE]\n", progname); 606 printf("\nOptions:\n"); 607 printf(" -c copy file from archive (default)\n"); 608 printf(" -d generate lots of debugging output (testing only)\n"); 609 printf(" -k NUMFILESTOKEEP if RESTARTWALFILE is not used, remove files prior to limit\n" 610 " (0 keeps all)\n"); 611 printf(" -l does nothing; use of link is now deprecated\n"); 612 printf(" -r MAXRETRIES max number of times to retry, with progressive wait\n" 613 " (default=3)\n"); 614 printf(" -s SLEEPTIME seconds to wait between file checks (min=1, max=60,\n" 615 " default=5)\n"); 616 printf(" -t TRIGGERFILE trigger file to initiate failover (no default)\n"); 617 printf(" -V, --version output version information, then exit\n"); 618 printf(" -w MAXWAITTIME max seconds to wait for a file (0=no limit) (default=0)\n"); 619 printf(" -?, --help show this help, then exit\n"); 620 printf("\n" 621 "Main intended use as restore_command in recovery.conf:\n" 622 " restore_command = 'pg_standby [OPTION]... ARCHIVELOCATION %%f %%p %%r'\n" 623 "e.g.\n" 624 " restore_command = 'pg_standby /mnt/server/archiverdir %%f %%p %%r'\n"); 625 printf("\nReport bugs to <pgsql-bugs@postgresql.org>.\n"); 626 } 627 628 #ifndef WIN32 629 static void 630 sighandler(int sig) 631 { 632 signaled = true; 633 } 634 635 /* We don't want SIGQUIT to core dump */ 636 static void 637 sigquit_handler(int sig) 638 { 639 pqsignal(SIGINT, SIG_DFL); 640 kill(getpid(), SIGINT); 641 } 642 #endif 643 644 /*------------ MAIN ----------------------------------------*/ 645 int 646 main(int argc, char **argv) 647 { 648 int c; 649 650 progname = get_progname(argv[0]); 651 652 if (argc > 1) 653 { 654 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) 655 { 656 usage(); 657 exit(0); 658 } 659 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) 660 { 661 puts("pg_standby (PostgreSQL) " PG_VERSION); 662 exit(0); 663 } 664 } 665 666 #ifndef WIN32 667 668 /* 669 * You can send SIGUSR1 to trigger failover. 670 * 671 * Postmaster uses SIGQUIT to request immediate shutdown. The default 672 * action is to core dump, but we don't want that, so trap it and commit 673 * suicide without core dump. 674 * 675 * We used to use SIGINT and SIGQUIT to trigger failover, but that turned 676 * out to be a bad idea because postmaster uses SIGQUIT to request 677 * immediate shutdown. We still trap SIGINT, but that may change in a 678 * future release. 679 * 680 * There's no way to trigger failover via signal on Windows. 681 */ 682 (void) pqsignal(SIGUSR1, sighandler); 683 (void) pqsignal(SIGINT, sighandler); /* deprecated, use SIGUSR1 */ 684 (void) pqsignal(SIGQUIT, sigquit_handler); 685 #endif 686 687 while ((c = getopt(argc, argv, "cdk:lr:s:t:w:")) != -1) 688 { 689 switch (c) 690 { 691 case 'c': /* Use copy */ 692 restoreCommandType = RESTORE_COMMAND_COPY; 693 break; 694 case 'd': /* Debug mode */ 695 debug = true; 696 break; 697 case 'k': /* keepfiles */ 698 keepfiles = atoi(optarg); 699 if (keepfiles < 0) 700 { 701 fprintf(stderr, "%s: -k keepfiles must be >= 0\n", progname); 702 exit(2); 703 } 704 break; 705 case 'l': /* Use link */ 706 707 /* 708 * Link feature disabled, possibly permanently. Linking causes 709 * a problem after recovery ends that is not currently 710 * resolved by PostgreSQL. 25 Jun 2009 711 */ 712 #ifdef NOT_USED 713 restoreCommandType = RESTORE_COMMAND_LINK; 714 #endif 715 break; 716 case 'r': /* Retries */ 717 maxretries = atoi(optarg); 718 if (maxretries < 0) 719 { 720 fprintf(stderr, "%s: -r maxretries must be >= 0\n", progname); 721 exit(2); 722 } 723 break; 724 case 's': /* Sleep time */ 725 sleeptime = atoi(optarg); 726 if (sleeptime <= 0 || sleeptime > 60) 727 { 728 fprintf(stderr, "%s: -s sleeptime incorrectly set\n", progname); 729 exit(2); 730 } 731 break; 732 case 't': /* Trigger file */ 733 triggerPath = pg_strdup(optarg); 734 break; 735 case 'w': /* Max wait time */ 736 maxwaittime = atoi(optarg); 737 if (maxwaittime < 0) 738 { 739 fprintf(stderr, "%s: -w maxwaittime incorrectly set\n", progname); 740 exit(2); 741 } 742 break; 743 default: 744 fprintf(stderr, "Try \"%s --help\" for more information.\n", progname); 745 exit(2); 746 break; 747 } 748 } 749 750 /* 751 * Parameter checking - after checking to see if trigger file present 752 */ 753 if (argc == 1) 754 { 755 fprintf(stderr, "%s: not enough command-line arguments\n", progname); 756 exit(2); 757 } 758 759 /* 760 * We will go to the archiveLocation to get nextWALFileName. 761 * nextWALFileName may not exist yet, which would not be an error, so we 762 * separate the archiveLocation and nextWALFileName so we can check 763 * separately whether archiveLocation exists, if not that is an error 764 */ 765 if (optind < argc) 766 { 767 archiveLocation = argv[optind]; 768 optind++; 769 } 770 else 771 { 772 fprintf(stderr, "%s: must specify archive location\n", progname); 773 fprintf(stderr, "Try \"%s --help\" for more information.\n", progname); 774 exit(2); 775 } 776 777 if (optind < argc) 778 { 779 nextWALFileName = argv[optind]; 780 optind++; 781 } 782 else 783 { 784 fprintf(stderr, "%s: must specify WAL file name as second non-option argument (use \"%%f\")\n", progname); 785 fprintf(stderr, "Try \"%s --help\" for more information.\n", progname); 786 exit(2); 787 } 788 789 if (optind < argc) 790 { 791 xlogFilePath = argv[optind]; 792 optind++; 793 } 794 else 795 { 796 fprintf(stderr, "%s: must specify xlog destination as third non-option argument (use \"%%p\")\n", progname); 797 fprintf(stderr, "Try \"%s --help\" for more information.\n", progname); 798 exit(2); 799 } 800 801 if (optind < argc) 802 { 803 restartWALFileName = argv[optind]; 804 optind++; 805 } 806 807 CustomizableInitialize(); 808 809 if (debug) 810 { 811 fprintf(stderr, "Trigger file: %s\n", triggerPath ? triggerPath : "<not set>"); 812 fprintf(stderr, "Waiting for WAL file: %s\n", nextWALFileName); 813 fprintf(stderr, "WAL file path: %s\n", WALFilePath); 814 fprintf(stderr, "Restoring to: %s\n", xlogFilePath); 815 fprintf(stderr, "Sleep interval: %d second%s\n", 816 sleeptime, (sleeptime > 1 ? "s" : " ")); 817 fprintf(stderr, "Max wait interval: %d %s\n", 818 maxwaittime, (maxwaittime > 0 ? "seconds" : "forever")); 819 fprintf(stderr, "Command for restore: %s\n", restoreCommand); 820 fflush(stderr); 821 } 822 823 /* 824 * Check for initial history file: always the first file to be requested 825 * It's OK if the file isn't there - all other files need to wait 826 */ 827 if (IsTLHistoryFileName(nextWALFileName)) 828 { 829 nextWALFileType = XLOG_HISTORY; 830 if (RestoreWALFileForRecovery()) 831 exit(0); 832 else 833 { 834 if (debug) 835 { 836 fprintf(stderr, "history file not found\n"); 837 fflush(stderr); 838 } 839 exit(1); 840 } 841 } 842 843 /* 844 * Main wait loop 845 */ 846 for (;;) 847 { 848 /* Check for trigger file or signal first */ 849 CheckForExternalTrigger(); 850 #ifndef WIN32 851 if (signaled) 852 { 853 Failover = FastFailover; 854 if (debug) 855 { 856 fprintf(stderr, "signaled to exit: fast failover\n"); 857 fflush(stderr); 858 } 859 } 860 #endif 861 862 /* 863 * Check for fast failover immediately, before checking if the 864 * requested WAL file is available 865 */ 866 if (Failover == FastFailover) 867 exit(1); 868 869 if (CustomizableNextWALFileReady()) 870 { 871 /* 872 * Once we have restored this file successfully we can remove some 873 * prior WAL files. If this restore fails we mustn't remove any 874 * file because some of them will be requested again immediately 875 * after the failed restore, or when we restart recovery. 876 */ 877 if (RestoreWALFileForRecovery()) 878 { 879 if (need_cleanup) 880 CustomizableCleanupPriorWALFiles(); 881 882 exit(0); 883 } 884 else 885 { 886 /* Something went wrong in copying the file */ 887 exit(1); 888 } 889 } 890 891 /* Check for smart failover if the next WAL file was not available */ 892 if (Failover == SmartFailover) 893 exit(1); 894 895 if (sleeptime <= 60) 896 pg_usleep(sleeptime * 1000000L); 897 898 waittime += sleeptime; 899 if (waittime >= maxwaittime && maxwaittime > 0) 900 { 901 Failover = FastFailover; 902 if (debug) 903 { 904 fprintf(stderr, "Timed out after %d seconds: fast failover\n", 905 waittime); 906 fflush(stderr); 907 } 908 } 909 if (debug) 910 { 911 fprintf(stderr, "WAL file not present yet."); 912 if (triggerPath) 913 fprintf(stderr, " Checking for trigger file..."); 914 fprintf(stderr, "\n"); 915 fflush(stderr); 916 } 917 } 918 } 919