1 /*-------------------------------------------------------------------------
2 *
3 * pg_resetwal.c
4 * A utility to "zero out" the xlog when it's corrupt beyond recovery.
5 * Can also rebuild pg_control if needed.
6 *
7 * The theory of operation is fairly simple:
8 * 1. Read the existing pg_control (which will include the last
9 * checkpoint record). If it is an old format then update to
10 * current format.
11 * 2. If pg_control is corrupt, attempt to intuit reasonable values,
12 * by scanning the old xlog if necessary.
13 * 3. Modify pg_control to reflect a "shutdown" state with a checkpoint
14 * record at the start of xlog.
15 * 4. Flush the existing xlog files and write a new segment with
16 * just a checkpoint record in it. The new segment is positioned
17 * just past the end of the old xlog, so that existing LSNs in
18 * data pages will appear to be "in the past".
19 * This is all pretty straightforward except for the intuition part of
20 * step 2 ...
21 *
22 *
23 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
24 * Portions Copyright (c) 1994, Regents of the University of California
25 *
26 * src/bin/pg_resetwal/pg_resetwal.c
27 *
28 *-------------------------------------------------------------------------
29 */
30
31 /*
32 * We have to use postgres.h not postgres_fe.h here, because there's so much
33 * backend-only stuff in the XLOG include files we need. But we need a
34 * frontend-ish environment otherwise. Hence this ugly hack.
35 */
36 #define FRONTEND 1
37
38 #include "postgres.h"
39
40 #include <dirent.h>
41 #include <fcntl.h>
42 #include <sys/stat.h>
43 #include <sys/time.h>
44 #include <time.h>
45 #include <unistd.h>
46
47 #include "access/heaptoast.h"
48 #include "access/multixact.h"
49 #include "access/transam.h"
50 #include "access/xlog.h"
51 #include "access/xlog_internal.h"
52 #include "common/controldata_utils.h"
53 #include "common/fe_memutils.h"
54 #include "common/file_perm.h"
55 #include "common/logging.h"
56 #include "common/restricted_token.h"
57 #include "common/string.h"
58 #include "getopt_long.h"
59 #include "pg_getopt.h"
60 #include "storage/large_object.h"
61
/*
 * File-scope state shared by main() and the helper routines below.
 */
static ControlFileData ControlFile; /* pg_control values */
static XLogSegNo newXlogSegNo;	/* new XLOG segment # */
static bool guessed = false;	/* T if we had to guess at any values */
static const char *progname;	/* set from argv[0] in main() */

/*
 * Values requested on the command line.  Each one has a "not given" value
 * that main() tests before copying it into ControlFile: (uint32) -1 for the
 * epoch and the multixact offset, 0 for all the others.
 */
static uint32 set_xid_epoch = (uint32) -1;	/* -e; -1 means not given */
static TransactionId set_oldest_xid = 0;	/* -u; 0 means not given */
static TransactionId set_xid = 0;	/* -x; 0 means not given */
static TransactionId set_oldest_commit_ts_xid = 0;	/* -c, first value */
static TransactionId set_newest_commit_ts_xid = 0;	/* -c, second value */
static Oid	set_oid = 0;		/* -o; 0 means not given */
static MultiXactId set_mxid = 0;	/* -m, first value; 0 means not given */
static MultiXactOffset set_mxoff = (MultiXactOffset) -1;	/* -O; -1 means not
															 * given */
static uint32 minXlogTli = 0;	/* timeline parsed from the -l file name */
static XLogSegNo minXlogSegNo = 0;	/* segment number parsed from the -l file
									 * name */
static int	WalSegSz;			/* segment size in use: --wal-segsize if
								 * given, else the pg_control value */
static int	set_wal_segsize;	/* --wal-segsize in bytes; 0 means not given */

/* Forward declarations of the file-local helper routines. */
static void CheckDataVersion(void);
static bool read_controlfile(void);
static void GuessControlValues(void);
static void PrintControlValues(bool guessed);
static void PrintNewControlValues(void);
static void RewriteControlFile(void);
static void FindEndOfXLOG(void);
static void KillExistingXLOG(void);
static void KillExistingArchiveStatus(void);
static void WriteEmptyXLOG(void);
static void usage(void);
90
91
92 int
main(int argc,char * argv[])93 main(int argc, char *argv[])
94 {
95 static struct option long_options[] = {
96 {"commit-timestamp-ids", required_argument, NULL, 'c'},
97 {"pgdata", required_argument, NULL, 'D'},
98 {"epoch", required_argument, NULL, 'e'},
99 {"force", no_argument, NULL, 'f'},
100 {"next-wal-file", required_argument, NULL, 'l'},
101 {"multixact-ids", required_argument, NULL, 'm'},
102 {"dry-run", no_argument, NULL, 'n'},
103 {"next-oid", required_argument, NULL, 'o'},
104 {"multixact-offset", required_argument, NULL, 'O'},
105 {"oldest-transaction-id", required_argument, NULL, 'u'},
106 {"next-transaction-id", required_argument, NULL, 'x'},
107 {"wal-segsize", required_argument, NULL, 1},
108 {NULL, 0, NULL, 0}
109 };
110
111 int c;
112 bool force = false;
113 bool noupdate = false;
114 MultiXactId set_oldestmxid = 0;
115 char *endptr;
116 char *endptr2;
117 char *DataDir = NULL;
118 char *log_fname = NULL;
119 int fd;
120
121 pg_logging_init(argv[0]);
122 set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_resetwal"));
123 progname = get_progname(argv[0]);
124
125 if (argc > 1)
126 {
127 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
128 {
129 usage();
130 exit(0);
131 }
132 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
133 {
134 puts("pg_resetwal (PostgreSQL) " PG_VERSION);
135 exit(0);
136 }
137 }
138
139
140 while ((c = getopt_long(argc, argv, "c:D:e:fl:m:no:O:u:x:", long_options, NULL)) != -1)
141 {
142 switch (c)
143 {
144 case 'D':
145 DataDir = optarg;
146 break;
147
148 case 'f':
149 force = true;
150 break;
151
152 case 'n':
153 noupdate = true;
154 break;
155
156 case 'e':
157 set_xid_epoch = strtoul(optarg, &endptr, 0);
158 if (endptr == optarg || *endptr != '\0')
159 {
160 /*------
161 translator: the second %s is a command line argument (-e, etc) */
162 pg_log_error("invalid argument for option %s", "-e");
163 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
164 exit(1);
165 }
166 if (set_xid_epoch == -1)
167 {
168 pg_log_error("transaction ID epoch (-e) must not be -1");
169 exit(1);
170 }
171 break;
172
173 case 'u':
174 set_oldest_xid = strtoul(optarg, &endptr, 0);
175 if (endptr == optarg || *endptr != '\0')
176 {
177 pg_log_error("invalid argument for option %s", "-u");
178 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
179 exit(1);
180 }
181 if (!TransactionIdIsNormal(set_oldest_xid))
182 {
183 pg_log_error("oldest transaction ID (-u) must be greater than or equal to %u", FirstNormalTransactionId);
184 exit(1);
185 }
186 break;
187
188 case 'x':
189 set_xid = strtoul(optarg, &endptr, 0);
190 if (endptr == optarg || *endptr != '\0')
191 {
192 pg_log_error("invalid argument for option %s", "-x");
193 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
194 exit(1);
195 }
196 if (!TransactionIdIsNormal(set_xid))
197 {
198 pg_log_error("transaction ID (-x) must be greater than or equal to %u", FirstNormalTransactionId);
199 exit(1);
200 }
201 break;
202
203 case 'c':
204 set_oldest_commit_ts_xid = strtoul(optarg, &endptr, 0);
205 if (endptr == optarg || *endptr != ',')
206 {
207 pg_log_error("invalid argument for option %s", "-c");
208 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
209 exit(1);
210 }
211 set_newest_commit_ts_xid = strtoul(endptr + 1, &endptr2, 0);
212 if (endptr2 == endptr + 1 || *endptr2 != '\0')
213 {
214 pg_log_error("invalid argument for option %s", "-c");
215 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
216 exit(1);
217 }
218
219 if (set_oldest_commit_ts_xid < 2 &&
220 set_oldest_commit_ts_xid != 0)
221 {
222 pg_log_error("transaction ID (-c) must be either 0 or greater than or equal to 2");
223 exit(1);
224 }
225
226 if (set_newest_commit_ts_xid < 2 &&
227 set_newest_commit_ts_xid != 0)
228 {
229 pg_log_error("transaction ID (-c) must be either 0 or greater than or equal to 2");
230 exit(1);
231 }
232 break;
233
234 case 'o':
235 set_oid = strtoul(optarg, &endptr, 0);
236 if (endptr == optarg || *endptr != '\0')
237 {
238 pg_log_error("invalid argument for option %s", "-o");
239 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
240 exit(1);
241 }
242 if (set_oid == 0)
243 {
244 pg_log_error("OID (-o) must not be 0");
245 exit(1);
246 }
247 break;
248
249 case 'm':
250 set_mxid = strtoul(optarg, &endptr, 0);
251 if (endptr == optarg || *endptr != ',')
252 {
253 pg_log_error("invalid argument for option %s", "-m");
254 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
255 exit(1);
256 }
257
258 set_oldestmxid = strtoul(endptr + 1, &endptr2, 0);
259 if (endptr2 == endptr + 1 || *endptr2 != '\0')
260 {
261 pg_log_error("invalid argument for option %s", "-m");
262 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
263 exit(1);
264 }
265 if (set_mxid == 0)
266 {
267 pg_log_error("multitransaction ID (-m) must not be 0");
268 exit(1);
269 }
270
271 /*
272 * XXX It'd be nice to have more sanity checks here, e.g. so
273 * that oldest is not wrapped around w.r.t. nextMulti.
274 */
275 if (set_oldestmxid == 0)
276 {
277 pg_log_error("oldest multitransaction ID (-m) must not be 0");
278 exit(1);
279 }
280 break;
281
282 case 'O':
283 set_mxoff = strtoul(optarg, &endptr, 0);
284 if (endptr == optarg || *endptr != '\0')
285 {
286 pg_log_error("invalid argument for option %s", "-O");
287 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
288 exit(1);
289 }
290 if (set_mxoff == -1)
291 {
292 pg_log_error("multitransaction offset (-O) must not be -1");
293 exit(1);
294 }
295 break;
296
297 case 'l':
298 if (strspn(optarg, "01234567890ABCDEFabcdef") != XLOG_FNAME_LEN)
299 {
300 pg_log_error("invalid argument for option %s", "-l");
301 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
302 exit(1);
303 }
304
305 /*
306 * XLogFromFileName requires wal segment size which is not yet
307 * set. Hence wal details are set later on.
308 */
309 log_fname = pg_strdup(optarg);
310 break;
311
312 case 1:
313 set_wal_segsize = strtol(optarg, &endptr, 10) * 1024 * 1024;
314 if (endptr == optarg || *endptr != '\0')
315 {
316 pg_log_error("argument of --wal-segsize must be a number");
317 exit(1);
318 }
319 if (!IsValidWalSegSize(set_wal_segsize))
320 {
321 pg_log_error("argument of --wal-segsize must be a power of 2 between 1 and 1024");
322 exit(1);
323 }
324 break;
325
326 default:
327 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
328 exit(1);
329 }
330 }
331
332 if (DataDir == NULL && optind < argc)
333 DataDir = argv[optind++];
334
335 /* Complain if any arguments remain */
336 if (optind < argc)
337 {
338 pg_log_error("too many command-line arguments (first is \"%s\")",
339 argv[optind]);
340 fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
341 progname);
342 exit(1);
343 }
344
345 if (DataDir == NULL)
346 {
347 pg_log_error("no data directory specified");
348 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
349 exit(1);
350 }
351
352 /*
353 * Don't allow pg_resetwal to be run as root, to avoid overwriting the
354 * ownership of files in the data directory. We need only check for root
355 * -- any other user won't have sufficient permissions to modify files in
356 * the data directory.
357 */
358 #ifndef WIN32
359 if (geteuid() == 0)
360 {
361 pg_log_error("cannot be executed by \"root\"");
362 pg_log_info("You must run %s as the PostgreSQL superuser.",
363 progname);
364 exit(1);
365 }
366 #endif
367
368 get_restricted_token();
369
370 /* Set mask based on PGDATA permissions */
371 if (!GetDataDirectoryCreatePerm(DataDir))
372 {
373 pg_log_error("could not read permissions of directory \"%s\": %m",
374 DataDir);
375 exit(1);
376 }
377
378 umask(pg_mode_mask);
379
380 if (chdir(DataDir) < 0)
381 {
382 pg_log_error("could not change directory to \"%s\": %m",
383 DataDir);
384 exit(1);
385 }
386
387 /* Check that data directory matches our server version */
388 CheckDataVersion();
389
390 /*
391 * Check for a postmaster lock file --- if there is one, refuse to
392 * proceed, on grounds we might be interfering with a live installation.
393 */
394 if ((fd = open("postmaster.pid", O_RDONLY, 0)) < 0)
395 {
396 if (errno != ENOENT)
397 {
398 pg_log_error("could not open file \"%s\" for reading: %m",
399 "postmaster.pid");
400 exit(1);
401 }
402 }
403 else
404 {
405 pg_log_error("lock file \"%s\" exists", "postmaster.pid");
406 pg_log_info("Is a server running? If not, delete the lock file and try again.");
407 exit(1);
408 }
409
410 /*
411 * Attempt to read the existing pg_control file
412 */
413 if (!read_controlfile())
414 GuessControlValues();
415
416 /*
417 * If no new WAL segment size was specified, use the control file value.
418 */
419 if (set_wal_segsize != 0)
420 WalSegSz = set_wal_segsize;
421 else
422 WalSegSz = ControlFile.xlog_seg_size;
423
424 if (log_fname != NULL)
425 XLogFromFileName(log_fname, &minXlogTli, &minXlogSegNo, WalSegSz);
426
427 /*
428 * Also look at existing segment files to set up newXlogSegNo
429 */
430 FindEndOfXLOG();
431
432 /*
433 * If we're not going to proceed with the reset, print the current control
434 * file parameters.
435 */
436 if ((guessed && !force) || noupdate)
437 PrintControlValues(guessed);
438
439 /*
440 * Adjust fields if required by switches. (Do this now so that printout,
441 * if any, includes these values.)
442 */
443 if (set_xid_epoch != -1)
444 ControlFile.checkPointCopy.nextXid =
445 FullTransactionIdFromEpochAndXid(set_xid_epoch,
446 XidFromFullTransactionId(ControlFile.checkPointCopy.nextXid));
447
448 if (set_oldest_xid != 0)
449 {
450 ControlFile.checkPointCopy.oldestXid = set_oldest_xid;
451 ControlFile.checkPointCopy.oldestXidDB = InvalidOid;
452 }
453
454 if (set_xid != 0)
455 ControlFile.checkPointCopy.nextXid =
456 FullTransactionIdFromEpochAndXid(EpochFromFullTransactionId(ControlFile.checkPointCopy.nextXid),
457 set_xid);
458
459 if (set_oldest_commit_ts_xid != 0)
460 ControlFile.checkPointCopy.oldestCommitTsXid = set_oldest_commit_ts_xid;
461 if (set_newest_commit_ts_xid != 0)
462 ControlFile.checkPointCopy.newestCommitTsXid = set_newest_commit_ts_xid;
463
464 if (set_oid != 0)
465 ControlFile.checkPointCopy.nextOid = set_oid;
466
467 if (set_mxid != 0)
468 {
469 ControlFile.checkPointCopy.nextMulti = set_mxid;
470
471 ControlFile.checkPointCopy.oldestMulti = set_oldestmxid;
472 if (ControlFile.checkPointCopy.oldestMulti < FirstMultiXactId)
473 ControlFile.checkPointCopy.oldestMulti += FirstMultiXactId;
474 ControlFile.checkPointCopy.oldestMultiDB = InvalidOid;
475 }
476
477 if (set_mxoff != -1)
478 ControlFile.checkPointCopy.nextMultiOffset = set_mxoff;
479
480 if (minXlogTli > ControlFile.checkPointCopy.ThisTimeLineID)
481 {
482 ControlFile.checkPointCopy.ThisTimeLineID = minXlogTli;
483 ControlFile.checkPointCopy.PrevTimeLineID = minXlogTli;
484 }
485
486 if (set_wal_segsize != 0)
487 ControlFile.xlog_seg_size = WalSegSz;
488
489 if (minXlogSegNo > newXlogSegNo)
490 newXlogSegNo = minXlogSegNo;
491
492 /*
493 * If we had to guess anything, and -f was not given, just print the
494 * guessed values and exit. Also print if -n is given.
495 */
496 if ((guessed && !force) || noupdate)
497 {
498 PrintNewControlValues();
499 if (!noupdate)
500 {
501 printf(_("\nIf these values seem acceptable, use -f to force reset.\n"));
502 exit(1);
503 }
504 else
505 exit(0);
506 }
507
508 /*
509 * Don't reset from a dirty pg_control without -f, either.
510 */
511 if (ControlFile.state != DB_SHUTDOWNED && !force)
512 {
513 printf(_("The database server was not shut down cleanly.\n"
514 "Resetting the write-ahead log might cause data to be lost.\n"
515 "If you want to proceed anyway, use -f to force reset.\n"));
516 exit(1);
517 }
518
519 /*
520 * Else, do the dirty deed.
521 */
522 RewriteControlFile();
523 KillExistingXLOG();
524 KillExistingArchiveStatus();
525 WriteEmptyXLOG();
526
527 printf(_("Write-ahead log reset\n"));
528 return 0;
529 }
530
531
532 /*
533 * Look at the version string stored in PG_VERSION and decide if this utility
534 * can be run safely or not.
535 *
536 * We don't want to inject pg_control and WAL files that are for a different
537 * major version; that can't do anything good. Note that we don't treat
538 * mismatching version info in pg_control as a reason to bail out, because
539 * recovering from a corrupted pg_control is one of the main reasons for this
540 * program to exist at all. However, PG_VERSION is unlikely to get corrupted,
541 * and if it were it would be easy to fix by hand. So let's make this check
542 * to prevent simple user errors.
543 */
544 static void
CheckDataVersion(void)545 CheckDataVersion(void)
546 {
547 const char *ver_file = "PG_VERSION";
548 FILE *ver_fd;
549 char rawline[64];
550
551 if ((ver_fd = fopen(ver_file, "r")) == NULL)
552 {
553 pg_log_error("could not open file \"%s\" for reading: %m",
554 ver_file);
555 exit(1);
556 }
557
558 /* version number has to be the first line read */
559 if (!fgets(rawline, sizeof(rawline), ver_fd))
560 {
561 if (!ferror(ver_fd))
562 pg_log_error("unexpected empty file \"%s\"", ver_file);
563 else
564 pg_log_error("could not read file \"%s\": %m", ver_file);
565 exit(1);
566 }
567
568 /* strip trailing newline and carriage return */
569 (void) pg_strip_crlf(rawline);
570
571 if (strcmp(rawline, PG_MAJORVERSION) != 0)
572 {
573 pg_log_error("data directory is of wrong version");
574 pg_log_info("File \"%s\" contains \"%s\", which is not compatible with this program's version \"%s\".",
575 ver_file, rawline, PG_MAJORVERSION);
576 exit(1);
577 }
578
579 fclose(ver_fd);
580 }
581
582
583 /*
584 * Try to read the existing pg_control file.
585 *
586 * This routine is also responsible for updating old pg_control versions
587 * to the current format. (Currently we don't do anything of the sort.)
588 */
589 static bool
read_controlfile(void)590 read_controlfile(void)
591 {
592 int fd;
593 int len;
594 char *buffer;
595 pg_crc32c crc;
596
597 if ((fd = open(XLOG_CONTROL_FILE, O_RDONLY | PG_BINARY, 0)) < 0)
598 {
599 /*
600 * If pg_control is not there at all, or we can't read it, the odds
601 * are we've been handed a bad DataDir path, so give up. User can do
602 * "touch pg_control" to force us to proceed.
603 */
604 pg_log_error("could not open file \"%s\" for reading: %m",
605 XLOG_CONTROL_FILE);
606 if (errno == ENOENT)
607 pg_log_info("If you are sure the data directory path is correct, execute\n"
608 " touch %s\n"
609 "and try again.",
610 XLOG_CONTROL_FILE);
611 exit(1);
612 }
613
614 /* Use malloc to ensure we have a maxaligned buffer */
615 buffer = (char *) pg_malloc(PG_CONTROL_FILE_SIZE);
616
617 len = read(fd, buffer, PG_CONTROL_FILE_SIZE);
618 if (len < 0)
619 {
620 pg_log_error("could not read file \"%s\": %m", XLOG_CONTROL_FILE);
621 exit(1);
622 }
623 close(fd);
624
625 if (len >= sizeof(ControlFileData) &&
626 ((ControlFileData *) buffer)->pg_control_version == PG_CONTROL_VERSION)
627 {
628 /* Check the CRC. */
629 INIT_CRC32C(crc);
630 COMP_CRC32C(crc,
631 buffer,
632 offsetof(ControlFileData, crc));
633 FIN_CRC32C(crc);
634
635 if (!EQ_CRC32C(crc, ((ControlFileData *) buffer)->crc))
636 {
637 /* We will use the data but treat it as guessed. */
638 pg_log_warning("pg_control exists but has invalid CRC; proceed with caution");
639 guessed = true;
640 }
641
642 memcpy(&ControlFile, buffer, sizeof(ControlFile));
643
644 /* return false if WAL segment size is not valid */
645 if (!IsValidWalSegSize(ControlFile.xlog_seg_size))
646 {
647 pg_log_warning(ngettext("pg_control specifies invalid WAL segment size (%d byte); proceed with caution",
648 "pg_control specifies invalid WAL segment size (%d bytes); proceed with caution",
649 ControlFile.xlog_seg_size),
650 ControlFile.xlog_seg_size);
651 return false;
652 }
653
654 return true;
655 }
656
657 /* Looks like it's a mess. */
658 pg_log_warning("pg_control exists but is broken or wrong version; ignoring it");
659 return false;
660 }
661
662
663 /*
664 * Guess at pg_control values when we can't read the old ones.
665 */
666 static void
GuessControlValues(void)667 GuessControlValues(void)
668 {
669 uint64 sysidentifier;
670 struct timeval tv;
671
672 /*
673 * Set up a completely default set of pg_control values.
674 */
675 guessed = true;
676 memset(&ControlFile, 0, sizeof(ControlFile));
677
678 ControlFile.pg_control_version = PG_CONTROL_VERSION;
679 ControlFile.catalog_version_no = CATALOG_VERSION_NO;
680
681 /*
682 * Create a new unique installation identifier, since we can no longer use
683 * any old XLOG records. See notes in xlog.c about the algorithm.
684 */
685 gettimeofday(&tv, NULL);
686 sysidentifier = ((uint64) tv.tv_sec) << 32;
687 sysidentifier |= ((uint64) tv.tv_usec) << 12;
688 sysidentifier |= getpid() & 0xFFF;
689
690 ControlFile.system_identifier = sysidentifier;
691
692 ControlFile.checkPointCopy.redo = SizeOfXLogLongPHD;
693 ControlFile.checkPointCopy.ThisTimeLineID = 1;
694 ControlFile.checkPointCopy.PrevTimeLineID = 1;
695 ControlFile.checkPointCopy.fullPageWrites = false;
696 ControlFile.checkPointCopy.nextXid =
697 FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId);
698 ControlFile.checkPointCopy.nextOid = FirstBootstrapObjectId;
699 ControlFile.checkPointCopy.nextMulti = FirstMultiXactId;
700 ControlFile.checkPointCopy.nextMultiOffset = 0;
701 ControlFile.checkPointCopy.oldestXid = FirstNormalTransactionId;
702 ControlFile.checkPointCopy.oldestXidDB = InvalidOid;
703 ControlFile.checkPointCopy.oldestMulti = FirstMultiXactId;
704 ControlFile.checkPointCopy.oldestMultiDB = InvalidOid;
705 ControlFile.checkPointCopy.time = (pg_time_t) time(NULL);
706 ControlFile.checkPointCopy.oldestActiveXid = InvalidTransactionId;
707
708 ControlFile.state = DB_SHUTDOWNED;
709 ControlFile.time = (pg_time_t) time(NULL);
710 ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
711 ControlFile.unloggedLSN = FirstNormalUnloggedLSN;
712
713 /* minRecoveryPoint, backupStartPoint and backupEndPoint can be left zero */
714
715 ControlFile.wal_level = WAL_LEVEL_MINIMAL;
716 ControlFile.wal_log_hints = false;
717 ControlFile.track_commit_timestamp = false;
718 ControlFile.MaxConnections = 100;
719 ControlFile.max_wal_senders = 10;
720 ControlFile.max_worker_processes = 8;
721 ControlFile.max_prepared_xacts = 0;
722 ControlFile.max_locks_per_xact = 64;
723
724 ControlFile.maxAlign = MAXIMUM_ALIGNOF;
725 ControlFile.floatFormat = FLOATFORMAT_VALUE;
726 ControlFile.blcksz = BLCKSZ;
727 ControlFile.relseg_size = RELSEG_SIZE;
728 ControlFile.xlog_blcksz = XLOG_BLCKSZ;
729 ControlFile.xlog_seg_size = DEFAULT_XLOG_SEG_SIZE;
730 ControlFile.nameDataLen = NAMEDATALEN;
731 ControlFile.indexMaxKeys = INDEX_MAX_KEYS;
732 ControlFile.toast_max_chunk_size = TOAST_MAX_CHUNK_SIZE;
733 ControlFile.loblksize = LOBLKSIZE;
734 ControlFile.float8ByVal = FLOAT8PASSBYVAL;
735
736 /*
737 * XXX eventually, should try to grovel through old XLOG to develop more
738 * accurate values for TimeLineID, nextXID, etc.
739 */
740 }
741
742
743 /*
744 * Print the guessed pg_control values when we had to guess.
745 *
746 * NB: this display should be just those fields that will not be
747 * reset by RewriteControlFile().
748 */
749 static void
PrintControlValues(bool guessed)750 PrintControlValues(bool guessed)
751 {
752 if (guessed)
753 printf(_("Guessed pg_control values:\n\n"));
754 else
755 printf(_("Current pg_control values:\n\n"));
756
757 printf(_("pg_control version number: %u\n"),
758 ControlFile.pg_control_version);
759 printf(_("Catalog version number: %u\n"),
760 ControlFile.catalog_version_no);
761 printf(_("Database system identifier: %llu\n"),
762 (unsigned long long) ControlFile.system_identifier);
763 printf(_("Latest checkpoint's TimeLineID: %u\n"),
764 ControlFile.checkPointCopy.ThisTimeLineID);
765 printf(_("Latest checkpoint's full_page_writes: %s\n"),
766 ControlFile.checkPointCopy.fullPageWrites ? _("on") : _("off"));
767 printf(_("Latest checkpoint's NextXID: %u:%u\n"),
768 EpochFromFullTransactionId(ControlFile.checkPointCopy.nextXid),
769 XidFromFullTransactionId(ControlFile.checkPointCopy.nextXid));
770 printf(_("Latest checkpoint's NextOID: %u\n"),
771 ControlFile.checkPointCopy.nextOid);
772 printf(_("Latest checkpoint's NextMultiXactId: %u\n"),
773 ControlFile.checkPointCopy.nextMulti);
774 printf(_("Latest checkpoint's NextMultiOffset: %u\n"),
775 ControlFile.checkPointCopy.nextMultiOffset);
776 printf(_("Latest checkpoint's oldestXID: %u\n"),
777 ControlFile.checkPointCopy.oldestXid);
778 printf(_("Latest checkpoint's oldestXID's DB: %u\n"),
779 ControlFile.checkPointCopy.oldestXidDB);
780 printf(_("Latest checkpoint's oldestActiveXID: %u\n"),
781 ControlFile.checkPointCopy.oldestActiveXid);
782 printf(_("Latest checkpoint's oldestMultiXid: %u\n"),
783 ControlFile.checkPointCopy.oldestMulti);
784 printf(_("Latest checkpoint's oldestMulti's DB: %u\n"),
785 ControlFile.checkPointCopy.oldestMultiDB);
786 printf(_("Latest checkpoint's oldestCommitTsXid:%u\n"),
787 ControlFile.checkPointCopy.oldestCommitTsXid);
788 printf(_("Latest checkpoint's newestCommitTsXid:%u\n"),
789 ControlFile.checkPointCopy.newestCommitTsXid);
790 printf(_("Maximum data alignment: %u\n"),
791 ControlFile.maxAlign);
792 /* we don't print floatFormat since can't say much useful about it */
793 printf(_("Database block size: %u\n"),
794 ControlFile.blcksz);
795 printf(_("Blocks per segment of large relation: %u\n"),
796 ControlFile.relseg_size);
797 printf(_("WAL block size: %u\n"),
798 ControlFile.xlog_blcksz);
799 printf(_("Bytes per WAL segment: %u\n"),
800 ControlFile.xlog_seg_size);
801 printf(_("Maximum length of identifiers: %u\n"),
802 ControlFile.nameDataLen);
803 printf(_("Maximum columns in an index: %u\n"),
804 ControlFile.indexMaxKeys);
805 printf(_("Maximum size of a TOAST chunk: %u\n"),
806 ControlFile.toast_max_chunk_size);
807 printf(_("Size of a large-object chunk: %u\n"),
808 ControlFile.loblksize);
809 /* This is no longer configurable, but users may still expect to see it: */
810 printf(_("Date/time type storage: %s\n"),
811 _("64-bit integers"));
812 printf(_("Float8 argument passing: %s\n"),
813 (ControlFile.float8ByVal ? _("by value") : _("by reference")));
814 printf(_("Data page checksum version: %u\n"),
815 ControlFile.data_checksum_version);
816 }
817
818
819 /*
820 * Print the values to be changed.
821 */
822 static void
PrintNewControlValues(void)823 PrintNewControlValues(void)
824 {
825 char fname[MAXFNAMELEN];
826
827 /* This will be always printed in order to keep format same. */
828 printf(_("\n\nValues to be changed:\n\n"));
829
830 XLogFileName(fname, ControlFile.checkPointCopy.ThisTimeLineID,
831 newXlogSegNo, WalSegSz);
832 printf(_("First log segment after reset: %s\n"), fname);
833
834 if (set_mxid != 0)
835 {
836 printf(_("NextMultiXactId: %u\n"),
837 ControlFile.checkPointCopy.nextMulti);
838 printf(_("OldestMultiXid: %u\n"),
839 ControlFile.checkPointCopy.oldestMulti);
840 printf(_("OldestMulti's DB: %u\n"),
841 ControlFile.checkPointCopy.oldestMultiDB);
842 }
843
844 if (set_mxoff != -1)
845 {
846 printf(_("NextMultiOffset: %u\n"),
847 ControlFile.checkPointCopy.nextMultiOffset);
848 }
849
850 if (set_oid != 0)
851 {
852 printf(_("NextOID: %u\n"),
853 ControlFile.checkPointCopy.nextOid);
854 }
855
856 if (set_xid != 0)
857 {
858 printf(_("NextXID: %u\n"),
859 XidFromFullTransactionId(ControlFile.checkPointCopy.nextXid));
860 printf(_("OldestXID: %u\n"),
861 ControlFile.checkPointCopy.oldestXid);
862 printf(_("OldestXID's DB: %u\n"),
863 ControlFile.checkPointCopy.oldestXidDB);
864 }
865
866 if (set_xid_epoch != -1)
867 {
868 printf(_("NextXID epoch: %u\n"),
869 EpochFromFullTransactionId(ControlFile.checkPointCopy.nextXid));
870 }
871
872 if (set_oldest_commit_ts_xid != 0)
873 {
874 printf(_("oldestCommitTsXid: %u\n"),
875 ControlFile.checkPointCopy.oldestCommitTsXid);
876 }
877 if (set_newest_commit_ts_xid != 0)
878 {
879 printf(_("newestCommitTsXid: %u\n"),
880 ControlFile.checkPointCopy.newestCommitTsXid);
881 }
882
883 if (set_wal_segsize != 0)
884 {
885 printf(_("Bytes per WAL segment: %u\n"),
886 ControlFile.xlog_seg_size);
887 }
888 }
889
890
891 /*
892 * Write out the new pg_control file.
893 */
894 static void
RewriteControlFile(void)895 RewriteControlFile(void)
896 {
897 /*
898 * Adjust fields as needed to force an empty XLOG starting at
899 * newXlogSegNo.
900 */
901 XLogSegNoOffsetToRecPtr(newXlogSegNo, SizeOfXLogLongPHD, WalSegSz,
902 ControlFile.checkPointCopy.redo);
903 ControlFile.checkPointCopy.time = (pg_time_t) time(NULL);
904
905 ControlFile.state = DB_SHUTDOWNED;
906 ControlFile.time = (pg_time_t) time(NULL);
907 ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
908 ControlFile.minRecoveryPoint = 0;
909 ControlFile.minRecoveryPointTLI = 0;
910 ControlFile.backupStartPoint = 0;
911 ControlFile.backupEndPoint = 0;
912 ControlFile.backupEndRequired = false;
913
914 /*
915 * Force the defaults for max_* settings. The values don't really matter
916 * as long as wal_level='minimal'; the postmaster will reset these fields
917 * anyway at startup.
918 */
919 ControlFile.wal_level = WAL_LEVEL_MINIMAL;
920 ControlFile.wal_log_hints = false;
921 ControlFile.track_commit_timestamp = false;
922 ControlFile.MaxConnections = 100;
923 ControlFile.max_wal_senders = 10;
924 ControlFile.max_worker_processes = 8;
925 ControlFile.max_prepared_xacts = 0;
926 ControlFile.max_locks_per_xact = 64;
927
928 /* The control file gets flushed here. */
929 update_controlfile(".", &ControlFile, true);
930 }
931
932
933 /*
934 * Scan existing XLOG files and determine the highest existing WAL address
935 *
936 * On entry, ControlFile.checkPointCopy.redo and ControlFile.xlog_seg_size
937 * are assumed valid (note that we allow the old xlog seg size to differ
938 * from what we're using). On exit, newXlogSegNo is set to suitable
939 * value for the beginning of replacement WAL (in our seg size).
940 */
941 static void
FindEndOfXLOG(void)942 FindEndOfXLOG(void)
943 {
944 DIR *xldir;
945 struct dirent *xlde;
946 uint64 segs_per_xlogid;
947 uint64 xlogbytepos;
948
949 /*
950 * Initialize the max() computation using the last checkpoint address from
951 * old pg_control. Note that for the moment we are working with segment
952 * numbering according to the old xlog seg size.
953 */
954 segs_per_xlogid = (UINT64CONST(0x0000000100000000) / ControlFile.xlog_seg_size);
955 newXlogSegNo = ControlFile.checkPointCopy.redo / ControlFile.xlog_seg_size;
956
957 /*
958 * Scan the pg_wal directory to find existing WAL segment files. We assume
959 * any present have been used; in most scenarios this should be
960 * conservative, because of xlog.c's attempts to pre-create files.
961 */
962 xldir = opendir(XLOGDIR);
963 if (xldir == NULL)
964 {
965 pg_log_error("could not open directory \"%s\": %m", XLOGDIR);
966 exit(1);
967 }
968
969 while (errno = 0, (xlde = readdir(xldir)) != NULL)
970 {
971 if (IsXLogFileName(xlde->d_name) ||
972 IsPartialXLogFileName(xlde->d_name))
973 {
974 unsigned int tli,
975 log,
976 seg;
977 XLogSegNo segno;
978
979 /*
980 * Note: We don't use XLogFromFileName here, because we want to
981 * use the segment size from the control file, not the size the
982 * pg_resetwal binary was compiled with
983 */
984 sscanf(xlde->d_name, "%08X%08X%08X", &tli, &log, &seg);
985 segno = ((uint64) log) * segs_per_xlogid + seg;
986
987 /*
988 * Note: we take the max of all files found, regardless of their
989 * timelines. Another possibility would be to ignore files of
990 * timelines other than the target TLI, but this seems safer.
991 * Better too large a result than too small...
992 */
993 if (segno > newXlogSegNo)
994 newXlogSegNo = segno;
995 }
996 }
997
998 if (errno)
999 {
1000 pg_log_error("could not read directory \"%s\": %m", XLOGDIR);
1001 exit(1);
1002 }
1003
1004 if (closedir(xldir))
1005 {
1006 pg_log_error("could not close directory \"%s\": %m", XLOGDIR);
1007 exit(1);
1008 }
1009
1010 /*
1011 * Finally, convert to new xlog seg size, and advance by one to ensure we
1012 * are in virgin territory.
1013 */
1014 xlogbytepos = newXlogSegNo * ControlFile.xlog_seg_size;
1015 newXlogSegNo = (xlogbytepos + ControlFile.xlog_seg_size - 1) / WalSegSz;
1016 newXlogSegNo++;
1017 }
1018
1019
1020 /*
1021 * Remove existing XLOG files
1022 */
1023 static void
KillExistingXLOG(void)1024 KillExistingXLOG(void)
1025 {
1026 DIR *xldir;
1027 struct dirent *xlde;
1028 char path[MAXPGPATH + sizeof(XLOGDIR)];
1029
1030 xldir = opendir(XLOGDIR);
1031 if (xldir == NULL)
1032 {
1033 pg_log_error("could not open directory \"%s\": %m", XLOGDIR);
1034 exit(1);
1035 }
1036
1037 while (errno = 0, (xlde = readdir(xldir)) != NULL)
1038 {
1039 if (IsXLogFileName(xlde->d_name) ||
1040 IsPartialXLogFileName(xlde->d_name))
1041 {
1042 snprintf(path, sizeof(path), "%s/%s", XLOGDIR, xlde->d_name);
1043 if (unlink(path) < 0)
1044 {
1045 pg_log_error("could not delete file \"%s\": %m", path);
1046 exit(1);
1047 }
1048 }
1049 }
1050
1051 if (errno)
1052 {
1053 pg_log_error("could not read directory \"%s\": %m", XLOGDIR);
1054 exit(1);
1055 }
1056
1057 if (closedir(xldir))
1058 {
1059 pg_log_error("could not close directory \"%s\": %m", XLOGDIR);
1060 exit(1);
1061 }
1062 }
1063
1064
1065 /*
1066 * Remove existing archive status files
1067 */
1068 static void
KillExistingArchiveStatus(void)1069 KillExistingArchiveStatus(void)
1070 {
1071 #define ARCHSTATDIR XLOGDIR "/archive_status"
1072
1073 DIR *xldir;
1074 struct dirent *xlde;
1075 char path[MAXPGPATH + sizeof(ARCHSTATDIR)];
1076
1077 xldir = opendir(ARCHSTATDIR);
1078 if (xldir == NULL)
1079 {
1080 pg_log_error("could not open directory \"%s\": %m", ARCHSTATDIR);
1081 exit(1);
1082 }
1083
1084 while (errno = 0, (xlde = readdir(xldir)) != NULL)
1085 {
1086 if (strspn(xlde->d_name, "0123456789ABCDEF") == XLOG_FNAME_LEN &&
1087 (strcmp(xlde->d_name + XLOG_FNAME_LEN, ".ready") == 0 ||
1088 strcmp(xlde->d_name + XLOG_FNAME_LEN, ".done") == 0 ||
1089 strcmp(xlde->d_name + XLOG_FNAME_LEN, ".partial.ready") == 0 ||
1090 strcmp(xlde->d_name + XLOG_FNAME_LEN, ".partial.done") == 0))
1091 {
1092 snprintf(path, sizeof(path), "%s/%s", ARCHSTATDIR, xlde->d_name);
1093 if (unlink(path) < 0)
1094 {
1095 pg_log_error("could not delete file \"%s\": %m", path);
1096 exit(1);
1097 }
1098 }
1099 }
1100
1101 if (errno)
1102 {
1103 pg_log_error("could not read directory \"%s\": %m", ARCHSTATDIR);
1104 exit(1);
1105 }
1106
1107 if (closedir(xldir))
1108 {
1109 pg_log_error("could not close directory \"%s\": %m", ARCHSTATDIR);
1110 exit(1);
1111 }
1112 }
1113
1114
1115 /*
1116 * Write an empty XLOG file, containing only the checkpoint record
1117 * already set up in ControlFile.
1118 */
1119 static void
WriteEmptyXLOG(void)1120 WriteEmptyXLOG(void)
1121 {
1122 PGAlignedXLogBlock buffer;
1123 XLogPageHeader page;
1124 XLogLongPageHeader longpage;
1125 XLogRecord *record;
1126 pg_crc32c crc;
1127 char path[MAXPGPATH];
1128 int fd;
1129 int nbytes;
1130 char *recptr;
1131
1132 memset(buffer.data, 0, XLOG_BLCKSZ);
1133
1134 /* Set up the XLOG page header */
1135 page = (XLogPageHeader) buffer.data;
1136 page->xlp_magic = XLOG_PAGE_MAGIC;
1137 page->xlp_info = XLP_LONG_HEADER;
1138 page->xlp_tli = ControlFile.checkPointCopy.ThisTimeLineID;
1139 page->xlp_pageaddr = ControlFile.checkPointCopy.redo - SizeOfXLogLongPHD;
1140 longpage = (XLogLongPageHeader) page;
1141 longpage->xlp_sysid = ControlFile.system_identifier;
1142 longpage->xlp_seg_size = WalSegSz;
1143 longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
1144
1145 /* Insert the initial checkpoint record */
1146 recptr = (char *) page + SizeOfXLogLongPHD;
1147 record = (XLogRecord *) recptr;
1148 record->xl_prev = 0;
1149 record->xl_xid = InvalidTransactionId;
1150 record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint);
1151 record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
1152 record->xl_rmid = RM_XLOG_ID;
1153
1154 recptr += SizeOfXLogRecord;
1155 *(recptr++) = (char) XLR_BLOCK_ID_DATA_SHORT;
1156 *(recptr++) = sizeof(CheckPoint);
1157 memcpy(recptr, &ControlFile.checkPointCopy,
1158 sizeof(CheckPoint));
1159
1160 INIT_CRC32C(crc);
1161 COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
1162 COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
1163 FIN_CRC32C(crc);
1164 record->xl_crc = crc;
1165
1166 /* Write the first page */
1167 XLogFilePath(path, ControlFile.checkPointCopy.ThisTimeLineID,
1168 newXlogSegNo, WalSegSz);
1169
1170 unlink(path);
1171
1172 fd = open(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
1173 pg_file_create_mode);
1174 if (fd < 0)
1175 {
1176 pg_log_error("could not open file \"%s\": %m", path);
1177 exit(1);
1178 }
1179
1180 errno = 0;
1181 if (write(fd, buffer.data, XLOG_BLCKSZ) != XLOG_BLCKSZ)
1182 {
1183 /* if write didn't set errno, assume problem is no disk space */
1184 if (errno == 0)
1185 errno = ENOSPC;
1186 pg_log_error("could not write file \"%s\": %m", path);
1187 exit(1);
1188 }
1189
1190 /* Fill the rest of the file with zeroes */
1191 memset(buffer.data, 0, XLOG_BLCKSZ);
1192 for (nbytes = XLOG_BLCKSZ; nbytes < WalSegSz; nbytes += XLOG_BLCKSZ)
1193 {
1194 errno = 0;
1195 if (write(fd, buffer.data, XLOG_BLCKSZ) != XLOG_BLCKSZ)
1196 {
1197 if (errno == 0)
1198 errno = ENOSPC;
1199 pg_log_error("could not write file \"%s\": %m", path);
1200 exit(1);
1201 }
1202 }
1203
1204 if (fsync(fd) != 0)
1205 {
1206 pg_log_error("fsync error: %m");
1207 exit(1);
1208 }
1209
1210 close(fd);
1211 }
1212
1213
1214 static void
usage(void)1215 usage(void)
1216 {
1217 printf(_("%s resets the PostgreSQL write-ahead log.\n\n"), progname);
1218 printf(_("Usage:\n %s [OPTION]... DATADIR\n\n"), progname);
1219 printf(_("Options:\n"));
1220 printf(_(" -c, --commit-timestamp-ids=XID,XID\n"
1221 " set oldest and newest transactions bearing\n"
1222 " commit timestamp (zero means no change)\n"));
1223 printf(_(" [-D, --pgdata=]DATADIR data directory\n"));
1224 printf(_(" -e, --epoch=XIDEPOCH set next transaction ID epoch\n"));
1225 printf(_(" -f, --force force update to be done\n"));
1226 printf(_(" -l, --next-wal-file=WALFILE set minimum starting location for new WAL\n"));
1227 printf(_(" -m, --multixact-ids=MXID,MXID set next and oldest multitransaction ID\n"));
1228 printf(_(" -n, --dry-run no update, just show what would be done\n"));
1229 printf(_(" -o, --next-oid=OID set next OID\n"));
1230 printf(_(" -O, --multixact-offset=OFFSET set next multitransaction offset\n"));
1231 printf(_(" -u, --oldest-transaction-id=XID set oldest transaction ID\n"));
1232 printf(_(" -V, --version output version information, then exit\n"));
1233 printf(_(" -x, --next-transaction-id=XID set next transaction ID\n"));
1234 printf(_(" --wal-segsize=SIZE size of WAL segments, in megabytes\n"));
1235 printf(_(" -?, --help show this help, then exit\n"));
1236 printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
1237 printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
1238 }
1239