1 /*-------------------------------------------------------------------------
2 *
3 * pg_checksums.c
4 * Checks, enables or disables page level checksums for an offline
5 * cluster
6 *
7 * Copyright (c) 2010-2020, PostgreSQL Global Development Group
8 *
9 * IDENTIFICATION
10 * src/bin/pg_checksums/pg_checksums.c
11 *
12 *-------------------------------------------------------------------------
13 */
14
#include "postgres_fe.h"

#include <dirent.h>
#include <errno.h>
#include <time.h>
#include <sys/stat.h>
#include <unistd.h>

#include "access/xlog_internal.h"
#include "common/controldata_utils.h"
#include "common/file_perm.h"
#include "common/file_utils.h"
#include "common/logging.h"
#include "getopt_long.h"
#include "pg_getopt.h"
#include "storage/bufpage.h"
#include "storage/checksum.h"
#include "storage/checksum_impl.h"
32
33
/* Running totals updated by scan_file() and printed at the end of main(). */
static int64 files = 0;			/* files opened for scanning */
static int64 blocks = 0;		/* full blocks read */
static int64 badblocks = 0;		/* checksum mismatches found in check mode */

/* Control file contents, loaded in main() with get_controlfile(). */
static ControlFileData *ControlFile;

/* Settings filled in by the option parsing in main(). */
static char *only_filenode = NULL;	/* -f: only scan files of this filenode */
static bool do_sync = true;		/* cleared by -N */
static bool verbose = false;	/* set by -v */
static bool showprogress = false;	/* set by -P */
43
/* Operation requested on the command line (-c, -d or -e); see main(). */
typedef enum
{
	PG_MODE_CHECK,				/* verify the checksums stored in each page */
	PG_MODE_DISABLE,			/* only reset the version in the control file */
	PG_MODE_ENABLE				/* write checksums into pages, then switch on */
} PgChecksumMode;
50
/*
 * Filename components.
 *
 * XXX: fd.h is not included here as frontend side code is not able to
 * interact with the backend-side definitions for the various fsync
 * wrappers.
 *
 * scan_directory() skips every directory entry whose name starts with one
 * of these two strings.
 */
#define PG_TEMP_FILES_DIR "pgsql_tmp"
#define PG_TEMP_FILE_PREFIX "pgsql_tmp"
60
/* Selected operation; checking is the default unless -d or -e is given. */
static PgChecksumMode mode = PG_MODE_CHECK;

/* Program name obtained from argv[0] in main(), used in user-facing text. */
static const char *progname;
64
65 /*
66 * Progress status information.
67 */
68 int64 total_size = 0;
69 int64 current_size = 0;
70 static pg_time_t last_progress_report = 0;
71
72 static void
usage(void)73 usage(void)
74 {
75 printf(_("%s enables, disables, or verifies data checksums in a PostgreSQL database cluster.\n\n"), progname);
76 printf(_("Usage:\n"));
77 printf(_(" %s [OPTION]... [DATADIR]\n"), progname);
78 printf(_("\nOptions:\n"));
79 printf(_(" [-D, --pgdata=]DATADIR data directory\n"));
80 printf(_(" -c, --check check data checksums (default)\n"));
81 printf(_(" -d, --disable disable data checksums\n"));
82 printf(_(" -e, --enable enable data checksums\n"));
83 printf(_(" -f, --filenode=FILENODE check only relation with specified filenode\n"));
84 printf(_(" -N, --no-sync do not wait for changes to be written safely to disk\n"));
85 printf(_(" -P, --progress show progress information\n"));
86 printf(_(" -v, --verbose output verbose messages\n"));
87 printf(_(" -V, --version output version information, then exit\n"));
88 printf(_(" -?, --help show this help, then exit\n"));
89 printf(_("\nIf no data directory (DATADIR) is specified, "
90 "the environment variable PGDATA\nis used.\n\n"));
91 printf(_("Report bugs to <%s>.\n"), PACKAGE_BUGREPORT);
92 printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
93 }
94
/*
 * Definition of one element part of an exclusion list, used for files
 * to exclude from checksum validation.  "name" is the name of the file
 * or path to check for exclusion.  If "match_prefix" is true, any items
 * matching the name as prefix are excluded.
 */
struct exclude_list_item
{
	const char *name;			/* name to compare with; NULL ends a list */
	bool		match_prefix;	/* true: prefix match, false: whole name */
};
106
/*
 * List of files excluded from checksum validation.
 *
 * skipfile() walks this array until it reaches the entry whose name is
 * NULL, so that entry must remain the last one.
 *
 * Note: this list should be kept in sync with what basebackup.c includes.
 */
static const struct exclude_list_item skip[] = {
	{"pg_control", false},
	{"pg_filenode.map", false},
	{"pg_internal.init", true},
	{"PG_VERSION", false},
#ifdef EXEC_BACKEND
	{"config_exec_params", true},
#endif
	{NULL, false}				/* end-of-list marker */
};
122
123 /*
124 * Report current progress status. Parts borrowed from
125 * src/bin/pg_basebackup/pg_basebackup.c.
126 */
127 static void
progress_report(bool finished)128 progress_report(bool finished)
129 {
130 int percent;
131 char total_size_str[32];
132 char current_size_str[32];
133 pg_time_t now;
134
135 Assert(showprogress);
136
137 now = time(NULL);
138 if (now == last_progress_report && !finished)
139 return; /* Max once per second */
140
141 /* Save current time */
142 last_progress_report = now;
143
144 /* Adjust total size if current_size is larger */
145 if (current_size > total_size)
146 total_size = current_size;
147
148 /* Calculate current percentage of size done */
149 percent = total_size ? (int) ((current_size) * 100 / total_size) : 0;
150
151 /*
152 * Separate step to keep platform-dependent format code out of
153 * translatable strings. And we only test for INT64_FORMAT availability
154 * in snprintf, not fprintf.
155 */
156 snprintf(total_size_str, sizeof(total_size_str), INT64_FORMAT,
157 total_size / (1024 * 1024));
158 snprintf(current_size_str, sizeof(current_size_str), INT64_FORMAT,
159 current_size / (1024 * 1024));
160
161 fprintf(stderr, _("%*s/%s MB (%d%%) computed"),
162 (int) strlen(current_size_str), current_size_str, total_size_str,
163 percent);
164
165 /*
166 * Stay on the same line if reporting to a terminal and we're not done
167 * yet.
168 */
169 fputc((!finished && isatty(fileno(stderr))) ? '\r' : '\n', stderr);
170 }
171
172 static bool
skipfile(const char * fn)173 skipfile(const char *fn)
174 {
175 int excludeIdx;
176
177 for (excludeIdx = 0; skip[excludeIdx].name != NULL; excludeIdx++)
178 {
179 int cmplen = strlen(skip[excludeIdx].name);
180
181 if (!skip[excludeIdx].match_prefix)
182 cmplen++;
183 if (strncmp(skip[excludeIdx].name, fn, cmplen) == 0)
184 return true;
185 }
186
187 return false;
188 }
189
190 static void
scan_file(const char * fn,BlockNumber segmentno)191 scan_file(const char *fn, BlockNumber segmentno)
192 {
193 PGAlignedBlock buf;
194 PageHeader header = (PageHeader) buf.data;
195 int f;
196 BlockNumber blockno;
197 int flags;
198
199 Assert(mode == PG_MODE_ENABLE ||
200 mode == PG_MODE_CHECK);
201
202 flags = (mode == PG_MODE_ENABLE) ? O_RDWR : O_RDONLY;
203 f = open(fn, PG_BINARY | flags, 0);
204
205 if (f < 0)
206 {
207 pg_log_error("could not open file \"%s\": %m", fn);
208 exit(1);
209 }
210
211 files++;
212
213 for (blockno = 0;; blockno++)
214 {
215 uint16 csum;
216 int r = read(f, buf.data, BLCKSZ);
217
218 if (r == 0)
219 break;
220 if (r != BLCKSZ)
221 {
222 if (r < 0)
223 pg_log_error("could not read block %u in file \"%s\": %m",
224 blockno, fn);
225 else
226 pg_log_error("could not read block %u in file \"%s\": read %d of %d",
227 blockno, fn, r, BLCKSZ);
228 exit(1);
229 }
230 blocks++;
231
232 /*
233 * Since the file size is counted as total_size for progress status
234 * information, the sizes of all pages including new ones in the file
235 * should be counted as current_size. Otherwise the progress reporting
236 * calculated using those counters may not reach 100%.
237 */
238 current_size += r;
239
240 /* New pages have no checksum yet */
241 if (PageIsNew(header))
242 continue;
243
244 csum = pg_checksum_page(buf.data, blockno + segmentno * RELSEG_SIZE);
245 if (mode == PG_MODE_CHECK)
246 {
247 if (csum != header->pd_checksum)
248 {
249 if (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_VERSION)
250 pg_log_error("checksum verification failed in file \"%s\", block %u: calculated checksum %X but block contains %X",
251 fn, blockno, csum, header->pd_checksum);
252 badblocks++;
253 }
254 }
255 else if (mode == PG_MODE_ENABLE)
256 {
257 int w;
258
259 /* Set checksum in page header */
260 header->pd_checksum = csum;
261
262 /* Seek back to beginning of block */
263 if (lseek(f, -BLCKSZ, SEEK_CUR) < 0)
264 {
265 pg_log_error("seek failed for block %u in file \"%s\": %m", blockno, fn);
266 exit(1);
267 }
268
269 /* Write block with checksum */
270 w = write(f, buf.data, BLCKSZ);
271 if (w != BLCKSZ)
272 {
273 if (w < 0)
274 pg_log_error("could not write block %u in file \"%s\": %m",
275 blockno, fn);
276 else
277 pg_log_error("could not write block %u in file \"%s\": wrote %d of %d",
278 blockno, fn, w, BLCKSZ);
279 exit(1);
280 }
281 }
282
283 if (showprogress)
284 progress_report(false);
285 }
286
287 if (verbose)
288 {
289 if (mode == PG_MODE_CHECK)
290 pg_log_info("checksums verified in file \"%s\"", fn);
291 if (mode == PG_MODE_ENABLE)
292 pg_log_info("checksums enabled in file \"%s\"", fn);
293 }
294
295 close(f);
296 }
297
298 /*
299 * Scan the given directory for items which can be checksummed and
300 * operate on each one of them. If "sizeonly" is true, the size of
301 * all the items which have checksums is computed and returned back
302 * to the caller without operating on the files. This is used to compile
303 * the total size of the data directory for progress reports.
304 */
305 static int64
scan_directory(const char * basedir,const char * subdir,bool sizeonly)306 scan_directory(const char *basedir, const char *subdir, bool sizeonly)
307 {
308 int64 dirsize = 0;
309 char path[MAXPGPATH];
310 DIR *dir;
311 struct dirent *de;
312
313 snprintf(path, sizeof(path), "%s/%s", basedir, subdir);
314 dir = opendir(path);
315 if (!dir)
316 {
317 pg_log_error("could not open directory \"%s\": %m", path);
318 exit(1);
319 }
320 while ((de = readdir(dir)) != NULL)
321 {
322 char fn[MAXPGPATH];
323 struct stat st;
324
325 if (strcmp(de->d_name, ".") == 0 ||
326 strcmp(de->d_name, "..") == 0)
327 continue;
328
329 /* Skip temporary files */
330 if (strncmp(de->d_name,
331 PG_TEMP_FILE_PREFIX,
332 strlen(PG_TEMP_FILE_PREFIX)) == 0)
333 continue;
334
335 /* Skip temporary folders */
336 if (strncmp(de->d_name,
337 PG_TEMP_FILES_DIR,
338 strlen(PG_TEMP_FILES_DIR)) == 0)
339 continue;
340
341 snprintf(fn, sizeof(fn), "%s/%s", path, de->d_name);
342 if (lstat(fn, &st) < 0)
343 {
344 pg_log_error("could not stat file \"%s\": %m", fn);
345 exit(1);
346 }
347 if (S_ISREG(st.st_mode))
348 {
349 char fnonly[MAXPGPATH];
350 char *forkpath,
351 *segmentpath;
352 BlockNumber segmentno = 0;
353
354 if (skipfile(de->d_name))
355 continue;
356
357 /*
358 * Cut off at the segment boundary (".") to get the segment number
359 * in order to mix it into the checksum. Then also cut off at the
360 * fork boundary, to get the filenode the file belongs to for
361 * filtering.
362 */
363 strlcpy(fnonly, de->d_name, sizeof(fnonly));
364 segmentpath = strchr(fnonly, '.');
365 if (segmentpath != NULL)
366 {
367 *segmentpath++ = '\0';
368 segmentno = atoi(segmentpath);
369 if (segmentno == 0)
370 {
371 pg_log_error("invalid segment number %d in file name \"%s\"",
372 segmentno, fn);
373 exit(1);
374 }
375 }
376
377 forkpath = strchr(fnonly, '_');
378 if (forkpath != NULL)
379 *forkpath++ = '\0';
380
381 if (only_filenode && strcmp(only_filenode, fnonly) != 0)
382 /* filenode not to be included */
383 continue;
384
385 dirsize += st.st_size;
386
387 /*
388 * No need to work on the file when calculating only the size of
389 * the items in the data folder.
390 */
391 if (!sizeonly)
392 scan_file(fn, segmentno);
393 }
394 #ifndef WIN32
395 else if (S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode))
396 #else
397 else if (S_ISDIR(st.st_mode) || pgwin32_is_junction(fn))
398 #endif
399 {
400 /*
401 * If going through the entries of pg_tblspc, we assume to operate
402 * on tablespace locations where only TABLESPACE_VERSION_DIRECTORY
403 * is valid, resolving the linked locations and dive into them
404 * directly.
405 */
406 if (strncmp("pg_tblspc", subdir, strlen("pg_tblspc")) == 0)
407 {
408 char tblspc_path[MAXPGPATH];
409 struct stat tblspc_st;
410
411 /*
412 * Resolve tablespace location path and check whether
413 * TABLESPACE_VERSION_DIRECTORY exists. Not finding a valid
414 * location is unexpected, since there should be no orphaned
415 * links and no links pointing to something else than a
416 * directory.
417 */
418 snprintf(tblspc_path, sizeof(tblspc_path), "%s/%s/%s",
419 path, de->d_name, TABLESPACE_VERSION_DIRECTORY);
420
421 if (lstat(tblspc_path, &tblspc_st) < 0)
422 {
423 pg_log_error("could not stat file \"%s\": %m",
424 tblspc_path);
425 exit(1);
426 }
427
428 /*
429 * Move backwards once as the scan needs to happen for the
430 * contents of TABLESPACE_VERSION_DIRECTORY.
431 */
432 snprintf(tblspc_path, sizeof(tblspc_path), "%s/%s",
433 path, de->d_name);
434
435 /* Looks like a valid tablespace location */
436 dirsize += scan_directory(tblspc_path,
437 TABLESPACE_VERSION_DIRECTORY,
438 sizeonly);
439 }
440 else
441 {
442 dirsize += scan_directory(path, de->d_name, sizeonly);
443 }
444 }
445 }
446 closedir(dir);
447 return dirsize;
448 }
449
450 int
main(int argc,char * argv[])451 main(int argc, char *argv[])
452 {
453 static struct option long_options[] = {
454 {"check", no_argument, NULL, 'c'},
455 {"pgdata", required_argument, NULL, 'D'},
456 {"disable", no_argument, NULL, 'd'},
457 {"enable", no_argument, NULL, 'e'},
458 {"filenode", required_argument, NULL, 'f'},
459 {"no-sync", no_argument, NULL, 'N'},
460 {"progress", no_argument, NULL, 'P'},
461 {"verbose", no_argument, NULL, 'v'},
462 {NULL, 0, NULL, 0}
463 };
464
465 char *DataDir = NULL;
466 int c;
467 int option_index;
468 bool crc_ok;
469
470 pg_logging_init(argv[0]);
471 set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_checksums"));
472 progname = get_progname(argv[0]);
473
474 if (argc > 1)
475 {
476 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
477 {
478 usage();
479 exit(0);
480 }
481 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
482 {
483 puts("pg_checksums (PostgreSQL) " PG_VERSION);
484 exit(0);
485 }
486 }
487
488 while ((c = getopt_long(argc, argv, "cD:deNPf:v", long_options, &option_index)) != -1)
489 {
490 switch (c)
491 {
492 case 'c':
493 mode = PG_MODE_CHECK;
494 break;
495 case 'd':
496 mode = PG_MODE_DISABLE;
497 break;
498 case 'e':
499 mode = PG_MODE_ENABLE;
500 break;
501 case 'f':
502 if (atoi(optarg) == 0)
503 {
504 pg_log_error("invalid filenode specification, must be numeric: %s", optarg);
505 exit(1);
506 }
507 only_filenode = pstrdup(optarg);
508 break;
509 case 'N':
510 do_sync = false;
511 break;
512 case 'v':
513 verbose = true;
514 break;
515 case 'D':
516 DataDir = optarg;
517 break;
518 case 'P':
519 showprogress = true;
520 break;
521 default:
522 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
523 exit(1);
524 }
525 }
526
527 if (DataDir == NULL)
528 {
529 if (optind < argc)
530 DataDir = argv[optind++];
531 else
532 DataDir = getenv("PGDATA");
533
534 /* If no DataDir was specified, and none could be found, error out */
535 if (DataDir == NULL)
536 {
537 pg_log_error("no data directory specified");
538 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
539 exit(1);
540 }
541 }
542
543 /* Complain if any arguments remain */
544 if (optind < argc)
545 {
546 pg_log_error("too many command-line arguments (first is \"%s\")",
547 argv[optind]);
548 fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
549 progname);
550 exit(1);
551 }
552
553 /* filenode checking only works in --check mode */
554 if (mode != PG_MODE_CHECK && only_filenode)
555 {
556 pg_log_error("option -f/--filenode can only be used with --check");
557 fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
558 progname);
559 exit(1);
560 }
561
562 /* Read the control file and check compatibility */
563 ControlFile = get_controlfile(DataDir, &crc_ok);
564 if (!crc_ok)
565 {
566 pg_log_error("pg_control CRC value is incorrect");
567 exit(1);
568 }
569
570 if (ControlFile->pg_control_version != PG_CONTROL_VERSION)
571 {
572 pg_log_error("cluster is not compatible with this version of pg_checksums");
573 exit(1);
574 }
575
576 if (ControlFile->blcksz != BLCKSZ)
577 {
578 pg_log_error("database cluster is not compatible");
579 fprintf(stderr, _("The database cluster was initialized with block size %u, but pg_checksums was compiled with block size %u.\n"),
580 ControlFile->blcksz, BLCKSZ);
581 exit(1);
582 }
583
584 /*
585 * Check if cluster is running. A clean shutdown is required to avoid
586 * random checksum failures caused by torn pages. Note that this doesn't
587 * guard against someone starting the cluster concurrently.
588 */
589 if (ControlFile->state != DB_SHUTDOWNED &&
590 ControlFile->state != DB_SHUTDOWNED_IN_RECOVERY)
591 {
592 pg_log_error("cluster must be shut down");
593 exit(1);
594 }
595
596 if (ControlFile->data_checksum_version == 0 &&
597 mode == PG_MODE_CHECK)
598 {
599 pg_log_error("data checksums are not enabled in cluster");
600 exit(1);
601 }
602
603 if (ControlFile->data_checksum_version == 0 &&
604 mode == PG_MODE_DISABLE)
605 {
606 pg_log_error("data checksums are already disabled in cluster");
607 exit(1);
608 }
609
610 if (ControlFile->data_checksum_version > 0 &&
611 mode == PG_MODE_ENABLE)
612 {
613 pg_log_error("data checksums are already enabled in cluster");
614 exit(1);
615 }
616
617 /* Operate on all files if checking or enabling checksums */
618 if (mode == PG_MODE_CHECK || mode == PG_MODE_ENABLE)
619 {
620 /*
621 * If progress status information is requested, we need to scan the
622 * directory tree twice: once to know how much total data needs to be
623 * processed and once to do the real work.
624 */
625 if (showprogress)
626 {
627 total_size = scan_directory(DataDir, "global", true);
628 total_size += scan_directory(DataDir, "base", true);
629 total_size += scan_directory(DataDir, "pg_tblspc", true);
630 }
631
632 (void) scan_directory(DataDir, "global", false);
633 (void) scan_directory(DataDir, "base", false);
634 (void) scan_directory(DataDir, "pg_tblspc", false);
635
636 if (showprogress)
637 progress_report(true);
638
639 printf(_("Checksum operation completed\n"));
640 printf(_("Files scanned: %s\n"), psprintf(INT64_FORMAT, files));
641 printf(_("Blocks scanned: %s\n"), psprintf(INT64_FORMAT, blocks));
642 if (mode == PG_MODE_CHECK)
643 {
644 printf(_("Bad checksums: %s\n"), psprintf(INT64_FORMAT, badblocks));
645 printf(_("Data checksum version: %u\n"), ControlFile->data_checksum_version);
646
647 if (badblocks > 0)
648 exit(1);
649 }
650 }
651
652 /*
653 * Finally make the data durable on disk if enabling or disabling
654 * checksums. Flush first the data directory for safety, and then update
655 * the control file to keep the switch consistent.
656 */
657 if (mode == PG_MODE_ENABLE || mode == PG_MODE_DISABLE)
658 {
659 ControlFile->data_checksum_version =
660 (mode == PG_MODE_ENABLE) ? PG_DATA_CHECKSUM_VERSION : 0;
661
662 if (do_sync)
663 {
664 pg_log_info("syncing data directory");
665 fsync_pgdata(DataDir, PG_VERSION_NUM);
666 }
667
668 pg_log_info("updating control file");
669 update_controlfile(DataDir, ControlFile, do_sync);
670
671 if (verbose)
672 printf(_("Data checksum version: %u\n"), ControlFile->data_checksum_version);
673 if (mode == PG_MODE_ENABLE)
674 printf(_("Checksums enabled in cluster\n"));
675 else
676 printf(_("Checksums disabled in cluster\n"));
677 }
678
679 return 0;
680 }
681