1 /*-------------------------------------------------------------------------
2 *
3 * basebackup.c
4 * code for taking a base backup and streaming it to a standby
5 *
6 * Portions Copyright (c) 2010-2018, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/backend/replication/basebackup.c
10 *
11 *-------------------------------------------------------------------------
12 */
13 #include "postgres.h"
14
15 #include <sys/stat.h>
16 #include <unistd.h>
17 #include <time.h>
18
19 #include "access/xlog_internal.h" /* for pg_start/stop_backup */
20 #include "catalog/pg_type.h"
21 #include "common/file_perm.h"
22 #include "lib/stringinfo.h"
23 #include "libpq/libpq.h"
24 #include "libpq/pqformat.h"
25 #include "miscadmin.h"
26 #include "nodes/pg_list.h"
27 #include "pgtar.h"
28 #include "pgstat.h"
29 #include "port.h"
30 #include "postmaster/syslogger.h"
31 #include "replication/basebackup.h"
32 #include "replication/walsender.h"
33 #include "replication/walsender_private.h"
34 #include "storage/bufpage.h"
35 #include "storage/checksum.h"
36 #include "storage/dsm_impl.h"
37 #include "storage/fd.h"
38 #include "storage/ipc.h"
39 #include "storage/reinit.h"
40 #include "utils/builtins.h"
41 #include "utils/ps_status.h"
42 #include "utils/relcache.h"
43 #include "utils/timestamp.h"
44
45
/*
 * Options controlling a single base backup.  The struct is zeroed and then
 * filled in by parse_basebackup_options(), and read by perform_base_backup().
 */
typedef struct
{
	const char *label;			/* backup label; "base backup" if not given */
	bool		progress;		/* compute directory sizes before sending */
	bool		fastcheckpoint; /* passed through to do_pg_start_backup() */
	bool		nowait;			/* negated and passed to do_pg_stop_backup() */
	bool		includewal;		/* append WAL files to the main tar stream */
	uint32		maxrate;		/* throttling rate, multiplied by 1024 to get
								 * bytes; 0 means throttling is disabled */
	bool		sendtblspcmapfile;	/* inject tablespace_map into the tar */
} basebackup_options;
56
57
/* Prototypes for the file-local (static) functions of this module. */
static int64 sendDir(const char *path, int basepathlen, bool sizeonly,
		List *tablespaces, bool sendtblspclinks);
static bool sendFile(const char *readfilename, const char *tarfilename,
		 struct stat *statbuf, bool missing_ok);
static void sendFileWithContent(const char *filename, const char *content);
static int64 _tarWriteHeader(const char *filename, const char *linktarget,
				struct stat *statbuf, bool sizeonly);
static int64 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf,
			 bool sizeonly);
static void send_int8_string(StringInfoData *buf, int64 intval);
static void SendBackupHeader(List *tablespaces);
static void perform_base_backup(basebackup_options *opt);
static void parse_basebackup_options(List *options, basebackup_options *opt);
static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
static int	compareWalFileNames(const void *a, const void *b);
static void throttle(size_t increment);
static bool is_checksummed_file(const char *fullpath, const char *filename);
75
/*
 * Was the backup currently in-progress initiated in recovery mode?  Set from
 * RecoveryInProgress() at the start of perform_base_backup().
 */
static bool backup_started_in_recovery = false;

/* Relative path of temporary statistics directory */
static char *statrelpath = NULL;

/*
 * Size of each block sent into the tar stream for larger files.
 */
#define TAR_SEND_SIZE 32768

/*
 * How frequently to throttle, as a fraction of the specified rate-second.
 */
#define THROTTLING_FREQUENCY	8

/*
 * Checks whether we encountered any error in fread().  fread() doesn't give
 * any clue what has happened, so we check with ferror().  Also, neither
 * fread() nor ferror() set errno, so we just throw a generic error.
 *
 * Wrapped in do { ... } while (0) so it behaves as a single statement.
 */
#define CHECK_FREAD_ERROR(fp, filename) \
do { \
	if (ferror(fp)) \
		ereport(ERROR, \
				(errmsg("could not read from file \"%s\"", filename))); \
} while (0)

/*
 * Number of bytes that make up one throttling sample; transferring this
 * many bytes may cause the sender to sleep.  Computed from the requested
 * rate divided by THROTTLING_FREQUENCY.
 */
static uint64 throttling_sample;

/*
 * Amount of data already transferred but not yet throttled.  Set to -1 when
 * throttling is disabled.
 */
static int64 throttling_counter;

/* The minimum time required to transfer throttling_sample bytes. */
static TimeOffset elapsed_min_unit;

/* The last check of the transfer rate. */
static TimestampTz throttled_last;

/* The starting XLOG position of the base backup. */
static XLogRecPtr startptr;

/*
 * Total number of checksum failures during base backup.  Reset to zero at
 * the start of perform_base_backup().
 */
static int64 total_checksum_failures;

/* Do not verify checksums. */
static bool noverify_checksums = false;
124
/*
 * Definition of one element part of an exclusion list, used for paths part
 * of checksum validation or base backups.  "name" is the name of the file
 * or path to check for exclusion.  If "match_prefix" is true, any items
 * matching the name as prefix are excluded.
 *
 * Lists built from this struct are terminated by an entry whose "name" is
 * NULL.
 */
struct exclude_list_item
{
	const char *name;			/* file or path name to exclude */
	bool		match_prefix;	/* true: exclude anything starting with name */
};
136
/*
 * The contents of these directories are removed or recreated during server
 * start so they are not included in backups.  The directories themselves are
 * kept and included as empty to preserve access permissions.
 *
 * The array is terminated by a NULL entry.
 *
 * Note: this list should be kept in sync with the filter lists in pg_rewind's
 * filemap.c.
 */
static const char *excludeDirContents[] =
{
	/*
	 * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped even
	 * when stats_temp_directory is set because PGSS_TEXT_FILE is always
	 * created there.
	 */
	PG_STAT_TMP_DIR,

	/*
	 * It is generally not useful to backup the contents of this directory
	 * even if the intention is to restore to another master. See backup.sgml
	 * for a more detailed description.
	 */
	"pg_replslot",

	/* Contents removed on startup, see dsm_cleanup_for_mmap(). */
	PG_DYNSHMEM_DIR,

	/* Contents removed on startup, see AsyncShmemInit(). */
	"pg_notify",

	/*
	 * Old contents are loaded for possible debugging but are not required for
	 * normal operation, see OldSerXidInit().
	 */
	"pg_serial",

	/* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */
	"pg_snapshots",

	/* Contents zeroed on startup, see StartupSUBTRANS(). */
	"pg_subtrans",

	/* end of list */
	NULL
};
182
/*
 * List of files excluded from backups.
 *
 * Entries with match_prefix = false must match the whole file name; entries
 * with match_prefix = true exclude every file whose name starts with the
 * given string.  The list is terminated by an entry with a NULL name.
 */
static const struct exclude_list_item excludeFiles[] =
{
	/* Skip auto conf temporary file. */
	{PG_AUTOCONF_FILENAME ".tmp", false},

	/* Skip current log file temporary file */
	{LOG_METAINFO_DATAFILE_TMP, false},

	/*
	 * Skip relation cache because it is rebuilt on startup.  This includes
	 * temporary files.
	 */
	{RELCACHE_INIT_FILENAME, true},

	/*
	 * If there's a backup_label or tablespace_map file, it belongs to a
	 * backup started by the user with pg_start_backup().  It is *not* correct
	 * for this backup.  Our backup_label/tablespace_map is injected into the
	 * tar separately.
	 */
	{BACKUP_LABEL_FILE, false},
	{TABLESPACE_MAP, false},

	/* Skip the postmaster's pid and options files. */
	{"postmaster.pid", false},
	{"postmaster.opts", false},

	/* end of list */
	{NULL, false}
};
215
/*
 * List of files excluded from checksum validation.
 *
 * Uses the same entry format as excludeFiles: match_prefix = true matches
 * any name starting with the given string.  The "config_exec_params" entry
 * is only present in EXEC_BACKEND builds.  The list is terminated by an
 * entry with a NULL name.
 *
 * Note: this list should be kept in sync with what pg_verify_checksums.c
 * includes.
 */
static const struct exclude_list_item noChecksumFiles[] = {
	{"pg_control", false},
	{"pg_filenode.map", false},
	{"pg_internal.init", true},
	{"PG_VERSION", false},
#ifdef EXEC_BACKEND
	{"config_exec_params", true},
#endif
	{NULL, false}
};
232
/*
 * Actually do a base backup for the specified tablespaces.
 *
 * This is split out mainly to avoid complaints about "variable might be
 * clobbered by longjmp" from stupider versions of gcc.
 *
 * Outline, as implemented below:
 *	1. Call do_pg_start_backup() and send the start position to the client.
 *	2. Inside an error-cleanup block: send the tablespace header, set up
 *	   throttling, stream every tablespace as a tar (the main data directory
 *	   is the entry with path == NULL), then call do_pg_stop_backup().
 *	3. If opt->includewal is set, append the WAL segments between the start
 *	   and end positions, plus all timeline history files, to the last tar.
 *	4. Send the end position, then raise an ERROR if any checksum failures
 *	   were counted in total_checksum_failures.
 *
 * 'opt' holds the options produced by parse_basebackup_options().
 */
static void
perform_base_backup(basebackup_options *opt)
{
	TimeLineID	starttli;
	XLogRecPtr	endptr;
	TimeLineID	endtli;
	StringInfo	labelfile;
	StringInfo	tblspc_map_file = NULL;
	int			datadirpathlen;
	List	   *tablespaces = NIL;

	datadirpathlen = strlen(DataDir);

	/* Remembered so sendDir() can detect a promotion during the backup. */
	backup_started_in_recovery = RecoveryInProgress();

	/* Both buffers are handed to do_pg_start_backup() below. */
	labelfile = makeStringInfo();
	tblspc_map_file = makeStringInfo();

	total_checksum_failures = 0;

	startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli,
								  labelfile, &tablespaces,
								  tblspc_map_file,
								  opt->progress, opt->sendtblspcmapfile);

	/*
	 * Once do_pg_start_backup has been called, ensure that any failure causes
	 * us to abort the backup so we don't "leak" a backup counter. For this
	 * reason, *all* functionality between do_pg_start_backup() and the end of
	 * do_pg_stop_backup() should be inside the error cleanup block!
	 */

	PG_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, BoolGetDatum(false));
	{
		ListCell   *lc;
		tablespaceinfo *ti;

		SendXlogRecPtrResult(startptr, starttli);

		/*
		 * Calculate the relative path of temporary statistics directory in
		 * order to skip the files which are located in that directory later.
		 */
		if (is_absolute_path(pgstat_stat_directory) &&
			strncmp(pgstat_stat_directory, DataDir, datadirpathlen) == 0)
			statrelpath = psprintf("./%s", pgstat_stat_directory + datadirpathlen + 1);
		else if (strncmp(pgstat_stat_directory, "./", 2) != 0)
			statrelpath = psprintf("./%s", pgstat_stat_directory);
		else
			statrelpath = pgstat_stat_directory;

		/*
		 * Add a node for the base directory at the end.  Its path stays NULL
		 * (palloc0), which is how the main data directory is recognized
		 * below; the size is only computed when progress reporting was
		 * requested, otherwise it is -1.
		 */
		ti = palloc0(sizeof(tablespaceinfo));
		ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1;
		tablespaces = lappend(tablespaces, ti);

		/* Send tablespace header */
		SendBackupHeader(tablespaces);

		/* Setup and activate network throttling, if client requested it */
		if (opt->maxrate > 0)
		{
			throttling_sample =
				(int64) opt->maxrate * (int64) 1024 / THROTTLING_FREQUENCY;

			/*
			 * The minimum amount of time for throttling_sample bytes to be
			 * transferred.
			 */
			elapsed_min_unit = USECS_PER_SEC / THROTTLING_FREQUENCY;

			/* Enable throttling. */
			throttling_counter = 0;

			/* The 'real data' starts now (header was ignored). */
			throttled_last = GetCurrentTimestamp();
		}
		else
		{
			/* Disable throttling. */
			throttling_counter = -1;
		}

		/* Send off our tablespaces one by one */
		foreach(lc, tablespaces)
		{
			/* NB: this declaration shadows the outer 'ti' declared above. */
			tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
			StringInfoData buf;

			/* Send CopyOutResponse message */
			pq_beginmessage(&buf, 'H');
			pq_sendbyte(&buf, 0);	/* overall format */
			pq_sendint16(&buf, 0);	/* natts */
			pq_endmessage(&buf);

			if (ti->path == NULL)
			{
				struct stat statbuf;

				/* In the main tar, include the backup_label first... */
				sendFileWithContent(BACKUP_LABEL_FILE, labelfile->data);

				/*
				 * Send tablespace_map file if required and then the bulk of
				 * the files.
				 */
				if (tblspc_map_file && opt->sendtblspcmapfile)
				{
					sendFileWithContent(TABLESPACE_MAP, tblspc_map_file->data);
					sendDir(".", 1, false, tablespaces, false);
				}
				else
					sendDir(".", 1, false, tablespaces, true);

				/* ... and pg_control after everything else. */
				if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
					ereport(ERROR,
							(errcode_for_file_access(),
							 errmsg("could not stat control file \"%s\": %m",
									XLOG_CONTROL_FILE)));
				sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf, false);
			}
			else
				sendTablespace(ti->path, false);

			/*
			 * If we're including WAL, and this is the main data directory we
			 * don't terminate the tar stream here. Instead, we will append
			 * the xlog files below and terminate it then. This is safe since
			 * the main data directory is always sent *last*.
			 */
			if (opt->includewal && ti->path == NULL)
			{
				Assert(lnext(lc) == NULL);
			}
			else
				pq_putemptymessage('c');	/* CopyDone */
		}

		endptr = do_pg_stop_backup(labelfile->data, !opt->nowait, &endtli);
	}
	PG_END_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, BoolGetDatum(false));


	if (opt->includewal)
	{
		/*
		 * We've left the last tar file "open", so we can now append the
		 * required WAL files to it.
		 */
		char		pathbuf[MAXPGPATH];
		XLogSegNo	segno;
		XLogSegNo	startsegno;
		XLogSegNo	endsegno;
		struct stat statbuf;
		List	   *historyFileList = NIL;
		List	   *walFileList = NIL;
		char	  **walFiles;
		int			nWalFiles;
		char		firstoff[MAXFNAMELEN];
		char		lastoff[MAXFNAMELEN];
		DIR		   *dir;
		struct dirent *de;
		int			i;
		ListCell   *lc;
		TimeLineID	tli;

		/*
		 * I'd rather not worry about timelines here, so scan pg_wal and
		 * include all WAL files in the range between 'startptr' and 'endptr',
		 * regardless of the timeline the file is stamped with. If there are
		 * some spurious WAL files belonging to timelines that don't belong in
		 * this server's history, they will be included too. Normally there
		 * shouldn't be such files, but if there are, there's little harm in
		 * including them.
		 */
		XLByteToSeg(startptr, startsegno, wal_segment_size);
		XLogFileName(firstoff, ThisTimeLineID, startsegno, wal_segment_size);
		XLByteToPrevSeg(endptr, endsegno, wal_segment_size);
		XLogFileName(lastoff, ThisTimeLineID, endsegno, wal_segment_size);

		dir = AllocateDir("pg_wal");
		while ((de = ReadDir(dir, "pg_wal")) != NULL)
		{
			/*
			 * Does it look like a WAL segment, and is it in the range?  The
			 * first 8 characters of each name are skipped so the comparison
			 * ignores the timeline portion, as in compareWalFileNames().
			 */
			if (IsXLogFileName(de->d_name) &&
				strcmp(de->d_name + 8, firstoff + 8) >= 0 &&
				strcmp(de->d_name + 8, lastoff + 8) <= 0)
			{
				walFileList = lappend(walFileList, pstrdup(de->d_name));
			}
			/* Does it look like a timeline history file? */
			else if (IsTLHistoryFileName(de->d_name))
			{
				historyFileList = lappend(historyFileList, pstrdup(de->d_name));
			}
		}
		FreeDir(dir);

		/*
		 * Before we go any further, check that none of the WAL segments we
		 * need were removed.
		 */
		CheckXLogRemoved(startsegno, ThisTimeLineID);

		/*
		 * Put the WAL filenames into an array, and sort. We send the files in
		 * order from oldest to newest, to reduce the chance that a file is
		 * recycled before we get a chance to send it over.
		 */
		nWalFiles = list_length(walFileList);
		walFiles = palloc(nWalFiles * sizeof(char *));
		i = 0;
		foreach(lc, walFileList)
		{
			walFiles[i++] = lfirst(lc);
		}
		qsort(walFiles, nWalFiles, sizeof(char *), compareWalFileNames);

		/*
		 * There must be at least one xlog file in the pg_wal directory, since
		 * we are doing backup-including-xlog.
		 */
		if (nWalFiles < 1)
			ereport(ERROR,
					(errmsg("could not find any WAL files")));

		/*
		 * Sanity check: the first and last segment should cover startptr and
		 * endptr, with no gaps in between.
		 */
		XLogFromFileName(walFiles[0], &tli, &segno, wal_segment_size);
		if (segno != startsegno)
		{
			char		startfname[MAXFNAMELEN];

			XLogFileName(startfname, ThisTimeLineID, startsegno,
						 wal_segment_size);
			ereport(ERROR,
					(errmsg("could not find WAL file \"%s\"", startfname)));
		}

		/*
		 * Each file must carry either the same segment number as the
		 * previous one (presumably the same segment on a different
		 * timeline) or the immediately following segment number.
		 */
		for (i = 0; i < nWalFiles; i++)
		{
			XLogSegNo	currsegno = segno;
			XLogSegNo	nextsegno = segno + 1;

			XLogFromFileName(walFiles[i], &tli, &segno, wal_segment_size);
			if (!(nextsegno == segno || currsegno == segno))
			{
				char		nextfname[MAXFNAMELEN];

				XLogFileName(nextfname, ThisTimeLineID, nextsegno,
							 wal_segment_size);
				ereport(ERROR,
						(errmsg("could not find WAL file \"%s\"", nextfname)));
			}
		}
		if (segno != endsegno)
		{
			char		endfname[MAXFNAMELEN];

			XLogFileName(endfname, ThisTimeLineID, endsegno, wal_segment_size);
			ereport(ERROR,
					(errmsg("could not find WAL file \"%s\"", endfname)));
		}

		/* Ok, we have everything we need. Send the WAL files. */
		for (i = 0; i < nWalFiles; i++)
		{
			FILE	   *fp;
			char		buf[TAR_SEND_SIZE];
			size_t		cnt;
			pgoff_t		len = 0;

			snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", walFiles[i]);
			XLogFromFileName(walFiles[i], &tli, &segno, wal_segment_size);

			fp = AllocateFile(pathbuf, "rb");
			if (fp == NULL)
			{
				int			save_errno = errno;

				/*
				 * Most likely reason for this is that the file was already
				 * removed by a checkpoint, so check for that to get a better
				 * error message.
				 */
				CheckXLogRemoved(segno, tli);

				/* Restore errno so %m below reports the open failure. */
				errno = save_errno;
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not open file \"%s\": %m", pathbuf)));
			}

			if (fstat(fileno(fp), &statbuf) != 0)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not stat file \"%s\": %m",
								pathbuf)));
			if (statbuf.st_size != wal_segment_size)
			{
				CheckXLogRemoved(segno, tli);
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("unexpected WAL file size \"%s\"", walFiles[i])));
			}

			/* send the WAL file itself */
			_tarWriteHeader(pathbuf, NULL, &statbuf, false);

			/* Never read more than the bytes remaining in one segment. */
			while ((cnt = fread(buf, 1,
								Min(sizeof(buf), wal_segment_size - len),
								fp)) > 0)
			{
				CheckXLogRemoved(segno, tli);
				/* Send the chunk as a CopyData message */
				if (pq_putmessage('d', buf, cnt))
					ereport(ERROR,
							(errmsg("base backup could not send data, aborting backup")));

				len += cnt;
				throttle(cnt);

				if (len == wal_segment_size)
					break;
			}

			CHECK_FREAD_ERROR(fp, pathbuf);

			if (len != wal_segment_size)
			{
				CheckXLogRemoved(segno, tli);
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("unexpected WAL file size \"%s\"", walFiles[i])));
			}

			/* wal_segment_size is a multiple of 512, so no need for padding */

			FreeFile(fp);

			/*
			 * Mark file as archived, otherwise files can get archived again
			 * after promotion of a new node. This is in line with
			 * walreceiver.c always doing an XLogArchiveForceDone() after a
			 * complete segment.
			 */
			StatusFilePath(pathbuf, walFiles[i], ".done");
			sendFileWithContent(pathbuf, "");
		}

		/*
		 * Send timeline history files too. Only the latest timeline history
		 * file is required for recovery, and even that only if there happens
		 * to be a timeline switch in the first WAL segment that contains the
		 * checkpoint record, or if we're taking a base backup from a standby
		 * server and the target timeline changes while the backup is taken.
		 * But they are small and highly useful for debugging purposes, so
		 * better include them all, always.
		 */
		foreach(lc, historyFileList)
		{
			char	   *fname = lfirst(lc);

			snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", fname);

			if (lstat(pathbuf, &statbuf) != 0)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not stat file \"%s\": %m", pathbuf)));

			sendFile(pathbuf, pathbuf, &statbuf, false);

			/* unconditionally mark file as archived */
			StatusFilePath(pathbuf, fname, ".done");
			sendFileWithContent(pathbuf, "");
		}

		/* Send CopyDone message for the last tar file */
		pq_putemptymessage('c');
	}
	SendXlogRecPtrResult(endptr, endtli);

	/*
	 * Report checksum failures.  The WARNING with the total count is only
	 * emitted when there was more than one failure; any non-zero count ends
	 * in an ERROR.
	 */
	if (total_checksum_failures)
	{
		if (total_checksum_failures > 1)
		{
			char		buf[64];

			snprintf(buf, sizeof(buf), INT64_FORMAT, total_checksum_failures);

			ereport(WARNING,
					(errmsg("%s total checksum verification failures", buf)));
		}
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("checksum verification failure during base backup")));
	}

}
640
/*
 * qsort() comparator for an array of WAL segment file name strings.
 *
 * Each element is a pointer to a NUL-terminated file name.  The first 8
 * characters of every name (the timeline portion) are skipped, so names are
 * ordered purely by their log/seg portion.
 */
static int
compareWalFileNames(const void *a, const void *b)
{
	const char *const *lhs = (const char *const *) a;
	const char *const *rhs = (const char *const *) b;

	return strcmp(*lhs + 8, *rhs + 8);
}
653
654 /*
655 * Parse the base backup options passed down by the parser
656 */
657 static void
parse_basebackup_options(List * options,basebackup_options * opt)658 parse_basebackup_options(List *options, basebackup_options *opt)
659 {
660 ListCell *lopt;
661 bool o_label = false;
662 bool o_progress = false;
663 bool o_fast = false;
664 bool o_nowait = false;
665 bool o_wal = false;
666 bool o_maxrate = false;
667 bool o_tablespace_map = false;
668 bool o_noverify_checksums = false;
669
670 MemSet(opt, 0, sizeof(*opt));
671 foreach(lopt, options)
672 {
673 DefElem *defel = (DefElem *) lfirst(lopt);
674
675 if (strcmp(defel->defname, "label") == 0)
676 {
677 if (o_label)
678 ereport(ERROR,
679 (errcode(ERRCODE_SYNTAX_ERROR),
680 errmsg("duplicate option \"%s\"", defel->defname)));
681 opt->label = strVal(defel->arg);
682 o_label = true;
683 }
684 else if (strcmp(defel->defname, "progress") == 0)
685 {
686 if (o_progress)
687 ereport(ERROR,
688 (errcode(ERRCODE_SYNTAX_ERROR),
689 errmsg("duplicate option \"%s\"", defel->defname)));
690 opt->progress = true;
691 o_progress = true;
692 }
693 else if (strcmp(defel->defname, "fast") == 0)
694 {
695 if (o_fast)
696 ereport(ERROR,
697 (errcode(ERRCODE_SYNTAX_ERROR),
698 errmsg("duplicate option \"%s\"", defel->defname)));
699 opt->fastcheckpoint = true;
700 o_fast = true;
701 }
702 else if (strcmp(defel->defname, "nowait") == 0)
703 {
704 if (o_nowait)
705 ereport(ERROR,
706 (errcode(ERRCODE_SYNTAX_ERROR),
707 errmsg("duplicate option \"%s\"", defel->defname)));
708 opt->nowait = true;
709 o_nowait = true;
710 }
711 else if (strcmp(defel->defname, "wal") == 0)
712 {
713 if (o_wal)
714 ereport(ERROR,
715 (errcode(ERRCODE_SYNTAX_ERROR),
716 errmsg("duplicate option \"%s\"", defel->defname)));
717 opt->includewal = true;
718 o_wal = true;
719 }
720 else if (strcmp(defel->defname, "max_rate") == 0)
721 {
722 long maxrate;
723
724 if (o_maxrate)
725 ereport(ERROR,
726 (errcode(ERRCODE_SYNTAX_ERROR),
727 errmsg("duplicate option \"%s\"", defel->defname)));
728
729 maxrate = intVal(defel->arg);
730 if (maxrate < MAX_RATE_LOWER || maxrate > MAX_RATE_UPPER)
731 ereport(ERROR,
732 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
733 errmsg("%d is outside the valid range for parameter \"%s\" (%d .. %d)",
734 (int) maxrate, "MAX_RATE", MAX_RATE_LOWER, MAX_RATE_UPPER)));
735
736 opt->maxrate = (uint32) maxrate;
737 o_maxrate = true;
738 }
739 else if (strcmp(defel->defname, "tablespace_map") == 0)
740 {
741 if (o_tablespace_map)
742 ereport(ERROR,
743 (errcode(ERRCODE_SYNTAX_ERROR),
744 errmsg("duplicate option \"%s\"", defel->defname)));
745 opt->sendtblspcmapfile = true;
746 o_tablespace_map = true;
747 }
748 else if (strcmp(defel->defname, "noverify_checksums") == 0)
749 {
750 if (o_noverify_checksums)
751 ereport(ERROR,
752 (errcode(ERRCODE_SYNTAX_ERROR),
753 errmsg("duplicate option \"%s\"", defel->defname)));
754 noverify_checksums = true;
755 o_noverify_checksums = true;
756 }
757 else
758 elog(ERROR, "option \"%s\" not recognized",
759 defel->defname);
760 }
761 if (opt->label == NULL)
762 opt->label = "base backup";
763 }
764
765
766 /*
767 * SendBaseBackup() - send a complete base backup.
768 *
769 * The function will put the system into backup mode like pg_start_backup()
770 * does, so that the backup is consistent even though we read directly from
771 * the filesystem, bypassing the buffer cache.
772 */
773 void
SendBaseBackup(BaseBackupCmd * cmd)774 SendBaseBackup(BaseBackupCmd *cmd)
775 {
776 basebackup_options opt;
777
778 parse_basebackup_options(cmd->options, &opt);
779
780 WalSndSetState(WALSNDSTATE_BACKUP);
781
782 if (update_process_title)
783 {
784 char activitymsg[50];
785
786 snprintf(activitymsg, sizeof(activitymsg), "sending backup \"%s\"",
787 opt.label);
788 set_ps_display(activitymsg, false);
789 }
790
791 perform_base_backup(&opt);
792 }
793
794 static void
send_int8_string(StringInfoData * buf,int64 intval)795 send_int8_string(StringInfoData *buf, int64 intval)
796 {
797 char is[32];
798
799 sprintf(is, INT64_FORMAT, intval);
800 pq_sendint32(buf, strlen(is));
801 pq_sendbytes(buf, is, strlen(is));
802 }
803
804 static void
SendBackupHeader(List * tablespaces)805 SendBackupHeader(List *tablespaces)
806 {
807 StringInfoData buf;
808 ListCell *lc;
809
810 /* Construct and send the directory information */
811 pq_beginmessage(&buf, 'T'); /* RowDescription */
812 pq_sendint16(&buf, 3); /* 3 fields */
813
814 /* First field - spcoid */
815 pq_sendstring(&buf, "spcoid");
816 pq_sendint32(&buf, 0); /* table oid */
817 pq_sendint16(&buf, 0); /* attnum */
818 pq_sendint32(&buf, OIDOID); /* type oid */
819 pq_sendint16(&buf, 4); /* typlen */
820 pq_sendint32(&buf, 0); /* typmod */
821 pq_sendint16(&buf, 0); /* format code */
822
823 /* Second field - spcpath */
824 pq_sendstring(&buf, "spclocation");
825 pq_sendint32(&buf, 0);
826 pq_sendint16(&buf, 0);
827 pq_sendint32(&buf, TEXTOID);
828 pq_sendint16(&buf, -1);
829 pq_sendint32(&buf, 0);
830 pq_sendint16(&buf, 0);
831
832 /* Third field - size */
833 pq_sendstring(&buf, "size");
834 pq_sendint32(&buf, 0);
835 pq_sendint16(&buf, 0);
836 pq_sendint32(&buf, INT8OID);
837 pq_sendint16(&buf, 8);
838 pq_sendint32(&buf, 0);
839 pq_sendint16(&buf, 0);
840 pq_endmessage(&buf);
841
842 foreach(lc, tablespaces)
843 {
844 tablespaceinfo *ti = lfirst(lc);
845
846 /* Send one datarow message */
847 pq_beginmessage(&buf, 'D');
848 pq_sendint16(&buf, 3); /* number of columns */
849 if (ti->path == NULL)
850 {
851 pq_sendint32(&buf, -1); /* Length = -1 ==> NULL */
852 pq_sendint32(&buf, -1);
853 }
854 else
855 {
856 Size len;
857
858 len = strlen(ti->oid);
859 pq_sendint32(&buf, len);
860 pq_sendbytes(&buf, ti->oid, len);
861
862 len = strlen(ti->path);
863 pq_sendint32(&buf, len);
864 pq_sendbytes(&buf, ti->path, len);
865 }
866 if (ti->size >= 0)
867 send_int8_string(&buf, ti->size / 1024);
868 else
869 pq_sendint32(&buf, -1); /* NULL */
870
871 pq_endmessage(&buf);
872 }
873
874 /* Send a CommandComplete message */
875 pq_puttextmessage('C', "SELECT");
876 }
877
878 /*
879 * Send a single resultset containing just a single
880 * XLogRecPtr record (in text format)
881 */
882 static void
SendXlogRecPtrResult(XLogRecPtr ptr,TimeLineID tli)883 SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli)
884 {
885 StringInfoData buf;
886 char str[MAXFNAMELEN];
887 Size len;
888
889 pq_beginmessage(&buf, 'T'); /* RowDescription */
890 pq_sendint16(&buf, 2); /* 2 fields */
891
892 /* Field headers */
893 pq_sendstring(&buf, "recptr");
894 pq_sendint32(&buf, 0); /* table oid */
895 pq_sendint16(&buf, 0); /* attnum */
896 pq_sendint32(&buf, TEXTOID); /* type oid */
897 pq_sendint16(&buf, -1);
898 pq_sendint32(&buf, 0);
899 pq_sendint16(&buf, 0);
900
901 pq_sendstring(&buf, "tli");
902 pq_sendint32(&buf, 0); /* table oid */
903 pq_sendint16(&buf, 0); /* attnum */
904
905 /*
906 * int8 may seem like a surprising data type for this, but in theory int4
907 * would not be wide enough for this, as TimeLineID is unsigned.
908 */
909 pq_sendint32(&buf, INT8OID); /* type oid */
910 pq_sendint16(&buf, -1);
911 pq_sendint32(&buf, 0);
912 pq_sendint16(&buf, 0);
913 pq_endmessage(&buf);
914
915 /* Data row */
916 pq_beginmessage(&buf, 'D');
917 pq_sendint16(&buf, 2); /* number of columns */
918
919 len = snprintf(str, sizeof(str),
920 "%X/%X", (uint32) (ptr >> 32), (uint32) ptr);
921 pq_sendint32(&buf, len);
922 pq_sendbytes(&buf, str, len);
923
924 len = snprintf(str, sizeof(str), "%u", tli);
925 pq_sendint32(&buf, len);
926 pq_sendbytes(&buf, str, len);
927
928 pq_endmessage(&buf);
929
930 /* Send a CommandComplete message */
931 pq_puttextmessage('C', "SELECT");
932 }
933
934 /*
935 * Inject a file with given name and content in the output tar stream.
936 */
937 static void
sendFileWithContent(const char * filename,const char * content)938 sendFileWithContent(const char *filename, const char *content)
939 {
940 struct stat statbuf;
941 int pad,
942 len;
943
944 len = strlen(content);
945
946 /*
947 * Construct a stat struct for the backup_label file we're injecting in
948 * the tar.
949 */
950 /* Windows doesn't have the concept of uid and gid */
951 #ifdef WIN32
952 statbuf.st_uid = 0;
953 statbuf.st_gid = 0;
954 #else
955 statbuf.st_uid = geteuid();
956 statbuf.st_gid = getegid();
957 #endif
958 statbuf.st_mtime = time(NULL);
959 statbuf.st_mode = pg_file_create_mode;
960 statbuf.st_size = len;
961
962 _tarWriteHeader(filename, NULL, &statbuf, false);
963 /* Send the contents as a CopyData message */
964 pq_putmessage('d', content, len);
965
966 /* Pad to 512 byte boundary, per tar format requirements */
967 pad = ((len + 511) & ~511) - len;
968 if (pad > 0)
969 {
970 char buf[512];
971
972 MemSet(buf, 0, pad);
973 pq_putmessage('d', buf, pad);
974 }
975 }
976
977 /*
978 * Include the tablespace directory pointed to by 'path' in the output tar
979 * stream. If 'sizeonly' is true, we just calculate a total length and return
980 * it, without actually sending anything.
981 *
982 * Only used to send auxiliary tablespaces, not PGDATA.
983 */
984 int64
sendTablespace(char * path,bool sizeonly)985 sendTablespace(char *path, bool sizeonly)
986 {
987 int64 size;
988 char pathbuf[MAXPGPATH];
989 struct stat statbuf;
990
991 /*
992 * 'path' points to the tablespace location, but we only want to include
993 * the version directory in it that belongs to us.
994 */
995 snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path,
996 TABLESPACE_VERSION_DIRECTORY);
997
998 /*
999 * Store a directory entry in the tar file so we get the permissions
1000 * right.
1001 */
1002 if (lstat(pathbuf, &statbuf) != 0)
1003 {
1004 if (errno != ENOENT)
1005 ereport(ERROR,
1006 (errcode_for_file_access(),
1007 errmsg("could not stat file or directory \"%s\": %m",
1008 pathbuf)));
1009
1010 /* If the tablespace went away while scanning, it's no error. */
1011 return 0;
1012 }
1013
1014 size = _tarWriteHeader(TABLESPACE_VERSION_DIRECTORY, NULL, &statbuf,
1015 sizeonly);
1016
1017 /* Send all the files in the tablespace version directory */
1018 size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true);
1019
1020 return size;
1021 }
1022
1023 /*
1024 * Include all files from the given directory in the output tar stream. If
1025 * 'sizeonly' is true, we just calculate a total length and return it, without
1026 * actually sending anything.
1027 *
1028 * Omit any directory in the tablespaces list, to avoid backing up
1029 * tablespaces twice when they were created inside PGDATA.
1030 *
1031 * If sendtblspclinks is true, we need to include symlink
1032 * information in the tar file. If not, we can skip that
1033 * as it will be sent separately in the tablespace_map file.
1034 */
1035 static int64
sendDir(const char * path,int basepathlen,bool sizeonly,List * tablespaces,bool sendtblspclinks)1036 sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
1037 bool sendtblspclinks)
1038 {
1039 DIR *dir;
1040 struct dirent *de;
1041 char pathbuf[MAXPGPATH * 2];
1042 struct stat statbuf;
1043 int64 size = 0;
1044 const char *lastDir; /* Split last dir from parent path. */
1045 bool isDbDir = false; /* Does this directory contain relations? */
1046
1047 /*
1048 * Determine if the current path is a database directory that can contain
1049 * relations.
1050 *
1051 * Start by finding the location of the delimiter between the parent path
1052 * and the current path.
1053 */
1054 lastDir = last_dir_separator(path);
1055
1056 /* Does this path look like a database path (i.e. all digits)? */
1057 if (lastDir != NULL &&
1058 strspn(lastDir + 1, "0123456789") == strlen(lastDir + 1))
1059 {
1060 /* Part of path that contains the parent directory. */
1061 int parentPathLen = lastDir - path;
1062
1063 /*
1064 * Mark path as a database directory if the parent path is either
1065 * $PGDATA/base or a tablespace version path.
1066 */
1067 if (strncmp(path, "./base", parentPathLen) == 0 ||
1068 (parentPathLen >= (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) &&
1069 strncmp(lastDir - (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1),
1070 TABLESPACE_VERSION_DIRECTORY,
1071 sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) == 0))
1072 isDbDir = true;
1073 }
1074
1075 dir = AllocateDir(path);
1076 while ((de = ReadDir(dir, path)) != NULL)
1077 {
1078 int excludeIdx;
1079 bool excludeFound;
1080 ForkNumber relForkNum; /* Type of fork if file is a relation */
1081 int relOidChars; /* Chars in filename that are the rel oid */
1082
1083 /* Skip special stuff */
1084 if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
1085 continue;
1086
1087 /* Skip temporary files */
1088 if (strncmp(de->d_name,
1089 PG_TEMP_FILE_PREFIX,
1090 strlen(PG_TEMP_FILE_PREFIX)) == 0)
1091 continue;
1092
1093 /*
1094 * Check if the postmaster has signaled us to exit, and abort with an
1095 * error in that case. The error handler further up will call
1096 * do_pg_abort_backup() for us. Also check that if the backup was
1097 * started while still in recovery, the server wasn't promoted.
1098 * dp_pg_stop_backup() will check that too, but it's better to stop
1099 * the backup early than continue to the end and fail there.
1100 */
1101 CHECK_FOR_INTERRUPTS();
1102 if (RecoveryInProgress() != backup_started_in_recovery)
1103 ereport(ERROR,
1104 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1105 errmsg("the standby was promoted during online backup"),
1106 errhint("This means that the backup being taken is corrupt "
1107 "and should not be used. "
1108 "Try taking another online backup.")));
1109
1110 /* Scan for files that should be excluded */
1111 excludeFound = false;
1112 for (excludeIdx = 0; excludeFiles[excludeIdx].name != NULL; excludeIdx++)
1113 {
1114 int cmplen = strlen(excludeFiles[excludeIdx].name);
1115
1116 if (!excludeFiles[excludeIdx].match_prefix)
1117 cmplen++;
1118 if (strncmp(de->d_name, excludeFiles[excludeIdx].name, cmplen) == 0)
1119 {
1120 elog(DEBUG1, "file \"%s\" excluded from backup", de->d_name);
1121 excludeFound = true;
1122 break;
1123 }
1124 }
1125
1126 if (excludeFound)
1127 continue;
1128
1129 /* Exclude all forks for unlogged tables except the init fork */
1130 if (isDbDir &&
1131 parse_filename_for_nontemp_relation(de->d_name, &relOidChars,
1132 &relForkNum))
1133 {
1134 /* Never exclude init forks */
1135 if (relForkNum != INIT_FORKNUM)
1136 {
1137 char initForkFile[MAXPGPATH];
1138 char relOid[OIDCHARS + 1];
1139
1140 /*
1141 * If any other type of fork, check if there is an init fork
1142 * with the same OID. If so, the file can be excluded.
1143 */
1144 memcpy(relOid, de->d_name, relOidChars);
1145 relOid[relOidChars] = '\0';
1146 snprintf(initForkFile, sizeof(initForkFile), "%s/%s_init",
1147 path, relOid);
1148
1149 if (lstat(initForkFile, &statbuf) == 0)
1150 {
1151 elog(DEBUG2,
1152 "unlogged relation file \"%s\" excluded from backup",
1153 de->d_name);
1154
1155 continue;
1156 }
1157 }
1158 }
1159
1160 /* Exclude temporary relations */
1161 if (isDbDir && looks_like_temp_rel_name(de->d_name))
1162 {
1163 elog(DEBUG2,
1164 "temporary relation file \"%s\" excluded from backup",
1165 de->d_name);
1166
1167 continue;
1168 }
1169
1170 snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path, de->d_name);
1171
1172 /* Skip pg_control here to back up it last */
1173 if (strcmp(pathbuf, "./global/pg_control") == 0)
1174 continue;
1175
1176 if (lstat(pathbuf, &statbuf) != 0)
1177 {
1178 if (errno != ENOENT)
1179 ereport(ERROR,
1180 (errcode_for_file_access(),
1181 errmsg("could not stat file or directory \"%s\": %m",
1182 pathbuf)));
1183
1184 /* If the file went away while scanning, it's not an error. */
1185 continue;
1186 }
1187
1188 /* Scan for directories whose contents should be excluded */
1189 excludeFound = false;
1190 for (excludeIdx = 0; excludeDirContents[excludeIdx] != NULL; excludeIdx++)
1191 {
1192 if (strcmp(de->d_name, excludeDirContents[excludeIdx]) == 0)
1193 {
1194 elog(DEBUG1, "contents of directory \"%s\" excluded from backup", de->d_name);
1195 size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
1196 excludeFound = true;
1197 break;
1198 }
1199 }
1200
1201 if (excludeFound)
1202 continue;
1203
1204 /*
1205 * Exclude contents of directory specified by statrelpath if not set
1206 * to the default (pg_stat_tmp) which is caught in the loop above.
1207 */
1208 if (statrelpath != NULL && strcmp(pathbuf, statrelpath) == 0)
1209 {
1210 elog(DEBUG1, "contents of directory \"%s\" excluded from backup", statrelpath);
1211 size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
1212 continue;
1213 }
1214
1215 /*
1216 * We can skip pg_wal, the WAL segments need to be fetched from the
1217 * WAL archive anyway. But include it as an empty directory anyway, so
1218 * we get permissions right.
1219 */
1220 if (strcmp(pathbuf, "./pg_wal") == 0)
1221 {
1222 /* If pg_wal is a symlink, write it as a directory anyway */
1223 size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
1224
1225 /*
1226 * Also send archive_status directory (by hackishly reusing
1227 * statbuf from above ...).
1228 */
1229 size += _tarWriteHeader("./pg_wal/archive_status", NULL, &statbuf,
1230 sizeonly);
1231
1232 continue; /* don't recurse into pg_wal */
1233 }
1234
1235 /* Allow symbolic links in pg_tblspc only */
1236 if (strcmp(path, "./pg_tblspc") == 0 &&
1237 #ifndef WIN32
1238 S_ISLNK(statbuf.st_mode)
1239 #else
1240 pgwin32_is_junction(pathbuf)
1241 #endif
1242 )
1243 {
1244 #if defined(HAVE_READLINK) || defined(WIN32)
1245 char linkpath[MAXPGPATH];
1246 int rllen;
1247
1248 rllen = readlink(pathbuf, linkpath, sizeof(linkpath));
1249 if (rllen < 0)
1250 ereport(ERROR,
1251 (errcode_for_file_access(),
1252 errmsg("could not read symbolic link \"%s\": %m",
1253 pathbuf)));
1254 if (rllen >= sizeof(linkpath))
1255 ereport(ERROR,
1256 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1257 errmsg("symbolic link \"%s\" target is too long",
1258 pathbuf)));
1259 linkpath[rllen] = '\0';
1260
1261 size += _tarWriteHeader(pathbuf + basepathlen + 1, linkpath,
1262 &statbuf, sizeonly);
1263 #else
1264
1265 /*
1266 * If the platform does not have symbolic links, it should not be
1267 * possible to have tablespaces - clearly somebody else created
1268 * them. Warn about it and ignore.
1269 */
1270 ereport(WARNING,
1271 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1272 errmsg("tablespaces are not supported on this platform")));
1273 continue;
1274 #endif /* HAVE_READLINK */
1275 }
1276 else if (S_ISDIR(statbuf.st_mode))
1277 {
1278 bool skip_this_dir = false;
1279 ListCell *lc;
1280
1281 /*
1282 * Store a directory entry in the tar file so we can get the
1283 * permissions right.
1284 */
1285 size += _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf,
1286 sizeonly);
1287
1288 /*
1289 * Call ourselves recursively for a directory, unless it happens
1290 * to be a separate tablespace located within PGDATA.
1291 */
1292 foreach(lc, tablespaces)
1293 {
1294 tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
1295
1296 /*
1297 * ti->rpath is the tablespace relative path within PGDATA, or
1298 * NULL if the tablespace has been properly located somewhere
1299 * else.
1300 *
1301 * Skip past the leading "./" in pathbuf when comparing.
1302 */
1303 if (ti->rpath && strcmp(ti->rpath, pathbuf + 2) == 0)
1304 {
1305 skip_this_dir = true;
1306 break;
1307 }
1308 }
1309
1310 /*
1311 * skip sending directories inside pg_tblspc, if not required.
1312 */
1313 if (strcmp(pathbuf, "./pg_tblspc") == 0 && !sendtblspclinks)
1314 skip_this_dir = true;
1315
1316 if (!skip_this_dir)
1317 size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, sendtblspclinks);
1318 }
1319 else if (S_ISREG(statbuf.st_mode))
1320 {
1321 bool sent = false;
1322
1323 if (!sizeonly)
1324 sent = sendFile(pathbuf, pathbuf + basepathlen + 1, &statbuf,
1325 true);
1326
1327 if (sent || sizeonly)
1328 {
1329 /* Add size, rounded up to 512byte block */
1330 size += ((statbuf.st_size + 511) & ~511);
1331 size += 512; /* Size of the header of the file */
1332 }
1333 }
1334 else
1335 ereport(WARNING,
1336 (errmsg("skipping special file \"%s\"", pathbuf)));
1337 }
1338 FreeDir(dir);
1339 return size;
1340 }
1341
1342 /*
1343 * Check if a file should have its checksum validated.
1344 * We validate checksums on files in regular tablespaces
1345 * (including global and default) only, and in those there
1346 * are some files that are explicitly excluded.
1347 */
1348 static bool
is_checksummed_file(const char * fullpath,const char * filename)1349 is_checksummed_file(const char *fullpath, const char *filename)
1350 {
1351 /* Check that the file is in a tablespace */
1352 if (strncmp(fullpath, "./global/", 9) == 0 ||
1353 strncmp(fullpath, "./base/", 7) == 0 ||
1354 strncmp(fullpath, "/", 1) == 0)
1355 {
1356 int excludeIdx;
1357
1358 /* Compare file against noChecksumFiles skip list */
1359 for (excludeIdx = 0; noChecksumFiles[excludeIdx].name != NULL; excludeIdx++)
1360 {
1361 int cmplen = strlen(noChecksumFiles[excludeIdx].name);
1362
1363 if (!noChecksumFiles[excludeIdx].match_prefix)
1364 cmplen++;
1365 if (strncmp(filename, noChecksumFiles[excludeIdx].name,
1366 cmplen) == 0)
1367 return false;
1368 }
1369
1370 return true;
1371 }
1372 else
1373 return false;
1374 }
1375
1376 /*****
1377 * Functions for handling tar file format
1378 *
1379 * Copied from pg_dump, but modified to work with libpq for sending
1380 */
1381
1382
1383 /*
1384 * Given the member, write the TAR header & send the file.
1385 *
1386 * If 'missing_ok' is true, will not throw an error if the file is not found.
1387 *
1388 * Returns true if the file was successfully sent, false if 'missing_ok',
1389 * and the file did not exist.
1390 */
1391 static bool
sendFile(const char * readfilename,const char * tarfilename,struct stat * statbuf,bool missing_ok)1392 sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf,
1393 bool missing_ok)
1394 {
1395 FILE *fp;
1396 BlockNumber blkno = 0;
1397 bool block_retry = false;
1398 char buf[TAR_SEND_SIZE];
1399 uint16 checksum;
1400 int checksum_failures = 0;
1401 off_t cnt;
1402 int i;
1403 pgoff_t len = 0;
1404 char *page;
1405 size_t pad;
1406 PageHeader phdr;
1407 int segmentno = 0;
1408 char *segmentpath;
1409 bool verify_checksum = false;
1410
1411 fp = AllocateFile(readfilename, "rb");
1412 if (fp == NULL)
1413 {
1414 if (errno == ENOENT && missing_ok)
1415 return false;
1416 ereport(ERROR,
1417 (errcode_for_file_access(),
1418 errmsg("could not open file \"%s\": %m", readfilename)));
1419 }
1420
1421 _tarWriteHeader(tarfilename, NULL, statbuf, false);
1422
1423 if (!noverify_checksums && DataChecksumsEnabled())
1424 {
1425 char *filename;
1426
1427 /*
1428 * Get the filename (excluding path). As last_dir_separator()
1429 * includes the last directory separator, we chop that off by
1430 * incrementing the pointer.
1431 */
1432 filename = last_dir_separator(readfilename) + 1;
1433
1434 if (is_checksummed_file(readfilename, filename))
1435 {
1436 verify_checksum = true;
1437
1438 /*
1439 * Cut off at the segment boundary (".") to get the segment number
1440 * in order to mix it into the checksum.
1441 */
1442 segmentpath = strstr(filename, ".");
1443 if (segmentpath != NULL)
1444 {
1445 segmentno = atoi(segmentpath + 1);
1446 if (segmentno == 0)
1447 ereport(ERROR,
1448 (errmsg("invalid segment number %d in file \"%s\"",
1449 segmentno, filename)));
1450 }
1451 }
1452 }
1453
1454 while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0)
1455 {
1456 /*
1457 * The checksums are verified at block level, so we iterate over the
1458 * buffer in chunks of BLCKSZ, after making sure that
1459 * TAR_SEND_SIZE/buf is divisible by BLCKSZ and we read a multiple of
1460 * BLCKSZ bytes.
1461 */
1462 Assert(TAR_SEND_SIZE % BLCKSZ == 0);
1463
1464 if (verify_checksum && (cnt % BLCKSZ != 0))
1465 {
1466 ereport(WARNING,
1467 (errmsg("cannot verify checksum in file \"%s\", block "
1468 "%d: read buffer size %d and page size %d "
1469 "differ",
1470 readfilename, blkno, (int) cnt, BLCKSZ)));
1471 verify_checksum = false;
1472 }
1473
1474 if (verify_checksum)
1475 {
1476 for (i = 0; i < cnt / BLCKSZ; i++)
1477 {
1478 page = buf + BLCKSZ * i;
1479
1480 /*
1481 * Only check pages which have not been modified since the
1482 * start of the base backup. Otherwise, they might have been
1483 * written only halfway and the checksum would not be valid.
1484 * However, replaying WAL would reinstate the correct page in
1485 * this case. We also skip completely new pages, since they
1486 * don't have a checksum yet.
1487 */
1488 if (!PageIsNew(page) && PageGetLSN(page) < startptr)
1489 {
1490 checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE);
1491 phdr = (PageHeader) page;
1492 if (phdr->pd_checksum != checksum)
1493 {
1494 /*
1495 * Retry the block on the first failure. It's
1496 * possible that we read the first 4K page of the
1497 * block just before postgres updated the entire block
1498 * so it ends up looking torn to us. We only need to
1499 * retry once because the LSN should be updated to
1500 * something we can ignore on the next pass. If the
1501 * error happens again then it is a true validation
1502 * failure.
1503 */
1504 if (block_retry == false)
1505 {
1506 /* Reread the failed block */
1507 if (fseek(fp, -(cnt - BLCKSZ * i), SEEK_CUR) == -1)
1508 {
1509 ereport(ERROR,
1510 (errcode_for_file_access(),
1511 errmsg("could not fseek in file \"%s\": %m",
1512 readfilename)));
1513 }
1514
1515 if (fread(buf + BLCKSZ * i, 1, BLCKSZ, fp) != BLCKSZ)
1516 {
1517 /*
1518 * If we hit end-of-file, a concurrent
1519 * truncation must have occurred, so break out
1520 * of this loop just as if the initial fread()
1521 * returned 0. We'll drop through to the same
1522 * code that handles that case. (We must fix
1523 * up cnt first, though.)
1524 */
1525 if (feof(fp))
1526 {
1527 cnt = BLCKSZ * i;
1528 break;
1529 }
1530
1531 ereport(ERROR,
1532 (errcode_for_file_access(),
1533 errmsg("could not reread block %d of file \"%s\": %m",
1534 blkno, readfilename)));
1535 }
1536
1537 if (fseek(fp, cnt - BLCKSZ * i - BLCKSZ, SEEK_CUR) == -1)
1538 {
1539 ereport(ERROR,
1540 (errcode_for_file_access(),
1541 errmsg("could not fseek in file \"%s\": %m",
1542 readfilename)));
1543 }
1544
1545 /* Set flag so we know a retry was attempted */
1546 block_retry = true;
1547
1548 /* Reset loop to validate the block again */
1549 i--;
1550 continue;
1551 }
1552
1553 checksum_failures++;
1554
1555 if (checksum_failures <= 5)
1556 ereport(WARNING,
1557 (errmsg("checksum verification failed in "
1558 "file \"%s\", block %d: calculated "
1559 "%X but expected %X",
1560 readfilename, blkno, checksum,
1561 phdr->pd_checksum)));
1562 if (checksum_failures == 5)
1563 ereport(WARNING,
1564 (errmsg("further checksum verification "
1565 "failures in file \"%s\" will not "
1566 "be reported", readfilename)));
1567 }
1568 }
1569 block_retry = false;
1570 blkno++;
1571 }
1572 }
1573
1574 /* Send the chunk as a CopyData message */
1575 if (pq_putmessage('d', buf, cnt))
1576 ereport(ERROR,
1577 (errmsg("base backup could not send data, aborting backup")));
1578
1579 len += cnt;
1580 throttle(cnt);
1581
1582 if (feof(fp) || len >= statbuf->st_size)
1583 {
1584 /*
1585 * Reached end of file. The file could be longer, if it was
1586 * extended while we were sending it, but for a base backup we can
1587 * ignore such extended data. It will be restored from WAL.
1588 */
1589 break;
1590 }
1591 }
1592
1593 CHECK_FREAD_ERROR(fp, readfilename);
1594
1595 /* If the file was truncated while we were sending it, pad it with zeros */
1596 if (len < statbuf->st_size)
1597 {
1598 MemSet(buf, 0, sizeof(buf));
1599 while (len < statbuf->st_size)
1600 {
1601 cnt = Min(sizeof(buf), statbuf->st_size - len);
1602 pq_putmessage('d', buf, cnt);
1603 len += cnt;
1604 throttle(cnt);
1605 }
1606 }
1607
1608 /*
1609 * Pad to 512 byte boundary, per tar format requirements. (This small
1610 * piece of data is probably not worth throttling.)
1611 */
1612 pad = ((len + 511) & ~511) - len;
1613 if (pad > 0)
1614 {
1615 MemSet(buf, 0, pad);
1616 pq_putmessage('d', buf, pad);
1617 }
1618
1619 FreeFile(fp);
1620
1621 if (checksum_failures > 1)
1622 {
1623 ereport(WARNING,
1624 (errmsg("file \"%s\" has a total of %d checksum verification "
1625 "failures", readfilename, checksum_failures)));
1626 }
1627 total_checksum_failures += checksum_failures;
1628
1629 return true;
1630 }
1631
1632
1633 static int64
_tarWriteHeader(const char * filename,const char * linktarget,struct stat * statbuf,bool sizeonly)1634 _tarWriteHeader(const char *filename, const char *linktarget,
1635 struct stat *statbuf, bool sizeonly)
1636 {
1637 char h[512];
1638 enum tarError rc;
1639
1640 if (!sizeonly)
1641 {
1642 rc = tarCreateHeader(h, filename, linktarget, statbuf->st_size,
1643 statbuf->st_mode, statbuf->st_uid, statbuf->st_gid,
1644 statbuf->st_mtime);
1645
1646 switch (rc)
1647 {
1648 case TAR_OK:
1649 break;
1650 case TAR_NAME_TOO_LONG:
1651 ereport(ERROR,
1652 (errmsg("file name too long for tar format: \"%s\"",
1653 filename)));
1654 break;
1655 case TAR_SYMLINK_TOO_LONG:
1656 ereport(ERROR,
1657 (errmsg("symbolic link target too long for tar format: "
1658 "file name \"%s\", target \"%s\"",
1659 filename, linktarget)));
1660 break;
1661 default:
1662 elog(ERROR, "unrecognized tar error: %d", rc);
1663 }
1664
1665 pq_putmessage('d', h, sizeof(h));
1666 }
1667
1668 return sizeof(h);
1669 }
1670
1671 /*
1672 * Write tar header for a directory. If the entry in statbuf is a link then
1673 * write it as a directory anyway.
1674 */
1675 static int64
_tarWriteDir(const char * pathbuf,int basepathlen,struct stat * statbuf,bool sizeonly)1676 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf,
1677 bool sizeonly)
1678 {
1679 /* If symlink, write it as a directory anyway */
1680 #ifndef WIN32
1681 if (S_ISLNK(statbuf->st_mode))
1682 #else
1683 if (pgwin32_is_junction(pathbuf))
1684 #endif
1685 statbuf->st_mode = S_IFDIR | pg_dir_create_mode;
1686
1687 return _tarWriteHeader(pathbuf + basepathlen + 1, NULL, statbuf, sizeonly);
1688 }
1689
1690 /*
1691 * Increment the network transfer counter by the given number of bytes,
1692 * and sleep if necessary to comply with the requested network transfer
1693 * rate.
1694 */
1695 static void
throttle(size_t increment)1696 throttle(size_t increment)
1697 {
1698 TimeOffset elapsed_min;
1699
1700 if (throttling_counter < 0)
1701 return;
1702
1703 throttling_counter += increment;
1704 if (throttling_counter < throttling_sample)
1705 return;
1706
1707 /* How much time should have elapsed at minimum? */
1708 elapsed_min = elapsed_min_unit *
1709 (throttling_counter / throttling_sample);
1710
1711 /*
1712 * Since the latch could be set repeatedly because of concurrently WAL
1713 * activity, sleep in a loop to ensure enough time has passed.
1714 */
1715 for (;;)
1716 {
1717 TimeOffset elapsed,
1718 sleep;
1719 int wait_result;
1720
1721 /* Time elapsed since the last measurement (and possible wake up). */
1722 elapsed = GetCurrentTimestamp() - throttled_last;
1723
1724 /* sleep if the transfer is faster than it should be */
1725 sleep = elapsed_min - elapsed;
1726 if (sleep <= 0)
1727 break;
1728
1729 ResetLatch(MyLatch);
1730
1731 /* We're eating a potentially set latch, so check for interrupts */
1732 CHECK_FOR_INTERRUPTS();
1733
1734 /*
1735 * (TAR_SEND_SIZE / throttling_sample * elapsed_min_unit) should be
1736 * the maximum time to sleep. Thus the cast to long is safe.
1737 */
1738 wait_result = WaitLatch(MyLatch,
1739 WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
1740 (long) (sleep / 1000),
1741 WAIT_EVENT_BASE_BACKUP_THROTTLE);
1742
1743 if (wait_result & WL_LATCH_SET)
1744 CHECK_FOR_INTERRUPTS();
1745
1746 /* Done waiting? */
1747 if (wait_result & WL_TIMEOUT)
1748 break;
1749 }
1750
1751 /*
1752 * As we work with integers, only whole multiple of throttling_sample was
1753 * processed. The rest will be done during the next call of this function.
1754 */
1755 throttling_counter %= throttling_sample;
1756
1757 /*
1758 * Time interval for the remaining amount and possible next increments
1759 * starts now.
1760 */
1761 throttled_last = GetCurrentTimestamp();
1762 }
1763