1 /***********************************************************************************************************************************
2 Backup Command
3 ***********************************************************************************************************************************/
4 #include "build.auto.h"
5
6 #include <string.h>
7 #include <sys/stat.h>
8 #include <time.h>
9 #include <unistd.h>
10
11 #include "command/archive/common.h"
12 #include "command/control/common.h"
13 #include "command/backup/backup.h"
14 #include "command/backup/common.h"
15 #include "command/backup/file.h"
16 #include "command/backup/protocol.h"
17 #include "command/check/common.h"
18 #include "command/stanza/common.h"
19 #include "common/crypto/cipherBlock.h"
20 #include "common/compress/helper.h"
21 #include "common/debug.h"
22 #include "common/io/filter/size.h"
23 #include "common/log.h"
24 #include "common/time.h"
25 #include "common/type/convert.h"
26 #include "common/type/json.h"
27 #include "config/config.h"
28 #include "db/helper.h"
29 #include "info/infoArchive.h"
30 #include "info/infoBackup.h"
31 #include "info/manifest.h"
32 #include "postgres/interface.h"
33 #include "postgres/version.h"
34 #include "protocol/helper.h"
35 #include "protocol/parallel.h"
36 #include "storage/helper.h"
37 #include "version.h"
38
39 /**********************************************************************************************************************************
40 Generate a unique backup label that does not contain a timestamp from a previous backup
41 ***********************************************************************************************************************************/
42 static String *
backupLabelCreate(BackupType type,const String * backupLabelPrior,time_t timestamp)43 backupLabelCreate(BackupType type, const String *backupLabelPrior, time_t timestamp)
44 {
45 FUNCTION_LOG_BEGIN(logLevelTrace);
46 FUNCTION_LOG_PARAM(STRING_ID, type);
47 FUNCTION_LOG_PARAM(STRING, backupLabelPrior);
48 FUNCTION_LOG_PARAM(TIME, timestamp);
49 FUNCTION_LOG_END();
50
51 ASSERT((type == backupTypeFull && backupLabelPrior == NULL) || (type != backupTypeFull && backupLabelPrior != NULL));
52 ASSERT(timestamp > 0);
53
54 String *result = NULL;
55
56 MEM_CONTEXT_TEMP_BEGIN()
57 {
58 const String *backupLabelLatest = NULL;
59
60 // Get the newest backup
61 const StringList *backupList = strLstSort(
62 storageListP(
63 storageRepo(), STRDEF(STORAGE_REPO_BACKUP),
64 .expression = backupRegExpP(.full = true, .differential = true, .incremental = true)),
65 sortOrderDesc);
66
67 if (!strLstEmpty(backupList))
68 backupLabelLatest = strLstGet(backupList, 0);
69
70 // Get the newest history
71 const StringList *historyYearList = strLstSort(
72 storageListP(storageRepo(), STRDEF(STORAGE_REPO_BACKUP "/" BACKUP_PATH_HISTORY), .expression = STRDEF("^2[0-9]{3}$")),
73 sortOrderDesc);
74
75 if (!strLstEmpty(historyYearList))
76 {
77 const StringList *historyList = strLstSort(
78 storageListP(
79 storageRepo(),
80 strNewFmt(STORAGE_REPO_BACKUP "/" BACKUP_PATH_HISTORY "/%s", strZ(strLstGet(historyYearList, 0))),
81 .expression = strNewFmt(
82 "%s\\.manifest\\.%s$",
83 strZ(backupRegExpP(.full = true, .differential = true, .incremental = true, .noAnchorEnd = true)),
84 strZ(compressTypeStr(compressTypeGz)))),
85 sortOrderDesc);
86
87 if (!strLstEmpty(historyList))
88 {
89 const String *historyLabelLatest = strLstGet(historyList, 0);
90
91 if (backupLabelLatest == NULL || strCmp(historyLabelLatest, backupLabelLatest) > 0)
92 backupLabelLatest = historyLabelLatest;
93 }
94 }
95
96 // Now that we have the latest label check if the provided timestamp will give us an even later label
97 result = backupLabelFormat(type, backupLabelPrior, timestamp);
98
99 if (backupLabelLatest != NULL && strCmp(result, backupLabelLatest) <= 0)
100 {
101 // If that didn't give us a later label then add one second. It's possible that two backups (they would need to be
102 // offline or halted online) have run very close together.
103 result = backupLabelFormat(type, backupLabelPrior, timestamp + 1);
104
105 // If the label is still not latest then error. There is probably a timezone change or massive clock skew.
106 if (strCmp(result, backupLabelLatest) <= 0)
107 {
108 THROW_FMT(
109 FormatError,
110 "new backup label '%s' is not later than latest backup label '%s'\n"
111 "HINT: has the timezone changed?\n"
112 "HINT: is there clock skew?",
113 strZ(result), strZ(backupLabelLatest));
114 }
115
116 // If adding a second worked then sleep the remainder of the current second so we don't start early
117 sleepMSec(MSEC_PER_SEC - (timeMSec() % MSEC_PER_SEC));
118 }
119
120 MEM_CONTEXT_PRIOR_BEGIN()
121 {
122 result = strDup(result);
123 }
124 MEM_CONTEXT_PRIOR_END();
125 }
126 MEM_CONTEXT_TEMP_END();
127
128 FUNCTION_LOG_RETURN(STRING, result);
129 }
130
131 /***********************************************************************************************************************************
132 Get the postgres database and storage objects
133 ***********************************************************************************************************************************/
134 #define FUNCTION_LOG_BACKUP_DATA_TYPE \
135 BackupData *
136 #define FUNCTION_LOG_BACKUP_DATA_FORMAT(value, buffer, bufferSize) \
137 objToLog(value, "BackupData", buffer, bufferSize)
138
139 typedef struct BackupData
140 {
141 unsigned int pgIdxPrimary; // cfgOptGrpPg index of the primary
142 Db *dbPrimary; // Database connection to the primary
143 const Storage *storagePrimary; // Storage object for the primary
144 const String *hostPrimary; // Host name of the primary
145
146 unsigned int pgIdxStandby; // cfgOptGrpPg index of the standby
147 Db *dbStandby; // Database connection to the standby
148 const Storage *storageStandby; // Storage object for the standby
149 const String *hostStandby; // Host name of the standby
150
151 unsigned int version; // PostgreSQL version
152 unsigned int walSegmentSize; // PostgreSQL wal segment size
153 } BackupData;
154
155 static BackupData *
backupInit(const InfoBackup * infoBackup)156 backupInit(const InfoBackup *infoBackup)
157 {
158 FUNCTION_LOG_BEGIN(logLevelDebug);
159 FUNCTION_LOG_PARAM(INFO_BACKUP, infoBackup);
160 FUNCTION_LOG_END();
161
162 ASSERT(infoBackup != NULL);
163
164 // Initialize for offline backup
165 BackupData *result = memNew(sizeof(BackupData));
166 *result = (BackupData){0};
167
168 // Check that the PostgreSQL version supports backup from standby. The check is done using the stanza info because pg_control
169 // cannot be loaded until a primary is found -- which will also lead to an error if the version does not support standby. If the
170 // pg_control version does not match the stanza version then there will be an error further down.
171 InfoPgData infoPg = infoPgDataCurrent(infoBackupPg(infoBackup));
172
173 if (cfgOptionBool(cfgOptOnline) && cfgOptionBool(cfgOptBackupStandby) && infoPg.version < PG_VERSION_BACKUP_STANDBY)
174 {
175 THROW_FMT(
176 ConfigError, "option '" CFGOPT_BACKUP_STANDBY "' not valid for " PG_NAME " < %s",
177 strZ(pgVersionToStr(PG_VERSION_BACKUP_STANDBY)));
178 }
179
180 // Don't allow backup from standby when offline
181 if (!cfgOptionBool(cfgOptOnline) && cfgOptionBool(cfgOptBackupStandby))
182 {
183 LOG_WARN(
184 "option " CFGOPT_BACKUP_STANDBY " is enabled but backup is offline - backups will be performed from the primary");
185 cfgOptionSet(cfgOptBackupStandby, cfgSourceParam, BOOL_FALSE_VAR);
186 }
187
188 // Get database info when online
189 if (cfgOptionBool(cfgOptOnline))
190 {
191 bool backupStandby = cfgOptionBool(cfgOptBackupStandby);
192 DbGetResult dbInfo = dbGet(!backupStandby, true, backupStandby);
193
194 result->pgIdxPrimary = dbInfo.primaryIdx;
195 result->dbPrimary = dbInfo.primary;
196
197 if (backupStandby)
198 {
199 ASSERT(dbInfo.standby != NULL);
200
201 result->pgIdxStandby = dbInfo.standbyIdx;
202 result->dbStandby = dbInfo.standby;
203 result->storageStandby = storagePgIdx(result->pgIdxStandby);
204 result->hostStandby = cfgOptionIdxStrNull(cfgOptPgHost, result->pgIdxStandby);
205 }
206 }
207
208 // Add primary info
209 result->storagePrimary = storagePgIdx(result->pgIdxPrimary);
210 result->hostPrimary = cfgOptionIdxStrNull(cfgOptPgHost, result->pgIdxPrimary);
211
212 // Get pg_control info from the primary
213 PgControl pgControl = pgControlFromFile(result->storagePrimary);
214
215 result->version = pgControl.version;
216 result->walSegmentSize = pgControl.walSegmentSize;
217
218 // Validate pg_control info against the stanza
219 if (result->version != infoPg.version || pgControl.systemId != infoPg.systemId)
220 {
221 THROW_FMT(
222 BackupMismatchError,
223 PG_NAME " version %s, system-id %" PRIu64 " do not match stanza version %s, system-id %" PRIu64 "\n"
224 "HINT: is this the correct stanza?", strZ(pgVersionToStr(pgControl.version)), pgControl.systemId,
225 strZ(pgVersionToStr(infoPg.version)), infoPg.systemId);
226 }
227
228 // Only allow stop auto in PostgreSQL >= 9.3 and <= 9.5
229 if (cfgOptionBool(cfgOptStopAuto) && result->version < PG_VERSION_93)
230 {
231 LOG_WARN(CFGOPT_STOP_AUTO " option is only available in " PG_NAME " >= " PG_VERSION_93_STR);
232 cfgOptionSet(cfgOptStopAuto, cfgSourceParam, BOOL_FALSE_VAR);
233 }
234
235 // Only allow start-fast option for PostgreSQL >= 8.4
236 if (cfgOptionBool(cfgOptStartFast) && result->version < PG_VERSION_84)
237 {
238 LOG_WARN(CFGOPT_START_FAST " option is only available in " PG_NAME " >= " PG_VERSION_84_STR);
239 cfgOptionSet(cfgOptStartFast, cfgSourceParam, BOOL_FALSE_VAR);
240 }
241
242 // If checksum page is not explicity set then automatically enable it when checksums are available
243 if (!cfgOptionTest(cfgOptChecksumPage))
244 {
245 // If online then use the value in pg_control to set checksum-page
246 if (cfgOptionBool(cfgOptOnline))
247 {
248 cfgOptionSet(cfgOptChecksumPage, cfgSourceParam, VARBOOL(pgControl.pageChecksum));
249 }
250 // Else set to false. An offline cluster is likely to have false positives so better if the user enables manually.
251 else
252 cfgOptionSet(cfgOptChecksumPage, cfgSourceParam, BOOL_FALSE_VAR);
253 }
254 // Else if checksums have been explicitly enabled but are not available then warn and reset. ??? We should be able to make this
255 // determination when offline as well, but the integration tests don't write pg_control accurately enough to support it.
256 else if (cfgOptionBool(cfgOptOnline) && !pgControl.pageChecksum && cfgOptionBool(cfgOptChecksumPage))
257 {
258 LOG_WARN(CFGOPT_CHECKSUM_PAGE " option set to true but checksums are not enabled on the cluster, resetting to false");
259 cfgOptionSet(cfgOptChecksumPage, cfgSourceParam, BOOL_FALSE_VAR);
260 }
261
262 FUNCTION_LOG_RETURN(BACKUP_DATA, result);
263 }
264
265 /**********************************************************************************************************************************
266 Get time from the database or locally depending on online
267 ***********************************************************************************************************************************/
268 static time_t
backupTime(BackupData * backupData,bool waitRemainder)269 backupTime(BackupData *backupData, bool waitRemainder)
270 {
271 FUNCTION_LOG_BEGIN(logLevelDebug);
272 FUNCTION_LOG_PARAM(BACKUP_DATA, backupData);
273 FUNCTION_LOG_PARAM(BOOL, waitRemainder);
274 FUNCTION_LOG_END();
275
276 // Offline backups will just grab the time from the local system since the value of copyStart is not important in this context.
277 // No worries about causing a delta backup since switching online will do that anyway.
278 time_t result = time(NULL);
279
280 // When online get the time from the database server
281 if (cfgOptionBool(cfgOptOnline))
282 {
283 // Get time from the database
284 TimeMSec timeMSec = dbTimeMSec(backupData->dbPrimary);
285 result = (time_t)(timeMSec / MSEC_PER_SEC);
286
287 // Sleep the remainder of the second when requested (this is so copyStart is not subject to one second resolution issues)
288 if (waitRemainder)
289 {
290 unsigned int retry = 0;
291
292 // Just to be safe we'll loop until PostgreSQL reports that we have slept long enough
293 do
294 {
295 // Error if the clock has not advanced after several attempts
296 if (retry == 3)
297 THROW_FMT(KernelError, PG_NAME " clock has not advanced to the next second after %u tries", retry);
298
299 // Sleep remainder of current second
300 sleepMSec(((TimeMSec)(result + 1) * MSEC_PER_SEC) - timeMSec);
301
302 // Check time again to be sure we slept long enough
303 timeMSec = dbTimeMSec(backupData->dbPrimary);
304
305 // Increment retry to prevent an infinite loop
306 retry++;
307 }
308 while ((time_t)(timeMSec / MSEC_PER_SEC) <= result);
309 }
310 }
311
312 FUNCTION_LOG_RETURN(TIME, result);
313 }
314
315 /***********************************************************************************************************************************
316 Create an incremental backup if type is not full and a compatible prior backup exists
317 ***********************************************************************************************************************************/
318 // Helper to find a compatible prior backup
319 static Manifest *
backupBuildIncrPrior(const InfoBackup * infoBackup)320 backupBuildIncrPrior(const InfoBackup *infoBackup)
321 {
322 FUNCTION_LOG_BEGIN(logLevelDebug);
323 FUNCTION_LOG_PARAM(INFO_BACKUP, infoBackup);
324 FUNCTION_LOG_END();
325
326 ASSERT(infoBackup != NULL);
327
328 Manifest *result = NULL;
329
330 // No incremental if backup type is full
331 BackupType type = (BackupType)cfgOptionStrId(cfgOptType);
332
333 if (type != backupTypeFull)
334 {
335 MEM_CONTEXT_TEMP_BEGIN()
336 {
337 InfoPgData infoPg = infoPgDataCurrent(infoBackupPg(infoBackup));
338 const String *backupLabelPrior = NULL;
339 unsigned int backupTotal = infoBackupDataTotal(infoBackup);
340
341 for (unsigned int backupIdx = backupTotal - 1; backupIdx < backupTotal; backupIdx--)
342 {
343 InfoBackupData backupPrior = infoBackupData(infoBackup, backupIdx);
344
345 // The prior backup for a diff must be full
346 if (type == backupTypeDiff && backupPrior.backupType != backupTypeFull)
347 continue;
348
349 // The backups must come from the same cluster ??? This should enable delta instead
350 if (infoPg.id != backupPrior.backupPgId)
351 continue;
352
353 // This backup is a candidate for prior
354 backupLabelPrior = strDup(backupPrior.backupLabel);
355 break;
356 }
357
358 // If there is a prior backup then check that options for the new backup are compatible
359 if (backupLabelPrior != NULL)
360 {
361 result = manifestLoadFile(
362 storageRepo(), strNewFmt(STORAGE_REPO_BACKUP "/%s/" BACKUP_MANIFEST_FILE, strZ(backupLabelPrior)),
363 cfgOptionStrId(cfgOptRepoCipherType), infoPgCipherPass(infoBackupPg(infoBackup)));
364 const ManifestData *manifestPriorData = manifestData(result);
365
366 LOG_INFO_FMT(
367 "last backup label = %s, version = %s", strZ(manifestData(result)->backupLabel),
368 strZ(manifestData(result)->backrestVersion));
369
370 // Warn if compress-type option changed
371 if (compressTypeEnum(cfgOptionStr(cfgOptCompressType)) != manifestPriorData->backupOptionCompressType)
372 {
373 LOG_WARN_FMT(
374 "%s backup cannot alter " CFGOPT_COMPRESS_TYPE " option to '%s', reset to value in %s",
375 strZ(cfgOptionDisplay(cfgOptType)), strZ(cfgOptionDisplay(cfgOptCompressType)), strZ(backupLabelPrior));
376
377 // Set the compression type back to whatever was in the prior backup. This is not strictly needed since we
378 // could store compression type on a per file basis, but it seems simplest and safest for now.
379 cfgOptionSet(
380 cfgOptCompressType, cfgSourceParam, VARSTR(compressTypeStr(manifestPriorData->backupOptionCompressType)));
381
382 // There's a small chance that the prior manifest is old enough that backupOptionCompressLevel was not recorded.
383 // There's an even smaller chance that the user will also alter compression-type in this scenario right after
384 // upgrading to a newer version. Because we judge this combination of events to be nearly impossible just assert
385 // here so no test coverage is needed.
386 CHECK(manifestPriorData->backupOptionCompressLevel != NULL);
387
388 // Set the compression level back to whatever was in the prior backup
389 cfgOptionSet(cfgOptCompressLevel, cfgSourceParam, manifestPriorData->backupOptionCompressLevel);
390 }
391
392 // Warn if hardlink option changed ??? Doesn't seem like this is needed? Hardlinks are always to a directory that
393 // is guaranteed to contain a real file -- like references. Also annoying that if the full backup was not
394 // hardlinked then an diff/incr can't be used because we need more testing.
395 if (cfgOptionBool(cfgOptRepoHardlink) != manifestPriorData->backupOptionHardLink)
396 {
397 LOG_WARN_FMT(
398 "%s backup cannot alter hardlink option to '%s', reset to value in %s",
399 strZ(cfgOptionDisplay(cfgOptType)), strZ(cfgOptionDisplay(cfgOptRepoHardlink)), strZ(backupLabelPrior));
400 cfgOptionSet(cfgOptRepoHardlink, cfgSourceParam, VARBOOL(manifestPriorData->backupOptionHardLink));
401 }
402
403 // If not defined this backup was done in a version prior to page checksums being introduced. Just set
404 // checksum-page to false and move on without a warning. Page checksums will start on the next full backup.
405 if (manifestData(result)->backupOptionChecksumPage == NULL)
406 {
407 cfgOptionSet(cfgOptChecksumPage, cfgSourceParam, BOOL_FALSE_VAR);
408 }
409 // Don't allow the checksum-page option to change in a diff or incr backup. This could be confusing as only
410 // certain files would be checksummed and the list could be incomplete during reporting.
411 else
412 {
413 bool checksumPagePrior = varBool(manifestData(result)->backupOptionChecksumPage);
414
415 // Warn if an incompatible setting was explicitly requested
416 if (checksumPagePrior != cfgOptionBool(cfgOptChecksumPage))
417 {
418 LOG_WARN_FMT(
419 "%s backup cannot alter '" CFGOPT_CHECKSUM_PAGE "' option to '%s', reset to '%s' from %s",
420 strZ(cfgOptionDisplay(cfgOptType)), strZ(cfgOptionDisplay(cfgOptChecksumPage)),
421 cvtBoolToConstZ(checksumPagePrior), strZ(manifestData(result)->backupLabel));
422 }
423
424 cfgOptionSet(cfgOptChecksumPage, cfgSourceParam, VARBOOL(checksumPagePrior));
425 }
426
427 manifestMove(result, memContextPrior());
428 }
429 else
430 {
431 LOG_WARN_FMT("no prior backup exists, %s backup has been changed to full", strZ(cfgOptionDisplay(cfgOptType)));
432 cfgOptionSet(cfgOptType, cfgSourceParam, VARSTR(strIdToStr(backupTypeFull)));
433 }
434 }
435 MEM_CONTEXT_TEMP_END();
436 }
437
438 FUNCTION_LOG_RETURN(MANIFEST, result);
439 }
440
441 static bool
backupBuildIncr(const InfoBackup * infoBackup,Manifest * manifest,Manifest * manifestPrior,const String * archiveStart)442 backupBuildIncr(const InfoBackup *infoBackup, Manifest *manifest, Manifest *manifestPrior, const String *archiveStart)
443 {
444 FUNCTION_LOG_BEGIN(logLevelDebug);
445 FUNCTION_LOG_PARAM(INFO_BACKUP, infoBackup);
446 FUNCTION_LOG_PARAM(MANIFEST, manifest);
447 FUNCTION_LOG_PARAM(MANIFEST, manifestPrior);
448 FUNCTION_LOG_PARAM(STRING, archiveStart);
449 FUNCTION_LOG_END();
450
451 ASSERT(infoBackup != NULL);
452 ASSERT(manifest != NULL);
453
454 bool result = false;
455
456 // No incremental if no prior manifest
457 if (manifestPrior != NULL)
458 {
459 MEM_CONTEXT_TEMP_BEGIN()
460 {
461 // Move the manifest to this context so it will be freed when we are done
462 manifestMove(manifestPrior, MEM_CONTEXT_TEMP());
463
464 // Build incremental manifest
465 manifestBuildIncr(manifest, manifestPrior, (BackupType)cfgOptionStrId(cfgOptType), archiveStart);
466
467 // Set the cipher subpass from prior manifest since we want a single subpass for the entire backup set
468 manifestCipherSubPassSet(manifest, manifestCipherSubPass(manifestPrior));
469
470 // Incremental was built
471 result = true;
472 }
473 MEM_CONTEXT_TEMP_END();
474 }
475
476 FUNCTION_LOG_RETURN(BOOL, result);
477 }
478
479 /***********************************************************************************************************************************
480 Check for a backup that can be resumed and merge into the manifest if found
481 ***********************************************************************************************************************************/
482 typedef struct BackupResumeData
483 {
484 Manifest *manifest; // New manifest
485 const Manifest *manifestResume; // Resumed manifest
486 const CompressType compressType; // Backup compression type
487 const bool delta; // Is this a delta backup?
488 const String *backupPath; // Path to the current level of the backup being cleaned
489 const String *manifestParentName; // Parent manifest name used to construct manifest name
490 } BackupResumeData;
491
492 // Callback to clean invalid paths/files/links out of the resumable backup path
backupResumeCallback(void * data,const StorageInfo * info)493 void backupResumeCallback(void *data, const StorageInfo *info)
494 {
495 FUNCTION_TEST_BEGIN();
496 FUNCTION_TEST_PARAM_P(VOID, data);
497 FUNCTION_TEST_PARAM(STORAGE_INFO, *storageInfo);
498 FUNCTION_TEST_END();
499
500 ASSERT(data != NULL);
501 ASSERT(info != NULL);
502
503 BackupResumeData *resumeData = data;
504
505 // Skip all . paths because they have already been handled on the previous level of recursion
506 if (strEq(info->name, DOT_STR))
507 {
508 FUNCTION_TEST_RETURN_VOID();
509 return;
510 }
511
512 // Skip backup.manifest.copy -- it must be preserved to allow resume again if this process throws an error before writing the
513 // manifest for the first time
514 if (resumeData->manifestParentName == NULL && strEqZ(info->name, BACKUP_MANIFEST_FILE INFO_COPY_EXT))
515 {
516 FUNCTION_TEST_RETURN_VOID();
517 return;
518 }
519
520 // Build the name used to lookup files in the manifest
521 const String *manifestName = resumeData->manifestParentName != NULL ?
522 strNewFmt("%s/%s", strZ(resumeData->manifestParentName), strZ(info->name)) : info->name;
523
524 // Build the backup path used to remove files/links/paths that are invalid
525 const String *backupPath = strNewFmt("%s/%s", strZ(resumeData->backupPath), strZ(info->name));
526
527 // Process file types
528 switch (info->type)
529 {
530 // Check paths
531 // -------------------------------------------------------------------------------------------------------------------------
532 case storageTypePath:
533 {
534 // If the path was not found in the new manifest then remove it
535 if (manifestPathFindDefault(resumeData->manifest, manifestName, NULL) == NULL)
536 {
537 LOG_DETAIL_FMT("remove path '%s' from resumed backup", strZ(storagePathP(storageRepo(), backupPath)));
538 storagePathRemoveP(storageRepoWrite(), backupPath, .recurse = true);
539 }
540 // Else recurse into the path
541 {
542 BackupResumeData resumeDataSub = *resumeData;
543 resumeDataSub.manifestParentName = manifestName;
544 resumeDataSub.backupPath = backupPath;
545
546 storageInfoListP(
547 storageRepo(), resumeDataSub.backupPath, backupResumeCallback, &resumeDataSub, .sortOrder = sortOrderAsc);
548 }
549
550 break;
551 }
552
553 // Check files
554 // -------------------------------------------------------------------------------------------------------------------------
555 case storageTypeFile:
556 {
557 // If the file is compressed then strip off the extension before doing the lookup
558 CompressType fileCompressType = compressTypeFromName(manifestName);
559
560 if (fileCompressType != compressTypeNone)
561 manifestName = compressExtStrip(manifestName, fileCompressType);
562
563 // Find the file in both manifests
564 const ManifestFile *file = manifestFileFindDefault(resumeData->manifest, manifestName, NULL);
565 const ManifestFile *fileResume = manifestFileFindDefault(resumeData->manifestResume, manifestName, NULL);
566
567 // Check if the file can be resumed or must be removed
568 const char *removeReason = NULL;
569
570 if (fileCompressType != resumeData->compressType)
571 removeReason = "mismatched compression type";
572 else if (file == NULL)
573 removeReason = "missing in manifest";
574 else if (file->reference != NULL)
575 removeReason = "reference in manifest";
576 else if (fileResume == NULL)
577 removeReason = "missing in resumed manifest";
578 else if (fileResume->reference != NULL)
579 removeReason = "reference in resumed manifest";
580 else if (fileResume->checksumSha1[0] == '\0')
581 removeReason = "no checksum in resumed manifest";
582 else if (file->size != fileResume->size)
583 removeReason = "mismatched size";
584 else if (!resumeData->delta && file->timestamp != fileResume->timestamp)
585 removeReason = "mismatched timestamp";
586 else if (file->size == 0)
587 // ??? don't resume zero size files because Perl wouldn't -- this can be removed after the migration)
588 removeReason = "zero size";
589 else
590 {
591 manifestFileUpdate(
592 resumeData->manifest, manifestName, file->size, fileResume->sizeRepo, fileResume->checksumSha1, NULL,
593 fileResume->checksumPage, fileResume->checksumPageError, fileResume->checksumPageErrorList);
594 }
595
596 // Remove the file if it could not be resumed
597 if (removeReason != NULL)
598 {
599 LOG_DETAIL_FMT(
600 "remove file '%s' from resumed backup (%s)", strZ(storagePathP(storageRepo(), backupPath)), removeReason);
601 storageRemoveP(storageRepoWrite(), backupPath);
602 }
603
604 break;
605 }
606
607 // Remove links. We could check that the link has not changed and preserve it but it doesn't seem worth the extra testing.
608 // The link will be recreated during the backup if needed.
609 // -------------------------------------------------------------------------------------------------------------------------
610 case storageTypeLink:
611 storageRemoveP(storageRepoWrite(), backupPath);
612 break;
613
614 // Remove special files
615 // -------------------------------------------------------------------------------------------------------------------------
616 case storageTypeSpecial:
617 LOG_WARN_FMT("remove special file '%s' from resumed backup", strZ(storagePathP(storageRepo(), backupPath)));
618 storageRemoveP(storageRepoWrite(), backupPath);
619 break;
620 }
621
622 FUNCTION_TEST_RETURN_VOID();
623 }
624
625 // Helper to find a resumable backup
626 static const Manifest *
backupResumeFind(const Manifest * manifest,const String * cipherPassBackup)627 backupResumeFind(const Manifest *manifest, const String *cipherPassBackup)
628 {
629 FUNCTION_LOG_BEGIN(logLevelDebug);
630 FUNCTION_LOG_PARAM(MANIFEST, manifest);
631 FUNCTION_TEST_PARAM(STRING, cipherPassBackup);
632 FUNCTION_LOG_END();
633
634 ASSERT(manifest != NULL);
635
636 Manifest *result = NULL;
637
638 MEM_CONTEXT_TEMP_BEGIN()
639 {
640 // Only the last backup can be resumed
641 const StringList *backupList = strLstSort(
642 storageListP(
643 storageRepo(), STRDEF(STORAGE_REPO_BACKUP),
644 .expression = backupRegExpP(.full = true, .differential = true, .incremental = true)),
645 sortOrderDesc);
646
647 if (!strLstEmpty(backupList))
648 {
649 const String *backupLabel = strLstGet(backupList, 0);
650 const String *manifestFile = strNewFmt(STORAGE_REPO_BACKUP "/%s/" BACKUP_MANIFEST_FILE, strZ(backupLabel));
651
652 // Resumable backups do not have backup.manifest
653 if (!storageExistsP(storageRepo(), manifestFile))
654 {
655 bool usable = false;
656 const String *reason = STRDEF("partially deleted by prior resume or invalid");
657 Manifest *manifestResume = NULL;
658
659 // Resumable backups must have backup.manifest.copy
660 if (storageExistsP(storageRepo(), strNewFmt("%s" INFO_COPY_EXT, strZ(manifestFile))))
661 {
662 reason = STRDEF("resume is disabled");
663
664 // Attempt to read the manifest file in the resumable backup to see if it can be used. If any error at all
665 // occurs then the backup will be considered unusable and a resume will not be attempted.
666 if (cfgOptionBool(cfgOptResume))
667 {
668 reason = strNewFmt("unable to read %s" INFO_COPY_EXT, strZ(manifestFile));
669
670 TRY_BEGIN()
671 {
672 manifestResume = manifestLoadFile(
673 storageRepo(), manifestFile, cfgOptionStrId(cfgOptRepoCipherType), cipherPassBackup);
674 const ManifestData *manifestResumeData = manifestData(manifestResume);
675
676 // Check pgBackRest version. This allows the resume implementation to be changed with each version of
677 // pgBackRest at the expense of users losing a resumable back after an upgrade, which seems worth the
678 // cost.
679 if (!strEq(manifestResumeData->backrestVersion, manifestData(manifest)->backrestVersion))
680 {
681 reason = strNewFmt(
682 "new " PROJECT_NAME " version '%s' does not match resumable " PROJECT_NAME " version '%s'",
683 strZ(manifestData(manifest)->backrestVersion), strZ(manifestResumeData->backrestVersion));
684 }
685 // Check backup type because new backup label must be the same type as resume backup label
686 else if (manifestResumeData->backupType != cfgOptionStrId(cfgOptType))
687 {
688 reason = strNewFmt(
689 "new backup type '%s' does not match resumable backup type '%s'",
690 strZ(cfgOptionDisplay(cfgOptType)), strZ(strIdToStr(manifestResumeData->backupType)));
691 }
692 // Check prior backup label ??? Do we really care about the prior backup label?
693 else if (!strEq(manifestResumeData->backupLabelPrior, manifestData(manifest)->backupLabelPrior))
694 {
695 reason = strNewFmt(
696 "new prior backup label '%s' does not match resumable prior backup label '%s'",
697 manifestResumeData->backupLabelPrior ? strZ(manifestResumeData->backupLabelPrior) : "<undef>",
698 manifestData(manifest)->backupLabelPrior ?
699 strZ(manifestData(manifest)->backupLabelPrior) : "<undef>");
700 }
701 // Check compression. Compression can't be changed between backups so resume won't work either.
702 else if (
703 manifestResumeData->backupOptionCompressType != compressTypeEnum(cfgOptionStr(cfgOptCompressType)))
704 {
705 reason = strNewFmt(
706 "new compression '%s' does not match resumable compression '%s'",
707 strZ(cfgOptionDisplay(cfgOptCompressType)),
708 strZ(compressTypeStr(manifestResumeData->backupOptionCompressType)));
709 }
710 else
711 usable = true;
712 }
713 CATCH_ANY()
714 {
715 }
716 TRY_END();
717 }
718 }
719
720 // If the backup is usable then return the manifest
721 if (usable)
722 {
723 result = manifestMove(manifestResume, memContextPrior());
724 }
725 // Else warn and remove the unusable backup
726 else
727 {
728 LOG_WARN_FMT("backup '%s' cannot be resumed: %s", strZ(backupLabel), strZ(reason));
729
730 storagePathRemoveP(
731 storageRepoWrite(), strNewFmt(STORAGE_REPO_BACKUP "/%s", strZ(backupLabel)), .recurse = true);
732 }
733 }
734 }
735 }
736 MEM_CONTEXT_TEMP_END();
737
738 FUNCTION_LOG_RETURN(MANIFEST, result);
739 }
740
741 static bool
backupResume(Manifest * manifest,const String * cipherPassBackup)742 backupResume(Manifest *manifest, const String *cipherPassBackup)
743 {
744 FUNCTION_LOG_BEGIN(logLevelDebug);
745 FUNCTION_LOG_PARAM(MANIFEST, manifest);
746 FUNCTION_TEST_PARAM(STRING, cipherPassBackup);
747 FUNCTION_LOG_END();
748
749 ASSERT(manifest != NULL);
750
751 bool result = false;
752
753 MEM_CONTEXT_TEMP_BEGIN()
754 {
755 const Manifest *manifestResume = backupResumeFind(manifest, cipherPassBackup);
756
757 // If a resumable backup was found set the label and cipher subpass
758 if (manifestResume)
759 {
760 // Resuming
761 result = true;
762
763 // Set the backup label to the resumed backup
764 manifestBackupLabelSet(manifest, manifestData(manifestResume)->backupLabel);
765
766 LOG_WARN_FMT(
767 "resumable backup %s of same type exists -- remove invalid files and resume",
768 strZ(manifestData(manifest)->backupLabel));
769
770 // If resuming a full backup then copy cipher subpass since it was used to encrypt the resumable files
771 if (manifestData(manifest)->backupType == backupTypeFull)
772 manifestCipherSubPassSet(manifest, manifestCipherSubPass(manifestResume));
773
774 // Clean resumed backup
775 BackupResumeData resumeData =
776 {
777 .manifest = manifest,
778 .manifestResume = manifestResume,
779 .compressType = compressTypeEnum(cfgOptionStr(cfgOptCompressType)),
780 .delta = cfgOptionBool(cfgOptDelta),
781 .backupPath = strNewFmt(STORAGE_REPO_BACKUP "/%s", strZ(manifestData(manifest)->backupLabel)),
782 };
783
784 storageInfoListP(storageRepo(), resumeData.backupPath, backupResumeCallback, &resumeData, .sortOrder = sortOrderAsc);
785 }
786 }
787 MEM_CONTEXT_TEMP_END();
788
789 FUNCTION_LOG_RETURN(BOOL, result);
790 }
791
792 /***********************************************************************************************************************************
793 Start the backup
794 ***********************************************************************************************************************************/
// Values returned by backupStart(). All members are only assigned for an online backup and are left NULL otherwise.
typedef struct BackupStartResult
{
    String *lsn;                                                    // Start lsn returned by dbBackupStart()
    String *walSegmentName;                                         // Start WAL segment name returned by dbBackupStart()
    VariantList *dbList;                                            // Result of dbList() on the primary
    VariantList *tablespaceList;                                    // Result of dbTablespaceList() on the primary
} BackupStartResult;
802
803 static BackupStartResult
backupStart(BackupData * backupData)804 backupStart(BackupData *backupData)
805 {
806 FUNCTION_LOG_BEGIN(logLevelDebug);
807 FUNCTION_LOG_PARAM(BACKUP_DATA, backupData);
808 FUNCTION_LOG_END();
809
810 BackupStartResult result = {.lsn = NULL};
811
812 MEM_CONTEXT_TEMP_BEGIN()
813 {
814 // If this is an offline backup
815 if (!cfgOptionBool(cfgOptOnline))
816 {
817 // Check if Postgres is running and if so only continue when forced
818 if (storageExistsP(backupData->storagePrimary, PG_FILE_POSTMASTERPID_STR))
819 {
820 if (cfgOptionBool(cfgOptForce))
821 {
822 LOG_WARN(
823 "--no-" CFGOPT_ONLINE " passed and " PG_FILE_POSTMASTERPID " exists but --" CFGOPT_FORCE " was passed so"
824 " backup will continue though it looks like " PG_NAME " is running and the backup will probably not be"
825 " consistent");
826 }
827 else
828 {
829 THROW(
830 PgRunningError,
831 "--no-" CFGOPT_ONLINE " passed but " PG_FILE_POSTMASTERPID " exists - looks like " PG_NAME " is running."
832 " Shut down " PG_NAME " and try again, or use --force.");
833 }
834 }
835 }
836 // Else start the backup normally
837 else
838 {
839 // Check database configuration
840 checkDbConfig(backupData->version, backupData->pgIdxPrimary, backupData->dbPrimary, false);
841
842 // Start backup
843 LOG_INFO_FMT(
844 "execute %sexclusive pg_start_backup(): backup begins after the %s checkpoint completes",
845 backupData->version >= PG_VERSION_96 ? "non-" : "",
846 cfgOptionBool(cfgOptStartFast) ? "requested immediate" : "next regular");
847
848 DbBackupStartResult dbBackupStartResult = dbBackupStart(
849 backupData->dbPrimary, cfgOptionBool(cfgOptStartFast), cfgOptionBool(cfgOptStopAuto));
850
851 MEM_CONTEXT_PRIOR_BEGIN()
852 {
853 result.lsn = strDup(dbBackupStartResult.lsn);
854 result.walSegmentName = strDup(dbBackupStartResult.walSegmentName);
855 result.dbList = dbList(backupData->dbPrimary);
856 result.tablespaceList = dbTablespaceList(backupData->dbPrimary);
857 }
858 MEM_CONTEXT_PRIOR_END();
859
860 LOG_INFO_FMT("backup start archive = %s, lsn = %s", strZ(result.walSegmentName), strZ(result.lsn));
861
862 // Wait for replay on the standby to catch up
863 if (cfgOptionBool(cfgOptBackupStandby))
864 {
865 LOG_INFO_FMT("wait for replay on the standby to reach %s", strZ(result.lsn));
866 dbReplayWait(backupData->dbStandby, result.lsn, cfgOptionUInt64(cfgOptArchiveTimeout));
867 LOG_INFO_FMT("replay on the standby reached %s", strZ(result.lsn));
868
869 // The standby db object won't be used anymore so free it
870 dbFree(backupData->dbStandby);
871
872 // The standby protocol connection won't be used anymore so free it
873 protocolRemoteFree(backupData->pgIdxStandby);
874 }
875 }
876 }
877 MEM_CONTEXT_TEMP_END();
878
879 FUNCTION_LOG_RETURN_STRUCT(result);
880 }
881
882 /***********************************************************************************************************************************
883 Stop the backup
884 ***********************************************************************************************************************************/
885 // Helper to write a file from a string to the repository and update the manifest
886 static void
backupFilePut(BackupData * backupData,Manifest * manifest,const String * name,time_t timestamp,const String * content)887 backupFilePut(BackupData *backupData, Manifest *manifest, const String *name, time_t timestamp, const String *content)
888 {
889 FUNCTION_LOG_BEGIN(logLevelDebug);
890 FUNCTION_LOG_PARAM(BACKUP_DATA, backupData);
891 FUNCTION_LOG_PARAM(MANIFEST, manifest);
892 FUNCTION_LOG_PARAM(STRING, name);
893 FUNCTION_LOG_PARAM(TIME, timestamp);
894 FUNCTION_LOG_PARAM(STRING, content);
895 FUNCTION_LOG_END();
896
897 // Skip files with no content
898 if (content != NULL)
899 {
900 MEM_CONTEXT_TEMP_BEGIN()
901 {
902 // Create file
903 const String *manifestName = strNewFmt(MANIFEST_TARGET_PGDATA "/%s", strZ(name));
904 CompressType compressType = compressTypeEnum(cfgOptionStr(cfgOptCompressType));
905
906 StorageWrite *write = storageNewWriteP(
907 storageRepoWrite(),
908 strNewFmt(
909 STORAGE_REPO_BACKUP "/%s/%s%s", strZ(manifestData(manifest)->backupLabel), strZ(manifestName),
910 strZ(compressExtStr(compressType))),
911 .compressible = true);
912
913 IoFilterGroup *filterGroup = ioWriteFilterGroup(storageWriteIo(write));
914
915 // Add SHA1 filter
916 ioFilterGroupAdd(filterGroup, cryptoHashNew(HASH_TYPE_SHA1_STR));
917
918 // Add compression
919 if (compressType != compressTypeNone)
920 {
921 ioFilterGroupAdd(
922 ioWriteFilterGroup(storageWriteIo(write)), compressFilter(compressType, cfgOptionInt(cfgOptCompressLevel)));
923 }
924
925 // Add encryption filter if required
926 cipherBlockFilterGroupAdd(
927 filterGroup, cfgOptionStrId(cfgOptRepoCipherType), cipherModeEncrypt, manifestCipherSubPass(manifest));
928
929 // Add size filter last to calculate repo size
930 ioFilterGroupAdd(filterGroup, ioSizeNew());
931
932 // Write file
933 storagePutP(write, BUFSTR(content));
934
935 // Use base path to set ownership and mode
936 const ManifestPath *basePath = manifestPathFind(manifest, MANIFEST_TARGET_PGDATA_STR);
937
938 // Add to manifest
939 ManifestFile file =
940 {
941 .name = manifestName,
942 .primary = true,
943 .mode = basePath->mode & (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH),
944 .user = basePath->user,
945 .group = basePath->group,
946 .size = strSize(content),
947 .sizeRepo = varUInt64Force(ioFilterGroupResult(filterGroup, SIZE_FILTER_TYPE_STR)),
948 .timestamp = timestamp,
949 };
950
951 memcpy(
952 file.checksumSha1, strZ(varStr(ioFilterGroupResult(filterGroup, CRYPTO_HASH_FILTER_TYPE_STR))),
953 HASH_TYPE_SHA1_SIZE_HEX + 1);
954
955 manifestFileAdd(manifest, &file);
956
957 LOG_DETAIL_FMT("wrote '%s' file returned from pg_stop_backup()", strZ(name));
958 }
959 MEM_CONTEXT_TEMP_END();
960 }
961
962 FUNCTION_LOG_RETURN_VOID();
963 }
964
965 /*--------------------------------------------------------------------------------------------------------------------------------*/
// Values returned by backupStop(). The lsn and WAL segment name are only assigned for an online backup and are left NULL otherwise.
typedef struct BackupStopResult
{
    String *lsn;                                                    // Stop lsn returned by dbBackupStop()
    String *walSegmentName;                                         // Stop WAL segment name returned by dbBackupStop()
    time_t timestamp;                                               // Result of backupTime(), set for online and offline backups
} BackupStopResult;
972
973 static BackupStopResult
backupStop(BackupData * backupData,Manifest * manifest)974 backupStop(BackupData *backupData, Manifest *manifest)
975 {
976 FUNCTION_LOG_BEGIN(logLevelDebug);
977 FUNCTION_LOG_PARAM(BACKUP_DATA, backupData);
978 FUNCTION_LOG_PARAM(MANIFEST, manifest);
979 FUNCTION_LOG_END();
980
981 BackupStopResult result = {.lsn = NULL};
982
983 if (cfgOptionBool(cfgOptOnline))
984 {
985 MEM_CONTEXT_TEMP_BEGIN()
986 {
987 // Stop the backup
988 LOG_INFO_FMT(
989 "execute %sexclusive pg_stop_backup() and wait for all WAL segments to archive",
990 backupData->version >= PG_VERSION_96 ? "non-" : "");
991
992 DbBackupStopResult dbBackupStopResult = dbBackupStop(backupData->dbPrimary);
993
994 MEM_CONTEXT_PRIOR_BEGIN()
995 {
996 result.timestamp = backupTime(backupData, false);
997 result.lsn = strDup(dbBackupStopResult.lsn);
998 result.walSegmentName = strDup(dbBackupStopResult.walSegmentName);
999 }
1000 MEM_CONTEXT_PRIOR_END();
1001
1002 LOG_INFO_FMT("backup stop archive = %s, lsn = %s", strZ(result.walSegmentName), strZ(result.lsn));
1003
1004 // Save files returned by stop backup
1005 backupFilePut(backupData, manifest, STRDEF(PG_FILE_BACKUPLABEL), result.timestamp, dbBackupStopResult.backupLabel);
1006 backupFilePut(backupData, manifest, STRDEF(PG_FILE_TABLESPACEMAP), result.timestamp, dbBackupStopResult.tablespaceMap);
1007 }
1008 MEM_CONTEXT_TEMP_END();
1009 }
1010 else
1011 result.timestamp = backupTime(backupData, false);
1012
1013 FUNCTION_LOG_RETURN_STRUCT(result);
1014 }
1015
/***********************************************************************************************************************************
Log the results of a job and throw errors

When the job succeeded the result is read from the job, logged, and (when the file was actually copied) applied to the manifest, and
then the job is freed. When the job failed the job's error code and message are thrown.

manifest - manifest that contains the file identified by the job key
host - when not NULL this is prefixed to fileName in log messages
fileName - name used to identify the file in log messages
fileRemove - the manifest name of a file that was skipped is added to this list so the caller can remove it from the manifest later
job - job to process
sizeTotal - total size used as the denominator of the progress percentage (zero is reported as 100%)
sizeCopied - size copied before this job

Returns sizeCopied plus the copy size reported by this job.
***********************************************************************************************************************************/
static uint64_t
backupJobResult(
    Manifest *manifest, const String *host, const String *const fileName, StringList *fileRemove, ProtocolParallelJob *const job,
    const uint64_t sizeTotal, uint64_t sizeCopied)
{
    FUNCTION_LOG_BEGIN(logLevelDebug);
        FUNCTION_LOG_PARAM(MANIFEST, manifest);
        FUNCTION_LOG_PARAM(STRING, host);
        FUNCTION_LOG_PARAM(STRING, fileName);
        FUNCTION_LOG_PARAM(STRING_LIST, fileRemove);
        FUNCTION_LOG_PARAM(PROTOCOL_PARALLEL_JOB, job);
        FUNCTION_LOG_PARAM(UINT64, sizeTotal);
        FUNCTION_LOG_PARAM(UINT64, sizeCopied);
    FUNCTION_LOG_END();

    ASSERT(manifest != NULL);
    ASSERT(fileName != NULL);
    ASSERT(fileRemove != NULL);
    ASSERT(job != NULL);

    // The job was successful
    if (protocolParallelJobErrorCode(job) == 0)
    {
        MEM_CONTEXT_TEMP_BEGIN()
        {
            // The job key is the manifest name of the file
            const ManifestFile *const file = manifestFileFind(manifest, varStr(protocolParallelJobKey(job)));
            const unsigned int processId = protocolParallelJobProcessId(job);

            // Read the job result. The fields are read in sequence: copy result, copy size, repo size, checksum, and an optional
            // JSON page checksum result (NULL when absent).
            PackRead *const jobResult = protocolParallelJobResult(job);
            const BackupCopyResult copyResult = (BackupCopyResult)pckReadU32P(jobResult);
            const uint64_t copySize = pckReadU64P(jobResult);
            const uint64_t repoSize = pckReadU64P(jobResult);
            const String *const copyChecksum = pckReadStrP(jobResult);
            const KeyValue *const checksumPageResult = varKv(jsonToVar(pckReadStrP(jobResult, .defaultValue = NULL_STR)));

            // Increment backup copy progress
            sizeCopied += copySize;

            // Create log file name
            const String *fileLog = host == NULL ? fileName : strNewFmt("%s:%s", strZ(host), strZ(fileName));

            // Format log strings. Progress is reported as 100% when the total size is zero to avoid dividing by zero and the
            // checksum is omitted for zero-size copies.
            const String *const logProgress =
                strNewFmt(
                    "%s, %" PRIu64 "%%", strZ(strSizeFormat(copySize)), sizeTotal == 0 ? 100 : sizeCopied * 100 / sizeTotal);
            const String *const logChecksum = copySize != 0 ? strNewFmt(" checksum %s", strZ(copyChecksum)) : EMPTY_STR;

            // If the file is in a prior backup and nothing changed, just log it
            if (copyResult == backupCopyResultNoOp)
            {
                LOG_DETAIL_PID_FMT(
                    processId, "match file from prior backup %s (%s)%s", strZ(fileLog), strZ(logProgress), strZ(logChecksum));
            }
            // Else if the repo matched the expected checksum, just log it
            else if (copyResult == backupCopyResultChecksum)
            {
                LOG_DETAIL_PID_FMT(
                    processId, "checksum resumed file %s (%s)%s", strZ(fileLog), strZ(logProgress), strZ(logChecksum));
            }
            // Else if the file was removed during backup add it to the list of files to be removed from the manifest when the
            // backup is complete. It can't be removed right now because that will invalidate the pointers that are being used for
            // processing.
            else if (copyResult == backupCopyResultSkip)
            {
                LOG_DETAIL_PID_FMT(processId, "skip file removed by database %s", strZ(fileLog));
                strLstAdd(fileRemove, file->name);
            }
            // Else file was copied so update manifest
            else
            {
                // If the file had to be recopied then warn that there may be an issue with corruption in the repository
                // ??? This should really be below the message below for more context -- can be moved after the migration
                // ??? The name should be a pg path not manifest name -- can be fixed after the migration
                if (copyResult == backupCopyResultReCopy)
                {
                    LOG_WARN_FMT(
                        "resumed backup file %s does not have expected checksum %s. The file will be recopied and backup will"
                        " continue but this may be an issue unless the resumed backup path in the repository is known to be"
                        " corrupted.\n"
                        "NOTE: this does not indicate a problem with the PostgreSQL page checksums.",
                        strZ(file->name), file->checksumSha1);
                }

                LOG_DETAIL_PID_FMT(processId, "backup file %s (%s)%s", strZ(fileLog), strZ(logProgress), strZ(logChecksum));

                // A page checksum result must be present if, and only if, page checksums are enabled for the file in the manifest
                ASSERT((!file->checksumPage && checksumPageResult == NULL) || (file->checksumPage && checksumPageResult != NULL));

                bool checksumPageError = false;
                const VariantList *checksumPageErrorList = NULL;

                // If the file had page checksums calculated during the copy
                if (checksumPageResult != NULL)
                {
                    // If the checksum was not valid
                    if (!varBool(kvGet(checksumPageResult, VARSTRDEF("valid"))))
                    {
                        checksumPageError = true;

                        // If the file was not aligned then there is no list of page errors, just warn about the misalignment
                        if (!varBool(kvGet(checksumPageResult, VARSTRDEF("align"))))
                        {
                            checksumPageErrorList = NULL;

                            // ??? Update formatting after migration
                            LOG_WARN_FMT(
                                "page misalignment in file %s: file size %" PRIu64 " is not divisible by page size %u",
                                strZ(fileLog), copySize, PG_PAGE_SIZE_DEFAULT);
                        }
                        else
                        {
                            // Format the page checksum errors
                            checksumPageErrorList = varVarLst(kvGet(checksumPageResult, VARSTRDEF("error")));
                            ASSERT(!varLstEmpty(checksumPageErrorList));

                            String *error = strNew();

                            // Lower bound on the number of pages in error (a range counts as two). Only used to decide whether the
                            // message should be plural.
                            unsigned int errorTotalMin = 0;

                            for (unsigned int errorIdx = 0; errorIdx < varLstSize(checksumPageErrorList); errorIdx++)
                            {
                                const Variant *const errorItem = varLstGet(checksumPageErrorList, errorIdx);

                                // Add a comma if this is not the first item
                                if (errorIdx != 0)
                                    strCatZ(error, ", ");

                                // If an error range (a list of two values formatted as first-last)
                                if (varType(errorItem) == varTypeVariantList)
                                {
                                    const VariantList *const errorItemList = varVarLst(errorItem);
                                    ASSERT(varLstSize(errorItemList) == 2);

                                    strCatFmt(
                                        error, "%" PRIu64 "-%" PRIu64, varUInt64(varLstGet(errorItemList, 0)),
                                        varUInt64(varLstGet(errorItemList, 1)));
                                    errorTotalMin += 2;
                                }
                                // Else a single error
                                else
                                {
                                    ASSERT(varType(errorItem) == varTypeUInt64);

                                    strCatFmt(error, "%" PRIu64, varUInt64(errorItem));
                                    errorTotalMin++;
                                }
                            }

                            // Make message plural when appropriate
                            const String *const plural = errorTotalMin > 1 ? STRDEF("s") : EMPTY_STR;

                            // ??? Update formatting after migration
                            LOG_WARN_FMT(
                                "invalid page checksum%s found in file %s at page%s %s", strZ(plural), strZ(fileLog), strZ(plural),
                                strZ(error));
                        }
                    }
                }

                // Update file info and remove any reference to the file's existence in a prior backup
                manifestFileUpdate(
                    manifest, file->name, copySize, repoSize, strZ(copyChecksum), VARSTR(NULL), file->checksumPage,
                    checksumPageError, checksumPageErrorList);
            }
        }
        MEM_CONTEXT_TEMP_END();

        // Free the job
        protocolParallelJobFree(job);
    }
    // Else the job errored
    else
        THROW_CODE(protocolParallelJobErrorCode(job), strZ(protocolParallelJobErrorMessage(job)));

    FUNCTION_LOG_RETURN(UINT64, sizeCopied);
}
1192
1193 /***********************************************************************************************************************************
1194 Save a copy of the backup manifest during processing to preserve checksums for a possible resume
1195 ***********************************************************************************************************************************/
1196 static void
backupManifestSaveCopy(Manifest * const manifest,const String * cipherPassBackup)1197 backupManifestSaveCopy(Manifest *const manifest, const String *cipherPassBackup)
1198 {
1199 FUNCTION_LOG_BEGIN(logLevelDebug);
1200 FUNCTION_LOG_PARAM(MANIFEST, manifest);
1201 FUNCTION_TEST_PARAM(STRING, cipherPassBackup);
1202 FUNCTION_LOG_END();
1203
1204 ASSERT(manifest != NULL);
1205
1206 MEM_CONTEXT_TEMP_BEGIN()
1207 {
1208 // Open file for write
1209 IoWrite *write = storageWriteIo(
1210 storageNewWriteP(
1211 storageRepoWrite(),
1212 strNewFmt(
1213 STORAGE_REPO_BACKUP "/%s/" BACKUP_MANIFEST_FILE INFO_COPY_EXT, strZ(manifestData(manifest)->backupLabel))));
1214
1215 // Add encryption filter if required
1216 cipherBlockFilterGroupAdd(
1217 ioWriteFilterGroup(write), cfgOptionStrId(cfgOptRepoCipherType), cipherModeEncrypt, cipherPassBackup);
1218
1219 // Save file
1220 manifestSave(manifest, write);
1221 }
1222 MEM_CONTEXT_TEMP_END();
1223
1224 FUNCTION_LOG_RETURN_VOID();
1225 }
1226
1227 /***********************************************************************************************************************************
1228 Process the backup manifest
1229 ***********************************************************************************************************************************/
1230 // Comparator to order ManifestFile objects by size then name
1231 static int
backupProcessQueueComparator(const void * item1,const void * item2)1232 backupProcessQueueComparator(const void *item1, const void *item2)
1233 {
1234 FUNCTION_TEST_BEGIN();
1235 FUNCTION_TEST_PARAM_P(VOID, item1);
1236 FUNCTION_TEST_PARAM_P(VOID, item2);
1237 FUNCTION_TEST_END();
1238
1239 ASSERT(item1 != NULL);
1240 ASSERT(item2 != NULL);
1241
1242 // If the size differs then that's enough to determine order
1243 if ((*(ManifestFile **)item1)->size < (*(ManifestFile **)item2)->size)
1244 FUNCTION_TEST_RETURN(-1);
1245 else if ((*(ManifestFile **)item1)->size > (*(ManifestFile **)item2)->size)
1246 FUNCTION_TEST_RETURN(1);
1247
1248 // If size is the same then use name to generate a deterministic ordering (names must be unique)
1249 FUNCTION_TEST_RETURN(strCmp((*(ManifestFile **)item1)->name, (*(ManifestFile **)item2)->name));
1250 }
1251
1252 // Helper to generate the backup queues
1253 static uint64_t
backupProcessQueue(Manifest * manifest,List ** queueList)1254 backupProcessQueue(Manifest *manifest, List **queueList)
1255 {
1256 FUNCTION_LOG_BEGIN(logLevelDebug);
1257 FUNCTION_LOG_PARAM(MANIFEST, manifest);
1258 FUNCTION_LOG_PARAM_P(LIST, queueList);
1259 FUNCTION_LOG_END();
1260
1261 ASSERT(manifest != NULL);
1262
1263 uint64_t result = 0;
1264
1265 MEM_CONTEXT_TEMP_BEGIN()
1266 {
1267 // Create list of process queue
1268 *queueList = lstNewP(sizeof(List *));
1269
1270 // Generate the list of targets
1271 StringList *targetList = strLstNew();
1272 strLstAdd(targetList, STRDEF(MANIFEST_TARGET_PGDATA "/"));
1273
1274 for (unsigned int targetIdx = 0; targetIdx < manifestTargetTotal(manifest); targetIdx++)
1275 {
1276 const ManifestTarget *target = manifestTarget(manifest, targetIdx);
1277
1278 if (target->tablespaceId != 0)
1279 strLstAdd(targetList, strNewFmt("%s/", strZ(target->name)));
1280 }
1281
1282 // Generate the processing queues (there is always at least one)
1283 bool backupStandby = cfgOptionBool(cfgOptBackupStandby);
1284 unsigned int queueOffset = backupStandby ? 1 : 0;
1285
1286 MEM_CONTEXT_BEGIN(lstMemContext(*queueList))
1287 {
1288 for (unsigned int queueIdx = 0; queueIdx < strLstSize(targetList) + queueOffset; queueIdx++)
1289 {
1290 List *queue = lstNewP(sizeof(ManifestFile *), .comparator = backupProcessQueueComparator);
1291 lstAdd(*queueList, &queue);
1292 }
1293 }
1294 MEM_CONTEXT_END();
1295
1296 // Now put all files into the processing queues
1297 bool delta = cfgOptionBool(cfgOptDelta);
1298 uint64_t fileTotal = 0;
1299 bool pgControlFound = false;
1300
1301 for (unsigned int fileIdx = 0; fileIdx < manifestFileTotal(manifest); fileIdx++)
1302 {
1303 const ManifestFile *file = manifestFile(manifest, fileIdx);
1304
1305 // If the file is a reference it should only be backed up if delta and not zero size
1306 if (file->reference != NULL && (!delta || file->size == 0))
1307 continue;
1308
1309 // Is pg_control in the backup?
1310 if (strEq(file->name, STRDEF(MANIFEST_TARGET_PGDATA "/" PG_PATH_GLOBAL "/" PG_FILE_PGCONTROL)))
1311 pgControlFound = true;
1312
1313 // Files that must be copied from the primary are always put in queue 0 when backup from standby
1314 if (backupStandby && file->primary)
1315 {
1316 lstAdd(*(List **)lstGet(*queueList, 0), &file);
1317 }
1318 // Else find the correct queue by matching the file to a target
1319 else
1320 {
1321 // Find the target that contains this file
1322 unsigned int targetIdx = 0;
1323
1324 do
1325 {
1326 // A target should always be found
1327 CHECK(targetIdx < strLstSize(targetList));
1328
1329 if (strBeginsWith(file->name, strLstGet(targetList, targetIdx)))
1330 break;
1331
1332 targetIdx++;
1333 }
1334 while (1);
1335
1336 // Add file to queue
1337 lstAdd(*(List **)lstGet(*queueList, targetIdx + queueOffset), &file);
1338 }
1339
1340 // Add size to total
1341 result += file->size;
1342
1343 // Increment total files
1344 fileTotal++;
1345 }
1346
1347 // pg_control should always be in an online backup
1348 if (!pgControlFound && cfgOptionBool(cfgOptOnline))
1349 {
1350 THROW(
1351 FileMissingError,
1352 PG_FILE_PGCONTROL " must be present in all online backups\n"
1353 "HINT: is something wrong with the clock or filesystem timestamps?");
1354 }
1355
1356 // If there are no files to backup then we'll exit with an error. This could happen if the database is down and backup is
1357 // called with --no-online twice in a row.
1358 if (fileTotal == 0)
1359 THROW(FileMissingError, "no files have changed since the last backup - this seems unlikely");
1360
1361 // Sort the queues
1362 for (unsigned int queueIdx = 0; queueIdx < lstSize(*queueList); queueIdx++)
1363 lstSort(*(List **)lstGet(*queueList, queueIdx), sortOrderDesc);
1364
1365 // Move process queues to prior context
1366 lstMove(*queueList, memContextPrior());
1367 }
1368 MEM_CONTEXT_TEMP_END();
1369
1370 FUNCTION_LOG_RETURN(UINT64, result);
1371 }
1372
1373 // Helper to caculate the next queue to scan based on the client index
1374 static int
backupJobQueueNext(unsigned int clientIdx,int queueIdx,unsigned int queueTotal)1375 backupJobQueueNext(unsigned int clientIdx, int queueIdx, unsigned int queueTotal)
1376 {
1377 FUNCTION_TEST_BEGIN();
1378 FUNCTION_TEST_PARAM(UINT, clientIdx);
1379 FUNCTION_TEST_PARAM(INT, queueIdx);
1380 FUNCTION_TEST_PARAM(UINT, queueTotal);
1381 FUNCTION_TEST_END();
1382
1383 // Move (forward or back) to the next queue
1384 queueIdx += clientIdx % 2 ? -1 : 1;
1385
1386 // Deal with wrapping on either end
1387 if (queueIdx < 0)
1388 FUNCTION_TEST_RETURN((int)queueTotal - 1);
1389 else if (queueIdx == (int)queueTotal)
1390 FUNCTION_TEST_RETURN(0);
1391
1392 FUNCTION_TEST_RETURN(queueIdx);
1393 }
1394
// Data passed to the callback that fetches backup jobs for the parallel executor. All members except the queue list are constant
// for the duration of the backup.
typedef struct BackupJobData
{
    const String *const backupLabel;                                // Backup label (defines the backup path)
    const bool backupStandby;                                       // Backup from standby
    const CipherType cipherType;                                    // Cipher type
                                                                    // NOTE(review): backupJobCallback() derives the cipher type
                                                                    // from cipherSubPass and does not read this member -- confirm
                                                                    // whether that is intended
    const String *const cipherSubPass;                              // Passphrase used to encrypt files in the backup
    const CompressType compressType;                                // Backup compression type
    const int compressLevel;                                        // Compress level if backup is compressed
    const bool delta;                                               // Is this a checksum delta backup?
    const uint64_t lsnStart;                                        // Starting lsn for the backup

    List *queueList;                                                // List of processing queues
} BackupJobData;
1409
backupJobCallback(void * data,unsigned int clientIdx)1410 static ProtocolParallelJob *backupJobCallback(void *data, unsigned int clientIdx)
1411 {
1412 FUNCTION_TEST_BEGIN();
1413 FUNCTION_TEST_PARAM_P(VOID, data);
1414 FUNCTION_TEST_PARAM(UINT, clientIdx);
1415 FUNCTION_TEST_END();
1416
1417 ASSERT(data != NULL);
1418
1419 ProtocolParallelJob *result = NULL;
1420
1421 MEM_CONTEXT_TEMP_BEGIN()
1422 {
1423 // Get a new job if there are any left
1424 BackupJobData *jobData = data;
1425
1426 // Determine where to begin scanning the queue (we'll stop when we get back here). When copying from the primary during
1427 // backup from standby only queue 0 will be used.
1428 unsigned int queueOffset = jobData->backupStandby && clientIdx > 0 ? 1 : 0;
1429 int queueIdx = jobData->backupStandby && clientIdx == 0 ?
1430 0 : (int)(clientIdx % (lstSize(jobData->queueList) - queueOffset));
1431 int queueEnd = queueIdx;
1432
1433 do
1434 {
1435 List *queue = *(List **)lstGet(jobData->queueList, (unsigned int)queueIdx + queueOffset);
1436
1437 if (!lstEmpty(queue))
1438 {
1439 const ManifestFile *file = *(ManifestFile **)lstGet(queue, 0);
1440
1441 // Create backup job
1442 ProtocolCommand *command = protocolCommandNew(PROTOCOL_COMMAND_BACKUP_FILE);
1443 PackWrite *const param = protocolCommandParam(command);
1444
1445 pckWriteStrP(param, manifestPathPg(file->name));
1446 pckWriteBoolP(param, !strEq(file->name, STRDEF(MANIFEST_TARGET_PGDATA "/" PG_PATH_GLOBAL "/" PG_FILE_PGCONTROL)));
1447 pckWriteU64P(param, file->size);
1448 pckWriteBoolP(param, !file->primary);
1449 pckWriteStrP(param, file->checksumSha1[0] != 0 ? STR(file->checksumSha1) : NULL);
1450 pckWriteBoolP(param, file->checksumPage);
1451 pckWriteU64P(param, jobData->lsnStart);
1452 pckWriteStrP(param, file->name);
1453 pckWriteBoolP(param, file->reference != NULL);
1454 pckWriteU32P(param, jobData->compressType);
1455 pckWriteI32P(param, jobData->compressLevel);
1456 pckWriteStrP(param, jobData->backupLabel);
1457 pckWriteBoolP(param, jobData->delta);
1458 pckWriteU64P(param, jobData->cipherSubPass == NULL ? cipherTypeNone : cipherTypeAes256Cbc);
1459 pckWriteStrP(param, jobData->cipherSubPass);
1460
1461 // Remove job from the queue
1462 lstRemoveIdx(queue, 0);
1463
1464 // Assign job to result
1465 MEM_CONTEXT_PRIOR_BEGIN()
1466 {
1467 result = protocolParallelJobNew(VARSTR(file->name), command);
1468 }
1469 MEM_CONTEXT_PRIOR_END();
1470
1471 // Break out of the loop early since we found a job
1472 break;
1473 }
1474
1475 // Don't get next queue when copying from primary during backup from standby since the primary only has one queue
1476 if (!jobData->backupStandby || clientIdx > 0)
1477 queueIdx = backupJobQueueNext(clientIdx, queueIdx, lstSize(jobData->queueList) - queueOffset);
1478 }
1479 while (queueIdx != queueEnd);
1480 }
1481 MEM_CONTEXT_TEMP_END();
1482
1483 FUNCTION_TEST_RETURN(result);
1484 }
1485
1486 static uint64_t
backupProcess(BackupData * backupData,Manifest * manifest,const String * lsnStart,const String * cipherPassBackup)1487 backupProcess(BackupData *backupData, Manifest *manifest, const String *lsnStart, const String *cipherPassBackup)
1488 {
1489 FUNCTION_LOG_BEGIN(logLevelDebug);
1490 FUNCTION_LOG_PARAM(BACKUP_DATA, backupData);
1491 FUNCTION_LOG_PARAM(MANIFEST, manifest);
1492 FUNCTION_LOG_PARAM(STRING, lsnStart);
1493 FUNCTION_TEST_PARAM(STRING, cipherPassBackup);
1494 FUNCTION_LOG_END();
1495
1496 ASSERT(manifest != NULL);
1497
1498 uint64_t sizeTotal = 0;
1499
1500 MEM_CONTEXT_TEMP_BEGIN()
1501 {
1502 // Get backup info
1503 const BackupType backupType = manifestData(manifest)->backupType;
1504 const String *const backupLabel = manifestData(manifest)->backupLabel;
1505 const String *const backupPathExp = strNewFmt(STORAGE_REPO_BACKUP "/%s", strZ(backupLabel));
1506 bool hardLink = cfgOptionBool(cfgOptRepoHardlink) && storageFeature(storageRepoWrite(), storageFeatureHardLink);
1507 bool backupStandby = cfgOptionBool(cfgOptBackupStandby);
1508
        // If this is a full backup or hard-linked and paths are supported then create all paths explicitly so that empty paths will
        // exist in the repo. Also create tablespace symlinks when symlinks are available. This makes it possible for the user to
        // make a copy of the backup path and get a valid cluster.
1512 if (backupType == backupTypeFull || hardLink)
1513 {
1514 // Create paths when available
1515 if (storageFeature(storageRepoWrite(), storageFeaturePath))
1516 {
1517 for (unsigned int pathIdx = 0; pathIdx < manifestPathTotal(manifest); pathIdx++)
1518 {
1519 storagePathCreateP(
1520 storageRepoWrite(),
1521 strNewFmt("%s/%s", strZ(backupPathExp), strZ(manifestPath(manifest, pathIdx)->name)));
1522 }
1523 }
1524
1525 // Create tablespace symlinks when available
1526 if (storageFeature(storageRepoWrite(), storageFeatureSymLink))
1527 {
1528 for (unsigned int targetIdx = 0; targetIdx < manifestTargetTotal(manifest); targetIdx++)
1529 {
1530 const ManifestTarget *const target = manifestTarget(manifest, targetIdx);
1531
1532 if (target->tablespaceId != 0)
1533 {
1534 const String *const link = storagePathP(
1535 storageRepo(),
1536 strNewFmt("%s/" MANIFEST_TARGET_PGDATA "/%s", strZ(backupPathExp), strZ(target->name)));
1537 const String *const linkDestination = strNewFmt(
1538 "../../" MANIFEST_TARGET_PGTBLSPC "/%u", target->tablespaceId);
1539
1540 THROW_ON_SYS_ERROR_FMT(
1541 symlink(strZ(linkDestination), strZ(link)) == -1, FileOpenError,
1542 "unable to create symlink '%s' to '%s'", strZ(link), strZ(linkDestination));
1543 }
1544 }
1545 }
1546 }
1547
1548 // Generate processing queues
1549 BackupJobData jobData =
1550 {
1551 .backupLabel = backupLabel,
1552 .backupStandby = backupStandby,
1553 .compressType = compressTypeEnum(cfgOptionStr(cfgOptCompressType)),
1554 .compressLevel = cfgOptionInt(cfgOptCompressLevel),
1555 .cipherType = cfgOptionStrId(cfgOptRepoCipherType),
1556 .cipherSubPass = manifestCipherSubPass(manifest),
1557 .delta = cfgOptionBool(cfgOptDelta),
1558 .lsnStart = cfgOptionBool(cfgOptOnline) ? pgLsnFromStr(lsnStart) : 0xFFFFFFFFFFFFFFFF,
1559 };
1560
1561 sizeTotal = backupProcessQueue(manifest, &jobData.queueList);
1562
1563 // Create the parallel executor
1564 ProtocolParallel *parallelExec = protocolParallelNew(
1565 cfgOptionUInt64(cfgOptProtocolTimeout) / 2, backupJobCallback, &jobData);
1566
1567 // First client is always on the primary
1568 protocolParallelClientAdd(parallelExec, protocolLocalGet(protocolStorageTypePg, backupData->pgIdxPrimary, 1));
1569
1570 // Create the rest of the clients on the primary or standby depending on the value of backup-standby. Note that standby
1571 // backups don't count the primary client in process-max.
1572 unsigned int processMax = cfgOptionUInt(cfgOptProcessMax) + (backupStandby ? 1 : 0);
1573 unsigned int pgIdx = backupStandby ? backupData->pgIdxStandby : backupData->pgIdxPrimary;
1574
1575 for (unsigned int processIdx = 2; processIdx <= processMax; processIdx++)
1576 protocolParallelClientAdd(parallelExec, protocolLocalGet(protocolStorageTypePg, pgIdx, processIdx));
1577
1578 // Maintain a list of files that need to be removed from the manifest when the backup is complete
1579 StringList *fileRemove = strLstNew();
1580
1581 // Determine how often the manifest will be saved (every one percent or threshold size, whichever is greater)
1582 uint64_t manifestSaveLast = 0;
1583 uint64_t manifestSaveSize = sizeTotal / 100;
1584
1585 if (manifestSaveSize < cfgOptionUInt64(cfgOptManifestSaveThreshold))
1586 manifestSaveSize = cfgOptionUInt64(cfgOptManifestSaveThreshold);
1587
1588 // Process jobs
1589 uint64_t sizeCopied = 0;
1590
1591 MEM_CONTEXT_TEMP_RESET_BEGIN()
1592 {
1593 do
1594 {
1595 unsigned int completed = protocolParallelProcess(parallelExec);
1596
1597 for (unsigned int jobIdx = 0; jobIdx < completed; jobIdx++)
1598 {
1599 ProtocolParallelJob *job = protocolParallelResult(parallelExec);
1600
1601 sizeCopied = backupJobResult(
1602 manifest,
1603 backupStandby && protocolParallelJobProcessId(job) > 1 ? backupData->hostStandby : backupData->hostPrimary,
1604 storagePathP(
1605 protocolParallelJobProcessId(job) > 1 ? storagePgIdx(pgIdx) : backupData->storagePrimary,
1606 manifestPathPg(manifestFileFind(manifest, varStr(protocolParallelJobKey(job)))->name)),
1607 fileRemove, job, sizeTotal, sizeCopied);
1608 }
1609
1610 // A keep-alive is required here for the remote holding open the backup connection
1611 protocolKeepAlive();
1612
1613 // Save the manifest periodically to preserve checksums for resume
1614 if (sizeCopied - manifestSaveLast >= manifestSaveSize)
1615 {
1616 backupManifestSaveCopy(manifest, cipherPassBackup);
1617 manifestSaveLast = sizeCopied;
1618 }
1619
1620 // Reset the memory context occasionally so we don't use too much memory or slow down processing
1621 MEM_CONTEXT_TEMP_RESET(1000);
1622 }
1623 while (!protocolParallelDone(parallelExec));
1624 }
1625 MEM_CONTEXT_TEMP_END();
1626
1627 #ifdef DEBUG
1628 // Ensure that all processing queues are empty
1629 for (unsigned int queueIdx = 0; queueIdx < lstSize(jobData.queueList); queueIdx++)
1630 ASSERT(lstEmpty(*(List **)lstGet(jobData.queueList, queueIdx)));
1631 #endif
1632
1633 // Remove files from the manifest that were removed during the backup. This must happen after processing to avoid
1634 // invalidating pointers by deleting items from the list.
1635 for (unsigned int fileRemoveIdx = 0; fileRemoveIdx < strLstSize(fileRemove); fileRemoveIdx++)
1636 manifestFileRemove(manifest, strLstGet(fileRemove, fileRemoveIdx));
1637
1638 // Log references or create hardlinks for all files
1639 const char *const compressExt = strZ(compressExtStr(jobData.compressType));
1640
1641 for (unsigned int fileIdx = 0; fileIdx < manifestFileTotal(manifest); fileIdx++)
1642 {
1643 const ManifestFile *const file = manifestFile(manifest, fileIdx);
1644
1645 // If the file has a reference, then it was not copied since it can be retrieved from the referenced backup. However,
1646 // if hardlinking is enabled the link will need to be created.
1647 if (file->reference != NULL)
1648 {
1649 // If hardlinking is enabled then create a hardlink for files that have not changed since the last backup
1650 if (hardLink)
1651 {
1652 LOG_DETAIL_FMT("hardlink %s to %s", strZ(file->name), strZ(file->reference));
1653
1654 const String *const linkName = storagePathP(
1655 storageRepo(), strNewFmt("%s/%s%s", strZ(backupPathExp), strZ(file->name), compressExt));
1656 const String *const linkDestination = storagePathP(
1657 storageRepo(),
1658 strNewFmt(STORAGE_REPO_BACKUP "/%s/%s%s", strZ(file->reference), strZ(file->name), compressExt));
1659
1660 THROW_ON_SYS_ERROR_FMT(
1661 link(strZ(linkDestination), strZ(linkName)) == -1, FileOpenError,
1662 "unable to create hardlink '%s' to '%s'", strZ(linkName), strZ(linkDestination));
1663 }
1664 // Else log the reference. With delta, it is possible that references may have been removed if a file needed to be
1665 // recopied.
1666 else
1667 LOG_DETAIL_FMT("reference %s to %s", strZ(file->name), strZ(file->reference));
1668 }
1669 }
1670
1671 // Sync backup paths if required
1672 if (storageFeature(storageRepoWrite(), storageFeaturePathSync))
1673 {
1674 for (unsigned int pathIdx = 0; pathIdx < manifestPathTotal(manifest); pathIdx++)
1675 {
1676 const String *const path = strNewFmt("%s/%s", strZ(backupPathExp), strZ(manifestPath(manifest, pathIdx)->name));
1677
1678 if (backupType == backupTypeFull || hardLink || storagePathExistsP(storageRepo(), path))
1679 storagePathSyncP(storageRepoWrite(), path);
1680 }
1681 }
1682 }
1683 MEM_CONTEXT_TEMP_END();
1684
1685 FUNCTION_LOG_RETURN(UINT64, sizeTotal);
1686 }
1687
1688 /***********************************************************************************************************************************
1689 Check and copy WAL segments required to make the backup consistent
1690 ***********************************************************************************************************************************/
1691 static void
backupArchiveCheckCopy(Manifest * manifest,unsigned int walSegmentSize,const String * cipherPassBackup)1692 backupArchiveCheckCopy(Manifest *manifest, unsigned int walSegmentSize, const String *cipherPassBackup)
1693 {
1694 FUNCTION_LOG_BEGIN(logLevelDebug);
1695 FUNCTION_LOG_PARAM(MANIFEST, manifest);
1696 FUNCTION_LOG_PARAM(UINT, walSegmentSize);
1697 FUNCTION_TEST_PARAM(STRING, cipherPassBackup);
1698 FUNCTION_LOG_END();
1699
1700 ASSERT(manifest != NULL);
1701
1702 // If archive logs are required to complete the backup, then check them. This is the default, but can be overridden if the
1703 // archive logs are going to a different server. Be careful of disabling this option because there is no way to verify that the
1704 // backup will be consistent - at least not here.
1705 if (cfgOptionBool(cfgOptOnline) && cfgOptionBool(cfgOptArchiveCheck))
1706 {
1707 MEM_CONTEXT_TEMP_BEGIN()
1708 {
1709 unsigned int timeline = cvtZToUIntBase(strZ(strSubN(manifestData(manifest)->archiveStart, 0, 8)), 16);
1710 uint64_t lsnStart = pgLsnFromStr(manifestData(manifest)->lsnStart);
1711 uint64_t lsnStop = pgLsnFromStr(manifestData(manifest)->lsnStop);
1712
1713 LOG_INFO_FMT(
1714 "check archive for segment(s) %s:%s", strZ(pgLsnToWalSegment(timeline, lsnStart, walSegmentSize)),
1715 strZ(pgLsnToWalSegment(timeline, lsnStop, walSegmentSize)));
1716
1717 // Save the backup manifest before getting archive logs in case of failure
1718 backupManifestSaveCopy(manifest, cipherPassBackup);
1719
1720 // Use base path to set ownership and mode
1721 const ManifestPath *basePath = manifestPathFind(manifest, MANIFEST_TARGET_PGDATA_STR);
1722
1723 // Loop through all the segments in the lsn range
1724 InfoArchive *infoArchive = infoArchiveLoadFile(
1725 storageRepo(), INFO_ARCHIVE_PATH_FILE_STR, cfgOptionStrId(cfgOptRepoCipherType),
1726 cfgOptionStrNull(cfgOptRepoCipherPass));
1727 const String *archiveId = infoArchiveId(infoArchive);
1728
1729 StringList *walSegmentList = pgLsnRangeToWalSegmentList(
1730 manifestData(manifest)->pgVersion, timeline, lsnStart, lsnStop, walSegmentSize);
1731
1732 for (unsigned int walSegmentIdx = 0; walSegmentIdx < strLstSize(walSegmentList); walSegmentIdx++)
1733 {
1734 MEM_CONTEXT_TEMP_BEGIN()
1735 {
1736 const String *walSegment = strLstGet(walSegmentList, walSegmentIdx);
1737
1738 // Find the actual wal segment file in the archive
1739 const String *archiveFile = walSegmentFind(
1740 storageRepo(), archiveId, walSegment, cfgOptionUInt64(cfgOptArchiveTimeout));
1741
1742 if (cfgOptionBool(cfgOptArchiveCopy))
1743 {
1744 // Copy can be a pretty expensive operation so log it
1745 LOG_DETAIL_FMT("copy segment %s to backup", strZ(walSegment));
1746
1747 // Get compression type of the WAL segment and backup
1748 CompressType archiveCompressType = compressTypeFromName(archiveFile);
1749 CompressType backupCompressType = compressTypeEnum(cfgOptionStr(cfgOptCompressType));
1750
1751 // Open the archive file
1752 StorageRead *read = storageNewReadP(
1753 storageRepo(), strNewFmt(STORAGE_REPO_ARCHIVE "/%s/%s", strZ(archiveId), strZ(archiveFile)));
1754 IoFilterGroup *filterGroup = ioReadFilterGroup(storageReadIo(read));
1755
1756 // Decrypt with archive key if encrypted
1757 cipherBlockFilterGroupAdd(
1758 filterGroup, cfgOptionStrId(cfgOptRepoCipherType), cipherModeDecrypt,
1759 infoArchiveCipherPass(infoArchive));
1760
1761 // Compress/decompress if archive and backup do not have the same compression settings
1762 if (archiveCompressType != backupCompressType)
1763 {
1764 if (archiveCompressType != compressTypeNone)
1765 ioFilterGroupAdd(filterGroup, decompressFilter(archiveCompressType));
1766
1767 if (backupCompressType != compressTypeNone)
1768 {
1769 ioFilterGroupAdd(
1770 filterGroup, compressFilter(backupCompressType, cfgOptionInt(cfgOptCompressLevel)));
1771 }
1772 }
1773
1774 // Encrypt with backup key if encrypted
1775 cipherBlockFilterGroupAdd(
1776 filterGroup, cfgOptionStrId(cfgOptRepoCipherType), cipherModeEncrypt, manifestCipherSubPass(manifest));
1777
1778 // Add size filter last to calculate repo size
1779 ioFilterGroupAdd(filterGroup, ioSizeNew());
1780
1781 // Copy the file
1782 const String *manifestName = strNewFmt(
1783 MANIFEST_TARGET_PGDATA "/%s/%s", strZ(pgWalPath(manifestData(manifest)->pgVersion)), strZ(walSegment));
1784
1785 storageCopyP(
1786 read,
1787 storageNewWriteP(
1788 storageRepoWrite(),
1789 strNewFmt(
1790 STORAGE_REPO_BACKUP "/%s/%s%s", strZ(manifestData(manifest)->backupLabel), strZ(manifestName),
1791 strZ(compressExtStr(compressTypeEnum(cfgOptionStr(cfgOptCompressType)))))));
1792
1793 // Add to manifest
1794 ManifestFile file =
1795 {
1796 .name = manifestName,
1797 .primary = true,
1798 .mode = basePath->mode & (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH),
1799 .user = basePath->user,
1800 .group = basePath->group,
1801 .size = walSegmentSize,
1802 .sizeRepo = varUInt64Force(ioFilterGroupResult(filterGroup, SIZE_FILTER_TYPE_STR)),
1803 .timestamp = manifestData(manifest)->backupTimestampStop,
1804 };
1805
1806 memcpy(file.checksumSha1, strZ(strSubN(archiveFile, 25, 40)), HASH_TYPE_SHA1_SIZE_HEX + 1);
1807
1808 manifestFileAdd(manifest, &file);
1809 }
1810 }
1811 MEM_CONTEXT_TEMP_END();
1812
1813 // A keep-alive is required here for the remote holding the backup lock
1814 protocolKeepAlive();
1815 }
1816 }
1817 MEM_CONTEXT_TEMP_END();
1818 }
1819
1820 FUNCTION_LOG_RETURN_VOID();
1821 }
1822
1823 /***********************************************************************************************************************************
1824 Save and update all files required to complete the backup
1825 ***********************************************************************************************************************************/
1826 static void
backupComplete(InfoBackup * const infoBackup,Manifest * const manifest)1827 backupComplete(InfoBackup *const infoBackup, Manifest *const manifest)
1828 {
1829 FUNCTION_LOG_BEGIN(logLevelDebug);
1830 FUNCTION_LOG_PARAM(INFO_BACKUP, infoBackup);
1831 FUNCTION_LOG_PARAM(MANIFEST, manifest);
1832 FUNCTION_LOG_END();
1833
1834 ASSERT(manifest != NULL);
1835
1836 MEM_CONTEXT_TEMP_BEGIN()
1837 {
1838 const String *const backupLabel = manifestData(manifest)->backupLabel;
1839
1840 // Validation and final save of the backup manifest. Validate in strict mode to catch as many potential issues as possible.
1841 // -------------------------------------------------------------------------------------------------------------------------
1842 manifestValidate(manifest, true);
1843
1844 backupManifestSaveCopy(manifest, infoPgCipherPass(infoBackupPg(infoBackup)));
1845
1846 storageCopy(
1847 storageNewReadP(
1848 storageRepo(), strNewFmt(STORAGE_REPO_BACKUP "/%s/" BACKUP_MANIFEST_FILE INFO_COPY_EXT, strZ(backupLabel))),
1849 storageNewWriteP(
1850 storageRepoWrite(), strNewFmt(STORAGE_REPO_BACKUP "/%s/" BACKUP_MANIFEST_FILE, strZ(backupLabel))));
1851
1852 // Copy a compressed version of the manifest to history. If the repo is encrypted then the passphrase to open the manifest
1853 // is required. We can't just do a straight copy since the destination needs to be compressed and that must happen before
1854 // encryption in order to be efficient. Compression will always be gz for compatibility and since it is always available.
1855 // -------------------------------------------------------------------------------------------------------------------------
1856 StorageRead *manifestRead = storageNewReadP(
1857 storageRepo(), strNewFmt(STORAGE_REPO_BACKUP "/%s/" BACKUP_MANIFEST_FILE, strZ(backupLabel)));
1858
1859 cipherBlockFilterGroupAdd(
1860 ioReadFilterGroup(storageReadIo(manifestRead)), cfgOptionStrId(cfgOptRepoCipherType), cipherModeDecrypt,
1861 infoPgCipherPass(infoBackupPg(infoBackup)));
1862
1863 StorageWrite *manifestWrite = storageNewWriteP(
1864 storageRepoWrite(),
1865 strNewFmt(
1866 STORAGE_REPO_BACKUP "/" BACKUP_PATH_HISTORY "/%s/%s.manifest%s", strZ(strSubN(backupLabel, 0, 4)),
1867 strZ(backupLabel), strZ(compressExtStr(compressTypeGz))));
1868
1869 ioFilterGroupAdd(ioWriteFilterGroup(storageWriteIo(manifestWrite)), compressFilter(compressTypeGz, 9));
1870
1871 cipherBlockFilterGroupAdd(
1872 ioWriteFilterGroup(storageWriteIo(manifestWrite)), cfgOptionStrId(cfgOptRepoCipherType), cipherModeEncrypt,
1873 infoPgCipherPass(infoBackupPg(infoBackup)));
1874
1875 storageCopyP(manifestRead, manifestWrite);
1876
1877 // Sync history path if required
1878 if (storageFeature(storageRepoWrite(), storageFeaturePathSync))
1879 storagePathSyncP(storageRepoWrite(), STRDEF(STORAGE_REPO_BACKUP "/" BACKUP_PATH_HISTORY));
1880
1881 // Create a symlink to the most recent backup if supported. This link is purely informational for the user and is never
1882 // used by us since symlinks are not supported on all storage types.
1883 // -------------------------------------------------------------------------------------------------------------------------
1884 backupLinkLatest(backupLabel, cfgOptionGroupIdxDefault(cfgOptGrpRepo));
1885
1886 // Add manifest and save backup.info (infoBackupSaveFile() is responsible for proper syncing)
1887 // -------------------------------------------------------------------------------------------------------------------------
1888 infoBackupDataAdd(infoBackup, manifest);
1889
1890 infoBackupSaveFile(
1891 infoBackup, storageRepoWrite(), INFO_BACKUP_PATH_FILE_STR, cfgOptionStrId(cfgOptRepoCipherType),
1892 cfgOptionStrNull(cfgOptRepoCipherPass));
1893 }
1894 MEM_CONTEXT_TEMP_END();
1895
1896 FUNCTION_LOG_RETURN_VOID();
1897 }
1898
1899 /**********************************************************************************************************************************/
1900 void
cmdBackup(void)1901 cmdBackup(void)
1902 {
1903 FUNCTION_LOG_VOID(logLevelDebug);
1904
1905 // Verify the repo is local
1906 repoIsLocalVerify();
1907
1908 // Test for stop file
1909 lockStopTest();
1910
1911 MEM_CONTEXT_TEMP_BEGIN()
1912 {
1913 // If the repo option was not provided and more than one repo is configured, then log the default repo chosen
1914 if (!cfgOptionTest(cfgOptRepo) && cfgOptionGroupIdxTotal(cfgOptGrpRepo) > 1)
1915 {
1916 LOG_INFO_FMT(
1917 "repo option not specified, defaulting to repo%u",
1918 cfgOptionGroupIdxToKey(cfgOptGrpRepo, cfgOptionGroupIdxDefault(cfgOptGrpRepo)));
1919 }
1920
1921 // Load backup.info
1922 InfoBackup *infoBackup = infoBackupLoadFileReconstruct(
1923 storageRepo(), INFO_BACKUP_PATH_FILE_STR, cfgOptionStrId(cfgOptRepoCipherType), cfgOptionStrNull(cfgOptRepoCipherPass));
1924 InfoPgData infoPg = infoPgDataCurrent(infoBackupPg(infoBackup));
1925 const String *cipherPassBackup = infoPgCipherPass(infoBackupPg(infoBackup));
1926
1927 // Get pg storage and database objects
1928 BackupData *backupData = backupInit(infoBackup);
1929
1930 // Get the start timestamp which will later be written into the manifest to track total backup time
1931 time_t timestampStart = backupTime(backupData, false);
1932
1933 // Check if there is a prior manifest when backup type is diff/incr
1934 Manifest *manifestPrior = backupBuildIncrPrior(infoBackup);
1935
1936 // Start the backup
1937 BackupStartResult backupStartResult = backupStart(backupData);
1938
1939 // Build the manifest
1940 Manifest *manifest = manifestNewBuild(
1941 backupData->storagePrimary, infoPg.version, infoPg.catalogVersion, cfgOptionBool(cfgOptOnline),
1942 cfgOptionBool(cfgOptChecksumPage), strLstNewVarLst(cfgOptionLst(cfgOptExclude)), backupStartResult.tablespaceList);
1943
1944 // Validate the manifest using the copy start time
1945 manifestBuildValidate(
1946 manifest, cfgOptionBool(cfgOptDelta), backupTime(backupData, true), compressTypeEnum(cfgOptionStr(cfgOptCompressType)));
1947
1948 // Build an incremental backup if type is not full (manifestPrior will be freed in this call)
1949 if (!backupBuildIncr(infoBackup, manifest, manifestPrior, backupStartResult.walSegmentName))
1950 manifestCipherSubPassSet(manifest, cipherPassGen(cfgOptionStrId(cfgOptRepoCipherType)));
1951
1952 // Set delta if it is not already set and the manifest requires it
1953 if (!cfgOptionBool(cfgOptDelta) && varBool(manifestData(manifest)->backupOptionDelta))
1954 cfgOptionSet(cfgOptDelta, cfgSourceParam, BOOL_TRUE_VAR);
1955
1956 // Resume a backup when possible
1957 if (!backupResume(manifest, cipherPassBackup))
1958 {
1959 manifestBackupLabelSet(
1960 manifest,
1961 backupLabelCreate(
1962 (BackupType)cfgOptionStrId(cfgOptType), manifestData(manifest)->backupLabelPrior, timestampStart));
1963 }
1964
1965 // Save the manifest before processing starts
1966 backupManifestSaveCopy(manifest, cipherPassBackup);
1967
1968 // Process the backup manifest
1969 uint64_t backupSizeTotal = backupProcess(backupData, manifest, backupStartResult.lsn, cipherPassBackup);
1970
1971 // Stop the backup
1972 BackupStopResult backupStopResult = backupStop(backupData, manifest);
1973
1974 // Complete manifest
1975 manifestBuildComplete(
1976 manifest, timestampStart, backupStartResult.lsn, backupStartResult.walSegmentName, backupStopResult.timestamp,
1977 backupStopResult.lsn, backupStopResult.walSegmentName, infoPg.id, infoPg.systemId, backupStartResult.dbList,
1978 cfgOptionBool(cfgOptOnline) && cfgOptionBool(cfgOptArchiveCheck),
1979 !cfgOptionBool(cfgOptOnline) || (cfgOptionBool(cfgOptArchiveCheck) && cfgOptionBool(cfgOptArchiveCopy)),
1980 cfgOptionUInt(cfgOptBufferSize), cfgOptionUInt(cfgOptCompressLevel), cfgOptionUInt(cfgOptCompressLevelNetwork),
1981 cfgOptionBool(cfgOptRepoHardlink), cfgOptionUInt(cfgOptProcessMax), cfgOptionBool(cfgOptBackupStandby));
1982
1983 // The primary db object won't be used anymore so free it
1984 dbFree(backupData->dbPrimary);
1985
1986 // Check and copy WAL segments required to make the backup consistent
1987 backupArchiveCheckCopy(manifest, backupData->walSegmentSize, cipherPassBackup);
1988
1989 // The primary protocol connection won't be used anymore so free it. This needs to happen after backupArchiveCheckCopy() so
1990 // the backup lock is held on the remote which allows conditional archiving based on the backup lock. Any further access to
1991 // the primary storage object may result in an error (likely eof).
1992 protocolRemoteFree(backupData->pgIdxPrimary);
1993
1994 // Complete the backup
1995 LOG_INFO_FMT("new backup label = %s", strZ(manifestData(manifest)->backupLabel));
1996 backupComplete(infoBackup, manifest);
1997
1998 // Backup info
1999 LOG_INFO_FMT(
2000 "%s backup size = %s, file total = %u", strZ(strIdToStr(manifestData(manifest)->backupType)),
2001 strZ(strSizeFormat(backupSizeTotal)), manifestFileTotal(manifest));
2002 }
2003 MEM_CONTEXT_TEMP_END();
2004
2005 FUNCTION_LOG_RETURN_VOID();
2006 }
2007