1 /***********************************************************************************************************************************
2 Verify Command
3
4 Verify the contents of the repository.
5 ***********************************************************************************************************************************/
6 #include "build.auto.h"
7
8 #include <stdlib.h>
9 #include <string.h>
10 #include <unistd.h>
11
12 #include "command/archive/common.h"
13 #include "command/check/common.h"
14 #include "command/verify/file.h"
15 #include "command/verify/protocol.h"
16 #include "common/compress/helper.h"
17 #include "common/crypto/cipherBlock.h"
18 #include "common/debug.h"
19 #include "common/io/fdWrite.h"
20 #include "common/io/io.h"
21 #include "common/log.h"
22 #include "config/config.h"
23 #include "info/infoArchive.h"
24 #include "info/infoBackup.h"
25 #include "info/manifest.h"
26 #include "postgres/interface.h"
27 #include "postgres/version.h"
28 #include "protocol/helper.h"
29 #include "protocol/parallel.h"
30 #include "storage/helper.h"
31
32 /***********************************************************************************************************************************
33 Data Types and Structures
34 ***********************************************************************************************************************************/
// Logging descriptors that allow the result structs to be named in the FUNCTION_LOG/FUNCTION_TEST parameter macros. *_TYPE gives
// the C type and *_FORMAT renders a value into the supplied buffer through objToLog().
#define FUNCTION_LOG_VERIFY_ARCHIVE_RESULT_TYPE VerifyArchiveResult
#define FUNCTION_LOG_VERIFY_ARCHIVE_RESULT_FORMAT(value, buffer, bufferSize)                                                       \
    objToLog(&value, "VerifyArchiveResult", buffer, bufferSize)

#define FUNCTION_LOG_VERIFY_BACKUP_RESULT_TYPE VerifyBackupResult
#define FUNCTION_LOG_VERIFY_BACKUP_RESULT_FORMAT(value, buffer, bufferSize)                                                        \
    objToLog(&value, "VerifyBackupResult", buffer, bufferSize)
44
45 // Structure for verifying repository info files
46 typedef struct VerifyInfoFile
47 {
48 InfoBackup *backup; // Backup.info file contents
49 InfoArchive *archive; // Archive.info file contents
50 Manifest *manifest; // Manifest file contents
51 const String *checksum; // File checksum
52 int errorCode; // Error code else 0 for no error
53 } VerifyInfoFile;
54
55 // Job data results structures for archive and backup
56 typedef struct VerifyArchiveResult
57 {
58 String *archiveId; // Archive Id (e.g. 9.6-1, 10-2)
59 unsigned int totalWalFile; // Total number of WAL files listed in directory on first read
60 unsigned int totalValidWal; // Total number of WAL that were verified and valid
61 PgWal pgWalInfo; // PG version, WAL size, system id
62 List *walRangeList; // List of WAL file ranges - new item is when WAL is missing
63 } VerifyArchiveResult;
64
65 // WAL range includes the start/stop of sequential WAL and start/stop includes the timeline (e.g. 000000020000000100000005)
66 typedef struct VerifyWalRange
67 {
68 String *stop; // Last WAL segment in this sequential range
69 String *start; // First WAL segment in this sequential range
70 List *invalidFileList; // After all jobs complete, list of VerifyInvalidFile
71 } VerifyWalRange;
72
73 // Invalid file information (not missing but files failing verification) - for archive and backup
74 typedef struct VerifyInvalidFile
75 {
76 String *fileName; // Name of the file (includes path within the stanza)
77 VerifyResult reason; // Reason file is invalid (e.g. incorrect checksum)
78 } VerifyInvalidFile;
79
// Final status of a backup. backupValid is pinned to zero so that a zero-initialized result starts out as valid.
typedef enum
{
    backupValid = 0,                                                // Default: all files in the backup label passed verification
    backupInvalid,                                                  // One or more files in the backup label failed verification
    backupMissingManifest,                                          // Backup manifest missing (backup may have expired)
    backupInProgress,                                               // Backup appeared to be in progress (so was skipped)
} VerifyBackupResultStatus;
88
89 typedef struct VerifyBackupResult
90 {
91 String *backupLabel; // Label assigned to the backup
92 VerifyBackupResultStatus status; // Final status of the backup
93 bool fileVerifyComplete; // Have all the files of the backup completed verification?
94 unsigned int totalFileManifest; // Total number of backup files in the manifest
95 unsigned int totalFileVerify; // Total number of backup files being verified
96 unsigned int totalFileValid; // Total number of backup files that were verified and valid
97 String *backupPrior; // Prior backup that this backup depends on, if any
98 unsigned int pgId; // PG id will be used to find WAL for the backup in the repo
99 unsigned int pgVersion; // PG version will be used with PG id to find WAL in the repo
100 String *archiveStart; // First WAL segment in the backup
101 String *archiveStop; // Last WAL segment in the backup
102 List *invalidFileList; // List of invalid files found in the backup
103 } VerifyBackupResult;
104
105 // Job data stucture for processing and results collection
106 typedef struct VerifyJobData
107 {
108 MemContext *memContext; // Context for memory allocations in this struct
109 StringList *archiveIdList; // List of archive ids to verify
110 StringList *walPathList; // WAL path list for a single archive id
111 StringList *walFileList; // WAL file list for a single WAL path
112 StringList *backupList; // List of backups to verify
113 Manifest *manifest; // Manifest contents with list of files to verify
114 unsigned int manifestFileIdx; // Index of the file within the manifest file list to process
115 String *currentBackup; // In progress backup, if any
116 const InfoPg *pgHistory; // Database history list
117 bool backupProcessing; // Are we processing WAL or are we processing backups
118 const String *manifestCipherPass; // Cipher pass for reading backup manifests
119 const String *walCipherPass; // Cipher pass for reading WAL files
120 const String *backupCipherPass; // Cipher pass for reading backup files referenced in a manifest
121 unsigned int jobErrorTotal; // Total errors that occurred during the job execution
122 List *archiveIdResultList; // Archive results
123 List *backupResultList; // Backup results
124 } VerifyJobData;
125
126 /***********************************************************************************************************************************
127 Helper function to add a file to an invalid file list
128 ***********************************************************************************************************************************/
129 static void
verifyInvalidFileAdd(List * invalidFileList,VerifyResult reason,const String * fileName)130 verifyInvalidFileAdd(List *invalidFileList, VerifyResult reason, const String *fileName)
131 {
132 FUNCTION_TEST_BEGIN();
133 FUNCTION_TEST_PARAM(LIST, invalidFileList); // Invalid file list to add the filename to
134 FUNCTION_TEST_PARAM(ENUM, reason); // Reason for invalid file
135 FUNCTION_TEST_PARAM(STRING, fileName); // Name of invalid file
136 FUNCTION_TEST_END();
137
138 ASSERT(invalidFileList != NULL);
139 ASSERT(fileName != NULL);
140
141 MEM_CONTEXT_BEGIN(lstMemContext(invalidFileList))
142 {
143 VerifyInvalidFile invalidFile =
144 {
145 .fileName = strDup(fileName),
146 .reason = reason,
147 };
148
149 lstAdd(invalidFileList, &invalidFile);
150 }
151 MEM_CONTEXT_END();
152
153 FUNCTION_TEST_RETURN_VOID();
154 }
155
156 /***********************************************************************************************************************************
157 Load a file into memory
158 ***********************************************************************************************************************************/
159 static StorageRead *
verifyFileLoad(const String * pathFileName,const String * cipherPass)160 verifyFileLoad(const String *pathFileName, const String *cipherPass)
161 {
162 FUNCTION_TEST_BEGIN();
163 FUNCTION_TEST_PARAM(STRING, pathFileName); // Fully qualified path/file name
164 FUNCTION_TEST_PARAM(STRING, cipherPass); // Password to open file if encrypted
165 FUNCTION_TEST_END();
166
167 ASSERT(pathFileName != NULL);
168
169 // Read the file and error if missing
170 StorageRead *result = storageNewReadP(storageRepo(), pathFileName);
171
172 // *read points to a location within result so update result with contents based on necessary filters
173 IoRead *read = storageReadIo(result);
174 cipherBlockFilterGroupAdd(
175 ioReadFilterGroup(read), cfgOptionStrId(cfgOptRepoCipherType), cipherModeDecrypt, cipherPass);
176 ioFilterGroupAdd(ioReadFilterGroup(read), cryptoHashNew(HASH_TYPE_SHA1_STR));
177
178 // If the file is compressed, add a decompression filter
179 if (compressTypeFromName(pathFileName) != compressTypeNone)
180 ioFilterGroupAdd(ioReadFilterGroup(read), decompressFilter(compressTypeFromName(pathFileName)));
181
182 FUNCTION_TEST_RETURN(result);
183 }
184
185 /***********************************************************************************************************************************
186 Get status of info files in the repository
187 ***********************************************************************************************************************************/
188 static VerifyInfoFile
verifyInfoFile(const String * pathFileName,bool keepFile,const String * cipherPass)189 verifyInfoFile(const String *pathFileName, bool keepFile, const String *cipherPass)
190 {
191 FUNCTION_LOG_BEGIN(logLevelDebug);
192 FUNCTION_LOG_PARAM(STRING, pathFileName); // Fully qualified path/file name
193 FUNCTION_LOG_PARAM(BOOL, keepFile); // Should the file be kept in memory?
194 FUNCTION_TEST_PARAM(STRING, cipherPass); // Password to open file if encrypted
195 FUNCTION_LOG_END();
196
197 ASSERT(pathFileName != NULL);
198
199 VerifyInfoFile result = {.errorCode = 0};
200
201 MEM_CONTEXT_TEMP_BEGIN()
202 {
203 TRY_BEGIN()
204 {
205 IoRead *infoRead = storageReadIo(verifyFileLoad(pathFileName, cipherPass));
206
207 // If directed to keep the loaded file in memory, then move the file into the result, else drain the io and close it
208 if (keepFile)
209 {
210 if (strBeginsWith(pathFileName, INFO_BACKUP_PATH_FILE_STR))
211 result.backup = infoBackupMove(infoBackupNewLoad(infoRead), memContextPrior());
212 else if (strBeginsWith(pathFileName, INFO_ARCHIVE_PATH_FILE_STR))
213 result.archive = infoArchiveMove(infoArchiveNewLoad(infoRead), memContextPrior());
214 else
215 result.manifest = manifestMove(manifestNewLoad(infoRead), memContextPrior());
216 }
217 else
218 ioReadDrain(infoRead);
219
220 MEM_CONTEXT_PRIOR_BEGIN()
221 {
222 result.checksum = strDup(varStr(ioFilterGroupResult(ioReadFilterGroup(infoRead), CRYPTO_HASH_FILTER_TYPE_STR)));
223 }
224 MEM_CONTEXT_PRIOR_END();
225 }
226 CATCH_ANY()
227 {
228 result.errorCode = errorCode();
229 String *errorMsg = strNewZ(errorMessage());
230
231 if (result.errorCode == errorTypeCode(&ChecksumError))
232 strCat(errorMsg, strNewFmt(" %s", strZ(pathFileName)));
233
234 LOG_WARN(strZ(errorMsg));
235 }
236 TRY_END();
237 }
238 MEM_CONTEXT_TEMP_END();
239
240 FUNCTION_LOG_RETURN_STRUCT(result);
241 }
242
243 /***********************************************************************************************************************************
244 Get the archive.info file
245 ***********************************************************************************************************************************/
246 static InfoArchive *
verifyArchiveInfoFile(void)247 verifyArchiveInfoFile(void)
248 {
249 FUNCTION_LOG_VOID(logLevelDebug);
250
251 InfoArchive *result = NULL;
252
253 MEM_CONTEXT_TEMP_BEGIN()
254 {
255 // Get the main info file
256 VerifyInfoFile verifyArchiveInfo = verifyInfoFile(INFO_ARCHIVE_PATH_FILE_STR, true, cfgOptionStrNull(cfgOptRepoCipherPass));
257
258 // If the main file did not error, then report on the copy's status and check checksums
259 if (verifyArchiveInfo.errorCode == 0)
260 {
261 result = verifyArchiveInfo.archive;
262 infoArchiveMove(result, memContextPrior());
263
264 // Attempt to load the copy and report on it's status but don't keep it in memory
265 VerifyInfoFile verifyArchiveInfoCopy = verifyInfoFile(
266 INFO_ARCHIVE_PATH_FILE_COPY_STR, false, cfgOptionStrNull(cfgOptRepoCipherPass));
267
268 // If the copy loaded successfully, then check the checksums
269 if (verifyArchiveInfoCopy.errorCode == 0)
270 {
271 // If the info and info.copy checksums don't match each other than one (or both) of the files could be corrupt so
272 // log a warning but must trust main
273 if (!strEq(verifyArchiveInfo.checksum, verifyArchiveInfoCopy.checksum))
274 LOG_WARN("archive.info.copy does not match archive.info");
275 }
276 }
277 else
278 {
279 // Attempt to load the copy
280 VerifyInfoFile verifyArchiveInfoCopy = verifyInfoFile(
281 INFO_ARCHIVE_PATH_FILE_COPY_STR, true, cfgOptionStrNull(cfgOptRepoCipherPass));
282
283 // If loaded successfully, then return the copy as usable
284 if (verifyArchiveInfoCopy.errorCode == 0)
285 {
286 result = verifyArchiveInfoCopy.archive;
287 infoArchiveMove(result, memContextPrior());
288 }
289 }
290 }
291 MEM_CONTEXT_TEMP_END();
292
293 FUNCTION_LOG_RETURN(INFO_ARCHIVE, result);
294 }
295
296 /***********************************************************************************************************************************
297 Get the backup.info file
298 ***********************************************************************************************************************************/
299 static InfoBackup *
verifyBackupInfoFile(void)300 verifyBackupInfoFile(void)
301 {
302 FUNCTION_LOG_VOID(logLevelDebug);
303
304 InfoBackup *result = NULL;
305
306 MEM_CONTEXT_TEMP_BEGIN()
307 {
308 // Get the main info file
309 VerifyInfoFile verifyBackupInfo = verifyInfoFile(INFO_BACKUP_PATH_FILE_STR, true, cfgOptionStrNull(cfgOptRepoCipherPass));
310
311 // If the main file did not error, then report on the copy's status and check checksums
312 if (verifyBackupInfo.errorCode == 0)
313 {
314 result = verifyBackupInfo.backup;
315 infoBackupMove(result, memContextPrior());
316
317 // Attempt to load the copy and report on it's status but don't keep it in memory
318 VerifyInfoFile verifyBackupInfoCopy = verifyInfoFile(
319 INFO_BACKUP_PATH_FILE_COPY_STR, false, cfgOptionStrNull(cfgOptRepoCipherPass));
320
321 // If the copy loaded successfully, then check the checksums
322 if (verifyBackupInfoCopy.errorCode == 0)
323 {
324 // If the info and info.copy checksums don't match each other than one (or both) of the files could be corrupt so
325 // log a warning but must trust main
326 if (!strEq(verifyBackupInfo.checksum, verifyBackupInfoCopy.checksum))
327 LOG_WARN("backup.info.copy does not match backup.info");
328 }
329 }
330 else
331 {
332 // Attempt to load the copy
333 VerifyInfoFile verifyBackupInfoCopy = verifyInfoFile(
334 INFO_BACKUP_PATH_FILE_COPY_STR, true, cfgOptionStrNull(cfgOptRepoCipherPass));
335
336 // If loaded successfully, then return the copy as usable
337 if (verifyBackupInfoCopy.errorCode == 0)
338 {
339 result = verifyBackupInfoCopy.backup;
340 infoBackupMove(result, memContextPrior());
341 }
342 }
343 }
344 MEM_CONTEXT_TEMP_END();
345
346 FUNCTION_LOG_RETURN(INFO_BACKUP, result);
347 }
348
349 /***********************************************************************************************************************************
350 Get the manifest file
351 ***********************************************************************************************************************************/
352 static Manifest *
verifyManifestFile(VerifyBackupResult * backupResult,const String * cipherPass,bool currentBackup,const InfoPg * pgHistory,unsigned int * jobErrorTotal)353 verifyManifestFile(
354 VerifyBackupResult *backupResult, const String *cipherPass, bool currentBackup, const InfoPg *pgHistory,
355 unsigned int *jobErrorTotal)
356 {
357 FUNCTION_LOG_BEGIN(logLevelDebug);
358 FUNCTION_TEST_PARAM_P(VERIFY_BACKUP_RESULT, backupResult); // The result set for the backup being processed
359 FUNCTION_TEST_PARAM(STRING, cipherPass); // Passphrase to access the manifest file
360 FUNCTION_LOG_PARAM(BOOL, currentBackup); // Is this possibly a backup currently in progress?
361 FUNCTION_TEST_PARAM(INFO_PG, pgHistory); // Database history
362 FUNCTION_TEST_PARAM_P(UINT, jobErrorTotal); // Pointer to the overall job error total
363 FUNCTION_LOG_END();
364
365 Manifest *result = NULL;
366
367 MEM_CONTEXT_TEMP_BEGIN()
368 {
369 String *fileName = strNewFmt(STORAGE_REPO_BACKUP "/%s/" BACKUP_MANIFEST_FILE, strZ(backupResult->backupLabel));
370
371 // Get the main manifest file
372 VerifyInfoFile verifyManifestInfo = verifyInfoFile(fileName, true, cipherPass);
373
374 // If the main file did not error, then report on the copy's status and check checksums
375 if (verifyManifestInfo.errorCode == 0)
376 {
377 result = verifyManifestInfo.manifest;
378
379 // The current in-progress backup is only notional until the main file is checked because the backup may have
380 // completed by the time the main manifest is checked here. So having a main manifest file means this backup is not
381 // (or is no longer) the currentBackup.
382 currentBackup = false;
383
384 // Attempt to load the copy and report on it's status but don't keep it in memory
385 VerifyInfoFile verifyManifestInfoCopy = verifyInfoFile(
386 strNewFmt("%s%s", strZ(fileName), INFO_COPY_EXT), false, cipherPass);
387
388 // If the copy loaded successfully, then check the checksums
389 if (verifyManifestInfoCopy.errorCode == 0)
390 {
391 // If the manifest and manifest.copy checksums don't match each other than one (or both) of the files could be
392 // corrupt so log a warning but trust main
393 if (!strEq(verifyManifestInfo.checksum, verifyManifestInfoCopy.checksum))
394 LOG_WARN_FMT("backup '%s' manifest.copy does not match manifest", strZ(backupResult->backupLabel));
395 }
396 }
397 else
398 {
399 // If this might be an in-progress backup and the main manifest is simply missing, it is assumed the backup is an
400 // actual in-progress backup and verification is skipped, otherwise, if the main is not simply missing, or this is not
401 // an in-progress backup then attempt to load the copy.
402 if (!(currentBackup && verifyManifestInfo.errorCode == errorTypeCode(&FileMissingError)))
403 {
404 currentBackup = false;
405
406 VerifyInfoFile verifyManifestInfoCopy = verifyInfoFile(
407 strNewFmt("%s%s", strZ(fileName), INFO_COPY_EXT), true, cipherPass);
408
409 // If loaded successfully, then return the copy as usable
410 if (verifyManifestInfoCopy.errorCode == 0)
411 {
412 LOG_WARN_FMT("%s/backup.manifest is missing or unusable, using copy", strZ(backupResult->backupLabel));
413
414 result = verifyManifestInfoCopy.manifest;
415 }
416 else if (verifyManifestInfo.errorCode == errorTypeCode(&FileMissingError) &&
417 verifyManifestInfoCopy.errorCode == errorTypeCode(&FileMissingError))
418 {
419 backupResult->status = backupMissingManifest;
420
421 LOG_WARN_FMT("manifest missing for '%s' - backup may have expired", strZ(backupResult->backupLabel));
422 }
423 }
424 else
425 {
426 backupResult->status = backupInProgress;
427
428 LOG_INFO_FMT("backup '%s' appears to be in progress, skipping", strZ(backupResult->backupLabel));
429 }
430 }
431
432 // If found a usable manifest then check that the database it was based on is in the history
433 if (result != NULL)
434 {
435 bool found = false;
436 const ManifestData *manData = manifestData(result);
437
438 // Confirm the PG database information from the manifest is in the history list
439 for (unsigned int infoPgIdx = 0; infoPgIdx < infoPgDataTotal(pgHistory); infoPgIdx++)
440 {
441 InfoPgData pgHistoryData = infoPgData(pgHistory, infoPgIdx);
442
443 if (pgHistoryData.id == manData->pgId && pgHistoryData.systemId == manData->pgSystemId &&
444 pgHistoryData.version == manData->pgVersion)
445 {
446 found = true;
447 break;
448 }
449 }
450
451 // If the PG data is not found in the backup.info history, then error and reset the result
452 if (!found)
453 {
454 LOG_ERROR_FMT(
455 errorTypeCode(&FileInvalidError),
456 "'%s' may not be recoverable - PG data (id %u, version %s, system-id %" PRIu64 ") is not in the backup.info"
457 " history, skipping",
458 strZ(backupResult->backupLabel), manData->pgId, strZ(pgVersionToStr(manData->pgVersion)), manData->pgSystemId);
459
460 manifestFree(result);
461 result = NULL;
462 }
463 else
464 manifestMove(result, memContextPrior());
465 }
466
467 // If the result is NULL and the backup status has not yet been set, then the backup is unusable (invalid)
468 if (result == NULL && backupResult->status == backupValid)
469 {
470 backupResult->status = backupInvalid;
471 (*jobErrorTotal)++;
472 }
473 }
474 MEM_CONTEXT_TEMP_END();
475
476 FUNCTION_LOG_RETURN(MANIFEST, result);
477 }
478
479 /***********************************************************************************************************************************
480 Check the history in the info files
481 ***********************************************************************************************************************************/
482 void
verifyPgHistory(const InfoPg * archiveInfoPg,const InfoPg * backupInfoPg)483 verifyPgHistory(const InfoPg *archiveInfoPg, const InfoPg *backupInfoPg)
484 {
485 FUNCTION_TEST_BEGIN();
486 FUNCTION_TEST_PARAM(INFO_PG, archiveInfoPg); // Postgres information from the archive.info file
487 FUNCTION_TEST_PARAM(INFO_PG, backupInfoPg); // Postgres information from the backup.info file
488 FUNCTION_TEST_END();
489
490 MEM_CONTEXT_TEMP_BEGIN()
491 {
492 // Check archive.info and backup.info current PG data matches. If there is a mismatch, verify cannot continue since
493 // the database is not considered accessible during the verify command so no way to tell which would be valid.
494 InfoPgData archiveInfoPgData = infoPgData(archiveInfoPg, infoPgDataCurrentId(archiveInfoPg));
495 InfoPgData backupInfoPgData = infoPgData(backupInfoPg, infoPgDataCurrentId(backupInfoPg));
496 checkStanzaInfo(&archiveInfoPgData, &backupInfoPgData);
497
498 unsigned int archiveInfoHistoryTotal = infoPgDataTotal(archiveInfoPg);
499 unsigned int backupInfoHistoryTotal = infoPgDataTotal(backupInfoPg);
500
501 String *errMsg = strNewZ("archive and backup history lists do not match");
502
503 if (archiveInfoHistoryTotal != backupInfoHistoryTotal)
504 THROW(FormatError, strZ(errMsg));
505
506 // Confirm the lists are the same
507 for (unsigned int infoPgIdx = 0; infoPgIdx < archiveInfoHistoryTotal; infoPgIdx++)
508 {
509 InfoPgData archiveInfoPgHistory = infoPgData(archiveInfoPg, infoPgIdx);
510 InfoPgData backupInfoPgHistory = infoPgData(backupInfoPg, infoPgIdx);
511
512 if (archiveInfoPgHistory.id != backupInfoPgHistory.id ||
513 archiveInfoPgHistory.systemId != backupInfoPgHistory.systemId ||
514 archiveInfoPgHistory.version != backupInfoPgHistory.version)
515 {
516 THROW(FormatError, strZ(errMsg));
517 }
518 }
519 }
520 MEM_CONTEXT_TEMP_END();
521
522 FUNCTION_TEST_RETURN_VOID();
523 }
524
525 /***********************************************************************************************************************************
526 Populate the WAL ranges from the provided, sorted, WAL files list for a given archiveId
527 ***********************************************************************************************************************************/
528 static void
verifyCreateArchiveIdRange(VerifyArchiveResult * archiveIdResult,StringList * walFileList,unsigned int * jobErrorTotal)529 verifyCreateArchiveIdRange(VerifyArchiveResult *archiveIdResult, StringList *walFileList, unsigned int *jobErrorTotal)
530 {
531 FUNCTION_TEST_BEGIN();
532 FUNCTION_TEST_PARAM_P(VERIFY_ARCHIVE_RESULT, archiveIdResult); // The result set for the archive Id being processed
533 FUNCTION_TEST_PARAM(STRING_LIST, walFileList); // Sorted (ascending) list of WAL files in a timeline
534 FUNCTION_TEST_PARAM_P(UINT, jobErrorTotal); // Pointer to the overall job error total
535 FUNCTION_TEST_END();
536
537 ASSERT(archiveIdResult != NULL);
538 ASSERT(walFileList != NULL);
539
540 unsigned int walFileIdx = 0;
541
542 // Initialize the WAL range
543 VerifyWalRange *walRange = NULL;
544
545 // If there is a WAL range for this archiveID, get the last one. If there is no timeline change then continue updating the last
546 // WAL range.
547 if (!lstEmpty(archiveIdResult->walRangeList) &&
548 strEq(
549 strSubN(((VerifyWalRange *)lstGetLast(archiveIdResult->walRangeList))->stop, 0, 8),
550 strSubN(strSubN(strLstGet(walFileList, walFileIdx), 0, WAL_SEGMENT_NAME_SIZE), 0, 8)))
551 {
552 walRange = lstGetLast(archiveIdResult->walRangeList);
553 }
554
555 do
556 {
557 String *walSegment = strSubN(strLstGet(walFileList, walFileIdx), 0, WAL_SEGMENT_NAME_SIZE);
558
559 // If walSegment found ends in FF for PG versions 9.2 or less then skip it but log error because it should not exist and
560 // PostgreSQL will ignore it
561 if (archiveIdResult->pgWalInfo.version <= PG_VERSION_92 && strEndsWithZ(walSegment, "FF"))
562 {
563 LOG_ERROR_FMT(
564 errorTypeCode(&FileInvalidError), "invalid WAL '%s' for '%s' exists, skipping", strZ(walSegment),
565 strZ(archiveIdResult->archiveId));
566
567 (*jobErrorTotal)++;
568
569 // Remove the file from the original list so no attempt is made to verify it
570 strLstRemoveIdx(walFileList, walFileIdx);
571 continue;
572 }
573
574 // The lists are sorted so look ahead to see if this is a duplicate of the next one in the list
575 if (walFileIdx + 1 < strLstSize(walFileList))
576 {
577 if (strEq(walSegment, strSubN(strLstGet(walFileList, walFileIdx + 1), 0, WAL_SEGMENT_NAME_SIZE)))
578 {
579 LOG_ERROR_FMT(
580 errorTypeCode(&FileInvalidError), "duplicate WAL '%s' for '%s' exists, skipping", strZ(walSegment),
581 strZ(archiveIdResult->archiveId));
582
583 (*jobErrorTotal)++;
584
585 bool foundDup = true;
586
587 // Remove all duplicates of this WAL, including this WAL, from the list
588 while (walFileIdx < strLstSize(walFileList) && foundDup)
589 {
590 if (strEq(walSegment, strSubN(strLstGet(walFileList, walFileIdx), 0, WAL_SEGMENT_NAME_SIZE)))
591 strLstRemoveIdx(walFileList, walFileIdx);
592 else
593 foundDup = false;
594 }
595
596 continue;
597 }
598 }
599
600 // Initialize the range if it has not yet been initialized and continue to next
601 if (walRange == NULL ||
602 !strEq(
603 walSegmentNext(walRange->stop, (size_t)archiveIdResult->pgWalInfo.size, archiveIdResult->pgWalInfo.version),
604 walSegment))
605 {
606 // Add the initialized wal range to the range list
607 MEM_CONTEXT_BEGIN(lstMemContext(archiveIdResult->walRangeList))
608 {
609 VerifyWalRange walRangeNew =
610 {
611 .start = strDup(walSegment),
612 .stop = strDup(walSegment),
613 .invalidFileList = lstNewP(sizeof(VerifyInvalidFile), .comparator = lstComparatorStr),
614 };
615
616 lstAdd(archiveIdResult->walRangeList, &walRangeNew);
617 }
618 MEM_CONTEXT_END();
619
620 // Set the current wal range being processed to what was just added
621 walRange = lstGetLast(archiveIdResult->walRangeList);
622 }
623 // If the next WAL is the appropriate distance away, then there is no gap
624 else
625 {
626 MEM_CONTEXT_BEGIN(lstMemContext(archiveIdResult->walRangeList))
627 {
628 strFree(walRange->stop);
629 walRange->stop = strDup(walSegment);
630 }
631 MEM_CONTEXT_END();
632 }
633
634 walFileIdx++;
635 }
636 while (walFileIdx < strLstSize(walFileList));
637
638 FUNCTION_TEST_RETURN_VOID();
639 }
640
641 /***********************************************************************************************************************************
642 Return verify jobs for the archive
643 ***********************************************************************************************************************************/
644 static ProtocolParallelJob *
verifyArchive(void * data)645 verifyArchive(void *data)
646 {
647 FUNCTION_TEST_BEGIN();
648 FUNCTION_TEST_PARAM_P(VOID, data); // Pointer to the job data
649 FUNCTION_TEST_END();
650
651 ProtocolParallelJob *result = NULL;
652
653 VerifyJobData *jobData = data;
654
655 // Process archive files, if any
656 while (!strLstEmpty(jobData->archiveIdList))
657 {
658 result = NULL;
659
660 // Add archiveId to the result list if the list is empty or the last processed is not equal to the current archiveId
661 if (lstEmpty(jobData->archiveIdResultList) ||
662 !strEq(
663 ((VerifyArchiveResult *)lstGetLast(jobData->archiveIdResultList))->archiveId, strLstGet(jobData->archiveIdList, 0)))
664 {
665 const String *archiveId = strLstGet(jobData->archiveIdList, 0);
666
667 MEM_CONTEXT_BEGIN(lstMemContext(jobData->archiveIdResultList))
668 {
669 VerifyArchiveResult archiveIdResult =
670 {
671 .archiveId = strDup(archiveId),
672 .walRangeList = lstNewP(sizeof(VerifyWalRange), .comparator = lstComparatorStr),
673 };
674
675 lstAdd(jobData->archiveIdResultList, &archiveIdResult);
676 }
677 MEM_CONTEXT_END();
678
679 // Free the old WAL path list
680 strLstFree(jobData->walPathList);
681
682 // Get the WAL paths for the archive Id
683 const String *archiveIdPath = strNewFmt(STORAGE_REPO_ARCHIVE "/%s", strZ(archiveId));
684
685 MEM_CONTEXT_BEGIN(jobData->memContext)
686 {
687 jobData->walPathList = strLstSort(
688 storageListP(storageRepo(), archiveIdPath, .expression = WAL_SEGMENT_DIR_REGEXP_STR), sortOrderAsc);
689 }
690 MEM_CONTEXT_END();
691 }
692
693 // If there are WAL paths then get the file lists
694 if (!strLstEmpty(jobData->walPathList))
695 {
696 // Get the archive id info for the current (last) archive id being processed
697 VerifyArchiveResult *archiveResult = lstGetLast(jobData->archiveIdResultList);
698
699 do
700 {
701 String *walPath = strLstGet(jobData->walPathList, 0);
702
703 // Get the WAL files for the first item in the WAL paths list and initialize WAL info and ranges
704 if (strLstEmpty(jobData->walFileList))
705 {
706 // Free the old WAL file list
707 strLstFree(jobData->walFileList);
708
709 // Get WAL file list
710 const String *walFilePath = strNewFmt(
711 STORAGE_REPO_ARCHIVE "/%s/%s", strZ(archiveResult->archiveId), strZ(walPath));
712
713 MEM_CONTEXT_BEGIN(jobData->memContext)
714 {
715 jobData->walFileList = strLstSort(
716 storageListP(storageRepo(), walFilePath, .expression = WAL_SEGMENT_FILE_REGEXP_STR), sortOrderAsc);
717 }
718 MEM_CONTEXT_END();
719
720 if (!strLstEmpty(jobData->walFileList))
721 {
722 if (archiveResult->pgWalInfo.size == 0)
723 {
724 // Initialize the WAL segment size from the first WAL
725 StorageRead *walRead = verifyFileLoad(
726 strNewFmt(
727 STORAGE_REPO_ARCHIVE "/%s/%s/%s", strZ(archiveResult->archiveId), strZ(walPath),
728 strZ(strLstGet(jobData->walFileList, 0))),
729 jobData->walCipherPass);
730
731 PgWal walInfo = pgWalFromBuffer(storageGetP(walRead, .exactSize = PG_WAL_HEADER_SIZE));
732
733 archiveResult->pgWalInfo.size = walInfo.size;
734 archiveResult->pgWalInfo.version = walInfo.version;
735 }
736
737 // Add total number of WAL files in the directory to the total WAL - this number will include duplicates,
738 // if any, that will be filtered out and not checked but will be reported as errors in the log
739 archiveResult->totalWalFile += strLstSize(jobData->walFileList);
740
741 verifyCreateArchiveIdRange(archiveResult, jobData->walFileList, &jobData->jobErrorTotal);
742 }
743 }
744
745 // If there are WAL files, then verify them
746 if (!strLstEmpty(jobData->walFileList))
747 {
748 // Get the fully qualified file name and checksum
749 const String *fileName = strLstGet(jobData->walFileList, 0);
750 const String *filePathName = strNewFmt(
751 STORAGE_REPO_ARCHIVE "/%s/%s/%s", strZ(archiveResult->archiveId), strZ(walPath), strZ(fileName));
752 String *checksum = strSubN(fileName, WAL_SEGMENT_NAME_SIZE + 1, HASH_TYPE_SHA1_SIZE_HEX);
753
754 // Set up the job
755 ProtocolCommand *command = protocolCommandNew(PROTOCOL_COMMAND_VERIFY_FILE);
756 PackWrite *const param = protocolCommandParam(command);
757
758 pckWriteStrP(param, filePathName);
759 pckWriteStrP(param, checksum);
760 pckWriteU64P(param, archiveResult->pgWalInfo.size);
761 pckWriteStrP(param, jobData->walCipherPass);
762
763 // Assign job to result, prepending the archiveId to the key for consistency with backup processing
764 result = protocolParallelJobNew(
765 VARSTR(strNewFmt("%s/%s", strZ(archiveResult->archiveId), strZ(filePathName))), command);
766
767 // Remove the file to process from the list
768 strLstRemoveIdx(jobData->walFileList, 0);
769
770 // If this is the last file to process for this timeline, then remove the path
771 if (strLstEmpty(jobData->walFileList))
772 strLstRemoveIdx(jobData->walPathList, 0);
773 }
774 else
775 {
776 // No valid WAL to process (may be only duplicates or nothing in WAL path) - remove the WAL path from the list
777 LOG_WARN_FMT(
778 "path '%s/%s' does not contain any valid WAL to be processed", strZ(archiveResult->archiveId),
779 strZ(walPath));
780 strLstRemoveIdx(jobData->walPathList, 0);
781 }
782
783 // If a job was found to be processed then break out to process it
784 if (result != NULL)
785 break;
786 }
787 while (!strLstEmpty(jobData->walPathList));
788
789 // If this is the last timeline to process for this archiveId, then remove the archiveId
790 if (strLstEmpty(jobData->walPathList))
791 strLstRemoveIdx(jobData->archiveIdList, 0);
792
793 // If a file was sent to be processed then break so can process it
794 if (result != NULL)
795 break;
796 }
797 else
798 {
799 // Log that no WAL paths exist in the archive Id dir - remove the archive Id from the list (nothing to process)
800 LOG_WARN_FMT("archive path '%s' is empty", strZ(strLstGet(jobData->archiveIdList, 0)));
801 strLstRemoveIdx(jobData->archiveIdList, 0);
802 }
803 }
804
805 FUNCTION_TEST_RETURN(result);
806 }
807
/***********************************************************************************************************************************
Verify the job data backups

Job generator for backup files. The backup at the head of jobData->backupList is processed: on first sight a result entry is added
to jobData->backupResultList and a usable manifest is requested from verifyManifestFile(). Each call then advances
jobData->manifestFileIdx through the manifest until a file that needs verification is found and returns a
PROTOCOL_COMMAND_VERIFY_FILE job for it. Files that reference a prior backup whose verification is already complete are not
verified again - their valid/invalid state is taken from the prior backup result. A backup is removed from the processing list when
it has no usable manifest, when its manifest contains no files, or when its last manifest file has been visited.

Returns NULL when there are no more backups to process.
***********************************************************************************************************************************/
static ProtocolParallelJob *
verifyBackup(void *data)
{
    FUNCTION_TEST_BEGIN();
        FUNCTION_TEST_PARAM_P(VOID, data);
    FUNCTION_TEST_END();

    ProtocolParallelJob *result = NULL;

    // The opaque callback data is the verify job state
    VerifyJobData *jobData = data;

    // Process backup files, if any
    while (!strLstEmpty(jobData->backupList))
    {
        result = NULL;

        // If result list is empty or the last processed is not equal to the backup being processed, then initialize the backup
        // data and results
        if (lstEmpty(jobData->backupResultList) ||
            !strEq(((VerifyBackupResult *)lstGetLast(jobData->backupResultList))->backupLabel, strLstGet(jobData->backupList, 0)))
        {
            // Allocate the new result in the result list context so it lives as long as the list
            MEM_CONTEXT_BEGIN(lstMemContext(jobData->backupResultList))
            {
                VerifyBackupResult backupResultNew =
                {
                    .backupLabel = strDup(strLstGet(jobData->backupList, 0)),
                    .invalidFileList = lstNewP(sizeof(VerifyInvalidFile), .comparator = lstComparatorStr),
                };

                // Add the backup to the result list
                lstAdd(jobData->backupResultList, &backupResultNew);
            }
            MEM_CONTEXT_END();

            // Get the result just added so it can be updated directly
            VerifyBackupResult *backupResult = lstGetLast(jobData->backupResultList);

            // If currentBackup is set (meaning the newest backup label on disk was not in the db:current section when the
            // backup.info file was read) and this is the same label, then set inProgressBackup to true, else false.
            // inProgressBackup may be changed in verifyManifestFile if a main backup.manifest exists since that would indicate the
            // backup completed during the verify process.
            bool inProgressBackup = strEq(jobData->currentBackup, backupResult->backupLabel);

            // Get a usable backup manifest file
            Manifest *manifest = verifyManifestFile(
                backupResult, jobData->manifestCipherPass, inProgressBackup, jobData->pgHistory, &jobData->jobErrorTotal);

            // If a usable backup.manifest file is not found
            if (manifest == NULL)
            {
                // Remove this backup from the processing list
                strLstRemoveIdx(jobData->backupList, 0);

                // No files to process so continue to the next backup in the list
                continue;
            }
            // Initialize the backup results and manifest for processing
            else
            {
                // Move the manifest to the jobData for processing
                jobData->manifest = manifestMove(manifest, jobData->memContext);

                // Initialize the jobData
                MEM_CONTEXT_BEGIN(jobData->memContext)
                {
                    // Get the cipher subpass used to decrypt files in the backup and initialize the file list index
                    jobData->backupCipherPass = strDup(manifestCipherSubPass(jobData->manifest));
                    jobData->manifestFileIdx = 0;
                }
                MEM_CONTEXT_END();

                const ManifestData *manData = manifestData(jobData->manifest);

                // Copy the manifest data needed for reporting into the result list context
                MEM_CONTEXT_BEGIN(lstMemContext(jobData->backupResultList))
                {
                    backupResult->totalFileManifest = manifestFileTotal(jobData->manifest);
                    backupResult->backupPrior = strDup(manData->backupLabelPrior);
                    backupResult->pgId = manData->pgId;
                    backupResult->pgVersion = manData->pgVersion;
                    backupResult->archiveStart = strDup(manData->archiveStart);
                    backupResult->archiveStop = strDup(manData->archiveStop);
                }
                MEM_CONTEXT_END();
            }
        }

        // The backup being processed is always the last entry in the result list
        VerifyBackupResult *backupResult = lstGetLast(jobData->backupResultList);

        // Process any files in the manifest
        if (jobData->manifestFileIdx < manifestFileTotal(jobData->manifest))
        {
            do
            {
                const ManifestFile *fileData = manifestFile(jobData->manifest, jobData->manifestFileIdx);

                // Remains NULL when the file does not need to be verified for this backup
                String *filePathName = NULL;

                // Track the files verified in order to determine when the processing of the backup is complete
                backupResult->totalFileVerify++;

                // Check if the file is referenced in a prior backup
                if (fileData->reference != NULL)
                {
                    // If the prior backup is not in the result list, then that backup was never processed (likely due to the --set
                    // option) so verify the file
                    unsigned int backupPriorIdx = lstFindIdx(jobData->backupResultList, &fileData->reference);

                    if (backupPriorIdx == LIST_NOT_FOUND)
                    {
                        filePathName = strNewFmt(
                            STORAGE_REPO_BACKUP "/%s/%s%s", strZ(fileData->reference), strZ(fileData->name),
                            strZ(compressExtStr((manifestData(jobData->manifest))->backupOptionCompressType)));
                    }
                    // Else the backup this file references has a result so check the processing state for the referenced backup
                    else
                    {
                        VerifyBackupResult *backupResultPrior = lstGet(jobData->backupResultList, backupPriorIdx);

                        // If the verify-state of the backup is not complete then verify the file
                        if (!backupResultPrior->fileVerifyComplete)
                        {
                            filePathName = strNewFmt(
                                STORAGE_REPO_BACKUP "/%s/%s%s", strZ(fileData->reference), strZ(fileData->name),
                                strZ(compressExtStr((manifestData(jobData->manifest))->backupOptionCompressType)));
                        }
                        // Else skip verification
                        else
                        {
                            // Name used to look the file up in the invalid file list of the prior backup
                            String *priorFile = strNewFmt(
                                "%s/%s%s", strZ(fileData->reference), strZ(fileData->name),
                                strZ(compressExtStr((manifestData(jobData->manifest))->backupOptionCompressType)));

                            unsigned int backupPriorInvalidIdx = lstFindIdx(backupResultPrior->invalidFileList, &priorFile);

                            // If the file is in the invalid file list of the prior backup where it is referenced then add the file
                            // as invalid to this backup result and set the backup result status; since already logged an error on
                            // this file, don't log again
                            if (backupPriorInvalidIdx != LIST_NOT_FOUND)
                            {
                                VerifyInvalidFile *invalidFile = lstGet(
                                    backupResultPrior->invalidFileList, backupPriorInvalidIdx);
                                verifyInvalidFileAdd(backupResult->invalidFileList, invalidFile->reason, invalidFile->fileName);
                                backupResult->status = backupInvalid;
                            }
                            // Else the file in the prior backup was valid so increment the total valid files for this backup
                            else
                            {
                                backupResult->totalFileValid++;
                            }
                        }
                    }
                }
                // Else file is not referenced in a prior backup
                else
                {
                    filePathName = strNewFmt(
                        STORAGE_REPO_BACKUP "/%s/%s%s", strZ(backupResult->backupLabel), strZ(fileData->name),
                        strZ(compressExtStr((manifestData(jobData->manifest))->backupOptionCompressType)));
                }

                // If constructed file name is not null then send it off for processing
                if (filePathName != NULL)
                {
                    // Set up the job
                    ProtocolCommand *command = protocolCommandNew(PROTOCOL_COMMAND_VERIFY_FILE);
                    PackWrite *const param = protocolCommandParam(command);

                    // Parameters are written in this order: file name, checksum, size, cipher passphrase
                    pckWriteStrP(param, filePathName);
                    // If the checksum is not present in the manifest, it will be calculated by manifest load
                    pckWriteStrP(param, STR(fileData->checksumSha1));
                    pckWriteU64P(param, fileData->size);
                    pckWriteStrP(param, jobData->backupCipherPass);

                    // Assign job to result (prepend backup label being processed to the key since some files are in a prior backup)
                    result = protocolParallelJobNew(
                        VARSTR(strNewFmt("%s/%s", strZ(backupResult->backupLabel), strZ(filePathName))), command);
                }

                // Increment the index to point to the next file
                jobData->manifestFileIdx++;

                // If this was the last file to process for this backup, then free the manifest and remove this backup from the
                // processing list
                if (jobData->manifestFileIdx == backupResult->totalFileManifest)
                {
                    manifestFree(jobData->manifest);
                    jobData->manifest = NULL;
                    strLstRemoveIdx(jobData->backupList, 0);
                }

                // If a job was found to be processed then break out to process it
                if (result != NULL)
                    break;
            }
            while (jobData->manifestFileIdx < backupResult->totalFileManifest);
        }
        else
        {
            // Nothing to process so report an error, free the manifest, set the status, and remove the backup from processing list
            LOG_ERROR_FMT(
                errorTypeCode(&FileInvalidError), "backup '%s' manifest does not contain any target files to verify",
                strZ(backupResult->backupLabel));

            jobData->jobErrorTotal++;

            manifestFree(jobData->manifest);
            jobData->manifest = NULL;

            backupResult->status = backupInvalid;

            strLstRemoveIdx(jobData->backupList, 0);
        }

        // If a job was found to be processed then break out to process it
        if (result != NULL)
            break;
    }

    FUNCTION_TEST_RETURN(result);
}
1031
1032 /***********************************************************************************************************************************
1033 Process the job data
1034 ***********************************************************************************************************************************/
1035 static ProtocolParallelJob *
verifyJobCallback(void * data,unsigned int clientIdx)1036 verifyJobCallback(void *data, unsigned int clientIdx)
1037 {
1038 FUNCTION_TEST_BEGIN();
1039 FUNCTION_TEST_PARAM_P(VOID, data); // Pointer to the job data
1040 (void)clientIdx; // Client index (not used for this process)
1041 FUNCTION_TEST_END();
1042
1043 ASSERT(data != NULL);
1044
1045 // Initialize the result
1046 ProtocolParallelJob *result = NULL;
1047
1048 MEM_CONTEXT_TEMP_BEGIN()
1049 {
1050 VerifyJobData *jobData = data;
1051
1052 if (!jobData->backupProcessing)
1053 {
1054 result = protocolParallelJobMove(verifyArchive(data), memContextPrior());
1055
1056 // Set the backupProcessing flag if the archive processing is finished so backup processing can begin immediately after
1057 jobData->backupProcessing = strLstEmpty(jobData->archiveIdList);
1058 }
1059
1060 if (jobData->backupProcessing)
1061 {
1062 // Only begin backup verification if the last archive result was processed
1063 if (result == NULL)
1064 result = protocolParallelJobMove(verifyBackup(data), memContextPrior());
1065 }
1066 }
1067 MEM_CONTEXT_TEMP_END();
1068
1069 FUNCTION_TEST_RETURN(result);
1070 }
1071
1072 /***********************************************************************************************************************************
1073 Helper function for returning a string corresponding to the result code
1074 ***********************************************************************************************************************************/
1075 static String *
verifyErrorMsg(VerifyResult verifyResult)1076 verifyErrorMsg(VerifyResult verifyResult)
1077 {
1078 FUNCTION_TEST_BEGIN();
1079 FUNCTION_TEST_PARAM(ENUM, verifyResult); // Result code from the verifyFile() function
1080 FUNCTION_TEST_END();
1081
1082 String *result = strNew();
1083
1084 if (verifyResult == verifyFileMissing)
1085 result = strCatZ(result, "file missing");
1086 else if (verifyResult == verifyChecksumMismatch)
1087 result = strCatZ(result, "invalid checksum");
1088 else if (verifyResult == verifySizeInvalid)
1089 result = strCatZ(result, "invalid size");
1090 else
1091 result = strCatZ(result, "invalid result");
1092
1093 FUNCTION_TEST_RETURN(result);
1094 }
1095
1096 /***********************************************************************************************************************************
1097 Helper function to output a log message based on job result that is not verifyOk and return an error count
1098 ***********************************************************************************************************************************/
1099 static unsigned int
verifyLogInvalidResult(const String * fileType,VerifyResult verifyResult,unsigned int processId,const String * filePathName)1100 verifyLogInvalidResult(const String *fileType, VerifyResult verifyResult, unsigned int processId, const String *filePathName)
1101 {
1102 FUNCTION_TEST_BEGIN();
1103 FUNCTION_TEST_PARAM(STRING, fileType); // Indicates archive or backup file
1104 FUNCTION_TEST_PARAM(ENUM, verifyResult); // Result code from the verifyFile() function
1105 FUNCTION_TEST_PARAM(UINT, processId); // Process Id reporting the result
1106 FUNCTION_TEST_PARAM(STRING, filePathName); // File for which results are being reported
1107 FUNCTION_TEST_END();
1108
1109 ASSERT(fileType != NULL);
1110 ASSERT(filePathName != NULL);
1111
1112 // Log a warning because the WAL may have gone missing if expire came through and removed it
1113 // legitimately so it is not necessarily an error so the jobErrorTotal should not be incremented
1114 if (strEq(fileType, STORAGE_REPO_ARCHIVE_STR) && verifyResult == verifyFileMissing)
1115 {
1116 LOG_WARN_PID_FMT(processId, "%s '%s'", strZ(verifyErrorMsg(verifyResult)), strZ(filePathName));
1117 FUNCTION_TEST_RETURN(0);
1118 }
1119 else
1120 {
1121 LOG_ERROR_PID_FMT(
1122 processId, errorTypeCode(&FileInvalidError), "%s '%s'", strZ(verifyErrorMsg(verifyResult)), strZ(filePathName));
1123 FUNCTION_TEST_RETURN(1);
1124 }
1125 }
1126
1127 /***********************************************************************************************************************************
1128 Helper function to set the currently processing backup label, if any, and check that the archiveIds are in the db history
1129 ***********************************************************************************************************************************/
1130 static String *
verifySetBackupCheckArchive(const StringList * backupList,const InfoBackup * backupInfo,const StringList * archiveIdList,const InfoPg * pgHistory,unsigned int * jobErrorTotal)1131 verifySetBackupCheckArchive(
1132 const StringList *backupList, const InfoBackup *backupInfo, const StringList *archiveIdList, const InfoPg *pgHistory,
1133 unsigned int *jobErrorTotal)
1134 {
1135 FUNCTION_TEST_BEGIN();
1136 FUNCTION_TEST_PARAM(STRING_LIST, backupList); // List of backup labels in the backup directory
1137 FUNCTION_TEST_PARAM(INFO_BACKUP, backupInfo); // Contents of the backup.info file
1138 FUNCTION_TEST_PARAM(STRING_LIST, archiveIdList); // List of archiveIds in the archive directory
1139 FUNCTION_TEST_PARAM(INFO_PG, pgHistory); // Pointer to InfoPg of archive.info for accessing PG history
1140 FUNCTION_TEST_PARAM_P(UINT, jobErrorTotal); // Pointer to overall job error total
1141 FUNCTION_TEST_END();
1142
1143 String *result = NULL;
1144
1145 MEM_CONTEXT_TEMP_BEGIN()
1146 {
1147 // If there are backups, set the last backup as current if it is not in backup.info - if it is, then it is complete, else
1148 // it will be checked later
1149 if (!strLstEmpty(backupList))
1150 {
1151 // Get the last backup as current if it is not in backup.info current list
1152 String *backupLabel = strLstGet(backupList, strLstSize(backupList) - 1);
1153
1154 if (infoBackupDataByLabel(backupInfo, backupLabel) == NULL)
1155 {
1156 // Duplicate the string into the prior context
1157 MEM_CONTEXT_PRIOR_BEGIN()
1158 {
1159 result = strDup(backupLabel);
1160 }
1161 MEM_CONTEXT_PRIOR_END();
1162 }
1163 }
1164
1165 // If there are archive directories on disk, make sure they are in the database history list
1166 if (!strLstEmpty(archiveIdList))
1167 {
1168 StringList *archiveIdHistoryList = strLstNew();
1169
1170 for (unsigned int histIdx = 0; histIdx < infoPgDataTotal(pgHistory); histIdx++)
1171 strLstAdd(archiveIdHistoryList, infoPgArchiveId(pgHistory, histIdx));
1172
1173 // Sort the history list
1174 strLstSort(strLstComparatorSet(archiveIdHistoryList, archiveIdComparator), sortOrderAsc);
1175
1176 String *missingFromHistory = strNew();
1177
1178 // Check if the archiveId on disk exists in the archive.info history list and report it if not
1179 for (unsigned int archiveIdx = 0; archiveIdx < strLstSize(archiveIdList); archiveIdx++)
1180 {
1181 String *archiveId = strLstGet(archiveIdList, archiveIdx);
1182
1183 if (!strLstExists(archiveIdHistoryList, archiveId))
1184 strCat(missingFromHistory, (strEmpty(missingFromHistory) ? archiveId : strNewFmt(", %s", strZ(archiveId))));
1185 }
1186
1187 if (!strEmpty(missingFromHistory))
1188 {
1189 LOG_ERROR_FMT(
1190 errorTypeCode(&ArchiveMismatchError), "archiveIds '%s' are not in the archive.info history list",
1191 strZ(missingFromHistory));
1192
1193 (*jobErrorTotal)++;
1194 }
1195 }
1196 }
1197 MEM_CONTEXT_TEMP_END();
1198
1199 FUNCTION_TEST_RETURN(result);
1200 }
1201
1202 /***********************************************************************************************************************************
1203 Add the file to the invalid file list for the range in which it exists
1204 ***********************************************************************************************************************************/
1205 static void
verifyAddInvalidWalFile(List * walRangeList,VerifyResult fileResult,const String * fileName,const String * walSegment)1206 verifyAddInvalidWalFile(List *walRangeList, VerifyResult fileResult, const String *fileName, const String *walSegment)
1207 {
1208 FUNCTION_TEST_BEGIN();
1209 FUNCTION_TEST_PARAM(LIST, walRangeList); // List of WAL ranges for an archive Id
1210 FUNCTION_TEST_PARAM(UINT, fileResult); // Result of verifyFile()
1211 FUNCTION_TEST_PARAM(STRING, fileName); // File name (without the REPO prefix)
1212 FUNCTION_TEST_PARAM(STRING, walSegment); // WAL segment, i.e. 000000010000000000000005
1213 FUNCTION_TEST_END();
1214
1215 ASSERT(walRangeList != NULL);
1216 ASSERT(fileName != NULL);
1217 ASSERT(walSegment != NULL);
1218
1219 MEM_CONTEXT_TEMP_BEGIN()
1220 {
1221 for (unsigned int walIdx = 0; walIdx < lstSize(walRangeList); walIdx++)
1222 {
1223 VerifyWalRange *walRange = lstGet(walRangeList, walIdx);
1224
1225 // If the WAL segment is less/equal to the stop file then it falls in this range since ranges are sorted by stop file in
1226 // ascending order, therefore first one found is the range
1227 if (strCmp(walRange->stop, walSegment) >= 0)
1228 {
1229 // Add the file to the range where it was found and exit the loop
1230 verifyInvalidFileAdd(walRange->invalidFileList, fileResult, fileName);
1231 break;
1232 }
1233 }
1234 }
1235 MEM_CONTEXT_TEMP_END();
1236
1237 FUNCTION_TEST_RETURN_VOID();
1238 }
1239
1240 /***********************************************************************************************************************************
1241 Render the results of the verify command
1242 ***********************************************************************************************************************************/
1243 static String *
verifyRender(List * archiveIdResultList,List * backupResultList)1244 verifyRender(List *archiveIdResultList, List *backupResultList)
1245 {
1246 FUNCTION_TEST_BEGIN();
1247 FUNCTION_TEST_PARAM(LIST, archiveIdResultList); // Result list for all archive Ids in the repo
1248 FUNCTION_TEST_PARAM(LIST, backupResultList); // Result list for all backups in the repo
1249 FUNCTION_TEST_END();
1250
1251 ASSERT(archiveIdResultList != NULL);
1252 ASSERT(backupResultList != NULL);
1253
1254 String *result = strNewZ("Results:");
1255
1256 // Render archive results
1257 if (lstEmpty(archiveIdResultList))
1258 strCatZ(result, "\n archiveId: none found");
1259 else
1260 {
1261 for (unsigned int archiveIdx = 0; archiveIdx < lstSize(archiveIdResultList); archiveIdx++)
1262 {
1263 VerifyArchiveResult *archiveIdResult = lstGet(archiveIdResultList, archiveIdx);
1264 strCatFmt(
1265 result, "\n archiveId: %s, total WAL checked: %u, total valid WAL: %u", strZ(archiveIdResult->archiveId),
1266 archiveIdResult->totalWalFile, archiveIdResult->totalValidWal);
1267
1268 if (archiveIdResult->totalWalFile > 0)
1269 {
1270 unsigned int errMissing = 0;
1271 unsigned int errChecksum = 0;
1272 unsigned int errSize = 0;
1273 unsigned int errOther = 0;
1274
1275 for (unsigned int walIdx = 0; walIdx < lstSize(archiveIdResult->walRangeList); walIdx++)
1276 {
1277 VerifyWalRange *walRange = lstGet(archiveIdResult->walRangeList, walIdx);
1278
1279 LOG_DETAIL_FMT(
1280 "archiveId: %s, wal start: %s, wal stop: %s", strZ(archiveIdResult->archiveId), strZ(walRange->start),
1281 strZ(walRange->stop));
1282
1283 unsigned int invalidIdx = 0;
1284
1285 while (invalidIdx < lstSize(walRange->invalidFileList))
1286 {
1287 VerifyInvalidFile *invalidFile = lstGet(walRange->invalidFileList, invalidIdx);
1288
1289 if (invalidFile->reason == verifyFileMissing)
1290 errMissing++;
1291 else if (invalidFile->reason == verifyChecksumMismatch)
1292 errChecksum++;
1293 else if (invalidFile->reason == verifySizeInvalid)
1294 errSize++;
1295 else
1296 errOther++;
1297
1298 invalidIdx++;
1299 }
1300 }
1301
1302 strCatFmt(
1303 result, "\n missing: %u, checksum invalid: %u, size invalid: %u, other: %u", errMissing, errChecksum,
1304 errSize, errOther);
1305 }
1306 }
1307 }
1308
1309 // Render backup results
1310 if (lstEmpty(backupResultList))
1311 strCatZ(result, "\n backup: none found");
1312 else
1313 {
1314 for (unsigned int backupIdx = 0; backupIdx < lstSize(backupResultList); backupIdx++)
1315 {
1316 VerifyBackupResult *backupResult = lstGet(backupResultList, backupIdx);
1317 String *status = NULL;
1318
1319 switch (backupResult->status)
1320 {
1321 case backupValid:
1322 {
1323 status = strNewZ("valid");
1324 break;
1325 }
1326
1327 case backupInvalid:
1328 {
1329 status = strNewZ("invalid");
1330 break;
1331 }
1332
1333 case backupMissingManifest:
1334 {
1335 status = strNewZ("manifest missing");
1336 break;
1337 }
1338
1339 case backupInProgress:
1340 {
1341 status = strNewZ("in-progress");
1342 break;
1343 }
1344 }
1345
1346 strCatFmt(
1347 result, "\n backup: %s, status: %s, total files checked: %u, total valid files: %u",
1348 strZ(backupResult->backupLabel), strZ(status), backupResult->totalFileVerify, backupResult->totalFileValid);
1349
1350 if (backupResult->totalFileVerify > 0)
1351 {
1352 unsigned int errMissing = 0;
1353 unsigned int errChecksum = 0;
1354 unsigned int errSize = 0;
1355 unsigned int errOther = 0;
1356
1357 for (unsigned int invalidIdx = 0; invalidIdx < lstSize(backupResult->invalidFileList); invalidIdx++)
1358 {
1359 VerifyInvalidFile *invalidFile = lstGet(backupResult->invalidFileList, invalidIdx);
1360
1361 if (invalidFile->reason == verifyFileMissing)
1362 errMissing++;
1363 else if (invalidFile->reason == verifyChecksumMismatch)
1364 errChecksum++;
1365 else if (invalidFile->reason == verifySizeInvalid)
1366 errSize++;
1367 else
1368 errOther++;
1369 }
1370
1371 strCatFmt(
1372 result, "\n missing: %u, checksum invalid: %u, size invalid: %u, other: %u", errMissing, errChecksum,
1373 errSize, errOther);
1374 }
1375 }
1376 }
1377
1378 FUNCTION_TEST_RETURN(result);
1379 }
1380
1381 /***********************************************************************************************************************************
1382 Process the verify command
1383 ***********************************************************************************************************************************/
1384 static String *
verifyProcess(unsigned int * errorTotal)1385 verifyProcess(unsigned int *errorTotal)
1386 {
1387 FUNCTION_LOG_BEGIN(logLevelDebug);
1388 FUNCTION_TEST_PARAM_P(UINT, errorTotal); // Pointer to overall job error total
1389 FUNCTION_LOG_END();
1390
1391 String *result = NULL;
1392
1393 MEM_CONTEXT_TEMP_BEGIN()
1394 {
1395 String *resultStr = strNew();
1396
1397 // Get the repo storage in case it is remote and encryption settings need to be pulled down
1398 const Storage *storage = storageRepo();
1399
1400 // Get a usable backup info file
1401 InfoBackup *backupInfo = verifyBackupInfoFile();
1402
1403 // If a usable backup.info file is not found, then report an error in the log
1404 if (backupInfo == NULL)
1405 {
1406 LOG_ERROR(errorTypeCode(&FormatError), "No usable backup.info file");
1407 (*errorTotal)++;
1408 }
1409
1410 // Get a usable archive info file
1411 InfoArchive *archiveInfo = verifyArchiveInfoFile();
1412
1413 // If a usable archive.info file is not found, then report an error in the log
1414 if (archiveInfo == NULL)
1415 {
1416 LOG_ERROR(errorTypeCode(&FormatError), "No usable archive.info file");
1417 (*errorTotal)++;
1418 }
1419
1420 // If both a usable archive info and backup info file were found, then proceed with verification
1421 if (archiveInfo != NULL && backupInfo != NULL)
1422 {
1423 TRY_BEGIN()
1424 {
1425 // Verify that the archive.info and backup.info current database info and history lists are the same
1426 verifyPgHistory(infoArchivePg(archiveInfo), infoBackupPg(backupInfo));
1427 }
1428 CATCH_ANY()
1429 {
1430 LOG_ERROR(errorTypeCode(&FormatError), errorMessage());
1431 (*errorTotal)++;
1432 }
1433 TRY_END();
1434 }
1435
1436 // If valid info files, then begin process of checking backups and archives in the repo
1437 if ((*errorTotal) == 0)
1438 {
1439 // Initialize the job data
1440 VerifyJobData jobData =
1441 {
1442 .memContext = memContextCurrent(),
1443 .walPathList = NULL,
1444 .walFileList = strLstNew(),
1445 .pgHistory = infoArchivePg(archiveInfo),
1446 .manifestCipherPass = infoPgCipherPass(infoBackupPg(backupInfo)),
1447 .walCipherPass = infoPgCipherPass(infoArchivePg(archiveInfo)),
1448 .archiveIdResultList = lstNewP(sizeof(VerifyArchiveResult), .comparator = archiveIdComparator),
1449 .backupResultList = lstNewP(sizeof(VerifyBackupResult), .comparator = lstComparatorStr),
1450 };
1451
1452 // Get a list of backups in the repo sorted ascending
1453 jobData.backupList = strLstSort(
1454 storageListP(
1455 storage, STORAGE_REPO_BACKUP_STR,
1456 .expression = backupRegExpP(.full = true, .differential = true, .incremental = true)),
1457 sortOrderAsc);
1458
1459 // Get a list of archive Ids in the repo (e.g. 9.4-1, 10-2, etc) sorted ascending by the db-id (number after the dash)
1460 jobData.archiveIdList = strLstSort(
1461 strLstComparatorSet(
1462 storageListP(storage, STORAGE_REPO_ARCHIVE_STR, .expression = STRDEF(REGEX_ARCHIVE_DIR_DB_VERSION)),
1463 archiveIdComparator),
1464 sortOrderAsc);
1465
1466 // Only begin processing if there are some archives or backups in the repo
1467 if (!strLstEmpty(jobData.archiveIdList) || !strLstEmpty(jobData.backupList))
1468 {
1469 // Warn if there are no archives or there are no backups in the repo so that the callback need not try to
1470 // distinguish between having processed all of the list or if the list was missing in the first place
1471 if (strLstEmpty(jobData.archiveIdList) || strLstEmpty(jobData.backupList))
1472 LOG_WARN_FMT("no %s exist in the repo", strLstEmpty(jobData.archiveIdList) ? "archives" : "backups");
1473
1474 // If there are no archives to process, then set the processing flag to skip to processing the backups
1475 if (strLstEmpty(jobData.archiveIdList))
1476 jobData.backupProcessing = true;
1477
1478 // Set current backup if there is one and verify the archive history on disk is in the database history
1479 jobData.currentBackup = verifySetBackupCheckArchive(
1480 jobData.backupList, backupInfo, jobData.archiveIdList, jobData.pgHistory, &jobData.jobErrorTotal);
1481
1482 // Create the parallel executor
1483 ProtocolParallel *parallelExec = protocolParallelNew(
1484 cfgOptionUInt64(cfgOptProtocolTimeout) / 2, verifyJobCallback, &jobData);
1485
1486 for (unsigned int processIdx = 1; processIdx <= cfgOptionUInt(cfgOptProcessMax); processIdx++)
1487 protocolParallelClientAdd(parallelExec, protocolLocalGet(protocolStorageTypeRepo, 0, processIdx));
1488
1489 // Process jobs
1490 MEM_CONTEXT_TEMP_RESET_BEGIN()
1491 {
1492 do
1493 {
1494 unsigned int completed = protocolParallelProcess(parallelExec);
1495
1496 // Process completed jobs
1497 for (unsigned int jobIdx = 0; jobIdx < completed; jobIdx++)
1498 {
1499 // Get the job and job key
1500 ProtocolParallelJob *job = protocolParallelResult(parallelExec);
1501 unsigned int processId = protocolParallelJobProcessId(job);
1502 StringList *filePathLst = strLstNewSplit(varStr(protocolParallelJobKey(job)), FSLASH_STR);
1503
1504 // Remove the result and file type identifier and recreate the path file name
1505 const String *resultId = strLstGet(filePathLst, 0);
1506 strLstRemoveIdx(filePathLst, 0);
1507 const String *fileType = strLstGet(filePathLst, 0);
1508 strLstRemoveIdx(filePathLst, 0);
1509 String *filePathName = strLstJoin(filePathLst, "/");
1510
1511 // Initialize the result sets
1512 VerifyArchiveResult *archiveIdResult = NULL;
1513 VerifyBackupResult *backupResult = NULL;
1514
1515 // Get archiveId result data
1516 if (strEq(fileType, STORAGE_REPO_ARCHIVE_STR))
1517 {
1518 // Find the archiveId in the list - assert if not found since this should never happen
1519 unsigned int index = lstFindIdx(jobData.archiveIdResultList, &resultId);
1520 ASSERT(index != LIST_NOT_FOUND);
1521
1522 archiveIdResult = lstGet(jobData.archiveIdResultList, index);
1523 }
1524 // Else get the backup result data
1525 else
1526 {
1527 unsigned int index = lstFindIdx(jobData.backupResultList, &resultId);
1528 ASSERT(index != LIST_NOT_FOUND);
1529
1530 backupResult = lstGet(jobData.backupResultList, index);
1531 }
1532
1533 // The job was successful
1534 if (protocolParallelJobErrorCode(job) == 0)
1535 {
1536 const VerifyResult verifyResult = (VerifyResult)pckReadU32P(protocolParallelJobResult(job));
1537
1538 // Update the result set for the type of file being processed
1539 if (strEq(fileType, STORAGE_REPO_ARCHIVE_STR))
1540 {
1541 if (verifyResult == verifyOk)
1542 archiveIdResult->totalValidWal++;
1543 else
1544 {
1545 jobData.jobErrorTotal += verifyLogInvalidResult(
1546 fileType, verifyResult, processId, filePathName);
1547
1548 // Add invalid file to the WAL range
1549 verifyAddInvalidWalFile(
1550 archiveIdResult->walRangeList, verifyResult, filePathName,
1551 strSubN(strLstGet(filePathLst, strLstSize(filePathLst) - 1), 0,
1552 WAL_SEGMENT_NAME_SIZE));
1553 }
1554 }
1555 else
1556 {
1557 if (verifyResult == verifyOk)
1558 backupResult->totalFileValid++;
1559 else
1560 {
1561 jobData.jobErrorTotal += verifyLogInvalidResult(
1562 fileType, verifyResult, processId, filePathName);
1563 backupResult->status = backupInvalid;
1564 verifyInvalidFileAdd(backupResult->invalidFileList, verifyResult, filePathName);
1565 }
1566 }
1567 }
1568 // Else the job errored
1569 else
1570 {
1571 // Log a protocol error and increment the jobErrorTotal
1572 LOG_ERROR_PID_FMT(
1573 processId, errorTypeCode(&ProtocolError),
1574 "%s %s: [%d] %s", strZ(verifyErrorMsg(verifyOtherError)), strZ(filePathName),
1575 protocolParallelJobErrorCode(job), strZ(protocolParallelJobErrorMessage(job)));
1576
1577 jobData.jobErrorTotal++;
1578
1579 // Add invalid file with "OtherError" reason to invalid file list
1580 if (strEq(fileType, STORAGE_REPO_ARCHIVE_STR))
1581 {
1582 // Add invalid file to the WAL range
1583 verifyAddInvalidWalFile(
1584 archiveIdResult->walRangeList, verifyOtherError, filePathName,
1585 strSubN(strLstGet(filePathLst, strLstSize(filePathLst) - 1), 0, WAL_SEGMENT_NAME_SIZE));
1586 }
1587 else
1588 {
1589 backupResult->status = backupInvalid;
1590 verifyInvalidFileAdd(backupResult->invalidFileList, verifyOtherError, filePathName);
1591 }
1592 }
1593
1594 // Set backup verification complete for a backup if all files have run through verification
1595 if (strEq(fileType, STORAGE_REPO_BACKUP_STR) &&
1596 backupResult->totalFileVerify == backupResult->totalFileManifest)
1597 {
1598 backupResult->fileVerifyComplete = true;
1599 }
1600
1601 // Free the job
1602 protocolParallelJobFree(job);
1603 }
1604
1605 // Reset the memory context occasionally so we don't use too much memory or slow down processing
1606 MEM_CONTEXT_TEMP_RESET(1000);
1607 }
1608 while (!protocolParallelDone(parallelExec));
1609 }
1610 MEM_CONTEXT_TEMP_END();
1611
1612 // ??? Need to do the final reconciliation - checking backup required WAL against, valid WAL
1613
1614 // Report results
1615 resultStr = verifyRender(jobData.archiveIdResultList, jobData.backupResultList);
1616 }
1617 else
1618 LOG_WARN("no archives or backups exist in the repo");
1619
1620 (*errorTotal) += jobData.jobErrorTotal;
1621 }
1622
1623 MEM_CONTEXT_PRIOR_BEGIN()
1624 {
1625 result = strDup(resultStr);
1626 }
1627 MEM_CONTEXT_PRIOR_END();
1628 }
1629 MEM_CONTEXT_TEMP_END();
1630
1631 FUNCTION_LOG_RETURN(STRING, result);
1632 }
1633
1634 /**********************************************************************************************************************************/
1635 void
cmdVerify(void)1636 cmdVerify(void)
1637 {
1638 FUNCTION_LOG_VOID(logLevelDebug);
1639
1640 MEM_CONTEXT_TEMP_BEGIN()
1641 {
1642 unsigned int errorTotal = 0;
1643 String *result = verifyProcess(&errorTotal);
1644
1645 // Output results if any
1646 if (strSize(result) > 0)
1647 LOG_INFO_FMT("%s", strZ(result));
1648
1649 // Throw an error if any encountered
1650 if (errorTotal > 0)
1651 THROW_FMT(RuntimeError, "%u fatal errors encountered, see log for details", errorTotal);
1652 }
1653 MEM_CONTEXT_TEMP_END();
1654
1655 FUNCTION_LOG_RETURN_VOID();
1656 }
1657