1 /***********************************************************************************************************************************
2 Verify Command
3 
4 Verify the contents of the repository.
5 ***********************************************************************************************************************************/
6 #include "build.auto.h"
7 
8 #include <stdlib.h>
9 #include <string.h>
10 #include <unistd.h>
11 
12 #include "command/archive/common.h"
13 #include "command/check/common.h"
14 #include "command/verify/file.h"
15 #include "command/verify/protocol.h"
16 #include "common/compress/helper.h"
17 #include "common/crypto/cipherBlock.h"
18 #include "common/debug.h"
19 #include "common/io/fdWrite.h"
20 #include "common/io/io.h"
21 #include "common/log.h"
22 #include "config/config.h"
23 #include "info/infoArchive.h"
24 #include "info/infoBackup.h"
25 #include "info/manifest.h"
26 #include "postgres/interface.h"
27 #include "postgres/version.h"
28 #include "protocol/helper.h"
29 #include "protocol/parallel.h"
30 #include "storage/helper.h"
31 
32 /***********************************************************************************************************************************
33 Data Types and Structures
34 ***********************************************************************************************************************************/
// Function logging support for VerifyArchiveResult: the _TYPE macro names the type and the _FORMAT macro renders a value by passing
// its address and the type name to objToLog()
#define FUNCTION_LOG_VERIFY_ARCHIVE_RESULT_TYPE                                                                                    \
    VerifyArchiveResult
#define FUNCTION_LOG_VERIFY_ARCHIVE_RESULT_FORMAT(value, buffer, bufferSize)                                                       \
    objToLog(&value, "VerifyArchiveResult", buffer, bufferSize)

// Function logging support for VerifyBackupResult, same pattern as above
#define FUNCTION_LOG_VERIFY_BACKUP_RESULT_TYPE                                                                                     \
    VerifyBackupResult
#define FUNCTION_LOG_VERIFY_BACKUP_RESULT_FORMAT(value, buffer, bufferSize)                                                        \
    objToLog(&value, "VerifyBackupResult", buffer, bufferSize)
44 
// Structure for verifying repository info files (backup.info, archive.info, or a backup manifest). verifyInfoFile() zero-initializes
// the struct, so any content pointer that was not loaded remains NULL.
typedef struct VerifyInfoFile
{
    InfoBackup *backup;                                             // Backup.info file contents
    InfoArchive *archive;                                           // Archive.info file contents
    Manifest *manifest;                                             // Manifest file contents
    const String *checksum;                                         // File checksum (result of the hash filter on the read)
    int errorCode;                                                  // Error code else 0 for no error
} VerifyInfoFile;
54 
// Job data results structures for archive and backup. This one collects the verification results for a single archive id.
typedef struct VerifyArchiveResult
{
    String *archiveId;                                              // Archive Id (e.g. 9.6-1, 10-2)
    unsigned int totalWalFile;                                      // Total number of WAL files listed in directory on first read
    unsigned int totalValidWal;                                     // Total number of WAL that were verified and valid
    PgWal pgWalInfo;                                                // PG version, WAL size, system id
    List *walRangeList;                                             // List of VerifyWalRange - new item is when WAL is missing
} VerifyArchiveResult;
64 
// A range of sequential WAL segments. Start/stop hold complete WAL segment names, so both include the timeline
// (e.g. 000000020000000100000005).
typedef struct VerifyWalRange
{
    String *stop;                                                   // Last WAL segment in this sequential range
    String *start;                                                  // First WAL segment in this sequential range
    List *invalidFileList;                                          // After all jobs complete, list of VerifyInvalidFile
} VerifyWalRange;
72 
// Invalid file information (files that exist but fail verification, as opposed to missing files) - used for both archive and backup
typedef struct VerifyInvalidFile
{
    String *fileName;                                               // Name of the file (includes path within the stanza)
    VerifyResult reason;                                            // Reason file is invalid (e.g. incorrect checksum)
} VerifyInvalidFile;
79 
// Status result of a backup. backupValid is the first enumerator (value 0), so a zero-initialized status reads as valid until some
// check changes it.
typedef enum
{
    backupValid,                                                    // Default: All files in backup label repo passed verification
    backupInvalid,                                                  // One or more files failed verification or no usable manifest
    backupMissingManifest,                                          // Backup manifest missing (backup may have expired)
    backupInProgress,                                               // Backup appeared to be in progress (so was skipped)
} VerifyBackupResultStatus;
88 
// Verification results collected for a single backup label
typedef struct VerifyBackupResult
{
    String *backupLabel;                                            // Label assigned to the backup
    VerifyBackupResultStatus status;                                // Final status of the backup
    bool fileVerifyComplete;                                        // Have all the files of the backup completed verification?
    unsigned int totalFileManifest;                                 // Total number of backup files in the manifest
    unsigned int totalFileVerify;                                   // Total number of backup files being verified
    unsigned int totalFileValid;                                    // Total number of backup files that were verified and valid
    String *backupPrior;                                            // Prior backup that this backup depends on, if any
    unsigned int pgId;                                              // PG id will be used to find WAL for the backup in the repo
    unsigned int pgVersion;                                         // PG version will be used with PG id to find WAL in the repo
    String *archiveStart;                                           // First WAL segment in the backup
    String *archiveStop;                                            // Last WAL segment in the backup
    List *invalidFileList;                                          // List of invalid files found in the backup
} VerifyBackupResult;
104 
// Job data structure for processing and results collection
typedef struct VerifyJobData
{
    MemContext *memContext;                                         // Context for memory allocations in this struct
    StringList *archiveIdList;                                      // List of archive ids to verify
    StringList *walPathList;                                        // WAL path list for a single archive id
    StringList *walFileList;                                        // WAL file list for a single WAL path
    StringList *backupList;                                         // List of backups to verify
    Manifest *manifest;                                             // Manifest contents with list of files to verify
    unsigned int manifestFileIdx;                                   // Index of the file within the manifest file list to process
    String *currentBackup;                                          // In progress backup, if any
    const InfoPg *pgHistory;                                        // Database history list
    bool backupProcessing;                                          // Are we processing WAL or are we processing backups
    const String *manifestCipherPass;                               // Cipher pass for reading backup manifests
    const String *walCipherPass;                                    // Cipher pass for reading WAL files
    const String *backupCipherPass;                                 // Cipher pass for reading backup files referenced in a manifest
    unsigned int jobErrorTotal;                                     // Total errors that occurred during the job execution
    List *archiveIdResultList;                                      // Archive results (list of VerifyArchiveResult)
    List *backupResultList;                                         // Backup results (list of VerifyBackupResult)
} VerifyJobData;
125 
126 /***********************************************************************************************************************************
127 Helper function to add a file to an invalid file list
128 ***********************************************************************************************************************************/
129 static void
verifyInvalidFileAdd(List * invalidFileList,VerifyResult reason,const String * fileName)130 verifyInvalidFileAdd(List *invalidFileList, VerifyResult reason, const String *fileName)
131 {
132     FUNCTION_TEST_BEGIN();
133         FUNCTION_TEST_PARAM(LIST, invalidFileList);                 // Invalid file list to add the filename to
134         FUNCTION_TEST_PARAM(ENUM, reason);                          // Reason for invalid file
135         FUNCTION_TEST_PARAM(STRING, fileName);                      // Name of invalid file
136     FUNCTION_TEST_END();
137 
138     ASSERT(invalidFileList != NULL);
139     ASSERT(fileName != NULL);
140 
141     MEM_CONTEXT_BEGIN(lstMemContext(invalidFileList))
142     {
143         VerifyInvalidFile invalidFile =
144         {
145             .fileName = strDup(fileName),
146             .reason = reason,
147         };
148 
149         lstAdd(invalidFileList, &invalidFile);
150     }
151     MEM_CONTEXT_END();
152 
153     FUNCTION_TEST_RETURN_VOID();
154 }
155 
156 /***********************************************************************************************************************************
157 Load a file into memory
158 ***********************************************************************************************************************************/
159 static StorageRead *
verifyFileLoad(const String * pathFileName,const String * cipherPass)160 verifyFileLoad(const String *pathFileName, const String *cipherPass)
161 {
162     FUNCTION_TEST_BEGIN();
163         FUNCTION_TEST_PARAM(STRING, pathFileName);                  // Fully qualified path/file name
164         FUNCTION_TEST_PARAM(STRING, cipherPass);                    // Password to open file if encrypted
165     FUNCTION_TEST_END();
166 
167     ASSERT(pathFileName != NULL);
168 
169     // Read the file and error if missing
170     StorageRead *result = storageNewReadP(storageRepo(), pathFileName);
171 
172     // *read points to a location within result so update result with contents based on necessary filters
173     IoRead *read = storageReadIo(result);
174     cipherBlockFilterGroupAdd(
175         ioReadFilterGroup(read), cfgOptionStrId(cfgOptRepoCipherType), cipherModeDecrypt, cipherPass);
176     ioFilterGroupAdd(ioReadFilterGroup(read), cryptoHashNew(HASH_TYPE_SHA1_STR));
177 
178     // If the file is compressed, add a decompression filter
179     if (compressTypeFromName(pathFileName) != compressTypeNone)
180         ioFilterGroupAdd(ioReadFilterGroup(read), decompressFilter(compressTypeFromName(pathFileName)));
181 
182     FUNCTION_TEST_RETURN(result);
183 }
184 
185 /***********************************************************************************************************************************
186 Get status of info files in the repository
187 ***********************************************************************************************************************************/
188 static VerifyInfoFile
verifyInfoFile(const String * pathFileName,bool keepFile,const String * cipherPass)189 verifyInfoFile(const String *pathFileName, bool keepFile, const String *cipherPass)
190 {
191     FUNCTION_LOG_BEGIN(logLevelDebug);
192         FUNCTION_LOG_PARAM(STRING, pathFileName);                   // Fully qualified path/file name
193         FUNCTION_LOG_PARAM(BOOL, keepFile);                         // Should the file be kept in memory?
194         FUNCTION_TEST_PARAM(STRING, cipherPass);                    // Password to open file if encrypted
195     FUNCTION_LOG_END();
196 
197     ASSERT(pathFileName != NULL);
198 
199     VerifyInfoFile result = {.errorCode = 0};
200 
201     MEM_CONTEXT_TEMP_BEGIN()
202     {
203         TRY_BEGIN()
204         {
205             IoRead *infoRead = storageReadIo(verifyFileLoad(pathFileName, cipherPass));
206 
207             // If directed to keep the loaded file in memory, then move the file into the result, else drain the io and close it
208             if (keepFile)
209             {
210                 if (strBeginsWith(pathFileName, INFO_BACKUP_PATH_FILE_STR))
211                     result.backup = infoBackupMove(infoBackupNewLoad(infoRead), memContextPrior());
212                 else if (strBeginsWith(pathFileName, INFO_ARCHIVE_PATH_FILE_STR))
213                     result.archive = infoArchiveMove(infoArchiveNewLoad(infoRead), memContextPrior());
214                 else
215                     result.manifest = manifestMove(manifestNewLoad(infoRead), memContextPrior());
216             }
217             else
218                 ioReadDrain(infoRead);
219 
220             MEM_CONTEXT_PRIOR_BEGIN()
221             {
222                 result.checksum = strDup(varStr(ioFilterGroupResult(ioReadFilterGroup(infoRead), CRYPTO_HASH_FILTER_TYPE_STR)));
223             }
224             MEM_CONTEXT_PRIOR_END();
225         }
226         CATCH_ANY()
227         {
228             result.errorCode = errorCode();
229             String *errorMsg = strNewZ(errorMessage());
230 
231             if (result.errorCode == errorTypeCode(&ChecksumError))
232                 strCat(errorMsg, strNewFmt(" %s", strZ(pathFileName)));
233 
234             LOG_WARN(strZ(errorMsg));
235         }
236         TRY_END();
237     }
238     MEM_CONTEXT_TEMP_END();
239 
240     FUNCTION_LOG_RETURN_STRUCT(result);
241 }
242 
243 /***********************************************************************************************************************************
244 Get the archive.info file
245 ***********************************************************************************************************************************/
246 static InfoArchive *
verifyArchiveInfoFile(void)247 verifyArchiveInfoFile(void)
248 {
249     FUNCTION_LOG_VOID(logLevelDebug);
250 
251     InfoArchive *result = NULL;
252 
253     MEM_CONTEXT_TEMP_BEGIN()
254     {
255         // Get the main info file
256         VerifyInfoFile verifyArchiveInfo = verifyInfoFile(INFO_ARCHIVE_PATH_FILE_STR, true, cfgOptionStrNull(cfgOptRepoCipherPass));
257 
258         // If the main file did not error, then report on the copy's status and check checksums
259         if (verifyArchiveInfo.errorCode == 0)
260         {
261             result = verifyArchiveInfo.archive;
262             infoArchiveMove(result, memContextPrior());
263 
264             // Attempt to load the copy and report on it's status but don't keep it in memory
265             VerifyInfoFile verifyArchiveInfoCopy = verifyInfoFile(
266                 INFO_ARCHIVE_PATH_FILE_COPY_STR, false, cfgOptionStrNull(cfgOptRepoCipherPass));
267 
268             // If the copy loaded successfully, then check the checksums
269             if (verifyArchiveInfoCopy.errorCode == 0)
270             {
271                 // If the info and info.copy checksums don't match each other than one (or both) of the files could be corrupt so
272                 // log a warning but must trust main
273                 if (!strEq(verifyArchiveInfo.checksum, verifyArchiveInfoCopy.checksum))
274                     LOG_WARN("archive.info.copy does not match archive.info");
275             }
276         }
277         else
278         {
279             // Attempt to load the copy
280             VerifyInfoFile verifyArchiveInfoCopy = verifyInfoFile(
281                 INFO_ARCHIVE_PATH_FILE_COPY_STR, true, cfgOptionStrNull(cfgOptRepoCipherPass));
282 
283             // If loaded successfully, then return the copy as usable
284             if (verifyArchiveInfoCopy.errorCode == 0)
285             {
286                 result = verifyArchiveInfoCopy.archive;
287                 infoArchiveMove(result, memContextPrior());
288             }
289         }
290     }
291     MEM_CONTEXT_TEMP_END();
292 
293     FUNCTION_LOG_RETURN(INFO_ARCHIVE, result);
294 }
295 
296 /***********************************************************************************************************************************
297 Get the backup.info file
298 ***********************************************************************************************************************************/
299 static InfoBackup *
verifyBackupInfoFile(void)300 verifyBackupInfoFile(void)
301 {
302     FUNCTION_LOG_VOID(logLevelDebug);
303 
304     InfoBackup *result = NULL;
305 
306     MEM_CONTEXT_TEMP_BEGIN()
307     {
308         // Get the main info file
309         VerifyInfoFile verifyBackupInfo = verifyInfoFile(INFO_BACKUP_PATH_FILE_STR, true, cfgOptionStrNull(cfgOptRepoCipherPass));
310 
311         // If the main file did not error, then report on the copy's status and check checksums
312         if (verifyBackupInfo.errorCode == 0)
313         {
314             result = verifyBackupInfo.backup;
315             infoBackupMove(result, memContextPrior());
316 
317             // Attempt to load the copy and report on it's status but don't keep it in memory
318             VerifyInfoFile verifyBackupInfoCopy = verifyInfoFile(
319                 INFO_BACKUP_PATH_FILE_COPY_STR, false, cfgOptionStrNull(cfgOptRepoCipherPass));
320 
321             // If the copy loaded successfully, then check the checksums
322             if (verifyBackupInfoCopy.errorCode == 0)
323             {
324                 // If the info and info.copy checksums don't match each other than one (or both) of the files could be corrupt so
325                 // log a warning but must trust main
326                 if (!strEq(verifyBackupInfo.checksum, verifyBackupInfoCopy.checksum))
327                     LOG_WARN("backup.info.copy does not match backup.info");
328             }
329         }
330         else
331         {
332             // Attempt to load the copy
333             VerifyInfoFile verifyBackupInfoCopy = verifyInfoFile(
334                 INFO_BACKUP_PATH_FILE_COPY_STR, true, cfgOptionStrNull(cfgOptRepoCipherPass));
335 
336             // If loaded successfully, then return the copy as usable
337             if (verifyBackupInfoCopy.errorCode == 0)
338             {
339                 result = verifyBackupInfoCopy.backup;
340                 infoBackupMove(result, memContextPrior());
341             }
342         }
343     }
344     MEM_CONTEXT_TEMP_END();
345 
346     FUNCTION_LOG_RETURN(INFO_BACKUP, result);
347 }
348 
349 /***********************************************************************************************************************************
350 Get the manifest file
351 ***********************************************************************************************************************************/
352 static Manifest *
verifyManifestFile(VerifyBackupResult * backupResult,const String * cipherPass,bool currentBackup,const InfoPg * pgHistory,unsigned int * jobErrorTotal)353 verifyManifestFile(
354     VerifyBackupResult *backupResult, const String *cipherPass, bool currentBackup, const InfoPg *pgHistory,
355     unsigned int *jobErrorTotal)
356 {
357     FUNCTION_LOG_BEGIN(logLevelDebug);
358         FUNCTION_TEST_PARAM_P(VERIFY_BACKUP_RESULT, backupResult);  // The result set for the backup being processed
359         FUNCTION_TEST_PARAM(STRING, cipherPass);                    // Passphrase to access the manifest file
360         FUNCTION_LOG_PARAM(BOOL, currentBackup);                    // Is this possibly a backup currently in progress?
361         FUNCTION_TEST_PARAM(INFO_PG, pgHistory);                    // Database history
362         FUNCTION_TEST_PARAM_P(UINT, jobErrorTotal);                 // Pointer to the overall job error total
363     FUNCTION_LOG_END();
364 
365     Manifest *result = NULL;
366 
367     MEM_CONTEXT_TEMP_BEGIN()
368     {
369         String *fileName = strNewFmt(STORAGE_REPO_BACKUP "/%s/" BACKUP_MANIFEST_FILE, strZ(backupResult->backupLabel));
370 
371         // Get the main manifest file
372         VerifyInfoFile verifyManifestInfo = verifyInfoFile(fileName, true, cipherPass);
373 
374         // If the main file did not error, then report on the copy's status and check checksums
375         if (verifyManifestInfo.errorCode == 0)
376         {
377             result = verifyManifestInfo.manifest;
378 
379             // The current in-progress backup is only notional until the main file is checked because the backup may have
380             // completed by the time the main manifest is checked here. So having a main manifest file means this backup is not
381             // (or is no longer) the currentBackup.
382             currentBackup = false;
383 
384             // Attempt to load the copy and report on it's status but don't keep it in memory
385             VerifyInfoFile verifyManifestInfoCopy = verifyInfoFile(
386                 strNewFmt("%s%s", strZ(fileName), INFO_COPY_EXT), false, cipherPass);
387 
388             // If the copy loaded successfully, then check the checksums
389             if (verifyManifestInfoCopy.errorCode == 0)
390             {
391                 // If the manifest and manifest.copy checksums don't match each other than one (or both) of the files could be
392                 // corrupt so log a warning but trust main
393                 if (!strEq(verifyManifestInfo.checksum, verifyManifestInfoCopy.checksum))
394                     LOG_WARN_FMT("backup '%s' manifest.copy does not match manifest", strZ(backupResult->backupLabel));
395             }
396         }
397         else
398         {
399             // If this might be an in-progress backup and the main manifest is simply missing, it is assumed the backup is an
400             // actual in-progress backup and verification is skipped, otherwise, if the main is not simply missing, or this is not
401             // an in-progress backup then attempt to load the copy.
402             if (!(currentBackup && verifyManifestInfo.errorCode == errorTypeCode(&FileMissingError)))
403             {
404                 currentBackup = false;
405 
406                 VerifyInfoFile verifyManifestInfoCopy = verifyInfoFile(
407                     strNewFmt("%s%s", strZ(fileName), INFO_COPY_EXT), true, cipherPass);
408 
409                 // If loaded successfully, then return the copy as usable
410                 if (verifyManifestInfoCopy.errorCode == 0)
411                 {
412                     LOG_WARN_FMT("%s/backup.manifest is missing or unusable, using copy", strZ(backupResult->backupLabel));
413 
414                     result = verifyManifestInfoCopy.manifest;
415                 }
416                 else if (verifyManifestInfo.errorCode == errorTypeCode(&FileMissingError) &&
417                     verifyManifestInfoCopy.errorCode == errorTypeCode(&FileMissingError))
418                 {
419                     backupResult->status = backupMissingManifest;
420 
421                     LOG_WARN_FMT("manifest missing for '%s' - backup may have expired", strZ(backupResult->backupLabel));
422                 }
423             }
424             else
425             {
426                 backupResult->status = backupInProgress;
427 
428                 LOG_INFO_FMT("backup '%s' appears to be in progress, skipping", strZ(backupResult->backupLabel));
429             }
430         }
431 
432         // If found a usable manifest then check that the database it was based on is in the history
433         if (result != NULL)
434         {
435             bool found = false;
436             const ManifestData *manData = manifestData(result);
437 
438             // Confirm the PG database information from the manifest is in the history list
439             for (unsigned int infoPgIdx = 0; infoPgIdx < infoPgDataTotal(pgHistory); infoPgIdx++)
440             {
441                 InfoPgData pgHistoryData = infoPgData(pgHistory, infoPgIdx);
442 
443                 if (pgHistoryData.id == manData->pgId && pgHistoryData.systemId == manData->pgSystemId &&
444                     pgHistoryData.version == manData->pgVersion)
445                 {
446                     found = true;
447                     break;
448                 }
449             }
450 
451             // If the PG data is not found in the backup.info history, then error and reset the result
452             if (!found)
453             {
454                 LOG_ERROR_FMT(
455                     errorTypeCode(&FileInvalidError),
456                     "'%s' may not be recoverable - PG data (id %u, version %s, system-id %" PRIu64 ") is not in the backup.info"
457                         " history, skipping",
458                     strZ(backupResult->backupLabel), manData->pgId, strZ(pgVersionToStr(manData->pgVersion)), manData->pgSystemId);
459 
460                 manifestFree(result);
461                 result = NULL;
462             }
463             else
464                 manifestMove(result, memContextPrior());
465         }
466 
467         // If the result is NULL and the backup status has not yet been set, then the backup is unusable (invalid)
468         if (result == NULL && backupResult->status == backupValid)
469         {
470             backupResult->status = backupInvalid;
471             (*jobErrorTotal)++;
472         }
473     }
474     MEM_CONTEXT_TEMP_END();
475 
476     FUNCTION_LOG_RETURN(MANIFEST, result);
477 }
478 
479 /***********************************************************************************************************************************
480 Check the history in the info files
481 ***********************************************************************************************************************************/
482 void
verifyPgHistory(const InfoPg * archiveInfoPg,const InfoPg * backupInfoPg)483 verifyPgHistory(const InfoPg *archiveInfoPg, const InfoPg *backupInfoPg)
484 {
485     FUNCTION_TEST_BEGIN();
486         FUNCTION_TEST_PARAM(INFO_PG, archiveInfoPg);                // Postgres information from the archive.info file
487         FUNCTION_TEST_PARAM(INFO_PG, backupInfoPg);                 // Postgres information from the backup.info file
488     FUNCTION_TEST_END();
489 
490     MEM_CONTEXT_TEMP_BEGIN()
491     {
492         // Check archive.info and backup.info current PG data matches. If there is a mismatch, verify cannot continue since
493         // the database is not considered accessible during the verify command so no way to tell which would be valid.
494         InfoPgData archiveInfoPgData = infoPgData(archiveInfoPg, infoPgDataCurrentId(archiveInfoPg));
495         InfoPgData backupInfoPgData = infoPgData(backupInfoPg, infoPgDataCurrentId(backupInfoPg));
496         checkStanzaInfo(&archiveInfoPgData, &backupInfoPgData);
497 
498         unsigned int archiveInfoHistoryTotal = infoPgDataTotal(archiveInfoPg);
499         unsigned int backupInfoHistoryTotal = infoPgDataTotal(backupInfoPg);
500 
501         String *errMsg = strNewZ("archive and backup history lists do not match");
502 
503         if (archiveInfoHistoryTotal != backupInfoHistoryTotal)
504             THROW(FormatError, strZ(errMsg));
505 
506         // Confirm the lists are the same
507         for (unsigned int infoPgIdx = 0; infoPgIdx < archiveInfoHistoryTotal; infoPgIdx++)
508         {
509             InfoPgData archiveInfoPgHistory = infoPgData(archiveInfoPg, infoPgIdx);
510             InfoPgData backupInfoPgHistory = infoPgData(backupInfoPg, infoPgIdx);
511 
512             if (archiveInfoPgHistory.id != backupInfoPgHistory.id ||
513                 archiveInfoPgHistory.systemId != backupInfoPgHistory.systemId ||
514                 archiveInfoPgHistory.version != backupInfoPgHistory.version)
515             {
516                 THROW(FormatError, strZ(errMsg));
517             }
518         }
519     }
520     MEM_CONTEXT_TEMP_END();
521 
522     FUNCTION_TEST_RETURN_VOID();
523 }
524 
525 /***********************************************************************************************************************************
526 Populate the WAL ranges from the provided, sorted, WAL files list for a given archiveId
527 ***********************************************************************************************************************************/
528 static void
verifyCreateArchiveIdRange(VerifyArchiveResult * archiveIdResult,StringList * walFileList,unsigned int * jobErrorTotal)529 verifyCreateArchiveIdRange(VerifyArchiveResult *archiveIdResult, StringList *walFileList, unsigned int *jobErrorTotal)
530 {
531     FUNCTION_TEST_BEGIN();
532         FUNCTION_TEST_PARAM_P(VERIFY_ARCHIVE_RESULT, archiveIdResult);  // The result set for the archive Id being processed
533         FUNCTION_TEST_PARAM(STRING_LIST, walFileList);                  // Sorted (ascending) list of WAL files in a timeline
534         FUNCTION_TEST_PARAM_P(UINT, jobErrorTotal);                     // Pointer to the overall job error total
535     FUNCTION_TEST_END();
536 
537     ASSERT(archiveIdResult != NULL);
538     ASSERT(walFileList != NULL);
539 
540     unsigned int walFileIdx = 0;
541 
542     // Initialize the WAL range
543     VerifyWalRange *walRange = NULL;
544 
545     // If there is a WAL range for this archiveID, get the last one. If there is no timeline change then continue updating the last
546     // WAL range.
547     if (!lstEmpty(archiveIdResult->walRangeList) &&
548         strEq(
549             strSubN(((VerifyWalRange *)lstGetLast(archiveIdResult->walRangeList))->stop, 0, 8),
550             strSubN(strSubN(strLstGet(walFileList, walFileIdx), 0, WAL_SEGMENT_NAME_SIZE), 0, 8)))
551     {
552         walRange = lstGetLast(archiveIdResult->walRangeList);
553     }
554 
555     do
556     {
557         String *walSegment = strSubN(strLstGet(walFileList, walFileIdx), 0, WAL_SEGMENT_NAME_SIZE);
558 
559         // If walSegment found ends in FF for PG versions 9.2 or less then skip it but log error because it should not exist and
560         // PostgreSQL will ignore it
561         if (archiveIdResult->pgWalInfo.version <= PG_VERSION_92 && strEndsWithZ(walSegment, "FF"))
562         {
563             LOG_ERROR_FMT(
564                 errorTypeCode(&FileInvalidError), "invalid WAL '%s' for '%s' exists, skipping", strZ(walSegment),
565                 strZ(archiveIdResult->archiveId));
566 
567             (*jobErrorTotal)++;
568 
569             // Remove the file from the original list so no attempt is made to verify it
570             strLstRemoveIdx(walFileList, walFileIdx);
571             continue;
572         }
573 
574         // The lists are sorted so look ahead to see if this is a duplicate of the next one in the list
575         if (walFileIdx + 1 < strLstSize(walFileList))
576         {
577             if (strEq(walSegment, strSubN(strLstGet(walFileList, walFileIdx + 1), 0, WAL_SEGMENT_NAME_SIZE)))
578             {
579                 LOG_ERROR_FMT(
580                     errorTypeCode(&FileInvalidError), "duplicate WAL '%s' for '%s' exists, skipping", strZ(walSegment),
581                     strZ(archiveIdResult->archiveId));
582 
583                 (*jobErrorTotal)++;
584 
585                 bool foundDup = true;
586 
587                 // Remove all duplicates of this WAL, including this WAL, from the list
588                 while (walFileIdx < strLstSize(walFileList) && foundDup)
589                 {
590                     if (strEq(walSegment, strSubN(strLstGet(walFileList, walFileIdx), 0, WAL_SEGMENT_NAME_SIZE)))
591                         strLstRemoveIdx(walFileList, walFileIdx);
592                     else
593                         foundDup = false;
594                 }
595 
596                 continue;
597             }
598         }
599 
600         // Initialize the range if it has not yet been initialized and continue to next
601         if (walRange == NULL ||
602             !strEq(
603                 walSegmentNext(walRange->stop, (size_t)archiveIdResult->pgWalInfo.size, archiveIdResult->pgWalInfo.version),
604                 walSegment))
605         {
606             // Add the initialized wal range to the range list
607             MEM_CONTEXT_BEGIN(lstMemContext(archiveIdResult->walRangeList))
608             {
609                 VerifyWalRange walRangeNew =
610                 {
611                     .start = strDup(walSegment),
612                     .stop = strDup(walSegment),
613                     .invalidFileList = lstNewP(sizeof(VerifyInvalidFile), .comparator = lstComparatorStr),
614                 };
615 
616                 lstAdd(archiveIdResult->walRangeList, &walRangeNew);
617             }
618             MEM_CONTEXT_END();
619 
620             // Set the current wal range being processed to what was just added
621             walRange = lstGetLast(archiveIdResult->walRangeList);
622         }
623         // If the next WAL is the appropriate distance away, then there is no gap
624         else
625         {
626             MEM_CONTEXT_BEGIN(lstMemContext(archiveIdResult->walRangeList))
627             {
628                 strFree(walRange->stop);
629                 walRange->stop = strDup(walSegment);
630             }
631             MEM_CONTEXT_END();
632         }
633 
634         walFileIdx++;
635     }
636     while (walFileIdx < strLstSize(walFileList));
637 
638     FUNCTION_TEST_RETURN_VOID();
639 }
640 
641 /***********************************************************************************************************************************
642 Return verify jobs for the archive
643 ***********************************************************************************************************************************/
644 static ProtocolParallelJob *
verifyArchive(void * data)645 verifyArchive(void *data)
646 {
647     FUNCTION_TEST_BEGIN();
648         FUNCTION_TEST_PARAM_P(VOID, data);                          // Pointer to the job data
649     FUNCTION_TEST_END();
650 
651     ProtocolParallelJob *result = NULL;
652 
653     VerifyJobData *jobData = data;
654 
655     // Process archive files, if any
656     while (!strLstEmpty(jobData->archiveIdList))
657     {
658         result = NULL;
659 
660         // Add archiveId to the result list if the list is empty or the last processed is not equal to the current archiveId
661         if (lstEmpty(jobData->archiveIdResultList) ||
662             !strEq(
663                 ((VerifyArchiveResult *)lstGetLast(jobData->archiveIdResultList))->archiveId, strLstGet(jobData->archiveIdList, 0)))
664         {
665             const String *archiveId = strLstGet(jobData->archiveIdList, 0);
666 
667             MEM_CONTEXT_BEGIN(lstMemContext(jobData->archiveIdResultList))
668             {
669                 VerifyArchiveResult archiveIdResult =
670                 {
671                     .archiveId = strDup(archiveId),
672                     .walRangeList = lstNewP(sizeof(VerifyWalRange), .comparator = lstComparatorStr),
673                 };
674 
675                 lstAdd(jobData->archiveIdResultList, &archiveIdResult);
676             }
677             MEM_CONTEXT_END();
678 
679             // Free the old WAL path list
680             strLstFree(jobData->walPathList);
681 
682             // Get the WAL paths for the archive Id
683             const String *archiveIdPath = strNewFmt(STORAGE_REPO_ARCHIVE "/%s", strZ(archiveId));
684 
685             MEM_CONTEXT_BEGIN(jobData->memContext)
686             {
687                 jobData->walPathList = strLstSort(
688                     storageListP(storageRepo(), archiveIdPath, .expression = WAL_SEGMENT_DIR_REGEXP_STR), sortOrderAsc);
689             }
690             MEM_CONTEXT_END();
691         }
692 
693         // If there are WAL paths then get the file lists
694         if (!strLstEmpty(jobData->walPathList))
695         {
696             // Get the archive id info for the current (last) archive id being processed
697             VerifyArchiveResult *archiveResult = lstGetLast(jobData->archiveIdResultList);
698 
699             do
700             {
701                 String *walPath = strLstGet(jobData->walPathList, 0);
702 
703                 // Get the WAL files for the first item in the WAL paths list and initialize WAL info and ranges
704                 if (strLstEmpty(jobData->walFileList))
705                 {
706                     // Free the old WAL file list
707                     strLstFree(jobData->walFileList);
708 
709                     // Get WAL file list
710                     const String *walFilePath = strNewFmt(
711                         STORAGE_REPO_ARCHIVE "/%s/%s", strZ(archiveResult->archiveId), strZ(walPath));
712 
713                     MEM_CONTEXT_BEGIN(jobData->memContext)
714                     {
715                         jobData->walFileList = strLstSort(
716                             storageListP(storageRepo(), walFilePath, .expression = WAL_SEGMENT_FILE_REGEXP_STR), sortOrderAsc);
717                     }
718                     MEM_CONTEXT_END();
719 
720                     if (!strLstEmpty(jobData->walFileList))
721                     {
722                         if (archiveResult->pgWalInfo.size == 0)
723                         {
724                             // Initialize the WAL segment size from the first WAL
725                             StorageRead *walRead = verifyFileLoad(
726                                 strNewFmt(
727                                     STORAGE_REPO_ARCHIVE "/%s/%s/%s", strZ(archiveResult->archiveId), strZ(walPath),
728                                     strZ(strLstGet(jobData->walFileList, 0))),
729                                 jobData->walCipherPass);
730 
731                             PgWal walInfo = pgWalFromBuffer(storageGetP(walRead, .exactSize = PG_WAL_HEADER_SIZE));
732 
733                             archiveResult->pgWalInfo.size = walInfo.size;
734                             archiveResult->pgWalInfo.version = walInfo.version;
735                         }
736 
737                         // Add total number of WAL files in the directory to the total WAL - this number will include duplicates,
738                         // if any, that will be filtered out and not checked but will be reported as errors in the log
739                         archiveResult->totalWalFile += strLstSize(jobData->walFileList);
740 
741                         verifyCreateArchiveIdRange(archiveResult, jobData->walFileList, &jobData->jobErrorTotal);
742                     }
743                 }
744 
745                 // If there are WAL files, then verify them
746                 if (!strLstEmpty(jobData->walFileList))
747                 {
748                     // Get the fully qualified file name and checksum
749                     const String *fileName = strLstGet(jobData->walFileList, 0);
750                     const String *filePathName = strNewFmt(
751                         STORAGE_REPO_ARCHIVE "/%s/%s/%s", strZ(archiveResult->archiveId), strZ(walPath), strZ(fileName));
752                     String *checksum = strSubN(fileName, WAL_SEGMENT_NAME_SIZE + 1, HASH_TYPE_SHA1_SIZE_HEX);
753 
754                     // Set up the job
755                     ProtocolCommand *command = protocolCommandNew(PROTOCOL_COMMAND_VERIFY_FILE);
756                     PackWrite *const param = protocolCommandParam(command);
757 
758                     pckWriteStrP(param, filePathName);
759                     pckWriteStrP(param, checksum);
760                     pckWriteU64P(param, archiveResult->pgWalInfo.size);
761                     pckWriteStrP(param, jobData->walCipherPass);
762 
763                     // Assign job to result, prepending the archiveId to the key for consistency with backup processing
764                     result = protocolParallelJobNew(
765                         VARSTR(strNewFmt("%s/%s", strZ(archiveResult->archiveId), strZ(filePathName))), command);
766 
767                     // Remove the file to process from the list
768                     strLstRemoveIdx(jobData->walFileList, 0);
769 
770                     // If this is the last file to process for this timeline, then remove the path
771                     if (strLstEmpty(jobData->walFileList))
772                         strLstRemoveIdx(jobData->walPathList, 0);
773                 }
774                 else
775                 {
776                     // No valid WAL to process (may be only duplicates or nothing in WAL path) - remove the WAL path from the list
777                     LOG_WARN_FMT(
778                         "path '%s/%s' does not contain any valid WAL to be processed", strZ(archiveResult->archiveId),
779                         strZ(walPath));
780                     strLstRemoveIdx(jobData->walPathList, 0);
781                 }
782 
783                 // If a job was found to be processed then break out to process it
784                 if (result != NULL)
785                     break;
786             }
787             while (!strLstEmpty(jobData->walPathList));
788 
789             // If this is the last timeline to process for this archiveId, then remove the archiveId
790             if (strLstEmpty(jobData->walPathList))
791                 strLstRemoveIdx(jobData->archiveIdList, 0);
792 
793             // If a file was sent to be processed then break so can process it
794             if (result != NULL)
795                 break;
796         }
797         else
798         {
799             // Log that no WAL paths exist in the archive Id dir - remove the archive Id from the list (nothing to process)
800             LOG_WARN_FMT("archive path '%s' is empty", strZ(strLstGet(jobData->archiveIdList, 0)));
801             strLstRemoveIdx(jobData->archiveIdList, 0);
802         }
803     }
804 
805     FUNCTION_TEST_RETURN(result);
806 }
807 
808 /***********************************************************************************************************************************
809 Verify the job data backups
810 ***********************************************************************************************************************************/
811 static ProtocolParallelJob *
verifyBackup(void * data)812 verifyBackup(void *data)
813 {
814     FUNCTION_TEST_BEGIN();
815         FUNCTION_TEST_PARAM_P(VOID, data);
816     FUNCTION_TEST_END();
817 
818     ProtocolParallelJob *result = NULL;
819 
820     VerifyJobData *jobData = data;
821 
822     // Process backup files, if any
823     while (!strLstEmpty(jobData->backupList))
824     {
825         result = NULL;
826 
827         // If result list is empty or the last processed is not equal to the backup being processed, then initialize the backup
828         // data and results
829         if (lstEmpty(jobData->backupResultList) ||
830             !strEq(((VerifyBackupResult *)lstGetLast(jobData->backupResultList))->backupLabel, strLstGet(jobData->backupList, 0)))
831         {
832             MEM_CONTEXT_BEGIN(lstMemContext(jobData->backupResultList))
833             {
834                 VerifyBackupResult backupResultNew =
835                 {
836                     .backupLabel = strDup(strLstGet(jobData->backupList, 0)),
837                     .invalidFileList = lstNewP(sizeof(VerifyInvalidFile), .comparator = lstComparatorStr),
838                 };
839 
840                 // Add the backup to the result list
841                 lstAdd(jobData->backupResultList, &backupResultNew);
842             }
843             MEM_CONTEXT_END();
844 
845             // Get the result just added so it can be updated directly
846             VerifyBackupResult *backupResult = lstGetLast(jobData->backupResultList);
847 
848             // If currentBackup is set (meaning the newest backup label on disk was not in the db:current section when the
849             // backup.info file was read) and this is the same label, then set inProgessBackup to true, else false.
850             // inProgressBackup may be changed in verifyManifestFile if a main backup.manifest exists since that would indicate the
851             // backup completed during the verify process.
852             bool inProgressBackup = strEq(jobData->currentBackup, backupResult->backupLabel);
853 
854             // Get a usable backup manifest file
855             Manifest *manifest = verifyManifestFile(
856                 backupResult, jobData->manifestCipherPass, inProgressBackup, jobData->pgHistory, &jobData->jobErrorTotal);
857 
858             // If a usable backup.manifest file is not found
859             if (manifest == NULL)
860             {
861                 // Remove this backup from the processing list
862                 strLstRemoveIdx(jobData->backupList, 0);
863 
864                 // No files to process so continue to the next backup in the list
865                 continue;
866             }
867             // Initialize the backup results and manifest for processing
868             else
869             {
870                 // Move the manifest to the jobData for processing
871                 jobData->manifest = manifestMove(manifest, jobData->memContext);
872 
873                 // Initialize the jobData
874                 MEM_CONTEXT_BEGIN(jobData->memContext)
875                 {
876                     // Get the cipher subpass used to decrypt files in the backup and initialize the file list index
877                     jobData->backupCipherPass = strDup(manifestCipherSubPass(jobData->manifest));
878                     jobData->manifestFileIdx = 0;
879                 }
880                 MEM_CONTEXT_END();
881 
882                 const ManifestData *manData = manifestData(jobData->manifest);
883 
884                 MEM_CONTEXT_BEGIN(lstMemContext(jobData->backupResultList))
885                 {
886                     backupResult->totalFileManifest = manifestFileTotal(jobData->manifest);
887                     backupResult->backupPrior = strDup(manData->backupLabelPrior);
888                     backupResult->pgId = manData->pgId;
889                     backupResult->pgVersion = manData->pgVersion;
890                     backupResult->archiveStart = strDup(manData->archiveStart);
891                     backupResult->archiveStop = strDup(manData->archiveStop);
892                 }
893                 MEM_CONTEXT_END();
894             }
895         }
896 
897         VerifyBackupResult *backupResult = lstGetLast(jobData->backupResultList);
898 
899         // Process any files in the manifest
900         if (jobData->manifestFileIdx < manifestFileTotal(jobData->manifest))
901         {
902             do
903             {
904                 const ManifestFile *fileData = manifestFile(jobData->manifest, jobData->manifestFileIdx);
905 
906                 String *filePathName = NULL;
907 
908                 // Track the files verified in order to determine when the processing of the backup is complete
909                 backupResult->totalFileVerify++;
910 
911                 // Check if the file is referenced in a prior backup
912                 if (fileData->reference != NULL)
913                 {
914                     // If the prior backup is not in the result list, then that backup was never processed (likely due to the --set
915                     // option) so verify the file
916                     unsigned int backupPriorIdx = lstFindIdx(jobData->backupResultList, &fileData->reference);
917 
918                     if (backupPriorIdx == LIST_NOT_FOUND)
919                     {
920                         filePathName = strNewFmt(
921                             STORAGE_REPO_BACKUP "/%s/%s%s", strZ(fileData->reference), strZ(fileData->name),
922                             strZ(compressExtStr((manifestData(jobData->manifest))->backupOptionCompressType)));
923                     }
924                     // Else the backup this file references has a result so check the processing state for the referenced backup
925                     else
926                     {
927                         VerifyBackupResult *backupResultPrior = lstGet(jobData->backupResultList, backupPriorIdx);
928 
929                         // If the verify-state of the backup is not complete then verify the file
930                         if (!backupResultPrior->fileVerifyComplete)
931                         {
932                             filePathName = strNewFmt(
933                                 STORAGE_REPO_BACKUP "/%s/%s%s", strZ(fileData->reference), strZ(fileData->name),
934                                 strZ(compressExtStr((manifestData(jobData->manifest))->backupOptionCompressType)));
935                         }
936                         // Else skip verification
937                         else
938                         {
939                             String *priorFile = strNewFmt(
940                                 "%s/%s%s", strZ(fileData->reference), strZ(fileData->name),
941                                 strZ(compressExtStr((manifestData(jobData->manifest))->backupOptionCompressType)));
942 
943                             unsigned int backupPriorInvalidIdx = lstFindIdx(backupResultPrior->invalidFileList, &priorFile);
944 
945                             // If the file is in the invalid file list of the prior backup where it is referenced then add the file
946                             // as invalid to this backup result and set the backup result status; since already logged an error on
947                             // this file, don't log again
948                             if (backupPriorInvalidIdx != LIST_NOT_FOUND)
949                             {
950                                 VerifyInvalidFile *invalidFile = lstGet(
951                                     backupResultPrior->invalidFileList, backupPriorInvalidIdx);
952                                 verifyInvalidFileAdd(backupResult->invalidFileList, invalidFile->reason, invalidFile->fileName);
953                                 backupResult->status = backupInvalid;
954                             }
955                             // Else the file in the prior backup was valid so increment the total valid files for this backup
956                             else
957                             {
958                                 backupResult->totalFileValid++;
959                             }
960                         }
961                     }
962                 }
963                 // Else file is not referenced in a prior backup
964                 else
965                 {
966                     filePathName = strNewFmt(
967                         STORAGE_REPO_BACKUP "/%s/%s%s", strZ(backupResult->backupLabel), strZ(fileData->name),
968                         strZ(compressExtStr((manifestData(jobData->manifest))->backupOptionCompressType)));
969                 }
970 
971                 // If constructed file name is not null then send it off for processing
972                 if (filePathName != NULL)
973                 {
974                     // Set up the job
975                     ProtocolCommand *command = protocolCommandNew(PROTOCOL_COMMAND_VERIFY_FILE);
976                     PackWrite *const param = protocolCommandParam(command);
977 
978                     pckWriteStrP(param, filePathName);
979                     // If the checksum is not present in the manifest, it will be calculated by manifest load
980                     pckWriteStrP(param, STR(fileData->checksumSha1));
981                     pckWriteU64P(param, fileData->size);
982                     pckWriteStrP(param, jobData->backupCipherPass);
983 
984                     // Assign job to result (prepend backup label being processed to the key since some files are in a prior backup)
985                     result = protocolParallelJobNew(
986                         VARSTR(strNewFmt("%s/%s", strZ(backupResult->backupLabel), strZ(filePathName))), command);
987                 }
988 
989                 // Increment the index to point to the next file
990                 jobData->manifestFileIdx++;
991 
992                 // If this was the last file to process for this backup, then free the manifest and remove this backup from the
993                 // processing list
994                 if (jobData->manifestFileIdx == backupResult->totalFileManifest)
995                 {
996                     manifestFree(jobData->manifest);
997                     jobData->manifest = NULL;
998                     strLstRemoveIdx(jobData->backupList, 0);
999                 }
1000 
1001                 // If a job was found to be processed then break out to process it
1002                 if (result != NULL)
1003                     break;
1004             }
1005             while (jobData->manifestFileIdx < backupResult->totalFileManifest);
1006         }
1007         else
1008         {
1009             // Nothing to process so report an error, free the manifest, set the status, and remove the backup from processing list
1010             LOG_ERROR_FMT(
1011                 errorTypeCode(&FileInvalidError), "backup '%s' manifest does not contain any target files to verify",
1012                 strZ(backupResult->backupLabel));
1013 
1014             jobData->jobErrorTotal++;
1015 
1016             manifestFree(jobData->manifest);
1017             jobData->manifest = NULL;
1018 
1019             backupResult->status = backupInvalid;
1020 
1021             strLstRemoveIdx(jobData->backupList, 0);
1022         }
1023 
1024         // If a job was found to be processed then break out to process it
1025         if (result != NULL)
1026             break;
1027     }
1028 
1029     FUNCTION_TEST_RETURN(result);
1030 }
1031 
1032 /***********************************************************************************************************************************
1033 Process the job data
1034 ***********************************************************************************************************************************/
1035 static ProtocolParallelJob *
verifyJobCallback(void * data,unsigned int clientIdx)1036 verifyJobCallback(void *data, unsigned int clientIdx)
1037 {
1038     FUNCTION_TEST_BEGIN();
1039         FUNCTION_TEST_PARAM_P(VOID, data);                          // Pointer to the job data
1040         (void)clientIdx;                                            // Client index (not used for this process)
1041     FUNCTION_TEST_END();
1042 
1043     ASSERT(data != NULL);
1044 
1045     // Initialize the result
1046     ProtocolParallelJob *result = NULL;
1047 
1048     MEM_CONTEXT_TEMP_BEGIN()
1049     {
1050         VerifyJobData *jobData = data;
1051 
1052         if (!jobData->backupProcessing)
1053         {
1054             result = protocolParallelJobMove(verifyArchive(data), memContextPrior());
1055 
1056             // Set the backupProcessing flag if the archive processing is finished so backup processing can begin immediately after
1057             jobData->backupProcessing = strLstEmpty(jobData->archiveIdList);
1058         }
1059 
1060         if (jobData->backupProcessing)
1061         {
1062             // Only begin backup verification if the last archive result was processed
1063             if (result == NULL)
1064                 result = protocolParallelJobMove(verifyBackup(data), memContextPrior());
1065         }
1066     }
1067     MEM_CONTEXT_TEMP_END();
1068 
1069     FUNCTION_TEST_RETURN(result);
1070 }
1071 
1072 /***********************************************************************************************************************************
1073 Helper function for returning a string corresponding to the result code
1074 ***********************************************************************************************************************************/
1075 static String *
verifyErrorMsg(VerifyResult verifyResult)1076 verifyErrorMsg(VerifyResult verifyResult)
1077 {
1078     FUNCTION_TEST_BEGIN();
1079         FUNCTION_TEST_PARAM(ENUM, verifyResult);                    // Result code from the verifyFile() function
1080     FUNCTION_TEST_END();
1081 
1082     String *result = strNew();
1083 
1084     if (verifyResult == verifyFileMissing)
1085         result = strCatZ(result, "file missing");
1086     else if (verifyResult == verifyChecksumMismatch)
1087         result = strCatZ(result, "invalid checksum");
1088     else if (verifyResult == verifySizeInvalid)
1089         result = strCatZ(result, "invalid size");
1090     else
1091         result = strCatZ(result, "invalid result");
1092 
1093     FUNCTION_TEST_RETURN(result);
1094 }
1095 
1096 /***********************************************************************************************************************************
1097 Helper function to output a log message based on job result that is not verifyOk and return an error count
1098 ***********************************************************************************************************************************/
1099 static unsigned int
verifyLogInvalidResult(const String * fileType,VerifyResult verifyResult,unsigned int processId,const String * filePathName)1100 verifyLogInvalidResult(const String *fileType, VerifyResult verifyResult, unsigned int processId, const String *filePathName)
1101 {
1102     FUNCTION_TEST_BEGIN();
1103         FUNCTION_TEST_PARAM(STRING, fileType);                      // Indicates archive or backup file
1104         FUNCTION_TEST_PARAM(ENUM, verifyResult);                    // Result code from the verifyFile() function
1105         FUNCTION_TEST_PARAM(UINT, processId);                       // Process Id reporting the result
1106         FUNCTION_TEST_PARAM(STRING, filePathName);                  // File for which results are being reported
1107     FUNCTION_TEST_END();
1108 
1109     ASSERT(fileType != NULL);
1110     ASSERT(filePathName != NULL);
1111 
1112     // Log a warning because the WAL may have gone missing if expire came through and removed it
1113     // legitimately so it is not necessarily an error so the jobErrorTotal should not be incremented
1114     if (strEq(fileType, STORAGE_REPO_ARCHIVE_STR) && verifyResult == verifyFileMissing)
1115     {
1116         LOG_WARN_PID_FMT(processId, "%s '%s'", strZ(verifyErrorMsg(verifyResult)), strZ(filePathName));
1117         FUNCTION_TEST_RETURN(0);
1118     }
1119     else
1120     {
1121         LOG_ERROR_PID_FMT(
1122             processId, errorTypeCode(&FileInvalidError), "%s '%s'", strZ(verifyErrorMsg(verifyResult)), strZ(filePathName));
1123         FUNCTION_TEST_RETURN(1);
1124     }
1125 }
1126 
1127 /***********************************************************************************************************************************
1128 Helper function to set the currently processing backup label, if any, and check that the archiveIds are in the db history
1129 ***********************************************************************************************************************************/
1130 static String *
verifySetBackupCheckArchive(const StringList * backupList,const InfoBackup * backupInfo,const StringList * archiveIdList,const InfoPg * pgHistory,unsigned int * jobErrorTotal)1131 verifySetBackupCheckArchive(
1132     const StringList *backupList, const InfoBackup *backupInfo, const StringList *archiveIdList, const InfoPg *pgHistory,
1133     unsigned int *jobErrorTotal)
1134 {
1135     FUNCTION_TEST_BEGIN();
1136         FUNCTION_TEST_PARAM(STRING_LIST, backupList);               // List of backup labels in the backup directory
1137         FUNCTION_TEST_PARAM(INFO_BACKUP, backupInfo);               // Contents of the backup.info file
1138         FUNCTION_TEST_PARAM(STRING_LIST, archiveIdList);            // List of archiveIds in the archive directory
1139         FUNCTION_TEST_PARAM(INFO_PG, pgHistory);                    // Pointer to InfoPg of archive.info for accessing PG history
1140         FUNCTION_TEST_PARAM_P(UINT, jobErrorTotal);                 // Pointer to overall job error total
1141     FUNCTION_TEST_END();
1142 
1143     String *result = NULL;
1144 
1145     MEM_CONTEXT_TEMP_BEGIN()
1146     {
1147         // If there are backups, set the last backup as current if it is not in backup.info - if it is, then it is complete, else
1148         // it will be checked later
1149         if (!strLstEmpty(backupList))
1150         {
1151             // Get the last backup as current if it is not in backup.info current list
1152             String *backupLabel = strLstGet(backupList, strLstSize(backupList) - 1);
1153 
1154             if (infoBackupDataByLabel(backupInfo, backupLabel) == NULL)
1155             {
1156                 // Duplicate the string into the prior context
1157                 MEM_CONTEXT_PRIOR_BEGIN()
1158                 {
1159                     result = strDup(backupLabel);
1160                 }
1161                 MEM_CONTEXT_PRIOR_END();
1162             }
1163         }
1164 
1165         // If there are archive directories on disk, make sure they are in the database history list
1166         if (!strLstEmpty(archiveIdList))
1167         {
1168             StringList *archiveIdHistoryList = strLstNew();
1169 
1170             for (unsigned int histIdx = 0; histIdx < infoPgDataTotal(pgHistory); histIdx++)
1171                 strLstAdd(archiveIdHistoryList, infoPgArchiveId(pgHistory, histIdx));
1172 
1173             // Sort the history list
1174             strLstSort(strLstComparatorSet(archiveIdHistoryList, archiveIdComparator), sortOrderAsc);
1175 
1176             String *missingFromHistory = strNew();
1177 
1178             // Check if the archiveId on disk exists in the archive.info history list and report it if not
1179             for (unsigned int archiveIdx = 0; archiveIdx < strLstSize(archiveIdList); archiveIdx++)
1180             {
1181                 String *archiveId = strLstGet(archiveIdList, archiveIdx);
1182 
1183                 if (!strLstExists(archiveIdHistoryList, archiveId))
1184                     strCat(missingFromHistory, (strEmpty(missingFromHistory) ? archiveId : strNewFmt(", %s", strZ(archiveId))));
1185             }
1186 
1187             if (!strEmpty(missingFromHistory))
1188             {
1189                 LOG_ERROR_FMT(
1190                     errorTypeCode(&ArchiveMismatchError), "archiveIds '%s' are not in the archive.info history list",
1191                     strZ(missingFromHistory));
1192 
1193                 (*jobErrorTotal)++;
1194             }
1195         }
1196     }
1197     MEM_CONTEXT_TEMP_END();
1198 
1199     FUNCTION_TEST_RETURN(result);
1200 }
1201 
1202 /***********************************************************************************************************************************
1203 Add the file to the invalid file list for the range in which it exists
1204 ***********************************************************************************************************************************/
1205 static void
verifyAddInvalidWalFile(List * walRangeList,VerifyResult fileResult,const String * fileName,const String * walSegment)1206 verifyAddInvalidWalFile(List *walRangeList, VerifyResult fileResult, const String *fileName, const String *walSegment)
1207 {
1208     FUNCTION_TEST_BEGIN();
1209         FUNCTION_TEST_PARAM(LIST, walRangeList);                    // List of WAL ranges for an archive Id
1210         FUNCTION_TEST_PARAM(UINT, fileResult);                      // Result of verifyFile()
1211         FUNCTION_TEST_PARAM(STRING, fileName);                      // File name (without the REPO prefix)
1212         FUNCTION_TEST_PARAM(STRING, walSegment);                    // WAL segment, i.e. 000000010000000000000005
1213     FUNCTION_TEST_END();
1214 
1215     ASSERT(walRangeList != NULL);
1216     ASSERT(fileName != NULL);
1217     ASSERT(walSegment != NULL);
1218 
1219     MEM_CONTEXT_TEMP_BEGIN()
1220     {
1221         for (unsigned int walIdx = 0; walIdx < lstSize(walRangeList); walIdx++)
1222         {
1223             VerifyWalRange *walRange = lstGet(walRangeList, walIdx);
1224 
1225             // If the WAL segment is less/equal to the stop file then it falls in this range since ranges are sorted by stop file in
1226             // ascending order, therefore first one found is the range
1227             if (strCmp(walRange->stop, walSegment) >= 0)
1228             {
1229                 // Add the file to the range where it was found and exit the loop
1230                 verifyInvalidFileAdd(walRange->invalidFileList, fileResult, fileName);
1231                 break;
1232             }
1233         }
1234     }
1235     MEM_CONTEXT_TEMP_END();
1236 
1237     FUNCTION_TEST_RETURN_VOID();
1238 }
1239 
1240 /***********************************************************************************************************************************
1241 Render the results of the verify command
1242 ***********************************************************************************************************************************/
1243 static String *
verifyRender(List * archiveIdResultList,List * backupResultList)1244 verifyRender(List *archiveIdResultList, List *backupResultList)
1245 {
1246     FUNCTION_TEST_BEGIN();
1247         FUNCTION_TEST_PARAM(LIST, archiveIdResultList);             // Result list for all archive Ids in the repo
1248         FUNCTION_TEST_PARAM(LIST, backupResultList);                // Result list for all backups in the repo
1249     FUNCTION_TEST_END();
1250 
1251     ASSERT(archiveIdResultList != NULL);
1252     ASSERT(backupResultList != NULL);
1253 
1254     String *result = strNewZ("Results:");
1255 
1256     // Render archive results
1257     if (lstEmpty(archiveIdResultList))
1258         strCatZ(result, "\n  archiveId: none found");
1259     else
1260     {
1261         for (unsigned int archiveIdx = 0; archiveIdx < lstSize(archiveIdResultList); archiveIdx++)
1262         {
1263             VerifyArchiveResult *archiveIdResult = lstGet(archiveIdResultList, archiveIdx);
1264             strCatFmt(
1265                 result, "\n  archiveId: %s, total WAL checked: %u, total valid WAL: %u", strZ(archiveIdResult->archiveId),
1266                 archiveIdResult->totalWalFile, archiveIdResult->totalValidWal);
1267 
1268             if (archiveIdResult->totalWalFile > 0)
1269             {
1270                 unsigned int errMissing = 0;
1271                 unsigned int errChecksum = 0;
1272                 unsigned int errSize = 0;
1273                 unsigned int errOther = 0;
1274 
1275                 for (unsigned int walIdx = 0; walIdx < lstSize(archiveIdResult->walRangeList); walIdx++)
1276                 {
1277                     VerifyWalRange *walRange = lstGet(archiveIdResult->walRangeList, walIdx);
1278 
1279                     LOG_DETAIL_FMT(
1280                         "archiveId: %s, wal start: %s, wal stop: %s", strZ(archiveIdResult->archiveId), strZ(walRange->start),
1281                         strZ(walRange->stop));
1282 
1283                     unsigned int invalidIdx = 0;
1284 
1285                     while (invalidIdx < lstSize(walRange->invalidFileList))
1286                     {
1287                         VerifyInvalidFile *invalidFile = lstGet(walRange->invalidFileList, invalidIdx);
1288 
1289                         if (invalidFile->reason == verifyFileMissing)
1290                             errMissing++;
1291                         else if (invalidFile->reason == verifyChecksumMismatch)
1292                             errChecksum++;
1293                         else if (invalidFile->reason == verifySizeInvalid)
1294                             errSize++;
1295                         else
1296                             errOther++;
1297 
1298                         invalidIdx++;
1299                     }
1300                 }
1301 
1302                 strCatFmt(
1303                     result, "\n    missing: %u, checksum invalid: %u, size invalid: %u, other: %u", errMissing, errChecksum,
1304                     errSize, errOther);
1305             }
1306         }
1307     }
1308 
1309     // Render backup results
1310     if (lstEmpty(backupResultList))
1311         strCatZ(result, "\n  backup: none found");
1312     else
1313     {
1314         for (unsigned int backupIdx = 0; backupIdx < lstSize(backupResultList); backupIdx++)
1315         {
1316             VerifyBackupResult *backupResult = lstGet(backupResultList, backupIdx);
1317             String *status = NULL;
1318 
1319             switch (backupResult->status)
1320             {
1321                 case backupValid:
1322                 {
1323                     status = strNewZ("valid");
1324                     break;
1325                 }
1326 
1327                 case backupInvalid:
1328                 {
1329                     status = strNewZ("invalid");
1330                     break;
1331                 }
1332 
1333                 case backupMissingManifest:
1334                 {
1335                     status = strNewZ("manifest missing");
1336                     break;
1337                 }
1338 
1339                 case backupInProgress:
1340                 {
1341                     status = strNewZ("in-progress");
1342                     break;
1343                 }
1344             }
1345 
1346             strCatFmt(
1347                 result, "\n  backup: %s, status: %s, total files checked: %u, total valid files: %u",
1348                 strZ(backupResult->backupLabel), strZ(status), backupResult->totalFileVerify, backupResult->totalFileValid);
1349 
1350             if (backupResult->totalFileVerify > 0)
1351             {
1352                 unsigned int errMissing = 0;
1353                 unsigned int errChecksum = 0;
1354                 unsigned int errSize = 0;
1355                 unsigned int errOther = 0;
1356 
1357                 for (unsigned int invalidIdx = 0; invalidIdx < lstSize(backupResult->invalidFileList); invalidIdx++)
1358                 {
1359                     VerifyInvalidFile *invalidFile = lstGet(backupResult->invalidFileList, invalidIdx);
1360 
1361                     if (invalidFile->reason == verifyFileMissing)
1362                         errMissing++;
1363                     else if (invalidFile->reason == verifyChecksumMismatch)
1364                         errChecksum++;
1365                     else if (invalidFile->reason == verifySizeInvalid)
1366                         errSize++;
1367                     else
1368                         errOther++;
1369                 }
1370 
1371                 strCatFmt(
1372                     result, "\n    missing: %u, checksum invalid: %u, size invalid: %u, other: %u", errMissing, errChecksum,
1373                     errSize, errOther);
1374             }
1375         }
1376     }
1377 
1378     FUNCTION_TEST_RETURN(result);
1379 }
1380 
1381 /***********************************************************************************************************************************
1382 Process the verify command
1383 ***********************************************************************************************************************************/
1384 static String *
verifyProcess(unsigned int * errorTotal)1385 verifyProcess(unsigned int *errorTotal)
1386 {
1387     FUNCTION_LOG_BEGIN(logLevelDebug);
1388         FUNCTION_TEST_PARAM_P(UINT, errorTotal);                    // Pointer to overall job error total
1389     FUNCTION_LOG_END();
1390 
1391     String *result = NULL;
1392 
1393     MEM_CONTEXT_TEMP_BEGIN()
1394     {
1395         String *resultStr = strNew();
1396 
1397         // Get the repo storage in case it is remote and encryption settings need to be pulled down
1398         const Storage *storage = storageRepo();
1399 
1400         // Get a usable backup info file
1401         InfoBackup *backupInfo = verifyBackupInfoFile();
1402 
1403         // If a usable backup.info file is not found, then report an error in the log
1404         if (backupInfo == NULL)
1405         {
1406             LOG_ERROR(errorTypeCode(&FormatError), "No usable backup.info file");
1407             (*errorTotal)++;
1408         }
1409 
1410         // Get a usable archive info file
1411         InfoArchive *archiveInfo = verifyArchiveInfoFile();
1412 
1413         // If a usable archive.info file is not found, then report an error in the log
1414         if (archiveInfo == NULL)
1415         {
1416             LOG_ERROR(errorTypeCode(&FormatError), "No usable archive.info file");
1417             (*errorTotal)++;
1418         }
1419 
1420         // If both a usable archive info and backup info file were found, then proceed with verification
1421         if (archiveInfo != NULL && backupInfo != NULL)
1422         {
1423             TRY_BEGIN()
1424             {
1425                 // Verify that the archive.info and backup.info current database info and history lists are the same
1426                 verifyPgHistory(infoArchivePg(archiveInfo), infoBackupPg(backupInfo));
1427             }
1428             CATCH_ANY()
1429             {
1430                 LOG_ERROR(errorTypeCode(&FormatError), errorMessage());
1431                 (*errorTotal)++;
1432             }
1433             TRY_END();
1434         }
1435 
1436         // If valid info files, then begin process of checking backups and archives in the repo
1437         if ((*errorTotal) == 0)
1438         {
1439             // Initialize the job data
1440             VerifyJobData jobData =
1441             {
1442                 .memContext = memContextCurrent(),
1443                 .walPathList = NULL,
1444                 .walFileList = strLstNew(),
1445                 .pgHistory = infoArchivePg(archiveInfo),
1446                 .manifestCipherPass = infoPgCipherPass(infoBackupPg(backupInfo)),
1447                 .walCipherPass = infoPgCipherPass(infoArchivePg(archiveInfo)),
1448                 .archiveIdResultList = lstNewP(sizeof(VerifyArchiveResult), .comparator = archiveIdComparator),
1449                 .backupResultList = lstNewP(sizeof(VerifyBackupResult), .comparator = lstComparatorStr),
1450             };
1451 
1452             // Get a list of backups in the repo sorted ascending
1453             jobData.backupList = strLstSort(
1454                 storageListP(
1455                     storage, STORAGE_REPO_BACKUP_STR,
1456                     .expression = backupRegExpP(.full = true, .differential = true, .incremental = true)),
1457                 sortOrderAsc);
1458 
1459             // Get a list of archive Ids in the repo (e.g. 9.4-1, 10-2, etc) sorted ascending by the db-id (number after the dash)
1460             jobData.archiveIdList = strLstSort(
1461                 strLstComparatorSet(
1462                     storageListP(storage, STORAGE_REPO_ARCHIVE_STR, .expression = STRDEF(REGEX_ARCHIVE_DIR_DB_VERSION)),
1463                     archiveIdComparator),
1464                 sortOrderAsc);
1465 
1466             // Only begin processing if there are some archives or backups in the repo
1467             if (!strLstEmpty(jobData.archiveIdList) || !strLstEmpty(jobData.backupList))
1468             {
1469                 // Warn if there are no archives or there are no backups in the repo so that the callback need not try to
1470                 // distinguish between having processed all of the list or if the list was missing in the first place
1471                 if (strLstEmpty(jobData.archiveIdList) || strLstEmpty(jobData.backupList))
1472                     LOG_WARN_FMT("no %s exist in the repo", strLstEmpty(jobData.archiveIdList) ? "archives" : "backups");
1473 
1474                 // If there are no archives to process, then set the processing flag to skip to processing the backups
1475                 if (strLstEmpty(jobData.archiveIdList))
1476                     jobData.backupProcessing = true;
1477 
1478                 // Set current backup if there is one and verify the archive history on disk is in the database history
1479                 jobData.currentBackup = verifySetBackupCheckArchive(
1480                     jobData.backupList, backupInfo, jobData.archiveIdList, jobData.pgHistory, &jobData.jobErrorTotal);
1481 
1482                 // Create the parallel executor
1483                 ProtocolParallel *parallelExec = protocolParallelNew(
1484                     cfgOptionUInt64(cfgOptProtocolTimeout) / 2, verifyJobCallback, &jobData);
1485 
1486                 for (unsigned int processIdx = 1; processIdx <= cfgOptionUInt(cfgOptProcessMax); processIdx++)
1487                     protocolParallelClientAdd(parallelExec, protocolLocalGet(protocolStorageTypeRepo, 0, processIdx));
1488 
1489                 // Process jobs
1490                 MEM_CONTEXT_TEMP_RESET_BEGIN()
1491                 {
1492                     do
1493                     {
1494                         unsigned int completed = protocolParallelProcess(parallelExec);
1495 
1496                         // Process completed jobs
1497                         for (unsigned int jobIdx = 0; jobIdx < completed; jobIdx++)
1498                         {
1499                             // Get the job and job key
1500                             ProtocolParallelJob *job = protocolParallelResult(parallelExec);
1501                             unsigned int processId = protocolParallelJobProcessId(job);
1502                             StringList *filePathLst = strLstNewSplit(varStr(protocolParallelJobKey(job)), FSLASH_STR);
1503 
1504                             // Remove the result and file type identifier and recreate the path file name
1505                             const String *resultId = strLstGet(filePathLst, 0);
1506                             strLstRemoveIdx(filePathLst, 0);
1507                             const String *fileType = strLstGet(filePathLst, 0);
1508                             strLstRemoveIdx(filePathLst, 0);
1509                             String *filePathName = strLstJoin(filePathLst, "/");
1510 
1511                             // Initialize the result sets
1512                             VerifyArchiveResult *archiveIdResult = NULL;
1513                             VerifyBackupResult *backupResult = NULL;
1514 
1515                             // Get archiveId result data
1516                             if (strEq(fileType, STORAGE_REPO_ARCHIVE_STR))
1517                             {
1518                                 // Find the archiveId in the list - assert if not found since this should never happen
1519                                 unsigned int index = lstFindIdx(jobData.archiveIdResultList, &resultId);
1520                                 ASSERT(index != LIST_NOT_FOUND);
1521 
1522                                 archiveIdResult = lstGet(jobData.archiveIdResultList, index);
1523                             }
1524                             // Else get the backup result data
1525                             else
1526                             {
1527                                 unsigned int index = lstFindIdx(jobData.backupResultList, &resultId);
1528                                 ASSERT(index != LIST_NOT_FOUND);
1529 
1530                                 backupResult = lstGet(jobData.backupResultList, index);
1531                             }
1532 
1533                             // The job was successful
1534                             if (protocolParallelJobErrorCode(job) == 0)
1535                             {
1536                                 const VerifyResult verifyResult = (VerifyResult)pckReadU32P(protocolParallelJobResult(job));
1537 
1538                                 // Update the result set for the type of file being processed
1539                                 if (strEq(fileType, STORAGE_REPO_ARCHIVE_STR))
1540                                 {
1541                                     if (verifyResult == verifyOk)
1542                                         archiveIdResult->totalValidWal++;
1543                                     else
1544                                     {
1545                                         jobData.jobErrorTotal += verifyLogInvalidResult(
1546                                             fileType, verifyResult, processId, filePathName);
1547 
1548                                         // Add invalid file to the WAL range
1549                                         verifyAddInvalidWalFile(
1550                                             archiveIdResult->walRangeList, verifyResult, filePathName,
1551                                             strSubN(strLstGet(filePathLst, strLstSize(filePathLst) - 1), 0,
1552                                             WAL_SEGMENT_NAME_SIZE));
1553                                     }
1554                                 }
1555                                 else
1556                                 {
1557                                     if (verifyResult == verifyOk)
1558                                         backupResult->totalFileValid++;
1559                                     else
1560                                     {
1561                                         jobData.jobErrorTotal += verifyLogInvalidResult(
1562                                             fileType, verifyResult, processId, filePathName);
1563                                         backupResult->status = backupInvalid;
1564                                         verifyInvalidFileAdd(backupResult->invalidFileList, verifyResult, filePathName);
1565                                     }
1566                                 }
1567                             }
1568                             // Else the job errored
1569                             else
1570                             {
1571                                 // Log a protocol error and increment the jobErrorTotal
1572                                 LOG_ERROR_PID_FMT(
1573                                     processId, errorTypeCode(&ProtocolError),
1574                                     "%s %s: [%d] %s", strZ(verifyErrorMsg(verifyOtherError)), strZ(filePathName),
1575                                     protocolParallelJobErrorCode(job), strZ(protocolParallelJobErrorMessage(job)));
1576 
1577                                 jobData.jobErrorTotal++;
1578 
1579                                 // Add invalid file with "OtherError" reason to invalid file list
1580                                 if (strEq(fileType, STORAGE_REPO_ARCHIVE_STR))
1581                                 {
1582                                     // Add invalid file to the WAL range
1583                                     verifyAddInvalidWalFile(
1584                                         archiveIdResult->walRangeList, verifyOtherError, filePathName,
1585                                         strSubN(strLstGet(filePathLst, strLstSize(filePathLst) - 1), 0, WAL_SEGMENT_NAME_SIZE));
1586                                 }
1587                                 else
1588                                 {
1589                                     backupResult->status = backupInvalid;
1590                                     verifyInvalidFileAdd(backupResult->invalidFileList, verifyOtherError, filePathName);
1591                                 }
1592                             }
1593 
1594                             // Set backup verification complete for a backup if all files have run through verification
1595                             if (strEq(fileType, STORAGE_REPO_BACKUP_STR) &&
1596                                 backupResult->totalFileVerify == backupResult->totalFileManifest)
1597                             {
1598                                 backupResult->fileVerifyComplete = true;
1599                             }
1600 
1601                             // Free the job
1602                             protocolParallelJobFree(job);
1603                         }
1604 
1605                         // Reset the memory context occasionally so we don't use too much memory or slow down processing
1606                         MEM_CONTEXT_TEMP_RESET(1000);
1607                     }
1608                     while (!protocolParallelDone(parallelExec));
1609                 }
1610                 MEM_CONTEXT_TEMP_END();
1611 
1612                 // ??? Need to do the final reconciliation - checking backup required WAL against, valid WAL
1613 
1614                 // Report results
1615                 resultStr = verifyRender(jobData.archiveIdResultList, jobData.backupResultList);
1616             }
1617             else
1618                 LOG_WARN("no archives or backups exist in the repo");
1619 
1620             (*errorTotal) += jobData.jobErrorTotal;
1621         }
1622 
1623         MEM_CONTEXT_PRIOR_BEGIN()
1624         {
1625             result = strDup(resultStr);
1626         }
1627         MEM_CONTEXT_PRIOR_END();
1628     }
1629     MEM_CONTEXT_TEMP_END();
1630 
1631     FUNCTION_LOG_RETURN(STRING, result);
1632 }
1633 
1634 /**********************************************************************************************************************************/
1635 void
cmdVerify(void)1636 cmdVerify(void)
1637 {
1638     FUNCTION_LOG_VOID(logLevelDebug);
1639 
1640     MEM_CONTEXT_TEMP_BEGIN()
1641     {
1642         unsigned int errorTotal = 0;
1643         String *result = verifyProcess(&errorTotal);
1644 
1645         // Output results if any
1646         if (strSize(result) > 0)
1647             LOG_INFO_FMT("%s", strZ(result));
1648 
1649         // Throw an error if any encountered
1650         if (errorTotal > 0)
1651             THROW_FMT(RuntimeError, "%u fatal errors encountered, see log for details", errorTotal);
1652     }
1653     MEM_CONTEXT_TEMP_END();
1654 
1655     FUNCTION_LOG_RETURN_VOID();
1656 }
1657