1 /*-------------------------------------------------------------------------
2 *
3 * backup_manifest.c
4 * code for generating and sending a backup manifest
5 *
6 * Portions Copyright (c) 2010-2021, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/backend/replication/backup_manifest.c
10 *
11 *-------------------------------------------------------------------------
12 */
13 #include "postgres.h"
14
15 #include "access/timeline.h"
16 #include "libpq/libpq.h"
17 #include "libpq/pqformat.h"
18 #include "mb/pg_wchar.h"
19 #include "replication/backup_manifest.h"
20 #include "utils/builtins.h"
21 #include "utils/json.h"
22
23 static void AppendStringToManifest(backup_manifest_info *manifest, char *s);
24
25 /*
26 * Does the user want a backup manifest?
27 *
28 * It's simplest to always have a manifest_info object, so that we don't need
29 * checks for NULL pointers in too many places. However, if the user doesn't
30 * want a manifest, we set manifest->buffile to NULL.
31 */
32 static inline bool
IsManifestEnabled(backup_manifest_info * manifest)33 IsManifestEnabled(backup_manifest_info *manifest)
34 {
35 return (manifest->buffile != NULL);
36 }
37
/*
 * Convenience macro for appending printf-style formatted data to the backup
 * manifest.
 *
 * The arguments following "manifest" are handed to psprintf(); the string it
 * returns is passed to AppendStringToManifest() and then released with
 * pfree().
 *
 * The body is wrapped in do { ... } while (0) so that an invocation followed
 * by a semicolon is exactly one statement.  With a bare { ... } block the
 * trailing semicolon becomes a second, empty statement, which breaks
 * constructs such as "if (x) AppendToManifest(...); else ...".
 */
#define AppendToManifest(manifest, ...) \
	do { \
		char	   *_manifest_s = psprintf(__VA_ARGS__); \
		AppendStringToManifest((manifest), _manifest_s); \
		pfree(_manifest_s); \
	} while (0)
47
48 /*
49 * Initialize state so that we can construct a backup manifest.
50 *
51 * NB: Although the checksum type for the data files is configurable, the
52 * checksum for the manifest itself always uses SHA-256. See comments in
53 * SendBackupManifest.
54 */
55 void
InitializeBackupManifest(backup_manifest_info * manifest,backup_manifest_option want_manifest,pg_checksum_type manifest_checksum_type)56 InitializeBackupManifest(backup_manifest_info *manifest,
57 backup_manifest_option want_manifest,
58 pg_checksum_type manifest_checksum_type)
59 {
60 memset(manifest, 0, sizeof(backup_manifest_info));
61 manifest->checksum_type = manifest_checksum_type;
62
63 if (want_manifest == MANIFEST_OPTION_NO)
64 manifest->buffile = NULL;
65 else
66 {
67 manifest->buffile = BufFileCreateTemp(false);
68 manifest->manifest_ctx = pg_cryptohash_create(PG_SHA256);
69 if (pg_cryptohash_init(manifest->manifest_ctx) < 0)
70 elog(ERROR, "failed to initialize checksum of backup manifest");
71 }
72
73 manifest->manifest_size = UINT64CONST(0);
74 manifest->force_encode = (want_manifest == MANIFEST_OPTION_FORCE_ENCODE);
75 manifest->first_file = true;
76 manifest->still_checksumming = true;
77
78 if (want_manifest != MANIFEST_OPTION_NO)
79 AppendToManifest(manifest,
80 "{ \"PostgreSQL-Backup-Manifest-Version\": 1,\n"
81 "\"Files\": [");
82 }
83
84 /*
85 * Free resources assigned to a backup manifest constructed.
86 */
87 void
FreeBackupManifest(backup_manifest_info * manifest)88 FreeBackupManifest(backup_manifest_info *manifest)
89 {
90 pg_cryptohash_free(manifest->manifest_ctx);
91 manifest->manifest_ctx = NULL;
92 }
93
94 /*
95 * Add an entry to the backup manifest for a file.
96 */
97 void
AddFileToBackupManifest(backup_manifest_info * manifest,const char * spcoid,const char * pathname,size_t size,pg_time_t mtime,pg_checksum_context * checksum_ctx)98 AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid,
99 const char *pathname, size_t size, pg_time_t mtime,
100 pg_checksum_context *checksum_ctx)
101 {
102 char pathbuf[MAXPGPATH];
103 int pathlen;
104 StringInfoData buf;
105
106 if (!IsManifestEnabled(manifest))
107 return;
108
109 /*
110 * If this file is part of a tablespace, the pathname passed to this
111 * function will be relative to the tar file that contains it. We want the
112 * pathname relative to the data directory (ignoring the intermediate
113 * symlink traversal).
114 */
115 if (spcoid != NULL)
116 {
117 snprintf(pathbuf, sizeof(pathbuf), "pg_tblspc/%s/%s", spcoid,
118 pathname);
119 pathname = pathbuf;
120 }
121
122 /*
123 * Each file's entry needs to be separated from any entry that follows by
124 * a comma, but there's no comma before the first one or after the last
125 * one. To make that work, adding a file to the manifest starts by
126 * terminating the most recently added line, with a comma if appropriate,
127 * but does not terminate the line inserted for this file.
128 */
129 initStringInfo(&buf);
130 if (manifest->first_file)
131 {
132 appendStringInfoChar(&buf, '\n');
133 manifest->first_file = false;
134 }
135 else
136 appendStringInfoString(&buf, ",\n");
137
138 /*
139 * Write the relative pathname to this file out to the manifest. The
140 * manifest is always stored in UTF-8, so we have to encode paths that are
141 * not valid in that encoding.
142 */
143 pathlen = strlen(pathname);
144 if (!manifest->force_encode &&
145 pg_verify_mbstr(PG_UTF8, pathname, pathlen, true))
146 {
147 appendStringInfoString(&buf, "{ \"Path\": ");
148 escape_json(&buf, pathname);
149 appendStringInfoString(&buf, ", ");
150 }
151 else
152 {
153 appendStringInfoString(&buf, "{ \"Encoded-Path\": \"");
154 enlargeStringInfo(&buf, 2 * pathlen);
155 buf.len += hex_encode(pathname, pathlen,
156 &buf.data[buf.len]);
157 appendStringInfoString(&buf, "\", ");
158 }
159
160 appendStringInfo(&buf, "\"Size\": %zu, ", size);
161
162 /*
163 * Convert last modification time to a string and append it to the
164 * manifest. Since it's not clear what time zone to use and since time
165 * zone definitions can change, possibly causing confusion, use GMT
166 * always.
167 */
168 appendStringInfoString(&buf, "\"Last-Modified\": \"");
169 enlargeStringInfo(&buf, 128);
170 buf.len += pg_strftime(&buf.data[buf.len], 128, "%Y-%m-%d %H:%M:%S %Z",
171 pg_gmtime(&mtime));
172 appendStringInfoChar(&buf, '"');
173
174 /* Add checksum information. */
175 if (checksum_ctx->type != CHECKSUM_TYPE_NONE)
176 {
177 uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH];
178 int checksumlen;
179
180 checksumlen = pg_checksum_final(checksum_ctx, checksumbuf);
181 if (checksumlen < 0)
182 elog(ERROR, "could not finalize checksum of file \"%s\"",
183 pathname);
184
185 appendStringInfo(&buf,
186 ", \"Checksum-Algorithm\": \"%s\", \"Checksum\": \"",
187 pg_checksum_type_name(checksum_ctx->type));
188 enlargeStringInfo(&buf, 2 * checksumlen);
189 buf.len += hex_encode((char *) checksumbuf, checksumlen,
190 &buf.data[buf.len]);
191 appendStringInfoChar(&buf, '"');
192 }
193
194 /* Close out the object. */
195 appendStringInfoString(&buf, " }");
196
197 /* OK, add it to the manifest. */
198 AppendStringToManifest(manifest, buf.data);
199
200 /* Avoid leaking memory. */
201 pfree(buf.data);
202 }
203
204 /*
205 * Add information about the WAL that will need to be replayed when restoring
206 * this backup to the manifest.
207 */
208 void
AddWALInfoToBackupManifest(backup_manifest_info * manifest,XLogRecPtr startptr,TimeLineID starttli,XLogRecPtr endptr,TimeLineID endtli)209 AddWALInfoToBackupManifest(backup_manifest_info *manifest, XLogRecPtr startptr,
210 TimeLineID starttli, XLogRecPtr endptr,
211 TimeLineID endtli)
212 {
213 List *timelines;
214 ListCell *lc;
215 bool first_wal_range = true;
216 bool found_start_timeline = false;
217
218 if (!IsManifestEnabled(manifest))
219 return;
220
221 /* Terminate the list of files. */
222 AppendStringToManifest(manifest, "\n],\n");
223
224 /* Read the timeline history for the ending timeline. */
225 timelines = readTimeLineHistory(endtli);
226
227 /* Start a list of LSN ranges. */
228 AppendStringToManifest(manifest, "\"WAL-Ranges\": [\n");
229
230 foreach(lc, timelines)
231 {
232 TimeLineHistoryEntry *entry = lfirst(lc);
233 XLogRecPtr tl_beginptr;
234
235 /*
236 * We only care about timelines that were active during the backup.
237 * Skip any that ended before the backup started. (Note that if
238 * entry->end is InvalidXLogRecPtr, it means that the timeline has not
239 * yet ended.)
240 */
241 if (!XLogRecPtrIsInvalid(entry->end) && entry->end < startptr)
242 continue;
243
244 /*
245 * Because the timeline history file lists newer timelines before
246 * older ones, the first timeline we encounter that is new enough to
247 * matter ought to match the ending timeline of the backup.
248 */
249 if (first_wal_range && endtli != entry->tli)
250 ereport(ERROR,
251 errmsg("expected end timeline %u but found timeline %u",
252 starttli, entry->tli));
253
254 /*
255 * If this timeline entry matches with the timeline on which the
256 * backup started, WAL needs to be checked from the start LSN of the
257 * backup. If this entry refers to a newer timeline, WAL needs to be
258 * checked since the beginning of this timeline, so use the LSN where
259 * the timeline began.
260 */
261 if (starttli == entry->tli)
262 tl_beginptr = startptr;
263 else
264 {
265 tl_beginptr = entry->begin;
266
267 /*
268 * If we reach a TLI that has no valid beginning LSN, there can't
269 * be any more timelines in the history after this point, so we'd
270 * better have arrived at the expected starting TLI. If not,
271 * something's gone horribly wrong.
272 */
273 if (XLogRecPtrIsInvalid(entry->begin))
274 ereport(ERROR,
275 errmsg("expected start timeline %u but found timeline %u",
276 starttli, entry->tli));
277 }
278
279 AppendToManifest(manifest,
280 "%s{ \"Timeline\": %u, \"Start-LSN\": \"%X/%X\", \"End-LSN\": \"%X/%X\" }",
281 first_wal_range ? "" : ",\n",
282 entry->tli,
283 LSN_FORMAT_ARGS(tl_beginptr),
284 LSN_FORMAT_ARGS(endptr));
285
286 if (starttli == entry->tli)
287 {
288 found_start_timeline = true;
289 break;
290 }
291
292 endptr = entry->begin;
293 first_wal_range = false;
294 }
295
296 /*
297 * The last entry in the timeline history for the ending timeline should
298 * be the ending timeline itself. Verify that this is what we observed.
299 */
300 if (!found_start_timeline)
301 ereport(ERROR,
302 errmsg("start timeline %u not found in history of timeline %u",
303 starttli, endtli));
304
305 /* Terminate the list of WAL ranges. */
306 AppendStringToManifest(manifest, "\n],\n");
307 }
308
309 /*
310 * Finalize the backup manifest, and send it to the client.
311 */
312 void
SendBackupManifest(backup_manifest_info * manifest)313 SendBackupManifest(backup_manifest_info *manifest)
314 {
315 StringInfoData protobuf;
316 uint8 checksumbuf[PG_SHA256_DIGEST_LENGTH];
317 char checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH];
318 size_t manifest_bytes_done = 0;
319
320 if (!IsManifestEnabled(manifest))
321 return;
322
323 /*
324 * Append manifest checksum, so that the problems with the manifest itself
325 * can be detected.
326 *
327 * We always use SHA-256 for this, regardless of what algorithm is chosen
328 * for checksumming the files. If we ever want to make the checksum
329 * algorithm used for the manifest file variable, the client will need a
330 * way to figure out which algorithm to use as close to the beginning of
331 * the manifest file as possible, to avoid having to read the whole thing
332 * twice.
333 */
334 manifest->still_checksumming = false;
335 if (pg_cryptohash_final(manifest->manifest_ctx, checksumbuf,
336 sizeof(checksumbuf)) < 0)
337 elog(ERROR, "failed to finalize checksum of backup manifest");
338 AppendStringToManifest(manifest, "\"Manifest-Checksum\": \"");
339
340 hex_encode((char *) checksumbuf, sizeof checksumbuf, checksumstringbuf);
341 checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH - 1] = '\0';
342
343 AppendStringToManifest(manifest, checksumstringbuf);
344 AppendStringToManifest(manifest, "\"}\n");
345
346 /*
347 * We've written all the data to the manifest file. Rewind the file so
348 * that we can read it all back.
349 */
350 if (BufFileSeek(manifest->buffile, 0, 0L, SEEK_SET))
351 ereport(ERROR,
352 (errcode_for_file_access(),
353 errmsg("could not rewind temporary file")));
354
355 /* Send CopyOutResponse message */
356 pq_beginmessage(&protobuf, 'H');
357 pq_sendbyte(&protobuf, 0); /* overall format */
358 pq_sendint16(&protobuf, 0); /* natts */
359 pq_endmessage(&protobuf);
360
361 /*
362 * Send CopyData messages.
363 *
364 * We choose to read back the data from the temporary file in chunks of
365 * size BLCKSZ; this isn't necessary, but buffile.c uses that as the I/O
366 * size, so it seems to make sense to match that value here.
367 */
368 while (manifest_bytes_done < manifest->manifest_size)
369 {
370 char manifestbuf[BLCKSZ];
371 size_t bytes_to_read;
372 size_t rc;
373
374 bytes_to_read = Min(sizeof(manifestbuf),
375 manifest->manifest_size - manifest_bytes_done);
376 rc = BufFileRead(manifest->buffile, manifestbuf, bytes_to_read);
377 if (rc != bytes_to_read)
378 ereport(ERROR,
379 (errcode_for_file_access(),
380 errmsg("could not read from temporary file: %m")));
381 pq_putmessage('d', manifestbuf, bytes_to_read);
382 manifest_bytes_done += bytes_to_read;
383 }
384
385 /* No more data, so send CopyDone message */
386 pq_putemptymessage('c');
387
388 /* Release resources */
389 BufFileClose(manifest->buffile);
390 }
391
392 /*
393 * Append a cstring to the manifest.
394 */
395 static void
AppendStringToManifest(backup_manifest_info * manifest,char * s)396 AppendStringToManifest(backup_manifest_info *manifest, char *s)
397 {
398 int len = strlen(s);
399
400 Assert(manifest != NULL);
401 if (manifest->still_checksumming)
402 {
403 if (pg_cryptohash_update(manifest->manifest_ctx, (uint8 *) s, len) < 0)
404 elog(ERROR, "failed to update checksum of backup manifest");
405 }
406 BufFileWrite(manifest->buffile, s, len);
407 manifest->manifest_size += len;
408 }
409