1 /*-------------------------------------------------------------------------
2  *
3  * backup_manifest.c
4  *	  code for generating and sending a backup manifest
5  *
6  * Portions Copyright (c) 2010-2021, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *	  src/backend/replication/backup_manifest.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "access/timeline.h"
16 #include "libpq/libpq.h"
17 #include "libpq/pqformat.h"
18 #include "mb/pg_wchar.h"
19 #include "replication/backup_manifest.h"
20 #include "utils/builtins.h"
21 #include "utils/json.h"
22 
23 static void AppendStringToManifest(backup_manifest_info *manifest, char *s);
24 
25 /*
26  * Does the user want a backup manifest?
27  *
28  * It's simplest to always have a manifest_info object, so that we don't need
29  * checks for NULL pointers in too many places. However, if the user doesn't
30  * want a manifest, we set manifest->buffile to NULL.
31  */
32 static inline bool
IsManifestEnabled(backup_manifest_info * manifest)33 IsManifestEnabled(backup_manifest_info *manifest)
34 {
35 	return (manifest->buffile != NULL);
36 }
37 
/*
 * Convenience macro for appending printf-style formatted data to the backup
 * manifest.
 *
 * The arguments following "manifest" are handed to psprintf(); the resulting
 * string is passed to AppendStringToManifest() and then released with
 * pfree().
 *
 * The body is wrapped in do { ... } while (0) so that an invocation followed
 * by a semicolon forms exactly one statement.  That keeps the macro safe to
 * use as the unbraced body of an if that has an else branch.
 */
#define AppendToManifest(manifest, ...) \
	do { \
		char *_manifest_s = psprintf(__VA_ARGS__);	\
		AppendStringToManifest(manifest, _manifest_s);	\
		pfree(_manifest_s);	\
	} while (0)
47 
48 /*
49  * Initialize state so that we can construct a backup manifest.
50  *
51  * NB: Although the checksum type for the data files is configurable, the
52  * checksum for the manifest itself always uses SHA-256. See comments in
53  * SendBackupManifest.
54  */
55 void
InitializeBackupManifest(backup_manifest_info * manifest,backup_manifest_option want_manifest,pg_checksum_type manifest_checksum_type)56 InitializeBackupManifest(backup_manifest_info *manifest,
57 						 backup_manifest_option want_manifest,
58 						 pg_checksum_type manifest_checksum_type)
59 {
60 	memset(manifest, 0, sizeof(backup_manifest_info));
61 	manifest->checksum_type = manifest_checksum_type;
62 
63 	if (want_manifest == MANIFEST_OPTION_NO)
64 		manifest->buffile = NULL;
65 	else
66 	{
67 		manifest->buffile = BufFileCreateTemp(false);
68 		manifest->manifest_ctx = pg_cryptohash_create(PG_SHA256);
69 		if (pg_cryptohash_init(manifest->manifest_ctx) < 0)
70 			elog(ERROR, "failed to initialize checksum of backup manifest");
71 	}
72 
73 	manifest->manifest_size = UINT64CONST(0);
74 	manifest->force_encode = (want_manifest == MANIFEST_OPTION_FORCE_ENCODE);
75 	manifest->first_file = true;
76 	manifest->still_checksumming = true;
77 
78 	if (want_manifest != MANIFEST_OPTION_NO)
79 		AppendToManifest(manifest,
80 						 "{ \"PostgreSQL-Backup-Manifest-Version\": 1,\n"
81 						 "\"Files\": [");
82 }
83 
84 /*
85  * Free resources assigned to a backup manifest constructed.
86  */
87 void
FreeBackupManifest(backup_manifest_info * manifest)88 FreeBackupManifest(backup_manifest_info *manifest)
89 {
90 	pg_cryptohash_free(manifest->manifest_ctx);
91 	manifest->manifest_ctx = NULL;
92 }
93 
94 /*
95  * Add an entry to the backup manifest for a file.
96  */
97 void
AddFileToBackupManifest(backup_manifest_info * manifest,const char * spcoid,const char * pathname,size_t size,pg_time_t mtime,pg_checksum_context * checksum_ctx)98 AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid,
99 						const char *pathname, size_t size, pg_time_t mtime,
100 						pg_checksum_context *checksum_ctx)
101 {
102 	char		pathbuf[MAXPGPATH];
103 	int			pathlen;
104 	StringInfoData buf;
105 
106 	if (!IsManifestEnabled(manifest))
107 		return;
108 
109 	/*
110 	 * If this file is part of a tablespace, the pathname passed to this
111 	 * function will be relative to the tar file that contains it. We want the
112 	 * pathname relative to the data directory (ignoring the intermediate
113 	 * symlink traversal).
114 	 */
115 	if (spcoid != NULL)
116 	{
117 		snprintf(pathbuf, sizeof(pathbuf), "pg_tblspc/%s/%s", spcoid,
118 				 pathname);
119 		pathname = pathbuf;
120 	}
121 
122 	/*
123 	 * Each file's entry needs to be separated from any entry that follows by
124 	 * a comma, but there's no comma before the first one or after the last
125 	 * one. To make that work, adding a file to the manifest starts by
126 	 * terminating the most recently added line, with a comma if appropriate,
127 	 * but does not terminate the line inserted for this file.
128 	 */
129 	initStringInfo(&buf);
130 	if (manifest->first_file)
131 	{
132 		appendStringInfoChar(&buf, '\n');
133 		manifest->first_file = false;
134 	}
135 	else
136 		appendStringInfoString(&buf, ",\n");
137 
138 	/*
139 	 * Write the relative pathname to this file out to the manifest. The
140 	 * manifest is always stored in UTF-8, so we have to encode paths that are
141 	 * not valid in that encoding.
142 	 */
143 	pathlen = strlen(pathname);
144 	if (!manifest->force_encode &&
145 		pg_verify_mbstr(PG_UTF8, pathname, pathlen, true))
146 	{
147 		appendStringInfoString(&buf, "{ \"Path\": ");
148 		escape_json(&buf, pathname);
149 		appendStringInfoString(&buf, ", ");
150 	}
151 	else
152 	{
153 		appendStringInfoString(&buf, "{ \"Encoded-Path\": \"");
154 		enlargeStringInfo(&buf, 2 * pathlen);
155 		buf.len += hex_encode(pathname, pathlen,
156 							  &buf.data[buf.len]);
157 		appendStringInfoString(&buf, "\", ");
158 	}
159 
160 	appendStringInfo(&buf, "\"Size\": %zu, ", size);
161 
162 	/*
163 	 * Convert last modification time to a string and append it to the
164 	 * manifest. Since it's not clear what time zone to use and since time
165 	 * zone definitions can change, possibly causing confusion, use GMT
166 	 * always.
167 	 */
168 	appendStringInfoString(&buf, "\"Last-Modified\": \"");
169 	enlargeStringInfo(&buf, 128);
170 	buf.len += pg_strftime(&buf.data[buf.len], 128, "%Y-%m-%d %H:%M:%S %Z",
171 						   pg_gmtime(&mtime));
172 	appendStringInfoChar(&buf, '"');
173 
174 	/* Add checksum information. */
175 	if (checksum_ctx->type != CHECKSUM_TYPE_NONE)
176 	{
177 		uint8		checksumbuf[PG_CHECKSUM_MAX_LENGTH];
178 		int			checksumlen;
179 
180 		checksumlen = pg_checksum_final(checksum_ctx, checksumbuf);
181 		if (checksumlen < 0)
182 			elog(ERROR, "could not finalize checksum of file \"%s\"",
183 				 pathname);
184 
185 		appendStringInfo(&buf,
186 						 ", \"Checksum-Algorithm\": \"%s\", \"Checksum\": \"",
187 						 pg_checksum_type_name(checksum_ctx->type));
188 		enlargeStringInfo(&buf, 2 * checksumlen);
189 		buf.len += hex_encode((char *) checksumbuf, checksumlen,
190 							  &buf.data[buf.len]);
191 		appendStringInfoChar(&buf, '"');
192 	}
193 
194 	/* Close out the object. */
195 	appendStringInfoString(&buf, " }");
196 
197 	/* OK, add it to the manifest. */
198 	AppendStringToManifest(manifest, buf.data);
199 
200 	/* Avoid leaking memory. */
201 	pfree(buf.data);
202 }
203 
204 /*
205  * Add information about the WAL that will need to be replayed when restoring
206  * this backup to the manifest.
207  */
208 void
AddWALInfoToBackupManifest(backup_manifest_info * manifest,XLogRecPtr startptr,TimeLineID starttli,XLogRecPtr endptr,TimeLineID endtli)209 AddWALInfoToBackupManifest(backup_manifest_info *manifest, XLogRecPtr startptr,
210 						   TimeLineID starttli, XLogRecPtr endptr,
211 						   TimeLineID endtli)
212 {
213 	List	   *timelines;
214 	ListCell   *lc;
215 	bool		first_wal_range = true;
216 	bool		found_start_timeline = false;
217 
218 	if (!IsManifestEnabled(manifest))
219 		return;
220 
221 	/* Terminate the list of files. */
222 	AppendStringToManifest(manifest, "\n],\n");
223 
224 	/* Read the timeline history for the ending timeline. */
225 	timelines = readTimeLineHistory(endtli);
226 
227 	/* Start a list of LSN ranges. */
228 	AppendStringToManifest(manifest, "\"WAL-Ranges\": [\n");
229 
230 	foreach(lc, timelines)
231 	{
232 		TimeLineHistoryEntry *entry = lfirst(lc);
233 		XLogRecPtr	tl_beginptr;
234 
235 		/*
236 		 * We only care about timelines that were active during the backup.
237 		 * Skip any that ended before the backup started. (Note that if
238 		 * entry->end is InvalidXLogRecPtr, it means that the timeline has not
239 		 * yet ended.)
240 		 */
241 		if (!XLogRecPtrIsInvalid(entry->end) && entry->end < startptr)
242 			continue;
243 
244 		/*
245 		 * Because the timeline history file lists newer timelines before
246 		 * older ones, the first timeline we encounter that is new enough to
247 		 * matter ought to match the ending timeline of the backup.
248 		 */
249 		if (first_wal_range && endtli != entry->tli)
250 			ereport(ERROR,
251 					errmsg("expected end timeline %u but found timeline %u",
252 						   starttli, entry->tli));
253 
254 		/*
255 		 * If this timeline entry matches with the timeline on which the
256 		 * backup started, WAL needs to be checked from the start LSN of the
257 		 * backup.  If this entry refers to a newer timeline, WAL needs to be
258 		 * checked since the beginning of this timeline, so use the LSN where
259 		 * the timeline began.
260 		 */
261 		if (starttli == entry->tli)
262 			tl_beginptr = startptr;
263 		else
264 		{
265 			tl_beginptr = entry->begin;
266 
267 			/*
268 			 * If we reach a TLI that has no valid beginning LSN, there can't
269 			 * be any more timelines in the history after this point, so we'd
270 			 * better have arrived at the expected starting TLI. If not,
271 			 * something's gone horribly wrong.
272 			 */
273 			if (XLogRecPtrIsInvalid(entry->begin))
274 				ereport(ERROR,
275 						errmsg("expected start timeline %u but found timeline %u",
276 							   starttli, entry->tli));
277 		}
278 
279 		AppendToManifest(manifest,
280 						 "%s{ \"Timeline\": %u, \"Start-LSN\": \"%X/%X\", \"End-LSN\": \"%X/%X\" }",
281 						 first_wal_range ? "" : ",\n",
282 						 entry->tli,
283 						 LSN_FORMAT_ARGS(tl_beginptr),
284 						 LSN_FORMAT_ARGS(endptr));
285 
286 		if (starttli == entry->tli)
287 		{
288 			found_start_timeline = true;
289 			break;
290 		}
291 
292 		endptr = entry->begin;
293 		first_wal_range = false;
294 	}
295 
296 	/*
297 	 * The last entry in the timeline history for the ending timeline should
298 	 * be the ending timeline itself. Verify that this is what we observed.
299 	 */
300 	if (!found_start_timeline)
301 		ereport(ERROR,
302 				errmsg("start timeline %u not found in history of timeline %u",
303 					   starttli, endtli));
304 
305 	/* Terminate the list of WAL ranges. */
306 	AppendStringToManifest(manifest, "\n],\n");
307 }
308 
309 /*
310  * Finalize the backup manifest, and send it to the client.
311  */
312 void
SendBackupManifest(backup_manifest_info * manifest)313 SendBackupManifest(backup_manifest_info *manifest)
314 {
315 	StringInfoData protobuf;
316 	uint8		checksumbuf[PG_SHA256_DIGEST_LENGTH];
317 	char		checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH];
318 	size_t		manifest_bytes_done = 0;
319 
320 	if (!IsManifestEnabled(manifest))
321 		return;
322 
323 	/*
324 	 * Append manifest checksum, so that the problems with the manifest itself
325 	 * can be detected.
326 	 *
327 	 * We always use SHA-256 for this, regardless of what algorithm is chosen
328 	 * for checksumming the files.  If we ever want to make the checksum
329 	 * algorithm used for the manifest file variable, the client will need a
330 	 * way to figure out which algorithm to use as close to the beginning of
331 	 * the manifest file as possible, to avoid having to read the whole thing
332 	 * twice.
333 	 */
334 	manifest->still_checksumming = false;
335 	if (pg_cryptohash_final(manifest->manifest_ctx, checksumbuf,
336 							sizeof(checksumbuf)) < 0)
337 		elog(ERROR, "failed to finalize checksum of backup manifest");
338 	AppendStringToManifest(manifest, "\"Manifest-Checksum\": \"");
339 
340 	hex_encode((char *) checksumbuf, sizeof checksumbuf, checksumstringbuf);
341 	checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH - 1] = '\0';
342 
343 	AppendStringToManifest(manifest, checksumstringbuf);
344 	AppendStringToManifest(manifest, "\"}\n");
345 
346 	/*
347 	 * We've written all the data to the manifest file.  Rewind the file so
348 	 * that we can read it all back.
349 	 */
350 	if (BufFileSeek(manifest->buffile, 0, 0L, SEEK_SET))
351 		ereport(ERROR,
352 				(errcode_for_file_access(),
353 				 errmsg("could not rewind temporary file")));
354 
355 	/* Send CopyOutResponse message */
356 	pq_beginmessage(&protobuf, 'H');
357 	pq_sendbyte(&protobuf, 0);	/* overall format */
358 	pq_sendint16(&protobuf, 0); /* natts */
359 	pq_endmessage(&protobuf);
360 
361 	/*
362 	 * Send CopyData messages.
363 	 *
364 	 * We choose to read back the data from the temporary file in chunks of
365 	 * size BLCKSZ; this isn't necessary, but buffile.c uses that as the I/O
366 	 * size, so it seems to make sense to match that value here.
367 	 */
368 	while (manifest_bytes_done < manifest->manifest_size)
369 	{
370 		char		manifestbuf[BLCKSZ];
371 		size_t		bytes_to_read;
372 		size_t		rc;
373 
374 		bytes_to_read = Min(sizeof(manifestbuf),
375 							manifest->manifest_size - manifest_bytes_done);
376 		rc = BufFileRead(manifest->buffile, manifestbuf, bytes_to_read);
377 		if (rc != bytes_to_read)
378 			ereport(ERROR,
379 					(errcode_for_file_access(),
380 					 errmsg("could not read from temporary file: %m")));
381 		pq_putmessage('d', manifestbuf, bytes_to_read);
382 		manifest_bytes_done += bytes_to_read;
383 	}
384 
385 	/* No more data, so send CopyDone message */
386 	pq_putemptymessage('c');
387 
388 	/* Release resources */
389 	BufFileClose(manifest->buffile);
390 }
391 
392 /*
393  * Append a cstring to the manifest.
394  */
395 static void
AppendStringToManifest(backup_manifest_info * manifest,char * s)396 AppendStringToManifest(backup_manifest_info *manifest, char *s)
397 {
398 	int			len = strlen(s);
399 
400 	Assert(manifest != NULL);
401 	if (manifest->still_checksumming)
402 	{
403 		if (pg_cryptohash_update(manifest->manifest_ctx, (uint8 *) s, len) < 0)
404 			elog(ERROR, "failed to update checksum of backup manifest");
405 	}
406 	BufFileWrite(manifest->buffile, s, len);
407 	manifest->manifest_size += len;
408 }
409