1 /*-------------------------------------------------------------------------
2  *
3  * backup_manifest.c
4  *	  code for generating and sending a backup manifest
5  *
6  * Portions Copyright (c) 2010-2020, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *	  src/backend/replication/backup_manifest.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "access/timeline.h"
16 #include "libpq/libpq.h"
17 #include "libpq/pqformat.h"
18 #include "mb/pg_wchar.h"
19 #include "replication/backup_manifest.h"
20 #include "utils/builtins.h"
21 #include "utils/json.h"
22 
23 static void AppendStringToManifest(backup_manifest_info *manifest, char *s);
24 
25 /*
26  * Does the user want a backup manifest?
27  *
28  * It's simplest to always have a manifest_info object, so that we don't need
29  * checks for NULL pointers in too many places. However, if the user doesn't
30  * want a manifest, we set manifest->buffile to NULL.
31  */
32 static inline bool
IsManifestEnabled(backup_manifest_info * manifest)33 IsManifestEnabled(backup_manifest_info *manifest)
34 {
35 	return (manifest->buffile != NULL);
36 }
37 
/*
 * Convenience macro for appending printf-style formatted data to the backup
 * manifest.
 *
 * The arguments after "manifest" are passed to psprintf(); the resulting
 * string is handed to AppendStringToManifest() and then freed with pfree().
 *
 * The body is wrapped in do { ... } while (0) so that an invocation followed
 * by a semicolon is exactly one statement.  That keeps the macro safe to use
 * as the unbraced body of an if that has an else branch.
 */
#define AppendToManifest(manifest, ...) \
	do { \
		char *_manifest_s = psprintf(__VA_ARGS__);	\
		AppendStringToManifest((manifest), _manifest_s);	\
		pfree(_manifest_s);	\
	} while (0)
47 
48 /*
49  * Initialize state so that we can construct a backup manifest.
50  *
51  * NB: Although the checksum type for the data files is configurable, the
52  * checksum for the manifest itself always uses SHA-256. See comments in
53  * SendBackupManifest.
54  */
55 void
InitializeBackupManifest(backup_manifest_info * manifest,backup_manifest_option want_manifest,pg_checksum_type manifest_checksum_type)56 InitializeBackupManifest(backup_manifest_info *manifest,
57 						 backup_manifest_option want_manifest,
58 						 pg_checksum_type manifest_checksum_type)
59 {
60 	memset(manifest, 0, sizeof(backup_manifest_info));
61 	manifest->checksum_type = manifest_checksum_type;
62 
63 	if (want_manifest == MANIFEST_OPTION_NO)
64 		manifest->buffile = NULL;
65 	else
66 	{
67 		manifest->buffile = BufFileCreateTemp(false);
68 		pg_sha256_init(&manifest->manifest_ctx);
69 	}
70 
71 	manifest->manifest_size = UINT64CONST(0);
72 	manifest->force_encode = (want_manifest == MANIFEST_OPTION_FORCE_ENCODE);
73 	manifest->first_file = true;
74 	manifest->still_checksumming = true;
75 
76 	if (want_manifest != MANIFEST_OPTION_NO)
77 		AppendToManifest(manifest,
78 						 "{ \"PostgreSQL-Backup-Manifest-Version\": 1,\n"
79 						 "\"Files\": [");
80 }
81 
82 /*
83  * Add an entry to the backup manifest for a file.
84  */
85 void
AddFileToBackupManifest(backup_manifest_info * manifest,const char * spcoid,const char * pathname,size_t size,pg_time_t mtime,pg_checksum_context * checksum_ctx)86 AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid,
87 						const char *pathname, size_t size, pg_time_t mtime,
88 						pg_checksum_context *checksum_ctx)
89 {
90 	char		pathbuf[MAXPGPATH];
91 	int			pathlen;
92 	StringInfoData buf;
93 
94 	if (!IsManifestEnabled(manifest))
95 		return;
96 
97 	/*
98 	 * If this file is part of a tablespace, the pathname passed to this
99 	 * function will be relative to the tar file that contains it. We want the
100 	 * pathname relative to the data directory (ignoring the intermediate
101 	 * symlink traversal).
102 	 */
103 	if (spcoid != NULL)
104 	{
105 		snprintf(pathbuf, sizeof(pathbuf), "pg_tblspc/%s/%s", spcoid,
106 				 pathname);
107 		pathname = pathbuf;
108 	}
109 
110 	/*
111 	 * Each file's entry needs to be separated from any entry that follows by
112 	 * a comma, but there's no comma before the first one or after the last
113 	 * one. To make that work, adding a file to the manifest starts by
114 	 * terminating the most recently added line, with a comma if appropriate,
115 	 * but does not terminate the line inserted for this file.
116 	 */
117 	initStringInfo(&buf);
118 	if (manifest->first_file)
119 	{
120 		appendStringInfoString(&buf, "\n");
121 		manifest->first_file = false;
122 	}
123 	else
124 		appendStringInfoString(&buf, ",\n");
125 
126 	/*
127 	 * Write the relative pathname to this file out to the manifest. The
128 	 * manifest is always stored in UTF-8, so we have to encode paths that are
129 	 * not valid in that encoding.
130 	 */
131 	pathlen = strlen(pathname);
132 	if (!manifest->force_encode &&
133 		pg_verify_mbstr(PG_UTF8, pathname, pathlen, true))
134 	{
135 		appendStringInfoString(&buf, "{ \"Path\": ");
136 		escape_json(&buf, pathname);
137 		appendStringInfoString(&buf, ", ");
138 	}
139 	else
140 	{
141 		appendStringInfoString(&buf, "{ \"Encoded-Path\": \"");
142 		enlargeStringInfo(&buf, 2 * pathlen);
143 		buf.len += hex_encode(pathname, pathlen,
144 							  &buf.data[buf.len]);
145 		appendStringInfoString(&buf, "\", ");
146 	}
147 
148 	appendStringInfo(&buf, "\"Size\": %zu, ", size);
149 
150 	/*
151 	 * Convert last modification time to a string and append it to the
152 	 * manifest. Since it's not clear what time zone to use and since time
153 	 * zone definitions can change, possibly causing confusion, use GMT
154 	 * always.
155 	 */
156 	appendStringInfoString(&buf, "\"Last-Modified\": \"");
157 	enlargeStringInfo(&buf, 128);
158 	buf.len += pg_strftime(&buf.data[buf.len], 128, "%Y-%m-%d %H:%M:%S %Z",
159 						   pg_gmtime(&mtime));
160 	appendStringInfoString(&buf, "\"");
161 
162 	/* Add checksum information. */
163 	if (checksum_ctx->type != CHECKSUM_TYPE_NONE)
164 	{
165 		uint8		checksumbuf[PG_CHECKSUM_MAX_LENGTH];
166 		int			checksumlen;
167 
168 		checksumlen = pg_checksum_final(checksum_ctx, checksumbuf);
169 
170 		appendStringInfo(&buf,
171 						 ", \"Checksum-Algorithm\": \"%s\", \"Checksum\": \"",
172 						 pg_checksum_type_name(checksum_ctx->type));
173 		enlargeStringInfo(&buf, 2 * checksumlen);
174 		buf.len += hex_encode((char *) checksumbuf, checksumlen,
175 							  &buf.data[buf.len]);
176 		appendStringInfoString(&buf, "\"");
177 	}
178 
179 	/* Close out the object. */
180 	appendStringInfoString(&buf, " }");
181 
182 	/* OK, add it to the manifest. */
183 	AppendStringToManifest(manifest, buf.data);
184 
185 	/* Avoid leaking memory. */
186 	pfree(buf.data);
187 }
188 
189 /*
190  * Add information about the WAL that will need to be replayed when restoring
191  * this backup to the manifest.
192  */
193 void
AddWALInfoToBackupManifest(backup_manifest_info * manifest,XLogRecPtr startptr,TimeLineID starttli,XLogRecPtr endptr,TimeLineID endtli)194 AddWALInfoToBackupManifest(backup_manifest_info *manifest, XLogRecPtr startptr,
195 						   TimeLineID starttli, XLogRecPtr endptr,
196 						   TimeLineID endtli)
197 {
198 	List	   *timelines;
199 	ListCell   *lc;
200 	bool		first_wal_range = true;
201 	bool		found_start_timeline = false;
202 
203 	if (!IsManifestEnabled(manifest))
204 		return;
205 
206 	/* Terminate the list of files. */
207 	AppendStringToManifest(manifest, "\n],\n");
208 
209 	/* Read the timeline history for the ending timeline. */
210 	timelines = readTimeLineHistory(endtli);
211 
212 	/* Start a list of LSN ranges. */
213 	AppendStringToManifest(manifest, "\"WAL-Ranges\": [\n");
214 
215 	foreach(lc, timelines)
216 	{
217 		TimeLineHistoryEntry *entry = lfirst(lc);
218 		XLogRecPtr	tl_beginptr;
219 
220 		/*
221 		 * We only care about timelines that were active during the backup.
222 		 * Skip any that ended before the backup started. (Note that if
223 		 * entry->end is InvalidXLogRecPtr, it means that the timeline has not
224 		 * yet ended.)
225 		 */
226 		if (!XLogRecPtrIsInvalid(entry->end) && entry->end < startptr)
227 			continue;
228 
229 		/*
230 		 * Because the timeline history file lists newer timelines before
231 		 * older ones, the first timeline we encounter that is new enough to
232 		 * matter ought to match the ending timeline of the backup.
233 		 */
234 		if (first_wal_range && endtli != entry->tli)
235 			ereport(ERROR,
236 					errmsg("expected end timeline %u but found timeline %u",
237 						   starttli, entry->tli));
238 
239 		/*
240 		 * If this timeline entry matches with the timeline on which the
241 		 * backup started, WAL needs to be checked from the start LSN of the
242 		 * backup.  If this entry refers to a newer timeline, WAL needs to be
243 		 * checked since the beginning of this timeline, so use the LSN where
244 		 * the timeline began.
245 		 */
246 		if (starttli == entry->tli)
247 			tl_beginptr = startptr;
248 		else
249 		{
250 			tl_beginptr = entry->begin;
251 
252 			/*
253 			 * If we reach a TLI that has no valid beginning LSN, there can't
254 			 * be any more timelines in the history after this point, so we'd
255 			 * better have arrived at the expected starting TLI. If not,
256 			 * something's gone horribly wrong.
257 			 */
258 			if (XLogRecPtrIsInvalid(entry->begin))
259 				ereport(ERROR,
260 						errmsg("expected start timeline %u but found timeline %u",
261 							   starttli, entry->tli));
262 		}
263 
264 		AppendToManifest(manifest,
265 						 "%s{ \"Timeline\": %u, \"Start-LSN\": \"%X/%X\", \"End-LSN\": \"%X/%X\" }",
266 						 first_wal_range ? "" : ",\n",
267 						 entry->tli,
268 						 (uint32) (tl_beginptr >> 32), (uint32) tl_beginptr,
269 						 (uint32) (endptr >> 32), (uint32) endptr);
270 
271 		if (starttli == entry->tli)
272 		{
273 			found_start_timeline = true;
274 			break;
275 		}
276 
277 		endptr = entry->begin;
278 		first_wal_range = false;
279 	}
280 
281 	/*
282 	 * The last entry in the timeline history for the ending timeline should
283 	 * be the ending timeline itself. Verify that this is what we observed.
284 	 */
285 	if (!found_start_timeline)
286 		ereport(ERROR,
287 				errmsg("start timeline %u not found in history of timeline %u",
288 					   starttli, endtli));
289 
290 	/* Terminate the list of WAL ranges. */
291 	AppendStringToManifest(manifest, "\n],\n");
292 }
293 
294 /*
295  * Finalize the backup manifest, and send it to the client.
296  */
297 void
SendBackupManifest(backup_manifest_info * manifest)298 SendBackupManifest(backup_manifest_info *manifest)
299 {
300 	StringInfoData protobuf;
301 	uint8		checksumbuf[PG_SHA256_DIGEST_LENGTH];
302 	char		checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH];
303 	size_t		manifest_bytes_done = 0;
304 
305 	if (!IsManifestEnabled(manifest))
306 		return;
307 
308 	/*
309 	 * Append manifest checksum, so that the problems with the manifest itself
310 	 * can be detected.
311 	 *
312 	 * We always use SHA-256 for this, regardless of what algorithm is chosen
313 	 * for checksumming the files.  If we ever want to make the checksum
314 	 * algorithm used for the manifest file variable, the client will need a
315 	 * way to figure out which algorithm to use as close to the beginning of
316 	 * the manifest file as possible, to avoid having to read the whole thing
317 	 * twice.
318 	 */
319 	manifest->still_checksumming = false;
320 	pg_sha256_final(&manifest->manifest_ctx, checksumbuf);
321 	AppendStringToManifest(manifest, "\"Manifest-Checksum\": \"");
322 	hex_encode((char *) checksumbuf, sizeof checksumbuf, checksumstringbuf);
323 	checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH - 1] = '\0';
324 	AppendStringToManifest(manifest, checksumstringbuf);
325 	AppendStringToManifest(manifest, "\"}\n");
326 
327 	/*
328 	 * We've written all the data to the manifest file.  Rewind the file so
329 	 * that we can read it all back.
330 	 */
331 	if (BufFileSeek(manifest->buffile, 0, 0L, SEEK_SET))
332 		ereport(ERROR,
333 				(errcode_for_file_access(),
334 				 errmsg("could not rewind temporary file")));
335 
336 	/* Send CopyOutResponse message */
337 	pq_beginmessage(&protobuf, 'H');
338 	pq_sendbyte(&protobuf, 0);	/* overall format */
339 	pq_sendint16(&protobuf, 0); /* natts */
340 	pq_endmessage(&protobuf);
341 
342 	/*
343 	 * Send CopyData messages.
344 	 *
345 	 * We choose to read back the data from the temporary file in chunks of
346 	 * size BLCKSZ; this isn't necessary, but buffile.c uses that as the I/O
347 	 * size, so it seems to make sense to match that value here.
348 	 */
349 	while (manifest_bytes_done < manifest->manifest_size)
350 	{
351 		char		manifestbuf[BLCKSZ];
352 		size_t		bytes_to_read;
353 		size_t		rc;
354 
355 		bytes_to_read = Min(sizeof(manifestbuf),
356 							manifest->manifest_size - manifest_bytes_done);
357 		rc = BufFileRead(manifest->buffile, manifestbuf, bytes_to_read);
358 		if (rc != bytes_to_read)
359 			ereport(ERROR,
360 					(errcode_for_file_access(),
361 					 errmsg("could not read from temporary file: %m")));
362 		pq_putmessage('d', manifestbuf, bytes_to_read);
363 		manifest_bytes_done += bytes_to_read;
364 	}
365 
366 	/* No more data, so send CopyDone message */
367 	pq_putemptymessage('c');
368 
369 	/* Release resources */
370 	BufFileClose(manifest->buffile);
371 }
372 
373 /*
374  * Append a cstring to the manifest.
375  */
376 static void
AppendStringToManifest(backup_manifest_info * manifest,char * s)377 AppendStringToManifest(backup_manifest_info *manifest, char *s)
378 {
379 	int			len = strlen(s);
380 
381 	Assert(manifest != NULL);
382 	if (manifest->still_checksumming)
383 		pg_sha256_update(&manifest->manifest_ctx, (uint8 *) s, len);
384 	BufFileWrite(manifest->buffile, s, len);
385 	manifest->manifest_size += len;
386 }
387