1 /*-------------------------------------------------------------------------
2  *
3  * libpq_fetch.c
4  *	  Functions for fetching files from a remote server.
5  *
6  * Copyright (c) 2013-2018, PostgreSQL Global Development Group
7  *
8  *-------------------------------------------------------------------------
9  */
10 #include "postgres_fe.h"
11 
12 #include <sys/stat.h>
13 #include <dirent.h>
14 #include <fcntl.h>
15 #include <unistd.h>
16 
17 #include "pg_rewind.h"
18 #include "datapagemap.h"
19 #include "fetch.h"
20 #include "file_ops.h"
21 #include "filemap.h"
22 #include "logging.h"
23 
24 #include "libpq-fe.h"
25 #include "catalog/pg_type_d.h"
26 #include "fe_utils/connect.h"
27 #include "port/pg_bswap.h"
28 
29 static PGconn *conn = NULL;
30 
31 /*
32  * Files are fetched max CHUNKSIZE bytes at a time.
33  *
34  * (This only applies to files that are copied in whole, or for truncated
35  * files where we copy the tail. Relation files, where we know the individual
36  * blocks that need to be fetched, are fetched in BLCKSZ chunks.)
37  */
38 #define CHUNKSIZE 1000000
39 
40 static void receiveFileChunks(const char *sql);
41 static void execute_pagemap(datapagemap_t *pagemap, const char *path);
42 static char *run_simple_query(const char *sql);
43 static void run_simple_command(const char *sql);
44 
45 void
libpqConnect(const char * connstr)46 libpqConnect(const char *connstr)
47 {
48 	char	   *str;
49 	PGresult   *res;
50 
51 	conn = PQconnectdb(connstr);
52 	if (PQstatus(conn) == CONNECTION_BAD)
53 		pg_fatal("%s", PQerrorMessage(conn));
54 
55 	pg_log(PG_PROGRESS, "connected to server\n");
56 
57 	/* disable all types of timeouts */
58 	run_simple_command("SET statement_timeout = 0");
59 	run_simple_command("SET lock_timeout = 0");
60 	run_simple_command("SET idle_in_transaction_session_timeout = 0");
61 
62 	res = PQexec(conn, ALWAYS_SECURE_SEARCH_PATH_SQL);
63 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
64 		pg_fatal("could not clear search_path: %s",
65 				 PQresultErrorMessage(res));
66 	PQclear(res);
67 
68 	/*
69 	 * Check that the server is not in hot standby mode. There is no
70 	 * fundamental reason that couldn't be made to work, but it doesn't
71 	 * currently because we use a temporary table. Better to check for it
72 	 * explicitly than error out, for a better error message.
73 	 */
74 	str = run_simple_query("SELECT pg_is_in_recovery()");
75 	if (strcmp(str, "f") != 0)
76 		pg_fatal("source server must not be in recovery mode\n");
77 	pg_free(str);
78 
79 	/*
80 	 * Also check that full_page_writes is enabled.  We can get torn pages if
81 	 * a page is modified while we read it with pg_read_binary_file(), and we
82 	 * rely on full page images to fix them.
83 	 */
84 	str = run_simple_query("SHOW full_page_writes");
85 	if (strcmp(str, "on") != 0)
86 		pg_fatal("full_page_writes must be enabled in the source server\n");
87 	pg_free(str);
88 
89 	/*
90 	 * Although we don't do any "real" updates, we do work with a temporary
91 	 * table. We don't care about synchronous commit for that. It doesn't
92 	 * otherwise matter much, but if the server is using synchronous
93 	 * replication, and replication isn't working for some reason, we don't
94 	 * want to get stuck, waiting for it to start working again.
95 	 */
96 	run_simple_command("SET synchronous_commit = off");
97 }
98 
99 /*
100  * Runs a query that returns a single value.
101  * The result should be pg_free'd after use.
102  */
103 static char *
run_simple_query(const char * sql)104 run_simple_query(const char *sql)
105 {
106 	PGresult   *res;
107 	char	   *result;
108 
109 	res = PQexec(conn, sql);
110 
111 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
112 		pg_fatal("error running query (%s) in source server: %s",
113 				 sql, PQresultErrorMessage(res));
114 
115 	/* sanity check the result set */
116 	if (PQnfields(res) != 1 || PQntuples(res) != 1 || PQgetisnull(res, 0, 0))
117 		pg_fatal("unexpected result set from query\n");
118 
119 	result = pg_strdup(PQgetvalue(res, 0, 0));
120 
121 	PQclear(res);
122 
123 	return result;
124 }
125 
126 /*
127  * Runs a command.
128  * In the event of a failure, exit immediately.
129  */
130 static void
run_simple_command(const char * sql)131 run_simple_command(const char *sql)
132 {
133 	PGresult   *res;
134 
135 	res = PQexec(conn, sql);
136 
137 	if (PQresultStatus(res) != PGRES_COMMAND_OK)
138 		pg_fatal("error running query (%s) in source server: %s",
139 				 sql, PQresultErrorMessage(res));
140 
141 	PQclear(res);
142 }
143 
144 /*
145  * Calls pg_current_wal_insert_lsn() function
146  */
147 XLogRecPtr
libpqGetCurrentXlogInsertLocation(void)148 libpqGetCurrentXlogInsertLocation(void)
149 {
150 	XLogRecPtr	result;
151 	uint32		hi;
152 	uint32		lo;
153 	char	   *val;
154 
155 	val = run_simple_query("SELECT pg_current_wal_insert_lsn()");
156 
157 	if (sscanf(val, "%X/%X", &hi, &lo) != 2)
158 		pg_fatal("unrecognized result \"%s\" for current WAL insert location\n", val);
159 
160 	result = ((uint64) hi) << 32 | lo;
161 
162 	pg_free(val);
163 
164 	return result;
165 }
166 
167 /*
168  * Get a list of all files in the data directory.
169  */
170 void
libpqProcessFileList(void)171 libpqProcessFileList(void)
172 {
173 	PGresult   *res;
174 	const char *sql;
175 	int			i;
176 
177 	/*
178 	 * Create a recursive directory listing of the whole data directory.
179 	 *
180 	 * The WITH RECURSIVE part does most of the work. The second part gets the
181 	 * targets of the symlinks in pg_tblspc directory.
182 	 *
183 	 * XXX: There is no backend function to get a symbolic link's target in
184 	 * general, so if the admin has put any custom symbolic links in the data
185 	 * directory, they won't be copied correctly.
186 	 */
187 	sql =
188 		"WITH RECURSIVE files (path, filename, size, isdir) AS (\n"
189 		"  SELECT '' AS path, filename, size, isdir FROM\n"
190 		"  (SELECT pg_ls_dir('.', true, false) AS filename) AS fn,\n"
191 		"        pg_stat_file(fn.filename, true) AS this\n"
192 		"  UNION ALL\n"
193 		"  SELECT parent.path || parent.filename || '/' AS path,\n"
194 		"         fn, this.size, this.isdir\n"
195 		"  FROM files AS parent,\n"
196 		"       pg_ls_dir(parent.path || parent.filename, true, false) AS fn,\n"
197 		"       pg_stat_file(parent.path || parent.filename || '/' || fn, true) AS this\n"
198 		"       WHERE parent.isdir = 't'\n"
199 		")\n"
200 		"SELECT path || filename, size, isdir,\n"
201 		"       pg_tablespace_location(pg_tablespace.oid) AS link_target\n"
202 		"FROM files\n"
203 		"LEFT OUTER JOIN pg_tablespace ON files.path = 'pg_tblspc/'\n"
204 		"                             AND oid::text = files.filename\n";
205 	res = PQexec(conn, sql);
206 
207 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
208 		pg_fatal("could not fetch file list: %s",
209 				 PQresultErrorMessage(res));
210 
211 	/* sanity check the result set */
212 	if (PQnfields(res) != 4)
213 		pg_fatal("unexpected result set while fetching file list\n");
214 
215 	/* Read result to local variables */
216 	for (i = 0; i < PQntuples(res); i++)
217 	{
218 		char	   *path;
219 		int64		filesize;
220 		bool		isdir;
221 		char	   *link_target;
222 		file_type_t type;
223 
224 		if (PQgetisnull(res, i, 1))
225 		{
226 			/*
227 			 * The file was removed from the server while the query was
228 			 * running. Ignore it.
229 			 */
230 			continue;
231 		}
232 
233 		path = PQgetvalue(res, i, 0);
234 		filesize = atol(PQgetvalue(res, i, 1));
235 		isdir = (strcmp(PQgetvalue(res, i, 2), "t") == 0);
236 		link_target = PQgetvalue(res, i, 3);
237 
238 		if (link_target[0])
239 			type = FILE_TYPE_SYMLINK;
240 		else if (isdir)
241 			type = FILE_TYPE_DIRECTORY;
242 		else
243 			type = FILE_TYPE_REGULAR;
244 
245 		process_source_file(path, type, filesize, link_target);
246 	}
247 	PQclear(res);
248 }
249 
250 /*----
251  * Runs a query, which returns pieces of files from the remote source data
252  * directory, and overwrites the corresponding parts of target files with
253  * the received parts. The result set is expected to be of format:
254  *
255  * path		text	-- path in the data directory, e.g "base/1/123"
256  * begin	int8	-- offset within the file
257  * chunk	bytea	-- file content
258  *----
259  */
260 static void
receiveFileChunks(const char * sql)261 receiveFileChunks(const char *sql)
262 {
263 	PGresult   *res;
264 
265 	if (PQsendQueryParams(conn, sql, 0, NULL, NULL, NULL, NULL, 1) != 1)
266 		pg_fatal("could not send query: %s", PQerrorMessage(conn));
267 
268 	pg_log(PG_DEBUG, "getting file chunks\n");
269 
270 	if (PQsetSingleRowMode(conn) != 1)
271 		pg_fatal("could not set libpq connection to single row mode\n");
272 
273 	while ((res = PQgetResult(conn)) != NULL)
274 	{
275 		char	   *filename;
276 		int			filenamelen;
277 		int64		chunkoff;
278 		char		chunkoff_str[32];
279 		int			chunksize;
280 		char	   *chunk;
281 
282 		switch (PQresultStatus(res))
283 		{
284 			case PGRES_SINGLE_TUPLE:
285 				break;
286 
287 			case PGRES_TUPLES_OK:
288 				PQclear(res);
289 				continue;		/* final zero-row result */
290 
291 			default:
292 				pg_fatal("unexpected result while fetching remote files: %s",
293 						 PQresultErrorMessage(res));
294 		}
295 
296 		/* sanity check the result set */
297 		if (PQnfields(res) != 3 || PQntuples(res) != 1)
298 			pg_fatal("unexpected result set size while fetching remote files\n");
299 
300 		if (PQftype(res, 0) != TEXTOID ||
301 			PQftype(res, 1) != INT8OID ||
302 			PQftype(res, 2) != BYTEAOID)
303 		{
304 			pg_fatal("unexpected data types in result set while fetching remote files: %u %u %u\n",
305 					 PQftype(res, 0), PQftype(res, 1), PQftype(res, 2));
306 		}
307 
308 		if (PQfformat(res, 0) != 1 &&
309 			PQfformat(res, 1) != 1 &&
310 			PQfformat(res, 2) != 1)
311 		{
312 			pg_fatal("unexpected result format while fetching remote files\n");
313 		}
314 
315 		if (PQgetisnull(res, 0, 0) ||
316 			PQgetisnull(res, 0, 1))
317 		{
318 			pg_fatal("unexpected null values in result while fetching remote files\n");
319 		}
320 
321 		if (PQgetlength(res, 0, 1) != sizeof(int64))
322 			pg_fatal("unexpected result length while fetching remote files\n");
323 
324 		/* Read result set to local variables */
325 		memcpy(&chunkoff, PQgetvalue(res, 0, 1), sizeof(int64));
326 		chunkoff = pg_ntoh64(chunkoff);
327 		chunksize = PQgetlength(res, 0, 2);
328 
329 		filenamelen = PQgetlength(res, 0, 0);
330 		filename = pg_malloc(filenamelen + 1);
331 		memcpy(filename, PQgetvalue(res, 0, 0), filenamelen);
332 		filename[filenamelen] = '\0';
333 
334 		chunk = PQgetvalue(res, 0, 2);
335 
336 		/*
337 		 * If a file has been deleted on the source, remove it on the target
338 		 * as well.  Note that multiple unlink() calls may happen on the same
339 		 * file if multiple data chunks are associated with it, hence ignore
340 		 * unconditionally anything missing.  If this file is not a relation
341 		 * data file, then it has been already truncated when creating the
342 		 * file chunk list at the previous execution of the filemap.
343 		 */
344 		if (PQgetisnull(res, 0, 2))
345 		{
346 			pg_log(PG_DEBUG,
347 				   "received null value for chunk for file \"%s\", file has been deleted\n",
348 				   filename);
349 			remove_target_file(filename, true);
350 			pg_free(filename);
351 			PQclear(res);
352 			continue;
353 		}
354 
355 		/*
356 		 * Separate step to keep platform-dependent format code out of
357 		 * translatable strings.
358 		 */
359 		snprintf(chunkoff_str, sizeof(chunkoff_str), INT64_FORMAT, chunkoff);
360 		pg_log(PG_DEBUG, "received chunk for file \"%s\", offset %s, size %d\n",
361 			   filename, chunkoff_str, chunksize);
362 
363 		open_target_file(filename, false);
364 
365 		write_target_range(chunk, chunkoff, chunksize);
366 
367 		pg_free(filename);
368 
369 		PQclear(res);
370 	}
371 }
372 
373 /*
374  * Receive a single file as a malloc'd buffer.
375  */
376 char *
libpqGetFile(const char * filename,size_t * filesize)377 libpqGetFile(const char *filename, size_t *filesize)
378 {
379 	PGresult   *res;
380 	char	   *result;
381 	int			len;
382 	const char *paramValues[1];
383 
384 	paramValues[0] = filename;
385 	res = PQexecParams(conn, "SELECT pg_read_binary_file($1)",
386 					   1, NULL, paramValues, NULL, NULL, 1);
387 
388 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
389 		pg_fatal("could not fetch remote file \"%s\": %s",
390 				 filename, PQresultErrorMessage(res));
391 
392 	/* sanity check the result set */
393 	if (PQntuples(res) != 1 || PQgetisnull(res, 0, 0))
394 		pg_fatal("unexpected result set while fetching remote file \"%s\"\n",
395 				 filename);
396 
397 	/* Read result to local variables */
398 	len = PQgetlength(res, 0, 0);
399 	result = pg_malloc(len + 1);
400 	memcpy(result, PQgetvalue(res, 0, 0), len);
401 	result[len] = '\0';
402 
403 	PQclear(res);
404 
405 	pg_log(PG_DEBUG, "fetched file \"%s\", length %d\n", filename, len);
406 
407 	if (filesize)
408 		*filesize = len;
409 	return result;
410 }
411 
412 /*
413  * Write a file range to a temporary table in the server.
414  *
415  * The range is sent to the server as a COPY formatted line, to be inserted
416  * into the 'fetchchunks' temporary table. It is used in receiveFileChunks()
417  * function to actually fetch the data.
418  */
419 static void
fetch_file_range(const char * path,uint64 begin,uint64 end)420 fetch_file_range(const char *path, uint64 begin, uint64 end)
421 {
422 	char		linebuf[MAXPGPATH + 23];
423 
424 	/* Split the range into CHUNKSIZE chunks */
425 	while (end - begin > 0)
426 	{
427 		unsigned int len;
428 
429 		/* Fine as long as CHUNKSIZE is not bigger than UINT32_MAX */
430 		if (end - begin > CHUNKSIZE)
431 			len = CHUNKSIZE;
432 		else
433 			len = (unsigned int) (end - begin);
434 
435 		snprintf(linebuf, sizeof(linebuf), "%s\t" UINT64_FORMAT "\t%u\n", path, begin, len);
436 
437 		if (PQputCopyData(conn, linebuf, strlen(linebuf)) != 1)
438 			pg_fatal("could not send COPY data: %s",
439 					 PQerrorMessage(conn));
440 
441 		begin += len;
442 	}
443 }
444 
445 /*
446  * Fetch all changed blocks from remote source data directory.
447  */
448 void
libpq_executeFileMap(filemap_t * map)449 libpq_executeFileMap(filemap_t *map)
450 {
451 	file_entry_t *entry;
452 	const char *sql;
453 	PGresult   *res;
454 	int			i;
455 
456 	/*
457 	 * First create a temporary table, and load it with the blocks that we
458 	 * need to fetch.
459 	 */
460 	sql = "CREATE TEMPORARY TABLE fetchchunks(path text, begin int8, len int4);";
461 	run_simple_command(sql);
462 
463 	sql = "COPY fetchchunks FROM STDIN";
464 	res = PQexec(conn, sql);
465 
466 	if (PQresultStatus(res) != PGRES_COPY_IN)
467 		pg_fatal("could not send file list: %s",
468 				 PQresultErrorMessage(res));
469 	PQclear(res);
470 
471 	for (i = 0; i < map->narray; i++)
472 	{
473 		entry = map->array[i];
474 
475 		/* If this is a relation file, copy the modified blocks */
476 		execute_pagemap(&entry->pagemap, entry->path);
477 
478 		switch (entry->action)
479 		{
480 			case FILE_ACTION_NONE:
481 				/* nothing else to do */
482 				break;
483 
484 			case FILE_ACTION_COPY:
485 				/* Truncate the old file out of the way, if any */
486 				open_target_file(entry->path, true);
487 				fetch_file_range(entry->path, 0, entry->newsize);
488 				break;
489 
490 			case FILE_ACTION_TRUNCATE:
491 				truncate_target_file(entry->path, entry->newsize);
492 				break;
493 
494 			case FILE_ACTION_COPY_TAIL:
495 				fetch_file_range(entry->path, entry->oldsize, entry->newsize);
496 				break;
497 
498 			case FILE_ACTION_REMOVE:
499 				remove_target(entry);
500 				break;
501 
502 			case FILE_ACTION_CREATE:
503 				create_target(entry);
504 				break;
505 		}
506 	}
507 
508 	if (PQputCopyEnd(conn, NULL) != 1)
509 		pg_fatal("could not send end-of-COPY: %s",
510 				 PQerrorMessage(conn));
511 
512 	while ((res = PQgetResult(conn)) != NULL)
513 	{
514 		if (PQresultStatus(res) != PGRES_COMMAND_OK)
515 			pg_fatal("unexpected result while sending file list: %s",
516 					 PQresultErrorMessage(res));
517 		PQclear(res);
518 	}
519 
520 	/*
521 	 * We've now copied the list of file ranges that we need to fetch to the
522 	 * temporary table. Now, actually fetch all of those ranges.
523 	 */
524 	sql =
525 		"SELECT path, begin,\n"
526 		"  pg_read_binary_file(path, begin, len, true) AS chunk\n"
527 		"FROM fetchchunks\n";
528 
529 	receiveFileChunks(sql);
530 }
531 
532 static void
execute_pagemap(datapagemap_t * pagemap,const char * path)533 execute_pagemap(datapagemap_t *pagemap, const char *path)
534 {
535 	datapagemap_iterator_t *iter;
536 	BlockNumber blkno;
537 	off_t		offset;
538 
539 	iter = datapagemap_iterate(pagemap);
540 	while (datapagemap_next(iter, &blkno))
541 	{
542 		offset = blkno * BLCKSZ;
543 
544 		fetch_file_range(path, offset, offset + BLCKSZ);
545 	}
546 	pg_free(iter);
547 }
548