1 /*-------------------------------------------------------------------------
2  *
3  * connection.c
4  *		  Connection management functions for postgres_fdw
5  *
6  * Portions Copyright (c) 2012-2016, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *		  contrib/postgres_fdw/connection.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "postgres_fdw.h"
16 
17 #include "access/htup_details.h"
18 #include "catalog/pg_user_mapping.h"
19 #include "access/xact.h"
20 #include "mb/pg_wchar.h"
21 #include "miscadmin.h"
22 #include "storage/latch.h"
23 #include "utils/hsearch.h"
24 #include "utils/inval.h"
25 #include "utils/memutils.h"
26 #include "utils/syscache.h"
27 
28 
29 /*
30  * Connection cache hash table entry
31  *
32  * The lookup key in this hash table is the user mapping OID. We use just one
33  * connection per user mapping ID, which ensures that all the scans use the
34  * same snapshot during a query.  Using the user mapping OID rather than
35  * the foreign server OID + user OID avoids creating multiple connections when
36  * the public user mapping applies to all user OIDs.
37  *
38  * The "conn" pointer can be NULL if we don't currently have a live connection.
39  * When we do have a connection, xact_depth tracks the current depth of
40  * transactions and subtransactions open on the remote side.  We need to issue
41  * commands at the same nesting depth on the remote as we're executing at
42  * ourselves, so that rolling back a subtransaction will kill the right
43  * queries and not the wrong ones.
44  */
45 typedef Oid ConnCacheKey;
46 
47 typedef struct ConnCacheEntry
48 {
49 	ConnCacheKey key;			/* hash key (must be first) */
50 	PGconn	   *conn;			/* connection to foreign server, or NULL */
51 	/* Remaining fields are invalid when conn is NULL: */
52 	int			xact_depth;		/* 0 = no xact open, 1 = main xact open, 2 =
53 								 * one level of subxact open, etc */
54 	bool		have_prep_stmt; /* have we prepared any stmts in this xact? */
55 	bool		have_error;		/* have any subxacts aborted in this xact? */
56 	bool		changing_xact_state;	/* xact state change in process */
57 	bool		invalidated;	/* true if reconnect is pending */
58 	uint32		server_hashvalue;	/* hash value of foreign server OID */
59 	uint32		mapping_hashvalue;	/* hash value of user mapping OID */
60 } ConnCacheEntry;
61 
62 /*
63  * Connection cache (initialized on first use)
64  */
65 static HTAB *ConnectionHash = NULL;
66 
67 /* for assigning cursor numbers and prepared statement numbers */
68 static unsigned int cursor_number = 0;
69 static unsigned int prep_stmt_number = 0;
70 
71 /* tracks whether any work is needed in callback functions */
72 static bool xact_got_connection = false;
73 
74 /* prototypes of private functions */
75 static PGconn *connect_pg_server(ForeignServer *server, UserMapping *user);
76 static void disconnect_pg_server(ConnCacheEntry *entry);
77 static void check_conn_params(const char **keywords, const char **values);
78 static void configure_remote_session(PGconn *conn);
79 static void do_sql_command(PGconn *conn, const char *sql);
80 static void begin_remote_xact(ConnCacheEntry *entry);
81 static void pgfdw_xact_callback(XactEvent event, void *arg);
82 static void pgfdw_subxact_callback(SubXactEvent event,
83 					   SubTransactionId mySubid,
84 					   SubTransactionId parentSubid,
85 					   void *arg);
86 static void pgfdw_inval_callback(Datum arg, int cacheid, uint32 hashvalue);
87 static void pgfdw_reject_incomplete_xact_state_change(ConnCacheEntry *entry);
88 static bool pgfdw_cancel_query(PGconn *conn);
89 static bool pgfdw_exec_cleanup_query(PGconn *conn, const char *query,
90 						 bool ignore_errors);
91 static bool pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime,
92 						 PGresult **result);
93 
94 
95 /*
96  * Get a PGconn which can be used to execute queries on the remote PostgreSQL
97  * server with the user's authorization.  A new connection is established
98  * if we don't already have a suitable one, and a transaction is opened at
99  * the right subtransaction nesting depth if we didn't do that already.
100  *
101  * will_prep_stmt must be true if caller intends to create any prepared
102  * statements.  Since those don't go away automatically at transaction end
103  * (not even on error), we need this flag to cue manual cleanup.
104  */
105 PGconn *
GetConnection(UserMapping * user,bool will_prep_stmt)106 GetConnection(UserMapping *user, bool will_prep_stmt)
107 {
108 	bool		found;
109 	ConnCacheEntry *entry;
110 	ConnCacheKey key;
111 
112 	/* First time through, initialize connection cache hashtable */
113 	if (ConnectionHash == NULL)
114 	{
115 		HASHCTL		ctl;
116 
117 		MemSet(&ctl, 0, sizeof(ctl));
118 		ctl.keysize = sizeof(ConnCacheKey);
119 		ctl.entrysize = sizeof(ConnCacheEntry);
120 		/* allocate ConnectionHash in the cache context */
121 		ctl.hcxt = CacheMemoryContext;
122 		ConnectionHash = hash_create("postgres_fdw connections", 8,
123 									 &ctl,
124 									 HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
125 
126 		/*
127 		 * Register some callback functions that manage connection cleanup.
128 		 * This should be done just once in each backend.
129 		 */
130 		RegisterXactCallback(pgfdw_xact_callback, NULL);
131 		RegisterSubXactCallback(pgfdw_subxact_callback, NULL);
132 		CacheRegisterSyscacheCallback(FOREIGNSERVEROID,
133 									  pgfdw_inval_callback, (Datum) 0);
134 		CacheRegisterSyscacheCallback(USERMAPPINGOID,
135 									  pgfdw_inval_callback, (Datum) 0);
136 	}
137 
138 	/* Set flag that we did GetConnection during the current transaction */
139 	xact_got_connection = true;
140 
141 	/* Create hash key for the entry.  Assume no pad bytes in key struct */
142 	key = user->umid;
143 
144 	/*
145 	 * Find or create cached entry for requested connection.
146 	 */
147 	entry = hash_search(ConnectionHash, &key, HASH_ENTER, &found);
148 	if (!found)
149 	{
150 		/*
151 		 * We need only clear "conn" here; remaining fields will be filled
152 		 * later when "conn" is set.
153 		 */
154 		entry->conn = NULL;
155 	}
156 
157 	/* Reject further use of connections which failed abort cleanup. */
158 	pgfdw_reject_incomplete_xact_state_change(entry);
159 
160 	/*
161 	 * If the connection needs to be remade due to invalidation, disconnect as
162 	 * soon as we're out of all transactions.
163 	 */
164 	if (entry->conn != NULL && entry->invalidated && entry->xact_depth == 0)
165 	{
166 		elog(DEBUG3, "closing connection %p for option changes to take effect",
167 			 entry->conn);
168 		disconnect_pg_server(entry);
169 	}
170 
171 	/*
172 	 * We don't check the health of cached connection here, because it would
173 	 * require some overhead.  Broken connection will be detected when the
174 	 * connection is actually used.
175 	 */
176 
177 	/*
178 	 * If cache entry doesn't have a connection, we have to establish a new
179 	 * connection.  (If connect_pg_server throws an error, the cache entry
180 	 * will remain in a valid empty state, ie conn == NULL.)
181 	 */
182 	if (entry->conn == NULL)
183 	{
184 		ForeignServer *server = GetForeignServer(user->serverid);
185 
186 		/* Reset all transient state fields, to be sure all are clean */
187 		entry->xact_depth = 0;
188 		entry->have_prep_stmt = false;
189 		entry->have_error = false;
190 		entry->changing_xact_state = false;
191 		entry->invalidated = false;
192 		entry->server_hashvalue =
193 			GetSysCacheHashValue1(FOREIGNSERVEROID,
194 								  ObjectIdGetDatum(server->serverid));
195 		entry->mapping_hashvalue =
196 			GetSysCacheHashValue1(USERMAPPINGOID,
197 								  ObjectIdGetDatum(user->umid));
198 
199 		/* Now try to make the connection */
200 		entry->conn = connect_pg_server(server, user);
201 
202 		elog(DEBUG3, "new postgres_fdw connection %p for server \"%s\" (user mapping oid %u, userid %u)",
203 			 entry->conn, server->servername, user->umid, user->userid);
204 	}
205 
206 	/*
207 	 * Start a new transaction or subtransaction if needed.
208 	 */
209 	begin_remote_xact(entry);
210 
211 	/* Remember if caller will prepare statements */
212 	entry->have_prep_stmt |= will_prep_stmt;
213 
214 	return entry->conn;
215 }
216 
217 /*
218  * Connect to remote server using specified server and user mapping properties.
219  */
220 static PGconn *
connect_pg_server(ForeignServer * server,UserMapping * user)221 connect_pg_server(ForeignServer *server, UserMapping *user)
222 {
223 	PGconn	   *volatile conn = NULL;
224 
225 	/*
226 	 * Use PG_TRY block to ensure closing connection on error.
227 	 */
228 	PG_TRY();
229 	{
230 		const char **keywords;
231 		const char **values;
232 		int			n;
233 
234 		/*
235 		 * Construct connection params from generic options of ForeignServer
236 		 * and UserMapping.  (Some of them might not be libpq options, in
237 		 * which case we'll just waste a few array slots.)  Add 3 extra slots
238 		 * for fallback_application_name, client_encoding, end marker.
239 		 */
240 		n = list_length(server->options) + list_length(user->options) + 3;
241 		keywords = (const char **) palloc(n * sizeof(char *));
242 		values = (const char **) palloc(n * sizeof(char *));
243 
244 		n = 0;
245 		n += ExtractConnectionOptions(server->options,
246 									  keywords + n, values + n);
247 		n += ExtractConnectionOptions(user->options,
248 									  keywords + n, values + n);
249 
250 		/* Use "postgres_fdw" as fallback_application_name. */
251 		keywords[n] = "fallback_application_name";
252 		values[n] = "postgres_fdw";
253 		n++;
254 
255 		/* Set client_encoding so that libpq can convert encoding properly. */
256 		keywords[n] = "client_encoding";
257 		values[n] = GetDatabaseEncodingName();
258 		n++;
259 
260 		keywords[n] = values[n] = NULL;
261 
262 		/* verify connection parameters and make connection */
263 		check_conn_params(keywords, values);
264 
265 		conn = PQconnectdbParams(keywords, values, false);
266 		if (!conn || PQstatus(conn) != CONNECTION_OK)
267 		{
268 			char	   *connmessage;
269 			int			msglen;
270 
271 			/* libpq typically appends a newline, strip that */
272 			connmessage = pstrdup(PQerrorMessage(conn));
273 			msglen = strlen(connmessage);
274 			if (msglen > 0 && connmessage[msglen - 1] == '\n')
275 				connmessage[msglen - 1] = '\0';
276 			ereport(ERROR,
277 			   (errcode(ERRCODE_SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION),
278 				errmsg("could not connect to server \"%s\"",
279 					   server->servername),
280 				errdetail_internal("%s", connmessage)));
281 		}
282 
283 		/*
284 		 * Check that non-superuser has used password to establish connection;
285 		 * otherwise, he's piggybacking on the postgres server's user
286 		 * identity. See also dblink_security_check() in contrib/dblink.
287 		 */
288 		if (!superuser() && !PQconnectionUsedPassword(conn))
289 			ereport(ERROR,
290 				  (errcode(ERRCODE_S_R_E_PROHIBITED_SQL_STATEMENT_ATTEMPTED),
291 				   errmsg("password is required"),
292 				   errdetail("Non-superuser cannot connect if the server does not request a password."),
293 				   errhint("Target server's authentication method must be changed.")));
294 
295 		/* Prepare new session for use */
296 		configure_remote_session(conn);
297 
298 		pfree(keywords);
299 		pfree(values);
300 	}
301 	PG_CATCH();
302 	{
303 		/* Release PGconn data structure if we managed to create one */
304 		if (conn)
305 			PQfinish(conn);
306 		PG_RE_THROW();
307 	}
308 	PG_END_TRY();
309 
310 	return conn;
311 }
312 
313 /*
314  * Disconnect any open connection for a connection cache entry.
315  */
316 static void
disconnect_pg_server(ConnCacheEntry * entry)317 disconnect_pg_server(ConnCacheEntry *entry)
318 {
319 	if (entry->conn != NULL)
320 	{
321 		PQfinish(entry->conn);
322 		entry->conn = NULL;
323 	}
324 }
325 
326 /*
327  * For non-superusers, insist that the connstr specify a password.  This
328  * prevents a password from being picked up from .pgpass, a service file,
329  * the environment, etc.  We don't want the postgres user's passwords
330  * to be accessible to non-superusers.  (See also dblink_connstr_check in
331  * contrib/dblink.)
332  */
333 static void
check_conn_params(const char ** keywords,const char ** values)334 check_conn_params(const char **keywords, const char **values)
335 {
336 	int			i;
337 
338 	/* no check required if superuser */
339 	if (superuser())
340 		return;
341 
342 	/* ok if params contain a non-empty password */
343 	for (i = 0; keywords[i] != NULL; i++)
344 	{
345 		if (strcmp(keywords[i], "password") == 0 && values[i][0] != '\0')
346 			return;
347 	}
348 
349 	ereport(ERROR,
350 			(errcode(ERRCODE_S_R_E_PROHIBITED_SQL_STATEMENT_ATTEMPTED),
351 			 errmsg("password is required"),
352 			 errdetail("Non-superusers must provide a password in the user mapping.")));
353 }
354 
355 /*
356  * Issue SET commands to make sure remote session is configured properly.
357  *
358  * We do this just once at connection, assuming nothing will change the
359  * values later.  Since we'll never send volatile function calls to the
360  * remote, there shouldn't be any way to break this assumption from our end.
361  * It's possible to think of ways to break it at the remote end, eg making
362  * a foreign table point to a view that includes a set_config call ---
363  * but once you admit the possibility of a malicious view definition,
364  * there are any number of ways to break things.
365  */
366 static void
configure_remote_session(PGconn * conn)367 configure_remote_session(PGconn *conn)
368 {
369 	int			remoteversion = PQserverVersion(conn);
370 
371 	/* Force the search path to contain only pg_catalog (see deparse.c) */
372 	do_sql_command(conn, "SET search_path = pg_catalog");
373 
374 	/*
375 	 * Set remote timezone; this is basically just cosmetic, since all
376 	 * transmitted and returned timestamptzs should specify a zone explicitly
377 	 * anyway.  However it makes the regression test outputs more predictable.
378 	 *
379 	 * We don't risk setting remote zone equal to ours, since the remote
380 	 * server might use a different timezone database.  Instead, use UTC
381 	 * (quoted, because very old servers are picky about case).
382 	 */
383 	do_sql_command(conn, "SET timezone = 'UTC'");
384 
385 	/*
386 	 * Set values needed to ensure unambiguous data output from remote.  (This
387 	 * logic should match what pg_dump does.  See also set_transmission_modes
388 	 * in postgres_fdw.c.)
389 	 */
390 	do_sql_command(conn, "SET datestyle = ISO");
391 	if (remoteversion >= 80400)
392 		do_sql_command(conn, "SET intervalstyle = postgres");
393 	if (remoteversion >= 90000)
394 		do_sql_command(conn, "SET extra_float_digits = 3");
395 	else
396 		do_sql_command(conn, "SET extra_float_digits = 2");
397 }
398 
399 /*
400  * Convenience subroutine to issue a non-data-returning SQL command to remote
401  */
402 static void
do_sql_command(PGconn * conn,const char * sql)403 do_sql_command(PGconn *conn, const char *sql)
404 {
405 	PGresult   *res;
406 
407 	if (!PQsendQuery(conn, sql))
408 		pgfdw_report_error(ERROR, NULL, conn, false, sql);
409 	res = pgfdw_get_result(conn, sql);
410 	if (PQresultStatus(res) != PGRES_COMMAND_OK)
411 		pgfdw_report_error(ERROR, res, conn, true, sql);
412 	PQclear(res);
413 }
414 
415 /*
416  * Start remote transaction or subtransaction, if needed.
417  *
418  * Note that we always use at least REPEATABLE READ in the remote session.
419  * This is so that, if a query initiates multiple scans of the same or
420  * different foreign tables, we will get snapshot-consistent results from
421  * those scans.  A disadvantage is that we can't provide sane emulation of
422  * READ COMMITTED behavior --- it would be nice if we had some other way to
423  * control which remote queries share a snapshot.
424  */
425 static void
begin_remote_xact(ConnCacheEntry * entry)426 begin_remote_xact(ConnCacheEntry *entry)
427 {
428 	int			curlevel = GetCurrentTransactionNestLevel();
429 
430 	/* Start main transaction if we haven't yet */
431 	if (entry->xact_depth <= 0)
432 	{
433 		const char *sql;
434 
435 		elog(DEBUG3, "starting remote transaction on connection %p",
436 			 entry->conn);
437 
438 		if (IsolationIsSerializable())
439 			sql = "START TRANSACTION ISOLATION LEVEL SERIALIZABLE";
440 		else
441 			sql = "START TRANSACTION ISOLATION LEVEL REPEATABLE READ";
442 		entry->changing_xact_state = true;
443 		do_sql_command(entry->conn, sql);
444 		entry->xact_depth = 1;
445 		entry->changing_xact_state = false;
446 	}
447 
448 	/*
449 	 * If we're in a subtransaction, stack up savepoints to match our level.
450 	 * This ensures we can rollback just the desired effects when a
451 	 * subtransaction aborts.
452 	 */
453 	while (entry->xact_depth < curlevel)
454 	{
455 		char		sql[64];
456 
457 		snprintf(sql, sizeof(sql), "SAVEPOINT s%d", entry->xact_depth + 1);
458 		entry->changing_xact_state = true;
459 		do_sql_command(entry->conn, sql);
460 		entry->xact_depth++;
461 		entry->changing_xact_state = false;
462 	}
463 }
464 
465 /*
466  * Release connection reference count created by calling GetConnection.
467  */
468 void
ReleaseConnection(PGconn * conn)469 ReleaseConnection(PGconn *conn)
470 {
471 	/*
472 	 * Currently, we don't actually track connection references because all
473 	 * cleanup is managed on a transaction or subtransaction basis instead. So
474 	 * there's nothing to do here.
475 	 */
476 }
477 
478 /*
479  * Assign a "unique" number for a cursor.
480  *
481  * These really only need to be unique per connection within a transaction.
482  * For the moment we ignore the per-connection point and assign them across
483  * all connections in the transaction, but we ask for the connection to be
484  * supplied in case we want to refine that.
485  *
486  * Note that even if wraparound happens in a very long transaction, actual
487  * collisions are highly improbable; just be sure to use %u not %d to print.
488  */
489 unsigned int
GetCursorNumber(PGconn * conn)490 GetCursorNumber(PGconn *conn)
491 {
492 	return ++cursor_number;
493 }
494 
495 /*
496  * Assign a "unique" number for a prepared statement.
497  *
498  * This works much like GetCursorNumber, except that we never reset the counter
499  * within a session.  That's because we can't be 100% sure we've gotten rid
500  * of all prepared statements on all connections, and it's not really worth
501  * increasing the risk of prepared-statement name collisions by resetting.
502  */
503 unsigned int
GetPrepStmtNumber(PGconn * conn)504 GetPrepStmtNumber(PGconn *conn)
505 {
506 	return ++prep_stmt_number;
507 }
508 
509 /*
510  * Submit a query and wait for the result.
511  *
512  * This function is interruptible by signals.
513  *
514  * Caller is responsible for the error handling on the result.
515  */
516 PGresult *
pgfdw_exec_query(PGconn * conn,const char * query)517 pgfdw_exec_query(PGconn *conn, const char *query)
518 {
519 	/*
520 	 * Submit a query.  Since we don't use non-blocking mode, this also can
521 	 * block.  But its risk is relatively small, so we ignore that for now.
522 	 */
523 	if (!PQsendQuery(conn, query))
524 		pgfdw_report_error(ERROR, NULL, conn, false, query);
525 
526 	/* Wait for the result. */
527 	return pgfdw_get_result(conn, query);
528 }
529 
530 /*
531  * Wait for the result from a prior asynchronous execution function call.
532  *
533  * This function offers quick responsiveness by checking for any interruptions.
534  *
535  * This function emulates PQexec()'s behavior of returning the last result
536  * when there are many.
537  *
538  * Caller is responsible for the error handling on the result.
539  */
540 PGresult *
pgfdw_get_result(PGconn * conn,const char * query)541 pgfdw_get_result(PGconn *conn, const char *query)
542 {
543 	PGresult   *volatile last_res = NULL;
544 
545 	/* In what follows, do not leak any PGresults on an error. */
546 	PG_TRY();
547 	{
548 		for (;;)
549 		{
550 			PGresult   *res;
551 
552 			while (PQisBusy(conn))
553 			{
554 				int			wc;
555 
556 				/* Sleep until there's something to do */
557 				wc = WaitLatchOrSocket(MyLatch,
558 									   WL_LATCH_SET | WL_SOCKET_READABLE,
559 									   PQsocket(conn),
560 									   -1L);
561 				ResetLatch(MyLatch);
562 
563 				CHECK_FOR_INTERRUPTS();
564 
565 				/* Data available in socket? */
566 				if (wc & WL_SOCKET_READABLE)
567 				{
568 					if (!PQconsumeInput(conn))
569 						pgfdw_report_error(ERROR, NULL, conn, false, query);
570 				}
571 			}
572 
573 			res = PQgetResult(conn);
574 			if (res == NULL)
575 				break;			/* query is complete */
576 
577 			PQclear(last_res);
578 			last_res = res;
579 		}
580 	}
581 	PG_CATCH();
582 	{
583 		PQclear(last_res);
584 		PG_RE_THROW();
585 	}
586 	PG_END_TRY();
587 
588 	return last_res;
589 }
590 
591 /*
592  * Report an error we got from the remote server.
593  *
594  * elevel: error level to use (typically ERROR, but might be less)
595  * res: PGresult containing the error
596  * conn: connection we did the query on
597  * clear: if true, PQclear the result (otherwise caller will handle it)
598  * sql: NULL, or text of remote command we tried to execute
599  *
600  * Note: callers that choose not to throw ERROR for a remote error are
601  * responsible for making sure that the associated ConnCacheEntry gets
602  * marked with have_error = true.
603  */
604 void
pgfdw_report_error(int elevel,PGresult * res,PGconn * conn,bool clear,const char * sql)605 pgfdw_report_error(int elevel, PGresult *res, PGconn *conn,
606 				   bool clear, const char *sql)
607 {
608 	/* If requested, PGresult must be released before leaving this function. */
609 	PG_TRY();
610 	{
611 		char	   *diag_sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE);
612 		char	   *message_primary = PQresultErrorField(res, PG_DIAG_MESSAGE_PRIMARY);
613 		char	   *message_detail = PQresultErrorField(res, PG_DIAG_MESSAGE_DETAIL);
614 		char	   *message_hint = PQresultErrorField(res, PG_DIAG_MESSAGE_HINT);
615 		char	   *message_context = PQresultErrorField(res, PG_DIAG_CONTEXT);
616 		int			sqlstate;
617 
618 		if (diag_sqlstate)
619 			sqlstate = MAKE_SQLSTATE(diag_sqlstate[0],
620 									 diag_sqlstate[1],
621 									 diag_sqlstate[2],
622 									 diag_sqlstate[3],
623 									 diag_sqlstate[4]);
624 		else
625 			sqlstate = ERRCODE_CONNECTION_FAILURE;
626 
627 		/*
628 		 * If we don't get a message from the PGresult, try the PGconn.  This
629 		 * is needed because for connection-level failures, PQexec may just
630 		 * return NULL, not a PGresult at all.
631 		 */
632 		if (message_primary == NULL)
633 			message_primary = PQerrorMessage(conn);
634 
635 		ereport(elevel,
636 				(errcode(sqlstate),
637 				 message_primary ? errmsg_internal("%s", message_primary) :
638 				 errmsg("could not obtain message string for remote error"),
639 			   message_detail ? errdetail_internal("%s", message_detail) : 0,
640 				 message_hint ? errhint("%s", message_hint) : 0,
641 				 message_context ? errcontext("%s", message_context) : 0,
642 				 sql ? errcontext("Remote SQL command: %s", sql) : 0));
643 	}
644 	PG_CATCH();
645 	{
646 		if (clear)
647 			PQclear(res);
648 		PG_RE_THROW();
649 	}
650 	PG_END_TRY();
651 	if (clear)
652 		PQclear(res);
653 }
654 
655 /*
656  * pgfdw_xact_callback --- cleanup at main-transaction end.
657  *
658  * This runs just late enough that it must not enter user-defined code
659  * locally.  (Entering such code on the remote side is fine.  Its remote
660  * COMMIT TRANSACTION may run deferred triggers.)
661  */
662 static void
pgfdw_xact_callback(XactEvent event,void * arg)663 pgfdw_xact_callback(XactEvent event, void *arg)
664 {
665 	HASH_SEQ_STATUS scan;
666 	ConnCacheEntry *entry;
667 
668 	/* Quick exit if no connections were touched in this transaction. */
669 	if (!xact_got_connection)
670 		return;
671 
672 	/*
673 	 * Scan all connection cache entries to find open remote transactions, and
674 	 * close them.
675 	 */
676 	hash_seq_init(&scan, ConnectionHash);
677 	while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
678 	{
679 		PGresult   *res;
680 
681 		/* Ignore cache entry if no open connection right now */
682 		if (entry->conn == NULL)
683 			continue;
684 
685 		/* If it has an open remote transaction, try to close it */
686 		if (entry->xact_depth > 0)
687 		{
688 			bool		abort_cleanup_failure = false;
689 
690 			elog(DEBUG3, "closing remote transaction on connection %p",
691 				 entry->conn);
692 
693 			switch (event)
694 			{
695 				case XACT_EVENT_PARALLEL_PRE_COMMIT:
696 				case XACT_EVENT_PRE_COMMIT:
697 
698 					/*
699 					 * If abort cleanup previously failed for this connection,
700 					 * we can't issue any more commands against it.
701 					 */
702 					pgfdw_reject_incomplete_xact_state_change(entry);
703 
704 					/* Commit all remote transactions during pre-commit */
705 					entry->changing_xact_state = true;
706 					do_sql_command(entry->conn, "COMMIT TRANSACTION");
707 					entry->changing_xact_state = false;
708 
709 					/*
710 					 * If there were any errors in subtransactions, and we
711 					 * made prepared statements, do a DEALLOCATE ALL to make
712 					 * sure we get rid of all prepared statements. This is
713 					 * annoying and not terribly bulletproof, but it's
714 					 * probably not worth trying harder.
715 					 *
716 					 * DEALLOCATE ALL only exists in 8.3 and later, so this
717 					 * constrains how old a server postgres_fdw can
718 					 * communicate with.  We intentionally ignore errors in
719 					 * the DEALLOCATE, so that we can hobble along to some
720 					 * extent with older servers (leaking prepared statements
721 					 * as we go; but we don't really support update operations
722 					 * pre-8.3 anyway).
723 					 */
724 					if (entry->have_prep_stmt && entry->have_error)
725 					{
726 						res = PQexec(entry->conn, "DEALLOCATE ALL");
727 						PQclear(res);
728 					}
729 					entry->have_prep_stmt = false;
730 					entry->have_error = false;
731 					break;
732 				case XACT_EVENT_PRE_PREPARE:
733 
734 					/*
735 					 * We disallow any remote transactions, since it's not
736 					 * very reasonable to hold them open until the prepared
737 					 * transaction is committed.  For the moment, throw error
738 					 * unconditionally; later we might allow read-only cases.
739 					 * Note that the error will cause us to come right back
740 					 * here with event == XACT_EVENT_ABORT, so we'll clean up
741 					 * the connection state at that point.
742 					 */
743 					ereport(ERROR,
744 							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
745 							 errmsg("cannot PREPARE a transaction that has operated on postgres_fdw foreign tables")));
746 					break;
747 				case XACT_EVENT_PARALLEL_COMMIT:
748 				case XACT_EVENT_COMMIT:
749 				case XACT_EVENT_PREPARE:
750 					/* Pre-commit should have closed the open transaction */
751 					elog(ERROR, "missed cleaning up connection during pre-commit");
752 					break;
753 				case XACT_EVENT_PARALLEL_ABORT:
754 				case XACT_EVENT_ABORT:
755 
756 					/*
757 					 * Don't try to clean up the connection if we're already
758 					 * in error recursion trouble.
759 					 */
760 					if (in_error_recursion_trouble())
761 						entry->changing_xact_state = true;
762 
763 					/*
764 					 * If connection is already unsalvageable, don't touch it
765 					 * further.
766 					 */
767 					if (entry->changing_xact_state)
768 						break;
769 
770 					/*
771 					 * Mark this connection as in the process of changing
772 					 * transaction state.
773 					 */
774 					entry->changing_xact_state = true;
775 
776 					/* Assume we might have lost track of prepared statements */
777 					entry->have_error = true;
778 
779 					/*
780 					 * If a command has been submitted to the remote server by
781 					 * using an asynchronous execution function, the command
782 					 * might not have yet completed.  Check to see if a
783 					 * command is still being processed by the remote server,
784 					 * and if so, request cancellation of the command.
785 					 */
786 					if (PQtransactionStatus(entry->conn) == PQTRANS_ACTIVE &&
787 						!pgfdw_cancel_query(entry->conn))
788 					{
789 						/* Unable to cancel running query. */
790 						abort_cleanup_failure = true;
791 					}
792 					else if (!pgfdw_exec_cleanup_query(entry->conn,
793 													   "ABORT TRANSACTION",
794 													   false))
795 					{
796 						/* Unable to abort remote transaction. */
797 						abort_cleanup_failure = true;
798 					}
799 					else if (entry->have_prep_stmt && entry->have_error &&
800 							 !pgfdw_exec_cleanup_query(entry->conn,
801 													   "DEALLOCATE ALL",
802 													   true))
803 					{
804 						/* Trouble clearing prepared statements. */
805 						abort_cleanup_failure = true;
806 					}
807 					else
808 					{
809 						entry->have_prep_stmt = false;
810 						entry->have_error = false;
811 					}
812 
813 					/* Disarm changing_xact_state if it all worked. */
814 					entry->changing_xact_state = abort_cleanup_failure;
815 					break;
816 			}
817 		}
818 
819 		/* Reset state to show we're out of a transaction */
820 		entry->xact_depth = 0;
821 
822 		/*
823 		 * If the connection isn't in a good idle state or it is marked as
824 		 * invalid, then discard it to recover. Next GetConnection will open a
825 		 * new connection.
826 		 */
827 		if (PQstatus(entry->conn) != CONNECTION_OK ||
828 			PQtransactionStatus(entry->conn) != PQTRANS_IDLE ||
829 			entry->changing_xact_state ||
830 			entry->invalidated)
831 		{
832 			elog(DEBUG3, "discarding connection %p", entry->conn);
833 			disconnect_pg_server(entry);
834 		}
835 	}
836 
837 	/*
838 	 * Regardless of the event type, we can now mark ourselves as out of the
839 	 * transaction.  (Note: if we are here during PRE_COMMIT or PRE_PREPARE,
840 	 * this saves a useless scan of the hashtable during COMMIT or PREPARE.)
841 	 */
842 	xact_got_connection = false;
843 
844 	/* Also reset cursor numbering for next transaction */
845 	cursor_number = 0;
846 }
847 
848 /*
849  * pgfdw_subxact_callback --- cleanup at subtransaction end.
850  */
851 static void
pgfdw_subxact_callback(SubXactEvent event,SubTransactionId mySubid,SubTransactionId parentSubid,void * arg)852 pgfdw_subxact_callback(SubXactEvent event, SubTransactionId mySubid,
853 					   SubTransactionId parentSubid, void *arg)
854 {
855 	HASH_SEQ_STATUS scan;
856 	ConnCacheEntry *entry;
857 	int			curlevel;
858 
859 	/* Nothing to do at subxact start, nor after commit. */
860 	if (!(event == SUBXACT_EVENT_PRE_COMMIT_SUB ||
861 		  event == SUBXACT_EVENT_ABORT_SUB))
862 		return;
863 
864 	/* Quick exit if no connections were touched in this transaction. */
865 	if (!xact_got_connection)
866 		return;
867 
868 	/*
869 	 * Scan all connection cache entries to find open remote subtransactions
870 	 * of the current level, and close them.
871 	 */
872 	curlevel = GetCurrentTransactionNestLevel();
873 	hash_seq_init(&scan, ConnectionHash);
874 	while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
875 	{
876 		char		sql[100];
877 
878 		/*
879 		 * We only care about connections with open remote subtransactions of
880 		 * the current level.
881 		 */
882 		if (entry->conn == NULL || entry->xact_depth < curlevel)
883 			continue;
884 
885 		if (entry->xact_depth > curlevel)
886 			elog(ERROR, "missed cleaning up remote subtransaction at level %d",
887 				 entry->xact_depth);
888 
889 		if (event == SUBXACT_EVENT_PRE_COMMIT_SUB)
890 		{
891 			/*
892 			 * If abort cleanup previously failed for this connection, we
893 			 * can't issue any more commands against it.
894 			 */
895 			pgfdw_reject_incomplete_xact_state_change(entry);
896 
897 			/* Commit all remote subtransactions during pre-commit */
898 			snprintf(sql, sizeof(sql), "RELEASE SAVEPOINT s%d", curlevel);
899 			entry->changing_xact_state = true;
900 			do_sql_command(entry->conn, sql);
901 			entry->changing_xact_state = false;
902 		}
903 		else if (in_error_recursion_trouble())
904 		{
905 			/*
906 			 * Don't try to clean up the connection if we're already in error
907 			 * recursion trouble.
908 			 */
909 			entry->changing_xact_state = true;
910 		}
911 		else if (!entry->changing_xact_state)
912 		{
913 			bool		abort_cleanup_failure = false;
914 
915 			/* Remember that abort cleanup is in progress. */
916 			entry->changing_xact_state = true;
917 
918 			/* Assume we might have lost track of prepared statements */
919 			entry->have_error = true;
920 
921 			/*
922 			 * If a command has been submitted to the remote server by using
923 			 * an asynchronous execution function, the command might not have
924 			 * yet completed.  Check to see if a command is still being
925 			 * processed by the remote server, and if so, request cancellation
926 			 * of the command.
927 			 */
928 			if (PQtransactionStatus(entry->conn) == PQTRANS_ACTIVE &&
929 				!pgfdw_cancel_query(entry->conn))
930 				abort_cleanup_failure = true;
931 			else
932 			{
933 				/* Rollback all remote subtransactions during abort */
934 				snprintf(sql, sizeof(sql),
935 						 "ROLLBACK TO SAVEPOINT s%d; RELEASE SAVEPOINT s%d",
936 						 curlevel, curlevel);
937 				if (!pgfdw_exec_cleanup_query(entry->conn, sql, false))
938 					abort_cleanup_failure = true;
939 			}
940 
941 			/* Disarm changing_xact_state if it all worked. */
942 			entry->changing_xact_state = abort_cleanup_failure;
943 		}
944 
945 		/* OK, we're outta that level of subtransaction */
946 		entry->xact_depth--;
947 	}
948 }
949 
950 /*
951  * Connection invalidation callback function
952  *
953  * After a change to a pg_foreign_server or pg_user_mapping catalog entry,
954  * close connections depending on that entry immediately if current transaction
955  * has not used those connections yet. Otherwise, mark those connections as
956  * invalid and then make pgfdw_xact_callback() close them at the end of current
957  * transaction, since they cannot be closed in the midst of the transaction
958  * using them. Closed connections will be remade at the next opportunity if
959  * necessary.
960  *
961  * Although most cache invalidation callbacks blow away all the related stuff
962  * regardless of the given hashvalue, connections are expensive enough that
963  * it's worth trying to avoid that.
964  *
965  * NB: We could avoid unnecessary disconnection more strictly by examining
966  * individual option values, but it seems too much effort for the gain.
967  */
968 static void
pgfdw_inval_callback(Datum arg,int cacheid,uint32 hashvalue)969 pgfdw_inval_callback(Datum arg, int cacheid, uint32 hashvalue)
970 {
971 	HASH_SEQ_STATUS scan;
972 	ConnCacheEntry *entry;
973 
974 	Assert(cacheid == FOREIGNSERVEROID || cacheid == USERMAPPINGOID);
975 
976 	/* ConnectionHash must exist already, if we're registered */
977 	hash_seq_init(&scan, ConnectionHash);
978 	while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
979 	{
980 		/* Ignore invalid entries */
981 		if (entry->conn == NULL)
982 			continue;
983 
984 		/* hashvalue == 0 means a cache reset, must clear all state */
985 		if (hashvalue == 0 ||
986 			(cacheid == FOREIGNSERVEROID &&
987 			 entry->server_hashvalue == hashvalue) ||
988 			(cacheid == USERMAPPINGOID &&
989 			 entry->mapping_hashvalue == hashvalue))
990 		{
991 			/*
992 			 * Close the connection immediately if it's not used yet in this
993 			 * transaction. Otherwise mark it as invalid so that
994 			 * pgfdw_xact_callback() can close it at the end of this
995 			 * transaction.
996 			 */
997 			if (entry->xact_depth == 0)
998 			{
999 				elog(DEBUG3, "discarding connection %p", entry->conn);
1000 				disconnect_pg_server(entry);
1001 			}
1002 			else
1003 				entry->invalidated = true;
1004 		}
1005 	}
1006 }
1007 
1008 /*
1009  * Raise an error if the given connection cache entry is marked as being
1010  * in the middle of an xact state change.  This should be called at which no
1011  * such change is expected to be in progress; if one is found to be in
1012  * progress, it means that we aborted in the middle of a previous state change
1013  * and now don't know what the remote transaction state actually is.
1014  * Such connections can't safely be further used.  Re-establishing the
1015  * connection would change the snapshot and roll back any writes already
1016  * performed, so that's not an option, either. Thus, we must abort.
1017  */
1018 static void
pgfdw_reject_incomplete_xact_state_change(ConnCacheEntry * entry)1019 pgfdw_reject_incomplete_xact_state_change(ConnCacheEntry *entry)
1020 {
1021 	HeapTuple	tup;
1022 	Form_pg_user_mapping umform;
1023 	ForeignServer *server;
1024 
1025 	/* nothing to do for inactive entries and entries of sane state */
1026 	if (entry->conn == NULL || !entry->changing_xact_state)
1027 		return;
1028 
1029 	/* make sure this entry is inactive */
1030 	disconnect_pg_server(entry);
1031 
1032 	/* find server name to be shown in the message below */
1033 	tup = SearchSysCache1(USERMAPPINGOID,
1034 						  ObjectIdGetDatum(entry->key));
1035 	if (!HeapTupleIsValid(tup))
1036 		elog(ERROR, "cache lookup failed for user mapping %u", entry->key);
1037 	umform = (Form_pg_user_mapping) GETSTRUCT(tup);
1038 	server = GetForeignServer(umform->umserver);
1039 	ReleaseSysCache(tup);
1040 
1041 	ereport(ERROR,
1042 			(errcode(ERRCODE_CONNECTION_EXCEPTION),
1043 			 errmsg("connection to server \"%s\" was lost",
1044 					server->servername)));
1045 }
1046 
1047 /*
1048  * Cancel the currently-in-progress query (whose query text we do not have)
1049  * and ignore the result.  Returns true if we successfully cancel the query
1050  * and discard any pending result, and false if not.
1051  *
1052  * It's not a huge problem if we throw an ERROR here, but if we get into error
1053  * recursion trouble, we'll end up slamming the connection shut, which will
1054  * necessitate failing the entire toplevel transaction even if subtransactions
1055  * were used.  Try to use WARNING where we can.
1056  */
1057 static bool
pgfdw_cancel_query(PGconn * conn)1058 pgfdw_cancel_query(PGconn *conn)
1059 {
1060 	PGcancel   *cancel;
1061 	char		errbuf[256];
1062 	PGresult   *result = NULL;
1063 	TimestampTz endtime;
1064 
1065 	/*
1066 	 * If it takes too long to cancel the query and discard the result, assume
1067 	 * the connection is dead.
1068 	 */
1069 	endtime = TimestampTzPlusMilliseconds(GetCurrentTimestamp(), 30000);
1070 
1071 	/*
1072 	 * Issue cancel request.  Unfortunately, there's no good way to limit the
1073 	 * amount of time that we might block inside PQgetCancel().
1074 	 */
1075 	if ((cancel = PQgetCancel(conn)))
1076 	{
1077 		if (!PQcancel(cancel, errbuf, sizeof(errbuf)))
1078 		{
1079 			ereport(WARNING,
1080 					(errcode(ERRCODE_CONNECTION_FAILURE),
1081 					 errmsg("could not send cancel request: %s",
1082 							errbuf)));
1083 			PQfreeCancel(cancel);
1084 			return false;
1085 		}
1086 		PQfreeCancel(cancel);
1087 	}
1088 
1089 	/* Get and discard the result of the query. */
1090 	if (pgfdw_get_cleanup_result(conn, endtime, &result))
1091 		return false;
1092 	PQclear(result);
1093 
1094 	return true;
1095 }
1096 
1097 /*
1098  * Submit a query during (sub)abort cleanup and wait up to 30 seconds for the
1099  * result.  If the query is executed without error, the return value is true.
1100  * If the query is executed successfully but returns an error, the return
1101  * value is true if and only if ignore_errors is set.  If the query can't be
1102  * sent or times out, the return value is false.
1103  *
1104  * It's not a huge problem if we throw an ERROR here, but if we get into error
1105  * recursion trouble, we'll end up slamming the connection shut, which will
1106  * necessitate failing the entire toplevel transaction even if subtransactions
1107  * were used.  Try to use WARNING where we can.
1108  */
1109 static bool
pgfdw_exec_cleanup_query(PGconn * conn,const char * query,bool ignore_errors)1110 pgfdw_exec_cleanup_query(PGconn *conn, const char *query, bool ignore_errors)
1111 {
1112 	PGresult   *result = NULL;
1113 	TimestampTz endtime;
1114 
1115 	/*
1116 	 * If it takes too long to execute a cleanup query, assume the connection
1117 	 * is dead.  It's fairly likely that this is why we aborted in the first
1118 	 * place (e.g. statement timeout, user cancel), so the timeout shouldn't
1119 	 * be too long.
1120 	 */
1121 	endtime = TimestampTzPlusMilliseconds(GetCurrentTimestamp(), 30000);
1122 
1123 	/*
1124 	 * Submit a query.  Since we don't use non-blocking mode, this also can
1125 	 * block.  But its risk is relatively small, so we ignore that for now.
1126 	 */
1127 	if (!PQsendQuery(conn, query))
1128 	{
1129 		pgfdw_report_error(WARNING, NULL, conn, false, query);
1130 		return false;
1131 	}
1132 
1133 	/* Get the result of the query. */
1134 	if (pgfdw_get_cleanup_result(conn, endtime, &result))
1135 		return false;
1136 
1137 	/* Issue a warning if not successful. */
1138 	if (PQresultStatus(result) != PGRES_COMMAND_OK)
1139 	{
1140 		pgfdw_report_error(WARNING, result, conn, true, query);
1141 		return ignore_errors;
1142 	}
1143 	PQclear(result);
1144 
1145 	return true;
1146 }
1147 
1148 /*
1149  * Get, during abort cleanup, the result of a query that is in progress.  This
1150  * might be a query that is being interrupted by transaction abort, or it might
1151  * be a query that was initiated as part of transaction abort to get the remote
1152  * side back to the appropriate state.
1153  *
1154  * endtime is the time at which we should give up and assume the remote
1155  * side is dead.  Returns true if the timeout expired, otherwise false.
1156  * Sets *result except in case of a timeout.
1157  */
1158 static bool
pgfdw_get_cleanup_result(PGconn * conn,TimestampTz endtime,PGresult ** result)1159 pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime, PGresult **result)
1160 {
1161 	volatile bool timed_out = false;
1162 	PGresult   *volatile last_res = NULL;
1163 
1164 	/* In what follows, do not leak any PGresults on an error. */
1165 	PG_TRY();
1166 	{
1167 		for (;;)
1168 		{
1169 			PGresult   *res;
1170 
1171 			while (PQisBusy(conn))
1172 			{
1173 				int			wc;
1174 				TimestampTz now = GetCurrentTimestamp();
1175 				long		cur_timeout;
1176 
1177 				/* If timeout has expired, give up, else get sleep time. */
1178 				cur_timeout = TimestampDifferenceMilliseconds(now, endtime);
1179 				if (cur_timeout <= 0)
1180 				{
1181 					timed_out = true;
1182 					goto exit;
1183 				}
1184 
1185 				/* Sleep until there's something to do */
1186 				wc = WaitLatchOrSocket(MyLatch,
1187 							  WL_LATCH_SET | WL_SOCKET_READABLE | WL_TIMEOUT,
1188 									   PQsocket(conn),
1189 									   cur_timeout);
1190 				ResetLatch(MyLatch);
1191 
1192 				CHECK_FOR_INTERRUPTS();
1193 
1194 				/* Data available in socket? */
1195 				if (wc & WL_SOCKET_READABLE)
1196 				{
1197 					if (!PQconsumeInput(conn))
1198 					{
1199 						/* connection trouble; treat the same as a timeout */
1200 						timed_out = true;
1201 						goto exit;
1202 					}
1203 				}
1204 			}
1205 
1206 			res = PQgetResult(conn);
1207 			if (res == NULL)
1208 				break;			/* query is complete */
1209 
1210 			PQclear(last_res);
1211 			last_res = res;
1212 		}
1213 exit:	;
1214 	}
1215 	PG_CATCH();
1216 	{
1217 		PQclear(last_res);
1218 		PG_RE_THROW();
1219 	}
1220 	PG_END_TRY();
1221 
1222 	if (timed_out)
1223 		PQclear(last_res);
1224 	else
1225 		*result = last_res;
1226 	return timed_out;
1227 }
1228