1 /*-------------------------------------------------------------------------
2  *
3  * connection.c
4  *		  Connection management functions for postgres_fdw
5  *
6  * Portions Copyright (c) 2012-2017, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *		  contrib/postgres_fdw/connection.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "postgres_fdw.h"
16 
17 #include "access/htup_details.h"
18 #include "catalog/pg_user_mapping.h"
19 #include "access/xact.h"
20 #include "mb/pg_wchar.h"
21 #include "miscadmin.h"
22 #include "pgstat.h"
23 #include "storage/latch.h"
24 #include "utils/hsearch.h"
25 #include "utils/inval.h"
26 #include "utils/memutils.h"
27 #include "utils/syscache.h"
28 
29 
30 /*
31  * Connection cache hash table entry
32  *
33  * The lookup key in this hash table is the user mapping OID. We use just one
34  * connection per user mapping ID, which ensures that all the scans use the
35  * same snapshot during a query.  Using the user mapping OID rather than
36  * the foreign server OID + user OID avoids creating multiple connections when
37  * the public user mapping applies to all user OIDs.
38  *
39  * The "conn" pointer can be NULL if we don't currently have a live connection.
40  * When we do have a connection, xact_depth tracks the current depth of
41  * transactions and subtransactions open on the remote side.  We need to issue
42  * commands at the same nesting depth on the remote as we're executing at
43  * ourselves, so that rolling back a subtransaction will kill the right
44  * queries and not the wrong ones.
45  */
46 typedef Oid ConnCacheKey;
47 
48 typedef struct ConnCacheEntry
49 {
50 	ConnCacheKey key;			/* hash key (must be first) */
51 	PGconn	   *conn;			/* connection to foreign server, or NULL */
52 	/* Remaining fields are invalid when conn is NULL: */
53 	int			xact_depth;		/* 0 = no xact open, 1 = main xact open, 2 =
54 								 * one level of subxact open, etc */
55 	bool		have_prep_stmt; /* have we prepared any stmts in this xact? */
56 	bool		have_error;		/* have any subxacts aborted in this xact? */
57 	bool		changing_xact_state;	/* xact state change in process */
58 	bool		invalidated;	/* true if reconnect is pending */
59 	uint32		server_hashvalue;	/* hash value of foreign server OID */
60 	uint32		mapping_hashvalue;	/* hash value of user mapping OID */
61 } ConnCacheEntry;
62 
63 /*
64  * Connection cache (initialized on first use)
65  */
66 static HTAB *ConnectionHash = NULL;
67 
68 /* for assigning cursor numbers and prepared statement numbers */
69 static unsigned int cursor_number = 0;
70 static unsigned int prep_stmt_number = 0;
71 
72 /* tracks whether any work is needed in callback functions */
73 static bool xact_got_connection = false;
74 
75 /* prototypes of private functions */
76 static PGconn *connect_pg_server(ForeignServer *server, UserMapping *user);
77 static void disconnect_pg_server(ConnCacheEntry *entry);
78 static void check_conn_params(const char **keywords, const char **values);
79 static void configure_remote_session(PGconn *conn);
80 static void do_sql_command(PGconn *conn, const char *sql);
81 static void begin_remote_xact(ConnCacheEntry *entry);
82 static void pgfdw_xact_callback(XactEvent event, void *arg);
83 static void pgfdw_subxact_callback(SubXactEvent event,
84 					   SubTransactionId mySubid,
85 					   SubTransactionId parentSubid,
86 					   void *arg);
87 static void pgfdw_inval_callback(Datum arg, int cacheid, uint32 hashvalue);
88 static void pgfdw_reject_incomplete_xact_state_change(ConnCacheEntry *entry);
89 static bool pgfdw_cancel_query(PGconn *conn);
90 static bool pgfdw_exec_cleanup_query(PGconn *conn, const char *query,
91 						 bool ignore_errors);
92 static bool pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime,
93 						 PGresult **result);
94 
95 
96 /*
97  * Get a PGconn which can be used to execute queries on the remote PostgreSQL
98  * server with the user's authorization.  A new connection is established
99  * if we don't already have a suitable one, and a transaction is opened at
100  * the right subtransaction nesting depth if we didn't do that already.
101  *
102  * will_prep_stmt must be true if caller intends to create any prepared
103  * statements.  Since those don't go away automatically at transaction end
104  * (not even on error), we need this flag to cue manual cleanup.
105  */
106 PGconn *
GetConnection(UserMapping * user,bool will_prep_stmt)107 GetConnection(UserMapping *user, bool will_prep_stmt)
108 {
109 	bool		found;
110 	ConnCacheEntry *entry;
111 	ConnCacheKey key;
112 
113 	/* First time through, initialize connection cache hashtable */
114 	if (ConnectionHash == NULL)
115 	{
116 		HASHCTL		ctl;
117 
118 		MemSet(&ctl, 0, sizeof(ctl));
119 		ctl.keysize = sizeof(ConnCacheKey);
120 		ctl.entrysize = sizeof(ConnCacheEntry);
121 		/* allocate ConnectionHash in the cache context */
122 		ctl.hcxt = CacheMemoryContext;
123 		ConnectionHash = hash_create("postgres_fdw connections", 8,
124 									 &ctl,
125 									 HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
126 
127 		/*
128 		 * Register some callback functions that manage connection cleanup.
129 		 * This should be done just once in each backend.
130 		 */
131 		RegisterXactCallback(pgfdw_xact_callback, NULL);
132 		RegisterSubXactCallback(pgfdw_subxact_callback, NULL);
133 		CacheRegisterSyscacheCallback(FOREIGNSERVEROID,
134 									  pgfdw_inval_callback, (Datum) 0);
135 		CacheRegisterSyscacheCallback(USERMAPPINGOID,
136 									  pgfdw_inval_callback, (Datum) 0);
137 	}
138 
139 	/* Set flag that we did GetConnection during the current transaction */
140 	xact_got_connection = true;
141 
142 	/* Create hash key for the entry.  Assume no pad bytes in key struct */
143 	key = user->umid;
144 
145 	/*
146 	 * Find or create cached entry for requested connection.
147 	 */
148 	entry = hash_search(ConnectionHash, &key, HASH_ENTER, &found);
149 	if (!found)
150 	{
151 		/*
152 		 * We need only clear "conn" here; remaining fields will be filled
153 		 * later when "conn" is set.
154 		 */
155 		entry->conn = NULL;
156 	}
157 
158 	/* Reject further use of connections which failed abort cleanup. */
159 	pgfdw_reject_incomplete_xact_state_change(entry);
160 
161 	/*
162 	 * If the connection needs to be remade due to invalidation, disconnect as
163 	 * soon as we're out of all transactions.
164 	 */
165 	if (entry->conn != NULL && entry->invalidated && entry->xact_depth == 0)
166 	{
167 		elog(DEBUG3, "closing connection %p for option changes to take effect",
168 			 entry->conn);
169 		disconnect_pg_server(entry);
170 	}
171 
172 	/*
173 	 * We don't check the health of cached connection here, because it would
174 	 * require some overhead.  Broken connection will be detected when the
175 	 * connection is actually used.
176 	 */
177 
178 	/*
179 	 * If cache entry doesn't have a connection, we have to establish a new
180 	 * connection.  (If connect_pg_server throws an error, the cache entry
181 	 * will remain in a valid empty state, ie conn == NULL.)
182 	 */
183 	if (entry->conn == NULL)
184 	{
185 		ForeignServer *server = GetForeignServer(user->serverid);
186 
187 		/* Reset all transient state fields, to be sure all are clean */
188 		entry->xact_depth = 0;
189 		entry->have_prep_stmt = false;
190 		entry->have_error = false;
191 		entry->changing_xact_state = false;
192 		entry->invalidated = false;
193 		entry->server_hashvalue =
194 			GetSysCacheHashValue1(FOREIGNSERVEROID,
195 								  ObjectIdGetDatum(server->serverid));
196 		entry->mapping_hashvalue =
197 			GetSysCacheHashValue1(USERMAPPINGOID,
198 								  ObjectIdGetDatum(user->umid));
199 
200 		/* Now try to make the connection */
201 		entry->conn = connect_pg_server(server, user);
202 
203 		elog(DEBUG3, "new postgres_fdw connection %p for server \"%s\" (user mapping oid %u, userid %u)",
204 			 entry->conn, server->servername, user->umid, user->userid);
205 	}
206 
207 	/*
208 	 * Start a new transaction or subtransaction if needed.
209 	 */
210 	begin_remote_xact(entry);
211 
212 	/* Remember if caller will prepare statements */
213 	entry->have_prep_stmt |= will_prep_stmt;
214 
215 	return entry->conn;
216 }
217 
218 /*
219  * Connect to remote server using specified server and user mapping properties.
220  */
221 static PGconn *
connect_pg_server(ForeignServer * server,UserMapping * user)222 connect_pg_server(ForeignServer *server, UserMapping *user)
223 {
224 	PGconn	   *volatile conn = NULL;
225 
226 	/*
227 	 * Use PG_TRY block to ensure closing connection on error.
228 	 */
229 	PG_TRY();
230 	{
231 		const char **keywords;
232 		const char **values;
233 		int			n;
234 
235 		/*
236 		 * Construct connection params from generic options of ForeignServer
237 		 * and UserMapping.  (Some of them might not be libpq options, in
238 		 * which case we'll just waste a few array slots.)  Add 3 extra slots
239 		 * for fallback_application_name, client_encoding, end marker.
240 		 */
241 		n = list_length(server->options) + list_length(user->options) + 3;
242 		keywords = (const char **) palloc(n * sizeof(char *));
243 		values = (const char **) palloc(n * sizeof(char *));
244 
245 		n = 0;
246 		n += ExtractConnectionOptions(server->options,
247 									  keywords + n, values + n);
248 		n += ExtractConnectionOptions(user->options,
249 									  keywords + n, values + n);
250 
251 		/* Use "postgres_fdw" as fallback_application_name. */
252 		keywords[n] = "fallback_application_name";
253 		values[n] = "postgres_fdw";
254 		n++;
255 
256 		/* Set client_encoding so that libpq can convert encoding properly. */
257 		keywords[n] = "client_encoding";
258 		values[n] = GetDatabaseEncodingName();
259 		n++;
260 
261 		keywords[n] = values[n] = NULL;
262 
263 		/* verify connection parameters and make connection */
264 		check_conn_params(keywords, values);
265 
266 		conn = PQconnectdbParams(keywords, values, false);
267 		if (!conn || PQstatus(conn) != CONNECTION_OK)
268 			ereport(ERROR,
269 					(errcode(ERRCODE_SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION),
270 					 errmsg("could not connect to server \"%s\"",
271 							server->servername),
272 					 errdetail_internal("%s", pchomp(PQerrorMessage(conn)))));
273 
274 		/*
275 		 * Check that non-superuser has used password to establish connection;
276 		 * otherwise, he's piggybacking on the postgres server's user
277 		 * identity. See also dblink_security_check() in contrib/dblink.
278 		 */
279 		if (!superuser() && !PQconnectionUsedPassword(conn))
280 			ereport(ERROR,
281 					(errcode(ERRCODE_S_R_E_PROHIBITED_SQL_STATEMENT_ATTEMPTED),
282 					 errmsg("password is required"),
283 					 errdetail("Non-superuser cannot connect if the server does not request a password."),
284 					 errhint("Target server's authentication method must be changed.")));
285 
286 		/* Prepare new session for use */
287 		configure_remote_session(conn);
288 
289 		pfree(keywords);
290 		pfree(values);
291 	}
292 	PG_CATCH();
293 	{
294 		/* Release PGconn data structure if we managed to create one */
295 		if (conn)
296 			PQfinish(conn);
297 		PG_RE_THROW();
298 	}
299 	PG_END_TRY();
300 
301 	return conn;
302 }
303 
304 /*
305  * Disconnect any open connection for a connection cache entry.
306  */
307 static void
disconnect_pg_server(ConnCacheEntry * entry)308 disconnect_pg_server(ConnCacheEntry *entry)
309 {
310 	if (entry->conn != NULL)
311 	{
312 		PQfinish(entry->conn);
313 		entry->conn = NULL;
314 	}
315 }
316 
317 /*
318  * For non-superusers, insist that the connstr specify a password.  This
319  * prevents a password from being picked up from .pgpass, a service file,
320  * the environment, etc.  We don't want the postgres user's passwords
321  * to be accessible to non-superusers.  (See also dblink_connstr_check in
322  * contrib/dblink.)
323  */
324 static void
check_conn_params(const char ** keywords,const char ** values)325 check_conn_params(const char **keywords, const char **values)
326 {
327 	int			i;
328 
329 	/* no check required if superuser */
330 	if (superuser())
331 		return;
332 
333 	/* ok if params contain a non-empty password */
334 	for (i = 0; keywords[i] != NULL; i++)
335 	{
336 		if (strcmp(keywords[i], "password") == 0 && values[i][0] != '\0')
337 			return;
338 	}
339 
340 	ereport(ERROR,
341 			(errcode(ERRCODE_S_R_E_PROHIBITED_SQL_STATEMENT_ATTEMPTED),
342 			 errmsg("password is required"),
343 			 errdetail("Non-superusers must provide a password in the user mapping.")));
344 }
345 
346 /*
347  * Issue SET commands to make sure remote session is configured properly.
348  *
349  * We do this just once at connection, assuming nothing will change the
350  * values later.  Since we'll never send volatile function calls to the
351  * remote, there shouldn't be any way to break this assumption from our end.
352  * It's possible to think of ways to break it at the remote end, eg making
353  * a foreign table point to a view that includes a set_config call ---
354  * but once you admit the possibility of a malicious view definition,
355  * there are any number of ways to break things.
356  */
357 static void
configure_remote_session(PGconn * conn)358 configure_remote_session(PGconn *conn)
359 {
360 	int			remoteversion = PQserverVersion(conn);
361 
362 	/* Force the search path to contain only pg_catalog (see deparse.c) */
363 	do_sql_command(conn, "SET search_path = pg_catalog");
364 
365 	/*
366 	 * Set remote timezone; this is basically just cosmetic, since all
367 	 * transmitted and returned timestamptzs should specify a zone explicitly
368 	 * anyway.  However it makes the regression test outputs more predictable.
369 	 *
370 	 * We don't risk setting remote zone equal to ours, since the remote
371 	 * server might use a different timezone database.  Instead, use UTC
372 	 * (quoted, because very old servers are picky about case).
373 	 */
374 	do_sql_command(conn, "SET timezone = 'UTC'");
375 
376 	/*
377 	 * Set values needed to ensure unambiguous data output from remote.  (This
378 	 * logic should match what pg_dump does.  See also set_transmission_modes
379 	 * in postgres_fdw.c.)
380 	 */
381 	do_sql_command(conn, "SET datestyle = ISO");
382 	if (remoteversion >= 80400)
383 		do_sql_command(conn, "SET intervalstyle = postgres");
384 	if (remoteversion >= 90000)
385 		do_sql_command(conn, "SET extra_float_digits = 3");
386 	else
387 		do_sql_command(conn, "SET extra_float_digits = 2");
388 }
389 
390 /*
391  * Convenience subroutine to issue a non-data-returning SQL command to remote
392  */
393 static void
do_sql_command(PGconn * conn,const char * sql)394 do_sql_command(PGconn *conn, const char *sql)
395 {
396 	PGresult   *res;
397 
398 	if (!PQsendQuery(conn, sql))
399 		pgfdw_report_error(ERROR, NULL, conn, false, sql);
400 	res = pgfdw_get_result(conn, sql);
401 	if (PQresultStatus(res) != PGRES_COMMAND_OK)
402 		pgfdw_report_error(ERROR, res, conn, true, sql);
403 	PQclear(res);
404 }
405 
406 /*
407  * Start remote transaction or subtransaction, if needed.
408  *
409  * Note that we always use at least REPEATABLE READ in the remote session.
410  * This is so that, if a query initiates multiple scans of the same or
411  * different foreign tables, we will get snapshot-consistent results from
412  * those scans.  A disadvantage is that we can't provide sane emulation of
413  * READ COMMITTED behavior --- it would be nice if we had some other way to
414  * control which remote queries share a snapshot.
415  */
416 static void
begin_remote_xact(ConnCacheEntry * entry)417 begin_remote_xact(ConnCacheEntry *entry)
418 {
419 	int			curlevel = GetCurrentTransactionNestLevel();
420 
421 	/* Start main transaction if we haven't yet */
422 	if (entry->xact_depth <= 0)
423 	{
424 		const char *sql;
425 
426 		elog(DEBUG3, "starting remote transaction on connection %p",
427 			 entry->conn);
428 
429 		if (IsolationIsSerializable())
430 			sql = "START TRANSACTION ISOLATION LEVEL SERIALIZABLE";
431 		else
432 			sql = "START TRANSACTION ISOLATION LEVEL REPEATABLE READ";
433 		entry->changing_xact_state = true;
434 		do_sql_command(entry->conn, sql);
435 		entry->xact_depth = 1;
436 		entry->changing_xact_state = false;
437 	}
438 
439 	/*
440 	 * If we're in a subtransaction, stack up savepoints to match our level.
441 	 * This ensures we can rollback just the desired effects when a
442 	 * subtransaction aborts.
443 	 */
444 	while (entry->xact_depth < curlevel)
445 	{
446 		char		sql[64];
447 
448 		snprintf(sql, sizeof(sql), "SAVEPOINT s%d", entry->xact_depth + 1);
449 		entry->changing_xact_state = true;
450 		do_sql_command(entry->conn, sql);
451 		entry->xact_depth++;
452 		entry->changing_xact_state = false;
453 	}
454 }
455 
456 /*
457  * Release connection reference count created by calling GetConnection.
458  */
459 void
ReleaseConnection(PGconn * conn)460 ReleaseConnection(PGconn *conn)
461 {
462 	/*
463 	 * Currently, we don't actually track connection references because all
464 	 * cleanup is managed on a transaction or subtransaction basis instead. So
465 	 * there's nothing to do here.
466 	 */
467 }
468 
469 /*
470  * Assign a "unique" number for a cursor.
471  *
472  * These really only need to be unique per connection within a transaction.
473  * For the moment we ignore the per-connection point and assign them across
474  * all connections in the transaction, but we ask for the connection to be
475  * supplied in case we want to refine that.
476  *
477  * Note that even if wraparound happens in a very long transaction, actual
478  * collisions are highly improbable; just be sure to use %u not %d to print.
479  */
480 unsigned int
GetCursorNumber(PGconn * conn)481 GetCursorNumber(PGconn *conn)
482 {
483 	return ++cursor_number;
484 }
485 
486 /*
487  * Assign a "unique" number for a prepared statement.
488  *
489  * This works much like GetCursorNumber, except that we never reset the counter
490  * within a session.  That's because we can't be 100% sure we've gotten rid
491  * of all prepared statements on all connections, and it's not really worth
492  * increasing the risk of prepared-statement name collisions by resetting.
493  */
494 unsigned int
GetPrepStmtNumber(PGconn * conn)495 GetPrepStmtNumber(PGconn *conn)
496 {
497 	return ++prep_stmt_number;
498 }
499 
500 /*
501  * Submit a query and wait for the result.
502  *
503  * This function is interruptible by signals.
504  *
505  * Caller is responsible for the error handling on the result.
506  */
507 PGresult *
pgfdw_exec_query(PGconn * conn,const char * query)508 pgfdw_exec_query(PGconn *conn, const char *query)
509 {
510 	/*
511 	 * Submit a query.  Since we don't use non-blocking mode, this also can
512 	 * block.  But its risk is relatively small, so we ignore that for now.
513 	 */
514 	if (!PQsendQuery(conn, query))
515 		pgfdw_report_error(ERROR, NULL, conn, false, query);
516 
517 	/* Wait for the result. */
518 	return pgfdw_get_result(conn, query);
519 }
520 
521 /*
522  * Wait for the result from a prior asynchronous execution function call.
523  *
524  * This function offers quick responsiveness by checking for any interruptions.
525  *
526  * This function emulates PQexec()'s behavior of returning the last result
527  * when there are many.
528  *
529  * Caller is responsible for the error handling on the result.
530  */
531 PGresult *
pgfdw_get_result(PGconn * conn,const char * query)532 pgfdw_get_result(PGconn *conn, const char *query)
533 {
534 	PGresult   *volatile last_res = NULL;
535 
536 	/* In what follows, do not leak any PGresults on an error. */
537 	PG_TRY();
538 	{
539 		for (;;)
540 		{
541 			PGresult   *res;
542 
543 			while (PQisBusy(conn))
544 			{
545 				int			wc;
546 
547 				/* Sleep until there's something to do */
548 				wc = WaitLatchOrSocket(MyLatch,
549 									   WL_LATCH_SET | WL_SOCKET_READABLE,
550 									   PQsocket(conn),
551 									   -1L, PG_WAIT_EXTENSION);
552 				ResetLatch(MyLatch);
553 
554 				CHECK_FOR_INTERRUPTS();
555 
556 				/* Data available in socket? */
557 				if (wc & WL_SOCKET_READABLE)
558 				{
559 					if (!PQconsumeInput(conn))
560 						pgfdw_report_error(ERROR, NULL, conn, false, query);
561 				}
562 			}
563 
564 			res = PQgetResult(conn);
565 			if (res == NULL)
566 				break;			/* query is complete */
567 
568 			PQclear(last_res);
569 			last_res = res;
570 		}
571 	}
572 	PG_CATCH();
573 	{
574 		PQclear(last_res);
575 		PG_RE_THROW();
576 	}
577 	PG_END_TRY();
578 
579 	return last_res;
580 }
581 
582 /*
583  * Report an error we got from the remote server.
584  *
585  * elevel: error level to use (typically ERROR, but might be less)
586  * res: PGresult containing the error
587  * conn: connection we did the query on
588  * clear: if true, PQclear the result (otherwise caller will handle it)
589  * sql: NULL, or text of remote command we tried to execute
590  *
591  * Note: callers that choose not to throw ERROR for a remote error are
592  * responsible for making sure that the associated ConnCacheEntry gets
593  * marked with have_error = true.
594  */
595 void
pgfdw_report_error(int elevel,PGresult * res,PGconn * conn,bool clear,const char * sql)596 pgfdw_report_error(int elevel, PGresult *res, PGconn *conn,
597 				   bool clear, const char *sql)
598 {
599 	/* If requested, PGresult must be released before leaving this function. */
600 	PG_TRY();
601 	{
602 		char	   *diag_sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE);
603 		char	   *message_primary = PQresultErrorField(res, PG_DIAG_MESSAGE_PRIMARY);
604 		char	   *message_detail = PQresultErrorField(res, PG_DIAG_MESSAGE_DETAIL);
605 		char	   *message_hint = PQresultErrorField(res, PG_DIAG_MESSAGE_HINT);
606 		char	   *message_context = PQresultErrorField(res, PG_DIAG_CONTEXT);
607 		int			sqlstate;
608 
609 		if (diag_sqlstate)
610 			sqlstate = MAKE_SQLSTATE(diag_sqlstate[0],
611 									 diag_sqlstate[1],
612 									 diag_sqlstate[2],
613 									 diag_sqlstate[3],
614 									 diag_sqlstate[4]);
615 		else
616 			sqlstate = ERRCODE_CONNECTION_FAILURE;
617 
618 		/*
619 		 * If we don't get a message from the PGresult, try the PGconn.  This
620 		 * is needed because for connection-level failures, PQexec may just
621 		 * return NULL, not a PGresult at all.
622 		 */
623 		if (message_primary == NULL)
624 			message_primary = pchomp(PQerrorMessage(conn));
625 
626 		ereport(elevel,
627 				(errcode(sqlstate),
628 				 message_primary ? errmsg_internal("%s", message_primary) :
629 				 errmsg("could not obtain message string for remote error"),
630 				 message_detail ? errdetail_internal("%s", message_detail) : 0,
631 				 message_hint ? errhint("%s", message_hint) : 0,
632 				 message_context ? errcontext("%s", message_context) : 0,
633 				 sql ? errcontext("Remote SQL command: %s", sql) : 0));
634 	}
635 	PG_CATCH();
636 	{
637 		if (clear)
638 			PQclear(res);
639 		PG_RE_THROW();
640 	}
641 	PG_END_TRY();
642 	if (clear)
643 		PQclear(res);
644 }
645 
646 /*
647  * pgfdw_xact_callback --- cleanup at main-transaction end.
648  *
649  * This runs just late enough that it must not enter user-defined code
650  * locally.  (Entering such code on the remote side is fine.  Its remote
651  * COMMIT TRANSACTION may run deferred triggers.)
652  */
653 static void
pgfdw_xact_callback(XactEvent event,void * arg)654 pgfdw_xact_callback(XactEvent event, void *arg)
655 {
656 	HASH_SEQ_STATUS scan;
657 	ConnCacheEntry *entry;
658 
659 	/* Quick exit if no connections were touched in this transaction. */
660 	if (!xact_got_connection)
661 		return;
662 
663 	/*
664 	 * Scan all connection cache entries to find open remote transactions, and
665 	 * close them.
666 	 */
667 	hash_seq_init(&scan, ConnectionHash);
668 	while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
669 	{
670 		PGresult   *res;
671 
672 		/* Ignore cache entry if no open connection right now */
673 		if (entry->conn == NULL)
674 			continue;
675 
676 		/* If it has an open remote transaction, try to close it */
677 		if (entry->xact_depth > 0)
678 		{
679 			bool		abort_cleanup_failure = false;
680 
681 			elog(DEBUG3, "closing remote transaction on connection %p",
682 				 entry->conn);
683 
684 			switch (event)
685 			{
686 				case XACT_EVENT_PARALLEL_PRE_COMMIT:
687 				case XACT_EVENT_PRE_COMMIT:
688 
689 					/*
690 					 * If abort cleanup previously failed for this connection,
691 					 * we can't issue any more commands against it.
692 					 */
693 					pgfdw_reject_incomplete_xact_state_change(entry);
694 
695 					/* Commit all remote transactions during pre-commit */
696 					entry->changing_xact_state = true;
697 					do_sql_command(entry->conn, "COMMIT TRANSACTION");
698 					entry->changing_xact_state = false;
699 
700 					/*
701 					 * If there were any errors in subtransactions, and we
702 					 * made prepared statements, do a DEALLOCATE ALL to make
703 					 * sure we get rid of all prepared statements. This is
704 					 * annoying and not terribly bulletproof, but it's
705 					 * probably not worth trying harder.
706 					 *
707 					 * DEALLOCATE ALL only exists in 8.3 and later, so this
708 					 * constrains how old a server postgres_fdw can
709 					 * communicate with.  We intentionally ignore errors in
710 					 * the DEALLOCATE, so that we can hobble along to some
711 					 * extent with older servers (leaking prepared statements
712 					 * as we go; but we don't really support update operations
713 					 * pre-8.3 anyway).
714 					 */
715 					if (entry->have_prep_stmt && entry->have_error)
716 					{
717 						res = PQexec(entry->conn, "DEALLOCATE ALL");
718 						PQclear(res);
719 					}
720 					entry->have_prep_stmt = false;
721 					entry->have_error = false;
722 					break;
723 				case XACT_EVENT_PRE_PREPARE:
724 
725 					/*
726 					 * We disallow any remote transactions, since it's not
727 					 * very reasonable to hold them open until the prepared
728 					 * transaction is committed.  For the moment, throw error
729 					 * unconditionally; later we might allow read-only cases.
730 					 * Note that the error will cause us to come right back
731 					 * here with event == XACT_EVENT_ABORT, so we'll clean up
732 					 * the connection state at that point.
733 					 */
734 					ereport(ERROR,
735 							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
736 							 errmsg("cannot PREPARE a transaction that has operated on postgres_fdw foreign tables")));
737 					break;
738 				case XACT_EVENT_PARALLEL_COMMIT:
739 				case XACT_EVENT_COMMIT:
740 				case XACT_EVENT_PREPARE:
741 					/* Pre-commit should have closed the open transaction */
742 					elog(ERROR, "missed cleaning up connection during pre-commit");
743 					break;
744 				case XACT_EVENT_PARALLEL_ABORT:
745 				case XACT_EVENT_ABORT:
746 
747 					/*
748 					 * Don't try to clean up the connection if we're already
749 					 * in error recursion trouble.
750 					 */
751 					if (in_error_recursion_trouble())
752 						entry->changing_xact_state = true;
753 
754 					/*
755 					 * If connection is already unsalvageable, don't touch it
756 					 * further.
757 					 */
758 					if (entry->changing_xact_state)
759 						break;
760 
761 					/*
762 					 * Mark this connection as in the process of changing
763 					 * transaction state.
764 					 */
765 					entry->changing_xact_state = true;
766 
767 					/* Assume we might have lost track of prepared statements */
768 					entry->have_error = true;
769 
770 					/*
771 					 * If a command has been submitted to the remote server by
772 					 * using an asynchronous execution function, the command
773 					 * might not have yet completed.  Check to see if a
774 					 * command is still being processed by the remote server,
775 					 * and if so, request cancellation of the command.
776 					 */
777 					if (PQtransactionStatus(entry->conn) == PQTRANS_ACTIVE &&
778 						!pgfdw_cancel_query(entry->conn))
779 					{
780 						/* Unable to cancel running query. */
781 						abort_cleanup_failure = true;
782 					}
783 					else if (!pgfdw_exec_cleanup_query(entry->conn,
784 													   "ABORT TRANSACTION",
785 													   false))
786 					{
787 						/* Unable to abort remote transaction. */
788 						abort_cleanup_failure = true;
789 					}
790 					else if (entry->have_prep_stmt && entry->have_error &&
791 							 !pgfdw_exec_cleanup_query(entry->conn,
792 													   "DEALLOCATE ALL",
793 													   true))
794 					{
795 						/* Trouble clearing prepared statements. */
796 						abort_cleanup_failure = true;
797 					}
798 					else
799 					{
800 						entry->have_prep_stmt = false;
801 						entry->have_error = false;
802 					}
803 
804 					/* Disarm changing_xact_state if it all worked. */
805 					entry->changing_xact_state = abort_cleanup_failure;
806 					break;
807 			}
808 		}
809 
810 		/* Reset state to show we're out of a transaction */
811 		entry->xact_depth = 0;
812 
813 		/*
814 		 * If the connection isn't in a good idle state or it is marked as
815 		 * invalid, then discard it to recover. Next GetConnection will open a
816 		 * new connection.
817 		 */
818 		if (PQstatus(entry->conn) != CONNECTION_OK ||
819 			PQtransactionStatus(entry->conn) != PQTRANS_IDLE ||
820 			entry->changing_xact_state ||
821 			entry->invalidated)
822 		{
823 			elog(DEBUG3, "discarding connection %p", entry->conn);
824 			disconnect_pg_server(entry);
825 		}
826 	}
827 
828 	/*
829 	 * Regardless of the event type, we can now mark ourselves as out of the
830 	 * transaction.  (Note: if we are here during PRE_COMMIT or PRE_PREPARE,
831 	 * this saves a useless scan of the hashtable during COMMIT or PREPARE.)
832 	 */
833 	xact_got_connection = false;
834 
835 	/* Also reset cursor numbering for next transaction */
836 	cursor_number = 0;
837 }
838 
839 /*
840  * pgfdw_subxact_callback --- cleanup at subtransaction end.
841  */
842 static void
pgfdw_subxact_callback(SubXactEvent event,SubTransactionId mySubid,SubTransactionId parentSubid,void * arg)843 pgfdw_subxact_callback(SubXactEvent event, SubTransactionId mySubid,
844 					   SubTransactionId parentSubid, void *arg)
845 {
846 	HASH_SEQ_STATUS scan;
847 	ConnCacheEntry *entry;
848 	int			curlevel;
849 
850 	/* Nothing to do at subxact start, nor after commit. */
851 	if (!(event == SUBXACT_EVENT_PRE_COMMIT_SUB ||
852 		  event == SUBXACT_EVENT_ABORT_SUB))
853 		return;
854 
855 	/* Quick exit if no connections were touched in this transaction. */
856 	if (!xact_got_connection)
857 		return;
858 
859 	/*
860 	 * Scan all connection cache entries to find open remote subtransactions
861 	 * of the current level, and close them.
862 	 */
863 	curlevel = GetCurrentTransactionNestLevel();
864 	hash_seq_init(&scan, ConnectionHash);
865 	while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
866 	{
867 		char		sql[100];
868 
869 		/*
870 		 * We only care about connections with open remote subtransactions of
871 		 * the current level.
872 		 */
873 		if (entry->conn == NULL || entry->xact_depth < curlevel)
874 			continue;
875 
876 		if (entry->xact_depth > curlevel)
877 			elog(ERROR, "missed cleaning up remote subtransaction at level %d",
878 				 entry->xact_depth);
879 
880 		if (event == SUBXACT_EVENT_PRE_COMMIT_SUB)
881 		{
882 			/*
883 			 * If abort cleanup previously failed for this connection, we
884 			 * can't issue any more commands against it.
885 			 */
886 			pgfdw_reject_incomplete_xact_state_change(entry);
887 
888 			/* Commit all remote subtransactions during pre-commit */
889 			snprintf(sql, sizeof(sql), "RELEASE SAVEPOINT s%d", curlevel);
890 			entry->changing_xact_state = true;
891 			do_sql_command(entry->conn, sql);
892 			entry->changing_xact_state = false;
893 		}
894 		else if (in_error_recursion_trouble())
895 		{
896 			/*
897 			 * Don't try to clean up the connection if we're already in error
898 			 * recursion trouble.
899 			 */
900 			entry->changing_xact_state = true;
901 		}
902 		else if (!entry->changing_xact_state)
903 		{
904 			bool		abort_cleanup_failure = false;
905 
906 			/* Remember that abort cleanup is in progress. */
907 			entry->changing_xact_state = true;
908 
909 			/* Assume we might have lost track of prepared statements */
910 			entry->have_error = true;
911 
912 			/*
913 			 * If a command has been submitted to the remote server by using
914 			 * an asynchronous execution function, the command might not have
915 			 * yet completed.  Check to see if a command is still being
916 			 * processed by the remote server, and if so, request cancellation
917 			 * of the command.
918 			 */
919 			if (PQtransactionStatus(entry->conn) == PQTRANS_ACTIVE &&
920 				!pgfdw_cancel_query(entry->conn))
921 				abort_cleanup_failure = true;
922 			else
923 			{
924 				/* Rollback all remote subtransactions during abort */
925 				snprintf(sql, sizeof(sql),
926 						 "ROLLBACK TO SAVEPOINT s%d; RELEASE SAVEPOINT s%d",
927 						 curlevel, curlevel);
928 				if (!pgfdw_exec_cleanup_query(entry->conn, sql, false))
929 					abort_cleanup_failure = true;
930 			}
931 
932 			/* Disarm changing_xact_state if it all worked. */
933 			entry->changing_xact_state = abort_cleanup_failure;
934 		}
935 
936 		/* OK, we're outta that level of subtransaction */
937 		entry->xact_depth--;
938 	}
939 }
940 
941 /*
942  * Connection invalidation callback function
943  *
944  * After a change to a pg_foreign_server or pg_user_mapping catalog entry,
945  * close connections depending on that entry immediately if current transaction
946  * has not used those connections yet. Otherwise, mark those connections as
947  * invalid and then make pgfdw_xact_callback() close them at the end of current
948  * transaction, since they cannot be closed in the midst of the transaction
949  * using them. Closed connections will be remade at the next opportunity if
950  * necessary.
951  *
952  * Although most cache invalidation callbacks blow away all the related stuff
953  * regardless of the given hashvalue, connections are expensive enough that
954  * it's worth trying to avoid that.
955  *
956  * NB: We could avoid unnecessary disconnection more strictly by examining
957  * individual option values, but it seems too much effort for the gain.
958  */
959 static void
pgfdw_inval_callback(Datum arg,int cacheid,uint32 hashvalue)960 pgfdw_inval_callback(Datum arg, int cacheid, uint32 hashvalue)
961 {
962 	HASH_SEQ_STATUS scan;
963 	ConnCacheEntry *entry;
964 
965 	Assert(cacheid == FOREIGNSERVEROID || cacheid == USERMAPPINGOID);
966 
967 	/* ConnectionHash must exist already, if we're registered */
968 	hash_seq_init(&scan, ConnectionHash);
969 	while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
970 	{
971 		/* Ignore invalid entries */
972 		if (entry->conn == NULL)
973 			continue;
974 
975 		/* hashvalue == 0 means a cache reset, must clear all state */
976 		if (hashvalue == 0 ||
977 			(cacheid == FOREIGNSERVEROID &&
978 			 entry->server_hashvalue == hashvalue) ||
979 			(cacheid == USERMAPPINGOID &&
980 			 entry->mapping_hashvalue == hashvalue))
981 		{
982 			/*
983 			 * Close the connection immediately if it's not used yet in this
984 			 * transaction. Otherwise mark it as invalid so that
985 			 * pgfdw_xact_callback() can close it at the end of this
986 			 * transaction.
987 			 */
988 			if (entry->xact_depth == 0)
989 			{
990 				elog(DEBUG3, "discarding connection %p", entry->conn);
991 				disconnect_pg_server(entry);
992 			}
993 			else
994 				entry->invalidated = true;
995 		}
996 	}
997 }
998 
999 /*
1000  * Raise an error if the given connection cache entry is marked as being
1001  * in the middle of an xact state change.  This should be called at which no
1002  * such change is expected to be in progress; if one is found to be in
1003  * progress, it means that we aborted in the middle of a previous state change
1004  * and now don't know what the remote transaction state actually is.
1005  * Such connections can't safely be further used.  Re-establishing the
1006  * connection would change the snapshot and roll back any writes already
1007  * performed, so that's not an option, either. Thus, we must abort.
1008  */
1009 static void
pgfdw_reject_incomplete_xact_state_change(ConnCacheEntry * entry)1010 pgfdw_reject_incomplete_xact_state_change(ConnCacheEntry *entry)
1011 {
1012 	HeapTuple	tup;
1013 	Form_pg_user_mapping umform;
1014 	ForeignServer *server;
1015 
1016 	/* nothing to do for inactive entries and entries of sane state */
1017 	if (entry->conn == NULL || !entry->changing_xact_state)
1018 		return;
1019 
1020 	/* make sure this entry is inactive */
1021 	disconnect_pg_server(entry);
1022 
1023 	/* find server name to be shown in the message below */
1024 	tup = SearchSysCache1(USERMAPPINGOID,
1025 						  ObjectIdGetDatum(entry->key));
1026 	if (!HeapTupleIsValid(tup))
1027 		elog(ERROR, "cache lookup failed for user mapping %u", entry->key);
1028 	umform = (Form_pg_user_mapping) GETSTRUCT(tup);
1029 	server = GetForeignServer(umform->umserver);
1030 	ReleaseSysCache(tup);
1031 
1032 	ereport(ERROR,
1033 			(errcode(ERRCODE_CONNECTION_EXCEPTION),
1034 			 errmsg("connection to server \"%s\" was lost",
1035 					server->servername)));
1036 }
1037 
1038 /*
1039  * Cancel the currently-in-progress query (whose query text we do not have)
1040  * and ignore the result.  Returns true if we successfully cancel the query
1041  * and discard any pending result, and false if not.
1042  *
1043  * It's not a huge problem if we throw an ERROR here, but if we get into error
1044  * recursion trouble, we'll end up slamming the connection shut, which will
1045  * necessitate failing the entire toplevel transaction even if subtransactions
1046  * were used.  Try to use WARNING where we can.
1047  */
1048 static bool
pgfdw_cancel_query(PGconn * conn)1049 pgfdw_cancel_query(PGconn *conn)
1050 {
1051 	PGcancel   *cancel;
1052 	char		errbuf[256];
1053 	PGresult   *result = NULL;
1054 	TimestampTz endtime;
1055 
1056 	/*
1057 	 * If it takes too long to cancel the query and discard the result, assume
1058 	 * the connection is dead.
1059 	 */
1060 	endtime = TimestampTzPlusMilliseconds(GetCurrentTimestamp(), 30000);
1061 
1062 	/*
1063 	 * Issue cancel request.  Unfortunately, there's no good way to limit the
1064 	 * amount of time that we might block inside PQgetCancel().
1065 	 */
1066 	if ((cancel = PQgetCancel(conn)))
1067 	{
1068 		if (!PQcancel(cancel, errbuf, sizeof(errbuf)))
1069 		{
1070 			ereport(WARNING,
1071 					(errcode(ERRCODE_CONNECTION_FAILURE),
1072 					 errmsg("could not send cancel request: %s",
1073 							errbuf)));
1074 			PQfreeCancel(cancel);
1075 			return false;
1076 		}
1077 		PQfreeCancel(cancel);
1078 	}
1079 
1080 	/* Get and discard the result of the query. */
1081 	if (pgfdw_get_cleanup_result(conn, endtime, &result))
1082 		return false;
1083 	PQclear(result);
1084 
1085 	return true;
1086 }
1087 
1088 /*
1089  * Submit a query during (sub)abort cleanup and wait up to 30 seconds for the
1090  * result.  If the query is executed without error, the return value is true.
1091  * If the query is executed successfully but returns an error, the return
1092  * value is true if and only if ignore_errors is set.  If the query can't be
1093  * sent or times out, the return value is false.
1094  *
1095  * It's not a huge problem if we throw an ERROR here, but if we get into error
1096  * recursion trouble, we'll end up slamming the connection shut, which will
1097  * necessitate failing the entire toplevel transaction even if subtransactions
1098  * were used.  Try to use WARNING where we can.
1099  */
1100 static bool
pgfdw_exec_cleanup_query(PGconn * conn,const char * query,bool ignore_errors)1101 pgfdw_exec_cleanup_query(PGconn *conn, const char *query, bool ignore_errors)
1102 {
1103 	PGresult   *result = NULL;
1104 	TimestampTz endtime;
1105 
1106 	/*
1107 	 * If it takes too long to execute a cleanup query, assume the connection
1108 	 * is dead.  It's fairly likely that this is why we aborted in the first
1109 	 * place (e.g. statement timeout, user cancel), so the timeout shouldn't
1110 	 * be too long.
1111 	 */
1112 	endtime = TimestampTzPlusMilliseconds(GetCurrentTimestamp(), 30000);
1113 
1114 	/*
1115 	 * Submit a query.  Since we don't use non-blocking mode, this also can
1116 	 * block.  But its risk is relatively small, so we ignore that for now.
1117 	 */
1118 	if (!PQsendQuery(conn, query))
1119 	{
1120 		pgfdw_report_error(WARNING, NULL, conn, false, query);
1121 		return false;
1122 	}
1123 
1124 	/* Get the result of the query. */
1125 	if (pgfdw_get_cleanup_result(conn, endtime, &result))
1126 		return false;
1127 
1128 	/* Issue a warning if not successful. */
1129 	if (PQresultStatus(result) != PGRES_COMMAND_OK)
1130 	{
1131 		pgfdw_report_error(WARNING, result, conn, true, query);
1132 		return ignore_errors;
1133 	}
1134 	PQclear(result);
1135 
1136 	return true;
1137 }
1138 
1139 /*
1140  * Get, during abort cleanup, the result of a query that is in progress.  This
1141  * might be a query that is being interrupted by transaction abort, or it might
1142  * be a query that was initiated as part of transaction abort to get the remote
1143  * side back to the appropriate state.
1144  *
1145  * endtime is the time at which we should give up and assume the remote
1146  * side is dead.  Returns true if the timeout expired, otherwise false.
1147  * Sets *result except in case of a timeout.
1148  */
1149 static bool
pgfdw_get_cleanup_result(PGconn * conn,TimestampTz endtime,PGresult ** result)1150 pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime, PGresult **result)
1151 {
1152 	volatile bool timed_out = false;
1153 	PGresult   *volatile last_res = NULL;
1154 
1155 	/* In what follows, do not leak any PGresults on an error. */
1156 	PG_TRY();
1157 	{
1158 		for (;;)
1159 		{
1160 			PGresult   *res;
1161 
1162 			while (PQisBusy(conn))
1163 			{
1164 				int			wc;
1165 				TimestampTz now = GetCurrentTimestamp();
1166 				long		cur_timeout;
1167 
1168 				/* If timeout has expired, give up, else get sleep time. */
1169 				cur_timeout = TimestampDifferenceMilliseconds(now, endtime);
1170 				if (cur_timeout <= 0)
1171 				{
1172 					timed_out = true;
1173 					goto exit;
1174 				}
1175 
1176 				/* Sleep until there's something to do */
1177 				wc = WaitLatchOrSocket(MyLatch,
1178 									   WL_LATCH_SET | WL_SOCKET_READABLE | WL_TIMEOUT,
1179 									   PQsocket(conn),
1180 									   cur_timeout, PG_WAIT_EXTENSION);
1181 				ResetLatch(MyLatch);
1182 
1183 				CHECK_FOR_INTERRUPTS();
1184 
1185 				/* Data available in socket? */
1186 				if (wc & WL_SOCKET_READABLE)
1187 				{
1188 					if (!PQconsumeInput(conn))
1189 					{
1190 						/* connection trouble; treat the same as a timeout */
1191 						timed_out = true;
1192 						goto exit;
1193 					}
1194 				}
1195 			}
1196 
1197 			res = PQgetResult(conn);
1198 			if (res == NULL)
1199 				break;			/* query is complete */
1200 
1201 			PQclear(last_res);
1202 			last_res = res;
1203 		}
1204 exit:	;
1205 	}
1206 	PG_CATCH();
1207 	{
1208 		PQclear(last_res);
1209 		PG_RE_THROW();
1210 	}
1211 	PG_END_TRY();
1212 
1213 	if (timed_out)
1214 		PQclear(last_res);
1215 	else
1216 		*result = last_res;
1217 	return timed_out;
1218 }
1219