1 /*-------------------------------------------------------------------------
2 *
3 * connection.c
4 * Connection management functions for postgres_fdw
5 *
6 * Portions Copyright (c) 2012-2017, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * contrib/postgres_fdw/connection.c
10 *
11 *-------------------------------------------------------------------------
12 */
13 #include "postgres.h"
14
15 #include "postgres_fdw.h"
16
17 #include "access/htup_details.h"
18 #include "catalog/pg_user_mapping.h"
19 #include "access/xact.h"
20 #include "mb/pg_wchar.h"
21 #include "miscadmin.h"
22 #include "pgstat.h"
23 #include "storage/latch.h"
24 #include "utils/hsearch.h"
25 #include "utils/inval.h"
26 #include "utils/memutils.h"
27 #include "utils/syscache.h"
28
29
30 /*
31 * Connection cache hash table entry
32 *
33 * The lookup key in this hash table is the user mapping OID. We use just one
34 * connection per user mapping ID, which ensures that all the scans use the
35 * same snapshot during a query. Using the user mapping OID rather than
36 * the foreign server OID + user OID avoids creating multiple connections when
37 * the public user mapping applies to all user OIDs.
38 *
39 * The "conn" pointer can be NULL if we don't currently have a live connection.
40 * When we do have a connection, xact_depth tracks the current depth of
41 * transactions and subtransactions open on the remote side. We need to issue
42 * commands at the same nesting depth on the remote as we're executing at
43 * ourselves, so that rolling back a subtransaction will kill the right
44 * queries and not the wrong ones.
45 */
46 typedef Oid ConnCacheKey;
47
48 typedef struct ConnCacheEntry
49 {
50 ConnCacheKey key; /* hash key (must be first) */
51 PGconn *conn; /* connection to foreign server, or NULL */
52 /* Remaining fields are invalid when conn is NULL: */
53 int xact_depth; /* 0 = no xact open, 1 = main xact open, 2 =
54 * one level of subxact open, etc */
55 bool have_prep_stmt; /* have we prepared any stmts in this xact? */
56 bool have_error; /* have any subxacts aborted in this xact? */
57 bool changing_xact_state; /* xact state change in process */
58 bool invalidated; /* true if reconnect is pending */
59 uint32 server_hashvalue; /* hash value of foreign server OID */
60 uint32 mapping_hashvalue; /* hash value of user mapping OID */
61 } ConnCacheEntry;
62
63 /*
64 * Connection cache (initialized on first use)
65 */
66 static HTAB *ConnectionHash = NULL;
67
68 /* for assigning cursor numbers and prepared statement numbers */
69 static unsigned int cursor_number = 0;
70 static unsigned int prep_stmt_number = 0;
71
72 /* tracks whether any work is needed in callback functions */
73 static bool xact_got_connection = false;
74
75 /* prototypes of private functions */
76 static PGconn *connect_pg_server(ForeignServer *server, UserMapping *user);
77 static void disconnect_pg_server(ConnCacheEntry *entry);
78 static void check_conn_params(const char **keywords, const char **values);
79 static void configure_remote_session(PGconn *conn);
80 static void do_sql_command(PGconn *conn, const char *sql);
81 static void begin_remote_xact(ConnCacheEntry *entry);
82 static void pgfdw_xact_callback(XactEvent event, void *arg);
83 static void pgfdw_subxact_callback(SubXactEvent event,
84 SubTransactionId mySubid,
85 SubTransactionId parentSubid,
86 void *arg);
87 static void pgfdw_inval_callback(Datum arg, int cacheid, uint32 hashvalue);
88 static void pgfdw_reject_incomplete_xact_state_change(ConnCacheEntry *entry);
89 static bool pgfdw_cancel_query(PGconn *conn);
90 static bool pgfdw_exec_cleanup_query(PGconn *conn, const char *query,
91 bool ignore_errors);
92 static bool pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime,
93 PGresult **result);
94
95
96 /*
97 * Get a PGconn which can be used to execute queries on the remote PostgreSQL
98 * server with the user's authorization. A new connection is established
99 * if we don't already have a suitable one, and a transaction is opened at
100 * the right subtransaction nesting depth if we didn't do that already.
101 *
102 * will_prep_stmt must be true if caller intends to create any prepared
103 * statements. Since those don't go away automatically at transaction end
104 * (not even on error), we need this flag to cue manual cleanup.
105 */
106 PGconn *
GetConnection(UserMapping * user,bool will_prep_stmt)107 GetConnection(UserMapping *user, bool will_prep_stmt)
108 {
109 bool found;
110 ConnCacheEntry *entry;
111 ConnCacheKey key;
112
113 /* First time through, initialize connection cache hashtable */
114 if (ConnectionHash == NULL)
115 {
116 HASHCTL ctl;
117
118 MemSet(&ctl, 0, sizeof(ctl));
119 ctl.keysize = sizeof(ConnCacheKey);
120 ctl.entrysize = sizeof(ConnCacheEntry);
121 /* allocate ConnectionHash in the cache context */
122 ctl.hcxt = CacheMemoryContext;
123 ConnectionHash = hash_create("postgres_fdw connections", 8,
124 &ctl,
125 HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
126
127 /*
128 * Register some callback functions that manage connection cleanup.
129 * This should be done just once in each backend.
130 */
131 RegisterXactCallback(pgfdw_xact_callback, NULL);
132 RegisterSubXactCallback(pgfdw_subxact_callback, NULL);
133 CacheRegisterSyscacheCallback(FOREIGNSERVEROID,
134 pgfdw_inval_callback, (Datum) 0);
135 CacheRegisterSyscacheCallback(USERMAPPINGOID,
136 pgfdw_inval_callback, (Datum) 0);
137 }
138
139 /* Set flag that we did GetConnection during the current transaction */
140 xact_got_connection = true;
141
142 /* Create hash key for the entry. Assume no pad bytes in key struct */
143 key = user->umid;
144
145 /*
146 * Find or create cached entry for requested connection.
147 */
148 entry = hash_search(ConnectionHash, &key, HASH_ENTER, &found);
149 if (!found)
150 {
151 /*
152 * We need only clear "conn" here; remaining fields will be filled
153 * later when "conn" is set.
154 */
155 entry->conn = NULL;
156 }
157
158 /* Reject further use of connections which failed abort cleanup. */
159 pgfdw_reject_incomplete_xact_state_change(entry);
160
161 /*
162 * If the connection needs to be remade due to invalidation, disconnect as
163 * soon as we're out of all transactions.
164 */
165 if (entry->conn != NULL && entry->invalidated && entry->xact_depth == 0)
166 {
167 elog(DEBUG3, "closing connection %p for option changes to take effect",
168 entry->conn);
169 disconnect_pg_server(entry);
170 }
171
172 /*
173 * We don't check the health of cached connection here, because it would
174 * require some overhead. Broken connection will be detected when the
175 * connection is actually used.
176 */
177
178 /*
179 * If cache entry doesn't have a connection, we have to establish a new
180 * connection. (If connect_pg_server throws an error, the cache entry
181 * will remain in a valid empty state, ie conn == NULL.)
182 */
183 if (entry->conn == NULL)
184 {
185 ForeignServer *server = GetForeignServer(user->serverid);
186
187 /* Reset all transient state fields, to be sure all are clean */
188 entry->xact_depth = 0;
189 entry->have_prep_stmt = false;
190 entry->have_error = false;
191 entry->changing_xact_state = false;
192 entry->invalidated = false;
193 entry->server_hashvalue =
194 GetSysCacheHashValue1(FOREIGNSERVEROID,
195 ObjectIdGetDatum(server->serverid));
196 entry->mapping_hashvalue =
197 GetSysCacheHashValue1(USERMAPPINGOID,
198 ObjectIdGetDatum(user->umid));
199
200 /* Now try to make the connection */
201 entry->conn = connect_pg_server(server, user);
202
203 elog(DEBUG3, "new postgres_fdw connection %p for server \"%s\" (user mapping oid %u, userid %u)",
204 entry->conn, server->servername, user->umid, user->userid);
205 }
206
207 /*
208 * Start a new transaction or subtransaction if needed.
209 */
210 begin_remote_xact(entry);
211
212 /* Remember if caller will prepare statements */
213 entry->have_prep_stmt |= will_prep_stmt;
214
215 return entry->conn;
216 }
217
218 /*
219 * Connect to remote server using specified server and user mapping properties.
220 */
221 static PGconn *
connect_pg_server(ForeignServer * server,UserMapping * user)222 connect_pg_server(ForeignServer *server, UserMapping *user)
223 {
224 PGconn *volatile conn = NULL;
225
226 /*
227 * Use PG_TRY block to ensure closing connection on error.
228 */
229 PG_TRY();
230 {
231 const char **keywords;
232 const char **values;
233 int n;
234
235 /*
236 * Construct connection params from generic options of ForeignServer
237 * and UserMapping. (Some of them might not be libpq options, in
238 * which case we'll just waste a few array slots.) Add 3 extra slots
239 * for fallback_application_name, client_encoding, end marker.
240 */
241 n = list_length(server->options) + list_length(user->options) + 3;
242 keywords = (const char **) palloc(n * sizeof(char *));
243 values = (const char **) palloc(n * sizeof(char *));
244
245 n = 0;
246 n += ExtractConnectionOptions(server->options,
247 keywords + n, values + n);
248 n += ExtractConnectionOptions(user->options,
249 keywords + n, values + n);
250
251 /* Use "postgres_fdw" as fallback_application_name. */
252 keywords[n] = "fallback_application_name";
253 values[n] = "postgres_fdw";
254 n++;
255
256 /* Set client_encoding so that libpq can convert encoding properly. */
257 keywords[n] = "client_encoding";
258 values[n] = GetDatabaseEncodingName();
259 n++;
260
261 keywords[n] = values[n] = NULL;
262
263 /* verify connection parameters and make connection */
264 check_conn_params(keywords, values);
265
266 conn = PQconnectdbParams(keywords, values, false);
267 if (!conn || PQstatus(conn) != CONNECTION_OK)
268 ereport(ERROR,
269 (errcode(ERRCODE_SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION),
270 errmsg("could not connect to server \"%s\"",
271 server->servername),
272 errdetail_internal("%s", pchomp(PQerrorMessage(conn)))));
273
274 /*
275 * Check that non-superuser has used password to establish connection;
276 * otherwise, he's piggybacking on the postgres server's user
277 * identity. See also dblink_security_check() in contrib/dblink.
278 */
279 if (!superuser() && !PQconnectionUsedPassword(conn))
280 ereport(ERROR,
281 (errcode(ERRCODE_S_R_E_PROHIBITED_SQL_STATEMENT_ATTEMPTED),
282 errmsg("password is required"),
283 errdetail("Non-superuser cannot connect if the server does not request a password."),
284 errhint("Target server's authentication method must be changed.")));
285
286 /* Prepare new session for use */
287 configure_remote_session(conn);
288
289 pfree(keywords);
290 pfree(values);
291 }
292 PG_CATCH();
293 {
294 /* Release PGconn data structure if we managed to create one */
295 if (conn)
296 PQfinish(conn);
297 PG_RE_THROW();
298 }
299 PG_END_TRY();
300
301 return conn;
302 }
303
304 /*
305 * Disconnect any open connection for a connection cache entry.
306 */
307 static void
disconnect_pg_server(ConnCacheEntry * entry)308 disconnect_pg_server(ConnCacheEntry *entry)
309 {
310 if (entry->conn != NULL)
311 {
312 PQfinish(entry->conn);
313 entry->conn = NULL;
314 }
315 }
316
317 /*
318 * For non-superusers, insist that the connstr specify a password. This
319 * prevents a password from being picked up from .pgpass, a service file,
320 * the environment, etc. We don't want the postgres user's passwords
321 * to be accessible to non-superusers. (See also dblink_connstr_check in
322 * contrib/dblink.)
323 */
324 static void
check_conn_params(const char ** keywords,const char ** values)325 check_conn_params(const char **keywords, const char **values)
326 {
327 int i;
328
329 /* no check required if superuser */
330 if (superuser())
331 return;
332
333 /* ok if params contain a non-empty password */
334 for (i = 0; keywords[i] != NULL; i++)
335 {
336 if (strcmp(keywords[i], "password") == 0 && values[i][0] != '\0')
337 return;
338 }
339
340 ereport(ERROR,
341 (errcode(ERRCODE_S_R_E_PROHIBITED_SQL_STATEMENT_ATTEMPTED),
342 errmsg("password is required"),
343 errdetail("Non-superusers must provide a password in the user mapping.")));
344 }
345
346 /*
347 * Issue SET commands to make sure remote session is configured properly.
348 *
349 * We do this just once at connection, assuming nothing will change the
350 * values later. Since we'll never send volatile function calls to the
351 * remote, there shouldn't be any way to break this assumption from our end.
352 * It's possible to think of ways to break it at the remote end, eg making
353 * a foreign table point to a view that includes a set_config call ---
354 * but once you admit the possibility of a malicious view definition,
355 * there are any number of ways to break things.
356 */
357 static void
configure_remote_session(PGconn * conn)358 configure_remote_session(PGconn *conn)
359 {
360 int remoteversion = PQserverVersion(conn);
361
362 /* Force the search path to contain only pg_catalog (see deparse.c) */
363 do_sql_command(conn, "SET search_path = pg_catalog");
364
365 /*
366 * Set remote timezone; this is basically just cosmetic, since all
367 * transmitted and returned timestamptzs should specify a zone explicitly
368 * anyway. However it makes the regression test outputs more predictable.
369 *
370 * We don't risk setting remote zone equal to ours, since the remote
371 * server might use a different timezone database. Instead, use UTC
372 * (quoted, because very old servers are picky about case).
373 */
374 do_sql_command(conn, "SET timezone = 'UTC'");
375
376 /*
377 * Set values needed to ensure unambiguous data output from remote. (This
378 * logic should match what pg_dump does. See also set_transmission_modes
379 * in postgres_fdw.c.)
380 */
381 do_sql_command(conn, "SET datestyle = ISO");
382 if (remoteversion >= 80400)
383 do_sql_command(conn, "SET intervalstyle = postgres");
384 if (remoteversion >= 90000)
385 do_sql_command(conn, "SET extra_float_digits = 3");
386 else
387 do_sql_command(conn, "SET extra_float_digits = 2");
388 }
389
390 /*
391 * Convenience subroutine to issue a non-data-returning SQL command to remote
392 */
393 static void
do_sql_command(PGconn * conn,const char * sql)394 do_sql_command(PGconn *conn, const char *sql)
395 {
396 PGresult *res;
397
398 if (!PQsendQuery(conn, sql))
399 pgfdw_report_error(ERROR, NULL, conn, false, sql);
400 res = pgfdw_get_result(conn, sql);
401 if (PQresultStatus(res) != PGRES_COMMAND_OK)
402 pgfdw_report_error(ERROR, res, conn, true, sql);
403 PQclear(res);
404 }
405
406 /*
407 * Start remote transaction or subtransaction, if needed.
408 *
409 * Note that we always use at least REPEATABLE READ in the remote session.
410 * This is so that, if a query initiates multiple scans of the same or
411 * different foreign tables, we will get snapshot-consistent results from
412 * those scans. A disadvantage is that we can't provide sane emulation of
413 * READ COMMITTED behavior --- it would be nice if we had some other way to
414 * control which remote queries share a snapshot.
415 */
416 static void
begin_remote_xact(ConnCacheEntry * entry)417 begin_remote_xact(ConnCacheEntry *entry)
418 {
419 int curlevel = GetCurrentTransactionNestLevel();
420
421 /* Start main transaction if we haven't yet */
422 if (entry->xact_depth <= 0)
423 {
424 const char *sql;
425
426 elog(DEBUG3, "starting remote transaction on connection %p",
427 entry->conn);
428
429 if (IsolationIsSerializable())
430 sql = "START TRANSACTION ISOLATION LEVEL SERIALIZABLE";
431 else
432 sql = "START TRANSACTION ISOLATION LEVEL REPEATABLE READ";
433 entry->changing_xact_state = true;
434 do_sql_command(entry->conn, sql);
435 entry->xact_depth = 1;
436 entry->changing_xact_state = false;
437 }
438
439 /*
440 * If we're in a subtransaction, stack up savepoints to match our level.
441 * This ensures we can rollback just the desired effects when a
442 * subtransaction aborts.
443 */
444 while (entry->xact_depth < curlevel)
445 {
446 char sql[64];
447
448 snprintf(sql, sizeof(sql), "SAVEPOINT s%d", entry->xact_depth + 1);
449 entry->changing_xact_state = true;
450 do_sql_command(entry->conn, sql);
451 entry->xact_depth++;
452 entry->changing_xact_state = false;
453 }
454 }
455
456 /*
457 * Release connection reference count created by calling GetConnection.
458 */
459 void
ReleaseConnection(PGconn * conn)460 ReleaseConnection(PGconn *conn)
461 {
462 /*
463 * Currently, we don't actually track connection references because all
464 * cleanup is managed on a transaction or subtransaction basis instead. So
465 * there's nothing to do here.
466 */
467 }
468
469 /*
470 * Assign a "unique" number for a cursor.
471 *
472 * These really only need to be unique per connection within a transaction.
473 * For the moment we ignore the per-connection point and assign them across
474 * all connections in the transaction, but we ask for the connection to be
475 * supplied in case we want to refine that.
476 *
477 * Note that even if wraparound happens in a very long transaction, actual
478 * collisions are highly improbable; just be sure to use %u not %d to print.
479 */
480 unsigned int
GetCursorNumber(PGconn * conn)481 GetCursorNumber(PGconn *conn)
482 {
483 return ++cursor_number;
484 }
485
486 /*
487 * Assign a "unique" number for a prepared statement.
488 *
489 * This works much like GetCursorNumber, except that we never reset the counter
490 * within a session. That's because we can't be 100% sure we've gotten rid
491 * of all prepared statements on all connections, and it's not really worth
492 * increasing the risk of prepared-statement name collisions by resetting.
493 */
494 unsigned int
GetPrepStmtNumber(PGconn * conn)495 GetPrepStmtNumber(PGconn *conn)
496 {
497 return ++prep_stmt_number;
498 }
499
500 /*
501 * Submit a query and wait for the result.
502 *
503 * This function is interruptible by signals.
504 *
505 * Caller is responsible for the error handling on the result.
506 */
507 PGresult *
pgfdw_exec_query(PGconn * conn,const char * query)508 pgfdw_exec_query(PGconn *conn, const char *query)
509 {
510 /*
511 * Submit a query. Since we don't use non-blocking mode, this also can
512 * block. But its risk is relatively small, so we ignore that for now.
513 */
514 if (!PQsendQuery(conn, query))
515 pgfdw_report_error(ERROR, NULL, conn, false, query);
516
517 /* Wait for the result. */
518 return pgfdw_get_result(conn, query);
519 }
520
521 /*
522 * Wait for the result from a prior asynchronous execution function call.
523 *
524 * This function offers quick responsiveness by checking for any interruptions.
525 *
526 * This function emulates PQexec()'s behavior of returning the last result
527 * when there are many.
528 *
529 * Caller is responsible for the error handling on the result.
530 */
531 PGresult *
pgfdw_get_result(PGconn * conn,const char * query)532 pgfdw_get_result(PGconn *conn, const char *query)
533 {
534 PGresult *volatile last_res = NULL;
535
536 /* In what follows, do not leak any PGresults on an error. */
537 PG_TRY();
538 {
539 for (;;)
540 {
541 PGresult *res;
542
543 while (PQisBusy(conn))
544 {
545 int wc;
546
547 /* Sleep until there's something to do */
548 wc = WaitLatchOrSocket(MyLatch,
549 WL_LATCH_SET | WL_SOCKET_READABLE,
550 PQsocket(conn),
551 -1L, PG_WAIT_EXTENSION);
552 ResetLatch(MyLatch);
553
554 CHECK_FOR_INTERRUPTS();
555
556 /* Data available in socket? */
557 if (wc & WL_SOCKET_READABLE)
558 {
559 if (!PQconsumeInput(conn))
560 pgfdw_report_error(ERROR, NULL, conn, false, query);
561 }
562 }
563
564 res = PQgetResult(conn);
565 if (res == NULL)
566 break; /* query is complete */
567
568 PQclear(last_res);
569 last_res = res;
570 }
571 }
572 PG_CATCH();
573 {
574 PQclear(last_res);
575 PG_RE_THROW();
576 }
577 PG_END_TRY();
578
579 return last_res;
580 }
581
582 /*
583 * Report an error we got from the remote server.
584 *
585 * elevel: error level to use (typically ERROR, but might be less)
586 * res: PGresult containing the error
587 * conn: connection we did the query on
588 * clear: if true, PQclear the result (otherwise caller will handle it)
589 * sql: NULL, or text of remote command we tried to execute
590 *
591 * Note: callers that choose not to throw ERROR for a remote error are
592 * responsible for making sure that the associated ConnCacheEntry gets
593 * marked with have_error = true.
594 */
595 void
pgfdw_report_error(int elevel,PGresult * res,PGconn * conn,bool clear,const char * sql)596 pgfdw_report_error(int elevel, PGresult *res, PGconn *conn,
597 bool clear, const char *sql)
598 {
599 /* If requested, PGresult must be released before leaving this function. */
600 PG_TRY();
601 {
602 char *diag_sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE);
603 char *message_primary = PQresultErrorField(res, PG_DIAG_MESSAGE_PRIMARY);
604 char *message_detail = PQresultErrorField(res, PG_DIAG_MESSAGE_DETAIL);
605 char *message_hint = PQresultErrorField(res, PG_DIAG_MESSAGE_HINT);
606 char *message_context = PQresultErrorField(res, PG_DIAG_CONTEXT);
607 int sqlstate;
608
609 if (diag_sqlstate)
610 sqlstate = MAKE_SQLSTATE(diag_sqlstate[0],
611 diag_sqlstate[1],
612 diag_sqlstate[2],
613 diag_sqlstate[3],
614 diag_sqlstate[4]);
615 else
616 sqlstate = ERRCODE_CONNECTION_FAILURE;
617
618 /*
619 * If we don't get a message from the PGresult, try the PGconn. This
620 * is needed because for connection-level failures, PQexec may just
621 * return NULL, not a PGresult at all.
622 */
623 if (message_primary == NULL)
624 message_primary = pchomp(PQerrorMessage(conn));
625
626 ereport(elevel,
627 (errcode(sqlstate),
628 message_primary ? errmsg_internal("%s", message_primary) :
629 errmsg("could not obtain message string for remote error"),
630 message_detail ? errdetail_internal("%s", message_detail) : 0,
631 message_hint ? errhint("%s", message_hint) : 0,
632 message_context ? errcontext("%s", message_context) : 0,
633 sql ? errcontext("Remote SQL command: %s", sql) : 0));
634 }
635 PG_CATCH();
636 {
637 if (clear)
638 PQclear(res);
639 PG_RE_THROW();
640 }
641 PG_END_TRY();
642 if (clear)
643 PQclear(res);
644 }
645
646 /*
647 * pgfdw_xact_callback --- cleanup at main-transaction end.
648 *
649 * This runs just late enough that it must not enter user-defined code
650 * locally. (Entering such code on the remote side is fine. Its remote
651 * COMMIT TRANSACTION may run deferred triggers.)
652 */
653 static void
pgfdw_xact_callback(XactEvent event,void * arg)654 pgfdw_xact_callback(XactEvent event, void *arg)
655 {
656 HASH_SEQ_STATUS scan;
657 ConnCacheEntry *entry;
658
659 /* Quick exit if no connections were touched in this transaction. */
660 if (!xact_got_connection)
661 return;
662
663 /*
664 * Scan all connection cache entries to find open remote transactions, and
665 * close them.
666 */
667 hash_seq_init(&scan, ConnectionHash);
668 while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
669 {
670 PGresult *res;
671
672 /* Ignore cache entry if no open connection right now */
673 if (entry->conn == NULL)
674 continue;
675
676 /* If it has an open remote transaction, try to close it */
677 if (entry->xact_depth > 0)
678 {
679 bool abort_cleanup_failure = false;
680
681 elog(DEBUG3, "closing remote transaction on connection %p",
682 entry->conn);
683
684 switch (event)
685 {
686 case XACT_EVENT_PARALLEL_PRE_COMMIT:
687 case XACT_EVENT_PRE_COMMIT:
688
689 /*
690 * If abort cleanup previously failed for this connection,
691 * we can't issue any more commands against it.
692 */
693 pgfdw_reject_incomplete_xact_state_change(entry);
694
695 /* Commit all remote transactions during pre-commit */
696 entry->changing_xact_state = true;
697 do_sql_command(entry->conn, "COMMIT TRANSACTION");
698 entry->changing_xact_state = false;
699
700 /*
701 * If there were any errors in subtransactions, and we
702 * made prepared statements, do a DEALLOCATE ALL to make
703 * sure we get rid of all prepared statements. This is
704 * annoying and not terribly bulletproof, but it's
705 * probably not worth trying harder.
706 *
707 * DEALLOCATE ALL only exists in 8.3 and later, so this
708 * constrains how old a server postgres_fdw can
709 * communicate with. We intentionally ignore errors in
710 * the DEALLOCATE, so that we can hobble along to some
711 * extent with older servers (leaking prepared statements
712 * as we go; but we don't really support update operations
713 * pre-8.3 anyway).
714 */
715 if (entry->have_prep_stmt && entry->have_error)
716 {
717 res = PQexec(entry->conn, "DEALLOCATE ALL");
718 PQclear(res);
719 }
720 entry->have_prep_stmt = false;
721 entry->have_error = false;
722 break;
723 case XACT_EVENT_PRE_PREPARE:
724
725 /*
726 * We disallow any remote transactions, since it's not
727 * very reasonable to hold them open until the prepared
728 * transaction is committed. For the moment, throw error
729 * unconditionally; later we might allow read-only cases.
730 * Note that the error will cause us to come right back
731 * here with event == XACT_EVENT_ABORT, so we'll clean up
732 * the connection state at that point.
733 */
734 ereport(ERROR,
735 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
736 errmsg("cannot PREPARE a transaction that has operated on postgres_fdw foreign tables")));
737 break;
738 case XACT_EVENT_PARALLEL_COMMIT:
739 case XACT_EVENT_COMMIT:
740 case XACT_EVENT_PREPARE:
741 /* Pre-commit should have closed the open transaction */
742 elog(ERROR, "missed cleaning up connection during pre-commit");
743 break;
744 case XACT_EVENT_PARALLEL_ABORT:
745 case XACT_EVENT_ABORT:
746
747 /*
748 * Don't try to clean up the connection if we're already
749 * in error recursion trouble.
750 */
751 if (in_error_recursion_trouble())
752 entry->changing_xact_state = true;
753
754 /*
755 * If connection is already unsalvageable, don't touch it
756 * further.
757 */
758 if (entry->changing_xact_state)
759 break;
760
761 /*
762 * Mark this connection as in the process of changing
763 * transaction state.
764 */
765 entry->changing_xact_state = true;
766
767 /* Assume we might have lost track of prepared statements */
768 entry->have_error = true;
769
770 /*
771 * If a command has been submitted to the remote server by
772 * using an asynchronous execution function, the command
773 * might not have yet completed. Check to see if a
774 * command is still being processed by the remote server,
775 * and if so, request cancellation of the command.
776 */
777 if (PQtransactionStatus(entry->conn) == PQTRANS_ACTIVE &&
778 !pgfdw_cancel_query(entry->conn))
779 {
780 /* Unable to cancel running query. */
781 abort_cleanup_failure = true;
782 }
783 else if (!pgfdw_exec_cleanup_query(entry->conn,
784 "ABORT TRANSACTION",
785 false))
786 {
787 /* Unable to abort remote transaction. */
788 abort_cleanup_failure = true;
789 }
790 else if (entry->have_prep_stmt && entry->have_error &&
791 !pgfdw_exec_cleanup_query(entry->conn,
792 "DEALLOCATE ALL",
793 true))
794 {
795 /* Trouble clearing prepared statements. */
796 abort_cleanup_failure = true;
797 }
798 else
799 {
800 entry->have_prep_stmt = false;
801 entry->have_error = false;
802 }
803
804 /* Disarm changing_xact_state if it all worked. */
805 entry->changing_xact_state = abort_cleanup_failure;
806 break;
807 }
808 }
809
810 /* Reset state to show we're out of a transaction */
811 entry->xact_depth = 0;
812
813 /*
814 * If the connection isn't in a good idle state or it is marked as
815 * invalid, then discard it to recover. Next GetConnection will open a
816 * new connection.
817 */
818 if (PQstatus(entry->conn) != CONNECTION_OK ||
819 PQtransactionStatus(entry->conn) != PQTRANS_IDLE ||
820 entry->changing_xact_state ||
821 entry->invalidated)
822 {
823 elog(DEBUG3, "discarding connection %p", entry->conn);
824 disconnect_pg_server(entry);
825 }
826 }
827
828 /*
829 * Regardless of the event type, we can now mark ourselves as out of the
830 * transaction. (Note: if we are here during PRE_COMMIT or PRE_PREPARE,
831 * this saves a useless scan of the hashtable during COMMIT or PREPARE.)
832 */
833 xact_got_connection = false;
834
835 /* Also reset cursor numbering for next transaction */
836 cursor_number = 0;
837 }
838
839 /*
840 * pgfdw_subxact_callback --- cleanup at subtransaction end.
841 */
842 static void
pgfdw_subxact_callback(SubXactEvent event,SubTransactionId mySubid,SubTransactionId parentSubid,void * arg)843 pgfdw_subxact_callback(SubXactEvent event, SubTransactionId mySubid,
844 SubTransactionId parentSubid, void *arg)
845 {
846 HASH_SEQ_STATUS scan;
847 ConnCacheEntry *entry;
848 int curlevel;
849
850 /* Nothing to do at subxact start, nor after commit. */
851 if (!(event == SUBXACT_EVENT_PRE_COMMIT_SUB ||
852 event == SUBXACT_EVENT_ABORT_SUB))
853 return;
854
855 /* Quick exit if no connections were touched in this transaction. */
856 if (!xact_got_connection)
857 return;
858
859 /*
860 * Scan all connection cache entries to find open remote subtransactions
861 * of the current level, and close them.
862 */
863 curlevel = GetCurrentTransactionNestLevel();
864 hash_seq_init(&scan, ConnectionHash);
865 while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
866 {
867 char sql[100];
868
869 /*
870 * We only care about connections with open remote subtransactions of
871 * the current level.
872 */
873 if (entry->conn == NULL || entry->xact_depth < curlevel)
874 continue;
875
876 if (entry->xact_depth > curlevel)
877 elog(ERROR, "missed cleaning up remote subtransaction at level %d",
878 entry->xact_depth);
879
880 if (event == SUBXACT_EVENT_PRE_COMMIT_SUB)
881 {
882 /*
883 * If abort cleanup previously failed for this connection, we
884 * can't issue any more commands against it.
885 */
886 pgfdw_reject_incomplete_xact_state_change(entry);
887
888 /* Commit all remote subtransactions during pre-commit */
889 snprintf(sql, sizeof(sql), "RELEASE SAVEPOINT s%d", curlevel);
890 entry->changing_xact_state = true;
891 do_sql_command(entry->conn, sql);
892 entry->changing_xact_state = false;
893 }
894 else if (in_error_recursion_trouble())
895 {
896 /*
897 * Don't try to clean up the connection if we're already in error
898 * recursion trouble.
899 */
900 entry->changing_xact_state = true;
901 }
902 else if (!entry->changing_xact_state)
903 {
904 bool abort_cleanup_failure = false;
905
906 /* Remember that abort cleanup is in progress. */
907 entry->changing_xact_state = true;
908
909 /* Assume we might have lost track of prepared statements */
910 entry->have_error = true;
911
912 /*
913 * If a command has been submitted to the remote server by using
914 * an asynchronous execution function, the command might not have
915 * yet completed. Check to see if a command is still being
916 * processed by the remote server, and if so, request cancellation
917 * of the command.
918 */
919 if (PQtransactionStatus(entry->conn) == PQTRANS_ACTIVE &&
920 !pgfdw_cancel_query(entry->conn))
921 abort_cleanup_failure = true;
922 else
923 {
924 /* Rollback all remote subtransactions during abort */
925 snprintf(sql, sizeof(sql),
926 "ROLLBACK TO SAVEPOINT s%d; RELEASE SAVEPOINT s%d",
927 curlevel, curlevel);
928 if (!pgfdw_exec_cleanup_query(entry->conn, sql, false))
929 abort_cleanup_failure = true;
930 }
931
932 /* Disarm changing_xact_state if it all worked. */
933 entry->changing_xact_state = abort_cleanup_failure;
934 }
935
936 /* OK, we're outta that level of subtransaction */
937 entry->xact_depth--;
938 }
939 }
940
941 /*
942 * Connection invalidation callback function
943 *
944 * After a change to a pg_foreign_server or pg_user_mapping catalog entry,
945 * close connections depending on that entry immediately if current transaction
946 * has not used those connections yet. Otherwise, mark those connections as
947 * invalid and then make pgfdw_xact_callback() close them at the end of current
948 * transaction, since they cannot be closed in the midst of the transaction
949 * using them. Closed connections will be remade at the next opportunity if
950 * necessary.
951 *
952 * Although most cache invalidation callbacks blow away all the related stuff
953 * regardless of the given hashvalue, connections are expensive enough that
954 * it's worth trying to avoid that.
955 *
956 * NB: We could avoid unnecessary disconnection more strictly by examining
957 * individual option values, but it seems too much effort for the gain.
958 */
959 static void
pgfdw_inval_callback(Datum arg,int cacheid,uint32 hashvalue)960 pgfdw_inval_callback(Datum arg, int cacheid, uint32 hashvalue)
961 {
962 HASH_SEQ_STATUS scan;
963 ConnCacheEntry *entry;
964
965 Assert(cacheid == FOREIGNSERVEROID || cacheid == USERMAPPINGOID);
966
967 /* ConnectionHash must exist already, if we're registered */
968 hash_seq_init(&scan, ConnectionHash);
969 while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
970 {
971 /* Ignore invalid entries */
972 if (entry->conn == NULL)
973 continue;
974
975 /* hashvalue == 0 means a cache reset, must clear all state */
976 if (hashvalue == 0 ||
977 (cacheid == FOREIGNSERVEROID &&
978 entry->server_hashvalue == hashvalue) ||
979 (cacheid == USERMAPPINGOID &&
980 entry->mapping_hashvalue == hashvalue))
981 {
982 /*
983 * Close the connection immediately if it's not used yet in this
984 * transaction. Otherwise mark it as invalid so that
985 * pgfdw_xact_callback() can close it at the end of this
986 * transaction.
987 */
988 if (entry->xact_depth == 0)
989 {
990 elog(DEBUG3, "discarding connection %p", entry->conn);
991 disconnect_pg_server(entry);
992 }
993 else
994 entry->invalidated = true;
995 }
996 }
997 }
998
999 /*
1000 * Raise an error if the given connection cache entry is marked as being
1001 * in the middle of an xact state change. This should be called at which no
1002 * such change is expected to be in progress; if one is found to be in
1003 * progress, it means that we aborted in the middle of a previous state change
1004 * and now don't know what the remote transaction state actually is.
1005 * Such connections can't safely be further used. Re-establishing the
1006 * connection would change the snapshot and roll back any writes already
1007 * performed, so that's not an option, either. Thus, we must abort.
1008 */
1009 static void
pgfdw_reject_incomplete_xact_state_change(ConnCacheEntry * entry)1010 pgfdw_reject_incomplete_xact_state_change(ConnCacheEntry *entry)
1011 {
1012 HeapTuple tup;
1013 Form_pg_user_mapping umform;
1014 ForeignServer *server;
1015
1016 /* nothing to do for inactive entries and entries of sane state */
1017 if (entry->conn == NULL || !entry->changing_xact_state)
1018 return;
1019
1020 /* make sure this entry is inactive */
1021 disconnect_pg_server(entry);
1022
1023 /* find server name to be shown in the message below */
1024 tup = SearchSysCache1(USERMAPPINGOID,
1025 ObjectIdGetDatum(entry->key));
1026 if (!HeapTupleIsValid(tup))
1027 elog(ERROR, "cache lookup failed for user mapping %u", entry->key);
1028 umform = (Form_pg_user_mapping) GETSTRUCT(tup);
1029 server = GetForeignServer(umform->umserver);
1030 ReleaseSysCache(tup);
1031
1032 ereport(ERROR,
1033 (errcode(ERRCODE_CONNECTION_EXCEPTION),
1034 errmsg("connection to server \"%s\" was lost",
1035 server->servername)));
1036 }
1037
1038 /*
1039 * Cancel the currently-in-progress query (whose query text we do not have)
1040 * and ignore the result. Returns true if we successfully cancel the query
1041 * and discard any pending result, and false if not.
1042 *
1043 * It's not a huge problem if we throw an ERROR here, but if we get into error
1044 * recursion trouble, we'll end up slamming the connection shut, which will
1045 * necessitate failing the entire toplevel transaction even if subtransactions
1046 * were used. Try to use WARNING where we can.
1047 */
1048 static bool
pgfdw_cancel_query(PGconn * conn)1049 pgfdw_cancel_query(PGconn *conn)
1050 {
1051 PGcancel *cancel;
1052 char errbuf[256];
1053 PGresult *result = NULL;
1054 TimestampTz endtime;
1055
1056 /*
1057 * If it takes too long to cancel the query and discard the result, assume
1058 * the connection is dead.
1059 */
1060 endtime = TimestampTzPlusMilliseconds(GetCurrentTimestamp(), 30000);
1061
1062 /*
1063 * Issue cancel request. Unfortunately, there's no good way to limit the
1064 * amount of time that we might block inside PQgetCancel().
1065 */
1066 if ((cancel = PQgetCancel(conn)))
1067 {
1068 if (!PQcancel(cancel, errbuf, sizeof(errbuf)))
1069 {
1070 ereport(WARNING,
1071 (errcode(ERRCODE_CONNECTION_FAILURE),
1072 errmsg("could not send cancel request: %s",
1073 errbuf)));
1074 PQfreeCancel(cancel);
1075 return false;
1076 }
1077 PQfreeCancel(cancel);
1078 }
1079
1080 /* Get and discard the result of the query. */
1081 if (pgfdw_get_cleanup_result(conn, endtime, &result))
1082 return false;
1083 PQclear(result);
1084
1085 return true;
1086 }
1087
1088 /*
1089 * Submit a query during (sub)abort cleanup and wait up to 30 seconds for the
1090 * result. If the query is executed without error, the return value is true.
1091 * If the query is executed successfully but returns an error, the return
1092 * value is true if and only if ignore_errors is set. If the query can't be
1093 * sent or times out, the return value is false.
1094 *
1095 * It's not a huge problem if we throw an ERROR here, but if we get into error
1096 * recursion trouble, we'll end up slamming the connection shut, which will
1097 * necessitate failing the entire toplevel transaction even if subtransactions
1098 * were used. Try to use WARNING where we can.
1099 */
1100 static bool
pgfdw_exec_cleanup_query(PGconn * conn,const char * query,bool ignore_errors)1101 pgfdw_exec_cleanup_query(PGconn *conn, const char *query, bool ignore_errors)
1102 {
1103 PGresult *result = NULL;
1104 TimestampTz endtime;
1105
1106 /*
1107 * If it takes too long to execute a cleanup query, assume the connection
1108 * is dead. It's fairly likely that this is why we aborted in the first
1109 * place (e.g. statement timeout, user cancel), so the timeout shouldn't
1110 * be too long.
1111 */
1112 endtime = TimestampTzPlusMilliseconds(GetCurrentTimestamp(), 30000);
1113
1114 /*
1115 * Submit a query. Since we don't use non-blocking mode, this also can
1116 * block. But its risk is relatively small, so we ignore that for now.
1117 */
1118 if (!PQsendQuery(conn, query))
1119 {
1120 pgfdw_report_error(WARNING, NULL, conn, false, query);
1121 return false;
1122 }
1123
1124 /* Get the result of the query. */
1125 if (pgfdw_get_cleanup_result(conn, endtime, &result))
1126 return false;
1127
1128 /* Issue a warning if not successful. */
1129 if (PQresultStatus(result) != PGRES_COMMAND_OK)
1130 {
1131 pgfdw_report_error(WARNING, result, conn, true, query);
1132 return ignore_errors;
1133 }
1134 PQclear(result);
1135
1136 return true;
1137 }
1138
1139 /*
1140 * Get, during abort cleanup, the result of a query that is in progress. This
1141 * might be a query that is being interrupted by transaction abort, or it might
1142 * be a query that was initiated as part of transaction abort to get the remote
1143 * side back to the appropriate state.
1144 *
1145 * endtime is the time at which we should give up and assume the remote
1146 * side is dead. Returns true if the timeout expired, otherwise false.
1147 * Sets *result except in case of a timeout.
1148 */
1149 static bool
pgfdw_get_cleanup_result(PGconn * conn,TimestampTz endtime,PGresult ** result)1150 pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime, PGresult **result)
1151 {
1152 volatile bool timed_out = false;
1153 PGresult *volatile last_res = NULL;
1154
1155 /* In what follows, do not leak any PGresults on an error. */
1156 PG_TRY();
1157 {
1158 for (;;)
1159 {
1160 PGresult *res;
1161
1162 while (PQisBusy(conn))
1163 {
1164 int wc;
1165 TimestampTz now = GetCurrentTimestamp();
1166 long cur_timeout;
1167
1168 /* If timeout has expired, give up, else get sleep time. */
1169 cur_timeout = TimestampDifferenceMilliseconds(now, endtime);
1170 if (cur_timeout <= 0)
1171 {
1172 timed_out = true;
1173 goto exit;
1174 }
1175
1176 /* Sleep until there's something to do */
1177 wc = WaitLatchOrSocket(MyLatch,
1178 WL_LATCH_SET | WL_SOCKET_READABLE | WL_TIMEOUT,
1179 PQsocket(conn),
1180 cur_timeout, PG_WAIT_EXTENSION);
1181 ResetLatch(MyLatch);
1182
1183 CHECK_FOR_INTERRUPTS();
1184
1185 /* Data available in socket? */
1186 if (wc & WL_SOCKET_READABLE)
1187 {
1188 if (!PQconsumeInput(conn))
1189 {
1190 /* connection trouble; treat the same as a timeout */
1191 timed_out = true;
1192 goto exit;
1193 }
1194 }
1195 }
1196
1197 res = PQgetResult(conn);
1198 if (res == NULL)
1199 break; /* query is complete */
1200
1201 PQclear(last_res);
1202 last_res = res;
1203 }
1204 exit: ;
1205 }
1206 PG_CATCH();
1207 {
1208 PQclear(last_res);
1209 PG_RE_THROW();
1210 }
1211 PG_END_TRY();
1212
1213 if (timed_out)
1214 PQclear(last_res);
1215 else
1216 *result = last_res;
1217 return timed_out;
1218 }
1219