1 /*-------------------------------------------------------------------------
2 *
3 * connection.c
4 * Connection management functions for postgres_fdw
5 *
6 * Portions Copyright (c) 2012-2016, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * contrib/postgres_fdw/connection.c
10 *
11 *-------------------------------------------------------------------------
12 */
13 #include "postgres.h"
14
15 #include "postgres_fdw.h"
16
17 #include "access/htup_details.h"
18 #include "catalog/pg_user_mapping.h"
19 #include "access/xact.h"
20 #include "mb/pg_wchar.h"
21 #include "miscadmin.h"
22 #include "storage/latch.h"
23 #include "utils/hsearch.h"
24 #include "utils/inval.h"
25 #include "utils/memutils.h"
26 #include "utils/syscache.h"
27
28
29 /*
30 * Connection cache hash table entry
31 *
32 * The lookup key in this hash table is the user mapping OID. We use just one
33 * connection per user mapping ID, which ensures that all the scans use the
34 * same snapshot during a query. Using the user mapping OID rather than
35 * the foreign server OID + user OID avoids creating multiple connections when
36 * the public user mapping applies to all user OIDs.
37 *
38 * The "conn" pointer can be NULL if we don't currently have a live connection.
39 * When we do have a connection, xact_depth tracks the current depth of
40 * transactions and subtransactions open on the remote side. We need to issue
41 * commands at the same nesting depth on the remote as we're executing at
42 * ourselves, so that rolling back a subtransaction will kill the right
43 * queries and not the wrong ones.
44 */
45 typedef Oid ConnCacheKey;
46
47 typedef struct ConnCacheEntry
48 {
49 ConnCacheKey key; /* hash key (must be first) */
50 PGconn *conn; /* connection to foreign server, or NULL */
51 /* Remaining fields are invalid when conn is NULL: */
52 int xact_depth; /* 0 = no xact open, 1 = main xact open, 2 =
53 * one level of subxact open, etc */
54 bool have_prep_stmt; /* have we prepared any stmts in this xact? */
55 bool have_error; /* have any subxacts aborted in this xact? */
56 bool changing_xact_state; /* xact state change in process */
57 bool invalidated; /* true if reconnect is pending */
58 uint32 server_hashvalue; /* hash value of foreign server OID */
59 uint32 mapping_hashvalue; /* hash value of user mapping OID */
60 } ConnCacheEntry;
61
62 /*
63 * Connection cache (initialized on first use)
64 */
65 static HTAB *ConnectionHash = NULL;
66
67 /* for assigning cursor numbers and prepared statement numbers */
68 static unsigned int cursor_number = 0;
69 static unsigned int prep_stmt_number = 0;
70
71 /* tracks whether any work is needed in callback functions */
72 static bool xact_got_connection = false;
73
74 /* prototypes of private functions */
75 static PGconn *connect_pg_server(ForeignServer *server, UserMapping *user);
76 static void disconnect_pg_server(ConnCacheEntry *entry);
77 static void check_conn_params(const char **keywords, const char **values);
78 static void configure_remote_session(PGconn *conn);
79 static void do_sql_command(PGconn *conn, const char *sql);
80 static void begin_remote_xact(ConnCacheEntry *entry);
81 static void pgfdw_xact_callback(XactEvent event, void *arg);
82 static void pgfdw_subxact_callback(SubXactEvent event,
83 SubTransactionId mySubid,
84 SubTransactionId parentSubid,
85 void *arg);
86 static void pgfdw_inval_callback(Datum arg, int cacheid, uint32 hashvalue);
87 static void pgfdw_reject_incomplete_xact_state_change(ConnCacheEntry *entry);
88 static bool pgfdw_cancel_query(PGconn *conn);
89 static bool pgfdw_exec_cleanup_query(PGconn *conn, const char *query,
90 bool ignore_errors);
91 static bool pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime,
92 PGresult **result);
93
94
95 /*
96 * Get a PGconn which can be used to execute queries on the remote PostgreSQL
97 * server with the user's authorization. A new connection is established
98 * if we don't already have a suitable one, and a transaction is opened at
99 * the right subtransaction nesting depth if we didn't do that already.
100 *
101 * will_prep_stmt must be true if caller intends to create any prepared
102 * statements. Since those don't go away automatically at transaction end
103 * (not even on error), we need this flag to cue manual cleanup.
104 */
105 PGconn *
GetConnection(UserMapping * user,bool will_prep_stmt)106 GetConnection(UserMapping *user, bool will_prep_stmt)
107 {
108 bool found;
109 ConnCacheEntry *entry;
110 ConnCacheKey key;
111
112 /* First time through, initialize connection cache hashtable */
113 if (ConnectionHash == NULL)
114 {
115 HASHCTL ctl;
116
117 MemSet(&ctl, 0, sizeof(ctl));
118 ctl.keysize = sizeof(ConnCacheKey);
119 ctl.entrysize = sizeof(ConnCacheEntry);
120 /* allocate ConnectionHash in the cache context */
121 ctl.hcxt = CacheMemoryContext;
122 ConnectionHash = hash_create("postgres_fdw connections", 8,
123 &ctl,
124 HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
125
126 /*
127 * Register some callback functions that manage connection cleanup.
128 * This should be done just once in each backend.
129 */
130 RegisterXactCallback(pgfdw_xact_callback, NULL);
131 RegisterSubXactCallback(pgfdw_subxact_callback, NULL);
132 CacheRegisterSyscacheCallback(FOREIGNSERVEROID,
133 pgfdw_inval_callback, (Datum) 0);
134 CacheRegisterSyscacheCallback(USERMAPPINGOID,
135 pgfdw_inval_callback, (Datum) 0);
136 }
137
138 /* Set flag that we did GetConnection during the current transaction */
139 xact_got_connection = true;
140
141 /* Create hash key for the entry. Assume no pad bytes in key struct */
142 key = user->umid;
143
144 /*
145 * Find or create cached entry for requested connection.
146 */
147 entry = hash_search(ConnectionHash, &key, HASH_ENTER, &found);
148 if (!found)
149 {
150 /*
151 * We need only clear "conn" here; remaining fields will be filled
152 * later when "conn" is set.
153 */
154 entry->conn = NULL;
155 }
156
157 /* Reject further use of connections which failed abort cleanup. */
158 pgfdw_reject_incomplete_xact_state_change(entry);
159
160 /*
161 * If the connection needs to be remade due to invalidation, disconnect as
162 * soon as we're out of all transactions.
163 */
164 if (entry->conn != NULL && entry->invalidated && entry->xact_depth == 0)
165 {
166 elog(DEBUG3, "closing connection %p for option changes to take effect",
167 entry->conn);
168 disconnect_pg_server(entry);
169 }
170
171 /*
172 * We don't check the health of cached connection here, because it would
173 * require some overhead. Broken connection will be detected when the
174 * connection is actually used.
175 */
176
177 /*
178 * If cache entry doesn't have a connection, we have to establish a new
179 * connection. (If connect_pg_server throws an error, the cache entry
180 * will remain in a valid empty state, ie conn == NULL.)
181 */
182 if (entry->conn == NULL)
183 {
184 ForeignServer *server = GetForeignServer(user->serverid);
185
186 /* Reset all transient state fields, to be sure all are clean */
187 entry->xact_depth = 0;
188 entry->have_prep_stmt = false;
189 entry->have_error = false;
190 entry->changing_xact_state = false;
191 entry->invalidated = false;
192 entry->server_hashvalue =
193 GetSysCacheHashValue1(FOREIGNSERVEROID,
194 ObjectIdGetDatum(server->serverid));
195 entry->mapping_hashvalue =
196 GetSysCacheHashValue1(USERMAPPINGOID,
197 ObjectIdGetDatum(user->umid));
198
199 /* Now try to make the connection */
200 entry->conn = connect_pg_server(server, user);
201
202 elog(DEBUG3, "new postgres_fdw connection %p for server \"%s\" (user mapping oid %u, userid %u)",
203 entry->conn, server->servername, user->umid, user->userid);
204 }
205
206 /*
207 * Start a new transaction or subtransaction if needed.
208 */
209 begin_remote_xact(entry);
210
211 /* Remember if caller will prepare statements */
212 entry->have_prep_stmt |= will_prep_stmt;
213
214 return entry->conn;
215 }
216
217 /*
218 * Connect to remote server using specified server and user mapping properties.
219 */
220 static PGconn *
connect_pg_server(ForeignServer * server,UserMapping * user)221 connect_pg_server(ForeignServer *server, UserMapping *user)
222 {
223 PGconn *volatile conn = NULL;
224
225 /*
226 * Use PG_TRY block to ensure closing connection on error.
227 */
228 PG_TRY();
229 {
230 const char **keywords;
231 const char **values;
232 int n;
233
234 /*
235 * Construct connection params from generic options of ForeignServer
236 * and UserMapping. (Some of them might not be libpq options, in
237 * which case we'll just waste a few array slots.) Add 3 extra slots
238 * for fallback_application_name, client_encoding, end marker.
239 */
240 n = list_length(server->options) + list_length(user->options) + 3;
241 keywords = (const char **) palloc(n * sizeof(char *));
242 values = (const char **) palloc(n * sizeof(char *));
243
244 n = 0;
245 n += ExtractConnectionOptions(server->options,
246 keywords + n, values + n);
247 n += ExtractConnectionOptions(user->options,
248 keywords + n, values + n);
249
250 /* Use "postgres_fdw" as fallback_application_name. */
251 keywords[n] = "fallback_application_name";
252 values[n] = "postgres_fdw";
253 n++;
254
255 /* Set client_encoding so that libpq can convert encoding properly. */
256 keywords[n] = "client_encoding";
257 values[n] = GetDatabaseEncodingName();
258 n++;
259
260 keywords[n] = values[n] = NULL;
261
262 /* verify connection parameters and make connection */
263 check_conn_params(keywords, values);
264
265 conn = PQconnectdbParams(keywords, values, false);
266 if (!conn || PQstatus(conn) != CONNECTION_OK)
267 {
268 char *connmessage;
269 int msglen;
270
271 /* libpq typically appends a newline, strip that */
272 connmessage = pstrdup(PQerrorMessage(conn));
273 msglen = strlen(connmessage);
274 if (msglen > 0 && connmessage[msglen - 1] == '\n')
275 connmessage[msglen - 1] = '\0';
276 ereport(ERROR,
277 (errcode(ERRCODE_SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION),
278 errmsg("could not connect to server \"%s\"",
279 server->servername),
280 errdetail_internal("%s", connmessage)));
281 }
282
283 /*
284 * Check that non-superuser has used password to establish connection;
285 * otherwise, he's piggybacking on the postgres server's user
286 * identity. See also dblink_security_check() in contrib/dblink.
287 */
288 if (!superuser() && !PQconnectionUsedPassword(conn))
289 ereport(ERROR,
290 (errcode(ERRCODE_S_R_E_PROHIBITED_SQL_STATEMENT_ATTEMPTED),
291 errmsg("password is required"),
292 errdetail("Non-superuser cannot connect if the server does not request a password."),
293 errhint("Target server's authentication method must be changed.")));
294
295 /* Prepare new session for use */
296 configure_remote_session(conn);
297
298 pfree(keywords);
299 pfree(values);
300 }
301 PG_CATCH();
302 {
303 /* Release PGconn data structure if we managed to create one */
304 if (conn)
305 PQfinish(conn);
306 PG_RE_THROW();
307 }
308 PG_END_TRY();
309
310 return conn;
311 }
312
313 /*
314 * Disconnect any open connection for a connection cache entry.
315 */
316 static void
disconnect_pg_server(ConnCacheEntry * entry)317 disconnect_pg_server(ConnCacheEntry *entry)
318 {
319 if (entry->conn != NULL)
320 {
321 PQfinish(entry->conn);
322 entry->conn = NULL;
323 }
324 }
325
326 /*
327 * For non-superusers, insist that the connstr specify a password. This
328 * prevents a password from being picked up from .pgpass, a service file,
329 * the environment, etc. We don't want the postgres user's passwords
330 * to be accessible to non-superusers. (See also dblink_connstr_check in
331 * contrib/dblink.)
332 */
333 static void
check_conn_params(const char ** keywords,const char ** values)334 check_conn_params(const char **keywords, const char **values)
335 {
336 int i;
337
338 /* no check required if superuser */
339 if (superuser())
340 return;
341
342 /* ok if params contain a non-empty password */
343 for (i = 0; keywords[i] != NULL; i++)
344 {
345 if (strcmp(keywords[i], "password") == 0 && values[i][0] != '\0')
346 return;
347 }
348
349 ereport(ERROR,
350 (errcode(ERRCODE_S_R_E_PROHIBITED_SQL_STATEMENT_ATTEMPTED),
351 errmsg("password is required"),
352 errdetail("Non-superusers must provide a password in the user mapping.")));
353 }
354
355 /*
356 * Issue SET commands to make sure remote session is configured properly.
357 *
358 * We do this just once at connection, assuming nothing will change the
359 * values later. Since we'll never send volatile function calls to the
360 * remote, there shouldn't be any way to break this assumption from our end.
361 * It's possible to think of ways to break it at the remote end, eg making
362 * a foreign table point to a view that includes a set_config call ---
363 * but once you admit the possibility of a malicious view definition,
364 * there are any number of ways to break things.
365 */
366 static void
configure_remote_session(PGconn * conn)367 configure_remote_session(PGconn *conn)
368 {
369 int remoteversion = PQserverVersion(conn);
370
371 /* Force the search path to contain only pg_catalog (see deparse.c) */
372 do_sql_command(conn, "SET search_path = pg_catalog");
373
374 /*
375 * Set remote timezone; this is basically just cosmetic, since all
376 * transmitted and returned timestamptzs should specify a zone explicitly
377 * anyway. However it makes the regression test outputs more predictable.
378 *
379 * We don't risk setting remote zone equal to ours, since the remote
380 * server might use a different timezone database. Instead, use UTC
381 * (quoted, because very old servers are picky about case).
382 */
383 do_sql_command(conn, "SET timezone = 'UTC'");
384
385 /*
386 * Set values needed to ensure unambiguous data output from remote. (This
387 * logic should match what pg_dump does. See also set_transmission_modes
388 * in postgres_fdw.c.)
389 */
390 do_sql_command(conn, "SET datestyle = ISO");
391 if (remoteversion >= 80400)
392 do_sql_command(conn, "SET intervalstyle = postgres");
393 if (remoteversion >= 90000)
394 do_sql_command(conn, "SET extra_float_digits = 3");
395 else
396 do_sql_command(conn, "SET extra_float_digits = 2");
397 }
398
399 /*
400 * Convenience subroutine to issue a non-data-returning SQL command to remote
401 */
402 static void
do_sql_command(PGconn * conn,const char * sql)403 do_sql_command(PGconn *conn, const char *sql)
404 {
405 PGresult *res;
406
407 if (!PQsendQuery(conn, sql))
408 pgfdw_report_error(ERROR, NULL, conn, false, sql);
409 res = pgfdw_get_result(conn, sql);
410 if (PQresultStatus(res) != PGRES_COMMAND_OK)
411 pgfdw_report_error(ERROR, res, conn, true, sql);
412 PQclear(res);
413 }
414
415 /*
416 * Start remote transaction or subtransaction, if needed.
417 *
418 * Note that we always use at least REPEATABLE READ in the remote session.
419 * This is so that, if a query initiates multiple scans of the same or
420 * different foreign tables, we will get snapshot-consistent results from
421 * those scans. A disadvantage is that we can't provide sane emulation of
422 * READ COMMITTED behavior --- it would be nice if we had some other way to
423 * control which remote queries share a snapshot.
424 */
425 static void
begin_remote_xact(ConnCacheEntry * entry)426 begin_remote_xact(ConnCacheEntry *entry)
427 {
428 int curlevel = GetCurrentTransactionNestLevel();
429
430 /* Start main transaction if we haven't yet */
431 if (entry->xact_depth <= 0)
432 {
433 const char *sql;
434
435 elog(DEBUG3, "starting remote transaction on connection %p",
436 entry->conn);
437
438 if (IsolationIsSerializable())
439 sql = "START TRANSACTION ISOLATION LEVEL SERIALIZABLE";
440 else
441 sql = "START TRANSACTION ISOLATION LEVEL REPEATABLE READ";
442 entry->changing_xact_state = true;
443 do_sql_command(entry->conn, sql);
444 entry->xact_depth = 1;
445 entry->changing_xact_state = false;
446 }
447
448 /*
449 * If we're in a subtransaction, stack up savepoints to match our level.
450 * This ensures we can rollback just the desired effects when a
451 * subtransaction aborts.
452 */
453 while (entry->xact_depth < curlevel)
454 {
455 char sql[64];
456
457 snprintf(sql, sizeof(sql), "SAVEPOINT s%d", entry->xact_depth + 1);
458 entry->changing_xact_state = true;
459 do_sql_command(entry->conn, sql);
460 entry->xact_depth++;
461 entry->changing_xact_state = false;
462 }
463 }
464
465 /*
466 * Release connection reference count created by calling GetConnection.
467 */
468 void
ReleaseConnection(PGconn * conn)469 ReleaseConnection(PGconn *conn)
470 {
471 /*
472 * Currently, we don't actually track connection references because all
473 * cleanup is managed on a transaction or subtransaction basis instead. So
474 * there's nothing to do here.
475 */
476 }
477
478 /*
479 * Assign a "unique" number for a cursor.
480 *
481 * These really only need to be unique per connection within a transaction.
482 * For the moment we ignore the per-connection point and assign them across
483 * all connections in the transaction, but we ask for the connection to be
484 * supplied in case we want to refine that.
485 *
486 * Note that even if wraparound happens in a very long transaction, actual
487 * collisions are highly improbable; just be sure to use %u not %d to print.
488 */
489 unsigned int
GetCursorNumber(PGconn * conn)490 GetCursorNumber(PGconn *conn)
491 {
492 return ++cursor_number;
493 }
494
495 /*
496 * Assign a "unique" number for a prepared statement.
497 *
498 * This works much like GetCursorNumber, except that we never reset the counter
499 * within a session. That's because we can't be 100% sure we've gotten rid
500 * of all prepared statements on all connections, and it's not really worth
501 * increasing the risk of prepared-statement name collisions by resetting.
502 */
503 unsigned int
GetPrepStmtNumber(PGconn * conn)504 GetPrepStmtNumber(PGconn *conn)
505 {
506 return ++prep_stmt_number;
507 }
508
509 /*
510 * Submit a query and wait for the result.
511 *
512 * This function is interruptible by signals.
513 *
514 * Caller is responsible for the error handling on the result.
515 */
516 PGresult *
pgfdw_exec_query(PGconn * conn,const char * query)517 pgfdw_exec_query(PGconn *conn, const char *query)
518 {
519 /*
520 * Submit a query. Since we don't use non-blocking mode, this also can
521 * block. But its risk is relatively small, so we ignore that for now.
522 */
523 if (!PQsendQuery(conn, query))
524 pgfdw_report_error(ERROR, NULL, conn, false, query);
525
526 /* Wait for the result. */
527 return pgfdw_get_result(conn, query);
528 }
529
530 /*
531 * Wait for the result from a prior asynchronous execution function call.
532 *
533 * This function offers quick responsiveness by checking for any interruptions.
534 *
535 * This function emulates PQexec()'s behavior of returning the last result
536 * when there are many.
537 *
538 * Caller is responsible for the error handling on the result.
539 */
540 PGresult *
pgfdw_get_result(PGconn * conn,const char * query)541 pgfdw_get_result(PGconn *conn, const char *query)
542 {
543 PGresult *volatile last_res = NULL;
544
545 /* In what follows, do not leak any PGresults on an error. */
546 PG_TRY();
547 {
548 for (;;)
549 {
550 PGresult *res;
551
552 while (PQisBusy(conn))
553 {
554 int wc;
555
556 /* Sleep until there's something to do */
557 wc = WaitLatchOrSocket(MyLatch,
558 WL_LATCH_SET | WL_SOCKET_READABLE,
559 PQsocket(conn),
560 -1L);
561 ResetLatch(MyLatch);
562
563 CHECK_FOR_INTERRUPTS();
564
565 /* Data available in socket? */
566 if (wc & WL_SOCKET_READABLE)
567 {
568 if (!PQconsumeInput(conn))
569 pgfdw_report_error(ERROR, NULL, conn, false, query);
570 }
571 }
572
573 res = PQgetResult(conn);
574 if (res == NULL)
575 break; /* query is complete */
576
577 PQclear(last_res);
578 last_res = res;
579 }
580 }
581 PG_CATCH();
582 {
583 PQclear(last_res);
584 PG_RE_THROW();
585 }
586 PG_END_TRY();
587
588 return last_res;
589 }
590
591 /*
592 * Report an error we got from the remote server.
593 *
594 * elevel: error level to use (typically ERROR, but might be less)
595 * res: PGresult containing the error
596 * conn: connection we did the query on
597 * clear: if true, PQclear the result (otherwise caller will handle it)
598 * sql: NULL, or text of remote command we tried to execute
599 *
600 * Note: callers that choose not to throw ERROR for a remote error are
601 * responsible for making sure that the associated ConnCacheEntry gets
602 * marked with have_error = true.
603 */
604 void
pgfdw_report_error(int elevel,PGresult * res,PGconn * conn,bool clear,const char * sql)605 pgfdw_report_error(int elevel, PGresult *res, PGconn *conn,
606 bool clear, const char *sql)
607 {
608 /* If requested, PGresult must be released before leaving this function. */
609 PG_TRY();
610 {
611 char *diag_sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE);
612 char *message_primary = PQresultErrorField(res, PG_DIAG_MESSAGE_PRIMARY);
613 char *message_detail = PQresultErrorField(res, PG_DIAG_MESSAGE_DETAIL);
614 char *message_hint = PQresultErrorField(res, PG_DIAG_MESSAGE_HINT);
615 char *message_context = PQresultErrorField(res, PG_DIAG_CONTEXT);
616 int sqlstate;
617
618 if (diag_sqlstate)
619 sqlstate = MAKE_SQLSTATE(diag_sqlstate[0],
620 diag_sqlstate[1],
621 diag_sqlstate[2],
622 diag_sqlstate[3],
623 diag_sqlstate[4]);
624 else
625 sqlstate = ERRCODE_CONNECTION_FAILURE;
626
627 /*
628 * If we don't get a message from the PGresult, try the PGconn. This
629 * is needed because for connection-level failures, PQexec may just
630 * return NULL, not a PGresult at all.
631 */
632 if (message_primary == NULL)
633 message_primary = PQerrorMessage(conn);
634
635 ereport(elevel,
636 (errcode(sqlstate),
637 message_primary ? errmsg_internal("%s", message_primary) :
638 errmsg("could not obtain message string for remote error"),
639 message_detail ? errdetail_internal("%s", message_detail) : 0,
640 message_hint ? errhint("%s", message_hint) : 0,
641 message_context ? errcontext("%s", message_context) : 0,
642 sql ? errcontext("Remote SQL command: %s", sql) : 0));
643 }
644 PG_CATCH();
645 {
646 if (clear)
647 PQclear(res);
648 PG_RE_THROW();
649 }
650 PG_END_TRY();
651 if (clear)
652 PQclear(res);
653 }
654
655 /*
656 * pgfdw_xact_callback --- cleanup at main-transaction end.
657 *
658 * This runs just late enough that it must not enter user-defined code
659 * locally. (Entering such code on the remote side is fine. Its remote
660 * COMMIT TRANSACTION may run deferred triggers.)
661 */
662 static void
pgfdw_xact_callback(XactEvent event,void * arg)663 pgfdw_xact_callback(XactEvent event, void *arg)
664 {
665 HASH_SEQ_STATUS scan;
666 ConnCacheEntry *entry;
667
668 /* Quick exit if no connections were touched in this transaction. */
669 if (!xact_got_connection)
670 return;
671
672 /*
673 * Scan all connection cache entries to find open remote transactions, and
674 * close them.
675 */
676 hash_seq_init(&scan, ConnectionHash);
677 while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
678 {
679 PGresult *res;
680
681 /* Ignore cache entry if no open connection right now */
682 if (entry->conn == NULL)
683 continue;
684
685 /* If it has an open remote transaction, try to close it */
686 if (entry->xact_depth > 0)
687 {
688 bool abort_cleanup_failure = false;
689
690 elog(DEBUG3, "closing remote transaction on connection %p",
691 entry->conn);
692
693 switch (event)
694 {
695 case XACT_EVENT_PARALLEL_PRE_COMMIT:
696 case XACT_EVENT_PRE_COMMIT:
697
698 /*
699 * If abort cleanup previously failed for this connection,
700 * we can't issue any more commands against it.
701 */
702 pgfdw_reject_incomplete_xact_state_change(entry);
703
704 /* Commit all remote transactions during pre-commit */
705 entry->changing_xact_state = true;
706 do_sql_command(entry->conn, "COMMIT TRANSACTION");
707 entry->changing_xact_state = false;
708
709 /*
710 * If there were any errors in subtransactions, and we
711 * made prepared statements, do a DEALLOCATE ALL to make
712 * sure we get rid of all prepared statements. This is
713 * annoying and not terribly bulletproof, but it's
714 * probably not worth trying harder.
715 *
716 * DEALLOCATE ALL only exists in 8.3 and later, so this
717 * constrains how old a server postgres_fdw can
718 * communicate with. We intentionally ignore errors in
719 * the DEALLOCATE, so that we can hobble along to some
720 * extent with older servers (leaking prepared statements
721 * as we go; but we don't really support update operations
722 * pre-8.3 anyway).
723 */
724 if (entry->have_prep_stmt && entry->have_error)
725 {
726 res = PQexec(entry->conn, "DEALLOCATE ALL");
727 PQclear(res);
728 }
729 entry->have_prep_stmt = false;
730 entry->have_error = false;
731 break;
732 case XACT_EVENT_PRE_PREPARE:
733
734 /*
735 * We disallow any remote transactions, since it's not
736 * very reasonable to hold them open until the prepared
737 * transaction is committed. For the moment, throw error
738 * unconditionally; later we might allow read-only cases.
739 * Note that the error will cause us to come right back
740 * here with event == XACT_EVENT_ABORT, so we'll clean up
741 * the connection state at that point.
742 */
743 ereport(ERROR,
744 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
745 errmsg("cannot PREPARE a transaction that has operated on postgres_fdw foreign tables")));
746 break;
747 case XACT_EVENT_PARALLEL_COMMIT:
748 case XACT_EVENT_COMMIT:
749 case XACT_EVENT_PREPARE:
750 /* Pre-commit should have closed the open transaction */
751 elog(ERROR, "missed cleaning up connection during pre-commit");
752 break;
753 case XACT_EVENT_PARALLEL_ABORT:
754 case XACT_EVENT_ABORT:
755
756 /*
757 * Don't try to clean up the connection if we're already
758 * in error recursion trouble.
759 */
760 if (in_error_recursion_trouble())
761 entry->changing_xact_state = true;
762
763 /*
764 * If connection is already unsalvageable, don't touch it
765 * further.
766 */
767 if (entry->changing_xact_state)
768 break;
769
770 /*
771 * Mark this connection as in the process of changing
772 * transaction state.
773 */
774 entry->changing_xact_state = true;
775
776 /* Assume we might have lost track of prepared statements */
777 entry->have_error = true;
778
779 /*
780 * If a command has been submitted to the remote server by
781 * using an asynchronous execution function, the command
782 * might not have yet completed. Check to see if a
783 * command is still being processed by the remote server,
784 * and if so, request cancellation of the command.
785 */
786 if (PQtransactionStatus(entry->conn) == PQTRANS_ACTIVE &&
787 !pgfdw_cancel_query(entry->conn))
788 {
789 /* Unable to cancel running query. */
790 abort_cleanup_failure = true;
791 }
792 else if (!pgfdw_exec_cleanup_query(entry->conn,
793 "ABORT TRANSACTION",
794 false))
795 {
796 /* Unable to abort remote transaction. */
797 abort_cleanup_failure = true;
798 }
799 else if (entry->have_prep_stmt && entry->have_error &&
800 !pgfdw_exec_cleanup_query(entry->conn,
801 "DEALLOCATE ALL",
802 true))
803 {
804 /* Trouble clearing prepared statements. */
805 abort_cleanup_failure = true;
806 }
807 else
808 {
809 entry->have_prep_stmt = false;
810 entry->have_error = false;
811 }
812
813 /* Disarm changing_xact_state if it all worked. */
814 entry->changing_xact_state = abort_cleanup_failure;
815 break;
816 }
817 }
818
819 /* Reset state to show we're out of a transaction */
820 entry->xact_depth = 0;
821
822 /*
823 * If the connection isn't in a good idle state or it is marked as
824 * invalid, then discard it to recover. Next GetConnection will open a
825 * new connection.
826 */
827 if (PQstatus(entry->conn) != CONNECTION_OK ||
828 PQtransactionStatus(entry->conn) != PQTRANS_IDLE ||
829 entry->changing_xact_state ||
830 entry->invalidated)
831 {
832 elog(DEBUG3, "discarding connection %p", entry->conn);
833 disconnect_pg_server(entry);
834 }
835 }
836
837 /*
838 * Regardless of the event type, we can now mark ourselves as out of the
839 * transaction. (Note: if we are here during PRE_COMMIT or PRE_PREPARE,
840 * this saves a useless scan of the hashtable during COMMIT or PREPARE.)
841 */
842 xact_got_connection = false;
843
844 /* Also reset cursor numbering for next transaction */
845 cursor_number = 0;
846 }
847
848 /*
849 * pgfdw_subxact_callback --- cleanup at subtransaction end.
850 */
851 static void
pgfdw_subxact_callback(SubXactEvent event,SubTransactionId mySubid,SubTransactionId parentSubid,void * arg)852 pgfdw_subxact_callback(SubXactEvent event, SubTransactionId mySubid,
853 SubTransactionId parentSubid, void *arg)
854 {
855 HASH_SEQ_STATUS scan;
856 ConnCacheEntry *entry;
857 int curlevel;
858
859 /* Nothing to do at subxact start, nor after commit. */
860 if (!(event == SUBXACT_EVENT_PRE_COMMIT_SUB ||
861 event == SUBXACT_EVENT_ABORT_SUB))
862 return;
863
864 /* Quick exit if no connections were touched in this transaction. */
865 if (!xact_got_connection)
866 return;
867
868 /*
869 * Scan all connection cache entries to find open remote subtransactions
870 * of the current level, and close them.
871 */
872 curlevel = GetCurrentTransactionNestLevel();
873 hash_seq_init(&scan, ConnectionHash);
874 while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
875 {
876 char sql[100];
877
878 /*
879 * We only care about connections with open remote subtransactions of
880 * the current level.
881 */
882 if (entry->conn == NULL || entry->xact_depth < curlevel)
883 continue;
884
885 if (entry->xact_depth > curlevel)
886 elog(ERROR, "missed cleaning up remote subtransaction at level %d",
887 entry->xact_depth);
888
889 if (event == SUBXACT_EVENT_PRE_COMMIT_SUB)
890 {
891 /*
892 * If abort cleanup previously failed for this connection, we
893 * can't issue any more commands against it.
894 */
895 pgfdw_reject_incomplete_xact_state_change(entry);
896
897 /* Commit all remote subtransactions during pre-commit */
898 snprintf(sql, sizeof(sql), "RELEASE SAVEPOINT s%d", curlevel);
899 entry->changing_xact_state = true;
900 do_sql_command(entry->conn, sql);
901 entry->changing_xact_state = false;
902 }
903 else if (in_error_recursion_trouble())
904 {
905 /*
906 * Don't try to clean up the connection if we're already in error
907 * recursion trouble.
908 */
909 entry->changing_xact_state = true;
910 }
911 else if (!entry->changing_xact_state)
912 {
913 bool abort_cleanup_failure = false;
914
915 /* Remember that abort cleanup is in progress. */
916 entry->changing_xact_state = true;
917
918 /* Assume we might have lost track of prepared statements */
919 entry->have_error = true;
920
921 /*
922 * If a command has been submitted to the remote server by using
923 * an asynchronous execution function, the command might not have
924 * yet completed. Check to see if a command is still being
925 * processed by the remote server, and if so, request cancellation
926 * of the command.
927 */
928 if (PQtransactionStatus(entry->conn) == PQTRANS_ACTIVE &&
929 !pgfdw_cancel_query(entry->conn))
930 abort_cleanup_failure = true;
931 else
932 {
933 /* Rollback all remote subtransactions during abort */
934 snprintf(sql, sizeof(sql),
935 "ROLLBACK TO SAVEPOINT s%d; RELEASE SAVEPOINT s%d",
936 curlevel, curlevel);
937 if (!pgfdw_exec_cleanup_query(entry->conn, sql, false))
938 abort_cleanup_failure = true;
939 }
940
941 /* Disarm changing_xact_state if it all worked. */
942 entry->changing_xact_state = abort_cleanup_failure;
943 }
944
945 /* OK, we're outta that level of subtransaction */
946 entry->xact_depth--;
947 }
948 }
949
950 /*
951 * Connection invalidation callback function
952 *
953 * After a change to a pg_foreign_server or pg_user_mapping catalog entry,
954 * close connections depending on that entry immediately if current transaction
955 * has not used those connections yet. Otherwise, mark those connections as
956 * invalid and then make pgfdw_xact_callback() close them at the end of current
957 * transaction, since they cannot be closed in the midst of the transaction
958 * using them. Closed connections will be remade at the next opportunity if
959 * necessary.
960 *
961 * Although most cache invalidation callbacks blow away all the related stuff
962 * regardless of the given hashvalue, connections are expensive enough that
963 * it's worth trying to avoid that.
964 *
965 * NB: We could avoid unnecessary disconnection more strictly by examining
966 * individual option values, but it seems too much effort for the gain.
967 */
968 static void
pgfdw_inval_callback(Datum arg,int cacheid,uint32 hashvalue)969 pgfdw_inval_callback(Datum arg, int cacheid, uint32 hashvalue)
970 {
971 HASH_SEQ_STATUS scan;
972 ConnCacheEntry *entry;
973
974 Assert(cacheid == FOREIGNSERVEROID || cacheid == USERMAPPINGOID);
975
976 /* ConnectionHash must exist already, if we're registered */
977 hash_seq_init(&scan, ConnectionHash);
978 while ((entry = (ConnCacheEntry *) hash_seq_search(&scan)))
979 {
980 /* Ignore invalid entries */
981 if (entry->conn == NULL)
982 continue;
983
984 /* hashvalue == 0 means a cache reset, must clear all state */
985 if (hashvalue == 0 ||
986 (cacheid == FOREIGNSERVEROID &&
987 entry->server_hashvalue == hashvalue) ||
988 (cacheid == USERMAPPINGOID &&
989 entry->mapping_hashvalue == hashvalue))
990 {
991 /*
992 * Close the connection immediately if it's not used yet in this
993 * transaction. Otherwise mark it as invalid so that
994 * pgfdw_xact_callback() can close it at the end of this
995 * transaction.
996 */
997 if (entry->xact_depth == 0)
998 {
999 elog(DEBUG3, "discarding connection %p", entry->conn);
1000 disconnect_pg_server(entry);
1001 }
1002 else
1003 entry->invalidated = true;
1004 }
1005 }
1006 }
1007
1008 /*
1009 * Raise an error if the given connection cache entry is marked as being
1010 * in the middle of an xact state change. This should be called at which no
1011 * such change is expected to be in progress; if one is found to be in
1012 * progress, it means that we aborted in the middle of a previous state change
1013 * and now don't know what the remote transaction state actually is.
1014 * Such connections can't safely be further used. Re-establishing the
1015 * connection would change the snapshot and roll back any writes already
1016 * performed, so that's not an option, either. Thus, we must abort.
1017 */
1018 static void
pgfdw_reject_incomplete_xact_state_change(ConnCacheEntry * entry)1019 pgfdw_reject_incomplete_xact_state_change(ConnCacheEntry *entry)
1020 {
1021 HeapTuple tup;
1022 Form_pg_user_mapping umform;
1023 ForeignServer *server;
1024
1025 /* nothing to do for inactive entries and entries of sane state */
1026 if (entry->conn == NULL || !entry->changing_xact_state)
1027 return;
1028
1029 /* make sure this entry is inactive */
1030 disconnect_pg_server(entry);
1031
1032 /* find server name to be shown in the message below */
1033 tup = SearchSysCache1(USERMAPPINGOID,
1034 ObjectIdGetDatum(entry->key));
1035 if (!HeapTupleIsValid(tup))
1036 elog(ERROR, "cache lookup failed for user mapping %u", entry->key);
1037 umform = (Form_pg_user_mapping) GETSTRUCT(tup);
1038 server = GetForeignServer(umform->umserver);
1039 ReleaseSysCache(tup);
1040
1041 ereport(ERROR,
1042 (errcode(ERRCODE_CONNECTION_EXCEPTION),
1043 errmsg("connection to server \"%s\" was lost",
1044 server->servername)));
1045 }
1046
1047 /*
1048 * Cancel the currently-in-progress query (whose query text we do not have)
1049 * and ignore the result. Returns true if we successfully cancel the query
1050 * and discard any pending result, and false if not.
1051 *
1052 * It's not a huge problem if we throw an ERROR here, but if we get into error
1053 * recursion trouble, we'll end up slamming the connection shut, which will
1054 * necessitate failing the entire toplevel transaction even if subtransactions
1055 * were used. Try to use WARNING where we can.
1056 */
1057 static bool
pgfdw_cancel_query(PGconn * conn)1058 pgfdw_cancel_query(PGconn *conn)
1059 {
1060 PGcancel *cancel;
1061 char errbuf[256];
1062 PGresult *result = NULL;
1063 TimestampTz endtime;
1064
1065 /*
1066 * If it takes too long to cancel the query and discard the result, assume
1067 * the connection is dead.
1068 */
1069 endtime = TimestampTzPlusMilliseconds(GetCurrentTimestamp(), 30000);
1070
1071 /*
1072 * Issue cancel request. Unfortunately, there's no good way to limit the
1073 * amount of time that we might block inside PQgetCancel().
1074 */
1075 if ((cancel = PQgetCancel(conn)))
1076 {
1077 if (!PQcancel(cancel, errbuf, sizeof(errbuf)))
1078 {
1079 ereport(WARNING,
1080 (errcode(ERRCODE_CONNECTION_FAILURE),
1081 errmsg("could not send cancel request: %s",
1082 errbuf)));
1083 PQfreeCancel(cancel);
1084 return false;
1085 }
1086 PQfreeCancel(cancel);
1087 }
1088
1089 /* Get and discard the result of the query. */
1090 if (pgfdw_get_cleanup_result(conn, endtime, &result))
1091 return false;
1092 PQclear(result);
1093
1094 return true;
1095 }
1096
1097 /*
1098 * Submit a query during (sub)abort cleanup and wait up to 30 seconds for the
1099 * result. If the query is executed without error, the return value is true.
1100 * If the query is executed successfully but returns an error, the return
1101 * value is true if and only if ignore_errors is set. If the query can't be
1102 * sent or times out, the return value is false.
1103 *
1104 * It's not a huge problem if we throw an ERROR here, but if we get into error
1105 * recursion trouble, we'll end up slamming the connection shut, which will
1106 * necessitate failing the entire toplevel transaction even if subtransactions
1107 * were used. Try to use WARNING where we can.
1108 */
1109 static bool
pgfdw_exec_cleanup_query(PGconn * conn,const char * query,bool ignore_errors)1110 pgfdw_exec_cleanup_query(PGconn *conn, const char *query, bool ignore_errors)
1111 {
1112 PGresult *result = NULL;
1113 TimestampTz endtime;
1114
1115 /*
1116 * If it takes too long to execute a cleanup query, assume the connection
1117 * is dead. It's fairly likely that this is why we aborted in the first
1118 * place (e.g. statement timeout, user cancel), so the timeout shouldn't
1119 * be too long.
1120 */
1121 endtime = TimestampTzPlusMilliseconds(GetCurrentTimestamp(), 30000);
1122
1123 /*
1124 * Submit a query. Since we don't use non-blocking mode, this also can
1125 * block. But its risk is relatively small, so we ignore that for now.
1126 */
1127 if (!PQsendQuery(conn, query))
1128 {
1129 pgfdw_report_error(WARNING, NULL, conn, false, query);
1130 return false;
1131 }
1132
1133 /* Get the result of the query. */
1134 if (pgfdw_get_cleanup_result(conn, endtime, &result))
1135 return false;
1136
1137 /* Issue a warning if not successful. */
1138 if (PQresultStatus(result) != PGRES_COMMAND_OK)
1139 {
1140 pgfdw_report_error(WARNING, result, conn, true, query);
1141 return ignore_errors;
1142 }
1143 PQclear(result);
1144
1145 return true;
1146 }
1147
1148 /*
1149 * Get, during abort cleanup, the result of a query that is in progress. This
1150 * might be a query that is being interrupted by transaction abort, or it might
1151 * be a query that was initiated as part of transaction abort to get the remote
1152 * side back to the appropriate state.
1153 *
1154 * endtime is the time at which we should give up and assume the remote
1155 * side is dead. Returns true if the timeout expired, otherwise false.
1156 * Sets *result except in case of a timeout.
1157 */
1158 static bool
pgfdw_get_cleanup_result(PGconn * conn,TimestampTz endtime,PGresult ** result)1159 pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime, PGresult **result)
1160 {
1161 volatile bool timed_out = false;
1162 PGresult *volatile last_res = NULL;
1163
1164 /* In what follows, do not leak any PGresults on an error. */
1165 PG_TRY();
1166 {
1167 for (;;)
1168 {
1169 PGresult *res;
1170
1171 while (PQisBusy(conn))
1172 {
1173 int wc;
1174 TimestampTz now = GetCurrentTimestamp();
1175 long cur_timeout;
1176
1177 /* If timeout has expired, give up, else get sleep time. */
1178 cur_timeout = TimestampDifferenceMilliseconds(now, endtime);
1179 if (cur_timeout <= 0)
1180 {
1181 timed_out = true;
1182 goto exit;
1183 }
1184
1185 /* Sleep until there's something to do */
1186 wc = WaitLatchOrSocket(MyLatch,
1187 WL_LATCH_SET | WL_SOCKET_READABLE | WL_TIMEOUT,
1188 PQsocket(conn),
1189 cur_timeout);
1190 ResetLatch(MyLatch);
1191
1192 CHECK_FOR_INTERRUPTS();
1193
1194 /* Data available in socket? */
1195 if (wc & WL_SOCKET_READABLE)
1196 {
1197 if (!PQconsumeInput(conn))
1198 {
1199 /* connection trouble; treat the same as a timeout */
1200 timed_out = true;
1201 goto exit;
1202 }
1203 }
1204 }
1205
1206 res = PQgetResult(conn);
1207 if (res == NULL)
1208 break; /* query is complete */
1209
1210 PQclear(last_res);
1211 last_res = res;
1212 }
1213 exit: ;
1214 }
1215 PG_CATCH();
1216 {
1217 PQclear(last_res);
1218 PG_RE_THROW();
1219 }
1220 PG_END_TRY();
1221
1222 if (timed_out)
1223 PQclear(last_res);
1224 else
1225 *result = last_res;
1226 return timed_out;
1227 }
1228