1 /* -*-pgsql-c-*- */
2 /*
3 * $Header$
4 *
5 * pgpool: a language independent connection pool server for PostgreSQL
6 * written by Tatsuo Ishii
7 *
8 * Copyright (c) 2003-2018 PgPool Global Development Group
9 *
10 * Permission to use, copy, modify, and distribute this software and
11 * its documentation for any purpose and without fee is hereby
12 * granted, provided that the above copyright notice appear in all
13 * copies and that both that copyright notice and this permission
14 * notice appear in supporting documentation, and that the name of the
15 * author not be used in advertising or publicity pertaining to
16 * distribution of the software without specific, written prior
17 * permission. The author makes no representations about the
18 * suitability of this software for any purpose. It is provided "as
19 * is" without express or implied warranty.
20 *
21 * recovery.c: online recovery process
22 *
23 */
24
25 #include "config.h"
26
27 #include <unistd.h>
28 #include <string.h>
29 #include "utils/elog.h"
30
31 #include "pool.h"
32 #include "pool_config.h"
33
34 #include "libpq-fe.h"
35
36 #include "watchdog/wd_ipc_commands.h"
37
/*
 * Number of retries used by the polling loops in this file.  Those loops
 * sleep 3 seconds per iteration, so the recovery_timeout setting (reported
 * in seconds by the log messages below) is divided by 3.
 */
#define WAIT_RETRY_COUNT (pool_config->recovery_timeout / 3)

/* Stage selector passed as the "stage" argument of exec_recovery() */
#define FIRST_STAGE 0
#define SECOND_STAGE 1

/* Prototypes of the file-local helpers defined further down */
static void exec_checkpoint(PGconn *conn);
static void exec_recovery(PGconn *conn, BackendInfo * master_backend, BackendInfo * recovery_backend, char stage, int recovery_node);
static void exec_remote_start(PGconn *conn, BackendInfo * backend);
static PGconn *connect_backend_libpq(BackendInfo * backend);
static void check_postmaster_started(BackendInfo * backend);

/*
 * Scratch buffer holding the SQL text built by exec_recovery() and
 * exec_remote_start().
 */
static char recovery_command[1024];

/*
 * Flag polled by start_recovery() while it waits for the failback request
 * to complete.  It is declared here only; the definition lives elsewhere.
 */
extern volatile sig_atomic_t pcp_worker_wakeup_request;
52
53 /*
54 * Start online recovery.
55 * "recovery_node" is the node to be recovered.
56 * Master or primary node is chosen in this function.
57 */
58 void
start_recovery(int recovery_node)59 start_recovery(int recovery_node)
60 {
61 int node_id;
62 BackendInfo *backend;
63 BackendInfo *recovery_backend;
64 PGconn *conn;
65 int failback_wait_count;
66 #define FAILBACK_WAIT_MAX_RETRY 5 /* 5 seconds should be enough for failback
67 * operation */
68
69 ereport(LOG,
70 (errmsg("starting recovering node %d", recovery_node)));
71
72 if ((recovery_node < 0) || (recovery_node >= pool_config->backend_desc->num_backends))
73 ereport(ERROR,
74 (errmsg("node recovery failed, node id: %d is not valid", recovery_node)));
75
76 if (*(my_backend_status[(recovery_node)]) == CON_UNUSED)
77 ereport(ERROR,
78 (errmsg("node recovery failed, node id: %d is unused", recovery_node)));
79
80 if (VALID_BACKEND(recovery_node))
81 ereport(ERROR,
82 (errmsg("node recovery failed, node id: %d is alive", recovery_node)));
83
84 /* select master/primary node */
85 node_id = MASTER_SLAVE ? PRIMARY_NODE_ID : REAL_MASTER_NODE_ID;
86 backend = &pool_config->backend_desc->backend_info[node_id];
87
88 /* get node info to be recovered */
89 recovery_backend = &pool_config->backend_desc->backend_info[recovery_node];
90
91 conn = connect_backend_libpq(backend);
92 if (conn == NULL)
93 ereport(ERROR,
94 (errmsg("node recovery failed, unable to connect to master node: %d ", node_id)));
95
96 PG_TRY();
97 {
98 /* 1st stage */
99 if (REPLICATION)
100 {
101 exec_checkpoint(conn);
102 ereport(LOG,
103 (errmsg("node recovery, CHECKPOINT in the 1st stage done")));
104 }
105
106 exec_recovery(conn, backend, recovery_backend, FIRST_STAGE, recovery_node);
107
108 ereport(LOG,
109 (errmsg("node recovery, 1st stage is done")));
110
111 if (REPLICATION)
112 {
113 ereport(LOG,
114 (errmsg("node recovery, starting 2nd stage")));
115
116 /* 2nd stage */
117 *InRecovery = RECOVERY_ONLINE;
118 if (pool_config->use_watchdog)
119 {
120 /* announce start recovery */
121 if (COMMAND_OK != wd_start_recovery())
122 ereport(ERROR,
123 (errmsg("node recovery failed, failed to send start recovery packet")));
124 }
125
126 if (wait_connection_closed() != 0)
127 ereport(ERROR,
128 (errmsg("node recovery failed, waiting connection closed in the other pgpools timeout")));
129
130 ereport(LOG,
131 (errmsg("node recovery, all connections from clients have been closed")));
132
133 exec_checkpoint(conn);
134
135 ereport(LOG,
136 (errmsg("node recovery"),
137 errdetail("CHECKPOINT in the 2nd stage done")));
138
139 exec_recovery(conn, backend, recovery_backend, SECOND_STAGE, recovery_node);
140 }
141
142 exec_remote_start(conn, recovery_backend);
143
144 check_postmaster_started(recovery_backend);
145
146 ereport(LOG,
147 (errmsg("node recovery, node: %d restarted", recovery_node)));
148
149 /*
150 * reset failover completion flag. this is necessary since previous
151 * failover/failback will set the flag to 1.
152 */
153 pcp_worker_wakeup_request = 0;
154
155 /* send failback request to pgpool parent */
156 send_failback_request(recovery_node, false, REQ_DETAIL_CONFIRMED);
157
158 /* wait for failback */
159 failback_wait_count = 0;
160 while (!pcp_worker_wakeup_request)
161 {
162 struct timeval t = {1, 0};
163
164 /* polling SIGUSR2 signal every 1 sec */
165 select(0, NULL, NULL, NULL, &t);
166 failback_wait_count++;
167 if (failback_wait_count >= FAILBACK_WAIT_MAX_RETRY)
168 {
169 ereport(LOG,
170 (errmsg("node recovery"),
171 errdetail("waiting for wake up request is timeout(%d seconds)",
172 FAILBACK_WAIT_MAX_RETRY)));
173
174 break;
175 }
176 }
177 pcp_worker_wakeup_request = 0;
178 }
179 PG_CATCH();
180 {
181 PQfinish(conn);
182 PG_RE_THROW();
183 }
184 PG_END_TRY();
185
186 PQfinish(conn);
187
188 ereport(LOG,
189 (errmsg("recovery done")));
190 }
191
192 /*
193 * Notice all children finishing recovery.
194 */
195 void
finish_recovery(void)196 finish_recovery(void)
197 {
198 /* announce end recovery */
199 if (pool_config->use_watchdog && *InRecovery != RECOVERY_INIT)
200 {
201 wd_end_recovery();
202 }
203
204 *InRecovery = RECOVERY_INIT;
205 pool_signal_parent(SIGUSR2);
206 }
207
208 /*
209 * Execute CHECKPOINT
210 */
211 static void
exec_checkpoint(PGconn * conn)212 exec_checkpoint(PGconn *conn)
213 {
214 PGresult *result;
215
216 ereport(DEBUG1,
217 (errmsg("recovery execute checkpoint, start checkpoint")));
218
219 result = PQexec(conn, "CHECKPOINT");
220 if (PQresultStatus(result) != PGRES_COMMAND_OK)
221 ereport(ERROR,
222 (errmsg("executing recovery, execute CHECKPOINT failed")));
223 PQclear(result);
224
225 ereport(DEBUG1,
226 (errmsg("recovery execute checkpoint, finish checkpoint")));
227 }
228
229 /*
230 * Call pgpool_recovery() function.
231 */
232 static void
exec_recovery(PGconn * conn,BackendInfo * master_backend,BackendInfo * recovery_backend,char stage,int recovery_node)233 exec_recovery(PGconn *conn, BackendInfo * master_backend, BackendInfo * recovery_backend, char stage, int recovery_node)
234 {
235 PGresult *result;
236 char *hostname;
237 char *script;
238
239 if (strlen(recovery_backend->backend_hostname) == 0 || *(recovery_backend->backend_hostname) == '/')
240 hostname = "localhost";
241 else
242 hostname = recovery_backend->backend_hostname;
243
244 script = (stage == FIRST_STAGE) ?
245 pool_config->recovery_1st_stage_command : pool_config->recovery_2nd_stage_command;
246
247 if (script == NULL || strlen(script) == 0)
248 {
249 /* do not execute script */
250 return;
251 }
252
253 /*
254 * Execute recovery command
255 */
256 snprintf(recovery_command,
257 sizeof(recovery_command),
258 "SELECT pgpool_recovery('%s', '%s', '%s', '%d', %d, '%d')",
259 script,
260 hostname,
261 recovery_backend->backend_data_directory,
262 master_backend->backend_port,
263 recovery_node,
264 recovery_backend->backend_port
265 );
266
267 ereport(LOG,
268 (errmsg("executing recovery"),
269 errdetail("starting recovery command: \"%s\"", recovery_command)));
270
271 ereport(LOG,
272 (errmsg("executing recovery"),
273 errdetail("disabling statement_timeout")));
274
275 result = PQexec(conn, "SET statement_timeout To 0");
276 if (PQresultStatus(result) != PGRES_COMMAND_OK)
277 ereport(ERROR,
278 (errmsg("executing recovery, SET STATEMENT_TIMEOUT failed at \"%s\"",
279 (stage == FIRST_STAGE) ? "1st stage" : "2nd stage")));
280
281 PQclear(result);
282
283 ereport(DEBUG1,
284 (errmsg("executing recovery, start recovery")));
285
286 result = PQexec(conn, recovery_command);
287 if (PQresultStatus(result) != PGRES_TUPLES_OK)
288 ereport(ERROR,
289 (errmsg("executing recovery, execution of command failed at \"%s\"",
290 (stage == FIRST_STAGE) ? "1st stage" : "2nd stage"),
291 errdetail("command:\"%s\"", script)));
292
293 PQclear(result);
294
295 ereport(DEBUG1,
296 (errmsg("executing recovery, finish recovery")));
297 }
298
299 /*
300 * Call pgpool_remote_start() function.
301 */
302 static void
exec_remote_start(PGconn * conn,BackendInfo * backend)303 exec_remote_start(PGconn *conn, BackendInfo * backend)
304 {
305 PGresult *result;
306 char *hostname;
307
308 if (strlen(backend->backend_hostname) == 0 || *(backend->backend_hostname) == '/')
309 hostname = "localhost";
310 else
311 hostname = backend->backend_hostname;
312
313 snprintf(recovery_command, sizeof(recovery_command),
314 "SELECT pgpool_remote_start('%s', '%s')",
315 hostname,
316 backend->backend_data_directory);
317
318 ereport(DEBUG1,
319 (errmsg("executing remote start"),
320 errdetail("start pgpool_remote_start")));
321
322 result = PQexec(conn, recovery_command);
323 if (PQresultStatus(result) != PGRES_TUPLES_OK)
324 ereport(ERROR,
325 (errmsg("executing remote start failed with error: \"%s\"", PQresultErrorMessage(result))));
326
327 PQclear(result);
328
329 ereport(DEBUG1,
330 (errmsg("executing remote start"),
331 errdetail("finish pgpool_remote_start")));
332 }
333
334 /*
335 * Check postmaster is started.
336 */
337 static void
check_postmaster_started(BackendInfo * backend)338 check_postmaster_started(BackendInfo * backend)
339 {
340 int i = 0;
341 char port_str[16];
342 PGconn *conn;
343 char *dbname;
344 char *password = get_pgpool_config_user_password(pool_config->recovery_user,
345 pool_config->recovery_password);
346
347 snprintf(port_str, sizeof(port_str), "%d", backend->backend_port);
348
349 /*
350 * First we try with "postgres" database.
351 */
352 dbname = "postgres";
353
354 do
355 {
356 ConnStatusType r;
357
358 ereport(LOG,
359 (errmsg("checking if postmaster is started"),
360 errdetail("trying to connect to postmaster on hostname:%s database:%s user:%s (retry %d times)",
361 backend->backend_hostname, dbname, pool_config->recovery_user, i)));
362
363 conn = PQsetdbLogin(backend->backend_hostname,
364 port_str,
365 NULL,
366 NULL,
367 dbname,
368 pool_config->recovery_user,
369 password ? password : NULL);
370
371 r = PQstatus(conn);
372 PQfinish(conn);
373 if (r == CONNECTION_OK)
374 {
375 if (password)
376 pfree(password);
377 return;
378 }
379 ereport(LOG,
380 (errmsg("checking if postmaster is started"),
381 errdetail("failed to connect to postmaster on hostname:%s database:%s user:%s",
382 backend->backend_hostname, dbname, pool_config->recovery_user)));
383
384 sleep(3);
385 } while (i++ < 3); /* XXX Hard coded retry (9 seconds) */
386
387 /*
388 * Retry with "template1" database.
389 */
390 dbname = "template1";
391 i = 0;
392
393 do
394 {
395 ConnStatusType r;
396
397 ereport(LOG,
398 (errmsg("checking if postmaster is started"),
399 errdetail("trying to connect to postmaster on hostname:%s database:%s user:%s (retry %d times)",
400 backend->backend_hostname, dbname, pool_config->recovery_user, i)));
401
402 conn = PQsetdbLogin(backend->backend_hostname,
403 port_str,
404 NULL,
405 NULL,
406 dbname,
407 pool_config->recovery_user,
408 password ? password : NULL);
409
410 r = PQstatus(conn);
411 PQfinish(conn);
412 if (r == CONNECTION_OK)
413 {
414 if (password)
415 pfree(password);
416 return;
417 }
418
419 ereport(LOG,
420 (errmsg("checking if postmaster is started"),
421 errdetail("failed to connect to postmaster on hostname:%s database:%s user:%s",
422 backend->backend_hostname, dbname, pool_config->recovery_user)));
423
424 if (WAIT_RETRY_COUNT != 0)
425 sleep(3);
426 } while (i++ < WAIT_RETRY_COUNT);
427
428 if (password)
429 pfree(password);
430
431 ereport(ERROR,
432 (errmsg("recovery is checking if postmaster is started"),
433 errdetail("postmaster on hostname:\"%s\" database:\"%s\" user:\"%s\" failed to start in %d second",
434 backend->backend_hostname, dbname, pool_config->recovery_user, pool_config->recovery_timeout)));
435 }
436
437 static PGconn *
connect_backend_libpq(BackendInfo * backend)438 connect_backend_libpq(BackendInfo * backend)
439 {
440 char port_str[16];
441 PGconn *conn;
442 char *password = get_pgpool_config_user_password(pool_config->recovery_user,
443 pool_config->recovery_password);
444
445 snprintf(port_str, sizeof(port_str),
446 "%d", backend->backend_port);
447 conn = PQsetdbLogin(backend->backend_hostname,
448 port_str,
449 NULL,
450 NULL,
451 "template1",
452 pool_config->recovery_user,
453 password ? password : "");
454
455 if (password)
456 pfree(password);
457
458 if (PQstatus(conn) != CONNECTION_OK)
459 {
460 PQfinish(conn);
461 return NULL;
462 }
463 return conn;
464 }
465
466 /*
467 * Wait all connections are closed.
468 */
469 int
wait_connection_closed(void)470 wait_connection_closed(void)
471 {
472 int i = 0;
473
474 do
475 {
476
477 if (Req_info->conn_counter == 0)
478 return 0;
479
480 if (WAIT_RETRY_COUNT != 0)
481 sleep(3);
482
483 } while (i++ < WAIT_RETRY_COUNT);
484 ereport(LOG,
485 (errmsg("wait_connection_closed: existing connections did not close in %d sec.", pool_config->recovery_timeout)));
486 return ensure_conn_counter_validity();
487 }
488
ensure_conn_counter_validity(void)489 int ensure_conn_counter_validity(void)
490 {
491 /*
492 * recovery_timeout was expired. Before returning with failure status,
493 * let's check if this is caused by the malformed conn_counter. If a child
494 * process abnormally exits (killed by SIGKILL or SEGFAULT, for example),
495 * then conn_counter is not decremented at process exit, thus it will
496 * never be returning to 0. This could be detected by checking if
497 * client_idle_limit_in_recovery is enabled and less value than
498 * recovery_timeout because all clients must be kicked out by the time
499 * when client_idle_limit_in_recovery is expired. If so, we should reset
500 * conn_counter to 0 also.
501 *
502 * See bug 431 for more info.
503 */
504 if (pool_config->client_idle_limit_in_recovery == -1 ||
505 (pool_config->client_idle_limit_in_recovery > 0 &&
506 pool_config->recovery_timeout >= pool_config->client_idle_limit_in_recovery))
507 {
508 ereport(LOG,
509 (errmsg("wait_connection_closed: mulformed conn_counter (%d) detected. reset it to 0",
510 Req_info->conn_counter)));
511 Req_info->conn_counter = 0;
512 return 0;
513 }
514 return 1;
515 }
516