1 /*
2 * repmgr-action-standby.c
3 *
4 * Implements standby actions for the repmgr command line utility
5 *
6 * Copyright (c) 2ndQuadrant, 2010-2020
7 *
8 * This program is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22 #include <sys/stat.h>
23
24 #include "repmgr.h"
25 #include "dirutil.h"
26 #include "compat.h"
27 #include "controldata.h"
28
29 #include "repmgr-client-global.h"
30 #include "repmgr-action-standby.h"
31
32
/*
 * Singly-linked list cell describing one tablespace on the source node.
 * All string members are heap-allocated by tablespace_data_append().
 */
typedef struct TablespaceDataListCell
{
	struct TablespaceDataListCell *next;	/* next cell, or NULL at list end */
	char	   *name;		/* tablespace name */
	char	   *oid;		/* tablespace OID, as a string */
	char	   *location;	/* filesystem path of the tablespace */
	/* Optional pointer to a file containing a list of tablespace files to copy from Barman */
	FILE	   *fptr;
} TablespaceDataListCell;

/* Head/tail pointers for a list of TablespaceDataListCell */
typedef struct TablespaceDataList
{
	TablespaceDataListCell *head;
	TablespaceDataListCell *tail;
} TablespaceDataList;
48
49
/*
 * Aggregated statistics about the sibling nodes of a standby, used when
 * checking whether an operation (e.g. switchover) can safely proceed.
 */
typedef struct
{
	int			reachable_sibling_node_count;			/* siblings we could connect to */
	int			reachable_sibling_nodes_with_slot_count;	/* reachable siblings using replication slots */
	int			unreachable_sibling_node_count;			/* siblings we could not connect to */
	int			min_required_wal_senders;	/* walsenders needed to accommodate siblings */
	int			min_required_free_slots;	/* free replication slots needed for siblings */
} SiblingNodeStats;

/* zero-initializer for SiblingNodeStats */
#define T_SIBLING_NODES_STATS_INITIALIZER { \
	0, \
	0, \
	0, \
	0, \
	0 \
}
66
/*
 * File-scope state shared between do_standby_clone() and its helper
 * functions; several of these are populated as side effects of
 * check_source_server() / check_source_server_via_barman().
 */
static PGconn *primary_conn = NULL;		/* connection to the primary node */
static PGconn *source_conn = NULL;		/* connection to the clone source node */

static char local_data_directory[MAXPGPATH] = "";

static bool upstream_conninfo_found = false;
static int	upstream_node_id = UNKNOWN_NODE_ID;

/* conninfo parameters used to build "primary_conninfo" */
static t_conninfo_param_list recovery_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
static char recovery_conninfo_str[MAXLEN] = "";
static char upstream_repluser[NAMEDATALEN] = "";
static char upstream_user[NAMEDATALEN] = "";

static int	source_server_version_num = UNKNOWN_SERVER_VERSION_NUM;

/* configuration files detected on the source node (direct clone mode only) */
static t_configfile_list config_files = T_CONFIGFILE_LIST_INITIALIZER;

static standy_clone_mode mode = pg_basebackup;

/* used by barman mode */
static char local_repmgr_tmp_directory[MAXPGPATH] = "";
static char datadir_list_filename[MAXLEN] = "";
static char barman_command_buf[MAXLEN] = "";

/*
 * To enable "standby clone" to run with lowest possible user
 * privileges, we'll need to determine which actions need to
 * be run and which of the available users, which will be one
 * of the repmgr user, the replication user (if available) or
 * the superuser (if available).
 */
static t_user_type SettingsUser = REPMGR_USER;

/* forward declarations: standby promote/clone internals */
static void _do_standby_promote_internal(PGconn *conn);
static void _do_create_replication_conf(void);

/* forward declarations: source-server and configuration sanity checks */
static void check_barman_config(void);
static void check_source_server(void);
static void check_source_server_via_barman(void);
static bool check_upstream_config(PGconn *conn, int server_version_num, t_node_info *node_info, bool exit_on_error);
static void check_primary_standby_version_match(PGconn *conn, PGconn *primary_conn);
static void check_recovery_type(PGconn *conn);

/* forward declarations: clone execution */
static void initialise_direct_clone(t_node_info *local_node_record, t_node_info *upstream_node_record);
static int	run_basebackup(t_node_info *node_record);
static int	run_file_backup(t_node_info *node_record);

static void copy_configuration_files(bool delete_after_copy);

static void tablespace_data_append(TablespaceDataList *list, const char *name, const char *oid, const char *location);

/* forward declarations: Barman helpers */
static void get_barman_property(char *dst, char *name, char *local_repmgr_directory);
static int	get_tablespace_data_barman(char *, TablespaceDataList *);
static char *make_barman_ssh_command(char *buf);

/* forward declarations: replication configuration generation */
static bool create_recovery_file(t_node_info *node_record, t_conninfo_param_list *primary_conninfo, int server_version_num, char *dest, bool as_file);
static void write_primary_conninfo(PQExpBufferData *dest, t_conninfo_param_list *param_list);

/* forward declarations: sibling-node checks (switchover support) */
static bool check_sibling_nodes(NodeInfoList *sibling_nodes, SiblingNodeStats *sibling_nodes_stats);
static bool check_free_wal_senders(int available_wal_senders, SiblingNodeStats *sibling_nodes_stats, bool *dry_run_success);
static bool check_free_slots(t_node_info *local_node_record, SiblingNodeStats *sibling_nodes_stats, bool *dry_run_success);

static void sibling_nodes_follow(t_node_info *local_node_record, NodeInfoList *sibling_nodes, SiblingNodeStats *sibling_nodes_stats);

/* forward declarations: parsers for remote "repmgr node ..." command output */
static t_remote_error_type parse_remote_error(const char *error);
static CheckStatus parse_check_status(const char *status_str);

static NodeStatus parse_node_status_is_shutdown_cleanly(const char *node_status_output, XLogRecPtr *checkPoint);
static CheckStatus parse_node_check_archiver(const char *node_check_output, int *files, int *threshold, t_remote_error_type *remote_error);
static ConnectionStatus parse_remote_node_replication_connection(const char *node_check_output);
static bool parse_data_directory_config(const char *node_check_output, t_remote_error_type *remote_error);
static bool parse_replication_config_owner(const char *node_check_output);
static CheckStatus parse_db_connection(const char *db_connection);
140
141 /*
142 * STANDBY CLONE
143 *
144 * Event(s):
145 * - standby_clone
146 *
147 * Parameters:
148 * --upstream-conninfo
149 * --upstream-node-id
150 * --no-upstream-connection
151 * -F/--force
152 * --dry-run
153 * -c/--fast-checkpoint
154 * --copy-external-config-files
155 * -R/--remote-user
156 * --replication-user (only required if no upstream record)
157 * --without-barman
158 * --replication-conf-only (--recovery-conf-only)
159 * --verify-backup (PostgreSQL 13 and later)
160 */
161
/*
 * Clone a standby from a source node (usually the intended upstream) or,
 * in Barman mode, from a Barman backup. On success, writes the replication
 * configuration (recovery.conf or postgresql.auto.conf entries), emits a
 * "standby_clone" event and exits with the backup command's result code.
 *
 * Exits the process on any fatal error; never returns normally.
 */
void
do_standby_clone(void)
{
	PQExpBufferData event_details;
	int			r = 0;

	/* dummy node record */
	t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
	t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;

	bool		local_data_directory_provided = false;

	initialize_conninfo_params(&recovery_conninfo, false);

	/*
	 * --replication-conf-only provided - we'll handle that separately
	 */
	if (runtime_options.replication_conf_only == true)
	{
		return _do_create_replication_conf();
	}

	/*
	 * conninfo params for the actual upstream node (which might be different
	 * to the node we're cloning from) to write to recovery.conf
	 */

	mode = get_standby_clone_mode();

	/*
	 * Copy the provided data directory; if a configuration file was provided,
	 * use the (mandatory) value from that; if -D/--pgdata was provided, use
	 * that.
	 *
	 * Note that barman mode requires -D/--pgdata.
	 */

	get_node_data_directory(local_data_directory);
	if (local_data_directory[0] != '\0')
	{
		local_data_directory_provided = true;
		log_notice(_("destination directory \"%s\" provided"),
				   local_data_directory);
	}
	else
	{
		/*
		 * If a configuration file is provided, repmgr will error out after
		 * parsing it if no data directory is provided; this check is for
		 * niche use-cases where no configuration file is provided.
		 */
		log_error(_("no data directory provided"));
		log_hint(_("use -D/--pgdata to explicitly specify a data directory"));
		exit(ERR_BAD_CONFIG);
	}


	if (mode == barman)
	{
		/*
		 * Not currently possible to use --verify-backup with Barman
		 */
		if (runtime_options.verify_backup == true)
		{
			log_error(_("--verify-backup option cannot be used when cloning from Barman backups"));
			exit(ERR_BAD_CONFIG);
		}

		/*
		 * Sanity-check barman connection and installation;
		 * this will exit with ERR_BARMAN if problems found.
		 */
		check_barman_config();
	}

	init_node_record(&local_node_record);
	local_node_record.type = STANDBY;

	/*
	 * Initialise list of conninfo parameters which will later be used to
	 * create the "primary_conninfo" recovery parameter.
	 *
	 * We'll initialise it with the host settings specified on the command
	 * line. As it's possible the standby will be cloned from a node different
	 * to its intended upstream, we'll later attempt to fetch the upstream
	 * node record and overwrite the values set here with those from the
	 * upstream node record (excluding that record's application_name)
	 */

	copy_conninfo_params(&recovery_conninfo, &source_conninfo);


	/* Set the default application name to this node's name */
	if (config_file_options.node_id != UNKNOWN_NODE_ID)
	{
		char		application_name[MAXLEN] = "";

		param_set(&recovery_conninfo, "application_name", config_file_options.node_name);

		get_conninfo_value(config_file_options.conninfo, "application_name", application_name);
		if (strlen(application_name) && strncmp(application_name, config_file_options.node_name, sizeof(config_file_options.node_name)) != 0)
		{
			log_notice(_("\"application_name\" is set in repmgr.conf but will be replaced by the node name"));
		}
	}
	else
	{
		/*
		 * this will only happen in corner cases where the node is being
		 * cloned without a configuration file; fall back to "repmgr" if no
		 * application_name provided
		 */
		char	   *application_name = param_get(&source_conninfo, "application_name");

		if (application_name == NULL)
			param_set(&recovery_conninfo, "application_name", "repmgr");
	}



	/*
	 * Do some sanity checks on the proposed data directory; if it exists:
	 *  - check it's openable
	 *  - check if there's an instance running
	 *
	 * We do this here so the check can be part of a --dry-run.
	 */
	switch (check_dir(local_data_directory))
	{
		case DIR_ERROR:
			log_error(_("unable to access specified data directory \"%s\""), local_data_directory);
			log_detail("%s", strerror(errno));
			exit(ERR_BAD_CONFIG);
			break;
		case DIR_NOENT:

			/*
			 * directory doesn't exist
			 * TODO: in --dry-run mode, attempt to create and delete?
			 */
			break;
		case DIR_EMPTY:
			/* Present but empty */
			break;
		case DIR_NOT_EMPTY:
			/* Present but not empty */
			if (is_pg_dir(local_data_directory))
			{
				/* even -F/--force is not enough to overwrite an active directory... */
				if (is_pg_running(local_data_directory))
				{
					log_error(_("specified data directory \"%s\" appears to contain a running PostgreSQL instance"),
							  local_data_directory);
					log_hint(_("ensure the target data directory does not contain a running PostgreSQL instance"));
					exit(ERR_BAD_CONFIG);
				}
			}
			break;
		default:
			break;
	}

	/*
	 * By default attempt to connect to the source node. This will fail if no
	 * connection is possible, unless in Barman mode, in which case we can
	 * fall back to connecting to the source node via Barman (if available).
	 */
	if (runtime_options.no_upstream_connection == false)
	{
		RecordStatus record_status = RECORD_NOT_FOUND;

		/*
		 * This connects to the source node and performs sanity checks, also
		 * sets "recovery_conninfo_str", "upstream_repluser", "upstream_user" and
		 * "upstream_node_id" and creates a connection handle in "source_conn".
		 *
		 * Will error out if source connection not possible and not in
		 * "barman" mode.
		 */
		check_source_server();

		if (runtime_options.verify_backup == true)
		{
			/*
			 * --verify-backup available for PostgreSQL 13 and later
			 */
			if (PQserverVersion(source_conn) < 130000)
			{
				log_error(_("--verify-backup available for PostgreSQL 13 and later"));
				exit(ERR_BAD_CONFIG);
			}
		}

		/* attempt to retrieve upstream node record */
		record_status = get_node_record(source_conn,
										upstream_node_id,
										&upstream_node_record);

		if (record_status != RECORD_FOUND)
		{
			log_error(_("unable to retrieve record for upstream node %i"),
					  upstream_node_id);
			exit(ERR_BAD_CONFIG);
		}

	}
	else
	{
		upstream_node_id = runtime_options.upstream_node_id;
	}

	/*
	 * if --upstream-conninfo was supplied, use that (will overwrite value set
	 * by check_source_server(), but that's OK)
	 */
	if (runtime_options.upstream_conninfo[0] != '\0')
	{
		strncpy(recovery_conninfo_str, runtime_options.upstream_conninfo, MAXLEN);
		upstream_conninfo_found = true;
	}
	else if (mode == barman && PQstatus(source_conn) != CONNECTION_OK)
	{
		/*
		 * Here we don't have a connection to the upstream node (either
		 * because --no-upstream-connection was supplied, or
		 * check_source_server() was unable to make a connection, and
		 * --upstream-conninfo wasn't supplied.
		 *
		 * As we're executing in Barman mode we can try and connect via the
		 * Barman server to extract the upstream node's conninfo string.
		 *
		 * To do this we need to extract Barman's conninfo string, replace the
		 * database name with the repmgr one (they could well be different)
		 * and remotely execute psql.
		 *
		 * This attempts to set "recovery_conninfo_str".
		 */
		check_source_server_via_barman();
	}

	if (recovery_conninfo_str[0] == '\0')
	{
		log_error(_("unable to determine a connection string to use as \"primary_conninfo\""));
		log_hint(_("use \"--upstream-conninfo\" to explicitly provide a value for \"primary_conninfo\""));
		if (PQstatus(source_conn) == CONNECTION_OK)
			PQfinish(source_conn);
		exit(ERR_BAD_CONFIG);
	}


	if (upstream_conninfo_found == true)
	{
		/*
		 * parse returned upstream conninfo string to recovery
		 * primary_conninfo params
		 */
		char	   *errmsg = NULL;
		bool		parse_success = false;

		log_verbose(LOG_DEBUG, "parsing upstream conninfo string \"%s\"", recovery_conninfo_str);

		/*
		 * parse_conninfo_string() here will remove the upstream's
		 * `application_name`, if set
		 */

		parse_success = parse_conninfo_string(recovery_conninfo_str, &recovery_conninfo, &errmsg, true);

		if (parse_success == false)
		{
			log_error(_("unable to parse conninfo string \"%s\" for upstream node"),
					  recovery_conninfo_str);
			log_detail("%s", errmsg);
			if (PQstatus(source_conn) == CONNECTION_OK)
				PQfinish(source_conn);
			exit(ERR_BAD_CONFIG);
		}

		if (upstream_repluser[0] != '\0')
		{
			/* Write the replication user from the node's upstream record */
			param_set(&recovery_conninfo, "user", upstream_repluser);
		}
	}
	else
	{
		/*
		 * If no upstream node record found, we'll abort with an error here,
		 * unless -F/--force is used, in which case we'll use the parameters
		 * provided on the command line (and assume the user knows what
		 * they're doing).
		 */
		if (upstream_node_id == UNKNOWN_NODE_ID)
		{
			log_error(_("unable to determine upstream node"));
			if (PQstatus(source_conn) == CONNECTION_OK)
				PQfinish(source_conn);
			exit(ERR_BAD_CONFIG);
		}

		if (!runtime_options.force)
		{
			log_error(_("no record found for upstream node (upstream_node_id: %i)"),
					  upstream_node_id);
			log_hint(_("use -F/--force to create \"primary_conninfo\" based on command-line parameters"));

			if (PQstatus(source_conn) == CONNECTION_OK)
				PQfinish(source_conn);
			exit(ERR_BAD_CONFIG);
		}
	}

	/*
	 * If copying of external configuration files requested, and any are
	 * detected, perform sanity checks
	 */
	if (PQstatus(source_conn) == CONNECTION_OK && runtime_options.copy_external_config_files == true)
	{
		PGconn	   *superuser_conn = NULL;
		PGconn	   *privileged_conn = NULL;
		bool		external_config_files = false;
		int			i = 0;

		/*
		 * Obtain configuration file locations
		 *
		 * We'll check to see whether the configuration files are in the data
		 * directory - if not we'll have to copy them via SSH, if copying
		 * requested.
		 *
		 * This will require superuser permissions, so we'll attempt to
		 * connect as -S/--superuser (if provided), otherwise check the
		 * current connection user has superuser rights.
		 *
		 * XXX: if configuration files are symlinks to targets outside the
		 * data directory, they won't be copied by pg_basebackup, but we can't
		 * tell this from the below query; we'll probably need to add a check
		 * for their presence and if missing force copy by SSH
		 */

		if (SettingsUser == REPMGR_USER)
		{
			privileged_conn = source_conn;
		}
		else
		{
			get_superuser_connection(&source_conn, &superuser_conn, &privileged_conn);
		}

		if (get_configuration_file_locations(privileged_conn, &config_files) == false)
		{
			log_notice(_("unable to proceed without establishing configuration file locations"));
			PQfinish(source_conn);

			if (superuser_conn != NULL)
				PQfinish(superuser_conn);

			exit(ERR_BAD_CONFIG);
		}

		/* check if any files actually outside the data directory */
		for (i = 0; i < config_files.entries; i++)
		{
			t_configfile_info *file = config_files.files[i];

			if (file->in_data_directory == false)
			{
				external_config_files = true;
				break;
			}
		}

		if (external_config_files == true)
		{
			/* NOTE(review): this "r" deliberately shadows the function-level "r" */
			int			r;
			PQExpBufferData msg;

			initPQExpBuffer(&msg);

			appendPQExpBuffer(&msg,
							  _("external configuration files detected, checking SSH connection to host \"%s\""),
							  runtime_options.host);

			if (runtime_options.dry_run == true)
			{
				log_notice("%s", msg.data);
			}
			else
			{
				log_verbose(LOG_INFO, "%s", msg.data);
			}

			termPQExpBuffer(&msg);

			r = test_ssh_connection(runtime_options.host, runtime_options.remote_user);

			if (r != 0)
			{
				log_error(_("remote host \"%s\" is not reachable via SSH - unable to copy external configuration files"),
						  runtime_options.host);
				if (superuser_conn != NULL)
					PQfinish(superuser_conn);
				PQfinish(source_conn);
				exit(ERR_BAD_CONFIG);
			}

			initPQExpBuffer(&msg);

			appendPQExpBuffer(&msg,
							  _("SSH connection to host \"%s\" succeeded"),
							  runtime_options.host);

			if (runtime_options.dry_run == true)
			{
				log_info("%s", msg.data);
			}
			else
			{
				log_verbose(LOG_INFO, "%s", msg.data);
			}

			termPQExpBuffer(&msg);


			/*
			 * Here we'll attempt an initial test copy of the detected external
			 * files, to detect any issues before we run the base backup.
			 *
			 * Note this will exit with an error, unless -F/--force supplied.
			 *
			 * We don't do this during a --dry-run as it may introduce unexpected changes
			 * on the local node; during an actual clone operation, any problems with
			 * copying files will be detected early and the operation aborted before
			 * the actual database cloning commences.
			 *
			 * TODO: put the files in a temporary directory and move to their final
			 * destination once the database has been cloned.
			 */

			if (runtime_options.dry_run == false)
			{
				if (runtime_options.copy_external_config_files_destination == CONFIG_FILE_SAMEPATH)
				{
					/*
					 * Files will be placed in the same path as on the source server;
					 * don't delete after copying.
					 */
					copy_configuration_files(false);

				}
				else
				{
					/*
					 * Files will be placed in the data directory - delete after copying.
					 * They'll be copied again later; see TODO above.
					 */
					copy_configuration_files(true);
				}
			}
		}


		if (superuser_conn != NULL)
			PQfinish(superuser_conn);
	}


	if (runtime_options.dry_run == true)
	{
		/*
		 * If replication slots in use, sanity-check whether we can create them
		 * with the available user permissions.
		 */
		if (config_file_options.use_replication_slots == true && PQstatus(source_conn) == CONNECTION_OK)
		{
			PQExpBufferData msg;
			bool		success = true;

			initPQExpBuffer(&msg);

			/*
			 * "create_replication_slot()" knows about --dry-run mode and
			 * will perform checks but not actually create the slot.
			 */
			success = create_replication_slot(source_conn,
											  local_node_record.slot_name,
											  &upstream_node_record,
											  &msg);
			if (success == false)
			{
				log_error(_("prerequisites not met for creating a replication slot on upstream node %i"),
						  upstream_node_record.node_id);
				termPQExpBuffer(&msg);
				exit(ERR_BAD_CONFIG);
			}
			termPQExpBuffer(&msg);
		}

		if (upstream_node_id != UNKNOWN_NODE_ID)
		{
			log_notice(_("standby will attach to upstream node %i"), upstream_node_id);
		}
		else
		{
			log_warning(_("unable to determine a valid upstream node id"));
		}

		if (mode == pg_basebackup && runtime_options.fast_checkpoint == false)
		{
			log_hint(_("consider using the -c/--fast-checkpoint option"));
		}

		PQfinish(source_conn);

		log_info(_("all prerequisites for \"standby clone\" are met"));

		exit(SUCCESS);
	}

	if (mode != barman)
	{
		initialise_direct_clone(&local_node_record, &upstream_node_record);
	}

	switch (mode)
	{
		case pg_basebackup:
			log_notice(_("starting backup (using pg_basebackup)..."));
			break;
		case barman:
			log_notice(_("retrieving backup from Barman..."));
			break;
		default:
			/* should never reach here */
			log_error(_("unknown clone mode"));
	}

	if (mode == pg_basebackup)
	{
		if (runtime_options.fast_checkpoint == false)
		{
			log_hint(_("this may take some time; consider using the -c/--fast-checkpoint option"));
		}
	}

	switch (mode)
	{
		case pg_basebackup:
			r = run_basebackup(&local_node_record);
			break;
		case barman:
			r = run_file_backup(&local_node_record);
			break;
		default:
			/* should never reach here */
			log_error(_("unknown clone mode"));
	}

	/* If the backup failed then exit */
	if (r != SUCCESS)
	{
		/* If a replication slot was previously created, drop it */
		if (config_file_options.use_replication_slots == true)
		{
			/*
			 * In the case where a standby is being cloned from a node other than its
			 * intended upstream, We can't be sure of the source node's node_id. This
			 * is only required by "drop_replication_slot_if_exists()" to determine
			 * from the node's record whether it has a different replication user, and
			 * as in this case that would need to be supplied via "--replication-user"
			 * it's not a problem.
			 */
			drop_replication_slot_if_exists(source_conn, UNKNOWN_NODE_ID, local_node_record.slot_name);
		}

		log_error(_("unable to take a base backup of the primary server"));
		log_hint(_("data directory (\"%s\") may need to be cleaned up manually"),
				 local_data_directory);

		PQfinish(source_conn);
		exit(r);
	}

	/*
	 * Run pg_verifybackup here if requested, before any alterations are made
	 * to the data directory.
	 */
	if (mode == pg_basebackup && runtime_options.verify_backup == true)
	{
		PQExpBufferData command;
		/* NOTE(review): this "r" deliberately shadows the function-level "r" */
		int			r;
		struct stat st;

		initPQExpBuffer(&command);

		make_pg_path(&command, "pg_verifybackup");

		/* check command actually exists */
		if (stat(command.data, &st) != 0)
		{
			log_error(_("unable to find expected binary \"%s\""), command.data);
			log_detail("%s", strerror(errno));
			exit(ERR_BAD_CONFIG);
		}

		appendPQExpBufferStr(&command, " ");

		/* Somewhat inconsistent, but pg_verifybackup doesn't accept a -D option */
		appendShellString(&command,
						  local_data_directory);

		log_debug("executing:\n  %s", command.data);

		r = system(command.data);
		termPQExpBuffer(&command);

		if (r != 0)
		{
			log_error(_("unable to verify backup"));
			exit(ERR_BAD_BASEBACKUP);
		}

		log_verbose(LOG_INFO, _("backup successfully verified"));

	}


	/*
	 * If `--copy-external-config-files` was provided, copy any configuration
	 * files detected to the appropriate location. Any errors encountered will
	 * not be treated as fatal.
	 *
	 * This won't run in Barman mode as "config_files" is only populated in
	 * "initialise_direct_clone()", which isn't called in Barman mode.
	 */
	if (runtime_options.copy_external_config_files == true && config_files.entries > 0)
	{
		/*
		 * If "--copy-external-config-files=samepath" was used, the files will already
		 * have been copied.
		 */
		if (runtime_options.copy_external_config_files_destination == CONFIG_FILE_PGDATA)
			copy_configuration_files(false);
	}

	/* Write the recovery.conf file */

	if (create_recovery_file(&local_node_record,
							 &recovery_conninfo,
							 source_server_version_num,
							 local_data_directory,
							 true) == false)
	{
		/* create_recovery_file() will log an error */
		if (source_server_version_num >= 120000)
		{
			log_notice(_("unable to write replication configuration; see preceding error messages"));
		}
		else
		{
			log_notice(_("unable to create recovery.conf; see preceding error messages"));
		}
		log_hint(_("data directory (\"%s\") may need to be cleaned up manually"),
				 local_data_directory);

		PQfinish(source_conn);
		exit(ERR_BAD_CONFIG);
	}

	/* no default: only pg_basebackup and barman are possible at this point */
	switch (mode)
	{
		case pg_basebackup:
			log_notice(_("standby clone (using pg_basebackup) complete"));
			break;

		case barman:
			log_notice(_("standby clone (from Barman) complete"));
			break;
	}

	/*
	 * TODO: It might be nice to provide an option to have repmgr start the
	 * PostgreSQL server automatically
	 */

	log_notice(_("you can now start your PostgreSQL server"));

	if (config_file_options.service_start_command[0] != '\0')
	{
		log_hint(_("for example: %s"),
				 config_file_options.service_start_command);
	}
	else if (local_data_directory_provided)
	{
		log_hint(_("for example: pg_ctl -D %s start"),
				 local_data_directory);
	}
	else
	{
		log_hint(_("for example: /etc/init.d/postgresql start"));
	}

	/*
	 * XXX forgetting to (re) register the standby is a frequent cause of
	 * error; we should consider having repmgr automatically register the
	 * standby, either by default with an option "--no-register", or an option
	 * "--register".
	 *
	 * Note that "repmgr standby register" requires the standby to be running
	 * - if not, and we just update the node record, we'd have an incorrect
	 * representation of the replication cluster. Best combined with an
	 * automatic start of the server (see note above)
	 */

	/*
	 * Check for an existing node record, and output the appropriate command
	 * for registering or re-registering.
	 */
	{
		t_node_info node_record = T_NODE_INFO_INITIALIZER;
		RecordStatus record_status = RECORD_NOT_FOUND;

		record_status = get_node_record(primary_conn,
										config_file_options.node_id,
										&node_record);

		if (record_status == RECORD_FOUND)
		{
			log_hint(_("after starting the server, you need to re-register this standby with \"repmgr standby register --force\" to update the existing node record"));
		}
		else
		{
			log_hint(_("after starting the server, you need to register this standby with \"repmgr standby register\""));

		}
	}


	/* Log the event */

	initPQExpBuffer(&event_details);

	/* Add details about relevant runtime options used */
	appendPQExpBuffer(&event_details,
					  _("cloned from host \"%s\", port %s"),
					  runtime_options.host,
					  runtime_options.port);

	appendPQExpBufferStr(&event_details,
						 _("; backup method: "));

	switch (mode)
	{
		case pg_basebackup:
			appendPQExpBufferStr(&event_details, "pg_basebackup");
			break;
		case barman:
			appendPQExpBufferStr(&event_details, "barman");
			break;
	}

	appendPQExpBuffer(&event_details,
					  _("; --force: %s"),
					  runtime_options.force ? "Y" : "N");

	create_event_notification(primary_conn,
							  &config_file_options,
							  config_file_options.node_id,
							  "standby_clone",
							  true,
							  event_details.data);

	if (primary_conn != source_conn && PQstatus(primary_conn) == CONNECTION_OK)
		PQfinish(primary_conn);

	if (PQstatus(source_conn) == CONNECTION_OK)
		PQfinish(source_conn);

	exit(r);
}
941
942
943 void
check_barman_config(void)944 check_barman_config(void)
945 {
946 PQExpBufferData command;
947 bool command_ok = false;
948
949 /*
950 * Check that there is at least one valid backup
951 */
952
953 log_info(_("connecting to Barman server to verify backup for \"%s\""), config_file_options.barman_server);
954
955 initPQExpBuffer(&command);
956
957 appendPQExpBuffer(&command, "%s show-backup %s latest > /dev/null",
958 make_barman_ssh_command(barman_command_buf),
959 config_file_options.barman_server);
960
961 command_ok = local_command(command.data, NULL);
962
963 if (command_ok == false)
964 {
965 log_error(_("no valid backup for server \"%s\" was found in the Barman catalogue"),
966 config_file_options.barman_server);
967 log_detail(_("command executed was:\n %s"), command.data),
968 log_hint(_("refer to the Barman documentation for more information"));
969
970 termPQExpBuffer(&command);
971 exit(ERR_BARMAN);
972 }
973 else if (runtime_options.dry_run == true)
974 {
975 log_info(_("valid backup for server \"%s\" found in the Barman catalogue"),
976 config_file_options.barman_server);
977 }
978
979 termPQExpBuffer(&command);
980
981 /*
982 * Attempt to create data directory (unless --dry-run specified,
983 * in which case do nothing; warnings will be emitted elsewhere about
984 * any issues with the data directory)
985 */
986 if (runtime_options.dry_run == false)
987 {
988 if (!create_pg_dir(local_data_directory, runtime_options.force))
989 {
990 log_error(_("unable to use directory %s"),
991 local_data_directory);
992 log_hint(_("use -F/--force option to force this directory to be overwritten"));
993 exit(ERR_BAD_CONFIG);
994 }
995
996 /*
997 * Create the local repmgr subdirectory
998 */
999
1000 maxlen_snprintf(local_repmgr_tmp_directory,
1001 "%s/repmgr", local_data_directory);
1002
1003 maxlen_snprintf(datadir_list_filename,
1004 "%s/data.txt", local_repmgr_tmp_directory);
1005
1006 if (!create_pg_dir(local_repmgr_tmp_directory, runtime_options.force))
1007 {
1008 log_error(_("unable to create directory \"%s\""),
1009 local_repmgr_tmp_directory);
1010
1011 exit(ERR_BAD_CONFIG);
1012 }
1013 }
1014
1015 /*
1016 * Fetch server parameters from Barman
1017 */
1018 log_info(_("connecting to Barman server to fetch server parameters"));
1019
1020 initPQExpBuffer(&command);
1021
1022 if (runtime_options.dry_run == true)
1023 {
1024 appendPQExpBuffer(&command, "%s show-server %s > /dev/null",
1025 make_barman_ssh_command(barman_command_buf),
1026 config_file_options.barman_server);
1027 }
1028 else
1029 {
1030 appendPQExpBuffer(&command, "%s show-server %s > %s/show-server.txt",
1031 make_barman_ssh_command(barman_command_buf),
1032 config_file_options.barman_server,
1033 local_repmgr_tmp_directory);
1034 }
1035
1036 command_ok = local_command(command.data, NULL);
1037
1038 if (command_ok == false)
1039 {
1040 log_error(_("unable to fetch server parameters from Barman server"));
1041 log_detail(_("command executed was:\n %s"), command.data),
1042 termPQExpBuffer(&command);
1043 exit(ERR_BARMAN);
1044 }
1045 else if (runtime_options.dry_run == true)
1046 {
1047 log_info(_("server parameters were successfully fetched from Barman server"));
1048 }
1049
1050 termPQExpBuffer(&command);
1051 }
1052
1053
1054 /*
1055 * _do_create_replication_conf()
1056 *
1057 * Create replication configuration for a previously cloned instance.
1058 *
1059 * Prerequisites:
1060 *
1061 * - data directory must be provided, either explicitly or via
1062 * repmgr.conf
1063 * - the instance should not be running
1064 * - an existing "recovery.conf" file can only be overwritten with
1065 * -F/--force (Pg11 and earlier)
1066 * - connection parameters for an existing, running node must be provided
1067 * - --upstream-node-id, if provided, will be "primary_conninfo",
1068 * otherwise primary node id; node must exist; unless -F/--force
1069 * provided, must be active and connection possible
1070 * - if replication slots in use, create (respect --dry-run)
1071 *
1072 * not compatible with --no-upstream-connection
1073 *
1074 */
1075
1076 static void
_do_create_replication_conf(void)1077 _do_create_replication_conf(void)
1078 {
1079 t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
1080 t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;
1081
1082 RecordStatus record_status = RECORD_NOT_FOUND;
1083 char recovery_file_path[MAXPGPATH + sizeof(RECOVERY_COMMAND_FILE)] = "";
1084 struct stat st;
1085 bool node_is_running = false;
1086 bool slot_creation_required = false;
1087 PGconn *upstream_conn = NULL;
1088 PGconn *upstream_repl_conn = NULL;
1089
1090 get_node_data_directory(local_data_directory);
1091
1092 if (local_data_directory[0] == '\0')
1093 {
1094 log_error(_("no data directory provided"));
1095 log_hint(_("provide the node's \"repmgr.conf\" file with -f/--config-file or the data directory with -D/--pgdata"));
1096 exit(ERR_BAD_CONFIG);
1097 }
1098
1099
1100 /* check connection */
1101 source_conn = establish_db_connection_by_params(&source_conninfo, true);
1102
1103 /* Verify that source is a supported server version */
1104 (void) check_server_version(source_conn, "source node", true, NULL);
1105
1106 /*
1107 * Do some sanity checks on the data directory to make sure
1108 * it contains a valid but dormant instance
1109 */
1110 switch (check_dir(local_data_directory))
1111 {
1112 case DIR_ERROR:
1113 log_error(_("unable to access specified data directory \"%s\""), local_data_directory);
1114 log_detail("%s", strerror(errno));
1115 PQfinish(source_conn);
1116 exit(ERR_BAD_CONFIG);
1117 break;
1118 case DIR_NOENT:
1119 log_error(_("specified data directory \"%s\" does not exist"), local_data_directory);
1120 PQfinish(source_conn);
1121 exit(ERR_BAD_CONFIG);
1122 break;
1123 case DIR_EMPTY:
1124 log_error(_("specified data directory \"%s\" is empty"), local_data_directory);
1125 PQfinish(source_conn);
1126 exit(ERR_BAD_CONFIG);
1127 break;
1128 case DIR_NOT_EMPTY:
1129 /* Present but not empty */
1130 if (!is_pg_dir(local_data_directory))
1131 {
1132 log_error(_("specified data directory \"%s\" does not contain a PostgreSQL instance"), local_data_directory);
1133 PQfinish(source_conn);
1134 exit(ERR_BAD_CONFIG);
1135 }
1136
1137 if (is_pg_running(local_data_directory))
1138 {
1139 if (runtime_options.force == false)
1140 {
1141 log_error(_("specified data directory \"%s\" appears to contain a running PostgreSQL instance"),
1142 local_data_directory);
1143
1144 if (PQserverVersion(source_conn) >= 120000)
1145 {
1146 log_hint(_("use -F/--force to create replication configuration anyway"));
1147 }
1148 else
1149 {
1150 log_hint(_("use -F/--force to create \"recovery.conf\" anyway"));
1151 }
1152
1153 exit(ERR_BAD_CONFIG);
1154 }
1155
1156 node_is_running = true;
1157
1158 if (runtime_options.dry_run == true)
1159 {
1160 if (PQserverVersion(source_conn) >= 120000)
1161 {
1162 log_warning(_("replication configuration would be created in an active data directory"));
1163 }
1164 else
1165 {
1166 log_warning(_("\"recovery.conf\" would be created in an active data directory"));
1167 }
1168 }
1169 else
1170 {
1171 if (PQserverVersion(source_conn) >= 120000)
1172 {
1173 log_warning(_("creating replication configuration in an active data directory"));
1174 }
1175 else
1176 {
1177 log_warning(_("creating \"recovery.conf\" in an active data directory"));
1178 }
1179 }
1180 }
1181 break;
1182 default:
1183 break;
1184 }
1185
1186
1187 /* determine node for primary_conninfo */
1188
1189 if (runtime_options.upstream_node_id != UNKNOWN_NODE_ID)
1190 {
1191 upstream_node_id = runtime_options.upstream_node_id;
1192 }
1193 else
1194 {
1195 /* if --upstream-node-id not specifically supplied, get primary node id */
1196 upstream_node_id = get_primary_node_id(source_conn);
1197
1198 if (upstream_node_id == NODE_NOT_FOUND)
1199 {
1200 log_error(_("unable to determine primary node for this replication cluster"));
1201 PQfinish(source_conn);
1202 exit(ERR_BAD_CONFIG);
1203 }
1204
1205 log_debug("primary node determined as: %i", upstream_node_id);
1206 }
1207
1208 /* attempt to retrieve upstream node record */
1209 record_status = get_node_record(source_conn,
1210 upstream_node_id,
1211 &upstream_node_record);
1212
1213 if (record_status != RECORD_FOUND)
1214 {
1215 log_error(_("unable to retrieve node record for upstream node %i"), upstream_node_id);
1216
1217 if (record_status == RECORD_ERROR)
1218 {
1219 log_detail("%s", PQerrorMessage(source_conn));
1220 }
1221
1222 exit(ERR_BAD_CONFIG);
1223 }
1224
1225 /* attempt to retrieve local node record */
1226 record_status = get_node_record(source_conn,
1227 config_file_options.node_id,
1228 &local_node_record);
1229
1230 if (record_status != RECORD_FOUND)
1231 {
1232 log_error(_("unable to retrieve node record for local node %i"), config_file_options.node_id);
1233
1234 if (record_status == RECORD_ERROR)
1235 {
1236 log_detail("%s", PQerrorMessage(source_conn));
1237 }
1238 else
1239 {
1240 log_hint(_("standby must be registered before replication can be configured"));
1241 }
1242
1243 exit(ERR_BAD_CONFIG);
1244 }
1245
1246 PQfinish(source_conn);
1247
1248
1249 /* connect to upstream (which could be different to source) */
1250
1251 upstream_conn = establish_db_connection(upstream_node_record.conninfo, false);
1252 if (PQstatus(upstream_conn) != CONNECTION_OK)
1253 {
1254 log_error(_("unable to connect to upstream node \"%s\" (ID: %i)"),
1255 upstream_node_record.node_name,
1256 upstream_node_id);
1257 exit(ERR_BAD_CONFIG);
1258 }
1259
1260 /* Set the application name to this node's name */
1261 if (config_file_options.node_name[0] != '\0')
1262 param_set(&recovery_conninfo, "application_name", config_file_options.node_name);
1263
1264 /* Set the replication user from the primary node record */
1265 param_set(&recovery_conninfo, "user", upstream_node_record.repluser);
1266
1267 initialize_conninfo_params(&recovery_conninfo, false);
1268
1269 /* We ignore any application_name set in the primary's conninfo */
1270 parse_conninfo_string(upstream_node_record.conninfo, &recovery_conninfo, NULL, true);
1271
1272 /* check that a replication connection can be made (--force = override) */
1273 upstream_repl_conn = establish_db_connection_by_params(&recovery_conninfo, false);
1274
1275 if (PQstatus(upstream_repl_conn) != CONNECTION_OK)
1276 {
1277 if (runtime_options.force == false)
1278 {
1279 log_error(_("unable to initiate replication connection to upstream node \"%s\" (ID: %i)"),
1280 upstream_node_record.node_name,
1281 upstream_node_id);
1282 PQfinish(upstream_conn);
1283 exit(ERR_BAD_CONFIG);
1284 }
1285 }
1286
1287 /* if replication slots are in use, perform some checks */
1288 if (config_file_options.use_replication_slots == true)
1289 {
1290 PQExpBufferData msg;
1291 t_replication_slot slot_info = T_REPLICATION_SLOT_INITIALIZER;
1292
1293 record_status = get_slot_record(upstream_conn, local_node_record.slot_name, &slot_info);
1294
1295 /* check if replication slot exists*/
1296 if (record_status == RECORD_FOUND)
1297 {
1298 if (slot_info.active == true)
1299 {
1300 initPQExpBuffer(&msg);
1301
1302 appendPQExpBuffer(&msg,
1303 _("an active replication slot named \"%s\" already exists on upstream node \"%s\" (ID: %i)"),
1304 local_node_record.slot_name,
1305 upstream_node_record.node_name,
1306 upstream_node_id);
1307
1308 if (runtime_options.force == false && runtime_options.dry_run == false)
1309 {
1310 log_error("%s", msg.data);
1311 log_hint(_("use -F/--force to continue anyway"));
1312 termPQExpBuffer(&msg);
1313 PQfinish(upstream_conn);
1314 exit(ERR_BAD_CONFIG);
1315 }
1316
1317 log_warning("%s", msg.data);
1318 termPQExpBuffer(&msg);
1319 }
1320 else
1321 {
1322 log_info(_("an inactive replication slot for this node exists on the upstream node"));
1323 }
1324 }
1325 /* if not, if check one can and should be created */
1326 else
1327 {
1328 get_node_replication_stats(upstream_conn, &upstream_node_record);
1329
1330 if (upstream_node_record.max_replication_slots > upstream_node_record.total_replication_slots)
1331 {
1332 slot_creation_required = true;
1333 }
1334 else
1335 {
1336 initPQExpBuffer(&msg);
1337
1338 appendPQExpBuffer(&msg,
1339 _("insufficient free replication slots on upstream node \"%s\" (ID: %i)"),
1340 upstream_node_record.node_name,
1341 upstream_node_id);
1342
1343 if (runtime_options.force == false && runtime_options.dry_run == false)
1344 {
1345 log_error("%s", msg.data);
1346 log_hint(_("use -F/--force to continue anyway"));
1347 termPQExpBuffer(&msg);
1348 PQfinish(upstream_conn);
1349 exit(ERR_BAD_CONFIG);
1350 }
1351
1352 log_warning("%s", msg.data);
1353 termPQExpBuffer(&msg);
1354 }
1355 }
1356 }
1357
1358 /* check if recovery.conf exists (Pg11 and earlier only) */
1359 if (PQserverVersion(upstream_conn) < 120000)
1360 {
1361 snprintf(recovery_file_path, sizeof(recovery_file_path),
1362 "%s/%s",
1363 local_data_directory,
1364 RECOVERY_COMMAND_FILE);
1365
1366 if (stat(recovery_file_path, &st) == -1)
1367 {
1368 if (errno != ENOENT)
1369 {
1370 log_error(_("unable to check for existing \"recovery.conf\" file in \"%s\""),
1371 local_data_directory);
1372 log_detail("%s", strerror(errno));
1373 exit(ERR_BAD_CONFIG);
1374 }
1375 }
1376 else
1377 {
1378 if (runtime_options.force == false)
1379 {
1380 log_error(_("\"recovery.conf\" already exists in \"%s\""),
1381 local_data_directory);
1382 log_hint(_("use -F/--force to overwrite an existing \"recovery.conf\" file"));
1383 exit(ERR_BAD_CONFIG);
1384 }
1385
1386 if (runtime_options.dry_run == true)
1387 {
1388 log_warning(_("the existing \"recovery.conf\" file would be overwritten"));
1389 }
1390 else
1391 {
1392 log_warning(_("the existing \"recovery.conf\" file will be overwritten"));
1393 }
1394 }
1395 }
1396
1397 if (runtime_options.dry_run == true)
1398 {
1399 char recovery_conf_contents[MAXLEN] = "";
1400 create_recovery_file(&local_node_record,
1401 &recovery_conninfo,
1402 PQserverVersion(upstream_conn),
1403 recovery_conf_contents,
1404 false);
1405
1406 if (PQserverVersion(upstream_conn) >= 120000)
1407 {
1408 log_info(_("following items would be added to \"postgresql.auto.conf\" in \"%s\""), local_data_directory);
1409 }
1410 else
1411 {
1412 log_info(_("would create \"recovery.conf\" file in \"%s\""), local_data_directory);
1413 }
1414
1415 log_detail(_("\n%s"), recovery_conf_contents);
1416 }
1417 else
1418 {
1419 if (!create_recovery_file(&local_node_record,
1420 &recovery_conninfo,
1421 PQserverVersion(upstream_conn),
1422 local_data_directory,
1423 true))
1424 {
1425 if (PQserverVersion(upstream_conn) >= 120000)
1426 {
1427 log_error(_("unable to write replication configuration to \"postgresql.auto.conf\""));
1428 }
1429 else
1430 {
1431 log_error(_("unable to create \"recovery.conf\""));
1432 }
1433 }
1434 else
1435 {
1436 if (PQserverVersion(upstream_conn) >= 120000)
1437 {
1438 log_notice(_("replication configuration written to \"postgresql.auto.conf\""));
1439 }
1440 else
1441 {
1442 log_notice(_("\"recovery.conf\" created as \"%s\""), recovery_file_path);
1443 }
1444
1445 if (node_is_running == true)
1446 {
1447 log_hint(_("node must be restarted for the new file to take effect"));
1448 }
1449 }
1450 }
1451
1452 /* Pg12 and later: add standby.signal, if not already there */
1453 if (PQserverVersion(upstream_conn) >= 120000)
1454 {
1455 if (runtime_options.dry_run == true)
1456 {
1457 log_info(_("would write \"standby.signal\" file"));
1458
1459 }
1460 else
1461 {
1462 if (write_standby_signal() == false)
1463 {
1464 log_error(_("unable to write \"standby.signal\" file"));
1465 }
1466 }
1467 }
1468
1469 /* add replication slot, if required */
1470 if (slot_creation_required == true)
1471 {
1472 PQExpBufferData msg;
1473 initPQExpBuffer(&msg);
1474
1475 if (runtime_options.dry_run == true)
1476 {
1477 /*
1478 * In --dry-run mode this will check availability
1479 * of a user who can create replication slots.
1480 */
1481 // XXX check return value
1482 create_replication_slot(upstream_conn,
1483 local_node_record.slot_name,
1484 NULL,
1485 &msg);
1486 log_info(_("would create replication slot \"%s\" on upstream node \"%s\" (ID: %i)"),
1487 local_node_record.slot_name,
1488 upstream_node_record.node_name,
1489 upstream_node_id);
1490 }
1491 else
1492 {
1493
1494 if (create_replication_slot(upstream_conn,
1495 local_node_record.slot_name,
1496 NULL,
1497 &msg) == false)
1498 {
1499 log_error("%s", msg.data);
1500 PQfinish(upstream_conn);
1501 termPQExpBuffer(&msg);
1502 exit(ERR_BAD_CONFIG);
1503 }
1504
1505
1506 log_notice(_("replication slot \"%s\" created on upstream node \"%s\" (ID: %i)"),
1507 local_node_record.slot_name,
1508 upstream_node_record.node_name,
1509 upstream_node_id);
1510 }
1511 termPQExpBuffer(&msg);
1512
1513 }
1514
1515
1516 PQfinish(upstream_conn);
1517
1518 return;
1519 }
1520
1521
1522 /*
1523 * do_standby_register()
1524 *
1525 * Event(s):
1526 * - standby_register
1527 * - standby_register_sync
1528 */
1529 /* XXX check --upstream-node-id works when re-registering */
1530
1531 void
do_standby_register(void)1532 do_standby_register(void)
1533 {
1534 PGconn *conn = NULL;
1535 PGconn *primary_conn = NULL;
1536
1537 bool record_created = false;
1538 t_node_info node_record = T_NODE_INFO_INITIALIZER;
1539 RecordStatus record_status = RECORD_NOT_FOUND;
1540
1541 PQExpBufferData details;
1542
1543 /* so we can pass info about the primary to event notification scripts */
1544 t_event_info event_info = T_EVENT_INFO_INITIALIZER;
1545 t_node_info primary_node_record = T_NODE_INFO_INITIALIZER;
1546 int primary_node_id = UNKNOWN_NODE_ID;
1547
1548 bool dry_run_ok = true;
1549
1550 log_info(_("connecting to local node \"%s\" (ID: %i)"),
1551 config_file_options.node_name,
1552 config_file_options.node_id);
1553
1554 conn = establish_db_connection_quiet(config_file_options.conninfo);
1555
1556 /*
1557 * If unable to connect, and --force not provided, wait up to --wait-start
1558 * seconds (default: 0) for the node to become reachable.
1559 *
1560 * Not that if --force provided, we don't wait for the node to start, as
1561 * the normal use case will be re-registering an existing node, or
1562 * registering an inactive/not-yet-extant one; we'll do the
1563 * error handling for those cases in the next code block
1564 */
1565 if (PQstatus(conn) != CONNECTION_OK && runtime_options.force == false)
1566 {
1567 bool conn_ok = false;
1568 int timer = 0;
1569
1570 for (;;)
1571 {
1572 if (timer == runtime_options.wait_start)
1573 break;
1574
1575 sleep(1);
1576
1577 log_verbose(LOG_INFO, _("%i of %i connection attempts"),
1578 timer + 1,
1579 runtime_options.wait_start);
1580
1581 conn = establish_db_connection_quiet(config_file_options.conninfo);
1582
1583 if (PQstatus(conn) == CONNECTION_OK)
1584 {
1585 conn_ok = true;
1586 break;
1587 }
1588
1589 timer++;
1590 }
1591
1592 if (conn_ok == true)
1593 {
1594 log_info(_("connected to local node \"%s\" (ID: %i) after %i seconds"),
1595 config_file_options.node_name,
1596 config_file_options.node_id,
1597 timer);
1598 }
1599 }
1600
1601 /*
1602 * If still unable to connect, continue only if -F/--force provided,
1603 * and primary connection parameters provided.
1604 */
1605 if (PQstatus(conn) != CONNECTION_OK)
1606 {
1607 if (runtime_options.force == false)
1608 {
1609 log_error(_("unable to connect to local node \"%s\" (ID: %i)"),
1610 config_file_options.node_name,
1611 config_file_options.node_id);
1612 log_detail("\n%s", PQerrorMessage(conn));
1613 log_hint(_("to register a standby which is not running, provide primary connection parameters and use option -F/--force"));
1614
1615 exit(ERR_BAD_CONFIG);
1616 }
1617
1618 if (runtime_options.connection_param_provided == false)
1619 {
1620 log_error(_("unable to connect to local node \"%s\" (ID: %i)"),
1621 config_file_options.node_name,
1622 config_file_options.node_id);
1623 log_hint(_("to register a standby which is not running, additionally provide the primary connection parameters"));
1624 exit(ERR_BAD_CONFIG);
1625 }
1626 }
1627 /* connection OK - check this is actually a standby */
1628 else
1629 {
1630 if (runtime_options.connection_param_provided)
1631 {
1632 log_warning(_("database connection parameters not required when the standby to be registered is running"));
1633 log_detail(_("repmgr uses the \"conninfo\" parameter in \"repmgr.conf\" to connect to the standby"));
1634 }
1635 check_recovery_type(conn);
1636 }
1637
1638 /* check if there is a primary in this cluster */
1639 log_info(_("connecting to primary database"));
1640
1641 /* Normal case - we can connect to the local node */
1642 if (PQstatus(conn) == CONNECTION_OK)
1643 {
1644 primary_conn = get_primary_connection(conn, &primary_node_id, NULL);
1645 }
1646
1647 /*
1648 * otherwise user is forcing a registration of a (potentially) inactive (or
1649 * not-yet-extant) node and must have supplied primary connection info
1650 */
1651 else
1652 {
1653 primary_conn = establish_db_connection_by_params(&source_conninfo, false);
1654 }
1655
1656 /*
1657 * no amount of --force will make it possible to register the standby
1658 * without a primary server to connect to
1659 */
1660 if (PQstatus(primary_conn) != CONNECTION_OK)
1661 {
1662 log_error(_("unable to connect to the primary database"));
1663 log_hint(_("a primary node must be configured before registering a standby node"));
1664 exit(ERR_BAD_CONFIG);
1665 }
1666
1667 /*
1668 * Populate "event_info" with info about the primary for event notifications
1669 */
1670 record_status = get_node_record(primary_conn,
1671 primary_node_id,
1672 &primary_node_record);
1673 event_info.node_id = primary_node_id;
1674 event_info.node_name = primary_node_record.node_name;
1675 event_info.conninfo_str = primary_node_record.conninfo;
1676
1677 /*
1678 * Verify that standby and primary are supported and compatible server
1679 * versions
1680 *
1681 * If the user is registering an inactive standby, we'll trust they know
1682 * what they're doing
1683 */
1684 if (PQstatus(conn) == CONNECTION_OK)
1685 {
1686 check_primary_standby_version_match(conn, primary_conn);
1687 }
1688
1689
1690 /*
1691 * Check that an active node with the same node_name doesn't exist already
1692 */
1693
1694 record_status = get_node_record_by_name(primary_conn,
1695 config_file_options.node_name,
1696 &node_record);
1697
1698 if (record_status == RECORD_FOUND)
1699 {
1700 if (node_record.active == true && node_record.node_id != config_file_options.node_id)
1701 {
1702 log_error(_("node %i exists already with node_name \"%s\""),
1703 node_record.node_id,
1704 config_file_options.node_name);
1705 PQfinish(primary_conn);
1706 if (PQstatus(conn) == CONNECTION_OK)
1707 PQfinish(conn);
1708 exit(ERR_BAD_CONFIG);
1709 }
1710 }
1711
1712 /* Check if node record exists */
1713
1714 record_status = get_node_record(primary_conn,
1715 config_file_options.node_id,
1716 &node_record);
1717
1718 if (record_status == RECORD_FOUND && !runtime_options.force)
1719 {
1720 log_error(_("node %i is already registered"),
1721 config_file_options.node_id);
1722 log_hint(_("use option -F/--force to overwrite an existing node record"));
1723 PQfinish(primary_conn);
1724 if (PQstatus(conn) == CONNECTION_OK)
1725 PQfinish(conn);
1726 exit(ERR_BAD_CONFIG);
1727 }
1728
1729 /*
1730 * If an upstream node is defined, check if that node exists and is active.
1731 *
1732 * If it doesn't exist, and --force set, create a minimal inactive record,
1733 * in the assumption that the user knows what they are doing (usually some kind
1734 * of provisioning where multiple servers are created in parallel) and will
1735 * create the active record later.
1736 */
1737 if (runtime_options.upstream_node_id != NO_UPSTREAM_NODE)
1738 {
1739 RecordStatus upstream_record_status = RECORD_NOT_FOUND;
1740 t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;
1741
1742 if (runtime_options.upstream_node_id == config_file_options.node_id)
1743 {
1744 log_error(_("provided node ID for --upstream-node-id (%i) is the same as the configured local node ID (%i)"),
1745 runtime_options.upstream_node_id,
1746 config_file_options.node_id);
1747 PQfinish(primary_conn);
1748 if (PQstatus(conn) == CONNECTION_OK)
1749 PQfinish(conn);
1750 exit(ERR_BAD_CONFIG);
1751 }
1752
1753 upstream_record_status = get_node_record(primary_conn,
1754 runtime_options.upstream_node_id,
1755 &upstream_node_record);
1756
1757 /* create placeholder upstream record if -F/--force set */
1758 if (upstream_record_status != RECORD_FOUND)
1759 {
1760 t_node_info placeholder_upstream_node_record = T_NODE_INFO_INITIALIZER;
1761
1762 if (!runtime_options.force)
1763 {
1764 log_error(_("no record found for upstream node %i"),
1765 runtime_options.upstream_node_id);
1766 /* footgun alert - only do this if you know what you're doing */
1767 log_hint(_("use option -F/--force to create a dummy upstream record"));
1768 PQfinish(primary_conn);
1769 if (PQstatus(conn) == CONNECTION_OK)
1770 PQfinish(conn);
1771 exit(ERR_BAD_CONFIG);
1772 }
1773
1774 log_notice(_("creating placeholder record for upstream node %i"),
1775 runtime_options.upstream_node_id);
1776
1777 placeholder_upstream_node_record.node_id = runtime_options.upstream_node_id;
1778 placeholder_upstream_node_record.type = STANDBY;
1779 placeholder_upstream_node_record.upstream_node_id = NO_UPSTREAM_NODE;
1780 strncpy(placeholder_upstream_node_record.conninfo, runtime_options.upstream_conninfo, MAXLEN);
1781 placeholder_upstream_node_record.active = false;
1782
1783 record_created = create_node_record(primary_conn,
1784 "standby register",
1785 &placeholder_upstream_node_record);
1786
1787 /*
1788 * It's possible, in the kind of scenario this functionality is
1789 * intended to support, that there's a race condition where the
1790 * node's actual record gets inserted, causing the insert of the
1791 * placeholder record to fail. If this is the case, we don't worry
1792 * about this insert failing; if not we bail out.
1793 *
1794 * TODO: teach create_node_record() to use ON CONFLICT DO NOTHING
1795 * for 9.5 and later.
1796 */
1797 if (record_created == false)
1798 {
1799 upstream_record_status = get_node_record(primary_conn,
1800 runtime_options.upstream_node_id,
1801 &placeholder_upstream_node_record);
1802 if (upstream_record_status != RECORD_FOUND)
1803 {
1804 log_error(_("unable to create placeholder record for upstream node %i"),
1805 runtime_options.upstream_node_id);
1806 PQfinish(primary_conn);
1807 if (PQstatus(conn) == CONNECTION_OK)
1808 PQfinish(conn);
1809 exit(ERR_BAD_CONFIG);
1810 }
1811
1812 log_info(_("a record for upstream node %i was already created"),
1813 runtime_options.upstream_node_id);
1814 }
1815 }
1816 else if (node_record.active == false)
1817 {
1818 /*
1819 * upstream node is inactive and --force not supplied - refuse to
1820 * register
1821 */
1822 if (!runtime_options.force)
1823 {
1824 log_error(_("record for upstream node %i is marked as inactive"),
1825 runtime_options.upstream_node_id);
1826 log_hint(_("use option -F/--force to register a standby with an inactive upstream node"));
1827 PQfinish(primary_conn);
1828 if (PQstatus(conn) == CONNECTION_OK)
1829 PQfinish(conn);
1830 exit(ERR_BAD_CONFIG);
1831 }
1832
1833 /*
1834 * user is using the --force - notify about the potential footgun
1835 */
1836 log_notice(_("registering node %i with inactive upstream node %i"),
1837 config_file_options.node_id,
1838 runtime_options.upstream_node_id);
1839 }
1840 /* check upstream node is accessible and this node is connected */
1841 else
1842 {
1843 PGconn *upstream_conn = NULL;
1844
1845 upstream_conn = establish_db_connection(upstream_node_record.conninfo, false);
1846
1847 if (PQstatus(upstream_conn) != CONNECTION_OK)
1848 {
1849 if (!runtime_options.force)
1850 {
1851 log_error(_("unable to connect to upstream node \"%s\" (ID: %i)"),
1852 upstream_node_record.node_name,
1853 upstream_node_record.node_id);
1854 log_hint(_("use -F/--force to continue anyway"));
1855 PQfinish(primary_conn);
1856 if (PQstatus(conn) == CONNECTION_OK)
1857 PQfinish(conn);
1858 exit(ERR_BAD_CONFIG);
1859 }
1860
1861 log_warning(_("unable to connect to upstream node \"%s\" (ID: %i) but continuing anyway"),
1862 upstream_node_record.node_name,
1863 upstream_node_record.node_id);
1864 }
1865 else
1866 {
1867 /* check our standby is connected */
1868 if (is_downstream_node_attached(upstream_conn, config_file_options.node_name, NULL) == NODE_ATTACHED)
1869 {
1870 log_verbose(LOG_INFO, _("local node is attached to specified upstream node %i"), runtime_options.upstream_node_id);
1871 }
1872 else
1873 {
1874 if (!runtime_options.force)
1875 {
1876 log_error(_("this node does not appear to be attached to upstream node \"%s\" (ID: %i)"),
1877 upstream_node_record.node_name,
1878 upstream_node_record.node_id);
1879
1880 log_detail(_("no record for application name \"%s\" found in \"pg_stat_replication\""),
1881 config_file_options.node_name);
1882 log_hint(_("use -F/--force to continue anyway"));
1883 PQfinish(primary_conn);
1884 if (PQstatus(conn) == CONNECTION_OK)
1885 PQfinish(conn);
1886 exit(ERR_BAD_CONFIG);
1887 }
1888 log_warning(_("this node does not appear to be attached to upstream node \"%s\" (ID: %i)"),
1889 upstream_node_record.node_name,
1890 upstream_node_record.node_id);
1891 }
1892 PQfinish(upstream_conn);
1893 }
1894 }
1895 }
1896
1897 /*
1898 * populate node record structure with current values set in repmgr.conf
1899 * and/or the command line (this will overwrite any existing values, which
1900 * is what we want when updating the record)
1901 */
1902 init_node_record(&node_record);
1903 node_record.type = STANDBY;
1904
1905 /* if --upstream-node-id not provided, set to primary node id */
1906 if (node_record.upstream_node_id == UNKNOWN_NODE_ID)
1907 {
1908 node_record.upstream_node_id = primary_node_id;
1909 }
1910
1911 /*
1912 * If --upstream-node-id not provided, we're defaulting to the primary as
1913 * upstream node. If local node is available, double-check that it's attached
1914 * to the primary, in case --upstream-node-id was an accidental ommission.
1915 *
1916 * Currently we'll only do this for newly registered nodes.
1917 */
1918 if (runtime_options.upstream_node_id == NO_UPSTREAM_NODE && PQstatus(conn) == CONNECTION_OK)
1919 {
1920 /* only do this if record does not exist */
1921 if (record_status != RECORD_FOUND)
1922 {
1923 log_warning(_("--upstream-node-id not supplied, assuming upstream node is primary (node ID %i)"),
1924 primary_node_id);
1925
1926 /* check our standby is connected */
1927 if (is_downstream_node_attached(primary_conn, config_file_options.node_name, NULL) == NODE_ATTACHED)
1928 {
1929 log_verbose(LOG_INFO, _("local node is attached to primary"));
1930 }
1931 else if (runtime_options.force == false)
1932 {
1933 log_error(_("local node not attached to primary node %i"), primary_node_id);
1934 /* TODO: 9.6 and later, display detail from pg_stat_wal_receiver */
1935 log_hint(_("specify the actual upstream node id with --upstream-node-id, or use -F/--force to continue anyway"));
1936
1937 if (runtime_options.dry_run == true)
1938 {
1939 dry_run_ok = false;
1940 }
1941 else
1942 {
1943 PQfinish(primary_conn);
1944 PQfinish(conn);
1945 exit(ERR_BAD_CONFIG);
1946 }
1947 }
1948 else
1949 {
1950 log_warning(_("local node not attached to primary node %i"), primary_node_id);
1951 log_notice(_("-F/--force supplied, continuing anyway"));
1952 }
1953 }
1954
1955 }
1956
1957 if (runtime_options.dry_run == true)
1958 {
1959 PQfinish(primary_conn);
1960 if (PQstatus(conn) == CONNECTION_OK)
1961 PQfinish(conn);
1962
1963 if (dry_run_ok == false)
1964 {
1965 log_warning(_("issue(s) encountered; see preceding log messages"));
1966 exit(ERR_BAD_CONFIG);
1967 }
1968
1969 log_info(_("all prerequisites for \"standby register\" are met"));
1970
1971 exit(SUCCESS);
1972 }
1973
1974 /*
1975 * node record exists - update it (at this point we have already
1976 * established that -F/--force is in use)
1977 */
1978 if (record_status == RECORD_FOUND)
1979 {
1980 record_created = update_node_record(primary_conn,
1981 "standby register",
1982 &node_record);
1983 }
1984 else
1985 {
1986 record_created = create_node_record(primary_conn,
1987 "standby register",
1988 &node_record);
1989 }
1990
1991 initPQExpBuffer(&details);
1992
1993 if (record_created == false)
1994 {
1995 appendPQExpBuffer(&details,
1996 _("standby registration failed; provided upstream node ID was %i"),
1997 node_record.upstream_node_id);
1998
1999 if (runtime_options.force == true)
2000 appendPQExpBufferStr(&details,
2001 _(" (-F/--force option was used)"));
2002
2003 create_event_notification_extended(
2004 primary_conn,
2005 &config_file_options,
2006 config_file_options.node_id,
2007 "standby_register",
2008 false,
2009 details.data,
2010 &event_info);
2011
2012 termPQExpBuffer(&details);
2013 PQfinish(primary_conn);
2014 primary_conn = NULL;
2015
2016 if (PQstatus(conn) == CONNECTION_OK)
2017 PQfinish(conn);
2018 exit(ERR_BAD_CONFIG);
2019 }
2020
2021 appendPQExpBuffer(&details,
2022 _("standby registration succeeded; upstream node ID is %i"),
2023 node_record.upstream_node_id);
2024
2025 if (runtime_options.force == true)
2026 appendPQExpBufferStr(&details,
2027 _(" (-F/--force option was used)"));
2028
2029
2030 /* Log the event */
2031 create_event_notification_extended(
2032 primary_conn,
2033 &config_file_options,
2034 config_file_options.node_id,
2035 "standby_register",
2036 true,
2037 details.data,
2038 &event_info);
2039
2040 termPQExpBuffer(&details);
2041
2042 /*
2043 * If --wait-sync option set, wait for the records to synchronise
2044 * (unless 0 seconds provided, which disables it, which is the same as
2045 * not providing the option). The default value is -1, which means
2046 * no timeout.
2047 */
2048
2049 if (PQstatus(conn) == CONNECTION_OK &&
2050 runtime_options.wait_register_sync == true &&
2051 runtime_options.wait_register_sync_seconds != 0)
2052 {
2053 bool sync_ok = false;
2054 int timer = 0;
2055 RecordStatus node_record_status = RECORD_NOT_FOUND;
2056 t_node_info node_record_on_primary = T_NODE_INFO_INITIALIZER;
2057 t_node_info node_record_on_standby = T_NODE_INFO_INITIALIZER;
2058
2059 node_record_status = get_node_record(primary_conn,
2060 config_file_options.node_id,
2061 &node_record_on_primary);
2062
2063 if (node_record_status != RECORD_FOUND)
2064 {
2065 log_error(_("unable to retrieve node record from primary"));
2066 PQfinish(primary_conn);
2067 PQfinish(conn);
2068 exit(ERR_REGISTRATION_SYNC);
2069 }
2070
2071 for (;;)
2072 {
2073 bool records_match = true;
2074
2075 /*
2076 * If timeout set to a positive value, check if we've reached it and
2077 * exit the loop
2078 */
2079 if (runtime_options.wait_register_sync_seconds > 0 && runtime_options.wait_register_sync_seconds == timer)
2080 break;
2081
2082 node_record_status = get_node_record(conn,
2083 config_file_options.node_id,
2084 &node_record_on_standby);
2085
2086 if (node_record_status == RECORD_NOT_FOUND)
2087 {
2088 /* no record available yet on standby */
2089 records_match = false;
2090 }
2091 else if (node_record_status == RECORD_FOUND)
2092 {
2093 /* compare relevant fields */
2094 if (node_record_on_standby.upstream_node_id != node_record_on_primary.upstream_node_id)
2095 records_match = false;
2096
2097 if (node_record_on_standby.type != node_record_on_primary.type)
2098 records_match = false;
2099
2100 if (node_record_on_standby.priority != node_record_on_primary.priority)
2101 records_match = false;
2102
2103 if (strcmp(node_record_on_standby.location, node_record_on_primary.location) != 0)
2104 records_match = false;
2105
2106 if (node_record_on_standby.active != node_record_on_primary.active)
2107 records_match = false;
2108
2109 if (strcmp(node_record_on_standby.node_name, node_record_on_primary.node_name) != 0)
2110 records_match = false;
2111
2112 if (strcmp(node_record_on_standby.conninfo, node_record_on_primary.conninfo) != 0)
2113 records_match = false;
2114
2115 if (strcmp(node_record_on_standby.slot_name, node_record_on_primary.slot_name) != 0)
2116 records_match = false;
2117
2118 if (records_match == true)
2119 {
2120 sync_ok = true;
2121 break;
2122 }
2123 }
2124
2125 sleep(1);
2126 timer++;
2127 }
2128
2129 /* Log the event */
2130 initPQExpBuffer(&details);
2131
2132 if (sync_ok == false)
2133 {
2134 appendPQExpBuffer(&details,
2135 _("node record was not synchronised after %i seconds"),
2136 runtime_options.wait_register_sync_seconds);
2137 }
2138 else
2139 {
2140 appendPQExpBuffer(&details,
2141 _("node record synchronised after %i seconds"),
2142 timer);
2143 }
2144
2145 create_event_notification_extended(
2146 primary_conn,
2147 &config_file_options,
2148 config_file_options.node_id,
2149 "standby_register_sync",
2150 sync_ok,
2151 details.data,
2152 &event_info);
2153
2154 if (sync_ok == false)
2155 {
2156 log_error("%s", details.data);
2157 termPQExpBuffer(&details);
2158 PQfinish(primary_conn);
2159 PQfinish(conn);
2160 exit(ERR_REGISTRATION_SYNC);
2161 }
2162
2163 log_info(_("node record on standby synchronised from primary"));
2164 log_detail("%s", details.data);
2165 termPQExpBuffer(&details);
2166 }
2167
2168
2169 PQfinish(primary_conn);
2170
2171 if (PQstatus(conn) == CONNECTION_OK)
2172 PQfinish(conn);
2173
2174 log_info(_("standby registration complete"));
2175 log_notice(_("standby node \"%s\" (ID: %i) successfully registered"),
2176 config_file_options.node_name, config_file_options.node_id);
2177 return;
2178 }
2179
2180
2181 /*
2182 * do_standby_unregister()
2183 *
2184 * Event(s):
2185 * - standby_unregister
2186 */
2187 void
do_standby_unregister(void)2188 do_standby_unregister(void)
2189 {
2190 PGconn *conn = NULL;
2191 PGconn *primary_conn = NULL;
2192
2193 int target_node_id = UNKNOWN_NODE_ID;
2194 t_node_info node_info = T_NODE_INFO_INITIALIZER;
2195
2196 bool node_record_deleted = false;
2197
2198 log_info(_("connecting to local standby"));
2199 conn = establish_db_connection(config_file_options.conninfo, true);
2200
2201 /* check if there is a primary in this cluster */
2202 log_info(_("connecting to primary database"));
2203
2204 primary_conn = get_primary_connection(conn, NULL, NULL);
2205
2206 if (PQstatus(primary_conn) != CONNECTION_OK)
2207 {
2208 log_error(_("unable to connect to primary server"));
2209 log_detail("\n%s", PQerrorMessage(conn));
2210 exit(ERR_BAD_CONFIG);
2211 }
2212
2213 /*
2214 * if --node-id was specified, unregister that node rather than the
2215 * current one - this enables inactive nodes to be unregistered.
2216 */
2217 if (runtime_options.node_id != UNKNOWN_NODE_ID)
2218 target_node_id = runtime_options.node_id;
2219 else
2220 target_node_id = config_file_options.node_id;
2221
2222 /* Check node exists and is really a standby */
2223
2224 if (get_node_record(primary_conn, target_node_id, &node_info) != RECORD_FOUND)
2225 {
2226 log_error(_("no record found for node %i"), target_node_id);
2227 PQfinish(primary_conn);
2228 PQfinish(conn);
2229 exit(ERR_BAD_CONFIG);
2230 }
2231
2232 if (node_info.type != STANDBY)
2233 {
2234 log_error(_("node %i is not a standby server"), target_node_id);
2235 PQfinish(primary_conn);
2236 PQfinish(conn);
2237 exit(ERR_BAD_CONFIG);
2238 }
2239
2240 /* Now unregister the standby */
2241 log_notice(_("unregistering node %i"), target_node_id);
2242 node_record_deleted = delete_node_record(primary_conn,
2243 target_node_id);
2244
2245 if (node_record_deleted == false)
2246 {
2247 PQfinish(primary_conn);
2248 PQfinish(conn);
2249 exit(ERR_BAD_CONFIG);
2250 }
2251
2252 /* Log the event */
2253 create_event_notification(primary_conn,
2254 &config_file_options,
2255 target_node_id,
2256 "standby_unregister",
2257 true,
2258 NULL);
2259
2260 PQfinish(primary_conn);
2261 PQfinish(conn);
2262
2263 log_info(_("standby unregistration complete"));
2264
2265 return;
2266 }
2267
2268
2269 /*
2270 * do_standby_promote()
2271 *
2272 * Event(s):
2273 * - standby_promote
2274 */
2275 void
do_standby_promote(void)2276 do_standby_promote(void)
2277 {
2278 PGconn *local_conn = NULL;
2279 PGconn *current_primary_conn = NULL;
2280
2281 RecoveryType recovery_type = RECTYPE_UNKNOWN;
2282
2283 int existing_primary_id = UNKNOWN_NODE_ID;
2284
2285 RecordStatus record_status = RECORD_NOT_FOUND;
2286 t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
2287
2288 NodeInfoList sibling_nodes = T_NODE_INFO_LIST_INITIALIZER;
2289 SiblingNodeStats sibling_nodes_stats = T_SIBLING_NODES_STATS_INITIALIZER;
2290 int available_wal_senders = 0;
2291 bool dry_run_success = true;
2292
2293 local_conn = establish_db_connection(config_file_options.conninfo, true);
2294
2295 log_verbose(LOG_INFO, _("connected to standby, checking its state"));
2296
2297 /* Verify that standby is a supported server version */
2298 (void) check_server_version(local_conn, "standby", true, NULL);
2299
2300 /* Check we are in a standby node */
2301 recovery_type = get_recovery_type(local_conn);
2302
2303 if (recovery_type != RECTYPE_STANDBY)
2304 {
2305 if (recovery_type == RECTYPE_PRIMARY)
2306 {
2307 log_error(_("STANDBY PROMOTE can only be executed on a standby node"));
2308 PQfinish(local_conn);
2309 exit(ERR_PROMOTION_FAIL);
2310 }
2311 else
2312 {
2313 log_error(_("unable to determine node's recovery state"));
2314 PQfinish(local_conn);
2315 exit(ERR_DB_CONN);
2316 }
2317 }
2318 else if (runtime_options.dry_run == true)
2319 {
2320 log_info(_("node is a standby"));
2321 }
2322
2323 record_status = get_node_record(local_conn, config_file_options.node_id, &local_node_record);
2324 if (record_status != RECORD_FOUND)
2325 {
2326 log_error(_("unable to retrieve node record for node %i"),
2327 config_file_options.node_id);
2328
2329 PQfinish(local_conn);
2330
2331 exit(ERR_DB_QUERY);
2332 }
2333
2334 /*
2335 * In PostgreSQL 12 and earlier, executing "pg_ctl ... promote" when WAL
2336 * replay is paused and WAL is pending replay will mean the standby will
2337 * not promote until replay is resumed.
2338 *
2339 * As that could happen at any time outside repmgr's control, we
2340 * need to avoid leaving a "ticking timebomb" which might cause
2341 * an unexpected status change in the replication cluster.
2342 */
2343 if (PQserverVersion(local_conn) < 130000)
2344 {
2345 ReplInfo replication_info;
2346 bool replay_paused = false;
2347
2348 init_replication_info(&replication_info);
2349
2350 if (get_replication_info(local_conn, STANDBY, &replication_info) == false)
2351 {
2352 log_error(_("unable to retrieve replication information from local node"));
2353 PQfinish(local_conn);
2354 exit(ERR_PROMOTION_FAIL);
2355 }
2356
2357 /*
2358 * If the local node is recovering from archive, we can't tell
2359 * whether there's still WAL which needs to be replayed, so
2360 * we'll abort if WAL replay is paused.
2361 */
2362 if (replication_info.receiving_streamed_wal == false)
2363 {
2364 /* just a simple check for paused WAL replay */
2365 replay_paused = is_wal_replay_paused(local_conn, false);
2366 if (replay_paused == true)
2367 {
2368 log_error(_("WAL replay is paused on this node"));
2369 log_detail(_("node is in archive recovery and is not safe to promote in this state"));
2370 log_detail(_("replay paused at %X/%X"),
2371 format_lsn(replication_info.last_wal_replay_lsn));
2372 }
2373 }
2374 else
2375 {
2376 /* check that replay is pause *and* WAL is pending replay */
2377 replay_paused = is_wal_replay_paused(local_conn, true);
2378 if (replay_paused == true)
2379 {
2380 log_error(_("WAL replay is paused on this node but not all WAL has been replayed"));
2381 log_detail(_("replay paused at %X/%X; last WAL received is %X/%X"),
2382 format_lsn(replication_info.last_wal_replay_lsn),
2383 format_lsn(replication_info.last_wal_receive_lsn));
2384 }
2385 }
2386
2387 if (replay_paused == true)
2388 {
2389 if (PQserverVersion(local_conn) >= 100000)
2390 log_hint(_("execute \"pg_wal_replay_resume()\" to unpause WAL replay"));
2391 else
2392 log_hint(_("execute \"pg_xlog_replay_resume()\" to npause WAL replay"));
2393
2394 PQfinish(local_conn);
2395 exit(ERR_PROMOTION_FAIL);
2396 }
2397 }
2398
2399 /* check that there's no existing primary */
2400 current_primary_conn = get_primary_connection_quiet(local_conn, &existing_primary_id, NULL);
2401
2402 if (PQstatus(current_primary_conn) == CONNECTION_OK)
2403 {
2404 log_error(_("this replication cluster already has an active primary server"));
2405
2406 if (existing_primary_id != UNKNOWN_NODE_ID)
2407 {
2408 t_node_info primary_rec;
2409
2410 get_node_record(local_conn, existing_primary_id, &primary_rec);
2411
2412 log_detail(_("current primary is \"%s\" (ID: %i)"),
2413 primary_rec.node_name,
2414 existing_primary_id);
2415 }
2416
2417 PQfinish(current_primary_conn);
2418 PQfinish(local_conn);
2419 exit(ERR_PROMOTION_FAIL);
2420 }
2421 else if (runtime_options.dry_run == true)
2422 {
2423 log_info(_("no active primary server found in this replication cluster"));
2424 }
2425
2426 PQfinish(current_primary_conn);
2427
2428 /*
2429 * populate local node record with current state of various replication-related
2430 * values, so we can check for sufficient walsenders and replication slots
2431 */
2432 get_node_replication_stats(local_conn, &local_node_record);
2433
2434 available_wal_senders = local_node_record.max_wal_senders -
2435 local_node_record.attached_wal_receivers;
2436
2437
2438 /*
2439 * Get list of sibling nodes; if --siblings-follow specified,
2440 * check they're reachable; if not, the list will be used to warn
2441 * about nodes which will not follow the new primary
2442 */
2443 get_active_sibling_node_records(local_conn,
2444 local_node_record.node_id,
2445 local_node_record.upstream_node_id,
2446 &sibling_nodes);
2447
2448 if (check_sibling_nodes(&sibling_nodes, &sibling_nodes_stats) == false)
2449 {
2450 PQfinish(local_conn);
2451 exit(ERR_BAD_CONFIG);
2452 }
2453
2454 /*
2455 * check there are sufficient free walsenders - obviously there's potential
2456 * for a later race condition if some walsenders come into use before the
2457 * promote operation gets around to attaching the sibling nodes, but
2458 * this should catch any actual existing configuration issue (and if anyone's
2459 * performing a promote in such an unstable environment, they only have
2460 * themselves to blame).
2461 */
2462 if (check_free_wal_senders(available_wal_senders, &sibling_nodes_stats, &dry_run_success) == false)
2463 {
2464 if (runtime_options.dry_run == false || runtime_options.force == false)
2465 {
2466 PQfinish(local_conn);
2467 exit(ERR_BAD_CONFIG);
2468 }
2469 }
2470
2471
2472 /*
2473 * if replication slots are required by siblings,
2474 * check the promotion candidate has sufficient free slots
2475 */
2476 if (check_free_slots(&local_node_record, &sibling_nodes_stats, &dry_run_success) == false)
2477 {
2478 if (runtime_options.dry_run == false || runtime_options.force == false)
2479 {
2480 PQfinish(local_conn);
2481 exit(ERR_BAD_CONFIG);
2482 }
2483 }
2484
2485 /*
2486 * In --dry-run mode, note which promotion method will be used.
2487 * For Pg12 and later, check whether pg_promote() can be executed.
2488 */
2489 if (runtime_options.dry_run == true)
2490 {
2491 if (config_file_options.service_promote_command[0] != '\0')
2492 {
2493 log_info(_("node will be promoted using command defined in \"service_promote_command\""));
2494 log_detail(_("\"service_promote_command\" is \"%s\""),
2495 config_file_options.service_promote_command);
2496 }
2497 else if (PQserverVersion(local_conn) >= 120000)
2498 {
2499 if (can_execute_pg_promote(local_conn) == false)
2500 {
2501 log_info(_("node will be promoted using \"pg_ctl promote\""));
2502 log_detail(_("user \"%s\" does not have permission to execute \"pg_promote()\""),
2503 PQuser(local_conn));
2504 }
2505 else
2506 {
2507 log_info(_("node will be promoted using the \"pg_promote()\" function"));
2508 }
2509 }
2510 else
2511 {
2512 log_info(_("node will be promoted using \"pg_ctl promote\""));
2513 }
2514 }
2515
2516 if (runtime_options.dry_run == true)
2517 {
2518 PQfinish(local_conn);
2519
2520 if (dry_run_success == false)
2521 {
2522 log_error(_("prerequisites for executing STANDBY PROMOTE are *not* met"));
2523 log_hint(_("see preceding error messages"));
2524 exit(ERR_BAD_CONFIG);
2525 }
2526 log_info(_("prerequisites for executing STANDBY PROMOTE are met"));
2527 exit(SUCCESS);
2528 }
2529
2530 _do_standby_promote_internal(local_conn);
2531
2532 /*
2533 * If --siblings-follow specified, attempt to make them follow the new
2534 * primary
2535 */
2536 if (runtime_options.siblings_follow == true && sibling_nodes.node_count > 0)
2537 {
2538 sibling_nodes_follow(&local_node_record, &sibling_nodes, &sibling_nodes_stats);
2539 }
2540
2541 clear_node_info_list(&sibling_nodes);
2542
2543 return;
2544 }
2545
2546
2547 static void
_do_standby_promote_internal(PGconn * conn)2548 _do_standby_promote_internal(PGconn *conn)
2549 {
2550 int i;
2551 bool promote_success = false;
2552 PQExpBufferData details;
2553
2554 RecoveryType recovery_type = RECTYPE_UNKNOWN;
2555
2556 t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
2557 RecordStatus record_status = RECORD_NOT_FOUND;
2558 char data_dir[MAXPGPATH];
2559
2560 get_node_config_directory(data_dir);
2561
2562 /* fetch local node record so we can add detail in log messages */
2563 record_status = get_node_record(conn,
2564 config_file_options.node_id,
2565 &local_node_record);
2566
2567 if (record_status != RECORD_FOUND)
2568 {
2569 log_error(_("unable to retrieve record for node %i"),
2570 config_file_options.node_id);
2571 PQfinish(conn);
2572 exit(ERR_BAD_CONFIG);
2573 }
2574
2575
2576 /*
2577 * Promote standby to primary.
2578 *
2579 * "pg_ctl promote: returns immediately and (prior to 10.0) has no -w
2580 * option so we can't be sure when or if the promotion completes. For now
2581 * we'll poll the server until the default timeout (60 seconds)
2582 *
2583 * For PostgreSQL 12+, use the pg_promote() function, unless one of
2584 * "service_promote_command" or "use_pg_ctl_promote" is set.
2585 */
2586 {
2587 bool use_pg_promote = false;
2588
2589
2590 if (PQserverVersion(conn) >= 120000)
2591 {
2592 use_pg_promote = true;
2593
2594 if (config_file_options.service_promote_command[0] != '\0')
2595 {
2596 use_pg_promote = false;
2597 }
2598 else if (can_execute_pg_promote(conn) == false)
2599 {
2600 use_pg_promote = false;
2601 log_info(_("user \"%s\" does not have permission to execute \"pg_promote()\", falling back to \"pg_ctl promote\""),
2602 PQuser(conn));
2603 }
2604 }
2605
2606 log_notice(_("promoting standby to primary"));
2607
2608 if (use_pg_promote == true)
2609 {
2610 log_detail(_("promoting server \"%s\" (ID: %i) using pg_promote()"),
2611 local_node_record.node_name,
2612 local_node_record.node_id);
2613
2614 /*
2615 * We'll check for promotion success ourselves, but will abort
2616 * if some unrecoverable error prevented the function from being
2617 * executed.
2618 */
2619 if (!promote_standby(conn, false, 0))
2620 {
2621 log_error(_("unable to promote server from standby to primary"));
2622 exit(ERR_PROMOTION_FAIL);
2623 }
2624 }
2625 else
2626 {
2627 char script[MAXLEN];
2628 int r;
2629
2630 get_server_action(ACTION_PROMOTE, script, (char *) data_dir);
2631
2632 log_detail(_("promoting server \"%s\" (ID: %i) using \"%s\""),
2633 local_node_record.node_name,
2634 local_node_record.node_id,
2635 script);
2636
2637 r = system(script);
2638 if (r != 0)
2639 {
2640 log_error(_("unable to promote server from standby to primary"));
2641 exit(ERR_PROMOTION_FAIL);
2642 }
2643 }
2644 }
2645
2646 log_notice(_("waiting up to %i seconds (parameter \"promote_check_timeout\") for promotion to complete"),
2647 config_file_options.promote_check_timeout);
2648
2649 for (i = 0; i < config_file_options.promote_check_timeout; i += config_file_options.promote_check_interval)
2650 {
2651 recovery_type = get_recovery_type(conn);
2652
2653 if (recovery_type == RECTYPE_PRIMARY)
2654 {
2655 promote_success = true;
2656 break;
2657 }
2658 sleep(config_file_options.promote_check_interval);
2659 }
2660
2661 if (promote_success == false)
2662 {
2663 if (recovery_type == RECTYPE_STANDBY)
2664 {
2665 log_error(_("STANDBY PROMOTE failed, node is still a standby"));
2666 log_detail(_("node still in recovery after %i seconds"), config_file_options.promote_check_timeout);
2667 log_hint(_("the node may need more time to promote itself, check the PostgreSQL log for details"));
2668 PQfinish(conn);
2669 exit(ERR_PROMOTION_FAIL);
2670 }
2671 else
2672 {
2673 log_error(_("connection to node lost"));
2674 PQfinish(conn);
2675 exit(ERR_DB_CONN);
2676 }
2677 }
2678
2679 log_verbose(LOG_INFO, _("standby promoted to primary after %i second(s)"), i);
2680
2681 /* update node information to reflect new status */
2682 if (update_node_record_set_primary(conn, config_file_options.node_id) == false)
2683 {
2684 initPQExpBuffer(&details);
2685 appendPQExpBuffer(&details,
2686 _("unable to update node record for node %i"),
2687 config_file_options.node_id);
2688
2689 log_error("%s", details.data);
2690
2691 create_event_notification(NULL,
2692 &config_file_options,
2693 config_file_options.node_id,
2694 "standby_promote",
2695 false,
2696 details.data);
2697
2698 exit(ERR_DB_QUERY);
2699 }
2700
2701
2702 initPQExpBuffer(&details);
2703 appendPQExpBuffer(&details,
2704 _("server \"%s\" (ID: %i) was successfully promoted to primary"),
2705 local_node_record.node_name,
2706 local_node_record.node_id);
2707
2708 log_notice(_("STANDBY PROMOTE successful"));
2709 log_detail("%s", details.data);
2710
2711 /* Log the event */
2712 create_event_notification(conn,
2713 &config_file_options,
2714 config_file_options.node_id,
2715 "standby_promote",
2716 true,
2717 details.data);
2718
2719 termPQExpBuffer(&details);
2720
2721 return;
2722 }
2723
2724
2725 /*
2726 * Follow a new primary.
2727 *
2728 * Node must be running. To start an inactive node and point it at a
2729 * new primary, use "repmgr node rejoin".
2730 *
2731 * TODO: enable provision of new primary's conninfo parameters, which
2732 * will be necessary if the primary's information has changed, but
2733 * was not replicated to the current standby.
2734 */
2735
void
do_standby_follow(void)
{
	PGconn	   *local_conn = NULL;
	t_node_info local_node_record = T_NODE_INFO_INITIALIZER;

	PGconn	   *primary_conn = NULL;
	int			primary_node_id = UNKNOWN_NODE_ID;

	/* the node we will attach to; the primary unless --upstream-node-id given */
	PGconn	   *follow_target_conn = NULL;
	int			follow_target_node_id = UNKNOWN_NODE_ID;
	t_node_info follow_target_node_record = T_NODE_INFO_INITIALIZER;
	bool		follow_target_is_primary = true;

	RecordStatus record_status = RECORD_NOT_FOUND;
	/* so we can pass info about the primary to event notification scripts */
	t_event_info event_info = T_EVENT_INFO_INITIALIZER;

	int			timer = 0;

	PQExpBufferData follow_output;
	bool		success = false;
	int			follow_error_code = SUCCESS;

	log_verbose(LOG_DEBUG, "do_standby_follow()");

	local_conn = establish_db_connection(config_file_options.conninfo, false);

	if (PQstatus(local_conn) != CONNECTION_OK)
	{
		/* the node must be running; "node rejoin" handles inactive nodes */
		log_hint(_("use \"repmgr node rejoin\" to re-add an inactive node to the replication cluster"));
		exit(ERR_DB_CONN);
	}

	log_verbose(LOG_INFO, _("connected to local node"));

	/* check this is a standby */
	check_recovery_type(local_conn);

	/* attempt to retrieve local node record */
	record_status = get_node_record(local_conn,
									config_file_options.node_id,
									&local_node_record);

	if (record_status != RECORD_FOUND)
	{
		log_error(_("unable to retrieve record for local node %i"),
				  config_file_options.node_id);
		PQfinish(local_conn);
		exit(ERR_BAD_CONFIG);
	}

	/*
	 * --upstream-node-id provided - attempt to follow that node
	 */
	if (runtime_options.upstream_node_id != UNKNOWN_NODE_ID)
	{
		/* we can't follow ourselves */
		if (runtime_options.upstream_node_id == config_file_options.node_id)
		{
			log_error(_("provided --upstream-node-id %i is the current node"),
					  runtime_options.upstream_node_id);
			PQfinish(local_conn);
			exit(ERR_FOLLOW_FAIL);
		}

		follow_target_node_id = runtime_options.upstream_node_id;
		record_status = get_node_record(local_conn,
										follow_target_node_id,
										&follow_target_node_record);

		/* but we must follow a node which exists (=registered) */
		if (record_status != RECORD_FOUND)
		{
			log_error(_("unable to find record for intended upstream node %i"),
					  runtime_options.upstream_node_id);
			PQfinish(local_conn);
			exit(ERR_FOLLOW_FAIL);
		}
	}
	/*
	 * otherwise determine the current primary and attempt to follow that
	 */
	else
	{
		log_notice(_("attempting to find and follow current primary"));
	}

	/*
	 * Attempt to connect to follow target - if this was provided with --upstream-node-id,
	 * we'll connect to that, otherwise we'll attempt to find the current primary.
	 *
	 * If --wait provided, loop for up `primary_follow_timeout` seconds
	 * before giving up
	 *
	 * XXX add `upstream_follow_timeout` ?
	 */

	for (timer = 0; timer < config_file_options.primary_follow_timeout; timer++)
	{
		/* --upstream-node-id provided - connect to specified node*/
		if (follow_target_node_id != UNKNOWN_NODE_ID)
		{
			follow_target_conn = establish_db_connection_quiet(follow_target_node_record.conninfo);
		}
		/* attempt to find current primary node */
		else
		{
			follow_target_conn = get_primary_connection_quiet(local_conn,
															  &follow_target_node_id,
															  NULL);
		}

		/* without --wait, a single failed attempt is final */
		if (PQstatus(follow_target_conn) == CONNECTION_OK || runtime_options.wait_provided == false)
		{
			break;
		}
		sleep(1);
	}

	/* unable to connect to the follow target */
	if (PQstatus(follow_target_conn) != CONNECTION_OK)
	{
		if (follow_target_node_id == UNKNOWN_NODE_ID)
		{
			log_error(_("unable to find a primary node"));
		}
		else
		{
			log_error(_("unable to connect to target node %i"), follow_target_node_id);
		}

		if (runtime_options.wait_provided == true)
		{
			if (follow_target_node_id == UNKNOWN_NODE_ID)
			{
				log_detail(_("no primary appeared after %i seconds"),
						   config_file_options.primary_follow_timeout);
			}
			else
			{
				log_detail(_("unable to connect to target node %i after %i seconds"),
						   follow_target_node_id,
						   config_file_options.primary_follow_timeout);
			}

			log_hint(_("alter \"primary_follow_timeout\" in \"repmgr.conf\" to change this value"));
		}

		PQfinish(local_conn);
		exit(ERR_FOLLOW_FAIL);
	}

	/* --upstream-node-id not provided - retrieve record for node determined as primary */
	if (runtime_options.upstream_node_id == UNKNOWN_NODE_ID)
	{
		if (runtime_options.dry_run == true)
		{
			log_info(_("connected to node %i, checking for current primary"), follow_target_node_id);
		}
		else
		{
			log_verbose(LOG_INFO, _("connected to node %i, checking for current primary"), follow_target_node_id);
		}

		record_status = get_node_record(follow_target_conn,
										follow_target_node_id,
										&follow_target_node_record);

		if (record_status != RECORD_FOUND)
		{
			log_error(_("unable to find record for follow target node %i"),
					  follow_target_node_id);
			PQfinish(follow_target_conn);
			exit(ERR_FOLLOW_FAIL);
		}
	}

	/*
	 * Populate "event_info" with info about the node to follow for event notifications
	 *
	 * XXX need to differentiate between primary and non-primary?
	 */
	event_info.node_id = follow_target_node_id;
	event_info.node_name = follow_target_node_record.node_name;
	event_info.conninfo_str = follow_target_node_record.conninfo;

	/*
	 * Check whether follow target is in recovery, so we know later whether
	 * we'll need to open a connection to the primary to update the metadata.
	 * Also emit an informative message.
	 */
	{
		PQExpBufferData node_info_msg;
		RecoveryType recovery_type = RECTYPE_UNKNOWN;
		initPQExpBuffer(&node_info_msg);

		recovery_type = get_recovery_type(follow_target_conn);

		/*
		 * unlikely this will happen, but it's conceivable the follow target will
		 * have vanished since we last talked to it, or something
		 */
		if (recovery_type == RECTYPE_UNKNOWN)
		{
			log_error(_("unable to determine recovery type of follow target"));
			PQfinish(follow_target_conn);
			exit(ERR_FOLLOW_FAIL);
		}

		if (recovery_type == RECTYPE_PRIMARY)
		{
			follow_target_is_primary = true;
			appendPQExpBuffer(&node_info_msg,
							  _("follow target is primary node \"%s\" (ID: %i)"),
							  follow_target_node_record.node_name,
							  follow_target_node_id);
		}
		else
		{
			follow_target_is_primary = false;
			appendPQExpBuffer(&node_info_msg,
							  _("follow target is standby node \"%s\" (ID: %i)"),
							  follow_target_node_record.node_name,
							  follow_target_node_id);
		}

		if (runtime_options.dry_run == true)
		{
			log_info("%s", node_info_msg.data);
		}
		else
		{
			log_verbose(LOG_INFO, "%s", node_info_msg.data);
		}

		termPQExpBuffer(&node_info_msg);
	}

	/*
	 * if replication slots in use, check at least one free slot is available
	 * on the follow target
	 */

	if (config_file_options.use_replication_slots)
	{
		bool		slots_available = check_replication_slots_available(follow_target_node_id,
																		follow_target_conn);
		if (slots_available == false)
		{
			PQfinish(follow_target_conn);
			PQfinish(local_conn);
			exit(ERR_FOLLOW_FAIL);
		}
	}

	/* XXX check this is not current upstream anyway */

	/* check if we can attach to the follow target */
	{
		PGconn	   *local_repl_conn = NULL;
		t_system_identification local_identification = T_SYSTEM_IDENTIFICATION_INITIALIZER;

		bool		can_follow;
		XLogRecPtr	local_xlogpos = get_node_current_lsn(local_conn);

		/* Check local replication connection - we want to execute IDENTIFY_SYSTEM
		 * to get the current timeline ID, which might not yet be written to
		 * pg_control.
		 *
		 * TODO: from 9.6, query "pg_stat_wal_receiver" via the existing local connection
		 */

		local_repl_conn = establish_replication_connection_from_conn(local_conn,
																	 local_node_record.repluser);
		if (PQstatus(local_repl_conn) != CONNECTION_OK)
		{
			log_error(_("unable to establish a replication connection to the local node"));

			PQfinish(local_conn);
			PQfinish(follow_target_conn);

			exit(ERR_FOLLOW_FAIL);
		}
		else if (runtime_options.dry_run == true)
		{
			log_info(_("replication connection to the local node was successful"));
		}

		success = identify_system(local_repl_conn, &local_identification);
		PQfinish(local_repl_conn);

		if (success == false)
		{
			log_error(_("unable to query the local node's system identification"));

			PQfinish(local_conn);

			PQfinish(follow_target_conn);

			exit(ERR_FOLLOW_FAIL);
		}

		/*
		 * verify the local timeline/LSN are compatible with the follow
		 * target before attempting to attach
		 */
		can_follow = check_node_can_attach(local_identification.timeline,
										   local_xlogpos,
										   follow_target_conn,
										   &follow_target_node_record,
										   false);

		if (can_follow == false)
		{
			PQfinish(local_conn);
			PQfinish(follow_target_conn);
			exit(ERR_FOLLOW_FAIL);
		}
	}

	PQfinish(local_conn);

	/*
	 * Here we'll need a connection to the primary, if the upstream is not a primary.
	 */
	if (follow_target_is_primary == false)
	{
		/*
		 * We'll try and establish primary from follow target, in the assumption its node
		 * record is more up-to-date.
		 */
		primary_conn = get_primary_connection_quiet(follow_target_conn,
													&primary_node_id,
													NULL);

		/*
		 * If follow target is not primary and no other primary could be found,
		 * abort because we won't be able to update the node record.
		 */
		if (PQstatus(primary_conn) != CONNECTION_OK)
		{
			log_error(_("unable to determine the cluster primary"));
			log_detail(_("an active primary node is required for \"repmgr standby follow\""));
			PQfinish(follow_target_conn);
			exit(ERR_FOLLOW_FAIL);
		}
	}
	else
	{
		/* follow target is itself the primary; reuse its connection */
		primary_conn = follow_target_conn;
	}

	if (runtime_options.dry_run == true)
	{
		log_info(_("prerequisites for executing STANDBY FOLLOW are met"));
		exit(SUCCESS);
	}

	initPQExpBuffer(&follow_output);

	/* perform the actual follow operation (shared with "node rejoin") */
	success = do_standby_follow_internal(
										 primary_conn,
										 follow_target_conn,
										 &follow_target_node_record,
										 &follow_output,
										 ERR_FOLLOW_FAIL,
										 &follow_error_code);

	/* unable to restart the standby */
	if (success == false)
	{
		create_event_notification_extended(
										   follow_target_conn,
										   &config_file_options,
										   config_file_options.node_id,
										   "standby_follow",
										   success,
										   follow_output.data,
										   &event_info);

		PQfinish(follow_target_conn);

		/* only close primary_conn if it's a separate connection */
		if (follow_target_is_primary == false)
			PQfinish(primary_conn);

		log_notice(_("STANDBY FOLLOW failed"));
		if (strlen( follow_output.data ))
			log_detail("%s", follow_output.data);

		termPQExpBuffer(&follow_output);
		exit(follow_error_code);
	}

	termPQExpBuffer(&follow_output);

	initPQExpBuffer(&follow_output);

	/*
	 * Wait up to "standby_follow_timeout" seconds for standby to connect to
	 * upstream.
	 * For 9.6 and later, we could check pg_stat_wal_receiver on the local node.
	 */

	/* assume success, necessary if standby_follow_timeout is zero */
	success = true;

	for (timer = 0; timer < config_file_options.standby_follow_timeout; timer++)
	{
		NodeAttached node_attached = is_downstream_node_attached(follow_target_conn,
																 config_file_options.node_name,
																 NULL);

		if (node_attached == NODE_ATTACHED)
		{
			success = true;
			break;
		}

		log_verbose(LOG_DEBUG, "sleeping %i of max %i seconds waiting for standby to attach to primary",
					timer + 1,
					config_file_options.standby_follow_timeout);
		sleep(1);
	}

	if (success == true)
	{
		log_notice(_("STANDBY FOLLOW successful"));
		appendPQExpBuffer(&follow_output,
						  "standby attached to upstream node \"%s\" (ID: %i)",
						  follow_target_node_record.node_name,
						  follow_target_node_id);
	}
	else
	{
		log_error(_("STANDBY FOLLOW failed"));
		appendPQExpBuffer(&follow_output,
						  "standby did not attach to upstream node \"%s\" (ID: %i) after %i seconds",
						  follow_target_node_record.node_name,
						  follow_target_node_id,
						  config_file_options.standby_follow_timeout);

	}

	log_detail("%s", follow_output.data);

	create_event_notification_extended(
									   primary_conn,
									   &config_file_options,
									   config_file_options.node_id,
									   "standby_follow",
									   success,
									   follow_output.data,
									   &event_info);

	termPQExpBuffer(&follow_output);

	PQfinish(follow_target_conn);

	/* only close primary_conn if it's a separate connection */
	if (follow_target_is_primary == false)
		PQfinish(primary_conn);

	if (success == false)
		exit(ERR_FOLLOW_FAIL);

	return;
}
3199
3200
3201 /*
3202 * Perform the actual "follow" operation; this is executed by
3203 * "node rejoin" too.
3204 */
3205 bool
do_standby_follow_internal(PGconn * primary_conn,PGconn * follow_target_conn,t_node_info * follow_target_node_record,PQExpBufferData * output,int general_error_code,int * error_code)3206 do_standby_follow_internal(PGconn *primary_conn, PGconn *follow_target_conn, t_node_info *follow_target_node_record, PQExpBufferData *output, int general_error_code, int *error_code)
3207 {
3208 t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
3209 int original_upstream_node_id = UNKNOWN_NODE_ID;
3210 t_node_info original_upstream_node_record = T_NODE_INFO_INITIALIZER;
3211
3212 RecordStatus record_status = RECORD_NOT_FOUND;
3213 char *errmsg = NULL;
3214
3215 bool remove_old_replication_slot = false;
3216
3217 /*
3218 * Fetch our node record so we can write application_name, if set, and to
3219 * get the current upstream node ID, which we'll need to know if replication
3220 * slots are in use and we want to delete this node's slot on the current
3221 * upstream.
3222 */
3223 record_status = get_node_record(primary_conn,
3224 config_file_options.node_id,
3225 &local_node_record);
3226
3227 if (record_status != RECORD_FOUND)
3228 {
3229 log_error(_("unable to retrieve record for node %i"),
3230 config_file_options.node_id);
3231
3232 *error_code = ERR_BAD_CONFIG;
3233 return false;
3234 }
3235
3236 /*
3237 * If replication slots are in use, we'll need to create a slot on the
3238 * follow target
3239 */
3240
3241 if (config_file_options.use_replication_slots)
3242 {
3243 /*
3244 * Here we add a sanity check for the "slot_name" field - it's possible
3245 * the node was initially registered with "use_replication_slots=false"
3246 * but the configuration was subsequently changed, leaving the field NULL.
3247 *
3248 * To avoid annoying failures we can just update the node record and proceed.
3249 */
3250
3251 if (!strlen(local_node_record.slot_name))
3252 {
3253 create_slot_name(local_node_record.slot_name, config_file_options.node_id);
3254
3255 log_notice(_("setting node %i's slot name to \"%s\""),
3256 config_file_options.node_id,
3257 local_node_record.slot_name);
3258
3259 update_node_record_slot_name(primary_conn, config_file_options.node_id, local_node_record.slot_name);
3260 }
3261
3262
3263 if (create_replication_slot(follow_target_conn,
3264 local_node_record.slot_name,
3265 NULL,
3266 output) == false)
3267 {
3268 log_error("%s", output->data);
3269
3270 *error_code = general_error_code;
3271
3272 return false;
3273 }
3274 }
3275
3276 /*
3277 * Store the original upstream node id so we can delete the
3278 * replication slot, if it exists.
3279 */
3280 if (local_node_record.upstream_node_id != UNKNOWN_NODE_ID)
3281 {
3282 original_upstream_node_id = local_node_record.upstream_node_id;
3283 }
3284 else
3285 {
3286 original_upstream_node_id = follow_target_node_record->node_id;
3287 }
3288
3289 if (config_file_options.use_replication_slots && runtime_options.host_param_provided == false)
3290 {
3291 /*
3292 * Only attempt to delete the old replication slot if the old upstream
3293 * node is known and is different to the follow target node.
3294 */
3295 if (original_upstream_node_id != UNKNOWN_NODE_ID
3296 && original_upstream_node_id != follow_target_node_record->node_id)
3297 {
3298 remove_old_replication_slot = true;
3299 }
3300 }
3301
3302 /* Fetch original upstream's record */
3303 if (remove_old_replication_slot == true)
3304 {
3305 PGconn *local_conn = NULL;
3306 RecordStatus upstream_record_status = RECORD_NOT_FOUND;
3307
3308 /* abort if local connection not available */
3309 local_conn = establish_db_connection(config_file_options.conninfo, true);
3310
3311 upstream_record_status = get_node_record(local_conn,
3312 original_upstream_node_id,
3313 &original_upstream_node_record);
3314 PQfinish(local_conn);
3315
3316 if (upstream_record_status != RECORD_FOUND)
3317 {
3318 log_warning(_("unable to retrieve node record for old upstream node %i"),
3319 original_upstream_node_id);
3320 log_detail(_("replication slot will need to be removed manually"));
3321 }
3322 }
3323
3324 /* Initialise connection parameters to write as "primary_conninfo" */
3325 initialize_conninfo_params(&recovery_conninfo, false);
3326
3327 /* We ignore any application_name set in the primary's conninfo */
3328 parse_conninfo_string(follow_target_node_record->conninfo, &recovery_conninfo, &errmsg, true);
3329
3330 /* Set the application name to this node's name */
3331 param_set(&recovery_conninfo, "application_name", config_file_options.node_name);
3332
3333 /* Set the replication user from the follow target node record */
3334 param_set(&recovery_conninfo, "user", follow_target_node_record->repluser);
3335
3336 log_notice(_("setting node %i's upstream to node %i"),
3337 config_file_options.node_id, follow_target_node_record->node_id);
3338
3339 if (!create_recovery_file(&local_node_record,
3340 &recovery_conninfo,
3341 PQserverVersion(primary_conn),
3342 config_file_options.data_directory,
3343 true))
3344 {
3345 *error_code = general_error_code;
3346 return false;
3347 }
3348
3349 /*
3350 * start/restart the service
3351 */
3352
3353 {
3354 char server_command[MAXLEN] = "";
3355 bool server_up = is_server_available(config_file_options.conninfo);
3356 char *action = NULL;
3357 bool success;
3358
3359 PQExpBufferData output_buf;
3360 initPQExpBuffer(&output_buf);
3361
3362 if (server_up == true)
3363 {
3364
3365 if (PQserverVersion(primary_conn) >= 130000 && config_file_options.standby_follow_restart == false)
3366 {
3367 /* PostgreSQL 13 and later: we'll send SIGHUP via pg_ctl */
3368 get_server_action(ACTION_RELOAD, server_command, config_file_options.data_directory);
3369
3370 success = local_command(server_command, &output_buf);
3371
3372 if (success == true)
3373 {
3374 goto cleanup;
3375 }
3376
3377 /* In the unlikley event that fails, we'll fall back to a restart */
3378 log_warning(_("unable to reload server configuration"));
3379 }
3380
3381 if (config_file_options.service_restart_command[0] == '\0')
3382 {
3383 /* no "service_restart_command" defined - stop and start using pg_ctl */
3384
3385 action = "stopp"; /* sic */
3386 get_server_action(ACTION_STOP_WAIT, server_command, config_file_options.data_directory);
3387
3388 /* if translation needed, generate messages in the preceding if/else */
3389 log_notice(_("%sing server using \"%s\""),
3390 action,
3391 server_command);
3392
3393 success = local_command(server_command, &output_buf);
3394
3395 if (success == false)
3396 {
3397 log_error(_("unable to %s server"), action);
3398
3399 *error_code = ERR_NO_RESTART;
3400 return false;
3401 }
3402
3403 action = "start";
3404 get_server_action(ACTION_START, server_command, config_file_options.data_directory);
3405
3406 /* if translation needed, generate messages in the preceding if/else */
3407 log_notice(_("%sing server using \"%s\""),
3408 action,
3409 server_command);
3410
3411 success = local_command(server_command, &output_buf);
3412
3413 if (success == false)
3414 {
3415 log_error(_("unable to %s server"), action);
3416
3417 *error_code = ERR_NO_RESTART;
3418 return false;
3419 }
3420
3421 }
3422 else
3423 {
3424 action = "restart";
3425 get_server_action(ACTION_RESTART, server_command, config_file_options.data_directory);
3426
3427 /* if translation needed, generate messages in the preceding if/else */
3428 log_notice(_("%sing server using \"%s\""),
3429 action,
3430 server_command);
3431
3432 success = local_command(server_command, &output_buf);
3433
3434 if (success == false)
3435 {
3436 log_error(_("unable to %s server"), action);
3437
3438 *error_code = ERR_NO_RESTART;
3439 return false;
3440 }
3441
3442 }
3443 }
3444 else
3445 {
3446 action = "start";
3447 get_server_action(ACTION_START, server_command, config_file_options.data_directory);
3448
3449 /* if translation needed, generate messages in the preceding if/else */
3450 log_notice(_("%sing server using \"%s\""),
3451 action,
3452 server_command);
3453
3454 success = local_command(server_command, &output_buf);
3455
3456 if (success == false)
3457 {
3458 log_error(_("unable to %s server"), action);
3459
3460 *error_code = ERR_NO_RESTART;
3461 return false;
3462 }
3463 }
3464 }
3465
3466 cleanup:
3467 /*
3468 * If replication slots are in use, and an inactive one for this node
3469 * exists on the former upstream, drop it.
3470 *
3471 * Note that if this function is called by do_standby_switchover(), the
3472 * "repmgr node rejoin" command executed on the demotion candidate may already
3473 * have removed the slot, so there may be nothing to do.
3474 */
3475
3476 if (remove_old_replication_slot == true)
3477 {
3478 if (original_upstream_node_record.node_id != UNKNOWN_NODE_ID)
3479 {
3480 PGconn *old_upstream_conn = establish_db_connection_quiet(original_upstream_node_record.conninfo);
3481
3482 if (PQstatus(old_upstream_conn) != CONNECTION_OK)
3483 {
3484 log_warning(_("unable to connect to old upstream node %i to remove replication slot"),
3485 original_upstream_node_id);
3486 log_hint(_("if reusing this node, you should manually remove any inactive replication slots"));
3487 }
3488 else
3489 {
3490 drop_replication_slot_if_exists(old_upstream_conn,
3491 original_upstream_node_id,
3492 local_node_record.slot_name);
3493 PQfinish(old_upstream_conn);
3494 }
3495 }
3496 }
3497
3498 /*
3499 * It's possible this node was an inactive primary - update the relevant
3500 * fields to ensure it's marked as an active standby
3501 */
3502 if (update_node_record_status(primary_conn,
3503 config_file_options.node_id,
3504 "standby",
3505 follow_target_node_record->node_id,
3506 true) == false)
3507 {
3508 appendPQExpBufferStr(output,
3509 _("unable to update upstream node"));
3510 return false;
3511 }
3512
3513 appendPQExpBuffer(output,
3514 _("node %i is now attached to node %i"),
3515 config_file_options.node_id,
3516 follow_target_node_record->node_id);
3517
3518 return true;
3519 }
3520
3521
3522 /*
3523 * Perform a switchover by:
3524 *
3525 * - stopping current primary node
3526 * - promoting this standby node to primary
3527 * - forcing the previous primary node to follow this node
3528 *
3529 * Where running and not already paused, repmgrd will be paused (and
3530 * subsequently unpaused), unless --repmgrd-no-pause provided.
3531 *
3532 * Note that this operation can only be considered to have failed completely
3533 * ("ERR_SWITCHOVER_FAIL") in these situations:
3534 *
3535 * - the prerequisites for a switchover are not met
3536 * - the demotion candidate could not be shut down cleanly
3537 * - the promotion candidate could not be promoted
3538 *
3539 * All other failures (demotion candidate did not connect to new primary etc.)
3540 * are considered partial failures ("ERR_SWITCHOVER_INCOMPLETE")
3541 *
3542 * TODO:
3543 * - make connection test timeouts/intervals configurable (see below)
3544 */
3545
3546
3547 void
do_standby_switchover(void)
3549 {
3550 PGconn *local_conn = NULL;
3551 PGconn *superuser_conn = NULL;
3552 PGconn *remote_conn = NULL;
3553
3554 t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
3555
3556 /* the remote server is the primary to be demoted */
3557 char remote_conninfo[MAXCONNINFO] = "";
3558 char remote_host[MAXLEN] = "";
3559 int remote_node_id = UNKNOWN_NODE_ID;
3560 t_node_info remote_node_record = T_NODE_INFO_INITIALIZER;
3561 int remote_repmgr_version = UNKNOWN_REPMGR_VERSION_NUM;
3562
3563 RecordStatus record_status = RECORD_NOT_FOUND;
3564 RecoveryType recovery_type = RECTYPE_UNKNOWN;
3565 PQExpBufferData remote_command_str;
3566 PQExpBufferData command_output;
3567 PQExpBufferData node_rejoin_options;
3568 PQExpBufferData errmsg;
3569 PQExpBufferData detailmsg;
3570
3571 int r,
3572 i;
3573 bool command_success = false;
3574 bool shutdown_success = false;
3575 bool dry_run_success = true;
3576
3577 /* this flag will use to generate the final message generated */
3578 bool switchover_success = true;
3579
3580 XLogRecPtr remote_last_checkpoint_lsn = InvalidXLogRecPtr;
3581 ReplInfo replication_info;
3582
3583 /* store list of configuration files on the demotion candidate */
3584 KeyValueList remote_config_files = {NULL, NULL};
3585
3586 NodeInfoList sibling_nodes = T_NODE_INFO_LIST_INITIALIZER;
3587 SiblingNodeStats sibling_nodes_stats = T_SIBLING_NODES_STATS_INITIALIZER;
3588
3589 /* this will be calculated as max_wal_senders - COUNT(*) FROM pg_stat_replication */
3590 int available_wal_senders = 0;
3591
3592 t_event_info event_info = T_EVENT_INFO_INITIALIZER;
3593
3594 /* used for handling repmgrd pause/unpause */
3595 NodeInfoList all_nodes = T_NODE_INFO_LIST_INITIALIZER;
3596 RepmgrdInfo **repmgrd_info = NULL;
3597 int repmgrd_running_count = 0;
3598
3599 /* number of free walsenders required on promotion candidate
3600 * (at least one will be required for the demotion candidate)
3601 */
3602 sibling_nodes_stats.min_required_wal_senders = 1;
3603
3604 /*
3605 * SANITY CHECKS
3606 *
3607 * We'll be doing a bunch of operations on the remote server (primary to
3608 * be demoted) - careful checks needed before proceding.
3609 */
3610
3611 local_conn = establish_db_connection(config_file_options.conninfo, true);
3612
3613 /* Verify that standby is a supported server version */
3614 (void) check_server_version(local_conn, "standby", true, NULL);
3615
3616 record_status = get_node_record(local_conn, config_file_options.node_id, &local_node_record);
3617 if (record_status != RECORD_FOUND)
3618 {
3619 log_error(_("unable to retrieve node record for node %i"),
3620 config_file_options.node_id);
3621
3622 PQfinish(local_conn);
3623
3624 exit(ERR_DB_QUERY);
3625 }
3626
3627 if (!is_streaming_replication(local_node_record.type))
3628 {
3629 log_error(_("switchover can only performed with streaming replication"));
3630 PQfinish(local_conn);
3631 exit(ERR_BAD_CONFIG);
3632 }
3633
3634 if (runtime_options.dry_run == true)
3635 {
3636 log_notice(_("checking switchover on node \"%s\" (ID: %i) in --dry-run mode"),
3637 local_node_record.node_name,
3638 local_node_record.node_id);
3639 }
3640 else
3641 {
3642 log_notice(_("executing switchover on node \"%s\" (ID: %i)"),
3643 local_node_record.node_name,
3644 local_node_record.node_id);
3645 }
3646
3647 /*
3648 * If -S/--superuser option provided, check that a superuser connection can be made
3649 * to the local database. We'll check the remote superuser connection later,
3650 */
3651
3652 if (runtime_options.superuser[0] != '\0')
3653 {
3654 if (runtime_options.dry_run == true)
3655 {
3656 log_info(_("validating connection to local database for superuser \"%s\""),
3657 runtime_options.superuser);
3658 }
3659
3660 superuser_conn = establish_db_connection_with_replacement_param(
3661 config_file_options.conninfo,
3662 "user",
3663 runtime_options.superuser, false);
3664
3665 if (PQstatus(superuser_conn) != CONNECTION_OK)
3666 {
3667 log_error(_("unable to connect to local database \"%s\" as provided superuser \"%s\""),
3668 PQdb(superuser_conn),
3669 runtime_options.superuser);
3670 exit(ERR_BAD_CONFIG);
3671 }
3672
3673 if (is_superuser_connection(superuser_conn, NULL) == false)
3674 {
3675 log_error(_("connection established to local database \"%s\" for provided superuser \"%s\" is not a superuser connection"),
3676 PQdb(superuser_conn),
3677 runtime_options.superuser);
3678 exit(ERR_BAD_CONFIG);
3679 }
3680
3681 if (runtime_options.dry_run == true)
3682 {
3683 log_info(_("successfully established connection to local database \"%s\" for provided superuser \"%s\""),
3684 PQdb(superuser_conn),
3685 runtime_options.superuser);
3686 }
3687
3688 }
3689
3690 /*
3691 * Warn if no superuser connection is available.
3692 */
3693 if (superuser_conn == NULL && is_superuser_connection(local_conn, NULL) == false)
3694 {
3695 log_warning(_("no superuser connection available"));
3696 log_detail(_("it is recommended to perform switchover operations with a database superuser"));
3697 log_hint(_("provide the name of a superuser with -S/--superuser"));
3698 }
3699
3700 /* Check that this is a standby */
3701 recovery_type = get_recovery_type(local_conn);
3702 if (recovery_type != RECTYPE_STANDBY)
3703 {
3704 log_error(_("switchover must be executed from the standby node to be promoted"));
3705 if (recovery_type == RECTYPE_PRIMARY)
3706 {
3707 log_detail(_("this node (ID: %i) is the primary"),
3708 local_node_record.node_id);
3709 }
3710 PQfinish(local_conn);
3711
3712 exit(ERR_SWITCHOVER_FAIL);
3713 }
3714
3715 /*
3716 * Check that the local replication configuration file is owned by the data
3717 * directory owner.
3718 *
3719 * For PostgreSQL 11 and earlier, if PostgreSQL is not able to rename "recovery.conf",
3720 * promotion will fail.
3721 *
3722 * For PostgreSQL 12 and later, promotion will not fail even if "postgresql.auto.conf"
3723 * is owned by another user, but we'll check just in case, as it is indicative of a
3724 * poorly configured setup. In any case we will need to check "postgresql.auto.conf" on
3725 * the demotion candidate as the rejoin will fail if we are unable to to write to that.
3726 */
3727
3728 initPQExpBuffer(&errmsg);
3729 initPQExpBuffer(&detailmsg);
3730
3731 if (check_replication_config_owner(PQserverVersion(local_conn),
3732 config_file_options.data_directory,
3733 &errmsg, &detailmsg) == false)
3734 {
3735 log_error("%s", errmsg.data);
3736 log_detail("%s", detailmsg.data);
3737
3738 termPQExpBuffer(&errmsg);
3739 termPQExpBuffer(&detailmsg);
3740
3741 PQfinish(local_conn);
3742 exit(ERR_BAD_CONFIG);
3743 }
3744
3745 termPQExpBuffer(&errmsg);
3746 termPQExpBuffer(&detailmsg);
3747
3748 /* check remote server connection and retrieve its record */
3749 remote_conn = get_primary_connection(local_conn, &remote_node_id, remote_conninfo);
3750
3751 if (PQstatus(remote_conn) != CONNECTION_OK)
3752 {
3753 log_error(_("unable to connect to current primary node"));
3754 log_hint(_("check that the cluster is correctly configured and this standby is registered"));
3755 PQfinish(local_conn);
3756 exit(ERR_DB_CONN);
3757 }
3758
3759 record_status = get_node_record(remote_conn, remote_node_id, &remote_node_record);
3760
3761 if (record_status != RECORD_FOUND)
3762 {
3763 log_error(_("unable to retrieve node record for current primary (node %i)"),
3764 remote_node_id);
3765
3766 PQfinish(local_conn);
3767 PQfinish(remote_conn);
3768
3769 exit(ERR_DB_QUERY);
3770 }
3771
3772 log_verbose(LOG_DEBUG, "remote node name is \"%s\"", remote_node_record.node_name);
3773
3774 /*
3775 * Check this standby is attached to the demotion candidate
3776 */
3777
3778 if (local_node_record.upstream_node_id != remote_node_record.node_id)
3779 {
3780 log_error(_("local node \"%s\" (ID: %i) is not a downstream of demotion candidate primary \"%s\" (ID: %i)"),
3781 local_node_record.node_name,
3782 local_node_record.node_id,
3783 remote_node_record.node_name,
3784 remote_node_record.node_id);
3785
3786 if (local_node_record.upstream_node_id == UNKNOWN_NODE_ID)
3787 log_detail(_("local node has no registered upstream node"));
3788 else
3789 log_detail(_("registered upstream node ID is %i"),
3790 local_node_record.upstream_node_id);
3791
3792 log_hint(_("execute \"repmgr standby register --force\" to update the local node's metadata"));
3793
3794 PQfinish(local_conn);
3795 PQfinish(remote_conn);
3796
3797 exit(ERR_BAD_CONFIG);
3798 }
3799
3800 if (is_downstream_node_attached(remote_conn, local_node_record.node_name, NULL) != NODE_ATTACHED)
3801 {
3802 log_error(_("local node \"%s\" (ID: %i) is not attached to demotion candidate \"%s\" (ID: %i)"),
3803 local_node_record.node_name,
3804 local_node_record.node_id,
3805 remote_node_record.node_name,
3806 remote_node_record.node_id);
3807
3808 PQfinish(local_conn);
3809 PQfinish(remote_conn);
3810
3811 exit(ERR_BAD_CONFIG);
3812 }
3813
3814 /*
3815 * In PostgreSQL 12 and earlier, check that WAL replay on the standby
3816 * is *not* paused, as that could lead to unexpected behaviour when the
3817 * standby is promoted.
3818 *
3819 * For switchover we'll mandate that WAL replay *must not* be paused.
3820 * For a promote operation we can proceed if WAL replay is paused and
3821 * there is no more available WAL to be replayed, as we can be sure the
3822 * primary is down already, but in a switchover context there's
3823 * potentially a window for more WAL to be received before we shut down
3824 * the primary completely.
3825 */
3826
3827 if (PQserverVersion(local_conn) < 130000 && is_wal_replay_paused(local_conn, false) == true)
3828 {
3829 ReplInfo replication_info;
3830 init_replication_info(&replication_info);
3831
3832 if (get_replication_info(local_conn, STANDBY, &replication_info) == false)
3833 {
3834 log_error(_("unable to retrieve replication information from local node"));
3835 PQfinish(local_conn);
3836 exit(ERR_SWITCHOVER_FAIL);
3837 }
3838
3839 log_error(_("WAL replay is paused on this node and it is not safe to proceed"));
3840 log_detail(_("replay paused at %X/%X; last WAL received is %X/%X"),
3841 format_lsn(replication_info.last_wal_replay_lsn),
3842 format_lsn(replication_info.last_wal_receive_lsn));
3843
3844 if (PQserverVersion(local_conn) >= 100000)
3845 log_hint(_("execute \"pg_wal_replay_resume()\" to unpause WAL replay"));
3846 else
3847 log_hint(_("execute \"pg_xlog_replay_resume()\" to unpause WAL replay"));
3848
3849 PQfinish(local_conn);
3850 exit(ERR_SWITCHOVER_FAIL);
3851 }
3852
3853
3854 /*
3855 * Check that there are no exclusive backups running on the primary.
3856 * We don't want to end up damaging the backup and also leaving the server in an
3857 * state where there's control data saying it's in backup mode but there's no
3858 * backup_label in PGDATA.
3859 * If the user wants to do the switchover anyway, they should first stop the
3860 * backup that's running.
3861 */
3862 if (server_in_exclusive_backup_mode(remote_conn) != BACKUP_STATE_NO_BACKUP)
3863 {
3864 log_error(_("unable to perform a switchover while primary server is in exclusive backup mode"));
3865 log_hint(_("stop backup before attempting the switchover"));
3866
3867 PQfinish(local_conn);
3868 PQfinish(remote_conn);
3869
3870 exit(ERR_SWITCHOVER_FAIL);
3871 }
3872
3873 /* this will fill the %p event notification parameter */
3874 event_info.node_id = remote_node_record.node_id;
3875
3876 /* keep a running total of how many nodes will require a replication slot */
3877 if (remote_node_record.slot_name[0] != '\0')
3878 {
3879 sibling_nodes_stats.min_required_free_slots++;
3880 }
3881
3882 /*
3883 * If --force-rewind specified, check pg_rewind can be used, and
3884 * pre-emptively fetch the list of configuration files which should be
3885 * archived
3886 */
3887
3888 if (runtime_options.force_rewind_used == true)
3889 {
3890 PQExpBufferData reason;
3891 PQExpBufferData msg;
3892
3893 initPQExpBuffer(&reason);
3894
3895 if (can_use_pg_rewind(remote_conn, config_file_options.data_directory, &reason) == false)
3896 {
3897 log_error(_("--force-rewind specified but pg_rewind cannot be used"));
3898 log_detail("%s", reason.data);
3899 termPQExpBuffer(&reason);
3900 PQfinish(local_conn);
3901 PQfinish(remote_conn);
3902
3903 exit(ERR_BAD_CONFIG);
3904 }
3905 termPQExpBuffer(&reason);
3906
3907 initPQExpBuffer(&msg);
3908 appendPQExpBufferStr(&msg,
3909 _("prerequisites for using pg_rewind are met"));
3910
3911 if (runtime_options.dry_run == true)
3912 {
3913 log_info("%s", msg.data);
3914 }
3915 else
3916 {
3917 log_verbose(LOG_INFO, "%s", msg.data);
3918 }
3919 termPQExpBuffer(&msg);
3920
3921 get_datadir_configuration_files(remote_conn, &remote_config_files);
3922 }
3923
3924
3925 /*
3926 * Check that we can connect by SSH to the remote (current primary) server
3927 */
3928 get_conninfo_value(remote_conninfo, "host", remote_host);
3929
3930 r = test_ssh_connection(remote_host, runtime_options.remote_user);
3931
3932 if (r != 0)
3933 {
3934 log_error(_("unable to connect via SSH to host \"%s\", user \"%s\""),
3935 remote_host, runtime_options.remote_user);
3936 PQfinish(remote_conn);
3937 PQfinish(local_conn);
3938
3939 exit(ERR_BAD_CONFIG);
3940 }
3941 else
3942 {
3943 PQExpBufferData msg;
3944
3945 initPQExpBuffer(&msg);
3946
3947 appendPQExpBuffer(&msg,
3948 _("SSH connection to host \"%s\" succeeded"),
3949 remote_host);
3950
3951 if (runtime_options.dry_run == true)
3952 {
3953 log_info("%s", msg.data);
3954 }
3955 else
3956 {
3957 log_verbose(LOG_INFO, "%s", msg.data);
3958 }
3959
3960 termPQExpBuffer(&msg);
3961 }
3962
3963 /* check remote repmgr binary can be found */
3964 initPQExpBuffer(&remote_command_str);
3965 make_remote_repmgr_path(&remote_command_str, &remote_node_record);
3966
3967 /*
3968 * Here we're executing an arbitrary repmgr command which is guaranteed to
3969 * succeed if repmgr is executed. We'll extract the actual version number in the
3970 * next step.
3971 */
3972 appendPQExpBufferStr(&remote_command_str, "--version >/dev/null 2>&1 && echo \"1\" || echo \"0\"");
3973 initPQExpBuffer(&command_output);
3974 command_success = remote_command(remote_host,
3975 runtime_options.remote_user,
3976 remote_command_str.data,
3977 config_file_options.ssh_options,
3978 &command_output);
3979
3980 termPQExpBuffer(&remote_command_str);
3981
3982 if (command_success == false || command_output.data[0] == '0')
3983 {
3984 PQExpBufferData hint;
3985
3986 log_error(_("unable to execute \"%s\" on \"%s\""),
3987 progname(), remote_host);
3988
3989 if (strlen(command_output.data) > 2)
3990 log_detail("%s", command_output.data);
3991
3992 termPQExpBuffer(&command_output);
3993
3994 initPQExpBuffer(&hint);
3995 appendPQExpBufferStr(&hint,
3996 _("check \"pg_bindir\" is set to the correct path in \"repmgr.conf\"; current value: "));
3997
3998 if (strlen(config_file_options.pg_bindir))
3999 {
4000 appendPQExpBuffer(&hint,
4001 "\"%s\"", config_file_options.pg_bindir);
4002 }
4003 else
4004 {
4005 appendPQExpBufferStr(&hint,
4006 "(not set)");
4007 }
4008
4009 log_hint("%s", hint.data);
4010
4011 termPQExpBuffer(&hint);
4012
4013 PQfinish(remote_conn);
4014 PQfinish(local_conn);
4015
4016 exit(ERR_BAD_CONFIG);
4017 }
4018
4019 termPQExpBuffer(&command_output);
4020
4021 /*
4022 * Now we're sure the binary can be executed, fetch its version number.
4023 */
4024 initPQExpBuffer(&remote_command_str);
4025 make_remote_repmgr_path(&remote_command_str, &remote_node_record);
4026
4027 appendPQExpBufferStr(&remote_command_str, "--version 2>/dev/null");
4028 initPQExpBuffer(&command_output);
4029 command_success = remote_command(remote_host,
4030 runtime_options.remote_user,
4031 remote_command_str.data,
4032 config_file_options.ssh_options,
4033 &command_output);
4034
4035 termPQExpBuffer(&remote_command_str);
4036
4037 if (command_success == true)
4038 {
4039 remote_repmgr_version = parse_repmgr_version(command_output.data);
4040 if (remote_repmgr_version == UNKNOWN_REPMGR_VERSION_NUM)
4041 {
4042 log_error(_("unable to parse \"%s\"'s reported version on \"%s\""),
4043 progname(), remote_host);
4044 PQfinish(remote_conn);
4045 PQfinish(local_conn);
4046 exit(ERR_BAD_CONFIG);
4047 }
4048 log_debug(_("\"%s\" version on \"%s\" is %i"),
4049 progname(), remote_host, remote_repmgr_version );
4050
4051 }
4052 else
4053 {
4054 log_error(_("unable to execute \"%s\" on \"%s\""),
4055 progname(), remote_host);
4056
4057 if (strlen(command_output.data) > 2)
4058 log_detail("%s", command_output.data);
4059
4060 termPQExpBuffer(&command_output);
4061
4062 PQfinish(remote_conn);
4063 PQfinish(local_conn);
4064
4065 exit(ERR_BAD_CONFIG);
4066 }
4067
4068 termPQExpBuffer(&command_output);
4069
4070 /*
4071 * Check if the expected remote repmgr.conf file exists
4072 */
4073 initPQExpBuffer(&remote_command_str);
4074
4075 appendPQExpBuffer(&remote_command_str,
4076 "test -f %s && echo 1 || echo 0",
4077 remote_node_record.config_file);
4078 initPQExpBuffer(&command_output);
4079
4080 command_success = remote_command(remote_host,
4081 runtime_options.remote_user,
4082 remote_command_str.data,
4083 config_file_options.ssh_options,
4084 &command_output);
4085
4086 termPQExpBuffer(&remote_command_str);
4087
4088 if (command_success == false || command_output.data[0] == '0')
4089 {
4090 log_error(_("expected configuration file not found on the demotion candiate \"%s\" (ID: %i)"),
4091 remote_node_record.node_name,
4092 remote_node_record.node_id);
4093 log_detail(_("registered configuration file is \"%s\""),
4094 remote_node_record.config_file);
4095 log_hint(_("ensure the configuration file is in the expected location, or re-register \"%s\" to update the configuration file location"),
4096 remote_node_record.node_name);
4097
4098 PQfinish(remote_conn);
4099 PQfinish(local_conn);
4100
4101 termPQExpBuffer(&command_output);
4102
4103 exit(ERR_BAD_CONFIG);
4104 }
4105
4106
4107 /*
4108 * Sanity-check remote "data_directory" is correctly configured in repmgr.conf.
4109 *
4110 * This is important as we'll need to be able to run "repmgr node status" on the data
4111 * directory after the remote (demotion candidate) has shut down.
4112 */
4113
4114 initPQExpBuffer(&remote_command_str);
4115 make_remote_repmgr_path(&remote_command_str, &remote_node_record);
4116
4117 /*
4118 * --data-directory-config is available from repmgr 4.3; it will fail
4119 * if the remote repmgr is an earlier version, but the version should match
4120 * anyway.
4121 */
4122 appendPQExpBufferStr(&remote_command_str, "node check --data-directory-config --optformat -LINFO 2>/dev/null");
4123
4124 initPQExpBuffer(&command_output);
4125 command_success = remote_command(remote_host,
4126 runtime_options.remote_user,
4127 remote_command_str.data,
4128 config_file_options.ssh_options,
4129 &command_output);
4130
4131 termPQExpBuffer(&remote_command_str);
4132
4133 if (command_success == false)
4134 {
4135 log_error(_("unable to execute \"%s node check --data-directory-config\" on \"%s\":"),
4136 progname(), remote_host);
4137 log_detail("%s", command_output.data);
4138
4139 PQfinish(remote_conn);
4140 PQfinish(local_conn);
4141
4142 termPQExpBuffer(&command_output);
4143
4144 exit(ERR_BAD_CONFIG);
4145 }
4146
4147 /* check remote repmgr has the data directory correctly configured */
4148 {
4149 t_remote_error_type remote_error = REMOTE_ERROR_NONE;
4150
4151 if (parse_data_directory_config(command_output.data, &remote_error) == false)
4152 {
4153 if (remote_error != REMOTE_ERROR_NONE)
4154 {
4155 log_error(_("unable to run data directory check on node \"%s\" (ID: %i)"),
4156 remote_node_record.node_name,
4157 remote_node_record.node_id);
4158
4159 if (remote_error == REMOTE_ERROR_DB_CONNECTION)
4160 {
4161 PQExpBufferData ssh_command;
4162
4163 /* can happen if the connection configuration is not consistent across nodes */
4164 log_detail(_("an error was encountered when attempting to connect to PostgreSQL on node \"%s\" (ID: %i)"),
4165 remote_node_record.node_name,
4166 remote_node_record.node_id);
4167
4168 /* output a helpful hint to help diagnose the issue */
4169 initPQExpBuffer(&remote_command_str);
4170 make_remote_repmgr_path(&remote_command_str, &remote_node_record);
4171
4172 appendPQExpBufferStr(&remote_command_str, "node check --db-connection");
4173
4174 initPQExpBuffer(&ssh_command);
4175
4176 make_remote_command(remote_host,
4177 runtime_options.remote_user,
4178 remote_command_str.data,
4179 config_file_options.ssh_options,
4180 &ssh_command);
4181
4182 log_hint(_("diagnose with:\n %s"), ssh_command.data);
4183
4184 termPQExpBuffer(&remote_command_str);
4185 termPQExpBuffer(&ssh_command);
4186 }
4187 else if (remote_error == REMOTE_ERROR_CONNINFO_PARSE)
4188 {
4189 /* highly unlikely */
4190 log_detail(_("an error was encountered when parsing the \"conninfo\" parameter in \"rempgr.conf\" on node \"%s\" (ID: %i)"),
4191 remote_node_record.node_name,
4192 remote_node_record.node_id);
4193 }
4194 else
4195 {
4196 log_detail(_("an unknown error was encountered when attempting to connect to PostgreSQL on node \"%s\" (ID: %i)"),
4197 remote_node_record.node_name,
4198 remote_node_record.node_id);
4199 }
4200 }
4201 else
4202 {
4203 log_error(_("\"data_directory\" parameter in \"repmgr.conf\" on \"%s\" (ID: %i) is incorrectly configured"),
4204 remote_node_record.node_name,
4205 remote_node_record.node_id);
4206
4207 log_hint(_("execute \"repmgr node check --data-directory-config\" on \"%s\" (ID: %i) to diagnose the issue"),
4208 remote_node_record.node_name,
4209 remote_node_record.node_id);
4210
4211 }
4212
4213 PQfinish(remote_conn);
4214 PQfinish(local_conn);
4215
4216 termPQExpBuffer(&command_output);
4217
4218 exit(ERR_BAD_CONFIG);
4219 }
4220 }
4221
	termPQExpBuffer(&command_output);

	/* data directory check passed; report remote executability in dry-run mode */
	if (runtime_options.dry_run == true)
	{
		log_info(_("able to execute \"%s\" on remote host \"%s\""),
				 progname(),
				 remote_host);
	}
4230
4231 /*
4232 * If -S/--superuser option provided, check that a superuser connection can be made
4233 * to the local database on the remote node.
4234 */
4235
4236 if (runtime_options.superuser[0] != '\0')
4237 {
4238 CheckStatus status = CHECK_STATUS_UNKNOWN;
4239
4240 initPQExpBuffer(&remote_command_str);
4241 make_remote_repmgr_path(&remote_command_str, &remote_node_record);
4242
4243 appendPQExpBuffer(&remote_command_str,
4244 "node check --db-connection --superuser=%s --optformat -LINFO 2>/dev/null",
4245 runtime_options.superuser);
4246
4247 initPQExpBuffer(&command_output);
4248 command_success = remote_command(remote_host,
4249 runtime_options.remote_user,
4250 remote_command_str.data,
4251 config_file_options.ssh_options,
4252 &command_output);
4253
4254 termPQExpBuffer(&remote_command_str);
4255
4256 if (command_success == false)
4257 {
4258 log_error(_("unable to execute \"%s node check --db-connection\" on \"%s\":"),
4259 progname(), remote_host);
4260 log_detail("%s", command_output.data);
4261
4262 PQfinish(remote_conn);
4263 PQfinish(local_conn);
4264
4265 termPQExpBuffer(&command_output);
4266
4267 exit(ERR_BAD_CONFIG);
4268 }
4269
4270 status = parse_db_connection(command_output.data);
4271
4272 if (status != CHECK_STATUS_OK)
4273 {
4274 PQExpBufferData ssh_command;
4275 log_error(_("unable to connect locally as superuser \"%s\" on node \"%s\" (ID: %i)"),
4276 runtime_options.superuser,
4277 remote_node_record.node_name,
4278 remote_node_record.node_id);
4279
4280 /* output a helpful hint to help diagnose the issue */
4281 initPQExpBuffer(&remote_command_str);
4282 make_remote_repmgr_path(&remote_command_str, &remote_node_record);
4283
4284 appendPQExpBuffer(&remote_command_str,
4285 "node check --db-connection --superuser=%s",
4286 runtime_options.superuser);
4287
4288 initPQExpBuffer(&ssh_command);
4289
4290 make_remote_command(remote_host,
4291 runtime_options.remote_user,
4292 remote_command_str.data,
4293 config_file_options.ssh_options,
4294 &ssh_command);
4295
4296 log_hint(_("diagnose with:\n %s"), ssh_command.data);
4297
4298 termPQExpBuffer(&remote_command_str);
4299 termPQExpBuffer(&ssh_command);
4300 exit(ERR_DB_CONN);
4301 }
4302
4303
4304
4305 termPQExpBuffer(&command_output);
4306 }
4307
	/*
	 * For PostgreSQL 12 and later, check "postgresql.auto.conf" is owned by the
	 * correct user, otherwise the node will probably not be able to attach to
	 * the promotion candidate (and is a sign of bad configuration anyway) so we
	 * will complain vocally.
	 *
	 * We'll only do this if we've determined the remote repmgr binary is new
	 * enough to have the "node check --replication-config-owner" option.
	 */

	if (PQserverVersion(local_conn) >= 120000 && remote_repmgr_version >= 50100)
	{
		initPQExpBuffer(&remote_command_str);
		make_remote_repmgr_path(&remote_command_str, &remote_node_record);

		appendPQExpBufferStr(&remote_command_str, "node check --replication-config-owner --optformat -LINFO 2>/dev/null");

		initPQExpBuffer(&command_output);
		command_success = remote_command(remote_host,
										 runtime_options.remote_user,
										 remote_command_str.data,
										 config_file_options.ssh_options,
										 &command_output);

		termPQExpBuffer(&remote_command_str);

		/* SSH execution failure */
		if (command_success == false)
		{
			log_error(_("unable to execute \"%s node check --replication-config-owner\" on \"%s\":"),
					  progname(), remote_host);
			log_detail("%s", command_output.data);

			PQfinish(remote_conn);
			PQfinish(local_conn);

			termPQExpBuffer(&command_output);

			exit(ERR_BAD_CONFIG);
		}

		/* command ran; check the reported file ownership */
		if (parse_replication_config_owner(command_output.data) == false)
		{
			log_error(_("\"%s\" file on \"%s\" has incorrect ownership"),
					  PG_AUTOCONF_FILENAME,
					  remote_node_record.node_name);

			log_hint(_("check the file has the same owner/group as the data directory"));

			PQfinish(remote_conn);
			PQfinish(local_conn);

			termPQExpBuffer(&command_output);

			exit(ERR_BAD_CONFIG);
		}

		termPQExpBuffer(&command_output);
	}
4366
	/*
	 * populate local node record with current state of various replication-related
	 * values, so we can check for sufficient walsenders and replication slots
	 */
	get_node_replication_stats(local_conn, &local_node_record);

	/* walsenders not currently in use by an attached walreceiver */
	available_wal_senders = local_node_record.max_wal_senders -
		local_node_record.attached_wal_receivers;

	/*
	 * Get list of sibling nodes; if --siblings-follow specified,
	 * check they're reachable; if not, the list will be used to warn
	 * about nodes which will remain attached to the demotion candidate
	 */
	get_active_sibling_node_records(local_conn,
									local_node_record.node_id,
									local_node_record.upstream_node_id,
									&sibling_nodes);

	if (check_sibling_nodes(&sibling_nodes, &sibling_nodes_stats) == false)
	{
		PQfinish(local_conn);
		exit(ERR_BAD_CONFIG);
	}

	/*
	 * check there are sufficient free walsenders - obviously there's potential
	 * for a later race condition if some walsenders come into use before the
	 * switchover operation gets around to attaching the sibling nodes, but
	 * this should catch any actual existing configuration issue (and if anyone's
	 * performing a switchover in such an unstable environment, they only have
	 * themselves to blame).
	 */
	if (check_free_wal_senders(available_wal_senders, &sibling_nodes_stats, &dry_run_success) == false)
	{
		/* in dry-run mode, record the failure but carry on with further checks */
		if (runtime_options.dry_run == false)
		{
			PQfinish(local_conn);
			exit(ERR_BAD_CONFIG);
		}
	}
4411
4412 /* check demotion candidate can make replication connection to promotion candidate */
4413 {
4414 initPQExpBuffer(&remote_command_str);
4415 make_remote_repmgr_path(&remote_command_str, &remote_node_record);
4416 appendPQExpBuffer(&remote_command_str,
4417 "node check --remote-node-id=%i --replication-connection",
4418 local_node_record.node_id);
4419
4420 initPQExpBuffer(&command_output);
4421
4422 command_success = remote_command(remote_host,
4423 runtime_options.remote_user,
4424 remote_command_str.data,
4425 config_file_options.ssh_options,
4426 &command_output);
4427
4428 termPQExpBuffer(&remote_command_str);
4429
4430 if (command_success == true)
4431 {
4432 ConnectionStatus conn_status = parse_remote_node_replication_connection(command_output.data);
4433
4434 switch(conn_status)
4435 {
4436 case CONN_OK:
4437 if (runtime_options.dry_run == true)
4438 {
4439 log_info(_("demotion candidate is able to make replication connection to promotion candidate"));
4440 }
4441 break;
4442 case CONN_BAD:
4443 log_error(_("demotion candidate is unable to make replication connection to promotion candidate"));
4444 exit(ERR_BAD_CONFIG);
4445 break;
4446 default:
4447 log_error(_("unable to determine whether demotion candidate is able to make replication connection to promotion candidate"));
4448 exit(ERR_BAD_CONFIG);
4449 break;
4450 }
4451
4452 termPQExpBuffer(&command_output);
4453 }
4454 }
4455
	/* check archive/replication status */
	{
		int lag_seconds = 0;
		CheckStatus status = CHECK_STATUS_UNKNOWN;

		/* archive status - check when "archive_mode" is activated */

		if (guc_set(remote_conn, "archive_mode", "!=", "off"))
		{
			int files = 0;
			int threshold = 0;
			t_remote_error_type remote_error = REMOTE_ERROR_NONE;

			/* ask the demotion candidate how many WAL files are pending archiving */
			initPQExpBuffer(&remote_command_str);
			make_remote_repmgr_path(&remote_command_str, &remote_node_record);
			appendPQExpBufferStr(&remote_command_str,
								 "node check --terse -LERROR --archive-ready --optformat");

			initPQExpBuffer(&command_output);

			command_success = remote_command(remote_host,
											 runtime_options.remote_user,
											 remote_command_str.data,
											 config_file_options.ssh_options,
											 &command_output);

			termPQExpBuffer(&remote_command_str);

			if (command_success == true)
			{
				status = parse_node_check_archiver(command_output.data, &files, &threshold, &remote_error);
			}

			termPQExpBuffer(&command_output);

			/* "status" remains CHECK_STATUS_UNKNOWN if the command failed */
			switch (status)
			{
				case CHECK_STATUS_UNKNOWN:
				{
					/*
					 * A remote database connection error is always fatal here,
					 * as later steps will require one; otherwise -F/--force
					 * lets the user continue.
					 */
					if (runtime_options.force == false || remote_error == REMOTE_ERROR_DB_CONNECTION)
					{
						log_error(_("unable to check number of pending archive files on demotion candidate \"%s\""),
								  remote_node_record.node_name);

						if (remote_error == REMOTE_ERROR_DB_CONNECTION)
							log_detail(_("an error was encountered when attempting to connect to PostgreSQL on node \"%s\" (ID: %i)"),
									   remote_node_record.node_name,
									   remote_node_record.node_id);
						else
							log_hint(_("use -F/--force to continue anyway"));

						PQfinish(remote_conn);
						PQfinish(local_conn);

						exit(ERR_SWITCHOVER_FAIL);
					}

					log_warning(_("unable to check number of pending archive files on demotion candidate \"%s\""),
								remote_node_record.node_name);
					log_notice(_("-F/--force set, continuing with switchover"));
				}
				break;

				case CHECK_STATUS_CRITICAL:
				{
					/* critical backlog is fatal unless -F/--force provided */
					if (runtime_options.force == false)
					{
						log_error(_("number of pending archive files on demotion candidate \"%s\" is critical"),
								  remote_node_record.node_name);
						log_detail(_("%i pending archive files (critical threshold: %i)"),
								   files, threshold);
						log_hint(_("PostgreSQL will not shut down until all files are archived; use -F/--force to continue anyway"));
						PQfinish(remote_conn);
						PQfinish(local_conn);

						exit(ERR_SWITCHOVER_FAIL);
					}

					log_warning(_("number of pending archive files on demotion candidate \"%s\" exceeds the critical threshold"),
								remote_node_record.node_name);
					log_detail(_("%i pending archive files (critical threshold: %i)"),
							   files, threshold);
					log_notice(_("-F/--force set, continuing with switchover"));
				}
				break;

				case CHECK_STATUS_WARNING:
				{
					/* warn but continue */
					log_warning(_("number of pending archive files on demotion candidate \"%s\" exceeds the warning threshold"),
								remote_node_record.node_name);
					log_detail(_("%i pending archive files (warning threshold: %i)"),
							   files, threshold);
					log_hint(_("PostgreSQL will not shut down until all files are archived"));
				}
				break;

				case CHECK_STATUS_OK:
				{
					PQExpBufferData msg;

					initPQExpBuffer(&msg);

					appendPQExpBuffer(&msg,
									  _("%i pending archive files"),
									  files);

					if (runtime_options.dry_run == true)
					{
						log_info("%s", msg.data);
					}
					else
					{
						log_verbose(LOG_INFO, "%s", msg.data);
					}

					termPQExpBuffer(&msg);
				}
			}

		}
		else
		{
			char *msg = _("archive mode is \"off\"");

			if (runtime_options.dry_run == true)
			{
				log_info("%s", msg);
			}
			else
			{
				log_verbose(LOG_INFO, "%s", msg);
			}
		}

		/*
		 * check replication lag on promotion candidate (TODO: check on all
		 * nodes attached to demotion candidate)
		 */
		lag_seconds = get_replication_lag_seconds(local_conn);

		log_debug("lag is %i ", lag_seconds);

		if (lag_seconds >= config_file_options.replication_lag_critical)
		{
			if (runtime_options.force == false)
			{
				log_error(_("replication lag on this node is critical"));
				log_detail(_("lag is %i seconds (critical threshold: %i)"),
						   lag_seconds, config_file_options.replication_lag_critical);
				log_hint(_("PostgreSQL on the demotion candidate will not shut down until pending WAL is flushed to the standby; use -F/--force to continue anyway"));
				PQfinish(remote_conn);
				PQfinish(local_conn);

				exit(ERR_SWITCHOVER_FAIL);
			}

			log_warning(_("replication lag on this node is critical"));
			log_detail(_("lag is %i seconds (critical threshold: %i)"),
					   lag_seconds, config_file_options.replication_lag_critical);
			log_notice(_("-F/--force set, continuing with switchover"));
		}
		else if (lag_seconds >= config_file_options.replication_lag_warning)
		{
			log_warning(_("replication lag on this node is warning"));
			log_detail(_("lag is %i seconds (warning threshold: %i)"),
					   lag_seconds, config_file_options.replication_lag_warning);
		}
		else if (lag_seconds == UNKNOWN_REPLICATION_LAG)
		{
			/*
			 * NOTE(review): UNKNOWN_REPLICATION_LAG is presumably a negative
			 * sentinel, otherwise the threshold branches above would match
			 * first - confirm against its definition.
			 */
			if (runtime_options.force == false)
			{
				log_error(_("unable to check replication lag on local node"));
				log_hint(_("use -F/--force to continue anyway"));
				PQfinish(remote_conn);
				PQfinish(local_conn);

				exit(ERR_SWITCHOVER_FAIL);
			}

			log_warning(_("unable to check replication lag on local node"));
			log_notice(_("-F/--force set, continuing with switchover"));
		}
		/* replication lag is below warning threshold */
		else
		{
			PQExpBufferData msg;

			initPQExpBuffer(&msg);

			appendPQExpBuffer(&msg,
							  _("replication lag on this standby is %i seconds"),
							  lag_seconds);

			if (runtime_options.dry_run == true)
			{
				log_info("%s", msg.data);
			}
			else
			{
				log_verbose(LOG_INFO, "%s", msg.data);
			}

			termPQExpBuffer(&msg);
		}
	}
4661
	/* remote connection no longer needed for the pre-flight checks */
	PQfinish(remote_conn);

	/*
	 * if replication slots are required by demotion candidate and/or siblings,
	 * check the promotion candidate has sufficient free slots
	 */
	if (check_free_slots(&local_node_record, &sibling_nodes_stats, &dry_run_success) == false)
	{
		/* in dry-run mode, record the failure but carry on with further checks */
		if (runtime_options.dry_run == false)
		{
			PQfinish(local_conn);
			exit(ERR_BAD_CONFIG);
		}
	}
4676
	/*
	 * Attempt to pause all repmgrd instances, unless user explicitly
	 * specifies not to.
	 */
	if (runtime_options.repmgrd_no_pause == false)
	{
		NodeInfoListCell *cell = NULL;
		ItemList repmgrd_connection_errors = {NULL, NULL};
		int i = 0;
		int unreachable_node_count = 0;

		get_all_node_records(local_conn, &all_nodes);

		/* per-node repmgrd state, parallel to the "all_nodes" list (indexed by i) */
		repmgrd_info = (RepmgrdInfo **) pg_malloc0(sizeof(RepmgrdInfo *) * all_nodes.node_count);

		for (cell = all_nodes.head; cell; cell = cell->next)
		{
			repmgrd_info[i] = pg_malloc0(sizeof(RepmgrdInfo));
			repmgrd_info[i]->node_id = cell->node_info->node_id;
			repmgrd_info[i]->pid = UNKNOWN_PID;
			repmgrd_info[i]->paused = false;
			repmgrd_info[i]->running = false;
			repmgrd_info[i]->pg_running = true;

			cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);

			if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
			{
				/*
				 * unable to connect; treat this as an error
				 */

				repmgrd_info[i]->pg_running = false;

				/*
				 * Only worry about unreachable nodes if they're marked as active
				 * in the repmgr metadata.
				 */
				if (cell->node_info->active == true)
				{
					unreachable_node_count++;

					item_list_append_format(&repmgrd_connection_errors,
											_("unable to connect to node \"%s\" (ID %i):\n%s"),
											cell->node_info->node_name,
											cell->node_info->node_id,
											PQerrorMessage(cell->node_info->conn));
				}

				PQfinish(cell->node_info->conn);
				cell->node_info->conn = NULL;

				i++;
				continue;
			}

			/* node reachable - record repmgrd state */
			repmgrd_info[i]->running = repmgrd_is_running(cell->node_info->conn);
			repmgrd_info[i]->pid = repmgrd_get_pid(cell->node_info->conn);
			repmgrd_info[i]->paused = repmgrd_is_paused(cell->node_info->conn);

			if (repmgrd_info[i]->running == true)
				repmgrd_running_count++;

			i++;
		}

		if (unreachable_node_count > 0)
		{
			PQExpBufferData msg;
			PQExpBufferData detail;
			ItemListCell *cell;

			initPQExpBuffer(&msg);
			appendPQExpBuffer(&msg,
							  _("unable to connect to %i node(s), unable to pause all repmgrd instances"),
							  unreachable_node_count);

			initPQExpBuffer(&detail);

			/* collate the individual connection errors gathered above */
			for (cell = repmgrd_connection_errors.head; cell; cell = cell->next)
			{
				appendPQExpBuffer(&detail,
								  "  %s\n",
								  cell->string);
			}


			if (runtime_options.force == false)
			{
				log_error("%s", msg.data);
			}
			else
			{
				log_warning("%s", msg.data);
			}

			log_detail(_("following node(s) unreachable:\n%s"), detail.data);

			termPQExpBuffer(&msg);
			termPQExpBuffer(&detail);

			/* tell user about footgun */
			if (runtime_options.force == false)
			{
				log_hint(_("use -F/--force to continue anyway"));

				clear_node_info_list(&sibling_nodes);
				clear_node_info_list(&all_nodes);

				exit(ERR_SWITCHOVER_FAIL);
			}

		}

		/* pause repmgrd on all reachable nodes */
		if (repmgrd_running_count > 0)
		{
			i = 0;
			for (cell = all_nodes.head; cell; cell = cell->next)
			{

				/*
				 * Skip if node was unreachable
				 */
				if (repmgrd_info[i]->pg_running == false)
				{
					log_warning(_("node \"%s\" (ID %i) unreachable, unable to pause repmgrd"),
								cell->node_info->node_name,
								cell->node_info->node_id);
					i++;
					continue;
				}


				/*
				 * Skip if repmgrd not running on node
				 */
				if (repmgrd_info[i]->running == false)
				{
					log_warning(_("repmgrd not running on node \"%s\" (ID %i)"),
								cell->node_info->node_name,
								cell->node_info->node_id);
					i++;
					continue;
				}
				/*
				 * Skip if node is already paused. Note we won't unpause these, to
				 * leave the repmgrd instances in the cluster in the same state they
				 * were before the switchover.
				 */
				if (repmgrd_info[i]->paused == true)
				{
					PQfinish(cell->node_info->conn);
					cell->node_info->conn = NULL;
					i++;
					continue;
				}

				if (runtime_options.dry_run == true)
				{
					log_info(_("would pause repmgrd on node \"%s\" (ID %i)"),
							 cell->node_info->node_name,
							 cell->node_info->node_id);
				}
				else
				{
					/* XXX check result */
					log_debug("pausing repmgrd on node \"%s\" (ID %i)",
							  cell->node_info->node_name,
							  cell->node_info->node_id);

					(void) repmgrd_pause(cell->node_info->conn, true);
				}

				PQfinish(cell->node_info->conn);
				cell->node_info->conn = NULL;
				i++;
			}
		}
		else
		{
			/* close all connections - we'll reestablish later */
			for (cell = all_nodes.head; cell; cell = cell->next)
			{
				if (cell->node_info->conn != NULL)
				{
					PQfinish(cell->node_info->conn);
					cell->node_info->conn = NULL;
				}
			}
		}
	}
4870
4871
4872 /*
4873 * Sanity checks completed - prepare for the switchover
4874 */
4875
4876 if (runtime_options.dry_run == true)
4877 {
4878 log_notice(_("local node \"%s\" (ID: %i) would be promoted to primary; "
4879 "current primary \"%s\" (ID: %i) would be demoted to standby"),
4880 local_node_record.node_name,
4881 local_node_record.node_id,
4882 remote_node_record.node_name,
4883 remote_node_record.node_id);
4884 }
4885 else
4886 {
4887 log_notice(_("local node \"%s\" (ID: %i) will be promoted to primary; "
4888 "current primary \"%s\" (ID: %i) will be demoted to standby"),
4889 local_node_record.node_name,
4890 local_node_record.node_id,
4891 remote_node_record.node_name,
4892 remote_node_record.node_id);
4893 }
4894
	/*
	 * Stop the remote primary
	 *
	 * We'll issue the pg_ctl command but not force it not to wait; we'll
	 * check the connection from here - and error out if no shutdown is
	 * detected after a certain time.
	 */

	initPQExpBuffer(&remote_command_str);
	initPQExpBuffer(&command_output);

	make_remote_repmgr_path(&remote_command_str, &remote_node_record);

	if (runtime_options.dry_run == true)
	{
		/* dry run: just ask the remote node which stop command it would use */
		appendPQExpBufferStr(&remote_command_str,
							 "node service --terse -LERROR --list-actions --action=stop");

	}
	else
	{
		log_notice(_("stopping current primary node \"%s\" (ID: %i)"),
				   remote_node_record.node_name,
				   remote_node_record.node_id);
		appendPQExpBufferStr(&remote_command_str,
							 "node service --action=stop --checkpoint");

		/* pass the superuser through so the remote node can issue the CHECKPOINT */
		if (runtime_options.superuser[0] != '\0')
		{
			appendPQExpBuffer(&remote_command_str,
							  " --superuser=%s",
							  runtime_options.superuser);
		}
	}

	/* XXX handle failure */

	(void) remote_command(remote_host,
						  runtime_options.remote_user,
						  remote_command_str.data,
						  config_file_options.ssh_options,
						  &command_output);

	termPQExpBuffer(&remote_command_str);
4940
4941 /*
4942 * --dry-run ends here with display of command which would be used to shut
4943 * down the remote server
4944 */
4945 if (runtime_options.dry_run == true)
4946 {
4947 /* we use a buffer here as it will be modified by string_remove_trailing_newlines() */
4948 char shutdown_command[MAXLEN] = "";
4949
4950 strncpy(shutdown_command, command_output.data, MAXLEN);
4951
4952 termPQExpBuffer(&command_output);
4953
4954 string_remove_trailing_newlines(shutdown_command);
4955
4956 log_info(_("following shutdown command would be run on node \"%s\":\n \"%s\""),
4957 remote_node_record.node_name,
4958 shutdown_command);
4959
4960 log_info(_("parameter \"shutdown_check_timeout\" is set to %i seconds"),
4961 config_file_options.shutdown_check_timeout);
4962
4963 clear_node_info_list(&sibling_nodes);
4964
4965 key_value_list_free(&remote_config_files);
4966
4967 if (dry_run_success == false)
4968 {
4969 log_error(_("prerequisites for executing STANDBY SWITCHOVER are *not* met"));
4970 log_hint(_("see preceding error messages"));
4971 exit(ERR_BAD_CONFIG);
4972 }
4973
4974 log_info(_("prerequisites for executing STANDBY SWITCHOVER are met"));
4975
4976 exit(SUCCESS);
4977 }
4978
	termPQExpBuffer(&command_output);
	shutdown_success = false;

	/* loop for timeout waiting for current primary to stop */

	for (i = 0; i < config_file_options.shutdown_check_timeout; i++)
	{
		/* Check whether primary is available */
		PGPing ping_res;

		log_info(_("checking for primary shutdown; %i of %i attempts (\"shutdown_check_timeout\")"),
				 i + 1, config_file_options.shutdown_check_timeout);

		ping_res = PQping(remote_conninfo);

		log_debug("ping status is: %s", print_pqping_status(ping_res));

		/* database server could not be contacted */
		if (ping_res == PQPING_NO_RESPONSE || ping_res == PQPING_NO_ATTEMPT)
		{
			bool command_success;

			/*
			 * remote server can't be contacted at protocol level - that
			 * doesn't necessarily mean it's shut down, so we'll ask its
			 * repmgr to check at data directory level, and if shut down also
			 * return the last checkpoint LSN.
			 */

			initPQExpBuffer(&remote_command_str);
			make_remote_repmgr_path(&remote_command_str, &remote_node_record);
			appendPQExpBufferStr(&remote_command_str,
								 "node status --is-shutdown-cleanly");

			initPQExpBuffer(&command_output);

			command_success = remote_command(remote_host,
											 runtime_options.remote_user,
											 remote_command_str.data,
											 config_file_options.ssh_options,
											 &command_output);

			termPQExpBuffer(&remote_command_str);

			if (command_success == true)
			{
				/* also extracts the primary's last checkpoint LSN on clean shutdown */
				NodeStatus status = parse_node_status_is_shutdown_cleanly(command_output.data, &remote_last_checkpoint_lsn);

				log_verbose(LOG_DEBUG, "remote node status is: %s", print_node_status(status));

				if (status == NODE_STATUS_DOWN && remote_last_checkpoint_lsn != InvalidXLogRecPtr)
				{
					/* clean shutdown confirmed - we're done waiting */
					shutdown_success = true;
					log_notice(_("current primary has been cleanly shut down at location %X/%X"),
							   format_lsn(remote_last_checkpoint_lsn));
					termPQExpBuffer(&command_output);

					break;
				}
				/* remote node did not shut down cleanly */
				else if (status == NODE_STATUS_UNCLEAN_SHUTDOWN)
				{
					if (!runtime_options.force)
					{
						log_error(_("current primary did not shut down cleanly, aborting"));
						log_hint(_("use -F/--force to promote current standby"));
						termPQExpBuffer(&command_output);
						exit(ERR_SWITCHOVER_FAIL);
					}
					log_error(_("current primary did not shut down cleanly, continuing anyway"));
					shutdown_success = true;
					break;
				}
				else if (status == NODE_STATUS_SHUTTING_DOWN)
				{
					log_info(_("remote node is still shutting down"));
				}
			}

			termPQExpBuffer(&command_output);
		}

		log_debug("sleeping 1 second until next check");
		sleep(1);
	}

	if (shutdown_success == false)
	{
		log_error(_("shutdown of the primary server could not be confirmed"));
		log_hint(_("check the primary server status before performing any further actions"));
		exit(ERR_SWITCHOVER_FAIL);
	}
5071
	/* this is unlikely to happen, but check and handle gracefully anyway */
	if (PQstatus(local_conn) != CONNECTION_OK)
	{
		log_warning(_("connection to local node lost, reconnecting..."));
		log_detail("\n%s", PQerrorMessage(local_conn));
		PQfinish(local_conn);

		/* one reconnection attempt only; failure is fatal */
		local_conn = establish_db_connection(config_file_options.conninfo, false);

		if (PQstatus(local_conn) != CONNECTION_OK)
		{
			log_error(_("unable to reconnect to local node \"%s\""),
					  local_node_record.node_name);
			exit(ERR_DB_CONN);
		}
		log_verbose(LOG_INFO, _("successfully reconnected to local node"));
	}
5089
	init_replication_info(&replication_info);
	/*
	 * Compare standby's last WAL receive location with the primary's last
	 * checkpoint LSN. We'll loop for a while as it's possible the standby's
	 * walreceiver has not yet flushed all received WAL to disk.
	 */
	{
		bool notice_emitted = false;

		for (i = 0; i < config_file_options.wal_receive_check_timeout; i++)
		{
			get_replication_info(local_conn, STANDBY, &replication_info);
			/* standby has caught up with (or passed) the shutdown checkpoint */
			if (replication_info.last_wal_receive_lsn >= remote_last_checkpoint_lsn)
				break;

			/*
			 * We'll only output this notice if it looks like we're going to have
			 * to wait for WAL to be flushed.
			 */
			if (notice_emitted == false)
			{
				log_notice(_("waiting up to %i seconds (parameter \"wal_receive_check_timeout\") for received WAL to flush to disk"),
						   config_file_options.wal_receive_check_timeout);

				notice_emitted = true;
			}

			log_info(_("sleeping %i of maximum %i seconds waiting for standby to flush received WAL to disk"),
					 i + 1, config_file_options.wal_receive_check_timeout);
			sleep(1);
		}
	}

	/* timed out without catching up: standby is behind the shut-down primary */
	if (replication_info.last_wal_receive_lsn < remote_last_checkpoint_lsn)
	{
		log_warning(_("local node \"%s\" is behind shutdown primary \"%s\""),
					local_node_record.node_name,
					remote_node_record.node_name);
		log_detail(_("local node last receive LSN is %X/%X, primary shutdown checkpoint LSN is %X/%X"),
				   format_lsn(replication_info.last_wal_receive_lsn),
				   format_lsn(remote_last_checkpoint_lsn));

		if (runtime_options.always_promote == false)
		{
			log_notice(_("aborting switchover"));
			log_hint(_("use --always-promote to force promotion of standby"));
			PQfinish(local_conn);
			exit(ERR_SWITCHOVER_FAIL);
		}
	}

	log_debug("local node last receive LSN is %X/%X, primary shutdown checkpoint LSN is %X/%X",
			  format_lsn(replication_info.last_wal_receive_lsn),
			  format_lsn(remote_last_checkpoint_lsn));
5144
	/*
	 * Promote standby (local node).
	 *
	 * If PostgreSQL 12 or later, and -S/--superuser provided, we will provide
	 * a superuser connection so that pg_promote() can be used.
	 */

	if (PQserverVersion(local_conn) >= 120000 && superuser_conn != NULL)
	{
		_do_standby_promote_internal(superuser_conn);
	}
	else
	{
		_do_standby_promote_internal(local_conn);
	}
5160
5161
5162 /*
5163 * If pg_rewind is requested, issue a checkpoint immediately after promoting
5164 * the local node, as pg_rewind compares timelines on the basis of the value
5165 * in pg_control, which is written at the first checkpoint, which might not
5166 * occur immediately.
5167 */
5168 if (runtime_options.force_rewind_used == true)
5169 {
5170 PGconn *checkpoint_conn = local_conn;
5171 if (superuser_conn != NULL)
5172 {
5173 checkpoint_conn = superuser_conn;
5174 }
5175
5176 if (is_superuser_connection(checkpoint_conn, NULL) == true)
5177 {
5178 log_notice(_("issuing CHECKPOINT on node \"%s\" (ID: %i) "),
5179 config_file_options.node_name,
5180 config_file_options.node_id);
5181 checkpoint(superuser_conn);
5182 }
5183 else
5184 {
5185 log_warning(_("no superuser connection available, unable to issue CHECKPOINT"));
5186 }
5187 }
5188
	/*
	 * Execute "repmgr node rejoin" to create recovery.conf and start the
	 * remote server. Additionally execute "pg_rewind", if required and
	 * requested.
	 */
	initPQExpBuffer(&node_rejoin_options);

	/*
	 * Don't wait for repmgr on the remote node to report the success
	 * of the rejoin operation - we'll check it from here.
	 */
	appendPQExpBufferStr(&node_rejoin_options,
						 " --no-wait");

	/* former primary has diverged - it will need to be rewound to rejoin */
	if (replication_info.last_wal_receive_lsn < remote_last_checkpoint_lsn)
	{
		KeyValueListCell *cell = NULL;
		bool first_entry = true;

		if (runtime_options.force_rewind_used == false)
		{
			log_error(_("new primary diverges from former primary and --force-rewind not provided"));
			log_hint(_("the former primary will need to be restored manually, or use \"repmgr node rejoin\""));

			termPQExpBuffer(&node_rejoin_options);
			PQfinish(local_conn);
			exit(ERR_SWITCHOVER_FAIL);
		}

		appendPQExpBufferStr(&node_rejoin_options,
							 " --force-rewind");

		/* optional explicit path to the pg_rewind binary */
		if (runtime_options.force_rewind_path[0] != '\0')
		{
			appendPQExpBuffer(&node_rejoin_options,
							  "=%s",
							  runtime_options.force_rewind_path);
		}

		/*
		 * pg_rewind overwrites the data directory, so tell "node rejoin" which
		 * config files to preserve/restore (comma-separated list).
		 */
		appendPQExpBufferStr(&node_rejoin_options,
							 " --config-files=");

		for (cell = remote_config_files.head; cell; cell = cell->next)
		{
			if (first_entry == false)
				appendPQExpBufferChar(&node_rejoin_options, ',');
			else
				first_entry = false;

			appendPQExpBufferStr(&node_rejoin_options, cell->key);
		}

		appendPQExpBufferChar(&node_rejoin_options, ' ');
	}

	key_value_list_free(&remote_config_files);
5244
5245 initPQExpBuffer(&remote_command_str);
5246 make_remote_repmgr_path(&remote_command_str, &remote_node_record);
5247
5248 /*
5249 * Here we'll coerce the local node's connection string into
5250 * "param=value" format, in case it's configured in URI format,
5251 * to simplify escaping issues when passing the string to the
5252 * remote node.
5253 */
5254 {
5255 char *conninfo_normalized = normalize_conninfo_string(local_node_record.conninfo);
5256
5257 appendPQExpBuffer(&remote_command_str,
5258 "%s -d ",
5259 node_rejoin_options.data);
5260
5261 appendRemoteShellString(&remote_command_str,
5262 conninfo_normalized);
5263
5264 appendPQExpBufferStr(&remote_command_str,
5265 " node rejoin");
5266
5267 pfree(conninfo_normalized);
5268 }
5269
5270 termPQExpBuffer(&node_rejoin_options);
5271
5272 log_debug("executing:\n %s", remote_command_str.data);
5273 initPQExpBuffer(&command_output);
5274
5275 command_success = remote_command(remote_host,
5276 runtime_options.remote_user,
5277 remote_command_str.data,
5278 config_file_options.ssh_options,
5279 &command_output);
5280
5281 termPQExpBuffer(&remote_command_str);
5282
5283 /* TODO: verify this node's record was updated correctly */
5284
5285 if (command_success == false)
5286 {
5287 log_error(_("rejoin failed with error code %i"), r);
5288
5289 create_event_notification_extended(local_conn,
5290 &config_file_options,
5291 config_file_options.node_id,
5292 "standby_switchover",
5293 false,
5294 command_output.data,
5295 &event_info);
5296 }
5297 else
5298 {
5299 PQExpBufferData event_details;
5300 standy_join_status join_success = check_standby_join(local_conn,
5301 &local_node_record,
5302 &remote_node_record);
5303
5304 initPQExpBuffer(&event_details);
5305
5306 switch (join_success) {
5307 case JOIN_FAIL_NO_PING:
5308 appendPQExpBuffer(&event_details,
5309 _("node \"%s\" (ID: %i) promoted to primary, but demote node \"%s\" (ID: %i) did not beome available"),
5310 config_file_options.node_name,
5311 config_file_options.node_id,
5312 remote_node_record.node_name,
5313 remote_node_record.node_id);
5314 switchover_success = false;
5315
5316 break;
5317 case JOIN_FAIL_NO_REPLICATION:
5318 appendPQExpBuffer(&event_details,
5319 _("node \"%s\" (ID: %i) promoted to primary, but demote node \"%s\" (ID: %i) did not connect to the new primary"),
5320 config_file_options.node_name,
5321 config_file_options.node_id,
5322 remote_node_record.node_name,
5323 remote_node_record.node_id);
5324 switchover_success = false;
5325 break;
5326 case JOIN_SUCCESS:
5327 appendPQExpBuffer(&event_details,
5328 _("node \"%s\" (ID: %i) promoted to primary, node \"%s\" (ID: %i) demoted to standby"),
5329 config_file_options.node_name,
5330 config_file_options.node_id,
5331 remote_node_record.node_name,
5332 remote_node_record.node_id);
5333 }
5334
5335 create_event_notification_extended(local_conn,
5336 &config_file_options,
5337 config_file_options.node_id,
5338 "standby_switchover",
5339 switchover_success,
5340 event_details.data,
5341 &event_info);
5342 if (switchover_success == true)
5343 {
5344 log_notice("%s", event_details.data);
5345 }
5346 else
5347 {
5348 log_error("%s", event_details.data);
5349 }
5350 termPQExpBuffer(&event_details);
5351 }
5352
5353 termPQExpBuffer(&command_output);
5354
5355 /*
5356 * If --siblings-follow specified, attempt to make them follow the new
5357 * primary
5358 */
5359 if (runtime_options.siblings_follow == true && sibling_nodes.node_count > 0)
5360 {
5361 sibling_nodes_follow(&local_node_record, &sibling_nodes, &sibling_nodes_stats);
5362 }
5363
5364 clear_node_info_list(&sibling_nodes);
5365
5366 /*
5367 * Clean up remote node (primary demoted to standby). It's possible that the node is
5368 * still starting up, so poll for a while until we get a connection.
5369 */
5370
5371 for (i = 0; i < config_file_options.standby_reconnect_timeout; i++)
5372 {
5373 remote_conn = establish_db_connection(remote_node_record.conninfo, false);
5374
5375 if (PQstatus(remote_conn) == CONNECTION_OK)
5376 break;
5377
5378 log_info(_("sleeping 1 second; %i of %i attempts (\"standby_reconnect_timeout\") to reconnect to demoted primary"),
5379 i + 1,
5380 config_file_options.standby_reconnect_timeout);
5381 sleep(1);
5382 }
5383
5384 /* check new standby (old primary) is reachable */
5385 if (PQstatus(remote_conn) != CONNECTION_OK)
5386 {
5387 switchover_success = false;
5388
5389 /* TODO: double-check whether new standby has attached */
5390
5391 log_warning(_("switchover did not fully complete"));
5392 log_detail(_("node \"%s\" (ID: %i) is now primary but node \"%s\" (ID: %i) is not reachable"),
5393 local_node_record.node_name,
5394 local_node_record.node_id,
5395 remote_node_record.node_name,
5396 remote_node_record.node_id);
5397
5398 if (config_file_options.use_replication_slots == true)
5399 {
5400 log_hint(_("any inactive replication slots on the old primary will need to be dropped manually"));
5401 }
5402 }
5403 else
5404 {
5405 NodeAttached node_attached;
5406
5407 /*
5408 * We were able to connect to the former primary - attempt to drop
5409 * this node's former replication slot, if it exists.
5410 */
5411 if (config_file_options.use_replication_slots == true)
5412 {
5413 drop_replication_slot_if_exists(remote_conn,
5414 remote_node_record.node_id,
5415 local_node_record.slot_name);
5416 }
5417
5418
5419 /*
5420 * Do a final check that the standby has connected - it's possible
5421 * the standby became reachable but has not connected (or became disconnected).
5422 */
5423
5424 node_attached = is_downstream_node_attached(local_conn,
5425 remote_node_record.node_name,
5426 NULL);
5427 if (node_attached == NODE_ATTACHED)
5428 {
5429 switchover_success = true;
5430 log_notice(_("switchover was successful"));
5431 log_detail(_("node \"%s\" is now primary and node \"%s\" is attached as standby"),
5432 local_node_record.node_name,
5433 remote_node_record.node_name);
5434 }
5435 else
5436 {
5437 log_notice(_("switchover is incomplete"));
5438 log_detail(_("node \"%s\" is now primary but node \"%s\" is not attached as standby"),
5439 local_node_record.node_name,
5440 remote_node_record.node_name);
5441 switchover_success = false;
5442 }
5443
5444 }
5445
5446 PQfinish(remote_conn);
5447 PQfinish(local_conn);
5448
5449 /*
5450 * Attempt to unpause all paused repmgrd instances, unless user explicitly
5451 * specifies not to.
5452 */
5453 if (runtime_options.repmgrd_no_pause == false)
5454 {
5455 if (repmgrd_running_count > 0)
5456 {
5457 ItemList repmgrd_unpause_errors = {NULL, NULL};
5458 NodeInfoListCell *cell = NULL;
5459 int i = 0;
5460 int error_node_count = 0;
5461
5462 for (cell = all_nodes.head; cell; cell = cell->next)
5463 {
5464
5465 if (repmgrd_info[i]->paused == true && runtime_options.repmgrd_force_unpause == false)
5466 {
5467 log_debug("repmgrd on node \"%s\" (ID %i) paused before switchover, --repmgrd-force-unpause not provided, not unpausing",
5468 cell->node_info->node_name,
5469 cell->node_info->node_id);
5470
5471 i++;
5472 continue;
5473 }
5474
5475 log_debug("unpausing repmgrd on node \"%s\" (ID %i)",
5476 cell->node_info->node_name,
5477 cell->node_info->node_id);
5478
5479 cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
5480
5481 if (PQstatus(cell->node_info->conn) == CONNECTION_OK)
5482 {
5483 if (repmgrd_pause(cell->node_info->conn, false) == false)
5484 {
5485 item_list_append_format(&repmgrd_unpause_errors,
5486 _("unable to unpause node \"%s\" (ID %i)"),
5487 cell->node_info->node_name,
5488 cell->node_info->node_id);
5489 error_node_count++;
5490 }
5491 }
5492 else
5493 {
5494 item_list_append_format(&repmgrd_unpause_errors,
5495 _("unable to connect to node \"%s\" (ID %i):\n%s"),
5496 cell->node_info->node_name,
5497 cell->node_info->node_id,
5498 PQerrorMessage(cell->node_info->conn));
5499 error_node_count++;
5500 }
5501
5502 i++;
5503 }
5504
5505 if (error_node_count > 0)
5506 {
5507 PQExpBufferData detail;
5508 ItemListCell *cell;
5509
5510 initPQExpBuffer(&detail);
5511
5512 for (cell = repmgrd_unpause_errors.head; cell; cell = cell->next)
5513 {
5514 appendPQExpBuffer(&detail,
5515 " %s\n",
5516 cell->string);
5517 }
5518
5519 log_warning(_("unable to unpause repmgrd on %i node(s)"),
5520 error_node_count);
5521 log_detail(_("errors encountered for following node(s):\n%s"), detail.data);
5522 log_hint(_("check node connection and status; unpause manually with \"repmgr service unpause\""));
5523
5524 termPQExpBuffer(&detail);
5525 }
5526 }
5527
5528 clear_node_info_list(&all_nodes);
5529 }
5530
5531 if (switchover_success == true)
5532 {
5533 log_notice(_("STANDBY SWITCHOVER has completed successfully"));
5534 }
5535 else
5536 {
5537 log_notice(_("STANDBY SWITCHOVER has completed with issues"));
5538 log_hint(_("see preceding log message(s) for details"));
5539 exit(ERR_SWITCHOVER_INCOMPLETE);
5540 }
5541
5542 return;
5543 }
5544
5545
5546 static void
check_source_server()5547 check_source_server()
5548 {
5549 char cluster_size[MAXLEN];
5550 char *connstr = NULL;
5551
5552 t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;
5553 RecordStatus record_status = RECORD_NOT_FOUND;
5554 ExtensionStatus extension_status = REPMGR_UNKNOWN;
5555 t_extension_versions extversions = T_EXTENSION_VERSIONS_INITIALIZER;
5556
5557 /* Attempt to connect to the upstream server to verify its configuration */
5558 log_verbose(LOG_DEBUG, "check_source_server()");
5559 log_info(_("connecting to source node"));
5560
5561 connstr = param_list_to_string(&source_conninfo);
5562 log_detail(_("connection string is: %s"), connstr);
5563 pfree(connstr);
5564
5565 source_conn = establish_db_connection_by_params(&source_conninfo, false);
5566
5567 /*
5568 * Unless in barman mode, exit with an error;
5569 * establish_db_connection_by_params() will have already logged an error
5570 * message
5571 */
5572 if (PQstatus(source_conn) != CONNECTION_OK)
5573 {
5574 PQfinish(source_conn);
5575 source_conn = NULL;
5576 if (mode == barman)
5577 return;
5578
5579 exit(ERR_DB_CONN);
5580 }
5581
5582 /*
5583 * If a connection was established, perform some sanity checks on the
5584 * provided upstream connection.
5585 */
5586
5587 source_server_version_num = check_server_version(source_conn, "primary", true, NULL);
5588
5589 /*
5590 * It's not essential to know the cluster size, but useful to sanity-check
5591 * we can actually run a query before going any further.
5592 */
5593 if (get_cluster_size(source_conn, cluster_size) == false)
5594 exit(ERR_DB_QUERY);
5595
5596 log_detail(_("current installation size is %s"),
5597 cluster_size);
5598
5599 /*
5600 * If the upstream node is a standby, try to connect to the primary too so
5601 * we can write an event record
5602 */
5603 if (get_recovery_type(source_conn) == RECTYPE_STANDBY)
5604 {
5605 primary_conn = get_primary_connection(source_conn, NULL, NULL);
5606
5607 if (PQstatus(primary_conn) != CONNECTION_OK)
5608 {
5609 log_error(_("unable to connect to primary node"));
5610 exit(ERR_BAD_CONFIG);
5611 }
5612 }
5613 else
5614 {
5615 primary_conn = source_conn;
5616 }
5617
5618 /*
5619 * Sanity-check that the primary node has a repmgr extension - if not
5620 * present, fail with an error unless -F/--force is used (to enable repmgr
5621 * to be used as a standalone clone tool).
5622 */
5623
5624 extension_status = get_repmgr_extension_status(primary_conn, &extversions);
5625
5626 if (extension_status != REPMGR_INSTALLED)
5627 {
5628 if (!runtime_options.force)
5629 {
5630 /* this is unlikely to happen */
5631 if (extension_status == REPMGR_UNKNOWN)
5632 {
5633 log_error(_("unable to determine status of \"repmgr\" extension"));
5634 log_detail("%s", PQerrorMessage(primary_conn));
5635 PQfinish(source_conn);
5636 exit(ERR_DB_QUERY);
5637 }
5638
5639 if (extension_status == REPMGR_AVAILABLE)
5640 {
5641 log_error(_("repmgr extension is available but not installed in database \"%s\""),
5642 param_get(&source_conninfo, "dbname"));
5643 log_hint(_("check that you are cloning from the database where \"repmgr\" is installed"));
5644 }
5645 else if (extension_status == REPMGR_UNAVAILABLE)
5646 {
5647 log_error(_("repmgr extension is not available on the upstream node"));
5648 }
5649 else if (extension_status == REPMGR_OLD_VERSION_INSTALLED)
5650 {
5651 log_error(_("an older version of the extension is installed on the upstream node"));
5652 log_detail(_("version %s is installed but newer version %s is available"),
5653 extversions.installed_version,
5654 extversions.default_version);
5655 log_hint(_("upgrade \"repmgr\" on the source node first"));
5656 }
5657
5658 PQfinish(source_conn);
5659 exit(ERR_BAD_CONFIG);
5660 }
5661
5662 log_warning(_("repmgr extension not found on source node"));
5663 }
5664 else
5665 {
5666 /*
5667 * If upstream is not a standby, retrieve its node records
5668 * and attempt to connect to one; we'll then compare
5669 * that node's system identifier to that of the source
5670 * connection, to ensure we're cloning from a node which is
5671 * part of the physical replication cluster. This is mainly
5672 * to prevent cloning a standby from a witness server.
5673 *
5674 * Note that it doesn't matter if the node from the node record
5675 * list is the same as the source node; also if the source node
5676 * does not have any node records, there's not a lot we can do.
5677 *
5678 * This check will be only carried out on PostgreSQL 9.6 and
5679 * later, as this is a precautionary check and we can retrieve the system
5680 * identifier with a normal connection.
5681 */
5682
5683 if (runtime_options.dry_run == true)
5684 {
5685 log_info(_("\"repmgr\" extension is installed in database \"%s\""),
5686 param_get(&source_conninfo, "dbname"));
5687 }
5688
5689 if (get_recovery_type(source_conn) == RECTYPE_PRIMARY && PQserverVersion(source_conn) >= 90600)
5690 {
5691 uint64 source_system_identifier = system_identifier(source_conn);
5692
5693 if (source_system_identifier != UNKNOWN_SYSTEM_IDENTIFIER)
5694 {
5695 NodeInfoList all_nodes = T_NODE_INFO_LIST_INITIALIZER;
5696 NodeInfoListCell *cell = NULL;
5697 get_all_node_records(source_conn, &all_nodes);
5698
5699 log_debug("%i node records returned by source node", all_nodes.node_count);
5700
5701 /* loop through its nodes table */
5702
5703 for (cell = all_nodes.head; cell; cell = cell->next)
5704 {
5705
5706 /* exclude the witness node, as its system identifier will be different, of course */
5707 if (cell->node_info->type == WITNESS)
5708 continue;
5709
5710 cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
5711 if (PQstatus(cell->node_info->conn) == CONNECTION_OK)
5712 {
5713 uint64 test_system_identifier = system_identifier(cell->node_info->conn);
5714 PQfinish(cell->node_info->conn);
5715 cell->node_info->conn = NULL;
5716
5717 if (test_system_identifier != UNKNOWN_SYSTEM_IDENTIFIER)
5718 {
5719 if (source_system_identifier != test_system_identifier)
5720 {
5721 log_error(_("source node's system identifier does not match other nodes in the replication cluster"));
5722 log_detail(_("source node's system identifier is %lu, replication cluster member \"%s\"'s system identifier is %lu"),
5723 source_system_identifier,
5724 cell->node_info->node_name,
5725 test_system_identifier);
5726 log_hint(_("check that the source node is not a witness server"));
5727 PQfinish(source_conn);
5728 source_conn = NULL;
5729
5730 exit(ERR_BAD_CONFIG);
5731 }
5732 /* identifiers match - our work here is done */
5733 break;
5734 }
5735 }
5736 else
5737 {
5738 PQfinish(cell->node_info->conn);
5739 cell->node_info->conn = NULL;
5740 }
5741 }
5742 clear_node_info_list(&all_nodes);
5743 }
5744 }
5745 }
5746
5747
5748 /*
5749 * Check the local directory to see if it appears to be a PostgreSQL
5750 * data directory.
5751 *
5752 * Note: a previous call to check_dir() will have checked whether it contains
5753 * a running PostgreSQL instance.
5754 */
5755 if (is_pg_dir(local_data_directory))
5756 {
5757 const char *msg = _("target data directory appears to be a PostgreSQL data directory");
5758 const char *hint = _("use -F/--force to overwrite the existing data directory");
5759
5760 if (runtime_options.force == false && runtime_options.dry_run == false)
5761 {
5762 log_error("%s", msg);
5763 log_detail(_("target data directory is \"%s\""), local_data_directory);
5764 log_hint("%s", hint);
5765 PQfinish(source_conn);
5766 exit(ERR_BAD_CONFIG);
5767 }
5768
5769 if (runtime_options.dry_run == true)
5770 {
5771 if (runtime_options.force == true)
5772 {
5773 log_warning("%s and will be overwritten", msg);
5774 log_detail(_("target data directory is \"%s\""), local_data_directory);
5775
5776 }
5777 else
5778 {
5779 log_warning("%s", msg);
5780 log_detail(_("target data directory is \"%s\""), local_data_directory);
5781 log_hint("%s", hint);
5782 }
5783 }
5784 }
5785
5786 /*
5787 * Attempt to find the upstream node record
5788 */
5789 if (runtime_options.upstream_node_id == NO_UPSTREAM_NODE)
5790 upstream_node_id = get_primary_node_id(source_conn);
5791 else
5792 upstream_node_id = runtime_options.upstream_node_id;
5793
5794 log_debug("upstream_node_id determined as %i", upstream_node_id);
5795
5796 if (upstream_node_id != UNKNOWN_NODE_ID)
5797 {
5798 t_node_info other_node_record = T_NODE_INFO_INITIALIZER;
5799
5800 record_status = get_node_record(source_conn, upstream_node_id, &upstream_node_record);
5801 if (record_status == RECORD_FOUND)
5802 {
5803 t_conninfo_param_list upstream_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
5804 char *upstream_conninfo_user;
5805
5806 initialize_conninfo_params(&upstream_conninfo, false);
5807 parse_conninfo_string(upstream_node_record.conninfo, &upstream_conninfo, NULL, false);
5808
5809 strncpy(recovery_conninfo_str, upstream_node_record.conninfo, MAXLEN);
5810 strncpy(upstream_repluser, upstream_node_record.repluser, NAMEDATALEN);
5811
5812 upstream_conninfo_user = param_get(&upstream_conninfo, "user");
5813 if (upstream_conninfo_user != NULL)
5814 {
5815 strncpy(upstream_user, upstream_conninfo_user, NAMEDATALEN);
5816 }
5817 else
5818 {
5819 get_conninfo_default_value("user", upstream_user, NAMEDATALEN);
5820 }
5821
5822 log_verbose(LOG_DEBUG, "upstream_user is \"%s\"", upstream_user);
5823
5824 upstream_conninfo_found = true;
5825 }
5826
5827 /*
5828 * Check that there's no existing node record with the same name but
5829 * different ID.
5830 */
5831 record_status = get_node_record_by_name(source_conn, config_file_options.node_name, &other_node_record);
5832
5833 if (record_status == RECORD_FOUND && other_node_record.node_id != config_file_options.node_id)
5834 {
5835 log_error(_("another node (ID: %i) already exists with node_name \"%s\""),
5836 other_node_record.node_id,
5837 config_file_options.node_name);
5838 PQfinish(source_conn);
5839 exit(ERR_BAD_CONFIG);
5840 }
5841 }
5842
5843 /* Check the source node is configured sufficiently to be able to clone from */
5844 check_upstream_config(source_conn, source_server_version_num, &upstream_node_record, true);
5845
5846 /*
5847 * Work out which users need to perform which tasks.
5848 *
5849 * Here we'll check the qualifications of the repmgr user as we have the
5850 * connection open; replication and superuser connections will be opened
5851 * when required and any errors will be raised at that point.
5852 */
5853
5854 /*
5855 * If the user wants to copy configuration files located outside the
5856 * data directory, we'll need to be able to query the upstream node's data
5857 * directory location, which is available only to superusers or members
5858 * of the appropriate role.
5859 */
5860 if (runtime_options.copy_external_config_files == true)
5861 {
5862 /*
5863 * This will check if the user is superuser or (from Pg10) is a member
5864 * of "pg_read_all_settings"/"pg_monitor"
5865 */
5866 if (connection_has_pg_monitor_role(source_conn, "pg_read_all_settings") == true)
5867 {
5868 SettingsUser = REPMGR_USER;
5869 }
5870 else if (runtime_options.superuser[0] != '\0')
5871 {
5872 SettingsUser = SUPERUSER;
5873 }
5874 else
5875 {
5876 log_error(_("--copy-external-config-files requires a user with permission to read the data directory on the source node"));
5877
5878 if (PQserverVersion(source_conn) >= 100000)
5879 {
5880 log_hint(_("the repmgr user must be superuser or member of role \"pg_monitor\" or \"pg_read_all_settings\", or a superuser provided with -S/--superuser"));
5881 }
5882 else
5883 {
5884 log_hint(_("the repmgr user must be superuser, or a superuser provided with -S/--superuser"));
5885 }
5886
5887 exit(ERR_BAD_CONFIG);
5888 }
5889 }
5890
5891 /*
5892 * To create replication slots, we'll need a user with the REPLICATION
5893 * privilege, or a superuser.
5894 */
5895 if (config_file_options.use_replication_slots == true)
5896 {
5897 }
5898 }
5899
5900
5901 static void
check_source_server_via_barman()5902 check_source_server_via_barman()
5903 {
5904 char buf[MAXLEN] = "";
5905 char barman_conninfo_str[MAXLEN] = "";
5906 t_conninfo_param_list barman_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
5907 char *errmsg = NULL;
5908 bool parse_success = false,
5909 command_success = false;
5910 char where_condition[MAXLEN];
5911 PQExpBufferData command_output;
5912 PQExpBufferData repmgr_conninfo_buf;
5913
5914 int c = 0;
5915
5916 get_barman_property(barman_conninfo_str, "conninfo", local_repmgr_tmp_directory);
5917
5918 initialize_conninfo_params(&barman_conninfo, false);
5919
5920 /*
5921 * parse_conninfo_string() here will remove the upstream's
5922 * `application_name`, if set
5923 */
5924 parse_success = parse_conninfo_string(barman_conninfo_str, &barman_conninfo, &errmsg, true);
5925
5926 if (parse_success == false)
5927 {
5928 log_error(_("Unable to parse barman conninfo string \"%s\":\n%s"),
5929 barman_conninfo_str, errmsg);
5930 exit(ERR_BARMAN);
5931 }
5932
5933 /* Overwrite database name in the parsed parameter list */
5934 param_set(&barman_conninfo, "dbname", runtime_options.dbname);
5935
5936 /* Rebuild the Barman conninfo string */
5937 initPQExpBuffer(&repmgr_conninfo_buf);
5938
5939 for (c = 0; c < barman_conninfo.size && barman_conninfo.keywords[c] != NULL; c++)
5940 {
5941 if (repmgr_conninfo_buf.len != 0)
5942 appendPQExpBufferChar(&repmgr_conninfo_buf, ' ');
5943
5944 appendPQExpBuffer(&repmgr_conninfo_buf, "%s=",
5945 barman_conninfo.keywords[c]);
5946 appendConnStrVal(&repmgr_conninfo_buf,
5947 barman_conninfo.values[c]);
5948 }
5949
5950 log_verbose(LOG_DEBUG,
5951 "repmgr database conninfo string on barman server: %s",
5952 repmgr_conninfo_buf.data);
5953
5954 if (upstream_node_id == UNKNOWN_NODE_ID)
5955 {
5956 maxlen_snprintf(where_condition, "type='primary' AND active IS TRUE");
5957 }
5958 else
5959 {
5960 maxlen_snprintf(where_condition, "node_id=%i", upstream_node_id);
5961 }
5962
5963 initPQExpBuffer(&command_output);
5964 maxlen_snprintf(buf,
5965 "psql -AqtX -d \\\"%s\\\" -c \\\""
5966 " SELECT conninfo"
5967 " FROM repmgr.nodes"
5968 " WHERE %s"
5969 " AND active IS TRUE"
5970 "\\\"",
5971 repmgr_conninfo_buf.data,
5972 where_condition);
5973
5974 termPQExpBuffer(&repmgr_conninfo_buf);
5975
5976 command_success = remote_command(config_file_options.barman_host,
5977 runtime_options.remote_user,
5978 buf,
5979 config_file_options.ssh_options,
5980 &command_output);
5981
5982 if (command_success == false)
5983 {
5984 log_error(_("unable to execute database query via Barman server"));
5985 exit(ERR_BARMAN);
5986 }
5987
5988 maxlen_snprintf(recovery_conninfo_str, "%s", command_output.data);
5989 string_remove_trailing_newlines(recovery_conninfo_str);
5990
5991 upstream_conninfo_found = true;
5992 log_verbose(LOG_DEBUG,
5993 "upstream node conninfo string extracted via barman server: %s",
5994 recovery_conninfo_str);
5995
5996 termPQExpBuffer(&command_output);
5997 }
5998
5999
6000 /*
6001 * check_upstream_config()
6002 *
6003 * Perform sanity check on upstream server configuration before starting cloning
6004 * process
6005 *
6006 * TODO:
6007 * - check user is qualified to perform base backup
6008 */
6009
6010 static bool
check_upstream_config(PGconn * conn,int server_version_num,t_node_info * upstream_node_record,bool exit_on_error)6011 check_upstream_config(PGconn *conn, int server_version_num, t_node_info *upstream_node_record, bool exit_on_error)
6012 {
6013 int i;
6014 bool config_ok = true;
6015 char *wal_error_message = NULL;
6016 t_basebackup_options backup_options = T_BASEBACKUP_OPTIONS_INITIALIZER;
6017 bool backup_options_ok = true;
6018 ItemList backup_option_errors = {NULL, NULL};
6019 bool wal_method_stream = true;
6020 standy_clone_mode mode;
6021 bool pg_setting_ok;
6022
6023 /*
6024 * Detecting the intended cloning mode
6025 */
6026 mode = get_standby_clone_mode();
6027
6028 /*
6029 * Parse "pg_basebackup_options", if set, to detect whether --wal-method
6030 * has been set to something other than `stream` (i.e. `fetch`), as this
6031 * will influence some checks
6032 */
6033
6034 backup_options_ok = parse_pg_basebackup_options(config_file_options.pg_basebackup_options,
6035 &backup_options, server_version_num,
6036 &backup_option_errors);
6037
6038 if (backup_options_ok == false)
6039 {
6040 if (exit_on_error == true)
6041 {
6042 log_error(_("error(s) encountered parsing \"pg_basebackup_options\""));
6043 print_error_list(&backup_option_errors, LOG_ERR);
6044 log_hint(_("\"pg_basebackup_options\" is: \"%s\""),
6045 config_file_options.pg_basebackup_options);
6046 exit(ERR_BAD_CONFIG);
6047 }
6048
6049 config_ok = false;
6050 }
6051
6052 if (strlen(backup_options.wal_method) && strcmp(backup_options.wal_method, "stream") != 0)
6053 wal_method_stream = false;
6054
6055 {
6056 char *levels_pre96[] = {
6057 "hot_standby",
6058 "logical",
6059 NULL,
6060 };
6061
6062 /*
6063 * Note that in 9.6+, "hot_standby" and "archive" are accepted as
6064 * aliases for "replica", but current_setting() will of course always
6065 * return "replica"
6066 */
6067 char *levels_96plus[] = {
6068 "replica",
6069 "logical",
6070 NULL,
6071 };
6072
6073 char **levels;
6074 int j = 0;
6075
6076 if (server_version_num < 90600)
6077 {
6078 levels = (char **) levels_pre96;
6079 wal_error_message = _("parameter \"wal_level\" must be set to \"hot_standby\" or \"logical\"");
6080 }
6081 else
6082 {
6083 levels = (char **) levels_96plus;
6084 wal_error_message = _("parameter \"wal_level\" must be set to \"replica\" or \"logical\"");
6085 }
6086
6087 do
6088 {
6089 i = guc_set(conn, "wal_level", "=", levels[j]);
6090 if (i)
6091 {
6092 break;
6093 }
6094 j++;
6095 } while (levels[j] != NULL);
6096 }
6097
6098 if (i == 0 || i == -1)
6099 {
6100 if (i == 0)
6101 {
6102 log_error("%s", wal_error_message);
6103 }
6104
6105 if (exit_on_error == true)
6106 {
6107 PQfinish(conn);
6108 exit(ERR_BAD_CONFIG);
6109 }
6110
6111 config_ok = false;
6112 }
6113
6114 if (config_file_options.use_replication_slots == true)
6115 {
6116 pg_setting_ok = get_pg_setting_int(conn, "max_replication_slots", &i);
6117
6118 if (pg_setting_ok == false || i < 1)
6119 {
6120 if (pg_setting_ok == true)
6121 {
6122 log_error(_("parameter \"max_replication_slots\" must be set to at least 1 to enable replication slots"));
6123 log_detail(_("current value is %i"), i);
6124 log_hint(_("\"max_replication_slots\" should be set to at least the number of expected standbys"));
6125 if (exit_on_error == true)
6126 {
6127 PQfinish(conn);
6128 exit(ERR_BAD_CONFIG);
6129 }
6130
6131 config_ok = false;
6132 }
6133 }
6134
6135 if (pg_setting_ok == true && i > 0 && runtime_options.dry_run == true)
6136 {
6137 log_info(_("parameter \"max_replication_slots\" set to %i"), i);
6138 }
6139 }
6140 /*
6141 * physical replication slots not available or not requested - check if
6142 * there are any circumstances where "wal_keep_segments" should be set
6143 */
6144 else if (mode != barman)
6145 {
6146 bool check_wal_keep_segments = false;
6147
6148 /*
6149 * A non-zero "wal_keep_segments" value will almost certainly be
6150 * required if pg_basebackup is being used with --xlog-method=fetch,
6151 * *and* no restore command has been specified
6152 */
6153 if (wal_method_stream == false
6154 && strcmp(config_file_options.restore_command, "") == 0)
6155 {
6156 check_wal_keep_segments = true;
6157 }
6158
6159 if (check_wal_keep_segments == true)
6160 {
6161 const char *wal_keep_parameter_name = "wal_keep_size";
6162
6163 if (PQserverVersion(conn) < 130000)
6164 wal_keep_parameter_name = "wal_keep_segments";
6165
6166 pg_setting_ok = get_pg_setting_int(conn, wal_keep_parameter_name, &i);
6167
6168 if (pg_setting_ok == false || i < 1)
6169 {
6170 if (pg_setting_ok == true)
6171 {
6172 log_error(_("parameter \"%s\" on the upstream server must be be set to a non-zero value"),
6173 wal_keep_parameter_name);
6174 log_hint(_("Choose a value sufficiently high enough to retain enough WAL "
6175 "until the standby has been cloned and started.\n "
6176 "Alternatively set up WAL archiving using e.g. PgBarman and configure "
6177 "'restore_command' in repmgr.conf to fetch WALs from there."));
6178 log_hint(_("In PostgreSQL 9.4 and later, replication slots can be used, which "
6179 "do not require \"%s\" to be set "
6180 "(set parameter \"use_replication_slots\" in repmgr.conf to enable)\n"),
6181 wal_keep_parameter_name);
6182 }
6183
6184 if (exit_on_error == true)
6185 {
6186 PQfinish(conn);
6187 exit(ERR_BAD_CONFIG);
6188 }
6189
6190 config_ok = false;
6191 }
6192
6193 if (pg_setting_ok == true && i > 0 && runtime_options.dry_run == true)
6194 {
6195 log_info(_("parameter \"%s\" set to %i"),
6196 wal_keep_parameter_name,
6197 i);
6198 }
6199 }
6200 }
6201
6202
6203 if (config_file_options.use_replication_slots == false)
6204 {
6205 log_info(_("replication slot usage not requested; no replication slot will be set up for this standby"));
6206 }
6207
6208
6209 /*
6210 * If archive_mode is enabled, check that 'archive_command' is non empty
6211 * (however it's not practical to check that it actually represents a
6212 * valid command).
6213 *
6214 * From PostgreSQL 9.5, archive_mode can be one of 'off', 'on' or 'always'
6215 * so for ease of backwards compatibility, rather than explicitly check
6216 * for an enabled mode, check that it's not "off".
6217 */
6218
6219 if (guc_set(conn, "archive_mode", "!=", "off"))
6220 {
6221 i = guc_set(conn, "archive_command", "!=", "");
6222
6223 if (i == 0 || i == -1)
6224 {
6225 if (i == 0)
6226 log_error(_("parameter \"archive_command\" must be set to a valid command"));
6227
6228 if (exit_on_error == true)
6229 {
6230 PQfinish(conn);
6231 exit(ERR_BAD_CONFIG);
6232 }
6233
6234 config_ok = false;
6235 }
6236 }
6237
6238
6239 /*
6240 * Check that 'hot_standby' is on. This isn't strictly necessary for the
6241 * primary server, however the assumption is that we'll be cloning
6242 * standbys and thus copying the primary configuration; this way the
6243 * standby will be correctly configured by default.
6244 */
6245
6246 i = guc_set(conn, "hot_standby", "=", "on");
6247 if (i == 0 || i == -1)
6248 {
6249 if (i == 0)
6250 {
6251 log_error(_("parameter \"hot_standby\" must be set to \"on\""));
6252 }
6253
6254 if (exit_on_error == true)
6255 {
6256 PQfinish(conn);
6257 exit(ERR_BAD_CONFIG);
6258 }
6259
6260 config_ok = false;
6261 }
6262
6263 pg_setting_ok = get_pg_setting_int(conn, "max_wal_senders", &i);
6264
6265 if (pg_setting_ok == false || i < 1)
6266 {
6267 if (pg_setting_ok == true)
6268 {
6269 log_error(_("parameter \"max_wal_senders\" must be set to be at least %i"), i);
6270 log_hint(_("\"max_wal_senders\" should be set to at least the number of expected standbys"));
6271 }
6272
6273 if (exit_on_error == true)
6274 {
6275 PQfinish(conn);
6276 exit(ERR_BAD_CONFIG);
6277 }
6278
6279 config_ok = false;
6280 }
6281 else if (pg_setting_ok == true && i > 0 && runtime_options.dry_run == true)
6282 {
6283 log_info(_("parameter \"max_wal_senders\" set to %i"), i);
6284 }
6285
6286 /*
6287 * If using pg_basebackup, ensure sufficient replication connections can
6288 * be made. There's no guarantee they'll still be available by the time
6289 * pg_basebackup is executed, but there's nothing we can do about that.
6290 * This check is mainly intended to warn about missing replication permissions
6291 * and/or lack of available walsenders.
6292 */
6293 if (mode == pg_basebackup)
6294 {
6295
6296 PGconn **connections;
6297 int i;
6298 int available_wal_senders;
6299 int min_replication_connections = 1;
6300 int possible_replication_connections = 0;
6301 t_conninfo_param_list repl_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
6302
6303
6304 /*
6305 * work out how many replication connections are required (1 or 2)
6306 */
6307
6308 if (wal_method_stream == true)
6309 min_replication_connections += 1;
6310
6311 log_notice(_("checking for available walsenders on the source node (%i required)"),
6312 min_replication_connections);
6313
6314 /*
6315 * check how many free walsenders are available
6316 */
6317 get_node_replication_stats(conn, upstream_node_record);
6318
6319 available_wal_senders = upstream_node_record->max_wal_senders -
6320 upstream_node_record->attached_wal_receivers;
6321
6322 if (available_wal_senders < min_replication_connections)
6323 {
6324 log_error(_("insufficient free walsenders on the source node"));
6325 log_detail(_("%i free walsenders required, %i free walsenders available"),
6326 min_replication_connections,
6327 available_wal_senders);
6328 log_hint(_("increase \"max_wal_senders\" on the source node by at least %i"),
6329 (upstream_node_record->attached_wal_receivers + min_replication_connections) - upstream_node_record->max_wal_senders);
6330
6331 if (exit_on_error == true)
6332 {
6333 PQfinish(conn);
6334 exit(ERR_BAD_CONFIG);
6335 }
6336 }
6337 else if (runtime_options.dry_run == true)
6338 {
6339 log_info(_("sufficient walsenders available on the source node"));
6340 log_detail(_("%i required, %i available"),
6341 min_replication_connections,
6342 available_wal_senders);
6343 }
6344
6345
6346 /*
6347 * Sufficient free walsenders appear to be available, check if
6348 * we can connect to them. We check that the required number
6349 * of connections can be made e.g. to rule out a very restrictive
6350 * "CONNECTION LIMIT" setting.
6351 */
6352
6353 log_notice(_("checking replication connections can be made to the source server (%i required)"),
6354 min_replication_connections);
6355
6356 /*
6357 * Make a copy of the connection parameter arrays, and append
6358 * "replication".
6359 */
6360 initialize_conninfo_params(&repl_conninfo, false);
6361
6362 conn_to_param_list(conn, &repl_conninfo);
6363
6364 param_set(&repl_conninfo, "replication", "1");
6365
6366 if (runtime_options.replication_user[0] != '\0')
6367 {
6368 param_set(&repl_conninfo, "user", runtime_options.replication_user);
6369 }
6370 else if (upstream_repluser[0] != '\0')
6371 {
6372 param_set(&repl_conninfo, "user", upstream_repluser);
6373 }
6374 else if (upstream_node_record->repluser[0] != '\0')
6375 {
6376 param_set(&repl_conninfo, "user", upstream_node_record->repluser);
6377 }
6378
6379 if (strcmp(param_get(&repl_conninfo, "user"), upstream_user) != 0)
6380 {
6381 param_set(&repl_conninfo, "dbname", "replication");
6382 }
6383
6384 connections = pg_malloc0(sizeof(PGconn *) * min_replication_connections);
6385
6386 /*
6387 * Attempt to create the minimum number of required concurrent
6388 * connections
6389 */
6390 for (i = 0; i < min_replication_connections; i++)
6391 {
6392 PGconn *replication_conn;
6393
6394 replication_conn = establish_db_connection_by_params(&repl_conninfo, false);
6395
6396 if (PQstatus(replication_conn) == CONNECTION_OK)
6397 {
6398 connections[i] = replication_conn;
6399 possible_replication_connections++;
6400 }
6401 }
6402
6403 /* Close previously created connections */
6404 for (i = 0; i < possible_replication_connections; i++)
6405 {
6406 PQfinish(connections[i]);
6407 }
6408
6409 pfree(connections);
6410 free_conninfo_params(&repl_conninfo);
6411
6412 if (possible_replication_connections < min_replication_connections)
6413 {
6414 config_ok = false;
6415
6416 log_error(_("unable to establish necessary replication connections"));
6417 log_hint(_("check replication permissions on the source server"));
6418
6419 if (exit_on_error == true)
6420 {
6421 PQfinish(conn);
6422 exit(ERR_BAD_CONFIG);
6423 }
6424 }
6425
6426 if (runtime_options.dry_run == true)
6427 {
6428 log_info(_("required number of replication connections could be made to the source server"));
6429 log_detail(_("%i replication connections required"),
6430 min_replication_connections);
6431 }
6432 else
6433 {
6434 log_verbose(LOG_INFO, _("sufficient replication connections could be made to the source server (%i required)"),
6435 min_replication_connections);
6436 }
6437 }
6438
6439 /*
6440 * Finally, add some checks for recommended settings
6441 */
6442
6443 {
6444 bool data_checksums = false;
6445 bool wal_log_hints = false;
6446
6447 /* data_checksums available from PostgreSQL 9.3; can be read by any user */
6448 if (get_pg_setting_bool(conn, "data_checksums", &data_checksums) == false)
6449 {
6450 /* highly unlikely this will happen */
6451 log_error(_("unable to determine value for \"data_checksums\""));
6452 exit(ERR_BAD_CONFIG);
6453 }
6454
6455 /* wal_log_hints available from PostgreSQL 9.4; can be read by any user */
6456 if (get_pg_setting_bool(conn, "wal_log_hints", &wal_log_hints) == false)
6457 {
6458 /* highly unlikely this will happen */
6459 log_error(_("unable to determine value for \"wal_log_hints\""));
6460 exit(ERR_BAD_CONFIG);
6461 }
6462
6463 if (data_checksums == false && wal_log_hints == false)
6464 {
6465 log_warning(_("data checksums are not enabled and \"wal_log_hints\" is \"off\""));
6466 log_detail(_("pg_rewind requires \"wal_log_hints\" to be enabled"));
6467 }
6468 }
6469
6470 return config_ok;
6471 }
6472
6473
6474 /*
6475 * initialise_direct_clone()
6476 *
6477 * In pg_basebackup mode, configure the target data directory
6478 * if necessary, and fetch information about tablespaces and configuration
6479 * files.
6480 *
6481 * Event(s):
6482 * - standby_clone
6483 */
6484 static void
initialise_direct_clone(t_node_info * local_node_record,t_node_info * upstream_node_record)6485 initialise_direct_clone(t_node_info *local_node_record, t_node_info *upstream_node_record)
6486 {
6487 /*
6488 * Check the destination data directory can be used (in Barman mode, this
6489 * directory will already have been created)
6490 */
6491
6492 if (!create_pg_dir(local_data_directory, runtime_options.force))
6493 {
6494 log_error(_("unable to use directory \"%s\""),
6495 local_data_directory);
6496 log_hint(_("use -F/--force to force this directory to be overwritten"));
6497 exit(ERR_BAD_CONFIG);
6498 }
6499
6500 /*
6501 * Check that tablespaces named in any `tablespace_mapping` configuration
6502 * file parameters exist.
6503 *
6504 * pg_basebackup doesn't verify mappings, so any errors will not be
6505 * caught. We'll do that here as a value-added service.
6506 *
6507 */
6508
6509 if (config_file_options.tablespace_mapping.head != NULL)
6510 {
6511 TablespaceListCell *cell;
6512 KeyValueList not_found = {NULL, NULL};
6513 int total = 0,
6514 matched = 0;
6515 bool success = false;
6516
6517 for (cell = config_file_options.tablespace_mapping.head; cell; cell = cell->next)
6518 {
6519 char *old_dir_escaped = escape_string(source_conn, cell->old_dir);
6520 char name[MAXLEN] = "";
6521
6522 success = get_tablespace_name_by_location(source_conn, old_dir_escaped, name);
6523 pfree(old_dir_escaped);
6524
6525 if (success == true)
6526 {
6527 matched++;
6528 }
6529 else
6530 {
6531 key_value_list_set(¬_found,
6532 cell->old_dir,
6533 "");
6534 }
6535
6536 total++;
6537 }
6538
6539 if (not_found.head != NULL)
6540 {
6541 PQExpBufferData detail;
6542 KeyValueListCell *kv_cell;
6543
6544 log_error(_("%i of %i mapped tablespaces not found"),
6545 total - matched, total);
6546
6547 initPQExpBuffer(&detail);
6548
6549 for (kv_cell = not_found.head; kv_cell; kv_cell = kv_cell->next)
6550 {
6551 appendPQExpBuffer(
6552 &detail,
6553 " %s\n", kv_cell->key);
6554 }
6555
6556 log_detail(_("following tablespaces not found:\n%s"),
6557 detail.data);
6558 termPQExpBuffer(&detail);
6559
6560 exit(ERR_BAD_CONFIG);
6561 }
6562 }
6563
6564
6565 /*
6566 * If replication slots requested, create appropriate slot on the source
6567 * node; this must be done before pg_basebackup is called.
6568 *
6569 * Note: if the source node is different to the specified upstream node,
6570 * we'll need to drop the slot and recreate it on the upstream.
6571 *
6572 * TODO: skip this for Pg10, and ensure temp slot option used
6573 *
6574 * Replication slots are not supported (and not very useful anyway) in
6575 * Barman mode.
6576 */
6577
6578 if (config_file_options.use_replication_slots == true)
6579 {
6580 PQExpBufferData event_details;
6581
6582 initPQExpBuffer(&event_details);
6583
6584 if (create_replication_slot(source_conn, local_node_record->slot_name, upstream_node_record, &event_details) == false)
6585 {
6586 log_error("%s", event_details.data);
6587
6588 create_event_notification(primary_conn,
6589 &config_file_options,
6590 config_file_options.node_id,
6591 "standby_clone",
6592 false,
6593 event_details.data);
6594
6595 PQfinish(source_conn);
6596
6597 exit(ERR_DB_QUERY);
6598 }
6599
6600 termPQExpBuffer(&event_details);
6601
6602 log_verbose(LOG_INFO,
6603 _("replication slot \"%s\" created on source node"),
6604 local_node_record->slot_name);
6605 }
6606
6607 return;
6608 }
6609
6610
6611 static int
run_basebackup(t_node_info * node_record)6612 run_basebackup(t_node_info *node_record)
6613 {
6614 PQExpBufferData params;
6615 PQExpBufferData script;
6616
6617 int r = SUCCESS;
6618
6619 TablespaceListCell *cell = NULL;
6620 t_basebackup_options backup_options = T_BASEBACKUP_OPTIONS_INITIALIZER;
6621
6622 /*
6623 * Parse the pg_basebackup_options provided in repmgr.conf - we'll want to
6624 * check later whether certain options were set by the user
6625 */
6626 parse_pg_basebackup_options(config_file_options.pg_basebackup_options,
6627 &backup_options,
6628 source_server_version_num,
6629 NULL);
6630
6631 /* Create pg_basebackup command line options */
6632
6633 initPQExpBuffer(¶ms);
6634
6635 appendPQExpBuffer(¶ms, " -D %s", local_data_directory);
6636
6637 /*
6638 * conninfo string provided - pass it to pg_basebackup as the -d option
6639 * (pg_basebackup doesn't require or want a database name, but for
6640 * consistency with other applications accepts a conninfo string under
6641 * -d/--dbname)
6642 */
6643 if (runtime_options.conninfo_provided == true)
6644 {
6645 t_conninfo_param_list conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
6646 char *conninfo_str = NULL;
6647
6648 initialize_conninfo_params(&conninfo, false);
6649
6650 /* string will already have been parsed */
6651 (void) parse_conninfo_string(runtime_options.dbname, &conninfo, NULL, false);
6652
6653 if (runtime_options.replication_user[0] != '\0')
6654 {
6655 param_set(&conninfo, "user", runtime_options.replication_user);
6656 }
6657 else if (upstream_repluser[0] != '\0')
6658 {
6659 param_set(&conninfo, "user", upstream_repluser);
6660 }
6661 else
6662 {
6663 param_set(&conninfo, "user", node_record->repluser);
6664 }
6665
6666 conninfo_str = param_list_to_string(&conninfo);
6667
6668 appendPQExpBuffer(¶ms, " -d '%s'", conninfo_str);
6669
6670 pfree(conninfo_str);
6671 }
6672
6673 /*
6674 * Connection parameters not passed to repmgr as conninfo string - provide
6675 * them individually to pg_basebackup (-d/--dbname not required)
6676 */
6677 else
6678 {
6679 if (strlen(runtime_options.host))
6680 {
6681 appendPQExpBuffer(¶ms, " -h %s", runtime_options.host);
6682 }
6683
6684 if (strlen(runtime_options.port))
6685 {
6686 appendPQExpBuffer(¶ms, " -p %s", runtime_options.port);
6687 }
6688
6689 if (strlen(runtime_options.replication_user))
6690 {
6691 appendPQExpBuffer(¶ms, " -U %s", runtime_options.replication_user);
6692 }
6693 else if (strlen(upstream_repluser))
6694 {
6695 appendPQExpBuffer(¶ms, " -U %s", upstream_repluser);
6696 }
6697 else if (strlen(node_record->repluser))
6698 {
6699 appendPQExpBuffer(¶ms, " -U %s", node_record->repluser);
6700 }
6701 else if (strlen(runtime_options.username))
6702 {
6703 appendPQExpBuffer(¶ms, " -U %s", runtime_options.username);
6704 }
6705
6706 }
6707
6708 if (runtime_options.fast_checkpoint)
6709 {
6710 appendPQExpBufferStr(¶ms, " -c fast");
6711 }
6712
6713 if (config_file_options.tablespace_mapping.head != NULL)
6714 {
6715 for (cell = config_file_options.tablespace_mapping.head; cell; cell = cell->next)
6716 {
6717 appendPQExpBuffer(¶ms, " -T %s=%s", cell->old_dir, cell->new_dir);
6718 }
6719 }
6720
6721 /*
6722 * To ensure we have all the WALs needed during basebackup execution we
6723 * stream them as the backup is taking place.
6724 *
6725 * From 9.6, if replication slots are in use, we'll have previously
6726 * created a slot with reserved LSN, and will stream from that slot to
6727 * avoid WAL buildup on the primary using the -S/--slot, which requires
6728 * -X/--xlog-method=stream (from 10, -X/--wal-method=stream)
6729 */
6730 if (!strlen(backup_options.wal_method))
6731 {
6732 appendPQExpBufferStr(¶ms, " -X stream");
6733 }
6734
6735 /*
6736 * From 9.6, pg_basebackup accepts -S/--slot, which forces WAL streaming
6737 * to use the specified replication slot. If replication slot usage is
6738 * specified, the slot will already have been created.
6739 *
6740 * NOTE: currently there's no way of disabling the --slot option while
6741 * using --xlog-method=stream - it's hard to imagine a use case for this,
6742 * so no provision has been made for doing it.
6743 *
6744 * NOTE: It's possible to set 'pg_basebackup_options' with an invalid
6745 * combination of values for --wal-method (--xlog-method) and --slot -
6746 * we're not checking that, just that we're not overriding any
6747 * user-supplied values
6748 */
6749 if (source_server_version_num >= 90600 && config_file_options.use_replication_slots)
6750 {
6751 bool slot_add = true;
6752
6753 /*
6754 * Check whether 'pg_basebackup_options' in repmgr.conf has the --slot
6755 * option set, or if --wal-method (--xlog-method) is set to a value
6756 * other than "stream" (in which case we can't use --slot).
6757 */
6758 if (strlen(backup_options.slot) || (strlen(backup_options.wal_method) && strcmp(backup_options.wal_method, "stream") != 0))
6759 {
6760 slot_add = false;
6761 }
6762
6763 if (slot_add == true)
6764 {
6765 appendPQExpBuffer(¶ms, " -S %s", node_record->slot_name);
6766 }
6767 }
6768
6769 initPQExpBuffer(&script);
6770 make_pg_path(&script, "pg_basebackup");
6771
6772 appendPQExpBuffer(&script,
6773 " -l \"repmgr base backup\" %s %s",
6774 params.data,
6775 config_file_options.pg_basebackup_options);
6776
6777 termPQExpBuffer(¶ms);
6778
6779 log_info(_("executing:\n %s"), script.data);
6780
6781 /*
6782 * As of 9.4, pg_basebackup only ever returns 0 or 1
6783 */
6784
6785 r = system(script.data);
6786
6787 termPQExpBuffer(&script);
6788
6789 if (r != 0)
6790 return ERR_BAD_BASEBACKUP;
6791
6792 /* check connections are still available */
6793 (void)connection_ping_reconnect(primary_conn);
6794
6795 if (source_conn != primary_conn)
6796 (void)connection_ping_reconnect(source_conn);
6797
6798 /*
6799 * If replication slots in use, check the created slot is on the correct
6800 * node; the slot will initially get created on the source node, and will
6801 * need to be dropped and recreated on the actual upstream node if these
6802 * differ.
6803 */
6804 if (config_file_options.use_replication_slots && upstream_node_id != UNKNOWN_NODE_ID)
6805 {
6806 t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;
6807 t_replication_slot slot_info = T_REPLICATION_SLOT_INITIALIZER;
6808 RecordStatus record_status = RECORD_NOT_FOUND;
6809 bool slot_exists_on_upstream = false;
6810
6811 record_status = get_node_record(source_conn, upstream_node_id, &upstream_node_record);
6812
6813 /*
6814 * If there's no upstream record, there's no point in trying to create
6815 * a replication slot on the designated upstream, as the assumption is
6816 * it won't exist at this point.
6817 */
6818 if (record_status != RECORD_FOUND)
6819 {
6820 log_warning(_("no record exists for designated upstream node %i"),
6821 upstream_node_id);
6822 log_hint(_("you'll need to create the replication slot (\"%s\") manually"),
6823 node_record->slot_name);
6824 }
6825 else
6826 {
6827 PGconn *upstream_conn = NULL;
6828
6829 upstream_conn = establish_db_connection(upstream_node_record.conninfo, false);
6830
6831 /*
6832 * It's possible the upstream node is not yet running, in which case we'll
6833 * have to rely on the user taking action to create the slot
6834 */
6835 if (PQstatus(upstream_conn) != CONNECTION_OK)
6836 {
6837 log_warning(_("unable to connect to upstream node to create replication slot"));
6838 /*
6839 * TODO: if slot creation also handled by "standby register", update warning
6840 */
6841 log_hint(_("you may need to create the replication slot manually"));
6842 }
6843 else
6844 {
6845 record_status = get_slot_record(upstream_conn, node_record->slot_name, &slot_info);
6846
6847 if (record_status == RECORD_FOUND)
6848 {
6849 log_verbose(LOG_INFO,
6850 _("replication slot \"%s\" already exists on upstream node %i"),
6851 node_record->slot_name,
6852 upstream_node_id);
6853 slot_exists_on_upstream = true;
6854 }
6855 else
6856 {
6857 PQExpBufferData event_details;
6858
6859 log_notice(_("creating replication slot \"%s\" on upstream node %i"),
6860 node_record->slot_name,
6861 upstream_node_id);
6862
6863 initPQExpBuffer(&event_details);
6864 if (create_replication_slot(upstream_conn, node_record->slot_name, &upstream_node_record, &event_details) == false)
6865 {
6866 log_error("%s", event_details.data);
6867
6868 create_event_notification(primary_conn,
6869 &config_file_options,
6870 config_file_options.node_id,
6871 "standby_clone",
6872 false,
6873 event_details.data);
6874
6875 PQfinish(source_conn);
6876
6877 exit(ERR_DB_QUERY);
6878 }
6879
6880 termPQExpBuffer(&event_details);
6881 }
6882
6883 PQfinish(upstream_conn);
6884 }
6885 }
6886
6887 if (slot_info.active == false)
6888 {
6889 if (slot_exists_on_upstream == false)
6890 {
6891
6892 /* delete slot on source server */
6893
6894 if (drop_replication_slot_if_exists(source_conn, UNKNOWN_NODE_ID, node_record->slot_name) == true)
6895 {
6896 log_notice(_("replication slot \"%s\" deleted on source node"),
6897 node_record->slot_name);
6898 }
6899 else
6900 {
6901 log_error(_("unable to delete replication slot \"%s\" on source node"),
6902 node_record->slot_name);
6903 }
6904 }
6905 }
6906
6907 /*
6908 * if replication slot is still active (shouldn't happen), emit a
6909 * warning
6910 */
6911 else
6912 {
6913 log_warning(_("replication slot \"%s\" is still active on source node"),
6914 node_record->slot_name);
6915 }
6916 }
6917
6918 return SUCCESS;
6919 }
6920
6921
6922 /*
6923 * Perform a filesystem backup using rsync.
6924 *
6925 * From repmgr 4 this is only used for Barman backups.
6926 */
6927 static int
run_file_backup(t_node_info * local_node_record)6928 run_file_backup(t_node_info *local_node_record)
6929 {
6930 int r = SUCCESS,
6931 i;
6932
6933 char command[MAXLEN] = "";
6934 char filename[MAXLEN] = "";
6935 char buf[MAXLEN] = "";
6936 char basebackups_directory[MAXLEN] = "";
6937 char backup_id[MAXLEN] = "";
6938 TablespaceDataList tablespace_list = {NULL, NULL};
6939 TablespaceDataListCell *cell_t = NULL;
6940
6941 PQExpBufferData tablespace_map;
6942 bool tablespace_map_rewrite = false;
6943
6944 /* For the foreseeable future, no other modes are supported */
6945 Assert(mode == barman);
6946 if (mode == barman)
6947 {
6948 t_basebackup_options backup_options = T_BASEBACKUP_OPTIONS_INITIALIZER;
6949
6950 /*
6951 * Locate Barman's base backups directory
6952 */
6953
6954 get_barman_property(basebackups_directory, "basebackups_directory", local_repmgr_tmp_directory);
6955
6956 /*
6957 * Read the list of backup files into a local file. In the process:
6958 *
6959 * - determine the backup ID
6960 * - check, and remove, the prefix
6961 * - detect tablespaces
6962 * - filter files in one list per tablespace
6963 */
6964 {
6965 FILE *fi; /* input stream */
6966 FILE *fd; /* output for data.txt */
6967 char prefix[MAXLEN] = "";
6968 char output[MAXLEN] = "";
6969 int n = 0;
6970 char *p = NULL,
6971 *q = NULL;
6972
6973 maxlen_snprintf(command, "%s list-files --target=data %s latest",
6974 make_barman_ssh_command(barman_command_buf),
6975 config_file_options.barman_server);
6976
6977 log_verbose(LOG_DEBUG, "executing:\n %s", command);
6978
6979 fi = popen(command, "r");
6980 if (fi == NULL)
6981 {
6982 log_error("cannot launch command: %s", command);
6983 exit(ERR_BARMAN);
6984 }
6985
6986 fd = fopen(datadir_list_filename, "w");
6987 if (fd == NULL)
6988 {
6989 log_error("cannot open file: %s", datadir_list_filename);
6990 exit(ERR_BARMAN);
6991 }
6992
6993 maxlen_snprintf(prefix, "%s/", basebackups_directory);
6994
6995 while (fgets(output, MAXLEN, fi) != NULL)
6996 {
6997 /*
6998 * Remove prefix
6999 */
7000 p = string_skip_prefix(prefix, output);
7001
7002 if (p == NULL)
7003 {
7004 log_error("unexpected output from \"barman list-files\"");
7005 log_detail("%s", output);
7006 exit(ERR_BARMAN);
7007 }
7008
7009 /*
7010 * Remove and note backup ID; copy backup.info
7011 */
7012 if (!strcmp(backup_id, ""))
7013 {
7014 FILE *fi2;
7015
7016 n = strcspn(p, "/");
7017
7018 strncpy(backup_id, p, n);
7019
7020 strncat(prefix, backup_id, MAXLEN - 1);
7021 strncat(prefix, "/", MAXLEN - 1);
7022 p = string_skip_prefix(backup_id, p);
7023
7024 if (p == NULL)
7025 {
7026 log_error("unexpected output from \"barman list-files\"");
7027 log_detail("%s", output);
7028 exit(ERR_BARMAN);
7029 }
7030
7031 p = string_skip_prefix("/", p);
7032
7033 /*
7034 * Copy backup.info
7035 */
7036 maxlen_snprintf(command,
7037 "rsync -a %s:%s/%s/backup.info %s",
7038 config_file_options.barman_host,
7039 basebackups_directory,
7040 backup_id,
7041 local_repmgr_tmp_directory);
7042
7043 (void) local_command(command,
7044 NULL);
7045
7046 /*
7047 * Get tablespace data
7048 */
7049 maxlen_snprintf(filename, "%s/backup.info",
7050 local_repmgr_tmp_directory);
7051 fi2 = fopen(filename, "r");
7052 if (fi2 == NULL)
7053 {
7054 log_error("cannot open file: %s", filename);
7055 exit(ERR_INTERNAL);
7056 }
7057 while (fgets(buf, MAXLEN, fi2) != NULL)
7058 {
7059 q = string_skip_prefix("tablespaces=", buf);
7060 if (q != NULL && strncmp(q, "None\n", 5))
7061 {
7062 get_tablespace_data_barman(q, &tablespace_list);
7063 }
7064 q = string_skip_prefix("version=", buf);
7065 if (q != NULL)
7066 {
7067 source_server_version_num = strtol(q, NULL, 10);
7068 }
7069 }
7070 fclose(fi2);
7071 unlink(filename);
7072
7073 continue;
7074 }
7075
7076 /*
7077 * Skip backup.info
7078 */
7079 if (string_skip_prefix("backup.info", p))
7080 continue;
7081
7082 /*
7083 * Filter data directory files
7084 */
7085 if ((q = string_skip_prefix("data/", p)) != NULL)
7086 {
7087 fputs(q, fd);
7088 continue;
7089 }
7090
7091 /*
7092 * Filter other files (i.e. tablespaces)
7093 */
7094 for (cell_t = tablespace_list.head; cell_t; cell_t = cell_t->next)
7095 {
7096 if ((q = string_skip_prefix(cell_t->oid, p)) != NULL && *q == '/')
7097 {
7098 if (cell_t->fptr == NULL)
7099 {
7100 maxlen_snprintf(filename, "%s/%s.txt", local_repmgr_tmp_directory, cell_t->oid);
7101 cell_t->fptr = fopen(filename, "w");
7102 if (cell_t->fptr == NULL)
7103 {
7104 log_error("cannot open file: %s", filename);
7105 exit(ERR_INTERNAL);
7106 }
7107 }
7108 fputs(q + 1, cell_t->fptr);
7109 break;
7110 }
7111 }
7112 }
7113
7114 fclose(fd);
7115
7116 pclose(fi);
7117 }
7118
7119 /* For 9.5 and greater, create our own tablespace_map file */
7120 if (source_server_version_num >= 90500)
7121 {
7122 initPQExpBuffer(&tablespace_map);
7123 }
7124
7125 /*
7126 * As of Barman version 1.6.1, the file structure of a backup is as
7127 * follows:
7128 *
7129 * base/ - base backup wals/ - WAL files associated to the backup
7130 *
7131 * base/<ID> - backup files
7132 *
7133 * here ID has the standard timestamp form yyyymmddThhmmss
7134 *
7135 * base/<ID>/backup.info - backup metadata, in text format
7136 * base/<ID>/data - data directory base/<ID>/<OID> -
7137 * tablespace with the given oid
7138 */
7139
7140 /*
7141 * Copy all backup files from the Barman server
7142 */
7143 maxlen_snprintf(command,
7144 "rsync --progress -a --files-from=%s %s:%s/%s/data %s",
7145 datadir_list_filename,
7146 config_file_options.barman_host,
7147 basebackups_directory,
7148 backup_id,
7149 local_data_directory);
7150
7151 (void) local_command(
7152 command,
7153 NULL);
7154
7155 unlink(datadir_list_filename);
7156
7157 /*
7158 * At this point we should have the source server version number.
7159 * If not, try and extract it from the data directory.
7160 */
7161 if (source_server_version_num == UNKNOWN_SERVER_VERSION_NUM)
7162 {
7163 log_warning(_("server version number is unknown"));
7164 source_server_version_num = get_pg_version(local_data_directory, NULL);
7165
7166 /*
7167 * In the unlikely we are still unable to obtain the server
7168 * version number, there's not a lot which can be done.
7169 */
7170 if (source_server_version_num == UNKNOWN_SERVER_VERSION_NUM)
7171 {
7172 log_error(_("unable to extract server version number from the data directory, aborting"));
7173 exit(ERR_BAD_CONFIG);
7174 }
7175 log_notice(_("server version number is: %i"), source_server_version_num);
7176 }
7177
7178 /*
7179 * Parse the pg_basebackup_options provided in repmgr.conf - we need to
7180 * check if --waldir/--xlogdir was provided.
7181 */
7182 parse_pg_basebackup_options(config_file_options.pg_basebackup_options,
7183 &backup_options,
7184 source_server_version_num,
7185 NULL);
7186
7187
7188 /*
7189 * We must create some PGDATA subdirectories because they are not
7190 * included in the Barman backup.
7191 *
7192 * See class RsyncBackupExecutor in the Barman source
7193 * (barman/backup_executor.py) for a definitive list of excluded
7194 * directories.
7195 */
7196 {
7197 const char *const dirs[] = {
7198 /* Only from 10 */
7199 "pg_wal",
7200 /* Only from 9.5 */
7201 "pg_commit_ts",
7202 /* Only from 9.4 */
7203 "pg_dynshmem", "pg_logical", "pg_logical/snapshots", "pg_logical/mappings", "pg_replslot",
7204 /* Present in all versions from 9.3 */
7205 "pg_notify", "pg_serial", "pg_snapshots", "pg_stat", "pg_stat_tmp",
7206 "pg_subtrans", "pg_tblspc", "pg_twophase",
7207 /* Present from at least 9.3, but removed in 10 */
7208 "pg_xlog",
7209 /* Array delimiter */
7210 0
7211 };
7212
7213 /*
7214 * This array determines the major version each of the above directories
7215 * first appears in; or if the value is negative, which from major version
7216 * the directory does not appear in.
7217 */
7218 const int vers[] = {
7219 100000,
7220 90500,
7221 90400, 90400, 90400, 90400, 90400,
7222 0, 0, 0, 0, 0,
7223 0, 0, 0,
7224 -100000
7225 };
7226
7227 for (i = 0; dirs[i]; i++)
7228 {
7229 /* directory exists in newer versions than this server - skip */
7230 if (vers[i] > 0 && source_server_version_num < vers[i])
7231 continue;
7232
7233 /*
7234 * directory existed in earlier versions than this server but
7235 * has been removed/renamed - skip
7236 */
7237 if (vers[i] < 0 && source_server_version_num >= abs(vers[i]))
7238 continue;
7239
7240 maxlen_snprintf(filename, "%s/%s", local_data_directory, dirs[i]);
7241
7242 /*
7243 * If --waldir/--xlogdir specified in "pg_basebackup_options",
7244 * create a symlink rather than make a directory.
7245 */
7246 if (strcmp(dirs[i], "pg_wal") == 0 || strcmp(dirs[i], "pg_xlog") == 0)
7247 {
7248 if (backup_options.waldir[0] != '\0')
7249 {
7250 if (create_pg_dir(backup_options.waldir, false) == false)
7251 {
7252 /* create_pg_dir() will log specifics */
7253 log_error(_("unable to create an empty directory for WAL files"));
7254 log_hint(_("see preceding error messages"));
7255 exit(ERR_BAD_CONFIG);
7256 }
7257
7258 if (symlink(backup_options.waldir, filename) != 0)
7259 {
7260 log_error(_("could not create symbolic link \"%s\""), filename);
7261 exit(ERR_BAD_CONFIG);
7262 }
7263 continue;
7264 }
7265 }
7266
7267 if (mkdir(filename, S_IRWXU) != 0 && errno != EEXIST)
7268 {
7269 log_error(_("unable to create the %s directory"), dirs[i]);
7270 exit(ERR_INTERNAL);
7271 }
7272 }
7273 }
7274 }
7275
7276
7277 for (cell_t = tablespace_list.head; cell_t; cell_t = cell_t->next)
7278 {
7279 bool mapping_found = false;
7280 TablespaceListCell *cell = NULL;
7281 char *tblspc_dir_dest = NULL;
7282
7283 /*
7284 * Check if tablespace path matches one of the provided tablespace
7285 * mappings
7286 */
7287 if (config_file_options.tablespace_mapping.head != NULL)
7288 {
7289 for (cell = config_file_options.tablespace_mapping.head; cell; cell = cell->next)
7290 {
7291 if (strcmp(cell_t->location, cell->old_dir) == 0)
7292 {
7293 mapping_found = true;
7294 break;
7295 }
7296 }
7297 }
7298
7299 if (mapping_found == true)
7300 {
7301 tblspc_dir_dest = cell->new_dir;
7302 log_debug(_("mapping source tablespace \"%s\" (OID %s) to \"%s\""),
7303 cell_t->location, cell_t->oid, tblspc_dir_dest);
7304 }
7305 else
7306 {
7307 tblspc_dir_dest = cell_t->location;
7308 }
7309
7310 /*
7311 * Tablespace file copy
7312 */
7313
7314 if (mode == barman)
7315 {
7316 create_pg_dir(tblspc_dir_dest, false);
7317
7318 if (cell_t->fptr != NULL) /* cell_t->fptr == NULL iff the tablespace is
7319 * empty */
7320 {
7321 /* close the file to ensure the contents are flushed to disk */
7322 fclose(cell_t->fptr);
7323
7324 maxlen_snprintf(command,
7325 "rsync --progress -a --files-from=%s/%s.txt %s:%s/%s/%s %s",
7326 local_repmgr_tmp_directory,
7327 cell_t->oid,
7328 config_file_options.barman_host,
7329 basebackups_directory,
7330 backup_id,
7331 cell_t->oid,
7332 tblspc_dir_dest);
7333 (void) local_command(command,
7334 NULL);
7335 maxlen_snprintf(filename,
7336 "%s/%s.txt",
7337 local_repmgr_tmp_directory,
7338 cell_t->oid);
7339 unlink(filename);
7340 }
7341 }
7342
7343
7344 /*
7345 * If a valid mapping was provided for this tablespace, arrange for it
7346 * to be remapped (if no tablespace mapping was provided, the link
7347 * will be copied as-is by pg_basebackup and no action is required)
7348 */
7349 if (mapping_found == true || mode == barman)
7350 {
7351 /* 9.5 and later - append to the tablespace_map file */
7352 if (source_server_version_num >= 90500)
7353 {
7354 tablespace_map_rewrite = true;
7355 appendPQExpBuffer(&tablespace_map,
7356 "%s %s\n",
7357 cell_t->oid,
7358 tblspc_dir_dest);
7359 }
7360
7361 /*
7362 * Pre-9.5, we have to manipulate the symlinks in pg_tblspc/
7363 * ourselves
7364 */
7365 else
7366 {
7367 PQExpBufferData tblspc_symlink;
7368
7369 initPQExpBuffer(&tblspc_symlink);
7370 appendPQExpBuffer(&tblspc_symlink, "%s/pg_tblspc/%s",
7371 local_data_directory,
7372 cell_t->oid);
7373
7374 if (unlink(tblspc_symlink.data) < 0 && errno != ENOENT)
7375 {
7376 log_error(_("unable to remove tablespace symlink %s"), tblspc_symlink.data);
7377 log_detail("%s", strerror(errno));
7378 r = ERR_BAD_BASEBACKUP;
7379 goto stop_backup;
7380 }
7381
7382 if (symlink(tblspc_dir_dest, tblspc_symlink.data) < 0)
7383 {
7384 log_error(_("unable to create tablespace symlink from %s to %s"), tblspc_symlink.data, tblspc_dir_dest);
7385
7386 r = ERR_BAD_BASEBACKUP;
7387 goto stop_backup;
7388 }
7389 }
7390 }
7391 }
7392
7393 /*
7394 * For 9.5 and later, if tablespace remapping was requested, we'll need to
7395 * rewrite the tablespace map file ourselves. The tablespace map file is
7396 * read on startup and any links created by the backend; we could do this
7397 * ourselves like for pre-9.5 servers, but it's better to rely on
7398 * functionality the backend provides.
7399 */
7400
7401 if (source_server_version_num >= 90500 && tablespace_map_rewrite == true)
7402 {
7403 PQExpBufferData tablespace_map_filename;
7404 FILE *tablespace_map_file;
7405
7406 initPQExpBuffer(&tablespace_map_filename);
7407 appendPQExpBuffer(&tablespace_map_filename, "%s/%s",
7408 local_data_directory,
7409 TABLESPACE_MAP);
7410
7411 /*
7412 * Unlink any existing file (it should be there, but we don't care if
7413 * it isn't)
7414 */
7415 if (unlink(tablespace_map_filename.data) < 0 && errno != ENOENT)
7416 {
7417 log_error(_("unable to remove tablespace_map file \"%s\""),
7418 tablespace_map_filename.data);
7419 log_detail("%s", strerror(errno));
7420
7421 r = ERR_BAD_BASEBACKUP;
7422 goto stop_backup;
7423 }
7424
7425 tablespace_map_file = fopen(tablespace_map_filename.data, "w");
7426 if (tablespace_map_file == NULL)
7427 {
7428 log_error(_("unable to create tablespace_map file \"%s\""), tablespace_map_filename.data);
7429
7430 r = ERR_BAD_BASEBACKUP;
7431 goto stop_backup;
7432 }
7433
7434 if (fputs(tablespace_map.data, tablespace_map_file) == EOF)
7435 {
7436 fclose(tablespace_map_file);
7437
7438 log_error(_("unable to write to tablespace_map file \"%s\""), tablespace_map_filename.data);
7439
7440 r = ERR_BAD_BASEBACKUP;
7441 goto stop_backup;
7442 }
7443
7444 fclose(tablespace_map_file);
7445
7446 termPQExpBuffer(&tablespace_map_filename);
7447 termPQExpBuffer(&tablespace_map);
7448 }
7449
7450 stop_backup:
7451
7452 if (mode == barman)
7453 {
7454 /*
7455 * In Barman mode, remove local_repmgr_tmp_directory,
7456 * which contains various temporary files containing Barman metadata.
7457 */
7458 rmtree(local_repmgr_tmp_directory, true);
7459 }
7460
7461
7462 /*
7463 * if replication slots in use, create replication slot
7464 */
7465 if (r == SUCCESS)
7466 {
7467 if (config_file_options.use_replication_slots == true)
7468 {
7469 bool slot_warning = false;
7470 if (runtime_options.no_upstream_connection == true)
7471 {
7472 slot_warning = true;
7473 }
7474 else
7475 {
7476 t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;
7477 t_replication_slot slot_info = T_REPLICATION_SLOT_INITIALIZER;
7478 RecordStatus record_status = RECORD_NOT_FOUND;
7479 PGconn *upstream_conn = NULL;
7480
7481
7482 /* check connections are still available */
7483 (void)connection_ping_reconnect(primary_conn);
7484
7485 if (source_conn != primary_conn)
7486 (void)connection_ping_reconnect(source_conn);
7487
7488 (void)connection_ping_reconnect(source_conn);
7489
7490 record_status = get_node_record(source_conn, upstream_node_id, &upstream_node_record);
7491
7492 if (record_status != RECORD_FOUND)
7493 {
7494 log_error(_("unable to retrieve node record for upstream node %i"), upstream_node_id);
7495 slot_warning = true;
7496 }
7497 else
7498 {
7499 upstream_conn = establish_db_connection(upstream_node_record.conninfo, false);
7500 if (PQstatus(upstream_conn) != CONNECTION_OK)
7501 {
7502 log_error(_("unable to connect to upstream node %i to create a replication slot"), upstream_node_id);
7503 slot_warning = true;
7504 }
7505 else
7506 {
7507 record_status = get_slot_record(upstream_conn, local_node_record->slot_name, &slot_info);
7508
7509 if (record_status == RECORD_FOUND)
7510 {
7511 log_verbose(LOG_INFO,
7512 _("replication slot \"%s\" aleady exists on upstream node %i"),
7513 local_node_record->slot_name,
7514 upstream_node_id);
7515 }
7516 else
7517 {
7518 PQExpBufferData errmsg;
7519 bool success;
7520
7521 initPQExpBuffer(&errmsg);
7522 success = create_replication_slot(upstream_conn,
7523 local_node_record->slot_name,
7524 &upstream_node_record,
7525 &errmsg);
7526 if (success == false)
7527 {
7528 log_error(_("unable to create replication slot \"%s\" on upstream node %i"),
7529 local_node_record->slot_name,
7530 upstream_node_id);
7531 log_detail("%s", errmsg.data);
7532 slot_warning = true;
7533 }
7534 else
7535 {
7536 log_notice(_("replication slot \"%s\" created on upstream node \"%s\" (ID: %i)"),
7537 local_node_record->slot_name,
7538 upstream_node_record.node_name,
7539 upstream_node_id );
7540 }
7541 termPQExpBuffer(&errmsg);
7542 }
7543
7544 PQfinish(upstream_conn);
7545 }
7546 }
7547 }
7548
7549
7550 if (slot_warning == true)
7551 {
7552 log_warning(_("\"use_replication_slots\" specified but a replication slot could not be created"));
7553 log_hint(_("ensure a replication slot called \"%s\" is created on the upstream node (ID: %i)"),
7554 local_node_record->slot_name,
7555 upstream_node_id);
7556 }
7557 }
7558 }
7559
7560 return r;
7561 }
7562
7563
7564 static char *
make_barman_ssh_command(char * buf)7565 make_barman_ssh_command(char *buf)
7566 {
7567 static char config_opt[MAXLEN] = "";
7568
7569 if (strlen(config_file_options.barman_config))
7570 maxlen_snprintf(config_opt,
7571 " --config=%s",
7572 config_file_options.barman_config);
7573
7574 maxlen_snprintf(buf,
7575 "ssh %s barman%s",
7576 config_file_options.barman_host,
7577 config_opt);
7578
7579 return buf;
7580 }
7581
7582
7583 static int
get_tablespace_data_barman(char * tablespace_data_barman,TablespaceDataList * tablespace_list)7584 get_tablespace_data_barman(char *tablespace_data_barman,
7585 TablespaceDataList *tablespace_list)
7586 {
7587 /*
7588 * Example: [('main', 24674, '/var/lib/postgresql/tablespaces/9.5/main'),
7589 * ('alt', 24678, '/var/lib/postgresql/tablespaces/9.5/alt')]
7590 */
7591
7592 char name[MAXLEN] = "";
7593 char oid[MAXLEN] = "";
7594 char location[MAXPGPATH] = "";
7595 char *p = tablespace_data_barman;
7596 int i = 0;
7597
7598 tablespace_list->head = NULL;
7599 tablespace_list->tail = NULL;
7600
7601 p = string_skip_prefix("[", p);
7602 if (p == NULL)
7603 return -1;
7604
7605 while (*p == '(')
7606 {
7607 p = string_skip_prefix("('", p);
7608 if (p == NULL)
7609 return -1;
7610
7611 i = strcspn(p, "'");
7612 strncpy(name, p, i);
7613 name[i] = 0;
7614
7615 p = string_skip_prefix("', ", p + i);
7616 if (p == NULL)
7617 return -1;
7618
7619 i = strcspn(p, ",");
7620 strncpy(oid, p, i);
7621 oid[i] = 0;
7622
7623 p = string_skip_prefix(", '", p + i);
7624 if (p == NULL)
7625 return -1;
7626
7627 i = strcspn(p, "'");
7628 strncpy(location, p, i);
7629 location[i] = 0;
7630
7631 p = string_skip_prefix("')", p + i);
7632 if (p == NULL)
7633 return -1;
7634
7635 tablespace_data_append(tablespace_list, name, oid, location);
7636
7637 if (*p == ']')
7638 break;
7639
7640 p = string_skip_prefix(", ", p);
7641 if (p == NULL)
7642 return -1;
7643 }
7644
7645 return SUCCESS;
7646 }
7647
7648
7649 void
get_barman_property(char * dst,char * name,char * local_repmgr_directory)7650 get_barman_property(char *dst, char *name, char *local_repmgr_directory)
7651 {
7652 PQExpBufferData command_output;
7653 char buf[MAXLEN] = "";
7654 char command[MAXLEN] = "";
7655 char *p = NULL;
7656
7657 initPQExpBuffer(&command_output);
7658
7659 maxlen_snprintf(command,
7660 "grep \"^[[:space:]]%s:\" %s/show-server.txt",
7661 name, local_repmgr_tmp_directory);
7662 (void) local_command(command, &command_output);
7663
7664 maxlen_snprintf(buf, "\t%s: ", name);
7665 p = string_skip_prefix(buf, command_output.data);
7666 if (p == NULL)
7667 {
7668 log_error("unexpected output from Barman: %s",
7669 command_output.data);
7670 exit(ERR_INTERNAL);
7671 }
7672
7673 strncpy(dst, p, MAXLEN);
7674 string_remove_trailing_newlines(dst);
7675
7676 termPQExpBuffer(&command_output);
7677 }
7678
7679
7680 static void
copy_configuration_files(bool delete_after_copy)7681 copy_configuration_files(bool delete_after_copy)
7682 {
7683 int i,
7684 r;
7685 t_configfile_info *file = NULL;
7686 char *host = NULL;
7687
7688 /* get host from upstream record */
7689 host = param_get(&recovery_conninfo, "host");
7690
7691 if (host == NULL)
7692 host = runtime_options.host;
7693
7694 log_notice(_("copying external configuration files from upstream node \"%s\""), host);
7695
7696 for (i = 0; i < config_files.entries; i++)
7697 {
7698 PQExpBufferData dest_path;
7699
7700 file = config_files.files[i];
7701
7702 /*
7703 * Skip files in the data directory - these will be copied during the
7704 * main backup
7705 */
7706 if (file->in_data_directory == true)
7707 continue;
7708
7709 initPQExpBuffer(&dest_path);
7710
7711 if (runtime_options.copy_external_config_files_destination == CONFIG_FILE_SAMEPATH)
7712 {
7713 appendPQExpBufferStr(&dest_path, file->filepath);
7714 }
7715 else
7716 {
7717 appendPQExpBuffer(&dest_path,
7718 "%s/%s",
7719 local_data_directory,
7720 file->filename);
7721 }
7722
7723 r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
7724 file->filepath, dest_path.data, false, source_server_version_num);
7725
7726 /*
7727 * TODO: collate errors into list
7728 */
7729
7730 if (WEXITSTATUS(r))
7731 {
7732 log_error(_("standby clone: unable to copy config file \"%s\""),
7733 file->filename);
7734 log_hint(_("see preceding messages for details"));
7735
7736 if (runtime_options.force == false)
7737 exit(ERR_BAD_RSYNC);
7738 }
7739
7740 /*
7741 * This is to check we can actually copy the files before running the
7742 * main clone operation
7743 */
7744 if (delete_after_copy == true)
7745 {
7746 /* this is very unlikely to happen, but log in case it does */
7747 if (unlink(dest_path.data) < 0 && errno != ENOENT)
7748 {
7749 log_warning(_("unable to delete %s"), dest_path.data);
7750 log_detail("%s", strerror(errno));
7751 }
7752 }
7753
7754 termPQExpBuffer(&dest_path);
7755 }
7756
7757 return;
7758 }
7759
7760
7761 static void
tablespace_data_append(TablespaceDataList * list,const char * name,const char * oid,const char * location)7762 tablespace_data_append(TablespaceDataList *list, const char *name, const char *oid, const char *location)
7763 {
7764 TablespaceDataListCell *cell = NULL;
7765
7766 cell = (TablespaceDataListCell *) pg_malloc0(sizeof(TablespaceDataListCell));
7767
7768 if (cell == NULL)
7769 {
7770 log_error(_("unable to allocate memory; terminating"));
7771 exit(ERR_OUT_OF_MEMORY);
7772 }
7773
7774 cell->oid = pg_malloc(1 + strlen(oid));
7775 cell->name = pg_malloc(1 + strlen(name));
7776 cell->location = pg_malloc(1 + strlen(location));
7777
7778 strncpy(cell->oid, oid, 1 + strlen(oid));
7779 strncpy(cell->name, name, 1 + strlen(name));
7780 strncpy(cell->location, location, 1 + strlen(location));
7781
7782 if (list->tail)
7783 list->tail->next = cell;
7784 else
7785 list->head = cell;
7786
7787 list->tail = cell;
7788 }
7789
7790
7791
7792 /*
7793 * check_primary_standby_version_match()
7794 *
7795 * Check server versions of supplied connections are compatible for
7796 * replication purposes.
7797 *
7798 * Exits on error.
7799 */
7800 static void
check_primary_standby_version_match(PGconn * conn,PGconn * primary_conn)7801 check_primary_standby_version_match(PGconn *conn, PGconn *primary_conn)
7802 {
7803 char standby_version[MAXVERSIONSTR] = "";
7804 int standby_version_num = UNKNOWN_SERVER_VERSION_NUM;
7805
7806 char primary_version[MAXVERSIONSTR] = "";
7807 int primary_version_num = UNKNOWN_SERVER_VERSION_NUM;
7808
7809 standby_version_num = check_server_version(conn, "standby", true, standby_version);
7810
7811 /* Verify that primary is a supported server version */
7812 primary_version_num = check_server_version(conn, "primary", false, primary_version);
7813 if (primary_version_num < 0)
7814 {
7815 PQfinish(conn);
7816 PQfinish(primary_conn);
7817 exit(ERR_BAD_CONFIG);
7818 }
7819
7820 /* primary and standby version should match */
7821 if ((primary_version_num / 100) != (standby_version_num / 100))
7822 {
7823 PQfinish(conn);
7824 PQfinish(primary_conn);
7825 log_error(_("PostgreSQL versions on primary (%s) and standby (%s) must match"),
7826 primary_version, standby_version);
7827 exit(ERR_BAD_CONFIG);
7828 }
7829 }
7830
7831
7832 static void
check_recovery_type(PGconn * conn)7833 check_recovery_type(PGconn *conn)
7834 {
7835 RecoveryType recovery_type = get_recovery_type(conn);
7836
7837 if (recovery_type != RECTYPE_STANDBY)
7838 {
7839 if (recovery_type == RECTYPE_PRIMARY)
7840 {
7841 log_error(_("this node should be a standby (%s)"),
7842 config_file_options.conninfo);
7843 PQfinish(conn);
7844 exit(ERR_BAD_CONFIG);
7845 }
7846 else
7847 {
7848 log_error(_("connection to node (%s) lost"),
7849 config_file_options.conninfo);
7850 PQfinish(conn);
7851 exit(ERR_DB_CONN);
7852 }
7853 }
7854 }
7855
7856
7857
7858 /*
7859 * Creates recovery configuration for a standby.
7860 *
7861 * A database connection pointer is required for escaping primary_conninfo
7862 * parameters. When cloning from Barman and --no-upstream-connection supplied,
7863 * this might not be available.
7864 */
static bool
create_recovery_file(t_node_info *node_record, t_conninfo_param_list *primary_conninfo, int server_version_num, char *dest, bool as_file)
{
	PQExpBufferData recovery_file_buf;
	PQExpBufferData primary_conninfo_buf;
	char		recovery_file_path[MAXPGPATH] = "";
	FILE	   *recovery_file;
	mode_t		um;

	/* ordered list of key/value pairs making up the recovery configuration */
	KeyValueList recovery_config = {NULL, NULL};
	KeyValueListCell *cell = NULL;

	initPQExpBuffer(&primary_conninfo_buf);

	/* standby_mode = 'on' (Pg 11 and earlier) */
	if (server_version_num < 120000)
	{
		key_value_list_set(&recovery_config,
						   "standby_mode", "on");
	}

	/* primary_conninfo = '...' */
	write_primary_conninfo(&primary_conninfo_buf, primary_conninfo);
	key_value_list_set(&recovery_config,
					   "primary_conninfo", primary_conninfo_buf.data);

	/*
	 * recovery_target_timeline = 'latest'
	 *
	 * PostgreSQL 11 and earlier only; 'latest' is the default from PostgreSQL 12.
	 */

	if (server_version_num < 120000)
	{
		key_value_list_set(&recovery_config,
						   "recovery_target_timeline", "latest");
	}

	/* recovery_min_apply_delay = ... (optional) */
	if (config_file_options.recovery_min_apply_delay_provided == true)
	{
		key_value_list_set(&recovery_config,
						   "recovery_min_apply_delay", config_file_options.recovery_min_apply_delay);
	}

	/* primary_slot_name = '...' (optional, for 9.4 and later) */
	if (config_file_options.use_replication_slots)
	{
		key_value_list_set(&recovery_config,
						   "primary_slot_name", node_record->slot_name);
	}

	/*
	 * If restore_command is set, we use it as restore_command in
	 * recovery.conf
	 */
	if (config_file_options.restore_command[0] != '\0')
	{
		/* escaped copy is heap-allocated; freed after being copied into the list */
		char	   *escaped = escape_recovery_conf_value(config_file_options.restore_command);

		key_value_list_set(&recovery_config,
						   "restore_command", escaped);
		free(escaped);
	}

	/* archive_cleanup_command (optional) */
	if (config_file_options.archive_cleanup_command[0] != '\0')
	{
		char	   *escaped = escape_recovery_conf_value(config_file_options.archive_cleanup_command);

		key_value_list_set(&recovery_config,
						   "archive_cleanup_command", escaped);
		free(escaped);
	}




	/*
	 * as_file == false: the caller wants the recovery configuration
	 * returned as text in "dest" rather than written to disk; "dest" is
	 * filled via maxlen_snprintf(), so it is assumed to be a MAXLEN buffer.
	 */
	if (as_file == false)
	{
		/* create file in buffer */
		initPQExpBuffer(&recovery_file_buf);

		for (cell = recovery_config.head; cell; cell = cell->next)
		{
			appendPQExpBuffer(&recovery_file_buf,
							  "%s = '%s'\n",
							  cell->key, cell->value);
		}

		maxlen_snprintf(dest, "%s", recovery_file_buf.data);

		termPQExpBuffer(&recovery_file_buf);

		return true;
	}


	/*
	 * PostgreSQL 12 and later: modify postgresql.auto.conf
	 *
	 */
	if (server_version_num >= 120000)
	{
		/* here "dest" is the data directory path */
		if (modify_auto_conf(dest, &recovery_config) == false)
		{
			return false;
		}

		/* create the standby.signal file which replaces standby_mode */
		if (write_standby_signal() == false)
		{
			return false;
		}

		return true;
	}

	/*
	 * PostgreSQL 11 and earlier: write recovery.conf
	 */
	maxpath_snprintf(recovery_file_path, "%s/%s", dest, RECOVERY_COMMAND_FILE);
	log_debug("create_recovery_file(): creating \"%s\"...",
			  recovery_file_path);

	/* Set umask to 0600 */
	um = umask((~(S_IRUSR | S_IWUSR)) & (S_IRWXG | S_IRWXO));
	recovery_file = fopen(recovery_file_path, "w");
	/* restore previous umask as soon as the file is created */
	umask(um);

	if (recovery_file == NULL)
	{
		log_error(_("unable to create recovery.conf file at \"%s\""),
				  recovery_file_path);
		log_detail("%s", strerror(errno));

		return false;
	}

	/* write one "key = 'value'" line per recovery setting */
	for (cell = recovery_config.head; cell; cell = cell->next)
	{
		initPQExpBuffer(&recovery_file_buf);
		appendPQExpBuffer(&recovery_file_buf,
						  "%s = '%s'\n",
						  cell->key, cell->value);

		log_debug("recovery.conf line: %s", recovery_file_buf.data);

		if (fputs(recovery_file_buf.data, recovery_file) == EOF)
		{
			log_error(_("unable to write to recovery file at \"%s\""), recovery_file_path);
			fclose(recovery_file);
			termPQExpBuffer(&recovery_file_buf);
			return false;
		}

		termPQExpBuffer(&recovery_file_buf);
	}


	fclose(recovery_file);

	return true;
}
8029
8030
8031 static void
write_primary_conninfo(PQExpBufferData * dest,t_conninfo_param_list * param_list)8032 write_primary_conninfo(PQExpBufferData *dest, t_conninfo_param_list *param_list)
8033 {
8034 PQExpBufferData conninfo_buf;
8035 bool application_name_provided = false;
8036 bool password_provided = false;
8037 int c;
8038 char *escaped = NULL;
8039 t_conninfo_param_list env_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
8040
8041 initialize_conninfo_params(&env_conninfo, true);
8042
8043 initPQExpBuffer(&conninfo_buf);
8044
8045 for (c = 0; c < param_list->size && param_list->keywords[c] != NULL; c++)
8046 {
8047 /*
8048 * Skip empty settings and ones which don't make any sense in
8049 * recovery.conf
8050 */
8051 if (strcmp(param_list->keywords[c], "dbname") == 0 ||
8052 strcmp(param_list->keywords[c], "replication") == 0 ||
8053 (param_list->values[c] == NULL) ||
8054 (param_list->values[c] != NULL && param_list->values[c][0] == '\0'))
8055 continue;
8056
8057 /* only include "password" if explicitly requested */
8058 if (strcmp(param_list->keywords[c], "password") == 0)
8059 {
8060 password_provided = true;
8061 }
8062
8063 if (conninfo_buf.len != 0)
8064 appendPQExpBufferChar(&conninfo_buf, ' ');
8065
8066 if (strcmp(param_list->keywords[c], "application_name") == 0)
8067 application_name_provided = true;
8068
8069 appendPQExpBuffer(&conninfo_buf, "%s=", param_list->keywords[c]);
8070 appendConnStrVal(&conninfo_buf, param_list->values[c]);
8071 }
8072
8073 /* "application_name" not provided - default to repmgr node name */
8074 if (application_name_provided == false)
8075 {
8076 if (strlen(config_file_options.node_name))
8077 {
8078 appendPQExpBufferStr(&conninfo_buf, " application_name=");
8079 appendConnStrVal(&conninfo_buf, config_file_options.node_name);
8080 }
8081 else
8082 {
8083 appendPQExpBufferStr(&conninfo_buf, " application_name=repmgr");
8084 }
8085 }
8086
8087 /* no password provided explicitly */
8088 if (password_provided == false)
8089 {
8090 if (config_file_options.use_primary_conninfo_password == true)
8091 {
8092 const char *password = param_get(&env_conninfo, "password");
8093
8094 if (password != NULL)
8095 {
8096 appendPQExpBufferStr(&conninfo_buf, " password=");
8097 appendConnStrVal(&conninfo_buf, password);
8098 }
8099 }
8100 }
8101
8102 /* passfile provided as configuration option */
8103 if (config_file_options.passfile[0] != '\0')
8104 {
8105 /* check if the libpq we're using supports "passfile=" */
8106 if (has_passfile() == true)
8107 {
8108 appendPQExpBufferStr(&conninfo_buf, " passfile=");
8109 appendConnStrVal(&conninfo_buf, config_file_options.passfile);
8110 }
8111 }
8112
8113 escaped = escape_recovery_conf_value(conninfo_buf.data);
8114
8115 appendPQExpBufferStr(dest, escaped);
8116
8117 free(escaped);
8118 free_conninfo_params(&env_conninfo);
8119 termPQExpBuffer(&conninfo_buf);
8120 }
8121
8122
8123 /*
8124 * For "standby promote" and "standby follow", check for sibling nodes.
8125 * If "--siblings-follow" was specified, fill the provided SiblingNodeStats
8126 * struct with some aggregate info about the nodes for later
8127 * decision making.
8128 */
8129 static bool
check_sibling_nodes(NodeInfoList * sibling_nodes,SiblingNodeStats * sibling_nodes_stats)8130 check_sibling_nodes(NodeInfoList *sibling_nodes, SiblingNodeStats *sibling_nodes_stats)
8131 {
8132 char host[MAXLEN] = "";
8133 NodeInfoListCell *cell;
8134 int r;
8135
8136 /*
8137 * If --siblings-follow not specified, warn about any extant
8138 * siblings which will not follow the new primary
8139 */
8140
8141 if (runtime_options.siblings_follow == false)
8142 {
8143 if (sibling_nodes->node_count > 0)
8144 {
8145 PQExpBufferData nodes;
8146 NodeInfoListCell *cell;
8147
8148 initPQExpBuffer(&nodes);
8149
8150 for (cell = sibling_nodes->head; cell; cell = cell->next)
8151 {
8152 appendPQExpBuffer(&nodes,
8153 " %s (node ID: %i",
8154 cell->node_info->node_name,
8155 cell->node_info->node_id);
8156
8157 if (cell->node_info->type == WITNESS)
8158 {
8159 appendPQExpBufferStr(&nodes,
8160 ", witness server");
8161 }
8162 appendPQExpBufferChar(&nodes,
8163 ')');
8164 if (cell->next)
8165 appendPQExpBufferStr(&nodes, "\n");
8166 }
8167
8168 log_warning(_("%i sibling nodes found, but option \"--siblings-follow\" not specified"),
8169 sibling_nodes->node_count);
8170 log_detail(_("these nodes will remain attached to the current primary:\n%s"), nodes.data);
8171
8172 termPQExpBuffer(&nodes);
8173 }
8174
8175 return true;
8176 }
8177
8178 log_verbose(LOG_INFO, _("%i active sibling nodes found"),
8179 sibling_nodes->node_count);
8180
8181 if (sibling_nodes->node_count == 0)
8182 {
8183 log_warning(_("option \"--sibling-nodes\" specified, but no sibling nodes exist"));
8184 return true;
8185 }
8186
8187 for (cell = sibling_nodes->head; cell; cell = cell->next)
8188 {
8189 /* get host from node record */
8190 get_conninfo_value(cell->node_info->conninfo, "host", host);
8191 r = test_ssh_connection(host, runtime_options.remote_user);
8192
8193 if (r != 0)
8194 {
8195 cell->node_info->reachable = false;
8196 sibling_nodes_stats->unreachable_sibling_node_count++;
8197 }
8198 else
8199 {
8200 cell->node_info->reachable = true;
8201 sibling_nodes_stats->reachable_sibling_node_count++;
8202 sibling_nodes_stats->min_required_wal_senders++;
8203
8204 if (cell->node_info->slot_name[0] != '\0')
8205 {
8206 sibling_nodes_stats->reachable_sibling_nodes_with_slot_count++;
8207 sibling_nodes_stats->min_required_free_slots++;
8208 }
8209 }
8210 }
8211
8212 if (sibling_nodes_stats->unreachable_sibling_node_count > 0)
8213 {
8214 if (runtime_options.force == false)
8215 {
8216 log_error(_("%i of %i sibling nodes unreachable via SSH:"),
8217 sibling_nodes_stats->unreachable_sibling_node_count,
8218 sibling_nodes->node_count);
8219 }
8220 else
8221 {
8222 log_warning(_("%i of %i sibling nodes unreachable via SSH:"),
8223 sibling_nodes_stats->unreachable_sibling_node_count,
8224 sibling_nodes->node_count);
8225 }
8226
8227 /* display list of unreachable sibling nodes */
8228 for (cell = sibling_nodes->head; cell; cell = cell->next)
8229 {
8230 if (cell->node_info->reachable == true)
8231 continue;
8232 log_detail(" %s (ID: %i)",
8233 cell->node_info->node_name,
8234 cell->node_info->node_id);
8235 }
8236
8237 if (runtime_options.force == false)
8238 {
8239 log_hint(_("use -F/--force to proceed in any case"));
8240 return false;
8241 }
8242
8243 if (runtime_options.dry_run == true)
8244 {
8245 log_detail(_("F/--force specified, would proceed anyway"));
8246 }
8247 else
8248 {
8249 log_detail(_("F/--force specified, proceeding anyway"));
8250 }
8251 }
8252 else
8253 {
8254 char *msg = _("all sibling nodes are reachable via SSH");
8255
8256 if (runtime_options.dry_run == true)
8257 {
8258 log_info("%s", msg);
8259 }
8260 else
8261 {
8262 log_verbose(LOG_INFO, "%s", msg);
8263 }
8264 }
8265
8266 return true;
8267 }
8268
8269
8270 static bool
check_free_wal_senders(int available_wal_senders,SiblingNodeStats * sibling_nodes_stats,bool * dry_run_success)8271 check_free_wal_senders(int available_wal_senders, SiblingNodeStats *sibling_nodes_stats, bool *dry_run_success)
8272 {
8273 if (available_wal_senders < sibling_nodes_stats->min_required_wal_senders)
8274 {
8275 if (runtime_options.force == false || runtime_options.dry_run == true)
8276 {
8277 log_error(_("insufficient free walsenders on promotion candidate"));
8278 log_detail(_("at least %i walsenders required but only %i free walsenders on promotion candidate"),
8279 sibling_nodes_stats->min_required_wal_senders,
8280 available_wal_senders);
8281 log_hint(_("increase parameter \"max_wal_senders\" or use -F/--force to proceed in any case"));
8282
8283 if (runtime_options.dry_run == true)
8284 {
8285 *dry_run_success = false;
8286 }
8287 else
8288 {
8289 return false;
8290 }
8291 }
8292 else
8293 {
8294 log_warning(_("insufficient free walsenders on promotion candidate"));
8295 log_detail(_("at least %i walsenders required but only %i free walsender(s) on promotion candidate"),
8296 sibling_nodes_stats->min_required_wal_senders,
8297 available_wal_senders);
8298 return false;
8299 }
8300 }
8301 else
8302 {
8303 if (runtime_options.dry_run == true)
8304 {
8305 log_info(_("%i walsenders required, %i available"),
8306 sibling_nodes_stats->min_required_wal_senders,
8307 available_wal_senders);
8308 }
8309 }
8310
8311 return true;
8312 }
8313
8314
8315 static bool
check_free_slots(t_node_info * local_node_record,SiblingNodeStats * sibling_nodes_stats,bool * dry_run_success)8316 check_free_slots(t_node_info *local_node_record, SiblingNodeStats *sibling_nodes_stats, bool *dry_run_success)
8317 {
8318 if (sibling_nodes_stats->min_required_free_slots > 0 )
8319 {
8320 int available_slots = local_node_record->max_replication_slots -
8321 local_node_record->active_replication_slots;
8322
8323 log_debug("minimum of %i free slots (%i for siblings) required; %i available",
8324 sibling_nodes_stats->min_required_free_slots,
8325 sibling_nodes_stats->reachable_sibling_nodes_with_slot_count,
8326 available_slots);
8327
8328 if (available_slots < sibling_nodes_stats->min_required_free_slots)
8329 {
8330 if (runtime_options.force == false || runtime_options.dry_run == true)
8331 {
8332 log_error(_("insufficient free replication slots to attach all nodes"));
8333 log_detail(_("at least %i additional replication slots required but only %i free slots available on promotion candidate"),
8334 sibling_nodes_stats->min_required_free_slots,
8335 available_slots);
8336 log_hint(_("increase parameter \"max_replication_slots\" or use -F/--force to proceed in any case"));
8337
8338 if (runtime_options.dry_run == true)
8339 {
8340 *dry_run_success = false;
8341 }
8342 else
8343 {
8344 return false;
8345 }
8346 }
8347 }
8348 else
8349 {
8350 if (runtime_options.dry_run == true)
8351 {
8352 log_info(_("%i replication slots required, %i available"),
8353 sibling_nodes_stats->min_required_free_slots,
8354 available_slots);
8355 }
8356 }
8357 }
8358
8359 return true;
8360 }
8361
8362
/*
 * Instruct each reachable sibling node to follow the new primary
 * (the local node, described by local_node_record).
 *
 * Standbys are told to execute "repmgr standby follow" via SSH; witness
 * servers are re-registered against the new primary instead, and their
 * repmgrd (if running) is notified directly. Failures are counted and
 * reported but do not abort the operation.
 */
static void
sibling_nodes_follow(t_node_info *local_node_record, NodeInfoList *sibling_nodes, SiblingNodeStats *sibling_nodes_stats)
{
	int			failed_follow_count = 0;
	char		host[MAXLEN] = "";
	NodeInfoListCell *cell = NULL;
	PQExpBufferData remote_command_str;
	PQExpBufferData command_output;

	log_notice(_("executing STANDBY FOLLOW on %i of %i siblings"),
			   sibling_nodes->node_count - sibling_nodes_stats->unreachable_sibling_node_count,
			   sibling_nodes->node_count);

	for (cell = sibling_nodes->head; cell; cell = cell->next)
	{
		bool		success = false;

		/* skip nodes previously determined as unreachable */
		if (cell->node_info->reachable == false)
			continue;

		initPQExpBuffer(&remote_command_str);
		make_remote_repmgr_path(&remote_command_str, cell->node_info);

		if (cell->node_info->type == WITNESS)
		{
			PGconn	   *witness_conn = NULL;

			/* TODO: create "repmgr witness resync" or similar */
			/* the command echoes "1" on success, "0" on failure, for parsing below */
			appendPQExpBuffer(&remote_command_str,
							  "witness register -d \\'%s\\' --force 2>/dev/null && echo \"1\" || echo \"0\"",
							  local_node_record->conninfo);

			/*
			 * Notify the witness repmgrd about the new primary, as at this point it will be assuming
			 * a failover situation is in place. It will detect the new primary at some point, this
			 * just speeds up the process.
			 *
			 * In the unlikely event repmgrd is not running or not in use, this will have no effect.
			 */
			witness_conn = establish_db_connection_quiet(cell->node_info->conninfo);

			if (PQstatus(witness_conn) == CONNECTION_OK)
			{
				notify_follow_primary(witness_conn, local_node_record->node_id);
			}
			/* PQfinish() is safe to call even if the connection attempt failed */
			PQfinish(witness_conn);
		}
		else
		{
			appendPQExpBufferStr(&remote_command_str,
								 "standby follow 2>/dev/null && echo \"1\" || echo \"0\"");
		}
		get_conninfo_value(cell->node_info->conninfo, "host", host);
		log_debug("executing:\n  %s", remote_command_str.data);

		initPQExpBuffer(&command_output);

		success = remote_command(host,
								 runtime_options.remote_user,
								 remote_command_str.data,
								 config_file_options.ssh_options,
								 &command_output);

		termPQExpBuffer(&remote_command_str);

		/* "0" in the output means the remote repmgr invocation itself failed */
		if (success == false || command_output.data[0] == '0')
		{
			if (cell->node_info->type == WITNESS)
			{
				log_warning(_("WITNESS REGISTER failed on node \"%s\""),
							cell->node_info->node_name);
			}
			else
			{
				log_warning(_("STANDBY FOLLOW failed on node \"%s\""),
							cell->node_info->node_name);
			}

			failed_follow_count++;
		}

		termPQExpBuffer(&command_output);
	}

	if (failed_follow_count == 0)
	{
		log_info(_("STANDBY FOLLOW successfully executed on all reachable sibling nodes"));
	}
	else
	{
		log_warning(_("execution of STANDBY FOLLOW failed on %i sibling nodes"),
					failed_follow_count);
	}

	/*
	 * TODO: double-check all expected nodes are in pg_stat_replication
	 * and entries in repmgr.nodes match
	 */
}
8463
8464
8465
8466 static t_remote_error_type
parse_remote_error(const char * error)8467 parse_remote_error(const char *error)
8468 {
8469 if (error[0] == '\0')
8470 return REMOTE_ERROR_UNKNOWN;
8471
8472 if (strcasecmp(error, "DB_CONNECTION") == 0)
8473 return REMOTE_ERROR_DB_CONNECTION;
8474
8475 if (strcasecmp(error, "CONNINFO_PARSE") == 0)
8476 return REMOTE_ERROR_CONNINFO_PARSE;
8477
8478 return REMOTE_ERROR_UNKNOWN;
8479 }
8480
8481
8482 static CheckStatus
parse_check_status(const char * status_str)8483 parse_check_status(const char *status_str)
8484 {
8485 CheckStatus status = CHECK_STATUS_UNKNOWN;
8486
8487 if (strncmp(status_str, "OK", MAXLEN) == 0)
8488 {
8489 status = CHECK_STATUS_OK;
8490 }
8491 else if (strncmp(status_str, "WARNING", MAXLEN) == 0)
8492 {
8493 status = CHECK_STATUS_WARNING;
8494 }
8495 else if (strncmp(status_str, "CRITICAL", MAXLEN) == 0)
8496 {
8497 status = CHECK_STATUS_CRITICAL;
8498 }
8499 else if (strncmp(status_str, "UNKNOWN", MAXLEN) == 0)
8500 {
8501 status = CHECK_STATUS_UNKNOWN;
8502 }
8503
8504 return status;
8505 }
8506
/*
 * Parse the option-style output of a remote "repmgr node status
 * --is-shutdown-cleanly" invocation.
 *
 * Extracts --state (mapped to a NodeStatus) and --last-checkpoint-lsn
 * (written to *checkPoint). Returns NODE_STATUS_UNKNOWN, with
 * *checkPoint set to InvalidXLogRecPtr, if the output is empty;
 * unrecognised --state values also yield NODE_STATUS_UNKNOWN.
 */
static NodeStatus
parse_node_status_is_shutdown_cleanly(const char *node_status_output, XLogRecPtr *checkPoint)
{
	NodeStatus	node_status = NODE_STATUS_UNKNOWN;

	int			c = 0,
				argc_item = 0;
	char	  **argv_array = NULL;
	int			optindex = 0;

	/* We're only interested in these options */
	struct option node_status_options[] =
	{
		{"last-checkpoint-lsn", required_argument, NULL, 'L'},
		{"state", required_argument, NULL, 'S'},
		{NULL, 0, NULL, 0}
	};

	/* Don't attempt to tokenise an empty string */
	if (!strlen(node_status_output))
	{
		*checkPoint = InvalidXLogRecPtr;
		return node_status;
	}

	/* split the output into an argv-style array for getopt_long() */
	argc_item = parse_output_to_argv(node_status_output, &argv_array);

	/* Reset getopt's optind variable */
	optind = 0;

	/* Prevent getopt from emitting errors */
	opterr = 0;

	while ((c = getopt_long(argc_item, argv_array, "L:S:", node_status_options,
							&optindex)) != -1)
	{
		switch (c)
		{
			/* --last-checkpoint-lsn */
			case 'L':
				*checkPoint = parse_lsn(optarg);
				break;
			/* --state */
			case 'S':
				{
					if (strncmp(optarg, "RUNNING", MAXLEN) == 0)
					{
						node_status = NODE_STATUS_UP;
					}
					else if (strncmp(optarg, "SHUTDOWN", MAXLEN) == 0)
					{
						node_status = NODE_STATUS_DOWN;
					}
					else if (strncmp(optarg, "UNCLEAN_SHUTDOWN", MAXLEN) == 0)
					{
						node_status = NODE_STATUS_UNCLEAN_SHUTDOWN;
					}
					else if (strncmp(optarg, "UNKNOWN", MAXLEN) == 0)
					{
						node_status = NODE_STATUS_UNKNOWN;
					}
				}
				break;
		}
	}

	free_parsed_argv(&argv_array);

	return node_status;
}
8577
8578
8579 static ConnectionStatus
parse_remote_node_replication_connection(const char * node_check_output)8580 parse_remote_node_replication_connection(const char *node_check_output)
8581 {
8582 ConnectionStatus conn_status = CONN_UNKNOWN;
8583
8584 int c = 0,
8585 argc_item = 0;
8586 char **argv_array = NULL;
8587 int optindex = 0;
8588
8589 /* We're only interested in these options */
8590 struct option node_check_options[] =
8591 {
8592 {"connection", required_argument, NULL, 'c'},
8593 {NULL, 0, NULL, 0}
8594 };
8595
8596 /* Don't attempt to tokenise an empty string */
8597 if (!strlen(node_check_output))
8598 {
8599 return CONN_UNKNOWN;
8600 }
8601
8602 argc_item = parse_output_to_argv(node_check_output, &argv_array);
8603
8604 /* Reset getopt's optind variable */
8605 optind = 0;
8606
8607 /* Prevent getopt from emitting errors */
8608 opterr = 0;
8609
8610 while ((c = getopt_long(argc_item, argv_array, "L:S:", node_check_options,
8611 &optindex)) != -1)
8612 {
8613 switch (c)
8614 {
8615
8616 /* --connection */
8617 case 'c':
8618 {
8619 if (strncmp(optarg, "OK", MAXLEN) == 0)
8620 {
8621 conn_status = CONN_OK;
8622 }
8623 else if (strncmp(optarg, "BAD", MAXLEN) == 0)
8624 {
8625 conn_status = CONN_BAD;
8626 }
8627 else if (strncmp(optarg, "UNKNOWN", MAXLEN) == 0)
8628 {
8629 conn_status = CONN_UNKNOWN;
8630 }
8631 }
8632 break;
8633 }
8634 }
8635
8636 free_parsed_argv(&argv_array);
8637
8638 return conn_status;
8639 }
8640
8641
8642 static CheckStatus
parse_node_check_archiver(const char * node_check_output,int * files,int * threshold,t_remote_error_type * remote_error)8643 parse_node_check_archiver(const char *node_check_output, int *files, int *threshold, t_remote_error_type *remote_error)
8644 {
8645 CheckStatus status = CHECK_STATUS_UNKNOWN;
8646
8647 int c = 0,
8648 argc_item = 0;
8649 char **argv_array = NULL;
8650 int optindex = 0;
8651
8652 /* We're only interested in these options */
8653 struct option node_check_options[] =
8654 {
8655 {"status", required_argument, NULL, 'S'},
8656 {"files", required_argument, NULL, 'f'},
8657 {"threshold", required_argument, NULL, 't'},
8658 {"error", required_argument, NULL, 'E'},
8659 {NULL, 0, NULL, 0}
8660 };
8661
8662 *files = 0;
8663 *threshold = 0;
8664
8665 /* Don't attempt to tokenise an empty string */
8666 if (!strlen(node_check_output))
8667 {
8668 return status;
8669 }
8670
8671 argc_item = parse_output_to_argv(node_check_output, &argv_array);
8672
8673
8674 /* Reset getopt's optind variable */
8675 optind = 0;
8676
8677 /* Prevent getopt from emitting errors */
8678 opterr = 0;
8679
8680 while ((c = getopt_long(argc_item, argv_array, "f:S:t:", node_check_options,
8681 &optindex)) != -1)
8682 {
8683 switch (c)
8684 {
8685 /* --files */
8686 case 'f':
8687 *files = atoi(optarg);
8688 break;
8689
8690 case 't':
8691 *threshold = atoi(optarg);
8692 break;
8693
8694 /* --status */
8695 case 'S':
8696 status = parse_check_status(optarg);
8697 break;
8698 case 'E':
8699 {
8700 *remote_error = parse_remote_error(optarg);
8701 status = CHECK_STATUS_UNKNOWN;
8702 }
8703 break;
8704 }
8705 }
8706
8707 free_parsed_argv(&argv_array);
8708
8709 return status;
8710 }
8711
8712
8713 static bool
parse_data_directory_config(const char * node_check_output,t_remote_error_type * remote_error)8714 parse_data_directory_config(const char *node_check_output, t_remote_error_type *remote_error)
8715 {
8716 bool config_ok = true;
8717
8718 int c = 0,
8719 argc_item = 0;
8720 char **argv_array = NULL;
8721 int optindex = 0;
8722
8723 /* We're only interested in these options */
8724 struct option node_check_options[] =
8725 {
8726 {"configured-data-directory", required_argument, NULL, 'C'},
8727 {"error", required_argument, NULL, 'E'},
8728 {NULL, 0, NULL, 0}
8729 };
8730
8731 /* Don't attempt to tokenise an empty string */
8732 if (!strlen(node_check_output))
8733 {
8734 return false;
8735 }
8736
8737 argc_item = parse_output_to_argv(node_check_output, &argv_array);
8738
8739 /* Reset getopt's optind variable */
8740 optind = 0;
8741
8742 /* Prevent getopt from emitting errors */
8743 opterr = 0;
8744
8745 while ((c = getopt_long(argc_item, argv_array, "C:E:", node_check_options,
8746 &optindex)) != -1)
8747 {
8748 switch (c)
8749 {
8750 /* --configured-data-directory */
8751 case 'C':
8752 {
8753 /* we only care whether it's "OK" or not */
8754 if (strncmp(optarg, "OK", 2) != 0)
8755 config_ok = false;
8756 }
8757 break;
8758 case 'E':
8759 {
8760 *remote_error = parse_remote_error(optarg);
8761 config_ok = false;
8762 }
8763 break;
8764 }
8765 }
8766 free_parsed_argv(&argv_array);
8767
8768 return config_ok;
8769 }
8770
8771
8772 static bool
parse_replication_config_owner(const char * node_check_output)8773 parse_replication_config_owner(const char *node_check_output)
8774 {
8775 bool config_ok = true;
8776
8777 int c = 0,
8778 argc_item = 0;
8779 char **argv_array = NULL;
8780 int optindex = 0;
8781
8782 /* We're only interested in these options */
8783 struct option node_check_options[] =
8784 {
8785 {"replication-config-owner", required_argument, NULL, 'C'},
8786 {NULL, 0, NULL, 0}
8787 };
8788
8789 /* Don't attempt to tokenise an empty string */
8790 if (!strlen(node_check_output))
8791 {
8792 return false;
8793 }
8794
8795 argc_item = parse_output_to_argv(node_check_output, &argv_array);
8796
8797 /* Reset getopt's optind variable */
8798 optind = 0;
8799
8800 /* Prevent getopt from emitting errors */
8801 opterr = 0;
8802
8803 while ((c = getopt_long(argc_item, argv_array, "C:", node_check_options,
8804 &optindex)) != -1)
8805 {
8806 switch (c)
8807 {
8808 /* --configured-data-directory */
8809 case 'C':
8810 {
8811 /* we only care whether it's "OK" or not */
8812 if (strncmp(optarg, "OK", 2) != 0)
8813 config_ok = false;
8814 }
8815 break;
8816 }
8817 }
8818
8819 free_parsed_argv(&argv_array);
8820
8821 return config_ok;
8822 }
8823
8824
8825 static CheckStatus
parse_db_connection(const char * db_connection)8826 parse_db_connection(const char *db_connection)
8827 {
8828 CheckStatus status = CHECK_STATUS_UNKNOWN;
8829
8830 int c = 0,
8831 argc_item = 0;
8832 char **argv_array = NULL;
8833 int optindex = 0;
8834
8835 /* We're only interested in this option */
8836 struct option node_check_options[] =
8837 {
8838 {"db-connection", required_argument, NULL, 'c'},
8839 {NULL, 0, NULL, 0}
8840 };
8841
8842 /* Don't attempt to tokenise an empty string */
8843 if (!strlen(db_connection))
8844 {
8845 return false;
8846 }
8847
8848 argc_item = parse_output_to_argv(db_connection, &argv_array);
8849
8850 /* Reset getopt's optind variable */
8851 optind = 0;
8852
8853 /* Prevent getopt from emitting errors */
8854 opterr = 0;
8855
8856 while ((c = getopt_long(argc_item, argv_array, "c:", node_check_options,
8857 &optindex)) != -1)
8858 {
8859 switch (c)
8860 {
8861 /* --db-connection */
8862 case 'c':
8863 {
8864 status = parse_check_status(optarg);
8865 }
8866 break;
8867 }
8868 }
8869
8870 free_parsed_argv(&argv_array);
8871
8872 return status;
8873 }
8874
8875
/*
 * Emit usage information for the "standby" family of commands
 * (clone, register, unregister, promote, follow, switchover) to stdout.
 */
void
do_standby_help(void)
{
	print_help_header();

	printf(_("Usage:\n"));
	printf(_(" %s [OPTIONS] standby clone\n"), progname());
	printf(_(" %s [OPTIONS] standby register\n"), progname());
	printf(_(" %s [OPTIONS] standby unregister\n"), progname());
	printf(_(" %s [OPTIONS] standby promote\n"), progname());
	printf(_(" %s [OPTIONS] standby follow\n"), progname());
	printf(_(" %s [OPTIONS] standby switchover\n"), progname());

	puts("");

	printf(_("STANDBY CLONE\n"));
	puts("");
	printf(_(" \"standby clone\" clones a standby from the primary or an upstream node.\n"));
	puts("");
	printf(_(" -d, --dbname=conninfo conninfo of the upstream node to use for cloning.\n"));
	printf(_(" -c, --fast-checkpoint force fast checkpoint\n"));
	printf(_(" --copy-external-config-files[={samepath|pgdata}]\n" \
			 " copy configuration files located outside the \n" \
			 " data directory to the same path on the standby (default) or to the\n" \
			 " PostgreSQL data directory\n"));
	printf(_(" --dry-run perform checks but don't actually clone the standby\n"));
	printf(_(" --no-upstream-connection when using Barman, do not connect to upstream node\n"));
	printf(_(" -R, --remote-user=USERNAME database server username for SSH operations (default: \"%s\")\n"), runtime_options.username);
	printf(_(" --replication-user user to make replication connections with (optional, not usually required)\n"));
	printf(_(" -S, --superuser=USERNAME superuser to use, if repmgr user is not superuser\n"));
	printf(_(" --upstream-conninfo \"primary_conninfo\" value to write in recovery.conf\n" \
			 " when the intended upstream server does not yet exist\n"));
	printf(_(" --upstream-node-id ID of the upstream node to replicate from (optional, defaults to primary node)\n"));
	/* --verify-backup relies on pg_verifybackup, which is only available from PostgreSQL 13 */
#if (PG_VERSION_NUM >= 130000)
	printf(_(" --verify-backup verify a cloned node using the \"pg_verifybackup\" utility\n"));
#endif
	printf(_(" --without-barman do not clone from Barman even if configured\n"));
	printf(_(" --replication-conf-only generate replication configuration for a previously cloned instance\n"));

	puts("");

	printf(_("STANDBY REGISTER\n"));
	puts("");
	printf(_(" \"standby register\" registers the standby node.\n"));
	puts("");
	printf(_(" -F, --force overwrite an existing node record, or if primary connection\n" \
			 " parameters supplied, create record even if standby offline\n"));
	printf(_(" --upstream-node-id ID of the upstream node to replicate from (optional)\n"));
	printf(_(" --wait-start=VALUE wait for the standby to start (timeout in seconds, default %i)\n"), DEFAULT_WAIT_START);

	printf(_(" --wait-sync[=VALUE] wait for the node record to synchronise to the standby\n" \
			 " (optional timeout in seconds)\n"));

	puts("");

	printf(_("STANDBY UNREGISTER\n"));
	puts("");
	printf(_(" \"standby unregister\" unregisters an inactive standby node.\n"));
	puts("");
	printf(_(" --node-id ID of node to unregister (optional, used when the node to\n" \
			 " unregister is offline)\n"));
	puts("");

	printf(_("STANDBY PROMOTE\n"));
	puts("");
	printf(_(" \"standby promote\" promotes a standby node to primary.\n"));
	puts("");
	printf(_(" --dry-run perform checks etc. but don't actually promote the node\n"));
	printf(_(" -F, --force ignore warnings and continue anyway\n"));
	printf(_(" --siblings-follow have other standbys follow new primary\n"));
	puts("");

	printf(_("STANDBY FOLLOW\n"));
	puts("");
	printf(_(" \"standby follow\" instructs a standby node to follow a new primary.\n"));
	puts("");
	printf(_(" --dry-run perform checks but don't actually follow the new primary\n"));
	printf(_(" --upstream-node-id node ID of the new primary\n"));
	printf(_(" -W, --wait wait for a primary to appear\n"));
	puts("");


	printf(_("STANDBY SWITCHOVER\n"));
	puts("");
	printf(_(" \"standby switchover\" promotes a standby node to primary, and demotes the previous primary to a standby.\n"));
	puts("");
	printf(_(" --always-promote promote standby even if behind original primary\n"));
	printf(_(" --dry-run perform checks etc. but don't actually execute switchover\n"));
	printf(_(" -F, --force ignore warnings and continue anyway\n"));
	printf(_(" --force-rewind[=VALUE] use \"pg_rewind\" to reintegrate the old primary if necessary\n"));
	printf(_(" (9.3 and 9.4 - provide \"pg_rewind\" path)\n"));

	printf(_(" -R, --remote-user=USERNAME database server username for SSH operations (default: \"%s\")\n"), runtime_options.username);
	printf(_(" -S, --superuser=USERNAME superuser to use, if repmgr user is not superuser\n"));
	printf(_(" --repmgrd-no-pause don't pause repmgrd\n"));
	printf(_(" --siblings-follow have other standbys follow new primary\n"));

	puts("");

	printf(_("%s home page: <%s>\n"), "repmgr", REPMGR_URL);

}
8978