/*
 * repmgr-action-standby.c
 *
 * Implements standby actions for the repmgr command line utility
 *
 * Copyright (c) 2ndQuadrant, 2010-2020
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <sys/stat.h>

#include "repmgr.h"
#include "dirutil.h"
#include "compat.h"
#include "controldata.h"

#include "repmgr-client-global.h"
#include "repmgr-action-standby.h"


typedef struct TablespaceDataListCell
{
	struct TablespaceDataListCell *next;
	char	   *name;
	char	   *oid;
	char	   *location;
	/* Optional pointer to a file containing a list of tablespace files to copy from Barman */
	FILE	   *fptr;
} TablespaceDataListCell;

typedef struct TablespaceDataList
{
	TablespaceDataListCell *head;
	TablespaceDataListCell *tail;
} TablespaceDataList;


typedef struct
{
	int			reachable_sibling_node_count;
	int			reachable_sibling_nodes_with_slot_count;
	int			unreachable_sibling_node_count;
	int			min_required_wal_senders;
	int			min_required_free_slots;
} SiblingNodeStats;

#define T_SIBLING_NODES_STATS_INITIALIZER { \
	0, \
	0, \
	0, \
	0, \
	0 \
}

static PGconn *primary_conn = NULL;
static PGconn *source_conn = NULL;

static char local_data_directory[MAXPGPATH] = "";

static bool upstream_conninfo_found = false;
static int	upstream_node_id = UNKNOWN_NODE_ID;

static t_conninfo_param_list recovery_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
static char recovery_conninfo_str[MAXLEN] = "";
static char upstream_repluser[NAMEDATALEN] = "";
static char upstream_user[NAMEDATALEN] = "";

static int	source_server_version_num = UNKNOWN_SERVER_VERSION_NUM;

static t_configfile_list config_files = T_CONFIGFILE_LIST_INITIALIZER;

static standy_clone_mode mode = pg_basebackup;

/* used by barman mode */
static char local_repmgr_tmp_directory[MAXPGPATH] = "";
static char datadir_list_filename[MAXLEN] = "";
static char barman_command_buf[MAXLEN] = "";
/*
 * To enable "standby clone" to run with the lowest possible user
 * privileges, we'll need to determine which actions need to be
 * run and which of the available users should run them; this will
 * be one of the repmgr user, the replication user (if available)
 * or the superuser (if available).
 */
static t_user_type SettingsUser = REPMGR_USER;

static void _do_standby_promote_internal(PGconn *conn);
static void _do_create_replication_conf(void);

static void check_barman_config(void);
static void check_source_server(void);
static void check_source_server_via_barman(void);
static bool check_upstream_config(PGconn *conn, int server_version_num, t_node_info *node_info, bool exit_on_error);
static void check_primary_standby_version_match(PGconn *conn, PGconn *primary_conn);
static void check_recovery_type(PGconn *conn);

static void initialise_direct_clone(t_node_info *local_node_record, t_node_info *upstream_node_record);
static int	run_basebackup(t_node_info *node_record);
static int	run_file_backup(t_node_info *node_record);

static void copy_configuration_files(bool delete_after_copy);

static void tablespace_data_append(TablespaceDataList *list, const char *name, const char *oid, const char *location);

static void get_barman_property(char *dst, char *name, char *local_repmgr_directory);
static int	get_tablespace_data_barman(char *, TablespaceDataList *);
static char *make_barman_ssh_command(char *buf);

static bool create_recovery_file(t_node_info *node_record, t_conninfo_param_list *primary_conninfo, int server_version_num, char *dest, bool as_file);
static void write_primary_conninfo(PQExpBufferData *dest, t_conninfo_param_list *param_list);

static bool check_sibling_nodes(NodeInfoList *sibling_nodes, SiblingNodeStats *sibling_nodes_stats);
static bool check_free_wal_senders(int available_wal_senders, SiblingNodeStats *sibling_nodes_stats, bool *dry_run_success);
static bool check_free_slots(t_node_info *local_node_record, SiblingNodeStats *sibling_nodes_stats, bool *dry_run_success);

static void sibling_nodes_follow(t_node_info *local_node_record, NodeInfoList *sibling_nodes, SiblingNodeStats *sibling_nodes_stats);

static t_remote_error_type parse_remote_error(const char *error);
static CheckStatus parse_check_status(const char *status_str);

static NodeStatus parse_node_status_is_shutdown_cleanly(const char *node_status_output, XLogRecPtr *checkPoint);
static CheckStatus parse_node_check_archiver(const char *node_check_output, int *files, int *threshold, t_remote_error_type *remote_error);
static ConnectionStatus parse_remote_node_replication_connection(const char *node_check_output);
static bool parse_data_directory_config(const char *node_check_output, t_remote_error_type *remote_error);
static bool parse_replication_config_owner(const char *node_check_output);
static CheckStatus parse_db_connection(const char *db_connection);

/*
 * STANDBY CLONE
 *
 * Event(s):
 *  - standby_clone
 *
 * Parameters:
 *  --upstream-conninfo
 *  --upstream-node-id
 *  --no-upstream-connection
 *  -F/--force
 *  --dry-run
 *  -c/--fast-checkpoint
 *  --copy-external-config-files
 *  -R/--remote-user
 *  --replication-user (only required if no upstream record)
 *  --without-barman
 *  --replication-conf-only (--recovery-conf-only)
 *  --verify-backup (PostgreSQL 13 and later)
 */
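/*
 * A typical invocation looks something like the following (illustrative
 * only; hostnames and paths are examples, not values assumed elsewhere
 * in this file):
 *
 *   repmgr -h node1 -U repmgr -d repmgr -f /etc/repmgr.conf standby clone --dry-run
 */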

void
do_standby_clone(void)
{
	PQExpBufferData event_details;
	int			r = 0;

	/* dummy node record */
	t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
	t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;

	bool local_data_directory_provided = false;

	initialize_conninfo_params(&recovery_conninfo, false);

	/*
	 * --replication-conf-only provided - we'll handle that separately
	 */
	if (runtime_options.replication_conf_only == true)
	{
		_do_create_replication_conf();
		return;
	}

	/*
	 * conninfo params for the actual upstream node (which might be different
	 * to the node we're cloning from) to write to recovery.conf
	 */

	mode = get_standby_clone_mode();

	/*
	 * Determine the destination data directory: if a configuration file was
	 * provided, use the (mandatory) value from that; if -D/--pgdata was
	 * provided, use that.
	 *
	 * Note that barman mode requires -D/--pgdata.
	 */

	get_node_data_directory(local_data_directory);
	if (local_data_directory[0] != '\0')
	{
		local_data_directory_provided = true;
		log_notice(_("destination directory \"%s\" provided"),
				   local_data_directory);
	}
	else
	{
		/*
		 * If a configuration file is provided, repmgr will error out after
		 * parsing it if no data directory is provided; this check is for
		 * niche use-cases where no configuration file is provided.
		 */
		log_error(_("no data directory provided"));
		log_hint(_("use -D/--pgdata to explicitly specify a data directory"));
		exit(ERR_BAD_CONFIG);
	}


	if (mode == barman)
	{
		/*
		 * Not currently possible to use --verify-backup with Barman
		 */
		if (runtime_options.verify_backup == true)
		{
			log_error(_("--verify-backup option cannot be used when cloning from Barman backups"));
			exit(ERR_BAD_CONFIG);
		}

		/*
		 * Sanity-check barman connection and installation;
		 * this will exit with ERR_BARMAN if problems found.
		 */
		check_barman_config();
	}

	init_node_record(&local_node_record);
	local_node_record.type = STANDBY;

	/*
	 * Initialise list of conninfo parameters which will later be used to
	 * create the "primary_conninfo" recovery parameter.
	 *
	 * We'll initialise it with the host settings specified on the command
	 * line. As it's possible the standby will be cloned from a node different
	 * to its intended upstream, we'll later attempt to fetch the upstream
	 * node record and overwrite the values set here with those from the
	 * upstream node record (excluding that record's application_name)
	 */
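	/*
	 * The resulting "primary_conninfo" value will look something like this
	 * (illustrative example only):
	 *
	 *   host=node1 user=repmgr dbname=repmgr application_name=node2
	 */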

	copy_conninfo_params(&recovery_conninfo, &source_conninfo);


	/* Set the default application name to this node's name */
	if (config_file_options.node_id != UNKNOWN_NODE_ID)
	{
		char		application_name[MAXLEN] = "";

		param_set(&recovery_conninfo, "application_name", config_file_options.node_name);

		get_conninfo_value(config_file_options.conninfo, "application_name", application_name);
		if (strlen(application_name) && strncmp(application_name, config_file_options.node_name, sizeof(config_file_options.node_name)) != 0)
		{
			log_notice(_("\"application_name\" is set in repmgr.conf but will be replaced by the node name"));
		}
	}
	else
	{
		/*
		 * this will only happen in corner cases where the node is being
		 * cloned without a configuration file; fall back to "repmgr" if no
		 * application_name provided
		 */
		char	   *application_name = param_get(&source_conninfo, "application_name");

		if (application_name == NULL)
			param_set(&recovery_conninfo, "application_name", "repmgr");
	}



	/*
	 * Do some sanity checks on the proposed data directory; if it exists:
	 *  - check it's openable
	 *  - check if there's an instance running
	 *
	 * We do this here so the check can be part of a --dry-run.
	 */
	switch (check_dir(local_data_directory))
	{
		case DIR_ERROR:
			log_error(_("unable to access specified data directory \"%s\""), local_data_directory);
			log_detail("%s", strerror(errno));
			exit(ERR_BAD_CONFIG);
			break;
		case DIR_NOENT:
			/*
			 * directory doesn't exist
			 * TODO: in --dry-run mode, attempt to create and delete?
			 */
			break;
		case DIR_EMPTY:
			/* Present but empty */
			break;
		case DIR_NOT_EMPTY:
			/* Present but not empty */
			if (is_pg_dir(local_data_directory))
			{
				/* even -F/--force is not enough to overwrite an active directory... */
				if (is_pg_running(local_data_directory))
				{
					log_error(_("specified data directory \"%s\" appears to contain a running PostgreSQL instance"),
							  local_data_directory);
					log_hint(_("ensure the target data directory does not contain a running PostgreSQL instance"));
					exit(ERR_BAD_CONFIG);
				}
			}
			break;
		default:
			break;
	}

	/*
	 * By default attempt to connect to the source node. This will fail if no
	 * connection is possible, unless in Barman mode, in which case we can
	 * fall back to connecting to the source node via Barman (if available).
	 */
	if (runtime_options.no_upstream_connection == false)
	{
		RecordStatus record_status = RECORD_NOT_FOUND;

		/*
		 * This connects to the source node and performs sanity checks, also
		 * sets "recovery_conninfo_str", "upstream_repluser", "upstream_user" and
		 * "upstream_node_id" and creates a connection handle in "source_conn".
		 *
		 * Will error out if source connection not possible and not in
		 * "barman" mode.
		 */
		check_source_server();

		if (runtime_options.verify_backup == true)
		{
			/*
			 * --verify-backup available for PostgreSQL 13 and later
			 */
			if (PQserverVersion(source_conn) < 130000)
			{
				log_error(_("--verify-backup available for PostgreSQL 13 and later"));
				exit(ERR_BAD_CONFIG);
			}
		}

		/* attempt to retrieve upstream node record */
		record_status = get_node_record(source_conn,
										upstream_node_id,
										&upstream_node_record);

		if (record_status != RECORD_FOUND)
		{
			log_error(_("unable to retrieve record for upstream node %i"),
					  upstream_node_id);
			exit(ERR_BAD_CONFIG);
		}

	}
	else
	{
		upstream_node_id = runtime_options.upstream_node_id;
	}

	/*
	 * if --upstream-conninfo was supplied, use that (will overwrite value set
	 * by check_source_server(), but that's OK)
	 */
	if (runtime_options.upstream_conninfo[0] != '\0')
	{
		strncpy(recovery_conninfo_str, runtime_options.upstream_conninfo, MAXLEN);
		upstream_conninfo_found = true;
	}
	else if (mode == barman && PQstatus(source_conn) != CONNECTION_OK)
	{
		/*
		 * Here we don't have a connection to the upstream node (either
		 * because --no-upstream-connection was supplied, or
		 * check_source_server() was unable to make a connection), and
		 * --upstream-conninfo wasn't supplied.
		 *
		 * As we're executing in Barman mode we can try to connect via the
		 * Barman server to extract the upstream node's conninfo string.
		 *
		 * To do this we need to extract Barman's conninfo string, replace the
		 * database name with the repmgr one (they could well be different)
		 * and remotely execute psql.
		 *
		 * This attempts to set "recovery_conninfo_str".
		 */
		check_source_server_via_barman();
	}

	if (recovery_conninfo_str[0] == '\0')
	{
		log_error(_("unable to determine a connection string to use as \"primary_conninfo\""));
		log_hint(_("use \"--upstream-conninfo\" to explicitly provide a value for \"primary_conninfo\""));
		if (PQstatus(source_conn) == CONNECTION_OK)
			PQfinish(source_conn);
		exit(ERR_BAD_CONFIG);
	}


	if (upstream_conninfo_found == true)
	{
		/*
		 * parse returned upstream conninfo string to recovery
		 * primary_conninfo params
		 */
		char	   *errmsg = NULL;
		bool		parse_success = false;

		log_verbose(LOG_DEBUG, "parsing upstream conninfo string \"%s\"", recovery_conninfo_str);

		/*
		 * parse_conninfo_string() here will remove the upstream's
		 * `application_name`, if set
		 */

		parse_success = parse_conninfo_string(recovery_conninfo_str, &recovery_conninfo, &errmsg, true);

		if (parse_success == false)
		{
			log_error(_("unable to parse conninfo string \"%s\" for upstream node"),
					  recovery_conninfo_str);
			log_detail("%s", errmsg);
			if (PQstatus(source_conn) == CONNECTION_OK)
				PQfinish(source_conn);
			exit(ERR_BAD_CONFIG);
		}

		if (upstream_repluser[0] != '\0')
		{
			/* Write the replication user from the node's upstream record */
			param_set(&recovery_conninfo, "user", upstream_repluser);
		}
	}
	else
	{
		/*
		 * If no upstream node record found, we'll abort with an error here,
		 * unless -F/--force is used, in which case we'll use the parameters
		 * provided on the command line (and assume the user knows what
		 * they're doing).
		 */
		if (upstream_node_id == UNKNOWN_NODE_ID)
		{
			log_error(_("unable to determine upstream node"));
			if (PQstatus(source_conn) == CONNECTION_OK)
				PQfinish(source_conn);
			exit(ERR_BAD_CONFIG);
		}

		if (!runtime_options.force)
		{
			log_error(_("no record found for upstream node (upstream_node_id: %i)"),
					  upstream_node_id);
			log_hint(_("use -F/--force to create \"primary_conninfo\" based on command-line parameters"));

			if (PQstatus(source_conn) == CONNECTION_OK)
				PQfinish(source_conn);
			exit(ERR_BAD_CONFIG);
		}
	}

	/*
	 * If copying of external configuration files requested, and any are
	 * detected, perform sanity checks
	 */
	if (PQstatus(source_conn) == CONNECTION_OK && runtime_options.copy_external_config_files == true)
	{
		PGconn	   *superuser_conn = NULL;
		PGconn	   *privileged_conn = NULL;
		bool		external_config_files = false;
		int			i = 0;

		/*
		 * Obtain configuration file locations
		 *
		 * We'll check to see whether the configuration files are in the data
		 * directory - if not we'll have to copy them via SSH, if copying
		 * requested.
		 *
		 * This will require superuser permissions, so we'll attempt to
		 * connect as -S/--superuser (if provided), otherwise check the
		 * current connection user has superuser rights.
		 *
		 * XXX: if configuration files are symlinks to targets outside the
		 * data directory, they won't be copied by pg_basebackup, but we can't
		 * tell this from the below query; we'll probably need to add a check
		 * for their presence and if missing force copy by SSH
		 */

		if (SettingsUser == REPMGR_USER)
		{
			privileged_conn = source_conn;
		}
		else
		{
			get_superuser_connection(&source_conn, &superuser_conn, &privileged_conn);
		}

		if (get_configuration_file_locations(privileged_conn, &config_files) == false)
		{
			log_notice(_("unable to proceed without establishing configuration file locations"));
			PQfinish(source_conn);

			if (superuser_conn != NULL)
				PQfinish(superuser_conn);

			exit(ERR_BAD_CONFIG);
		}
		/* check if any files are actually outside the data directory */
		for (i = 0; i < config_files.entries; i++)
		{
			t_configfile_info *file = config_files.files[i];

			if (file->in_data_directory == false)
			{
				external_config_files = true;
				break;
			}
		}

		if (external_config_files == true)
		{
			int			r;
			PQExpBufferData msg;

			initPQExpBuffer(&msg);

			appendPQExpBuffer(&msg,
							  _("external configuration files detected, checking SSH connection to host \"%s\""),
							  runtime_options.host);

			if (runtime_options.dry_run == true)
			{
				log_notice("%s", msg.data);
			}
			else
			{
				log_verbose(LOG_INFO, "%s", msg.data);
			}

			termPQExpBuffer(&msg);

			r = test_ssh_connection(runtime_options.host, runtime_options.remote_user);

			if (r != 0)
			{
				log_error(_("remote host \"%s\" is not reachable via SSH - unable to copy external configuration files"),
						  runtime_options.host);
				if (superuser_conn != NULL)
					PQfinish(superuser_conn);
				PQfinish(source_conn);
				exit(ERR_BAD_CONFIG);
			}

			initPQExpBuffer(&msg);

			appendPQExpBuffer(&msg,
							  _("SSH connection to host \"%s\" succeeded"),
							  runtime_options.host);

			if (runtime_options.dry_run == true)
			{
				log_info("%s", msg.data);
			}
			else
			{
				log_verbose(LOG_INFO, "%s", msg.data);
			}

			termPQExpBuffer(&msg);


			/*
			 * Here we'll attempt an initial test copy of the detected external
			 * files, to detect any issues before we run the base backup.
			 *
			 * Note this will exit with an error, unless -F/--force supplied.
			 *
			 * We don't do this during a --dry-run as it may introduce unexpected changes
			 * on the local node; during an actual clone operation, any problems with
			 * copying files will be detected early and the operation aborted before
			 * the actual database cloning commences.
			 *
			 * TODO: put the files in a temporary directory and move to their final
			 * destination once the database has been cloned.
			 */

			if (runtime_options.dry_run == false)
			{
				if (runtime_options.copy_external_config_files_destination == CONFIG_FILE_SAMEPATH)
				{
					/*
					 * Files will be placed in the same path as on the source server;
					 * don't delete after copying.
					 */
					copy_configuration_files(false);

				}
				else
				{
					/*
					 * Files will be placed in the data directory - delete after copying.
					 * They'll be copied again later; see TODO above.
					 */
					copy_configuration_files(true);
				}
			}
		}


		if (superuser_conn != NULL)
			PQfinish(superuser_conn);
	}


	if (runtime_options.dry_run == true)
	{
		/*
		 * If replication slots in use, sanity-check whether we can create them
		 * with the available user permissions.
		 */
		if (config_file_options.use_replication_slots == true && PQstatus(source_conn) == CONNECTION_OK)
		{
			PQExpBufferData msg;
			bool success = true;

			initPQExpBuffer(&msg);

			/*
			 * "create_replication_slot()" knows about --dry-run mode and
			 * will perform checks but not actually create the slot.
			 */
			success = create_replication_slot(source_conn,
											  local_node_record.slot_name,
											  &upstream_node_record,
											  &msg);
			if (success == false)
			{
				log_error(_("prerequisites not met for creating a replication slot on upstream node %i"),
						  upstream_node_record.node_id);
				termPQExpBuffer(&msg);
				exit(ERR_BAD_CONFIG);
			}
			termPQExpBuffer(&msg);
		}

		if (upstream_node_id != UNKNOWN_NODE_ID)
		{
			log_notice(_("standby will attach to upstream node %i"), upstream_node_id);
		}
		else
		{
			log_warning(_("unable to determine a valid upstream node id"));
		}

		if (mode == pg_basebackup && runtime_options.fast_checkpoint == false)
		{
			log_hint(_("consider using the -c/--fast-checkpoint option"));
		}

		PQfinish(source_conn);

		log_info(_("all prerequisites for \"standby clone\" are met"));

		exit(SUCCESS);
	}

	if (mode != barman)
	{
		initialise_direct_clone(&local_node_record, &upstream_node_record);
	}

	switch (mode)
	{
		case pg_basebackup:
			log_notice(_("starting backup (using pg_basebackup)..."));
			break;
		case barman:
			log_notice(_("retrieving backup from Barman..."));
			break;
		default:
			/* should never reach here */
			log_error(_("unknown clone mode"));
	}

	if (mode == pg_basebackup)
	{
		if (runtime_options.fast_checkpoint == false)
		{
			log_hint(_("this may take some time; consider using the -c/--fast-checkpoint option"));
		}
	}

	switch (mode)
	{
		case pg_basebackup:
			r = run_basebackup(&local_node_record);
			break;
		case barman:
			r = run_file_backup(&local_node_record);
			break;
		default:
			/* should never reach here */
			log_error(_("unknown clone mode"));
	}

	/* If the backup failed then exit */
	if (r != SUCCESS)
	{
		/* If a replication slot was previously created, drop it */
		if (config_file_options.use_replication_slots == true)
		{
			/*
			 * In the case where a standby is being cloned from a node other than its
			 * intended upstream, we can't be sure of the source node's node_id. That
			 * value is only needed by "drop_replication_slot_if_exists()" to determine
			 * from the node's record whether it has a different replication user; as
			 * in this case that user would have to be supplied via "--replication-user"
			 * anyway, it's not a problem.
			 */
			drop_replication_slot_if_exists(source_conn, UNKNOWN_NODE_ID, local_node_record.slot_name);
		}

		log_error(_("unable to take a base backup of the primary server"));
		log_hint(_("data directory (\"%s\") may need to be cleaned up manually"),
				 local_data_directory);

		PQfinish(source_conn);
		exit(r);
	}

	/*
	 * Run pg_verifybackup here if requested, before any alterations are made
	 * to the data directory.
	 */
	if (mode == pg_basebackup && runtime_options.verify_backup == true)
	{
		PQExpBufferData command;
		int r;
		struct stat st;

		initPQExpBuffer(&command);

		make_pg_path(&command, "pg_verifybackup");

		/* check command actually exists */
		if (stat(command.data, &st) != 0)
		{
			log_error(_("unable to find expected binary \"%s\""), command.data);
			log_detail("%s", strerror(errno));
			exit(ERR_BAD_CONFIG);
		}

		appendPQExpBufferStr(&command, " ");

		/* Somewhat inconsistent, but pg_verifybackup doesn't accept a -D option */
		appendShellString(&command,
						  local_data_directory);
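		/*
		 * The assembled command will look something like the following
		 * (illustrative example; the actual binary path depends on the
		 * PostgreSQL installation):
		 *
		 *   /usr/pgsql-13/bin/pg_verifybackup '/var/lib/pgsql/13/data'
		 */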

		log_debug("executing:\n  %s", command.data);

		r = system(command.data);
		termPQExpBuffer(&command);

		if (r != 0)
		{
			log_error(_("unable to verify backup"));
			exit(ERR_BAD_BASEBACKUP);
		}

		log_verbose(LOG_INFO, _("backup successfully verified"));

	}


	/*
	 * If `--copy-external-config-files` was provided, copy any configuration
	 * files detected to the appropriate location. Any errors encountered will
	 * not be treated as fatal.
	 *
	 * This won't run in Barman mode as "config_files" is only populated in
	 * "initialise_direct_clone()", which isn't called in Barman mode.
	 */
	if (runtime_options.copy_external_config_files == true && config_files.entries > 0)
	{
		/*
		 * If "--copy-external-config-files=samepath" was used, the files will already
		 * have been copied.
		 */
		if (runtime_options.copy_external_config_files_destination == CONFIG_FILE_PGDATA)
			copy_configuration_files(false);
	}
	/*
	 * Write the replication configuration: "recovery.conf" for PostgreSQL 11
	 * and earlier, otherwise replication settings appended to
	 * "postgresql.auto.conf".
	 */

	if (create_recovery_file(&local_node_record,
							 &recovery_conninfo,
							 source_server_version_num,
							 local_data_directory,
							 true) == false)
	{
		/* create_recovery_file() will log an error */
		if (source_server_version_num >= 120000)
		{
			log_notice(_("unable to write replication configuration; see preceding error messages"));
		}
		else
		{
			log_notice(_("unable to create recovery.conf; see preceding error messages"));
		}
		log_hint(_("data directory (\"%s\") may need to be cleaned up manually"),
				 local_data_directory);

		PQfinish(source_conn);
		exit(ERR_BAD_CONFIG);
	}

	switch (mode)
	{
		case pg_basebackup:
			log_notice(_("standby clone (using pg_basebackup) complete"));
			break;

		case barman:
			log_notice(_("standby clone (from Barman) complete"));
			break;
	}

	/*
	 * TODO: It might be nice to provide an option to have repmgr start the
	 * PostgreSQL server automatically
	 */

	log_notice(_("you can now start your PostgreSQL server"));

	if (config_file_options.service_start_command[0] != '\0')
	{
		log_hint(_("for example: %s"),
				 config_file_options.service_start_command);
	}
	else if (local_data_directory_provided)
	{
		log_hint(_("for example: pg_ctl -D %s start"),
				 local_data_directory);
	}
	else
	{
		log_hint(_("for example: /etc/init.d/postgresql start"));
	}

	/*
	 * XXX forgetting to (re) register the standby is a frequent cause of
	 * error; we should consider having repmgr automatically register the
	 * standby, either by default with an option "--no-register", or an option
	 * "--register".
	 *
	 * Note that "repmgr standby register" requires the standby to be running
	 * - if not, and we just update the node record, we'd have an incorrect
	 * representation of the replication cluster. Best combined with an
	 * automatic start of the server (see note above)
	 */

	/*
	 * Check for an existing node record, and output the appropriate command
	 * for registering or re-registering.
	 */
	{
		t_node_info node_record = T_NODE_INFO_INITIALIZER;
		RecordStatus record_status = RECORD_NOT_FOUND;

		record_status = get_node_record(primary_conn,
										config_file_options.node_id,
										&node_record);

		if (record_status == RECORD_FOUND)
		{
			log_hint(_("after starting the server, you need to re-register this standby with \"repmgr standby register --force\" to update the existing node record"));
		}
		else
		{
			log_hint(_("after starting the server, you need to register this standby with \"repmgr standby register\""));

		}
	}


	/* Log the event */

	initPQExpBuffer(&event_details);

	/* Add details about relevant runtime options used */
	appendPQExpBuffer(&event_details,
					  _("cloned from host \"%s\", port %s"),
					  runtime_options.host,
					  runtime_options.port);

	appendPQExpBufferStr(&event_details,
						 _("; backup method: "));

	switch (mode)
	{
		case pg_basebackup:
			appendPQExpBufferStr(&event_details, "pg_basebackup");
			break;
		case barman:
			appendPQExpBufferStr(&event_details, "barman");
			break;
	}

	appendPQExpBuffer(&event_details,
					  _("; --force: %s"),
					  runtime_options.force ? "Y" : "N");

	create_event_notification(primary_conn,
							  &config_file_options,
							  config_file_options.node_id,
							  "standby_clone",
							  true,
							  event_details.data);

	if (primary_conn != source_conn && PQstatus(primary_conn) == CONNECTION_OK)
		PQfinish(primary_conn);

	if (PQstatus(source_conn) == CONNECTION_OK)
		PQfinish(source_conn);

	exit(r);
}


void
check_barman_config(void)
{
	PQExpBufferData command;
	bool		command_ok = false;

	/*
	 * Check that there is at least one valid backup
	 */

	log_info(_("connecting to Barman server to verify backup for \"%s\""), config_file_options.barman_server);

	initPQExpBuffer(&command);

	appendPQExpBuffer(&command, "%s show-backup %s latest > /dev/null",
					  make_barman_ssh_command(barman_command_buf),
					  config_file_options.barman_server);
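	/*
	 * The resulting command will look something like this (illustrative
	 * example; the SSH prefix is built by make_barman_ssh_command() from
	 * the configured Barman settings):
	 *
	 *   ssh barman@barmansrv barman show-backup pg latest > /dev/null
	 */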

	command_ok = local_command(command.data, NULL);

	if (command_ok == false)
	{
		log_error(_("no valid backup for server \"%s\" was found in the Barman catalogue"),
				  config_file_options.barman_server);
		log_detail(_("command executed was:\n  %s"), command.data);
		log_hint(_("refer to the Barman documentation for more information"));

		termPQExpBuffer(&command);
		exit(ERR_BARMAN);
	}
	else if (runtime_options.dry_run == true)
	{
		log_info(_("valid backup for server \"%s\" found in the Barman catalogue"),
				 config_file_options.barman_server);
	}

	termPQExpBuffer(&command);

	/*
	 * Attempt to create data directory (unless --dry-run specified,
	 * in which case do nothing; warnings will be emitted elsewhere about
	 * any issues with the data directory)
	 */
	if (runtime_options.dry_run == false)
	{
		if (!create_pg_dir(local_data_directory, runtime_options.force))
		{
			log_error(_("unable to use directory %s"),
					  local_data_directory);
			log_hint(_("use -F/--force option to force this directory to be overwritten"));
			exit(ERR_BAD_CONFIG);
		}

		/*
		 * Create the local repmgr subdirectory
		 */

		maxlen_snprintf(local_repmgr_tmp_directory,
						"%s/repmgr", local_data_directory);

		maxlen_snprintf(datadir_list_filename,
						"%s/data.txt", local_repmgr_tmp_directory);
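		/*
		 * With a data directory of e.g. "/var/lib/pgsql/data" (illustrative),
		 * this results in "/var/lib/pgsql/data/repmgr" as the temporary
		 * working directory and ".../repmgr/data.txt" as the file list.
		 */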

		if (!create_pg_dir(local_repmgr_tmp_directory, runtime_options.force))
		{
			log_error(_("unable to create directory \"%s\""),
					  local_repmgr_tmp_directory);

			exit(ERR_BAD_CONFIG);
		}
	}

	/*
	 * Fetch server parameters from Barman
	 */
	log_info(_("connecting to Barman server to fetch server parameters"));

	initPQExpBuffer(&command);

	if (runtime_options.dry_run == true)
	{
		appendPQExpBuffer(&command, "%s show-server %s > /dev/null",
						  make_barman_ssh_command(barman_command_buf),
						  config_file_options.barman_server);
	}
	else
	{
		appendPQExpBuffer(&command, "%s show-server %s > %s/show-server.txt",
						  make_barman_ssh_command(barman_command_buf),
						  config_file_options.barman_server,
						  local_repmgr_tmp_directory);
	}

	command_ok = local_command(command.data, NULL);

	if (command_ok == false)
	{
		log_error(_("unable to fetch server parameters from Barman server"));
		log_detail(_("command executed was:\n  %s"), command.data);
		termPQExpBuffer(&command);
		exit(ERR_BARMAN);
	}
	else if (runtime_options.dry_run == true)
	{
		log_info(_("server parameters were successfully fetched from Barman server"));
	}

	termPQExpBuffer(&command);
}


/*
 * _do_create_replication_conf()
 *
 * Create replication configuration for a previously cloned instance.
 *
 * Prerequisites:
 *
 * - data directory must be provided, either explicitly or via
 *   repmgr.conf
 * - the instance should not be running
 * - an existing "recovery.conf" file can only be overwritten with
 *   -F/--force (Pg11 and earlier)
 * - connection parameters for an existing, running node must be provided
 * - the node specified with --upstream-node-id, if provided, will be used
 *   for "primary_conninfo", otherwise the primary node; the node must exist;
 *   unless -F/--force provided, it must be active and a connection possible
 * - if replication slots in use, create (respect --dry-run)
 *
 * not compatible with --no-upstream-connection
 *
 */

static void
_do_create_replication_conf(void)
{
	t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
	t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;

	RecordStatus record_status = RECORD_NOT_FOUND;
	char		recovery_file_path[MAXPGPATH + sizeof(RECOVERY_COMMAND_FILE)] = "";
	struct stat st;
	bool		node_is_running = false;
	bool		slot_creation_required = false;
	PGconn	   *upstream_conn = NULL;
	PGconn	   *upstream_repl_conn = NULL;

	get_node_data_directory(local_data_directory);

	if (local_data_directory[0] == '\0')
	{
		log_error(_("no data directory provided"));
		log_hint(_("provide the node's \"repmgr.conf\" file with -f/--config-file or the data directory with -D/--pgdata"));
		exit(ERR_BAD_CONFIG);
	}


	/* check connection */
	source_conn = establish_db_connection_by_params(&source_conninfo, true);

	/* Verify that source is a supported server version */
	(void) check_server_version(source_conn, "source node", true, NULL);

	/*
	 * Do some sanity checks on the data directory to make sure
	 * it contains a valid but dormant instance
	 */
	switch (check_dir(local_data_directory))
	{
		case DIR_ERROR:
			log_error(_("unable to access specified data directory \"%s\""), local_data_directory);
			log_detail("%s", strerror(errno));
			PQfinish(source_conn);
			exit(ERR_BAD_CONFIG);
			break;
		case DIR_NOENT:
			log_error(_("specified data directory \"%s\" does not exist"), local_data_directory);
			PQfinish(source_conn);
			exit(ERR_BAD_CONFIG);
			break;
		case DIR_EMPTY:
			log_error(_("specified data directory \"%s\" is empty"), local_data_directory);
			PQfinish(source_conn);
			exit(ERR_BAD_CONFIG);
			break;
		case DIR_NOT_EMPTY:
			/* Present but not empty */
			if (!is_pg_dir(local_data_directory))
			{
				log_error(_("specified data directory \"%s\" does not contain a PostgreSQL instance"), local_data_directory);
				PQfinish(source_conn);
				exit(ERR_BAD_CONFIG);
			}

			if (is_pg_running(local_data_directory))
			{
				if (runtime_options.force == false)
				{
					log_error(_("specified data directory \"%s\" appears to contain a running PostgreSQL instance"),
							  local_data_directory);

					if (PQserverVersion(source_conn) >= 120000)
					{
						log_hint(_("use -F/--force to create replication configuration anyway"));
					}
					else
					{
						log_hint(_("use -F/--force to create \"recovery.conf\" anyway"));
					}

					exit(ERR_BAD_CONFIG);
				}

				node_is_running = true;

				if (runtime_options.dry_run == true)
				{
					if (PQserverVersion(source_conn) >= 120000)
					{
						log_warning(_("replication configuration would be created in an active data directory"));
					}
					else
					{
						log_warning(_("\"recovery.conf\" would be created in an active data directory"));
					}
				}
				else
				{
					if (PQserverVersion(source_conn) >= 120000)
					{
						log_warning(_("creating replication configuration in an active data directory"));
					}
					else
					{
						log_warning(_("creating \"recovery.conf\" in an active data directory"));
					}
				}
			}
			break;
		default:
			break;
	}


	/* determine node for primary_conninfo */

	if (runtime_options.upstream_node_id != UNKNOWN_NODE_ID)
	{
		upstream_node_id = runtime_options.upstream_node_id;
	}
	else
	{
		/* if --upstream-node-id not specifically supplied, get primary node id */
		upstream_node_id = get_primary_node_id(source_conn);

		if (upstream_node_id == NODE_NOT_FOUND)
		{
			log_error(_("unable to determine primary node for this replication cluster"));
			PQfinish(source_conn);
			exit(ERR_BAD_CONFIG);
		}

		log_debug("primary node determined as: %i", upstream_node_id);
	}

	/* attempt to retrieve upstream node record */
	record_status = get_node_record(source_conn,
									upstream_node_id,
									&upstream_node_record);

	if (record_status != RECORD_FOUND)
	{
		log_error(_("unable to retrieve node record for upstream node %i"), upstream_node_id);

		if (record_status == RECORD_ERROR)
		{
			log_detail("%s", PQerrorMessage(source_conn));
		}

		exit(ERR_BAD_CONFIG);
	}

	/* attempt to retrieve local node record */
	record_status = get_node_record(source_conn,
									config_file_options.node_id,
									&local_node_record);

	if (record_status != RECORD_FOUND)
	{
		log_error(_("unable to retrieve node record for local node %i"), config_file_options.node_id);

		if (record_status == RECORD_ERROR)
		{
			log_detail("%s", PQerrorMessage(source_conn));
		}
		else
		{
			log_hint(_("standby must be registered before replication can be configured"));
		}

		exit(ERR_BAD_CONFIG);
	}

	PQfinish(source_conn);


	/* connect to upstream (which could be different to source) */

	upstream_conn = establish_db_connection(upstream_node_record.conninfo, false);
	if (PQstatus(upstream_conn) != CONNECTION_OK)
	{
		log_error(_("unable to connect to upstream node \"%s\" (ID: %i)"),
				  upstream_node_record.node_name,
				  upstream_node_id);
		exit(ERR_BAD_CONFIG);
	}
	initialize_conninfo_params(&recovery_conninfo, false);

	/* We ignore any application_name set in the primary's conninfo */
	parse_conninfo_string(upstream_node_record.conninfo, &recovery_conninfo, NULL, true);

	/* Set the application name to this node's name */
	if (config_file_options.node_name[0] != '\0')
		param_set(&recovery_conninfo, "application_name", config_file_options.node_name);

	/* Set the replication user from the upstream node record */
	param_set(&recovery_conninfo, "user", upstream_node_record.repluser);

	/* check that a replication connection can be made (--force = override) */
	upstream_repl_conn = establish_db_connection_by_params(&recovery_conninfo, false);

	if (PQstatus(upstream_repl_conn) != CONNECTION_OK)
	{
		if (runtime_options.force == false)
		{
			log_error(_("unable to initiate replication connection to upstream node \"%s\" (ID: %i)"),
					  upstream_node_record.node_name,
					  upstream_node_id);
			PQfinish(upstream_conn);
			exit(ERR_BAD_CONFIG);
		}
	}

	/* if replication slots are in use, perform some checks */
	if (config_file_options.use_replication_slots == true)
	{
		PQExpBufferData msg;
		t_replication_slot slot_info = T_REPLICATION_SLOT_INITIALIZER;

		record_status = get_slot_record(upstream_conn, local_node_record.slot_name, &slot_info);

		/* check if replication slot exists */
		if (record_status == RECORD_FOUND)
		{
			if (slot_info.active == true)
			{
				initPQExpBuffer(&msg);

				appendPQExpBuffer(&msg,
								  _("an active replication slot named \"%s\" already exists on upstream node \"%s\" (ID: %i)"),
								  local_node_record.slot_name,
								  upstream_node_record.node_name,
								  upstream_node_id);

				if (runtime_options.force == false && runtime_options.dry_run == false)
				{
					log_error("%s", msg.data);
					log_hint(_("use -F/--force to continue anyway"));
					termPQExpBuffer(&msg);
					PQfinish(upstream_conn);
					exit(ERR_BAD_CONFIG);
				}

				log_warning("%s", msg.data);
				termPQExpBuffer(&msg);
			}
			else
			{
				log_info(_("an inactive replication slot for this node exists on the upstream node"));
			}
		}
		/* if not, check whether a slot can and should be created */
		else
		{
			get_node_replication_stats(upstream_conn, &upstream_node_record);

			if (upstream_node_record.max_replication_slots > upstream_node_record.total_replication_slots)
			{
				slot_creation_required = true;
			}
			else
			{
				initPQExpBuffer(&msg);

				appendPQExpBuffer(&msg,
								  _("insufficient free replication slots on upstream node \"%s\" (ID: %i)"),
								  upstream_node_record.node_name,
								  upstream_node_id);

				if (runtime_options.force == false && runtime_options.dry_run == false)
				{
					log_error("%s", msg.data);
					log_hint(_("use -F/--force to continue anyway"));
					termPQExpBuffer(&msg);
					PQfinish(upstream_conn);
					exit(ERR_BAD_CONFIG);
				}

				log_warning("%s", msg.data);
				termPQExpBuffer(&msg);
			}
		}
	}

	/* check if recovery.conf exists (Pg11 and earlier only) */
	if (PQserverVersion(upstream_conn) < 120000)
	{
		snprintf(recovery_file_path, sizeof(recovery_file_path),
				 "%s/%s",
				 local_data_directory,
				 RECOVERY_COMMAND_FILE);

		if (stat(recovery_file_path, &st) == -1)
		{
			if (errno != ENOENT)
			{
				log_error(_("unable to check for existing \"recovery.conf\" file in \"%s\""),
						  local_data_directory);
				log_detail("%s", strerror(errno));
				exit(ERR_BAD_CONFIG);
			}
		}
		else
		{
			if (runtime_options.force == false)
			{
				log_error(_("\"recovery.conf\" already exists in \"%s\""),
						  local_data_directory);
				log_hint(_("use -F/--force to overwrite an existing \"recovery.conf\" file"));
				exit(ERR_BAD_CONFIG);
			}

			if (runtime_options.dry_run == true)
			{
				log_warning(_("the existing \"recovery.conf\" file would be overwritten"));
			}
			else
			{
				log_warning(_("the existing \"recovery.conf\" file will be overwritten"));
			}
		}
	}

	if (runtime_options.dry_run == true)
	{
		char		recovery_conf_contents[MAXLEN] = "";
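
		/*
		 * With "as_file" set to false, create_recovery_file() writes the
		 * generated configuration into the supplied buffer rather than to a
		 * file, so it can be displayed in --dry-run mode.
		 */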
		create_recovery_file(&local_node_record,
							 &recovery_conninfo,
							 PQserverVersion(upstream_conn),
							 recovery_conf_contents,
							 false);

		if (PQserverVersion(upstream_conn) >= 120000)
		{
			log_info(_("following items would be added to \"postgresql.auto.conf\" in \"%s\""), local_data_directory);
		}
		else
		{
			log_info(_("would create \"recovery.conf\" file in \"%s\""), local_data_directory);
		}

		log_detail(_("\n%s"), recovery_conf_contents);
	}
	else
	{
		if (!create_recovery_file(&local_node_record,
								  &recovery_conninfo,
								  PQserverVersion(upstream_conn),
								  local_data_directory,
								  true))
		{
			if (PQserverVersion(upstream_conn) >= 120000)
			{
				log_error(_("unable to write replication configuration to \"postgresql.auto.conf\""));
			}
			else
			{
				log_error(_("unable to create \"recovery.conf\""));
			}
		}
		else
		{
			if (PQserverVersion(upstream_conn) >= 120000)
			{
				log_notice(_("replication configuration written to \"postgresql.auto.conf\""));
			}
			else
			{
				log_notice(_("\"recovery.conf\" created as \"%s\""), recovery_file_path);
			}

			if (node_is_running == true)
			{
				log_hint(_("node must be restarted for the new file to take effect"));
			}
		}
	}

	/* Pg12 and later: add standby.signal, if not already there */
	if (PQserverVersion(upstream_conn) >= 120000)
	{
		if (runtime_options.dry_run == true)
		{
			log_info(_("would write \"standby.signal\" file"));

		}
		else
		{
			if (write_standby_signal() == false)
			{
				log_error(_("unable to write \"standby.signal\" file"));
			}
		}
	}

	/* add replication slot, if required */
	if (slot_creation_required == true)
	{
		PQExpBufferData msg;
		initPQExpBuffer(&msg);

		if (runtime_options.dry_run == true)
		{
			/*
			 * In --dry-run mode this will check availability
			 * of a user who can create replication slots.
			 */
			/* XXX check return value */
			create_replication_slot(upstream_conn,
									local_node_record.slot_name,
									NULL,
									&msg);
			log_info(_("would create replication slot \"%s\" on upstream node \"%s\" (ID: %i)"),
					 local_node_record.slot_name,
					 upstream_node_record.node_name,
					 upstream_node_id);
		}
		else
		{

			if (create_replication_slot(upstream_conn,
										local_node_record.slot_name,
										NULL,
										&msg) == false)
			{
				log_error("%s", msg.data);
				PQfinish(upstream_conn);
				termPQExpBuffer(&msg);
				exit(ERR_BAD_CONFIG);
			}


			log_notice(_("replication slot \"%s\" created on upstream node \"%s\" (ID: %i)"),
					   local_node_record.slot_name,
					   upstream_node_record.node_name,
					   upstream_node_id);
		}
		termPQExpBuffer(&msg);

	}


	PQfinish(upstream_conn);

	return;
}


/*
 * do_standby_register()
 *
 * Event(s):
 *  - standby_register
 *  - standby_register_sync
 */
/* XXX check --upstream-node-id works when re-registering */
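/*
 * Typical usage after cloning and starting the standby (illustrative
 * example; the configuration file path is an assumption):
 *
 *   repmgr -f /etc/repmgr.conf standby register
 */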

void
do_standby_register(void)
{
	PGconn	   *conn = NULL;
	PGconn	   *primary_conn = NULL;

	bool		record_created = false;
	t_node_info node_record = T_NODE_INFO_INITIALIZER;
	RecordStatus record_status = RECORD_NOT_FOUND;

	PQExpBufferData details;

	/* so we can pass info about the primary to event notification scripts */
	t_event_info event_info = T_EVENT_INFO_INITIALIZER;
	t_node_info primary_node_record = T_NODE_INFO_INITIALIZER;
	int primary_node_id = UNKNOWN_NODE_ID;

	bool		dry_run_ok = true;

	log_info(_("connecting to local node \"%s\" (ID: %i)"),
			 config_file_options.node_name,
			 config_file_options.node_id);

	conn = establish_db_connection_quiet(config_file_options.conninfo);

	/*
	 * If unable to connect, and --force not provided, wait up to --wait-start
	 * seconds (default: 0) for the node to become reachable.
	 *
	 * Note that if --force is provided, we don't wait for the node to start, as
	 * the normal use case will be re-registering an existing node, or
	 * registering an inactive/not-yet-extant one; we'll do the
	 * error handling for those cases in the next code block
	 */
	if (PQstatus(conn) != CONNECTION_OK && runtime_options.force == false)
	{
		bool		conn_ok = false;
		int			timer = 0;

		for (;;)
		{
			if (timer == runtime_options.wait_start)
				break;

			sleep(1);

			log_verbose(LOG_INFO, _("%i of %i connection attempts"),
						timer + 1,
						runtime_options.wait_start);

			conn = establish_db_connection_quiet(config_file_options.conninfo);

			if (PQstatus(conn) == CONNECTION_OK)
			{
				conn_ok = true;
				break;
			}

			timer++;
		}

		if (conn_ok == true)
		{
			log_info(_("connected to local node \"%s\" (ID: %i) after %i seconds"),
					 config_file_options.node_name,
					 config_file_options.node_id,
					 timer);
		}
	}

	/*
	 * If still unable to connect, continue only if -F/--force provided,
	 * and primary connection parameters provided.
	 */
	if (PQstatus(conn) != CONNECTION_OK)
	{
		if (runtime_options.force == false)
		{
			log_error(_("unable to connect to local node \"%s\" (ID: %i)"),
					  config_file_options.node_name,
					  config_file_options.node_id);
			log_detail("\n%s", PQerrorMessage(conn));
			log_hint(_("to register a standby which is not running, provide primary connection parameters and use option -F/--force"));

			exit(ERR_BAD_CONFIG);
		}

		if (runtime_options.connection_param_provided == false)
		{
			log_error(_("unable to connect to local node \"%s\" (ID: %i)"),
					  config_file_options.node_name,
					  config_file_options.node_id);
			log_hint(_("to register a standby which is not running, additionally provide the primary connection parameters"));
			exit(ERR_BAD_CONFIG);
		}
	}
	/* connection OK - check this is actually a standby */
	else
	{
		if (runtime_options.connection_param_provided)
		{
			log_warning(_("database connection parameters not required when the standby to be registered is running"));
			log_detail(_("repmgr uses the \"conninfo\" parameter in \"repmgr.conf\" to connect to the standby"));
		}
		check_recovery_type(conn);
	}

	/* check if there is a primary in this cluster */
	log_info(_("connecting to primary database"));

	/* Normal case - we can connect to the local node */
	if (PQstatus(conn) == CONNECTION_OK)
	{
		primary_conn = get_primary_connection(conn, &primary_node_id, NULL);
	}

	/*
	 * otherwise user is forcing a registration of a (potentially) inactive (or
	 * not-yet-extant) node and must have supplied primary connection info
	 */
	else
	{
		primary_conn = establish_db_connection_by_params(&source_conninfo, false);
	}

	/*
	 * no amount of --force will make it possible to register the standby
	 * without a primary server to connect to
	 */
	if (PQstatus(primary_conn) != CONNECTION_OK)
	{
		log_error(_("unable to connect to the primary database"));
		log_hint(_("a primary node must be configured before registering a standby node"));
		exit(ERR_BAD_CONFIG);
	}

	/*
	 * Populate "event_info" with info about the primary for event notifications
	 */
	record_status = get_node_record(primary_conn,
									primary_node_id,
									&primary_node_record);
	event_info.node_id = primary_node_id;
	event_info.node_name = primary_node_record.node_name;
	event_info.conninfo_str = primary_node_record.conninfo;

	/*
	 * Verify that standby and primary are supported and compatible server
	 * versions
	 *
	 * If the user is registering an inactive standby, we'll trust they know
	 * what they're doing
	 */
	if (PQstatus(conn) == CONNECTION_OK)
	{
		check_primary_standby_version_match(conn, primary_conn);
	}


	/*
	 * Check that an active node with the same node_name doesn't exist already
	 */

	record_status = get_node_record_by_name(primary_conn,
											config_file_options.node_name,
											&node_record);

	if (record_status == RECORD_FOUND)
	{
		if (node_record.active == true && node_record.node_id != config_file_options.node_id)
		{
			log_error(_("node %i exists already with node_name \"%s\""),
					  node_record.node_id,
					  config_file_options.node_name);
			PQfinish(primary_conn);
			if (PQstatus(conn) == CONNECTION_OK)
				PQfinish(conn);
			exit(ERR_BAD_CONFIG);
		}
	}

	/* Check if node record exists */

	record_status = get_node_record(primary_conn,
									config_file_options.node_id,
									&node_record);

	if (record_status == RECORD_FOUND && !runtime_options.force)
	{
		log_error(_("node %i is already registered"),
				  config_file_options.node_id);
		log_hint(_("use option -F/--force to overwrite an existing node record"));
		PQfinish(primary_conn);
		if (PQstatus(conn) == CONNECTION_OK)
			PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	/*
	 * If an upstream node is defined, check if that node exists and is active.
	 *
	 * If it doesn't exist, and --force set, create a minimal inactive record,
	 * on the assumption that the user knows what they are doing (usually some
	 * kind of provisioning where multiple servers are created in parallel) and
	 * will create the active record later.
	 */
1737 	if (runtime_options.upstream_node_id != NO_UPSTREAM_NODE)
1738 	{
1739 		RecordStatus upstream_record_status = RECORD_NOT_FOUND;
1740 		t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;
1741 
1742 		if (runtime_options.upstream_node_id == config_file_options.node_id)
1743 		{
1744 			log_error(_("provided node ID for --upstream-node-id (%i) is the same as the configured local node ID (%i)"),
1745 					  runtime_options.upstream_node_id,
1746 					  config_file_options.node_id);
1747 			PQfinish(primary_conn);
1748 			if (PQstatus(conn) == CONNECTION_OK)
1749 				PQfinish(conn);
1750 			exit(ERR_BAD_CONFIG);
1751 		}
1752 
1753 		upstream_record_status = get_node_record(primary_conn,
1754 												 runtime_options.upstream_node_id,
1755 												 &upstream_node_record);
1756 
1757 		/* create placeholder upstream record if -F/--force set */
1758 		if (upstream_record_status != RECORD_FOUND)
1759 		{
1760 			t_node_info placeholder_upstream_node_record = T_NODE_INFO_INITIALIZER;
1761 
1762 			if (!runtime_options.force)
1763 			{
1764 				log_error(_("no record found for upstream node %i"),
1765 						  runtime_options.upstream_node_id);
1766 				/* footgun alert - only do this if you know what you're doing */
1767 				log_hint(_("use option -F/--force to create a dummy upstream record"));
1768 				PQfinish(primary_conn);
1769 				if (PQstatus(conn) == CONNECTION_OK)
1770 					PQfinish(conn);
1771 				exit(ERR_BAD_CONFIG);
1772 			}
1773 
1774 			log_notice(_("creating placeholder record for upstream node %i"),
1775 					   runtime_options.upstream_node_id);
1776 
1777 			placeholder_upstream_node_record.node_id = runtime_options.upstream_node_id;
1778 			placeholder_upstream_node_record.type = STANDBY;
1779 			placeholder_upstream_node_record.upstream_node_id = NO_UPSTREAM_NODE;
1780 			strncpy(placeholder_upstream_node_record.conninfo, runtime_options.upstream_conninfo, MAXLEN);
1781 			placeholder_upstream_node_record.active = false;
1782 
1783 			record_created = create_node_record(primary_conn,
1784 												"standby register",
1785 												&placeholder_upstream_node_record);
1786 
1787 			/*
1788 			 * It's possible, in the kind of scenario this functionality is
1789 			 * intended to support, that there's a race condition where the
1790 			 * node's actual record gets inserted, causing the insert of the
1791 			 * placeholder record to fail. In that case we don't worry
1792 			 * about the insert failing; otherwise we bail out.
1793 			 *
1794 			 * TODO: teach create_node_record() to use ON CONFLICT DO NOTHING
1795 			 * for 9.5 and later.
1796 			 */
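			/*
			 * For illustration only: the idempotent insert the above TODO
			 * envisages would look roughly like this (column list abridged;
			 * a sketch, not the statement create_node_record() builds today):
			 *
			 *   INSERT INTO repmgr.nodes (node_id, upstream_node_id, active, node_name, type, conninfo)
			 *        VALUES ($1, $2, $3, $4, $5, $6)
			 *   ON CONFLICT (node_id) DO NOTHING;
			 */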
1797 			if (record_created == false)
1798 			{
1799 				upstream_record_status = get_node_record(primary_conn,
1800 														 runtime_options.upstream_node_id,
1801 														 &placeholder_upstream_node_record);
1802 				if (upstream_record_status != RECORD_FOUND)
1803 				{
1804 					log_error(_("unable to create placeholder record for upstream node %i"),
1805 							  runtime_options.upstream_node_id);
1806 					PQfinish(primary_conn);
1807 					if (PQstatus(conn) == CONNECTION_OK)
1808 						PQfinish(conn);
1809 					exit(ERR_BAD_CONFIG);
1810 				}
1811 
1812 				log_info(_("a record for upstream node %i was already created"),
1813 						 runtime_options.upstream_node_id);
1814 			}
1815 		}
1816 		else if (upstream_node_record.active == false)
1817 		{
1818 			/*
1819 			 * upstream node is inactive and --force not supplied - refuse to
1820 			 * register
1821 			 */
1822 			if (!runtime_options.force)
1823 			{
1824 				log_error(_("record for upstream node %i is marked as inactive"),
1825 						  runtime_options.upstream_node_id);
1826 				log_hint(_("use option -F/--force to register a standby with an inactive upstream node"));
1827 				PQfinish(primary_conn);
1828 				if (PQstatus(conn) == CONNECTION_OK)
1829 					PQfinish(conn);
1830 				exit(ERR_BAD_CONFIG);
1831 			}
1832 
1833 			/*
1834 			 * user is using the --force - notify about the potential footgun
1835 			 */
1836 			log_notice(_("registering node %i with inactive upstream node %i"),
1837 					   config_file_options.node_id,
1838 					   runtime_options.upstream_node_id);
1839 		}
1840 		/* check upstream node is accessible and this node is connected */
1841 		else
1842 		{
1843 			PGconn	   *upstream_conn = NULL;
1844 
1845 			upstream_conn = establish_db_connection(upstream_node_record.conninfo, false);
1846 
1847 			if (PQstatus(upstream_conn) != CONNECTION_OK)
1848 			{
1849 				if (!runtime_options.force)
1850 				{
1851 					log_error(_("unable to connect to upstream node \"%s\" (ID: %i)"),
1852 							  upstream_node_record.node_name,
1853 							  upstream_node_record.node_id);
1854 					log_hint(_("use -F/--force to continue anyway"));
1855 					PQfinish(primary_conn);
1856 					if (PQstatus(conn) == CONNECTION_OK)
1857 						PQfinish(conn);
1858 					exit(ERR_BAD_CONFIG);
1859 				}
1860 
1861 				log_warning(_("unable to connect to upstream node \"%s\" (ID: %i) but continuing anyway"),
1862 							upstream_node_record.node_name,
1863 							upstream_node_record.node_id);
1864 			}
1865 			else
1866 			{
1867 				/* check our standby is connected */
1868 				if (is_downstream_node_attached(upstream_conn, config_file_options.node_name, NULL) == NODE_ATTACHED)
1869 				{
1870 					log_verbose(LOG_INFO, _("local node is attached to specified upstream node %i"), runtime_options.upstream_node_id);
1871 				}
1872 				else
1873 				{
1874 					if (!runtime_options.force)
1875 					{
1876 						log_error(_("this node does not appear to be attached to upstream node \"%s\" (ID: %i)"),
1877 								  upstream_node_record.node_name,
1878 								  upstream_node_record.node_id);
1879 
1880 						log_detail(_("no record for application name \"%s\" found in \"pg_stat_replication\""),
1881 								   config_file_options.node_name);
1882 						log_hint(_("use -F/--force to continue anyway"));
1883 						PQfinish(primary_conn);
1884 						if (PQstatus(conn) == CONNECTION_OK)
1885 							PQfinish(conn);
1886 						exit(ERR_BAD_CONFIG);
1887 					}
1888 					log_warning(_("this node does not appear to be attached to upstream node \"%s\" (ID: %i)"),
1889 								upstream_node_record.node_name,
1890 								upstream_node_record.node_id);
1891 				}
1892 				PQfinish(upstream_conn);
1893 			}
1894 		}
1895 	}
1896 
1897 	/*
1898 	 * populate node record structure with current values set in repmgr.conf
1899 	 * and/or the command line (this will overwrite any existing values, which
1900 	 * is what we want when updating the record)
1901 	 */
1902 	init_node_record(&node_record);
1903 	node_record.type = STANDBY;
1904 
1905 	/* if --upstream-node-id not provided, set to primary node id */
1906 	if (node_record.upstream_node_id == UNKNOWN_NODE_ID)
1907 	{
1908 		node_record.upstream_node_id = primary_node_id;
1909 	}
1910 
1911 	/*
1912 	 * If --upstream-node-id not provided, we're defaulting to the primary as
1913 	 * upstream node. If the local node is available, double-check that it's attached
1914 	 * to the primary, in case --upstream-node-id was an accidental omission.
1915 	 *
1916 	 * Currently we'll only do this for newly registered nodes.
1917 	 */
1918 	if (runtime_options.upstream_node_id == NO_UPSTREAM_NODE && PQstatus(conn) == CONNECTION_OK)
1919 	{
1920 		/* only do this if record does not exist */
1921 		if (record_status != RECORD_FOUND)
1922 		{
1923 			log_warning(_("--upstream-node-id not supplied, assuming upstream node is primary (node ID %i)"),
1924 						primary_node_id);
1925 
1926 			/* check our standby is connected */
1927 			if (is_downstream_node_attached(primary_conn, config_file_options.node_name, NULL) == NODE_ATTACHED)
1928 			{
1929 				log_verbose(LOG_INFO, _("local node is attached to primary"));
1930 			}
1931 			else if (runtime_options.force == false)
1932 			{
1933 				log_error(_("local node not attached to primary node %i"), primary_node_id);
1934 				/* TODO: 9.6 and later, display detail from pg_stat_wal_receiver */
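				/*
				 * A sketch of the detail query the above TODO envisages
				 * ("pg_stat_wal_receiver" is available from PostgreSQL 9.6;
				 * "status" and "conninfo" are columns of that view):
				 *
				 *   SELECT status, conninfo FROM pg_stat_wal_receiver;
				 */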
1935 				log_hint(_("specify the actual upstream node id with --upstream-node-id, or use -F/--force to continue anyway"));
1936 
1937 				if (runtime_options.dry_run == true)
1938 				{
1939 					dry_run_ok = false;
1940 				}
1941 				else
1942 				{
1943 					PQfinish(primary_conn);
1944 					PQfinish(conn);
1945 					exit(ERR_BAD_CONFIG);
1946 				}
1947 			}
1948 			else
1949 			{
1950 				log_warning(_("local node not attached to primary node %i"), primary_node_id);
1951 				log_notice(_("-F/--force supplied, continuing anyway"));
1952 			}
1953 		}
1954 
1955 	}
1956 
1957 	if (runtime_options.dry_run == true)
1958 	{
1959 		PQfinish(primary_conn);
1960 		if (PQstatus(conn) == CONNECTION_OK)
1961 			PQfinish(conn);
1962 
1963 		if (dry_run_ok == false)
1964 		{
1965 			log_warning(_("issue(s) encountered; see preceding log messages"));
1966 			exit(ERR_BAD_CONFIG);
1967 		}
1968 
1969 		log_info(_("all prerequisites for \"standby register\" are met"));
1970 
1971 		exit(SUCCESS);
1972 	}
1973 
1974 	/*
1975 	 * node record exists - update it (at this point we have already
1976 	 * established that -F/--force is in use)
1977 	 */
1978 	if (record_status == RECORD_FOUND)
1979 	{
1980 		record_created = update_node_record(primary_conn,
1981 											"standby register",
1982 											&node_record);
1983 	}
1984 	else
1985 	{
1986 		record_created = create_node_record(primary_conn,
1987 											"standby register",
1988 											&node_record);
1989 	}
1990 
1991 	initPQExpBuffer(&details);
1992 
1993 	if (record_created == false)
1994 	{
1995 		appendPQExpBuffer(&details,
1996 						  _("standby registration failed; provided upstream node ID was %i"),
1997 						  node_record.upstream_node_id);
1998 
1999 		if (runtime_options.force == true)
2000 			appendPQExpBufferStr(&details,
2001 								 _(" (-F/--force option was used)"));
2002 
2003 		create_event_notification_extended(
2004 			primary_conn,
2005 			&config_file_options,
2006 			config_file_options.node_id,
2007 			"standby_register",
2008 			false,
2009 			details.data,
2010 			&event_info);
2011 
2012 		termPQExpBuffer(&details);
2013 		PQfinish(primary_conn);
2014 		primary_conn = NULL;
2015 
2016 		if (PQstatus(conn) == CONNECTION_OK)
2017 			PQfinish(conn);
2018 		exit(ERR_BAD_CONFIG);
2019 	}
2020 
2021 	appendPQExpBuffer(&details,
2022 					  _("standby registration succeeded; upstream node ID is %i"),
2023 					  node_record.upstream_node_id);
2024 
2025 	if (runtime_options.force == true)
2026 		appendPQExpBufferStr(&details,
2027 							 _(" (-F/--force option was used)"));
2028 
2029 
2030 	/* Log the event */
2031 	create_event_notification_extended(
2032 		primary_conn,
2033 		&config_file_options,
2034 		config_file_options.node_id,
2035 		"standby_register",
2036 		true,
2037 		details.data,
2038 		&event_info);
2039 
2040 	termPQExpBuffer(&details);
2041 
2042 	/*
2043 	 * If the --wait-sync option was provided, wait for the records to
2044 	 * synchronise (providing 0 seconds disables the wait, which is the same
2045 	 * as not providing the option). The default value is -1, which means
2046 	 * wait with no timeout.
2047 	 */
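	/*
	 * Illustrative invocation (flag name as documented for "standby register"):
	 *
	 *   repmgr standby register --wait-sync=30
	 *
	 * polls the standby for up to 30 seconds for the new node record to
	 * arrive via replication before giving up.
	 */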
2048 
2049 	if (PQstatus(conn) == CONNECTION_OK &&
2050 		runtime_options.wait_register_sync == true &&
2051 		runtime_options.wait_register_sync_seconds != 0)
2052 	{
2053 		bool		sync_ok = false;
2054 		int			timer = 0;
2055 		RecordStatus node_record_status = RECORD_NOT_FOUND;
2056 		t_node_info node_record_on_primary = T_NODE_INFO_INITIALIZER;
2057 		t_node_info node_record_on_standby = T_NODE_INFO_INITIALIZER;
2058 
2059 		node_record_status = get_node_record(primary_conn,
2060 											 config_file_options.node_id,
2061 											 &node_record_on_primary);
2062 
2063 		if (node_record_status != RECORD_FOUND)
2064 		{
2065 			log_error(_("unable to retrieve node record from primary"));
2066 			PQfinish(primary_conn);
2067 			PQfinish(conn);
2068 			exit(ERR_REGISTRATION_SYNC);
2069 		}
2070 
2071 		for (;;)
2072 		{
2073 			bool		records_match = true;
2074 
2075 			/*
2076 			 * If timeout set to a positive value, check if we've reached it and
2077 			 * exit the loop
2078 			 */
2079 			if (runtime_options.wait_register_sync_seconds > 0 && runtime_options.wait_register_sync_seconds == timer)
2080 				break;
2081 
2082 			node_record_status = get_node_record(conn,
2083 												 config_file_options.node_id,
2084 												 &node_record_on_standby);
2085 
2086 			if (node_record_status == RECORD_NOT_FOUND)
2087 			{
2088 				/* no record available yet on standby */
2089 				records_match = false;
2090 			}
2091 			else if (node_record_status == RECORD_FOUND)
2092 			{
2093 				/* compare relevant fields */
2094 				if (node_record_on_standby.upstream_node_id != node_record_on_primary.upstream_node_id)
2095 					records_match = false;
2096 
2097 				if (node_record_on_standby.type != node_record_on_primary.type)
2098 					records_match = false;
2099 
2100 				if (node_record_on_standby.priority != node_record_on_primary.priority)
2101 					records_match = false;
2102 
2103 				if (strcmp(node_record_on_standby.location, node_record_on_primary.location) != 0)
2104 					records_match = false;
2105 
2106 				if (node_record_on_standby.active != node_record_on_primary.active)
2107 					records_match = false;
2108 
2109 				if (strcmp(node_record_on_standby.node_name, node_record_on_primary.node_name) != 0)
2110 					records_match = false;
2111 
2112 				if (strcmp(node_record_on_standby.conninfo, node_record_on_primary.conninfo) != 0)
2113 					records_match = false;
2114 
2115 				if (strcmp(node_record_on_standby.slot_name, node_record_on_primary.slot_name) != 0)
2116 					records_match = false;
2117 
2118 				if (records_match == true)
2119 				{
2120 					sync_ok = true;
2121 					break;
2122 				}
2123 			}
2124 
2125 			sleep(1);
2126 			timer++;
2127 		}
2128 
2129 		/* Log the event */
2130 		initPQExpBuffer(&details);
2131 
2132 		if (sync_ok == false)
2133 		{
2134 			appendPQExpBuffer(&details,
2135 							  _("node record was not synchronised after %i seconds"),
2136 							  runtime_options.wait_register_sync_seconds);
2137 		}
2138 		else
2139 		{
2140 			appendPQExpBuffer(&details,
2141 							  _("node record synchronised after %i seconds"),
2142 							  timer);
2143 		}
2144 
2145 		create_event_notification_extended(
2146 			primary_conn,
2147 			&config_file_options,
2148 			config_file_options.node_id,
2149 			"standby_register_sync",
2150 			sync_ok,
2151 			details.data,
2152 			&event_info);
2153 
2154 		if (sync_ok == false)
2155 		{
2156 			log_error("%s", details.data);
2157 			termPQExpBuffer(&details);
2158 			PQfinish(primary_conn);
2159 			PQfinish(conn);
2160 			exit(ERR_REGISTRATION_SYNC);
2161 		}
2162 
2163 		log_info(_("node record on standby synchronised from primary"));
2164 		log_detail("%s", details.data);
2165 		termPQExpBuffer(&details);
2166 	}
2167 
2168 
2169 	PQfinish(primary_conn);
2170 
2171 	if (PQstatus(conn) == CONNECTION_OK)
2172 		PQfinish(conn);
2173 
2174 	log_info(_("standby registration complete"));
2175 	log_notice(_("standby node \"%s\" (ID: %i) successfully registered"),
2176 			   config_file_options.node_name, config_file_options.node_id);
2177 	return;
2178 }
2179 
2180 
2181 /*
2182  * do_standby_unregister()
2183  *
2184  * Event(s):
2185  *  - standby_unregister
2186  */
2187 void
2188 do_standby_unregister(void)
2189 {
2190 	PGconn	   *conn = NULL;
2191 	PGconn	   *primary_conn = NULL;
2192 
2193 	int			target_node_id = UNKNOWN_NODE_ID;
2194 	t_node_info node_info = T_NODE_INFO_INITIALIZER;
2195 
2196 	bool		node_record_deleted = false;
2197 
2198 	log_info(_("connecting to local standby"));
2199 	conn = establish_db_connection(config_file_options.conninfo, true);
2200 
2201 	/* check if there is a primary in this cluster */
2202 	log_info(_("connecting to primary database"));
2203 
2204 	primary_conn = get_primary_connection(conn, NULL, NULL);
2205 
2206 	if (PQstatus(primary_conn) != CONNECTION_OK)
2207 	{
2208 		log_error(_("unable to connect to primary server"));
2209 		log_detail("\n%s", PQerrorMessage(primary_conn));
2210 		exit(ERR_BAD_CONFIG);
2211 	}
2212 
2213 	/*
2214 	 * if --node-id was specified, unregister that node rather than the
2215 	 * current one - this enables inactive nodes to be unregistered.
2216 	 */
2217 	if (runtime_options.node_id != UNKNOWN_NODE_ID)
2218 		target_node_id = runtime_options.node_id;
2219 	else
2220 		target_node_id = config_file_options.node_id;
2221 
2222 	/* Check node exists and is really a standby */
2223 
2224 	if (get_node_record(primary_conn, target_node_id, &node_info) != RECORD_FOUND)
2225 	{
2226 		log_error(_("no record found for node %i"), target_node_id);
2227 		PQfinish(primary_conn);
2228 		PQfinish(conn);
2229 		exit(ERR_BAD_CONFIG);
2230 	}
2231 
2232 	if (node_info.type != STANDBY)
2233 	{
2234 		log_error(_("node %i is not a standby server"), target_node_id);
2235 		PQfinish(primary_conn);
2236 		PQfinish(conn);
2237 		exit(ERR_BAD_CONFIG);
2238 	}
2239 
2240 	/* Now unregister the standby */
2241 	log_notice(_("unregistering node %i"), target_node_id);
2242 	node_record_deleted = delete_node_record(primary_conn,
2243 											 target_node_id);
2244 
2245 	if (node_record_deleted == false)
2246 	{
2247 		PQfinish(primary_conn);
2248 		PQfinish(conn);
2249 		exit(ERR_BAD_CONFIG);
2250 	}
2251 
2252 	/* Log the event */
2253 	create_event_notification(primary_conn,
2254 							  &config_file_options,
2255 							  target_node_id,
2256 							  "standby_unregister",
2257 							  true,
2258 							  NULL);
2259 
2260 	PQfinish(primary_conn);
2261 	PQfinish(conn);
2262 
2263 	log_info(_("standby unregistration complete"));
2264 
2265 	return;
2266 }
2267 
2268 
2269 /*
2270  * do_standby_promote()
2271  *
2272  * Event(s):
2273  *  - standby_promote
2274  */
2275 void
2276 do_standby_promote(void)
2277 {
2278 	PGconn	   *local_conn = NULL;
2279 	PGconn	   *current_primary_conn = NULL;
2280 
2281 	RecoveryType recovery_type = RECTYPE_UNKNOWN;
2282 
2283 	int			existing_primary_id = UNKNOWN_NODE_ID;
2284 
2285 	RecordStatus record_status = RECORD_NOT_FOUND;
2286 	t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
2287 
2288 	NodeInfoList sibling_nodes = T_NODE_INFO_LIST_INITIALIZER;
2289 	SiblingNodeStats sibling_nodes_stats = T_SIBLING_NODES_STATS_INITIALIZER;
2290 	int			available_wal_senders = 0;
2291 	bool		dry_run_success = true;
2292 
2293 	local_conn = establish_db_connection(config_file_options.conninfo, true);
2294 
2295 	log_verbose(LOG_INFO, _("connected to standby, checking its state"));
2296 
2297 	/* Verify that standby is a supported server version */
2298 	(void) check_server_version(local_conn, "standby", true, NULL);
2299 
2300 	/* Check we are on a standby node */
2301 	recovery_type = get_recovery_type(local_conn);
2302 
2303 	if (recovery_type != RECTYPE_STANDBY)
2304 	{
2305 		if (recovery_type == RECTYPE_PRIMARY)
2306 		{
2307 			log_error(_("STANDBY PROMOTE can only be executed on a standby node"));
2308 			PQfinish(local_conn);
2309 			exit(ERR_PROMOTION_FAIL);
2310 		}
2311 		else
2312 		{
2313 			log_error(_("unable to determine node's recovery state"));
2314 			PQfinish(local_conn);
2315 			exit(ERR_DB_CONN);
2316 		}
2317 	}
2318 	else if (runtime_options.dry_run == true)
2319 	{
2320 		log_info(_("node is a standby"));
2321 	}
2322 
2323 	record_status = get_node_record(local_conn, config_file_options.node_id, &local_node_record);
2324 	if (record_status != RECORD_FOUND)
2325 	{
2326 		log_error(_("unable to retrieve node record for node %i"),
2327 				  config_file_options.node_id);
2328 
2329 		PQfinish(local_conn);
2330 
2331 		exit(ERR_DB_QUERY);
2332 	}
2333 
2334 	/*
2335 	 * In PostgreSQL 12 and earlier, executing "pg_ctl ... promote" when WAL
2336 	 * replay is paused and WAL is pending replay will mean the standby will
2337 	 * not promote until replay is resumed.
2338 	 *
2339 	 * As that could happen at any time outside repmgr's control, we
2340 	 * need to avoid leaving a "ticking timebomb" which might cause
2341 	 * an unexpected status change in the replication cluster.
2342 	 */
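	/*
	 * For reference, the pause checks below boil down to the stock
	 * PostgreSQL functions (names differ by version):
	 *
	 *   SELECT pg_is_wal_replay_paused();   -- PostgreSQL 10 and later
	 *   SELECT pg_is_xlog_replay_paused();  -- PostgreSQL 9.6 and earlier
	 */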
2343 	if (PQserverVersion(local_conn) < 130000)
2344 	{
2345 		ReplInfo 	replication_info;
2346 		bool 	 	replay_paused = false;
2347 
2348 		init_replication_info(&replication_info);
2349 
2350 		if (get_replication_info(local_conn, STANDBY, &replication_info) == false)
2351 		{
2352 			log_error(_("unable to retrieve replication information from local node"));
2353 			PQfinish(local_conn);
2354 			exit(ERR_PROMOTION_FAIL);
2355 		}
2356 
2357 		/*
2358 		 * If the local node is recovering from archive, we can't tell
2359 		 * whether there's still WAL which needs to be replayed, so
2360 		 * we'll abort if WAL replay is paused.
2361 		 */
2362 		if (replication_info.receiving_streamed_wal == false)
2363 		{
2364 			/* just a simple check for paused WAL replay */
2365 			replay_paused = is_wal_replay_paused(local_conn, false);
2366 			if (replay_paused == true)
2367 			{
2368 				log_error(_("WAL replay is paused on this node"));
2369 				log_detail(_("node is in archive recovery and is not safe to promote in this state"));
2370 				log_detail(_("replay paused at %X/%X"),
2371 						   format_lsn(replication_info.last_wal_replay_lsn));
2372 			}
2373 		}
2374 		else
2375 		{
2376 			/* check that replay is paused *and* WAL is pending replay */
2377 			replay_paused = is_wal_replay_paused(local_conn, true);
2378 			if (replay_paused == true)
2379 			{
2380 				log_error(_("WAL replay is paused on this node but not all WAL has been replayed"));
2381 				log_detail(_("replay paused at %X/%X; last WAL received is %X/%X"),
2382 						   format_lsn(replication_info.last_wal_replay_lsn),
2383 						   format_lsn(replication_info.last_wal_receive_lsn));
2384 			}
2385 		}
2386 
2387 		if (replay_paused == true)
2388 		{
2389 			if (PQserverVersion(local_conn) >= 100000)
2390 				log_hint(_("execute \"pg_wal_replay_resume()\" to unpause WAL replay"));
2391 			else
2392 				log_hint(_("execute \"pg_xlog_replay_resume()\" to unpause WAL replay"));
2393 
2394 			PQfinish(local_conn);
2395 			exit(ERR_PROMOTION_FAIL);
2396 		}
2397 	}
2398 
2399 	/* check that there's no existing primary */
2400 	current_primary_conn = get_primary_connection_quiet(local_conn, &existing_primary_id, NULL);
2401 
2402 	if (PQstatus(current_primary_conn) == CONNECTION_OK)
2403 	{
2404 		log_error(_("this replication cluster already has an active primary server"));
2405 
2406 		if (existing_primary_id != UNKNOWN_NODE_ID)
2407 		{
2408 			t_node_info primary_rec;
2409 
2410 			get_node_record(local_conn, existing_primary_id, &primary_rec);
2411 
2412 			log_detail(_("current primary is \"%s\" (ID: %i)"),
2413 					   primary_rec.node_name,
2414 					   existing_primary_id);
2415 		}
2416 
2417 		PQfinish(current_primary_conn);
2418 		PQfinish(local_conn);
2419 		exit(ERR_PROMOTION_FAIL);
2420 	}
2421 	else if (runtime_options.dry_run == true)
2422 	{
2423 		log_info(_("no active primary server found in this replication cluster"));
2424 	}
2425 
2426 	PQfinish(current_primary_conn);
2427 
2428 	/*
2429 	 * populate local node record with current state of various replication-related
2430 	 * values, so we can check for sufficient walsenders and replication slots
2431 	 */
2432 	get_node_replication_stats(local_conn, &local_node_record);
2433 
2434 	available_wal_senders = local_node_record.max_wal_senders -
2435 		local_node_record.attached_wal_receivers;
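	/*
	 * Worked example with hypothetical numbers: max_wal_senders = 10 and
	 * 3 attached WAL receivers leave 7 free walsender slots for any
	 * siblings which will need to attach to this node after promotion.
	 */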
2436 
2437 
2438 	/*
2439 	 * Get list of sibling nodes; if --siblings-follow specified,
2440 	 * check they're reachable; if not, the list will be used to warn
2441 	 * about nodes which will not follow the new primary
2442 	 */
2443 	get_active_sibling_node_records(local_conn,
2444 									local_node_record.node_id,
2445 									local_node_record.upstream_node_id,
2446 									&sibling_nodes);
2447 
2448 	if (check_sibling_nodes(&sibling_nodes, &sibling_nodes_stats) == false)
2449 	{
2450 		PQfinish(local_conn);
2451 		exit(ERR_BAD_CONFIG);
2452 	}
2453 
2454 	/*
2455 	 * check there are sufficient free walsenders - obviously there's potential
2456 	 * for a later race condition if some walsenders come into use before the
2457 	 * promote operation gets around to attaching the sibling nodes, but
2458 	 * this should catch any actual existing configuration issue (and if anyone's
2459 	 * performing a promote in such an unstable environment, they only have
2460 	 * themselves to blame).
2461 	 */
2462 	if (check_free_wal_senders(available_wal_senders, &sibling_nodes_stats, &dry_run_success) == false)
2463 	{
2464 		if (runtime_options.dry_run == false || runtime_options.force == false)
2465 		{
2466 			PQfinish(local_conn);
2467 			exit(ERR_BAD_CONFIG);
2468 		}
2469 	}
2470 
2471 
2472 	/*
2473 	 * if replication slots are required by siblings,
2474 	 * check the promotion candidate has sufficient free slots
2475 	 */
2476 	if (check_free_slots(&local_node_record, &sibling_nodes_stats, &dry_run_success) == false)
2477 	{
2478 		if (runtime_options.dry_run == false || runtime_options.force == false)
2479 		{
2480 			PQfinish(local_conn);
2481 			exit(ERR_BAD_CONFIG);
2482 		}
2483 	}
2484 
2485 	/*
2486 	 * In --dry-run mode, note which promotion method will be used.
2487 	 * For PostgreSQL 12 and later, check whether pg_promote() can be executed.
2488 	 */
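	/*
	 * Note: pg_promote() is superuser-only by default; other roles need an
	 * explicit grant, e.g. (illustrative):
	 *
	 *   GRANT EXECUTE ON FUNCTION pg_promote(boolean, integer) TO repmgr;
	 */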
2489 	if (runtime_options.dry_run == true)
2490 	{
2491 		if (config_file_options.service_promote_command[0] != '\0')
2492 		{
2493 			log_info(_("node will be promoted using command defined in \"service_promote_command\""));
2494 			log_detail(_("\"service_promote_command\" is \"%s\""),
2495 					   config_file_options.service_promote_command);
2496 		}
2497 		else if (PQserverVersion(local_conn) >= 120000)
2498 		{
2499 			if (can_execute_pg_promote(local_conn) == false)
2500 			{
2501 				log_info(_("node will be promoted using \"pg_ctl promote\""));
2502 				log_detail(_("user \"%s\" does not have permission to execute \"pg_promote()\""),
2503 						   PQuser(local_conn));
2504 			}
2505 			else
2506 			{
2507 				log_info(_("node will be promoted using the \"pg_promote()\" function"));
2508 			}
2509 		}
2510 		else
2511 		{
2512 			log_info(_("node will be promoted using \"pg_ctl promote\""));
2513 		}
2514 	}
2515 
2516 	if (runtime_options.dry_run == true)
2517 	{
2518 		PQfinish(local_conn);
2519 
2520 		if (dry_run_success == false)
2521 		{
2522 			log_error(_("prerequisites for executing STANDBY PROMOTE are *not* met"));
2523 			log_hint(_("see preceding error messages"));
2524 			exit(ERR_BAD_CONFIG);
2525 		}
2526 		log_info(_("prerequisites for executing STANDBY PROMOTE are met"));
2527 		exit(SUCCESS);
2528 	}
2529 
2530 	_do_standby_promote_internal(local_conn);
2531 
2532 	/*
2533 	 * If --siblings-follow specified, attempt to make them follow the new
2534 	 * primary
2535 	 */
2536 	if (runtime_options.siblings_follow == true && sibling_nodes.node_count > 0)
2537 	{
2538 		sibling_nodes_follow(&local_node_record, &sibling_nodes, &sibling_nodes_stats);
2539 	}
2540 
2541 	clear_node_info_list(&sibling_nodes);
2542 
2543 	return;
2544 }
2545 
2546 
2547 static void
2548 _do_standby_promote_internal(PGconn *conn)
2549 {
2550 	int			i;
2551 	bool		promote_success = false;
2552 	PQExpBufferData details;
2553 
2554 	RecoveryType recovery_type = RECTYPE_UNKNOWN;
2555 
2556 	t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
2557 	RecordStatus record_status = RECORD_NOT_FOUND;
2558 	char		data_dir[MAXPGPATH];
2559 
2560 	get_node_config_directory(data_dir);
2561 
2562 	/* fetch local node record so we can add detail in log messages */
2563 	record_status = get_node_record(conn,
2564 									config_file_options.node_id,
2565 									&local_node_record);
2566 
2567 	if (record_status != RECORD_FOUND)
2568 	{
2569 		log_error(_("unable to retrieve record for node %i"),
2570 				  config_file_options.node_id);
2571 		PQfinish(conn);
2572 		exit(ERR_BAD_CONFIG);
2573 	}
2574 
2575 
2576 	/*
2577 	 * Promote standby to primary.
2578 	 *
2579 	 * "pg_ctl promote" returns immediately and (prior to PostgreSQL 10) had no -w
2580 	 * option, so we can't be sure when or if the promotion completes. For now
2581 	 * we'll poll the server until "promote_check_timeout" (default 60 seconds) is reached.
2582 	 *
2583 	 * For PostgreSQL 12 and later, use the pg_promote() function, unless
2584 	 * "service_promote_command" or "use_pg_ctl_promote" is set.
2585 	 */
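	/*
	 * For reference, the stock signature is
	 * pg_promote(wait boolean DEFAULT true, wait_seconds integer DEFAULT 60);
	 * promote_standby() is assumed to issue the equivalent of
	 * "SELECT pg_promote(false)", since we poll for completion ourselves below.
	 */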
2586 	{
2587 		bool use_pg_promote = false;
2588 
2589 
2590 		if (PQserverVersion(conn) >= 120000)
2591 		{
2592 			use_pg_promote = true;
2593 
2594 			if (config_file_options.service_promote_command[0] != '\0')
2595 			{
2596 				use_pg_promote = false;
2597 			}
2598 			else if (can_execute_pg_promote(conn) == false)
2599 			{
2600 				use_pg_promote = false;
2601 				log_info(_("user \"%s\" does not have permission to execute \"pg_promote()\", falling back to \"pg_ctl promote\""),
2602 						 PQuser(conn));
2603 			}
2604 		}
2605 
2606 		log_notice(_("promoting standby to primary"));
2607 
2608 		if (use_pg_promote == true)
2609 		{
2610 			log_detail(_("promoting server \"%s\" (ID: %i) using pg_promote()"),
2611 					   local_node_record.node_name,
2612 					   local_node_record.node_id);
2613 
2614 			/*
2615 			 * We'll check for promotion success ourselves, but will abort
2616 			 * if some unrecoverable error prevented the function from being
2617 			 * executed.
2618 			 */
2619 			if (!promote_standby(conn, false, 0))
2620 			{
2621 				log_error(_("unable to promote server from standby to primary"));
2622 				exit(ERR_PROMOTION_FAIL);
2623 			}
2624 		}
2625 		else
2626 		{
2627 			char		script[MAXLEN];
2628 			int			r;
2629 
2630 			get_server_action(ACTION_PROMOTE, script, (char *) data_dir);
2631 
2632 			log_detail(_("promoting server \"%s\" (ID: %i) using \"%s\""),
2633 					   local_node_record.node_name,
2634 					   local_node_record.node_id,
2635 					   script);
2636 
2637 			r = system(script);
2638 			if (r != 0)
2639 			{
2640 				log_error(_("unable to promote server from standby to primary"));
2641 				exit(ERR_PROMOTION_FAIL);
2642 			}
2643 		}
2644 	}
2645 
2646 	log_notice(_("waiting up to %i seconds (parameter \"promote_check_timeout\") for promotion to complete"),
2647 			   config_file_options.promote_check_timeout);
2648 
2649 	for (i = 0; i < config_file_options.promote_check_timeout; i += config_file_options.promote_check_interval)
2650 	{
2651 		recovery_type = get_recovery_type(conn);
2652 
2653 		if (recovery_type == RECTYPE_PRIMARY)
2654 		{
2655 			promote_success = true;
2656 			break;
2657 		}
2658 		sleep(config_file_options.promote_check_interval);
2659 	}
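	/*
	 * Illustrative repmgr.conf settings governing the polling loop above
	 * (example values, not verified defaults):
	 *
	 *   promote_check_timeout=60
	 *   promote_check_interval=1
	 */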
2660 
2661 	if (promote_success == false)
2662 	{
2663 		if (recovery_type == RECTYPE_STANDBY)
2664 		{
2665 			log_error(_("STANDBY PROMOTE failed, node is still a standby"));
2666 			log_detail(_("node still in recovery after %i seconds"), config_file_options.promote_check_timeout);
2667 			log_hint(_("the node may need more time to promote itself, check the PostgreSQL log for details"));
2668 			PQfinish(conn);
2669 			exit(ERR_PROMOTION_FAIL);
2670 		}
2671 		else
2672 		{
2673 			log_error(_("connection to node lost"));
2674 			PQfinish(conn);
2675 			exit(ERR_DB_CONN);
2676 		}
2677 	}
2678 
2679 	log_verbose(LOG_INFO, _("standby promoted to primary after %i second(s)"), i);
2680 
2681 	/* update node information to reflect new status */
2682 	if (update_node_record_set_primary(conn, config_file_options.node_id) == false)
2683 	{
2684 		initPQExpBuffer(&details);
2685 		appendPQExpBuffer(&details,
2686 						  _("unable to update node record for node %i"),
2687 						  config_file_options.node_id);
2688 
2689 		log_error("%s", details.data);
2690 
2691 		create_event_notification(NULL,
2692 								  &config_file_options,
2693 								  config_file_options.node_id,
2694 								  "standby_promote",
2695 								  false,
2696 								  details.data);
2697 
2698 		exit(ERR_DB_QUERY);
2699 	}
2700 
2701 
2702 	initPQExpBuffer(&details);
2703 	appendPQExpBuffer(&details,
2704 					  _("server \"%s\" (ID: %i) was successfully promoted to primary"),
2705 					  local_node_record.node_name,
2706 					  local_node_record.node_id);
2707 
2708 	log_notice(_("STANDBY PROMOTE successful"));
2709 	log_detail("%s", details.data);
2710 
2711 	/* Log the event */
2712 	create_event_notification(conn,
2713 							  &config_file_options,
2714 							  config_file_options.node_id,
2715 							  "standby_promote",
2716 							  true,
2717 							  details.data);
2718 
2719 	termPQExpBuffer(&details);
2720 
2721 	return;
2722 }
2723 
2724 
2725 /*
2726  * Follow a new primary.
2727  *
2728  * Node must be running. To start an inactive node and point it at a
2729  * new primary, use "repmgr node rejoin".
2730  *
2731  * TODO: enable provision of new primary's conninfo parameters, which
2732  * will be necessary if the primary's information has changed, but
2733  * was not replicated to the current standby.
2734  */
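/*
 * Illustrative invocations ("-f" being repmgr's standard config file flag):
 *
 *   repmgr standby follow -f /etc/repmgr.conf
 *   repmgr standby follow -f /etc/repmgr.conf --upstream-node-id=2 --dry-run
 */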
2735 
2736 void
2737 do_standby_follow(void)
2738 {
2739 	PGconn	   *local_conn = NULL;
2740 	t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
2741 
2742 	PGconn	   *primary_conn = NULL;
2743 	int			primary_node_id = UNKNOWN_NODE_ID;
2744 
2745 	PGconn	   *follow_target_conn = NULL;
2746 	int			follow_target_node_id = UNKNOWN_NODE_ID;
2747 	t_node_info follow_target_node_record = T_NODE_INFO_INITIALIZER;
2748 	bool		follow_target_is_primary = true;
2749 
2750 	RecordStatus record_status = RECORD_NOT_FOUND;
2751 	/* so we can pass info about the primary to event notification scripts */
2752 	t_event_info event_info = T_EVENT_INFO_INITIALIZER;
2753 
2754 	int			timer = 0;
2755 
2756 	PQExpBufferData follow_output;
2757 	bool		success = false;
2758 	int			follow_error_code = SUCCESS;
2759 
2760 	log_verbose(LOG_DEBUG, "do_standby_follow()");
2761 
2762 	local_conn = establish_db_connection(config_file_options.conninfo, false);
2763 
2764 	if (PQstatus(local_conn) != CONNECTION_OK)
2765 	{
2766 		log_hint(_("use \"repmgr node rejoin\" to re-add an inactive node to the replication cluster"));
2767 		exit(ERR_DB_CONN);
2768 	}
2769 
2770 	log_verbose(LOG_INFO, _("connected to local node"));
2771 
2772 	/* check this is a standby */
2773 	check_recovery_type(local_conn);
2774 
2775 	/* attempt to retrieve local node record */
2776 	record_status = get_node_record(local_conn,
2777 									config_file_options.node_id,
2778 									&local_node_record);
2779 
2780 	if (record_status != RECORD_FOUND)
2781 	{
2782 		log_error(_("unable to retrieve record for local node %i"),
2783 				  config_file_options.node_id);
2784 		PQfinish(local_conn);
2785 		exit(ERR_BAD_CONFIG);
2786 	}
2787 
2788 	/*
2789 	 * --upstream-node-id provided - attempt to follow that node
2790 	 */
2791 	if (runtime_options.upstream_node_id != UNKNOWN_NODE_ID)
2792 	{
2793 		/* we can't follow ourselves */
2794 		if (runtime_options.upstream_node_id == config_file_options.node_id)
2795 		{
2796 			log_error(_("provided --upstream-node-id %i is the current node"),
2797 					  runtime_options.upstream_node_id);
2798 			PQfinish(local_conn);
2799 			exit(ERR_FOLLOW_FAIL);
2800 		}
2801 
2802 		follow_target_node_id = runtime_options.upstream_node_id;
2803 		record_status = get_node_record(local_conn,
2804 										follow_target_node_id,
2805 										&follow_target_node_record);
2806 
2807 		/* but we must follow a node which exists (=registered) */
2808 		if (record_status != RECORD_FOUND)
2809 		{
2810 			log_error(_("unable to find record for intended upstream node %i"),
2811 					  runtime_options.upstream_node_id);
2812 			PQfinish(local_conn);
2813 			exit(ERR_FOLLOW_FAIL);
2814 		}
2815 	}
2816 	/*
2817 	 * otherwise determine the current primary and attempt to follow that
2818 	 */
2819 	else
2820 	{
2821 		log_notice(_("attempting to find and follow current primary"));
2822 	}
2823 
2824 	/*
2825 	 * Attempt to connect to follow target - if this was provided with --upstream-node-id,
2826 	 * we'll connect to that, otherwise we'll attempt to find the current primary.
2827 	 *
2828 	 * If --wait provided, loop for up to "primary_follow_timeout" seconds
2829 	 * before giving up.
2830 	 *
2831 	 * XXX add `upstream_follow_timeout` ?
2832 	 */
2833 
2834 	for (timer = 0; timer < config_file_options.primary_follow_timeout; timer++)
2835 	{
2836 		/* --upstream-node-id provided - connect to specified node */
2837 		if (follow_target_node_id != UNKNOWN_NODE_ID)
2838 		{
2839 			follow_target_conn = establish_db_connection_quiet(follow_target_node_record.conninfo);
2840 		}
2841 		/* attempt to find current primary node */
2842 		else
2843 		{
2844 			follow_target_conn = get_primary_connection_quiet(local_conn,
2845 															  &follow_target_node_id,
2846 															  NULL);
2847 		}
2848 
2849 		if (PQstatus(follow_target_conn) == CONNECTION_OK || runtime_options.wait_provided == false)
2850 		{
2851 			break;
2852 		}
2853 		sleep(1);
2854 	}
2855 
2856 	/* unable to connect to the follow target */
2857 	if (PQstatus(follow_target_conn) != CONNECTION_OK)
2858 	{
2859 		if (follow_target_node_id == UNKNOWN_NODE_ID)
2860 		{
2861 			log_error(_("unable to find a primary node"));
2862 		}
2863 		else
2864 		{
2865 			log_error(_("unable to connect to target node %i"), follow_target_node_id);
2866 		}
2867 
2868 		if (runtime_options.wait_provided == true)
2869 		{
2870 			if (follow_target_node_id == UNKNOWN_NODE_ID)
2871 			{
2872 				log_detail(_("no primary appeared after %i seconds"),
2873 						   config_file_options.primary_follow_timeout);
2874 			}
2875 			else
2876 			{
2877 				log_detail(_("unable to connect to target node %i after %i seconds"),
2878 						   follow_target_node_id,
2879 						   config_file_options.primary_follow_timeout);
2880 			}
2881 
2882 			log_hint(_("alter \"primary_follow_timeout\" in \"repmgr.conf\" to change this value"));
2883 		}
2884 
2885 		PQfinish(local_conn);
2886 		exit(ERR_FOLLOW_FAIL);
2887 	}
2888 
2889 	/* --upstream-node-id not provided - retrieve record for the node determined to be primary */
2890 	if (runtime_options.upstream_node_id == UNKNOWN_NODE_ID)
2891 	{
2892 		if (runtime_options.dry_run == true)
2893 		{
2894 			log_info(_("connected to node %i, checking for current primary"), follow_target_node_id);
2895 		}
2896 		else
2897 		{
2898 			log_verbose(LOG_INFO, _("connected to node %i, checking for current primary"), follow_target_node_id);
2899 		}
2900 
2901 		record_status = get_node_record(follow_target_conn,
2902 										follow_target_node_id,
2903 										&follow_target_node_record);
2904 
2905 		if (record_status != RECORD_FOUND)
2906 		{
2907 			log_error(_("unable to find record for follow target node %i"),
2908 					  follow_target_node_id);
2909 			PQfinish(follow_target_conn);
2910 			exit(ERR_FOLLOW_FAIL);
2911 		}
2912 	}
2913 
2914 	/*
2915 	 * Populate "event_info" with info about the node to follow for event notifications
2916 	 *
2917 	 * XXX need to differentiate between primary and non-primary?
2918 	 */
2919 	event_info.node_id = follow_target_node_id;
2920 	event_info.node_name = follow_target_node_record.node_name;
2921 	event_info.conninfo_str = follow_target_node_record.conninfo;
2922 
2923 	/*
2924 	 * Check whether follow target is in recovery, so we know later whether
2925 	 * we'll need to open a connection to the primary to update the metadata.
2926 	 * Also emit an informative message.
2927 	 */
2928 	{
2929 		PQExpBufferData node_info_msg;
2930 		RecoveryType recovery_type = RECTYPE_UNKNOWN;
2931 		initPQExpBuffer(&node_info_msg);
2932 
2933 		recovery_type = get_recovery_type(follow_target_conn);
2934 
2935 		/*
2936 		 * it's unlikely this will happen, but it's conceivable the follow target
2937 		 * will have vanished since we last talked to it
2938 		 */
2939 		if (recovery_type == RECTYPE_UNKNOWN)
2940 		{
2941 			log_error(_("unable to determine recovery type of follow target"));
2942 			PQfinish(follow_target_conn);
2943 			exit(ERR_FOLLOW_FAIL);
2944 		}
2945 
2946 		if (recovery_type == RECTYPE_PRIMARY)
2947 		{
2948 			follow_target_is_primary = true;
2949 			appendPQExpBuffer(&node_info_msg,
2950 							  _("follow target is primary node \"%s\" (ID: %i)"),
2951 							  follow_target_node_record.node_name,
2952 							  follow_target_node_id);
2953 		}
2954 		else
2955 		{
2956 			follow_target_is_primary = false;
2957 			appendPQExpBuffer(&node_info_msg,
2958 							  _("follow target is standby node \"%s\" (ID: %i)"),
2959 							  follow_target_node_record.node_name,
2960 							  follow_target_node_id);
2961 		}
2962 
2963 		if (runtime_options.dry_run == true)
2964 		{
2965 			log_info("%s", node_info_msg.data);
2966 		}
2967 		else
2968 		{
2969 			log_verbose(LOG_INFO, "%s", node_info_msg.data);
2970 		}
2971 
2972 		termPQExpBuffer(&node_info_msg);
2973 	}
2974 
2975 	/*
2976 	 * if replication slots in use, check at least one free slot is available
2977 	 * on the follow target
2978 	 */
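	/*
	 * Free slots on a node can be inspected directly (illustrative):
	 *
	 *   SELECT current_setting('max_replication_slots')::int - count(*) AS free_slots
	 *     FROM pg_replication_slots;
	 */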
2979 
2980 	if (config_file_options.use_replication_slots)
2981 	{
2982 		bool slots_available = check_replication_slots_available(follow_target_node_id,
2983 																 follow_target_conn);
2984 		if (slots_available == false)
2985 		{
2986 			PQfinish(follow_target_conn);
2987 			PQfinish(local_conn);
2988 			exit(ERR_FOLLOW_FAIL);
2989 		}
2990 	}
2991 
2992 	/* XXX check this is not current upstream anyway */
2993 
2994 	/* check if we can attach to the follow target */
2995 	{
2996 		PGconn	   *local_repl_conn = NULL;
2997 		t_system_identification local_identification = T_SYSTEM_IDENTIFICATION_INITIALIZER;
2998 
2999 		bool can_follow;
3000 		XLogRecPtr local_xlogpos = get_node_current_lsn(local_conn);
3001 
3002 		/* Check local replication connection - we want to execute IDENTIFY_SYSTEM
3003 		 * to get the current timeline ID, which might not yet be written to
3004 		 * pg_control.
3005 		 *
3006 		 * TODO: from 9.6, query "pg_stat_wal_receiver" via the existing local connection
3007 		 */
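		/*
		 * IDENTIFY_SYSTEM is a replication-protocol command returning
		 * systemid, timeline, xlogpos and dbname; roughly equivalent to
		 * (illustrative, with an assumed user):
		 *
		 *   psql "replication=1 user=repmgr" -c "IDENTIFY_SYSTEM;"
		 */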
3008 
3009 		local_repl_conn = establish_replication_connection_from_conn(local_conn,
3010 																	 local_node_record.repluser);
3011 		if (PQstatus(local_repl_conn) != CONNECTION_OK)
3012 		{
3013 			log_error(_("unable to establish a replication connection to the local node"));
3014 
3015 			PQfinish(local_conn);
3016 			PQfinish(follow_target_conn);
3017 
3018 			exit(ERR_FOLLOW_FAIL);
3019 		}
3020 		else if (runtime_options.dry_run == true)
3021 		{
3022 			log_info(_("replication connection to the local node was successful"));
3023 		}
3024 
3025 		success = identify_system(local_repl_conn, &local_identification);
3026 		PQfinish(local_repl_conn);
3027 
3028 		if (success == false)
3029 		{
3030 			log_error(_("unable to query the local node's system identification"));
3031 
3032 			PQfinish(local_conn);
3033 
3034 			PQfinish(follow_target_conn);
3035 
3036 			exit(ERR_FOLLOW_FAIL);
3037 		}
3038 
3039 		can_follow = check_node_can_attach(local_identification.timeline,
3040 										   local_xlogpos,
3041 										   follow_target_conn,
3042 										   &follow_target_node_record,
3043 										   false);
3044 
3045 		if (can_follow == false)
3046 		{
3047 			PQfinish(local_conn);
3048 			PQfinish(follow_target_conn);
3049 			exit(ERR_FOLLOW_FAIL);
3050 		}
3051 	}
3052 
3053 	PQfinish(local_conn);
3054 
3055 	/*
3056 	 * Here we'll need a connection to the primary, if the follow target is not itself the primary.
3057 	 */
3058 	if (follow_target_is_primary == false)
3059 	{
3060 		/*
3061 		 * We'll try to establish the primary connection from the follow target,
3062 		 * on the assumption that its node record is more up-to-date.
3063 		 */
3064 		primary_conn = get_primary_connection_quiet(follow_target_conn,
3065 													&primary_node_id,
3066 													NULL);
3067 
3068 		/*
3069 		 * If follow target is not primary and no other primary could be found,
3070 		 * abort because we won't be able to update the node record.
3071 		 */
3072 		if (PQstatus(primary_conn) != CONNECTION_OK)
3073 		{
3074 			log_error(_("unable to determine the cluster primary"));
3075 			log_detail(_("an active primary node is required for \"repmgr standby follow\""));
3076 			PQfinish(follow_target_conn);
3077 			exit(ERR_FOLLOW_FAIL);
3078 		}
3079 	}
3080 	else
3081 	{
3082 		primary_conn = follow_target_conn;
3083 	}
3084 
3085 	if (runtime_options.dry_run == true)
3086 	{
3087 		log_info(_("prerequisites for executing STANDBY FOLLOW are met"));
3088 		exit(SUCCESS);
3089 	}
3090 
3091 	initPQExpBuffer(&follow_output);
3092 
3093 	success = do_standby_follow_internal(
3094 		primary_conn,
3095 		follow_target_conn,
3096 		&follow_target_node_record,
3097 		&follow_output,
3098 		ERR_FOLLOW_FAIL,
3099 		&follow_error_code);
3100 
3101 	/* unable to restart the standby */
3102 	if (success == false)
3103 	{
3104 		create_event_notification_extended(
3105 			follow_target_conn,
3106 			&config_file_options,
3107 			config_file_options.node_id,
3108 			"standby_follow",
3109 			success,
3110 			follow_output.data,
3111 			&event_info);
3112 
3113 		PQfinish(follow_target_conn);
3114 
3115 		if (follow_target_is_primary == false)
3116 			PQfinish(primary_conn);
3117 
3118 		log_notice(_("STANDBY FOLLOW failed"));
3119 		if (strlen(follow_output.data))
3120 			log_detail("%s", follow_output.data);
3121 
3122 		termPQExpBuffer(&follow_output);
3123 		exit(follow_error_code);
3124 	}
3125 
3126 	termPQExpBuffer(&follow_output);
3127 
3128 	initPQExpBuffer(&follow_output);
3129 
3130 	/*
3131 	 * Wait up to "standby_follow_timeout" seconds for standby to connect to
3132 	 * upstream.
3133 	 * For 9.6 and later, we could check pg_stat_wal_receiver on the local node.
3134 	 */
3135 
3136 	/* assume success, necessary if standby_follow_timeout is zero */
3137 	success = true;
3138 
3139 	for (timer = 0; timer < config_file_options.standby_follow_timeout; timer++)
3140 	{
3141 		NodeAttached node_attached = is_downstream_node_attached(follow_target_conn,
3142 																 config_file_options.node_name,
3143 																 NULL);
3144 
3145 		if (node_attached == NODE_ATTACHED)
3146 		{
3147 			success = true;
3148 			break;
3149 		}
3150 
3151 		log_verbose(LOG_DEBUG, "sleeping %i of max %i seconds waiting for standby to attach to primary",
3152 					timer + 1,
3153 					config_file_options.standby_follow_timeout);
3154 		sleep(1);
3155 	}
3156 
3157 	if (success == true)
3158 	{
3159 		log_notice(_("STANDBY FOLLOW successful"));
3160 		appendPQExpBuffer(&follow_output,
3161 						  "standby attached to upstream node \"%s\" (ID: %i)",
3162 						  follow_target_node_record.node_name,
3163 						  follow_target_node_id);
3164 	}
3165 	else
3166 	{
3167 		log_error(_("STANDBY FOLLOW failed"));
3168 		appendPQExpBuffer(&follow_output,
3169 						  "standby did not attach to upstream node \"%s\" (ID: %i) after %i seconds",
3170 						  follow_target_node_record.node_name,
3171 						  follow_target_node_id,
3172 						  config_file_options.standby_follow_timeout);
3173 
3174 	}
3175 
3176 	log_detail("%s", follow_output.data);
3177 
3178 	create_event_notification_extended(
3179 		primary_conn,
3180 		&config_file_options,
3181 		config_file_options.node_id,
3182 		"standby_follow",
3183 		success,
3184 		follow_output.data,
3185 		&event_info);
3186 
3187 	termPQExpBuffer(&follow_output);
3188 
3189 	PQfinish(follow_target_conn);
3190 
3191 	if (follow_target_is_primary == false)
3192 		PQfinish(primary_conn);
3193 
3194 	if (success == false)
3195 		exit(ERR_FOLLOW_FAIL);
3196 
3197 	return;
3198 }
3199 
3200 
3201 /*
3202  * Perform the actual "follow" operation; this is executed by
3203  * "node rejoin" too.
3204  */
3205 bool
3206 do_standby_follow_internal(PGconn *primary_conn, PGconn *follow_target_conn, t_node_info *follow_target_node_record, PQExpBufferData *output, int general_error_code, int *error_code)
3207 {
3208 	t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
3209 	int			original_upstream_node_id = UNKNOWN_NODE_ID;
3210 	t_node_info original_upstream_node_record = T_NODE_INFO_INITIALIZER;
3211 
3212 	RecordStatus record_status = RECORD_NOT_FOUND;
3213 	char	   *errmsg = NULL;
3214 
3215 	bool		remove_old_replication_slot = false;
3216 
3217 	/*
3218 	 * Fetch our node record so we can write application_name, if set, and to
3219 	 * get the current upstream node ID, which we'll need to know if replication
3220 	 * slots are in use and we want to delete this node's slot on the current
3221 	 * upstream.
3222 	 */
3223 	record_status = get_node_record(primary_conn,
3224 									config_file_options.node_id,
3225 									&local_node_record);
3226 
3227 	if (record_status != RECORD_FOUND)
3228 	{
3229 		log_error(_("unable to retrieve record for node %i"),
3230 				  config_file_options.node_id);
3231 
3232 		*error_code = ERR_BAD_CONFIG;
3233 		return false;
3234 	}
3235 
3236 	/*
3237 	 * If replication slots are in use, we'll need to create a slot on the
3238 	 * follow target
3239 	 */
3240 
3241 	if (config_file_options.use_replication_slots)
3242 	{
3243 		/*
3244 		 * Here we add a sanity check for the "slot_name" field - it's possible
3245 		 * the node was initially registered with "use_replication_slots=false"
3246 		 * but the configuration was subsequently changed, leaving the field NULL.
3247 		 *
3248 		 * To avoid annoying failures we can just update the node record and proceed.
3249 		 */
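		/*
		 * create_slot_name() is assumed to generate repmgr's conventional
		 * slot name for a node ID, i.e. of the form "repmgr_slot_<node_id>".
		 */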
3250 
3251 		if (!strlen(local_node_record.slot_name))
3252 		{
3253 			create_slot_name(local_node_record.slot_name, config_file_options.node_id);
3254 
3255 			log_notice(_("setting node %i's slot name to \"%s\""),
3256 					   config_file_options.node_id,
3257 					   local_node_record.slot_name);
3258 
3259 			update_node_record_slot_name(primary_conn, config_file_options.node_id, local_node_record.slot_name);
3260 		}
3261 
3262 
3263 		if (create_replication_slot(follow_target_conn,
3264 									local_node_record.slot_name,
3265 									NULL,
3266 									output) == false)
3267 		{
3268 			log_error("%s", output->data);
3269 
3270 			*error_code = general_error_code;
3271 
3272 			return false;
3273 		}
3274 	}
3275 
3276 	/*
3277 	 * Store the original upstream node id so we can delete the
3278 	 * replication slot, if it exists.
3279 	 */
3280 	if (local_node_record.upstream_node_id != UNKNOWN_NODE_ID)
3281 	{
3282 		original_upstream_node_id = local_node_record.upstream_node_id;
3283 	}
3284 	else
3285 	{
3286 		original_upstream_node_id = follow_target_node_record->node_id;
3287 	}
3288 
3289 	if (config_file_options.use_replication_slots && runtime_options.host_param_provided == false)
3290 	{
3291 		/*
3292 		 * Only attempt to delete the old replication slot if the old upstream
3293 		 * node is known and is different to the follow target node.
3294 		 */
3295 		if (original_upstream_node_id != UNKNOWN_NODE_ID
3296 		 && original_upstream_node_id != follow_target_node_record->node_id)
3297 		{
3298 			remove_old_replication_slot = true;
3299 		}
3300 	}
3301 
3302 	/* Fetch original upstream's record */
3303 	if (remove_old_replication_slot == true)
3304 	{
3305 		PGconn	    *local_conn = NULL;
3306 		RecordStatus upstream_record_status = RECORD_NOT_FOUND;
3307 
3308 		/* abort if local connection not available */
3309 		local_conn = establish_db_connection(config_file_options.conninfo, true);
3310 
3311 		upstream_record_status = get_node_record(local_conn,
3312 												 original_upstream_node_id,
3313 												 &original_upstream_node_record);
3314 		PQfinish(local_conn);
3315 
3316 		if (upstream_record_status != RECORD_FOUND)
3317 		{
3318 			log_warning(_("unable to retrieve node record for old upstream node %i"),
3319 						original_upstream_node_id);
3320 			log_detail(_("replication slot will need to be removed manually"));
3321 		}
3322 	}
3323 
3324 	/* Initialise connection parameters to write as "primary_conninfo" */
3325 	initialize_conninfo_params(&recovery_conninfo, false);
3326 
3327 	/* We ignore any application_name set in the primary's conninfo */
3328 	parse_conninfo_string(follow_target_node_record->conninfo, &recovery_conninfo, &errmsg, true);
3329 
3330 	/* Set the application name to this node's name */
3331 	param_set(&recovery_conninfo, "application_name", config_file_options.node_name);
3332 
3333 	/* Set the replication user from the follow target node record */
3334 	param_set(&recovery_conninfo, "user", follow_target_node_record->repluser);
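	/*
	 * The net effect is a "primary_conninfo" along these lines
	 * (hypothetical values):
	 *
	 *   primary_conninfo = 'host=node2 user=repmgr application_name=node3'
	 */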
3335 
3336 	log_notice(_("setting node %i's upstream to node %i"),
3337 			   config_file_options.node_id, follow_target_node_record->node_id);
3338 
3339 	if (!create_recovery_file(&local_node_record,
3340 							  &recovery_conninfo,
3341 							  PQserverVersion(primary_conn),
3342 							  config_file_options.data_directory,
3343 							  true))
3344 	{
3345 		*error_code = general_error_code;
3346 		return false;
3347 	}
3348 
3349 	/*
3350 	 * start/restart the service
3351 	 */
3352 
3353 	{
3354 		char		server_command[MAXLEN] = "";
3355 		bool		server_up = is_server_available(config_file_options.conninfo);
3356 		char	   *action = NULL;
3357 		bool		success;
3358 
3359 		PQExpBufferData output_buf;
3360 		initPQExpBuffer(&output_buf);
3361 
3362 		if (server_up == true)
3363 		{
3364 
3365 			if (PQserverVersion(primary_conn) >= 130000 && config_file_options.standby_follow_restart == false)
3366 			{
3367 				/* PostgreSQL 13 and later: we'll send SIGHUP via pg_ctl */
3368 				get_server_action(ACTION_RELOAD, server_command, config_file_options.data_directory);
3369 
3370 				success = local_command(server_command, &output_buf);
3371 
3372 				if (success == true)
3373 				{
3374 					goto cleanup;
3375 				}
3376 
3377 				/* In the unlikely event that fails, we'll fall back to a restart */
3378 				log_warning(_("unable to reload server configuration"));
3379 			}
3380 
3381 			if (config_file_options.service_restart_command[0] == '\0')
3382 			{
3383 				/* no "service_restart_command" defined - stop and start using pg_ctl */
3384 
3385 				action = "stopp"; /* sic */
3386 				get_server_action(ACTION_STOP_WAIT, server_command, config_file_options.data_directory);
3387 
3388 				/* if translation needed, generate messages in the preceding if/else */
3389 				log_notice(_("%sing server using \"%s\""),
3390 						   action,
3391 						   server_command);
3392 
3393 				success = local_command(server_command, &output_buf);
3394 
3395 				if (success == false)
3396 				{
3397 					log_error(_("unable to %s server"), action);
3398 
3399 					*error_code = ERR_NO_RESTART;
3400 					return false;
3401 				}
3402 
3403 				action = "start";
3404 				get_server_action(ACTION_START, server_command, config_file_options.data_directory);
3405 
3406 				/* if translation needed, generate messages in the preceding if/else */
3407 				log_notice(_("%sing server using \"%s\""),
3408 						   action,
3409 						   server_command);
3410 
3411 				success = local_command(server_command, &output_buf);
3412 
3413 				if (success == false)
3414 				{
3415 					log_error(_("unable to %s server"), action);
3416 
3417 					*error_code = ERR_NO_RESTART;
3418 					return false;
3419 				}
3420 
3421 			}
3422 			else
3423 			{
3424 				action = "restart";
3425 				get_server_action(ACTION_RESTART, server_command, config_file_options.data_directory);
3426 
3427 				/* if translation needed, generate messages in the preceding if/else */
3428 				log_notice(_("%sing server using \"%s\""),
3429 						   action,
3430 						   server_command);
3431 
3432 				success = local_command(server_command, &output_buf);
3433 
3434 				if (success == false)
3435 				{
3436 					log_error(_("unable to %s server"), action);
3437 
3438 					*error_code = ERR_NO_RESTART;
3439 					return false;
3440 				}
3441 
3442 			}
3443 		}
3444 		else
3445 		{
3446 			action = "start";
3447 			get_server_action(ACTION_START, server_command, config_file_options.data_directory);
3448 
3449 			/* if translation needed, generate messages in the preceding if/else */
3450 			log_notice(_("%sing server using \"%s\""),
3451 					   action,
3452 					   server_command);
3453 
3454 			success = local_command(server_command, &output_buf);
3455 
3456 			if (success == false)
3457 			{
3458 				log_error(_("unable to %s server"), action);
3459 
3460 				*error_code = ERR_NO_RESTART;
3461 				return false;
3462 			}
3463 		}
3464 	}
3465 
3466 cleanup:
3467 	/*
3468 	 * If replication slots are in use, and an inactive one for this node
3469 	 * exists on the former upstream, drop it.
3470 	 *
3471 	 * Note that if this function is called by do_standby_switchover(), the
3472 	 * "repmgr node rejoin" command executed on the demotion candidate may already
3473 	 * have removed the slot, so there may be nothing to do.
3474 	 */
3475 
3476 	if (remove_old_replication_slot == true)
3477 	{
3478 		if (original_upstream_node_record.node_id != UNKNOWN_NODE_ID)
3479 		{
3480 			PGconn	   *old_upstream_conn = establish_db_connection_quiet(original_upstream_node_record.conninfo);
3481 
3482 			if (PQstatus(old_upstream_conn) != CONNECTION_OK)
3483 			{
3484 				log_warning(_("unable to connect to old upstream node %i to remove replication slot"),
3485 							original_upstream_node_id);
3486 				log_hint(_("if reusing this node, you should manually remove any inactive replication slots"));
3487 			}
3488 			else
3489 			{
3490 				drop_replication_slot_if_exists(old_upstream_conn,
3491 												original_upstream_node_id,
3492 												local_node_record.slot_name);
3493 				PQfinish(old_upstream_conn);
3494 			}
3495 		}
3496 	}
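	/*
	 * For reference, dropping the slot amounts to running the following on
	 * the former upstream (slot name shown is a hypothetical example of
	 * repmgr's default "repmgr_slot_<node_id>" naming):
	 *
	 *   SELECT pg_drop_replication_slot('repmgr_slot_2');
	 *
	 * drop_replication_slot_if_exists() checks for the slot's existence
	 * first, so a slot already removed by "repmgr node rejoin" is not an
	 * error.
	 */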
3497 
3498 	/*
3499 	 * It's possible this node was an inactive primary - update the relevant
3500 	 * fields to ensure it's marked as an active standby
3501 	 */
3502 	if (update_node_record_status(primary_conn,
3503 								  config_file_options.node_id,
3504 								  "standby",
3505 								  follow_target_node_record->node_id,
3506 								  true) == false)
3507 	{
3508 		appendPQExpBufferStr(output,
3509 							 _("unable to update upstream node"));
3510 		return false;
3511 	}
3512 
3513 	appendPQExpBuffer(output,
3514 					  _("node %i is now attached to node %i"),
3515 					  config_file_options.node_id,
3516 					  follow_target_node_record->node_id);
3517 
3518 	return true;
3519 }
3520 
3521 
3522 /*
3523  * Perform a switchover by:
3524  *
3525  *  - stopping current primary node
3526  *  - promoting this standby node to primary
3527  *  - forcing the previous primary node to follow this node
3528  *
3529  * Where repmgrd is running and not already paused, it will be paused (and
3530  * subsequently unpaused), unless --repmgrd-no-pause is provided.
3531  *
3532  * Note that this operation can only be considered to have failed completely
3533  * ("ERR_SWITCHOVER_FAIL") in these situations:
3534  *
3535  *  - the prerequisites for a switchover are not met
3536  *  - the demotion candidate could not be shut down cleanly
3537  *  - the promotion candidate could not be promoted
3538  *
3539  * All other failures (demotion candidate did not connect to new primary etc.)
3540  * are considered partial failures ("ERR_SWITCHOVER_INCOMPLETE")
3541  *
3542  * TODO:
3543  *  - make connection test timeouts/intervals configurable (see below)
3544  */
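/*
 * Typical invocation, run on the standby which is to be promoted (options
 * shown are illustrative; --siblings-follow and --dry-run are optional):
 *
 *   repmgr -f /etc/repmgr.conf standby switchover --siblings-follow --dry-run
 */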
3545 
3546 
3547 void
3548 do_standby_switchover(void)
3549 {
3550 	PGconn	   *local_conn = NULL;
3551 	PGconn	   *superuser_conn = NULL;
3552 	PGconn	   *remote_conn = NULL;
3553 
3554 	t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
3555 
3556 	/* the remote server is the primary to be demoted */
3557 	char		remote_conninfo[MAXCONNINFO] = "";
3558 	char		remote_host[MAXLEN] = "";
3559 	int			remote_node_id = UNKNOWN_NODE_ID;
3560 	t_node_info remote_node_record = T_NODE_INFO_INITIALIZER;
3561 	int 		remote_repmgr_version = UNKNOWN_REPMGR_VERSION_NUM;
3562 
3563 	RecordStatus record_status = RECORD_NOT_FOUND;
3564 	RecoveryType recovery_type = RECTYPE_UNKNOWN;
3565 	PQExpBufferData remote_command_str;
3566 	PQExpBufferData command_output;
3567 	PQExpBufferData node_rejoin_options;
3568 	PQExpBufferData errmsg;
3569 	PQExpBufferData detailmsg;
3570 
3571 	int			r,
3572 				i;
3573 	bool		command_success = false;
3574 	bool		shutdown_success = false;
3575 	bool		dry_run_success = true;
3576 
3577 	/* this flag will be used to generate the final message */
3578 	bool		switchover_success = true;
3579 
3580 	XLogRecPtr	remote_last_checkpoint_lsn = InvalidXLogRecPtr;
3581 	ReplInfo	replication_info;
3582 
3583 	/* store list of configuration files on the demotion candidate */
3584 	KeyValueList remote_config_files = {NULL, NULL};
3585 
3586 	NodeInfoList sibling_nodes = T_NODE_INFO_LIST_INITIALIZER;
3587 	SiblingNodeStats sibling_nodes_stats = T_SIBLING_NODES_STATS_INITIALIZER;
3588 
3589 	/* this will be calculated as max_wal_senders - COUNT(*) FROM pg_stat_replication */
3590 	int			available_wal_senders = 0;
3591 
3592 	t_event_info event_info = T_EVENT_INFO_INITIALIZER;
3593 
3594 	/* used for handling repmgrd pause/unpause */
3595 	NodeInfoList all_nodes = T_NODE_INFO_LIST_INITIALIZER;
3596 	RepmgrdInfo **repmgrd_info = NULL;
3597 	int			repmgrd_running_count = 0;
3598 
3599 	/* number of free walsenders required on promotion candidate
3600 	 * (at least one will be required for the demotion candidate)
3601 	 */
3602 	sibling_nodes_stats.min_required_wal_senders = 1;
3603 
3604 	/*
3605 	 * SANITY CHECKS
3606 	 *
3607 	 * We'll be doing a bunch of operations on the remote server (primary to
3608 	 * be demoted) - careful checks needed before proceeding.
3609 	 */
3610 
3611 	local_conn = establish_db_connection(config_file_options.conninfo, true);
3612 
3613 	/* Verify that standby is a supported server version */
3614 	(void) check_server_version(local_conn, "standby", true, NULL);
3615 
3616 	record_status = get_node_record(local_conn, config_file_options.node_id, &local_node_record);
3617 	if (record_status != RECORD_FOUND)
3618 	{
3619 		log_error(_("unable to retrieve node record for node %i"),
3620 				  config_file_options.node_id);
3621 
3622 		PQfinish(local_conn);
3623 
3624 		exit(ERR_DB_QUERY);
3625 	}
3626 
3627 	if (!is_streaming_replication(local_node_record.type))
3628 	{
3629 		log_error(_("switchover can only be performed with streaming replication"));
3630 		PQfinish(local_conn);
3631 		exit(ERR_BAD_CONFIG);
3632 	}
3633 
3634 	if (runtime_options.dry_run == true)
3635 	{
3636 		log_notice(_("checking switchover on node \"%s\" (ID: %i) in --dry-run mode"),
3637 				   local_node_record.node_name,
3638 				   local_node_record.node_id);
3639 	}
3640 	else
3641 	{
3642 		log_notice(_("executing switchover on node \"%s\" (ID: %i)"),
3643 				   local_node_record.node_name,
3644 				   local_node_record.node_id);
3645 	}
3646 
3647 	/*
3648 	 * If the -S/--superuser option is provided, check that a superuser connection
3649 	 * can be made to the local database. We'll check the remote superuser connection later.
3650 	 */
3651 
3652 	if (runtime_options.superuser[0] != '\0')
3653 	{
3654 		if (runtime_options.dry_run == true)
3655 		{
3656 			log_info(_("validating connection to local database for superuser \"%s\""),
3657 					 runtime_options.superuser);
3658 		}
3659 
3660 		superuser_conn = establish_db_connection_with_replacement_param(
3661 			config_file_options.conninfo,
3662 			"user",
3663 			runtime_options.superuser, false);
3664 
3665 		if (PQstatus(superuser_conn) != CONNECTION_OK)
3666 		{
3667 			log_error(_("unable to connect to local database \"%s\" as provided superuser \"%s\""),
3668 					  PQdb(superuser_conn),
3669 					  runtime_options.superuser);
3670 			exit(ERR_BAD_CONFIG);
3671 		}
3672 
3673 		if (is_superuser_connection(superuser_conn, NULL) == false)
3674 		{
3675 			log_error(_("connection established to local database \"%s\" for provided superuser \"%s\" is not a superuser connection"),
3676 					  PQdb(superuser_conn),
3677 					  runtime_options.superuser);
3678 			exit(ERR_BAD_CONFIG);
3679 		}
3680 
3681 		if (runtime_options.dry_run == true)
3682 		{
3683 			log_info(_("successfully established connection to local database \"%s\" for provided superuser \"%s\""),
3684 					 PQdb(superuser_conn),
3685 					 runtime_options.superuser);
3686 		}
3687 
3688 	}
3689 
3690 	/*
3691 	 * Warn if no superuser connection is available.
3692 	 */
3693 	if (superuser_conn == NULL && is_superuser_connection(local_conn, NULL) == false)
3694 	{
3695 		log_warning(_("no superuser connection available"));
3696 		log_detail(_("it is recommended to perform switchover operations with a database superuser"));
3697 		log_hint(_("provide the name of a superuser with -S/--superuser"));
3698 	}
3699 
3700 	/* Check that this is a standby */
3701 	recovery_type = get_recovery_type(local_conn);
3702 	if (recovery_type != RECTYPE_STANDBY)
3703 	{
3704 		log_error(_("switchover must be executed from the standby node to be promoted"));
3705 		if (recovery_type == RECTYPE_PRIMARY)
3706 		{
3707 			log_detail(_("this node (ID: %i) is the primary"),
3708 					   local_node_record.node_id);
3709 		}
3710 		PQfinish(local_conn);
3711 
3712 		exit(ERR_SWITCHOVER_FAIL);
3713 	}
3714 
3715 	/*
3716 	 * Check that the local replication configuration file is owned by the data
3717 	 * directory owner.
3718 	 *
3719 	 * For PostgreSQL 11 and earlier, if PostgreSQL is not able to rename "recovery.conf",
3720 	 * promotion will fail.
3721 	 *
3722 	 * For PostgreSQL 12 and later, promotion will not fail even if "postgresql.auto.conf"
3723 	 * is owned by another user, but we'll check just in case, as it is indicative of a
3724 	 * poorly configured setup. In any case we will need to check "postgresql.auto.conf" on
3725 	 * the demotion candidate, as the rejoin will fail if we are unable to write to it.
3726 	 */
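	/*
	 * The ownership check amounts to comparing the file's owner with that of
	 * the data directory; the illustrative shell equivalent (GNU stat) is:
	 *
	 *   stat -c '%U' $PGDATA $PGDATA/postgresql.auto.conf
	 *
	 * where both lines should report the same user (typically "postgres").
	 */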
3727 
3728 	initPQExpBuffer(&errmsg);
3729 	initPQExpBuffer(&detailmsg);
3730 
3731 	if (check_replication_config_owner(PQserverVersion(local_conn),
3732 									   config_file_options.data_directory,
3733 									   &errmsg, &detailmsg) == false)
3734 	{
3735 		log_error("%s", errmsg.data);
3736 		log_detail("%s", detailmsg.data);
3737 
3738 		termPQExpBuffer(&errmsg);
3739 		termPQExpBuffer(&detailmsg);
3740 
3741 		PQfinish(local_conn);
3742 		exit(ERR_BAD_CONFIG);
3743 	}
3744 
3745 	termPQExpBuffer(&errmsg);
3746 	termPQExpBuffer(&detailmsg);
3747 
3748 	/* check remote server connection and retrieve its record */
3749 	remote_conn = get_primary_connection(local_conn, &remote_node_id, remote_conninfo);
3750 
3751 	if (PQstatus(remote_conn) != CONNECTION_OK)
3752 	{
3753 		log_error(_("unable to connect to current primary node"));
3754 		log_hint(_("check that the cluster is correctly configured and this standby is registered"));
3755 		PQfinish(local_conn);
3756 		exit(ERR_DB_CONN);
3757 	}
3758 
3759 	record_status = get_node_record(remote_conn, remote_node_id, &remote_node_record);
3760 
3761 	if (record_status != RECORD_FOUND)
3762 	{
3763 		log_error(_("unable to retrieve node record for current primary (node %i)"),
3764 				  remote_node_id);
3765 
3766 		PQfinish(local_conn);
3767 		PQfinish(remote_conn);
3768 
3769 		exit(ERR_DB_QUERY);
3770 	}
3771 
3772 	log_verbose(LOG_DEBUG, "remote node name is \"%s\"", remote_node_record.node_name);
3773 
3774 	/*
3775 	 * Check this standby is attached to the demotion candidate
3776 	 */
3777 
3778 	if (local_node_record.upstream_node_id != remote_node_record.node_id)
3779 	{
3780 		log_error(_("local node \"%s\" (ID: %i) is not a downstream of demotion candidate primary \"%s\" (ID: %i)"),
3781 				  local_node_record.node_name,
3782 				  local_node_record.node_id,
3783 				  remote_node_record.node_name,
3784 				  remote_node_record.node_id);
3785 
3786 		if (local_node_record.upstream_node_id == UNKNOWN_NODE_ID)
3787 			log_detail(_("local node has no registered upstream node"));
3788 		else
3789 			log_detail(_("registered upstream node ID is %i"),
3790 					   local_node_record.upstream_node_id);
3791 
3792 		log_hint(_("execute \"repmgr standby register --force\" to update the local node's metadata"));
3793 
3794 		PQfinish(local_conn);
3795 		PQfinish(remote_conn);
3796 
3797 		exit(ERR_BAD_CONFIG);
3798 	}
3799 
3800 	if (is_downstream_node_attached(remote_conn, local_node_record.node_name, NULL) != NODE_ATTACHED)
3801 	{
3802 		log_error(_("local node \"%s\" (ID: %i) is not attached to demotion candidate \"%s\" (ID: %i)"),
3803 				  local_node_record.node_name,
3804 				  local_node_record.node_id,
3805 				  remote_node_record.node_name,
3806 				  remote_node_record.node_id);
3807 
3808 		PQfinish(local_conn);
3809 		PQfinish(remote_conn);
3810 
3811 		exit(ERR_BAD_CONFIG);
3812 	}
3813 
3814 	/*
3815 	 * In PostgreSQL 12 and earlier, check that WAL replay on the standby
3816 	 * is *not* paused, as that could lead to unexpected behaviour when the
3817 	 * standby is promoted.
3818 	 *
3819 	 * For switchover we'll mandate that WAL replay *must not* be paused.
3820 	 * For a promote operation we can proceed if WAL replay is paused and
3821 	 * there is no more available WAL to be replayed, as we can be sure the
3822 	 * primary is down already, but in a switchover context there's
3823 	 * potentially a window for more WAL to be received before we shut down
3824 	 * the primary completely.
3825 	 */
3826 
3827 	if (PQserverVersion(local_conn) < 130000 && is_wal_replay_paused(local_conn, false) == true)
3828 	{
3829 		ReplInfo 	replication_info;
3830 		init_replication_info(&replication_info);
3831 
3832 		if (get_replication_info(local_conn, STANDBY, &replication_info) == false)
3833 		{
3834 			log_error(_("unable to retrieve replication information from local node"));
3835 			PQfinish(local_conn);
3836 			exit(ERR_SWITCHOVER_FAIL);
3837 		}
3838 
3839 		log_error(_("WAL replay is paused on this node and it is not safe to proceed"));
3840 		log_detail(_("replay paused at %X/%X; last WAL received is %X/%X"),
3841 				   format_lsn(replication_info.last_wal_replay_lsn),
3842 				   format_lsn(replication_info.last_wal_receive_lsn));
3843 
3844 		if (PQserverVersion(local_conn) >= 100000)
3845 			log_hint(_("execute \"pg_wal_replay_resume()\" to unpause WAL replay"));
3846 		else
3847 			log_hint(_("execute \"pg_xlog_replay_resume()\" to unpause WAL replay"));
3848 
3849 		PQfinish(local_conn);
3850 		exit(ERR_SWITCHOVER_FAIL);
3851 	}
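	/*
	 * For the check above, the SQL-level equivalents to inspect and resume
	 * WAL replay are (PostgreSQL 10 and later; on 9.x use the corresponding
	 * pg_xlog_* variants):
	 *
	 *   SELECT pg_is_wal_replay_paused();
	 *   SELECT pg_wal_replay_resume();
	 */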
3852 
3853 
3854 	/*
3855 	 * Check that there are no exclusive backups running on the primary.
3856 	 * We don't want to end up damaging the backup and also leaving the server in an
3857 	 * state where there's control data saying it's in backup mode but there's no
3858 	 * backup_label in PGDATA.
3859 	 * If the user wants to do the switchover anyway, they should first stop the
3860 	 * backup that's running.
3861 	 */
3862 	if (server_in_exclusive_backup_mode(remote_conn) != BACKUP_STATE_NO_BACKUP)
3863 	{
3864 		log_error(_("unable to perform a switchover while primary server is in exclusive backup mode"));
3865 		log_hint(_("stop backup before attempting the switchover"));
3866 
3867 		PQfinish(local_conn);
3868 		PQfinish(remote_conn);
3869 
3870 		exit(ERR_SWITCHOVER_FAIL);
3871 	}
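	/*
	 * An exclusive backup will have been started with pg_start_backup(); it
	 * can be terminated on the primary with:
	 *
	 *   SELECT pg_stop_backup();
	 *
	 * (the exclusive backup API exists in the PostgreSQL versions this code
	 * targets; it was removed in later releases).
	 */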
3872 
3873 	/* this will fill the %p event notification parameter */
3874 	event_info.node_id = remote_node_record.node_id;
3875 
3876 	/* keep a running total of how many nodes will require a replication slot */
3877 	if (remote_node_record.slot_name[0] != '\0')
3878 	{
3879 		sibling_nodes_stats.min_required_free_slots++;
3880 	}
3881 
3882 	/*
3883 	 * If --force-rewind specified, check pg_rewind can be used, and
3884 	 * pre-emptively fetch the list of configuration files which should be
3885 	 * archived
3886 	 */
3887 
3888 	if (runtime_options.force_rewind_used == true)
3889 	{
3890 		PQExpBufferData reason;
3891 		PQExpBufferData msg;
3892 
3893 		initPQExpBuffer(&reason);
3894 
3895 		if (can_use_pg_rewind(remote_conn, config_file_options.data_directory, &reason) == false)
3896 		{
3897 			log_error(_("--force-rewind specified but pg_rewind cannot be used"));
3898 			log_detail("%s", reason.data);
3899 			termPQExpBuffer(&reason);
3900 			PQfinish(local_conn);
3901 			PQfinish(remote_conn);
3902 
3903 			exit(ERR_BAD_CONFIG);
3904 		}
3905 		termPQExpBuffer(&reason);
3906 
3907 		initPQExpBuffer(&msg);
3908 		appendPQExpBufferStr(&msg,
3909 							 _("prerequisites for using pg_rewind are met"));
3910 
3911 		if (runtime_options.dry_run == true)
3912 		{
3913 			log_info("%s", msg.data);
3914 		}
3915 		else
3916 		{
3917 			log_verbose(LOG_INFO, "%s", msg.data);
3918 		}
3919 		termPQExpBuffer(&msg);
3920 
3921 		get_datadir_configuration_files(remote_conn, &remote_config_files);
3922 	}
3923 
3924 
3925 	/*
3926 	 * Check that we can connect by SSH to the remote (current primary) server
3927 	 */
3928 	get_conninfo_value(remote_conninfo, "host", remote_host);
3929 
3930 	r = test_ssh_connection(remote_host, runtime_options.remote_user);
3931 
3932 	if (r != 0)
3933 	{
3934 		log_error(_("unable to connect via SSH to host \"%s\", user \"%s\""),
3935 				  remote_host, runtime_options.remote_user);
3936 		PQfinish(remote_conn);
3937 		PQfinish(local_conn);
3938 
3939 		exit(ERR_BAD_CONFIG);
3940 	}
3941 	else
3942 	{
3943 		PQExpBufferData msg;
3944 
3945 		initPQExpBuffer(&msg);
3946 
3947 		appendPQExpBuffer(&msg,
3948 						  _("SSH connection to host \"%s\" succeeded"),
3949 						  remote_host);
3950 
3951 		if (runtime_options.dry_run == true)
3952 		{
3953 			log_info("%s", msg.data);
3954 		}
3955 		else
3956 		{
3957 			log_verbose(LOG_INFO, "%s", msg.data);
3958 		}
3959 
3960 		termPQExpBuffer(&msg);
3961 	}
3962 
3963 	/* check remote repmgr binary can be found */
3964 	initPQExpBuffer(&remote_command_str);
3965 	make_remote_repmgr_path(&remote_command_str, &remote_node_record);
3966 
3967 	/*
3968 	 * Here we're executing an arbitrary repmgr command which is guaranteed to
3969 	 * succeed if the repmgr binary can be executed at all. We'll extract the
3970 	 * actual version number in the next step.
3971 	 */
3972 	appendPQExpBufferStr(&remote_command_str, "--version >/dev/null 2>&1 && echo \"1\" || echo \"0\"");
3973 	initPQExpBuffer(&command_output);
3974 	command_success = remote_command(remote_host,
3975 									 runtime_options.remote_user,
3976 									 remote_command_str.data,
3977 									 config_file_options.ssh_options,
3978 									 &command_output);
3979 
3980 	termPQExpBuffer(&remote_command_str);
3981 
3982 	if (command_success == false || command_output.data[0] == '0')
3983 	{
3984 		PQExpBufferData hint;
3985 
3986 		log_error(_("unable to execute \"%s\" on \"%s\""),
3987 				  progname(), remote_host);
3988 
3989 		if (strlen(command_output.data) > 2)
3990 			log_detail("%s", command_output.data);
3991 
3992 		termPQExpBuffer(&command_output);
3993 
3994 		initPQExpBuffer(&hint);
3995 		appendPQExpBufferStr(&hint,
3996 							 _("check \"pg_bindir\" is set to the correct path in \"repmgr.conf\"; current value: "));
3997 
3998 		if (strlen(config_file_options.pg_bindir))
3999 		{
4000 			appendPQExpBuffer(&hint,
4001 							  "\"%s\"", config_file_options.pg_bindir);
4002 		}
4003 		else
4004 		{
4005 			appendPQExpBufferStr(&hint,
4006 								 "(not set)");
4007 		}
4008 
4009 		log_hint("%s", hint.data);
4010 
4011 		termPQExpBuffer(&hint);
4012 
4013 		PQfinish(remote_conn);
4014 		PQfinish(local_conn);
4015 
4016 		exit(ERR_BAD_CONFIG);
4017 	}
4018 
4019 	termPQExpBuffer(&command_output);
4020 
4021 	/*
4022 	 * Now that we're sure the binary can be executed, fetch its version number.
4023 	 */
4024 	initPQExpBuffer(&remote_command_str);
4025 	make_remote_repmgr_path(&remote_command_str, &remote_node_record);
4026 
4027 	appendPQExpBufferStr(&remote_command_str, "--version 2>/dev/null");
4028 	initPQExpBuffer(&command_output);
4029 	command_success = remote_command(remote_host,
4030 									 runtime_options.remote_user,
4031 									 remote_command_str.data,
4032 									 config_file_options.ssh_options,
4033 									 &command_output);
4034 
4035 	termPQExpBuffer(&remote_command_str);
4036 
4037 	if (command_success == true)
4038 	{
4039 		remote_repmgr_version = parse_repmgr_version(command_output.data);
4040 		if (remote_repmgr_version == UNKNOWN_REPMGR_VERSION_NUM)
4041 		{
4042 			log_error(_("unable to parse \"%s\"'s reported version on \"%s\""),
4043 					  progname(), remote_host);
4044 			PQfinish(remote_conn);
4045 			PQfinish(local_conn);
4046 			exit(ERR_BAD_CONFIG);
4047 		}
4048 		log_debug(_("\"%s\" version on \"%s\" is %i"),
4049 				  progname(), remote_host, remote_repmgr_version );
4050 
4051 	}
4052 	else
4053 	{
4054 		log_error(_("unable to execute \"%s\" on \"%s\""),
4055 				  progname(), remote_host);
4056 
4057 		if (strlen(command_output.data) > 2)
4058 			log_detail("%s", command_output.data);
4059 
4060 		termPQExpBuffer(&command_output);
4061 
4062 		PQfinish(remote_conn);
4063 		PQfinish(local_conn);
4064 
4065 		exit(ERR_BAD_CONFIG);
4066 	}
4067 
4068 	termPQExpBuffer(&command_output);
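	/*
	 * For illustration, parse_repmgr_version() is expected to turn output
	 * such as:
	 *
	 *   repmgr 5.2.1
	 *
	 * into an integer like 50201 (mirroring PQserverVersion()'s encoding;
	 * the ">= 50100" test further below relies on this), with
	 * UNKNOWN_REPMGR_VERSION_NUM indicating the output could not be parsed.
	 */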
4069 
4070 	/*
4071 	 * Check if the expected remote repmgr.conf file exists
4072 	 */
4073 	initPQExpBuffer(&remote_command_str);
4074 
4075 	appendPQExpBuffer(&remote_command_str,
4076 					  "test -f %s && echo 1 || echo 0",
4077 					  remote_node_record.config_file);
4078 	initPQExpBuffer(&command_output);
4079 
4080 	command_success = remote_command(remote_host,
4081 									 runtime_options.remote_user,
4082 									 remote_command_str.data,
4083 									 config_file_options.ssh_options,
4084 									 &command_output);
4085 
4086 	termPQExpBuffer(&remote_command_str);
4087 
4088 	if (command_success == false || command_output.data[0] == '0')
4089 	{
4090 		log_error(_("expected configuration file not found on the demotion candidate \"%s\" (ID: %i)"),
4091 				  remote_node_record.node_name,
4092 				  remote_node_record.node_id);
4093 		log_detail(_("registered configuration file is \"%s\""),
4094 				   remote_node_record.config_file);
4095 		log_hint(_("ensure the configuration file is in the expected location, or re-register \"%s\" to update the configuration file location"),
4096 				  remote_node_record.node_name);
4097 
4098 		PQfinish(remote_conn);
4099 		PQfinish(local_conn);
4100 
4101 		termPQExpBuffer(&command_output);
4102 
4103 		exit(ERR_BAD_CONFIG);
4104 	}
4105 
4106 
4107 	/*
4108 	 * Sanity-check remote "data_directory" is correctly configured in repmgr.conf.
4109 	 *
4110 	 * This is important as we'll need to be able to run "repmgr node status" on the data
4111 	 * directory after the remote (demotion candidate) has shut down.
4112 	 */
4113 
4114 	initPQExpBuffer(&remote_command_str);
4115 	make_remote_repmgr_path(&remote_command_str, &remote_node_record);
4116 
4117 	/*
4118 	 * --data-directory-config is available from repmgr 4.3; it will fail
4119 	 * if the remote repmgr is an earlier version, but the version should match
4120 	 * anyway.
4121 	 */
4122 	appendPQExpBufferStr(&remote_command_str, "node check --data-directory-config --optformat -LINFO 2>/dev/null");
4123 
4124 	initPQExpBuffer(&command_output);
4125 	command_success = remote_command(remote_host,
4126 									 runtime_options.remote_user,
4127 									 remote_command_str.data,
4128 									 config_file_options.ssh_options,
4129 									 &command_output);
4130 
4131 	termPQExpBuffer(&remote_command_str);
4132 
4133 	if (command_success == false)
4134 	{
4135 		log_error(_("unable to execute \"%s node check --data-directory-config\" on \"%s\":"),
4136 				  progname(), remote_host);
4137 		log_detail("%s", command_output.data);
4138 
4139 		PQfinish(remote_conn);
4140 		PQfinish(local_conn);
4141 
4142 		termPQExpBuffer(&command_output);
4143 
4144 		exit(ERR_BAD_CONFIG);
4145 	}
4146 
4147 	/* check remote repmgr has the data directory correctly configured */
4148 	{
4149 		t_remote_error_type remote_error = REMOTE_ERROR_NONE;
4150 
4151 		if (parse_data_directory_config(command_output.data, &remote_error) == false)
4152 		{
4153 			if (remote_error != REMOTE_ERROR_NONE)
4154 			{
4155 				log_error(_("unable to run data directory check on node \"%s\" (ID: %i)"),
4156 							remote_node_record.node_name,
4157 							remote_node_record.node_id);
4158 
4159 				if (remote_error == REMOTE_ERROR_DB_CONNECTION)
4160 				{
4161 					PQExpBufferData ssh_command;
4162 
4163 					/* can happen if the connection configuration is not consistent across nodes */
4164 					log_detail(_("an error was encountered when attempting to connect to PostgreSQL on node \"%s\" (ID: %i)"),
4165 							   remote_node_record.node_name,
4166 							   remote_node_record.node_id);
4167 
4168 					/* output a helpful hint to help diagnose the issue */
4169 					initPQExpBuffer(&remote_command_str);
4170 					make_remote_repmgr_path(&remote_command_str, &remote_node_record);
4171 
4172 					appendPQExpBufferStr(&remote_command_str, "node check --db-connection");
4173 
4174 					initPQExpBuffer(&ssh_command);
4175 
4176 					make_remote_command(remote_host,
4177 										runtime_options.remote_user,
4178 										remote_command_str.data,
4179 										config_file_options.ssh_options,
4180 										&ssh_command);
4181 
4182 					log_hint(_("diagnose with:\n  %s"), ssh_command.data);
4183 
4184 					termPQExpBuffer(&remote_command_str);
4185 					termPQExpBuffer(&ssh_command);
4186 				}
4187 				else if (remote_error == REMOTE_ERROR_CONNINFO_PARSE)
4188 				{
4189 					/* highly unlikely */
4190 					log_detail(_("an error was encountered when parsing the \"conninfo\" parameter in \"repmgr.conf\" on node \"%s\" (ID: %i)"),
4191 							   remote_node_record.node_name,
4192 							   remote_node_record.node_id);
4193 				}
4194 				else
4195 				{
4196 					log_detail(_("an unknown error was encountered when attempting to connect to PostgreSQL on node \"%s\" (ID: %i)"),
4197 							   remote_node_record.node_name,
4198 							   remote_node_record.node_id);
4199 				}
4200 			}
4201 			else
4202 			{
4203 				log_error(_("\"data_directory\" parameter in \"repmgr.conf\" on \"%s\" (ID: %i) is incorrectly configured"),
4204 						  remote_node_record.node_name,
4205 						  remote_node_record.node_id);
4206 
4207 				log_hint(_("execute \"repmgr node check --data-directory-config\" on \"%s\" (ID: %i) to diagnose the issue"),
4208 						 remote_node_record.node_name,
4209 						 remote_node_record.node_id);
4210 
4211 			}
4212 
4213 			PQfinish(remote_conn);
4214 			PQfinish(local_conn);
4215 
4216 			termPQExpBuffer(&command_output);
4217 
4218 			exit(ERR_BAD_CONFIG);
4219 		}
4220 	}
4221 
4222 	termPQExpBuffer(&command_output);
4223 
4224 	if (runtime_options.dry_run == true)
4225 	{
4226 		log_info(_("able to execute \"%s\" on remote host \"%s\""),
4227 				 progname(),
4228 				 remote_host);
4229 	}
4230 
4231 	/*
4232 	 * If the -S/--superuser option is provided, check that a superuser connection can be made
4233 	 * to the local database on the remote node.
4234 	 */
4235 
4236 	if (runtime_options.superuser[0] != '\0')
4237 	{
4238 		CheckStatus status = CHECK_STATUS_UNKNOWN;
4239 
4240 		initPQExpBuffer(&remote_command_str);
4241 		make_remote_repmgr_path(&remote_command_str, &remote_node_record);
4242 
4243 		appendPQExpBuffer(&remote_command_str,
4244 						  "node check --db-connection --superuser=%s --optformat -LINFO 2>/dev/null",
4245 						  runtime_options.superuser);
4246 
4247 		initPQExpBuffer(&command_output);
4248 		command_success = remote_command(remote_host,
4249 										 runtime_options.remote_user,
4250 										 remote_command_str.data,
4251 										 config_file_options.ssh_options,
4252 										 &command_output);
4253 
4254 		termPQExpBuffer(&remote_command_str);
4255 
4256 		if (command_success == false)
4257 		{
4258 			log_error(_("unable to execute \"%s node check --db-connection\" on \"%s\":"),
4259 					  progname(), remote_host);
4260 			log_detail("%s", command_output.data);
4261 
4262 			PQfinish(remote_conn);
4263 			PQfinish(local_conn);
4264 
4265 			termPQExpBuffer(&command_output);
4266 
4267 			exit(ERR_BAD_CONFIG);
4268 		}
4269 
4270 		status = parse_db_connection(command_output.data);
4271 
4272 		if (status != CHECK_STATUS_OK)
4273 		{
4274 			PQExpBufferData ssh_command;
4275 			log_error(_("unable to connect locally as superuser \"%s\" on node \"%s\" (ID: %i)"),
4276 					  runtime_options.superuser,
4277 					  remote_node_record.node_name,
4278 					  remote_node_record.node_id);
4279 
4280 			/* output a helpful hint to help diagnose the issue */
4281 			initPQExpBuffer(&remote_command_str);
4282 			make_remote_repmgr_path(&remote_command_str, &remote_node_record);
4283 
4284 			appendPQExpBuffer(&remote_command_str,
4285 							  "node check --db-connection --superuser=%s",
4286 							  runtime_options.superuser);
4287 
4288 			initPQExpBuffer(&ssh_command);
4289 
4290 			make_remote_command(remote_host,
4291 								runtime_options.remote_user,
4292 								remote_command_str.data,
4293 								config_file_options.ssh_options,
4294 								&ssh_command);
4295 
4296 			log_hint(_("diagnose with:\n  %s"), ssh_command.data);
4297 
4298 			termPQExpBuffer(&remote_command_str);
4299 			termPQExpBuffer(&ssh_command);
4300 			exit(ERR_DB_CONN);
4301 		}
4302 
4303 
4305 		termPQExpBuffer(&command_output);
4306 	}
4307 
4308 	/*
4309 	 * For PostgreSQL 12 and later, check "postgresql.auto.conf" is owned by the
4310 	 * correct user, otherwise the node will probably not be able to attach to
4311 	 * the promotion candidate (and is a sign of bad configuration anyway) so we
4312 	 * will complain vocally.
4313 	 *
4314 	 * We'll only do this if we've determined the remote repmgr binary is new
4315 	 * enough to have the "node check --replication-config-owner" option.
4316 	 */
4317 
4318 	if (PQserverVersion(local_conn) >= 120000 && remote_repmgr_version >= 50100)
4319 	{
4320 		initPQExpBuffer(&remote_command_str);
4321 		make_remote_repmgr_path(&remote_command_str, &remote_node_record);
4322 
4323 		appendPQExpBufferStr(&remote_command_str, "node check --replication-config-owner --optformat -LINFO 2>/dev/null");
4324 
4325 		initPQExpBuffer(&command_output);
4326 		command_success = remote_command(remote_host,
4327 										 runtime_options.remote_user,
4328 										 remote_command_str.data,
4329 										 config_file_options.ssh_options,
4330 										 &command_output);
4331 
4332 		termPQExpBuffer(&remote_command_str);
4333 
4334 		if (command_success == false)
4335 		{
4336 			log_error(_("unable to execute \"%s node check --replication-config-owner\" on \"%s\":"),
4337 					  progname(), remote_host);
4338 			log_detail("%s", command_output.data);
4339 
4340 			PQfinish(remote_conn);
4341 			PQfinish(local_conn);
4342 
4343 			termPQExpBuffer(&command_output);
4344 
4345 			exit(ERR_BAD_CONFIG);
4346 		}
4347 
4348 		if (parse_replication_config_owner(command_output.data) == false)
4349 		{
4350 			log_error(_("\"%s\" file on \"%s\" has incorrect ownership"),
4351 					  PG_AUTOCONF_FILENAME,
4352 					  remote_node_record.node_name);
4353 
4354 			log_hint(_("check the file has the same owner/group as the data directory"));
4355 
4356 			PQfinish(remote_conn);
4357 			PQfinish(local_conn);
4358 
4359 			termPQExpBuffer(&command_output);
4360 
4361 			exit(ERR_BAD_CONFIG);
4362 		}
4363 
4364 		termPQExpBuffer(&command_output);
4365 	}
4366 
4367 
4368 	/*
4369 	 * populate local node record with current state of various replication-related
4370 	 * values, so we can check for sufficient walsenders and replication slots
4371 	 */
4372 	get_node_replication_stats(local_conn, &local_node_record);
4373 
4374 	available_wal_senders = local_node_record.max_wal_senders -
4375 		local_node_record.attached_wal_receivers;
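	/*
	 * Equivalent to checking, on the promotion candidate (illustrative SQL):
	 *
	 *   SELECT current_setting('max_wal_senders')::int
	 *            - (SELECT count(*) FROM pg_stat_replication);
	 */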
4376 
4377 	/*
4378 	 * Get list of sibling nodes; if --siblings-follow specified,
4379 	 * check they're reachable; if not, the list will be used to warn
4380 	 * about nodes which will remain attached to the demotion candidate
4381 	 */
4382 	get_active_sibling_node_records(local_conn,
4383 									local_node_record.node_id,
4384 									local_node_record.upstream_node_id,
4385 									&sibling_nodes);
4386 
4387 	if (check_sibling_nodes(&sibling_nodes, &sibling_nodes_stats) == false)
4388 	{
4389 		PQfinish(local_conn);
4390 		exit(ERR_BAD_CONFIG);
4391 	}
4392 
4393 
4394 	/*
4395 	 * check there are sufficient free walsenders - obviously there's potential
4396 	 * for a later race condition if some walsenders come into use before the
4397 	 * switchover operation gets around to attaching the sibling nodes, but
4398 	 * this should catch any actual existing configuration issue (and if anyone's
4399 	 * performing a switchover in such an unstable environment, they only have
4400 	 * themselves to blame).
4401 	 */
4402 	if (check_free_wal_senders(available_wal_senders, &sibling_nodes_stats, &dry_run_success) == false)
4403 	{
4404 		if (runtime_options.dry_run == false)
4405 		{
4406 			PQfinish(local_conn);
4407 			exit(ERR_BAD_CONFIG);
4408 		}
4409 	}
4410 
4411 
4412 	/* check demotion candidate can make replication connection to promotion candidate */
4413 	{
4414 		initPQExpBuffer(&remote_command_str);
4415 		make_remote_repmgr_path(&remote_command_str, &remote_node_record);
4416 		appendPQExpBuffer(&remote_command_str,
4417 						  "node check --remote-node-id=%i --replication-connection",
4418 						  local_node_record.node_id);
4419 
4420 		initPQExpBuffer(&command_output);
4421 
4422 		command_success = remote_command(remote_host,
4423 										 runtime_options.remote_user,
4424 										 remote_command_str.data,
4425 										 config_file_options.ssh_options,
4426 										 &command_output);
4427 
4428 		termPQExpBuffer(&remote_command_str);
4429 
4430 		if (command_success == true)
4431 		{
4432 			ConnectionStatus conn_status = parse_remote_node_replication_connection(command_output.data);
4433 
4434 			switch(conn_status)
4435 			{
4436 				case CONN_OK:
4437 					if (runtime_options.dry_run == true)
4438 					{
4439 						log_info(_("demotion candidate is able to make replication connection to promotion candidate"));
4440 					}
4441 					break;
4442 				case CONN_BAD:
4443 					log_error(_("demotion candidate is unable to make replication connection to promotion candidate"));
4444 					exit(ERR_BAD_CONFIG);
4445 					break;
4446 				default:
4447 					log_error(_("unable to determine whether demotion candidate is able to make replication connection to promotion candidate"));
4448 					exit(ERR_BAD_CONFIG);
4449 					break;
4450 			}
4451 		}
4452 
4453 		termPQExpBuffer(&command_output);	/* freed even if the remote command failed */
4454 	}
4455 
4456 	/* check archive/replication status */
4457 	{
4458 		int			lag_seconds = 0;
4459 		CheckStatus status = CHECK_STATUS_UNKNOWN;
4460 
4461 		/* archive status - check when "archive_mode" is activated */
4462 
4463 		if (guc_set(remote_conn, "archive_mode", "!=", "off"))
4464 		{
4465 			int			files = 0;
4466 			int			threshold = 0;
4467 			t_remote_error_type remote_error = REMOTE_ERROR_NONE;
4468 
4469 			initPQExpBuffer(&remote_command_str);
4470 			make_remote_repmgr_path(&remote_command_str, &remote_node_record);
4471 			appendPQExpBufferStr(&remote_command_str,
4472 								 "node check --terse -LERROR --archive-ready --optformat");
4473 
4474 			initPQExpBuffer(&command_output);
4475 
4476 			command_success = remote_command(remote_host,
4477 											 runtime_options.remote_user,
4478 											 remote_command_str.data,
4479 											 config_file_options.ssh_options,
4480 											 &command_output);
4481 
4482 			termPQExpBuffer(&remote_command_str);
4483 
4484 			if (command_success == true)
4485 			{
4486 				status = parse_node_check_archiver(command_output.data, &files, &threshold, &remote_error);
4487 			}
4488 
4489 			termPQExpBuffer(&command_output);
4490 
4491 			switch (status)
4492 			{
4493 				case CHECK_STATUS_UNKNOWN:
4494 					{
4495 						if (runtime_options.force == false || remote_error == REMOTE_ERROR_DB_CONNECTION)
4496 						{
4497 							log_error(_("unable to check number of pending archive files on demotion candidate \"%s\""),
4498 									  remote_node_record.node_name);
4499 
4500 							if (remote_error == REMOTE_ERROR_DB_CONNECTION)
4501 								log_detail(_("an error was encountered when attempting to connect to PostgreSQL on node \"%s\" (ID: %i)"),
4502 										   remote_node_record.node_name,
4503 										   remote_node_record.node_id);
4504 							else
4505 								log_hint(_("use -F/--force to continue anyway"));
4506 
4507 							PQfinish(remote_conn);
4508 							PQfinish(local_conn);
4509 
4510 							exit(ERR_SWITCHOVER_FAIL);
4511 						}
4512 
4513 						log_warning(_("unable to check number of pending archive files on demotion candidate \"%s\""),
4514 									remote_node_record.node_name);
4515 						log_notice(_("-F/--force set, continuing with switchover"));
4516 					}
4517 					break;
4518 
4519 				case CHECK_STATUS_CRITICAL:
4520 					{
4521 						if (runtime_options.force == false)
4522 						{
4523 							log_error(_("number of pending archive files on demotion candidate \"%s\" is critical"),
4524 									  remote_node_record.node_name);
4525 							log_detail(_("%i pending archive files (critical threshold: %i)"),
4526 									   files, threshold);
4527 							log_hint(_("PostgreSQL will not shut down until all files are archived; use -F/--force to continue anyway"));
4528 							PQfinish(remote_conn);
4529 							PQfinish(local_conn);
4530 
4531 							exit(ERR_SWITCHOVER_FAIL);
4532 						}
4533 
4534 						log_warning(_("number of pending archive files on demotion candidate \"%s\" exceeds the critical threshold"),
4535 									remote_node_record.node_name);
4536 						log_detail(_("%i pending archive files (critical threshold: %i)"),
4537 								   files, threshold);
4538 						log_notice(_("-F/--force set, continuing with switchover"));
4539 					}
4540 					break;
4541 
4542 				case CHECK_STATUS_WARNING:
4543 					{
4544 						log_warning(_("number of pending archive files on demotion candidate \"%s\" exceeds the warning threshold"),
4545 									remote_node_record.node_name);
4546 						log_detail(_("%i pending archive files (warning threshold: %i)"),
4547 								   files, threshold);
4548 						log_hint(_("PostgreSQL will not shut down until all files are archived"));
4549 					}
4550 					break;
4551 
4552 				case CHECK_STATUS_OK:
4553 					{
4554 						PQExpBufferData msg;
4555 
4556 						initPQExpBuffer(&msg);
4557 
4558 						appendPQExpBuffer(&msg,
4559 										  _("%i pending archive files"),
4560 										  files);
4561 
4562 						if (runtime_options.dry_run == true)
4563 						{
4564 							log_info("%s", msg.data);
4565 						}
4566 						else
4567 						{
4568 							log_verbose(LOG_INFO, "%s", msg.data);
4569 						}
4570 
4571 						termPQExpBuffer(&msg);
4572 					}
4573 			}
4574 
4575 		}
4576 		else
4577 		{
4578 			char	   *msg = _("archive mode is \"off\"");
4579 
4580 			if (runtime_options.dry_run == true)
4581 			{
4582 				log_info("%s", msg);
4583 			}
4584 			else
4585 			{
4586 				log_verbose(LOG_INFO, "%s", msg);
4587 			}
4588 		}
4589 
4590 		/*
4591 		 * check replication lag on promotion candidate (TODO: check on all
4592 		 * nodes attached to demotion candidate)
4593 		 */
4594 		lag_seconds = get_replication_lag_seconds(local_conn);
4595 
4596 		log_debug("lag is %i", lag_seconds);
4597 
4598 		if (lag_seconds >= config_file_options.replication_lag_critical)
4599 		{
4600 			if (runtime_options.force == false)
4601 			{
4602 				log_error(_("replication lag on this node is critical"));
4603 				log_detail(_("lag is %i seconds (critical threshold: %i)"),
4604 						   lag_seconds, config_file_options.replication_lag_critical);
4605 				log_hint(_("PostgreSQL on the demotion candidate will not shut down until pending WAL is flushed to the standby; use -F/--force to continue anyway"));
4606 				PQfinish(remote_conn);
4607 				PQfinish(local_conn);
4608 
4609 				exit(ERR_SWITCHOVER_FAIL);
4610 			}
4611 
4612 			log_warning(_("replication lag on this node is critical"));
4613 			log_detail(_("lag is %i seconds (critical threshold: %i)"),
4614 					   lag_seconds, config_file_options.replication_lag_critical);
4615 			log_notice(_("-F/--force set, continuing with switchover"));
4616 		}
4617 		else if (lag_seconds >= config_file_options.replication_lag_warning)
4618 		{
4619 			log_warning(_("replication lag on this node exceeds the warning threshold"));
4620 			log_detail(_("lag is %i seconds (warning threshold: %i)"),
4621 					   lag_seconds, config_file_options.replication_lag_warning);
4622 		}
4623 		else if (lag_seconds == UNKNOWN_REPLICATION_LAG)
4624 		{
4625 			if (runtime_options.force == false)
4626 			{
4627 				log_error(_("unable to check replication lag on local node"));
4628 				log_hint(_("use -F/--force to continue anyway"));
4629 				PQfinish(remote_conn);
4630 				PQfinish(local_conn);
4631 
4632 				exit(ERR_SWITCHOVER_FAIL);
4633 			}
4634 
4635 			log_warning(_("unable to check replication lag on local node"));
4636 			log_notice(_("-F/--force set, continuing with switchover"));
4637 		}
4638 		/* replication lag is below warning threshold */
4639 		else
4640 		{
4641 			PQExpBufferData msg;
4642 
4643 			initPQExpBuffer(&msg);
4644 
4645 			appendPQExpBuffer(&msg,
4646 							  _("replication lag on this standby is %i seconds"),
4647 							  lag_seconds);
4648 
4649 			if (runtime_options.dry_run == true)
4650 			{
4651 				log_info("%s", msg.data);
4652 			}
4653 			else
4654 			{
4655 				log_verbose(LOG_INFO, "%s", msg.data);
4656 			}
4657 
4658 			termPQExpBuffer(&msg);
4659 		}
4660 	}
4661 
4662 	PQfinish(remote_conn);
4663 
4664 	/*
4665 	 * if replication slots are required by demotion candidate and/or siblings,
4666 	 * check the promotion candidate has sufficient free slots
4667 	 */
4668 	if (check_free_slots(&local_node_record, &sibling_nodes_stats, &dry_run_success) == false)
4669 	{
4670 		if (runtime_options.dry_run == false)
4671 		{
4672 			PQfinish(local_conn);
4673 			exit(ERR_BAD_CONFIG);
4674 		}
4675 	}
4676 
4677 
4678 	/*
4679 	 * Attempt to pause all repmgrd instances, unless user explicitly
4680 	 * specifies not to.
4681 	 */
4682 	if (runtime_options.repmgrd_no_pause == false)
4683 	{
4684 		NodeInfoListCell *cell = NULL;
4685 		ItemList repmgrd_connection_errors = {NULL, NULL};
4686 		int i = 0;
4687 		int unreachable_node_count = 0;
4688 
4689 		get_all_node_records(local_conn, &all_nodes);
4690 
4691 		repmgrd_info = (RepmgrdInfo **) pg_malloc0(sizeof(RepmgrdInfo *) * all_nodes.node_count);
4692 
4693 		for (cell = all_nodes.head; cell; cell = cell->next)
4694 		{
4695 			repmgrd_info[i] = pg_malloc0(sizeof(RepmgrdInfo));
4696 			repmgrd_info[i]->node_id = cell->node_info->node_id;
4697 			repmgrd_info[i]->pid = UNKNOWN_PID;
4698 			repmgrd_info[i]->paused = false;
4699 			repmgrd_info[i]->running = false;
4700 			repmgrd_info[i]->pg_running = true;
4701 
4702 			cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
4703 
4704 			if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
4705 			{
4706 				/*
4707 				 * unable to connect; treat this as an error
4708 				 */
4709 
4710 				repmgrd_info[i]->pg_running = false;
4711 
4712 				/*
4713 				 * Only worry about unreachable nodes if they're marked as active
4714 				 * in the repmgr metadata.
4715 				 */
4716 				if (cell->node_info->active == true)
4717 				{
4718 					unreachable_node_count++;
4719 
4720 					item_list_append_format(&repmgrd_connection_errors,
4721 											_("unable to connect to node \"%s\" (ID %i):\n%s"),
4722 											cell->node_info->node_name,
4723 											cell->node_info->node_id,
4724 											PQerrorMessage(cell->node_info->conn));
4725 				}
4726 
4727 				PQfinish(cell->node_info->conn);
4728 				cell->node_info->conn = NULL;
4729 
4730 				i++;
4731 				continue;
4732 			}
4733 
4734 			repmgrd_info[i]->running = repmgrd_is_running(cell->node_info->conn);
4735 			repmgrd_info[i]->pid = repmgrd_get_pid(cell->node_info->conn);
4736 			repmgrd_info[i]->paused = repmgrd_is_paused(cell->node_info->conn);
4737 
4738 			if (repmgrd_info[i]->running == true)
4739 				repmgrd_running_count++;
4740 
4741 			i++;
4742 		}
4743 
4744 		if (unreachable_node_count > 0)
4745 		{
4746 			PQExpBufferData msg;
4747 			PQExpBufferData detail;
4748 			ItemListCell *cell;
4749 
4750 			initPQExpBuffer(&msg);
4751 			appendPQExpBuffer(&msg,
4752 							  _("unable to connect to %i node(s), unable to pause all repmgrd instances"),
4753 							  unreachable_node_count);
4754 
4755 			initPQExpBuffer(&detail);
4756 
4757 			for (cell = repmgrd_connection_errors.head; cell; cell = cell->next)
4758 			{
4759 				appendPQExpBuffer(&detail,
4760 								  "  %s\n",
4761 								  cell->string);
4762 			}
4763 
4764 
4765 			if (runtime_options.force == false)
4766 			{
4767 				log_error("%s", msg.data);
4768 			}
4769 			else
4770 			{
4771 				log_warning("%s", msg.data);
4772 			}
4773 
4774 			log_detail(_("following node(s) unreachable:\n%s"), detail.data);
4775 
4776 			termPQExpBuffer(&msg);
4777 			termPQExpBuffer(&detail);
4778 
4779 			/* tell user about footgun */
4780 			if (runtime_options.force == false)
4781 			{
4782 				log_hint(_("use -F/--force to continue anyway"));
4783 
4784 				clear_node_info_list(&sibling_nodes);
4785 				clear_node_info_list(&all_nodes);
4786 
4787 				exit(ERR_SWITCHOVER_FAIL);
4788 			}
4789 
4790 		}
4791 
4792 		/* pause repmgrd on all reachable nodes */
4793 		if (repmgrd_running_count > 0)
4794 		{
4795 			i = 0;
4796 			for (cell = all_nodes.head; cell; cell = cell->next)
4797 			{
4798 
4799 				/*
4800 				 * Skip if node was unreachable
4801 				 */
4802 				if (repmgrd_info[i]->pg_running == false)
4803 				{
4804 					log_warning(_("node \"%s\" (ID %i) unreachable, unable to pause repmgrd"),
4805 								cell->node_info->node_name,
4806 								cell->node_info->node_id);
4807 					i++;
4808 					continue;
4809 				}
4810 
4811 
4812 				/*
4813 				 * Skip if repmgrd not running on node
4814 				 */
4815 				if (repmgrd_info[i]->running == false)
4816 				{
4817 					log_warning(_("repmgrd not running on node \"%s\" (ID %i)"),
4818 								cell->node_info->node_name,
4819 								cell->node_info->node_id);
4820 					i++;
4821 					continue;
4822 				}
4823 				/*
4824 				 * Skip if node is already paused. Note we won't unpause these, to
4825 				 * leave the repmgrd instances in the cluster in the same state they
4826 				 * were before the switchover.
4827 				 */
4828 				if (repmgrd_info[i]->paused == true)
4829 				{
4830 					PQfinish(cell->node_info->conn);
4831 					cell->node_info->conn = NULL;
4832 					i++;
4833 					continue;
4834 				}
4835 
4836 				if (runtime_options.dry_run == true)
4837 				{
4838 					log_info(_("would pause repmgrd on node \"%s\" (ID %i)"),
4839 							 cell->node_info->node_name,
4840 							 cell->node_info->node_id);
4841 				}
4842 				else
4843 				{
4844 					/* XXX check result  */
4845 					log_debug("pausing repmgrd on node \"%s\" (ID %i)",
4846 							 cell->node_info->node_name,
4847 							 cell->node_info->node_id);
4848 
4849 					(void) repmgrd_pause(cell->node_info->conn, true);
4850 				}
4851 
4852 				PQfinish(cell->node_info->conn);
4853 				cell->node_info->conn = NULL;
4854 				i++;
4855 			}
4856 		}
4857 		else
4858 		{
4859 			/* close all connections - we'll reestablish later */
4860 			for (cell = all_nodes.head; cell; cell = cell->next)
4861 			{
4862 				if (cell->node_info->conn != NULL)
4863 				{
4864 					PQfinish(cell->node_info->conn);
4865 					cell->node_info->conn = NULL;
4866 				}
4867 			}
4868 		}
4869 	}
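	/*
	 * repmgrd_pause() sets the pause flag via the repmgr extension; assuming
	 * the standard extension function, the manual SQL equivalents are:
	 *
	 *   SELECT repmgr.set_repmgrd_pause(true);   -- pause
	 *   SELECT repmgr.set_repmgrd_pause(false);  -- unpause
	 *
	 * or "repmgr service pause" / "repmgr service unpause" from the shell.
	 */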
4870 
4871 
4872 	/*
4873 	 * Sanity checks completed - prepare for the switchover
4874 	 */
4875 
4876 	if (runtime_options.dry_run == true)
4877 	{
4878 		log_notice(_("local node \"%s\" (ID: %i) would be promoted to primary; "
4879 					 "current primary \"%s\" (ID: %i) would be demoted to standby"),
4880 				   local_node_record.node_name,
4881 				   local_node_record.node_id,
4882 				   remote_node_record.node_name,
4883 				   remote_node_record.node_id);
4884 	}
4885 	else
4886 	{
4887 		log_notice(_("local node \"%s\" (ID: %i) will be promoted to primary; "
4888 					 "current primary \"%s\" (ID: %i) will be demoted to standby"),
4889 				   local_node_record.node_name,
4890 				   local_node_record.node_id,
4891 				   remote_node_record.node_name,
4892 				   remote_node_record.node_id);
4893 	}
4894 
4895 
4896 	/*
4897 	 * Stop the remote primary
4898 	 *
4899 	 * We'll issue the shutdown command but won't wait for it to complete; we'll
4900 	 * check the connection from here - and error out if no shutdown is
4901 	 * detected after a certain time.
4902 	 */
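	/*
	 * With "--checkpoint", the remote repmgr issues a CHECKPOINT (hence the
	 * optional "--superuser" appended below) before running the stop command,
	 * which - absent a "service_stop_command" setting - would typically be
	 * something like (illustrative; the exact command depends on the remote
	 * node's configuration):
	 *
	 *   pg_ctl -D /path/to/data -W -m fast stop
	 */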
4903 
4904 	initPQExpBuffer(&remote_command_str);
4905 	initPQExpBuffer(&command_output);
4906 
4907 	make_remote_repmgr_path(&remote_command_str, &remote_node_record);
4908 
4909 	if (runtime_options.dry_run == true)
4910 	{
4911 		appendPQExpBufferStr(&remote_command_str,
4912 							 "node service --terse -LERROR --list-actions --action=stop");
4913 
4914 	}
4915 	else
4916 	{
4917 		log_notice(_("stopping current primary node \"%s\" (ID: %i)"),
4918 				   remote_node_record.node_name,
4919 				   remote_node_record.node_id);
4920 		appendPQExpBufferStr(&remote_command_str,
4921 							 "node service --action=stop --checkpoint");
4922 
4923 		if (runtime_options.superuser[0] != '\0')
4924 		{
4925 			appendPQExpBuffer(&remote_command_str,
4926 							  " --superuser=%s",
4927 							  runtime_options.superuser);
4928 		}
4929 	}
4930 
4931 	/* XXX handle failure */
4932 
4933 	(void) remote_command(remote_host,
4934 						  runtime_options.remote_user,
4935 						  remote_command_str.data,
4936 						  config_file_options.ssh_options,
4937 						  &command_output);
4938 
4939 	termPQExpBuffer(&remote_command_str);
4940 
4941 	/*
4942 	 * --dry-run ends here, after displaying the command which would be used
4943 	 * to shut down the remote server
4944 	 */
4945 	if (runtime_options.dry_run == true)
4946 	{
4947 		/* we use a buffer here as it will be modified by string_remove_trailing_newlines() */
4948 		char		shutdown_command[MAXLEN] = "";
4949 
4950 		strncpy(shutdown_command, command_output.data, MAXLEN - 1);
4951 
4952 		termPQExpBuffer(&command_output);
4953 
4954 		string_remove_trailing_newlines(shutdown_command);
4955 
4956 		log_info(_("following shutdown command would be run on node \"%s\":\n  \"%s\""),
4957 				 remote_node_record.node_name,
4958 				 shutdown_command);
4959 
4960 		log_info(_("parameter \"shutdown_check_timeout\" is set to %i seconds"),
4961 				 config_file_options.shutdown_check_timeout);
4962 
4963 		clear_node_info_list(&sibling_nodes);
4964 
4965 		key_value_list_free(&remote_config_files);
4966 
4967 		if (dry_run_success == false)
4968 		{
4969 			log_error(_("prerequisites for executing STANDBY SWITCHOVER are *not* met"));
4970 			log_hint(_("see preceding error messages"));
4971 			exit(ERR_BAD_CONFIG);
4972 		}
4973 
4974 		log_info(_("prerequisites for executing STANDBY SWITCHOVER are met"));
4975 
4976 		exit(SUCCESS);
4977 	}
4978 
4979 	termPQExpBuffer(&command_output);
4980 	shutdown_success = false;
4981 
4982 	/* loop for timeout waiting for current primary to stop */
4983 
4984 	for (i = 0; i < config_file_options.shutdown_check_timeout; i++)
4985 	{
4986 		/* Check whether primary is available */
4987 		PGPing		ping_res;
4988 
4989 		log_info(_("checking for primary shutdown; %i of %i attempts (\"shutdown_check_timeout\")"),
4990 				 i + 1, config_file_options.shutdown_check_timeout);
4991 
4992 		ping_res = PQping(remote_conninfo);
4993 
4994 		log_debug("ping status is: %s", print_pqping_status(ping_res));
4995 
4996 		/* database server could not be contacted */
4997 		if (ping_res == PQPING_NO_RESPONSE || ping_res == PQPING_NO_ATTEMPT)
4998 		{
4999 			bool		command_success;
5000 
5001 			/*
5002 			 * remote server can't be contacted at protocol level - that
5003 			 * doesn't necessarily mean it's shut down, so we'll ask its
5004 			 * repmgr to check at data directory level, and if shut down also
5005 			 * return the last checkpoint LSN.
5006 			 */
5007 
5008 			initPQExpBuffer(&remote_command_str);
5009 			make_remote_repmgr_path(&remote_command_str, &remote_node_record);
5010 			appendPQExpBufferStr(&remote_command_str,
5011 								 "node status --is-shutdown-cleanly");
5012 
5013 			initPQExpBuffer(&command_output);
5014 
5015 			command_success = remote_command(remote_host,
5016 											 runtime_options.remote_user,
5017 											 remote_command_str.data,
5018 											 config_file_options.ssh_options,
5019 											 &command_output);
5020 
5021 			termPQExpBuffer(&remote_command_str);
5022 
5023 			if (command_success == true)
5024 			{
5025 				NodeStatus	status = parse_node_status_is_shutdown_cleanly(command_output.data, &remote_last_checkpoint_lsn);
5026 
5027 				log_verbose(LOG_DEBUG, "remote node status is: %s", print_node_status(status));
5028 
5029 				if (status == NODE_STATUS_DOWN && remote_last_checkpoint_lsn != InvalidXLogRecPtr)
5030 				{
5031 					shutdown_success = true;
5032 					log_notice(_("current primary has been cleanly shut down at location %X/%X"),
5033 							   format_lsn(remote_last_checkpoint_lsn));
5034 					termPQExpBuffer(&command_output);
5035 
5036 					break;
5037 				}
5038 				/* remote node did not shut down cleanly */
5039 				else if (status == NODE_STATUS_UNCLEAN_SHUTDOWN)
5040 				{
5041 					if (!runtime_options.force)
5042 					{
5043 						log_error(_("current primary did not shut down cleanly, aborting"));
5044 						log_hint(_("use -F/--force to promote current standby"));
5045 						termPQExpBuffer(&command_output);
5046 						exit(ERR_SWITCHOVER_FAIL);
5047 					}
5048 					log_warning(_("current primary did not shut down cleanly, continuing anyway"));
5049 					shutdown_success = true;
5050 					break;
5051 				}
5052 				else if (status == NODE_STATUS_SHUTTING_DOWN)
5053 				{
5054 					log_info(_("remote node is still shutting down"));
5055 				}
5056 			}
5057 
5058 			termPQExpBuffer(&command_output);
5059 		}
5060 
5061 		log_debug("sleeping 1 second until next check");
5062 		sleep(1);
5063 	}
5064 
5065 	if (shutdown_success == false)
5066 	{
5067 		log_error(_("shutdown of the primary server could not be confirmed"));
5068 		log_hint(_("check the primary server status before performing any further actions"));
5069 		exit(ERR_SWITCHOVER_FAIL);
5070 	}
5071 
5072 	/* this is unlikely to happen, but check and handle gracefully anyway */
5073 	if (PQstatus(local_conn) != CONNECTION_OK)
5074 	{
5075 		log_warning(_("connection to local node lost, reconnecting..."));
5076 		log_detail("\n%s", PQerrorMessage(local_conn));
5077 		PQfinish(local_conn);
5078 
5079 		local_conn = establish_db_connection(config_file_options.conninfo, false);
5080 
5081 		if (PQstatus(local_conn) != CONNECTION_OK)
5082 		{
5083 			log_error(_("unable to reconnect to local node \"%s\""),
5084 					  local_node_record.node_name);
5085 			exit(ERR_DB_CONN);
5086 		}
5087 		log_verbose(LOG_INFO, _("successfully reconnected to local node"));
5088 	}
5089 
5090 	init_replication_info(&replication_info);
5091 	/*
5092 	 * Compare standby's last WAL receive location with the primary's last
5093 	 * checkpoint LSN. We'll loop for a while as it's possible the standby's
5094 	 * walreceiver has not yet flushed all received WAL to disk.
5095 	 */
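	/*
	 * LSNs compare as unsigned 64-bit integers, so e.g. a last receive LSN
	 * of 0/9000060 satisfies a shutdown checkpoint LSN of 0/9000028: the
	 * standby has everything the old primary wrote before stopping.
	 */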
5096 	{
5097 		bool notice_emitted = false;
5098 
5099 		for (i = 0; i < config_file_options.wal_receive_check_timeout; i++)
5100 		{
5101 			get_replication_info(local_conn, STANDBY, &replication_info);
5102 			if (replication_info.last_wal_receive_lsn >= remote_last_checkpoint_lsn)
5103 				break;
5104 
5105 			/*
5106 			 * We'll only output this notice if it looks like we're going to have
5107 			 * to wait for WAL to be flushed.
5108 			 */
5109 			if (notice_emitted == false)
5110 			{
5111 				log_notice(_("waiting up to %i seconds (parameter \"wal_receive_check_timeout\") for received WAL to flush to disk"),
5112 						   config_file_options.wal_receive_check_timeout);
5113 
5114 				notice_emitted = true;
5115 			}
5116 
5117 			log_info(_("sleeping %i of maximum %i seconds waiting for standby to flush received WAL to disk"),
5118 					 i + 1, config_file_options.wal_receive_check_timeout);
5119 			sleep(1);
5120 		}
5121 	}
5122 
5123 	if (replication_info.last_wal_receive_lsn < remote_last_checkpoint_lsn)
5124 	{
5125 		log_warning(_("local node \"%s\" is behind shutdown primary \"%s\""),
5126 					local_node_record.node_name,
5127 					remote_node_record.node_name);
5128 		log_detail(_("local node last receive LSN is %X/%X, primary shutdown checkpoint LSN is %X/%X"),
5129 				   format_lsn(replication_info.last_wal_receive_lsn),
5130 				   format_lsn(remote_last_checkpoint_lsn));
5131 
5132 		if (runtime_options.always_promote == false)
5133 		{
5134 			log_notice(_("aborting switchover"));
5135 			log_hint(_("use --always-promote to force promotion of standby"));
5136 			PQfinish(local_conn);
5137 			exit(ERR_SWITCHOVER_FAIL);
5138 		}
5139 	}
5140 
5141 	log_debug("local node last receive LSN is %X/%X, primary shutdown checkpoint LSN is %X/%X",
5142 			  format_lsn(replication_info.last_wal_receive_lsn),
5143 			  format_lsn(remote_last_checkpoint_lsn));
5144 
5145 	/*
5146 	 * Promote standby (local node).
5147 	 *
5148 	 * If PostgreSQL 12 or later, and -S/--superuser provided, we will provide
5149 	 * a superuser connection so that pg_promote() can be used.
5150 	 */
5151 
5152 	if (PQserverVersion(local_conn) >= 120000 && superuser_conn != NULL)
5153 	{
5154 		_do_standby_promote_internal(superuser_conn);
5155 	}
5156 	else
5157 	{
5158 		_do_standby_promote_internal(local_conn);
5159 	}
5160 
5161 
5162 	/*
5163 	 * If pg_rewind is requested, issue a checkpoint immediately after promoting
5164 	 * the local node, as pg_rewind compares timelines on the basis of the value
5165 	 * in pg_control, which is written at the first checkpoint, which might not
5166 	 * occur immediately.
5167 	 */
5168 	if (runtime_options.force_rewind_used == true)
5169 	{
5170 		PGconn *checkpoint_conn = local_conn;
5171 		if (superuser_conn != NULL)
5172 		{
5173 			checkpoint_conn = superuser_conn;
5174 		}
5175 
5176 		if (is_superuser_connection(checkpoint_conn, NULL) == true)
5177 		{
5178 			log_notice(_("issuing CHECKPOINT on node \"%s\" (ID: %i)"),
5179 					   config_file_options.node_name,
5180 					   config_file_options.node_id);
5181 			checkpoint(checkpoint_conn);
5182 		}
5183 		else
5184 		{
5185 			log_warning(_("no superuser connection available, unable to issue CHECKPOINT"));
5186 		}
5187 	}
5188 
5189 	/*
5190 	 * Execute "repmgr node rejoin" to create recovery.conf and start the
5191 	 * remote server. Additionally execute "pg_rewind", if required and
5192 	 * requested.
5193 	 */
5194 	initPQExpBuffer(&node_rejoin_options);
5195 
5196 	/*
5197 	 * Don't wait for repmgr on the remote node to report the success
5198 	 * of the rejoin operation - we'll check it from here.
5199 	 */
5200 	appendPQExpBufferStr(&node_rejoin_options,
5201 						 " --no-wait");
5202 
5203 	if (replication_info.last_wal_receive_lsn < remote_last_checkpoint_lsn)
5204 	{
5205 		KeyValueListCell *cell = NULL;
5206 		bool		first_entry = true;
5207 
5208 		if (runtime_options.force_rewind_used == false)
5209 		{
5210 			log_error(_("new primary diverges from former primary and --force-rewind not provided"));
5211 			log_hint(_("the former primary will need to be restored manually, or use \"repmgr node rejoin\""));
5212 
5213 			termPQExpBuffer(&node_rejoin_options);
5214 			PQfinish(local_conn);
5215 			exit(ERR_SWITCHOVER_FAIL);
5216 		}
5217 
5218 		appendPQExpBufferStr(&node_rejoin_options,
5219 							 " --force-rewind");
5220 
5221 		if (runtime_options.force_rewind_path[0] != '\0')
5222 		{
5223 			appendPQExpBuffer(&node_rejoin_options,
5224 							  "=%s",
5225 							  runtime_options.force_rewind_path);
5226 		}
5227 		appendPQExpBufferStr(&node_rejoin_options,
5228 							 " --config-files=");
5229 
5230 		for (cell = remote_config_files.head; cell; cell = cell->next)
5231 		{
5232 			if (first_entry == false)
5233 				appendPQExpBufferChar(&node_rejoin_options, ',');
5234 			else
5235 				first_entry = false;
5236 
5237 			appendPQExpBufferStr(&node_rejoin_options, cell->key);
5238 		}
5239 
5240 		appendPQExpBufferChar(&node_rejoin_options, ' ');
5241 	}
5242 
5243 	key_value_list_free(&remote_config_files);
5244 
5245 	initPQExpBuffer(&remote_command_str);
5246 	make_remote_repmgr_path(&remote_command_str, &remote_node_record);
5247 
5248 	/*
5249 	 * Here we'll coerce the local node's connection string into
5250 	 * "param=value" format, in case it's configured in URI format,
5251 	 * to simplify escaping issues when passing the string to the
5252 	 * remote node.
5253 	 */
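	/*
	 * For example (illustrative values only), a conninfo string supplied as
	 * "postgresql://repmgr@node1/repmgr" would be rewritten as
	 * "user=repmgr dbname=repmgr host=node1".
	 */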
5254 	{
5255 		char	   *conninfo_normalized = normalize_conninfo_string(local_node_record.conninfo);
5256 
5257 		appendPQExpBuffer(&remote_command_str,
5258 						  "%s -d ",
5259 						  node_rejoin_options.data);
5260 
5261 		appendRemoteShellString(&remote_command_str,
5262 								conninfo_normalized);
5263 
5264 		appendPQExpBufferStr(&remote_command_str,
5265 							 " node rejoin");
5266 
5267 		pfree(conninfo_normalized);
5268 	}
5269 
5270 	termPQExpBuffer(&node_rejoin_options);
5271 
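	/*
	 * The assembled command will resemble the following (illustrative paths,
	 * filenames and conninfo):
	 *
	 *   /usr/bin/repmgr -f /etc/repmgr.conf --no-wait --force-rewind \
	 *     --config-files=postgresql.conf,pg_hba.conf \
	 *     -d 'host=node1 user=repmgr dbname=repmgr' node rejoin
	 */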
5272 	log_debug("executing:\n  %s", remote_command_str.data);
5273 	initPQExpBuffer(&command_output);
5274 
5275 	command_success = remote_command(remote_host,
5276 									 runtime_options.remote_user,
5277 									 remote_command_str.data,
5278 									 config_file_options.ssh_options,
5279 									 &command_output);
5280 
5281 	termPQExpBuffer(&remote_command_str);
5282 
5283 	/* TODO: verify this node's record was updated correctly */
5284 
5285 	if (command_success == false)
5286 	{
5287 		log_error(_("rejoin failed:\n%s"), command_output.data);
5288 
5289 		create_event_notification_extended(local_conn,
5290 										   &config_file_options,
5291 										   config_file_options.node_id,
5292 										   "standby_switchover",
5293 										   false,
5294 										   command_output.data,
5295 										   &event_info);
5296 	}
5297 	else
5298 	{
5299 		PQExpBufferData event_details;
5300 		standy_join_status join_success = check_standby_join(local_conn,
5301 															 &local_node_record,
5302 															 &remote_node_record);
5303 
5304 		initPQExpBuffer(&event_details);
5305 
5306 		switch (join_success) {
5307 			case JOIN_FAIL_NO_PING:
5308 				appendPQExpBuffer(&event_details,
5309 								  _("node \"%s\" (ID: %i) promoted to primary, but demoted node \"%s\" (ID: %i) did not become available"),
5310 								  config_file_options.node_name,
5311 								  config_file_options.node_id,
5312 								  remote_node_record.node_name,
5313 								  remote_node_record.node_id);
5314 				switchover_success = false;
5315 
5316 				break;
5317 			case JOIN_FAIL_NO_REPLICATION:
5318 				appendPQExpBuffer(&event_details,
5319 								  _("node \"%s\" (ID: %i) promoted to primary, but demoted node \"%s\" (ID: %i) did not connect to the new primary"),
5320 								  config_file_options.node_name,
5321 								  config_file_options.node_id,
5322 								  remote_node_record.node_name,
5323 								  remote_node_record.node_id);
5324 				switchover_success = false;
5325 				break;
5326 			case JOIN_SUCCESS:
5327 				appendPQExpBuffer(&event_details,
5328 								  _("node \"%s\" (ID: %i) promoted to primary, node \"%s\" (ID: %i) demoted to standby"),
5329 								  config_file_options.node_name,
5330 								  config_file_options.node_id,
5331 								  remote_node_record.node_name,
5332 								  remote_node_record.node_id);
				break;
5333 		}
5334 
5335 		create_event_notification_extended(local_conn,
5336 										   &config_file_options,
5337 										   config_file_options.node_id,
5338 										   "standby_switchover",
5339 										   switchover_success,
5340 										   event_details.data,
5341 										   &event_info);
5342 		if (switchover_success == true)
5343 		{
5344 			log_notice("%s", event_details.data);
5345 		}
5346 		else
5347 		{
5348 			log_error("%s", event_details.data);
5349 		}
5350 		termPQExpBuffer(&event_details);
5351 	}
5352 
5353 	termPQExpBuffer(&command_output);
5354 
5355 	/*
5356 	 * If --siblings-follow specified, attempt to make them follow the new
5357 	 * primary
5358 	 */
5359 	if (runtime_options.siblings_follow == true && sibling_nodes.node_count > 0)
5360 	{
5361 		sibling_nodes_follow(&local_node_record, &sibling_nodes, &sibling_nodes_stats);
5362 	}
5363 
5364 	clear_node_info_list(&sibling_nodes);
5365 
5366 	/*
5367 	 * Clean up remote node (primary demoted to standby). It's possible that the node is
5368 	 * still starting up, so poll for a while until we get a connection.
5369 	 */
5370 
5371 	for (i = 0; i < config_file_options.standby_reconnect_timeout; i++)
5372 	{
5373 		remote_conn = establish_db_connection(remote_node_record.conninfo, false);
5374 
5375 		if (PQstatus(remote_conn) == CONNECTION_OK)
5376 			break;
5377 
5378 		log_info(_("sleeping 1 second; %i of %i attempts (\"standby_reconnect_timeout\") to reconnect to demoted primary"),
5379 				 i + 1,
5380 				 config_file_options.standby_reconnect_timeout);
5381 		sleep(1);
5382 	}
5383 
5384 	/* check new standby (old primary) is reachable */
5385 	if (PQstatus(remote_conn) != CONNECTION_OK)
5386 	{
5387 		switchover_success = false;
5388 
5389 		/* TODO: double-check whether new standby has attached */
5390 
5391 		log_warning(_("switchover did not fully complete"));
5392 		log_detail(_("node \"%s\" (ID: %i) is now primary but node \"%s\" (ID: %i) is not reachable"),
5393 				   local_node_record.node_name,
5394 				   local_node_record.node_id,
5395 				   remote_node_record.node_name,
5396 				   remote_node_record.node_id);
5397 
5398 		if (config_file_options.use_replication_slots == true)
5399 		{
5400 			log_hint(_("any inactive replication slots on the old primary will need to be dropped manually"));
5401 		}
5402 	}
5403 	else
5404 	{
5405 		NodeAttached node_attached;
5406 
5407 		/*
5408 		 * We were able to connect to the former primary - attempt to drop
5409 		 * this node's former replication slot, if it exists.
5410 		 */
5411 		if (config_file_options.use_replication_slots == true)
5412 		{
5413 			drop_replication_slot_if_exists(remote_conn,
5414 											remote_node_record.node_id,
5415 											local_node_record.slot_name);
5416 		}
5417 
5418 
5419 		/*
5420 		 * Do a final check that the standby has connected - it's possible
5421 		 * the standby became reachable but has not connected (or became disconnected).
5422 		 */
5423 
5424 		node_attached = is_downstream_node_attached(local_conn,
5425 													remote_node_record.node_name,
5426 													NULL);
5427 		if (node_attached == NODE_ATTACHED)
5428 		{
5429 			switchover_success = true;
5430 			log_notice(_("switchover was successful"));
5431 			log_detail(_("node \"%s\" is now primary and node \"%s\" is attached as standby"),
5432 					   local_node_record.node_name,
5433 					   remote_node_record.node_name);
5434 		}
5435 		else
5436 		{
5437 			log_notice(_("switchover is incomplete"));
5438 			log_detail(_("node \"%s\" is now primary but node \"%s\" is not attached as standby"),
5439 					   local_node_record.node_name,
5440 					   remote_node_record.node_name);
5441 			switchover_success = false;
5442 		}
5443 
5444 	}
5445 
5446 	PQfinish(remote_conn);
5447 	PQfinish(local_conn);
5448 
5449 	/*
5450 	 * Attempt to unpause all paused repmgrd instances, unless user explicitly
5451 	 * specifies not to.
5452 	 */
5453 	if (runtime_options.repmgrd_no_pause == false)
5454 	{
5455 		if (repmgrd_running_count > 0)
5456 		{
5457 			ItemList repmgrd_unpause_errors = {NULL, NULL};
5458 			NodeInfoListCell *cell = NULL;
5459 			int i = 0;
5460 			int error_node_count = 0;
5461 
5462 			for (cell = all_nodes.head; cell; cell = cell->next)
5463 			{
5464 
5465 				if (repmgrd_info[i]->paused == true && runtime_options.repmgrd_force_unpause == false)
5466 				{
5467 					log_debug("repmgrd on node \"%s\" (ID %i) paused before switchover, --repmgrd-force-unpause not provided, not unpausing",
5468 							  cell->node_info->node_name,
5469 							  cell->node_info->node_id);
5470 
5471 					i++;
5472 					continue;
5473 				}
5474 
5475 				log_debug("unpausing repmgrd on node \"%s\" (ID %i)",
5476 						  cell->node_info->node_name,
5477 						  cell->node_info->node_id);
5478 
5479 				cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
5480 
5481 				if (PQstatus(cell->node_info->conn) == CONNECTION_OK)
5482 				{
5483 					if (repmgrd_pause(cell->node_info->conn, false) == false)
5484 					{
5485 						item_list_append_format(&repmgrd_unpause_errors,
5486 												_("unable to unpause node \"%s\" (ID %i)"),
5487 												cell->node_info->node_name,
5488 												cell->node_info->node_id);
5489 						error_node_count++;
5490 					}
5491 				}
5492 				else
5493 				{
5494 					item_list_append_format(&repmgrd_unpause_errors,
5495 											_("unable to connect to node \"%s\" (ID %i):\n%s"),
5496 											cell->node_info->node_name,
5497 											cell->node_info->node_id,
5498 											PQerrorMessage(cell->node_info->conn));
5499 					error_node_count++;
5500 				}
5501 
5502 				i++;
5503 			}
5504 
5505 			if (error_node_count > 0)
5506 			{
5507 				PQExpBufferData detail;
5508 				ItemListCell *cell;
5509 
5510 				initPQExpBuffer(&detail);
5511 
5512 				for (cell = repmgrd_unpause_errors.head; cell; cell = cell->next)
5513 				{
5514 					appendPQExpBuffer(&detail,
5515 									  "  %s\n",
5516 									  cell->string);
5517 				}
5518 
5519 				log_warning(_("unable to unpause repmgrd on %i node(s)"),
5520 							error_node_count);
5521 				log_detail(_("errors encountered for the following node(s):\n%s"), detail.data);
5522 				log_hint(_("check node connection and status; unpause manually with \"repmgr service unpause\""));
5523 
5524 				termPQExpBuffer(&detail);
5525 			}
5526 		}
5527 
5528 		clear_node_info_list(&all_nodes);
5529 	}
5530 
5531 	if (switchover_success == true)
5532 	{
5533 		log_notice(_("STANDBY SWITCHOVER has completed successfully"));
5534 	}
5535 	else
5536 	{
5537 		log_notice(_("STANDBY SWITCHOVER has completed with issues"));
5538 		log_hint(_("see preceding log message(s) for details"));
5539 		exit(ERR_SWITCHOVER_INCOMPLETE);
5540 	}
5541 
5542 	return;
5543 }
5544 
5545 
5546 static void
5547 check_source_server()
5548 {
5549 	char		cluster_size[MAXLEN];
5550 	char	   *connstr = NULL;
5551 
5552 	t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;
5553 	RecordStatus record_status = RECORD_NOT_FOUND;
5554 	ExtensionStatus extension_status = REPMGR_UNKNOWN;
5555 	t_extension_versions extversions = T_EXTENSION_VERSIONS_INITIALIZER;
5556 
5557 	/* Attempt to connect to the upstream server to verify its configuration */
5558 	log_verbose(LOG_DEBUG, "check_source_server()");
5559 	log_info(_("connecting to source node"));
5560 
5561 	connstr = param_list_to_string(&source_conninfo);
5562 	log_detail(_("connection string is: %s"), connstr);
5563 	pfree(connstr);
5564 
5565 	source_conn = establish_db_connection_by_params(&source_conninfo, false);
5566 
5567 	/*
5568 	 * If the connection attempt fails, exit with an error unless we're in
5569 	 * Barman mode; establish_db_connection_by_params() will already have
5570 	 * logged an error message.
5571 	 */
5572 	if (PQstatus(source_conn) != CONNECTION_OK)
5573 	{
5574 		PQfinish(source_conn);
5575 		source_conn = NULL;
5576 		if (mode == barman)
5577 			return;
5578 
5579 		exit(ERR_DB_CONN);
5580 	}
5581 
5582 	/*
5583 	 * If a connection was established, perform some sanity checks on the
5584 	 * provided upstream connection.
5585 	 */
5586 
5587 	source_server_version_num = check_server_version(source_conn, "primary", true, NULL);
5588 
5589 	/*
5590 	 * It's not essential to know the cluster size, but useful to sanity-check
5591 	 * we can actually run a query before going any further.
5592 	 */
5593 	if (get_cluster_size(source_conn, cluster_size) == false)
5594 		exit(ERR_DB_QUERY);
5595 
5596 	log_detail(_("current installation size is %s"),
5597 			   cluster_size);
5598 
5599 	/*
5600 	 * If the upstream node is a standby, try to connect to the primary too so
5601 	 * we can write an event record
5602 	 */
5603 	if (get_recovery_type(source_conn) == RECTYPE_STANDBY)
5604 	{
5605 		primary_conn = get_primary_connection(source_conn, NULL, NULL);
5606 
5607 		if (PQstatus(primary_conn) != CONNECTION_OK)
5608 		{
5609 			log_error(_("unable to connect to primary node"));
5610 			exit(ERR_BAD_CONFIG);
5611 		}
5612 	}
5613 	else
5614 	{
5615 		primary_conn = source_conn;
5616 	}
5617 
5618 	/*
5619 	 * Sanity-check that the primary node has a repmgr extension - if not
5620 	 * present, fail with an error unless -F/--force is used (to enable repmgr
5621 	 * to be used as a standalone clone tool).
5622 	 */
5623 
5624 	extension_status = get_repmgr_extension_status(primary_conn, &extversions);
5625 
5626 	if (extension_status != REPMGR_INSTALLED)
5627 	{
5628 		if (!runtime_options.force)
5629 		{
5630 			/* this is unlikely to happen */
5631 			if (extension_status == REPMGR_UNKNOWN)
5632 			{
5633 				log_error(_("unable to determine status of \"repmgr\" extension"));
5634 				log_detail("%s", PQerrorMessage(primary_conn));
5635 				PQfinish(source_conn);
5636 				exit(ERR_DB_QUERY);
5637 			}
5638 
5639 			if (extension_status == REPMGR_AVAILABLE)
5640 			{
5641 				log_error(_("repmgr extension is available but not installed in database \"%s\""),
5642 						   param_get(&source_conninfo, "dbname"));
5643 				log_hint(_("check that you are cloning from the database where \"repmgr\" is installed"));
5644 			}
5645 			else if (extension_status == REPMGR_UNAVAILABLE)
5646 			{
5647 				log_error(_("repmgr extension is not available on the upstream node"));
5648 			}
5649 			else if (extension_status == REPMGR_OLD_VERSION_INSTALLED)
5650 			{
5651 				log_error(_("an older version of the extension is installed on the upstream node"));
5652 				log_detail(_("version %s is installed but newer version %s is available"),
5653 						   extversions.installed_version,
5654 						   extversions.default_version);
5655 				log_hint(_("upgrade \"repmgr\" on the source node first"));
5656 			}
5657 
5658 			PQfinish(source_conn);
5659 			exit(ERR_BAD_CONFIG);
5660 		}
5661 
5662 		log_warning(_("repmgr extension not found on source node"));
5663 	}
5664 	else
5665 	{
5666 		/*
5667 		 * If upstream is not a standby, retrieve its node records
5668 		 * and attempt to connect to one; we'll then compare
5669 		 * that node's system identifier to that of the source
5670 		 * connection, to ensure we're cloning from a node which is
5671 		 * part of the physical replication cluster. This is mainly
5672 		 * to prevent cloning a standby from a witness server.
5673 		 *
5674 		 * Note that it doesn't matter if the node from the node record
5675 		 * list is the same as the source node; also if the source node
5676 		 * does not have any node records, there's not a lot we can do.
5677 		 *
5678 		 * This check will only be carried out on PostgreSQL 9.6 and
5679 		 * later; it is purely precautionary, and from 9.6 the system
5680 		 * identifier can be retrieved over a normal connection.
5681 		 */
5682 
5683 		if (runtime_options.dry_run == true)
5684 		{
5685 			log_info(_("\"repmgr\" extension is installed in database \"%s\""),
5686 					 param_get(&source_conninfo, "dbname"));
5687 		}
5688 
5689 		if (get_recovery_type(source_conn) == RECTYPE_PRIMARY && PQserverVersion(source_conn) >= 90600)
5690 		{
5691 			uint64		source_system_identifier = system_identifier(source_conn);
5692 
5693 			if (source_system_identifier != UNKNOWN_SYSTEM_IDENTIFIER)
5694 			{
5695 				NodeInfoList all_nodes = T_NODE_INFO_LIST_INITIALIZER;
5696 				NodeInfoListCell *cell = NULL;
5697 				get_all_node_records(source_conn, &all_nodes);
5698 
5699 				log_debug("%i node records returned by source node", all_nodes.node_count);
5700 
5701 				/* loop through its nodes table */
5702 
5703 				for (cell = all_nodes.head; cell; cell = cell->next)
5704 				{
5705 
5706 					/* exclude the witness node, as its system identifier will be different, of course */
5707 					if (cell->node_info->type == WITNESS)
5708 						continue;
5709 
5710 					cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
5711 					if (PQstatus(cell->node_info->conn) == CONNECTION_OK)
5712 					{
5713 						uint64		test_system_identifier = system_identifier(cell->node_info->conn);
5714 						PQfinish(cell->node_info->conn);
5715 						cell->node_info->conn = NULL;
5716 
5717 						if (test_system_identifier != UNKNOWN_SYSTEM_IDENTIFIER)
5718 						{
5719 							if (source_system_identifier != test_system_identifier)
5720 							{
5721 								log_error(_("source node's system identifier does not match other nodes in the replication cluster"));
5722 								log_detail(_("source node's system identifier is %lu, replication cluster member \"%s\"'s system identifier is %lu"),
5723 										   source_system_identifier,
5724 										   cell->node_info->node_name,
5725 										   test_system_identifier);
5726 								log_hint(_("check that the source node is not a witness server"));
5727 								PQfinish(source_conn);
5728 								source_conn = NULL;
5729 
5730 								exit(ERR_BAD_CONFIG);
5731 							}
5732 							/* identifiers match - our work here is done */
5733 							break;
5734 						}
5735 					}
5736 					else
5737 					{
5738 						PQfinish(cell->node_info->conn);
5739 						cell->node_info->conn = NULL;
5740 					}
5741 				}
5742 				clear_node_info_list(&all_nodes);
5743 			}
5744 		}
5745 	}
5746 
5747 
5748 	/*
5749 	 * Check the local directory to see if it appears to be a PostgreSQL
5750 	 * data directory.
5751 	 *
5752 	 * Note: a previous call to check_dir() will have checked whether it contains
5753 	 * a running PostgreSQL instance.
5754 	 */
5755 	if (is_pg_dir(local_data_directory))
5756 	{
5757 		const char *msg = _("target data directory appears to be a PostgreSQL data directory");
5758 		const char *hint = _("use -F/--force to overwrite the existing data directory");
5759 
5760 		if (runtime_options.force == false && runtime_options.dry_run == false)
5761 		{
5762 			log_error("%s", msg);
5763 			log_detail(_("target data directory is \"%s\""), local_data_directory);
5764 			log_hint("%s", hint);
5765 			PQfinish(source_conn);
5766 			exit(ERR_BAD_CONFIG);
5767 		}
5768 
5769 		if (runtime_options.dry_run == true)
5770 		{
5771 			if (runtime_options.force == true)
5772 			{
5773 				log_warning("%s and will be overwritten", msg);
5774 				log_detail(_("target data directory is \"%s\""), local_data_directory);
5775 
5776 			}
5777 			else
5778 			{
5779 				log_warning("%s", msg);
5780 				log_detail(_("target data directory is \"%s\""), local_data_directory);
5781 				log_hint("%s", hint);
5782 			}
5783 		}
5784 	}
5785 
5786 	/*
5787 	 * Attempt to find the upstream node record
5788 	 */
5789 	if (runtime_options.upstream_node_id == NO_UPSTREAM_NODE)
5790 		upstream_node_id = get_primary_node_id(source_conn);
5791 	else
5792 		upstream_node_id = runtime_options.upstream_node_id;
5793 
5794 	log_debug("upstream_node_id determined as %i", upstream_node_id);
5795 
5796 	if (upstream_node_id != UNKNOWN_NODE_ID)
5797 	{
5798 		t_node_info other_node_record = T_NODE_INFO_INITIALIZER;
5799 
5800 		record_status = get_node_record(source_conn, upstream_node_id, &upstream_node_record);
5801 		if (record_status == RECORD_FOUND)
5802 		{
5803 			t_conninfo_param_list upstream_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
5804 			char	   *upstream_conninfo_user;
5805 
5806 			initialize_conninfo_params(&upstream_conninfo, false);
5807 			parse_conninfo_string(upstream_node_record.conninfo, &upstream_conninfo, NULL, false);
5808 
5809 			strncpy(recovery_conninfo_str, upstream_node_record.conninfo, MAXLEN);
5810 			strncpy(upstream_repluser, upstream_node_record.repluser, NAMEDATALEN);
5811 
5812 			upstream_conninfo_user = param_get(&upstream_conninfo, "user");
5813 			if (upstream_conninfo_user != NULL)
5814 			{
5815 				strncpy(upstream_user, upstream_conninfo_user, NAMEDATALEN);
5816 			}
5817 			else
5818 			{
5819 				get_conninfo_default_value("user", upstream_user, NAMEDATALEN);
5820 			}
5821 
5822 			log_verbose(LOG_DEBUG, "upstream_user is \"%s\"", upstream_user);
5823 
5824 			upstream_conninfo_found = true;
5825 		}
5826 
5827 		/*
5828 		 * Check that there's no existing node record with the same name but
5829 		 * different ID.
5830 		 */
5831 		record_status = get_node_record_by_name(source_conn, config_file_options.node_name, &other_node_record);
5832 
5833 		if (record_status == RECORD_FOUND && other_node_record.node_id != config_file_options.node_id)
5834 		{
5835 			log_error(_("another node (ID: %i) already exists with node_name \"%s\""),
5836 					  other_node_record.node_id,
5837 					  config_file_options.node_name);
5838 			PQfinish(source_conn);
5839 			exit(ERR_BAD_CONFIG);
5840 		}
5841 	}
5842 
5843 	/* Check the source node is configured sufficiently to be able to clone from */
5844 	check_upstream_config(source_conn, source_server_version_num, &upstream_node_record, true);
5845 
5846 	/*
5847 	 * Work out which users need to perform which tasks.
5848 	 *
5849 	 * Here we'll check the qualifications of the repmgr user as we have the
5850 	 * connection open; replication and superuser connections will be opened
5851 	 * when required and any errors will be raised at that point.
5852 	 */
5853 
5854 	/*
5855 	 * If the user wants to copy configuration files located outside the
5856 	 * data directory, we'll need to be able to query the upstream node's data
5857 	 * directory location, which is available only to superusers or members
5858 	 * of the appropriate role.
5859 	 */
5860 	if (runtime_options.copy_external_config_files == true)
5861 	{
5862 		/*
5863 		 * This will check if the user is superuser or (from Pg10) is a member
5864 		 * of "pg_read_all_settings"/"pg_monitor"
5865 		 */
5866 		if (connection_has_pg_monitor_role(source_conn, "pg_read_all_settings") == true)
5867 		{
5868 			SettingsUser = REPMGR_USER;
5869 		}
5870 		else if (runtime_options.superuser[0] != '\0')
5871 		{
5872 			SettingsUser = SUPERUSER;
5873 		}
5874 		else
5875 		{
5876 			log_error(_("--copy-external-config-files requires a user with permission to read the data directory on the source node"));
5877 
5878 			if (PQserverVersion(source_conn) >= 100000)
5879 			{
5880 				log_hint(_("the repmgr user must be superuser or member of role \"pg_monitor\" or \"pg_read_all_settings\", or a superuser provided with -S/--superuser"));
5881 			}
5882 			else
5883 			{
5884 				log_hint(_("the repmgr user must be superuser, or a superuser provided with -S/--superuser"));
5885 			}
5886 
5887 			exit(ERR_BAD_CONFIG);
5888 		}
5889 	}
5890 
5891 	/*
5892 	 * To create replication slots, we'll need a user with the REPLICATION
5893 	 * privilege, or a superuser.
5894 	 */
5895 	if (config_file_options.use_replication_slots == true)
5896 	{
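		/*
		 * Currently no check is carried out at this point; if the repmgr
		 * user lacks the REPLICATION privilege, an error will be raised
		 * when the replication connection is actually attempted.
		 */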
5897 	}
5898 }
5899 
5900 
5901 static void
5902 check_source_server_via_barman()
5903 {
5904 	char		buf[MAXLEN] = "";
5905 	char		barman_conninfo_str[MAXLEN] = "";
5906 	t_conninfo_param_list barman_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
5907 	char	   *errmsg = NULL;
5908 	bool		parse_success = false,
5909 				command_success = false;
5910 	char		where_condition[MAXLEN];
5911 	PQExpBufferData command_output;
5912 	PQExpBufferData repmgr_conninfo_buf;
5913 
5914 	int			c = 0;
5915 
5916 	get_barman_property(barman_conninfo_str, "conninfo", local_repmgr_tmp_directory);
5917 
5918 	initialize_conninfo_params(&barman_conninfo, false);
5919 
5920 	/*
5921 	 * parse_conninfo_string() here will remove the upstream's
5922 	 * `application_name`, if set
5923 	 */
5924 	parse_success = parse_conninfo_string(barman_conninfo_str, &barman_conninfo, &errmsg, true);
5925 
5926 	if (parse_success == false)
5927 	{
5928 		log_error(_("unable to parse Barman conninfo string \"%s\":\n%s"),
5929 				  barman_conninfo_str, errmsg);
5930 		exit(ERR_BARMAN);
5931 	}
5932 
5933 	/* Overwrite database name in the parsed parameter list */
5934 	param_set(&barman_conninfo, "dbname", runtime_options.dbname);
5935 
5936 	/* Rebuild the Barman conninfo string */
5937 	initPQExpBuffer(&repmgr_conninfo_buf);
5938 
5939 	for (c = 0; c < barman_conninfo.size && barman_conninfo.keywords[c] != NULL; c++)
5940 	{
5941 		if (repmgr_conninfo_buf.len != 0)
5942 			appendPQExpBufferChar(&repmgr_conninfo_buf, ' ');
5943 
5944 		appendPQExpBuffer(&repmgr_conninfo_buf, "%s=",
5945 						  barman_conninfo.keywords[c]);
5946 		appendConnStrVal(&repmgr_conninfo_buf,
5947 						 barman_conninfo.values[c]);
5948 	}
5949 
5950 	log_verbose(LOG_DEBUG,
5951 				"repmgr database conninfo string on barman server: %s",
5952 				repmgr_conninfo_buf.data);
5953 
5954 	if (upstream_node_id == UNKNOWN_NODE_ID)
5955 	{
5956 		maxlen_snprintf(where_condition, "type='primary'");
5957 	}
5958 	else
5959 	{
5960 		maxlen_snprintf(where_condition, "node_id=%i", upstream_node_id);
5961 	}
5962 
5963 	initPQExpBuffer(&command_output);
5964 	maxlen_snprintf(buf,
5965 					"psql -AqtX -d \\\"%s\\\" -c \\\""
5966 					" SELECT conninfo"
5967 					" FROM repmgr.nodes"
5968 					" WHERE %s"
5969 					" AND active IS TRUE"
5970 					"\\\"",
5971 					repmgr_conninfo_buf.data,
5972 					where_condition);
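	/*
	 * After the escaped quotes are processed by the remote shell, the
	 * command executed on the Barman server will resemble (illustrative
	 * conninfo and node ID):
	 *
	 *   psql -AqtX -d "host=node1 dbname=repmgr user=repmgr" \
	 *     -c "SELECT conninfo FROM repmgr.nodes WHERE node_id=2 AND active IS TRUE"
	 */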
5973 
5974 	termPQExpBuffer(&repmgr_conninfo_buf);
5975 
5976 	command_success = remote_command(config_file_options.barman_host,
5977 									 runtime_options.remote_user,
5978 									 buf,
5979 									 config_file_options.ssh_options,
5980 									 &command_output);
5981 
5982 	if (command_success == false)
5983 	{
5984 		log_error(_("unable to execute database query via Barman server"));
5985 		exit(ERR_BARMAN);
5986 	}
5987 
5988 	maxlen_snprintf(recovery_conninfo_str, "%s", command_output.data);
5989 	string_remove_trailing_newlines(recovery_conninfo_str);
5990 
5991 	upstream_conninfo_found = true;
5992 	log_verbose(LOG_DEBUG,
5993 				"upstream node conninfo string extracted via barman server: %s",
5994 				recovery_conninfo_str);
5995 
5996 	termPQExpBuffer(&command_output);
5997 }
5998 
5999 
6000 /*
6001  * check_upstream_config()
6002  *
6003  * Perform sanity check on upstream server configuration before starting cloning
6004  * process
6005  *
6006  * TODO:
6007  *  - check user is qualified to perform base backup
6008  */
6009 
6010 static bool
6011 check_upstream_config(PGconn *conn, int server_version_num, t_node_info *upstream_node_record, bool exit_on_error)
6012 {
6013 	int			i;
6014 	bool		config_ok = true;
6015 	char	   *wal_error_message = NULL;
6016 	t_basebackup_options backup_options = T_BASEBACKUP_OPTIONS_INITIALIZER;
6017 	bool		backup_options_ok = true;
6018 	ItemList	backup_option_errors = {NULL, NULL};
6019 	bool		wal_method_stream = true;
6020 	standy_clone_mode mode;
6021 	bool		pg_setting_ok;
6022 
6023 	/*
6024 	 * Detecting the intended cloning mode
6025 	 */
6026 	mode = get_standby_clone_mode();
6027 
6028 	/*
6029 	 * Parse "pg_basebackup_options", if set, to detect whether --wal-method
6030 	 * has been set to something other than `stream` (i.e. `fetch`), as this
6031 	 * will influence some checks
6032 	 */
6033 
6034 	backup_options_ok = parse_pg_basebackup_options(config_file_options.pg_basebackup_options,
6035 													&backup_options, server_version_num,
6036 													&backup_option_errors);
6037 
6038 	if (backup_options_ok == false)
6039 	{
6040 		if (exit_on_error == true)
6041 		{
6042 			log_error(_("error(s) encountered parsing \"pg_basebackup_options\""));
6043 			print_error_list(&backup_option_errors, LOG_ERR);
6044 			log_hint(_("\"pg_basebackup_options\" is: \"%s\""),
6045 					 config_file_options.pg_basebackup_options);
6046 			exit(ERR_BAD_CONFIG);
6047 		}
6048 
6049 		config_ok = false;
6050 	}
6051 
6052 	if (strlen(backup_options.wal_method) && strcmp(backup_options.wal_method, "stream") != 0)
6053 		wal_method_stream = false;
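	/*
	 * For example, a "pg_basebackup_options" setting of "--wal-method=fetch"
	 * (illustrative) results in wal_method_stream being set to false, which
	 * affects the WAL retention and replication connection checks below.
	 */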
6054 
6055 	{
6056 		char	   *levels_pre96[] = {
6057 			"hot_standby",
6058 			"logical",
6059 			NULL,
6060 		};
6061 
6062 		/*
6063 		 * Note that in 9.6+, "hot_standby" and "archive" are accepted as
6064 		 * aliases for "replica", but current_setting() will of course always
6065 		 * return "replica"
6066 		 */
6067 		char	   *levels_96plus[] = {
6068 			"replica",
6069 			"logical",
6070 			NULL,
6071 		};
6072 
6073 		char	  **levels;
6074 		int			j = 0;
6075 
6076 		if (server_version_num < 90600)
6077 		{
6078 			levels = (char **) levels_pre96;
6079 			wal_error_message = _("parameter \"wal_level\" must be set to \"hot_standby\" or \"logical\"");
6080 		}
6081 		else
6082 		{
6083 			levels = (char **) levels_96plus;
6084 			wal_error_message = _("parameter \"wal_level\" must be set to \"replica\" or \"logical\"");
6085 		}
6086 
6087 		do
6088 		{
6089 			i = guc_set(conn, "wal_level", "=", levels[j]);
6090 			if (i)
6091 			{
6092 				break;
6093 			}
6094 			j++;
6095 		} while (levels[j] != NULL);
6096 	}
6097 
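	/*
	 * guc_set() is assumed to return 1 if the parameter matches the given
	 * value, 0 if it does not, and -1 if the check could not be carried out
	 * (inferred from its usage here and below).
	 */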
6098 	if (i == 0 || i == -1)
6099 	{
6100 		if (i == 0)
6101 		{
6102 			log_error("%s", wal_error_message);
6103 		}
6104 
6105 		if (exit_on_error == true)
6106 		{
6107 			PQfinish(conn);
6108 			exit(ERR_BAD_CONFIG);
6109 		}
6110 
6111 		config_ok = false;
6112 	}
6113 
6114 	if (config_file_options.use_replication_slots == true)
6115 	{
6116 		pg_setting_ok = get_pg_setting_int(conn, "max_replication_slots", &i);
6117 
6118 		if (pg_setting_ok == false || i < 1)
6119 		{
6120 			if (pg_setting_ok == true)
6121 			{
6122 				log_error(_("parameter \"max_replication_slots\" must be set to at least 1 to enable replication slots"));
6123 				log_detail(_("current value is %i"), i);
6124 				log_hint(_("\"max_replication_slots\" should be set to at least the number of expected standbys"));
6125 			}
6126 			if (exit_on_error == true)
6127 			{
6128 				PQfinish(conn);
6129 				exit(ERR_BAD_CONFIG);
6130 			}
6131 
6132 			config_ok = false;
6133 		}
6134 
6135 		if (pg_setting_ok == true && i > 0 && runtime_options.dry_run == true)
6136 		{
6137 			log_info(_("parameter \"max_replication_slots\" set to %i"), i);
6138 		}
6139 	}
6140 	/*
6141 	 * physical replication slots not available or not requested - check if
6142 	 * there are any circumstances where "wal_keep_segments" should be set
6143 	 */
6144 	else if (mode != barman)
6145 	{
6146 		bool		check_wal_keep_segments = false;
6147 
6148 		/*
6149 		 * A non-zero "wal_keep_segments" value will almost certainly be
6150 		 * required if pg_basebackup is being used with --xlog-method=fetch,
6151 		 * *and* no restore command has been specified
6152 		 */
6153 		if (wal_method_stream == false
6154 			&& strcmp(config_file_options.restore_command, "") == 0)
6155 		{
6156 			check_wal_keep_segments = true;
6157 		}
6158 
6159 		if (check_wal_keep_segments == true)
6160 		{
6161 			const char *wal_keep_parameter_name = "wal_keep_size";
6162 
6163 			if (PQserverVersion(conn) < 130000)
6164 				wal_keep_parameter_name = "wal_keep_segments";
6165 
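			/*
			 * Settings which would satisfy this check (illustrative values):
			 *
			 *   wal_keep_size = '1GB'      (PostgreSQL 13 and later)
			 *   wal_keep_segments = 64     (PostgreSQL 12 and earlier)
			 */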
6166 			pg_setting_ok = get_pg_setting_int(conn, wal_keep_parameter_name, &i);
6167 
6168 			if (pg_setting_ok == false || i < 1)
6169 			{
6170 				if (pg_setting_ok == true)
6171 				{
6172 					log_error(_("parameter \"%s\" on the upstream server must be set to a non-zero value"),
6173 							  wal_keep_parameter_name);
6174 					log_hint(_("Choose a value high enough to retain sufficient WAL "
6175 							   "until the standby has been cloned and started.\n "
6176 							   "Alternatively set up WAL archiving using e.g. PgBarman and configure "
6177 							   "'restore_command' in repmgr.conf to fetch WALs from there."));
6178 					log_hint(_("In PostgreSQL 9.4 and later, replication slots can be used, which "
6179 							   "do not require \"%s\" to be set "
6180 							   "(set parameter \"use_replication_slots\" in repmgr.conf to enable)"),
6181 							 wal_keep_parameter_name);
6182 				}
6183 
6184 				if (exit_on_error == true)
6185 				{
6186 					PQfinish(conn);
6187 					exit(ERR_BAD_CONFIG);
6188 				}
6189 
6190 				config_ok = false;
6191 			}
6192 
6193 			if (pg_setting_ok == true && i > 0 && runtime_options.dry_run == true)
6194 			{
6195 				log_info(_("parameter \"%s\" set to %i"),
6196 						   wal_keep_parameter_name,
6197 						   i);
6198 			}
6199 		}
6200 	}
6201 
6202 
6203 	if (config_file_options.use_replication_slots == false)
6204 	{
6205 		log_info(_("replication slot usage not requested; no replication slot will be set up for this standby"));
6206 	}
6207 
6208 
6209 	/*
6210 	 * If archive_mode is enabled, check that 'archive_command' is non empty
6211 	 * (however it's not practical to check that it actually represents a
6212 	 * valid command).
6213 	 *
6214 	 * From PostgreSQL 9.5, archive_mode can be one of 'off', 'on' or 'always'
6215 	 * so for ease of backwards compatibility, rather than explicitly check
6216 	 * for an enabled mode, check that it's not "off".
6217 	 */
6218 
6219 	if (guc_set(conn, "archive_mode", "!=", "off"))
6220 	{
6221 		i = guc_set(conn, "archive_command", "!=", "");
6222 
6223 		if (i == 0 || i == -1)
6224 		{
6225 			if (i == 0)
6226 				log_error(_("parameter \"archive_command\" must be set to a valid command"));
6227 
6228 			if (exit_on_error == true)
6229 			{
6230 				PQfinish(conn);
6231 				exit(ERR_BAD_CONFIG);
6232 			}
6233 
6234 			config_ok = false;
6235 		}
6236 	}
6237 
6238 
6239 	/*
6240 	 * Check that 'hot_standby' is on. This isn't strictly necessary for the
6241 	 * primary server, however the assumption is that we'll be cloning
6242 	 * standbys and thus copying the primary configuration; this way the
6243 	 * standby will be correctly configured by default.
6244 	 */
6245 
6246 	i = guc_set(conn, "hot_standby", "=", "on");
6247 	if (i == 0 || i == -1)
6248 	{
6249 		if (i == 0)
6250 		{
6251 			log_error(_("parameter \"hot_standby\" must be set to \"on\""));
6252 		}
6253 
6254 		if (exit_on_error == true)
6255 		{
6256 			PQfinish(conn);
6257 			exit(ERR_BAD_CONFIG);
6258 		}
6259 
6260 		config_ok = false;
6261 	}
6262 
6263 	pg_setting_ok = get_pg_setting_int(conn, "max_wal_senders", &i);
6264 
6265 	if (pg_setting_ok == false || i < 1)
6266 	{
6267 		if (pg_setting_ok == true)
6268 		{
6269 			log_error(_("parameter \"max_wal_senders\" must be set to at least 1; current value is %i"), i);
6270 			log_hint(_("\"max_wal_senders\" should be set to at least the number of expected standbys"));
6271 		}
6272 
6273 		if (exit_on_error == true)
6274 		{
6275 			PQfinish(conn);
6276 			exit(ERR_BAD_CONFIG);
6277 		}
6278 
6279 		config_ok = false;
6280 	}
6281 	else if (pg_setting_ok == true && i > 0 && runtime_options.dry_run == true)
6282 	{
6283 		log_info(_("parameter \"max_wal_senders\" set to %i"), i);
6284 	}
6285 
6286 	/*
6287 	 * If using pg_basebackup, ensure sufficient replication connections can
6288 	 * be made. There's no guarantee they'll still be available by the time
6289 	 * pg_basebackup is executed, but there's nothing we can do about that.
6290 	 * This check is mainly intended to warn about missing replication permissions
6291 	 * and/or lack of available walsenders.
6292 	 */
6293 	if (mode == pg_basebackup)
6294 	{
6295 
6296 		PGconn	  **connections;
6297 		int			i;
6298 		int			available_wal_senders;
6299 		int			min_replication_connections = 1;
6300 		int			possible_replication_connections = 0;
6301 		t_conninfo_param_list repl_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
6302 
6303 
6304 		/*
6305 		 * work out how many replication connections are required (1 or 2)
6306 		 */
6307 
6308 		if (wal_method_stream == true)
6309 			min_replication_connections += 1;
6310 
6311 		log_notice(_("checking for available walsenders on the source node (%i required)"),
6312 				   min_replication_connections);
6313 
6314 		/*
6315 		 * check how many free walsenders are available
6316 		 */
6317 		get_node_replication_stats(conn, upstream_node_record);
6318 
6319 		available_wal_senders = upstream_node_record->max_wal_senders -
6320 			upstream_node_record->attached_wal_receivers;
6321 
6322 		if (available_wal_senders < min_replication_connections)
6323 		{
6324 			log_error(_("insufficient free walsenders on the source node"));
6325 			log_detail(_("%i free walsenders required, %i free walsenders available"),
6326 					   min_replication_connections,
6327 					   available_wal_senders);
6328 			log_hint(_("increase \"max_wal_senders\" on the source node by at least %i"),
6329 					 (upstream_node_record->attached_wal_receivers + min_replication_connections) - upstream_node_record->max_wal_senders);
6330 
6331 			if (exit_on_error == true)
6332 			{
6333 				PQfinish(conn);
6334 				exit(ERR_BAD_CONFIG);
6335 			}
6336 		}
6337 		else if (runtime_options.dry_run == true)
6338 		{
6339 			log_info(_("sufficient walsenders available on the source node"));
6340 			log_detail(_("%i required, %i available"),
6341 					   min_replication_connections,
6342 					   available_wal_senders);
6343 		}
6344 
6345 
6346 		/*
6347 		 * Sufficient free walsenders appear to be available, check if
6348 		 * we can connect to them. We check that the required number
6349 		 * of connections can be made e.g. to rule out a very restrictive
6350 		 * "CONNECTION LIMIT" setting.
6351 		 */
6352 
6353 		log_notice(_("checking replication connections can be made to the source server (%i required)"),
6354 				   min_replication_connections);
6355 
6356 		/*
6357 		 * Make a copy of the connection parameter arrays, and append
6358 		 * "replication".
6359 		 */
6360 		initialize_conninfo_params(&repl_conninfo, false);
6361 
6362 		conn_to_param_list(conn, &repl_conninfo);
6363 
6364 		param_set(&repl_conninfo, "replication", "1");
6365 
6366 		if (runtime_options.replication_user[0] != '\0')
6367 		{
6368 			param_set(&repl_conninfo, "user", runtime_options.replication_user);
6369 		}
6370 		else if (upstream_repluser[0] != '\0')
6371 		{
6372 			param_set(&repl_conninfo, "user", upstream_repluser);
6373 		}
6374 		else if (upstream_node_record->repluser[0] != '\0')
6375 		{
6376 			param_set(&repl_conninfo, "user", upstream_node_record->repluser);
6377 		}
6378 
6379 		if (strcmp(param_get(&repl_conninfo, "user"), upstream_user) != 0)
6380 		{
6381 			param_set(&repl_conninfo, "dbname", "replication");
6382 		}
6383 
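		/*
		 * The resulting parameter list will resemble (illustrative values):
		 *
		 *   "host=node1 port=5432 user=repl_user replication=1 dbname=replication"
		 *
		 * i.e. a physical replication connection made as the replication user.
		 */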
6384 		connections = pg_malloc0(sizeof(PGconn *) * min_replication_connections);
6385 
6386 		/*
6387 		 * Attempt to create the minimum number of required concurrent
6388 		 * connections
6389 		 */
6390 		for (i = 0; i < min_replication_connections; i++)
6391 		{
6392 			PGconn	   *replication_conn;
6393 
6394 			replication_conn = establish_db_connection_by_params(&repl_conninfo, false);
6395 
6396 			if (PQstatus(replication_conn) == CONNECTION_OK)
6397 			{
6398 				connections[i] = replication_conn;
6399 				possible_replication_connections++;
6400 			}
6401 		}
6402 
6403 		/* Close previously created connections */
6404 		for (i = 0; i < possible_replication_connections; i++)
6405 		{
6406 			PQfinish(connections[i]);
6407 		}
6408 
6409 		pfree(connections);
6410 		free_conninfo_params(&repl_conninfo);
6411 
6412 		if (possible_replication_connections < min_replication_connections)
6413 		{
6414 			config_ok = false;
6415 
6416 			log_error(_("unable to establish necessary replication connections"));
6417 			log_hint(_("check replication permissions on the source server"));
6418 
6419 			if (exit_on_error == true)
6420 			{
6421 				PQfinish(conn);
6422 				exit(ERR_BAD_CONFIG);
6423 			}
6424 		}
6425 
6426 		if (runtime_options.dry_run == true)
6427 		{
6428 			log_info(_("required number of replication connections could be made to the source server"));
6429 			log_detail(_("%i replication connections required"),
6430 					   min_replication_connections);
6431 		}
6432 		else
6433 		{
6434 			log_verbose(LOG_INFO, _("sufficient replication connections could be made to the source server (%i required)"),
6435 						min_replication_connections);
6436 		}
6437 	}
6438 
6439 	/*
6440 	 * Finally, add some checks for recommended settings
6441 	 */
6442 
6443 	{
6444 		bool data_checksums = false;
6445 		bool wal_log_hints = false;
6446 
6447 		/* data_checksums available from PostgreSQL 9.3; can be read by any user */
6448 		if (get_pg_setting_bool(conn, "data_checksums", &data_checksums) == false)
6449 		{
6450 			/* highly unlikely this will happen */
6451 			log_error(_("unable to determine value for \"data_checksums\""));
6452 			exit(ERR_BAD_CONFIG);
6453 		}
6454 
6455 		/* wal_log_hints available from PostgreSQL 9.4; can be read by any user */
6456 		if (get_pg_setting_bool(conn, "wal_log_hints", &wal_log_hints) == false)
6457 		{
6458 			/* highly unlikely this will happen */
6459 			log_error(_("unable to determine value for \"wal_log_hints\""));
6460 			exit(ERR_BAD_CONFIG);
6461 		}
6462 
6463 		if (data_checksums == false && wal_log_hints == false)
6464 		{
6465 			log_warning(_("data checksums are not enabled and \"wal_log_hints\" is \"off\""));
6466 			log_detail(_("pg_rewind requires \"wal_log_hints\" to be enabled"));
6467 		}
6468 	}
6469 
6470 	return config_ok;
6471 }
6472 
6473 
6474 /*
6475  * initialise_direct_clone()
6476  *
6477  * In pg_basebackup mode, configure the target data directory
6478  * if necessary, and fetch information about tablespaces and configuration
6479  * files.
6480  *
6481  * Event(s):
6482  * - standby_clone
6483  */
6484 static void
6485 initialise_direct_clone(t_node_info *local_node_record, t_node_info *upstream_node_record)
6486 {
6487 	/*
6488 	 * Check the destination data directory can be used (in Barman mode, this
6489 	 * directory will already have been created)
6490 	 */
6491 
6492 	if (!create_pg_dir(local_data_directory, runtime_options.force))
6493 	{
6494 		log_error(_("unable to use directory \"%s\""),
6495 				  local_data_directory);
6496 		log_hint(_("use -F/--force to force this directory to be overwritten"));
6497 		exit(ERR_BAD_CONFIG);
6498 	}
6499 
6500 	/*
6501 	 * Check that tablespaces named in any `tablespace_mapping` configuration
6502 	 * file parameters exist.
6503 	 *
6504 	 * pg_basebackup doesn't verify mappings, so any errors will not be
6505 	 * caught. We'll do that here as a value-added service.
6506 	 *
6507 	 */
6508 
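	/*
	 * A "tablespace_mapping" entry in repmgr.conf takes the form
	 * (illustrative paths):
	 *
	 *   tablespace_mapping='/var/lib/pgsql/ts_old=/var/lib/pgsql/ts_new'
	 *
	 * where the "old" directory must match a tablespace location registered
	 * on the source node.
	 */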
6509 	if (config_file_options.tablespace_mapping.head != NULL)
6510 	{
6511 		TablespaceListCell *cell;
6512 		KeyValueList not_found = {NULL, NULL};
6513 		int			total = 0,
6514 					matched = 0;
6515 		bool		success = false;
6516 
6517 		for (cell = config_file_options.tablespace_mapping.head; cell; cell = cell->next)
6518 		{
6519 			char	   *old_dir_escaped = escape_string(source_conn, cell->old_dir);
6520 			char		name[MAXLEN] = "";
6521 
6522 			success = get_tablespace_name_by_location(source_conn, old_dir_escaped, name);
6523 			pfree(old_dir_escaped);
6524 
6525 			if (success == true)
6526 			{
6527 				matched++;
6528 			}
6529 			else
6530 			{
6531 				key_value_list_set(&not_found,
6532 								   cell->old_dir,
6533 								   "");
6534 			}
6535 
6536 			total++;
6537 		}
6538 
6539 		if (not_found.head != NULL)
6540 		{
6541 			PQExpBufferData detail;
6542 			KeyValueListCell *kv_cell;
6543 
6544 			log_error(_("%i of %i mapped tablespaces not found"),
6545 					  total - matched, total);
6546 
6547 			initPQExpBuffer(&detail);
6548 
6549 			for (kv_cell = not_found.head; kv_cell; kv_cell = kv_cell->next)
6550 			{
6551 				appendPQExpBuffer(
6552 					&detail,
6553 					"  %s\n", kv_cell->key);
6554 			}
6555 
6556 			log_detail(_("the following tablespaces were not found:\n%s"),
6557 					   detail.data);
6558 			termPQExpBuffer(&detail);
6559 
6560 			exit(ERR_BAD_CONFIG);
6561 		}
6562 	}
6563 
6564 
6565 	/*
6566 	 * If replication slots requested, create appropriate slot on the source
6567 	 * node; this must be done before pg_basebackup is called.
6568 	 *
6569 	 * Note: if the source node is different to the specified upstream node,
6570 	 * we'll need to drop the slot and recreate it on the upstream.
6571 	 *
6572 	 * TODO: skip this for Pg10, and ensure temp slot option used
6573 	 *
6574 	 * Replication slots are not supported (and not very useful anyway) in
6575 	 * Barman mode.
6576 	 */
6577 
6578 	if (config_file_options.use_replication_slots == true)
6579 	{
6580 		PQExpBufferData event_details;
6581 
6582 		initPQExpBuffer(&event_details);
6583 
6584 		if (create_replication_slot(source_conn, local_node_record->slot_name, upstream_node_record, &event_details) == false)
6585 		{
6586 			log_error("%s", event_details.data);
6587 
6588 			create_event_notification(primary_conn,
6589 									  &config_file_options,
6590 									  config_file_options.node_id,
6591 									  "standby_clone",
6592 									  false,
6593 									  event_details.data);
6594 
6595 			PQfinish(source_conn);
6596 
6597 			exit(ERR_DB_QUERY);
6598 		}
6599 
6600 		termPQExpBuffer(&event_details);
6601 
6602 		log_verbose(LOG_INFO,
6603 					_("replication slot \"%s\" created on source node"),
6604 					local_node_record->slot_name);
6605 	}
6606 
6607 	return;
6608 }
6609 
6610 
6611 static int
6612 run_basebackup(t_node_info *node_record)
6613 {
6614 	PQExpBufferData params;
6615 	PQExpBufferData script;
6616 
6617 	int			r = SUCCESS;
6618 
6619 	TablespaceListCell *cell = NULL;
6620 	t_basebackup_options backup_options = T_BASEBACKUP_OPTIONS_INITIALIZER;
6621 
6622 	/*
6623 	 * Parse the pg_basebackup_options provided in repmgr.conf - we'll want to
6624 	 * check later whether certain options were set by the user
6625 	 */
6626 	parse_pg_basebackup_options(config_file_options.pg_basebackup_options,
6627 								&backup_options,
6628 								source_server_version_num,
6629 								NULL);
6630 
6631 	/* Create pg_basebackup command line options */
6632 
6633 	initPQExpBuffer(&params);
6634 
6635 	appendPQExpBuffer(&params, " -D %s", local_data_directory);
6636 
6637 	/*
6638 	 * conninfo string provided - pass it to pg_basebackup as the -d option
6639 	 * (pg_basebackup doesn't require or want a database name, but for
6640 	 * consistency with other applications accepts a conninfo string under
6641 	 * -d/--dbname)
6642 	 */
6643 	if (runtime_options.conninfo_provided == true)
6644 	{
6645 		t_conninfo_param_list conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
6646 		char	   *conninfo_str = NULL;
6647 
6648 		initialize_conninfo_params(&conninfo, false);
6649 
6650 		/* string will already have been parsed */
6651 		(void) parse_conninfo_string(runtime_options.dbname, &conninfo, NULL, false);
6652 
6653 		if (runtime_options.replication_user[0] != '\0')
6654 		{
6655 			param_set(&conninfo, "user", runtime_options.replication_user);
6656 		}
6657 		else if (upstream_repluser[0] != '\0')
6658 		{
6659 			param_set(&conninfo, "user", upstream_repluser);
6660 		}
6661 		else
6662 		{
6663 			param_set(&conninfo, "user", node_record->repluser);
6664 		}
6665 
6666 		conninfo_str = param_list_to_string(&conninfo);
6667 
6668 		appendPQExpBuffer(&params, " -d '%s'", conninfo_str);
6669 
6670 		pfree(conninfo_str);
6671 	}
6672 
6673 	/*
6674 	 * Connection parameters not passed to repmgr as conninfo string - provide
6675 	 * them individually to pg_basebackup (-d/--dbname not required)
6676 	 */
6677 	else
6678 	{
6679 		if (strlen(runtime_options.host))
6680 		{
6681 			appendPQExpBuffer(&params, " -h %s", runtime_options.host);
6682 		}
6683 
6684 		if (strlen(runtime_options.port))
6685 		{
6686 			appendPQExpBuffer(&params, " -p %s", runtime_options.port);
6687 		}
6688 
6689 		if (strlen(runtime_options.replication_user))
6690 		{
6691 			appendPQExpBuffer(&params, " -U %s", runtime_options.replication_user);
6692 		}
6693 		else if (strlen(upstream_repluser))
6694 		{
6695 			appendPQExpBuffer(&params, " -U %s", upstream_repluser);
6696 		}
6697 		else if (strlen(node_record->repluser))
6698 		{
6699 			appendPQExpBuffer(&params, " -U %s", node_record->repluser);
6700 		}
6701 		else if (strlen(runtime_options.username))
6702 		{
6703 			appendPQExpBuffer(&params, " -U %s", runtime_options.username);
6704 		}
6705 
6706 	}
6707 
6708 	if (runtime_options.fast_checkpoint)
6709 	{
6710 		appendPQExpBufferStr(&params, " -c fast");
6711 	}
6712 
6713 	if (config_file_options.tablespace_mapping.head != NULL)
6714 	{
6715 		for (cell = config_file_options.tablespace_mapping.head; cell; cell = cell->next)
6716 		{
6717 			appendPQExpBuffer(&params, " -T %s=%s", cell->old_dir, cell->new_dir);
6718 		}
6719 	}
6720 
6721 	/*
6722 	 * To ensure we have all the WALs needed during basebackup execution we
6723 	 * stream them as the backup is taking place.
6724 	 *
6725 	 * From 9.6, if replication slots are in use, we'll have previously
6726 	 * created a slot with reserved LSN, and will stream from that slot via
6727 	 * the -S/--slot option to avoid WAL buildup on the primary; this
6728 	 * requires -X/--xlog-method=stream (from 10, -X/--wal-method=stream).
6729 	 */
6730 	if (!strlen(backup_options.wal_method))
6731 	{
6732 		appendPQExpBufferStr(&params, " -X stream");
6733 	}
6734 
6735 	/*
6736 	 * From 9.6, pg_basebackup accepts -S/--slot, which forces WAL streaming
6737 	 * to use the specified replication slot. If replication slot usage is
6738 	 * specified, the slot will already have been created.
6739 	 *
6740 	 * NOTE: currently there's no way of disabling the --slot option while
6741 	 * using --xlog-method=stream - it's hard to imagine a use case for this,
6742 	 * so no provision has been made for doing it.
6743 	 *
6744 	 * NOTE: It's possible to set 'pg_basebackup_options' with an invalid
6745 	 * combination of values for --wal-method (--xlog-method) and --slot -
6746 	 * we're not checking that, just that we're not overriding any
6747 	 * user-supplied values
6748 	 */
6749 	if (source_server_version_num >= 90600 && config_file_options.use_replication_slots)
6750 	{
6751 		bool		slot_add = true;
6752 
6753 		/*
6754 		 * Check whether 'pg_basebackup_options' in repmgr.conf has the --slot
6755 		 * option set, or if --wal-method (--xlog-method) is set to a value
6756 		 * other than "stream" (in which case we can't use --slot).
6757 		 */
6758 		if (strlen(backup_options.slot) || (strlen(backup_options.wal_method) && strcmp(backup_options.wal_method, "stream") != 0))
6759 		{
6760 			slot_add = false;
6761 		}
6762 
6763 		if (slot_add == true)
6764 		{
6765 			appendPQExpBuffer(&params, " -S %s", node_record->slot_name);
6766 		}
6767 	}
6768 
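	/*
	 * The completed command will resemble the following (illustrative
	 * values, assuming replication slots are in use):
	 *
	 *   pg_basebackup -l "repmgr base backup" -D /var/lib/pgsql/data \
	 *     -h node1 -p 5432 -U repmgr -X stream -S repmgr_slot_2
	 */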
6769 	initPQExpBuffer(&script);
6770 	make_pg_path(&script, "pg_basebackup");
6771 
6772 	appendPQExpBuffer(&script,
6773 					  " -l \"repmgr base backup\" %s %s",
6774 					  params.data,
6775 					  config_file_options.pg_basebackup_options);
6776 
6777 	termPQExpBuffer(&params);
6778 
6779 	log_info(_("executing:\n  %s"), script.data);
6780 
6781 	/*
6782 	 * As of 9.4, pg_basebackup only ever returns 0 or 1
6783 	 */
6784 
6785 	r = system(script.data);
6786 
6787 	termPQExpBuffer(&script);
6788 
6789 	if (r != 0)
6790 		return ERR_BAD_BASEBACKUP;
6791 
6792 	/* check connections are still available */
6793 	(void)connection_ping_reconnect(primary_conn);
6794 
6795 	if (source_conn != primary_conn)
6796 		(void)connection_ping_reconnect(source_conn);
6797 
6798 	/*
6799 	 * If replication slots in use, check the created slot is on the correct
6800 	 * node; the slot will initially get created on the source node, and will
6801 	 * need to be dropped and recreated on the actual upstream node if these
6802 	 * differ.
6803 	 */
6804 	if (config_file_options.use_replication_slots && upstream_node_id != UNKNOWN_NODE_ID)
6805 	{
6806 		t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;
6807 		t_replication_slot slot_info = T_REPLICATION_SLOT_INITIALIZER;
6808 		RecordStatus record_status = RECORD_NOT_FOUND;
6809 		bool slot_exists_on_upstream = false;
6810 
6811 		record_status = get_node_record(source_conn, upstream_node_id, &upstream_node_record);
6812 
6813 		/*
6814 		 * If there's no upstream record, there's no point in trying to create
6815 		 * a replication slot on the designated upstream, as the assumption is
6816 		 * it won't exist at this point.
6817 		 */
6818 		if (record_status != RECORD_FOUND)
6819 		{
6820 			log_warning(_("no record exists for designated upstream node %i"),
6821 						upstream_node_id);
6822 			log_hint(_("you'll need to create the replication slot (\"%s\") manually"),
6823 					 node_record->slot_name);
6824 		}
6825 		else
6826 		{
6827 			PGconn	   *upstream_conn = NULL;
6828 
6829 			upstream_conn = establish_db_connection(upstream_node_record.conninfo, false);
6830 
6831 			/*
6832 			 * It's possible the upstream node is not yet running, in which case we'll
6833 			 * have to rely on the user taking action to create the slot
6834 			 */
6835 			if (PQstatus(upstream_conn) != CONNECTION_OK)
6836 			{
6837 				log_warning(_("unable to connect to upstream node to create replication slot"));
6838 				/*
6839 				 * TODO: if slot creation also handled by "standby register", update warning
6840 				 */
6841 				log_hint(_("you may need to create the replication slot manually"));
6842 			}
6843 			else
6844 			{
6845 				record_status = get_slot_record(upstream_conn, node_record->slot_name, &slot_info);
6846 
6847 				if (record_status == RECORD_FOUND)
6848 				{
6849 					log_verbose(LOG_INFO,
6850 								_("replication slot \"%s\" already exists on upstream node %i"),
6851 								node_record->slot_name,
6852 								upstream_node_id);
6853 					slot_exists_on_upstream = true;
6854 				}
6855 				else
6856 				{
6857 					PQExpBufferData event_details;
6858 
6859 					log_notice(_("creating replication slot \"%s\" on upstream node %i"),
6860 							   node_record->slot_name,
6861 							   upstream_node_id);
6862 
6863 					initPQExpBuffer(&event_details);
6864 					if (create_replication_slot(upstream_conn, node_record->slot_name, &upstream_node_record, &event_details) == false)
6865 					{
6866 						log_error("%s", event_details.data);
6867 
6868 						create_event_notification(primary_conn,
6869 												  &config_file_options,
6870 												  config_file_options.node_id,
6871 												  "standby_clone",
6872 												  false,
6873 												  event_details.data);
6874 
6875 						PQfinish(source_conn);
6876 
6877 						exit(ERR_DB_QUERY);
6878 					}
6879 
6880 					termPQExpBuffer(&event_details);
6881 				}
6882 
6883 				PQfinish(upstream_conn);
6884 			}
6885 		}
6886 
6887 		if (slot_info.active == false)
6888 		{
6889 			if (slot_exists_on_upstream == false)
6890 			{
6891 
6892 				/* delete slot on source server */
6893 
6894 				if (drop_replication_slot_if_exists(source_conn, UNKNOWN_NODE_ID, node_record->slot_name) == true)
6895 				{
6896 					log_notice(_("replication slot \"%s\" deleted on source node"),
6897 							   node_record->slot_name);
6898 				}
6899 				else
6900 				{
6901 					log_error(_("unable to delete replication slot \"%s\" on source node"),
6902 							  node_record->slot_name);
6903 				}
6904 			}
6905 		}
6906 
6907 		/*
6908 		 * if replication slot is still active (shouldn't happen), emit a
6909 		 * warning
6910 		 */
6911 		else
6912 		{
6913 			log_warning(_("replication slot \"%s\" is still active on source node"),
6914 						node_record->slot_name);
6915 		}
6916 	}
6917 
6918 	return SUCCESS;
6919 }
6920 
6921 
6922 /*
6923  * Perform a filesystem backup using rsync.
6924  *
6925  * From repmgr 4 this is only used for Barman backups.
6926  */
6927 static int
6928 run_file_backup(t_node_info *local_node_record)
6929 {
6930 	int			r = SUCCESS,
6931 				i;
6932 
6933 	char		command[MAXLEN] = "";
6934 	char		filename[MAXLEN] = "";
6935 	char		buf[MAXLEN] = "";
6936 	char		basebackups_directory[MAXLEN] = "";
6937 	char		backup_id[MAXLEN] = "";
6938 	TablespaceDataList tablespace_list = {NULL, NULL};
6939 	TablespaceDataListCell *cell_t = NULL;
6940 
6941 	PQExpBufferData tablespace_map;
6942 	bool		tablespace_map_rewrite = false;
6943 
6944 	/* For the foreseeable future, no other modes are supported */
6945 	Assert(mode == barman);
6946 	if (mode == barman)
6947 	{
6948 		t_basebackup_options backup_options = T_BASEBACKUP_OPTIONS_INITIALIZER;
6949 
6950 		/*
6951 		 * Locate Barman's base backups directory
6952 		 */
6953 
6954 		get_barman_property(basebackups_directory, "basebackups_directory", local_repmgr_tmp_directory);
6955 
6956 		/*
6957 		 * Read the list of backup files into a local file. In the process:
6958 		 *
6959 		 * - determine the backup ID
6960 		 * - check, and remove, the prefix
6961 		 * - detect tablespaces
6962 		 * - filter files in one list per tablespace
6963 		 */
6964 		{
6965 			FILE	   *fi;		/* input stream */
6966 			FILE	   *fd;		/* output for data.txt */
6967 			char		prefix[MAXLEN] = "";
6968 			char		output[MAXLEN] = "";
6969 			int			n = 0;
6970 			char	   *p = NULL,
6971 					   *q = NULL;
6972 
6973 			maxlen_snprintf(command, "%s list-files --target=data %s latest",
6974 							make_barman_ssh_command(barman_command_buf),
6975 							config_file_options.barman_server);
6976 
6977 			log_verbose(LOG_DEBUG, "executing:\n  %s", command);
6978 
6979 			fi = popen(command, "r");
6980 			if (fi == NULL)
6981 			{
6982 				log_error("cannot launch command: %s", command);
6983 				exit(ERR_BARMAN);
6984 			}
6985 
6986 			fd = fopen(datadir_list_filename, "w");
6987 			if (fd == NULL)
6988 			{
6989 				log_error("cannot open file: %s", datadir_list_filename);
6990 				exit(ERR_BARMAN);
6991 			}
6992 
6993 			maxlen_snprintf(prefix, "%s/", basebackups_directory);
6994 
6995 			while (fgets(output, MAXLEN, fi) != NULL)
6996 			{
6997 				/*
6998 				 * Remove prefix
6999 				 */
7000 				p = string_skip_prefix(prefix, output);
7001 
7002 				if (p == NULL)
7003 				{
7004 					log_error("unexpected output from \"barman list-files\"");
7005 					log_detail("%s", output);
7006 					exit(ERR_BARMAN);
7007 				}
7008 
7009 				/*
7010 				 * Remove and note backup ID; copy backup.info
7011 				 */
7012 				if (!strcmp(backup_id, ""))
7013 				{
7014 					FILE	   *fi2;
7015 
7016 					n = strcspn(p, "/");
7017 
7018 					strncpy(backup_id, p, n);
7019 
7020 					strncat(prefix, backup_id, MAXLEN - 1);
7021 					strncat(prefix, "/", MAXLEN - 1);
7022 					p = string_skip_prefix(backup_id, p);
7023 
7024 					if (p == NULL)
7025 					{
7026 						log_error("unexpected output from \"barman list-files\"");
7027 						log_detail("%s", output);
7028 						exit(ERR_BARMAN);
7029 					}
7030 
7031 					p = string_skip_prefix("/", p);
7032 
7033 					/*
7034 					 * Copy backup.info
7035 					 */
7036 					maxlen_snprintf(command,
7037 									"rsync -a %s:%s/%s/backup.info %s",
7038 									config_file_options.barman_host,
7039 									basebackups_directory,
7040 									backup_id,
7041 									local_repmgr_tmp_directory);
7042 
7043 					(void) local_command(command,
7044 										 NULL);
7045 
7046 					/*
7047 					 * Get tablespace data
7048 					 */
7049 					maxlen_snprintf(filename, "%s/backup.info",
7050 									local_repmgr_tmp_directory);
7051 					fi2 = fopen(filename, "r");
7052 					if (fi2 == NULL)
7053 					{
7054 						log_error("cannot open file: %s", filename);
7055 						exit(ERR_INTERNAL);
7056 					}
7057 					while (fgets(buf, MAXLEN, fi2) != NULL)
7058 					{
7059 						q = string_skip_prefix("tablespaces=", buf);
7060 						if (q != NULL && strncmp(q, "None\n", 5))
7061 						{
7062 							get_tablespace_data_barman(q, &tablespace_list);
7063 						}
7064 						q = string_skip_prefix("version=", buf);
7065 						if (q != NULL)
7066 						{
7067 							source_server_version_num = strtol(q, NULL, 10);
7068 						}
7069 					}
7070 					fclose(fi2);
7071 					unlink(filename);
7072 
7073 					continue;
7074 				}
7075 
7076 				/*
7077 				 * Skip backup.info
7078 				 */
7079 				if (string_skip_prefix("backup.info", p))
7080 					continue;
7081 
7082 				/*
7083 				 * Filter data directory files
7084 				 */
7085 				if ((q = string_skip_prefix("data/", p)) != NULL)
7086 				{
7087 					fputs(q, fd);
7088 					continue;
7089 				}
7090 
7091 				/*
7092 				 * Filter other files (i.e. tablespaces)
7093 				 */
7094 				for (cell_t = tablespace_list.head; cell_t; cell_t = cell_t->next)
7095 				{
7096 					if ((q = string_skip_prefix(cell_t->oid, p)) != NULL && *q == '/')
7097 					{
7098 						if (cell_t->fptr == NULL)
7099 						{
7100 							maxlen_snprintf(filename, "%s/%s.txt", local_repmgr_tmp_directory, cell_t->oid);
7101 							cell_t->fptr = fopen(filename, "w");
7102 							if (cell_t->fptr == NULL)
7103 							{
7104 								log_error("cannot open file: %s", filename);
7105 								exit(ERR_INTERNAL);
7106 							}
7107 						}
7108 						fputs(q + 1, cell_t->fptr);
7109 						break;
7110 					}
7111 				}
7112 			}
7113 
7114 			fclose(fd);
7115 
7116 			pclose(fi);
7117 		}
7118 
7119 		/* For 9.5 and greater, create our own tablespace_map file */
7120 		if (source_server_version_num >= 90500)
7121 		{
7122 			initPQExpBuffer(&tablespace_map);
7123 		}
7124 
7125 		/*
7126 		 * As of Barman version 1.6.1, the file structure of a backup is as
7127 		 * follows:
7128 		 *
7129 		 * base/                 - base backups
7130 		 * wals/                 - WAL files associated with the backup
7131 		 *
7132 		 * base/<ID>             - backup files, where <ID> has the standard
7133 		 *                         timestamp form yyyymmddThhmmss
7134 		 *
7135 		 * base/<ID>/backup.info - backup metadata, in text format
7136 		 * base/<ID>/data        - data directory
7137 		 * base/<ID>/<OID>       - tablespace with the given oid
7138 		 */
7139 
7140 		/*
7141 		 * Copy all backup files from the Barman server
7142 		 */
7143 		maxlen_snprintf(command,
7144 						"rsync --progress -a --files-from=%s %s:%s/%s/data %s",
7145 						datadir_list_filename,
7146 						config_file_options.barman_host,
7147 						basebackups_directory,
7148 						backup_id,
7149 						local_data_directory);
7150 
7151 		(void) local_command(command, NULL);
7154 
7155 		unlink(datadir_list_filename);
7156 
7157 		/*
7158 		 * At this point we should have the source server version number.
7159 		 * If not, try to extract it from the data directory.
7160 		 */
7161 		if (source_server_version_num == UNKNOWN_SERVER_VERSION_NUM)
7162 		{
7163 			log_warning(_("server version number is unknown"));
7164 			source_server_version_num = get_pg_version(local_data_directory, NULL);
7165 
7166 			/*
7167 			 * In the unlikely event that we are still unable to obtain the
7168 			 * server version number, there's not a lot which can be done.
7169 			 */
7170 			if (source_server_version_num == UNKNOWN_SERVER_VERSION_NUM)
7171 			{
7172 				log_error(_("unable to extract server version number from the data directory, aborting"));
7173 				exit(ERR_BAD_CONFIG);
7174 			}
7175 			log_notice(_("server version number is: %i"), source_server_version_num);
7176 		}
7177 
7178 		/*
7179 		 * Parse the pg_basebackup_options provided in repmgr.conf - we need to
7180 		 * check if --waldir/--xlogdir was provided.
7181 		 */
7182 		parse_pg_basebackup_options(config_file_options.pg_basebackup_options,
7183 									&backup_options,
7184 									source_server_version_num,
7185 									NULL);
7186 
7187 
7188 		/*
7189 		 * We must create some PGDATA subdirectories because they are not
7190 		 * included in the Barman backup.
7191 		 *
7192 		 * See class RsyncBackupExecutor in the Barman source
7193 		 * (barman/backup_executor.py) for a definitive list of excluded
7194 		 * directories.
7195 		 */
7196 		{
7197 			const char *const dirs[] = {
7198 				/* Only from 10 */
7199 				"pg_wal",
7200 				/* Only from 9.5 */
7201 				"pg_commit_ts",
7202 				/* Only from 9.4 */
7203 				"pg_dynshmem", "pg_logical", "pg_logical/snapshots", "pg_logical/mappings", "pg_replslot",
7204 				/* Present in all versions from 9.3 */
7205 				"pg_notify", "pg_serial", "pg_snapshots", "pg_stat", "pg_stat_tmp",
7206 				"pg_subtrans", "pg_tblspc", "pg_twophase",
7207 				/* Present from at least 9.3, but removed in 10 */
7208 				"pg_xlog",
7209 				/* Array terminator */
7210 				0
7211 			};
7212 
7213 			/*
7214 			 * This array determines the major version in which each of the
7215 			 * above directories first appears; a negative value indicates the
7216 			 * major version from which the directory no longer appears.
7217 			 */
7218 			const int	vers[] = {
7219 				100000,
7220 				90500,
7221 				90400, 90400, 90400, 90400, 90400,
7222 				0, 0, 0, 0, 0,
7223 				0, 0, 0,
7224 				-100000
7225 			};
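			/*
			 * For example, "pg_wal" pairs with 100000, so is only created
			 * when the source server is PostgreSQL 10 or later, while
			 * "pg_xlog" pairs with -100000, so is skipped from PostgreSQL 10
			 * onwards.
			 */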
7226 
7227 			for (i = 0; dirs[i]; i++)
7228 			{
7229 				/* directory exists in newer versions than this server - skip */
7230 				if (vers[i] > 0 && source_server_version_num < vers[i])
7231 					continue;
7232 
7233 				/*
7234 				 * directory existed in earlier versions than this server but
7235 				 * has been removed/renamed - skip
7236 				 */
7237 				if (vers[i] < 0 && source_server_version_num >= abs(vers[i]))
7238 					continue;
7239 
7240 				maxlen_snprintf(filename, "%s/%s", local_data_directory, dirs[i]);
7241 
7242 				/*
7243 				 * If --waldir/--xlogdir specified in "pg_basebackup_options",
7244 				 * create a symlink rather than make a directory.
7245 				 */
7246 				if (strcmp(dirs[i], "pg_wal") == 0 || strcmp(dirs[i], "pg_xlog") == 0)
7247 				{
7248 					if (backup_options.waldir[0] != '\0')
7249 					{
7250 						if (create_pg_dir(backup_options.waldir, false) == false)
7251 						{
7252 							/* create_pg_dir() will log specifics */
7253 							log_error(_("unable to create an empty directory for WAL files"));
7254 							log_hint(_("see preceding error messages"));
7255 							exit(ERR_BAD_CONFIG);
7256 						}
7257 
7258 						if (symlink(backup_options.waldir, filename) != 0)
7259 						{
7260 							log_error(_("could not create symbolic link \"%s\""), filename);
7261 							exit(ERR_BAD_CONFIG);
7262 						}
7263 						continue;
7264 					}
7265 				}
7266 
7267 				if (mkdir(filename, S_IRWXU) != 0 && errno != EEXIST)
7268 				{
7269 					log_error(_("unable to create the %s directory"), dirs[i]);
7270 					exit(ERR_INTERNAL);
7271 				}
7272 			}
7273 		}
7274 	}
7275 
7276 
7277 	for (cell_t = tablespace_list.head; cell_t; cell_t = cell_t->next)
7278 	{
7279 		bool		mapping_found = false;
7280 		TablespaceListCell *cell = NULL;
7281 		char	   *tblspc_dir_dest = NULL;
7282 
7283 		/*
7284 		 * Check if tablespace path matches one of the provided tablespace
7285 		 * mappings
7286 		 */
7287 		if (config_file_options.tablespace_mapping.head != NULL)
7288 		{
7289 			for (cell = config_file_options.tablespace_mapping.head; cell; cell = cell->next)
7290 			{
7291 				if (strcmp(cell_t->location, cell->old_dir) == 0)
7292 				{
7293 					mapping_found = true;
7294 					break;
7295 				}
7296 			}
7297 		}
7298 
7299 		if (mapping_found == true)
7300 		{
7301 			tblspc_dir_dest = cell->new_dir;
7302 			log_debug(_("mapping source tablespace \"%s\" (OID %s) to \"%s\""),
7303 					  cell_t->location, cell_t->oid, tblspc_dir_dest);
7304 		}
7305 		else
7306 		{
7307 			tblspc_dir_dest = cell_t->location;
7308 		}
7309 
7310 		/*
7311 		 * Tablespace file copy
7312 		 */
7313 
7314 		if (mode == barman)
7315 		{
7316 			create_pg_dir(tblspc_dir_dest, false);
7317 
7318 			if (cell_t->fptr != NULL)	/* cell_t->fptr == NULL iff the tablespace is
7319 										 * empty */
7320 			{
7321 				/* close the file to ensure the contents are flushed to disk */
7322 				fclose(cell_t->fptr);
7323 
7324 				maxlen_snprintf(command,
7325 								"rsync --progress -a --files-from=%s/%s.txt %s:%s/%s/%s %s",
7326 								local_repmgr_tmp_directory,
7327 								cell_t->oid,
7328 								config_file_options.barman_host,
7329 								basebackups_directory,
7330 								backup_id,
7331 								cell_t->oid,
7332 								tblspc_dir_dest);
7333 				(void) local_command(command,
7334 									 NULL);
7335 				maxlen_snprintf(filename,
7336 								"%s/%s.txt",
7337 								local_repmgr_tmp_directory,
7338 								cell_t->oid);
7339 				unlink(filename);
7340 			}
7341 		}
7342 
7343 
7344 		/*
7345 		 * If a valid mapping was provided for this tablespace, arrange for it
7346 		 * to be remapped (if no tablespace mapping was provided, the link
7347 		 * will be copied as-is by pg_basebackup and no action is required)
7348 		 */
7349 		if (mapping_found == true || mode == barman)
7350 		{
7351 			/* 9.5 and later - append to the tablespace_map file */
7352 			if (source_server_version_num >= 90500)
7353 			{
7354 				tablespace_map_rewrite = true;
7355 				appendPQExpBuffer(&tablespace_map,
7356 								  "%s %s\n",
7357 								  cell_t->oid,
7358 								  tblspc_dir_dest);
7359 			}
7360 
7361 			/*
7362 			 * Pre-9.5, we have to manipulate the symlinks in pg_tblspc/
7363 			 * ourselves
7364 			 */
7365 			else
7366 			{
7367 				PQExpBufferData tblspc_symlink;
7368 
7369 				initPQExpBuffer(&tblspc_symlink);
7370 				appendPQExpBuffer(&tblspc_symlink, "%s/pg_tblspc/%s",
7371 								  local_data_directory,
7372 								  cell_t->oid);
7373 
7374 				if (unlink(tblspc_symlink.data) < 0 && errno != ENOENT)
7375 				{
7376 					log_error(_("unable to remove tablespace symlink %s"), tblspc_symlink.data);
7377 					log_detail("%s", strerror(errno));
7378 					r = ERR_BAD_BASEBACKUP;
7379 					goto stop_backup;
7380 				}
7381 
7382 				if (symlink(tblspc_dir_dest, tblspc_symlink.data) < 0)
7383 				{
7384 					log_error(_("unable to create tablespace symlink from %s to %s"), tblspc_symlink.data, tblspc_dir_dest);
7385 
7386 					r = ERR_BAD_BASEBACKUP;
7387 					goto stop_backup;
7388 				}
7389 			}
7390 		}
7391 	}
7392 
7393 	/*
7394 	 * For 9.5 and later, if tablespace remapping was requested, we'll need to
7395 	 * rewrite the tablespace map file ourselves. The tablespace map file is
7396 	 * read on startup, with any links it lists created by the backend; we
7397 	 * could create the symlinks ourselves as for pre-9.5 servers, but it's
7398 	 * better to rely on functionality the backend provides.
7399 	 */
7400 
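	/*
	 * Each line of the rewritten tablespace_map file has the form
	 * "<oid> <location>", e.g. (illustrative values):
	 *
	 *   24674 /var/lib/postgresql/tablespaces/9.5/main
	 */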
7401 	if (source_server_version_num >= 90500 && tablespace_map_rewrite == true)
7402 	{
7403 		PQExpBufferData tablespace_map_filename;
7404 		FILE	   *tablespace_map_file;
7405 
7406 		initPQExpBuffer(&tablespace_map_filename);
7407 		appendPQExpBuffer(&tablespace_map_filename, "%s/%s",
7408 						  local_data_directory,
7409 						  TABLESPACE_MAP);
7410 
7411 		/*
7412 		 * Unlink any existing file (it should be there, but we don't care if
7413 		 * it isn't)
7414 		 */
7415 		if (unlink(tablespace_map_filename.data) < 0 && errno != ENOENT)
7416 		{
7417 			log_error(_("unable to remove tablespace_map file \"%s\""),
7418 					  tablespace_map_filename.data);
7419 			log_detail("%s", strerror(errno));
7420 
7421 			r = ERR_BAD_BASEBACKUP;
7422 			goto stop_backup;
7423 		}
7424 
7425 		tablespace_map_file = fopen(tablespace_map_filename.data, "w");
7426 		if (tablespace_map_file == NULL)
7427 		{
7428 			log_error(_("unable to create tablespace_map file \"%s\""), tablespace_map_filename.data);
7429 
7430 			r = ERR_BAD_BASEBACKUP;
7431 			goto stop_backup;
7432 		}
7433 
7434 		if (fputs(tablespace_map.data, tablespace_map_file) == EOF)
7435 		{
7436 			fclose(tablespace_map_file);
7437 
7438 			log_error(_("unable to write to tablespace_map file \"%s\""), tablespace_map_filename.data);
7439 
7440 			r = ERR_BAD_BASEBACKUP;
7441 			goto stop_backup;
7442 		}
7443 
7444 		fclose(tablespace_map_file);
7445 
7446 		termPQExpBuffer(&tablespace_map_filename);
7447 		termPQExpBuffer(&tablespace_map);
7448 	}
7449 
7450 stop_backup:
7451 
7452 	if (mode == barman)
7453 	{
7454 		/*
7455 		 * In Barman mode, remove local_repmgr_tmp_directory,
7456 		 * which contains various temporary files containing Barman metadata.
7457 		 */
7458 		rmtree(local_repmgr_tmp_directory, true);
7459 	}
7460 
7461 
7462 	/*
7463 	 * if replication slots in use, create replication slot
7464 	 */
7465 	if (r == SUCCESS)
7466 	{
7467 		if (config_file_options.use_replication_slots == true)
7468 		{
7469 			bool slot_warning = false;
7470 			if (runtime_options.no_upstream_connection == true)
7471 			{
7472 				slot_warning = true;
7473 			}
7474 			else
7475 			{
7476 				t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;
7477 				t_replication_slot slot_info = T_REPLICATION_SLOT_INITIALIZER;
7478 				RecordStatus record_status = RECORD_NOT_FOUND;
7479 				PGconn	   *upstream_conn = NULL;
7480 
7481 
7482 				/* check connections are still available */
7483 				(void)connection_ping_reconnect(primary_conn);
7484 
7485 				if (source_conn != primary_conn)
7486 					(void)connection_ping_reconnect(source_conn);
7489 
7490 				record_status = get_node_record(source_conn, upstream_node_id, &upstream_node_record);
7491 
7492 				if (record_status != RECORD_FOUND)
7493 				{
7494 					log_error(_("unable to retrieve node record for upstream node %i"), upstream_node_id);
7495 					slot_warning = true;
7496 				}
7497 				else
7498 				{
7499 					upstream_conn = establish_db_connection(upstream_node_record.conninfo, false);
7500 					if (PQstatus(upstream_conn) != CONNECTION_OK)
7501 					{
7502 						log_error(_("unable to connect to upstream node %i to create a replication slot"), upstream_node_id);
7503 						slot_warning = true;
7504 					}
7505 					else
7506 					{
7507 						record_status = get_slot_record(upstream_conn, local_node_record->slot_name, &slot_info);
7508 
7509 						if (record_status == RECORD_FOUND)
7510 						{
7511 							log_verbose(LOG_INFO,
7512 										_("replication slot \"%s\" already exists on upstream node %i"),
7513 										local_node_record->slot_name,
7514 										upstream_node_id);
7515 						}
7516 						else
7517 						{
7518 							PQExpBufferData errmsg;
7519 							bool success;
7520 
7521 							initPQExpBuffer(&errmsg);
7522 							success = create_replication_slot(upstream_conn,
7523 															  local_node_record->slot_name,
7524 															  &upstream_node_record,
7525 															  &errmsg);
7526 							if (success == false)
7527 							{
7528 								log_error(_("unable to create replication slot \"%s\" on upstream node %i"),
7529 										  local_node_record->slot_name,
7530 										  upstream_node_id);
7531 								log_detail("%s", errmsg.data);
7532 								slot_warning = true;
7533 							}
7534 							else
7535 							{
7536 								log_notice(_("replication slot \"%s\" created on upstream node \"%s\" (ID: %i)"),
7537 										   local_node_record->slot_name,
7538 										   upstream_node_record.node_name,
7539 										   upstream_node_id );
7540 							}
7541 							termPQExpBuffer(&errmsg);
7542 						}
7543 
7544 						PQfinish(upstream_conn);
7545 					}
7546 				}
7547 			}
7548 
7549 
7550 			if (slot_warning == true)
7551 			{
7552 				log_warning(_("\"use_replication_slots\" specified but a replication slot could not be created"));
7553 				log_hint(_("ensure a replication slot called \"%s\" is created on the upstream node (ID: %i)"),
7554 						 local_node_record->slot_name,
7555 						 upstream_node_id);
7556 			}
7557 		}
7558 	}
7559 
7560 	return r;
7561 }
7562 
7563 
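/*
 * Build the command prefix used to invoke Barman on the Barman host, e.g.
 * "ssh <barman_host> barman --config=<barman_config>"; the result is
 * written into the supplied buffer, which is also returned for convenience.
 */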
7564 static char *
7565 make_barman_ssh_command(char *buf)
7566 {
7567 	static char config_opt[MAXLEN] = "";
7568 
7569 	if (strlen(config_file_options.barman_config))
7570 		maxlen_snprintf(config_opt,
7571 						" --config=%s",
7572 						config_file_options.barman_config);
7573 
7574 	maxlen_snprintf(buf,
7575 					"ssh %s barman%s",
7576 					config_file_options.barman_host,
7577 					config_opt);
7578 
7579 	return buf;
7580 }
7581 
7582 
7583 static int
7584 get_tablespace_data_barman(char *tablespace_data_barman,
7585 						   TablespaceDataList *tablespace_list)
7586 {
7587 	/*
7588 	 * Example: [('main', 24674, '/var/lib/postgresql/tablespaces/9.5/main'),
7589 	 * ('alt', 24678, '/var/lib/postgresql/tablespaces/9.5/alt')]
7590 	 */
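	/*
	 * The input is the Python-style representation of a list of
	 * (name, oid, location) tuples, taken from the "tablespaces=" line of
	 * Barman's backup.info; we step through it with string_skip_prefix()
	 * and strcspn() rather than using a full parser.
	 */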
7591 
7592 	char		name[MAXLEN] = "";
7593 	char		oid[MAXLEN] = "";
7594 	char		location[MAXPGPATH] = "";
7595 	char	   *p = tablespace_data_barman;
7596 	int			i = 0;
7597 
7598 	tablespace_list->head = NULL;
7599 	tablespace_list->tail = NULL;
7600 
7601 	p = string_skip_prefix("[", p);
7602 	if (p == NULL)
7603 		return -1;
7604 
7605 	while (*p == '(')
7606 	{
7607 		p = string_skip_prefix("('", p);
7608 		if (p == NULL)
7609 			return -1;
7610 
7611 		i = strcspn(p, "'");
7612 		strncpy(name, p, i);
7613 		name[i] = 0;
7614 
7615 		p = string_skip_prefix("', ", p + i);
7616 		if (p == NULL)
7617 			return -1;
7618 
7619 		i = strcspn(p, ",");
7620 		strncpy(oid, p, i);
7621 		oid[i] = 0;
7622 
7623 		p = string_skip_prefix(", '", p + i);
7624 		if (p == NULL)
7625 			return -1;
7626 
7627 		i = strcspn(p, "'");
7628 		strncpy(location, p, i);
7629 		location[i] = 0;
7630 
7631 		p = string_skip_prefix("')", p + i);
7632 		if (p == NULL)
7633 			return -1;
7634 
7635 		tablespace_data_append(tablespace_list, name, oid, location);
7636 
7637 		if (*p == ']')
7638 			break;
7639 
7640 		p = string_skip_prefix(", ", p);
7641 		if (p == NULL)
7642 			return -1;
7643 	}
7644 
7645 	return SUCCESS;
7646 }
7647 
7648 
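/*
 * Extract the value of the named Barman property (e.g.
 * "basebackups_directory") from the "barman show-server" output previously
 * cached in show-server.txt in the local repmgr temporary directory.
 */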
7649 void
7650 get_barman_property(char *dst, char *name, char *local_repmgr_directory)
7651 {
7652 	PQExpBufferData command_output;
7653 	char		buf[MAXLEN] = "";
7654 	char		command[MAXLEN] = "";
7655 	char	   *p = NULL;
7656 
7657 	initPQExpBuffer(&command_output);
7658 
7659 	maxlen_snprintf(command,
7660 					"grep \"^[[:space:]]%s:\" %s/show-server.txt",
7661 					name, local_repmgr_directory);
7662 	(void) local_command(command, &command_output);
7663 
7664 	maxlen_snprintf(buf, "\t%s: ", name);
7665 	p = string_skip_prefix(buf, command_output.data);
7666 	if (p == NULL)
7667 	{
7668 		log_error("unexpected output from Barman: %s",
7669 				  command_output.data);
7670 		exit(ERR_INTERNAL);
7671 	}
7672 
7673 	strncpy(dst, p, MAXLEN);
7674 	string_remove_trailing_newlines(dst);
7675 
7676 	termPQExpBuffer(&command_output);
7677 }
7678 
7679 
7680 static void
7681 copy_configuration_files(bool delete_after_copy)
7682 {
7683 	int			i,
7684 				r;
7685 	t_configfile_info *file = NULL;
7686 	char	   *host = NULL;
7687 
7688 	/* get host from upstream record */
7689 	host = param_get(&recovery_conninfo, "host");
7690 
7691 	if (host == NULL)
7692 		host = runtime_options.host;
7693 
7694 	log_notice(_("copying external configuration files from upstream node \"%s\""), host);
7695 
7696 	for (i = 0; i < config_files.entries; i++)
7697 	{
7698 		PQExpBufferData dest_path;
7699 
7700 		file = config_files.files[i];
7701 
7702 		/*
7703 		 * Skip files in the data directory - these will be copied during the
7704 		 * main backup
7705 		 */
7706 		if (file->in_data_directory == true)
7707 			continue;
7708 
7709 		initPQExpBuffer(&dest_path);
7710 
7711 		if (runtime_options.copy_external_config_files_destination == CONFIG_FILE_SAMEPATH)
7712 		{
7713 			appendPQExpBufferStr(&dest_path, file->filepath);
7714 		}
7715 		else
7716 		{
7717 			appendPQExpBuffer(&dest_path,
7718 							  "%s/%s",
7719 							  local_data_directory,
7720 							  file->filename);
7721 		}
7722 
7723 		r = copy_remote_files(host, runtime_options.remote_user,
7724 							  file->filepath, dest_path.data, false, source_server_version_num);
7725 
7726 		/*
7727 		 * TODO: collate errors into list
7728 		 */
7729 
7730 		if (WEXITSTATUS(r))
7731 		{
7732 			log_error(_("standby clone: unable to copy config file \"%s\""),
7733 					  file->filename);
7734 			log_hint(_("see preceding messages for details"));
7735 
7736 			if (runtime_options.force == false)
7737 				exit(ERR_BAD_RSYNC);
7738 		}
7739 
7740 		/*
7741 		 * This is to check we can actually copy the files before running the
7742 		 * main clone operation
7743 		 */
7744 		if (delete_after_copy == true)
7745 		{
7746 			/* this is very unlikely to happen, but log in case it does */
7747 			if (unlink(dest_path.data) < 0 && errno != ENOENT)
7748 			{
7749 				log_warning(_("unable to delete %s"), dest_path.data);
7750 				log_detail("%s", strerror(errno));
7751 			}
7752 		}
7753 
7754 		termPQExpBuffer(&dest_path);
7755 	}
7756 
7757 	return;
7758 }
7759 
7760 
7761 static void
7762 tablespace_data_append(TablespaceDataList *list, const char *name, const char *oid, const char *location)
7763 {
7764 	TablespaceDataListCell *cell = NULL;
7765 
7766 	cell = (TablespaceDataListCell *) pg_malloc0(sizeof(TablespaceDataListCell));
7767 
7768 	if (cell == NULL)
7769 	{
7770 		log_error(_("unable to allocate memory; terminating"));
7771 		exit(ERR_OUT_OF_MEMORY);
7772 	}
7773 
7774 	cell->oid = pg_malloc(1 + strlen(oid));
7775 	cell->name = pg_malloc(1 + strlen(name));
7776 	cell->location = pg_malloc(1 + strlen(location));
7777 
7778 	strncpy(cell->oid, oid, 1 + strlen(oid));
7779 	strncpy(cell->name, name, 1 + strlen(name));
7780 	strncpy(cell->location, location, 1 + strlen(location));
7781 
7782 	if (list->tail)
7783 		list->tail->next = cell;
7784 	else
7785 		list->head = cell;
7786 
7787 	list->tail = cell;
7788 }
7789 
7790 
7791 
7792 /*
7793  * check_primary_standby_version_match()
7794  *
7795  * Check server versions of supplied connections are compatible for
7796  * replication purposes.
7797  *
7798  * Exits on error.
7799  */
7800 static void
7801 check_primary_standby_version_match(PGconn *conn, PGconn *primary_conn)
7802 {
7803 	char		standby_version[MAXVERSIONSTR] = "";
7804 	int			standby_version_num = UNKNOWN_SERVER_VERSION_NUM;
7805 
7806 	char		primary_version[MAXVERSIONSTR] = "";
7807 	int			primary_version_num = UNKNOWN_SERVER_VERSION_NUM;
7808 
7809 	standby_version_num = check_server_version(conn, "standby", true, standby_version);
7810 
7811 	/* Verify that primary is a supported server version */
7812 	primary_version_num = check_server_version(conn, "primary", false, primary_version);
7813 	if (primary_version_num < 0)
7814 	{
7815 		PQfinish(conn);
7816 		PQfinish(primary_conn);
7817 		exit(ERR_BAD_CONFIG);
7818 	}
7819 
7820 	/* primary and standby version should match */
7821 	if ((primary_version_num / 100) != (standby_version_num / 100))
7822 	{
7823 		PQfinish(conn);
7824 		PQfinish(primary_conn);
7825 		log_error(_("PostgreSQL versions on primary (%s) and standby (%s) must match"),
7826 				  primary_version, standby_version);
7827 		exit(ERR_BAD_CONFIG);
7828 	}
7829 }
7830 
7831 
7832 static void
7833 check_recovery_type(PGconn *conn)
7834 {
7835 	RecoveryType recovery_type = get_recovery_type(conn);
7836 
7837 	if (recovery_type != RECTYPE_STANDBY)
7838 	{
7839 		if (recovery_type == RECTYPE_PRIMARY)
7840 		{
7841 			log_error(_("this node should be a standby (%s)"),
7842 					  config_file_options.conninfo);
7843 			PQfinish(conn);
7844 			exit(ERR_BAD_CONFIG);
7845 		}
7846 		else
7847 		{
7848 			log_error(_("connection to node (%s) lost"),
7849 					  config_file_options.conninfo);
7850 			PQfinish(conn);
7851 			exit(ERR_DB_CONN);
7852 		}
7853 	}
7854 }
7855 
7856 
7857 
7858 /*
7859  * Creates recovery configuration for a standby.
7860  *
7861  * A database connection pointer is required for escaping primary_conninfo
7862  * parameters. When cloning from Barman and --no-upstream-connection supplied,
7863  * this might not be available.
7864  */
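/*
 * For PostgreSQL 11 and earlier, the generated recovery.conf will contain
 * entries along the following lines (illustrative values):
 *
 *   standby_mode = 'on'
 *   primary_conninfo = 'host=node1 user=repmgr application_name=node2'
 *   recovery_target_timeline = 'latest'
 *   primary_slot_name = 'repmgr_slot_2'
 *
 * For PostgreSQL 12 and later, the equivalent parameters (minus
 * "standby_mode" and "recovery_target_timeline") are appended to
 * postgresql.auto.conf and a "standby.signal" file is created instead.
 */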
7865 static bool
7866 create_recovery_file(t_node_info *node_record, t_conninfo_param_list *primary_conninfo, int server_version_num, char *dest, bool as_file)
7867 {
7868 	PQExpBufferData recovery_file_buf;
7869 	PQExpBufferData primary_conninfo_buf;
7870 	char		recovery_file_path[MAXPGPATH] = "";
7871 	FILE	   *recovery_file;
7872 	mode_t		um;
7873 
7874 	KeyValueList recovery_config = {NULL, NULL};
7875 	KeyValueListCell *cell = NULL;
7876 
7877 	initPQExpBuffer(&primary_conninfo_buf);
7878 
7879 	/* standby_mode = 'on' (Pg 11 and earlier) */
7880 	if (server_version_num < 120000)
7881 	{
7882 		key_value_list_set(&recovery_config,
7883 						   "standby_mode", "on");
7884 	}
7885 
7886 	/* primary_conninfo = '...' */
7887 	write_primary_conninfo(&primary_conninfo_buf, primary_conninfo);
7888 	key_value_list_set(&recovery_config,
7889 					   "primary_conninfo", primary_conninfo_buf.data);
7890 
7891 	/*
7892 	 * recovery_target_timeline = 'latest'
7893 	 *
7894 	 * PostgreSQL 11 and earlier only; 'latest' is the default from PostgreSQL 12.
7895 	 */
7896 
7897 	if (server_version_num < 120000)
7898 	{
7899 		key_value_list_set(&recovery_config,
7900 						   "recovery_target_timeline", "latest");
7901 	}
7902 
7903 	/* recovery_min_apply_delay = ... (optional) */
7904 	if (config_file_options.recovery_min_apply_delay_provided == true)
7905 	{
7906 		key_value_list_set(&recovery_config,
7907 						   "recovery_min_apply_delay", config_file_options.recovery_min_apply_delay);
7908 	}
7909 
7910 	/* primary_slot_name = '...' (optional, for 9.4 and later) */
7911 	if (config_file_options.use_replication_slots)
7912 	{
7913 		key_value_list_set(&recovery_config,
7914 						   "primary_slot_name", node_record->slot_name);
7915 	}
7916 
7917 	/*
7918 	 * If restore_command is set, we use it as restore_command in
7919 	 * recovery.conf
7920 	 */
7921 	if (config_file_options.restore_command[0] != '\0')
7922 	{
7923 		char	   *escaped = escape_recovery_conf_value(config_file_options.restore_command);
7924 
7925 		key_value_list_set(&recovery_config,
7926 						  "restore_command", escaped);
7927 		free(escaped);
7928 	}
7929 
7930 	/* archive_cleanup_command (optional) */
7931 	if (config_file_options.archive_cleanup_command[0] != '\0')
7932 	{
7933 		char	   *escaped = escape_recovery_conf_value(config_file_options.archive_cleanup_command);
7934 
7935 		key_value_list_set(&recovery_config,
7936 						  "archive_cleanup_command", escaped);
7937 		free(escaped);
7938 	}
7939 
7940 
7941 
7942 
7943 	if (as_file == false)
7944 	{
7945 		/* create file in buffer */
7946 		initPQExpBuffer(&recovery_file_buf);
7947 
7948 		for (cell = recovery_config.head; cell; cell = cell->next)
7949 		{
7950 			appendPQExpBuffer(&recovery_file_buf,
7951 							  "%s = '%s'\n",
7952 							  cell->key, cell->value);
7953 		}
7954 
7955 		maxlen_snprintf(dest, "%s", recovery_file_buf.data);
7956 
7957 		termPQExpBuffer(&recovery_file_buf);
7958 
7959 		return true;
7960 	}
7961 
7962 
7963 	/*
7964 	 * PostgreSQL 12 and later: modify postgresql.auto.conf
7965 	 *
7966 	 */
7967 	if (server_version_num >= 120000)
7968 	{
7969 
7970 		if (modify_auto_conf(dest, &recovery_config) == false)
7971 		{
7972 			return false;
7973 		}
7974 
7975 		if (write_standby_signal() == false)
7976 		{
7977 			return false;
7978 		}
7979 
7980 		return true;
7981 	}
7982 
7983 	/*
7984 	 * PostgreSQL 11 and earlier: write recovery.conf
7985 	 */
7986 	maxpath_snprintf(recovery_file_path, "%s/%s", dest, RECOVERY_COMMAND_FILE);
7987 	log_debug("create_recovery_file(): creating \"%s\"...",
7988 			  recovery_file_path);
7989 
7990 	/* Set umask to 0077 so the file is created with mode 0600 */
7991 	um = umask((~(S_IRUSR | S_IWUSR)) & (S_IRWXG | S_IRWXO));
7992 	recovery_file = fopen(recovery_file_path, "w");
7993 	umask(um);
7994 
7995 	if (recovery_file == NULL)
7996 	{
7997 		log_error(_("unable to create recovery.conf file at \"%s\""),
7998 				  recovery_file_path);
7999 		log_detail("%s", strerror(errno));
8000 
8001 		return false;
8002 	}
8003 
8004 	for (cell = recovery_config.head; cell; cell = cell->next)
8005 	{
8006 		initPQExpBuffer(&recovery_file_buf);
8007 		appendPQExpBuffer(&recovery_file_buf,
8008 						  "%s = '%s'\n",
8009 						  cell->key, cell->value);
8010 
8011 		log_debug("recovery.conf line: %s", recovery_file_buf.data);
8012 
8013 		if (fputs(recovery_file_buf.data, recovery_file) == EOF)
8014 		{
8015 			log_error(_("unable to write to recovery file at \"%s\""), recovery_file_path);
8016 			fclose(recovery_file);
8017 			termPQExpBuffer(&recovery_file_buf);
8018 			return false;
8019 		}
8020 
8021 		termPQExpBuffer(&recovery_file_buf);
8022 	}
8023 
8024 
8025 	fclose(recovery_file);
8026 
8027 	return true;
8028 }
8029 
8030 
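/*
 * Assemble the value for the "primary_conninfo" recovery parameter from the
 * supplied parameter list, skipping parameters which make no sense in a
 * replication context (e.g. "dbname"), appending "application_name" (and,
 * if configured, "password"/"passfile"), then escaping the result for use
 * in recovery.conf; e.g. (illustrative values):
 *
 *   host=node1 user=repmgr application_name=node2
 */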
8031 static void
8032 write_primary_conninfo(PQExpBufferData *dest, t_conninfo_param_list *param_list)
8033 {
8034 	PQExpBufferData conninfo_buf;
8035 	bool		application_name_provided = false;
8036 	bool		password_provided = false;
8037 	int			c;
8038 	char	   *escaped = NULL;
8039 	t_conninfo_param_list env_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
8040 
8041 	initialize_conninfo_params(&env_conninfo, true);
8042 
8043 	initPQExpBuffer(&conninfo_buf);
8044 
8045 	for (c = 0; c < param_list->size && param_list->keywords[c] != NULL; c++)
8046 	{
8047 		/*
8048 		 * Skip empty settings and ones which don't make any sense in
8049 		 * recovery.conf
8050 		 */
8051 		if (strcmp(param_list->keywords[c], "dbname") == 0 ||
8052 			strcmp(param_list->keywords[c], "replication") == 0 ||
8053 			(param_list->values[c] == NULL) ||
8054 			(param_list->values[c] != NULL && param_list->values[c][0] == '\0'))
8055 			continue;
8056 
8057 		/* only include "password" if explicitly requested */
8058 		if (strcmp(param_list->keywords[c], "password") == 0)
8059 		{
8060 			password_provided = true;
8061 		}
8062 
8063 		if (conninfo_buf.len != 0)
8064 			appendPQExpBufferChar(&conninfo_buf, ' ');
8065 
8066 		if (strcmp(param_list->keywords[c], "application_name") == 0)
8067 			application_name_provided = true;
8068 
8069 		appendPQExpBuffer(&conninfo_buf, "%s=", param_list->keywords[c]);
8070 		appendConnStrVal(&conninfo_buf, param_list->values[c]);
8071 	}
8072 
8073 	/* "application_name" not provided - default to repmgr node name */
8074 	if (application_name_provided == false)
8075 	{
8076 		if (strlen(config_file_options.node_name))
8077 		{
8078 			appendPQExpBufferStr(&conninfo_buf, " application_name=");
8079 			appendConnStrVal(&conninfo_buf, config_file_options.node_name);
8080 		}
8081 		else
8082 		{
8083 			appendPQExpBufferStr(&conninfo_buf, " application_name=repmgr");
8084 		}
8085 	}
8086 
8087 	/* no password provided explicitly  */
8088 	if (password_provided == false)
8089 	{
8090 		if (config_file_options.use_primary_conninfo_password == true)
8091 		{
8092 			const char *password = param_get(&env_conninfo, "password");
8093 
8094 			if (password != NULL)
8095 			{
8096 				appendPQExpBufferStr(&conninfo_buf, " password=");
8097 				appendConnStrVal(&conninfo_buf, password);
8098 			}
8099 		}
8100 	}
8101 
8102 	/* passfile provided as configuration option */
8103 	if (config_file_options.passfile[0] != '\0')
8104 	{
8105 		/* check if the libpq we're using supports "passfile=" */
8106 		if (has_passfile() == true)
8107 		{
8108 			appendPQExpBufferStr(&conninfo_buf, " passfile=");
8109 			appendConnStrVal(&conninfo_buf, config_file_options.passfile);
8110 		}
8111 	}
8112 
8113 	escaped = escape_recovery_conf_value(conninfo_buf.data);
8114 
8115 	appendPQExpBufferStr(dest, escaped);
8116 
8117 	free(escaped);
8118 	free_conninfo_params(&env_conninfo);
8119 	termPQExpBuffer(&conninfo_buf);
8120 }
8121 
8122 
8123 /*
8124  * For "standby promote" and "standby follow", check for sibling nodes.
8125  * If "--siblings-follow" was specified, fill the provided SiblingNodeStats
8126  * struct with some aggregate info about the nodes for later
8127  * decision making.
8128  */
8129 static bool
8130 check_sibling_nodes(NodeInfoList *sibling_nodes, SiblingNodeStats *sibling_nodes_stats)
8131 {
8132 	char		host[MAXLEN] = "";
8133 	NodeInfoListCell *cell;
8134 	int			r;
8135 
8136 	/*
8137 	 * If --siblings-follow not specified, warn about any extant
8138 	 * siblings which will not follow the new primary
8139 	 */
8140 
8141 	if (runtime_options.siblings_follow == false)
8142 	{
8143 		if (sibling_nodes->node_count > 0)
8144 		{
8145 			PQExpBufferData nodes;
8146 			NodeInfoListCell *cell;
8147 
8148 			initPQExpBuffer(&nodes);
8149 
8150 			for (cell = sibling_nodes->head; cell; cell = cell->next)
8151 			{
8152 				appendPQExpBuffer(&nodes,
8153 								  "  %s (node ID: %i",
8154 								  cell->node_info->node_name,
8155 								  cell->node_info->node_id);
8156 
8157 				if (cell->node_info->type == WITNESS)
8158 				{
8159 					appendPQExpBufferStr(&nodes,
8160 										 ", witness server");
8161 				}
8162 				appendPQExpBufferChar(&nodes,
8163 									  ')');
8164 				if (cell->next)
8165 					appendPQExpBufferStr(&nodes, "\n");
8166 			}
8167 
8168 			log_warning(_("%i sibling nodes found, but option \"--siblings-follow\" not specified"),
8169 						sibling_nodes->node_count);
8170 			log_detail(_("these nodes will remain attached to the current primary:\n%s"), nodes.data);
8171 
8172 			termPQExpBuffer(&nodes);
8173 		}
8174 
8175 		return true;
8176 	}
8177 
8178 	log_verbose(LOG_INFO, _("%i active sibling nodes found"),
8179 				sibling_nodes->node_count);
8180 
8181 	if (sibling_nodes->node_count == 0)
8182 	{
8183 		log_warning(_("option \"--siblings-follow\" specified, but no sibling nodes exist"));
8184 		return true;
8185 	}
8186 
8187 	for (cell = sibling_nodes->head; cell; cell = cell->next)
8188 	{
8189 		/* get host from node record */
8190 		get_conninfo_value(cell->node_info->conninfo, "host", host);
8191 		r = test_ssh_connection(host, runtime_options.remote_user);
8192 
8193 		if (r != 0)
8194 		{
8195 			cell->node_info->reachable = false;
8196 			sibling_nodes_stats->unreachable_sibling_node_count++;
8197 		}
8198 		else
8199 		{
8200 			cell->node_info->reachable = true;
8201 			sibling_nodes_stats->reachable_sibling_node_count++;
8202 			sibling_nodes_stats->min_required_wal_senders++;
8203 
8204 			if (cell->node_info->slot_name[0] != '\0')
8205 			{
8206 				sibling_nodes_stats->reachable_sibling_nodes_with_slot_count++;
8207 				sibling_nodes_stats->min_required_free_slots++;
8208 			}
8209 		}
8210 	}
8211 
8212 	if (sibling_nodes_stats->unreachable_sibling_node_count > 0)
8213 	{
8214 		if (runtime_options.force == false)
8215 		{
8216 			log_error(_("%i of %i sibling nodes unreachable via SSH:"),
8217 					  sibling_nodes_stats->unreachable_sibling_node_count,
8218 					  sibling_nodes->node_count);
8219 		}
8220 		else
8221 		{
8222 			log_warning(_("%i of %i sibling nodes unreachable via SSH:"),
8223 						sibling_nodes_stats->unreachable_sibling_node_count,
8224 						sibling_nodes->node_count);
8225 		}
8226 
8227 		/* display list of unreachable sibling nodes */
8228 		for (cell = sibling_nodes->head; cell; cell = cell->next)
8229 		{
8230 			if (cell->node_info->reachable == true)
8231 				continue;
8232 			log_detail("  %s (ID: %i)",
8233 					   cell->node_info->node_name,
8234 					   cell->node_info->node_id);
8235 		}
8236 
8237 		if (runtime_options.force == false)
8238 		{
8239 			log_hint(_("use -F/--force to proceed in any case"));
8240 			return false;
8241 		}
8242 
8243 		if (runtime_options.dry_run == true)
8244 		{
8245 			log_detail(_("-F/--force specified, would proceed anyway"));
8246 		}
8247 		else
8248 		{
8249 			log_detail(_("-F/--force specified, proceeding anyway"));
8250 		}
8251 	}
8252 	else
8253 	{
8254 		char	   *msg = _("all sibling nodes are reachable via SSH");
8255 
8256 		if (runtime_options.dry_run == true)
8257 		{
8258 			log_info("%s", msg);
8259 		}
8260 		else
8261 		{
8262 			log_verbose(LOG_INFO, "%s", msg);
8263 		}
8264 	}
8265 
8266 	return true;
8267 }
8268 
8269 
8270 static bool
8271 check_free_wal_senders(int available_wal_senders, SiblingNodeStats *sibling_nodes_stats, bool *dry_run_success)
8272 {
8273 	if (available_wal_senders < sibling_nodes_stats->min_required_wal_senders)
8274 	{
8275 		if (runtime_options.force == false || runtime_options.dry_run == true)
8276 		{
8277 			log_error(_("insufficient free walsenders on promotion candidate"));
8278 			log_detail(_("at least %i walsenders required but only %i free walsenders on promotion candidate"),
8279 					   sibling_nodes_stats->min_required_wal_senders,
8280 					   available_wal_senders);
8281 			log_hint(_("increase parameter \"max_wal_senders\" or use -F/--force to proceed in any case"));
8282 
8283 			if (runtime_options.dry_run == true)
8284 			{
8285 				*dry_run_success = false;
8286 			}
8287 			else
8288 			{
8289 				return false;
8290 			}
8291 		}
8292 		else
8293 		{
8294 			log_warning(_("insufficient free walsenders on promotion candidate"));
8295 			log_detail(_("at least %i walsenders required but only %i free walsender(s) on promotion candidate"),
8296 					   sibling_nodes_stats->min_required_wal_senders,
8297 					   available_wal_senders);
8298 			return false;
8299 		}
8300 	}
8301 	else
8302 	{
8303 		if (runtime_options.dry_run == true)
8304 		{
8305 			log_info(_("%i walsenders required, %i available"),
8306 					 sibling_nodes_stats->min_required_wal_senders,
8307 					 available_wal_senders);
8308 		}
8309 	}
8310 
8311 	return true;
8312 }
8313 
8314 
8315 static bool
8316 check_free_slots(t_node_info *local_node_record, SiblingNodeStats *sibling_nodes_stats, bool *dry_run_success)
8317 {
8318 	if (sibling_nodes_stats->min_required_free_slots > 0 )
8319 	{
8320 		int available_slots = local_node_record->max_replication_slots -
8321 			local_node_record->active_replication_slots;
8322 
8323 		log_debug("minimum of %i free slots (%i for siblings) required; %i available",
8324 				  sibling_nodes_stats->min_required_free_slots,
8325 				  sibling_nodes_stats->reachable_sibling_nodes_with_slot_count,
8326 				  available_slots);
8327 
8328 		if (available_slots < sibling_nodes_stats->min_required_free_slots)
8329 		{
8330 			if (runtime_options.force == false || runtime_options.dry_run == true)
8331 			{
8332 				log_error(_("insufficient free replication slots to attach all nodes"));
8333 				log_detail(_("at least %i additional replication slots required but only %i free slots available on promotion candidate"),
8334 						   sibling_nodes_stats->min_required_free_slots,
8335 						   available_slots);
8336 				log_hint(_("increase parameter \"max_replication_slots\" or use -F/--force to proceed in any case"));
8337 
8338 				if (runtime_options.dry_run == true)
8339 				{
8340 					*dry_run_success = false;
8341 				}
8342 				else
8343 				{
8344 					return false;
8345 				}
8346 			}
8347 		}
8348 		else
8349 		{
8350 			if (runtime_options.dry_run == true)
8351 			{
8352 				log_info(_("%i replication slots required, %i available"),
8353 						 sibling_nodes_stats->min_required_free_slots,
8354 						 available_slots);
8355 			}
8356 		}
8357 	}
8358 
8359 	return true;
8360 }
8361 
8362 
8363 static void
8364 sibling_nodes_follow(t_node_info *local_node_record, NodeInfoList *sibling_nodes, SiblingNodeStats *sibling_nodes_stats)
8365 {
8366 	int			failed_follow_count = 0;
8367 	char		host[MAXLEN] = "";
8368 	NodeInfoListCell *cell = NULL;
8369 	PQExpBufferData remote_command_str;
8370 	PQExpBufferData command_output;
8371 
8372 	log_notice(_("executing STANDBY FOLLOW on %i of %i siblings"),
8373 			   sibling_nodes->node_count - sibling_nodes_stats->unreachable_sibling_node_count,
8374 			   sibling_nodes->node_count);
8375 
8376 	for (cell = sibling_nodes->head; cell; cell = cell->next)
8377 	{
8378 		bool		success = false;
8379 
8380 		/* skip nodes previously determined as unreachable */
8381 		if (cell->node_info->reachable == false)
8382 			continue;
8383 
8384 		initPQExpBuffer(&remote_command_str);
8385 		make_remote_repmgr_path(&remote_command_str, cell->node_info);
8386 
8387 		if (cell->node_info->type == WITNESS)
8388 		{
8389 			PGconn *witness_conn = NULL;
8390 
8391 			/* TODO: create "repmgr witness resync" or similar */
8392 			appendPQExpBuffer(&remote_command_str,
8393 							  "witness register -d \\'%s\\' --force 2>/dev/null && echo \"1\" || echo \"0\"",
8394 							  local_node_record->conninfo);
8395 
8396 			/*
8397 			 * Notify the witness repmgrd about the new primary, as at this point it will be assuming
8398 			 * a failover situation is in place. It will detect the new primary at some point, this
8399 			 * just speeds up the process.
8400 			 *
8401 			 * In the unlikely event repmgrd is not running or not in use, this will have no effect.
8402 			 */
8403 			witness_conn = establish_db_connection_quiet(cell->node_info->conninfo);
8404 
8405 			if (PQstatus(witness_conn) == CONNECTION_OK)
8406 			{
8407 				notify_follow_primary(witness_conn, local_node_record->node_id);
8408 			}
8409 			PQfinish(witness_conn);
8410 		}
8411 		else
8412 		{
8413 			appendPQExpBufferStr(&remote_command_str,
8414 								 "standby follow 2>/dev/null && echo \"1\" || echo \"0\"");
8415 		}
8416 		get_conninfo_value(cell->node_info->conninfo, "host", host);
8417 		log_debug("executing:\n  %s", remote_command_str.data);
8418 
8419 		initPQExpBuffer(&command_output);
8420 
8421 		success = remote_command(host,
8422 								 runtime_options.remote_user,
8423 								 remote_command_str.data,
8424 								 config_file_options.ssh_options,
8425 								 &command_output);
8426 
8427 		termPQExpBuffer(&remote_command_str);
8428 
8429 		if (success == false || command_output.data[0] == '0')
8430 		{
8431 			if (cell->node_info->type == WITNESS)
8432 			{
8433 				log_warning(_("WITNESS REGISTER failed on node \"%s\""),
8434 							cell->node_info->node_name);
8435 			}
8436 			else
8437 			{
8438 				log_warning(_("STANDBY FOLLOW failed on node \"%s\""),
8439 							cell->node_info->node_name);
8440 			}
8441 
8442 			failed_follow_count++;
8443 		}
8444 
8445 		termPQExpBuffer(&command_output);
8446 	}
8447 
8448 	if (failed_follow_count == 0)
8449 	{
8450 		log_info(_("STANDBY FOLLOW successfully executed on all reachable sibling nodes"));
8451 	}
8452 	else
8453 	{
8454 		log_warning(_("execution of STANDBY FOLLOW failed on %i sibling nodes"),
8455 					failed_follow_count);
8456 	}
8457 
8458 	/*
8459 	 * TODO: double-check all expected nodes are in pg_stat_replication
8460 	 * and entries in repmgr.nodes match
8461 	 */
8462 }
8463 
8464 
8465 
8466 static t_remote_error_type
8467 parse_remote_error(const char *error)
8468 {
8469 	if (error[0] == '\0')
8470 		return REMOTE_ERROR_UNKNOWN;
8471 
8472 	if (strcasecmp(error, "DB_CONNECTION") == 0)
8473 		return REMOTE_ERROR_DB_CONNECTION;
8474 
8475 	if (strcasecmp(error, "CONNINFO_PARSE") == 0)
8476 		return REMOTE_ERROR_CONNINFO_PARSE;
8477 
8478 	return REMOTE_ERROR_UNKNOWN;
8479 }
8480 
8481 
8482 static CheckStatus
8483 parse_check_status(const char *status_str)
8484 {
8485 	CheckStatus status = CHECK_STATUS_UNKNOWN;
8486 
8487 	if (strncmp(status_str, "OK", MAXLEN) == 0)
8488 	{
8489 		status = CHECK_STATUS_OK;
8490 	}
8491 	else if (strncmp(status_str, "WARNING", MAXLEN) == 0)
8492 	{
8493 		status = CHECK_STATUS_WARNING;
8494 	}
8495 	else if (strncmp(status_str, "CRITICAL", MAXLEN) == 0)
8496 	{
8497 		status = CHECK_STATUS_CRITICAL;
8498 	}
8499 	else if (strncmp(status_str, "UNKNOWN", MAXLEN) == 0)
8500 	{
8501 		status = CHECK_STATUS_UNKNOWN;
8502 	}
8503 
8504 	return status;
8505 }
8506 
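/*
 * Parse the option-style output emitted by "repmgr node status
 * --is-shutdown-cleanly", e.g. (illustrative LSN):
 *
 *   --state=SHUTDOWN --last-checkpoint-lsn=0/2000028
 */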
8507 static NodeStatus
8508 parse_node_status_is_shutdown_cleanly(const char *node_status_output, XLogRecPtr *checkPoint)
8509 {
8510 	NodeStatus	node_status = NODE_STATUS_UNKNOWN;
8511 
8512 	int			c = 0,
8513 				argc_item = 0;
8514 	char	  **argv_array = NULL;
8515 	int			optindex = 0;
8516 
8517 	/* We're only interested in these options */
8518 	struct option node_status_options[] =
8519 	{
8520 		{"last-checkpoint-lsn", required_argument, NULL, 'L'},
8521 		{"state", required_argument, NULL, 'S'},
8522 		{NULL, 0, NULL, 0}
8523 	};
8524 
8525 	/* Don't attempt to tokenise an empty string */
8526 	if (!strlen(node_status_output))
8527 	{
8528 		*checkPoint = InvalidXLogRecPtr;
8529 		return node_status;
8530 	}
8531 
8532 	argc_item = parse_output_to_argv(node_status_output, &argv_array);
8533 
8534 	/* Reset getopt's optind variable */
8535 	optind = 0;
8536 
8537 	/* Prevent getopt from emitting errors */
8538 	opterr = 0;
8539 
8540 	while ((c = getopt_long(argc_item, argv_array, "L:S:", node_status_options,
8541 							&optindex)) != -1)
8542 	{
8543 		switch (c)
8544 		{
8545 				/* --last-checkpoint-lsn */
8546 			case 'L':
8547 				*checkPoint = parse_lsn(optarg);
8548 				break;
8549 				/* --state */
8550 			case 'S':
8551 				{
8552 					if (strncmp(optarg, "RUNNING", MAXLEN) == 0)
8553 					{
8554 						node_status = NODE_STATUS_UP;
8555 					}
8556 					else if (strncmp(optarg, "SHUTDOWN", MAXLEN) == 0)
8557 					{
8558 						node_status = NODE_STATUS_DOWN;
8559 					}
8560 					else if (strncmp(optarg, "UNCLEAN_SHUTDOWN", MAXLEN) == 0)
8561 					{
8562 						node_status = NODE_STATUS_UNCLEAN_SHUTDOWN;
8563 					}
8564 					else if (strncmp(optarg, "UNKNOWN", MAXLEN) == 0)
8565 					{
8566 						node_status = NODE_STATUS_UNKNOWN;
8567 					}
8568 				}
8569 				break;
8570 		}
8571 	}
8572 
8573 	free_parsed_argv(&argv_array);
8574 
8575 	return node_status;
8576 }
8577 
8578 
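/*
 * Parse option-style output reporting a remote node's replication
 * connection status (as emitted by "repmgr node check
 * --replication-connection"), e.g. "--connection=OK".
 */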
8579 static ConnectionStatus
8580 parse_remote_node_replication_connection(const char *node_check_output)
8581 {
8582 	ConnectionStatus	conn_status = CONN_UNKNOWN;
8583 
8584 	int			c = 0,
8585 				argc_item = 0;
8586 	char	  **argv_array = NULL;
8587 	int			optindex = 0;
8588 
8589 	/* We're only interested in these options */
8590 	struct option node_check_options[] =
8591 	{
8592 		{"connection", required_argument, NULL, 'c'},
8593 		{NULL, 0, NULL, 0}
8594 	};
8595 
8596 	/* Don't attempt to tokenise an empty string */
8597 	if (!strlen(node_check_output))
8598 	{
8599 		return CONN_UNKNOWN;
8600 	}
8601 
8602 	argc_item = parse_output_to_argv(node_check_output, &argv_array);
8603 
8604 	/* Reset getopt's optind variable */
8605 	optind = 0;
8606 
8607 	/* Prevent getopt from emitting errors */
8608 	opterr = 0;
8609 
8610 	while ((c = getopt_long(argc_item, argv_array, "c:", node_check_options,
8611 							&optindex)) != -1)
8612 	{
8613 		switch (c)
8614 		{
8615 
8616 			/* --connection */
8617 			case 'c':
8618 				{
8619 					if (strncmp(optarg, "OK", MAXLEN) == 0)
8620 					{
8621 						conn_status = CONN_OK;
8622 					}
8623 					else if (strncmp(optarg, "BAD", MAXLEN) == 0)
8624 					{
8625 						conn_status = CONN_BAD;
8626 					}
8627 					else if (strncmp(optarg, "UNKNOWN", MAXLEN) == 0)
8628 					{
8629 						conn_status = CONN_UNKNOWN;
8630 					}
8631 				}
8632 				break;
8633 		}
8634 	}
8635 
8636 	free_parsed_argv(&argv_array);
8637 
8638 	return conn_status;
8639 }
8640 
8641 
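/*
 * Parse option-style output reporting a remote node's WAL archiving status
 * (as emitted by "repmgr node check --archive-ready" in machine-readable
 * "optformat" mode), e.g. "--status=OK --files=0 --threshold=16".
 */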
8642 static CheckStatus
8643 parse_node_check_archiver(const char *node_check_output, int *files, int *threshold, t_remote_error_type *remote_error)
8644 {
8645 	CheckStatus status = CHECK_STATUS_UNKNOWN;
8646 
8647 	int			c = 0,
8648 				argc_item = 0;
8649 	char	  **argv_array = NULL;
8650 	int			optindex = 0;
8651 
8652 	/* We're only interested in these options */
8653 	struct option node_check_options[] =
8654 	{
8655 		{"status", required_argument, NULL, 'S'},
8656 		{"files", required_argument, NULL, 'f'},
8657 		{"threshold", required_argument, NULL, 't'},
8658 		{"error", required_argument, NULL, 'E'},
8659 		{NULL, 0, NULL, 0}
8660 	};
8661 
8662 	*files = 0;
8663 	*threshold = 0;
8664 
8665 	/* Don't attempt to tokenise an empty string */
8666 	if (!strlen(node_check_output))
8667 	{
8668 		return status;
8669 	}
8670 
8671 	argc_item = parse_output_to_argv(node_check_output, &argv_array);
8672 
8673 
8674 	/* Reset getopt's optind variable */
8675 	optind = 0;
8676 
8677 	/* Prevent getopt from emitting errors */
8678 	opterr = 0;
8679 
8680 	while ((c = getopt_long(argc_item, argv_array, "f:S:t:", node_check_options,
8681 							&optindex)) != -1)
8682 	{
8683 		switch (c)
8684 		{
8685 				/* --files */
8686 			case 'f':
8687 				*files = atoi(optarg);
8688 				break;
8689 
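				/* --threshold */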
8690 			case 't':
8691 				*threshold = atoi(optarg);
8692 				break;
8693 
8694 				/* --status */
8695 			case 'S':
8696 				status = parse_check_status(optarg);
8697 				break;
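				/* --error */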
8698 			case 'E':
8699 				{
8700 					*remote_error = parse_remote_error(optarg);
8701 					status = CHECK_STATUS_UNKNOWN;
8702 				}
8703 				break;
8704 		}
8705 	}
8706 
8707 	free_parsed_argv(&argv_array);
8708 
8709 	return status;
8710 }
8711 
8712 
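/*
 * Parse option-style output reporting whether a remote node's configured
 * data directory matches its actual data directory (as emitted by "repmgr
 * node check --data-directory-config"), e.g. "--configured-data-directory=OK".
 */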
8713 static bool
8714 parse_data_directory_config(const char *node_check_output, t_remote_error_type *remote_error)
8715 {
8716 	bool		config_ok = true;
8717 
8718 	int			c = 0,
8719 				argc_item = 0;
8720 	char	  **argv_array = NULL;
8721 	int			optindex = 0;
8722 
8723 	/* We're only interested in these options */
8724 	struct option node_check_options[] =
8725 	{
8726 		{"configured-data-directory", required_argument, NULL, 'C'},
8727 		{"error", required_argument, NULL, 'E'},
8728 		{NULL, 0, NULL, 0}
8729 	};
8730 
8731 	/* Don't attempt to tokenise an empty string */
8732 	if (!strlen(node_check_output))
8733 	{
8734 		return false;
8735 	}
8736 
8737 	argc_item = parse_output_to_argv(node_check_output, &argv_array);
8738 
8739 	/* Reset getopt's optind variable */
8740 	optind = 0;
8741 
8742 	/* Prevent getopt from emitting errors */
8743 	opterr = 0;
8744 
8745 	while ((c = getopt_long(argc_item, argv_array, "C:E:", node_check_options,
8746 							&optindex)) != -1)
8747 	{
8748 		switch (c)
8749 		{
8750 			/* --configured-data-directory */
8751 			case 'C':
8752 				{
8753 					/* we only care whether it's "OK" or not */
8754 					if (strncmp(optarg, "OK", 2) != 0)
8755 						config_ok = false;
8756 				}
8757 				break;
8758 			case 'E':
8759 				{
8760 					*remote_error = parse_remote_error(optarg);
8761 					config_ok = false;
8762 				}
8763 				break;
8764 		}
8765 	}
8766 	free_parsed_argv(&argv_array);
8767 
8768 	return config_ok;
8769 }
8770 
8771 
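/*
 * Determine from "repmgr node check" style output whether the remote
 * node's replication configuration file has the expected owner.
 *
 * Illustrative input (assumed format):
 *
 *   --replication-config-owner=OK
 */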
static bool
parse_replication_config_owner(const char *node_check_output)
{
	bool		config_ok = true;

	int			c = 0,
				argc_item = 0;
	char	  **argv_array = NULL;
	int			optindex = 0;

	/* We're only interested in this option */
	struct option node_check_options[] =
	{
		{"replication-config-owner", required_argument, NULL, 'C'},
		{NULL, 0, NULL, 0}
	};

	/* Don't attempt to tokenise an empty string */
	if (!strlen(node_check_output))
	{
		return false;
	}

	argc_item = parse_output_to_argv(node_check_output, &argv_array);

	/* Reset getopt's optind variable */
	optind = 0;

	/* Prevent getopt from emitting errors */
	opterr = 0;

	while ((c = getopt_long(argc_item, argv_array, "C:", node_check_options,
							&optindex)) != -1)
	{
		switch (c)
		{
			/* --replication-config-owner */
			case 'C':
				{
					/* we only care whether it's "OK" or not */
					if (strncmp(optarg, "OK", 2) != 0)
						config_ok = false;
				}
				break;
		}
	}

	free_parsed_argv(&argv_array);

	return config_ok;
}


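/*
 * Extract the check status from the "--db-connection" option emitted by
 * a remote "repmgr node check --db-connection" invocation.
 *
 * Illustrative input (assumed format):
 *
 *   --db-connection=OK
 */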
static CheckStatus
parse_db_connection(const char *db_connection)
{
	CheckStatus status = CHECK_STATUS_UNKNOWN;

	int			c = 0,
				argc_item = 0;
	char	  **argv_array = NULL;
	int			optindex = 0;

	/* We're only interested in this option */
	struct option node_check_options[] =
	{
		{"db-connection", required_argument, NULL, 'c'},
		{NULL, 0, NULL, 0}
	};

	/*
	 * Don't attempt to tokenise an empty string; note that this function
	 * returns a CheckStatus, not a boolean, so we return
	 * CHECK_STATUS_UNKNOWN here rather than "false".
	 */
	if (!strlen(db_connection))
	{
		return status;
	}

	argc_item = parse_output_to_argv(db_connection, &argv_array);

	/* Reset getopt's optind variable */
	optind = 0;

	/* Prevent getopt from emitting errors */
	opterr = 0;

	while ((c = getopt_long(argc_item, argv_array, "c:", node_check_options,
							&optindex)) != -1)
	{
		switch (c)
		{
			/* --db-connection */
			case 'c':
				{
					status = parse_check_status(optarg);
				}
				break;
		}
	}

	free_parsed_argv(&argv_array);

	return status;
}


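/*
 * Display help output for the "standby" command family.
 */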
void
do_standby_help(void)
{
	print_help_header();

	printf(_("Usage:\n"));
	printf(_("    %s [OPTIONS] standby clone\n"), progname());
	printf(_("    %s [OPTIONS] standby register\n"), progname());
	printf(_("    %s [OPTIONS] standby unregister\n"), progname());
	printf(_("    %s [OPTIONS] standby promote\n"), progname());
	printf(_("    %s [OPTIONS] standby follow\n"), progname());
	printf(_("    %s [OPTIONS] standby switchover\n"), progname());

	puts("");

	printf(_("STANDBY CLONE\n"));
	puts("");
	printf(_("  \"standby clone\" clones a standby from the primary or an upstream node.\n"));
	puts("");
	printf(_("  -d, --dbname=conninfo               conninfo of the upstream node to use for cloning.\n"));
	printf(_("  -c, --fast-checkpoint               force fast checkpoint\n"));
	printf(_("  --copy-external-config-files[={samepath|pgdata}]\n" \
			 "                                      copy configuration files located outside the\n" \
			 "                                        data directory to the same path on the standby (default) or to the\n" \
			 "                                        PostgreSQL data directory\n"));
	printf(_("  --dry-run                           perform checks but don't actually clone the standby\n"));
	printf(_("  --no-upstream-connection            when using Barman, do not connect to upstream node\n"));
	printf(_("  -R, --remote-user=USERNAME          database server username for SSH operations (default: \"%s\")\n"), runtime_options.username);
	printf(_("  --replication-user                  user to make replication connections with (optional, not usually required)\n"));
	printf(_("  -S, --superuser=USERNAME            superuser to use, if repmgr user is not superuser\n"));
	printf(_("  --upstream-conninfo                 \"primary_conninfo\" value to write in recovery.conf\n" \
			 "                                        when the intended upstream server does not yet exist\n"));
	printf(_("  --upstream-node-id                  ID of the upstream node to replicate from (optional, defaults to primary node)\n"));
#if (PG_VERSION_NUM >= 130000)
	printf(_("  --verify-backup                     verify a cloned node using the \"pg_verifybackup\" utility\n"));
#endif
	printf(_("  --without-barman                    do not clone from Barman even if configured\n"));
	printf(_("  --replication-conf-only             generate replication configuration for a previously cloned instance\n"));

	puts("");

	printf(_("STANDBY REGISTER\n"));
	puts("");
	printf(_("  \"standby register\" registers the standby node.\n"));
	puts("");
	printf(_("  -F, --force                         overwrite an existing node record, or if primary connection\n" \
			 "                                        parameters supplied, create record even if standby offline\n"));
	printf(_("  --upstream-node-id                  ID of the upstream node to replicate from (optional)\n"));
	printf(_("  --wait-start=VALUE                  wait for the standby to start (timeout in seconds, default %i)\n"), DEFAULT_WAIT_START);
	printf(_("  --wait-sync[=VALUE]                 wait for the node record to synchronise to the standby\n" \
			 "                                        (optional timeout in seconds)\n"));

	puts("");

	printf(_("STANDBY UNREGISTER\n"));
	puts("");
	printf(_("  \"standby unregister\" unregisters an inactive standby node.\n"));
	puts("");
	printf(_("  --node-id                           ID of the node to unregister (optional, used when the node to\n" \
			 "                                        unregister is offline)\n"));
	puts("");

	printf(_("STANDBY PROMOTE\n"));
	puts("");
	printf(_("  \"standby promote\" promotes a standby node to primary.\n"));
	puts("");
	printf(_("  --dry-run                           perform checks etc. but don't actually promote the node\n"));
	printf(_("  -F, --force                         ignore warnings and continue anyway\n"));
	printf(_("  --siblings-follow                   have other standbys follow the new primary\n"));
	puts("");

	printf(_("STANDBY FOLLOW\n"));
	puts("");
	printf(_("  \"standby follow\" instructs a standby node to follow a new primary.\n"));
	puts("");
	printf(_("  --dry-run                           perform checks but don't actually follow the new primary\n"));
	printf(_("  --upstream-node-id                  node ID of the new primary\n"));
	printf(_("  -W, --wait                          wait for a primary to appear\n"));
	puts("");

	printf(_("STANDBY SWITCHOVER\n"));
	puts("");
	printf(_("  \"standby switchover\" promotes a standby node to primary, and demotes the previous primary to a standby.\n"));
	puts("");
	printf(_("  --always-promote                    promote the standby even if it is behind the original primary\n"));
	printf(_("  --dry-run                           perform checks etc. but don't actually execute the switchover\n"));
	printf(_("  -F, --force                         ignore warnings and continue anyway\n"));
	printf(_("  --force-rewind[=VALUE]              use \"pg_rewind\" to reintegrate the old primary if necessary\n" \
			 "                                        (9.3 and 9.4 - provide \"pg_rewind\" path)\n"));
	printf(_("  -R, --remote-user=USERNAME          database server username for SSH operations (default: \"%s\")\n"), runtime_options.username);
	printf(_("  -S, --superuser=USERNAME            superuser to use, if repmgr user is not superuser\n"));
	printf(_("  --repmgrd-no-pause                  don't pause repmgrd\n"));
	printf(_("  --siblings-follow                   have other standbys follow the new primary\n"));

	puts("");

	printf(_("%s home page: <%s>\n"), "repmgr", REPMGR_URL);
}