1 /*
2  * repmgr-client.c - Command interpreter for the repmgr package
3  *
4  * Copyright (c) 2ndQuadrant, 2010-2020
5  *
6  * This module is a command-line utility to easily set up a cluster of
7  * hot standby servers for an HA environment
8  *
9  * Commands implemented are:
10  *
11  * [ PRIMARY | MASTER ] REGISTER
12  * [ PRIMARY | MASTER ] UNREGISTER
13  *
14  * STANDBY CLONE
15  * STANDBY REGISTER
16  * STANDBY UNREGISTER
17  * STANDBY PROMOTE
18  * STANDBY FOLLOW
19  * STANDBY SWITCHOVER
20  *
21  * CLUSTER SHOW
22  * CLUSTER EVENT
23  * CLUSTER CROSSCHECK
24  * CLUSTER MATRIX
25  * CLUSTER CLEANUP
26  *
27  * NODE STATUS
28  * NODE CHECK
29  * NODE REJOIN
30  * NODE SERVICE
31  * NODE CONTROL
32  *
33  * SERVICE STATUS
34  * SERVICE PAUSE
35  * SERVICE UNPAUSE
36  *
37  * DAEMON START
38  * DAEMON STOP
39  *
40  * This program is free software: you can redistribute it and/or modify
41  * it under the terms of the GNU General Public License as published by
42  * the Free Software Foundation, either version 3 of the License, or
43  * (at your option) any later version.
44  *
45  * This program is distributed in the hope that it will be useful,
46  * but WITHOUT ANY WARRANTY; without even the implied warranty of
47  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
48  * GNU General Public License for more details.
49  *
50  * You should have received a copy of the GNU General Public License
51  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
52  */
53 
54 #include <unistd.h>
55 #include <sys/stat.h>
56 #include <signal.h>
57 
58 
59 #include "repmgr.h"
60 #include "compat.h"
61 #include "controldata.h"
62 #include "repmgr-client.h"
63 #include "repmgr-client-global.h"
64 #include "repmgr-action-primary.h"
65 #include "repmgr-action-standby.h"
66 #include "repmgr-action-witness.h"
67 #include "repmgr-action-node.h"
68 #include "repmgr-action-cluster.h"
69 #include "repmgr-action-service.h"
70 #include "repmgr-action-daemon.h"
71 
72 #include <storage/fd.h>			/* for PG_TEMP_FILE_PREFIX */
73 
74 /* globally available variables *
75  * ============================ */
76 
77 t_runtime_options runtime_options = T_RUNTIME_OPTIONS_INITIALIZER;	/* filled in by main() from libpq defaults and command-line options */
78 
79 
80 /* conninfo params for the node we're operating on; main() populates these from the connection-related command-line options */
81 t_conninfo_param_list source_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
82 
83 bool		config_file_required = true;	/* defaults to true; NOTE(review): the code which changes it is not in this part of the file */
84 char		pg_bindir[MAXPGPATH] = "";	/* starts out empty; NOTE(review): presumably the PostgreSQL binary directory, set elsewhere - confirm */
85 
86 /*
87  * if --node-id/--node-name provided, place that node's record here
88  * for later use
89  */
90 t_node_info target_node_info = T_NODE_INFO_INITIALIZER;
91 
92 /* used by create_replication_slot() */
93 static t_user_type ReplicationSlotUser = USER_TYPE_UNKNOWN;
94 
95 /* Collate command line errors and warnings here for friendlier reporting */
96 static ItemList cli_errors = {NULL, NULL};	/* if non-empty, main() exits via exit_with_cli_errors() */
97 static ItemList cli_warnings = {NULL, NULL};	/* reported by main() as warnings unless -t/--terse was given */
98 /* forward declaration; NOTE(review): the definition is not in this part of the file */
99 static void _determine_replication_slot_user(PGconn *conn,
100 											 t_node_info *upstream_node_record,
101 											 char **replication_user);
102 
103 int
main(int argc,char ** argv)104 main(int argc, char **argv)
105 {
106 	t_conninfo_param_list default_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
107 
108 	int			optindex = 0;
109 	int			c;
110 
111 	char	   *repmgr_command = NULL;
112 	char	   *repmgr_action = NULL;
113 	bool		valid_repmgr_command_found = true;
114 	int			action = NO_ACTION;
115 	char	   *dummy_action = "";
116 
117 	bool		help_option = false;
118 	bool		option_error_found = false;
119 
120 	set_progname(argv[0]);
121 
122 	/*
123 	 * Tell the logger we're a command-line program - this will ensure any
124 	 * output logged before the logger is initialized will be formatted
125 	 * correctly. Can be overridden with "--log-to-file".
126 	 */
127 	logger_output_mode = OM_COMMAND_LINE;
128 
129 	/*
130 	 * Initialize and pre-populate conninfo parameters; these will be
131 	 * overwritten if matching command line parameters are provided.
132 	 *
133 	 * Only some actions will need these, but we need to do this before the
134 	 * command line is parsed.
135 	 *
136 	 * Note: PQconndefaults() does not provide a default value for "dbname",
137 	 * but if none is provided will default to "username" when the connection
138 	 * is made. We won't set "dbname" here if no default available, as that
139 	 * would break the libpq behaviour if non-default username is provided.
140 	 */
141 	initialize_conninfo_params(&default_conninfo, true);
142 
143 	for (c = 0; c < default_conninfo.size && default_conninfo.keywords[c]; c++)
144 	{
145 		if (strcmp(default_conninfo.keywords[c], "host") == 0 &&
146 			(default_conninfo.values[c] != NULL))
147 		{
148 			strncpy(runtime_options.host, default_conninfo.values[c], MAXLEN);
149 		}
150 		else if (strcmp(default_conninfo.keywords[c], "hostaddr") == 0 &&
151 				 (default_conninfo.values[c] != NULL))
152 		{
153 			strncpy(runtime_options.host, default_conninfo.values[c], MAXLEN);
154 		}
155 		else if (strcmp(default_conninfo.keywords[c], "port") == 0 &&
156 				 (default_conninfo.values[c] != NULL))
157 		{
158 			strncpy(runtime_options.port, default_conninfo.values[c], MAXLEN);
159 		}
160 		else if (strcmp(default_conninfo.keywords[c], "dbname") == 0 &&
161 				 (default_conninfo.values[c] != NULL))
162 		{
163 			strncpy(runtime_options.dbname, default_conninfo.values[c], MAXLEN);
164 		}
165 		else if (strcmp(default_conninfo.keywords[c], "user") == 0 &&
166 				 (default_conninfo.values[c] != NULL))
167 		{
168 			strncpy(runtime_options.username, default_conninfo.values[c], MAXLEN);
169 		}
170 	}
171 	free_conninfo_params(&default_conninfo);
172 
173 	initialize_conninfo_params(&source_conninfo, false);
174 
175 	/* set default user for -R/--remote-user */
176 	{
177 		struct passwd *pw = getpwuid(geteuid());
178 
179 		if (pw == NULL)
180 		{
181 			fprintf(stderr, _("could not get current user name: %s\n"), strerror(errno));
182 			exit(ERR_BAD_CONFIG);
183 		}
184 
185 		strncpy(runtime_options.username, pw->pw_name, MAXLEN);
186 	}
187 
188 	/* Make getopt emit errors */
189 	opterr = 1;
190 
191 	while ((c = getopt_long(argc, argv, "?Vb:f:FwWd:h:p:U:R:S:D:ck:L:qtvC:", long_options,
192 							&optindex)) != -1)
193 	{
194 		/*
195 		 * NOTE: some integer parameters (e.g. -p/--port) are stored
196 		 * internally as strings. We use repmgr_atoi() to check these but
197 		 * discard the returned integer; repmgr_atoi() will append the error
198 		 * message to the provided list.
199 		 */
200 		switch (c)
201 		{
202 				/*
203 				 * Options which cause repmgr to exit in this block; these are
204 				 * the only ones which can be executed as root user
205 				 */
206 			case OPT_HELP:		/* --help */
207 				help_option = true;
208 				break;
209 
210 				/* -V/--version */
211 			case 'V':
212 
213 				/*
214 				 * in contrast to repmgr3 and earlier, we only display the
215 				 * repmgr version as it's not specific to a particular
216 				 * PostgreSQL version
217 				 */
218 				printf("%s %s\n", progname(), REPMGR_VERSION);
219 				exit(SUCCESS);
220 
221 				/* --version-number */
222 			case OPT_VERSION_NUMBER:
223 				printf("%i\n", REPMGR_VERSION_NUM);
224 				exit(SUCCESS);
225 
226 				/*------------------------------
227 				 * general configuration options
228 				 *------------------------------
229 				 */
230 
231 				/* -b/--pg_bindir */
232 			case 'b':
233 				strncpy(runtime_options.pg_bindir, optarg, MAXLEN);
234 				break;
235 
236 				/* -f/--config-file */
237 			case 'f':
238 				strncpy(runtime_options.config_file, optarg, MAXLEN);
239 				break;
240 
241 				/* --dry-run */
242 			case OPT_DRY_RUN:
243 				runtime_options.dry_run = true;
244 				break;
245 
246 				/* -F/--force */
247 			case 'F':
248 				runtime_options.force = true;
249 				break;
250 
251 				/* --replication-user (primary/standby register only) */
252 			case OPT_REPLICATION_USER:
253 				strncpy(runtime_options.replication_user, optarg, MAXLEN);
254 				break;
255 
256 				/* -w/--wait */
257 			case 'w':
258 				runtime_options.wait_provided = true;
259 				if (optarg != NULL)
260 				{
261 					runtime_options.wait = repmgr_atoi(optarg, "--wait", &cli_errors, 0);
262 				}
263 				break;
264 
265 				/* -W/--no-wait */
266 			case 'W':
267 				runtime_options.no_wait = true;
268 				break;
269 
270 				/* --compact */
271 			case OPT_COMPACT:
272 				runtime_options.compact = true;
273 				break;
274 
275 				/* --detail */
276 			case OPT_DETAIL:
277 				runtime_options.detail = true;
278 				break;
279 
280 				/* --dump-config */
281 			case OPT_DUMP_CONFIG:
282 				runtime_options.dump_config = true;
283 				break;
284 
285 				/*----------------------------
286 				 * database connection options
287 				 *----------------------------
288 				 */
289 
290 				/*
291 				 * These are the standard database connection options; with
292 				 * the exception of -d/--dbname (which could be a conninfo
293 				 * string) we'll also set these values in "source_conninfo"
294 				 * (overwriting preset values from environment variables).
295 				 */
296 				/* -d/--dbname */
297 			case 'd':
298 				strncpy(runtime_options.dbname, optarg, MAXLEN);
299 
300 				/*
301 				 * dbname will be set in source_conninfo later after checking
302 				 * if it's a conninfo string
303 				 */
304 				runtime_options.connection_param_provided = true;
305 				break;
306 
307 				/* -h/--host */
308 			case 'h':
309 				strncpy(runtime_options.host, optarg, MAXLEN);
310 				param_set(&source_conninfo, "host", optarg);
311 				runtime_options.connection_param_provided = true;
312 				runtime_options.host_param_provided = true;
313 				break;
314 
315 			case 'p':
316 				/*
317 				 * minimum TCP port number is 1; in practice PostgreSQL
318 				 * won't be running on a privileged port, but we don't want
319 				 * to be concerned with that level of checking
320 				 */
321 				(void) repmgr_atoi(optarg, "-p/--port", &cli_errors, 1);
322 				param_set(&source_conninfo, "port", optarg);
323 				strncpy(runtime_options.port,
324 						optarg,
325 						MAXLEN);
326 				runtime_options.connection_param_provided = true;
327 				break;
328 
329 				/* -U/--user */
330 			case 'U':
331 				strncpy(runtime_options.username, optarg, MAXLEN);
332 				param_set(&source_conninfo, "user", optarg);
333 				runtime_options.connection_param_provided = true;
334 				break;
335 
336 				/*-------------------------
337 				 * other connection options
338 				 *-------------------------
339 				 */
340 
341 				/* -R/--remote_user */
342 			case 'R':
343 				strncpy(runtime_options.remote_user, optarg, MAXLEN);
344 				break;
345 
346 				/* -S/--superuser */
347 			case 'S':
348 				strncpy(runtime_options.superuser, optarg, MAXLEN);
349 				break;
350 
351 				/*-------------
352 				 * node options
353 				 *-------------
354 				 */
355 
356 				/* -D/--pgdata/--data-dir */
357 			case 'D':
358 				strncpy(runtime_options.data_dir, optarg, MAXPGPATH);
359 				break;
360 
361 				/* --node-id */
362 			case OPT_NODE_ID:
363 				runtime_options.node_id = repmgr_atoi(optarg, "--node-id", &cli_errors, MIN_NODE_ID);
364 				break;
365 
366 				/* --node-name */
367 			case OPT_NODE_NAME:
368 			{
369 				if (strlen(optarg) < sizeof(runtime_options.node_name))
370 					strncpy(runtime_options.node_name, optarg, sizeof(runtime_options.node_name));
371 				else
372 					item_list_append_format(&cli_errors,
373 											_("value for \"--node-name\" must contain fewer than %lu characters"),
374 											sizeof(runtime_options.node_name));
375 				break;
376 			}
377 				/* --remote-node-id */
378 			case OPT_REMOTE_NODE_ID:
379 				runtime_options.remote_node_id = repmgr_atoi(optarg, "--remote-node-id", &cli_errors, MIN_NODE_ID);
380 				break;
381 
382 				/*
383 				 * standby options
384 				 */
385 
386 				/* --upstream-node-id */
387 			case OPT_UPSTREAM_NODE_ID:
388 				runtime_options.upstream_node_id = repmgr_atoi(optarg, "--upstream-node-id", &cli_errors, MIN_NODE_ID);
389 				break;
390 
391 				/*------------------------
392 				 * "standby clone" options
393 				 *------------------------
394 				 */
395 
396 				/* -c/--fast-checkpoint */
397 			case 'c':
398 				runtime_options.fast_checkpoint = true;
399 				break;
400 
401 				/* --copy-external-config-files(=[samepath|pgdata]) */
402 			case OPT_COPY_EXTERNAL_CONFIG_FILES:
403 				runtime_options.copy_external_config_files = true;
404 				if (optarg != NULL)
405 				{
406 					if (strcmp(optarg, "samepath") == 0)
407 					{
408 						runtime_options.copy_external_config_files_destination = CONFIG_FILE_SAMEPATH;
409 					}
410 					/* allow "data_directory" as synonym for "pgdata" */
411 					else if (strcmp(optarg, "pgdata") == 0 || strcmp(optarg, "data_directory") == 0)
412 					{
413 						runtime_options.copy_external_config_files_destination = CONFIG_FILE_PGDATA;
414 					}
415 					else
416 					{
417 						item_list_append(&cli_errors,
418 										 _("value provided for \"--copy-external-config-files\" must be \"samepath\" or \"pgdata\""));
419 					}
420 				}
421 				break;
422 
423 				/* --no-upstream-connection */
424 			case OPT_NO_UPSTREAM_CONNECTION:
425 				runtime_options.no_upstream_connection = true;
426 				break;
427 
428 
429 			case OPT_UPSTREAM_CONNINFO:
430 				strncpy(runtime_options.upstream_conninfo, optarg, MAXLEN);
431 				break;
432 
433 			case OPT_WITHOUT_BARMAN:
434 				runtime_options.without_barman = true;
435 				break;
436 
437 			case OPT_REPLICATION_CONF_ONLY:
438 				runtime_options.replication_conf_only = true;
439 				break;
440 
441 				/* --verify-backup */
442 			case OPT_VERIFY_BACKUP:
443 				runtime_options.verify_backup = true;
444 				break;
445 
446 				/*---------------------------
447 				 * "standby register" options
448 				 *---------------------------
449 				 */
450 
451 			case OPT_WAIT_START:
452 				runtime_options.wait_start = repmgr_atoi(optarg, "--wait-start", &cli_errors, 0);
453 				break;
454 
455 			case OPT_WAIT_SYNC:
456 				runtime_options.wait_register_sync = true;
457 				if (optarg != NULL)
458 				{
459 					runtime_options.wait_register_sync_seconds = repmgr_atoi(optarg, "--wait-sync", &cli_errors, 0);
460 				}
461 				break;
462 
463 				/*-----------------------------
464 				 * "standby switchover" options
465 				 *-----------------------------
466 				 */
467 
468 			case OPT_ALWAYS_PROMOTE:
469 				runtime_options.always_promote = true;
470 				break;
471 
472 			case OPT_FORCE_REWIND:
473 				runtime_options.force_rewind_used = true;
474 
475 				if (optarg != NULL)
476 				{
477 					strncpy(runtime_options.force_rewind_path, optarg, MAXPGPATH);
478 				}
479 
480 				break;
481 
482 			case OPT_SIBLINGS_FOLLOW:
483 				runtime_options.siblings_follow = true;
484 				break;
485 
486 			case OPT_REPMGRD_NO_PAUSE:
487 				runtime_options.repmgrd_no_pause = true;
488 				break;
489 
490 			case OPT_REPMGRD_FORCE_UNPAUSE:
491 				runtime_options.repmgrd_force_unpause = true;
492 				break;
493 
494 				/*----------------------
495 				 * "node status" options
496 				 *----------------------
497 				 */
498 
499 			case OPT_IS_SHUTDOWN_CLEANLY:
500 				runtime_options.is_shutdown_cleanly = true;
501 				break;
502 
503 				/*---------------------
504 				 * "node check" options
505 				 *--------------------
506 				 */
507 			case OPT_ARCHIVE_READY:
508 				runtime_options.archive_ready = true;
509 				break;
510 
511 			case OPT_DOWNSTREAM:
512 				runtime_options.downstream = true;
513 				break;
514 
515 			case OPT_UPSTREAM:
516 				runtime_options.upstream = true;
517 				break;
518 
519 			case OPT_REPLICATION_LAG:
520 				runtime_options.replication_lag = true;
521 				break;
522 
523 			case OPT_ROLE:
524 				runtime_options.role = true;
525 				break;
526 
527 			case OPT_SLOTS:
528 				runtime_options.slots = true;
529 				break;
530 
531 			case OPT_MISSING_SLOTS:
532 				runtime_options.missing_slots = true;
533 				break;
534 
535 			case OPT_HAS_PASSFILE:
536 				runtime_options.has_passfile = true;
537 				break;
538 
539 			case OPT_REPL_CONN:
540 				runtime_options.replication_connection = true;
541 				break;
542 
543 			case OPT_DATA_DIRECTORY_CONFIG:
544 				runtime_options.data_directory_config = true;
545 				break;
546 
547 			case OPT_REPLICATION_CONFIG_OWNER:
548 				runtime_options.replication_config_owner = true;
549 				break;
550 
551 			case OPT_DB_CONNECTION:
552 				runtime_options.db_connection = true;
553 				break;
554 
555 				/*--------------------
556 				 * "node rejoin" options
557 				 *--------------------
558 				 */
559 			case OPT_CONFIG_FILES:
560 				strncpy(runtime_options.config_files, optarg, MAXLEN);
561 				break;
562 
563 			case OPT_CONFIG_ARCHIVE_DIR:
564 				/* TODO: check this is an absolute path */
565 				strncpy(runtime_options.config_archive_dir, optarg, MAXPGPATH);
566 				break;
567 
568 				/*-----------------------
569 				 * "node service" options
570 				 *-----------------------
571 				 */
572 
573 				/* --action (repmgr node service --action) */
574 			case OPT_ACTION:
575 				strncpy(runtime_options.action, optarg, MAXLEN);
576 				break;
577 
578 			case OPT_LIST_ACTIONS:
579 				runtime_options.list_actions = true;
580 				break;
581 
582 			case OPT_CHECKPOINT:
583 				runtime_options.checkpoint = true;
584 				break;
585 
586 				/*------------------------
587 				 * "cluster event" options
588 				 *------------------------
589 				 */
590 
591 			case OPT_EVENT:
592 				strncpy(runtime_options.event, optarg, MAXLEN);
593 				break;
594 
595 			case OPT_LIMIT:
596 				runtime_options.limit = repmgr_atoi(optarg, "--limit", &cli_errors, 1);
597 				runtime_options.limit_provided = true;
598 				break;
599 
600 			case OPT_ALL:
601 				runtime_options.all = true;
602 				break;
603 
604 				/*------------------------
605 				 * "cluster cleanup" options
606 				 *------------------------
607 				 */
608 
609 				/* -k/--keep-history */
610 			case 'k':
611 				runtime_options.keep_history = repmgr_atoi(optarg, "-k/--keep-history", &cli_errors, 0);
612 				break;
613 
614 				/*----------------
615 				 * logging options
616 				 *----------------
617 				 */
618 
619 				/* -L/--log-level */
620 			case 'L':
621 				{
622 					int			detected_log_level = detect_log_level(optarg);
623 
624 					if (detected_log_level != -1)
625 					{
626 						strncpy(runtime_options.log_level, optarg, MAXLEN);
627 					}
628 					else
629 					{
630 						PQExpBufferData invalid_log_level;
631 
632 						initPQExpBuffer(&invalid_log_level);
633 						appendPQExpBuffer(&invalid_log_level, _("invalid log level \"%s\" provided"), optarg);
634 						item_list_append(&cli_errors, invalid_log_level.data);
635 						termPQExpBuffer(&invalid_log_level);
636 					}
637 					break;
638 				}
639 
640 				/* --log-to-file */
641 			case OPT_LOG_TO_FILE:
642 				runtime_options.log_to_file = true;
643 				logger_output_mode = OM_DAEMON;
644 				break;
645 
646 
647 				/* --quiet */
648 			case 'q':
649 				runtime_options.quiet = true;
650 				break;
651 
652 				/* --terse */
653 			case 't':
654 				runtime_options.terse = true;
655 				break;
656 
657 				/* --verbose */
658 			case 'v':
659 				runtime_options.verbose = true;
660 				break;
661 
662 
663 				/*---------------
664 				 * output options
665 				 *---------------
666 				 */
667 			case OPT_CSV:
668 				runtime_options.csv = true;
669 				break;
670 
671 			case OPT_NAGIOS:
672 				runtime_options.nagios = true;
673 				break;
674 
675 			case OPT_OPTFORMAT:
676 				runtime_options.optformat = true;
677 				break;
678 
679 				/*---------------------------------
680 				 * undocumented options for testing
681 				 *----------------------------------
682 				 */
683 
684 			case OPT_DISABLE_WAL_RECEIVER:
685 				runtime_options.disable_wal_receiver = true;
686 				break;
687 
688 			case OPT_ENABLE_WAL_RECEIVER:
689 				runtime_options.enable_wal_receiver = true;
690 				break;
691 
692 				/*-----------------------------
693 				 * options deprecated since 4.0
694 				 *-----------------------------
695 				 */
696 			case OPT_CHECK_UPSTREAM_CONFIG:
697 				item_list_append(&cli_warnings,
698 								 _("--check-upstream-config is deprecated; use --dry-run instead"));
699 				break;
700 
701 				/* -C/--remote-config-file */
702 			case 'C':
703 				item_list_append(&cli_warnings,
704 								 _("--remote-config-file is no longer required"));
705 				break;
706 
707 			case ':':   /* missing option argument */
708 				option_error_found = true;
709 				break;
710 			case '?':
711 				/* Actual help option given? */
712 				if (strcmp(argv[optind - 1], "-?") == 0)
713 				{
714 					help_option = true;
715 				}
716 				else
717 				{
718 					option_error_found = true;
719 				}
720 				break;
721 			default:    /* invalid option */
722 				option_error_found = true;
723 				break;
724 		}
725 	}
726 
727 
728 	/*
729 	 * If -d/--dbname appears to be a conninfo string, validate by attempting
730 	 * to parse it (and if successful, store the parsed parameters)
731 	 */
732 	if (runtime_options.dbname[0])
733 	{
734 		if (strncmp(runtime_options.dbname, "postgresql://", 13) == 0 ||
735 			strncmp(runtime_options.dbname, "postgres://", 11) == 0 ||
736 			strchr(runtime_options.dbname, '=') != NULL)
737 		{
738 			char	   *errmsg = NULL;
739 			PQconninfoOption *opts;
740 
741 			runtime_options.conninfo_provided = true;
742 
743 			opts = PQconninfoParse(runtime_options.dbname, &errmsg);
744 
745 			if (opts == NULL)
746 			{
747 				PQExpBufferData conninfo_error;
748 
749 				initPQExpBuffer(&conninfo_error);
750 				appendPQExpBuffer(&conninfo_error, _("error parsing conninfo:\n%s"), errmsg);
751 				item_list_append(&cli_errors, conninfo_error.data);
752 
753 				termPQExpBuffer(&conninfo_error);
754 				pfree(errmsg);
755 			}
756 			else
757 			{
758 				/*
759 				 * Store any parameters provided in the conninfo string in our
760 				 * internal array; also overwrite any options set in
761 				 * runtime_options.(host|port|username), as the conninfo
762 				 * settings take priority
763 				 */
764 				PQconninfoOption *opt;
765 
766 				for (opt = opts; opt->keyword != NULL; opt++)
767 				{
768 					if (opt->val != NULL && opt->val[0] != '\0')
769 					{
770 						param_set(&source_conninfo, opt->keyword, opt->val);
771 					}
772 
773 					if (strcmp(opt->keyword, "host") == 0 &&
774 						(opt->val != NULL && opt->val[0] != '\0'))
775 					{
776 						strncpy(runtime_options.host, opt->val, MAXLEN);
777 						runtime_options.host_param_provided = true;
778 					}
779 					if (strcmp(opt->keyword, "hostaddr") == 0 &&
780 						(opt->val != NULL && opt->val[0] != '\0'))
781 					{
782 						strncpy(runtime_options.host, opt->val, MAXLEN);
783 						runtime_options.host_param_provided = true;
784 					}
785 					else if (strcmp(opt->keyword, "port") == 0 &&
786 							 (opt->val != NULL && opt->val[0] != '\0'))
787 					{
788 						strncpy(runtime_options.port, opt->val, MAXLEN);
789 					}
790 					else if (strcmp(opt->keyword, "user") == 0 &&
791 							 (opt->val != NULL && opt->val[0] != '\0'))
792 					{
793 						strncpy(runtime_options.username, opt->val, MAXLEN);
794 					}
795 				}
796 
797 				PQconninfoFree(opts);
798 			}
799 		}
800 		else
801 		{
802 			param_set(&source_conninfo, "dbname", runtime_options.dbname);
803 		}
804 	}
805 
806 	/*
807 	 * Disallow further running as root to prevent directory ownership
808 	 * problems. We check this here to give the root user a chance to execute
809 	 * --help/--version options.
810 	 */
811 	if (geteuid() == 0 && help_option == false)
812 	{
813 		fprintf(stderr,
814 				_("%s: cannot be run as root\n"
815 				  "Please log in (using, e.g., \"su\") as the "
816 				  "(unprivileged) user that owns "
817 				  "the data directory.\n"
818 				  ),
819 				progname());
820 		free_conninfo_params(&source_conninfo);
821 		exit(ERR_BAD_CONFIG);
822 	}
823 
824 	/* Exit here already if errors in command line options found */
825 	if (cli_errors.head != NULL)
826 	{
827 		free_conninfo_params(&source_conninfo);
828 		exit_with_cli_errors(&cli_errors, NULL);
829 	}
830 
831 	/*----------
832 	 * Determine the node type and action; following are valid:
833 	 *
834 	 *   { PRIMARY | MASTER } { REGISTER | UNREGISTER | CHECK | STATUS } |
835 	 *   STANDBY { REGISTER | UNREGISTER | CLONE [node] | PROMOTE | FOLLOW [node] | SWITCHOVER | CHECK | STATUS } |
836 	 *   WITNESS { REGISTER | UNREGISTER } |
837 	 *   NODE { STATUS | CHECK | REJOIN | SERVICE | CONTROL } |
838 	 *   CLUSTER { CROSSCHECK | MATRIX | SHOW | EVENT | EVENTS | CLEANUP } |
839 	 *   { SERVICE | DAEMON } { STATUS | PAUSE | UNPAUSE } | DAEMON { START | STOP }
840 	 *
841 	 * [node] is an optional hostname, provided instead of the -h/--host
842 	 * option
843 	 * ---------
844 	 */
845 	if (optind < argc)
846 	{
847 		repmgr_command = argv[optind++];
848 	}
849 
850 	if (optind < argc)
851 	{
852 		repmgr_action = argv[optind++];
853 	}
854 	else
855 	{
856 		repmgr_action = dummy_action;
857 	}
858 
859 	if (repmgr_command != NULL)
860 	{
861 		if (strcasecmp(repmgr_command, "PRIMARY") == 0 || strcasecmp(repmgr_command, "MASTER") == 0)
862 		{
863 			if (help_option == true)
864 			{
865 				do_primary_help();
866 				exit(SUCCESS);
867 			}
868 
869 			if (strcasecmp(repmgr_action, "REGISTER") == 0)
870 				action = PRIMARY_REGISTER;
871 			else if (strcasecmp(repmgr_action, "UNREGISTER") == 0)
872 				action = PRIMARY_UNREGISTER;
873 			/* allow "primary check"/"primary status" as aliases for "node check"/"node status" */
874 			else if (strcasecmp(repmgr_action, "CHECK") == 0)
875 				action = NODE_CHECK;
876 			else if (strcasecmp(repmgr_action, "STATUS") == 0)
877 				action = NODE_STATUS;
878 		}
879 
880 		else if (strcasecmp(repmgr_command, "STANDBY") == 0)
881 		{
882 			if (help_option == true)
883 			{
884 				do_standby_help();
885 				exit(SUCCESS);
886 			}
887 
888 			if (strcasecmp(repmgr_action, "CLONE") == 0)
889 				action = STANDBY_CLONE;
890 			else if (strcasecmp(repmgr_action, "REGISTER") == 0)
891 				action = STANDBY_REGISTER;
892 			else if (strcasecmp(repmgr_action, "UNREGISTER") == 0)
893 				action = STANDBY_UNREGISTER;
894 			else if (strcasecmp(repmgr_action, "PROMOTE") == 0)
895 				action = STANDBY_PROMOTE;
896 			else if (strcasecmp(repmgr_action, "FOLLOW") == 0)
897 				action = STANDBY_FOLLOW;
898 			else if (strcasecmp(repmgr_action, "SWITCHOVER") == 0)
899 				action = STANDBY_SWITCHOVER;
900 			/* allow "standby check"/"standby status" as aliases for "node check"/"node status" */
901 			else if (strcasecmp(repmgr_action, "CHECK") == 0)
902 				action = NODE_CHECK;
903 			else if (strcasecmp(repmgr_action, "STATUS") == 0)
904 				action = NODE_STATUS;
905 		}
906 
907 		else if (strcasecmp(repmgr_command, "WITNESS") == 0)
908 		{
909 			if (help_option == true)
910 			{
911 				do_witness_help();
912 				exit(SUCCESS);
913 			}
914 			else if (strcasecmp(repmgr_action, "REGISTER") == 0)
915 				action = WITNESS_REGISTER;
916 			else if (strcasecmp(repmgr_action, "UNREGISTER") == 0)
917 				action = WITNESS_UNREGISTER;
918 		}
919 
920 		else if (strcasecmp(repmgr_command, "NODE") == 0)
921 		{
922 			if (help_option == true)
923 			{
924 				do_node_help();
925 				exit(SUCCESS);
926 			}
927 
928 			if (strcasecmp(repmgr_action, "CHECK") == 0)
929 				action = NODE_CHECK;
930 			else if (strcasecmp(repmgr_action, "STATUS") == 0)
931 				action = NODE_STATUS;
932 			else if (strcasecmp(repmgr_action, "REJOIN") == 0)
933 				action = NODE_REJOIN;
934 			else if (strcasecmp(repmgr_action, "SERVICE") == 0)
935 				action = NODE_SERVICE;
936 			else if (strcasecmp(repmgr_action, "CONTROL") == 0)
937 				action = NODE_CONTROL;
938 		}
939 
940 		else if (strcasecmp(repmgr_command, "CLUSTER") == 0)
941 		{
942 			if (help_option == true)
943 			{
944 				do_cluster_help();
945 				exit(SUCCESS);
946 			}
947 
948 			if (strcasecmp(repmgr_action, "SHOW") == 0)
949 				action = CLUSTER_SHOW;
950 			else if (strcasecmp(repmgr_action, "EVENT") == 0)
951 				action = CLUSTER_EVENT;
952 			/* allow "CLUSTER EVENTS" as synonym for "CLUSTER EVENT" */
953 			else if (strcasecmp(repmgr_action, "EVENTS") == 0)
954 				action = CLUSTER_EVENT;
955 			else if (strcasecmp(repmgr_action, "CROSSCHECK") == 0)
956 				action = CLUSTER_CROSSCHECK;
957 			else if (strcasecmp(repmgr_action, "MATRIX") == 0)
958 				action = CLUSTER_MATRIX;
959 			else if (strcasecmp(repmgr_action, "CLEANUP") == 0)
960 				action = CLUSTER_CLEANUP;
961 		}
962 		else if (strcasecmp(repmgr_command, "SERVICE") == 0)
963 		{
964 			if (help_option == true)
965 			{
966 				do_service_help();
967 				exit(SUCCESS);
968 			}
969 
970 			if (strcasecmp(repmgr_action, "STATUS") == 0)
971 				action = SERVICE_STATUS;
972 			else if (strcasecmp(repmgr_action, "PAUSE") == 0)
973 				action = SERVICE_PAUSE;
974 			else if (strcasecmp(repmgr_action, "UNPAUSE") == 0)
975 				action = SERVICE_UNPAUSE;
976 
977 		}
978 		else if (strcasecmp(repmgr_command, "DAEMON") == 0)
979 		{
980 			if (help_option == true)
981 			{
982 				do_daemon_help();
983 				exit(SUCCESS);
984 			}
985 
986 			if (strcasecmp(repmgr_action, "START") == 0)
987 				action = DAEMON_START;
988 			else if (strcasecmp(repmgr_action, "STOP") == 0)
989 				action = DAEMON_STOP;
990 
991 			/* allow "daemon" as an alias for "service" for repmgr 4.x compatibility */
992 			if (strcasecmp(repmgr_action, "STATUS") == 0)
993 				action = SERVICE_STATUS;
994 			else if (strcasecmp(repmgr_action, "PAUSE") == 0)
995 				action = SERVICE_PAUSE;
996 			else if (strcasecmp(repmgr_action, "UNPAUSE") == 0)
997 				action = SERVICE_UNPAUSE;
998 		}
999 		else
1000 		{
1001 			valid_repmgr_command_found = false;
1002 		}
1003 	}
1004 
1005 	if (help_option == true)
1006 	{
1007 		do_help();
1008 		exit(SUCCESS);
1009 	}
1010 
1011 	if (action == NO_ACTION)
1012 	{
1013 		PQExpBufferData command_error;
1014 
1015 		initPQExpBuffer(&command_error);
1016 
1017 		if (repmgr_command == NULL)
1018 		{
1019 			appendPQExpBuffer(&command_error,
1020 							  _("no repmgr command provided"));
1021 		}
1022 		else if (valid_repmgr_command_found == false && repmgr_action[0] == '\0')
1023 		{
1024 			appendPQExpBuffer(&command_error,
1025 							  _("unknown repmgr command '%s'"),
1026 							  repmgr_command);
1027 		}
1028 		else if (repmgr_action[0] == '\0')
1029 		{
1030 			appendPQExpBuffer(&command_error,
1031 							  _("no action provided for command '%s'"),
1032 							  repmgr_command);
1033 		}
1034 		else
1035 		{
1036 			appendPQExpBuffer(&command_error,
1037 							  _("unknown repmgr action '%s %s'"),
1038 							  repmgr_command,
1039 							  repmgr_action);
1040 		}
1041 
1042 		item_list_append(&cli_errors, command_error.data);
1043 	}
1044 
1045 	/*
1046 	 * STANDBY CLONE historically accepts the upstream hostname as an
1047 	 * additional argument
1048 	 */
1049 	if (action == STANDBY_CLONE)
1050 	{
1051 		if (optind < argc)
1052 		{
1053 			if (runtime_options.host_param_provided == true)
1054 			{
1055 				PQExpBufferData additional_host_arg;
1056 
1057 				initPQExpBuffer(&additional_host_arg);
1058 				appendPQExpBuffer(&additional_host_arg,
1059 								  _("host name provided both with %s and as an extra parameter"),
1060 								  runtime_options.conninfo_provided == true ? "host=" : "-h/--host");
1061 				item_list_append(&cli_errors, additional_host_arg.data);
1062 			}
1063 			else
1064 			{
1065 				strncpy(runtime_options.host, argv[optind++], MAXLEN);
1066 				param_set(&source_conninfo, "host", runtime_options.host);
1067 				runtime_options.host_param_provided = true;
1068 			}
1069 		}
1070 	}
1071 
1072 	if (optind < argc)
1073 	{
1074 		PQExpBufferData too_many_args;
1075 
1076 		initPQExpBuffer(&too_many_args);
1077 		appendPQExpBuffer(&too_many_args, _("too many command-line arguments (first extra is \"%s\")"), argv[optind]);
1078 		item_list_append(&cli_errors, too_many_args.data);
1079 	}
1080 
1081 
1082 	/*
1083 	 * The configuration file is not required for some actions (e.g. 'standby
1084 	 * clone'), however if available we'll parse it anyway for options like
1085 	 * 'log_level', 'use_replication_slots' etc.
1086 	 */
1087 	load_config(runtime_options.config_file,
1088 				runtime_options.verbose,
1089 				runtime_options.terse,
1090 				argv[0]);
1091 
1092 
1093 	/*
1094 	 * Handle options which must be executed without a repmgr command
1095 	 */
1096 	if (runtime_options.dump_config == true)
1097 	{
1098 		if (repmgr_command != NULL)
1099 		{
1100 			fprintf(stderr,
1101 					_("--dump-config cannot be used in combination with a repmgr command"));
1102 			exit(ERR_BAD_CONFIG);
1103 		}
1104 		dump_config();
1105 		exit(SUCCESS);
1106 	}
1107 
1108 
1109 
1110 	check_cli_parameters(action);
1111 
1112 	/*
1113 	 * Sanity checks for command line parameters completed by now; any further
1114 	 * errors will be runtime ones
1115 	 */
1116 	if (cli_errors.head != NULL)
1117 	{
1118 		free_conninfo_params(&source_conninfo);
1119 
1120 		exit_with_cli_errors(&cli_errors, valid_repmgr_command_found == true ? repmgr_command : NULL);
1121 	}
1122 
1123 	/* no errors detected by repmgr, but getopt might have */
1124 	if (option_error_found == true)
1125 	{
1126 		if (valid_repmgr_command_found == true)
1127 		{
1128 			printf(_("Try \"%s --help\" or \"%s %s --help\" for more information.\n"),
1129 				   progname(),
1130 				   progname(),
1131 				   repmgr_command);
1132 		}
1133 		else
1134 		{
1135 			printf(_("Try \"repmgr --help\" for more information.\n"));
1136 		}
1137 
1138 		free_conninfo_params(&source_conninfo);
1139 		exit(ERR_BAD_CONFIG);
1140 	}
1141 
1142 
1143 	/*
1144 	 * Print any warnings about inappropriate command line options, unless
1145 	 * -t/--terse set
1146 	 */
1147 	if (cli_warnings.head != NULL && runtime_options.terse == false)
1148 	{
1149 		log_warning(_("following problems with command line parameters detected:"));
1150 		print_item_list(&cli_warnings);
1151 	}
1152 
1153 	/*
1154 	 * post-processing following command line parameter checks
1155 	 * =======================================================
1156 	 */
1157 
1158 	if (runtime_options.csv == true)
1159 	{
1160 		runtime_options.output_mode = OM_CSV;
1161 	}
1162 	else if (runtime_options.nagios == true)
1163 	{
1164 		runtime_options.output_mode = OM_NAGIOS;
1165 	}
1166 	else if (runtime_options.optformat == true)
1167 	{
1168 		runtime_options.output_mode = OM_OPTFORMAT;
1169 	}
1170 
1171 	/*
	 * Check for configuration file items which can be overridden by runtime
1173 	 * options
1174 	 * =====================================================================
1175 	 */
1176 
1177 	/*
1178 	 * Command-line parameter -L/--log-level overrides any setting in config
1179 	 * file
1180 	 */
1181 	if (*runtime_options.log_level != '\0')
1182 	{
1183 		strncpy(config_file_options.log_level, runtime_options.log_level, MAXLEN);
1184 	}
1185 
1186 	/*
1187 	 * Initialise pg_bindir - command line parameter will override any setting
1188 	 * in the configuration file
1189 	 */
1190 	if (!strlen(runtime_options.pg_bindir))
1191 	{
1192 		strncpy(runtime_options.pg_bindir, config_file_options.pg_bindir, MAXLEN);
1193 	}
1194 
1195 	/* Add trailing slash */
1196 	if (strlen(runtime_options.pg_bindir))
1197 	{
1198 		int			len = strlen(runtime_options.pg_bindir);
1199 
1200 		if (runtime_options.pg_bindir[len - 1] != '/')
1201 		{
1202 			maxlen_snprintf(pg_bindir, "%s/", runtime_options.pg_bindir);
1203 		}
1204 		else
1205 		{
1206 			strncpy(pg_bindir, runtime_options.pg_bindir, MAXLEN);
1207 		}
1208 	}
1209 
1210 	/*
1211 	 * Initialize the logger. We've previously requested STDERR logging only
1212 	 * to ensure the repmgr command doesn't have its output diverted to a
1213 	 * logging facility (which usually doesn't make sense for a command line
1214 	 * program).
1215 	 *
1216 	 * If required (e.g. when calling repmgr from repmgrd), this behaviour can
1217 	 * be overridden with "--log-to-file".
1218 	 */
1219 
1220 	logger_init(&config_file_options, progname());
1221 
1222 	if (runtime_options.verbose)
1223 		logger_set_verbose();
1224 
1225 	if (runtime_options.terse)
1226 		logger_set_terse();
1227 
1228 	/*
1229 	 * If --dry-run specified, ensure log_level is at least LOG_INFO, regardless
1230 	 * of what's in the configuration file or -L/--log-level parameter, otherwise
	 * some output might not be displayed.
1232 	 */
1233 	if (runtime_options.dry_run == true)
1234 	{
1235 		logger_set_min_level(LOG_INFO);
1236 	}
1237 
1238 	/*
1239 	 * If -q/--quiet supplied, suppress any non-ERROR log output.
1240 	 * This overrides everything else; we'll leave it up to the user to deal with the
1241 	 * consequences of e.g. running --dry-run together with -q/--quiet.
1242 	 */
1243 	if (runtime_options.quiet == true)
1244 	{
1245 		logger_set_level(LOG_ERROR);
1246 	}
1247 
1248 	/*
1249 	 * Node configuration information is not needed for all actions, with
1250 	 * STANDBY CLONE being the main exception.
1251 	 */
1252 	if (config_file_required)
1253 	{
1254 		/*
1255 		 * if a configuration file was provided, the configuration file parser
1256 		 * will already have errored out if no valid node_id found
1257 		 */
1258 		if (config_file_options.node_id == NODE_NOT_FOUND)
1259 		{
1260 			free_conninfo_params(&source_conninfo);
1261 
1262 			log_error(_("no node information was found - please supply a configuration file"));
1263 			exit(ERR_BAD_CONFIG);
1264 		}
1265 	}
1266 
1267 	/*
1268 	 * If a node was specified (by --node-id or --node-name), check it exists
1269 	 * (and pre-populate a record for later use).
1270 	 *
1271 	 * At this point check_cli_parameters() will already have determined if
1272 	 * provision of these is valid for the action, otherwise it unsets them.
1273 	 *
1274 	 * We need to check this much later than other command line parameters as
1275 	 * we need to wait until the configuration file is parsed and we can
1276 	 * obtain the conninfo string.
1277 	 */
1278 
1279 	if (runtime_options.node_id != UNKNOWN_NODE_ID || runtime_options.node_name[0] != '\0')
1280 	{
1281 		PGconn	   *conn = NULL;
1282 		RecordStatus record_status = RECORD_NOT_FOUND;
1283 
1284 		log_verbose(LOG_DEBUG, "connecting to local node to retrieve record for node specified with --node-id or --node-name");
1285 
1286 		if (strlen(config_file_options.conninfo))
1287 			conn = establish_db_connection(config_file_options.conninfo, true);
1288 		else
1289 			conn = establish_db_connection_by_params(&source_conninfo, true);
1290 
1291 		if (runtime_options.node_id != UNKNOWN_NODE_ID)
1292 		{
1293 			record_status = get_node_record(conn, runtime_options.node_id, &target_node_info);
1294 
1295 			if (record_status != RECORD_FOUND)
1296 			{
1297 				log_error(_("node %i (specified with --node-id) not found"),
1298 						  runtime_options.node_id);
1299 				PQfinish(conn);
1300 				free_conninfo_params(&source_conninfo);
1301 
1302 				exit(ERR_BAD_CONFIG);
1303 			}
1304 		}
1305 		else if (runtime_options.node_name[0] != '\0')
1306 		{
1307 			char	   *escaped = escape_string(conn, runtime_options.node_name);
1308 
1309 			if (escaped == NULL)
1310 			{
1311 				log_error(_("unable to escape value provided for --node-name"));
1312 				PQfinish(conn);
1313 				free_conninfo_params(&source_conninfo);
1314 
1315 				exit(ERR_BAD_CONFIG);
1316 			}
1317 
1318 			record_status = get_node_record_by_name(conn, escaped, &target_node_info);
1319 
1320 			pfree(escaped);
1321 			if (record_status != RECORD_FOUND)
1322 			{
1323 				log_error(_("node \"%s\" (specified with --node-name) not found"),
1324 						  runtime_options.node_name);
1325 				PQfinish(conn);
1326 				free_conninfo_params(&source_conninfo);
1327 
1328 				exit(ERR_BAD_CONFIG);
1329 			}
1330 		}
1331 
1332 		PQfinish(conn);
1333 	}
1334 
1335 
1336 	switch (action)
1337 	{
1338 			/* PRIMARY */
1339 		case PRIMARY_REGISTER:
1340 			do_primary_register();
1341 			break;
1342 		case PRIMARY_UNREGISTER:
1343 			do_primary_unregister();
1344 			break;
1345 
1346 			/* STANDBY */
1347 		case STANDBY_CLONE:
1348 			do_standby_clone();
1349 			break;
1350 		case STANDBY_REGISTER:
1351 			do_standby_register();
1352 			break;
1353 		case STANDBY_UNREGISTER:
1354 			do_standby_unregister();
1355 			break;
1356 		case STANDBY_PROMOTE:
1357 			do_standby_promote();
1358 			break;
1359 		case STANDBY_FOLLOW:
1360 			do_standby_follow();
1361 			break;
1362 		case STANDBY_SWITCHOVER:
1363 			do_standby_switchover();
1364 			break;
1365 
1366 			/* WITNESS */
1367 		case WITNESS_REGISTER:
1368 			do_witness_register();
1369 			break;
1370 		case WITNESS_UNREGISTER:
1371 			do_witness_unregister();
1372 			break;
1373 
1374 			/* NODE */
1375 		case NODE_STATUS:
1376 			do_node_status();
1377 			break;
1378 		case NODE_CHECK:
1379 			do_node_check();
1380 			break;
1381 		case NODE_REJOIN:
1382 			do_node_rejoin();
1383 			break;
1384 		case NODE_SERVICE:
1385 			do_node_service();
1386 			break;
1387 		case NODE_CONTROL:
1388 			do_node_control();
1389 			break;
1390 
1391 			/* CLUSTER */
1392 		case CLUSTER_SHOW:
1393 			do_cluster_show();
1394 			break;
1395 		case CLUSTER_EVENT:
1396 			do_cluster_event();
1397 			break;
1398 		case CLUSTER_CROSSCHECK:
1399 			do_cluster_crosscheck();
1400 			break;
1401 		case CLUSTER_MATRIX:
1402 			do_cluster_matrix();
1403 			break;
1404 		case CLUSTER_CLEANUP:
1405 			do_cluster_cleanup();
1406 			break;
1407 
1408 			/* SERVICE */
1409 		case SERVICE_STATUS:
1410 			do_service_status();
1411 			break;
1412 		case SERVICE_PAUSE:
1413 			do_service_pause();
1414 			break;
1415 		case SERVICE_UNPAUSE:
1416 			do_service_unpause();
1417 			break;
1418 
1419 			/* DAEMON */
1420 		case DAEMON_START:
1421 			do_daemon_start();
1422 			break;
1423 		case DAEMON_STOP:
1424 			do_daemon_stop();
1425 			break;
1426 
1427 		default:
1428 			/* An action will have been determined by this point  */
1429 			break;
1430 	}
1431 
1432 	free_conninfo_params(&source_conninfo);
1433 
1434 	return SUCCESS;
1435 }
1436 
1437 
1438 
1439 /*
1440  * Check for useless or conflicting parameters, and also whether a
1441  * configuration file is required.
1442  *
1443  * Messages will be added to the command line warning and error lists
1444  * as appropriate.
1445  */
1446 
1447 static void
check_cli_parameters(const int action)1448 check_cli_parameters(const int action)
1449 {
1450 	/*
1451 	 * ========================================================================
1452 	 * check all parameters required for an action are provided, and warn
1453 	 * about ineffective actions
1454 	 * ========================================================================
1455 	 */
1456 	switch (action)
1457 	{
1458 		case PRIMARY_REGISTER:
1459 			/* no required parameters */
1460 			break;
1461 		case STANDBY_CLONE:
1462 			{
1463 				standy_clone_mode mode = get_standby_clone_mode();
1464 
1465 				config_file_required = false;
1466 
1467 				if (mode == barman)
1468 				{
1469 					if (runtime_options.copy_external_config_files)
1470 					{
1471 						item_list_append(&cli_warnings,
1472 										 _("--copy-external-config-files ineffective in Barman mode"));
1473 					}
1474 
1475 					if (runtime_options.fast_checkpoint)
1476 					{
1477 						item_list_append(&cli_warnings,
1478 										 _("-c/--fast-checkpoint has no effect in Barman mode"));
1479 					}
1480 
1481 
1482 				}
1483 				else
1484 				{
1485 					if (!runtime_options.host_param_provided)
1486 					{
1487 						item_list_append_format(&cli_errors,
1488 												_("host name for the source node must be provided with -h/--host when executing %s"),
1489 												action_name(action));
1490 					}
1491 
1492 					if (!runtime_options.connection_param_provided)
1493 					{
1494 						item_list_append_format(&cli_errors,
1495 												_("database connection parameters for the source node must be provided when executing %s"),
1496 												action_name(action));
1497 					}
1498 
1499 					/*
1500 					 * If -D/--pgdata was provided, but config_file_options.pgdata
1501 					 * is set, warn that -D/--pgdata will be ignored.
1502 					 */
1503 					if (runtime_options.data_dir[0] && config_file_options.data_directory[0])
1504 					{
1505 						item_list_append(&cli_warnings,
1506 										 _("-D/--pgdata will be ignored if a repmgr configuration file is provided"));
1507 
1508 					}
1509 
1510 					if (*runtime_options.upstream_conninfo)
1511 					{
1512 						if (*runtime_options.replication_user)
1513 						{
1514 							item_list_append(&cli_warnings,
1515 											 _("--replication-user ineffective when specifying --upstream-conninfo"));
1516 						}
1517 					}
1518 
1519 					if (runtime_options.no_upstream_connection == true)
1520 					{
1521 						item_list_append(&cli_warnings,
1522 										 _("--no-upstream-connection only effective in Barman mode"));
1523 					}
1524 				}
1525 
1526 				if (strlen(config_file_options.config_directory))
1527 				{
1528 					if (runtime_options.copy_external_config_files == false)
1529 					{
1530 						item_list_append(&cli_warnings,
1531 										 _("\"config_directory\" set in repmgr.conf, but --copy-external-config-files not provided"));
1532 					}
1533 				}
1534 			}
1535 			break;
1536 
1537 		case STANDBY_FOLLOW:
1538 			{
1539 				/*
1540 				 * if `repmgr standby follow` executed with host params,
1541 				 * ensure data directory was provided
1542 				 */
1543 			}
1544 			break;
1545 		case WITNESS_REGISTER:
1546 			{
1547 				if (!runtime_options.host_param_provided)
1548 				{
1549 					item_list_append_format(&cli_errors,
1550 											_("host name for the source node must be provided with -h/--host when executing %s"),
1551 											action_name(action));
1552 				}
1553 			}
1554 			break;
1555 		case NODE_CHECK:
1556 			if (runtime_options.has_passfile == true)
1557 			{
1558 				config_file_required = false;
1559 			}
1560 			break;
1561 		case NODE_STATUS:
1562 			if (runtime_options.node_id != UNKNOWN_NODE_ID)
1563 			{
1564 				item_list_append(
1565 								 &cli_warnings,
1566 								 "--node-id will be ignored; \"repmgr node status\" can only be executed on the local node");
1567 			}
1568 			if (runtime_options.node_name[0] != '\0')
1569 			{
1570 				item_list_append(
1571 								 &cli_warnings,
1572 								 "--node-name will be ignored; \"repmgr node status\" can only be executed on the local node");
1573 			}
1574 			break;
1575 		case NODE_REJOIN:
1576 			if (runtime_options.connection_param_provided == false)
1577 			{
1578 				item_list_append(
1579 								 &cli_errors,
1580 								 "database connection parameters for an available node must be provided when executing NODE REJOIN");
1581 			}
1582 			break;
1583 		case CLUSTER_SHOW:
1584 		case CLUSTER_MATRIX:
1585 		case CLUSTER_CROSSCHECK:
1586 			if (runtime_options.connection_param_provided)
1587 				config_file_required = false;
1588 			break;
1589 		case CLUSTER_EVENT:
1590 			/* no required parameters */
1591 			break;
1592 
1593 	}
1594 
1595 	/*
1596 	 * ========================================================================
1597 	 * warn if parameters provided for an action where they're not relevant
1598 	 * ========================================================================
1599 	 */
1600 
1601 	/* --host etc. */
1602 	if (runtime_options.connection_param_provided)
1603 	{
1604 		switch (action)
1605 		{
1606 			case STANDBY_CLONE:
1607 			case STANDBY_FOLLOW:
1608 			case STANDBY_REGISTER:
1609 			case WITNESS_REGISTER:
1610 			case WITNESS_UNREGISTER:
1611 			case CLUSTER_SHOW:
1612 			case CLUSTER_MATRIX:
1613 			case CLUSTER_CROSSCHECK:
1614 			case NODE_REJOIN:
1615 				break;
1616 			default:
1617 				item_list_append_format(&cli_warnings,
1618 										_("database connection parameters not required when executing %s"),
1619 										action_name(action));
1620 		}
1621 	}
1622 
1623 	/* -D/--pgdata */
1624 	if (runtime_options.data_dir[0])
1625 	{
1626 		switch (action)
1627 		{
1628 			case STANDBY_CLONE:
1629 			case STANDBY_FOLLOW:
1630 			case NODE_SERVICE:
1631 				break;
1632 			default:
1633 				item_list_append_format(&cli_warnings,
1634 										_("-D/--pgdata not required when executing %s"),
1635 										action_name(action));
1636 		}
1637 	}
1638 
1639 	/*
1640 	 * --node-id
1641 	 *
1642 	 * NOTE: overrides --node-name, if present
1643 	 */
1644 	if (runtime_options.node_id != UNKNOWN_NODE_ID)
1645 	{
1646 		switch (action)
1647 		{
1648 			case PRIMARY_UNREGISTER:
1649 			case STANDBY_UNREGISTER:
1650 			case WITNESS_UNREGISTER:
1651 			case CLUSTER_CLEANUP:
1652 			case CLUSTER_EVENT:
1653 			case CLUSTER_MATRIX:
1654 			case CLUSTER_CROSSCHECK:
1655 				break;
1656 			default:
1657 				item_list_append_format(&cli_warnings,
1658 										_("--node-id not required when executing %s"),
1659 										action_name(action));
1660 				runtime_options.node_id = UNKNOWN_NODE_ID;
1661 		}
1662 	}
1663 
1664 	if (runtime_options.node_name[0])
1665 	{
1666 		switch (action)
1667 		{
1668 			case STANDBY_UNREGISTER:
1669 			case CLUSTER_EVENT:
1670 				if (runtime_options.node_id != UNKNOWN_NODE_ID)
1671 				{
1672 					item_list_append(&cli_warnings,
1673 									 _("--node-id provided, ignoring --node-name"));
1674 					memset(runtime_options.node_name, 0, sizeof(runtime_options.node_name));
1675 				}
1676 				break;
1677 			default:
1678 				item_list_append_format(&cli_warnings,
1679 										_("--node-name not required when executing %s"),
1680 										action_name(action));
1681 				memset(runtime_options.node_name, 0, sizeof(runtime_options.node_name));
1682 		}
1683 	}
1684 
1685 	if (runtime_options.upstream_node_id != UNKNOWN_NODE_ID)
1686 	{
1687 		switch (action)
1688 		{
1689 			case STANDBY_CLONE:
1690 			case STANDBY_REGISTER:
1691 			case STANDBY_FOLLOW:
1692 				break;
1693 			default:
1694 				item_list_append_format(&cli_warnings,
1695 										_("--upstream-node-id will be ignored when executing %s"),
1696 										action_name(action));
1697 		}
1698 	}
1699 
1700 	if (runtime_options.replication_user[0])
1701 	{
1702 		switch (action)
1703 		{
1704 			case PRIMARY_REGISTER:
1705 			case STANDBY_REGISTER:
1706 			case STANDBY_CLONE:
1707 				break;
1708 
1709 			case STANDBY_FOLLOW:
1710 				item_list_append_format(&cli_warnings,
1711 										_("--replication-user ignored when executing %s"),
1712 										action_name(action));
1713 				break;
1714 
1715 			default:
1716 				item_list_append_format(&cli_warnings,
1717 										_("--replication-user not required when executing %s"),
1718 										action_name(action));
1719 		}
1720 	}
1721 
1722 	if (runtime_options.superuser[0])
1723 	{
1724 		switch (action)
1725 		{
1726 			case STANDBY_CLONE:
1727 			case STANDBY_SWITCHOVER:
1728 			case NODE_CHECK:
1729 			case NODE_SERVICE:
1730 				break;
1731 			default:
1732 				item_list_append_format(&cli_warnings,
1733 										_("--superuser ignored when executing %s"),
1734 										action_name(action));
1735 		}
1736 	}
1737 
1738 
1739 	if (runtime_options.replication_conf_only == true)
1740 	{
1741 		switch (action)
1742 		{
1743 			case STANDBY_CLONE:
1744 				break;
1745 			default:
1746 				item_list_append_format(&cli_warnings,
1747 										_("--create-recovery-conf will be ignored when executing %s"),
1748 										action_name(action));
1749 		}
1750 	}
1751 
1752 	if (runtime_options.event[0])
1753 	{
1754 		switch (action)
1755 		{
1756 			case CLUSTER_EVENT:
1757 				break;
1758 			default:
1759 				item_list_append_format(&cli_warnings,
1760 										_("--event not required when executing %s"),
1761 										action_name(action));
1762 		}
1763 	}
1764 
1765 	if (runtime_options.limit_provided)
1766 	{
1767 		switch (action)
1768 		{
1769 			case CLUSTER_EVENT:
1770 				break;
1771 			default:
1772 				item_list_append_format(&cli_warnings,
1773 										_("--limit not required when executing %s"),
1774 										action_name(action));
1775 		}
1776 	}
1777 
1778 	if (runtime_options.all)
1779 	{
1780 		switch (action)
1781 		{
1782 			case CLUSTER_EVENT:
1783 				if (runtime_options.limit_provided == true)
1784 				{
1785 					runtime_options.all = false;
1786 					item_list_append(&cli_warnings,
1787 									 _("--limit provided, ignoring --all"));
1788 				}
1789 				break;
1790 			default:
1791 				item_list_append_format(&cli_warnings,
1792 										_("--all not required when executing %s"),
1793 										action_name(action));
1794 		}
1795 	}
1796 
1797 	/* --wait/--no-wait */
1798 
1799 	if (runtime_options.wait_provided == true && runtime_options.no_wait == true)
1800 	{
1801 		item_list_append_format(&cli_errors,
1802 								_("both --wait and --no-wait options provided"));
1803 	}
1804 	else
1805 	{
1806 		if (runtime_options.wait_provided)
1807 		{
1808 			switch (action)
1809 			{
1810 				case DAEMON_START:
1811 				case DAEMON_STOP:
1812 				case STANDBY_FOLLOW:
1813 					break;
1814 				default:
1815 					item_list_append_format(&cli_warnings,
1816 											_("--wait will be ignored when executing %s"),
1817 											action_name(action));
1818 			}
1819 		}
1820 		else if (runtime_options.no_wait)
1821 		{
1822 			switch (action)
1823 			{
1824 				case DAEMON_START:
1825 				case DAEMON_STOP:
1826 				case NODE_REJOIN:
1827 					break;
1828 				default:
1829 					item_list_append_format(&cli_warnings,
1830 											_("--no-wait will be ignored when executing %s"),
1831 											action_name(action));
1832 			}
1833 		}
1834 	}
1835 
1836 	/* repmgr node service --action */
1837 	if (runtime_options.action[0] != '\0')
1838 	{
1839 		switch (action)
1840 		{
1841 			case NODE_SERVICE:
1842 				break;
1843 			default:
1844 				item_list_append_format(&cli_warnings,
1845 										_("--action will be ignored when executing %s"),
1846 										action_name(action));
1847 		}
1848 	}
1849 
1850 	/* repmgr node status --is-shutdown-cleanly */
1851 	if (runtime_options.is_shutdown_cleanly == true)
1852 	{
1853 		switch (action)
1854 		{
1855 			case NODE_STATUS:
1856 				break;
1857 			default:
1858 				item_list_append_format(&cli_warnings,
1859 										_("--is-shutdown-cleanly will be ignored when executing %s"),
1860 										action_name(action));
1861 		}
1862 	}
1863 
1864 	if (runtime_options.always_promote == true)
1865 	{
1866 		switch (action)
1867 		{
1868 			case STANDBY_SWITCHOVER:
1869 				break;
1870 			default:
1871 				item_list_append_format(&cli_warnings,
1872 										_("--always-promote will be ignored when executing %s"),
1873 										action_name(action));
1874 		}
1875 	}
1876 
1877 	if (runtime_options.force_rewind_used == true)
1878 	{
1879 		switch (action)
1880 		{
1881 			case STANDBY_SWITCHOVER:
1882 			case NODE_REJOIN:
1883 				break;
1884 			default:
1885 				item_list_append_format(&cli_warnings,
1886 										_("--force-rewind will be ignored when executing %s"),
1887 										action_name(action));
1888 		}
1889 	}
1890 
1891 	if (runtime_options.repmgrd_no_pause == true)
1892 	{
1893 		switch (action)
1894 		{
1895 			case STANDBY_SWITCHOVER:
1896 				break;
1897 			default:
1898 				item_list_append_format(&cli_warnings,
1899 										_("--repmgrd-no-pause will be ignored when executing %s"),
1900 										action_name(action));
1901 		}
1902 	}
1903 
1904 	if (runtime_options.repmgrd_force_unpause == true)
1905 	{
1906 		switch (action)
1907 		{
1908 			case STANDBY_SWITCHOVER:
1909 				if (runtime_options.repmgrd_no_pause == true)
1910 					item_list_append(&cli_errors,
1911 									 _("--repmgrd-force-unpause and --repmgrd-no-pause cannot be used together"));
1912 				break;
1913 			default:
1914 				item_list_append_format(&cli_warnings,
1915 										_("--repmgrd-force-unpause will be ignored when executing %s"),
1916 										action_name(action));
1917 		}
1918 	}
1919 
1920 	if (runtime_options.config_files[0] != '\0')
1921 	{
1922 		switch (action)
1923 		{
1924 			case NODE_REJOIN:
1925 				break;
1926 			default:
1927 				item_list_append_format(&cli_warnings,
1928 										_("--config-files will be ignored when executing %s"),
1929 										action_name(action));
1930 		}
1931 	}
1932 
1933 	if (runtime_options.dry_run == true)
1934 	{
1935 		switch (action)
1936 		{
1937 			case PRIMARY_REGISTER:
1938 			case PRIMARY_UNREGISTER:
1939 			case STANDBY_CLONE:
1940 			case STANDBY_REGISTER:
1941 			case STANDBY_FOLLOW:
1942 			case STANDBY_SWITCHOVER:
1943 			case STANDBY_PROMOTE:
1944 			case WITNESS_REGISTER:
1945 			case WITNESS_UNREGISTER:
1946 			case NODE_REJOIN:
1947 			case NODE_SERVICE:
1948 			case SERVICE_PAUSE:
1949 			case SERVICE_UNPAUSE:
1950 			case SERVICE_STATUS:
1951 			case DAEMON_START:
1952 			case DAEMON_STOP:
1953 				break;
1954 			default:
1955 				item_list_append_format(&cli_warnings,
1956 										_("--dry-run is not effective when executing %s"),
1957 										action_name(action));
1958 		}
1959 	}
1960 
1961 	/* check only one of --csv, --nagios and --optformat  used */
1962 	{
1963 		int			used_options = 0;
1964 
1965 		if (runtime_options.csv == true)
1966 			used_options++;
1967 
1968 		if (runtime_options.nagios == true)
1969 			used_options++;
1970 
1971 		if (runtime_options.optformat == true)
1972 			used_options++;
1973 
1974 		if (used_options > 1)
1975 		{
1976 			/* TODO: list which options were used */
1977 			item_list_append(&cli_errors,
1978 							 "only one of --csv, --nagios and --optformat can be used");
1979 		}
1980 	}
1981 
1982 	/* --compact */
1983 	if (runtime_options.compact == true)
1984 	{
1985 		switch (action)
1986 		{
1987 			case CLUSTER_SHOW:
1988 			case CLUSTER_EVENT:
1989 			case SERVICE_STATUS:
1990 				break;
1991 			default:
1992 				item_list_append_format(&cli_warnings,
1993 										_("--compact is not effective when executing %s"),
1994 										action_name(action));
1995 		}
1996 	}
1997 
1998 	/* --detail */
1999 	if (runtime_options.detail == true)
2000 	{
2001 		switch (action)
2002 		{
2003 			case SERVICE_STATUS:
2004 				break;
2005 			default:
2006 				item_list_append_format(&cli_warnings,
2007 										_("--detail is not effective when executing %s"),
2008 										action_name(action));
2009 		}
2010 	}
2011 
2012 	/* --siblings-follow */
2013 	if (runtime_options.siblings_follow == true)
2014 	{
2015 		switch (action)
2016 		{
2017 			case STANDBY_PROMOTE:
2018 			case STANDBY_SWITCHOVER:
2019 				break;
2020 			default:
2021 				item_list_append_format(&cli_warnings,
2022 										_("----siblings-follow is not effective when executing %s"),
2023 										action_name(action));
2024 		}
2025 	}
2026 
2027 	/* --disable-wal-receiver / --enable-wal-receiver */
2028 	if (runtime_options.disable_wal_receiver == true || runtime_options.enable_wal_receiver == true)
2029 	{
2030 		switch (action)
2031 		{
2032 			case NODE_CONTROL:
2033 			{
2034 				if (runtime_options.disable_wal_receiver == true && runtime_options.enable_wal_receiver == true)
2035 				{
2036 						item_list_append(&cli_errors,
2037 										 _("provide either --disable-wal-receiver or --enable-wal-receiver"));
2038 				}
2039 			}
2040 				break;
2041 			default:
2042 					item_list_append_format(&cli_warnings,
2043 											_("--disable-wal-receiver / --enable-wal-receiver not effective when executing %s"),
2044 											action_name(action));
2045 		}
2046 	}
2047 
2048 }
2049 
2050 
2051 /*
2052  * Generate formatted node status output for display by "cluster show" and
2053  * "service status".
2054  */
2055 bool
format_node_status(t_node_info * node_info,PQExpBufferData * node_status,PQExpBufferData * upstream,ItemList * warnings)2056 format_node_status(t_node_info *node_info, PQExpBufferData *node_status, PQExpBufferData *upstream, ItemList *warnings)
2057 {
2058 	bool error_found = false;
2059 	t_node_info remote_node_rec = T_NODE_INFO_INITIALIZER;
2060 	RecordStatus remote_node_rec_found = RECORD_NOT_FOUND;
2061 
2062 	if (PQstatus(node_info->conn) == CONNECTION_OK)
2063 	{
2064 		node_info->node_status = NODE_STATUS_UP;
2065 		node_info->recovery_type = get_recovery_type(node_info->conn);
2066 		/* get node's copy of its record so we can see what it thinks its status is */
2067 		remote_node_rec_found = get_node_record_with_upstream(node_info->conn, node_info->node_id, &remote_node_rec);
2068 	}
2069 	else
2070 	{
2071 		/* check if node is reachable, but just not letting us in */
2072 		if (is_server_available_quiet(node_info->conninfo))
2073 			node_info->node_status = NODE_STATUS_REJECTED;
2074 		else
2075 			node_info->node_status = NODE_STATUS_DOWN;
2076 
2077 		node_info->recovery_type = RECTYPE_UNKNOWN;
2078 	}
2079 
2080 	/* format node status info */
2081 	switch (node_info->type)
2082 	{
2083 		case PRIMARY:
2084 		{
2085 			/* node is reachable */
2086 			if (node_info->node_status == NODE_STATUS_UP)
2087 			{
2088 				if (node_info->active == true)
2089 				{
2090 					switch (node_info->recovery_type)
2091 					{
2092 						case RECTYPE_PRIMARY:
2093 							appendPQExpBufferStr(node_status, "* running");
2094 							break;
2095 						case RECTYPE_STANDBY:
2096 							appendPQExpBufferStr(node_status, "! running as standby");
2097 							item_list_append_format(warnings,
2098 													"node \"%s\" (ID: %i) is registered as primary but running as standby",
2099 													node_info->node_name, node_info->node_id);
2100 							break;
2101 						case RECTYPE_UNKNOWN:
2102 							appendPQExpBufferStr(node_status, "! unknown");
2103 							item_list_append_format(warnings,
2104 													"node \"%s\" (ID: %i) has unknown replication status",
2105 													node_info->node_name, node_info->node_id);
2106 							break;
2107 					}
2108 				}
2109 				else
2110 				{
2111 					if (node_info->recovery_type == RECTYPE_PRIMARY)
2112 					{
2113 						appendPQExpBufferStr(node_status, "! running");
2114 						item_list_append_format(warnings,
2115 												"node \"%s\" (ID: %i) is running but the repmgr node record is inactive",
2116 												node_info->node_name, node_info->node_id);
2117 					}
2118 					else
2119 					{
2120 						appendPQExpBufferStr(node_status, "! running as standby");
2121 						item_list_append_format(warnings,
2122 												"node \"%s\" (ID: %i) is registered as an inactive primary but running as standby",
2123 												node_info->node_name, node_info->node_id);
2124 					}
2125 				}
2126 			}
2127 			/* node is up but cannot connect */
2128 			else if (node_info->node_status == NODE_STATUS_REJECTED)
2129 			{
2130 				if (node_info->active == true)
2131 				{
2132 					appendPQExpBufferStr(node_status, "? running");
2133 				}
2134 				else
2135 				{
2136 					appendPQExpBufferStr(node_status, "! running");
2137 					error_found = true;
2138 				}
2139 			}
2140 			/* node is unreachable */
2141 			else
2142 			{
2143 				/* node is unreachable but marked active */
2144 				if (node_info->active == true)
2145 				{
2146 					appendPQExpBufferStr(node_status, "? unreachable");
2147 					item_list_append_format(warnings,
2148 											"node \"%s\" (ID: %i) is registered as an active primary but is unreachable",
2149 											node_info->node_name, node_info->node_id);
2150 				}
2151 				/* node is unreachable and marked as inactive */
2152 				else
2153 				{
2154 					appendPQExpBufferStr(node_status, "- failed");
2155 					error_found = true;
2156 				}
2157 			}
2158 		}
2159 		break;
2160 		case STANDBY:
2161 		{
2162 			/* node is reachable */
2163 			if (node_info->node_status == NODE_STATUS_UP)
2164 			{
2165 				if (node_info->active == true)
2166 				{
2167 					switch (node_info->recovery_type)
2168 					{
2169 						case RECTYPE_STANDBY:
2170 							appendPQExpBufferStr(node_status, "  running");
2171 							break;
2172 						case RECTYPE_PRIMARY:
2173 							appendPQExpBufferStr(node_status, "! running as primary");
2174 							item_list_append_format(warnings,
2175 													"node \"%s\" (ID: %i) is registered as standby but running as primary",
2176 													node_info->node_name, node_info->node_id);
2177 							break;
2178 						case RECTYPE_UNKNOWN:
2179 							appendPQExpBufferStr(node_status, "! unknown");
2180 							item_list_append_format(
2181 								warnings,
2182 								"node \"%s\" (ID: %i) has unknown replication status",
2183 								node_info->node_name, node_info->node_id);
2184 							break;
2185 					}
2186 				}
2187 				else
2188 				{
2189 					if (node_info->recovery_type == RECTYPE_STANDBY)
2190 					{
2191 						appendPQExpBufferStr(node_status, "! running");
2192 						item_list_append_format(warnings,
2193 												"node \"%s\" (ID: %i) is running but the repmgr node record is inactive",
2194 												node_info->node_name, node_info->node_id);
2195 					}
2196 					else
2197 					{
2198 						appendPQExpBufferStr(node_status, "! running as primary");
2199 						item_list_append_format(warnings,
2200 												"node \"%s\" (ID: %i) is running as primary but the repmgr node record is inactive",
2201 												node_info->node_name, node_info->node_id);
2202 					}
2203 				}
2204 
2205 				/* warn about issue with paused WAL replay */
2206 				if (is_wal_replay_paused(node_info->conn, true))
2207 				{
2208 					item_list_append_format(warnings,
2209 											_("WAL replay is paused on node \"%s\" (ID: %i) with WAL replay pending; this node cannot be manually promoted until WAL replay is resumed"),
2210 											node_info->node_name, node_info->node_id);
2211 				}
2212 			}
2213 			/* node is up but cannot connect */
2214 			else if (node_info->node_status == NODE_STATUS_REJECTED)
2215 			{
2216 				if (node_info->active == true)
2217 				{
2218 					appendPQExpBufferStr(node_status, "? running");
2219 				}
2220 				else
2221 				{
2222 					appendPQExpBufferStr(node_status, "! running");
2223 					error_found = true;
2224 				}
2225 			}
2226 			/* node is unreachable */
2227 			else
2228 			{
2229 				/* node is unreachable but marked active */
2230 				if (node_info->active == true)
2231 				{
2232 					appendPQExpBufferStr(node_status, "? unreachable");
2233 					item_list_append_format(warnings,
2234 											"node \"%s\" (ID: %i) is registered as an active standby but is unreachable",
2235 											node_info->node_name, node_info->node_id);
2236 				}
2237 				else
2238 				{
2239 					appendPQExpBufferStr(node_status, "- failed");
2240 					error_found = true;
2241 				}
2242 			}
2243 		}
2244 
2245 		break;
2246 		case WITNESS:
2247 		{
2248 			/* node is reachable */
2249 			if (node_info->node_status == NODE_STATUS_UP)
2250 			{
2251 				if (node_info->active == true)
2252 				{
2253 					appendPQExpBufferStr(node_status, "* running");
2254 				}
2255 				else
2256 				{
2257 					appendPQExpBufferStr(node_status, "! running");
2258 					error_found = true;
2259 				}
2260 			}
2261 			/* node is up but cannot connect */
2262 			else if (node_info->node_status == NODE_STATUS_REJECTED)
2263 			{
2264 				if (node_info->active == true)
2265 				{
2266 					appendPQExpBufferStr(node_status, "? rejected");
2267 				}
2268 				else
2269 				{
2270 					appendPQExpBufferStr(node_status, "! failed");
2271 					error_found = true;
2272 				}
2273 			}
2274 			/* node is unreachable */
2275 			else
2276 			{
2277 				if (node_info->active == true)
2278 				{
2279 					appendPQExpBufferStr(node_status, "? unreachable");
2280 				}
2281 				else
2282 				{
2283 					appendPQExpBufferStr(node_status, "- failed");
2284 					error_found = true;
2285 				}
2286 			}
2287 		}
2288 		break;
2289 		case UNKNOWN:
2290 		{
2291 			/* this should never happen */
2292 			appendPQExpBufferStr(node_status, "? unknown node type");
2293 			error_found = true;
2294 		}
2295 		break;
2296 	}
2297 
2298 	/* format node upstream info */
2299 
2300 	if (remote_node_rec_found == RECORD_NOT_FOUND)
2301 	{
2302 		/*
2303 		 * Unable to retrieve the node's copy of its own record - copy the
2304 		 * name from our own copy of the record
2305 		 */
2306 		appendPQExpBuffer(upstream,
2307 						  "? %s",
2308 						  node_info->upstream_node_name);
2309 	}
2310 	else if (remote_node_rec.type == WITNESS)
2311 	{
2312 		/* no upstream - unlikely to happen */
2313 		if (remote_node_rec.upstream_node_id == NO_UPSTREAM_NODE)
2314 		{
2315 			appendPQExpBufferStr(upstream, "! ");
2316 			item_list_append_format(warnings,
2317 									"node \"%s\" (ID: %i) is a witness but reports it has no upstream node",
2318 									node_info->node_name,
2319 									node_info->node_id);
2320 		}
2321 		/* mismatch between reported upstream and upstream in local node's metadata */
2322 		else if (node_info->upstream_node_id != remote_node_rec.upstream_node_id)
2323 		{
2324 			appendPQExpBufferStr(upstream, "! ");
2325 
2326 			if (node_info->upstream_node_id != remote_node_rec.upstream_node_id)
2327 			{
2328 				item_list_append_format(warnings,
2329 										"node \"%s\" (ID: %i) reports a different upstream (reported: \"%s\", expected \"%s\")",
2330 										node_info->node_name,
2331 										node_info->node_id,
2332 										remote_node_rec.upstream_node_name,
2333 										node_info->upstream_node_name);
2334 			}
2335 		}
2336 		else
2337 		{
2338 			t_node_info upstream_node_rec = T_NODE_INFO_INITIALIZER;
2339 			RecordStatus upstream_node_rec_found = get_node_record(node_info->conn,
2340 																   node_info->upstream_node_id,
2341 																   &upstream_node_rec);
2342 
2343 			if (upstream_node_rec_found != RECORD_FOUND)
2344 			{
2345 				appendPQExpBufferStr(upstream, "? ");
2346 				item_list_append_format(warnings,
2347 										"unable to find record for upstream node ID %i",
2348 										node_info->upstream_node_id);
2349 
2350 			}
2351 			else
2352 			{
2353 				PGconn *upstream_conn = establish_db_connection_quiet(upstream_node_rec.conninfo);
2354 
2355 				if (PQstatus(upstream_conn) != CONNECTION_OK)
2356 				{
2357 					appendPQExpBufferStr(upstream, "? ");
2358 					item_list_append_format(warnings,
2359 											"unable to connect to node \"%s\" (ID: %i)'s upstream node \"%s\" (ID: %i)",
2360 											node_info->node_name,
2361 											node_info->node_id,
2362 											upstream_node_rec.node_name,
2363 											upstream_node_rec.node_id);
2364 				}
2365 
2366 				PQfinish(upstream_conn);
2367 			}
2368 		}
2369 
2370 		appendPQExpBufferStr(upstream,
2371 							 remote_node_rec.upstream_node_name);
2372 
2373 	}
2374 	else if (remote_node_rec.type == STANDBY)
2375 	{
2376 		if (node_info->upstream_node_id != NO_UPSTREAM_NODE && node_info->upstream_node_id == remote_node_rec.upstream_node_id)
2377 		{
2378 			/*
2379 			 * expected and reported upstreams match - check if node is actually
2380 			 * connected to the upstream
2381 			 */
2382 			NodeAttached attached_to_upstream = NODE_ATTACHED_UNKNOWN;
2383 			char *replication_state = NULL;
2384 			t_node_info upstream_node_rec = T_NODE_INFO_INITIALIZER;
2385 			RecordStatus upstream_node_rec_found = get_node_record(node_info->conn,
2386 																   node_info->upstream_node_id,
2387 																   &upstream_node_rec);
2388 
2389 			if (upstream_node_rec_found != RECORD_FOUND)
2390 			{
2391 				item_list_append_format(warnings,
2392 										"unable to find record for upstream node ID %i",
2393 										node_info->upstream_node_id);
2394 
2395 			}
2396 			else
2397 			{
2398 				PGconn *upstream_conn = establish_db_connection_quiet(upstream_node_rec.conninfo);
2399 
2400 				if (PQstatus(upstream_conn) != CONNECTION_OK)
2401 				{
2402 					item_list_append_format(warnings,
2403 											"unable to connect to node \"%s\" (ID: %i)'s upstream node \"%s\" (ID: %i)",
2404 											node_info->node_name,
2405 											node_info->node_id,
2406 											upstream_node_rec.node_name,
2407 											upstream_node_rec.node_id);
2408 				}
2409 				else
2410 				{
2411 					attached_to_upstream = is_downstream_node_attached(upstream_conn, node_info->node_name, &replication_state);
2412 				}
2413 
2414 				PQfinish(upstream_conn);
2415 			}
2416 
2417 			if (attached_to_upstream == NODE_ATTACHED_UNKNOWN)
2418 			{
2419 				appendPQExpBufferStr(upstream, "? ");
2420 				item_list_append_format(warnings,
2421 										"unable to determine if node \"%s\" (ID: %i) is attached to its upstream node \"%s\" (ID: %i)",
2422 										node_info->node_name,
2423 										node_info->node_id,
2424 										upstream_node_rec.node_name,
2425 										upstream_node_rec.node_id);
2426 			}
2427 			if (attached_to_upstream == NODE_NOT_ATTACHED)
2428 			{
2429 				appendPQExpBufferStr(upstream, "? ");
2430 				item_list_append_format(warnings,
2431 										"node \"%s\" (ID: %i) attached to its upstream node \"%s\" (ID: %i) in state \"%s\"",
2432 										node_info->node_name,
2433 										node_info->node_id,
2434 										upstream_node_rec.node_name,
2435 										upstream_node_rec.node_id,
2436 										replication_state);
2437 			}
2438 
2439 			else if (attached_to_upstream == NODE_DETACHED)
2440 			{
2441 				appendPQExpBufferStr(upstream, "! ");
2442 				item_list_append_format(warnings,
2443 										"node \"%s\" (ID: %i) is not attached to its upstream node \"%s\" (ID: %i)",
2444 										node_info->node_name,
2445 										node_info->node_id,
2446 										upstream_node_rec.node_name,
2447 										upstream_node_rec.node_id);
2448 			}
2449 			appendPQExpBufferStr(upstream,
2450 								 node_info->upstream_node_name);
2451 
2452 		}
2453 		else
2454 		{
2455 			if (node_info->upstream_node_id != NO_UPSTREAM_NODE && remote_node_rec.upstream_node_id == NO_UPSTREAM_NODE)
2456 			{
2457 				appendPQExpBufferChar(upstream, '!');
2458 				item_list_append_format(warnings,
2459 										"node \"%s\" (ID: %i) reports it has no upstream (expected: \"%s\")",
2460 										node_info->node_name,
2461 										node_info->node_id,
2462 										node_info->upstream_node_name);
2463 			}
2464 			else if (node_info->upstream_node_id != NO_UPSTREAM_NODE && remote_node_rec.upstream_node_id != NO_UPSTREAM_NODE)
2465 
2466 			{
2467 				appendPQExpBuffer(upstream,
2468 								  "! %s", remote_node_rec.upstream_node_name);
2469 				item_list_append_format(warnings,
2470 										"node \"%s\" (ID: %i) reports a different upstream (reported: \"%s\", expected \"%s\")",
2471 										node_info->node_name,
2472 										node_info->node_id,
2473 										remote_node_rec.upstream_node_name,
2474 										node_info->upstream_node_name);
2475 			}
2476 		}
2477 	}
2478 
2479 	return error_found;
2480 }
2481 
2482 
2483 static const char *
action_name(const int action)2484 action_name(const int action)
2485 {
2486 	switch (action)
2487 	{
2488 		case PRIMARY_REGISTER:
2489 			return "PRIMARY REGISTER";
2490 		case PRIMARY_UNREGISTER:
2491 			return "PRIMARY UNREGISTER";
2492 
2493 		case STANDBY_CLONE:
2494 			return "STANDBY CLONE";
2495 		case STANDBY_REGISTER:
2496 			return "STANDBY REGISTER";
2497 		case STANDBY_UNREGISTER:
2498 			return "STANDBY UNREGISTER";
2499 		case STANDBY_PROMOTE:
2500 			return "STANDBY PROMOTE";
2501 		case STANDBY_FOLLOW:
2502 			return "STANDBY FOLLOW";
2503 		case STANDBY_SWITCHOVER:
2504 			return "STANDBY SWITCHOVER";
2505 
2506 		case WITNESS_REGISTER:
2507 			return "WITNESS REGISTER";
2508 		case WITNESS_UNREGISTER:
2509 			return "WITNESS UNREGISTER";
2510 
2511 		case NODE_STATUS:
2512 			return "NODE STATUS";
2513 		case NODE_CHECK:
2514 			return "NODE CHECK";
2515 		case NODE_REJOIN:
2516 			return "NODE REJOIN";
2517 		case NODE_SERVICE:
2518 			return "NODE SERVICE";
2519 		case NODE_CONTROL:
2520 			return "NODE CONTROL";
2521 
2522 		case CLUSTER_SHOW:
2523 			return "CLUSTER SHOW";
2524 		case CLUSTER_CLEANUP:
2525 			return "CLUSTER CLEANUP";
2526 		case CLUSTER_EVENT:
2527 			return "CLUSTER EVENT";
2528 		case CLUSTER_MATRIX:
2529 			return "CLUSTER MATRIX";
2530 		case CLUSTER_CROSSCHECK:
2531 			return "CLUSTER CROSSCHECK";
2532 
2533 		case SERVICE_STATUS:
2534 			return "SERVICE STATUS";
2535 		case SERVICE_PAUSE:
2536 			return "SERVICE PAUSE";
2537 		case SERVICE_UNPAUSE:
2538 			return "SERVICE UNPAUSE";
2539 
2540 		case DAEMON_START:
2541 			return "DAEMON START";
2542 		case DAEMON_STOP:
2543 			return "DAEMON STOP";
2544 	}
2545 
2546 	return "UNKNOWN ACTION";
2547 }
2548 
2549 
2550 void
print_error_list(ItemList * error_list,int log_level)2551 print_error_list(ItemList *error_list, int log_level)
2552 {
2553 	ItemListCell *cell = NULL;
2554 
2555 	for (cell = error_list->head; cell; cell = cell->next)
2556 	{
2557 		switch (log_level)
2558 		{
2559 				/* Currently we only need errors and warnings */
2560 			case LOG_ERROR:
2561 				log_error("%s", cell->string);
2562 				break;
2563 			case LOG_WARNING:
2564 				log_warning("%s", cell->string);
2565 				break;
2566 		}
2567 	}
2568 }
2569 
2570 
2571 void
print_status_header(int cols,ColHeader * headers)2572 print_status_header(int cols, ColHeader *headers)
2573 {
2574 	int i, di;
2575 	int max_cols = 0;
2576 
2577 
2578 	/* count how many columns we actually need to display */
2579 	for (i = 0; i < cols; i++)
2580 	{
2581 		if (headers[i].display == true)
2582 			max_cols ++;
2583 	}
2584 
2585 	for (i = 0; i < cols; i++)
2586 	{
2587 		if (headers[i].display == false)
2588 			continue;
2589 
2590 		if (i == 0)
2591 			printf(" ");
2592 		else
2593 			printf(" | ");
2594 
2595 		printf("%-*s",
2596 			   headers[i].max_length,
2597 			   headers[i].title);
2598 	}
2599 
2600 
2601 	printf("\n");
2602 	printf("-");
2603 
2604 	di = 0;
2605 	for (i = 0; i < cols; i++)
2606 	{
2607 		int			j;
2608 
2609 		if (headers[i].display == false)
2610 			continue;
2611 
2612 		for (j = 0; j < headers[i].max_length; j++)
2613 			printf("-");
2614 
2615 		if (di < (max_cols - 1))
2616 			printf("-+-");
2617 		else
2618 			printf("-");
2619 		di++;
2620 	}
2621 
2622 	printf("\n");
2623 }
2624 
2625 
/*
 * print_help_header()
 *
 * Print the one-line program description which starts the help output,
 * followed by a blank line. When the effective user ID is 0 a boxed
 * warning is printed as well.
 */
void
print_help_header(void)
{
	printf(_("%s: replication management tool for PostgreSQL\n"), progname());
	puts("");

	/* nothing more to say unless root is executing "repmgr --help" */
	if (geteuid() != 0)
		return;

	/* big friendly warning */
	printf(_("	**************************************************\n"));
	printf(_("	*** repmgr must be executed by a non-superuser ***\n"));
	printf(_("	**************************************************\n"));
	puts("");
}
2641 
2642 static void
do_help(void)2643 do_help(void)
2644 {
2645 	print_help_header();
2646 
2647 	printf(_("Usage:\n"));
2648 	printf(_("    %s [OPTIONS] primary {register|unregister}\n"), progname());
2649 	printf(_("    %s [OPTIONS] standby {register|unregister|clone|promote|follow|switchover}\n"), progname());
2650 	printf(_("    %s [OPTIONS] node    {status|check|rejoin|service}\n"), progname());
2651 	printf(_("    %s [OPTIONS] cluster {show|event|matrix|crosscheck|cleanup}\n"), progname());
2652 	printf(_("    %s [OPTIONS] witness {register|unregister}\n"), progname());
2653 	printf(_("    %s [OPTIONS] service {status|pause|unpause}\n"), progname());
2654 	printf(_("    %s [OPTIONS] daemon  {start|stop}\n"), progname());
2655 
2656 	puts("");
2657 
2658 	printf(_("  Execute \"%s {primary|standby|node|cluster|witness|service} --help\" to see command-specific options\n"), progname());
2659 
2660 	puts("");
2661 
2662 	printf(_("General options:\n"));
2663 	printf(_("  -?, --help                          show this help, then exit\n"));
2664 	printf(_("  -V, --version                       output version information, then exit\n"));
2665 	printf(_("  --version-number                    output version number, then exit\n"));
2666 	puts("");
2667 
2668 	printf(_("General configuration options:\n"));
2669 	printf(_("  -b, --pg_bindir=PATH                path to PostgreSQL binaries (optional)\n"));
2670 	printf(_("  -f, --config-file=PATH              path to the repmgr configuration file\n"));
2671 	printf(_("  -F, --force                         force potentially dangerous operations to happen\n"));
2672 	puts("");
2673 
2674 	printf(_("Database connection options:\n"));
2675 	printf(_("  -d, --dbname=DBNAME                 database to connect to (default: "));
2676 	if (runtime_options.dbname[0] != '\0')
2677 		printf(_("\"%s\")\n"), runtime_options.dbname);
2678 	else
2679 		printf(_("\"%s\")\n"), runtime_options.username);
2680 
2681 	printf(_("  -h, --host=HOSTNAME                 database server host"));
2682 	if (runtime_options.host[0] != '\0')
2683 		printf(_(" (default: \"%s\")"), runtime_options.host);
2684 	printf(_("\n"));
2685 
2686 	printf(_("  -p, --port=PORT                     database server port (default: \"%s\")\n"), runtime_options.port);
2687 	printf(_("  -U, --username=USERNAME             database user name to connect as (default: \"%s\")\n"), runtime_options.username);
2688 
2689 	puts("");
2690 
2691 	printf(_("Node-specific options:\n"));
2692 	printf(_("  -D, --pgdata=DIR                    location of the node's data directory \n"));
2693 	printf(_("  --node-id                           specify a node by id (only available for some operations)\n"));
2694 	printf(_("  --node-name                         specify a node by name (only available for some operations)\n"));
2695 
2696 	puts("");
2697 
2698 	printf(_("Logging options:\n"));
2699 	printf(_("  --dry-run                           show what would happen for action, but don't execute it\n"));
2700 	printf(_("  -L, --log-level                     set log level (overrides configuration file; default: NOTICE)\n"));
2701 	printf(_("  --log-to-file                       log to file (or logging facility) defined in repmgr.conf\n"));
2702 	printf(_("  -q, --quiet                         suppress all log output apart from errors\n"));
2703 	printf(_("  -t, --terse                         don't display detail, hints and other non-critical output\n"));
2704 	printf(_("  -v, --verbose                       display additional log output (useful for debugging)\n"));
2705 
2706 	puts("");
2707 
2708 	printf(_("%s home page: <%s>\n"), "repmgr", REPMGR_URL);
2709 }
2710 
2711 
2712 /*
2713  * Create the repmgr extension, and grant access for the repmgr
2714  * user if not a superuser.
2715  *
2716  * Note:
2717  *   This is one of two places where superuser rights are required.
2718  *   We should also consider possible scenarious where a non-superuser
2719  *   has sufficient privileges to install the extension.
2720  */
2721 
2722 bool
create_repmgr_extension(PGconn * conn)2723 create_repmgr_extension(PGconn *conn)
2724 {
2725 	PQExpBufferData query;
2726 	PGresult   *res;
2727 
2728 	ExtensionStatus extension_status = REPMGR_UNKNOWN;
2729 
2730 	t_connection_user userinfo = T_CONNECTION_USER_INITIALIZER;
2731 	bool		is_superuser = false;
2732 	PGconn	   *superuser_conn = NULL;
2733 	PGconn	   *schema_create_conn = NULL;
2734 	t_extension_versions extversions = T_EXTENSION_VERSIONS_INITIALIZER;
2735 
2736 	extension_status = get_repmgr_extension_status(conn, &extversions);
2737 
2738 	switch (extension_status)
2739 	{
2740 		case REPMGR_UNKNOWN:
2741 			log_error(_("unable to determine status of \"repmgr\" extension"));
2742 			return false;
2743 
2744 		case REPMGR_UNAVAILABLE:
2745 			log_error(_("\"repmgr\" extension is not available"));
2746 			return false;
2747 
2748 		case REPMGR_OLD_VERSION_INSTALLED:
2749 			log_error(_("an older version of the \"repmgr\" extension is installed"));
2750 			log_detail(_("version %s is installed but newer version %s is available"),
2751 					   extversions.installed_version,
2752 					   extversions.default_version);
2753 			log_hint(_("update the installed extension version by executing \"ALTER EXTENSION repmgr UPDATE\""));
2754 			return false;
2755 
2756 		case REPMGR_INSTALLED:
2757 			log_info(_("\"repmgr\" extension is already installed"));
2758 			return true;
2759 
2760 		case REPMGR_AVAILABLE:
2761 			if (runtime_options.dry_run == true)
2762 			{
2763 				log_notice(_("would now attempt to install extension \"repmgr\""));
2764 			}
2765 			else
2766 			{
2767 				log_notice(_("attempting to install extension \"repmgr\""));
2768 			}
2769 			break;
2770 	}
2771 
2772 	/* 3. Attempt to get a superuser connection */
2773 
2774 	is_superuser = is_superuser_connection(conn, &userinfo);
2775 
2776 	get_superuser_connection(&conn, &superuser_conn, &schema_create_conn);
2777 
2778 	if (runtime_options.dry_run == true)
2779 		return true;
2780 
2781 	/* 4. Create extension */
2782 
2783 	res = PQexec(schema_create_conn, "CREATE EXTENSION repmgr");
2784 
2785 	if ((PQresultStatus(res) != PGRES_COMMAND_OK && PQresultStatus(res) != PGRES_TUPLES_OK))
2786 	{
2787 		log_error(_("unable to create \"repmgr\" extension:\n  %s"),
2788 				  PQerrorMessage(schema_create_conn));
2789 		log_hint(_("check that the provided user has sufficient privileges for CREATE EXTENSION"));
2790 
2791 		PQclear(res);
2792 		if (superuser_conn != NULL)
2793 			PQfinish(superuser_conn);
2794 		return false;
2795 	}
2796 
2797 	PQclear(res);
2798 
2799 	/* 5. If not superuser, grant usage */
2800 	if (is_superuser == false)
2801 	{
2802 		initPQExpBuffer(&query);
2803 
2804 		appendPQExpBuffer(&query,
2805 						  "GRANT USAGE ON SCHEMA repmgr TO %s",
2806 						  userinfo.username);
2807 
2808 		res = PQexec(schema_create_conn, query.data);
2809 		termPQExpBuffer(&query);
2810 
2811 		if (PQresultStatus(res) != PGRES_COMMAND_OK)
2812 		{
2813 			log_error(_("unable to grant usage on \"repmgr\" extension to %s:\n  %s"),
2814 					  userinfo.username,
2815 					  PQerrorMessage(schema_create_conn));
2816 			PQclear(res);
2817 
2818 			if (superuser_conn != 0)
2819 				PQfinish(superuser_conn);
2820 
2821 			return false;
2822 		}
2823 
2824 		initPQExpBuffer(&query);
2825 
2826 		appendPQExpBuffer(&query,
2827 						  "GRANT ALL ON ALL TABLES IN SCHEMA repmgr TO %s",
2828 						  userinfo.username);
2829 
2830 		res = PQexec(schema_create_conn, query.data);
2831 		termPQExpBuffer(&query);
2832 
2833 		if (PQresultStatus(res) != PGRES_COMMAND_OK)
2834 		{
2835 			log_error(_("unable to grant permission on tables on \"repmgr\" extension to %s:\n  %s"),
2836 					  userinfo.username,
2837 					  PQerrorMessage(schema_create_conn));
2838 			PQclear(res);
2839 
2840 			if (superuser_conn != NULL)
2841 				PQfinish(superuser_conn);
2842 
2843 			return false;
2844 		}
2845 	}
2846 
2847 	if (superuser_conn != NULL)
2848 		PQfinish(superuser_conn);
2849 
2850 	log_notice(_("\"repmgr\" extension successfully installed"));
2851 
2852 	create_event_notification(conn,
2853 							  &config_file_options,
2854 							  config_file_options.node_id,
2855 							  "cluster_created",
2856 							  true,
2857 							  NULL);
2858 
2859 	return true;
2860 }
2861 
2862 
2863 /**
2864  * check_server_version()
2865  *
2866  * Verify that the server is MIN_SUPPORTED_VERSION_NUM or later
2867  *
2868  * PGconn *conn:
2869  *	 the connection to check
2870  *
2871  * char *server_type:
2872  *	 either "primary" or "standby"; used to format error message
2873  *
2874  * bool exit_on_error:
2875  *	 exit if reported server version is too low; optional to enable some callers
2876  *	 to perform additional cleanup
2877  *
2878  * char *server_version_string
2879  *	 passed to get_server_version(), which will place the human-readable
2880  *	 server version string there (e.g. "9.4.0")
2881  */
2882 int
check_server_version(PGconn * conn,char * server_type,bool exit_on_error,char * server_version_string)2883 check_server_version(PGconn *conn, char *server_type, bool exit_on_error, char *server_version_string)
2884 {
2885 	char		version_string[MAXVERSIONSTR] = "";
2886 	int			conn_server_version_num = get_server_version(conn, version_string);
2887 
2888 	/* Copy the version string, if the caller wants it */
2889 	if (server_version_string != NULL)
2890 		strncpy(server_version_string, version_string, MAXVERSIONSTR);
2891 
2892 	if (conn_server_version_num < MIN_SUPPORTED_VERSION_NUM)
2893 	{
2894 		if (conn_server_version_num > 0)
2895 		{
2896 			log_error(_("%s requires %s to be PostgreSQL %s or later"),
2897 					  progname(),
2898 					  server_type,
2899 					  MIN_SUPPORTED_VERSION);
2900 			log_detail(_("%s server version is %s"),
2901 					   server_type,
2902 					   version_string);
2903 		}
2904 
2905 		if (exit_on_error == true)
2906 		{
2907 			PQfinish(conn);
2908 			exit(ERR_BAD_CONFIG);
2909 		}
2910 
2911 		return UNKNOWN_SERVER_VERSION_NUM;
2912 	}
2913 
2914 	/*
2915 	 * If it's clear a particular repmgr feature branch won't be able to support
2916 	 * PostgreSQL from a particular PostgreSQL release onwards (e.g. 4.4 with PostgreSQL
2917 	 * 12 and later due to recovery.conf removal), set MAX_UNSUPPORTED_VERSION and
2918 	 * MAX_UNSUPPORTED_VERSION_NUM in "repmgr.h" to define the first PostgreSQL
2919 	 * version which can't be suppored.
2920 	 */
2921 #ifdef MAX_UNSUPPORTED_VERSION_NUM
2922 	if (conn_server_version_num >= MAX_UNSUPPORTED_VERSION_NUM)
2923 	{
2924 		if (conn_server_version_num > 0)
2925 		{
2926 			log_error(_("%s %s does not support PostgreSQL %s or later"),
2927 					  progname(),
2928 					  REPMGR_VERSION,
2929 					  MAX_UNSUPPORTED_VERSION);
2930 			log_detail(_("%s server version is %s"),
2931 					   server_type,
2932 					   version_string);
2933 			log_hint(_("For details of supported versions see: https://repmgr.org/docs/current/install-requirements.html#INSTALL-COMPATIBILITY-MATRIX"));
2934 		}
2935 
2936 		if (exit_on_error == true)
2937 		{
2938 			PQfinish(conn);
2939 			exit(ERR_BAD_CONFIG);
2940 		}
2941 
2942 		return UNKNOWN_SERVER_VERSION_NUM;
2943 	}
2944 #endif
2945 
2946 	return conn_server_version_num;
2947 }
2948 
2949 
2950 int
test_ssh_connection(char * host,char * remote_user)2951 test_ssh_connection(char *host, char *remote_user)
2952 {
2953 	char		script[MAXLEN] = "";
2954 	int			r = 1,
2955 				i;
2956 
2957 	/*
2958 	 * On some OS, true is located in a different place than in Linux we have
2959 	 * to try them all until all alternatives are gone or we found `true'
2960 	 * because the target OS may differ from the source OS
2961 	 */
2962 	const char *bin_true_paths[] = {
2963 		"/bin/true",
2964 		"/usr/bin/true",
2965 		NULL
2966 	};
2967 
2968 	for (i = 0; bin_true_paths[i] && r != 0; ++i)
2969 	{
2970 		if (!remote_user[0])
2971 			maxlen_snprintf(script, "ssh -o Batchmode=yes %s %s %s 2>/dev/null",
2972 							config_file_options.ssh_options, host, bin_true_paths[i]);
2973 		else
2974 			maxlen_snprintf(script, "ssh -o Batchmode=yes %s %s -l %s %s 2>/dev/null",
2975 							config_file_options.ssh_options, host, remote_user,
2976 							bin_true_paths[i]);
2977 
2978 		log_verbose(LOG_DEBUG, _("test_ssh_connection(): executing %s"), script);
2979 		r = system(script);
2980 	}
2981 
2982 	if (r != 0)
2983 		log_warning(_("unable to connect to remote host \"%s\" via SSH"), host);
2984 
2985 	return r;
2986 }
2987 
2988 
2989 
2990 
2991 /*
2992  * get_superuser_connection()
2993  *
2994  * Check if provided connection "conn" is a superuser connection, if not attempt to
2995  * make a superuser connection "superuser_conn" with the provided --superuser parameter.
2996  *
2997  * "privileged_conn" is set to whichever connection is the superuser connection.
2998  */
2999 void
get_superuser_connection(PGconn ** conn,PGconn ** superuser_conn,PGconn ** privileged_conn)3000 get_superuser_connection(PGconn **conn, PGconn **superuser_conn, PGconn **privileged_conn)
3001 {
3002 	t_connection_user userinfo = T_CONNECTION_USER_INITIALIZER;
3003 	t_conninfo_param_list conninfo_params = T_CONNINFO_PARAM_LIST_INITIALIZER;
3004 	bool		is_superuser = false;
3005 
3006 	/* this should never happen */
3007 	if (PQstatus(*conn) != CONNECTION_OK)
3008 	{
3009 		log_error(_("no database connection available"));
3010 		log_detail("\n%s", PQerrorMessage(*conn));
3011 		exit(ERR_INTERNAL);
3012 	}
3013 
3014 	is_superuser = is_superuser_connection(*conn, &userinfo);
3015 
3016 	if (is_superuser == true)
3017 	{
3018 		*privileged_conn = *conn;
3019 
3020 		return;
3021 	}
3022 
3023 	if (runtime_options.superuser[0] == '\0')
3024 	{
3025 		log_error(_("\"%s\" is not a superuser and no superuser name supplied"), userinfo.username);
3026 		log_hint(_("supply a valid superuser name with -S/--superuser"));
3027 		PQfinish(*conn);
3028 		exit(ERR_BAD_CONFIG);
3029 	}
3030 
3031 	initialize_conninfo_params(&conninfo_params, false);
3032 	conn_to_param_list(*conn, &conninfo_params);
3033 	param_set(&conninfo_params, "user", runtime_options.superuser);
3034 
3035 	*superuser_conn = establish_db_connection_by_params(&conninfo_params, false);
3036 
3037 	if (PQstatus(*superuser_conn) != CONNECTION_OK)
3038 	{
3039 		log_error(_("unable to establish superuser connection as \"%s\""),
3040 				  runtime_options.superuser);
3041 
3042 		PQfinish(*conn);
3043 		exit(ERR_BAD_CONFIG);
3044 	}
3045 
3046 	/* check provided superuser really is superuser */
3047 	if (!is_superuser_connection(*superuser_conn, NULL))
3048 	{
3049 		log_error(_("\"%s\" is not a superuser"), runtime_options.superuser);
3050 		PQfinish(*superuser_conn);
3051 		PQfinish(*conn);
3052 		exit(ERR_BAD_CONFIG);
3053 	}
3054 
3055 	log_debug("established superuser connection as \"%s\"", runtime_options.superuser);
3056 
3057 	*privileged_conn = *superuser_conn;
3058 	return;
3059 }
3060 
3061 
3062 standy_clone_mode
get_standby_clone_mode(void)3063 get_standby_clone_mode(void)
3064 {
3065 	standy_clone_mode mode;
3066 
3067 	if (*config_file_options.barman_host != '\0' && runtime_options.without_barman == false)
3068 		mode = barman;
3069 	else
3070 		mode = pg_basebackup;
3071 
3072 	return mode;
3073 }
3074 
3075 
3076 void
make_pg_path(PQExpBufferData * buf,const char * file)3077 make_pg_path(PQExpBufferData *buf, const char *file)
3078 {
3079 	appendPQExpBuffer(buf, "%s%s",
3080 					  pg_bindir, file);
3081 }
3082 
3083 
3084 int
copy_remote_files(char * host,char * remote_user,char * remote_path,char * local_path,bool is_directory,int server_version_num)3085 copy_remote_files(char *host, char *remote_user, char *remote_path,
3086 				  char *local_path, bool is_directory, int server_version_num)
3087 {
3088 	PQExpBufferData rsync_flags;
3089 	char		script[MAXLEN] = "";
3090 	char		host_string[MAXLEN] = "";
3091 	int			r = 0;
3092 
3093 	initPQExpBuffer(&rsync_flags);
3094 
3095 	if (*config_file_options.rsync_options == '\0')
3096 	{
3097 		appendPQExpBufferStr(&rsync_flags,
3098 							 "--archive --checksum --compress --progress --rsh=ssh");
3099 	}
3100 	else
3101 	{
3102 		appendPQExpBufferStr(&rsync_flags,
3103 							 config_file_options.rsync_options);
3104 	}
3105 
3106 	if (runtime_options.force)
3107 	{
3108 		appendPQExpBufferStr(&rsync_flags,
3109 							 " --delete --checksum");
3110 	}
3111 
3112 	if (!remote_user[0])
3113 	{
3114 		maxlen_snprintf(host_string, "%s", host);
3115 	}
3116 	else
3117 	{
3118 		maxlen_snprintf(host_string, "%s@%s", remote_user, host);
3119 	}
3120 
3121 	/*
3122 	 * When copying the main PGDATA directory, certain files and contents of
3123 	 * certain directories need to be excluded.
3124 	 *
3125 	 * See function 'sendDir()' in 'src/backend/replication/basebackup.c' -
3126 	 * we're basically simulating what pg_basebackup does, but with rsync
3127 	 * rather than the BASEBACKUP replication protocol command.
3128 	 *
3129 	 * *However* currently we'll always copy the contents of the 'pg_replslot'
3130 	 * directory and delete later if appropriate.
3131 	 */
3132 	if (is_directory)
3133 	{
3134 		/* Files which we don't want */
3135 		appendPQExpBufferStr(&rsync_flags,
3136 							 " --exclude=postmaster.pid --exclude=postmaster.opts --exclude=global/pg_control");
3137 
3138 		appendPQExpBufferStr(&rsync_flags,
3139 							 " --exclude=recovery.conf --exclude=recovery.done");
3140 
3141 		/*
3142 		 * Ideally we'd use PG_AUTOCONF_FILENAME from utils/guc.h, but
3143 		 * that has too many dependencies for a mere client program.
3144 		 */
3145 		appendPQExpBuffer(&rsync_flags, " --exclude=%s.tmp",
3146 						  PG_AUTOCONF_FILENAME);
3147 
3148 		/* Temporary files which we don't want, if they exist */
3149 		appendPQExpBuffer(&rsync_flags, " --exclude=%s*",
3150 						  PG_TEMP_FILE_PREFIX);
3151 
3152 		/* Directories which we don't want */
3153 
3154 		if (server_version_num >= 100000)
3155 		{
3156 			appendPQExpBufferStr(&rsync_flags,
3157 								 " --exclude=pg_wal/* --exclude=log/*");
3158 		}
3159 		else
3160 		{
3161 			appendPQExpBufferStr(&rsync_flags,
3162 								 " --exclude=pg_xlog/* --exclude=pg_log/*");
3163 		}
3164 
3165 		appendPQExpBufferStr(&rsync_flags,
3166 							 " --exclude=pg_stat_tmp/*");
3167 
3168 		maxlen_snprintf(script, "rsync %s %s:%s/* %s",
3169 						rsync_flags.data, host_string, remote_path, local_path);
3170 	}
3171 	else
3172 	{
3173 		maxlen_snprintf(script, "rsync %s %s:%s %s",
3174 						rsync_flags.data, host_string, remote_path, local_path);
3175 	}
3176 
3177 	termPQExpBuffer(&rsync_flags);
3178 
3179 	log_info(_("rsync command line:\n  %s"), script);
3180 
3181 	r = system(script);
3182 
3183 	log_debug("copy_remote_files(): r = %i; WIFEXITED: %i; WEXITSTATUS: %i", r, WIFEXITED(r), WEXITSTATUS(r));
3184 
3185 	/* exit code 24 indicates vanished files, which isn't a problem for us */
3186 	if (WIFEXITED(r) && WEXITSTATUS(r) && WEXITSTATUS(r) != 24)
3187 		log_verbose(LOG_WARNING, "copy_remote_files(): rsync returned unexpected exit status %i", WEXITSTATUS(r));
3188 
3189 	return r;
3190 }
3191 
3192 
3193 
3194 
3195 void
make_remote_repmgr_path(PQExpBufferData * output_buf,t_node_info * remote_node_record)3196 make_remote_repmgr_path(PQExpBufferData *output_buf, t_node_info *remote_node_record)
3197 {
3198 	if (config_file_options.repmgr_bindir[0] != '\0')
3199 	{
3200 		int			len = strlen(config_file_options.repmgr_bindir);
3201 
3202 		appendPQExpBufferStr(output_buf,
3203 							 config_file_options.repmgr_bindir);
3204 
3205 		/* Add trailing slash */
3206 		if (config_file_options.repmgr_bindir[len - 1] != '/')
3207 		{
3208 			appendPQExpBufferChar(output_buf, '/');
3209 		}
3210 	}
3211 	else if (pg_bindir[0] != '\0')
3212 	{
3213 		appendPQExpBufferStr(output_buf,
3214 							 pg_bindir);
3215 	}
3216 
3217 	appendPQExpBuffer(output_buf,
3218 					  "%s -f %s ",
3219 					  progname(),
3220 					  remote_node_record->config_file);
3221 
3222 	/*
3223 	 * If --log-level was explicitly supplied, pass that through
3224 	 * to the remote repmgr client too.
3225 	 */
3226 	if (runtime_options.log_level[0] != '\0')
3227 	{
3228 		appendPQExpBuffer(output_buf,
3229 						  " -L %s ",
3230 						  runtime_options.log_level);
3231 	}
3232 
3233 }
3234 
3235 
3236 void
make_repmgrd_path(PQExpBufferData * output_buf)3237 make_repmgrd_path(PQExpBufferData *output_buf)
3238 {
3239 	if (config_file_options.repmgr_bindir[0] != '\0')
3240 	{
3241 		int			len = strlen(config_file_options.repmgr_bindir);
3242 
3243 		appendPQExpBufferStr(output_buf,
3244 							 config_file_options.repmgr_bindir);
3245 
3246 		/* Add trailing slash */
3247 		if (config_file_options.repmgr_bindir[len - 1] != '/')
3248 		{
3249 			appendPQExpBufferChar(output_buf, '/');
3250 		}
3251 	}
3252 	else if (pg_bindir[0] != '\0')
3253 	{
3254 		appendPQExpBufferStr(output_buf,
3255 							 pg_bindir);
3256 	}
3257 
3258 	appendPQExpBuffer(output_buf,
3259 					  "repmgrd -f %s ",
3260 					  config_file_path);
3261 }
3262 
3263 
3264 /* ======================== */
3265 /* server control functions */
3266 /* ======================== */
3267 
3268 void
get_server_action(t_server_action action,char * script,char * data_dir)3269 get_server_action(t_server_action action, char *script, char *data_dir)
3270 {
3271 	PQExpBufferData command;
3272 
3273 	if (data_dir == NULL || data_dir[0] == '\0')
3274 		data_dir = "(none provided)";
3275 
3276 	switch (action)
3277 	{
3278 		case ACTION_NONE:
3279 			script[0] = '\0';
3280 			return;
3281 
3282 		case ACTION_START:
3283 			{
3284 				if (config_file_options.service_start_command[0] != '\0')
3285 				{
3286 					maxlen_snprintf(script, "%s",
3287 									config_file_options.service_start_command);
3288 				}
3289 				else
3290 				{
3291 					initPQExpBuffer(&command);
3292 
3293 					make_pg_path(&command, "pg_ctl");
3294 
3295 					appendPQExpBuffer(&command,
3296 									  " %s -w -D ",
3297 									  config_file_options.pg_ctl_options);
3298 
3299 					appendShellString(&command,
3300 									  data_dir);
3301 
3302 					appendPQExpBuffer(&command,
3303 									  " start");
3304 
3305 					strncpy(script, command.data, MAXLEN);
3306 
3307 					termPQExpBuffer(&command);
3308 				}
3309 
3310 				return;
3311 			}
3312 
3313 		case ACTION_STOP:
3314 		case ACTION_STOP_WAIT:
3315 			{
3316 				if (config_file_options.service_stop_command[0] != '\0')
3317 				{
3318 					maxlen_snprintf(script, "%s",
3319 									config_file_options.service_stop_command);
3320 				}
3321 				else
3322 				{
3323 					initPQExpBuffer(&command);
3324 					make_pg_path(&command, "pg_ctl");
3325 
3326 					appendPQExpBuffer(&command,
3327 									  " %s -D ",
3328 									  config_file_options.pg_ctl_options);
3329 
3330 					appendShellString(&command,
3331 									  data_dir);
3332 
3333 					if (action == ACTION_STOP_WAIT)
3334 						appendPQExpBuffer(&command,
3335 										  " -w");
3336 					else
3337 						appendPQExpBuffer(&command,
3338 										  " -W");
3339 
3340 					appendPQExpBuffer(&command,
3341 									  " -m fast stop");
3342 
3343 					strncpy(script, command.data, MAXLEN);
3344 
3345 					termPQExpBuffer(&command);
3346 				}
3347 				return;
3348 			}
3349 
3350 		case ACTION_RESTART:
3351 			{
3352 				if (config_file_options.service_restart_command[0] != '\0')
3353 				{
3354 					maxlen_snprintf(script, "%s",
3355 									config_file_options.service_restart_command);
3356 				}
3357 				else
3358 				{
3359 					initPQExpBuffer(&command);
3360 
3361 					make_pg_path(&command, "pg_ctl");
3362 
3363 					appendPQExpBuffer(&command,
3364 									  " %s -w -D ",
3365 									  config_file_options.pg_ctl_options);
3366 
3367 					appendShellString(&command,
3368 									  data_dir);
3369 
3370 					appendPQExpBuffer(&command,
3371 									  " restart");
3372 
3373 					strncpy(script, command.data, MAXLEN);
3374 
3375 					termPQExpBuffer(&command);
3376 				}
3377 				return;
3378 			}
3379 
3380 		case ACTION_RELOAD:
3381 			{
3382 				if (config_file_options.service_reload_command[0] != '\0')
3383 				{
3384 					maxlen_snprintf(script, "%s",
3385 									config_file_options.service_reload_command);
3386 				}
3387 				else
3388 				{
3389 					initPQExpBuffer(&command);
3390 
3391 					make_pg_path(&command, "pg_ctl");
3392 
3393 					appendPQExpBuffer(&command,
3394 									  " %s -w -D ",
3395 									  config_file_options.pg_ctl_options);
3396 
3397 					appendShellString(&command,
3398 									  data_dir);
3399 
3400 					appendPQExpBuffer(&command,
3401 									  " reload");
3402 
3403 					strncpy(script, command.data, MAXLEN);
3404 
3405 					termPQExpBuffer(&command);
3406 
3407 				}
3408 				return;
3409 			}
3410 
3411 		case ACTION_PROMOTE:
3412 			{
3413 				if (config_file_options.service_promote_command[0] != '\0')
3414 				{
3415 					maxlen_snprintf(script, "%s",
3416 									config_file_options.service_promote_command);
3417 				}
3418 				else
3419 				{
3420 					initPQExpBuffer(&command);
3421 
3422 					make_pg_path(&command, "pg_ctl");
3423 
3424 					appendPQExpBuffer(&command,
3425 									  " %s -w -D ",
3426 									  config_file_options.pg_ctl_options);
3427 
3428 					appendShellString(&command,
3429 									  data_dir);
3430 
3431 					appendPQExpBuffer(&command,
3432 									  " promote");
3433 
3434 					strncpy(script, command.data, MAXLEN);
3435 
3436 					termPQExpBuffer(&command);
3437 				}
3438 				return;
3439 			}
3440 
3441 		default:
3442 			return;
3443 	}
3444 
3445 	return;
3446 }
3447 
3448 
3449 bool
data_dir_required_for_action(t_server_action action)3450 data_dir_required_for_action(t_server_action action)
3451 {
3452 	switch (action)
3453 	{
3454 		case ACTION_NONE:
3455 			return false;
3456 
3457 		case ACTION_START:
3458 			if (config_file_options.service_start_command[0] != '\0')
3459 			{
3460 				return false;
3461 			}
3462 			return true;
3463 
3464 		case ACTION_STOP:
3465 		case ACTION_STOP_WAIT:
3466 			if (config_file_options.service_stop_command[0] != '\0')
3467 			{
3468 				return false;
3469 			}
3470 			return true;
3471 
3472 		case ACTION_RESTART:
3473 			if (config_file_options.service_restart_command[0] != '\0')
3474 			{
3475 				return false;
3476 			}
3477 			return true;
3478 
3479 		case ACTION_RELOAD:
3480 			if (config_file_options.service_reload_command[0] != '\0')
3481 			{
3482 				return false;
3483 			}
3484 			return true;
3485 
3486 		case ACTION_PROMOTE:
3487 			if (config_file_options.service_promote_command[0] != '\0')
3488 			{
3489 				return false;
3490 			}
3491 			return true;
3492 
3493 		default:
3494 			return false;
3495 	}
3496 
3497 	return false;
3498 }
3499 
3500 
3501 /*
3502  * Copy the location of the configuration file directory into the
3503  * provided buffer; if "config_directory" provided, use that, otherwise
3504  * default to the data directory.
3505  *
3506  * This is primarily intended for use with "pg_ctl" (which itself shouldn't
3507  * be used outside of development environments).
3508  */
3509 void
get_node_config_directory(char * config_dir_buf)3510 get_node_config_directory(char *config_dir_buf)
3511 {
3512 	if (config_file_options.config_directory[0] != '\0')
3513 	{
3514 		strncpy(config_dir_buf, config_file_options.config_directory, MAXPGPATH);
3515 		return;
3516 	}
3517 
3518 	if (config_file_options.data_directory[0] != '\0')
3519 	{
3520 		strncpy(config_dir_buf, config_file_options.data_directory, MAXPGPATH);
3521 		return;
3522 	}
3523 
3524 	return;
3525 }
3526 
3527 
3528 void
get_node_data_directory(char * data_dir_buf)3529 get_node_data_directory(char *data_dir_buf)
3530 {
3531 	/*
3532 	 * the configuration file setting has priority, and will always be set
3533 	 * when a configuration file was provided
3534 	 */
3535 	if (config_file_options.data_directory[0] != '\0')
3536 	{
3537 		strncpy(data_dir_buf, config_file_options.data_directory, MAXPGPATH);
3538 		return;
3539 	}
3540 
3541 	if (runtime_options.data_dir[0] != '\0')
3542 	{
3543 		strncpy(data_dir_buf, runtime_options.data_dir, MAXPGPATH);
3544 		return;
3545 	}
3546 
3547 	return;
3548 }
3549 
3550 
3551 /*
3552  * initialise a node record from the provided configuration
3553  * parameters
3554  */
3555 void
init_node_record(t_node_info * node_record)3556 init_node_record(t_node_info *node_record)
3557 {
3558 	node_record->node_id = config_file_options.node_id;
3559 	node_record->upstream_node_id = runtime_options.upstream_node_id;
3560 	node_record->priority = config_file_options.priority;
3561 	node_record->active = true;
3562 
3563 	if (config_file_options.location[0] != '\0')
3564 		strncpy(node_record->location, config_file_options.location, MAXLEN);
3565 	else
3566 		strncpy(node_record->location, "default", MAXLEN);
3567 
3568 
3569 	strncpy(node_record->node_name, config_file_options.node_name, sizeof(node_record->node_name));
3570 	strncpy(node_record->conninfo, config_file_options.conninfo, MAXLEN);
3571 	strncpy(node_record->config_file, config_file_path, MAXPGPATH);
3572 
3573 	if (config_file_options.replication_user[0] != '\0')
3574 	{
3575 		/* replication user explicitly provided in configuration file */
3576 		strncpy(node_record->repluser, config_file_options.replication_user, NAMEDATALEN);
3577 	}
3578 	else
3579 	{
3580 		/* use the "user" value from "conninfo" */
3581 		char		repluser[MAXLEN] = "";
3582 
3583 		(void) get_conninfo_value(config_file_options.conninfo, "user", repluser);
3584 		strncpy(node_record->repluser, repluser, NAMEDATALEN);
3585 	}
3586 
3587 	if (config_file_options.use_replication_slots == true)
3588 	{
3589 		create_slot_name(node_record->slot_name, config_file_options.node_id);
3590 	}
3591 }
3592 
3593 
3594 bool
can_use_pg_rewind(PGconn * conn,const char * data_directory,PQExpBufferData * reason)3595 can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason)
3596 {
3597 	bool		can_use = true;
3598 
3599 	/* "full_page_writes" must be on in any case */
3600 	if (guc_set(conn, "full_page_writes", "=", "off"))
3601 	{
3602 		appendPQExpBuffer(reason,
3603 						  _("\"full_page_writes\" must be set to \"on\""));
3604 
3605 		can_use = false;
3606 	}
3607 
3608 	/*
3609 	 * "wal_log_hints" off - are data checksums available? Note: we're
3610 	 * checking the local pg_control file here as the value will be the same
3611 	 * throughout the cluster and saves a round-trip to the demotion
3612 	 * candidate.
3613 	 */
3614 	if (guc_set(conn, "wal_log_hints", "=", "on") == false)
3615 	{
3616 		int			data_checksum_version = get_data_checksum_version(data_directory);
3617 
3618 		if (data_checksum_version == UNKNOWN_DATA_CHECKSUM_VERSION)
3619 		{
3620 			if (can_use == false)
3621 				appendPQExpBuffer(reason, "; ");
3622 
3623 			appendPQExpBuffer(reason,
3624 							  _("\"wal_log_hints\" is set to \"off\" but unable to determine data checksum version"));
3625 			can_use = false;
3626 		}
3627 		else if (data_checksum_version == 0)
3628 		{
3629 			if (can_use == false)
3630 				appendPQExpBuffer(reason, "; ");
3631 
3632 			appendPQExpBuffer(reason,
3633 							  _("\"wal_log_hints\" is set to \"off\" and data checksums are disabled"));
3634 
3635 			can_use = false;
3636 		}
3637 	}
3638 
3639 	return can_use;
3640 }
3641 
3642 
3643 void
make_standby_signal_path(char * buf)3644 make_standby_signal_path(char *buf)
3645 {
3646 	snprintf(buf, MAXPGPATH,
3647 			 "%s/%s",
3648 			 config_file_options.data_directory,
3649 			 STANDBY_SIGNAL_FILE);
3650 }
3651 
3652 /*
3653  * create standby.signal (PostgreSQL 12 and later)
3654  */
3655 bool
write_standby_signal(void)3656 write_standby_signal(void)
3657 {
3658 	char	    standby_signal_file_path[MAXPGPATH] = "";
3659 	FILE	   *file;
3660 	mode_t		um;
3661 
3662 	make_standby_signal_path(standby_signal_file_path);
3663 
3664 	/* Set umask to 0600 */
3665 	um = umask((~(S_IRUSR | S_IWUSR)) & (S_IRWXG | S_IRWXO));
3666 	file = fopen(standby_signal_file_path, "w");
3667 	umask(um);
3668 
3669 	if (file == NULL)
3670 	{
3671 		log_error(_("unable to create %s file at \"%s\""),
3672 				  STANDBY_SIGNAL_FILE,
3673 				  standby_signal_file_path);
3674 		log_detail("%s", strerror(errno));
3675 
3676 		return false;
3677 	}
3678 
3679 	if (fputs("# created by repmgr\n", file) == EOF)
3680 	{
3681 		log_error(_("unable to write to %s file at \"%s\""),
3682 				  STANDBY_SIGNAL_FILE,
3683 				  standby_signal_file_path);
3684 		fclose(file);
3685 
3686 		return false;
3687 	}
3688 
3689 	fclose(file);
3690 
3691 	return true;
3692 }
3693 
3694 
3695 /*
3696  * NOTE:
3697  *  - the provided connection should be for the normal repmgr user
3698  *  - if upstream_node_record is not NULL, its "repluser" entry, if
3699  *    set, will be used as the fallback replication user
3700  */
3701 bool
create_replication_slot(PGconn * conn,char * slot_name,t_node_info * upstream_node_record,PQExpBufferData * error_msg)3702 create_replication_slot(PGconn *conn, char *slot_name, t_node_info *upstream_node_record, PQExpBufferData *error_msg)
3703 {
3704 	PGconn *slot_conn = NULL;
3705 	bool use_replication_protocol = false;
3706 	bool success = true;
3707 	char *replication_user = NULL;
3708 
3709 	_determine_replication_slot_user(conn, upstream_node_record, &replication_user);
3710 	/*
3711 	 * If called in --dry-run context, if the replication slot user is not the
3712 	 * repmgr user, attempt to validate the connection.
3713 	 */
3714 	if (runtime_options.dry_run == true)
3715 	{
3716 		switch (ReplicationSlotUser)
3717 		{
3718 			case USER_TYPE_UNKNOWN:
3719 				log_error("unable to determine user for replication slot creation");
3720 				return false;
3721 			case  REPMGR_USER:
3722 				log_info(_("replication slots will be created by user \"%s\""),
3723 						 PQuser(conn));
3724 				return true;
3725 
3726 			case REPLICATION_USER_NODE:
3727 			case REPLICATION_USER_OPT:
3728 			{
3729 				PGconn *repl_conn = duplicate_connection(conn,
3730 														 replication_user,
3731 														 true);
3732 				if (repl_conn == NULL || PQstatus(repl_conn) != CONNECTION_OK)
3733 				{
3734 					log_error(_("unable to create replication connection as user \"%s\""),
3735 							  replication_user);
3736 					log_detail("%s", PQerrorMessage(repl_conn));
3737 
3738 					PQfinish(repl_conn);
3739 					return false;
3740 				}
3741 				log_info(_("replication slots will be created by replication user \"%s\""),
3742 						 replication_user);
3743 				PQfinish(repl_conn);
3744 				return true;
3745 			}
3746 			case SUPERUSER:
3747 			{
3748 				PGconn *superuser_conn = duplicate_connection(conn,
3749 															  runtime_options.superuser,
3750 															  false);
3751 				if (superuser_conn == NULL || PQstatus(superuser_conn )!= CONNECTION_OK)
3752 				{
3753 					log_error(_("unable to create superuser connection as user \"%s\""),
3754 							  runtime_options.superuser);
3755 					log_detail("%s", PQerrorMessage(superuser_conn));
3756 
3757 					PQfinish(superuser_conn);
3758 
3759 					return false;
3760 				}
3761 
3762 				log_info(_("replication slots will be created by superuser \"%s\""),
3763 						 runtime_options.superuser);
3764 				PQfinish(superuser_conn);
3765 			}
3766 		}
3767 
3768 	}
3769 
3770 	/*
3771 	 * If we can't create a replication slot with the connection provided to
3772 	 * the function, create an connection with appropriate permissions.
3773 	 */
3774 	switch (ReplicationSlotUser)
3775 	{
3776 		case USER_TYPE_UNKNOWN:
3777 			log_error("unable to determine user for replication slot creation");
3778 			return false;
3779 		case  REPMGR_USER:
3780 			slot_conn = conn;
3781 			log_info(_("creating replication slot as user \"%s\""),
3782 					 PQuser(conn));
3783 			break;
3784 
3785 		case REPLICATION_USER_NODE:
3786 		case REPLICATION_USER_OPT:
3787 		{
3788 			slot_conn = duplicate_connection(conn,
3789 											 replication_user,
3790 											 true);
3791 			if (slot_conn == NULL || PQstatus(slot_conn) != CONNECTION_OK)
3792 			{
3793 				log_error(_("unable to create replication connection as user \"%s\""),
3794 						  runtime_options.replication_user);
3795 				log_detail("%s", PQerrorMessage(slot_conn));
3796 
3797 				PQfinish(slot_conn);
3798 					return false;
3799 			}
3800 			use_replication_protocol = true;
3801 			log_info(_("creating replication slot as replication user \"%s\""),
3802 					 replication_user);
3803 		}
3804 			break;
3805 
3806 		case SUPERUSER:
3807 		{
3808 			slot_conn = duplicate_connection(conn,
3809 											 runtime_options.superuser,
3810 											 false);
3811 			if (slot_conn == NULL || PQstatus(slot_conn )!= CONNECTION_OK)
3812 			{
3813 				log_error(_("unable to create super connection as user \"%s\""),
3814 						  runtime_options.superuser);
3815 				log_detail("%s", PQerrorMessage(slot_conn));
3816 
3817 				PQfinish(slot_conn);
3818 
3819 				return false;
3820 			}
3821 			log_info(_("creating replication slot as superuser \"%s\""),
3822 					 runtime_options.superuser);
3823 		}
3824 			break;
3825 	}
3826 
3827 	if (use_replication_protocol == true)
3828 	{
3829 		success = create_replication_slot_replprot(conn, slot_conn, slot_name, error_msg);
3830 	}
3831 	else
3832 	{
3833 		success = create_replication_slot_sql(slot_conn, slot_name, error_msg);
3834 	}
3835 
3836 
3837 	if (slot_conn != conn)
3838 		PQfinish(slot_conn);
3839 
3840 	return success;
3841 }
3842 
3843 
3844 bool
drop_replication_slot_if_exists(PGconn * conn,int node_id,char * slot_name)3845 drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name)
3846 {
3847 	t_node_info node_record = T_NODE_INFO_INITIALIZER;
3848 	t_replication_slot slot_info = T_REPLICATION_SLOT_INITIALIZER;
3849 	RecordStatus record_status;
3850 
3851 	char *replication_user = NULL;
3852 	bool success = true;
3853 
3854 	if (node_id != UNKNOWN_NODE_ID)
3855 	{
3856 		record_status = get_node_record(conn, node_id, &node_record);
3857 	}
3858 
3859 	_determine_replication_slot_user(conn, &node_record, &replication_user);
3860 
3861 	record_status = get_slot_record(conn, slot_name, &slot_info);
3862 
3863 	log_verbose(LOG_DEBUG, "attempting to delete slot \"%s\" on node %i",
3864 				slot_name, node_id);
3865 
3866 	if (record_status != RECORD_FOUND)
3867 	{
3868 		/* this is not a bad good thing */
3869 		log_verbose(LOG_INFO,
3870 					_("slot \"%s\" does not exist on node %i, nothing to remove"),
3871 					slot_name, node_id);
3872 		return true;
3873 	}
3874 
3875 	if (slot_info.active == false)
3876 	{
3877 		if (drop_replication_slot_sql(conn, slot_name) == true)
3878 		{
3879 			log_notice(_("replication slot \"%s\" deleted on node %i"), slot_name, node_id);
3880 		}
3881 		else
3882 		{
3883 			log_error(_("unable to delete replication slot \"%s\" on node %i"), slot_name, node_id);
3884 			success = false;
3885 		}
3886 	}
3887 
3888 	/*
3889 	 * If an active replication slot exists, call Houston as we have a
3890 	 * problem.
3891 	 */
3892 	else
3893 	{
3894 		log_warning(_("replication slot \"%s\" is still active on node %i"), slot_name, node_id);
3895 		success = false;
3896 	}
3897 
3898 	return success;
3899 }
3900 
3901 
3902 static void
_determine_replication_slot_user(PGconn * conn,t_node_info * upstream_node_record,char ** replication_user)3903 _determine_replication_slot_user(PGconn *conn, t_node_info *upstream_node_record, char **replication_user)
3904 {
3905 	/*
3906 	 * If not previously done, work out which user will be responsible
3907 	 * for creating replication slots.
3908 	 */
3909 	if (ReplicationSlotUser == USER_TYPE_UNKNOWN)
3910 	{
3911 		/*
3912 		 * Is the repmgr user a superuser?
3913 		 */
3914 		if (is_superuser_connection(conn, NULL))
3915 		{
3916 			ReplicationSlotUser = REPMGR_USER;
3917 		}
3918 		/*
3919 		 * Does the repmgr user have the REPLICATION role?
3920 		 * Note we don't care here whether the repmgr user can actually
3921 		 * make a replication connection, we're just confirming that the
3922 		 * connection we have has the appropriate permissions.
3923 		 */
3924 		else if (is_replication_role(conn, NULL))
3925 		{
3926 			ReplicationSlotUser = REPMGR_USER;
3927 		}
3928 		/*
3929 		 * Is a superuser provided with --superuser?
3930 		 * We'll check later whether we can make a connection as that user.
3931 		 */
3932 		else if (runtime_options.superuser[0] != '\0')
3933 		{
3934 			ReplicationSlotUser = SUPERUSER;
3935 		}
3936 		/*
3937 		 * Is a replication user provided with --replication-user?
3938 		 * We'll check later whether we can make a replication connection as that user.
3939 		 * Overrides any replication user defined in the upstream node record.
3940 		 */
3941 		else if (runtime_options.replication_user[0] != '\0')
3942 		{
3943 			ReplicationSlotUser = REPLICATION_USER_OPT;
3944 			*replication_user = runtime_options.replication_user;
3945 		}
3946 		/*
3947 		 * Is the upstream's node record provided, and does it have a different
3948 		 * replication user?
3949 		 * We'll check later whether we can make a replication connection as that user.
3950 		 */
3951 		else if (upstream_node_record != NULL && upstream_node_record->node_id != UNKNOWN_NODE_ID
3952 			 && strncmp(upstream_node_record->repluser, PQuser(conn), NAMEDATALEN) != 0)
3953 		{
3954 			ReplicationSlotUser = REPLICATION_USER_NODE;
3955 			*replication_user = upstream_node_record->repluser;
3956 		}
3957 	}
3958 }
3959 
3960 
3961 bool
check_replication_slots_available(int node_id,PGconn * conn)3962 check_replication_slots_available(int node_id, PGconn* conn)
3963 {
3964 	int max_replication_slots = UNKNOWN_VALUE;
3965 	int free_slots = get_free_replication_slot_count(conn, &max_replication_slots);
3966 
3967 	if (free_slots < 0)
3968 	{
3969 		log_error(_("unable to determine number of free replication slots on node %i"),
3970 				  node_id);
3971 		return false;
3972 	}
3973 
3974 	if (free_slots == 0)
3975 	{
3976 		log_error(_("no free replication slots available on node %i"),
3977 				  node_id);
3978 		log_hint(_("consider increasing \"max_replication_slots\" (current value: %i)"),
3979 				 max_replication_slots);
3980 		return false;
3981 	}
3982 	else if (runtime_options.dry_run == true)
3983 	{
3984 		log_info(_("replication slots in use, %i free slots on node %i"),
3985 				 node_id,
3986 				 free_slots);
3987 	}
3988 
3989 	return true;
3990 }
3991 
3992 
3993 /*
3994  * Check whether the specified standby has joined to its upstream.
3995  *
3996  * This is used by "standby switchover" and "node rejoin" to check
3997  * the success of a node rejoin operation.
3998  *
3999  * IMPORTANT: the timeout settings will be taken from the node where the check
4000  * is performed, which might not be the standby itself.
4001  */
4002 standy_join_status
check_standby_join(PGconn * upstream_conn,t_node_info * upstream_node_record,t_node_info * standby_node_record)4003 check_standby_join(PGconn *upstream_conn, t_node_info *upstream_node_record, t_node_info *standby_node_record)
4004  {
4005 	 int i;
4006 	 bool available = false;
4007 
4008 	 for (i = 0; i < config_file_options.standby_reconnect_timeout; i++)
4009 	 {
4010 		 if (is_server_available(config_file_options.conninfo))
4011 		 {
4012 			 log_verbose(LOG_INFO, _("node \"%s\" (ID: %i) is pingable"),
4013 						 standby_node_record->node_name,
4014 						 standby_node_record->node_id);
4015 			 available = true;
4016 			 break;
4017 		 }
4018 
4019 		 if (i % 5 == 0)
4020 		 {
4021 			 log_verbose(LOG_INFO, _("waiting for node \"%s\" (ID: %i) to respond to pings; %i of max %i attempts (parameter \"node_rejoin_timeout\")"),
4022 						 standby_node_record->node_name,
4023 						 standby_node_record->node_id,
4024 						 i + 1,
4025 						 config_file_options.node_rejoin_timeout);
4026 		 }
4027 		 else
4028 		 {
4029 			 log_debug("sleeping 1 second waiting for node \"%s\" (ID: %i) to respond to pings; %i of max %i attempts",
4030 					   standby_node_record->node_name,
4031 					   standby_node_record->node_id,
4032 					   i + 1,
4033 					   config_file_options.node_rejoin_timeout);
4034 		 }
4035 
4036 		 sleep(1);
4037 	 }
4038 
4039 	 /* node did not become available */
4040 	 if (available == false)
4041 	 {
4042 		 return JOIN_FAIL_NO_PING;
4043 	 }
4044 
4045 	 for (; i < config_file_options.node_rejoin_timeout; i++)
4046 	 {
4047 		 char *node_state = NULL;
4048 		 NodeAttached node_attached = is_downstream_node_attached(upstream_conn,
4049 																  standby_node_record->node_name,
4050 																  &node_state);
4051 		 if (node_attached == NODE_ATTACHED)
4052 		 {
4053 			 log_verbose(LOG_INFO, _("node \"%s\" (ID: %i) has attached to its upstream node"),
4054 						 standby_node_record->node_name,
4055 						 standby_node_record->node_id);
4056 			 return JOIN_SUCCESS;
4057 		 }
4058 
4059 		 if (i % 5 == 0)
4060 		 {
4061 			 log_info(_("waiting for node \"%s\" (ID: %i) to connect to new primary; %i of max %i attempts (parameter \"node_rejoin_timeout\")"),
4062 					  standby_node_record->node_name,
4063 					  standby_node_record->node_id,
4064 					  i + 1,
4065 					  config_file_options.node_rejoin_timeout);
4066 
4067 			 if (node_attached == NODE_NOT_ATTACHED)
4068 			 {
4069 				 log_detail(_("node \"%s\" (ID: %i) is currrently attached to its upstream node in state \"%s\""),
4070 							upstream_node_record->node_name,
4071 							standby_node_record->node_id,
4072 							node_state);
4073 			 }
4074 			 else
4075 			 {
4076 				 log_detail(_("checking for record in node \"%s\"'s \"pg_stat_replication\" table where \"application_name\" is \"%s\""),
4077 							upstream_node_record->node_name,
4078 							standby_node_record->node_name);
4079 			 }
4080 		 }
4081 		 else
4082 		 {
4083 			 log_debug("sleeping 1 second waiting for node  \"%s\" (ID: %i) to connect to new primary; %i of max %i attempts",
4084 					   standby_node_record->node_name,
4085 					   standby_node_record->node_id,
4086 					   i + 1,
4087 					   config_file_options.node_rejoin_timeout);
4088 		 }
4089 
4090 		 sleep(1);
4091 	 }
4092 
4093 	 return JOIN_FAIL_NO_REPLICATION;
4094 }
4095 
4096 
4097 /*
4098  * Here we'll perform some timeline sanity checks to ensure the follow target
4099  * can actually be followed or rejoined.
4100  *
4101  * See also comment for check_node_can_follow() in repmgrd-physical.c .
4102  */
4103 bool
check_node_can_attach(TimeLineID local_tli,XLogRecPtr local_xlogpos,PGconn * follow_target_conn,t_node_info * follow_target_node_record,bool is_rejoin)4104 check_node_can_attach(TimeLineID local_tli, XLogRecPtr local_xlogpos, PGconn *follow_target_conn, t_node_info *follow_target_node_record, bool is_rejoin)
4105 {
4106 	uint64		local_system_identifier = UNKNOWN_SYSTEM_IDENTIFIER;
4107 	PGconn	   *follow_target_repl_conn = NULL;
4108 	t_system_identification follow_target_identification = T_SYSTEM_IDENTIFICATION_INITIALIZER;
4109 	bool success = true;
4110 
4111 	const char *action = is_rejoin == true ? "rejoin" : "follow";
4112 
4113 	/* check replication connection */
4114 	follow_target_repl_conn = establish_replication_connection_from_conn(follow_target_conn,
4115 																		 follow_target_node_record->repluser);
4116 
4117 	if (PQstatus(follow_target_repl_conn) != CONNECTION_OK)
4118 	{
4119 		log_error(_("unable to establish a replication connection to the %s target node"), action);
4120 		return false;
4121 	}
4122 	else if (runtime_options.dry_run == true)
4123 	{
4124 		log_info(_("replication connection to the %s target node was successful"), action);
4125 	}
4126 
4127 	/* check system_identifiers match */
4128 	if (identify_system(follow_target_repl_conn, &follow_target_identification) == false)
4129 	{
4130 		log_error(_("unable to query the %s target node's system identification"), action);
4131 
4132 		PQfinish(follow_target_repl_conn);
4133 		return false;
4134 	}
4135 
4136 	local_system_identifier = get_system_identifier(config_file_options.data_directory);
4137 
4138 	/*
4139 	 * Check for things that should never happen, but expect the unexpected anyway.
4140 	 */
4141 
4142 	if (local_system_identifier == UNKNOWN_SYSTEM_IDENTIFIER)
4143 	{
4144 		/*
4145 		 * We don't return immediately here so subsequent checks can be
4146 		 * made, but indicate the node will not be able to rejoin.
4147 		 */
4148 		success = false;
4149 		if (runtime_options.dry_run == true)
4150 		{
4151 			log_warning(_("unable to retrieve system identifier from pg_control"));
4152 		}
4153 		else
4154 		{
4155 			log_error(_("unable to retrieve system identifier from pg_control, aborting"));
4156 		}
4157 	}
4158 	else if (follow_target_identification.system_identifier != local_system_identifier)
4159 	{
4160 		/*
4161 		 * It's never going to be possible to rejoin a node from another cluster,
4162 		 * so no need to bother with further checks.
4163 		 */
4164 		log_error(_("this node is not part of the %s target node's replication cluster"), action);
4165 		log_detail(_("this node's system identifier is %lu, %s target node's system identifier is %lu"),
4166 				   local_system_identifier,
4167 				   action,
4168 				   follow_target_identification.system_identifier);
4169 		PQfinish(follow_target_repl_conn);
4170 		return false;
4171 	}
4172 	else if (runtime_options.dry_run == true)
4173 	{
4174 		log_info(_("local and %s target system identifiers match"), action);
4175 		log_detail(_("system identifier is %lu"), local_system_identifier);
4176 	}
4177 
4178 	/* check timelines */
4179 
4180 	log_verbose(LOG_DEBUG, "local timeline: %i; %s target timeline: %i",
4181 				local_tli,
4182 				action,
4183 				follow_target_identification.timeline);
4184 
4185 	/*
4186 	 * The upstream's timeline is lower than ours - we cannot follow, and rejoin
4187 	 * requires PostgreSQL 9.6 and later.
4188 	 */
4189 	if (follow_target_identification.timeline < local_tli)
4190 	{
4191 		/*
4192 		 * "repmgr standby follow" is impossible in this case
4193 		 */
4194 		if (is_rejoin == false)
4195 		{
4196 			log_error(_("this node's timeline is ahead of the %s target node's timeline"), action);
4197 			log_detail(_("this node's timeline is %i, %s target node's timeline is %i"),
4198 					   local_tli,
4199 					   action,
4200 					   follow_target_identification.timeline);
4201 
4202 			if (PQserverVersion(follow_target_conn) >= 90600)
4203 			{
4204 				log_hint(_("use \"repmgr node rejoin --force-rewind\" to reattach this node"));
4205 			}
4206 
4207 			PQfinish(follow_target_repl_conn);
4208 			return false;
4209 		}
4210 
4211 		/*
4212 		 * pg_rewind can only rejoin to a lower timeline from PostgreSQL 9.6
4213 		 */
4214 		if (PQserverVersion(follow_target_conn) < 90600)
4215 		{
4216 			log_error(_("this node's timeline is ahead of the %s target node's timeline"), action);
4217 			log_detail(_("this node's timeline is %i, %s target node's timeline is %i"),
4218 					   local_tli,
4219 					   action,
4220 					   follow_target_identification.timeline);
4221 
4222 			if (runtime_options.force_rewind_used == true)
4223 			{
4224 				log_hint(_("pg_rewind can only be used to rejoin to a node with a lower timeline from PostgreSQL 9.6"));
4225 			}
4226 
4227 			PQfinish(follow_target_repl_conn);
4228 			return false;
4229 		}
4230 
4231 		if (runtime_options.force_rewind_used == false)
4232 		{
4233 			log_notice(_("pg_rewind execution required for this node to attach to rejoin target node %i"),
4234 					   follow_target_node_record->node_id);
4235 			log_hint(_("provide --force-rewind"));
4236 			PQfinish(follow_target_repl_conn);
4237 			return false;
4238 		}
4239 	}
4240 
4241 	/* timelines are the same - check relative positions */
4242 	else if (follow_target_identification.timeline == local_tli)
4243 	{
4244 		XLogRecPtr follow_target_xlogpos = get_node_current_lsn(follow_target_conn);
4245 
4246 		if (local_xlogpos == InvalidXLogRecPtr || follow_target_xlogpos == InvalidXLogRecPtr)
4247 		{
4248 			log_error(_("unable to compare LSN positions"));
4249 			PQfinish(follow_target_repl_conn);
4250 			return false;
4251 		}
4252 
4253 		if (local_xlogpos <= follow_target_xlogpos)
4254 		{
4255 			log_info(_("timelines are same, this server is not ahead"));
4256 			log_detail(_("local node lsn is %X/%X, %s target lsn is %X/%X"),
4257 					   format_lsn(local_xlogpos),
4258 					   action,
4259 					   format_lsn(follow_target_xlogpos));
4260 		}
4261 		else
4262 		{
4263 			/*
4264 			 * Unable to follow or join to a node we're ahead of, if we're on the
4265 			 * same timeline. Also, pg_rewind does not detect this situation,
4266 			 * as there is no definitive fork point.
4267 			 *
4268 			 * Note that Pg will still happily attach to the upstream in state "streaming"
4269 			 * for a while but then detach with an endless stream of
4270 			 * "record with incorrect prev-link" errors.
4271 			 */
4272 			log_error(_("this node ahead of the %s target on the same timeline (%i)"), action, local_tli);
4273 			log_detail(_("local node lsn is %X/%X, %s target lsn is %X/%X"),
4274 					   format_lsn(local_xlogpos),
4275 					   action,
4276 					   format_lsn(follow_target_xlogpos));
4277 
4278 			if (is_rejoin == true)
4279 			{
4280 				log_hint(_("the --force-rewind option is ineffective in this case"));
4281 			}
4282 
4283 			success = false;
4284 		}
4285 	}
4286 	else
4287 	{
4288 		/*
4289 		 * upstream has higher timeline - check where it forked off from this node's timeline
4290 		 */
4291 		TimeLineHistoryEntry *follow_target_history = get_timeline_history(follow_target_repl_conn,
4292 																		   local_tli + 1);
4293 
4294 		if (follow_target_history == NULL)
4295 		{
4296 			/* get_timeline_history() will emit relevant error messages */
4297 			PQfinish(follow_target_repl_conn);
4298 			return false;
4299 		}
4300 
4301 		log_debug("local tli: %i; local_xlogpos: %X/%X; follow_target_history->tli: %i; follow_target_history->end: %X/%X",
4302 				  local_tli,
4303 				  format_lsn(local_xlogpos),
4304 				  follow_target_history->tli,
4305 				  format_lsn(follow_target_history->end));
4306 
4307 		/*
4308 		 * Local node has proceeded beyond the follow target's fork, so we
4309 		 * definitely can't attach.
4310 		 *
4311 		 * This could be the case if the follow target was promoted, but does
4312 		 * not contain all changes which are being replayed to this standby.
4313 		 */
4314 		if (local_xlogpos > follow_target_history->end)
4315 		{
4316 			if (is_rejoin == true && runtime_options.force_rewind_used == true)
4317 			{
4318 				log_notice(_("pg_rewind execution required for this node to attach to rejoin target node %i"),
4319 						   follow_target_node_record->node_id);
4320 			}
4321 			else
4322 			{
4323 				log_error(_("this node cannot attach to %s target node %i"),
4324 						  action,
4325 						  follow_target_node_record->node_id);
4326 				success = false;
4327 			}
4328 
4329 			log_detail(_("%s target server's timeline %i forked off current database system timeline %i before current recovery point %X/%X"),
4330 					   action,
4331 					   local_tli + 1,
4332 					   local_tli,
4333 					   format_lsn(local_xlogpos));
4334 
4335 			if (is_rejoin == true && runtime_options.force_rewind_used == false)
4336 			{
4337 				log_hint(_("use --force-rewind to execute pg_rewind"));
4338 			}
4339 		}
4340 
4341 		if (success == true)
4342 		{
4343 			if (is_rejoin == false || (is_rejoin == true && runtime_options.force_rewind_used == false))
4344 			{
4345 				log_info(_("local node %i can attach to %s target node %i"),
4346 						 config_file_options.node_id,
4347 						 action,
4348 						 follow_target_node_record->node_id);
4349 
4350 				log_detail(_("local node's recovery point: %X/%X; %s target node's fork point: %X/%X"),
4351 						   format_lsn(local_xlogpos),
4352 						   action,
4353 						   format_lsn(follow_target_history->end));
4354 			}
4355 		}
4356 
4357 		pfree(follow_target_history);
4358 	}
4359 
4360 	PQfinish(follow_target_repl_conn);
4361 
4362 	return success;
4363 }
4364 
4365 
4366 /*
4367  * Check that the replication configuration file is owned by the user who
4368  * owns the data directory.
4369  */
4370 extern bool
check_replication_config_owner(int pg_version,const char * data_directory,PQExpBufferData * error_msg,PQExpBufferData * detail_msg)4371 check_replication_config_owner(int pg_version, const char *data_directory, PQExpBufferData *error_msg, PQExpBufferData *detail_msg)
4372 {
4373 	PQExpBufferData replication_config_file;
4374 	struct stat     dirstat;
4375 	struct stat     confstat;
4376 
4377 	if (stat(data_directory, &dirstat))
4378 	{
4379 		if (error_msg != NULL)
4380 		{
4381 			appendPQExpBuffer(error_msg,
4382 							  "unable to check ownership of data directory \"%s\"",
4383 							  data_directory);
4384 			appendPQExpBufferStr(detail_msg,
4385 								 strerror(errno));
4386 		}
4387 		return false;
4388 	}
4389 
4390 	initPQExpBuffer(&replication_config_file);
4391 
4392 	appendPQExpBuffer(&replication_config_file,
4393 					  "%s/%s",
4394 					  config_file_options.data_directory,
4395 					  pg_version >= 120000 ? PG_AUTOCONF_FILENAME : RECOVERY_COMMAND_FILE);
4396 
4397 	stat(replication_config_file.data, &confstat);
4398 
4399 	if (confstat.st_uid == dirstat.st_uid)
4400 	{
4401 		termPQExpBuffer(&replication_config_file);
4402 		return true;
4403 	}
4404 
4405 	if (error_msg != NULL)
4406 	{
4407 		char conf_owner[MAXLEN];
4408 		char dir_owner[MAXLEN];
4409 		struct passwd *pw;
4410 
4411 		pw = getpwuid(confstat.st_uid);
4412 		if (!pw)
4413 		{
4414 			maxlen_snprintf(conf_owner,
4415 							"(unknown user %i)",
4416 							confstat.st_uid);
4417 		}
4418 		else
4419 		{
4420 			strncpy(conf_owner, pw->pw_name, MAXLEN);
4421 		}
4422 
4423 		pw = getpwuid(dirstat.st_uid);
4424 
4425 		if (!pw)
4426 		{
4427 			maxlen_snprintf(conf_owner,
4428 							"(unknown user %i)",
4429 							dirstat.st_uid);
4430 		}
4431 		else
4432 		{
4433 			strncpy(dir_owner, pw->pw_name, MAXLEN);
4434 		}
4435 
4436 		appendPQExpBuffer(error_msg,
4437 						  "ownership error for file \"%s\"",
4438 						  replication_config_file.data);
4439 		appendPQExpBuffer(detail_msg,
4440 						  "file owner is \"%s\", data directory owner is \"%s\"",
4441 						  conf_owner,
4442 						  dir_owner);
4443 	}
4444 
4445 	termPQExpBuffer(&replication_config_file);
4446 
4447 	return false;
4448 }
4449 
4450 
4451 /*
4452  * Simple check to see if "shared_preload_libraries" includes "repmgr".
4453  * Parsing "shared_preload_libraries" is non-trivial, as it's potentially
4454  * a comma-separated list, and worse may not be readable by the repmgr
4455  * user.
4456  *
4457  * Instead, we check if a function which should return a value returns
4458  * NULL; this indicates the shared library is not installed.
4459  */
4460 void
check_shared_library(PGconn * conn)4461 check_shared_library(PGconn *conn)
4462 {
4463 	bool ok = repmgrd_check_local_node_id(conn);
4464 
4465 	if (ok == true)
4466 		return;
4467 
4468 	log_error(_("repmgrd not configured for this node"));
4469 	log_hint(_("ensure \"shared_preload_libraries\" includes \"repmgr\" and restart PostgreSQL"));
4470 	PQfinish(conn);
4471 	exit(ERR_BAD_CONFIG);
4472 }
4473 
4474 
4475 bool
is_repmgrd_running(PGconn * conn)4476 is_repmgrd_running(PGconn *conn)
4477 {
4478 	pid_t		pid;
4479 	bool		is_running = false;
4480 
4481 	pid = repmgrd_get_pid(conn);
4482 
4483 	if (pid != UNKNOWN_PID)
4484 	{
4485 		if (kill(pid, 0) != -1)
4486 		{
4487 			is_running = true;
4488 		}
4489 	}
4490 
4491 	return is_running;
4492 }
4493 
4494 
4495 /**
4496  * Parse the string returned by "repmgr --version", e.g. "repmgr 4.1.2",
4497  * and return it as a version integer (e.g. 40102).
4498  *
4499  * This is required for backwards compatibility as versions prior to
4500  * 4.3 do not have the --version-number option.
4501  */
4502 int
parse_repmgr_version(const char * version_string)4503 parse_repmgr_version(const char *version_string)
4504 {
4505 	int series, major, minor;
4506 	int version_integer = UNKNOWN_REPMGR_VERSION_NUM;
4507 	PQExpBufferData sscanf_string;
4508 
4509 	initPQExpBuffer(&sscanf_string);
4510 
4511 	appendPQExpBuffer(&sscanf_string, "%s ",
4512 					  progname());
4513 	appendPQExpBufferStr(&sscanf_string, "%i.%i.%i");
4514 
4515 	if (sscanf(version_string, sscanf_string.data, &series, &major, &minor) == 3)
4516 	{
4517 		version_integer = (series * 10000) + (major * 100) + minor;
4518 	}
4519 	else
4520 	{
4521 		resetPQExpBuffer(&sscanf_string);
4522 		appendPQExpBuffer(&sscanf_string, "%s ",
4523 						  progname());
4524 		appendPQExpBufferStr(&sscanf_string, "%i.%i");
4525 
4526 		if (sscanf(version_string, "repmgr %i.%i", &series, &major) == 2)
4527 		{
4528 			version_integer = (series * 10000) + (major * 100);
4529 		}
4530 	}
4531 
4532 	return version_integer;
4533 }
4534