1 /*
2 * repmgr-client.c - Command interpreter for the repmgr package
3 *
4 * Copyright (c) 2ndQuadrant, 2010-2020
5 *
 * This module is a command-line utility to easily set up a cluster of
 * hot standby servers for an HA environment
8 *
9 * Commands implemented are:
10 *
11 * [ PRIMARY | MASTER ] REGISTER
12 * [ PRIMARY | MASTER ] UNREGISTER
13 *
14 * STANDBY CLONE
15 * STANDBY REGISTER
16 * STANDBY UNREGISTER
17 * STANDBY PROMOTE
18 * STANDBY FOLLOW
 * STANDBY SWITCHOVER
 *
 * WITNESS REGISTER
 * WITNESS UNREGISTER
 *
21 * CLUSTER SHOW
22 * CLUSTER EVENT
23 * CLUSTER CROSSCHECK
24 * CLUSTER MATRIX
25 * CLUSTER CLEANUP
26 *
27 * NODE STATUS
28 * NODE CHECK
29 * NODE REJOIN
30 * NODE SERVICE
31 * NODE CONTROL
32 *
33 * SERVICE STATUS
34 * SERVICE PAUSE
35 * SERVICE UNPAUSE
36 *
37 * DAEMON START
38 * DAEMON STOP
39 *
40 * This program is free software: you can redistribute it and/or modify
41 * it under the terms of the GNU General Public License as published by
42 * the Free Software Foundation, either version 3 of the License, or
43 * (at your option) any later version.
44 *
45 * This program is distributed in the hope that it will be useful,
46 * but WITHOUT ANY WARRANTY; without even the implied warranty of
47 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
48 * GNU General Public License for more details.
49 *
50 * You should have received a copy of the GNU General Public License
51 * along with this program. If not, see <http://www.gnu.org/licenses/>.
52 */
53
54 #include <unistd.h>
55 #include <sys/stat.h>
56 #include <signal.h>
57
58
59 #include "repmgr.h"
60 #include "compat.h"
61 #include "controldata.h"
62 #include "repmgr-client.h"
63 #include "repmgr-client-global.h"
64 #include "repmgr-action-primary.h"
65 #include "repmgr-action-standby.h"
66 #include "repmgr-action-witness.h"
67 #include "repmgr-action-node.h"
68 #include "repmgr-action-cluster.h"
69 #include "repmgr-action-service.h"
70 #include "repmgr-action-daemon.h"
71
72 #include <storage/fd.h> /* for PG_TEMP_FILE_PREFIX */
73
74 /* globally available variables *
75 * ============================ */
76
77 t_runtime_options runtime_options = T_RUNTIME_OPTIONS_INITIALIZER;
78
79
80 /* conninfo params for the node we're operating on */
81 t_conninfo_param_list source_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
82
83 bool config_file_required = true;
84 char pg_bindir[MAXPGPATH] = "";
85
86 /*
87 * if --node-id/--node-name provided, place that node's record here
88 * for later use
89 */
90 t_node_info target_node_info = T_NODE_INFO_INITIALIZER;
91
92 /* used by create_replication_slot() */
93 static t_user_type ReplicationSlotUser = USER_TYPE_UNKNOWN;
94
95 /* Collate command line errors and warnings here for friendlier reporting */
96 static ItemList cli_errors = {NULL, NULL};
97 static ItemList cli_warnings = {NULL, NULL};
98
99 static void _determine_replication_slot_user(PGconn *conn,
100 t_node_info *upstream_node_record,
101 char **replication_user);
102
103 int
main(int argc,char ** argv)104 main(int argc, char **argv)
105 {
106 t_conninfo_param_list default_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
107
108 int optindex = 0;
109 int c;
110
111 char *repmgr_command = NULL;
112 char *repmgr_action = NULL;
113 bool valid_repmgr_command_found = true;
114 int action = NO_ACTION;
115 char *dummy_action = "";
116
117 bool help_option = false;
118 bool option_error_found = false;
119
120 set_progname(argv[0]);
121
122 /*
123 * Tell the logger we're a command-line program - this will ensure any
124 * output logged before the logger is initialized will be formatted
 * correctly. Can be overridden with "--log-to-file".
126 */
127 logger_output_mode = OM_COMMAND_LINE;
128
129 /*
130 * Initialize and pre-populate conninfo parameters; these will be
131 * overwritten if matching command line parameters are provided.
132 *
133 * Only some actions will need these, but we need to do this before the
134 * command line is parsed.
135 *
136 * Note: PQconndefaults() does not provide a default value for "dbname",
137 * but if none is provided will default to "username" when the connection
138 * is made. We won't set "dbname" here if no default available, as that
139 * would break the libpq behaviour if non-default username is provided.
140 */
141 initialize_conninfo_params(&default_conninfo, true);
142
143 for (c = 0; c < default_conninfo.size && default_conninfo.keywords[c]; c++)
144 {
145 if (strcmp(default_conninfo.keywords[c], "host") == 0 &&
146 (default_conninfo.values[c] != NULL))
147 {
148 strncpy(runtime_options.host, default_conninfo.values[c], MAXLEN);
149 }
150 else if (strcmp(default_conninfo.keywords[c], "hostaddr") == 0 &&
151 (default_conninfo.values[c] != NULL))
152 {
153 strncpy(runtime_options.host, default_conninfo.values[c], MAXLEN);
154 }
155 else if (strcmp(default_conninfo.keywords[c], "port") == 0 &&
156 (default_conninfo.values[c] != NULL))
157 {
158 strncpy(runtime_options.port, default_conninfo.values[c], MAXLEN);
159 }
160 else if (strcmp(default_conninfo.keywords[c], "dbname") == 0 &&
161 (default_conninfo.values[c] != NULL))
162 {
163 strncpy(runtime_options.dbname, default_conninfo.values[c], MAXLEN);
164 }
165 else if (strcmp(default_conninfo.keywords[c], "user") == 0 &&
166 (default_conninfo.values[c] != NULL))
167 {
168 strncpy(runtime_options.username, default_conninfo.values[c], MAXLEN);
169 }
170 }
171 free_conninfo_params(&default_conninfo);
172
173 initialize_conninfo_params(&source_conninfo, false);
174
175 /* set default user for -R/--remote-user */
176 {
177 struct passwd *pw = getpwuid(geteuid());
178
179 if (pw == NULL)
180 {
181 fprintf(stderr, _("could not get current user name: %s\n"), strerror(errno));
182 exit(ERR_BAD_CONFIG);
183 }
184
185 strncpy(runtime_options.username, pw->pw_name, MAXLEN);
186 }
187
188 /* Make getopt emit errors */
189 opterr = 1;
190
191 while ((c = getopt_long(argc, argv, "?Vb:f:FwWd:h:p:U:R:S:D:ck:L:qtvC:", long_options,
192 &optindex)) != -1)
193 {
194 /*
195 * NOTE: some integer parameters (e.g. -p/--port) are stored
196 * internally as strings. We use repmgr_atoi() to check these but
197 * discard the returned integer; repmgr_atoi() will append the error
198 * message to the provided list.
199 */
200 switch (c)
201 {
202 /*
203 * Options which cause repmgr to exit in this block; these are
204 * the only ones which can be executed as root user
205 */
206 case OPT_HELP: /* --help */
207 help_option = true;
208 break;
209
210 /* -V/--version */
211 case 'V':
212
213 /*
214 * in contrast to repmgr3 and earlier, we only display the
215 * repmgr version as it's not specific to a particular
216 * PostgreSQL version
217 */
218 printf("%s %s\n", progname(), REPMGR_VERSION);
219 exit(SUCCESS);
220
221 /* --version-number */
222 case OPT_VERSION_NUMBER:
223 printf("%i\n", REPMGR_VERSION_NUM);
224 exit(SUCCESS);
225
226 /*------------------------------
227 * general configuration options
228 *------------------------------
229 */
230
231 /* -b/--pg_bindir */
232 case 'b':
233 strncpy(runtime_options.pg_bindir, optarg, MAXLEN);
234 break;
235
236 /* -f/--config-file */
237 case 'f':
238 strncpy(runtime_options.config_file, optarg, MAXLEN);
239 break;
240
241 /* --dry-run */
242 case OPT_DRY_RUN:
243 runtime_options.dry_run = true;
244 break;
245
246 /* -F/--force */
247 case 'F':
248 runtime_options.force = true;
249 break;
250
251 /* --replication-user (primary/standby register only) */
252 case OPT_REPLICATION_USER:
253 strncpy(runtime_options.replication_user, optarg, MAXLEN);
254 break;
255
256 /* -w/--wait */
257 case 'w':
258 runtime_options.wait_provided = true;
259 if (optarg != NULL)
260 {
261 runtime_options.wait = repmgr_atoi(optarg, "--wait", &cli_errors, 0);
262 }
263 break;
264
265 /* -W/--no-wait */
266 case 'W':
267 runtime_options.no_wait = true;
268 break;
269
270 /* --compact */
271 case OPT_COMPACT:
272 runtime_options.compact = true;
273 break;
274
275 /* --detail */
276 case OPT_DETAIL:
277 runtime_options.detail = true;
278 break;
279
280 /* --dump-config */
281 case OPT_DUMP_CONFIG:
282 runtime_options.dump_config = true;
283 break;
284
285 /*----------------------------
286 * database connection options
287 *----------------------------
288 */
289
290 /*
291 * These are the standard database connection options; with
292 * the exception of -d/--dbname (which could be a conninfo
293 * string) we'll also set these values in "source_conninfo"
294 * (overwriting preset values from environment variables).
295 */
296 /* -d/--dbname */
297 case 'd':
298 strncpy(runtime_options.dbname, optarg, MAXLEN);
299
300 /*
301 * dbname will be set in source_conninfo later after checking
302 * if it's a conninfo string
303 */
304 runtime_options.connection_param_provided = true;
305 break;
306
307 /* -h/--host */
308 case 'h':
309 strncpy(runtime_options.host, optarg, MAXLEN);
310 param_set(&source_conninfo, "host", optarg);
311 runtime_options.connection_param_provided = true;
312 runtime_options.host_param_provided = true;
313 break;
314
315 case 'p':
316 /*
317 * minimum TCP port number is 1; in practice PostgreSQL
318 * won't be running on a privileged port, but we don't want
319 * to be concerned with that level of checking
320 */
321 (void) repmgr_atoi(optarg, "-p/--port", &cli_errors, 1);
322 param_set(&source_conninfo, "port", optarg);
323 strncpy(runtime_options.port,
324 optarg,
325 MAXLEN);
326 runtime_options.connection_param_provided = true;
327 break;
328
329 /* -U/--user */
330 case 'U':
331 strncpy(runtime_options.username, optarg, MAXLEN);
332 param_set(&source_conninfo, "user", optarg);
333 runtime_options.connection_param_provided = true;
334 break;
335
336 /*-------------------------
337 * other connection options
338 *-------------------------
339 */
340
341 /* -R/--remote_user */
342 case 'R':
343 strncpy(runtime_options.remote_user, optarg, MAXLEN);
344 break;
345
346 /* -S/--superuser */
347 case 'S':
348 strncpy(runtime_options.superuser, optarg, MAXLEN);
349 break;
350
351 /*-------------
352 * node options
353 *-------------
354 */
355
356 /* -D/--pgdata/--data-dir */
357 case 'D':
358 strncpy(runtime_options.data_dir, optarg, MAXPGPATH);
359 break;
360
361 /* --node-id */
362 case OPT_NODE_ID:
363 runtime_options.node_id = repmgr_atoi(optarg, "--node-id", &cli_errors, MIN_NODE_ID);
364 break;
365
366 /* --node-name */
367 case OPT_NODE_NAME:
368 {
369 if (strlen(optarg) < sizeof(runtime_options.node_name))
370 strncpy(runtime_options.node_name, optarg, sizeof(runtime_options.node_name));
371 else
372 item_list_append_format(&cli_errors,
373 _("value for \"--node-name\" must contain fewer than %lu characters"),
374 sizeof(runtime_options.node_name));
375 break;
376 }
377 /* --remote-node-id */
378 case OPT_REMOTE_NODE_ID:
379 runtime_options.remote_node_id = repmgr_atoi(optarg, "--remote-node-id", &cli_errors, MIN_NODE_ID);
380 break;
381
/*----------------
 * standby options
 *----------------
 */
385
386 /* --upstream-node-id */
387 case OPT_UPSTREAM_NODE_ID:
388 runtime_options.upstream_node_id = repmgr_atoi(optarg, "--upstream-node-id", &cli_errors, MIN_NODE_ID);
389 break;
390
391 /*------------------------
392 * "standby clone" options
393 *------------------------
394 */
395
396 /* -c/--fast-checkpoint */
397 case 'c':
398 runtime_options.fast_checkpoint = true;
399 break;
400
401 /* --copy-external-config-files(=[samepath|pgdata]) */
402 case OPT_COPY_EXTERNAL_CONFIG_FILES:
403 runtime_options.copy_external_config_files = true;
404 if (optarg != NULL)
405 {
406 if (strcmp(optarg, "samepath") == 0)
407 {
408 runtime_options.copy_external_config_files_destination = CONFIG_FILE_SAMEPATH;
409 }
410 /* allow "data_directory" as synonym for "pgdata" */
411 else if (strcmp(optarg, "pgdata") == 0 || strcmp(optarg, "data_directory") == 0)
412 {
413 runtime_options.copy_external_config_files_destination = CONFIG_FILE_PGDATA;
414 }
415 else
416 {
417 item_list_append(&cli_errors,
418 _("value provided for \"--copy-external-config-files\" must be \"samepath\" or \"pgdata\""));
419 }
420 }
421 break;
422
423 /* --no-upstream-connection */
424 case OPT_NO_UPSTREAM_CONNECTION:
425 runtime_options.no_upstream_connection = true;
426 break;
427
428
429 case OPT_UPSTREAM_CONNINFO:
430 strncpy(runtime_options.upstream_conninfo, optarg, MAXLEN);
431 break;
432
433 case OPT_WITHOUT_BARMAN:
434 runtime_options.without_barman = true;
435 break;
436
437 case OPT_REPLICATION_CONF_ONLY:
438 runtime_options.replication_conf_only = true;
439 break;
440
441 /* --verify-backup */
442 case OPT_VERIFY_BACKUP:
443 runtime_options.verify_backup = true;
444 break;
445
446 /*---------------------------
447 * "standby register" options
448 *---------------------------
449 */
450
451 case OPT_WAIT_START:
452 runtime_options.wait_start = repmgr_atoi(optarg, "--wait-start", &cli_errors, 0);
453 break;
454
455 case OPT_WAIT_SYNC:
456 runtime_options.wait_register_sync = true;
457 if (optarg != NULL)
458 {
459 runtime_options.wait_register_sync_seconds = repmgr_atoi(optarg, "--wait-sync", &cli_errors, 0);
460 }
461 break;
462
463 /*-----------------------------
464 * "standby switchover" options
465 *-----------------------------
466 */
467
468 case OPT_ALWAYS_PROMOTE:
469 runtime_options.always_promote = true;
470 break;
471
472 case OPT_FORCE_REWIND:
473 runtime_options.force_rewind_used = true;
474
475 if (optarg != NULL)
476 {
477 strncpy(runtime_options.force_rewind_path, optarg, MAXPGPATH);
478 }
479
480 break;
481
482 case OPT_SIBLINGS_FOLLOW:
483 runtime_options.siblings_follow = true;
484 break;
485
486 case OPT_REPMGRD_NO_PAUSE:
487 runtime_options.repmgrd_no_pause = true;
488 break;
489
490 case OPT_REPMGRD_FORCE_UNPAUSE:
491 runtime_options.repmgrd_force_unpause = true;
492 break;
493
494 /*----------------------
495 * "node status" options
496 *----------------------
497 */
498
499 case OPT_IS_SHUTDOWN_CLEANLY:
500 runtime_options.is_shutdown_cleanly = true;
501 break;
502
503 /*---------------------
504 * "node check" options
505 *--------------------
506 */
507 case OPT_ARCHIVE_READY:
508 runtime_options.archive_ready = true;
509 break;
510
511 case OPT_DOWNSTREAM:
512 runtime_options.downstream = true;
513 break;
514
515 case OPT_UPSTREAM:
516 runtime_options.upstream = true;
517 break;
518
519 case OPT_REPLICATION_LAG:
520 runtime_options.replication_lag = true;
521 break;
522
523 case OPT_ROLE:
524 runtime_options.role = true;
525 break;
526
527 case OPT_SLOTS:
528 runtime_options.slots = true;
529 break;
530
531 case OPT_MISSING_SLOTS:
532 runtime_options.missing_slots = true;
533 break;
534
535 case OPT_HAS_PASSFILE:
536 runtime_options.has_passfile = true;
537 break;
538
539 case OPT_REPL_CONN:
540 runtime_options.replication_connection = true;
541 break;
542
543 case OPT_DATA_DIRECTORY_CONFIG:
544 runtime_options.data_directory_config = true;
545 break;
546
547 case OPT_REPLICATION_CONFIG_OWNER:
548 runtime_options.replication_config_owner = true;
549 break;
550
551 case OPT_DB_CONNECTION:
552 runtime_options.db_connection = true;
553 break;
554
555 /*--------------------
556 * "node rejoin" options
557 *--------------------
558 */
559 case OPT_CONFIG_FILES:
560 strncpy(runtime_options.config_files, optarg, MAXLEN);
561 break;
562
563 case OPT_CONFIG_ARCHIVE_DIR:
564 /* TODO: check this is an absolute path */
565 strncpy(runtime_options.config_archive_dir, optarg, MAXPGPATH);
566 break;
567
568 /*-----------------------
569 * "node service" options
570 *-----------------------
571 */
572
573 /* --action (repmgr node service --action) */
574 case OPT_ACTION:
575 strncpy(runtime_options.action, optarg, MAXLEN);
576 break;
577
578 case OPT_LIST_ACTIONS:
579 runtime_options.list_actions = true;
580 break;
581
582 case OPT_CHECKPOINT:
583 runtime_options.checkpoint = true;
584 break;
585
586 /*------------------------
587 * "cluster event" options
588 *------------------------
589 */
590
591 case OPT_EVENT:
592 strncpy(runtime_options.event, optarg, MAXLEN);
593 break;
594
595 case OPT_LIMIT:
596 runtime_options.limit = repmgr_atoi(optarg, "--limit", &cli_errors, 1);
597 runtime_options.limit_provided = true;
598 break;
599
600 case OPT_ALL:
601 runtime_options.all = true;
602 break;
603
604 /*------------------------
605 * "cluster cleanup" options
606 *------------------------
607 */
608
609 /* -k/--keep-history */
610 case 'k':
611 runtime_options.keep_history = repmgr_atoi(optarg, "-k/--keep-history", &cli_errors, 0);
612 break;
613
614 /*----------------
615 * logging options
616 *----------------
617 */
618
619 /* -L/--log-level */
620 case 'L':
621 {
622 int detected_log_level = detect_log_level(optarg);
623
624 if (detected_log_level != -1)
625 {
626 strncpy(runtime_options.log_level, optarg, MAXLEN);
627 }
628 else
629 {
630 PQExpBufferData invalid_log_level;
631
632 initPQExpBuffer(&invalid_log_level);
633 appendPQExpBuffer(&invalid_log_level, _("invalid log level \"%s\" provided"), optarg);
634 item_list_append(&cli_errors, invalid_log_level.data);
635 termPQExpBuffer(&invalid_log_level);
636 }
637 break;
638 }
639
640 /* --log-to-file */
641 case OPT_LOG_TO_FILE:
642 runtime_options.log_to_file = true;
643 logger_output_mode = OM_DAEMON;
644 break;
645
646
647 /* --quiet */
648 case 'q':
649 runtime_options.quiet = true;
650 break;
651
652 /* --terse */
653 case 't':
654 runtime_options.terse = true;
655 break;
656
657 /* --verbose */
658 case 'v':
659 runtime_options.verbose = true;
660 break;
661
662
663 /*---------------
664 * output options
665 *---------------
666 */
667 case OPT_CSV:
668 runtime_options.csv = true;
669 break;
670
671 case OPT_NAGIOS:
672 runtime_options.nagios = true;
673 break;
674
675 case OPT_OPTFORMAT:
676 runtime_options.optformat = true;
677 break;
678
679 /*---------------------------------
680 * undocumented options for testing
681 *----------------------------------
682 */
683
684 case OPT_DISABLE_WAL_RECEIVER:
685 runtime_options.disable_wal_receiver = true;
686 break;
687
688 case OPT_ENABLE_WAL_RECEIVER:
689 runtime_options.enable_wal_receiver = true;
690 break;
691
692 /*-----------------------------
693 * options deprecated since 4.0
694 *-----------------------------
695 */
696 case OPT_CHECK_UPSTREAM_CONFIG:
697 item_list_append(&cli_warnings,
698 _("--check-upstream-config is deprecated; use --dry-run instead"));
699 break;
700
701 /* -C/--remote-config-file */
702 case 'C':
703 item_list_append(&cli_warnings,
704 _("--remote-config-file is no longer required"));
705 break;
706
707 case ':': /* missing option argument */
708 option_error_found = true;
709 break;
710 case '?':
711 /* Actual help option given? */
712 if (strcmp(argv[optind - 1], "-?") == 0)
713 {
714 help_option = true;
715 }
716 else
717 {
718 option_error_found = true;
719 }
720 break;
721 default: /* invalid option */
722 option_error_found = true;
723 break;
724 }
725 }
726
727
728 /*
729 * If -d/--dbname appears to be a conninfo string, validate by attempting
730 * to parse it (and if successful, store the parsed parameters)
731 */
732 if (runtime_options.dbname[0])
733 {
734 if (strncmp(runtime_options.dbname, "postgresql://", 13) == 0 ||
735 strncmp(runtime_options.dbname, "postgres://", 11) == 0 ||
736 strchr(runtime_options.dbname, '=') != NULL)
737 {
738 char *errmsg = NULL;
739 PQconninfoOption *opts;
740
741 runtime_options.conninfo_provided = true;
742
743 opts = PQconninfoParse(runtime_options.dbname, &errmsg);
744
745 if (opts == NULL)
746 {
747 PQExpBufferData conninfo_error;
748
749 initPQExpBuffer(&conninfo_error);
750 appendPQExpBuffer(&conninfo_error, _("error parsing conninfo:\n%s"), errmsg);
751 item_list_append(&cli_errors, conninfo_error.data);
752
753 termPQExpBuffer(&conninfo_error);
754 pfree(errmsg);
755 }
756 else
757 {
758 /*
759 * Store any parameters provided in the conninfo string in our
760 * internal array; also overwrite any options set in
761 * runtime_options.(host|port|username), as the conninfo
762 * settings take priority
763 */
764 PQconninfoOption *opt;
765
766 for (opt = opts; opt->keyword != NULL; opt++)
767 {
768 if (opt->val != NULL && opt->val[0] != '\0')
769 {
770 param_set(&source_conninfo, opt->keyword, opt->val);
771 }
772
773 if (strcmp(opt->keyword, "host") == 0 &&
774 (opt->val != NULL && opt->val[0] != '\0'))
775 {
776 strncpy(runtime_options.host, opt->val, MAXLEN);
777 runtime_options.host_param_provided = true;
778 }
779 if (strcmp(opt->keyword, "hostaddr") == 0 &&
780 (opt->val != NULL && opt->val[0] != '\0'))
781 {
782 strncpy(runtime_options.host, opt->val, MAXLEN);
783 runtime_options.host_param_provided = true;
784 }
785 else if (strcmp(opt->keyword, "port") == 0 &&
786 (opt->val != NULL && opt->val[0] != '\0'))
787 {
788 strncpy(runtime_options.port, opt->val, MAXLEN);
789 }
790 else if (strcmp(opt->keyword, "user") == 0 &&
791 (opt->val != NULL && opt->val[0] != '\0'))
792 {
793 strncpy(runtime_options.username, opt->val, MAXLEN);
794 }
795 }
796
797 PQconninfoFree(opts);
798 }
799 }
800 else
801 {
802 param_set(&source_conninfo, "dbname", runtime_options.dbname);
803 }
804 }
805
806 /*
807 * Disallow further running as root to prevent directory ownership
808 * problems. We check this here to give the root user a chance to execute
809 * --help/--version options.
810 */
811 if (geteuid() == 0 && help_option == false)
812 {
813 fprintf(stderr,
814 _("%s: cannot be run as root\n"
815 "Please log in (using, e.g., \"su\") as the "
816 "(unprivileged) user that owns "
817 "the data directory.\n"
818 ),
819 progname());
820 free_conninfo_params(&source_conninfo);
821 exit(ERR_BAD_CONFIG);
822 }
823
824 /* Exit here already if errors in command line options found */
825 if (cli_errors.head != NULL)
826 {
827 free_conninfo_params(&source_conninfo);
828 exit_with_cli_errors(&cli_errors, NULL);
829 }
830
/*----------
 * Determine the node type and action; following are valid:
 *
 * { PRIMARY | MASTER } { REGISTER | UNREGISTER | CHECK | STATUS } |
 * STANDBY { REGISTER | UNREGISTER | CLONE [node] | PROMOTE | FOLLOW | SWITCHOVER | CHECK | STATUS } |
 * WITNESS { REGISTER | UNREGISTER } |
 * NODE { STATUS | CHECK | REJOIN | SERVICE | CONTROL } |
 * CLUSTER { CROSSCHECK | MATRIX | SHOW | EVENT | EVENTS | CLEANUP } |
 * SERVICE { STATUS | PAUSE | UNPAUSE } |
 * DAEMON { START | STOP | STATUS | PAUSE | UNPAUSE }
 *
 * The CHECK and STATUS actions of PRIMARY/STANDBY are aliases for
 * NODE CHECK and NODE STATUS; DAEMON { STATUS | PAUSE | UNPAUSE } are
 * aliases for the corresponding SERVICE actions.
 *
 * [node] is an optional hostname, provided instead of the -h/--host
 * option; it is only accepted for STANDBY CLONE
 * ---------
 */
845 if (optind < argc)
846 {
847 repmgr_command = argv[optind++];
848 }
849
850 if (optind < argc)
851 {
852 repmgr_action = argv[optind++];
853 }
854 else
855 {
856 repmgr_action = dummy_action;
857 }
858
859 if (repmgr_command != NULL)
860 {
861 if (strcasecmp(repmgr_command, "PRIMARY") == 0 || strcasecmp(repmgr_command, "MASTER") == 0)
862 {
863 if (help_option == true)
864 {
865 do_primary_help();
866 exit(SUCCESS);
867 }
868
869 if (strcasecmp(repmgr_action, "REGISTER") == 0)
870 action = PRIMARY_REGISTER;
871 else if (strcasecmp(repmgr_action, "UNREGISTER") == 0)
872 action = PRIMARY_UNREGISTER;
873 /* allow "primary check"/"primary status" as aliases for "node check"/"node status" */
874 else if (strcasecmp(repmgr_action, "CHECK") == 0)
875 action = NODE_CHECK;
876 else if (strcasecmp(repmgr_action, "STATUS") == 0)
877 action = NODE_STATUS;
878 }
879
880 else if (strcasecmp(repmgr_command, "STANDBY") == 0)
881 {
882 if (help_option == true)
883 {
884 do_standby_help();
885 exit(SUCCESS);
886 }
887
888 if (strcasecmp(repmgr_action, "CLONE") == 0)
889 action = STANDBY_CLONE;
890 else if (strcasecmp(repmgr_action, "REGISTER") == 0)
891 action = STANDBY_REGISTER;
892 else if (strcasecmp(repmgr_action, "UNREGISTER") == 0)
893 action = STANDBY_UNREGISTER;
894 else if (strcasecmp(repmgr_action, "PROMOTE") == 0)
895 action = STANDBY_PROMOTE;
896 else if (strcasecmp(repmgr_action, "FOLLOW") == 0)
897 action = STANDBY_FOLLOW;
898 else if (strcasecmp(repmgr_action, "SWITCHOVER") == 0)
899 action = STANDBY_SWITCHOVER;
900 /* allow "standby check"/"standby status" as aliases for "node check"/"node status" */
901 else if (strcasecmp(repmgr_action, "CHECK") == 0)
902 action = NODE_CHECK;
903 else if (strcasecmp(repmgr_action, "STATUS") == 0)
904 action = NODE_STATUS;
905 }
906
907 else if (strcasecmp(repmgr_command, "WITNESS") == 0)
908 {
909 if (help_option == true)
910 {
911 do_witness_help();
912 exit(SUCCESS);
913 }
914 else if (strcasecmp(repmgr_action, "REGISTER") == 0)
915 action = WITNESS_REGISTER;
916 else if (strcasecmp(repmgr_action, "UNREGISTER") == 0)
917 action = WITNESS_UNREGISTER;
918 }
919
920 else if (strcasecmp(repmgr_command, "NODE") == 0)
921 {
922 if (help_option == true)
923 {
924 do_node_help();
925 exit(SUCCESS);
926 }
927
928 if (strcasecmp(repmgr_action, "CHECK") == 0)
929 action = NODE_CHECK;
930 else if (strcasecmp(repmgr_action, "STATUS") == 0)
931 action = NODE_STATUS;
932 else if (strcasecmp(repmgr_action, "REJOIN") == 0)
933 action = NODE_REJOIN;
934 else if (strcasecmp(repmgr_action, "SERVICE") == 0)
935 action = NODE_SERVICE;
936 else if (strcasecmp(repmgr_action, "CONTROL") == 0)
937 action = NODE_CONTROL;
938 }
939
940 else if (strcasecmp(repmgr_command, "CLUSTER") == 0)
941 {
942 if (help_option == true)
943 {
944 do_cluster_help();
945 exit(SUCCESS);
946 }
947
948 if (strcasecmp(repmgr_action, "SHOW") == 0)
949 action = CLUSTER_SHOW;
950 else if (strcasecmp(repmgr_action, "EVENT") == 0)
951 action = CLUSTER_EVENT;
952 /* allow "CLUSTER EVENTS" as synonym for "CLUSTER EVENT" */
953 else if (strcasecmp(repmgr_action, "EVENTS") == 0)
954 action = CLUSTER_EVENT;
955 else if (strcasecmp(repmgr_action, "CROSSCHECK") == 0)
956 action = CLUSTER_CROSSCHECK;
957 else if (strcasecmp(repmgr_action, "MATRIX") == 0)
958 action = CLUSTER_MATRIX;
959 else if (strcasecmp(repmgr_action, "CLEANUP") == 0)
960 action = CLUSTER_CLEANUP;
961 }
962 else if (strcasecmp(repmgr_command, "SERVICE") == 0)
963 {
964 if (help_option == true)
965 {
966 do_service_help();
967 exit(SUCCESS);
968 }
969
970 if (strcasecmp(repmgr_action, "STATUS") == 0)
971 action = SERVICE_STATUS;
972 else if (strcasecmp(repmgr_action, "PAUSE") == 0)
973 action = SERVICE_PAUSE;
974 else if (strcasecmp(repmgr_action, "UNPAUSE") == 0)
975 action = SERVICE_UNPAUSE;
976
977 }
978 else if (strcasecmp(repmgr_command, "DAEMON") == 0)
979 {
980 if (help_option == true)
981 {
982 do_daemon_help();
983 exit(SUCCESS);
984 }
985
986 if (strcasecmp(repmgr_action, "START") == 0)
987 action = DAEMON_START;
988 else if (strcasecmp(repmgr_action, "STOP") == 0)
989 action = DAEMON_STOP;
990
991 /* allow "daemon" as an alias for "service" for repmgr 4.x compatibility */
992 if (strcasecmp(repmgr_action, "STATUS") == 0)
993 action = SERVICE_STATUS;
994 else if (strcasecmp(repmgr_action, "PAUSE") == 0)
995 action = SERVICE_PAUSE;
996 else if (strcasecmp(repmgr_action, "UNPAUSE") == 0)
997 action = SERVICE_UNPAUSE;
998 }
999 else
1000 {
1001 valid_repmgr_command_found = false;
1002 }
1003 }
1004
1005 if (help_option == true)
1006 {
1007 do_help();
1008 exit(SUCCESS);
1009 }
1010
1011 if (action == NO_ACTION)
1012 {
1013 PQExpBufferData command_error;
1014
1015 initPQExpBuffer(&command_error);
1016
1017 if (repmgr_command == NULL)
1018 {
1019 appendPQExpBuffer(&command_error,
1020 _("no repmgr command provided"));
1021 }
1022 else if (valid_repmgr_command_found == false && repmgr_action[0] == '\0')
1023 {
1024 appendPQExpBuffer(&command_error,
1025 _("unknown repmgr command '%s'"),
1026 repmgr_command);
1027 }
1028 else if (repmgr_action[0] == '\0')
1029 {
1030 appendPQExpBuffer(&command_error,
1031 _("no action provided for command '%s'"),
1032 repmgr_command);
1033 }
1034 else
1035 {
1036 appendPQExpBuffer(&command_error,
1037 _("unknown repmgr action '%s %s'"),
1038 repmgr_command,
1039 repmgr_action);
1040 }
1041
1042 item_list_append(&cli_errors, command_error.data);
1043 }
1044
1045 /*
1046 * STANDBY CLONE historically accepts the upstream hostname as an
1047 * additional argument
1048 */
1049 if (action == STANDBY_CLONE)
1050 {
1051 if (optind < argc)
1052 {
1053 if (runtime_options.host_param_provided == true)
1054 {
1055 PQExpBufferData additional_host_arg;
1056
1057 initPQExpBuffer(&additional_host_arg);
1058 appendPQExpBuffer(&additional_host_arg,
1059 _("host name provided both with %s and as an extra parameter"),
1060 runtime_options.conninfo_provided == true ? "host=" : "-h/--host");
1061 item_list_append(&cli_errors, additional_host_arg.data);
1062 }
1063 else
1064 {
1065 strncpy(runtime_options.host, argv[optind++], MAXLEN);
1066 param_set(&source_conninfo, "host", runtime_options.host);
1067 runtime_options.host_param_provided = true;
1068 }
1069 }
1070 }
1071
1072 if (optind < argc)
1073 {
1074 PQExpBufferData too_many_args;
1075
1076 initPQExpBuffer(&too_many_args);
1077 appendPQExpBuffer(&too_many_args, _("too many command-line arguments (first extra is \"%s\")"), argv[optind]);
1078 item_list_append(&cli_errors, too_many_args.data);
1079 }
1080
1081
1082 /*
1083 * The configuration file is not required for some actions (e.g. 'standby
1084 * clone'), however if available we'll parse it anyway for options like
1085 * 'log_level', 'use_replication_slots' etc.
1086 */
1087 load_config(runtime_options.config_file,
1088 runtime_options.verbose,
1089 runtime_options.terse,
1090 argv[0]);
1091
1092
1093 /*
1094 * Handle options which must be executed without a repmgr command
1095 */
1096 if (runtime_options.dump_config == true)
1097 {
1098 if (repmgr_command != NULL)
1099 {
1100 fprintf(stderr,
1101 _("--dump-config cannot be used in combination with a repmgr command"));
1102 exit(ERR_BAD_CONFIG);
1103 }
1104 dump_config();
1105 exit(SUCCESS);
1106 }
1107
1108
1109
1110 check_cli_parameters(action);
1111
1112 /*
1113 * Sanity checks for command line parameters completed by now; any further
1114 * errors will be runtime ones
1115 */
1116 if (cli_errors.head != NULL)
1117 {
1118 free_conninfo_params(&source_conninfo);
1119
1120 exit_with_cli_errors(&cli_errors, valid_repmgr_command_found == true ? repmgr_command : NULL);
1121 }
1122
1123 /* no errors detected by repmgr, but getopt might have */
1124 if (option_error_found == true)
1125 {
1126 if (valid_repmgr_command_found == true)
1127 {
1128 printf(_("Try \"%s --help\" or \"%s %s --help\" for more information.\n"),
1129 progname(),
1130 progname(),
1131 repmgr_command);
1132 }
1133 else
1134 {
1135 printf(_("Try \"repmgr --help\" for more information.\n"));
1136 }
1137
1138 free_conninfo_params(&source_conninfo);
1139 exit(ERR_BAD_CONFIG);
1140 }
1141
1142
1143 /*
1144 * Print any warnings about inappropriate command line options, unless
1145 * -t/--terse set
1146 */
1147 if (cli_warnings.head != NULL && runtime_options.terse == false)
1148 {
1149 log_warning(_("following problems with command line parameters detected:"));
1150 print_item_list(&cli_warnings);
1151 }
1152
1153 /*
1154 * post-processing following command line parameter checks
1155 * =======================================================
1156 */
1157
1158 if (runtime_options.csv == true)
1159 {
1160 runtime_options.output_mode = OM_CSV;
1161 }
1162 else if (runtime_options.nagios == true)
1163 {
1164 runtime_options.output_mode = OM_NAGIOS;
1165 }
1166 else if (runtime_options.optformat == true)
1167 {
1168 runtime_options.output_mode = OM_OPTFORMAT;
1169 }
1170
1171 /*
 * Check for configuration file items which can be overridden by runtime
1173 * options
1174 * =====================================================================
1175 */
1176
1177 /*
1178 * Command-line parameter -L/--log-level overrides any setting in config
1179 * file
1180 */
1181 if (*runtime_options.log_level != '\0')
1182 {
1183 strncpy(config_file_options.log_level, runtime_options.log_level, MAXLEN);
1184 }
1185
1186 /*
1187 * Initialise pg_bindir - command line parameter will override any setting
1188 * in the configuration file
1189 */
1190 if (!strlen(runtime_options.pg_bindir))
1191 {
1192 strncpy(runtime_options.pg_bindir, config_file_options.pg_bindir, MAXLEN);
1193 }
1194
1195 /* Add trailing slash */
1196 if (strlen(runtime_options.pg_bindir))
1197 {
1198 int len = strlen(runtime_options.pg_bindir);
1199
1200 if (runtime_options.pg_bindir[len - 1] != '/')
1201 {
1202 maxlen_snprintf(pg_bindir, "%s/", runtime_options.pg_bindir);
1203 }
1204 else
1205 {
1206 strncpy(pg_bindir, runtime_options.pg_bindir, MAXLEN);
1207 }
1208 }
1209
1210 /*
1211 * Initialize the logger. We've previously requested STDERR logging only
1212 * to ensure the repmgr command doesn't have its output diverted to a
1213 * logging facility (which usually doesn't make sense for a command line
1214 * program).
1215 *
1216 * If required (e.g. when calling repmgr from repmgrd), this behaviour can
1217 * be overridden with "--log-to-file".
1218 */
1219
1220 logger_init(&config_file_options, progname());
1221
1222 if (runtime_options.verbose)
1223 logger_set_verbose();
1224
1225 if (runtime_options.terse)
1226 logger_set_terse();
1227
1228 /*
1229 * If --dry-run specified, ensure log_level is at least LOG_INFO, regardless
1230 * of what's in the configuration file or -L/--log-level parameter, otherwise
1231 * some output might not be displayed.
1232 */
1233 if (runtime_options.dry_run == true)
1234 {
1235 logger_set_min_level(LOG_INFO);
1236 }
1237
1238 /*
1239 * If -q/--quiet supplied, suppress any non-ERROR log output.
1240 * This overrides everything else; we'll leave it up to the user to deal with the
1241 * consequences of e.g. running --dry-run together with -q/--quiet.
1242 */
1243 if (runtime_options.quiet == true)
1244 {
1245 logger_set_level(LOG_ERROR);
1246 }
1247
1248 /*
1249 * Node configuration information is not needed for all actions, with
1250 * STANDBY CLONE being the main exception.
1251 */
1252 if (config_file_required)
1253 {
1254 /*
1255 * if a configuration file was provided, the configuration file parser
1256 * will already have errored out if no valid node_id found
1257 */
1258 if (config_file_options.node_id == NODE_NOT_FOUND)
1259 {
1260 free_conninfo_params(&source_conninfo);
1261
1262 log_error(_("no node information was found - please supply a configuration file"));
1263 exit(ERR_BAD_CONFIG);
1264 }
1265 }
1266
1267 /*
1268 * If a node was specified (by --node-id or --node-name), check it exists
1269 * (and pre-populate a record for later use).
1270 *
1271 * At this point check_cli_parameters() will already have determined if
1272 * provision of these is valid for the action, otherwise it unsets them.
1273 *
1274 * We need to check this much later than other command line parameters as
1275 * we need to wait until the configuration file is parsed and we can
1276 * obtain the conninfo string.
1277 */
1278
1279 if (runtime_options.node_id != UNKNOWN_NODE_ID || runtime_options.node_name[0] != '\0')
1280 {
1281 PGconn *conn = NULL;
1282 RecordStatus record_status = RECORD_NOT_FOUND;
1283
1284 log_verbose(LOG_DEBUG, "connecting to local node to retrieve record for node specified with --node-id or --node-name");
1285
1286 if (strlen(config_file_options.conninfo))
1287 conn = establish_db_connection(config_file_options.conninfo, true);
1288 else
1289 conn = establish_db_connection_by_params(&source_conninfo, true);
1290
1291 if (runtime_options.node_id != UNKNOWN_NODE_ID)
1292 {
1293 record_status = get_node_record(conn, runtime_options.node_id, &target_node_info);
1294
1295 if (record_status != RECORD_FOUND)
1296 {
1297 log_error(_("node %i (specified with --node-id) not found"),
1298 runtime_options.node_id);
1299 PQfinish(conn);
1300 free_conninfo_params(&source_conninfo);
1301
1302 exit(ERR_BAD_CONFIG);
1303 }
1304 }
1305 else if (runtime_options.node_name[0] != '\0')
1306 {
1307 char *escaped = escape_string(conn, runtime_options.node_name);
1308
1309 if (escaped == NULL)
1310 {
1311 log_error(_("unable to escape value provided for --node-name"));
1312 PQfinish(conn);
1313 free_conninfo_params(&source_conninfo);
1314
1315 exit(ERR_BAD_CONFIG);
1316 }
1317
1318 record_status = get_node_record_by_name(conn, escaped, &target_node_info);
1319
1320 pfree(escaped);
1321 if (record_status != RECORD_FOUND)
1322 {
1323 log_error(_("node \"%s\" (specified with --node-name) not found"),
1324 runtime_options.node_name);
1325 PQfinish(conn);
1326 free_conninfo_params(&source_conninfo);
1327
1328 exit(ERR_BAD_CONFIG);
1329 }
1330 }
1331
1332 PQfinish(conn);
1333 }
1334
1335
1336 switch (action)
1337 {
1338 /* PRIMARY */
1339 case PRIMARY_REGISTER:
1340 do_primary_register();
1341 break;
1342 case PRIMARY_UNREGISTER:
1343 do_primary_unregister();
1344 break;
1345
1346 /* STANDBY */
1347 case STANDBY_CLONE:
1348 do_standby_clone();
1349 break;
1350 case STANDBY_REGISTER:
1351 do_standby_register();
1352 break;
1353 case STANDBY_UNREGISTER:
1354 do_standby_unregister();
1355 break;
1356 case STANDBY_PROMOTE:
1357 do_standby_promote();
1358 break;
1359 case STANDBY_FOLLOW:
1360 do_standby_follow();
1361 break;
1362 case STANDBY_SWITCHOVER:
1363 do_standby_switchover();
1364 break;
1365
1366 /* WITNESS */
1367 case WITNESS_REGISTER:
1368 do_witness_register();
1369 break;
1370 case WITNESS_UNREGISTER:
1371 do_witness_unregister();
1372 break;
1373
1374 /* NODE */
1375 case NODE_STATUS:
1376 do_node_status();
1377 break;
1378 case NODE_CHECK:
1379 do_node_check();
1380 break;
1381 case NODE_REJOIN:
1382 do_node_rejoin();
1383 break;
1384 case NODE_SERVICE:
1385 do_node_service();
1386 break;
1387 case NODE_CONTROL:
1388 do_node_control();
1389 break;
1390
1391 /* CLUSTER */
1392 case CLUSTER_SHOW:
1393 do_cluster_show();
1394 break;
1395 case CLUSTER_EVENT:
1396 do_cluster_event();
1397 break;
1398 case CLUSTER_CROSSCHECK:
1399 do_cluster_crosscheck();
1400 break;
1401 case CLUSTER_MATRIX:
1402 do_cluster_matrix();
1403 break;
1404 case CLUSTER_CLEANUP:
1405 do_cluster_cleanup();
1406 break;
1407
1408 /* SERVICE */
1409 case SERVICE_STATUS:
1410 do_service_status();
1411 break;
1412 case SERVICE_PAUSE:
1413 do_service_pause();
1414 break;
1415 case SERVICE_UNPAUSE:
1416 do_service_unpause();
1417 break;
1418
1419 /* DAEMON */
1420 case DAEMON_START:
1421 do_daemon_start();
1422 break;
1423 case DAEMON_STOP:
1424 do_daemon_stop();
1425 break;
1426
1427 default:
1428 /* An action will have been determined by this point */
1429 break;
1430 }
1431
1432 free_conninfo_params(&source_conninfo);
1433
1434 return SUCCESS;
1435 }
1436
1437
1438
1439 /*
1440 * Check for useless or conflicting parameters, and also whether a
1441 * configuration file is required.
1442 *
1443 * Messages will be added to the command line warning and error lists
1444 * as appropriate.
1445 */
1446
1447 static void
check_cli_parameters(const int action)1448 check_cli_parameters(const int action)
1449 {
1450 /*
1451 * ========================================================================
1452 * check all parameters required for an action are provided, and warn
1453 * about ineffective actions
1454 * ========================================================================
1455 */
1456 switch (action)
1457 {
1458 case PRIMARY_REGISTER:
1459 /* no required parameters */
1460 break;
1461 case STANDBY_CLONE:
1462 {
1463 standy_clone_mode mode = get_standby_clone_mode();
1464
1465 config_file_required = false;
1466
1467 if (mode == barman)
1468 {
1469 if (runtime_options.copy_external_config_files)
1470 {
1471 item_list_append(&cli_warnings,
1472 _("--copy-external-config-files ineffective in Barman mode"));
1473 }
1474
1475 if (runtime_options.fast_checkpoint)
1476 {
1477 item_list_append(&cli_warnings,
1478 _("-c/--fast-checkpoint has no effect in Barman mode"));
1479 }
1480
1481
1482 }
1483 else
1484 {
1485 if (!runtime_options.host_param_provided)
1486 {
1487 item_list_append_format(&cli_errors,
1488 _("host name for the source node must be provided with -h/--host when executing %s"),
1489 action_name(action));
1490 }
1491
1492 if (!runtime_options.connection_param_provided)
1493 {
1494 item_list_append_format(&cli_errors,
1495 _("database connection parameters for the source node must be provided when executing %s"),
1496 action_name(action));
1497 }
1498
1499 /*
1500 * If -D/--pgdata was provided, but config_file_options.pgdata
1501 * is set, warn that -D/--pgdata will be ignored.
1502 */
1503 if (runtime_options.data_dir[0] && config_file_options.data_directory[0])
1504 {
1505 item_list_append(&cli_warnings,
1506 _("-D/--pgdata will be ignored if a repmgr configuration file is provided"));
1507
1508 }
1509
1510 if (*runtime_options.upstream_conninfo)
1511 {
1512 if (*runtime_options.replication_user)
1513 {
1514 item_list_append(&cli_warnings,
1515 _("--replication-user ineffective when specifying --upstream-conninfo"));
1516 }
1517 }
1518
1519 if (runtime_options.no_upstream_connection == true)
1520 {
1521 item_list_append(&cli_warnings,
1522 _("--no-upstream-connection only effective in Barman mode"));
1523 }
1524 }
1525
1526 if (strlen(config_file_options.config_directory))
1527 {
1528 if (runtime_options.copy_external_config_files == false)
1529 {
1530 item_list_append(&cli_warnings,
1531 _("\"config_directory\" set in repmgr.conf, but --copy-external-config-files not provided"));
1532 }
1533 }
1534 }
1535 break;
1536
1537 case STANDBY_FOLLOW:
1538 {
1539 /*
1540 * if `repmgr standby follow` executed with host params,
1541 * ensure data directory was provided
1542 */
1543 }
1544 break;
1545 case WITNESS_REGISTER:
1546 {
1547 if (!runtime_options.host_param_provided)
1548 {
1549 item_list_append_format(&cli_errors,
1550 _("host name for the source node must be provided with -h/--host when executing %s"),
1551 action_name(action));
1552 }
1553 }
1554 break;
1555 case NODE_CHECK:
1556 if (runtime_options.has_passfile == true)
1557 {
1558 config_file_required = false;
1559 }
1560 break;
1561 case NODE_STATUS:
1562 if (runtime_options.node_id != UNKNOWN_NODE_ID)
1563 {
1564 item_list_append(
1565 &cli_warnings,
1566 "--node-id will be ignored; \"repmgr node status\" can only be executed on the local node");
1567 }
1568 if (runtime_options.node_name[0] != '\0')
1569 {
1570 item_list_append(
1571 &cli_warnings,
1572 "--node-name will be ignored; \"repmgr node status\" can only be executed on the local node");
1573 }
1574 break;
1575 case NODE_REJOIN:
1576 if (runtime_options.connection_param_provided == false)
1577 {
1578 item_list_append(
1579 &cli_errors,
1580 "database connection parameters for an available node must be provided when executing NODE REJOIN");
1581 }
1582 break;
1583 case CLUSTER_SHOW:
1584 case CLUSTER_MATRIX:
1585 case CLUSTER_CROSSCHECK:
1586 if (runtime_options.connection_param_provided)
1587 config_file_required = false;
1588 break;
1589 case CLUSTER_EVENT:
1590 /* no required parameters */
1591 break;
1592
1593 }
1594
1595 /*
1596 * ========================================================================
1597 * warn if parameters provided for an action where they're not relevant
1598 * ========================================================================
1599 */
1600
1601 /* --host etc. */
1602 if (runtime_options.connection_param_provided)
1603 {
1604 switch (action)
1605 {
1606 case STANDBY_CLONE:
1607 case STANDBY_FOLLOW:
1608 case STANDBY_REGISTER:
1609 case WITNESS_REGISTER:
1610 case WITNESS_UNREGISTER:
1611 case CLUSTER_SHOW:
1612 case CLUSTER_MATRIX:
1613 case CLUSTER_CROSSCHECK:
1614 case NODE_REJOIN:
1615 break;
1616 default:
1617 item_list_append_format(&cli_warnings,
1618 _("database connection parameters not required when executing %s"),
1619 action_name(action));
1620 }
1621 }
1622
1623 /* -D/--pgdata */
1624 if (runtime_options.data_dir[0])
1625 {
1626 switch (action)
1627 {
1628 case STANDBY_CLONE:
1629 case STANDBY_FOLLOW:
1630 case NODE_SERVICE:
1631 break;
1632 default:
1633 item_list_append_format(&cli_warnings,
1634 _("-D/--pgdata not required when executing %s"),
1635 action_name(action));
1636 }
1637 }
1638
1639 /*
1640 * --node-id
1641 *
1642 * NOTE: overrides --node-name, if present
1643 */
1644 if (runtime_options.node_id != UNKNOWN_NODE_ID)
1645 {
1646 switch (action)
1647 {
1648 case PRIMARY_UNREGISTER:
1649 case STANDBY_UNREGISTER:
1650 case WITNESS_UNREGISTER:
1651 case CLUSTER_CLEANUP:
1652 case CLUSTER_EVENT:
1653 case CLUSTER_MATRIX:
1654 case CLUSTER_CROSSCHECK:
1655 break;
1656 default:
1657 item_list_append_format(&cli_warnings,
1658 _("--node-id not required when executing %s"),
1659 action_name(action));
1660 runtime_options.node_id = UNKNOWN_NODE_ID;
1661 }
1662 }
1663
1664 if (runtime_options.node_name[0])
1665 {
1666 switch (action)
1667 {
1668 case STANDBY_UNREGISTER:
1669 case CLUSTER_EVENT:
1670 if (runtime_options.node_id != UNKNOWN_NODE_ID)
1671 {
1672 item_list_append(&cli_warnings,
1673 _("--node-id provided, ignoring --node-name"));
1674 memset(runtime_options.node_name, 0, sizeof(runtime_options.node_name));
1675 }
1676 break;
1677 default:
1678 item_list_append_format(&cli_warnings,
1679 _("--node-name not required when executing %s"),
1680 action_name(action));
1681 memset(runtime_options.node_name, 0, sizeof(runtime_options.node_name));
1682 }
1683 }
1684
1685 if (runtime_options.upstream_node_id != UNKNOWN_NODE_ID)
1686 {
1687 switch (action)
1688 {
1689 case STANDBY_CLONE:
1690 case STANDBY_REGISTER:
1691 case STANDBY_FOLLOW:
1692 break;
1693 default:
1694 item_list_append_format(&cli_warnings,
1695 _("--upstream-node-id will be ignored when executing %s"),
1696 action_name(action));
1697 }
1698 }
1699
1700 if (runtime_options.replication_user[0])
1701 {
1702 switch (action)
1703 {
1704 case PRIMARY_REGISTER:
1705 case STANDBY_REGISTER:
1706 case STANDBY_CLONE:
1707 break;
1708
1709 case STANDBY_FOLLOW:
1710 item_list_append_format(&cli_warnings,
1711 _("--replication-user ignored when executing %s"),
1712 action_name(action));
1713 break;
1714
1715 default:
1716 item_list_append_format(&cli_warnings,
1717 _("--replication-user not required when executing %s"),
1718 action_name(action));
1719 }
1720 }
1721
1722 if (runtime_options.superuser[0])
1723 {
1724 switch (action)
1725 {
1726 case STANDBY_CLONE:
1727 case STANDBY_SWITCHOVER:
1728 case NODE_CHECK:
1729 case NODE_SERVICE:
1730 break;
1731 default:
1732 item_list_append_format(&cli_warnings,
1733 _("--superuser ignored when executing %s"),
1734 action_name(action));
1735 }
1736 }
1737
1738
1739 if (runtime_options.replication_conf_only == true)
1740 {
1741 switch (action)
1742 {
1743 case STANDBY_CLONE:
1744 break;
1745 default:
1746 item_list_append_format(&cli_warnings,
1747 _("--create-recovery-conf will be ignored when executing %s"),
1748 action_name(action));
1749 }
1750 }
1751
1752 if (runtime_options.event[0])
1753 {
1754 switch (action)
1755 {
1756 case CLUSTER_EVENT:
1757 break;
1758 default:
1759 item_list_append_format(&cli_warnings,
1760 _("--event not required when executing %s"),
1761 action_name(action));
1762 }
1763 }
1764
1765 if (runtime_options.limit_provided)
1766 {
1767 switch (action)
1768 {
1769 case CLUSTER_EVENT:
1770 break;
1771 default:
1772 item_list_append_format(&cli_warnings,
1773 _("--limit not required when executing %s"),
1774 action_name(action));
1775 }
1776 }
1777
1778 if (runtime_options.all)
1779 {
1780 switch (action)
1781 {
1782 case CLUSTER_EVENT:
1783 if (runtime_options.limit_provided == true)
1784 {
1785 runtime_options.all = false;
1786 item_list_append(&cli_warnings,
1787 _("--limit provided, ignoring --all"));
1788 }
1789 break;
1790 default:
1791 item_list_append_format(&cli_warnings,
1792 _("--all not required when executing %s"),
1793 action_name(action));
1794 }
1795 }
1796
1797 /* --wait/--no-wait */
1798
1799 if (runtime_options.wait_provided == true && runtime_options.no_wait == true)
1800 {
1801 item_list_append_format(&cli_errors,
1802 _("both --wait and --no-wait options provided"));
1803 }
1804 else
1805 {
1806 if (runtime_options.wait_provided)
1807 {
1808 switch (action)
1809 {
1810 case DAEMON_START:
1811 case DAEMON_STOP:
1812 case STANDBY_FOLLOW:
1813 break;
1814 default:
1815 item_list_append_format(&cli_warnings,
1816 _("--wait will be ignored when executing %s"),
1817 action_name(action));
1818 }
1819 }
1820 else if (runtime_options.no_wait)
1821 {
1822 switch (action)
1823 {
1824 case DAEMON_START:
1825 case DAEMON_STOP:
1826 case NODE_REJOIN:
1827 break;
1828 default:
1829 item_list_append_format(&cli_warnings,
1830 _("--no-wait will be ignored when executing %s"),
1831 action_name(action));
1832 }
1833 }
1834 }
1835
1836 /* repmgr node service --action */
1837 if (runtime_options.action[0] != '\0')
1838 {
1839 switch (action)
1840 {
1841 case NODE_SERVICE:
1842 break;
1843 default:
1844 item_list_append_format(&cli_warnings,
1845 _("--action will be ignored when executing %s"),
1846 action_name(action));
1847 }
1848 }
1849
1850 /* repmgr node status --is-shutdown-cleanly */
1851 if (runtime_options.is_shutdown_cleanly == true)
1852 {
1853 switch (action)
1854 {
1855 case NODE_STATUS:
1856 break;
1857 default:
1858 item_list_append_format(&cli_warnings,
1859 _("--is-shutdown-cleanly will be ignored when executing %s"),
1860 action_name(action));
1861 }
1862 }
1863
1864 if (runtime_options.always_promote == true)
1865 {
1866 switch (action)
1867 {
1868 case STANDBY_SWITCHOVER:
1869 break;
1870 default:
1871 item_list_append_format(&cli_warnings,
1872 _("--always-promote will be ignored when executing %s"),
1873 action_name(action));
1874 }
1875 }
1876
1877 if (runtime_options.force_rewind_used == true)
1878 {
1879 switch (action)
1880 {
1881 case STANDBY_SWITCHOVER:
1882 case NODE_REJOIN:
1883 break;
1884 default:
1885 item_list_append_format(&cli_warnings,
1886 _("--force-rewind will be ignored when executing %s"),
1887 action_name(action));
1888 }
1889 }
1890
1891 if (runtime_options.repmgrd_no_pause == true)
1892 {
1893 switch (action)
1894 {
1895 case STANDBY_SWITCHOVER:
1896 break;
1897 default:
1898 item_list_append_format(&cli_warnings,
1899 _("--repmgrd-no-pause will be ignored when executing %s"),
1900 action_name(action));
1901 }
1902 }
1903
1904 if (runtime_options.repmgrd_force_unpause == true)
1905 {
1906 switch (action)
1907 {
1908 case STANDBY_SWITCHOVER:
1909 if (runtime_options.repmgrd_no_pause == true)
1910 item_list_append(&cli_errors,
1911 _("--repmgrd-force-unpause and --repmgrd-no-pause cannot be used together"));
1912 break;
1913 default:
1914 item_list_append_format(&cli_warnings,
1915 _("--repmgrd-force-unpause will be ignored when executing %s"),
1916 action_name(action));
1917 }
1918 }
1919
1920 if (runtime_options.config_files[0] != '\0')
1921 {
1922 switch (action)
1923 {
1924 case NODE_REJOIN:
1925 break;
1926 default:
1927 item_list_append_format(&cli_warnings,
1928 _("--config-files will be ignored when executing %s"),
1929 action_name(action));
1930 }
1931 }
1932
1933 if (runtime_options.dry_run == true)
1934 {
1935 switch (action)
1936 {
1937 case PRIMARY_REGISTER:
1938 case PRIMARY_UNREGISTER:
1939 case STANDBY_CLONE:
1940 case STANDBY_REGISTER:
1941 case STANDBY_FOLLOW:
1942 case STANDBY_SWITCHOVER:
1943 case STANDBY_PROMOTE:
1944 case WITNESS_REGISTER:
1945 case WITNESS_UNREGISTER:
1946 case NODE_REJOIN:
1947 case NODE_SERVICE:
1948 case SERVICE_PAUSE:
1949 case SERVICE_UNPAUSE:
1950 case SERVICE_STATUS:
1951 case DAEMON_START:
1952 case DAEMON_STOP:
1953 break;
1954 default:
1955 item_list_append_format(&cli_warnings,
1956 _("--dry-run is not effective when executing %s"),
1957 action_name(action));
1958 }
1959 }
1960
1961 /* check only one of --csv, --nagios and --optformat used */
1962 {
1963 int used_options = 0;
1964
1965 if (runtime_options.csv == true)
1966 used_options++;
1967
1968 if (runtime_options.nagios == true)
1969 used_options++;
1970
1971 if (runtime_options.optformat == true)
1972 used_options++;
1973
1974 if (used_options > 1)
1975 {
1976 /* TODO: list which options were used */
1977 item_list_append(&cli_errors,
1978 "only one of --csv, --nagios and --optformat can be used");
1979 }
1980 }
1981
1982 /* --compact */
1983 if (runtime_options.compact == true)
1984 {
1985 switch (action)
1986 {
1987 case CLUSTER_SHOW:
1988 case CLUSTER_EVENT:
1989 case SERVICE_STATUS:
1990 break;
1991 default:
1992 item_list_append_format(&cli_warnings,
1993 _("--compact is not effective when executing %s"),
1994 action_name(action));
1995 }
1996 }
1997
1998 /* --detail */
1999 if (runtime_options.detail == true)
2000 {
2001 switch (action)
2002 {
2003 case SERVICE_STATUS:
2004 break;
2005 default:
2006 item_list_append_format(&cli_warnings,
2007 _("--detail is not effective when executing %s"),
2008 action_name(action));
2009 }
2010 }
2011
2012 /* --siblings-follow */
2013 if (runtime_options.siblings_follow == true)
2014 {
2015 switch (action)
2016 {
2017 case STANDBY_PROMOTE:
2018 case STANDBY_SWITCHOVER:
2019 break;
2020 default:
2021 item_list_append_format(&cli_warnings,
2022 _("----siblings-follow is not effective when executing %s"),
2023 action_name(action));
2024 }
2025 }
2026
2027 /* --disable-wal-receiver / --enable-wal-receiver */
2028 if (runtime_options.disable_wal_receiver == true || runtime_options.enable_wal_receiver == true)
2029 {
2030 switch (action)
2031 {
2032 case NODE_CONTROL:
2033 {
2034 if (runtime_options.disable_wal_receiver == true && runtime_options.enable_wal_receiver == true)
2035 {
2036 item_list_append(&cli_errors,
2037 _("provide either --disable-wal-receiver or --enable-wal-receiver"));
2038 }
2039 }
2040 break;
2041 default:
2042 item_list_append_format(&cli_warnings,
2043 _("--disable-wal-receiver / --enable-wal-receiver not effective when executing %s"),
2044 action_name(action));
2045 }
2046 }
2047
2048 }
2049
2050
2051 /*
2052 * Generate formatted node status output for display by "cluster show" and
2053 * "service status".
2054 */
2055 bool
format_node_status(t_node_info * node_info,PQExpBufferData * node_status,PQExpBufferData * upstream,ItemList * warnings)2056 format_node_status(t_node_info *node_info, PQExpBufferData *node_status, PQExpBufferData *upstream, ItemList *warnings)
2057 {
2058 bool error_found = false;
2059 t_node_info remote_node_rec = T_NODE_INFO_INITIALIZER;
2060 RecordStatus remote_node_rec_found = RECORD_NOT_FOUND;
2061
2062 if (PQstatus(node_info->conn) == CONNECTION_OK)
2063 {
2064 node_info->node_status = NODE_STATUS_UP;
2065 node_info->recovery_type = get_recovery_type(node_info->conn);
2066 /* get node's copy of its record so we can see what it thinks its status is */
2067 remote_node_rec_found = get_node_record_with_upstream(node_info->conn, node_info->node_id, &remote_node_rec);
2068 }
2069 else
2070 {
2071 /* check if node is reachable, but just not letting us in */
2072 if (is_server_available_quiet(node_info->conninfo))
2073 node_info->node_status = NODE_STATUS_REJECTED;
2074 else
2075 node_info->node_status = NODE_STATUS_DOWN;
2076
2077 node_info->recovery_type = RECTYPE_UNKNOWN;
2078 }
2079
2080 /* format node status info */
2081 switch (node_info->type)
2082 {
2083 case PRIMARY:
2084 {
2085 /* node is reachable */
2086 if (node_info->node_status == NODE_STATUS_UP)
2087 {
2088 if (node_info->active == true)
2089 {
2090 switch (node_info->recovery_type)
2091 {
2092 case RECTYPE_PRIMARY:
2093 appendPQExpBufferStr(node_status, "* running");
2094 break;
2095 case RECTYPE_STANDBY:
2096 appendPQExpBufferStr(node_status, "! running as standby");
2097 item_list_append_format(warnings,
2098 "node \"%s\" (ID: %i) is registered as primary but running as standby",
2099 node_info->node_name, node_info->node_id);
2100 break;
2101 case RECTYPE_UNKNOWN:
2102 appendPQExpBufferStr(node_status, "! unknown");
2103 item_list_append_format(warnings,
2104 "node \"%s\" (ID: %i) has unknown replication status",
2105 node_info->node_name, node_info->node_id);
2106 break;
2107 }
2108 }
2109 else
2110 {
2111 if (node_info->recovery_type == RECTYPE_PRIMARY)
2112 {
2113 appendPQExpBufferStr(node_status, "! running");
2114 item_list_append_format(warnings,
2115 "node \"%s\" (ID: %i) is running but the repmgr node record is inactive",
2116 node_info->node_name, node_info->node_id);
2117 }
2118 else
2119 {
2120 appendPQExpBufferStr(node_status, "! running as standby");
2121 item_list_append_format(warnings,
2122 "node \"%s\" (ID: %i) is registered as an inactive primary but running as standby",
2123 node_info->node_name, node_info->node_id);
2124 }
2125 }
2126 }
2127 /* node is up but cannot connect */
2128 else if (node_info->node_status == NODE_STATUS_REJECTED)
2129 {
2130 if (node_info->active == true)
2131 {
2132 appendPQExpBufferStr(node_status, "? running");
2133 }
2134 else
2135 {
2136 appendPQExpBufferStr(node_status, "! running");
2137 error_found = true;
2138 }
2139 }
2140 /* node is unreachable */
2141 else
2142 {
2143 /* node is unreachable but marked active */
2144 if (node_info->active == true)
2145 {
2146 appendPQExpBufferStr(node_status, "? unreachable");
2147 item_list_append_format(warnings,
2148 "node \"%s\" (ID: %i) is registered as an active primary but is unreachable",
2149 node_info->node_name, node_info->node_id);
2150 }
2151 /* node is unreachable and marked as inactive */
2152 else
2153 {
2154 appendPQExpBufferStr(node_status, "- failed");
2155 error_found = true;
2156 }
2157 }
2158 }
2159 break;
2160 case STANDBY:
2161 {
2162 /* node is reachable */
2163 if (node_info->node_status == NODE_STATUS_UP)
2164 {
2165 if (node_info->active == true)
2166 {
2167 switch (node_info->recovery_type)
2168 {
2169 case RECTYPE_STANDBY:
2170 appendPQExpBufferStr(node_status, " running");
2171 break;
2172 case RECTYPE_PRIMARY:
2173 appendPQExpBufferStr(node_status, "! running as primary");
2174 item_list_append_format(warnings,
2175 "node \"%s\" (ID: %i) is registered as standby but running as primary",
2176 node_info->node_name, node_info->node_id);
2177 break;
2178 case RECTYPE_UNKNOWN:
2179 appendPQExpBufferStr(node_status, "! unknown");
2180 item_list_append_format(
2181 warnings,
2182 "node \"%s\" (ID: %i) has unknown replication status",
2183 node_info->node_name, node_info->node_id);
2184 break;
2185 }
2186 }
2187 else
2188 {
2189 if (node_info->recovery_type == RECTYPE_STANDBY)
2190 {
2191 appendPQExpBufferStr(node_status, "! running");
2192 item_list_append_format(warnings,
2193 "node \"%s\" (ID: %i) is running but the repmgr node record is inactive",
2194 node_info->node_name, node_info->node_id);
2195 }
2196 else
2197 {
2198 appendPQExpBufferStr(node_status, "! running as primary");
2199 item_list_append_format(warnings,
2200 "node \"%s\" (ID: %i) is running as primary but the repmgr node record is inactive",
2201 node_info->node_name, node_info->node_id);
2202 }
2203 }
2204
2205 /* warn about issue with paused WAL replay */
2206 if (is_wal_replay_paused(node_info->conn, true))
2207 {
2208 item_list_append_format(warnings,
2209 _("WAL replay is paused on node \"%s\" (ID: %i) with WAL replay pending; this node cannot be manually promoted until WAL replay is resumed"),
2210 node_info->node_name, node_info->node_id);
2211 }
2212 }
2213 /* node is up but cannot connect */
2214 else if (node_info->node_status == NODE_STATUS_REJECTED)
2215 {
2216 if (node_info->active == true)
2217 {
2218 appendPQExpBufferStr(node_status, "? running");
2219 }
2220 else
2221 {
2222 appendPQExpBufferStr(node_status, "! running");
2223 error_found = true;
2224 }
2225 }
2226 /* node is unreachable */
2227 else
2228 {
2229 /* node is unreachable but marked active */
2230 if (node_info->active == true)
2231 {
2232 appendPQExpBufferStr(node_status, "? unreachable");
2233 item_list_append_format(warnings,
2234 "node \"%s\" (ID: %i) is registered as an active standby but is unreachable",
2235 node_info->node_name, node_info->node_id);
2236 }
2237 else
2238 {
2239 appendPQExpBufferStr(node_status, "- failed");
2240 error_found = true;
2241 }
2242 }
2243 }
2244
2245 break;
2246 case WITNESS:
2247 {
2248 /* node is reachable */
2249 if (node_info->node_status == NODE_STATUS_UP)
2250 {
2251 if (node_info->active == true)
2252 {
2253 appendPQExpBufferStr(node_status, "* running");
2254 }
2255 else
2256 {
2257 appendPQExpBufferStr(node_status, "! running");
2258 error_found = true;
2259 }
2260 }
2261 /* node is up but cannot connect */
2262 else if (node_info->node_status == NODE_STATUS_REJECTED)
2263 {
2264 if (node_info->active == true)
2265 {
2266 appendPQExpBufferStr(node_status, "? rejected");
2267 }
2268 else
2269 {
2270 appendPQExpBufferStr(node_status, "! failed");
2271 error_found = true;
2272 }
2273 }
2274 /* node is unreachable */
2275 else
2276 {
2277 if (node_info->active == true)
2278 {
2279 appendPQExpBufferStr(node_status, "? unreachable");
2280 }
2281 else
2282 {
2283 appendPQExpBufferStr(node_status, "- failed");
2284 error_found = true;
2285 }
2286 }
2287 }
2288 break;
2289 case UNKNOWN:
2290 {
2291 /* this should never happen */
2292 appendPQExpBufferStr(node_status, "? unknown node type");
2293 error_found = true;
2294 }
2295 break;
2296 }
2297
2298 /* format node upstream info */
2299
2300 if (remote_node_rec_found == RECORD_NOT_FOUND)
2301 {
2302 /*
2303 * Unable to retrieve the node's copy of its own record - copy the
2304 * name from our own copy of the record
2305 */
2306 appendPQExpBuffer(upstream,
2307 "? %s",
2308 node_info->upstream_node_name);
2309 }
2310 else if (remote_node_rec.type == WITNESS)
2311 {
2312 /* no upstream - unlikely to happen */
2313 if (remote_node_rec.upstream_node_id == NO_UPSTREAM_NODE)
2314 {
2315 appendPQExpBufferStr(upstream, "! ");
2316 item_list_append_format(warnings,
2317 "node \"%s\" (ID: %i) is a witness but reports it has no upstream node",
2318 node_info->node_name,
2319 node_info->node_id);
2320 }
2321 /* mismatch between reported upstream and upstream in local node's metadata */
2322 else if (node_info->upstream_node_id != remote_node_rec.upstream_node_id)
2323 {
2324 appendPQExpBufferStr(upstream, "! ");
2325
2326 if (node_info->upstream_node_id != remote_node_rec.upstream_node_id)
2327 {
2328 item_list_append_format(warnings,
2329 "node \"%s\" (ID: %i) reports a different upstream (reported: \"%s\", expected \"%s\")",
2330 node_info->node_name,
2331 node_info->node_id,
2332 remote_node_rec.upstream_node_name,
2333 node_info->upstream_node_name);
2334 }
2335 }
2336 else
2337 {
2338 t_node_info upstream_node_rec = T_NODE_INFO_INITIALIZER;
2339 RecordStatus upstream_node_rec_found = get_node_record(node_info->conn,
2340 node_info->upstream_node_id,
2341 &upstream_node_rec);
2342
2343 if (upstream_node_rec_found != RECORD_FOUND)
2344 {
2345 appendPQExpBufferStr(upstream, "? ");
2346 item_list_append_format(warnings,
2347 "unable to find record for upstream node ID %i",
2348 node_info->upstream_node_id);
2349
2350 }
2351 else
2352 {
2353 PGconn *upstream_conn = establish_db_connection_quiet(upstream_node_rec.conninfo);
2354
2355 if (PQstatus(upstream_conn) != CONNECTION_OK)
2356 {
2357 appendPQExpBufferStr(upstream, "? ");
2358 item_list_append_format(warnings,
2359 "unable to connect to node \"%s\" (ID: %i)'s upstream node \"%s\" (ID: %i)",
2360 node_info->node_name,
2361 node_info->node_id,
2362 upstream_node_rec.node_name,
2363 upstream_node_rec.node_id);
2364 }
2365
2366 PQfinish(upstream_conn);
2367 }
2368 }
2369
2370 appendPQExpBufferStr(upstream,
2371 remote_node_rec.upstream_node_name);
2372
2373 }
2374 else if (remote_node_rec.type == STANDBY)
2375 {
2376 if (node_info->upstream_node_id != NO_UPSTREAM_NODE && node_info->upstream_node_id == remote_node_rec.upstream_node_id)
2377 {
2378 /*
2379 * expected and reported upstreams match - check if node is actually
2380 * connected to the upstream
2381 */
2382 NodeAttached attached_to_upstream = NODE_ATTACHED_UNKNOWN;
2383 char *replication_state = NULL;
2384 t_node_info upstream_node_rec = T_NODE_INFO_INITIALIZER;
2385 RecordStatus upstream_node_rec_found = get_node_record(node_info->conn,
2386 node_info->upstream_node_id,
2387 &upstream_node_rec);
2388
2389 if (upstream_node_rec_found != RECORD_FOUND)
2390 {
2391 item_list_append_format(warnings,
2392 "unable to find record for upstream node ID %i",
2393 node_info->upstream_node_id);
2394
2395 }
2396 else
2397 {
2398 PGconn *upstream_conn = establish_db_connection_quiet(upstream_node_rec.conninfo);
2399
2400 if (PQstatus(upstream_conn) != CONNECTION_OK)
2401 {
2402 item_list_append_format(warnings,
2403 "unable to connect to node \"%s\" (ID: %i)'s upstream node \"%s\" (ID: %i)",
2404 node_info->node_name,
2405 node_info->node_id,
2406 upstream_node_rec.node_name,
2407 upstream_node_rec.node_id);
2408 }
2409 else
2410 {
2411 attached_to_upstream = is_downstream_node_attached(upstream_conn, node_info->node_name, &replication_state);
2412 }
2413
2414 PQfinish(upstream_conn);
2415 }
2416
2417 if (attached_to_upstream == NODE_ATTACHED_UNKNOWN)
2418 {
2419 appendPQExpBufferStr(upstream, "? ");
2420 item_list_append_format(warnings,
2421 "unable to determine if node \"%s\" (ID: %i) is attached to its upstream node \"%s\" (ID: %i)",
2422 node_info->node_name,
2423 node_info->node_id,
2424 upstream_node_rec.node_name,
2425 upstream_node_rec.node_id);
2426 }
2427 if (attached_to_upstream == NODE_NOT_ATTACHED)
2428 {
2429 appendPQExpBufferStr(upstream, "? ");
2430 item_list_append_format(warnings,
2431 "node \"%s\" (ID: %i) attached to its upstream node \"%s\" (ID: %i) in state \"%s\"",
2432 node_info->node_name,
2433 node_info->node_id,
2434 upstream_node_rec.node_name,
2435 upstream_node_rec.node_id,
2436 replication_state);
2437 }
2438
2439 else if (attached_to_upstream == NODE_DETACHED)
2440 {
2441 appendPQExpBufferStr(upstream, "! ");
2442 item_list_append_format(warnings,
2443 "node \"%s\" (ID: %i) is not attached to its upstream node \"%s\" (ID: %i)",
2444 node_info->node_name,
2445 node_info->node_id,
2446 upstream_node_rec.node_name,
2447 upstream_node_rec.node_id);
2448 }
2449 appendPQExpBufferStr(upstream,
2450 node_info->upstream_node_name);
2451
2452 }
2453 else
2454 {
2455 if (node_info->upstream_node_id != NO_UPSTREAM_NODE && remote_node_rec.upstream_node_id == NO_UPSTREAM_NODE)
2456 {
2457 appendPQExpBufferChar(upstream, '!');
2458 item_list_append_format(warnings,
2459 "node \"%s\" (ID: %i) reports it has no upstream (expected: \"%s\")",
2460 node_info->node_name,
2461 node_info->node_id,
2462 node_info->upstream_node_name);
2463 }
2464 else if (node_info->upstream_node_id != NO_UPSTREAM_NODE && remote_node_rec.upstream_node_id != NO_UPSTREAM_NODE)
2465
2466 {
2467 appendPQExpBuffer(upstream,
2468 "! %s", remote_node_rec.upstream_node_name);
2469 item_list_append_format(warnings,
2470 "node \"%s\" (ID: %i) reports a different upstream (reported: \"%s\", expected \"%s\")",
2471 node_info->node_name,
2472 node_info->node_id,
2473 remote_node_rec.upstream_node_name,
2474 node_info->upstream_node_name);
2475 }
2476 }
2477 }
2478
2479 return error_found;
2480 }
2481
2482
2483 static const char *
action_name(const int action)2484 action_name(const int action)
2485 {
2486 switch (action)
2487 {
2488 case PRIMARY_REGISTER:
2489 return "PRIMARY REGISTER";
2490 case PRIMARY_UNREGISTER:
2491 return "PRIMARY UNREGISTER";
2492
2493 case STANDBY_CLONE:
2494 return "STANDBY CLONE";
2495 case STANDBY_REGISTER:
2496 return "STANDBY REGISTER";
2497 case STANDBY_UNREGISTER:
2498 return "STANDBY UNREGISTER";
2499 case STANDBY_PROMOTE:
2500 return "STANDBY PROMOTE";
2501 case STANDBY_FOLLOW:
2502 return "STANDBY FOLLOW";
2503 case STANDBY_SWITCHOVER:
2504 return "STANDBY SWITCHOVER";
2505
2506 case WITNESS_REGISTER:
2507 return "WITNESS REGISTER";
2508 case WITNESS_UNREGISTER:
2509 return "WITNESS UNREGISTER";
2510
2511 case NODE_STATUS:
2512 return "NODE STATUS";
2513 case NODE_CHECK:
2514 return "NODE CHECK";
2515 case NODE_REJOIN:
2516 return "NODE REJOIN";
2517 case NODE_SERVICE:
2518 return "NODE SERVICE";
2519 case NODE_CONTROL:
2520 return "NODE CONTROL";
2521
2522 case CLUSTER_SHOW:
2523 return "CLUSTER SHOW";
2524 case CLUSTER_CLEANUP:
2525 return "CLUSTER CLEANUP";
2526 case CLUSTER_EVENT:
2527 return "CLUSTER EVENT";
2528 case CLUSTER_MATRIX:
2529 return "CLUSTER MATRIX";
2530 case CLUSTER_CROSSCHECK:
2531 return "CLUSTER CROSSCHECK";
2532
2533 case SERVICE_STATUS:
2534 return "SERVICE STATUS";
2535 case SERVICE_PAUSE:
2536 return "SERVICE PAUSE";
2537 case SERVICE_UNPAUSE:
2538 return "SERVICE UNPAUSE";
2539
2540 case DAEMON_START:
2541 return "DAEMON START";
2542 case DAEMON_STOP:
2543 return "DAEMON STOP";
2544 }
2545
2546 return "UNKNOWN ACTION";
2547 }
2548
2549
2550 void
print_error_list(ItemList * error_list,int log_level)2551 print_error_list(ItemList *error_list, int log_level)
2552 {
2553 ItemListCell *cell = NULL;
2554
2555 for (cell = error_list->head; cell; cell = cell->next)
2556 {
2557 switch (log_level)
2558 {
2559 /* Currently we only need errors and warnings */
2560 case LOG_ERROR:
2561 log_error("%s", cell->string);
2562 break;
2563 case LOG_WARNING:
2564 log_warning("%s", cell->string);
2565 break;
2566 }
2567 }
2568 }
2569
2570
2571 void
print_status_header(int cols,ColHeader * headers)2572 print_status_header(int cols, ColHeader *headers)
2573 {
2574 int i, di;
2575 int max_cols = 0;
2576
2577
2578 /* count how many columns we actually need to display */
2579 for (i = 0; i < cols; i++)
2580 {
2581 if (headers[i].display == true)
2582 max_cols ++;
2583 }
2584
2585 for (i = 0; i < cols; i++)
2586 {
2587 if (headers[i].display == false)
2588 continue;
2589
2590 if (i == 0)
2591 printf(" ");
2592 else
2593 printf(" | ");
2594
2595 printf("%-*s",
2596 headers[i].max_length,
2597 headers[i].title);
2598 }
2599
2600
2601 printf("\n");
2602 printf("-");
2603
2604 di = 0;
2605 for (i = 0; i < cols; i++)
2606 {
2607 int j;
2608
2609 if (headers[i].display == false)
2610 continue;
2611
2612 for (j = 0; j < headers[i].max_length; j++)
2613 printf("-");
2614
2615 if (di < (max_cols - 1))
2616 printf("-+-");
2617 else
2618 printf("-");
2619 di++;
2620 }
2621
2622 printf("\n");
2623 }
2624
2625
/*
 * print_help_header()
 *
 * Print the one-line program description which opens the help output,
 * followed by a blank line. When the effective user ID is 0 (root), a
 * boxed warning is appended as well.
 */
void
print_help_header(void)
{
	printf(_("%s: replication management tool for PostgreSQL\n"), progname());
	puts("");

	/* nothing more to say unless root is executing "repmgr --help" */
	if (geteuid() != 0)
		return;

	/* add a big friendly warning */
	printf(_(" **************************************************\n"));
	printf(_(" *** repmgr must be executed by a non-superuser ***\n"));
	printf(_(" **************************************************\n"));
	puts("");
}
2641
2642 static void
do_help(void)2643 do_help(void)
2644 {
2645 print_help_header();
2646
2647 printf(_("Usage:\n"));
2648 printf(_(" %s [OPTIONS] primary {register|unregister}\n"), progname());
2649 printf(_(" %s [OPTIONS] standby {register|unregister|clone|promote|follow|switchover}\n"), progname());
2650 printf(_(" %s [OPTIONS] node {status|check|rejoin|service}\n"), progname());
2651 printf(_(" %s [OPTIONS] cluster {show|event|matrix|crosscheck|cleanup}\n"), progname());
2652 printf(_(" %s [OPTIONS] witness {register|unregister}\n"), progname());
2653 printf(_(" %s [OPTIONS] service {status|pause|unpause}\n"), progname());
2654 printf(_(" %s [OPTIONS] daemon {start|stop}\n"), progname());
2655
2656 puts("");
2657
2658 printf(_(" Execute \"%s {primary|standby|node|cluster|witness|service} --help\" to see command-specific options\n"), progname());
2659
2660 puts("");
2661
2662 printf(_("General options:\n"));
2663 printf(_(" -?, --help show this help, then exit\n"));
2664 printf(_(" -V, --version output version information, then exit\n"));
2665 printf(_(" --version-number output version number, then exit\n"));
2666 puts("");
2667
2668 printf(_("General configuration options:\n"));
2669 printf(_(" -b, --pg_bindir=PATH path to PostgreSQL binaries (optional)\n"));
2670 printf(_(" -f, --config-file=PATH path to the repmgr configuration file\n"));
2671 printf(_(" -F, --force force potentially dangerous operations to happen\n"));
2672 puts("");
2673
2674 printf(_("Database connection options:\n"));
2675 printf(_(" -d, --dbname=DBNAME database to connect to (default: "));
2676 if (runtime_options.dbname[0] != '\0')
2677 printf(_("\"%s\")\n"), runtime_options.dbname);
2678 else
2679 printf(_("\"%s\")\n"), runtime_options.username);
2680
2681 printf(_(" -h, --host=HOSTNAME database server host"));
2682 if (runtime_options.host[0] != '\0')
2683 printf(_(" (default: \"%s\")"), runtime_options.host);
2684 printf(_("\n"));
2685
2686 printf(_(" -p, --port=PORT database server port (default: \"%s\")\n"), runtime_options.port);
2687 printf(_(" -U, --username=USERNAME database user name to connect as (default: \"%s\")\n"), runtime_options.username);
2688
2689 puts("");
2690
2691 printf(_("Node-specific options:\n"));
2692 printf(_(" -D, --pgdata=DIR location of the node's data directory \n"));
2693 printf(_(" --node-id specify a node by id (only available for some operations)\n"));
2694 printf(_(" --node-name specify a node by name (only available for some operations)\n"));
2695
2696 puts("");
2697
2698 printf(_("Logging options:\n"));
2699 printf(_(" --dry-run show what would happen for action, but don't execute it\n"));
2700 printf(_(" -L, --log-level set log level (overrides configuration file; default: NOTICE)\n"));
2701 printf(_(" --log-to-file log to file (or logging facility) defined in repmgr.conf\n"));
2702 printf(_(" -q, --quiet suppress all log output apart from errors\n"));
2703 printf(_(" -t, --terse don't display detail, hints and other non-critical output\n"));
2704 printf(_(" -v, --verbose display additional log output (useful for debugging)\n"));
2705
2706 puts("");
2707
2708 printf(_("%s home page: <%s>\n"), "repmgr", REPMGR_URL);
2709 }
2710
2711
2712 /*
2713 * Create the repmgr extension, and grant access for the repmgr
2714 * user if not a superuser.
2715 *
2716 * Note:
2717 * This is one of two places where superuser rights are required.
2718 * We should also consider possible scenarious where a non-superuser
2719 * has sufficient privileges to install the extension.
2720 */
2721
2722 bool
create_repmgr_extension(PGconn * conn)2723 create_repmgr_extension(PGconn *conn)
2724 {
2725 PQExpBufferData query;
2726 PGresult *res;
2727
2728 ExtensionStatus extension_status = REPMGR_UNKNOWN;
2729
2730 t_connection_user userinfo = T_CONNECTION_USER_INITIALIZER;
2731 bool is_superuser = false;
2732 PGconn *superuser_conn = NULL;
2733 PGconn *schema_create_conn = NULL;
2734 t_extension_versions extversions = T_EXTENSION_VERSIONS_INITIALIZER;
2735
2736 extension_status = get_repmgr_extension_status(conn, &extversions);
2737
2738 switch (extension_status)
2739 {
2740 case REPMGR_UNKNOWN:
2741 log_error(_("unable to determine status of \"repmgr\" extension"));
2742 return false;
2743
2744 case REPMGR_UNAVAILABLE:
2745 log_error(_("\"repmgr\" extension is not available"));
2746 return false;
2747
2748 case REPMGR_OLD_VERSION_INSTALLED:
2749 log_error(_("an older version of the \"repmgr\" extension is installed"));
2750 log_detail(_("version %s is installed but newer version %s is available"),
2751 extversions.installed_version,
2752 extversions.default_version);
2753 log_hint(_("update the installed extension version by executing \"ALTER EXTENSION repmgr UPDATE\""));
2754 return false;
2755
2756 case REPMGR_INSTALLED:
2757 log_info(_("\"repmgr\" extension is already installed"));
2758 return true;
2759
2760 case REPMGR_AVAILABLE:
2761 if (runtime_options.dry_run == true)
2762 {
2763 log_notice(_("would now attempt to install extension \"repmgr\""));
2764 }
2765 else
2766 {
2767 log_notice(_("attempting to install extension \"repmgr\""));
2768 }
2769 break;
2770 }
2771
2772 /* 3. Attempt to get a superuser connection */
2773
2774 is_superuser = is_superuser_connection(conn, &userinfo);
2775
2776 get_superuser_connection(&conn, &superuser_conn, &schema_create_conn);
2777
2778 if (runtime_options.dry_run == true)
2779 return true;
2780
2781 /* 4. Create extension */
2782
2783 res = PQexec(schema_create_conn, "CREATE EXTENSION repmgr");
2784
2785 if ((PQresultStatus(res) != PGRES_COMMAND_OK && PQresultStatus(res) != PGRES_TUPLES_OK))
2786 {
2787 log_error(_("unable to create \"repmgr\" extension:\n %s"),
2788 PQerrorMessage(schema_create_conn));
2789 log_hint(_("check that the provided user has sufficient privileges for CREATE EXTENSION"));
2790
2791 PQclear(res);
2792 if (superuser_conn != NULL)
2793 PQfinish(superuser_conn);
2794 return false;
2795 }
2796
2797 PQclear(res);
2798
2799 /* 5. If not superuser, grant usage */
2800 if (is_superuser == false)
2801 {
2802 initPQExpBuffer(&query);
2803
2804 appendPQExpBuffer(&query,
2805 "GRANT USAGE ON SCHEMA repmgr TO %s",
2806 userinfo.username);
2807
2808 res = PQexec(schema_create_conn, query.data);
2809 termPQExpBuffer(&query);
2810
2811 if (PQresultStatus(res) != PGRES_COMMAND_OK)
2812 {
2813 log_error(_("unable to grant usage on \"repmgr\" extension to %s:\n %s"),
2814 userinfo.username,
2815 PQerrorMessage(schema_create_conn));
2816 PQclear(res);
2817
2818 if (superuser_conn != 0)
2819 PQfinish(superuser_conn);
2820
2821 return false;
2822 }
2823
2824 initPQExpBuffer(&query);
2825
2826 appendPQExpBuffer(&query,
2827 "GRANT ALL ON ALL TABLES IN SCHEMA repmgr TO %s",
2828 userinfo.username);
2829
2830 res = PQexec(schema_create_conn, query.data);
2831 termPQExpBuffer(&query);
2832
2833 if (PQresultStatus(res) != PGRES_COMMAND_OK)
2834 {
2835 log_error(_("unable to grant permission on tables on \"repmgr\" extension to %s:\n %s"),
2836 userinfo.username,
2837 PQerrorMessage(schema_create_conn));
2838 PQclear(res);
2839
2840 if (superuser_conn != NULL)
2841 PQfinish(superuser_conn);
2842
2843 return false;
2844 }
2845 }
2846
2847 if (superuser_conn != NULL)
2848 PQfinish(superuser_conn);
2849
2850 log_notice(_("\"repmgr\" extension successfully installed"));
2851
2852 create_event_notification(conn,
2853 &config_file_options,
2854 config_file_options.node_id,
2855 "cluster_created",
2856 true,
2857 NULL);
2858
2859 return true;
2860 }
2861
2862
2863 /**
2864 * check_server_version()
2865 *
2866 * Verify that the server is MIN_SUPPORTED_VERSION_NUM or later
2867 *
2868 * PGconn *conn:
2869 * the connection to check
2870 *
2871 * char *server_type:
2872 * either "primary" or "standby"; used to format error message
2873 *
2874 * bool exit_on_error:
2875 * exit if reported server version is too low; optional to enable some callers
2876 * to perform additional cleanup
2877 *
2878 * char *server_version_string
2879 * passed to get_server_version(), which will place the human-readable
2880 * server version string there (e.g. "9.4.0")
2881 */
2882 int
check_server_version(PGconn * conn,char * server_type,bool exit_on_error,char * server_version_string)2883 check_server_version(PGconn *conn, char *server_type, bool exit_on_error, char *server_version_string)
2884 {
2885 char version_string[MAXVERSIONSTR] = "";
2886 int conn_server_version_num = get_server_version(conn, version_string);
2887
2888 /* Copy the version string, if the caller wants it */
2889 if (server_version_string != NULL)
2890 strncpy(server_version_string, version_string, MAXVERSIONSTR);
2891
2892 if (conn_server_version_num < MIN_SUPPORTED_VERSION_NUM)
2893 {
2894 if (conn_server_version_num > 0)
2895 {
2896 log_error(_("%s requires %s to be PostgreSQL %s or later"),
2897 progname(),
2898 server_type,
2899 MIN_SUPPORTED_VERSION);
2900 log_detail(_("%s server version is %s"),
2901 server_type,
2902 version_string);
2903 }
2904
2905 if (exit_on_error == true)
2906 {
2907 PQfinish(conn);
2908 exit(ERR_BAD_CONFIG);
2909 }
2910
2911 return UNKNOWN_SERVER_VERSION_NUM;
2912 }
2913
2914 /*
2915 * If it's clear a particular repmgr feature branch won't be able to support
2916 * PostgreSQL from a particular PostgreSQL release onwards (e.g. 4.4 with PostgreSQL
2917 * 12 and later due to recovery.conf removal), set MAX_UNSUPPORTED_VERSION and
2918 * MAX_UNSUPPORTED_VERSION_NUM in "repmgr.h" to define the first PostgreSQL
2919 * version which can't be suppored.
2920 */
2921 #ifdef MAX_UNSUPPORTED_VERSION_NUM
2922 if (conn_server_version_num >= MAX_UNSUPPORTED_VERSION_NUM)
2923 {
2924 if (conn_server_version_num > 0)
2925 {
2926 log_error(_("%s %s does not support PostgreSQL %s or later"),
2927 progname(),
2928 REPMGR_VERSION,
2929 MAX_UNSUPPORTED_VERSION);
2930 log_detail(_("%s server version is %s"),
2931 server_type,
2932 version_string);
2933 log_hint(_("For details of supported versions see: https://repmgr.org/docs/current/install-requirements.html#INSTALL-COMPATIBILITY-MATRIX"));
2934 }
2935
2936 if (exit_on_error == true)
2937 {
2938 PQfinish(conn);
2939 exit(ERR_BAD_CONFIG);
2940 }
2941
2942 return UNKNOWN_SERVER_VERSION_NUM;
2943 }
2944 #endif
2945
2946 return conn_server_version_num;
2947 }
2948
2949
2950 int
test_ssh_connection(char * host,char * remote_user)2951 test_ssh_connection(char *host, char *remote_user)
2952 {
2953 char script[MAXLEN] = "";
2954 int r = 1,
2955 i;
2956
2957 /*
2958 * On some OS, true is located in a different place than in Linux we have
2959 * to try them all until all alternatives are gone or we found `true'
2960 * because the target OS may differ from the source OS
2961 */
2962 const char *bin_true_paths[] = {
2963 "/bin/true",
2964 "/usr/bin/true",
2965 NULL
2966 };
2967
2968 for (i = 0; bin_true_paths[i] && r != 0; ++i)
2969 {
2970 if (!remote_user[0])
2971 maxlen_snprintf(script, "ssh -o Batchmode=yes %s %s %s 2>/dev/null",
2972 config_file_options.ssh_options, host, bin_true_paths[i]);
2973 else
2974 maxlen_snprintf(script, "ssh -o Batchmode=yes %s %s -l %s %s 2>/dev/null",
2975 config_file_options.ssh_options, host, remote_user,
2976 bin_true_paths[i]);
2977
2978 log_verbose(LOG_DEBUG, _("test_ssh_connection(): executing %s"), script);
2979 r = system(script);
2980 }
2981
2982 if (r != 0)
2983 log_warning(_("unable to connect to remote host \"%s\" via SSH"), host);
2984
2985 return r;
2986 }
2987
2988
2989
2990
2991 /*
2992 * get_superuser_connection()
2993 *
2994 * Check if provided connection "conn" is a superuser connection, if not attempt to
2995 * make a superuser connection "superuser_conn" with the provided --superuser parameter.
2996 *
2997 * "privileged_conn" is set to whichever connection is the superuser connection.
2998 */
2999 void
get_superuser_connection(PGconn ** conn,PGconn ** superuser_conn,PGconn ** privileged_conn)3000 get_superuser_connection(PGconn **conn, PGconn **superuser_conn, PGconn **privileged_conn)
3001 {
3002 t_connection_user userinfo = T_CONNECTION_USER_INITIALIZER;
3003 t_conninfo_param_list conninfo_params = T_CONNINFO_PARAM_LIST_INITIALIZER;
3004 bool is_superuser = false;
3005
3006 /* this should never happen */
3007 if (PQstatus(*conn) != CONNECTION_OK)
3008 {
3009 log_error(_("no database connection available"));
3010 log_detail("\n%s", PQerrorMessage(*conn));
3011 exit(ERR_INTERNAL);
3012 }
3013
3014 is_superuser = is_superuser_connection(*conn, &userinfo);
3015
3016 if (is_superuser == true)
3017 {
3018 *privileged_conn = *conn;
3019
3020 return;
3021 }
3022
3023 if (runtime_options.superuser[0] == '\0')
3024 {
3025 log_error(_("\"%s\" is not a superuser and no superuser name supplied"), userinfo.username);
3026 log_hint(_("supply a valid superuser name with -S/--superuser"));
3027 PQfinish(*conn);
3028 exit(ERR_BAD_CONFIG);
3029 }
3030
3031 initialize_conninfo_params(&conninfo_params, false);
3032 conn_to_param_list(*conn, &conninfo_params);
3033 param_set(&conninfo_params, "user", runtime_options.superuser);
3034
3035 *superuser_conn = establish_db_connection_by_params(&conninfo_params, false);
3036
3037 if (PQstatus(*superuser_conn) != CONNECTION_OK)
3038 {
3039 log_error(_("unable to establish superuser connection as \"%s\""),
3040 runtime_options.superuser);
3041
3042 PQfinish(*conn);
3043 exit(ERR_BAD_CONFIG);
3044 }
3045
3046 /* check provided superuser really is superuser */
3047 if (!is_superuser_connection(*superuser_conn, NULL))
3048 {
3049 log_error(_("\"%s\" is not a superuser"), runtime_options.superuser);
3050 PQfinish(*superuser_conn);
3051 PQfinish(*conn);
3052 exit(ERR_BAD_CONFIG);
3053 }
3054
3055 log_debug("established superuser connection as \"%s\"", runtime_options.superuser);
3056
3057 *privileged_conn = *superuser_conn;
3058 return;
3059 }
3060
3061
3062 standy_clone_mode
get_standby_clone_mode(void)3063 get_standby_clone_mode(void)
3064 {
3065 standy_clone_mode mode;
3066
3067 if (*config_file_options.barman_host != '\0' && runtime_options.without_barman == false)
3068 mode = barman;
3069 else
3070 mode = pg_basebackup;
3071
3072 return mode;
3073 }
3074
3075
/*
 * make_pg_path()
 *
 * Append the path of the PostgreSQL executable "file" to "buf".
 *
 * The path is formed by appending the contents of "pg_bindir" immediately
 * followed by "file"; no separator is inserted between the two here.
 * NOTE(review): presumably "pg_bindir" is either empty or already ends
 * with a path separator - confirm where it is populated.
 */
void
make_pg_path(PQExpBufferData *buf, const char *file)
{
	appendPQExpBuffer(buf, "%s%s",
					  pg_bindir, file);
}
3082
3083
3084 int
copy_remote_files(char * host,char * remote_user,char * remote_path,char * local_path,bool is_directory,int server_version_num)3085 copy_remote_files(char *host, char *remote_user, char *remote_path,
3086 char *local_path, bool is_directory, int server_version_num)
3087 {
3088 PQExpBufferData rsync_flags;
3089 char script[MAXLEN] = "";
3090 char host_string[MAXLEN] = "";
3091 int r = 0;
3092
3093 initPQExpBuffer(&rsync_flags);
3094
3095 if (*config_file_options.rsync_options == '\0')
3096 {
3097 appendPQExpBufferStr(&rsync_flags,
3098 "--archive --checksum --compress --progress --rsh=ssh");
3099 }
3100 else
3101 {
3102 appendPQExpBufferStr(&rsync_flags,
3103 config_file_options.rsync_options);
3104 }
3105
3106 if (runtime_options.force)
3107 {
3108 appendPQExpBufferStr(&rsync_flags,
3109 " --delete --checksum");
3110 }
3111
3112 if (!remote_user[0])
3113 {
3114 maxlen_snprintf(host_string, "%s", host);
3115 }
3116 else
3117 {
3118 maxlen_snprintf(host_string, "%s@%s", remote_user, host);
3119 }
3120
3121 /*
3122 * When copying the main PGDATA directory, certain files and contents of
3123 * certain directories need to be excluded.
3124 *
3125 * See function 'sendDir()' in 'src/backend/replication/basebackup.c' -
3126 * we're basically simulating what pg_basebackup does, but with rsync
3127 * rather than the BASEBACKUP replication protocol command.
3128 *
3129 * *However* currently we'll always copy the contents of the 'pg_replslot'
3130 * directory and delete later if appropriate.
3131 */
3132 if (is_directory)
3133 {
3134 /* Files which we don't want */
3135 appendPQExpBufferStr(&rsync_flags,
3136 " --exclude=postmaster.pid --exclude=postmaster.opts --exclude=global/pg_control");
3137
3138 appendPQExpBufferStr(&rsync_flags,
3139 " --exclude=recovery.conf --exclude=recovery.done");
3140
3141 /*
3142 * Ideally we'd use PG_AUTOCONF_FILENAME from utils/guc.h, but
3143 * that has too many dependencies for a mere client program.
3144 */
3145 appendPQExpBuffer(&rsync_flags, " --exclude=%s.tmp",
3146 PG_AUTOCONF_FILENAME);
3147
3148 /* Temporary files which we don't want, if they exist */
3149 appendPQExpBuffer(&rsync_flags, " --exclude=%s*",
3150 PG_TEMP_FILE_PREFIX);
3151
3152 /* Directories which we don't want */
3153
3154 if (server_version_num >= 100000)
3155 {
3156 appendPQExpBufferStr(&rsync_flags,
3157 " --exclude=pg_wal/* --exclude=log/*");
3158 }
3159 else
3160 {
3161 appendPQExpBufferStr(&rsync_flags,
3162 " --exclude=pg_xlog/* --exclude=pg_log/*");
3163 }
3164
3165 appendPQExpBufferStr(&rsync_flags,
3166 " --exclude=pg_stat_tmp/*");
3167
3168 maxlen_snprintf(script, "rsync %s %s:%s/* %s",
3169 rsync_flags.data, host_string, remote_path, local_path);
3170 }
3171 else
3172 {
3173 maxlen_snprintf(script, "rsync %s %s:%s %s",
3174 rsync_flags.data, host_string, remote_path, local_path);
3175 }
3176
3177 termPQExpBuffer(&rsync_flags);
3178
3179 log_info(_("rsync command line:\n %s"), script);
3180
3181 r = system(script);
3182
3183 log_debug("copy_remote_files(): r = %i; WIFEXITED: %i; WEXITSTATUS: %i", r, WIFEXITED(r), WEXITSTATUS(r));
3184
3185 /* exit code 24 indicates vanished files, which isn't a problem for us */
3186 if (WIFEXITED(r) && WEXITSTATUS(r) && WEXITSTATUS(r) != 24)
3187 log_verbose(LOG_WARNING, "copy_remote_files(): rsync returned unexpected exit status %i", WEXITSTATUS(r));
3188
3189 return r;
3190 }
3191
3192
3193
3194
3195 void
make_remote_repmgr_path(PQExpBufferData * output_buf,t_node_info * remote_node_record)3196 make_remote_repmgr_path(PQExpBufferData *output_buf, t_node_info *remote_node_record)
3197 {
3198 if (config_file_options.repmgr_bindir[0] != '\0')
3199 {
3200 int len = strlen(config_file_options.repmgr_bindir);
3201
3202 appendPQExpBufferStr(output_buf,
3203 config_file_options.repmgr_bindir);
3204
3205 /* Add trailing slash */
3206 if (config_file_options.repmgr_bindir[len - 1] != '/')
3207 {
3208 appendPQExpBufferChar(output_buf, '/');
3209 }
3210 }
3211 else if (pg_bindir[0] != '\0')
3212 {
3213 appendPQExpBufferStr(output_buf,
3214 pg_bindir);
3215 }
3216
3217 appendPQExpBuffer(output_buf,
3218 "%s -f %s ",
3219 progname(),
3220 remote_node_record->config_file);
3221
3222 /*
3223 * If --log-level was explicitly supplied, pass that through
3224 * to the remote repmgr client too.
3225 */
3226 if (runtime_options.log_level[0] != '\0')
3227 {
3228 appendPQExpBuffer(output_buf,
3229 " -L %s ",
3230 runtime_options.log_level);
3231 }
3232
3233 }
3234
3235
3236 void
make_repmgrd_path(PQExpBufferData * output_buf)3237 make_repmgrd_path(PQExpBufferData *output_buf)
3238 {
3239 if (config_file_options.repmgr_bindir[0] != '\0')
3240 {
3241 int len = strlen(config_file_options.repmgr_bindir);
3242
3243 appendPQExpBufferStr(output_buf,
3244 config_file_options.repmgr_bindir);
3245
3246 /* Add trailing slash */
3247 if (config_file_options.repmgr_bindir[len - 1] != '/')
3248 {
3249 appendPQExpBufferChar(output_buf, '/');
3250 }
3251 }
3252 else if (pg_bindir[0] != '\0')
3253 {
3254 appendPQExpBufferStr(output_buf,
3255 pg_bindir);
3256 }
3257
3258 appendPQExpBuffer(output_buf,
3259 "repmgrd -f %s ",
3260 config_file_path);
3261 }
3262
3263
3264 /* ======================== */
3265 /* server control functions */
3266 /* ======================== */
3267
3268 void
get_server_action(t_server_action action,char * script,char * data_dir)3269 get_server_action(t_server_action action, char *script, char *data_dir)
3270 {
3271 PQExpBufferData command;
3272
3273 if (data_dir == NULL || data_dir[0] == '\0')
3274 data_dir = "(none provided)";
3275
3276 switch (action)
3277 {
3278 case ACTION_NONE:
3279 script[0] = '\0';
3280 return;
3281
3282 case ACTION_START:
3283 {
3284 if (config_file_options.service_start_command[0] != '\0')
3285 {
3286 maxlen_snprintf(script, "%s",
3287 config_file_options.service_start_command);
3288 }
3289 else
3290 {
3291 initPQExpBuffer(&command);
3292
3293 make_pg_path(&command, "pg_ctl");
3294
3295 appendPQExpBuffer(&command,
3296 " %s -w -D ",
3297 config_file_options.pg_ctl_options);
3298
3299 appendShellString(&command,
3300 data_dir);
3301
3302 appendPQExpBuffer(&command,
3303 " start");
3304
3305 strncpy(script, command.data, MAXLEN);
3306
3307 termPQExpBuffer(&command);
3308 }
3309
3310 return;
3311 }
3312
3313 case ACTION_STOP:
3314 case ACTION_STOP_WAIT:
3315 {
3316 if (config_file_options.service_stop_command[0] != '\0')
3317 {
3318 maxlen_snprintf(script, "%s",
3319 config_file_options.service_stop_command);
3320 }
3321 else
3322 {
3323 initPQExpBuffer(&command);
3324 make_pg_path(&command, "pg_ctl");
3325
3326 appendPQExpBuffer(&command,
3327 " %s -D ",
3328 config_file_options.pg_ctl_options);
3329
3330 appendShellString(&command,
3331 data_dir);
3332
3333 if (action == ACTION_STOP_WAIT)
3334 appendPQExpBuffer(&command,
3335 " -w");
3336 else
3337 appendPQExpBuffer(&command,
3338 " -W");
3339
3340 appendPQExpBuffer(&command,
3341 " -m fast stop");
3342
3343 strncpy(script, command.data, MAXLEN);
3344
3345 termPQExpBuffer(&command);
3346 }
3347 return;
3348 }
3349
3350 case ACTION_RESTART:
3351 {
3352 if (config_file_options.service_restart_command[0] != '\0')
3353 {
3354 maxlen_snprintf(script, "%s",
3355 config_file_options.service_restart_command);
3356 }
3357 else
3358 {
3359 initPQExpBuffer(&command);
3360
3361 make_pg_path(&command, "pg_ctl");
3362
3363 appendPQExpBuffer(&command,
3364 " %s -w -D ",
3365 config_file_options.pg_ctl_options);
3366
3367 appendShellString(&command,
3368 data_dir);
3369
3370 appendPQExpBuffer(&command,
3371 " restart");
3372
3373 strncpy(script, command.data, MAXLEN);
3374
3375 termPQExpBuffer(&command);
3376 }
3377 return;
3378 }
3379
3380 case ACTION_RELOAD:
3381 {
3382 if (config_file_options.service_reload_command[0] != '\0')
3383 {
3384 maxlen_snprintf(script, "%s",
3385 config_file_options.service_reload_command);
3386 }
3387 else
3388 {
3389 initPQExpBuffer(&command);
3390
3391 make_pg_path(&command, "pg_ctl");
3392
3393 appendPQExpBuffer(&command,
3394 " %s -w -D ",
3395 config_file_options.pg_ctl_options);
3396
3397 appendShellString(&command,
3398 data_dir);
3399
3400 appendPQExpBuffer(&command,
3401 " reload");
3402
3403 strncpy(script, command.data, MAXLEN);
3404
3405 termPQExpBuffer(&command);
3406
3407 }
3408 return;
3409 }
3410
3411 case ACTION_PROMOTE:
3412 {
3413 if (config_file_options.service_promote_command[0] != '\0')
3414 {
3415 maxlen_snprintf(script, "%s",
3416 config_file_options.service_promote_command);
3417 }
3418 else
3419 {
3420 initPQExpBuffer(&command);
3421
3422 make_pg_path(&command, "pg_ctl");
3423
3424 appendPQExpBuffer(&command,
3425 " %s -w -D ",
3426 config_file_options.pg_ctl_options);
3427
3428 appendShellString(&command,
3429 data_dir);
3430
3431 appendPQExpBuffer(&command,
3432 " promote");
3433
3434 strncpy(script, command.data, MAXLEN);
3435
3436 termPQExpBuffer(&command);
3437 }
3438 return;
3439 }
3440
3441 default:
3442 return;
3443 }
3444
3445 return;
3446 }
3447
3448
3449 bool
data_dir_required_for_action(t_server_action action)3450 data_dir_required_for_action(t_server_action action)
3451 {
3452 switch (action)
3453 {
3454 case ACTION_NONE:
3455 return false;
3456
3457 case ACTION_START:
3458 if (config_file_options.service_start_command[0] != '\0')
3459 {
3460 return false;
3461 }
3462 return true;
3463
3464 case ACTION_STOP:
3465 case ACTION_STOP_WAIT:
3466 if (config_file_options.service_stop_command[0] != '\0')
3467 {
3468 return false;
3469 }
3470 return true;
3471
3472 case ACTION_RESTART:
3473 if (config_file_options.service_restart_command[0] != '\0')
3474 {
3475 return false;
3476 }
3477 return true;
3478
3479 case ACTION_RELOAD:
3480 if (config_file_options.service_reload_command[0] != '\0')
3481 {
3482 return false;
3483 }
3484 return true;
3485
3486 case ACTION_PROMOTE:
3487 if (config_file_options.service_promote_command[0] != '\0')
3488 {
3489 return false;
3490 }
3491 return true;
3492
3493 default:
3494 return false;
3495 }
3496
3497 return false;
3498 }
3499
3500
3501 /*
3502 * Copy the location of the configuration file directory into the
3503 * provided buffer; if "config_directory" provided, use that, otherwise
3504 * default to the data directory.
3505 *
3506 * This is primarily intended for use with "pg_ctl" (which itself shouldn't
3507 * be used outside of development environments).
3508 */
3509 void
get_node_config_directory(char * config_dir_buf)3510 get_node_config_directory(char *config_dir_buf)
3511 {
3512 if (config_file_options.config_directory[0] != '\0')
3513 {
3514 strncpy(config_dir_buf, config_file_options.config_directory, MAXPGPATH);
3515 return;
3516 }
3517
3518 if (config_file_options.data_directory[0] != '\0')
3519 {
3520 strncpy(config_dir_buf, config_file_options.data_directory, MAXPGPATH);
3521 return;
3522 }
3523
3524 return;
3525 }
3526
3527
3528 void
get_node_data_directory(char * data_dir_buf)3529 get_node_data_directory(char *data_dir_buf)
3530 {
3531 /*
3532 * the configuration file setting has priority, and will always be set
3533 * when a configuration file was provided
3534 */
3535 if (config_file_options.data_directory[0] != '\0')
3536 {
3537 strncpy(data_dir_buf, config_file_options.data_directory, MAXPGPATH);
3538 return;
3539 }
3540
3541 if (runtime_options.data_dir[0] != '\0')
3542 {
3543 strncpy(data_dir_buf, runtime_options.data_dir, MAXPGPATH);
3544 return;
3545 }
3546
3547 return;
3548 }
3549
3550
3551 /*
3552 * initialise a node record from the provided configuration
3553 * parameters
3554 */
3555 void
init_node_record(t_node_info * node_record)3556 init_node_record(t_node_info *node_record)
3557 {
3558 node_record->node_id = config_file_options.node_id;
3559 node_record->upstream_node_id = runtime_options.upstream_node_id;
3560 node_record->priority = config_file_options.priority;
3561 node_record->active = true;
3562
3563 if (config_file_options.location[0] != '\0')
3564 strncpy(node_record->location, config_file_options.location, MAXLEN);
3565 else
3566 strncpy(node_record->location, "default", MAXLEN);
3567
3568
3569 strncpy(node_record->node_name, config_file_options.node_name, sizeof(node_record->node_name));
3570 strncpy(node_record->conninfo, config_file_options.conninfo, MAXLEN);
3571 strncpy(node_record->config_file, config_file_path, MAXPGPATH);
3572
3573 if (config_file_options.replication_user[0] != '\0')
3574 {
3575 /* replication user explicitly provided in configuration file */
3576 strncpy(node_record->repluser, config_file_options.replication_user, NAMEDATALEN);
3577 }
3578 else
3579 {
3580 /* use the "user" value from "conninfo" */
3581 char repluser[MAXLEN] = "";
3582
3583 (void) get_conninfo_value(config_file_options.conninfo, "user", repluser);
3584 strncpy(node_record->repluser, repluser, NAMEDATALEN);
3585 }
3586
3587 if (config_file_options.use_replication_slots == true)
3588 {
3589 create_slot_name(node_record->slot_name, config_file_options.node_id);
3590 }
3591 }
3592
3593
3594 bool
can_use_pg_rewind(PGconn * conn,const char * data_directory,PQExpBufferData * reason)3595 can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason)
3596 {
3597 bool can_use = true;
3598
3599 /* "full_page_writes" must be on in any case */
3600 if (guc_set(conn, "full_page_writes", "=", "off"))
3601 {
3602 appendPQExpBuffer(reason,
3603 _("\"full_page_writes\" must be set to \"on\""));
3604
3605 can_use = false;
3606 }
3607
3608 /*
3609 * "wal_log_hints" off - are data checksums available? Note: we're
3610 * checking the local pg_control file here as the value will be the same
3611 * throughout the cluster and saves a round-trip to the demotion
3612 * candidate.
3613 */
3614 if (guc_set(conn, "wal_log_hints", "=", "on") == false)
3615 {
3616 int data_checksum_version = get_data_checksum_version(data_directory);
3617
3618 if (data_checksum_version == UNKNOWN_DATA_CHECKSUM_VERSION)
3619 {
3620 if (can_use == false)
3621 appendPQExpBuffer(reason, "; ");
3622
3623 appendPQExpBuffer(reason,
3624 _("\"wal_log_hints\" is set to \"off\" but unable to determine data checksum version"));
3625 can_use = false;
3626 }
3627 else if (data_checksum_version == 0)
3628 {
3629 if (can_use == false)
3630 appendPQExpBuffer(reason, "; ");
3631
3632 appendPQExpBuffer(reason,
3633 _("\"wal_log_hints\" is set to \"off\" and data checksums are disabled"));
3634
3635 can_use = false;
3636 }
3637 }
3638
3639 return can_use;
3640 }
3641
3642
3643 void
make_standby_signal_path(char * buf)3644 make_standby_signal_path(char *buf)
3645 {
3646 snprintf(buf, MAXPGPATH,
3647 "%s/%s",
3648 config_file_options.data_directory,
3649 STANDBY_SIGNAL_FILE);
3650 }
3651
3652 /*
3653 * create standby.signal (PostgreSQL 12 and later)
3654 */
3655 bool
write_standby_signal(void)3656 write_standby_signal(void)
3657 {
3658 char standby_signal_file_path[MAXPGPATH] = "";
3659 FILE *file;
3660 mode_t um;
3661
3662 make_standby_signal_path(standby_signal_file_path);
3663
3664 /* Set umask to 0600 */
3665 um = umask((~(S_IRUSR | S_IWUSR)) & (S_IRWXG | S_IRWXO));
3666 file = fopen(standby_signal_file_path, "w");
3667 umask(um);
3668
3669 if (file == NULL)
3670 {
3671 log_error(_("unable to create %s file at \"%s\""),
3672 STANDBY_SIGNAL_FILE,
3673 standby_signal_file_path);
3674 log_detail("%s", strerror(errno));
3675
3676 return false;
3677 }
3678
3679 if (fputs("# created by repmgr\n", file) == EOF)
3680 {
3681 log_error(_("unable to write to %s file at \"%s\""),
3682 STANDBY_SIGNAL_FILE,
3683 standby_signal_file_path);
3684 fclose(file);
3685
3686 return false;
3687 }
3688
3689 fclose(file);
3690
3691 return true;
3692 }
3693
3694
3695 /*
3696 * NOTE:
3697 * - the provided connection should be for the normal repmgr user
3698 * - if upstream_node_record is not NULL, its "repluser" entry, if
3699 * set, will be used as the fallback replication user
3700 */
3701 bool
create_replication_slot(PGconn * conn,char * slot_name,t_node_info * upstream_node_record,PQExpBufferData * error_msg)3702 create_replication_slot(PGconn *conn, char *slot_name, t_node_info *upstream_node_record, PQExpBufferData *error_msg)
3703 {
3704 PGconn *slot_conn = NULL;
3705 bool use_replication_protocol = false;
3706 bool success = true;
3707 char *replication_user = NULL;
3708
3709 _determine_replication_slot_user(conn, upstream_node_record, &replication_user);
3710 /*
3711 * If called in --dry-run context, if the replication slot user is not the
3712 * repmgr user, attempt to validate the connection.
3713 */
3714 if (runtime_options.dry_run == true)
3715 {
3716 switch (ReplicationSlotUser)
3717 {
3718 case USER_TYPE_UNKNOWN:
3719 log_error("unable to determine user for replication slot creation");
3720 return false;
3721 case REPMGR_USER:
3722 log_info(_("replication slots will be created by user \"%s\""),
3723 PQuser(conn));
3724 return true;
3725
3726 case REPLICATION_USER_NODE:
3727 case REPLICATION_USER_OPT:
3728 {
3729 PGconn *repl_conn = duplicate_connection(conn,
3730 replication_user,
3731 true);
3732 if (repl_conn == NULL || PQstatus(repl_conn) != CONNECTION_OK)
3733 {
3734 log_error(_("unable to create replication connection as user \"%s\""),
3735 replication_user);
3736 log_detail("%s", PQerrorMessage(repl_conn));
3737
3738 PQfinish(repl_conn);
3739 return false;
3740 }
3741 log_info(_("replication slots will be created by replication user \"%s\""),
3742 replication_user);
3743 PQfinish(repl_conn);
3744 return true;
3745 }
3746 case SUPERUSER:
3747 {
3748 PGconn *superuser_conn = duplicate_connection(conn,
3749 runtime_options.superuser,
3750 false);
3751 if (superuser_conn == NULL || PQstatus(superuser_conn )!= CONNECTION_OK)
3752 {
3753 log_error(_("unable to create superuser connection as user \"%s\""),
3754 runtime_options.superuser);
3755 log_detail("%s", PQerrorMessage(superuser_conn));
3756
3757 PQfinish(superuser_conn);
3758
3759 return false;
3760 }
3761
3762 log_info(_("replication slots will be created by superuser \"%s\""),
3763 runtime_options.superuser);
3764 PQfinish(superuser_conn);
3765 }
3766 }
3767
3768 }
3769
3770 /*
3771 * If we can't create a replication slot with the connection provided to
3772 * the function, create an connection with appropriate permissions.
3773 */
3774 switch (ReplicationSlotUser)
3775 {
3776 case USER_TYPE_UNKNOWN:
3777 log_error("unable to determine user for replication slot creation");
3778 return false;
3779 case REPMGR_USER:
3780 slot_conn = conn;
3781 log_info(_("creating replication slot as user \"%s\""),
3782 PQuser(conn));
3783 break;
3784
3785 case REPLICATION_USER_NODE:
3786 case REPLICATION_USER_OPT:
3787 {
3788 slot_conn = duplicate_connection(conn,
3789 replication_user,
3790 true);
3791 if (slot_conn == NULL || PQstatus(slot_conn) != CONNECTION_OK)
3792 {
3793 log_error(_("unable to create replication connection as user \"%s\""),
3794 runtime_options.replication_user);
3795 log_detail("%s", PQerrorMessage(slot_conn));
3796
3797 PQfinish(slot_conn);
3798 return false;
3799 }
3800 use_replication_protocol = true;
3801 log_info(_("creating replication slot as replication user \"%s\""),
3802 replication_user);
3803 }
3804 break;
3805
3806 case SUPERUSER:
3807 {
3808 slot_conn = duplicate_connection(conn,
3809 runtime_options.superuser,
3810 false);
3811 if (slot_conn == NULL || PQstatus(slot_conn )!= CONNECTION_OK)
3812 {
3813 log_error(_("unable to create super connection as user \"%s\""),
3814 runtime_options.superuser);
3815 log_detail("%s", PQerrorMessage(slot_conn));
3816
3817 PQfinish(slot_conn);
3818
3819 return false;
3820 }
3821 log_info(_("creating replication slot as superuser \"%s\""),
3822 runtime_options.superuser);
3823 }
3824 break;
3825 }
3826
3827 if (use_replication_protocol == true)
3828 {
3829 success = create_replication_slot_replprot(conn, slot_conn, slot_name, error_msg);
3830 }
3831 else
3832 {
3833 success = create_replication_slot_sql(slot_conn, slot_name, error_msg);
3834 }
3835
3836
3837 if (slot_conn != conn)
3838 PQfinish(slot_conn);
3839
3840 return success;
3841 }
3842
3843
3844 bool
drop_replication_slot_if_exists(PGconn * conn,int node_id,char * slot_name)3845 drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name)
3846 {
3847 t_node_info node_record = T_NODE_INFO_INITIALIZER;
3848 t_replication_slot slot_info = T_REPLICATION_SLOT_INITIALIZER;
3849 RecordStatus record_status;
3850
3851 char *replication_user = NULL;
3852 bool success = true;
3853
3854 if (node_id != UNKNOWN_NODE_ID)
3855 {
3856 record_status = get_node_record(conn, node_id, &node_record);
3857 }
3858
3859 _determine_replication_slot_user(conn, &node_record, &replication_user);
3860
3861 record_status = get_slot_record(conn, slot_name, &slot_info);
3862
3863 log_verbose(LOG_DEBUG, "attempting to delete slot \"%s\" on node %i",
3864 slot_name, node_id);
3865
3866 if (record_status != RECORD_FOUND)
3867 {
3868 /* this is not a bad good thing */
3869 log_verbose(LOG_INFO,
3870 _("slot \"%s\" does not exist on node %i, nothing to remove"),
3871 slot_name, node_id);
3872 return true;
3873 }
3874
3875 if (slot_info.active == false)
3876 {
3877 if (drop_replication_slot_sql(conn, slot_name) == true)
3878 {
3879 log_notice(_("replication slot \"%s\" deleted on node %i"), slot_name, node_id);
3880 }
3881 else
3882 {
3883 log_error(_("unable to delete replication slot \"%s\" on node %i"), slot_name, node_id);
3884 success = false;
3885 }
3886 }
3887
3888 /*
3889 * If an active replication slot exists, call Houston as we have a
3890 * problem.
3891 */
3892 else
3893 {
3894 log_warning(_("replication slot \"%s\" is still active on node %i"), slot_name, node_id);
3895 success = false;
3896 }
3897
3898 return success;
3899 }
3900
3901
3902 static void
_determine_replication_slot_user(PGconn * conn,t_node_info * upstream_node_record,char ** replication_user)3903 _determine_replication_slot_user(PGconn *conn, t_node_info *upstream_node_record, char **replication_user)
3904 {
3905 /*
3906 * If not previously done, work out which user will be responsible
3907 * for creating replication slots.
3908 */
3909 if (ReplicationSlotUser == USER_TYPE_UNKNOWN)
3910 {
3911 /*
3912 * Is the repmgr user a superuser?
3913 */
3914 if (is_superuser_connection(conn, NULL))
3915 {
3916 ReplicationSlotUser = REPMGR_USER;
3917 }
3918 /*
3919 * Does the repmgr user have the REPLICATION role?
3920 * Note we don't care here whether the repmgr user can actually
3921 * make a replication connection, we're just confirming that the
3922 * connection we have has the appropriate permissions.
3923 */
3924 else if (is_replication_role(conn, NULL))
3925 {
3926 ReplicationSlotUser = REPMGR_USER;
3927 }
3928 /*
3929 * Is a superuser provided with --superuser?
3930 * We'll check later whether we can make a connection as that user.
3931 */
3932 else if (runtime_options.superuser[0] != '\0')
3933 {
3934 ReplicationSlotUser = SUPERUSER;
3935 }
3936 /*
3937 * Is a replication user provided with --replication-user?
3938 * We'll check later whether we can make a replication connection as that user.
3939 * Overrides any replication user defined in the upstream node record.
3940 */
3941 else if (runtime_options.replication_user[0] != '\0')
3942 {
3943 ReplicationSlotUser = REPLICATION_USER_OPT;
3944 *replication_user = runtime_options.replication_user;
3945 }
3946 /*
3947 * Is the upstream's node record provided, and does it have a different
3948 * replication user?
3949 * We'll check later whether we can make a replication connection as that user.
3950 */
3951 else if (upstream_node_record != NULL && upstream_node_record->node_id != UNKNOWN_NODE_ID
3952 && strncmp(upstream_node_record->repluser, PQuser(conn), NAMEDATALEN) != 0)
3953 {
3954 ReplicationSlotUser = REPLICATION_USER_NODE;
3955 *replication_user = upstream_node_record->repluser;
3956 }
3957 }
3958 }
3959
3960
3961 bool
check_replication_slots_available(int node_id,PGconn * conn)3962 check_replication_slots_available(int node_id, PGconn* conn)
3963 {
3964 int max_replication_slots = UNKNOWN_VALUE;
3965 int free_slots = get_free_replication_slot_count(conn, &max_replication_slots);
3966
3967 if (free_slots < 0)
3968 {
3969 log_error(_("unable to determine number of free replication slots on node %i"),
3970 node_id);
3971 return false;
3972 }
3973
3974 if (free_slots == 0)
3975 {
3976 log_error(_("no free replication slots available on node %i"),
3977 node_id);
3978 log_hint(_("consider increasing \"max_replication_slots\" (current value: %i)"),
3979 max_replication_slots);
3980 return false;
3981 }
3982 else if (runtime_options.dry_run == true)
3983 {
3984 log_info(_("replication slots in use, %i free slots on node %i"),
3985 node_id,
3986 free_slots);
3987 }
3988
3989 return true;
3990 }
3991
3992
3993 /*
3994 * Check whether the specified standby has joined to its upstream.
3995 *
3996 * This is used by "standby switchover" and "node rejoin" to check
3997 * the success of a node rejoin operation.
3998 *
3999 * IMPORTANT: the timeout settings will be taken from the node where the check
4000 * is performed, which might not be the standby itself.
4001 */
4002 standy_join_status
check_standby_join(PGconn * upstream_conn,t_node_info * upstream_node_record,t_node_info * standby_node_record)4003 check_standby_join(PGconn *upstream_conn, t_node_info *upstream_node_record, t_node_info *standby_node_record)
4004 {
4005 int i;
4006 bool available = false;
4007
4008 for (i = 0; i < config_file_options.standby_reconnect_timeout; i++)
4009 {
4010 if (is_server_available(config_file_options.conninfo))
4011 {
4012 log_verbose(LOG_INFO, _("node \"%s\" (ID: %i) is pingable"),
4013 standby_node_record->node_name,
4014 standby_node_record->node_id);
4015 available = true;
4016 break;
4017 }
4018
4019 if (i % 5 == 0)
4020 {
4021 log_verbose(LOG_INFO, _("waiting for node \"%s\" (ID: %i) to respond to pings; %i of max %i attempts (parameter \"node_rejoin_timeout\")"),
4022 standby_node_record->node_name,
4023 standby_node_record->node_id,
4024 i + 1,
4025 config_file_options.node_rejoin_timeout);
4026 }
4027 else
4028 {
4029 log_debug("sleeping 1 second waiting for node \"%s\" (ID: %i) to respond to pings; %i of max %i attempts",
4030 standby_node_record->node_name,
4031 standby_node_record->node_id,
4032 i + 1,
4033 config_file_options.node_rejoin_timeout);
4034 }
4035
4036 sleep(1);
4037 }
4038
4039 /* node did not become available */
4040 if (available == false)
4041 {
4042 return JOIN_FAIL_NO_PING;
4043 }
4044
4045 for (; i < config_file_options.node_rejoin_timeout; i++)
4046 {
4047 char *node_state = NULL;
4048 NodeAttached node_attached = is_downstream_node_attached(upstream_conn,
4049 standby_node_record->node_name,
4050 &node_state);
4051 if (node_attached == NODE_ATTACHED)
4052 {
4053 log_verbose(LOG_INFO, _("node \"%s\" (ID: %i) has attached to its upstream node"),
4054 standby_node_record->node_name,
4055 standby_node_record->node_id);
4056 return JOIN_SUCCESS;
4057 }
4058
4059 if (i % 5 == 0)
4060 {
4061 log_info(_("waiting for node \"%s\" (ID: %i) to connect to new primary; %i of max %i attempts (parameter \"node_rejoin_timeout\")"),
4062 standby_node_record->node_name,
4063 standby_node_record->node_id,
4064 i + 1,
4065 config_file_options.node_rejoin_timeout);
4066
4067 if (node_attached == NODE_NOT_ATTACHED)
4068 {
4069 log_detail(_("node \"%s\" (ID: %i) is currrently attached to its upstream node in state \"%s\""),
4070 upstream_node_record->node_name,
4071 standby_node_record->node_id,
4072 node_state);
4073 }
4074 else
4075 {
4076 log_detail(_("checking for record in node \"%s\"'s \"pg_stat_replication\" table where \"application_name\" is \"%s\""),
4077 upstream_node_record->node_name,
4078 standby_node_record->node_name);
4079 }
4080 }
4081 else
4082 {
4083 log_debug("sleeping 1 second waiting for node \"%s\" (ID: %i) to connect to new primary; %i of max %i attempts",
4084 standby_node_record->node_name,
4085 standby_node_record->node_id,
4086 i + 1,
4087 config_file_options.node_rejoin_timeout);
4088 }
4089
4090 sleep(1);
4091 }
4092
4093 return JOIN_FAIL_NO_REPLICATION;
4094 }
4095
4096
4097 /*
4098 * Here we'll perform some timeline sanity checks to ensure the follow target
4099 * can actually be followed or rejoined.
4100 *
4101 * See also comment for check_node_can_follow() in repmgrd-physical.c .
4102 */
4103 bool
check_node_can_attach(TimeLineID local_tli,XLogRecPtr local_xlogpos,PGconn * follow_target_conn,t_node_info * follow_target_node_record,bool is_rejoin)4104 check_node_can_attach(TimeLineID local_tli, XLogRecPtr local_xlogpos, PGconn *follow_target_conn, t_node_info *follow_target_node_record, bool is_rejoin)
4105 {
4106 uint64 local_system_identifier = UNKNOWN_SYSTEM_IDENTIFIER;
4107 PGconn *follow_target_repl_conn = NULL;
4108 t_system_identification follow_target_identification = T_SYSTEM_IDENTIFICATION_INITIALIZER;
4109 bool success = true;
4110
4111 const char *action = is_rejoin == true ? "rejoin" : "follow";
4112
4113 /* check replication connection */
4114 follow_target_repl_conn = establish_replication_connection_from_conn(follow_target_conn,
4115 follow_target_node_record->repluser);
4116
4117 if (PQstatus(follow_target_repl_conn) != CONNECTION_OK)
4118 {
4119 log_error(_("unable to establish a replication connection to the %s target node"), action);
4120 return false;
4121 }
4122 else if (runtime_options.dry_run == true)
4123 {
4124 log_info(_("replication connection to the %s target node was successful"), action);
4125 }
4126
4127 /* check system_identifiers match */
4128 if (identify_system(follow_target_repl_conn, &follow_target_identification) == false)
4129 {
4130 log_error(_("unable to query the %s target node's system identification"), action);
4131
4132 PQfinish(follow_target_repl_conn);
4133 return false;
4134 }
4135
4136 local_system_identifier = get_system_identifier(config_file_options.data_directory);
4137
4138 /*
4139 * Check for things that should never happen, but expect the unexpected anyway.
4140 */
4141
4142 if (local_system_identifier == UNKNOWN_SYSTEM_IDENTIFIER)
4143 {
4144 /*
4145 * We don't return immediately here so subsequent checks can be
4146 * made, but indicate the node will not be able to rejoin.
4147 */
4148 success = false;
4149 if (runtime_options.dry_run == true)
4150 {
4151 log_warning(_("unable to retrieve system identifier from pg_control"));
4152 }
4153 else
4154 {
4155 log_error(_("unable to retrieve system identifier from pg_control, aborting"));
4156 }
4157 }
4158 else if (follow_target_identification.system_identifier != local_system_identifier)
4159 {
4160 /*
4161 * It's never going to be possible to rejoin a node from another cluster,
4162 * so no need to bother with further checks.
4163 */
4164 log_error(_("this node is not part of the %s target node's replication cluster"), action);
4165 log_detail(_("this node's system identifier is %lu, %s target node's system identifier is %lu"),
4166 local_system_identifier,
4167 action,
4168 follow_target_identification.system_identifier);
4169 PQfinish(follow_target_repl_conn);
4170 return false;
4171 }
4172 else if (runtime_options.dry_run == true)
4173 {
4174 log_info(_("local and %s target system identifiers match"), action);
4175 log_detail(_("system identifier is %lu"), local_system_identifier);
4176 }
4177
4178 /* check timelines */
4179
4180 log_verbose(LOG_DEBUG, "local timeline: %i; %s target timeline: %i",
4181 local_tli,
4182 action,
4183 follow_target_identification.timeline);
4184
4185 /*
4186 * The upstream's timeline is lower than ours - we cannot follow, and rejoin
4187 * requires PostgreSQL 9.6 and later.
4188 */
4189 if (follow_target_identification.timeline < local_tli)
4190 {
4191 /*
4192 * "repmgr standby follow" is impossible in this case
4193 */
4194 if (is_rejoin == false)
4195 {
4196 log_error(_("this node's timeline is ahead of the %s target node's timeline"), action);
4197 log_detail(_("this node's timeline is %i, %s target node's timeline is %i"),
4198 local_tli,
4199 action,
4200 follow_target_identification.timeline);
4201
4202 if (PQserverVersion(follow_target_conn) >= 90600)
4203 {
4204 log_hint(_("use \"repmgr node rejoin --force-rewind\" to reattach this node"));
4205 }
4206
4207 PQfinish(follow_target_repl_conn);
4208 return false;
4209 }
4210
4211 /*
4212 * pg_rewind can only rejoin to a lower timeline from PostgreSQL 9.6
4213 */
4214 if (PQserverVersion(follow_target_conn) < 90600)
4215 {
4216 log_error(_("this node's timeline is ahead of the %s target node's timeline"), action);
4217 log_detail(_("this node's timeline is %i, %s target node's timeline is %i"),
4218 local_tli,
4219 action,
4220 follow_target_identification.timeline);
4221
4222 if (runtime_options.force_rewind_used == true)
4223 {
4224 log_hint(_("pg_rewind can only be used to rejoin to a node with a lower timeline from PostgreSQL 9.6"));
4225 }
4226
4227 PQfinish(follow_target_repl_conn);
4228 return false;
4229 }
4230
4231 if (runtime_options.force_rewind_used == false)
4232 {
4233 log_notice(_("pg_rewind execution required for this node to attach to rejoin target node %i"),
4234 follow_target_node_record->node_id);
4235 log_hint(_("provide --force-rewind"));
4236 PQfinish(follow_target_repl_conn);
4237 return false;
4238 }
4239 }
4240
4241 /* timelines are the same - check relative positions */
4242 else if (follow_target_identification.timeline == local_tli)
4243 {
4244 XLogRecPtr follow_target_xlogpos = get_node_current_lsn(follow_target_conn);
4245
4246 if (local_xlogpos == InvalidXLogRecPtr || follow_target_xlogpos == InvalidXLogRecPtr)
4247 {
4248 log_error(_("unable to compare LSN positions"));
4249 PQfinish(follow_target_repl_conn);
4250 return false;
4251 }
4252
4253 if (local_xlogpos <= follow_target_xlogpos)
4254 {
4255 log_info(_("timelines are same, this server is not ahead"));
4256 log_detail(_("local node lsn is %X/%X, %s target lsn is %X/%X"),
4257 format_lsn(local_xlogpos),
4258 action,
4259 format_lsn(follow_target_xlogpos));
4260 }
4261 else
4262 {
4263 /*
4264 * Unable to follow or join to a node we're ahead of, if we're on the
4265 * same timeline. Also, pg_rewind does not detect this situation,
4266 * as there is no definitive fork point.
4267 *
4268 * Note that Pg will still happily attach to the upstream in state "streaming"
4269 * for a while but then detach with an endless stream of
4270 * "record with incorrect prev-link" errors.
4271 */
4272 log_error(_("this node ahead of the %s target on the same timeline (%i)"), action, local_tli);
4273 log_detail(_("local node lsn is %X/%X, %s target lsn is %X/%X"),
4274 format_lsn(local_xlogpos),
4275 action,
4276 format_lsn(follow_target_xlogpos));
4277
4278 if (is_rejoin == true)
4279 {
4280 log_hint(_("the --force-rewind option is ineffective in this case"));
4281 }
4282
4283 success = false;
4284 }
4285 }
4286 else
4287 {
4288 /*
4289 * upstream has higher timeline - check where it forked off from this node's timeline
4290 */
4291 TimeLineHistoryEntry *follow_target_history = get_timeline_history(follow_target_repl_conn,
4292 local_tli + 1);
4293
4294 if (follow_target_history == NULL)
4295 {
4296 /* get_timeline_history() will emit relevant error messages */
4297 PQfinish(follow_target_repl_conn);
4298 return false;
4299 }
4300
4301 log_debug("local tli: %i; local_xlogpos: %X/%X; follow_target_history->tli: %i; follow_target_history->end: %X/%X",
4302 local_tli,
4303 format_lsn(local_xlogpos),
4304 follow_target_history->tli,
4305 format_lsn(follow_target_history->end));
4306
4307 /*
4308 * Local node has proceeded beyond the follow target's fork, so we
4309 * definitely can't attach.
4310 *
4311 * This could be the case if the follow target was promoted, but does
4312 * not contain all changes which are being replayed to this standby.
4313 */
4314 if (local_xlogpos > follow_target_history->end)
4315 {
4316 if (is_rejoin == true && runtime_options.force_rewind_used == true)
4317 {
4318 log_notice(_("pg_rewind execution required for this node to attach to rejoin target node %i"),
4319 follow_target_node_record->node_id);
4320 }
4321 else
4322 {
4323 log_error(_("this node cannot attach to %s target node %i"),
4324 action,
4325 follow_target_node_record->node_id);
4326 success = false;
4327 }
4328
4329 log_detail(_("%s target server's timeline %i forked off current database system timeline %i before current recovery point %X/%X"),
4330 action,
4331 local_tli + 1,
4332 local_tli,
4333 format_lsn(local_xlogpos));
4334
4335 if (is_rejoin == true && runtime_options.force_rewind_used == false)
4336 {
4337 log_hint(_("use --force-rewind to execute pg_rewind"));
4338 }
4339 }
4340
4341 if (success == true)
4342 {
4343 if (is_rejoin == false || (is_rejoin == true && runtime_options.force_rewind_used == false))
4344 {
4345 log_info(_("local node %i can attach to %s target node %i"),
4346 config_file_options.node_id,
4347 action,
4348 follow_target_node_record->node_id);
4349
4350 log_detail(_("local node's recovery point: %X/%X; %s target node's fork point: %X/%X"),
4351 format_lsn(local_xlogpos),
4352 action,
4353 format_lsn(follow_target_history->end));
4354 }
4355 }
4356
4357 pfree(follow_target_history);
4358 }
4359
4360 PQfinish(follow_target_repl_conn);
4361
4362 return success;
4363 }
4364
4365
4366 /*
4367 * Check that the replication configuration file is owned by the user who
4368 * owns the data directory.
4369 */
4370 extern bool
check_replication_config_owner(int pg_version,const char * data_directory,PQExpBufferData * error_msg,PQExpBufferData * detail_msg)4371 check_replication_config_owner(int pg_version, const char *data_directory, PQExpBufferData *error_msg, PQExpBufferData *detail_msg)
4372 {
4373 PQExpBufferData replication_config_file;
4374 struct stat dirstat;
4375 struct stat confstat;
4376
4377 if (stat(data_directory, &dirstat))
4378 {
4379 if (error_msg != NULL)
4380 {
4381 appendPQExpBuffer(error_msg,
4382 "unable to check ownership of data directory \"%s\"",
4383 data_directory);
4384 appendPQExpBufferStr(detail_msg,
4385 strerror(errno));
4386 }
4387 return false;
4388 }
4389
4390 initPQExpBuffer(&replication_config_file);
4391
4392 appendPQExpBuffer(&replication_config_file,
4393 "%s/%s",
4394 config_file_options.data_directory,
4395 pg_version >= 120000 ? PG_AUTOCONF_FILENAME : RECOVERY_COMMAND_FILE);
4396
4397 stat(replication_config_file.data, &confstat);
4398
4399 if (confstat.st_uid == dirstat.st_uid)
4400 {
4401 termPQExpBuffer(&replication_config_file);
4402 return true;
4403 }
4404
4405 if (error_msg != NULL)
4406 {
4407 char conf_owner[MAXLEN];
4408 char dir_owner[MAXLEN];
4409 struct passwd *pw;
4410
4411 pw = getpwuid(confstat.st_uid);
4412 if (!pw)
4413 {
4414 maxlen_snprintf(conf_owner,
4415 "(unknown user %i)",
4416 confstat.st_uid);
4417 }
4418 else
4419 {
4420 strncpy(conf_owner, pw->pw_name, MAXLEN);
4421 }
4422
4423 pw = getpwuid(dirstat.st_uid);
4424
4425 if (!pw)
4426 {
4427 maxlen_snprintf(conf_owner,
4428 "(unknown user %i)",
4429 dirstat.st_uid);
4430 }
4431 else
4432 {
4433 strncpy(dir_owner, pw->pw_name, MAXLEN);
4434 }
4435
4436 appendPQExpBuffer(error_msg,
4437 "ownership error for file \"%s\"",
4438 replication_config_file.data);
4439 appendPQExpBuffer(detail_msg,
4440 "file owner is \"%s\", data directory owner is \"%s\"",
4441 conf_owner,
4442 dir_owner);
4443 }
4444
4445 termPQExpBuffer(&replication_config_file);
4446
4447 return false;
4448 }
4449
4450
4451 /*
4452 * Simple check to see if "shared_preload_libraries" includes "repmgr".
4453 * Parsing "shared_preload_libraries" is non-trivial, as it's potentially
4454 * a comma-separated list, and worse may not be readable by the repmgr
4455 * user.
4456 *
4457 * Instead, we check if a function which should return a value returns
4458 * NULL; this indicates the shared library is not installed.
4459 */
4460 void
check_shared_library(PGconn * conn)4461 check_shared_library(PGconn *conn)
4462 {
4463 bool ok = repmgrd_check_local_node_id(conn);
4464
4465 if (ok == true)
4466 return;
4467
4468 log_error(_("repmgrd not configured for this node"));
4469 log_hint(_("ensure \"shared_preload_libraries\" includes \"repmgr\" and restart PostgreSQL"));
4470 PQfinish(conn);
4471 exit(ERR_BAD_CONFIG);
4472 }
4473
4474
4475 bool
is_repmgrd_running(PGconn * conn)4476 is_repmgrd_running(PGconn *conn)
4477 {
4478 pid_t pid;
4479 bool is_running = false;
4480
4481 pid = repmgrd_get_pid(conn);
4482
4483 if (pid != UNKNOWN_PID)
4484 {
4485 if (kill(pid, 0) != -1)
4486 {
4487 is_running = true;
4488 }
4489 }
4490
4491 return is_running;
4492 }
4493
4494
4495 /**
4496 * Parse the string returned by "repmgr --version", e.g. "repmgr 4.1.2",
4497 * and return it as a version integer (e.g. 40102).
4498 *
4499 * This is required for backwards compatibility as versions prior to
4500 * 4.3 do not have the --version-number option.
4501 */
4502 int
parse_repmgr_version(const char * version_string)4503 parse_repmgr_version(const char *version_string)
4504 {
4505 int series, major, minor;
4506 int version_integer = UNKNOWN_REPMGR_VERSION_NUM;
4507 PQExpBufferData sscanf_string;
4508
4509 initPQExpBuffer(&sscanf_string);
4510
4511 appendPQExpBuffer(&sscanf_string, "%s ",
4512 progname());
4513 appendPQExpBufferStr(&sscanf_string, "%i.%i.%i");
4514
4515 if (sscanf(version_string, sscanf_string.data, &series, &major, &minor) == 3)
4516 {
4517 version_integer = (series * 10000) + (major * 100) + minor;
4518 }
4519 else
4520 {
4521 resetPQExpBuffer(&sscanf_string);
4522 appendPQExpBuffer(&sscanf_string, "%s ",
4523 progname());
4524 appendPQExpBufferStr(&sscanf_string, "%i.%i");
4525
4526 if (sscanf(version_string, "repmgr %i.%i", &series, &major) == 2)
4527 {
4528 version_integer = (series * 10000) + (major * 100);
4529 }
4530 }
4531
4532 return version_integer;
4533 }
4534