1#!/bin/bash
2# This script is run by failover_command.
3
# Trace each command as it is executed, then send this script's stdout and
# stderr to syslog through logger at priority local1.info.
set -x
exec 1> >(logger -i -p local1.info) 2>&1
6
7# Special values:
8#   %d = failed node id
9#   %h = failed node hostname
10#   %p = failed node port number
11#   %D = failed node database cluster path
12#   %m = new master node id
13#   %H = new master node hostname
14#   %M = old master node id
15#   %P = old primary node id
16#   %r = new master port number
17#   %R = new master database cluster path
18#   %N = old primary node hostname
19#   %S = old primary node port number
20#   %% = '%' character
21
# Positional arguments. NOTE(review): the %-placeholder beside each one is
# inferred from the matching entry in the placeholder list; the real order is
# whatever the failover_command line passes -- confirm.
FAILED_NODE_ID="$1"            # %d
FAILED_NODE_HOST="$2"          # %h
FAILED_NODE_PORT="$3"          # %p
FAILED_NODE_PGDATA="$4"        # %D
NEW_MASTER_NODE_ID="$5"        # %m
NEW_MASTER_NODE_HOST="$6"      # %H
OLD_MASTER_NODE_ID="$7"        # %M
OLD_PRIMARY_NODE_ID="$8"       # %P
NEW_MASTER_NODE_PORT="$9"      # %r
NEW_MASTER_NODE_PGDATA="${10}" # %R
OLD_PRIMARY_NODE_HOST="${11}"  # %N
OLD_PRIMARY_NODE_PORT="${12}"  # %S

# Installation prefix used to locate psql and pg_ctl on the remote hosts.
PGHOME=/usr/pgsql-11

#######################################
# Print the replication slot name derived from a hostname.
# Arguments: $1 - hostname
# Outputs:   the hostname with every '-' and '.' replaced by '_' and every
#            upper-case letter folded to lower case, on stdout
#######################################
slot_name_for_host() {
    local host=${1//[-.]/_}
    # PostgreSQL slot names may only contain lower-case letters, digits and
    # '_', so a mixed-case hostname must be folded to stay a valid name.
    printf '%s\n' "$host" | LC_ALL=C tr '[:upper:]' '[:lower:]'
}

REPL_SLOT_NAME=$(slot_name_for_host "$FAILED_NODE_HOST")
37
logger -i -p local1.info "failover.sh: start: failed_node_id=$FAILED_NODE_ID old_primary_node_id=$OLD_PRIMARY_NODE_ID failed_host=$FAILED_NODE_HOST new_master_host=$NEW_MASTER_NODE_HOST"

## The new master node id must be an integer before it can be compared.
# An empty or malformed value would make the numeric test below error out and
# evaluate as false, letting the script carry on as if a new master existed.
if ! [[ "$NEW_MASTER_NODE_ID" =~ ^-?[0-9]+$ ]]; then
    logger -i -p local1.error "failover.sh: invalid new_master_node_id='$NEW_MASTER_NODE_ID'. Aborting."
    exit 1
fi

## If there's no master node anymore, skip failover.
if [ "$NEW_MASTER_NODE_ID" -lt 0 ]; then
    logger -i -p local1.info "failover.sh: All nodes are down. Skipping failover."
    exit 0
fi
45
## Test passwordless SSH
# Run a harmless remote command on the new master as user postgres. Only the
# exit status matters, so the directory listing is discarded. Options are
# placed before the destination and the destination is quoted so that the
# hostname is always passed to ssh as a single word.
if ! ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
        -i ~/.ssh/id_rsa_pgpool "postgres@${NEW_MASTER_NODE_HOST}" \
        ls /tmp > /dev/null; then
    logger -i -p local1.info "failover.sh: passwordless SSH to postgres@${NEW_MASTER_NODE_HOST} failed. Please setup passwordless SSH."
    exit 1
fi
53
## Both node ids must be integers before they can be compared.
# With an empty or malformed id the numeric test below would error out and
# evaluate as false, so a standby failure would fall through to the promote
# step further down in this script.
if ! [[ "$FAILED_NODE_ID" =~ ^-?[0-9]+$ && "$OLD_PRIMARY_NODE_ID" =~ ^-?[0-9]+$ ]]; then
    logger -i -p local1.error "failover.sh: invalid node id: failed_node_id='$FAILED_NODE_ID' old_primary_node_id='$OLD_PRIMARY_NODE_ID'. Aborting."
    exit 1
fi

## If Standby node is down, skip failover.
# NOTE(review): this branch is also taken when OLD_PRIMARY_NODE_ID is negative
# (no old primary); the ssh below then targets whatever
# OLD_PRIMARY_NODE_HOST holds -- confirm that is intended.
if [ "$FAILED_NODE_ID" -ne "$OLD_PRIMARY_NODE_ID" ]; then
    logger -i -p local1.info "failover.sh: Standby node is down. Skipping failover."

    # Drop the replication slot named after the failed host on the old
    # primary (presumably so the primary stops retaining WAL for it).
    # The whole psql invocation is sent as one string for the remote shell.
    if ! ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            -i ~/.ssh/id_rsa_pgpool "postgres@${OLD_PRIMARY_NODE_HOST}" \
            "${PGHOME}/bin/psql -p ${OLD_PRIMARY_NODE_PORT} -c \"SELECT pg_drop_replication_slot('${REPL_SLOT_NAME}')\""; then
        logger -i -p local1.error "failover.sh: drop replication slot ${REPL_SLOT_NAME} failed"
        exit 1
    fi

    exit 0
fi
69
## Promote Standby node.
logger -i -p local1.info "failover.sh: Primary node is down, promote standby node ${NEW_MASTER_NODE_HOST}."

# Run pg_ctl promote on the new master over SSH as user postgres. The
# expansions are quoted so none of them is word-split or glob-expanded
# locally, and the ssh exit status is tested directly instead of via $?.
if ! ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
        -i ~/.ssh/id_rsa_pgpool "postgres@${NEW_MASTER_NODE_HOST}" \
        "${PGHOME}/bin/pg_ctl" -D "${NEW_MASTER_NODE_PGDATA}" -w promote; then
    logger -i -p local1.error "failover.sh: new_master_host=$NEW_MASTER_NODE_HOST promote failed"
    exit 1
fi

logger -i -p local1.info "failover.sh: end: new_master_node_id=$NEW_MASTER_NODE_ID started as the primary node"
exit 0
83