#!/bin/bash
# Failover handler: invoked through the failover_command setting.

# Print every command to stderr before it is executed (-x is the short
# spelling of the xtrace option).
set -x
5
# Placeholders available in failover_command. This script expects entries
# 1-12 as its positional arguments, in this order ($1 .. ${12}):
# 1)  %d = failed node id
# 2)  %h = failed node hostname
# 3)  %p = failed node port number
# 4)  %D = failed node database cluster path
# 5)  %m = new main node id
# 6)  %H = new main node hostname
# 7)  %M = old main node id
# 8)  %P = old primary node id
# 9)  %r = new main port number
# 10) %R = new main database cluster path
# 11) %N = old primary node hostname
# 12) %S = old primary node port number
# 13) %% = '%' character (literal percent sign; not passed as an argument)
20
# Positional arguments, in the order failover_command is expected to pass
# them: %d %h %p %D %m %H %M %P %r %R %N %S.
FAILED_NODE_ID="$1"
FAILED_NODE_HOST="$2"
FAILED_NODE_PORT="$3"
FAILED_NODE_PGDATA="$4"
NEW_MAIN_NODE_ID="$5"
NEW_MAIN_NODE_HOST="$6"
OLD_MAIN_NODE_ID="$7"
OLD_PRIMARY_NODE_ID="$8"
NEW_MAIN_NODE_PORT="$9"
NEW_MAIN_NODE_PGDATA="${10}"
OLD_PRIMARY_NODE_HOST="${11}"
OLD_PRIMARY_NODE_PORT="${12}"

# PostgreSQL installation prefix; psql and pg_ctl are run from ${PGHOME}/bin.
PGHOME=/usr/pgsql-13

# slot_name_for_host HOST
# Prints the replication slot name derived from HOST: the hostname in lower
# case with every '-' and '.' replaced by '_'.
# PostgreSQL accepts only lower-case letters, digits and underscores in slot
# names, so an upper-case hostname used verbatim can never name a real slot.
slot_name_for_host() {
    local host=${1,,}
    printf '%s' "${host//[-.]/_}"
}

# Name of the replication slot that belongs to the failed node.
REPL_SLOT_NAME=$(slot_name_for_host "$FAILED_NODE_HOST")
36
37
# Log the arguments this run was started with. The message is quoted so that
# each value is printed verbatim (no word splitting, no glob expansion).
echo "failover.sh: start: failed_node_id=$FAILED_NODE_ID failed_host=$FAILED_NODE_HOST" \
    "old_primary_node_id=$OLD_PRIMARY_NODE_ID new_main_node_id=$NEW_MAIN_NODE_ID new_main_host=$NEW_MAIN_NODE_HOST"
40
## If there's no main node anymore, skip failover.
# The id must be an integer before it can be compared numerically. Without
# this check an empty or malformed value only produces a test error and the
# script silently carries on as if a main node existed.
if ! [[ "$NEW_MAIN_NODE_ID" =~ ^-?[0-9]+$ ]]; then
    echo "ERROR: failover.sh: end: new_main_node_id \"$NEW_MAIN_NODE_ID\" is not an integer"
    exit 1
fi

# A negative new main node id means there is no node left to fail over to.
if [ "$NEW_MAIN_NODE_ID" -lt 0 ]; then
    echo failover.sh: All nodes are down. Skipping failover.
    exit 0
fi
46
## Test passwordless SSH
# Run a harmless `ls /tmp` on the new main node as user postgres with the
# id_rsa_pgpool key. The promotion step uses the same login, so stop here if
# it does not work.
# Options are placed before the destination and the destination is quoted so
# the hostname is always passed to ssh as a single word.
# NOTE: the message text below is intentionally left unchanged (including its
# spelling) so existing log matching keeps working.
if ! ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
        -i ~/.ssh/id_rsa_pgpool "postgres@${NEW_MAIN_NODE_HOST}" ls /tmp > /dev/null; then
    echo "failover.sh: passwrodless SSH to postgres@${NEW_MAIN_NODE_HOST} failed. Please setup passwrodless SSH."
    exit 1
fi
54
## If Standby node is down, skip failover.
# Both ids must be integers. A malformed value would make the numeric test
# below fail with an error, and the script would then fall through to the
# promotion step even though no primary failure was detected.
for node_id in "$FAILED_NODE_ID" "$OLD_PRIMARY_NODE_ID"; do
    if ! [[ "$node_id" =~ ^-?[0-9]+$ ]]; then
        echo "ERROR: failover.sh: end: invalid node id \"$node_id\" (failed_node_id=$FAILED_NODE_ID old_primary_node_id=$OLD_PRIMARY_NODE_ID)"
        exit 1
    fi
done

# The failed node is not the old primary, so a standby went down.
if [ "$FAILED_NODE_ID" -ne "$OLD_PRIMARY_NODE_ID" ]; then

    # If Standby node is down, drop replication slot.
    # Runs against the old primary; psql output is discarded and only the
    # exit status is used. A failure is reported but is not fatal.
    if ! "${PGHOME}/bin/psql" -h "${OLD_PRIMARY_NODE_HOST}" -p "${OLD_PRIMARY_NODE_PORT}" \
            -c "SELECT pg_drop_replication_slot('${REPL_SLOT_NAME}');" >/dev/null 2>&1; then
        echo "ERROR: failover.sh: drop replication slot \"${REPL_SLOT_NAME}\" failed. You may need to drop replication slot manually."
    fi

    echo failover.sh: end: standby node is down. Skipping failover.
    exit 0
fi
69
## Promote Standby node.
echo "failover.sh: primary node is down, promote new_main_node_id=$NEW_MAIN_NODE_ID on ${NEW_MAIN_NODE_HOST}."

# ssh joins its command arguments into one string that the remote shell parses
# again. Shell-quote the pg_ctl path and the data directory with %q so each
# reaches pg_ctl as exactly one argument, then pass the command as one word.
printf -v promote_cmd '%q -D %q -w promote' "${PGHOME}/bin/pg_ctl" "${NEW_MAIN_NODE_PGDATA}"

# Run the promotion on the new main node as user postgres; "-w" makes pg_ctl
# wait for the promotion to finish, so ssh's exit status reflects the result.
if ! ssh -T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
        -i ~/.ssh/id_rsa_pgpool "postgres@${NEW_MAIN_NODE_HOST}" "$promote_cmd"; then
    echo ERROR: failover.sh: end: failover failed
    exit 1
fi

echo "failover.sh: end: new_main_node_id=$NEW_MAIN_NODE_ID on ${NEW_MAIN_NODE_HOST} is promoted to a primary"
exit 0
83