#!/bin/bash
# This script is run by failover_command.
#
# Depending on which node failed, it does one of the following:
#   * nothing, when no node is left to become the new main node;
#   * drops the failed standby's replication slot on the old primary;
#   * promotes the new main node to primary over SSH with pg_ctl.
#
# Exit status:
#   0  failover was performed or deliberately skipped
#   1  invalid node id argument, passwordless SSH failure, or promotion failure

set -o xtrace

# Special values:
# 1)  %d = failed node id
# 2)  %h = failed node hostname
# 3)  %p = failed node port number
# 4)  %D = failed node database cluster path
# 5)  %m = new main node id
# 6)  %H = new main node hostname
# 7)  %M = old main node id
# 8)  %P = old primary node id
# 9)  %r = new main port number
# 10) %R = new main database cluster path
# 11) %N = old primary node hostname
# 12) %S = old primary node port number
# 13) %% = '%' character

FAILED_NODE_ID="$1"
FAILED_NODE_HOST="$2"
FAILED_NODE_PORT="$3"
FAILED_NODE_PGDATA="$4"
NEW_MAIN_NODE_ID="$5"
NEW_MAIN_NODE_HOST="$6"
OLD_MAIN_NODE_ID="$7"
OLD_PRIMARY_NODE_ID="$8"
NEW_MAIN_NODE_PORT="$9"
NEW_MAIN_NODE_PGDATA="${10}"
OLD_PRIMARY_NODE_HOST="${11}"
OLD_PRIMARY_NODE_PORT="${12}"

readonly PGHOME=/usr/pgsql-14

# Replication slot name derived from the failed node's host name: lower-cased,
# with '-' and '.' replaced by '_'. PostgreSQL only accepts lower-case letters,
# digits and underscores in slot names, so an upper-case host name must be
# folded to lower case or the slot can never be matched.
REPL_SLOT_NAME="${FAILED_NODE_HOST,,}"
REPL_SLOT_NAME="${REPL_SLOT_NAME//[-.]/_}"

# SSH options shared by every remote call. They are placed before the
# destination so option parsing does not depend on the ssh implementation
# accepting options after the host name.
SSH_OPTIONS=(
    -T
    -o StrictHostKeyChecking=no
    -o UserKnownHostsFile=/dev/null
    -i ~/.ssh/id_rsa_pgpool
)

echo "failover.sh: start: failed_node_id=${FAILED_NODE_ID} failed_host=${FAILED_NODE_HOST}" \
    "old_primary_node_id=${OLD_PRIMARY_NODE_ID} new_main_node_id=${NEW_MAIN_NODE_ID} new_main_host=${NEW_MAIN_NODE_HOST}"

## Refuse to continue with node ids that are not integers.
## An empty or non-numeric id makes the numeric tests below error out, and an
## erroring test is treated as "false", which would lead straight to promotion.
for node_id_var in FAILED_NODE_ID NEW_MAIN_NODE_ID OLD_PRIMARY_NODE_ID; do
    if ! [[ "${!node_id_var}" =~ ^-?[0-9]+$ ]]; then
        echo "ERROR: failover.sh: ${node_id_var} must be an integer, got \"${!node_id_var}\"."
        exit 1
    fi
done

## If there's no main node anymore, skip failover.
if [ "${NEW_MAIN_NODE_ID}" -lt 0 ]; then
    echo "failover.sh: All nodes are down. Skipping failover."
    exit 0
fi

## Test passwordless SSH
if ! ssh "${SSH_OPTIONS[@]}" "postgres@${NEW_MAIN_NODE_HOST}" ls /tmp > /dev/null; then
    echo "failover.sh: passwordless SSH to postgres@${NEW_MAIN_NODE_HOST} failed. Please setup passwordless SSH."
    exit 1
fi

## If Standby node is down, skip failover.
if [ "${FAILED_NODE_ID}" -ne "${OLD_PRIMARY_NODE_ID}" ]; then

    # If Standby node is down, drop replication slot.
    # Best effort: a failure is reported but does not change the exit status.
    if ! "${PGHOME}/bin/psql" -h "${OLD_PRIMARY_NODE_HOST}" -p "${OLD_PRIMARY_NODE_PORT}" \
            -c "SELECT pg_drop_replication_slot('${REPL_SLOT_NAME}');" >/dev/null 2>&1; then
        echo "ERROR: failover.sh: drop replication slot \"${REPL_SLOT_NAME}\" failed. You may need to drop replication slot manually."
    fi

    echo "failover.sh: end: standby node is down. Skipping failover."
    exit 0
fi

## Promote Standby node.
echo "failover.sh: primary node is down, promote new_main_node_id=${NEW_MAIN_NODE_ID} on ${NEW_MAIN_NODE_HOST}."

# NOTE: ssh joins the remote command words with spaces and hands them to the
# remote shell, so the local quoting below does not protect a data directory
# path that contains whitespace.
if ! ssh "${SSH_OPTIONS[@]}" "postgres@${NEW_MAIN_NODE_HOST}" \
        "${PGHOME}/bin/pg_ctl" -D "${NEW_MAIN_NODE_PGDATA}" -w promote; then
    echo "ERROR: failover.sh: end: failover failed"
    exit 1
fi

echo "failover.sh: end: new_main_node_id=${NEW_MAIN_NODE_ID} on ${NEW_MAIN_NODE_HOST} is promoted to a primary"
exit 0