#!/bin/bash
# This script is run by failover_command.
#
# Depending on the arguments it receives, it does one of the following:
#   * new master node id is negative -> log "All nodes are down" and exit 0
#   * failed node is not the old     -> drop the failed node's replication
#     primary (a standby went down)     slot on the old primary, no promotion
#   * failed node is the old primary -> promote the new master node
#
# Exit status: 0 on success or when failover is skipped, 1 on any error
# (invalid node id, SSH failure, slot drop failure, promote failure).

set -o xtrace
# Route stdout/stderr (including the xtrace output) to syslog.
exec > >(logger -i -p local1.info) 2>&1

# Special values, in the positional order this script expects them:
#   %d = failed node id
#   %h = failed node hostname
#   %p = failed node port number
#   %D = failed node database cluster path
#   %m = new master node id
#   %H = new master node hostname
#   %M = old master node id
#   %P = old primary node id
#   %r = new master port number
#   %R = new master database cluster path
#   %N = old primary node hostname
#   %S = old primary node port number
#   %% = '%' character

FAILED_NODE_ID="$1"
FAILED_NODE_HOST="$2"
FAILED_NODE_PORT="$3"
FAILED_NODE_PGDATA="$4"
NEW_MASTER_NODE_ID="$5"
NEW_MASTER_NODE_HOST="$6"
OLD_MASTER_NODE_ID="$7"
OLD_PRIMARY_NODE_ID="$8"
NEW_MASTER_NODE_PORT="$9"
NEW_MASTER_NODE_PGDATA="${10}"
OLD_PRIMARY_NODE_HOST="${11}"
OLD_PRIMARY_NODE_PORT="${12}"

PGHOME=/usr/pgsql-11

# PostgreSQL accepts only lower-case letters, digits and underscores in a
# replication slot name, so lower-case the host name before mapping '-' and
# '.' to '_'. Host names that are already lower-case yield the same slot name
# as before.
REPL_SLOT_NAME=${FAILED_NODE_HOST,,}
REPL_SLOT_NAME=${REPL_SLOT_NAME//[-.]/_}

# Options shared by every ssh invocation below. Host key checking is disabled
# and the dedicated pgpool key is used for passwordless login.
SSH_OPTIONS=(-T -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i ~/.ssh/id_rsa_pgpool)

# Write an informational message to syslog, prefixed with the script name.
log_info() {
    logger -i -p local1.info "failover.sh: $*"
}

# Write an error message to syslog, prefixed with the script name.
log_error() {
    logger -i -p local1.error "failover.sh: $*"
}

# Succeed when $1 is an optionally signed decimal integer.
is_integer() {
    [[ "$1" =~ ^[+-]?[0-9]+$ ]]
}

log_info "start: failed_node_id=$FAILED_NODE_ID old_primary_node_id=$OLD_PRIMARY_NODE_ID failed_host=$FAILED_NODE_HOST new_master_host=$NEW_MASTER_NODE_HOST"

## Refuse to continue with missing or malformed node ids. Without this check
## a failed numeric comparison below would be treated as "false" and the
## script would fall through to promoting a node.
for node_id in "$FAILED_NODE_ID" "$NEW_MASTER_NODE_ID" "$OLD_PRIMARY_NODE_ID"; do
    if ! is_integer "$node_id"; then
        log_error "invalid node id '${node_id}': expected an integer. Check the failover_command arguments."
        exit 1
    fi
done

## If there's no master node anymore, skip failover.
if [ "$NEW_MASTER_NODE_ID" -lt 0 ]; then
    log_info "All nodes are down. Skipping failover."
    exit 0
fi

## Test passwordless SSH
if ! ssh "${SSH_OPTIONS[@]}" "postgres@${NEW_MASTER_NODE_HOST}" ls /tmp > /dev/null; then
    log_info "passwordless SSH to postgres@${NEW_MASTER_NODE_HOST} failed. Please setup passwordless SSH."
    exit 1
fi

## If Standby node is down, skip failover.
if [ "$FAILED_NODE_ID" -ne "$OLD_PRIMARY_NODE_ID" ]; then
    log_info "Standby node is down. Skipping failover."

    # Drop the failed node's replication slot on the old primary. The command
    # string is interpreted by the remote shell, hence the escaped quotes.
    if ! ssh "${SSH_OPTIONS[@]}" "postgres@${OLD_PRIMARY_NODE_HOST}" "
        ${PGHOME}/bin/psql -p ${OLD_PRIMARY_NODE_PORT} -c \"SELECT pg_drop_replication_slot('${REPL_SLOT_NAME}')\"
    "; then
        log_error "drop replication slot ${REPL_SLOT_NAME} failed"
        exit 1
    fi

    exit 0
fi

## Promote Standby node.
log_info "Primary node is down, promote standby node ${NEW_MASTER_NODE_HOST}."

# -w makes pg_ctl wait for the promotion to complete before returning.
if ! ssh "${SSH_OPTIONS[@]}" "postgres@${NEW_MASTER_NODE_HOST}" \
    "${PGHOME}/bin/pg_ctl" -D "${NEW_MASTER_NODE_PGDATA}" -w promote; then
    log_error "new_master_host=$NEW_MASTER_NODE_HOST promote failed"
    exit 1
fi

log_info "end: new_master_node_id=$NEW_MASTER_NODE_ID started as the primary node"
exit 0