#!/bin/bash
################################################################################
# Copyright (c) 2013-2016 VMware, Inc. All rights reserved.
################################################################################
# Run database as replica with specified primary
# Takes arguments which should be -h <host> -U <user>
# If -W option is used, then the password is prompted for and used
# If -b option is used, a base backup is first created from the primary before
# running as a replica. This option destroys the current contents of the
# data directory.
# If -s option is used, a physical replication slot is first created on the
# primary and used for the replica.

# ENCODING and THIS_DIR are not referenced again in this script.
# NOTE(review): presumably consumed by the sourced sanity-check file or kept
# for compatibility - confirm before removing.
ENCODING=UTF8
THIS_DIR=$(dirname -- "$0")

# Load global parameters
if [ -z "${VMWARE_POSTGRES_BASE}" ]; then
   echo "VMWARE_POSTGRES_BASE is not set."
   echo "Check your installation."
   exit 1
fi
SANITY_FILE="${VMWARE_POSTGRES_BASE}/scripts/vpostgres_sanity_checks"
if [ -f "$SANITY_FILE" ]; then
   source "$SANITY_FILE"
else
   echo "Sanity check file for environment variables of VMware Postgres"
   echo "is not available. Check your installation."
   exit 1
fi

# Only the configured OS admin user may run this script.
# Both operands are quoted on purpose: with an unquoted, empty
# VMWARE_POSTGRES_OS_ADMIN the test itself errors out, the "if" evaluates to
# false and the check would be silently skipped.
if [ "$(id -u -n)" != "${VMWARE_POSTGRES_OS_ADMIN}" ]; then
   echo "Run this program as user ${VMWARE_POSTGRES_OS_ADMIN}"
   exit 1
fi

# Print the command-line help and terminate the script.
# Arguments: none
# Outputs:   usage text on stdout
# Returns:   never returns; exits with status 1
usage() {
   # $0 is quoted so that a script path containing whitespace is not split
   # into separate NAME and SUFFIX arguments of basename.
   echo "Usage: $(basename -- "$0") [OPTION]..."
   echo " "
   echo "General Options:"
   echo " -b       Take a fresh base backup from node whose connection IP is"
   echo "          defined by -h <host>."
   echo " -s SLOT  Replication slot name to link node in recovery with."
   echo " -W       Get connection password from prompt for user defined by"
   echo "          -U <user>."
   echo " "
   echo "Connection options:"
   echo " -h HOST  IP of Postgres instance to do replication from. A base"
   echo "          backup is taken from this node if needed."
   echo " -U USER  Name of database user used for operations."
   exit 1
}

# Escape a special character and backslashes with backslashes.
# Every backslash is doubled first, then every occurrence of the special
# character is prefixed with a backslash.
# The special character is ' in the libpq connection string and : in .pgpass
# Arguments: $1 - string to escape
#            $2 - special character to escape (defaults to ' when empty)
# Outputs:   the escaped string followed by a newline on stdout
escape_str() {
   local str=$1
   local char=${2:-"'"}
   local bs='\'
   # Pattern and replacement are quoted so they are taken literally; the
   # special character is never interpreted as a pattern or regex.
   str=${str//"$bs"/"$bs$bs"}
   # Backslashes were already doubled above; do not escape them twice.
   if [ "$char" != "$bs" ]; then
      str=${str//"$char"/"$bs$char"}
   fi
   printf '%s\n' "$str"
}

# Option state. PGHOST and PGUSER are deliberately reset here so that only
# values given on the command line are taken into account.
PGHOST=
PGUSER=
USE_SLOT=no
BASE_BACKUP=no
GET_PASSWORD=no
# libpq keyword='value' pairs for the connection to the primary
PRIMARY_CONNINFO=()
# Connection switches forwarded to pg_basebackup
BASE_BACKUP_ARGS=()

while getopts Wbh:s:U: flag; do
   case "$flag" in
      b) BASE_BACKUP=yes ;;
      W) GET_PASSWORD=yes ;;
      s)
         SLOT_NAME=$OPTARG
         USE_SLOT=yes
         ;;
      h)
         PGHOST=$OPTARG
         BASE_BACKUP_ARGS+=("-h" "$OPTARG")
         PRIMARY_CONNINFO+=("host='$(escape_str "$OPTARG")'")
         ;;
      U)
         PGUSER=$OPTARG
         BASE_BACKUP_ARGS+=("-U" "$OPTARG")
         PRIMARY_CONNINFO+=("user='$(escape_str "$OPTARG")'")
         ;;
      \?) usage ;;
   esac
done
# Drop the options that have been processed
shift $((OPTIND-1))

# Sanity checks: both -h and -U are mandatory. usage exits the script.
if [ -z "$PGHOST" ]; then
   echo "Missing host name or IP address for database connection"
   usage
fi
if [ -z "$PGUSER" ]; then
   echo "Missing user name for database connection"
   usage
fi

# Add the local host name as application_name to the connection string.
# $(hostname) is quoted so the whole output reaches escape_str as one argument.
PRIMARY_CONNINFO=("${PRIMARY_CONNINFO[@]}" "application_name='$(escape_str "$(hostname)")'")

# Prompt for the database password when -W was given and append it to the
# connection string.
if [ "$GET_PASSWORD" == "yes" ]; then
   echo Enter password for DB user "$PGUSER".
   # IFS= keeps leading and trailing whitespace of the password intact.
   # The number of attempts is bounded: when stdin is at end-of-file every
   # read fails, and an unbounded retry loop would never terminate.
   PASSWORD_ATTEMPTS=0
   until IFS= read -p "Password: " -r -s pwd; do
      echo; echo Password input error.
      PASSWORD_ATTEMPTS=$((PASSWORD_ATTEMPTS + 1))
      if [ "$PASSWORD_ATTEMPTS" -ge 3 ]; then
         echo "Giving up after $PASSWORD_ATTEMPTS failed attempts to read the password"
         exit 1
      fi
   done
   # read -s does not echo the newline typed by the user
   echo
   PASSWORD=$pwd
   PRIMARY_CONNINFO=("${PRIMARY_CONNINFO[@]}" "password='$(escape_str "$PASSWORD")'")
fi

# Test connection with connection string
# The array is joined with spaces into a single libpq connection string.
# NOTE(review): when -W is used the password is part of this string and is
# therefore passed on the psql command line.
echo Testing connection to primary
if ! "$VMWARE_POSTGRES_BIN"/psql -w -d "${PRIMARY_CONNINFO[*]} dbname=postgres" -c 'select version();'; then
   echo "Failed connection test to primary"
   exit 1
fi

# Test and setup ssh keys for scp
# The restore_command written to recovery.conf uses scp, so the primary must
# be reachable over ssh without a password.
if ! ssh -o PasswordAuthentication=no "${PGHOST}" /bin/true; then
   if [ ! -f "$HOME/.ssh/id_rsa" ]; then
      echo Generating key id_rsa
      # 1024-bit RSA keys are too weak; generate a 4096-bit key instead.
      if ! ssh-keygen -b 4096 -t rsa -N '' -f "$HOME/.ssh/id_rsa"; then
         echo "Failed to generate ssh key"
         exit 1
      fi
   fi
   echo Copying ssh key to remote server
   ssh-copy-id -i "$HOME/.ssh/id_rsa" "${PGHOST}"
   # Verify that key-based login works now
   if ! ssh -o PasswordAuthentication=no "${PGHOST}" true; then
      echo "Password-less authorization failed during connection"
      exit 1
   fi
fi

# Stop DB while we get base backup or create recovery.conf
# A zero status from "pg_ctl status" is taken as "server is running"; only
# then is a fast shutdown requested, waiting up to 300 seconds.
if "$VMWARE_POSTGRES_BIN"/pg_ctl status -D "$VMWARE_POSTGRES_DATA" -w; then
   echo "PostgreSQL instance is running, stopping it now."
   if ! "$VMWARE_POSTGRES_BIN"/pg_ctl stop -D "$VMWARE_POSTGRES_DATA" -w -t 300 -m fast; then
      echo "Failure when trying to stop PostgreSQL instance, hence leaving..."
      exit 1
   fi
fi

# Optionally replace the local data directory by a fresh base backup taken
# from the primary. Requires an explicit "yes" from the operator.
if [ "$BASE_BACKUP" == yes ]; then
   read -p "WARNING: The base backup operation will replace the current contents of the data directory. Please confirm by typing yes: " -r confirm
   [ "$confirm" == yes ] || { echo Not confirmed. Base backup not done.; exit 1; }

   # Refuse to continue with empty paths before anything is deleted.
   : "${VMWARE_POSTGRES_DATA:?is not set}" "${VMWARE_POSTGRES_XLOG:?is not set}"

   # pg_basebackup runs with -w (never prompt), so the password is handed
   # over through a temporary password file. It is created before the data
   # directory is removed so that a failure here destroys nothing.
   tmpfile=
   if [ "$GET_PASSWORD" == yes ]; then
      tmpfile=$(mktemp) || { echo "Failed to create temporary password file"; exit 1; }
      # Make sure the password file disappears on every exit path.
      # NOTE(review): this replaces any EXIT trap installed earlier - none is
      # set in this script, confirm for the sourced sanity-check file.
      trap 'rm -f -- "$tmpfile"' EXIT
      printf '%s\n' '*:*:replication:*:'"$(escape_str "$PASSWORD" ':')" >"$tmpfile"
      export PGPASSFILE=$tmpfile
   fi

   # Remove former data folder and xlog folder, they are replaced by
   # fresh data from the base backup.
   rm -fr -- "${VMWARE_POSTGRES_DATA:?}"
   rm -fr -- "${VMWARE_POSTGRES_XLOG:?}"
   "$VMWARE_POSTGRES_BIN"/pg_basebackup -w -P -D "$VMWARE_POSTGRES_DATA" \
       --xlogdir "$VMWARE_POSTGRES_XLOG" "${BASE_BACKUP_ARGS[@]}"
   ERR_NUM=$?

   if [ -n "$tmpfile" ]; then
      rm -f -- "$tmpfile"
      # Do not leave PGPASSFILE pointing at the deleted file
      unset PGPASSFILE
      trap - EXIT
   fi
   if [ "$ERR_NUM" != 0 ]; then
      echo "Base backup obtention failed"
      exit 1
   fi
fi

# Single quote and backslash need to be doubled in config file values.
PRIMARY_CONNINFO_GUC=$(sed -e "s/'/''/g" -e 's/\\/\\\\/g' <<<"${PRIMARY_CONNINFO[*]}")

# The directory part of restore_command used to be an empty expansion "\${}",
# which bash rejects as a bad substitution, so recovery.conf could never be
# written.
# NOTE(review): VMWARE_POSTGRES_ARCHIVE is an assumed name for the variable
# holding the WAL archive directory on the primary - confirm the real name
# against the sanity-check file before merging.
if [ -z "${VMWARE_POSTGRES_ARCHIVE}" ]; then
   echo "VMWARE_POSTGRES_ARCHIVE is not set, cannot build restore_command"
   exit 1
fi

if ! cat >"${VMWARE_POSTGRES_DATA}/recovery.conf" <<EOF
standby_mode= 'on'
primary_conninfo = '${PRIMARY_CONNINFO_GUC}'
recovery_target_timeline = 'latest'
restore_command = 'scp ${PGHOST}:${VMWARE_POSTGRES_ARCHIVE}/%f %p'
EOF
then
   echo "Failed to create recovery.conf"
   exit 1
fi

# The creation of the replication slot is delayed as much as possible to
# avoid useless operations, particularly if base backup obtention failed.
if [ "$USE_SLOT" == yes ]; then
   # The slot name is inserted into an SQL literal and into a quoted
   # recovery.conf value below, so only accept the characters PostgreSQL
   # allows in slot names (lower-case letters, digits, underscore). The
   # characters are listed explicitly to stay independent of the locale.
   case "$SLOT_NAME" in
      ''|*[!abcdefghijklmnopqrstuvwxyz0123456789_]*)
         echo "Invalid replication slot name: $SLOT_NAME"
         exit 1
         ;;
   esac

   # Create replication slot
   if ! "$VMWARE_POSTGRES_BIN"/psql -w -d "${PRIMARY_CONNINFO[*]} dbname=postgres" -c "select pg_create_physical_replication_slot('$SLOT_NAME');"; then
      echo "Failed creation of replication slot on primary node"
      exit 1
   fi

   # Finally link replica with replication slot
   if ! echo "primary_slot_name = '$SLOT_NAME'" >>"${VMWARE_POSTGRES_DATA}/recovery.conf"; then
      echo "Failed to set primary_slot_name in recovery.conf"
      exit 1
   fi
fi

# Now restart instance from the fresh base backup
# Waits up to 300 seconds for the start to complete; server output goes to
# pgstartup.log inside the data directory.
if ! "$VMWARE_POSTGRES_BIN"/pg_ctl start -D "$VMWARE_POSTGRES_DATA" -w -t 300 -l "$VMWARE_POSTGRES_DATA/pgstartup.log"; then
   echo "Failed to start PostgreSQL instance"
   exit 1
fi
