#!/bin/bash
#
# Copyright 2015 VMware, Inc.  All rights reserved.
#
# vmware-vcha     Start/Stop VMware vCenter Server HA Agent
#
#
# Basic support for IRIX style chkconfig
# chkconfig: 35 99 08
# description: VMware vCenter server HA Agent.

### BEGIN INIT INFO
# Provides: vmware-vcha
# Required-Start: $local_fs $remote_fs $network
# Should-Start: lsassd cgrulesengd
# Required-Stop: $local_fs $remote_fs $network
# Default-Start: 3 5
# Default-Stop: 0 1 2 6
# Description: VMware vCenter Server HA Agent
### END INIT INFO

# Turn off bash's POSIX mode in case the shell was started with it enabled.
# NOTE(review): presumably required for the process substitution used by the
# logging setup further down in this script -- confirm.
set +o posix

# Report a failed operation: sends the message "failed" through logger with
# tag vmware-vcha at priority daemon.err (-s also copies it to stderr).
echo_failure() {
   local -a log_opts=(-t vmware-vcha -s -p daemon.err)
   logger "${log_opts[@]}" "failed"
}
# Report a successful operation: sends the message "success" through logger
# with tag vmware-vcha at priority daemon.info (-s also copies it to stderr).
echo_success() {
   local -a log_opts=(-t vmware-vcha -s -p daemon.info)
   logger "${log_opts[@]}" "success"
}

# Make sure that environment variables get exported even when vmware-vcha
# is started using "service" command (Ref bug #922507).
source /etc/profile

# Watchdog binary. The functions below invoke it for the "vcha" service with
# -r (get_watchdog_status), -a -s (start_vcha) and -k (stop_vcha).
WATCHDOG=/usr/bin/vmware-watchdog

# Files created from here on get no group/other permissions.
umask 0077

# TODO make an environment variable for VCHA_DIR path
VCHA_DIR="/usr/lib/vmware-vcha"
# When this directory exists, stop_vcha waits for coverage data to be
# flushed before it continues.
COV_DIR="/gcov"
# VMWARE_LOG_DIR is not set anywhere in this script; presumably it is
# provided by /etc/profile sourced above -- TODO confirm.
VCHA_LOG_DIR="${VMWARE_LOG_DIR}/vmware/vcha"
# Handed to the watchdog as PID_FILE by start_vcha and removed by stop_vcha.
VCHA_PID_FILE="/var/run/vmware/vcha.PID"
VCHA_CONFIG_FILE="/etc/vmware-vcha/vcha.cfg";
# Every invocation of this script and its output are appended to this file.
VCHA_LOG_FILE="${VCHA_LOG_DIR}/vmware-vcha.log"


# Probe the watchdog for the vcha service.
#
# The exit status is inverted relative to the probe itself:
#   1 - "$WATCHDOG -r vcha" succeeded
#   0 - "$WATCHDOG -r vcha" failed
# do_start treats 0 as "go ahead and start" and 1 as "already running".
get_watchdog_status() {
   local inverted=0
   $WATCHDOG -r vcha > /dev/null 2>&1 && inverted=1
   return $inverted
}


# Launch vcha under the watchdog and wait for the process to show up.
#
# Returns:
#   0 - pidof reported a vcha process
#   1 - no vcha process appeared within the polling window
start_vcha() {
   # The watchdog runs in the background and is the one that starts vcha.
   # It tolerates 2 failures within 3600s (1 hour); on the 3rd failure the
   # system is rebooted.
   PID_FILE=$VCHA_PID_FILE \
      $WATCHDOG -a -s vcha -u 3600 -q 2 "/usr/sbin/vcha" > /dev/null 2>&1 &

   # Poll once immediately, then once per second: at most 11 probes
   # separated by 10 one-second sleeps.
   local sleeps_left=10
   until [[ -n "$(pidof vcha)" ]]; do
      (( sleeps_left-- > 0 )) || return 1
      sleep 1
   done

   return 0
}

# Stop the watchdog and vcha, escalating from SIGTERM to SIGKILL.
#
# Globals:
#   WATCHDOG       (read) watchdog binary
#   COV_DIR        (read) coverage directory; triggers the flush wait
#   VCHA_PID_FILE  (read) PID file removed once vcha is gone
# Returns:
#   0 - no vcha process is left; the PID file has been removed
#   1 - vcha was still alive after all retries; the PID file is kept
stop_vcha() {
   # Kill the watchdog.
   "$WATCHDOG" -k vcha > /dev/null 2>&1
   # Ask vcha to terminate gracefully.
   killall -TERM "vcha" > /dev/null 2>&1

   # If code coverage build, wait for vcha to flush coverage data
   # before force kill.
   if [ -d "$COV_DIR" ]; then
      echo "Flushing code coverage data. This could take minutes"
      sleep 5
      # Keep waiting for as long as find still reports matching entries.
      while [ "$(find "$COV_DIR" -mmin 1)" != "" ]; do
         sleep 5
      done
   fi

   # Wait a second, to give things a chance to either die or
   # restart (if the watchdog is still up for some reason).
   sleep 1

   # Now we must make sure things died.
   local attempts=1
   local -r max_attempts=10
   local vchapid
   vchapid=$(pidof vcha)
   while [ -n "${vchapid}" ]; do
      # Give up once max_attempts forced kills have not helped.
      if [ "$attempts" -gt "$max_attempts" ]; then
         return 1
      fi

      # Retry to kill the watchdog, just for completeness.
      # It may already be dead.
      "$WATCHDOG" -k vcha > /dev/null 2>&1
      # SIGTERM was already sent above and did not work, so escalate to
      # SIGKILL, which the process cannot catch or ignore.
      killall -KILL "vcha" > /dev/null 2>&1
      sleep 1
      # pidof exits with 1 when no matching process exists.
      pidof "vcha" > /dev/null 2>&1
      if [ $? -eq 1 ]; then
         vchapid=''
      fi
      attempts=$((attempts+1))
   done
   rm -f -- "${VCHA_PID_FILE}"
   return 0
}


# Report whether a vcha process exists.
#
# Logs "running" or "stopped" through logger (tag vmware-vcha, daemon.info).
# Returns:
#   0 - pidof found a vcha process
#   3 - pidof did not find one
status_vcha() {
   local state="stopped"
   local code=3

   if pidof "vcha" > /dev/null 2>&1; then
      state="running"
      code=0
   fi

   logger -t vmware-vcha -s -p daemon.info "$state"
   return $code
}


# Start vcha unless get_watchdog_status says it is already running.
#
# Returns:
#   0 - vcha was started, or was already running
#   6 - start_vcha failed
do_start() {
   # A non-zero status from get_watchdog_status means there is nothing to do.
   if ! get_watchdog_status; then
      logger -t vmware-vcha -s -p daemon.info "vcha is already running"
      return 0
   fi

   logger -t vmware-vcha -s -p daemon.info "Starting vcha..."
   if start_vcha; then
      echo_success
      return 0
   fi

   echo_failure
   return 6 # Not the correct code, probably something > 150
}


# Dispatch an init-script action.
#
# Arguments:
#   $1 - one of start, stop, status, restart
# Globals:
#   RESULT (written) exit code the script finishes with:
#     start   - exit status of the flock invocation (1 when the lock is held
#               by another start in progress)
#     stop    - 1 if stop_vcha failed, otherwise 0
#     status  - exit status of status_vcha
#     restart - exit status of the nested "start" invocation
#     other   - 2, after printing a usage line
main() {
   RESULT=0
   case "$1" in
      start)
         # Re-run the init script under a non-blocking lock so that two
         # concurrent starts cannot race. The environment marker makes the
         # re-entered script run do_start instead of coming back here.
         IS_FLOCKED_BY_VMWARE_VCHA="Yes" \
            flock -on "/var/tmp/vmware-vcha.flock" "/etc/init.d/vmware-vcha"
         RESULT=$?
         if [ "$RESULT" -eq 1 ]; then
            echo "vcha is already starting up. Aborting the request."
         fi
         ;;

      stop)
         # Declared separately so that the status tested is pidof's own,
         # and so that the variable does not leak into the global scope.
         local running
         if running=$(pidof "vcha"); then
            logger -t vmware-vcha -s -p daemon.info \
               "Stopping vcha. process id was ${running}"
            if stop_vcha; then
               echo_success
            else
               echo_failure
               RESULT=1
            fi
         else
            logger -t vmware-vcha -s -p daemon.info "already stopped"
         fi
         ;;

      status)
         status_vcha
         RESULT=$?
         ;;

      restart)
         # Run both steps as separate invocations of this script. $0 is
         # quoted so that a script path containing whitespace still works.
         # Only the outcome of "start" is reported.
         "$0" stop
         "$0" start
         RESULT=$?
         ;;

      *)
         echo "Usage: $(basename "$0") {start|stop|status|restart}"
         RESULT=2
         ;;
   esac
}

# Script entry point.
#
# main's "start" branch re-runs /etc/init.d/vmware-vcha under flock with
# IS_FLOCKED_BY_VMWARE_VCHA=Yes. That re-entered instance takes the first
# branch and performs the actual start; every other invocation takes the
# second branch, which sets up logging and dispatches through main.
if [ "$IS_FLOCKED_BY_VMWARE_VCHA" = "Yes" ]; then
   # Drop the marker so that it is not inherited by child processes.
   unset IS_FLOCKED_BY_VMWARE_VCHA
   do_start
   RESULT=$?
else
   MYPID=$$
   # Bracket this invocation in the log file with BEGIN/END lines that
   # carry the script's PID, and record the command line.
   date "+%F %T %Z BEGIN $MYPID" >> "${VCHA_LOG_FILE}"
   echo "$0 $@" >> "${VCHA_LOG_FILE}"
   # From here on stdout goes through tee, which appends a copy to the log
   # file; stderr is then pointed at the same stream.
   exec > >(/usr/bin/tee -a "${VCHA_LOG_FILE}")
   exec 2>&1
   # main stores the outcome in RESULT.
   main "$@"
   date "+%F %T %Z END $MYPID" >> "${VCHA_LOG_FILE}"
fi

# RESULT was set either from do_start's status or by main.
exit $RESULT
