#!/bin/bash
# ======================================================================
#
# ICINGA PASSIVE CLIENT
#
# Run all passive checks and send response to Icinga Endpoint
#
# Requirements
# - curl
# - jq
# ----------------------------------------------------------------------
# ah = axel.hahn@iml.unibe.ch
# 2021-03-..  init
# 2022-01-11  v0.7   ah  shellcheck
# 2022-02-16  v0.8   ah  add --cfg param
# 2022-03-04  v0.9   ah  abort on http 5xx error
# 2022-03-14  v0.10  ah  less output and add _elog to run as a service
# ======================================================================


# --- product metadata; used in the start banner, the help text and
#     the version output
_product="ICINGA PASSIVE CLIENT"
_version="0.10"
_license="GNU GPL 3.0"
_copyright='(c) 2020 Institute for Medical Education * University of Bern'

# debug flag: a non-zero value enables debugging of the http component
# (see _initHttp)
typeset -i debug=0

# source config ...
# "$0" is quoted: an installation path containing spaces must not be
# split into several words
. "$( dirname "$0" )/inc_getconfig.sh"



# The following variables are presumably set by the sourced config
# (verify against inc_getconfig.sh):
# where to find check scripts ... first directory wins
# dir_plugins="/opt/imlmonitor/client/plugins/ /usr/lib64/nagios/plugins"
# dir_cfg="/etc/icinga2-passive-client"
# dir_data="/var/tmp/icinga2-passive-client"
# dir_logs="/var/log/icinga2-passive-client"

# log of all check executions; built from dir_logs as it is set right now
logfile="${dir_logs}/execution.log"

# helper script that collects the fields of a check result (used in
# processCheck) and full qualified name of the local host
ch="$( dirname "$0" )/inc/confighandler.sh"
myHost=$(hostname -f)

# for loop mode only: max. random sleep time
typeset -i sleeptime=30

# collected return code over all processed checks
typeset -i _rc_all=0

# ----------------------------------------------------------------------
#
# FUNCTIONS
#
# ----------------------------------------------------------------------
# ..................................................................
#
# helper to make the http base setup for host actions:
# initialize the http client with the api config file and switch on
# its debugging if the global debug flag is set
# global  string   dir_cfg  directory containing api-icinga2.cfg
# global  integer  debug    non-zero value enables http debugging
function _initHttp(){
  local _apiConfig="${dir_cfg}/api-icinga2.cfg"

  # see inc_functions
  _initHttpWithConfigfile "${_apiConfig}"

  # nothing more to do without debug flag
  [ $debug -ne 0 ] || return 0
  http.setDebug 1
}


# ......................................................................
#
# find first place of the check script in the known plugin dirs
# see ${dir_plugins} in checks.cfg
# The full path of the first match is written to stdout; nothing is
# printed if the script was not found. Only regular, executable files
# match - a directory with the same name (or an empty script name) is
# never returned.
# global  string  dir_plugins  space separated list of plugin directories
# param   string  name of the check script without path
#
function findCheckScript(){
	local _script=$1
	local _dir

	# an empty name would match the plugin directory itself
	[ -n "${_script}" ] || return 0

	# dir_plugins is a space separated list: word splitting is intended
	for _dir in ${dir_plugins}
	do
		if [ -f "${_dir}/${_script}" ] && [ -x "${_dir}/${_script}" ]; then
			echo "${_dir}/${_script}"
			# first directory wins
			return 0
		fi
	done
	return 0
}

# helper function used in loopChecks
# get a snapshot of a few files: print a long listing (ls -l) of all
# files in the directory of this script and in the config directory.
# Both directories are quoted so that paths containing spaces work.
# global  string  dir_cfg  config directory
function _getFileSnapshot(){
  ls -l "$(dirname "$0")"/* "${dir_cfg}"/*
}


# ......................................................................
#
# Loop over executing all checks
# - aborts with exit 0 if the pid stored in the lock file belongs to a
#   running process of this script in loop mode
# - writes its own pid into the lock file ${dir_data}/loop.pid
# - stores a listing of script and config files and compares it in each
#   iteration: on a difference the loop ends with exit 1 and must be
#   restarted
# - each iteration sleeps a random time between the half of $sleeptime
#   and $sleeptime, verifies the host on Icinga and processes all checks
# global  string   dir_data   data directory for lock and snapshot files
# global  integer  sleeptime  max. random sleep time in sec
# no params
#
function loopChecks(){

  # TODO-MEMORY-CHECK
  # echo ${myHost} | egrep "^(kvm4|icinga)"
  # echo ${myHost} | egrep "^(monitortest)"
  # if [ $? -ne 0 ]; then
  #   echo "HARD EXIT - DO NOT EXECUTE ANY CHECK ON $myHost"
  #   exit 1
  # fi

  local _self
  _self=$(basename "$0")
  local lockfile="${dir_data}/loop.pid"
  local snapShotStart="${dir_data}/${_self}-start.fingerprint"
  local snapShotCurrent="${dir_data}/${_self}-last.fingerprint"
  local lockpid
  local -i iSleep

  if [ -f "${lockfile}" ]; then
    # the lock file ends with "... - process id <PID>"; fetch the number
    # behind "process id" - the date string in front may contain "-"
    lockpid=$(sed -n 's/.*process id \([0-9][0-9]*\)[[:space:]]*$/\1/p' "${lockfile}" | head -1)
    if [ -n "${lockpid}" ] \
      && ps -f --pid "${lockpid}" | grep -F -- "${_self}" | grep -q loop
    then
      _elog "ABORT: Loop seems to run already. See process with PID $lockpid"
      # unquoted by intention: the ps output is passed word by word
      _elog $( ps -f --pid "$lockpid" )
      exit 0
    fi
  fi

  _log "---------- starting in a permanent loop"
  if ! echo "Serviceloop started $(date) - process id $$" > "${lockfile}"; then
    _elog "ABORT: Lock file is not writable ${lockfile}."
    _elog $( ls -l "${lockfile}" )
    exit 1
  fi

  if ! _getFileSnapshot > "${snapShotStart}"; then
    _elog "ABORT: Snapshot file is not writable ${snapShotStart}."
    _elog $( ls -l "${snapShotStart}" )
    exit 1
  fi

  while true; do
    # sleep minimum is half of $sleeptime
    # a sleeptime of 0 (or less) would end in a division by zero
    if [ "${sleeptime}" -gt 0 ]; then
      iSleep=$(( RANDOM % sleeptime / 2 + sleeptime / 2 ))
    else
      iSleep=0
    fi
    _log "sleeping $iSleep sec ..."
    sleep "$iSleep"
    _log "______________________________________________________________________"
    _log ""

    if ! _getFileSnapshot > "${snapShotCurrent}"; then
      _elog "ABORT: Snapshot file is not writable ${snapShotCurrent}."
      _elog $( ls -l "${snapShotCurrent}" )
      exit 1
    fi

    # a changed listing means: script or config files were replaced
    if ! diff "${snapShotStart}" "${snapShotCurrent}" >/dev/null; then
      _elog "ABORT: Files were updated / overwritten. The loop must be restarted.\n$(diff "${snapShotStart}" "${snapShotCurrent}")"
      exit 1
    fi

    icingaHostMustExist
    processAllChecks
  done
}
# ......................................................................
#
# execute all defined checks one by one
# The list of check configs is read once from getChecks (one config
# per line; empty lines are skipped), so a config path may contain
# spaces. The global return code _rc_all is reset before the first
# check is processed.
# global  integer  _rc_all  collected return code; reset to 0 here
# no params
#
function processAllChecks(){
  local -a aConfigs=()
  local myconfig
  local -i iChecksTotal=0
  local -i iCounter=0
  local -i iLoopStart
  local -i iLoopEnd
  local -i iLoopTime

  # loop over all defined checks
  mapfile -t aConfigs < <(getChecks | sed '/^[[:space:]]*$/d')
  iChecksTotal=${#aConfigs[@]}

  _rc_all=0
  iLoopStart=$(_getUnixTs)

  _log ""
  _log "------ looping over all checks"
  for myconfig in "${aConfigs[@]}"
  do
    iCounter=$(( iCounter + 1 ))
    _log "--- processing [$iCounter of $iChecksTotal] $myconfig"
    processCheck "$myconfig"
    _log ""
  done

  iLoopEnd=$(_getUnixTs)
  iLoopTime=$(( iLoopEnd - iLoopStart ))

  _log "------ loop done - needed $iLoopTime sec - rc=$_rc_all"
}

# ......................................................................
#
# parse a config file and set global vars:
#   checkName
#   checkCommand
#   checkInterval
# Each value is everything behind the first "=" of its line, so a
# command may contain further "=" characters (e.g. --url=...).
# The script exits with code 1 if the config file is not readable.
# param  string  full path of a config file
#
function _parseCheckConfig(){
  local _myconfig="$1"

  if [ ! -r "$_myconfig" ]; then
    _elog "ERROR: config file is not readable [$_myconfig]"
    exit 1
  fi

  # EXAMPLE a config contains ...
  # checkname=check_cronstatus
  # command=check_cronstatus -param1 -param2
  # interval=60

  # strip the key and keep the complete rest of the line
  checkName=$(sed -n 's/^checkname=//p' "$_myconfig")
  checkCommand=$(sed -n 's/^command=//p' "$_myconfig")
  checkInterval=$(sed -n 's/^interval=//p' "$_myconfig")

}

# actions for icinga host
# The return code of the action "get" is the return code of http.isOk
# for the host request. A server error detected by http.isServerError
# ends the script with exit 1. An unknown action is logged and
# returns 1.
# global  string  myHost    name of the host object
# global  string  dir_data  data directory; place of the cache file
# param   string  action; "get" only
function icingaHost(){
  local sAction=$1
  local _logPrefix="${myHost} :: API |"
  local _apiRequest="objects/hosts/${myHost}"
  local _localCache="${dir_data}/host_${myHost}_deployed-at-icinga.txt"
  local -i _iRefreshCache=120

  _initHttp

  case "$sAction" in
    'get')
      # update after caching was added in http-component
      # http.responseImport $_localCache
      # typeset -i local iAgeLastGet=`http.getRequestAge`
      # _log "${_logPrefix} INFO: cache is $iAgeLastGet sec old ... TTL is _iRefreshCache=3600 sec"
      # if [ $iAgeLastGet -eq 0 -o $iAgeLastGet -gt $_iRefreshCache ]; then
      #   _log "${_logPrefix} INFO: request to Icinga GET $_apiRequest"
      #   _getApiObject $_apiRequest $_localCache
      # fi

      # new:
      http.setCacheTtl     "$_iRefreshCache"
      http.setCacheFile    "$_localCache"
      http.makeRequest GET "$_apiRequest"
      if http.isServerError >/dev/null; then
        _elog "CRITICAL ERROR: Icinga2 API request failed with a server error GET $_apiRequest"
        exit 1
      fi

      # set return code of GET action
      http.isOk >/dev/null
      ;;
    *)
      _log "ERROR: unknown action parameter $sAction"
      # an unsupported action must not look like a successful request
      return 1
      ;;
  esac

}

# gatekeeper used before checks are executed:
# continue only if the host object exists on icinga2. If the request
# for the host fails the script ends with exit 1; for any status other
# than "000" the local service__check* files in the data directory are
# removed before.
# global  string  myHost
# global  string  dir_data
function icingaHostMustExist(){
  local _status

  _log "check if the host [${myHost}] exists on Icinga ..."

  # good case first: host was found
  if icingaHost get; then
    _log "OK, found."
    return
  fi

  _echo $( http.getResponse )
  _status="$(http.getStatuscode)"

  # status 000: there was no response of the Icinga node at all
  case "${_status}" in
    000)
      _elog "ERROR: Unable to reach the Icinga node. Stopping script current monitoring actions."
      exit 1
      ;;
  esac

  _elog "ERROR: host object for ${myHost} is not available on Icinga service (yet) - Status: ${_status}"
  _echo
  _echo "ABORTING"
  _echo
  _echo "To run checks ..."
  _echo "- you must create the host on director (check director-cli.sh --hr)"
  _echo "- the director must deploy the host to icinga daemon"
  _echo

  # drop stored check outputs and responses of this host
  rm -f "${dir_data}"/service__check* 2>/dev/null
  exit 1
}


# ......................................................................
#
# process a single check:
# - parse the config file of the check
# - decide if the check must run: never executed before, last run
#   (plus sleeptime) is older than the interval or execution is forced
# - execute the check plugin and store its output in the data directory
# - send the result to Icinga using the process-check-result action
# - add the return code of the check (plus 1 if sending the result
#   failed) to the global return code _rc_all
# The script exits with code 1 if the output file or the response file
# is not writable.
# global  string   dir_data   data directory for output and response
# global  string   myHost     name of the host object on Icinga
# global  string   ch         config handler script
# global  integer  sleeptime  max. sleep time in loop mode
# global  integer  _rc_all    collected return code of all checks
# param  string  name of config file
# param  string  optional: any non-empty value forces the execution
#
function processCheck(){
  local _myconfig=$1
  local _force=$2

  local -i iCheckStart
  local -i iCheckEnd
  local -i iCheckTime
  local -i iTsStart
  local -i iTsEnd
  local -i iAgeLastRun
  local -i _rc=0
  local -i rc=0
  local -i doRun=0
  local myscript
  local myFullscript
  local myparams
  local outPerfdata
  local commandAsJson
  local data
  local slot
  local _apiUrl

  iCheckStart=$(_getUnixTs)

  _parseCheckConfig "${_myconfig}"
  local _logPrefix="${checkName} |"
  _log "${_logPrefix} INFO: every ${checkInterval} sec: ${checkCommand}"

  local _outfile="${dir_data}/service__check__${checkName}__output.txt"
  local _response="${dir_data}/service__check__${checkName}__icinga_response.txt"

  _initHttp

  # --- check last run ... if never or > $interval then execute
  if [ ! -f "$_outfile" ]; then
    _log "${_logPrefix} INFO: Never executed before"
    doRun=1
  else
    iAgeLastRun=$(_getFileAge "$_outfile")
    _log "${_logPrefix} INFO: last run was $iAgeLastRun sec ago ... vs Interval = $checkInterval ... sleeptime = $sleeptime"
    # add the sleep time of the loop mode to the age of the last run
    iAgeLastRun=$(( iAgeLastRun + sleeptime ))
    if [ "$iAgeLastRun" -gt "$checkInterval" ]; then
      doRun=1
    fi

    if [ -n "$_force" ]; then
      doRun=1
      _log "${_logPrefix} INFO: forced execution by given param "
    fi
  fi

  if [ "$doRun" -ne 0 ]; then
    # first word of the command is the script; the rest are its params
    myscript=${checkCommand%% *}
    myFullscript=$(findCheckScript "$myscript")
    if [ -z "$myFullscript" ]; then
      _log "${_logPrefix} ERROR: $myscript was not found in any plugin dir"
    else
      myparams=""
      case "$checkCommand" in
        *" "*) myparams=${checkCommand#* } ;;
      esac

      #
      # --- this executes the check plugin ...
      #
      _log "${_logPrefix} starting $myFullscript $myparams"
      iTsStart=$(date +%s)
      # NOTE(review): eval lets the shell interpret quotes inside the
      # params; the command comes from the check config file, so this
      # file must be trusted.
      eval "${myFullscript} ${myparams}" > "$_outfile"
      rc=$?
      if [ ! -w "$_outfile" ]; then
        _elog "${_logPrefix} ERROR: output file $_outfile is not writable."
        _elog "${_logPrefix} $( ls -ld "${dir_data}" "$_outfile" )"
        exit 1
      fi
      iTsEnd=$(date +%s)
      # performance data: the part behind the last "|" of an output line
      outPerfdata=$(grep '|' "$_outfile" | rev | cut -f 1 -d '|' | rev)
      _echo
      _echo -------- check output:
      # unquoted by intention: the output is passed word by word
      _echo $( cat "$_outfile" )
      _echo
      _log "${_logPrefix} check command finished with returncode $rc"

      _rc=$(( _rc + rc ))
      #
      # --- send check result to Icinga
      #     fields of the object
      #     https://icinga.com/docs/icinga2/latest/doc/12-icinga2-api/#process-check-result
      export CFGSTORAGE="${checkName}output"

      commandAsJson="$(jq -nR --arg data "${myFullscript} ${myparams}" '$data')"
      (
        "$ch" --set      check_source      "\"${myHost}\""
        "$ch" --set      check_command     "${commandAsJson}"
        "$ch" --set      exit_status       "$rc"
        "$ch" --setfile  plugin_output     "${_outfile}"
        "$ch" --set      performance_data  "\"${outPerfdata}\""
        "$ch" --set      ttl               "$checkInterval"
        "$ch" --set      execution_start   "$iTsStart"
        "$ch" --set      execution_end     "$iTsEnd"
      ) 2>/dev/null
      data=$("$ch" --json 2>/dev/null)

      # spaces in the service name are url encoded
      # shellcheck disable=SC2086 # NOTE(review): kept unquoted like before; confirm how _getName4Svcathost reads its params
      slot="$(_getName4Svcathost ${checkName} | sed 's# #%20#g')"
      # quoted: the "?" in the url must not be used as glob pattern
      _apiUrl="actions/process-check-result?service=${myHost}!${slot}"

      _log "${_logPrefix} starting POST of data to monitoring server"
      _echo POST "${_apiUrl}" "$data"
      _APIcall POST "${_apiUrl}" "$data"
      http.responseExport "$_response"
      if [ ! -w "$_response" ]; then
        _elog "${_logPrefix} ERROR: responsefile $_response is not writable."
        _elog "${_logPrefix} $( ls -ld "${dir_data}" "$_response" )"
        exit 1
      fi

      # --- check if data were sent successfully
      if http.isOk >/dev/null; then
        _log "${_logPrefix} rc=$rc - OK, response was sent to Icinga"
      else
        _elog "${_logPrefix} rc=$rc - WARNING: the check response was NOT sent to Icinga"
        _rc=$(( _rc + 1 ))
        _echo
        _echo For Debugging:
        _echo "$( "$ch" --show --json )"
        _log "$( "$ch" --show --json )"
      fi
      "$ch" --flush 2>/dev/null

    fi

  else
    _log "${_logPrefix} SKIP execution."
  fi

  # add current result to global returncode
  _rc_all=$(( _rc_all + _rc ))

  iCheckEnd=$(_getUnixTs)
  iCheckTime=$(( iCheckEnd - iCheckStart ))

  _log "${_logPrefix} finished after $iCheckTime sec with returncode $_rc"
  if [ "$_rc" -ne 0 ]; then
    _echo
    _echo "    >>> Check ${checkName} was not OK. See Output block above!"
    _echo
    _echo
  fi
}
# ----------------------------------------------------------------------
# help
# ----------------------------------------------------------------------

# show help text
# The text is written to stdout. It contains the values of the globals
# _product, _version, sleeptime, dir_cfg, dir_data and logfile.
function showHelp(){
local self
self=$(basename "$0")
cat <<EOH


INTRODUCTION

$_product v$_version

Handle and execute icinga passive checks.
With this client you can run a single check, all checks or make a permanent loop.
A new local check will be added to Icinga while running it the first time.


GENERAL PARAMETERS

  --cfg CONFIGFILE
    load a custom config file; default: ./inc_getconfig.sh
    This must be the 1st parameter to be processed.

  --help or -h or -?
    show this help and abort.

  --version or -v
    show the version and abort

SERVICE ACTIONS

  --list
    get a list of local config files

  --loop
    Start to check all passive checks in a permanent loop.
    It makes a random sleep of max. $sleeptime sec between all loops. This shuffles
    the access time of all clients making requests to the icinga server.

    Multiple starts will be detected. This parameter is the optimal choice for a
    cronjob.

  --runonce
    Start to check all passive checks once.
    This method respects the interval per check. Only outdated checks will be
    executed.
    This is a second choice for a cronjob if the runtime of all checks
    is much shorter than your cronjob interval to prevent multiple processes.
    Multiple starts will NOT be detected.

  --run CONFIGFILE
    Run a check by pointing the config file (see --list)
    This execution ignores the interval and forces the execution.


CONFIG

  Config file
  /etc/icinga2-passive-client/client.cfg
  It sets other used paths.

  Config files for checks are in ${dir_cfg}/checks/


DEBUGGING

  The Output of check and results from Icinga are in
  ${dir_data}.

  A log of all performed executed check runs by $self
  are in $logfile.
  BTW: do not forget to add a log rotation for it.

EOH
}

# show product name with version, license and copyright
# global  string  _product
# global  string  _version
# global  string  _license
# global  string  _copyright
function showVersion(){
  # the version itself was missing in the output of --version
  echo "$_product v$_version"
  echo "$_license"
  echo "$_copyright"
  echo
}
# ----------------------------------------------------------------------
#
# MAIN
#
# - load functions, config (default or custom by --cfg) and the rest
#   api client
# - without parameter, or with help/ version as first parameter: show
#   the text and stop - this needs no connection to Icinga
# - abort if the host does not exist on Icinga
# - process all parameters one by one
#
# ----------------------------------------------------------------------

. "$( dirname "$0" )/inc_functions.sh"

_echo "
______________________________________________________________________________________

 _______        __
|_     _|.----.|__|.-----.-----.---.-.
 _|   |_ |  __||  ||     |  _  |  _  |
|_______||____||__||__|__|___  |___._|
                         |_____|
 ______                     __                   ______ _____   __               __
|   __ \.---.-.-----.-----.|__|.--.--.-----.    |      |     |_|__|.-----.-----.|  |_
|    __/|  _  |__ --|__ --||  ||  |  |  -__|    |   ---|       |  ||  -__|     ||   _|
|___|   |___._|_____|_____||__| \___/|_____|    |______|_______|__||_____|__|__||____|
                                                                                 v${_version}

  $_license .. $_copyright

______________________________________________________________________________________

"

_elog "Starting $_product $_version"


# --cfg must be the first parameter; otherwise the default config is used
if [ "$1" = "--cfg" ] && [ -n "$2" ]; then
  echo "INFO: loading custom config [$2]..."
  . "${2}"
  shift 2
else
  . "$( dirname "$0" )/inc_getconfig.sh"
fi

. "$( dirname "$0" )/inc/rest-api-client.sh"

if [ $# -eq 0 ]; then
  showHelp
  exit 0
fi

# help and version as first parameter are answered right here: they
# must work even if the Icinga node is not reachable or the client is
# not configured yet
case "$1" in
  '--help' | '-h' | '-?')
    showHelp
    exit 0
    ;;
  '--version' | '-v')
    showVersion
    exit 0
    ;;
esac

if [ -z "${dir_cfg}" ]; then
  echo "ERROR: $_product is not installed/ configured yet on this machine."
  exit 1
fi

icingaHostMustExist


touch "${logfile}"


while [ $# -gt 0 ];
do
  case "$1" in

    '--help' | '-h' | '-?')
      showHelp
      exit 0
      ;;
    '--version' | '-v')
      showVersion
      exit 0
      ;;


    '--list')
      getChecks
      ;;

    '--loop')
      loopChecks
      ;;

    '--runonce')
      processAllChecks
      ;;

    '--run')
      if [ -z "$2" ]; then
        echo "ERROR: parameter --run requires a CONFIGFILE (see --list)."
        exit 2
      fi
      processCheck "$2" "force"
      # skip the config file parameter
      shift 1
      ;;

    *)
      echo "ERROR: unknown parameter detected."
      exit 2
      ;;
  esac
  shift 1
done
echo

# remark:
# $_rc_all is a collected status code in the loop of all actions
# if it is 0 then all checks were OK and have been sent to Icinga
# echo exit with status code $_rc_all
# exit $_rc_all
exit 0

# ----------------------------------------------------------------------