-
Hahn Axel (hahn) authoredHahn Axel (hahn) authored
check_cronstatus 5.72 KiB
#!/bin/bash
# ======================================================================
#
# NAGIOS CLIENT CHECK :: check status of cronjobs executed with cronwrapper
#
# REQUIREMENTS
# - client uses a cronjob with cronwrapper (it creates parsable logs)
#
# It returns
# - UNKNOWN if no job was found
# - OK if all jobs are ok
# - ERROR if at least one job fails (wrong exitcode or is expired)
#
# ----------------------------------------------------------------------
#
# ah=axel.hahn@iml.unibe.ch
# ds=daniel.schueler@iml.unibe.ch
#
# 2017-10-13 v1.0 ah,ds
# 2017-10-17 v1.1 ah,ds remove PIPESTATUS for Debian8 compatibility
# 2019-04-30 v1.2 ah,ds show scriptlabel of failed jobs in 1st line
# 2020-02-28 v1.3 ah,ds output with separated error jobs and OK jobs
# 2020-03-05 v1.4 <axel.hahn@iml.unibe.ch> switch to ph.* helper functions
# 2022-02-28 v1.5 <axel.hahn@iml.unibe.ch> fix output of error counter
# ======================================================================
# load the ph.* helper functions; the include file is expected in the
# same directory as this script (quoted: the path may contain spaces)
. "$(dirname "$0")/inc_pluginfunctions"

# directory that is scanned for the logfiles of the cronjobs
LOGDIR=/var/tmp/cronlogs

# temporary files; they carry the PID of this process in their name
# - errfile     its existence marks "a job failed"; its lines get counted
# - failfile    SCRIPTLABEL of each failed job
# - outputOk    detail output of all jobs without a problem
# - outputError detail output of all jobs with a problem
errfile=/tmp/cronjob_status.$$.err
failfile=/tmp/cronjob_status.$$.fails
outputOk=/tmp/cronjob_status_out_ok.$$.txt
outputError=/tmp/cronjob_status_out_error.$$.txt

# current unix timestamp; a job with an older JOBEXPIRE value is expired
typeset -i iMaxAge=$(date +%s)

# counters: all found jobs / jobs with a problem
typeset -i iJobs=0
typeset -i iErrJobs=0
# ----------------------------------------------------------------------
# FUNCTIONS
# ----------------------------------------------------------------------
# get a value from the logfile: print everything behind the first "=" of
# each line that starts with "<label>="
# Prints nothing if the label does not exist in the logfile.
# param: label (it is inserted into a grep pattern, so it is read as regex)
# global: $logfile - filename of the logfile to read
function getLogValue(){
    # "--" protects against labels starting with "-";
    # the quotes keep filenames with spaces intact
    grep -- "^$1=" "$logfile" | cut -f 2- -d "="
}
# ----------------------------------------------------------------------
# MAIN
# ----------------------------------------------------------------------
# count the logfiles of the cronjobs: files matching $LOGDIR/*log that
# contain ".log" in their name; entries with "/__" in their path are skipped.
# "grep -F" replaces the obsolescent fgrep (newer GNU grep versions print a
# warning for it); "grep -c" prints the number of matching lines, even "0".
iJobs=$(ls -1 "$LOGDIR"/*log 2>/dev/null | grep -Fv "/__" | grep -Fc ".log")
if [ "$iJobs" -eq 0 ]; then
    # NOTE(review): ph.abort comes from inc_pluginfunctions - it is expected
    # to print the message and to end the check here; confirm
    ph.abort "SKIP: no cronjobs with cronwrapper were found"
fi
# ----------------------------------------------------------------------
# check all logs
# ----------------------------------------------------------------------
# Evaluate a single logfile of a cronjob and append a readable report to
# $outputOk or - if at least one check failed - to $outputError.
# A job fails if
# - its returncode (SCRIPTRC) is not 0
# - its ttl (SCRIPTTTL, in minutes) is empty or shorter than the execution time
# - its expiry timestamp (JOBEXPIRE) is older than $iMaxAge
# For each failed job ONE line is appended to $errfile (its lines are counted
# in the output section) and its SCRIPTLABEL is appended to $failfile.
# param: filename of the logfile
# global: $logfile - is set here because getLogValue reads it
# global: $iMaxAge $outputOk $outputError $errfile $failfile - read only
function _checkCronLog(){
    logfile=$1

    local sPre=" "
    local sCmd sLastStart sTTL sTmpOutfile statusRc statusTtl statusExpire
    local -i iErr=0
    # integer variables: an empty value from the logfile becomes 0
    local -i iJobExpire rc iEcectime iTTL iTTLsec iTtlHuman

    sCmd=$(getLogValue SCRIPTNAME)
    sLastStart=$(getLogValue SCRIPTSTARTTIME)
    iJobExpire=$(getLogValue JOBEXPIRE)
    rc=$(getLogValue 'SCRIPTRC' | head -1)
    iEcectime=$(getLogValue 'SCRIPTEXECTIME' | head -1 | cut -f 1 -d " ")
    sTTL=$(getLogValue 'SCRIPTTTL')

    # ----- check return code
    statusRc='OK'
    if (( rc != 0 )); then
        iErr=$(( iErr + 1 ))
        statusRc='ERROR'
    fi

    # ----- check ttl value
    iTTL=$sTTL
    iTTLsec=$(( iTTL * 60 ))
    if [ -z "$sTTL" ]; then
        iErr=$(( iErr + 1 ))
        statusTtl="ERROR: ttl value is empty"
    else
        # human readable ttl in min/ hours/ days
        # iTTL keeps the minutes; the converted value lives in iTtlHuman
        statusTtl="$iTTL min"
        if (( iTTL > 60 )); then
            iTtlHuman=$(( iTTL / 60 ))
            statusTtl="$sTTL - $iTtlHuman h"
            if (( iTtlHuman > 24 )); then
                iTtlHuman=$(( iTtlHuman / 24 ))
                statusTtl="$sTTL - $iTtlHuman d"
            fi
        fi
        if (( iTTLsec < iEcectime )); then
            iErr=$(( iErr + 1 ))
            statusTtl="ERROR: $iTTL min = $iTTLsec s - is too low; exec time is $iEcectime s - set a higher TTL for this cronjob"
        else
            statusTtl="$statusTtl OK"
        fi
    fi

    # ----- check expire
    statusExpire=$(date -d "@$iJobExpire" '+%Y-%m-%d %H:%M:%S')
    if (( iJobExpire < iMaxAge )); then
        statusExpire="${statusExpire} ERROR"
        iErr=$(( iErr + 1 ))
    else
        statusExpire="${statusExpire} OK"
    fi

    # ----- show jobdetail and put to OK file or error file
    sTmpOutfile=$outputOk
    if (( iErr > 0 )); then
        sTmpOutfile=$outputError
    fi
    {
        echo
        echo "--- $logfile"
        echo "${sPre}${sCmd}"
        echo "${sPre}last start: ${sLastStart}"
        echo "${sPre}returncode: ${rc} ${statusRc}"
        echo "${sPre}duration: ${iEcectime} s"
        echo "${sPre}ttl: ${statusTtl}"
        echo "${sPre}expires: ${iJobExpire} ${statusExpire}"
        if (( iErr > 0 )); then
            echo "${sPre}CHECK FAILED"
        fi
    } >> "$sTmpOutfile"

    if (( iErr > 0 )); then
        # append (not overwrite): each failed job must add its own line,
        # otherwise the number of failed jobs would always be 1
        echo "$logfile" >> "$errfile"
        getLogValue SCRIPTLABEL >> "$failfile"
    fi
}

# Loop over all logfiles, sorted by modification time (ls -t).
# The filters are the same as for the job counter above, so that counter and
# loop handle the same files. The loop is the last part of a pipeline and
# runs in a subshell: variables set in it are lost afterwards - that's why
# all results are handed over in files.
ls -1t "$LOGDIR"/*log | grep -Fv "/__" | grep -F ".log" | while IFS= read -r sLogfile
do
    _checkCronLog "$sLogfile"
done
# ----------------------------------------------------------------------
# output
# ----------------------------------------------------------------------
# Print the result of the check to stdout.
# If $errfile exists, at least one job failed: its lines are counted into
# $iErrJobs, then a summary line with the labels of the failed jobs, the
# details of the failed jobs and the details of the OK jobs are printed.
# Otherwise a single OK line plus the details of all jobs is shown.
# The temporary files $errfile, $failfile and $outputError are removed.
# global: $errfile $failfile $outputOk $outputError $iJobs - read only
# global: $iErrJobs - is set to the number of lines in $errfile
function _printCronReport(){
    local sFailedLabels=

    if [ -f "$errfile" ]; then
        iErrJobs=$(( $(wc -l < "$errfile") ))

        # join all labels with ", "; the file is missing if none of the
        # failed jobs had a SCRIPTLABEL
        if [ -f "$failfile" ]; then
            sFailedLabels=$(sed ':a;N;$!ba;s/\n/, /g' "$failfile")
        fi

        echo "ERROR: $iErrJobs of $iJobs jobs [${sFailedLabels}] have a problem"
        echo
        echo "********** Jobs with problems:"
        cat "$outputError"
        echo
        echo
        echo "********** OK:"
        # there is no OK file if every job has a problem
        if [ -f "$outputOk" ]; then
            cat "$outputOk"
        fi
        rm -f "$errfile" "$failfile" "$outputError"
    else
        echo "OK: $iJobs cronjob(s) run fine"
        if [ -f "$outputOk" ]; then
            cat "$outputOk"
        fi
    fi
}

_printCronReport

# --- exit
# NOTE(review): ph.setStatus and ph.exit come from inc_pluginfunctions;
# ph.exit is expected to end the script with the status set before - confirm
if (( iErrJobs != 0 )); then
    ph.setStatus "critical"
else
    ph.setStatus "ok"
fi
rm -f "$outputOk"
ph.exit
# ----------------------------------------------------------------------