#!/bin/bash
# check_cronstatus
# ======================================================================
#
# NAGIOS CLIENT CHECK :: check status of cronjobs executed with cronwrapper
#
# REQUIREMENTS
# - client uses a cronjob with cronwrapper (it creates parsable logs)
#
# It returns 
# - UNKNOWN if no job was found
# - OK if all jobs are ok
# - ERROR if at least one job fails (wrong exit code, expired, or missing/too low TTL)
#
# ----------------------------------------------------------------------
#
# ah=axel.hahn@iml.unibe.ch
# ds=daniel.schueler@iml.unibe.ch
#
# 2017-10-13  v1.0  ah,ds
# 2017-10-17  v1.1  ah,ds  remove PIPESTATUS for Debian8 compatibility
# 2019-04-30  v1.2  ah,ds  show scriptlabel of failed jobs in 1st line
# 2020-02-28  v1.3  ah,ds  output with separated error jobs and OK jobs
# 2020-03-05  v1.4  <axel.hahn@iml.unibe.ch> switch to ph.* helper functions
# 2022-02-28  v1.5  <axel.hahn@iml.unibe.ch> fix output of error counter
# ======================================================================


# Load the ph.* helper functions from the file next to this script.
# The path is quoted so that an install directory containing blanks works.
. "$(dirname -- "$0")/inc_pluginfunctions"

# directory with the logfiles that are scanned by this check
LOGDIR=/var/tmp/cronlogs

# Scratch files of this run; the PID suffix keeps parallel runs apart.
# NOTE(review): these names in /tmp are predictable - consider mktemp.
# errfile  - gets the logfile name when a job check failed
# failfile - gets the SCRIPTLABEL of each failed job
errfile=/tmp/cronjob_status.$$.err
failfile=/tmp/cronjob_status.$$.fails

# job details, separated into OK jobs and jobs with problems
outputOk=/tmp/cronjob_status_out_ok.$$.txt
outputError=/tmp/cronjob_status_out_error.$$.txt

# current unix timestamp; a job with a JOBEXPIRE older than this is expired
typeset -i iMaxAge
iMaxAge=$(date +%s)

# counters: all found jobs and jobs with a problem
typeset -i iJobs=0
typeset -i iErrJobs=0


# ----------------------------------------------------------------------
# FUNCTIONS
# ----------------------------------------------------------------------

# Get a value from the logfile: everything behind the first "=" of each
# line that starts with "<label>=".
# param   string  $1        label to search for, e.g. SCRIPTRC
#                           (it is inserted into a regex, so keep it plain)
# global  string  $logfile  path of the logfile to read
# output  the value(s) on stdout, one per matching line; nothing if the
#         label does not exist
function getLogValue(){
        # $logfile is quoted: a path with blanks must stay a single argument
        grep "^$1=" "$logfile" | cut -f 2- -d "="
}



# ----------------------------------------------------------------------
# MAIN
# ----------------------------------------------------------------------

# Count the job logs: all entries matching $LOGDIR/*log whose path contains
# ".log", without those whose path contains "/__".
# grep -F is used instead of fgrep: fgrep is obsolescent and newer GNU grep
# versions print a warning for it.
iJobs=$(ls -1 "$LOGDIR"/*log 2>/dev/null | grep -F -v "/__" | grep -F -c ".log")

# nothing to check - hand over to the ph.abort helper (from inc_pluginfunctions)
if [ "$iJobs" -eq 0 ]; then
  ph.abort "SKIP: no cronjobs with cronwrapper were found"
fi

# ----------------------------------------------------------------------
# check all logs
# ----------------------------------------------------------------------
# Start with clean scratch files: all of them are written in append mode below.
rm -f "$errfile" "$failfile" "$outputOk" "$outputError"

# Walk over all job logs, newest first. Paths containing "/__" are skipped and
# - like in the job counter above - only paths containing ".log" are taken.
# NOTE: the loop is the last part of a pipeline and runs in a subshell.
# Variables set in here are lost afterwards; the results are handed over in
# the files $errfile, $failfile, $outputOk and $outputError.
ls -1t "$LOGDIR"/*log | grep -F -v "/__" | grep -F ".log" | while read -r logfile
do
        # number of failed checks of the current job
        typeset -i iErr=0

        sPre="    "
        sCmd=$(getLogValue SCRIPTNAME)
        sLastStart=$(getLogValue SCRIPTSTARTTIME)
        # integer variables: an empty value from the log results in 0
        typeset -i iJobExpire="$(getLogValue JOBEXPIRE)"
        typeset -i rc="$(getLogValue 'SCRIPTRC' | head -1)"
        typeset -i iExecTime="$(getLogValue 'SCRIPTEXECTIME' | head -1 | cut -f 1 -d " ")"
        sTTL=$(getLogValue 'SCRIPTTTL')

        # ----- check return code
        statusRc='OK'
        if [ "$rc" -ne 0 ]; then
                iErr=$(( iErr + 1 ))
                statusRc='ERROR'
        fi

        # ----- check ttl value (it is given in minutes)
        typeset -i iTTL="$sTTL"
        typeset -i iTTLsec=0
        typeset -i iTTLhuman=0
        iTTLsec=$(( iTTL * 60 ))
        if [ -z "$sTTL" ]; then
                iErr=$(( iErr + 1 ))
                statusTtl="ERROR: ttl value is empty"
        else
                # human readable ttl in min/ hours/ days
                # a separate variable is used so that $iTTL keeps the minutes
                statusTtl="$iTTL min"
                if [ "$iTTL" -gt 60 ]; then
                        iTTLhuman=$(( iTTL / 60 ))
                        statusTtl="$sTTL - $iTTLhuman h"
                        if [ "$iTTLhuman" -gt 24 ]; then
                                iTTLhuman=$(( iTTLhuman / 24 ))
                                statusTtl="$sTTL - $iTTLhuman d"
                        fi
                fi
                # the ttl must not be shorter than the execution time
                if [ "$iTTLsec" -lt "$iExecTime" ]; then
                        iErr=$(( iErr + 1 ))
                        statusTtl="ERROR: $iTTL min = $iTTLsec s - is too low; exec time is $iExecTime s - set a higher TTL for this cronjob"
                else
                        statusTtl="$statusTtl OK"
                fi
        fi

        # ----- check expire
        # JOBEXPIRE is a unix timestamp; it is an error if it is in the past
        statusExpire=$(date -d "@$iJobExpire" '+%Y-%m-%d %H:%M:%S')
        if [ "$iJobExpire" -lt "$iMaxAge" ]; then
                statusExpire="${statusExpire} ERROR"
                iErr=$(( iErr + 1 ))
        else
                statusExpire="${statusExpire} OK"
        fi

        # ----- show jobdetail and put to OK file or error file
        sTmpOutfile=$outputOk
        if [ "$iErr" -gt 0 ]; then
                sTmpOutfile=$outputError
        fi
        {
                echo
                echo "--- $logfile"

                echo "${sPre}${sCmd}"
                echo "${sPre}last start: ${sLastStart}"
                echo "${sPre}returncode: ${rc} ${statusRc}"
                echo "${sPre}duration: ${iExecTime} s"
                echo "${sPre}ttl: ${statusTtl}"
                echo "${sPre}expires: ${iJobExpire} ${statusExpire}"

                if [ "$iErr" -gt 0 ]; then
                        echo "${sPre}CHECK FAILED"
                        # append (not overwrite): the number of lines in
                        # $errfile is used as counter of failed jobs
                        echo "$logfile" >> "$errfile"
                        getLogValue SCRIPTLABEL >> "$failfile"
                fi
        } >> "$sTmpOutfile"

done


# ----------------------------------------------------------------------
# output
# ----------------------------------------------------------------------

# The job checks ran in a subshell, so the results are read back from files.
# $errfile exists only if at least one job check failed.
if [ -f "$errfile" ]; then
  # the error counter is the number of lines in $errfile
  iErrJobs=$(wc -l < "$errfile")

  # comma separated labels of the failed jobs; the file is missing if none
  # of the failed jobs has a SCRIPTLABEL
  sFailedLabels=
  if [ -f "$failfile" ]; then
    sFailedLabels=$(sed ':a;N;$!ba;s/\n/, /g' "$failfile")
  fi

  echo "ERROR: $iErrJobs of $iJobs jobs [$sFailedLabels] have a problem"
  echo
  echo "********** Jobs with problems:"
  if [ -f "$outputError" ]; then
    cat "$outputError"
  fi
  echo
  echo
  echo "********** OK:"
  # there is no OK file if every job has a problem
  if [ -f "$outputOk" ]; then
    cat "$outputOk"
  fi
else
  echo "OK: $iJobs cronjob(s) run fine"
  if [ -f "$outputOk" ]; then
    cat "$outputOk"
  fi
fi


# --- exit
if [ "$iErrJobs" -ne 0 ]; then
  ph.setStatus "critical"
else
  ph.setStatus "ok"
fi

# remove all scratch files of this run (rm -f ignores missing files)
rm -f "$errfile" "$failfile" "$outputError" "$outputOk"

ph.exit

# ----------------------------------------------------------------------