Skip to content
Snippets Groups Projects
Select Git revision
  • db03ee549424012136878670f2daaaa9d7697be7
  • master default protected
  • simple-task/7248-eol-check-add-node-22
  • 6877_check_iml_deployment
4 results

check_couchdb.md

Blame
  • check_cronstatus 5.62 KiB
    #!/bin/bash
    # ======================================================================
    #
    # NAGIOS CLIENT CHECK :: check status of cronjobs executed with cronwrapper
    #
    # REQUIREMENTS
    # - client uses a cronjob with cronwrapper (it creates parsable logs)
    #
    # It returns 
    # - UNKNOWN if no job was found
    # - OK if all jobs are ok
    # - ERROR if at least one job fails (wrong exit code or is expired)
    #
    # ----------------------------------------------------------------------
    #
    # ah=axel.hahn@iml.unibe.ch
    # ds=daniel.schueler@iml.unibe.ch
    #
    # 2017-10-13  v1.0  ah,ds
    # 2017-10-17  v1.1  ah,ds  remove PIPESTATUS for Debian8 compatibility
    # 2019-04-30  v1.2  ah,ds  show scriptlabel of failed jobs in 1st line
    # 2020-02-28  v1.3  ah,ds  output with separated error jobs and OK jobs
    # 2020-03-05  v1.4  <axel.hahn@iml.unibe.ch> switch to ph.* helper functions
    # ======================================================================
    
    
    # load the ph.* helper functions that are stored next to this script
    . "$(dirname "$0")/inc_pluginfunctions"

    # directory that is scanned for cronjob logfiles (*log)
    LOGDIR=/var/tmp/cronlogs

    # All scratch files live in a private directory created by mktemp instead
    # of using predictable names directly in /tmp. The files themselves are NOT
    # created here: the existence of $errfile is used as "a job failed" flag.
    sTmpDir=$(mktemp -d /tmp/cronjob_status.XXXXXX) || {
      ph.abort "UNKNOWN: unable to create a temporary directory"
      exit 3
    }
    # remove the scratch directory on every exit path of the main shell
    # NOTE(review): this replaces an EXIT trap if inc_pluginfunctions sets one - confirm
    trap 'rm -rf -- "$sTmpDir"' EXIT

    # number of failed jobs (written by the check loop)
    errfile=$sTmpDir/cronjob_status.err
    # labels of the failed jobs, one per line
    failfile=$sTmpDir/cronjob_status.fails

    # collected detail output of jobs without / with problems
    outputOk=$sTmpDir/cronjob_status_out_ok.txt
    outputError=$sTmpDir/cronjob_status_out_error.txt

    # current unix timestamp; a job whose JOBEXPIRE is lower is expired
    typeset -i iMaxAge=$(date +%s)
    typeset -i iJobs=0
    typeset -i iErrJobs=0
    
    
    # ----------------------------------------------------------------------
    # FUNCTIONS
    # ----------------------------------------------------------------------
    
    # Print the value(s) stored for a label in the current logfile, i.e.
    # everything behind the first "=" of each line starting with "<label>=".
    # Prints nothing if the label does not occur in the file.
    # param:  $1        label; it is inserted into a grep regex as is
    # global: $logfile  path of the logfile to read
    function getLogValue(){
            # quoted filename: paths with whitespace must stay a single argument
            grep -- "^$1=" "$logfile" | cut -f 2- -d "="
    }
    
    
    
    # ----------------------------------------------------------------------
    # MAIN
    # ----------------------------------------------------------------------
    
    # Count the job logfiles: everything matching *log in $LOGDIR whose path
    # contains ".log" but not "/__".
    # grep -F replaces the obsolescent fgrep (newer GNU grep versions print a
    # warning on stderr for fgrep); grep -c replaces the extra "wc -l".
    iJobs=$(ls -1 "$LOGDIR"/*log 2>/dev/null | grep -Fv "/__" | grep -Fc ".log")

    if [ "$iJobs" -eq 0 ]; then
      ph.abort "SKIP: no cronjobs with cronwrapper were found"
    fi
    
    # ----------------------------------------------------------------------
    # check all logs
    # ----------------------------------------------------------------------
    # Inspect every job logfile (newest first) and append a detail section to
    # $outputOk or $outputError.
    # The loop is the last part of a pipeline and therefore runs in a subshell:
    # variables changed here are lost afterwards, so the number of failed jobs
    # is handed over via $errfile and their labels via $failfile.
    # The file filter is the same one that is used for counting the jobs.
    ls -1t "$LOGDIR"/*log | grep -Fv "/__" | grep -F ".log" | while read -r logfile
    do
            typeset -i iErr=0
            sPre="    "

            # ----- read job data from the logfile
            sCmd=$(getLogValue SCRIPTNAME)
            sLastStart=$(getLogValue SCRIPTSTARTTIME)
            sTTL=$(getLogValue 'SCRIPTTTL')
            typeset -i iJobExpire=$(getLogValue JOBEXPIRE | head -1)
            typeset -i rc=$(getLogValue 'SCRIPTRC' | head -1)
            typeset -i iEcectime=$(getLogValue 'SCRIPTEXECTIME' | head -1 | cut -f 1 -d " ")

            # ----- check return code
            statusRc='OK'
            if [ "$rc" -ne 0 ]; then
                    iErr=$((iErr + 1))
                    statusRc='ERROR'
            fi

            # ----- check ttl value
            # iTTL keeps the value in minutes; iTTLhuman is a scaled copy that
            # is only used for the human readable h/d display
            typeset -i iTTL="$sTTL"
            typeset -i iTTLsec=$((iTTL * 60))
            typeset -i iTTLhuman=$iTTL
            if [ -z "$sTTL" ]; then
                    iErr=$((iErr + 1))
                    statusTtl="ERROR: ttl value is empty"
            elif [ "$iTTLsec" -lt "$iEcectime" ]; then
                    # the job runs longer than its ttl allows
                    iErr=$((iErr + 1))
                    statusTtl="ERROR: $iTTL min = $iTTLsec s - is too low; exec time is $iEcectime s - set a higher TTL for this cronjob"
            else
                    # human readable ttl in min/ hours/ days
                    statusTtl="$iTTL min"
                    if [ "$iTTLhuman" -gt 60 ]; then
                            iTTLhuman=$((iTTLhuman / 60))
                            statusTtl="$sTTL - $iTTLhuman h"
                            if [ "$iTTLhuman" -gt 24 ]; then
                                    iTTLhuman=$((iTTLhuman / 24))
                                    statusTtl="$sTTL - $iTTLhuman d"
                            fi
                    fi
                    statusTtl="$statusTtl OK"
            fi

            # ----- check expire
            statusExpire="$(date -d "@$iJobExpire" '+%Y-%m-%d %H:%M:%S')"
            if [ "$iJobExpire" -lt "$iMaxAge" ]; then
                    statusExpire="${statusExpire} ERROR"
                    iErr=$((iErr + 1))
            else
                    statusExpire="${statusExpire} OK"
            fi

            # ----- bookkeeping for failed jobs
            # This must happen in the loop's own shell (not in a nested
            # subshell), otherwise the counter restarts at 0 for every job.
            sTmpOutfile=$outputOk
            if [ "$iErr" -gt 0 ]; then
                    sTmpOutfile=$outputError
                    iErrJobs=$((iErrJobs + 1))
                    echo "$iErrJobs" > "$errfile"
                    getLogValue SCRIPTLABEL >> "$failfile"
            fi

            # ----- show jobdetail and put to OK file or error file
            {
                    echo
                    echo "--- $logfile"

                    echo "${sPre}${sCmd}"
                    echo "${sPre}last start: ${sLastStart}"
                    echo "${sPre}returncode: ${rc} ${statusRc}"
                    echo "${sPre}duration: ${iEcectime} s"
                    echo "${sPre}ttl: ${statusTtl}"
                    echo "${sPre}expires: ${iJobExpire} ${statusExpire}"

                    if [ "$iErr" -gt 0 ]; then
                            echo "${sPre}CHECK FAILED"
                    fi
            } >> "$sTmpOutfile"

    done
    
    
    # ----------------------------------------------------------------------
    # output
    # ----------------------------------------------------------------------
    
    # $errfile exists only if at least one job failed; it holds the number of
    # failed jobs. The first output line is the summary, the job details follow.
    if [ -f "$errfile" ]; then
      iErrJobs=$(cat "$errfile")

      # comma separated labels of the failed jobs; the file is missing if none
      # of the failed jobs has a SCRIPTLABEL
      sFailedLabels=''
      if [ -f "$failfile" ]; then
        sFailedLabels=$(sed ':a;N;$!ba;s/\n/, /g' "$failfile")
      fi

      echo "ERROR: $iErrJobs of $iJobs jobs [$sFailedLabels] have a problem"
      echo
      echo "********** Jobs with problems:"
      cat "$outputError"
      echo
      echo
      echo "********** OK:"
      # there is no OK file if every job has a problem
      if [ -f "$outputOk" ]; then
        cat "$outputOk"
      fi
      rm -f "$errfile" "$failfile" "$outputError"
    else
      echo "OK: $iJobs cronjob(s) run fine"
      cat "$outputOk"
    fi


    # --- exit
    if [ "$iErrJobs" -ne 0 ]; then
      ph.setStatus "critical"
    else
      ph.setStatus "ok"
    fi

    rm -f "$outputOk"

    ph.exit
    
    # ----------------------------------------------------------------------