Skip to content
Snippets Groups Projects
Select Git revision
  • ba700576d347369ee20d7711425044db33c20aaa
  • master default protected
  • 7771-harden-postgres-backup
  • pgsql-dump-with-snapshots
  • update-colors
  • update-docs-css
  • usb-repair-stick
  • desktop-notification
  • 7000-corrections
  • db-detector
10 results

detector.sh

Blame
  • check_proc_ressources 5.28 KiB
    #!/bin/bash
    # ======================================================================
    #
    # NAGIOS CLIENT CHECK :: SHOW PROCESSES USING A LOT OF CPU OR MEMORY
    #
    # ----------------------------------------------------------------------
    #
    # SYNTAX:
    #  ./check_proc_ressources -m METHOD [-w WARNLIMIT] [-c CRITICALLIMIT]
    #    METHOD         string   identify what to check; one of cpu|mem
    #    WARNLIMIT      integer  value in percent; default: 70
    #    CRITICALLIMIT  integer  value in percent; default: 90
    #
    # ----------------------------------------------------------------------
    # 2020-03-03  v1.0  initial version
    # 2020-03-05  v1.1  <axel.hahn@iml.unibe.ch> switch to ph.* helper functions
    # 2020-06-06  v1.2  <axel.hahn@iml.unibe.ch> for cpu: limits are multiplied by the number of CPUs
    # ======================================================================
    
    # Version shown in the help text; matches the latest changelog entry.
    _version="1.2"
    
    # --- tmp files for internal usage
    # tmpfile/tmpfile2 are never written by this script; the names are kept
    # because the cleanup commands refer to them.
    tmpfile=/tmp/processlist1_$$
    tmpfile2=/tmp/processlist2_$$
    
    # The result lists are created by mktemp so that their names cannot be
    # predicted (and pre-created or symlinked) by another local user.
    # Exit code 3 is the plugin status UNKNOWN.
    outCritical=$(mktemp /tmp/processlist_critical_XXXXXX) || {
        echo "UNKNOWN: unable to create a temporary file in /tmp"
        exit 3
    }
    outWarning=$(mktemp /tmp/processlist_warning_XXXXXX) || {
        rm -f -- "$outCritical"
        echo "UNKNOWN: unable to create a temporary file in /tmp"
        exit 3
    }
    
    # Remove the temp files whenever the script exits, not only at its
    # regular end. The trap is set before the include is sourced so that it
    # cannot replace a trap installed by the include.
    trap 'rm -f -- "$tmpfile" "$tmpfile2" "$outCritical" "$outWarning"' EXIT
    
    # --- limits (defaults in percent)
    iWarnLimitCpu=70
    iCriticalLimitCpu=90
    
    iWarnLimitMem=70
    iCriticalLimitMem=90
    
    # counters for the processes found above the limits
    typeset -i iCountWarning=0
    typeset -i iCountCritical=0
    
    # drop leftovers of the legacy temp file names
    rm -f -- "$tmpfile" "$tmpfile2" 2>/dev/null
    
    # load the ph.* helper functions located next to this script
    . "$(dirname "$0")/inc_pluginfunctions"
    
    # ----------------------------------------------------------------------
    # FUNCTIONS
    # ----------------------------------------------------------------------
    # Print the usage text of this check to stdout.
    # Takes no arguments. The here-document delimiter is unquoted, so
    # ${_version} and the default limits ($iWarnLimitCpu, $iWarnLimitMem,
    # $iCriticalLimitCpu, $iCriticalLimitMem) are expanded when the text
    # is printed.
    function showHelp(){
        cat <<EOH
    ______________________________________________________________________
    
    SHOW PROCESSES USING A LOT OF CPU OR MEMORY :: v${_version}
    
    (c) Institute for Medical Education - University of Bern
    Licence: GNU GPL 3
    ______________________________________________________________________
    
    SYNTAX:
    
      check_proc_ressources -m METHOD [-w WARNLIMIT] [-c CRITICALLIMIT]
        METHOD         string   identify what to check; one of cpu|mem
        WARNLIMIT      integer  optional: value in percent; 
                                default: $iWarnLimitCpu (cpu) | $iWarnLimitMem (mem)
        CRITICALLIMIT  integer  optional: value in percent
                                default: $iCriticalLimitCpu (cpu) | $iCriticalLimitMem (mem)
      
      Remark: for cpu the given value is multiplicated with count of cpu
    
    EXAMPLE:
    
      check_proc_ressources -m mem -w 50 -c 70
    
    EOH
    }
    
    # ----------------------------------------------------------------------
    # MAIN
    # ----------------------------------------------------------------------
    
    
    # ----- check limits
    
    # Working limits. The integer attribute makes bash evaluate every value
    # assigned to these variables arithmetically.
    typeset -i iCriticalLimit=0
    typeset -i iWarnLimit=0
    typeset -i iMulti=1
    
    # set default / override from command line params
    # NOTE(review): 70 and 90 are handed to ph.getValueWithParam, presumably
    # as fallback values. If so, the "-eq 0" checks below only apply when a
    # limit of 0 comes back, and the per-mode defaults are otherwise not
    # used - confirm against inc_pluginfunctions.
    typeset -i iWarnLimit=$(ph.getValueWithParam 70 w "$@")
    typeset -i iCriticalLimit=$(ph.getValueWithParam 90 c "$@")
    sMode=$(ph.getValueWithParam '' m "$@")
    
    # Map the mode to the ps output field to scan and to the label used in
    # the messages; a limit of 0 is replaced by the default of the mode.
    case "${sMode}" in
    
        "cpu")
            scanfield="pcpu"
            unit="CPU"
    
            [ "$iWarnLimit"     -eq 0 ] && iWarnLimit=$iWarnLimitCpu
            [ "$iCriticalLimit" -eq 0 ] && iCriticalLimit=$iCriticalLimitCpu
            ;;
        "mem")
            scanfield="pmem"
            unit="Memory"
            [ "$iWarnLimit"     -eq 0 ] && iWarnLimit=$iWarnLimitMem
            [ "$iCriticalLimit" -eq 0 ] && iCriticalLimit=$iCriticalLimitMem
            ;;
        *)
            # The message is quoted: unquoted, "[...]" is a glob pattern that
            # the shell would replace by a matching file name in the current
            # directory.
            echo "ERROR: [${sMode}] is an INVALID mode"
            showHelp
            ph.abort
            ;;
    
    esac
    
    
    # Print the number of logical CPUs listed in a cpuinfo style file.
    # param  string  optional: file to read; default: /proc/cpuinfo
    # Every CPU entry starts with a "processor" line. Unlike "vendor_id",
    # that line is not limited to x86 systems.
    # Prints 0 if no entry was found and nothing if the file is unreadable.
    function _getCpuCount(){
        grep -c '^processor' "${1:-/proc/cpuinfo}" 2>/dev/null
    }
    
    # For cpu the limits are given per CPU: scale them by the CPU count.
    if [ "$sMode" = "cpu" ]; then
        typeset -i iCpucount=$(_getCpuCount)
        # an empty result becomes 0 because of the integer attribute
        if [ "$iCpucount" -eq 0 ]; then
            ph.abort "ABORT: count of CPUs was not detected."
        fi
        iWarnLimit=$(( iWarnLimit * iCpucount ))
        iCriticalLimit=$(( iCriticalLimit * iCpucount ))
    fi
    
    # ----- read processlist and create helper table
    
    # Classify one process and append it to the matching result list.
    # param  string   value in percent as printed by ps (e.g. "95.5")
    # param  integer  process id
    # param  string   process name (may contain blanks)
    # Reads iCriticalLimit and unit; increments iCountCritical or
    # iCountWarning and appends one line to $outCritical or $outWarning.
    function _recordProcess(){
        local sValue=$1
        local iPid=$2
        local sName=$3
        # integer part only: the shell cannot compare decimal numbers
        local iValue=${sValue%%.*}
    
        if [ "${iValue:-0}" -ge "$iCriticalLimit" ]; then
            iCountCritical=$(( iCountCritical + 1 ))
            echo "Critical: $sValue % $unit by process id $iPid $sName" >> "$outCritical"
        else
            iCountWarning=$(( iCountWarning + 1 ))
            echo "Warning : $sValue % $unit by process id $iPid $sName" >> "$outWarning"
        fi
    }
    
    # awk drops the header row and keeps the rows above the warning limit.
    # read puts the remainder of the row into the last variable, so a process
    # name containing blanks is kept complete. The loop is fed by a process
    # substitution (not a pipe) to keep the counters in the current shell.
    while read -r Value iProcessId processname
    do
        _recordProcess "$Value" "$iProcessId" "$processname"
    done < <(ps -eo "${scanfield},pid,comm" | awk -v limit="$iWarnLimit" 'NR > 1 && $1 > limit')
    
    
    # ----- Status output
    
    # Overall status: critical wins over warning, warning wins over ok.
    # The detail lines are listed with the highest value first. The value is
    # field 2 in "Critical: <value> ..." but field 3 in "Warning : <value> ..."
    # (the blank in front of the colon creates an additional field), hence
    # the different sort keys.
    if [ "$iCountCritical" -gt 0 ]; then
        ph.setStatus "critical"
        ph.status "$iCountCritical processes use $iCriticalLimit % (critical) or more"
        echo "$iCountWarning processes use $iWarnLimit % .. $iCriticalLimit % $unit"
        echo
        sort -n -r -k 2 "$outCritical"
        # the warning list does not exist if no warning was written
        sort -n -r -k 3 "$outWarning" 2>/dev/null
    elif [ "$iCountWarning" -gt 0 ]; then
        ph.setStatus "warning"
        ph.status "$iCountWarning processes use $iWarnLimit % .. $iCriticalLimit % $unit"
        echo
        sort -n -r -k 3 "$outWarning"
    else
        ph.setStatus "ok"
        ph.status "all processes below warning limit $iWarnLimit % $unit .. (and critical limit $iCriticalLimit %)"
    fi
    
    # ----- additional infos
    
    # Print the memory summary lines of a meminfo style file.
    # param  string  optional: file to read; default: /proc/meminfo
    function _showMemInfo(){
        grep -E '^(MemTotal|MemFree|MemAvailable|Buffers|Cached):' "${1:-/proc/meminfo}"
    }
    
    # Append context to the output: the CPU count that scaled the limits,
    # or the memory totals of the system.
    case "${sMode}" in
        "cpu")
            echo
            echo "Remark: $iCpucount CPUs detected"
            ;;
        "mem")
            echo
            _showMemInfo
            ;;
    esac
    
    
    # ----- cleanup temp stuff
    
    # Remove all temp files, including tmpfile2, which the cleanup at the
    # start of the script handles as well. The names are quoted so that each
    # one is passed to rm as a single argument.
    rm -f -- "$tmpfile" "$tmpfile2" "$outCritical" "$outWarning" 2>/dev/null
    
    # hand over to the helper include to finish the check
    ph.exit
    
    # ----------------------------------------------------------------------