#!/bin/bash
# ======================================================================
#
# NAGIOS CLIENT CHECK :: SHOW PROCESSES USING A LOT OF CPU OR MEMORY
#
# ----------------------------------------------------------------------
#
# SYNTAX:
#  ./check_proc_ressources -m METHOD [-w WARNLIMIT] [-c CRITICALLIMIT]
#    METHOD         string   identify what to check; one of cpu|mem
#    WARNLIMIT      integer  value in percent; default: 70
#    CRITICALLIMIT  integer  value in percent; default: 90
#
# ----------------------------------------------------------------------
# 2020-03-03  v1.0  initial version
# 2020-03-05  v1.1  <axel.hahn@iml.unibe.ch> switch to ph.* helper functions
# 2020-06-06  v1.2  <axel.hahn@iml.unibe.ch> for cpu: limits are multiplied by the number of CPUs
# ======================================================================

# Version shown in the help text; matches the latest changelog entry.
_version="1.2"

# --- tmp files for internal usage
# tmpfile and tmpfile2 are only path names: nothing in this script writes
# to them. They are kept because the cleanup statements reference them.
tmpfile=/tmp/processlist1_$$
tmpfile2=/tmp/processlist2_$$

# The result lists are appended to later on, so they are created with
# mktemp (unpredictable name, created exclusively) instead of using a
# guessable /tmp/..._$$ path that another user could pre-create or symlink.
# Exit code 3 is the Nagios status "UNKNOWN".
outCritical=$(mktemp /tmp/processlist_critical_XXXXXX) || {
    echo "UNKNOWN: unable to create a temporary file"
    exit 3
}
outWarning=$(mktemp /tmp/processlist_warning_XXXXXX) || {
    echo "UNKNOWN: unable to create a temporary file"
    rm -f -- "$outCritical"
    exit 3
}

# Remove the result lists whenever the shell exits, so early exits do not
# leave the freshly created files behind.
# NOTE(review): if inc_pluginfunctions installs its own EXIT trap it will
# replace this one - confirm in the helper library.
trap 'rm -f -- "$outCritical" "$outWarning"' EXIT

# --- limits (percent); defaults per check mode
iWarnLimitCpu=70
iCriticalLimitCpu=90

iWarnLimitMem=70
iCriticalLimitMem=90

# counters for the processes found above the warning / critical limit
typeset -i iCountWarning=0
typeset -i iCountCritical=0

# remove leftovers that carry the same names
rm -f -- "$tmpfile" "$tmpfile2" 2>/dev/null

# load the ph.* helper functions that are located next to this script;
# quoted so that an installation path containing blanks works too
. "$(dirname "$0")/inc_pluginfunctions"

# ----------------------------------------------------------------------
# FUNCTIONS
# ----------------------------------------------------------------------
# Print the usage text of this plugin to stdout.
# Globals (read): _version, iWarnLimitCpu, iWarnLimitMem,
#                 iCriticalLimitCpu, iCriticalLimitMem
# Arguments:      none
function showHelp(){
    cat <<EOH
______________________________________________________________________

SHOW PROCESSES USING A LOT OF CPU OR MEMORY :: v${_version}

(c) Institute for Medical Education - University of Bern
Licence: GNU GPL 3
______________________________________________________________________

SYNTAX:

  check_proc_ressources -m METHOD [-w WARNLIMIT] [-c CRITICALLIMIT]
    METHOD         string   identify what to check; one of cpu|mem
    WARNLIMIT      integer  optional: value in percent
                            default: $iWarnLimitCpu (cpu) | $iWarnLimitMem (mem)
    CRITICALLIMIT  integer  optional: value in percent
                            default: $iCriticalLimitCpu (cpu) | $iCriticalLimitMem (mem)

  Remark: for cpu the given value is multiplied by the number of CPUs

EXAMPLE:

  check_proc_ressources -m mem -w 50 -c 70

EOH
}

# ----------------------------------------------------------------------
# MAIN
# ----------------------------------------------------------------------


# ----- check limits

typeset -i iCriticalLimit=0
typeset -i iWarnLimit=0
# not modified in this section; declared for the cpu handling further down
typeset -i iMulti=1

# set default / override from command line params
# NOTE(review): ph.getValueWithParam is defined in inc_pluginfunctions;
# presumably it prints the value given for the option letter or else the
# default passed as first argument - confirm in the helper library.
iWarnLimit=$(     ph.getValueWithParam 70 w "$@")
iCriticalLimit=$( ph.getValueWithParam 90 c "$@")
sMode=$(          ph.getValueWithParam '' m "$@")

# select the ps column and the label for the mode; a limit of 0 is
# replaced by the default of the selected mode
case "${sMode}" in

    "cpu")
        scanfield="pcpu"
        unit="CPU"

        if [ "$iWarnLimit" -eq 0 ]; then
            iWarnLimit=$iWarnLimitCpu
        fi
        if [ "$iCriticalLimit" -eq 0 ]; then
            iCriticalLimit=$iCriticalLimitCpu
        fi
        ;;
    "mem")
        scanfield="pmem"
        unit="Memory"

        if [ "$iWarnLimit" -eq 0 ]; then
            iWarnLimit=$iWarnLimitMem
        fi
        if [ "$iCriticalLimit" -eq 0 ]; then
            iCriticalLimit=$iCriticalLimitMem
        fi
        ;;
    *)
        # quoted: an unquoted [value] is a glob pattern and would be replaced
        # by matching single character file names of the working directory
        echo "ERROR: [${sMode}] is an INVALID mode"
        showHelp
        ph.abort
        ;;

esac


# For the cpu mode the percent limits are multiplied by the number of
# detected CPUs.
if [ "$sMode" = "cpu" ]; then
    # Number of "vendor_id" lines in /proc/cpuinfo. grep -c prints 0 when
    # nothing matches; if the file cannot be read the output is empty and
    # the integer variable ends up as 0 as well.
    typeset -i iCpucount=0
    iCpucount=$(grep -c vendor_id /proc/cpuinfo 2>/dev/null)

    # The detected count must be verified here: multiplying with 0 would
    # set both limits to 0 and report every process.
    if [ "$iCpucount" -le 0 ]; then
        ph.abort "ABORT: count of CPUs was not detected."
    fi

    iWarnLimit=$(( iWarnLimit * iCpucount ))
    iCriticalLimit=$(( iCriticalLimit * iCpucount ))
fi

# ----- read processlist and create helper table

# Sort process lines into the critical or the warning result list.
# Input (stdin):   lines "VALUE PID COMMAND"; VALUE is a percent value like
#                  93.7, COMMAND is everything after the PID and may
#                  contain blanks
# Globals (read):  iCriticalLimit, unit, outCritical, outWarning
# Globals (write): iCountCritical, iCountWarning
function _classifyProcesses(){
    local sValue iPid sName iValue
    while read -r sValue iPid sName
    do
        # integer part of the value, e.g. 93.7 -> 93
        iValue=${sValue%%.*}

        # ignore anything that does not start with a number (e.g. a header)
        [[ "$iValue" =~ ^[0-9]+$ ]] || continue

        if [ "$iValue" -ge "$iCriticalLimit" ]; then
            iCountCritical=$(( iCountCritical + 1 ))
            echo "Critical: $sValue % $unit by process id $iPid $sName" >> "$outCritical"
        else
            iCountWarning=$(( iCountWarning + 1 ))
            echo "Warning : $sValue % $unit by process id $iPid $sName" >> "$outWarning"
        fi
    done
}

# awk keeps the lines with a value above the warning limit; "+0" forces a
# numeric comparison so the header line of ps can never pass the filter.
# The loop is fed by a process substitution (not a pipe) to keep the
# counters in the current shell.
_classifyProcesses < <(
    ps -eo "${scanfield},pid,comm" \
    | awk -v limit="$iWarnLimit" '$1+0 > limit+0'
)


# ----- Status output

# Print a result list sorted by its percent value, highest value first.
# Arguments: $1 - file to print; a missing file produces no output
#            $2 - number of the blank separated field that holds the value
# LC_ALL=C makes sort read "70.5" with a dot as decimal separator in every
# locale.
function _showSortedList(){
    [ -f "$1" ] || return 0
    LC_ALL=C sort -k "${2},${2}nr" "$1"
}

# Lines starting with "Critical: 95.5 ..." carry the value in field 2;
# lines starting with "Warning : 70.5 ..." carry it in field 3 because of
# the blank in front of the colon.
if (( iCountCritical > 0 )); then
    ph.setStatus "critical"
    ph.status "$iCountCritical processes use $iCriticalLimit % (critical) or more"
    echo "$iCountWarning processes use $iWarnLimit % .. $iCriticalLimit % $unit"
    echo
    _showSortedList "$outCritical" 2
    _showSortedList "$outWarning"  3
elif (( iCountWarning > 0 )); then
    ph.setStatus "warning"
    ph.status "$iCountWarning processes use $iWarnLimit % .. $iCriticalLimit % $unit"
    echo
    _showSortedList "$outWarning" 3
else
    ph.setStatus "ok"
    ph.status "all processes below warning limit $iWarnLimit % $unit .. (and critical limit $iCriticalLimit %)"
fi

# ----- additional infos

# Print the memory summary lines of a meminfo file.
# Arguments: $1 - optional: file to read; default: /proc/meminfo
# Only the keys MemTotal, MemFree, MemAvailable, Buffers and Cached are
# shown; the pattern is anchored, so e.g. SwapCached does not match.
function _showMemInfo(){
    grep -E '^(MemTotal|MemFree|MemAvailable|Buffers|Cached):' "${1:-/proc/meminfo}"
}

case "${sMode}" in
    "cpu")
        echo
        echo "Remark: $iCpucount CPUs detected"
        ;;
    "mem")
        echo
        _showMemInfo
        ;;
esac


# ----- cleanup temp stuff

# remove every temp file name this script defines (tmpfile2 included);
# -f plus the discarded stderr keep this silent for files that do not exist
rm -f -- "$tmpfile" "$tmpfile2" "$outCritical" "$outWarning" 2>/dev/null

# hand over to the helper library to finish the check
# NOTE(review): presumably ph.exit ends the script with the code that
# belongs to the status set above - confirm in inc_pluginfunctions
ph.exit

# ----------------------------------------------------------------------