-
Hahn Axel (hahn) authoredHahn Axel (hahn) authored
check_proc_ressources 5.28 KiB
#!/bin/bash
# ======================================================================
#
# NAGIOS CLIENT CHECK :: SHOW PROCESSES USING A LOT OF CPU OR MEMORY
#
# ----------------------------------------------------------------------
#
# SYNTAX:
# ./check_proc_ressources -m METHOD [-w WARNLIMIT] [-c CRITICALLIMIT]
# METHOD string identify what to check; one of cpu|mem
# WARNLIMIT integer value in percent; default: 70
# CRITICALLIMIT integer value in percent; default: 90
#
# ----------------------------------------------------------------------
# 2020-03-03 v1.0 initial version
# 2020-03-05 v1.1 <axel.hahn@iml.unibe.ch> switch to ph.* helper functions
# 2020-06-06 v1.2 <axel.hahn@iml.unibe.ch> for cpu: limits are multiplied by the number of CPUs
# ======================================================================
# Version shown in the help banner; kept in sync with the latest changelog
# entry in the file header (v1.2).
_version="1.2"

# --- tmp files for internal usage
# All scratch files live in a private directory created by mktemp instead of
# predictable /tmp/<name>_<pid> paths, so no other user can pre-create them.
# None of the files exists until something writes to it.
tmpdir=$(mktemp -d /tmp/check_proc_ressources.XXXXXX) || {
  # The ph.* helpers are not loaded yet; 3 is the UNKNOWN state in the
  # Nagios plugin exit code convention.
  echo "UNKNOWN: unable to create a temporary directory below /tmp"
  exit 3
}
# Remove the scratch directory on every exit path (normal end and aborts).
# NOTE(review): assumes inc_pluginfunctions does not install its own EXIT
# trap - confirm.
trap 'rm -rf -- "${tmpdir:?}"' EXIT

tmpfile="${tmpdir}/processlist1"
tmpfile2="${tmpdir}/processlist2"
outCritical="${tmpdir}/processlist_critical"
outWarning="${tmpdir}/processlist_warning"

# --- limits
# default limits in percent, per check method
iWarnLimitCpu=70
iCriticalLimitCpu=90
iWarnLimitMem=70
iCriticalLimitMem=90

# counters of processes found above the warning / critical limit
typeset -i iCountWarning=0
typeset -i iCountCritical=0

# load the shared ph.* helper functions located next to this script;
# quoted so that an installation path containing spaces works
. "$(dirname "$0")/inc_pluginfunctions"
# ----------------------------------------------------------------------
# FUNCTIONS
# ----------------------------------------------------------------------
# Print the banner and the usage text to stdout.
#
# Globals (read):
#   _version            version string shown in the banner
#   iWarnLimitCpu       default warning limit for method "cpu"
#   iWarnLimitMem       default warning limit for method "mem"
#   iCriticalLimitCpu   default critical limit for method "cpu"
#   iCriticalLimitMem   default critical limit for method "mem"
# Arguments: none
# The here-document delimiter is unquoted on purpose: the variables above
# are expanded at call time.
function showHelp(){
cat <<EOH
______________________________________________________________________
SHOW PROCESSES USING A LOT OF CPU OR MEMORY :: v${_version}
(c) Institute for Medical Education - University of Bern
Licence: GNU GPL 3
______________________________________________________________________
SYNTAX:
check_proc_ressources -m METHOD [-w WARNLIMIT] [-c CRITICALLIMIT]
METHOD string identify what to check; one of cpu|mem
WARNLIMIT integer optional: value in percent;
default: $iWarnLimitCpu (cpu) | $iWarnLimitMem (mem)
CRITICALLIMIT integer optional: value in percent
default: $iCriticalLimitCpu (cpu) | $iCriticalLimitMem (mem)
Remark: for cpu the given value is multiplied by the number of CPUs
EXAMPLE:
check_proc_ressources -m mem -w 50 -c 70
EOH
}
# ----------------------------------------------------------------------
# MAIN
# ----------------------------------------------------------------------
# ----- check limits
typeset -i iCriticalLimit=0
typeset -i iWarnLimit=0
typeset -i iCpucount=0

# Count the CPUs listed in a cpuinfo style file.
#
# Arguments: $1 - optional: file to scan; default: /proc/cpuinfo
# Outputs:   number of lines containing "vendor_id"; if there is none
#            (cpuinfo layouts without a vendor_id line) the number of
#            lines starting with "processor"; 0 if the file is unreadable
#            or contains neither
function _getCpuCount(){
  local _file="${1:-/proc/cpuinfo}"
  local -i _count=0

  # grep -c prints nothing when the file is missing; the integer
  # attribute turns the empty string into 0
  _count=$(grep -c 'vendor_id' "$_file" 2>/dev/null)
  if [ "$_count" -eq 0 ]; then
    _count=$(grep -c '^processor' "$_file" 2>/dev/null)
  fi
  echo "$_count"
}

# set default / override from command line params
# NOTE(review): 70 and 90 are handed to the helper as defaults, so the
# "-eq 0" fallbacks below presumably only trigger for an explicit 0 -
# confirm against ph.getValueWithParam.
iWarnLimit=$(ph.getValueWithParam 70 w "$@")
iCriticalLimit=$(ph.getValueWithParam 90 c "$@")
sMode=$(ph.getValueWithParam '' m "$@")

# select the ps column to scan and the label used in the output
case "${sMode}" in
  "cpu")
    scanfield="pcpu"
    unit="CPU"
    if [ "$iWarnLimit" -eq 0 ]; then
      iWarnLimit=$iWarnLimitCpu
    fi
    if [ "$iCriticalLimit" -eq 0 ]; then
      iCriticalLimit=$iCriticalLimitCpu
    fi
    ;;
  "mem")
    scanfield="pmem"
    unit="Memory"
    if [ "$iWarnLimit" -eq 0 ]; then
      iWarnLimit=$iWarnLimitMem
    fi
    if [ "$iCriticalLimit" -eq 0 ]; then
      iCriticalLimit=$iCriticalLimitMem
    fi
    ;;
  *)
    # quoted: an unquoted [mode] would be treated as a glob pattern
    echo "ERROR: [${sMode}] is an INVALID mode"
    showHelp
    ph.abort
    ;;
esac

# for cpu the limits are given per CPU: scale them by the number of CPUs
if [ "$sMode" = "cpu" ]; then
  iCpucount=$(_getCpuCount)
  # without this guard both limits would be scaled to 0
  if [ "$iCpucount" -eq 0 ]; then
    ph.abort "ABORT: count of CPUs was not detected."
  fi
  iWarnLimit=$(( iWarnLimit * iCpucount ))
  iCriticalLimit=$(( iCriticalLimit * iCpucount ))
fi
# ----- read processlist and create helper table

# Filter a "ps -o <value>,pid,comm" listing read from stdin.
#
# Arguments: $1 - limit; rows whose first column is below it are dropped
# Outputs:   one line "<value> <pid> <process name>" per remaining row.
#            The header row is skipped explicitly and the complete process
#            name is kept, including names that contain blanks.
function _filterByLimit(){
  awk -v limit="$1" '
    NR > 1 && $1 + 0 >= limit + 0 {
      value = $1
      pid = $2
      # blank the first two fields; what is left of $0 is the name
      $1 = ""
      $2 = ""
      sub(/^[ \t]+/, "")
      print value, pid, $0
    }'
}

# Count one process as critical or warning and append its report line to
# the matching output file.
#
# Globals:   iCriticalLimit, unit, outCritical, outWarning (read)
#            iCountCritical, iCountWarning (written)
# Arguments: $1 - usage value as printed by ps, e.g. "12.3"
#            $2 - process id
#            $3 - process name
function _registerProcess(){
  local _value="$1"
  local _pid="$2"
  local _name="$3"
  # integer part of the value; the shell cannot compare decimals
  local _intValue="${_value%%.*}"

  if [ "$_intValue" -ge "$iCriticalLimit" ]; then
    iCountCritical=$(( iCountCritical + 1 ))
    echo "Critical: $_value % $unit by process id $_pid $_name" >> "$outCritical"
  else
    iCountWarning=$(( iCountWarning + 1 ))
    echo "Warning : $_value % $unit by process id $_pid $_name" >> "$outWarning"
  fi
}

# The loop is fed by process substitution, not a pipe, so that the counters
# are updated in the current shell. "read -r" takes the lines verbatim (no
# glob expansion); the last variable receives the rest of the line.
while read -r sValue sPid sName
do
  _registerProcess "$sValue" "$sPid" "$sName"
done < <(ps -eo "${scanfield},pid,comm" | _filterByLimit "$iWarnLimit")
# ----- Status output

# Print the report lines of the given files, highest usage value first.
#
# Arguments: $@ - report files; files that are missing or unreadable are
#                 skipped silently
# Report lines start with "Critical: <value>" or "Warning : <value>". The
# sort key is the text behind the first colon, so the value is the leading
# number of the key in both layouts. LC_ALL=C makes "." the decimal point.
function _showSortedByUsage(){
  local _file
  for _file in "$@"; do
    if [ -r "$_file" ]; then
      LC_ALL=C sort -t ':' -k 2 -n -r "$_file"
    fi
  done
  return 0
}

if [ "$iCountCritical" -gt 0 ]; then
  # at least one process reached the critical limit: list critical
  # processes first, then the warnings (if any)
  ph.setStatus "critical"
  ph.status "$iCountCritical processes use $iCriticalLimit % (critical) or more"
  echo "$iCountWarning processes use $iWarnLimit % .. $iCriticalLimit % $unit"
  echo
  _showSortedByUsage "$outCritical"
  _showSortedByUsage "$outWarning"
elif [ "$iCountWarning" -gt 0 ]; then
  ph.setStatus "warning"
  ph.status "$iCountWarning processes use $iWarnLimit % .. $iCriticalLimit % $unit"
  echo
  _showSortedByUsage "$outWarning"
else
  ph.setStatus "ok"
  ph.status "all processes below warning limit $iWarnLimit % $unit .. (and critical limit $iCriticalLimit %)"
fi
# ----- additional infos

# Print the memory summary lines of a meminfo style file.
#
# Arguments: $1 - optional: file to read; default: /proc/meminfo
# Outputs:   the MemTotal, MemFree, MemAvailable, Buffers and Cached lines
function _showMemInfo(){
  grep -E '^(MemTotal|MemFree|MemAvailable|Buffers|Cached):' "${1:-/proc/meminfo}"
}

case "${sMode}" in
  "cpu")
    echo
    echo "Remark: $iCpucount CPUs detected"
    ;;
  "mem")
    echo
    _showMemInfo
    ;;
esac

# ----- cleanup temp stuff
# same list of files that is initialised at the top of the script
rm -f "$tmpfile" "$tmpfile2" "$outCritical" "$outWarning" 2>/dev/null

# hand over to the plugin helper to finish the check
ph.exit
# ----------------------------------------------------------------------