Select Git revision
check_proc_ressources
Hahn Axel (hahn) authored
check_proc_ressources 5.44 KiB
#!/bin/bash
# ======================================================================
#
# NAGIOS CLIENT CHECK :: SHOW PROCESSES USING A LOT OF CPU OR MEMORY
#
# ----------------------------------------------------------------------
#
# SYNTAX:
# ./check_proc_ressources -m METHOD [-w WARNLIMIT] [-c CRITICALLIMIT]
# METHOD string identify what to check; one of cpu|mem
# WARNLIMIT integer value in percent; default: 70
# CRITICALLIMIT integer value in percent; default: 90
#
# ----------------------------------------------------------------------
# 2020-03-03 v1.0 initial version
# 2020-03-05 v1.1 <axel.hahn@iml.unibe.ch> switch to ph.* helper functions
# 2020-06-06 v1.2 <axel.hahn@iml.unibe.ch> for cpu: limits are multiplied by the number of CPUs
# 2023-12-22 v1.3 <axel.hahn@unibe.ch> update help; no temp files
# 2025-02-10 v1.4 <axel.hahn@unibe.ch> harden sourcing files
# ======================================================================
# Load the helper file that lives next to this script (presumably the
# source of the ph.* functions used below); give up when it cannot be read.
# shellcheck source=/dev/null
source "$( dirname "$0" )/inc_pluginfunctions" || exit 1

self_APPVERSION=1.4
export self_APPVERSION

# --- limits
# Default thresholds in percent, one pair per check mode.
iWarnLimitMem=70
iCriticalLimitMem=90
iWarnLimitCpu=70
iCriticalLimitCpu=90

# Integer working variables. A limit that is still 0 after option parsing
# is replaced by the default of the selected mode.
declare -i iWarnLimit=0
declare -i iCriticalLimit=0
declare -i iCountWarning=0
declare -i iCountCritical=0
declare -i iMulti=1
# ----------------------------------------------------------------------
# FUNCTIONS
# ----------------------------------------------------------------------
# Show help: print the usage text to stdout.
# Takes no arguments. The text starts with the output of
# ph.showImlHelpHeader and embeds the default limits from
# iCriticalLimitCpu, iCriticalLimitMem, iWarnLimitCpu and iWarnLimitMem.
function showHelp(){
cat <<EOH
$( ph.showImlHelpHeader )
SYNTAX:
check_proc_ressources -m METHOD [-w WARNLIMIT] [-c CRITICALLIMIT]
OPTIONS:
-h, --help
show help
-c, --critical VALUE
critical value; defaults:
* cpu $iCriticalLimitCpu
* mem $iCriticalLimitMem
-m, --mode VALUE
identify what to check; one of cpu|mem
Remark: for cpu the given limits are multiplicated with count of cpu.
-w, --warning VALUE
warning limit value; defaults:
* cpu $iWarnLimitCpu
* mem $iWarnLimitMem
EXAMPLE:
check_proc_ressources -m mem -w 50 -c 70
Show processes consuming more than 50% of memory; mark as critical
when using 70% and more.
EOH
}
# ----------------------------------------------------------------------
# MAIN
# ----------------------------------------------------------------------
# ----- check limits

# Make sure an option value is a plain non-negative integer.
# $1 - option name (only used in the error message)
# $2 - value to verify
# Prints an error plus the help and exits 1 when the value is not numeric.
# The limits end up in integer variables, where bash evaluates an assigned
# string as an arithmetic expression, so unchecked input must not get there.
function _requireInteger(){
    [[ "$2" =~ ^[0-9]+$ ]] && return 0
    echo "ERROR: option $1 needs a non-negative integer value, got [$2]"
    showHelp
    exit 1
}

# Parse the command line into sMode, iWarnLimit and iCriticalLimit.
# Arguments: the parameters given to the script ("$@").
# -h|--help shows the help and exits 0; an unknown parameter or a
# non-numeric limit shows an error plus the help and exits 1.
function _parseArgs(){
    while [[ "$#" -gt 0 ]]; do
        case "$1" in
            -h|--help)
                showHelp
                exit 0
                ;;
            -m|--mode)
                sMode="${2-}"
                # two single shifts on purpose: "shift 2" would shift nothing
                # (and loop forever) when the value is missing
                shift; shift
                ;;
            # "--critcal" is the misspelling older versions accepted; it is
            # kept as an alias next to the documented "--critical"
            -c|--critical|--critcal)
                _requireInteger "$1" "${2-}"
                # 10# forces base 10 so that "08" is not read as octal
                iCriticalLimit=$(( 10#$2 ))
                shift; shift
                ;;
            -w|--warning)
                _requireInteger "$1" "${2-}"
                iWarnLimit=$(( 10#$2 ))
                shift; shift
                ;;
            *)
                echo "ERROR: Unknown parameter: $1"
                showHelp
                exit 1
                ;;
        esac
    done
    return 0
}

_parseArgs "$@"
# Translate the requested mode into the ps column to scan (scanfield) and
# the label used in the output (unit). A limit that is still 0 - i.e. not
# given on the command line - falls back to the default of that mode.
case "${sMode}" in
    "cpu")
        scanfield="pcpu"
        unit="Cpu"
        if [ "$iWarnLimit" -eq 0 ]; then iWarnLimit=$iWarnLimitCpu; fi
        if [ "$iCriticalLimit" -eq 0 ]; then iCriticalLimit=$iCriticalLimitCpu; fi
        ;;
    "mem")
        scanfield="pmem"
        unit="Memory"
        if [ "$iWarnLimit" -eq 0 ]; then iWarnLimit=$iWarnLimitMem; fi
        if [ "$iCriticalLimit" -eq 0 ]; then iCriticalLimit=$iCriticalLimitMem; fi
        ;;
    *)
        # quoted: the square brackets must not be treated as a glob pattern
        echo "ERROR: [${sMode}] is an INVALID mode"
        showHelp
        ph.abort
        ;;
esac
# Print the number of CPUs listed in a cpuinfo file.
# $1 - file to read; optional, default: /proc/cpuinfo
# Counts the per-CPU "processor" lines instead of "vendor_id", because
# "vendor_id" is not present on every architecture (e.g. ARM).
# Prints 0 when nothing matches or the file cannot be read.
function _countCpus(){
    local -i iCount=0
    iCount=$( grep -c '^processor[[:space:]]' "${1:-/proc/cpuinfo}" 2>/dev/null )
    echo "$iCount"
}

# For cpu the limits are given per CPU: scale them by the CPU count.
if [ "$sMode" = "cpu" ]; then
    typeset -i iCpucount
    iCpucount=$( _countCpus )
    # without a detected CPU both limits would be multiplied by 0
    if [ "$iCpucount" -eq 0 ]; then
        ph.abort "ABORT: count of CPUs was not detected."
    fi
    iWarnLimit=$(( iWarnLimit * iCpucount ))
    iCriticalLimit=$(( iCriticalLimit * iCpucount ))
fi
# ----- read processlist and create helper table

# Turn "VALUE PID COMMAND" lines read from stdin into the report rows.
# $1 - warning limit: only rows with VALUE greater than it are printed
# $2 - critical limit: rows whose integer part of VALUE reaches it are
#      marked "Critical", all other printed rows "Warning"
# $3 - unit label shown in each row
# Rows are printed with the highest VALUE first. Lines whose first column
# is not a number (the ps header) are skipped. A COMMAND containing spaces
# is kept complete.
function _renderProcessTable(){
    LC_ALL=C sort -k 1,1 -n -r \
    | awk -v warn="$1" -v crit="$2" -v unit="$3" '
        $1 !~ /^[0-9]+([.][0-9]+)?$/ { next }
        ($1 + 0) <= (warn + 0)       { next }
        {
            name = $3
            for (i = 4; i <= NF; i++) name = name " " $i
            status = (int($1) >= (crit + 0)) ? "Critical" : "Warning"
            printf "%-9s %5s %s %-10s %6s %s\n", status, $1, "%", unit, $2, name
        }
    '
}

# plain assignment: "+=" would append to an "out" inherited from the environment
out=$( ps -eo "${scanfield},pid,comm" | _renderProcessTable "$iWarnLimit" "$iCriticalLimit" "$unit" )
# ----- Status output

# Print an empty line, the table header and the collected rows in $out.
function _printProcessTable(){
    echo
    echo "Level Usage process id process"
    echo "-----------------------------------------------------"
    echo "$out"
}

# Every row in $out starts with its level name, so counting the rows per
# level is a matter of counting line prefixes.
iCountCritical=$( grep -c "^Critical" <<< "$out" )
iCountWarning=$( grep -c "^Warning" <<< "$out" )

if [ "$iCountCritical" -le 0 ] && [ "$iCountWarning" -le 0 ]; then
    # nothing above the warning limit
    ph.setStatus "ok"
    ph.status "all processes below warning limit $iWarnLimit % $unit .. (and critical limit $iCriticalLimit %)"
elif [ "$iCountCritical" -gt 0 ]; then
    # critical wins; the warning count is reported on a second line
    ph.setStatus "critical"
    ph.status "$iCountCritical processes use $iCriticalLimit % (critical) or more"
    echo "$iCountWarning processes use $iWarnLimit % .. $iCriticalLimit % $unit"
    _printProcessTable
else
    # warnings only
    ph.setStatus "warning"
    ph.status "$iCountWarning processes use $iWarnLimit % .. $iCriticalLimit % $unit"
    _printProcessTable
fi
# ----- additional infos

# Print extra details below the status output.
# $1 - check mode: "cpu" prints the detected CPU count (read from
#      iCpucount), "mem" prints selected lines of the meminfo file;
#      any other value prints nothing
# $2 - meminfo file; optional, default: /proc/meminfo
function _showAdditionalInfos(){
    case "$1" in
        "cpu")
            echo
            echo "Cpus detected: $iCpucount"
            ;;
        "mem")
            echo
            # the field is named "Buffers" in /proc/meminfo
            grep -E '^(MemTotal|MemFree|MemAvailable|Buffers|Cached):' "${2:-/proc/meminfo}"
            ;;
    esac
}

_showAdditionalInfos "${sMode}"
ph.exit
# ----------------------------------------------------------------------