-
Hahn Axel (hahn) authoredHahn Axel (hahn) authored
check_cpu 3.88 KiB
#!/bin/bash
# ======================================================================
#
# Check CPU usage
#
# requirements:
# - top
# - bc
#
# ----------------------------------------------------------------------
# 2020-03-10 v1.0 <axel.hahn@iml.unibe.ch>
# 2020-03-23 v1.1 <axel.hahn@iml.unibe.ch> added more data
# 2020-07-08 v1.2 <axel.hahn@iml.unibe.ch> FIX: set "ph." instead of "ps."
# 2020-07-17 v1.3 <axel.hahn@iml.unibe.ch> use ph.require to check binaries
# ======================================================================
# Load the shared plugin functions (ph.*) from the directory this script
# lives in. The path is quoted so that an installation directory containing
# blanks does not split into several words.
. "$(dirname -- "$0")/inc_pluginfunctions"

# Name of the scratch file for this run; $$ (the PID of this shell) makes it
# unique per process.
# NOTE(review): the name is predictable - mktemp would be the safer choice.
tmpfile="/tmp/check_cpu_$$"
# ----------------------------------------------------------------------
# MAIN
# ----------------------------------------------------------------------
# --- check required tools
# ph.require is provided by the sourced plugin functions and checks that the
# listed binaries are available.
ph.require bc top

# --- check param -h
# Only the first parameter is inspected; the usage text is printed and the
# script ends with exit code 0.
case "$1" in
  -h)
    printf '%s\n' \
      "" \
      "usage: $0 [ -w value -c value -h ]" \
      "-w Warning level" \
      "-c Critical level" \
      "-h this help" \
      ""
    exit 0
    ;;
esac
# --- limits
# Defaults are 75 (warning) and 90 (critical); the values given with -w and
# -c on the command line override them. Both variables carry the integer
# attribute, so whatever is assigned gets evaluated as an arithmetic value.
declare -i iWarnLimit="$(ph.getValueWithParam 75 w "$@")"
declare -i iCriticalLimit="$(ph.getValueWithParam 90 c "$@")"
# --- get cpu status, i.e.
# %Cpu(s): 33.3 us, 9.5 sy, 0.0 ni, 57.1 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
#   us, user    : time running un-niced user processes
#   sy, system  : time running kernel processes
#   ni, nice    : time running niced user processes
#   id, idle    : time spent in the kernel idle handler
#   wa, IO-wait : time waiting for I/O completion
#   hi          : time spent servicing hardware interrupts
#   si          : time spent servicing software interrupts
#   st          : time stolen from this vm by the hypervisor

# Print the number that precedes a label in a CPU summary line of top.
# top prints the numbers with a fixed width, so a value like 100.0 is glued
# to the preceding comma ("0.0 ni,100.0 id"). Counting whitespace separated
# columns fails in that case; therefore the value is located by its label.
#   $1 - CPU summary line
#   $2 - label of the wanted value, e.g. "us" or "id"
# Output: the value on stdout; nothing if the label was not found
_cpuValue() {
  local sLine=$1
  local sRegex="([0-9]+([.][0-9]+)?)[[:space:]]*$2([^[:alnum:]]|\$)"
  if [[ $sLine =~ $sRegex ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
  fi
}

# LC_ALL=C makes the output of top independent of the user's locale: the
# decimal separator is a dot (bc needs that) and the labels are not
# translated. The summary line is kept in a variable, so no scratch file
# has to be written to /tmp.
cpuLine=$(LC_ALL=C top -b -n 1 | head -5 | grep '^%Cpu')

cpuUser=$(_cpuValue "$cpuLine" us)
cpuSystem=$(_cpuValue "$cpuLine" sy)
cpuNice=$(_cpuValue "$cpuLine" ni)
cpuIdle=$(_cpuValue "$cpuLine" id)
cpuWait=$(_cpuValue "$cpuLine" wa)
cpuHi=$(_cpuValue "$cpuLine" hi)
cpuSi=$(_cpuValue "$cpuLine" si)
cpuSt=$(_cpuValue "$cpuLine" st)

# Usage is everything that is not idle. bc does the subtraction because the
# values are decimals. Without an idle value bc would only report a syntax
# error, so the calculation is skipped and the usage stays empty.
if [[ -n $cpuIdle ]]; then
  cpuNonIdle=$(echo "100-$cpuIdle" | bc)
else
  echo "ERROR: unable to read the CPU idle value from the output of top." >&2
  cpuNonIdle=
fi
# --- compare the usage with the limits
# bc prints 1 if a comparison is true and 0 if not; it is used because the
# usage is a decimal number. The critical limit is only looked at after the
# warning limit was exceeded. If no limit is exceeded the status is left
# untouched.
# NOTE(review): with an empty usage value both tests fail and the status is
# left untouched as well - confirm that this is the wanted result.
sNewStatus=""
if [ "$(bc <<<"${cpuNonIdle} > ${iWarnLimit}")" -eq 1 ]; then
  sNewStatus="warning"
  if [ "$(bc <<<"${cpuNonIdle} > ${iCriticalLimit}")" -eq 1 ]; then
    sNewStatus="critical"
  fi
fi
if [ -n "$sNewStatus" ]; then
  ph.setStatus "$sNewStatus"
fi
# --- status output
# The status line shows the total usage followed by the user, system and
# idle values.
sStatusText="CPU-USAGE [%] ${cpuNonIdle} ... user: ${cpuUser} - system: ${cpuSystem} - idle: ${cpuIdle}"
ph.status "$sStatusText"

# Legend of all values; it is framed by an empty line above and below.
printf '%s\n' \
  "" \
  "Legend:" \
  "hwi - Time spent handling hardware interrupt routines. (Whenever a peripheral unit want attention form the CPU, it literally pulls a line, to signal the CPU to service it)" \
  "swi - Time spent handling software interrupt routines. (a piece of code, calls an interrupt routine...)" \
  "st - Time spent on involuntary waits by virtual cpu while hypervisor is servicing another processor (stolen from a virtual machine)" \
  "nice - Time spent running niced user processes (User defined priority)" \
  "wait - Time spent on waiting on IO peripherals (eg. disk)" \
  "system - Time spent in kernel space" \
  "user - Time spent in user space" \
  "idle - Time spent in idle operations" \
  ""
# --- performance data usage
# The total usage is the only value that is sent together with the warning
# and critical limits; 0 and 100 are the bounds of the percent scale.
ph.perfadd "cpu-usage" "${cpuNonIdle}" "$iWarnLimit" "$iCriticalLimit" 0 100

# for graphite module: send limits
# ph.perfadd "cpu-warn" $iWarnLimit "" "" 0 100
# ph.perfadd "cpu-crit" $iCriticalLimit "" "" 0 100

# --- performance data single values
# Each item is "<label suffix>:<name of the variable that holds the value>".
# The values are added without limits.
for sPerfItem in \
  system:cpuSystem \
  user:cpuUser \
  idle:cpuIdle \
  nice:cpuNice \
  wait:cpuWait \
  hwi:cpuHi \
  swi:cpuSi \
  st:cpuSt
do
  sPerfLabel=${sPerfItem%%:*}
  sPerfVar=${sPerfItem#*:}
  # ${!sPerfVar} reads the variable whose name is stored in sPerfVar
  ph.perfadd "cpu-${sPerfLabel}" "${!sPerfVar}" "" "" 0 100
done

ph.exit
# ----------------------------------------------------------------------