Skip to content
Snippets Groups Projects
Commit 7591e745 authored by Hahn Axel (hahn)'s avatar Hahn Axel (hahn)
Browse files

update check_cpu; fix typos

parent 82bbb4a2
No related branches found
No related tags found
No related merge requests found
......@@ -29,7 +29,7 @@ typeset -i iUsage=0
# ----------------------------------------------------------------------
# pre checks
# ----------------------------------------------------------------------
ph.reqire wget
ph.require wget
# ----------------------------------------------------------------------
# check output
......
......@@ -12,12 +12,51 @@
# 2020-03-23 v1.1 <axel.hahn@iml.unibe.ch> added more data
# 2020-07-08 v1.2 <axel.hahn@iml.unibe.ch> FIX: set "ph." instead "ps."
# 2020-07-17 v1.3 <axel.hahn@iml.unibe.ch> use ph.require to check binaries
# 2021-02-10 v1.4 <axel.hahn@iml.unibe.ch> added critical io wait
# ======================================================================
. `dirname $0`/inc_pluginfunctions
tmpfile=/tmp/check_cpu_$$
# ----------------------------------------------------------------------
# functions
# ----------------------------------------------------------------------
# ----------------------------------------------------------------------
# showHelp
# Print the usage text of the check_cpu plugin to stdout.
# Reads $0 to show the name the script was called with.
# Takes no parameters; the caller decides whether to exit afterwards.
# ----------------------------------------------------------------------
function showHelp(){
cat <<EOF
______________________________________________________________________
CHECK_CPU check cpu usage and cpu wait v1.4
(c) Institute for Medical Education - University of Bern
Licence: GNU GPL 3
______________________________________________________________________
Cpu infos are taken from output of top command.
SYNTAX:
$(basename "$0") [-w WARN_LIMIT] [-c CRITICAL_LIMIT] [-i CRITICAL_IO_WAIT]
OPTIONS:
-w VALUE cpu usage warning level (default: 75)
-c VALUE cpu usage critical level (default: 90)
-i VALUE io wait critical level (default: 50)
-h or --help show this help.
PARAMETERS:
None.
EXAMPLE:
$(basename "$0") -w 60 -c 80 -i 40
EOF
}
# ----------------------------------------------------------------------
# MAIN
# ----------------------------------------------------------------------
......@@ -27,20 +66,19 @@ ph.require bc top
# --- check param -h
if [ "$1" = "-h" ]; then
echo "
usage: $0 [ -w value -c value -h ]
-w Warning level
-c Critical level
-h this help
"
exit 0
fi
# --- show the help text and leave when the first parameter asks for it;
#     any other first parameter falls through to the normal check
case "$1" in
  -h|--help)
    showHelp
    exit 0
    ;;
esac
# set default / override from command line params
typeset -i iWarnLimit=` ph.getValueWithParam 75 w "$@"`
typeset -i iCriticalLimit=` ph.getValueWithParam 90 c "$@"`
typeset -i iCriticalWait=` ph.getValueWithParam 50 i "$@"`
# get cpu status i.e.
# %Cpu(s): 33.3 us, 9.5 sy, 0.0 ni, 57.1 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
......@@ -66,20 +104,29 @@ cpuNonIdle=`echo 100-$cpuIdle | bc`
rm -f $tmpfile
if [ "$(echo "${cpuNonIdle} > ${iWarnLimit}" | bc)" -eq 1 ]; then
if [ "$(echo "${cpuNonIdle} > ${iCriticalLimit}" | bc)" -eq 1 ]; then
ph.setStatus "critical"
else
ph.setStatus "warning"
fi
# --- evaluate the measured values against the limits
# The io wait limit is tested first: when it is exceeded the status is
# critical and the usage limits are not evaluated at all.
# Otherwise the cpu usage (non idle) is compared with the warning limit and,
# if that is exceeded, with the critical limit.
# bc does the comparison and prints 1 when the expression is true.
sInfo="INFO : cpu is in normal ranges."
if [ "$(echo "${cpuWait} > ${iCriticalWait}" | bc)" -eq 1 ]; then
  ph.setStatus "critical"
  sInfo="HINT : cpu WAIT is high - check hardware issues"
elif [ "$(echo "${cpuNonIdle} > ${iWarnLimit}" | bc)" -eq 1 ]; then
  # warning and critical share the same hint; only the status differs
  sInfo="HINT : cpu usage is high - check processes"
  if [ "$(echo "${cpuNonIdle} > ${iCriticalLimit}" | bc)" -eq 1 ]; then
    ph.setStatus "critical"
  else
    ph.setStatus "warning"
  fi
fi
# --- status output
ph.status "CPU-USAGE [%] ${cpuNonIdle} ... user: ${cpuUser} - system: ${cpuSystem} - idle: ${cpuIdle}"
ph.status "CPU-USAGE [%] ${cpuNonIdle} ... user: ${cpuUser} - system: ${cpuSystem} - idle: ${cpuIdle} - wait: ${cpuWait}"
echo "
Limits: usage warn at ${iWarnLimit} .. critical at ${iCriticalLimit} .. io wait limit ${iCriticalWait}
$sInfo
Legend:
......@@ -103,12 +150,13 @@ ph.perfadd "cpu-usage" "${cpuNonIdle}" $iWarnLimit $iCriticalLimit 0 100
# ph.perfadd "cpu-warn" $iWarnLimit "" "" 0 100
# ph.perfadd "cpu-crit" $iCriticalLimit "" "" 0 100
ph.perfadd "cpu-wait" "${cpuWait}" "" "$iCriticalWait" 0 100
# --- performance data single values
ph.perfadd "cpu-system" "${cpuSystem}" "" "" 0 100
ph.perfadd "cpu-user" "${cpuUser}" "" "" 0 100
ph.perfadd "cpu-idle" "${cpuIdle}" "" "" 0 100
ph.perfadd "cpu-nice" "${cpuNice}" "" "" 0 100
ph.perfadd "cpu-wait" "${cpuWait}" "" "" 0 100
ph.perfadd "cpu-hwi" "${cpuHi}" "" "" 0 100
ph.perfadd "cpu-swi" "${cpuSi}" "" "" 0 100
ph.perfadd "cpu-st" "${cpuSt}" "" "" 0 100
......
......@@ -51,10 +51,17 @@ See https://www.kernel.org/doc/Documentation/block/stat.txt
SYNTAX:
`basename $0` -m MODE [-w WARN_LIMIT] [-c CRITICAL_LIMIT]
OPTIONS:
-m MODE set mode for type of output (required)
-w VALUE warning level (default: 0 for none)
-c VALUE critical level (default: 0 for none)
-h or --help show this help.
PARAMETERS:
-m MODE set mode for type of output
OPTIONS:
MODE
io read I/Os, write I/Os, discard I/0s
ticks read ticks, write ticks, discard ticks
......@@ -77,26 +84,25 @@ EOF
# echo "--- partitions: "
# getPartitions
ph.reqire bc lsblk
ph.require bc lsblk
typeset -i iDelta=0
# set default / override from command line params
typeset -i iWarnLimit=` ph.getValueWithParam 0 w "$@"`
typeset -i iCriticalLimit=` ph.getValueWithParam 0 c "$@"`
sMode=` ph.getValueWithParam '' m "$@"`
# --- set mode
case "$1" in
"--help")
"--help"|"-h")
showHelp
exit 0
;;
*)
esac
# set default / override from command line params
typeset -i iWarnLimit=` ph.getValueWithParam 0 w "$@"`
typeset -i iCriticalLimit=` ph.getValueWithParam 0 c "$@"`
# --- set mode
sMode=` ph.getValueWithParam '' m "$@"`
# --- labels and its columns in /sys/block/$myDisk/stat
......
......@@ -21,7 +21,7 @@ tmpfile=/tmp/check_haproxy_healthcheck_$$
# ----------------------------------------------------------------------
# pre checks
# ----------------------------------------------------------------------
ph.reqire wget
ph.require wget
cat $cfgfile >/dev/null
if [ $? -ne 0 ]; then
......
......@@ -30,7 +30,7 @@ tmpfileping=/tmp/check_haproxy_status3_$$
# ----------------------------------------------------------------------
# pre checks
# ----------------------------------------------------------------------
ph.reqire wget
ph.require wget
if [ ! -f $cfgfile ]; then
ph.abort "UNKNOWN: config file does not exist: $cfgfile"
......
......@@ -43,7 +43,7 @@ function getMemvalue(){
# ----------------------------------------------------------------------
# --- check required tools
ph.reqire bc
ph.require bc
# --- check param -h
if [ "$1" = "-h" ]; then
......
......@@ -190,7 +190,7 @@ if [ $bOptHelp -eq 1 -o $# -lt 1 ]; then
fi
# --- check required tools
ph.reqire mysql
ph.require mysql
# --- install
if [ $bOptInstall -eq 1 -a "$( whoami )" = "root" ]; then
......
......@@ -19,7 +19,7 @@
# ----------------------------------------------------------------------
# --- check required tools
ph.reqire netstat
ph.require netstat
# --- check param -h
if [ "$1" = "-h" ]; then
......
......@@ -20,7 +20,7 @@ tmpfile=/tmp/check_proc_zombie_$$
# ----------------------------------------------------------------------
# --- check required tools
ph.reqire top
ph.require top
# --- check param -h
if [ "$1" = "-h" ]; then
......
......@@ -5,9 +5,19 @@
#
# REQUIREMENTS
# - a physical machine (no vm)
# - sudo permissions for monitoring user to "smartctl"
# - sudo permissions for monitoring user to "smartctl" binary
#
# ----------------------------------------------------------------------
# short status in the 1st line:
# - count of available harddisks
# - found disks with an error
# - each device and status
# - PASSED - tests in SMART check were successful
# - OK - health status OK; No Self-tests have been logged
# - [not supported] - Disk available but no SMART data available (not supported)
# - [no access] - disk device found but not accessible (maybe corrupt)
# - [does not exist] - device does not exist in the system
# ----------------------------------------------------------------------
#
# ah=axel.hahn@iml.unibe.ch
# ds=daniel.schueler@iml.unibe.ch
......@@ -16,6 +26,7 @@
# 2018-12-06 v0.4 ah,ds show details for each disc
# 2020-02-27 v0.5 ah,ds add /sbin to PATH variable
# 2020-03-05 v1.0 <axel.hahn@iml.unibe.ch> switch to ph.* helper functions
# 2021-01-22 v1.1 <axel.hahn@iml.unibe.ch> add status [no access]
# ======================================================================
......@@ -41,8 +52,7 @@ function checkDrive(){
sLabel="^SMART.*Health"
sOK="(ok|passed)"
sOut="$sOut ${device}:"
sOut="$sOut | ${device}:"
ls -ld $device >/dev/null 2>&1
......@@ -53,26 +63,38 @@ function checkDrive(){
echo >>$tmpfile
echo "$device" >>$tmpfile
echo >>$tmpfile
sudo smartctl -i $device | grep Mode >>$tmpfile
echo >>$tmpfile
# sudo smartctl -i $device | grep Mode >>$tmpfile
# echo >>$tmpfile
# --- check health
sudo smartctl -Ha $device >>$tmpfile
grep -i "$sLabel" $tmpfile >/dev/null
if [ $? -eq 0 ]; then
status=`grep -i "$sLabel" $tmpfile | cut -f 2 -d ":"`
sOut="$sOut ${status}"
echo $status | egrep -i "$sOK" >>$tmpfile
if [ $? -ne 0 ]; then
iErrors=$iErrors+1
ph.setStatus "critical"
fi
# sudo smartctl -T permissive -Ha $device >>$tmpfile
sudo smartctl -Ha $device >>$tmpfile 2>&1
rcs=$?
echo $device - rc=$rcs >>$tmpfile
echo >>$tmpfile
if [ $rcs -eq 2 ]; then
iErrors=$iErrors+1
ph.setStatus "critical"
sOut="$sOut [no access]"
else
sOut="$sOut [not supported]"
grep -i "$sLabel" $tmpfile >/dev/null
if [ $? -eq 0 ]; then
status=`grep -i "$sLabel" $tmpfile | cut -f 2 -d ":"`
sOut="$sOut ${status}"
echo $status | egrep -i "$sOK" >>$tmpfile
if [ $? -ne 0 ]; then
iErrors=$iErrors+1
ph.setStatus "critical"
fi
else
# status=`grep -i "" $tmpfile | cut -f 2 -d ":"`
sOut="$sOut [not supported]"
fi
fi
cat $tmpfile >>$tmpDetailsfile
echo >>$tmpfile
echo >>$tmpfile
# echo >>$tmpfile
# echo >>$tmpfile
rm -f $tmpfile
else
sOut="$sOut [does not exist]"
......@@ -83,7 +105,7 @@ function checkDrive(){
# main
# ----------------------------------------------------------------------
ph.reqire smartctl
ph.require smartctl
rm -f $tmpDetailsfile 2>/dev/null
......
......@@ -55,7 +55,7 @@ function showHelp(){
# --- check requirements
ph.reqire openssl
ph.require openssl
if [ $# -eq 0 ]; then
showHelp
......
......@@ -55,7 +55,7 @@ function ph.abort(){
# check required binaries in the path
# param(s) string name of binary to check with "which" command
function ph.reqire(){
function ph.require(){
which $* >/dev/null
if [ $? -ne 0 ]; then
ph.setStatus "unknown"
......
......@@ -107,5 +107,5 @@ check if a binary PROG exists in search path (=are installed) - if not then exec
Example:
``ph.reqire bc lsblk``
``ph.require bc lsblk``
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment