-
Hahn Axel (hahn) authoredHahn Axel (hahn) authored
check_smartstatus 4.66 KiB
#!/bin/bash
# ======================================================================
#
# NAGIOS CLIENT CHECK :: check smart status
#
# REQUIREMENTS
# - a physical machine (no vm)
# - sudo permissions for monitoring user to "smartctl" binary
#
# ----------------------------------------------------------------------
# short status in the 1st line:
# - count of available harddisks
# - found disks with an error
# - each device and status
# - PASSED - tests in SMART check were successful
# - OK - health status OK; No Self-tests have been logged
# - [not supported] - Disk available but no SMART data available (not supported)
# - [no access] - disk device found but not accessible (maybe corrupt)
# - [does not exist] - device does not exist in the system
# ----------------------------------------------------------------------
#
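# v1.3 more smartctl params - the PARAMS lines below are not just comments:
# the function detectParam() greps them from this file at runtime and tries
# the values in the given order.
# 1st value is empty (no extra param) ... last one is something invalid.
# Keep their line prefix unchanged and do not start other lines with it.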
# PARAMS:
# PARAMS:-d cciss,0
# PARAMS:NO-PARAM-WAS-DETECTED
# ----------------------------------------------------------------------
#
# ah=axel.hahn@iml.unibe.ch
# ds=daniel.schueler@iml.unibe.ch
#
# 2016-12-23 v0.3 ah,ds
# 2018-12-06 v0.4 ah,ds show details for each disc
# 2020-02-27 v0.5 ah,ds add /sbin to PATH variable
# 2020-03-05 v1.0 <axel.hahn@iml.unibe.ch> switch to ph.* helper functions
# 2021-01-22 v1.1 <axel.hahn@iml.unibe.ch> add status [no access]
# 2021-04-07 v1.2 <axel.hahn@iml.unibe.ch> update message if sudo smartctl -H fails
# 2021-10-28 v1.3 <axel.hahn@iml.unibe.ch> detect -d param for smartctl
# 2022-07-08 v1.4 <axel.hahn@iml.unibe.ch> remove pipe in status line
# ======================================================================
# Load the ph.* helper functions from the directory of this script.
# The path is quoted so that an installation directory containing
# whitespace does not break the include.
. "$(dirname "$0")/inc_pluginfunctions"

# integer counters: detected disks / disks counted as error
typeset -i iFound=0
typeset -i iErrors=0

# sOut collects the per-device part of the status line.
# sDetails is initialized here but not used in the visible part of the script.
sOut=
sDetails=

# file that collects the smartctl output of all devices
tmpDetailsfile=/tmp/smartstatus-all.log

# fix for v0.5: add /sbin to the search path
# (presumably so that the smartctl binary is found - TODO confirm)
PATH=$PATH:/sbin/
export PATH
# ----------------------------------------------------------------------
# functions
# ----------------------------------------------------------------------
# detect the extra smartctl parameter(s) that a device needs
# The candidates are the values behind the colon of the "PARAMS:" comment
# lines in this script file. They are tried top down; the first candidate
# that lets smartctl talk to the device is printed and the search stops.
# Nothing is printed if no candidate fits.
# param  string  device, i.e. /dev/sda
function detectParam(){
  local _mydevice=$1
  local _moreparam
  local -i _rc

  grep '^# PARAM' "$0" | cut -f 2- -d ':' | while read -r _moreparam
  do
    # - $_moreparam is unquoted on purpose: a candidate like "-d cciss,0"
    #   has to be split into separate arguments
    # - stdin is taken from /dev/null so that sudo/smartctl cannot consume
    #   the remaining candidates waiting in the pipe
    # shellcheck disable=SC2086
    sudo smartctl -H "$_mydevice" $_moreparam </dev/null >/dev/null 2>&1
    _rc=$?

    # The exit status of smartctl is a bit mask. Bits 0-2 (values 1, 2, 4)
    # mean that the command line, the device access or a SMART command
    # failed; the higher bits only report health findings of the disk.
    # A disk with health findings must not be mistaken for a wrong parameter.
    if (( (_rc & 7) == 0 )); then
      printf '%s\n' "$_moreparam"
      break
    fi
  done
}
# smart details for given device
# - appends " ; <device>: <status>" to the global $sOut
# - increases the global $iErrors and calls ph.setStatus "critical" if the
#   device is not accessible or its health status is neither ok nor passed
# - appends the smartctl output of the device to the file $tmpDetailsfile
# param  string  device, i.e. /dev/sda
function checkDrive(){
  local _device=$1
  local _label="^SMART.*Health"
  local _okPattern="(ok|passed)"
  local _tmpfile _moreparam _healthLines _health
  local -i _rc

  sOut="$sOut ; ${_device}:"

  if ! [[ -e "$_device" || -L "$_device" ]]; then
    sOut="$sOut [does not exist]"
    return
  fi

  # Private temp file per call: a fixed name in /tmp collides between
  # parallel runs and can be pre-created by another user. If mktemp is not
  # usable, fall back to the former fixed path.
  _tmpfile=$(mktemp "${TMPDIR:-/tmp}/smartstatus.XXXXXX" 2>/dev/null) \
    || _tmpfile=/tmp/smartstatus.log

  # --- write an info header for the current disc
  {
    echo "---------------------------------------------------------------------- "
    echo
    echo "$_device"
    echo
  } >"$_tmpfile"

  # --- check health
  # $_moreparam is unquoted on purpose: it can hold several arguments
  _moreparam=$( detectParam "$_device" )
  # shellcheck disable=SC2086
  echo sudo smartctl -Ha "$_device" $_moreparam >>"$_tmpfile" 2>&1
  # shellcheck disable=SC2086
  sudo smartctl -Ha "$_device" $_moreparam >>"$_tmpfile" 2>&1
  _rc=$?
  echo "$_device - rc=$_rc" >>"$_tmpfile"
  echo >>"$_tmpfile"

  # The exit status of smartctl is a bit mask; bit 1 (value 2) is set when
  # the device could not be opened.
  if (( _rc & 2 )); then
    iErrors=$((iErrors+1))
    ph.setStatus "critical"
    sOut="$sOut [no access]"
  else
    _healthLines=$( grep -i "$_label" "$_tmpfile" )
    if [[ -n "$_healthLines" ]]; then
      # the status is the text behind the colon of the health line
      _health=$( cut -f 2 -d ":" <<<"$_healthLines" )
      sOut="$sOut ${_health}"
      # unquoted echo: whitespace around the status is collapsed before a
      # matching status is written to the details
      # shellcheck disable=SC2086
      if ! echo $_health | grep -Ei "$_okPattern" >>"$_tmpfile"; then
        iErrors=$((iErrors+1))
        ph.setStatus "critical"
      fi
    else
      sOut="$sOut [not supported]"
    fi
  fi

  cat "$_tmpfile" >>"$tmpDetailsfile"
  rm -f "$_tmpfile"
}
# ----------------------------------------------------------------------
# main
# ----------------------------------------------------------------------
ph.require smartctl

# Collect the details in a private temp file (kept in /tmp like the preset
# path) instead of a predictable file name. If mktemp is not usable, keep
# the preset $tmpDetailsfile and remove a leftover of a former run.
if _detailsTmp=$(mktemp /tmp/smartstatus-all.XXXXXX 2>/dev/null); then
  tmpDetailsfile=$_detailsTmp
else
  rm -f "$tmpDetailsfile" 2>/dev/null
fi

# Loop over whole disks only. A glob is used instead of parsing the output
# of "ls"; names containing a digit (partitions like /dev/sda1) are skipped.
for mydevice in /dev/sd*
do
  # without any match the pattern is handed over literally - skip it
  [[ -e "$mydevice" ]] || continue
  [[ "$mydevice" == *[0-9]* ]] && continue

  iFound=$((iFound+1))

  # on first device: check permissions
  if [[ $iFound -eq 1 ]]; then
    moreparam=$( detectParam "$mydevice" )
    # $moreparam is unquoted on purpose: it can hold several arguments
    # shellcheck disable=SC2086
    sudo smartctl -H "$mydevice" $moreparam >/dev/null 2>&1
    _rcFirst=$?

    # The exit status of smartctl is a bit mask. Only bits 0-2 (values 1, 2,
    # 4) tell that smartctl could not do its job; a failing sudo ends with 1
    # too. The higher bits report health findings of the disk - these are
    # evaluated by checkDrive and must not skip the whole check.
    if (( _rcFirst & 7 )); then
      rm -f "$tmpDetailsfile"
      ph.abort "SKIP: smartctl was found - but did not respond results. Maybe no sudo permissions or additional parameters required. $( sudo smartctl -H "$mydevice" )"
    fi
  fi

  checkDrive "$mydevice"
done

# summary first, then the collected smartctl output of all devices
ph.status "SMART check on $iFound HDs - $iErrors errors - $sOut"
if [[ -f "$tmpDetailsfile" ]]; then
  cat "$tmpDetailsfile"
fi
rm -f "$tmpDetailsfile"

ph.exit
# ----------------------------------------------------------------------