#!/bin/bash
# ======================================================================
#
# NAGIOS CLIENT CHECK :: check smart status
#
# REQUIREMENTS
# - a physical machine (no vm)
# - sudo permissions for monitoring user to "smartctl" binary
#
# ----------------------------------------------------------------------
# short status in the 1st line:
# - count of available harddisks
# - found disks with an error
# - each device and status
#   - PASSED - tests in SMART check were successful
#   - OK - health status OK; No Self-tests have been logged
#   - [not supported] - Disk available but no SMART data available (not supported)
#   - [no access] - disk device found but not accessible (maybe corrupt)
#   - [does not exist] - device does not exist in the system
# ----------------------------------------------------------------------
#
# v1.3 more smartctl params - parsed in function detectParam()
#      1st value is empty ... last one something invalid
#      NOTE: the three lines below are DATA, not just comments. They are
#      read from this file at runtime and must start in column 1.
# PARAMS:
# PARAMS:-d cciss,0
# PARAMS:NO-PARAM-WAS-DETECTED
# ----------------------------------------------------------------------
#
# ah=axel.hahn@iml.unibe.ch
# ds=daniel.schueler@iml.unibe.ch
#
# 2016-12-23  v0.3  ah,ds
# 2018-12-06  v0.4  ah,ds show details for each disc
# 2020-02-27  v0.5  ah,ds add /sbin to PATH variable
# 2020-03-05  v1.0  <axel.hahn@iml.unibe.ch> switch to ph.* helper functions
# 2021-01-22  v1.1  <axel.hahn@iml.unibe.ch> add status [no access]
# 2021-04-07  v1.2  <axel.hahn@iml.unibe.ch> update message if sudo smartctl -H fails
# 2021-10-28  v1.3  <axel.hahn@iml.unibe.ch> detect -d param for smartctl
# 2022-07-08  v1.4  <axel.hahn@iml.unibe.ch> remove pipe in status line
# ======================================================================

# shellcheck source=/dev/null
. "$(dirname "$0")/inc_pluginfunctions"

typeset -i iFound=0     # number of whole-disk devices that were checked
typeset -i iErrors=0    # number of devices with a failing health status
sOut=                   # per device short status, shown in the 1st output line

# Private working directory and the collected per-device reports.
# Both are set in the main section, right before the first report is written.
tmpWorkdir=
tmpDetailsfile=

# fix for v0.5
PATH=$PATH:/sbin/
export PATH

# ----------------------------------------------------------------------
# functions
# ----------------------------------------------------------------------

# Detect the additional smartctl parameters a device needs.
# The candidates are the values behind "# PARAMS:" in the header of this
# script. They are tried in file order with "sudo smartctl -H"; the first
# candidate that smartctl accepts is written to stdout.
#
# param   string  device, i.e. /dev/sda
# output  the first working parameter string (can be empty); nothing if
#         no candidate works
# return  0 if a working candidate was found, 1 otherwise
function detectParam(){
    local _mydevice=$1
    local _moreparam

    # The loop reads from a process substitution instead of a pipe, so that
    # "return" leaves the function itself and not only a pipeline subshell.
    while read -r _moreparam
    do
        # stdin comes from /dev/null so that sudo/smartctl cannot consume
        # the remaining candidates of the loop input.
        # shellcheck disable=SC2086 # word splitting of the candidate is intended
        if sudo smartctl -H "$_mydevice" $_moreparam </dev/null >/dev/null 2>&1; then
            echo "$_moreparam"
            return 0
        fi
    done < <( grep '^# PARAMS:' "$0" | cut -f 2- -d ':' )

    return 1
}

# Check the smart health of a given device.
# Appends the short status of the device to $sOut, increases $iErrors and
# sets the plugin status to critical on failure, and appends the complete
# smartctl report to $tmpDetailsfile.
#
# global  sOut, iErrors (written); tmpWorkdir, tmpDetailsfile (read)
# param   string  device, i.e. /dev/sda
function checkDrive(){
    local device=$1
    local tmpfile="$tmpWorkdir/smartstatus.log"
    local sLabel="^SMART.*Health"
    local sOK="(ok|passed)"
    local moreparam
    local status
    local -i rcs

    sOut="$sOut ; ${device}:"

    if ! ls -ld "$device" >/dev/null 2>&1; then
        sOut="$sOut [does not exist]"
        return 0
    fi

    # --- write an info header for the current disc
    {
        echo "---------------------------------------------------------------------- "
        echo
        echo "$device"
        echo
    } >"$tmpfile"

    # --- check health
    moreparam=$( detectParam "$device" )
    # shellcheck disable=SC2086 # $moreparam holds 0..n arguments; splitting is intended
    echo sudo smartctl -Ha "$device" $moreparam >>"$tmpfile" 2>&1
    # shellcheck disable=SC2086
    sudo smartctl -Ha "$device" $moreparam >>"$tmpfile" 2>&1
    rcs=$?
    echo "$device - rc=$rcs" >>"$tmpfile"
    echo >>"$tmpfile"

    # NOTE(review): the smartctl man page describes the exit status as a
    # bitmask; this compares against exactly 2 - confirm that is intended.
    if [ "$rcs" -eq 2 ]; then
        iErrors=$(( iErrors + 1 ))
        ph.setStatus "critical"
        sOut="$sOut [no access]"
    elif grep -qi "$sLabel" "$tmpfile"; then
        status=$( grep -i "$sLabel" "$tmpfile" | cut -f 2 -d ":" )
        sOut="$sOut ${status}"
        # unquoted on purpose: collapses the leading blank and line breaks
        # shellcheck disable=SC2086
        if ! echo $status | grep -Ei "$sOK" >>"$tmpfile"; then
            iErrors=$(( iErrors + 1 ))
            ph.setStatus "critical"
        fi
    else
        sOut="$sOut [not supported]"
    fi

    cat "$tmpfile" >>"$tmpDetailsfile"
    rm -f "$tmpfile"
    return 0
}

# ----------------------------------------------------------------------
# main
# ----------------------------------------------------------------------

ph.require smartctl

# --- collect the whole-disk devices
# A glob is used instead of parsing "ls". Names that contain a digit
# (partitions like /dev/sda1) are skipped, and so is the unexpanded pattern
# that remains if no /dev/sd* device exists.
devices=()
for mydevice in /dev/sd*
do
    [[ -e "$mydevice" ]] || continue
    [[ "$mydevice" == *[0-9]* ]] && continue
    devices+=( "$mydevice" )
done

# --- on first device: check permissions
if [ ${#devices[@]} -gt 0 ]; then
    mydevice=${devices[0]}
    moreparam=$( detectParam "$mydevice" )
    # shellcheck disable=SC2086 # $moreparam holds 0..n arguments; splitting is intended
    if ! sudo smartctl -H "$mydevice" $moreparam >/dev/null 2>&1; then
        # stderr is captured as well: that is where sudo reports its problem
        ph.abort "SKIP: smartctl was found - but did not respond results. Maybe no sudo permissions or additional parameters required. $( sudo smartctl -H "$mydevice" 2>&1 )"
    fi
fi

# --- private working directory instead of predictable file names in /tmp
tmpWorkdir=$( mktemp -d "${TMPDIR:-/tmp}/smartstatus.XXXXXX" )
if [ -z "$tmpWorkdir" ] || [ ! -d "$tmpWorkdir" ]; then
    ph.abort "SKIP: unable to create a temporary directory for the smartctl reports."
    exit 3   # only reached if ph.abort returns; 3 = UNKNOWN for Nagios plugins
fi
tmpDetailsfile="$tmpWorkdir/smartstatus-all.log"
: >"$tmpDetailsfile"

# --- check all devices
for mydevice in "${devices[@]}"
do
    iFound=$(( iFound + 1 ))
    checkDrive "$mydevice"
done

ph.status "SMART check on $iFound HDs - $iErrors errors - $sOut"
cat "$tmpDetailsfile"
rm -rf -- "${tmpWorkdir:?}"

ph.exit

# ----------------------------------------------------------------------