check_smartstatus 4.66 KB
Newer Older
Hahn Axel (hahn)'s avatar
Hahn Axel (hahn) committed
1
2
3
4
5
6
7
#!/bin/bash
# ======================================================================
#
# NAGIOS CLIENT CHECK :: check smart status
#
# REQUIREMENTS
# - a physical machine (no vm)
Hahn Axel (hahn)'s avatar
Hahn Axel (hahn) committed
8
# - sudo permissions for monitoring user to "smartctl" binary
Hahn Axel (hahn)'s avatar
Hahn Axel (hahn) committed
9
10
#
# ----------------------------------------------------------------------
Hahn Axel (hahn)'s avatar
Hahn Axel (hahn) committed
11
12
13
14
15
16
17
18
19
20
# short status in the 1st line:
# - count of available harddisks
# - found disks with an error
# - each device and status
#     - PASSED           - tests in SMART check were successful
#     - OK               - health status OK; No Self-tests have been logged
#     - [not supported]  - Disk available but no SMART data available (not supported)
#     - [no access]      - disk device found but not accessible (maybe corrupt)
#     - [does not exist] - device does not exist in the system
# ----------------------------------------------------------------------
Hahn Axel (hahn)'s avatar
Hahn Axel (hahn) committed
21
#
22
23
24
25
26
27
28
# v1.3: candidate parameters for smartctl - they are read by the function detectParam()
# the 1st candidate is empty (no extra parameter) ... the last one is intentionally invalid
# PARAMS:
# PARAMS:-d cciss,0
# PARAMS:NO-PARAM-WAS-DETECTED
# ----------------------------------------------------------------------
#
Hahn Axel (hahn)'s avatar
Hahn Axel (hahn) committed
29
30
31
32
33
34
35
# ah=axel.hahn@iml.unibe.ch
# ds=daniel.schueler@iml.unibe.ch
#
# 2016-12-23  v0.3  ah,ds
# 2018-12-06  v0.4  ah,ds  show details for each disc
# 2020-02-27  v0.5  ah,ds  add /sbin to PATH variable
# 2020-03-05  v1.0  <axel.hahn@iml.unibe.ch> switch to ph.* helper functions
Hahn Axel (hahn)'s avatar
Hahn Axel (hahn) committed
36
# 2021-01-22  v1.1  <axel.hahn@iml.unibe.ch> add status [no access]
37
# 2021-04-07  v1.2  <axel.hahn@iml.unibe.ch> update message if sudo smartctl -H fails
38
# 2021-10-28  v1.3  <axel.hahn@iml.unibe.ch> detect -d param for smartctl
Hahn Axel (hahn)'s avatar
Hahn Axel (hahn) committed
39
# 2022-07-08  v1.4  <axel.hahn@iml.unibe.ch> remove pipe in status line
Hahn Axel (hahn)'s avatar
Hahn Axel (hahn) committed
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# ======================================================================


# load the helper include that is located next to this script
# (presumably the provider of the ph.* functions used below)
. "$(dirname "$0")/inc_pluginfunctions"

typeset -i iFound=0    # number of detected disks
typeset -i iErrors=0   # number of disks with a problem
sOut=                  # per disk status text for the status line
sDetails=

# file collecting the smartctl details of all disks.
# A unique name from mktemp is used instead of a fixed path in /tmp: a
# predictable name can be pre-created (symlink) by another local user and
# parallel runs would overwrite each other.
tmpDetailsfile=$( mktemp "${TMPDIR:-/tmp}/smartstatus-all.XXXXXX" ) || {
  echo "UNKNOWN: unable to create a temporary file for smart details"
  exit 3
}

# fix for v0.5
PATH=$PATH:/sbin/
export PATH

# ----------------------------------------------------------------------
# functions
# ----------------------------------------------------------------------

59
60
61
62
63
64
65
66
67
68
69
70
# detect the additional smartctl parameters that are needed for a device
#
# The candidates are read in file order from this script itself: every line
# matching the grep pattern below contributes the text behind its first ":".
# The first candidate that lets "sudo smartctl -H <device> <candidate>"
# succeed is printed and the search stops.
#
# param   string  device, i.e. /dev/sda
# output  the first working candidate (an empty line if no extra parameter
#         is needed); nothing if no candidate works
function detectParam(){
  local _mydevice=$1
  local _moreparam
  local -a _args

  while read -r _moreparam
  do
    # split the candidate into single arguments, i.e. "-d cciss,0" -> 2 words
    read -r -a _args <<<"$_moreparam"

    # stdin comes from /dev/null so that sudo cannot consume the remaining
    # candidates of the loop input
    if sudo smartctl -H "$_mydevice" "${_args[@]}" </dev/null >/dev/null 2>&1; then
      echo "$_moreparam"
      return 0
    fi
  done < <( grep '^# PARAM' "$0" | cut -f 2- -d ':' )

  return 0
}

# smart details for given device
#
# Appends " ; <device>: <status>" to the status text, appends the smartctl
# report of the device to the details file and - if the disk is not
# accessible or its health status is not ok/passed - increments the error
# counter and sets the plugin status to critical.
#
# global  string   sOut            status text; gets appended
# global  integer  iErrors         error counter; gets incremented
# global  string   tmpDetailsfile  file collecting details; gets appended
# param   string   device, i.e. /dev/sda
function checkDrive(){
  local device=$1
  local sLabel="^SMART.*Health"
  local sOK="(ok|passed)"
  local moreparam smartOutput status
  local -a smartArgs
  local -i rcs

  sOut="$sOut ; ${device}:"

  if ! ls -ld -- "$device" >/dev/null 2>&1; then
    sOut="$sOut [does not exist]"
    return 0
  fi

  # --- check health
  moreparam=$( detectParam "$device" )
  # word splitting is wanted here: "-d cciss,0" must become 2 arguments
  # shellcheck disable=SC2206
  smartArgs=( $moreparam )

  # the report is kept in a variable - no temporary file with a predictable
  # name in /tmp is needed
  smartOutput=$( sudo smartctl -Ha "$device" "${smartArgs[@]}" 2>&1 )
  rcs=$?

  # --- write an info header and the report for the current disc
  {
    echo "---------------------------------------------------------------------- "
    echo
    echo "$device"
    echo
    echo sudo smartctl -Ha "$device" "${smartArgs[@]}"
    if [[ -n "$smartOutput" ]]; then
      printf '%s\n' "$smartOutput"
    fi
    echo "$device - rc=$rcs"
    echo
  } >>"$tmpDetailsfile"

  if [[ $rcs -eq 2 ]]; then
    iErrors=$(( iErrors + 1 ))
    ph.setStatus "critical"
    sOut="$sOut [no access]"
  elif grep -qi "$sLabel" <<<"$smartOutput"; then
    # text behind the ":" of the health line, i.e. " PASSED"
    status=$( grep -i "$sLabel" <<<"$smartOutput" | cut -f 2 -d ":" )
    sOut="$sOut ${status}"

    # unquoted on purpose: echo collapses the surrounding whitespace.
    # grep -E replaces the obsolescent egrep; a matching status is logged
    # into the details file.
    # shellcheck disable=SC2086
    if ! echo $status | grep -Ei "$sOK" >>"$tmpDetailsfile"; then
      iErrors=$(( iErrors + 1 ))
      ph.setStatus "critical"
    fi
  else
    sOut="$sOut [not supported]"
  fi

  return 0
}

# ----------------------------------------------------------------------
# main
# ----------------------------------------------------------------------

Hahn Axel (hahn)'s avatar
Hahn Axel (hahn) committed
131
ph.require smartctl

rm -f -- "$tmpDetailsfile" 2>/dev/null

# loop over all whole disks /dev/sd<letters>.
# A glob is used instead of parsing the output of ls: it does not print an
# error if no /dev/sd* exists and it is safe for any file name.
for mydevice in /dev/sd*
do
  # an unmatched glob stays as literal text - skip it
  [[ -e "$mydevice" ]] || continue
  # names containing a digit (i.e. partitions like /dev/sda1) are skipped
  [[ "$mydevice" == *[0-9]* ]] && continue

  iFound=$(( iFound + 1 ))

  # on first device: check permissions
  if [[ $iFound -eq 1 ]]; then
    moreparam=$( detectParam "$mydevice" )
    # $moreparam is unquoted on purpose: "-d cciss,0" must become 2 arguments
    # shellcheck disable=SC2086
    if ! sudo smartctl -H "$mydevice" $moreparam >/dev/null 2>&1; then
      ph.abort "SKIP: smartctl was found - but did not respond results. Maybe no sudo permissions or additional parameters required. $( sudo smartctl -H "$mydevice" )"
    fi
  fi

  checkDrive "$mydevice"
done

ph.status "SMART check on $iFound HDs - $iErrors errors - $sOut"

# the details file exists only if at least one disk was examined
if [[ -f "$tmpDetailsfile" ]]; then
  cat -- "$tmpDetailsfile"
  rm -f -- "$tmpDetailsfile"
fi


ph.exit


# ----------------------------------------------------------------------