#!/bin/bash
# ======================================================================
#
# NAGIOS CLIENT CHECK :: check smart status
#
# REQUIREMENTS
# - a physical machine (no vm)
# - sudo permissions for monitoring user to "smartctl" binary
#
# ----------------------------------------------------------------------
# short status in the 1st line:
# - count of available harddisks
# - found disks with an error
# - each device and status
#     - PASSED           - tests in SMART check were successful
#     - OK               - health status OK; No Self-tests have been logged
#     - [not supported]  - Disk available but no SMART data available (not supported)
#     - [no access]      - disk device found but not accessible (maybe corrupt)
#     - [does not exist] - device does not exist in the system
# ----------------------------------------------------------------------
#
# v1.3 more smartctl params - parsed in function detectParam()
# 1st value is empty ... last one something invalid
# PARAMS:
# PARAMS:-d cciss,0
# PARAMS:NO-PARAM-WAS-DETECTED
# ----------------------------------------------------------------------
#
# ah=axel.hahn@iml.unibe.ch
# ds=daniel.schueler@iml.unibe.ch
#
# 2016-12-23  v0.3  ah,ds
# 2018-12-06  v0.4  ah,ds  show details for each disc
# 2020-02-27  v0.5  ah,ds  add /sbin to PATH variable
# 2020-03-05  v1.0  <axel.hahn@iml.unibe.ch> switch to ph.* helper functions
# 2021-01-22  v1.1  <axel.hahn@iml.unibe.ch> add status [no access]
# 2021-04-07  v1.2  <axel.hahn@iml.unibe.ch> update message if sudo smartctl -H fails
# 2021-10-28  v1.3  <axel.hahn@iml.unibe.ch> detect -d param for smartctl
# 2022-07-08  v1.4  <axel.hahn@iml.unibe.ch> remove pipe in status line
# 2023-10-20  v1.5  <axel.hahn@unibe.ch>     harden sudo command execution
# 2024-06-07  v1.6  <axel.hahn@unibe.ch>     add help page; use smartctl --scan to detect devices
# ======================================================================


# Load the shared plugin helpers that live next to this script
# (presumably the source of the ph.* functions used below - confirm).
# The path is quoted so that an install directory containing blanks works.
# shellcheck source=/dev/null
. "$(dirname "$0")/inc_pluginfunctions"

# version of this check; not read in this file
# (presumably shown by ph.showImlHelpHeader - confirm)
self_APPVERSION=1.6

# counters: number of scanned devices / devices with a problem
typeset -i iFound=0
typeset -i iErrors=0

# status text for the 1st output line; extended per device by checkDrive
sOut=

# collected smartctl output of all devices; printed after the status line
# NOTE(review): fixed name in /tmp - parallel runs share this file
tmpDetailsfile=/tmp/smartstatus-all.log

# fix for v0.5
PATH=$PATH:/sbin/
export PATH

# ----------------------------------------------------------------------
# functions
# ----------------------------------------------------------------------

# show help
# Writes the usage text to stdout. The first lines are whatever
# ph.showImlHelpHeader prints; the script name in the text is taken
# from $0 without its directory part.
function showHelp(){
    # parameter expansion instead of "basename $0": no word splitting when
    # the script path contains blanks and no extra process
    local self=${0##*/}
    cat <<EOH
$( ph.showImlHelpHeader )

Show status of local S.M.A.R.T. devices.

SYNTAX:
    $self [-h] [-l] [devices]

OPTIONS:

    -h|--help            show this help.
    -l|--list            list devices only.

PARAMETERS:

EXAMPLES

    $self
      Scan all local disks

    $self -l
      List all local disks without scanning them.

EOH
}

# Detect the additional smartctl parameters a device needs.
# The candidates are the marker lines in the header of this script
# (comment lines matching '^# PARAM'; the text behind the first colon is
# the candidate). They are tried in file order with "smartctl -H" and the
# first candidate that works is written to stdout - this can be the empty
# string. If no candidate works nothing but an empty result is returned.
# param  string  device, i.e. /dev/sda
function detectParam(){
  local _mydevice=$1
  local _moreparam
  local -i _rc=0

  # The loop is fed by process substitution (no pipeline), so it runs in
  # the current shell and can leave the function on the first hit.
  while read -r _moreparam
  do
    # $_moreparam stays unquoted on purpose: it can be empty or consist of
    # several words (e.g. "-d cciss,0"). stdin is detached so that the
    # command can never consume the remaining candidate lines.
    # shellcheck disable=SC2086
    sudo -n smartctl -H "$_mydevice" $_moreparam </dev/null >/dev/null 2>&1
    _rc=$?

    # The exit status of smartctl is a bit mask. Bits 0-2 (value 1, 2, 4)
    # mean: bad command line, device not opened, SMART command failed.
    # Higher bits only describe the health of the disk, so a failing disk
    # must not disqualify a working parameter set.
    if (( (_rc & 7) == 0 )); then
      printf '%s\n' "$_moreparam"
      return 0
    fi
  done < <( grep '^# PARAM' "$0" | cut -f 2- -d ':' )

  return 0
}

# smart details for given device
# Appends "<device>: <state>" to the global status text $sOut, raises the
# global counter $iErrors and sets the status "critical" on problems and
# appends the collected smartctl output to the file $tmpDetailsfile.
# <state> is the health text reported by smartctl or one of
# [no access], [not supported], [does not exist].
# param  string  device, i.e. /dev/sda
function checkDrive(){
  local device=$1
  local sLabel="^SMART.*Health"
  local sOK="(ok|passed)"
  local tmpfile moreparam status
  local -i rcs=0

  test -n "$sOut" && sOut="$sOut ; "
  sOut="${sOut}${device}:"

  # -L keeps a dangling symlink "existing", like the former "ls -ld" test
  if [[ ! -e "$device" && ! -L "$device" ]]; then
    sOut="$sOut [does not exist]"
    return 0
  fi

  # private scratch file per call; the former fixed name is only the
  # fallback if mktemp is not usable
  tmpfile=$( mktemp /tmp/smartstatus.XXXXXX 2>/dev/null ) || tmpfile=/tmp/smartstatus.log

  # --- write an info header for the current disc
  {
    echo "---------------------------------------------------------------------- "
    echo
    echo "$device"
    echo
  } >"$tmpfile"

  # --- check health
  moreparam=$( detectParam "$device" )
  {
    # $moreparam is unquoted on purpose: empty or several words
    # shellcheck disable=SC2086
    echo "sudo smartctl -Ha $device" $moreparam
    # shellcheck disable=SC2086
    sudo -n smartctl -Ha "$device" $moreparam
    rcs=$?
    echo "$device - rc=$rcs"
    echo
  } >>"$tmpfile" 2>&1

  # The exit status of smartctl is a bit mask; bit 1 (value 2) stands for
  # "device open failed" and is tested as a bit, not by equality.
  if (( rcs & 2 )); then
    iErrors=$(( iErrors + 1 ))
    ph.setStatus "critical"
    sOut="$sOut [no access]"
  elif grep -qi "$sLabel" "$tmpfile"; then
    # health line found: the text behind the colon is the state
    status=$( grep -i "$sLabel" "$tmpfile" | cut -f 2 -d ":" )
    sOut="$sOut ${status}"
    # a state matching $sOK is additionally written to the details;
    # anything else counts as an error
    # shellcheck disable=SC2086
    if ! echo $status | grep -Ei "$sOK" >>"$tmpfile"; then
      iErrors=$(( iErrors + 1 ))
      ph.setStatus "critical"
    fi
  else
    sOut="$sOut [not supported]"
  fi

  cat "$tmpfile" >>"$tmpDetailsfile"
  rm -f "$tmpfile"
}

# Print the scan lines of "smartctl --scan" (run via sudo), one per
# device; lines that contain /dev/bus/ are dropped.
function listDevices(){
  grep -v "/dev/bus/" < <( sudo -n smartctl --scan )
}

# ----------------------------------------------------------------------
# main
# ----------------------------------------------------------------------

# parse params
# Options are consumed from the front of the argument list; parsing stops
# at the first argument that does not start with a dash.
customaction=
while (( $# > 0 )); do
  case "$1" in
    -h|--help)
      showHelp
      exit 0
      ;;
    -l|--list)
      customaction="list"
      shift 1
      ;;
    -*)
      # any other option is an error
      echo
      echo "ERROR: Unknown parameter: $1"
      echo
      exit 2
      ;;
    *)
      break
      ;;
  esac
done

ph.require smartctl

# start without a details file left over from a former run
rm -f "$tmpDetailsfile" 2>/dev/null

# --- pre check for sudo permissions on smartctl
if ! sudo -n smartctl -h >/dev/null 2>&1; then
  ph.abort "UNKNOWN: No sudo permissions to execute smartctl."
fi

if [ "$customaction" == "list" ]; then
  echo "Devices to scan:"
  listDevices | sed "s#^#- #"
  exit 0
fi

# --- loop over the devices found by smartctl --scan
# The device name is the first blank separated field of a scan line; the
# result is split into words on purpose.
for mydevice in $( listDevices | cut -f 1 -d " " )
do
  iFound=$(( iFound + 1 ))

  # on first device: check that smartctl delivers usable results at all
  if [ "$iFound" -eq 1 ]; then
    moreparam=$( detectParam "$mydevice" )
    # "sudo -n" like everywhere else: never wait for a password prompt.
    # $moreparam is unquoted on purpose: empty or several words.
    # shellcheck disable=SC2086
    sudo -n smartctl -H "$mydevice" $moreparam >/dev/null 2>&1
    rcPrecheck=$?
    # The exit status of smartctl is a bit mask. Only bits 0-2 (bad command
    # line, device not opened, SMART command failed) mean "no results".
    # Higher bits report a failing disk - that one must reach checkDrive
    # and be reported there instead of being skipped here.
    if (( rcPrecheck & 7 )); then
      ph.abort "SKIP: smartctl was found - but did not respond results. Maybe no sudo permissions or additional parameters required. $( sudo -n smartctl -H "$mydevice" )"
    fi
  fi

  checkDrive "$mydevice"
done

# status line first, then the collected details of all devices
ph.status "SMART check on $iFound Disks - $iErrors errors - $sOut"
cat "$tmpDetailsfile" 2>&1 && rm -f "$tmpDetailsfile"

ph.exit

# ----------------------------------------------------------------------