Skip to content
Snippets Groups Projects
Select Git revision
  • e479a50e0b4834278ccf1d6d53324ef3de56be3b
  • master default protected
  • 7771-harden-postgres-backup
  • pgsql-dump-with-snapshots
  • update-colors
  • update-docs-css
  • usb-repair-stick
  • desktop-notification
  • 7000-corrections
  • db-detector
10 results

inc_bash.sh

Blame
  • check_smartstatus 6.02 KiB
    #!/bin/bash
    # ======================================================================
    #
    # NAGIOS CLIENT CHECK :: check smart status
    #
    # REQUIREMENTS
    # - a physical machine (no vm)
    # - sudo permissions for monitoring user to "smartctl" binary
    #
    # ----------------------------------------------------------------------
    # short status in the 1st line:
    # - count of available harddisks
    # - found disks with an error
    # - each device and status
    #     - PASSED           - tests in SMART check were successful
    #     - OK               - health status OK; No Self-tests have been logged
    #     - [not supported]  - Disk available but no SMART data available (not supported)
    #     - [no access]      - disk device found but not accessible (maybe corrupt)
    #     - [does not exist] - device does not exist in the system
    # ----------------------------------------------------------------------
    #
    # v1.3 more smartctl params - the "# PARAMS:" lines below are read at
    # runtime by the function detectParam(); keep them exactly as they are.
    # The 1st value is empty ... the last one is something invalid.
    # PARAMS:
    # PARAMS:-d cciss,0
    # PARAMS:NO-PARAM-WAS-DETECTED
    # ----------------------------------------------------------------------
    #
    # ah=axel.hahn@iml.unibe.ch
    # ds=daniel.schueler@iml.unibe.ch
    #
    # 2016-12-23  v0.3  ah,ds
    # 2018-12-06  v0.4  ah,ds  show details for each disc
    # 2020-02-27  v0.5  ah,ds  add /sbin to PATH variable
    # 2020-03-05  v1.0  <axel.hahn@iml.unibe.ch> switch to ph.* helper functions
    # 2021-01-22  v1.1  <axel.hahn@iml.unibe.ch> add status [no access]
    # 2021-04-07  v1.2  <axel.hahn@iml.unibe.ch> update message if sudo smartctl -H fails
    # 2021-10-28  v1.3  <axel.hahn@iml.unibe.ch> detect -d param for smartctl
    # 2022-07-08  v1.4  <axel.hahn@iml.unibe.ch> remove pipe in status line
    # 2023-10-20  v1.5  <axel.hahn@unibe.ch>     harden sudo command execution
    # 2024-06-07  v1.6  <axel.hahn@unibe.ch>     add help page; use smartctl --scan to detect devices
    # ======================================================================
    
    
    # Load the shared plugin helper file from the directory of this script
    # (the ph.* functions used below are expected to come from there).
    # The path is quoted so that an installation directory containing
    # whitespace does not break the include.
    # shellcheck source=/dev/null
    . "$( dirname -- "$0" )/inc_pluginfunctions"

    # version of this check
    # NOTE(review): presumably displayed by ph.showImlHelpHeader - confirm
    self_APPVERSION=1.6

    # integer counters: number of checked devices / devices with an error
    typeset -i iFound=0
    typeset -i iErrors=0

    # short status of all devices; extended by checkDrive() per device
    sOut=

    # collects the smartctl output of all checked devices
    # NOTE(review): fixed, predictable file name in /tmp - parallel runs share
    # it; consider mktemp plus a cleanup trap once the ph.* helpers are known
    # not to install their own EXIT trap.
    tmpDetailsfile=/tmp/smartstatus-all.log

    # fix for v0.5: add /sbin to the search path
    PATH=$PATH:/sbin/
    export PATH
    
    # ----------------------------------------------------------------------
    # functions
    # ----------------------------------------------------------------------
    
    # Show the help text on stdout: the output of ph.showImlHelpHeader followed
    # by a description, syntax, options and examples. Takes no arguments.
    function showHelp(){
        # Declaration and assignment are separated so that "local" does not
        # mask the exit code; $0 is quoted so that a script path containing
        # whitespace still yields the correct name.
        local self
        self=$( basename -- "$0" )
        cat <<EOH
    $( ph.showImlHelpHeader )
    
    Show status of local S.M.A.R.T. devices.
    
    SYNTAX:
        $self [-h] [-l] [devices]
    
    OPTIONS:
    
        -h|--help            show this help.
        -l|--list            list devices only.
    
    PARAMETERS:
    
    EXAMPLES
    
        $self
          Scan all local disks
    
        $self -l
          List all local disks without scanning them.
    
    EOH
    }
    
    # Detect which additional smartctl parameters work for a device.
    #
    # The candidates are taken from the comment lines of this script that
    # start with "# PARAM"; the text behind the first colon is the candidate.
    # Each candidate is tried with "sudo -n smartctl -H <device> <candidate>"
    # and every candidate that succeeds is written to stdout, one per line.
    # A working empty candidate results in an empty line.
    #
    # NOTE(review): if several candidates succeed they are all printed and the
    # callers join them by word splitting - verify that this is intended
    # before adding more candidates to the header.
    #
    # param  string  device, i.e. /dev/sda
    function detectParam(){
      local _mydevice=$1
      local _moreparam
      local -a _args

      # The loop is fed by process substitution and not by a pipe, so it runs
      # in the current shell; "read -r" keeps backslashes untouched.
      while read -r _moreparam
      do
        # split the candidate into single arguments ("-d cciss,0" -> 2 words);
        # an empty candidate adds no argument at all
        read -r -a _args <<< "$_moreparam"

        # stdin is taken from /dev/null so that the command cannot swallow the
        # remaining candidates the loop still has to read
        if sudo -n smartctl -H "$_mydevice" "${_args[@]}" </dev/null >/dev/null 2>&1; then
          printf '%s\n' "$_moreparam"
        fi
      done < <( grep '^# PARAM' "$0" | cut -f 2- -d ':' )
    }
    
    # Check the SMART health of a single device and record the result.
    #
    # - appends "<device>: <status>" to the global status line $sOut
    # - increases the global $iErrors and sets the plugin status to "critical"
    #   if smartctl ends with exit code 2 ([no access]) or if the health
    #   status line contains neither "ok" nor "passed"
    # - appends the collected smartctl output to the file $tmpDetailsfile
    #
    # All working variables are local, so nothing but $sOut and $iErrors is
    # changed in the calling scope.
    #
    # param  string  device, i.e. /dev/sda
    function checkDrive(){
      local device=$1
      local tmpfile moreparam status
      local -i rcs
      local sLabel="^SMART.*Health"
      local sOK="(ok|passed)"

      test -n "$sOut" && sOut="$sOut ; "
      sOut="${sOut}${device}:"

      # same result as the former "ls -ld": true for every existing directory
      # entry, dangling symlinks included
      if [[ ! -e "$device" && ! -L "$device" ]]; then
        sOut="$sOut [does not exist]"
        return 0
      fi

      # A private scratch file instead of a fixed name in /tmp: a predictable
      # name can be pre-created as a symlink by another local user and is
      # shared between parallel runs of the check.
      if ! tmpfile=$( mktemp /tmp/smartstatus.XXXXXX ); then
        ph.abort "UNKNOWN: Unable to create a temporary file in /tmp."
        return 1
      fi

      # --- write an info header for the current disc
      {
        echo "---------------------------------------------------------------------- "
        echo
        echo "$device"
        echo
      } >"$tmpfile"

      # --- check health
      moreparam=$( detectParam "$device" )
      # $moreparam stays unquoted on purpose: it can hold several words
      # (i.e. "-d cciss,0") that must arrive as separate arguments
      # shellcheck disable=SC2086
      echo sudo smartctl -Ha "$device" $moreparam >>"$tmpfile" 2>&1
      # shellcheck disable=SC2086
      sudo -n smartctl -Ha "$device" $moreparam >>"$tmpfile" 2>&1
      rcs=$?
      echo "$device - rc=$rcs" >>"$tmpfile"
      echo >>"$tmpfile"

      if [[ $rcs -eq 2 ]]; then
        iErrors=$(( iErrors + 1 ))
        ph.setStatus "critical"
        sOut="$sOut [no access]"
      elif grep -qi "$sLabel" "$tmpfile"; then
        status=$( grep -i "$sLabel" "$tmpfile" | cut -f 2 -d ":" )
        sOut="$sOut ${status}"
        # a status matching $sOK is appended to the log; no match is an error
        # (unquoted to collapse whitespace exactly like before)
        # shellcheck disable=SC2086
        if ! echo $status | grep -Ei "$sOK" >>"$tmpfile"; then
          iErrors=$(( iErrors + 1 ))
          ph.setStatus "critical"
        fi
      else
        # smartctl was executed but printed no health status line
        sOut="$sOut [not supported]"
      fi

      cat "$tmpfile" >>"$tmpDetailsfile"
      rm -f -- "$tmpfile"
    }
    
    # List the devices known to smartctl.
    # Prints the lines of "sudo -n smartctl --scan" and leaves out every line
    # that contains "/dev/bus/".
    function listDevices(){
      local _skip="/dev/bus/"

      sudo -n smartctl --scan \
        | grep -v -- "$_skip"
    }
    
    # ----------------------------------------------------------------------
    # main
    # ----------------------------------------------------------------------
    
    # parse params
    # NOTE(review): the help text mentions [devices], but positional arguments
    # are not evaluated here - parsing just stops at the first non-option and
    # the devices always come from listDevices. Confirm if this is intended.
    customaction=
    while [[ "$#" -gt 0 ]]; do
      case "$1" in
        -h|--help)
          showHelp
          exit 0
          ;;
        -l|--list)
          customaction="list"
          shift 1
          ;;
        -*)
          echo; echo "ERROR: Unknown parameter: $1"; echo; exit 2
          ;;
        *)
          break
          ;;
      esac
    done

    ph.require smartctl

    # remove a details file left behind by a former run
    rm -f -- "$tmpDetailsfile" 2>/dev/null

    # --- pre check for sudo permissions on smartctl
    if ! sudo -n smartctl -h >/dev/null 2>&1; then
      ph.abort "UNKNOWN: No sudo permissions to execute smartctl."
    fi

    if [[ "$customaction" == "list" ]]; then
      echo "Devices to scan:"
      listDevices | sed "s#^#- #"
      exit 0
    fi

    # --- loop over all devices reported by listDevices (1st column = device)
    for mydevice in $( listDevices | cut -f 1 -d " " )
    do
      iFound=$(( iFound + 1 ))

      # on first device: check permissions
      if [[ $iFound -eq 1 ]]; then
        moreparam=$( detectParam "$mydevice" )

        # "sudo -n" like everywhere else in this script: never wait for a
        # password prompt. $moreparam is unquoted on purpose (several words).
        # shellcheck disable=SC2086
        sudo -n smartctl -H "$mydevice" $moreparam >/dev/null 2>&1
        _rcFirstDevice=$?

        # The exit status of smartctl is a bit mask; bit 3 (value 8) means
        # that the SMART status check returned "DISK FAILING". Such a result
        # is a valid answer and must be reported by checkDrive below instead
        # of being turned into a SKIP.
        if (( _rcFirstDevice != 0 && ( _rcFirstDevice & 8 ) == 0 )); then
          ph.abort "SKIP: smartctl was found - but did not respond results. Maybe no sudo permissions or additional parameters required. $( sudo -n smartctl -H "$mydevice" )"
        fi
      fi

      checkDrive "$mydevice"
    done

    ph.status "SMART check on $iFound Disks - $iErrors errors - $sOut"

    # the details file only exists if at least one device was checked
    if [[ -f "$tmpDetailsfile" ]]; then
      cat "$tmpDetailsfile" 2>&1 && rm -f -- "$tmpDetailsfile"
    fi

    ph.exit
    
    # ----------------------------------------------------------------------