Skip to content
Snippets Groups Projects
check_snmp_synology 13.36 KiB
#!/bin/bash
# check_snmp_synology for nagios version 1.2
# 30.04.2013        Nicolas Ordonez, Switzerland
# 08.08.2020        Axel Hahn: add update, community string
# 03.05.2023  v1.2  Axel Hahn: support Snmpv3 connections
# 09.05.2023  v1.3  Axel Hahn: allow complete check or single check(s)
# 12.05.2023  v1.4  Axel Hahn: no raid size check (DSM 5.x)
# ----------------------------------------------------------------------
# This plugin checks the health of your Synology NAS
# - System status (Power, Fans)
# - Disks status 
# - RAID status
# - available updates
#
# Tested with DSM 5.2 ... 6.2 ... 6.4 ... 7.1
# ----------------------------------------------------------------------
# see docs:
# https://global.download.synology.com/download/Document/Software/DeveloperGuide/Firmware/DSM/All/enu/Synology_DiskStation_MIB_Guide.pdf
# ----------------------------------------------------------------------

# Load the include files that are located next to this script.
# NOTE(review): the ph.* functions, read_config and the SNMP* settings used
# below are not defined in this file - presumably they come from these includes.
# The path is quoted so that an installation directory with spaces works.
. "$(dirname "$0")/inc_pluginfunctions"
. "$(dirname "$0")/check_snmp_includes"

# --- basic vars

# Upper case script name; it is shown in the header of the usage text.
# The character classes are quoted: unquoted they are glob patterns that the
# shell may expand to a matching file name in the current directory.
self_APPNAME=$( basename "$0" | tr '[:lower:]' '[:upper:]' )
self_APPVERSION=1.4

SNMPOUTPUT=      # result of the snmpget request; read by _get
option_found=0
healthString=    # status line (1st line of the output); filled by _add_status
verbose="no"     # "yes" enables the detail output collected by _wd
out=""           # collected detail output; printed after the status line

# --- OID declarations
# Common prefix of all OIDs used by this check.
OID_syno="1.3.6.1.4.1.6574"

# system values (single values, requested with their full OID)
OID_systemStatus="$OID_syno.1.1.0"
OID_temp="$OID_syno.1.2.0"
OID_powerStatus="$OID_syno.1.3.0"
OID_systemFanStatus="$OID_syno.1.4.1.0"
OID_CPUFanStatus="$OID_syno.1.4.2.0"
OID_model="$OID_syno.1.5.1.0"
OID_serialNumber="$OID_syno.1.5.2.0"
OID_DSMVersion="$OID_syno.1.5.3.0"
OID_DSMUpdateAvailable="$OID_syno.1.5.4.0"

# disks: the OIDs below get ".<index>" appended per disk;
# OID_disk collects the resulting list of OIDs to request
OID_diskID="$OID_syno.2.1.1.2"
OID_diskModel="$OID_syno.2.1.1.3"
OID_diskStatus="$OID_syno.2.1.1.5"
OID_diskTemp="$OID_syno.2.1.1.6"
OID_disk=""

# raid volumes: same principle as for the disks;
# OID_RAID collects the resulting list of OIDs to request
OID_RAIDName="$OID_syno.3.1.1.2"
OID_RAIDStatus="$OID_syno.3.1.1.3"
OID_RAIDFree="$OID_syno.3.1.1.4"
OID_RAIDSize="$OID_syno.3.1.1.5"
OID_RAID=""

# --- status arrays to show results
# The numeric status read via SNMP is the array index; index 0 is the
# placeholder "???".
aStatusUpgrade=(
    "???"                       # 0
    "Yes"                       # 1
    "Up to date"                # 2
    "Connecting"                # 3
    "Disconnected"              # 4
    "Others"                    # 5
)
aStatusDisk=(
    "???"                       # 0
    "Normal"                    # 1
    "Initialized"               # 2
    "NotInitialized"            # 3
    "SystemPartitionFailed"     # 4
    "Crashed"                   # 5
)
aStatusRaid=(
    "???"                           # 0
    "Normal"                        # 1
    "Repairing"                     # 2
    "Migrating"                     # 3
    "Expanding"                     # 4
    "Deleting"                      # 5
    "Creating"                      # 6
    "RaidSyncing"                   # 7
    "RaidParityChecking"            # 8
    "RaidAssembling"                # 9
    "Canceling"                     # 10
    "Degrade"                       # 11
    "Crashed"                       # 12
    "DataScrubbing"                 # 13
    "RaidDeploying"                 # 14
    "RaidUnDeploying"               # 15
    "RaidMountCache"                # 16
    "RaidExpandingUnfinishedSHR"    # 17
    "RaidConvertSHRToPool"          # 18
    "RaidMigrateSHR1ToSHR2"         # 19
    "RaidUnknownStatus"             # 20
)

# Switches to 1 as soon as one of the single check options (-s -d -u -t)
# was given; see _disableflags
FLAG_SINGLECHECK=0

# available single checks: 1 = execute the check; all are active by default
FLAG_SYSTEM=1;  FLAG_DISK=1;  FLAG_UPDATE=1;  FLAG_TEMPERATURE=1

# Help text that is shown by usage().
# The variables in the text are expanded right here, so $self_APPNAME and
# $self_APPVERSION must be set before this assignment.
_self=$( basename "$0" )
USAGE="
______________________________________________________________________

$self_APPNAME 
v$self_APPVERSION

Based on script of Nicolas Ordonez.

Institute for Medical Education - University of Bern
Licence: GNU GPL 3
______________________________________________________________________

Check health of a Synology drive using SNMP.

SYNTAX:
    $_self [options] -h TARGET

OPTIONS:
    -a STRING
        Authentication params for snmpwalk/ snmpget to connect to target; 
        default: \"-v2c -c public\" (Snmpv2 with community string \"public\")
    -h SNMPTARGET
        Set a target to connect as fqdn or ip address; default: localhost
    -f FILE
        Read authentication from config file. See section 'CONFIG FILE' below.
        default: \"/etc/icinga2/snmp.cfg\"

    By default all checks will be executed. You can limit the executed checks
    by naming single checks:

    -s  System check:
        - Show system data: model, serial number, DSM version
        - System status
        - Power status
        - System fan Status
        - CPU fan status
    -d  Disk check: 
        - status and temperature of each hard disk
        - status of all raid volumes
        - free disk space
    -u  Update check; check switches to warning if an update is available
    -t  Temperature check

    -v  Enable detailed output of the checks. It is recommended for
        system status and disk status.

CONFIG FILE:
    The config file can be multiline and has the syntax
    [SNMPTARGET[,target2]]:[auth parameters]
    The auth parameters set the version and all needed values to connect.
    Snmp v2 uses a community string.
    Snmp v3 is highly recommended (you should disable Snmp v2) and needs
    a user and password.

    Example:
    server-01.example.com:-v 3 -l authnoPriv -a SHA -u snmpmonitor -A password-for-server-01
    server-02.example.com,192.168.0.4:-v 3 -l authnoPriv -a SHA -u snmpmonitor -A password-for-server-02

EXAMPLE:

    $_self -h server-01.example.com -v
        Show complete Synology status of server-01 using connect data from 
        /etc/icinga2/snmp.cfg

    $_self -h server-01.example.com -v -f /opt/somewhere/snmp.conf
        Show complete Synology status of server-01 using connect data from 
        custom config file

    $_self -h server-01.example.com -u
        Make a single check if update is available.
"

# ----------------------------------------------------------------------
# FUNCTIONS
# ----------------------------------------------------------------------

# write verbose text with details in 2 columns
# The line is appended to the global variable $out - but only if $verbose
# is "yes". Nothing is printed here.
# param  string  label; left aligned and padded to 25 characters
# param  string  value; all remaining params are joined with spaces
_wd()
{
    if [ "$verbose" = "yes" ] ; then
        # local: do not leave a global variable behind after the call
        local _label=$1
        shift 1
        out+=$( printf "%-25s %s" "${_label}" "$*" )$'\n'
    fi
}

# get a value from SNMP output data
# Reads the global $SNMPOUTPUT; its lines are expected in the form
# "<oid> = <value>". Each line that contains the given OID followed by a
# space is taken and the text behind "= " is written to stdout.
# - The OID is matched as a fixed string: its dots are no regex wildcards.
# - Everything behind the first "=" is kept, so a value may contain "=".
# param  string  mib string
_get(){
    printf '%s\n' "$SNMPOUTPUT" \
        | grep -F -- "${1} " \
        | cut -d "=" -f 2- \
        | cut -d " " -f 2-
}

# show usage and abort: the help text in $USAGE is handed over to ph.abort
usage() { ph.abort "${USAGE}"; }

# Switch to single check mode: on the first call all check flags are set
# to 0, so the caller can enable the wanted check afterwards. Further calls
# do nothing - flags that were enabled in between are kept.
# see param check in main section
_disableflags(){
    # single check mode is active already
    [ $FLAG_SINGLECHECK -ne 1 ] || return 0

    FLAG_SINGLECHECK=1
    FLAG_SYSTEM=0 FLAG_DISK=0 FLAG_UPDATE=0 FLAG_TEMPERATURE=0
}

# add something to the status line (1st line) in monitoring output
# All params are joined with spaces and appended to the global $healthString;
# a ", " is put in front if the status line has content already.
_add_status(){
    healthString="${healthString:+${healthString}, }$*"
}


# ----------------------------------------------------------------------
# MAIN
# ----------------------------------------------------------------------

# without any parameter there is nothing to check: show the help
if [ $# -eq 0 ]; then
    usage
fi

while getopts a:h:f:vsdut OPTNAME; do
    case "$OPTNAME" in
        # options with a value
        a)  SNMPAUTH="$OPTARG" ;;
        f)  SNMPCONFIG="$OPTARG" ;;
        h)  SNMPTARGET="$OPTARG" ;;

        v)  verbose="yes" ;;

        # Flags: the first of them disables all checks (see _disableflags),
        # then each given flag enables its own check again
        s)  _disableflags; FLAG_SYSTEM=1 ;;
        t)  _disableflags; FLAG_TEMPERATURE=1 ;;
        u)  _disableflags; FLAG_UPDATE=1 ;;
        d)  _disableflags; FLAG_DISK=1 ;;

        # anything else
        *)  usage ;;
    esac
done


# --- read config to get the authentication params for snmp commands
# (only if no authentication was given with option -a)

test -z "$SNMPAUTH" && read_config

# --- read raid and disks to get its single OIDs
# The number of disks and raid volumes is the number of lines that snmpwalk
# returns for the disk id and the raid name. The single OIDs are built with
# the indexes 0..(count-1) and are collected in $OID_disk and $OID_RAID.
# ${SNMPAUTH} is unquoted by intention: it holds several parameters,
# e.g. "-v2c -c public".
if [ $FLAG_DISK -ne 0 ]; then
    nbDisk="$( $SNMPWALK -OQne -t $SNMPTIMEOUT ${SNMPAUTH} $SNMPTARGET $OID_diskID   2> /dev/null | wc -l )"
    nbRAID="$( $SNMPWALK -OQne -t $SNMPTIMEOUT ${SNMPAUTH} $SNMPTARGET $OID_RAIDName 2> /dev/null | wc -l )"

    # Arithmetic loops instead of "seq 1 N": no extra process, and a count
    # of 0 never enters the loop (BSD seq counts down for "seq 1 0").
    for (( i=0; i<nbDisk; i++ )); do
        OID_disk="$OID_disk $OID_diskID.$i $OID_diskModel.$i $OID_diskStatus.$i $OID_diskTemp.$i "
    done
    for (( i=0; i<nbRAID; i++ )); do
        OID_RAID="$OID_RAID $OID_RAIDName.$i $OID_RAIDStatus.$i $OID_RAIDSize.$i $OID_RAIDFree.$i"
    done
fi

# --- SNMPGET to all wanted oids
# All OIDs of the active checks are collected in a space separated list
# and are fetched with a single request.

OIDLIST=""
if [ $FLAG_SYSTEM -ne 0 ]; then
    OIDLIST+="$OID_model $OID_serialNumber $OID_DSMUpdateAvailable $OID_DSMVersion $OID_systemStatus $OID_powerStatus $OID_systemFanStatus $OID_CPUFanStatus "
fi
if [ $FLAG_DISK -ne 0 ]; then
    OIDLIST+="$OID_disk $OID_RAID "
fi
# the system check contains the update OIDs already
if [ $FLAG_UPDATE -ne 0 ] && [ $FLAG_SYSTEM -eq 0 ]; then
    OIDLIST+="$OID_DSMUpdateAvailable $OID_DSMVersion "
fi
if [ $FLAG_TEMPERATURE -ne 0 ]; then
    OIDLIST+="$OID_temp "
fi

# $OIDLIST and ${SNMPAUTH} must stay unquoted: each word is an own parameter
SNMPOUTPUT=$( $SNMPGET -OQne -t $SNMPTIMEOUT ${SNMPAUTH} $SNMPTARGET $OIDLIST 2>&1 ) \
    || ph.abort "SNMP request failed. $SNMPOUTPUT"

# remove leading and trailing whitespace of each line
SNMPOUTPUT=$( printf '%s\n' "$SNMPOUTPUT" | sed 's/^[ \t]*//;s/[ \t]*$//' )

_wd ""

# ----------------------------------------------------------------------
# GENERATE OUTPUT
# ----------------------------------------------------------------------

# ---------- check system data
# Model, serial number and DSM version are shown in the details and are
# added to the status line.
if [ $FLAG_SYSTEM -ne 0 ]; then
    _wd "---------- INFORMATION"

    # fetch all values first ...
    model=$(_get $OID_model)
    serialNumber=$(_get $OID_serialNumber)
    DSMVersion=$(_get $OID_DSMVersion)

    # ... then write the details
    _wd "Synology model:" "$model"
    _wd "Synology s/n:" "$serialNumber"
    _wd "DSM Version:" "$DSMVersion"

    _add_status "  Synology $model (s/n: $serialNumber, $DSMVersion)"
fi

# ---------- check update
# Executed for the system check and for the update check.
# A status of 1 switches the check to warning; for every other value the
# text "Up to date" is added to the status line in single check mode.
if [ $FLAG_SYSTEM -ne 0 ] || [ $FLAG_UPDATE -ne 0 ]; then
    _rawUpdate=$(_get $OID_DSMUpdateAvailable)

    # Use the value as integer only if it is a plain number. Any other text
    # (e.g. a message because the OID is not available) is handled as
    # 0 = "???" instead of raising an arithmetic error.
    typeset -i DSMupdate=0
    [[ "$_rawUpdate" =~ ^[0-9]+$ ]] && DSMupdate=$(( 10#$_rawUpdate ))

    # unknown numbers are shown as "???" too; the value in brackets is the
    # value as it was read
    _wd "Update available:" "${aStatusUpgrade[$DSMupdate]:-${aStatusUpgrade[0]}} ($_rawUpdate)"
    _wd ""
    if [ $DSMupdate -eq 1 ] ; then
        ph.setStatus "warning"
        _add_status "Update available"
    else
        test $FLAG_SINGLECHECK -ne 0 && _add_status "Up to date"
    fi
fi

# ---------- check system status (system, power, fans)

# check a single health value and report it
# The value 1 is shown as "Normal". Every other value switches the check to
# critical and is added to the status line; the value 2 is shown as "Failed",
# other values are shown as they were read.
# param  string  label for the detail output, e.g. "Power Status:"
# param  string  label for the status line, e.g. "Power status"
# param  string  OID of the value in the SNMP output
_check_health(){
    local _value
    _value="$(_get "$3")"

    if [ "$_value" = "1" ]; then
        _value="Normal"
    else
        test "$_value" = "2" && _value="Failed"
        ph.setStatus "critical"
        _add_status "$2: $_value "
    fi
    _wd "$1" "$_value"
}

if [ $FLAG_SYSTEM -ne 0 ]; then
    _wd "---------- SYSTEM"

    #             label in details     label in status      OID
    _check_health "System Status:"     "System status"      "$OID_systemStatus"
    _check_health "Power Status:"      "Power status"       "$OID_powerStatus"
    _check_health "System Fan Status:" "System fan status"  "$OID_systemFanStatus"
    _check_health "CPU Fan Status:"    "CPU fan status"     "$OID_CPUFanStatus"
fi

# ---------- check temperature
# The temperature is always written to the details. Status line and
# performance data get the value in single check mode only.
if [ $FLAG_TEMPERATURE -ne 0 ]; then
    DeviceTemperature=$(_get $OID_temp)
    _wd "NAS temperature:" "$DeviceTemperature °C"

    [ $FLAG_SINGLECHECK -eq 0 ] || {
        _add_status "NAS temperature: $DeviceTemperature °C"
        ph.perfadd "temp" "$DeviceTemperature"
    }

    # empty line in front of the storage section
    if [ $FLAG_DISK -ne 0 ]; then
        _wd ""
    fi
fi

# ---------- Check all disk status + volume + raid

# check the status of all hard disks
# A disk status other than 1 (Normal) switches the check to critical, sets
# DISKOK to 0 and adds the disk to the status line. Each disk is written to
# the details. A status that is not a known number is shown as "???".
# global  integer  nbDisk  number of disks; indexes 0..(nbDisk-1) are read
# global  integer  DISKOK  is set to 0 on a problem
_check_disks(){
    local _idx _id _model _temp _istatus _status

    _wd "Number of disks:" "$nbDisk"
    for (( _idx=0; _idx<nbDisk; _idx++ )); do
        _id=$(_get "$OID_diskID.$_idx")
        _model=$(_get "$OID_diskModel.$_idx")
        _temp=$(_get "$OID_diskTemp.$_idx")
        _istatus=$(_get "$OID_diskStatus.$_idx")

        # use the status as array index only if it is a plain number
        _status="${aStatusDisk[0]}"
        [[ "$_istatus" =~ ^[0-9]+$ ]] && _status="${aStatusDisk[$_istatus]:-${aStatusDisk[0]}}"

        if [ "$_istatus" != "1" ] ; then
            ph.setStatus "critical"
            DISKOK=0
            _add_status "problem with ${_id} (model:${_model}) status:${_status} temperature:${_temp} °C"
        fi

        _wd "  ${_id} (model:${_model}) status: ${_status} ($_istatus) temperature: ${_temp} °C"
    done
}

# check the status of all RAID volumes
# A RAID status other than 1 (Normal) switches the check to critical, sets
# DISKOK to 0 and adds name and status of the volume to the status line.
# Size and free space are shown in the details if both values are numbers.
# global  integer  nbRAID  number of volumes; indexes 0..(nbRAID-1) are read
# global  integer  DISKOK  is set to 0 on a problem
_check_raids(){
    local _idx _name _istatus _status _rawsize _rawfree _size _free _pct

    _wd "Number of RAID volumes:" "$nbRAID"
    for (( _idx=0; _idx<nbRAID; _idx++ )); do
        _name=$(_get "$OID_RAIDName.$_idx")
        _istatus=$(_get "$OID_RAIDStatus.$_idx")

        # use the status as array index only if it is a plain number
        _status="${aStatusRaid[0]}"
        [[ "$_istatus" =~ ^[0-9]+$ ]] && _status="${aStatusRaid[$_istatus]:-${aStatusRaid[0]}}"

        if [ "$_istatus" != "1" ] ; then
            DISKOK=0
            ph.setStatus "critical"
            _add_status "RAID status: (${_name} ): ${_status} "
        fi

        # size in integer GB
        # The size is not a number if the device does not offer the value
        # (see history: no raid size check with DSM 5.x)
        _rawsize=$(_get "$OID_RAIDSize.$_idx")
        _rawfree=$(_get "$OID_RAIDFree.$_idx")
        if [[ "$_rawsize" =~ ^[0-9]+$ && "$_rawfree" =~ ^[0-9]+$ ]]; then
            _size=$(( _rawsize / 1024/1024/1024 ))
            _free=$(( _rawfree / 1024/1024/1024 ))

            # a volume below 1 GB has the integer size 0: do not divide by it
            _pct=0
            (( _size > 0 )) && _pct=$(( _free*100/_size ))

            _wd "  ${_name} status: ${_status} ($_istatus) - size $_size GB, free $_free GB (${_pct}%)"
        else
            _wd "  ${_name} status: ${_status} ($_istatus) - (size not available)"
        fi
    done
}

if [ $FLAG_DISK -ne 0 ]; then
    DISKOK=1

    _wd "---------- STORAGE"
    _check_disks

    # --- Check all RAID volume status
    _wd ""
    _check_raids

    if [ $DISKOK -eq 1 ]; then
        test $FLAG_SINGLECHECK -ne 0 && _add_status "Disks and volumes are fine."
    fi
fi

# --- output status
# 1st line: the status line built by _add_status; it is followed by the
# details collected by _wd (empty without option -v)
ph.status "${healthString}"
printf '%s\n' "${out}"

ph.exit