diff --git a/check_snmp_synology b/check_snmp_synology index c994e19df398d1765d2e8eca43426238fd312704..b01b4bc55924cc7dc4d741e4a6e52f83764c1786 100755 --- a/check_snmp_synology +++ b/check_snmp_synology @@ -3,7 +3,8 @@ # 30.04.2013 Nicolas Ordonez, Switzerland # 08.08.2020 Axel Hahn: add update, community string # 03.05.2023 v1.2 Axel Hahn: support Snmpv3 connections -#--------------------------------------------------- +# 09.05.2023 v1.3 Axel Hahn: allow complete check or single check(s) +# ---------------------------------------------------------------------- # this plugin check the health of your Synology NAS # - System status (Power, Fans) # - Disks status @@ -11,10 +12,10 @@ # - available updates # # Tested with DSM 6.2 + 6.4 + 7.1 -#--------------------------------------------------- +# ---------------------------------------------------------------------- # see docs: # https://global.download.synology.com/download/Document/Software/DeveloperGuide/Firmware/DSM/All/enu/Synology_DiskStation_MIB_Guide.pdf -#--------------------------------------------------- +# ---------------------------------------------------------------------- . $(dirname $0)/inc_pluginfunctions . $(dirname $0)/check_snmp_includes @@ -24,8 +25,9 @@ self_APPNAME=$( basename $0 | tr [:lower:] [:upper:] ) self_APPVERSION=1.2 +SNMPOUTPUT= option_found=0 -healthString="" +healthString= verbose="no" out="" @@ -54,13 +56,20 @@ OID_RAIDStatus="${OID_syno}.3.1.1.3" OID_RAIDFree="${OID_syno}.3.1.1.4" OID_RAIDSize="${OID_syno}.3.1.1.5" - # --- status arrays to show results # 0 1 2 3 4 5 aStatusUpgrade=( "???" Yes "Up to date" Connecting Disconnected Others) aStatusDisk=( "???" Normal Initialized NotInitialized SystemPartitionFailed Crashed) aStatusRaid=( "???" 
Normal Repairing Migrating Expanding Deleting Creating RaidSyncing RaidParityChecking RaidAssembling Canceling Degrade Crashed DataScrubbing RaidDeploying RaidUnDeploying RaidMountCache RaidExpandingUnfinishedSHR RaidConvertSHRToPool RaidMigrateSHR1ToSHR2 RaidUnknownStatus) +FLAG_SINGLECHECK=0 + +# available single checks +FLAG_SYSTEM=1 +FLAG_DISK=1 +FLAG_UPDATE=1 +FLAG_TEMPERATURE=1 + _self=$( basename $0 ) USAGE=" ______________________________________________________________________ @@ -74,25 +83,39 @@ Institute for Medical Education - University of Bern Licence: GNU GPL 3 ______________________________________________________________________ -check cpu usage and cpu wait -Cpu infos are taken from output of top command. - -On higher cpu usage it can show processes that cause cpu waits and -with most cpu consumption. +Check health of a Synology drive using SNMP. SYNTAX: $_self [options] -h TARGET OPTIONS: -a STRING - authentication params for snmpwalk/ snmpget to connect to target; + Authentication params for snmpwalk/ snmpget to connect to target; default: \"-v2c -c public\" (Snmpv2 with community string \"public\") -h SNMPTARGET - as fqdn or ip address; default: localhost + Set a target to connect as fqdn or ip address; default: localhost -f FILE - read authentication from config file + Read authentication from config file. See section 'CONFIG FILE' below. default: \"/etc/icinga2/snmp.cfg\" - -v detailed output + + By default all checks will be executed. You can limit the executed checks + by naming single checks: + + -s System check: + - Show system data: model, serial number, DSM version + - System status + - Power status + - System fan status + - CPU fan status + -d Disk check: + - status and temperature of each hard disk + - status of all raid volumes + - free disk space + -u Update check; check switches to warning if an update is available + -t Temperature check + + -v Enable detailed output of the checks. 
It is recommended for + system status and disk status. CONFIG FILE: The config file can be multiline and has the syntax @@ -104,20 +127,25 @@ CONFIG FILE: Example: server-01.example.com:-v 3 -l authnoPriv -a SHA -u snmpmonitor -A password-for-server-01 - server-02.example.com:-v 3 -l authnoPriv -a SHA -u snmpmonitor -A password-for-server-02 + server-02.example.com,192.168.0.4:-v 3 -l authnoPriv -a SHA -u snmpmonitor -A password-for-server-02 EXAMPLE: $_self -h server-01.example.com -v - Show Synology status of server-01 using connect data from /etc/icinga2/snmp.cfg + Show complete Synology status of server-01 using connect data from + /etc/icinga2/snmp.cfg $_self -h server-01.example.com -v -f /opt/somewhere/snmp.conf - Show Synology status of server-01 using connect data from custom config + Show complete Synology status of server-01 using connect data from + custom config file + + $_self -h server-01.example.com -u + Make a single check if update is available. " -#--------------------------------------------------- +# ---------------------------------------------------------------------- # FUNCTIONS -#--------------------------------------------------- +# ---------------------------------------------------------------------- # --- write verbose text _wd() @@ -130,7 +158,7 @@ _wd() # --- get a value from SNMP output data # param string mib string _get(){ - echo "$syno" | grep "${1} " | cut -d "=" -f2 | cut -f 2- -d " " + echo "$SNMPOUTPUT" | grep "${1} " | cut -d "=" -f2 | cut -f 2- -d " " } # --- show usage @@ -139,12 +167,29 @@ usage() ph.abort "$USAGE" } +# disable all flags to perform single checks +_disableflags(){ + if [ $FLAG_SINGLECHECK -ne 1 ]; then + FLAG_SINGLECHECK=1 + FLAG_SYSTEM=0 + FLAG_DISK=0 + FLAG_UPDATE=0 + FLAG_TEMPERATURE=0 + fi +} + +# add something to the status line (1st line) in monitoring output +_add_status(){ + test -n "$healthString" && healthString+=", " + healthString+="$*" +} + -#--------------------------------------------------- 
+# ---------------------------------------------------------------------- # MAIN -#--------------------------------------------------- +# ---------------------------------------------------------------------- -while getopts a:h:f:v OPTNAME; do +while getopts a:h:f:vsdut OPTNAME; do case "$OPTNAME" in a) SNMPAUTH="$OPTARG" @@ -154,11 +199,28 @@ while getopts a:h:f:v OPTNAME; do ;; h) SNMPTARGET="$OPTARG" - option_found=1 ;; v) verbose="yes" ;; + + # Flags: + s) + _disableflags + FLAG_SYSTEM=1 + ;; + t) + _disableflags + FLAG_TEMPERATURE=1 + ;; + u) + _disableflags + FLAG_UPDATE=1 + ;; + d) + _disableflags + FLAG_DISK=1 + ;; *) usage ;; @@ -171,158 +233,186 @@ done test -z "$SNMPAUTH" && read_config # --- read raid and disks to get its single OIDs -nbDisk="$( $SNMPWALK -OQne -t $SNMPTIMEOUT ${SNMPAUTH} $SNMPTARGET $OID_diskID 2> /dev/null | wc -l )" -nbRAID="$( $SNMPWALK -OQne -t $SNMPTIMEOUT ${SNMPAUTH} $SNMPTARGET $OID_RAIDName 2> /dev/null | wc -l )" - -for i in $(seq 1 $nbDisk); -do - OID_disk="$OID_disk $OID_diskID.$(($i-1)) $OID_diskModel.$(($i-1)) $OID_diskStatus.$(($i-1)) $OID_diskTemp.$(($i-1)) " -done -for i in $(seq 1 $nbRAID); -do - OID_RAID="$OID_RAID $OID_RAIDName.$(($i-1)) $OID_RAIDStatus.$(($i-1)) $OID_RAIDSize.$(($i-1)) $OID_RAIDFree.$(($i-1))" -done - -# --- SNPGET to all wanted oids -syno=$($SNMPGET -OQne -t $SNMPTIMEOUT ${SNMPAUTH} $SNMPTARGET $OID_model $OID_serialNumber $OID_DSMVersion $OID_DSMUpdateAvailable $OID_systemStatus $OID_powerStatus $OID_systemFanStatus $OID_CPUFanStatus $OID_temp $OID_disk $OID_RAID 2> /dev/null | sed 's/^[ \t]*//;s/[ \t]*$//') - -if [ "$?" 
!= "0" ] ; then - ph.abort "Problem with SNMP request" +if [ $FLAG_DISK -ne 0 ]; then + nbDisk="$( $SNMPWALK -OQne -t $SNMPTIMEOUT ${SNMPAUTH} $SNMPTARGET $OID_diskID 2> /dev/null | wc -l )" + nbRAID="$( $SNMPWALK -OQne -t $SNMPTIMEOUT ${SNMPAUTH} $SNMPTARGET $OID_RAIDName 2> /dev/null | wc -l )" + + for i in $(seq 1 $nbDisk); + do + OID_disk="$OID_disk $OID_diskID.$(($i-1)) $OID_diskModel.$(($i-1)) $OID_diskStatus.$(($i-1)) $OID_diskTemp.$(($i-1)) " + done + for i in $(seq 1 $nbRAID); + do + OID_RAID="$OID_RAID $OID_RAIDName.$(($i-1)) $OID_RAIDStatus.$(($i-1)) $OID_RAIDSize.$(($i-1)) $OID_RAIDFree.$(($i-1))" + done fi -_wd "" - -# --- check fetched data -model=$(_get $OID_model) -_wd "Synology model: $model" - -serialNumber=$(_get $OID_serialNumber) -_wd "Synology s/n: $serialNumber" - -DSMVersion=$(_get $OID_DSMVersion) -_wd "DSM Version: $DSMVersion" - -healthString="Synology $model (s/n:$serialNumber, $DSMVersion)" +# --- SNPGET to all wanted oids -RAIDName=$(_get $OID_RAIDName) -RAIDStatus=$(_get $OID_RAIDStatus) +OIDLIST="" +test $FLAG_SYSTEM -ne 0 && OIDLIST+="$OID_model $OID_serialNumber $OID_DSMVersion $OID_systemStatus $OID_powerStatus $OID_systemFanStatus $OID_CPUFanStatus " +test $FLAG_DISK -ne 0 && OIDLIST+="$OID_disk $OID_RAID " +test $FLAG_UPDATE -ne 0 && OIDLIST+="$OID_DSMUpdateAvailable " +test $FLAG_UPDATE -ne 0 && test $FLAG_SYSTEM -eq 0 && OIDLIST+="$OID_DSMVersion " +test $FLAG_TEMPERATURE -ne 0 && OIDLIST+="$OID_temp " -# --- check update -typeset -i DSMupdate=$(_get $OID_DSMUpdateAvailable) -_wd "Update available: ${aStatusUpgrade[$DSMupdate]} ($DSMupdate)" -if [ $DSMupdate -eq 1 ] ; then - ph.setStatus "warning" - healthString="$healthString, Update available" +if ! SNMPOUTPUT=$( $SNMPGET -OQne -t $SNMPTIMEOUT ${SNMPAUTH} $SNMPTARGET $OIDLIST 2>&1 ); then + ph.abort "SNMP request failed. 
$SNMPOUTPUT" fi -_wd "" - - -# --- Check system status -systemStatus="$(_get $OID_systemStatus)" +SNMPOUTPUT=$( echo "$SNMPOUTPUT" | sed 's/^[ \t]*//;s/[ \t]*$//' ) -if [ "$systemStatus" != "1" ] ; then - if [ "$systemStatus" = "2" ] ; then systemStatus="Failed"; fi - ph.setStatus "critical" - healthString="$healthString, System status: $systemStatus " -else - systemStatus="Normal" -fi -_wd "System Status: $systemStatus" +_wd "" +# ---------------------------------------------------------------------- +# GENERATE OUTPUT +# ---------------------------------------------------------------------- -# --- Check power status -powerStatus="$(_get $OID_powerStatus)" +# ---------- check system data +if [ $FLAG_SYSTEM -ne 0 ]; then + model=$(_get $OID_model) + _wd "Synology model: $model" -if [ "$powerStatus" != "1" ] ; then - if [ "$powerStatus" = "2" ] ; then powerStatus="Failed"; fi - ph.setStatus "critical" - healthString="$healthString, Power status: $powerStatus " -else - powerStatus="Normal" -fi -_wd "Power Status: $powerStatus" + serialNumber=$(_get $OID_serialNumber) + _wd "Synology s/n: $serialNumber" + DSMVersion=$(_get $OID_DSMVersion) + _wd "DSM Version: $DSMVersion" -# --- Check system fan status -systemFanStatus=$(_get $OID_systemFanStatus) -if [ "$systemFanStatus" != "1" ] ; then - if [ "$systemFanStatus" = "2" ] ; then systemFanStatus="Failed"; fi - ph.setStatus "critical" - healthString="$healthString, System fan status: $systemFanStatus " -else - systemFanStatus="Normal" -fi -_wd "System Fan Status: $systemFanStatus" + _add_status "Synology $model (s/n: $serialNumber, $DSMVersion)" + # --- Check system status + systemStatus="$(_get $OID_systemStatus)" -# --- Check CPU fan status -CPUFanStatus=$(_get $OID_CPUFanStatus) -if [ "$CPUFanStatus" != "1" ] ; then - if [ "$CPUFanStatus" = "2" ] ; then CPUFanStatus="Failed"; fi - ph.setStatus "critical" - healthString="$healthString, CPU fan status: $CPUFanStatus " -else - CPUFanStatus="Normal" -fi -_wd "CPU Fan 
Status: $CPUFanStatus" + if [ "$systemStatus" != "1" ] ; then + if [ "$systemStatus" = "2" ] ; then systemStatus="Failed"; fi + ph.setStatus "critical" + _add_status "System status: $systemStatus " + else + systemStatus="Normal" + fi + _wd "System Status: $systemStatus" + # --- Check power status + powerStatus="$(_get $OID_powerStatus)" -# --- Show temperature -DeviceTemperature=$(_get $OID_temp) -# if [ $DeviceTemperature ... ] ; then -# ph.setStatus "critical" -# healthString="$healthString, CPU fan status: $CPUFanStatus °C" -# else -# CPUFanStatus="Normal" -# fi -_wd "NAS temperature: $DeviceTemperature °C" + if [ "$powerStatus" != "1" ] ; then + if [ "$powerStatus" = "2" ] ; then powerStatus="Failed"; fi + ph.setStatus "critical" + _add_status "Power status: $powerStatus " + else + powerStatus="Normal" + fi + _wd "Power Status: $powerStatus" -# --- Check all disk status -_wd "" -_wd "Number of disks: $nbDisk" -for i in $(seq 1 $nbDisk); -do - diskID[$i]=$(_get $OID_diskID.$(($i-1))) - diskModel[$i]=$(_get $OID_diskModel.$(($i-1))) - diskTemp[$i]=$(_get $OID_diskTemp.$(($i-1))) - idiskStatus=$(_get $OID_diskStatus.$(($i-1))) - - diskStatus[$i]=${aStatusDisk[$idiskStatus]} - - if [ ${idiskStatus} != "1" ] ; then + # --- Check system fan status + systemFanStatus=$(_get $OID_systemFanStatus) + if [ "$systemFanStatus" != "1" ] ; then + if [ "$systemFanStatus" = "2" ] ; then systemFanStatus="Failed"; fi ph.setStatus "critical" - healthString="$healthString, problem with ${diskID[$i]} (model:${diskModel[$i]}) status:${diskStatus[$i]} temperature:${diskTemp[$i]} °C" + _add_status "System fan status: $systemFanStatus " + else + systemFanStatus="Normal" fi - - _wd "${diskID[$i]} (model:${diskModel[$i]}) status: ${diskStatus[$i]} ($idiskStatus) temperature: ${diskTemp[$i]} °C" -done + _wd "System Fan Status: $systemFanStatus" -# --- Check all RAID volume status -_wd "" -_wd "Number of RAID volume: $nbRAID" -for i in $(seq 1 $nbRAID); -do - RAIDName[$i]=$(_get 
$OID_RAIDName.$(($i-1))) - iRAIDStatus=$(_get $OID_RAIDStatus.$(($i-1))) + # --- Check CPU fan status + CPUFanStatus=$(_get $OID_CPUFanStatus) + if [ "$CPUFanStatus" != "1" ] ; then + if [ "$CPUFanStatus" = "2" ] ; then CPUFanStatus="Failed"; fi + ph.setStatus "critical" + _add_status "CPU fan status: $CPUFanStatus " + else + CPUFanStatus="Normal" + fi + _wd "CPU Fan Status: $CPUFanStatus" + _wd "" +fi - # size in integer GB - iRAIDSize=$(( $(_get $OID_RAIDSize.$(($i-1))) / 1024/1024/1024 )) - iRAIDFree=$(( $(_get $OID_RAIDFree.$(($i-1))) / 1024/1024/1024 )) - iFree=$(( iRAIDFree*100/$iRAIDSize )) +# ---------- check temperature +if [ $FLAG_TEMPERATURE -ne 0 ]; then + # --- Show temperature + DeviceTemperature=$(_get $OID_temp) + _wd "NAS temperature: $DeviceTemperature °C" + _wd "" - RAIDStatus[$i]=${aStatusRaid[${iRAIDStatus}]} + if [ $FLAG_SINGLECHECK -ne 0 ]; then + _add_status "NAS temperature: $DeviceTemperature °C" + ph.perfadd "temp" "$DeviceTemperature" + fi +fi - if [ $iRAIDStatus != "1" ] ; then - ph.setStatus "critical" - healthString="$healthString, RAID status: ($RAIDName ): $RAIDStatus[$i] " +# ---------- check update +if [ $FLAG_UPDATE -ne 0 ]; then + typeset -i DSMupdate=$(_get $OID_DSMUpdateAvailable) + + # test $FLAG_SINGLECHECK -ne 0 || ( _wd "Update available: ${aStatusUpgrade[$DSMupdate]} ($DSMupdate)"; _wd "" ) + _wd "Update available: ${aStatusUpgrade[$DSMupdate]} ($DSMupdate)"; + _wd "" + if [ $DSMupdate -eq 1 ] ; then + ph.setStatus "warning" + _add_status "Update available" + else + test $FLAG_SINGLECHECK -ne 0 && _add_status "Up to date" fi - _wd "${RAIDName[$i]} status: ${RAIDStatus[$i]} ($iRAIDStatus) - size $iRAIDSize GB, free $iRAIDFree GB (${iFree}%)" -done +fi -# _wd ""; _wd "DEBUG: SNMPAUTH= $( echo "$SNMPAUTH" | sed 's#\-A [^\ ]*#-A **********#g' )" +# ---------- Check all disk status + volume + raid +if [ $FLAG_DISK -ne 0 ]; then + RAIDName=$(_get $OID_RAIDName) + RAIDStatus=$(_get $OID_RAIDStatus) + DISKOK=1 + + _wd "Number of 
disks: $nbDisk" + for i in $(seq 1 $nbDisk); + do + diskID[$i]=$(_get $OID_diskID.$(($i-1))) + diskModel[$i]=$(_get $OID_diskModel.$(($i-1))) + diskTemp[$i]=$(_get $OID_diskTemp.$(($i-1))) + idiskStatus=$(_get $OID_diskStatus.$(($i-1))) + + diskStatus[$i]=${aStatusDisk[$idiskStatus]} + + if [ "${idiskStatus}" != "1" ] ; then + ph.setStatus "critical" + DISKOK=0 + # no NAS temperature here: DeviceTemperature is only fetched by the temperature check (-t) + _add_status "problem with ${diskID[$i]} (model:${diskModel[$i]}) status:${diskStatus[$i]} temperature:${diskTemp[$i]} °C" + fi + + _wd "${diskID[$i]} (model:${diskModel[$i]}) status: ${diskStatus[$i]} ($idiskStatus) temperature: ${diskTemp[$i]} °C" + done + + + # --- Check all RAID volume status + _wd "" + _wd "Number of RAID volume: $nbRAID" + for i in $(seq 1 $nbRAID); + do + RAIDName[$i]=$(_get $OID_RAIDName.$(($i-1))) + iRAIDStatus=$(_get $OID_RAIDStatus.$(($i-1))) + + # size in integer GB + iRAIDSize=$(( $(_get $OID_RAIDSize.$(($i-1))) / 1024/1024/1024 )) + iRAIDFree=$(( $(_get $OID_RAIDFree.$(($i-1))) / 1024/1024/1024 )) + iFree=$(( iRAIDFree*100/$iRAIDSize )) + + RAIDStatus[$i]=${aStatusRaid[${iRAIDStatus}]} + + if [ "$iRAIDStatus" != "1" ] ; then + DISKOK=0 + ph.setStatus "critical" + _add_status "RAID status: (${RAIDName[$i]}): ${RAIDStatus[$i]} " + fi + _wd "${RAIDName[$i]} status: ${RAIDStatus[$i]} ($iRAIDStatus) - size $iRAIDSize GB, free $iRAIDFree GB (${iFree}%)" + done + + if [ $DISKOK -eq 1 ]; then + test $FLAG_SINGLECHECK -ne 0 && _add_status "Disks and volumes are fine." + fi +fi # --- output status ph.status "$healthString"