Skip to content
Snippets Groups Projects

Synology check: allow complete check or single check(s)

Merged Hahn Axel (hahn) requested to merge 6338-snmpv3 into master
1 file
+ 239
149
Compare changes
  • Side-by-side
  • Inline
+ 239
149
@@ -3,7 +3,8 @@
# 30.04.2013 Nicolas Ordonez, Switzerland
# 08.08.2020 Axel Hahn: add update, community string
# 03.05.2023 v1.2 Axel Hahn: support Snmpv3 connections
#---------------------------------------------------
# 09.05.2023 v1.3 Axel Hahn: allow complete check or single check(s)
# ----------------------------------------------------------------------
# this plugin checks the health of your Synology NAS
# - System status (Power, Fans)
# - Disks status
@@ -11,10 +12,10 @@
# - available updates
#
# Tested with DSM 6.2 + 6.4 + 7.1
#---------------------------------------------------
# ----------------------------------------------------------------------
# see docs:
# https://global.download.synology.com/download/Document/Software/DeveloperGuide/Firmware/DSM/All/enu/Synology_DiskStation_MIB_Guide.pdf
#---------------------------------------------------
# ----------------------------------------------------------------------
. $(dirname $0)/inc_pluginfunctions
. $(dirname $0)/check_snmp_includes
@@ -24,8 +25,9 @@
self_APPNAME=$( basename $0 | tr [:lower:] [:upper:] )
self_APPVERSION=1.2
SNMPOUTPUT=
option_found=0
healthString=""
healthString=
verbose="no"
out=""
@@ -54,13 +56,20 @@ OID_RAIDStatus="${OID_syno}.3.1.1.3"
OID_RAIDFree="${OID_syno}.3.1.1.4"
OID_RAIDSize="${OID_syno}.3.1.1.5"
# --- status arrays to show results
# 0 1 2 3 4 5
aStatusUpgrade=( "???" Yes "Up to date" Connecting Disconnected Others)
aStatusDisk=( "???" Normal Initialized NotInitialized SystemPartitionFailed Crashed)
aStatusRaid=( "???" Normal Repairing Migrating Expanding Deleting Creating RaidSyncing RaidParityChecking RaidAssembling Canceling Degrade Crashed DataScrubbing RaidDeploying RaidUnDeploying RaidMountCache RaidExpandingUnfinishedSHR RaidConvertSHRToPool RaidMigrateSHR1ToSHR2 RaidUnknownStatus)
FLAG_SINGLECHECK=0
# available single checks
FLAG_SYSTEM=1
FLAG_DISK=1
FLAG_UPDATE=1
FLAG_TEMPERATURE=1
_self=$( basename $0 )
USAGE="
______________________________________________________________________
@@ -74,25 +83,39 @@ Institute for Medical Education - University of Bern
Licence: GNU GPL 3
______________________________________________________________________
check cpu usage and cpu wait
Cpu infos are taken from output of top command.
On higher cpu usage it can show processes that cause cpu waits and
with most cpu consumption.
Check health of a Synology drive using SNMP.
SYNTAX:
$_self [options] -h TARGET
OPTIONS:
-a STRING
authentication params for snmpwalk/ snmpget to connect to target;
Authentication params for snmpwalk/ snmpget to connect to target;
default: \"-v2c -c public\" (Snmpv2 with community string \"public\")
-h SNMPTARGET
as fqdn or ip address; default: localhost
Set a target to connect as fqdn or ip address; default: localhost
-f FILE
read authentication from config file
Read authentication from config file. See section 'CONFIG FILE' below.
default: \"/etc/icinga2/snmp.cfg\"
-v detailed output
By default all checks will be executed. You can limit the executed checks
by naming single checks:
-s System check:
- Show system data: model, serial number, DSM version
- System status
- Power status
- System fan Status
- CPU fan status
-d Disk check:
- status and temperature of each hard disk
- status of all raid volumes
- free disk space
-u Update check; check switches to warning if an update is available
-t Temperature check
-v Enable detailed output of the checks. It is recommended for
system status and disk status.
CONFIG FILE:
The config file can be multiline and has the syntax
@@ -104,20 +127,25 @@ CONFIG FILE:
Example:
server-01.example.com:-v 3 -l authnoPriv -a SHA -u snmpmonitor -A password-for-server-01
server-02.example.com:-v 3 -l authnoPriv -a SHA -u snmpmonitor -A password-for-server-02
server-02.example.com,192.168.0.4:-v 3 -l authnoPriv -a SHA -u snmpmonitor -A password-for-server-02
EXAMPLE:
$_self -h server-01.example.com -v
Show Synology status of server-01 using connect data from /etc/icinga2/snmp.cfg
Show complete Synology status of server-01 using connect data from
/etc/icinga2/snmp.cfg
$_self -h server-01.example.com -v -f /opt/somewhere/snmp.conf
Show Synology status of server-01 using connect data from custom config
Show complete Synology status of server-01 using connect data from
custom config file
$_self -h server-01.example.com -u
Make a single check if update is available.
"
#---------------------------------------------------
# ----------------------------------------------------------------------
# FUNCTIONS
#---------------------------------------------------
# ----------------------------------------------------------------------
# --- write verbose text
_wd()
@@ -130,7 +158,7 @@ _wd()
# --- get a value from SNMP output data
# Looks up a single OID in the snmpget result stored in the global variable
# SNMPOUTPUT (one "<oid> = <value>" entry per line) and prints its value.
#
# global string SNMPOUTPUT  fetched snmpget output
# param  string             mib string (OID exactly as it appears in the output)
# output                    value right of "<oid> = " on stdout; nothing if the
#                           OID is not part of the output
_get(){
    # -F: the OID is a literal string - its dots must not act as regex
    #     wildcards (".1.1.0" would otherwise also match ".101.0").
    # The trailing space keeps "….1" from matching "….10".
    # -f2-: keep everything behind the first "=" so that values containing
    #       a "=" themselves are not truncated.
    printf '%s\n' "$SNMPOUTPUT" | grep -F -- "${1} " | cut -d "=" -f2- | cut -f 2- -d " "
}
# --- show usage
@@ -139,12 +167,29 @@ usage()
ph.abort "$USAGE"
}
# Switch from "run everything" to single check mode.
# The first call turns off every FLAG_* check and marks single check mode
# as active; the caller enables the wanted check afterwards. Further calls
# do nothing, so checks that were already selected stay enabled.
_disableflags(){
    # single check mode was entered before: leave the flags untouched
    [ $FLAG_SINGLECHECK -ne 1 ] || return 0

    FLAG_SINGLECHECK=1
    FLAG_SYSTEM=0; FLAG_DISK=0; FLAG_UPDATE=0; FLAG_TEMPERATURE=0
}
# Append a text to the status line (1st line) of the monitoring output.
# All arguments are joined into one entry; entries in the global
# healthString are separated by ", ".
_add_status(){
    # the separator is only inserted if there is already some status text
    healthString="${healthString:+${healthString}, }$*"
}
#---------------------------------------------------
# ----------------------------------------------------------------------
# MAIN
#---------------------------------------------------
# ----------------------------------------------------------------------
while getopts a:h:f:v OPTNAME; do
while getopts a:h:f:vsdut OPTNAME; do
case "$OPTNAME" in
a)
SNMPAUTH="$OPTARG"
@@ -154,11 +199,28 @@ while getopts a:h:f:v OPTNAME; do
;;
h)
SNMPTARGET="$OPTARG"
option_found=1
;;
v)
verbose="yes"
;;
# Flags:
s)
_disableflags
FLAG_SYSTEM=1
;;
t)
_disableflags
FLAG_TEMPERATURE=1
;;
u)
_disableflags
FLAG_UPDATE=1
;;
d)
_disableflags
FLAG_DISK=1
;;
*)
usage
;;
@@ -171,158 +233,186 @@ done
test -z "$SNMPAUTH" && read_config
# --- read raid and disks to get its single OIDs
nbDisk="$( $SNMPWALK -OQne -t $SNMPTIMEOUT ${SNMPAUTH} $SNMPTARGET $OID_diskID 2> /dev/null | wc -l )"
nbRAID="$( $SNMPWALK -OQne -t $SNMPTIMEOUT ${SNMPAUTH} $SNMPTARGET $OID_RAIDName 2> /dev/null | wc -l )"
for i in $(seq 1 $nbDisk);
do
OID_disk="$OID_disk $OID_diskID.$(($i-1)) $OID_diskModel.$(($i-1)) $OID_diskStatus.$(($i-1)) $OID_diskTemp.$(($i-1)) "
done
for i in $(seq 1 $nbRAID);
do
OID_RAID="$OID_RAID $OID_RAIDName.$(($i-1)) $OID_RAIDStatus.$(($i-1)) $OID_RAIDSize.$(($i-1)) $OID_RAIDFree.$(($i-1))"
done
# --- SNPGET to all wanted oids
syno=$($SNMPGET -OQne -t $SNMPTIMEOUT ${SNMPAUTH} $SNMPTARGET $OID_model $OID_serialNumber $OID_DSMVersion $OID_DSMUpdateAvailable $OID_systemStatus $OID_powerStatus $OID_systemFanStatus $OID_CPUFanStatus $OID_temp $OID_disk $OID_RAID 2> /dev/null | sed 's/^[ \t]*//;s/[ \t]*$//')
if [ "$?" != "0" ] ; then
ph.abort "Problem with SNMP request"
if [ $FLAG_DISK -ne 0 ]; then
    # The number of lines returned by a walk over the disk id / RAID name
    # column is taken as the number of disks / RAID volumes.
    nbDisk="$( $SNMPWALK -OQne -t $SNMPTIMEOUT ${SNMPAUTH} $SNMPTARGET $OID_diskID 2> /dev/null | wc -l )"
    nbRAID="$( $SNMPWALK -OQne -t $SNMPTIMEOUT ${SNMPAUTH} $SNMPTARGET $OID_RAIDName 2> /dev/null | wc -l )"

    # Build the list of single OIDs per disk; the row index starts at 0.
    for (( i = 0; i < nbDisk; i++ )); do
        OID_disk+=" $OID_diskID.$i $OID_diskModel.$i $OID_diskStatus.$i $OID_diskTemp.$i "
    done

    # Same for each RAID volume.
    for (( i = 0; i < nbRAID; i++ )); do
        OID_RAID+=" $OID_RAIDName.$i $OID_RAIDStatus.$i $OID_RAIDSize.$i $OID_RAIDFree.$i"
    done
fi
_wd ""
# --- check fetched data
model=$(_get $OID_model)
_wd "Synology model: $model"
serialNumber=$(_get $OID_serialNumber)
_wd "Synology s/n: $serialNumber"
DSMVersion=$(_get $OID_DSMVersion)
_wd "DSM Version: $DSMVersion"
healthString="Synology $model (s/n:$serialNumber, $DSMVersion)"
# --- SNMPGET to all wanted oids
RAIDName=$(_get $OID_RAIDName)
RAIDStatus=$(_get $OID_RAIDStatus)
OIDLIST=""
test $FLAG_SYSTEM -ne 0 && OIDLIST+="$OID_model $OID_serialNumber $OID_DSMVersion $OID_systemStatus $OID_powerStatus $OID_systemFanStatus $OID_CPUFanStatus "
test $FLAG_DISK -ne 0 && OIDLIST+="$OID_disk $OID_RAID "
test $FLAG_UPDATE -ne 0 && OIDLIST+="$OID_DSMUpdateAvailable "
test $FLAG_UPDATE -ne 0 && test $FLAG_SYSTEM -eq 0 && OIDLIST+="$OID_DSMVersion "
test $FLAG_TEMPERATURE -ne 0 && OIDLIST+="$OID_temp "
# --- check update
typeset -i DSMupdate=$(_get $OID_DSMUpdateAvailable)
_wd "Update available: ${aStatusUpgrade[$DSMupdate]} ($DSMupdate)"
if [ $DSMupdate -eq 1 ] ; then
ph.setStatus "warning"
healthString="$healthString, Update available"
if ! SNMPOUTPUT=$( $SNMPGET -OQne -t $SNMPTIMEOUT ${SNMPAUTH} $SNMPTARGET $OIDLIST 2>&1 ); then
ph.abort "SNMP request failed. $SNMPOUTPUT"
fi
_wd ""
# --- Check system status
systemStatus="$(_get $OID_systemStatus)"
SNMPOUTPUT=$( echo "$SNMPOUTPUT" | sed 's/^[ \t]*//;s/[ \t]*$//' )
if [ "$systemStatus" != "1" ] ; then
if [ "$systemStatus" = "2" ] ; then systemStatus="Failed"; fi
ph.setStatus "critical"
healthString="$healthString, System status: $systemStatus "
else
systemStatus="Normal"
fi
_wd "System Status: $systemStatus"
_wd ""
# ----------------------------------------------------------------------
# GENERATE OUTPUT
# ----------------------------------------------------------------------
# --- Check power status
powerStatus="$(_get $OID_powerStatus)"
# ---------- check system data
if [ $FLAG_SYSTEM -ne 0 ]; then
model=$(_get $OID_model)
_wd "Synology model: $model"
if [ "$powerStatus" != "1" ] ; then
if [ "$powerStatus" = "2" ] ; then powerStatus="Failed"; fi
ph.setStatus "critical"
healthString="$healthString, Power status: $powerStatus "
else
powerStatus="Normal"
fi
_wd "Power Status: $powerStatus"
serialNumber=$(_get $OID_serialNumber)
_wd "Synology s/n: $serialNumber"
DSMVersion=$(_get $OID_DSMVersion)
_wd "DSM Version: $DSMVersion"
# --- Check system fan status
systemFanStatus=$(_get $OID_systemFanStatus)
if [ "$systemFanStatus" != "1" ] ; then
if [ "$systemFanStatus" = "2" ] ; then systemFanStatus="Failed"; fi
ph.setStatus "critical"
healthString="$healthString, System fan status: $systemFanStatus "
else
systemFanStatus="Normal"
fi
_wd "System Fan Status: $systemFanStatus"
_add_status "Synology $model (s/n: $serialNumber, $DSMVersion)"
# --- Check system status
systemStatus="$(_get $OID_systemStatus)"
# --- Check CPU fan status
CPUFanStatus=$(_get $OID_CPUFanStatus)
if [ "$CPUFanStatus" != "1" ] ; then
if [ "$CPUFanStatus" = "2" ] ; then CPUFanStatus="Failed"; fi
ph.setStatus "critical"
healthString="$healthString, CPU fan status: $CPUFanStatus "
else
CPUFanStatus="Normal"
fi
_wd "CPU Fan Status: $CPUFanStatus"
if [ "$systemStatus" != "1" ] ; then
if [ "$systemStatus" = "2" ] ; then systemStatus="Failed"; fi
ph.setStatus "critical"
_add_status "System status: $systemStatus "
else
systemStatus="Normal"
fi
_wd "System Status: $systemStatus"
# --- Check power status
powerStatus="$(_get $OID_powerStatus)"
# --- Show temperature
DeviceTemperature=$(_get $OID_temp)
# if [ $DeviceTemperature ... ] ; then
# ph.setStatus "critical"
# healthString="$healthString, CPU fan status: $CPUFanStatus °C"
# else
# CPUFanStatus="Normal"
# fi
_wd "NAS temperature: $DeviceTemperature °C"
if [ "$powerStatus" != "1" ] ; then
if [ "$powerStatus" = "2" ] ; then powerStatus="Failed"; fi
ph.setStatus "critical"
_add_status "Power status: $powerStatus "
else
powerStatus="Normal"
fi
_wd "Power Status: $powerStatus"
# --- Check all disk status
_wd ""
_wd "Number of disks: $nbDisk"
for i in $(seq 1 $nbDisk);
do
diskID[$i]=$(_get $OID_diskID.$(($i-1)))
diskModel[$i]=$(_get $OID_diskModel.$(($i-1)))
diskTemp[$i]=$(_get $OID_diskTemp.$(($i-1)))
idiskStatus=$(_get $OID_diskStatus.$(($i-1)))
diskStatus[$i]=${aStatusDisk[$idiskStatus]}
if [ ${idiskStatus} != "1" ] ; then
# --- Check system fan status
systemFanStatus=$(_get $OID_systemFanStatus)
if [ "$systemFanStatus" != "1" ] ; then
if [ "$systemFanStatus" = "2" ] ; then systemFanStatus="Failed"; fi
ph.setStatus "critical"
healthString="$healthString, problem with ${diskID[$i]} (model:${diskModel[$i]}) status:${diskStatus[$i]} temperature:${diskTemp[$i]} °C"
_add_status "System fan status: $systemFanStatus "
else
systemFanStatus="Normal"
fi
_wd "${diskID[$i]} (model:${diskModel[$i]}) status: ${diskStatus[$i]} ($idiskStatus) temperature: ${diskTemp[$i]} °C"
done
_wd "System Fan Status: $systemFanStatus"
# --- Check all RAID volume status
_wd ""
_wd "Number of RAID volume: $nbRAID"
for i in $(seq 1 $nbRAID);
do
RAIDName[$i]=$(_get $OID_RAIDName.$(($i-1)))
iRAIDStatus=$(_get $OID_RAIDStatus.$(($i-1)))
# --- Check CPU fan status
CPUFanStatus=$(_get $OID_CPUFanStatus)
if [ "$CPUFanStatus" != "1" ] ; then
if [ "$CPUFanStatus" = "2" ] ; then CPUFanStatus="Failed"; fi
ph.setStatus "critical"
_add_status "CPU fan status: $CPUFanStatus "
else
CPUFanStatus="Normal"
fi
_wd "CPU Fan Status: $CPUFanStatus"
_wd ""
fi
# size in integer GB
iRAIDSize=$(( $(_get $OID_RAIDSize.$(($i-1))) / 1024/1024/1024 ))
iRAIDFree=$(( $(_get $OID_RAIDFree.$(($i-1))) / 1024/1024/1024 ))
iFree=$(( iRAIDFree*100/$iRAIDSize ))
# ---------- check temperature
if [ $FLAG_TEMPERATURE -ne 0 ]; then
# --- Show temperature
DeviceTemperature=$(_get $OID_temp)
_wd "NAS temperature: $DeviceTemperature °C"
_wd ""
RAIDStatus[$i]=${aStatusRaid[${iRAIDStatus}]}
if [ $FLAG_SINGLECHECK -ne 0 ]; then
_add_status "NAS temperature: $DeviceTemperature °C"
ph.perfadd "temp" "$DeviceTemperature"
fi
fi
if [ $iRAIDStatus != "1" ] ; then
ph.setStatus "critical"
healthString="$healthString, RAID status: ($RAIDName ): $RAIDStatus[$i] "
# ---------- check update
if [ $FLAG_UPDATE -ne 0 ]; then
typeset -i DSMupdate=$(_get $OID_DSMUpdateAvailable)
# test $FLAG_SINGLECHECK -ne 0 || ( _wd "Update available: ${aStatusUpgrade[$DSMupdate]} ($DSMupdate)"; _wd "" )
_wd "Update available: ${aStatusUpgrade[$DSMupdate]} ($DSMupdate)";
_wd ""
if [ $DSMupdate -eq 1 ] ; then
ph.setStatus "warning"
_add_status "Update available"
else
test $FLAG_SINGLECHECK -ne 0 && _add_status "Up to date"
fi
_wd "${RAIDName[$i]} status: ${RAIDStatus[$i]} ($iRAIDStatus) - size $iRAIDSize GB, free $iRAIDFree GB (${iFree}%)"
done
fi
# _wd ""; _wd "DEBUG: SNMPAUTH= $( echo "$SNMPAUTH" | sed 's#\-A [^\ ]*#-A **********#g' )"
# ---------- Check all disk status + volume + raid
# Evaluates the disk and RAID values found in the fetched SNMP output:
# - every disk whose status is not 1 (Normal) sets the check to critical
# - every RAID volume whose status is not 1 (Normal) sets the check to critical
# - in verbose mode each disk and volume is listed with its details
# - in single check mode a positive summary is added if nothing failed
if [ $FLAG_DISK -ne 0 ]; then

    RAIDName=$(_get $OID_RAIDName)
    RAIDStatus=$(_get $OID_RAIDStatus)

    # switches to 0 as soon as a disk or a volume is not in normal state
    DISKOK=1

    _wd "Number of disks: $nbDisk"
    for i in $(seq 1 $nbDisk);
    do
        # loop counter starts at 1, the SNMP table rows start at 0
        diskID[$i]=$(_get $OID_diskID.$(($i-1)))
        diskModel[$i]=$(_get $OID_diskModel.$(($i-1)))
        diskTemp[$i]=$(_get $OID_diskTemp.$(($i-1)))

        idiskStatus=$(_get $OID_diskStatus.$(($i-1)))
        # a missing status value maps to index 0 ("???") instead of
        # triggering a "bad array subscript" error
        diskStatus[$i]=${aStatusDisk[${idiskStatus:-0}]}

        if [ "${idiskStatus}" != "1" ] ; then
            ph.setStatus "critical"
            DISKOK=0
            # Only the temperature of the disk itself is reported here.
            # The NAS temperature is handled by the temperature check;
            # DeviceTemperature is not set when just the disk check runs.
            _add_status "problem with ${diskID[$i]} (model:${diskModel[$i]}) status:${diskStatus[$i]} temperature:${diskTemp[$i]} °C"
        fi
        _wd "${diskID[$i]} (model:${diskModel[$i]}) status: ${diskStatus[$i]} ($idiskStatus) temperature: ${diskTemp[$i]} °C"
    done

    # --- Check all RAID volume status
    _wd ""
    _wd "Number of RAID volume: $nbRAID"
    for i in $(seq 1 $nbRAID);
    do
        RAIDName[$i]=$(_get $OID_RAIDName.$(($i-1)))
        iRAIDStatus=$(_get $OID_RAIDStatus.$(($i-1)))

        # size in integer GB; a missing value counts as 0 instead of
        # breaking the arithmetic expression
        iRAIDSizeBytes=$(_get $OID_RAIDSize.$(($i-1)))
        iRAIDFreeBytes=$(_get $OID_RAIDFree.$(($i-1)))
        iRAIDSize=$(( ${iRAIDSizeBytes:-0} / 1024/1024/1024 ))
        iRAIDFree=$(( ${iRAIDFreeBytes:-0} / 1024/1024/1024 ))

        # volumes smaller than 1 GB have an integer size of 0:
        # do not divide by zero
        if [ "$iRAIDSize" -gt 0 ]; then
            iFree=$(( iRAIDFree*100/iRAIDSize ))
        else
            iFree=0
        fi

        RAIDStatus[$i]=${aStatusRaid[${iRAIDStatus:-0}]}

        if [ "$iRAIDStatus" != "1" ] ; then
            DISKOK=0
            ph.setStatus "critical"
            # name and status of the volume of this iteration; array
            # elements need the ${name[index]} syntax
            _add_status "RAID status: (${RAIDName[$i]}): ${RAIDStatus[$i]} "
        fi
        _wd "${RAIDName[$i]} status: ${RAIDStatus[$i]} ($iRAIDStatus) - size $iRAIDSize GB, free $iRAIDFree GB (${iFree}%)"
    done

    # single check mode: the status line needs a text if all is fine too
    if [ $DISKOK -eq 1 ]; then
        test $FLAG_SINGLECHECK -ne 0 && _add_status "Disks and volumes are fine."
    fi

fi
# --- output status
ph.status "$healthString"
Loading