-
Hahn Axel (hahn) authoredHahn Axel (hahn) authored
check_ceph_osd 3.83 KiB
#!/bin/bash
# ======================================================================
#
# Icinga/ Nagios Check
# CEPH OSD STATUS
#
# ----------------------------------------------------------------------
#
# REQUIREMENTS:
# - sudo permissions on ceph command
#
# SYNTAX:
# - check_ceph_osd [-w COUNT_FOR_WARNING] [-c COUNT_FOR_CRITICAL]
# No parameter required
#
# RESULT:
# OK = all OSDs up
# WARNING = 1 OSD is down
# CRITICAL = min. 2 OSDs are down
# UNKNOWN = ceph osd tree is not executable
#
# ----------------------------------------------------------------------
# 2020-03-04 v1.0 <axel.hahn@iml.unibe.ch>
# 2020-03-05 v1.1 <axel.hahn@iml.unibe.ch> added params -w -c
# 2020-03-05 v1.2 <axel.hahn@iml.unibe.ch> switch to ph.* helper functions
# 2022-10-21 v1.3 <axel.hahn@unibe.ch> remove grep: warning: stray \ before white space
# 2023-04-24 v1.4 <axel.hahn@unibe.ch> update for newer ceph versions
# 2023-06-19 v1.5 <axel.hahn@unibe.ch> add help and param support; no more tmpfile
# 2023-07-27 v1.6 <axel.hahn@unibe.ch> shorten ceph exec; show output on error; shell fixes
# 2023-10-20 v1.7 <axel.hahn@unibe.ch> harden sudo command execution
# ======================================================================
# Source the helper include that lives next to this script (presumably the
# provider of the ph.* functions used below - confirm in inc_pluginfunctions).
# Both expansions are quoted so that an installation path containing
# whitespace or glob characters does not get word-split.
. "$(dirname "$0")/inc_pluginfunctions"

# version of this check; exported into the environment
export self_APPVERSION=1.7

# column number in output where to find the up/ down info
iColUpDown=5
# ----------------------------------------------------------------------
# FUNCTIONS
# ----------------------------------------------------------------------
# Print the help text of this plugin to stdout.
# Globals:   none; the script name is derived from $0
# Arguments: none
# Outputs:   the output of ph.showImlHelpHeader followed by a description,
#            the syntax, the options and usage examples
function showHelp(){
    # quote $0: a script path containing spaces must stay a single argument
    local _self; _self=$(basename "$0")
cat <<EOF
$( ph.showImlHelpHeader )
Show ceph osd status: how many OSDs exist and how many are up/ down.
This check sends performance data.
On your cluster you might want to increase the values for warning and
critical level.
SYNTAX:
$_self [-w WARN_LIMIT] [-c CRITICAL_LIMIT]
OPTIONS:
-h or --help show this help.
-w VALUE warning level (default: 1)
-c VALUE critical level (default: 2)
EXAMPLE:
$_self
no parameters; normal usage to get the ceph osd status
$_self -c 10
change to critical level if 10 osds are down.
EOF
}
# Filter the OSD entries out of the "ceph osd tree" output: only lines whose
# first non-blank character is a digit are printed.
# Globals:   data (read) - string with the output of command ceph osd tree
# Outputs:   matching lines on stdout
# Returns:   exit status of grep (non-zero if no line matches)
function getOsd(){
    printf '%s\n' "${data}" | grep '^ *[0-9]'
}
# ----------------------------------------------------------------------
# MAIN
# ----------------------------------------------------------------------
# --- help requested? Only the first parameter is inspected; on --help or -h
#     the help text is shown and the script ends with exit code 0.
if [[ "$1" == "--help" || "$1" == "-h" ]]; then
    showHelp
    exit 0
fi
# --- check required tools
ph.require ceph

# Run "ceph osd tree" non-interactively via sudo (-n: never prompt for a
# password) and keep stdout + stderr in the global variable data.
# Any non-zero exit status of the command hands over to ph.abort.
data=$( sudo -n /bin/ceph osd tree 2>&1 ) \
    || ph.abort "UNKNOWN: No sudo permissions to execute ceph commands."
# set default / override from command line params
typeset -i iWarnLimit;     iWarnLimit=$( ph.getValueWithParam 1 w "$@" )
typeset -i iCriticalLimit; iCriticalLimit=$( ph.getValueWithParam 2 c "$@" )

# Count the OSD lines and their states.
# The column number is handed over to awk with -v: inside a single quoted awk
# program a shell variable is not expanded; awk would treat the name as an
# uninitialised variable of its own and address the whole line instead of
# the configured column. The state is compared exactly with "down" / "up".
# NOTE(review): this relies on the state being in column $iColUpDown of every
# OSD line (i.e. each OSD has a device class) - confirm for your ceph version.
typeset -i iOsdTotal; iOsdTotal=$( getOsd | wc -l )
typeset -i iOsdDown;  iOsdDown=$( getOsd | awk -v col="$iColUpDown" '$col == "down" { n++ } END { print n+0 }' )
typeset -i iOsdUp;    iOsdUp=$( getOsd | awk -v col="$iColUpDown" '$col == "up" { n++ } END { print n+0 }' )

# Map the number of down OSDs to a status: below the warning limit it is ok;
# otherwise critical when the critical limit is reached, else warning.
if (( iOsdDown < iWarnLimit )); then
    ph.setStatus "ok"
elif (( iOsdDown >= iCriticalLimit )); then
    ph.setStatus "critical"
else
    ph.setStatus "warning"
fi

# status line first, then the raw command output as details
ph.status "Check of available OSDs - $iOsdTotal OSDs total .. $iOsdUp up .. $iOsdDown down (Limits: warn at $iWarnLimit; critical $iCriticalLimit)"
printf '%s\n' "$data"

# performance data; 0 and the total count are passed as the last two arguments
ph.perfadd "osd-total" "${iOsdTotal}" "" "" 0 "${iOsdTotal}"
ph.perfadd "osd-up"    "${iOsdUp}"    "" "" 0 "${iOsdTotal}"
ph.perfadd "osd-down"  "${iOsdDown}"  "" "" 0 "${iOsdTotal}"
ph.exit
# ----------------------------------------------------------------------