#!/bin/bash
# check_ceph_diskfree
# ======================================================================
#
# Icinga/ Nagios Check
# CEPH DISKFREE
#
# ----------------------------------------------------------------------
#
# REQUIREMENTS:
#   - ceph
#
# SYNTAX:
#   - check_ceph_diskfree [-w WARNINGLIMIT] [-c CRITICALLIMIT]
#     WARNINGLIMIT   integer  usage limit when to warn; default: 70
#     CRITICALLIMIT  integer  usage limit when to alert; default: 90
#
# ----------------------------------------------------------------------
# 2020-03-04  v1.0  <axel.hahn@iml.unibe.ch>
# 2020-03-05  v1.1  <axel.hahn@iml.unibe.ch>  switch to ph.* helper functions
# 2023-04-24  v1.2  <axel.hahn@unibe.ch>      update for newer ceph versions
# 2023-05-09  v1.3  <axel.hahn@unibe.ch>      add help
# 2023-06-19  v1.4  <axel.hahn@unibe.ch>      no more tmpfile
# 2023-07-27  v1.5  <axel.hahn@unibe.ch>      update help page
# 2023-10-20  v1.6  <axel.hahn@unibe.ch>      harden sudo command execution
# ======================================================================

# Load the ph.* helper functions from the file next to this script.
# The path is quoted so that an install directory containing blanks works.
. "$(dirname "$0")/inc_pluginfunctions"

# Version of this check.
export self_APPVERSION=1.6

# Counters for lines rated as WARNING / CRITICAL; set after the evaluation.
typeset -i iWarning=0
typeset -i iCritical=0


# ----------------------------------------------------------------------
# Print the help page of this check to stdout.
# The header comes from ph.showImlHelpHeader, which is not defined in this
# file (presumably provided by the sourced inc_pluginfunctions).
# ----------------------------------------------------------------------
function showHelp(){
    local _self; _self=$(basename "$0")
    cat <<EOF
$( ph.showImlHelpHeader )

Show available and free space on a ceph cluster.
This check sends performance data.

It uses 'ceph df' to parse data.

SYNTAX:
$_self [-w WARNINGLIMIT] [-c CRITICALLIMIT]

OPTIONS:
    -h or --help      show this help.
    -w WARNINGLIMIT   integer; usage limit in percent when to warn; default: 70
    -c CRITICALLIMIT  integer; usage limit in percent when to alert; default: 90

EOF
}

# ----------------------------------------------------------------------
# MAIN
# ----------------------------------------------------------------------

# --- help requested? Only the first argument is inspected.
if [[ "$1" == "--help" || "$1" == "-h" ]]; then
    showHelp
    exit 0
fi

# --- the ceph binary is a requirement of this check
ph.require ceph

# --- limits: defaults 70 / 90, replaceable with -w / -c on the command line
typeset -i iWarnLimit
typeset -i iCriticalLimit
iWarnLimit=$(     ph.getValueWithParam 70 w "$@" )
iCriticalLimit=$( ph.getValueWithParam 90 c "$@" )

# --- fetch the usage report (stdout and stderr) without a password prompt;
#     any failure of the command is reported as missing sudo permission
data=$( sudo -n /bin/ceph df 2>&1 ) \
    || ph.abort "UNKNOWN: No sudo permissions to execute ceph commands."

# ----------------------------------------------------------------------
# Rate each line of a 'ceph df' report against the usage limits.
#
# The result is stored in the global variable "out": one line per input
# line that contains a letter, prefixed with a 10 characters wide status
# column (OK, WARNING, CRITICAL; blank for section titles, column headers
# and lines in front of the first section). An empty line is put in front
# of each section title.
#
# Call it in the current shell, not in a pipeline or command substitution:
# ph.abort is expected to end the script on an unknown section, and it
# could only end a subshell otherwise.
#
# Globals:  out (written)
# param  string   output of 'ceph df'
# param  integer  usage in percent where WARNING starts
# param  integer  usage in percent where CRITICAL starts
# ----------------------------------------------------------------------
function _rateCephDfLines(){
    local _report="$1"
    local -i _warn="$2"
    local -i _crit="$3"

    local _line _section _status _value _rated=
    local -a _fields
    local -i _isHeader=0 _pos=0 _percent=0

    # read without IFS= on purpose: surrounding blanks of a line are dropped
    while read -r _line
    do
        # only lines containing a letter are of interest
        [[ "$_line" =~ [a-zA-Z] ]] || continue

        _status="          "
        if [[ "$_line" == "--- "* ]]; then
            # section title, e.g. "--- RAW STORAGE ---"
            _section="${_line#--- }"
            _section="${_section% ---}"
            _section="${_section%%:*}"
            _isHeader=1

            # column number of the usage in percent within this section
            case "$_section" in
                "RAW STORAGE") _pos=10 ;;
                "POOLS")       _pos=9  ;;
                *)
                    _pos=0
                    ph.abort "ERROR: unhandled section: [$_section]"
                    ;;
            esac
            _rated+=$'\n'

        elif (( _isHeader )); then
            # first line after a section title holds the column names
            _isHeader=0

        elif (( _pos > 0 )); then
            # data line: take the integer part of the usage column
            read -r -a _fields <<< "$_line"
            _value="${_fields[_pos-1]}"
            _value="${_value%%.*}"
            if [[ "$_value" =~ ^[0-9]+$ ]]; then
                _percent=$(( 10#$_value ))
            else
                # a value that is not a number is counted as 0
                _percent=0
            fi

            if (( _percent >= _warn )); then
                if (( _percent >= _crit )); then
                    _status="CRITICAL  "
                else
                    _status="WARNING   "
                fi
            else
                _status="OK        "
            fi
        fi
        _rated+="$_status $_line"$'\n'
    done <<< "$_report"

    out="${_rated%$'\n'}"
}

out=
_rateCephDfLines "$data" "$iWarnLimit" "$iCriticalLimit"

# --- count the rated lines; every line of $out starts with its status
iWarning=$(  grep -c "^WARNING"  <<< "$out" )
iCritical=$( grep -c "^CRITICAL" <<< "$out" )

# --- the worst rating of any line becomes the status of the check
if [ "$iCritical" -gt 0 ]; then
    ph.setStatus "critical"
elif [ "$iWarning" -gt 0 ]; then
    ph.setStatus "warning"
else
    ph.setStatus "ok"
fi

ph.status "Disksize on Ceph cluster and its pools - warnings: $iWarning ($iWarnLimit %) .. critical: $iCritical ($iCriticalLimit %)"
echo "$out"

# global size status is in TOTAL...
# "iB" is cut from the units, e.g. "18 TiB" becomes "18 T"
totalLine="$( grep '^TOTAL' <<< "$data" | sed 's#iB##g' )"

# echo "DEBUG: totalLine = $totalLine"
# DEBUG: totalLine = TOTAL  18 T  18 T  428 G   428 G       2.30

# Without a TOTAL line there is nothing to report as performance data.
if [ -n "$totalLine" ]; then
    # fields 2..7 are three pairs of number and unit: total, available, used
    read -r _ nTotal uTotal nAvail uAvail nUsed uUsed _ <<< "$totalLine"
    sTotal="${nTotal}${uTotal}"
    sAvail="${nAvail}${uAvail}"
    sUsed="${nUsed}${uUsed}"

    # convert the values with ph.toUnit; quoted so that an empty value
    # cannot shift the argument positions
    iTotal=$( ph.toUnit "$sTotal" "" )
    iAvail=$( ph.toUnit "$sAvail" "" )
    iUsed=$(  ph.toUnit "$sUsed"  "" )

    ph.perfadd "global-total"    "${iTotal}"    "" "" 0 "${iTotal}"
    ph.perfadd "global-avail"    "${iAvail}"    "" "" 0 "${iTotal}"
    ph.perfadd "global-used"     "${iUsed}"     "" "" 0 "${iTotal}"
fi

ph.exit

# ----------------------------------------------------------------------