Skip to content
Snippets Groups Projects
Select Git revision
  • 470798e2c151720ee7f44f6ec6638045d6c1f3a5
  • master default protected
  • simple-task/7248-eol-check-add-node-22
  • 6877_check_iml_deployment
4 results

check_ceph_osd

Blame
  • check_ceph_osd 3.83 KiB
    #!/bin/bash
    # ======================================================================
    #
    # Icinga/ Nagios Check
    # CEPH OSD STATUS
    #
    # ----------------------------------------------------------------------
    #
    # REQUIREMENTS:
    #   - sudo permissions on ceph command
    #
    # SYNTAX:
    #   - check_ceph_osd [-w count for warning] [-c count for critical]
    #     No parameter required
    #
    # RESULT:
    #     OK        = all OSDs up
    #     WARNING   = 1 OSD is down
    #     CRITICAL  = min. 2 OSDs are down
    #     UNKNOWN   = ceph osd tree is not executable
    #
    # ----------------------------------------------------------------------
    # 2020-03-04  v1.0  <axel.hahn@iml.unibe.ch>
    # 2020-03-05  v1.1  <axel.hahn@iml.unibe.ch>  added params -w -c 
    # 2020-03-05  v1.2  <axel.hahn@iml.unibe.ch> switch to ph.* helper functions
    # 2022-10-21  v1.3  <axel.hahn@unibe.ch>     remove grep: warning: stray \ before white space
    # 2023-04-24  v1.4  <axel.hahn@unibe.ch>     update for newer ceph versions
    # 2023-06-19  v1.5  <axel.hahn@unibe.ch>     add help and param support; no more tmpfile
    # ======================================================================
    
    # Load inc_pluginfunctions from the directory this script is located in
    # (presumably the provider of the ph.* functions used further below).
    # The path is quoted so that an install directory with spaces still works.
    # shellcheck source=/dev/null
    . "$(dirname "$0")/inc_pluginfunctions"

    # Upper-cased file name of this script; shown in the help banner.
    # The tr character classes are quoted on purpose: unquoted, [:lower:] is a
    # shell glob and would be expanded if a matching one-letter file name
    # exists in the current working directory.
    self_APPNAME=$( basename "$0" | tr '[:lower:]' '[:upper:]' )
    self_APPVERSION=1.5

    # column number in output where to find the up/ down info
    iColUpDown=5
    
    # ----------------------------------------------------------------------
    # FUNCTIONS
    # ----------------------------------------------------------------------
    
    # Print the help text (banner, description, syntax, options, examples)
    # to stdout.
    #
    # global  string  self_APPNAME     application name shown in the banner
    # global  string  self_APPVERSION  version number shown in the banner
    function showHelp(){
        # file name of this script; local so it does not leak to the caller
        local _self="${0##*/}"

        # One printf argument per output line instead of a here-document:
        # a here-document terminator has to start in column 0, so the text
        # would break as soon as this function gets re-indented.
        printf '%s\n' \
            "______________________________________________________________________" \
            "" \
            "$self_APPNAME " \
            "v$self_APPVERSION" \
            "" \
            "(c) Institute for Medical Education - University of Bern" \
            "Licence: GNU GPL 3" \
            "______________________________________________________________________" \
            "" \
            "Show ceph osd status: how many OSDs exist and how many are up/ down." \
            "This check sends performance data." \
            "" \
            "On your cluster you might want to increase the values for warning and" \
            "critical level." \
            "" \
            "SYNTAX:" \
            "$_self [-w WARN_LIMIT] [-c CRITICAL_LIMIT]" \
            "" \
            "OPTIONS:" \
            "    -h or --help   show this help." \
            "    -w VALUE       warning level  (default: 1)" \
            "    -c VALUE       critical level (default: 2)" \
            "" \
            "EXAMPLE:" \
            "$_self" \
            "    no parameters; normal usage to get the ceph osd status" \
            "" \
            "$_self -c 10" \
            "    change to critical level if 10 osds are down." \
            ""
    }
    
    # Reduce the output of ceph osd tree to its OSD entries: only lines whose
    # first non-blank character is a digit are written to stdout.
    # The exit status is the one of grep (non-zero if no line matched).
    # global  string  data  output of command ceph osd tree
    function getOsd(){
        local sOsdLine='^ *[0-9]'
        printf '%s\n' "$data" | grep -- "$sOsdLine"
    }
    
    # ----------------------------------------------------------------------
    # MAIN
    # ----------------------------------------------------------------------
    # --- check param -h
    case "$1" in
        "--help"|"-h")
            showHelp
            exit 0
            ;;
        *)
            ;;
    esac

    # --- fetch the osd tree
    # stderr is captured as well, so an error text ends up in $data.
    # The exit status is tested directly on the assignment.
    if ! data=$( sudo /bin/ceph osd tree 2>&1 ); then
        ph.abort "UNKNOWN: ceph is not available or no sudo permissions to execute ceph commands."
    fi

    # set default / override from command line params
    typeset -i iWarnLimit=$(     ph.getValueWithParam 1 w "$@")
    typeset -i iCriticalLimit=$( ph.getValueWithParam 2 c "$@")

    # --- count the OSDs
    # The column number is handed over to awk with -v. Inside a single-quoted
    # awk program a shell variable is not expanded: '$iColUpDown' was read by
    # awk as an uninitialised variable (0), i.e. the whole line $0, so the
    # configured column was never used.
    # The status field is compared exactly; this relies on the status being in
    # column $iColUpDown of every OSD line.
    typeset -i iOsdTotal=$( getOsd | wc -l )
    typeset -i iOsdDown=$(  getOsd | awk -v col="${iColUpDown}" '$col == "down" { n++ } END { print n+0 }' )
    typeset -i iOsdUp=$(    getOsd | awk -v col="${iColUpDown}" '$col == "up"   { n++ } END { print n+0 }' )

    # --- set the status
    # below the warning limit is always "ok"; otherwise "critical" as soon as
    # the critical limit is reached, else "warning".
    if (( iOsdDown < iWarnLimit )); then
        ph.setStatus "ok"
    elif (( iOsdDown >= iCriticalLimit )); then
        ph.setStatus "critical"
    else
        ph.setStatus "warning"
    fi

    # --- output: status line followed by the raw command output
    ph.status "Check of available OSDs - $iOsdTotal OSDs total .. $iOsdUp up .. $iOsdDown down (Limits: warn at $iWarnLimit; critical $iCriticalLimit)"
    printf '%s\n' "$data"

    # --- performance data for total, up and down
    # the last two arguments are 0 and the total count (presumably min and
    # max of the value - confirm in inc_pluginfunctions)
    ph.perfadd "osd-total"    "${iOsdTotal}"  "" "" 0 "${iOsdTotal}"
    ph.perfadd "osd-up"       "${iOsdUp}"     "" "" 0 "${iOsdTotal}"
    ph.perfadd "osd-down"     "${iOsdDown}"   "" "" 0 "${iOsdTotal}"

    ph.exit
    
    # ----------------------------------------------------------------------