Skip to content
Snippets Groups Projects
Select Git revision
  • a9dced742858074b5e1300605493546952a7028c
  • master default protected
  • simple-task/7248-eol-check-add-node-22
  • 6877_check_iml_deployment
4 results

check_smartstatus

Blame
  • check_ceph_status 7.55 KiB
    #!/bin/bash
    # ======================================================================
    #
    # Icinga/ Nagios Check
    # CEPH STATUS / HEALTH
    #
    # ----------------------------------------------------------------------
    #
    # REQUIREMENTS:
    #   - ceph and sudo permissions on it
    #
    # SYNTAX:
    #   - check_ceph_status
    #     No parameter required
    #
    # ----------------------------------------------------------------------
    # 2020-03-04  v1.0  <axel.hahn@iml.unibe.ch>
    # 2020-03-05  v1.1  <axel.hahn@iml.unibe.ch>  switch to ph.* helper functions
    # 2021-03-31  v1.2  <axel.hahn@iml.unibe.ch>  estimate remaining time of ceph recovery
    # 2021-04-12  v1.3  <axel.hahn@iml.unibe.ch>  if degraded items are 0 delete init file too
    # 2023-04-24  v1.4  <axel.hahn@unibe.ch>      update for newer ceph versions
    # 2023-06-19  v1.5  <axel.hahn@unibe.ch>      add help and param support; no more tmpfile
    # 2023-07-27  v1.6  <axel.hahn@unibe.ch>      update help page
    # ======================================================================
    
    # Load the shared plugin helper library (provides the ph.* functions used
    # below). Both expansions are quoted so that the include still works when
    # the plugin is installed in a directory whose path contains whitespace.
    . "$(dirname "$0")/inc_pluginfunctions"
    
    # Version shown on the help page.
    export self_APPVERSION=1.6
    
    # State file that remembers when a non-OK status was seen first and how
    # many objects were degraded / misplaced at that moment.
    initfile="/tmp/ceph-status-not-ok-start-$USER"
    
    # Seconds to wait between two refreshes when looping continuously.
    iSleep=3
    # 1 = the progress loop may run; it is set to 0 to leave the loop.
    doLoop=1
    # 1 = render the progress section once and stop (mode used for a check).
    doSingleLoop=1
    
    # printf format of one row in the progress table; it is appended to a
    # row label of fixed width.
    tbl="|%10s |%10s |%10s |%10s | %s\n"
    
    # Horizontal ruler printed below the progress table.
    line="____________________________________________________________________________________________________________"
    
    # ----------------------------------------------------------------------
    # FUNCTIONS
    # ----------------------------------------------------------------------
    
    # Print the help page (banner, description, syntax and options) to stdout.
    # Globals read: self_APPNAME and self_APPVERSION are expanded inside the
    # here-document. self_APPVERSION is exported near the top of this file;
    # self_APPNAME is not set in the visible part of the file - presumably it
    # comes from the sourced inc_pluginfunctions (TODO confirm).
    # Takes no arguments.
    function showHelp(){
        # name of the script without its directory; used in the SYNTAX section
        local _self; _self=$(basename $0)
    # unquoted delimiter: variables inside the here-document are expanded
    cat <<EOF
    ______________________________________________________________________
    
    $self_APPNAME 
    v$self_APPVERSION
    
    (c) Institute for Medical Education - University of Bern
    Licence: GNU GPL 3
    ______________________________________________________________________
    
    Show ceph health status.
    The state of the check switches to warning if HEALTH_WARN was detected
    and is error of other HEALTH values than HEALTH_WARN or HEALTH_OK.
    
    In the output is the complete output of the command "ceph status".
    
    If degraded objects are found it shows the progress of repair process.
    
    SYNTAX:
    $_self
    
    OPTIONS:
        -h or --help   show this help.
    
    EOF
    }
    
    # Run "ceph status" via sudo and keep its combined stdout/stderr in the
    # global variable $data.
    # If the command fails, whatever it printed is shown and ph.abort is
    # called with an UNKNOWN message.
    function readCephStatus(){
            data=$( sudo /bin/ceph status 2>&1 ) && return 0
    
            # the command failed: show what it printed, then give up
            echo "$data"
            ph.abort "UNKNOWN: ceph is not available or no sudo permissions to execute ceph commands."
    }
    # Print the second whitespace separated field of every line in $data that
    # contains "health:" - for "health: HEALTH_OK" that is "HEALTH_OK".
    function getCephStatus(){
            awk '/health:/ { print $2 }' <<< "$data"
    }
    # From the "pgs: <degraded>/<total> objects degraded" line in $data print
    # the part behind the slash of the second field, i.e. <total>.
    # Prints nothing if $data has no such line.
    function getTotalObjects(){
            awk '/pgs:.*objects degraded/ {
                    iParts = split($2, aPart, "/")
                    # without a slash the complete field is printed
                    print (iParts > 1 ? aPart[2] : $2)
            }' <<< "$data"
    }
    # From the "pgs: <degraded>/<total> objects degraded" line in $data print
    # the part in front of the slash of the second field, i.e. <degraded>.
    # Prints nothing if $data has no such line.
    function getDegraded(){
            awk '/pgs:.*objects degraded/ {
                    split($2, aPart, "/")
                    print aPart[1]
            }' <<< "$data"
    }
    # Print the number of misplaced objects found in $data.
    #
    # A line like "<misplaced>/<total> objects misplaced (..%)" is searched.
    # The counter is not always the 2nd field: it is the 2nd field when the
    # line starts with the "pgs:" label, but the 1st field when the line is a
    # continuation line without label (e.g. below an "objects degraded" line).
    # Therefore the first field of the form <number>/<number> is taken instead
    # of a fixed column.
    # Prints nothing if $data has no such line.
    function getMisplaced(){
            awk '/objects misplaced/ {
                    for (i = 1; i <= NF; i++) {
                            if ($i ~ "^[0-9]+/[0-9]+$") {
                                    split($i, aPart, "/")
                                    print aPart[1]
                                    break
                            }
                    }
            }' <<< "$data"
    }
    
    
    # ----------------------------------------------------------------------
    # MAIN
    # ----------------------------------------------------------------------
    
    # --- first parameter -h or --help: show the help page and stop
    if [ "$1" = "--help" ] || [ "$1" = "-h" ]; then
            showHelp
            exit 0
    fi
    
    # --- fetch the current "ceph status" output into $data
    readCephStatus
    
    # --- load (or create) the init file
    # The init file is a small shell snippet that stores the start time of the
    # problem (sStart, iTsStart) and the number of degraded / misplaced objects
    # at that time (iDeg, iMis). It is written on the first run and sourced on
    # every run.
    #
    # The file has a predictable name below /tmp and is executed as shell code.
    # Never source a file that belongs to somebody else.
    # NOTE(review): a directory that is writable for the monitoring user only
    # would be a safer place for this file - also with regard to symlinks.
    if [ -e "$initfile" ] && [ ! -O "$initfile" ]; then
            ph.abort "UNKNOWN: $initfile is not owned by the current user - it will not be loaded."
            exit 3  # fallback in case ph.abort returns
    fi
    
    if [ ! -f "$initfile" ]; then
            printf '%s\n' \
                    "sStart=\"$(date)\"" \
                    "iTsStart=$(date +%s)" \
                    "typeset -i iDeg=$(getDegraded)" \
                    "typeset -i iMis=$(getMisplaced)" \
                    > "$initfile"
    fi
    . "$initfile"
    
    iLastDeg=$iDeg
    iLastMis=$iMis
    
    # --- translate the ceph health value into the state of the check:
    #     HEALTH_OK -> ok, HEALTH_WARN -> warning, anything else -> critical
    sCephStatus=$(getCephStatus)
    
    case "$sCephStatus" in
            HEALTH_OK)   ph.setStatus "ok" ;;
            HEALTH_WARN) ph.setStatus "warning" ;;
            *)           ph.setStatus "critical" ;;
    esac
    
    ph.status "Ceph status is $sCephStatus"
    echo
    
    # Print the number of items processed per minute.
    #   $1  integer; number of items processed so far
    #   $2  integer; elapsed time in seconds
    # The multiplication is done before the division: dividing first would
    # truncate the result to 0 or to a multiple of 60.
    # Prints 0 if no time has elapsed.
    function _getRatePerMin(){
            local -i _iDone=$1 _iSecs=$2
            if (( _iSecs > 0 )); then
                    echo $(( _iDone * 60 / _iSecs ))
            else
                    echo 0
            fi
    }
    
    # Print the estimated number of minutes to process the remaining items.
    #   $1  integer; number of items still to process
    #   $2  integer; items processed per minute
    # Prints "???" if the rate is not positive.
    function _getMinutesLeft(){
            local -i _iOpen=$1 _iRate=$2
            if (( _iRate > 0 )); then
                    echo $(( _iOpen / _iRate ))
            else
                    echo "???"
            fi
    }
    
    # Print a progress bar of 100 cells followed by the percent value:
    # "#" for done cells, the marker character, "." for open cells.
    #   $1  integer; progress in percent; the bar is limited to 0..100 but
    #       the number behind the bar is printed as given
    #   $2  string; marker printed behind the last done cell
    function _showProgressBar(){
            local -i _iPercent=$1 _iFill j
            local _sMarker=$2
            _iFill=$_iPercent
            if (( _iFill < 0 )); then _iFill=0; fi
            if (( _iFill > 100 )); then _iFill=100; fi
    
            printf '['
            for (( j = 0; j < _iFill; j++ )); do printf '#'; done
            printf '%s' "$_sMarker"
            for (( j = _iFill; j < 100; j++ )); do printf '.'; done
            printf '] %s %%\n' "$_iPercent"
    }
    
    # --- show the progress of the recovery while the status is not HEALTH_OK.
    # With doSingleLoop=1 the body is executed once only.
    while [ "$sCephStatus" != "HEALTH_OK" ] && [ "$doLoop" = 1 ]; do
    
            # number of affected objects when the problem was detected
            typeset -i iObjCount=$(( iDeg + iMis ))
    
            iTsNow=$( date +%s )
    
            typeset -i iDegNow=$(getDegraded)
            typeset -i iMisNow=$(getMisplaced)
    
            typeset -i iTsDelta=$(( iTsNow - iTsStart ))
            typeset -i iTsDeltaMin=$(( iTsDelta / 60 ))
    
            typeset -i iDoneDeg=$(( iDeg - iDegNow ))
            typeset -i iDoneMis=$(( iMis - iMisNow ))
    
            # the estimation values stay empty if there is nothing to estimate
            iDegPerMin=""
            iMisPerMin=""
            timeByDeg=""
            timeByMis=""
            sTimeLeft=""
    
            # Arithmetic must be evaluated with (( )): a test like
            # [ 5+3 -gt 0 ] is no integer comparison and always fails.
            if (( iDegNow + iMisNow > 0 && iTsDelta > 0 )); then
                    iDegPerMin=$(_getRatePerMin "$iDoneDeg" "$iTsDelta")
                    timeByDeg=$(_getMinutesLeft "$iDegNow" "$iDegPerMin")
    
                    iMisPerMin=$(_getRatePerMin "$iDoneMis" "$iTsDelta")
                    timeByMis=$(_getMinutesLeft "$iMisNow" "$iMisPerMin")
    
                    # The summed rate must be positive; this also prevents a
                    # division by zero if both rates neutralize each other.
                    typeset -i iRateTotal=$(( iDegPerMin + iMisPerMin ))
                    if (( iRateTotal > 0 )); then
                            typeset -i iMinLeft=$(( (iDegNow + iMisNow) / iRateTotal ))
                            if (( iMinLeft > 120 )); then
                                    sTimeLeft="$iMinLeft min ... about $(( iMinLeft / 60 )) h"
                            else
                                    sTimeLeft="$iMinLeft min"
                            fi
                    else
                            sTimeLeft="???"
                    fi
            fi
    
            typeset -i iDoneTotal=$(( iDoneDeg + iDoneMis ))
            typeset -i iNowTotal=$(( iDegNow + iMisNow ))
    
            # no objects were affected on start: keep 0 instead of dividing by zero
            typeset -i iProgress=0
            if (( iObjCount > 0 )); then
                    iProgress=$(( iDoneTotal * 100 / iObjCount ))
            fi
    
            # toggle the marker at the head of the progress bar
            if [ "$eater" = "o" ]; then
                    eater="C"
            else
                    eater="o"
            fi
            iLastDeg=$iDegNow
            iLastMis=$iMisNow
    
    
            # ----- output
    
            test "$doSingleLoop" = 1 || clear
            echo "Problem detected on $sStart ... running for $iTsDeltaMin min"
    
            # --- progress bar
            _showProgressBar "$iProgress" "$eater"
    
            # --- table; the "time left" values of the rows "degraded" and
            #     "misplaced" are minutes
            printf "           $tbl" "on start"    "now"        "done"        "= per min"   "time left"
            printf "degraded   $tbl" "$iDeg"       "$iDegNow"   "$iDoneDeg"   "$iDegPerMin" "$timeByDeg"
            printf "misplaced  $tbl" "$iMis"       "$iMisNow"   "$iDoneMis"   "$iMisPerMin" "$timeByMis"
            printf "total      $tbl" "$iObjCount"  "$iNowTotal" "$iDoneTotal" " "           "$sTimeLeft"
            echo "$line"
            echo "$data"
    
            if [ "$doSingleLoop" = 1 ]; then
                    doLoop=0
            else
                    sleep "$iSleep"
                    readCephStatus
                    sCephStatus=$(getCephStatus)
            fi
    
    done
    
    # --- print the raw ceph status if the loop above did not already end
    #     with it: in continuous mode or if the status is HEALTH_OK
    if [ $doSingleLoop = 0 ] || [ "$sCephStatus" = "HEALTH_OK" ]; then
            echo "$data"
    fi
    echo
    echo --- DONE $( date )
    
    # --- remove the init file if the status is HEALTH_OK ...
    if [ "$sCephStatus" = "HEALTH_OK" ]; then
            rm -f $initfile 2>/dev/null
    fi
    # ... and also if no degraded objects were counted on start
    if [ $iDeg -eq 0 ]; then
            rm -f $initfile 2>/dev/null
    fi
    
    ph.exit