diff --git a/check_ceph_status b/check_ceph_status index d9c59ed286e1362fb35ed69ff63b827a7a42c476..aa18e5984925c9655b18048d483ea43919e63605 100755 --- a/check_ceph_status +++ b/check_ceph_status @@ -19,12 +19,15 @@ # 2021-03-31 v1.2 <axel.hahn@iml.unibe.ch> estimate remaining time of ceph recovery # 2021-04-12 v1.3 <axel.hahn@iml.unibe.ch> if degraded items are 0 delete init file too # 2023-04-24 v1.4 <axel.hahn@unibe.ch> update for newer ceph versions +# 2023-06-19 v1.5 <axel.hahn@unibe.ch> add help and param support; no more tmpfile # ====================================================================== . $(dirname $0)/inc_pluginfunctions -initfile=/tmp/ceph-status-not-ok-start -tmpfile=/tmp/ceph-status.out_$$ +initfile="/tmp/ceph-status-not-ok-start-$USER" + +self_APPNAME=$( basename $0 | tr [:lower:] [:upper:] ) +self_APPVERSION=1.5 iSleep=3 doLoop=1 @@ -39,24 +42,52 @@ line="__________________________________________________________________________ # FUNCTIONS # ---------------------------------------------------------------------- +function showHelp(){ + _self=$(basename $0) +cat <<EOF +______________________________________________________________________ + +$self_APPNAME +v$self_APPVERSION + +(c) Institute for Medical Education - University of Bern +Licence: GNU GPL 3 +______________________________________________________________________ + +Show ceph health status. +The state of the check switches to warning if HEALTH_WARN was detected +and is error of other HEALTH values than HEALTH_WARN or HEALTH_OK. + +In the output is the complete output of the command "ceph status". + +If degraded objects are found it shows the progress of repair process. + +SYNTAX: +$_self + +OPTIONS: + -h or --help show this help. + +EOF +} + function readCephStatus(){ - sudo ceph status >$tmpfile - if [ $? -ne 0 ]; then - rm -f $tmpfile + if ! 
data=$( sudo /bin/ceph status 2>&1 ); then + echo "$data" ph.abort "UNKNOWN: ceph is not available or no sudo permissions to execute ceph commands." fi } function getCephStatus(){ - cat $tmpfile | grep "health:" | awk '{ print $2 }' + grep "health:" <<< "$data" | awk '{ print $2 }' } function getTotalObjects(){ - cat $tmpfile | grep "pgs:.*objects degraded" | awk '{ print $2 }' | cut -f 2 -d "/" + grep "pgs:.*objects degraded" <<< "$data" | awk '{ print $2 }' | cut -f 2 -d "/" } function getDegraded(){ - cat $tmpfile | grep "pgs:.*objects degraded" | awk '{ print $2 }' | cut -f 1 -d "/" + grep "pgs:.*objects degraded" <<< "$data" | awk '{ print $2 }' | cut -f 1 -d "/" } function getMisplaced(){ - cat $tmpfile | grep ".*objects misplaced" | awk '{ print $2 }' | cut -f 1 -d "/" + grep ".*objects misplaced" <<< "$data" | awk '{ print $2 }' | cut -f 1 -d "/" } @@ -64,6 +95,14 @@ function getMisplaced(){ # MAIN # ---------------------------------------------------------------------- +# --- check param -h +case "$1" in + "--help"|"-h") + showHelp + exit 0 + ;; + *) +esac readCephStatus if [ ! -f $initfile ]; then @@ -95,11 +134,11 @@ fi ph.status "Ceph status is $sCephStatus" echo -while [ ! "$sCephStatus" = "HEALTH_OK" -a $doLoop = 1 ]; do +while [ ! "$sCephStatus" = "HEALTH_OK" ] && [ $doLoop = 1 ]; do typeset -i iObjCount=$iDeg+$iMis - iTsNow=`date +%s` + iTsNow=$( date +%s ) typeset -i iDegNow=$(getDegraded) typeset -i iMisNow=$(getMisplaced) @@ -114,7 +153,7 @@ while [ ! "$sCephStatus" = "HEALTH_OK" -a $doLoop = 1 ]; do # typeset -i iDeltaMis2=$iLastMis-$iMisNow - if [ $iTsDelta -gt 0 ]; then + if [ $iDegNow+$iMisNow -gt 0 ] && [ $iTsDelta -gt 0 ]; then typeset -i iDegPerMin=$iDoneDeg/$iTsDelta*60 if [ $iDegPerMin -gt 0 ]; then # timeByDeg=`echo $iTsDelta*$iObjCount/$iDoneDeg/60 - $iTsDelta/60 | bc` @@ -180,9 +219,7 @@ while [ ! 
"$sCephStatus" = "HEALTH_OK" -a $doLoop = 1 ]; do printf "misplaced $tbl" "$iMis" "$iMisNow" "$iDoneMis" "$iMisPerMin" "$_timeByMis" printf "total $tbl" $iObjCount $iNowTotal $iDoneTotal " " "$sTimeLeft" echo $line - cat $tmpfile - rm -f $tmpfile 2>/dev/null - + echo "$data" if [ $doSingleLoop = 1 ]; then doLoop=0 @@ -194,12 +231,10 @@ while [ ! "$sCephStatus" = "HEALTH_OK" -a $doLoop = 1 ]; do done -test $doSingleLoop = 0 -o "$sCephStatus" = "HEALTH_OK" && cat $tmpfile +test $doSingleLoop = 0 -o "$sCephStatus" = "HEALTH_OK" && echo "$data" echo -echo --- DONE `date` +echo --- DONE $( date ) test "$sCephStatus" = "HEALTH_OK" && rm -f $initfile 2>/dev/null test $iDeg -eq 0 && rm -f $initfile 2>/dev/null -rm -f $tmpfile 2>/dev/null - ph.exit diff --git a/docs/20_Checks/_index.md b/docs/20_Checks/_index.md index b54fa7893243776184d8f099b8cacb4ee67a4430..aa243fb34e2ba46faf1b647e6b855bdd81ebbef9 100644 --- a/docs/20_Checks/_index.md +++ b/docs/20_Checks/_index.md @@ -12,7 +12,7 @@ There is one include script used by all checks: * [check_ceph_diskfree](check_ceph_diskfree.md) * [check_ceph_io](check_ceph_io.md) * [check_ceph_osd](check_ceph_osd.md) -* check_ceph_status +* [check_ceph_status](check_ceph_status.md) * check_clientbackup * check_couchdb-lb * [check_cpu](check_cpu.md) @@ -23,22 +23,28 @@ There is one include script used by all checks: * check_haproxy_health * check_haproxy_status * check_memory +* check_mysqlserver * check_netio * [check_netstat](check_netstat.md) * [check_onehost](check_onehost.md) * [check_onevm](check_onevm.md) * check_opencpu * check_packages2install +* check_php-fpm-status * check_proc_mem * check_proc_ressources * check_proc_zombie * [check_psqlserver](check_psqlserver.md) * [check_reboot_required](check_reboot_required.md) +* check_requirements * check_sensuplugins * check_smartstatus * [check_snmp_data](check_snmp_data.md) +* check_snmp_printer +* check_snmp_switch * [check_snmp_synology](check_snmp_synology.md) * check_ssl +* 
check_ssl_certs * check_systemdservices * check_timesync * check_uptime diff --git a/docs/20_Checks/check_ceph_diskfree.md b/docs/20_Checks/check_ceph_diskfree.md index c960504b219f787dd2c1ca8d56f6b21811fb329a..3b45879977102c874241bc17efe5707f52ae255e 100644 --- a/docs/20_Checks/check_ceph_diskfree.md +++ b/docs/20_Checks/check_ceph_diskfree.md @@ -1,4 +1,4 @@ -# check Ceph diskfree +# Check Ceph diskfree ## Introduction @@ -61,4 +61,4 @@ OK .mgr 1 1 18 MiB 6 18 MiB 0 16 TiB OK one 2 32 255 GiB 66.65k 255 GiB 0.50 16 TiB OK two 3 32 0 B 0 0 B 0 25 TiB |global-total=59373627899904;;;0;59373627899904 global-avail=58274116272128;;;0;59373627899904 global-used=823559979008;;;0;59373627899904 -... +``` diff --git a/docs/20_Checks/check_ceph_io.md b/docs/20_Checks/check_ceph_io.md index f406e88150418cf87c1ac6e2fd623823f34da697..5426c1f3611e34f31a6e394cfbc3016158c2d26d 100644 --- a/docs/20_Checks/check_ceph_io.md +++ b/docs/20_Checks/check_ceph_io.md @@ -1,4 +1,4 @@ -# check Ceph IO +# Check Ceph IO ## Introduction @@ -55,4 +55,4 @@ check_ceph_io -t " client: 255 B/s rd, 0 op/s rd, 0 op/s wr" ```txt OK: 13 KiB/s rd, 1.1 MiB/s wr, 130 op/s rd, 137 op/s wr |cephio-read=13K;; cephio-write=1.1M;; -... +``` diff --git a/docs/20_Checks/check_ceph_osd.md b/docs/20_Checks/check_ceph_osd.md index 84868caeab00260815f2e98dc7567494dca00efc..8855aecf4f6b1462fffa9fbdda75f704638b5ee5 100644 --- a/docs/20_Checks/check_ceph_osd.md +++ b/docs/20_Checks/check_ceph_osd.md @@ -92,4 +92,4 @@ ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF 22 ssd 1.74660 osd.22 up 1.00000 1.00000 28 ssd 1.74660 osd.28 up 1.00000 1.00000 |osd-total=30;;;0;30 osd-up=30;;;0;30 osd-down=0;;;0;30 -... 
+``` diff --git a/docs/20_Checks/check_ceph_status.md b/docs/20_Checks/check_ceph_status.md new file mode 100644 index 0000000000000000000000000000000000000000..47885682e87616f4b2c00df59473014f64e2bbb5 --- /dev/null +++ b/docs/20_Checks/check_ceph_status.md @@ -0,0 +1,67 @@ +# Check Ceph Status + +## Introduction + +**check_ceph_status** is a plugin to show ceph status. +It switches the state depending on the HEALTH_* value. + +## Requirements + +* `ceph` binary and sudo permission on it to get the information + +## Syntax + +```txt +______________________________________________________________________ + +DEV_CHECK_CEPH_STATUS +v1.5 + +(c) Institute for Medical Education - University of Bern +Licence: GNU GPL 3 +______________________________________________________________________ + +Show ceph health status. +The state of the check switches to warning if HEALTH_WARN was detected +and is error of other HEALTH values than HEALTH_WARN or HEALTH_OK. + +In the output is the complete output of the command "ceph status". + +SYNTAX: +check_ceph_status + +OPTIONS: + -h or --help show this help. + +``` + +### Parameters + +(none) + +## Examples + +`$ check_ceph_status` returns + +```txt +OK: Ceph status is HEALTH_OK + + cluster: + id: 12345678-68e3-4c21-bf95-4b5f802f81a4 + health: HEALTH_OK + + services: + mon: 3 daemons, quorum cephmon1,cephmon2,cephmon3 (age 2w) + mgr: cephmon3(active, since 2w), standbys: cephmon2, cephmon1 + osd: 30 osds: 30 up (since 2w), 30 in (since 5w) + + data: + pools: 3 pools, 161 pgs + objects: 608.44k objects, 2.3 TiB + usage: 6.9 TiB used, 47 TiB / 54 TiB avail + pgs: 161 active+clean + + io: + client: 896 KiB/s rd, 12 MiB/s wr, 372 op/s rd, 401 op/s wr + +``` \ No newline at end of file diff --git a/readme.md b/readme.md index 6346c9e35959f0f146e4e388509d9d742fafbcae..974796e21f4f87c6acd5b2d0a448670f983f103c 100644 --- a/readme.md +++ b/readme.md @@ -8,51 +8,10 @@ This is a collection of our checks. 
They are used on Linux systems (Debian, Cent We use Icinga graphite module to show performance data. The templates are located in a sister repository - 📃 Sources: * Checks: <https://git-repo.iml.unibe.ch/iml-open-source/icinga-checks> * Graphs (Graphite): <https://git-repo.iml.unibe.ch/iml-open-source/icinga-graphite-templates> -📗 Docs: see [./docs/](./docs) folder \ -📜 Licence: GNU GPL 3.0 - -## Scripts - -There is one include script used by all checks: -[inc_pluginfunctions](inc_pluginfunctions.md) - -## Checks - -* check_apache_requests -* check_backup_one -* check_ceph_diskfree -* check_ceph_osd -* check_ceph_status -* check_clientbackup -* check_couchdb-lb -* check_cpu -* check_cronstatus -* check_disk-io -* check_dns_responsetime -* [check_eol](check_eol.md) -* check_haproxy_health -* check_haproxy_status -* check_memory -* check_netio -* check_netstat -* check_opencpu -* check_packages2install -* check_proc_mem -* check_proc_ressources -* check_proc_zombie -* check_reboot_required -* check_sensuplugins -* check_smartstatus -* [check_snmp_data](check_snmp_data.md) -* check_snmp_synology -* check_ssl -* check_systemdservices -* check_timesync -* check_uptime -* hello +📜 Licence: GNU GPL 3.0 \ +📗 Docs: see <https://os-docs.iml.unibe.ch/icinga-checks/> or [./docs/](./docs) folder