From 56f550a40490ebe20aa91123a88ba822cd2eb940 Mon Sep 17 00:00:00 2001
From: "Hahn Axel (hahn)" <axel.hahn@unibe.ch>
Date: Mon, 12 Jun 2023 15:19:35 +0200
Subject: [PATCH] detect failed and unknown vms

---
 check_onevm | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)
 mode change 100644 => 100755 check_onevm

diff --git a/check_onevm b/check_onevm
old mode 100644
new mode 100755
index 66547c1..3514385
--- a/check_onevm
+++ b/check_onevm
@@ -3,8 +3,7 @@
 #
 # Check ONEVM
 # show virtual machines
-#
-# requirements:
+## requirements:
 # - sudo onevm
 #
 # ----------------------------------------------------------------------
@@ -57,7 +56,10 @@ v$self_APPVERSION
 Licence: GNU GPL 3
 ______________________________________________________________________
 
-show count of vms in OpenNebula
+Show count of vms in OpenNebula - listed by state and by host.
+It will go to warning if a non running state was found.
+It will go to critical if a vm is on failure.
+
 
 SYNTAX:
 $(basename $0)
@@ -98,7 +100,7 @@ cmdout=$( sudo onevm list --csv 2>&1 )
 
 if ! grep "ID,USER" <<< "$cmdout" >/dev/null; then
         ph.setStatus "unknown"
-        echo "UNKNOWN: sudo onehost failed."
+        echo "UNKNOWN: sudo onevm failed."
         echo "$cmdout"
         ph.exit
 fi
@@ -111,17 +113,31 @@ csvdata=$( echo "$cmdout" | sed -n '2,$p' )
 out=""
 typeset -i iTotal;    iTotal=$(   echo "$csvdata" | wc -l )
 typeset -i iRunning;  iRunning=$( echo "$csvdata" | grep -c ",runn," )
-typeset -i iOther;    iOther=$iTotal-$iRunning
+typeset -i iFail;     iFail=$(    echo "$csvdata" | grep -c ",fail," )
+typeset -i iUnknown;  iUnknown=$( echo "$csvdata" | grep -c ",unkn," )
+typeset -i iOther;    iOther=$iTotal-$iRunning-$iFail-$iUnknown
 
 ph.perfadd "total"    "${iTotal}"
 ph.perfadd "running"  "${iRunning}"
-ph.perfadd "other"     "${iOther}"
+ph.perfadd "fail"     "${iFail}"
+ph.perfadd "unknown"  "${iUnknown}"
+ph.perfadd "other"    "${iOther}"
+
+if [ $iUnknown -ne 0 ]; then
+    ph.setStatus "warning"
+    out+="WARNING: a vm with status [Unknown] was found."
+fi
 
 if [ $iOther -ne 0 ]; then
-    ps.setStatus "warning"
+    ph.setStatus "warning"
     out+="WARNING: There is a VM that has another status than [running]."
 fi
 
+if [ $iFail -ne 0 ]; then
+    ph.setStatus "critical"
+    out+="ERROR: a vm with status [Failed] was found."
+fi
+
 out+=$( 
 echo '>>>>>> By state'
 grep "^# STAT:" "$0" | while read -r line
-- 
GitLab