Skip to content
Snippets Groups Projects
check_systemdservices 5.29 KiB
#!/bin/bash
# ================================================================================
#
# IML SENSU CHECK :: CHECK RUNNING SYSTEMD SERVICES
#
# (1)
# shows overview of important services - edit check_systemdservices.cfg to define
# them
#
# (2)
# returns
# - OK      if all systemd services are running
# - UNKNOWN if systemctl command is not available
# - ERROR   if any systemd service is not running
#
# -------------------------------------------------------------------------------
# 2019-10-24  v0.9  <axel.hahn@iml.unibe.ch>
# 2019-10-29  v1.0  <axel.hahn@iml.unibe.ch>
# 2020-03-05  v1.1  <axel.hahn@iml.unibe.ch> switch to ph.* helper functions
# 2020-04-02  v1.2  <axel.hahn@iml.unibe.ch> added ph.execIfReady in _init
# 2021-10-21  v1.3  <axel.hahn@iml.unibe.ch> remove starting spaces in systemctl output
# 2022-04-14  v1.4  <axel.hahn@iml.unibe.ch> remove any special chars at beginning of line
# ================================================================================

# load the ph.* helper functions from the file that lives next to this script
. "$(dirname "$0")/inc_pluginfunctions"

# pattern files are looked up next to the script and carry its name
cfgfile="$0.cfg"
ignorefile="$0.ignore"

# Work files. They are created with mktemp so that their names cannot be
# guessed (no symlink games in a world writable directory) and so that two
# checks running at the same time do not overwrite each other's data.
tmpPrefix="${TMPDIR:-/tmp}/$(basename "$0")"
if ! tmpServices=$(mktemp "${tmpPrefix}-services.XXXXXX") \
  || ! tmpErrors=$(mktemp "${tmpPrefix}-errors.XXXXXX") \
  || ! tmpOut=$(mktemp "${tmpPrefix}-out.XXXXXX") \
  || ! tmpOutOther=$(mktemp "${tmpPrefix}-out-other.XXXXXX")
then
  # 3 is the conventional exit code for UNKNOWN in Nagios/Sensu style checks
  echo "UNKNOWN: unable to create temporary files in ${TMPDIR:-/tmp}"
  exit 3
fi

# counters; the integer attribute makes assignments evaluate arithmetically
typeset -i iErrors=0
typeset -i iOk=0
typeset -i iTotal=0

# blank separated list of all services that are not "loaded active"
sErrorServices=""

# -------------------------------------------------------------------------------
# FUNCTIONS
# -------------------------------------------------------------------------------

# INIT: delete the result files of a former run and write the list of systemd
# services into ${tmpServices}.
# - the sed call strips all non-alphanumeric characters at the beginning of a
#   line so that the unit name always is the first column
# - lines matching a pattern in ${ignorefile} are dropped; without a readable
#   ignore file nothing is filtered
# globals: tmpErrors, tmpOut, tmpOutOther, tmpServices, ignorefile (read only)
# params: none
function _init(){
  rm -f "${tmpErrors}" "${tmpOut}" "${tmpOutOther}" 2>/dev/null
  # systemctl --no-legend --no-pager --all --type service >${tmpServices}
  # systemctl --no-legend --no-pager --type service >${tmpServices}
  if [ -r "${ignorefile}" ]; then
    ph.execIfReady "systemctl --no-legend --no-pager --type service" \
      | sed "s#^[^a-zA-Z0-9]*##g" \
      | grep -vf "${ignorefile}" >"${tmpServices}"
  else
    # grep -f aborts on a missing pattern file and would leave the service
    # list empty - so the filter is skipped completely
    ph.execIfReady "systemctl --no-legend --no-pager --type service" \
      | sed "s#^[^a-zA-Z0-9]*##g" >"${tmpServices}"
  fi
}

# get wanted services from service list: prints each line of ${tmpServices}
# that matches at least one of the patterns in ${cfgfile} (one per line)
# globals: cfgfile, tmpServices (read only)
# params: none
function _getKnownServices(){
  # quoted, because both paths are derived from $0 and may contain blanks
  grep -f "${cfgfile}" -- "${tmpServices}"
}

# names of the wanted services, one per line: "[servicename].service"
# Only the first column of each wanted service line is kept and every
# backslash in it is doubled.
# params: none
function _getKnownServicenames(){
  _getKnownServices \
    | awk '{ print $1 }' \
    | sed -e 's/\\/\\\\/g'
}

# get all services from service list excluding the wanted: prints each line
# of ${tmpServices} that matches none of the patterns in ${cfgfile}
# globals: cfgfile, tmpServices (read only)
# params: none
function _getOtherServices(){
  # quoted, because both paths are derived from $0 and may contain blanks
  grep -vf "${cfgfile}" -- "${tmpServices}"
}

# names of all services that are not wanted, one per line:
# "[servicename].service"
# Only the first column of each line is kept and every backslash in it is
# doubled.
# params: none
function _getOtherServicenames(){
  _getOtherServices \
    | awk '{ print $1 }' \
    | sed -e 's/\\/\\\\/g'
}

# -------------------------------------------------------------------------------

# parse the line of a single service in ${tmpServices} and fill the global
# variables UNIT, LOAD, ACTIVE and SUB with its first four columns and
# DESCRIPTION with the remaining text (without surrounding blanks).
# If several lines match, the first one is used; if none matches, all five
# variables are empty.
# param 1 {string}  [servicename].service to fetch a single line; it is used
#                   as regex anchored at the beginning of the line, so a
#                   backslash in the name must be passed as double backslash
function _parseServiceLine(){
  local sLine
  sLine=$(grep "^${1}" "${tmpServices}")

  # read -r splits on whitespace only: no glob expansion of a "*" in the
  # description, no backslash interpretation and no extra processes
  read -r UNIT LOAD ACTIVE SUB DESCRIPTION <<< "${sLine}"
}
# called on error: append the details of one service to the error file.
# Written are a "----- <name>" headline, the output of systemctl status in
# long format and an empty line as separator.
# param 1 {string}  name of the service
function _systectlStatus(){
  {
    echo "----- $1"
    systemctl -l status "${1}"
    echo
  } >>${tmpErrors}
}

# process a list of service names and
# - add the status line of each service in $tmpOut
# - count them in iTotal, iOk and iErrors
# - collect name and status details of all failed services
# A service is OK if it is "loaded active"; its SUB state is shown only.
# globals: iTotal, iOk, iErrors, sErrorServices (written); tmpOut (appended)
# param 1..N  {string}  list of service names
function processServicelines(){
  local service sStatus sUnit

  for service in "$@"; do
    iTotal=$((iTotal+1))
    _parseServiceLine "${service}"

    # The name lists of this script deliver names with doubled backslashes
    # (regex escaping). systemctl and the status message need the real unit
    # name: that is the first column of the parsed line. The given name is
    # the fallback if no line was found.
    sUnit="${UNIT:-${service}}"

    # if [ "$LOAD $ACTIVE $SUB" = "loaded active running" ]; then
    if [ "$LOAD $ACTIVE" = "loaded active" ]; then
      iOk=$((iOk+1))
      sStatus="OK"
    else
      iErrors=$((iErrors+1))
      sStatus="ERROR"
      sErrorServices="${sErrorServices} ${sUnit}"
      _systectlStatus "${sUnit}"
    fi

    # columns are separated by tabs
    printf '%s\t%s\t%s [%s]\t%s: %s\n' \
      "$sStatus" "$LOAD" "$ACTIVE" "$SUB" "$UNIT" "$DESCRIPTION" >>"${tmpOut}"
  done
}

# -------------------------------------------------------------------------------
# MAIN
# -------------------------------------------------------------------------------

_init

# ---------- process services

# First pass: all services that are not listed in the config file. Their
# result lines are moved aside so that ${tmpOut} contains the known services
# only after the second pass.
# The command substitutions stay unquoted on purpose: each service name has
# to become an argument of its own.
# shellcheck disable=SC2046
processServicelines $(_getOtherServicenames)
# ${tmpOut} exists only if the first pass handled at least one service
if [ -f "${tmpOut}" ]; then
  mv -- "${tmpOut}" "${tmpOutOther}"
fi

# shellcheck disable=SC2046
processServicelines $(_getKnownServicenames)

# count the results of the known services; both stay 0 without result file
typeset -i iKnownOk=0
typeset -i iKnownError=0
if [ -f "${tmpOut}" ]; then
  # grep -c prints the count even if it is 0 (only its exit code differs)
  iKnownOk=$(grep -c "^OK" "${tmpOut}")
  iKnownError=$(grep -c "^ERROR" "${tmpOut}")
fi

# no service at all: unknown - any failed service: critical - otherwise: ok
if [ "$iTotal" -gt 0 ]; then
  ph.setStatus "ok"
  if [ "$iErrors" -ne 0 ]; then
    ph.setStatus "critical"
  fi
else
  ph.setStatus "unknown"
fi

# ---------- output

# first line is visible in uchiwa overview page
ph.status "$iErrors of $iTotal services failed (known: $iKnownOk OK, $iKnownError errors) ${sErrorServices}"
echo
echo "--- Overview of most relevant services"
if [ -f "${tmpOut}" ]; then
  sort -- "${tmpOut}"
fi

echo
echo "--- Other failed services:"
# a missing file (no other services) is handled like "no match"
if ! sort -- "${tmpOutOther}" 2>/dev/null | grep "^ERROR"; then
  echo "None."
fi

echo
echo "----- Status"
echo "Total            : $iTotal services"
echo "OK               : $iOk"
echo "Errors           : $iErrors"
echo "known OK         : $iKnownOk"
echo "known with errors: $iKnownError"
echo

if [ "$iErrors" -ne 0 ]; then
  echo "Below follows the status of each failed service"
  echo
  cat -- "${tmpErrors}"
  echo
fi

if [ "$iTotal" -eq 0 ]; then
  echo "WARNING: no service was found. Check"
  echo "bash -vx $0"
  echo
fi

echo
echo "--- INFO: Ignored services:"
if [ -f "${ignorefile}" ]; then
  ls -l -- "${ignorefile}"
  cat -- "${ignorefile}"
else
  echo "None (ignore file ${ignorefile} does not exist)."
fi


# ---------- cleanup & exit

rm -f "${tmpServices}" "${tmpErrors}" "${tmpOut}" "${tmpOutOther}" 2>/dev/null

ph.exit