-
Hahn Axel (hahn) authoredHahn Axel (hahn) authored
check_php-fpm-status 9.55 KiB
#!/bin/bash
# ======================================================================
#
# NAGIOS CLIENT CHECK :: php-fpm requests
#
# ----------------------------------------------------------------------
# script checks output of fpm "/status" and counts scoreboard chars
# ----------------------------------------------------------------------
# 2021-09-22 v0.1 <axel.hahn@iml.unibe.ch> initial version
# 2021-10-01 v0.2 <axel.hahn@iml.unibe.ch> fetch full status as json
# 2021-12-23 v0.4 <axel.hahn@iml.unibe.ch> remove switch to warning level if just one slow request was detected
# 2022-02-11 v0.5 <axel.hahn@iml.unibe.ch> show running workers in 1st line
# 2022-04-01 v0.6 <axel.hahn@iml.unibe.ch> use wget default params; shell fixes
# 2022-05-09 v0.7 <axel.hahn@iml.unibe.ch> use wget default params
# 2022-07-08 v0.8 <axel.hahn@iml.unibe.ch> eliminate division by 0
# 2022-10-21 v1.3 <axel.hahn@unibe.ch> remove grep: warning: stray \ before white space
# ======================================================================
# Load the shared plugin helper library from the directory this script lives
# in (presumably the provider of the ph.* functions used below). The path is
# quoted so that an installation directory containing spaces still works.
. "$(dirname "$0")/inc_pluginfunctions"

# Private scratch file for the fetched status JSON. mktemp replaces the former
# fixed name in /tmp, which was predictable (symlink attacks) and shared
# between parallel runs of this check.
tmpfile=$(mktemp "${TMPDIR:-/tmp}/check_fpm_processes_XXXXXX") || {
  echo "UNKNOWN: unable to create a temporary file"
  exit 3
}
# Remove the scratch file on every exit path (help, abort, regular end).
trap 'rm -f "$tmpfile"' EXIT

# status url that is used if no -u parameter is given
defaulturl=localhost/status
# time unit for the delta based speed counters
sDeltaunit="min"
# ----------------------------------------------------------------------
# functions
# ----------------------------------------------------------------------
# Print the pool level key/value pairs of the fetched status JSON, one
# ' "key": value' line per entry (single leading space, trailing comma cut).
# Globals: tmpfile (read) - file with the JSON status document
# Outputs: the matching lines on stdout
function _getServicedata(){
  # --indent 1 pins the indentation: top level keys get exactly one leading
  # space, which is what the '^ "' filter here and the pattern in _getvalue
  # rely on. With jq's default indentation of 2 spaces nothing would match.
  # Nested worker entries are indented deeper and are dropped; the line that
  # opens the "processes" array is removed by the '[' filter.
  jq --indent 1 . "$tmpfile" \
    | grep '^ "' \
    | grep -vF '[' \
    | cut -f 1 -d ","
}
# List every distinct worker state found in the fetched status, one per line,
# sorted. Each line keeps the leading space and the double quotes of the
# pretty printed JSON value.
# Globals: tmpfile (read)
function _getWorkerStates(){
  jq < "$tmpfile" \
    | grep '"state": ' \
    | awk -F ':' '{ sub(/,.*$/, "", $2); print $2 }' \
    | sort -u
}
# Print all worker entries that are in a given state, one compact JSON object
# per line (so the number of lines is the number of workers in that state).
# Globals: tmpfile (read)
# param  string  state to filter for, e.g. "Idle" or "Reading headers"
function _getWorkerOfState(){
  # The state is handed over with --arg instead of being pasted into the jq
  # program: a value containing a quote or backslash can no longer break or
  # change the filter.
  jq -c --arg state "$1" '.processes[] | select(.state == $state)' "$tmpfile"
}
# get a value from fpm status
#
# example output:
# pool: www
# process manager: dynamic
# start time: 21/Sep/2021:16:01:12 +0200
# start since: 65914
# accepted conn: 34
# listen queue: 0
# max listen queue: 0
# listen queue len: 0
# idle processes: 6
# active processes: 3
# total processes: 9
# max active processes: 6
# max children reached: 0
# slow requests: 0
#
# param string variable (part before ":")
function _getvalue(){
  local sKey="$1"
  # former variant for the plain text status output:
  # grep "^$1:" $tmpfile | cut -d ":" -f 2 | awk '{ print $1 }'

  # Pick the line of the wanted key, take what follows the first colon and
  # print the first whitespace separated word of it.
  _getServicedata \
    | grep "^ \"${sKey}\":" \
    | awk -F ':' '{ split($2, aWords, " "); print aWords[1] }'
}
# Print the usage text of this check to stdout.
# Globals: sDeltaunit, defaulturl (read; both are inserted into the text)
function showHelp(){
  # Declaration and assignment are separated so that "local" does not mask
  # the exit status; $0 is quoted to survive paths containing spaces.
  local _self
  _self=$(basename "$0")
  cat <<EOF
______________________________________________________________________
CHECK_PHP-FPM-Status
Get counters from PHP-FPM status output for active/ idle processes.
(c) Institute for Medical Education - University of Bern
Licence: GNU GPL 3
______________________________________________________________________
The check fetches several counters from php-fpm-status page.
It shows a short service status in a single line and then the dump of the
status page.
For performance data it echos:
php-fpm-active count of active workers (="Running" + "Reading headers")
php-fpm-maxactive max active processes (sum of idle + running + reading)
php-fpm-idle count of workers in state "Idle"
php-fpm-running count of workers in state "Running"
php-fpm-reading count of workers in state "Reading headers"
php-fpm-queue count of items in the queue
php-fpm-maxqueue max listen queue
php-fpm-slow slow requests per $sDeltaunit (since last execution of this check)
php-fpm-speed requests per $sDeltaunit (since last execution of this check)
SYNTAX:
$_self [-u URL]
OPTIONS:
-u url to fpm status page (optional; default: $defaulturl)
-h or --help show this help.
PARAMETERS:
None.
EXAMPLE:
$_self -u http://localhost/my-custom-fpm-statuspage.php
EOF
}
# ----------------------------------------------------------------------
# check help
# ----------------------------------------------------------------------
# Only the first argument is inspected: print the usage text and leave with
# exit code 0 when it asks for help; anything else falls through.
if [[ "$1" == "--help" || "$1" == "-h" ]]; then
  showHelp
  exit 0
fi
# ----------------------------------------------------------------------
# pre checks
# ----------------------------------------------------------------------
ph.require jq wget
# ----------------------------------------------------------------------
# check params
# ----------------------------------------------------------------------
# set default / override from command line params
# NOTE(review): the limits of -w and -c are read here but no check further
# down in this script evaluates them (the usage based check is commented out).
typeset -i iWarnLimit=$( ph.getValueWithParam 75 w "$@")
typeset -i iCriticalLimit=$( ph.getValueWithParam 90 c "$@")
url=$( ph.getValueWithParam "$defaulturl" u "$@" )
# wget options: 5 sec timeout, a single try, do not verify the certificate
paramsWget="-T 5 -t 1 --no-check-certificate"

# --- fetch the fpm status page as json including the per worker details
# shellcheck disable=SC2086 # $paramsWget has to be split into single options
if ! wget $paramsWget -O "$tmpfile" "$url?full&json" 2>/dev/null; then
  rm -f "$tmpfile"
  # The request is repeated to get a reason for the message. wget writes its
  # diagnostics and the server response (-S) to stderr, so stderr has to be
  # merged into the captured text - otherwise the reason would be lost.
  ph.abort "UNKNOWN: request to url $url failed. $(wget $paramsWget -O - -S "$url" 2>&1)"
fi
# ----------------------------------------------------------------------
# get values from status output
# ----------------------------------------------------------------------
# --- handled requests per $sDeltaunit (delta of the "accepted conn" counter)
declare -i iConn iSpeed
iConn=$( _getvalue "accepted conn" )
iSpeed=$( ph.perfdeltaspeed "fpm-accepted" "$iConn" "$sDeltaunit" )

# --- process counters from the status summary
declare -i iActive iMaxActive iIdle
iActive=$( _getvalue "active processes" )
iMaxActive=$( _getvalue "max active processes" )
iIdle=$( _getvalue "idle processes" )

# --- experimental: values used to generate warning / error
declare -i iQueue iMaxQueue iSlowTotal iSlow iMaxChilds
iQueue=$( _getvalue "listen queue len" )
iMaxQueue=$( _getvalue "max listen queue" )
iSlowTotal=$( _getvalue "slow requests" )
iSlow=$( ph.perfdeltaspeed "fpm-slow" "$iSlowTotal" "$sDeltaunit" )
iMaxChilds=$( _getvalue "max children reached" )

# share of slow requests in percent; stays 0 without any traffic so that
# there is never a division by zero
declare -i iSlowPercent=0
if (( iSpeed > 0 )); then
  iSlowPercent=$(( iSlow * 100 / iSpeed ))
fi

# --- count the workers per state from the process list (one line = one worker)
declare -i iWorkerRunning iWorkerReading iWorkerIdle
iWorkerRunning=$( _getWorkerOfState "Running" | wc -l )
iWorkerReading=$( _getWorkerOfState "Reading headers" | wc -l )
iWorkerIdle=$( _getWorkerOfState "Idle" | wc -l )
# ----------------------------------------------------------------------
# set status
# ----------------------------------------------------------------------
# The count of available worker slots is part of the fpm config only and not
# of the status output - a usage based limit check is therefore not possible:
#   iUsage=$iActive*100/$iSlots
#   ph.setStatusByLimit $iUsage $iWarnLimit $iCriticalLimit

# requests are waiting in the listen queue
if (( iQueue > 0 )); then
  ph.setStatus warning
fi

# Disabled: no switch to warning level if just one slow request was detected
#   if [ $iSlow -gt 0 ]; then
#     ph.setStatus warning
#   fi

# Disabled (5601): no warning for this limit (it is a flag and won't remove)
#   if [ $iMaxChilds -gt 0 ]; then
#     # ph.setStatus critical
#     ph.setStatus warning
#   fi

# not a single worker is in state "Idle"
if (( iWorkerIdle == 0 )); then
  ph.setStatus warning
fi

# Disabled: seems not to be useful
#   if [ $iWorkerReading -eq 0 ]; then
#     ph.setStatus warning
#   fi
# ----------------------------------------------------------------------
# output
# ----------------------------------------------------------------------
# short status line, followed by the worker and queue counters
ph.status "PHP-FPM service: running: $iWorkerRunning .. active: $iActive (max: $iMaxActive) .. idle workers: $iIdle .. queue: $iQueue .. speed: $iSpeed req per $sDeltaunit ... slow: $iSlow req per $sDeltaunit ($iSlowPercent%; total: $iSlowTotal)"
printf '%s\n' \
  "Workers: Running: $iWorkerRunning" \
  " Reading headers: $iWorkerReading" \
  " Idle: $iWorkerIdle" \
  "" \
  " Waiting for a worker (queue): $iQueue (max: $iMaxQueue)" \
  ""

# ----- explain every condition that may have raised the warning level
hint="!! IMPORTANT !! Any non-OK status is still experimmental."

if (( iWorkerIdle == 0 )); then
  printf '%s\n' \
    "$hint" \
    "WARNING: No idle workers available." \
    " Maybe there is a current peak only." \
    " Or count of allowed workers (pm.max_children) or spare servers (pm.XXX_spare_servers) is too low." \
    ""
fi

# Disabled: hint for missing workers in state "Reading headers"
#   if [ $iWorkerReading -eq 0 ]; then
#     echo $hint
#     echo "WARNING: No reading workers available."
#     echo " Maybe there is a current peak only."
#     echo " Or count of allowed workers (pm.max_children) or spare servers (pm.XXX_spare_servers) is too low."
#     echo
#   fi

# Disabled (5601): no warning for this limit (it is a flag and won't remove)
#   if [ $iMaxChilds -gt 0 ]; then
#     echo $hint
#     echo "WARNING: Max. count of children was reached: $iMaxChilds. Maximum of active workers was $iMaxActive - maybe count of allowed workers (pm.max_children) is too low."
#     echo
#   fi

if (( iQueue > 0 )); then
  printf '%s\n' \
    "$hint" \
    "WARNING: $iQueue queued requests were found. Maximum of queued items is $iMaxQueue (since last start of fpm service)." \
    ""
fi

# slow requests are reported as a hint only (without the "experimental" note)
if (( iSlow > 0 )); then
  printf '%s\n' \
    "HINT: $iSlow slow requests per $sDeltaunit were found ($iSlowPercent%)... total $iSlowTotal slow req were detected (since last start of fpm service)." \
    ""
fi

# ----- dump of the service data and of the currently running workers
echo "--- Status of service"
_getServicedata
echo
echo "--- workers in state Running"
_getWorkerOfState "Running"
echo

# ----- add performance data; each item is "label=value".
# "php-fpm-idle" is taken from the worker list (iWorkerIdle) and not from the
# status summary (iIdle); the last three values are counted by worker state.
for sPerfItem in \
  "php-fpm-active=${iActive}" \
  "php-fpm-maxactive=${iMaxActive}" \
  "php-fpm-queue=${iQueue}" \
  "php-fpm-maxqueue=${iMaxQueue}" \
  "php-fpm-slow=${iSlow}" \
  "php-fpm-speed=${iSpeed}" \
  "php-fpm-idle=${iWorkerIdle}" \
  "php-fpm-running=${iWorkerRunning}" \
  "php-fpm-reading=${iWorkerReading}"
do
  ph.perfadd "${sPerfItem%%=*}" "${sPerfItem#*=}" "" "" 0 0
done

# ----- cleanup and leave via the plugin library
rm -f "$tmpfile"
ph.exit
# ----------------------------------------------------------------------