#!/bin/bash
# check_php-fpm-status
# ======================================================================
#
# NAGIOS CLIENT CHECK :: php-fpm requests
#
# ----------------------------------------------------------------------
# script checks output of fpm "/status" and counts scoreboard chars
# ----------------------------------------------------------------------
# 2021-09-22  v0.1  <axel.hahn@iml.unibe.ch>  initial version
# 2021-10-01  v0.2  <axel.hahn@iml.unibe.ch>  fetch full status as json
# 2021-12-23  v0.4  <axel.hahn@iml.unibe.ch>  remove switch to warning level if just one slow request was detected
# 2022-02-11  v0.5  <axel.hahn@iml.unibe.ch>  show running workers in 1st line
# 2022-04-01  v0.6  <axel.hahn@iml.unibe.ch>  use wget default params; shell fixes
# 2022-05-09  v0.7  <axel.hahn@iml.unibe.ch>  use wget default params
# 2022-07-08  v0.8  <axel.hahn@iml.unibe.ch>  eliminate division by 0
# 2022-10-21  v1.3  <axel.hahn@unibe.ch>      remove grep: warning: stray \ before white space
# ======================================================================

# Load the shared plugin helper functions (ph.*) that are located next to this
# script. Quoted, so that an installation path with blanks still works.
# shellcheck source=/dev/null
. "$(dirname "$0")/inc_pluginfunctions"

# Private temp file for the downloaded status document. A fixed name in /tmp
# is predictable (symlink attacks) and parallel runs would overwrite each
# other's data.
tmpfile=$(mktemp "${TMPDIR:-/tmp}/check_fpm_processes_XXXXXX") || {
    echo "UNKNOWN: unable to create a temporary file"
    exit 3
}
# Remove the temp file on every exit path (help, abort, regular end).
# NOTE(review): assumes inc_pluginfunctions does not install its own EXIT
# trap - confirm.
trap 'rm -f -- "$tmpfile"' EXIT

# status url that is used if no -u parameter is given
defaulturl=localhost/status

# time unit of the delta counters ("requests per ...")
sDeltaunit="min"

# ----------------------------------------------------------------------
# functions
# ----------------------------------------------------------------------

# Get the pool-wide status values from the JSON status document.
#
# Prints every top level key of $tmpfile as a line of the form
#   "<key>": <value>
# with two leading blanks (the pretty-print indentation of jq). The
# "processes" list is skipped because its line contains a "[".
#
# Globals:  tmpfile (read)
# Outputs:  key/value lines on stdout
function _getServicedata(){
    # An explicit "." filter is given because older jq releases do not
    # default to it. sed removes the trailing separator comma only, so that
    # a value that contains a comma is kept complete.
    jq . "$tmpfile" | grep '^  "' | grep -v "\[" | sed 's/,$//'
}

# List the distinct worker states that occur in the status document.
# Each state is printed once and sorted; it keeps its JSON quotes and the
# blank that follows the colon.
#
# Globals:  tmpfile (read)
function _getWorkerStates(){
    jq <"$tmpfile" \
        | awk -F ':' '/"state": / { split($2, aPart, ","); print aPart[1] }' \
        | sort -u
}


# Print all worker entries that are in a given state - one compact JSON
# object per line.
#
# Globals:  tmpfile (read)
# param  string  state to match exactly, eg. "Idle" or "Reading headers"
function _getWorkerOfState(){
    # The state is handed over as jq variable instead of being pasted into
    # the filter: quotes or backslashes in the value cannot break or change
    # the jq program.
    jq -c --arg state "$1" '.processes[] | select(.state == $state)' "$tmpfile"
}

# Read a single pool-wide value from the fpm status.
#
# The value is looked up in the output of _getServicedata, which prints
# lines like
#   "accepted conn": 34
#   "listen queue": 0
#   "idle processes": 6
# Keys that are read in this script: accepted conn, listen queue len,
# max listen queue, idle processes, active processes, max active processes,
# max children reached, slow requests.
#
# Only the first whitespace separated word behind the first colon is
# returned; a string value keeps its JSON quotes.
#
# param  string  name of the key (the text between the quotes)
function _getvalue(){
    local sKey="$1"
    _getServicedata \
        | grep "^  \"${sKey}\":" \
        | awk -F ':' '{ split($2, aWord, " "); print aWord[1] }'
}


# Print the help text of this check to stdout.
#
# Globals:  sDeltaunit, defaulturl (read; expanded inside the text)
function showHelp(){
    # name of the script without its path; the parameter expansion is safe
    # for paths that contain blanks
    local _self="${0##*/}"
    cat <<EOF
______________________________________________________________________

CHECK_PHP-FPM-Status
Get counters from PHP-FPM status output for active/ idle processes.

(c) Institute for Medical Education - University of Bern
Licence: GNU GPL 3
______________________________________________________________________

The check fetches several counters from php-fpm-status page.

It shows a short service status in a single line and then the dump of the 
status page.
For performance data it echos:

    php-fpm-active     count of active workers (="Running" + "Reading headers")
    php-fpm-maxactive  max active processes (sum of idle + running + reading)
    php-fpm-idle       count of workers in state "Idle"
    php-fpm-running    count of workers in state "Running"
    php-fpm-reading    count of workers in state "Reading headers"
    php-fpm-queue      count of items in the queue
    php-fpm-maxqueue   max listen queue
    php-fpm-slow       slow requests per $sDeltaunit (since last execution of this check)
    php-fpm-speed      requests per $sDeltaunit (since last execution of this check)

SYNTAX:
$_self [-u URL]

OPTIONS:

    -u  url to fpm status page  (optional; default: $defaulturl)
    -h or --help   show this help.

PARAMETERS:

    None.

EXAMPLE:
$_self -u http://localhost/my-custom-fpm-statuspage.php

EOF
}

# ----------------------------------------------------------------------
# check help
# ----------------------------------------------------------------------

# Show the help text and stop if the first argument asks for it.
if [[ "$1" == "--help" || "$1" == "-h" ]]; then
    showHelp
    exit 0
fi

# ----------------------------------------------------------------------
# pre checks
# ----------------------------------------------------------------------
ph.require jq wget


# ----------------------------------------------------------------------
# check params
# ----------------------------------------------------------------------

# set default / override from command line params
# (the -w/-c limits are read, but currently no status rule evaluates them)
typeset -i iWarnLimit=$(     ph.getValueWithParam 75 w "$@")
typeset -i iCriticalLimit=$( ph.getValueWithParam 90 c "$@")
url=$( ph.getValueWithParam "$defaulturl" u "$@" )

# wget options: timeout of 5 sec, a single try, accept untrusted certificates.
# Kept as array so that each option is passed as a word of its own.
paramsWget=( -T 5 -t 1 --no-check-certificate )

# --- fetch the fpm status page with the list of all workers as JSON
if ! wget "${paramsWget[@]}" -O "$tmpfile" "$url?full&json" 2>/dev/null; then
   rm -f "$tmpfile"
   # Repeat the request to show the reason in the message. wget writes the
   # response headers (-S) and its error messages to stderr - without the
   # redirection the command substitution would stay empty.
   ph.abort "UNKNOWN: request to url $url failed. $(wget "${paramsWget[@]}" -O - -S "$url" 2>&1)"
fi

# A response that is no JSON object (empty file, html page, plain text
# status) cannot be parsed below and would be reported as "0 idle workers".
if ! jq -e 'type == "object"' "$tmpfile" >/dev/null 2>&1; then
   rm -f "$tmpfile"
   ph.abort "UNKNOWN: response of $url?full&json is not a JSON status document."
fi

# ----------------------------------------------------------------------
# get values from status output
# ----------------------------------------------------------------------

# --- handled requests per sec
# All counters are integer variables: an empty result becomes 0.

# --- handled requests per $sDeltaunit, calculated from the total counter
declare -i iConn iSpeed
iConn=$( _getvalue "accepted conn" )
iSpeed=$( ph.perfdeltaspeed "fpm-accepted" "$iConn" "$sDeltaunit" )

# --- pool-wide process counters
declare -i iActive iMaxActive iIdle
iActive=$( _getvalue "active processes" )
iMaxActive=$( _getvalue "max active processes" )
iIdle=$( _getvalue "idle processes" )

# --- experimental: values that are used to generate a warning / error
declare -i iQueue iMaxQueue iSlowTotal iSlow iMaxChilds
iQueue=$( _getvalue "listen queue len" )
iMaxQueue=$( _getvalue "max listen queue" )
iSlowTotal=$( _getvalue "slow requests" )
iSlow=$( ph.perfdeltaspeed "fpm-slow" "$iSlowTotal" "$sDeltaunit" )
iMaxChilds=$( _getvalue "max children reached" )

# --- share of slow requests; stays 0 without traffic (no division by 0)
declare -i iSlowPercent=0
if (( iSpeed > 0 )); then
    iSlowPercent=$(( iSlow * 100 / iSpeed ))
fi


# --- count the workers by state in the list of processes
declare -i iWorkerRunning iWorkerReading iWorkerIdle
iWorkerRunning=$( _getWorkerOfState "Running"         | wc -l )
iWorkerReading=$( _getWorkerOfState "Reading headers" | wc -l )
iWorkerIdle=$(    _getWorkerOfState "Idle"            | wc -l )


# ----------------------------------------------------------------------
# set status
# ----------------------------------------------------------------------
# damn, count of slots is in the config only - not in status output
# iUsage=$iActive*100/$iSlots
# ph.setStatusByLimit $iUsage $iWarnLimit $iCriticalLimit
# requests are waiting in the queue for a worker
if (( iQueue > 0 )); then
    ph.setStatus warning
fi

# remove switch to warning level if just one slow request was detected
# if [ $iSlow -gt 0 ]; then
#     ph.setStatus warning
# fi

# 5601 - remove warning for this limit (it is a flag and won't remove)
# if [ $iMaxChilds -gt 0 ]; then
#     # ph.setStatus critical
#     ph.setStatus warning
# fi

# no worker is left in state "Idle"
if (( iWorkerIdle == 0 )); then
    ph.setStatus warning
fi

# seems not to be useful
# if [ $iWorkerReading -eq 0 ]; then
#     ph.setStatus warning
# fi

# ----------------------------------------------------------------------
# output
# ----------------------------------------------------------------------
# first line: short service status
sStatusLine="PHP-FPM service: running: $iWorkerRunning .. active: $iActive (max: $iMaxActive) .. idle workers: $iIdle .. queue: $iQueue .. speed: $iSpeed req per $sDeltaunit ... slow: $iSlow req per $sDeltaunit ($iSlowPercent%; total: $iSlowTotal)"
ph.status "$sStatusLine"

# worker counters by state and the queue, each part followed by an empty line
printf '%s\n' \
    "Workers:               Running: $iWorkerRunning" \
    "               Reading headers: $iWorkerReading" \
    "                          Idle: $iWorkerIdle" \
    "" \
    "  Waiting for a worker (queue): $iQueue (max: $iMaxQueue)" \
    ""

# ----- output hints on warning level
# Each hint explains one of the conditions found above. The variables are
# quoted in the tests and in echo to keep the text and the tests free from
# word splitting and globbing.
hint="!! IMPORTANT !! Any non-OK status is still experimental."
if [ "$iWorkerIdle" -eq 0 ]; then
    echo "$hint"
    echo "WARNING: No idle workers available."
    echo "         Maybe there is a current peak only."
    echo "         Or count of allowed workers (pm.max_children) or spare servers (pm.XXX_spare_servers) is too low."
    echo
fi
# if [ $iWorkerReading -eq 0 ]; then
#     echo $hint
#     echo "WARNING: No reading workers available."
#     echo "         Maybe there is a current peak only."
#     echo "         Or count of allowed workers (pm.max_children) or spare servers (pm.XXX_spare_servers) is too low."
#     echo
# fi

# 5601 - remove warning for this limit (it is a flag and won't remove)
# if [ $iMaxChilds -gt 0 ]; then
#     echo $hint
#     echo "WARNING: Max. count of children was reached: $iMaxChilds. Maximum of active workers was $iMaxActive - maybe count of allowed workers (pm.max_children) is too low."
#     echo
# fi
if [ "$iQueue" -gt 0 ]; then
    echo "$hint"
    echo "WARNING: $iQueue queued requests were found. Maximum of queued items is $iMaxQueue (since last start of fpm service)."
    echo
fi
# slow requests are reported as information only
if [ "$iSlow" -gt 0 ]; then
    # echo $hint
    echo "HINT: $iSlow slow requests per $sDeltaunit were found  ($iSlowPercent%)... total $iSlowTotal slow req were detected (since last start of fpm service)."
    echo
fi

# dump of the pool-wide status values
printf '%s\n' "--- Status of service"
_getServicedata

# list of the workers that currently handle a request
printf '\n%s\n' "--- workers in state Running"
_getWorkerOfState "Running"
printf '\n'

# --- add performance data
# "label:value" pairs in the order they are added. The last three values are
# counted by worker state from the process list; the pool counter iIdle is
# not added as "php-fpm-idle".
for sPerfItem in \
    "php-fpm-active:${iActive}" \
    "php-fpm-maxactive:${iMaxActive}" \
    "php-fpm-queue:${iQueue}" \
    "php-fpm-maxqueue:${iMaxQueue}" \
    "php-fpm-slow:${iSlow}" \
    "php-fpm-speed:${iSpeed}" \
    "php-fpm-idle:${iWorkerIdle}" \
    "php-fpm-running:${iWorkerRunning}" \
    "php-fpm-reading:${iWorkerReading}"
do
    # text before the first colon is the label, the rest is the value
    ph.perfadd "${sPerfItem%%:*}" "${sPerfItem#*:}" "" "" 0 0
done


# cleanup and leave
rm -f "$tmpfile"
ph.exit

# ----------------------------------------------------------------------