Select Git revision
check_netstat
check_psqlserver 10.59 KiB
#!/bin/bash
# ======================================================================
#
# Check PSQL SERVER
#
# requirements:
# - psql client
#
# installation:
# - execute check_psqlserver -i
#
# ----------------------------------------------------------------------
# Postgresql docs:
# https://www.postgresql.org/docs/current/monitoring-stats.html
# ----------------------------------------------------------------------
# 2023-06-07 v0.2 <axel.hahn@unibe.ch>
# 2023-06-08 v0.3 <axel.hahn@unibe.ch> show unknown if database connection fails
# 2023-06-08 v0.4 <axel.hahn@unibe.ch> get summary for cronflicts and problems
# 2023-06-09 v0.5 <axel.hahn@unibe.ch> deltaunit can be set as parameter
# ======================================================================
. $(dirname $0)/inc_pluginfunctions
self_APPNAME=$( basename $0 | tr [:lower:] [:upper:] )
self_APPVERSION=0.5
# --- other vars...
cfgfile=/etc/icingaclient/.psql.conf
myuser=icingamonitor
# new line
NL="
"
out=""
# ----------------------------------------------------------------------
# FUNCTIONS
# ----------------------------------------------------------------------
# exit with status unknown if a database query failed
function _queryFailed(){
ph.setStatus "unknown"
echo "UNKNOWN: the database query failed"
echo "Quick troubleshooter:"
echo "* install a database user $myuser with command $(basename $0) -i"
echo "* check pg_hba.conf login fails if it set to ident (eg. change it to md5)"
ph.exit
}
# uninstall database user
function _uninstall(){
echo "UNINSTALL ..."
su - postgres -c "psql -c \"DROP USER ${myuser};\"" 2>&1
unset PGHOST
unset PGUSER
unset PGPASSWORD
unset PGDATABASE
}
# (re)install database user for monitoring
function _install(){
echo "INSTALLING ..."
local pwlength=64
echo "- check psql connection..."
su postgres -c "psql -V postgres"
if [ $? -ne 0 ]; then
echo "ERROR: psql connect without password failed."
exit 1
fi
echo "- creating database user $myuser with random password ($pwlength chars)..."
mypw=$( head /dev/urandom | tr -dc A-Za-z0-9 | head -c $pwlength )
SQL1="CREATE USER ${myuser} WITH PASSWORD '${mypw}' CONNECTION LIMIT 5";
SQL2="GRANT pg_monitor TO ${myuser};"
# SQL2="GRANT MONITOR_QUERIES TO ${myuser};"
if ! su - postgres -c "psql -c \"${SQL1}\" 2>&1"; then
echo "ERROR: psql command to create user failed."
exit 1
fi
echo "- grant ..."
if ! su - postgres -c "psql -c \"${SQL2}\" 2>&1" ; then
echo "ERROR: psql command to grant permissions failed."
# exit 1
fi
echo "- creating config file $cfgfile ... "
cat >$cfgfile <<EOF
#
# generated on $(date)
#
export PGUSER=${myuser}
export PGPASSWORD=${mypw}
export PGHOST=localhost
# set default database because a user db won't be created
export PGDATABASE=postgres
EOF
if ! ls -l $cfgfile; then
echo "ERROR: creation of config file failed."
exit 1
fi
}
# show help
function _usage(){
local _self=$( basename $0 )
cat <<EOH
______________________________________________________________________
${self_APPNAME} :: v${self_APPVERSION}
(c) Institute for Medical Education - University of Bern
Licence: GNU GPL 3
______________________________________________________________________
USAGE:
$_self [OPTIONS] -m METHOD
OPTIONS:
-h this help
-i install monitoring user (must be executed as root)
-u uninstall monitoring user (must be executed as root)
PARAMETERS:
-m method; valid methods are:
activity running processes and queries
conflicts Detected conflicts from pg_stat_database_conflicts
dbrows Count of database row actions
diskblock Count of diskblocks physically read or coming from cache
problems Problems and troublemakers
replication Replication status (table output only)
transactions Count of transactions over all databases
EXAMPLES:
$_self -i
$_self -m activity
EOH
}
# render incremental counters from integer results of a given sql query
# global string out output of check
# param string database query
# param string time unit; one of sec|min; default: sec
function renderCounters(){
local _query; _query="$1"
local _sDeltaUnit; _sDeltaUnit=${2:-sec}
local _out; _out=$( psql -c "${_query} " 2>&1) || _queryFailed
local _iCounter; typeset -i _iCounter=0
local _header; _header=$( echo "${_out}" | head -1 | tr -d ' ')
local _data; _data=$( echo "${_out}" | head -3 | tail -1 | tr -d ' ')
local _iSpeed; typeset -i _iSpeed=0
local _sStoreid; _sStoreid=$( md5sum <<< "${_query}" | awk '{ print $1 }' )
local _iValue; typeset -i _iValue
# read psql result and put columns and values to an array
IFS="|"
read -ra aCols <<< "$_header"
read -ra aVals <<< "$_data"
for sColumn in ${aCols[*]}
do
_iValue=${aVals[$_iCounter]}
_iSpeed=$( ph.perfdeltaspeed "psql-${sColumn}-${_sStoreid}" ${_iValue} sec $_sDeltaUnit)
out=${out}$( printf "%-25s: %15s %s \n" "${sColumn}" "${_iValue}" "... delta = ${_iSpeed} per $_sDeltaUnit${NL}" )
ph.perfadd "${sColumn}" "${_iSpeed}"
_iCounter+=1
done
}
# ----------------------------------------------------------------------
# MAIN
# ----------------------------------------------------------------------
bOptInstall=$( ph.hasParamoption "i" "$@")
bOptUninstall=$( ph.hasParamoption "u" "$@")
bOptHelp=$( ph.hasParamoption "h" "$@")
if [ $bOptHelp -eq 1 -o $# -lt 1 ]; then
_usage
exit 0
fi
# --- check required tools
# ph.require mysql
# --- install
if [ $bOptInstall -eq 1 -a "$( whoami )" = "root" ]; then
if [ -f $cfgfile ]; then
ph.status "SKIP installation. config file already exists: $cfgfile."
ph.exit
fi
_uninstall
_install
ph.status "Installation was done"
ph.exit
fi
# --- uninstall
if [ $bOptUninstall -eq 1 -a "$( whoami )" = "root" ]; then
_uninstall
rm -f $cfgfile
ph.status "Uninstalled."
ph.exit
fi
# --- check installation
if ! grep $myuser $cfgfile >/dev/null 2>&1; then
ph.abort "PSQL access not possible yet. You need to install the monitoring user first: as root execute `basename $0` -i"
fi
. "${cfgfile}"
# ----------------------------------------------------------------------
sMode=$(ph.getValueWithParam '' m "$@")
case "${sMode}" in
"activity")
_out=$( psql -c "select backend_type,state,query from pg_stat_activity" 2>&1 ) || _queryFailed
typeset -i iQTotal; iQTotal=$( tail -1 <<< "$_out" | cut -f 1 -d ' ' | tr -d '(' )
typeset -i iQActive; iQActive=$( awk '{ print $4 }' <<< "$_out" | grep -c "active" )
typeset -i iQIdle; iQIdle=$( awk '{ print $4 }' <<< "$_out" | grep -c "idle" )
typeset -i iQFastpath; iQFastpath=$(awk '{ print $4 }' <<< "$_out" | grep -c "fastpath" )
typeset -i iQOther; iQOther=$iQTotal-$iQActive-$iQIdle-$iQFastpath
descr="Running total: $iQTotal ... active: $iQActive idle: $iQIdle fastpath: $iQFastpath other: $iQOther"
out=$( grep "^.[a-z]" <<< "${_out}" | cut -c 1-150 | tr '|' ':' )
ph.perfadd "running-total" "${iQTotal}"
ph.perfadd "running-active" "${iQActive}"
ph.perfadd "running-idle" "${iQIdle}"
ph.perfadd "running-fastpath" "${iQFastpath}"
ph.perfadd "running-other" "${iQOther}"
;;
"conflicts")
deltaunit=min
descr="Detected conflicts (from pg_stat_database_conflicts)"
renderCounters "select \
sum(confl_tablespace) as confl_tablespace, \
sum(confl_lock) as confl_lock, \
sum(confl_snapshot) as confl_snapshot, \
sum(confl_bufferpin) as confl_bufferpin, \
sum(confl_deadlock) as confl_deadlock \
from pg_stat_database_conflicts " "${deltaunit}"
# sum of 7th column (with delta per sec)
typeset -i iTotal; iTotal=$( echo "$out" | awk '{ sum+=$7} END { print sum;}' )
if [ $iTotal -gt 0 ]; then
ph.setStatus "warning"
fi
descr+=" - currently $iTotal per ${deltaunit}"
;;
"dbrows")
descr="Count of database row actions (from pg_stat_database)"
renderCounters "select \
sum(tup_returned) as tup_returned, \
sum(tup_fetched) as tup_fetched, \
sum(tup_inserted) as tup_inserted, \
sum(tup_updated) as tup_updated, \
sum(tup_deleted) as tup_deleted \
from pg_stat_database "
;;
"diskblock")
descr="Count of diskblocks physically read or coming from cache (from pg_stat_database)"
renderCounters "select \
sum(blks_read) as blks_read, \
sum(blks_hit) as blks_hit \
from pg_stat_database "
;;
"problems")
deltaunit=min
descr="Problems and troublemakers (from pg_stat_database)"
renderCounters "select \
sum(conflicts) as conflicts, \
sum(deadlocks) as deadlocks, \
sum(checksum_failures) as checksumfailures, \
sum(temp_files) as temp_files, \
sum(temp_bytes) as temp_bytes \
from pg_stat_database" "${deltaunit}"
# sum of 7th column (with delta per sec)
typeset -i iTotal; iTotal=$( echo "$out" | awk '{ sum+=$7} END { print sum;}' )
if [ $iTotal -gt 0 ]; then
ph.setStatus "warning"
fi
descr+=" - currently $iTotal per ${deltaunit}"
;;
"replication")
_out=$( psql -c "select * from pg_stat_replication" 2>&1 ) || _queryFailed
if tail -1 <<< "$_out" | grep "(0 rows)" >/dev/null ; then
descr="No data in pg_stat_replication - this is no slave."
out=""
else
descr="status (from pg_stat_replication)"
out=$( echo "${_out}${NL}" | tr '|' ':' )
fi
;;
"transactions")
descr="Count of transactions over all databases (from pg_stat_database)"
renderCounters "select \
sum(xact_commit) as commit, \
sum(xact_rollback) as rollback \
from pg_stat_database "
;;
*)
echo ERRROR: [${sMode}] is an INVALID mode
_usage
ph.abort
esac
ph.status "Pgsql $sMode :: $descr"
echo " $out"
ph.exit
# ----------------------------------------------------------------------