From 0ea1c74717f81df222d894d353f648cae6d28776 Mon Sep 17 00:00:00 2001 From: "Hahn Axel (hahn)" <axel.hahn@unibe.ch> Date: Thu, 27 Jul 2023 12:24:19 +0200 Subject: [PATCH] update check_dns_responsetime; add doc page --- check_dns_responsetime | 90 +++++++++++++++++------- docs/20_Checks/_index.md | 2 +- docs/20_Checks/check_dns_responsetime.md | 80 +++++++++++++++++++++ 3 files changed, 147 insertions(+), 25 deletions(-) create mode 100644 docs/20_Checks/check_dns_responsetime.md diff --git a/check_dns_responsetime b/check_dns_responsetime index e78903f..6e86510 100755 --- a/check_dns_responsetime +++ b/check_dns_responsetime @@ -11,24 +11,22 @@ # test tcp 53 first # check result depends on 1st nameserver only # 2022-10-24 v1.2 <axel.hahn@unibe.ch> shell fixes; remove pipe char in output +# 2023-07-27 v1.3 <axel.hahn@iml.unibe.ch> shell fixes; add help page # ====================================================================== -. $(dirname $0)/inc_pluginfunctions +. $( dirname $0 )/inc_pluginfunctions + +self_APPNAME=$( basename $0 | tr [:lower:] [:upper:] ) +self_APPVERSION=1.3 + tmpfile=/tmp/check_netstat_out_$$ infofile=/tmp/check_netstat_out_2_$$ myHost=$( hostname -f ) -echo $myHost | cut -f 3- -d "." | grep "\." >/dev/null -if [ $? 
-ne 0 ]; then - ph.setStatus unknown - ph.status "DNS check for [$myHost] - SKIP: hostname -f returned a FQDN with less than 2 dots" - ph.exit -fi - # set default / override from command line params -typeset -i iWarnLimit=$( ph.getValueWithParam 300 w "$@") -typeset -i iCriticalLimit=$( ph.getValueWithParam 1000 c "$@") +typeset -i iWarnLimit; iWarnLimit=$( ph.getValueWithParam 300 w "$@") +typeset -i iCriticalLimit; iCriticalLimit=$( ph.getValueWithParam 1000 c "$@") rm -f $tmpfile $infofile 2>/dev/null @@ -38,23 +36,67 @@ typeset -i iTime=0 typeset -i iCounter=0 typeset -i iNotReachable=0 +# show help +function showHelp(){ + _self=$(basename $0) +cat <<EOF +______________________________________________________________________ + +$self_APPNAME +v$self_APPVERSION + +(c) Institute for Medical Education - University of Bern +Licence: GNU GPL 3 +______________________________________________________________________ + +Loop over all defined dns servers (in /etc/resolv.conf) and check +each: + - detect of port 53 is available (DNS service) + - 5x check time of a nslookup for current host (${myHost}) + +The a warning / critical response will be reached if the maximum time +of nslookup on the first dns server reaches a limit. + +The critical response will be returned if the first of the +nameservers is not available. + +An unknown response will be returned if the current hostname +(hostname -f) is no FQDN. + +SYNTAX: +$_self [ -w VALUE -c VALUE -h ] + +OPTIONS: + + -w VALUE warning level (default: 300) + -c VALUE critical level (default: 1000) + -h or --help Show this help. + +EOF +} + # ---------------------------------------------------------------------- # MAIN # ---------------------------------------------------------------------- # --- check param -h -if [ "$1" = "-h" ]; then - echo " - usage: $0 [ -w value -c value -h ] - - -w Warning level - -c Critical level - -h this help - " - exit 0 +case "$1" in + "--help"|"-h") + showHelp + exit 0 + ;; + *) +esac + +if ! 
echo $myHost | cut -f 3- -d "." | grep "\." >/dev/null; then + ph.setStatus unknown + ph.status "DNS check for [$myHost] - SKIP: hostname -f returned a FQDN with less than 2 dots" + ph.exit fi +ph.require nslookup + for mydns in $(grep ^nameserver /etc/resolv.conf | awk '{ print $2 } ' ) do iCounter=$iCounter+1 @@ -63,13 +105,12 @@ do echo "---------- $iCounter - $mydns " >>$infofile # todo loop - >/dev/tcp/${mydns}/53 >>$infofile 2>&1 - if [ $? -ne 0 ]; then + if ! >/dev/tcp/${mydns}/53 >>$infofile 2>&1; then iNotReachable=$iNotReachable+1 test $iCounter -eq 1 && ph.setStatus critical echo "ERROR: ${mydns} is not reachable on tcp 53" >>$infofile else - for i in `seq 5` + for i in $(seq 5) do (time nslookup ${myHost} $mydns) >$tmpfile 2>&1 @@ -84,7 +125,7 @@ do # --- set status test $iCounter -eq 1 && ph.setStatusByLimit $iSrvMax $iWarnLimit $iCriticalLimit - label=$(echo $mydns | sed "s#\.#-#g" ) + label=${mydns//\./-} ph.perfadd "response-$label" "${iSrvMax}" test $iSrvMax -gt $iMax && iMax=$iSrvMax fi @@ -93,7 +134,8 @@ do done -ph.status "DNS check for $myHost - found maximum was $iMax ms - $iNotReachable of $iCounter nameservers not reachable" +test $iNotReachable -eq 0 && ph.status "DNS check for $myHost - found maximum was $iMax ms - OK: all nameservers are reachable" +test $iNotReachable -ne 0 && ph.status "DNS check for $myHost - found maximum was $iMax ms - INFO: $iNotReachable of $iCounter nameservers not reachable" cat $infofile rm -f $tmpfile $infofile diff --git a/docs/20_Checks/_index.md b/docs/20_Checks/_index.md index 4cb4b73..80d492c 100644 --- a/docs/20_Checks/_index.md +++ b/docs/20_Checks/_index.md @@ -18,7 +18,7 @@ There is one include script used by all checks: * [check_cpu](check_cpu.md) * [check_cronstatus](check_cronstatus.md) * [check_disk-io](check_disk-io.md) -* check_dns_responsetime +* [check_dns_responsetime](check_dns_responsetime.md) * [check_eol](check_eol.md) * check_haproxy_health * check_haproxy_status diff --git 
a/docs/20_Checks/check_dns_responsetime.md b/docs/20_Checks/check_dns_responsetime.md new file mode 100644 index 0000000..438566e --- /dev/null +++ b/docs/20_Checks/check_dns_responsetime.md @@ -0,0 +1,80 @@ +# CHECK_DNS_RESPONSETIME + +## Introduction + +**check_dns_responsetime** checks the DNS response time of all DNS servers found in the local /etc/resolv.conf + +## Requirements + +* `nslookup` query Internet name servers + +## Syntax + +```txt +______________________________________________________________________ + +CHECK_DNS_RESPONSETIME +v1.3 + +(c) Institute for Medical Education - University of Bern +Licence: GNU GPL 3 +______________________________________________________________________ + +Loop over all defined dns servers (in /etc/resolv.conf) and check +each: + - detect of port 53 is available (DNS service) + - 5x check time of a nslookup for current host (www.example.com) + +The a warning / critical response will be reached if the maximum time +of nslookup on the first dns server reaches a limit. + +The critical response will be returned if the first of the +nameservers is not available. + +An unknown response will be returned if the current hostname +(hostname -f) is no FQDN. + +SYNTAX: +check_dns_responsetime [ -w VALUE -c VALUE -h ] + +OPTIONS: + + -w VALUE warning level (default: 300) + -c VALUE critical level (default: 1000) + -h or --help Show this help. + +``` + +### Parameters + +(none) + +## Examples + +`$ ./check_dns_responsetime` returns + +```txt +./check_dns_responsetime +OK: DNS check for www.example.com - found maximum was 46 ms - OK: all nameservers are reachable + +---------- 1 - 8.8.4.4 +8.8.4.4 #1 >>> 36 ms +8.8.4.4 #2 >>> 28 ms +8.8.4.4 #3 >>> 24 ms +8.8.4.4 #4 >>> 30 ms +8.8.4.4 #5 >>> 31 ms +max: 36 ms + ^ + : + +--- 1st nameserver is relevant for total status of the check. 
Limits are warning=300 and critical=1000 + + +---------- 2 - 8.8.8.8 +8.8.8.8 #1 >>> 39 ms +8.8.8.8 #2 >>> 35 ms +8.8.8.8 #3 >>> 33 ms +8.8.8.8 #4 >>> 46 ms +8.8.8.8 #5 >>> 43 ms +max: 46 ms + + |response-8-8-4-4=36;; response-8-8-8-8=46;;``` -- GitLab