#!/bin/bash
# ======================================================================
#
# Check DISK IO over all disks
# data based on /proc/diskstats
# https://www.kernel.org/doc/Documentation/iostats.txt
#
# based on /sys/block/*/stat
# https://www.kernel.org/doc/Documentation/block/stat.txt
#
# ----------------------------------------------------------------------
# 2020-07-17  v1.0  <axel.hahn@iml.unibe.ch>
# ======================================================================


. `dirname $0`/inc_pluginfunctions


# ----------------------------------------------------------------------
# FUNCTIONS
# ----------------------------------------------------------------------

# Show block device information as reported by lsblk.
#
# param  string  $1  optional: comma separated list of lsblk output columns;
#                    if missing or empty a default set of columns is used
# output: the lsblk table (-a: all devices, -i: ASCII tree) on STDOUT
# return: exit status of lsblk
function _diskInfo(){
    # quoted on purpose: the column list must reach lsblk as ONE argument,
    # even if a caller passes blanks or glob characters
    local _fields="${1:-NAME,MAJ:MIN,TYPE,SIZE,FSTYPE,MOUNTPOINT,STATE,ROTA,VENDOR,MODEL,SERIAL,HCTL}"
    lsblk -ai --output "$_fields"
}

# List the names of all block devices of type "disk", one per line.
#
# The TYPE column is compared exactly (not as a substring of the whole line),
# so a device whose NAME merely contains "disk" (e.g. an LVM volume
# "vg-disk1") is not reported. Fields are addressed from the end of the line
# because the ASCII tree prefix in front of a name may contain blanks.
#
# output: device names reduced to the characters a-z and 0-9 on STDOUT
function getDisks(){
    _diskInfo "NAME,TYPE" \
        | awk 'NF >= 2 && $NF == "disk" { print $(NF-1) }' \
        | sed "s#[^a-z0-9]##g"
}
# List the names of all block devices of type "part", one per line.
#
# The TYPE column is compared exactly (not as a substring of the whole line),
# so a device whose NAME merely contains "part" is not reported. Fields are
# addressed from the end of the line because the ASCII tree prefix in front
# of a nested name (e.g. "| `-md0p1") contains blanks.
#
# output: partition names reduced to the characters a-z and 0-9 on STDOUT
function getPartitions(){
    _diskInfo "NAME,TYPE" \
        | awk 'NF >= 2 && $NF == "part" { print $(NF-1) }' \
        | sed "s#[^a-z0-9]##g"
}

# Print the usage text of this check to STDOUT.
#
# The script name shown in the SYNTAX and EXAMPLE lines is derived from $0.
# $0 is quoted so that a script path containing blanks does not break
# basename.
function showHelp(){
    local _self
    _self=$(basename -- "$0")
    cat <<EOF
______________________________________________________________________

CHECK_DISK IO AND LATENCY

(c) Institute for Medical Education - University of Bern
Licence: GNU GPL 3
______________________________________________________________________

Disk infos based on /sys/block/[NAME]/stat
See https://www.kernel.org/doc/Documentation/block/stat.txt

SYNTAX:
${_self} -m MODE [-w WARN_LIMIT] [-c CRITICAL_LIMIT]

OPTIONS:

    -m MODE        set mode for type of output (required)

    -w VALUE       warning level  (default: 0 for none)
    -c VALUE       critical level (default: 0 for none)

    -h or --help   show this help.

PARAMETERS:

    MODE
        io         read I/Os, write I/Os, discard I/0s
        ticks      read ticks, write ticks, discard ticks
        wait       total wait time for all requests

EXAMPLE:
${_self} -m io

EOF
}

# ----------------------------------------------------------------------
# MAIN
# ----------------------------------------------------------------------

# TESTAREA 51
# _diskInfo
# echo "--- disks: "
# getDisks
# echo "--- partitions: "
# getPartitions

# --- prerequisites
# ph.require comes from the sourced inc_pluginfunctions; presumably it
# verifies that the listed binaries are available - confirm in that file.
ph.require bc lsblk

# integer typed: holds the delta value of a single metric of a single disk
declare -i iDelta=0

# --- help requested? (only evaluated as FIRST command line argument)
if [[ "$1" == "--help" || "$1" == "-h" ]]; then
    showHelp
    exit 0
fi

# --- read command line parameters
# ph.getValueWithParam is called as: DEFAULT  OPTION-LETTER  all script args
# (helper from inc_pluginfunctions; presumably it prints the value given for
# the option or DEFAULT if the option is absent - confirm in that file)

# limits are integer typed; default: 0
# NOTE(review): neither limit is referenced again in the visible part of
# this script - verify whether a threshold check is missing.
declare -i iWarnLimit iCriticalLimit
iWarnLimit=$(ph.getValueWithParam 0 w "$@")
iCriticalLimit=$(ph.getValueWithParam 0 c "$@")

# --- set mode (default: empty)
sMode=$(ph.getValueWithParam '' m "$@")

# --- labels and their columns in /sys/block/$myDisk/stat

#    Name            units         description
#    ----            -----         -----------
#  1 read I/Os       requests      number of read I/Os processed
#  2 read merges     requests      number of read I/Os merged with in-queue I/O
#  3 read sectors    sectors       number of sectors read
#  4 read ticks      milliseconds  total wait time for read requests
#  5 write I/Os      requests      number of write I/Os processed
#  6 write merges    requests      number of write I/Os merged with in-queue I/O
#  7 write sectors   sectors       number of sectors written
#  8 write ticks     milliseconds  total wait time for write requests
#  9 in_flight       requests      number of I/Os currently in flight
# 10 io_ticks        milliseconds  total time this block device has been active
# 11 time_in_queue   milliseconds  total wait time for all requests
# 12 discard I/Os    requests      number of discard I/Os processed
# 13 discard merges  requests      number of discard I/Os merged with in-queue I/O
# 14 discard sectors sectors       number of sectors discarded
# 15 discard ticks   milliseconds  total wait time for discard requests

# --- select the metrics to collect, depending on the mode
#
# Every mode sets three values that are used further down:
#   info     description that is shown in the status line
#   aNames   labels of the metrics
#   aColums  1-based column numbers in /sys/block/[NAME]/stat; the n-th
#            column belongs to the n-th label in aNames
case "$sMode" in
    "io")
        info="read I/Os, write I/Os, discard I/0s, number of I/Os currently in flight"
        aNames=(ReadIO WriteIO DiscardIO FlightIO)
        aColums=(1 5 12 9)
        ;;
    "ticks")
        info="ticks=total wait time [ms] --> read ticks, write ticks, discard ticks, io ticks (total time this block device has been active)"
        aNames=(ReadTicks WriteTicks DiscardTicks IoTicks)
        aColums=(4 8 15 10)
        ;;
    "wait")
        info="total wait time [ms] for all requests"
        aNames=(Wait)
        aColums=(11)
        ;;
    *)
        echo "ERROR: missing or wrong MODE parameter -m"
        showHelp
        # A usage error must not end with exit status 0: a caller would take
        # that for a successful check. 3 is the UNKNOWN state of the
        # Nagios/Icinga plugin return code convention (presumably this script
        # is run as such a plugin - confirm).
        exit 3
        ;;
esac





# --- collect data
#
# The detail lines are buffered in memory because they have to be printed
# AFTER the status line, which can only be written once all metrics have
# been processed. No temp file is used, so nothing is left behind if the
# script stops early.
sDetails=''

# start every total with 0 - the TOTAL section and ph.perfadd get a number
# even if no disk was found
aTotals=()
for index in "${!aNames[@]}"
do
    aTotals[$index]=0
done

# --- add data for each disk
# (word splitting of the command output is intended: getDisks prints one
# name per line, reduced to the characters a-z and 0-9)
for myDisk in $(getDisks)
do
    sStatFile="/sys/block/$myDisk/stat"
    if [[ ! -r "$sStatFile" ]]; then
        echo "WARNING: cannot read $sStatFile - skipping disk $myDisk" >&2
        continue
    fi

    # the stat file is a single line of whitespace separated counters;
    # read it once per disk: aStat[n-1] is column n
    aStat=()
    read -r -a aStat < "$sStatFile"

    sDetails+=$'\n'"--- $myDisk"$'\n'
    for index in "${!aNames[@]}"
    do
        label="disk-$myDisk-${aNames[$index]}"
        column=${aColums[$index]}

        # kernels without discard statistics expose fewer columns:
        # fall back to 0 instead of passing an empty value
        value=${aStat[column-1]:-0}

        # ph.perfdeltaspeed comes from inc_pluginfunctions; presumably it
        # returns the change of the counter per second since the last run -
        # confirm in that file. iDelta is integer typed.
        iDelta=$(ph.perfdeltaspeed "$label" "$value")

        aTotals[$index]=$(( aTotals[index] + iDelta ))
        printf -v sLine "%30s %10d \n" "$label:" "$iDelta"
        sDetails+=$sLine
    done
done

# --- add total
sDetails+=$'\n'"--- TOTAL"$'\n'
for index in "${!aNames[@]}"
do
    label="${aNames[$index]}"
    value=${aTotals[$index]}
    printf -v sLine "%30s %10d \n" "$label:" "$value"
    sDetails+=$sLine
    ph.perfadd "$label"   "$value"
done
sDetails+=$'\n'

# --- output: status line first, then the details
ph.status "Disk data ... $info "

printf '%s' "$sDetails"

# --- bye
ph.exit