#!/bin/bash
# ======================================================================
#
# Check DISK IO over all disks
#
# data based on /proc/diskstats
# https://www.kernel.org/doc/Documentation/iostats.txt
#
# based on /sys/block/*/stat
# https://www.kernel.org/doc/Documentation/block/stat.txt
#
# Requires: bc, lsblk
#
# ----------------------------------------------------------------------
# 2020-07-17  v1.0  <axel.hahn@iml.unibe.ch>
# 2023-07-27  v1.1  <axel.hahn@unibe.ch>      shell fixes; remove unsupported warn and critical
# 2025-02-10  v1.2  <axel.hahn@unibe.ch>      harden sourcing files
# 2025-04-02  v1.3  <axel.hahn@unibe.ch>      add mode "measure" for read write speed
# ======================================================================

# shellcheck source=/dev/null
. "$( dirname "$0" )/inc_pluginfunctions" || exit 1

export self_APPVERSION=1.3

# ----------------------------------------------------------------------
# FUNCTIONS
# ----------------------------------------------------------------------

# Show block device information as printed by lsblk (options -a and -i).
# param  string  optional: comma separated list of lsblk column names
#                (no spaces); if missing or empty a default column set
#                is used
function _diskInfo(){
    local _columns="${1:-NAME,MAJ:MIN,TYPE,SIZE,FSTYPE,MOUNTPOINT,STATE,ROTA,VENDOR,MODEL,SERIAL,HCTL}"
    lsblk -ai --output "$_columns"
}

# Get a list of local disks, one device name per line.
# Only rows whose TYPE column is exactly "disk" are returned. TYPE is the
# last of the two requested columns (NAME,TYPE), so a device that merely
# has "disk" in its name (e.g. an LVM volume "vg0-disk1") is not listed.
# The final sed drops every character that is not a-z or 0-9 from the name.
function getDisks(){
    _diskInfo "NAME,TYPE" | awk '$NF == "disk" { print $1 }' | sed "s#[^a-z0-9]##g"
}


# Evaluate the output of a single dd run in mode "measure".
# The speed is taken from the last line of the dd output: it is the 4th
# comma separated field (e.g. " 200 MB/s").
#
# global  string  info  the text " <mode> <speed>" gets appended
#
# param  string  device or directory that was tested (not evaluated here)
# param  string  block size that was used for dd (not evaluated here)
# param  string  label of the measurement, e.g. "WRITE" or "READ";
#                it is used in the info text and as performance data label
# param  string  complete output of the dd command
function measureline(){
   local mode="$3"
   local out="$4"
   local speed
   local perf

   speed=$( tail -n 1 <<< "${out}" | cut -f 4 -d "," )

   info+=" $mode $speed"

   # reduce "<number> <unit>/s" to "<number><first char of unit>",
   # e.g. " 200 MB/s" --> "200M"; ph.toUnit gets this value with target "M"
   perf="$( awk '{ print $1 " " $2 }' <<< "$speed" | sed "s#\ \(.\).*#\1#g" )"
   perf="$( ph.toUnit "$perf" "M" )"
   ph.perfadd "$mode" "$perf"

}

# UNUSED get a list of local partitions
# function getPartitions(){
#     _diskInfo "NAME,TYPE" | grep "part" | awk '{ print $1 }' | sed "s#[^a-z0-9]##g"
# }

# Show the help text on stdout.
# The output starts with the text returned by ph.showImlHelpHeader,
# followed by a description, syntax, options, parameters and examples.
# The script name in syntax and examples is the basename of $0.
# No parameters.
function showHelp(){
    # name of the called script without its path
    local _self; _self="$( basename "$0" )"
    # unquoted EOF: $( ... ) and $_self get expanded inside the text
    cat <<EOF
$( ph.showImlHelpHeader )

Disk infos based on /sys/block/[NAME]/stat
See https://www.kernel.org/doc/Documentation/block/stat.txt
and https://www.kernel.org/doc/Documentation/iostats.txt

The system data are counters that are difficult to read.
The output of this check for each value a delta value per second since 
last check.

SYNTAX:
$_self -m MODE

OPTIONS:

    -h or --help   show this help.
    -m MODE        set mode for type of output (required)
    -d DIR         for measure: set a directory, default: "/tmp"
    -s SIZE        for measure: set a size, default: "100M"

PARAMETERS:

    MODE
        io         read I/Os, write I/Os, discard I/0s
        measure    measure write and read speed
        ticks      read ticks, write ticks, discard ticks
        wait       total wait time for all requests

    DIR
        Directory to perform read and write test.

    SIZE
        Block size for dd command: a number followed multiplicative suffix.
        c=1, w=2, b=512, kB=1000, K=1024, MB=1000*1000, M=1024*1024, xM=M, 
        GB=1000*1000*1000, G=1024*1024*1024, and so on for T, P, E, Z, Y, R, Q

EXAMPLE:

    $_self -m io
    $_self -m measure
    $_self -m measure -d /mnt/data -s 10M

EOF
}

# ----------------------------------------------------------------------
# MAIN
# ----------------------------------------------------------------------

# iDelta holds a delta value per second; the integer attribute makes
# every assignment to it being evaluated as arithmetic expression
declare -i iDelta=0

# a help request is detected in the first argument only
if [ "$1" = "--help" ] || [ "$1" = "-h" ]; then
    showHelp
    exit 0
fi

ph.require bc lsblk

# --- set mode
sMode=$( ph.getValueWithParam '' m "$@" )

# --- mode "measure": write a test file with dd, read it again and report
#     both speeds; this mode ends the script
if [ "$sMode" = "measure" ]; then
        sDir=$(  ph.getValueWithParam '/tmp' d "$@")
        bsize=$( ph.getValueWithParam '100M' s "$@")

        # mktemp creates a unique, not predictable file inside of the
        # directory to test; it fails if the directory is missing or
        # not writable
        if ! target=$( mktemp "${sDir}/diskio-measure.XXXXXXXXXX" ); then
                echo "ERROR: unable to create a test file in directory '${sDir}'"
                exit 1
        fi

        info="Disk speed in ${sDir} using $bsize - "

        # LC_ALL=C: measureline cuts the last line of the dd output by
        # comma separated fields - a localized output with a decimal comma
        # would shift these fields
        measureline "${sDir}" "$bsize" "WRITE" "$( LC_ALL=C dd if=/dev/zero of="${target}" bs="$bsize" count=1 oflag=dsync 2>&1 )"
        measureline "${sDir}" "$bsize" "READ" "$( LC_ALL=C dd if="${target}" of=/dev/null bs="$bsize" count=1 oflag=dsync 2>&1 )"
        rm -f "${target}"

        ph.status "$info"

        ph.exit
fi

# --- labels and their columns in /sys/block/$myDisk/stat

#    Name            units         description
#    ----            -----         -----------
#  1 read I/Os       requests      number of read I/Os processed
#  2 read merges     requests      number of read I/Os merged with in-queue I/O
#  3 read sectors    sectors       number of sectors read
#  4 read ticks      milliseconds  total wait time for read requests
#  5 write I/Os      requests      number of write I/Os processed
#  6 write merges    requests      number of write I/Os merged with in-queue I/O
#  7 write sectors   sectors       number of sectors written
#  8 write ticks     milliseconds  total wait time for write requests
#  9 in_flight       requests      number of I/Os currently in flight
# 10 io_ticks        milliseconds  total time this block device has been active
# 11 time_in_queue   milliseconds  total wait time for all requests
# 12 discard I/Os    requests      number of discard I/Os processed
# 13 discard merges  requests      number of discard I/Os merged with in-queue I/O
# 14 discard sectors sectors       number of sectors discarded
# 15 discard ticks   milliseconds  total wait time for discard requests

# Map the given mode to
#   info     description text for the status line
#   aNames   labels of the values to show
#   aColums  column numbers in /sys/block/[NAME]/stat; the n-th column
#            belongs to the n-th label
# Any other mode shows an error and the help and exits with code 1.
if [ "$sMode" = "io" ]; then
    info="read I/Os, write I/Os, discard I/0s, number of I/Os currently in flight"
    aNames=(ReadIO WriteIO DiscardIO FlightIO)
    aColums=(1 5 12 9)
elif [ "$sMode" = "ticks" ]; then
    info="ticks=total wait time [ms] --> read ticks, write ticks, discard ticks, io ticks (total time this block device has been active)"
    aNames=(ReadTicks WriteTicks DiscardTicks IoTicks)
    aColums=(4 8 15 10)
elif [ "$sMode" = "wait" ]; then
    info="total wait time [ms] for all requests"
    aNames=(Wait)
    aColums=(11)
else
    echo "ERROR: missing or wrong MODE parameter -m"
    showHelp
    exit 1
fi





# The status line must be printed before the details. That is why the
# detail lines are collected first - in a variable, so no temp file can be
# left behind.
sDetails=""

# highest delta per label index over all disks
declare -a -i aMax=()

# --- add data for each disk
for myDisk in $(getDisks)
do
    sDetails+=$'\n'"--- $myDisk"$'\n'

    # the stat file is a single line with whitespace separated counters;
    # read it once into an array: column N is array element N-1
    aStat=()
    read -r -a aStat < "/sys/block/${myDisk}/stat"

    for index in "${!aNames[@]}"
    do
        label="disk-$myDisk-${aNames[$index]}"
        column=${aColums[$index]}
        value=${aStat[column-1]}

        # iDelta was declared as integer at the top of MAIN
        iDelta=$(ph.perfdeltaspeed "$label" "$value")

        # remember the highest delta of this label
        if [ -z "${aMax[$index]}" ] || [ "${aMax[$index]}" -lt "$iDelta" ]; then
            aMax[$index]=$iDelta
        fi

        printf -v sLine "%30s %10d \n" "$label:" "$iDelta"
        sDetails+="$sLine"
    done
done

# --- add maximum values over all disks
sDetails+=$'\n'"--- MAX"$'\n'
for index in "${!aNames[@]}"
do
    label="${aNames[$index]}"
    # no disk found: use 0 instead of an empty performance value
    value=${aMax[$index]:-0}

    printf -v sLine "%30s %10d \n" "$label:" "$value"
    sDetails+="$sLine"
    ph.perfadd "$label"   "$value"
done
sDetails+=$'\n'

# --- output
ph.status "Disk data ... $info "

printf '%s' "$sDetails"

# --- bye
ph.exit