Skip to content
Snippets Groups Projects
Select Git revision
  • 4b554489e816d68512f8779f56e24725e8933add
  • master default protected
  • simple-task/7248-eol-check-add-node-22
  • 6877_check_iml_deployment
4 results

check_proc_ressources

Blame
  • check_proc_ressources 5.44 KiB
    #!/bin/bash
    # ======================================================================
    #
    # NAGIOS CLIENT CHECK :: SHOW PROCESSES USING A LOT OF CPU OR MEMORY
    #
    # ----------------------------------------------------------------------
    #
    # SYNTAX:
    #  ./check_proc_ressources -m METHOD [-w WARNLIMIT] [-c CRITICALLIMIT]
    #    METHOD         string   identify what to check; one of cpu|mem
    #    WARNLIMIT      integer  value in percent; default: 70
    #    CRITICALLIMIT  integer  value in percent; default: 90
    #
    # ----------------------------------------------------------------------
    # 2020-03-03  v1.0  initial version
    # 2020-03-05  v1.1  <axel.hahn@iml.unibe.ch> switch to ph.* helper functions
    # 2020-06-06  v1.2  <axel.hahn@iml.unibe.ch> for cpu: limits are multiplied by the count of cpus
    # 2023-12-22  v1.3  <axel.hahn@unibe.ch>     update help; no temp files
    # 2025-02-10  v1.4  <axel.hahn@unibe.ch>      harden sourcing files
    # ======================================================================
    
    # Load the helper file located in the same directory as this script
    # (presumably the provider of the ph.* functions used further down -
    # TODO confirm). Stop with exit code 1 if it cannot be loaded.
    # shellcheck source=/dev/null
    source "$( dirname "$0" )/inc_pluginfunctions" || exit 1

    # version of this check, exported into the environment
    self_APPVERSION=1.4
    export self_APPVERSION
    
    # --- default limits (percent) for each check mode
    iWarnLimitCpu=70;  iCriticalLimitCpu=90
    iWarnLimitMem=70;  iCriticalLimitMem=90

    # --- integer typed working variables
    # counters of processes found per level
    declare -i iCountWarning=0 iCountCritical=0
    # effective limits; 0 means "not set yet"
    declare -i iCriticalLimit=0 iWarnLimit=0
    declare -i iMulti=1
    
    # ----------------------------------------------------------------------
    # FUNCTIONS
    # ----------------------------------------------------------------------
    
    # Print the usage text to stdout: the output of ph.showImlHelpHeader
    # followed by syntax, options and an example.
    # The default limits shown are read from iWarnLimitCpu, iCriticalLimitCpu,
    # iWarnLimitMem and iCriticalLimitMem at the time of the call.
    # Takes no arguments.
    function showHelp(){
        cat <<EOH
    $( ph.showImlHelpHeader )
    
    SYNTAX:
    
      check_proc_ressources -m METHOD [-w WARNLIMIT] [-c CRITICALLIMIT]
      
    OPTIONS:
    
      -h, --help
          show help
    
      -c, --critical VALUE
          critical value; defaults: 
            * cpu $iCriticalLimitCpu
            * mem $iCriticalLimitMem
    
      -m, --mode VALUE
          identify what to check; one of cpu|mem
          Remark: for cpu the given limits are multiplicated with count of cpu.
    
      -w, --warning VALUE
          warning limit value; defaults: 
            * cpu $iWarnLimitCpu
            * mem $iWarnLimitMem
    
    EXAMPLE:
    
      check_proc_ressources -m mem -w 50 -c 70
          Show processes consuming more than 50% of memory; mark as critical
          when using 70% and more.
    
    EOH
    }
    
    # ----------------------------------------------------------------------
    # MAIN
    # ----------------------------------------------------------------------
    
    
    # ----- parse command line parameters
    #
    # Options with a value consume two positional parameters. Two single
    # "shift" calls are used on purpose: "shift 2" shifts nothing when the
    # value is missing, and the loop would then never terminate.

    while [[ "$#" -gt 0 ]]; do
        case "$1" in
            -h|--help)
                showHelp
                exit 0
                ;;
            -m|--mode)
                sMode=$2
                shift; shift
                ;;
            # "--critical" is the documented name. The misspelled "--critcal"
            # was the only long form accepted before and is kept so existing
            # command definitions continue to work.
            -c|--critical|--critcal)
                iCriticalLimit=$2
                shift; shift
                ;;
            -w|--warning)
                iWarnLimit=$2
                shift; shift
                ;;
            *)
                echo "ERROR: Unknown parameter: $1"
                showHelp
                exit 1
                ;;
        esac
    done
    
    # ----- apply the mode
    #
    # Sets the ps column to read (scanfield) and the label used in the output
    # (unit). A limit that is still 0 is replaced by the default of the mode.
    # Any other mode prints an error plus the help and calls ph.abort.
    case "${sMode}" in

        "cpu")
            scanfield="pcpu"
            unit="Cpu"

            (( iWarnLimit     == 0 )) && iWarnLimit=$iWarnLimitCpu
            (( iCriticalLimit == 0 )) && iCriticalLimit=$iCriticalLimitCpu
            ;;
        "mem")
            scanfield="pmem"
            unit="Memory"

            (( iWarnLimit     == 0 )) && iWarnLimit=$iWarnLimitMem
            (( iCriticalLimit == 0 )) && iCriticalLimit=$iCriticalLimitMem
            ;;
        *)
            # The message must be quoted: unquoted, [${sMode}] is a glob
            # pattern and could be replaced by a matching one-character
            # filename from the current directory.
            echo "ERROR: [${sMode}] is an INVALID mode"
            showHelp
            ph.abort
            ;;

    esac
    
    
    # ----- cpu mode: multiply both limits with the count of cpus
    #
    # The count is taken from /proc/cpuinfo. If it cannot be detected the
    # check aborts; otherwise both limits would be multiplied with 0 and
    # every process would be reported.
    if [ "$sMode" = "cpu" ]; then
        typeset -i iCpucount
        iCpucount=$( grep -c 'vendor_id' /proc/cpuinfo 2>/dev/null )
        if (( iCpucount == 0 )); then
            # no "vendor_id" lines (e.g. on ARM): count "processor" entries
            iCpucount=$( grep -c '^processor' /proc/cpuinfo 2>/dev/null )
        fi
        if (( iCpucount == 0 )); then
            ph.abort "ABORT: count of CPUs was not detected."
        fi
        iWarnLimit=$(( iWarnLimit * iCpucount ))
        iCriticalLimit=$(( iCriticalLimit * iCpucount ))
    fi
    
    # ----- read processlist and create helper table
    #
    # "out" gets one formatted line per process whose value in the scanned
    # column is above the warning limit, highest value first:
    #   LEVEL VALUE % UNIT PID NAME
    # LEVEL is "Critical" if the integer part of the value reaches the
    # critical limit, otherwise "Warning". The status section counts these
    # words at the start of the lines.
    #
    # - three separate -o options with an empty header suppress the header
    #   line of ps
    # - a single awk filters and formats all rows; nothing is word-split or
    #   glob-expanded by the shell
    # - the process name is everything behind the first two columns, so names
    #   containing blanks are shown completely
    out=$(
        ps -e -o "${scanfield}=" -o pid= -o comm= \
            | LC_ALL=C sort -n -r \
            | LC_ALL=C awk -v warn="$iWarnLimit" -v crit="$iCriticalLimit" -v unit="$unit" '
                $1 > warn {
                    name = $0
                    sub(/^[ \t]*[^ \t]+[ \t]+[^ \t]+[ \t]*/, "", name)
                    level = (int($1) >= crit + 0) ? "Critical" : "Warning"
                    printf "%-9s %5s %% %-10s %6s  %s\n", level, $1, unit, $2, name
                }'
    )
    
    
    # ----- Status output
    #
    # Count the table lines per level, set the matching status and print the
    # status line; for warning and critical the process table follows.

    # print an empty line, the table header and the collected process lines
    function _printProcessTable(){
        echo
        echo "Level       Usage        process id  process"
        echo "-----------------------------------------------------"
        echo "$out"
    }

    iCountCritical=$( printf '%s\n' "$out" | grep -c "^Critical" )
    iCountWarning=$(  printf '%s\n' "$out" | grep -c "^Warning" )

    if (( iCountCritical > 0 )); then
        ph.setStatus "critical"
        ph.status "$iCountCritical processes use $iCriticalLimit % (critical) or more"
        echo "$iCountWarning processes use $iWarnLimit % .. $iCriticalLimit % $unit"
        _printProcessTable
    elif (( iCountWarning > 0 )); then
        ph.setStatus "warning"
        ph.status "$iCountWarning processes use $iWarnLimit % .. $iCriticalLimit % $unit"
        _printProcessTable
    else
        ph.setStatus "ok"
        ph.status "all processes below warning limit $iWarnLimit % $unit .. (and critical limit $iCriticalLimit %)"
    fi
    
    # ----- additional infos
    #
    # Append context below the status output: the detected count of cpus in
    # cpu mode, selected lines of /proc/meminfo in mem mode. Finally hand
    # over to ph.exit.

    case "${sMode}" in
        "cpu")
            echo
            echo "Cpus detected: $iCpucount"
            ;;
        "mem")
            echo
            # the key in /proc/meminfo is "Buffers"; the former pattern
            # "Boffers" never matched, so that line was missing
            grep -E '^(MemTotal|MemFree|MemAvailable|Buffers|Cached):' /proc/meminfo
            ;;
    esac

    ph.exit
    
    # ----------------------------------------------------------------------