Peek is a fun and useful example of what shell scripts can do. A resource monitoring and troubleshooting script, peek sleeps for 20 or 30 seconds. Upon waking, it checks (or “peeks at”) system statistics using commands such as free and vmstat, and draws graphs representing the computer activity. Across the top of the display is the time of the last update, and the actual CPU and memory usage. Below the graphs, warnings appear about potential problems or bottlenecks.
Because the format of commands such as vmstat changes over time, the script might have to be modified for certain distributions. This version was designed for Red Hat 7.3. Figure A.1 shows peek in action.
The full source code appears in Listing A.1.
Example A.1. The Full Source Code for peek.sh
#!/bin/bash
#
# peek.sh
#
# Show system resource usage graphs.  Record and display alarm messages
# for important conditions.
#
# Usage: peek.sh [-h | --help]
#
# Each pass samples vmstat, ping and df, redraws five scrolling bar graphs
# (CPU, memory, context switches, LAN latency, disk interrupts) and adds any
# warnings to an eight-entry alarm log.  The terminal must be at least
# 80 columns by 35 lines.  The script runs until it is interrupted.
#
# by Ken O. Burtch
#
# CVS: $Id$
# ----------------------------------------------------------------------------
shopt -s -o nounset
shopt -s -o noclobber

# Screen size
#
# A non-interactive Bash only sees LINES and COLUMNS when the caller exported
# them, so fall back to asking tput.  Zero means "could not be determined".
# ----------------------------------------------------------------------------

declare -i SCREEN_LINES=0
declare -i SCREEN_COLUMNS=0
case "${LINES:-}" in
  ''|*[!0-9]*) SCREEN_LINES=$(tput lines 2>/dev/null) ;;
  *)           SCREEN_LINES=$LINES ;;
esac
case "${COLUMNS:-}" in
  ''|*[!0-9]*) SCREEN_COLUMNS=$(tput cols 2>/dev/null) ;;
  *)           SCREEN_COLUMNS=$COLUMNS ;;
esac
readonly SCREEN_LINES SCREEN_COLUMNS

# Global Declarations
# ----------------------------------------------------------------------------

declare -rx SCRIPT=${0##*/}               # script name

# Command paths
#
# Change these paths as required for your system

declare -rx ping="/bin/ping"              # check network connections
declare -rx vmstat="/usr/bin/vmstat"      # check resource usage
declare -rx df="/bin/df"                  # check disk space
declare -rx free="/usr/bin/free"          # check free memory
declare -rx hostname="/bin/hostname"      # name of computer

# Settings
#
# How long vmstat tests and delay between screen updates.  These values should
# be good for most systems.

declare -rix INTERVAL=4     # vmstat interval (seconds)
declare -rix SLEEP=23       # sleep time between updates (seconds)

# Alarm Settings
#
# These determine when an alarm is announced.  Change these to suit your
# system.

declare -rix THRASHING=8000      # context switch limit before warning;
                                 # also the top of the Thrashing graph
declare -rix DISK_LIMIT=2500     # interrupt limit before disk warning
declare -rix CPU_LIMIT=90        # CPU busy limit before warning (percent)
declare -rix NET_LIMIT=9         # LAN ping limit before warning (mseconds)
declare -rix MEM_LIMIT=95        # memory warning limit (percent)
declare -rx  LAN_HOST=hitomi     # Computer to ping over LAN (hostname)
declare -rix DISK_GRAPH_TOP=100  # interrupts per percent of the disk graph

# Graph bar values
#
# All bars start at zero for each of the eight columns.  Index 1 is the
# newest sample, index 8 the oldest.

declare -i CPU1=0 CPU2=0 CPU3=0 CPU4=0 CPU5=0 CPU6=0 CPU7=0 CPU8=0
declare -i MEM1=0 MEM2=0 MEM3=0 MEM4=0 MEM5=0 MEM6=0 MEM7=0 MEM8=0
declare -i PAG1=0 PAG2=0 PAG3=0 PAG4=0 PAG5=0 PAG6=0 PAG7=0 PAG8=0
declare -i NET1=0 NET2=0 NET3=0 NET4=0 NET5=0 NET6=0 NET7=0 NET8=0
declare -i DSK1=0 DSK2=0 DSK3=0 DSK4=0 DSK5=0 DSK6=0 DSK7=0 DSK8=0

# Alarm Log
#
# The alarm log holds no more than 8 entries

declare LOG1= LOG2= LOG3= LOG4= LOG5= LOG6= LOG7= LOG8=
declare LASTLOG=         # last alarm message
declare SYSTEM_OK=       # cleared if there was an alarm
declare LOG_UPDATED=     # set if the log has changed

# Global constants
#
# Put tput values in variables for speed

declare -rx HOME_CURSOR=$(tput home)   # cursor to top-left corner
declare -rx UP=$(tput cuu1)            # move cursor up
declare -rx LEFT=$(tput cub1)          # move cursor left
declare -rx RIGHT=$(tput cuf1)         # move cursor right
declare -rx CEOL=$(tput el)            # clear to end of the line
declare -rx INVERSE=$(tput smso)       # reverse video on
declare -rx INVERSE_OFF=$(tput rmso)   # reverse video off

# Many terminfo entries define cud1 as a bare linefeed, which command
# substitution strips to an empty string.  When that happens, ask for the
# parameterized "cud" capability with a count of one instead.
declare DOWN=
DOWN=$(tput cud1)
[ -n "$DOWN" ] || DOWN=$(tput cud 1)
declare -rx DOWN                       # move cursor down

declare -rx UP10="$UP$UP$UP$UP$UP$UP$UP$UP$UP$UP"
# Nine moves only: every use is followed by a newline, which supplies the
# tenth line and returns the cursor to the left margin.
declare -rx DOWN10="$DOWN$DOWN$DOWN$DOWN$DOWN$DOWN$DOWN$DOWN$DOWN"

# VMSTAT fields

declare -ix PROC_R=0      # Ready processes
declare -ix PROC_B=0      # Blocked processes
declare -ix PROC_W=0      # Swapped out processes
declare -ix MEM_SWPD=0    # Active virtual mem (KB)
declare -ix MEM_FREE=0    # Free memory (KB)
declare -ix MEM_BUF=0     # Buffer space (KB)
declare -ix MEM_CACHE=0   # Disk cache (KB)
declare -ix SWAP_SI=0     # Swapped in
declare -ix SWAP_SO=0     # Swapped out
declare -ix IO_BLKI=0     # Block IO in (vmstat bi)
declare -ix IO_BLKO=0     # Block IO out (vmstat bo)
declare -ix SYS_SI=0      # interrupts (incl. the clock)
declare -ix SYS_CS=0      # Context switches
declare -ix CPU_US=0      # User CPU usage
declare -ix CPU_SY=0      # System CPU usage
declare -ix IDLE=0        # CPU idle time

declare -a VMSTAT_NAMES=()    # column names from the vmstat header
declare -a VMSTAT_VALUES=()   # values from the last vmstat sample

# Misc Global Variables

declare TIME=$(date '+%H:%M:%S')  # current time
declare TMP=                      # temporary results
declare TMP2=
declare -i CPU=0                  # current CPU busy (percent)
declare -i MEM=0                  # current memory in use (percent)
declare -i PAGE=0                 # context switches (percent of THRASHING)
declare -i NET=0                  # current network speed (mseconds)
declare -i DSK=0                  # disk activity (percent of graph)
declare -i MEM_TOTAL=0            # Total memory (KB)

# Functions

# VERTICAL BAR
#
# Draw a vertical bar graph 10 characters high.  The bar is drawn with '#'
# and the blank areas with '.'.  The cursor is left at the top of the next
# bar position, two columns to the right.
#
# Parameter 1 - the value of the graph, a percent
# Parameter 2 - the old value of the bar, to speed drawing
# ----------------------------------------------------------------------------

function vertical_bar {
  local -i dots=0     # number of periods to draw
  local -i hashes=0   # number of number signs to draw
  local -i i

  # If the new bar is equal to the old one in this spot, don't bother
  # redrawing since it hasn't changed--unless it's zero since we may be
  # drawing this column for the first time.

  if [ "$1" -ne 0 ] && [ "$1" -eq "$2" ] ; then
    printf "%s" "$RIGHT$RIGHT"
    return
  fi

  # Convert the percent to a height between 0 and 10, rounded to the
  # nearest integer.  Out-of-range values are clamped.

  if [ "$1" -gt 100 ] ; then
    hashes=10
  elif [ "$1" -lt 0 ] ; then
    hashes=0
  else
    hashes=$(( ($1 + 5) / 10 ))
  fi
  dots=$(( 10 - hashes ))

  # Draw top-down: periods fill the unused part, then the bar itself.
  # Each character is followed by "down, left" to stay in the same column.

  for (( i = 0; i < dots; i++ )) ; do
    printf "%s" ".$DOWN$LEFT"
  done
  for (( i = 0; i < hashes; i++ )) ; do
    printf "%s" "#$DOWN$LEFT"
  done
  printf "%s" "$UP10$RIGHT$RIGHT"
}
declare -ft vertical_bar
readonly -f vertical_bar

# DRAW HISTORY
#
# Draw one eight-bar graph and leave a two-column gap after it.
#
# Parameter 1      - the value that just scrolled off the graph
# Parameters 2..9  - the eight bar values, oldest first
#
# Each bar is compared with the parameter before it, which is the value
# previously displayed in that position.
# ----------------------------------------------------------------------------

function draw_history {
  local previous=$1
  local current
  shift
  for current in "$@" ; do
    vertical_bar "$current" "$previous"
    previous=$current
  done
  printf "%s" "$RIGHT$RIGHT"
}
declare -ft draw_history
readonly -f draw_history

# CPU GRAPH
#
# Update the vertical bar CPU usage graph.
# Parameter 1 - the new bar height to add to the graph
# ----------------------------------------------------------------------------

function cpu_graph {
  local last
  if [ -z "${1:-}" ] ; then # debug
    alarm "$FUNCNAME received a null string"
    return
  fi
  last=$CPU8
  CPU8=$CPU7 CPU7=$CPU6 CPU6=$CPU5 CPU5=$CPU4
  CPU4=$CPU3 CPU3=$CPU2 CPU2=$CPU1 CPU1=$1
  draw_history "$last" "$CPU8" "$CPU7" "$CPU6" "$CPU5" \
    "$CPU4" "$CPU3" "$CPU2" "$CPU1"
}
declare -ft cpu_graph
readonly -f cpu_graph

# MEM GRAPH
#
# Update the vertical bar memory graph
# Parameter 1 - the new bar height to add to the graph
# ----------------------------------------------------------------------------

function mem_graph {
  local last
  if [ -z "${1:-}" ] ; then # debug
    alarm "$FUNCNAME received a null string"
    return
  fi
  last=$MEM8
  MEM8=$MEM7 MEM7=$MEM6 MEM6=$MEM5 MEM5=$MEM4
  MEM4=$MEM3 MEM3=$MEM2 MEM2=$MEM1 MEM1=$1
  draw_history "$last" "$MEM8" "$MEM7" "$MEM6" "$MEM5" \
    "$MEM4" "$MEM3" "$MEM2" "$MEM1"
}
declare -ft mem_graph
readonly -f mem_graph

# PAGE GRAPH
#
# Update the vertical bar context switch ("thrashing") graph
# Parameter 1 - the new bar height to add to the graph
# ----------------------------------------------------------------------------

function page_graph {
  local last
  if [ -z "${1:-}" ] ; then # debug
    alarm "$FUNCNAME received a null string"
    return
  fi
  last=$PAG8
  PAG8=$PAG7 PAG7=$PAG6 PAG6=$PAG5 PAG5=$PAG4
  PAG4=$PAG3 PAG3=$PAG2 PAG2=$PAG1 PAG1=$1
  draw_history "$last" "$PAG8" "$PAG7" "$PAG6" "$PAG5" \
    "$PAG4" "$PAG3" "$PAG2" "$PAG1"
}
declare -ft page_graph
readonly -f page_graph

# NET GRAPH
#
# Update the vertical bar network traffic graph
# Parameter 1 - the new bar height to add to the graph
# ----------------------------------------------------------------------------

function net_graph {
  local last
  if [ -z "${1:-}" ] ; then # debug
    alarm "$FUNCNAME received a null string"
    return
  fi
  last=$NET8
  NET8=$NET7 NET7=$NET6 NET6=$NET5 NET5=$NET4
  NET4=$NET3 NET3=$NET2 NET2=$NET1 NET1=$1
  draw_history "$last" "$NET8" "$NET7" "$NET6" "$NET5" \
    "$NET4" "$NET3" "$NET2" "$NET1"
}
declare -ft net_graph
readonly -f net_graph

# DISK GRAPH
#
# Update the vertical bar disk interrupt graph
# Parameter 1 - the new bar height to add to the graph
# ----------------------------------------------------------------------------

function disk_graph {
  local last
  if [ -z "${1:-}" ] ; then # debug
    alarm "$FUNCNAME received a null string"
    return
  fi
  last=$DSK8
  DSK8=$DSK7 DSK7=$DSK6 DSK6=$DSK5 DSK5=$DSK4
  DSK4=$DSK3 DSK3=$DSK2 DSK2=$DSK1 DSK1=$1
  draw_history "$last" "$DSK8" "$DSK7" "$DSK6" "$DSK5" \
    "$DSK4" "$DSK3" "$DSK2" "$DSK1"
}
declare -ft disk_graph
readonly -f disk_graph

# ALARM
#
# Add a message to the alarm log.  Duplicate messages are discarded.
# Parameter 1 = the message to add
# ----------------------------------------------------------------------------

function alarm {

  # Anything logged this time around means system isn't OK, even if it was
  # a repeated message that was suppressed

  SYSTEM_OK=

  # Ignore repeated alarms

  [ "$1" = "$LASTLOG" ] && return
  LASTLOG="$1"

  # Add the log message to the list of alarms

  LOG8="$LOG7"
  LOG7="$LOG6"
  LOG6="$LOG5"
  LOG5="$LOG4"
  LOG4="$LOG3"
  LOG3="$LOG2"
  LOG2="$LOG1"
  LOG1="$TIME - $1"
  LOG_UPDATED=1
}
declare -ft alarm
readonly -f alarm

# PARSE VMSTAT
#
# Split vmstat output into VMSTAT_NAMES (the header line whose first word is
# "r") and VMSTAT_VALUES (the last line whose first word is a number).
# Both arrays are left empty when no such line is found.
#
# Parameter 1 - the complete text printed by vmstat
# ----------------------------------------------------------------------------

function parse_vmstat {
  local -a words
  VMSTAT_NAMES=()
  VMSTAT_VALUES=()
  while read -r -a words ; do
    [ "${#words[@]}" -gt 0 ] || continue
    case "${words[0]}" in
      r)        VMSTAT_NAMES=("${words[@]}") ;;
      *[!0-9]*) ;;  # group header or other text
      *)        VMSTAT_VALUES=("${words[@]}") ;;
    esac
  done <<< "$1"
}
declare -ft parse_vmstat
readonly -f parse_vmstat

# VMSTAT FIELD
#
# Print the value of one vmstat column, looked up by its header name so that
# the position of the column does not matter.  Prints 0 when the column is
# missing or its value is not an unsigned integer.
#
# Parameter 1 - the column name as shown in the vmstat header (e.g. "free")
# ----------------------------------------------------------------------------

function vmstat_field {
  local value=
  local -i i
  for (( i = 0; i < ${#VMSTAT_NAMES[@]}; i++ )) ; do
    if [ "${VMSTAT_NAMES[i]}" = "$1" ] ; then
      value=${VMSTAT_VALUES[i]:-}
      break
    fi
  done
  case "$value" in
    ''|*[!0-9]*) value=0 ;;
  esac
  printf "%s" "$value"
}
declare -ft vmstat_field
readonly -f vmstat_field

# PING AVG MS
#
# Print the average round trip time, in whole milliseconds, taken from the
# summary line of ping output ("... min/avg/max... = a/b/c... ms").
# Returns non-zero and prints nothing when no usable summary line is found.
#
# Parameter 1 - the complete text printed by ping
# ----------------------------------------------------------------------------

function ping_avg_ms {
  local stats
  stats=$(printf "%s\n" "$1" | grep -E "trip|rtt min" | tail -1)
  [ -n "$stats" ] || return 1
  stats=${stats#*= }    # keep "min/avg/max... ms"
  stats=${stats#*/}     # drop the minimum
  stats=${stats%%/*}    # keep the average
  stats=${stats%%.*}    # whole milliseconds only
  case "$stats" in
    ''|*[!0-9]*) return 1 ;;
  esac
  printf "%s" "$stats"
}
declare -ft ping_avg_ms
readonly -f ping_avg_ms

# REQUIRE CAPABILITY
#
# Exit with status 192 if a terminal control string is empty.
#
# Parameter 1 - line number to report
# Parameter 2 - the control string to test
# Parameter 3 - what the terminal cannot do, for the error message
# ----------------------------------------------------------------------------

function require_capability {
  if [ -z "$2" ] ; then
    printf "%s\n" "$SCRIPT:$1: cannot $3" >&2
    printf "%s\n" "on this ${TERM:-unknown} display" >&2
    exit 192
  fi
}
declare -ft require_capability
readonly -f require_capability

# ----------------------------------------------------------------------------
# Main Script Begins Here
# ----------------------------------------------------------------------------

# Usage/Help
# ----------------------------------------------------------------------------

if [ $# -gt 0 ] ; then
  if [ "$1" = "-h" ] || [ "$1" = "--help" ] ; then
    printf "%s\n" "$SCRIPT:$LINENO: Show system resource usage graphs"
    printf "%s\n" "There are no parameters for this script"
    printf "\n"
    exit 0
  else
    printf "%s\n" "$SCRIPT:$LINENO: Unexpected options/parameters" >&2
    exit 192
  fi
fi

# Sanity checks
# ----------------------------------------------------------------------------

for TMP in "$ping" "$vmstat" "$df" "$free" "$hostname" ; do
  if [ ! -x "$TMP" ] ; then
    printf "%s\n" "$SCRIPT:$LINENO: Can't find/execute $TMP" >&2
    exit 192
  fi
done

if [ "$SCREEN_LINES" -le 0 ] ; then
  printf "%s\n" "$SCRIPT:$LINENO: LINES is not declared. Export LINES" >&2
  exit 192
fi
if [ "$SCREEN_LINES" -lt 35 ] ; then
  printf "%s\n" "$SCRIPT:$LINENO: Your display must be >= 35 lines high" >&2
  exit 192
fi
if [ "$SCREEN_COLUMNS" -le 0 ] ; then
  printf "%s\n" "$SCRIPT:$LINENO: COLUMNS is not declared. Export COLUMNS" >&2
  exit 192
fi
if [ "$SCREEN_COLUMNS" -lt 80 ] ; then
  printf "%s\n" "$SCRIPT:$LINENO: Your display must be >= 80 columns wide" >&2
  exit 192
fi

require_capability "$LINENO" "$HOME_CURSOR" "HOME the cursor"
require_capability "$LINENO" "$UP" "move the cursor UP"
if [ -z "$DOWN" ] ; then
  printf "%s\n" "$SCRIPT:$LINENO: cannot move the cursor DOWN" >&2
  printf "%s" " (Some terminfo/termcap databases have cud1/do set" >&2
  printf "%s\n" " improperly)" >&2
  printf "%s\n" " (Try declaring down as DOWN=$'\e[B')" >&2
  exit 192
fi
require_capability "$LINENO" "$LEFT" "move the cursor LEFT"
require_capability "$LINENO" "$RIGHT" "move the cursor RIGHT"
require_capability "$LINENO" "$CEOL" "clear to end of line"

declare -rx HOST=$("$hostname")   # name of computer

# Get the total amount of physical memory.  It is the divisor of the memory
# percentage, so refuse to start unless it is a positive number.

TMP=$("$free" | grep "^Mem" | tr -s ' ' | cut -d' ' -f2)
case "$TMP" in
  ''|*[!0-9]*) TMP=0 ;;
esac
MEM_TOTAL=$TMP
if [ "$MEM_TOTAL" -le 0 ] ; then
  printf "%s\n" "$SCRIPT:$LINENO: cannot read total memory from $free" >&2
  exit 192
fi

# Clear the screen

tput reset
tput clear
printf "%s - system monitor script\nPlease wait...\n" "$0"

# ----------------------------------------------------------------------------
# Main Loop
# ----------------------------------------------------------------------------

while true ; do

  # Start each pass with a clean slate.  This must happen before the first
  # possible alarm, otherwise a vmstat failure would be logged but never
  # displayed.

  SYSTEM_OK=1
  LOG_UPDATED=

  # Get the system statistics with vmstat.  Two samples are requested and
  # only the last line of output is used.

  TMP=$(nice -n 20 "$vmstat" "$INTERVAL" 2 2>&1)
  TMP2=$?
  TIME=$(date '+%H:%M:%S')
  if [ "$TMP2" -ne 0 ] ; then
    alarm "vmstat error: $(printf "%s\n" "$TMP" | tail -1)"
    parse_vmstat ""
  else
    parse_vmstat "$TMP"
  fi

  # Extract the vmstat columns into variables, by column name.
  # Use only what we need.  ("w" does not exist in every vmstat version;
  # it reads as zero when absent.)

  PROC_W=$(vmstat_field w)
  MEM_FREE=$(vmstat_field free)
  MEM_BUF=$(vmstat_field buff)
  MEM_CACHE=$(vmstat_field cache)
  SYS_SI=$(vmstat_field in)
  SYS_CS=$(vmstat_field cs)
  CPU_US=$(vmstat_field us)
  CPU_SY=$(vmstat_field sy)
  IDLE=$(vmstat_field id)

  # Perform the network test
  #
  # (some ping's won't return an error if it fails so we'll double check by
  # making sure a round trip summary is in the result.  No summary means
  # probably an error message.)

  TMP=$("$ping" -c 1 "$LAN_HOST" 2>&1)
  TMP2=$?
  if [ "$TMP2" -ne 0 ] || ! TMP=$(ping_avg_ms "$TMP") ; then
    alarm "Ping to LAN host $LAN_HOST failed: network load unknown"
    TMP=99
  fi
  NET=$TMP

  # Draw stats bar at the top of the screen

  printf "%s" "$HOME_CURSOR$INVERSE$TIME "
  printf "%s" "CPU: $CPU_US% usr, $CPU_SY% sys, $IDLE% idle "
  printf "%s\n" "MEM: $MEM_TOTAL total, $MEM_FREE free $CEOL$INVERSE_OFF"

  # Compute values for the vertical bar graphs & Alarm tests

  # CPU busy is the 100% minus the idle percent
  # Show an alarm if the CPU limit is exceeded three times
  # or if the kernel usage exceeds 50%

  CPU=$(( 100 - IDLE ))
  if [ "$CPU_SY" -gt 50 ] ; then
    alarm "Kernel bottleneck - system CPU usage $CPU_SY%"
  fi
  if [ "$CPU" -gt "$CPU_LIMIT" ] && [ "$CPU1" -gt "$CPU_LIMIT" ] \
    && [ "$CPU2" -gt "$CPU_LIMIT" ] ; then
    alarm "CPU bottleneck - $CPU% busy"
  fi

  # Signs of heavy system loads

  if [ "$SYS_CS" -gt "$THRASHING" ] ; then
    alarm "System Thrashing - $SYS_CS context switches"
  elif [ "$PROC_W" -gt 0 ] ; then
    alarm "Swapped out $PROC_W processes"
  fi

  # Show an alarm if the net limit is exceeded three times

  if [ "$NET" -gt "$NET_LIMIT" ] && [ "$NET1" -gt "$NET_LIMIT" ] \
    && [ "$NET2" -gt "$NET_LIMIT" ] ; then
    alarm "Network bottleneck - $NET ms"
  fi

  if [ "$SYS_SI" -gt "$DISK_LIMIT" ] ; then
    alarm "Disk bottleneck - $SYS_SI device interrupts"
  fi

  # Memory in use is (total memory - free memory) / total memory
  # Show an alarm if the limit is exceeded three times

  MEM=$(( 100 * (MEM_TOTAL - MEM_FREE) / MEM_TOTAL ))
  if [ "$MEM2" -gt "$MEM_LIMIT" ] && [ "$MEM1" -gt "$MEM_LIMIT" ] \
    && [ "$MEM" -gt "$MEM_LIMIT" ] ; then
    alarm "virtual memory shortage - memory $MEM% in use"
  fi

  # Check temp directory space.  Only a file system mounted exactly on /tmp
  # is examined; -P keeps each file system on a single output line.

  if TMP=$("$df" -P 2>&1) ; then
    if printf "%s\n" "$TMP" | grep ' /tmp$' | grep -q "100%" ; then
      alarm "$0: /tmp appears to be full"
    fi
  else
    alarm "$(printf "%s\n" "$TMP" | tail -1)"
  fi

  # The Thrashing graph is full at THRASHING context switches

  PAGE=$(( SYS_CS * 100 / THRASHING ))

  # Draw vertical bar graphs.  Each graph is 18 columns wide (eight
  # two-column bars plus a two-column gap), which the labels line up with.

  printf "\n"
  cpu_graph "$CPU"
  mem_graph "$MEM"
  page_graph "$PAGE"
  net_graph "$(( 10 * NET ))" # 100% = 10 milliseconds
  printf "%s\n" "$DOWN10"
  printf "%-18s%-18s%-18s%s\n" \
    "CPU Used" "Memory" "Thrashing (CS)" "LAN Traffic"

  DSK=$(( SYS_SI / DISK_GRAPH_TOP ))
  disk_graph "$DSK"
  printf "%s\n" "$DOWN10"
  printf "%s\n" "Disk Traffic"

  # Nothing new logged?  Then show all is well in log
  # (SYSTEM_OK is cleared in the alarm function)

  [ -n "$SYSTEM_OK" ] && alarm "System OK"

  # Show alarm history, but only if it has changed

  if [ -n "$LOG_UPDATED" ] ; then
    printf "\n"
    printf "%s\n" "$LOG1$CEOL"
    printf "%s\n" "$LOG2$CEOL"
    printf "%s\n" "$LOG3$CEOL"
    printf "%s\n" "$LOG4$CEOL"
    printf "%s\n" "$LOG5$CEOL"
    printf "%s\n" "$LOG6$CEOL"
    printf "%s\n" "$LOG7$CEOL"
    printf "%s\n" "$LOG8$CEOL"
  fi

  # sit quietly for a while before generating next screen

  nice sleep "$SLEEP"

done

exit 0
3.143.3.208