#!/bin/tcsh -f
# JLdL 18Apr12.
#
# Copyright (C) 2005-2012 by Jorge L. deLyra <delyra@fma.if.usp.br>.
# This program may be copied and/or distributed freely. See the
# _ terms and conditions in /usr/share/doc/<package>/copyright.
#
# This program lists the jobs running on the nodes of a cluster;
# _ it tries to access only nodes that are up and running.
#
# Remember the name under which this script was invoked;
# _ it is used in the usage and error messages.
set name = `basename $0`
#
# State for the '-C' option: the default configuration file, and a
# _ flag that stays up while the argument of '-C' is still pending.
set confile = "/etc/cluster.conf"
set conflag = 0
#
# The program used to collect the process data; this is
# _ what the '-p' and '-t' options choose between.
set prog = "ps"
#
# Node selection: the flag set by the '-e' option and lowered by
# _ the '-i' option, and the list of nodes, which starts empty.
set exclude = 0
set nodes = ''
#
# A variable holding a literal '$' character.
set dollar = '$'
#
# Process the command-line arguments. Options and node numbers may come
# _ in any order; when conflicting options are given, the last one wins.
foreach arg ( $* )
    #
    # Anything that starts with a dash is taken to be an option.
    if ( "`echo -n $arg | cut -c 1`" == "-" ) then
        #
        # If the flag is still up at this point, an option came where
        # _ the argument of '-C' was expected, and that is an error.
        if ( $conflag == 1 ) then
            echo "${name}: ERROR: option -C requires an argument"
            exit 1
        endif
        #
        # Now process the options.
        switch ( $arg )
        case "-h":
        case "--help":
            #
            # Print a usage message; the synopsis must list every
            # _ option that is described in the lines following it.
            echo "usage: $name [-C <config>] [-p|-t] [-i|-e] [<node> <node> ...]"
            echo "       -C: use alternate configuration file <config>"
            echo "       -p: use 'ps' to get node process information"
            echo "       -t: use 'top' to get node process information"
            echo "       -i: include only the nodes entered as arguments"
            echo "       -e: exclude all the nodes entered as arguments"
            echo "       list the jobs running on nodes in a cluster;"
            echo "       each <node> must be the number of a node;"
            echo "       if no nodes are entered, all the nodes that"
            echo "       seem to be up and running will be listed;"
            echo "       in order to get the details run 'man $name'"
            exit 0
            breaksw
        case "-C":
        case "--Config-file":
            #
            # Raise the flag; the next argument is the file name.
            set conflag = 1
            breaksw
        case "-p":
        case "--ps":
            #
            # Choose 'ps' as the program.
            set prog = ps
            breaksw
        case "-t":
        case "--top":
            #
            # Choose 'top' as the program.
            set prog = top
            breaksw
        case "-i":
        case "--include":
            #
            # Lower the exclude flag.
            set exclude = 0
            breaksw
        case "-e":
        case "--exclude":
            #
            # Raise the exclude flag.
            set exclude = 1
            breaksw
        default:
            #
            # Anything else is not a known option.
            echo "${name}: ERROR: unknown option $arg; try -h to get help"
            exit 1
            breaksw
        endsw
    #
    # Process non-option arguments.
    else
        #
        # With the flag up, this is the argument of the '-C' option.
        if ( $conflag == 1 ) then
            #
            # Set the configuration file and lower the flag.
            set confile = $arg
            set conflag = 0
        #
        # Otherwise it is an argument of the program itself.
        else
            #
            # Accept arguments consisting only of digits and assume
            # _ that they are the numbers of existing nodes; the
            # _ variable $dollar supplies the end-of-line anchor.
            if ( "`echo -n $arg | egrep '^[0-9]+$dollar'`" != "" ) then
                set nodes = ( $nodes $arg )
            #
            # Any other command-line arguments are illegal.
            else
                echo "${name}: ERROR: illegal argument $arg"
                exit 1
            endif
        endif
    endif
end
#
# If the flag is still up after the last argument, then
# _ the '-C' option was the last one and has no argument.
if ( $conflag == 1 ) then
    echo "${name}: ERROR: option -C requires an argument"
    exit 1
endif
#
# The configuration file must be readable, or there is nothing to do.
if ( ! -r $confile ) then
    echo "${name}: ERROR: cannot read configuration file $confile"
    exit 1
endif
#
# Source the configuration file; it must define the
# _ following variables: nick_name; virt_node.
source $confile
#
# Do some simple error detection: refuse to go on if either
# _ of the mandatory variables is missing after the sourcing.
if ( ! $?nick_name ) then
    echo "${name}: ERROR: nick_name not defined in configuration file"
    exit 1
endif
if ( ! $?virt_node ) then
    echo "${name}: ERROR: virt_node not defined in configuration file"
    exit 1
endif
#
# The remote shell is optional in the configuration; default to 'rsh'.
if ( ! $?rem_shell ) set rem_shell = rsh
#
# Define the location of the programs.
set bindir = /usr/bin
#
# Get the number of digits in the node numbers,
# _ which is the number of characters in virt_node.
set ndig = `echo -n $virt_node | wc -c`
#
# Establish the final list of node numbers. If nodes were entered, they
# _ are completed to the correct number of digits and then either used
# _ as they are or, with the exclude flag up, removed from the list of
# _ running nodes; if none were entered, all the running nodes are used.
if ( "$nodes" != "" ) then
    #
    # Loop over the nodes entered, keeping track of the position
    # _ of each one so that it can be rewritten within the list.
    set idx = 0
    foreach node ( $nodes )
        @ idx++
        #
        # An arithmetic assignment eliminates the leading zeros.
        @ num = $node
        #
        # Count the number of missing digits.
        set len = `echo -n $num | wc -c`
        @ missing = $ndig - $len
        #
        # It is an error to have too many digits.
        if ( $missing < 0 ) then
            echo "${name}: ERROR: incorrect node number $node entered"
            exit 1
        endif
        #
        # Build a padding with one zero for each missing digit.
        set pad = ""
        while ( $missing > 0 )
            set pad = 0$pad
            @ missing--
        end
        #
        # Fix the value within the list of nodes.
        set nodes[$idx] = $pad$num
    end
    #
    # If the exclude flag is up, acquire the list of running nodes and
    # _ then exclude the ones entered as command-line arguments.
    if ( $exclude ) then
        #
        # Set an egrep target for the exclusion of the nodes,
        # _ by joining the node numbers with the '|' character.
        set etarg = `echo $nodes | tr ' ' '|'`
        #
        # Get the position of the first character after the nickname.
        set nchr = `echo -n $nick_name | wc -c`
        @ nchr++
        #
        # Get the list of the numbers of the nodes which are up,
        # _ but exclude the nodes entered as arguments.
        set nodes = `$bindir/cruptime -C $confile | grep " up " | cut -d" " -f1 | cut -c $nchr- | egrep -v "$etarg"`
    endif
#
# No nodes were entered, so acquire the list of running nodes.
else
    #
    # Get the position of the first character after the nickname.
    set nchr = `echo -n $nick_name | wc -c`
    @ nchr++
    #
    # Get the list of the numbers of the nodes which are up.
    set nodes = `$bindir/cruptime -C $confile | grep " up " | cut -d" " -f1 | cut -c $nchr-`
endif
#
# Print, for each node in the list, a colored header followed by the
# _ processes found on that node, and close with a final separator.
#
# Build the ESC character with 'printf', so that the ANSI color
# _ sequences do not depend on a raw control byte being present
# _ in this file, and then define the sequences that are used.
set esc = `printf '\033'`
set cyan = "${esc}[0;36m"
set green = "${esc}[1;32m"
set blue = "${esc}[1;34m"
set plain = "${esc}[0m"
#
# Build the dashes that surround the node number in the headers; they
# _ depend only on the number of digits, so this is done only once.
# _ Dashes are added alternately after and before the number, until
# _ the count that starts at the number of digits reaches 6.
set lead = ""
set trail = ""
@ i = $ndig
while ( $i < 6 )
    @ i++
    @ odd = $i % 2
    if ( $odd == 0 ) then
        set lead = "-$lead"
    else
        set trail = "$trail-"
    endif
end
#
# Loop over the list of nodes.
foreach node ( $nodes )
    #
    # Define another variable in order to format the output.
    set nn = "${lead}(${node})${trail}"
    #
    # Define the hostname of the node.
    set nm = $nick_name$node
    #
    # Use 'ps' as the program to get process information;
    # _ this is faster, but the percentile CPU usages
    # _ shown for each job may not be very precise.
    if ( $prog == ps ) then
        #
        # Print a separator and header.
        echo -n "$cyan"
        echo "-$nn-PID--%CPU-%MEM---VSZ--RSS-TTY------STAT-START---TIME-COMMAND-$nn-"
        echo -n "$plain"
        #
        # Acquire, colorize and print the data. The remote shell is run
        # _ with '-n', just as in the case of 'top', so that it does not
        # _ read from the standard input of this script. Processes of
        # _ root and Debian-exim and the 'ps' itself are dropped, only
        # _ some process states are kept, and lines are cut at 80 columns.
        $rem_shell -n $nm ps aux | egrep -v '^root | ps aux|^Debian-exim ' | \
            egrep ' R | RN | SN | R\+ | RN\+ | SN\+ ' | cut -c 1-80 | \
            sed -e "s|[0-9][0-9]*:[0-9][0-9]*|${green}&${plain}|g" \
                -e "s|[0-9][0-9]*\.[0-9][0-9]* *[0-9][0-9]*\.[0-9][0-9]*|${blue}&${plain}|g" \
                -e "s|^[a-z]*|${green}&${plain}|g"
    #
    # Use 'top' as the program to get process information;
    # _ this is slower, but the percentile CPU usages
    # _ shown for each job are quite precise.
    else if ( $prog == top ) then
        #
        # Determine the number of processes to look at; take off
        # _ the dot from the load to get the number times 100.
        set np = `$bindir/cruptime -C $confile | grep $nm | tr -s " " | cut -d\) -f2 | cut -d" " -f7 | cut -d, -f1 | sed -e "s|\.||g"`
        #
        # Round the number to the nearest integer.
        @ np = ( $np + 50 ) / 100
        #
        # Print a separator and header.
        echo -n "$cyan"
        echo "--PID-$nn-PRI--NI--SIZE--RSS-SHARE-STAT-%CPU-%MEM---TIME--COMMAND-$nn-"
        echo -n "$plain"
        #
        # Build an exclusion target for egrep.
        set etarg = ' root | daemon '
        #
        # Set a grep target for the empty line.
        set el = '^$'
        #
        # Acquire, colorize and print the data; in Sarge, one must
        # _ set the TERM variable or top will exit in silly error.
        # _ Both the 'export' and the 'setenv' forms are tried, with
        # _ their output discarded; the first 6 non-empty lines of
        # _ the output of 'top' are skipped, and only the first $np
        # _ of the remaining processes are shown.
        $rem_shell -n $nm "export TERM=xterm >& /dev/null ; setenv TERM xterm >& /dev/null ; top -b -n 1 | grep -v $el | tail -n +7 | egrep -v '$etarg' | head -$np" | \
            sed -e "s|[0-9][0-9]*:[0-9][0-9]* |${green}&${plain}|g" \
                -e "s|[0-9][0-9]*:[0-9][0-9]*\.[0-9][0-9]* |${green}&${plain}|g" \
                -e "s| [0-9][0-9][0-9][0-9]\([0-9]*\)m |${green}&${plain}|g" \
                -e "s|[0-9][0-9]*\.[0-9][0-9]* *[0-9][0-9]*\.[0-9][0-9]*|${blue}&${plain}|g" \
                -e "s|\(^ *[0-9][0-9]* *\)\([a-z]*\)|\1${green}\2${plain}|g"
    endif
end
#
# Print a final separator.
echo -n "$cyan"
echo "--------------------------------------------------------------------------------"
echo -n "$plain"
