#!/bin/bash
# JLdL 19Jan11.
#
# Copyright (C) 2009-2011 by Jorge L. deLyra <delyra@fma.if.usp.br>.
# This program may be copied and/or distributed freely. See the
# _ terms and conditions in /usr/share/doc/<package>/copyright.
#
# Clean the user-account backup area, by finding and removing
# _ second-layer backup files which are identical to the
# _ corresponding first-layer files; the removal is global,
# _ not user-specific, and hence this program is meant to
# _ be run only as root, by system administrators.
#
# Trap the "[Ctrl]-C" and "kill -15" signals and do a clean exit
# _ if they come; the handler is looked up by name only when a
# _ signal actually arrives.
trap cleanexit SIGINT SIGTERM
#
# Record the name this script was called with; the parameter
# _ expansion strips the leading directories without word-splitting
# _ a path that contains blanks.
name="${0##*/}"
#
# Set the default configuration file.
confile="/etc/quotas.conf"
#
# Set the default value for the action flag: "l" only lists what
# _ would be done, while "k" actually does the actions.
action=l
#
# Set a variable with the tab character.
tab=$'\t'
#
# A function to process the command-line arguments, which are passed
# _ to it as its own arguments; it sets the global variables confile
# _ and action, and uses the global flag conflag to remember that the
# _ next word is the argument of the -C option; it ends the script
# _ with status 0 on -h and with status 1 on any usage error.
function parseargs()
{
    local cla
    #
    # Initialize the flag for the configuration-file option.
    conflag=0
    #
    # Loop over the arguments exactly as they were given; the quoted
    # _ "$@" keeps an argument with blanks or glob characters in one
    # _ piece, instead of splitting or expanding it.
    for cla in "$@" ; do
        #
        # Ignore empty arguments, so that an empty <config>
        # _ still counts as a missing one.
        [ -n "$cla" ] || continue
        #
        # Detect options; the pattern match is done by the shell itself,
        # _ so that words such as "-n" or "-e" are not swallowed by echo.
        if [[ "$cla" == -* ]] ; then
            #
            # If we got here with conflag up, there is an error.
            if [ "$conflag" == 1 ] ; then
                echo "${name}: ERROR: option -C requires an argument" >&2
                exit 1
            fi
            #
            # Now process the options.
            case "$cla" in
                "-h"|"--help")
                    #
                    # Print a usage message.
                    echo "usage: $name [-C <config>] [-l|-k]"
                    echo "       -C: use alternate configuration file <config>"
                    echo "       -l (list): show what would be done"
                    echo "       -k (kill): actually do the actions"
                    exit 0
                    ;;
                "-C"|"--Config-file")
                    #
                    # Raise the flag.
                    conflag=1
                    ;;
                "-l"|"--list")
                    #
                    # Show what would be done.
                    action=l
                    ;;
                "-k"|"--kill")
                    #
                    # Actually do the actions.
                    action=k
                    ;;
                *)
                    #
                    # Print an error message.
                    echo "${name}: ERROR: unknown option $cla; try -h to get help" >&2
                    exit 1
                    ;;
            esac
        #
        # A non-option word right after -C is the configuration file.
        elif [ "$conflag" == 1 ] ; then
            #
            # Set the configuration file and lower the flag.
            confile=$cla
            conflag=0
        #
        # This script takes no arguments itself.
        else
            echo "${name}: ERROR: this program takes no arguments" >&2
            exit 1
        fi
    done
    #
    # If we got here with conflag up, there is an error.
    if [ "$conflag" == 1 ] ; then
        echo "${name}: ERROR: option -C requires an argument" >&2
        exit 1
    fi
}
#
# Process the command-line arguments.
parseargs "$@"
#
# A function to do a clean exit on interrupt or kill: it removes the
# _ temporary list file named by the global variable listfile, if
# _ that variable has been set, and then ends the script with a
# _ zero exit status.
function cleanexit()
{
    #
    # The name is quoted so that a path containing blanks is removed as
    # _ a single file; nothing is removed while the variable is empty.
    if [ -n "$listfile" ] ; then
        rm -f -- "$listfile"
    fi
    exit 0
}
#
# Define the default for the root of the backup tree;
# _ this must be an absolute path.
bckp=/bckp
#
# Define the default for the user home area or areas;
# _ these must be absolute paths.
hpths=/home
#
# A function to extract from the configuration file the value of the
# _ variable named by its only argument; since the configuration file
# _ uses the tcsh shell syntax, we must do some syntax interpretation
# _ here: the comment lines are dropped, the lines that set the
# _ variable are selected, and what is kept is the text after the
# _ equal sign and within the double quotes.
function confvalue()
{
    grep -v "^[ $tab]*#" < "$confile" | \
        grep "set *$1" | \
        cut -d= -f2 | \
        cut -d\" -f2
}
#
# The configuration file must be readable; its name is quoted, so
# _ that a path containing blanks is tested as a single word.
if [ ! -r "$confile" ] ; then
    echo "${name}: ERROR: cannot read configuration file $confile" >&2
    exit 1
fi
#
# Get some data from the configuration file; this should define
# _ the following variables: backup_root and home_path.
backup_root=$(confvalue backup_root)
home_path=$(confvalue home_path)
#
# Get the root of the backup tree from the configuration file, but
# _ only if the value found there is not the empty string; since the
# _ variable was always set above, this is the only check needed.
if [ -n "$backup_root" ] ; then
    bckp=$backup_root
fi
#
# Get the user home area or areas from the configuration file,
# _ but only if the value found there is not the empty string.
if [ -n "$home_path" ] ; then
    hpths=$home_path
fi
#
# A function to clean the backup copy of one user home area, whose
# _ absolute path is given as the only argument; it removes (or, in
# _ list mode, just reports) the second-layer backup files which are
# _ identical to the corresponding first-layer files; it uses the
# _ global variables bckp, name and action, and sets the global
# _ variable listfile, so that the signal handler can remove the
# _ temporary file; it ends the script with status 1 if the backup
# _ home directory is missing or the temporary file cannot be made.
function cleanarea()
{
    local hpth="$1" area full part shown
    local nl=$'\n'
    #
    # Build the path of the backup home directory; leading blanks are
    # _ trimmed, because the backup root may carry them when it is set
    # _ without double quotes in the configuration file.
    area="$bckp$hpth"
    area="${area#"${area%%[![:space:]]*}"}"
    #
    # Check that the backup home directory is there.
    if [ ! -d "$area" ] ; then
        echo "${name}: ERROR: backup home directory $bckp$hpth not found" >&2
        exit 1
    fi
    #
    # Create the temporary file for the list; since this program runs
    # _ as root, the file must not have a predictable name in /tmp, so
    # _ we let mktemp create it exclusively under a random name.
    listfile=$(mktemp "/tmp/${name}-lis${hpth//\//-}-XXXXXX")
    if [ -z "$listfile" ] ; then
        echo "${name}: ERROR: cannot create a temporary file in /tmp" >&2
        exit 1
    fi
    #
    # Find and store all the second-layer backup files; note that we
    # _ write out the filenames with a NULL at the end, rather than a
    # _ newline character, so that any filename, including those with
    # _ newlines or backslashes, is stored unchanged.
    find "$area/" -type f -name '*.~P~' -print0 > "$listfile"
    #
    # Loop over the candidate files in a record-by-record fashion,
    # _ reading the NULL-terminated records verbatim.
    while IFS= read -r -d '' full ; do
        #
        # Define the name of the corresponding first-layer file, by
        # _ stripping the second-layer termination.
        part=${full%.~P~}
        #
        # If both exist, then compare them.
        if [ -f "$part" ] && [ -f "$full" ] ; then
            #
            # Compare the two files quietly, and if they are identical
            # _ (cmp returns with 0), then deal with the second-layer file.
            if cmp -s "$part" "$full" ; then
                #
                # Just print out what would be done.
                if [ "$action" == "l" ] ; then
                    printf '%s\n' "would remove $full"
                #
                # Actually do it.
                else
                    rm -f -- "$full"
                fi
            fi
            #
        elif [ ! -f "$full" ] ; then
            #
            # If the full pathname is not found any more, then issue a
            # _ warning; newlines in the name are shown as question marks.
            shown=${full//$nl/?}
            echo "${name}: WARNING: missing file or bad character in filename:" >&2
            printf '         %s\n' "$shown" >&2
            #
        fi
        #
    done < "$listfile"
    #
    # Remove the temporary file.
    rm -f -- "$listfile"
}
#
# Loop over the user home areas; the expansion is left unquoted on
# _ purpose, since the variable holds a blank-separated list of paths.
for hpth in $hpths ; do
    cleanarea "$hpth"
done
