#! /bin/sh

# General style correction and cleanup.
#
# Scratch file, named after this shell's PID. The trap command is
# single-quoted, so $t is expanded when the trap fires and a later
# reassignment of t is honored; it is double-quoted inside the command so a
# name containing whitespace is still removed as a single file.
t="__wt.$$"
trap 'rm -f "$t"' EXIT HUP INT QUIT PIPE TERM

# Probe whether this xargs accepts -P by running a throwaway command with
# it; when the probe succeeds, xp carries the option for the real run.
xp=""
if echo date | xargs -P 20 >/dev/null 2>&1; then
	xp="-P 20"
fi

# s_style is re-entrant: when run with anything other than exactly one
# argument, it calls itself again for each file that needs checking.
if [ $# -ne 1 ]; then
	# Driver mode: step up one directory so the source directories and
	# ./dist/s_style resolve. Stop if that fails rather than scanning
	# whatever directory we happen to be in.
	cd .. || exit 1

	# List candidate files by name, drop the excluded paths, then run this
	# script again once per remaining file.
	#
	# -I already runs the command once per input line. POSIX makes -I and
	# -n mutually exclusive and GNU xargs warns when both are given, so
	# -n 1 is not passed. $xp is deliberately unquoted: it is either empty
	# or the two words "-P 20".
	find bench examples ext src test \
	    -name '*.[chsy]' -o \
	    -name '*.in' -o \
	    -name '*.dox' -o \
	    -name '*.cxx' -o \
	    -name '*.cpp' -o \
	    -name '*.py' -o \
	    -name '*.cmake' -o \
	    -name '*.i' |
	sed -e '/Makefile.in/d' \
	    -e '/test\/3rdparty/d' \
	    -e '/test\/checksum\/power8/d' \
	    -e '/checksum\/zseries/d' |
	xargs $xp -I{} sh ./dist/s_style {}
else
	# General style correction and cleanup for a single file, named by the
	# first argument.
	f=$1
	fname=$(basename "$f")

	# Per-file scratch name built from the file's base name and this
	# shell's PID.
	t="__wt_s_style.$fname.$$"

	# Quoting matters here: an empty or whitespace-containing path must be
	# tested as one operand, otherwise the check is silently skipped.
	if [ ! -e "$f" ]; then
		echo "$0 error $1 does not exist"
		exit 1
	fi

	# Ignore styling errors of external libraries: the checks in this block
	# run only for files under src/, and the paths listed first are skipped.
	case "$f" in
	src/os_win/* | src/docs/* | src/tags* | \
	src/*/hash_city* | src/*/huffman* | src/checksum*)
		;;
	src/*)
		# Camel case style is not allowed. The second grep drops
		# comment continuation lines, lines containing a double quote
		# and lines tagged UNCHECKED_STRING; whatever survives is
		# printed, followed by the explanation.
		if grep -E -r -n '\b[a-z]+[A-Z]' "$f" |
		    grep -E -v ':[     ]+\*|"|UNCHECKED_STRING'; then
			echo "$f: Styling requires variables that use underscores to separate parts of a name instead of camel casing."
		fi

		# Return values should be wrapped in parentheses.
		grep -E -r -n '^[^*/"]*[[:space:]]*return [^(]' "$f" > "$t"
		if [ -s "$t" ]; then
			echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
			echo 'Add parentheses to return values indicated below in file '"$f"
			echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
			cat "$t"
		fi
		;;
	esac

	# Remove non-ascii characters and substitute some expressions with
	# accepted version. tr keeps only alphanumeric, whitespace and
	# punctuation characters; sed then collapses a trailing ";;" (except on
	# lines containing "for ") and rewrites three spellings. The result is
	# staged in the scratch file.
	tr -cd '[:alnum:][:space:][:punct:]' < "$f" |
	sed -e '/for /!s/;;$/;/' \
	    -e 's/(EOPNOTSUPP)/(ENOTSUP)/' \
	    -e 's/(unsigned)/(u_int)/' \
	    -e 's/hazard reference/hazard pointer/' > "$t"

	# Overwrite the file only when the cleaned text actually differs.
	if ! cmp "$t" "$f" > /dev/null 2>&1; then
		echo "modifying $f" && cp "$t" "$f"
	fi

	# Find paired typos (the same word twice in a row, e.g. "the the").
	# For Python and C sources the match must follow a comment marker on
	# the line (# for .py; /* or * for .c and .h); every other file type is
	# searched in full. The second grep in a pipeline drops invalid edge
	# cases.
	case "${fname##*.}" in
	py)
		grep -E '#.*[[:space:]]\b([a-zA-Z]+)[[:space:]]\b\1[[:space:]\.]' "$f" > "$t"
		;;
	c | h)
		grep -E '/?\*.*[[:space:]]\b([a-zA-Z]+)[[:space:]]\b\1[[:space:]\.]' "$f" |
		    grep -E -v -e "@" -e "long long" > "$t"
		;;
	*)
		grep -E '[[:space:]]\b([a-zA-Z]+)[[:space:]]\b\1[[:space:]\.]' "$f" |
		    grep -E -v -e "@" -e "^\(" > "$t"
		;;
	esac

	# Report only when something was captured.
	if [ -s "$t" ]; then
		echo "paired typo"
		echo "============================"
		echo "$f"
		cat "$t"
	fi

	# Files whose extension is "dox" stop here: none of the checks below
	# are run for them.
	extension=${fname##*.}
	case "x$extension" in
	xdox)
		exit 0
		;;
	esac

	# Each check captures the offending lines in the scratch file and, when
	# grep finds any, prints a one-line diagnosis followed by those lines.
	# Expansions are quoted so a path containing whitespace or glob
	# characters is handled as a single operand.

	# "while (0);" with no closing brace earlier on the same line.
	if grep "^[^}]*while (0);" "$f" > "$t"; then
		echo "$f: while (0) has trailing semi-colon"
		cat "$t"
	fi

	# Any use of WT_DEADLOCK other than its own #define.
	if grep WT_DEADLOCK "$f" | grep -v '#define.WT_DEADLOCK' > "$t"; then
		echo "$f: WT_DEADLOCK deprecated in favor of WT_ROLLBACK"
		cat "$t"
	fi

	if grep 'sizeof(WT_UPDATE)' "$f" > "$t"; then
		echo "$f: Use WT_UPDATE_SIZE rather than sizeof(WT_UPDATE)"
		cat "$t"
	fi

	# WT_TXN_ISO_XXX constants may appear only in the C examples, the
	# extensions, the extension header and its implementation file. The
	# matching lines are captured in the scratch file so that the listing
	# printed after the message shows this check's matches and nothing
	# left over from an earlier check.
	if ! expr "$f" : 'examples/c/.*' > /dev/null &&
	   ! expr "$f" : 'ext/.*' > /dev/null &&
	   ! expr "$f" : 'src/include/wiredtiger_ext\.h' > /dev/null &&
	   ! expr "$f" : 'src/txn/txn_ext\.c' > /dev/null &&
	   grep WT_TXN_ISO_ "$f" > "$t"; then
		echo "$f: WT_TXN_ISO_XXX constants only for the extension API"
		cat "$t"
	fi

	# Outside queue.h, flag the STAILQ_, SLIST_ and LIST_ macro families.
	# Matches go through the scratch file and are listed after the message,
	# the same way the neighbouring checks report.
	if ! expr "$f" : 'src/include/queue\.h' > /dev/null &&
	    grep -E 'STAILQ_|SLIST_|\bLIST_' "$f" > "$t"; then
		echo "$f: use TAILQ for all lists"
		cat "$t"
	fi

	# __wt_errno may be referenced only from the extern/os headers and the
	# os_common, os_posix and os_win directories; report it anywhere else.
	# Expansions are quoted so a path containing whitespace or glob
	# characters is handled as a single operand.
	if ! expr "$f" : 'src/include/extern.h' > /dev/null &&
	   ! expr "$f" : 'src/include/extern_posix.h' > /dev/null &&
	   ! expr "$f" : 'src/include/extern_win.h' > /dev/null &&
	   ! expr "$f" : 'src/include/os.h' > /dev/null &&
	   ! expr "$f" : 'src/os_common/.*' > /dev/null &&
	   ! expr "$f" : 'src/os_posix/.*' > /dev/null &&
	   ! expr "$f" : 'src/os_win/.*' > /dev/null &&
	    grep '__wt_errno' "$f" > "$t"; then
		echo "$f: upper-level code should not call __wt_errno"
		cat "$t"
	fi

	# A %zu conversion (optionally with a width) is reported unless the
	# line also mentions SIZET_FMT. The C examples and os.h are exempt.
	if ! expr "$f" : 'examples/c/.*' > /dev/null &&
	   ! expr "$f" : 'src/include/os.h' > /dev/null &&
	    grep -E "%[0-9]*zu" "$f" | grep -v 'SIZET_FMT' > "$t"; then
		echo "$f: %zu needs to be fixed for Windows"
		cat "$t"
	fi

	# off_t as a whole word, in any file.
	grep -E -w 'off_t' "$f" > "$t"
	if [ -s "$t" ]; then
		echo "$f: off_t type declaration, use wt_off_t"
		cat "$t"
	fi

	# qsort( preceded by whitespace, anywhere but misc.h.
	if ! expr "$f" : 'src/include/misc.h' > /dev/null &&
	    grep '[[:space:]]qsort(' "$f" > "$t"; then
		echo "$f: qsort call, use WiredTiger __wt_qsort instead"
		cat "$t"
	fi

	# setvbuf as a whole word, anywhere but os_setvbuf.c.
	if ! expr "$f" : 'src/.*/os_setvbuf.c' > /dev/null &&
	    grep -E -w 'setvbuf' "$f" > "$t"; then
		echo "$f: setvbuf call, use WiredTiger library replacements"
		cat "$t"
	fi

	# snprintf( or vsnprintf( not preceded by a lower-case letter or an
	# underscore, i.e. the bare C library call rather than a prefixed
	# wrapper. The C examples, C++ sources, extensions and os_snprintf.c
	# are exempt. The directory exemptions use "/.*" so that the slash is
	# required; "/*" in a regular expression means "zero or more slashes".
	if ! expr "$f" : 'examples/c/.*' > /dev/null &&
	   ! expr "$f" : '.*cxx' > /dev/null &&
	   ! expr "$f" : '.*cpp' > /dev/null &&
	   ! expr "$f" : 'ext/.*' > /dev/null &&
	   ! expr "$f" : 'src/os_posix/os_snprintf.c' > /dev/null &&
	    grep -E '[^a-z_]snprintf\(|[^a-z_]vsnprintf\(' "$f" > "$t"; then
		echo "$f: snprintf call, use WiredTiger library replacements"
		cat "$t"
	fi

	# If we don't have matching pack-begin and pack-end calls, we don't get
	# an error, we just get a Windows performance regression. Lines that
	# mention WT_PACKED_STRUCT are collected and counted; an odd count
	# means a BEGIN or END line is missing. Using awk and not wc to ensure
	# there's no whitespace in the assignment (awk prints 0 for an empty
	# scratch file).
	grep -E WT_PACKED_STRUCT "$f" > "$t"
	cnt=$(awk 'END { print NR }' < "$t")
	if [ $((cnt % 2)) -ne 0 ]; then
		echo "$f: mismatched WT_PACKED_STRUCT_BEGIN/END lines"
		cat "$t"
	fi

	# Direct calls to functions we're not supposed to use in the library.
	# We don't check for all of them, just a few of the common ones.
	# Benchmarks, examples, extensions, tests and anything in a utilities
	# directory are not checked. grep succeeding already means the scratch
	# file is non-empty, so each branch reports straight away.
	if ! expr "$f" : 'bench/.*' > /dev/null &&
	   ! expr "$f" : 'examples/.*' > /dev/null &&
	   ! expr "$f" : 'ext/.*' > /dev/null &&
	   ! expr "$f" : 'test/.*' > /dev/null &&
	   ! expr "$f" : '.*/utilities/.*' > /dev/null; then
		# Allocation and formatting calls, allowed only in os_alloc.c.
		if ! expr "$f" : '.*/os_alloc.c' > /dev/null &&
		     grep -E '[[:space:]]free[(]|[[:space:]]strdup[(]|[[:space:]]strndup[(]|[[:space:]]malloc[(]|[[:space:]]calloc[(]|[[:space:]]realloc[(]|[[:space:]]sprintf[(]' "$f" > "$t"; then
			echo "$f: call to illegal function"
			cat "$t"
		fi

		# strtouq, allowed only in os_strtouq.c.
		if ! expr "$f" : '.*/os_strtouq.c' > /dev/null &&
		     grep -E '[[:space:]]strtouq[(]' "$f" > "$t"; then
			echo "$f: call to illegal function"
			cat "$t"
		fi

		# exit, with no exempt file.
		if grep -E '[[:space:]]exit[(]' "$f" > "$t"; then
			echo "$f: call to illegal function"
			cat "$t"
		fi
	fi

	# Declaration of an integer return variable. Benchmarks, examples,
	# tests and extensions are not checked.
	if ! expr "$f" : 'bench/.*' > /dev/null &&
	   ! expr "$f" : 'examples/.*' > /dev/null &&
	   ! expr "$f" : 'test/.*' > /dev/null &&
	   ! expr "$f" : 'ext/.*' > /dev/null; then
		# This regex can return false positives on functions that take
		# ret as an argument. Filter out matches that contain brackets.
		grep -E -w ret "$f" |
		    grep -E 'int.*[, ]ret[,;]' |
		    grep -E -v '[()]' > "$t"
		if [ -s "$t" ]; then
			echo "$f: explicit declaration of \"ret\""
			cat "$t"
		fi
	fi

	# Use of ctype functions that sign extend their arguments: flag an
	# include of ctype.h or a direct call to one of the is*() classifiers,
	# tolower() or toupper(). Benchmarks, the csuite tests, examples,
	# extensions, Python files and ctype.i itself are not checked. grep
	# succeeding already means the scratch file is non-empty.
	if ! expr "$f" : 'bench/.*' > /dev/null &&
	   ! expr "$f" : 'test/csuite/.*' > /dev/null &&
	   ! expr "$f" : 'examples/.*' > /dev/null &&
	   ! expr "$f" : 'ext/.*' > /dev/null &&
	   ! expr "$f" : '.*py' > /dev/null &&
	   ! expr "$f" : 'src/include/ctype.i' > /dev/null; then
		if grep -E '(#include.*["</]ctype.h[">]|\b(is(alnum|alpha|cntrl|digit|graph|lower|print|punct|space|upper|xdigit)|to(lower|upper))\()' "$f" > "$t"; then
			echo "$f: direct use of ctype.h functions, instead of ctype.i equivalents"
			cat "$t"
		fi
	fi
fi

# Findings are reported through output only: reaching this point always
# yields a zero exit status.
exit 0
