#!/bin/sh

# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at:
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>

# Written by Theo Van Dinter <felicity@apache.org>
# Please feel free to mail with any questions. :)

# This script will run a mass-check against all targets specified on the
# command line and, optionally, against targets reported by a program called
# "mbox-to-check" located in the same directory as this script.
#
# At the end, you will have a ham.log, spam.log, and results.log in the
# current directory.

# Parameters
#
# The first parameter is the directory to use for mass-check (if there
# is no parameter or the path isn't a directory, "../spamassassin-head" will
# be the default).  On my machine, spamassassin-head is the latest version
# from SVN.
#
# All other parameters are passed directly to mass-check.
# Default parameters: --progress -c $SADIR/rules -j $PROCS 
#  (PROCS is determined by running $SADIR/masses/cpucount)
#
# Example:
#  ./run-masses -n
#  ./run-masses -n ham:mbox:mboxfile spam:mbox:mboxfile


# DEFOPTS holds extra default options to be passed to mass-check on every
# run.  The original author likes "--all --cache", but ymmv.
DEFOPTS=

# TARGETS is your list of mbox/mbx/dir/file targets for mass-check.  Leave it
# empty if the mbox-to-check script supplies the targets instead.
TARGETS=""


# Go through and limit our resource usage.  The limits are set on this shell,
# so they are in effect for every command run from here on.
# (-c)		no core files
# (-v -m)	max memory of 200MB (204800; assumes the shell's ulimit
#		counts in kilobytes -- TODO confirm for your /bin/sh)
# (-n)		max open files of 256
# (-u)		max child procs of 32
# NOTE(review): not every /bin/sh accepts all of these flags; no error
# checking is done here, so a rejected limit is simply not applied.
ulimit -c 0
ulimit -v 204800
ulimit -m 204800
ulimit -n 256
ulimit -u 32

# Clear only the "other write" bit in the umask so that files created by
# this run stay group-writable.
umask 0002

# Start from a known, minimal search path; tack /sw/bin onto the end on
# machines where that directory exists.
PATH=/bin:/usr/bin:/usr/local/bin
[ ! -d /sw/bin ] || PATH="$PATH:/sw/bin"
export PATH

# Use the specified directory for tests
#
# _is_sa_dir ARG
#   Succeeds only when ARG is non-empty and names an existing directory.
#   The two conditions are separate `[` invocations on purpose: joining them
#   with -o makes the expression ambiguous when ARG looks like a test
#   operator (e.g. "="), which can make `[` fail with a syntax error.
_is_sa_dir() {
  [ -n "$1" ] && [ -d "$1" ]
}

# If the first argument is a directory, consume it as the SpamAssassin tree;
# otherwise leave the arguments untouched (they all go to mass-check) and
# fall back to the default tree.
if _is_sa_dir "${1-}"; then
  DIR="$1"
  shift
else
  DIR=../spamassassin-head
fi

# Where are our files located?
MASS="$DIR/masses"
RULES="$DIR/rules"

# If an executable "mbox-to-check" sits next to this script, run it (passing
# the script's directory as its only argument) and append whatever it prints
# to TARGETS.
# The path expansions are quoted so that a script location containing spaces
# or glob characters is still handled as a single path.
CORPUS=$(dirname -- "$0")
if [ -x "$CORPUS/mbox-to-check" ]; then
  TARGETS="$TARGETS $("$CORPUS/mbox-to-check" "$CORPUS")"
fi

# Make sure the old rules.pl file is gone by removing the masses tmp
# directory left over from a previous run.
# The path is quoted so a tree location containing spaces cannot be split
# into several unrelated rm operands, and ${MASS:?} aborts the script rather
# than letting an empty MASS turn this into "rm -rf /tmp".
rm -rf -- "${MASS:?}/tmp"

# How many processes should we run at once?
# cpucount's output is used as the argument to mass-check's -j option.  If it
# prints nothing (missing, not executable, or failed), fall back to a single
# process; otherwise the bare "-j" would swallow the next argument.
PROCS=$("$MASS/cpucount")
if [ -z "$PROCS" ]; then
  echo "Warning: $MASS/cpucount produced no output, using 1 process" >&2
  PROCS=1
fi

# do the mass-check
# $PROCS, $DEFOPTS and $TARGETS are left unquoted on purpose: each may hold
# several whitespace-separated words (or padding) that must be split into
# separate arguments.  The path operands are quoted so they survive spaces.
"$MASS/mass-check" --progress -c "$RULES" -j $PROCS $DEFOPTS "$@" $TARGETS

# Only when both spam.log and ham.log exist in the current directory, run
# hit-frequencies and write its output to results.log.
# Two separate tests replace the obsolescent "-a" operator, and the path
# expansions are quoted so a tree location with spaces works.
if [ -f spam.log ] && [ -f ham.log ]; then
  echo "Generating hit frequency results"
  "$MASS/hit-frequencies" -c "$RULES" -x -p -a > results.log
fi

# Final cleanup: announce it, then delete the tmp directory that
# parse-rules-for-masses leaves in the current directory.
printf '%s\n' "Removing temporary files"
rm -rf ./tmp
