1#!/bin/sh
2
3# This is a self extracting archive designed for the TREC 2005 spam
4# filtering track. It deletes several directories and files in the current
5# working dir, which will then be replaced.
6#
7# We construct the archive by cat SFX dbacl-xxx.tar.gz > dbacl-xxx.sfx
8#
9# This script accepts one optional command line argument. If present,
10# we check whether a corresponding file named OPTIONS.zzz is present
11# in the TREC subdirectory, where zzz is the argument value. This
12# file is used to overwrite the OPTIONS file containing the switches
13# for the simulation. In this way, we can self-install several
14# variations of the classifier.
15#
16# If no options are given, we use OPTIONS.default if it exists in the
17# current working directory. Each time a command line option is presented,
18# the OPTIONS.default file is (re)created automatically from that argument.
19#
# If no options are given, and no OPTIONS.default exists in the current
21# working directory, then we present a help message and a list of
22# possible OPTIONS.
23#
24#
25
# Package name: this script's file name with the ".TREC.sfx.sh" suffix removed.
# "$0" is quoted so that a script path containing spaces still works.
NAME=$(basename "$0" .TREC.sfx.sh)
# Directory the script was started from; all paths below are built from it.
W=$PWD
# Line number (within this script) of the first line that starts with
# __ARCHIVE_FOLLOWS__; the gzip'ed tarball begins on the following line.
# -a makes grep treat the file as text despite the binary payload,
# -m 1 stops at the first match, and sed drops everything after "NNN:".
SKIP=$(grep -a -n -m 1 '^__ARCHIVE_FOLLOWS__' "$0" | sed 's/:.*//')
29
# usage - print the help text, then the option sets shipped in the archive.
#
# Globals read:
#   SKIP  line number of the __ARCHIVE_FOLLOWS__ marker inside "$0"
# Output (stdout):
#   the fixed help text, followed by one line per archive member whose path
#   contains "TREC/OPTIONS.", with everything up to and including
#   "OPTIONS." stripped (i.e. the values accepted for XXX).
#
# Defined with the portable name() form: the "function" keyword is not
# understood by every /bin/sh.
usage() {
	# Unquoted delimiter on purpose: $0 must be expanded in the first line.
	cat <<EOF
Usage: $0 [XXX]

Welcome to the dbacl TREC/spam evaluation package.
This script unpacks automatically into the current directory
a fresh copy of all the files and programs expected by the
TREC 2005 spamjig (spam filter evaluation system), such as the
initialize script.

If you are seeing this message, then you have yet to select
which algorithms and runtime options are to be tested in this
instance of the spamjig test run. All you have to do is rerun
the present script with the appropriate value of XXX chosen
from the list below. This will copy a file named OPTIONS.default
into the current directory which will lock your chosen options
for all required scripts.

You can change options later by rerunning this script with
another value of XXX, or even edit the OPTIONS.default file
directly if you know what you are doing.

Possible values for XXX:
EOF

	# Stream everything after the marker line through gunzip and list the
	# tarball's members. "-f -" makes tar read stdin explicitly instead of
	# relying on its compiled-in default archive or the TAPE variable.
	tail -n +"$(( ${SKIP:-0} + 1 ))" "$0" \
		| gunzip -c \
		| tar tf - \
		| grep 'TREC/OPTIONS\.' \
		| sed 's/^.*OPTIONS\.//'

}
56
# warn_fs - recommend running from a ramdisk, then wait for the user.
#
# Prints a boxed hint showing how to set up a tmpfs ramdisk and then blocks
# on one line of standard input, so the user can abort with Ctrl-C before
# anything is deleted or unpacked.
# Returns: the exit status of the final read (non-zero on end-of-file).
#
# Defined with the portable name() form: the "function" keyword is not
# understood by every /bin/sh.
warn_fs() {
    # Quoted delimiter: the text is emitted literally, nothing is expanded.
    cat <<'EOF'
################################################################
# A ramdisk speeds up simulation and protects your disks.      #
# If you haven't done so already, use a ramdisk!               #
# % mkdir /path/to/ramdisk                                     #
# % mount tmpfs /path/to/ramdisk -t tmpfs -o size=150m         #
# % cp OPTIONS.default dbacl-xxx.TREC.sfx.sh /path/to/ramdisk  #
# % cd /path/to/ramdisk                                        #
# % ./dbacl-xxx.TREC.sfx.sh                                    #
# (run simulation, copy results file away from ramdisk)        #
# % umount /path/to/ramdisk                                    #
################################################################
Press Ctrl-C to abort, or Enter to proceed.
EOF
    # POSIX read needs a variable name; REPLY is the variable bash fills in
    # when none is given. The value itself is never used.
    read -r REPLY
}
72
73
# ---------------------------------------------------------------------------
# Main installation sequence.
#
# Globals read: NAME, W, SKIP (set at the top of the script).
# Steps:
#   1. pick the option set (first argument, or "default")
#   2. wipe $W/$NAME and unpack the embedded tarball into the current dir
#   3. install the chosen OPTIONS file into the unpacked tree
#   4. configure / make / make check / make install with prefix $W
#   5. copy the TREC helper scripts into $W and reset db/, audit/, stderr.log
#   6. train on the bundled sample and compare stderr.log with the reference
# Exit status: 0 on success, 1 on any detected failure.
# ---------------------------------------------------------------------------

# Option set requested on the command line; fall back to "default", which
# is only acceptable when $W/OPTIONS.default already exists.
OPTARG=$1
if [ -z "$OPTARG" ]; then
	OPTARG=default
	if [ ! -e "$W/OPTIONS.$OPTARG" ]; then
		usage
		exit 1
	fi
fi

# Without the marker line number the payload cannot be located; stop here,
# before the existing source tree is deleted.
if [ -z "$SKIP" ]; then
	echo "There was a problem while extracting the archive."
	exit 1
fi

echo ""
echo "Installing $NAME - please wait...."
echo ""

warn_fs

# Remove any previous source tree, then unpack a fresh one from the payload
# that follows the marker line. The :? guards abort rather than let an empty
# variable turn the path into "/". "-f -" makes tar read stdin explicitly.
rm -rf "${W:?}/${NAME:?}"
tail -n +"$(( SKIP + 1 ))" "$0" | gunzip -c | tar xf -

if [ ! -d "$W/$NAME" ]; then
	echo "There was a problem while extracting the archive."
	exit 1
fi

# Select the OPTIONS file. OPTARG is always non-empty at this point, so no
# further emptiness test is needed. An option set shipped in the archive
# wins and is also saved as $W/OPTIONS.default; otherwise a matching file
# in the working directory is used; otherwise the archive's own OPTIONS
# file is left untouched.
if [ -e "$W/$NAME/TREC/OPTIONS.$OPTARG" ]; then
	cat "$W/$NAME/TREC/OPTIONS.$OPTARG" > "$W/$NAME/TREC/OPTIONS"
	cp -f "$W/$NAME/TREC/OPTIONS.$OPTARG" "$W/OPTIONS.default"
else
	echo "No OPTIONS.$OPTARG in $W/$NAME/TREC, trying working directory..."
	if [ -e "$W/OPTIONS.$OPTARG" ]; then
		echo "Found $W/OPTIONS.$OPTARG."
		cat "$W/OPTIONS.$OPTARG" > "$W/$NAME/TREC/OPTIONS"
	else
		echo "Could not find $W/OPTIONS.$OPTARG, using defaults."
	fi
fi

echo "Installing these options:"
echo "---------"
cat "$W/$NAME/TREC/OPTIONS"
echo "---------"

# Build and install into $W. The build must not run from the wrong
# directory, so a failed cd is fatal.
if ! cd "$W/$NAME"; then
	echo "Installation did NOT complete successfully."
	exit 1
fi

# Both the build chain and the presence of the installed binary are checked:
# testing the binary alone would accept a stale $W/bin/dbacl left over from
# an earlier run even though this build failed.
if ./configure "--prefix=$W" && make && make check && make install \
	&& [ -x "$W/bin/dbacl" ]; then

	# Refresh the helper scripts in $W from the installed share directory.
	for f in initialize finalize classify train checkpoint restart; do
		rm -f "$W/$f" && cp "$W/share/dbacl/TREC/$f" "$W/$f"
		chmod u+x "$W/$f"
	done

else
	echo "Installation did NOT complete successfully."
	exit 1
fi

cd "$W" || exit 1

# Start from an empty database directory and no stale error log.
rm -rf "$W/db" && mkdir "$W/db"
rm -f "$W/stderr.log"

# Load the installed settings into this shell.
# NOTE(review): AUDITCOMMAND is presumably defined by this file - confirm.
. "$W/share/dbacl/TREC/OPTIONS"

# Fresh audit directory; if the audit helper named by AUDITCOMMAND was
# installed, copy it next to the other scripts and run its initialize step.
rm -rf "$W/audit" && mkdir "$W/audit"
if [ -f "$W/share/dbacl/TREC/$AUDITCOMMAND" ]; then
	cp "$W/share/dbacl/TREC/$AUDITCOMMAND" "$W/$AUDITCOMMAND"
	chmod u+x "$W/$AUDITCOMMAND"
	"$W/$AUDITCOMMAND" initialize "$W"
fi

# Smoke test: train both categories on the bundled sample message.
"$W/train" ham "$W/share/dbacl/TREC/basic-email"
"$W/train" spam "$W/share/dbacl/TREC/basic-email"

# The run counts as good only if stderr.log is byte-identical to the
# reference file shipped with the package.
# NOTE(review): stderr.log is presumably written by the train script - confirm.
if ! cmp -s "$W/stderr.log" "$W/share/dbacl/TREC/verify-stderr"; then
	echo "Basic learning failed. See stderr.log below:"
	echo "---"
	cat "$W/stderr.log"
	exit 1
fi

echo "Done!"

# Explicit exit: the shell must never read on into the archive data below.
exit 0
159
160# no extra characters allowed after this line!
161__ARCHIVE_FOLLOWS__
162