1#! /bin/sh
2
3# rundig.sh
4# a script to drive ht://Dig updates
5# Copyright (c) 1998 Colin Viebrock <cmv@shmooze.net>
6# Copyright (c) 1998-1999 Geoff Hutchison <ghutchis@wso.williams.edu>
7# Updated for ht://Dig 3.2.0b3 Feb 2001, Copyright (c) 2001 Geoff Hutchison
8# Distributed under the GNU GPL version 2 or later
9
# Optional first argument: -v asks the ht://Dig tools for verbose output.
# Start from an empty value so that a stray "verbose" variable inherited
# from the caller's environment is never passed on as a command-line
# argument when -v was not requested.
verbose=
case "$1" in
    -v) verbose="-v" ;;
esac
13
# ---------------------------------------------------------------------
# Site configuration.  Adjust the values below to match the local
# ht://Dig installation.
# ---------------------------------------------------------------------

# This is the directory where htdig lives
BASEDIR=/export/htdig

# This is the db dir
DBDIR=$BASEDIR/db/

# This is the temporary report file.  It is created with mktemp so that
# its name cannot be predicted: a fixed name in the world-writable /tmp
# would let another local user pre-create it (or plant a symlink there)
# before this script writes to it.
REPORT=`mktemp /tmp/htdig.report.XXXXXX` || {
    echo "rundig: unable to create a temporary report file" >&2
    exit 1
}
# Remove the report on every exit path.  Signals (HUP, INT, TERM) are
# turned into a normal exit so that the exit trap also runs for them.
trap 'rm -f "$REPORT"' 0
trap 'exit 1' 1 2 15

# This is who gets the report
REPORT_DEST="webmaster@yourdomain.com"
export REPORT_DEST

# This is the subject line of the report
SUBJECT="cron: htdig report for domain"

# This is the name of the conf file to use
CONF=htdig.conf

# This is the directory htdig will use for temporary sort files
TMPDIR=$DBDIR
export TMPDIR

# This is the PATH used by this script. Change it if you have problems
#  with not finding wc or grep.
PATH=/usr/local/bin:/usr/bin:/bin
40
##### Dig phase
# Run htdig and log its output to the report file, bracketed by
# timestamps.  The messages are quoted so that the alignment padding after
# the labels (and the spacing inside the date string) is not collapsed by
# word splitting.
STARTTIME=`date`
echo "Start time: $STARTTIME"
# '>' (not '>>') is used here: this first write starts a fresh report.
echo "rundig: Start time:   $STARTTIME" > "$REPORT"
# $verbose is intentionally left unquoted: when it is empty it must expand
# to no argument at all rather than to an empty-string argument.
"$BASEDIR/bin/htdig" $verbose -s -a -c "$BASEDIR/conf/$CONF" >> "$REPORT"
TIME=`date`
echo "Done Digging: $TIME"
echo "rundig: Done Digging: $TIME" >> "$REPORT"
49
##### Purge Phase
# (clean out broken links, etc.)
# Paths and the redirect target are quoted so that an installation or
# report path containing whitespace still works.  $verbose is intentionally
# left unquoted: when empty it must expand to no argument at all.
"$BASEDIR/bin/htpurge" $verbose -a -c "$BASEDIR/conf/$CONF" >> "$REPORT"
TIME=`date`
echo "Done Purging: $TIME"
echo "rundig: Done Purging: $TIME" >> "$REPORT"
56
##### Cleanup Phase
# To enable htnotify or the soundex search, uncomment the following lines
# "$BASEDIR/bin/htnotify" $verbose >> "$REPORT"
# "$BASEDIR/bin/htfuzzy" $verbose soundex
# To get additional statistics, uncomment the following line
# "$BASEDIR/bin/htstat" $verbose >> "$REPORT"

# Move the databases into place.
# db.docs.index (just a mapping from a URL to a DocID) is only used by
# htdig for update digs, and since we always use -a it is left as .work:
# mv "$DBDIR/db.docs.index.work" "$DBDIR/db.docs.index"
# The remaining files are copied rather than moved: the .work file is
# needed next time for an update dig, and the copy is used for searching.
for db in db.docdb db.excerpts db.words.db; do
    cp "$DBDIR/${db}.work" "$DBDIR/${db}"
done
# The _weakcmpr companion of the word database is copied only when the
# work version exists.
if test -f "$DBDIR/db.words.db.work_weakcmpr"; then
    cp "$DBDIR/db.words.db.work_weakcmpr" "$DBDIR/db.words.db_weakcmpr"
fi
74
# Record the end time; quoted so the alignment padding after the label
# survives word splitting.
END=`date`
echo "End time: $END"
echo "rundig: End time:     $END" >> "$REPORT"
echo

# Grab the important statistics from the report file and print them,
# grouped by the prefix at the start of each line.
# grep -F replaces the obsolescent fgrep (same fixed-string matching).
grep -F "htdig:" "$REPORT"
echo
# NOTE(review): the purge phase of this script runs htpurge, not htmerge,
# so its lines presumably carry an "htpurge:" prefix -- confirm against the
# installed version.  Both prefixes are matched so neither is missed.
grep -F -e "htmerge:" -e "htpurge:" "$REPORT"
echo
grep -F "rundig:" "$REPORT"
echo

# Feed the file on stdin so that wc prints only the count and not the file
# name a second time.  $WC is left unquoted on purpose to drop any padding
# wc puts in front of the number.
WC=`wc -l < "$REPORT"`
echo "Total lines in $REPORT:" $WC

# Send out the report ...
# $REPORT_DEST is intentionally unquoted so that a space-separated list of
# addresses is handed to mail as separate recipients.
mail -s "$SUBJECT - $STARTTIME" $REPORT_DEST < "$REPORT"

# ... and clean up
rm -f "$REPORT"
97