1#! /bin/sh
2
3#================================================================
4# estautoreg
# Create an inverted index by breaking up the target list.
6#================================================================
7
8
# global settings
# Run everything below in the C locale.
LANG=C
LC_ALL=C
export LANG LC_ALL

# Names and tunables used throughout the script:
#   progname - prefix of every message printed by this script
#   tmpdir   - scratch directory; the suffix is the PID of this shell
#   alllist  - file inside tmpdir that receives the complete target list
#   unit     - default number of list lines per partial index
#   name     - name of the index to build
#   wmax     - value passed to the -wmax option of estindex
progname="estautoreg"
tmpdir="/tmp/${progname}.$$"
alllist="${tmpdir}/all"
unit="65536"
name="casket"
wmax="4096"

# Extend the search path first so that estsiutil can be looked up there as
# well, then put the directory it reports in front of everything else.
PATH="${PATH}:/usr/local/bin:."
export PATH
PATH="$(estsiutil bindir):${PATH}"
export PATH
libexecdir="$(estsiutil libexecdir)"
datadir="$(estsiutil datadir)"
22
23
# handle the command line
#   --help   print the usage message and exit
#   --clean  remove the index and the deployed CGI files, then exit
#   num      number of list lines per partial index (replaces $unit)


# function to check a unit: succeeds only if $1 is a decimal integer > 0
valid_unit(){
  case "$1" in
    ''|*[!0-9]*) return 1 ;;   # empty, or contains something that is not a digit
    *[1-9]*) return 0 ;;       # all digits and at least one of them is non-zero
    *) return 1 ;;             # nothing but zeros
  esac
}


case "$1" in
  --help)
    # show help message
    printf 'Create an inverted index with breaking up the target list.\n'
    printf '\n'
    printf 'Usage:\n'
    printf '  %s [num]\n' "$progname"
    printf '  %s --clean\n' "$progname"
    printf '\n'
    exit 0
    ;;
  --clean)
    # clean files
    rm -rf casket estsearch.cgi estsearch.conf estsearch.tmpl estsearch.top
    exit 0
    ;;
esac


# parse arguments
# A bad unit is rejected here, before the script goes on to remove the
# existing index; split and expr cannot work with it anyway.
if [ $# -ge 1 ]
then
  if valid_unit "$1"
  then
    unit="$1"
  else
    printf '%s: invalid number of files per index: %s\n' "$progname" "$1" >&2
    exit 1
  fi
fi
50
51
# function to remove the temporary directory
# Deletes the directory named by $tmpdir together with its contents.
# Because of -f, rm stays quiet and still succeeds when it is already gone.
tmpclean(){
  rm -r -f "$tmpdir"
}
56
57
# function to sync
# Announces the step, then runs two rounds of a double sync; the first round
# is followed by a pause of one second, the second by a pause of two seconds.
mysync(){
  printf '%s: synchronizing\n' "$progname"
  for mysync_pause in 1 2
  do
    sync
    sync
    sleep "$mysync_pause"
  done
}
66
67
# set the exit trap
# A trap action that merely returns lets the script carry on after the signal
# with its temporary directory gone, so terminate explicitly after cleaning up.
trap 'tmpclean ; exit 1' 1 2 3 13 15


# create the temporary directory
# Prefer mktemp so that the name cannot be guessed in advance.  Without it,
# insist on creating the PID-based directory ourselves (plain mkdir fails if
# the name already exists) instead of silently reusing someone else's.
if command -v mktemp > /dev/null 2>&1
then
  tmpdir=$(mktemp -d "/tmp/$progname.XXXXXX") || exit 1
  alllist="$tmpdir/all"
else
  ( umask 077 && mkdir "$tmpdir" ) || exit 1
fi


# remove existing database
rm -rf "$name" "$name"-*


# create list
# One path per line; only the listed extensions are kept, case-insensitively.
printf '%s: finding targets\n' "$progname"
find . -follow -type f | grep -E -i '\.(html|htm|txt|asc|eml|mht)$' > "$alllist"
# Reading from stdin keeps the file name out of the output of wc; some
# implementations pad the number with blanks, hence the tr.
allnum=$(wc -l < "$alllist" | tr -d ' ')
printf '%s: %d files were found\n' "$progname" "$allnum"
printf '%s: dividing the list\n' "$progname"
split -l "$unit" "$alllist" "$alllist"-
mysync


# register files
# Each piece of the list goes into its own index, "$name-0001" and so on.
# The loop runs in the current shell, so $num still holds the number of
# pieces afterwards.
num=0
for list in "$alllist"-*
do
  # when nothing matches, the pattern itself is left over; skip it
  [ -f "$list" ] || continue
  num=$((num + 1))
  index=$(printf '%s-%04d' "$name" "$num")
  estindex register -list "$list" -wmax "$wmax" -rich "$index"
  mysync
done


# merge element indexes
# Decide by the number of pieces that were actually produced: several pieces
# are merged, a single one is simply renamed, and none at all is reported.
if [ "$num" -gt 1 ]
then
  estindex merge -rich "$name" "$name"-*
elif [ -e "$name-0001" ]
then
  printf '%s: renaming the temporary index as the real one\n' "$progname"
  mv -f "$name-0001" "$name"
else
  printf '%s: no index was created\n' "$progname" >&2
fi
mysync


# remove temporary indexes
if [ -d "$name" ]
then
  printf '%s: removing temporary indexes\n' "$progname"
  rm -rf "$name"-*
  mysync
fi


# add score information
if [ -d "$name" ]
then
  estindex relate "$name"
  mysync
fi


# deploy the CGI script and its configurations
# Files that are already deployed are left untouched.
if [ -f "estsearch.cgi" ]
then
  printf '%s: estsearch.cgi already exists\n' "$progname"
else
  printf '%s: deploying estsearch.cgi and its configurations\n' "$progname"
  cp -f "$libexecdir/estsearch.cgi" .
  cp -f "$datadir/estsearch".* .
  mysync
fi


# clean up the temporary directory
printf '%s: cleaning up the temporary directory\n' "$progname"
tmpclean


# exit normally
printf '%s: finished\n' "$progname"
exit 0
151
152
153
154# END OF FILE
155