#! /bin/sh

#================================================================
# estautoreg
# Create an inverted index with breaking up the target list.
#
# Usage:
#   estautoreg [num]     index the matching files found under the current
#                        directory, registering them in chunks of `num'
#                        list entries (default 65536)
#   estautoreg --clean   remove the index and the deployed CGI files
#   estautoreg --help    show the usage
#
# Exit status: 0 on success, 1 on a bad argument, when no target file
# is found, or when the work list cannot be prepared.
#================================================================


# set variables
LANG=C ; export LANG
LC_ALL=C ; export LC_ALL
progname="estautoreg"
tmpdir=""        # set once the temporary directory really exists
alllist=""       # path of the complete target list inside $tmpdir
unit="65536"     # number of list entries per element index
name="casket"    # name of the final index; element indexes are $name-NNNN
wmax="4096"      # value handed to `estindex register -wmax'
PATH="$PATH:/usr/local/bin:." ; export PATH


# show help message
if [ "$1" = "--help" ]
then
  printf 'Create an inverted index with breaking up the target list.\n'
  printf '\n'
  printf 'Usage:\n'
  printf '  %s [num]\n' "$progname"
  printf '  %s --clean\n' "$progname"
  printf '\n'
  exit 0
fi


# clean files
if [ "$1" = "--clean" ]
then
  rm -rf casket estsearch.cgi estsearch.conf estsearch.tmpl estsearch.top
  exit 0
fi


# parse arguments
if [ $# -ge 1 ]
then
  unit="$1"
fi

# reject anything but a positive decimal number before touching the
# existing database: split(1) would fail on it later anyway
case "$unit" in
  ''|*[!0-9]*)
    printf '%s: invalid number of files per index: %s\n' "$progname" "$unit" >&2
    exit 1
    ;;
esac
if [ "$unit" -le 0 ]
then
  printf '%s: invalid number of files per index: %s\n' "$progname" "$unit" >&2
  exit 1
fi


# locate the installation (done after option handling so that --help and
# --clean do not depend on estsiutil)
PATH="$(estsiutil bindir):$PATH" ; export PATH
libexecdir=$(estsiutil libexecdir)
datadir=$(estsiutil datadir)


# function to remove the temporary directory
# (does nothing until $tmpdir has been set to a directory created here)
tmpclean(){
  if [ -n "$tmpdir" ]
  then
    rm -rf "$tmpdir"
  fi
}


# function to sync
# Calls sync(1) repeatedly with short pauses, about three seconds in total.
# NOTE(review): presumably meant to let the index files settle on disk
# between the heavy steps -- confirm before shortening.
mysync(){
  printf '%s: synchronizing\n' "$progname"
  sync ; sync
  sleep 1
  sync ; sync
  sleep 2
}


# set the exit trap
# 0 is the EXIT pseudo-signal, so every exit path cleans up.  The signal
# handlers must exit explicitly: a handler that only cleaned up would return
# control to the script, which would go on working without its work list.
trap tmpclean 0
trap 'tmpclean; exit 1' 1 2 3 13 15


# create the temporary directory
# mktemp gives an unpredictable, private directory.  Where mktemp is not
# available, fall back to a plain mkdir (no -p), which fails instead of
# reusing a path that somebody else has already created.
tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/$progname.XXXXXX" 2>/dev/null)
if [ -z "$tmpdir" ] || [ ! -d "$tmpdir" ]
then
  tmpdir="${TMPDIR:-/tmp}/$progname.$$"
  if ! (umask 077 && mkdir "$tmpdir")
  then
    printf '%s: cannot create the temporary directory\n' "$progname" >&2
    tmpdir=""    # not ours: keep the exit trap from removing it
    exit 1
  fi
fi
alllist="$tmpdir/all"


# remove existing database
rm -rf "$name" "$name"-*


# create list
printf '%s: finding targets\n' "$progname"
find . -follow -type f | grep -E -i '\.(html|htm|txt|asc|eml|mht)$' > "$alllist"
allnum=$(wc -l < "$alllist" | tr -cd '0-9')
if [ -z "$allnum" ]
then
  allnum=0
fi
printf '%s: %d files were found\n' "$progname" "$allnum"
if [ "$allnum" -eq 0 ]
then
  # nothing to register: stop here instead of failing later in mv/estindex
  printf '%s: no target files were found\n' "$progname" >&2
  exit 1
fi
printf '%s: dividing the list\n' "$progname"
# -a 4 allows far more chunks than the default two-letter suffix (676)
if ! split -a 4 -l "$unit" "$alllist" "$alllist"-
then
  printf '%s: cannot divide the list\n' "$progname" >&2
  exit 1
fi
mysync


# register files
# The chunks are taken from a glob (sorted like the split suffixes) and the
# loop runs in the current shell, so $num still holds the number of element
# indexes afterwards.
num=0
for list in "$alllist"-*
do
  [ -f "$list" ] || continue
  num=$((num + 1))
  index=$(printf '%s-%04d' "$name" "$num")
  estindex register -list "$list" -wmax "$wmax" -rich "$index"
  mysync
done


# merge element indexes
# Decide on the number of chunks actually produced: a single element index
# is simply renamed, several are merged into the real one.
if [ "$num" -gt 1 ]
then
  estindex merge -rich "$name" "$name"-*
else
  printf '%s: renaming the temporary index as the real one\n' "$progname"
  mv -f "$name-0001" "$name"
fi
mysync


# remove temporary indexes
if [ -d "$name" ]
then
  printf '%s: removing temporary indexes\n' "$progname"
  rm -rf "$name"-*
  mysync
fi


# add score information
if [ -d "$name" ]
then
  estindex relate "$name"
  mysync
fi


# deploy the CGI script and its configurations
if [ -f "estsearch.cgi" ]
then
  printf '%s: estsearch.cgi already exists\n' "$progname"
else
  printf '%s: deploying estsearch.cgi and its confgurations\n' "$progname"
  cp -f "$libexecdir/estsearch.cgi" .
  cp -f "$datadir/estsearch".* .
  mysync
fi


# clean up the temporary directory
printf '%s: cleaning up the temporary directory\n' "$progname"
tmpclean


# exit normally
printf '%s: finished\n' "$progname"
exit 0



# END OF FILE