#! /bin/sh

#================================================================
# estxdwhtml
# Strip a file of DocuWorks and extract its text as HTML.
#
# Usage: estxdwhtml [infile] [outfile]
#   infile   DocuWorks file to convert; when omitted, the document is
#            read from the standard input and spooled to a temporary file
#   outfile  file the HTML is appended to; when omitted, the HTML is
#            written to the standard output
#
# Requires the external commands xdw2text, iconv and awk.
#================================================================


# set variables
LANG=C ; export LANG
LC_ALL=C ; export LC_ALL
progname="estxdwhtml"
tmpdir=""
nulldev="/dev/null"
infile="$1"
outfile="$2"


# show help message
if [ "$1" = "--help" ]
then
  printf 'Strip a file of DocuWorks and extract its text as HTML.\n'
  printf '\n'
  printf 'Usage:\n'
  printf ' %s [infile] [outfile]\n' "$progname"
  printf ' estindex register -xsuf .xdw \\\n'
  printf ' application/vnd.fujixerox.docuworks %s casket\n' "$progname"
  printf '\n'
  exit 0
fi


# function to remove the temporary directory and everything inside it
# (does nothing until the directory has actually been created)
tmpclean(){
  if [ -n "$tmpdir" ]
  then
    rm -rf "$tmpdir"
  fi
}


# function to write the standard input to the output destination
# (appends to $outfile when it is given, else copies to the standard output)
output(){
  if [ -z "$outfile" ]
  then
    cat
  else
    cat >> "$outfile"
  fi
}


# set the exit trap
# cleanup runs on every exit path; a signal is turned into an exit so that
# the script stops instead of carrying on after the handler returns
trap tmpclean 0
trap 'exit 1' 1 2 3 13 15


# check the input file existence
if [ -n "$infile" ] && [ ! -f "$infile" ]
then
  printf '%s: %s: no such file\n' "$progname" "$infile" 1>&2
  exit 1
fi


# create the private temporary directory
# a directory owned by this process avoids predictable file names in a
# shared directory while still letting the spooled input keep its .xdw suffix
tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/$progname-XXXXXX" 2> "$nulldev")
if [ -z "$tmpdir" ]
then
  # mktemp is missing or failed: mkdir fails if the path already exists,
  # so the directory is adopted only when this process created it
  fallback="${TMPDIR:-/tmp}/$progname-$$"
  (umask 077 && mkdir "$fallback") 2> "$nulldev" || {
    printf '%s: cannot create a temporary directory\n' "$progname" 1>&2
    exit 1
  }
  tmpdir="$fallback"
fi
tmpfile="$tmpdir/$progname.xdw"
dummyfile="$tmpdir/$progname.dummy"


# create the temporary file
# with no input file named, the document is taken from the standard input
if [ -z "$infile" ]
then
  cat > "$tmpfile"
  infile="$tmpfile"
fi


# output the result
# NOTE(review): $dummyfile is presumably the output-file argument xdw2text
# expects even when -p sends the text to the standard output -- confirm.
# The text is converted from Shift_JIS to UTF-8 (-c drops characters that
# cannot be converted).  In the awk program the ampersand, less-than and
# greater-than signs are replaced by the character references amp, lt and gt;
# the ampersand must be handled first so the references themselves are not
# escaped again.  "\\&" in the replacement stands for a literal ampersand.
# Input lines are joined without any separator inside a single paragraph.
xdw2text -p "$infile" "$dummyfile" 2> "$nulldev" |
iconv -f Shift_JIS -t UTF-8 -c |
awk '
BEGIN {
  printf("<html>\n")
  printf("<head>\n")
  printf("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">\n")
  printf("</head>\n")
  printf("<body>\n")
  printf("<p>")
}
{
  gsub(/&/, "\\&amp;", $0)
  gsub(/</, "\\&lt;", $0)
  gsub(/>/, "\\&gt;", $0)
  printf("%s", $0)
}
END {
  printf("</p>\n")
  printf("</body>\n")
  printf("</html>\n")
}
' |
output


# clean up the temporary directory
tmpclean


# exit normally
exit 0



# END OF FILE