#! /bin/sh

#================================================================
# estxlshtml
# Strip a file of MS-Excel and extract its text as HTML.
#
# Usage: estxlshtml [infile] [outfile]
#   infile   Excel file to convert; standard input is read when omitted.
#   outfile  file the HTML is appended to; standard output when omitted.
#
# Exit status: 0 on completion, 1 when the input file is missing, the
# temporary storage cannot be prepared, or a trapped signal arrives.
# Requires the external commands xlhtml, iconv, awk and mktemp.
#================================================================


# set variables
LANG=C ; export LANG
LC_ALL=C ; export LC_ALL
progname="estxlshtml"
tmpdir=""
nulldev="/dev/null"
infile="$1"
outfile="$2"


# show help message
if [ "$1" = "--help" ]
then
  printf 'Strip a file of MS-Excel and extract its text as HTML.\n'
  printf '\n'
  printf 'Usage:\n'
  printf ' %s [infile] [outfile]\n' "$progname"
  printf ' estindex register -xsuf .xls \\\n'
  printf ' application/vnd.ms-excel %s casket\n' "$progname"
  printf '\n'
  exit 0
fi


# function to remove the temporary directory (no-op when none was created)
tmpclean(){
  if [ -n "$tmpdir" ]
  then
    rm -rf -- "$tmpdir"
    tmpdir=""
  fi
}


# function to write the result: copy stdin to stdout, or append it to
# the output file when one was given
output(){
  if [ -z "$outfile" ]
  then
    cat
  else
    cat >> "$outfile"
  fi
}


# set the exit trap: clean up on every exit path, and turn the handled
# signals into a real exit so the script does not keep running after
# its temporary input has been removed
trap tmpclean 0
trap 'exit 1' 1 2 3 13 15


# check the input file existence
if [ -n "$infile" ] && [ ! -f "$infile" ]
then
  printf '%s: %s: no such file\n' "$progname" "$infile" >&2
  exit 1
fi


# create the temporary file when reading from standard input
if [ -z "$infile" ]
then
  # a private directory from mktemp avoids a predictable path in /tmp;
  # the file inside keeps the .xls suffix the script has always used
  tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/$progname.XXXXXX") || {
    printf '%s: cannot create a temporary directory\n' "$progname" >&2
    exit 1
  }
  infile="$tmpdir/stdin.xls"
  cat > "$infile" || {
    printf '%s: cannot save the standard input\n' "$progname" >&2
    exit 1
  }
fi


# keep a path beginning with "-" from being parsed as an option
case "$infile" in
  -*) infile="./$infile" ;;
esac


# output the result
# xlhtml diagnostics are discarded on purpose; iconv -c drops byte
# sequences that are not valid UTF-8 before awk builds the page
xlhtml -xml "$infile" 2> "$nulldev" |
iconv -f UTF-8 -t UTF-8 -c |
awk '
BEGIN {
  title = ""
  lnum = 0
}
{
  if(match($0, /<pagetitle>/) > 0){
    # remember the page title with its markup and outer blanks removed
    title = $0
    gsub(/<[^>]*>/, "", title)
    sub(/^[ \t]*/, "", title)
    sub(/[ \t]*$/, "", title)
  } else if(match($0, /<cell/) > 0){
    # keep the text of the cell; the greedy prefix match means only the
    # last cell on a line survives (presumably one cell per line -- confirm)
    sub(/.*<cell[^>]*>/, "");
    sub(/<\/cell>.*/, "");
    gsub(/<[^>]*>/, " ", $0)
    sub(/^[ \t]*/, "", $0)
    sub(/[ \t]*$/, "", $0)
    body[lnum++] = $0
  }
}
END {
  printf "<html>\n"
  printf "<head>\n"
  printf "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">\n"
  if(length(title) > 0){
    printf("<title>%s</title>\n", title)
  }
  printf "</head>\n"
  printf "<body>\n"
  for(i = 0; i < lnum; i++){
    # NOTE(review): cells of length 0 or 1 are skipped; presumably
    # deliberate noise filtering -- confirm
    if(length(body[i]) > 1){
      printf("<div>%s</div>\n", body[i]);
    }
  }
  printf "</body>\n"
  printf "</html>\n"
}
' |
output


# exit normally (the exit trap removes the temporary directory)
exit 0



# END OF FILE