1#! /bin/sh
2
3#================================================================
4# estxlshtml
5# Strip a file of MS-Excel and extract its text as HTML.
6#================================================================
7
8
# set variables
# Everything below runs under the C locale.  The first two command-line
# arguments are the optional input and output paths; either may be empty.
LANG=C
LC_ALL=C
export LANG LC_ALL
progname="estxlshtml"
nulldev="/dev/null"
tmpfile="/tmp/${progname}-$$.xls"
infile="$1"
outfile="$2"
17
18
# show help message
# Only an exact "--help" as the first argument prints the usage text; the
# script then stops with status 0 without touching any file.
case "$1" in
--help)
  # One printf call, one argument per output line.
  printf '%s\n' \
    'Strip a file of MS-Excel and extract its text as HTML.' \
    '' \
    'Usage:' \
    "  $progname [infile] [outfile]" \
    '  estindex register -xsuf .xls \' \
    "      application/vnd.ms-excel $progname casket" \
    ''
  exit 0
  ;;
esac
31
32
# function to remove the temporary file
# Deletes whatever path $tmpfile currently names, recursively and without
# complaining when nothing exists at that path.
tmpclean(){
  rm -r -f "${tmpfile}"
}
37
38
# function to deliver the result
# Copies standard input to the file named by $outfile, appending to whatever
# the file already holds, or to standard output when $outfile is empty.
output(){
  if [ -n "$outfile" ]
  then
    cat >> "$outfile"
    return
  fi
  cat
}
48
49
# set the signal traps
# On HUP, INT, QUIT, PIPE or TERM, remove the temporary file and then stop
# with a failure status.  The explicit exit matters: when a trap action
# merely returns, the shell resumes the script where it was interrupted, so
# an aborted conversion could still run to the end and report status 0.
trap 'tmpclean; exit 1' 1 2 3 13 15
52
53
# check the input file existence
# An empty $infile is allowed (the data is then taken from standard input
# later in the script); only a named path that is not a regular file is
# rejected.
if [ -n "$infile" ] && [ ! -f "$infile" ]
then
  # The diagnostic goes to stderr: stdout carries the generated HTML when no
  # outfile is given, so an error message there would be mistaken for data.
  printf '%s: %s: no such file\n' "$progname" "$infile" >&2
  exit 1
fi
60
61
# create the temporary file
# With no input path given, the workbook is read from standard input and
# spooled to disk so that it can be handed over by path name.
if [ -z "$infile" ]
then
  # A PID-based name directly in /tmp is predictable, so "cat >" could be
  # made to write through a symlink planted there in advance.  mktemp -d
  # creates a fresh, unpredictably named directory, and the spool file lives
  # inside it with its .xls suffix intact.  $tmpfile is pointed at the
  # directory itself so that the recursive removal done by tmpclean takes
  # the spool file and the directory away together.
  tmpfile=$(mktemp -d "/tmp/$progname-XXXXXX") || {
    printf '%s: cannot create a temporary directory\n' "$progname" >&2
    exit 1
  }
  infile="$tmpfile/stdin.xls"
  cat > "$infile"
fi
68
69
# output the result
# xlhtml dumps the workbook as XML (its diagnostics are discarded), iconv -c
# drops byte sequences that are not valid UTF-8, and the awk program rebuilds
# the text as a small HTML page that is passed on to output().
xlhtml -xml "$infile" 2> "$nulldev" \
  | iconv -f UTF-8 -t UTF-8 -c \
  | awk '
# Return str without leading and trailing blanks and tabs.
function trim(str) {
  sub(/^[ \t]*/, "", str)
  sub(/[ \t]*$/, "", str)
  return str
}

BEGIN {
  title = ""
  ncells = 0
}

# Title line: keep its text with all markup removed.  When several lines
# match, the last one wins.
/<pagetitle>/ {
  title = $0
  gsub(/<[^>]*>/, "", title)
  title = trim(title)
  next
}

# Cell line: keep what lies between the last <cell ...> tag on the line and
# the closing </cell>, with any nested tags replaced by spaces.
/<cell/ {
  text = $0
  sub(/.*<cell[^>]*>/, "", text)
  sub(/<\/cell>.*/, "", text)
  gsub(/<[^>]*>/, " ", text)
  text = trim(text)
  # Values of one character or less are never printed, so they are not kept.
  if(length(text) > 1){
    cells[ncells++] = text
  }
}

# The page is written only after all input is read, so that the title can go
# into the head regardless of where it appeared.
END {
  print "<html>"
  print "<head>"
  print "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">"
  if(length(title) > 0){
    print "<title>" title "</title>"
  }
  print "</head>"
  print "<body>"
  for(k = 0; k < ncells; k++){
    print "<div>" cells[k] "</div>"
  }
  print "</body>"
  print "</html>"
}
' \
  | output


# remove the spooled copy of standard input, if one was made
tmpclean


# exit normally
# The status is 0 regardless of how the pipeline above fared.
exit 0
120
121
122
123# END OF FILE
124