#! /bin/sh

#================================================================
# estxdwhtml
# Strip a file of DocuWorks and extract its text as HTML.
#================================================================


# set variables
# Pin the locale to C for every tool started by this script.
LANG=C ; export LANG
LC_ALL=C ; export LC_ALL
progname="estxdwhtml"
# Per-process scratch paths ($$ is the PID of this shell).
tmpfile="/tmp/$progname-$$.xdw"      # receives stdin when no input file is given
dummyfile="/tmp/$progname-$$.dummy"  # second path argument handed to xdw2text
nulldev="/dev/null"
infile="$1"   # optional input path; empty means "read the document from stdin"
outfile="$2"  # optional output path; empty means "write the HTML to stdout"


# show help message
# Only an exact "--help" as the first argument triggers the usage text;
# the script then exits successfully without touching any file.
if [ "$1" = "--help" ]
then
  printf 'Strip a file of DocuWorks and extract its text as HTML.\n'
  printf '\n'
  printf 'Usage:\n'
  printf '  %s [infile] [outfile]\n' "$progname"
  printf '  estindex register -xsuf .xdw \\\n'
  printf '      application/vnd.fujixerox.docuworks %s casket\n' "$progname"
  printf '\n'
  exit 0
fi


# function to remove the temporary files
# Globals: tmpfile (read), dummyfile (read)
# Deletes the stdin spool file and the dummy path that is passed to xdw2text
# (xdw2text may leave a file there -- TODO confirm). rm -f semantics make the
# call harmless when either file does not exist, so it can be called repeatedly.
tmpclean(){
  rm -rf "$tmpfile" "$dummyfile"
}


# function to deliver the result
# Globals: outfile (read)
# Copies standard input to its destination: appended to "$outfile" when an
# output path was given, otherwise passed through to standard output.
output(){
  if [ -z "$outfile" ]
  then
    cat
  else
    # append, never truncate: existing content of the output file is kept
    cat >> "$outfile"
  fi
}


# set the signal trap
# On HUP(1), INT(2), QUIT(3), PIPE(13) or TERM(15) remove the temporary files
# and then stop. Without the explicit exit the shell would resume the script
# after the handler returns and keep working without its input file.
trap 'tmpclean; exit 1' 1 2 3 13 15


# check the input file existence
# An empty infile is allowed (stdin is used instead); a non-empty one must
# name an existing regular file.
if [ -n "$infile" ] && [ ! -f "$infile" ]
then
  # diagnostics go to stderr so they never end up in the HTML on stdout
  printf '%s: %s: no such file\n' "$progname" "$infile" >&2
  exit 1
fi


# create the temporary file
# When no input path was given, spool standard input into "$tmpfile" and use
# that file as the input from here on.
if [ -z "$infile" ]
then
  # "set -C" (noclobber) makes the redirection fail instead of writing through
  # a file or symlink that already sits at the predictable /tmp path. It is
  # enabled in a subshell so the option does not leak into the rest of the script.
  if ! ( set -C ; cat > "$tmpfile" )
  then
    printf '%s: %s: cannot create temporary file\n' "$progname" "$tmpfile" >&2
    tmpclean
    exit 1
  fi
  infile="$tmpfile"
fi


# output the result
# Pipeline stages:
#   1. xdw2text reads "$infile"; its standard output feeds the next stage and
#      its error messages are discarded. "$dummyfile" is passed as the second
#      path argument (presumably -p selects printing to stdout -- TODO confirm).
#   2. iconv converts the text from Shift_JIS to UTF-8; -c drops characters
#      that cannot be converted.
#   3. awk wraps the text in a minimal HTML document, escaping "&", "<" and ">".
#      Every input line is printed without a line break, so the whole text
#      becomes the content of a single <p> element.
#   4. output() sends the HTML to the output file or to stdout.
xdw2text -p "$infile" "$dummyfile" 2> "$nulldev" |
iconv -f Shift_JIS -t UTF-8 -c |
awk '
BEGIN {
  printf("<html>\n")
  printf("<head>\n")
  printf("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">\n")
  printf("</head>\n")
  printf("<body>\n")
  printf("<p>")
}
{
  gsub(/&/, "\\&amp;", $0)
  gsub(/</, "\\&lt;", $0)
  gsub(/>/, "\\&gt;", $0)
  printf("%s", $0)
}
END {
  printf("</p>\n")
  printf("</body>\n")
  printf("</html>\n")
}
' |
output


# clean up the temporary files
tmpclean


# exit normally
# The status is always 0 here, whatever the pipeline stages returned.
exit 0



# END OF FILE