1#!/usr/bin/perl
2
3use lib '.';
4
5#
6#   Grutatxt - A text to HTML (and other things) converter
7#
8#   Angel Ortega <angel@triptico.com> et al.
9#
10#   This software is released into the public domain.
11#   NO WARRANTY. See file LICENSE for details.
12#
13
14use Getopt::Long;
15use locale;
16use Grutatxt;
17
# script version: the Grutatxt module version plus a script revision
$VERSION = $Grutatxt::VERSION . ':1';

#
# Option defaults. All of them are plain package globals, shared with
# the rest of the script and with usage().
#

# input file ('-' is opened as STDIN)
$input_file = '-';

# output file (the script opens ">$output_file", so '>-' is STDOUT)
$output_file = '>-';

# CSS information: URL or file name, and whether to embed the file
$css = '';
$embed_css = 0;

# page title (if left empty, the title found in the content is used)
$title = '';

# offset for the h? headers
$header_offset = 0;

# default mode
$mode = 'HTML';

# use real dl
$dl_as_dl = 0;

# troff table type
$table_type = 'allbox';

# abstract line number (used as the index of the last abstract line
# in the processed output)
$abstract = 0;

# marks; the first one, if set, overrides the abstract position
@marks = ();

# man page section
$man_section = 1;

# default tab size in LaTeX mode
$tabsize = 8;

# avoid time signature
$no_time_sig = 0;

# disable pure verbatim mode
$no_pure_verbatim = 0;

# enable TOC
$toc = 0;

#####################################################################

# Parse the command line. Every option documented in usage() must be
# registered here: 'sd|strip-dollars' and 'table-type' used to be
# documented (and their variables passed to Grutatxt) without being
# accepted, so using them only produced an "Unknown option" error.
# A parsing error or an explicit -h shows the help, which exits.
if (!GetOptions(
        'i|input=s'         => \$input_file,
        'o|output=s'        => \$output_file,
        'c|css=s'           => \$css,
        'e|embed-css'       => \$embed_css,
        't|title=s'         => \$title,
        'f|header-offset=s' => \$header_offset,
        'b|table-headers'   => \$table_headers,
        'ct|center-tables'  => \$center_tables,
        'xt|expand-tables'  => \$expand_tables,
        'sp|strip-parens'   => \$strip_parens,
        'sd|strip-dollars'  => \$strip_dollars,
        'ts|tabsize=s'      => \$tabsize,
        'nb|no-body'        => \$no_body,
        'v|version'         => \$version,
        'h|help'            => \$usage,
        'm|mode=s'          => \$mode,
        's|man-section=s'   => \$man_section,
        'table-type=s'      => \$table_type,
        'docclass=s'        => \$latex_docclass,
        'papersize=s'       => \$papersize,
        'encoding=s'        => \$encoding,
        'dl'                => \$dl_as_dl,
        'no-time-sig'       => \$no_time_sig,
        'no-pure-verbatim'  => \$no_pure_verbatim,
        'toc'               => \$toc,
        'href-new-window'   => \$href_new_window
    ) or $usage) {
    usage();
}

# -v just prints the version and finishes
if ($version) {
    print "$VERSION\n";
    exit(0);
}
100
# Open both streams up front. The two-argument form of open() is used
# so that the default names still work: '-' reads from STDIN and the
# '>-' default for the output writes to STDOUT.
open(I, $input_file)
    || die "Can't open $input_file: $!";
open(O, '>' . $output_file)
    || die "Can't create $output_file: $!";

# when an utf-8 encoding is requested, flag both filehandles as utf-8
# so that regular expressions match all characters
if (defined $encoding and $encoding =~ /^utf-?8/i) {
    binmode($_, ':utf8') for (\*I, \*O);
}

# read the whole input in one go; the input handle is not needed anymore
$content = join('', readline(*I));
close(I);
114
# receives the title found in the content (see the 'title' argument
# below); it is used later as the page title when none was given
$content_title = '';

# make tab to space conversion only in LaTeX mode
$tabsize = 0 unless $mode =~ /^latex$/i;

# Build the converter. The constructor is called as a class method
# (Grutatxt->new) instead of the indirect object form 'new Grutatxt',
# which is ambiguous to parse and discouraged by perlobj.
# 'title', 'abstract' and 'marks' are passed by reference; the script
# reads $content_title, $abstract and @marks back after process().
$grutatxt = Grutatxt->new(
        'mode'             => $mode,
        'header-offset'    => $header_offset,
        'table-headers'    => $table_headers,
        'center-tables'    => $center_tables,
        'expand-tables'    => $expand_tables,
        'strip-parens'     => $strip_parens,
        'strip-dollars'    => $strip_dollars,
        'tabsize'          => $tabsize,
        'dl-as-dl'         => $dl_as_dl,
        'table-type'       => $table_type,
        'title'            => \$content_title,
        'abstract'         => \$abstract,
        'marks'            => \@marks,
        'page-name'        => $title,
        'section'          => $man_section,
        'docclass'         => $latex_docclass,
        'papersize'        => $papersize,
        'encoding'         => $encoding,
        'no-pure-verbatim' => $no_pure_verbatim,
        'toc'              => $toc,
        'href-new-window'  => $href_new_window
        );

# convert; the result is a list of output lines (printed one per line
# further down)
@result = $grutatxt->process($content);
145
# In HTML mode, insert a '<->' marker line right after the abstract.
# The mode is matched case-insensitively, like the other mode tests in
# this script, so that '-m html' and '-m HTML' behave the same.
if ($mode =~ /^html$/i) {
    # a first mark, if set, overrides the abstract position
    if (@marks && $marks[0]) {
        $abstract = $marks[0] - 1;
    }

    # never point past the last line (this also covers an empty result)
    $abstract = $#result if $abstract > $#result;

    # Insert the marker in place. The previous slice-based rebuild used
    # scalar(@result) as the upper index, one past the last element,
    # which appended an undefined entry (printed as a spurious empty
    # line at the end of the output).
    splice(@result, $abstract + 1, 0, '<->');
}
157
# if no title was given in the command line, use the one from the content
$title = $content_title unless $title;

# the HTML enclosing is only generated in HTML mode
$no_body = 1 unless $mode =~ /^html$/i;

# Write the HTML document head to the output: doctype, charset, title,
# the optional conversion time signature and the optional stylesheet
# (embedded or linked).
unless ($no_body) {
    print O "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\"\n";
    print O "   \"http://www.w3.org/TR/REC-html40/loose.dtd\">\n";
    print O "<html><head>\n";
    print O "<meta http-equiv='Content-Type' content='text/html; charset=" .
        ($encoding || 'utf-8') . "'>\n";
    print O "<title>$title</title>\n";

    # the version is passed as an argument, not interpolated into the
    # format, so a '%' in it could never be taken as a conversion
    printf O "<!-- converted from text by grutatxt %s on %s -->\n",
        $VERSION, scalar(localtime)
        unless $no_time_sig;

    if ($css) {
        if ($embed_css) {
            # $css is a file name here; open it read-only with the
            # three-argument form so its characters are never taken
            # as an open mode, and report why it failed
            open(my $css_fh, '<', $css)
                or die "Can't open '$css' CSS file: $!";
            my $css_text = join('', <$css_fh>);
            close($css_fh);

            print O "<style type='text/css'>\n";
            print O $css_text . "\n";
            print O "</style>\n";
        }
        else {
            # $css is an URL here; just link to it
            print O "<link rel=StyleSheet href='$css' type='text/css'>";
        }
    }

    print O "</head><body>\n";
}
193
# write the converted lines, one per output line
foreach my $line (@result) {
    print O "$line\n";
}

# close the HTML enclosing, if it was opened
print O "</body></html>\n" unless $no_body;

# output is buffered, so a failed write (a full disk, for example) may
# only be reported here; do not finish successfully in that case
close O or die "Can't write $output_file: $!";

exit(0);
203
204
# usage - prints the program banner and the command line help to
# standard output and finishes the program with exit code 1.
# Takes no arguments and never returns. Reads the $VERSION global.
# The option list must be kept in sync with the GetOptions() call.
sub usage
{
    print "grutatxt $VERSION - Grutatxt format processor\n";
    print "Angel Ortega <angel\@triptico.com> et al.\n";
    print "This software is released into the public domain. NO WARRANTY.\n\n";

    print "Usage:\n";
    print "\n";
    print "grutatxt [options] < input_text_file > output_html_file\n";
    print "\n";
    print "Global options:\n\n";
    print "    -i|--input=FILE            Input file (STDIN)\n";
    print "    -o|--output=FILE           Output file (STDOUT)\n";
    print "    -t|--title=TITLE           Document title (if unset,\n";
    print "                               level 1 heading is used)\n";
    print "    -sp|--strip-parens         Strip parentheses in function\n";
    print "                               names (shown monospaced anyway)\n";
    print "    -sd|--strip-dollars        Strip leading \$ in variable\n";
    print "                               names (shown monospaced anyway)\n";
    print "    -m|--mode=[HTML|troff|man|latex|rtf]\n";
    print "                               Output mode: HTML, troff, man, LaTeX or RTF\n";
    print "                               (default: HTML)\n";
    print "    --no-time-sig              Avoid time signature in HTML comment\n";
    print "    --no-pure-verbatim         Disable pure verbatim mode\n";
    print "    --toc                      Add a table of contents after abstract\n\n";
    print "HTML options:\n\n";
    print "    -c|--css=CSS_URL_OR_FILE   CSS URL (or file if using --embed-css)\n";
    print "    -e|--embed-css             Embed CSS instead of linking to it\n";
    print "    -f|--header-offset=NUMBER  Offset to add to <h1>,\n";
    print "                               <h2>... headers (default 0)\n";
    print "    -b|--table-headers         Use <th> instead of <td> in\n";
    print "                               the first row of each table\n";
    print "    -ct|--center-tables        Centers the tables\n";
    print "    -xt|--expand-tables        Expands the tables (width=100\%)\n";
    # long options are written with two dashes, like everywhere else
    print "    -nb|--no-body              Don't generate <html><body>...\n";
    print "                               </body></html> enclosing\n";
    print "    --encoding=ENCODING        Character encoding (default: utf-8)\n";
    print "    --dl                       Use real <dl>, <dd> and <dt>\n";
    print "                               instead of tables in definition lists\n";
    print "    --href-new-window          Open links in new windows/tabs\n\n";
    print "troff options:\n\n";
    print "    --table-type=TYPE          Table type. Possible values:\n";
    # every section ends with an empty line before the next heading
    print "                               box, allbox, doublebox (default allbox)\n\n";
    print "man options:\n\n";
    print "    -s|--man-section=SECTION   Man page section (default: 1)\n\n";
    print "LaTeX options:\n\n";
    print "    --docclass=CLASS           Document class (default: report)\n";
    print "    --papersize=SIZE           Paper size (default: a4paper)\n";
    print "    --encoding=ENCODING        Character encoding (default: latin1)\n";
    print "    -ts|--tabsize=NUMBER       Tab size for tab to space conversions in\n";
    print "                               LaTeX verbatim environment (default: 8)\n";

    exit(1);
}
259