#!/usr/bin/perl

use lib '.';

#
# Grutatxt - A text to HTML (and other things) converter
#
# Angel Ortega <angel@triptico.com> et al.
#
# This software is released into the public domain.
# NO WARRANTY. See file LICENSE for details.
#

use strict;
use warnings;

use Getopt::Long;
use locale;
use Grutatxt;

our $VERSION = $Grutatxt::VERSION . ':1';

# input file ('-' means STDIN)
my $input_file = '-';

# output file ('-' means STDOUT; the legacy default '>-' is accepted too)
my $output_file = '-';

# CSS information
my $css = '';
my $embed_css = 0;

# page title
my $title = '';

# offset for the h? headers
my $header_offset = 0;

# default mode
my $mode = 'HTML';

# use real dl
my $dl_as_dl = 0;

# troff table type
my $table_type = 'allbox';

# abstract line number (filled in by Grutatxt through a reference)
my $abstract = 0;

# marks (filled in by Grutatxt through a reference)
my @marks = ();

# man page section
my $man_section = 1;

# default tab size in LaTeX mode
my $tabsize = 8;

# avoid time signature
my $no_time_sig = 0;

# disable pure verbatim mode
my $no_pure_verbatim = 0;

# enable TOC
my $toc = 0;

# options that have no default value (left undefined unless given)
my ($table_headers, $center_tables, $expand_tables);
my ($strip_parens, $strip_dollars);
my ($no_body, $version, $usage);
my ($latex_docclass, $papersize, $encoding);
my $href_new_window;

#####################################################################

# parse options; a parsing error or an explicit -h shows the help and exits
if (!GetOptions(
        'i|input=s'         => \$input_file,
        'o|output=s'        => \$output_file,
        'c|css=s'           => \$css,
        'e|embed-css'       => \$embed_css,
        't|title=s'         => \$title,
        'f|header-offset=s' => \$header_offset,
        'b|table-headers'   => \$table_headers,
        'ct|center-tables'  => \$center_tables,
        'xt|expand-tables'  => \$expand_tables,
        'sp|strip-parens'   => \$strip_parens,
        # documented in usage() and passed to Grutatxt below
        'sd|strip-dollars'  => \$strip_dollars,
        'ts|tabsize=s'      => \$tabsize,
        'nb|no-body'        => \$no_body,
        'v|version'         => \$version,
        'h|help'            => \$usage,
        'm|mode=s'          => \$mode,
        's|man-section=s'   => \$man_section,
        'docclass=s'        => \$latex_docclass,
        'papersize=s'       => \$papersize,
        'encoding=s'        => \$encoding,
        'dl'                => \$dl_as_dl,
        # documented in usage() and passed to Grutatxt below
        'table-type=s'      => \$table_type,
        'no-time-sig'       => \$no_time_sig,
        'no-pure-verbatim'  => \$no_pure_verbatim,
        'toc'               => \$toc,
        'href-new-window'   => \$href_new_window
    ) or $usage) {
    usage();
}

if ($version) {
    print "$VERSION\n";
    exit(0);
}

# the input is opened first, so a missing input file never
# creates (or truncates) the output file
my $in  = open_input($input_file);
my $out = open_output($output_file);

# if utf-8 encoding is wanted, set the filehandles as utf-8
# so that regular expressions match all characters
if (defined($encoding) && $encoding =~ /^utf-?8/i) {
    binmode($in, ":utf8");
    binmode($out, ":utf8");
}

my $content = join('', <$in>);
close $in;

# receives the document title found by Grutatxt (through a reference)
my $content_title = '';

# make tab to space conversion only in LaTeX mode
$tabsize = 0 unless $mode =~ /^latex$/i;

my $grutatxt = Grutatxt->new(
    'mode'             => $mode,
    'header-offset'    => $header_offset,
    'table-headers'    => $table_headers,
    'center-tables'    => $center_tables,
    'expand-tables'    => $expand_tables,
    'strip-parens'     => $strip_parens,
    'strip-dollars'    => $strip_dollars,
    'tabsize'          => $tabsize,
    'dl-as-dl'         => $dl_as_dl,
    'table-type'       => $table_type,
    'title'            => \$content_title,
    'abstract'         => \$abstract,
    'marks'            => \@marks,
    'page-name'        => $title,
    'section'          => $man_section,
    'docclass'         => $latex_docclass,
    'papersize'        => $papersize,
    'encoding'         => $encoding,
    'no-pure-verbatim' => $no_pure_verbatim,
    'toc'              => $toc,
    'href-new-window'  => $href_new_window
);

my @result = $grutatxt->process($content);

# NOTE(review): this test is case sensitive while the <html><body>
# test below is not, so '-m html' gets the HTML envelope but no '<->'
# line -- confirm whether that is intended.
if ($mode eq 'HTML') {
    # an explicit mark overrides the abstract position reported by Grutatxt
    if (scalar(@marks) && $marks[0]) {
        $abstract = $marks[0] - 1;
    }

    # insert the '<->' line right after element $abstract; the position
    # is clamped so that short (or empty) output is never padded with
    # undefined elements
    my $position = $abstract + 1;
    $position = scalar(@result) if $position > scalar(@result);

    splice(@result, $position, 0, '<->');
}

# the command line title wins; otherwise use the one found in the document
$title = $content_title unless $title;

# the <html><body> envelope only makes sense in HTML mode
$no_body = 1 unless $mode =~ /^html$/i;

unless ($no_body) {
    print {$out} "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\"\n";
    print {$out} " \"http://www.w3.org/TR/REC-html40/loose.dtd\">\n";
    print {$out} "<html><head>\n";
    print {$out} "<meta http-equiv='Content-Type' content='text/html; charset=" .
        ($encoding || 'utf-8') . "'>\n";
    print {$out} "<title>$title</title>\n";

    # the version is passed as an argument so that a '%' in it
    # cannot be taken as a format directive
    printf {$out} "<!-- converted from text by grutatxt %s on %s -->\n",
        $VERSION, scalar(localtime)
        unless $no_time_sig;

    if ($css) {
        if ($embed_css) {
            open my $css_fh, '<', $css
                or die "Can't open '$css' CSS file.";
            my $style = join('', <$css_fh>);
            close $css_fh;

            print {$out} "<style type='text/css'>\n";
            print {$out} $style . "\n";
            print {$out} "</style>\n";
        }
        else {
            print {$out} "<link rel=StyleSheet href='$css' type='text/css'>";
        }
    }

    print {$out} "</head><body>\n";
}

foreach my $line (@result) {
    print {$out} "$line\n";
}

print {$out} "</body></html>\n" unless $no_body;

# buffered write errors (full disk, closed pipe) only surface here
close $out or die "Can't write $output_file: $!";

exit(0);


# Returns a read handle for $file; '-' stands for STDIN.
# Dies if the file cannot be opened.
sub open_input
{
    my ($file) = @_;

    return \*STDIN if $file eq '-';

    # three-argument open: the name is never taken as a mode or a pipe
    open my $fh, '<', $file or die "Can't open $file: $!";

    return $fh;
}


# Returns a write handle for $file; '-' (or the legacy '>-') stands
# for STDOUT. Dies if the file cannot be created.
sub open_output
{
    my ($file) = @_;

    return \*STDOUT if $file eq '-' || $file eq '>-';

    open my $fh, '>', $file or die "Can't create $file: $!";

    return $fh;
}


# Prints the program banner and the option summary to STDOUT and
# terminates the program with exit status 1. Never returns.
sub usage
{
    print
        "grutatxt $VERSION - Grutatxt format processor\n",
        "Angel Ortega <angel\@triptico.com> et al.\n",
        "This software is released into the public domain. NO WARRANTY.\n\n",

        "Usage:\n",
        "\n",
        "grutatxt [options] < input_text_file > output_html_file\n",
        "\n",
        "Global options:\n\n",
        " -i|--input=FILE Input file (STDIN)\n",
        " -o|--output=FILE Output file (STDOUT)\n",
        " -t|--title=TITLE Document title (if unset,\n",
        " level 1 heading is used)\n",
        " -sp|--strip-parens Strip parentheses in function\n",
        " names (shown monospaced anyway)\n",
        " -sd|--strip-dollars Strip leading \$ in variable\n",
        " names (shown monospaced anyway)\n",
        " -m|--mode=[HTML|troff|man|latex|rtf]\n",
        " Output mode: HTML, troff, man, LaTEX or RTF\n",
        " (default: HTML)\n",
        " --no-time-sig Avoid time signature in HTML comment\n",
        " --no-pure-verbatim Disable pure verbatim mode\n",
        " --toc Add a table of contents after abstract\n\n",
        "HTML options:\n\n",
        " -c|--css=CSS_URL_OR_FILE CSS URL (or file if using --embed-css)\n",
        " -e|--embed-css Embed CSS instead of linking to it\n",
        " -f|--header-offset=NUMBER Offset to add to <h1>,\n",
        " <h2>... headers (default 0)\n",
        " -b|--table-headers Use <th> instead of <td> in\n",
        " the first row of each table\n",
        " -ct|--center-tables Centers the tables\n",
        " -xt|--expand-tables Expands the tables (width=100\%)\n",
        " -nb|-no-body Don't generate <html><body>...\n",
        " </body></html> enclosing\n",
        " --encoding=ENCODING Character encoding (default: utf-8)\n",
        " -dl Use real <dl>, <dd> and <dt>\n",
        " instead of tables in definition lists\n",
        " --href-new-window Open links in new windows/tabs\n\n",
        "troff options:\n\n",
        " --table-type=TYPE Table type. Possible values:\n",
        " box, allbox, doublebox (default allbox)\n",
        "man options:\n\n",
        " -s|--man-section=SECTION Man page section (default: 1)\n\n",
        "LaTeX options:\n\n",
        " --docclass=CLASS Document class (default: report)\n",
        " --papersize=SIZE Paper size (default: a4paper)\n",
        " --encoding=ENCODING Character encoding (default: latin1)\n",
        " -ts|--tabsize=NUMBER Tab size for tab to space conversions in\n",
        " LaTeX verbatim environment (default: 8)\n";

    exit(1);
}