1# 2# fmt_txt.pl 3# 4# --------------------------------------------------- 5# TXT-specific driver stuff 6# 7# Copyright (C) 1994-1996, Matt Welsh 8# Copyright (C) 1996, Cees de Groot 9# Copyright (C) 1999-2001, Taketoshi Sano 10# Copyright (C) 2007-2020, Agustin Martin 11# --------------------------------------------------- 12 13package LinuxDocTools::fmt_txt; 14use strict; 15 16use File::Copy; 17use Text::EntityMap; 18use Text::Wrap; 19use LinuxDocTools::CharEnts; 20use LinuxDocTools::Lang; 21use LinuxDocTools::Vars; 22use LinuxDocTools::Utils qw(create_temp ldt_log); 23 24my $txt = {}; 25$txt->{NAME} = "txt"; 26$txt->{HELP} = ""; 27$txt->{OPTIONS} = [ 28 { option => "manpage", type => "f", short => "m" }, 29 { option => "filter", type => "f", short => "f" }, 30 { option => "blanks", type => "i", short => "b" } 31 ]; 32$txt->{manpage} = 0; 33$txt->{filter} = 0; 34$txt->{blanks} = 3; 35 36$Formats{$txt->{NAME}} = $txt; 37 38# --------------------------------------------------------------- 39sub txt_parse_data 40# --------------------------------------------------------------- 41# Wrapper to parse_data, removing some things if not in verbatim 42# --------------------------------------------------------------- 43{ 44 my $string = shift; 45 my $verbatim = shift; 46 my $char_maps = shift; 47 my $txt_escape = shift; 48 my $msgheader = "fmt_txt::txt_parse_data"; 49 50 die "$msgheader: Bad number of arguments\n" unless $txt_escape; 51 52 $string = &parse_data ($string, $char_maps, $txt_escape); 53 54 unless ( $verbatim ){ 55 $string =~ s/([^\\])\\011/$1 /g; # No tabulars in text 56 $string =~ s/\s+/ /g; # 57 } 58 59 return $string; 60} 61 62# --------------------------------------------------------------- 63$txt->{preNSGMLS} = sub 64# --------------------------------------------------------------- 65# Set correct NsgmlsOpts 66# --------------------------------------------------------------- 67{ 68 my $msgheader = "fmt_txt::preNSGMLS"; 69 70 $global->{NsgmlsOpts} .= ( $txt->{manpage} ) ? " -iman " : " -ifmttxt "; 71 72 # Adjust charset names for groff 73 $global->{charset} = "latin1" if $global->{charset} eq "latin"; 74 75 # Adjust format names for groff 76 $global->{format} = $global->{charset}; 77 $global->{format} = "groff" if $global->{format} eq "latin1"; 78 $global->{format} = "groff" if $global->{format} eq "ascii"; 79 $global->{format} = "groff" if $global->{format} eq "nippon"; 80 $global->{format} = "groff" if $global->{format} eq "euc-kr"; 81 $global->{format} = "groff" if $global->{format} eq "utf-8"; 82 $global->{format} = "man" if $txt->{manpage}; 83 84 $ENV{SGML_SEARCH_PATH} =~ s/txt/$global->{format}/; 85 86 $Formats{"groff"} = $txt; 87 $Formats{"latin1"} = $txt; 88 $Formats{"man"} = $txt; 89 90 ldt_log " ${msgheader}:\n format: $global->{format}, charset: $global->{charset}, lang: $global->{language}"; 91 92 my $nsgmlsprepipecommand = "cat $global->{file} | sed 's/_/\\_/g' "; 93 ldt_log " ${msgheader}::NsgmlsPrePipe:\n $nsgmlsprepipecommand"; 94 95 $global->{NsgmlsPrePipe} = "$nsgmlsprepipecommand"; 96}; 97 98# --------------------------------------------------------------- 99my $txt_escape = sub 100# --------------------------------------------------------------- 101# Ascii escape sub. This is called-back by `parse_data' below in 102# `txt_preASP' to properly escape `\' characters coming from the SGML 103# source. 104# --------------------------------------------------------------- 105{ 106 my ($data) = @_; 107 108 $data =~ s|"|\\\&\"|g; # Insert zero-width space in front of " 109 $data =~ s|^\.|\\&.|; # ditto in front of . at start of line 110 $data =~ s|^\'|\\&\'|; # ditto in front of ' at start of line 111 $data =~ s|\\|\\\\|g; # Escape backslashes 112 113 return ($data); 114}; 115 116# --------------------------------------------------------------- 117$txt->{preASP} = sub 118# --------------------------------------------------------------- 119# Pre-process file before sgmlsasp and create a TOC unless producing 120# a manpage. Code based in the genertoc utility and in code from FJM. 121# --------------------------------------------------------------- 122{ 123 my ($INFILE, $OUTFILE) = @_; 124 my $char_maps = ( $global->{charset} eq "latin1" ) ? '.2l1tr' : '.2tr'; 125 # Note: `sdata_dirs' list made an anonymous array to have a single argument 126 $char_maps = load_char_maps ($char_maps, [ Text::EntityMap::sdata_dirs() ]); 127 128 if ($txt->{manpage}){ 129 while (<$INFILE>){ 130 if ( s/^-// ){ 131 chomp; 132 print $OUTFILE "-" . &parse_data ($_, $char_maps, $txt_escape) . "\n"; 133 } elsif (/^A/) { 134 /^A(\S+) (IMPLIED|CDATA|NOTATION|ENTITY|TOKEN)( (.*))?$/ 135 || die "bad attribute data: $_\n"; 136 my ($name,$type,$value) = ($1,$2,$4); 137 if ($type eq "CDATA"){ 138 # CDATA attributes get also translated 139 $value = &parse_data ($value, $char_maps, $txt_escape); 140 } 141 print $OUTFILE "A$name $type $value\n"; 142 } else { 143 print $OUTFILE $_; 144 } 145 } 146 return; 147 } 148 149 # --------------------------------- 150 # Pre-process file and extract TOC info 151 # --------------------------------- 152 153 my $inheading = 0; 154 my $headertext = ''; 155 my $sectionlevel = ''; 156 my $appendix = 0; 157 my $txtout = ""; 158 my $thetoc = ''; 159 my $chapterskip = 0; 160 my $verbatim = 0; 161 my @tocarray = (); 162 my @header = (); 163 my @prevheader = (); 164 165 while ( <$INFILE> ) { 166 if ($inheading){ 167 next if ( /^(\(|\))(BF|EM|IT|LABEL|TT)/ ); 168 next if ( /^\)TOC/ ); 169 170 if ( s/^-// ) { # Header text 171 chomp; 172 $headertext .= $_; 173 $headertext .= " "; 174 } elsif (/^\)HEADING/){ # End of header: Write full header text 175 $headertext =~ s/[ \n]*$//; 176 if ( $headertext ) { 177 $headertext = &txt_parse_data ($headertext, $verbatim, $char_maps, $txt_escape); 178 $headertext =~ s/^\\n/ /g; # No newlines in header text BOL 179 $headertext =~ s/([^\\])\\n/$1 /g; # No unescaped \n in headertext 180 } else { 181 $headertext = " "; 182 } 183 $txtout .= "-" . $headertext . "\n"; 184 push @tocarray, [$sectionlevel, $headertext]; 185 $inheading = 0; 186 $sectionlevel = ''; 187 $txtout .= $_; 188 } else { # labels and friends: copy to output 189 $txtout .= $_; 190 } 191 192 } else { # --- Not in heading 193 if ( s/^-// ) { 194 chomp; 195 $txtout .= "-" . &txt_parse_data ($_, $verbatim, $char_maps, $txt_escape) . "\n"; 196 } elsif (/^A/) { 197 /^A(\S+) (IMPLIED|CDATA|NOTATION|ENTITY|TOKEN)( (.*))?$/ 198 || die "bad attribute data: $_\n"; 199 my ($name,$type,$value) = ($1,$2,$4); 200 if ($type eq "CDATA") { # CDATA attributes get also translated 201 $value = &txt_parse_data ($value, $verbatim, $char_maps, $txt_escape); 202 } 203 $txtout .= "A$name $type $value\n"; 204 } elsif (/^\(TOC/) { # Placeholder for TOC 205 $txtout .= "##TOC##"; 206 } else { # Nothing below changes output, just info is recorded 207 if (/^\(HEADING/) { # Go into heading processing mode. 208 $headertext = ''; 209 $inheading = 1; 210 } elsif (/^\(CHAPT/) { 211 $sectionlevel = 0; 212 $chapterskip = 1; # Start sectioning with chapter 213 if ( $appendix ) { 214 $sectionlevel = "A$sectionlevel"; 215 $appendix = 0; 216 } 217 } elsif (/^\(SECT(.*)/) { 218 $sectionlevel = $1 ? $1 : 0; 219 $sectionlevel += $chapterskip; 220 if ( $appendix ) { 221 $sectionlevel = "A$sectionlevel"; 222 $appendix = 0; 223 } 224 } elsif (/^\(APPEND(.*)/) { # appendix mode 225 $appendix = 1; 226 } elsif (/^\(VERB/) { # verbatim mode 227 $verbatim = 1; 228 } elsif (/^\)VERB/){ # end of verbatim 229 $verbatim = 0; 230 } 231 $txtout .= $_; 232 } 233 } 234 } # end of while (<$INFILE>) loop 235 236 # ---------------------------- 237 # Post-process the TOC, if any 238 # ---------------------------- 239 240 if ( @tocarray ) { 241 my $toclinelength = 72; # Length of a normal line 242 @header = @prevheader = (); 243 $thetoc = join ("\n",("(HLINE", 244 ")HLINE", 245 "(P", 246 "-" . Xlat ("Table of Contents"), 247 ")P", 248 "(VERB\n")); 249 250 foreach my $entry ( @tocarray ) { 251 my $level = $$entry[0]; # Section level 252 my $text = $$entry[1]; # section entry 253 my $number = ''; # Numbering of the item 254 my $nwhite = ''; # Will be length($number) times " " 255 256 $text =~ s/(\(|\))(BF|EM|IT|LABEL|TT)//g; 257 $text =~ s/AID * CDATA.*$//g; 258 $text =~ s/\s+/ /g; 259 260 @prevheader = @header; 261 @header = @header[0..$level]; 262 263 if ( $level =~ s/^A// ){ 264 $header[$level] = "A"; 265 } else { 266 $header[$level]++; 267 } 268 269 $number = join ('.',@header); 270 271 if ( ! $#header ) { 272 # put a . after top level sections 273 $number .= '.'; 274 # put a newline before top-level sections unless previous is one 275 $number = "\\n" . $number unless (!$#prevheader); 276 $number = "-" . $number; 277 } else { 278 # subsections get indentation matching hierarchy 279 $number = "-" . " " x $#header . $number; 280 } 281 unless ( $text =~ /^(\(|\))(NCDX|NIDX)$/ ){ 282 $nwhite = $number; 283 $nwhite =~ s/^[-\\n]*//; 284 $nwhite = "-" . " " x length($nwhite); 285 $Text::Wrap::columns = $toclinelength - length($nwhite); 286 foreach ( split("\n",wrap('','',$text)) ){ 287 $thetoc .= "$number $_\\n\n"; 288 $number = $nwhite; # Whitespaces if number is already printed 289 } 290 } 291 } 292 $thetoc .= join ("\n",(")VERB", 293 "(HLINE", 294 ")HLINE\n")); 295 } # Parsed @tocarray 296 297 if ( $thetoc ){ 298 $txtout =~ s/^\#\#TOC\#\#/$thetoc/m; 299 } else { 300 $txtout =~ s/^\#\#TOC\#\#//m; 301 } 302 print $OUTFILE $txtout; 303 return 0; 304}; 305 306# --------------------------------------------------------------- 307$txt->{postASP} = sub 308# --------------------------------------------------------------- 309# Take the sgmlsasp output, and make something useful from it. 310# --------------------------------------------------------------- 311{ 312 my $INFILE = shift; 313 my $OUTFILE; 314 my $TXTFILE; 315 my $GROFFOUT; 316 my $manfile = "$global->{filename}.man"; 317 my $txtfile = "$global->{filename}.txt"; 318 my $groffout = "$global->{tmpbase}.groffout"; 319 my $txtout = ( $global->{language} eq "en" ) ? "" : ".nr HY 0\n"; 320 my $txtout0 = "$txtout"; 321 my $msgheader = "fmt_txt::postASP"; 322 323 # Put document in a single large page. 99999 lines should be enough. 324 $txtout .= q/.\" Trim page to total height of text. 325.de sp-adj 326. br 327. pl \\n[nl]u 328.. 329. 330.\" Set single-page mode 331.de sp-set 332. pl 99999 333. em sp-adj 334.. 335.sp-set 336/; 337 338 while ( <$INFILE> ) { # Feed $txtout with roff input. 339 $txtout0 .= $_; 340 unless (/^\.DS/.../^\.DE/) { 341 s/^[ \t]{1,}(.*)/$1/g; 342 } 343 s/^\.[ \t].*/\\\&$&/g; 344 s/\\fC/\\fR/g; 345 s/^.ft C/.ft R/g; 346 $txtout .= $_; 347 } 348 349 # Remove some extra .Pp 350 $txtout =~ s/(\.Pp\n){2,}/\.Pp\n/g; # Collapse adjacent .Pp 351 $txtout =~ s/\.Pp\n(\.(IP|NH))/$1/g; # Remove .Pp before headers and exdented pars 352 353 if ( $global->{debug} ){ 354 my $GROFF0; 355 my $groff0 = "$global->{tmpbase}.groff.0"; 356 open ( $GROFF0, "> $groff0") 357 or die "${msgheader}: Could not open \"$groff0\" for write.\n"; 358 print $GROFF0 $txtout0; 359 close $GROFF0; 360 361 my $GROFF; 362 my $groff = "$global->{tmpbase}.groff"; 363 open ( $GROFF, "> $groff") 364 or die "${msgheader}: Could not open \"$groff\" for write.\n"; 365 print $GROFF $txtout; 366 close $GROFF; 367 } 368 369 if ( $txt->{manpage} ) { 370 open ( $OUTFILE, "> $manfile" ) 371 or die "${msgheader}: Could not open \"$manfile\" for writing\n"; 372 } else { 373 unless ( $global->{pass} ){ # Use old overstrike format 374 $global->{pass} = $txt->{filter} ? "-P-cbou" : "-P-c"; 375 } 376 my $groffdevice = ($global->{charset} eq "utf-8") ? "-k -Kutf8 -Tutf8 " : "-T $global->{charset}"; 377 my $groffcommand = "| $main::progs->{GROFF} $global->{pass} $groffdevice -t $main::progs->{GROFFMACRO} > $groffout"; 378 ldt_log " ${msgheader}::groffcommand:\n $groffcommand"; 379 open ( $OUTFILE, $groffcommand ) 380 or die "${msgheader}: Could not open pipe to groff:\n $groffcommand\n"; 381 print STDERR "groff_PIPE: $groffcommand\n" 382 if ( $global->{debug} && exists $ENV{'LDT_DEBUG'} ); 383 } 384 385 print $OUTFILE $txtout; 386 close $OUTFILE; 387 388 die " ${msgheader}: Empty output file, error when calling groff. Aborting...\n" 389 if ( ! $txt->{manpage} && -z $groffout ); 390 391 # Unless making a manpage, a bit of work is left. 392 393 unless ( $txt->{manpage} ) { 394 open ( $TXTFILE, "> $txtfile") 395 or die "${msgheader}: Could not open \"$txtfile\" for writing\n"; 396 397 open ( $GROFFOUT, "< $groffout") 398 or die "${msgheader}: Could not open \"$groffout\" for reading\n"; 399 400 if ( $txt->{blanks} ) { # No more than $txt->{blanks} continuous blank lines 401 my $count = 0; 402 while ( <$GROFFOUT> ){ 403 $count = ( /^$/ ) ? $count + 1 : 0; 404 print $TXTFILE $_ if ( $count <= $txt->{blanks} ); 405 } 406 } else { 407 copy ($GROFFOUT, $TXTFILE); 408 } 409 410 close $TXTFILE; 411 close $GROFFOUT; 412 } 413 return 0; 414}; 415 416# Ensure we evaluate to true. 4171; 418 419__END__ 420 421# Local Variables: 422# mode: perl 423# perl-indent-level: 2 424# End: 425