1#
2#  fmt_txt.pl
3#
4# ---------------------------------------------------
5#  TXT-specific driver stuff
6#
7#  Copyright (C) 1994-1996, Matt Welsh
8#  Copyright (C) 1996, Cees de Groot
9#  Copyright (C) 1999-2001, Taketoshi Sano
10#  Copyright (C) 2007-2020, Agustin Martin
11# ---------------------------------------------------
12
13package LinuxDocTools::fmt_txt;
14use strict;
15
16use File::Copy;
17use Text::EntityMap;
18use Text::Wrap;
19use LinuxDocTools::CharEnts;
20use LinuxDocTools::Lang;
21use LinuxDocTools::Vars;
22use LinuxDocTools::Utils qw(create_temp ldt_log);
23
24my $txt = {};
25$txt->{NAME}    = "txt";
26$txt->{HELP}    = "";
27$txt->{OPTIONS} = [
28		   { option => "manpage", type => "f", short => "m" },
29		   { option => "filter",  type => "f", short => "f" },
30		   { option => "blanks",  type => "i", short => "b" }
31		   ];
32$txt->{manpage} = 0;
33$txt->{filter}  = 0;
34$txt->{blanks}  = 3;
35
36$Formats{$txt->{NAME}} = $txt;
37
38# ---------------------------------------------------------------
39sub txt_parse_data
40# ---------------------------------------------------------------
41# Wrapper to parse_data, removing some things if not in verbatim
42# ---------------------------------------------------------------
43{
44  my $string     = shift;
45  my $verbatim   = shift;
46  my $char_maps  = shift;
47  my $txt_escape = shift;
48  my $msgheader  = "fmt_txt::txt_parse_data";
49
50  die "$msgheader: Bad number of arguments\n" unless $txt_escape;
51
52  $string =  &parse_data ($string, $char_maps, $txt_escape);
53
54  unless ( $verbatim ){
55    $string =~ s/([^\\])\\011/$1 /g;      # No tabulars in text
56    $string =~ s/\s+/ /g;                 #
57  }
58
59  return $string;
60}
61
62# ---------------------------------------------------------------
63$txt->{preNSGMLS} = sub
64# ---------------------------------------------------------------
65#  Set correct NsgmlsOpts
66# ---------------------------------------------------------------
67{
68  my $msgheader = "fmt_txt::preNSGMLS";
69
70  $global->{NsgmlsOpts} .= ( $txt->{manpage} ) ? " -iman " : " -ifmttxt ";
71
72  # Adjust charset names for groff
73  $global->{charset} = "latin1" if $global->{charset} eq "latin";
74
75  # Adjust format names for groff
76  $global->{format}  = $global->{charset};
77  $global->{format}  = "groff"  if $global->{format} eq "latin1";
78  $global->{format}  = "groff"  if $global->{format} eq "ascii";
79  $global->{format}  = "groff"  if $global->{format} eq "nippon";
80  $global->{format}  = "groff"  if $global->{format} eq "euc-kr";
81  $global->{format}  = "groff"  if $global->{format} eq "utf-8";
82  $global->{format}  = "man"    if $txt->{manpage};
83
84  $ENV{SGML_SEARCH_PATH} =~ s/txt/$global->{format}/;
85
86  $Formats{"groff"}  = $txt;
87  $Formats{"latin1"} = $txt;
88  $Formats{"man"}    = $txt;
89
90  ldt_log "  ${msgheader}:\n    format: $global->{format}, charset: $global->{charset}, lang: $global->{language}";
91
92  my $nsgmlsprepipecommand = "cat $global->{file} | sed 's/_/\\_/g' ";
93  ldt_log "  ${msgheader}::NsgmlsPrePipe:\n    $nsgmlsprepipecommand";
94
95  $global->{NsgmlsPrePipe} = "$nsgmlsprepipecommand";
96};
97
98# ---------------------------------------------------------------
99my $txt_escape = sub
100# ---------------------------------------------------------------
101# Ascii escape sub. This is called-back by `parse_data' below in
102# `txt_preASP' to properly escape `\' characters coming from the SGML
103# source.
104# ---------------------------------------------------------------
105{
106  my ($data) = @_;
107
108  $data =~ s|"|\\\&\"|g;   # Insert zero-width space in front of "
109  $data =~ s|^\.|\\&.|;	   # ditto in front of . at start of line
110  $data =~ s|^\'|\\&\'|;   # ditto in front of ' at start of line
111  $data =~ s|\\|\\\\|g;	   # Escape backslashes
112
113  return ($data);
114};
115
116# ---------------------------------------------------------------
117$txt->{preASP} = sub
118# ---------------------------------------------------------------
119# Pre-process file before sgmlsasp and create a TOC unless producing
120# a manpage. Code based in the genertoc utility and in code from FJM.
121# ---------------------------------------------------------------
122{
123  my ($INFILE, $OUTFILE) = @_;
124  my $char_maps = ( $global->{charset} eq "latin1" ) ? '.2l1tr' : '.2tr';
125  # Note: `sdata_dirs' list made an anonymous array to have a single argument
126  $char_maps = load_char_maps ($char_maps, [ Text::EntityMap::sdata_dirs() ]);
127
128  if ($txt->{manpage}){
129    while (<$INFILE>){
130      if ( s/^-// ){
131	chomp;
132	print $OUTFILE "-" . &parse_data ($_, $char_maps, $txt_escape) . "\n";
133      } elsif (/^A/) {
134	/^A(\S+) (IMPLIED|CDATA|NOTATION|ENTITY|TOKEN)( (.*))?$/
135	  || die "bad attribute data: $_\n";
136	my ($name,$type,$value) = ($1,$2,$4);
137	if ($type eq "CDATA"){
138	  # CDATA attributes get also translated
139	  $value = &parse_data ($value, $char_maps, $txt_escape);
140	}
141	print $OUTFILE "A$name $type $value\n";
142      } else {
143	print $OUTFILE $_;
144      }
145    }
146    return;
147  }
148
149  # ---------------------------------
150  # Pre-process file and extract TOC info
151  # ---------------------------------
152
153  my $inheading    = 0;
154  my $headertext   = '';
155  my $sectionlevel = '';
156  my $appendix     = 0;
157  my $txtout       = "";
158  my $thetoc       = '';
159  my $chapterskip  = 0;
160  my $verbatim     = 0;
161  my @tocarray     = ();
162  my @header       = ();
163  my @prevheader   = ();
164
165  while ( <$INFILE> ) {
166    if ($inheading){
167      next if ( /^(\(|\))(BF|EM|IT|LABEL|TT)/ );
168      next if ( /^\)TOC/ );
169
170      if ( s/^-// ) {                # Header text
171	chomp;
172	$headertext .= $_;
173	$headertext .= " ";
174      } elsif (/^\)HEADING/){        # End of header: Write full header text
175	$headertext =~ s/[ \n]*$//;
176	if ( $headertext ) {
177	  $headertext = &txt_parse_data ($headertext, $verbatim, $char_maps, $txt_escape);
178	  $headertext =~ s/^\\n/ /g;      # No newlines in header text BOL
179	  $headertext =~ s/([^\\])\\n/$1 /g;  # No unescaped \n in headertext
180	} else {
181	  $headertext = " ";
182	}
183	$txtout .= "-" . $headertext . "\n";
184	push @tocarray, [$sectionlevel, $headertext];
185	$inheading    = 0;
186	$sectionlevel = '';
187	$txtout .= $_;
188      } else {                       # labels and friends: copy to output
189	$txtout .= $_;
190      }
191
192    } else { # --- Not in heading
193      if ( s/^-// ) {
194	chomp;
195	$txtout .=  "-" . &txt_parse_data ($_, $verbatim, $char_maps, $txt_escape) . "\n";
196      } elsif (/^A/) {
197	/^A(\S+) (IMPLIED|CDATA|NOTATION|ENTITY|TOKEN)( (.*))?$/
198	  || die "bad attribute data: $_\n";
199	my ($name,$type,$value) = ($1,$2,$4);
200	if ($type eq "CDATA") {      # CDATA attributes get also translated
201	  $value = &txt_parse_data ($value, $verbatim, $char_maps, $txt_escape);
202	}
203	$txtout .= "A$name $type $value\n";
204      } elsif (/^\(TOC/) {           # Placeholder for TOC
205	$txtout .= "##TOC##";
206      } else {       # Nothing below changes output, just info is recorded
207	if (/^\(HEADING/) {          #  Go into heading processing mode.
208	  $headertext   = '';
209	  $inheading    = 1;
210	} elsif (/^\(CHAPT/) {
211	  $sectionlevel = 0;
212	  $chapterskip  = 1;         # Start sectioning with chapter
213	  if ( $appendix ) {
214	    $sectionlevel = "A$sectionlevel";
215	    $appendix     = 0;
216	  }
217	} elsif (/^\(SECT(.*)/) {
218	  $sectionlevel = $1 ? $1 : 0;
219	  $sectionlevel += $chapterskip;
220	  if ( $appendix ) {
221	    $sectionlevel = "A$sectionlevel";
222	    $appendix     = 0;
223	  }
224	} elsif (/^\(APPEND(.*)/) {  # appendix mode
225	  $appendix = 1;
226	} elsif (/^\(VERB/) {        # verbatim mode
227	  $verbatim = 1;
228	} elsif (/^\)VERB/){         # end of verbatim
229	  $verbatim = 0;
230	}
231	$txtout .= $_;
232      }
233    }
234  } # end of  while (<$INFILE>) loop
235
236  # ----------------------------
237  # Post-process the TOC, if any
238  # ----------------------------
239
240  if ( @tocarray ) {
241    my $toclinelength = 72;          # Length of a normal line
242    @header = @prevheader = ();
243    $thetoc = join ("\n",("(HLINE",
244			  ")HLINE",
245			  "(P",
246			  "-" . Xlat ("Table of Contents"),
247			  ")P",
248			  "(VERB\n"));
249
250    foreach my $entry ( @tocarray ) {
251      my $level  = $$entry[0];       # Section level
252      my $text   = $$entry[1];       # section entry
253      my $number = '';               # Numbering of the item
254      my $nwhite = '';               # Will be length($number) times " "
255
256      $text =~ s/(\(|\))(BF|EM|IT|LABEL|TT)//g;
257      $text =~ s/AID * CDATA.*$//g;
258      $text =~ s/\s+/ /g;
259
260      @prevheader = @header;
261      @header     = @header[0..$level];
262
263      if ( $level =~ s/^A// ){
264	$header[$level] = "A";
265      } else {
266	$header[$level]++;
267      }
268
269      $number = join ('.',@header);
270
271      if ( ! $#header ) {
272	# put a . after top level sections
273	$number .= '.';
274	# put a newline before top-level sections unless previous is one
275	$number = "\\n" . $number unless (!$#prevheader);
276	$number = "-" . $number;
277      } else {
278	# subsections get indentation matching hierarchy
279	$number = "-" . "   " x $#header . $number;
280      }
281      unless ( $text =~ /^(\(|\))(NCDX|NIDX)$/ ){
282	$nwhite = $number;
283	$nwhite =~ s/^[-\\n]*//;
284	$nwhite = "-" . " " x length($nwhite);
285	$Text::Wrap::columns = $toclinelength - length($nwhite);
286	foreach ( split("\n",wrap('','',$text)) ){
287	  $thetoc .= "$number $_\\n\n";
288	  $number = $nwhite;     # Whitespaces if number is already printed
289	}
290      }
291    }
292    $thetoc .= join ("\n",(")VERB",
293			   "(HLINE",
294			   ")HLINE\n"));
295  } # Parsed @tocarray
296
297  if ( $thetoc ){
298    $txtout =~ s/^\#\#TOC\#\#/$thetoc/m;
299  } else {
300    $txtout =~ s/^\#\#TOC\#\#//m;
301  }
302  print $OUTFILE $txtout;
303  return 0;
304};
305
306# ---------------------------------------------------------------
307$txt->{postASP} = sub
308# ---------------------------------------------------------------
309#  Take the sgmlsasp output, and make something useful from it.
310# ---------------------------------------------------------------
311{
312  my $INFILE    = shift;
313  my $OUTFILE;
314  my $TXTFILE;
315  my $GROFFOUT;
316  my $manfile   = "$global->{filename}.man";
317  my $txtfile   = "$global->{filename}.txt";
318  my $groffout  = "$global->{tmpbase}.groffout";
319  my $txtout    = ( $global->{language} eq "en" ) ? "" : ".nr HY 0\n";
320  my $txtout0   = "$txtout";
321  my $msgheader = "fmt_txt::postASP";
322
323  # Put document in a single large page. 99999 lines should be enough.
324  $txtout .= q/.\" Trim page to total height of text.
325.de sp-adj
326.  br
327.  pl \\n[nl]u
328..
329.
330.\" Set single-page mode
331.de sp-set
332.  pl 99999
333.  em sp-adj
334..
335.sp-set
336/;
337
338  while ( <$INFILE> ) {    #  Feed $txtout with roff input.
339    $txtout0 .= $_;
340    unless (/^\.DS/.../^\.DE/)  {
341      s/^[ \t]{1,}(.*)/$1/g;
342    }
343    s/^\.[ \t].*/\\\&$&/g;
344    s/\\fC/\\fR/g;
345    s/^.ft C/.ft R/g;
346    $txtout .= $_;
347  }
348
349  # Remove some extra .Pp
350  $txtout =~ s/(\.Pp\n){2,}/\.Pp\n/g;  # Collapse adjacent .Pp
351  $txtout =~ s/\.Pp\n(\.(IP|NH))/$1/g; # Remove .Pp before headers and exdented pars
352
353  if ( $global->{debug} ){
354    my $GROFF0;
355    my $groff0 = "$global->{tmpbase}.groff.0";
356    open ( $GROFF0, "> $groff0")
357      or die "${msgheader}: Could not open \"$groff0\" for write.\n";
358    print $GROFF0 $txtout0;
359    close $GROFF0;
360
361    my $GROFF;
362    my $groff  = "$global->{tmpbase}.groff";
363    open ( $GROFF, "> $groff")
364      or die "${msgheader}: Could not open \"$groff\" for write.\n";
365    print $GROFF $txtout;
366    close $GROFF;
367  }
368
369  if ( $txt->{manpage} ) {
370    open ( $OUTFILE, "> $manfile" )
371      or die "${msgheader}: Could not open \"$manfile\" for writing\n";
372  } else {
373    unless ( $global->{pass} ){  # Use old overstrike format
374      $global->{pass} = $txt->{filter} ? "-P-cbou" : "-P-c";
375    }
376    my $groffdevice  = ($global->{charset} eq "utf-8") ? "-k -Kutf8 -Tutf8 " : "-T $global->{charset}";
377    my $groffcommand = "| $main::progs->{GROFF} $global->{pass} $groffdevice -t $main::progs->{GROFFMACRO} > $groffout";
378    ldt_log "  ${msgheader}::groffcommand:\n    $groffcommand";
379    open ( $OUTFILE, $groffcommand )
380      or die "${msgheader}: Could not open pipe to groff:\n  $groffcommand\n";
381    print STDERR "groff_PIPE: $groffcommand\n"
382      if ( $global->{debug} &&  exists $ENV{'LDT_DEBUG'} );
383  }
384
385  print $OUTFILE $txtout;
386  close $OUTFILE;
387
388  die " ${msgheader}: Empty output file, error when calling groff. Aborting...\n"
389    if ( ! $txt->{manpage} && -z $groffout );
390
391  #  Unless making a manpage, a bit of work is left.
392
393  unless ( $txt->{manpage} ) {
394    open ( $TXTFILE, "> $txtfile")
395      or die "${msgheader}: Could not open \"$txtfile\" for writing\n";
396
397    open ( $GROFFOUT, "< $groffout")
398      or die "${msgheader}: Could not open \"$groffout\" for reading\n";
399
400    if ( $txt->{blanks} ) { # No more than $txt->{blanks} continuous blank lines
401      my $count = 0;
402      while ( <$GROFFOUT> ){
403	$count = ( /^$/ ) ? $count + 1  : 0;
404	print $TXTFILE $_ if ( $count <= $txt->{blanks} );
405      }
406    } else {
407      copy ($GROFFOUT, $TXTFILE);
408    }
409
410    close $TXTFILE;
411    close $GROFFOUT;
412  }
413  return 0;
414};
415
416# Ensure we evaluate to true.
4171;
418
419__END__
420
421# Local Variables:
422#  mode: perl
423#  perl-indent-level: 2
424# End:
425