1# Text.pm: output tree as simple text.
2#
3# Copyright 2010-2020 Free Software Foundation, Inc.
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation; either version 3 of the License,
8# or (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13# GNU General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with this program.  If not, see <http://www.gnu.org/licenses/>.
17#
18# Original author: Patrice Dumas <pertusus@free.fr>
19
20package Texinfo::Convert::Text;
21
22use 5.00405;
23use strict;
24
25use Texinfo::Convert::Converter;
26# accent commands list.
27use Texinfo::Common;
28use Texinfo::Convert::Unicode;
29# for debugging
30use Texinfo::Convert::Texinfo;
31use Data::Dumper;
32use Carp qw(cluck carp);
33
34use File::Basename;
35
36require Exporter;
37use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
38@ISA = qw(Exporter Texinfo::Convert::Converter);
39
40%EXPORT_TAGS = ( 'all' => [ qw(
41  convert
42  ascii_accent
43  text_accents
44) ] );
45
46@EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
47
48@EXPORT = qw(
49);
50
51$VERSION = '6.8';
52
# Brace commands whose content never appears in the text output.
# Listing 'footnote', 'shortcaption' and 'caption' is in fact not needed
# when they have no brace_command_arg, see below.
# ('xref', 'ref', 'pxref' and 'inforef' are not part of the list.)
my %ignored_brace_commands = map { $_ => 1 } (
  'anchor',
  'footnote', 'shortcaption', 'caption', 'hyphenation', 'errormsg',
);

# Block commands that are ignored by _convert, unless the command name
# is set in the expanded_formats_hash option.
my %ignored_block_commands = map { $_ => 1 } (
  'titlepage', 'copying', 'documentdescription',
  'html', 'tex', 'xml', 'docbook', 'ignore', 'macro', 'rmacro',
);
67
# Plain text replacement for the brace commands that take no argument.
# used by Texinfo::Convert::NodeNormalization
our %text_brace_no_arg_commands = (
  # logos and glyphs
  'TeX'              => 'TeX',
  'LaTeX'            => 'LaTeX',
  'bullet'           => '*',
  'copyright'        => '(C)',
  'registeredsymbol' => '(R)',
  'dots'             => '...',
  'enddots'          => '...',
  'equiv'            => '==',
  'error'            => 'error-->',
  'expansion'        => '==>',
  'arrow'            => '->',
  'minus'            => '-',
  'point'            => '-!-',
  'print'            => '-|',
  'result'           => '=>',
  'today'            => '',
  # letters
  'aa'               => 'aa',
  'AA'               => 'AA',
  'ae'               => 'ae',
  'oe'               => 'oe',
  'AE'               => 'AE',
  'OE'               => 'OE',
  'o'                => '/o',
  'O'                => '/O',
  'ss'               => 'ss',
  'l'                => '/l',
  'L'                => '/L',
  'DH'               => 'D',
  'dh'               => 'd',
  # http://www.evertype.com/standards/wynnyogh/thorn.html
  'TH'               => 'TH',
  'th'               => 'th',
  # punctuation and symbols
  'exclamdown'       => '!',
  'questiondown'     => '?',
  'pounds'           => '#',
  'ordf'             => 'a',
  'ordm'             => 'o',
  'comma'            => ',',
  'atchar'           => '@',
  'ampchar'          => '&',
  'lbracechar'       => '{',
  'rbracechar'       => '}',
  'backslashchar'    => '\\',
  'hashchar'         => '#',
  'euro'             => 'Euro',
  'geq'              => '>=',
  'leq'              => '<=',
  'tie'              => ' ',
  'textdegree'       => 'o',
  # quotation marks
  'quotedblleft'     => '``',
  'quotedblright'    => "''",
  'quoteleft'        => '`',
  'quoteright'       => "'",
  'quotedblbase'     => ',,',
  'quotesinglbase'   => ',',
  'guillemetleft'    => '<<',
  'guillemetright'   => '>>',
  'guillemotleft'    => '<<',
  'guillemotright'   => '>>',
  'guilsinglleft'    => '<',
  'guilsinglright'   => '>',
  # specially treated
  'click'            => '',
);
133
# Replacements used instead of %text_brace_no_arg_commands when the
# 'sort_string' option is set.
my %sort_brace_no_arg_commands = (
  'copyright'        => 'C',
  'registeredsymbol' => 'R',
  'today'            => 't',
);

# these letter commands are represented by the plain letter
{
  my @plain_letters = ('o', 'O', 'l', 'L');
  @sort_brace_no_arg_commands{@plain_letters} = @plain_letters;
}
143
# local copies of command tables defined in Texinfo::Common
my %accent_commands = %Texinfo::Common::accent_commands;
my %no_brace_commands = %Texinfo::Common::no_brace_commands;

# Line commands that are formatted by _convert; the other commands
# with a line_arg or misc_arg first argument are ignored.
our %formatting_misc_commands = map { $_ => 1 } (
  'verbatiminclude', 'sp', 'center', 'exdent',
  'item', 'itemx', 'tab', 'headitem',
  'node', keys(%Texinfo::Common::sectioning_commands),
);

# Tree element types that produce no output.
my %ignored_types = map { $_ => 1 } (
  'empty_line_after_command', 'preamble',
  'empty_spaces_after_command', 'spaces_at_end',
  'empty_spaces_before_argument', 'empty_spaces_before_paragraph',
  'empty_spaces_after_close_brace',
);
161
162
# Return an ASCII transliteration of $text accented by the accent
# command element $command (only its 'cmdname' is used).
#   $text     the text the accent applies to
#   $command  hash reference of the accent command tree element
sub ascii_accent($$)
{
  my ($text, $command) = @_;
  my $accent = $command->{'cmdname'};

  # accents rendered as a mark appended to the text
  my %appended_mark = (
    'H'          => "''",
    'dotaccent'  => '.',
    'ringaccent' => '*',
    'tieaccent'  => '[',
    'u'          => '(',
    'ubaraccent' => '_',
    'v'          => '<',
    'ogonek'     => ';',
  );

  if ($accent eq 'dotless') {
    # no mark at all
    return $text;
  } elsif ($accent eq 'udotaccent') {
    # the only accent with the mark put before the text
    return '.' . $text;
  } elsif (exists($appended_mark{$accent})) {
    return $text . $appended_mark{$accent};
  }
  # other accent command names are a single character appended as is
  return $text . $accent;
}
180
# Format a stack of accents as ascii.
#   $result    the innermost text
#   $stack     array reference of accent command elements, outermost first
#   $set_case  optional; when true and the text is a single word character,
#              upper-case it if positive, lower-case it otherwise
# The accents are applied from the innermost one to the outermost one.
sub ascii_accents($$;$)
{
  my ($result, $stack, $set_case) = @_;

  if ($set_case and $result =~ /^\w$/) {
    $result = ($set_case > 0) ? uc($result) : lc($result);
  }

  my @innermost_first = reverse(@$stack);
  while (@innermost_first) {
    my $accent_command = shift(@innermost_first);
    $result = ascii_accent($result, $accent_command);
  }
  return $result;
}
200
# Same as ascii_accent, but with a converter as first argument to be
# consistent with the calling conventions of the fallback accent
# formatting functions given to convert_accents/encoded_accents.
# The converter argument is not used.
sub ascii_accent_fallback($$$)
{
  my ($converter, $text, $command) = @_;

  return ascii_accent($text, $command);
}
212
# Format an accent command and nested accents within as Text.
#   $accent    the outermost accent command element
#   $encoding  optional encoding name, passed as 'enabled_encoding' for
#              the inner text and to encoded_accents
#   $set_case  optional case setting, passed as 'sc' for the inner text
#              and to the accent formatting functions
# The result of Texinfo::Convert::Unicode::encoded_accents is returned
# when it is defined, otherwise the ascii transliteration is returned.
# NOTE(review): a negative $set_case is also passed as a true 'sc' option
# for the inner text conversion -- confirm this is intended.
sub text_accents($;$$)
{
  my ($accent, $encoding, $set_case) = @_;

  my ($contents, $stack)
      = Texinfo::Common::find_innermost_accent_contents($accent);

  # convert the innermost contents first
  my %inner_options;
  $inner_options{'enabled_encoding'} = $encoding if (defined($encoding));
  $inner_options{'sc'} = $set_case if (defined($set_case));
  my $text = convert({'contents' => $contents}, \%inner_options);

  my $encoded = Texinfo::Convert::Unicode::encoded_accents(undef, $text,
                     $stack, $encoding, \&ascii_accent_fallback, $set_case);
  return $encoded if (defined($encoded));

  return ascii_accents($text, $stack, $set_case);
}
236
# Return the text for a brace command without argument.
#   $root     the command tree element
#   $options  optional hash reference; the keys read here are
#             'enabled_encoding', 'no_extra_unicode', 'converter',
#             'sort_string', 'sc' and 'lc'
# The result is, in order of preference, the unicode representation, the
# conversion of the translated command tree obtained through the
# converter, and the entry of the sort or text replacement tables.
sub brace_no_arg_command($;$)
{
  my ($root, $options) = @_;

  my $encoding;
  if ($options and $options->{'enabled_encoding'}) {
    $encoding = $options->{'enabled_encoding'};
  }

  # use the command recorded as clickstyle instead of the element
  # command name, if that command has a text replacement
  my $command = $root->{'cmdname'};
  if ($root->{'extra'}) {
    my $clickstyle = $root->{'extra'}->{'clickstyle'};
    if (defined($clickstyle)
        and defined($text_brace_no_arg_commands{$clickstyle})) {
      $command = $clickstyle;
    }
  }

  my $result;
  # the unicode representation is skipped for the commands in
  # extra_unicode_map when 'no_extra_unicode' is set
  unless ($options->{'no_extra_unicode'}
          and $Texinfo::Convert::Unicode::extra_unicode_map{$command}) {
    $result = Texinfo::Convert::Unicode::unicode_for_brace_no_arg_command(
                       $command, $encoding);
  }

  if (!defined($result) and $options and $options->{'converter'}) {
    my $translated_tree = Texinfo::Common::translated_command_tree(
                  $options->{'converter'}, $command);
    $result = _convert($translated_tree, $options) if ($translated_tree);
  }

  if (!defined($result)) {
    my $for_sorting = ($options and $options->{'sort_string'}
                       and $sort_brace_no_arg_commands{$command});
    $result = $for_sorting ? $sort_brace_no_arg_commands{$command}
                           : $text_brace_no_arg_commands{$command};
  }

  # case is changed for letter commands only, 'sc' has priority over 'lc'
  if ($options and $Texinfo::Common::letter_no_arg_commands{$command}) {
    if ($options->{'sc'}) {
      $result = uc($result);
    } elsif ($options->{'lc'}) {
      $result = lc($result);
    }
  }
  return $result;
}
280
# character used to underline a heading, indexed by section level
my %underline_symbol = (
  0 => '*', 1 => '*', 2 => '=', 3 => '-', 4 => '.',
);

# Return the text of an underlined heading, possibly indented.
#   $current        the sectioning command element; its 'level' is set
#                   with Texinfo::Structuring::section_level if undefined
#   $text           the heading text
#   $converter      passed to Texinfo::Common::numbered_heading
#   $numbered       passed to Texinfo::Common::numbered_heading
#   $indent_length  optional indentation of the heading; the underline is
#                   indented and shortened by that length
# Returns the empty string if the heading has no non-space character.
sub heading($$$;$$)
{
  my ($current, $text, $converter, $numbered, $indent_length) = @_;

  # REMARK to get the numbering right in case of an indented text, the
  # indentation should be given here.  But this should never happen as
  # the only @-commands allowed in indented context are not numbered.
  $text = Texinfo::Common::numbered_heading($converter, $current, $text,
                                            $numbered);
  return '' unless ($text =~ /\S/);

  my $result = $text ."\n";
  if (!defined($indent_length)) {
    $indent_length = 0;
  } else {
    $indent_length = 0 if ($indent_length < 0);
    $result .= (' ' x $indent_length);
  }

  $current->{'level'} = Texinfo::Structuring::section_level($current)
    if (!defined $current->{'level'});

  my $underline_length
    = Texinfo::Convert::Unicode::string_width($text) - $indent_length;
  $result .= ($underline_symbol{$current->{'level'}} x $underline_length)
             ."\n";
  return $result;
}
320
# Return a new options hash reference with the 'code' option set.
# The other entries are a shallow copy of $options, which may be
# undefined and is left unmodified.
sub _code_options($)
{
  my $options = shift;

  my %code_options = defined($options) ? %$options : ();
  $code_options{'code'} = 1;
  return \%code_options;
}
333
# Convert the tree $root to text, with the optional options hash
# reference.  May be called as a function, with the tree first, or
# object oriented, in which case the first argument is skipped.
# Warns and returns undef if the first argument is an array reference.
sub convert($;$)
{
  my $root = shift;

  my $first_arg_type = ref($root);
  if ($first_arg_type eq 'ARRAY') {
    carp ("convert argument $root not blessed reference or HASH");
    return undef;
  } elsif ($first_arg_type ne 'HASH') {
    # means it was called object oriented
    $root = shift;
  }
  my $options = shift;
  #print STDERR "CONVERT\n";
  return _convert($root, $options);
}
349
sub _convert($;$);

# Recursively convert the tree element $root to simple text.
#   $root     hash reference of a tree element
#   $options  optional hash reference of options.  The keys read here are
#             'expanded_formats_hash', 'converter', 'raw', 'sc', 'code',
#             'sort_string', 'TEST', 'enabled_encoding' and
#             'NUMBER_SECTIONS'.
# Returns the text.  The caller's options hash is not modified by the
# 'raw', 'sc' and 'code' settings needed for sub-elements, they are set
# in copies.
sub _convert($;$)
{
  my $root = shift;
  my $options = shift;

  # Options are optional, for example convert($tree) and convert_tree
  # pass none.  Without this, {%$options, 'sc' => 1} below dies on an
  # undefined value for @sc.
  $options = {} if (!defined($options));

  # Ignored elements, never for a def_line:
  #  * ignored types and ignored brace commands,
  #  * ignored block commands not in expanded_formats_hash,
  #  * inline commands, except inlinefmtifelse, that are either format
  #    commands with a format not expanded, or other inline commands
  #    without expand_index,
  #  * commands with a line_arg or misc_arg first argument that are not
  #    in %formatting_misc_commands.
  return '' if (!($root->{'type'} and $root->{'type'} eq 'def_line')
     and (($root->{'type'} and $ignored_types{$root->{'type'}})
          or ($root->{'cmdname'}
             and ($ignored_brace_commands{$root->{'cmdname'}}
                 or ($ignored_block_commands{$root->{'cmdname'}}
                     and !(defined($options->{'expanded_formats_hash'})
                           and $options->{'expanded_formats_hash'}->{$root->{'cmdname'}}))
                 or ($Texinfo::Common::inline_commands{$root->{'cmdname'}}
                     and $root->{'cmdname'} ne 'inlinefmtifelse'
                     and (($Texinfo::Common::inline_format_commands{$root->{'cmdname'}}
                          and (!$root->{'extra'}->{'format'}
                               or !$options->{'expanded_formats_hash'}->{$root->{'extra'}->{'format'}}))
                         or (!$Texinfo::Common::inline_format_commands{$root->{'cmdname'}}
                             and !defined($root->{'extra'}->{'expand_index'}))))
             # here ignore most of the misc commands
                 or ($root->{'args'} and $root->{'args'}->[0]
                     and $root->{'args'}->[0]->{'type'}
                     and ($root->{'args'}->[0]->{'type'} eq 'line_arg'
                         or $root->{'args'}->[0]->{'type'} eq 'misc_arg')
                     and !$formatting_misc_commands{$root->{'cmdname'}})))));
  my $result = '';
  if (defined($root->{'text'})) {
    if ($root->{'type'} and $root->{'type'} eq 'untranslated'
        and $options and $options->{'converter'}) {
      # translate with the documentlanguage recorded in the element, and
      # restore the converter documentlanguage afterwards
      my $save_lang = $options->{'converter'}->get_conf('documentlanguage');
      $options->{'converter'}->{'documentlanguage'}
        = $root->{'extra'}->{'documentlanguage'};
      my $tree = Texinfo::Report::gdt($options->{'converter'},
                                      $root->{'text'});
      $result = _convert($tree, $options);
      $options->{'converter'}->{'documentlanguage'} = $save_lang;
    } else {
      $result = $root->{'text'};
      # raw text is output as is
      if ((! defined($root->{'type'})
           or $root->{'type'} ne 'raw')
           and !$options->{'raw'}) {
        if ($options->{'sc'}) {
          $result = uc($result);
        }
        if (!$options->{'code'}) {
          $result =~ s/``/"/g;
          $result =~ s/\'\'/"/g;
          # --- becomes -- and -- becomes -, \x{1F} is a temporary
          # placeholder protecting the result of the first conversion
          # from the second
          $result =~ s/---/\x{1F}/g;
          $result =~ s/--/-/g;
          $result =~ s/\x{1F}/--/g;
        }
      }
    }
  }
  if ($root->{'cmdname'}) {
    my $command = $root->{'cmdname'};
    if (defined($no_brace_commands{$root->{'cmdname'}})) {
      return $no_brace_commands{$root->{'cmdname'}};
    } elsif ($root->{'cmdname'} eq 'today') {
      if ($options->{'sort_string'}
          and $sort_brace_no_arg_commands{$root->{'cmdname'}}) {
        return $sort_brace_no_arg_commands{$root->{'cmdname'}};
      } elsif ($options->{'converter'}) {
        return _convert(Texinfo::Common::expand_today($options->{'converter'}),
                       $options);
      } elsif ($options->{'TEST'}) {
        # fixed string, to have a reproducible output
        return 'a sunny day';
      } else {
        my($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst)
          = localtime(time);
        $year += ($year < 70) ? 2000 : 1900;
        return "$Texinfo::Common::MONTH_NAMES[$mon] $mday, $year";
      }
    } elsif (defined($text_brace_no_arg_commands{$root->{'cmdname'}})) {
      return brace_no_arg_command($root, $options);
    # commands with braces
    } elsif ($accent_commands{$root->{'cmdname'}}) {
      my $result = text_accents ($root, $options->{'enabled_encoding'},
                                        $options->{'sc'});
      return $result;
    } elsif ($root->{'cmdname'} eq 'image') {
      # only the first argument is output
      return _convert($root->{'args'}->[0], _code_options($options));
    } elsif ($root->{'cmdname'} eq 'email') {
      # the second argument is output if not empty, the mail otherwise
      my $mail = _convert($root->{'args'}->[0], _code_options($options));
      my $text;
      $text = _convert($root->{'args'}->[1], $options)
         if (defined($root->{'args'}->[1]));
      return $text if (defined($text) and ($text ne ''));
      return $mail;
    } elsif ($root->{'cmdname'} eq 'uref' or $root->{'cmdname'} eq 'url') {
      # the third argument replaces everything if not empty
      my $replacement;
      $replacement = _convert($root->{'args'}->[2], $options)
        if (defined($root->{'args'}->[2]));
      return $replacement if (defined($replacement) and $replacement ne '');
      my $text;
      $text = _convert($root->{'args'}->[1], $options)
        if (defined($root->{'args'}->[1]));
      my $url = _convert($root->{'args'}->[0], _code_options($options));
      if (defined($text) and $text ne '') {
        return "$url ($text)";
      } else {
        return $url;
      }
    } elsif ($Texinfo::Common::explained_commands{$root->{'cmdname'}}
             and $root->{'args'} and $root->{'args'}->[1]) {
      my $explanation = _convert($root->{'args'}->[1], $options);
      if ($explanation ne '') {
        return _convert($root->{'args'}->[0], $options) ." ($explanation)";
      } else {
        return _convert($root->{'args'}->[0], $options);
      }
    } elsif ($Texinfo::Common::inline_commands{$root->{'cmdname'}}) {
      # 'raw' is set in a copy of the options, such that it does not
      # remain set in the options hash shared with the elements
      # converted after this one
      $options = {%$options, 'raw' => 1}
        if ($root->{'cmdname'} eq 'inlineraw');
      my $arg_index = 1;
      # for inlinefmtifelse the third argument is used if the format is
      # not expanded
      if ($root->{'cmdname'} eq 'inlinefmtifelse'
          and (!$root->{'extra'}->{'format'}
               or !$options->{'expanded_formats_hash'}->{$root->{'extra'}->{'format'}})) {
        $arg_index = 2;
      }
      if (scalar(@{$root->{'args'}}) > $arg_index) {
        return _convert($root->{'args'}->[$arg_index], $options);
      } else {
        return '';
      }
    } elsif ($root->{'args'} and $root->{'args'}->[0]
           and (($root->{'args'}->[0]->{'type'}
                and $root->{'args'}->[0]->{'type'} eq 'brace_command_arg')
                or $Texinfo::Common::math_commands{$root->{'cmdname'}})) {
      my $result;
      if ($root->{'cmdname'} eq 'sc') {
        $options = {%$options, 'sc' => 1};
      } elsif ($Texinfo::Common::code_style_commands{$root->{'cmdname'}}
               or $Texinfo::Common::math_commands{$root->{'cmdname'}}) {
        $options = _code_options($options);
      }
      $result = _convert($root->{'args'}->[0], $options);
      return $result;
    # block commands
    } elsif ($root->{'cmdname'} eq 'quotation'
             or $root->{'cmdname'} eq 'smallquotation'
             or $root->{'cmdname'} eq 'float') {
      # the non empty arguments are output on a line, separated by commas
      if ($root->{'args'}) {
        foreach my $arg (@{$root->{'args'}}) {
          my $converted_arg = _convert($arg, $options);
          if ($converted_arg =~ /\S/) {
            $result .= $converted_arg.", ";
          }
        }
        $result =~ s/, $//;
        chomp ($result);
        $result .= "\n" if ($result =~ /\S/);
      }
    } elsif ($options->{'expanded_formats_hash'}->{$root->{'cmdname'}}) {
      # the contents of an expanded format are output raw.  'raw' is set
      # in a copy of the options, such that it does not remain set in the
      # options hash shared with the elements converted after the block
      $options = {%$options, 'raw' => 1};
    } elsif ($formatting_misc_commands{$root->{'cmdname'}} and $root->{'args'}) {
      if ($root->{'cmdname'} eq 'sp') {
        if ($root->{'extra'} and $root->{'extra'}->{'misc_args'}
            and $root->{'extra'}->{'misc_args'}->[0]) {
          # this useless copy avoids perl changing the type to integer!
          my $sp_nr = $root->{'extra'}->{'misc_args'}->[0];
          $result = "\n" x $sp_nr;
        }
      } elsif ($root->{'cmdname'} eq 'verbatiminclude') {
        my $verbatim_include_verbatim
          = Texinfo::Common::expand_verbatiminclude($options->{'converter'},
                                                    $root);
        if (defined($verbatim_include_verbatim)) {
          $result .= _convert($verbatim_include_verbatim, $options);
        }
      } elsif ($root->{'cmdname'} ne 'node') {
        $result = _convert($root->{'args'}->[0], $options);
        if ($Texinfo::Common::sectioning_commands{$root->{'cmdname'}}) {
          $result = heading($root, $result, $options->{'converter'},
                            $options->{'NUMBER_SECTIONS'});
        } else {
        # we always want an end of line even if is was eaten by a command
          chomp($result);
          $result .= "\n";
        }
      }
    } elsif ($root->{'cmdname'} eq 'item'
            and $root->{'parent'}->{'cmdname'}
            and $root->{'parent'}->{'cmdname'} eq 'enumerate') {
      # item number or letter followed by a dot
      $result .= Texinfo::Common::enumerate_item_representation(
         $root->{'parent'}->{'extra'}->{'enumerate_specification'},
         $root->{'extra'}->{'item_number'}) . '. ';
    }
  }
  if ($root->{'type'} and $root->{'type'} eq 'def_line') {
    #print STDERR "$root->{'extra'}->{'def_command'}\n";
    if ($root->{'extra'} and $root->{'extra'}->{'def_parsed_hash'}
             and %{$root->{'extra'}->{'def_parsed_hash'}}) {
      # the definition line is output, in code style, as
      #   category: [type ]name[ arguments]
      my $parsed_definition_category
        = Texinfo::Common::definition_category ($options->{'converter'}, $root);
      my @contents = ($parsed_definition_category, {'text' => ': '});
      if ($root->{'extra'}->{'def_parsed_hash'}->{'type'}) {
        push @contents, ($root->{'extra'}->{'def_parsed_hash'}->{'type'},
                         {'text' => ' '});
      }
      push @contents, $root->{'extra'}->{'def_parsed_hash'}->{'name'};

      my $arguments = Texinfo::Common::definition_arguments_content($root);
      if ($arguments) {
        push @contents, {'text' => ' '};
        push @contents, @$arguments;
      }
      push @contents, {'text' => "\n"};
      $result = _convert({'contents' => \@contents}, _code_options($options));
    }
    #$result = convert($root->{'args'}->[0], $options) if ($root->{'args'});
  } elsif ($root->{'type'} and $root->{'type'} eq 'menu_entry') {
    # the node name is converted in code style
    foreach my $arg (@{$root->{'args'}}) {
      if ($arg->{'type'} eq 'menu_entry_node') {
        $result .= _convert($arg, _code_options($options));
      } else {
        $result .= _convert($arg, $options);
      }
    }
    # exactly one end of line, except in preformatted
    if (!$root->{'parent'}->{'type'}
        or ($root->{'parent'}->{'type'} ne 'preformatted'
            and $root->{'parent'}->{'type'} ne 'rawpreformatted')) {
      chomp($result);
      $result .= "\n";
    }
  }
  if ($root->{'contents'}) {
    # the contents of preformatted code, math and raw block commands are
    # converted in code style
    if ($root->{'cmdname'}
        and ($Texinfo::Common::preformatted_code_commands{$root->{'cmdname'}}
             or $Texinfo::Common::math_commands{$root->{'cmdname'}}
             or (defined($Texinfo::Common::block_commands{$root->{'cmdname'}})
                 and $Texinfo::Common::block_commands{$root->{'cmdname'}} eq 'raw'))) {
      $options = _code_options($options);
    }
    if (ref($root->{'contents'}) ne 'ARRAY') {
      cluck "contents not an array($root->{'contents'}).";
    }
    foreach my $content (@{$root->{'contents'}}) {
      $result .= _convert($content, $options);
    }
  }
  # braces are kept around bracketed elements, except directly in a line
  # argument
  $result = '{'.$result.'}'
     if ($root->{'type'} and $root->{'type'} eq 'bracketed'
         and (!$root->{'parent'}->{'type'} or
              ($root->{'parent'}->{'type'} ne 'block_line_arg'
               and $root->{'parent'}->{'type'} ne 'line_arg')));
  #print STDERR "  RR ($root) -> $result\n";
  return $result;
}
600
601
602
# Implement the converters API, but as simply as possible
# initialization
#
# May be called with a configuration hash reference only, or with a
# class followed by the configuration hash reference.  The configuration
# entries are copied in the converter.  If there is a 'parser', the
# global informations are taken from it, the output encoding is set from
# the first @documentencoding, and the parser expanded formats are used
# if 'expanded_formats' is not set.
# NOTE(review): the object returned is always blessed in this package,
# whatever the class argument was -- confirm this is intended.
sub converter($)
{
  my $class = shift;
  my $converter = {};
  my $conf;
  if (ref($class) eq 'HASH') {
    # called as a function, the first argument is the configuration
    $conf = $class;
    bless $converter, __PACKAGE__;
  } else {
    if (defined($class)) {
      bless $converter, $class;
    } else {
      bless $converter, __PACKAGE__;
    }
    $conf = shift;
  }

  %$converter = %$conf if ($conf);

  my $expanded_formats = $converter->{'expanded_formats'};
  my $parser = $converter->{'parser'};
  if ($parser) {
    $converter->{'info'} = $parser->global_informations();
    $converter->{'extra'} = $parser->global_commands_information();

    my $documentencodings = $converter->{'extra'}->{'documentencoding'};
    if (defined($documentencodings)) {
      # only the first @documentencoding is considered
      my $root = $documentencodings->[0];
      if (defined($root->{'extra'})
          and defined($root->{'extra'}->{'input_perl_encoding'})) {
        $converter->{'OUTPUT_ENCODING_NAME'}
           = $root->{'extra'}->{'input_encoding_name'};
        $converter->{'OUTPUT_PERL_ENCODING'}
           = $root->{'extra'}->{'input_perl_encoding'};
      }
    }

    $expanded_formats = $parser->{'expanded_formats'}
      if (!$expanded_formats and $parser->{'expanded_formats'});
  }

  if ($expanded_formats) {
    foreach my $expanded_format (@$expanded_formats) {
      $converter->{'expanded_formats_hash'}->{$expanded_format} = 1;
    }
  }

  bless $converter, __PACKAGE__;
  return $converter;
}
655
# Convert the tree $root to text.  No options are passed to the
# conversion, the converter $self is not used.
sub convert_tree($$)
{
  my ($self, $root) = @_;

  return _convert($root);
}
663
# determine outfile and output to that file
#
# The output file is the 'OUTFILE' entry of the converter if defined.
# Otherwise it is based on the @setfilename argument with the extension
# removed or, if there is none, on the input file basename without
# Texinfo extension, with '.txt' appended.
# Returns undef if the file could not be opened or closed and the empty
# string when the result was output to a file.  If no output file name
# could be determined, the converted text is returned.
my $STDIN_DOCU_NAME = 'stdin';
sub output($$)
{
  my ($self, $tree) = @_;
  #print STDERR "OUTPUT\n";

  # The input file name is not defined if called on a piece of texinfo,
  # for example.
  my $input_basename = '';
  if (defined($self->{'info'}->{'input_file_name'})) {
    ($input_basename) = fileparse($self->{'info'}->{'input_file_name'});
  }
  $self->{'input_basename'} = $input_basename;
  $input_basename = $STDIN_DOCU_NAME if ($input_basename eq '-');
  $input_basename =~ s/\.te?x(i|info)?$//;

  # undef if there is no @setfilename or no text_arg
  my $setfilename;
  my $global_commands = $self->{'extra'};
  if ($global_commands and $global_commands->{'setfilename'}
      and $global_commands->{'setfilename'}->{'extra'}) {
    $setfilename = $global_commands->{'setfilename'}->{'extra'}->{'text_arg'};
  }

  my $outfile = $self->{'OUTFILE'};
  if (!defined($outfile)) {
    if (defined($setfilename)) {
      # remove the extension
      ($outfile = $setfilename) =~ s/\.[^\.]*$//;
    } elsif ($input_basename ne '') {
      $outfile = $input_basename;
    }
    $outfile .= '.txt' if (defined($outfile));
  }

  my $fh;
  if (defined($outfile)) {
    $fh = $self->Texinfo::Common::open_out($outfile);
    return undef if (!$fh);
  }

  my %options = $self->Texinfo::Common::_convert_text_options();
  my $result = _convert($tree, \%options);
  return $result if (!$fh);

  print $fh $result;
  return undef if (!close($fh));
  return '';
}
717
# Return the value associated with $key in the converter $self, the
# configuration being stored directly in the converter hash.
sub get_conf($$)
{
  my ($self, $key) = @_;

  return $self->{$key};
}
725
# Converter API: no error is stored by this converter, undef is
# always returned.
sub errors()
{
  return undef;
}
730
# Converter API: no unclosed file is recorded by this converter, undef
# is always returned.
sub converter_unclosed_files()
{
  return undef;
}
735
# Converter API: no opened file is recorded by this converter, an
# empty list is always returned.
sub converter_opened_files()
{
  return ();
}
740
# Converter API: this converter has no default customization, an
# empty list is always returned.
sub converter_defaults()
{
  return ();
}
745
7461;
747
748__END__
749
750=head1 NAME
751
752Texinfo::Convert::Text - Convert Texinfo tree to simple text
753
754=head1 SYNOPSIS
755
756  use Texinfo::Convert::Text qw(convert ascii_accent text_accents);
757
758  my $result = convert($tree);
759  my $result_encoded = convert($tree,
760             {'enabled_encoding' => 'utf-8'});
761  my $result_converter = convert($tree,
762             {'converter' => $converter});
763
764  my $result_accent_text = ascii_accent('e', $accent_command);
765  my $accents_text = text_accents($accents, 'utf-8');
766
767=head1 DESCRIPTION
768
769Texinfo::Convert::Text is a simple backend that converts a Texinfo tree
770to simple text.  It is used for some command argument expansion in
771C<Texinfo::Parser>, for instance the file names, or encoding names.
772The converter is very simple, and, in the default case, cannot handle
773output strings translation or error handling.
774
775=head1 METHODS
776
777=over
778
779=item $result = convert($tree, $options)
780
781Convert a Texinfo tree to simple text.  I<$options> is a hash reference of
782options.  The converter is very simple, and has no internal state besides
783the options.  It cannot handle as is output strings translation or error
784storing.
785
786If the I<converter> option is set, some additional features may be available
787for the conversion of some @-commands, like output strings translation or
788error reporting.
789
790The following options may be set:
791
792=over
793
794=item enabled_encoding
795
796If set, the value is considered to be the encoding name texinfo accented
797letters should be converted to.  This option corresponds to the
798C<--enable-encoding> option, or the C<ENABLE_ENCODING> customization
799variable.
800
801=item sc
802
803If set, the text is upper-cased.
804
805=item code
806
807If set the text is in code style.  (mostly --, ---, '' and `` are kept as
808is).
809
810=item NUMBER_SECTIONS
811
812If set, sections are numbered when output.
813
814=item sort_string
815
A somewhat internal option to convert to text more suitable for alphabetical
sorting rather than presentation.
818
819=item converter
820
If this converter object is passed to the function, some features of this
object may be used during conversion.  Mostly error reporting and strings
translation, as the converter object is also supposed to be a
L<Texinfo::Report> object.  See also L<Texinfo::Convert::Converter>.
825
826=item expanded_formats_hash
827
828A reference on a hash.  The keys should be format names (like C<html>,
829C<tex>), and if the corresponding  value is set, the format is expanded.
830
831=back
832
833=item $result_accent_text = ascii_accent($text, $accent_command)
834
835I<$text> is the text appearing within an accent command.  I<$accent_command>
836should be a Texinfo tree element corresponding to an accent command taking
837an argument.  The function returns a transliteration of the accented
838character.
839
840=item $result_accent_text = ascii_accent_fallback($converter, $text, $accent_command)
841
Same as C<ascii_accent> but with an additional first argument, the
converter, which is ignored, but is needed if this function is to be
passed as an argument to functions that need a fallback for accents
conversion.
846
847=item $accents_text = text_accents($accents, $encoding, $set_case)
848
849I<$accents> is an accent command that may contain other nested accent
850commands.  The function will format the whole stack of nested accent
851commands and the innermost text.  If I<$encoding> is set, the formatted
852text is converted to this encoding as much as possible instead of being
853converted as simple ascii.  If I<$set_case> is positive, the result
854is meant to be upper-cased, if it is negative, the result is to be
855lower-cased.
856
857=back
858
859=head1 AUTHOR
860
861Patrice Dumas, E<lt>pertusus@free.frE<gt>
862
863=cut
864