# Text.pm: output tree as simple text.
#
# Copyright 2010-2020 Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License,
# or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Original author: Patrice Dumas <pertusus@free.fr>

package Texinfo::Convert::Text;

use 5.00405;
use strict;

use Texinfo::Convert::Converter;
# accent commands list.
use Texinfo::Common;
use Texinfo::Convert::Unicode;
# for debugging
use Texinfo::Convert::Texinfo;
use Data::Dumper;
use Carp qw(cluck carp);

use File::Basename;

require Exporter;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
@ISA = qw(Exporter Texinfo::Convert::Converter);

%EXPORT_TAGS = ( 'all' => [ qw(
  convert
  ascii_accent
  text_accents
) ] );

@EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );

@EXPORT = qw(
);

$VERSION = '6.8';

# this is in fact not needed for 'footnote', 'shortcaption', 'caption'
# when they have no brace_command_arg, see below.
my %ignored_brace_commands;
foreach my $ignored_brace_command (#'xref','ref','pxref','inforef',
   'anchor',
   'footnote', 'shortcaption', 'caption', 'hyphenation', 'errormsg') {
  $ignored_brace_commands{$ignored_brace_command} = 1;
}

# Block commands whose whole content is dropped, unless the command name
# is set in the 'expanded_formats_hash' option.
my %ignored_block_commands;
foreach my $ignored_command ('titlepage', 'copying', 'documentdescription',
  'html', 'tex', 'xml', 'docbook', 'ignore', 'macro', 'rmacro') {
  $ignored_block_commands{$ignored_command} = 1;
}

# used by Texinfo::Convert::NodeNormalization
our %text_brace_no_arg_commands = (
               'TeX'          => 'TeX',
               'LaTeX'        => 'LaTeX',
               'bullet'       => '*',
               'copyright'    => '(C)',
               'registeredsymbol' => '(R)',
               'dots'         => '...',
               'enddots'      => '...',
               'equiv'        => '==',
               'error'        => 'error-->',
               'expansion'    => '==>',
               'arrow'        => '->',
               'minus'        => '-',
               'point'        => '-!-',
               'print'        => '-|',
               'result'       => '=>',
               'today'        => '',
               'aa'           => 'aa',
               'AA'           => 'AA',
               'ae'           => 'ae',
               'oe'           => 'oe',
               'AE'           => 'AE',
               'OE'           => 'OE',
               'o'            => '/o',
               'O'            => '/O',
               'ss'           => 'ss',
               'l'            => '/l',
               'L'            => '/L',
               'DH'           => 'D',
               'dh'           => 'd',
               'TH'           => 'TH', # http://www.evertype.com/standards/wynnyogh/thorn.html

               'th'           => 'th',
               'exclamdown'   => '!',
               'questiondown' => '?',
               'pounds'       => '#',
               'ordf'         => 'a',
               'ordm'         => 'o',
               'comma'        => ',',
               'atchar'       => '@',
               'ampchar'      => '&',
               'lbracechar'   => '{',
               'rbracechar'   => '}',
               'backslashchar' => '\\',
               'hashchar'     => '#',
               'euro'         => 'Euro',
               'geq'          => '>=',
               'leq'          => '<=',
               'tie'          => ' ',
               'textdegree'      => 'o',
               'quotedblleft'    => '``',
               'quotedblright'   => "''",
               'quoteleft'       => '`',
               'quoteright'      => "'",
               'quotedblbase'    => ',,',
               'quotesinglbase'  => ',',
               'guillemetleft'   => '<<',
               'guillemetright'  => '>>',
               'guillemotleft'   => '<<',
               'guillemotright'  => '>>',
               'guilsinglleft'   => '<',
               'guilsinglright'  => '>',
               'click'           => '', # specially treated
);

# Replacements used instead of %text_brace_no_arg_commands when the
# 'sort_string' option is set.
my %sort_brace_no_arg_commands = (
  'copyright' => 'C',
  'registeredsymbol' => 'R',
  'today' => 't',
);

foreach my $accent_letter ('o','O','l','L') {
  $sort_brace_no_arg_commands{$accent_letter} = $accent_letter;
}

my %accent_commands = %Texinfo::Common::accent_commands;
my %no_brace_commands = %Texinfo::Common::no_brace_commands;

# Line commands that are formatted instead of being ignored.
our %formatting_misc_commands;
foreach my $command ('verbatiminclude', 'sp', 'center', 'exdent',
                     'item', 'itemx', 'tab', 'headitem',
    'node', keys(%Texinfo::Common::sectioning_commands)) {
  $formatting_misc_commands{$command} = 1;
}

# Element types that never produce any output.
my %ignored_types;
foreach my $type ('empty_line_after_command', 'preamble',
            'empty_spaces_after_command', 'spaces_at_end',
            'empty_spaces_before_argument', 'empty_spaces_before_paragraph',
            'empty_spaces_after_close_brace') {
  $ignored_types{$type} = 1;
}

# ASCII marker appended to the text for the accent commands that have a
# dedicated transliteration.
my %ascii_accent_suffix = (
  'H'          => "''",
  'dotaccent'  => '.',
  'ringaccent' => '*',
  'tieaccent'  => '[',
  'u'          => '(',
  'ubaraccent' => '_',
  'v'          => '<',
  'ogonek'     => ';',
);

# Return an ASCII transliteration of $text accented by $command.
#   $text:    the text the accent applies to.
#   $command: accent command tree element; only 'cmdname' is read.
# 'dotless' leaves the text unchanged, 'udotaccent' prepends a dot, the
# commands listed in %ascii_accent_suffix append their marker, and any
# other accent command appends its own name (for example ^ or ').
sub ascii_accent($$)
{
  my $text = shift;
  my $command = shift;
  my $accent = $command->{'cmdname'};
  return $text if ($accent eq 'dotless');
  return '.' . $text if ($accent eq 'udotaccent');
  return $text . $ascii_accent_suffix{$accent}
    if (defined($ascii_accent_suffix{$accent}));
  return $text . $accent;
}

# format a stack of accents as ascii
#   $result:   innermost text.
#   $stack:    array reference of accent command elements, outermost first.
#   $set_case: optional; when positive a single word character is
#              upper-cased, when negative it is lower-cased.
sub ascii_accents($$;$)
{
  my $result = shift;
  my $stack = shift;
  my $set_case = shift;

  if ($set_case and $result =~ /^\w$/) {
    if ($set_case > 0) {
      $result = uc($result);
    } else {
      $result = lc($result);
    }
  }
  # the innermost accent is last in the stack and is applied first
  foreach my $accent_command (reverse(@$stack)) {
    $result = ascii_accent($result, $accent_command);
  }
  return $result;
}

# Same as ascii_accent, but with a converter as first argument to be consistent
# with calling conventions of fallback accent formatting functions given
# to convert_accents/encoded_accents
sub ascii_accent_fallback($$$)
{
  my $converter = shift;
  my $text = shift;
  my $command = shift;

  return ascii_accent($text, $command);
}

# format an accent command and nested accents within as Text.
#   $accent:   outermost accent command element.
#   $encoding: optional encoding name handed to the encoded accents code.
#   $set_case: optional; positive for upper-case, negative for lower-case.
sub text_accents($;$$)
{
  my $accent = shift;
  my $encoding = shift;
  my $set_case = shift;

  my ($contents, $stack)
      = Texinfo::Common::find_innermost_accent_contents($accent);

  my $options = {};
  $options->{'enabled_encoding'} = $encoding if (defined($encoding));
  # Only a positive $set_case asks for upper-casing.  Storing a negative
  # value in 'sc' would be true and would upper-case the inner text
  # although lower-casing was requested.
  if ($set_case) {
    if ($set_case > 0) {
      $options->{'sc'} = 1;
    } else {
      $options->{'lc'} = 1;
    }
  }
  my $text = convert({'contents' => $contents}, $options);

  my $result = Texinfo::Convert::Unicode::encoded_accents(undef, $text,
                     $stack, $encoding, \&ascii_accent_fallback, $set_case);
  if (defined($result)) {
    return $result;
  } else {
    return ascii_accents($text, $stack, $set_case);
  }
}

# Return the text for a brace command without argument (@copyright{},
# @dots{}...).
#   $root:    the command element.  If its extra 'clickstyle' names a
#             known command, that command is formatted instead.
#   $options: optional hash reference; 'enabled_encoding',
#             'no_extra_unicode', 'converter', 'sort_string', 'sc' and
#             'lc' are read.
sub brace_no_arg_command($;$)
{
  my $root = shift;
  my $options = shift;
  my $encoding;
  $encoding = $options->{'enabled_encoding'}
    if ($options and $options->{'enabled_encoding'});

  my $command = $root->{'cmdname'};
  $command = $root->{'extra'}->{'clickstyle'}
     if ($root->{'extra'}
      and defined($root->{'extra'}->{'clickstyle'})
      and defined($text_brace_no_arg_commands{$root->{'extra'}->{'clickstyle'}}));
  my $result;
  if (!$options->{'no_extra_unicode'}
      or !$Texinfo::Convert::Unicode::extra_unicode_map{$command}) {
    $result = Texinfo::Convert::Unicode::unicode_for_brace_no_arg_command(
                       $command, $encoding);
  }
  if (!defined($result) and $options and $options->{'converter'}) {
    my $tree = Texinfo::Common::translated_command_tree(
                  $options->{'converter'}, $command);
    if ($tree) {
      $result = _convert($tree, $options);
    }
  }
  if (!defined($result)) {
    if ($options and $options->{'sort_string'}
        and $sort_brace_no_arg_commands{$command}) {
      $result = $sort_brace_no_arg_commands{$command};
    } else {
      $result = $text_brace_no_arg_commands{$command};
    }
  }
  if ($options and $Texinfo::Common::letter_no_arg_commands{$command}) {
    if ($options->{'sc'}) {
      $result = uc($result);
    } elsif ($options->{'lc'}) {
      $result = lc($result);
    }
  }
  return $result;
}

# Character used to underline a heading, indexed by sectioning level.
my %underline_symbol = (
  0 => '*',
  1 => '*',
  2 => '=',
  3 => '-',
  4 => '.'
);

# Return the text of an underlined heading, possibly indented.
#   $current:       sectioning command element; its 'level' is set here
#                   when it is not already defined.
#   $text:          already converted heading text.
#   $converter:     passed to Texinfo::Common::numbered_heading.
#   $numbered:      optional, passed to numbered_heading.
#   $indent_length: optional indentation of the underline; negative
#                   values are treated as 0.
# Returns the empty string when the heading text has no non-space
# character.
sub heading($$$;$$)
{
  my $current = shift;
  my $text = shift;
  my $converter = shift;
  my $numbered = shift;
  my $indent_length = shift;

  # REMARK to get the numbering right in case of an indented text, the
  # indentation should be given here.  But this should never happen as
  # the only @-commands allowed in indented context are not number.
  $text = Texinfo::Common::numbered_heading($converter, $current, $text,
                                            $numbered);
  return '' if ($text !~ /\S/);
  my $result = $text ."\n";
  if (defined($indent_length)) {
    if ($indent_length < 0) {
      $indent_length = 0;
    }
    $result .= (' ' x $indent_length);
  } else {
    $indent_length = 0;
  }
  # NOTE(review): Texinfo::Structuring is not loaded by this file, this
  # call relies on another module having loaded it -- confirm.
  if (!defined $current->{'level'}) {
    $current->{'level'} = Texinfo::Structuring::section_level($current);
  }
  $result .=($underline_symbol{$current->{'level'}}
     x (Texinfo::Convert::Unicode::string_width($text) - $indent_length))."\n";
  return $result;
}

# Return a shallow copy of $options (or a new hash if undefined) with
# 'code' set.  The hash passed in argument is not modified.
sub _code_options($)
{
  my $options = shift;
  my $code_options;
  if (defined($options)) {
    $code_options = { %$options };
  } else {
    $code_options = {};
  }
  $code_options->{'code'} = 1;
  return $code_options;
}

# Return true if $format is set in the 'expanded_formats_hash' option.
# Reading through this function avoids autovivifying the
# 'expanded_formats_hash' key in a hash that belongs to the caller.
sub _format_expanded($$)
{
  my $options = shift;
  my $format = shift;

  return 0 if (!$options or !$options->{'expanded_formats_hash'}
               or !defined($format));
  return $options->{'expanded_formats_hash'}->{$format} ? 1 : 0;
}

# Public entry point, callable as a function, convert($tree, $options),
# or as a method, $converter->convert($tree, $options).
sub convert($;$)
{
  my $root = shift;
  # means it was called object oriented
  if (ref($root) ne 'HASH') {
    if (ref($root) eq 'ARRAY') {
      carp ("convert argument $root not blessed reference or HASH");
      return undef;
    }
    $root = shift;
  }
  my $options = shift;
  #print STDERR "CONVERT\n";
  return _convert($root, $options);
}

sub _convert($;$);

# Recursively convert the tree element $root to text.
#   $root:    tree element (hash reference).
#   $options: optional hash reference, see the POD of convert.  The hash
#             passed in argument is never modified, settings needed for
#             a subtree only are set on a copy.
sub _convert($;$)
{
  my $root = shift;
  my $options = shift;
  # convert($tree) without options is documented.  Without this default
  # the copies done below with %$options die under strict refs.
  $options = {} if (!defined($options));

  my $type = $root->{'type'};
  my $cmdname = $root->{'cmdname'};

  # Elements producing no output at all.  A def_line is never ignored.
  if (!($type and $type eq 'def_line')) {
    return '' if ($type and $ignored_types{$type});
    if ($cmdname) {
      return '' if ($ignored_brace_commands{$cmdname});
      return '' if ($ignored_block_commands{$cmdname}
                    and !_format_expanded($options, $cmdname));
      if ($Texinfo::Common::inline_commands{$cmdname}
          and $cmdname ne 'inlinefmtifelse') {
        if ($Texinfo::Common::inline_format_commands{$cmdname}) {
          return '' if (!$root->{'extra'}->{'format'}
            or !_format_expanded($options, $root->{'extra'}->{'format'}));
        } else {
          return '' if (!defined($root->{'extra'}->{'expand_index'}));
        }
      }
      # here ignore most of the misc commands
      return '' if ($root->{'args'} and $root->{'args'}->[0]
                    and $root->{'args'}->[0]->{'type'}
                    and ($root->{'args'}->[0]->{'type'} eq 'line_arg'
                         or $root->{'args'}->[0]->{'type'} eq 'misc_arg')
                    and !$formatting_misc_commands{$cmdname});
    }
  }

  my $result = '';
  if (defined($root->{'text'})) {
    if ($type and $type eq 'untranslated' and $options->{'converter'}) {
      # translate in the language recorded in the element, then restore
      # the language of the converter
      my $save_lang = $options->{'converter'}->get_conf('documentlanguage');
      $options->{'converter'}->{'documentlanguage'}
        = $root->{'extra'}->{'documentlanguage'};
      my $tree = Texinfo::Report::gdt($options->{'converter'},
                                      $root->{'text'});
      $result = _convert($tree, $options);
      $options->{'converter'}->{'documentlanguage'} = $save_lang;
    } else {
      $result = $root->{'text'};
      if ((! defined($type) or $type ne 'raw')
           and !$options->{'raw'}) {
        if ($options->{'sc'}) {
          $result = uc($result);
        }
        if (!$options->{'code'}) {
          $result =~ s/``/"/g;
          $result =~ s/\'\'/"/g;
          # --- becomes -- and -- becomes -, \x{1F} protects the former
          # from the second substitution
          $result =~ s/---/\x{1F}/g;
          $result =~ s/--/-/g;
          $result =~ s/\x{1F}/--/g;
        }
      }
    }
  }
  if ($cmdname) {
    if (defined($no_brace_commands{$cmdname})) {
      return $no_brace_commands{$cmdname};
    } elsif ($cmdname eq 'today') {
      if ($options->{'sort_string'}
          and $sort_brace_no_arg_commands{$cmdname}) {
        return $sort_brace_no_arg_commands{$cmdname};
      } elsif ($options->{'converter'}) {
        return _convert(Texinfo::Common::expand_today($options->{'converter'}),
                        $options);
      } elsif ($options->{'TEST'}) {
        return 'a sunny day';
      } else {
        my($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst)
          = localtime(time);
        $year += ($year < 70) ? 2000 : 1900;
        return "$Texinfo::Common::MONTH_NAMES[$mon] $mday, $year";
      }
    } elsif (defined($text_brace_no_arg_commands{$cmdname})) {
      return brace_no_arg_command($root, $options);
    # commands with braces
    } elsif ($accent_commands{$cmdname}) {
      my $result = text_accents ($root, $options->{'enabled_encoding'},
                                        $options->{'sc'});
      return $result;
    } elsif ($cmdname eq 'image') {
      return _convert($root->{'args'}->[0], _code_options($options));
    } elsif ($cmdname eq 'email') {
      # the text is preferred over the address
      my $mail = _convert($root->{'args'}->[0], _code_options($options));
      my $text;
      $text = _convert($root->{'args'}->[1], $options)
         if (defined($root->{'args'}->[1]));
      return $text if (defined($text) and ($text ne ''));
      return $mail;
    } elsif ($cmdname eq 'uref' or $cmdname eq 'url') {
      # a non empty third argument replaces everything
      my $replacement;
      $replacement = _convert($root->{'args'}->[2], $options)
        if (defined($root->{'args'}->[2]));
      return $replacement if (defined($replacement) and $replacement ne '');
      my $text;
      $text = _convert($root->{'args'}->[1], $options)
        if (defined($root->{'args'}->[1]));
      my $url = _convert($root->{'args'}->[0], _code_options($options));
      if (defined($text) and $text ne '') {
        return "$url ($text)";
      } else {
        return $url;
      }
    } elsif ($Texinfo::Common::explained_commands{$cmdname}
             and $root->{'args'} and $root->{'args'}->[1]) {
      my $explanation = _convert($root->{'args'}->[1], $options);
      if ($explanation ne '') {
        return _convert($root->{'args'}->[0], $options) ." ($explanation)";
      } else {
        return _convert($root->{'args'}->[0], $options);
      }
    } elsif ($Texinfo::Common::inline_commands{$cmdname}) {
      # raw mode holds for the argument of @inlineraw only.  Setting it
      # in the hash of the caller would turn it on for all the text
      # converted afterwards.
      $options = { %$options, 'raw' => 1 } if ($cmdname eq 'inlineraw');
      my $arg_index = 1;
      if ($cmdname eq 'inlinefmtifelse'
          and (!$root->{'extra'}->{'format'}
               or !_format_expanded($options, $root->{'extra'}->{'format'}))) {
        $arg_index = 2;
      }
      if (scalar(@{$root->{'args'}}) > $arg_index) {
        return _convert($root->{'args'}->[$arg_index], $options);
      } else {
        return '';
      }
    } elsif ($root->{'args'} and $root->{'args'}->[0]
           and (($root->{'args'}->[0]->{'type'}
                and $root->{'args'}->[0]->{'type'} eq 'brace_command_arg')
                or $Texinfo::Common::math_commands{$cmdname})) {
      my $result;
      if ($cmdname eq 'sc') {
        $options = {%$options, 'sc' => 1};
      } elsif ($Texinfo::Common::code_style_commands{$cmdname}
               or $Texinfo::Common::math_commands{$cmdname}) {
        $options = _code_options($options);
      }
      $result = _convert($root->{'args'}->[0], $options);
      return $result;
    # block commands
    } elsif ($cmdname eq 'quotation'
             or $cmdname eq 'smallquotation'
             or $cmdname eq 'float') {
      if ($root->{'args'}) {
        foreach my $arg (@{$root->{'args'}}) {
          my $converted_arg = _convert($arg, $options);
          if ($converted_arg =~ /\S/) {
            $result .= $converted_arg.", ";
          }
        }
        $result =~ s/, $//;
        chomp ($result);
        $result .= "\n" if ($result =~ /\S/);
      }
    } elsif (_format_expanded($options, $cmdname)) {
      # expanded raw format: the contents converted below are raw, but
      # not what follows the block, hence the copy
      $options = { %$options, 'raw' => 1 };
    } elsif ($formatting_misc_commands{$cmdname} and $root->{'args'}) {
      if ($cmdname eq 'sp') {
        if ($root->{'extra'} and $root->{'extra'}->{'misc_args'}
            and $root->{'extra'}->{'misc_args'}->[0]) {
          # this useless copy avoids perl changing the type to integer!
          my $sp_nr = $root->{'extra'}->{'misc_args'}->[0];
          $result = "\n" x $sp_nr;
        }
      } elsif ($cmdname eq 'verbatiminclude') {
        my $verbatim_include_verbatim
          = Texinfo::Common::expand_verbatiminclude($options->{'converter'},
                                                    $root);
        if (defined($verbatim_include_verbatim)) {
          $result .= _convert($verbatim_include_verbatim, $options);
        }
      } elsif ($cmdname ne 'node') {
        $result = _convert($root->{'args'}->[0], $options);
        if ($Texinfo::Common::sectioning_commands{$cmdname}) {
          $result = heading($root, $result, $options->{'converter'},
                            $options->{'NUMBER_SECTIONS'});
        } else {
          # we always want an end of line even if is was eaten by a command
          chomp($result);
          $result .= "\n";
        }
      }
    } elsif ($cmdname eq 'item'
            and $root->{'parent'}->{'cmdname'}
            and $root->{'parent'}->{'cmdname'} eq 'enumerate') {
      $result .= Texinfo::Common::enumerate_item_representation(
         $root->{'parent'}->{'extra'}->{'enumerate_specification'},
         $root->{'extra'}->{'item_number'}) . '. ';
    }
  }
  if ($type and $type eq 'def_line') {
    #print STDERR "$root->{'extra'}->{'def_command'}\n";
    if ($root->{'extra'} and $root->{'extra'}->{'def_parsed_hash'}
             and %{$root->{'extra'}->{'def_parsed_hash'}}) {
      # category: [type ]name[ arguments]
      my $parsed_definition_category
        = Texinfo::Common::definition_category ($options->{'converter'}, $root);
      my @contents = ($parsed_definition_category, {'text' => ': '});
      if ($root->{'extra'}->{'def_parsed_hash'}->{'type'}) {
        push @contents, ($root->{'extra'}->{'def_parsed_hash'}->{'type'},
                         {'text' => ' '});
      }
      push @contents, $root->{'extra'}->{'def_parsed_hash'}->{'name'};

      my $arguments = Texinfo::Common::definition_arguments_content($root);
      if ($arguments) {
        push @contents, {'text' => ' '};
        push @contents, @$arguments;
      }
      push @contents, {'text' => "\n"};
      $result = _convert({'contents' => \@contents}, _code_options($options));
    }
    #$result = convert($root->{'args'}->[0], $options) if ($root->{'args'});
  } elsif ($type and $type eq 'menu_entry') {
    foreach my $arg (@{$root->{'args'}}) {
      if ($arg->{'type'} eq 'menu_entry_node') {
        $result .= _convert($arg, _code_options($options));
      } else {
        $result .= _convert($arg, $options);
      }
    }
    if (!$root->{'parent'}->{'type'}
        or ($root->{'parent'}->{'type'} ne 'preformatted'
            and $root->{'parent'}->{'type'} ne 'rawpreformatted')) {
      chomp($result);
      $result .= "\n";
    }
  }
  if ($root->{'contents'}) {
    if ($cmdname
        and ($Texinfo::Common::preformatted_code_commands{$cmdname}
             or $Texinfo::Common::math_commands{$cmdname}
             or (defined($Texinfo::Common::block_commands{$cmdname})
                 and $Texinfo::Common::block_commands{$cmdname} eq 'raw'))) {
      $options = _code_options($options);
    }
    if (ref($root->{'contents'}) ne 'ARRAY') {
      cluck "contents not an array($root->{'contents'}).";
    }
    foreach my $content (@{$root->{'contents'}}) {
      $result .= _convert($content, $options);
    }
  }
  # braces are kept, except for bracketed arguments of line commands
  $result = '{'.$result.'}'
     if ($type and $type eq 'bracketed'
         and (!$root->{'parent'}->{'type'} or
              ($root->{'parent'}->{'type'} ne 'block_line_arg'
               and $root->{'parent'}->{'type'} ne 'line_arg')));
  #print STDERR "  RR ($root) -> $result\n";
  return $result;
}



# Implement the converters API, but as simply as possible
# initialization
# May be called as a class method, with the configuration hash reference
# as argument, or as a function with the configuration hash reference as
# only argument.  The configuration is copied in the converter.
sub converter($)
{
  my $class = shift;
  my $conf;
  my $converter = {};
  if (ref($class) eq 'HASH') {
    $conf = $class;
    bless $converter;
  } elsif (defined($class)) {
    bless $converter, $class;
    $conf = shift;
  } else {
    bless $converter;
    $conf = shift;
  }

  if ($conf) {
    %{$converter} = %{$conf};
  }

  my $expanded_formats = $converter->{'expanded_formats'};
  if ($converter->{'parser'}) {
    $converter->{'info'} = $converter->{'parser'}->global_informations();
    $converter->{'extra'} = $converter->{'parser'}->global_commands_information();
    foreach my $global_command ('documentencoding') {
      if (defined($converter->{'extra'}->{$global_command})) {
        my $root = $converter->{'extra'}->{$global_command}->[0];
        # the output encoding is the input encoding
        if ($global_command eq 'documentencoding'
            and defined($root->{'extra'})
            and defined($root->{'extra'}->{'input_perl_encoding'})) {
          $converter->{'OUTPUT_ENCODING_NAME'}
             = $root->{'extra'}->{'input_encoding_name'};
          $converter->{'OUTPUT_PERL_ENCODING'}
             = $root->{'extra'}->{'input_perl_encoding'};
        }
      }
    }
    # fall back to the formats expanded by the parser
    if (!$expanded_formats and $converter->{'parser'}->{'expanded_formats'}) {
      $expanded_formats = $converter->{'parser'}->{'expanded_formats'};
    }
  }
  if ($expanded_formats) {
    foreach my $expanded_format(@$expanded_formats) {
      $converter->{'expanded_formats_hash'}->{$expanded_format} = 1;
    }
  }

  # NOTE(review): this reblesses into the current package whatever $class
  # was given above.  Kept as is, confirm whether it is intended.
  bless $converter;
  return $converter;
}

# Convert $root to text.
# NOTE(review): no option is passed, the configuration of the converter
# (for instance 'expanded_formats_hash') is not used here -- confirm.
sub convert_tree($$)
{
  my $self = shift;
  my $root = shift;

  return _convert($root);
}

# determine outfile and output to that file
# The output file is OUTFILE if defined.  Otherwise it is based on the
# @setfilename argument without extension or on the input file name,
# with .txt appended.  Returns undef if the file cannot be opened or
# closed, the empty string if the result was written to a file, and the
# converted text if no output file name could be determined.
my $STDIN_DOCU_NAME = 'stdin';
sub output($$)
{
  my $self = shift;
  my $tree = shift;
  #print STDERR "OUTPUT\n";
  my $input_basename;
  if (defined($self->{'info'}->{'input_file_name'})) {
    my ($directories, $suffix);
    ($input_basename, $directories, $suffix)
       = fileparse($self->{'info'}->{'input_file_name'});
  } else {
    # This could happen if called on a piece of texinfo
    $input_basename = '';
  }
  $self->{'input_basename'} = $input_basename;
  $input_basename = $STDIN_DOCU_NAME if ($input_basename eq '-');
  $input_basename =~ s/\.te?x(i|info)?$//;

  my $setfilename;
  $setfilename = $self->{'extra'}->{'setfilename'}->{'extra'}->{'text_arg'}
    if ($self->{'extra'} and $self->{'extra'}->{'setfilename'}
        and $self->{'extra'}->{'setfilename'}->{'extra'}
        and defined($self->{'extra'}->{'setfilename'}->{'extra'}->{'text_arg'}));
  my $outfile;
  if (!defined($self->{'OUTFILE'})) {
    if (defined($setfilename)) {
      $outfile = $setfilename;
      $outfile =~ s/\.[^\.]*$//;
    } elsif ($input_basename ne '') {
      $outfile = $input_basename;
    }
    if (defined($outfile)) {
      $outfile .= '.txt';
    }
  } else {
    $outfile = $self->{'OUTFILE'};
  }
  my $fh;
  if (defined($outfile)) {
    $fh = $self->Texinfo::Common::open_out($outfile);
    return undef if (!$fh);
  }
  my %options = $self->Texinfo::Common::_convert_text_options();
  my $result = _convert($tree, \%options);
  if ($fh) {
    print $fh $result;
    return undef if (!close($fh));
    $result = '';
  }
  return $result;
}

# Return the value stored in the converter for $key.
sub get_conf($$)
{
  my $self = shift;
  my $key = shift;

  return $self->{$key};
}

# The remaining functions of the converter API are stubs: no error is
# recorded, no file is tracked and there is no default.
sub errors()
{
  return undef;
}

sub converter_unclosed_files()
{
  return undef;
}

sub converter_opened_files()
{
  return ();
}

sub converter_defaults()
{
  return ();
}

1;

__END__

=head1 NAME

Texinfo::Convert::Text - Convert Texinfo tree to simple text

=head1 SYNOPSIS

  use Texinfo::Convert::Text qw(convert ascii_accent text_accents);

  my $result = convert($tree);
  my $result_encoded = convert($tree,
             {'enabled_encoding' => 'utf-8'});
  my $result_converter = convert($tree,
             {'converter' => $converter});

  my $result_accent_text = ascii_accent('e', $accent_command);
  my $accents_text = text_accents($accents, 'utf-8');

=head1 DESCRIPTION

Texinfo::Convert::Text is a simple backend that converts a Texinfo tree
to simple text.  It is used for some command argument expansion in
C<Texinfo::Parser>, for instance the file names, or encoding names.
The converter is very simple, and, in the default case, cannot handle
output strings translation or error handling.

=head1 METHODS

=over

=item $result = convert($tree, $options)

Convert a Texinfo tree to simple text.  I<$options> is a hash reference of
options.  The converter is very simple, and has no internal state besides
the options.  It cannot handle as is output strings translation or error
storing.

If the I<converter> option is set, some additional features may be available
for the conversion of some @-commands, like output strings translation or
error reporting.

The following options may be set:

=over

=item enabled_encoding

If set, the value is considered to be the encoding name texinfo accented
letters should be converted to.  This option corresponds to the
C<--enable-encoding> option, or the C<ENABLE_ENCODING> customization
variable.

=item sc

If set, the text is upper-cased.


=item code

If set, the text is in code style (mostly, --, ---, '' and `` are kept as
is).

=item NUMBER_SECTIONS

If set, sections are numbered when output.

=item sort_string

A somewhat internal option to convert to text more suitable for alphabetical
sorting rather than presentation.

=item converter

If this converter object is passed to the function, some features of this
object may be used during conversion.  Mostly error reporting and strings
translation, as the converter object is also supposed to be a
L<Texinfo::Report> object.  See also L<Texinfo::Convert::Converter>.

=item expanded_formats_hash

A reference on a hash.  The keys should be format names (like C<html>,
C<tex>), and if the corresponding value is set, the format is expanded.

=back

=item $result_accent_text = ascii_accent($text, $accent_command)

I<$text> is the text appearing within an accent command.  I<$accent_command>
should be a Texinfo tree element corresponding to an accent command taking
an argument.  The function returns a transliteration of the accented
character.

=item $result_accent_text = ascii_accent_fallback($converter, $text, $accent_command)

Same as C<ascii_accent> but with an additional first argument
converter, which is ignored, but needed if this function is to
be passed as argument to functions that need a fallback for accents
conversion.

=item $accents_text = text_accents($accents, $encoding, $set_case)

I<$accents> is an accent command that may contain other nested accent
commands.  The function will format the whole stack of nested accent
commands and the innermost text.  If I<$encoding> is set, the formatted
text is converted to this encoding as much as possible instead of being
converted as simple ascii.  If I<$set_case> is positive, the result
is meant to be upper-cased, if it is negative, the result is to be
lower-cased.


=back

=head1 AUTHOR

Patrice Dumas, E<lt>pertusus@free.frE<gt>

=cut