#!@INTLTOOL_PERL@ -w
# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4 -*-

#
#  The Intltool Message Merger
#
#  Copyright (C) 2000, 2003 Free Software Foundation.
#  Copyright (C) 2000, 2001 Eazel, Inc
#
#  Intltool is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License
#  version 2 published by the Free Software Foundation.
#
#  Intltool is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
#  General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
#  As a special exception to the GNU General Public License, if you
#  distribute this file as part of a program that contains a
#  configuration script generated by Autoconf, you may include it under
#  the same distribution terms that you use for the rest of that program.
#
#  Authors:  Maciej Stachowiak <mjs@noisehavoc.org>
#            Kenneth Christiansen <kenneth@gnu.org>
#            Darin Adler <darin@bentspoon.com>
#
#  Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
#

## Release information
my $PROGRAM = "intltool-merge";
my $PACKAGE = "intltool";
my $VERSION = "0.35.0";

## Loaded modules
use strict;
use Getopt::Long;
use Text::Wrap;
use File::Basename;

my $must_end_tag = -1;
my $last_depth = -1;
my $translation_depth = -1;
my @tag_stack = ();
my @entered_tag = ();
my @translation_strings = ();
my $leading_space = "";

## Scalars used by the option stuff
my $HELP_ARG = 0;
my $VERSION_ARG = 0;
my $BA_STYLE_ARG = 0;
my $XML_STYLE_ARG = 0;
my $KEYS_STYLE_ARG = 0;
my $DESKTOP_STYLE_ARG = 0;
my $SCHEMAS_STYLE_ARG = 0;
my $RFC822DEB_STYLE_ARG = 0;
my $QUIET_ARG = 0;
my $PASS_THROUGH_ARG = 0;
my $UTF8_ARG = 0;
my $MULTIPLE_OUTPUT = 0;
my $cache_file;

## Handle options
# GetOptions returns false when the command line contains an unknown or
# malformed option.  In that case print the "Try --help" hint via
# print_error (defined further down in this file) and stop.
GetOptions
(
 "help" => \$HELP_ARG,
 "version" => \$VERSION_ARG,
 "quiet|q" => \$QUIET_ARG,
 "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
 "ba-style|b" => \$BA_STYLE_ARG,
 "xml-style|x" => \$XML_STYLE_ARG,
 "keys-style|k" => \$KEYS_STYLE_ARG,
 "desktop-style|d" => \$DESKTOP_STYLE_ARG,
 "schemas-style|s" => \$SCHEMAS_STYLE_ARG,
 "rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG,
 "pass-through|p" => \$PASS_THROUGH_ARG,
 "utf8|u" => \$UTF8_ARG,
 "multiple-output|m" => \$MULTIPLE_OUTPUT,
 "cache|c=s" => \$cache_file
 ) or print_error();

my $PO_DIR;
my $FILE;
my $OUTFILE;

# Language code => path of the matching .po file (filled by gather_po_files).
my %po_files_by_lang = ();
# Translation database, keyed by the pair ($lang, $msgid).
my %translations = ();
my $iconv = $ENV{"ICONV"} || $ENV{"INTLTOOL_ICONV"} || "@INTLTOOL_ICONV@";
my $devnull = ($^O eq 'MSWin32' ? 'NUL:' : '/dev/null');

# Use this instead of \w for XML files to handle more possible characters.
my $w = "[-A-Za-z0-9._:]";

# XML quoted string contents
my $q = "[^\\\"]*";

## Check for options.

if ($VERSION_ARG)
{
    print_version();
}
elsif ($HELP_ARG)
{
    print_help();
}
else
{
    # One row per merge style, in order of precedence:
    #   [ option flag, run utf8_sanity_check first?, merge routine ]
    # RFC822 style is the only one that does not force UTF-8.
    my @merge_styles = (
        [ $BA_STYLE_ARG,        1, \&ba_merge_translations ],
        [ $XML_STYLE_ARG,       1, \&xml_merge_output ],
        [ $KEYS_STYLE_ARG,      1, \&keys_merge_translations ],
        [ $DESKTOP_STYLE_ARG,   1, \&desktop_merge_translations ],
        [ $SCHEMAS_STYLE_ARG,   1, \&schemas_merge_translations ],
        [ $RFC822DEB_STYLE_ARG, 0, \&rfc822deb_merge_translations ],
    );
    my ($selected) = grep { $_->[0] } @merge_styles;

    # A style only runs when PO_DIRECTORY, FILENAME and OUTPUT_FILE are given.
    if ($selected && @ARGV > 2)
    {
        utf8_sanity_check() if $selected->[1];
        preparation();
        print_message();
        $selected->[2]->();
        finalize();
    }
    else
    {
        print_help();
    }
}

exit;

## Sub for printing release information
sub print_version
{
    print <<_EOF_;
${PROGRAM} (${PACKAGE}) ${VERSION}
Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.

Copyright (C) 2000-2003 Free Software Foundation, Inc.
Copyright (C) 2000-2001 Eazel, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
_EOF_
    exit;
}

## Sub for printing usage information
sub print_help
{
    print <<_EOF_;
Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE
Generates an output file that includes some localized attributes from an
untranslated source file.

Mandatory options: (exactly one must be specified)
  -b, --ba-style         includes translations in the bonobo-activation style
  -d, --desktop-style    includes translations in the desktop style
  -k, --keys-style       includes translations in the keys style
  -s, --schemas-style    includes translations in the schemas style
  -r, --rfc822deb-style  includes translations in the RFC822 style
  -x, --xml-style        includes translations in the standard xml style

Other options:
  -u, --utf8             convert all strings to UTF-8 before merging
                         (default for everything except RFC822 style)
  -p, --pass-through     deprecated, does nothing and issues a warning
  -m, --multiple-output  output one localized file per locale, instead of
                         a single file containing all localized elements
  -c, --cache=FILE       specify cache file name
                         (usually \$top_builddir/po/.intltool-merge-cache)
  -q, --quiet            suppress most messages
      --help             display this help and exit
      --version          output version information and exit

Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
or send email to <xml-i18n-tools\@gnome.org>.
_EOF_
    exit;
}


## Sub for printing error messages
# Prints the "Try --help" hint on STDERR and terminates with a non-zero
# status so that calling build scripts notice the failure.
sub print_error
{
    print STDERR "Try `${PROGRAM} --help' for more information.\n";
    exit 1;
}


# Announce the output file on STDOUT unless --quiet was given.
sub print_message
{
    print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
}


# Pick up the three positional arguments, then locate the .po files and
# build (or load) the translation database.
sub preparation
{
    $PO_DIR = $ARGV[0];
    $FILE = $ARGV[1];
    $OUTFILE = $ARGV[2];

    gather_po_files();
    get_translation_database();
}

# General-purpose code for looking up translations in .po files

# Map "dir/xx.po" to the language code "xx".  A string that does not match
# that shape is returned unchanged.
sub po_file2lang
{
    my ($tmp) = @_;
    $tmp =~ s/^.*\/(.*)\.po$/$1/;
    return $tmp;
}

# Record every *.po file found directly in $PO_DIR, keyed by language code.
sub gather_po_files
{
    for my $po_file (glob "$PO_DIR/*.po") {
        $po_files_by_lang{po_file2lang($po_file)} = $po_file;
    }
}

# Look up $encoding in the charset.alias file (path from G_CHARSET_ALIAS or
# the configured libdir) and return the first word of the first
# non-comment line whose second word is $encoding (case-insensitive).
# Returns $encoding itself when the file cannot be opened or has no match.
sub get_local_charset
{
    my ($encoding) = @_;
    my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "@INTLTOOL_LIBDIR@/charset.alias";
    my $alias;

    # seek character encoding aliases in charset.alias (glib)

    if (open(my $alias_fh, '<', $alias_file))
    {
        while (my $line = <$alias_fh>)
        {
            next if $line =~ /^\#/;
            # \Q..\E: the encoding name is data, not a pattern.
            if ($line =~ /^\s*([-._a-zA-Z0-9]+)\s+\Q$encoding\E\b/i)
            {
                $alias = $1;
                last;
            }
        }

        # Always reached, so the handle is released on a match as well.
        close $alias_fh;
    }

    # if not found, return input string

    return defined $alias ? $alias : $encoding;
}

# Return the character set declared in the Content-Type header of the given
# .po file, defaulting to ISO-8859-1 (with a warning) when none is found.
# If the configured iconv does not accept that name, the result of
# get_local_charset is returned instead.  Dies if the file cannot be opened.
sub get_po_encoding
{
    my ($in_po_file) = @_;
    my $encoding = "";

    open(my $po_fh, '<', $in_po_file) or die "can't open $in_po_file: $!";
    while (my $line = <$po_fh>)
    {
        ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
        if ($line =~ /Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/)
        {
            $encoding = $1;
            last;
        }
    }
    close $po_fh;

    if (!$encoding)
    {
        print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG;
        $encoding = "ISO-8859-1";
    }

    # Probe iconv with empty input; a non-zero status means it rejected
    # the encoding name.
    system ("$iconv -f $encoding -t UTF-8 <$devnull 2>$devnull");
    if ($?) {
        $encoding = get_local_charset($encoding);
    }

    return $encoding;
}

# Force UTF-8 conversion on, warning if the removed --pass-through option
# was requested.
sub utf8_sanity_check
{
    print STDERR "Warning: option --pass-through has been removed.\n" if $PASS_THROUGH_ARG;
    $UTF8_ARG = 1;
}

# Fill %translations, through the cache file when --cache was given.
sub get_translation_database
{
    if ($cache_file) {
        get_cached_translation_database();
    } else {
        create_translation_database();
    }
}

# Return the smallest -M age (i.e. the most recently modified) among the
# known .po files, or 0 when there are none.
sub get_newest_po_age
{
    my $newest_age;

    foreach my $file (values %po_files_by_lang)
    {
        my $file_age = -M $file;
        next unless defined $file_age;    # file vanished since the glob
        # Compare with defined(), so an age of exactly 0 is a real value.
        $newest_age = $file_age
            if !defined $newest_age || $file_age < $newest_age;
    }

    return defined $newest_age ? $newest_age : 0;
}

# Build the translation database and write it to $cache_file as a single
# \x01-separated list of alternating keys and values.  Dies when the cache
# file cannot be written.
sub create_cache
{
    print "Generating and caching the translation database\n" unless $QUIET_ARG;

    create_translation_database();

    # "or", not "||": the latter binds to the file name and never fires.
    open(my $cache_fh, '>', $cache_file)
        or die "can't write cache file $cache_file: $!";
    print $cache_fh join "\x01", %translations;
    close $cache_fh or die "can't write cache file $cache_file: $!";
}

# Replace %translations with the contents of $cache_file (format as written
# by create_cache).  Dies when the cache file cannot be read.
sub load_cache
{
    print "Found cached translation database\n" unless $QUIET_ARG;

    my $contents;
    open(my $cache_fh, '<', $cache_file)
        or die "can't read cache file $cache_file: $!";
    {
        local $/;    # slurp mode
        $contents = <$cache_fh>;
    }
    close $cache_fh;
    %translations = split "\x01", $contents;
}

# Load the cache when it exists and is at least as new as the newest .po
# file; otherwise (re)generate it.
sub get_cached_translation_database
{
    my $cache_file_age = -M $cache_file;
    if (defined $cache_file_age)
    {
        if ($cache_file_age <= get_newest_po_age())
        {
            load_cache();
            return;
        }
        print "Found too-old cached translation database\n" unless $QUIET_ARG;
    }

    create_cache();
}

# Parse every known .po file and store each non-fuzzy, non-empty
# msgid/msgstr pair in %translations under the key ($lang, $msgid).
# With $UTF8_ARG set, files not declared as UTF-8 are read through iconv.
# Dies when a .po file (or the iconv pipe) cannot be opened.
sub create_translation_database
{
    for my $lang (keys %po_files_by_lang)
    {
        my $po_file = $po_files_by_lang{$lang};
        my $encoding = $UTF8_ARG ? get_po_encoding ($po_file) : undef;
        my $po_fh;

        if (defined $encoding && lc $encoding ne "utf-8")
        {
            print "NOTICE: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;

            open($po_fh, '-|', "$iconv -f $encoding -t UTF-8 $po_file")
                or die "can't run $iconv on $po_file: $!";
        }
        else
        {
            open($po_fh, '<', $po_file) or die "can't open $po_file: $!";
        }

        my $nextfuzzy = 0;
        my $inmsgid = 0;
        my $inmsgstr = 0;
        my $msgid = "";
        my $msgstr = "";

        while (my $line = <$po_fh>)
        {
            $nextfuzzy = 1 if $line =~ /^#, fuzzy/;

            if ($line =~ /^msgid "((\\.|[^\\])*)"/ )
            {
                my $text = $1;

                # A new msgid ends the previous entry: store it first.
                $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
                $msgid = "";
                $msgstr = "";

                if ($nextfuzzy) {
                    # Fuzzy entries are skipped: leave $msgid empty.
                    $inmsgid = 0;
                } else {
                    $msgid = unescape_po_string($text);
                    $inmsgid = 1;
                }
                $inmsgstr = 0;
                $nextfuzzy = 0;
            }

            if ($line =~ /^msgstr "((\\.|[^\\])*)"/)
            {
                $msgstr = unescape_po_string($1);
                $inmsgstr = 1;
                $inmsgid = 0;
            }

            # Continuation line: append to whichever string is open.
            if ($line =~ /^"((\\.|[^\\])*)"/)
            {
                my $text = $1;
                $msgid .= unescape_po_string($text) if $inmsgid;
                $msgstr .= unescape_po_string($text) if $inmsgstr;
            }
        }
        $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;

        # Also reaps the iconv child when reading through the pipe.
        close $po_fh;
    }
}

# Hook called after every merge; currently does nothing.
sub finalize
{
}

# Convert one backslash escape sequence as found in a .po string into the
# character it stands for.  Unknown sequences are returned unchanged.
sub unescape_one_sequence
{
    my ($sequence) = @_;

    return "\\" if $sequence eq "\\\\";
    return "\"" if $sequence eq "\\\"";
    return "\n" if $sequence eq "\\n";
    return "\r" if $sequence eq "\\r";
    return "\t" if $sequence eq "\\t";
    return "\b" if $sequence eq "\\b";
    return "\f" if $sequence eq "\\f";
    return "\a" if $sequence eq "\\a";
    return chr(11) if $sequence eq "\\v"; # vertical tab, see ascii(7)

    return chr(hex($1)) if ($sequence =~ /\\x([0-9a-fA-F]{2})/);
    return chr(oct($1)) if ($sequence =~ /\\([0-7]{3})/);

    # FIXME: Is \0 supported as well? Kenneth and Rodney don't want it, see bug #48489

    return $sequence;
}

# Return $string with every \xHH, \OOO and single-character backslash
# escape replaced by unescape_one_sequence.
sub unescape_po_string
{
    my ($string) = @_;

    $string =~ s/(\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\.)/unescape_one_sequence($1)/eg;

    return $string;
}

## NOTE: deal with &lt; - < but not &gt; - > because it seems it's ok to have
## > in the entity. For further info please look at #84738.
# Return a copy of the given string with &apos; &quot; &amp; and &lt;
# replaced by the characters they stand for.  &gt; is deliberately left
# alone.
# NOTE(review): &amp; is decoded before &lt;, so "&amp;lt;" ends up as "<".
# The order is kept as found; presumably it has to agree with whatever
# produced the msgids -- confirm before changing it.
sub entity_decode
{
    my ($text) = @_;

    $text =~ s/&apos;/'/g; # '
    $text =~ s/&quot;/"/g; # "
    $text =~ s/&amp;/&/g;
    $text =~ s/&lt;/</g;

    return $text;
}

# entity_encode: (string)
#
# Encode the given string to XML format (encode '<' etc).

sub entity_encode
{
    my ($pre_encoded) = @_;

    my @list_of_chars = unpack ('C*', $pre_encoded);

    # with UTF-8 we only encode minimalistic
    return join ('', map { entity_encode_int_minimalist() } @list_of_chars);
}

# Map the character code held in $_ to its XML entity for the characters
# " & ' and <, or to the plain character otherwise.  Reads $_ because it is
# used as the body of the map in entity_encode.
sub entity_encode_int_minimalist
{
    return "&quot;" if $_ == 34;
    return "&amp;" if $_ == 38;
    return "&apos;" if $_ == 39;
    return "&lt;" if $_ == 60;
    return chr $_;
}

# Return the entity-encoded translation of $string for $lang, or $string
# unchanged when %translations has no (true) entry for it.
sub entity_encoded_translation
{
    my ($lang, $string) = @_;

    my $translation = $translations{$lang, $string};
    return $string if !$translation;
    return entity_encode ($translation);
}

## XML (bonobo-activation specific) merge code

# Copy $FILE to $OUTFILE.  Every tag carrying attributes is written once
# with the leading underscore removed from "_attr" names, then once more
# per language that translates at least one of those attribute values,
# with "-$lang" appended to the value of its name attribute.
sub ba_merge_translations
{
    my $source;

    {
        local $/; # slurp mode
        open(my $in, '<', $FILE) or die "can't open $FILE: $!";
        $source = <$in>;
        close $in;
    }

    open(my $out, '>', $OUTFILE) or die "can't open $OUTFILE: $!";
    # Binmode so that selftest works ok if using a native Win32 Perl...
    binmode ($out) if $^O eq 'MSWin32';

    # Each round removes the text up to and including the next
    # attribute-bearing tag from the front of $source.
    while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s)
    {
        print $out $1;

        my $node = $2 . "\n";

        # Unmark the attributes one at a time, collecting their values.
        my @strings = ();
        my $unmarked = $node;
        while ($unmarked =~ s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
            push @strings, entity_decode($3);
        }
        print $out $unmarked;

        my %langs;
        for my $string (@strings)
        {
            for my $lang (keys %po_files_by_lang)
            {
                $langs{$lang} = 1 if $translations{$lang, $string};
            }
        }

        for my $lang (sort keys %langs)
        {
            my $localized = $node;
            $localized =~ s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
            $localized =~ s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
            print $out $localized;
        }
    }

    print $out $source;

    close $out;
}


## XML (non-bonobo-activation) merge code


# Process tag attributes
#
# Parameters:
#   $sub           HASH ref mapping attribute names to their quoted values
#   $do_translate  true to unmark (and, with a language, translate) keys
#                  that start with "_"
#   $language      language to translate into, "" for none
#   $translate     optional SCALAR ref; set to 2 when a marked key is seen
#                  (without a language only if it was still false)
#
# Returns the attributes as a string, each preceded by one space, in
# reverse-sorted key order and with the original quote character.
sub getAttributeString
{
    my $sub = shift;
    my $do_translate = shift || 0;
    my $language = shift || "";
    my $result = "";
    my $translate = shift;
    foreach my $e (reverse(sort(keys %{ $sub }))) {
        my $key    = $e;
        my $string = $sub->{$e};
        my $quote = '"';

        $string =~ s/^[\s]+//;
        $string =~ s/[\s]+$//;

        if ($string =~ /^'.*'$/)
        {
            $quote = "'";
        }
        $string =~ s/^['"]//g;
        $string =~ s/['"]$//g;

        if ($do_translate && $key =~ /^_/) {
            $key =~ s|^_||g;
            if ($language) {
                # Handle translation
                my $decode_string = entity_decode($string);
                my $translation = $translations{$language, $decode_string};
                if ($translation) {
                    $translation = entity_encode($translation);
                    $string = $translation;
                }
                $$translate = 2;
            } else {
                $$translate = 2 if ($translate && (!$$translate)); # watch not to "overwrite" $translate
            }
        }

        $result .= " $key=$quote$string$quote";
    }
    return $result;
}

# Returns a translatable string from XML node, it works on contents of every node in XML::Parser tree
#
# $ref is the content array of a node ([ attrs, type, content, ... ]);
# $spacepreserve is true when whitespace must be kept.  An xml:space
# attribute on the node overrides the inherited value.
sub getXMLstring
{
    my $ref = shift;
    my $spacepreserve = shift || 0;
    my @list = @{ $ref };
    my $result = "";

    my $count = scalar(@list);
    my $attrs = $list[0];
    my $index = 1;

    $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
    $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));

    while ($index < $count) {
        my $type = $list[$index];
        my $content = $list[$index+1];
        if (! $type ) {
            # We've got CDATA
            if ($content) {
                # lets strip the whitespace here, and *ONLY* here
                $content =~ s/\s+/ /gs if (!$spacepreserve);
                $result .= $content;
            }
        } elsif ( "$type" ne "1" ) {
            # We've got another element
            $result .= "<$type";
            $result .= getAttributeString($content->[0], 0);  # no nested translatable elements
            if ($content) {
                my $subresult = getXMLstring($content, $spacepreserve);
                if ($subresult) {
                    $result .= ">".$subresult . "</$type>";
                } else {
                    $result .= "/>";
                }
            } else {
                $result .= "/>";
            }
        }
        $index += 2;
    }
    return $result;
}

# Translate list of nodes if necessary
#
# Runs traverse over every (type, content) pair in $content.  When
# $singlelang is true, $MULTIPLE_OUTPUT is forced to 1 for the duration of
# each traverse call and restored afterwards.
sub translate_subnodes
{
    my $fh = shift;
    my $content = shift;
    my $language = shift || "";
    my $singlelang = shift || 0;
    my $spacepreserve = shift || 0;

    my @nodes = @{ $content };

    my $count = scalar(@nodes);
    my $index = 0;
    while ($index < $count) {
        my $type = $nodes[$index];
        my $rest = $nodes[$index+1];

        my $oldMO = $MULTIPLE_OUTPUT;
        $MULTIPLE_OUTPUT = 1 if $singlelang;
        traverse($fh, $type, $rest, $language, $spacepreserve);
        $MULTIPLE_OUTPUT = $oldMO;

        $index += 2;
    }
}

# Return a true value (the parse tree) when $fragment, wrapped in a <root>
# element, parses as XML; 0 for an empty fragment, and a false value when
# parsing fails.  Dies when XML::Parser is not installed.
sub isWellFormedXmlFragment
{
    my $ret = eval 'require XML::Parser';
    if(!$ret) {
        die "You must have XML::Parser installed to run $0\n\n";
    }

    my $fragment = shift;
    return 0 if (!$fragment);

    $fragment = "<root>$fragment</root>";
    my $xp = XML::Parser->new(Style => 'Tree');
    my $tree = 0;
    eval { $tree = $xp->parse($fragment); };
    return $tree;
}

# Write one node of the parse tree to $fh, adding translations.
#
# Parameters:
#   $fh             output file handle
#   $nodename       element name, or a false value for character data
#   $content        text (character data) or content array (element)
#   $language       language being generated, "" for the combined file
#   $spacepreserve  true when whitespace must be preserved
#
# Inside, $translate is 0 for a plain element, 1 when the element name was
# marked with a leading "_", and 2 when getAttributeString found a marked
# attribute on an element whose name is not marked.
sub traverse
{
    my $fh = shift;
    my $nodename = shift;
    my $content = shift;
    my $language = shift || "";
    my $spacepreserve = shift || 0;

    if (!$nodename) {
        # Whitespace-only text is remembered so that translated copies of
        # the next element can be indented the same way.
        if ($content =~ /^[\s]*$/) {
            $leading_space .= $content;
        }
        print $fh $content;
    } else {
        # element
        my @all = @{ $content };
        my $attrs = shift @all;
        my $translate = 0;
        my $outattr = getAttributeString($attrs, 1, $language, \$translate);

        if ($nodename =~ /^_/) {
            $translate = 1;
            $nodename =~ s/^_//;
        }
        my $lookup = '';

        $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));
        $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));

        print $fh "<$nodename", $outattr;
        if ($translate) {
            $lookup = getXMLstring($content, $spacepreserve);
            if (!$spacepreserve) {
                $lookup =~ s/^\s+//s;
                $lookup =~ s/\s+$//s;
            }

            if ($lookup || $translate == 2) {
                # Declared and assigned separately: "my $x = ... if COND"
                # has undefined behaviour and can keep a stale value.
                my $translation;
                my $candidate = $translations{$language, $lookup};
                $translation = $candidate if isWellFormedXmlFragment($candidate);

                if ($MULTIPLE_OUTPUT && ($translation || $translate == 2)) {
                    $translation = $lookup if (!$translation);
                    print $fh " xml:lang=\"", $language, "\"" if $language;
                    print $fh ">";
                    if ($translate == 2) {
                        translate_subnodes($fh, \@all, $language, 1, $spacepreserve);
                    } else {
                        print $fh $translation;
                    }
                    print $fh "</$nodename>";

                    return; # this means there will be no same translation with xml:lang="$language"...
                            # if we want them both, just remove this "return"
                } else {
                    print $fh ">";
                    if ($translate == 2) {
                        translate_subnodes($fh, \@all, $language, 1, $spacepreserve);
                    } else {
                        print $fh $lookup;
                    }
                    print $fh "</$nodename>";
                }
            } else {
                print $fh "/>";
            }

            for my $lang (sort keys %po_files_by_lang) {
                if ($MULTIPLE_OUTPUT && $lang ne "$language") {
                    next;
                }
                if ($lang) {
                    # Handle translation
                    #
                    my $lang_translate = 0;
                    my $localattrs = getAttributeString($attrs, 1, $lang, \$lang_translate);

                    my $translation;
                    my $candidate = $translations{$lang, $lookup};
                    $translation = $candidate if isWellFormedXmlFragment($candidate);

                    if ($lang_translate && !$translation) {
                        $translation = $lookup;
                    }

                    if ($translation || $lang_translate) {
                        print $fh "\n";
                        $leading_space =~ s/.*\n//g;
                        print $fh $leading_space;
                        print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs, ">";
                        if ($lang_translate == 2) {
                            translate_subnodes($fh, \@all, $lang, 1, $spacepreserve);
                        } else {
                            print $fh $translation;
                        }
                        print $fh "</$nodename>";
                    }
                }
            }

        } else {
            my $count = scalar(@all);
            if ($count > 0) {
                print $fh ">";
                my $index = 0;
                while ($index < $count) {
                    my $type = $all[$index];
                    my $rest = $all[$index+1];
                    traverse($fh, $type, $rest, $language, $spacepreserve);
                    $index += 2;
                }
                print $fh "</$nodename>";
            } else {
                print $fh "/>";
            }
        }
    }
}

# Tree-building handler: append a comment to the current list as the pair
# (1, text).
sub intltool_tree_comment
{
    my $expat = shift;
    my $data  = shift;
    my $clist = $expat->{Curlist};

    push @$clist, 1 => $data;
}

# Tree-building handler: start a new text entry (0, "<![CDATA[") holding
# the original CDATA opening marker.
sub intltool_tree_cdatastart
{
    my $expat = shift;
    my $clist = $expat->{Curlist};

    push @$clist, 0 => $expat->original_string();
}

# Tree-building handler: append the original CDATA closing marker to the
# last entry of the current list.
sub intltool_tree_cdataend
{
    my $expat = shift;
    my $clist = $expat->{Curlist};
    my $pos   = $#$clist;

    $clist->[$pos] .= $expat->original_string();
}

# Tree-building handler: append character data to the preceding text entry,
# or start a new (0, text) pair when the last entry is not text.
sub intltool_tree_char
{
    my $expat = shift;
    my $text  = shift;
    my $clist = $expat->{Curlist};
    my $pos   = $#$clist;

    # Use original_string so that we retain escaped entities
    # in CDATA sections.
    #
    if ($pos > 0 and $clist->[$pos - 1] eq '0') {
        $clist->[$pos] .= $expat->original_string();
    } else {
        push @$clist, 0 => $expat->original_string();
    }
}

# Tree-building handler: open a new element.  The attribute hash is built
# from the original tag text and keeps each value together with its
# surrounding quote characters.
sub intltool_tree_start
{
    my $expat    = shift;
    my $tag      = shift;
    my @origlist = ();

    # Use original_string so that we retain escaped entities
    # in attribute values.  We must convert the string to an
    # @origlist array to conform to the structure of the Tree
    # Style.
    #
    my $source         = $expat->original_string();

    # Remove leading tag.
    #
    $source =~ s|^\s*<\s*(\S+)||s;

    # Grab attribute key/value pairs and push onto @origlist array.
    # Matching and removing in one step guarantees progress on every
    # round, so the loop cannot spin on an attribute it cannot consume.
    #
    while ($source)
    {
        if ($source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s)
        {
            push @origlist, $1, '"' . $2 . '"';
        }
        elsif ($source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s)
        {
            push @origlist, $1, "'" . $2 . "'";
        }
        else
        {
            last;
        }
    }

    my $ol = [ { @origlist } ];

    push @{ $expat->{Lists} }, $expat->{Curlist};
    push @{ $expat->{Curlist} }, $tag => $ol;
    $expat->{Curlist} = $ol;
}

# Parse $filename with XML::Parser (Tree style, using the intltool_tree_*
# handlers above) and return the tree.  Returns nothing when no file name
# is given; dies when the file or XML::Parser is missing.
sub readXml
{
    my $filename = shift || return;
    if(!-f $filename) {
        die "ERROR Cannot find filename: $filename\n";
    }

    my $ret = eval 'require XML::Parser';
    if(!$ret) {
        die "You must have XML::Parser installed to run $0\n\n";
    }
    my $xp = XML::Parser->new(Style => 'Tree');
    $xp->setHandlers(Char => \&intltool_tree_char);
    $xp->setHandlers(Start => \&intltool_tree_start);
    $xp->setHandlers(CdataStart => \&intltool_tree_cdatastart);
    $xp->setHandlers(CdataEnd => \&intltool_tree_cdataend);
    my $tree = $xp->parsefile($filename);

# <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
# would be:
# [foo, [{}, head, [{id => "a"}, 0, "Hello ",  em, [{}, 0, "there"]], bar, [{},
# 0, "Howdy",  ref, [{}]], 0, "do" ] ]

    return $tree;
}

# Write the XML declaration to $fh, followed by the DOCTYPE declaration of
# $infile when it has one.  Dies when $infile is missing or unreadable.
sub print_header
{
    my $infile = shift;
    my $fh = shift;
    my $source;

    if(!-f $infile) {
        die "ERROR Cannot find filename: $infile\n";
    }

    print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n};
    {
        local $/;
        # Read the file that was checked above, not the global $FILE.
        open(my $doc, '<', $infile) or die "can't open $infile: $!";
        $source = <$doc>;
        close $doc;
    }
    if ($source =~ /(<!DOCTYPE.*\[.*\]\s*>)/s)
    {
        print $fh "$1\n";
    }
    elsif ($source =~ /(<!DOCTYPE[^>]*>)/s)
    {
        print $fh "$1\n";
    }
}

# Write the tree returned by readXml to $fh, translated into $language
# ("" for the combined file).  Leading text and comment entries are
# skipped.  Note that the tree is consumed (shifted) in the process.
sub parseTree
{
    my $fh        = shift;
    my $ref       = shift;
    my $language  = shift || "";

    my $name = shift @{ $ref };
    my $cont = shift @{ $ref };

    while (!$name || "$name" eq "1") {
        $name = shift @{ $ref };
        $cont = shift @{ $ref };
    }

    my $spacepreserve = 0;
    my $attrs = @{$cont}[0];
    $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));

    traverse($fh, $name, $cont, $language, $spacepreserve);
}

# Helper for xml_merge_output: parse $FILE afresh and write its merged form
# for $lang (undef for the combined file) to $path.
sub _write_merged_xml
{
    my ($path, $lang) = @_;

    open(my $out, '>', $path) or die "Cannot open $path: $!\n";
    binmode ($out) if $^O eq 'MSWin32';
    my $tree = readXml($FILE);
    print_header($FILE, $out);
    parseTree($out, $tree, $lang);
    close $out;
    print "CREATED $path\n" unless $QUIET_ARG;
}

# Write the merged XML output.  With --multiple-output one file per
# language is written to "$lang/$OUTFILE" (creating the directory when
# needed); $OUTFILE itself is written in every case.
sub xml_merge_output
{
    if ($MULTIPLE_OUTPUT) {
        for my $lang (sort keys %po_files_by_lang) {
            if ( ! -e $lang ) {
                mkdir $lang, 0777 or die "Cannot create subdirectory $lang: $!\n";
            }
            _write_merged_xml("$lang/$OUTFILE", $lang);
        }
    }
    _write_merged_xml($OUTFILE);
}

# Helper shared by the keys and desktop styles.  Copies $FILE to $OUTFILE;
# every "_key=value" line is written without the underscore, followed by
# one line per language that translates the value.  $localized_key is a
# code ref taking ($key, $lang) and returning the key to use on that line.
sub _merge_key_value_translations
{
    my ($localized_key) = @_;

    open(my $in, '<', $FILE) or die "can't open $FILE: $!";
    open(my $out, '>', $OUTFILE) or die "can't open $OUTFILE: $!";
    binmode ($out) if $^O eq 'MSWin32';

    while (my $line = <$in>)
    {
        if ($line =~ s/^(\s*)_(\w+=(.*))/$1$2/)
        {
            my $string = $3;

            print $out $line;

            for my $lang (sort keys %po_files_by_lang)
            {
                my $translation = $translations{$lang, $string};
                next if !$translation;

                my $localized = $line;
                $localized =~ s/(\w+)=.*/$localized_key->($1, $lang) . "=$translation"/e;
                print $out $localized;
            }
        }
        else
        {
            print $out $line;
        }
    }

    close $out;
    close $in;
}

# Keys style: translated lines are written as "[lang]key=translation".
sub keys_merge_translations
{
    _merge_key_value_translations(sub {
        my ($key, $lang) = @_;
        return "[$lang]" . $key;
    });
}

# Desktop style: translated lines are written as "key[lang]=translation".
sub desktop_merge_translations
{
    _merge_key_value_translations(sub {
        my ($key, $lang) = @_;
        return $key . "[$lang]";
    });
}

# Copy $FILE to $OUTFILE.  After every <locale name="C"> block, one
# <locale name="LANG"> block is added per language that translates its
# <default>, <short> or <long> text; only the translated elements are
# written.
sub schemas_merge_translations
{
    my $source;

    {
        local $/; # slurp mode
        open(my $in, '<', $FILE) or die "can't open $FILE: $!";
        $source = <$in>;
        close $in;
    }

    open(my $out, '>', $OUTFILE) or die "can't open $OUTFILE: $!";
    binmode ($out) if $^O eq 'MSWin32';

    # FIXME: support attribute translations

    # Empty nodes never need translation, so unmark all of them.
    # For example, <_foo/> is just replaced by <foo/>.
    $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;

    while ($source =~ s/
                        (.*?)
                        (\s+)(<locale\ name="C">(\s*)
                            (<default>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/default>)?(\s*)
                            (<short>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/short>)?(\s*)
                            (<long>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/long>)?(\s*)
                        <\/locale>)
                       //sx)
    {
        print $out $1;

        my $locale_start_spaces = $2 ? $2 : '';
        my $default_spaces = $4 ? $4 : '';
        my $short_spaces = $7 ? $7 : '';
        my $long_spaces = $10 ? $10 : '';
        my $locale_end_spaces = $13 ? $13 : '';
        my $c_default_block = $3 ? $3 : '';
        my $default_string = $6 ? $6 : '';
        my $short_string = $9 ? $9 : '';
        my $long_string = $12 ? $12 : '';

        print $out "$locale_start_spaces$c_default_block";

        # Lookup keys have their whitespace collapsed and entities decoded.
        $default_string =~ s/\s+/ /g;
        $default_string = entity_decode($default_string);
        $short_string =~ s/\s+/ /g;
        $short_string = entity_decode($short_string);
        $long_string =~ s/\s+/ /g;
        $long_string = entity_decode($long_string);

        for my $lang (sort keys %po_files_by_lang)
        {
            my $default_translation = $translations{$lang, $default_string};
            my $short_translation = $translations{$lang, $short_string};
            my $long_translation  = $translations{$lang, $long_string};

            next if (!$default_translation && !$short_translation &&
                     !$long_translation);

            print $out "\n$locale_start_spaces<locale name=\"$lang\">";

            print $out "$default_spaces";

            if ($default_translation)
            {
                $default_translation = entity_encode($default_translation);
                print $out "<default>$default_translation</default>";
            }

            print $out "$short_spaces";

            if ($short_translation)
            {
                $short_translation = entity_encode($short_translation);
                print $out "<short>$short_translation</short>";
            }

            print $out "$long_spaces";

            if ($long_translation)
            {
                $long_translation = entity_encode($long_translation);
                print $out "<long>$long_translation</long>";
            }

            print $out "$locale_end_spaces</locale>";
        }
    }

    print $out $source;

    close $out;
}

# Copy the RFC822-style $FILE to $OUTFILE.  Fields marked with one or two
# leading underscores are written unmarked and followed, for every language
# that translates all of their parts, by a "Field-LANG.ENCODING:" copy.
# Two underscores mark a comma-separated list (see rfc822deb_split).
sub rfc822deb_merge_translations
{
    my %encodings = ();
    for my $lang (keys %po_files_by_lang) {
        $encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang}));
    }

    my $source;

    $Text::Wrap::huge = 'overflow';
    $Text::Wrap::break = qr/\n|\s(?=\S)/;

    {
        local $/; # slurp mode
        open(my $in, '<', $FILE) or die "can't open $FILE: $!";
        $source = <$in>;
        close $in;
    }

    open(my $out, '>', $OUTFILE) or die "can't open $OUTFILE: $!";
    binmode ($out) if $^O eq 'MSWin32';

    while ($source =~ /(^|\n+)(_*)([^:\s]+)(:[ \t]*)(.*?)(?=\n[\S\n]|$)/sg)
    {
        my $sep = $1;
        my $non_translated_line = $3.$4;
        my $string = $5;
        my $underscore = length($2);
        next if $underscore == 0 && $non_translated_line =~ /^#/;
        #  Remove [] dummy strings
        my $stripped = $string;
        $stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore == 2;
        $stripped =~ s/\[\s[^\[\]]*\]$//;
        $non_translated_line .= $stripped;

        print $out $sep.$non_translated_line;

        if ($underscore)
        {
            my @str_list = rfc822deb_split($underscore, $string);

            for my $lang (sort keys %po_files_by_lang)
            {
                my $is_translated = 1;
                my $str_translated = '';
                my $first = 1;

                for my $str (@str_list)
                {
                    my $translation = $translations{$lang, $str};

                    # A field is only emitted when every part is translated.
                    if (!$translation)
                    {
                        $is_translated = 0;
                        last;
                    }

                    #  $translation may also contain [] dummy
                    #  strings, mostly to indicate an empty string
                    $translation =~ s/\[\s[^\[\]]*\]$//;

                    if ($first)
                    {
                        if ($underscore == 2)
                        {
                            $str_translated .= $translation;
                        }
                        else
                        {
                            $str_translated .=
                                Text::Tabs::expand($translation) .
                                "\n";
                        }
                    }
                    else
                    {
                        if ($underscore == 2)
                        {
                            $str_translated .= ', ' . $translation;
                        }
                        else
                        {
                            $str_translated .= Text::Tabs::expand(
                                Text::Wrap::wrap(' ', ' ', $translation)) .
                                "\n .\n";
                        }
                    }
                    $first = 0;

                    #  To fix some problems with Text::Wrap::wrap
                    $str_translated =~ s/(\n )+\n/\n .\n/g;
                }
                next unless $is_translated;

                $str_translated =~ s/\n \.\n$//;
                $str_translated =~ s/\s+$//;

                my $localized = $non_translated_line;
                $localized =~ s/^(\w+):\s*.*/$sep${1}-$lang.$encodings{$lang}: $str_translated/s;
                print $out $localized;
            }
        }
    }
    print $out "\n";

    close $out;
}

sub rfc822deb_split
{
    # Debian defines a special way to deal with rfc822-style files:
    # when a value contain newlines, it consists of
    #   1.  a short form (first line)
    #   2.  a long description, all lines begin with a space,
    #       and paragraphs are separated by a single dot on a line
    # This routine returns an array of all paragraphs, and reformat
    # them.
    # When first argument is 2, the string is a comma separated list of
    # values.
    my $type = shift;
    my $text = shift;
    $text =~ s/^[ \t]//mg;
    return (split(/, */, $text, 0)) if $type ne 1;
    return ($text) if $text !~ /\n/;

    # The first line is the short form; the rest is split into paragraphs.
    $text =~ s/([^\n]*)\n//;
    my @list = ($1);
    my $str = '';

    for my $line (split (/\n/, $text))
    {
        chomp $line;
        if ($line =~ /^\.\s*$/)
        {
            #  New paragraph
            $str =~ s/\s*$//;
            push(@list, $str);
            $str = '';
        }
        elsif ($line =~ /^\s/)
        {
            #  Line which must not be reformatted
            $str .= "\n" if length ($str) && $str !~ /\n$/;
            $line =~ s/\s+$//;
            $str .= $line."\n";
        }
        else
        {
            #  Continuation line, remove newline
            $str .= " " if length ($str) && $str !~ /\n$/;
            $str .= $line;
        }
    }

    $str =~ s/\s*$//;
    push(@list, $str) if length ($str);

    return @list;
}