1#!@INTLTOOL_PERL@ -w 2# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4 -*- 3 4# 5# The Intltool Message Merger 6# 7# Copyright (C) 2000, 2003 Free Software Foundation. 8# Copyright (C) 2000, 2001 Eazel, Inc 9# 10# Intltool is free software; you can redistribute it and/or 11# modify it under the terms of the GNU General Public License 12# version 2 published by the Free Software Foundation. 13# 14# Intltool is distributed in the hope that it will be useful, 15# but WITHOUT ANY WARRANTY; without even the implied warranty of 16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17# General Public License for more details. 18# 19# You should have received a copy of the GNU General Public License 20# along with this program; if not, write to the Free Software 21# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 22# 23# As a special exception to the GNU General Public License, if you 24# distribute this file as part of a program that contains a 25# configuration script generated by Autoconf, you may include it under 26# the same distribution terms that you use for the rest of that program. 27# 28# Authors: Maciej Stachowiak <mjs@noisehavoc.org> 29# Kenneth Christiansen <kenneth@gnu.org> 30# Darin Adler <darin@bentspoon.com> 31# 32# Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net> 33# 34 35## Release information 36my $PROGRAM = "intltool-merge"; 37my $PACKAGE = "intltool"; 38my $VERSION = "0.36.2"; 39 40## Loaded modules 41use strict; 42use Getopt::Long; 43use Text::Wrap; 44use File::Basename; 45 46my $must_end_tag = -1; 47my $last_depth = -1; 48my $translation_depth = -1; 49my @tag_stack = (); 50my @entered_tag = (); 51my @translation_strings = (); 52my $leading_space = ""; 53 54## Scalars used by the option stuff 55my $HELP_ARG = 0; 56my $VERSION_ARG = 0; 57my $BA_STYLE_ARG = 0; 58my $XML_STYLE_ARG = 0; 59my $KEYS_STYLE_ARG = 0; 60my $DESKTOP_STYLE_ARG = 0; 61my $SCHEMAS_STYLE_ARG = 0; 62my $RFC822DEB_STYLE_ARG = 0; 63my $QUOTED_STYLE_ARG = 0; 64my $QUIET_ARG = 0; 65my $PASS_THROUGH_ARG = 0; 66my $UTF8_ARG = 0; 67my $MULTIPLE_OUTPUT = 0; 68my $cache_file; 69 70## Handle options 71GetOptions 72( 73 "help" => \$HELP_ARG, 74 "version" => \$VERSION_ARG, 75 "quiet|q" => \$QUIET_ARG, 76 "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility 77 "ba-style|b" => \$BA_STYLE_ARG, 78 "xml-style|x" => \$XML_STYLE_ARG, 79 "keys-style|k" => \$KEYS_STYLE_ARG, 80 "desktop-style|d" => \$DESKTOP_STYLE_ARG, 81 "schemas-style|s" => \$SCHEMAS_STYLE_ARG, 82 "rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG, 83 "quoted-style" => \$QUOTED_STYLE_ARG, 84 "pass-through|p" => \$PASS_THROUGH_ARG, 85 "utf8|u" => \$UTF8_ARG, 86 "multiple-output|m" => \$MULTIPLE_OUTPUT, 87 "cache|c=s" => \$cache_file 88 ) or &error; 89 90my $PO_DIR; 91my $FILE; 92my $OUTFILE; 93 94my %po_files_by_lang = (); 95my %translations = (); 96my $iconv = $ENV{"ICONV"} || "iconv"; 97my $devnull = ($^O eq 'MSWin32' ? 'NUL:' : '/dev/null'); 98 99sub isProgramInPath 100{ 101 my ($file) = @_; 102 # If either a file exists, or when run it returns 0 exit status 103 return 1 if ((-x $file) or (system("$file -l >$devnull") == 0)); 104 return 0; 105} 106 107if (! isProgramInPath ("$iconv")) 108{ 109 print STDERR " *** iconv is not found on this system!\n". 110 " *** Without it, intltool-merge can not convert encodings.\n"; 111 exit; 112} 113 114# Use this instead of \w for XML files to handle more possible characters. 115my $w = "[-A-Za-z0-9._:]"; 116 117# XML quoted string contents 118my $q = "[^\\\"]*"; 119 120## Check for options. 121 122if ($VERSION_ARG) 123{ 124 &print_version; 125} 126elsif ($HELP_ARG) 127{ 128 &print_help; 129} 130elsif ($BA_STYLE_ARG && @ARGV > 2) 131{ 132 &utf8_sanity_check; 133 &preparation; 134 &print_message; 135 &ba_merge_translations; 136 &finalize; 137} 138elsif ($XML_STYLE_ARG && @ARGV > 2) 139{ 140 &utf8_sanity_check; 141 &preparation; 142 &print_message; 143 &xml_merge_output; 144 &finalize; 145} 146elsif ($KEYS_STYLE_ARG && @ARGV > 2) 147{ 148 &utf8_sanity_check; 149 &preparation; 150 &print_message; 151 &keys_merge_translations; 152 &finalize; 153} 154elsif ($DESKTOP_STYLE_ARG && @ARGV > 2) 155{ 156 &utf8_sanity_check; 157 &preparation; 158 &print_message; 159 &desktop_merge_translations; 160 &finalize; 161} 162elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2) 163{ 164 &utf8_sanity_check; 165 &preparation; 166 &print_message; 167 &schemas_merge_translations; 168 &finalize; 169} 170elsif ($RFC822DEB_STYLE_ARG && @ARGV > 2) 171{ 172 &preparation; 173 &print_message; 174 &rfc822deb_merge_translations; 175 &finalize; 176} 177elsif ($QUOTED_STYLE_ARG && @ARGV > 2) 178{ 179 &utf8_sanity_check; 180 &preparation; 181 &print_message; 182 "ed_merge_translations; 183 &finalize; 184} 185else 186{ 187 &print_help; 188} 189 190exit; 191 192## Sub for printing release information 193sub print_version 194{ 195 print <<_EOF_; 196${PROGRAM} (${PACKAGE}) ${VERSION} 197Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen. 198 199Copyright (C) 2000-2003 Free Software Foundation, Inc. 200Copyright (C) 2000-2001 Eazel, Inc. 201This is free software; see the source for copying conditions. There is NO 202warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 203_EOF_ 204 exit; 205} 206 207## Sub for printing usage information 208sub print_help 209{ 210 print <<_EOF_; 211Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE 212Generates an output file that includes some localized attributes from an 213untranslated source file. 214 215Mandatory options: (exactly one must be specified) 216 -b, --ba-style includes translations in the bonobo-activation style 217 -d, --desktop-style includes translations in the desktop style 218 -k, --keys-style includes translations in the keys style 219 -s, --schemas-style includes translations in the schemas style 220 -r, --rfc822deb-style includes translations in the RFC822 style 221 --quoted-style includes translations in the quoted string style 222 -x, --xml-style includes translations in the standard xml style 223 224Other options: 225 -u, --utf8 convert all strings to UTF-8 before merging 226 (default for everything except RFC822 style) 227 -p, --pass-through deprecated, does nothing and issues a warning 228 -m, --multiple-output output one localized file per locale, instead of 229 a single file containing all localized elements 230 -c, --cache=FILE specify cache file name 231 (usually \$top_builddir/po/.intltool-merge-cache) 232 -q, --quiet suppress most messages 233 --help display this help and exit 234 --version output version information and exit 235 236Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE") 237or send email to <xml-i18n-tools\@gnome.org>. 238_EOF_ 239 exit; 240} 241 242 243## Sub for printing error messages 244sub print_error 245{ 246 print STDERR "Try `${PROGRAM} --help' for more information.\n"; 247 exit; 248} 249 250 251sub print_message 252{ 253 print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG; 254} 255 256 257sub preparation 258{ 259 $PO_DIR = $ARGV[0]; 260 $FILE = $ARGV[1]; 261 $OUTFILE = $ARGV[2]; 262 263 &gather_po_files; 264 &get_translation_database; 265} 266 267# General-purpose code for looking up translations in .po files 268 269sub po_file2lang 270{ 271 my ($tmp) = @_; 272 $tmp =~ s/^.*\/(.*)\.po$/$1/; 273 return $tmp; 274} 275 276sub gather_po_files 277{ 278 if (my $linguas = $ENV{"LINGUAS"}) 279 { 280 for my $lang (split / /, $linguas) { 281 my $po_file = $PO_DIR . "/" . $lang . ".po"; 282 if (-e $po_file) { 283 $po_files_by_lang{$lang} = $po_file; 284 } 285 } 286 } 287 else 288 { 289 if (open LINGUAS_FILE, "$PO_DIR/LINGUAS") 290 { 291 while (<LINGUAS_FILE>) 292 { 293 next if /^#/; 294 295 if (/([-a-zA-Z_@.]+)\n/) 296 { 297 my $lang = $1; 298 299 my $po_file = $PO_DIR . "/" . $lang . ".po"; 300 if (-e $po_file) { 301 $po_files_by_lang{$lang} = $po_file; 302 } 303 } 304 } 305 306 close LINGUAS_FILE; 307 } 308 else 309 { 310 for my $po_file (glob "$PO_DIR/*.po") { 311 $po_files_by_lang{po_file2lang($po_file)} = $po_file; 312 } 313 } 314 } 315} 316 317sub get_local_charset 318{ 319 my ($encoding) = @_; 320 my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "@INTLTOOL_LIBDIR@/charset.alias"; 321 322 # seek character encoding aliases in charset.alias (glib) 323 324 if (open CHARSET_ALIAS, $alias_file) 325 { 326 while (<CHARSET_ALIAS>) 327 { 328 next if /^\#/; 329 return $1 if (/^\s*([-._a-zA-Z0-9]+)\s+$encoding\b/i) 330 } 331 332 close CHARSET_ALIAS; 333 } 334 335 # if not found, return input string 336 337 return $encoding; 338} 339 340sub get_po_encoding 341{ 342 my ($in_po_file) = @_; 343 my $encoding = ""; 344 345 open IN_PO_FILE, $in_po_file or die; 346 while (<IN_PO_FILE>) 347 { 348 ## example: "Content-Type: text/plain; charset=ISO-8859-1\n" 349 if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/) 350 { 351 $encoding = $1; 352 last; 353 } 354 } 355 close IN_PO_FILE; 356 357 if (!$encoding) 358 { 359 print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG; 360 $encoding = "ISO-8859-1"; 361 } 362 363 system ("$iconv -f $encoding -t UTF-8 <$devnull 2>$devnull"); 364 if ($?) { 365 $encoding = get_local_charset($encoding); 366 } 367 368 return $encoding 369} 370 371sub utf8_sanity_check 372{ 373 print STDERR "Warning: option --pass-through has been removed.\n" if $PASS_THROUGH_ARG; 374 $UTF8_ARG = 1; 375} 376 377sub get_translation_database 378{ 379 if ($cache_file) { 380 &get_cached_translation_database; 381 } else { 382 &create_translation_database; 383 } 384} 385 386sub get_newest_po_age 387{ 388 my $newest_age; 389 390 foreach my $file (values %po_files_by_lang) 391 { 392 my $file_age = -M $file; 393 $newest_age = $file_age if !$newest_age || $file_age < $newest_age; 394 } 395 396 $newest_age = 0 if !$newest_age; 397 398 return $newest_age; 399} 400 401sub create_cache 402{ 403 print "Generating and caching the translation database\n" unless $QUIET_ARG; 404 405 &create_translation_database; 406 407 open CACHE, ">$cache_file" || die; 408 print CACHE join "\x01", %translations; 409 close CACHE; 410} 411 412sub load_cache 413{ 414 print "Found cached translation database\n" unless $QUIET_ARG; 415 416 my $contents; 417 open CACHE, "<$cache_file" || die; 418 { 419 local $/; 420 $contents = <CACHE>; 421 } 422 close CACHE; 423 %translations = split "\x01", $contents; 424} 425 426sub get_cached_translation_database 427{ 428 my $cache_file_age = -M $cache_file; 429 if (defined $cache_file_age) 430 { 431 if ($cache_file_age <= &get_newest_po_age) 432 { 433 &load_cache; 434 return; 435 } 436 print "Found too-old cached translation database\n" unless $QUIET_ARG; 437 } 438 439 &create_cache; 440} 441 442sub create_translation_database 443{ 444 for my $lang (keys %po_files_by_lang) 445 { 446 my $po_file = $po_files_by_lang{$lang}; 447 448 if ($UTF8_ARG) 449 { 450 my $encoding = get_po_encoding ($po_file); 451 452 if (lc $encoding eq "utf-8") 453 { 454 open PO_FILE, "<$po_file"; 455 } 456 else 457 { 458 print "NOTICE: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;; 459 460 open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|"; 461 } 462 } 463 else 464 { 465 open PO_FILE, "<$po_file"; 466 } 467 468 my $nextfuzzy = 0; 469 my $inmsgid = 0; 470 my $inmsgstr = 0; 471 my $msgid = ""; 472 my $msgstr = ""; 473 474 while (<PO_FILE>) 475 { 476 $nextfuzzy = 1 if /^#, fuzzy/; 477 478 if (/^msgid "((\\.|[^\\]+)*)"/ ) 479 { 480 $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr; 481 $msgid = ""; 482 $msgstr = ""; 483 484 if ($nextfuzzy) { 485 $inmsgid = 0; 486 } else { 487 $msgid = unescape_po_string($1); 488 $inmsgid = 1; 489 } 490 $inmsgstr = 0; 491 $nextfuzzy = 0; 492 } 493 494 if (/^msgstr "((\\.|[^\\]+)*)"/) 495 { 496 $msgstr = unescape_po_string($1); 497 $inmsgstr = 1; 498 $inmsgid = 0; 499 } 500 501 if (/^"((\\.|[^\\]+)*)"/) 502 { 503 $msgid .= unescape_po_string($1) if $inmsgid; 504 $msgstr .= unescape_po_string($1) if $inmsgstr; 505 } 506 } 507 $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr; 508 } 509} 510 511sub finalize 512{ 513} 514 515sub unescape_one_sequence 516{ 517 my ($sequence) = @_; 518 519 return "\\" if $sequence eq "\\\\"; 520 return "\"" if $sequence eq "\\\""; 521 return "\n" if $sequence eq "\\n"; 522 return "\r" if $sequence eq "\\r"; 523 return "\t" if $sequence eq "\\t"; 524 return "\b" if $sequence eq "\\b"; 525 return "\f" if $sequence eq "\\f"; 526 return "\a" if $sequence eq "\\a"; 527 return chr(11) if $sequence eq "\\v"; # vertical tab, see ascii(7) 528 529 return chr(hex($1)) if ($sequence =~ /\\x([0-9a-fA-F]{2})/); 530 return chr(oct($1)) if ($sequence =~ /\\([0-7]{3})/); 531 532 # FIXME: Is \0 supported as well? Kenneth and Rodney don't want it, see bug #48489 533 534 return $sequence; 535} 536 537sub unescape_po_string 538{ 539 my ($string) = @_; 540 541 $string =~ s/(\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\.)/unescape_one_sequence($1)/eg; 542 543 return $string; 544} 545 546sub entity_decode 547{ 548 local ($_) = @_; 549 550 s/'/'/g; # ' 551 s/"/"/g; # " 552 s/</</g; 553 s/>/>/g; 554 s/&/&/g; 555 556 return $_; 557} 558 559# entity_encode: (string) 560# 561# Encode the given string to XML format (encode '<' etc). 562 563sub entity_encode 564{ 565 my ($pre_encoded) = @_; 566 567 my @list_of_chars = unpack ('C*', $pre_encoded); 568 569 # with UTF-8 we only encode minimalistic 570 return join ('', map (&entity_encode_int_minimalist, @list_of_chars)); 571} 572 573sub entity_encode_int_minimalist 574{ 575 return """ if $_ == 34; 576 return "&" if $_ == 38; 577 return "'" if $_ == 39; 578 return "<" if $_ == 60; 579 return chr $_; 580} 581 582sub entity_encoded_translation 583{ 584 my ($lang, $string) = @_; 585 586 my $translation = $translations{$lang, $string}; 587 return $string if !$translation; 588 return entity_encode ($translation); 589} 590 591## XML (bonobo-activation specific) merge code 592 593sub ba_merge_translations 594{ 595 my $source; 596 597 { 598 local $/; # slurp mode 599 open INPUT, "<$FILE" or die "can't open $FILE: $!"; 600 $source = <INPUT>; 601 close INPUT; 602 } 603 604 open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!"; 605 # Binmode so that selftest works ok if using a native Win32 Perl... 606 binmode (OUTPUT) if $^O eq 'MSWin32'; 607 608 while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s) 609 { 610 print OUTPUT $1; 611 612 my $node = $2 . "\n"; 613 614 my @strings = (); 615 $_ = $node; 616 while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) { 617 push @strings, entity_decode($3); 618 } 619 print OUTPUT; 620 621 my %langs; 622 for my $string (@strings) 623 { 624 for my $lang (keys %po_files_by_lang) 625 { 626 $langs{$lang} = 1 if $translations{$lang, $string}; 627 } 628 } 629 630 for my $lang (sort keys %langs) 631 { 632 $_ = $node; 633 s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s; 634 s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg; 635 print OUTPUT; 636 } 637 } 638 639 print OUTPUT $source; 640 641 close OUTPUT; 642} 643 644 645## XML (non-bonobo-activation) merge code 646 647 648# Process tag attributes 649# Only parameter is a HASH containing attributes -> values mapping 650sub getAttributeString 651{ 652 my $sub = shift; 653 my $do_translate = shift || 0; 654 my $language = shift || ""; 655 my $result = ""; 656 my $translate = shift; 657 foreach my $e (reverse(sort(keys %{ $sub }))) { 658 my $key = $e; 659 my $string = $sub->{$e}; 660 my $quote = '"'; 661 662 $string =~ s/^[\s]+//; 663 $string =~ s/[\s]+$//; 664 665 if ($string =~ /^'.*'$/) 666 { 667 $quote = "'"; 668 } 669 $string =~ s/^['"]//g; 670 $string =~ s/['"]$//g; 671 672 if ($do_translate && $key =~ /^_/) { 673 $key =~ s|^_||g; 674 if ($language) { 675 # Handle translation 676 my $decode_string = entity_decode($string); 677 my $translation = $translations{$language, $decode_string}; 678 if ($translation) { 679 $translation = entity_encode($translation); 680 $string = $translation; 681 } 682 $$translate = 2; 683 } else { 684 $$translate = 2 if ($translate && (!$$translate)); # watch not to "overwrite" $translate 685 } 686 } 687 688 $result .= " $key=$quote$string$quote"; 689 } 690 return $result; 691} 692 693# Returns a translatable string from XML node, it works on contents of every node in XML::Parser tree 694sub getXMLstring 695{ 696 my $ref = shift; 697 my $spacepreserve = shift || 0; 698 my @list = @{ $ref }; 699 my $result = ""; 700 701 my $count = scalar(@list); 702 my $attrs = $list[0]; 703 my $index = 1; 704 705 $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/)); 706 $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/)); 707 708 while ($index < $count) { 709 my $type = $list[$index]; 710 my $content = $list[$index+1]; 711 if (! $type ) { 712 # We've got CDATA 713 if ($content) { 714 # lets strip the whitespace here, and *ONLY* here 715 $content =~ s/\s+/ /gs if (!$spacepreserve); 716 $result .= $content; 717 } 718 } elsif ( "$type" ne "1" ) { 719 # We've got another element 720 $result .= "<$type"; 721 $result .= getAttributeString(@{$content}[0], 0); # no nested translatable elements 722 if ($content) { 723 my $subresult = getXMLstring($content, $spacepreserve); 724 if ($subresult) { 725 $result .= ">".$subresult . "</$type>"; 726 } else { 727 $result .= "/>"; 728 } 729 } else { 730 $result .= "/>"; 731 } 732 } 733 $index += 2; 734 } 735 return $result; 736} 737 738# Translate list of nodes if necessary 739sub translate_subnodes 740{ 741 my $fh = shift; 742 my $content = shift; 743 my $language = shift || ""; 744 my $singlelang = shift || 0; 745 my $spacepreserve = shift || 0; 746 747 my @nodes = @{ $content }; 748 749 my $count = scalar(@nodes); 750 my $index = 0; 751 while ($index < $count) { 752 my $type = $nodes[$index]; 753 my $rest = $nodes[$index+1]; 754 if ($singlelang) { 755 my $oldMO = $MULTIPLE_OUTPUT; 756 $MULTIPLE_OUTPUT = 1; 757 traverse($fh, $type, $rest, $language, $spacepreserve); 758 $MULTIPLE_OUTPUT = $oldMO; 759 } else { 760 traverse($fh, $type, $rest, $language, $spacepreserve); 761 } 762 $index += 2; 763 } 764} 765 766sub isWellFormedXmlFragment 767{ 768 my $ret = eval 'require XML::Parser'; 769 if(!$ret) { 770 die "You must have XML::Parser installed to run $0\n\n"; 771 } 772 773 my $fragment = shift; 774 return 0 if (!$fragment); 775 776 $fragment = "<root>$fragment</root>"; 777 my $xp = new XML::Parser(Style => 'Tree'); 778 my $tree = 0; 779 eval { $tree = $xp->parse($fragment); }; 780 return $tree; 781} 782 783sub traverse 784{ 785 my $fh = shift; 786 my $nodename = shift; 787 my $content = shift; 788 my $language = shift || ""; 789 my $spacepreserve = shift || 0; 790 791 if (!$nodename) { 792 if ($content =~ /^[\s]*$/) { 793 $leading_space .= $content; 794 } 795 print $fh $content; 796 } else { 797 # element 798 my @all = @{ $content }; 799 my $attrs = shift @all; 800 my $translate = 0; 801 my $outattr = getAttributeString($attrs, 1, $language, \$translate); 802 803 if ($nodename =~ /^_/) { 804 $translate = 1; 805 $nodename =~ s/^_//; 806 } 807 my $lookup = ''; 808 809 $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/)); 810 $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/)); 811 812 print $fh "<$nodename", $outattr; 813 if ($translate) { 814 $lookup = getXMLstring($content, $spacepreserve); 815 if (!$spacepreserve) { 816 $lookup =~ s/^\s+//s; 817 $lookup =~ s/\s+$//s; 818 } 819 820 if ($lookup || $translate == 2) { 821 my $translation = $translations{$language, $lookup} if isWellFormedXmlFragment($translations{$language, $lookup}); 822 if ($MULTIPLE_OUTPUT && ($translation || $translate == 2)) { 823 $translation = $lookup if (!$translation); 824 print $fh " xml:lang=\"", $language, "\"" if $language; 825 print $fh ">"; 826 if ($translate == 2) { 827 translate_subnodes($fh, \@all, $language, 1, $spacepreserve); 828 } else { 829 print $fh $translation; 830 } 831 print $fh "</$nodename>"; 832 833 return; # this means there will be no same translation with xml:lang="$language"... 834 # if we want them both, just remove this "return" 835 } else { 836 print $fh ">"; 837 if ($translate == 2) { 838 translate_subnodes($fh, \@all, $language, 1, $spacepreserve); 839 } else { 840 print $fh $lookup; 841 } 842 print $fh "</$nodename>"; 843 } 844 } else { 845 print $fh "/>"; 846 } 847 848 for my $lang (sort keys %po_files_by_lang) { 849 if ($MULTIPLE_OUTPUT && $lang ne "$language") { 850 next; 851 } 852 if ($lang) { 853 # Handle translation 854 # 855 my $translate = 0; 856 my $localattrs = getAttributeString($attrs, 1, $lang, \$translate); 857 my $translation = $translations{$lang, $lookup} if isWellFormedXmlFragment($translations{$lang, $lookup}); 858 if ($translate && !$translation) { 859 $translation = $lookup; 860 } 861 862 if ($translation || $translate) { 863 print $fh "\n"; 864 $leading_space =~ s/.*\n//g; 865 print $fh $leading_space; 866 print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs, ">"; 867 if ($translate == 2) { 868 translate_subnodes($fh, \@all, $lang, 1, $spacepreserve); 869 } else { 870 print $fh $translation; 871 } 872 print $fh "</$nodename>"; 873 } 874 } 875 } 876 877 } else { 878 my $count = scalar(@all); 879 if ($count > 0) { 880 print $fh ">"; 881 my $index = 0; 882 while ($index < $count) { 883 my $type = $all[$index]; 884 my $rest = $all[$index+1]; 885 traverse($fh, $type, $rest, $language, $spacepreserve); 886 $index += 2; 887 } 888 print $fh "</$nodename>"; 889 } else { 890 print $fh "/>"; 891 } 892 } 893 } 894} 895 896sub intltool_tree_comment 897{ 898 my $expat = shift; 899 my $data = shift; 900 my $clist = $expat->{Curlist}; 901 my $pos = $#$clist; 902 903 push @$clist, 1 => $data; 904} 905 906sub intltool_tree_cdatastart 907{ 908 my $expat = shift; 909 my $clist = $expat->{Curlist}; 910 my $pos = $#$clist; 911 912 push @$clist, 0 => $expat->original_string(); 913} 914 915sub intltool_tree_cdataend 916{ 917 my $expat = shift; 918 my $clist = $expat->{Curlist}; 919 my $pos = $#$clist; 920 921 $clist->[$pos] .= $expat->original_string(); 922} 923 924sub intltool_tree_char 925{ 926 my $expat = shift; 927 my $text = shift; 928 my $clist = $expat->{Curlist}; 929 my $pos = $#$clist; 930 931 # Use original_string so that we retain escaped entities 932 # in CDATA sections. 933 # 934 if ($pos > 0 and $clist->[$pos - 1] eq '0') { 935 $clist->[$pos] .= $expat->original_string(); 936 } else { 937 push @$clist, 0 => $expat->original_string(); 938 } 939} 940 941sub intltool_tree_start 942{ 943 my $expat = shift; 944 my $tag = shift; 945 my @origlist = (); 946 947 # Use original_string so that we retain escaped entities 948 # in attribute values. We must convert the string to an 949 # @origlist array to conform to the structure of the Tree 950 # Style. 951 # 952 my @original_array = split /\x/, $expat->original_string(); 953 my $source = $expat->original_string(); 954 955 # Remove leading tag. 956 # 957 $source =~ s|^\s*<\s*(\S+)||s; 958 959 # Grab attribute key/value pairs and push onto @origlist array. 960 # 961 while ($source) 962 { 963 if ($source =~ /^\s*([\w:-]+)\s*[=]\s*["]/) 964 { 965 $source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s; 966 push @origlist, $1; 967 push @origlist, '"' . $2 . '"'; 968 } 969 elsif ($source =~ /^\s*([\w:-]+)\s*[=]\s*[']/) 970 { 971 $source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s; 972 push @origlist, $1; 973 push @origlist, "'" . $2 . "'"; 974 } 975 else 976 { 977 last; 978 } 979 } 980 981 my $ol = [ { @origlist } ]; 982 983 push @{ $expat->{Lists} }, $expat->{Curlist}; 984 push @{ $expat->{Curlist} }, $tag => $ol; 985 $expat->{Curlist} = $ol; 986} 987 988sub readXml 989{ 990 my $filename = shift || return; 991 if(!-f $filename) { 992 die "ERROR Cannot find filename: $filename\n"; 993 } 994 995 my $ret = eval 'require XML::Parser'; 996 if(!$ret) { 997 die "You must have XML::Parser installed to run $0\n\n"; 998 } 999 my $xp = new XML::Parser(Style => 'Tree'); 1000 $xp->setHandlers(Char => \&intltool_tree_char); 1001 $xp->setHandlers(Start => \&intltool_tree_start); 1002 $xp->setHandlers(CdataStart => \&intltool_tree_cdatastart); 1003 $xp->setHandlers(CdataEnd => \&intltool_tree_cdataend); 1004 my $tree = $xp->parsefile($filename); 1005 1006# <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo> 1007# would be: 1008# [foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]], bar, [{}, 1009# 0, "Howdy", ref, [{}]], 0, "do" ] ] 1010 1011 return $tree; 1012} 1013 1014sub print_header 1015{ 1016 my $infile = shift; 1017 my $fh = shift; 1018 my $source; 1019 1020 if(!-f $infile) { 1021 die "ERROR Cannot find filename: $infile\n"; 1022 } 1023 1024 print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n}; 1025 { 1026 local $/; 1027 open DOCINPUT, "<${FILE}" or die; 1028 $source = <DOCINPUT>; 1029 close DOCINPUT; 1030 } 1031 if ($source =~ /(<!DOCTYPE.*\[.*\]\s*>)/s) 1032 { 1033 print $fh "$1\n"; 1034 } 1035 elsif ($source =~ /(<!DOCTYPE[^>]*>)/s) 1036 { 1037 print $fh "$1\n"; 1038 } 1039} 1040 1041sub parseTree 1042{ 1043 my $fh = shift; 1044 my $ref = shift; 1045 my $language = shift || ""; 1046 1047 my $name = shift @{ $ref }; 1048 my $cont = shift @{ $ref }; 1049 1050 while (!$name || "$name" eq "1") { 1051 $name = shift @{ $ref }; 1052 $cont = shift @{ $ref }; 1053 } 1054 1055 my $spacepreserve = 0; 1056 my $attrs = @{$cont}[0]; 1057 $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/)); 1058 1059 traverse($fh, $name, $cont, $language, $spacepreserve); 1060} 1061 1062sub xml_merge_output 1063{ 1064 my $source; 1065 1066 if ($MULTIPLE_OUTPUT) { 1067 for my $lang (sort keys %po_files_by_lang) { 1068 if ( ! -d $lang ) { 1069 mkdir $lang or -d $lang or die "Cannot create subdirectory $lang: $!\n"; 1070 } 1071 open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n"; 1072 binmode (OUTPUT) if $^O eq 'MSWin32'; 1073 my $tree = readXml($FILE); 1074 print_header($FILE, \*OUTPUT); 1075 parseTree(\*OUTPUT, $tree, $lang); 1076 close OUTPUT; 1077 print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG; 1078 } 1079 } 1080 open OUTPUT, ">$OUTFILE" or die "Cannot open $OUTFILE: $!\n"; 1081 binmode (OUTPUT) if $^O eq 'MSWin32'; 1082 my $tree = readXml($FILE); 1083 print_header($FILE, \*OUTPUT); 1084 parseTree(\*OUTPUT, $tree); 1085 close OUTPUT; 1086 print "CREATED $OUTFILE\n" unless $QUIET_ARG; 1087} 1088 1089sub keys_merge_translations 1090{ 1091 open INPUT, "<${FILE}" or die; 1092 open OUTPUT, ">${OUTFILE}" or die; 1093 binmode (OUTPUT) if $^O eq 'MSWin32'; 1094 1095 while (<INPUT>) 1096 { 1097 if (s/^(\s*)_(\w+=(.*))/$1$2/) 1098 { 1099 my $string = $3; 1100 1101 print OUTPUT; 1102 1103 my $non_translated_line = $_; 1104 1105 for my $lang (sort keys %po_files_by_lang) 1106 { 1107 my $translation = $translations{$lang, $string}; 1108 next if !$translation; 1109 1110 $_ = $non_translated_line; 1111 s/(\w+)=.*/[$lang]$1=$translation/; 1112 print OUTPUT; 1113 } 1114 } 1115 else 1116 { 1117 print OUTPUT; 1118 } 1119 } 1120 1121 close OUTPUT; 1122 close INPUT; 1123} 1124 1125sub desktop_merge_translations 1126{ 1127 open INPUT, "<${FILE}" or die; 1128 open OUTPUT, ">${OUTFILE}" or die; 1129 binmode (OUTPUT) if $^O eq 'MSWin32'; 1130 1131 while (<INPUT>) 1132 { 1133 if (s/^(\s*)_(\w+=(.*))/$1$2/) 1134 { 1135 my $string = $3; 1136 1137 print OUTPUT; 1138 1139 my $non_translated_line = $_; 1140 1141 for my $lang (sort keys %po_files_by_lang) 1142 { 1143 my $translation = $translations{$lang, $string}; 1144 next if !$translation; 1145 1146 $_ = $non_translated_line; 1147 s/(\w+)=.*/${1}[$lang]=$translation/; 1148 print OUTPUT; 1149 } 1150 } 1151 else 1152 { 1153 print OUTPUT; 1154 } 1155 } 1156 1157 close OUTPUT; 1158 close INPUT; 1159} 1160 1161sub schemas_merge_translations 1162{ 1163 my $source; 1164 1165 { 1166 local $/; # slurp mode 1167 open INPUT, "<$FILE" or die "can't open $FILE: $!"; 1168 $source = <INPUT>; 1169 close INPUT; 1170 } 1171 1172 open OUTPUT, ">$OUTFILE" or die; 1173 binmode (OUTPUT) if $^O eq 'MSWin32'; 1174 1175 # FIXME: support attribute translations 1176 1177 # Empty nodes never need translation, so unmark all of them. 1178 # For example, <_foo/> is just replaced by <foo/>. 1179 $source =~ s|<\s*_($w+)\s*/>|<$1/>|g; 1180 1181 while ($source =~ s/ 1182 (.*?) 1183 (\s+)(<locale\ name="C">(\s*) 1184 (<default>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/default>)?(\s*) 1185 (<short>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/short>)?(\s*) 1186 (<long>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/long>)?(\s*) 1187 <\/locale>) 1188 //sx) 1189 { 1190 print OUTPUT $1; 1191 1192 my $locale_start_spaces = $2 ? $2 : ''; 1193 my $default_spaces = $4 ? $4 : ''; 1194 my $short_spaces = $7 ? $7 : ''; 1195 my $long_spaces = $10 ? $10 : ''; 1196 my $locale_end_spaces = $13 ? $13 : ''; 1197 my $c_default_block = $3 ? $3 : ''; 1198 my $default_string = $6 ? $6 : ''; 1199 my $short_string = $9 ? $9 : ''; 1200 my $long_string = $12 ? $12 : ''; 1201 1202 print OUTPUT "$locale_start_spaces$c_default_block"; 1203 1204 $default_string =~ s/\s+/ /g; 1205 $default_string = entity_decode($default_string); 1206 $short_string =~ s/\s+/ /g; 1207 $short_string = entity_decode($short_string); 1208 $long_string =~ s/\s+/ /g; 1209 $long_string = entity_decode($long_string); 1210 1211 for my $lang (sort keys %po_files_by_lang) 1212 { 1213 my $default_translation = $translations{$lang, $default_string}; 1214 my $short_translation = $translations{$lang, $short_string}; 1215 my $long_translation = $translations{$lang, $long_string}; 1216 1217 next if (!$default_translation && !$short_translation && 1218 !$long_translation); 1219 1220 print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">"; 1221 1222 print OUTPUT "$default_spaces"; 1223 1224 if ($default_translation) 1225 { 1226 $default_translation = entity_encode($default_translation); 1227 print OUTPUT "<default>$default_translation</default>"; 1228 } 1229 1230 print OUTPUT "$short_spaces"; 1231 1232 if ($short_translation) 1233 { 1234 $short_translation = entity_encode($short_translation); 1235 print OUTPUT "<short>$short_translation</short>"; 1236 } 1237 1238 print OUTPUT "$long_spaces"; 1239 1240 if ($long_translation) 1241 { 1242 $long_translation = entity_encode($long_translation); 1243 print OUTPUT "<long>$long_translation</long>"; 1244 } 1245 1246 print OUTPUT "$locale_end_spaces</locale>"; 1247 } 1248 } 1249 1250 print OUTPUT $source; 1251 1252 close OUTPUT; 1253} 1254 1255sub rfc822deb_merge_translations 1256{ 1257 my %encodings = (); 1258 for my $lang (keys %po_files_by_lang) { 1259 $encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang})); 1260 } 1261 1262 my $source; 1263 1264 $Text::Wrap::huge = 'overflow'; 1265 $Text::Wrap::break = qr/\n|\s(?=\S)/; 1266 1267 { 1268 local $/; # slurp mode 1269 open INPUT, "<$FILE" or die "can't open $FILE: $!"; 1270 $source = <INPUT>; 1271 close INPUT; 1272 } 1273 1274 open OUTPUT, ">${OUTFILE}" or die; 1275 binmode (OUTPUT) if $^O eq 'MSWin32'; 1276 1277 while ($source =~ /(^|\n+)(_*)([^:\s]+)(:[ \t]*)(.*?)(?=\n[\S\n]|$)/sg) 1278 { 1279 my $sep = $1; 1280 my $non_translated_line = $3.$4; 1281 my $string = $5; 1282 my $underscore = length($2); 1283 next if $underscore eq 0 && $non_translated_line =~ /^#/; 1284 # Remove [] dummy strings 1285 my $stripped = $string; 1286 $stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore eq 2; 1287 $stripped =~ s/\[\s[^\[\]]*\]$//; 1288 $non_translated_line .= $stripped; 1289 1290 print OUTPUT $sep.$non_translated_line; 1291 1292 if ($underscore) 1293 { 1294 my @str_list = rfc822deb_split($underscore, $string); 1295 1296 for my $lang (sort keys %po_files_by_lang) 1297 { 1298 my $is_translated = 1; 1299 my $str_translated = ''; 1300 my $first = 1; 1301 1302 for my $str (@str_list) 1303 { 1304 my $translation = $translations{$lang, $str}; 1305 1306 if (!$translation) 1307 { 1308 $is_translated = 0; 1309 last; 1310 } 1311 1312 # $translation may also contain [] dummy 1313 # strings, mostly to indicate an empty string 1314 $translation =~ s/\[\s[^\[\]]*\]$//; 1315 1316 if ($first) 1317 { 1318 if ($underscore eq 2) 1319 { 1320 $str_translated .= $translation; 1321 } 1322 else 1323 { 1324 $str_translated .= 1325 Text::Tabs::expand($translation) . 1326 "\n"; 1327 } 1328 } 1329 else 1330 { 1331 if ($underscore eq 2) 1332 { 1333 $str_translated .= ', ' . $translation; 1334 } 1335 else 1336 { 1337 $str_translated .= Text::Tabs::expand( 1338 Text::Wrap::wrap(' ', ' ', $translation)) . 1339 "\n .\n"; 1340 } 1341 } 1342 $first = 0; 1343 1344 # To fix some problems with Text::Wrap::wrap 1345 $str_translated =~ s/(\n )+\n/\n .\n/g; 1346 } 1347 next unless $is_translated; 1348 1349 $str_translated =~ s/\n \.\n$//; 1350 $str_translated =~ s/\s+$//; 1351 1352 $_ = $non_translated_line; 1353 s/^(\w+):\s*.*/$sep${1}-$lang.$encodings{$lang}: $str_translated/s; 1354 print OUTPUT; 1355 } 1356 } 1357 } 1358 print OUTPUT "\n"; 1359 1360 close OUTPUT; 1361 close INPUT; 1362} 1363 1364sub rfc822deb_split 1365{ 1366 # Debian defines a special way to deal with rfc822-style files: 1367 # when a value contain newlines, it consists of 1368 # 1. a short form (first line) 1369 # 2. a long description, all lines begin with a space, 1370 # and paragraphs are separated by a single dot on a line 1371 # This routine returns an array of all paragraphs, and reformat 1372 # them. 1373 # When first argument is 2, the string is a comma separated list of 1374 # values. 1375 my $type = shift; 1376 my $text = shift; 1377 $text =~ s/^[ \t]//mg; 1378 return (split(/, */, $text, 0)) if $type ne 1; 1379 return ($text) if $text !~ /\n/; 1380 1381 $text =~ s/([^\n]*)\n//; 1382 my @list = ($1); 1383 my $str = ''; 1384 1385 for my $line (split (/\n/, $text)) 1386 { 1387 chomp $line; 1388 if ($line =~ /^\.\s*$/) 1389 { 1390 # New paragraph 1391 $str =~ s/\s*$//; 1392 push(@list, $str); 1393 $str = ''; 1394 } 1395 elsif ($line =~ /^\s/) 1396 { 1397 # Line which must not be reformatted 1398 $str .= "\n" if length ($str) && $str !~ /\n$/; 1399 $line =~ s/\s+$//; 1400 $str .= $line."\n"; 1401 } 1402 else 1403 { 1404 # Continuation line, remove newline 1405 $str .= " " if length ($str) && $str !~ /\n$/; 1406 $str .= $line; 1407 } 1408 } 1409 1410 $str =~ s/\s*$//; 1411 push(@list, $str) if length ($str); 1412 1413 return @list; 1414} 1415 1416sub quoted_translation 1417{ 1418 my ($lang, $string) = @_; 1419 1420 $string =~ s/\\\"/\"/g; 1421 1422 my $translation = $translations{$lang, $string}; 1423 $translation = $string if !$translation; 1424 1425 $translation =~ s/\"/\\\"/g; 1426 return $translation 1427} 1428 1429sub quoted_merge_translations 1430{ 1431 if (!$MULTIPLE_OUTPUT) { 1432 print "Quoted only supports Multiple Output.\n"; 1433 exit(1); 1434 } 1435 1436 for my $lang (sort keys %po_files_by_lang) { 1437 if ( ! -d $lang ) { 1438 mkdir $lang or -d $lang or die "Cannot create subdirectory $lang: $!\n"; 1439 } 1440 open INPUT, "<${FILE}" or die; 1441 open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n"; 1442 binmode (OUTPUT) if $^O eq 'MSWin32'; 1443 while (<INPUT>) 1444 { 1445 s/\"(([^\"]|\\\")*[^\\\"])\"/"\"" . "ed_translation($lang, $1) . "\""/ge; 1446 print OUTPUT; 1447 } 1448 close OUTPUT; 1449 close INPUT; 1450 } 1451} 1452