1#!@INTLTOOL_PERL@ -w 2# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4 -*- 3 4# 5# The Intltool Message Merger 6# 7# Copyright (C) 2000, 2003 Free Software Foundation. 8# Copyright (C) 2000, 2001 Eazel, Inc 9# 10# Intltool is free software; you can redistribute it and/or 11# modify it under the terms of the GNU General Public License 12# version 2 published by the Free Software Foundation. 13# 14# Intltool is distributed in the hope that it will be useful, 15# but WITHOUT ANY WARRANTY; without even the implied warranty of 16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17# General Public License for more details. 18# 19# You should have received a copy of the GNU General Public License 20# along with this program; if not, write to the Free Software 21# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 22# 23# As a special exception to the GNU General Public License, if you 24# distribute this file as part of a program that contains a 25# configuration script generated by Autoconf, you may include it under 26# the same distribution terms that you use for the rest of that program. 27# 28# Authors: Maciej Stachowiak <mjs@noisehavoc.org> 29# Kenneth Christiansen <kenneth@gnu.org> 30# Darin Adler <darin@bentspoon.com> 31# 32# Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net> 33# 34 35## Release information 36my $PROGRAM = "intltool-merge"; 37my $PACKAGE = "intltool"; 38my $VERSION = "0.32.1"; 39 40## Loaded modules 41use strict; 42use Getopt::Long; 43use Text::Wrap; 44use File::Basename; 45 46my $must_end_tag = -1; 47my $last_depth = -1; 48my $translation_depth = -1; 49my @tag_stack = (); 50my @entered_tag = (); 51my @translation_strings = (); 52my $leading_space = ""; 53 54## Scalars used by the option stuff 55my $HELP_ARG = 0; 56my $VERSION_ARG = 0; 57my $BA_STYLE_ARG = 0; 58my $XML_STYLE_ARG = 0; 59my $KEYS_STYLE_ARG = 0; 60my $DESKTOP_STYLE_ARG = 0; 61my $SCHEMAS_STYLE_ARG = 0; 62my $RFC822DEB_STYLE_ARG = 0; 63my $QUIET_ARG = 0; 64my $PASS_THROUGH_ARG = 0; 65my $UTF8_ARG = 0; 66my $MULTIPLE_OUTPUT = 0; 67my $cache_file; 68 69## Handle options 70GetOptions 71( 72 "help" => \$HELP_ARG, 73 "version" => \$VERSION_ARG, 74 "quiet|q" => \$QUIET_ARG, 75 "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility 76 "ba-style|b" => \$BA_STYLE_ARG, 77 "xml-style|x" => \$XML_STYLE_ARG, 78 "keys-style|k" => \$KEYS_STYLE_ARG, 79 "desktop-style|d" => \$DESKTOP_STYLE_ARG, 80 "schemas-style|s" => \$SCHEMAS_STYLE_ARG, 81 "rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG, 82 "pass-through|p" => \$PASS_THROUGH_ARG, 83 "utf8|u" => \$UTF8_ARG, 84 "multiple-output|m" => \$MULTIPLE_OUTPUT, 85 "cache|c=s" => \$cache_file 86 ) or &error; 87 88my $PO_DIR; 89my $FILE; 90my $OUTFILE; 91 92my %po_files_by_lang = (); 93my %translations = (); 94my $iconv = $ENV{"ICONV"} || $ENV{"INTLTOOL_ICONV"} || "@INTLTOOL_ICONV@"; 95 96# Use this instead of \w for XML files to handle more possible characters. 97my $w = "[-A-Za-z0-9._:]"; 98 99# XML quoted string contents 100my $q = "[^\\\"]*"; 101 102## Check for options. 103 104if ($VERSION_ARG) 105{ 106 &print_version; 107} 108elsif ($HELP_ARG) 109{ 110 &print_help; 111} 112elsif ($BA_STYLE_ARG && @ARGV > 2) 113{ 114 &utf8_sanity_check; 115 &preparation; 116 &print_message; 117 &ba_merge_translations; 118 &finalize; 119} 120elsif ($XML_STYLE_ARG && @ARGV > 2) 121{ 122 &utf8_sanity_check; 123 &preparation; 124 &print_message; 125 &xml_merge_output; 126 &finalize; 127} 128elsif ($KEYS_STYLE_ARG && @ARGV > 2) 129{ 130 &utf8_sanity_check; 131 &preparation; 132 &print_message; 133 &keys_merge_translations; 134 &finalize; 135} 136elsif ($DESKTOP_STYLE_ARG && @ARGV > 2) 137{ 138 &utf8_sanity_check; 139 &preparation; 140 &print_message; 141 &desktop_merge_translations; 142 &finalize; 143} 144elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2) 145{ 146 &utf8_sanity_check; 147 &preparation; 148 &print_message; 149 &schemas_merge_translations; 150 &finalize; 151} 152elsif ($RFC822DEB_STYLE_ARG && @ARGV > 2) 153{ 154 &preparation; 155 &print_message; 156 &rfc822deb_merge_translations; 157 &finalize; 158} 159else 160{ 161 &print_help; 162} 163 164exit; 165 166## Sub for printing release information 167sub print_version 168{ 169 print <<_EOF_; 170${PROGRAM} (${PACKAGE}) ${VERSION} 171Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen. 172 173Copyright (C) 2000-2003 Free Software Foundation, Inc. 174Copyright (C) 2000-2001 Eazel, Inc. 175This is free software; see the source for copying conditions. There is NO 176warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 177_EOF_ 178 exit; 179} 180 181## Sub for printing usage information 182sub print_help 183{ 184 print <<_EOF_; 185Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE 186Generates an output file that includes some localized attributes from an 187untranslated source file. 188 189Mandatory options: (exactly one must be specified) 190 -b, --ba-style includes translations in the bonobo-activation style 191 -d, --desktop-style includes translations in the desktop style 192 -k, --keys-style includes translations in the keys style 193 -s, --schemas-style includes translations in the schemas style 194 -r, --rfc822deb-style includes translations in the RFC822 style 195 -x, --xml-style includes translations in the standard xml style 196 197Other options: 198 -u, --utf8 convert all strings to UTF-8 before merging 199 (default for everything except RFC822 style) 200 -p, --pass-through deprecated, does nothing and issues a warning 201 -m, --multiple-output output one localized file per locale, instead of 202 a single file containing all localized elements 203 -c, --cache=FILE specify cache file name 204 (usually \$top_builddir/po/.intltool-merge-cache) 205 -q, --quiet suppress most messages 206 --help display this help and exit 207 --version output version information and exit 208 209Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE") 210or send email to <xml-i18n-tools\@gnome.org>. 211_EOF_ 212 exit; 213} 214 215 216## Sub for printing error messages 217sub print_error 218{ 219 print STDERR "Try `${PROGRAM} --help' for more information.\n"; 220 exit; 221} 222 223 224sub print_message 225{ 226 print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG; 227} 228 229 230sub preparation 231{ 232 $PO_DIR = $ARGV[0]; 233 $FILE = $ARGV[1]; 234 $OUTFILE = $ARGV[2]; 235 236 &gather_po_files; 237 &get_translation_database; 238} 239 240# General-purpose code for looking up translations in .po files 241 242sub po_file2lang 243{ 244 my ($tmp) = @_; 245 $tmp =~ s/^.*\/(.*)\.po$/$1/; 246 return $tmp; 247} 248 249sub gather_po_files 250{ 251 for my $po_file (glob "$PO_DIR/*.po") { 252 $po_files_by_lang{po_file2lang($po_file)} = $po_file; 253 } 254} 255 256sub get_local_charset 257{ 258 my ($encoding) = @_; 259 my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "/usr/local/libdata/charset.alias"; 260 261 # seek character encoding aliases in charset.alias (glib) 262 263 if (open CHARSET_ALIAS, $alias_file) 264 { 265 while (<CHARSET_ALIAS>) 266 { 267 next if /^\#/; 268 return $1 if (/^\s*([-._a-zA-Z0-9]+)\s+$encoding\b/i) 269 } 270 271 close CHARSET_ALIAS; 272 } 273 274 # if not found, return input string 275 276 return $encoding; 277} 278 279sub get_po_encoding 280{ 281 my ($in_po_file) = @_; 282 my $encoding = ""; 283 284 open IN_PO_FILE, $in_po_file or die; 285 while (<IN_PO_FILE>) 286 { 287 ## example: "Content-Type: text/plain; charset=ISO-8859-1\n" 288 if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/) 289 { 290 $encoding = $1; 291 last; 292 } 293 } 294 close IN_PO_FILE; 295 296 if (!$encoding) 297 { 298 print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG; 299 $encoding = "ISO-8859-1"; 300 } 301 302 system ("$iconv -f $encoding -t UTF-8 </dev/null 2>/dev/null"); 303 if ($?) { 304 $encoding = get_local_charset($encoding); 305 } 306 307 return $encoding 308} 309 310sub utf8_sanity_check 311{ 312 print STDERR "Warning: option --pass-through has been removed.\n" if $PASS_THROUGH_ARG; 313 $UTF8_ARG = 1; 314} 315 316sub get_translation_database 317{ 318 if ($cache_file) { 319 &get_cached_translation_database; 320 } else { 321 &create_translation_database; 322 } 323} 324 325sub get_newest_po_age 326{ 327 my $newest_age; 328 329 foreach my $file (values %po_files_by_lang) 330 { 331 my $file_age = -M $file; 332 $newest_age = $file_age if !$newest_age || $file_age < $newest_age; 333 } 334 335 $newest_age = 0 if !$newest_age; 336 337 return $newest_age; 338} 339 340sub create_cache 341{ 342 print "Generating and caching the translation database\n" unless $QUIET_ARG; 343 344 &create_translation_database; 345 346 open CACHE, ">$cache_file" || die; 347 print CACHE join "\x01", %translations; 348 close CACHE; 349} 350 351sub load_cache 352{ 353 print "Found cached translation database\n" unless $QUIET_ARG; 354 355 my $contents; 356 open CACHE, "<$cache_file" || die; 357 { 358 local $/; 359 $contents = <CACHE>; 360 } 361 close CACHE; 362 %translations = split "\x01", $contents; 363} 364 365sub get_cached_translation_database 366{ 367 my $cache_file_age = -M $cache_file; 368 if (defined $cache_file_age) 369 { 370 if ($cache_file_age <= &get_newest_po_age) 371 { 372 &load_cache; 373 return; 374 } 375 print "Found too-old cached translation database\n" unless $QUIET_ARG; 376 } 377 378 &create_cache; 379} 380 381sub create_translation_database 382{ 383 for my $lang (keys %po_files_by_lang) 384 { 385 my $po_file = $po_files_by_lang{$lang}; 386 387 if ($UTF8_ARG) 388 { 389 my $encoding = get_po_encoding ($po_file); 390 391 if (lc $encoding eq "utf-8") 392 { 393 open PO_FILE, "<$po_file"; 394 } 395 else 396 { 397 print STDERR "WARNING: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;; 398 399 open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|"; 400 } 401 } 402 else 403 { 404 open PO_FILE, "<$po_file"; 405 } 406 407 my $nextfuzzy = 0; 408 my $inmsgid = 0; 409 my $inmsgstr = 0; 410 my $msgid = ""; 411 my $msgstr = ""; 412 413 while (<PO_FILE>) 414 { 415 $nextfuzzy = 1 if /^#, fuzzy/; 416 417 if (/^msgid "((\\.|[^\\])*)"/ ) 418 { 419 $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr; 420 $msgid = ""; 421 $msgstr = ""; 422 423 if ($nextfuzzy) { 424 $inmsgid = 0; 425 } else { 426 $msgid = unescape_po_string($1); 427 $inmsgid = 1; 428 } 429 $inmsgstr = 0; 430 $nextfuzzy = 0; 431 } 432 433 if (/^msgstr "((\\.|[^\\])*)"/) 434 { 435 $msgstr = unescape_po_string($1); 436 $inmsgstr = 1; 437 $inmsgid = 0; 438 } 439 440 if (/^"((\\.|[^\\])*)"/) 441 { 442 $msgid .= unescape_po_string($1) if $inmsgid; 443 $msgstr .= unescape_po_string($1) if $inmsgstr; 444 } 445 } 446 $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr; 447 } 448} 449 450sub finalize 451{ 452} 453 454sub unescape_one_sequence 455{ 456 my ($sequence) = @_; 457 458 return "\\" if $sequence eq "\\\\"; 459 return "\"" if $sequence eq "\\\""; 460 return "\n" if $sequence eq "\\n"; 461 return "\r" if $sequence eq "\\r"; 462 return "\t" if $sequence eq "\\t"; 463 return "\b" if $sequence eq "\\b"; 464 return "\f" if $sequence eq "\\f"; 465 return "\a" if $sequence eq "\\a"; 466 return chr(11) if $sequence eq "\\v"; # vertical tab, see ascii(7) 467 468 return chr(hex($1)) if ($sequence =~ /\\x([0-9a-fA-F]{2})/); 469 return chr(oct($1)) if ($sequence =~ /\\([0-7]{3})/); 470 471 # FIXME: Is \0 supported as well? Kenneth and Rodney don't want it, see bug #48489 472 473 return $sequence; 474} 475 476sub unescape_po_string 477{ 478 my ($string) = @_; 479 480 $string =~ s/(\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\.)/unescape_one_sequence($1)/eg; 481 482 return $string; 483} 484 485## NOTE: deal with < - < but not > - > because it seems its ok to have 486## > in the entity. For further info please look at #84738. 487sub entity_decode 488{ 489 local ($_) = @_; 490 491 s/'/'/g; # ' 492 s/"/"/g; # " 493 s/&/&/g; 494 s/</</g; 495 496 return $_; 497} 498 499# entity_encode: (string) 500# 501# Encode the given string to XML format (encode '<' etc). 502 503sub entity_encode 504{ 505 my ($pre_encoded) = @_; 506 507 my @list_of_chars = unpack ('C*', $pre_encoded); 508 509 # with UTF-8 we only encode minimalistic 510 return join ('', map (&entity_encode_int_minimalist, @list_of_chars)); 511} 512 513sub entity_encode_int_minimalist 514{ 515 return """ if $_ == 34; 516 return "&" if $_ == 38; 517 return "'" if $_ == 39; 518 return "<" if $_ == 60; 519 return chr $_; 520} 521 522sub entity_encoded_translation 523{ 524 my ($lang, $string) = @_; 525 526 my $translation = $translations{$lang, $string}; 527 return $string if !$translation; 528 return entity_encode ($translation); 529} 530 531## XML (bonobo-activation specific) merge code 532 533sub ba_merge_translations 534{ 535 my $source; 536 537 { 538 local $/; # slurp mode 539 open INPUT, "<$FILE" or die "can't open $FILE: $!"; 540 $source = <INPUT>; 541 close INPUT; 542 } 543 544 open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!"; 545 546 while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s) 547 { 548 print OUTPUT $1; 549 550 my $node = $2 . "\n"; 551 552 my @strings = (); 553 $_ = $node; 554 while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) { 555 push @strings, entity_decode($3); 556 } 557 print OUTPUT; 558 559 my %langs; 560 for my $string (@strings) 561 { 562 for my $lang (keys %po_files_by_lang) 563 { 564 $langs{$lang} = 1 if $translations{$lang, $string}; 565 } 566 } 567 568 for my $lang (sort keys %langs) 569 { 570 $_ = $node; 571 s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s; 572 s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg; 573 print OUTPUT; 574 } 575 } 576 577 print OUTPUT $source; 578 579 close OUTPUT; 580} 581 582 583## XML (non-bonobo-activation) merge code 584 585 586# Process tag attributes 587# Only parameter is a HASH containing attributes -> values mapping 588sub getAttributeString 589{ 590 my $sub = shift; 591 my $do_translate = shift || 0; 592 my $language = shift || ""; 593 my $result = ""; 594 my $translate = shift; 595 foreach my $e (reverse(sort(keys %{ $sub }))) { 596 my $key = $e; 597 my $string = $sub->{$e}; 598 my $quote = '"'; 599 600 $string =~ s/^[\s]+//; 601 $string =~ s/[\s]+$//; 602 603 if ($string =~ /^'.*'$/) 604 { 605 $quote = "'"; 606 } 607 $string =~ s/^['"]//g; 608 $string =~ s/['"]$//g; 609 610 if ($do_translate && $key =~ /^_/) { 611 $key =~ s|^_||g; 612 if ($language) { 613 614 # Handle translation 615 # 616 my $decode_string = entity_decode($string); 617 my $translation = $translations{$language, $decode_string}; 618 if ($translation) { 619 $translation = entity_encode($translation); 620 $string = $translation; 621 $$translate = 2; 622 } 623 } else { 624 $$translate = 1 if ($translate && (!$$translate)); # watch not to "overwrite" if $translate == 2 625 } 626 } 627 628 $result .= " $key=$quote$string$quote"; 629 } 630 return $result; 631} 632 633# Returns a translatable string from XML node, it works on contents of every node in XML::Parser tree 634# doesn't support nesting of translatable tags (i.e. <_blah>this <_doh>doesn't</_doh> work</_blah> -- besides 635# can you define the correct semantics for this?) 636# 637 638sub getXMLstring 639{ 640 my $ref = shift; 641 my @list = @{ $ref }; 642 my $result = ""; 643 644 my $count = scalar(@list); 645 my $attrs = $list[0]; 646 my $index = 1; 647 648 while ($index < $count) { 649 my $type = $list[$index]; 650 my $content = $list[$index+1]; 651 if (! $type ) { 652 # We've got CDATA 653 if ($content) { 654 # lets strip the whitespace here, and *ONLY* here 655 $content =~ s/\s+/ /gs if (!((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/))); 656 $result .= ($content); 657 } else { 658 #print "no cdata content when expected it\n"; # is this possible, is this ok? 659 # what to do if this happens? 660 # Did I mention that I hate XML::Parser tree style? 661 } 662 } else { 663 # We've got another element 664 $result .= "<$type"; 665 $result .= getAttributeString(@{$content}[0], 0); # no nested translatable elements 666 if ($content) { 667 my $subresult = getXMLstring($content); 668 if ($subresult) { 669 $result .= ">".$subresult . "</$type>"; 670 } else { 671 $result .= "/>"; 672 } 673 } else { 674 $result .= "/>"; 675 } 676 } 677 $index += 2; 678 } 679 return $result; 680} 681 682sub traverse 683{ 684 my $fh = shift; 685 my $nodename = shift; 686 my $content = shift; 687 my $language = shift || ""; 688 689 if (!$nodename) { 690 if ($content =~ /^[\s]*$/) { 691 $leading_space .= $content; 692 } 693 print $fh $content; 694 } else { 695 # element 696 my @all = @{ $content }; 697 my $attrs = shift @all; 698 my $translate = 0; 699 my $outattr = getAttributeString($attrs, 1, $language, \$translate); 700 701 if ($nodename =~ /^_/) { 702 $translate = 1; 703 $nodename =~ s/^_//; 704 } 705 my $lookup = ''; 706 print $fh "<$nodename", $outattr; 707 if ($translate) { 708 $lookup = getXMLstring($content); 709 if (!((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/))) { 710 $lookup =~ s/^\s+//s; 711 $lookup =~ s/\s+$//s; 712 } 713 714 if ($lookup || $translate == 2) { 715 my $translation = $translations{$language, $lookup}; 716 if ($MULTIPLE_OUTPUT && ($translation || $translate == 2)) { 717 $translation = $lookup if (!$translation); 718 print $fh " xml:lang=\"", $language, "\""; 719 print $fh ">", $translation, "</$nodename>"; 720 return; # this means there will be no same translation with xml:lang="$language"... 721 # if we want them both, just remove this "return" 722 } else { 723 print $fh ">$lookup</$nodename>"; 724 } 725 } else { 726 print $fh "/>"; 727 } 728 729 for my $lang (sort keys %po_files_by_lang) { 730 if ($MULTIPLE_OUTPUT && $lang ne "$language") { 731 next; 732 } 733 if ($lang) { 734 # Handle translation 735 # 736 my $translate = 0; 737 my $localattrs = getAttributeString($attrs, 1, $lang, \$translate); 738 my $translation = $translations{$lang, $lookup}; 739 if ($translate && !$translation) { 740 $translation = $lookup; 741 } 742 743 if ($translation || $translate) { 744 $translation = ($translation); 745 print $fh "\n"; 746 $leading_space =~ s/.*\n//g; 747 print $fh $leading_space; 748 print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs; 749 print $fh ">", $translation , "</$nodename>"; 750 } 751 } 752 } 753 754 } else { 755 my $count = scalar(@all); 756 if ($count > 0) { 757 print $fh ">"; 758 } else { 759 print $fh "/>"; 760 } 761 my $index = 0; 762 while ($index < $count) { 763 my $type = $all[$index]; 764 my $rest = $all[$index+1]; 765 traverse($fh, $type, $rest, $language); 766 $index += 2; 767 } 768 if ($count > 0) { 769 print $fh "</$nodename>"; 770 } 771 } 772 } 773} 774 775sub intltool_tree_char 776{ 777 my $expat = shift; 778 my $text = shift; 779 my $clist = $expat->{Curlist}; 780 my $pos = $#$clist; 781 782 # Use original_string so that we retain escaped entities 783 # in CDATA sections. 784 # 785 if ($pos > 0 and $clist->[$pos - 1] eq '0') { 786 $clist->[$pos] .= $expat->original_string(); 787 } else { 788 push @$clist, 0 => $expat->original_string(); 789 } 790} 791 792sub intltool_tree_start 793{ 794 my $expat = shift; 795 my $tag = shift; 796 my @origlist = (); 797 798 # Use original_string so that we retain escaped entities 799 # in attribute values. We must convert the string to an 800 # @origlist array to conform to the structure of the Tree 801 # Style. 802 # 803 my @original_array = split /\x/, $expat->original_string(); 804 my $source = $expat->original_string(); 805 806 # Remove leading tag. 807 # 808 $source =~ s|^\s*<\s*(\S+)||s; 809 810 # Grab attribute key/value pairs and push onto @origlist array. 811 # 812 while ($source) 813 { 814 if ($source =~ /^\s*([\w:-]+)\s*[=]\s*["]/) 815 { 816 $source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s; 817 push @origlist, $1; 818 push @origlist, '"' . $2 . '"'; 819 } 820 elsif ($source =~ /^\s*([\w:-]+)\s*[=]\s*[']/) 821 { 822 $source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s; 823 push @origlist, $1; 824 push @origlist, "'" . $2 . "'"; 825 } 826 else 827 { 828 last; 829 } 830 } 831 832 my $ol = [ { @origlist } ]; 833 834 push @{ $expat->{Lists} }, $expat->{Curlist}; 835 push @{ $expat->{Curlist} }, $tag => $ol; 836 $expat->{Curlist} = $ol; 837} 838 839sub readXml 840{ 841 my $filename = shift || return; 842 if(!-f $filename) { 843 die "ERROR Cannot find filename: $filename\n"; 844 } 845 846 my $ret = eval 'require XML::Parser'; 847 if(!$ret) { 848 die "You must have XML::Parser installed to run $0\n\n"; 849 } 850 my $xp = new XML::Parser(Style => 'Tree'); 851 $xp->setHandlers(Char => \&intltool_tree_char); 852 $xp->setHandlers(Start => \&intltool_tree_start); 853 my $tree = $xp->parsefile($filename); 854 855# <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo> 856# would be: 857# [foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]], bar, [{}, 858# 0, "Howdy", ref, [{}]], 0, "do" ] ] 859 860 return $tree; 861} 862 863sub print_header 864{ 865 my $infile = shift; 866 my $fh = shift; 867 my $source; 868 869 if(!-f $infile) { 870 die "ERROR Cannot find filename: $infile\n"; 871 } 872 873 print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n}; 874 { 875 local $/; 876 open DOCINPUT, "<${FILE}" or die; 877 $source = <DOCINPUT>; 878 close DOCINPUT; 879 } 880 if ($source =~ /(<!DOCTYPE.*\[.*\]\s*>)/s) 881 { 882 print $fh "$1\n"; 883 } 884 elsif ($source =~ /(<!DOCTYPE[^>]*>)/s) 885 { 886 print $fh "$1\n"; 887 } 888} 889 890sub parseTree 891{ 892 my $fh = shift; 893 my $ref = shift; 894 my $language = shift || ""; 895 896 my $name = shift @{ $ref }; 897 my $cont = shift @{ $ref }; 898 traverse($fh, $name, $cont, $language); 899} 900 901sub xml_merge_output 902{ 903 my $source; 904 905 if ($MULTIPLE_OUTPUT) { 906 for my $lang (sort keys %po_files_by_lang) { 907 if ( ! -e $lang ) { 908 mkdir $lang, 0777 or die "Cannot create subdirectory $lang: $!\n"; 909 } 910 open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n"; 911 my $tree = readXml($FILE); 912 print_header($FILE, \*OUTPUT); 913 parseTree(\*OUTPUT, $tree, $lang); 914 close OUTPUT; 915 print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG; 916 } 917 } 918 open OUTPUT, ">$OUTFILE" or die "Cannot open $OUTFILE: $!\n"; 919 my $tree = readXml($FILE); 920 print_header($FILE, \*OUTPUT); 921 parseTree(\*OUTPUT, $tree); 922 close OUTPUT; 923 print "CREATED $OUTFILE\n" unless $QUIET_ARG; 924} 925 926sub keys_merge_translations 927{ 928 open INPUT, "<${FILE}" or die; 929 open OUTPUT, ">${OUTFILE}" or die; 930 931 while (<INPUT>) 932 { 933 if (s/^(\s*)_(\w+=(.*))/$1$2/) 934 { 935 my $string = $3; 936 937 print OUTPUT; 938 939 my $non_translated_line = $_; 940 941 for my $lang (sort keys %po_files_by_lang) 942 { 943 my $translation = $translations{$lang, $string}; 944 next if !$translation; 945 946 $_ = $non_translated_line; 947 s/(\w+)=.*/[$lang]$1=$translation/; 948 print OUTPUT; 949 } 950 } 951 else 952 { 953 print OUTPUT; 954 } 955 } 956 957 close OUTPUT; 958 close INPUT; 959} 960 961sub desktop_merge_translations 962{ 963 open INPUT, "<${FILE}" or die; 964 open OUTPUT, ">${OUTFILE}" or die; 965 966 while (<INPUT>) 967 { 968 if (s/^(\s*)_(\w+=(.*))/$1$2/) 969 { 970 my $string = $3; 971 972 print OUTPUT; 973 974 my $non_translated_line = $_; 975 976 for my $lang (sort keys %po_files_by_lang) 977 { 978 my $translation = $translations{$lang, $string}; 979 next if !$translation; 980 981 $_ = $non_translated_line; 982 s/(\w+)=.*/${1}[$lang]=$translation/; 983 print OUTPUT; 984 } 985 } 986 else 987 { 988 print OUTPUT; 989 } 990 } 991 992 close OUTPUT; 993 close INPUT; 994} 995 996sub schemas_merge_translations 997{ 998 my $source; 999 1000 { 1001 local $/; # slurp mode 1002 open INPUT, "<$FILE" or die "can't open $FILE: $!"; 1003 $source = <INPUT>; 1004 close INPUT; 1005 } 1006 1007 open OUTPUT, ">$OUTFILE" or die; 1008 1009 # FIXME: support attribute translations 1010 1011 # Empty nodes never need translation, so unmark all of them. 1012 # For example, <_foo/> is just replaced by <foo/>. 1013 $source =~ s|<\s*_($w+)\s*/>|<$1/>|g; 1014 1015 while ($source =~ s/ 1016 (.*?) 1017 (\s+)(<locale\ name="C">(\s*) 1018 (<default>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/default>)?(\s*) 1019 (<short>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/short>)?(\s*) 1020 (<long>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/long>)?(\s*) 1021 <\/locale>) 1022 //sx) 1023 { 1024 print OUTPUT $1; 1025 1026 my $locale_start_spaces = $2 ? $2 : ''; 1027 my $default_spaces = $4 ? $4 : ''; 1028 my $short_spaces = $7 ? $7 : ''; 1029 my $long_spaces = $10 ? $10 : ''; 1030 my $locale_end_spaces = $13 ? $13 : ''; 1031 my $c_default_block = $3 ? $3 : ''; 1032 my $default_string = $6 ? $6 : ''; 1033 my $short_string = $9 ? $9 : ''; 1034 my $long_string = $12 ? $12 : ''; 1035 1036 print OUTPUT "$locale_start_spaces$c_default_block"; 1037 1038 $default_string =~ s/\s+/ /g; 1039 $default_string = entity_decode($default_string); 1040 $short_string =~ s/\s+/ /g; 1041 $short_string = entity_decode($short_string); 1042 $long_string =~ s/\s+/ /g; 1043 $long_string = entity_decode($long_string); 1044 1045 for my $lang (sort keys %po_files_by_lang) 1046 { 1047 my $default_translation = $translations{$lang, $default_string}; 1048 my $short_translation = $translations{$lang, $short_string}; 1049 my $long_translation = $translations{$lang, $long_string}; 1050 1051 next if (!$default_translation && !$short_translation && 1052 !$long_translation); 1053 1054 print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">"; 1055 1056 print OUTPUT "$default_spaces"; 1057 1058 if ($default_translation) 1059 { 1060 $default_translation = entity_encode($default_translation); 1061 print OUTPUT "<default>$default_translation</default>"; 1062 } 1063 1064 print OUTPUT "$short_spaces"; 1065 1066 if ($short_translation) 1067 { 1068 $short_translation = entity_encode($short_translation); 1069 print OUTPUT "<short>$short_translation</short>"; 1070 } 1071 1072 print OUTPUT "$long_spaces"; 1073 1074 if ($long_translation) 1075 { 1076 $long_translation = entity_encode($long_translation); 1077 print OUTPUT "<long>$long_translation</long>"; 1078 } 1079 1080 print OUTPUT "$locale_end_spaces</locale>"; 1081 } 1082 } 1083 1084 print OUTPUT $source; 1085 1086 close OUTPUT; 1087} 1088 1089sub rfc822deb_merge_translations 1090{ 1091 my %encodings = (); 1092 for my $lang (keys %po_files_by_lang) { 1093 $encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang})); 1094 } 1095 1096 my $source; 1097 1098 $Text::Wrap::huge = 'overflow'; 1099 $Text::Wrap::break = qr/\n|\s(?=\S)/; 1100 1101 { 1102 local $/; # slurp mode 1103 open INPUT, "<$FILE" or die "can't open $FILE: $!"; 1104 $source = <INPUT>; 1105 close INPUT; 1106 } 1107 1108 open OUTPUT, ">${OUTFILE}" or die; 1109 1110 while ($source =~ /(^|\n+)(_*)([^:\s]+)(:[ \t]*)(.*?)(?=\n[\S\n]|$)/sg) 1111 { 1112 my $sep = $1; 1113 my $non_translated_line = $3.$4; 1114 my $string = $5; 1115 my $underscore = length($2); 1116 next if $underscore eq 0 && $non_translated_line =~ /^#/; 1117 # Remove [] dummy strings 1118 my $stripped = $string; 1119 $stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore eq 2; 1120 $stripped =~ s/\[\s[^\[\]]*\]$//; 1121 $non_translated_line .= $stripped; 1122 1123 print OUTPUT $sep.$non_translated_line; 1124 1125 if ($underscore) 1126 { 1127 my @str_list = rfc822deb_split($underscore, $string); 1128 1129 for my $lang (sort keys %po_files_by_lang) 1130 { 1131 my $is_translated = 1; 1132 my $str_translated = ''; 1133 my $first = 1; 1134 1135 for my $str (@str_list) 1136 { 1137 my $translation = $translations{$lang, $str}; 1138 1139 if (!$translation) 1140 { 1141 $is_translated = 0; 1142 last; 1143 } 1144 1145 # $translation may also contain [] dummy 1146 # strings, mostly to indicate an empty string 1147 $translation =~ s/\[\s[^\[\]]*\]$//; 1148 1149 if ($first) 1150 { 1151 if ($underscore eq 2) 1152 { 1153 $str_translated .= $translation; 1154 } 1155 else 1156 { 1157 $str_translated .= 1158 Text::Tabs::expand($translation) . 1159 "\n"; 1160 } 1161 } 1162 else 1163 { 1164 if ($underscore eq 2) 1165 { 1166 $str_translated .= ', ' . $translation; 1167 } 1168 else 1169 { 1170 $str_translated .= Text::Tabs::expand( 1171 Text::Wrap::wrap(' ', ' ', $translation)) . 1172 "\n .\n"; 1173 } 1174 } 1175 $first = 0; 1176 1177 # To fix some problems with Text::Wrap::wrap 1178 $str_translated =~ s/(\n )+\n/\n .\n/g; 1179 } 1180 next unless $is_translated; 1181 1182 $str_translated =~ s/\n \.\n$//; 1183 $str_translated =~ s/\s+$//; 1184 1185 $_ = $non_translated_line; 1186 s/^(\w+):\s*.*/$sep${1}-$lang.$encodings{$lang}: $str_translated/s; 1187 print OUTPUT; 1188 } 1189 } 1190 } 1191 print OUTPUT "\n"; 1192 1193 close OUTPUT; 1194 close INPUT; 1195} 1196 1197sub rfc822deb_split 1198{ 1199 # Debian defines a special way to deal with rfc822-style files: 1200 # when a value contain newlines, it consists of 1201 # 1. a short form (first line) 1202 # 2. a long description, all lines begin with a space, 1203 # and paragraphs are separated by a single dot on a line 1204 # This routine returns an array of all paragraphs, and reformat 1205 # them. 1206 # When first argument is 2, the string is a comma separated list of 1207 # values. 1208 my $type = shift; 1209 my $text = shift; 1210 $text =~ s/^[ \t]//mg; 1211 return (split(/, */, $text, 0)) if $type ne 1; 1212 return ($text) if $text !~ /\n/; 1213 1214 $text =~ s/([^\n]*)\n//; 1215 my @list = ($1); 1216 my $str = ''; 1217 1218 for my $line (split (/\n/, $text)) 1219 { 1220 chomp $line; 1221 if ($line =~ /^\.\s*$/) 1222 { 1223 # New paragraph 1224 $str =~ s/\s*$//; 1225 push(@list, $str); 1226 $str = ''; 1227 } 1228 elsif ($line =~ /^\s/) 1229 { 1230 # Line which must not be reformatted 1231 $str .= "\n" if length ($str) && $str !~ /\n$/; 1232 $line =~ s/\s+$//; 1233 $str .= $line."\n"; 1234 } 1235 else 1236 { 1237 # Continuation line, remove newline 1238 $str .= " " if length ($str) && $str !~ /\n$/; 1239 $str .= $line; 1240 } 1241 } 1242 1243 $str =~ s/\s*$//; 1244 push(@list, $str) if length ($str); 1245 1246 return @list; 1247} 1248 1249