1#!@INTLTOOL_PERL@ -w 2# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4 -*- 3 4# 5# The Intltool Message Merger 6# 7# Copyright (C) 2000, 2003 Free Software Foundation. 8# Copyright (C) 2000, 2001 Eazel, Inc 9# 10# Intltool is free software; you can redistribute it and/or 11# modify it under the terms of the GNU General Public License 12# version 2 published by the Free Software Foundation. 13# 14# Intltool is distributed in the hope that it will be useful, 15# but WITHOUT ANY WARRANTY; without even the implied warranty of 16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17# General Public License for more details. 18# 19# You should have received a copy of the GNU General Public License 20# along with this program; if not, write to the Free Software 21# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 22# 23# As a special exception to the GNU General Public License, if you 24# distribute this file as part of a program that contains a 25# configuration script generated by Autoconf, you may include it under 26# the same distribution terms that you use for the rest of that program. 27# 28# Authors: Maciej Stachowiak <mjs@noisehavoc.org> 29# Kenneth Christiansen <kenneth@gnu.org> 30# Darin Adler <darin@bentspoon.com> 31# 32# Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net> 33# 34 35## Release information 36my $PROGRAM = "intltool-merge"; 37my $PACKAGE = "intltool"; 38my $VERSION = "0.33"; 39 40## Loaded modules 41use strict; 42use Getopt::Long; 43use Text::Wrap; 44use File::Basename; 45 46my $must_end_tag = -1; 47my $last_depth = -1; 48my $translation_depth = -1; 49my @tag_stack = (); 50my @entered_tag = (); 51my @translation_strings = (); 52my $leading_space = ""; 53 54## Scalars used by the option stuff 55my $HELP_ARG = 0; 56my $VERSION_ARG = 0; 57my $BA_STYLE_ARG = 0; 58my $XML_STYLE_ARG = 0; 59my $KEYS_STYLE_ARG = 0; 60my $DESKTOP_STYLE_ARG = 0; 61my $SCHEMAS_STYLE_ARG = 0; 62my $RFC822DEB_STYLE_ARG = 0; 63my $QUIET_ARG = 0; 64my $PASS_THROUGH_ARG = 0; 65my $UTF8_ARG = 0; 66my $MULTIPLE_OUTPUT = 0; 67my $cache_file; 68 69## Handle options 70GetOptions 71( 72 "help" => \$HELP_ARG, 73 "version" => \$VERSION_ARG, 74 "quiet|q" => \$QUIET_ARG, 75 "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility 76 "ba-style|b" => \$BA_STYLE_ARG, 77 "xml-style|x" => \$XML_STYLE_ARG, 78 "keys-style|k" => \$KEYS_STYLE_ARG, 79 "desktop-style|d" => \$DESKTOP_STYLE_ARG, 80 "schemas-style|s" => \$SCHEMAS_STYLE_ARG, 81 "rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG, 82 "pass-through|p" => \$PASS_THROUGH_ARG, 83 "utf8|u" => \$UTF8_ARG, 84 "multiple-output|m" => \$MULTIPLE_OUTPUT, 85 "cache|c=s" => \$cache_file 86 ) or &error; 87 88my $PO_DIR; 89my $FILE; 90my $OUTFILE; 91 92my %po_files_by_lang = (); 93my %translations = (); 94my $iconv = $ENV{"ICONV"} || $ENV{"INTLTOOL_ICONV"} || ""; 95 96# Use this instead of \w for XML files to handle more possible characters. 97my $w = "[-A-Za-z0-9._:]"; 98 99# XML quoted string contents 100my $q = "[^\\\"]*"; 101 102## Check for options. 103 104if ($VERSION_ARG) 105{ 106 &print_version; 107} 108elsif ($HELP_ARG) 109{ 110 &print_help; 111} 112elsif ($BA_STYLE_ARG && @ARGV > 2) 113{ 114 &utf8_sanity_check; 115 &preparation; 116 &print_message; 117 &ba_merge_translations; 118 &finalize; 119} 120elsif ($XML_STYLE_ARG && @ARGV > 2) 121{ 122 &utf8_sanity_check; 123 &preparation; 124 &print_message; 125 &xml_merge_output; 126 &finalize; 127} 128elsif ($KEYS_STYLE_ARG && @ARGV > 2) 129{ 130 &utf8_sanity_check; 131 &preparation; 132 &print_message; 133 &keys_merge_translations; 134 &finalize; 135} 136elsif ($DESKTOP_STYLE_ARG && @ARGV > 2) 137{ 138 &utf8_sanity_check; 139 &preparation; 140 &print_message; 141 &desktop_merge_translations; 142 &finalize; 143} 144elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2) 145{ 146 &utf8_sanity_check; 147 &preparation; 148 &print_message; 149 &schemas_merge_translations; 150 &finalize; 151} 152elsif ($RFC822DEB_STYLE_ARG && @ARGV > 2) 153{ 154 &preparation; 155 &print_message; 156 &rfc822deb_merge_translations; 157 &finalize; 158} 159else 160{ 161 &print_help; 162} 163 164exit; 165 166## Sub for printing release information 167sub print_version 168{ 169 print <<_EOF_; 170${PROGRAM} (${PACKAGE}) ${VERSION} 171Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen. 172 173Copyright (C) 2000-2003 Free Software Foundation, Inc. 174Copyright (C) 2000-2001 Eazel, Inc. 175This is free software; see the source for copying conditions. There is NO 176warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 177_EOF_ 178 exit; 179} 180 181## Sub for printing usage information 182sub print_help 183{ 184 print <<_EOF_; 185Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE 186Generates an output file that includes some localized attributes from an 187untranslated source file. 188 189Mandatory options: (exactly one must be specified) 190 -b, --ba-style includes translations in the bonobo-activation style 191 -d, --desktop-style includes translations in the desktop style 192 -k, --keys-style includes translations in the keys style 193 -s, --schemas-style includes translations in the schemas style 194 -r, --rfc822deb-style includes translations in the RFC822 style 195 -x, --xml-style includes translations in the standard xml style 196 197Other options: 198 -u, --utf8 convert all strings to UTF-8 before merging 199 (default for everything except RFC822 style) 200 -p, --pass-through deprecated, does nothing and issues a warning 201 -m, --multiple-output output one localized file per locale, instead of 202 a single file containing all localized elements 203 -c, --cache=FILE specify cache file name 204 (usually \$top_builddir/po/.intltool-merge-cache) 205 -q, --quiet suppress most messages 206 --help display this help and exit 207 --version output version information and exit 208 209Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE") 210or send email to <xml-i18n-tools\@gnome.org>. 211_EOF_ 212 exit; 213} 214 215 216## Sub for printing error messages 217sub print_error 218{ 219 print STDERR "Try `${PROGRAM} --help' for more information.\n"; 220 exit; 221} 222 223 224sub print_message 225{ 226 print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG; 227} 228 229 230sub preparation 231{ 232 $PO_DIR = $ARGV[0]; 233 $FILE = $ARGV[1]; 234 $OUTFILE = $ARGV[2]; 235 236 &gather_po_files; 237 &get_translation_database; 238} 239 240# General-purpose code for looking up translations in .po files 241 242sub po_file2lang 243{ 244 my ($tmp) = @_; 245 $tmp =~ s/^.*\/(.*)\.po$/$1/; 246 return $tmp; 247} 248 249sub gather_po_files 250{ 251 for my $po_file (glob "$PO_DIR/*.po") { 252 $po_files_by_lang{po_file2lang($po_file)} = $po_file; 253 } 254} 255 256sub get_local_charset 257{ 258 my ($encoding) = @_; 259 my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "/usr/lib/charset.alias"; 260 261 # seek character encoding aliases in charset.alias (glib) 262 263 if (open CHARSET_ALIAS, $alias_file) 264 { 265 while (<CHARSET_ALIAS>) 266 { 267 next if /^\#/; 268 return $1 if (/^\s*([-._a-zA-Z0-9]+)\s+$encoding\b/i) 269 } 270 271 close CHARSET_ALIAS; 272 } 273 274 # if not found, return input string 275 276 return $encoding; 277} 278 279sub get_po_encoding 280{ 281 my ($in_po_file) = @_; 282 my $encoding = ""; 283 284 open IN_PO_FILE, $in_po_file or die; 285 while (<IN_PO_FILE>) 286 { 287 ## example: "Content-Type: text/plain; charset=ISO-8859-1\n" 288 if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/) 289 { 290 $encoding = $1; 291 last; 292 } 293 } 294 close IN_PO_FILE; 295 296 if (!$encoding) 297 { 298 print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG; 299 $encoding = "ISO-8859-1"; 300 } 301 302 system ("$iconv -f $encoding -t UTF-8 </dev/null 2>/dev/null"); 303 if ($?) { 304 $encoding = get_local_charset($encoding); 305 } 306 307 return $encoding 308} 309 310sub utf8_sanity_check 311{ 312 print STDERR "Warning: option --pass-through has been removed.\n" if $PASS_THROUGH_ARG; 313 $UTF8_ARG = 1; 314} 315 316sub get_translation_database 317{ 318 if ($cache_file) { 319 &get_cached_translation_database; 320 } else { 321 &create_translation_database; 322 } 323} 324 325sub get_newest_po_age 326{ 327 my $newest_age; 328 329 foreach my $file (values %po_files_by_lang) 330 { 331 my $file_age = -M $file; 332 $newest_age = $file_age if !$newest_age || $file_age < $newest_age; 333 } 334 335 $newest_age = 0 if !$newest_age; 336 337 return $newest_age; 338} 339 340sub create_cache 341{ 342 print "Generating and caching the translation database\n" unless $QUIET_ARG; 343 344 &create_translation_database; 345 346 open CACHE, ">$cache_file" || die; 347 print CACHE join "\x01", %translations; 348 close CACHE; 349} 350 351sub load_cache 352{ 353 print "Found cached translation database\n" unless $QUIET_ARG; 354 355 my $contents; 356 open CACHE, "<$cache_file" || die; 357 { 358 local $/; 359 $contents = <CACHE>; 360 } 361 close CACHE; 362 %translations = split "\x01", $contents; 363} 364 365sub get_cached_translation_database 366{ 367 my $cache_file_age = -M $cache_file; 368 if (defined $cache_file_age) 369 { 370 if ($cache_file_age <= &get_newest_po_age) 371 { 372 &load_cache; 373 return; 374 } 375 print "Found too-old cached translation database\n" unless $QUIET_ARG; 376 } 377 378 &create_cache; 379} 380 381sub create_translation_database 382{ 383 for my $lang (keys %po_files_by_lang) 384 { 385 my $po_file = $po_files_by_lang{$lang}; 386 387 if ($UTF8_ARG) 388 { 389 my $encoding = get_po_encoding ($po_file); 390 391 if (lc $encoding eq "utf-8") 392 { 393 open PO_FILE, "<$po_file"; 394 } 395 else 396 { 397 print STDERR "WARNING: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;; 398 399 open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|"; 400 } 401 } 402 else 403 { 404 open PO_FILE, "<$po_file"; 405 } 406 407 my $nextfuzzy = 0; 408 my $inmsgid = 0; 409 my $inmsgstr = 0; 410 my $msgid = ""; 411 my $msgstr = ""; 412 413 while (<PO_FILE>) 414 { 415 $nextfuzzy = 1 if /^#, fuzzy/; 416 417 if (/^msgid "((\\.|[^\\])*)"/ ) 418 { 419 $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr; 420 $msgid = ""; 421 $msgstr = ""; 422 423 if ($nextfuzzy) { 424 $inmsgid = 0; 425 } else { 426 $msgid = unescape_po_string($1); 427 $inmsgid = 1; 428 } 429 $inmsgstr = 0; 430 $nextfuzzy = 0; 431 } 432 433 if (/^msgstr "((\\.|[^\\])*)"/) 434 { 435 $msgstr = unescape_po_string($1); 436 $inmsgstr = 1; 437 $inmsgid = 0; 438 } 439 440 if (/^"((\\.|[^\\])*)"/) 441 { 442 $msgid .= unescape_po_string($1) if $inmsgid; 443 $msgstr .= unescape_po_string($1) if $inmsgstr; 444 } 445 } 446 $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr; 447 } 448} 449 450sub finalize 451{ 452} 453 454sub unescape_one_sequence 455{ 456 my ($sequence) = @_; 457 458 return "\\" if $sequence eq "\\\\"; 459 return "\"" if $sequence eq "\\\""; 460 return "\n" if $sequence eq "\\n"; 461 return "\r" if $sequence eq "\\r"; 462 return "\t" if $sequence eq "\\t"; 463 return "\b" if $sequence eq "\\b"; 464 return "\f" if $sequence eq "\\f"; 465 return "\a" if $sequence eq "\\a"; 466 return chr(11) if $sequence eq "\\v"; # vertical tab, see ascii(7) 467 468 return chr(hex($1)) if ($sequence =~ /\\x([0-9a-fA-F]{2})/); 469 return chr(oct($1)) if ($sequence =~ /\\([0-7]{3})/); 470 471 # FIXME: Is \0 supported as well? Kenneth and Rodney don't want it, see bug #48489 472 473 return $sequence; 474} 475 476sub unescape_po_string 477{ 478 my ($string) = @_; 479 480 $string =~ s/(\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\.)/unescape_one_sequence($1)/eg; 481 482 return $string; 483} 484 485## NOTE: deal with < - < but not > - > because it seems its ok to have 486## > in the entity. For further info please look at #84738. 487sub entity_decode 488{ 489 local ($_) = @_; 490 491 s/'/'/g; # ' 492 s/"/"/g; # " 493 s/&/&/g; 494 s/</</g; 495 496 return $_; 497} 498 499# entity_encode: (string) 500# 501# Encode the given string to XML format (encode '<' etc). 502 503sub entity_encode 504{ 505 my ($pre_encoded) = @_; 506 507 my @list_of_chars = unpack ('C*', $pre_encoded); 508 509 # with UTF-8 we only encode minimalistic 510 return join ('', map (&entity_encode_int_minimalist, @list_of_chars)); 511} 512 513sub entity_encode_int_minimalist 514{ 515 return """ if $_ == 34; 516 return "&" if $_ == 38; 517 return "'" if $_ == 39; 518 return "<" if $_ == 60; 519 return chr $_; 520} 521 522sub entity_encoded_translation 523{ 524 my ($lang, $string) = @_; 525 526 my $translation = $translations{$lang, $string}; 527 return $string if !$translation; 528 return entity_encode ($translation); 529} 530 531## XML (bonobo-activation specific) merge code 532 533sub ba_merge_translations 534{ 535 my $source; 536 537 { 538 local $/; # slurp mode 539 open INPUT, "<$FILE" or die "can't open $FILE: $!"; 540 $source = <INPUT>; 541 close INPUT; 542 } 543 544 open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!"; 545 546 while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s) 547 { 548 print OUTPUT $1; 549 550 my $node = $2 . "\n"; 551 552 my @strings = (); 553 $_ = $node; 554 while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) { 555 push @strings, entity_decode($3); 556 } 557 print OUTPUT; 558 559 my %langs; 560 for my $string (@strings) 561 { 562 for my $lang (keys %po_files_by_lang) 563 { 564 $langs{$lang} = 1 if $translations{$lang, $string}; 565 } 566 } 567 568 for my $lang (sort keys %langs) 569 { 570 $_ = $node; 571 s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s; 572 s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg; 573 print OUTPUT; 574 } 575 } 576 577 print OUTPUT $source; 578 579 close OUTPUT; 580} 581 582 583## XML (non-bonobo-activation) merge code 584 585 586# Process tag attributes 587# Only parameter is a HASH containing attributes -> values mapping 588sub getAttributeString 589{ 590 my $sub = shift; 591 my $do_translate = shift || 0; 592 my $language = shift || ""; 593 my $result = ""; 594 my $translate = shift; 595 foreach my $e (reverse(sort(keys %{ $sub }))) { 596 my $key = $e; 597 my $string = $sub->{$e}; 598 my $quote = '"'; 599 600 $string =~ s/^[\s]+//; 601 $string =~ s/[\s]+$//; 602 603 if ($string =~ /^'.*'$/) 604 { 605 $quote = "'"; 606 } 607 $string =~ s/^['"]//g; 608 $string =~ s/['"]$//g; 609 610 if ($do_translate && $key =~ /^_/) { 611 $key =~ s|^_||g; 612 if ($language) { 613 614 # Handle translation 615 # 616 my $decode_string = entity_decode($string); 617 my $translation = $translations{$language, $decode_string}; 618 if ($translation) { 619 $translation = entity_encode($translation); 620 $string = $translation; 621 $$translate = 2; 622 } else { 623 $$translate = 2; # we still want translations for deep nesting (FIXME: this will cause 624 # problems since we might get untranslated duplicated entries, but with xml:lang set) 625 # Fix would be to set it here to eg. 3, and do a check in traverse() to see if any of the containing tags 626 # really need translation, and only emit "translation" if there is (this means parsing same data twice) 627 } 628 } else { 629 $$translate = 2 if ($translate && (!$$translate)); # watch not to "overwrite" if $translate == 2 630 } 631 } 632 633 $result .= " $key=$quote$string$quote"; 634 } 635 return $result; 636} 637 638# Returns a translatable string from XML node, it works on contents of every node in XML::Parser tree 639# doesn't support nesting of translatable tags (i.e. <_blah>this <_doh>doesn't</_doh> work</_blah> -- besides 640# can you define the correct semantics for this?) 641# 642 643sub getXMLstring 644{ 645 my $ref = shift; 646 my @list = @{ $ref }; 647 my $result = ""; 648 649 my $count = scalar(@list); 650 my $attrs = $list[0]; 651 my $index = 1; 652 653 while ($index < $count) { 654 my $type = $list[$index]; 655 my $content = $list[$index+1]; 656 if (! $type ) { 657 # We've got CDATA 658 if ($content) { 659 # lets strip the whitespace here, and *ONLY* here 660 $content =~ s/\s+/ /gs if (!((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/))); 661 $result .= ($content); 662 } else { 663 #print "no cdata content when expected it\n"; # is this possible, is this ok? 664 # what to do if this happens? 665 # Did I mention that I hate XML::Parser tree style? 666 } 667 } else { 668 # We've got another element 669 $result .= "<$type"; 670 $result .= getAttributeString(@{$content}[0], 0); # no nested translatable elements 671 if ($content) { 672 my $subresult = getXMLstring($content); 673 if ($subresult) { 674 $result .= ">".$subresult . "</$type>"; 675 } else { 676 $result .= "/>"; 677 } 678 } else { 679 $result .= "/>"; 680 } 681 } 682 $index += 2; 683 } 684 return $result; 685} 686 687# Translate list of nodes if necessary 688sub translate_subnodes 689{ 690 my $fh = shift; 691 my $content = shift; 692 my $language = shift || ""; 693 my $singlelang = shift || 0; 694 695 my @nodes = @{ $content }; 696 697 my $count = scalar(@nodes); 698 my $index = 0; 699 while ($index < $count) { 700 my $type = $nodes[$index]; 701 my $rest = $nodes[$index+1]; 702 if ($singlelang) { 703 my $oldMO = $MULTIPLE_OUTPUT; 704 $MULTIPLE_OUTPUT = 1; 705 traverse($fh, $type, $rest, $language); 706 $MULTIPLE_OUTPUT = $oldMO; 707 } else { 708 traverse($fh, $type, $rest, $language); 709 } 710 $index += 2; 711 } 712} 713 714sub traverse 715{ 716 my $fh = shift; 717 my $nodename = shift; 718 my $content = shift; 719 my $language = shift || ""; 720 721 if (!$nodename) { 722 if ($content =~ /^[\s]*$/) { 723 $leading_space .= $content; 724 } 725 print $fh $content; 726 } else { 727 # element 728 my @all = @{ $content }; 729 my $attrs = shift @all; 730 my $translate = 0; 731 my $outattr = getAttributeString($attrs, 1, $language, \$translate); 732 733 if ($nodename =~ /^_/) { 734 $translate = 1; 735 $nodename =~ s/^_//; 736 } 737 my $lookup = ''; 738 print $fh "<$nodename", $outattr; 739 if ($translate) { 740 $lookup = getXMLstring($content); 741 if (!((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/))) { 742 $lookup =~ s/^\s+//s; 743 $lookup =~ s/\s+$//s; 744 } 745 746 if ($lookup || $translate == 2) { 747 my $translation = $translations{$language, $lookup}; 748 if ($MULTIPLE_OUTPUT && ($translation || $translate == 2)) { 749 $translation = $lookup if (!$translation); 750 print $fh " xml:lang=\"", $language, "\"" if $language; 751 print $fh ">"; 752 if ($translate == 2) { 753 translate_subnodes($fh, \@all, $language, 1); 754 } else { 755 print $fh $translation; 756 } 757 print $fh "</$nodename>"; 758 759 return; # this means there will be no same translation with xml:lang="$language"... 760 # if we want them both, just remove this "return" 761 } else { 762 print $fh ">"; 763 if ($translate == 2) { 764 translate_subnodes($fh, \@all, $language, 1); 765 } else { 766 print $fh $lookup; 767 } 768 print $fh "</$nodename>"; 769 } 770 } else { 771 print $fh "/>"; 772 } 773 774 for my $lang (sort keys %po_files_by_lang) { 775 if ($MULTIPLE_OUTPUT && $lang ne "$language") { 776 next; 777 } 778 if ($lang) { 779 # Handle translation 780 # 781 my $translate = 0; 782 my $localattrs = getAttributeString($attrs, 1, $lang, \$translate); 783 my $translation = $translations{$lang, $lookup}; 784 if ($translate && !$translation) { 785 $translation = $lookup; 786 } 787 788 if ($translation || $translate) { 789 print $fh "\n"; 790 $leading_space =~ s/.*\n//g; 791 print $fh $leading_space; 792 print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs, ">"; 793 if ($translate == 2) { 794 translate_subnodes($fh, \@all, $lang, 1); 795 } else { 796 print $fh $translation; 797 } 798 print $fh "</$nodename>"; 799 } 800 } 801 } 802 803 } else { 804 my $count = scalar(@all); 805 if ($count > 0) { 806 print $fh ">"; 807 my $index = 0; 808 while ($index < $count) { 809 my $type = $all[$index]; 810 my $rest = $all[$index+1]; 811 traverse($fh, $type, $rest, $language); 812 $index += 2; 813 } 814 print $fh "</$nodename>"; 815 } else { 816 print $fh "/>"; 817 } 818 } 819 } 820} 821 822sub intltool_tree_cdatastart 823{ 824 my $expat = shift; 825 my $clist = $expat->{Curlist}; 826 my $pos = $#$clist; 827 828 push @$clist, 0 => $expat->original_string(); 829} 830 831sub intltool_tree_cdataend 832{ 833 my $expat = shift; 834 my $clist = $expat->{Curlist}; 835 my $pos = $#$clist; 836 837 $clist->[$pos] .= $expat->original_string(); 838} 839 840sub intltool_tree_char 841{ 842 my $expat = shift; 843 my $text = shift; 844 my $clist = $expat->{Curlist}; 845 my $pos = $#$clist; 846 847 # Use original_string so that we retain escaped entities 848 # in CDATA sections. 849 # 850 if ($pos > 0 and $clist->[$pos - 1] eq '0') { 851 $clist->[$pos] .= $expat->original_string(); 852 } else { 853 push @$clist, 0 => $expat->original_string(); 854 } 855} 856 857sub intltool_tree_start 858{ 859 my $expat = shift; 860 my $tag = shift; 861 my @origlist = (); 862 863 # Use original_string so that we retain escaped entities 864 # in attribute values. We must convert the string to an 865 # @origlist array to conform to the structure of the Tree 866 # Style. 867 # 868 my @original_array = split /\x/, $expat->original_string(); 869 my $source = $expat->original_string(); 870 871 # Remove leading tag. 872 # 873 $source =~ s|^\s*<\s*(\S+)||s; 874 875 # Grab attribute key/value pairs and push onto @origlist array. 876 # 877 while ($source) 878 { 879 if ($source =~ /^\s*([\w:-]+)\s*[=]\s*["]/) 880 { 881 $source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s; 882 push @origlist, $1; 883 push @origlist, '"' . $2 . '"'; 884 } 885 elsif ($source =~ /^\s*([\w:-]+)\s*[=]\s*[']/) 886 { 887 $source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s; 888 push @origlist, $1; 889 push @origlist, "'" . $2 . "'"; 890 } 891 else 892 { 893 last; 894 } 895 } 896 897 my $ol = [ { @origlist } ]; 898 899 push @{ $expat->{Lists} }, $expat->{Curlist}; 900 push @{ $expat->{Curlist} }, $tag => $ol; 901 $expat->{Curlist} = $ol; 902} 903 904sub readXml 905{ 906 my $filename = shift || return; 907 if(!-f $filename) { 908 die "ERROR Cannot find filename: $filename\n"; 909 } 910 911 my $ret = eval 'require XML::Parser'; 912 if(!$ret) { 913 die "You must have XML::Parser installed to run $0\n\n"; 914 } 915 my $xp = new XML::Parser(Style => 'Tree'); 916 $xp->setHandlers(Char => \&intltool_tree_char); 917 $xp->setHandlers(Start => \&intltool_tree_start); 918 $xp->setHandlers(CdataStart => \&intltool_tree_cdatastart); 919 $xp->setHandlers(CdataEnd => \&intltool_tree_cdataend); 920 my $tree = $xp->parsefile($filename); 921 922# <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo> 923# would be: 924# [foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]], bar, [{}, 925# 0, "Howdy", ref, [{}]], 0, "do" ] ] 926 927 return $tree; 928} 929 930sub print_header 931{ 932 my $infile = shift; 933 my $fh = shift; 934 my $source; 935 936 if(!-f $infile) { 937 die "ERROR Cannot find filename: $infile\n"; 938 } 939 940 print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n}; 941 { 942 local $/; 943 open DOCINPUT, "<${FILE}" or die; 944 $source = <DOCINPUT>; 945 close DOCINPUT; 946 } 947 if ($source =~ /(<!DOCTYPE.*\[.*\]\s*>)/s) 948 { 949 print $fh "$1\n"; 950 } 951 elsif ($source =~ /(<!DOCTYPE[^>]*>)/s) 952 { 953 print $fh "$1\n"; 954 } 955} 956 957sub parseTree 958{ 959 my $fh = shift; 960 my $ref = shift; 961 my $language = shift || ""; 962 963 my $name = shift @{ $ref }; 964 my $cont = shift @{ $ref }; 965 traverse($fh, $name, $cont, $language); 966} 967 968sub xml_merge_output 969{ 970 my $source; 971 972 if ($MULTIPLE_OUTPUT) { 973 for my $lang (sort keys %po_files_by_lang) { 974 if ( ! -e $lang ) { 975 mkdir $lang, 0777 or die "Cannot create subdirectory $lang: $!\n"; 976 } 977 open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n"; 978 my $tree = readXml($FILE); 979 print_header($FILE, \*OUTPUT); 980 parseTree(\*OUTPUT, $tree, $lang); 981 close OUTPUT; 982 print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG; 983 } 984 } 985 open OUTPUT, ">$OUTFILE" or die "Cannot open $OUTFILE: $!\n"; 986 my $tree = readXml($FILE); 987 print_header($FILE, \*OUTPUT); 988 parseTree(\*OUTPUT, $tree); 989 close OUTPUT; 990 print "CREATED $OUTFILE\n" unless $QUIET_ARG; 991} 992 993sub keys_merge_translations 994{ 995 open INPUT, "<${FILE}" or die; 996 open OUTPUT, ">${OUTFILE}" or die; 997 998 while (<INPUT>) 999 { 1000 if (s/^(\s*)_(\w+=(.*))/$1$2/) 1001 { 1002 my $string = $3; 1003 1004 print OUTPUT; 1005 1006 my $non_translated_line = $_; 1007 1008 for my $lang (sort keys %po_files_by_lang) 1009 { 1010 my $translation = $translations{$lang, $string}; 1011 next if !$translation; 1012 1013 $_ = $non_translated_line; 1014 s/(\w+)=.*/[$lang]$1=$translation/; 1015 print OUTPUT; 1016 } 1017 } 1018 else 1019 { 1020 print OUTPUT; 1021 } 1022 } 1023 1024 close OUTPUT; 1025 close INPUT; 1026} 1027 1028sub desktop_merge_translations 1029{ 1030 open INPUT, "<${FILE}" or die; 1031 open OUTPUT, ">${OUTFILE}" or die; 1032 1033 while (<INPUT>) 1034 { 1035 if (s/^(\s*)_(\w+=(.*))/$1$2/) 1036 { 1037 my $string = $3; 1038 1039 print OUTPUT; 1040 1041 my $non_translated_line = $_; 1042 1043 for my $lang (sort keys %po_files_by_lang) 1044 { 1045 my $translation = $translations{$lang, $string}; 1046 next if !$translation; 1047 1048 $_ = $non_translated_line; 1049 s/(\w+)=.*/${1}[$lang]=$translation/; 1050 print OUTPUT; 1051 } 1052 } 1053 else 1054 { 1055 print OUTPUT; 1056 } 1057 } 1058 1059 close OUTPUT; 1060 close INPUT; 1061} 1062 1063sub schemas_merge_translations 1064{ 1065 my $source; 1066 1067 { 1068 local $/; # slurp mode 1069 open INPUT, "<$FILE" or die "can't open $FILE: $!"; 1070 $source = <INPUT>; 1071 close INPUT; 1072 } 1073 1074 open OUTPUT, ">$OUTFILE" or die; 1075 1076 # FIXME: support attribute translations 1077 1078 # Empty nodes never need translation, so unmark all of them. 1079 # For example, <_foo/> is just replaced by <foo/>. 1080 $source =~ s|<\s*_($w+)\s*/>|<$1/>|g; 1081 1082 while ($source =~ s/ 1083 (.*?) 1084 (\s+)(<locale\ name="C">(\s*) 1085 (<default>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/default>)?(\s*) 1086 (<short>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/short>)?(\s*) 1087 (<long>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/long>)?(\s*) 1088 <\/locale>) 1089 //sx) 1090 { 1091 print OUTPUT $1; 1092 1093 my $locale_start_spaces = $2 ? $2 : ''; 1094 my $default_spaces = $4 ? $4 : ''; 1095 my $short_spaces = $7 ? $7 : ''; 1096 my $long_spaces = $10 ? $10 : ''; 1097 my $locale_end_spaces = $13 ? $13 : ''; 1098 my $c_default_block = $3 ? $3 : ''; 1099 my $default_string = $6 ? $6 : ''; 1100 my $short_string = $9 ? $9 : ''; 1101 my $long_string = $12 ? $12 : ''; 1102 1103 print OUTPUT "$locale_start_spaces$c_default_block"; 1104 1105 $default_string =~ s/\s+/ /g; 1106 $default_string = entity_decode($default_string); 1107 $short_string =~ s/\s+/ /g; 1108 $short_string = entity_decode($short_string); 1109 $long_string =~ s/\s+/ /g; 1110 $long_string = entity_decode($long_string); 1111 1112 for my $lang (sort keys %po_files_by_lang) 1113 { 1114 my $default_translation = $translations{$lang, $default_string}; 1115 my $short_translation = $translations{$lang, $short_string}; 1116 my $long_translation = $translations{$lang, $long_string}; 1117 1118 next if (!$default_translation && !$short_translation && 1119 !$long_translation); 1120 1121 print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">"; 1122 1123 print OUTPUT "$default_spaces"; 1124 1125 if ($default_translation) 1126 { 1127 $default_translation = entity_encode($default_translation); 1128 print OUTPUT "<default>$default_translation</default>"; 1129 } 1130 1131 print OUTPUT "$short_spaces"; 1132 1133 if ($short_translation) 1134 { 1135 $short_translation = entity_encode($short_translation); 1136 print OUTPUT "<short>$short_translation</short>"; 1137 } 1138 1139 print OUTPUT "$long_spaces"; 1140 1141 if ($long_translation) 1142 { 1143 $long_translation = entity_encode($long_translation); 1144 print OUTPUT "<long>$long_translation</long>"; 1145 } 1146 1147 print OUTPUT "$locale_end_spaces</locale>"; 1148 } 1149 } 1150 1151 print OUTPUT $source; 1152 1153 close OUTPUT; 1154} 1155 1156sub rfc822deb_merge_translations 1157{ 1158 my %encodings = (); 1159 for my $lang (keys %po_files_by_lang) { 1160 $encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang})); 1161 } 1162 1163 my $source; 1164 1165 $Text::Wrap::huge = 'overflow'; 1166 $Text::Wrap::break = qr/\n|\s(?=\S)/; 1167 1168 { 1169 local $/; # slurp mode 1170 open INPUT, "<$FILE" or die "can't open $FILE: $!"; 1171 $source = <INPUT>; 1172 close INPUT; 1173 } 1174 1175 open OUTPUT, ">${OUTFILE}" or die; 1176 1177 while ($source =~ /(^|\n+)(_*)([^:\s]+)(:[ \t]*)(.*?)(?=\n[\S\n]|$)/sg) 1178 { 1179 my $sep = $1; 1180 my $non_translated_line = $3.$4; 1181 my $string = $5; 1182 my $underscore = length($2); 1183 next if $underscore eq 0 && $non_translated_line =~ /^#/; 1184 # Remove [] dummy strings 1185 my $stripped = $string; 1186 $stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore eq 2; 1187 $stripped =~ s/\[\s[^\[\]]*\]$//; 1188 $non_translated_line .= $stripped; 1189 1190 print OUTPUT $sep.$non_translated_line; 1191 1192 if ($underscore) 1193 { 1194 my @str_list = rfc822deb_split($underscore, $string); 1195 1196 for my $lang (sort keys %po_files_by_lang) 1197 { 1198 my $is_translated = 1; 1199 my $str_translated = ''; 1200 my $first = 1; 1201 1202 for my $str (@str_list) 1203 { 1204 my $translation = $translations{$lang, $str}; 1205 1206 if (!$translation) 1207 { 1208 $is_translated = 0; 1209 last; 1210 } 1211 1212 # $translation may also contain [] dummy 1213 # strings, mostly to indicate an empty string 1214 $translation =~ s/\[\s[^\[\]]*\]$//; 1215 1216 if ($first) 1217 { 1218 if ($underscore eq 2) 1219 { 1220 $str_translated .= $translation; 1221 } 1222 else 1223 { 1224 $str_translated .= 1225 Text::Tabs::expand($translation) . 1226 "\n"; 1227 } 1228 } 1229 else 1230 { 1231 if ($underscore eq 2) 1232 { 1233 $str_translated .= ', ' . $translation; 1234 } 1235 else 1236 { 1237 $str_translated .= Text::Tabs::expand( 1238 Text::Wrap::wrap(' ', ' ', $translation)) . 1239 "\n .\n"; 1240 } 1241 } 1242 $first = 0; 1243 1244 # To fix some problems with Text::Wrap::wrap 1245 $str_translated =~ s/(\n )+\n/\n .\n/g; 1246 } 1247 next unless $is_translated; 1248 1249 $str_translated =~ s/\n \.\n$//; 1250 $str_translated =~ s/\s+$//; 1251 1252 $_ = $non_translated_line; 1253 s/^(\w+):\s*.*/$sep${1}-$lang.$encodings{$lang}: $str_translated/s; 1254 print OUTPUT; 1255 } 1256 } 1257 } 1258 print OUTPUT "\n"; 1259 1260 close OUTPUT; 1261 close INPUT; 1262} 1263 1264sub rfc822deb_split 1265{ 1266 # Debian defines a special way to deal with rfc822-style files: 1267 # when a value contain newlines, it consists of 1268 # 1. a short form (first line) 1269 # 2. a long description, all lines begin with a space, 1270 # and paragraphs are separated by a single dot on a line 1271 # This routine returns an array of all paragraphs, and reformat 1272 # them. 1273 # When first argument is 2, the string is a comma separated list of 1274 # values. 1275 my $type = shift; 1276 my $text = shift; 1277 $text =~ s/^[ \t]//mg; 1278 return (split(/, */, $text, 0)) if $type ne 1; 1279 return ($text) if $text !~ /\n/; 1280 1281 $text =~ s/([^\n]*)\n//; 1282 my @list = ($1); 1283 my $str = ''; 1284 1285 for my $line (split (/\n/, $text)) 1286 { 1287 chomp $line; 1288 if ($line =~ /^\.\s*$/) 1289 { 1290 # New paragraph 1291 $str =~ s/\s*$//; 1292 push(@list, $str); 1293 $str = ''; 1294 } 1295 elsif ($line =~ /^\s/) 1296 { 1297 # Line which must not be reformatted 1298 $str .= "\n" if length ($str) && $str !~ /\n$/; 1299 $line =~ s/\s+$//; 1300 $str .= $line."\n"; 1301 } 1302 else 1303 { 1304 # Continuation line, remove newline 1305 $str .= " " if length ($str) && $str !~ /\n$/; 1306 $str .= $line; 1307 } 1308 } 1309 1310 $str =~ s/\s*$//; 1311 push(@list, $str) if length ($str); 1312 1313 return @list; 1314} 1315 1316