1#!@INTLTOOL_PERL@ -w
2# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4  -*-
3
4#
5#  The Intltool Message Merger
6#
7#  Copyright (C) 2000, 2003 Free Software Foundation.
8#  Copyright (C) 2000, 2001 Eazel, Inc
9#
10#  Intltool is free software; you can redistribute it and/or
11#  modify it under the terms of the GNU General Public License
12#  version 2 published by the Free Software Foundation.
13#
14#  Intltool is distributed in the hope that it will be useful,
15#  but WITHOUT ANY WARRANTY; without even the implied warranty of
16#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17#  General Public License for more details.
18#
19#  You should have received a copy of the GNU General Public License
20#  along with this program; if not, write to the Free Software
21#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22#
23#  As a special exception to the GNU General Public License, if you
24#  distribute this file as part of a program that contains a
25#  configuration script generated by Autoconf, you may include it under
26#  the same distribution terms that you use for the rest of that program.
27#
28#  Authors:  Maciej Stachowiak <mjs@noisehavoc.org>
29#            Kenneth Christiansen <kenneth@gnu.org>
30#            Darin Adler <darin@bentspoon.com>
31#
32#  Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
33#
34
35## Release information
36my $PROGRAM = "intltool-merge";
37my $PACKAGE = "intltool";
38my $VERSION = "0.36.2";
39
40## Loaded modules
41use strict;
42use Getopt::Long;
43use Text::Wrap;
44use File::Basename;
45
46my $must_end_tag      = -1;
47my $last_depth        = -1;
48my $translation_depth = -1;
49my @tag_stack = ();
50my @entered_tag = ();
51my @translation_strings = ();
52my $leading_space = "";
53
54## Scalars used by the option stuff
55my $HELP_ARG = 0;
56my $VERSION_ARG = 0;
57my $BA_STYLE_ARG = 0;
58my $XML_STYLE_ARG = 0;
59my $KEYS_STYLE_ARG = 0;
60my $DESKTOP_STYLE_ARG = 0;
61my $SCHEMAS_STYLE_ARG = 0;
62my $RFC822DEB_STYLE_ARG = 0;
63my $QUOTED_STYLE_ARG = 0;
64my $QUIET_ARG = 0;
65my $PASS_THROUGH_ARG = 0;
66my $UTF8_ARG = 0;
67my $MULTIPLE_OUTPUT = 0;
68my $cache_file;
69
70## Handle options
71GetOptions
72(
73 "help" => \$HELP_ARG,
74 "version" => \$VERSION_ARG,
75 "quiet|q" => \$QUIET_ARG,
76 "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
77 "ba-style|b" => \$BA_STYLE_ARG,
78 "xml-style|x" => \$XML_STYLE_ARG,
79 "keys-style|k" => \$KEYS_STYLE_ARG,
80 "desktop-style|d" => \$DESKTOP_STYLE_ARG,
81 "schemas-style|s" => \$SCHEMAS_STYLE_ARG,
82 "rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG,
83 "quoted-style" => \$QUOTED_STYLE_ARG,
84 "pass-through|p" => \$PASS_THROUGH_ARG,
85 "utf8|u" => \$UTF8_ARG,
86 "multiple-output|m" => \$MULTIPLE_OUTPUT,
87 "cache|c=s" => \$cache_file
88 ) or &error;
89
90my $PO_DIR;
91my $FILE;
92my $OUTFILE;
93
94my %po_files_by_lang = ();
95my %translations = ();
96my $iconv = $ENV{"ICONV"} || "iconv";
97my $devnull = ($^O eq 'MSWin32' ? 'NUL:' : '/dev/null');
98
99sub isProgramInPath
100{
101    my ($file) = @_;
102    # If either a file exists, or when run it returns 0 exit status
103    return 1 if ((-x $file) or (system("$file -l >$devnull") == 0));
104    return 0;
105}
106
107if (! isProgramInPath ("$iconv"))
108{
109	print STDERR " *** iconv is not found on this system!\n".
110		     " *** Without it, intltool-merge can not convert encodings.\n";
111	exit;
112}
113
114# Use this instead of \w for XML files to handle more possible characters.
115my $w = "[-A-Za-z0-9._:]";
116
117# XML quoted string contents
118my $q = "[^\\\"]*";
119
120## Check for options.
121
122if ($VERSION_ARG)
123{
124	&print_version;
125}
126elsif ($HELP_ARG)
127{
128	&print_help;
129}
130elsif ($BA_STYLE_ARG && @ARGV > 2)
131{
132	&utf8_sanity_check;
133	&preparation;
134	&print_message;
135	&ba_merge_translations;
136	&finalize;
137}
138elsif ($XML_STYLE_ARG && @ARGV > 2)
139{
140	&utf8_sanity_check;
141	&preparation;
142	&print_message;
143	&xml_merge_output;
144	&finalize;
145}
146elsif ($KEYS_STYLE_ARG && @ARGV > 2)
147{
148	&utf8_sanity_check;
149	&preparation;
150	&print_message;
151	&keys_merge_translations;
152	&finalize;
153}
154elsif ($DESKTOP_STYLE_ARG && @ARGV > 2)
155{
156	&utf8_sanity_check;
157	&preparation;
158	&print_message;
159	&desktop_merge_translations;
160	&finalize;
161}
162elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2)
163{
164	&utf8_sanity_check;
165	&preparation;
166	&print_message;
167	&schemas_merge_translations;
168	&finalize;
169}
170elsif ($RFC822DEB_STYLE_ARG && @ARGV > 2)
171{
172	&preparation;
173	&print_message;
174	&rfc822deb_merge_translations;
175	&finalize;
176}
177elsif ($QUOTED_STYLE_ARG && @ARGV > 2)
178{
179	&utf8_sanity_check;
180	&preparation;
181	&print_message;
182	&quoted_merge_translations;
183	&finalize;
184}
185else
186{
187	&print_help;
188}
189
190exit;
191
192## Sub for printing release information
193sub print_version
194{
195    print <<_EOF_;
196${PROGRAM} (${PACKAGE}) ${VERSION}
197Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.
198
199Copyright (C) 2000-2003 Free Software Foundation, Inc.
200Copyright (C) 2000-2001 Eazel, Inc.
201This is free software; see the source for copying conditions.  There is NO
202warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
203_EOF_
204    exit;
205}
206
207## Sub for printing usage information
208sub print_help
209{
210    print <<_EOF_;
211Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE
212Generates an output file that includes some localized attributes from an
213untranslated source file.
214
215Mandatory options: (exactly one must be specified)
216  -b, --ba-style         includes translations in the bonobo-activation style
217  -d, --desktop-style    includes translations in the desktop style
218  -k, --keys-style       includes translations in the keys style
219  -s, --schemas-style    includes translations in the schemas style
220  -r, --rfc822deb-style  includes translations in the RFC822 style
221      --quoted-style     includes translations in the quoted string style
222  -x, --xml-style        includes translations in the standard xml style
223
224Other options:
225  -u, --utf8             convert all strings to UTF-8 before merging 
226                         (default for everything except RFC822 style)
227  -p, --pass-through     deprecated, does nothing and issues a warning
228  -m, --multiple-output  output one localized file per locale, instead of 
229	                 a single file containing all localized elements
230  -c, --cache=FILE       specify cache file name
231                         (usually \$top_builddir/po/.intltool-merge-cache)
232  -q, --quiet            suppress most messages
233      --help             display this help and exit
234      --version          output version information and exit
235
236Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
237or send email to <xml-i18n-tools\@gnome.org>.
238_EOF_
239    exit;
240}
241
242
243## Sub for printing error messages
244sub print_error
245{
246    print STDERR "Try `${PROGRAM} --help' for more information.\n";
247    exit;
248}
249
250
251sub print_message
252{
253    print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
254}
255
256
257sub preparation
258{
259    $PO_DIR = $ARGV[0];
260    $FILE = $ARGV[1];
261    $OUTFILE = $ARGV[2];
262
263    &gather_po_files;
264    &get_translation_database;
265}
266
267# General-purpose code for looking up translations in .po files
268
269sub po_file2lang
270{
271    my ($tmp) = @_;
272    $tmp =~ s/^.*\/(.*)\.po$/$1/;
273    return $tmp;
274}
275
276sub gather_po_files
277{
278    if (my $linguas = $ENV{"LINGUAS"})
279    {
280        for my $lang (split / /, $linguas) {
281            my $po_file = $PO_DIR . "/" . $lang . ".po";
282            if (-e $po_file) {
283                $po_files_by_lang{$lang} = $po_file;
284            }
285        }
286    }
287    else
288    {
289        if (open LINGUAS_FILE, "$PO_DIR/LINGUAS")
290        {
291            while (<LINGUAS_FILE>)
292            {
293                next if /^#/;
294
295                if (/([-a-zA-Z_@.]+)\n/)
296                {
297                    my $lang = $1;
298
299                    my $po_file = $PO_DIR . "/" . $lang . ".po";
300                    if (-e $po_file) {
301                        $po_files_by_lang{$lang} = $po_file;
302                    }
303                }
304            }
305
306            close LINGUAS_FILE;
307        }
308        else
309        {
310            for my $po_file (glob "$PO_DIR/*.po") {
311                $po_files_by_lang{po_file2lang($po_file)} = $po_file;
312            }
313        }
314    }
315}
316
317sub get_local_charset
318{
319    my ($encoding) = @_;
320    my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "@INTLTOOL_LIBDIR@/charset.alias";
321
322    # seek character encoding aliases in charset.alias (glib)
323
324    if (open CHARSET_ALIAS, $alias_file)
325    {
326	while (<CHARSET_ALIAS>)
327        {
328            next if /^\#/;
329            return $1 if (/^\s*([-._a-zA-Z0-9]+)\s+$encoding\b/i)
330        }
331
332        close CHARSET_ALIAS;
333    }
334
335    # if not found, return input string
336
337    return $encoding;
338}
339
340sub get_po_encoding
341{
342    my ($in_po_file) = @_;
343    my $encoding = "";
344
345    open IN_PO_FILE, $in_po_file or die;
346    while (<IN_PO_FILE>)
347    {
348        ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
349        if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/)
350        {
351            $encoding = $1;
352            last;
353        }
354    }
355    close IN_PO_FILE;
356
357    if (!$encoding)
358    {
359        print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG;
360        $encoding = "ISO-8859-1";
361    }
362
363    system ("$iconv -f $encoding -t UTF-8 <$devnull 2>$devnull");
364    if ($?) {
365	$encoding = get_local_charset($encoding);
366    }
367
368    return $encoding
369}
370
371sub utf8_sanity_check
372{
373    print STDERR "Warning: option --pass-through has been removed.\n" if $PASS_THROUGH_ARG;
374    $UTF8_ARG = 1;
375}
376
377sub get_translation_database
378{
379    if ($cache_file) {
380	&get_cached_translation_database;
381    } else {
382        &create_translation_database;
383    }
384}
385
386sub get_newest_po_age
387{
388    my $newest_age;
389
390    foreach my $file (values %po_files_by_lang)
391    {
392	my $file_age = -M $file;
393	$newest_age = $file_age if !$newest_age || $file_age < $newest_age;
394    }
395
396    $newest_age = 0 if !$newest_age;
397
398    return $newest_age;
399}
400
401sub create_cache
402{
403    print "Generating and caching the translation database\n" unless $QUIET_ARG;
404
405    &create_translation_database;
406
407    open CACHE, ">$cache_file" || die;
408    print CACHE join "\x01", %translations;
409    close CACHE;
410}
411
412sub load_cache
413{
414    print "Found cached translation database\n" unless $QUIET_ARG;
415
416    my $contents;
417    open CACHE, "<$cache_file" || die;
418    {
419        local $/;
420        $contents = <CACHE>;
421    }
422    close CACHE;
423    %translations = split "\x01", $contents;
424}
425
426sub get_cached_translation_database
427{
428    my $cache_file_age = -M $cache_file;
429    if (defined $cache_file_age)
430    {
431        if ($cache_file_age <= &get_newest_po_age)
432        {
433            &load_cache;
434            return;
435        }
436        print "Found too-old cached translation database\n" unless $QUIET_ARG;
437    }
438
439    &create_cache;
440}
441
442sub create_translation_database
443{
444    for my $lang (keys %po_files_by_lang)
445    {
446    	my $po_file = $po_files_by_lang{$lang};
447
448        if ($UTF8_ARG)
449        {
450            my $encoding = get_po_encoding ($po_file);
451
452            if (lc $encoding eq "utf-8")
453            {
454                open PO_FILE, "<$po_file";
455            }
456            else
457            {
458		print "NOTICE: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;;
459
460                open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|";
461            }
462        }
463        else
464        {
465            open PO_FILE, "<$po_file";
466        }
467
468	my $nextfuzzy = 0;
469	my $inmsgid = 0;
470	my $inmsgstr = 0;
471	my $msgid = "";
472	my $msgstr = "";
473
474        while (<PO_FILE>)
475        {
476	    $nextfuzzy = 1 if /^#, fuzzy/;
477
478	    if (/^msgid "((\\.|[^\\]+)*)"/ )
479            {
480		$translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
481		$msgid = "";
482		$msgstr = "";
483
484		if ($nextfuzzy) {
485		    $inmsgid = 0;
486		} else {
487		    $msgid = unescape_po_string($1);
488		    $inmsgid = 1;
489		}
490		$inmsgstr = 0;
491		$nextfuzzy = 0;
492	    }
493
494	    if (/^msgstr "((\\.|[^\\]+)*)"/)
495            {
496	        $msgstr = unescape_po_string($1);
497		$inmsgstr = 1;
498		$inmsgid = 0;
499	    }
500
501	    if (/^"((\\.|[^\\]+)*)"/)
502            {
503	        $msgid .= unescape_po_string($1) if $inmsgid;
504	        $msgstr .= unescape_po_string($1) if $inmsgstr;
505	    }
506	}
507	$translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
508    }
509}
510
511sub finalize
512{
513}
514
515sub unescape_one_sequence
516{
517    my ($sequence) = @_;
518
519    return "\\" if $sequence eq "\\\\";
520    return "\"" if $sequence eq "\\\"";
521    return "\n" if $sequence eq "\\n";
522    return "\r" if $sequence eq "\\r";
523    return "\t" if $sequence eq "\\t";
524    return "\b" if $sequence eq "\\b";
525    return "\f" if $sequence eq "\\f";
526    return "\a" if $sequence eq "\\a";
527    return chr(11) if $sequence eq "\\v"; # vertical tab, see ascii(7)
528
529    return chr(hex($1)) if ($sequence =~ /\\x([0-9a-fA-F]{2})/);
530    return chr(oct($1)) if ($sequence =~ /\\([0-7]{3})/);
531
532    # FIXME: Is \0 supported as well? Kenneth and Rodney don't want it, see bug #48489
533
534    return $sequence;
535}
536
537sub unescape_po_string
538{
539    my ($string) = @_;
540
541    $string =~ s/(\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\.)/unescape_one_sequence($1)/eg;
542
543    return $string;
544}
545
546sub entity_decode
547{
548    local ($_) = @_;
549
550    s/&apos;/'/g; # '
551    s/&quot;/"/g; # "
552    s/&lt;/</g;
553    s/&gt;/>/g;
554    s/&amp;/&/g;
555
556    return $_;
557}
558
559# entity_encode: (string)
560#
561# Encode the given string to XML format (encode '<' etc).
562
563sub entity_encode
564{
565    my ($pre_encoded) = @_;
566
567    my @list_of_chars = unpack ('C*', $pre_encoded);
568
569    # with UTF-8 we only encode minimalistic
570    return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
571}
572
573sub entity_encode_int_minimalist
574{
575    return "&quot;" if $_ == 34;
576    return "&amp;" if $_ == 38;
577    return "&apos;" if $_ == 39;
578    return "&lt;" if $_ == 60;
579    return chr $_;
580}
581
582sub entity_encoded_translation
583{
584    my ($lang, $string) = @_;
585
586    my $translation = $translations{$lang, $string};
587    return $string if !$translation;
588    return entity_encode ($translation);
589}
590
591## XML (bonobo-activation specific) merge code
592
593sub ba_merge_translations
594{
595    my $source;
596
597    {
598       local $/; # slurp mode
599       open INPUT, "<$FILE" or die "can't open $FILE: $!";
600       $source = <INPUT>;
601       close INPUT;
602    }
603
604    open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
605    # Binmode so that selftest works ok if using a native Win32 Perl...
606    binmode (OUTPUT) if $^O eq 'MSWin32';
607
608    while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s)
609    {
610        print OUTPUT $1;
611
612        my $node = $2 . "\n";
613
614        my @strings = ();
615        $_ = $node;
616	while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
617             push @strings, entity_decode($3);
618        }
619	print OUTPUT;
620
621	my %langs;
622	for my $string (@strings)
623        {
624	    for my $lang (keys %po_files_by_lang)
625            {
626                $langs{$lang} = 1 if $translations{$lang, $string};
627	    }
628	}
629
630	for my $lang (sort keys %langs)
631        {
632	    $_ = $node;
633	    s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
634	    s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
635	    print OUTPUT;
636        }
637    }
638
639    print OUTPUT $source;
640
641    close OUTPUT;
642}
643
644
645## XML (non-bonobo-activation) merge code
646
647
648# Process tag attributes
649#   Only parameter is a HASH containing attributes -> values mapping
650sub getAttributeString
651{
652    my $sub = shift;
653    my $do_translate = shift || 0;
654    my $language = shift || "";
655    my $result = "";
656    my $translate = shift;
657    foreach my $e (reverse(sort(keys %{ $sub }))) {
658	my $key    = $e;
659	my $string = $sub->{$e};
660	my $quote = '"';
661
662	$string =~ s/^[\s]+//;
663	$string =~ s/[\s]+$//;
664
665	if ($string =~ /^'.*'$/)
666	{
667	    $quote = "'";
668	}
669	$string =~ s/^['"]//g;
670	$string =~ s/['"]$//g;
671
672	if ($do_translate && $key =~ /^_/) {
673	    $key =~ s|^_||g;
674	    if ($language) {
675		# Handle translation
676		my $decode_string = entity_decode($string);
677		my $translation = $translations{$language, $decode_string};
678		if ($translation) {
679		    $translation = entity_encode($translation);
680		    $string = $translation;
681                }
682                $$translate = 2;
683            } else {
684                 $$translate = 2 if ($translate && (!$$translate)); # watch not to "overwrite" $translate
685            }
686	}
687
688	$result .= " $key=$quote$string$quote";
689    }
690    return $result;
691}
692
693# Returns a translatable string from XML node, it works on contents of every node in XML::Parser tree
694sub getXMLstring
695{
696    my $ref = shift;
697    my $spacepreserve = shift || 0;
698    my @list = @{ $ref };
699    my $result = "";
700
701    my $count = scalar(@list);
702    my $attrs = $list[0];
703    my $index = 1;
704
705    $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
706    $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));
707
708    while ($index < $count) {
709	my $type = $list[$index];
710	my $content = $list[$index+1];
711        if (! $type ) {
712	    # We've got CDATA
713	    if ($content) {
714		# lets strip the whitespace here, and *ONLY* here
715                $content =~ s/\s+/ /gs if (!$spacepreserve);
716		$result .= $content;
717	    }
718	} elsif ( "$type" ne "1" ) {
719	    # We've got another element
720	    $result .= "<$type";
721	    $result .= getAttributeString(@{$content}[0], 0); # no nested translatable elements
722	    if ($content) {
723		my $subresult = getXMLstring($content, $spacepreserve);
724		if ($subresult) {
725		    $result .= ">".$subresult . "</$type>";
726		} else {
727		    $result .= "/>";
728		}
729	    } else {
730		$result .= "/>";
731	    }
732	}
733	$index += 2;
734    }
735    return $result;
736}
737
738# Translate list of nodes if necessary
739sub translate_subnodes
740{
741    my $fh = shift;
742    my $content = shift;
743    my $language = shift || "";
744    my $singlelang = shift || 0;
745    my $spacepreserve = shift || 0;
746
747    my @nodes = @{ $content };
748
749    my $count = scalar(@nodes);
750    my $index = 0;
751    while ($index < $count) {
752        my $type = $nodes[$index];
753        my $rest = $nodes[$index+1];
754        if ($singlelang) {
755            my $oldMO = $MULTIPLE_OUTPUT;
756            $MULTIPLE_OUTPUT = 1;
757            traverse($fh, $type, $rest, $language, $spacepreserve);
758            $MULTIPLE_OUTPUT = $oldMO;
759        } else {
760            traverse($fh, $type, $rest, $language, $spacepreserve);
761        }
762        $index += 2;
763    }
764}
765
766sub isWellFormedXmlFragment
767{
768    my $ret = eval 'require XML::Parser';
769    if(!$ret) {
770        die "You must have XML::Parser installed to run $0\n\n";
771    }
772
773    my $fragment = shift;
774    return 0 if (!$fragment);
775
776    $fragment = "<root>$fragment</root>";
777    my $xp = new XML::Parser(Style => 'Tree');
778    my $tree = 0;
779    eval { $tree = $xp->parse($fragment); };
780    return $tree;
781}
782
783sub traverse
784{
785    my $fh = shift;
786    my $nodename = shift;
787    my $content = shift;
788    my $language = shift || "";
789    my $spacepreserve = shift || 0;
790
791    if (!$nodename) {
792	if ($content =~ /^[\s]*$/) {
793	    $leading_space .= $content;
794	}
795	print $fh $content;
796    } else {
797	# element
798	my @all = @{ $content };
799	my $attrs = shift @all;
800	my $translate = 0;
801	my $outattr = getAttributeString($attrs, 1, $language, \$translate);
802
803	if ($nodename =~ /^_/) {
804	    $translate = 1;
805	    $nodename =~ s/^_//;
806	}
807	my $lookup = '';
808
809        $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));
810        $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
811
812	print $fh "<$nodename", $outattr;
813	if ($translate) {
814	    $lookup = getXMLstring($content, $spacepreserve);
815            if (!$spacepreserve) {
816                $lookup =~ s/^\s+//s;
817                $lookup =~ s/\s+$//s;
818            }
819
820	    if ($lookup || $translate == 2) {
821                my $translation = $translations{$language, $lookup} if isWellFormedXmlFragment($translations{$language, $lookup});
822                if ($MULTIPLE_OUTPUT && ($translation || $translate == 2)) {
823                    $translation = $lookup if (!$translation);
824                    print $fh " xml:lang=\"", $language, "\"" if $language;
825                    print $fh ">";
826                    if ($translate == 2) {
827                        translate_subnodes($fh, \@all, $language, 1, $spacepreserve);
828                    } else {
829                        print $fh $translation;
830                    }
831                    print $fh "</$nodename>";
832
833                    return; # this means there will be no same translation with xml:lang="$language"...
834                            # if we want them both, just remove this "return"
835                } else {
836                    print $fh ">";
837                    if ($translate == 2) {
838                        translate_subnodes($fh, \@all, $language, 1, $spacepreserve);
839                    } else {
840                        print $fh $lookup;
841                    }
842                    print $fh "</$nodename>";
843                }
844	    } else {
845		print $fh "/>";
846	    }
847
848	    for my $lang (sort keys %po_files_by_lang) {
849                    if ($MULTIPLE_OUTPUT && $lang ne "$language") {
850                        next;
851                    }
852		    if ($lang) {
853                        # Handle translation
854                        #
855                        my $translate = 0;
856                        my $localattrs = getAttributeString($attrs, 1, $lang, \$translate);
857                        my $translation = $translations{$lang, $lookup} if isWellFormedXmlFragment($translations{$lang, $lookup});
858                        if ($translate && !$translation) {
859                            $translation = $lookup;
860                        }
861
862                        if ($translation || $translate) {
863			    print $fh "\n";
864			    $leading_space =~ s/.*\n//g;
865			    print $fh $leading_space;
866 			    print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs, ">";
867                            if ($translate == 2) {
868                               translate_subnodes($fh, \@all, $lang, 1, $spacepreserve);
869                            } else {
870                                print $fh $translation;
871                            }
872                            print $fh "</$nodename>";
873			}
874                    }
875	    }
876
877	} else {
878	    my $count = scalar(@all);
879	    if ($count > 0) {
880		print $fh ">";
881                my $index = 0;
882                while ($index < $count) {
883                    my $type = $all[$index];
884                    my $rest = $all[$index+1];
885                    traverse($fh, $type, $rest, $language, $spacepreserve);
886                    $index += 2;
887                }
888		print $fh "</$nodename>";
889	    } else {
890		print $fh "/>";
891	    }
892	}
893    }
894}
895
896sub intltool_tree_comment
897{
898    my $expat = shift;
899    my $data  = shift;
900    my $clist = $expat->{Curlist};
901    my $pos   = $#$clist;
902
903    push @$clist, 1 => $data;
904}
905
906sub intltool_tree_cdatastart
907{
908    my $expat    = shift;
909    my $clist = $expat->{Curlist};
910    my $pos   = $#$clist;
911
912    push @$clist, 0 => $expat->original_string();
913}
914
915sub intltool_tree_cdataend
916{
917    my $expat    = shift;
918    my $clist = $expat->{Curlist};
919    my $pos   = $#$clist;
920
921    $clist->[$pos] .= $expat->original_string();
922}
923
924sub intltool_tree_char
925{
926    my $expat = shift;
927    my $text  = shift;
928    my $clist = $expat->{Curlist};
929    my $pos   = $#$clist;
930
931    # Use original_string so that we retain escaped entities
932    # in CDATA sections.
933    #
934    if ($pos > 0 and $clist->[$pos - 1] eq '0') {
935        $clist->[$pos] .= $expat->original_string();
936    } else {
937        push @$clist, 0 => $expat->original_string();
938    }
939}
940
941sub intltool_tree_start
942{
943    my $expat    = shift;
944    my $tag      = shift;
945    my @origlist = ();
946
947    # Use original_string so that we retain escaped entities
948    # in attribute values.  We must convert the string to an
949    # @origlist array to conform to the structure of the Tree
950    # Style.
951    #
952    my @original_array = split /\x/, $expat->original_string();
953    my $source         = $expat->original_string();
954
955    # Remove leading tag.
956    #
957    $source =~ s|^\s*<\s*(\S+)||s;
958
959    # Grab attribute key/value pairs and push onto @origlist array.
960    #
961    while ($source)
962    {
963       if ($source =~ /^\s*([\w:-]+)\s*[=]\s*["]/)
964       {
965           $source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s;
966           push @origlist, $1;
967           push @origlist, '"' . $2 . '"';
968       }
969       elsif ($source =~ /^\s*([\w:-]+)\s*[=]\s*[']/)
970       {
971           $source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s;
972           push @origlist, $1;
973           push @origlist, "'" . $2 . "'";
974       }
975       else
976       {
977           last;
978       }
979    }
980
981    my $ol = [ { @origlist } ];
982
983    push @{ $expat->{Lists} }, $expat->{Curlist};
984    push @{ $expat->{Curlist} }, $tag => $ol;
985    $expat->{Curlist} = $ol;
986}
987
988sub readXml
989{
990    my $filename = shift || return;
991    if(!-f $filename) {
992        die "ERROR Cannot find filename: $filename\n";
993    }
994
995    my $ret = eval 'require XML::Parser';
996    if(!$ret) {
997        die "You must have XML::Parser installed to run $0\n\n";
998    }
999    my $xp = new XML::Parser(Style => 'Tree');
1000    $xp->setHandlers(Char => \&intltool_tree_char);
1001    $xp->setHandlers(Start => \&intltool_tree_start);
1002    $xp->setHandlers(CdataStart => \&intltool_tree_cdatastart);
1003    $xp->setHandlers(CdataEnd => \&intltool_tree_cdataend);
1004    my $tree = $xp->parsefile($filename);
1005
1006# <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
1007# would be:
1008# [foo, [{}, head, [{id => "a"}, 0, "Hello ",  em, [{}, 0, "there"]], bar, [{},
1009# 0, "Howdy",  ref, [{}]], 0, "do" ] ]
1010
1011    return $tree;
1012}
1013
1014sub print_header
1015{
1016    my $infile = shift;
1017    my $fh = shift;
1018    my $source;
1019
1020    if(!-f $infile) {
1021        die "ERROR Cannot find filename: $infile\n";
1022    }
1023
1024    print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n};
1025    {
1026        local $/;
1027        open DOCINPUT, "<${FILE}" or die;
1028        $source = <DOCINPUT>;
1029        close DOCINPUT;
1030    }
1031    if ($source =~ /(<!DOCTYPE.*\[.*\]\s*>)/s)
1032    {
1033        print $fh "$1\n";
1034    }
1035    elsif ($source =~ /(<!DOCTYPE[^>]*>)/s)
1036    {
1037        print $fh "$1\n";
1038    }
1039}
1040
1041sub parseTree
1042{
1043    my $fh        = shift;
1044    my $ref       = shift;
1045    my $language  = shift || "";
1046
1047    my $name = shift @{ $ref };
1048    my $cont = shift @{ $ref };
1049
1050    while (!$name || "$name" eq "1") {
1051        $name = shift @{ $ref };
1052        $cont = shift @{ $ref };
1053    }
1054
1055    my $spacepreserve = 0;
1056    my $attrs = @{$cont}[0];
1057    $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
1058
1059    traverse($fh, $name, $cont, $language, $spacepreserve);
1060}
1061
1062sub xml_merge_output
1063{
1064    my $source;
1065
1066    if ($MULTIPLE_OUTPUT) {
1067        for my $lang (sort keys %po_files_by_lang) {
1068	    if ( ! -d $lang ) {
1069	        mkdir $lang or -d $lang or die "Cannot create subdirectory $lang: $!\n";
1070            }
1071            open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n";
1072            binmode (OUTPUT) if $^O eq 'MSWin32';
1073            my $tree = readXml($FILE);
1074            print_header($FILE, \*OUTPUT);
1075            parseTree(\*OUTPUT, $tree, $lang);
1076            close OUTPUT;
1077            print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG;
1078        }
1079    }
1080    open OUTPUT, ">$OUTFILE" or die "Cannot open $OUTFILE: $!\n";
1081    binmode (OUTPUT) if $^O eq 'MSWin32';
1082    my $tree = readXml($FILE);
1083    print_header($FILE, \*OUTPUT);
1084    parseTree(\*OUTPUT, $tree);
1085    close OUTPUT;
1086    print "CREATED $OUTFILE\n" unless $QUIET_ARG;
1087}
1088
1089sub keys_merge_translations
1090{
1091    open INPUT, "<${FILE}" or die;
1092    open OUTPUT, ">${OUTFILE}" or die;
1093    binmode (OUTPUT) if $^O eq 'MSWin32';
1094
1095    while (<INPUT>)
1096    {
1097        if (s/^(\s*)_(\w+=(.*))/$1$2/)
1098        {
1099	    my $string = $3;
1100
1101            print OUTPUT;
1102
1103	    my $non_translated_line = $_;
1104
1105            for my $lang (sort keys %po_files_by_lang)
1106            {
1107		my $translation = $translations{$lang, $string};
1108                next if !$translation;
1109
1110                $_ = $non_translated_line;
1111		s/(\w+)=.*/[$lang]$1=$translation/;
1112                print OUTPUT;
1113            }
1114	}
1115        else
1116        {
1117            print OUTPUT;
1118        }
1119    }
1120
1121    close OUTPUT;
1122    close INPUT;
1123}
1124
1125sub desktop_merge_translations
1126{
1127    open INPUT, "<${FILE}" or die;
1128    open OUTPUT, ">${OUTFILE}" or die;
1129    binmode (OUTPUT) if $^O eq 'MSWin32';
1130
1131    while (<INPUT>)
1132    {
1133        if (s/^(\s*)_(\w+=(.*))/$1$2/)
1134        {
1135	    my $string = $3;
1136
1137            print OUTPUT;
1138
1139	    my $non_translated_line = $_;
1140
1141            for my $lang (sort keys %po_files_by_lang)
1142            {
1143                my $translation = $translations{$lang, $string};
1144                next if !$translation;
1145
1146                $_ = $non_translated_line;
1147                s/(\w+)=.*/${1}[$lang]=$translation/;
1148                print OUTPUT;
1149            }
1150	}
1151        else
1152        {
1153            print OUTPUT;
1154        }
1155    }
1156
1157    close OUTPUT;
1158    close INPUT;
1159}
1160
1161sub schemas_merge_translations
1162{
1163    my $source;
1164
1165    {
1166       local $/; # slurp mode
1167       open INPUT, "<$FILE" or die "can't open $FILE: $!";
1168       $source = <INPUT>;
1169       close INPUT;
1170    }
1171
1172    open OUTPUT, ">$OUTFILE" or die;
1173    binmode (OUTPUT) if $^O eq 'MSWin32';
1174
1175    # FIXME: support attribute translations
1176
1177    # Empty nodes never need translation, so unmark all of them.
1178    # For example, <_foo/> is just replaced by <foo/>.
1179    $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
1180
1181    while ($source =~ s/
1182                        (.*?)
1183                        (\s+)(<locale\ name="C">(\s*)
1184                            (<default>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/default>)?(\s*)
1185                            (<short>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/short>)?(\s*)
1186                            (<long>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/long>)?(\s*)
1187                        <\/locale>)
1188                       //sx)
1189    {
1190        print OUTPUT $1;
1191
1192	my $locale_start_spaces = $2 ? $2 : '';
1193	my $default_spaces = $4 ? $4 : '';
1194	my $short_spaces = $7 ? $7 : '';
1195	my $long_spaces = $10 ? $10 : '';
1196	my $locale_end_spaces = $13 ? $13 : '';
1197	my $c_default_block = $3 ? $3 : '';
1198	my $default_string = $6 ? $6 : '';
1199	my $short_string = $9 ? $9 : '';
1200	my $long_string = $12 ? $12 : '';
1201
1202	print OUTPUT "$locale_start_spaces$c_default_block";
1203
1204        $default_string =~ s/\s+/ /g;
1205        $default_string = entity_decode($default_string);
1206	$short_string =~ s/\s+/ /g;
1207	$short_string = entity_decode($short_string);
1208	$long_string =~ s/\s+/ /g;
1209	$long_string = entity_decode($long_string);
1210
1211	for my $lang (sort keys %po_files_by_lang)
1212        {
1213	    my $default_translation = $translations{$lang, $default_string};
1214	    my $short_translation = $translations{$lang, $short_string};
1215	    my $long_translation  = $translations{$lang, $long_string};
1216
1217	    next if (!$default_translation && !$short_translation &&
1218                     !$long_translation);
1219
1220	    print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";
1221
1222        print OUTPUT "$default_spaces";
1223
1224        if ($default_translation)
1225        {
1226            $default_translation = entity_encode($default_translation);
1227            print OUTPUT "<default>$default_translation</default>";
1228        }
1229
1230	    print OUTPUT "$short_spaces";
1231
1232	    if ($short_translation)
1233	    {
1234			$short_translation = entity_encode($short_translation);
1235			print OUTPUT "<short>$short_translation</short>";
1236	    }
1237
1238	    print OUTPUT "$long_spaces";
1239
1240	    if ($long_translation)
1241	    {
1242			$long_translation = entity_encode($long_translation);
1243			print OUTPUT "<long>$long_translation</long>";
1244	    }
1245
1246	    print OUTPUT "$locale_end_spaces</locale>";
1247        }
1248    }
1249
1250    print OUTPUT $source;
1251
1252    close OUTPUT;
1253}
1254
1255sub rfc822deb_merge_translations
1256{
1257    my %encodings = ();
1258    for my $lang (keys %po_files_by_lang) {
1259        $encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang}));
1260    }
1261
1262    my $source;
1263
1264    $Text::Wrap::huge = 'overflow';
1265    $Text::Wrap::break = qr/\n|\s(?=\S)/;
1266
1267    {
1268       local $/; # slurp mode
1269       open INPUT, "<$FILE" or die "can't open $FILE: $!";
1270       $source = <INPUT>;
1271       close INPUT;
1272    }
1273
1274    open OUTPUT, ">${OUTFILE}" or die;
1275    binmode (OUTPUT) if $^O eq 'MSWin32';
1276
1277    while ($source =~ /(^|\n+)(_*)([^:\s]+)(:[ \t]*)(.*?)(?=\n[\S\n]|$)/sg)
1278    {
1279	    my $sep = $1;
1280	    my $non_translated_line = $3.$4;
1281	    my $string = $5;
1282	    my $underscore = length($2);
1283	    next if $underscore eq 0 && $non_translated_line =~ /^#/;
1284	    #  Remove [] dummy strings
1285	    my $stripped = $string;
1286	    $stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore eq 2;
1287	    $stripped =~ s/\[\s[^\[\]]*\]$//;
1288	    $non_translated_line .= $stripped;
1289
1290	    print OUTPUT $sep.$non_translated_line;
1291
1292	    if ($underscore)
1293	    {
1294	        my @str_list = rfc822deb_split($underscore, $string);
1295
1296	        for my $lang (sort keys %po_files_by_lang)
1297                {
1298                    my $is_translated = 1;
1299                    my $str_translated = '';
1300                    my $first = 1;
1301
1302                    for my $str (@str_list)
1303                    {
1304                        my $translation = $translations{$lang, $str};
1305
1306                        if (!$translation)
1307                        {
1308                            $is_translated = 0;
1309                            last;
1310                        }
1311
1312	                #  $translation may also contain [] dummy
1313                        #  strings, mostly to indicate an empty string
1314	                $translation =~ s/\[\s[^\[\]]*\]$//;
1315
1316                        if ($first)
1317                        {
1318                            if ($underscore eq 2)
1319                            {
1320                                $str_translated .= $translation;
1321                            }
1322                            else
1323                            {
1324                                $str_translated .=
1325                                    Text::Tabs::expand($translation) .
1326                                    "\n";
1327                            }
1328                        }
1329                        else
1330                        {
1331                            if ($underscore eq 2)
1332                            {
1333                                $str_translated .= ', ' . $translation;
1334                            }
1335                            else
1336                            {
1337                                $str_translated .= Text::Tabs::expand(
1338                                    Text::Wrap::wrap(' ', ' ', $translation)) .
1339                                    "\n .\n";
1340                            }
1341                        }
1342                        $first = 0;
1343
1344                        #  To fix some problems with Text::Wrap::wrap
1345                        $str_translated =~ s/(\n )+\n/\n .\n/g;
1346                    }
1347                    next unless $is_translated;
1348
1349                    $str_translated =~ s/\n \.\n$//;
1350                    $str_translated =~ s/\s+$//;
1351
1352                    $_ = $non_translated_line;
1353                    s/^(\w+):\s*.*/$sep${1}-$lang.$encodings{$lang}: $str_translated/s;
1354                    print OUTPUT;
1355                }
1356	    }
1357    }
1358    print OUTPUT "\n";
1359
1360    close OUTPUT;
1361    close INPUT;
1362}
1363
1364sub rfc822deb_split
1365{
1366    # Debian defines a special way to deal with rfc822-style files:
1367    # when a value contain newlines, it consists of
1368    #   1.  a short form (first line)
1369    #   2.  a long description, all lines begin with a space,
1370    #       and paragraphs are separated by a single dot on a line
1371    # This routine returns an array of all paragraphs, and reformat
1372    # them.
1373    # When first argument is 2, the string is a comma separated list of
1374    # values.
1375    my $type = shift;
1376    my $text = shift;
1377    $text =~ s/^[ \t]//mg;
1378    return (split(/, */, $text, 0)) if $type ne 1;
1379    return ($text) if $text !~ /\n/;
1380
1381    $text =~ s/([^\n]*)\n//;
1382    my @list = ($1);
1383    my $str = '';
1384
1385    for my $line (split (/\n/, $text))
1386    {
1387        chomp $line;
1388        if ($line =~ /^\.\s*$/)
1389        {
1390            #  New paragraph
1391            $str =~ s/\s*$//;
1392            push(@list, $str);
1393            $str = '';
1394        }
1395        elsif ($line =~ /^\s/)
1396        {
1397            #  Line which must not be reformatted
1398            $str .= "\n" if length ($str) && $str !~ /\n$/;
1399            $line =~ s/\s+$//;
1400            $str .= $line."\n";
1401        }
1402        else
1403        {
1404            #  Continuation line, remove newline
1405            $str .= " " if length ($str) && $str !~ /\n$/;
1406            $str .= $line;
1407        }
1408    }
1409
1410    $str =~ s/\s*$//;
1411    push(@list, $str) if length ($str);
1412
1413    return @list;
1414}
1415
1416sub quoted_translation
1417{
1418    my ($lang, $string) = @_;
1419
1420    $string =~ s/\\\"/\"/g;
1421
1422    my $translation = $translations{$lang, $string};
1423    $translation = $string if !$translation;
1424
1425    $translation =~ s/\"/\\\"/g;
1426    return $translation
1427}
1428
1429sub quoted_merge_translations
1430{
1431    if (!$MULTIPLE_OUTPUT) {
1432        print "Quoted only supports Multiple Output.\n";
1433        exit(1);
1434    }
1435
1436    for my $lang (sort keys %po_files_by_lang) {
1437        if ( ! -d $lang ) {
1438            mkdir $lang or -d $lang or die "Cannot create subdirectory $lang: $!\n";
1439        }
1440        open INPUT, "<${FILE}" or die;
1441        open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n";
1442        binmode (OUTPUT) if $^O eq 'MSWin32';
1443        while (<INPUT>)
1444        {
1445            s/\"(([^\"]|\\\")*[^\\\"])\"/"\"" . &quoted_translation($lang, $1) . "\""/ge;
1446            print OUTPUT;
1447        }
1448        close OUTPUT;
1449        close INPUT;
1450    }
1451}
1452