1#!@INTLTOOL_PERL@ -w
2# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4  -*-
3
4#
5#  The Intltool Message Merger
6#
7#  Copyright (C) 2000, 2003 Free Software Foundation.
8#  Copyright (C) 2000, 2001 Eazel, Inc
9#
10#  Intltool is free software; you can redistribute it and/or
11#  modify it under the terms of the GNU General Public License
12#  version 2 published by the Free Software Foundation.
13#
14#  Intltool is distributed in the hope that it will be useful,
15#  but WITHOUT ANY WARRANTY; without even the implied warranty of
16#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17#  General Public License for more details.
18#
19#  You should have received a copy of the GNU General Public License
20#  along with this program; if not, write to the Free Software
21#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22#
23#  As a special exception to the GNU General Public License, if you
24#  distribute this file as part of a program that contains a
25#  configuration script generated by Autoconf, you may include it under
26#  the same distribution terms that you use for the rest of that program.
27#
28#  Authors:  Maciej Stachowiak <mjs@noisehavoc.org>
29#            Kenneth Christiansen <kenneth@gnu.org>
30#            Darin Adler <darin@bentspoon.com>
31#
32#  Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
33#
34
35## Release information
36my $PROGRAM = "intltool-merge";
37my $PACKAGE = "intltool";
38my $VERSION = "0.35.0";
39
40## Loaded modules
41use strict;
42use Getopt::Long;
43use Text::Wrap;
44use File::Basename;
45
46my $must_end_tag      = -1;
47my $last_depth        = -1;
48my $translation_depth = -1;
49my @tag_stack = ();
50my @entered_tag = ();
51my @translation_strings = ();
52my $leading_space = "";
53
54## Scalars used by the option stuff
55my $HELP_ARG = 0;
56my $VERSION_ARG = 0;
57my $BA_STYLE_ARG = 0;
58my $XML_STYLE_ARG = 0;
59my $KEYS_STYLE_ARG = 0;
60my $DESKTOP_STYLE_ARG = 0;
61my $SCHEMAS_STYLE_ARG = 0;
62my $RFC822DEB_STYLE_ARG = 0;
63my $QUIET_ARG = 0;
64my $PASS_THROUGH_ARG = 0;
65my $UTF8_ARG = 0;
66my $MULTIPLE_OUTPUT = 0;
67my $cache_file;
68
69## Handle options
70GetOptions
71(
72 "help" => \$HELP_ARG,
73 "version" => \$VERSION_ARG,
74 "quiet|q" => \$QUIET_ARG,
75 "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
76 "ba-style|b" => \$BA_STYLE_ARG,
77 "xml-style|x" => \$XML_STYLE_ARG,
78 "keys-style|k" => \$KEYS_STYLE_ARG,
79 "desktop-style|d" => \$DESKTOP_STYLE_ARG,
80 "schemas-style|s" => \$SCHEMAS_STYLE_ARG,
81 "rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG,
82 "pass-through|p" => \$PASS_THROUGH_ARG,
83 "utf8|u" => \$UTF8_ARG,
84 "multiple-output|m" => \$MULTIPLE_OUTPUT,
85 "cache|c=s" => \$cache_file
86 ) or &error;
87
88my $PO_DIR;
89my $FILE;
90my $OUTFILE;
91
92my %po_files_by_lang = ();
93my %translations = ();
94my $iconv = $ENV{"ICONV"} || $ENV{"INTLTOOL_ICONV"} || "@INTLTOOL_ICONV@";
95my $devnull = ($^O eq 'MSWin32' ? 'NUL:' : '/dev/null');
96
97# Use this instead of \w for XML files to handle more possible characters.
98my $w = "[-A-Za-z0-9._:]";
99
100# XML quoted string contents
101my $q = "[^\\\"]*";
102
103## Check for options.
104
105if ($VERSION_ARG)
106{
107	&print_version;
108}
109elsif ($HELP_ARG)
110{
111	&print_help;
112}
113elsif ($BA_STYLE_ARG && @ARGV > 2)
114{
115	&utf8_sanity_check;
116	&preparation;
117	&print_message;
118	&ba_merge_translations;
119	&finalize;
120}
121elsif ($XML_STYLE_ARG && @ARGV > 2)
122{
123	&utf8_sanity_check;
124	&preparation;
125	&print_message;
126	&xml_merge_output;
127	&finalize;
128}
129elsif ($KEYS_STYLE_ARG && @ARGV > 2)
130{
131	&utf8_sanity_check;
132	&preparation;
133	&print_message;
134	&keys_merge_translations;
135	&finalize;
136}
137elsif ($DESKTOP_STYLE_ARG && @ARGV > 2)
138{
139	&utf8_sanity_check;
140	&preparation;
141	&print_message;
142	&desktop_merge_translations;
143	&finalize;
144}
145elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2)
146{
147	&utf8_sanity_check;
148	&preparation;
149	&print_message;
150	&schemas_merge_translations;
151	&finalize;
152}
153elsif ($RFC822DEB_STYLE_ARG && @ARGV > 2)
154{
155	&preparation;
156	&print_message;
157	&rfc822deb_merge_translations;
158	&finalize;
159}
160else
161{
162	&print_help;
163}
164
165exit;
166
167## Sub for printing release information
168sub print_version
169{
170    print <<_EOF_;
171${PROGRAM} (${PACKAGE}) ${VERSION}
172Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.
173
174Copyright (C) 2000-2003 Free Software Foundation, Inc.
175Copyright (C) 2000-2001 Eazel, Inc.
176This is free software; see the source for copying conditions.  There is NO
177warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
178_EOF_
179    exit;
180}
181
182## Sub for printing usage information
183sub print_help
184{
185    print <<_EOF_;
186Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE
187Generates an output file that includes some localized attributes from an
188untranslated source file.
189
190Mandatory options: (exactly one must be specified)
191  -b, --ba-style         includes translations in the bonobo-activation style
192  -d, --desktop-style    includes translations in the desktop style
193  -k, --keys-style       includes translations in the keys style
194  -s, --schemas-style    includes translations in the schemas style
195  -r, --rfc822deb-style  includes translations in the RFC822 style
196  -x, --xml-style        includes translations in the standard xml style
197
198Other options:
199  -u, --utf8             convert all strings to UTF-8 before merging 
200                         (default for everything except RFC822 style)
201  -p, --pass-through     deprecated, does nothing and issues a warning
202  -m, --multiple-output  output one localized file per locale, instead of 
203	                 a single file containing all localized elements
204  -c, --cache=FILE       specify cache file name
205                         (usually \$top_builddir/po/.intltool-merge-cache)
206  -q, --quiet            suppress most messages
207      --help             display this help and exit
208      --version          output version information and exit
209
210Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
211or send email to <xml-i18n-tools\@gnome.org>.
212_EOF_
213    exit;
214}
215
216
217## Sub for printing error messages
218sub print_error
219{
220    print STDERR "Try `${PROGRAM} --help' for more information.\n";
221    exit;
222}
223
224
225sub print_message
226{
227    print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
228}
229
230
231sub preparation
232{
233    $PO_DIR = $ARGV[0];
234    $FILE = $ARGV[1];
235    $OUTFILE = $ARGV[2];
236
237    &gather_po_files;
238    &get_translation_database;
239}
240
241# General-purpose code for looking up translations in .po files
242
243sub po_file2lang
244{
245    my ($tmp) = @_;
246    $tmp =~ s/^.*\/(.*)\.po$/$1/;
247    return $tmp;
248}
249
250sub gather_po_files
251{
252    for my $po_file (glob "$PO_DIR/*.po") {
253	$po_files_by_lang{po_file2lang($po_file)} = $po_file;
254    }
255}
256
257sub get_local_charset
258{
259    my ($encoding) = @_;
260    my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "@INTLTOOL_LIBDIR@/charset.alias";
261
262    # seek character encoding aliases in charset.alias (glib)
263
264    if (open CHARSET_ALIAS, $alias_file)
265    {
266	while (<CHARSET_ALIAS>)
267        {
268            next if /^\#/;
269            return $1 if (/^\s*([-._a-zA-Z0-9]+)\s+$encoding\b/i)
270        }
271
272        close CHARSET_ALIAS;
273    }
274
275    # if not found, return input string
276
277    return $encoding;
278}
279
280sub get_po_encoding
281{
282    my ($in_po_file) = @_;
283    my $encoding = "";
284
285    open IN_PO_FILE, $in_po_file or die;
286    while (<IN_PO_FILE>)
287    {
288        ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
289        if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/)
290        {
291            $encoding = $1;
292            last;
293        }
294    }
295    close IN_PO_FILE;
296
297    if (!$encoding)
298    {
299        print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG;
300        $encoding = "ISO-8859-1";
301    }
302
303    system ("$iconv -f $encoding -t UTF-8 <$devnull 2>$devnull");
304    if ($?) {
305	$encoding = get_local_charset($encoding);
306    }
307
308    return $encoding
309}
310
311sub utf8_sanity_check
312{
313    print STDERR "Warning: option --pass-through has been removed.\n" if $PASS_THROUGH_ARG;
314    $UTF8_ARG = 1;
315}
316
317sub get_translation_database
318{
319    if ($cache_file) {
320	&get_cached_translation_database;
321    } else {
322        &create_translation_database;
323    }
324}
325
326sub get_newest_po_age
327{
328    my $newest_age;
329
330    foreach my $file (values %po_files_by_lang)
331    {
332	my $file_age = -M $file;
333	$newest_age = $file_age if !$newest_age || $file_age < $newest_age;
334    }
335
336    $newest_age = 0 if !$newest_age;
337
338    return $newest_age;
339}
340
341sub create_cache
342{
343    print "Generating and caching the translation database\n" unless $QUIET_ARG;
344
345    &create_translation_database;
346
347    open CACHE, ">$cache_file" || die;
348    print CACHE join "\x01", %translations;
349    close CACHE;
350}
351
352sub load_cache
353{
354    print "Found cached translation database\n" unless $QUIET_ARG;
355
356    my $contents;
357    open CACHE, "<$cache_file" || die;
358    {
359        local $/;
360        $contents = <CACHE>;
361    }
362    close CACHE;
363    %translations = split "\x01", $contents;
364}
365
366sub get_cached_translation_database
367{
368    my $cache_file_age = -M $cache_file;
369    if (defined $cache_file_age)
370    {
371        if ($cache_file_age <= &get_newest_po_age)
372        {
373            &load_cache;
374            return;
375        }
376        print "Found too-old cached translation database\n" unless $QUIET_ARG;
377    }
378
379    &create_cache;
380}
381
382sub create_translation_database
383{
384    for my $lang (keys %po_files_by_lang)
385    {
386    	my $po_file = $po_files_by_lang{$lang};
387
388        if ($UTF8_ARG)
389        {
390            my $encoding = get_po_encoding ($po_file);
391
392            if (lc $encoding eq "utf-8")
393            {
394                open PO_FILE, "<$po_file";
395            }
396            else
397            {
398		print "NOTICE: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;;
399
400                open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|";
401            }
402        }
403        else
404        {
405            open PO_FILE, "<$po_file";
406        }
407
408	my $nextfuzzy = 0;
409	my $inmsgid = 0;
410	my $inmsgstr = 0;
411	my $msgid = "";
412	my $msgstr = "";
413
414        while (<PO_FILE>)
415        {
416	    $nextfuzzy = 1 if /^#, fuzzy/;
417
418	    if (/^msgid "((\\.|[^\\])*)"/ )
419            {
420		$translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
421		$msgid = "";
422		$msgstr = "";
423
424		if ($nextfuzzy) {
425		    $inmsgid = 0;
426		} else {
427		    $msgid = unescape_po_string($1);
428		    $inmsgid = 1;
429		}
430		$inmsgstr = 0;
431		$nextfuzzy = 0;
432	    }
433
434	    if (/^msgstr "((\\.|[^\\])*)"/)
435            {
436	        $msgstr = unescape_po_string($1);
437		$inmsgstr = 1;
438		$inmsgid = 0;
439	    }
440
441	    if (/^"((\\.|[^\\])*)"/)
442            {
443	        $msgid .= unescape_po_string($1) if $inmsgid;
444	        $msgstr .= unescape_po_string($1) if $inmsgstr;
445	    }
446	}
447	$translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
448    }
449}
450
451sub finalize
452{
453}
454
455sub unescape_one_sequence
456{
457    my ($sequence) = @_;
458
459    return "\\" if $sequence eq "\\\\";
460    return "\"" if $sequence eq "\\\"";
461    return "\n" if $sequence eq "\\n";
462    return "\r" if $sequence eq "\\r";
463    return "\t" if $sequence eq "\\t";
464    return "\b" if $sequence eq "\\b";
465    return "\f" if $sequence eq "\\f";
466    return "\a" if $sequence eq "\\a";
467    return chr(11) if $sequence eq "\\v"; # vertical tab, see ascii(7)
468
469    return chr(hex($1)) if ($sequence =~ /\\x([0-9a-fA-F]{2})/);
470    return chr(oct($1)) if ($sequence =~ /\\([0-7]{3})/);
471
472    # FIXME: Is \0 supported as well? Kenneth and Rodney don't want it, see bug #48489
473
474    return $sequence;
475}
476
477sub unescape_po_string
478{
479    my ($string) = @_;
480
481    $string =~ s/(\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\.)/unescape_one_sequence($1)/eg;
482
483    return $string;
484}
485
486## NOTE: deal with < - &lt; but not > - &gt;  because it seems its ok to have
487## > in the entity. For further info please look at #84738.
488sub entity_decode
489{
490    local ($_) = @_;
491
492    s/&apos;/'/g; # '
493    s/&quot;/"/g; # "
494    s/&amp;/&/g;
495    s/&lt;/</g;
496
497    return $_;
498}
499
500# entity_encode: (string)
501#
502# Encode the given string to XML format (encode '<' etc).
503
504sub entity_encode
505{
506    my ($pre_encoded) = @_;
507
508    my @list_of_chars = unpack ('C*', $pre_encoded);
509
510    # with UTF-8 we only encode minimalistic
511    return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
512}
513
514sub entity_encode_int_minimalist
515{
516    return "&quot;" if $_ == 34;
517    return "&amp;" if $_ == 38;
518    return "&apos;" if $_ == 39;
519    return "&lt;" if $_ == 60;
520    return chr $_;
521}
522
523sub entity_encoded_translation
524{
525    my ($lang, $string) = @_;
526
527    my $translation = $translations{$lang, $string};
528    return $string if !$translation;
529    return entity_encode ($translation);
530}
531
532## XML (bonobo-activation specific) merge code
533
534sub ba_merge_translations
535{
536    my $source;
537
538    {
539       local $/; # slurp mode
540       open INPUT, "<$FILE" or die "can't open $FILE: $!";
541       $source = <INPUT>;
542       close INPUT;
543    }
544
545    open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
546    # Binmode so that selftest works ok if using a native Win32 Perl...
547    binmode (OUTPUT) if $^O eq 'MSWin32';
548
549    while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s)
550    {
551        print OUTPUT $1;
552
553        my $node = $2 . "\n";
554
555        my @strings = ();
556        $_ = $node;
557	while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
558             push @strings, entity_decode($3);
559        }
560	print OUTPUT;
561
562	my %langs;
563	for my $string (@strings)
564        {
565	    for my $lang (keys %po_files_by_lang)
566            {
567                $langs{$lang} = 1 if $translations{$lang, $string};
568	    }
569	}
570
571	for my $lang (sort keys %langs)
572        {
573	    $_ = $node;
574	    s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
575	    s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
576	    print OUTPUT;
577        }
578    }
579
580    print OUTPUT $source;
581
582    close OUTPUT;
583}
584
585
586## XML (non-bonobo-activation) merge code
587
588
589# Process tag attributes
590#   Only parameter is a HASH containing attributes -> values mapping
591sub getAttributeString
592{
593    my $sub = shift;
594    my $do_translate = shift || 0;
595    my $language = shift || "";
596    my $result = "";
597    my $translate = shift;
598    foreach my $e (reverse(sort(keys %{ $sub }))) {
599	my $key    = $e;
600	my $string = $sub->{$e};
601	my $quote = '"';
602
603	$string =~ s/^[\s]+//;
604	$string =~ s/[\s]+$//;
605
606	if ($string =~ /^'.*'$/)
607	{
608	    $quote = "'";
609	}
610	$string =~ s/^['"]//g;
611	$string =~ s/['"]$//g;
612
613	if ($do_translate && $key =~ /^_/) {
614	    $key =~ s|^_||g;
615	    if ($language) {
616		# Handle translation
617		my $decode_string = entity_decode($string);
618		my $translation = $translations{$language, $decode_string};
619		if ($translation) {
620		    $translation = entity_encode($translation);
621		    $string = $translation;
622                }
623                $$translate = 2;
624            } else {
625                 $$translate = 2 if ($translate && (!$$translate)); # watch not to "overwrite" $translate
626            }
627	}
628
629	$result .= " $key=$quote$string$quote";
630    }
631    return $result;
632}
633
634# Returns a translatable string from XML node, it works on contents of every node in XML::Parser tree
635sub getXMLstring
636{
637    my $ref = shift;
638    my $spacepreserve = shift || 0;
639    my @list = @{ $ref };
640    my $result = "";
641
642    my $count = scalar(@list);
643    my $attrs = $list[0];
644    my $index = 1;
645
646    $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
647    $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));
648
649    while ($index < $count) {
650	my $type = $list[$index];
651	my $content = $list[$index+1];
652        if (! $type ) {
653	    # We've got CDATA
654	    if ($content) {
655		# lets strip the whitespace here, and *ONLY* here
656                $content =~ s/\s+/ /gs if (!$spacepreserve);
657		$result .= $content;
658	    }
659	} elsif ( "$type" ne "1" ) {
660	    # We've got another element
661	    $result .= "<$type";
662	    $result .= getAttributeString(@{$content}[0], 0); # no nested translatable elements
663	    if ($content) {
664		my $subresult = getXMLstring($content, $spacepreserve);
665		if ($subresult) {
666		    $result .= ">".$subresult . "</$type>";
667		} else {
668		    $result .= "/>";
669		}
670	    } else {
671		$result .= "/>";
672	    }
673	}
674	$index += 2;
675    }
676    return $result;
677}
678
679# Translate list of nodes if necessary
680sub translate_subnodes
681{
682    my $fh = shift;
683    my $content = shift;
684    my $language = shift || "";
685    my $singlelang = shift || 0;
686    my $spacepreserve = shift || 0;
687
688    my @nodes = @{ $content };
689
690    my $count = scalar(@nodes);
691    my $index = 0;
692    while ($index < $count) {
693        my $type = $nodes[$index];
694        my $rest = $nodes[$index+1];
695        if ($singlelang) {
696            my $oldMO = $MULTIPLE_OUTPUT;
697            $MULTIPLE_OUTPUT = 1;
698            traverse($fh, $type, $rest, $language, $spacepreserve);
699            $MULTIPLE_OUTPUT = $oldMO;
700        } else {
701            traverse($fh, $type, $rest, $language, $spacepreserve);
702        }
703        $index += 2;
704    }
705}
706
707sub isWellFormedXmlFragment
708{
709    my $ret = eval 'require XML::Parser';
710    if(!$ret) {
711        die "You must have XML::Parser installed to run $0\n\n";
712    }
713
714    my $fragment = shift;
715    return 0 if (!$fragment);
716
717    $fragment = "<root>$fragment</root>";
718    my $xp = new XML::Parser(Style => 'Tree');
719    my $tree = 0;
720    eval { $tree = $xp->parse($fragment); };
721    return $tree;
722}
723
724sub traverse
725{
726    my $fh = shift;
727    my $nodename = shift;
728    my $content = shift;
729    my $language = shift || "";
730    my $spacepreserve = shift || 0;
731
732    if (!$nodename) {
733	if ($content =~ /^[\s]*$/) {
734	    $leading_space .= $content;
735	}
736	print $fh $content;
737    } else {
738	# element
739	my @all = @{ $content };
740	my $attrs = shift @all;
741	my $translate = 0;
742	my $outattr = getAttributeString($attrs, 1, $language, \$translate);
743
744	if ($nodename =~ /^_/) {
745	    $translate = 1;
746	    $nodename =~ s/^_//;
747	}
748	my $lookup = '';
749
750        $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));
751        $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
752
753	print $fh "<$nodename", $outattr;
754	if ($translate) {
755	    $lookup = getXMLstring($content, $spacepreserve);
756            if (!$spacepreserve) {
757                $lookup =~ s/^\s+//s;
758                $lookup =~ s/\s+$//s;
759            }
760
761	    if ($lookup || $translate == 2) {
762                my $translation = $translations{$language, $lookup} if isWellFormedXmlFragment($translations{$language, $lookup});
763                if ($MULTIPLE_OUTPUT && ($translation || $translate == 2)) {
764                    $translation = $lookup if (!$translation);
765                    print $fh " xml:lang=\"", $language, "\"" if $language;
766                    print $fh ">";
767                    if ($translate == 2) {
768                        translate_subnodes($fh, \@all, $language, 1, $spacepreserve);
769                    } else {
770                        print $fh $translation;
771                    }
772                    print $fh "</$nodename>";
773
774                    return; # this means there will be no same translation with xml:lang="$language"...
775                            # if we want them both, just remove this "return"
776                } else {
777                    print $fh ">";
778                    if ($translate == 2) {
779                        translate_subnodes($fh, \@all, $language, 1, $spacepreserve);
780                    } else {
781                        print $fh $lookup;
782                    }
783                    print $fh "</$nodename>";
784                }
785	    } else {
786		print $fh "/>";
787	    }
788
789	    for my $lang (sort keys %po_files_by_lang) {
790                    if ($MULTIPLE_OUTPUT && $lang ne "$language") {
791                        next;
792                    }
793		    if ($lang) {
794                        # Handle translation
795                        #
796                        my $translate = 0;
797                        my $localattrs = getAttributeString($attrs, 1, $lang, \$translate);
798                        my $translation = $translations{$lang, $lookup} if isWellFormedXmlFragment($translations{$lang, $lookup});
799                        if ($translate && !$translation) {
800                            $translation = $lookup;
801                        }
802
803                        if ($translation || $translate) {
804			    print $fh "\n";
805			    $leading_space =~ s/.*\n//g;
806			    print $fh $leading_space;
807 			    print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs, ">";
808                            if ($translate == 2) {
809                               translate_subnodes($fh, \@all, $lang, 1, $spacepreserve);
810                            } else {
811                                print $fh $translation;
812                            }
813                            print $fh "</$nodename>";
814			}
815                    }
816	    }
817
818	} else {
819	    my $count = scalar(@all);
820	    if ($count > 0) {
821		print $fh ">";
822                my $index = 0;
823                while ($index < $count) {
824                    my $type = $all[$index];
825                    my $rest = $all[$index+1];
826                    traverse($fh, $type, $rest, $language, $spacepreserve);
827                    $index += 2;
828                }
829		print $fh "</$nodename>";
830	    } else {
831		print $fh "/>";
832	    }
833	}
834    }
835}
836
837sub intltool_tree_comment
838{
839    my $expat = shift;
840    my $data  = shift;
841    my $clist = $expat->{Curlist};
842    my $pos   = $#$clist;
843
844    push @$clist, 1 => $data;
845}
846
847sub intltool_tree_cdatastart
848{
849    my $expat    = shift;
850    my $clist = $expat->{Curlist};
851    my $pos   = $#$clist;
852
853    push @$clist, 0 => $expat->original_string();
854}
855
856sub intltool_tree_cdataend
857{
858    my $expat    = shift;
859    my $clist = $expat->{Curlist};
860    my $pos   = $#$clist;
861
862    $clist->[$pos] .= $expat->original_string();
863}
864
865sub intltool_tree_char
866{
867    my $expat = shift;
868    my $text  = shift;
869    my $clist = $expat->{Curlist};
870    my $pos   = $#$clist;
871
872    # Use original_string so that we retain escaped entities
873    # in CDATA sections.
874    #
875    if ($pos > 0 and $clist->[$pos - 1] eq '0') {
876        $clist->[$pos] .= $expat->original_string();
877    } else {
878        push @$clist, 0 => $expat->original_string();
879    }
880}
881
882sub intltool_tree_start
883{
884    my $expat    = shift;
885    my $tag      = shift;
886    my @origlist = ();
887
888    # Use original_string so that we retain escaped entities
889    # in attribute values.  We must convert the string to an
890    # @origlist array to conform to the structure of the Tree
891    # Style.
892    #
893    my @original_array = split /\x/, $expat->original_string();
894    my $source         = $expat->original_string();
895
896    # Remove leading tag.
897    #
898    $source =~ s|^\s*<\s*(\S+)||s;
899
900    # Grab attribute key/value pairs and push onto @origlist array.
901    #
902    while ($source)
903    {
904       if ($source =~ /^\s*([\w:-]+)\s*[=]\s*["]/)
905       {
906           $source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s;
907           push @origlist, $1;
908           push @origlist, '"' . $2 . '"';
909       }
910       elsif ($source =~ /^\s*([\w:-]+)\s*[=]\s*[']/)
911       {
912           $source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s;
913           push @origlist, $1;
914           push @origlist, "'" . $2 . "'";
915       }
916       else
917       {
918           last;
919       }
920    }
921
922    my $ol = [ { @origlist } ];
923
924    push @{ $expat->{Lists} }, $expat->{Curlist};
925    push @{ $expat->{Curlist} }, $tag => $ol;
926    $expat->{Curlist} = $ol;
927}
928
929sub readXml
930{
931    my $filename = shift || return;
932    if(!-f $filename) {
933        die "ERROR Cannot find filename: $filename\n";
934    }
935
936    my $ret = eval 'require XML::Parser';
937    if(!$ret) {
938        die "You must have XML::Parser installed to run $0\n\n";
939    }
940    my $xp = new XML::Parser(Style => 'Tree');
941    $xp->setHandlers(Char => \&intltool_tree_char);
942    $xp->setHandlers(Start => \&intltool_tree_start);
943    $xp->setHandlers(CdataStart => \&intltool_tree_cdatastart);
944    $xp->setHandlers(CdataEnd => \&intltool_tree_cdataend);
945    my $tree = $xp->parsefile($filename);
946
947# <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
948# would be:
949# [foo, [{}, head, [{id => "a"}, 0, "Hello ",  em, [{}, 0, "there"]], bar, [{},
950# 0, "Howdy",  ref, [{}]], 0, "do" ] ]
951
952    return $tree;
953}
954
955sub print_header
956{
957    my $infile = shift;
958    my $fh = shift;
959    my $source;
960
961    if(!-f $infile) {
962        die "ERROR Cannot find filename: $infile\n";
963    }
964
965    print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n};
966    {
967        local $/;
968        open DOCINPUT, "<${FILE}" or die;
969        $source = <DOCINPUT>;
970        close DOCINPUT;
971    }
972    if ($source =~ /(<!DOCTYPE.*\[.*\]\s*>)/s)
973    {
974        print $fh "$1\n";
975    }
976    elsif ($source =~ /(<!DOCTYPE[^>]*>)/s)
977    {
978        print $fh "$1\n";
979    }
980}
981
982sub parseTree
983{
984    my $fh        = shift;
985    my $ref       = shift;
986    my $language  = shift || "";
987
988    my $name = shift @{ $ref };
989    my $cont = shift @{ $ref };
990
991    while (!$name || "$name" eq "1") {
992        $name = shift @{ $ref };
993        $cont = shift @{ $ref };
994    }
995
996    my $spacepreserve = 0;
997    my $attrs = @{$cont}[0];
998    $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
999
1000    traverse($fh, $name, $cont, $language, $spacepreserve);
1001}
1002
1003sub xml_merge_output
1004{
1005    my $source;
1006
1007    if ($MULTIPLE_OUTPUT) {
1008        for my $lang (sort keys %po_files_by_lang) {
1009	    if ( ! -e $lang ) {
1010	        mkdir $lang or die "Cannot create subdirectory $lang: $!\n";
1011            }
1012            open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n";
1013            binmode (OUTPUT) if $^O eq 'MSWin32';
1014            my $tree = readXml($FILE);
1015            print_header($FILE, \*OUTPUT);
1016            parseTree(\*OUTPUT, $tree, $lang);
1017            close OUTPUT;
1018            print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG;
1019        }
1020    }
1021    open OUTPUT, ">$OUTFILE" or die "Cannot open $OUTFILE: $!\n";
1022    binmode (OUTPUT) if $^O eq 'MSWin32';
1023    my $tree = readXml($FILE);
1024    print_header($FILE, \*OUTPUT);
1025    parseTree(\*OUTPUT, $tree);
1026    close OUTPUT;
1027    print "CREATED $OUTFILE\n" unless $QUIET_ARG;
1028}
1029
1030sub keys_merge_translations
1031{
1032    open INPUT, "<${FILE}" or die;
1033    open OUTPUT, ">${OUTFILE}" or die;
1034    binmode (OUTPUT) if $^O eq 'MSWin32';
1035
1036    while (<INPUT>)
1037    {
1038        if (s/^(\s*)_(\w+=(.*))/$1$2/)
1039        {
1040	    my $string = $3;
1041
1042            print OUTPUT;
1043
1044	    my $non_translated_line = $_;
1045
1046            for my $lang (sort keys %po_files_by_lang)
1047            {
1048		my $translation = $translations{$lang, $string};
1049                next if !$translation;
1050
1051                $_ = $non_translated_line;
1052		s/(\w+)=.*/[$lang]$1=$translation/;
1053                print OUTPUT;
1054            }
1055	}
1056        else
1057        {
1058            print OUTPUT;
1059        }
1060    }
1061
1062    close OUTPUT;
1063    close INPUT;
1064}
1065
1066sub desktop_merge_translations
1067{
1068    open INPUT, "<${FILE}" or die;
1069    open OUTPUT, ">${OUTFILE}" or die;
1070    binmode (OUTPUT) if $^O eq 'MSWin32';
1071
1072    while (<INPUT>)
1073    {
1074        if (s/^(\s*)_(\w+=(.*))/$1$2/)
1075        {
1076	    my $string = $3;
1077
1078            print OUTPUT;
1079
1080	    my $non_translated_line = $_;
1081
1082            for my $lang (sort keys %po_files_by_lang)
1083            {
1084                my $translation = $translations{$lang, $string};
1085                next if !$translation;
1086
1087                $_ = $non_translated_line;
1088                s/(\w+)=.*/${1}[$lang]=$translation/;
1089                print OUTPUT;
1090            }
1091	}
1092        else
1093        {
1094            print OUTPUT;
1095        }
1096    }
1097
1098    close OUTPUT;
1099    close INPUT;
1100}
1101
1102sub schemas_merge_translations
1103{
1104    my $source;
1105
1106    {
1107       local $/; # slurp mode
1108       open INPUT, "<$FILE" or die "can't open $FILE: $!";
1109       $source = <INPUT>;
1110       close INPUT;
1111    }
1112
1113    open OUTPUT, ">$OUTFILE" or die;
1114    binmode (OUTPUT) if $^O eq 'MSWin32';
1115
1116    # FIXME: support attribute translations
1117
1118    # Empty nodes never need translation, so unmark all of them.
1119    # For example, <_foo/> is just replaced by <foo/>.
1120    $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
1121
1122    while ($source =~ s/
1123                        (.*?)
1124                        (\s+)(<locale\ name="C">(\s*)
1125                            (<default>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/default>)?(\s*)
1126                            (<short>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/short>)?(\s*)
1127                            (<long>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/long>)?(\s*)
1128                        <\/locale>)
1129                       //sx)
1130    {
1131        print OUTPUT $1;
1132
1133	my $locale_start_spaces = $2 ? $2 : '';
1134	my $default_spaces = $4 ? $4 : '';
1135	my $short_spaces = $7 ? $7 : '';
1136	my $long_spaces = $10 ? $10 : '';
1137	my $locale_end_spaces = $13 ? $13 : '';
1138	my $c_default_block = $3 ? $3 : '';
1139	my $default_string = $6 ? $6 : '';
1140	my $short_string = $9 ? $9 : '';
1141	my $long_string = $12 ? $12 : '';
1142
1143	print OUTPUT "$locale_start_spaces$c_default_block";
1144
1145        $default_string =~ s/\s+/ /g;
1146        $default_string = entity_decode($default_string);
1147	$short_string =~ s/\s+/ /g;
1148	$short_string = entity_decode($short_string);
1149	$long_string =~ s/\s+/ /g;
1150	$long_string = entity_decode($long_string);
1151
1152	for my $lang (sort keys %po_files_by_lang)
1153        {
1154	    my $default_translation = $translations{$lang, $default_string};
1155	    my $short_translation = $translations{$lang, $short_string};
1156	    my $long_translation  = $translations{$lang, $long_string};
1157
1158	    next if (!$default_translation && !$short_translation &&
1159                     !$long_translation);
1160
1161	    print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";
1162
1163        print OUTPUT "$default_spaces";
1164
1165        if ($default_translation)
1166        {
1167            $default_translation = entity_encode($default_translation);
1168            print OUTPUT "<default>$default_translation</default>";
1169        }
1170
1171	    print OUTPUT "$short_spaces";
1172
1173	    if ($short_translation)
1174	    {
1175			$short_translation = entity_encode($short_translation);
1176			print OUTPUT "<short>$short_translation</short>";
1177	    }
1178
1179	    print OUTPUT "$long_spaces";
1180
1181	    if ($long_translation)
1182	    {
1183			$long_translation = entity_encode($long_translation);
1184			print OUTPUT "<long>$long_translation</long>";
1185	    }
1186
1187	    print OUTPUT "$locale_end_spaces</locale>";
1188        }
1189    }
1190
1191    print OUTPUT $source;
1192
1193    close OUTPUT;
1194}
1195
1196sub rfc822deb_merge_translations
1197{
1198    my %encodings = ();
1199    for my $lang (keys %po_files_by_lang) {
1200        $encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang}));
1201    }
1202
1203    my $source;
1204
1205    $Text::Wrap::huge = 'overflow';
1206    $Text::Wrap::break = qr/\n|\s(?=\S)/;
1207
1208    {
1209       local $/; # slurp mode
1210       open INPUT, "<$FILE" or die "can't open $FILE: $!";
1211       $source = <INPUT>;
1212       close INPUT;
1213    }
1214
1215    open OUTPUT, ">${OUTFILE}" or die;
1216    binmode (OUTPUT) if $^O eq 'MSWin32';
1217
1218    while ($source =~ /(^|\n+)(_*)([^:\s]+)(:[ \t]*)(.*?)(?=\n[\S\n]|$)/sg)
1219    {
1220	    my $sep = $1;
1221	    my $non_translated_line = $3.$4;
1222	    my $string = $5;
1223	    my $underscore = length($2);
1224	    next if $underscore eq 0 && $non_translated_line =~ /^#/;
1225	    #  Remove [] dummy strings
1226	    my $stripped = $string;
1227	    $stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore eq 2;
1228	    $stripped =~ s/\[\s[^\[\]]*\]$//;
1229	    $non_translated_line .= $stripped;
1230
1231	    print OUTPUT $sep.$non_translated_line;
1232
1233	    if ($underscore)
1234	    {
1235	        my @str_list = rfc822deb_split($underscore, $string);
1236
1237	        for my $lang (sort keys %po_files_by_lang)
1238                {
1239                    my $is_translated = 1;
1240                    my $str_translated = '';
1241                    my $first = 1;
1242
1243                    for my $str (@str_list)
1244                    {
1245                        my $translation = $translations{$lang, $str};
1246
1247                        if (!$translation)
1248                        {
1249                            $is_translated = 0;
1250                            last;
1251                        }
1252
1253	                #  $translation may also contain [] dummy
1254                        #  strings, mostly to indicate an empty string
1255	                $translation =~ s/\[\s[^\[\]]*\]$//;
1256
1257                        if ($first)
1258                        {
1259                            if ($underscore eq 2)
1260                            {
1261                                $str_translated .= $translation;
1262                            }
1263                            else
1264                            {
1265                                $str_translated .=
1266                                    Text::Tabs::expand($translation) .
1267                                    "\n";
1268                            }
1269                        }
1270                        else
1271                        {
1272                            if ($underscore eq 2)
1273                            {
1274                                $str_translated .= ', ' . $translation;
1275                            }
1276                            else
1277                            {
1278                                $str_translated .= Text::Tabs::expand(
1279                                    Text::Wrap::wrap(' ', ' ', $translation)) .
1280                                    "\n .\n";
1281                            }
1282                        }
1283                        $first = 0;
1284
1285                        #  To fix some problems with Text::Wrap::wrap
1286                        $str_translated =~ s/(\n )+\n/\n .\n/g;
1287                    }
1288                    next unless $is_translated;
1289
1290                    $str_translated =~ s/\n \.\n$//;
1291                    $str_translated =~ s/\s+$//;
1292
1293                    $_ = $non_translated_line;
1294                    s/^(\w+):\s*.*/$sep${1}-$lang.$encodings{$lang}: $str_translated/s;
1295                    print OUTPUT;
1296                }
1297	    }
1298    }
1299    print OUTPUT "\n";
1300
1301    close OUTPUT;
1302    close INPUT;
1303}
1304
1305sub rfc822deb_split
1306{
1307    # Debian defines a special way to deal with rfc822-style files:
1308    # when a value contain newlines, it consists of
1309    #   1.  a short form (first line)
1310    #   2.  a long description, all lines begin with a space,
1311    #       and paragraphs are separated by a single dot on a line
1312    # This routine returns an array of all paragraphs, and reformat
1313    # them.
1314    # When first argument is 2, the string is a comma separated list of
1315    # values.
1316    my $type = shift;
1317    my $text = shift;
1318    $text =~ s/^[ \t]//mg;
1319    return (split(/, */, $text, 0)) if $type ne 1;
1320    return ($text) if $text !~ /\n/;
1321
1322    $text =~ s/([^\n]*)\n//;
1323    my @list = ($1);
1324    my $str = '';
1325
1326    for my $line (split (/\n/, $text))
1327    {
1328        chomp $line;
1329        if ($line =~ /^\.\s*$/)
1330        {
1331            #  New paragraph
1332            $str =~ s/\s*$//;
1333            push(@list, $str);
1334            $str = '';
1335        }
1336        elsif ($line =~ /^\s/)
1337        {
1338            #  Line which must not be reformatted
1339            $str .= "\n" if length ($str) && $str !~ /\n$/;
1340            $line =~ s/\s+$//;
1341            $str .= $line."\n";
1342        }
1343        else
1344        {
1345            #  Continuation line, remove newline
1346            $str .= " " if length ($str) && $str !~ /\n$/;
1347            $str .= $line;
1348        }
1349    }
1350
1351    $str =~ s/\s*$//;
1352    push(@list, $str) if length ($str);
1353
1354    return @list;
1355}
1356
1357