1#!@INTLTOOL_PERL@ -w 2# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4 -*- 3 4# 5# The Intltool Message Extractor 6# 7# Copyright (C) 2000-2001, 2003 Free Software Foundation. 8# 9# Intltool is free software; you can redistribute it and/or 10# modify it under the terms of the GNU General Public License as 11# published by the Free Software Foundation; either version 2 of the 12# License, or (at your option) any later version. 13# 14# Intltool is distributed in the hope that it will be useful, 15# but WITHOUT ANY WARRANTY; without even the implied warranty of 16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17# General Public License for more details. 18# 19# You should have received a copy of the GNU General Public License 20# along with this program; if not, write to the Free Software 21# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 22# 23# As a special exception to the GNU General Public License, if you 24# distribute this file as part of a program that contains a 25# configuration script generated by Autoconf, you may include it under 26# the same distribution terms that you use for the rest of that program. 27# 28# Authors: Kenneth Christiansen <kenneth@gnu.org> 29# Darin Adler <darin@bentspoon.com> 30# 31 32## Release information 33my $PROGRAM = "intltool-extract"; 34my $PACKAGE = "intltool"; 35my $VERSION = "0.33"; 36 37## Loaded modules 38use strict; 39use File::Basename; 40use Getopt::Long; 41 42## Scalars used by the option stuff 43my $TYPE_ARG = "0"; 44my $LOCAL_ARG = "0"; 45my $HELP_ARG = "0"; 46my $VERSION_ARG = "0"; 47my $UPDATE_ARG = "0"; 48my $QUIET_ARG = "0"; 49my $SRCDIR_ARG = "."; 50 51my $FILE; 52my $OUTFILE; 53 54my $gettext_type = ""; 55my $input; 56my %messages = (); 57my %loc = (); 58my %count = (); 59my %comments = (); 60my $strcount = 0; 61 62## Use this instead of \w for XML files to handle more possible characters. 63my $w = "[-A-Za-z0-9._:]"; 64 65## Always print first 66$| = 1; 67 68## Handle options 69GetOptions ( 70 "type=s" => \$TYPE_ARG, 71 "local|l" => \$LOCAL_ARG, 72 "help|h" => \$HELP_ARG, 73 "version|v" => \$VERSION_ARG, 74 "update" => \$UPDATE_ARG, 75 "quiet|q" => \$QUIET_ARG, 76 "srcdir=s" => \$SRCDIR_ARG, 77 ) or &error; 78 79&split_on_argument; 80 81 82## Check for options. 83## This section will check for the different options. 84 85sub split_on_argument { 86 87 if ($VERSION_ARG) { 88 &version; 89 90 } elsif ($HELP_ARG) { 91 &help; 92 93 } elsif ($LOCAL_ARG) { 94 &place_local; 95 &extract; 96 97 } elsif ($UPDATE_ARG) { 98 &place_normal; 99 &extract; 100 101 } elsif (@ARGV > 0) { 102 &place_normal; 103 &message; 104 &extract; 105 106 } else { 107 &help; 108 109 } 110} 111 112sub place_normal { 113 $FILE = $ARGV[0]; 114 $OUTFILE = "$FILE.h"; 115} 116 117sub place_local { 118 $FILE = $ARGV[0]; 119 $OUTFILE = fileparse($FILE, ()); 120 if (!-e "tmp/") { 121 system("mkdir tmp/"); 122 } 123 $OUTFILE = "./tmp/$OUTFILE.h" 124} 125 126sub determine_type { 127 if ($TYPE_ARG =~ /^gettext\/(.*)/) { 128 $gettext_type=$1 129 } 130} 131 132## Sub for printing release information 133sub version{ 134 print <<_EOF_; 135${PROGRAM} (${PACKAGE}) $VERSION 136Copyright (C) 2000, 2003 Free Software Foundation, Inc. 137Written by Kenneth Christiansen, 2000. 138 139This is free software; see the source for copying conditions. There is NO 140warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 141_EOF_ 142 exit; 143} 144 145## Sub for printing usage information 146sub help { 147 print <<_EOF_; 148Usage: ${PROGRAM} [OPTION]... [FILENAME] 149Generates a header file from an XML source file. 150 151It grabs all strings between <_translatable_node> and its end tag in 152XML files. Read manpage (man ${PROGRAM}) for more info. 153 154 --type=TYPE Specify the file type of FILENAME. Currently supports: 155 "gettext/glade", "gettext/ini", "gettext/keys" 156 "gettext/rfc822deb", "gettext/schemas", 157 "gettext/scheme", "gettext/xml" 158 -l, --local Writes output into current working directory 159 (conflicts with --update) 160 --update Writes output into the same directory the source file 161 reside (conflicts with --local) 162 --srcdir Root of the source tree 163 -v, --version Output version information and exit 164 -h, --help Display this help and exit 165 -q, --quiet Quiet mode 166 167Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE") 168or send email to <xml-i18n-tools\@gnome.org>. 169_EOF_ 170 exit; 171} 172 173## Sub for printing error messages 174sub error{ 175 print STDERR "Try `${PROGRAM} --help' for more information.\n"; 176 exit; 177} 178 179sub message { 180 print "Generating C format header file for translation.\n" unless $QUIET_ARG; 181} 182 183sub extract { 184 &determine_type; 185 186 &convert; 187 188 open OUT, ">$OUTFILE"; 189 &msg_write; 190 close OUT; 191 192 print "Wrote $OUTFILE\n" unless $QUIET_ARG; 193} 194 195sub convert { 196 197 ## Reading the file 198 { 199 local (*IN); 200 local $/; #slurp mode 201 open (IN, "<$SRCDIR_ARG/$FILE") || die "can't open $SRCDIR_ARG/$FILE: $!"; 202 $input = <IN>; 203 } 204 205 &type_ini if $gettext_type eq "ini"; 206 &type_keys if $gettext_type eq "keys"; 207 &type_xml if $gettext_type eq "xml"; 208 &type_glade if $gettext_type eq "glade"; 209 &type_scheme if $gettext_type eq "scheme"; 210 &type_schemas if $gettext_type eq "schemas"; 211 &type_rfc822deb if $gettext_type eq "rfc822deb"; 212} 213 214sub entity_decode_minimal 215{ 216 local ($_) = @_; 217 218 s/'/'/g; # ' 219 s/"/"/g; # " 220 s/&/&/g; 221 222 return $_; 223} 224 225sub entity_decode 226{ 227 local ($_) = @_; 228 229 s/'/'/g; # ' 230 s/"/"/g; # " 231 s/&/&/g; 232 s/</</g; 233 s/>/>/g; 234 235 return $_; 236} 237 238sub escape_char 239{ 240 return '\"' if $_ eq '"'; 241 return '\n' if $_ eq "\n"; 242 return '\\' if $_ eq '\\'; 243 244 return $_; 245} 246 247sub escape 248{ 249 my ($string) = @_; 250 return join "", map &escape_char, split //, $string; 251} 252 253sub type_ini { 254 ### For generic translatable desktop files ### 255 while ($input =~ /^_.*=(.*)$/mg) { 256 $messages{$1} = []; 257 } 258} 259 260sub type_keys { 261 ### For generic translatable mime/keys files ### 262 while ($input =~ /^\s*_\w+=(.*)$/mg) { 263 $messages{$1} = []; 264 } 265} 266 267sub type_xml { 268 ### For generic translatable XML files ### 269 270 while ($input =~ /(?:<!--([^>]*?)-->[^\n]*\n?[^\n]*)?\s_$w+\s*=\s*\"([^"]*)\"/sg) { # " 271 $messages{entity_decode_minimal($2)} = []; 272 $comments{entity_decode_minimal($2)} = $1 if (defined($1)); 273 } 274 275 while ($input =~ /(?:<!--([^>]*?)-->\s*)?<_($w+)(?: xml:space="($w+)")?[^>]*>(.*?)<\/_\2>/sg) { 276 $_ = $4; 277 if (!defined($3) || $3 ne "preserve") { 278 s/\s+/ /g; 279 s/^ //; 280 s/ $//; 281 } 282 $messages{$_} = []; 283 $comments{$_} = $1 if (defined($1)); 284 } 285} 286 287sub type_schemas { 288 ### For schemas XML files ### 289 290 # FIXME: We should handle escaped < (less than) 291 while ($input =~ / 292 <locale\ name="C">\s* 293 (<default>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/default>\s*)? 294 (<short>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/short>\s*)? 295 (<long>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/long>\s*)? 296 <\/locale> 297 /sgx) { 298 my @totranslate = ($3,$6,$9); 299 my @eachcomment = ($2,$5,$8); 300 foreach (@totranslate) { 301 my $currentcomment = shift @eachcomment; 302 next if !$_; 303 s/\s+/ /g; 304 $messages{entity_decode_minimal($_)} = []; 305 $comments{entity_decode_minimal($_)} = $currentcomment if (defined($currentcomment)); 306 } 307 } 308} 309 310sub type_rfc822deb { 311 ### For rfc822-style Debian configuration files ### 312 313 my $lineno = 1; 314 my $type = ''; 315 while ($input =~ /\G(.*?)(^|\n)(_+)([^:]+):[ \t]*(.*?)(?=\n\S|$)/sg) 316 { 317 my ($pre, $newline, $underscore, $tag, $text) = ($1, $2, $3, $4, $5); 318 while ($pre =~ m/\n/g) 319 { 320 $lineno ++; 321 } 322 $lineno += length($newline); 323 my @str_list = rfc822deb_split(length($underscore), $text); 324 for my $str (@str_list) 325 { 326 $strcount++; 327 $messages{$str} = []; 328 $loc{$str} = $lineno; 329 $count{$str} = $strcount; 330 my $usercomment = ''; 331 while($pre =~ s/(^|\n)#([^\n]*)$//s) 332 { 333 $usercomment = "\n" . $2 . $usercomment; 334 } 335 $comments{$str} = $tag . $usercomment; 336 } 337 $lineno += ($text =~ s/\n//g); 338 } 339} 340 341sub rfc822deb_split { 342 # Debian defines a special way to deal with rfc822-style files: 343 # when a value contain newlines, it consists of 344 # 1. a short form (first line) 345 # 2. a long description, all lines begin with a space, 346 # and paragraphs are separated by a single dot on a line 347 # This routine returns an array of all paragraphs, and reformat 348 # them. 349 # When first argument is 2, the string is a comma separated list of 350 # values. 351 my $type = shift; 352 my $text = shift; 353 $text =~ s/^[ \t]//mg; 354 return (split(/, */, $text, 0)) if $type ne 1; 355 return ($text) if $text !~ /\n/; 356 357 $text =~ s/([^\n]*)\n//; 358 my @list = ($1); 359 my $str = ''; 360 for my $line (split (/\n/, $text)) 361 { 362 chomp $line; 363 if ($line =~ /^\.\s*$/) 364 { 365 # New paragraph 366 $str =~ s/\s*$//; 367 push(@list, $str); 368 $str = ''; 369 } 370 elsif ($line =~ /^\s/) 371 { 372 # Line which must not be reformatted 373 $str .= "\n" if length ($str) && $str !~ /\n$/; 374 $line =~ s/\s+$//; 375 $str .= $line."\n"; 376 } 377 else 378 { 379 # Continuation line, remove newline 380 $str .= " " if length ($str) && $str !~ /\n$/; 381 $str .= $line; 382 } 383 } 384 $str =~ s/\s*$//; 385 push(@list, $str) if length ($str); 386 return @list; 387} 388 389sub type_glade { 390 ### For translatable Glade XML files ### 391 392 my $tags = "label|title|text|format|copyright|comments|preview_text|tooltip|message"; 393 394 while ($input =~ /<($tags)>([^<]+)<\/($tags)>/sg) { 395 # Glade sometimes uses tags that normally mark translatable things for 396 # little bits of non-translatable content. We work around this by not 397 # translating strings that only includes something like label4 or window1. 398 $messages{entity_decode($2)} = [] unless $2 =~ /^(window|label|dialog)[0-9]+$/; 399 } 400 401 while ($input =~ /<items>(..[^<]*)<\/items>/sg) { 402 for my $item (split (/\n/, $1)) { 403 $messages{entity_decode($item)} = []; 404 } 405 } 406 407 ## handle new glade files 408 while ($input =~ /<(property|atkproperty)\s+[^>]*translatable\s*=\s*"yes"(?:\s+[^>]*comments\s*=\s*"([^"]*)")?[^>]*>([^<]+)<\/\1>/sg) { 409 $messages{entity_decode($3)} = [] unless $3 =~ /^(window|label)[0-9]+$/; 410 if (defined($2) and !($3 =~ /^(window|label)[0-9]+$/)) { 411 $comments{entity_decode($3)} = entity_decode($2) ; 412 } 413 } 414 while ($input =~ /<atkaction\s+action_name="([^>]*)"\s+description="([^>]+)"\/>/sg) { 415 $messages{entity_decode_minimal($2)} = []; 416 } 417} 418 419sub type_scheme { 420 my ($line, $i, $state, $str, $trcomment, $char); 421 for $line (split(/\n/, $input)) { 422 $i = 0; 423 $state = 0; # 0 - nothing, 1 - string, 2 - translatable string 424 while ($i < length($line)) { 425 if (substr($line,$i,1) eq "\"") { 426 if ($state == 2) { 427 $comments{$str} = $trcomment if ($trcomment); 428 $messages{$str} = []; 429 $str = ''; 430 $state = 0; $trcomment = ""; 431 } elsif ($state == 1) { 432 $str = ''; 433 $state = 0; $trcomment = ""; 434 } else { 435 $state = 1; 436 $str = ''; 437 if ($i>0 && substr($line,$i-1,1) eq '_') { 438 $state = 2; 439 } 440 } 441 } elsif (!$state) { 442 if (substr($line,$i,1) eq ";") { 443 $trcomment = substr($line,$i+1); 444 $trcomment =~ s/^;*\s*//; 445 $i = length($line); 446 } elsif ($trcomment && substr($line,$i,1) !~ /\s|\(|\)|_/) { 447 $trcomment = ""; 448 } 449 } else { 450 if (substr($line,$i,1) eq "\\") { 451 $char = substr($line,$i+1,1); 452 if ($char ne "\"" && $char ne "\\") { 453 $str = $str . "\\"; 454 } 455 $i++; 456 } 457 $str = $str . substr($line,$i,1); 458 } 459 $i++; 460 } 461 } 462} 463 464sub msg_write { 465 my @msgids; 466 if (%count) 467 { 468 @msgids = sort { $count{$a} <=> $count{$b} } keys %count; 469 } 470 else 471 { 472 @msgids = sort keys %messages; 473 } 474 for my $message (@msgids) 475 { 476 my $offsetlines = 1; 477 $offsetlines++ if $message =~ /%/; 478 if (defined ($comments{$message})) 479 { 480 while ($comments{$message} =~ m/\n/g) 481 { 482 $offsetlines++; 483 } 484 } 485 print OUT "# ".($loc{$message} - $offsetlines). " \"$FILE\"\n" 486 if defined $loc{$message}; 487 print OUT "/* ".$comments{$message}." */\n" 488 if defined $comments{$message}; 489 print OUT "/* xgettext:no-c-format */\n" if $message =~ /%/; 490 491 my @lines = split (/\n/, $message, -1); 492 for (my $n = 0; $n < @lines; $n++) 493 { 494 if ($n == 0) 495 { 496 print OUT "char *s = N_(\""; 497 } 498 else 499 { 500 print OUT " \""; 501 } 502 503 print OUT escape($lines[$n]); 504 505 if ($n < @lines - 1) 506 { 507 print OUT "\\n\"\n"; 508 } 509 else 510 { 511 print OUT "\");\n"; 512 } 513 } 514 } 515} 516 517