1# Locale::Po4a::Po -- manipulation of PO files 2# 3# This program is free software; you may redistribute it and/or modify it 4# under the terms of GPL (see COPYING). 5 6############################################################################ 7# Modules and declarations 8############################################################################ 9 10=encoding UTF-8 11 12=head1 NAME 13 14Locale::Po4a::Po - PO file manipulation module 15 16=head1 SYNOPSIS 17 18 use Locale::Po4a::Po; 19 my $pofile=Locale::Po4a::Po->new(); 20 21 # Read PO file 22 $pofile->read('file.po'); 23 24 # Add an entry 25 $pofile->push('msgid' => 'Hello', 'msgstr' => 'bonjour', 26 'flags' => "wrap", 'reference'=>'file.c:46'); 27 28 # Extract a translation 29 $pofile->gettext("Hello"); # returns 'bonjour' 30 31 # Write back to a file 32 $pofile->write('otherfile.po'); 33 34=head1 DESCRIPTION 35 36Locale::Po4a::Po is a module that allows you to manipulate message 37catalogs. You can load and write from/to a file (which extension is often 38I<po>), you can build new entries on the fly or request for the translation 39of a string. 40 41For a more complete description of message catalogs in the PO format and 42their use, please refer to the info documentation of the gettext program (node "`PO Files"'). 43 44This module is part of the po4a project, which objective is to use PO files 45(designed at origin to ease the translation of program messages) to 46translate everything, including documentation (man page, info manual), 47package description, debconf templates, and everything which may benefit 48from this. 49 50=head1 OPTIONS ACCEPTED BY THIS MODULE 51 52=over 4 53 54=item B<--porefs> I<type> 55 56Specify the reference format. Argument I<type> can be one of B<never> 57to not produce any reference, B<file> to only specify the file 58without the line number, B<counter> to replace line number by an 59increasing counter, and B<full> to include complete references (default: full). 60 61=item B<--wrap-po> B<no>|B<newlines>|I<number> (default: 76) 62 63Specify how the po file should be wrapped. This gives the choice between either 64files that are nicely wrapped but could lead to git conflicts, or files that are 65easier to handle automatically, but harder to read for humans. 66 67Historically, the gettext suite has reformatted the po files at the 77th column 68for cosmetics. This option specifies the behavior of po4a. If set to a numerical 69value, po4a will wrap the po file after this column and after newlines in the 70content. If set to B<newlines>, po4a will only split the msgid and msgstr after 71newlines in the content. If set to B<no>, po4a will not wrap the po file at all. 72The reference comments are always wrapped by the gettext tools that we use internally. 73 74Note that this option has no impact on how the msgid and msgstr are wrapped, ie 75on how newlines are added to the content of these strings. 76 77=item B<--msgid-bugs-address> I<email@address> 78 79Set the report address for msgid bugs. By default, the created POT files 80have no Report-Msgid-Bugs-To fields. 81 82=item B<--copyright-holder> I<string> 83 84Set the copyright holder in the POT header. The default value is 85"Free Software Foundation, Inc." 86 87=item B<--package-name> I<string> 88 89Set the package name for the POT header. The default is "PACKAGE". 90 91=item B<--package-version> I<string> 92 93Set the package version for the POT header. The default is "VERSION". 94 95=back 96 97=cut 98 99use IO::File; 100 101require Exporter; 102 103package Locale::Po4a::Po; 104use DynaLoader; 105 106use Locale::Po4a::Common qw(wrap_msg wrap_mod wrap_ref_mod dgettext); 107 108use subs qw(makespace); 109use vars qw(@ISA @EXPORT_OK); 110@ISA = qw(Exporter DynaLoader); 111@EXPORT = qw(%debug); 112@EXPORT_OK = qw(&move_po_if_needed); 113 114use Locale::Po4a::TransTractor; 115 116# Try to use a C extension if present. 117eval("bootstrap Locale::Po4a::Po $Locale::Po4a::TransTractor::VERSION"); 118 119use 5.006; 120use strict; 121use warnings; 122 123use Carp qw(croak); 124use File::Basename; 125use File::Path; # mkdir before write 126use File::Copy; # move 127use POSIX qw(strftime floor); 128use Time::Local; 129 130use Encode; 131use Config; 132 133my @known_flags = qw( 134 wrap no-wrap fuzzy 135 c-format no-c-format 136 objc-format no-objc-format 137 sh-format no-sh-format 138 python-format no-python-format 139 python-brace-format no-python-brace-format 140 lisp-format no-lisp-format 141 elisp-format no-elisp-format 142 librep-format no-librep-format 143 scheme-format no-scheme-format 144 smalltalk-format no-smalltalk-format 145 java-format no-java-format 146 csharp-format no-csharp-format 147 awk-format no-awk-format 148 object-pascal-format no-object-pascal-format 149 ycp-format no-ycp-format 150 tcl-format no-tcl-format 151 perl-format no-perl-format 152 perl-brace-format no-perl-brace-format 153 php-format no-php-format 154 gcc-internal-format no-gcc-internal-format 155 gfc-internal-format no-gfc-internal-format 156 qt-format no-qt-format 157 qt-plural-format no-qt-plural-format 158 kde-format no-kde-format 159 boost-format no-boost-format 160 lua-format no-lua-format 161 javascript-format no-javascript-format 162); 163 164# Custom flags, used for example by weblate 165push @known_flags, 'markdown-text'; 166 167our %debug = ( 168 'canonize' => 0, 169 'quote' => 0, 170 'escape' => 0, 171 'encoding' => 0, 172 'filter' => 0 173); 174 175=head1 Functions concerning entire message catalogs 176 177=over 4 178 179=item new() 180 181Creates a new message catalog. If an argument is provided, it's the name of 182a PO file we should load. 183 184=cut 185 186sub new { 187 my ( $this, $options ) = ( shift, shift ); 188 my $class = ref($this) || $this; 189 my $self = {}; 190 bless $self, $class; 191 $self->initialize($options); 192 193 my $filename = shift; 194 $self->read($filename) if length($filename); 195 return $self; 196} 197 198# Return the numerical timezone (e.g. +0200) 199# Neither the %z nor the %s formats of strftime are portable: 200# '%s' is not supported on Solaris and '%z' indicates 201# "2006-10-25 19:36E. Europe Standard Time" on MS Windows. 202sub timezone { 203 my ($time) = @_; 204 my @l = localtime($time); 205 206 my $diff = floor( timegm(@l) / 60 + 0.5 ) - floor( $time / 60 + 0.5 ); 207 my $sign = ( $diff >= 0 ? 1 : -1 ); 208 $diff = abs($diff); 209 210 my $h = $sign * floor( $diff / 60 ); 211 my $m = $diff % 60; 212 213 return sprintf "%+03d%02d\n", $h, $m; 214} 215 216sub initialize { 217 my ( $self, $options ) = ( shift, shift ); 218 my $time = time; 219 my $date = strftime( "%Y-%m-%d %H:%M", localtime($time) ) . timezone($time); 220 chomp $date; 221 222 $self->{options}{'porefs'} = 'full'; 223 $self->{options}{'msgid-bugs-address'} = undef; 224 $self->{options}{'copyright-holder'} = "Free Software Foundation, Inc."; 225 $self->{options}{'package-name'} = "PACKAGE"; 226 $self->{options}{'package-version'} = "VERSION"; 227 $self->{options}{'wrap-po'} = 76; 228 $self->{options}{'pot-charset'} = "UTF-8"; 229 $self->{options}{'pot-language'} = ""; 230 231 foreach my $opt ( keys %$options ) { 232 233 # print STDERR "$opt: ".(defined($options->{$opt})?$options->{$opt}:"(undef)")."\n"; 234 if ( $options->{$opt} ) { 235 die wrap_mod( "po4a::po", dgettext( "po4a", "Unknown option: %s" ), $opt ) 236 unless exists $self->{options}{$opt}; 237 $self->{options}{$opt} = $options->{$opt}; 238 } 239 } 240 $self->{options}{'wrap-po'} =~ /^(no|newlines|\d+)$/ 241 || die wrap_mod( 242 "po4a::po", 243 dgettext( "po4a", "Invalid value for option 'wrap-po' ('%s' is not 'no' nor 'newlines' nor a number)" ), 244 $self->{options}{'wrap-po'} 245 ); 246 247 $self->{options}{'porefs'} =~ /^(full|counter|noline|file|none|never)?$/ 248 || die wrap_mod( 249 "po4a::po", 250 dgettext( 251 "po4a", 252 "Invalid value for option 'porefs' ('%s' is " 253 . "not one of 'full', 'counter', 'noline', 'file' or 'never')" 254 ), 255 $self->{options}{'porefs'} 256 ); 257 $self->{options}{'porefs'} =~ s/noline/file/; # backward compat. 'file' used to be called 'noline'. 258 $self->{options}{'porefs'} =~ s/none/never/; # backward compat. 'never' used to be called 'none'. 259 if ( $self->{options}{'porefs'} =~ m/^counter/ ) { 260 $self->{counter} = {}; 261 } 262 263 $self->{po} = (); 264 $self->{count} = 0; # number of msgids in the PO 265 # count_doc: number of strings in the document 266 # (duplicate strings counted multiple times) 267 $self->{count_doc} = 0; 268 $self->{header_comment} = 269 " SOME DESCRIPTIVE TITLE\n" 270 . " Copyright (C) YEAR " 271 . $self->{options}{'copyright-holder'} . "\n" 272 . " This file is distributed under the same license " 273 . "as the " 274 . $self->{options}{'package-name'} 275 . " package.\n" 276 . " FIRST AUTHOR <EMAIL\@ADDRESS>, YEAR.\n" . "\n" 277 . ", fuzzy"; 278 279 # $self->header_tag="fuzzy"; 280 $self->{header} = escape_text( 281 "Project-Id-Version: " 282 . $self->{options}{'package-name'} . " " 283 . $self->{options}{'package-version'} . "\n" 284 . ( 285 ( defined $self->{options}{'msgid-bugs-address'} ) 286 ? "Report-Msgid-Bugs-To: " . $self->{options}{'msgid-bugs-address'} . "\n" 287 : "" 288 ) 289 . "POT-Creation-Date: $date\n" 290 . "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 291 . "Last-Translator: FULL NAME <EMAIL\@ADDRESS>\n" 292 . "Language-Team: LANGUAGE <LL\@li.org>\n" 293 . "Language: " 294 . $self->{options}{'pot-language'} . "\n" 295 . "MIME-Version: 1.0\n" 296 . "Content-Type: text/plain; charset=" 297 . $self->{options}{'pot-charset'} . "\n" 298 . "Content-Transfer-Encoding: 8bit\n" 299 ); 300 301 $self->{encoder} = find_encoding("UTF-8"); 302 $self->{footer} = []; 303 304 # To make stats about gettext hits 305 $self->stats_clear(); 306} 307 308=item read($) 309 310Reads a PO file (which name is given as argument). Previously existing 311entries in self are not removed, the new ones are added to the end of the 312catalog. 313 314=cut 315 316sub read { 317 my $self = shift; 318 my $filename = shift 319 or croak wrap_mod( "po4a::po", dgettext( "po4a", "Please provide a non-null filename" ) ); 320 321 my $lang = basename($filename); 322 $lang =~ s/\.po$//; 323 $self->{lang} = $lang; 324 325 my $cmd = "msgfmt" . $Config{_exe} . " --check-format --check-domain -o /dev/null " . $filename; 326 327 my $locale = $ENV{'LC_ALL'}; 328 $ENV{'LC_ALL'} = "C"; 329 my $out = qx/$cmd 2>&1/; 330 $ENV{'LC_ALL'} = $locale; 331 332 die wrap_msg( dgettext( "po4a", "Invalid po file %s:\n%s" ), $filename, $out ) 333 unless ( $? == 0 ); 334 335 my $fh; 336 if ( $filename eq '-' ) { 337 $fh = *STDIN; 338 } else { 339 open $fh, "<$filename" 340 or croak wrap_mod( "po4a::po", dgettext( "po4a", "Cannot read from %s: %s" ), $filename, $! ); 341 } 342 343 ## Read paragraphs line-by-line 344 my $pofile = ""; 345 my $textline; 346 while ( defined( $textline = <$fh> ) ) { 347 $pofile .= $textline; 348 } 349 350 # close INPUT 351 # or croak (sprintf(dgettext("po4a", 352 # "Cannot close %s after reading: %s"), 353 # $filename,$!)."\n"); 354 355 my $linenum = 0; 356 357 foreach my $msg ( split( /\n\n/, $pofile ) ) { 358 my ( $msgid, $msgstr, $comment, $previous, $automatic, $reference, $flags, $buffer ); 359 my ( $msgid_plural, $msgstr_plural ); 360 if ( $msg =~ m/^#~/m ) { 361 push( @{ $self->{footer} }, $msg ); 362 next; 363 } 364 foreach my $line ( split( /\n/, $msg ) ) { 365 $linenum++; 366 if ( $line =~ /^#\. ?(.*)$/ ) { # Automatic comment 367 $automatic .= ( defined($automatic) ? "\n" : "" ) . $1; 368 369 } elsif ( $line =~ /^#: ?(.*)$/ ) { # reference 370 $reference .= ( defined($reference) ? "\n" : "" ) . $1; 371 372 } elsif ( $line =~ /^#, ?(.*)$/ ) { # flags 373 $flags .= ( defined($flags) ? "\n" : "" ) . $1; 374 375 } elsif ( $line =~ /^#\| ?(.*)$/ ) { # previous translation 376 $previous .= ( defined($previous) ? "\n" : "" ) . ( $1 || "" ); 377 378 } elsif ( $line =~ /^#(.*)$/ ) { # Translator comments 379 $comment .= ( defined($comment) ? "\n" : "" ) . ( $1 || "" ); 380 381 } elsif ( $line =~ /^msgid (".*")$/ ) { # begin of msgid 382 $buffer = $1; 383 384 } elsif ( $line =~ /^msgid_plural (".*")$/ ) { 385 386 # begin of msgid_plural, end of msgid 387 388 $msgid = $buffer; 389 $buffer = $1; 390 391 } elsif ( $line =~ /^msgstr (".*")$/ ) { 392 393 # begin of msgstr, end of msgid 394 395 $msgid = $buffer; 396 $buffer = "$1"; 397 398 } elsif ( $line =~ /^msgstr\[([0-9]+)\] (".*")$/ ) { 399 400 # begin of msgstr[x], end of msgid_plural or msgstr[x-1] 401 402 # Note: po4a cannot uses plural forms 403 # (no integer to use the plural form) 404 # * drop the msgstr[x] where x >= 2 405 # * use msgstr[0] as the translation of msgid 406 # * use msgstr[1] as the translation of msgid_plural 407 408 if ( $1 eq "0" ) { 409 $msgid_plural = $buffer; 410 $buffer = "$2"; 411 } elsif ( $1 eq "1" ) { 412 $msgstr = $buffer; 413 $buffer = "$2"; 414 } elsif ( $1 eq "2" ) { 415 $msgstr_plural = $buffer; 416 warn wrap_ref_mod( "$filename:$linenum", "po4a::po", 417 dgettext( "po4a", "Messages with more than 2 plural forms are not supported." ) ); 418 } 419 } elsif ( $line =~ /^(".*")$/ ) { 420 421 # continuation of a line 422 $buffer .= "\n$1"; 423 424 } else { 425 warn wrap_ref_mod( "$filename:$linenum", "po4a::po", dgettext( "po4a", "Parse error at: -->%s<--" ), 426 $line ); 427 } 428 } 429 $linenum++; 430 if ( defined $msgid_plural ) { 431 $msgstr_plural = $buffer; 432 433 $msgid = unquote_text($msgid) if ( defined($msgid) ); 434 $msgstr = unquote_text($msgstr) if ( defined($msgstr) ); 435 436 $self->push_raw( 437 'msgid' => $msgid, 438 'msgstr' => $msgstr, 439 'reference' => $reference, 440 'flags' => $flags, 441 'comment' => $comment, 442 'previous' => $previous, 443 'automatic' => $automatic, 444 'plural' => 0 445 ); 446 447 $msgid_plural = unquote_text($msgid_plural) 448 if ( defined($msgid_plural) ); 449 $msgstr_plural = unquote_text($msgstr_plural) 450 if ( defined($msgstr_plural) ); 451 452 $self->push_raw( 453 'msgid' => $msgid_plural, 454 'msgstr' => $msgstr_plural, 455 'reference' => $reference, 456 'flags' => $flags, 457 'comment' => $comment, 458 'previous' => $previous, 459 'automatic' => $automatic, 460 'plural' => 1 461 ); 462 } else { 463 $msgstr = $buffer; 464 465 $msgid = unquote_text($msgid) if ( defined($msgid) ); 466 $msgstr = unquote_text($msgstr) if ( defined($msgstr) ); 467 468 $self->push_raw( 469 'msgid' => $msgid, 470 'msgstr' => $msgstr, 471 'reference' => $reference, 472 'flags' => $flags, 473 'comment' => $comment, 474 'previous' => $previous, 475 'automatic' => $automatic 476 ); 477 } 478 } 479} 480 481=item write($) 482 483Writes the current catalog to the given file. 484 485=cut 486 487sub write { 488 my $self = shift; 489 my $filename = shift 490 or croak dgettext( "po4a", "Cannot write to a file without filename" ) . "\n"; 491 492 my $fh; 493 if ( $filename eq '-' ) { 494 $fh = \*STDOUT; 495 } else { 496 497 # make sure the directory in which we should write the localized 498 # file exists 499 my $dir = $filename; 500 if ( $dir =~ m|/| ) { 501 $dir =~ s|/[^/]*$||; 502 503 File::Path::mkpath( $dir, 0, 0755 ) # Croaks on error 504 if ( length($dir) && !-e $dir ); 505 } 506 open $fh, ">$filename" 507 or croak wrap_mod( "po4a::po", dgettext( "po4a", "Cannot write to %s: %s" ), $filename, $! ); 508 } 509 510 print $fh "" . format_comment( $self->{header_comment}, "" ) 511 if length( $self->{header_comment} ); 512 513 print $fh "msgid \"\"\n"; 514 print $fh "msgstr " . quote_text( $self->{header}, $self->{options}{'wrap-po'} ) . "\n\n"; 515 516 my $buf_msgstr_plural; # Used to keep the first msgstr of plural forms 517 my $first = 1; 518 foreach my $msgid ( sort { ( $self->{po}{"$a"}{'pos'} ) <=> ( $self->{po}{"$b"}{'pos'} ) } keys %{ $self->{po} } ) { 519 my $output = ""; 520 521 if ($first) { 522 $first = 0; 523 } else { 524 $output .= "\n"; 525 } 526 527 $output .= format_comment( $self->{po}{$msgid}{'comment'}, "" ) 528 if length( $self->{po}{$msgid}{'comment'} ); 529 if ( length( $self->{po}{$msgid}{'automatic'} ) ) { 530 foreach my $comment ( split( /\\n/, $self->{po}{$msgid}{'automatic'} ) ) { 531 $output .= format_comment( $comment, ". " ); 532 } 533 } 534 $output .= format_comment( $self->{po}{$msgid}{'type'}, ". type: " ) 535 if length( $self->{po}{$msgid}{'type'} ); 536 537 if ( length( $self->{po}{$msgid}{'reference'} ) ) { 538 my $output_ref = wrap( $self->{po}{$msgid}{'reference'} ); 539 $output_ref =~ s/\s+$//mg; 540 $output .= format_comment( $output_ref, ": " ); 541 } 542 $output .= "#, " . join( ", ", sort split( /\s+/, $self->{po}{$msgid}{'flags'} ) ) . "\n" 543 if length( $self->{po}{$msgid}{'flags'} ); 544 $output .= format_comment( $self->{po}{$msgid}{'previous'}, "| " ) 545 if length( $self->{po}{$msgid}{'previous'} ); 546 547 if ( exists $self->{po}{$msgid}{'plural'} ) { 548 if ( $self->{po}{$msgid}{'plural'} == 0 ) { 549 if ( $self->get_charset =~ /^utf-8$/i ) { 550 my $msgstr = Encode::decode_utf8( $self->{po}{$msgid}{'msgstr'} ); 551 $msgid = Encode::decode_utf8($msgid); 552 $output .= 553 Encode::encode_utf8( "msgid " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n" ); 554 $buf_msgstr_plural = 555 Encode::encode_utf8( "msgstr[0] " . quote_text( $msgstr, $self->{options}{'wrap-po'} ) . "\n" ); 556 } else { 557 $output = "msgid " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n"; 558 $buf_msgstr_plural = 559 "msgstr[0] " . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ) . "\n"; 560 } 561 } elsif ( $self->{po}{$msgid}{'plural'} == 1 ) { 562 563 # TODO: there may be only one plural form 564 if ( $self->get_charset =~ /^utf-8$/i ) { 565 my $msgstr = Encode::decode_utf8( $self->{po}{$msgid}{'msgstr'} ); 566 $msgid = Encode::decode_utf8($msgid); 567 $output = 568 Encode::encode_utf8( "msgid_plural " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n" ); 569 $output .= $buf_msgstr_plural; 570 $output .= 571 Encode::encode_utf8( "msgstr[1] " . quote_text( $msgstr, $self->{options}{'wrap-po'} ) . "\n" ); 572 $buf_msgstr_plural = ""; 573 } else { 574 $output = "msgid_plural " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n"; 575 $output .= $buf_msgstr_plural; 576 $output .= 577 "msgstr[1] " . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ) . "\n"; 578 } 579 } else { 580 die wrap_msg( dgettext( "po4a", "Cannot write PO files with more than two plural forms." ) ); 581 } 582 } else { 583 if ( $self->get_charset =~ /^utf-8$/i ) { 584 my $msgstr = Encode::decode_utf8( $self->{po}{$msgid}{'msgstr'} ); 585 $msgid = Encode::decode_utf8($msgid); 586 $output .= Encode::encode_utf8( "msgid " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n" ); 587 $output .= Encode::encode_utf8( "msgstr " . quote_text( $msgstr, $self->{options}{'wrap-po'} ) . "\n" ); 588 } else { 589 $output .= "msgid " . quote_text( $msgid, $self->{options}{'wrap-po'} ) . "\n"; 590 $output .= "msgstr " . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ) . "\n"; 591 } 592 } 593 594 print $fh $output; 595 } 596 print $fh join( "\n\n", @{ $self->{footer} } ) if scalar @{ $self->{footer} }; 597 598 # print STDERR "$fh"; 599 # if ($filename ne '-') { 600 # close $fh 601 # or croak (sprintf(dgettext("po4a", 602 # "Cannot close %s after writing: %s\n"), 603 # $filename,$!)); 604 # } 605} 606 607=item write_if_needed($$) 608 609Like write, but if the PO or POT file already exists, the object will be 610written in a temporary file which will be compared with the existing file 611to check if the update is needed (this avoids to change a POT just to 612update a line reference or the POT-Creation-Date field). 613 614=cut 615 616sub move_po_if_needed { 617 my ( $new_po, $old_po, $backup ) = ( shift, shift, shift ); 618 my $diff; 619 620 if ( -e $old_po ) { 621 $diff = qx(diff -q -I'^#:' -I'^\"POT-Creation-Date:' -I'^\"PO-Revision-Date:' $old_po $new_po); 622 if ( $diff eq "" ) { 623 unlink $new_po 624 or die wrap_msg( dgettext( "po4a", "Cannot unlink %s: %s." ), $new_po, $! ); 625 626 # touch the old PO 627 my ( $atime, $mtime ) = ( time, time ); 628 utime $atime, $mtime, $old_po; 629 } else { 630 move $new_po, $old_po 631 or die wrap_msg( dgettext( "po4a", "Cannot move %s to %s: %s." ), $new_po, $old_po, $! ); 632 } 633 } else { 634 move $new_po, $old_po 635 or die wrap_msg( dgettext( "po4a", "Cannot move %s to %s: %s." ), $new_po, $old_po, $! ); 636 } 637} 638 639sub write_if_needed { 640 my $self = shift; 641 my $filename = shift 642 or croak dgettext( "po4a", "Cannot write to a file without filename" ) . "\n"; 643 644 if ( -e $filename ) { 645 my ($tmp_filename); 646 my $basename = basename($filename); 647 ( undef, $tmp_filename ) = File::Temp::tempfile( 648 $basename . "XXXX", 649 DIR => $ENV{TMPDIR} || "/tmp", 650 OPEN => 0, 651 UNLINK => 0 652 ); 653 $self->write($tmp_filename); 654 move_po_if_needed( $tmp_filename, $filename ); 655 } else { 656 $self->write($filename); 657 } 658} 659 660=item gettextize($$) 661 662This function produces one translated message catalog from two catalogs, an 663original and a translation. This process is described in L<po4a(7)|po4a.7>, 664section I<Gettextization: how does it work?>. 665 666=cut 667 668sub gettextize { 669 my $this = shift; 670 my $class = ref($this) || $this; 671 my ( $poorig, $potrans ) = ( shift, shift ); 672 673 my $pores = Locale::Po4a::Po->new(); 674 675 my $please_fail = 0; 676 my $toobad = dgettext( "po4a", 677 "\nThe gettextization failed (once again). Don't give up, " 678 . "gettextizing is a subtle art, but this is only needed once " 679 . "to convert a project to the gorgeous luxus offered by po4a " 680 . "to translators." 681 . "\nPlease refer to the po4a(7) documentation, the section " 682 . "\"HOWTO convert a pre-existing translation to po4a?\" " 683 . "contains several hints to help you in your task" ); 684 685 # Don't fail right now when the entry count does not match. Instead, give 686 # it a try so that the user can see where we fail (which is probably where 687 # the problem is). 688 if ( $poorig->count_entries_doc() > $potrans->count_entries_doc() ) { 689 warn wrap_mod( 690 "po4a gettextize", 691 dgettext( 692 "po4a", 693 "Original has more strings than the translation (%d>%d). " 694 . "Please fix it by editing the translated version to add " 695 . "some dummy entry." 696 ), 697 $poorig->count_entries_doc(), 698 $potrans->count_entries_doc() 699 ); 700 $please_fail = 1; 701 } elsif ( $poorig->count_entries_doc() < $potrans->count_entries_doc() ) { 702 warn wrap_mod( 703 "po4a gettextize", 704 dgettext( 705 "po4a", 706 "Original has less strings than the translation (%d<%d). " 707 . "Please fix it by removing the extra entry from the " 708 . "translated file. You may need an addendum (cf po4a(7)) " 709 . "to reput the chunk in place after gettextization. A " 710 . "possible cause is that a text duplicated in the original " 711 . "is not translated the same way each time. Remove one of " 712 . "the translations, and you're fine." 713 ), 714 $poorig->count_entries_doc(), 715 $potrans->count_entries_doc() 716 ); 717 $please_fail = 1; 718 } 719 720 if ( $poorig->get_charset =~ /^utf-8$/i ) { 721 $potrans->to_utf8; 722 $pores->set_charset("UTF-8"); 723 } else { 724 my $charset = $potrans->get_charset(); 725 $charset = "UTF-8" if $charset eq "CHARSET"; 726 $pores->set_charset($charset); 727 } 728 print "Po character sets:\n" 729 . " original=" 730 . $poorig->get_charset . "\n" 731 . " translated=" 732 . $potrans->get_charset . "\n" 733 . " result=" 734 . $pores->get_charset . "\n" 735 if $debug{'encoding'}; 736 737 for ( 738 my ( $o, $t ) = ( 0, 0 ) ; 739 $o < $poorig->count_entries_doc() && $t < $potrans->count_entries_doc() ; 740 $o++, $t++ 741 ) 742 { 743 # 744 # Extract some informations 745 746 my ( $orig, $trans ) = ( $poorig->msgid_doc($o), $potrans->msgid_doc($t) ); 747 748 # print STDERR "Matches [[$orig]]<<$trans>>\n"; 749 750 my ( $reforig, $reftrans ) = ( $poorig->{po}{$orig}{'reference'}, $potrans->{po}{$trans}{'reference'} ); 751 my ( $typeorig, $typetrans ) = ( $poorig->{po}{$orig}{'type'}, $potrans->{po}{$trans}{'type'} ); 752 753 # 754 # Make sure the type of both string exist 755 # 756 die wrap_mod( "po4a gettextize", "Internal error: type of original string number %s " . "isn't provided", $o ) 757 if ( $typeorig eq '' ); 758 759 die wrap_mod( "po4a gettextize", "Internal error: type of translated string number %s " . "isn't provided", $o ) 760 if ( $typetrans eq '' ); 761 762 # 763 # Make sure both type are the same 764 # 765 if ( $typeorig ne $typetrans ) { 766 $pores->write("gettextization.failed.po"); 767 eval { 768 # Recode $trans into current charset, if possible 769 require I18N::Langinfo; 770 I18N::Langinfo->import(qw(langinfo CODESET)); 771 my $codeset = langinfo( CODESET() ); 772 Encode::from_to( $trans, $potrans->get_charset, $codeset ); 773 }; 774 die wrap_msg( 775 dgettext( "po4a", 776 "po4a gettextization: Structure disparity between " 777 . "original and translated files:\n" 778 . "msgid (at %s) is of type '%s' while\n" 779 . "msgstr (at %s) is of type '%s'.\n" 780 . "Original text: %s\n" 781 . "Translated text: %s\n" 782 . "(result so far dumped to gettextization.failed.po)" ) 783 . "%s", 784 $reforig, 785 $typeorig, 786 $reftrans, 787 $typetrans, 788 $orig, $trans, $toobad 789 ); 790 } 791 792 # 793 # Push the entry 794 # 795 my $flags; 796 if ( defined $poorig->{po}{$orig}{'flags'} ) { 797 $flags = $poorig->{po}{$orig}{'flags'} . " fuzzy"; 798 } else { 799 $flags = "fuzzy"; 800 } 801 $pores->push_raw( 802 'msgid' => $orig, 803 'msgstr' => $trans, 804 'flags' => $flags, 805 'type' => $typeorig, 806 'reference' => $reforig, 807 'conflict' => 1, 808 'transref' => $potrans->{po}{$trans}{'reference'} 809 ) 810 unless ( defined( $pores->{po}{$orig} ) 811 and ( $pores->{po}{$orig}{'msgstr'} eq $trans ) ) 812 813 # FIXME: maybe we should be smarter about what reference should be 814 # sent to push_raw. 815 } 816 817 # make sure we return a useful error message when entry count differ 818 die "$toobad\n" if $please_fail; 819 820 return $pores; 821} 822 823=item filter($) 824 825This function extracts a catalog from an existing one. Only the entries having 826a reference in the given file will be placed in the resulting catalog. 827 828This function parses its argument, converts it to a Perl function definition, 829evals this definition and filters the fields for which this function returns 830true. 831 832I love Perl sometimes ;) 833 834=cut 835 836sub filter { 837 my $self = shift; 838 our $filter = shift; 839 840 my $res; 841 $res = Locale::Po4a::Po->new(); 842 843 # Parse the filter 844 our $code = "sub apply { return "; 845 our $pos = 0; 846 our $length = length $filter; 847 848 # explode chars to parts. How to subscript a string in Perl? 849 our @filter = split( //, $filter ); 850 851 sub gloups { 852 my $fmt = shift; 853 my $space = ""; 854 for ( 1 .. $pos ) { 855 $space .= ' '; 856 } 857 die wrap_msg("$fmt\n$filter\n$space^ HERE"); 858 } 859 860 sub showmethecode { 861 return unless $debug{'filter'}; 862 my $fmt = shift; 863 my $space = ""; 864 for ( 1 .. $pos ) { 865 $space .= ' '; 866 } 867 print STDERR "$filter\n$space^ $fmt\n"; #"$code\n"; 868 } 869 870 # I dream of a lex in perl :-/ 871 sub parse_expression { 872 showmethecode("Begin expression") 873 if $debug{'filter'}; 874 875 gloups( "Begin of expression expected, got '%s'", $filter[$pos] ) 876 unless ( $filter[$pos] eq '(' ); 877 $pos++; # pass the '(' 878 if ( $filter[$pos] eq '&' ) { 879 880 # AND 881 $pos++; 882 showmethecode("Begin of AND") 883 if $debug{'filter'}; 884 $code .= "("; 885 while (1) { 886 gloups("Unfinished AND statement.") 887 if ( $pos == $length ); 888 parse_expression(); 889 if ( $filter[$pos] eq '(' ) { 890 $code .= " && "; 891 } elsif ( $filter[$pos] eq ')' ) { 892 last; # do not eat that char 893 } else { 894 gloups( "End of AND or begin of sub-expression expected, got '%s'", $filter[$pos] ); 895 } 896 } 897 $code .= ")"; 898 } elsif ( $filter[$pos] eq '|' ) { 899 900 # OR 901 $pos++; 902 $code .= "("; 903 while (1) { 904 gloups("Unfinished OR statement.") 905 if ( $pos == $length ); 906 parse_expression(); 907 if ( $filter[$pos] eq '(' ) { 908 $code .= " || "; 909 } elsif ( $filter[$pos] eq ')' ) { 910 last; # do not eat that char 911 } else { 912 gloups( "End of OR or begin of sub-expression expected, got '%s'", $filter[$pos] ); 913 } 914 } 915 $code .= ")"; 916 } elsif ( $filter[$pos] eq '!' ) { 917 918 # NOT 919 $pos++; 920 $code .= "(!"; 921 gloups("Missing sub-expression in NOT statement.") 922 if ( $pos == $length ); 923 parse_expression(); 924 $code .= ")"; 925 } else { 926 927 # must be an equal. Let's get field and argument 928 my ( $field, $arg, $done ); 929 $field = substr( $filter, $pos ); 930 gloups("EQ statement contains no '=' or invalid field name") 931 unless ( $field =~ /([a-z]*)=/i ); 932 $field = lc($1); 933 $pos += ( length $field ) + 1; 934 935 # check that we've got a valid field name, 936 # and the number it referes to 937 # DO NOT CHANGE THE ORDER 938 my @names = qw(msgid msgstr reference flags comment previous automatic); 939 my $fieldpos; 940 for ( $fieldpos = 0 ; $fieldpos < scalar @names && $field ne $names[$fieldpos] ; $fieldpos++ ) { } 941 gloups( "Invalid field name: %s", $field ) 942 if $fieldpos == scalar @names; # not found 943 944 # Now, get the argument value. It has to be between quotes, 945 # which can be escaped 946 # We point right on the first char of the argument 947 # (first quote already eaten) 948 my $escaped = 0; 949 my $quoted = 0; 950 if ( $filter[$pos] eq '"' ) { 951 $pos++; 952 $quoted = 1; 953 } 954 showmethecode( ( $quoted ? "Quoted" : "Unquoted" ) . " argument of field '$field'" ) 955 if $debug{'filter'}; 956 957 while ( !$done ) { 958 gloups("Unfinished EQ argument.") 959 if ( $pos == $length ); 960 961 if ($quoted) { 962 if ( $filter[$pos] eq '\\' ) { 963 if ($escaped) { 964 $arg .= '\\'; 965 $escaped = 0; 966 } else { 967 $escaped = 1; 968 } 969 } elsif ($escaped) { 970 if ( $filter[$pos] eq '"' ) { 971 $arg .= '"'; 972 $escaped = 0; 973 } else { 974 gloups( "Invalid escape sequence in argument: '\\%s'", $filter[$pos] ); 975 } 976 } else { 977 if ( $filter[$pos] eq '"' ) { 978 $done = 1; 979 } else { 980 $arg .= $filter[$pos]; 981 } 982 } 983 } else { 984 if ( $filter[$pos] eq ')' ) { 985 986 # counter the next ++ since we don't want to eat 987 # this char 988 $pos--; 989 $done = 1; 990 } else { 991 $arg .= $filter[$pos]; 992 } 993 } 994 $pos++; 995 } 996 997 # and now, add the code to check this equality 998 $code .= "(\$_[$fieldpos] =~ m{$arg})"; 999 1000 } 1001 showmethecode("End of expression") 1002 if $debug{'filter'}; 1003 gloups("Unfinished statement.") 1004 if ( $pos == $length ); 1005 gloups( "End of expression expected, got '%s'", $filter[$pos] ) 1006 unless ( $filter[$pos] eq ')' ); 1007 $pos++; 1008 } 1009 1010 # And now, launch the beast, finish the function and use eval 1011 # to construct this function. 1012 # Ok, the lack of lexer is a fair price for the eval ;) 1013 parse_expression(); 1014 gloups("Garbage at the end of the expression") 1015 if ( $pos != $length ); 1016 $code .= "; }"; 1017 print STDERR "CODE = $code\n" 1018 if $debug{'filter'}; 1019 eval $code; 1020 die wrap_mod( "po4a::po", dgettext( "po4a", "Evaluating the provided filter failed: %s" ), $@ ) 1021 if $@; 1022 1023 for ( my $cpt = (0) ; $cpt < $self->count_entries() ; $cpt++ ) { 1024 1025 my ( $msgid, $ref, $msgstr, $flags, $type, $comment, $previous, $automatic ); 1026 1027 $msgid = $self->msgid($cpt); 1028 $ref = $self->{po}{$msgid}{'reference'}; 1029 1030 $msgstr = $self->{po}{$msgid}{'msgstr'}; 1031 $flags = $self->{po}{$msgid}{'flags'}; 1032 $type = $self->{po}{$msgid}{'type'}; 1033 $comment = $self->{po}{$msgid}{'comment'}; 1034 $previous = $self->{po}{$msgid}{'previous'}; 1035 $automatic = $self->{po}{$msgid}{'automatic'}; 1036 1037 # DO NOT CHANGE THE ORDER 1038 $res->push_raw( 1039 'msgid' => $msgid, 1040 'msgstr' => $msgstr, 1041 'flags' => $flags, 1042 'type' => $type, 1043 'reference' => $ref, 1044 'comment' => $comment, 1045 'previous' => $previous, 1046 'automatic' => $automatic 1047 ) if ( apply( $msgid, $msgstr, $ref, $flags, $comment, $previous, $automatic ) ); 1048 } 1049 1050 # delete the apply subroutine 1051 # otherwise it will be redefined. 1052 undef &apply; 1053 return $res; 1054} 1055 1056=item to_utf8() 1057 1058Recodes to UTF-8 the PO's msgstrs. Does nothing if the charset is not 1059specified in the PO file ("CHARSET" value), or if it's already UTF-8 or 1060ASCII. 1061 1062=cut 1063 1064sub to_utf8 { 1065 my $this = shift; 1066 my $charset = $this->get_charset(); 1067 1068 unless ( $charset eq "CHARSET" 1069 or $charset =~ /^ascii$/i 1070 or $charset =~ /^utf-8$/i ) 1071 { 1072 foreach my $msgid ( keys %{ $this->{po} } ) { 1073 Encode::from_to( $this->{po}{$msgid}{'msgstr'}, $charset, "utf-8" ); 1074 } 1075 $this->set_charset("UTF-8"); 1076 } 1077} 1078 1079=back 1080 1081=head1 Functions to use a message catalog for translations 1082 1083=over 4 1084 1085=item gettext($%) 1086 1087Request the translation of the string given as argument in the current catalog. 1088The function returns the original (untranslated) string if the string was not 1089found. 1090 1091After the string to translate, you can pass a hash of extra 1092arguments. Here are the valid entries: 1093 1094=over 1095 1096=item B<wrap> 1097 1098boolean indicating whether we can consider that whitespaces in string are 1099not important. If yes, the function canonizes the string before looking for 1100a translation, and wraps the result. 1101 1102=item B<wrapcol> 1103 1104the column at which we should wrap (default: 76). 1105 1106=back 1107 1108=cut 1109 1110sub gettext { 1111 my $self = shift; 1112 my $text = shift; 1113 my (%opt) = @_; 1114 my $res; 1115 1116 return "" unless length($text); # Avoid returning the header. 1117 my $validoption = "reference wrap wrapcol"; 1118 my %validoption; 1119 1120 map { $validoption{$_} = 1 } ( split( / /, $validoption ) ); 1121 foreach ( keys %opt ) { 1122 Carp::confess "internal error: unknown arg $_.\n" . "Here are the valid options: $validoption.\n" 1123 unless $validoption{$_}; 1124 } 1125 1126 $text = canonize($text) 1127 if ( $opt{'wrap'} ); 1128 1129 my $esc_text = escape_text($text); 1130 1131 $self->{gettextqueries}++; 1132 1133 if ( 1134 defined $self->{po}{$esc_text} 1135 and defined $self->{po}{$esc_text}{'msgstr'} 1136 and length $self->{po}{$esc_text}{'msgstr'} 1137 and ( not defined $self->{po}{$esc_text}{'flags'} 1138 or $self->{po}{$esc_text}{'flags'} !~ /fuzzy/ ) 1139 ) 1140 { 1141 1142 $self->{gettexthits}++; 1143 $res = unescape_text( $self->{po}{$esc_text}{'msgstr'} ); 1144 if ( defined $self->{po}{$esc_text}{'plural'} ) { 1145 if ( $self->{po}{$esc_text}{'plural'} eq "0" ) { 1146 warn wrap_mod( 1147 "po4a gettextize", 1148 dgettext( 1149 "po4a", 1150 "'%s' is the singular form of a message, " . "po4a will use the msgstr[0] translation (%s)." 1151 ), 1152 $esc_text, 1153 $res 1154 ); 1155 } else { 1156 warn wrap_mod( 1157 "po4a gettextize", 1158 dgettext( 1159 "po4a", 1160 "'%s' is the plural form of a message, " . "po4a will use the msgstr[1] translation (%s)." 1161 ), 1162 $esc_text, 1163 $res 1164 ); 1165 } 1166 } 1167 } else { 1168 $res = $text; 1169 } 1170 1171 if ( $opt{'wrap'} ) { 1172 if ( $self->get_charset =~ /^utf-8$/i ) { 1173 $res = Encode::decode_utf8($res); 1174 $res = wrap( $res, $opt{'wrapcol'} || 76 ); 1175 $res = Encode::encode_utf8($res); 1176 } else { 1177 $res = wrap( $res, $opt{'wrapcol'} || 76 ); 1178 } 1179 } 1180 1181 # print STDERR "Gettext >>>$text<<<(escaped=$esc_text)=[[[$res]]]\n\n"; 1182 return $res; 1183} 1184 1185=item stats_get() 1186 1187Returns statistics about the hit ratio of gettext since the last time that 1188stats_clear() was called. Please note that it's not the same 1189statistics than the one printed by msgfmt --statistic. Here, it's statistics 1190about recent usage of the PO file, while msgfmt reports the status of the 1191file. Example of use: 1192 1193 [some use of the PO file to translate stuff] 1194 1195 ($percent,$hit,$queries) = $pofile->stats_get(); 1196 print "So far, we found translations for $percent\% ($hit of $queries) of strings.\n"; 1197 1198=cut 1199 1200sub stats_get() { 1201 my $self = shift; 1202 my ( $h, $q ) = ( $self->{gettexthits}, $self->{gettextqueries} ); 1203 my $p = ( $q == 0 ? 100 : int( $h / $q * 10000 ) / 100 ); 1204 1205 # $p =~ s/\.00//; 1206 # $p =~ s/(\..)0/$1/; 1207 1208 return ( $p, $h, $q ); 1209} 1210 1211=item stats_clear() 1212 1213Clears the statistics about gettext hits. 1214 1215=cut 1216 1217sub stats_clear { 1218 my $self = shift; 1219 $self->{gettextqueries} = 0; 1220 $self->{gettexthits} = 0; 1221} 1222 1223=back 1224 1225=head1 Functions to build a message catalog 1226 1227=over 4 1228 1229=item push(%) 1230 1231Push a new entry at the end of the current catalog. The arguments should 1232form a hash table. The valid keys are: 1233 1234=over 4 1235 1236=item B<msgid> 1237 1238the string in original language. 1239 1240=item B<msgstr> 1241 1242the translation. 1243 1244=item B<reference> 1245 1246an indication of where this string was found. Example: file.c:46 (meaning 1247in 'file.c' at line 46). It can be a space-separated list in case of 1248multiple occurrences. 1249 1250=item B<comment> 1251 1252a comment added here manually (by the translators). The format here is free. 1253 1254=item B<automatic> 1255 1256a comment which was automatically added by the string extraction 1257program. See the B<--add-comments> option of the B<xgettext> program for 1258more information. 1259 1260=item B<flags> 1261 1262space-separated list of all defined flags for this entry. 1263 1264Valid flags are: B<c-text>, B<python-text>, B<lisp-text>, B<elisp-text>, B<librep-text>, 1265B<smalltalk-text>, B<java-text>, B<awk-text>, B<object-pascal-text>, B<ycp-text>, 1266B<tcl-text>, B<wrap>, B<no-wrap> and B<fuzzy>. 1267 1268See the gettext documentation for their meaning. 1269 1270=item B<type> 1271 1272this is mostly an internal argument: it is used while gettextizing 1273documents. The idea here is to parse both the original and the translation 1274into a PO object, and merge them, using one's msgid as msgid and the 1275other's msgid as msgstr. To make sure that things get ok, each msgid in PO 1276objects are given a type, based on their structure (like "chapt", "sect1", 1277"p" and so on in DocBook). If the types of strings are not the same, that 1278means that both files do not share the same structure, and the process 1279reports an error. 1280 1281This information is written as automatic comment in the PO file since this 1282gives to translators some context about the strings to translate. 1283 1284=item B<wrap> 1285 1286boolean indicating whether whitespaces can be mangled in cosmetic 1287reformattings. If true, the string is canonized before use. 1288 1289This information is written to the PO file using the B<wrap> or B<no-wrap> flag. 1290 1291=item B<wrapcol> 1292 1293the column at which we should wrap (default: 76). 1294 1295This information is not written to the PO file. 1296 1297=back 1298 1299=cut 1300 1301sub push { 1302 my $self = shift; 1303 my %entry = @_; 1304 1305 my $validoption = "wrap wrapcol type msgid msgstr automatic previous flags reference"; 1306 my %validoption; 1307 1308 map { $validoption{$_} = 1 } ( split( / /, $validoption ) ); 1309 foreach ( keys %entry ) { 1310 Carp::confess "internal error: unknown arg $_.\n" . "Here are the valid options: $validoption.\n" 1311 unless $validoption{$_}; 1312 } 1313 1314 unless ( $entry{'wrap'} ) { 1315 $entry{'flags'} .= " no-wrap"; 1316 } 1317 if ( defined( $entry{'msgid'} ) ) { 1318 $entry{'msgid'} = canonize( $entry{'msgid'} ) 1319 if ( $entry{'wrap'} ); 1320 1321 $entry{'msgid'} = escape_text( $entry{'msgid'} ); 1322 } 1323 if ( defined( $entry{'msgstr'} ) ) { 1324 $entry{'msgstr'} = canonize( $entry{'msgstr'} ) 1325 if ( $entry{'wrap'} ); 1326 1327 $entry{'msgstr'} = escape_text( $entry{'msgstr'} ); 1328 } 1329 1330 $self->push_raw(%entry); 1331} 1332 1333# The same as push(), but assuming that msgid and msgstr are already escaped 1334sub push_raw { 1335 my $self = shift; 1336 my %entry = @_; 1337 my ( $msgid, $msgstr, $reference, $comment, $automatic, $previous, $flags, $type, $transref ) = ( 1338 $entry{'msgid'}, $entry{'msgstr'}, $entry{'reference'}, $entry{'comment'}, $entry{'automatic'}, 1339 $entry{'previous'}, $entry{'flags'}, $entry{'type'}, $entry{'transref'} 1340 ); 1341 my $keep_conflict = $entry{'conflict'}; 1342 1343 # print STDERR "Push_raw\n"; 1344 # print STDERR " msgid=>>>$msgid<<<\n" if $msgid; 1345 # print STDERR " msgstr=[[[$msgstr]]]\n" if $msgstr; 1346 # Carp::cluck " flags=$flags\n" if $flags; 1347 1348 return unless defined( $entry{'msgid'} ); 1349 1350 # no msgid => header definition 1351 unless ( length( $entry{'msgid'} ) ) { 1352 1353 # if (defined($self->{header}) && $self->{header} =~ /\S/) { 1354 # warn dgettext("po4a","Redefinition of the header. ". 1355 # "The old one will be discarded\n"); 1356 # } FIXME: do that iff the header isn't the default one. 1357 $self->{header} = $msgstr; 1358 $self->{header_comment} = $comment; 1359 my $charset = $self->get_charset; 1360 if ( $charset ne "CHARSET" ) { 1361 $self->{encoder} = find_encoding($charset); 1362 } else { 1363 $self->{encoder} = find_encoding("UTF-8"); 1364 } 1365 return; 1366 } 1367 1368 if ( $self->{options}{'porefs'} =~ m/^never/ ) { 1369 $reference = ""; 1370 } elsif ( $self->{options}{'porefs'} =~ m/^counter/ ) { 1371 if ( $reference =~ m/^(.+?)(?=\S+:\d+)/g ) { 1372 my $new_ref = $1; 1373 1 while $reference =~ s{ # x modifier is added to add formatting and improve readability 1374 \G(\s*)(\S+):\d+ # \G is the last match in m//g (see also the (?=) syntax above) 1375 # $2 is the file name 1376 }{ 1377 $self->{counter}{$2} ||= 0, # each file has its own counter 1378 ++$self->{counter}{$2}, # increment it 1379 $new_ref .= "$1$2:".$self->{counter}{$2} # replace line number by this counter 1380 }gex && pos($reference); 1381 $reference = $new_ref; 1382 } 1383 } elsif ( $self->{options}{'porefs'} =~ m/^file/ ) { 1384 $reference =~ s/:\d+//g; 1385 } 1386 1387 if ( defined( $self->{po}{$msgid} ) ) { 1388 warn wrap_mod( "po4a::po", dgettext( "po4a", "msgid defined twice: %s" ), $msgid ) 1389 if (0); # FIXME: put a verbose stuff 1390 if ( defined $msgstr 1391 and defined $self->{po}{$msgid}{'msgstr'} 1392 and $self->{po}{$msgid}{'msgstr'} ne $msgstr ) 1393 { 1394 my $txt = quote_text( $msgid, $self->{options}{'wrap-po'} ); 1395 my ( $first, $second ) = ( 1396 format_comment( ". ", $self->{po}{$msgid}{'reference'} ) 1397 . quote_text( $self->{po}{$msgid}{'msgstr'}, $self->{options}{'wrap-po'} ), 1398 1399 format_comment( ". ", $reference ) . quote_text($msgstr), $self->{options}{'wrap-po'} 1400 ); 1401 1402 if ($keep_conflict) { 1403 if ( $self->{po}{$msgid}{'msgstr'} =~ m/^#-#-#-#-# .* #-#-#-#-#\\n/s ) { 1404 $msgstr = $self->{po}{$msgid}{'msgstr'} . "\\n#-#-#-#-# $transref #-#-#-#-#\\n" . $msgstr; 1405 } else { 1406 $msgstr = 1407 "#-#-#-#-# " 1408 . $self->{po}{$msgid}{'transref'} 1409 . " #-#-#-#-#\\n" 1410 . $self->{po}{$msgid}{'msgstr'} . "\\n" 1411 . "#-#-#-#-# $transref #-#-#-#-#\\n" 1412 . $msgstr; 1413 } 1414 1415 # Every msgid will have the same list of references. 1416 # Only keep the last list. 1417 $self->{po}{$msgid}{'reference'} = ""; 1418 } else { 1419 warn wrap_msg( 1420 dgettext( 1421 "po4a", 1422 "Translations don't match for:\n" . "%s\n" 1423 . "-->First translation:\n" . "%s\n" 1424 . " Second translation:\n" . "%s\n" 1425 . " Old translation discarded." 1426 ), 1427 $txt, $first, $second 1428 ); 1429 } 1430 } 1431 } 1432 if ( defined $transref ) { 1433 $self->{po}{$msgid}{'transref'} = $transref; 1434 } 1435 if ( length($reference) ) { 1436 if ( defined $self->{po}{$msgid}{'reference'} ) { 1437 1438 # Only add the new reference if it's not already included in the existing string 1439 # It'd be much easier if $self->{po}{$msgid}{'reference'} were an array instead of a joined string... 1440 my $oldref = $self->{po}{$msgid}{'reference'}; 1441 $self->{po}{$msgid}{'reference'} .= " " . $reference 1442 unless ( ( $oldref =~ m/ $reference / ) 1443 || ( $oldref =~ m/ $reference$/ ) 1444 || ( $oldref =~ m/^$reference$/ ) 1445 || ( $oldref =~ m/^$reference / ) ); 1446 } else { 1447 $self->{po}{$msgid}{'reference'} = $reference; 1448 } 1449 } 1450 $self->{po}{$msgid}{'msgstr'} = $msgstr; 1451 $self->{po}{$msgid}{'comment'} = $comment; 1452 $self->{po}{$msgid}{'automatic'} = $automatic; 1453 $self->{po}{$msgid}{'previous'} = $previous; 1454 if ( defined( $self->{po}{$msgid}{'pos_doc'} ) ) { 1455 $self->{po}{$msgid}{'pos_doc'} .= " " . $self->{count_doc}++; 1456 } else { 1457 $self->{po}{$msgid}{'pos_doc'} = $self->{count_doc}++; 1458 } 1459 unless ( defined( $self->{po}{$msgid}{'pos'} ) ) { 1460 $self->{po}{$msgid}{'pos'} = $self->{count}++; 1461 } 1462 $self->{po}{$msgid}{'type'} = $type; 1463 $self->{po}{$msgid}{'plural'} = $entry{'plural'} 1464 if defined $entry{'plural'}; 1465 1466 if ( defined($flags) ) { 1467 $flags = " $flags "; 1468 $flags =~ s/,/ /g; 1469 foreach my $flag (@known_flags) { 1470 if ( index( $flags, " $flag " ) != -1 ) { # if flag to be set 1471 unless ( defined( $self->{po}{$msgid}{'flags'} ) 1472 && $self->{po}{$msgid}{'flags'} =~ /\b$flag\b/ ) 1473 { 1474 # flag not already set 1475 if ( defined $self->{po}{$msgid}{'flags'} ) { 1476 $self->{po}{$msgid}{'flags'} .= " " . $flag; 1477 } else { 1478 $self->{po}{$msgid}{'flags'} = $flag; 1479 } 1480 } 1481 } 1482 } 1483 } 1484 1485 # print STDERR "stored ((($msgid)))=>(((".$self->{po}{$msgid}{'msgstr'}.")))\n\n"; 1486 1487} 1488 1489=back 1490 1491=head1 Miscellaneous functions 1492 1493=over 4 1494 1495=item count_entries() 1496 1497Returns the number of entries in the catalog (without the header). 1498 1499=cut 1500 1501sub count_entries($) { 1502 my $self = shift; 1503 return $self->{count}; 1504} 1505 1506=item count_entries_doc() 1507 1508Returns the number of entries in document. If a string appears multiple times 1509in the document, it will be counted multiple times. 1510 1511=cut 1512 1513sub count_entries_doc($) { 1514 my $self = shift; 1515 return $self->{count_doc}; 1516} 1517 1518=item equals_msgid(po) 1519 1520Returns ($uptodate, $diagnostic) with $uptodate indicating whether all msgid of the current po file are 1521also present in the one passed as parameter (all other fields are ignored in the file comparison). 1522Informally, if $uptodate returns false, then the po files would be changed when going through B<po4a-updatepo>. 1523 1524If $uptodate is false, then $diagnostic contains a diagnostic of why this is so. 1525 1526=cut 1527 1528sub equals_msgid($$) { 1529 my ( $self, $other ) = ( shift, shift ); 1530 1531 unless ( $self->count_entries() == $other->count_entries() ) { 1532 return ( 1533 0, 1534 wrap_msg( 1535 dgettext( "po4a", "The amount of entries differ between files: %d is not %d" ), 1536 $self->count_entries(), 1537 $other->count_entries() 1538 ) 1539 ); 1540 } 1541 foreach my $msgid ( keys %{ $self->{po} } ) { 1542 unless ( defined( $self->{po}{$msgid} ) && defined( $other->{po}{$msgid} ) ) { 1543 return ( 0, wrap_msg( dgettext( "po4a", "msgid declared in one file only: %s\n" ), $msgid ) ); 1544 } 1545 } 1546 return ( 1, "" ); 1547} 1548 1549=item msgid($) 1550 1551Returns the msgid of the given number. 1552 1553=cut 1554 1555sub msgid($$) { 1556 my $self = shift; 1557 my $num = shift; 1558 1559 foreach my $msgid ( keys %{ $self->{po} } ) { 1560 return $msgid if ( $self->{po}{$msgid}{'pos'} eq $num ); 1561 } 1562 return undef; 1563} 1564 1565=item msgid_doc($) 1566 1567Returns the msgid with the given position in the document. 1568 1569=cut 1570 1571sub msgid_doc($$) { 1572 my $self = shift; 1573 my $num = shift; 1574 1575 foreach my $msgid ( keys %{ $self->{po} } ) { 1576 foreach my $pos ( split / /, $self->{po}{$msgid}{'pos_doc'} ) { 1577 return $msgid if ( $pos eq $num ); 1578 } 1579 } 1580 return undef; 1581} 1582 1583=item get_charset() 1584 1585Returns the character set specified in the PO header. If it hasn't been 1586set, it will return "UTF-8". 1587 1588=cut 1589 1590sub get_charset() { 1591 my $self = shift; 1592 1593 $self->{header} =~ /charset=(.*?)[\s\\]/; 1594 1595 if ( defined $1 ) { 1596 return $1; 1597 } else { 1598 return "UTF-8"; 1599 } 1600} 1601 1602=item set_charset($) 1603 1604This sets the character set of the PO header to the value specified in its 1605first argument. If you never call this function (and no file with a specified 1606character set is read), the default value is left to "UTF-8". This value 1607doesn't change the behavior of this module, it's just used to fill that field 1608in the header, and to return it in get_charset(). 1609 1610=cut 1611 1612sub set_charset() { 1613 my $self = shift; 1614 1615 my ( $newchar, $oldchar ); 1616 $newchar = shift; 1617 $oldchar = $self->get_charset(); 1618 1619 $self->{header} =~ s/$oldchar/$newchar/; 1620 $self->{encoder} = find_encoding($newchar); 1621} 1622 1623#----[ helper functions ]--------------------------------------------------- 1624 1625# transforme the string from its PO file representation to the form which 1626# should be used to print it 1627sub unescape_text { 1628 my $text = shift; 1629 1630 print STDERR "\nunescape [$text]====" if $debug{'escape'}; 1631 $text = join( "", split( /\n/, $text ) ); 1632 $text =~ s/\\"/"/g; 1633 1634 # unescape newlines 1635 # NOTE on \G: 1636 # The following regular expression introduce newlines. 1637 # Thus, ^ doesn't match all beginnings of lines. 1638 # \G is a zero-width assertion that matches the position 1639 # of the previous substitution with s///g. As every 1640 # substitution ends by a newline, it always matches a 1641 # position just after a newline. 1642 $text =~ s/( # $1: 1643 (\G|[^\\]) # beginning of the line or any char 1644 # different from '\' 1645 (\\\\)* # followed by any even number of '\' 1646 )\\n # and followed by an escaped newline 1647 /$1\n/sgx; # single string, match globally, allow comments 1648 # unescape carriage returns 1649 $text =~ s/( # $1: 1650 (\G|[^\\]) # beginning of the line or any char 1651 # different from '\' 1652 (\\\\)* # followed by any even number of '\' 1653 )\\r # and followed by an escaped carriage return 1654 /$1\r/sgx; # single string, match globally, allow comments 1655 # unescape tabulations 1656 $text =~ s/( # $1: 1657 (\G|[^\\])# beginning of the line or any char 1658 # different from '\' 1659 (\\\\)* # followed by any even number of '\' 1660 )\\t # and followed by an escaped tabulation 1661 /$1\t/mgx; # multilines string, match globally, allow comments 1662 # and unescape the escape character 1663 $text =~ s/\\\\/\\/g; 1664 print STDERR ">$text<\n" if $debug{'escape'}; 1665 1666 return $text; 1667} 1668 1669# transform the string to its representation as it should be written in PO 1670# files 1671sub escape_text { 1672 my $text = shift; 1673 1674 print STDERR "\nescape [$text]====" if $debug{'escape'}; 1675 $text =~ s/\\/\\\\/g; 1676 $text =~ s/"/\\"/g; 1677 $text =~ s/\n/\\n/g; 1678 $text =~ s/\r/\\r/g; 1679 $text =~ s/\t/\\t/g; 1680 print STDERR ">$text<\n" if $debug{'escape'}; 1681 1682 return $text; 1683} 1684 1685# put quotes around the string on each lines (without escaping it) 1686# It does also normalize the text (ie, make sure its representation is wrapped 1687# on the 80th char, but without changing the meaning of the string) 1688sub quote_text { 1689 my $string = shift; 1690 my $do_wrap = shift; # either 'no' or 'newlines', or column at which we should wrap 1691 1692 return '""' unless length($string); 1693 1694 return "\"$string\"" if ( $do_wrap eq 'no' ); 1695 1696 print STDERR "\nquote $do_wrap [$string]====" if $debug{'quote'}; 1697 1698 # break lines on newlines, if any 1699 # see unescape_text for an explanation on \G 1700 $string =~ s/( # $1: 1701 (\G|[^\\]) # beginning of the line or any char 1702 # different from '\' 1703 (\\\\)* # followed by any even number of '\' 1704 \\n) # and followed by an escaped newline 1705 /$1\n/sgx; # single string, match globally, allow comments 1706 1707 $string = wrap( $string, $do_wrap ) if ( $do_wrap ne 'newlines' ); 1708 my @string = split( /\n/, $string ); 1709 $string = join( "\"\n\"", @string ); 1710 $string = "\"$string\""; 1711 if ( scalar @string > 1 && $string[0] ne '' ) { 1712 $string = "\"\"\n" . $string; 1713 } 1714 1715 print STDERR ">$string<\n" if $debug{'quote'}; 1716 return $string; 1717} 1718 1719# undo the work of the quote_text function 1720sub unquote_text { 1721 my $string = shift; 1722 print STDERR "\nunquote [$string]====" if $debug{'quote'}; 1723 $string =~ s/^""\\n//s; 1724 $string =~ s/^"(.*)"$/$1/s; 1725 $string =~ s/"\n"//gm; 1726 1727 # Note: an even number of '\' could precede \\n, but I could not build a 1728 # document to test this 1729 $string =~ s/([^\\])\\n\n/$1!!DUMMYPOPM!!/gm; 1730 $string =~ s|!!DUMMYPOPM!!|\\n|gm; 1731 print STDERR ">$string<\n" if $debug{'quote'}; 1732 return $string; 1733} 1734 1735# canonize the string: write it on only one line, changing consecutive 1736# whitespace to only one space. 1737# Warning, it changes the string and should only be called if the string is 1738# plain text 1739sub canonize { 1740 my $text = shift; 1741 print STDERR "\ncanonize [$text]====" if $debug{'canonize'}; 1742 $text =~ s/^ *//s; 1743 $text =~ s/^[ \t]+/ /gm; 1744 1745 # if ($text eq "\n"), it messed up the first string (header) 1746 $text =~ s/\n/ /gm if ( $text ne "\n" ); 1747 $text =~ s/([.)]) +/$1 /gm; 1748 $text =~ s/([^.)]) */$1 /gm; 1749 $text =~ s/ *$//s; 1750 print STDERR ">$text<\n" if $debug{'canonize'}; 1751 return $text; 1752} 1753 1754# wraps the string. We don't use Text::Wrap since it mangles whitespace at the end of the split line 1755sub wrap { 1756 my $text = shift; 1757 return "0" if ( $text eq '0' ); 1758 my $col = shift || 76; 1759 my @lines = split( /\n/, "$text" ); 1760 my $res = ""; 1761 my $first = 1; 1762 while ( defined( my $line = shift @lines ) ) { 1763 if ( $first && length($line) > $col - 10 ) { 1764 unshift @lines, $line; 1765 $first = 0; 1766 next; 1767 } 1768 if ( length($line) > $col ) { 1769 my $pos = rindex( $line, " ", $col ); 1770 while ( substr( $line, $pos - 1, 1 ) eq '.' && $pos != -1 ) { 1771 $pos = rindex( $line, " ", $pos - 1 ); 1772 } 1773 if ( $pos == -1 ) { 1774 1775 # There are no spaces in the first $col chars, pick-up the 1776 # first space 1777 $pos = index( $line, " " ); 1778 } 1779 if ( $pos != -1 ) { 1780 my $end = substr( $line, $pos + 1 ); 1781 $line = substr( $line, 0, $pos + 1 ); 1782 if ( $end =~ s/^( +)// ) { 1783 $line .= $1; 1784 } 1785 unshift @lines, $end; 1786 } 1787 } 1788 $first = 0; 1789 $res .= "$line\n"; 1790 } 1791 1792 # Restore the original trailing spaces 1793 $res =~ s/\s+$//s; 1794 if ( $text =~ m/(\s+)$/s ) { 1795 $res .= $1; 1796 } 1797 return $res; 1798} 1799 1800# outputs properly a '# ... ' line to be put in the PO file 1801sub format_comment { 1802 my $comment = shift; 1803 my $char = shift; 1804 my $result = "#" . $char . $comment; 1805 $result =~ s/\n/\n#$char/gs; 1806 $result =~ s/^#$char$/#/gm; 1807 $result .= "\n"; 1808 return $result; 1809} 1810 18111; 1812__END__ 1813 1814=back 1815 1816=head1 AUTHORS 1817 1818 Denis Barbier <barbier@linuxfr.org> 1819 Martin Quinson (mquinson#debian.org) 1820 1821=cut 1822