#!/usr/local/bin/perl -wC
# $FreeBSD$
#
# Generate locale definition sources and the accompanying Makefile for one
# locale category (--type) from CLDR POSIX data.
#
# Inputs:
#   <unidir>/UnicodeData.txt        Unicode code <-> name table
#   <unidir>/posix/UTF-8.cm         UTF-8 charmap
#   <unidir>/posix/*.UTF-8.src      per-locale POSIX sources
#   <etcdir>/charmaps/<enc>.TXT     per-encoding conversion tables
#   <etcdir>                        passed to get_xmldata() (charmaps.pm)
# Outputs:
#   <type>.draft/*.src              generated definitions
#   <type>.draft/Makefile           generated Makefile
#
# The output directory <type>.draft must already exist.

use strict;
use File::Copy;
use XML::Parser;
use Tie::IxHash;
use Text::Iconv;
#use Data::Dumper;
use Getopt::Long;
use Digest::SHA qw(sha1_hex);
require "charmaps.pm";


if ($#ARGV < 2) {
    print "Usage: $0 --unidir=<unidir> --etc=<etcdir> --type=<type>\n";
    exit(1);
}

my $DEFENCODING = "UTF-8";

my $UNIDIR = undef;
my $ETCDIR = undef;
my $TYPE = undef;

my $result = GetOptions (
    "unidir=s" => \$UNIDIR,
    "etc=s" => \$ETCDIR,
    "type=s" => \$TYPE,
);

# Three arbitrary arguments pass the count check above, so make sure the
# three required options were really supplied before building paths
# from them.
if (!defined $UNIDIR || !defined $ETCDIR || !defined $TYPE) {
    print "Usage: $0 --unidir=<unidir> --etc=<etcdir> --type=<type>\n";
    exit(1);
}

my %convertors = ();

my %ucd = ();
my %values = ();
my %hashtable = ();
my %languages = ();
my %translations = ();
my %encodings = ();
my %alternativemonths = ();
get_languages();

my %utf8map = ();
my %utf8aliases = ();
get_unidata($UNIDIR);
get_utf8map("$UNIDIR/posix/$DEFENCODING.cm");
get_encodings("$ETCDIR/charmaps");

# Both hashes keep insertion order: %keys fixes the order in which fields
# are printed, %hashtable the order in which SAME entries are emitted.
my %keys = ();
tie(%keys, "Tie::IxHash");
tie(%hashtable, "Tie::IxHash");

# Value written as FILESNAME in the generated Makefile, per --type.
my %FILESNAMES = (
    "monetdef" => "LC_MONETARY",
    "timedef" => "LC_TIME",
    "msgdef" => "LC_MESSAGES",
    "numericdef" => "LC_NUMERIC",
    "colldef" => "LC_COLLATE",
    "ctypedef" => "LC_CTYPE"
);

# Dispatch table for the "<callback<sourcekey<type" entries in %keys.
# The "data" slot carries the current locale context (l, f, c, k, e)
# while print_fields() invokes a callback.
my %callback = (
    mdorder => \&callback_mdorder,
    altmon => \&callback_altmon,
    cformat => \&callback_cformat,
    dformat => \&callback_dformat,
    dtformat => \&callback_dtformat,
    cbabmon => \&callback_abmon,
    cbampm => \&callback_ampm,
    data => undef,
);

# Comment text emitted above each field in the generated files.
my %DESC = (

    # numericdef
    "decimal_point" => "decimal_point",
    "thousands_sep" => "thousands_sep",
    "grouping" => "grouping",

    # monetdef
    "int_curr_symbol" => "int_curr_symbol (last character always " .
        "SPACE)",
    "currency_symbol" => "currency_symbol",
    "mon_decimal_point" => "mon_decimal_point",
    "mon_thousands_sep" => "mon_thousands_sep",
    "mon_grouping" => "mon_grouping",
    "positive_sign" => "positive_sign",
    "negative_sign" => "negative_sign",
    "int_frac_digits" => "int_frac_digits",
    "frac_digits" => "frac_digits",
    "p_cs_precedes" => "p_cs_precedes",
    "p_sep_by_space" => "p_sep_by_space",
    "n_cs_precedes" => "n_cs_precedes",
    "n_sep_by_space" => "n_sep_by_space",
    "p_sign_posn" => "p_sign_posn",
    "n_sign_posn" => "n_sign_posn",

    # msgdef
    "yesexpr" => "yesexpr",
    "noexpr" => "noexpr",
    "yesstr" => "yesstr",
    "nostr" => "nostr",

    # timedef
    "abmon" => "Short month names",
    "mon" => "Long month names (as in a date)",
    "abday" => "Short weekday names",
    "day" => "Long weekday names",
    "t_fmt" => "X_fmt",
    "d_fmt" => "x_fmt",
    "c_fmt" => "c_fmt",
    "am_pm" => "AM/PM",
    "d_t_fmt" => "date_fmt",
    "altmon" => "Long month names (without case ending)",
    "md_order" => "md_order",
    "t_fmt_ampm" => "ampm_fmt",
);

if ($TYPE eq "colldef") {
    transform_collation();
    make_makefile();
}

if ($TYPE eq "ctypedef") {
    transform_ctypes();
    make_makefile();
}

# In the %keys tables below the value is the field type:
#   "s"  string, "as" ';'-separated list of strings,
#   "i"  integer, "ai" list of integers,
#   "<cb<src<type"  run callback cb on the value read for key src and
#                   print the result as the given type.
if ($TYPE eq "numericdef") {
    %keys = (
        "decimal_point" => "s",
        "thousands_sep" => "s",
        "grouping" => "ai",
    );
    get_fields();
    print_fields();
    make_makefile();
}

if ($TYPE eq "monetdef") {
    %keys = (
        "int_curr_symbol" => "s",
        "currency_symbol" => "s",
        "mon_decimal_point" => "s",
        "mon_thousands_sep" => "s",
        "mon_grouping" => "ai",
        "positive_sign" => "s",
        "negative_sign" => "s",
        "int_frac_digits" => "i",
        "frac_digits" => "i",
        "p_cs_precedes" => "i",
        "p_sep_by_space" => "i",
        "n_cs_precedes" => "i",
        "n_sep_by_space" => "i",
        "p_sign_posn" => "i",
        "n_sign_posn" => "i"
    );
    get_fields();
    print_fields();
    make_makefile();
}

if ($TYPE eq "msgdef") {
    %keys = (
        "yesexpr" => "s",
        "noexpr" => "s",
        "yesstr" => "s",
        "nostr" => "s"
    );
    get_fields();
    print_fields();
    make_makefile();
}

if ($TYPE eq "timedef") {
    %keys = (
        "abmon" => "<cbabmon<abmon<as",
        "mon" => "as",
        "abday" => "as",
        "day" => "as",
        "t_fmt" => "s",
        "d_fmt" => "<dformat<d_fmt<s",
        "c_fmt" => "<cformat<d_t_fmt<s",
        "am_pm" => "<cbampm<am_pm<as",
        "d_t_fmt" => "<dtformat<d_t_fmt<s",
        "altmon" => "<altmon<mon<as",
        "md_order" => "<mdorder<d_fmt<s",
        "t_fmt_ampm" => "s",
    );
    get_fields();
    print_fields();
    make_makefile();
}

# Callback for am_pm: replace the value for ru_RU with a fixed string,
# converted from UTF-8 to the target encoding when that is not UTF-8.
# Every other locale gets its value back unchanged.
sub callback_ampm {
    my $s = shift;
    my $nl = $callback{data}{l} . "_" . $callback{data}{c};
    my $enc = $callback{data}{e};

    if ($nl eq 'ru_RU') {
        if ($enc eq 'UTF-8') {
            $s = 'дп;пп';
        } else {
            my $converter = Text::Iconv->new("utf-8", "$enc");
            $s = $converter->convert("дп;пп");
        }
    }
    return $s;
}

# Callback for c_fmt: derive the value from d_t_fmt by dropping the time
# zone conversions and inserting the weekday name (%A).
sub callback_cformat {
    my $s = shift;
    my $nl = $callback{data}{l} . "_" . $callback{data}{c};

    if ($nl eq 'ko_KR') {
        $s =~ s/(> )(%p)/$1%A $2/;
    }
    $s =~ s/\.,/\./;
    $s =~ s/ %Z//;
    $s =~ s/ %z//;
    $s =~ s/^"%e\./%A %e/;
    $s =~ s/^"(%B %e, )/"%A, $1/;
    $s =~ s/^"(%e %B )/"%A $1/;
    return $s;
};

# Callback for d_fmt: use %d instead of %e when the day is directly
# adjacent to a numeric month separated by <SOLIDUS>, '-' or '.'.
sub callback_dformat {
    my $s = shift;

    $s =~ s/(%m(<SOLIDUS>|[-.]))%e/$1%d/;
    $s =~ s/%e((<SOLIDUS>|[-.])%m)/%d$1/;
    return $s;
};

# Callback for d_t_fmt: insert the weekday name (%A), with extra handling
# for ja_JP, ko_KR, zh_CN and zh_TW.
sub callback_dtformat {
    my $s = shift;
    my $nl = $callback{data}{l} . "_" . $callback{data}{c};

    if ($nl eq 'ja_JP') {
        $s =~ s/(> )(%H)/$1%A $2/;
    } elsif ($nl eq 'ko_KR' || $nl eq 'zh_CN' || $nl eq 'zh_TW') {
        if ($nl ne 'ko_KR') {
            $s =~ s/%m/%_m/;
        }
        $s =~ s/(> )(%p)/$1%A $2/;
    }
    $s =~ s/\.,/\./;
    $s =~ s/^"%e\./%A %e/;
    $s =~ s/^"(%B %e, )/"%A, $1/;
    $s =~ s/^"(%e %B )/"%A $1/;
    return $s;
};

# Callback for md_order: reduce d_fmt to the order of its day/month
# letters ('e' counts as 'd').  Returns undef when there is no input.
sub callback_mdorder {
    my $s = shift;
    return undef if (!defined $s);
    $s =~ s/[^dem]//g;
    $s =~ s/e/d/g;
    return $s;
};

sub callback_altmon {
    # if the language/country is known in %alternative months then
    # return that, otherwise repeat mon
    my $s = shift;

    if (defined $alternativemonths{$callback{data}{l}}{$callback{data}{c}}) {
        my @altnames = split(";",$alternativemonths{$callback{data}{l}}{$callback{data}{c}});
        my @cleaned;
        foreach (@altnames)
        {
            # trim surrounding whitespace from each name
            $_ =~ s/^\s+//;
            $_ =~ s/\s+$//;
            push @cleaned, $_;
        }
        return join(";",@cleaned);
    }

    return $s;
}

sub callback_abmon {
    # for specified CJK locales, pad result with a space to enable
    # columns to line up (style established in FreeBSD in 2001)
    my $s = shift;
    my $nl = $callback{data}{l} . "_" . $callback{data}{c};

    if ($nl eq 'ja_JP' || $nl eq 'ko_KR' || $nl eq 'zh_CN' ||
        $nl eq 'zh_HK' || $nl eq 'zh_TW') {
        my @monthnames = split(";", $s);
        my @cleaned;
        foreach (@monthnames)
        {
            # Pad single-digit month numbers only: names starting with
            # <two>..<nine>, or <one> not followed by <zero>/<one>/<two>.
            if ($_ =~ /^"<(two|three|four|five|six|seven|eight|nine)>/ ||
               ($_ =~ /^"<one>/ && $_ !~ /^"<one>(<zero>|<one>|<two>)/))
            {
                $_ =~ s/^"/"<space>/;
            }
            push @cleaned, $_;
        }
        return join(";",@cleaned);
    }
    return $s;
}

############################

# Load <directory>/UnicodeData.txt into %ucd, building both the
# code -> name and the name -> code mappings.  Dies if the file cannot
# be opened.
sub get_unidata {
    my $directory = shift;

    open(my $fin, '<', "$directory/UnicodeData.txt")
        or die("Cannot open $directory/UnicodeData.txt");
    my @lines = <$fin>;
    chomp(@lines);
    close($fin);

    foreach my $l (@lines) {
        my @a = split(/;/, $l);

        $ucd{code2name}{"$a[0]"} = $a[1];    # Unicode name
        $ucd{name2code}{"$a[1]"} = $a[0];    # Unicode code
    }
}

# Load the CHARMAP section of the UTF-8 charmap $file into %utf8map
# (symbol name -> hex byte string).  A symbol whose byte string equals that
# of the entry just before it is recorded in %utf8aliases as an alias of
# that entry.  Dies if the file cannot be opened.
sub get_utf8map {
    my $file = shift;

    open(my $fin, '<', $file)
        or die("Cannot open $file");
    my @lines = <$fin>;
    close($fin);
    chomp(@lines);

    my $prev_k = undef;
    my $prev_v = "";
    my $incharmap = 0;
    foreach my $l (@lines) {
        $l =~ s/\r//;
        next if ($l =~ /^\#/);
        next if ($l eq "");

        if ($l eq "CHARMAP") {
            $incharmap = 1;
            next;
        }

        next if (!$incharmap);
        last if ($l eq "END CHARMAP");

        # Skip lines that are not "<NAME> bytes"; without this check the
        # captures of the previous matching line would be reused.
        next if ($l !~ /^<([^\s]+)>\s+(.*)/);
        my $k = $1;
        my $v = $2;
        $k =~ s/_/ /g;        # unicode char string
        $v =~ s/\\x//g;       # UTF-8 char code
        $utf8map{$k} = $v;

        $utf8aliases{$k} = $prev_k if ($prev_v eq $v);

        $prev_v = $v;
        $prev_k = $k;
    }
}

# For every encoding named in %encodings, load $dir/<encoding>.TXT into
# %convertors{<encoding>} (Unicode code -> local code, both upper-case hex).
# An encoding whose table cannot be opened is reported and skipped.
sub get_encodings {
    my $dir = shift;
    foreach my $e (sort(keys(%encodings))) {
        my $fin;
        if (!open($fin, '<', "$dir/$e.TXT")) {
            print "Cannot open charmap for $e\n";
            next;

        }
        $encodings{$e} = 1;
        my @lines = <$fin>;
        close($fin);
        chomp(@lines);
        foreach my $l (@lines) {
            $l =~ s/\r//;
            next if ($l =~ /^\#/);
            next if ($l eq "");

            my @a = split(" ", $l);
            next if ($#a < 1);
            $a[0] =~ s/^0[xX]//;    # local char code
            $a[1] =~ s/^0[xX]//;    # unicode char code
            $convertors{$e}{uc($a[1])} = uc($a[0]);
        }
    }
}

# Populate %languages, %translations, %alternativemonths and %encodings
# from get_xmldata() (provided by charmaps.pm).
sub get_languages {
    my %data = get_xmldata($ETCDIR);
    %languages = %{$data{L}};
    %translations = %{$data{T}};
    %alternativemonths = %{$data{AM}};
    %encodings = %{$data{E}};
}

# Write the LC_CTYPE sources.  The UTF-8 variant of every locale is a copy
# of xx_Comm_C.UTF-8.src; every other encoding gets the LC_CTYPE section
# of the locale's own UTF-8 source.  A SHA-1 of each result is stored in
# %languages and %hashtable so make_makefile() can find identical files.
sub transform_ctypes {
    # Add the C.UTF-8
    $languages{"C"}{"x"}{data}{"x"}{$DEFENCODING} = undef;

    foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
            foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                next if (defined $languages{$l}{$f}{definitions}
                    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;    # unread
                my $file = $l;
                $file .= "_" . $f if ($f ne "x");
                $file .= "_" . $c if ($c ne "x");
                my $actfile = $file;

                my $filename = "$UNIDIR/posix/xx_Comm_C.UTF-8.src";
                if (! -f $filename) {
                    print STDERR "Cannot open $filename\n";
                    next;
                }
                open(my $fin, '<', $filename)
                    or die("Cannot open $filename: $!");
                print "Reading from $filename for ${l}_${f}_${c}\n";
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;    # read
                my @lines;
                my $shex;
                my $uhex;
                while (<$fin>) {
                    push @lines, $_;
                }
                close($fin);
                $shex = sha1_hex(join("\n", @lines));
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
                $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
                my $outname = "$TYPE.draft/$actfile.$DEFENCODING.src";
                open(my $fout, '>', $outname)
                    or die("Cannot write $outname: $!");
                print {$fout} @lines;
                close($fout)
                    or die("Cannot close $outname: $!");
                foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                    next if ($enc eq $DEFENCODING);
                    $filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
                    if (! -f $filename) {
                        print STDERR "Cannot open $filename\n";
                        next;
                    }
                    @lines = ();
                    open($fin, '<', $filename)
                        or die("Cannot open $filename: $!");
                    while (<$fin>) {
                        if ((/^comment_char\s/) || (/^escape_char\s/)){
                            push @lines, $_;
                        }
                        if (/^LC_CTYPE/../^END LC_CTYPE/) {
                            push @lines, $_;
                        }
                    }
                    close($fin);
                    # The encoding is mixed into the hash so that equal
                    # sources for different encodings are not merged.
                    $uhex = sha1_hex(join("\n", @lines) . $enc);
                    $languages{$l}{$f}{data}{$c}{$enc} = $uhex;
                    $hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;
                    $outname = "$TYPE.draft/$actfile.$enc.src";
                    open($fout, '>', $outname)
                        or die("Cannot write $outname: $!");
                    print {$fout} <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
                    print {$fout} @lines;
                    close($fout)
                        or die("Cannot close $outname: $!");
                }
            }
        }
    }
}


# Write the LC_COLLATE sources.  The LC_COLLATE section is extracted from
# the locale's UTF-8 source (looked up in $UNIDIR/posix, then $ETCDIR, then
# via the configured fallback locale) and the result is copied verbatim
# for every other encoding of that locale.
sub transform_collation {
    foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
            foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                next if (defined $languages{$l}{$f}{definitions}
                    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;    # unread
                my $file;
                $file = $l . "_";
                $file .= $f . "_" if ($f ne "x");
                $file .= $c;
                my $actfile = $file;

                my $filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
                $filename = "$ETCDIR/$file.$DEFENCODING.src"
                    if (! -f $filename);
                if (! -f $filename
                 && defined $languages{$l}{$f}{fallback}) {
                    $file = $languages{$l}{$f}{fallback};
                    $filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
                }
                $filename = "$UNIDIR/posix/$file.$DEFENCODING.src"
                    if (! -f $filename);
                if (! -f $filename) {
                    print STDERR
                        "Cannot open $file.$DEFENCODING.src or fallback\n";
                    next;
                }
                open(my $fin, '<', $filename)
                    or die("Cannot open $filename: $!");
                print "Reading from $filename for ${l}_${f}_${c}\n";
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;    # read
                my @lines;
                my $shex;
                while (<$fin>) {
                    if ((/^comment_char\s/) || (/^escape_char\s/)){
                        push @lines, $_;
                    }
                    if (/^LC_COLLATE/../^END LC_COLLATE/) {
                        $_ =~ s/[ ]+/ /g;
                        push @lines, $_;
                    }
                }
                close($fin);
                $shex = sha1_hex(join("\n", @lines));
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
                $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
                my $outname = "$TYPE.draft/$actfile.$DEFENCODING.src";
                open(my $fout, '>', $outname)
                    or die("Cannot write $outname: $!");
                print {$fout} <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
                print {$fout} @lines;
                close($fout)
                    or die("Cannot close $outname: $!");

                foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                    next if ($enc eq $DEFENCODING);
                    copy ("$TYPE.draft/$actfile.$DEFENCODING.src",
                        "$TYPE.draft/$actfile.$enc.src")
                        or die("Cannot copy $outname to " .
                            "$TYPE.draft/$actfile.$enc.src: $!");
                    $languages{$l}{$f}{data}{$c}{$enc} = $shex;
                    $hashtable{$shex}{"${l}_${f}_${c}.$enc"} = 1;
                }
            }
        }
    }
}

# Read the value of every key in %keys from each locale's UTF-8 source
# into %values{lang}{family}{country}{key}.  Lines ending in '/' are
# continuation lines and are joined; underscores inside <...> symbol names
# are rewritten as spaces.
sub get_fields {
    foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
            foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                next if (defined $languages{$l}{$f}{definitions}
                    && $languages{$l}{$f}{definitions} !~ /$TYPE/);

                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;    # unread
                my $file;
                $file = $l . "_";
                $file .= $f . "_" if ($f ne "x");
                $file .= $c;

                my $filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
                $filename = "$ETCDIR/$file.$DEFENCODING.src"
                    if (! -f $filename);
                if (! -f $filename
                 && defined $languages{$l}{$f}{fallback}) {
                    $file = $languages{$l}{$f}{fallback};
                    $filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
                }
                $filename = "$UNIDIR/posix/$file.$DEFENCODING.src"
                    if (! -f $filename);
                if (! -f $filename) {
                    print STDERR
                        "Cannot open $file.$DEFENCODING.src or fallback\n";
                    next;
                }
                open(my $fin, '<', $filename)
                    or die("Cannot open $filename: $!");
                print "Reading from $filename for ${l}_${f}_${c}\n";
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;    # read
                my @lines = <$fin>;
                chomp(@lines);
                close($fin);
                my $continue = 0;
                foreach my $k (keys(%keys)) {
                    foreach my $line (@lines) {
                        $line =~ s/\r//;
                        next if (!$continue && $line !~ /^$k\s/);
                        if ($continue) {
                            $line =~ s/^\s+//;
                        } else {
                            $line =~ s/^$k\s+//;
                        }

                        $values{$l}{$f}{$c}{$k} = ""
                            if (!defined $values{$l}{$f}{$c}{$k});

                        $continue = ($line =~ /\/$/);
                        $line =~ s/\/$// if ($continue);

                        # Turn one underscore per pass into a space inside
                        # <...> symbol names.  Looping on the substitution
                        # (rather than on "line contains _") guarantees
                        # termination, so a stray underscore outside a
                        # symbol reaches the die below instead of hanging.
                        1 while ($line =~
                            s/\<([^>_]+)_([^>]+)\>/<$1 $2>/);
                        die "_ in data - $line" if ($line =~ /_/);
                        $values{$l}{$f}{$c}{$k} .= $line;

                        last if (!$continue);
                    }
                }
            }
        }
    }
}

# Convert the Unicode symbol name $s into its byte sequence in encoding $e.
# Dies when no mapping can be found.  Only hex codes of one, two or three
# bytes are packed; for any other length a diagnostic is printed on STDERR
# and the string "length = N" is returned instead.
sub decodecldr {
    my $e = shift;
    my $s = shift;

    my $v = undef;

    if ($e eq "UTF-8") {
        #
        # Conversion to UTF-8 can be done from the Unicode name to
        # the UTF-8 character code.
        #
        $v = $utf8map{$s};
        die "Cannot convert $s in $e (charmap)" if (!defined $v);
    } else {
        #
        # Conversion to these encodings can be done from the Unicode
        # name to Unicode code to the encodings code.
        #
        my $ucc = undef;
        $ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
        $ucc = $ucd{name2code}{$utf8aliases{$s}}
            if (!defined $ucc
              && $utf8aliases{$s}
              && defined $ucd{name2code}{$utf8aliases{$s}});

        # Fall back to the per-encoding overrides in %translations.
        if (!defined $ucc) {
            if (defined $translations{$e}{$s}{hex}) {
                $v = $translations{$e}{$s}{hex};
                $ucc = 0;
            } elsif (defined $translations{$e}{$s}{ucc}) {
                $ucc = $translations{$e}{$s}{ucc};
            }
        }

        die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
        $v = $convertors{$e}{$ucc} if (!defined $v);

        $v = $translations{$e}{$s}{hex}
            if (!defined $v && defined $translations{$e}{$s}{hex});

        if (!defined $v && defined $translations{$e}{$s}{unicode}) {
            my $ucn = $translations{$e}{$s}{unicode};
            $ucc = $ucd{name2code}{$ucn}
                if (defined $ucd{name2code}{$ucn});
            $ucc = $ucd{name2code}{$utf8aliases{$ucn}}
                if (!defined $ucc
                  && defined $ucd{name2code}{$utf8aliases{$ucn}});
            $v = $convertors{$e}{$ucc};
        }

        die "Cannot convert $s in $e (charmap)" if (!defined $v);
    }

    return pack("C", hex($v)) if (length($v) == 2);
    return pack("CC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)))
        if (length($v) == 4);
    return pack("CCC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)),
        hex(substr($v, 4, 2))) if (length($v) == 6);
    print STDERR "Cannot convert $e $s\n";
    return "length = " . length($v);

}

# Return the %translations entry for symbol $v in encoding $enc, or undef
# when there is none.
sub translate {
    my $enc = shift;
    my $v = shift;

    return $translations{$enc}{$v} if (defined $translations{$enc}{$v});
    return undef;
}

# Write one <type>.draft/<locale>.<encoding>.src file per locale and
# encoding from the values collected by get_fields().  Output goes to a
# ".new" file first, which is renamed to ".src" on success or ".failed"
# when a symbol could not be converted.  A SHA-1 of the content is stored
# in %languages and %hashtable for make_makefile().
sub print_fields {
    foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
            foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                next if (defined $languages{$l}{$f}{definitions}
                    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
                foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                    if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
                        print "Skipping ${l}_" .
                            ($f eq "x" ? "" : "${f}_") .
                            "${c} - not read\n";
                        next;
                    }
                    my $file = $l;
                    $file .= "_" . $f if ($f ne "x");
                    $file .= "_" . $c;
                    print "Writing to $file in $enc\n";

                    if ($enc ne $DEFENCODING &&
                        !defined $convertors{$enc}) {
                        print "Failed! Cannot convert to $enc.\n";
                        next;
                    };

                    my $newname = "$TYPE.draft/$file.$enc.new";
                    open(my $fout, '>', $newname)
                        or die("Cannot write $newname: $!");
                    my $okay = 1;
                    my $output = "";
                    print {$fout} <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
                    foreach my $k (keys(%keys)) {
                        my $g = $keys{$k};

                        die("Unknown $k in \%DESC")
                            if (!defined $DESC{$k});

                        $output .= "#\n# $DESC{$k}\n";

                        # Replace one row with another
                        if ($g =~ /^>/) {
                            $k = substr($g, 1);
                            $g = $keys{$k};
                        }

                        # Callback function
                        if ($g =~ /^\</) {
                            $callback{data}{c} = $c;
                            $callback{data}{k} = $k;
                            $callback{data}{f} = $f;
                            $callback{data}{l} = $l;
                            $callback{data}{e} = $enc;
                            # "<cb<src<type" -> (cb, src, type)
                            my @spec = split(/\</, substr($g, 1));
                            my $rv = $callback{$spec[0]}->(
                                $values{$l}{$f}{$c}{$spec[1]});
                            $values{$l}{$f}{$c}{$k} = $rv;
                            $g = $spec[2];
                            $callback{data} = undef;
                        }

                        my $v = $values{$l}{$f}{$c}{$k};
                        $v = "undef" if (!defined $v);

                        if ($g eq "i") {
                            $output .= "$v\n";
                            next;
                        }
                        if ($g eq "ai") {
                            $output .= "$v\n";
                            next;
                        }
                        if ($g eq "s") {
                            $v =~ s/^"//;
                            $v =~ s/"$//;
                            my $cm = "";
                            # Replace each <SYMBOL> with its encoded bytes.
                            while ($v =~ /^(.*?)<(.*?)>(.*)/) {
                                my $p1 = $1;
                                $cm = $2;
                                my $p3 = $3;

                                my $rv = decodecldr($enc, $cm);
#                               $rv = translate($enc, $cm)
#                                   if (!defined $rv);
                                if (!defined $rv) {
                                    print STDERR
"Could not convert $k ($cm) from $DEFENCODING to $enc\n";
                                    $okay = 0;
                                    # $v is unchanged here, so retrying the
                                    # match would never terminate.
                                    last;
                                }

                                $v = $p1 . $rv . $p3;
                            }
                            $output .= "$v\n";
                            next;
                        }
                        if ($g eq "as") {
                            foreach my $item (split(/;/, $v)) {
                                $item =~ s/^"//;
                                $item =~ s/"$//;
                                my $cm = "";
                                while ($item =~ /^(.*?)<(.*?)>(.*)/) {
                                    my $p1 = $1;
                                    $cm = $2;
                                    my $p3 = $3;

                                    my $rv =
                                        decodecldr($enc,
                                        $cm);
#                                   $rv = translate($enc,
#                                       $cm)
#                                       if (!defined $rv);
                                    if (!defined $rv) {
                                        print STDERR
"Could not convert $k ($cm) from $DEFENCODING to $enc\n";
                                        $okay = 0;
                                        # see the "s" case above
                                        last;
                                    }

                                    # Use the saved captures, as the "s"
                                    # case does, rather than $1/$3.
                                    $item = $p1 . $rv . $p3;
                                }
                                $output .= "$item\n";
                            }
                            next;
                        }

                        die("$k is '$g'");

                    }

                    $languages{$l}{$f}{data}{$c}{$enc} = sha1_hex($output);
                    $hashtable{sha1_hex($output)}{"${l}_${f}_${c}.$enc"} = 1;
                    print {$fout} "$output# EOF\n";
                    close($fout)
                        or die("Cannot close $newname: $!");

                    my $finalname = $okay
                        ? "$TYPE.draft/$file.$enc.src"
                        : "$TYPE.draft/$file.$enc.failed";
                    rename($newname, $finalname)
                        or die("Cannot rename $newname to $finalname: $!");
                }
            }
        }
    }
}

# Write <type>.draft/Makefile.  Locale files with identical content (same
# SHA-1 in %hashtable) are emitted once under LOCALES and linked through
# SAME for the remaining names; legacy aliases (nc_link, e_link) are added
# as SAME entries too.
sub make_makefile {
    print "Creating Makefile for $TYPE\n";
    my $SRCOUT;
    my $SRCOUT2;
    my $SRCOUT3 = "";
    my $SRCOUT4 = "";
    my $MAPLOC;
    if ($TYPE eq "colldef") {
        $SRCOUT = "localedef \${LOCALEDEF_ENDIAN} -D -U " .
            "-i \${.IMPSRC} \\\n" .
            "\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} " .
            "\${.OBJDIR}/\${.IMPSRC:T:R}";
        $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
            "locale/etc/final-maps\n";
        $SRCOUT2 = "LC_COLLATE";
        $SRCOUT3 = "" .
            ".for f t in \${LOCALES_MAPPED}\n" .
            "FILES+=\t\$t.LC_COLLATE\n" .
            "FILESDIR_\$t.LC_COLLATE=\t\${LOCALEDIR}/\$t\n" .
            "\$t.LC_COLLATE: \${.CURDIR}/\$f.src\n" .
            "\tlocaledef \${LOCALEDEF_ENDIAN} -D -U " .
            "-i \${.ALLSRC} \\\n" .
            "\t\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} \\\n" .
            "\t\t\${.OBJDIR}/\${.TARGET:T:R}\n" .
            ".endfor\n\n";
        $SRCOUT4 = "## LOCALES_MAPPED\n";
    }
    elsif ($TYPE eq "ctypedef") {
        $SRCOUT = "localedef \${LOCALEDEF_ENDIAN} -D -U -c " .
            "-w \${MAPLOC}/widths.txt \\\n" .
            "\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:E} " .
            "\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
            " || true";
        $SRCOUT2 = "LC_CTYPE";
        $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
            "locale/etc/final-maps\n";
        $SRCOUT3 = "## SYMPAIRS\n\n" .
            ".for s t in \${SYMPAIRS}\n" .
            "\${t:S/src\$/LC_CTYPE/}: " .
            "\$s\n" .
            "\tlocaledef \${LOCALEDEF_ENDIAN} -D -U -c " .
            "-w \${MAPLOC}/widths.txt \\\n" .
            "\t-f \${MAPLOC}/map.\${.TARGET:T:R:C/^.*\\.//} " .
            "\\\n\t-i \${.ALLSRC} \${.OBJDIR}/\${.TARGET:T:R} " .
            " || true\n" .
            ".endfor\n\n";
    }
    else {
        $SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
        $SRCOUT2 = "out";
        $MAPLOC = "";
    }
    my $makefile = "$TYPE.draft/Makefile";
    open(my $fout, '>', $makefile)
        or die("Cannot write $makefile: $!");
    print {$fout} <<EOF;
# \$FreeBSD\$
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale.

LOCALEDIR= \${SHAREDIR}/locale
FILESNAME= $FILESNAMES{$TYPE}
.SUFFIXES: .src .${SRCOUT2}
${MAPLOC}
EOF

    if ($TYPE eq "colldef" || $TYPE eq "ctypedef") {
        print {$fout} <<EOF;
.include <bsd.endian.mk>

EOF
    }

    # The recipe line of the suffix rule must start with a tab; it is
    # written as \\t so the tab cannot be lost to whitespace conversion.
    print {$fout} <<EOF;
.src.${SRCOUT2}:
\t$SRCOUT

## PLACEHOLDER

${SRCOUT4}

EOF

    foreach my $hash (keys(%hashtable)) {
        # For colldef, weight LOCALES to UTF-8
        #     Sort as upper-case and reverse to achieve it
        #     Make en_US, ru_RU, and ca_AD preferred
        my @files;
        if ($TYPE eq "colldef") {
            @files = sort {
                if ($a eq 'en_x_US.UTF-8' ||
                    $a eq 'ru_x_RU.UTF-8' ||
                    $a eq 'ca_x_AD.UTF-8') { return -1; }
                elsif ($b eq 'en_x_US.UTF-8' ||
                       $b eq 'ru_x_RU.UTF-8' ||
                       $b eq 'ca_x_AD.UTF-8') { return 1; }
                else { return uc($b) cmp uc($a); }
            } keys(%{$hashtable{$hash}});
        } elsif ($TYPE eq "ctypedef") {
            @files = sort {
                if ($a eq 'C_x_x.UTF-8') { return -1; }
                elsif ($b eq 'C_x_x.UTF-8') { return 1; }
                if ($a =~ /^en_x_US/) { return -1; }
                elsif ($b =~ /^en_x_US/) { return 1; }

                # Both sides are anchored so that the comparison is
                # symmetric in $a and $b.
                if ($a =~ /^en_x_GB.ISO8859-15/ ||
                    $a =~ /^ru_x_RU/) { return -1; }
                elsif ($b =~ /^en_x_GB.ISO8859-15/ ||
                       $b =~ /^ru_x_RU/) { return 1; }
                else { return uc($b) cmp uc($a); }

            } keys(%{$hashtable{$hash}});
        } else {
            @files = sort {
                if ($a =~ /_Comm_/ ||
                    $b eq 'en_x_US.UTF-8') { return 1; }
                elsif ($b =~ /_Comm_/ ||
                    $a eq 'en_x_US.UTF-8') { return -1; }
                else { return uc($b) cmp uc($a); }
            } keys(%{$hashtable{$hash}});
        }
        # The first name becomes the real file; the others link to it and
        # are dropped from %languages so they are not listed in LOCALES.
        if ($#files > 0) {
            my $link = shift(@files);
            $link =~ s/_x_x//;    # special case for C
            $link =~ s/_x_/_/;    # strip family if none there
            foreach my $file (@files) {
                my @parts = split(/_/, $file);
                my @tail = split(/\./, $parts[-1]);
                $file =~ s/_x_/_/;
                print {$fout} "SAME+=\t\t$link $file\n";
                undef($languages{$parts[0]}{$parts[1]}{data}{$tail[0]}{$tail[1]});
            }
        }
    }

    foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
            foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                next if (defined $languages{$l}{$f}{definitions}
                    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
                if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
                 && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
                    print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") .
                        "${c} - not read\n";
                    next;
                }
                foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                    my $file = $l;
                    $file .= "_" . $f if ($f ne "x");
                    $file .= "_" . $c if ($c ne "x");
                    next if (!defined $languages{$l}{$f}{data}{$c}{$e});
                    print {$fout} "LOCALES+=\t$file.$e\n";
                }

                if (defined $languages{$l}{$f}{nc_link}) {
                    foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                        my $file = $l . "_";
                        $file .= $f . "_" if ($f ne "x");
                        $file .= $c;
                        print {$fout} "SAME+=\t\t$file.$e $languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n";
                    }
                }

                if (defined $languages{$l}{$f}{e_link}) {
                    foreach my $el (split(" ", $languages{$l}{$f}{e_link})) {
                        my @pair = split(/:/, $el);
                        my $file = $l . "_";
                        $file .= $f . "_" if ($f ne "x");
                        $file .= $c;
                        print {$fout} "SAME+=\t\t$file.$pair[0] $file.$pair[1]\t# legacy (same charset)\n";
                    }
                }

            }
        }
    }

    print {$fout} <<EOF;

FILES= \${LOCALES:S/\$/.${SRCOUT2}/}
CLEANFILES= \${FILES}

.for f t in \${SAME}
SYMLINKS+= ../\$f/\${FILESNAME} \\
    \${LOCALEDIR}/\$t/\${FILESNAME}
.endfor

.for f in \${LOCALES}
FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
.endfor

${SRCOUT3}.include <bsd.prog.mk>
EOF

    close($fout)
        or die("Cannot close $makefile: $!");
}