#!/usr/local/bin/perl -wC

# SPDX-License-Identifier: BSD-2-Clause-FreeBSD
#
# Copyright 2009 Edwin Groothuis <edwin@FreeBSD.org>
# Copyright 2015 John Marino <draco@marino.st>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# $FreeBSD$

#
# Overview: reads locale source files from "$UNIDIR/posix" (falling back to
# "$ETCDIR"), extracts the category selected with --type, writes one file
# per locale and encoding into "$TYPE.draft/", and finally writes
# "$TYPE.draft/Makefile".  Locale, translation and encoding tables come from
# get_xmldata(), which is provided by charmaps.pm.
#

use strict;
use File::Copy;
use XML::Parser;
use Tie::IxHash;
use Text::Iconv;
#use Data::Dumper;
use Getopt::Long;
use Digest::SHA qw(sha1_hex);
require "charmaps.pm";


if ($#ARGV < 2) {
	print "Usage: $0 --unidir=<unidir> --etc=<etcdir> --type=<type>\n";
	exit(1);
}

my $DEFENCODING = "UTF-8";

my $UNIDIR = undef;
my $ETCDIR = undef;
my $TYPE = undef;

my $result = GetOptions (
		"unidir=s"	=> \$UNIDIR,
		"etc=s"		=> \$ETCDIR,
		"type=s"	=> \$TYPE,
	    );

# Three arguments were supplied, but that does not guarantee they were the
# three required options; stop here rather than build paths from undef.
if (!$result || !defined $UNIDIR || !defined $ETCDIR || !defined $TYPE) {
	print "Usage: $0 --unidir=<unidir> --etc=<etcdir> --type=<type>\n";
	exit(1);
}

# Encoding name -> { Unicode code point (hex) -> local char code (hex) }.
my %convertors = ();

# Unicode character database lookups: {code2name} and {name2code}.
my %ucd = ();
# Raw field values: $values{lang}{family}{country}{key}.
my %values = ();
# SHA-1 of generated content -> { "lang_family_country.encoding" => 1 }.
my %hashtable = ();
my %languages = ();
my %translations = ();
my %encodings = ();
my %alternativemonths = ();
get_languages();

# Character name -> UTF-8 byte string (hex), and name -> previous name that
# maps to the same bytes.
my %utf8map = ();
my %utf8aliases = ();
get_unidata($UNIDIR);
get_utf8map("$UNIDIR/posix/$DEFENCODING.cm");
get_encodings("$ETCDIR/charmaps");

# Both hashes are tied so that keys come back in insertion order.
my %keys = ();
tie(%keys, "Tie::IxHash");
tie(%hashtable, "Tie::IxHash");

# --type value -> locale category file name used in the Makefile.
my %FILESNAMES = (
	"monetdef"	=> "LC_MONETARY",
	"timedef"	=> "LC_TIME",
	"msgdef"	=> "LC_MESSAGES",
	"numericdef"	=> "LC_NUMERIC",
	"colldef"	=> "LC_COLLATE",
	"ctypedef"	=> "LC_CTYPE"
);

# Callback name (as used in the "<name<source<type" key specs below) ->
# handler.  The {data} slot carries the current locale while a handler runs.
my %callback = (
	mdorder		=> \&callback_mdorder,
	altmon		=> \&callback_altmon,
	cformat		=> \&callback_cformat,
	dformat		=> \&callback_dformat,
	dtformat	=> \&callback_dtformat,
	cbabmon		=> \&callback_abmon,
	cbampm		=> \&callback_ampm,
	data		=> undef,
);

# Field key -> description written as a comment above the field's value.
my %DESC = (

	# numericdef
	"decimal_point"	=> "decimal_point",
	"thousands_sep"	=> "thousands_sep",
	"grouping"	=> "grouping",

	# monetdef
	"int_curr_symbol"	=> "int_curr_symbol (last character always " .
				   "SPACE)",
	"currency_symbol"	=> "currency_symbol",
	"mon_decimal_point"	=> "mon_decimal_point",
	"mon_thousands_sep"	=> "mon_thousands_sep",
	"mon_grouping"		=> "mon_grouping",
	"positive_sign"		=> "positive_sign",
	"negative_sign"		=> "negative_sign",
	"int_frac_digits"	=> "int_frac_digits",
	"frac_digits"		=> "frac_digits",
	"p_cs_precedes"		=> "p_cs_precedes",
	"p_sep_by_space"	=> "p_sep_by_space",
	"n_cs_precedes"		=> "n_cs_precedes",
	"n_sep_by_space"	=> "n_sep_by_space",
	"p_sign_posn"		=> "p_sign_posn",
	"n_sign_posn"		=> "n_sign_posn",

	# msgdef
	"yesexpr"	=> "yesexpr",
	"noexpr"	=> "noexpr",
	"yesstr"	=> "yesstr",
	"nostr"		=> "nostr",

	# timedef
	"abmon"		=> "Short month names",
	"mon"		=> "Long month names (as in a date)",
	"abday"		=> "Short weekday names",
	"day"		=> "Long weekday names",
	"t_fmt"		=> "X_fmt",
	"d_fmt"		=> "x_fmt",
	"c_fmt"		=> "c_fmt",
	"am_pm"		=> "AM/PM",
	"d_t_fmt"	=> "date_fmt",
	"altmon"	=> "Long month names (without case ending)",
	"md_order"	=> "md_order",
	"t_fmt_ampm"	=> "ampm_fmt",
);

# None of the branches below would match an unknown type, so the script
# would otherwise exit successfully without producing anything.
if (!exists $FILESNAMES{$TYPE}) {
	print STDERR "Unknown type '$TYPE'; expected one of: " .
	    join(", ", sort keys(%FILESNAMES)) . "\n";
	exit(1);
}

if ($TYPE eq "colldef") {
	transform_collation();
	make_makefile();
}

if ($TYPE eq "ctypedef") {
	transform_ctypes();
	make_makefile();
}

# For the remaining types %keys maps each output field to its type:
#   "s" string, "as" ";"-separated strings, "i" integer, "ai" integer list,
#   "<callback<source<type" run the named callback on the source field.
if ($TYPE eq "numericdef") {
	%keys = (
	    "decimal_point"	=> "s",
	    "thousands_sep"	=> "s",
	    "grouping"		=> "ai",
	);
	get_fields();
	print_fields();
	make_makefile();
}

if ($TYPE eq "monetdef") {
	%keys = (
	    "int_curr_symbol"	=> "s",
	    "currency_symbol"	=> "s",
	    "mon_decimal_point"	=> "s",
	    "mon_thousands_sep"	=> "s",
	    "mon_grouping"	=> "ai",
	    "positive_sign"	=> "s",
	    "negative_sign"	=> "s",
	    "int_frac_digits"	=> "i",
	    "frac_digits"	=> "i",
	    "p_cs_precedes"	=> "i",
	    "p_sep_by_space"	=> "i",
	    "n_cs_precedes"	=> "i",
	    "n_sep_by_space"	=> "i",
	    "p_sign_posn"	=> "i",
	    "n_sign_posn"	=> "i"
	);
	get_fields();
	print_fields();
	make_makefile();
}

if ($TYPE eq "msgdef") {
	%keys = (
	    "yesexpr"		=> "s",
	    "noexpr"		=> "s",
	    "yesstr"		=> "s",
	    "nostr"		=> "s"
	);
	get_fields();
	print_fields();
	make_makefile();
}

if ($TYPE eq "timedef") {
	%keys = (
	    "abmon"		=> "<cbabmon<abmon<as",
	    "mon"		=> "as",
	    "abday"		=> "as",
	    "day"		=> "as",
	    "t_fmt"		=> "s",
	    "d_fmt"		=> "<dformat<d_fmt<s",
	    "c_fmt"		=> "<cformat<d_t_fmt<s",
	    "am_pm"		=> "<cbampm<am_pm<as",
	    "d_t_fmt"		=> "<dtformat<d_t_fmt<s",
	    "altmon"		=> "<altmon<mon<as",
	    "md_order"		=> "<mdorder<d_fmt<s",
	    "t_fmt_ampm"	=> "s",
	);
	get_fields();
	print_fields();
	make_makefile();
}

# Callback for am_pm: returns the value unchanged except for ru_RU, where a
# fixed string is substituted (converted with Text::Iconv when the target
# encoding is not UTF-8).
sub callback_ampm {
	my $s = shift;
	my $nl = $callback{data}{l} . "_" . $callback{data}{c};
	my $enc = $callback{data}{e};

	if ($nl eq 'ru_RU') {
		if ($enc eq 'UTF-8') {
			$s = 'дп;пп';
		} else {
			my $converter = Text::Iconv->new("utf-8", "$enc");
			$s = $converter->convert("дп;пп");
		}
	}
	return $s;
}

# Callback for c_fmt: derives the value from d_t_fmt by dropping %Z/%z and
# inserting a %A (weekday) specifier in front of recognised date layouts.
sub callback_cformat {
	my $s = shift;
	my $nl = $callback{data}{l} . "_" . $callback{data}{c};

	if ($nl eq 'ko_KR') {
		$s =~ s/(> )(%p)/$1%A $2/;
	}
	$s =~ s/\.,/\./;
	$s =~ s/ %Z//;
	$s =~ s/ %z//;
	$s =~ s/^"%e\./%A %e/;
	$s =~ s/^"(%B %e, )/"%A, $1/;
	$s =~ s/^"(%e %B )/"%A $1/;
	return $s;
}

# Callback for d_fmt: replaces %e with %d when it is adjacent to %m and
# separated from it by <SOLIDUS>, "-" or ".".
sub callback_dformat {
	my $s = shift;

	$s =~ s/(%m(<SOLIDUS>|[-.]))%e/$1%d/;
	$s =~ s/%e((<SOLIDUS>|[-.])%m)/%d$1/;
	return $s;
}

# Callback for d_t_fmt: inserts a %A (weekday) specifier, with locale
# specific placement for ja_JP, ko_KR, zh_CN and zh_TW.
sub callback_dtformat {
	my $s = shift;
	my $nl = $callback{data}{l} . "_" . $callback{data}{c};

	if ($nl eq 'ja_JP') {
		$s =~ s/(> )(%H)/$1%A $2/;
	} elsif ($nl eq 'ko_KR' || $nl eq 'zh_CN' || $nl eq 'zh_TW') {
		if ($nl ne 'ko_KR') {
			$s =~ s/%m/%_m/;
		}
		$s =~ s/(> )(%p)/$1%A $2/;
	}
	$s =~ s/\.,/\./;
	$s =~ s/^"%e\./%A %e/;
	$s =~ s/^"(%B %e, )/"%A, $1/;
	$s =~ s/^"(%e %B )/"%A $1/;
	return $s;
}

# Callback for md_order: reduces a d_fmt string to the sequence of its
# "d", "e" and "m" letters, with every "e" turned into "d".  Returns undef
# when no d_fmt value was read.
sub callback_mdorder {
	my $s = shift;
	return undef if (!defined $s);
	$s =~ s/[^dem]//g;
	$s =~ s/e/d/g;
	return $s;
}

sub callback_altmon {
	# if the language/country is known in %alternative months then
	# return that, otherwise repeat mon
	my $s = shift;

	if (defined $alternativemonths{$callback{data}{l}}{$callback{data}{c}}) {
		my @altnames = split(";",$alternativemonths{$callback{data}{l}}{$callback{data}{c}});
		my @cleaned;
		foreach (@altnames)
		{
			# trim surrounding whitespace from each name
			$_ =~ s/^\s+//;
			$_ =~ s/\s+$//;
			push @cleaned, $_;
		}
		return join(";",@cleaned);
	}

	return $s;
}

sub callback_abmon {
	# for specified CJK locales, pad result with a space to enable
	# columns to line up (style established in FreeBSD in 2001)
	my $s = shift;
	my $nl = $callback{data}{l} . "_" . $callback{data}{c};

	if ($nl eq 'ja_JP' || $nl eq 'ko_KR' || $nl eq 'zh_CN' ||
	    $nl eq 'zh_HK' || $nl eq 'zh_TW') {
		my @monthnames = split(";", $s);
		my @cleaned;
		foreach (@monthnames)
		{
			# Pad names that start with a single digit: <two>..<nine>,
			# or <one> not followed by <zero>, <one> or <two>.
			if ($_ =~ /^"<(two|three|four|five|six|seven|eight|nine)>/ ||
			   ($_ =~ /^"<one>/ && $_ !~ /^"<one>(<zero>|<one>|<two>)/))
			{
				$_ =~ s/^"/"<space>/;
			}
			push @cleaned, $_;
		}
		return join(";",@cleaned);
	}
	return $s;
}

############################

# Loads "$directory/UnicodeData.txt" into %ucd, recording for every line the
# first two ";"-separated fields as code -> name and name -> code.
sub get_unidata {
	my $directory = shift;

	open(FIN, "<", "$directory/UnicodeData.txt")
	    or die("Cannot open $directory/UnicodeData.txt");
	my @lines = <FIN>;
	chomp(@lines);
	close(FIN);

	foreach my $l (@lines) {
		my @a = split(/;/, $l);

		$ucd{code2name}{"$a[0]"} = $a[1];	# Unicode name
		$ucd{name2code}{"$a[1]"} = $a[0];	# Unicode code
	}
}

# Reads the CHARMAP ... END CHARMAP section of $file into %utf8map
# (character name with "_" turned into " " -> hex bytes with "\x" removed).
# A name whose bytes equal those of the preceding entry is also recorded in
# %utf8aliases, pointing at that preceding name.
sub get_utf8map {
	my $file = shift;

	# Without this map no UTF-8 value can be converted later on, so a
	# missing file is fatal here instead of failing obscurely downstream.
	open(FIN, "<", $file)
	    or die("Cannot open $file: $!");
	my @lines = <FIN>;
	close(FIN);
	chomp(@lines);

	my $prev_k = undef;
	my $prev_v = "";
	my $incharmap = 0;
	foreach my $l (@lines) {
		$l =~ s/\r//;
		next if ($l =~ /^\#/);
		next if ($l eq "");

		if ($l eq "CHARMAP") {
			$incharmap = 1;
			next;
		}

		next if (!$incharmap);
		last if ($l eq "END CHARMAP");

		# Skip lines that are not "<name> value"; using $1/$2 after a
		# failed match would silently reuse the previous line's data.
		next unless ($l =~ /^<([^\s]+)>\s+(.*)/);
		my $k = $1;
		my $v = $2;
		$k =~ s/_/ /g;		# unicode char string
		$v =~ s/\\x//g;		# UTF-8 char code
		$utf8map{$k} = $v;

		$utf8aliases{$k} = $prev_k if ($prev_v eq $v);

		$prev_v = $v;
		$prev_k = $k;
	}
}

# For every encoding named in %encodings, reads "$dir/<encoding>.TXT" and
# fills $convertors{encoding}{unicode code} = local code (both upper-case
# hex without a 0x prefix).  Encodings without a file are reported and
# skipped.
sub get_encodings {
	my $dir = shift;
	foreach my $e (sort(keys(%encodings))) {
		if (!open(FIN, "<", "$dir/$e.TXT")) {
			print "Cannot open charmap for $e\n";
			next;

		}
		$encodings{$e} = 1;
		my @lines = <FIN>;
		close(FIN);
		chomp(@lines);
		foreach my $l (@lines) {
			$l =~ s/\r//;
			next if ($l =~ /^\#/);
			next if ($l eq "");

			my @a = split(" ", $l);
			next if ($#a < 1);
			$a[0] =~ s/^0[xX]//;	# local char code
			$a[1] =~ s/^0[xX]//;	# unicode char code
			$convertors{$e}{uc($a[1])} = uc($a[0]);
		}
	}
}

# Populates %languages, %translations, %alternativemonths and %encodings
# from the L, T, AM and E entries returned by get_xmldata($ETCDIR).
sub get_languages {
	my %data = get_xmldata($ETCDIR);
	%languages = %{$data{L}};
	%translations = %{$data{T}};
	%alternativemonths = %{$data{AM}};
	%encodings = %{$data{E}};
}

# Writes the LC_CTYPE sources.  For every locale the UTF-8 variant is a copy
# of "$UNIDIR/posix/xx_Comm_C.UTF-8.src"; every other encoding gets the
# comment_char/escape_char lines and the LC_CTYPE section of the locale's
# own source file.  Content hashes are stored in %languages and %hashtable
# so that make_makefile() can link identical files.
sub transform_ctypes {
	# Add the C.UTF-8
	$languages{"C"}{"x"}{data}{"x"}{$DEFENCODING} = undef;

	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $file = $l;
		$file .= "_" . $f if ($f ne "x");
		$file .= "_" . $c if ($c ne "x");
		my $actfile = $file;

		my $filename = "$UNIDIR/posix/xx_Comm_C.UTF-8.src";
		if (! -f $filename) {
			print STDERR "Cannot open $filename\n";
			next;
		}
		open(FIN, "<", $filename)
		    or die("Cannot open $filename: $!");
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines;
		my $shex;
		my $uhex;
		while (<FIN>) {
			push @lines, $_;
		}
		close(FIN);
		$shex = sha1_hex(join("\n", @lines));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
		$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
		my $outname = "$TYPE.draft/$actfile.$DEFENCODING.src";
		open(FOUT, ">", $outname)
		    or die("Cannot create $outname: $!");
		print FOUT @lines;
		close(FOUT);
		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if ($enc eq $DEFENCODING);
			$filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
			if ($file eq 'ja_JP') {
				# Override $filename for ja_JP because
				# its CTYPE is not compatible with UTF-8.
				$filename = "$UNIDIR/posix/$file.eucJP.src";
			}
			if (! -f $filename) {
				print STDERR "Cannot open $filename\n";
				next;
			}
			@lines = ();
			open(FIN, "<", $filename)
			    or die("Cannot open $filename: $!");
			while (<FIN>) {
				if ((/^comment_char\s/) || (/^escape_char\s/)){
					push @lines, $_;
				}
				if (/^LC_CTYPE/../^END LC_CTYPE/) {
					push @lines, $_;
				}
			}
			close(FIN);
			# The encoding name is mixed into the hash so that
			# different encodings never compare as identical.
			$uhex = sha1_hex(join("\n", @lines) . $enc);
			$languages{$l}{$f}{data}{$c}{$enc} = $uhex;
			$hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;
			$outname = "$TYPE.draft/$actfile.$enc.src";
			open(FOUT, ">", $outname)
			    or die("Cannot create $outname: $!");
			print FOUT <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
			print FOUT @lines;
			close(FOUT);
		}
	}
	}
	}
}


# Writes the LC_COLLATE sources.  The comment_char/escape_char lines and the
# LC_COLLATE section of each locale's UTF-8 source are written to
# "$TYPE.draft/<locale>.UTF-8.src" and then copied verbatim for every other
# encoding of that locale.
sub transform_collation {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $file;
		$file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;
		my $actfile = $file;

		# Search order: $UNIDIR/posix, then $ETCDIR, then the
		# configured fallback locale in $UNIDIR/posix.
		my $filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
		$filename = "$ETCDIR/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename
		 && defined $languages{$l}{$f}{fallback}) {
			$file = $languages{$l}{$f}{fallback};
			$filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
		}
		$filename = "$UNIDIR/posix/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename) {
			print STDERR
			    "Cannot open $file.$DEFENCODING.src or fallback\n";
			next;
		}
		open(FIN, "<", $filename)
		    or die("Cannot open $filename: $!");
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines;
		my $shex;
		while (<FIN>) {
			if ((/^comment_char\s/) || (/^escape_char\s/)){
				push @lines, $_;
			}
			if (/^LC_COLLATE/../^END LC_COLLATE/) {
				# NOTE(review): this class may originally have
				# held a literal tab as well as the space;
				# confirm against a pristine copy of the file.
				$_ =~ s/[ ]+/ /g;
				push @lines, $_;
			}
		}
		close(FIN);
		$shex = sha1_hex(join("\n", @lines));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
		$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
		my $outname = "$TYPE.draft/$actfile.$DEFENCODING.src";
		open(FOUT, ">", $outname)
		    or die("Cannot create $outname: $!");
		print FOUT <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
		print FOUT @lines;
		close(FOUT);

		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if ($enc eq $DEFENCODING);
			copy ("$TYPE.draft/$actfile.$DEFENCODING.src",
			      "$TYPE.draft/$actfile.$enc.src");
			$languages{$l}{$f}{data}{$c}{$enc} = $shex;
			$hashtable{$shex}{"${l}_${f}_${c}.$enc"} = 1;
		}
	}
	}
	}
}

# Reads, for every locale, the value of each field named in %keys from the
# locale's UTF-8 source file into $values{lang}{family}{country}{key}.
# A value may continue over several lines, each continued line ending in
# "/".  Underscores inside <...> character names become spaces.
sub get_fields {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);

		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $file;
		$file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;

		# Search order: $UNIDIR/posix, then $ETCDIR, then the
		# configured fallback locale in $UNIDIR/posix.
		my $filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
		$filename = "$ETCDIR/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename
		 && defined $languages{$l}{$f}{fallback}) {
			$file = $languages{$l}{$f}{fallback};
			$filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
		}
		$filename = "$UNIDIR/posix/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename) {
			print STDERR
			    "Cannot open $file.$DEFENCODING.src or fallback\n";
			next;
		}
		open(FIN, "<", $filename)
		    or die("Cannot open $filename: $!");
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines = <FIN>;
		chomp(@lines);
		close(FIN);
		my $continue = 0;
		foreach my $k (keys(%keys)) {
			foreach my $line (@lines) {
				$line =~ s/\r//;
				next if (!$continue && $line !~ /^$k\s/);
				if ($continue) {
					$line =~ s/^\s+//;
				} else {
					$line =~ s/^$k\s+//;
				}

				$values{$l}{$f}{$c}{$k} = ""
					if (!defined $values{$l}{$f}{$c}{$k});

				# a trailing "/" means the value continues
				$continue = ($line =~ /\/$/);
				$line =~ s/\/$// if ($continue);

				# Replace underscores inside <...> names one at
				# a time.  The loop runs on the substitution's
				# success, so it stops as soon as nothing more
				# can be replaced; looping on "line contains _"
				# never ended for an underscore outside <...>
				# and made the die below unreachable.
				1 while ($line =~
				    s/\<([^>_]+)_([^>]+)\>/<$1 $2>/);
				die "_ in data - $line" if ($line =~ /_/);
				$values{$l}{$f}{$c}{$k} .= $line;

				last if (!$continue);
			}
		}
	}
	}
	}
}

# Converts the character name $s into its byte string in encoding $e.
# Dies when no mapping exists.  Returns the packed bytes for one-, two- and
# three-byte codes; for any other code length it reports the problem on
# STDERR and returns the text "length = N".
sub decodecldr {
	my $e = shift;
	my $s = shift;

	my $v = undef;

	if ($e eq "UTF-8") {
		#
		# Conversion to UTF-8 can be done from the Unicode name to
		# the UTF-8 character code.
		#
		$v = $utf8map{$s};
		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	} else {
		#
		# Conversion to these encodings can be done from the Unicode
		# name to Unicode code to the encodings code.
		#
		my $ucc = undef;
		$ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
		$ucc = $ucd{name2code}{$utf8aliases{$s}}
			if (!defined $ucc
			 && $utf8aliases{$s}
			 && defined $ucd{name2code}{$utf8aliases{$s}});

		if (!defined $ucc) {
			if (defined $translations{$e}{$s}{hex}) {
				$v = $translations{$e}{$s}{hex};
				$ucc = 0;
			} elsif (defined $translations{$e}{$s}{ucc}) {
				$ucc = $translations{$e}{$s}{ucc};
			}
		}

		die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
		$v = $convertors{$e}{$ucc} if (!defined $v);

		$v = $translations{$e}{$s}{hex}
			if (!defined $v && defined $translations{$e}{$s}{hex});

		if (!defined $v && defined $translations{$e}{$s}{unicode}) {
			my $ucn = $translations{$e}{$s}{unicode};
			$ucc = $ucd{name2code}{$ucn}
				if (defined $ucd{name2code}{$ucn});
			# Guard the alias lookup the same way as above, so an
			# unknown alias is not used as an undef hash key.
			$ucc = $ucd{name2code}{$utf8aliases{$ucn}}
				if (!defined $ucc
				 && defined $utf8aliases{$ucn}
				 && defined $ucd{name2code}{$utf8aliases{$ucn}});
			$v = $convertors{$e}{$ucc};
		}

		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	}

	return pack("C", hex($v)) if (length($v) == 2);
	return pack("CC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)))
		if (length($v) == 4);
	return pack("CCC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)),
	    hex(substr($v, 4, 2))) if (length($v) == 6);
	print STDERR "Cannot convert $e $s\n";
	return "length = " . length($v);

}

# Returns the %translations entry for character name $v in encoding $enc,
# or undef when there is none.
sub translate {
	my $enc = shift;
	my $v = shift;

	return $translations{$enc}{$v} if (defined $translations{$enc}{$v});
	return undef;
}

# Writes one "$TYPE.draft/<locale>.<encoding>" file per locale and encoding
# from the values collected by get_fields().  Output goes to a ".new" file
# first and is renamed to ".src" on success or ".failed" when a character
# could not be converted.  The SHA-1 of the generated text is recorded in
# %languages and %hashtable.
sub print_fields {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
				print "Skipping ${l}_" .
				    ($f eq "x" ? "" : "${f}_") .
				    "${c} - not read\n";
				next;
			}
			my $file = $l;
			$file .= "_" . $f if ($f ne "x");
			$file .= "_" . $c;
			print "Writing to $file in $enc\n";

			if ($enc ne $DEFENCODING &&
			    !defined $convertors{$enc}) {
				print "Failed! Cannot convert to $enc.\n";
				next;
			};

			my $outname = "$TYPE.draft/$file.$enc.new";
			open(FOUT, ">", $outname)
			    or die("Cannot create $outname: $!");
			my $okay = 1;
			my $output = "";
			print FOUT <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
			foreach my $k (keys(%keys)) {
				my $g = $keys{$k};

				die("Unknown $k in \%DESC")
					if (!defined $DESC{$k});

				$output .= "#\n# $DESC{$k}\n";

				# Replace one row with another
				if ($g =~ /^>/) {
					$k = substr($g, 1);
					$g = $keys{$k};
				}

				# Callback function
				if ($g =~ /^\</) {
					$callback{data}{c} = $c;
					$callback{data}{k} = $k;
					$callback{data}{f} = $f;
					$callback{data}{l} = $l;
					$callback{data}{e} = $enc;
					# spec is "<callback<source key<type"
					my @a = split(/\</, substr($g, 1));
					my $rv =
					    $callback{$a[0]}->($values{$l}{$f}{$c}{$a[1]});
					$values{$l}{$f}{$c}{$k} = $rv;
					$g = $a[2];
					$callback{data} = undef;
				}

				my $v = $values{$l}{$f}{$c}{$k};
				$v = "undef" if (!defined $v);

				if ($g eq "i") {
					$output .= "$v\n";
					next;
				}
				if ($g eq "ai") {
					$output .= "$v\n";
					next;
				}
				if ($g eq "s") {
					$v =~ s/^"//;
					$v =~ s/"$//;
					my $cm = "";
					# replace each <name> by its encoded bytes
					while ($v =~ /^(.*?)<(.*?)>(.*)/) {
						my $p1 = $1;
						$cm = $2;
						my $p3 = $3;

						my $rv = decodecldr($enc, $cm);
#						$rv = translate($enc, $cm)
#							if (!defined $rv);
						if (!defined $rv) {
							print STDERR
							    "Could not convert $k ($cm) from $DEFENCODING to $enc\n";
							$okay = 0;
							# $v is unchanged, so
							# retrying would never
							# end; give up on it.
							last;
						}

						$v = $p1 . $rv . $p3;
					}
					$output .= "$v\n";
					next;
				}
				if ($g eq "as") {
					foreach my $item (split(/;/, $v)) {
						$item =~ s/^"//;
						$item =~ s/"$//;
						my $cm = "";
						while ($item =~ /^(.*?)<(.*?)>(.*)/) {
							my $p1 = $1;
							$cm = $2;
							my $p3 = $3;

							my $rv =
							    decodecldr($enc,
								$cm);
#							$rv = translate($enc,
#							    $cm)
#							    if (!defined $rv);
							if (!defined $rv) {
								print STDERR
								    "Could not convert $k ($cm) from $DEFENCODING to $enc\n";
								$okay = 0;
								# see "s" above
								last;
							}

							# Use the saved captures
							# rather than $1/$3, which
							# are only valid until the
							# next successful match.
							$item = $p1 . $rv . $p3;
						}
						$output .= "$item\n";
					}
					next;
				}

				die("$k is '$g'");

			}

			$languages{$l}{$f}{data}{$c}{$enc} = sha1_hex($output);
			$hashtable{sha1_hex($output)}{"${l}_${f}_${c}.$enc"} = 1;
			print FOUT "$output# EOF\n";
			close(FOUT);

			if ($okay) {
				rename("$TYPE.draft/$file.$enc.new",
				    "$TYPE.draft/$file.$enc.src");
			} else {
				rename("$TYPE.draft/$file.$enc.new",
				    "$TYPE.draft/$file.$enc.failed");
			}
		}
	}
	}
	}
}

# Writes "$TYPE.draft/Makefile".  Files whose content hash is shared (see
# %hashtable) are emitted as SAME+= link pairs and dropped from LOCALES;
# every remaining locale/encoding becomes a LOCALES+= entry, followed by the
# nc_link and e_link legacy links taken from %languages.
sub make_makefile {
	print "Creating Makefile for $TYPE\n";
	my $SRCOUT;		# command of the .src suffix rule
	my $SRCOUT2;		# target suffix of that rule
	my $SRCOUT3 = "";	# extra make text placed before bsd.prog.mk
	my $SRCOUT4 = "";	# extra make text placed after PLACEHOLDER
	my $MAPLOC;
	if ($TYPE eq "colldef") {
		$SRCOUT = "localedef \${LOCALEDEF_ENDIAN} -D -U " .
			"-i \${.IMPSRC} \\\n" .
			"\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} " .
			"\${.OBJDIR}/\${.IMPSRC:T:R}";
		$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
				"locale/etc/final-maps\n";
		$SRCOUT2 = "LC_COLLATE";
		$SRCOUT3 = "" .
			".for f t in \${LOCALES_MAPPED}\n" .
			"FILES+=\t\$t.LC_COLLATE\n" .
			"FILESDIR_\$t.LC_COLLATE=\t\${LOCALEDIR}/\$t\n" .
			"\$t.LC_COLLATE: \${.CURDIR}/\$f.src\n" .
			"\tlocaledef \${LOCALEDEF_ENDIAN} -D -U " .
			"-i \${.ALLSRC} \\\n" .
			"\t\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} \\\n" .
			"\t\t\${.OBJDIR}/\${.TARGET:T:R}\n" .
			".endfor\n\n";
		$SRCOUT4 = "## LOCALES_MAPPED\n";
	}
	elsif ($TYPE eq "ctypedef") {
		$SRCOUT = "localedef \${LOCALEDEF_ENDIAN} -D -U -c " .
			"-w \${MAPLOC}/widths.txt \\\n" .
			"\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:E} " .
			"\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
			" || true";
		$SRCOUT2 = "LC_CTYPE";
		$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
				"locale/etc/final-maps\n";
		$SRCOUT3 = "## SYMPAIRS\n\n" .
			".for s t in \${SYMPAIRS}\n" .
			"\${t:S/src\$/LC_CTYPE/}: " .
			"\$s\n" .
			"\tlocaledef \${LOCALEDEF_ENDIAN} -D -U -c " .
			"-w \${MAPLOC}/widths.txt \\\n" .
			"\t-f \${MAPLOC}/map.\${.TARGET:T:R:C/^.*\\.//} " .
			"\\\n\t-i \${.ALLSRC} \${.OBJDIR}/\${.TARGET:T:R} " .
			" || true\n" .
			".endfor\n\n";
	}
	else {
		# NOTE(review): the [ ] class may originally have held a
		# literal tab as well as the space; confirm against a
		# pristine copy of the file.
		$SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
		$SRCOUT2 = "out";
		$MAPLOC = "";
	}
	open(FOUT, ">", "$TYPE.draft/Makefile")
	    or die("Cannot create $TYPE.draft/Makefile: $!");
	print FOUT <<EOF;
# \$FreeBSD\$
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale.

LOCALEDIR= \${SHAREDIR}/locale
FILESNAME= $FILESNAMES{$TYPE}
.SUFFIXES: .src .${SRCOUT2}
${MAPLOC}
EOF

	if ($TYPE eq "colldef" || $TYPE eq "ctypedef") {
		print FOUT <<EOF;
.include <bsd.endian.mk>

EOF
	}

	# The command line of the suffix rule must start with a tab; it is
	# written as \t so that it survives whitespace-mangling editors.
	print FOUT <<EOF;
.src.${SRCOUT2}:
\t$SRCOUT

## PLACEHOLDER

${SRCOUT4}

EOF

	foreach my $hash (keys(%hashtable)) {
		# For colldef, weight LOCALES to UTF-8
		#     Sort as upper-case and reverse to achieve it
		#     Make en_US, ru_RU, and ca_AD preferred
		my @files;
		if ($TYPE eq "colldef") {
			@files = sort {
				if ($a eq 'en_x_US.UTF-8' ||
				    $a eq 'ru_x_RU.UTF-8' ||
				    $a eq 'ca_x_AD.UTF-8') { return -1; }
				elsif ($b eq 'en_x_US.UTF-8' ||
				       $b eq 'ru_x_RU.UTF-8' ||
				       $b eq 'ca_x_AD.UTF-8') { return 1; }
				else { return uc($b) cmp uc($a); }
			} keys(%{$hashtable{$hash}});
		} elsif ($TYPE eq "ctypedef") {
			@files = sort {
				if ($a eq 'C_x_x.UTF-8') { return -1; }
				elsif ($b eq 'C_x_x.UTF-8') { return 1; }
				if ($a =~ /^en_x_US/) { return -1; }
				elsif ($b =~ /^en_x_US/) { return 1; }

				# Both sides use the same anchored patterns so
				# the comparison is symmetric in $a and $b.
				if ($a =~ /^en_x_GB\.ISO8859-15/ ||
				    $a =~ /^ru_x_RU/) { return -1; }
				elsif ($b =~ /^en_x_GB\.ISO8859-15/ ||
				       $b =~ /^ru_x_RU/) { return 1; }
				else { return uc($b) cmp uc($a); }

			} keys(%{$hashtable{$hash}});
		} else {
			@files = sort {
				if ($a =~ /_Comm_/ ||
				    $b eq 'en_x_US.UTF-8') { return 1; }
				elsif ($b =~ /_Comm_/ ||
				    $a eq 'en_x_US.UTF-8') { return -1; }
				else { return uc($b) cmp uc($a); }
			} keys(%{$hashtable{$hash}});
		}
		# More than one file with this hash: the first one is kept
		# and every other one becomes a link to it.
		if ($#files > 0) {
			my $link = shift(@files);
			$link =~ s/_x_x//;	# special case for C
			$link =~ s/_x_/_/;	# strip family if none there
			foreach my $file (@files) {
				my @a = split(/_/, $file);
				my @b = split(/\./, $a[-1]);
				$file =~ s/_x_/_/;
				print FOUT "SAME+=\t\t$link $file\n";
				# undef marks the entry as linked, which keeps
				# it out of LOCALES below
				undef($languages{$a[0]}{$a[1]}{data}{$b[0]}{$b[1]});
			}
		}
	}

	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
		 && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
			print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") .
			    "${c} - not read\n";
			next;
		}
		foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			my $file = $l;
			$file .= "_" . $f if ($f ne "x");
			$file .= "_" . $c if ($c ne "x");
			next if (!defined $languages{$l}{$f}{data}{$c}{$e});
			print FOUT "LOCALES+=\t$file.$e\n";
		}

		if (defined $languages{$l}{$f}{nc_link}) {
			foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
				my $file = $l . "_";
				$file .= $f . "_" if ($f ne "x");
				$file .= $c;
				print FOUT "SAME+=\t\t$file.$e $languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n";
			}
		}

		if (defined $languages{$l}{$f}{e_link}) {
			# e_link is a space-separated list of "from:to" pairs
			foreach my $el (split(" ", $languages{$l}{$f}{e_link})) {
				my @a = split(/:/, $el);
				my $file = $l . "_";
				$file .= $f . "_" if ($f ne "x");
				$file .= $c;
				print FOUT "SAME+=\t\t$file.$a[0] $file.$a[1]\t# legacy (same charset)\n";
			}
		}

	}
	}
	}

	print FOUT <<EOF;

FILES= \${LOCALES:S/\$/.${SRCOUT2}/}
CLEANFILES= \${FILES}

.for f t in \${SAME}
SYMLINKS+= ../\$f/\${FILESNAME} \\
    \${LOCALEDIR}/\$t/\${FILESNAME}
.endfor

.for f in \${LOCALES}
FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
.endfor

${SRCOUT3}.include <bsd.prog.mk>
EOF

	close(FOUT);
}