#!/usr/local/bin/perl -wC

# SPDX-License-Identifier: BSD-2-Clause-FreeBSD
#
# Copyright 2009 Edwin Groothuis <edwin@FreeBSD.org>
# Copyright 2015 John Marino <draco@marino.st>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# $FreeBSD$

#
# Reads locale sources from <unidir>/posix (falling back to <etcdir>),
# and writes per-locale draft files plus a Makefile into "<type>.draft/".
# <type> selects what is produced: colldef, ctypedef, numericdef,
# monetdef, msgdef or timedef.
#

use strict;
use File::Copy;
use XML::Parser;
use Tie::IxHash;
use Text::Iconv;
#use Data::Dumper;
use Getopt::Long;
use Digest::SHA qw(sha1_hex);
require "charmaps.pm";

# Print the command-line synopsis and terminate with exit status 1.
sub usage {
    print "Usage: $0 --unidir=<unidir> --etc=<etcdir> --type=<type>\n";
    exit(1);
}

usage() if ($#ARGV < 2);

my $DEFENCODING = "UTF-8";

my $UNIDIR = undef;
my $ETCDIR = undef;
my $TYPE = undef;

my $result = GetOptions (
    "unidir=s" => \$UNIDIR,
    "etc=s"    => \$ETCDIR,
    "type=s"   => \$TYPE,
);

# All three options are interpolated into paths and comparisons below;
# refuse to continue when any of them is missing.
usage() if (!defined $UNIDIR || !defined $ETCDIR || !defined $TYPE);

my %convertors = ();        # {encoding}{unicode code} => local char code

my %ucd = ();               # {code2name}{code} / {name2code}{name}
my %values = ();            # {l}{f}{c}{key} => raw value read by get_fields
my %hashtable = ();         # {sha1 of output}{"l_f_c.enc"} => 1
my %languages = ();         # {l}{f}{data}{c}{enc}, plus per-{l}{f} settings
my %translations = ();
my %encodings = ();
my %alternativemonths = (); # {l}{c} => ";"-separated month names
get_languages();

my %utf8map = ();           # unicode char name => UTF-8 hex string
my %utf8aliases = ();       # unicode char name => previous name, same code
get_unidata($UNIDIR);
get_utf8map("$UNIDIR/posix/$DEFENCODING.cm");
get_encodings("$ETCDIR/charmaps");

# %keys is assigned per $TYPE below.  Both hashes are tied so that
# iteration follows insertion order.
my %keys = ();
tie(%keys, "Tie::IxHash");
tie(%hashtable, "Tie::IxHash");

my %FILESNAMES = (
    "monetdef"   => "LC_MONETARY",
    "timedef"    => "LC_TIME",
    "msgdef"     => "LC_MESSAGES",
    "numericdef" => "LC_NUMERIC",
    "colldef"    => "LC_COLLATE",
    "ctypedef"   => "LC_CTYPE"
);

# Dispatch table for the "<callback<sourcekey<type" entries in %keys.
# {data} carries the current l/f/c/k/e context while a callback runs.
my %callback = (
    mdorder  => \&callback_mdorder,
    altmon   => \&callback_altmon,
    cformat  => \&callback_cformat,
    dformat  => \&callback_dformat,
    dtformat => \&callback_dtformat,
    cbabmon  => \&callback_abmon,
    cbampm   => \&callback_ampm,
    data     => undef,
);

# Comment text emitted above each field in the generated files.
my %DESC = (

    # numericdef
    "decimal_point"     => "decimal_point",
    "thousands_sep"     => "thousands_sep",
    "grouping"          => "grouping",

    # monetdef
    "int_curr_symbol"   => "int_curr_symbol (last character always " .
                           "SPACE)",
    "currency_symbol"   => "currency_symbol",
    "mon_decimal_point" => "mon_decimal_point",
    "mon_thousands_sep" => "mon_thousands_sep",
    "mon_grouping"      => "mon_grouping",
    "positive_sign"     => "positive_sign",
    "negative_sign"     => "negative_sign",
    "int_frac_digits"   => "int_frac_digits",
    "frac_digits"       => "frac_digits",
    "p_cs_precedes"     => "p_cs_precedes",
    "p_sep_by_space"    => "p_sep_by_space",
    "n_cs_precedes"     => "n_cs_precedes",
    "n_sep_by_space"    => "n_sep_by_space",
    "p_sign_posn"       => "p_sign_posn",
    "n_sign_posn"       => "n_sign_posn",

    # msgdef
    "yesexpr"           => "yesexpr",
    "noexpr"            => "noexpr",
    "yesstr"            => "yesstr",
    "nostr"             => "nostr",

    # timedef
    "abmon"             => "Short month names",
    "mon"               => "Long month names (as in a date)",
    "abday"             => "Short weekday names",
    "day"               => "Long weekday names",
    "t_fmt"             => "X_fmt",
    "d_fmt"             => "x_fmt",
    "c_fmt"             => "c_fmt",
    "am_pm"             => "AM/PM",
    "d_t_fmt"           => "date_fmt",
    "altmon"            => "Long month names (without case ending)",
    "md_order"          => "md_order",
    "t_fmt_ampm"        => "ampm_fmt",
);

#
# Per-type driver.  The %keys values are field type codes understood by
# print_fields: "s" (string), "i" (integer), "ai" (integer list),
# "as" (";"-separated string list), or "<callback<sourcekey<type" to
# derive the field from another key through a %callback entry.
#

if ($TYPE eq "colldef") {
    transform_collation();
    make_makefile();
}

if ($TYPE eq "ctypedef") {
    transform_ctypes();
    make_makefile();
}

if ($TYPE eq "numericdef") {
    %keys = (
        "decimal_point" => "s",
        "thousands_sep" => "s",
        "grouping"      => "ai",
    );
    get_fields();
    print_fields();
    make_makefile();
}

if ($TYPE eq "monetdef") {
    %keys = (
        "int_curr_symbol"   => "s",
        "currency_symbol"   => "s",
        "mon_decimal_point" => "s",
        "mon_thousands_sep" => "s",
        "mon_grouping"      => "ai",
        "positive_sign"     => "s",
        "negative_sign"     => "s",
        "int_frac_digits"   => "i",
        "frac_digits"       => "i",
        "p_cs_precedes"     => "i",
        "p_sep_by_space"    => "i",
        "n_cs_precedes"     => "i",
        "n_sep_by_space"    => "i",
        "p_sign_posn"       => "i",
        "n_sign_posn"       => "i"
    );
    get_fields();
    print_fields();
    make_makefile();
}

if ($TYPE eq "msgdef") {
    %keys = (
        "yesexpr" => "s",
        "noexpr"  => "s",
        "yesstr"  => "s",
        "nostr"   => "s"
    );
    get_fields();
    print_fields();
    make_makefile();
}

if ($TYPE eq "timedef") {
    %keys = (
        "abmon"      => "<cbabmon<abmon<as",
        "mon"        => "as",
        "abday"      => "as",
        "day"        => "as",
        "t_fmt"      => "s",
        "d_fmt"      => "<dformat<d_fmt<s",
        "c_fmt"      => "<cformat<d_t_fmt<s",
        "am_pm"      => "<cbampm<am_pm<as",
        "d_t_fmt"    => "<dtformat<d_t_fmt<s",
        "altmon"     => "<altmon<mon<as",
        "md_order"   => "<mdorder<d_fmt<s",
        "t_fmt_ampm" => "s",
    );
    get_fields();
    print_fields();
    make_makefile();
}

# am_pm callback: for ru_RU the value is replaced by a fixed pair of
# markers, converted from UTF-8 to the target encoding when that is
# not UTF-8.  Every other locale keeps its value unchanged.
sub callback_ampm {
    my $s = shift;
    my $nl = $callback{data}{l} . "_" . $callback{data}{c};
    my $enc = $callback{data}{e};

    if ($nl eq 'ru_RU') {
        if ($enc eq 'UTF-8') {
            $s = 'дп;пп';
        } else {
            my $converter = Text::Iconv->new("utf-8", "$enc");
            $s = $converter->convert("дп;пп");
        }
    }
    return $s;
}

# c_fmt callback (fed with d_t_fmt): drops the time zone conversions,
# collapses ".," to "." and inserts the weekday name (%A).
sub callback_cformat {
    my $s = shift;
    my $nl = $callback{data}{l} . "_" . $callback{data}{c};

    if ($nl eq 'ko_KR') {
        $s =~ s/(> )(%p)/$1%A $2/;
    }
    $s =~ s/\.,/\./;
    $s =~ s/ %Z//;
    $s =~ s/ %z//;
    $s =~ s/^"%e\./%A %e/;
    $s =~ s/^"(%B %e, )/"%A, $1/;
    $s =~ s/^"(%e %B )/"%A $1/;
    return $s;
};

# d_fmt callback: when %e sits next to %m with a separator in between,
# replace it by %d.
sub callback_dformat {
    my $s = shift;

    $s =~ s/(%m(<SOLIDUS>|[-.]))%e/$1%d/;
    $s =~ s/%e((<SOLIDUS>|[-.])%m)/%d$1/;
    return $s;
};

# d_t_fmt callback: inserts the weekday name (%A), with locale-specific
# placement for ja_JP, ko_KR, zh_CN and zh_TW.
sub callback_dtformat {
    my $s = shift;
    my $nl = $callback{data}{l} . "_" . $callback{data}{c};

    if ($nl eq 'ja_JP') {
        $s =~ s/(> )(%H)/$1%A $2/;
    } elsif ($nl eq 'ko_KR' || $nl eq 'zh_CN' || $nl eq 'zh_TW') {
        if ($nl ne 'ko_KR') {
            $s =~ s/%m/%_m/;
        }
        $s =~ s/(> )(%p)/$1%A $2/;
    }
    $s =~ s/\.,/\./;
    $s =~ s/^"%e\./%A %e/;
    $s =~ s/^"(%B %e, )/"%A, $1/;
    $s =~ s/^"(%e %B )/"%A $1/;
    return $s;
};

# md_order callback (fed with d_fmt): keeps only the letters d, e and m
# and maps e to d.  Returns undef when no d_fmt is available.
sub callback_mdorder {
    my $s = shift;
    return undef if (!defined $s);
    $s =~ s/[^dem]//g;
    $s =~ s/e/d/g;
    return $s;
};

sub callback_altmon {
    # if the language/country is known in %alternativemonths then
    # return that, otherwise repeat mon
    my $s = shift;

    if (defined $alternativemonths{$callback{data}{l}}{$callback{data}{c}}) {
        my @altnames = split(";", $alternativemonths{$callback{data}{l}}{$callback{data}{c}});
        my @cleaned;
        foreach (@altnames)
        {
            # strip surrounding whitespace from every name
            $_ =~ s/^\s+//;
            $_ =~ s/\s+$//;
            push @cleaned, $_;
        }
        return join(";", @cleaned);
    }

    return $s;
}

sub callback_abmon {
    # for specified CJK locales, pad result with a space to enable
    # columns to line up (style established in FreeBSD in 2001)
    my $s = shift;
    my $nl = $callback{data}{l} . "_" . $callback{data}{c};

    if ($nl eq 'ja_JP' || $nl eq 'ko_KR' || $nl eq 'zh_CN' ||
        $nl eq 'zh_HK' || $nl eq 'zh_TW') {
        my @monthnames = split(";", $s);
        my @cleaned;
        foreach (@monthnames)
        {
            # Names starting with <two>..<nine>, or with <one> not
            # followed by <zero>/<one>/<two>, get the padding.
            if ($_ =~ /^"<(two|three|four|five|six|seven|eight|nine)>/ ||
               ($_ =~ /^"<one>/ && $_ !~ /^"<one>(<zero>|<one>|<two>)/))
            {
                $_ =~ s/^"/"<space>/;
            }
            push @cleaned, $_;
        }
        return join(";", @cleaned);
    }
    return $s;
}

############################

# Load <directory>/UnicodeData.txt into %ucd: the first two
# ";"-separated fields of every line are stored as code <-> name.
# Dies when the file cannot be opened.
sub get_unidata {
    my $directory = shift;

    open(my $fin, '<', "$directory/UnicodeData.txt")
        or die("Cannot open $directory/UnicodeData.txt: $!");
    my @lines = <$fin>;
    chomp(@lines);
    close($fin);

    foreach my $l (@lines) {
        my @a = split(/;/, $l);

        $ucd{code2name}{"$a[0]"} = $a[1];   # Unicode name
        $ucd{name2code}{"$a[1]"} = $a[0];   # Unicode code
    }
}

# Load the CHARMAP section of a .cm file into %utf8map.  A name that
# directly follows another name with the same code is additionally
# recorded in %utf8aliases.  Dies when the file cannot be opened.
sub get_utf8map {
    my $file = shift;

    open(my $fin, '<', $file)
        or die("Cannot open $file: $!");
    my @lines = <$fin>;
    close($fin);
    chomp(@lines);

    my $prev_k = undef;
    my $prev_v = "";
    my $incharmap = 0;
    foreach my $l (@lines) {
        $l =~ s/\r//;
        next if ($l =~ /^\#/);
        next if ($l eq "");

        if ($l eq "CHARMAP") {
            $incharmap = 1;
            next;
        }

        next if (!$incharmap);
        last if ($l eq "END CHARMAP");

        # Skip anything that is not "<NAME> value"; otherwise the
        # captures of the previous line would be reused.
        next unless ($l =~ /^<([^\s]+)>\s+(.*)/);
        my $k = $1;
        my $v = $2;
        $k =~ s/_/ /g;      # unicode char string
        $v =~ s/\\x//g;     # UTF-8 char code
        $utf8map{$k} = $v;

        $utf8aliases{$k} = $prev_k if ($prev_v eq $v);

        $prev_v = $v;
        $prev_k = $k;
    }
}

# For every encoding named in %encodings, load <dir>/<encoding>.TXT into
# %convertors.  Encodings whose file cannot be opened are reported on
# stdout and skipped.
sub get_encodings {
    my $dir = shift;
    foreach my $e (sort(keys(%encodings))) {
        my $fin;
        if (!open($fin, '<', "$dir/$e.TXT")) {
            print "Cannot open charmap for $e\n";
            next;

        }
        $encodings{$e} = 1;
        my @lines = <$fin>;
        close($fin);
        chomp(@lines);
        foreach my $l (@lines) {
            $l =~ s/\r//;
            next if ($l =~ /^\#/);
            next if ($l eq "");

            my @a = split(" ", $l);
            next if ($#a < 1);
            $a[0] =~ s/^0[xX]//;    # local char code
            $a[1] =~ s/^0[xX]//;    # unicode char code
            $convertors{$e}{uc($a[1])} = uc($a[0]);
        }
    }
}

# Populate the language tables from get_xmldata() (charmaps.pm).
sub get_languages {
    my %data = get_xmldata($ETCDIR);
    %languages = %{$data{L}};
    %translations = %{$data{T}};
    %alternativemonths = %{$data{AM}};
    %encodings = %{$data{E}};
}

# Find the $DEFENCODING source for locale file stem $file: first in
# $UNIDIR/posix, then in $ETCDIR, then (when given) the fallback stem in
# $UNIDIR/posix.  Returns the path, or undef after reporting on stderr.
sub locate_source {
    my ($file, $fallback) = @_;

    my $filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
    $filename = "$ETCDIR/$file.$DEFENCODING.src"
        if (! -f $filename);
    if (! -f $filename && defined $fallback) {
        $file = $fallback;
        $filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
    }
    $filename = "$UNIDIR/posix/$file.$DEFENCODING.src"
        if (! -f $filename);
    if (! -f $filename) {
        print STDERR
            "Cannot open $file.$DEFENCODING.src or fallback\n";
        return undef;
    }
    return $filename;
}

# Write the LC_CTYPE draft sources.  The $DEFENCODING draft of every
# locale is a copy of xx_Comm_C.UTF-8.src; every other encoding gets the
# LC_CTYPE section of the locale's own source.  A SHA-1 of the content
# is recorded in %languages and %hashtable so that make_makefile can
# link identical files.
sub transform_ctypes {
    # Add the C.UTF-8
    $languages{"C"}{"x"}{data}{"x"}{$DEFENCODING} = undef;

    foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
            foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                next if (defined $languages{$l}{$f}{definitions}
                    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;    # unread
                my $file = $l;
                $file .= "_" . $f if ($f ne "x");
                $file .= "_" . $c if ($c ne "x");
                my $actfile = $file;

                my $filename = "$UNIDIR/posix/xx_Comm_C.UTF-8.src";
                if (! -f $filename) {
                    print STDERR "Cannot open $filename\n";
                    next;
                }
                open(my $fin, '<', $filename)
                    or die("Cannot open $filename: $!");
                print "Reading from $filename for ${l}_${f}_${c}\n";
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;    # read
                my @lines;
                my $shex;
                my $uhex;
                while (<$fin>) {
                    push @lines, $_;
                }
                close($fin);
                $shex = sha1_hex(join("\n", @lines));
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
                $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

                my $outname = "$TYPE.draft/$actfile.$DEFENCODING.src";
                open(my $fout, '>', $outname)
                    or die("Cannot write $outname: $!");
                print $fout @lines;
                close($fout)
                    or die("Cannot close $outname: $!");

                foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                    next if ($enc eq $DEFENCODING);
                    $filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
                    if (! -f $filename) {
                        print STDERR "Cannot open $filename\n";
                        next;
                    }
                    @lines = ();
                    open($fin, '<', $filename)
                        or die("Cannot open $filename: $!");
                    while (<$fin>) {
                        if ((/^comment_char\s/) || (/^escape_char\s/)) {
                            push @lines, $_;
                        }
                        if (/^LC_CTYPE/../^END LC_CTYPE/) {
                            push @lines, $_;
                        }
                    }
                    close($fin);
                    # The encoding name is part of the hash, so equal
                    # content in different encodings is never linked.
                    $uhex = sha1_hex(join("\n", @lines) . $enc);
                    $languages{$l}{$f}{data}{$c}{$enc} = $uhex;
                    $hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;

                    $outname = "$TYPE.draft/$actfile.$enc.src";
                    open($fout, '>', $outname)
                        or die("Cannot write $outname: $!");
                    print $fout <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
                    print $fout @lines;
                    close($fout)
                        or die("Cannot close $outname: $!");
                }
            }
        }
    }
}


# Write the LC_COLLATE draft sources.  The LC_COLLATE section (runs of
# spaces squeezed to one) is extracted once per locale into the
# $DEFENCODING draft and then copied verbatim for every other encoding.
sub transform_collation {
    foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
            foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                next if (defined $languages{$l}{$f}{definitions}
                    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;    # unread
                my $file;
                $file = $l . "_";
                $file .= $f . "_" if ($f ne "x");
                $file .= $c;
                my $actfile = $file;

                my $filename =
                    locate_source($file, $languages{$l}{$f}{fallback});
                next if (!defined $filename);

                open(my $fin, '<', $filename)
                    or die("Cannot open $filename: $!");
                print "Reading from $filename for ${l}_${f}_${c}\n";
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;    # read
                my @lines;
                my $shex;
                while (<$fin>) {
                    if ((/^comment_char\s/) || (/^escape_char\s/)) {
                        push @lines, $_;
                    }
                    if (/^LC_COLLATE/../^END LC_COLLATE/) {
                        $_ =~ s/[ ]+/ /g;
                        push @lines, $_;
                    }
                }
                close($fin);
                $shex = sha1_hex(join("\n", @lines));
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
                $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

                my $outname = "$TYPE.draft/$actfile.$DEFENCODING.src";
                open(my $fout, '>', $outname)
                    or die("Cannot write $outname: $!");
                print $fout <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
                print $fout @lines;
                close($fout)
                    or die("Cannot close $outname: $!");

                foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                    next if ($enc eq $DEFENCODING);
                    copy("$TYPE.draft/$actfile.$DEFENCODING.src",
                        "$TYPE.draft/$actfile.$enc.src")
                        or die("Cannot copy $outname to " .
                            "$TYPE.draft/$actfile.$enc.src: $!");
                    $languages{$l}{$f}{data}{$c}{$enc} = $shex;
                    $hashtable{$shex}{"${l}_${f}_${c}.$enc"} = 1;
                }
            }
        }
    }
}

# Read the raw value of every field named in %keys from each locale's
# source file into %values.  A trailing "/" continues the value on the
# next line, and underscores inside <...> tokens become spaces.
sub get_fields {
    foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
            foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                next if (defined $languages{$l}{$f}{definitions}
                    && $languages{$l}{$f}{definitions} !~ /$TYPE/);

                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;    # unread
                my $file;
                $file = $l . "_";
                $file .= $f . "_" if ($f ne "x");
                $file .= $c;

                my $filename =
                    locate_source($file, $languages{$l}{$f}{fallback});
                next if (!defined $filename);

                open(my $fin, '<', $filename)
                    or die("Cannot open $filename: $!");
                print "Reading from $filename for ${l}_${f}_${c}\n";
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;    # read
                my @lines = <$fin>;
                chomp(@lines);
                close($fin);
                my $continue = 0;
                foreach my $k (keys(%keys)) {
                    # $line aliases the array element, so the edits
                    # below are visible to the scans for later keys.
                    foreach my $line (@lines) {
                        $line =~ s/\r//;
                        next if (!$continue && $line !~ /^$k\s/);
                        if ($continue) {
                            $line =~ s/^\s+//;
                        } else {
                            $line =~ s/^$k\s+//;
                        }

                        $values{$l}{$f}{$c}{$k} = ""
                            if (!defined $values{$l}{$f}{$c}{$k});

                        $continue = ($line =~ /\/$/);
                        $line =~ s/\/$// if ($continue);

                        # One underscore per pass; stops as soon as no
                        # <..._...> token is left, so an underscore
                        # outside a token reaches the die below instead
                        # of spinning forever.
                        1 while ($line =~
                            s/\<([^>_]+)_([^>]+)\>/<$1 $2>/);
                        die "_ in data - $line" if ($line =~ /_/);
                        $values{$l}{$f}{$c}{$k} .= $line;

                        last if (!$continue);
                    }
                }
            }
        }
    }
}

# Convert Unicode character name $s to its byte sequence in encoding $e.
# Dies when the name cannot be resolved.  Codes that are not one, two or
# three bytes long are reported on stderr and yield a "length = N"
# placeholder string.
sub decodecldr {
    my $e = shift;
    my $s = shift;

    my $v = undef;

    if ($e eq "UTF-8") {
        #
        # Conversion to UTF-8 can be done from the Unicode name to
        # the UTF-8 character code.
        #
        $v = $utf8map{$s};
        die "Cannot convert $s in $e (charmap)" if (!defined $v);
    } else {
        #
        # Conversion to these encodings can be done from the Unicode
        # name to Unicode code to the encodings code.
        #
        my $ucc = undef;
        $ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
        $ucc = $ucd{name2code}{$utf8aliases{$s}}
            if (!defined $ucc
                && $utf8aliases{$s}
                && defined $ucd{name2code}{$utf8aliases{$s}});

        if (!defined $ucc) {
            if (defined $translations{$e}{$s}{hex}) {
                $v = $translations{$e}{$s}{hex};
                $ucc = 0;
            } elsif (defined $translations{$e}{$s}{ucc}) {
                $ucc = $translations{$e}{$s}{ucc};
            }
        }

        die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
        $v = $convertors{$e}{$ucc} if (!defined $v);

        $v = $translations{$e}{$s}{hex}
            if (!defined $v && defined $translations{$e}{$s}{hex});

        if (!defined $v && defined $translations{$e}{$s}{unicode}) {
            my $ucn = $translations{$e}{$s}{unicode};
            $ucc = $ucd{name2code}{$ucn}
                if (defined $ucd{name2code}{$ucn});
            # Same guard as for $s above: only follow an alias that
            # actually exists.
            $ucc = $ucd{name2code}{$utf8aliases{$ucn}}
                if (!defined $ucc
                    && $utf8aliases{$ucn}
                    && defined $ucd{name2code}{$utf8aliases{$ucn}});
            $v = $convertors{$e}{$ucc};
        }

        die "Cannot convert $s in $e (charmap)" if (!defined $v);
    }

    return pack("C", hex($v)) if (length($v) == 2);
    return pack("CC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)))
        if (length($v) == 4);
    return pack("CCC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)),
        hex(substr($v, 4, 2))) if (length($v) == 6);
    print STDERR "Cannot convert $e $s\n";
    return "length = " . length($v);

}

# Return the %translations entry for name $v in encoding $enc, or undef.
sub translate {
    my $enc = shift;
    my $v = shift;

    return $translations{$enc}{$v} if (defined $translations{$enc}{$v});
    return undef;
}

# Strip the surrounding double quotes from $v and replace every <NAME>
# token by its bytes in encoding $enc.  $k is only used in the error
# message.  Returns (converted string, ok flag); conversion stops at the
# first token that yields no result.
sub expand_symbols {
    my ($enc, $k, $v) = @_;
    my $okay = 1;

    $v =~ s/^"//;
    $v =~ s/"$//;
    while ($v =~ /^(.*?)<(.*?)>(.*)/) {
        my $p1 = $1;
        my $cm = $2;
        my $p3 = $3;

        my $rv = decodecldr($enc, $cm);
#       $rv = translate($enc, $cm)
#           if (!defined $rv);
        if (!defined $rv) {
            print STDERR
"Could not convert $k ($cm) from $DEFENCODING to $enc\n";
            $okay = 0;
            # $v is unchanged here; retrying would never terminate.
            last;
        }

        $v = $p1 . $rv . $p3;
    }
    return ($v, $okay);
}

# Write one "<type>.draft/<locale>.<encoding>.src" file per locale and
# encoding from %values, in %keys order.  A file whose conversion failed
# is left as ".failed" instead of ".src".
sub print_fields {
    foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
            foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                next if (defined $languages{$l}{$f}{definitions}
                    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
                foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                    if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
                        print "Skipping ${l}_" .
                            ($f eq "x" ? "" : "${f}_") .
                            "${c} - not read\n";
                        next;
                    }
                    my $file = $l;
                    $file .= "_" . $f if ($f ne "x");
                    $file .= "_" . $c;
                    print "Writing to $file in $enc\n";

                    if ($enc ne $DEFENCODING &&
                        !defined $convertors{$enc}) {
                        print "Failed! Cannot convert to $enc.\n";
                        next;
                    };

                    my $newname = "$TYPE.draft/$file.$enc.new";
                    open(my $fout, '>', $newname)
                        or die("Cannot write $newname: $!");
                    my $okay = 1;
                    my $output = "";
                    print $fout <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
                    foreach my $k (keys(%keys)) {
                        my $g = $keys{$k};

                        die("Unknown $k in \%DESC")
                            if (!defined $DESC{$k});

                        $output .= "#\n# $DESC{$k}\n";

                        # Replace one row with another
                        if ($g =~ /^>/) {
                            $k = substr($g, 1);
                            $g = $keys{$k};
                        }

                        # Callback function
                        if ($g =~ /^\</) {
                            $callback{data}{c} = $c;
                            $callback{data}{k} = $k;
                            $callback{data}{f} = $f;
                            $callback{data}{l} = $l;
                            $callback{data}{e} = $enc;
                            # spec = (callback name, source key, type)
                            my @spec = split(/\</, substr($g, 1));
                            my $rv = $callback{$spec[0]}->(
                                $values{$l}{$f}{$c}{$spec[1]});
                            $values{$l}{$f}{$c}{$k} = $rv;
                            $g = $spec[2];
                            $callback{data} = undef;
                        }

                        my $v = $values{$l}{$f}{$c}{$k};
                        $v = "undef" if (!defined $v);

                        if ($g eq "i" || $g eq "ai") {
                            $output .= "$v\n";
                            next;
                        }
                        if ($g eq "s") {
                            my ($text, $ok) =
                                expand_symbols($enc, $k, $v);
                            $okay = 0 if (!$ok);
                            $output .= "$text\n";
                            next;
                        }
                        if ($g eq "as") {
                            foreach my $item (split(/;/, $v)) {
                                my ($text, $ok) =
                                    expand_symbols($enc, $k, $item);
                                $okay = 0 if (!$ok);
                                $output .= "$text\n";
                            }
                            next;
                        }

                        die("$k is '$g'");

                    }

                    $languages{$l}{$f}{data}{$c}{$enc} = sha1_hex($output);
                    $hashtable{sha1_hex($output)}{"${l}_${f}_${c}.$enc"} = 1;
                    print $fout "$output# EOF\n";
                    close($fout)
                        or die("Cannot close $newname: $!");

                    my $final = "$TYPE.draft/$file.$enc." .
                        ($okay ? "src" : "failed");
                    rename($newname, $final)
                        or die("Cannot rename $newname to $final: $!");
                }
            }
        }
    }
}

# Write "<type>.draft/Makefile".  Files with identical content (same
# key in %hashtable) are emitted as SAME+= pairs and removed from
# %languages; everything left is listed in LOCALES+=.
sub make_makefile {
    print "Creating Makefile for $TYPE\n";
    my $SRCOUT;
    my $SRCOUT2;
    my $SRCOUT3 = "";
    my $SRCOUT4 = "";
    my $MAPLOC;
    if ($TYPE eq "colldef") {
        $SRCOUT = "localedef \${LOCALEDEF_ENDIAN} -D -U " .
            "-i \${.IMPSRC} \\\n" .
            "\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} " .
            "\${.OBJDIR}/\${.IMPSRC:T:R}";
        $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
            "locale/etc/final-maps\n";
        $SRCOUT2 = "LC_COLLATE";
        $SRCOUT3 = "" .
            ".for f t in \${LOCALES_MAPPED}\n" .
            "FILES+=\t\$t.LC_COLLATE\n" .
            "FILESDIR_\$t.LC_COLLATE=\t\${LOCALEDIR}/\$t\n" .
            "\$t.LC_COLLATE: \${.CURDIR}/\$f.src\n" .
            "\tlocaledef \${LOCALEDEF_ENDIAN} -D -U " .
            "-i \${.ALLSRC} \\\n" .
            "\t\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} \\\n" .
            "\t\t\${.OBJDIR}/\${.TARGET:T:R}\n" .
            ".endfor\n\n";
        $SRCOUT4 = "## LOCALES_MAPPED\n";
    }
    elsif ($TYPE eq "ctypedef") {
        $SRCOUT = "localedef \${LOCALEDEF_ENDIAN} -D -U -c " .
            "-w \${MAPLOC}/widths.txt \\\n" .
            "\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:E} " .
            "\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
            " || true";
        $SRCOUT2 = "LC_CTYPE";
        $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
            "locale/etc/final-maps\n";
        $SRCOUT3 = "## SYMPAIRS\n\n" .
            ".for s t in \${SYMPAIRS}\n" .
            "\${t:S/src\$/LC_CTYPE/}: " .
            "\$s\n" .
            "\tlocaledef \${LOCALEDEF_ENDIAN} -D -U -c " .
            "-w \${MAPLOC}/widths.txt \\\n" .
            "\t-f \${MAPLOC}/map.\${.TARGET:T:R:C/^.*\\.//} " .
            "\\\n\t-i \${.ALLSRC} \${.OBJDIR}/\${.TARGET:T:R} " .
            " || true\n" .
            ".endfor\n\n";
    }
    else {
        $SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
        $SRCOUT2 = "out";
        $MAPLOC = "";
    }

    my $makefile = "$TYPE.draft/Makefile";
    open(my $fout, '>', $makefile)
        or die("Cannot write $makefile: $!");

    # Tabs are written as \t escapes below so that the recipe line keeps
    # a real tab regardless of how this script is edited.
    print $fout <<EOF;
# \$FreeBSD\$
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale.

LOCALEDIR=\t\${SHAREDIR}/locale
FILESNAME=\t$FILESNAMES{$TYPE}
.SUFFIXES:\t.src .${SRCOUT2}
${MAPLOC}
EOF

    if ($TYPE eq "colldef" || $TYPE eq "ctypedef") {
        print $fout <<EOF;
.include <bsd.endian.mk>

EOF
    }

    print $fout <<EOF;
.src.${SRCOUT2}:
\t$SRCOUT

## PLACEHOLDER

${SRCOUT4}

EOF

    foreach my $hash (keys(%hashtable)) {
        # For colldef, weight LOCALES to UTF-8
        #     Sort as upper-case and reverse to achieve it
        #     Make en_US, ru_RU, and ca_AD preferred
        my @files;
        if ($TYPE eq "colldef") {
            @files = sort {
                if ($a eq 'en_x_US.UTF-8' ||
                    $a eq 'ru_x_RU.UTF-8' ||
                    $a eq 'ca_x_AD.UTF-8') { return -1; }
                elsif ($b eq 'en_x_US.UTF-8' ||
                       $b eq 'ru_x_RU.UTF-8' ||
                       $b eq 'ca_x_AD.UTF-8') { return 1; }
                else { return uc($b) cmp uc($a); }
            } keys(%{$hashtable{$hash}});
        } elsif ($TYPE eq "ctypedef") {
            @files = sort {
                if ($a eq 'C_x_x.UTF-8') { return -1; }
                elsif ($b eq 'C_x_x.UTF-8') { return 1; }
                if ($a =~ /^en_x_US/) { return -1; }
                elsif ($b =~ /^en_x_US/) { return 1; }

                # Both sides use the same anchored patterns so the
                # comparator treats $a and $b alike.
                if ($a =~ /^en_x_GB.ISO8859-15/ ||
                    $a =~ /^ru_x_RU/) { return -1; }
                elsif ($b =~ /^en_x_GB.ISO8859-15/ ||
                       $b =~ /^ru_x_RU/) { return 1; }
                else { return uc($b) cmp uc($a); }

            } keys(%{$hashtable{$hash}});
        } else {
            @files = sort {
                if ($a =~ /_Comm_/ ||
                    $b eq 'en_x_US.UTF-8') { return 1; }
                elsif ($b =~ /_Comm_/ ||
                       $a eq 'en_x_US.UTF-8') { return -1; }
                else { return uc($b) cmp uc($a); }
            } keys(%{$hashtable{$hash}});
        }
        if ($#files > 0) {
            # The first file is the real one; the rest link to it.
            my $link = shift(@files);
            $link =~ s/_x_x//;    # special case for C
            $link =~ s/_x_/_/;    # strip family if none there
            foreach my $file (@files) {
                my @parts = split(/_/, $file);          # l, f, "c.enc"
                my @tail = split(/\./, $parts[-1]);     # c, enc
                $file =~ s/_x_/_/;
                print $fout "SAME+=\t\t$link $file\n";
                undef($languages{$parts[0]}{$parts[1]}{data}{$tail[0]}{$tail[1]});
            }
        }
    }

    foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
            foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                next if (defined $languages{$l}{$f}{definitions}
                    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
                if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
                    && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
                    print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") .
                        "${c} - not read\n";
                    next;
                }
                foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                    my $file = $l;
                    $file .= "_" . $f if ($f ne "x");
                    $file .= "_" . $c if ($c ne "x");
                    # undefined entries were turned into SAME+= above
                    next if (!defined $languages{$l}{$f}{data}{$c}{$e});
                    print $fout "LOCALES+=\t$file.$e\n";
                }

                if (defined $languages{$l}{$f}{nc_link}) {
                    foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                        my $file = $l . "_";
                        $file .= $f . "_" if ($f ne "x");
                        $file .= $c;
                        print $fout "SAME+=\t\t$file.$e $languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n";
                    }
                }

                if (defined $languages{$l}{$f}{e_link}) {
                    foreach my $el (split(" ", $languages{$l}{$f}{e_link})) {
                        my @pair = split(/:/, $el);
                        my $file = $l . "_";
                        $file .= $f . "_" if ($f ne "x");
                        $file .= $c;
                        print $fout "SAME+=\t\t$file.$pair[0] $file.$pair[1]\t# legacy (same charset)\n";
                    }
                }

            }
        }
    }

    print $fout <<EOF;

FILES=\t\t\${LOCALES:S/\$/.${SRCOUT2}/}
CLEANFILES=\t\${FILES}

.for f t in \${SAME}
SYMLINKS+=\t../\$f/\${FILESNAME} \\
    \${LOCALEDIR}/\$t/\${FILESNAME}
.endfor

.for f in \${LOCALES}
FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
.endfor

${SRCOUT3}.include <bsd.prog.mk>
EOF

    close($fout)
        or die("Cannot close $makefile: $!");
}