#!/usr/local/bin/perl -wC
# $FreeBSD$

#
# Convert CLDR POSIX locale sources into per-encoding locale definition
# sources and write the Makefile that builds and installs them.
#
# Options:
#   --cldr=<dir>     directory holding posix/*.src and posix/UTF-8.cm
#   --unidata=<dir>  directory holding UnicodeData.txt
#   --etc=<dir>      directory holding charmaps/*.TXT and extra *.src
#                    files; it is also handed to get_xmldata()
#   --type=<type>    one of the keys of %FILESNAMES
#   --lc=<la_CC>     optional; restrict the run to one locale (no Makefile
#                    is written in that case)
#
# All output is written below "<type>.draft"; this script does not create
# that directory.
#

use strict;
use File::Copy;
use XML::Parser;
use Tie::IxHash;
use Text::Iconv;
use Data::Dumper;
use Getopt::Long;
use Digest::SHA qw(sha1_hex);
require "charmaps.pm";


usage() if ($#ARGV < 2);

my $DEFENCODING = "UTF-8";
my @filter = ();

my $CLDRDIR = undef;
my $UNIDATADIR = undef;
my $ETCDIR = undef;
my $TYPE = undef;
my $doonly = undef;

my $result = GetOptions (
		"cldr=s"	=> \$CLDRDIR,
		"unidata=s"	=> \$UNIDATADIR,
		"etc=s"		=> \$ETCDIR,
		"type=s"	=> \$TYPE,
		"lc=s"		=> \$doonly
	    );

# Every option except --lc is needed below; stop early instead of running
# with undefined paths.
usage() if (!$result || !defined $CLDRDIR || !defined $UNIDATADIR
    || !defined $ETCDIR || !defined $TYPE);

my %convertors = ();

my %ucd = ();
my %values = ();
my %hashtable = ();
my %languages = ();
my %translations = ();
my %encodings = ();
my %alternativemonths = ();
get_languages();

my %utf8map = ();
my %utf8aliases = ();
get_unidata($UNIDATADIR);
get_utf8map("$CLDRDIR/posix/$DEFENCODING.cm");
get_encodings("$ETCDIR/charmaps");

# Both hashes keep insertion order: %keys fixes the order in which fields
# are emitted, %hashtable the order in which SAME+= lines are written.
my %keys = ();
tie(%keys, "Tie::IxHash");
tie(%hashtable, "Tie::IxHash");

# Locale type => name of the installed locale file (FILESNAME in the
# generated Makefile).
my %FILESNAMES = (
	"monetdef"	=> "LC_MONETARY",
	"timedef"	=> "LC_TIME",
	"msgdef"	=> "LC_MESSAGES",
	"numericdef"	=> "LC_NUMERIC",
	"colldef"	=> "LC_COLLATE",
	"ctypedef"	=> "LC_CTYPE"
);

# Callbacks that can be named in a %keys specification of the form
# "<callback<sourcefield<type".  The "data" slot is filled by
# print_fields() with the current locale while a callback runs.
my %callback = (
	mdorder => \&callback_mdorder,
	altmon => \&callback_altmon,
	cformat => \&callback_cformat,
	dformat => \&callback_dformat,
	dtformat => \&callback_dtformat,
	cbabmon => \&callback_abmon,
	cbampm => \&callback_ampm,
	data => undef,
);

# Field name => comment text written above the field in the output.
my %DESC = (

	# numericdef
	"decimal_point"	=> "decimal_point",
	"thousands_sep"	=> "thousands_sep",
	"grouping"	=> "grouping",

	# monetdef
	"int_curr_symbol"	=> "int_curr_symbol (last character always " .
				   "SPACE)",
	"currency_symbol"	=> "currency_symbol",
	"mon_decimal_point"	=> "mon_decimal_point",
	"mon_thousands_sep"	=> "mon_thousands_sep",
	"mon_grouping"		=> "mon_grouping",
	"positive_sign"		=> "positive_sign",
	"negative_sign"		=> "negative_sign",
	"int_frac_digits"	=> "int_frac_digits",
	"frac_digits"		=> "frac_digits",
	"p_cs_precedes"		=> "p_cs_precedes",
	"p_sep_by_space"	=> "p_sep_by_space",
	"n_cs_precedes"		=> "n_cs_precedes",
	"n_sep_by_space"	=> "n_sep_by_space",
	"p_sign_posn"		=> "p_sign_posn",
	"n_sign_posn"		=> "n_sign_posn",

	# msgdef
	"yesexpr"	=> "yesexpr",
	"noexpr"	=> "noexpr",
	"yesstr"	=> "yesstr",
	"nostr"		=> "nostr",

	# timedef
	"abmon"		=> "Short month names",
	"mon"		=> "Long month names (as in a date)",
	"abday"		=> "Short weekday names",
	"day"		=> "Long weekday names",
	"t_fmt"		=> "X_fmt",
	"d_fmt"		=> "x_fmt",
	"c_fmt"		=> "c_fmt",
	"am_pm"		=> "AM/PM",
	"d_t_fmt"	=> "date_fmt",
	"altmon"	=> "Long month names (without case ending)",
	"md_order"	=> "md_order",
	"t_fmt_ampm"	=> "ampm_fmt",
);

die("Unknown type '$TYPE'; expected one of: " .
    join(", ", sort keys(%FILESNAMES)) . "\n")
    if (!exists $FILESNAMES{$TYPE});

# Field specifications: "s" string, "as" ;-separated strings, "i" integer,
# "ai" ;-separated integers, "<cb<field<type" runs callback cb on the raw
# value of field and treats the result as type.
if ($TYPE eq "colldef") {
	transform_collation();
	make_makefile();
}

if ($TYPE eq "ctypedef") {
	transform_ctypes();
	make_makefile();
}

if ($TYPE eq "numericdef") {
	%keys = (
	    "decimal_point"	=> "s",
	    "thousands_sep"	=> "s",
	    "grouping"		=> "ai",
	);
	get_fields();
	print_fields();
	make_makefile();
}

if ($TYPE eq "monetdef") {
	%keys = (
	    "int_curr_symbol"	=> "s",
	    "currency_symbol"	=> "s",
	    "mon_decimal_point"	=> "s",
	    "mon_thousands_sep"	=> "s",
	    "mon_grouping"	=> "ai",
	    "positive_sign"	=> "s",
	    "negative_sign"	=> "s",
	    "int_frac_digits"	=> "i",
	    "frac_digits"	=> "i",
	    "p_cs_precedes"	=> "i",
	    "p_sep_by_space"	=> "i",
	    "n_cs_precedes"	=> "i",
	    "n_sep_by_space"	=> "i",
	    "p_sign_posn"	=> "i",
	    "n_sign_posn"	=> "i"
	);
	get_fields();
	print_fields();
	make_makefile();
}

if ($TYPE eq "msgdef") {
	%keys = (
	    "yesexpr"		=> "s",
	    "noexpr"		=> "s",
	    "yesstr"		=> "s",
	    "nostr"		=> "s"
	);
	get_fields();
	print_fields();
	make_makefile();
}

if ($TYPE eq "timedef") {
	# Order matters: c_fmt is derived from the raw d_t_fmt value, so it
	# has to be processed before d_t_fmt itself is rewritten.
	%keys = (
	    "abmon"		=> "<cbabmon<abmon<as",
	    "mon"		=> "as",
	    "abday"		=> "as",
	    "day"		=> "as",
	    "t_fmt"		=> "s",
	    "d_fmt"		=> "<dformat<d_fmt<s",
	    "c_fmt"		=> "<cformat<d_t_fmt<s",
	    "am_pm"		=> "<cbampm<am_pm<as",
	    "d_t_fmt"		=> "<dtformat<d_t_fmt<s",
	    "altmon"		=> "<altmon<mon<as",
	    "md_order"		=> "<mdorder<d_fmt<s",
	    "t_fmt_ampm"	=> "s",
	);
	get_fields();
	print_fields();
	make_makefile();
}

# Print the command line synopsis and exit with status 1.
sub usage {
	print "Usage: $0 --cldr=<cldrdir> --unidata=<unidatadir> --etc=<etcdir> --type=<type> [--lc=<la_CC>]\n";
	exit(1);
}

# Open $path with the given open() mode and return the handle; die with
# the system error if that fails.
sub _open_or_die {
	my ($mode, $path) = @_;

	open(my $fh, $mode, $path) or die("Cannot open $path: $!\n");
	return $fh;
}

# Return true when locale ($l, $f, $c) must not be processed: either a
# --lc filter is active and does not match, or the language entry carries
# a "definitions" list that does not mention the current $TYPE.
sub _skip_locale {
	my ($l, $f, $c) = @_;

	return 1 if ($#filter == 2 && ($filter[0] ne $l
	    || $filter[1] ne $f || $filter[2] ne $c));
	return 1 if (defined $languages{$l}{$f}{definitions}
	    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
	return 0;
}

# Build "la_CC" or "la_Family_CC"; the placeholder family "x" is dropped.
sub _locale_name {
	my ($l, $f, $c) = @_;

	return $l . "_" . ($f eq "x" ? "" : $f . "_") . $c;
}

# Return the "Do not edit" banner written at the top of output files.
# $how is the verb used in the first line ("extracted" or "generated").
sub _file_header {
	my $how = shift;

	return <<EOF;
# Warning: Do not edit. This file is automatically $how from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
}

# Locate the UTF-8 source for locale file name $file.  Tried in order:
# the CLDR posix directory, $ETCDIR, then the language's "fallback" name
# in the CLDR posix directory.  Returns the path, or nothing after
# reporting the failure on STDERR.
sub _find_source {
	my ($l, $f, $file) = @_;

	my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
	$filename = "$ETCDIR/$file.$DEFENCODING.src"
	    if (! -f $filename);
	if (! -f $filename
	    && defined $languages{$l}{$f}{fallback}) {
		$file = $languages{$l}{$f}{fallback};
		$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
	}
	$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
	    if (! -f $filename);
	return $filename if (-f $filename);

	print STDERR "Cannot open $file.$DEFENCODING.src or fallback\n";
	return;
}

# Callback for am_pm: ru_RU gets a fixed value, converted from UTF-8 to
# the target encoding when that is not UTF-8.  Other locales pass through.
sub callback_ampm {
	my $s = shift;
	my $nl = $callback{data}{l} . "_" . $callback{data}{c};
	my $enc = $callback{data}{e};

	if ($nl eq 'ru_RU') {
		if ($enc eq 'UTF-8') {
			$s = 'дп;пп';
		} else {
			my  $converter = Text::Iconv->new("utf-8", "$enc");
			$s = $converter->convert("дп;пп");
		}
	}
	return $s;
}

# Callback for c_fmt (fed with the raw d_t_fmt value): drops the time zone
# conversions and adds the weekday name (%A) to the known date layouts.
sub callback_cformat {
	my $s = shift;
	return $s if (!defined $s);
	my $nl = $callback{data}{l} . "_" . $callback{data}{c};

	if ($nl eq 'ko_KR') {
		$s =~ s/(> )(%p)/$1%A $2/;
	}
	$s =~ s/\.,/\./;
	$s =~ s/ %Z//;
	$s =~ s/ %z//;
	$s =~ s/^"%e\./%A %e/;
	$s =~ s/^"(%B %e, )/"%A, $1/;
	$s =~ s/^"(%e %B )/"%A $1/;
	return $s;
};

# Callback for d_fmt: when the day is directly joined to the month by a
# solidus, dash or dot, use %d in place of %e.
sub callback_dformat {
	my $s = shift;
	return $s if (!defined $s);

	$s =~ s/(%m(<SOLIDUS>|[-.]))%e/$1%d/;
	$s =~ s/%e((<SOLIDUS>|[-.])%m)/%d$1/;
	return $s;
};

# Callback for d_t_fmt: adds the weekday name (%A); the position depends
# on the locale for ja_JP, ko_KR, zh_CN and zh_TW, and zh_CN/zh_TW also
# get %_m in place of the first %m.
sub callback_dtformat {
	my $s = shift;
	return $s if (!defined $s);
	my $nl = $callback{data}{l} . "_" . $callback{data}{c};

	if ($nl eq 'ja_JP') {
		$s =~ s/(> )(%H)/$1%A $2/;
	} elsif ($nl eq 'ko_KR' || $nl eq 'zh_CN' || $nl eq 'zh_TW') {
		if ($nl ne 'ko_KR') {
			$s =~ s/%m/%_m/;
		}
		$s =~ s/(> )(%p)/$1%A $2/;
	}
	$s =~ s/\.,/\./;
	$s =~ s/^"%e\./%A %e/;
	$s =~ s/^"(%B %e, )/"%A, $1/;
	$s =~ s/^"(%e %B )/"%A $1/;
	return $s;
};

# Callback for md_order: reduce a date format to its d/e/m letters and
# fold e into d.  An undefined input yields an undefined result.
sub callback_mdorder {
	my $s = shift;
	return undef if (!defined $s);
	$s =~ s/[^dem]//g;
	$s =~ s/e/d/g;
	return $s;
};

sub callback_altmon {
	# if the language/country is known in %alternative months then
	# return that, otherwise repeat mon
	my $s = shift;
	my $alt = $alternativemonths{$callback{data}{l}}{$callback{data}{c}};

	if (defined $alt) {
		my @cleaned;
		foreach my $name (split(";", $alt)) {
			# strip the whitespace around each name
			$name =~ s/^\s+//;
			$name =~ s/\s+$//;
			push @cleaned, $name;
		}
		return join(";", @cleaned);
	}

	return $s;
}

sub callback_abmon {
	# for specified CJK locales, pad result with a space to enable
	# columns to line up (style established in FreeBSD in 2001)
	my $s = shift;
	my $nl = $callback{data}{l} . "_" . $callback{data}{c};

	if ($nl eq 'ja_JP' || $nl eq 'ko_KR' || $nl eq 'zh_CN' ||
	    $nl eq 'zh_HK' || $nl eq 'zh_TW') {
		my @cleaned;
		foreach my $name (split(";", $s)) {
			# names starting with the digits 2-9, or with a 1 that
			# is not followed by 0, 1 or 2, get the padding
			if ($name =~ /^"<(two|three|four|five|six|seven|eight|nine)>/ ||
			   ($name =~ /^"<one>/ && $name !~ /^"<one>(<zero>|<one>|<two>)/))
			{
				$name =~ s/^"/"<space>/;
			}
			push @cleaned, $name;
		}
		return join(";", @cleaned);
	}
	return $s;
}

############################

# Load $directory/UnicodeData.txt into %ucd: {code2name} maps the first
# ;-separated column to the second, {name2code} the reverse.
sub get_unidata {
	my $directory = shift;

	my $fin = _open_or_die("<", "$directory/UnicodeData.txt");
	my @lines = <$fin>;
	chomp(@lines);
	close($fin);

	foreach my $l (@lines) {
		my @a = split(/;/, $l);
		next if (@a < 2);	# blank or truncated record

		$ucd{code2name}{"$a[0]"} = $a[1];	# Unicode name
		$ucd{name2code}{"$a[1]"} = $a[0];	# Unicode code
	}
}

# Read the CHARMAP section of the charmap $file into %utf8map (symbol
# name with "_" turned into " " => hex bytes with the "\x" prefixes
# removed).  When two consecutive entries share the same bytes, the
# second is recorded in %utf8aliases as an alias of the first.
sub get_utf8map {
	my $file = shift;

	my $fin = _open_or_die("<", $file);
	my @lines = <$fin>;
	close($fin);
	chomp(@lines);

	my $prev_k = undef;
	my $prev_v = "";
	my $incharmap = 0;
	foreach my $l (@lines) {
		$l =~ s/\r//;
		next if ($l =~ /^\#/);
		next if ($l eq "");

		if ($l eq "CHARMAP") {
			$incharmap = 1;
			next;
		}

		next if (!$incharmap);
		last if ($l eq "END CHARMAP");

		# Skip lines that are not "<NAME> bytes"; using $1/$2 after a
		# failed match would silently reuse the previous entry.
		next if ($l !~ /^<([^\s]+)>\s+(.*)/);
		my $k = $1;
		my $v = $2;
		$k =~ s/_/ /g;		# unicode char string
		$v =~ s/\\x//g;		# UTF-8 char code
		$utf8map{$k} = $v;

		$utf8aliases{$k} = $prev_k if ($prev_v eq $v);

		$prev_v = $v;
		$prev_k = $k;
	}
}

# For every encoding named in %encodings read $dir/<encoding>.TXT and fill
# $convertors{<encoding>}{<unicode code>} = <local code> (both upper-case
# hex without the 0x prefix).  Unreadable charmaps are reported and
# skipped.
sub get_encodings {
	my $dir = shift;
	foreach my $e (sort(keys(%encodings))) {
		my $fin;
		if (!open($fin, "<", "$dir/$e.TXT")) {
			print "Cannot open charmap for $e\n";
			next;

		}
		$encodings{$e} = 1;
		my @lines = <$fin>;
		close($fin);
		chomp(@lines);
		foreach my $l (@lines) {
			$l =~ s/\r//;
			next if ($l =~ /^\#/);
			next if ($l eq "");

			my @a = split(" ", $l);
			next if ($#a < 1);
			$a[0] =~ s/^0[xX]//;	# local char code
			$a[1] =~ s/^0[xX]//;	# unicode char code
			$convertors{$e}{uc($a[1])} = uc($a[0]);
		}
	}
}

# Populate %languages, %translations, %alternativemonths and %encodings
# from get_xmldata() (not defined in this file; presumably provided by
# charmaps.pm) and turn --lc into @filter = (language, family, country),
# with "x" standing for "no family".
sub get_languages {
	my %data = get_xmldata($ETCDIR);
	%languages = %{$data{L}};
	%translations = %{$data{T}};
	%alternativemonths = %{$data{AM}};
	%encodings = %{$data{E}};

	return if (!defined $doonly);

	my @a = split(/_/, $doonly);
	if ($#a == 1) {
		$filter[0] = $a[0];
		$filter[1] = "x";
		$filter[2] = $a[1];
	} elsif ($#a == 2) {
		$filter[0] = $a[0];
		$filter[1] = $a[1];
		$filter[2] = $a[2];
	}

	print Dumper(@filter);
	return;
}

# ctypedef: for every selected locale copy xx_Comm_US.UTF-8.src verbatim
# as the UTF-8 source, and for each other encoding write the
# comment_char/escape_char lines and the LC_CTYPE section of the locale's
# own UTF-8 source.  The SHA-1 of each result is stored in %languages and
# %hashtable so that make_makefile() can spot identical files.
sub transform_ctypes {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if (_skip_locale($l, $f, $c));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $file = _locale_name($l, $f, $c);
		my $actfile = $file;

		my $filename = "$CLDRDIR/posix/xx_Comm_US.UTF-8.src";
		if (! -f $filename) {
			print STDERR "Cannot open $filename\n";
			next;
		}
		my $fin = _open_or_die("<", $filename);
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines = <$fin>;
		close($fin);
		my $shex = sha1_hex(join("\n", @lines));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
		$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
		my $fout = _open_or_die(">",
		    "$TYPE.draft/$actfile.$DEFENCODING.src");
		print {$fout} @lines;
		close($fout)
		    or die("Cannot write $actfile.$DEFENCODING.src: $!\n");
		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if ($enc eq $DEFENCODING);
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
			if (! -f $filename) {
				print STDERR "Cannot open $filename\n";
				next;
			}
			@lines = ();
			$fin = _open_or_die("<", $filename);
			while (<$fin>) {
				if ((/^comment_char\s/) || (/^escape_char\s/)){
					push @lines, $_;
				}
				if (/^LC_CTYPE/../^END LC_CTYPE/) {
					push @lines, $_;
				}
			}
			close($fin);
			# The encoding name is part of the hash, so equal
			# text in different encodings never compares equal.
			my $uhex = sha1_hex(join("\n", @lines) . $enc);
			$languages{$l}{$f}{data}{$c}{$enc} = $uhex;
			$hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;
			$fout = _open_or_die(">",
			    "$TYPE.draft/$actfile.$enc.src");
			print {$fout} _file_header("extracted");
			print {$fout} @lines;
			close($fout)
			    or die("Cannot write $actfile.$enc.src: $!\n");
		}
	}
	}
	}
}


# colldef: for every selected locale extract the comment_char/escape_char
# lines and the LC_COLLATE section (runs of spaces squeezed to one) from
# its UTF-8 source, write that as the UTF-8 output and copy it for every
# other encoding of the locale.  All encodings share one SHA-1.
sub transform_collation {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if (_skip_locale($l, $f, $c));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $actfile = _locale_name($l, $f, $c);

		my $filename = _find_source($l, $f, $actfile);
		next if (!defined $filename);

		my $fin = _open_or_die("<", $filename);
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines;
		while (<$fin>) {
			if ((/^comment_char\s/) || (/^escape_char\s/)){
				push @lines, $_;
			}
			if (/^LC_COLLATE/../^END LC_COLLATE/) {
				$_ =~ s/[ ]+/ /g;
				push @lines, $_;
			}
		}
		close($fin);
		my $shex = sha1_hex(join("\n", @lines));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
		$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
		my $utf8out = "$TYPE.draft/$actfile.$DEFENCODING.src";
		my $fout = _open_or_die(">", $utf8out);
		print {$fout} _file_header("extracted");
		print {$fout} @lines;
		close($fout) or die("Cannot write $utf8out: $!\n");

		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if ($enc eq $DEFENCODING);
			copy($utf8out, "$TYPE.draft/$actfile.$enc.src")
			    or die("Cannot copy $utf8out to " .
				"$TYPE.draft/$actfile.$enc.src: $!\n");
			$languages{$l}{$f}{data}{$c}{$enc} = $shex;
			$hashtable{$shex}{"${l}_${f}_${c}.$enc"} = 1;
		}
	}
	}
	}
}

# Read, for every selected locale, the raw value of each field named in
# %keys from the locale's UTF-8 source into $values{$l}{$f}{$c}{field}.
# A value may continue over several lines (each ending in "/"); symbol
# names are normalised from <FOO_BAR> to <FOO BAR> so that they match the
# keys of %utf8map and %ucd.
sub get_fields {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if (_skip_locale($l, $f, $c));

		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $filename = _find_source($l, $f, _locale_name($l, $f, $c));
		next if (!defined $filename);

		my $fin = _open_or_die("<", $filename);
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines = <$fin>;
		chomp(@lines);
		close($fin);
		foreach my $k (keys(%keys)) {
			# An unterminated continuation must not spill into
			# the next field.
			my $continue = 0;
			foreach my $raw (@lines) {
				# Edit a copy so every field sees the file
				# as it was read.
				my $line = $raw;
				$line =~ s/\r//;
				next if (!$continue && $line !~ /^\Q$k\E\s/);
				if ($continue) {
					$line =~ s/^\s+//;
				} else {
					$line =~ s/^\Q$k\E\s+//;
				}

				$values{$l}{$f}{$c}{$k} = ""
					if (!defined $values{$l}{$f}{$c}{$k});

				$continue = ($line =~ /\/$/);
				$line =~ s/\/$// if ($continue);

				# Replace one underscore per pass until no
				# symbol name contains one.  Anything left
				# over sits outside <...> and is an error.
				1 while ($line =~
				    s/\<([^>_]+)_([^>]+)\>/<$1 $2>/);
				die "_ in data - $line" if ($line =~ /_/);
				$values{$l}{$f}{$c}{$k} .= $line;

				last if (!$continue);
			}
		}
	}
	}
	}
}

# Return the byte string for CLDR symbol name $s in encoding $e.
# UTF-8 is looked up directly in %utf8map; any other encoding goes from
# the name to a Unicode code (via %ucd, a charmap alias, or
# %translations) and from there through %convertors.  Dies when no
# mapping exists.  Codes of 1 to 3 bytes are packed; for any other length
# a message is printed and a "length = N" string is returned.
sub decodecldr {
	my $e = shift;
	my $s = shift;

	my $v = undef;

	if ($e eq "UTF-8") {
		#
		# Conversion to UTF-8 can be done from the Unicode name to
		# the UTF-8 character code.
		#
		$v = $utf8map{$s};
		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	} else {
		#
		# Conversion to these encodings can be done from the Unicode
		# name to Unicode code to the encodings code.
		#
		my $ucc = undef;
		$ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
		$ucc = $ucd{name2code}{$utf8aliases{$s}}
			if (!defined $ucc
			 && $utf8aliases{$s}
			 && defined $ucd{name2code}{$utf8aliases{$s}});

		if (!defined $ucc) {
			if (defined $translations{$e}{$s}{hex}) {
				$v = $translations{$e}{$s}{hex};
				$ucc = 0;
			} elsif (defined $translations{$e}{$s}{ucc}) {
				$ucc = $translations{$e}{$s}{ucc};
			}
		}

		die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
		$v = $convertors{$e}{$ucc} if (!defined $v);

		$v = $translations{$e}{$s}{hex}
			if (!defined $v && defined $translations{$e}{$s}{hex});

		if (!defined $v && defined $translations{$e}{$s}{unicode}) {
			# Resolve the substitute name on its own: $ucc is
			# always defined here, so reusing it would skip the
			# alias lookup.
			my $ucn = $translations{$e}{$s}{unicode};
			my $alt = undef;
			$alt = $ucd{name2code}{$ucn}
				if (defined $ucd{name2code}{$ucn});
			$alt = $ucd{name2code}{$utf8aliases{$ucn}}
				if (!defined $alt
				 && defined $utf8aliases{$ucn}
				 && defined $ucd{name2code}{$utf8aliases{$ucn}});
			$v = $convertors{$e}{$alt} if (defined $alt);
		}

		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	}

	return pack("C", hex($v)) if (length($v) == 2);
	return pack("CC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)))
		if (length($v) == 4);
	return pack("CCC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)),
	    hex(substr($v, 4, 2))) if (length($v) == 6);
	print STDERR "Cannot convert $e $s\n";
	return "length = " . length($v);

}

# Return the %translations entry for symbol $v in encoding $enc, or undef.
sub translate {
	my $enc = shift;
	my $v = shift;

	return $translations{$enc}{$v} if (defined $translations{$enc}{$v});
	return undef;
}

# Strip the surrounding double quotes from $v and replace every <NAME>
# in it by decodecldr($enc, NAME).  $k is only used in the error message.
# Returns (text, okay); okay is 0 when a symbol could not be converted.
sub _expand_symbols {
	my ($enc, $k, $v) = @_;
	my $okay = 1;

	$v =~ s/^"//;
	$v =~ s/"$//;
	while ($v =~ /^(.*?)<(.*?)>(.*)/) {
		my ($p1, $cm, $p3) = ($1, $2, $3);

		my $rv = decodecldr($enc, $cm);
#		$rv = translate($enc, $cm)
#			if (!defined $rv);
		if (!defined $rv) {
			print STDERR
"Could not convert $k ($cm) from $DEFENCODING to $enc\n";
			$okay = 0;
			# $v is unchanged, so looping again would never end.
			last;
		}

		$v = $p1 . $rv . $p3;
	}
	return ($v, $okay);
}

# Write "$TYPE.draft/<locale>.<encoding>.src" for every selected locale
# and encoding from the values collected by get_fields().  The file is
# first written as ".new" and renamed to ".src", or to ".failed" when a
# symbol could not be converted.  The SHA-1 of the body is recorded in
# %languages and %hashtable for make_makefile().
sub print_fields {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if (_skip_locale($l, $f, $c));
		my $file = _locale_name($l, $f, $c);
		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
				print "Skipping $file - not read\n";
				next;
			}
			print "Writing to $file in $enc\n";

			if ($enc ne $DEFENCODING &&
			    !defined $convertors{$enc}) {
				print "Failed! Cannot convert to $enc.\n";
				next;
			};

			my $draft = "$TYPE.draft/$file.$enc";
			my $fout = _open_or_die(">", "$draft.new");
			my $okay = 1;
			my $output = "";
			print {$fout} _file_header("generated");
			foreach my $k (keys(%keys)) {
				my $g = $keys{$k};

				die("Unknown $k in \%DESC")
					if (!defined $DESC{$k});

				$output .= "#\n# $DESC{$k}\n";

				# Replace one row with another
				if ($g =~ /^>/) {
					$k = substr($g, 1);
					$g = $keys{$k};
				}

				# Callback function
				if ($g =~ /^\</) {
					$callback{data}{c} = $c;
					$callback{data}{k} = $k;
					$callback{data}{f} = $f;
					$callback{data}{l} = $l;
					$callback{data}{e} = $enc;
					my @a = split(/\</, substr($g, 1));
					die("Unknown callback '$a[0]' for $k")
					    if (ref($callback{$a[0]}) ne "CODE");
					my $rv = $callback{$a[0]}->(
					    $values{$l}{$f}{$c}{$a[1]});
					$values{$l}{$f}{$c}{$k} = $rv;
					$g = $a[2];
					$callback{data} = undef;
				}

				my $v = $values{$l}{$f}{$c}{$k};
				$v = "undef" if (!defined $v);

				if ($g eq "i" || $g eq "ai") {
					# numbers are written as read
					$output .= "$v\n";
				} elsif ($g eq "s") {
					my ($text, $ok) =
					    _expand_symbols($enc, $k, $v);
					$okay = 0 if (!$ok);
					$output .= "$text\n";
				} elsif ($g eq "as") {
					# one output line per list element
					foreach my $item (split(/;/, $v)) {
						my ($text, $ok) =
						    _expand_symbols($enc, $k,
							$item);
						$okay = 0 if (!$ok);
						$output .= "$text\n";
					}
				} else {
					die("$k is '$g'");
				}
			}

			my $digest = sha1_hex($output);
			$languages{$l}{$f}{data}{$c}{$enc} = $digest;
			$hashtable{$digest}{"${l}_${f}_${c}.$enc"} = 1;
			print {$fout} "$output# EOF\n";
			close($fout) or die("Cannot write $draft.new: $!\n");

			my $final = $okay ? "$draft.src" : "$draft.failed";
			rename("$draft.new", $final)
			    or die("Cannot rename $draft.new to $final: $!\n");
		}
	}
	}
	}
}

# Write "$TYPE.draft/Makefile".  Files recorded in %hashtable with the
# same hash are emitted once as a LOCALES entry and otherwise as SAME
# (symlink) pairs; nc_link and e_link entries add legacy SAME pairs.
# Nothing is written when a --lc filter is active.
sub make_makefile {
	return if ($#filter > -1);
	print "Creating Makefile for $TYPE\n";
	my $SRCOUT;
	my $SRCOUT2;
	my $SRCOUT3 = "";
	my $SRCOUT4 = "";
	my $MAPLOC;
	if ($TYPE eq "colldef") {
		$SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" .
			"\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} " .
			"\${.OBJDIR}/\${.IMPSRC:T:R}";
		$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
				"locale/etc/final-maps\n";
		$SRCOUT2 = "LC_COLLATE";
		$SRCOUT3 = "" .
			".for f t in \${LOCALES_MAPPED}\n" .
			"FILES+=\t\$t.LC_COLLATE\n" .
			"FILESDIR_\$t.LC_COLLATE=\t\${LOCALEDIR}/\$t\n" .
			"\$t.LC_COLLATE: \${.CURDIR}/\$f.src\n" .
			"\tlocaledef -D -U -i \${.ALLSRC} \\\n" .
			"\t\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} \\\n" .
			"\t\t\${.OBJDIR}/\${.TARGET:T:R}\n" .
			".endfor\n\n";
		$SRCOUT4 = "## LOCALES_MAPPED\n";
	}
	elsif ($TYPE eq "ctypedef") {
		$SRCOUT = "localedef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
			"\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:E} " .
			"\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
			" || true";
		$SRCOUT2 = "LC_CTYPE";
		$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
				"locale/etc/final-maps\n";
		$SRCOUT3 = "## SYMPAIRS\n\n" .
			".for s t in \${SYMPAIRS}\n" .
			"\${t:S/src\$/LC_CTYPE/}: " .
			"\$s\n" .
			"\tlocaledef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
			"\t-f \${MAPLOC}/map.\${.TARGET:T:R:C/^.*\\.//} " .
			"\\\n\t-i \${.ALLSRC} \${.OBJDIR}/\${.TARGET:T:R} " .
			" || true\n" .
			".endfor\n\n";
	}
	else {
		$SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
		$SRCOUT2 = "out";
		$MAPLOC = "";
	}
	my $fout = _open_or_die(">", "$TYPE.draft/Makefile");
	# Tabs are spelled \t so that the recipe line keeps its mandatory
	# leading tab however this script itself is edited.
	print {$fout} <<EOF;
# \$FreeBSD\$
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale.

LOCALEDIR=\t\${SHAREDIR}/locale
FILESNAME=\t$FILESNAMES{$TYPE}
.SUFFIXES:\t.src .${SRCOUT2}
${MAPLOC}
.src.${SRCOUT2}:
\t$SRCOUT

## PLACEHOLDER

${SRCOUT4}

EOF

	foreach my $hash (keys(%hashtable)) {
		# For colldef, weight LOCALES to UTF-8
		#     Sort as upper-case and reverse to achieve it
		#     Make en_US, ru_RU, and ca_AD preferred
		my @files;
		if ($TYPE eq "colldef") {
			@files = sort {
				if ($a eq 'en_x_US.UTF-8' ||
				    $a eq 'ru_x_RU.UTF-8' ||
				    $a eq 'ca_x_AD.UTF-8') { return -1; }
				elsif ($b eq 'en_x_US.UTF-8' ||
				       $b eq 'ru_x_RU.UTF-8' ||
				       $b eq 'ca_x_AD.UTF-8') { return 1; }
				else { return uc($b) cmp uc($a); }
			} keys(%{$hashtable{$hash}});
		} elsif ($TYPE eq "ctypedef") {
			@files = sort {
				if ($a eq 'en_x_US.UTF-8') { return -1; }
				elsif ($b eq 'en_x_US.UTF-8') { return 1; }
				if ($a =~ /^en_x_US/) { return -1; }
				elsif ($b =~ /^en_x_US/) { return 1; }

				# both sides use the same anchored patterns
				if ($a =~ /^en_x_GB\.ISO8859-15/ ||
				    $a =~ /^ru_x_RU/) { return -1; }
				elsif ($b =~ /^en_x_GB\.ISO8859-15/ ||
				       $b =~ /^ru_x_RU/) { return 1; }
				else { return uc($b) cmp uc($a); }

			} keys(%{$hashtable{$hash}});
		} else {
			@files = sort {
				if ($a =~ /_Comm_/ ||
				    $b eq 'en_x_US.UTF-8') { return 1; }
				elsif ($b =~ /_Comm_/ ||
				    $a eq 'en_x_US.UTF-8') { return -1; }
				else { return uc($b) cmp uc($a); }
			} keys(%{$hashtable{$hash}});
		}
		if ($#files > 0) {
			# The first file is kept; every other one becomes a
			# link to it and is removed from the LOCALES list.
			my $link = shift(@files);
			$link =~ s/_x_/_/;	# strip family if none there
			foreach my $file (@files) {
				my @a = split(/_/, $file);
				my @b = split(/\./, $a[-1], 2);
				$file =~ s/_x_/_/;
				print {$fout} "SAME+=\t\t$link $file\n";
				undef($languages{$a[0]}{$a[1]}{data}{$b[0]}{$b[1]});
			}
		}
	}

	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if (_skip_locale($l, $f, $c));
		my $file = _locale_name($l, $f, $c);
		if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
		 && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
			print "Skipping $file - not read\n";
			next;
		}
		foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			# undefined means "emitted as SAME above"
			next if (!defined $languages{$l}{$f}{data}{$c}{$e});
			print {$fout} "LOCALES+=\t$file.$e\n";
		}

		if (defined $languages{$l}{$f}{nc_link}) {
			foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
				print {$fout} "SAME+=\t\t$file.$e $languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n";
			}
		}

		if (defined $languages{$l}{$f}{e_link}) {
			foreach my $el (split(" ", $languages{$l}{$f}{e_link})) {
				my @a = split(/:/, $el);
				print {$fout} "SAME+=\t\t$file.$a[0] $file.$a[1]\t# legacy (same charset)\n";
			}
		}

	}
	}
	}

	print {$fout} <<EOF;

FILES=\t\t\${LOCALES:S/\$/.${SRCOUT2}/}
CLEANFILES=\t\${FILES}

.for f t in \${SAME}
SYMLINKS+=\t../\$f/\${FILESNAME} \\
    \${LOCALEDIR}/\$t/\${FILESNAME}
.endfor

.for f in \${LOCALES}
FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
.endfor

${SRCOUT3}.include <bsd.prog.mk>
EOF

	close($fout) or die("Cannot write $TYPE.draft/Makefile: $!\n");
}