#!/usr/bin/env perl -wC

#
# Generate locale definition sources (LC_COLLATE, LC_CTYPE, LC_MESSAGES,
# LC_MONETARY, LC_NUMERIC, LC_TIME) and the matching Makefile from the
# POSIX-format files found under <cldrdir>/posix.
#
# Inputs:
#   --cldr=<dir>     directory that contains posix/*.src and posix/UTF-8.cm
#   --unidata=<dir>  directory that contains UnicodeData.txt
#   --etc=<dir>      directory that contains the XML description read by
#                    get_xmldata() and the charmaps/ subdirectory
#   --type=<type>    one of the keys of %FILESNAMES below
#   --lc=<la_CC>     optional: restrict processing to one locale
#
# Output is written below "<type>.draft/" in the current directory.
#

use strict;
use File::Copy;
use XML::Parser;
use Tie::IxHash;
use Data::Dumper;
use Getopt::Long;
use Digest::SHA qw(sha1_hex);
require "charmaps.pm";

my $USAGE = "Usage: $0 --cldr=<cldrdir> --unidata=<unidatadir> " .
    "--etc=<etcdir> --type=<type> [--lc=<la_CC>]\n";

if ($#ARGV < 2) {
	print $USAGE;
	exit(1);
}

my $DEFENCODING = "UTF-8";
my @filter = ();

my $CLDRDIR = undef;
my $UNIDATADIR = undef;
my $ETCDIR = undef;
my $TYPE = undef;
my $doonly = undef;

my $result = GetOptions (
		"cldr=s"	=> \$CLDRDIR,
		"unidata=s"	=> \$UNIDATADIR,
		"etc=s"		=> \$ETCDIR,
		"type=s"	=> \$TYPE,
		"lc=s"		=> \$doonly
	    );

# Maps every supported --type to the locale category file name that is
# written into the generated Makefile.
my %FILESNAMES = (
	"monetdef"	=> "LC_MONETARY",
	"timedef"	=> "LC_TIME",
	"msgdef"	=> "LC_MESSAGES",
	"numericdef"	=> "LC_NUMERIC",
	"colldef"	=> "LC_COLLATE",
	"ctypedef"	=> "LC_CTYPE"
);

# All four directory/type options are needed by the code below; refuse to
# continue with a partial or unparsable command line, or an unknown type,
# instead of silently doing nothing.
if (!$result || !defined $CLDRDIR || !defined $UNIDATADIR ||
    !defined $ETCDIR || !defined $TYPE || !defined $FILESNAMES{$TYPE}) {
	print $USAGE;
	exit(1);
}

my %convertors = ();

my %ucd = ();
my %values = ();
my %hashtable = ();
my %languages = ();
my %translations = ();
my %encodings = ();
my %alternativemonths = ();
get_languages();

my %utf8map = ();
my %utf8aliases = ();
get_unidata($UNIDATADIR);
get_utf8map("$CLDRDIR/posix/$DEFENCODING.cm");
get_encodings("$ETCDIR/charmaps");

# Both hashes are order-preserving: %keys fixes the order of the fields in
# the output files, %hashtable the order of the SAME+= lines.
my %keys = ();
tie(%keys, "Tie::IxHash");
tie(%hashtable, "Tie::IxHash");

# Callbacks referenced from the field formats ("<name<source<format").
# $callback{data} is filled by print_fields() for the duration of one call
# with the keys c (country), k (field), l (language) and e (encoding).
my %callback = (
	mdorder => \&callback_mdorder,
	altmon => \&callback_altmon,
	cformat => \&callback_cformat,
	cbabmon => \&callback_abmon,
	data => undef,
);

# Comment text emitted in front of every field in the output files.
my %DESC = (

	# numericdef
	"decimal_point"	=> "decimal_point",
	"thousands_sep"	=> "thousands_sep",
	"grouping"	=> "grouping",

	# monetdef
	"int_curr_symbol"	=> "int_curr_symbol (last character always " .
				   "SPACE)",
	"currency_symbol"	=> "currency_symbol",
	"mon_decimal_point"	=> "mon_decimal_point",
	"mon_thousands_sep"	=> "mon_thousands_sep",
	"mon_grouping"		=> "mon_grouping",
	"positive_sign"		=> "positive_sign",
	"negative_sign"		=> "negative_sign",
	"int_frac_digits"	=> "int_frac_digits",
	"frac_digits"		=> "frac_digits",
	"p_cs_precedes"		=> "p_cs_precedes",
	"p_sep_by_space"	=> "p_sep_by_space",
	"n_cs_precedes"		=> "n_cs_precedes",
	"n_sep_by_space"	=> "n_sep_by_space",
	"p_sign_posn"		=> "p_sign_posn",
	"n_sign_posn"		=> "n_sign_posn",

	# msgdef
	"yesexpr"	=> "yesexpr",
	"noexpr"	=> "noexpr",
	"yesstr"	=> "yesstr",
	"nostr"		=> "nostr",

	# timedef
	"abmon"		=> "Short month names",
	"mon"		=> "Long month names (as in a date)",
	"abday"		=> "Short weekday names",
	"day"		=> "Long weekday names",
	"t_fmt"		=> "X_fmt",
	"d_fmt"		=> "x_fmt",
	"c_fmt"		=> "c_fmt",
	"am_pm"		=> "AM/PM",
	"d_t_fmt"	=> "date_fmt",
	"altmon"	=> "Long month names (without case ending)",
	"md_order"	=> "md_order",
	"t_fmt_ampm"	=> "ampm_fmt",
);

# Field list per field-based type, in output order.  The format is one of
#   "i"  / "ai"          value is copied verbatim
#   "s"                  string, <Unicode name> tags are converted
#   "as"                 ';'-separated list of strings, one per output line
#   ">other"             use the row of field "other" instead
#   "<cb<source<format"  run callback "cb" on the value of field "source"
#                        and treat the result according to "format"
my %FIELDS = (
	"numericdef" => [
		"decimal_point"	=> "s",
		"thousands_sep"	=> "s",
		"grouping"	=> "ai",
	],
	"monetdef" => [
		"int_curr_symbol"	=> "s",
		"currency_symbol"	=> "s",
		"mon_decimal_point"	=> "s",
		"mon_thousands_sep"	=> "s",
		"mon_grouping"		=> "ai",
		"positive_sign"		=> "s",
		"negative_sign"		=> "s",
		"int_frac_digits"	=> "i",
		"frac_digits"		=> "i",
		"p_cs_precedes"		=> "i",
		"p_sep_by_space"	=> "i",
		"n_cs_precedes"		=> "i",
		"n_sep_by_space"	=> "i",
		"p_sign_posn"		=> "i",
		"n_sign_posn"		=> "i",
	],
	"msgdef" => [
		"yesexpr"	=> "s",
		"noexpr"	=> "s",
		"yesstr"	=> "s",
		"nostr"		=> "s",
	],
	"timedef" => [
		"abmon"		=> "<cbabmon<abmon<as",
		"mon"		=> "as",
		"abday"		=> "as",
		"day"		=> "as",
		"t_fmt"		=> "s",
		"d_fmt"		=> "s",
		"c_fmt"		=> "<cformat<d_t_fmt<s",
		"am_pm"		=> "as",
		"d_t_fmt"	=> "s",
		"altmon"	=> "<altmon<mon<as",
		"md_order"	=> "<mdorder<d_fmt<s",
		"t_fmt_ampm"	=> "s",
	],
);

if ($TYPE eq "colldef") {
	transform_collation();
} elsif ($TYPE eq "ctypedef") {
	transform_ctypes();
} else {
	%keys = @{$FIELDS{$TYPE}};
	get_fields();
	print_fields();
}
make_makefile();

# c_fmt is d_t_fmt without the time zone conversions " %Z" / " %z".
sub callback_cformat {
	my $s = shift;
	return undef if (!defined $s);
	$s =~ s/ %Z//;
	$s =~ s/ %z//;
	return $s;
};

# md_order is whatever remains of d_fmt after removing every character
# other than 'd' and 'm'.
sub callback_mdorder {
	my $s = shift;
	return undef if (!defined $s);
	$s =~ s/[^dm]//g;
	return $s;
};

sub callback_altmon {
	# if the language/country is known in %alternative months then
	# return that, otherwise repeat mon
	my $s = shift;

	my $alt = $alternativemonths{$callback{data}{l}}{$callback{data}{c}};
	if (defined $alt) {
		my @cleaned;
		foreach my $name (split(";", $alt)) {
			# strip surrounding whitespace from every name
			$name =~ s/^\s+//;
			$name =~ s/\s+$//;
			push @cleaned, $name;
		}
		return join(";", @cleaned);
	}

	return $s;
}

sub callback_abmon {
	# for specified CJK locales, pad result with a space to enable
	# columns to line up (style established in FreeBSD in 2001)
	my $s = shift;
	my $nl = $callback{data}{l} . "_" . $callback{data}{c};

	if ($nl eq 'ja_JP' || $nl eq 'ko_KR' || $nl eq 'zh_CN' ||
	    $nl eq 'zh_HK' || $nl eq 'zh_TW') {
		my @monthnames = split(";", $s);
		my @cleaned;
		foreach my $name (@monthnames) {
			# Single-digit months (2-9, and "1" when it is not
			# followed by 0, 1 or 2) get a leading <space>.
			if ($name =~ /^"<(two|three|four|five|six|seven|eight|nine)>/ ||
			   ($name =~ /^"<one>/ && $name !~ /^"<one>(<zero>|<one>|<two>)/)) {
				$name =~ s/^"/"<space>/;
			}
			push @cleaned, $name;
		}
		return join(";", @cleaned);
	}
	return $s;
}

############################

# True when locale (language, family, country) must not be processed:
# either it does not match the --lc filter, or the language entry carries a
# "definitions" string that does not mention the current type.
sub _skip_locale {
	my ($l, $f, $c) = @_;

	return 1 if ($#filter == 2 && ($filter[0] ne $l
	    || $filter[1] ne $f || $filter[2] ne $c));
	return 1 if (defined $languages{$l}{$f}{definitions}
	    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
	return 0;
}

# Build "la_CC" or "la_Family_CC"; the family "x" means "no family".
sub _locale_name {
	my ($l, $f, $c) = @_;

	my $file = $l . "_";
	$file .= $f . "_" if ($f ne "x");
	$file .= $c;
	return $file;
}

# Locate the UTF-8 source file for locale name $file.  Tried in order:
# <cldr>/posix, <etc>, and the "fallback" locale of the language entry in
# <cldr>/posix.  Prints a message on STDERR and returns undef when none of
# them exists.
sub _find_source {
	my ($l, $f, $file) = @_;

	my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
	$filename = "$ETCDIR/$file.$DEFENCODING.src"
	    if (! -f $filename);
	if (! -f $filename
	    && defined $languages{$l}{$f}{fallback}) {
		$file = $languages{$l}{$f}{fallback};
		$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
	}
	$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
	    if (! -f $filename);
	if (! -f $filename) {
		print STDERR
		    "Cannot open $file.$DEFENCODING.src or fallback\n";
		return undef;
	}
	return $filename;
}

# Convert one quoted string of the POSIX source into encoding $enc by
# replacing every <Unicode name> tag with the bytes from decodecldr().
# $k is the field name and only used in the error message.
# Returns (converted string, success flag).
sub _decode_value {
	my ($enc, $k, $v) = @_;
	my $ok = 1;

	$v =~ s/^"//;
	$v =~ s/"$//;
	while ($v =~ /^(.*?)<(.*?)>(.*)/) {
		my ($p1, $cm, $p3) = ($1, $2, $3);

		my $rv = decodecldr($enc, $cm);
		if (!defined $rv) {
			print STDERR
"Could not convert $k ($cm) from $DEFENCODING to $enc\n";
			$ok = 0;
			# $v is unchanged, so retrying the match would
			# never terminate.
			last;
		}

		$v = $p1 . $rv . $p3;
	}
	return ($v, $ok);
}

# Read <directory>/UnicodeData.txt and fill $ucd{code2name} and
# $ucd{name2code} from its first two ';'-separated columns.
sub get_unidata {
	my $directory = shift;

	open(my $fin, '<', "$directory/UnicodeData.txt")
	    or die("Cannot open $directory/UnicodeData.txt");
	my @lines = <$fin>;
	chomp(@lines);
	close($fin);

	foreach my $l (@lines) {
		my @a = split(/;/, $l);

		$ucd{code2name}{"$a[0]"} = $a[1];	# Unicode name
		$ucd{name2code}{"$a[1]"} = $a[0];	# Unicode code
	}
}

# Read the CHARMAP section of $file into %utf8map (name => hex byte string).
# A name whose code equals the code of the directly preceding entry is
# recorded in %utf8aliases as an alias of that entry.
sub get_utf8map {
	my $file = shift;

	open(my $fin, '<', $file)
	    or die("Cannot open $file");
	my @lines = <$fin>;
	close($fin);
	chomp(@lines);

	my $prev_k = undef;
	my $prev_v = "";
	my $incharmap = 0;
	foreach my $l (@lines) {
		$l =~ s/\r//;
		next if ($l =~ /^\#/);
		next if ($l eq "");

		if ($l eq "CHARMAP") {
			$incharmap = 1;
			next;
		}

		next if (!$incharmap);
		last if ($l eq "END CHARMAP");

		# Skip anything that is not "<NAME> code"; the capture
		# variables are only meaningful after a successful match.
		next if ($l !~ /^<([^\s]+)>\s+(.*)/);
		my $k = $1;
		my $v = $2;
		$k =~ s/_/ /g;		# unicode char string
		$v =~ s/\\x//g;		# UTF-8 char code
		$utf8map{$k} = $v;

		$utf8aliases{$k} = $prev_k if ($prev_v eq $v);

		$prev_v = $v;
		$prev_k = $k;
	}
}

# For every encoding named in %encodings read <dir>/<encoding>.TXT and fill
# $convertors{encoding}{unicode code} = local code (both upper-case hex).
# Encodings without a readable table are reported and skipped.
sub get_encodings {
	my $dir = shift;
	foreach my $e (sort(keys(%encodings))) {
		my $fin;
		if (!open($fin, '<', "$dir/$e.TXT")) {
			print "Cannot open charmap for $e\n";
			next;

		}
		$encodings{$e} = 1;
		my @lines = <$fin>;
		close($fin);
		chomp(@lines);
		foreach my $l (@lines) {
			$l =~ s/\r//;
			next if ($l =~ /^\#/);
			next if ($l eq "");

			my @a = split(" ", $l);
			next if ($#a < 1);
			$a[0] =~ s/^0[xX]//;	# local char code
			$a[1] =~ s/^0[xX]//;	# unicode char code
			$convertors{$e}{uc($a[1])} = uc($a[0]);
		}
	}
}

# Load the language description through get_xmldata() (not defined in this
# file) and, when --lc was given, split it into the
# (language, family, country) filter; a two-part name gets the family "x".
sub get_languages {
	my %data = get_xmldata($ETCDIR);
	%languages = %{$data{L}};
	%translations = %{$data{T}};
	%alternativemonths = %{$data{AM}};
	%encodings = %{$data{E}};

	return if (!defined $doonly);

	my @a = split(/_/, $doonly);
	if ($#a == 1) {
		$filter[0] = $a[0];
		$filter[1] = "x";
		$filter[2] = $a[1];
	} elsif ($#a == 2) {
		$filter[0] = $a[0];
		$filter[1] = $a[1];
		$filter[2] = $a[2];
	}

	print Dumper(@filter);
	return;
}

# ctypedef: the UTF-8 source of every locale is a copy of
# xx_Comm_US.UTF-8.src; every other encoding gets the LC_CTYPE section (plus
# comment_char/escape_char lines) of the locale's own UTF-8 source.
# The SHA-1 of what was read is stored per locale/encoding and in %hashtable
# so that make_makefile() can find identical outputs.
sub transform_ctypes {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if (_skip_locale($l, $f, $c));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $file = _locale_name($l, $f, $c);
		my $actfile = $file;

		my $filename = "$CLDRDIR/posix/xx_Comm_US.UTF-8.src";
		if (! -f $filename) {
			print STDERR "Cannot open $filename\n";
			next;
		}
		open(my $fin, '<', $filename)
		    or die("Cannot open $filename");
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines = <$fin>;
		close($fin);
		my $shex = sha1_hex(join("\n", @lines));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
		$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

		my $outname = "$TYPE.draft/$actfile.$DEFENCODING.src";
		open(my $fout, '>', $outname)
		    or die("Cannot create $outname");
		print $fout @lines;
		close($fout)
		    or die("Cannot write $outname");

		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if ($enc eq $DEFENCODING);
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
			if (! -f $filename) {
				print STDERR "Cannot open $filename\n";
				next;
			}
			@lines = ();
			open($fin, '<', $filename)
			    or die("Cannot open $filename");
			while (<$fin>) {
				if ((/^comment_char\s/) || (/^escape_char\s/)){
					push @lines, $_;
				}
				if (/^LC_CTYPE/../^END LC_CTYPE/) {
					push @lines, $_;
				}
			}
			close($fin);
			# The encoding name is part of the hash so that
			# different encodings are never treated as equal.
			my $uhex = sha1_hex(join("\n", @lines) . $enc);
			$languages{$l}{$f}{data}{$c}{$enc} = $uhex;
			$hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;

			$outname = "$TYPE.draft/$actfile.$enc.src";
			open($fout, '>', $outname)
			    or die("Cannot create $outname");
			print $fout <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
			print $fout @lines;
			close($fout)
			    or die("Cannot write $outname");
		}
	}
	}
	}
}


# colldef: extract the LC_COLLATE section (plus comment_char/escape_char
# lines) of every locale's UTF-8 source, with runs of spaces squeezed to one
# space, and copy the result to every other encoding of the locale.
sub transform_collation {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if (_skip_locale($l, $f, $c));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $actfile = _locale_name($l, $f, $c);

		my $filename = _find_source($l, $f, $actfile);
		next if (!defined $filename);

		open(my $fin, '<', $filename)
		    or die("Cannot open $filename");
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines;
		while (<$fin>) {
			if ((/^comment_char\s/) || (/^escape_char\s/)){
				push @lines, $_;
			}
			if (/^LC_COLLATE/../^END LC_COLLATE/) {
				$_ =~ s/[ ]+/ /g;
				push @lines, $_;
			}
		}
		close($fin);
		my $shex = sha1_hex(join("\n", @lines));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
		$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

		my $outname = "$TYPE.draft/$actfile.$DEFENCODING.src";
		open(my $fout, '>', $outname)
		    or die("Cannot create $outname");
		print $fout <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
		print $fout @lines;
		close($fout)
		    or die("Cannot write $outname");

		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if ($enc eq $DEFENCODING);
			copy ("$TYPE.draft/$actfile.$DEFENCODING.src",
			    "$TYPE.draft/$actfile.$enc.src");
			$languages{$l}{$f}{data}{$c}{$enc} = $shex;
			$hashtable{$shex}{"${l}_${f}_${c}.$enc"} = 1;
		}
	}
	}
	}
}

# Field-based types: collect the raw value of every field named in %keys
# from the locale's UTF-8 source into $values{language}{family}{country}.
# A value may continue over several lines, each ending in '/'.
# Underscores inside <...> tags are turned into spaces so that the tag
# matches the names used in %utf8map and %ucd.
sub get_fields {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if (_skip_locale($l, $f, $c));

		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread

		my $filename = _find_source($l, $f, _locale_name($l, $f, $c));
		next if (!defined $filename);

		open(my $fin, '<', $filename)
		    or die("Cannot open $filename");
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines = <$fin>;
		chomp(@lines);
		close($fin);
		my $continue = 0;
		foreach my $k (keys(%keys)) {
			foreach my $line (@lines) {
				$line =~ s/\r//;
				next if (!$continue && $line !~ /^$k\s/);
				if ($continue) {
					$line =~ s/^\s+//;
				} else {
					$line =~ s/^$k\s+//;
				}

				# Keyed by family as well: two families of
				# one language/country must not share (and
				# append to) the same value.
				$values{$l}{$f}{$c}{$k} = ""
					if (!defined $values{$l}{$f}{$c}{$k});

				$continue = ($line =~ /\/$/);
				$line =~ s/\/$// if ($continue);

				# Repeat until no tag contains an underscore
				# any more.  Stops as soon as nothing was
				# replaced, so an underscore outside a tag
				# reaches the die below instead of looping
				# forever.
				1 while ($line =~
				    s/\<([^>_]+)_([^>]+)\>/<$1 $2>/);
				die "_ in data - $line" if ($line =~ /_/);
				$values{$l}{$f}{$c}{$k} .= $line;

				last if (!$continue);
			}
		}
	}
	}
	}
}

# Convert Unicode character name $s into its byte sequence in encoding $e.
# Dies when no code can be found.  Codes of one to four bytes are returned
# as a byte string; for any other code length a message is printed on
# STDERR and the string "length = N" is returned.
sub decodecldr {
	my $e = shift;
	my $s = shift;

	my $v = undef;

	if ($e eq "UTF-8") {
		#
		# Conversion to UTF-8 can be done from the Unicode name to
		# the UTF-8 character code.
		#
		$v = $utf8map{$s};
		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	} else {
		#
		# Conversion to these encodings can be done from the Unicode
		# name to Unicode code to the encodings code.
		#
		my $ucc = undef;
		$ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
		$ucc = $ucd{name2code}{$utf8aliases{$s}}
			if (!defined $ucc
			    && $utf8aliases{$s}
			    && defined $ucd{name2code}{$utf8aliases{$s}});

		if (!defined $ucc) {
			if (defined $translations{$e}{$s}{hex}) {
				$v = $translations{$e}{$s}{hex};
				$ucc = 0;
			} elsif (defined $translations{$e}{$s}{ucc}) {
				$ucc = $translations{$e}{$s}{ucc};
			}
		}

		die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
		$v = $convertors{$e}{$ucc} if (!defined $v);

		$v = $translations{$e}{$s}{hex}
			if (!defined $v && defined $translations{$e}{$s}{hex});

		if (!defined $v && defined $translations{$e}{$s}{unicode}) {
			# Substitute character: look it up independently of
			# $ucc, which is always defined at this point and
			# would otherwise prevent the alias fallback.
			my $ucn = $translations{$e}{$s}{unicode};
			my $alias = $utf8aliases{$ucn};
			my $subst = $ucd{name2code}{$ucn};
			$subst = $ucd{name2code}{$alias}
				if (!defined $subst && defined $alias);
			$v = $convertors{$e}{$subst} if (defined $subst);
		}

		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	}

	# $v is a string of two hex digits per byte.
	my $len = length($v);
	if ($len == 2 || $len == 4 || $len == 6 || $len == 8) {
		return pack("C*", map { hex($_) } unpack("(a2)*", $v));
	}
	print STDERR "Cannot convert $e $s\n";
	return "length = " . $len;

}

# Return the translation entry of $v for encoding $enc, or undef.
sub translate {
	my $enc = shift;
	my $v = shift;

	return $translations{$enc}{$v} if (defined $translations{$enc}{$v});
	return undef;
}

# Field-based types: write one "<type>.draft/<locale>.<encoding>.src" per
# locale and encoding from the values collected by get_fields().  The file
# is written as ".new" first and renamed to ".failed" instead of ".src"
# when a character could not be converted.
sub print_fields {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if (_skip_locale($l, $f, $c));
		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
			    && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
				print "Skipping ${l}_" .
				    ($f eq "x" ? "" : "${f}_") .
				    "${c} - not read\n";
				next;
			}
			my $file = _locale_name($l, $f, $c);
			print "Writing to $file in $enc\n";

			if ($enc ne $DEFENCODING &&
			    !defined $convertors{$enc}) {
				print "Failed! Cannot convert to $enc.\n";
				next;
			};

			my $newname = "$TYPE.draft/$file.$enc.new";
			open(my $fout, '>', $newname)
			    or die("Cannot create $newname");
			my $okay = 1;
			my $output = "";
			print $fout <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
			foreach my $k (keys(%keys)) {
				my $fmt = $keys{$k};

				die("Unknown $k in \%DESC")
					if (!defined $DESC{$k});

				$output .= "#\n# $DESC{$k}\n";

				# Replace one row with another
				if ($fmt =~ /^>/) {
					$k = substr($fmt, 1);
					$fmt = $keys{$k};
				}

				# Callback function
				if ($fmt =~ /^\</) {
					$callback{data}{c} = $c;
					$callback{data}{k} = $k;
					$callback{data}{l} = $l;
					$callback{data}{e} = $enc;
					my @a = split(/\</, substr($fmt, 1));
					my $rv = $callback{$a[0]}->(
					    $values{$l}{$f}{$c}{$a[1]});
					$values{$l}{$f}{$c}{$k} = $rv;
					$fmt = $a[2];
					$callback{data} = undef;
				}

				my $v = $values{$l}{$f}{$c}{$k};
				$v = "undef" if (!defined $v);

				if ($fmt eq "i" || $fmt eq "ai") {
					$output .= "$v\n";
					next;
				}
				if ($fmt eq "s") {
					my ($str, $ok) =
					    _decode_value($enc, $k, $v);
					$okay = 0 if (!$ok);
					$output .= "$str\n";
					next;
				}
				if ($fmt eq "as") {
					foreach my $item (split(/;/, $v)) {
						my ($str, $ok) =
						    _decode_value($enc, $k,
							$item);
						$okay = 0 if (!$ok);
						$output .= "$str\n";
					}
					next;
				}

				die("$k is '$fmt'");

			}

			$languages{$l}{$f}{data}{$c}{$enc} = sha1_hex($output);
			$hashtable{sha1_hex($output)}{"${l}_${f}_${c}.$enc"} = 1;
			print $fout "$output# EOF\n";
			close($fout)
			    or die("Cannot write $newname");

			if ($okay) {
				rename("$TYPE.draft/$file.$enc.new",
				    "$TYPE.draft/$file.$enc.src");
			} else {
				rename("$TYPE.draft/$file.$enc.new",
				    "$TYPE.draft/$file.$enc.failed");
			}
		}
	}
	}
	}
}

# Write "<type>.draft/Makefile".  Outputs with an identical hash are
# emitted once in LOCALES and linked through SAME+= entries.  Nothing is
# written when --lc restricted the run to a single locale.
sub make_makefile {
	return if ($#filter > -1);
	print "Creating Makefile for $TYPE\n";
	my $SRCOUT;
	my $SRCOUT2;
	my $SRCOUT3 = "";	# only ctypedef adds an extra section
	my $MAPLOC;
	if ($TYPE eq "colldef") {
		$SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" .
			"\t-f \${MAPLOC}/map.UTF-8 " .
			"\${.OBJDIR}/\${.IMPSRC:T:R}";
		$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
				"locale/etc/final-maps\n";
		$SRCOUT2 = "LC_COLLATE";
	}
	elsif ($TYPE eq "ctypedef") {
		$SRCOUT = "localedef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
			"\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:C/^.*\\.//} " .
			"\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
			" || true";
		$SRCOUT2 = "LC_CTYPE";
		$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
				"locale/etc/final-maps\n";
		$SRCOUT3 = "## SYMPAIRS\n\n" .
			".for PAIR in \${SYMPAIRS}\n" .
			"\${PAIR:C/^.*://:S/src\$/LC_CTYPE/}: " .
			"\${PAIR:C/:.*//}\n" .
			"\tlocaledef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
			"\t-f \${MAPLOC}/map.\${.TARGET:T:R:C/^.*\\.//} " .
			"\\\n\t-i \${.ALLSRC} \${.OBJDIR}/\${.TARGET:T:R} " .
			" || true\n" .
			".endfor\n\n";
	}
	else {
		$SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
		$SRCOUT2 = "out";
		$MAPLOC = "";
	}
	open(my $fout, '>', "$TYPE.draft/Makefile")
	    or die("Cannot create $TYPE.draft/Makefile");
	print $fout <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale.

LOCALEDIR=	\${SHAREDIR}/locale
FILESNAME=	$FILESNAMES{$TYPE}
.SUFFIXES:	.src .${SRCOUT2}
${MAPLOC}
.src.${SRCOUT2}:
	$SRCOUT

## PLACEHOLDER

EOF

	foreach my $hash (keys(%hashtable)) {
		# For colldef, weight LOCALES to UTF-8
		#     Sort as upper-case and reverse to achieve it
		#     Make en_US, ru_RU, and ca_AD preferred
		my @files;
		if ($TYPE eq "colldef") {
			@files = sort {
				if ($a eq 'en_x_US.UTF-8' ||
				    $a eq 'ru_x_RU.UTF-8' ||
				    $a eq 'ca_x_AD.UTF-8') { return -1; }
				elsif ($b eq 'en_x_US.UTF-8' ||
				       $b eq 'ru_x_RU.UTF-8' ||
				       $b eq 'ca_x_AD.UTF-8') { return 1; }
				else { return uc($b) cmp uc($a); }
			} keys(%{$hashtable{$hash}});
		} elsif ($TYPE eq "ctypedef") {
			@files = sort {
				if ($a eq 'en_x_US.UTF-8') { return -1; }
				elsif ($b eq 'en_x_US.UTF-8') { return 1; }
				if ($a =~ /^en_x_US/) { return -1; }
				elsif ($b =~ /^en_x_US/) { return 1; }

				if ($a =~ /^en_x_GB.ISO8859-15/ ||
				    $a =~ /^ru_x_RU/) { return -1; }
				elsif ($b =~ /^en_x_GB.ISO8859-15/ ||
				       $b =~ /^ru_x_RU/) { return 1; }
				else { return uc($b) cmp uc($a); }

			} keys(%{$hashtable{$hash}});
		} else {
			@files = sort {
				if ($a =~ /_Comm_/ ||
				    $b eq 'en_x_US.UTF-8') { return 1; }
				elsif ($b =~ /_Comm_/ ||
				    $a eq 'en_x_US.UTF-8') { return -1; }
				else { return uc($b) cmp uc($a); }
			} keys(%{$hashtable{$hash}});
		}
		if ($#files > 0) {
			# The first file is the real one, all others become
			# links to it and are dropped from LOCALES below.
			my $link = shift(@files);
			$link =~ s/_x_/_/;	# strip family if none there
			foreach my $file (@files) {
				my @a = split(/_/, $file);
				my @b = split(/\./, $a[-1]);
				$file =~ s/_x_/_/;
				print $fout "SAME+=\t\t$link:$file\n";
				undef($languages{$a[0]}{$a[1]}{data}{$b[0]}{$b[1]});
			}
		}
	}

	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if (_skip_locale($l, $f, $c));
		if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
		 && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
			print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") .
			    "${c} - not read\n";
			next;
		}
		my $file = _locale_name($l, $f, $c);

		foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			# undefined entries were turned into links above
			next if (!defined $languages{$l}{$f}{data}{$c}{$e});
			print $fout "LOCALES+=\t$file.$e\n";
		}

		if (defined $languages{$l}{$f}{nc_link}) {
			foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
				print $fout "SAME+=\t\t$file.$e:$languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n";
			}
		}

		if (defined $languages{$l}{$f}{e_link}) {
			foreach my $el (split(" ", $languages{$l}{$f}{e_link})) {
				my @a = split(/:/, $el);
				print $fout "SAME+=\t\t$file.$a[0]:$file.$a[1]\t# legacy (same charset)\n";
			}
		}

	}
	}
	}

	print $fout <<EOF;

FILES=		\${LOCALES:S/\$/.${SRCOUT2}/}
CLEANFILES=	\${FILES}

.for f in \${SAME}
SYMLINKS+=	../\${f:C/:.*\$//}/\${FILESNAME} \\
    \${LOCALEDIR}/\${f:C/^.*://}/\${FILESNAME}
.endfor

.for f in \${LOCALES}
FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
.endfor

${SRCOUT3}.include <bsd.prog.mk>
EOF

	close($fout)
	    or die("Cannot write $TYPE.draft/Makefile");
}