#!/usr/local/bin/perl -wC

#
# Convert CLDR POSIX locale sources into per-encoding locale definition
# drafts.  Depending on --type the script writes <type>.draft/*.src files
# (plus <type>.draft/Makefile when no --lc filter is active) for one of:
# colldef, ctypedef, numericdef, monetdef, msgdef, timedef.
#
# Inputs:
#   --cldr=<dir>     directory containing posix/*.src and posix/UTF-8.cm
#   --unidata=<dir>  directory containing UnicodeData.txt
#   --etc=<dir>      directory passed to get_xmldata() and holding charmaps/
#   --type=<type>    one of the keys of %FILESNAMES
#   --lc=<la_CC>     optional: restrict processing to a single locale
#

use strict;
use File::Copy;
use XML::Parser;
use Tie::IxHash;
use Data::Dumper;
use Getopt::Long;
use Digest::SHA qw(sha1_hex);
require "charmaps.pm";


my $USAGE = "Usage: $0 --cldr=<cldrdir> --unidata=<unidatadir> --etc=<etcdir> --type=<type> [--lc=<la_CC>]\n";

if ($#ARGV < 2) {
    print $USAGE;
    exit(1);
}

my $DEFENCODING = "UTF-8";
my @filter = ();

my $CLDRDIR = undef;
my $UNIDATADIR = undef;
my $ETCDIR = undef;
my $TYPE = undef;
my $doonly = undef;

my $result = GetOptions (
    "cldr=s"    => \$CLDRDIR,
    "unidata=s" => \$UNIDATADIR,
    "etc=s"     => \$ETCDIR,
    "type=s"    => \$TYPE,
    "lc=s"      => \$doonly
);

# Output file name (Makefile FILESNAME) for every supported --type.
my %FILESNAMES = (
    "monetdef"   => "LC_MONETARY",
    "timedef"    => "LC_TIME",
    "msgdef"     => "LC_MESSAGES",
    "numericdef" => "LC_NUMERIC",
    "colldef"    => "LC_COLLATE",
    "ctypedef"   => "LC_CTYPE"
);

# The four options below are interpolated into paths and compared with
# "eq" later on; refuse to continue with any of them missing instead of
# working on paths such as "/posix/UTF-8.cm".
foreach my $required ($CLDRDIR, $UNIDATADIR, $ETCDIR, $TYPE) {
    next if (defined $required);
    print $USAGE;
    exit(1);
}
die("Unknown type '$TYPE' (expected one of: " .
    join(", ", sort keys(%FILESNAMES)) . ")\n")
    if (!exists $FILESNAMES{$TYPE});

my %convertors = ();

my %ucd = ();
my %values = ();
my %hashtable = ();
my %languages = ();
my %translations = ();
my %encodings = ();
my %alternativemonths = ();
get_languages();

my %utf8map = ();
my %utf8aliases = ();
get_unidata($UNIDATADIR);
get_utf8map("$CLDRDIR/posix/$DEFENCODING.cm");
get_encodings("$ETCDIR/charmaps");

# Both hashes are order-preserving: %keys drives the order of fields in
# the generated files, %hashtable the order of SAME+= lines.
my %keys = ();
tie(%keys, "Tie::IxHash");
tie(%hashtable, "Tie::IxHash");

# Callbacks referenced from the "<name<source<format" entries of %keys.
# $callback{data} is filled in by print_fields() around each call.
my %callback = (
    mdorder => \&callback_mdorder,
    altmon  => \&callback_altmon,
    cformat => \&callback_cformat,
    data    => undef,
);

# Human readable description written as a comment above every field.
my %DESC = (

    # numericdef
    "decimal_point" => "decimal_point",
    "thousands_sep" => "thousands_sep",
    "grouping"      => "grouping",

    # monetdef
    "int_curr_symbol"   => "int_curr_symbol (last character always " .
                           "SPACE)",
    "currency_symbol"   => "currency_symbol",
    "mon_decimal_point" => "mon_decimal_point",
    "mon_thousands_sep" => "mon_thousands_sep",
    "mon_grouping"      => "mon_grouping",
    "positive_sign"     => "positive_sign",
    "negative_sign"     => "negative_sign",
    "int_frac_digits"   => "int_frac_digits",
    "frac_digits"       => "frac_digits",
    "p_cs_precedes"     => "p_cs_precedes",
    "p_sep_by_space"    => "p_sep_by_space",
    "n_cs_precedes"     => "n_cs_precedes",
    "n_sep_by_space"    => "n_sep_by_space",
    "p_sign_posn"       => "p_sign_posn",
    "n_sign_posn"       => "n_sign_posn",

    # msgdef
    "yesexpr" => "yesexpr",
    "noexpr"  => "noexpr",
    "yesstr"  => "yesstr",
    "nostr"   => "nostr",

    # timedef
    "abmon"      => "Short month names",
    "mon"        => "Long month names (as in a date)",
    "abday"      => "Short weekday names",
    "day"        => "Long weekday names",
    "t_fmt"      => "X_fmt",
    "d_fmt"      => "x_fmt",
    "c_fmt"      => "c_fmt",
    "am_pm"      => "AM/PM",
    "d_t_fmt"    => "date_fmt",
    "altmon"     => "Long month names (without case ending)",
    "md_order"   => "md_order",
    "t_fmt_ampm" => "ampm_fmt",
);

#
# Dispatch on --type.  For the field based types %keys maps a field name
# to its format: "s" string, "i" integer, "as" ';'-separated strings,
# "ai" ';'-separated integers, or "<callback<source field<format".
#
if ($TYPE eq "colldef") {
    transform_collation();
    make_makefile();
}

if ($TYPE eq "ctypedef") {
    transform_ctypes();
    make_makefile();
}

if ($TYPE eq "numericdef") {
    %keys = (
        "decimal_point" => "s",
        "thousands_sep" => "s",
        "grouping"      => "ai",
    );
    get_fields();
    print_fields();
    make_makefile();
}

if ($TYPE eq "monetdef") {
    %keys = (
        "int_curr_symbol"   => "s",
        "currency_symbol"   => "s",
        "mon_decimal_point" => "s",
        "mon_thousands_sep" => "s",
        "mon_grouping"      => "ai",
        "positive_sign"     => "s",
        "negative_sign"     => "s",
        "int_frac_digits"   => "i",
        "frac_digits"       => "i",
        "p_cs_precedes"     => "i",
        "p_sep_by_space"    => "i",
        "n_cs_precedes"     => "i",
        "n_sep_by_space"    => "i",
        "p_sign_posn"       => "i",
        "n_sign_posn"       => "i"
    );
    get_fields();
    print_fields();
    make_makefile();
}

if ($TYPE eq "msgdef") {
    %keys = (
        "yesexpr" => "s",
        "noexpr"  => "s",
        "yesstr"  => "s",
        "nostr"   => "s"
    );
    get_fields();
    print_fields();
    make_makefile();
}

if ($TYPE eq "timedef") {
    # "d_fmt" used to be listed twice with the same format; the second
    # entry only re-stored the same value and has been dropped.
    %keys = (
        "abmon"      => "as",
        "mon"        => "as",
        "abday"      => "as",
        "day"        => "as",
        "t_fmt"      => "s",
        "d_fmt"      => "s",
        "c_fmt"      => "<cformat<d_t_fmt<s",
        "am_pm"      => "as",
        "d_t_fmt"    => "s",
        "altmon"     => "<altmon<mon<as",
        "md_order"   => "<mdorder<d_fmt<s",
        "t_fmt_ampm" => "s",
    );
    get_fields();
    print_fields();
    make_makefile();
}

# Derive c_fmt from d_t_fmt by removing the first " %Z" and " %z".
sub callback_cformat {
    my $s = shift;
    return $s if (!defined $s);
    $s =~ s/ %Z//;
    $s =~ s/ %z//;
    return $s;
}

# Derive md_order from d_fmt: keep only the letters "d" and "m".
sub callback_mdorder {
    my $s = shift;
    return undef if (!defined $s);
    $s =~ s/[^dm]//g;
    return $s;
}

# If the language/country is known in %alternativemonths then return that
# list (with the whitespace around each name trimmed), otherwise repeat
# the value of "mon" that was passed in.
sub callback_altmon {
    my $s = shift;

    my $alt = $alternativemonths{$callback{data}{l}}{$callback{data}{c}};
    return $s if (!defined $alt);

    my @cleaned;
    foreach my $name (split(";", $alt)) {
        $name =~ s/^\s+//;
        $name =~ s/\s+$//;
        push @cleaned, $name;
    }
    return join(";", @cleaned);
}

############################

# Open $path with the given three-argument open mode or die with the
# system error; returns the lexical file handle.
sub open_or_die {
    my ($mode, $path) = @_;

    open(my $fh, $mode, $path)
        or die("Cannot open $path: $!");
    return $fh;
}

# True when the locale (language, family, country) passes the optional
# --lc filter and, if the language lists its definitions, that list
# mentions the current $TYPE.
sub locale_selected {
    my ($l, $f, $c) = @_;

    return 0 if ($#filter == 2 && ($filter[0] ne $l
        || $filter[1] ne $f || $filter[2] ne $c));
    return 0 if (defined $languages{$l}{$f}{definitions}
        && $languages{$l}{$f}{definitions} !~ /$TYPE/);
    return 1;
}

# File name stem of a locale: "la_CC", or "la_Family_CC" when the family
# is not the placeholder "x".
sub locale_basename {
    my ($l, $f, $c) = @_;

    return $l . "_" . ($f ne "x" ? $f . "_" : "") . $c;
}

# Locate the $DEFENCODING source of locale stem $file: first below
# $CLDRDIR/posix, then below $ETCDIR, then the language's fallback stem
# below $CLDRDIR/posix.  Returns the path, or nothing (after reporting on
# STDERR) when no candidate exists.
sub find_locale_source {
    my ($l, $f, $file) = @_;

    my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
    $filename = "$ETCDIR/$file.$DEFENCODING.src"
        if (! -f $filename);
    if (! -f $filename
        && defined $languages{$l}{$f}{fallback}) {
        $file = $languages{$l}{$f}{fallback};
        $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
    }
    return $filename if (-f $filename);

    print STDERR
        "Cannot open $file.$DEFENCODING.src or fallback\n";
    return;
}

# Load UnicodeData.txt from $directory into %ucd: {code2name} maps the
# first field to the second, {name2code} the second to the first.
sub get_unidata {
    my $directory = shift;

    open(my $fin, "<", "$directory/UnicodeData.txt")
        or die("Cannot open $directory/UnicodeData.txt");
    my @lines = <$fin>;
    chomp(@lines);
    close($fin);

    foreach my $l (@lines) {
        my @a = split(/;/, $l);
        next if (@a < 2);       # blank or truncated line

        $ucd{code2name}{"$a[0]"} = $a[1];       # Unicode name
        $ucd{name2code}{"$a[1]"} = $a[0];       # Unicode code
    }
}

# Read the CHARMAP section of $file into %utf8map (character name, with
# '_' replaced by ' ', to hex byte string).  When consecutive entries
# share a code, the later name is recorded in %utf8aliases as an alias
# of the earlier one.
sub get_utf8map {
    my $file = shift;

    my $fin;
    if (!open($fin, "<", $file)) {
        # Not fatal: the original code carried on with an empty map, and
        # types that never decode character names do not need it.
        print STDERR "Cannot open $file: $!\n";
        return;
    }
    my @lines = <$fin>;
    close($fin);
    chomp(@lines);

    my $prev_k = undef;
    my $prev_v = "";
    my $incharmap = 0;
    foreach my $l (@lines) {
        $l =~ s/\r//;
        next if ($l =~ /^\#/);
        next if ($l eq "");

        if ($l eq "CHARMAP") {
            $incharmap = 1;
            next;
        }

        next if (!$incharmap);
        last if ($l eq "END CHARMAP");

        # Only use the captures of a match that actually succeeded;
        # otherwise $1/$2 would be stale or undefined.
        next if ($l !~ /^<([^\s]+)>\s+(.*)/);
        my $k = $1;
        my $v = $2;
        $k =~ s/_/ /g;          # unicode char string
        $v =~ s/\\x//g;         # UTF-8 char code
        $utf8map{$k} = $v;

        $utf8aliases{$k} = $prev_k if ($prev_v eq $v);

        $prev_v = $v;
        $prev_k = $k;
    }
}

# For every encoding named in %encodings read $dir/<encoding>.TXT and
# fill $convertors{<encoding>}{<unicode code>} = <local code>, both
# upper-cased and without their 0x prefix.
sub get_encodings {
    my $dir = shift;

    foreach my $e (sort(keys(%encodings))) {
        my $fin;
        if (!open($fin, "<", "$dir/$e.TXT")) {
            print "Cannot open charmap for $e\n";
            next;
        }
        $encodings{$e} = 1;
        my @lines = <$fin>;
        close($fin);
        chomp(@lines);
        foreach my $l (@lines) {
            $l =~ s/\r//;
            next if ($l =~ /^\#/);
            next if ($l eq "");

            my @a = split(" ", $l);
            next if ($#a < 1);
            $a[0] =~ s/^0[xX]//;        # local char code
            $a[1] =~ s/^0[xX]//;        # unicode char code
            $convertors{$e}{uc($a[1])} = uc($a[0]);
        }
    }
}

# Populate %languages, %translations, %alternativemonths and %encodings
# from get_xmldata() (not defined in this file; presumably provided by
# charmaps.pm) and turn --lc into the three element @filter
# (language, family or "x", country).
sub get_languages {
    my %data = get_xmldata($ETCDIR);
    %languages = %{$data{L}};
    %translations = %{$data{T}};
    %alternativemonths = %{$data{AM}};
    %encodings = %{$data{E}};

    return if (!defined $doonly);

    my @a = split(/_/, $doonly);
    if ($#a == 1) {
        $filter[0] = $a[0];
        $filter[1] = "x";
        $filter[2] = $a[1];
    } elsif ($#a == 2) {
        $filter[0] = $a[0];
        $filter[1] = $a[1];
        $filter[2] = $a[2];
    }

    print Dumper(@filter);
    return;
}

# Write the ctype drafts.  The $DEFENCODING draft of every selected
# locale is a copy of xx_Comm_US.UTF-8.src; every other encoding gets the
# comment_char/escape_char lines and the LC_CTYPE section of the locale's
# own $DEFENCODING source.  Content hashes are recorded in %languages and
# %hashtable so make_makefile() can link identical files.
sub transform_ctypes {
    foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
            foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                next if (!locale_selected($l, $f, $c));
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
                my $file = locale_basename($l, $f, $c);

                my $filename = "$CLDRDIR/posix/xx_Comm_US.UTF-8.src";
                if (! -f $filename) {
                    print STDERR "Cannot open $filename\n";
                    next;
                }
                my $fin = open_or_die("<", $filename);
                print "Reading from $filename for ${l}_${f}_${c}\n";
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
                my @lines = <$fin>;
                close($fin);

                my $shex = sha1_hex(join("\n", @lines));
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
                $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
                my $fout = open_or_die(">",
                    "$TYPE.draft/$file.$DEFENCODING.src");
                print {$fout} @lines;
                close($fout)
                    or die("Cannot write $TYPE.draft/$file.$DEFENCODING.src: $!");

                foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                    next if ($enc eq $DEFENCODING);
                    $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
                    if (! -f $filename) {
                        print STDERR "Cannot open $filename\n";
                        next;
                    }
                    @lines = ();
                    $fin = open_or_die("<", $filename);
                    while (<$fin>) {
                        if ((/^comment_char\s/) || (/^escape_char\s/)) {
                            push @lines, $_;
                        }
                        if (/^LC_CTYPE/../^END LC_CTYPE/) {
                            push @lines, $_;
                        }
                    }
                    close($fin);

                    # The encoding name is part of the hash so that equal
                    # text in different encodings is never linked.
                    my $uhex = sha1_hex(join("\n", @lines) . $enc);
                    $languages{$l}{$f}{data}{$c}{$enc} = $uhex;
                    $hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;
                    $fout = open_or_die(">", "$TYPE.draft/$file.$enc.src");
                    print {$fout} <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
                    print {$fout} @lines;
                    close($fout)
                        or die("Cannot write $TYPE.draft/$file.$enc.src: $!");
                }
            }
        }
    }
}


# Write the collation drafts: the comment_char/escape_char lines and the
# LC_COLLATE section (runs of spaces squeezed to one) of every selected
# locale's source, then copy that draft to every other encoding of the
# locale.  All encodings share one content hash.
sub transform_collation {
    foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
            foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                next if (!locale_selected($l, $f, $c));
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread

                # The draft is always named after the locale itself, even
                # when its content comes from the fallback source.
                my $actfile = locale_basename($l, $f, $c);
                my $filename = find_locale_source($l, $f, $actfile);
                next if (!defined $filename);

                my $fin = open_or_die("<", $filename);
                print "Reading from $filename for ${l}_${f}_${c}\n";
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
                my @lines;
                while (<$fin>) {
                    if ((/^comment_char\s/) || (/^escape_char\s/)) {
                        push @lines, $_;
                    }
                    if (/^LC_COLLATE/../^END LC_COLLATE/) {
                        $_ =~ s/[ ]+/ /g;
                        push @lines, $_;
                    }
                }
                close($fin);

                my $shex = sha1_hex(join("\n", @lines));
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
                $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
                my $fout = open_or_die(">",
                    "$TYPE.draft/$actfile.$DEFENCODING.src");
                print {$fout} <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
                print {$fout} @lines;
                close($fout)
                    or die("Cannot write $TYPE.draft/$actfile.$DEFENCODING.src: $!");

                foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                    next if ($enc eq $DEFENCODING);
                    copy ("$TYPE.draft/$actfile.$DEFENCODING.src",
                        "$TYPE.draft/$actfile.$enc.src");
                    $languages{$l}{$f}{data}{$c}{$enc} = $shex;
                    $hashtable{$shex}{"${l}_${f}_${c}.$enc"} = 1;
                }
            }
        }
    }
}

# Read the raw value of every field in %keys from each selected locale's
# source into $values{language}{family}{country}{field}.  A value may
# continue over several lines, each continued line ending in "/".
# Underscores inside <...> character names are replaced by spaces.
sub get_fields {
    foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
            foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                next if (!locale_selected($l, $f, $c));

                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
                my $filename = find_locale_source($l, $f,
                    locale_basename($l, $f, $c));
                next if (!defined $filename);

                my $fin = open_or_die("<", $filename);
                print "Reading from $filename for ${l}_${f}_${c}\n";
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
                my @lines = <$fin>;
                chomp(@lines);
                close($fin);

                foreach my $k (keys(%keys)) {
                    # Reset per field so an unterminated continuation at
                    # the end of the file cannot leak into the next field.
                    my $continue = 0;
                    foreach my $line (@lines) {
                        $line =~ s/\r//;
                        next if (!$continue && $line !~ /^$k\s/);
                        if ($continue) {
                            $line =~ s/^\s+//;
                        } else {
                            $line =~ s/^$k\s+//;
                        }

                        # Keyed by family as well: two families of one
                        # language sharing a country must not append to
                        # the same slot.
                        $values{$l}{$f}{$c}{$k} = ""
                            if (!defined $values{$l}{$f}{$c}{$k});

                        $continue = ($line =~ /\/$/);
                        $line =~ s/\/$// if ($continue);

                        # Loop on the substitution, not on the presence
                        # of "_": an underscore outside <...> would
                        # otherwise spin forever and never reach the die.
                        1 while ($line =~
                            s/\<([^>_]+)_([^>]+)\>/<$1 $2>/);
                        die "_ in data - $line" if ($line =~ /_/);
                        $values{$l}{$f}{$c}{$k} .= $line;

                        last if (!$continue);
                    }
                }
            }
        }
    }
}

# Return the byte string for the character named $s in encoding $e.
# Dies when the name cannot be resolved.  Codes of one to four bytes are
# supported; any other code length is reported on STDERR and the string
# "length = <n>" is returned instead.
sub decodecldr {
    my $e = shift;
    my $s = shift;

    my $v = undef;

    if ($e eq "UTF-8") {
        #
        # Conversion to UTF-8 can be done from the Unicode name to
        # the UTF-8 character code.
        #
        $v = $utf8map{$s};
        die "Cannot convert $s in $e (charmap)" if (!defined $v);
    } else {
        #
        # Conversion to these encodings can be done from the Unicode
        # name to Unicode code to the encodings code.
        #
        my $ucc = undef;
        $ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
        $ucc = $ucd{name2code}{$utf8aliases{$s}}
            if (!defined $ucc
                && $utf8aliases{$s}
                && defined $ucd{name2code}{$utf8aliases{$s}});

        if (!defined $ucc) {
            if (defined $translations{$e}{$s}{hex}) {
                $v = $translations{$e}{$s}{hex};
                $ucc = 0;
            } elsif (defined $translations{$e}{$s}{ucc}) {
                $ucc = $translations{$e}{$s}{ucc};
            }
        }

        die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
        $v = $convertors{$e}{$ucc} if (!defined $v);

        $v = $translations{$e}{$s}{hex}
            if (!defined $v && defined $translations{$e}{$s}{hex});

        if (!defined $v && defined $translations{$e}{$s}{unicode}) {
            # Retry with the substitute character name.  Its code is
            # looked up from scratch: $ucc is always defined here, so
            # reusing it would never let the alias lookup run.
            my $ucn = $translations{$e}{$s}{unicode};
            my $tcc = $ucd{name2code}{$ucn};
            $tcc = $ucd{name2code}{$utf8aliases{$ucn}}
                if (!defined $tcc
                    && defined $utf8aliases{$ucn}
                    && defined $ucd{name2code}{$utf8aliases{$ucn}});
            $v = $convertors{$e}{$tcc} if (defined $tcc);
        }

        die "Cannot convert $s in $e (charmap)" if (!defined $v);
    }

    # Two hex digits per byte; up to four bytes (the longest UTF-8
    # sequence).
    my $len = length($v);
    if ($len == 2 || $len == 4 || $len == 6 || $len == 8) {
        return pack("C*", map { hex($_) } unpack("(a2)*", $v));
    }
    print STDERR "Cannot convert $e $s\n";
    return "length = " . $len;
}

# Return the %translations entry for name $v in encoding $enc, or undef.
sub translate {
    my $enc = shift;
    my $v = shift;

    return $translations{$enc}{$v} if (defined $translations{$enc}{$v});
    return undef;
}

# Strip one leading and one trailing double quote from $v and replace
# every <CHARACTER NAME> in it by its bytes in encoding $enc.  The string
# is consumed left to right so decoded bytes are never rescanned (a
# decoded "<" must not start a new name).  $k is only used in messages.
# Returns (converted string, success flag).
sub expand_charnames {
    my ($enc, $k, $v) = @_;

    my $okay = 1;
    $v =~ s/^"//;
    $v =~ s/"$//;

    my $converted = "";
    while ($v =~ /^(.*?)<(.*?)>(.*)/) {
        my ($p1, $cm, $p3) = ($1, $2, $3);

        my $rv = decodecldr($enc, $cm);
        if (!defined $rv) {
            print STDERR
"Could not convert $k ($cm) from $DEFENCODING to $enc\n";
            $okay = 0;
            $rv = "<$cm>";      # keep the name and move on
        }

        $converted .= $p1 . $rv;
        $v = $p3;
    }
    return ($converted . $v, $okay);
}

# Write $TYPE.draft/<locale>.<encoding>.src for every selected locale and
# each of its encodings from the values collected by get_fields().  A
# file with a failed conversion is left as ".failed" instead of ".src".
sub print_fields {
    foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
            foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                next if (!locale_selected($l, $f, $c));
                foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                    if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
                        print "Skipping ${l}_" .
                            ($f eq "x" ? "" : "${f}_") .
                            "${c} - not read\n";
                        next;
                    }
                    my $file = locale_basename($l, $f, $c);
                    print "Writing to $file in $enc\n";

                    if ($enc ne $DEFENCODING &&
                        !defined $convertors{$enc}) {
                        print "Failed! Cannot convert to $enc.\n";
                        next;
                    }

                    my $fout = open_or_die(">",
                        "$TYPE.draft/$file.$enc.new");
                    my $okay = 1;
                    my $output = "";
                    print {$fout} <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
                    foreach my $k (keys(%keys)) {
                        my $fmt = $keys{$k};

                        die("Unknown $k in \%DESC")
                            if (!defined $DESC{$k});

                        $output .= "#\n# $DESC{$k}\n";

                        # Replace one row with another
                        if ($fmt =~ /^>/) {
                            $k = substr($fmt, 1);
                            $fmt = $keys{$k};
                        }

                        # Callback function: "<name<source field<format"
                        if ($fmt =~ /^\</) {
                            $callback{data}{c} = $c;
                            $callback{data}{k} = $k;
                            $callback{data}{l} = $l;
                            $callback{data}{e} = $enc;
                            my @a = split(/\</, substr($fmt, 1));
                            my $rv = $callback{$a[0]}->(
                                $values{$l}{$f}{$c}{$a[1]});
                            $values{$l}{$f}{$c}{$k} = $rv;
                            $fmt = $a[2];
                            $callback{data} = undef;
                        }

                        my $v = $values{$l}{$f}{$c}{$k};
                        $v = "undef" if (!defined $v);

                        if ($fmt eq "i" || $fmt eq "ai") {
                            $output .= "$v\n";
                            next;
                        }
                        if ($fmt eq "s") {
                            my ($text, $ok) =
                                expand_charnames($enc, $k, $v);
                            $okay = 0 if (!$ok);
                            $output .= "$text\n";
                            next;
                        }
                        if ($fmt eq "as") {
                            foreach my $item (split(/;/, $v)) {
                                my ($text, $ok) =
                                    expand_charnames($enc, $k, $item);
                                $okay = 0 if (!$ok);
                                $output .= "$text\n";
                            }
                            next;
                        }

                        die("$k is '$fmt'");
                    }

                    $languages{$l}{$f}{data}{$c}{$enc} = sha1_hex($output);
                    $hashtable{sha1_hex($output)}{"${l}_${f}_${c}.$enc"} = 1;
                    print {$fout} "$output# EOF\n";
                    close($fout)
                        or die("Cannot write $TYPE.draft/$file.$enc.new: $!");

                    my $suffix = $okay ? "src" : "failed";
                    rename("$TYPE.draft/$file.$enc.new",
                        "$TYPE.draft/$file.$enc.$suffix")
                        or die("Cannot rename $TYPE.draft/$file.$enc.new: $!");
                }
            }
        }
    }
}

# Write $TYPE.draft/Makefile: one SAME+= line for every file whose
# content hash equals that of a preferred file, one LOCALES+= line for
# every remaining file, plus the legacy links from nc_link/e_link.
# Nothing is written when a --lc filter is active.
sub make_makefile {
    return if ($#filter > -1);
    print "Creating Makefile for $TYPE\n";
    my $SRCOUT;
    my $SRCOUT2;
    my $MAPLOC;
    if ($TYPE eq "colldef") {
        $SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" .
            "\t-f \${MAPLOC}/map.UTF-8 " .
            "\${.OBJDIR}/\${.IMPSRC:T:R}";
        $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
            "locale/etc/final-maps\n";
        $SRCOUT2 = "LC_COLLATE";
    }
    elsif ($TYPE eq "ctypedef") {
        $SRCOUT = "localedef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
            "\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:C/^.*\\.//} " .
            "\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
            " || true";
        $SRCOUT2 = "LC_CTYPE";
        $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
            "locale/etc/final-maps\n";
    }
    else {
        $SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
        $SRCOUT2 = "out";
        $MAPLOC = "";
    }
    my $fout = open_or_die(">", "$TYPE.draft/Makefile");
    # The recipe line below must start with a tab for make; it is written
    # as an explicit \t so that whitespace damage cannot break it.
    print {$fout} <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale.

LOCALEDIR= /usr/share/locale
FILESNAME= $FILESNAMES{$TYPE}
.SUFFIXES: .src .${SRCOUT2}
${MAPLOC}
.src.${SRCOUT2}:
\t$SRCOUT

## PLACEHOLDER

EOF

    foreach my $hash (keys(%hashtable)) {
        # For colldef, weight LOCALES to UTF-8
        # Sort as upper-case and reverse to achieve it
        # Make en_US, ru_RU, and ca_AD preferred
        my @files;
        if ($TYPE eq "colldef") {
            @files = sort {
                if ($a eq 'en_x_US.UTF-8' ||
                    $a eq 'ru_x_RU.UTF-8' ||
                    $a eq 'ca_x_AD.UTF-8') { return -1; }
                elsif ($b eq 'en_x_US.UTF-8' ||
                       $b eq 'ru_x_RU.UTF-8' ||
                       $b eq 'ca_x_AD.UTF-8') { return 1; }
                else { return uc($b) cmp uc($a); }
            } keys(%{$hashtable{$hash}});
        } elsif ($TYPE eq "ctypedef") {
            @files = sort {
                if ($a eq 'en_x_US.UTF-8') { return -1; }
                elsif ($b eq 'en_x_US.UTF-8') { return 1; }
                if ($a =~ /^en_x_US/) { return -1; }
                elsif ($b =~ /^en_x_US/) { return 1; }

                # Both sides use the same anchored patterns so the
                # comparison is symmetric.
                if ($a =~ /^en_x_GB.ISO8859-15/ ||
                    $a =~ /^ru_x_RU/) { return -1; }
                elsif ($b =~ /^en_x_GB.ISO8859-15/ ||
                       $b =~ /^ru_x_RU/) { return 1; }
                else { return uc($b) cmp uc($a); }

            } keys(%{$hashtable{$hash}});
        } else {
            @files = sort {
                if ($a =~ /_Comm_/ ||
                    $b eq 'en_x_US.UTF-8') { return 1; }
                elsif ($b =~ /_Comm_/ ||
                       $a eq 'en_x_US.UTF-8') { return -1; }
                else { return uc($b) cmp uc($a); }
            } keys(%{$hashtable{$hash}});
        }
        if ($#files > 0) {
            # The first file is kept; every other file with the same
            # content becomes a link to it and is dropped from LOCALES.
            my $link = shift(@files);
            $link =~ s/_x_/_/;  # strip family if none there
            foreach my $file (@files) {
                my @a = split(/_/, $file);
                my @b = split(/\./, $a[-1]);
                $file =~ s/_x_/_/;
                print {$fout} "SAME+=\t\t$link:$file\n";
                undef($languages{$a[0]}{$a[1]}{data}{$b[0]}{$b[1]});
            }
        }
    }

    foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
            foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                next if (!locale_selected($l, $f, $c));
                if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
                    && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
                    print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") .
                        "${c} - not read\n";
                    next;
                }
                my $file = locale_basename($l, $f, $c);

                foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                    next if (!defined $languages{$l}{$f}{data}{$c}{$e});
                    print {$fout} "LOCALES+=\t$file.$e\n";
                }

                if (defined $languages{$l}{$f}{nc_link}) {
                    foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                        print {$fout} "SAME+=\t\t$file.$e:$languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n";
                    }
                }

                if (defined $languages{$l}{$f}{e_link}) {
                    foreach my $el (split(" ", $languages{$l}{$f}{e_link})) {
                        my @a = split(/:/, $el);
                        print {$fout} "SAME+=\t\t$file.$a[0]:$file.$a[1]\t# legacy (same charset)\n";
                    }
                }

            }
        }
    }

    print {$fout} <<EOF;

FILES= \${LOCALES:S/\$/.${SRCOUT2}/}
CLEANFILES= \${FILES}

.for f in \${SAME}
SYMLINKS+= ../\${f:C/:.*\$//}/\${FILESNAME} \${LOCALEDIR}/\${f:C/^.*://}
.endfor

.for f in \${LOCALES}
FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
.endfor

.include <bsd.prog.mk>
EOF

    close($fout)
        or die("Cannot write $TYPE.draft/Makefile: $!");
}