#!/usr/local/bin/perl -wC

#
# Generate draft locale definition sources from CLDR POSIX data.
#
# For the category named by --type this script reads
# "<cldrdir>/posix/<locale>.UTF-8.src" (falling back to "<etcdir>/" and to
# the per-language fallback locale), writes the results to "<type>.draft/"
# and, unless --lc restricts the run to one locale, also writes
# "<type>.draft/Makefile".
#
# The language/encoding tables come from get_xmldata() in charmaps.pm.
#

use strict;
use File::Copy;
use XML::Parser;
use Tie::IxHash;
use Data::Dumper;
use Getopt::Long;
use Digest::SHA qw(sha1_hex);
require "charmaps.pm";

# Print the command line synopsis and terminate with a failure status.
sub usage {
	print "Usage: $0 --cldr=<cldrdir> --unidata=<unidatadir> --etc=<etcdir> --type=<type> [--lc=<la_CC>]\n";
	exit(1);
}

usage() if ($#ARGV < 2);

my $DEFENCODING = "UTF-8";
my @filter = ();

my $CLDRDIR = undef;
my $UNIDATADIR = undef;
my $ETCDIR = undef;
my $TYPE = undef;
my $doonly = undef;

my $result = GetOptions (
		"cldr=s"	=> \$CLDRDIR,
		"unidata=s"	=> \$UNIDATADIR,
		"etc=s"		=> \$ETCDIR,
		"type=s"	=> \$TYPE,
		"lc=s"		=> \$doonly
	    );

# Category name written as FILESNAME in the Makefile, per --type value.
my %FILESNAMES = (
	"monetdef"	=> "LC_MONETARY",
	"timedef"	=> "LC_TIME",
	"msgdef"	=> "LC_MESSAGES",
	"numericdef"	=> "LC_NUMERIC",
	"colldef"	=> "LC_COLLATE",
	"ctypedef"	=> "LC_CTYPE"
);

# All four directory/type options are required; refuse to continue with
# undefined values or with a type this script does not know how to build.
usage() if (!$result || !defined $CLDRDIR || !defined $UNIDATADIR
    || !defined $ETCDIR || !defined $TYPE || !defined $FILESNAMES{$TYPE});

my %convertors = ();

my %ucd = ();
my %values = ();
my %hashtable = ();
my %languages = ();
my %translations = ();
my %encodings = ();
my %alternativemonths = ();
get_languages();

my %utf8map = ();
my %utf8aliases = ();
get_unidata($UNIDATADIR);
get_utf8map("$CLDRDIR/posix/$DEFENCODING.cm");
get_encodings("$ETCDIR/charmaps");

# Both hashes are insertion-ordered: %keys fixes the order of the emitted
# fields, %hashtable the order of the SAME+= lines in the Makefile.
my %keys = ();
tie(%keys, "Tie::IxHash");
tie(%hashtable, "Tie::IxHash");

# Callbacks referenced from the "<name<source<format" entries in %keys.
# $callback{data} carries the current locale while a callback runs.
my %callback = (
	mdorder => \&callback_mdorder,
	altmon => \&callback_altmon,
	cformat => \&callback_cformat,
	data => undef,
);

# Comment text emitted above each field in the generated files.
my %DESC = (

	# numericdef
	"decimal_point"	=> "decimal_point",
	"thousands_sep"	=> "thousands_sep",
	"grouping"	=> "grouping",

	# monetdef
	"int_curr_symbol"	=> "int_curr_symbol (last character always " .
				   "SPACE)",
	"currency_symbol"	=> "currency_symbol",
	"mon_decimal_point"	=> "mon_decimal_point",
	"mon_thousands_sep"	=> "mon_thousands_sep",
	"mon_grouping"		=> "mon_grouping",
	"positive_sign"		=> "positive_sign",
	"negative_sign"		=> "negative_sign",
	"int_frac_digits"	=> "int_frac_digits",
	"frac_digits"		=> "frac_digits",
	"p_cs_precedes"		=> "p_cs_precedes",
	"p_sep_by_space"	=> "p_sep_by_space",
	"n_cs_precedes"		=> "n_cs_precedes",
	"n_sep_by_space"	=> "n_sep_by_space",
	"p_sign_posn"		=> "p_sign_posn",
	"n_sign_posn"		=> "n_sign_posn",

	# msgdef
	"yesexpr"	=> "yesexpr",
	"noexpr"	=> "noexpr",
	"yesstr"	=> "yesstr",
	"nostr"		=> "nostr",

	# timedef
	"abmon"		=> "Short month names",
	"mon"		=> "Long month names (as in a date)",
	"abday"		=> "Short weekday names",
	"day"		=> "Long weekday names",
	"t_fmt"		=> "X_fmt",
	"d_fmt"		=> "x_fmt",
	"c_fmt"		=> "c_fmt",
	"am_pm"		=> "AM/PM",
	"d_t_fmt"	=> "date_fmt",
	"altmon"	=> "Long month names (without case ending)",
	"md_order"	=> "md_order",
	"t_fmt_ampm"	=> "ampm_fmt",
);

#
# Per-type driver.  The value of each %keys entry is the field format:
#   "s"  string, "as" ;-separated list of strings,
#   "i"  integer, "ai" list of integers,
#   "<callback<source<format" value computed by a callback from "source".
#
if ($TYPE eq "colldef") {
	transform_collation();
	make_makefile();
}
elsif ($TYPE eq "ctypedef") {
	transform_ctypes();
	make_makefile();
}
elsif ($TYPE eq "numericdef") {
	%keys = (
	    "decimal_point"	=> "s",
	    "thousands_sep"	=> "s",
	    "grouping"		=> "ai",
	);
	get_fields();
	print_fields();
	make_makefile();
}
elsif ($TYPE eq "monetdef") {
	%keys = (
	    "int_curr_symbol"	=> "s",
	    "currency_symbol"	=> "s",
	    "mon_decimal_point"	=> "s",
	    "mon_thousands_sep"	=> "s",
	    "mon_grouping"	=> "ai",
	    "positive_sign"	=> "s",
	    "negative_sign"	=> "s",
	    "int_frac_digits"	=> "i",
	    "frac_digits"	=> "i",
	    "p_cs_precedes"	=> "i",
	    "p_sep_by_space"	=> "i",
	    "n_cs_precedes"	=> "i",
	    "n_sep_by_space"	=> "i",
	    "p_sign_posn"	=> "i",
	    "n_sign_posn"	=> "i"
	);
	get_fields();
	print_fields();
	make_makefile();
}
elsif ($TYPE eq "msgdef") {
	%keys = (
	    "yesexpr"	=> "s",
	    "noexpr"	=> "s",
	    "yesstr"	=> "s",
	    "nostr"	=> "s"
	);
	get_fields();
	print_fields();
	make_makefile();
}
elsif ($TYPE eq "timedef") {
	# "d_fmt" used to be listed twice; one entry is enough.
	%keys = (
	    "abmon"		=> "as",
	    "mon"		=> "as",
	    "abday"		=> "as",
	    "day"		=> "as",
	    "t_fmt"		=> "s",
	    "d_fmt"		=> "s",
	    "c_fmt"		=> "<cformat<d_t_fmt<s",
	    "am_pm"		=> "as",
	    "d_t_fmt"		=> "s",
	    "altmon"		=> "<altmon<mon<as",
	    "md_order"		=> "<mdorder<d_fmt<s",
	    "t_fmt_ampm"	=> "s",
	);
	get_fields();
	print_fields();
	make_makefile();
}

# Derive c_fmt from d_t_fmt by dropping the first " %Z" and " %z".
# Returns undef when there is no source value.
sub callback_cformat {
	my $s = shift;
	return undef if (!defined $s);
	$s =~ s/ %Z//;
	$s =~ s/ %z//;
	return $s;
};

# Derive md_order from d_fmt by keeping only the letters "d" and "m".
# Returns undef when there is no source value.
sub callback_mdorder {
	my $s = shift;
	return undef if (!defined $s);
	$s =~ s/[^dm]//g;
	return $s;
};

# Return the alternative month names for the current language/country
# (taken from $callback{data}) with surrounding whitespace stripped from
# each name; when none are known, repeat the "mon" value passed in.
sub callback_altmon {
	my $s = shift;

	my $alt = $alternativemonths{$callback{data}{l}}{$callback{data}{c}};
	if (defined $alt) {
		my @cleaned;
		foreach my $name (split(";", $alt)) {
			$name =~ s/^\s+//;
			$name =~ s/\s+$//;
			push @cleaned, $name;
		}
		return join(";", @cleaned);
	}

	return $s;
}

############################

# Return true when locale ($l, $f, $c) must not be processed: either the
# --lc filter selects a different locale, or the language entry carries a
# "definitions" list that does not mention the current type.
sub skip_locale {
	my ($l, $f, $c) = @_;

	return 1 if ($#filter == 2 && ($filter[0] ne $l
	    || $filter[1] ne $f || $filter[2] ne $c));
	return 1 if (defined $languages{$l}{$f}{definitions}
	    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
	return 0;
}

# Build "<lang>_<family>_<country>", leaving the family out when it is the
# placeholder "x".
sub locale_basename {
	my ($l, $f, $c) = @_;

	my $file = $l . "_";
	$file .= $f . "_" if ($f ne "x");
	$file .= $c;
	return $file;
}

# Locate the UTF-8 source file for a locale.  Search order: the CLDR posix
# directory, the etc directory, then the language's fallback locale in the
# CLDR posix directory.  Returns the path, or nothing (after reporting on
# STDERR) when no candidate exists.
sub find_source {
	my ($l, $f, $c) = @_;

	my $file = locale_basename($l, $f, $c);
	my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
	$filename = "$ETCDIR/$file.$DEFENCODING.src"
	    if (! -f $filename);
	if (! -f $filename
	    && defined $languages{$l}{$f}{fallback}) {
		$file = $languages{$l}{$f}{fallback};
		$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
	}
	$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
	    if (! -f $filename);
	if (! -f $filename) {
		print STDERR
		    "Cannot open $file.$DEFENCODING.src or fallback\n";
		return;
	}
	return $filename;
}

# Load UnicodeData.txt from $directory into $ucd{code2name} and
# $ucd{name2code} (first two ;-separated columns of every line).
sub get_unidata {
	my $directory = shift;

	open(my $fin, "<", "$directory/UnicodeData.txt")
	    or die("Cannot open $directory/UnicodeData.txt");
	my @lines = <$fin>;
	chomp(@lines);
	close($fin);

	foreach my $l (@lines) {
		my @a = split(/;/, $l);

		$ucd{code2name}{"$a[0]"} = $a[1];	# Unicode name
		$ucd{name2code}{"$a[1]"} = $a[0];	# Unicode code
	}
}

# Read the CHARMAP section of the UTF-8 charmap $file into %utf8map
# (character name, with "_" turned into " " => hex byte string) and record
# in %utf8aliases every name whose value equals that of the preceding entry.
sub get_utf8map {
	my $file = shift;

	my $fin;
	if (!open($fin, "<", $file)) {
		# Keep going: colldef/ctypedef runs never consult the map.
		print STDERR "Cannot open $file: $!\n";
		return;
	}
	my @lines = <$fin>;
	close($fin);
	chomp(@lines);

	my $prev_k = undef;
	my $prev_v = "";
	my $incharmap = 0;
	foreach my $l (@lines) {
		$l =~ s/\r//;
		next if ($l =~ /^\#/);
		next if ($l eq "");

		if ($l eq "CHARMAP") {
			$incharmap = 1;
			next;
		}

		next if (!$incharmap);
		last if ($l eq "END CHARMAP");

		# Skip lines that are not "<NAME> value"; using $1/$2 after a
		# failed match would silently repeat the previous entry.
		next if ($l !~ /^<([^\s]+)>\s+(.*)/);
		my $k = $1;
		my $v = $2;
		$k =~ s/_/ /g;		# unicode char string
		$v =~ s/\\x//g;		# UTF-8 char code
		$utf8map{$k} = $v;

		$utf8aliases{$k} = $prev_k if ($prev_v eq $v);

		$prev_v = $v;
		$prev_k = $k;
	}
}

# For every encoding named in %encodings read "$dir/<encoding>.TXT" and
# fill $convertors{<encoding>}{<unicode code>} = <local code> (both upper
# case hex without the 0x prefix).  Unreadable charmaps are reported and
# skipped.
sub get_encodings {
	my $dir = shift;
	foreach my $e (sort(keys(%encodings))) {
		my $fin;
		if (!open($fin, "<", "$dir/$e.TXT")) {
			print "Cannot open charmap for $e\n";
			next;
		}
		$encodings{$e} = 1;
		my @lines = <$fin>;
		close($fin);
		chomp(@lines);
		foreach my $l (@lines) {
			$l =~ s/\r//;
			next if ($l =~ /^\#/);
			next if ($l eq "");

			my @a = split(" ", $l);
			next if ($#a < 1);
			$a[0] =~ s/^0[xX]//;	# local char code
			$a[1] =~ s/^0[xX]//;	# unicode char code
			$convertors{$e}{uc($a[1])} = uc($a[0]);
		}
	}
}

# Load the language, translation, alternative-month and encoding tables
# via get_xmldata() and, when --lc was given, split it into the
# (language, family, country) filter; a two-part value gets family "x".
sub get_languages {
	my %data = get_xmldata($ETCDIR);
	%languages = %{$data{L}};
	%translations = %{$data{T}};
	%alternativemonths = %{$data{AM}};
	%encodings = %{$data{E}};

	return if (!defined $doonly);

	my @a = split(/_/, $doonly);
	if ($#a == 1) {
		$filter[0] = $a[0];
		$filter[1] = "x";
		$filter[2] = $a[1];
	} elsif ($#a == 2) {
		$filter[0] = $a[0];
		$filter[1] = $a[1];
		$filter[2] = $a[2];
	}

	print Dumper(@filter);
	return;
}

# Extract the LC_CTYPE section of every selected locale.  Copies made for
# other encodings get a hash of their own (content plus encoding name).
sub transform_ctypes {
	transform_section("LC_CTYPE", 0, 1);
}

# Extract the LC_COLLATE section of every selected locale with runs of
# spaces squeezed.  Copies made for other encodings share the UTF-8 hash.
sub transform_collation {
	transform_section("LC_COLLATE", 1, 0);
}

#
# Shared worker for transform_ctypes() and transform_collation().
#
#   $section       section keyword, e.g. "LC_CTYPE"
#   $squeeze       true to collapse runs of spaces inside the section
#   $hash_per_enc  true to hash each non-UTF-8 copy separately
#
# For each selected locale the comment_char/escape_char lines and the
# lines from "$section" through "END $section" are written, below a
# warning header, to "$TYPE.draft/<name>.UTF-8.src" and copied once per
# additional encoding.  The SHA-1 of the extracted text is stored in
# %languages and %hashtable so make_makefile() can link identical files.
#
sub transform_section {
	my ($section, $squeeze, $hash_per_enc) = @_;

	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if (skip_locale($l, $f, $c));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $actfile = ($c eq "COMMON")
		    ? "common" : locale_basename($l, $f, $c);

		my $filename = find_source($l, $f, $c);
		next if (!defined $filename);

		open(my $fin, "<", $filename)
		    or die("Cannot open $filename: $!");
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines;
		# A per-file flag rather than the ".." operator, whose state
		# would leak into the next file if the END line were missing.
		my $in_section = 0;
		while (my $line = <$fin>) {
			if ($line =~ /^comment_char\s/
			    || $line =~ /^escape_char\s/) {
				push @lines, $line;
			}
			$in_section = 1
			    if (!$in_section && $line =~ /^$section/);
			if ($in_section) {
				my $ends = ($line =~ /^END $section/);
				$line =~ s/[ ]+/ /g if ($squeeze);
				push @lines, $line;
				$in_section = 0 if ($ends);
			}
		}
		close($fin);

		my $shex = sha1_hex(join("\n", @lines));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
		$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

		my $draft = "$TYPE.draft/$actfile.$DEFENCODING.src";
		open(my $fout, ">", $draft)
		    or die("Cannot create $draft: $!");
		print $fout <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
		print $fout @lines;
		close($fout);

		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if ($enc eq $DEFENCODING);
			copy($draft, "$TYPE.draft/$actfile.$enc.src");
			my $hex = $hash_per_enc
			    ? sha1_hex(join("\n", @lines) . $enc) : $shex;
			$languages{$l}{$f}{data}{$c}{$enc} = $hex;
			$hashtable{$hex}{"${l}_${f}_${c}.$enc"} = 1;
		}
	}
	}
	}
}

#
# Read the raw value of every field listed in %keys for each selected
# locale into $values{lang}{family}{country}{key}.  A value may continue
# over several lines, each continued line ending in "/".  Underscores in
# <CHARACTER_NAME> tokens are turned into spaces.
#
sub get_fields {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if (skip_locale($l, $f, $c));

		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread

		my $filename = find_source($l, $f, $c);
		next if (!defined $filename);

		open(my $fin, "<", $filename)
		    or die("Cannot open $filename: $!");
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines = <$fin>;
		chomp(@lines);
		close($fin);

		foreach my $k (keys(%keys)) {
			# Reset per key so a continuation that runs to the
			# end of the file cannot spill into the next key.
			my $continue = 0;
			foreach my $line (@lines) {
				$line =~ s/\r//;
				next if (!$continue && $line !~ /^$k\s/);
				if ($continue) {
					$line =~ s/^\s+//;
				} else {
					$line =~ s/^$k\s+//;
				}

				# Keyed by family as well, so that locales
				# differing only in family do not append to
				# each other's values.
				$values{$l}{$f}{$c}{$k} = ""
				    if (!defined $values{$l}{$f}{$c}{$k});

				$continue = ($line =~ /\/$/);
				$line =~ s/\/$// if ($continue);

				# Stop as soon as no token is left to fix; an
				# underscore outside <...> is then reported
				# instead of looping forever.
				1 while ($line =~
				    s/\<([^>_]+)_([^>]+)\>/<$1 $2>/);
				die "_ in data - $line" if ($line =~ /_/);
				$values{$l}{$f}{$c}{$k} .= $line;

				last if (!$continue);
			}
		}
	}
	}
	}
}

#
# Convert the character named $s into its byte sequence in encoding $e.
#
# For UTF-8 the name is looked up in %utf8map.  For other encodings the
# name is mapped to a Unicode code (via %ucd, %utf8aliases or
# %translations) and then through %convertors, with %translations able to
# supply the hex value directly.  Dies when no mapping exists.  Hex values
# of one to three bytes are packed; any other length is reported on STDERR
# and the string "length = N" is returned.
#
sub decodecldr {
	my $e = shift;
	my $s = shift;

	my $v = undef;

	if ($e eq "UTF-8") {
		#
		# Conversion to UTF-8 can be done from the Unicode name to
		# the UTF-8 character code.
		#
		$v = $utf8map{$s};
		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	} else {
		#
		# Conversion to these encodings can be done from the Unicode
		# name to Unicode code to the encodings code.
		#
		my $ucc = undef;
		$ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
		$ucc = $ucd{name2code}{$utf8aliases{$s}}
			if (!defined $ucc
			 && $utf8aliases{$s}
			 && defined $ucd{name2code}{$utf8aliases{$s}});

		if (!defined $ucc) {
			if (defined $translations{$e}{$s}{hex}) {
				$v = $translations{$e}{$s}{hex};
				$ucc = 0;
			} elsif (defined $translations{$e}{$s}{ucc}) {
				$ucc = $translations{$e}{$s}{ucc};
			}
		}

		die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
		$v = $convertors{$e}{$ucc} if (!defined $v);

		$v = $translations{$e}{$s}{hex}
			if (!defined $v && defined $translations{$e}{$s}{hex});

		if (!defined $v && defined $translations{$e}{$s}{unicode}) {
			my $ucn = $translations{$e}{$s}{unicode};
			$ucc = $ucd{name2code}{$ucn}
				if (defined $ucd{name2code}{$ucn});
			# Same guard as the alias lookup above: do not use an
			# undefined alias as a hash key.
			$ucc = $ucd{name2code}{$utf8aliases{$ucn}}
				if (!defined $ucc
				 && $utf8aliases{$ucn}
				 && defined $ucd{name2code}{$utf8aliases{$ucn}});
			$v = $convertors{$e}{$ucc};
		}

		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	}

	return pack("C", hex($v)) if (length($v) == 2);
	return pack("CC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)))
		if (length($v) == 4);
	return pack("CCC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)),
	    hex(substr($v, 4, 2))) if (length($v) == 6);
	print STDERR "Cannot convert $e $s\n";
	return "length = " . length($v);

}

# Return the %translations entry for name $v in encoding $enc, or undef.
sub translate {
	my $enc = shift;
	my $v = shift;

	return $translations{$enc}{$v} if (defined $translations{$enc}{$v});
	return undef;
}

#
# Strip one pair of surrounding double quotes from $v and replace every
# <CHARACTER NAME> token by its bytes in encoding $enc.  $k is the field
# name (used in the error message only); $okay is a reference to a flag
# that is cleared when a token cannot be converted.
#
sub decode_value {
	my ($enc, $k, $v, $okay) = @_;

	$v =~ s/^"//;
	$v =~ s/"$//;
	while ($v =~ /^(.*?)<(.*?)>(.*)/) {
		my ($p1, $cm, $p3) = ($1, $2, $3);

		my $rv = decodecldr($enc, $cm);
		if (!defined $rv) {
			print STDERR
"Could not convert $k ($cm) from $DEFENCODING to $enc\n";
			$$okay = 0;
			# Leave the loop: $v is unchanged, so trying again
			# would match the same token forever.
			last;
		}

		$v = $p1 . $rv . $p3;
	}
	return $v;
}

#
# Write the fields collected by get_fields() for every selected locale and
# encoding to "$TYPE.draft/<name>.<encoding>.new", then rename the file to
# ".src" on success or ".failed" when a character could not be converted.
# The SHA-1 of the field text is recorded in %languages and %hashtable.
#
sub print_fields {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if (skip_locale($l, $f, $c));
		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
				print "Skipping ${l}_" .
				    ($f eq "x" ? "" : "${f}_") .
				    "${c} - not read\n";
				next;
			}
			my $file = ($c eq "COMMON")
			    ? "common" : locale_basename($l, $f, $c);
			print "Writing to $file in $enc\n";

			if ($enc ne $DEFENCODING &&
			    !defined $convertors{$enc}) {
				print "Failed! Cannot convert to $enc.\n";
				next;
			};

			my $draft = "$TYPE.draft/$file.$enc";
			open(my $fout, ">", "$draft.new")
			    or die("Cannot create $draft.new: $!");
			my $okay = 1;
			my $output = "";
			print $fout <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
			foreach my $k (keys(%keys)) {
				my $fmt = $keys{$k};

				die("Unknown $k in \%DESC")
					if (!defined $DESC{$k});

				$output .= "#\n# $DESC{$k}\n";

				# Replace one row with another
				if ($fmt =~ /^>/) {
					$k = substr($fmt, 1);
					$fmt = $keys{$k};
				}

				# Callback function
				if ($fmt =~ /^\</) {
					$callback{data}{c} = $c;
					$callback{data}{k} = $k;
					$callback{data}{l} = $l;
					$callback{data}{e} = $enc;
					my @a = split(/\</, substr($fmt, 1));
					my $rv = $callback{$a[0]}->(
					    $values{$l}{$f}{$c}{$a[1]});
					$values{$l}{$f}{$c}{$k} = $rv;
					$fmt = $a[2];
					$callback{data} = undef;
				}

				my $v = $values{$l}{$f}{$c}{$k};
				$v = "undef" if (!defined $v);

				if ($fmt eq "i" || $fmt eq "ai") {
					$output .= "$v\n";
					next;
				}
				if ($fmt eq "s") {
					$output .= decode_value($enc, $k, $v,
					    \$okay) . "\n";
					next;
				}
				if ($fmt eq "as") {
					foreach my $item (split(/;/, $v)) {
						$output .= decode_value($enc,
						    $k, $item, \$okay) . "\n";
					}
					next;
				}

				die("$k is '$fmt'");

			}

			$languages{$l}{$f}{data}{$c}{$enc} = sha1_hex($output);
			$hashtable{sha1_hex($output)}{"${l}_${f}_${c}.$enc"} = 1;
			print $fout "$output# EOF\n";
			close($fout);

			if ($okay) {
				rename("$draft.new", "$draft.src");
			} else {
				rename("$draft.new", "$draft.failed");
			}
		}
	}
	}
	}
}

#
# Write "$TYPE.draft/Makefile".  Nothing is written when a --lc filter is
# active.  Files with identical content (same hash in %hashtable) are
# emitted as SAME+= links to one preferred file and dropped from LOCALES;
# nc_link and e_link entries from the language table add further links.
#
sub make_makefile {
	return if ($#filter > -1);
	print "Creating Makefile for $TYPE\n";
	my $SRCOUT;
	my $SRCOUT2;
	my $MAPLOC;
	if ($TYPE eq "colldef") {
		$SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" .
			"\t-f \${MAPLOC}/map.UTF-8 " .
			"\${.OBJDIR}/\${.IMPSRC:T:R}";
		$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
				"locale/etc/final-maps\n";
		$SRCOUT2 = "LC_COLLATE";
	}
	elsif ($TYPE eq "ctypedef") {
		$SRCOUT = "localedef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
			"\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:C/^.*\\.//} " .
			"\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
			" || true";
		$SRCOUT2 = "LC_CTYPE";
		$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
				"locale/etc/final-maps\n";
	}
	else {
		$SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
		$SRCOUT2 = "out";
		$MAPLOC = "";
	}
	open(my $fout, ">", "$TYPE.draft/Makefile")
	    or die("Cannot create $TYPE.draft/Makefile: $!");
	# The recipe line below must start with a tab; it is written as \t so
	# that it survives whitespace-mangling editors.
	print $fout <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale.

LOCALEDIR= /usr/share/locale
FILESNAME= $FILESNAMES{$TYPE}
.SUFFIXES: .src .${SRCOUT2}
${MAPLOC}
.src.${SRCOUT2}:
\t$SRCOUT

## PLACEHOLDER

EOF

	foreach my $hash (keys(%hashtable)) {
		# For colldef, weight LOCALES to UTF-8
		#     Sort as upper-case and reverse to achieve it
		#     Make en_US, ru_RU, and ca_AD preferred
		my @files;
		if ($TYPE eq "colldef") {
			@files = sort {
				if ($a eq 'en_x_US.UTF-8' ||
				    $a eq 'ru_x_RU.UTF-8' ||
				    $a eq 'ca_x_AD.UTF-8') { return -1; }
				elsif ($b eq 'en_x_US.UTF-8' ||
				       $b eq 'ru_x_RU.UTF-8' ||
				       $b eq 'ca_x_AD.UTF-8') { return 1; }
				else { return uc($b) cmp uc($a); }
			} keys(%{$hashtable{$hash}});
		} elsif ($TYPE eq "ctypedef") {
			# Both sides use the same anchored patterns so the
			# comparison is symmetric.
			@files = sort {
				if ($a =~ /^en_x_US/ ||
				    $a =~ /^en_x_GB.ISO8859-15/ ||
				    $a =~ /^ru_x_RU/) { return -1; }
				elsif ($b =~ /^en_x_US/ ||
				       $b =~ /^en_x_GB.ISO8859-15/ ||
				       $b =~ /^ru_x_RU/) { return 1; }
				else { return uc($b) cmp uc($a); }
			} keys(%{$hashtable{$hash}});
		} else {
			@files = sort {
				if ($a =~ /COMMON/ ||
				    $b =~ /^en_x_US.UT/) { return 1; }
				elsif ($b =~ /COMMON/ ||
				       $a =~ /^en_x_US.UT/) { return -1; }
				else { return uc($b) cmp uc($a); }
			} keys(%{$hashtable{$hash}});
		}
		if ($#files > 0) {
			my $link = shift(@files);
			$link =~ s/_x_/_/;	# strip family if none there
			$link =~ s/en_COMMON/common/;
			foreach my $file (@files) {
				# Split before rewriting: the undef below needs
				# the original lang/family/country/encoding.
				my @a = split(/_/, $file);
				my @b = split(/\./, $a[-1]);
				$file =~ s/_x_/_/;
				$file =~ s/en_COMMON/common/;
				print $fout "SAME+=\t\t$link:$file\n";
				undef($languages{$a[0]}{$a[1]}{data}{$b[0]}{$b[1]});
			}
		}
	}

	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if (skip_locale($l, $f, $c));
		if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
		 && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
			print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") .
			    "${c} - not read\n";
			next;
		}
		my $base = locale_basename($l, $f, $c);

		foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			my $file = ($c eq "COMMON") ? "common" : $base;
			# Entries linked through SAME+= above were undef'ed.
			next if (!defined $languages{$l}{$f}{data}{$c}{$e});
			print $fout "LOCALES+=\t$file.$e\n";
		}

		if (defined $languages{$l}{$f}{nc_link}) {
			foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
				print $fout "SAME+=\t\t$base.$e:$languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n";
			}
		}

		if (defined $languages{$l}{$f}{e_link}) {
			foreach my $el (split(" ", $languages{$l}{$f}{e_link})) {
				my @a = split(/:/, $el);
				print $fout "SAME+=\t\t$base.$a[0]:$base.$a[1]\t# legacy (same charset)\n";
			}
		}

	}
	}
	}

	print $fout <<EOF;

FILES= \${LOCALES:S/\$/.${SRCOUT2}/}
CLEANFILES= \${FILES}

.for f in \${SAME}
SYMLINKS+= ../\${f:C/:.*\$//}/\${FILESNAME} \${LOCALEDIR}/\${f:C/^.*://}
.endfor

.for f in \${LOCALES}
FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
.endfor

.include <bsd.prog.mk>
EOF

	close($fout);
}