1# Common tools for test files to find the locales which exist on the 2# system. Caller should have verified that this isn't miniperl before calling 3# the functions. 4 5# Note that it's okay that some languages have their native names 6# capitalized here even though that's not "right". They are lowercased 7# anyway later during the scanning process (and besides, some clueless 8# vendor might have them capitalized erroneously anyway). 9 10# Functions whose names begin with underscore are internal helper functions 11# for this file, and are not to be used by outside callers. 12 13use Config; 14use strict; 15use warnings; 16use feature 'state'; 17 18my %known_bad_locales = ( # XXX eventually will need version info if and 19 # when these get fixed. 20 solaris => [ 'vi_VN.UTF-8', ], # Use of U+A8 segfaults: GH #20578 21); 22 23eval { require POSIX; import POSIX 'locale_h'; }; 24my $has_locale_h = ! $@; 25 26my @known_categories = ( qw(LC_ALL LC_COLLATE LC_CTYPE LC_MESSAGES LC_MONETARY 27 LC_NUMERIC LC_TIME LC_ADDRESS LC_IDENTIFICATION 28 LC_MEASUREMENT LC_PAPER LC_TELEPHONE LC_SYNTAX 29 LC_TOD LC_NAME)); 30my @platform_categories; 31 32my $has_excluded_category = $Config{ccflags} =~ /\bD?NO_LOCALE_/; 33sub category_excluded($) { 34 my $cat_name = shift =~ s/^LC_//r; 35 36 # Recognize Configure option to exclude a category 37 return $has_excluded_category 38 && $Config{ccflags} =~ /\bD?NO_LOCALE_$cat_name\b/; 39} 40 41# LC_ALL can be -1 on some platforms. And, in fact the implementors could 42# legally use any integer to represent any category. But it makes the most 43# sense for them to have used small integers. Below, we create new locale 44# numbers for ones missing from this machine. We make them very negative, 45# hopefully more negative than anything likely to be a valid category on the 46# platform, but also below is a check to be sure that our guess is valid. 47my $max_bad_category_number = -1000000; 48 49# Initialize this hash so that it looks like e.g., 50# 6 => 'CTYPE', 51# where 6 is the value of &POSIX::LC_CTYPE 52my %category_name; 53my %category_number; 54if ($has_locale_h) { 55 my $number_for_missing_category = $max_bad_category_number; 56 foreach my $name (@known_categories) { 57 my $number = eval "&POSIX::$name"; 58 if ($@) { 59 # Use a negative number (smaller than any legitimate category 60 # number) if the platform doesn't support this category, so we 61 # have an entry for all the ones that might be specified in calls 62 # to us. 63 $number = $number_for_missing_category--; 64 } 65 elsif ( $number !~ / ^ -? \d+ $ /x 66 || $number <= $max_bad_category_number) 67 { 68 # We think this should be an int. And it has to be larger than 69 # any of our synthetic numbers. 70 die "Unexpected locale category number '$number' for $name" 71 } 72 else { 73 push @platform_categories, $name; 74 } 75 76 $name =~ s/LC_//; 77 $category_name{$number} = "$name"; 78 $category_number{$name} = $number; 79 } 80} 81 82sub _my_diag($) { 83 my $message = shift; 84 if (defined &main::diag) { 85 diag($message); 86 } 87 else { 88 local($\, $", $,) = (undef, ' ', ''); 89 print STDERR $message, "\n"; 90 } 91} 92 93# Larger than any real test 94my $my_count = 1_000_000; 95 96sub _my_fail($) { 97 my $message = shift; 98 if (defined &main::fail) { 99 fail($message); 100 } 101 else { 102 local($\, $", $,) = (undef, ' ', ''); 103 print "not ok " . $my_count++ . $message . "\n"; 104 } 105} 106 107sub platform_locale_categories() { 108 return @platform_categories; 109} 110 111sub valid_locale_categories() { 112 # Returns a list of the locale categories (expressed as strings, like 113 # "LC_ALL") known to this program that are available on this platform. 114 115 return grep { ! category_excluded($_) } @platform_categories; 116} 117 118sub is_category_valid($) { 119 my $name = shift; 120 $name = 'LC_' . $name =~ s/^LC_//r; 121 return grep { $name eq $_ } valid_locale_categories(); 122} 123 124# It turns out that strings generated under the control of a given locale 125# category are often affected as well by LC_CTYPE. If the two categories 126# don't match, one can get mojibake or even core dumps. (khw thinks it more 127# likely that it's the code set, not the locale that's critical here; but 128# didn't run experiments to verify this.) Hence, in the code below, CTYPE and 129# the tested categories are all set to the same locale. If CTYPE isn't 130# available on the platform, LC_ALL is instead used. One might think to just 131# use LC_ALL all the time, but on Windows 132# setlocale(LC_ALL, "some_borked_locale") 133# can return success, whereas setting LC_CTYPE to it fails. 134my $master_category; 135$master_category = $category_number{'CTYPE'} 136 if is_category_valid('LC_CTYPE') && defined $category_number{'CTYPE'}; 137$master_category = $category_number{'ALL'} 138 if ! defined $master_category 139 && is_category_valid('LC_ALL') && defined $category_number{'ALL'}; 140 141my @platform_locales; # cache of locales found on this platform 142my $gathering_platform_locales = 0; # Should we gather locales, or use the 143 # cache? 144my %seen; # Used to avoid duplicates 145 146sub _trylocale ($$$$) { # For use only by other functions in this file! 147 148 # Adds the locale given by the first parameter to the list given by the 149 # 3rd iff the platform supports the locale in each of the category numbers 150 # given by the 2nd parameter, which is either a single category or a 151 # reference to a list of categories. 152 # 153 # The 4th parameter is true if to accept locales that aren't apparently 154 # fully compatible with Perl. 155 156 my $locale = shift; 157 my $categories = shift; 158 my $list = shift; 159 my $allow_incompatible = shift; 160 161 my $normalized_locale = lc ($locale =~ s/\W//gr); 162 return if ! $locale || grep { $normalized_locale eq lc ($_ =~ s/\W//gr) } @$list; 163 164 # This is a toy (pig latin) locale that is not fully implemented on some 165 # systems 166 return if $locale =~ / ^ pig $ /ix; 167 168 # Certain platforms have a crippled locale system in which setlocale 169 # returns success for just about any possible locale name, but if anything 170 # actually happens as a result of the call, it is that the underlying 171 # locale is set to a system default, likely C or C.UTF-8. We can't test 172 # such systems fully, but we shouldn't disable the user from using 173 # locales, as it may work out for them (or not). 174 return if defined $Config{d_setlocale_accepts_any_locale_name} 175 && $locale !~ / ^ (?: C | POSIX | C\.UTF-?8 ) $/ix; 176 177 if (exists $known_bad_locales{$^O}) { 178 my @bad_locales = $known_bad_locales{$^O}->@*; 179 return if grep { $locale eq $_ } @bad_locales; 180 } 181 182 183 my $badutf8 = 0; 184 my $plays_well = 1; 185 my $unsupported = 0; 186 187 use warnings 'locale'; 188 189 local $SIG{__WARN__} = sub { 190 $badutf8 = 1 if grep { /Malformed UTF-8/ } @_; 191 $unsupported = 1 if grep { /Locale .* is unsupported/i } @_; 192 $plays_well = 0 if grep { 193 /The following characters .* may not have the same meaning as the Perl program expects(?# 194 )|The Perl program will use the expected meanings/i 195 } @_; 196 }; 197 198 my $result; 199 my @category_list; 200 if (defined $categories) { 201 $categories = [ $categories ] unless ref $categories; 202 push @category_list, $categories->@*; 203 } 204 205 # Make the master category first thing on the list; adding it if necessary 206 if (defined $master_category) { 207 @category_list = grep { $_ != $master_category } @category_list; 208 unshift @category_list, $master_category; 209 } 210 211 foreach my $category (@category_list) { 212 my $save_locale = setlocale($category); 213 if (! $save_locale) { 214 _my_fail("Verify could save previous locale"); 215 return; 216 } 217 218 # Incompatible locales aren't warned about unless using locales. 219 use locale; 220 221 my $cur_result = setlocale($category, $locale); 222 return unless defined $cur_result; 223 224 no locale; 225 226 if ( $gathering_platform_locales 227 && $category eq $master_category 228 && ! $seen{$locale}) 229 { 230 push @platform_locales, $locale; 231 $seen{$locale}++; 232 } 233 234 # We definitely don't want the locale set to something that is 235 # unsupported 236 if (! setlocale($category, $save_locale)) { 237 my $error_text = "\$!=$!"; 238 $error_text .= "; \$^E=$^E" if $^E != $!; 239 die "Couldn't restore locale '$save_locale', category $category;" 240 . $error_text; 241 } 242 if ($badutf8) { 243 _my_fail("Verify locale name doesn't contain malformed utf8"); 244 return; 245 } 246 247 return if $unsupported; 248 249 # Commas in locale names are bad in Windows, and there is a bug in 250 # some versions where setlocale() turns a legal input locale name into 251 # an illegal return value, which it can't later parse. 252 return if $cur_result =~ /,/; 253 254 return unless $plays_well || $allow_incompatible; 255 256 if (! defined $result) { # First time 257 258 # If the name returned as $cur_result by the setlocale() above is the 259 # same as we requested, there are no complications: use that. 260 if ($locale eq $cur_result) { 261 $result = $cur_result; 262 } 263 else { 264 265 # But if it's different, we check if it's part of a disparate 266 # LC_ALL. If so, use the input locale; if not it means the 267 # input was a synonym, and we use what it maps to. 268 # 269 # First, if the platform uses positional notation 270 if ($Config{PERL_LC_ALL_SEPARATOR}) { 271 $result = (index($cur_result, $Config{PERL_LC_ALL_SEPARATOR}) 272 >= 0) 273 ? $locale 274 : $cur_result; 275 } 276 else { # Must be using name=value notation 277 $result = ($cur_result =~ / = .* ; /x) 278 ? $locale 279 : $cur_result; 280 } 281 } 282 } 283 elsif (! $has_excluded_category && $result ne $cur_result) { 284 285 # Some platforms will translate POSIX into C 286 if (! ( ($result eq "C" && $cur_result eq "POSIX") 287 || ($result eq "POSIX" && $cur_result eq "C"))) 288 { 289 # But otherwise if the new result for this category doesn't 290 # match what we already have for a previous category for this 291 # same input locale, it's problematic, so discard this whole 292 # locale. 293 return; 294 } 295 } 296 } 297 298 push @$list, $result; 299} 300 301sub _decode_encodings { # For use only by other functions in this file! 302 my @enc; 303 304 foreach (split(/ /, shift)) { 305 if (/^(\d+)$/) { 306 push @enc, "ISO8859-$1"; 307 push @enc, "ISO-8859-$1"; 308 push @enc, "iso8859$1"; # HP 309 if ($1 eq '1') { 310 push @enc, "roman8"; # HP 311 } 312 push @enc, $_; 313 push @enc, "$_.UTF-8"; 314 push @enc, "$_.65001"; # Windows UTF-8 315 } 316 } 317 if ($^O eq 'os390') { 318 push @enc, qw(IBM-037 IBM-819 IBM-1047); 319 } 320 push @enc, "UTF-8"; 321 push @enc, "65001"; # Windows UTF-8 322 push @enc, "1252"; # Windows 323 324 return @enc; 325} 326 327sub locales_enabled(;$) { 328 # If no parameter is specified, the function returns 1 if there is any 329 # "safe" locale handling available to the caller; otherwise 0. Safeness 330 # is defined here as the caller operating in the main thread of a program, 331 # or if threaded locales are safe on the platform and Configured to be 332 # used. This sub is used for testing purposes, and for those, this 333 # definition of safety is sufficient, and necessary to get some tests to 334 # run on certain configurations on certain platforms. But beware that the 335 # main thread can change the locale of any subthreads unless 336 # ${^SAFE_LOCALES} is non-zero. 337 # 338 # Use the optional parameter to discover if a particular category or 339 # categories are available on the system. 1 is returned if the global 340 # criteria described in the previous paragraph are true, AND if all the 341 # specified categories are available on the platform and Configured to be 342 # used. Otherwise 0 is returned. The parameter is either a single POSIX 343 # locale category or a reference to a list of them. Each category must be 344 # its name as a string, like 'LC_TIME' (the initial 'LC_' is optional), or 345 # the number this platform uses to signify the category (e.g., 346 # 'locales_enabled(&POSIX::LC_CTYPE)' 347 # 348 # When the function returns 1 and a parameter was specified as a list 349 # reference, the reference will be altered on return to point to an 350 # equivalent list such that the categories are numeric instead of strings 351 # and sorted to meet the input expectations of _trylocale(). 352 # 353 # It is a fatal error to call this with something that isn't a known 354 # category to this file. If this happens, look first for a typo, and 355 # second if you are using a category unknown to Perl. In the latter case 356 # a bug report should be submitted. 357 358 # khw cargo-culted the '?' in the pattern on the next line. 359 return 0 if $Config{ccflags} =~ /\bD?NO_LOCALE\b/; 360 361 # If we can't load the POSIX XS module, we can't have locales even if they 362 # normally would be available 363 return 0 if ! defined &DynaLoader::boot_DynaLoader; 364 365 # Don't test locales where they aren't safe. On systems with unsafe 366 # threads, for the purposes of testing, we consider the main thread safe, 367 # and all other threads unsafe. 368 if (! ${^SAFE_LOCALES}) { 369 return 0 if $^O eq 'os390'; # Threaded locales don't work well here 370 require threads; 371 return 0 if threads->tid() != 0; 372 } 373 374 # If no setlocale, we need the POSIX 2008 alternatives 375 if (! $Config{d_setlocale}) { 376 return 0 if $Config{ccflags} =~ /\bD?NO_POSIX_2008_LOCALE\b/; 377 return 0 unless $Config{d_newlocale}; 378 return 0 unless $Config{d_uselocale}; 379 return 0 unless $Config{d_duplocale}; 380 return 0 unless $Config{d_freelocale}; 381 } 382 383 # Done with the global possibilities. Now check if any passed in category 384 # is disabled. 385 386 my $categories_ref = $_[0]; 387 my $return_categories_numbers = 0; 388 my @categories_numbers; 389 my $has_LC_ALL = 0; 390 my $has_LC_COLLATE = 0; 391 392 if (defined $categories_ref) { 393 my @local_categories_copy; 394 395 my $reftype = ref $categories_ref; 396 if ($reftype eq 'ARRAY') { 397 @local_categories_copy = @$categories_ref; 398 $return_categories_numbers = 1; 399 } 400 elsif ($reftype ne "") { 401 die "Parameter to locales_enabled() must be an ARRAY;" 402 . " instead you used a $reftype"; 403 } 404 else { # Single category passed in 405 @local_categories_copy = $categories_ref; 406 } 407 408 for my $category_name_or_number (@local_categories_copy) { 409 my $name; 410 my $number; 411 if ($category_name_or_number =~ / ^ -? \d+ $ /x) { 412 $number = $category_name_or_number; 413 die "Invalid locale category number '$number'" 414 unless grep { $number == $_ } keys %category_name; 415 $name = $category_name{$number}; 416 } 417 else { 418 $name = $category_name_or_number; 419 $name =~ s/ ^ LC_ //x; 420 foreach my $trial (keys %category_name) { 421 if ($category_name{$trial} eq $name) { 422 $number = $trial; 423 last; 424 } 425 } 426 die "Invalid locale category name '$name'" 427 unless defined $number; 428 } 429 430 return 0 if $number <= $max_bad_category_number 431 || category_excluded($name); 432 433 434 eval "defined &POSIX::LC_$name"; 435 return 0 if $@; 436 437 if ($return_categories_numbers) { 438 if ($name eq 'CTYPE') { 439 unshift @categories_numbers, $number; # Always first 440 } 441 elsif ($name eq 'ALL') { 442 $has_LC_ALL = 1; 443 } 444 elsif ($name eq 'COLLATE') { 445 $has_LC_COLLATE = 1; 446 } 447 else { 448 push @categories_numbers, $number; 449 } 450 } 451 } 452 } 453 454 if ($return_categories_numbers) { 455 456 # COLLATE comes after all other locales except ALL, which comes last 457 if ($has_LC_COLLATE) { 458 push @categories_numbers, $category_number{'COLLATE'}; 459 } 460 if ($has_LC_ALL) { 461 push @categories_numbers, $category_number{'ALL'}; 462 } 463 464 @$categories_ref = @categories_numbers; 465 } 466 467 return 1; 468} 469 470 471sub find_locales ($;$) { 472 473 # Returns an array of all the locales we found on the system. If the 474 # optional 2nd parameter is non-zero, the list includes all found locales; 475 # otherwise it is restricted to those locales that play well with Perl, as 476 # far as we can easily determine. 477 # 478 # The first parameter is either a single locale category or a reference to 479 # a list of categories to find valid locales for it (or in the case of 480 # multiple) for all of them. Each category can be a name (like 'LC_ALL' 481 # or simply 'ALL') or the C enum value for the category. 482 483 my $input_categories = shift; 484 my $allow_incompatible = shift // 0; 485 486 die ("Usage: find_locales( category | [ categories ] )") 487 unless defined $input_categories; 488 my @categories = (ref $input_categories) 489 ? $input_categories->@* 490 : $input_categories; 491 492 # If we can't use at least one of these categories, investigate further 493 if (! locales_enabled(\@categories)) { 494 495 # Not usable at all if system doesn't have locales 496 return unless locales_enabled(); 497 498 # Nor if any of the required categories isn't on the system 499 my @on_platform = platform_locale_categories(); 500 for my $category (@categories) { 501 return unless grep { $category eq $_ } @on_platform; 502 } 503 504 # Otherwise the category is on the system, but not generally usable. 505 # But the two always-present locales should be usable 506 return ( "C", "POSIX" ); 507 } 508 509 510 # Note, the subroutine call above converts the $categories into a form 511 # suitable for _trylocale(). 512 513 # Visual C's CRT goes silly on strings of the form "en_US.ISO8859-1" 514 # and mingw32 uses said silly CRT 515 # This doesn't seem to be an issue any more, at least on Windows XP, 516 # so re-enable the tests for Windows XP onwards. 517 my $winxp = ($^O eq 'MSWin32' && defined &Win32::GetOSVersion && 518 join('.', (Win32::GetOSVersion())[1..2]) >= 5.1); 519 return if (($^O eq 'MSWin32' && !$winxp) 520 && $Config{cc} =~ /^(cl|gcc|g\+\+|ici)/i); 521 522 my @Locale; 523 524 if (@platform_locales) { 525 $gathering_platform_locales = 0; 526 foreach my $locale (@platform_locales) { 527 _trylocale($locale, \@categories, \@Locale, $allow_incompatible); 528 } 529 } 530 else { 531 $gathering_platform_locales = 1; 532 533 _trylocale("C", \@categories, \@Locale, $allow_incompatible); 534 _trylocale("POSIX", \@categories, \@Locale, $allow_incompatible); 535 536 if ($Config{d_has_C_UTF8} && $Config{d_has_C_UTF8} eq 'true') { 537 _trylocale("C.UTF-8", \@categories, \@Locale, $allow_incompatible); 538 } 539 540 # There's no point in looking at anything more if we know that 541 # setlocale will return success on any garbage or non-garbage name. 542 return sort @Locale 543 if defined $Config{d_setlocale_accepts_any_locale_name}; 544 545 foreach (1..16) { 546 _trylocale("ISO8859-$_", \@categories, \@Locale, 547 $allow_incompatible); 548 _trylocale("iso8859$_", \@categories, \@Locale, 549 $allow_incompatible); 550 _trylocale("iso8859-$_", \@categories, \@Locale, 551 $allow_incompatible); 552 _trylocale("iso_8859_$_", \@categories, \@Locale, 553 $allow_incompatible); 554 _trylocale("isolatin$_", \@categories, \@Locale, 555 $allow_incompatible); 556 _trylocale("isolatin-$_", \@categories, \@Locale, 557 $allow_incompatible); 558 _trylocale("iso_latin_$_", \@categories, \@Locale, 559 $allow_incompatible); 560 } 561 562 # Sanitize the environment so that we can run the external 'locale' 563 # program without the taint mode getting grumpy. 564 565 # $ENV{PATH} is special in VMS. 566 delete local $ENV{PATH} if $^O ne 'VMS' or $Config{d_setenv}; 567 568 # Other subversive stuff. 569 delete local @ENV{qw(IFS CDPATH ENV BASH_ENV)}; 570 571 if (-x "/usr/bin/locale" 572 && open(LOCALES, '-|', "/usr/bin/locale -a 2>/dev/null")) 573 { 574 while (<LOCALES>) { 575 576 # It seems that /usr/bin/locale steadfastly outputs 8 bit 577 # data, which ain't great when we're running this 578 # testPERL_UNICODE= so that utf8 locales will cause all IO 579 # hadles to default to (assume) utf8 580 next unless utf8::valid($_); 581 chomp; 582 _trylocale($_, \@categories, \@Locale, $allow_incompatible); 583 } 584 585 close(LOCALES); 586 } elsif ($^O eq 'VMS' 587 && defined($ENV{'SYS$I18N_LOCALE'}) 588 && -d 'SYS$I18N_LOCALE') 589 { 590 # The SYS$I18N_LOCALE logical name search list was not present on 591 # VAX VMS V5.5-12, but was on AXP && VAX VMS V6.2 as well as later 592 # versions. 593 opendir(LOCALES, "SYS\$I18N_LOCALE:"); 594 while ($_ = readdir(LOCALES)) { 595 chomp; 596 _trylocale($_, \@categories, \@Locale, $allow_incompatible); 597 } 598 close(LOCALES); 599 } elsif ( ($^O eq 'openbsd' || $^O eq 'bitrig' ) 600 && -e '/usr/share/locale') 601 { 602 603 # OpenBSD doesn't have a locale executable, so reading 604 # /usr/share/locale is much easier and faster than the last resort 605 # method. 606 607 opendir(LOCALES, '/usr/share/locale'); 608 while ($_ = readdir(LOCALES)) { 609 chomp; 610 _trylocale($_, \@categories, \@Locale, $allow_incompatible); 611 } 612 close(LOCALES); 613 } else { # Final fallback. Try our list of locales hard-coded here 614 615 # This is going to be slow. 616 my @Data; 617 618 # Locales whose name differs if the utf8 bit is on are stored in 619 # these two files with appropriate encodings. 620 my $data_file = ($^H & 0x08 || (${^OPEN} || "") =~ /:utf8/) 621 ? _source_location() . "/lib/locale/utf8" 622 : _source_location() . "/lib/locale/latin1"; 623 if (-e $data_file) { 624 @Data = do $data_file; 625 } 626 else { 627 _my_diag(__FILE__ . ":" . __LINE__ . 628 ": '$data_file' doesn't exist"); 629 } 630 631 # The rest of the locales are in this file. 632 state @my_data = <DATA>; close DATA if fileno DATA; 633 push @Data, @my_data; 634 635 foreach my $default (qw(.ACP .OCP)) { 636 _trylocale($default, \@categories, \@Locale, 637 $allow_incompatible); 638 } 639 640 foreach my $line (@Data) { 641 chomp $line; 642 my ($locale_name, $language_codes, $country_codes, $encodings) = 643 split /:/, $line; 644 _my_diag(__FILE__ . ":" . __LINE__ 645 . ": Unexpected syntax in '$line'") 646 unless defined $locale_name; 647 my @enc = _decode_encodings($encodings); 648 foreach my $loc (split(/ /, $locale_name)) { 649 _trylocale($loc, \@categories, \@Locale, 650 $allow_incompatible); 651 foreach my $enc (@enc) { 652 _trylocale("$loc.$enc", \@categories, \@Locale, 653 $allow_incompatible); 654 } 655 $loc = lc $loc; 656 foreach my $enc (@enc) { 657 _trylocale("$loc.$enc", \@categories, \@Locale, 658 $allow_incompatible); 659 } 660 } 661 foreach my $lang (split(/ /, $language_codes)) { 662 _trylocale($lang, \@categories, \@Locale, 663 $allow_incompatible); 664 foreach my $country (split(/ /, $country_codes)) { 665 my $lc = "${lang}_${country}"; 666 _trylocale($lc, \@categories, \@Locale, 667 $allow_incompatible); 668 foreach my $enc (@enc) { 669 _trylocale("$lc.$enc", \@categories, \@Locale, 670 $allow_incompatible); 671 } 672 my $lC = "${lang}_\U${country}"; 673 _trylocale($lC, \@categories, \@Locale, 674 $allow_incompatible); 675 foreach my $enc (@enc) { 676 _trylocale("$lC.$enc", \@categories, \@Locale, 677 $allow_incompatible); 678 } 679 } 680 } 681 } 682 } 683 } 684 685 my %Locale; 686 $Locale{$_} = 1 for @Locale; 687 @Locale = sort keys %Locale; 688 689 return @Locale; 690} 691 692sub is_locale_utf8 ($) { # Return a boolean as to if core Perl thinks the input 693 # is a UTF-8 locale 694 695 # On z/OS, even locales marked as UTF-8 aren't. 696 return 0 if ord "A" != 65; 697 698 return 0 unless locales_enabled('LC_CTYPE'); 699 700 my $locale = shift; 701 702 no warnings 'locale'; # We may be trying out a weird locale 703 use locale; 704 705 my $save_locale = setlocale(&POSIX::LC_CTYPE()); 706 if (! $save_locale) { 707 _my_fail("Verify could save previous locale"); 708 return 0; 709 } 710 711 if (! setlocale(&POSIX::LC_CTYPE(), $locale)) { 712 _my_fail("Verify could setlocale to $locale"); 713 return 0; 714 } 715 716 my $ret = 0; 717 718 # Use an op that gives different results for UTF-8 than any other locale. 719 # If a platform has UTF-8 locales, there should be at least one locale on 720 # most platforms with UTF-8 in its name, so if there is a bug in the op 721 # giving a false negative, we should get a failure for those locales as we 722 # go through testing all the locales on the platform. 723 if (CORE::fc(chr utf8::unicode_to_native(0xdf)) ne "ss") { 724 if ($locale =~ /UTF-?8/i) { 725 _my_fail("Verify $locale with UTF-8 in name is a UTF-8 locale"); 726 } 727 } 728 else { 729 $ret = 1; 730 } 731 732 die "Couldn't restore locale '$save_locale'" 733 unless setlocale(&POSIX::LC_CTYPE(), $save_locale); 734 735 return $ret; 736} 737 738sub classify_locales_wrt_utf8ness($) { 739 740 # Takes the input list of locales, and returns two lists split apart from 741 # it: the UTF-8 ones, and the non-UTF-8 ones. 742 743 my $locales_ref = shift; 744 my (@utf8, @non_utf8); 745 746 if (! locales_enabled('LC_CTYPE')) { # No CTYPE implies all are non-UTF-8 747 @non_utf8 = $locales_ref->@*; 748 return ( \@utf8, \@non_utf8 ); 749 } 750 751 foreach my $locale (@$locales_ref) { 752 my $which = (is_locale_utf8($locale)) ? \@utf8 : \@non_utf8; 753 push $which->@*, $locale; 754 } 755 756 return ( \@utf8, \@non_utf8 ); 757} 758 759sub find_utf8_ctype_locales (;$) { 760 761 # Return the names of the locales that core Perl thinks are UTF-8 LC_CTYPE 762 # locales. Optional parameter is a reference to a list of locales to try; 763 # if omitted, this tries all locales it can find on the platform 764 765 return unless locales_enabled('LC_CTYPE'); 766 767 my $locales_ref = shift; 768 if (! defined $locales_ref) { 769 770 my @locales = find_locales(&POSIX::LC_CTYPE()); 771 $locales_ref = \@locales; 772 } 773 774 my ($utf8_ref, undef) = classify_locales_wrt_utf8ness($locales_ref); 775 return unless $utf8_ref; 776 return $utf8_ref->@*; 777} 778 779sub find_utf8_ctype_locale (;$) { # Return the name of a locale that core Perl 780 # thinks is a UTF-8 LC_CTYPE non-turkic 781 # locale. 782 # Optional parameter is a reference to a 783 # list of locales to try; if omitted, this 784 # tries all locales it can find on the 785 # platform 786 my $try_locales_ref = shift; 787 788 my @utf8_locales = find_utf8_ctype_locales($try_locales_ref); 789 my @turkic_locales = find_utf8_turkic_locales($try_locales_ref); 790 791 my %seen_turkic; 792 793 # Create undef elements in the hash for turkic locales 794 @seen_turkic{@turkic_locales} = (); 795 796 foreach my $locale (@utf8_locales) { 797 return $locale unless exists $seen_turkic{$locale}; 798 } 799 800 return; 801} 802 803sub find_utf8_turkic_locales (;$) { 804 805 # Return the name of all the locales that core Perl thinks are UTF-8 806 # Turkic LC_CTYPE. Optional parameter is a reference to a list of locales 807 # to try; if omitted, this tries all locales it can find on the platform 808 809 my @return; 810 811 return unless locales_enabled('LC_CTYPE'); 812 813 my $save_locale = setlocale(&POSIX::LC_CTYPE()); 814 foreach my $locale (find_utf8_ctype_locales(shift)) { 815 use locale; 816 setlocale(&POSIX::LC_CTYPE(), $locale); 817 push @return, $locale if uc('i') eq "\x{130}"; 818 } 819 820 die "Couldn't restore locale '$save_locale'" 821 unless setlocale(&POSIX::LC_CTYPE(), $save_locale); 822 823 return @return; 824} 825 826sub find_utf8_turkic_locale (;$) { 827 my @turkics = find_utf8_turkic_locales(shift); 828 829 return unless @turkics; 830 return $turkics[0] 831} 832 833 834# returns full path to the directory containing the current source 835# file, inspired by mauke's Dir::Self 836sub _source_location { 837 require File::Spec; 838 839 my $caller_filename = (caller)[1]; 840 841 my $loc = File::Spec->rel2abs( 842 File::Spec->catpath( 843 (File::Spec->splitpath($caller_filename))[0, 1], '' 844 ) 845 ); 846 847 return ($loc =~ /^(.*)$/)[0]; # untaint 848} 849 8501 851 852# Format of data is: locale_name, language_codes, country_codes, encodings 853__DATA__ 854Afrikaans:af:za:1 15 855Arabic:ar:dz eg sa:6 arabic8 856Brezhoneg Breton:br:fr:1 15 857Bulgarski Bulgarian:bg:bg:5 858Chinese:zh:cn tw:cn.EUC eucCN eucTW euc.CN euc.TW Big5 GB2312 tw.EUC 859Hrvatski Croatian:hr:hr:2 860Cymraeg Welsh:cy:cy:1 14 15 861Czech:cs:cz:2 862Dansk Danish:da:dk:1 15 863Nederlands Dutch:nl:be nl:1 15 864English American British:en:au ca gb ie nz us uk zw:1 15 cp850 865Esperanto:eo:eo:3 866Eesti Estonian:et:ee:4 6 13 867Suomi Finnish:fi:fi:1 15 868Flamish::fl:1 15 869Deutsch German:de:at be ch de lu:1 15 870Euskaraz Basque:eu:es fr:1 15 871Galego Galician:gl:es:1 15 872Ellada Greek:el:gr:7 g8 873Frysk:fy:nl:1 15 874Greenlandic:kl:gl:4 6 875Hebrew:iw:il:8 hebrew8 876Hungarian:hu:hu:2 877Indonesian:id:id:1 15 878Gaeilge Irish:ga:IE:1 14 15 879Italiano Italian:it:ch it:1 15 880Nihongo Japanese:ja:jp:euc eucJP jp.EUC sjis 881Korean:ko:kr: 882Latine Latin:la:va:1 15 883Latvian:lv:lv:4 6 13 884Lithuanian:lt:lt:4 6 13 885Macedonian:mk:mk:1 15 886Maltese:mt:mt:3 887Moldovan:mo:mo:2 888Norsk Norwegian:no no\@nynorsk nb nn:no:1 15 889Occitan:oc:es:1 15 890Polski Polish:pl:pl:2 891Rumanian:ro:ro:2 892Russki Russian:ru:ru su ua:5 koi8 koi8r KOI8-R koi8u cp1251 cp866 893Serbski Serbian:sr:yu:5 894Slovak:sk:sk:2 895Slovene Slovenian:sl:si:2 896Sqhip Albanian:sq:sq:1 15 897Svenska Swedish:sv:fi se:1 15 898Thai:th:th:11 tis620 899Turkish:tr:tr:9 turkish8 900Yiddish:yi::1 15 901