1# Common tools for test files files to find the locales which exist on the 2# system. Caller should have verified that this isn't miniperl before calling 3# the functions. 4 5# Note that it's okay that some languages have their native names 6# capitalized here even though that's not "right". They are lowercased 7# anyway later during the scanning process (and besides, some clueless 8# vendor might have them capitalized erroneously anyway). 9 10# Functions whose names begin with underscore are internal helper functions 11# for this file, and are not to be used by outside callers. 12 13use Config; 14use strict; 15 16eval { require POSIX; import POSIX 'locale_h'; }; 17my $has_locale_h = ! $@; 18 19my @known_categories = ( qw(LC_ALL LC_COLLATE LC_CTYPE LC_MESSAGES LC_MONETARY 20 LC_NUMERIC LC_TIME LC_ADDRESS LC_IDENTIFICATION 21 LC_MEASUREMENT LC_PAPER LC_TELEPHONE)); 22my @platform_categories; 23 24# LC_ALL can be -1 on some platforms. And, in fact the implementors could 25# legally use any integer to represent any category. But it makes the most 26# sense for them to have used small integers. Below, we create new locale 27# numbers for ones missing from this machine. We make them very negative, 28# hopefully more negative than anything likely to be a valid category on the 29# platform, but also below is a check to be sure that our guess is valid. 30my $max_bad_category_number = -1000000; 31 32# Initialize this hash so that it looks like e.g., 33# 6 => 'CTYPE', 34# where 6 is the value of &POSIX::LC_CTYPE 35my %category_name; 36my %category_number; 37if ($has_locale_h) { 38 my $number_for_missing_category = $max_bad_category_number; 39 foreach my $name (@known_categories) { 40 my $number = eval "&POSIX::$name"; 41 if ($@) { 42 # Use a negative number (smaller than any legitimate category 43 # number) if the platform doesn't support this category, so we 44 # have an entry for all the ones that might be specified in calls 45 # to us. 46 $number = $number_for_missing_category-- if $@; 47 } 48 elsif ( $number !~ / ^ -? \d+ $ /x 49 || $number <= $max_bad_category_number) 50 { 51 # We think this should be an int. And it has to be larger than 52 # any of our synthetic numbers. 53 die "Unexpected locale category number '$number' for $name" 54 } 55 else { 56 push @platform_categories, $name; 57 } 58 59 $name =~ s/LC_//; 60 $category_name{$number} = "$name"; 61 $category_number{$name} = $number; 62 } 63} 64 65sub _my_diag($) { 66 my $message = shift; 67 if (defined &main::diag) { 68 diag($message); 69 } 70 else { 71 local($\, $", $,) = (undef, ' ', ''); 72 print STDERR $message, "\n"; 73 } 74} 75 76sub _my_fail($) { 77 my $message = shift; 78 if (defined &main::fail) { 79 fail($message); 80 } 81 else { 82 local($\, $", $,) = (undef, ' ', ''); 83 print "not ok 0 $message\n"; 84 } 85} 86 87sub _trylocale ($$$$) { # For use only by other functions in this file! 88 89 # Adds the locale given by the first parameter to the list given by the 90 # 3rd iff the platform supports the locale in each of the category numbers 91 # given by the 2nd parameter, which is either a single category or a 92 # reference to a list of categories. The list MUST be sorted so that 93 # CTYPE is first, COLLATE is last unless ALL is present, in which case 94 # that comes after COLLATE. This is because locale.c detects bad locales 95 # only with CTYPE, and COLLATE on some platforms can core dump if it is a 96 # bad locale. 97 # 98 # The 4th parameter is true if to accept locales that aren't apparently 99 # fully compatible with Perl. 100 101 my $locale = shift; 102 my $categories = shift; 103 my $list = shift; 104 my $allow_incompatible = shift; 105 106 return if ! $locale || grep { $locale eq $_ } @$list; 107 108 # This is a toy (pig latin) locale that is not fully implemented on some 109 # systems 110 return if $locale =~ / ^ pig $ /ix; 111 112 # As of 6.3, this platform's locale handling is basically broken. khw 113 # filed a bug report (no ticket number was returned), and it is supposedly 114 # going to change in a future release, so the statements here below sunset 115 # for any larger version, at which point this may start failing and have 116 # to be revisited. 117 # 118 # Given a legal individual category, basically whatever you set the locale 119 # to, the return from setlocale() indicates that it has taken effect, even 120 # if it hasn't. However, the return from querying LC_ALL won't reflect 121 # this. 122 if ($Config{osname} =~ /openbsd/i && $locale !~ / ^ (?: C | POSIX ) $/ix) { 123 my ($major, $minor) = $Config{osvers} =~ / ^ ( \d+ ) \. ( \d+ ) /ax; 124 return if ! defined $major || ! defined $minor 125 || $major < 6 || ($major == 6 && $minor <= 3); 126 } 127 128 $categories = [ $categories ] unless ref $categories; 129 130 my $badutf8 = 0; 131 my $plays_well = 1; 132 133 use warnings 'locale'; 134 135 local $SIG{__WARN__} = sub { 136 $badutf8 = 1 if grep { /Malformed UTF-8/ } @_; 137 $plays_well = 0 if grep { 138 /Locale .* may not work well(?# 139 )|The Perl program will use the expected meanings/i 140 } @_; 141 }; 142 143 # Incompatible locales aren't warned about unless using locales. 144 use locale; 145 146 foreach my $category (@$categories) { 147 die "category '$category' must instead be a number" 148 unless $category =~ / ^ -? \d+ $ /x; 149 150 return unless setlocale($category, $locale); 151 last if $badutf8 || ! $plays_well; 152 } 153 154 if ($badutf8) { 155 _my_fail("Verify locale name doesn't contain malformed utf8"); 156 return; 157 } 158 push @$list, $locale if $plays_well || $allow_incompatible; 159} 160 161sub _decode_encodings { # For use only by other functions in this file! 162 my @enc; 163 164 foreach (split(/ /, shift)) { 165 if (/^(\d+)$/) { 166 push @enc, "ISO8859-$1"; 167 push @enc, "iso8859$1"; # HP 168 if ($1 eq '1') { 169 push @enc, "roman8"; # HP 170 } 171 push @enc, $_; 172 push @enc, "$_.UTF-8"; 173 push @enc, "$_.65001"; # Windows UTF-8 174 push @enc, "$_.ACP"; # Windows ANSI code page 175 push @enc, "$_.OCP"; # Windows OEM code page 176 push @enc, "$_.1252"; # Windows 177 } 178 } 179 if ($^O eq 'os390') { 180 push @enc, qw(IBM-037 IBM-819 IBM-1047); 181 } 182 push @enc, "UTF-8"; 183 push @enc, "65001"; # Windows UTF-8 184 185 return @enc; 186} 187 188sub valid_locale_categories() { 189 # Returns a list of the locale categories (expressed as strings, like 190 # "LC_ALL) known to this program that are available on this platform. 191 192 return @platform_categories; 193} 194 195sub locales_enabled(;$) { 196 # Returns 0 if no locale handling is available on this platform; otherwise 197 # 1. 198 # 199 # The optional parameter is a reference to a list of individual POSIX 200 # locale categories. If any of the individual categories specified by the 201 # optional parameter is all digits (and an optional leading minus), it is 202 # taken to be the C enum for the category (e.g., &POSIX::LC_CTYPE). 203 # Otherwise it should be a string name of the category, like 'LC_TIME'. 204 # The initial 'LC_' is optional. It is a fatal error to call this with 205 # something that isn't a known category to this file. 206 # 207 # This optional parameter denotes which POSIX locale categories must be 208 # available on the platform. If any aren't available, this function 209 # returns 0; otherwise it returns 1 and changes the list for the caller so 210 # that any category names are converted into their equivalent numbers, and 211 # sorts it to match the expectations of _trylocale. 212 # 213 # It is acceptable for the second parameter to be just a simple scalar 214 # denoting a single category (either name or number). No conversion into 215 # a number is done in this case. 216 217 return 0 unless $Config{d_setlocale} 218 # I (khw) cargo-culted the '?' in the pattern on the 219 # next line. 220 && $Config{ccflags} !~ /\bD?NO_LOCALE\b/ 221 && $has_locale_h; 222 223 # Done with the global possibilities. Now check if any passed in category 224 # is disabled. 225 226 my $categories_ref = shift; 227 my $return_categories_numbers = 0; 228 my @categories_numbers; 229 my $has_LC_ALL = 0; 230 my $has_LC_COLLATE = 0; 231 232 if (defined $categories_ref) { 233 my @local_categories_copy; 234 235 if (ref $categories_ref) { 236 @local_categories_copy = @$$categories_ref; 237 $return_categories_numbers = 1; 238 } 239 else { # Single category passed in 240 @local_categories_copy = $categories_ref; 241 } 242 243 for my $category_name_or_number (@local_categories_copy) { 244 my $name; 245 my $number; 246 if ($category_name_or_number =~ / ^ -? \d+ $ /x) { 247 $number = $category_name_or_number; 248 die "Invalid locale category number '$number'" 249 unless grep { $number == $_ } keys %category_name; 250 $name = $category_name{$number}; 251 } 252 else { 253 $name = $category_name_or_number; 254 $name =~ s/ ^ LC_ //x; 255 foreach my $trial (keys %category_name) { 256 if ($category_name{$trial} eq $name) { 257 $number = $trial; 258 last; 259 } 260 } 261 die "Invalid locale category name '$name'" 262 unless defined $number; 263 } 264 265 return 0 if $number <= $max_bad_category_number 266 || $Config{ccflags} =~ /\bD?NO_LOCALE_$name\b/; 267 268 eval "defined &POSIX::LC_$name"; 269 return 0 if $@; 270 271 if ($return_categories_numbers) { 272 if ($name eq 'CTYPE') { 273 unshift @categories_numbers, $number; # Always first 274 } 275 elsif ($name eq 'ALL') { 276 $has_LC_ALL = 1; 277 } 278 elsif ($name eq 'COLLATE') { 279 $has_LC_COLLATE = 1; 280 } 281 else { 282 push @categories_numbers, $number; 283 } 284 } 285 } 286 } 287 288 if ($return_categories_numbers) { 289 290 # COLLATE comes after all other locales except ALL, which comes last 291 if ($has_LC_COLLATE) { 292 push @categories_numbers, $category_number{'COLLATE'}; 293 } 294 if ($has_LC_ALL) { 295 push @categories_numbers, $category_number{'ALL'}; 296 } 297 $$categories_ref = \@categories_numbers; 298 } 299 300 return 1; 301} 302 303 304sub find_locales ($;$) { 305 306 # Returns an array of all the locales we found on the system. If the 307 # optional 2nd parameter is non-zero, the list includes all found locales; 308 # otherwise it is restricted to those locales that play well with Perl, as 309 # far as we can easily determine. 310 # 311 # The first parameter is either a single locale category or a reference to 312 # a list of categories to find valid locales for it (or in the case of 313 # multiple) for all of them. Each category can be a name (like 'LC_ALL' 314 # or simply 'ALL') or the C enum value for the category. 315 316 my $categories = shift; 317 my $allow_incompatible = shift // 0; 318 319 $categories = [ $categories ] unless ref $categories; 320 return unless locales_enabled(\$categories); 321 322 # Note, the subroutine call above converts the $categories into a form 323 # suitable for _trylocale(). 324 325 # Visual C's CRT goes silly on strings of the form "en_US.ISO8859-1" 326 # and mingw32 uses said silly CRT 327 # This doesn't seem to be an issue any more, at least on Windows XP, 328 # so re-enable the tests for Windows XP onwards. 329 my $winxp = ($^O eq 'MSWin32' && defined &Win32::GetOSVersion && 330 join('.', (Win32::GetOSVersion())[1..2]) >= 5.1); 331 return if ((($^O eq 'MSWin32' && !$winxp) || $^O eq 'NetWare') 332 && $Config{cc} =~ /^(cl|gcc|g\+\+|ici)/i); 333 334 # UWIN seems to loop after taint tests, just skip for now 335 return if ($^O =~ /^uwin/); 336 337 my @Locale; 338 _trylocale("C", $categories, \@Locale, $allow_incompatible); 339 _trylocale("POSIX", $categories, \@Locale, $allow_incompatible); 340 foreach (1..16) { 341 _trylocale("ISO8859-$_", $categories, \@Locale, $allow_incompatible); 342 _trylocale("iso8859$_", $categories, \@Locale, $allow_incompatible); 343 _trylocale("iso8859-$_", $categories, \@Locale, $allow_incompatible); 344 _trylocale("iso_8859_$_", $categories, \@Locale, $allow_incompatible); 345 _trylocale("isolatin$_", $categories, \@Locale, $allow_incompatible); 346 _trylocale("isolatin-$_", $categories, \@Locale, $allow_incompatible); 347 _trylocale("iso_latin_$_", $categories, \@Locale, $allow_incompatible); 348 } 349 350 # Sanitize the environment so that we can run the external 'locale' 351 # program without the taint mode getting grumpy. 352 353 # $ENV{PATH} is special in VMS. 354 delete local $ENV{PATH} if $^O ne 'VMS' or $Config{d_setenv}; 355 356 # Other subversive stuff. 357 delete local @ENV{qw(IFS CDPATH ENV BASH_ENV)}; 358 359 if (-x "/usr/bin/locale" 360 && open(LOCALES, '-|', "/usr/bin/locale -a 2>/dev/null")) 361 { 362 while (<LOCALES>) { 363 # It seems that /usr/bin/locale steadfastly outputs 8 bit data, which 364 # ain't great when we're running this testPERL_UNICODE= so that utf8 365 # locales will cause all IO hadles to default to (assume) utf8 366 next unless utf8::valid($_); 367 chomp; 368 _trylocale($_, $categories, \@Locale, $allow_incompatible); 369 } 370 close(LOCALES); 371 } elsif ($^O eq 'VMS' 372 && defined($ENV{'SYS$I18N_LOCALE'}) 373 && -d 'SYS$I18N_LOCALE') 374 { 375 # The SYS$I18N_LOCALE logical name search list was not present on 376 # VAX VMS V5.5-12, but was on AXP && VAX VMS V6.2 as well as later versions. 377 opendir(LOCALES, "SYS\$I18N_LOCALE:"); 378 while ($_ = readdir(LOCALES)) { 379 chomp; 380 _trylocale($_, $categories, \@Locale, $allow_incompatible); 381 } 382 close(LOCALES); 383 } elsif (($^O eq 'openbsd' || $^O eq 'bitrig' ) && -e '/usr/share/locale') { 384 385 # OpenBSD doesn't have a locale executable, so reading 386 # /usr/share/locale is much easier and faster than the last resort 387 # method. 388 389 opendir(LOCALES, '/usr/share/locale'); 390 while ($_ = readdir(LOCALES)) { 391 chomp; 392 _trylocale($_, $categories, \@Locale, $allow_incompatible); 393 } 394 close(LOCALES); 395 } else { # Final fallback. Try our list of locales hard-coded here 396 397 # This is going to be slow. 398 my @Data; 399 400 # Locales whose name differs if the utf8 bit is on are stored in these 401 # two files with appropriate encodings. 402 my $data_file = ($^H & 0x08 || (${^OPEN} || "") =~ /:utf8/) 403 ? _source_location() . "/lib/locale/utf8" 404 : _source_location() . "/lib/locale/latin1"; 405 if (-e $data_file) { 406 @Data = do $data_file; 407 } 408 else { 409 _my_diag(__FILE__ . ":" . __LINE__ . ": '$data_file' doesn't exist"); 410 } 411 412 # The rest of the locales are in this file. 413 push @Data, <DATA>; 414 415 foreach my $line (@Data) { 416 my ($locale_name, $language_codes, $country_codes, $encodings) = 417 split /:/, $line; 418 _my_diag(__FILE__ . ":" . __LINE__ . ": Unexpected syntax in '$line'") 419 unless defined $locale_name; 420 my @enc = _decode_encodings($encodings); 421 foreach my $loc (split(/ /, $locale_name)) { 422 _trylocale($loc, $categories, \@Locale, $allow_incompatible); 423 foreach my $enc (@enc) { 424 _trylocale("$loc.$enc", $categories, \@Locale, 425 $allow_incompatible); 426 } 427 $loc = lc $loc; 428 foreach my $enc (@enc) { 429 _trylocale("$loc.$enc", $categories, \@Locale, 430 $allow_incompatible); 431 } 432 } 433 foreach my $lang (split(/ /, $language_codes)) { 434 _trylocale($lang, $categories, \@Locale, $allow_incompatible); 435 foreach my $country (split(/ /, $country_codes)) { 436 my $lc = "${lang}_${country}"; 437 _trylocale($lc, $categories, \@Locale, $allow_incompatible); 438 foreach my $enc (@enc) { 439 _trylocale("$lc.$enc", $categories, \@Locale, 440 $allow_incompatible); 441 } 442 my $lC = "${lang}_\U${country}"; 443 _trylocale($lC, $categories, \@Locale, $allow_incompatible); 444 foreach my $enc (@enc) { 445 _trylocale("$lC.$enc", $categories, \@Locale, 446 $allow_incompatible); 447 } 448 } 449 } 450 } 451 } 452 453 @Locale = sort @Locale; 454 455 return @Locale; 456} 457 458sub is_locale_utf8 ($) { # Return a boolean as to if core Perl thinks the input 459 # is a UTF-8 locale 460 461 # On z/OS, even locales marked as UTF-8 aren't. 462 return 0 if ord "A" != 65; 463 464 return 0 unless locales_enabled('LC_CTYPE'); 465 466 my $locale = shift; 467 468 use locale; 469 no warnings 'locale'; # We may be trying out a weird locale 470 471 my $save_locale = setlocale(&POSIX::LC_CTYPE()); 472 if (! $save_locale) { 473 ok(0, "Verify could save previous locale"); 474 return 0; 475 } 476 477 if (! setlocale(&POSIX::LC_CTYPE(), $locale)) { 478 ok(0, "Verify could setlocale to $locale"); 479 return 0; 480 } 481 482 my $ret = 0; 483 484 # Use an op that gives different results for UTF-8 than any other locale. 485 # If a platform has UTF-8 locales, there should be at least one locale on 486 # most platforms with UTF-8 in its name, so if there is a bug in the op 487 # giving a false negative, we should get a failure for those locales as we 488 # go through testing all the locales on the platform. 489 if (CORE::fc(chr utf8::unicode_to_native(0xdf)) ne "ss") { 490 if ($locale =~ /UTF-?8/i) { 491 ok (0, "Verify $locale with UTF-8 in name is a UTF-8 locale"); 492 } 493 } 494 else { 495 $ret = 1; 496 } 497 498 die "Couldn't restore locale '$save_locale'" 499 unless setlocale(&POSIX::LC_CTYPE(), $save_locale); 500 501 return $ret; 502} 503 504sub find_utf8_ctype_locale (;$) { # Return the name of a locale that core Perl 505 # thinks is a UTF-8 LC_CTYPE locale. 506 # Optional parameter is a reference to a 507 # list of locales to try; if omitted, this 508 # tries all locales it can find on the 509 # platform 510 return unless locales_enabled('LC_CTYPE'); 511 512 my $locales_ref = shift; 513 514 if (! defined $locales_ref) { 515 516 my @locales = find_locales(&POSIX::LC_CTYPE()); 517 $locales_ref = \@locales; 518 } 519 520 foreach my $locale (@$locales_ref) { 521 return $locale if is_locale_utf8($locale); 522 } 523 524 return; 525} 526 527# returns full path to the directory containing the current source 528# file, inspired by mauke's Dir::Self 529sub _source_location { 530 require File::Spec; 531 532 my $caller_filename = (caller)[1]; 533 534 my $loc = File::Spec->rel2abs( 535 File::Spec->catpath( 536 (File::Spec->splitpath($caller_filename))[0, 1], '' 537 ) 538 ); 539 540 return ($loc =~ /^(.*)$/)[0]; # untaint 541} 542 5431 544 545# Format of data is: locale_name, language_codes, country_codes, encodings 546__DATA__ 547Afrikaans:af:za:1 15 548Arabic:ar:dz eg sa:6 arabic8 549Brezhoneg Breton:br:fr:1 15 550Bulgarski Bulgarian:bg:bg:5 551Chinese:zh:cn tw:cn.EUC eucCN eucTW euc.CN euc.TW Big5 GB2312 tw.EUC 552Hrvatski Croatian:hr:hr:2 553Cymraeg Welsh:cy:cy:1 14 15 554Czech:cs:cz:2 555Dansk Danish:da:dk:1 15 556Nederlands Dutch:nl:be nl:1 15 557English American British:en:au ca gb ie nz us uk zw:1 15 cp850 558Esperanto:eo:eo:3 559Eesti Estonian:et:ee:4 6 13 560Suomi Finnish:fi:fi:1 15 561Flamish::fl:1 15 562Deutsch German:de:at be ch de lu:1 15 563Euskaraz Basque:eu:es fr:1 15 564Galego Galician:gl:es:1 15 565Ellada Greek:el:gr:7 g8 566Frysk:fy:nl:1 15 567Greenlandic:kl:gl:4 6 568Hebrew:iw:il:8 hebrew8 569Hungarian:hu:hu:2 570Indonesian:id:id:1 15 571Gaeilge Irish:ga:IE:1 14 15 572Italiano Italian:it:ch it:1 15 573Nihongo Japanese:ja:jp:euc eucJP jp.EUC sjis 574Korean:ko:kr: 575Latine Latin:la:va:1 15 576Latvian:lv:lv:4 6 13 577Lithuanian:lt:lt:4 6 13 578Macedonian:mk:mk:1 15 579Maltese:mt:mt:3 580Moldovan:mo:mo:2 581Norsk Norwegian:no no\@nynorsk nb nn:no:1 15 582Occitan:oc:es:1 15 583Polski Polish:pl:pl:2 584Rumanian:ro:ro:2 585Russki Russian:ru:ru su ua:5 koi8 koi8r KOI8-R koi8u cp1251 cp866 586Serbski Serbian:sr:yu:5 587Slovak:sk:sk:2 588Slovene Slovenian:sl:si:2 589Sqhip Albanian:sq:sq:1 15 590Svenska Swedish:sv:fi se:1 15 591Thai:th:th:11 tis620 592Turkish:tr:tr:9 turkish8 593Yiddish:yi::1 15 594