156d68f1eSafresh1# Common tools for test files to find the locales which exist on the 29f11ffb7Safresh1# system. Caller should have verified that this isn't miniperl before calling 36fb12b70Safresh1# the functions. 46fb12b70Safresh1 56fb12b70Safresh1# Note that it's okay that some languages have their native names 66fb12b70Safresh1# capitalized here even though that's not "right". They are lowercased 76fb12b70Safresh1# anyway later during the scanning process (and besides, some clueless 86fb12b70Safresh1# vendor might have them capitalized erroneously anyway). 96fb12b70Safresh1 10b8851fccSafresh1# Functions whose names begin with underscore are internal helper functions 11b8851fccSafresh1# for this file, and are not to be used by outside callers. 12b8851fccSafresh1 139f11ffb7Safresh1use Config; 149f11ffb7Safresh1use strict; 15eac174f2Safresh1use warnings; 16eac174f2Safresh1use feature 'state'; 179f11ffb7Safresh1 18e0680481Safresh1my %known_bad_locales = ( # XXX eventually will need version info if and 19e0680481Safresh1 # when these get fixed. 20e0680481Safresh1 solaris => [ 'vi_VN.UTF-8', ], # Use of U+A8 segfaults: GH #20578 21e0680481Safresh1); 22e0680481Safresh1 23b8851fccSafresh1eval { require POSIX; import POSIX 'locale_h'; }; 249f11ffb7Safresh1my $has_locale_h = ! $@; 259f11ffb7Safresh1 269f11ffb7Safresh1my @known_categories = ( qw(LC_ALL LC_COLLATE LC_CTYPE LC_MESSAGES LC_MONETARY 279f11ffb7Safresh1 LC_NUMERIC LC_TIME LC_ADDRESS LC_IDENTIFICATION 28eac174f2Safresh1 LC_MEASUREMENT LC_PAPER LC_TELEPHONE LC_SYNTAX 29e0680481Safresh1 LC_TOD LC_NAME)); 309f11ffb7Safresh1my @platform_categories; 319f11ffb7Safresh1 32*3d61058aSafresh1my $has_excluded_category = $Config{ccflags} =~ /\bD?NO_LOCALE_/; 33e0680481Safresh1sub category_excluded($) { 34eac174f2Safresh1 my $cat_name = shift =~ s/^LC_//r; 35eac174f2Safresh1 36eac174f2Safresh1 # Recognize Configure option to exclude a category 37*3d61058aSafresh1 return $has_excluded_category 38*3d61058aSafresh1 && $Config{ccflags} =~ /\bD?NO_LOCALE_$cat_name\b/; 39eac174f2Safresh1} 40eac174f2Safresh1 419f11ffb7Safresh1# LC_ALL can be -1 on some platforms. And, in fact the implementors could 429f11ffb7Safresh1# legally use any integer to represent any category. But it makes the most 439f11ffb7Safresh1# sense for them to have used small integers. Below, we create new locale 449f11ffb7Safresh1# numbers for ones missing from this machine. We make them very negative, 459f11ffb7Safresh1# hopefully more negative than anything likely to be a valid category on the 469f11ffb7Safresh1# platform, but also below is a check to be sure that our guess is valid. 479f11ffb7Safresh1my $max_bad_category_number = -1000000; 489f11ffb7Safresh1 499f11ffb7Safresh1# Initialize this hash so that it looks like e.g., 509f11ffb7Safresh1# 6 => 'CTYPE', 519f11ffb7Safresh1# where 6 is the value of &POSIX::LC_CTYPE 529f11ffb7Safresh1my %category_name; 539f11ffb7Safresh1my %category_number; 549f11ffb7Safresh1if ($has_locale_h) { 559f11ffb7Safresh1 my $number_for_missing_category = $max_bad_category_number; 569f11ffb7Safresh1 foreach my $name (@known_categories) { 579f11ffb7Safresh1 my $number = eval "&POSIX::$name"; 589f11ffb7Safresh1 if ($@) { 599f11ffb7Safresh1 # Use a negative number (smaller than any legitimate category 609f11ffb7Safresh1 # number) if the platform doesn't support this category, so we 619f11ffb7Safresh1 # have an entry for all the ones that might be specified in calls 629f11ffb7Safresh1 # to us. 63eac174f2Safresh1 $number = $number_for_missing_category--; 649f11ffb7Safresh1 } 659f11ffb7Safresh1 elsif ( $number !~ / ^ -? \d+ $ /x 669f11ffb7Safresh1 || $number <= $max_bad_category_number) 679f11ffb7Safresh1 { 689f11ffb7Safresh1 # We think this should be an int. And it has to be larger than 699f11ffb7Safresh1 # any of our synthetic numbers. 709f11ffb7Safresh1 die "Unexpected locale category number '$number' for $name" 719f11ffb7Safresh1 } 729f11ffb7Safresh1 else { 739f11ffb7Safresh1 push @platform_categories, $name; 749f11ffb7Safresh1 } 759f11ffb7Safresh1 769f11ffb7Safresh1 $name =~ s/LC_//; 779f11ffb7Safresh1 $category_name{$number} = "$name"; 789f11ffb7Safresh1 $category_number{$name} = $number; 799f11ffb7Safresh1 } 809f11ffb7Safresh1} 819f11ffb7Safresh1 829f11ffb7Safresh1sub _my_diag($) { 839f11ffb7Safresh1 my $message = shift; 849f11ffb7Safresh1 if (defined &main::diag) { 859f11ffb7Safresh1 diag($message); 869f11ffb7Safresh1 } 879f11ffb7Safresh1 else { 889f11ffb7Safresh1 local($\, $", $,) = (undef, ' ', ''); 899f11ffb7Safresh1 print STDERR $message, "\n"; 909f11ffb7Safresh1 } 919f11ffb7Safresh1} 929f11ffb7Safresh1 93e0680481Safresh1# Larger than any real test 94e0680481Safresh1my $my_count = 1_000_000; 95e0680481Safresh1 969f11ffb7Safresh1sub _my_fail($) { 979f11ffb7Safresh1 my $message = shift; 989f11ffb7Safresh1 if (defined &main::fail) { 999f11ffb7Safresh1 fail($message); 1009f11ffb7Safresh1 } 1019f11ffb7Safresh1 else { 1029f11ffb7Safresh1 local($\, $", $,) = (undef, ' ', ''); 103e0680481Safresh1 print "not ok " . $my_count++ . $message . "\n"; 1049f11ffb7Safresh1 } 1059f11ffb7Safresh1} 106b8851fccSafresh1 107*3d61058aSafresh1sub platform_locale_categories() { 108*3d61058aSafresh1 return @platform_categories; 109*3d61058aSafresh1} 110*3d61058aSafresh1 111e0680481Safresh1sub valid_locale_categories() { 112e0680481Safresh1 # Returns a list of the locale categories (expressed as strings, like 113*3d61058aSafresh1 # "LC_ALL") known to this program that are available on this platform. 114e0680481Safresh1 115e0680481Safresh1 return grep { ! category_excluded($_) } @platform_categories; 116e0680481Safresh1} 117e0680481Safresh1 118e0680481Safresh1sub is_category_valid($) { 119e0680481Safresh1 my $name = shift; 120e0680481Safresh1 $name = 'LC_' . $name =~ s/^LC_//r; 121e0680481Safresh1 return grep { $name eq $_ } valid_locale_categories(); 122e0680481Safresh1} 123e0680481Safresh1 124e0680481Safresh1# It turns out that strings generated under the control of a given locale 125e0680481Safresh1# category are often affected as well by LC_CTYPE. If the two categories 126e0680481Safresh1# don't match, one can get mojibake or even core dumps. (khw thinks it more 127e0680481Safresh1# likely that it's the code set, not the locale that's critical here; but 128e0680481Safresh1# didn't run experiments to verify this.) Hence, in the code below, CTYPE and 129e0680481Safresh1# the tested categories are all set to the same locale. If CTYPE isn't 130e0680481Safresh1# available on the platform, LC_ALL is instead used. One might think to just 131e0680481Safresh1# use LC_ALL all the time, but on Windows 132e0680481Safresh1# setlocale(LC_ALL, "some_borked_locale") 133e0680481Safresh1# can return success, whereas setting LC_CTYPE to it fails. 134e0680481Safresh1my $master_category; 135e0680481Safresh1$master_category = $category_number{'CTYPE'} 136e0680481Safresh1 if is_category_valid('LC_CTYPE') && defined $category_number{'CTYPE'}; 137e0680481Safresh1$master_category = $category_number{'ALL'} 138e0680481Safresh1 if ! defined $master_category 139e0680481Safresh1 && is_category_valid('LC_ALL') && defined $category_number{'ALL'}; 140e0680481Safresh1 141*3d61058aSafresh1my @platform_locales; # cache of locales found on this platform 142*3d61058aSafresh1my $gathering_platform_locales = 0; # Should we gather locales, or use the 143*3d61058aSafresh1 # cache? 144*3d61058aSafresh1my %seen; # Used to avoid duplicates 145*3d61058aSafresh1 146b8851fccSafresh1sub _trylocale ($$$$) { # For use only by other functions in this file! 147b8851fccSafresh1 148b8851fccSafresh1 # Adds the locale given by the first parameter to the list given by the 1499f11ffb7Safresh1 # 3rd iff the platform supports the locale in each of the category numbers 1509f11ffb7Safresh1 # given by the 2nd parameter, which is either a single category or a 151eac174f2Safresh1 # reference to a list of categories. 1529f11ffb7Safresh1 # 1539f11ffb7Safresh1 # The 4th parameter is true if to accept locales that aren't apparently 1549f11ffb7Safresh1 # fully compatible with Perl. 155b8851fccSafresh1 1566fb12b70Safresh1 my $locale = shift; 1576fb12b70Safresh1 my $categories = shift; 1586fb12b70Safresh1 my $list = shift; 1599f11ffb7Safresh1 my $allow_incompatible = shift; 160b8851fccSafresh1 161e0680481Safresh1 my $normalized_locale = lc ($locale =~ s/\W//gr); 162e0680481Safresh1 return if ! $locale || grep { $normalized_locale eq lc ($_ =~ s/\W//gr) } @$list; 1636fb12b70Safresh1 1649f11ffb7Safresh1 # This is a toy (pig latin) locale that is not fully implemented on some 1659f11ffb7Safresh1 # systems 1669f11ffb7Safresh1 return if $locale =~ / ^ pig $ /ix; 1679f11ffb7Safresh1 168b46d8ef2Safresh1 # Certain platforms have a crippled locale system in which setlocale 169b46d8ef2Safresh1 # returns success for just about any possible locale name, but if anything 170b46d8ef2Safresh1 # actually happens as a result of the call, it is that the underlying 171b46d8ef2Safresh1 # locale is set to a system default, likely C or C.UTF-8. We can't test 172b46d8ef2Safresh1 # such systems fully, but we shouldn't disable the user from using 173b46d8ef2Safresh1 # locales, as it may work out for them (or not). 174b46d8ef2Safresh1 return if defined $Config{d_setlocale_accepts_any_locale_name} 175e0680481Safresh1 && $locale !~ / ^ (?: C | POSIX | C\.UTF-?8 ) $/ix; 176e0680481Safresh1 177e0680481Safresh1 if (exists $known_bad_locales{$^O}) { 178e0680481Safresh1 my @bad_locales = $known_bad_locales{$^O}->@*; 179e0680481Safresh1 return if grep { $locale eq $_ } @bad_locales; 180e0680481Safresh1 } 1819f11ffb7Safresh1 1826fb12b70Safresh1 183b8851fccSafresh1 my $badutf8 = 0; 184b8851fccSafresh1 my $plays_well = 1; 185e0680481Safresh1 my $unsupported = 0; 186b8851fccSafresh1 187b8851fccSafresh1 use warnings 'locale'; 188b8851fccSafresh1 189b8851fccSafresh1 local $SIG{__WARN__} = sub { 1909f11ffb7Safresh1 $badutf8 = 1 if grep { /Malformed UTF-8/ } @_; 191e0680481Safresh1 $unsupported = 1 if grep { /Locale .* is unsupported/i } @_; 1929f11ffb7Safresh1 $plays_well = 0 if grep { 193e0680481Safresh1 /The following characters .* may not have the same meaning as the Perl program expects(?# 1949f11ffb7Safresh1 )|The Perl program will use the expected meanings/i 1959f11ffb7Safresh1 } @_; 196b8851fccSafresh1 }; 197b8851fccSafresh1 198*3d61058aSafresh1 my $result; 199*3d61058aSafresh1 my @category_list; 200*3d61058aSafresh1 if (defined $categories) { 201*3d61058aSafresh1 $categories = [ $categories ] unless ref $categories; 202*3d61058aSafresh1 push @category_list, $categories->@*; 203*3d61058aSafresh1 } 204e0680481Safresh1 205*3d61058aSafresh1 # Make the master category first thing on the list; adding it if necessary 206*3d61058aSafresh1 if (defined $master_category) { 207*3d61058aSafresh1 @category_list = grep { $_ != $master_category } @category_list; 208*3d61058aSafresh1 unshift @category_list, $master_category; 209*3d61058aSafresh1 } 210*3d61058aSafresh1 211*3d61058aSafresh1 foreach my $category (@category_list) { 212e0680481Safresh1 my $save_locale = setlocale($category); 213e0680481Safresh1 if (! $save_locale) { 214e0680481Safresh1 _my_fail("Verify could save previous locale"); 215e0680481Safresh1 return; 216e0680481Safresh1 } 217e0680481Safresh1 2189f11ffb7Safresh1 # Incompatible locales aren't warned about unless using locales. 2199f11ffb7Safresh1 use locale; 2209f11ffb7Safresh1 221*3d61058aSafresh1 my $cur_result = setlocale($category, $locale); 222*3d61058aSafresh1 return unless defined $cur_result; 2239f11ffb7Safresh1 224e0680481Safresh1 no locale; 2256fb12b70Safresh1 226*3d61058aSafresh1 if ( $gathering_platform_locales 227*3d61058aSafresh1 && $category eq $master_category 228*3d61058aSafresh1 && ! $seen{$locale}) 229*3d61058aSafresh1 { 230*3d61058aSafresh1 push @platform_locales, $locale; 231*3d61058aSafresh1 $seen{$locale}++; 232*3d61058aSafresh1 } 233*3d61058aSafresh1 234e0680481Safresh1 # We definitely don't want the locale set to something that is 235e0680481Safresh1 # unsupported 236e0680481Safresh1 if (! setlocale($category, $save_locale)) { 237e0680481Safresh1 my $error_text = "\$!=$!"; 238e0680481Safresh1 $error_text .= "; \$^E=$^E" if $^E != $!; 239e0680481Safresh1 die "Couldn't restore locale '$save_locale', category $category;" 240e0680481Safresh1 . $error_text; 241e0680481Safresh1 } 2426fb12b70Safresh1 if ($badutf8) { 2439f11ffb7Safresh1 _my_fail("Verify locale name doesn't contain malformed utf8"); 2446fb12b70Safresh1 return; 2456fb12b70Safresh1 } 246e0680481Safresh1 247e0680481Safresh1 return if $unsupported; 248e0680481Safresh1 249e0680481Safresh1 # Commas in locale names are bad in Windows, and there is a bug in 250e0680481Safresh1 # some versions where setlocale() turns a legal input locale name into 251e0680481Safresh1 # an illegal return value, which it can't later parse. 252*3d61058aSafresh1 return if $cur_result =~ /,/; 253e0680481Safresh1 254e0680481Safresh1 return unless $plays_well || $allow_incompatible; 255*3d61058aSafresh1 256*3d61058aSafresh1 if (! defined $result) { # First time 257*3d61058aSafresh1 258*3d61058aSafresh1 # If the name returned as $cur_result by the setlocale() above is the 259*3d61058aSafresh1 # same as we requested, there are no complications: use that. 260*3d61058aSafresh1 if ($locale eq $cur_result) { 261*3d61058aSafresh1 $result = $cur_result; 262*3d61058aSafresh1 } 263*3d61058aSafresh1 else { 264*3d61058aSafresh1 265*3d61058aSafresh1 # But if it's different, we check if it's part of a disparate 266*3d61058aSafresh1 # LC_ALL. If so, use the input locale; if not it means the 267*3d61058aSafresh1 # input was a synonym, and we use what it maps to. 268*3d61058aSafresh1 # 269*3d61058aSafresh1 # First, if the platform uses positional notation 270*3d61058aSafresh1 if ($Config{PERL_LC_ALL_SEPARATOR}) { 271*3d61058aSafresh1 $result = (index($cur_result, $Config{PERL_LC_ALL_SEPARATOR}) 272*3d61058aSafresh1 >= 0) 273*3d61058aSafresh1 ? $locale 274*3d61058aSafresh1 : $cur_result; 275*3d61058aSafresh1 } 276*3d61058aSafresh1 else { # Must be using name=value notation 277*3d61058aSafresh1 $result = ($cur_result =~ / = .* ; /x) 278*3d61058aSafresh1 ? $locale 279*3d61058aSafresh1 : $cur_result; 280*3d61058aSafresh1 } 281*3d61058aSafresh1 } 282*3d61058aSafresh1 } 283*3d61058aSafresh1 elsif (! $has_excluded_category && $result ne $cur_result) { 284*3d61058aSafresh1 285*3d61058aSafresh1 # Some platforms will translate POSIX into C 286*3d61058aSafresh1 if (! ( ($result eq "C" && $cur_result eq "POSIX") 287*3d61058aSafresh1 || ($result eq "POSIX" && $cur_result eq "C"))) 288*3d61058aSafresh1 { 289*3d61058aSafresh1 # But otherwise if the new result for this category doesn't 290*3d61058aSafresh1 # match what we already have for a previous category for this 291*3d61058aSafresh1 # same input locale, it's problematic, so discard this whole 292*3d61058aSafresh1 # locale. 293*3d61058aSafresh1 return; 294*3d61058aSafresh1 } 295*3d61058aSafresh1 } 296e0680481Safresh1 } 297e0680481Safresh1 298*3d61058aSafresh1 push @$list, $result; 2996fb12b70Safresh1} 3006fb12b70Safresh1 301b8851fccSafresh1sub _decode_encodings { # For use only by other functions in this file! 3026fb12b70Safresh1 my @enc; 3036fb12b70Safresh1 3046fb12b70Safresh1 foreach (split(/ /, shift)) { 3056fb12b70Safresh1 if (/^(\d+)$/) { 3066fb12b70Safresh1 push @enc, "ISO8859-$1"; 307*3d61058aSafresh1 push @enc, "ISO-8859-$1"; 3086fb12b70Safresh1 push @enc, "iso8859$1"; # HP 3096fb12b70Safresh1 if ($1 eq '1') { 3106fb12b70Safresh1 push @enc, "roman8"; # HP 3116fb12b70Safresh1 } 3126fb12b70Safresh1 push @enc, $_; 3136fb12b70Safresh1 push @enc, "$_.UTF-8"; 3146fb12b70Safresh1 push @enc, "$_.65001"; # Windows UTF-8 3156fb12b70Safresh1 } 3166fb12b70Safresh1 } 3176fb12b70Safresh1 if ($^O eq 'os390') { 3186fb12b70Safresh1 push @enc, qw(IBM-037 IBM-819 IBM-1047); 3196fb12b70Safresh1 } 3206fb12b70Safresh1 push @enc, "UTF-8"; 3216fb12b70Safresh1 push @enc, "65001"; # Windows UTF-8 322*3d61058aSafresh1 push @enc, "1252"; # Windows 3236fb12b70Safresh1 3246fb12b70Safresh1 return @enc; 3256fb12b70Safresh1} 3266fb12b70Safresh1 327b8851fccSafresh1sub locales_enabled(;$) { 328eac174f2Safresh1 # If no parameter is specified, the function returns 1 if there is any 329eac174f2Safresh1 # "safe" locale handling available to the caller; otherwise 0. Safeness 330eac174f2Safresh1 # is defined here as the caller operating in the main thread of a program, 331eac174f2Safresh1 # or if threaded locales are safe on the platform and Configured to be 332eac174f2Safresh1 # used. This sub is used for testing purposes, and for those, this 333eac174f2Safresh1 # definition of safety is sufficient, and necessary to get some tests to 334eac174f2Safresh1 # run on certain configurations on certain platforms. But beware that the 335eac174f2Safresh1 # main thread can change the locale of any subthreads unless 336eac174f2Safresh1 # ${^SAFE_LOCALES} is non-zero. 337b8851fccSafresh1 # 338eac174f2Safresh1 # Use the optional parameter to discover if a particular category or 339eac174f2Safresh1 # categories are available on the system. 1 is returned if the global 340eac174f2Safresh1 # criteria described in the previous paragraph are true, AND if all the 341eac174f2Safresh1 # specified categories are available on the platform and Configured to be 342eac174f2Safresh1 # used. Otherwise 0 is returned. The parameter is either a single POSIX 343eac174f2Safresh1 # locale category or a reference to a list of them. Each category must be 344eac174f2Safresh1 # its name as a string, like 'LC_TIME' (the initial 'LC_' is optional), or 345eac174f2Safresh1 # the number this platform uses to signify the category (e.g., 346eac174f2Safresh1 # 'locales_enabled(&POSIX::LC_CTYPE)' 347b8851fccSafresh1 # 348eac174f2Safresh1 # When the function returns 1 and a parameter was specified as a list 349eac174f2Safresh1 # reference, the reference will be altered on return to point to an 350eac174f2Safresh1 # equivalent list such that the categories are numeric instead of strings 351eac174f2Safresh1 # and sorted to meet the input expectations of _trylocale(). 3529f11ffb7Safresh1 # 353eac174f2Safresh1 # It is a fatal error to call this with something that isn't a known 354eac174f2Safresh1 # category to this file. If this happens, look first for a typo, and 355eac174f2Safresh1 # second if you are using a category unknown to Perl. In the latter case 356eac174f2Safresh1 # a bug report should be submitted. 357b8851fccSafresh1 358b46d8ef2Safresh1 # khw cargo-culted the '?' in the pattern on the next line. 359b46d8ef2Safresh1 return 0 if $Config{ccflags} =~ /\bD?NO_LOCALE\b/; 360b46d8ef2Safresh1 361b46d8ef2Safresh1 # If we can't load the POSIX XS module, we can't have locales even if they 362b46d8ef2Safresh1 # normally would be available 363b46d8ef2Safresh1 return 0 if ! defined &DynaLoader::boot_DynaLoader; 364b46d8ef2Safresh1 365eac174f2Safresh1 # Don't test locales where they aren't safe. On systems with unsafe 366eac174f2Safresh1 # threads, for the purposes of testing, we consider the main thread safe, 367eac174f2Safresh1 # and all other threads unsafe. 368eac174f2Safresh1 if (! ${^SAFE_LOCALES}) { 369eac174f2Safresh1 return 0 if $^O eq 'os390'; # Threaded locales don't work well here 370eac174f2Safresh1 require threads; 371eac174f2Safresh1 return 0 if threads->tid() != 0; 372eac174f2Safresh1 } 373eac174f2Safresh1 374eac174f2Safresh1 # If no setlocale, we need the POSIX 2008 alternatives 375b46d8ef2Safresh1 if (! $Config{d_setlocale}) { 376b46d8ef2Safresh1 return 0 if $Config{ccflags} =~ /\bD?NO_POSIX_2008_LOCALE\b/; 377b46d8ef2Safresh1 return 0 unless $Config{d_newlocale}; 378b46d8ef2Safresh1 return 0 unless $Config{d_uselocale}; 379b46d8ef2Safresh1 return 0 unless $Config{d_duplocale}; 380b46d8ef2Safresh1 return 0 unless $Config{d_freelocale}; 381b46d8ef2Safresh1 } 382b8851fccSafresh1 383b8851fccSafresh1 # Done with the global possibilities. Now check if any passed in category 384b8851fccSafresh1 # is disabled. 3859f11ffb7Safresh1 386eac174f2Safresh1 my $categories_ref = $_[0]; 3879f11ffb7Safresh1 my $return_categories_numbers = 0; 3889f11ffb7Safresh1 my @categories_numbers; 3899f11ffb7Safresh1 my $has_LC_ALL = 0; 3909f11ffb7Safresh1 my $has_LC_COLLATE = 0; 3919f11ffb7Safresh1 392b8851fccSafresh1 if (defined $categories_ref) { 3939f11ffb7Safresh1 my @local_categories_copy; 3949f11ffb7Safresh1 395eac174f2Safresh1 my $reftype = ref $categories_ref; 396eac174f2Safresh1 if ($reftype eq 'ARRAY') { 397eac174f2Safresh1 @local_categories_copy = @$categories_ref; 3989f11ffb7Safresh1 $return_categories_numbers = 1; 3999f11ffb7Safresh1 } 400eac174f2Safresh1 elsif ($reftype ne "") { 401eac174f2Safresh1 die "Parameter to locales_enabled() must be an ARRAY;" 402eac174f2Safresh1 . " instead you used a $reftype"; 403eac174f2Safresh1 } 4049f11ffb7Safresh1 else { # Single category passed in 4059f11ffb7Safresh1 @local_categories_copy = $categories_ref; 4069f11ffb7Safresh1 } 4079f11ffb7Safresh1 408b8851fccSafresh1 for my $category_name_or_number (@local_categories_copy) { 409b8851fccSafresh1 my $name; 410b8851fccSafresh1 my $number; 411b8851fccSafresh1 if ($category_name_or_number =~ / ^ -? \d+ $ /x) { 412b8851fccSafresh1 $number = $category_name_or_number; 413b8851fccSafresh1 die "Invalid locale category number '$number'" 414b8851fccSafresh1 unless grep { $number == $_ } keys %category_name; 415b8851fccSafresh1 $name = $category_name{$number}; 416b8851fccSafresh1 } 417b8851fccSafresh1 else { 418b8851fccSafresh1 $name = $category_name_or_number; 419b8851fccSafresh1 $name =~ s/ ^ LC_ //x; 420b8851fccSafresh1 foreach my $trial (keys %category_name) { 421b8851fccSafresh1 if ($category_name{$trial} eq $name) { 422b8851fccSafresh1 $number = $trial; 423b8851fccSafresh1 last; 424b8851fccSafresh1 } 425b8851fccSafresh1 } 426b8851fccSafresh1 die "Invalid locale category name '$name'" 427b8851fccSafresh1 unless defined $number; 428b8851fccSafresh1 } 429b8851fccSafresh1 430b8851fccSafresh1 return 0 if $number <= $max_bad_category_number 431e0680481Safresh1 || category_excluded($name); 432eac174f2Safresh1 433b8851fccSafresh1 434b8851fccSafresh1 eval "defined &POSIX::LC_$name"; 435b8851fccSafresh1 return 0 if $@; 4369f11ffb7Safresh1 4379f11ffb7Safresh1 if ($return_categories_numbers) { 4389f11ffb7Safresh1 if ($name eq 'CTYPE') { 4399f11ffb7Safresh1 unshift @categories_numbers, $number; # Always first 440b8851fccSafresh1 } 4419f11ffb7Safresh1 elsif ($name eq 'ALL') { 4429f11ffb7Safresh1 $has_LC_ALL = 1; 4439f11ffb7Safresh1 } 4449f11ffb7Safresh1 elsif ($name eq 'COLLATE') { 4459f11ffb7Safresh1 $has_LC_COLLATE = 1; 4469f11ffb7Safresh1 } 4479f11ffb7Safresh1 else { 4489f11ffb7Safresh1 push @categories_numbers, $number; 4499f11ffb7Safresh1 } 4509f11ffb7Safresh1 } 4519f11ffb7Safresh1 } 4529f11ffb7Safresh1 } 4539f11ffb7Safresh1 4549f11ffb7Safresh1 if ($return_categories_numbers) { 4559f11ffb7Safresh1 4569f11ffb7Safresh1 # COLLATE comes after all other locales except ALL, which comes last 4579f11ffb7Safresh1 if ($has_LC_COLLATE) { 4589f11ffb7Safresh1 push @categories_numbers, $category_number{'COLLATE'}; 4599f11ffb7Safresh1 } 4609f11ffb7Safresh1 if ($has_LC_ALL) { 4619f11ffb7Safresh1 push @categories_numbers, $category_number{'ALL'}; 4629f11ffb7Safresh1 } 463eac174f2Safresh1 464eac174f2Safresh1 @$categories_ref = @categories_numbers; 465b8851fccSafresh1 } 466b8851fccSafresh1 467b8851fccSafresh1 return 1; 468b8851fccSafresh1} 469b8851fccSafresh1 470b8851fccSafresh1 4719f11ffb7Safresh1sub find_locales ($;$) { 472b8851fccSafresh1 4739f11ffb7Safresh1 # Returns an array of all the locales we found on the system. If the 4749f11ffb7Safresh1 # optional 2nd parameter is non-zero, the list includes all found locales; 4759f11ffb7Safresh1 # otherwise it is restricted to those locales that play well with Perl, as 4769f11ffb7Safresh1 # far as we can easily determine. 4779f11ffb7Safresh1 # 4789f11ffb7Safresh1 # The first parameter is either a single locale category or a reference to 4799f11ffb7Safresh1 # a list of categories to find valid locales for it (or in the case of 4809f11ffb7Safresh1 # multiple) for all of them. Each category can be a name (like 'LC_ALL' 4819f11ffb7Safresh1 # or simply 'ALL') or the C enum value for the category. 4829f11ffb7Safresh1 483eac174f2Safresh1 my $input_categories = shift; 4849f11ffb7Safresh1 my $allow_incompatible = shift // 0; 4859f11ffb7Safresh1 486*3d61058aSafresh1 die ("Usage: find_locales( category | [ categories ] )") 487*3d61058aSafresh1 unless defined $input_categories; 488e0680481Safresh1 my @categories = (ref $input_categories) 489e0680481Safresh1 ? $input_categories->@* 490e0680481Safresh1 : $input_categories; 491*3d61058aSafresh1 492*3d61058aSafresh1 # If we can't use at least one of these categories, investigate further 493*3d61058aSafresh1 if (! locales_enabled(\@categories)) { 494*3d61058aSafresh1 495*3d61058aSafresh1 # Not usable at all if system doesn't have locales 496*3d61058aSafresh1 return unless locales_enabled(); 497*3d61058aSafresh1 498*3d61058aSafresh1 # Nor if any of the required categories isn't on the system 499*3d61058aSafresh1 my @on_platform = platform_locale_categories(); 500*3d61058aSafresh1 for my $category (@categories) { 501*3d61058aSafresh1 return unless grep { $category eq $_ } @on_platform; 502*3d61058aSafresh1 } 503*3d61058aSafresh1 504*3d61058aSafresh1 # Otherwise the category is on the system, but not generally usable. 505*3d61058aSafresh1 # But the two always-present locales should be usable 506*3d61058aSafresh1 return ( "C", "POSIX" ); 507*3d61058aSafresh1 } 508*3d61058aSafresh1 5099f11ffb7Safresh1 5109f11ffb7Safresh1 # Note, the subroutine call above converts the $categories into a form 5119f11ffb7Safresh1 # suitable for _trylocale(). 5126fb12b70Safresh1 5136fb12b70Safresh1 # Visual C's CRT goes silly on strings of the form "en_US.ISO8859-1" 5146fb12b70Safresh1 # and mingw32 uses said silly CRT 5156fb12b70Safresh1 # This doesn't seem to be an issue any more, at least on Windows XP, 5166fb12b70Safresh1 # so re-enable the tests for Windows XP onwards. 5176fb12b70Safresh1 my $winxp = ($^O eq 'MSWin32' && defined &Win32::GetOSVersion && 5186fb12b70Safresh1 join('.', (Win32::GetOSVersion())[1..2]) >= 5.1); 519eac174f2Safresh1 return if (($^O eq 'MSWin32' && !$winxp) 520b8851fccSafresh1 && $Config{cc} =~ /^(cl|gcc|g\+\+|ici)/i); 5216fb12b70Safresh1 5229f11ffb7Safresh1 my @Locale; 523*3d61058aSafresh1 524*3d61058aSafresh1 if (@platform_locales) { 525*3d61058aSafresh1 $gathering_platform_locales = 0; 526*3d61058aSafresh1 foreach my $locale (@platform_locales) { 527*3d61058aSafresh1 _trylocale($locale, \@categories, \@Locale, $allow_incompatible); 528*3d61058aSafresh1 } 529*3d61058aSafresh1 } 530*3d61058aSafresh1 else { 531*3d61058aSafresh1 $gathering_platform_locales = 1; 532*3d61058aSafresh1 533eac174f2Safresh1 _trylocale("C", \@categories, \@Locale, $allow_incompatible); 534eac174f2Safresh1 _trylocale("POSIX", \@categories, \@Locale, $allow_incompatible); 535b46d8ef2Safresh1 53656d68f1eSafresh1 if ($Config{d_has_C_UTF8} && $Config{d_has_C_UTF8} eq 'true') { 537eac174f2Safresh1 _trylocale("C.UTF-8", \@categories, \@Locale, $allow_incompatible); 538b46d8ef2Safresh1 } 539b46d8ef2Safresh1 540*3d61058aSafresh1 # There's no point in looking at anything more if we know that 541*3d61058aSafresh1 # setlocale will return success on any garbage or non-garbage name. 542*3d61058aSafresh1 return sort @Locale 543*3d61058aSafresh1 if defined $Config{d_setlocale_accepts_any_locale_name}; 544b46d8ef2Safresh1 5459f11ffb7Safresh1 foreach (1..16) { 546*3d61058aSafresh1 _trylocale("ISO8859-$_", \@categories, \@Locale, 547*3d61058aSafresh1 $allow_incompatible); 548*3d61058aSafresh1 _trylocale("iso8859$_", \@categories, \@Locale, 549*3d61058aSafresh1 $allow_incompatible); 550*3d61058aSafresh1 _trylocale("iso8859-$_", \@categories, \@Locale, 551*3d61058aSafresh1 $allow_incompatible); 552*3d61058aSafresh1 _trylocale("iso_8859_$_", \@categories, \@Locale, 553*3d61058aSafresh1 $allow_incompatible); 554*3d61058aSafresh1 _trylocale("isolatin$_", \@categories, \@Locale, 555*3d61058aSafresh1 $allow_incompatible); 556*3d61058aSafresh1 _trylocale("isolatin-$_", \@categories, \@Locale, 557*3d61058aSafresh1 $allow_incompatible); 558*3d61058aSafresh1 _trylocale("iso_latin_$_", \@categories, \@Locale, 559*3d61058aSafresh1 $allow_incompatible); 5606fb12b70Safresh1 } 5616fb12b70Safresh1 5626fb12b70Safresh1 # Sanitize the environment so that we can run the external 'locale' 5636fb12b70Safresh1 # program without the taint mode getting grumpy. 5646fb12b70Safresh1 5656fb12b70Safresh1 # $ENV{PATH} is special in VMS. 5666fb12b70Safresh1 delete local $ENV{PATH} if $^O ne 'VMS' or $Config{d_setenv}; 5676fb12b70Safresh1 5686fb12b70Safresh1 # Other subversive stuff. 5696fb12b70Safresh1 delete local @ENV{qw(IFS CDPATH ENV BASH_ENV)}; 5706fb12b70Safresh1 5716fb12b70Safresh1 if (-x "/usr/bin/locale" 5729f11ffb7Safresh1 && open(LOCALES, '-|', "/usr/bin/locale -a 2>/dev/null")) 5736fb12b70Safresh1 { 5746fb12b70Safresh1 while (<LOCALES>) { 575*3d61058aSafresh1 576*3d61058aSafresh1 # It seems that /usr/bin/locale steadfastly outputs 8 bit 577*3d61058aSafresh1 # data, which ain't great when we're running this 578*3d61058aSafresh1 # testPERL_UNICODE= so that utf8 locales will cause all IO 579*3d61058aSafresh1 # hadles to default to (assume) utf8 5806fb12b70Safresh1 next unless utf8::valid($_); 5816fb12b70Safresh1 chomp; 582eac174f2Safresh1 _trylocale($_, \@categories, \@Locale, $allow_incompatible); 5836fb12b70Safresh1 } 584*3d61058aSafresh1 5856fb12b70Safresh1 close(LOCALES); 5866fb12b70Safresh1 } elsif ($^O eq 'VMS' 5876fb12b70Safresh1 && defined($ENV{'SYS$I18N_LOCALE'}) 5886fb12b70Safresh1 && -d 'SYS$I18N_LOCALE') 5896fb12b70Safresh1 { 5906fb12b70Safresh1 # The SYS$I18N_LOCALE logical name search list was not present on 591*3d61058aSafresh1 # VAX VMS V5.5-12, but was on AXP && VAX VMS V6.2 as well as later 592*3d61058aSafresh1 # versions. 5936fb12b70Safresh1 opendir(LOCALES, "SYS\$I18N_LOCALE:"); 5946fb12b70Safresh1 while ($_ = readdir(LOCALES)) { 5956fb12b70Safresh1 chomp; 596eac174f2Safresh1 _trylocale($_, \@categories, \@Locale, $allow_incompatible); 5976fb12b70Safresh1 } 5986fb12b70Safresh1 close(LOCALES); 599*3d61058aSafresh1 } elsif ( ($^O eq 'openbsd' || $^O eq 'bitrig' ) 600*3d61058aSafresh1 && -e '/usr/share/locale') 601*3d61058aSafresh1 { 6026fb12b70Safresh1 603b8851fccSafresh1 # OpenBSD doesn't have a locale executable, so reading 604b8851fccSafresh1 # /usr/share/locale is much easier and faster than the last resort 605b8851fccSafresh1 # method. 6066fb12b70Safresh1 6076fb12b70Safresh1 opendir(LOCALES, '/usr/share/locale'); 6086fb12b70Safresh1 while ($_ = readdir(LOCALES)) { 6096fb12b70Safresh1 chomp; 610eac174f2Safresh1 _trylocale($_, \@categories, \@Locale, $allow_incompatible); 6116fb12b70Safresh1 } 6126fb12b70Safresh1 close(LOCALES); 6136fb12b70Safresh1 } else { # Final fallback. Try our list of locales hard-coded here 6146fb12b70Safresh1 6156fb12b70Safresh1 # This is going to be slow. 6166fb12b70Safresh1 my @Data; 6176fb12b70Safresh1 618*3d61058aSafresh1 # Locales whose name differs if the utf8 bit is on are stored in 619*3d61058aSafresh1 # these two files with appropriate encodings. 6209f11ffb7Safresh1 my $data_file = ($^H & 0x08 || (${^OPEN} || "") =~ /:utf8/) 6219f11ffb7Safresh1 ? _source_location() . "/lib/locale/utf8" 6229f11ffb7Safresh1 : _source_location() . "/lib/locale/latin1"; 6239f11ffb7Safresh1 if (-e $data_file) { 6249f11ffb7Safresh1 @Data = do $data_file; 6259f11ffb7Safresh1 } 6269f11ffb7Safresh1 else { 627*3d61058aSafresh1 _my_diag(__FILE__ . ":" . __LINE__ . 628*3d61058aSafresh1 ": '$data_file' doesn't exist"); 6296fb12b70Safresh1 } 6306fb12b70Safresh1 6316fb12b70Safresh1 # The rest of the locales are in this file. 632eac174f2Safresh1 state @my_data = <DATA>; close DATA if fileno DATA; 633eac174f2Safresh1 push @Data, @my_data; 6346fb12b70Safresh1 635*3d61058aSafresh1 foreach my $default (qw(.ACP .OCP)) { 636*3d61058aSafresh1 _trylocale($default, \@categories, \@Locale, 637*3d61058aSafresh1 $allow_incompatible); 638*3d61058aSafresh1 } 639*3d61058aSafresh1 6406fb12b70Safresh1 foreach my $line (@Data) { 64156d68f1eSafresh1 chomp $line; 6426fb12b70Safresh1 my ($locale_name, $language_codes, $country_codes, $encodings) = 6436fb12b70Safresh1 split /:/, $line; 644*3d61058aSafresh1 _my_diag(__FILE__ . ":" . __LINE__ 645*3d61058aSafresh1 . ": Unexpected syntax in '$line'") 6469f11ffb7Safresh1 unless defined $locale_name; 6476fb12b70Safresh1 my @enc = _decode_encodings($encodings); 6486fb12b70Safresh1 foreach my $loc (split(/ /, $locale_name)) { 649*3d61058aSafresh1 _trylocale($loc, \@categories, \@Locale, 650*3d61058aSafresh1 $allow_incompatible); 6516fb12b70Safresh1 foreach my $enc (@enc) { 652eac174f2Safresh1 _trylocale("$loc.$enc", \@categories, \@Locale, 6539f11ffb7Safresh1 $allow_incompatible); 6546fb12b70Safresh1 } 6556fb12b70Safresh1 $loc = lc $loc; 6566fb12b70Safresh1 foreach my $enc (@enc) { 657eac174f2Safresh1 _trylocale("$loc.$enc", \@categories, \@Locale, 6589f11ffb7Safresh1 $allow_incompatible); 6596fb12b70Safresh1 } 6606fb12b70Safresh1 } 6616fb12b70Safresh1 foreach my $lang (split(/ /, $language_codes)) { 662*3d61058aSafresh1 _trylocale($lang, \@categories, \@Locale, 663*3d61058aSafresh1 $allow_incompatible); 6646fb12b70Safresh1 foreach my $country (split(/ /, $country_codes)) { 6656fb12b70Safresh1 my $lc = "${lang}_${country}"; 666*3d61058aSafresh1 _trylocale($lc, \@categories, \@Locale, 667*3d61058aSafresh1 $allow_incompatible); 6686fb12b70Safresh1 foreach my $enc (@enc) { 669eac174f2Safresh1 _trylocale("$lc.$enc", \@categories, \@Locale, 6709f11ffb7Safresh1 $allow_incompatible); 6716fb12b70Safresh1 } 6726fb12b70Safresh1 my $lC = "${lang}_\U${country}"; 673*3d61058aSafresh1 _trylocale($lC, \@categories, \@Locale, 674*3d61058aSafresh1 $allow_incompatible); 6756fb12b70Safresh1 foreach my $enc (@enc) { 676eac174f2Safresh1 _trylocale("$lC.$enc", \@categories, \@Locale, 6779f11ffb7Safresh1 $allow_incompatible); 6786fb12b70Safresh1 } 6796fb12b70Safresh1 } 6806fb12b70Safresh1 } 6816fb12b70Safresh1 } 6826fb12b70Safresh1 } 683*3d61058aSafresh1 } 6846fb12b70Safresh1 685*3d61058aSafresh1 my %Locale; 686*3d61058aSafresh1 $Locale{$_} = 1 for @Locale; 687*3d61058aSafresh1 @Locale = sort keys %Locale; 6886fb12b70Safresh1 6896fb12b70Safresh1 return @Locale; 6906fb12b70Safresh1} 6916fb12b70Safresh1 6926fb12b70Safresh1sub is_locale_utf8 ($) { # Return a boolean as to if core Perl thinks the input 6936fb12b70Safresh1 # is a UTF-8 locale 694b8851fccSafresh1 695b8851fccSafresh1 # On z/OS, even locales marked as UTF-8 aren't. 696b8851fccSafresh1 return 0 if ord "A" != 65; 697b8851fccSafresh1 698b8851fccSafresh1 return 0 unless locales_enabled('LC_CTYPE'); 699b8851fccSafresh1 7006fb12b70Safresh1 my $locale = shift; 7016fb12b70Safresh1 702b8851fccSafresh1 no warnings 'locale'; # We may be trying out a weird locale 703e0680481Safresh1 use locale; 7046fb12b70Safresh1 7056fb12b70Safresh1 my $save_locale = setlocale(&POSIX::LC_CTYPE()); 7066fb12b70Safresh1 if (! $save_locale) { 707e0680481Safresh1 _my_fail("Verify could save previous locale"); 7086fb12b70Safresh1 return 0; 7096fb12b70Safresh1 } 7106fb12b70Safresh1 7116fb12b70Safresh1 if (! setlocale(&POSIX::LC_CTYPE(), $locale)) { 712e0680481Safresh1 _my_fail("Verify could setlocale to $locale"); 7136fb12b70Safresh1 return 0; 7146fb12b70Safresh1 } 7156fb12b70Safresh1 7166fb12b70Safresh1 my $ret = 0; 7176fb12b70Safresh1 7186fb12b70Safresh1 # Use an op that gives different results for UTF-8 than any other locale. 7196fb12b70Safresh1 # If a platform has UTF-8 locales, there should be at least one locale on 7206fb12b70Safresh1 # most platforms with UTF-8 in its name, so if there is a bug in the op 7216fb12b70Safresh1 # giving a false negative, we should get a failure for those locales as we 7226fb12b70Safresh1 # go through testing all the locales on the platform. 7236fb12b70Safresh1 if (CORE::fc(chr utf8::unicode_to_native(0xdf)) ne "ss") { 7246fb12b70Safresh1 if ($locale =~ /UTF-?8/i) { 725e0680481Safresh1 _my_fail("Verify $locale with UTF-8 in name is a UTF-8 locale"); 7266fb12b70Safresh1 } 7276fb12b70Safresh1 } 7286fb12b70Safresh1 else { 7296fb12b70Safresh1 $ret = 1; 7306fb12b70Safresh1 } 7316fb12b70Safresh1 7326fb12b70Safresh1 die "Couldn't restore locale '$save_locale'" 7336fb12b70Safresh1 unless setlocale(&POSIX::LC_CTYPE(), $save_locale); 7346fb12b70Safresh1 7356fb12b70Safresh1 return $ret; 7366fb12b70Safresh1} 7376fb12b70Safresh1 738e0680481Safresh1sub classify_locales_wrt_utf8ness($) { 739e0680481Safresh1 740e0680481Safresh1 # Takes the input list of locales, and returns two lists split apart from 741e0680481Safresh1 # it: the UTF-8 ones, and the non-UTF-8 ones. 742e0680481Safresh1 743e0680481Safresh1 my $locales_ref = shift; 744e0680481Safresh1 my (@utf8, @non_utf8); 745e0680481Safresh1 746e0680481Safresh1 if (! locales_enabled('LC_CTYPE')) { # No CTYPE implies all are non-UTF-8 747e0680481Safresh1 @non_utf8 = $locales_ref->@*; 748e0680481Safresh1 return ( \@utf8, \@non_utf8 ); 749e0680481Safresh1 } 750e0680481Safresh1 751e0680481Safresh1 foreach my $locale (@$locales_ref) { 752e0680481Safresh1 my $which = (is_locale_utf8($locale)) ? \@utf8 : \@non_utf8; 753e0680481Safresh1 push $which->@*, $locale; 754e0680481Safresh1 } 755e0680481Safresh1 756e0680481Safresh1 return ( \@utf8, \@non_utf8 ); 757e0680481Safresh1} 758e0680481Safresh1 759e0680481Safresh1sub find_utf8_ctype_locales (;$) { 760e0680481Safresh1 761e0680481Safresh1 # Return the names of the locales that core Perl thinks are UTF-8 LC_CTYPE 762e0680481Safresh1 # locales. Optional parameter is a reference to a list of locales to try; 763e0680481Safresh1 # if omitted, this tries all locales it can find on the platform 764e0680481Safresh1 765b8851fccSafresh1 return unless locales_enabled('LC_CTYPE'); 766b8851fccSafresh1 7676fb12b70Safresh1 my $locales_ref = shift; 7686fb12b70Safresh1 if (! defined $locales_ref) { 769b8851fccSafresh1 7709f11ffb7Safresh1 my @locales = find_locales(&POSIX::LC_CTYPE()); 7716fb12b70Safresh1 $locales_ref = \@locales; 7726fb12b70Safresh1 } 7736fb12b70Safresh1 774e0680481Safresh1 my ($utf8_ref, undef) = classify_locales_wrt_utf8ness($locales_ref); 775e0680481Safresh1 return unless $utf8_ref; 776e0680481Safresh1 return $utf8_ref->@*; 777b46d8ef2Safresh1} 778b46d8ef2Safresh1 779b46d8ef2Safresh1sub find_utf8_ctype_locale (;$) { # Return the name of a locale that core Perl 780b46d8ef2Safresh1 # thinks is a UTF-8 LC_CTYPE non-turkic 781b46d8ef2Safresh1 # locale. 782b46d8ef2Safresh1 # Optional parameter is a reference to a 783b46d8ef2Safresh1 # list of locales to try; if omitted, this 784b46d8ef2Safresh1 # tries all locales it can find on the 785b46d8ef2Safresh1 # platform 786b46d8ef2Safresh1 my $try_locales_ref = shift; 787b46d8ef2Safresh1 788b46d8ef2Safresh1 my @utf8_locales = find_utf8_ctype_locales($try_locales_ref); 789b46d8ef2Safresh1 my @turkic_locales = find_utf8_turkic_locales($try_locales_ref); 790b46d8ef2Safresh1 791b46d8ef2Safresh1 my %seen_turkic; 792b46d8ef2Safresh1 793b46d8ef2Safresh1 # Create undef elements in the hash for turkic locales 794b46d8ef2Safresh1 @seen_turkic{@turkic_locales} = (); 795b46d8ef2Safresh1 796b46d8ef2Safresh1 foreach my $locale (@utf8_locales) { 797b46d8ef2Safresh1 return $locale unless exists $seen_turkic{$locale}; 7986fb12b70Safresh1 } 7996fb12b70Safresh1 8006fb12b70Safresh1 return; 8016fb12b70Safresh1} 8026fb12b70Safresh1 803b46d8ef2Safresh1sub find_utf8_turkic_locales (;$) { 804b46d8ef2Safresh1 805b46d8ef2Safresh1 # Return the name of all the locales that core Perl thinks are UTF-8 806b46d8ef2Safresh1 # Turkic LC_CTYPE. Optional parameter is a reference to a list of locales 807b46d8ef2Safresh1 # to try; if omitted, this tries all locales it can find on the platform 808b46d8ef2Safresh1 809b46d8ef2Safresh1 my @return; 810b46d8ef2Safresh1 811b46d8ef2Safresh1 return unless locales_enabled('LC_CTYPE'); 812b46d8ef2Safresh1 813b46d8ef2Safresh1 my $save_locale = setlocale(&POSIX::LC_CTYPE()); 814b46d8ef2Safresh1 foreach my $locale (find_utf8_ctype_locales(shift)) { 815b46d8ef2Safresh1 use locale; 816b46d8ef2Safresh1 setlocale(&POSIX::LC_CTYPE(), $locale); 817b46d8ef2Safresh1 push @return, $locale if uc('i') eq "\x{130}"; 818b46d8ef2Safresh1 } 819e0680481Safresh1 820e0680481Safresh1 die "Couldn't restore locale '$save_locale'" 821e0680481Safresh1 unless setlocale(&POSIX::LC_CTYPE(), $save_locale); 822b46d8ef2Safresh1 823b46d8ef2Safresh1 return @return; 824b46d8ef2Safresh1} 825b46d8ef2Safresh1 826b46d8ef2Safresh1sub find_utf8_turkic_locale (;$) { 827b46d8ef2Safresh1 my @turkics = find_utf8_turkic_locales(shift); 828b46d8ef2Safresh1 829b46d8ef2Safresh1 return unless @turkics; 830b46d8ef2Safresh1 return $turkics[0] 831b46d8ef2Safresh1} 832b46d8ef2Safresh1 833b46d8ef2Safresh1 8349f11ffb7Safresh1# returns full path to the directory containing the current source 8359f11ffb7Safresh1# file, inspired by mauke's Dir::Self 8369f11ffb7Safresh1sub _source_location { 8379f11ffb7Safresh1 require File::Spec; 8389f11ffb7Safresh1 8399f11ffb7Safresh1 my $caller_filename = (caller)[1]; 8409f11ffb7Safresh1 8419f11ffb7Safresh1 my $loc = File::Spec->rel2abs( 8429f11ffb7Safresh1 File::Spec->catpath( 8439f11ffb7Safresh1 (File::Spec->splitpath($caller_filename))[0, 1], '' 8449f11ffb7Safresh1 ) 8459f11ffb7Safresh1 ); 8469f11ffb7Safresh1 8479f11ffb7Safresh1 return ($loc =~ /^(.*)$/)[0]; # untaint 8489f11ffb7Safresh1} 8499f11ffb7Safresh1 8506fb12b70Safresh11 8516fb12b70Safresh1 8526fb12b70Safresh1# Format of data is: locale_name, language_codes, country_codes, encodings 8536fb12b70Safresh1__DATA__ 8546fb12b70Safresh1Afrikaans:af:za:1 15 8556fb12b70Safresh1Arabic:ar:dz eg sa:6 arabic8 8566fb12b70Safresh1Brezhoneg Breton:br:fr:1 15 8576fb12b70Safresh1Bulgarski Bulgarian:bg:bg:5 8586fb12b70Safresh1Chinese:zh:cn tw:cn.EUC eucCN eucTW euc.CN euc.TW Big5 GB2312 tw.EUC 8596fb12b70Safresh1Hrvatski Croatian:hr:hr:2 8606fb12b70Safresh1Cymraeg Welsh:cy:cy:1 14 15 8616fb12b70Safresh1Czech:cs:cz:2 8626fb12b70Safresh1Dansk Danish:da:dk:1 15 8636fb12b70Safresh1Nederlands Dutch:nl:be nl:1 15 8646fb12b70Safresh1English American British:en:au ca gb ie nz us uk zw:1 15 cp850 8656fb12b70Safresh1Esperanto:eo:eo:3 8666fb12b70Safresh1Eesti Estonian:et:ee:4 6 13 8676fb12b70Safresh1Suomi Finnish:fi:fi:1 15 8686fb12b70Safresh1Flamish::fl:1 15 8696fb12b70Safresh1Deutsch German:de:at be ch de lu:1 15 8706fb12b70Safresh1Euskaraz Basque:eu:es fr:1 15 8716fb12b70Safresh1Galego Galician:gl:es:1 15 8726fb12b70Safresh1Ellada Greek:el:gr:7 g8 8736fb12b70Safresh1Frysk:fy:nl:1 15 8746fb12b70Safresh1Greenlandic:kl:gl:4 6 8756fb12b70Safresh1Hebrew:iw:il:8 hebrew8 8766fb12b70Safresh1Hungarian:hu:hu:2 8776fb12b70Safresh1Indonesian:id:id:1 15 8786fb12b70Safresh1Gaeilge Irish:ga:IE:1 14 15 8796fb12b70Safresh1Italiano Italian:it:ch it:1 15 8806fb12b70Safresh1Nihongo Japanese:ja:jp:euc eucJP jp.EUC sjis 8816fb12b70Safresh1Korean:ko:kr: 8826fb12b70Safresh1Latine Latin:la:va:1 15 8836fb12b70Safresh1Latvian:lv:lv:4 6 13 8846fb12b70Safresh1Lithuanian:lt:lt:4 6 13 8856fb12b70Safresh1Macedonian:mk:mk:1 15 8866fb12b70Safresh1Maltese:mt:mt:3 8876fb12b70Safresh1Moldovan:mo:mo:2 8886fb12b70Safresh1Norsk Norwegian:no no\@nynorsk nb nn:no:1 15 8896fb12b70Safresh1Occitan:oc:es:1 15 8906fb12b70Safresh1Polski Polish:pl:pl:2 8916fb12b70Safresh1Rumanian:ro:ro:2 8926fb12b70Safresh1Russki Russian:ru:ru su ua:5 koi8 koi8r KOI8-R koi8u cp1251 cp866 8936fb12b70Safresh1Serbski Serbian:sr:yu:5 8946fb12b70Safresh1Slovak:sk:sk:2 8956fb12b70Safresh1Slovene Slovenian:sl:si:2 8966fb12b70Safresh1Sqhip Albanian:sq:sq:1 15 8976fb12b70Safresh1Svenska Swedish:sv:fi se:1 15 8986fb12b70Safresh1Thai:th:th:11 tis620 8996fb12b70Safresh1Turkish:tr:tr:9 turkish8 9006fb12b70Safresh1Yiddish:yi::1 15 901