xref: /openbsd/gnu/usr.bin/perl/t/loc_tools.pl (revision 3d61058a)
156d68f1eSafresh1# Common tools for test files to find the locales which exist on the
29f11ffb7Safresh1# system.  Caller should have verified that this isn't miniperl before calling
36fb12b70Safresh1# the functions.
46fb12b70Safresh1
56fb12b70Safresh1# Note that it's okay that some languages have their native names
66fb12b70Safresh1# capitalized here even though that's not "right".  They are lowercased
76fb12b70Safresh1# anyway later during the scanning process (and besides, some clueless
86fb12b70Safresh1# vendor might have them capitalized erroneously anyway).
96fb12b70Safresh1
10b8851fccSafresh1# Functions whose names begin with underscore are internal helper functions
11b8851fccSafresh1# for this file, and are not to be used by outside callers.
12b8851fccSafresh1
139f11ffb7Safresh1use Config;
149f11ffb7Safresh1use strict;
15eac174f2Safresh1use warnings;
16eac174f2Safresh1use feature 'state';
179f11ffb7Safresh1
18e0680481Safresh1my %known_bad_locales = (   # XXX eventually will need version info if and
19e0680481Safresh1                            # when these get fixed.
20e0680481Safresh1    solaris => [ 'vi_VN.UTF-8', ],  # Use of U+A8 segfaults: GH #20578
21e0680481Safresh1);
22e0680481Safresh1
23b8851fccSafresh1eval { require POSIX; import POSIX 'locale_h'; };
249f11ffb7Safresh1my $has_locale_h = ! $@;
259f11ffb7Safresh1
269f11ffb7Safresh1my @known_categories = ( qw(LC_ALL LC_COLLATE LC_CTYPE LC_MESSAGES LC_MONETARY
279f11ffb7Safresh1                            LC_NUMERIC LC_TIME LC_ADDRESS LC_IDENTIFICATION
28eac174f2Safresh1                            LC_MEASUREMENT LC_PAPER LC_TELEPHONE LC_SYNTAX
29e0680481Safresh1                            LC_TOD LC_NAME));
309f11ffb7Safresh1my @platform_categories;
319f11ffb7Safresh1
32*3d61058aSafresh1my $has_excluded_category = $Config{ccflags} =~ /\bD?NO_LOCALE_/;
33e0680481Safresh1sub category_excluded($) {
34eac174f2Safresh1    my $cat_name = shift =~ s/^LC_//r;
35eac174f2Safresh1
36eac174f2Safresh1    # Recognize Configure option to exclude a category
37*3d61058aSafresh1    return $has_excluded_category
38*3d61058aSafresh1        && $Config{ccflags} =~ /\bD?NO_LOCALE_$cat_name\b/;
39eac174f2Safresh1}
40eac174f2Safresh1
419f11ffb7Safresh1# LC_ALL can be -1 on some platforms.  And, in fact the implementors could
429f11ffb7Safresh1# legally use any integer to represent any category.  But it makes the most
439f11ffb7Safresh1# sense for them to have used small integers.  Below, we create new locale
449f11ffb7Safresh1# numbers for ones missing from this machine.  We make them very negative,
459f11ffb7Safresh1# hopefully more negative than anything likely to be a valid category on the
469f11ffb7Safresh1# platform, but also below is a check to be sure that our guess is valid.
479f11ffb7Safresh1my $max_bad_category_number = -1000000;
489f11ffb7Safresh1
499f11ffb7Safresh1# Initialize this hash so that it looks like e.g.,
509f11ffb7Safresh1#   6 => 'CTYPE',
519f11ffb7Safresh1# where 6 is the value of &POSIX::LC_CTYPE
529f11ffb7Safresh1my %category_name;
539f11ffb7Safresh1my %category_number;
549f11ffb7Safresh1if ($has_locale_h) {
559f11ffb7Safresh1    my $number_for_missing_category = $max_bad_category_number;
569f11ffb7Safresh1    foreach my $name (@known_categories) {
579f11ffb7Safresh1        my $number = eval "&POSIX::$name";
589f11ffb7Safresh1        if ($@) {
599f11ffb7Safresh1            # Use a negative number (smaller than any legitimate category
609f11ffb7Safresh1            # number) if the platform doesn't support this category, so we
619f11ffb7Safresh1            # have an entry for all the ones that might be specified in calls
629f11ffb7Safresh1            # to us.
63eac174f2Safresh1            $number = $number_for_missing_category--;
649f11ffb7Safresh1        }
659f11ffb7Safresh1        elsif (   $number !~ / ^ -? \d+ $ /x
669f11ffb7Safresh1               || $number <=  $max_bad_category_number)
679f11ffb7Safresh1        {
689f11ffb7Safresh1            # We think this should be an int.  And it has to be larger than
699f11ffb7Safresh1            # any of our synthetic numbers.
709f11ffb7Safresh1            die "Unexpected locale category number '$number' for $name"
719f11ffb7Safresh1        }
729f11ffb7Safresh1        else {
739f11ffb7Safresh1            push @platform_categories, $name;
749f11ffb7Safresh1        }
759f11ffb7Safresh1
769f11ffb7Safresh1        $name =~ s/LC_//;
779f11ffb7Safresh1        $category_name{$number} = "$name";
789f11ffb7Safresh1        $category_number{$name} = $number;
799f11ffb7Safresh1    }
809f11ffb7Safresh1}
819f11ffb7Safresh1
829f11ffb7Safresh1sub _my_diag($) {
839f11ffb7Safresh1    my $message = shift;
849f11ffb7Safresh1    if (defined &main::diag) {
859f11ffb7Safresh1        diag($message);
869f11ffb7Safresh1    }
879f11ffb7Safresh1    else {
889f11ffb7Safresh1        local($\, $", $,) = (undef, ' ', '');
899f11ffb7Safresh1        print STDERR $message, "\n";
909f11ffb7Safresh1    }
919f11ffb7Safresh1}
929f11ffb7Safresh1
93e0680481Safresh1# Larger than any real test
94e0680481Safresh1my $my_count = 1_000_000;
95e0680481Safresh1
969f11ffb7Safresh1sub _my_fail($) {
979f11ffb7Safresh1    my $message = shift;
989f11ffb7Safresh1    if (defined &main::fail) {
999f11ffb7Safresh1        fail($message);
1009f11ffb7Safresh1    }
1019f11ffb7Safresh1    else {
1029f11ffb7Safresh1        local($\, $", $,) = (undef, ' ', '');
103e0680481Safresh1        print "not ok " . $my_count++ . $message . "\n";
1049f11ffb7Safresh1    }
1059f11ffb7Safresh1}
106b8851fccSafresh1
107*3d61058aSafresh1sub platform_locale_categories() {
108*3d61058aSafresh1    return @platform_categories;
109*3d61058aSafresh1}
110*3d61058aSafresh1
111e0680481Safresh1sub valid_locale_categories() {
112e0680481Safresh1    # Returns a list of the locale categories (expressed as strings, like
113*3d61058aSafresh1    # "LC_ALL") known to this program that are available on this platform.
114e0680481Safresh1
115e0680481Safresh1    return grep { ! category_excluded($_) } @platform_categories;
116e0680481Safresh1}
117e0680481Safresh1
118e0680481Safresh1sub is_category_valid($) {
119e0680481Safresh1    my $name = shift;
120e0680481Safresh1    $name = 'LC_' . $name =~ s/^LC_//r;
121e0680481Safresh1    return grep { $name eq $_ } valid_locale_categories();
122e0680481Safresh1}
123e0680481Safresh1
124e0680481Safresh1# It turns out that strings generated under the control of a given locale
125e0680481Safresh1# category are often affected as well by LC_CTYPE.  If the two categories
126e0680481Safresh1# don't match, one can get mojibake or even core dumps.  (khw thinks it more
127e0680481Safresh1# likely that it's the code set, not the locale that's critical here; but
128e0680481Safresh1# didn't run experiments to verify this.)  Hence, in the code below, CTYPE and
129e0680481Safresh1# the tested categories are all set to the same locale.  If CTYPE isn't
130e0680481Safresh1# available on the platform, LC_ALL is instead used.  One might think to just
131e0680481Safresh1# use LC_ALL all the time, but on Windows
132e0680481Safresh1#    setlocale(LC_ALL, "some_borked_locale")
133e0680481Safresh1# can return success, whereas setting LC_CTYPE to it fails.
134e0680481Safresh1my $master_category;
135e0680481Safresh1$master_category = $category_number{'CTYPE'}
136e0680481Safresh1        if is_category_valid('LC_CTYPE') && defined $category_number{'CTYPE'};
137e0680481Safresh1$master_category = $category_number{'ALL'}
138e0680481Safresh1        if ! defined $master_category
139e0680481Safresh1          && is_category_valid('LC_ALL') && defined $category_number{'ALL'};
140e0680481Safresh1
141*3d61058aSafresh1my @platform_locales;       # cache of locales found on this platform
142*3d61058aSafresh1my $gathering_platform_locales = 0; # Should we gather locales, or use the
143*3d61058aSafresh1                                    # cache?
144*3d61058aSafresh1my %seen;   # Used to avoid duplicates
145*3d61058aSafresh1
146b8851fccSafresh1sub _trylocale ($$$$) { # For use only by other functions in this file!
147b8851fccSafresh1
148b8851fccSafresh1    # Adds the locale given by the first parameter to the list given by the
1499f11ffb7Safresh1    # 3rd iff the platform supports the locale in each of the category numbers
1509f11ffb7Safresh1    # given by the 2nd parameter, which is either a single category or a
151eac174f2Safresh1    # reference to a list of categories.
1529f11ffb7Safresh1    #
1539f11ffb7Safresh1    # The 4th parameter is true if to accept locales that aren't apparently
1549f11ffb7Safresh1    # fully compatible with Perl.
155b8851fccSafresh1
1566fb12b70Safresh1    my $locale = shift;
1576fb12b70Safresh1    my $categories = shift;
1586fb12b70Safresh1    my $list = shift;
1599f11ffb7Safresh1    my $allow_incompatible = shift;
160b8851fccSafresh1
161e0680481Safresh1    my $normalized_locale = lc ($locale =~ s/\W//gr);
162e0680481Safresh1    return if ! $locale || grep { $normalized_locale eq lc ($_ =~ s/\W//gr) } @$list;
1636fb12b70Safresh1
1649f11ffb7Safresh1    # This is a toy (pig latin) locale that is not fully implemented on some
1659f11ffb7Safresh1    # systems
1669f11ffb7Safresh1    return if $locale =~ / ^ pig $ /ix;
1679f11ffb7Safresh1
168b46d8ef2Safresh1    # Certain platforms have a crippled locale system in which setlocale
169b46d8ef2Safresh1    # returns success for just about any possible locale name, but if anything
170b46d8ef2Safresh1    # actually happens as a result of the call, it is that the underlying
171b46d8ef2Safresh1    # locale is set to a system default, likely C or C.UTF-8.  We can't test
172b46d8ef2Safresh1    # such systems fully, but we shouldn't disable the user from using
173b46d8ef2Safresh1    # locales, as it may work out for them (or not).
174b46d8ef2Safresh1    return if    defined $Config{d_setlocale_accepts_any_locale_name}
175e0680481Safresh1              && $locale !~ / ^ (?: C | POSIX | C\.UTF-?8 ) $/ix;
176e0680481Safresh1
177e0680481Safresh1    if (exists $known_bad_locales{$^O}) {
178e0680481Safresh1        my @bad_locales = $known_bad_locales{$^O}->@*;
179e0680481Safresh1        return if grep { $locale eq $_ } @bad_locales;
180e0680481Safresh1    }
1819f11ffb7Safresh1
1826fb12b70Safresh1
183b8851fccSafresh1    my $badutf8 = 0;
184b8851fccSafresh1    my $plays_well = 1;
185e0680481Safresh1    my $unsupported = 0;
186b8851fccSafresh1
187b8851fccSafresh1    use warnings 'locale';
188b8851fccSafresh1
189b8851fccSafresh1    local $SIG{__WARN__} = sub {
1909f11ffb7Safresh1        $badutf8 = 1 if grep { /Malformed UTF-8/ } @_;
191e0680481Safresh1        $unsupported = 1 if grep { /Locale .* is unsupported/i } @_;
1929f11ffb7Safresh1        $plays_well = 0 if grep {
193e0680481Safresh1                    /The following characters .* may not have the same meaning as the Perl program expects(?#
1949f11ffb7Safresh1                   )|The Perl program will use the expected meanings/i
1959f11ffb7Safresh1            } @_;
196b8851fccSafresh1    };
197b8851fccSafresh1
198*3d61058aSafresh1    my $result;
199*3d61058aSafresh1    my @category_list;
200*3d61058aSafresh1    if (defined $categories) {
201*3d61058aSafresh1        $categories = [ $categories ] unless ref $categories;
202*3d61058aSafresh1        push @category_list, $categories->@*;
203*3d61058aSafresh1    }
204e0680481Safresh1
205*3d61058aSafresh1    # Make the master category first thing on the list; adding it if necessary
206*3d61058aSafresh1    if (defined $master_category) {
207*3d61058aSafresh1        @category_list =  grep { $_ != $master_category } @category_list;
208*3d61058aSafresh1        unshift @category_list, $master_category;
209*3d61058aSafresh1    }
210*3d61058aSafresh1
211*3d61058aSafresh1    foreach my $category (@category_list) {
212e0680481Safresh1        my $save_locale = setlocale($category);
213e0680481Safresh1        if (! $save_locale) {
214e0680481Safresh1            _my_fail("Verify could save previous locale");
215e0680481Safresh1            return;
216e0680481Safresh1        }
217e0680481Safresh1
2189f11ffb7Safresh1        # Incompatible locales aren't warned about unless using locales.
2199f11ffb7Safresh1        use locale;
2209f11ffb7Safresh1
221*3d61058aSafresh1        my $cur_result = setlocale($category, $locale);
222*3d61058aSafresh1        return unless defined $cur_result;
2239f11ffb7Safresh1
224e0680481Safresh1        no locale;
2256fb12b70Safresh1
226*3d61058aSafresh1        if (   $gathering_platform_locales
227*3d61058aSafresh1            && $category eq $master_category
228*3d61058aSafresh1            && ! $seen{$locale})
229*3d61058aSafresh1        {
230*3d61058aSafresh1            push @platform_locales, $locale;
231*3d61058aSafresh1            $seen{$locale}++;
232*3d61058aSafresh1        }
233*3d61058aSafresh1
234e0680481Safresh1        # We definitely don't want the locale set to something that is
235e0680481Safresh1        # unsupported
236e0680481Safresh1        if (! setlocale($category, $save_locale)) {
237e0680481Safresh1            my $error_text = "\$!=$!";
238e0680481Safresh1            $error_text .= "; \$^E=$^E" if $^E != $!;
239e0680481Safresh1            die "Couldn't restore locale '$save_locale', category $category;"
240e0680481Safresh1              . $error_text;
241e0680481Safresh1        }
2426fb12b70Safresh1        if ($badutf8) {
2439f11ffb7Safresh1            _my_fail("Verify locale name doesn't contain malformed utf8");
2446fb12b70Safresh1            return;
2456fb12b70Safresh1        }
246e0680481Safresh1
247e0680481Safresh1        return if $unsupported;
248e0680481Safresh1
249e0680481Safresh1        # Commas in locale names are bad in Windows, and there is a bug in
250e0680481Safresh1        # some versions where setlocale() turns a legal input locale name into
251e0680481Safresh1        # an illegal return value, which it can't later parse.
252*3d61058aSafresh1        return if $cur_result =~ /,/;
253e0680481Safresh1
254e0680481Safresh1        return unless $plays_well || $allow_incompatible;
255*3d61058aSafresh1
256*3d61058aSafresh1        if (! defined $result) {    # First time
257*3d61058aSafresh1
258*3d61058aSafresh1            # If the name returned as $cur_result by the setlocale() above is the
259*3d61058aSafresh1            # same as we requested, there are no complications: use that.
260*3d61058aSafresh1            if ($locale eq $cur_result) {
261*3d61058aSafresh1                $result = $cur_result;
262*3d61058aSafresh1            }
263*3d61058aSafresh1            else {
264*3d61058aSafresh1
265*3d61058aSafresh1                # But if it's different, we check if it's part of a disparate
266*3d61058aSafresh1                # LC_ALL.  If so, use the input locale; if not it means the
267*3d61058aSafresh1                # input was a synonym, and we use what it maps to.
268*3d61058aSafresh1                #
269*3d61058aSafresh1                # First, if the platform uses positional notation
270*3d61058aSafresh1                if ($Config{PERL_LC_ALL_SEPARATOR}) {
271*3d61058aSafresh1                    $result = (index($cur_result, $Config{PERL_LC_ALL_SEPARATOR})
272*3d61058aSafresh1                                                                            >= 0)
273*3d61058aSafresh1                              ? $locale
274*3d61058aSafresh1                              : $cur_result;
275*3d61058aSafresh1                }
276*3d61058aSafresh1                else {  # Must be using name=value notation
277*3d61058aSafresh1                    $result = ($cur_result =~ / = .* ; /x)
278*3d61058aSafresh1                            ? $locale
279*3d61058aSafresh1                            : $cur_result;
280*3d61058aSafresh1                }
281*3d61058aSafresh1            }
282*3d61058aSafresh1        }
283*3d61058aSafresh1        elsif (! $has_excluded_category && $result ne $cur_result) {
284*3d61058aSafresh1
285*3d61058aSafresh1            # Some platforms will translate POSIX into C
286*3d61058aSafresh1            if (! (   ($result eq "C" && $cur_result eq "POSIX")
287*3d61058aSafresh1                   || ($result eq "POSIX" && $cur_result eq "C")))
288*3d61058aSafresh1            {
289*3d61058aSafresh1                # But otherwise if the new result for this category doesn't
290*3d61058aSafresh1                # match what we already have for a previous category for this
291*3d61058aSafresh1                # same input locale, it's problematic, so discard this whole
292*3d61058aSafresh1                # locale.
293*3d61058aSafresh1                return;
294*3d61058aSafresh1            }
295*3d61058aSafresh1        }
296e0680481Safresh1    }
297e0680481Safresh1
298*3d61058aSafresh1    push @$list, $result;
2996fb12b70Safresh1}
3006fb12b70Safresh1
301b8851fccSafresh1sub _decode_encodings { # For use only by other functions in this file!
3026fb12b70Safresh1    my @enc;
3036fb12b70Safresh1
3046fb12b70Safresh1    foreach (split(/ /, shift)) {
3056fb12b70Safresh1	if (/^(\d+)$/) {
3066fb12b70Safresh1	    push @enc, "ISO8859-$1";
307*3d61058aSafresh1	    push @enc, "ISO-8859-$1";
3086fb12b70Safresh1	    push @enc, "iso8859$1";	# HP
3096fb12b70Safresh1	    if ($1 eq '1') {
3106fb12b70Safresh1		 push @enc, "roman8";	# HP
3116fb12b70Safresh1	    }
3126fb12b70Safresh1	    push @enc, $_;
3136fb12b70Safresh1            push @enc, "$_.UTF-8";
3146fb12b70Safresh1            push @enc, "$_.65001"; # Windows UTF-8
3156fb12b70Safresh1	}
3166fb12b70Safresh1    }
3176fb12b70Safresh1    if ($^O eq 'os390') {
3186fb12b70Safresh1	push @enc, qw(IBM-037 IBM-819 IBM-1047);
3196fb12b70Safresh1    }
3206fb12b70Safresh1    push @enc, "UTF-8";
3216fb12b70Safresh1    push @enc, "65001"; # Windows UTF-8
322*3d61058aSafresh1    push @enc, "1252";  # Windows
3236fb12b70Safresh1
3246fb12b70Safresh1    return @enc;
3256fb12b70Safresh1}
3266fb12b70Safresh1
327b8851fccSafresh1sub locales_enabled(;$) {
328eac174f2Safresh1    # If no parameter is specified, the function returns 1 if there is any
329eac174f2Safresh1    # "safe" locale handling available to the caller; otherwise 0.  Safeness
330eac174f2Safresh1    # is defined here as the caller operating in the main thread of a program,
331eac174f2Safresh1    # or if threaded locales are safe on the platform and Configured to be
332eac174f2Safresh1    # used.  This sub is used for testing purposes, and for those, this
333eac174f2Safresh1    # definition of safety is sufficient, and necessary to get some tests to
334eac174f2Safresh1    # run on certain configurations on certain platforms.  But beware that the
335eac174f2Safresh1    # main thread can change the locale of any subthreads unless
336eac174f2Safresh1    # ${^SAFE_LOCALES} is non-zero.
337b8851fccSafresh1    #
338eac174f2Safresh1    # Use the optional parameter to discover if a particular category or
339eac174f2Safresh1    # categories are available on the system.  1 is returned if the global
340eac174f2Safresh1    # criteria described in the previous paragraph are true, AND if all the
341eac174f2Safresh1    # specified categories are available on the platform and Configured to be
342eac174f2Safresh1    # used.  Otherwise 0 is returned.  The parameter is either a single POSIX
343eac174f2Safresh1    # locale category or a reference to a list of them.  Each category must be
344eac174f2Safresh1    # its name as a string, like 'LC_TIME' (the initial 'LC_' is optional), or
345eac174f2Safresh1    # the number this platform uses to signify the category (e.g.,
346eac174f2Safresh1    # 'locales_enabled(&POSIX::LC_CTYPE)'
347b8851fccSafresh1    #
348eac174f2Safresh1    # When the function returns 1 and a parameter was specified as a list
349eac174f2Safresh1    # reference, the reference will be altered on return to point to an
350eac174f2Safresh1    # equivalent list such that  the categories are numeric instead of strings
351eac174f2Safresh1    # and sorted to meet the input expectations of _trylocale().
3529f11ffb7Safresh1    #
353eac174f2Safresh1    # It is a fatal error to call this with something that isn't a known
354eac174f2Safresh1    # category to this file.  If this happens, look first for a typo, and
355eac174f2Safresh1    # second if you are using a category unknown to Perl.  In the latter case
356eac174f2Safresh1    # a bug report should be submitted.
357b8851fccSafresh1
358b46d8ef2Safresh1    # khw cargo-culted the '?' in the pattern on the next line.
359b46d8ef2Safresh1    return 0 if $Config{ccflags} =~ /\bD?NO_LOCALE\b/;
360b46d8ef2Safresh1
361b46d8ef2Safresh1    # If we can't load the POSIX XS module, we can't have locales even if they
362b46d8ef2Safresh1    # normally would be available
363b46d8ef2Safresh1    return 0 if ! defined &DynaLoader::boot_DynaLoader;
364b46d8ef2Safresh1
365eac174f2Safresh1    # Don't test locales where they aren't safe.  On systems with unsafe
366eac174f2Safresh1    # threads, for the purposes of testing, we consider the main thread safe,
367eac174f2Safresh1    # and all other threads unsafe.
368eac174f2Safresh1    if (! ${^SAFE_LOCALES}) {
369eac174f2Safresh1        return 0 if $^O eq 'os390'; # Threaded locales don't work well here
370eac174f2Safresh1        require threads;
371eac174f2Safresh1        return 0 if threads->tid() != 0;
372eac174f2Safresh1    }
373eac174f2Safresh1
374eac174f2Safresh1    # If no setlocale, we need the POSIX 2008 alternatives
375b46d8ef2Safresh1    if (! $Config{d_setlocale}) {
376b46d8ef2Safresh1        return 0 if $Config{ccflags} =~ /\bD?NO_POSIX_2008_LOCALE\b/;
377b46d8ef2Safresh1        return 0 unless $Config{d_newlocale};
378b46d8ef2Safresh1        return 0 unless $Config{d_uselocale};
379b46d8ef2Safresh1        return 0 unless $Config{d_duplocale};
380b46d8ef2Safresh1        return 0 unless $Config{d_freelocale};
381b46d8ef2Safresh1    }
382b8851fccSafresh1
383b8851fccSafresh1    # Done with the global possibilities.  Now check if any passed in category
384b8851fccSafresh1    # is disabled.
3859f11ffb7Safresh1
386eac174f2Safresh1    my $categories_ref = $_[0];
3879f11ffb7Safresh1    my $return_categories_numbers = 0;
3889f11ffb7Safresh1    my @categories_numbers;
3899f11ffb7Safresh1    my $has_LC_ALL = 0;
3909f11ffb7Safresh1    my $has_LC_COLLATE = 0;
3919f11ffb7Safresh1
392b8851fccSafresh1    if (defined $categories_ref) {
3939f11ffb7Safresh1        my @local_categories_copy;
3949f11ffb7Safresh1
395eac174f2Safresh1        my $reftype = ref $categories_ref;
396eac174f2Safresh1        if ($reftype eq 'ARRAY') {
397eac174f2Safresh1            @local_categories_copy = @$categories_ref;
3989f11ffb7Safresh1            $return_categories_numbers = 1;
3999f11ffb7Safresh1        }
400eac174f2Safresh1        elsif ($reftype ne "") {
401eac174f2Safresh1            die "Parameter to locales_enabled() must be an ARRAY;"
402eac174f2Safresh1              . " instead you used a $reftype";
403eac174f2Safresh1        }
4049f11ffb7Safresh1        else {  # Single category passed in
4059f11ffb7Safresh1            @local_categories_copy = $categories_ref;
4069f11ffb7Safresh1        }
4079f11ffb7Safresh1
408b8851fccSafresh1        for my $category_name_or_number (@local_categories_copy) {
409b8851fccSafresh1            my $name;
410b8851fccSafresh1            my $number;
411b8851fccSafresh1            if ($category_name_or_number =~ / ^ -? \d+ $ /x) {
412b8851fccSafresh1                $number = $category_name_or_number;
413b8851fccSafresh1                die "Invalid locale category number '$number'"
414b8851fccSafresh1                    unless grep { $number == $_ } keys %category_name;
415b8851fccSafresh1                $name = $category_name{$number};
416b8851fccSafresh1            }
417b8851fccSafresh1            else {
418b8851fccSafresh1                $name = $category_name_or_number;
419b8851fccSafresh1                $name =~ s/ ^ LC_ //x;
420b8851fccSafresh1                foreach my $trial (keys %category_name) {
421b8851fccSafresh1                    if ($category_name{$trial} eq $name) {
422b8851fccSafresh1                        $number = $trial;
423b8851fccSafresh1                        last;
424b8851fccSafresh1                    }
425b8851fccSafresh1                }
426b8851fccSafresh1                die "Invalid locale category name '$name'"
427b8851fccSafresh1                    unless defined $number;
428b8851fccSafresh1            }
429b8851fccSafresh1
430b8851fccSafresh1            return 0 if   $number <= $max_bad_category_number
431e0680481Safresh1                       || category_excluded($name);
432eac174f2Safresh1
433b8851fccSafresh1
434b8851fccSafresh1            eval "defined &POSIX::LC_$name";
435b8851fccSafresh1            return 0 if $@;
4369f11ffb7Safresh1
4379f11ffb7Safresh1            if ($return_categories_numbers) {
4389f11ffb7Safresh1                if ($name eq 'CTYPE') {
4399f11ffb7Safresh1                    unshift @categories_numbers, $number;   # Always first
440b8851fccSafresh1                }
4419f11ffb7Safresh1                elsif ($name eq 'ALL') {
4429f11ffb7Safresh1                    $has_LC_ALL = 1;
4439f11ffb7Safresh1                }
4449f11ffb7Safresh1                elsif ($name eq 'COLLATE') {
4459f11ffb7Safresh1                    $has_LC_COLLATE = 1;
4469f11ffb7Safresh1                }
4479f11ffb7Safresh1                else {
4489f11ffb7Safresh1                    push @categories_numbers, $number;
4499f11ffb7Safresh1                }
4509f11ffb7Safresh1            }
4519f11ffb7Safresh1        }
4529f11ffb7Safresh1    }
4539f11ffb7Safresh1
4549f11ffb7Safresh1    if ($return_categories_numbers) {
4559f11ffb7Safresh1
4569f11ffb7Safresh1        # COLLATE comes after all other locales except ALL, which comes last
4579f11ffb7Safresh1        if ($has_LC_COLLATE) {
4589f11ffb7Safresh1            push @categories_numbers, $category_number{'COLLATE'};
4599f11ffb7Safresh1        }
4609f11ffb7Safresh1        if ($has_LC_ALL) {
4619f11ffb7Safresh1            push @categories_numbers, $category_number{'ALL'};
4629f11ffb7Safresh1        }
463eac174f2Safresh1
464eac174f2Safresh1        @$categories_ref = @categories_numbers;
465b8851fccSafresh1    }
466b8851fccSafresh1
467b8851fccSafresh1    return 1;
468b8851fccSafresh1}
469b8851fccSafresh1
470b8851fccSafresh1
4719f11ffb7Safresh1sub find_locales ($;$) {
472b8851fccSafresh1
4739f11ffb7Safresh1    # Returns an array of all the locales we found on the system.  If the
4749f11ffb7Safresh1    # optional 2nd parameter is non-zero, the list includes all found locales;
4759f11ffb7Safresh1    # otherwise it is restricted to those locales that play well with Perl, as
4769f11ffb7Safresh1    # far as we can easily determine.
4779f11ffb7Safresh1    #
4789f11ffb7Safresh1    # The first parameter is either a single locale category or a reference to
4799f11ffb7Safresh1    # a list of categories to find valid locales for it (or in the case of
4809f11ffb7Safresh1    # multiple) for all of them.  Each category can be a name (like 'LC_ALL'
4819f11ffb7Safresh1    # or simply 'ALL') or the C enum value for the category.
4829f11ffb7Safresh1
483eac174f2Safresh1    my $input_categories = shift;
4849f11ffb7Safresh1    my $allow_incompatible = shift // 0;
4859f11ffb7Safresh1
486*3d61058aSafresh1    die ("Usage: find_locales( category | [ categories ] )")
487*3d61058aSafresh1                                                unless defined $input_categories;
488e0680481Safresh1    my @categories = (ref $input_categories)
489e0680481Safresh1                      ? $input_categories->@*
490e0680481Safresh1                      : $input_categories;
491*3d61058aSafresh1
492*3d61058aSafresh1    # If we can't use at least one of these categories, investigate further
493*3d61058aSafresh1    if (! locales_enabled(\@categories)) {
494*3d61058aSafresh1
495*3d61058aSafresh1        # Not usable at all if system doesn't have locales
496*3d61058aSafresh1        return unless locales_enabled();
497*3d61058aSafresh1
498*3d61058aSafresh1        # Nor if any of the required categories isn't on the system
499*3d61058aSafresh1        my @on_platform = platform_locale_categories();
500*3d61058aSafresh1        for my $category (@categories) {
501*3d61058aSafresh1            return unless grep { $category eq $_ } @on_platform;
502*3d61058aSafresh1        }
503*3d61058aSafresh1
504*3d61058aSafresh1        # Otherwise the category is on the system, but not generally usable.
505*3d61058aSafresh1        # But the two always-present locales should be usable
506*3d61058aSafresh1        return ( "C", "POSIX" );
507*3d61058aSafresh1    }
508*3d61058aSafresh1
5099f11ffb7Safresh1
5109f11ffb7Safresh1    # Note, the subroutine call above converts the $categories into a form
5119f11ffb7Safresh1    # suitable for _trylocale().
5126fb12b70Safresh1
5136fb12b70Safresh1    # Visual C's CRT goes silly on strings of the form "en_US.ISO8859-1"
5146fb12b70Safresh1    # and mingw32 uses said silly CRT
5156fb12b70Safresh1    # This doesn't seem to be an issue any more, at least on Windows XP,
5166fb12b70Safresh1    # so re-enable the tests for Windows XP onwards.
5176fb12b70Safresh1    my $winxp = ($^O eq 'MSWin32' && defined &Win32::GetOSVersion &&
5186fb12b70Safresh1                    join('.', (Win32::GetOSVersion())[1..2]) >= 5.1);
519eac174f2Safresh1    return if (($^O eq 'MSWin32' && !$winxp)
520b8851fccSafresh1                && $Config{cc} =~ /^(cl|gcc|g\+\+|ici)/i);
5216fb12b70Safresh1
5229f11ffb7Safresh1    my @Locale;
523*3d61058aSafresh1
524*3d61058aSafresh1    if (@platform_locales) {
525*3d61058aSafresh1        $gathering_platform_locales = 0;
526*3d61058aSafresh1        foreach my $locale (@platform_locales) {
527*3d61058aSafresh1            _trylocale($locale, \@categories, \@Locale, $allow_incompatible);
528*3d61058aSafresh1        }
529*3d61058aSafresh1    }
530*3d61058aSafresh1    else {
531*3d61058aSafresh1        $gathering_platform_locales = 1;
532*3d61058aSafresh1
533eac174f2Safresh1        _trylocale("C", \@categories, \@Locale, $allow_incompatible);
534eac174f2Safresh1        _trylocale("POSIX", \@categories, \@Locale, $allow_incompatible);
535b46d8ef2Safresh1
53656d68f1eSafresh1        if ($Config{d_has_C_UTF8} && $Config{d_has_C_UTF8} eq 'true') {
537eac174f2Safresh1            _trylocale("C.UTF-8", \@categories, \@Locale, $allow_incompatible);
538b46d8ef2Safresh1        }
539b46d8ef2Safresh1
540*3d61058aSafresh1        # There's no point in looking at anything more if we know that
541*3d61058aSafresh1        # setlocale will return success on any garbage or non-garbage name.
542*3d61058aSafresh1        return sort @Locale
543*3d61058aSafresh1                        if defined $Config{d_setlocale_accepts_any_locale_name};
544b46d8ef2Safresh1
5459f11ffb7Safresh1        foreach (1..16) {
546*3d61058aSafresh1            _trylocale("ISO8859-$_", \@categories, \@Locale,
547*3d61058aSafresh1                       $allow_incompatible);
548*3d61058aSafresh1            _trylocale("iso8859$_", \@categories, \@Locale,
549*3d61058aSafresh1                       $allow_incompatible);
550*3d61058aSafresh1            _trylocale("iso8859-$_", \@categories, \@Locale,
551*3d61058aSafresh1                       $allow_incompatible);
552*3d61058aSafresh1            _trylocale("iso_8859_$_", \@categories, \@Locale,
553*3d61058aSafresh1                       $allow_incompatible);
554*3d61058aSafresh1            _trylocale("isolatin$_", \@categories, \@Locale,
555*3d61058aSafresh1                       $allow_incompatible);
556*3d61058aSafresh1            _trylocale("isolatin-$_", \@categories, \@Locale,
557*3d61058aSafresh1                       $allow_incompatible);
558*3d61058aSafresh1            _trylocale("iso_latin_$_", \@categories, \@Locale,
559*3d61058aSafresh1                       $allow_incompatible);
5606fb12b70Safresh1        }
5616fb12b70Safresh1
5626fb12b70Safresh1        # Sanitize the environment so that we can run the external 'locale'
5636fb12b70Safresh1        # program without the taint mode getting grumpy.
5646fb12b70Safresh1
5656fb12b70Safresh1        # $ENV{PATH} is special in VMS.
5666fb12b70Safresh1        delete local $ENV{PATH} if $^O ne 'VMS' or $Config{d_setenv};
5676fb12b70Safresh1
5686fb12b70Safresh1        # Other subversive stuff.
5696fb12b70Safresh1        delete local @ENV{qw(IFS CDPATH ENV BASH_ENV)};
5706fb12b70Safresh1
5716fb12b70Safresh1        if (-x "/usr/bin/locale"
5729f11ffb7Safresh1            && open(LOCALES, '-|', "/usr/bin/locale -a 2>/dev/null"))
5736fb12b70Safresh1        {
5746fb12b70Safresh1            while (<LOCALES>) {
575*3d61058aSafresh1
576*3d61058aSafresh1                # It seems that /usr/bin/locale steadfastly outputs 8 bit
577*3d61058aSafresh1                # data, which ain't great when we're running this
578*3d61058aSafresh1                # testPERL_UNICODE= so that utf8 locales will cause all IO
579*3d61058aSafresh1                # hadles to default to (assume) utf8
5806fb12b70Safresh1                next unless utf8::valid($_);
5816fb12b70Safresh1                chomp;
582eac174f2Safresh1                _trylocale($_, \@categories, \@Locale, $allow_incompatible);
5836fb12b70Safresh1            }
584*3d61058aSafresh1
5856fb12b70Safresh1            close(LOCALES);
5866fb12b70Safresh1        } elsif ($^O eq 'VMS'
5876fb12b70Safresh1                && defined($ENV{'SYS$I18N_LOCALE'})
5886fb12b70Safresh1                && -d 'SYS$I18N_LOCALE')
5896fb12b70Safresh1        {
5906fb12b70Safresh1            # The SYS$I18N_LOCALE logical name search list was not present on
591*3d61058aSafresh1            # VAX VMS V5.5-12, but was on AXP && VAX VMS V6.2 as well as later
592*3d61058aSafresh1            # versions.
5936fb12b70Safresh1            opendir(LOCALES, "SYS\$I18N_LOCALE:");
5946fb12b70Safresh1            while ($_ = readdir(LOCALES)) {
5956fb12b70Safresh1                chomp;
596eac174f2Safresh1                _trylocale($_, \@categories, \@Locale, $allow_incompatible);
5976fb12b70Safresh1            }
5986fb12b70Safresh1            close(LOCALES);
599*3d61058aSafresh1        } elsif (   ($^O eq 'openbsd' || $^O eq 'bitrig' )
600*3d61058aSafresh1                 && -e '/usr/share/locale')
601*3d61058aSafresh1        {
6026fb12b70Safresh1
603b8851fccSafresh1            # OpenBSD doesn't have a locale executable, so reading
604b8851fccSafresh1            # /usr/share/locale is much easier and faster than the last resort
605b8851fccSafresh1            # method.
6066fb12b70Safresh1
6076fb12b70Safresh1            opendir(LOCALES, '/usr/share/locale');
6086fb12b70Safresh1            while ($_ = readdir(LOCALES)) {
6096fb12b70Safresh1                chomp;
610eac174f2Safresh1                _trylocale($_, \@categories, \@Locale, $allow_incompatible);
6116fb12b70Safresh1            }
6126fb12b70Safresh1            close(LOCALES);
6136fb12b70Safresh1        } else { # Final fallback.  Try our list of locales hard-coded here
6146fb12b70Safresh1
6156fb12b70Safresh1            # This is going to be slow.
6166fb12b70Safresh1            my @Data;
6176fb12b70Safresh1
618*3d61058aSafresh1            # Locales whose name differs if the utf8 bit is on are stored in
619*3d61058aSafresh1            # these two files with appropriate encodings.
6209f11ffb7Safresh1            my $data_file = ($^H & 0x08 || (${^OPEN} || "") =~ /:utf8/)
6219f11ffb7Safresh1                            ? _source_location() . "/lib/locale/utf8"
6229f11ffb7Safresh1                            : _source_location() . "/lib/locale/latin1";
6239f11ffb7Safresh1            if (-e $data_file) {
6249f11ffb7Safresh1                @Data = do $data_file;
6259f11ffb7Safresh1            }
6269f11ffb7Safresh1            else {
627*3d61058aSafresh1                _my_diag(__FILE__ . ":" . __LINE__ .
628*3d61058aSafresh1                         ": '$data_file' doesn't exist");
6296fb12b70Safresh1            }
6306fb12b70Safresh1
6316fb12b70Safresh1            # The rest of the locales are in this file.
632eac174f2Safresh1            state @my_data = <DATA>; close DATA if fileno DATA;
633eac174f2Safresh1            push @Data, @my_data;
6346fb12b70Safresh1
635*3d61058aSafresh1            foreach my $default (qw(.ACP .OCP)) {
636*3d61058aSafresh1                _trylocale($default, \@categories, \@Locale,
637*3d61058aSafresh1                           $allow_incompatible);
638*3d61058aSafresh1            }
639*3d61058aSafresh1
6406fb12b70Safresh1            foreach my $line (@Data) {
64156d68f1eSafresh1                chomp $line;
6426fb12b70Safresh1                my ($locale_name, $language_codes, $country_codes, $encodings) =
6436fb12b70Safresh1                    split /:/, $line;
644*3d61058aSafresh1                _my_diag(__FILE__ . ":" . __LINE__
645*3d61058aSafresh1                         . ": Unexpected syntax in '$line'")
6469f11ffb7Safresh1                                                    unless defined $locale_name;
6476fb12b70Safresh1                my @enc = _decode_encodings($encodings);
6486fb12b70Safresh1                foreach my $loc (split(/ /, $locale_name)) {
649*3d61058aSafresh1                    _trylocale($loc, \@categories, \@Locale,
650*3d61058aSafresh1                               $allow_incompatible);
6516fb12b70Safresh1                    foreach my $enc (@enc) {
652eac174f2Safresh1                        _trylocale("$loc.$enc", \@categories, \@Locale,
6539f11ffb7Safresh1                                   $allow_incompatible);
6546fb12b70Safresh1                    }
6556fb12b70Safresh1                    $loc = lc $loc;
6566fb12b70Safresh1                    foreach my $enc (@enc) {
657eac174f2Safresh1                        _trylocale("$loc.$enc", \@categories, \@Locale,
6589f11ffb7Safresh1                                   $allow_incompatible);
6596fb12b70Safresh1                    }
6606fb12b70Safresh1                }
6616fb12b70Safresh1                foreach my $lang (split(/ /, $language_codes)) {
662*3d61058aSafresh1                    _trylocale($lang, \@categories, \@Locale,
663*3d61058aSafresh1                               $allow_incompatible);
6646fb12b70Safresh1                    foreach my $country (split(/ /, $country_codes)) {
6656fb12b70Safresh1                        my $lc = "${lang}_${country}";
666*3d61058aSafresh1                        _trylocale($lc, \@categories, \@Locale,
667*3d61058aSafresh1                                   $allow_incompatible);
6686fb12b70Safresh1                        foreach my $enc (@enc) {
669eac174f2Safresh1                            _trylocale("$lc.$enc", \@categories, \@Locale,
6709f11ffb7Safresh1                                       $allow_incompatible);
6716fb12b70Safresh1                        }
6726fb12b70Safresh1                        my $lC = "${lang}_\U${country}";
673*3d61058aSafresh1                        _trylocale($lC, \@categories, \@Locale,
674*3d61058aSafresh1                                   $allow_incompatible);
6756fb12b70Safresh1                        foreach my $enc (@enc) {
676eac174f2Safresh1                            _trylocale("$lC.$enc", \@categories, \@Locale,
6779f11ffb7Safresh1                                       $allow_incompatible);
6786fb12b70Safresh1                        }
6796fb12b70Safresh1                    }
6806fb12b70Safresh1                }
6816fb12b70Safresh1            }
6826fb12b70Safresh1        }
683*3d61058aSafresh1    }
6846fb12b70Safresh1
685*3d61058aSafresh1    my %Locale;
686*3d61058aSafresh1    $Locale{$_} = 1 for @Locale;
687*3d61058aSafresh1    @Locale = sort keys %Locale;
6886fb12b70Safresh1
6896fb12b70Safresh1    return @Locale;
6906fb12b70Safresh1}
6916fb12b70Safresh1
6926fb12b70Safresh1sub is_locale_utf8 ($) { # Return a boolean as to if core Perl thinks the input
6936fb12b70Safresh1                         # is a UTF-8 locale
694b8851fccSafresh1
695b8851fccSafresh1    # On z/OS, even locales marked as UTF-8 aren't.
696b8851fccSafresh1    return 0 if ord "A" != 65;
697b8851fccSafresh1
698b8851fccSafresh1    return 0 unless locales_enabled('LC_CTYPE');
699b8851fccSafresh1
7006fb12b70Safresh1    my $locale = shift;
7016fb12b70Safresh1
702b8851fccSafresh1    no warnings 'locale'; # We may be trying out a weird locale
703e0680481Safresh1    use locale;
7046fb12b70Safresh1
7056fb12b70Safresh1    my $save_locale = setlocale(&POSIX::LC_CTYPE());
7066fb12b70Safresh1    if (! $save_locale) {
707e0680481Safresh1        _my_fail("Verify could save previous locale");
7086fb12b70Safresh1        return 0;
7096fb12b70Safresh1    }
7106fb12b70Safresh1
7116fb12b70Safresh1    if (! setlocale(&POSIX::LC_CTYPE(), $locale)) {
712e0680481Safresh1        _my_fail("Verify could setlocale to $locale");
7136fb12b70Safresh1        return 0;
7146fb12b70Safresh1    }
7156fb12b70Safresh1
7166fb12b70Safresh1    my $ret = 0;
7176fb12b70Safresh1
7186fb12b70Safresh1    # Use an op that gives different results for UTF-8 than any other locale.
7196fb12b70Safresh1    # If a platform has UTF-8 locales, there should be at least one locale on
7206fb12b70Safresh1    # most platforms with UTF-8 in its name, so if there is a bug in the op
7216fb12b70Safresh1    # giving a false negative, we should get a failure for those locales as we
7226fb12b70Safresh1    # go through testing all the locales on the platform.
7236fb12b70Safresh1    if (CORE::fc(chr utf8::unicode_to_native(0xdf)) ne "ss") {
7246fb12b70Safresh1        if ($locale =~ /UTF-?8/i) {
725e0680481Safresh1            _my_fail("Verify $locale with UTF-8 in name is a UTF-8 locale");
7266fb12b70Safresh1        }
7276fb12b70Safresh1    }
7286fb12b70Safresh1    else {
7296fb12b70Safresh1        $ret = 1;
7306fb12b70Safresh1    }
7316fb12b70Safresh1
7326fb12b70Safresh1    die "Couldn't restore locale '$save_locale'"
7336fb12b70Safresh1                            unless setlocale(&POSIX::LC_CTYPE(), $save_locale);
7346fb12b70Safresh1
7356fb12b70Safresh1    return $ret;
7366fb12b70Safresh1}
7376fb12b70Safresh1
738e0680481Safresh1sub classify_locales_wrt_utf8ness($) {
739e0680481Safresh1
740e0680481Safresh1    # Takes the input list of locales, and returns two lists split apart from
741e0680481Safresh1    # it: the UTF-8 ones, and the non-UTF-8 ones.
742e0680481Safresh1
743e0680481Safresh1    my $locales_ref = shift;
744e0680481Safresh1    my (@utf8, @non_utf8);
745e0680481Safresh1
746e0680481Safresh1    if (! locales_enabled('LC_CTYPE')) {  # No CTYPE implies all are non-UTF-8
747e0680481Safresh1        @non_utf8 = $locales_ref->@*;
748e0680481Safresh1        return ( \@utf8, \@non_utf8 );
749e0680481Safresh1    }
750e0680481Safresh1
751e0680481Safresh1    foreach my $locale (@$locales_ref) {
752e0680481Safresh1        my $which = (is_locale_utf8($locale)) ? \@utf8 : \@non_utf8;
753e0680481Safresh1        push $which->@*, $locale;
754e0680481Safresh1    }
755e0680481Safresh1
756e0680481Safresh1    return ( \@utf8, \@non_utf8 );
757e0680481Safresh1}
758e0680481Safresh1
759e0680481Safresh1sub find_utf8_ctype_locales (;$) {
760e0680481Safresh1
761e0680481Safresh1    # Return the names of the locales that core Perl thinks are UTF-8 LC_CTYPE
762e0680481Safresh1    # locales.  Optional parameter is a reference to a list of locales to try;
763e0680481Safresh1    # if omitted, this tries all locales it can find on the platform
764e0680481Safresh1
765b8851fccSafresh1    return unless locales_enabled('LC_CTYPE');
766b8851fccSafresh1
7676fb12b70Safresh1    my $locales_ref = shift;
7686fb12b70Safresh1    if (! defined $locales_ref) {
769b8851fccSafresh1
7709f11ffb7Safresh1        my @locales = find_locales(&POSIX::LC_CTYPE());
7716fb12b70Safresh1        $locales_ref = \@locales;
7726fb12b70Safresh1    }
7736fb12b70Safresh1
774e0680481Safresh1    my ($utf8_ref, undef) = classify_locales_wrt_utf8ness($locales_ref);
775e0680481Safresh1    return unless $utf8_ref;
776e0680481Safresh1    return $utf8_ref->@*;
777b46d8ef2Safresh1}
778b46d8ef2Safresh1
779b46d8ef2Safresh1sub find_utf8_ctype_locale (;$) { # Return the name of a locale that core Perl
780b46d8ef2Safresh1                                  # thinks is a UTF-8 LC_CTYPE non-turkic
781b46d8ef2Safresh1                                  # locale.
782b46d8ef2Safresh1                                  # Optional parameter is a reference to a
783b46d8ef2Safresh1                                  # list of locales to try; if omitted, this
784b46d8ef2Safresh1                                  # tries all locales it can find on the
785b46d8ef2Safresh1                                  # platform
786b46d8ef2Safresh1    my $try_locales_ref = shift;
787b46d8ef2Safresh1
788b46d8ef2Safresh1    my @utf8_locales = find_utf8_ctype_locales($try_locales_ref);
789b46d8ef2Safresh1    my @turkic_locales = find_utf8_turkic_locales($try_locales_ref);
790b46d8ef2Safresh1
791b46d8ef2Safresh1    my %seen_turkic;
792b46d8ef2Safresh1
793b46d8ef2Safresh1    # Create undef elements in the hash for turkic locales
794b46d8ef2Safresh1    @seen_turkic{@turkic_locales} = ();
795b46d8ef2Safresh1
796b46d8ef2Safresh1    foreach my $locale (@utf8_locales) {
797b46d8ef2Safresh1        return $locale unless exists $seen_turkic{$locale};
7986fb12b70Safresh1    }
7996fb12b70Safresh1
8006fb12b70Safresh1    return;
8016fb12b70Safresh1}
8026fb12b70Safresh1
803b46d8ef2Safresh1sub find_utf8_turkic_locales (;$) {
804b46d8ef2Safresh1
805b46d8ef2Safresh1    # Return the name of all the locales that core Perl thinks are UTF-8
806b46d8ef2Safresh1    # Turkic LC_CTYPE.  Optional parameter is a reference to a list of locales
807b46d8ef2Safresh1    # to try; if omitted, this tries all locales it can find on the platform
808b46d8ef2Safresh1
809b46d8ef2Safresh1    my @return;
810b46d8ef2Safresh1
811b46d8ef2Safresh1    return unless locales_enabled('LC_CTYPE');
812b46d8ef2Safresh1
813b46d8ef2Safresh1    my $save_locale = setlocale(&POSIX::LC_CTYPE());
814b46d8ef2Safresh1    foreach my $locale (find_utf8_ctype_locales(shift)) {
815b46d8ef2Safresh1        use locale;
816b46d8ef2Safresh1        setlocale(&POSIX::LC_CTYPE(), $locale);
817b46d8ef2Safresh1        push @return, $locale if uc('i') eq "\x{130}";
818b46d8ef2Safresh1    }
819e0680481Safresh1
820e0680481Safresh1    die "Couldn't restore locale '$save_locale'"
821e0680481Safresh1                            unless setlocale(&POSIX::LC_CTYPE(), $save_locale);
822b46d8ef2Safresh1
823b46d8ef2Safresh1    return @return;
824b46d8ef2Safresh1}
825b46d8ef2Safresh1
826b46d8ef2Safresh1sub find_utf8_turkic_locale (;$) {
827b46d8ef2Safresh1    my @turkics = find_utf8_turkic_locales(shift);
828b46d8ef2Safresh1
829b46d8ef2Safresh1    return unless @turkics;
830b46d8ef2Safresh1    return $turkics[0]
831b46d8ef2Safresh1}
832b46d8ef2Safresh1
833b46d8ef2Safresh1
8349f11ffb7Safresh1# returns full path to the directory containing the current source
8359f11ffb7Safresh1# file, inspired by mauke's Dir::Self
8369f11ffb7Safresh1sub _source_location {
8379f11ffb7Safresh1    require File::Spec;
8389f11ffb7Safresh1
8399f11ffb7Safresh1    my $caller_filename = (caller)[1];
8409f11ffb7Safresh1
8419f11ffb7Safresh1    my $loc = File::Spec->rel2abs(
8429f11ffb7Safresh1        File::Spec->catpath(
8439f11ffb7Safresh1            (File::Spec->splitpath($caller_filename))[0, 1], ''
8449f11ffb7Safresh1        )
8459f11ffb7Safresh1    );
8469f11ffb7Safresh1
8479f11ffb7Safresh1    return ($loc =~ /^(.*)$/)[0]; # untaint
8489f11ffb7Safresh1}
8499f11ffb7Safresh1
8506fb12b70Safresh11
8516fb12b70Safresh1
8526fb12b70Safresh1# Format of data is: locale_name, language_codes, country_codes, encodings
8536fb12b70Safresh1__DATA__
8546fb12b70Safresh1Afrikaans:af:za:1 15
8556fb12b70Safresh1Arabic:ar:dz eg sa:6 arabic8
8566fb12b70Safresh1Brezhoneg Breton:br:fr:1 15
8576fb12b70Safresh1Bulgarski Bulgarian:bg:bg:5
8586fb12b70Safresh1Chinese:zh:cn tw:cn.EUC eucCN eucTW euc.CN euc.TW Big5 GB2312 tw.EUC
8596fb12b70Safresh1Hrvatski Croatian:hr:hr:2
8606fb12b70Safresh1Cymraeg Welsh:cy:cy:1 14 15
8616fb12b70Safresh1Czech:cs:cz:2
8626fb12b70Safresh1Dansk Danish:da:dk:1 15
8636fb12b70Safresh1Nederlands Dutch:nl:be nl:1 15
8646fb12b70Safresh1English American British:en:au ca gb ie nz us uk zw:1 15 cp850
8656fb12b70Safresh1Esperanto:eo:eo:3
8666fb12b70Safresh1Eesti Estonian:et:ee:4 6 13
8676fb12b70Safresh1Suomi Finnish:fi:fi:1 15
8686fb12b70Safresh1Flamish::fl:1 15
8696fb12b70Safresh1Deutsch German:de:at be ch de lu:1 15
8706fb12b70Safresh1Euskaraz Basque:eu:es fr:1 15
8716fb12b70Safresh1Galego Galician:gl:es:1 15
8726fb12b70Safresh1Ellada Greek:el:gr:7 g8
8736fb12b70Safresh1Frysk:fy:nl:1 15
8746fb12b70Safresh1Greenlandic:kl:gl:4 6
8756fb12b70Safresh1Hebrew:iw:il:8 hebrew8
8766fb12b70Safresh1Hungarian:hu:hu:2
8776fb12b70Safresh1Indonesian:id:id:1 15
8786fb12b70Safresh1Gaeilge Irish:ga:IE:1 14 15
8796fb12b70Safresh1Italiano Italian:it:ch it:1 15
8806fb12b70Safresh1Nihongo Japanese:ja:jp:euc eucJP jp.EUC sjis
8816fb12b70Safresh1Korean:ko:kr:
8826fb12b70Safresh1Latine Latin:la:va:1 15
8836fb12b70Safresh1Latvian:lv:lv:4 6 13
8846fb12b70Safresh1Lithuanian:lt:lt:4 6 13
8856fb12b70Safresh1Macedonian:mk:mk:1 15
8866fb12b70Safresh1Maltese:mt:mt:3
8876fb12b70Safresh1Moldovan:mo:mo:2
8886fb12b70Safresh1Norsk Norwegian:no no\@nynorsk nb nn:no:1 15
8896fb12b70Safresh1Occitan:oc:es:1 15
8906fb12b70Safresh1Polski Polish:pl:pl:2
8916fb12b70Safresh1Rumanian:ro:ro:2
8926fb12b70Safresh1Russki Russian:ru:ru su ua:5 koi8 koi8r KOI8-R koi8u cp1251 cp866
8936fb12b70Safresh1Serbski Serbian:sr:yu:5
8946fb12b70Safresh1Slovak:sk:sk:2
8956fb12b70Safresh1Slovene Slovenian:sl:si:2
8966fb12b70Safresh1Sqhip Albanian:sq:sq:1 15
8976fb12b70Safresh1Svenska Swedish:sv:fi se:1 15
8986fb12b70Safresh1Thai:th:th:11 tis620
8996fb12b70Safresh1Turkish:tr:tr:9 turkish8
9006fb12b70Safresh1Yiddish:yi::1 15
901