xref: /openbsd/gnu/usr.bin/perl/t/run/locale.t (revision 3d61058a)
1#!./perl
2BEGIN {
3    chdir 't' if -d 't';
4    @INC = '../lib';
5    require './test.pl';    # for fresh_perl_is() etc
6    require './loc_tools.pl'; # to find locales
7}
8
9use strict;
10use warnings;
11
12########
13# These tests are here instead of lib/locale.t because
14# some bugs depend on the internal state of the locale
15# settings and pragma/locale messes up that state pretty badly.
16# We need "fresh runs".
17BEGIN {
18    eval { require POSIX; POSIX->import("locale_h") };
19    if ($@) {
20	skip_all("could not load the POSIX module"); # running minitest?
21    }
22}
23use Config;
24
25use I18N::Langinfo qw(langinfo RADIXCHAR);
26my $have_strtod = $Config{d_strtod} eq 'define';
27my $have_localeconv = defined $Config{d_locconv} && $Config{d_locconv} eq 'define';
28my @locales = find_locales('LC_NUMERIC');
29skip_all("no locales available") unless @locales;
30note("locales available: @locales");
31
32my $debug = 0;
33my $switches = "";
34if (defined $ARGV[0] && $ARGV[0] ne "") {
35    if ($ARGV[0] ne 'debug') {
36        print STDERR "Usage: $0 [ debug ]\n";
37        exit 1
38    }
39    $debug = 1;
40}
41$switches = "switches => [ '-DLv' ]" if $debug;
42
43# reset the locale environment
44delete local @ENV{'LANGUAGE', 'LANG', (grep /^LC_[A-Z]+$/, keys %ENV)};
45
46# If user wants this to happen, they set the environment variable AND use
47# 'debug'
48delete local $ENV{'PERL_DEBUG_LOCALE_INIT'} unless $debug;
49
50my $has_ctype = grep { $_ eq "LC_CTYPE" } platform_locale_categories();
51
52SKIP: {
53    skip("LC_CTYPE not available on the system", 1 ) unless $has_ctype;
54    fresh_perl_is(<<"EOF",
55            use locale;
56            use POSIX;
57            POSIX::setlocale(POSIX::LC_CTYPE(),"C");
58            print "h" =~ /[g\\w]/i || 0;
59            print "\\n";
60EOF
61        1, { stderr => 'devnull' }, "/il matching of [bracketed] doesn't skip POSIX class if fails individ char");
62}
63
64SKIP: {
65    skip("LC_CTYPE not available on the system", 1 ) unless $has_ctype;
66    fresh_perl_is(<<"EOF",
67            use locale;
68            use POSIX;
69            POSIX::setlocale(POSIX::LC_CTYPE(),"C");
70            print "0" =~ /[\\d[:punct:]]/l || 0;
71            print "\\n";
72EOF
73        1, { stderr => 'devnull' }, "/l matching of [bracketed] doesn't skip non-first POSIX class");
74
75}
76
77my $non_C_locale;
78foreach my $locale (@locales) {
79    next if $locale eq "C" || $locale eq 'POSIX' || $locale eq "C.UTF-8";
80    $non_C_locale = $locale;
81    last;
82}
83
84if ($non_C_locale) {
85    note("using non-C locale '$non_C_locale'");
86    setlocale(LC_NUMERIC, $non_C_locale);
87    isnt(setlocale(LC_NUMERIC), "C", "retrieving current non-C LC_NUMERIC doesn't give 'C'");
88    setlocale(LC_ALL, $non_C_locale);
89    isnt(setlocale(LC_ALL), "C", "retrieving current non-C LC_ALL doesn't give 'C'");
90
91    my @test_numeric_locales = @locales;
92
93    # Skip this locale on these cygwin versions as the returned radix character
94    # length is wrong
95    if (   $^O eq 'cygwin'
96        && version->new(($Config{'osvers'} =~ /^(\d+(?:\.\d+)+)/)[0]) le v2.4.1)
97    {
98        @test_numeric_locales = grep { $_ !~ m/ps_AF/i } @test_numeric_locales;
99    }
100
101    # Similarly the arabic locales on solaris don't work right on the
102    # multi-byte radix character, generating malformed UTF-8.
103    if ($^O eq 'solaris') {
104        @test_numeric_locales = grep { $_ !~ m/ ^ ( ar_ | pa_ ) /x }
105                                                        @test_numeric_locales;
106    }
107
108    fresh_perl_is("for (qw(@test_numeric_locales)) {\n" . <<'EOF',
109        use POSIX qw(locale_h);
110        use locale;
111        setlocale(LC_NUMERIC, "$_") or next;
112        my $s = sprintf "%g %g", 3.1, 3.1;
113        next if $s eq '3.1 3.1' || $s =~ /^(3.+1) \1$/;
114        no warnings "utf8";
115        print "$_ $s\n";
116    }
117EOF
118        "", { eval $switches }, "no locales where LC_NUMERIC breaks");
119
120    SKIP: {
121        skip("Windows stores locale defaults in the registry", 1 )
122                                                                if $^O eq 'MSWin32';
123        fresh_perl_is("for (qw(@locales)) {\n" . <<'EOF',
124            use POSIX qw(locale_h);
125            use locale;
126            my $in = 4.2;
127            my $s = sprintf "%g", $in; # avoid any constant folding bugs
128            next if $s eq "4.2";
129            no warnings "utf8";
130            print "$_ $s\n";
131        }
132EOF
133        "", { eval $switches }, "LC_NUMERIC without environment nor setlocale() has no effect in any locale");
134    }
135
136    # try to find out a locale where LC_NUMERIC makes a difference
137    my $original_locale = setlocale(LC_NUMERIC);
138
139    my ($base, $different, $comma, $difference, $utf8_radix);
140    my $radix_encoded_as_utf8;
141    for ("C", @locales) { # prefer C for the base if available
142        use locale;
143        setlocale(LC_NUMERIC, $_) or next;
144        my $in = 4.2; # avoid any constant folding bugs
145        if ((my $s = sprintf("%g", $in)) eq "4.2")  {
146            $base ||= $_;
147        } else {
148            $different ||= $_;
149            $difference ||= $s;
150            my $radix = langinfo(RADIXCHAR);
151
152            # For utf8 locales with a non-ascii radix, it should be encoded as
153            # UTF-8 with the internal flag so set.
154            if (! defined $utf8_radix
155                && $radix =~ /[[:^ascii:]]/u  # /u because /l can raise warnings
156                && is_locale_utf8($_))
157            {
158                $utf8_radix = $_;
159                $radix_encoded_as_utf8 = utf8::is_utf8($radix);
160            }
161            else {
162                $comma ||= $_ if $radix eq ',';
163            }
164        }
165
166        last if $base && $different && $comma && $utf8_radix;
167    }
168    setlocale(LC_NUMERIC, $original_locale);
169
170    SKIP: {
171        skip("no UTF-8 locale available where LC_NUMERIC radix isn't ASCII", 1 )
172            unless $utf8_radix;
173        is($radix_encoded_as_utf8, 1, "UTF-8 locale '$utf8_radix' with non-ASCII"
174                                    . " radix is marked UTF-8");
175    }
176
177    SKIP: {
178        skip("no locale available where LC_NUMERIC radix isn't '.'", 30) unless $different;
179        note("using the '$different' locale for LC_NUMERIC tests");
180        {
181            local $ENV{LC_NUMERIC} = $different;
182
183            fresh_perl_is(<<'EOF', "4.2", { eval $switches },
184    format STDOUT =
185@.#
1864.179
187.
188    write;
189EOF
190                "format() does not look at LC_NUMERIC without 'use locale'");
191
192            {
193                fresh_perl_is(<<'EOF', "$difference\n", { eval $switches },
194                use POSIX;
195                use locale;
196                format STDOUT =
197@.#
1984.179
199.
200    write;
201EOF
202                "format() looks at LC_NUMERIC with 'use locale'");
203            }
204
205      SKIP: {
206                unless ($have_localeconv) {
207                    skip("no localeconv()", 1);
208                }
209                else {
210                    fresh_perl_is(<<'EOF', ",,", { eval $switches },
211    use POSIX;
212    no warnings "utf8";
213    print localeconv()->{decimal_point};
214    use locale;
215    print localeconv()->{decimal_point};
216EOF
217                "localeconv() looks at LC_NUMERIC with and without 'use locale'");
218                }
219            }
220
221            {
222                my $categories = ":collate :characters :collate :ctype :monetary :time";
223                fresh_perl_is(<<"EOF", "4.2", { eval $switches },
224    use locale qw($categories);
225    format STDOUT =
226@.#
2274.179
228.
229    write;
230EOF
231                "format() does not look at LC_NUMERIC with 'use locale qw($categories)'");
232            }
233
234            {
235                fresh_perl_is(<<'EOF', $difference, { eval $switches },
236    use locale;
237    format STDOUT =
238@.#
2394.179
240.
241    write;
242EOF
243                "format() looks at LC_NUMERIC with 'use locale'");
244            }
245
246            for my $category (qw(collate characters collate ctype monetary time)) {
247                for my $negation ("!", "not_") {
248                    fresh_perl_is(<<"EOF", $difference, { eval $switches },
249    use locale ":$negation$category";
250format STDOUT =
251@.#
2524.179
253.
254    write;
255EOF
256                    "format() looks at LC_NUMERIC with 'use locale \":"
257                    . "$negation$category\"'");
258                }
259            }
260
261            {
262                fresh_perl_is(<<'EOF', $difference, { eval $switches },
263    use locale ":numeric";
264format STDOUT =
265@.#
2664.179
267.
268    write;
269EOF
270                "format() looks at LC_NUMERIC with 'use locale \":numeric\"'");
271            }
272
273            {
274                fresh_perl_is(<<'EOF', "4.2", { eval $switches },
275format STDOUT =
276@.#
2774.179
278.
279    { use locale; write; }
280EOF
281                "too late to look at the locale at write() time");
282            }
283
284            {
285                fresh_perl_is(<<'EOF', $difference, { eval $switches },
286    use locale;
287    format STDOUT =
288@.#
2894.179
290.
291    { no locale; write; }
292EOF
293                "too late to ignore the locale at write() time");
294            }
295        }
296
297        {
298            # do not let "use 5.000" affect the locale!
299            # this test is to prevent regression of [rt.perl.org #105784]
300            fresh_perl_is(<<"EOF",
301                use locale;
302                use POSIX;
303                my \$i = 0.123;
304                POSIX::setlocale(POSIX::LC_NUMERIC(),"$different");
305                \$a = sprintf("%.2f", \$i);
306                require version;
307                \$b = sprintf("%.2f", \$i);
308                no warnings "utf8";
309                print ".\$a \$b" unless \$a eq \$b
310EOF
311                "", { eval $switches }, "version does not clobber version");
312
313            fresh_perl_is(<<"EOF",
314                use locale;
315                use POSIX;
316                my \$i = 0.123;
317                POSIX::setlocale(POSIX::LC_NUMERIC(),"$different");
318                \$a = sprintf("%.2f", \$i);
319                eval "use v5.0.0";
320                \$b = sprintf("%.2f", \$i);
321                no warnings "utf8";
322                print "\$a \$b" unless \$a eq \$b
323EOF
324                "", { eval $switches }, "version does not clobber version (via eval)");
325        }
326
327        {
328            local $ENV{LC_NUMERIC} = $different;
329            fresh_perl_is(<<'EOF', "$difference "x4, { eval $switches },
330                use locale;
331                use POSIX qw(locale_h);
332                my $in = 4.2;
333                printf("%g %g %s %s ", $in, 4.2, sprintf("%g", $in), sprintf("%g", 4.2));
334EOF
335            "sprintf() and printf() look at LC_NUMERIC regardless of constant folding");
336        }
337
338        {
339            local $ENV{LC_NUMERIC} = $different;
340            fresh_perl_is(<<'EOF', "$difference "x4, { eval $switches },
341                use locale;
342                use POSIX qw(locale_h);
343                my $in = 4.2;
344                printf("%g %g %s %s ", $in, 4.2, sprintf("%g", $in), sprintf("%g", 4.2));
345EOF
346            "Uses the above test to verify that on Windows the system default locale has lower priority than LC_NUMERIC");
347        }
348
349
350        # within this block, STDERR is closed. This is because fresh_perl_is()
351        # forks a shell, and some shells (like bash) can complain noisily when
352        # LC_ALL or similar is set to an invalid value
353
354        {
355            open my $saved_stderr, ">&STDERR" or die "Can't dup STDERR: $!";
356            # Comment out the following line to get error output when running the test
357            close STDERR;
358
359            {
360                local $ENV{LC_ALL} = "invalid";
361                local $ENV{LC_NUMERIC} = "invalid";
362                local $ENV{LANG} = $different;
363                local $ENV{PERL_BADLANG} = 0;
364
365                if (! fresh_perl_is(<<"EOF", "$difference", { eval $switches  },
366                    if (\$ENV{LC_ALL} ne "invalid") {
367                        # Make the test pass if the sh didn't accept the ENV set
368                        no warnings "utf8";
369                        print "$difference\n";
370                        exit 0;
371                    }
372                    use locale;
373                    use POSIX qw(locale_h);
374                    my \$in = 4.2;
375                    printf("%g", \$in);
376EOF
377                "LANG is used if LC_ALL, LC_NUMERIC are invalid"))
378            {
379                note "To see details change '" . __FILE__ . "', to not close STDERR";
380            }
381            }
382
383            SKIP: {
384                if ($^O eq 'MSWin32') {
385                    skip("Win32 uses system default locale in preference to \"C\"",
386                            1);
387                }
388                else {
389                    local $ENV{LC_ALL} = "invalid";
390                    local $ENV{LC_NUMERIC} = "invalid";
391                    local $ENV{LANG} = "invalid";
392                    local $ENV{PERL_BADLANG} = 0;
393
394                    if (! fresh_perl_is(<<"EOF", 4.2, { eval $switches  },
395                        if (\$ENV{LC_ALL} ne "invalid") {
396                            no warnings "utf8";
397                            print "$difference\n";
398                            exit 0;
399                        }
400                        use locale;
401                        use POSIX qw(locale_h);
402                        my \$in = 4.2;
403                        printf("%g", \$in);
404EOF
405                    'C locale is used if LC_ALL, LC_NUMERIC, LANG are invalid'))
406                    {
407                        note "To see details change '" . __FILE__ . "', to not close STDERR";
408                    }
409                }
410            }
411
412        open STDERR, ">&", $saved_stderr or die "Can't dup \$saved_stderr: $!";
413        }
414
415        {
416            local $ENV{LC_NUMERIC} = $different;
417            fresh_perl_is(<<"EOF",
418                use POSIX qw(locale_h);
419
420                BEGIN { setlocale(LC_NUMERIC, \"$different\"); };
421                setlocale(LC_ALL, "C");
422                use 5.008;
423                print setlocale(LC_NUMERIC);
424EOF
425            "C", { stderr => 'devnull' },
426            "No compile error on v-strings when setting the locale to non-dot radix at compile time when default environment has non-dot radix");
427        }
428
429        unless ($comma) {
430            skip("no locale available where LC_NUMERIC is a comma", 3);
431        }
432        else {
433
434            fresh_perl_is(<<"EOF",
435                my \$i = 1.5;
436                {
437                    use locale;
438                    use POSIX;
439                    POSIX::setlocale(POSIX::LC_NUMERIC(),"$comma");
440                    print \$i, "\n";
441                }
442                print \$i, "\n";
443EOF
444                "1,5\n1.5", { stderr => 'devnull' }, "Radix print properly in locale scope, and without");
445
446            fresh_perl_is(<<"EOF",
447                my \$i = 1.5;   # Should be exactly representable as a base 2
448                                # fraction, so can use 'eq' below
449                use locale;
450                use POSIX;
451                POSIX::setlocale(POSIX::LC_NUMERIC(),"$comma");
452                print \$i, "\n";
453                \$i += 1;
454                print \$i, "\n";
455EOF
456                "1,5\n2,5", { stderr => 'devnull' }, "Can do math when radix is a comma"); # [perl 115800]
457
458            SKIP: {
459                skip "Perl not compiled with 'useithreads'", 1 if ! $Config{'useithreads'};
460
461                local $ENV{LC_ALL} = undef;
462                local $ENV{LC_NUMERIC} = $comma;
463                fresh_perl_is(<<"EOF",
464                    use threads;
465
466                    my \$x = eval "1.25";
467                    print "\$x", "\n";  # number is ok before thread
468                    my \$str_x = "\$x";
469
470                    my \$thr = threads->create(sub {});
471                    \$thr->join();
472
473                    print "\$x\n";  # number stringifies the same after thread
474
475                    my \$y = eval "1.25";
476                    print "\$y\n";  # number is ok after threads
477                    print "\$y" eq "\$str_x" || 0;    # new number stringifies the same as old number
478EOF
479                "1.25\n1.25\n1.25\n1", { eval $switches }, "Thread join doesn't disrupt calling thread"
480                ); # [GH 20155]
481            }
482
483          SKIP: {
484            unless ($have_strtod) {
485                skip("no strtod()", 1);
486            }
487            else {
488                fresh_perl_is(<<"EOF",
489                    use POSIX;
490                    POSIX::setlocale(POSIX::LC_NUMERIC(),"$comma");
491                    my \$one_point_5 = POSIX::strtod("1,5");
492                    \$one_point_5 =~ s/0+\$//;  # Remove any trailing zeros
493                    print \$one_point_5, "\n";
494EOF
495                "1.5", { stderr => 'devnull' }, "POSIX::strtod() uses underlying locale");
496            }
497          }
498
499          { # GH #21746
500                local $ENV{LANG} = $comma;
501                fresh_perl_is(<<"EOF",
502                    use POSIX;
503                    POSIX::setlocale(POSIX::LC_ALL(),'');
504                    eval q{ use constant X => \$] };
505                    print \$@;
506EOF
507                "", {},
508                "Properly toggles to radix dot locale");
509          }
510        }
511    }
512
513SKIP: {
514        if ($Config{d_setlocale_accepts_any_locale_name})
515        {
516            skip("Can't distinguish between valid and invalid locale names on this system", 2);
517        }
518        if (! $Config{d_perl_lc_all_uses_name_value_pairs}) {
519            skip("Test only valid when LC_ALL syntax is name=value pairs", 2);
520        }
521
522        my @valid_categories = valid_locale_categories();
523
524        my $valid_string = "";
525        my $invalid_string = "";
526
527        # Deliberately don't include all categories, so as to test this situation
528        for my $i (0 .. @valid_categories - 2) {
529            my $category = $valid_categories[$i];
530            if ($category ne "LC_ALL") {
531                $invalid_string .= ";" if $invalid_string ne "";
532                $invalid_string .= "$category=foo_BAR";
533
534                next unless $non_C_locale;
535                $valid_string .= ";" if $valid_string ne "";
536                $valid_string .= "$category=$non_C_locale";
537            }
538        }
539
540        fresh_perl_is(<<"EOF",
541                use locale;
542                use POSIX;
543                POSIX::setlocale(LC_ALL, "$invalid_string");
544EOF
545            "", { eval $switches },
546            "In setting complicated invalid LC_ALL, final individ category doesn't need a \';'");
547
548        skip("no non-C locale available", 1 ) unless $non_C_locale;
549        fresh_perl_is(<<"EOF",
550                use locale;
551                use POSIX;
552                POSIX::setlocale(LC_ALL, "$valid_string");
553EOF
554            "", { eval $switches },
555            "In setting complicated valid LC_ALL, final individ category doesn't need a \';'");
556    }
557
558}
559
560SKIP:
561{
562    use locale;
563    # look for an English locale (so 'a' < 'B', hopefully)
564    my ($en) = grep { /^en_/ } find_locales( [ 'LC_COLLATE' ]);
565    defined $en
566        or skip "didn't find a suitable locale", 1;
567    POSIX::setlocale(LC_COLLATE, $en);
568    unless ("a" lt "B") {
569        skip "didn't find a suitable locale", 1;
570    }
571    fresh_perl_is(<<'EOF', "ok\n", { args => [ $en ] }, "check for failed assertion");
572use locale ':collate';
573use POSIX qw(setlocale LC_COLLATE);
574if (setlocale(LC_COLLATE, shift)) {
575     my $x = "a";
576     my $y = "B";
577     print $x lt $y ? "ok\n" : "not ok\n";
578     $x = "c"; # should empty the collxfrm magic but not remove it
579     # which the free code asserts on
580}
581else {
582     print "ok\n";
583}
584EOF
585}
586
587SKIP: {   # GH #20085
588    my @utf8_locales = find_utf8_ctype_locales();
589    skip "didn't find a UTF-8 locale", 1 unless @utf8_locales;
590
591    local $ENV{LC_CTYPE} = $utf8_locales[0];
592    local $ENV{LC_ALL} = undef;
593    fresh_perl_is(<<~'EOF', "ok\n", {}, "check that setlocale overrides startup");
594        use POSIX;
595
596        my $a_acute = "\N{LATIN SMALL LETTER A WITH ACUTE}";
597        my $egrave  = "\N{LATIN SMALL LETTER E WITH GRAVE}";
598        my $combo = "$a_acute.$egrave";
599
600        setlocale(&POSIX::LC_ALL, "C");
601        use locale;
602
603        # In a UTF-8 locale, \b matches Latin1 before string, mid, and end
604        if ($combo eq ($combo =~ s/\b/!/gr)) {
605            print "ok\n";
606        }
607        else {
608            print "not ok\n";
609        }
610    EOF
611}
612
613SKIP: {   # GH #20054
614    skip "Even illegal locale names are accepted", 1
615                    if $Config{d_setlocale_accepts_any_locale_name}
616                    && $Config{d_setlocale_accepts_any_locale_name} eq 'define';
617
618    my @lc_all_locales = find_locales('LC_ALL');
619    my $locale = $lc_all_locales[0];
620    skip "LC_ALL not enabled on this platform", 1 unless $locale;
621    my $fallback = ($^O eq "MSWin32")
622                    ? "system default"
623                    : "standard";
624    fresh_perl_like(<<~EOT,
625                        local \$ENV{LC_ALL} = "This is not a legal locale name";
626                        local \$ENV{LANG} = "Nor this neither";
627                        system "\$^X -e1";
628                    EOT
629                    qr/Falling back to the $fallback locale/,
630                    {}, "check that illegal startup environment falls back");
631}
632
633done_testing();
634