xref: /openbsd/gnu/usr.bin/perl/lib/locale_threads.t (revision 5486feef)
15759b3d2Safresh1use strict;
25759b3d2Safresh1use warnings;
35759b3d2Safresh1
45759b3d2Safresh1# This file tests interactions with locale and threads
55759b3d2Safresh1
65759b3d2Safresh1BEGIN {
7*5486feefSafresh1    $| = 1;
8*5486feefSafresh1
95759b3d2Safresh1    chdir 't' if -d 't';
105759b3d2Safresh1    require './test.pl';
115759b3d2Safresh1    set_up_inc('../lib');
12*5486feefSafresh1
135759b3d2Safresh1    skip_all_without_config('useithreads');
14*5486feefSafresh1    skip_all("Fails on threaded builds on OpenBSD")
15*5486feefSafresh1        if ($^O =~ m/^(openbsd)$/);
16*5486feefSafresh1
17*5486feefSafresh1    require './loc_tools.pl';
18*5486feefSafresh1
19*5486feefSafresh1    eval { require POSIX; POSIX->import(qw(errno_h locale_h unistd_h )) };
205759b3d2Safresh1    if ($@) {
215759b3d2Safresh1        skip_all("could not load the POSIX module"); # running minitest?
225759b3d2Safresh1    }
235759b3d2Safresh1}
245759b3d2Safresh1
25*5486feefSafresh1use Time::HiRes qw(time usleep);
26*5486feefSafresh1
27*5486feefSafresh1use Devel::Peek;
28*5486feefSafresh1$Devel::Peek::pv_limit = 0; $Devel::Peek::pv_limit = 0;
29*5486feefSafresh1use Data::Dumper;
30*5486feefSafresh1$Data::Dumper::Sortkeys=1;
31*5486feefSafresh1$Data::Dumper::Useqq = 1;
32*5486feefSafresh1$Data::Dumper::Deepcopy = 1;
33*5486feefSafresh1
34*5486feefSafresh1my $debug = 0;
35*5486feefSafresh1
36*5486feefSafresh1my %map_category_name_to_number;
37*5486feefSafresh1my %map_category_number_to_name;
38*5486feefSafresh1my @valid_categories = valid_locale_categories();
39*5486feefSafresh1foreach my $category (@valid_categories) {
40*5486feefSafresh1    my $cat_num = eval "&POSIX::$category";
41*5486feefSafresh1    die "Can't determine ${category}'s number: $@" if $@;
42*5486feefSafresh1
43*5486feefSafresh1    $map_category_name_to_number{$category} = $cat_num;
44*5486feefSafresh1    $map_category_number_to_name{$cat_num} = $category;
45*5486feefSafresh1}
46*5486feefSafresh1
47*5486feefSafresh1my $LC_ALL;
48*5486feefSafresh1my $LC_ALL_string;
49*5486feefSafresh1if (defined $map_category_name_to_number{LC_ALL}) {
50*5486feefSafresh1    $LC_ALL_string = 'LC_ALL';
51*5486feefSafresh1    $LC_ALL = $map_category_name_to_number{LC_ALL};
52*5486feefSafresh1}
53*5486feefSafresh1elsif (defined $map_category_name_to_number{LC_CTYPE}) {
54*5486feefSafresh1    $LC_ALL_string = 'LC_CTYPE';
55*5486feefSafresh1    $LC_ALL = $map_category_name_to_number{LC_CTYPE};
56*5486feefSafresh1}
57*5486feefSafresh1else {
58*5486feefSafresh1    skip_all("No LC_ALL nor LC_CTYPE");
59*5486feefSafresh1}
60*5486feefSafresh1
615759b3d2Safresh1# reset the locale environment
62*5486feefSafresh1delete local @ENV{'LANGUAGE', 'LANG', keys %map_category_name_to_number};
635759b3d2Safresh1
64*5486feefSafresh1my @locales = find_locales($LC_ALL);
65*5486feefSafresh1skip_all("Couldn't find any locales") if @locales == 0;
66*5486feefSafresh1
67*5486feefSafresh1plan(2);
68*5486feefSafresh1
69*5486feefSafresh1my ($utf8_locales_ref, $non_utf8_locales_ref)
70*5486feefSafresh1                                    = classify_locales_wrt_utf8ness(\@locales);
71*5486feefSafresh1
72*5486feefSafresh1my $official_ascii_name = 'ansi_x341968';
73*5486feefSafresh1
74*5486feefSafresh1my %lang_code_to_script = (     # ISO 639.2, but without the many codes that
75*5486feefSafresh1                                # are for latin (but the few western European
76*5486feefSafresh1                                # ones that are latin1 are included)
77*5486feefSafresh1                            am          => 'amharic',
78*5486feefSafresh1                            amh         => 'amharic',
79*5486feefSafresh1                            amharic     => 'amharic',
80*5486feefSafresh1                            ar          => 'arabic',
81*5486feefSafresh1                            be          => 'cyrillic',
82*5486feefSafresh1                            bel         => 'cyrillic',
83*5486feefSafresh1                            ben         => 'bengali',
84*5486feefSafresh1                            bn          => 'bengali',
85*5486feefSafresh1                            bg          => 'cyrillic',
86*5486feefSafresh1                            bul         => 'cyrillic',
87*5486feefSafresh1                            bulgarski   => 'cyrillic',
88*5486feefSafresh1                            bulgarian   => 'cyrillic',
89*5486feefSafresh1                            c           => $official_ascii_name,
90*5486feefSafresh1                            cnr         => 'cyrillic',
91*5486feefSafresh1                            de          => 'latin_1',
92*5486feefSafresh1                            deu         => 'latin_1',
93*5486feefSafresh1                            deutsch     => 'latin_1',
94*5486feefSafresh1                            german      => 'latin_1',
95*5486feefSafresh1                            div         => 'thaana',
96*5486feefSafresh1                            dv          => 'thaana',
97*5486feefSafresh1                            dzo         => 'tibetan',
98*5486feefSafresh1                            dz          => 'tibetan',
99*5486feefSafresh1                            el          => 'greek',
100*5486feefSafresh1                            ell         => 'greek',
101*5486feefSafresh1                            ellada      => 'greek',
102*5486feefSafresh1                            en          => $official_ascii_name,
103*5486feefSafresh1                            eng         => $official_ascii_name,
104*5486feefSafresh1                            american    => $official_ascii_name,
105*5486feefSafresh1                            british     => $official_ascii_name,
106*5486feefSafresh1                            es          => 'latin_1',
107*5486feefSafresh1                            fa          => 'arabic',
108*5486feefSafresh1                            fas         => 'arabic',
109*5486feefSafresh1                            flamish     => 'latin_1',
110*5486feefSafresh1                            fra         => 'latin_1',
111*5486feefSafresh1                            fr          => 'latin_1',
112*5486feefSafresh1                            heb         => 'hebrew',
113*5486feefSafresh1                            he          => 'hebrew',
114*5486feefSafresh1                            hi          => 'hindi',
115*5486feefSafresh1                            hin         => 'hindi',
116*5486feefSafresh1                            hy          => 'armenian',
117*5486feefSafresh1                            hye         => 'armenian',
118*5486feefSafresh1                            ita         => 'latin_1',
119*5486feefSafresh1                            it          => 'latin_1',
120*5486feefSafresh1                            ja          => 'katakana',
121*5486feefSafresh1                            jpn         => 'katakana',
122*5486feefSafresh1                            nihongo     => 'katakana',
123*5486feefSafresh1                            japanese    => 'katakana',
124*5486feefSafresh1                            ka          => 'georgian',
125*5486feefSafresh1                            kat         => 'georgian',
126*5486feefSafresh1                            kaz         => 'cyrillic',
127*5486feefSafresh1                            khm         => 'khmer',
128*5486feefSafresh1                            kir         => 'cyrillic',
129*5486feefSafresh1                            kk          => 'cyrillic',
130*5486feefSafresh1                            km          => 'khmer',
131*5486feefSafresh1                            ko          => 'hangul',
132*5486feefSafresh1                            kor         => 'hangul',
133*5486feefSafresh1                            korean      => 'hangul',
134*5486feefSafresh1                            ku          => 'arabic',
135*5486feefSafresh1                            kur         => 'arabic',
136*5486feefSafresh1                            ky          => 'cyrillic',
137*5486feefSafresh1                            latin1      => 'latin_1',
138*5486feefSafresh1                            lao         => 'lao',
139*5486feefSafresh1                            lo          => 'lao',
140*5486feefSafresh1                            mk          => 'cyrillic',
141*5486feefSafresh1                            mkd         => 'cyrillic',
142*5486feefSafresh1                            macedonian  => 'cyrillic',
143*5486feefSafresh1                            mn          => 'cyrillic',
144*5486feefSafresh1                            mon         => 'cyrillic',
145*5486feefSafresh1                            mya         => 'myanmar',
146*5486feefSafresh1                            my          => 'myanmar',
147*5486feefSafresh1                            ne          => 'devanagari',
148*5486feefSafresh1                            nep         => 'devanagari',
149*5486feefSafresh1                            nld         => 'latin_1',
150*5486feefSafresh1                            nl          => 'latin_1',
151*5486feefSafresh1                            nederlands  => 'latin_1',
152*5486feefSafresh1                            dutch       => 'latin_1',
153*5486feefSafresh1                            por         => 'latin_1',
154*5486feefSafresh1                            posix       => $official_ascii_name,
155*5486feefSafresh1                            ps          => 'arabic',
156*5486feefSafresh1                            pt          => 'latin_1',
157*5486feefSafresh1                            pus         => 'arabic',
158*5486feefSafresh1                            ru          => 'cyrillic',
159*5486feefSafresh1                            russki      => 'cyrillic',
160*5486feefSafresh1                            russian     => 'cyrillic',
161*5486feefSafresh1                            rus         => 'cyrillic',
162*5486feefSafresh1                            sin         => 'sinhala',
163*5486feefSafresh1                            si          => 'sinhala',
164*5486feefSafresh1                            so          => 'arabic',
165*5486feefSafresh1                            som         => 'arabic',
166*5486feefSafresh1                            spa         => 'latin_1',
167*5486feefSafresh1                            sr          => 'cyrillic',
168*5486feefSafresh1                            srp         => 'cyrillic',
169*5486feefSafresh1                            tam         => 'tamil',
170*5486feefSafresh1                            ta          => 'tamil',
171*5486feefSafresh1                            tg          => 'cyrillic',
172*5486feefSafresh1                            tgk         => 'cyrillic',
173*5486feefSafresh1                            tha         => 'thai',
174*5486feefSafresh1                            th          => 'thai',
175*5486feefSafresh1                            thai        => 'thai',
176*5486feefSafresh1                            ti          => 'ethiopian',
177*5486feefSafresh1                            tir         => 'ethiopian',
178*5486feefSafresh1                            uk          => 'cyrillic',
179*5486feefSafresh1                            ukr         => 'cyrillic',
180*5486feefSafresh1                            ur          => 'arabic',
181*5486feefSafresh1                            urd         => 'arabic',
182*5486feefSafresh1                            zgh         => 'arabic',
183*5486feefSafresh1                            zh          => 'chinese',
184*5486feefSafresh1                            zho         => 'chinese',
185*5486feefSafresh1                        );
186*5486feefSafresh1my %codeset_to_script = (
187*5486feefSafresh1                            88591  => 'latin_1',
188*5486feefSafresh1                            88592  => 'latin_2',
189*5486feefSafresh1                            88593  => 'latin_3',
190*5486feefSafresh1                            88594  => 'latin_4',
191*5486feefSafresh1                            88595  => 'cyrillic',
192*5486feefSafresh1                            88596  => 'arabic',
193*5486feefSafresh1                            88597  => 'greek',
194*5486feefSafresh1                            88598  => 'hebrew',
195*5486feefSafresh1                            88599  => 'latin_5',
196*5486feefSafresh1                            885910 => 'latin_6',
197*5486feefSafresh1                            885911 => 'thai',
198*5486feefSafresh1                            885912 => 'devanagari',
199*5486feefSafresh1                            885913 => 'latin_7',
200*5486feefSafresh1                            885914 => 'latin_8',
201*5486feefSafresh1                            885915 => 'latin_9',
202*5486feefSafresh1                            885916 => 'latin_10',
203*5486feefSafresh1                            cp1251 => 'cyrillic',
204*5486feefSafresh1                            cp1255 => 'hebrew',
205*5486feefSafresh1                      );
206*5486feefSafresh1
207*5486feefSafresh1my %script_priorities = (       # In trying to make the results as distinct as
208*5486feefSafresh1                                # possible, make the ones closest to Unicode,
209*5486feefSafresh1                                # and ASCII lowest priority
210*5486feefSafresh1                            $official_ascii_name => 15,
211*5486feefSafresh1                            latin_1 => 14,
212*5486feefSafresh1                            latin_9 => 13,
213*5486feefSafresh1                            latin_2 => 12,
214*5486feefSafresh1                            latin_4 => 12,
215*5486feefSafresh1                            latin_5 => 12,
216*5486feefSafresh1                            latin_6 => 12,
217*5486feefSafresh1                            latin_7 => 12,
218*5486feefSafresh1                            latin_8 => 12,
219*5486feefSafresh1                            latin_10 => 12,
220*5486feefSafresh1                            latin   => 11,  # Unknown latin version
221*5486feefSafresh1                        );
222*5486feefSafresh1
223*5486feefSafresh1my %script_instances;   # Keys are scripts, values are how many locales use
224*5486feefSafresh1                        # this script.
225*5486feefSafresh1
226*5486feefSafresh1sub analyze_locale_name($) {
227*5486feefSafresh1
228*5486feefSafresh1    # Takes the input name of a locale and creates (and returns) a hash
229*5486feefSafresh1    # containing information about that locale
230*5486feefSafresh1
231*5486feefSafresh1    my %ret;
232*5486feefSafresh1    my $input_locale_name = shift;
233*5486feefSafresh1
234*5486feefSafresh1    my $old_locale = setlocale(LC_CTYPE);
235*5486feefSafresh1
236*5486feefSafresh1    # Often a locale has multiple aliases, and the base one is returned
237*5486feefSafresh1    # by setlocale() when called with an alias.  The base is more likely to
238*5486feefSafresh1    # meet the XPG standards than the alias.
239*5486feefSafresh1    my $new_locale = setlocale(LC_CTYPE, $input_locale_name);
240*5486feefSafresh1    if (! $new_locale) {
241*5486feefSafresh1        diag "Unexpectedly can't setlocale(LC_CTYPE, $new_locale);"
242*5486feefSafresh1           . " \$!=$!, \$^E=$^E";
243*5486feefSafresh1        return;
244*5486feefSafresh1    }
245*5486feefSafresh1
246*5486feefSafresh1    $ret{locale_name} = $new_locale;
247*5486feefSafresh1
248*5486feefSafresh1    # XPG standard for locale names:
249*5486feefSafresh1    #   language[_territory[.codeset]][@modifier]
250*5486feefSafresh1    # But, there are instances which violate this, where there is a codeset
251*5486feefSafresh1    # without a territory, so instead match:
252*5486feefSafresh1    #   language[_territory][.codeset][@modifier]
253*5486feefSafresh1    $ret{locale_name} =~ / ^
254*5486feefSafresh1                                      ( .+? )          # language
255*5486feefSafresh1                              (?:  _  ( .+? ) )?       # territory
256*5486feefSafresh1                              (?: \.  ( .+? ) )?       # codeset
257*5486feefSafresh1                              (?: \@  ( .+  ) )?       # modifier
258*5486feefSafresh1                            $
259*5486feefSafresh1                         /x;
260*5486feefSafresh1
261*5486feefSafresh1    $ret{language}  = $1 // "";
262*5486feefSafresh1    $ret{territory} = $2 // "";
263*5486feefSafresh1    $ret{codeset}   = $3 // "";
264*5486feefSafresh1    $ret{modifier}  = $4 // "";
265*5486feefSafresh1
266*5486feefSafresh1    # Normalize all but 'territory' to lowercase
267*5486feefSafresh1    foreach my $key (qw(language codeset modifier)) {
268*5486feefSafresh1        $ret{$key} = lc $ret{$key};
269*5486feefSafresh1    }
270*5486feefSafresh1
271*5486feefSafresh1    # Often, the codeset is omitted from the locale name, but it is still
272*5486feefSafresh1    # discoverable (via langinfo() ) for the current locale on many platforms.
273*5486feefSafresh1    # We already have switched locales
274*5486feefSafresh1    use I18N::Langinfo qw(langinfo CODESET);
275*5486feefSafresh1    my $langinfo_codeset = lc langinfo(CODESET);
276*5486feefSafresh1
277*5486feefSafresh1    # Now can switch back to the locale current on entry to this sub
278*5486feefSafresh1    if (! setlocale(LC_CTYPE, $old_locale)) {
279*5486feefSafresh1        die "Unexpectedly can't restore locale to $old_locale from"
280*5486feefSafresh1          . " $new_locale; \$!=$!, \$^E=$^E";
281*5486feefSafresh1    }
282*5486feefSafresh1
283*5486feefSafresh1    # Normalize the codesets
284*5486feefSafresh1    foreach my $codeset_ref (\$langinfo_codeset, \$ret{codeset}) {
285*5486feefSafresh1        $$codeset_ref =~ s/\W//g;
286*5486feefSafresh1        $$codeset_ref =~ s/iso8859/8859/g;
287*5486feefSafresh1        $$codeset_ref =~ s/\b65001\b/utf8/;     # Windows synonym
288*5486feefSafresh1        $$codeset_ref =~ s/\b646\b/$official_ascii_name/;
289*5486feefSafresh1        $$codeset_ref =~ s/\busascii\b/$official_ascii_name/;
290*5486feefSafresh1    }
291*5486feefSafresh1
292*5486feefSafresh1    # The langinfo codeset, if found, is considered more reliable than the one
293*5486feefSafresh1    # in the name.  (This is because libc looks into the actual data
294*5486feefSafresh1    # definition.)  So use it unconditionally when found.  But note any
295*5486feefSafresh1    # discrepancy as an aid for improving this test.
296*5486feefSafresh1    if ($langinfo_codeset) {
297*5486feefSafresh1        if ($ret{codeset} && $ret{codeset} ne $langinfo_codeset) {
298*5486feefSafresh1            diag "In $ret{locale_name}, codeset from langinfo"
299*5486feefSafresh1               . " ($langinfo_codeset) doesn't match codeset in"
300*5486feefSafresh1               . " locale_name ($ret{codeset})";
301*5486feefSafresh1        }
302*5486feefSafresh1        $ret{codeset} = $langinfo_codeset;
303*5486feefSafresh1    }
304*5486feefSafresh1
305*5486feefSafresh1    $ret{is_utf8} = 0 + ($ret{codeset} eq 'utf8');
306*5486feefSafresh1
307*5486feefSafresh1    # If the '@' modifier is a known script, use it as the script.
308*5486feefSafresh1    if (    $ret{modifier}
309*5486feefSafresh1        and grep { $_ eq $ret{modifier} } values %lang_code_to_script)
310*5486feefSafresh1    {
311*5486feefSafresh1        $ret{script} = $ret{nominal_script} = $ret{modifier};
312*5486feefSafresh1        $ret{modifier} = "";
313*5486feefSafresh1    }
314*5486feefSafresh1    elsif ($ret{codeset} && ! $ret{is_utf8}) {
315*5486feefSafresh1
316*5486feefSafresh1        # The codeset determines the script being used, except if we don't
317*5486feefSafresh1        # have the codeset, or it is UTF-8 (which covers a multitude of
318*5486feefSafresh1        # scripts).
319*5486feefSafresh1        #
320*5486feefSafresh1        # We have hard-coded the scripts corresponding to a few of these
321*5486feefSafresh1        # non-UTF-8 codesets.  See if this is one of them.
322*5486feefSafresh1        $ret{script} = $codeset_to_script{$ret{codeset}};
323*5486feefSafresh1        if ($ret{script}) {
324*5486feefSafresh1
325*5486feefSafresh1            # For these, the script is likely a combination of ASCII (from
326*5486feefSafresh1            # 0-127), and the script from (128-255).  Reflect that in the name
327*5486feefSafresh1            # used (for distinguishing below)
328*5486feefSafresh1            $ret{script} .= '_' . $official_ascii_name;
329*5486feefSafresh1        }
330*5486feefSafresh1        elsif ($ret{codeset} =~ /^koi/) {   # Another common set.
331*5486feefSafresh1            $ret{script} = "cyrillic_${official_ascii_name}";
332*5486feefSafresh1        }
333*5486feefSafresh1        else {  # Here the codeset name is unknown to us.  Just assume it
334*5486feefSafresh1                # means a whole new script.  Add the language at the end of
335*5486feefSafresh1                # the name to further make it distinct
336*5486feefSafresh1            $ret{script} = $ret{codeset};
337*5486feefSafresh1            $ret{script} .= "_$ret{language}"
338*5486feefSafresh1                                    if $ret{codeset} !~ /$official_ascii_name/;
339*5486feefSafresh1        }
340*5486feefSafresh1    }
341*5486feefSafresh1    else {  # Here, the codeset is unknown or is UTF-8.
342*5486feefSafresh1
343*5486feefSafresh1        # In these cases look up the script based on the language.  The table
344*5486feefSafresh1        # is meant to be pretty complete, but omits the many scripts that are
345*5486feefSafresh1        # ASCII or Latin1.  And it omits the fullnames of languages whose
346*5486feefSafresh1        # scripts are themselves.  The grep below catches those.  Defaulting
347*5486feefSafresh1        # to Latin means that a non-standard language name is considered to be
348*5486feefSafresh1        # latin -- maybe not the best outcome but what else is better?
349*5486feefSafresh1        $ret{script} = $lang_code_to_script{$ret{language}};
350*5486feefSafresh1        if (! $ret{script}) {
351*5486feefSafresh1            $ret{script} = (grep { $ret{language} eq $_ }
352*5486feefSafresh1                                                    values %lang_code_to_script)
353*5486feefSafresh1                            ? $ret{language}
354*5486feefSafresh1                            : 'latin';
355*5486feefSafresh1        }
356*5486feefSafresh1    }
357*5486feefSafresh1
358*5486feefSafresh1    # If we have @euro, and the script is ASCII or latin or latin1, change it
359*5486feefSafresh1    # into latin9, which is closer to what is going on.  latin9 has a few
360*5486feefSafresh1    # other differences from latin1, but it's not worth creating a whole new
361*5486feefSafresh1    # script type that differs only in the currency symbol.
362*5486feefSafresh1    if (  ($ret{modifier} && $ret{modifier} eq 'euro')
363*5486feefSafresh1        && $ret{script} =~ / ^ ($official_ascii_name | latin (_1)? ) $ /x)
364*5486feefSafresh1    {
365*5486feefSafresh1        $ret{script} = 'latin_9';
366*5486feefSafresh1    }
367*5486feefSafresh1
368*5486feefSafresh1    #  Look up the priority of this script.  All the non-listed ones have
369*5486feefSafresh1    #  highest (0 or 1) priority.  We arbitrarily make the ones higher
370*5486feefSafresh1    #  priority (0) that aren't known to be half-ascii, simply because they
371*5486feefSafresh1    #  might be entirely different than most locales.
372*5486feefSafresh1    $ret{priority} = $script_priorities{$ret{script}};
373*5486feefSafresh1    if (! $ret{priority}) {
374*5486feefSafresh1        $ret{priority} = (   $ret{script} ne $official_ascii_name
375*5486feefSafresh1                          && $ret{script} =~ $official_ascii_name)
376*5486feefSafresh1                         ? 0
377*5486feefSafresh1                         : 1;
378*5486feefSafresh1    }
379*5486feefSafresh1
380*5486feefSafresh1    # Script names have been set up so that anything after an underscore is a
381*5486feefSafresh1    # modifier of the main script.  We keep a counter of which occurence of
382*5486feefSafresh1    # this script this is.  This is used along with the priority to order the
383*5486feefSafresh1    # locales so that the characters are as varied as possible.
384*5486feefSafresh1    my $script_root = ($ret{script} =~ s/_.*//r) . "_$ret{is_utf8}";
385*5486feefSafresh1    $ret{script_instance} = $script_instances{$script_root}++;
386*5486feefSafresh1
387*5486feefSafresh1    return \%ret;
388*5486feefSafresh1}
389*5486feefSafresh1
390*5486feefSafresh1# Prioritize locales that are most unlike the standard C/Latin1-ish ones.
391*5486feefSafresh1# This is to minimize getting passes for tests on a category merely because
392*5486feefSafresh1# they share many of the same characteristics as the locale of another
393*5486feefSafresh1# category simultaneously in effect.
394*5486feefSafresh1sub sort_locales ()
395*5486feefSafresh1{
396*5486feefSafresh1    my $cmp =  $a->{script_instance} <=> $b->{script_instance};
397*5486feefSafresh1    return $cmp if $cmp;
398*5486feefSafresh1
399*5486feefSafresh1    $cmp =  $a->{priority} <=> $b->{priority};
400*5486feefSafresh1    return $cmp if $cmp;
401*5486feefSafresh1
402*5486feefSafresh1    $cmp =  $a->{script} cmp $b->{script};
403*5486feefSafresh1    return $cmp if $cmp;
404*5486feefSafresh1
405*5486feefSafresh1    $cmp =  $a->{modifier} cmp $b->{modifier};
406*5486feefSafresh1    return $cmp if $cmp;
407*5486feefSafresh1
408*5486feefSafresh1    $cmp =  $a->{codeset} cmp $b->{codeset};
409*5486feefSafresh1    return $cmp if $cmp;
410*5486feefSafresh1
411*5486feefSafresh1    $cmp =  $a->{territory} cmp $b->{territory};
412*5486feefSafresh1    return $cmp if $cmp;
413*5486feefSafresh1
414*5486feefSafresh1    return lc $a cmp lc $b;
415*5486feefSafresh1}
416*5486feefSafresh1
417*5486feefSafresh1# Find out extra info about each locale
418*5486feefSafresh1my @cleaned_up_locales;
419*5486feefSafresh1for my $locale (@locales) {
420*5486feefSafresh1    my $locale_struct = analyze_locale_name($locale);
421*5486feefSafresh1
422*5486feefSafresh1    next unless $locale_struct;
423*5486feefSafresh1
424*5486feefSafresh1    my $name = $locale_struct->{locale_name};
425*5486feefSafresh1    next if grep { $name eq $_->{locale_name} } @cleaned_up_locales;
426*5486feefSafresh1
427*5486feefSafresh1    push @cleaned_up_locales, $locale_struct;
428*5486feefSafresh1}
429*5486feefSafresh1
430*5486feefSafresh1@locales = @cleaned_up_locales;
431*5486feefSafresh1
432*5486feefSafresh1# Without a proper codeset, we can't really know how to test.  This should
433*5486feefSafresh1# only happen on platforms that lack the ability to determine the codeset.
434*5486feefSafresh1@locales = grep { $_->{codeset} ne "" } @locales;
435*5486feefSafresh1
436*5486feefSafresh1# Sort into priority order.
437*5486feefSafresh1@locales = sort sort_locales @locales;
438*5486feefSafresh1
439*5486feefSafresh1# First test
4405759b3d2Safresh1SKIP: { # perl #127708
441*5486feefSafresh1    my $locale = $locales[0];
442*5486feefSafresh1    skip("No valid locale to test with", 1) if $locale->{codeset} eq
443*5486feefSafresh1                                                          $official_ascii_name;
444*5486feefSafresh1    local $ENV{LC_MESSAGES} = $locale->{locale_name};
4455759b3d2Safresh1
4465759b3d2Safresh1    # We're going to try with all possible error numbers on this platform
4475759b3d2Safresh1    my $error_count = keys(%!) + 1;
4485759b3d2Safresh1
4495759b3d2Safresh1    print fresh_perl("
4505759b3d2Safresh1        use threads;
4515759b3d2Safresh1        use strict;
4525759b3d2Safresh1        use warnings;
453*5486feefSafresh1        use Time::HiRes qw(usleep);
4545759b3d2Safresh1
4555759b3d2Safresh1        my \$errnum = 1;
4565759b3d2Safresh1
4575759b3d2Safresh1        my \@threads = map +threads->create(sub {
458*5486feefSafresh1            usleep 0.1;
459*5486feefSafresh1            'threads'->yield();
4605759b3d2Safresh1
4615759b3d2Safresh1            for (1..5_000) {
4625759b3d2Safresh1                \$errnum = (\$errnum + 1) % $error_count;
4635759b3d2Safresh1                \$! = \$errnum;
4645759b3d2Safresh1
4655759b3d2Safresh1                # no-op to trigger stringification
4665759b3d2Safresh1                next if \"\$!\" eq \"\";
4675759b3d2Safresh1            }
4685759b3d2Safresh1        }), (0..1);
4695759b3d2Safresh1        \$_->join for splice \@threads;",
4705759b3d2Safresh1    {}
4715759b3d2Safresh1    );
4725759b3d2Safresh1
4735759b3d2Safresh1    pass("Didn't segfault");
4745759b3d2Safresh1}
4755759b3d2Safresh1
476*5486feefSafresh1# Second test setup
477*5486feefSafresh1my %locale_name_to_object;
478*5486feefSafresh1for my $locale (@locales) {
479*5486feefSafresh1    $locale_name_to_object{$locale->{locale_name}} = $locale;
480*5486feefSafresh1}
481*5486feefSafresh1
482*5486feefSafresh1sub sort_by_hashed_locale {
483*5486feefSafresh1    local $a = $locale_name_to_object{$a};
484*5486feefSafresh1    local $b = $locale_name_to_object{$b};
485*5486feefSafresh1
486*5486feefSafresh1    return sort_locales;
487*5486feefSafresh1}
488*5486feefSafresh1
489*5486feefSafresh1sub min {
490*5486feefSafresh1    my ($a, $b) = @_;
491*5486feefSafresh1    return $a if $a <= $b;
492*5486feefSafresh1    return $b;
493*5486feefSafresh1}
494*5486feefSafresh1
495*5486feefSafresh1# Smokes have shown this to be about the maximum numbers some platforms can
496*5486feefSafresh1# handle.  khw has tried 500 threads/1000 iterations on Linux
497*5486feefSafresh1my $thread_count = 15;
498*5486feefSafresh1my $iterations = 100;
499*5486feefSafresh1
500*5486feefSafresh1my $alarm_clock = (1 * 10 * 60);    # A long time, just to prevent hanging
501*5486feefSafresh1
502*5486feefSafresh1# Chunk the iterations, so that every so often the test comes up for air.
503*5486feefSafresh1my $iterations_per_test_set = min(30, int($iterations / 5));
504*5486feefSafresh1$iterations_per_test_set = 1 if $iterations_per_test_set == 0;
505*5486feefSafresh1
506*5486feefSafresh1# Sometimes the test calls setlocale() for each individual locale category.
507*5486feefSafresh1# But every this many threads, it will be called just once, using LC_ALL to
508*5486feefSafresh1# specify the categories.  This way both setting individual categories and
509*5486feefSafresh1# LC_ALL get tested.  But skip this nicety on platforms where we are restricted from
510*5486feefSafresh1# using all the available categories, as it would make the code more complex
511*5486feefSafresh1# for not that much gain.
512*5486feefSafresh1my @platform_categories = platform_locale_categories();
513*5486feefSafresh1my $lc_all_frequency =  scalar @platform_categories == scalar @valid_categories
514*5486feefSafresh1                        ? 3
515*5486feefSafresh1                        : -1;
516*5486feefSafresh1
517*5486feefSafresh1# To avoid things getting too big; skip tests whose results are larger than
518*5486feefSafresh1# this many characters.
519*5486feefSafresh1my $max_result_length = 10000;
520*5486feefSafresh1
521*5486feefSafresh1# Estimate as to how long in seconds to allow a thread to be ready to roll
522*5486feefSafresh1# after creation, so as to try to get all the threads to start as
523*5486feefSafresh1# simultaneously as possible
524*5486feefSafresh1my $per_thread_startup = .18;
525*5486feefSafresh1
526*5486feefSafresh1# For use in experimentally tuning the above value
527*5486feefSafresh1my $die_on_negative_sleep = 1;
528*5486feefSafresh1
529*5486feefSafresh1# We don't need to test every possible errno, but you could change this to do
530*5486feefSafresh1# so by setting it to negative
531*5486feefSafresh1my $max_message_catalog_entries = 10;
532*5486feefSafresh1
533*5486feefSafresh1# December 18, 1987
534*5486feefSafresh1my $strftime_args = "'%c', 0, 0, , 12, 18, 11, 87";
535*5486feefSafresh1
536*5486feefSafresh1my %distincts;  # The distinct 'operation => result' cases
537*5486feefSafresh1my %op_counts;  # So we can bail early if more test cases than threads
538*5486feefSafresh1my $separator = '____';     # The operation and result are often melded into a
539*5486feefSafresh1                            # string separated by this.
540*5486feefSafresh1
541*5486feefSafresh1sub pack_op_result($$) {
542*5486feefSafresh1    my ($op, $result) = @_;
543*5486feefSafresh1    return $op . $separator
544*5486feefSafresh1         . (0 + utf8::is_utf8($op)) . $separator
545*5486feefSafresh1         . $result . $separator
546*5486feefSafresh1         . (0 + utf8::is_utf8($result));
547*5486feefSafresh1}
548*5486feefSafresh1
549*5486feefSafresh1sub fixup_utf8ness($$) {
550*5486feefSafresh1    my ($operand, $utf8ness) = @_;
551*5486feefSafresh1
552*5486feefSafresh1    # Make sure $operand is encoded properly
553*5486feefSafresh1
554*5486feefSafresh1    if ($utf8ness + 0 != 0 + utf8::is_utf8($$operand)) {
555*5486feefSafresh1        if ($utf8ness) {
556*5486feefSafresh1            utf8::upgrade($$operand);
557*5486feefSafresh1        }
558*5486feefSafresh1        else {
559*5486feefSafresh1            utf8::downgrade($$operand);
560*5486feefSafresh1        }
561*5486feefSafresh1    }
562*5486feefSafresh1}
563*5486feefSafresh1
564*5486feefSafresh1sub unpack_op_result($) {
565*5486feefSafresh1    my $op_result = shift;
566*5486feefSafresh1
567*5486feefSafresh1    my ($op, $op_utf8ness, $result, $result_utf8ness) =
568*5486feefSafresh1                                            split $separator, $op_result;
569*5486feefSafresh1    fixup_utf8ness(\$op, $op_utf8ness);
570*5486feefSafresh1    fixup_utf8ness(\$result, $result_utf8ness);
571*5486feefSafresh1
572*5486feefSafresh1    return ($op, $result);
573*5486feefSafresh1}
574*5486feefSafresh1
575*5486feefSafresh1sub add_trials($$;$)
576*5486feefSafresh1{
577*5486feefSafresh1    # Add a test case for category $1.
578*5486feefSafresh1    # $2 is the test case operation to perform
579*5486feefSafresh1    # $3 is a constraint, optional.
580*5486feefSafresh1
581*5486feefSafresh1    my $category_name = shift;
582*5486feefSafresh1    my $input_op = shift;                   # The eval string to perform
583*5486feefSafresh1    my $locale_constraint = shift // "";    # If defined, the test will be
584*5486feefSafresh1                                            # created only for locales that
585*5486feefSafresh1                                            # match this
586*5486feefSafresh1  LOCALE:
587*5486feefSafresh1    foreach my $locale (@locales) {
588*5486feefSafresh1        my $locale_name = $locale->{locale_name};
589*5486feefSafresh1        my $op = $input_op;
590*5486feefSafresh1
591*5486feefSafresh1        # All categories should be set to the same locale to make sure
592*5486feefSafresh1        # this test gets the valid results.
593*5486feefSafresh1        next unless setlocale($LC_ALL, $locale_name);
594*5486feefSafresh1
595*5486feefSafresh1        # As of NetBSD 10, it doesn't implement LC_COLLATE, and setting that
596*5486feefSafresh1        # category to anything but C or POSIX fails.  But setting LC_ALL to
597*5486feefSafresh1        # other locales (as we just did) returns success, while leaving
598*5486feefSafresh1        # LC_COLLATE untouched.  Therefore, also set the category individually
599*5486feefSafresh1        # to catch such things.  This problem may not be confined to NetBSD.
600*5486feefSafresh1        # This also works if the platform lacks LC_ALL.  We at least set
601*5486feefSafresh1        # LC_CTYPE (via '$LC_ALL' above) besides the category.
602*5486feefSafresh1        next unless setlocale($map_category_name_to_number{$category_name},
603*5486feefSafresh1                              $locale_name);
604*5486feefSafresh1
605*5486feefSafresh1        # Use a placeholder if this test requires a particular constraint,
606*5486feefSafresh1        # which isn't met in this case.
607*5486feefSafresh1        if ($locale_constraint) {
608*5486feefSafresh1            if ($locale_constraint eq 'utf8_only') {
609*5486feefSafresh1                next if ! $locale->{is_utf8};
610*5486feefSafresh1            }
611*5486feefSafresh1            elsif ($locale_constraint eq 'a<b') {
612*5486feefSafresh1                my $result = eval "use locale; 'a' lt 'B'";
613*5486feefSafresh1                die "$category_name: '$op (a lt B)': $@" if $@;
614*5486feefSafresh1                next unless $result;
615*5486feefSafresh1            }
616*5486feefSafresh1            else {
617*5486feefSafresh1                die "Only accepted locale constraints are 'utf8_only' and 'a<b'"
618*5486feefSafresh1            }
619*5486feefSafresh1        }
620*5486feefSafresh1
621*5486feefSafresh1        # Calculate what the expected value of the test should be.  We're
622*5486feefSafresh1        # doing this here in the main thread and with all the locales set to
623*5486feefSafresh1        # be the same thing.  The test will be that we should get this value
624*5486feefSafresh1        # under stress, with each thread using different locales for each
625*5486feefSafresh1        # category, and multiple threads simultaneously executing with
626*5486feefSafresh1        # disparate locales
627*5486feefSafresh1        my $eval_string = ($op) ? "use locale; $op;" : "";
628*5486feefSafresh1        my $result = eval $eval_string;
629*5486feefSafresh1        die "$category_name: '$op': $@" if $@;
630*5486feefSafresh1        if (! defined $result) {
631*5486feefSafresh1            if ($debug) {
632*5486feefSafresh1                print STDERR __FILE__, ": ", __LINE__,
633*5486feefSafresh1                             ": Undefined result for $locale_name",
634*5486feefSafresh1                             " $category_name: '$op'\n";
635*5486feefSafresh1            }
636*5486feefSafresh1            next;
637*5486feefSafresh1        }
638*5486feefSafresh1        elsif ($debug > 1) {
639*5486feefSafresh1            print STDERR "\n", __FILE__, ": ", __LINE__, ": $category_name:",
640*5486feefSafresh1                         " $locale_name: Op = ", Dumper($op), "; Returned ";
641*5486feefSafresh1            Dump $result;
642*5486feefSafresh1        }
643*5486feefSafresh1        if (length $result > $max_result_length) {
644*5486feefSafresh1            diag("For $locale_name, '$op', result is too long; skipped");
645*5486feefSafresh1            next;
646*5486feefSafresh1        }
647*5486feefSafresh1
648*5486feefSafresh1        # It seems best to not include tests with mojibake results, which here
649*5486feefSafresh1        # is checked for by two question marks in a row.  (strxfrm is excluded
650*5486feefSafresh1        # from this restriction, as the result is really binary, so '??' could
651*5486feefSafresh1        # and does come up, not meaning mojibake.)  A concrete example of this
652*5486feefSafresh1        # is in Mingw the locale Yi_China.1252.  CP 1252 is for a Latin
653*5486feefSafresh1        # script; just about anything from an East Asian script is bound to
654*5486feefSafresh1        # fail.  It makes no sense to have this locale, but it exists.
655*5486feefSafresh1        if ($eval_string !~ /xfrm/ && $result =~ /\?\?/) {
656*5486feefSafresh1            if ($debug)  {
657*5486feefSafresh1                print STDERR __FILE__, ": ", __LINE__,
658*5486feefSafresh1                  " For $locale_name, op=$op, result has mojibake: $result\n";
659*5486feefSafresh1            }
660*5486feefSafresh1
661*5486feefSafresh1            next;
662*5486feefSafresh1        }
663*5486feefSafresh1
664*5486feefSafresh1        # Some systems are buggy in that setlocale() gives non-deterministic
665*5486feefSafresh1        # results for some locales.   Here we try to exclude those from our
666*5486feefSafresh1        # test by trying the setlocale this many times to see if it varies:
667*5486feefSafresh1        my $deterministic_trial_count = 5;
668*5486feefSafresh1
669*5486feefSafresh1        # To do this, we set the locale to an 'alternate' locale between
670*5486feefSafresh1        # trials.  This defeats any attempt by the implementation to skip the
671*5486feefSafresh1        # setlocale if it is already in said locale.
672*5486feefSafresh1        my $alternate;
673*5486feefSafresh1        my @alternate;
674*5486feefSafresh1
675*5486feefSafresh1        # If possible, the alternate is chosen to be of the opposite UTF8ness,
676*5486feefSafresh1        # so as to reset internal states about that.
677*5486feefSafresh1        if (! $utf8_locales_ref || ! $utf8_locales_ref->@*) {
678*5486feefSafresh1
679*5486feefSafresh1            # If no UTF-8 locales, must choose one that is non-UTF-8.
680*5486feefSafresh1            @alternate = grep { $_ ne $locale_name } $non_utf8_locales_ref->@*;
681*5486feefSafresh1        }
682*5486feefSafresh1        elsif (! $non_utf8_locales_ref || ! $non_utf8_locales_ref->@*) {
683*5486feefSafresh1
684*5486feefSafresh1            # If no non-UTF-8 locales, must choose one that is UTF-8.
685*5486feefSafresh1            @alternate = grep { $_ ne $locale_name } $utf8_locales_ref->@*;
686*5486feefSafresh1        }
687*5486feefSafresh1        elsif (grep { $_ eq $locale_name } $utf8_locales_ref->@*) {
688*5486feefSafresh1            @alternate = $non_utf8_locales_ref->@*;
689*5486feefSafresh1        }
690*5486feefSafresh1        else {
691*5486feefSafresh1            @alternate = $utf8_locales_ref->@*;
692*5486feefSafresh1        }
693*5486feefSafresh1
694*5486feefSafresh1        # Now do the trials.  For each, we choose the next alternate on the
695*5486feefSafresh1        # list, rotating the list so the following iteration will choose a
696*5486feefSafresh1        # different alternate.
697*5486feefSafresh1        for my $i (1 .. $deterministic_trial_count - 1) {
698*5486feefSafresh1            my $other = shift @alternate;
699*5486feefSafresh1            push @alternate, $other;
700*5486feefSafresh1
701*5486feefSafresh1            # Run the test on the alternate locale
702*5486feefSafresh1            if (! setlocale($LC_ALL, $other)) {
703*5486feefSafresh1                if (   $LC_ALL_string eq 'LC_ALL'
704*5486feefSafresh1                    || ! setlocale($map_category_name_to_number{$category_name},
705*5486feefSafresh1                                   $other))
706*5486feefSafresh1                {
707*5486feefSafresh1                    die "Unexpectedly can't set locale to $other:"
708*5486feefSafresh1                      . " \$!=$!, \$^E=$^E";
709*5486feefSafresh1                }
710*5486feefSafresh1            }
711*5486feefSafresh1
712*5486feefSafresh1            eval $eval_string;
713*5486feefSafresh1
714*5486feefSafresh1            # Then run it on the one we are hoping to test
715*5486feefSafresh1            if (! setlocale($LC_ALL, $locale_name)) {
716*5486feefSafresh1                if (   $LC_ALL_string eq 'LC_ALL'
717*5486feefSafresh1                    || ! setlocale($map_category_name_to_number{$category_name},
718*5486feefSafresh1                                   $locale_name))
719*5486feefSafresh1                {
720*5486feefSafresh1                    die "Unexpectedly can't set locale to $locale_name from "
721*5486feefSafresh1                      . setlocale($LC_ALL)
722*5486feefSafresh1                      . "; \$!=$!, \$^E=$^E";
723*5486feefSafresh1                }
724*5486feefSafresh1            }
725*5486feefSafresh1
726*5486feefSafresh1            my $got = eval $eval_string;
727*5486feefSafresh1            next if $got eq $result
728*5486feefSafresh1                 && utf8::is_utf8($got) == utf8::is_utf8($result);
729*5486feefSafresh1
730*5486feefSafresh1            # If the result varied from the expected value, this is a
731*5486feefSafresh1            # non-deterministic locale, so, don't test it.
732*5486feefSafresh1            diag("For '$eval_string',\nresults in iteration $i differed from"
733*5486feefSafresh1               . " the original\ngot");
734*5486feefSafresh1            Dump($got);
735*5486feefSafresh1            diag("expected");
736*5486feefSafresh1            Dump($result);
737*5486feefSafresh1            next LOCALE;
738*5486feefSafresh1        }
739*5486feefSafresh1
740*5486feefSafresh1        # Here, the setlocale for this locale appears deterministic.  Use it.
741*5486feefSafresh1        my $op_result = pack_op_result($op, $result);
742*5486feefSafresh1        push $distincts{$category_name}{$op_result}{locales}->@*, $locale_name;
743*5486feefSafresh1        # No point in looking beyond this if we already have all the tests we
744*5486feefSafresh1        # need.  Note this assumes that the same op isn't used in two
745*5486feefSafresh1        # categories.
746*5486feefSafresh1        if (defined $op_counts{$op} && $op_counts{$op} >= $thread_count)
747*5486feefSafresh1        {
748*5486feefSafresh1            last;
749*5486feefSafresh1        }
750*5486feefSafresh1    }
751*5486feefSafresh1}
752*5486feefSafresh1
753*5486feefSafresh1use Config;
754*5486feefSafresh1
755*5486feefSafresh1# Figure out from config how to represent disparate LC_ALL
756*5486feefSafresh1my @valid_category_numbers = sort { $a <=> $b }
757*5486feefSafresh1                    map { $map_category_name_to_number{$_} } @valid_categories;
758*5486feefSafresh1
759*5486feefSafresh1my $use_name_value_pairs = defined $Config{d_perl_lc_all_uses_name_value_pairs};
760*5486feefSafresh1my $lc_all_separator = ($use_name_value_pairs)
761*5486feefSafresh1                       ? ";"
762*5486feefSafresh1                       : $Config{perl_lc_all_separator} =~ s/"//gr;
763*5486feefSafresh1my @position_to_category_number;
764*5486feefSafresh1if (! $use_name_value_pairs) {
765*5486feefSafresh1    my $positions = $Config{perl_lc_all_category_positions_init} =~ s/[{}]//gr;
766*5486feefSafresh1    $positions =~ s/,//g;
767*5486feefSafresh1    $positions =~ s/^ +//;
768*5486feefSafresh1    $positions =~ s/ +$//;
769*5486feefSafresh1    @position_to_category_number = split / \s+ /x, $positions
770*5486feefSafresh1}
771*5486feefSafresh1
772*5486feefSafresh1sub get_next_category() {
773*5486feefSafresh1    use feature 'state';
774*5486feefSafresh1    state $index;
775*5486feefSafresh1
776*5486feefSafresh1    # Called to rotate all the legal locale categories
777*5486feefSafresh1
778*5486feefSafresh1    my $which = ($use_name_value_pairs)
779*5486feefSafresh1                ? \@valid_category_numbers
780*5486feefSafresh1                : \@position_to_category_number;
781*5486feefSafresh1
782*5486feefSafresh1    $index = -1 unless defined $index;
783*5486feefSafresh1    $index++;
784*5486feefSafresh1
785*5486feefSafresh1    if (! defined $which->[$index]) {
786*5486feefSafresh1        undef $index;
787*5486feefSafresh1        return;
788*5486feefSafresh1    }
789*5486feefSafresh1
790*5486feefSafresh1    my $category_number = $which->[$index];
791*5486feefSafresh1    return $category_number if $category_number != $LC_ALL;
792*5486feefSafresh1
793*5486feefSafresh1    # If this was LC_ALL, the next one won't be
794*5486feefSafresh1    return &get_next_category();
795*5486feefSafresh1}
796*5486feefSafresh1
7975759b3d2Safresh1SKIP: {
798*5486feefSafresh1    skip("Unsafe locale threads", 1) unless ${^SAFE_LOCALES};
7995759b3d2Safresh1
800*5486feefSafresh1    # The second test is several threads nearly simulataneously executing
801*5486feefSafresh1    # locale-sensitive operations with the categories set to disparate
802*5486feefSafresh1    # locales.  This catches cases where the results of a given category is
803*5486feefSafresh1    # related to what the locale is of another category.  (As an example, this
804*5486feefSafresh1    # test showed that some platforms require LC_CTYPE to be the same as
805*5486feefSafresh1    # LC_COLLATION, and/or LC_MESSAGES for proper results, so that Perl had to
806*5486feefSafresh1    # change to bring these into congruence under the hood).  And it also
807*5486feefSafresh1    # catches where there is interference between multiple threads.
808*5486feefSafresh1    #
809*5486feefSafresh1    # This test tries to exercise every underlying locale-dependent operation
810*5486feefSafresh1    # available in Perl.  It doesn't test every use of the operation, but
811*5486feefSafresh1    # includes some Perl construct that uses each.  For example, it tests lc
812*5486feefSafresh1    # but not lcfirst.  That would be redundant for this test; it wants to
813*5486feefSafresh1    # know if lowercasing works under threads and locales.  But if the
814*5486feefSafresh1    # implementations were disjoint at the time this test was written, it
815*5486feefSafresh1    # would try each implementation.  So, various things in the POSIX module
816*5486feefSafresh1    # have separate tests from the ones in core.
817*5486feefSafresh1    #
818*5486feefSafresh1    # For each such underlying locale-dependent operation, a Perl-visible
819*5486feefSafresh1    # construct is chosen that uses it.  And a typical input or set of inputs
820*5486feefSafresh1    # is passed to that and the results are noted for every available locale
821*5486feefSafresh1    # on the platform.  Many locales will have identical results, so the
822*5486feefSafresh1    # duplicates are stored separately.
823*5486feefSafresh1    #
824*5486feefSafresh1    # There will be N simultaneous threads.  Each thread is configured to set
825*5486feefSafresh1    # a locale for each category, to run operations whose results depend on
826*5486feefSafresh1    # that locale, then check that the result matches the expected value, and
827*5486feefSafresh1    # to immediately repeat some largish number of iterations.  The goal is to
828*5486feefSafresh1    # see if the locales on each thread are truly independent of those on the
829*5486feefSafresh1    # other threads.
830*5486feefSafresh1    #
831*5486feefSafresh1    # To that end, the locales are chosen so that the results differ from
832*5486feefSafresh1    # every other locale.  Otherwise, the thread results wouldn't be truly
833*5486feefSafresh1    # independent.  But if there are more threads than there are distinct
834*5486feefSafresh1    # results, duplicates are used to fill up what would otherwise be empty
835*5486feefSafresh1    # slots.  That is the best we can do on those platforms.
836*5486feefSafresh1    #
837*5486feefSafresh1    # Having lots of locales to continually switch between stresses things so
838*5486feefSafresh1    # as to find potential segfaults where locale changing isn't really thread
839*5486feefSafresh1    # safe.
8405759b3d2Safresh1
841*5486feefSafresh1    # There is a bug in older Windows runtimes in which locales in CP1252 and
842*5486feefSafresh1    # similar code pages whose names aren't entirely ASCII aren't recognized
843*5486feefSafresh1    # by later setlocales.  Some names that are all ASCII are synonyms for
844*5486feefSafresh1    # such names.  Weed those out by doing a setlocale of the original name,
845*5486feefSafresh1    # and then a setlocale of the resulting one.  Discard locales which have
846*5486feefSafresh1    # any unacceptable name
847*5486feefSafresh1    if (${^O} eq "MSWin32" && $Config{'libc'} !~ /ucrt/) {
848*5486feefSafresh1        @locales = grep {
849*5486feefSafresh1            my $locale_name = $_->{locale_name};
850*5486feefSafresh1            my $underlying_name = setlocale(&LC_CTYPE, $locale_name);
851*5486feefSafresh1
852*5486feefSafresh1            # Defeat any attempt to skip the setlocale if the same as current,
853*5486feefSafresh1            # by switching to a locale very unlikey to be the current one.
854*5486feefSafresh1            setlocale($LC_ALL, "Albanian");
855*5486feefSafresh1
856*5486feefSafresh1            defined($underlying_name) && setlocale(&LC_CTYPE, $underlying_name)
857*5486feefSafresh1        } @locales;
8585759b3d2Safresh1    }
8595759b3d2Safresh1
860*5486feefSafresh1    # Create a hash of the errnos:
861*5486feefSafresh1    #          "1" => "Operation\\ not\\ permitted",
862*5486feefSafresh1    #          "2" => "No\\ such\\ file\\ or\\ directory",
863*5486feefSafresh1    #          etc.
864*5486feefSafresh1    my %msg_catalog;
865*5486feefSafresh1    foreach my $error (sort keys %!) {
866*5486feefSafresh1        my $number = eval "Errno::$error";
867*5486feefSafresh1        $! = $number;
868*5486feefSafresh1        my $description = "$!";
869*5486feefSafresh1        next unless "$description";
870*5486feefSafresh1        $msg_catalog{$number} = quotemeta "$description";
8715759b3d2Safresh1    }
8725759b3d2Safresh1
873*5486feefSafresh1    # Then just the errnos.
874*5486feefSafresh1    my @msg_catalog = sort { $a <=> $b } keys %msg_catalog;
875*5486feefSafresh1
876*5486feefSafresh1    # Remove the excess ones.
877*5486feefSafresh1    splice @msg_catalog, $max_message_catalog_entries
878*5486feefSafresh1                                          if $max_message_catalog_entries >= 0;
879*5486feefSafresh1    my $msg_catalog = join ',', @msg_catalog;
880*5486feefSafresh1
881*5486feefSafresh1    eval  { my $discard = POSIX::localeconv()->{currency_symbol}; };
882*5486feefSafresh1    my $has_localeconv = $@ eq "";
883*5486feefSafresh1
884*5486feefSafresh1    # Now go through and create tests for each locale category on the system.
885*5486feefSafresh1    # These tests were determined by grepping through the code base for
886*5486feefSafresh1    # locale-sensitive operations, and then figuring out something to exercise
887*5486feefSafresh1    # them.
888*5486feefSafresh1    foreach my $category (@valid_categories) {
889*5486feefSafresh1        no warnings 'uninitialized';
890*5486feefSafresh1
891*5486feefSafresh1        next if $category eq 'LC_ALL';  # Tested below as a combination of the
892*5486feefSafresh1                                        # individual categories
893*5486feefSafresh1        if ($category eq 'LC_COLLATE') {
894*5486feefSafresh1            add_trials('LC_COLLATE',
895*5486feefSafresh1                       # 'reverse' causes it to be definitely out of order for
896*5486feefSafresh1                       # the 'sort' to correct
897*5486feefSafresh1                       'quotemeta join "", sort reverse map { chr } (1..255)');
898*5486feefSafresh1
899*5486feefSafresh1            # We pass an re to exclude testing locales that don't necessarily
900*5486feefSafresh1            # have a lt b.
901*5486feefSafresh1            add_trials('LC_COLLATE', '"a" lt "B"', 'a<b');
902*5486feefSafresh1            add_trials('LC_COLLATE', 'my $a = "a"; my $b = "B";'
903*5486feefSafresh1                                   . ' POSIX::strcoll($a, $b) < 0;',
904*5486feefSafresh1                        'a<b');
905*5486feefSafresh1
906*5486feefSafresh1            # Doesn't include NUL because our memcollxfrm implementation of it
907*5486feefSafresh1            # isn't perfect
908*5486feefSafresh1            add_trials('LC_COLLATE', 'my $string = quotemeta join "",'
909*5486feefSafresh1                                   . ' map { chr } (1..255);'
910*5486feefSafresh1                                   . ' POSIX::strxfrm($string)');
911*5486feefSafresh1            next;
912*5486feefSafresh1        }
913*5486feefSafresh1
914*5486feefSafresh1        if ($category eq 'LC_CTYPE') {
915*5486feefSafresh1            add_trials('LC_CTYPE', 'no warnings "locale"; quotemeta lc'
916*5486feefSafresh1                                 . ' join "" , map { chr } (0..255)');
917*5486feefSafresh1            add_trials('LC_CTYPE', 'no warnings "locale"; quotemeta uc'
918*5486feefSafresh1                                 . ' join "", map { chr } (0..255)');
919*5486feefSafresh1            add_trials('LC_CTYPE', 'no warnings "locale"; quotemeta CORE::fc'
920*5486feefSafresh1                                 . ' join "", map { chr } (0..255)');
921*5486feefSafresh1            add_trials('LC_CTYPE', 'no warnings "locale";'
922*5486feefSafresh1                                 . ' my $string = join "", map { chr } 0..255;'
923*5486feefSafresh1                                 . ' $string =~ s|(.)|$1=~/\d/?1:0|gers');
924*5486feefSafresh1            add_trials('LC_CTYPE', 'no warnings "locale";'
925*5486feefSafresh1                                 . ' my $string = join "", map { chr } 0..255;'
926*5486feefSafresh1                                 . ' $string =~ s|(.)|$1=~/\s/?1:0|gers');
927*5486feefSafresh1            add_trials('LC_CTYPE', 'no warnings "locale";'
928*5486feefSafresh1                                 . ' my $string = join "", map { chr } 0..255;'
929*5486feefSafresh1                                 . ' $string =~ s|(.)|$1=~/\w/?1:0|gers');
930*5486feefSafresh1            add_trials('LC_CTYPE', 'no warnings "locale";'
931*5486feefSafresh1                              . ' my $string = join "", map { chr } 0..255;'
932*5486feefSafresh1                              . ' $string =~ s|(.)|$1=~/[[:alpha:]]/?1:0|gers');
933*5486feefSafresh1            add_trials('LC_CTYPE', 'no warnings "locale";'
934*5486feefSafresh1                              . ' my $string = join "", map { chr } 0..255;'
935*5486feefSafresh1                              . ' $string =~ s|(.)|$1=~/[[:alnum:]]/?1:0|gers');
936*5486feefSafresh1            add_trials('LC_CTYPE', 'no warnings "locale";'
937*5486feefSafresh1                              . ' my $string = join "", map { chr } 0..255;'
938*5486feefSafresh1                              . ' $string =~ s|(.)|$1=~/[[:ascii:]]/?1:0|gers');
939*5486feefSafresh1            add_trials('LC_CTYPE', 'no warnings "locale";'
940*5486feefSafresh1                              . ' my $string = join "", map { chr } 0..255;'
941*5486feefSafresh1                              . ' $string =~ s|(.)|$1=~/[[:blank:]]/?1:0|gers');
942*5486feefSafresh1            add_trials('LC_CTYPE', 'no warnings "locale";'
943*5486feefSafresh1                              . ' my $string = join "", map { chr } 0..255;'
944*5486feefSafresh1                              . ' $string =~ s|(.)|$1=~/[[:cntrl:]]/?1:0|gers');
945*5486feefSafresh1            add_trials('LC_CTYPE', 'no warnings "locale";'
946*5486feefSafresh1                              . ' my $string = join "", map { chr } 0..255;'
947*5486feefSafresh1                              . ' $string =~ s|(.)|$1=~/[[:graph:]]/?1:0|gers');
948*5486feefSafresh1            add_trials('LC_CTYPE', 'no warnings "locale";'
949*5486feefSafresh1                              . ' my $string = join "", map { chr } 0..255;'
950*5486feefSafresh1                              . ' $string =~ s|(.)|$1=~/[[:lower:]]/?1:0|gers');
951*5486feefSafresh1            add_trials('LC_CTYPE', 'no warnings "locale";'
952*5486feefSafresh1                              . ' my $string = join "", map { chr } 0..255;'
953*5486feefSafresh1                              . ' $string =~ s|(.)|$1=~/[[:print:]]/?1:0|gers');
954*5486feefSafresh1            add_trials('LC_CTYPE', 'no warnings "locale";'
955*5486feefSafresh1                              . ' my $string = join "", map { chr } 0..255;'
956*5486feefSafresh1                              . ' $string =~ s|(.)|$1=~/[[:punct:]]/?1:0|gers');
957*5486feefSafresh1            add_trials('LC_CTYPE', 'no warnings "locale";'
958*5486feefSafresh1                              . ' my $string = join "", map { chr } 0..255;'
959*5486feefSafresh1                              . ' $string =~ s|(.)|$1=~/[[:upper:]]/?1:0|gers');
960*5486feefSafresh1            add_trials('LC_CTYPE', 'no warnings "locale";'
961*5486feefSafresh1                             . ' my $string = join "", map { chr } 0..255;'
962*5486feefSafresh1                             . ' $string =~ s|(.)|$1=~/[[:xdigit:]]/?1:0|gers');
963*5486feefSafresh1            add_trials('LC_CTYPE', 'use I18N::Langinfo qw(langinfo CODESET);'
964*5486feefSafresh1                                 . ' no warnings "uninitialized";'
965*5486feefSafresh1                                 . ' langinfo(CODESET);');
966*5486feefSafresh1
967*5486feefSafresh1            # In the multibyte functions, the non-reentrant ones can't be made
968*5486feefSafresh1            # thread safe
969*5486feefSafresh1            if ($Config{'d_mbrlen'} eq 'define') {
970*5486feefSafresh1                add_trials('LC_CTYPE', 'my $string = chr 0x100;'
971*5486feefSafresh1                                     . ' utf8::encode($string);'
972*5486feefSafresh1                                     . ' no warnings "uninitialized";'
973*5486feefSafresh1                                     . ' POSIX::mblen(undef);'
974*5486feefSafresh1                                     . ' POSIX::mblen($string)',
975*5486feefSafresh1                           'utf8_only');
976*5486feefSafresh1            }
977*5486feefSafresh1            if ($Config{'d_mbrtowc'} eq 'define') {
978*5486feefSafresh1                add_trials('LC_CTYPE', 'my $value; my $str = "\x{100}";'
979*5486feefSafresh1                                     . ' utf8::encode($str);'
980*5486feefSafresh1                                     . ' no warnings "uninitialized";'
981*5486feefSafresh1                                     . ' POSIX::mbtowc(undef, undef);'
982*5486feefSafresh1                                     . ' POSIX::mbtowc($value, $str); $value;',
983*5486feefSafresh1                           'utf8_only');
984*5486feefSafresh1            }
985*5486feefSafresh1            if ($Config{'d_wcrtomb'} eq 'define') {
986*5486feefSafresh1                add_trials('LC_CTYPE', 'my $value;'
987*5486feefSafresh1                                     . ' no warnings "uninitialized";'
988*5486feefSafresh1                                     . ' POSIX::wctomb(undef, undef);'
989*5486feefSafresh1                                     . ' POSIX::wctomb($value, 0xFF);'
990*5486feefSafresh1                                     . ' $value;',
991*5486feefSafresh1                           'utf8_only');
992*5486feefSafresh1            }
993*5486feefSafresh1
994*5486feefSafresh1            add_trials('LC_CTYPE',
995*5486feefSafresh1                       'no warnings "locale";'
996*5486feefSafresh1                     . ' my $uc = CORE::uc join "", map { chr } (0..255);'
997*5486feefSafresh1                     . ' my $fc = quotemeta CORE::fc $uc;'
998*5486feefSafresh1                     . ' $uc =~ / \A $fc \z /xi;');
999*5486feefSafresh1            next;
1000*5486feefSafresh1        }
1001*5486feefSafresh1
1002*5486feefSafresh1        if ($category eq 'LC_MESSAGES') {
1003*5486feefSafresh1            add_trials('LC_MESSAGES',
1004*5486feefSafresh1                     "join \"\n\", map { \$! = \$_; \"\$!\" } ($msg_catalog)");
1005*5486feefSafresh1            add_trials('LC_MESSAGES',
1006*5486feefSafresh1                  'use I18N::Langinfo qw(langinfo YESSTR NOSTR YESEXPR NOEXPR);'
1007*5486feefSafresh1                . ' no warnings "uninitialized";'
1008*5486feefSafresh1                . ' join ",",'
1009*5486feefSafresh1                . '     map { langinfo($_) } YESSTR, NOSTR, YESEXPR, NOEXPR;');
1010*5486feefSafresh1            next;
1011*5486feefSafresh1        }
1012*5486feefSafresh1
1013*5486feefSafresh1        if ($category eq 'LC_MONETARY') {
1014*5486feefSafresh1            if ($has_localeconv) {
1015*5486feefSafresh1                add_trials('LC_MONETARY', "localeconv()->{currency_symbol}");
1016*5486feefSafresh1            }
1017*5486feefSafresh1            add_trials('LC_MONETARY',
1018*5486feefSafresh1                       'use I18N::Langinfo qw(langinfo CRNCYSTR);'
1019*5486feefSafresh1                    . ' no warnings "uninitialized";'
1020*5486feefSafresh1                    . ' join "|",  map { langinfo($_) } CRNCYSTR;');
1021*5486feefSafresh1            next;
1022*5486feefSafresh1        }
1023*5486feefSafresh1
1024*5486feefSafresh1        if ($category eq 'LC_NUMERIC') {
1025*5486feefSafresh1            if ($has_localeconv) {
1026*5486feefSafresh1                add_trials('LC_NUMERIC', "no warnings; 'uninitialised';"
1027*5486feefSafresh1                                       . " join '|',"
1028*5486feefSafresh1                                       . " localeconv()->{decimal_point},"
1029*5486feefSafresh1                                       . " localeconv()->{thousands_sep}");
1030*5486feefSafresh1            }
1031*5486feefSafresh1            add_trials('LC_NUMERIC',
1032*5486feefSafresh1                       'use I18N::Langinfo qw(langinfo RADIXCHAR THOUSEP);'
1033*5486feefSafresh1                     . ' no warnings "uninitialized";'
1034*5486feefSafresh1                     . ' join "|",  map { langinfo($_) } RADIXCHAR, THOUSEP;');
1035*5486feefSafresh1
1036*5486feefSafresh1            # Use a variable to avoid runtime bugs being hidden by constant
1037*5486feefSafresh1            # folding
1038*5486feefSafresh1            add_trials('LC_NUMERIC', 'my $in = 4.2; sprintf("%g", $in)');
1039*5486feefSafresh1            next;
1040*5486feefSafresh1        }
1041*5486feefSafresh1
1042*5486feefSafresh1        if ($category eq 'LC_TIME') {
1043*5486feefSafresh1            add_trials('LC_TIME', "POSIX::strftime($strftime_args)");
1044*5486feefSafresh1            add_trials('LC_TIME', <<~'END_OF_CODE');
1045*5486feefSafresh1                use I18N::Langinfo qw(langinfo
1046*5486feefSafresh1                    ABDAY_1 ABDAY_2 ABDAY_3 ABDAY_4 ABDAY_5 ABDAY_6 ABDAY_7
1047*5486feefSafresh1                    ABMON_1 ABMON_2 ABMON_3 ABMON_4 ABMON_5 ABMON_6
1048*5486feefSafresh1                    ABMON_7 ABMON_8 ABMON_9 ABMON_10 ABMON_11 ABMON_12
1049*5486feefSafresh1                    DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7
1050*5486feefSafresh1                    MON_1 MON_2 MON_3 MON_4 MON_5 MON_6
1051*5486feefSafresh1                    MON_7 MON_8 MON_9 MON_10 MON_11 MON_12
1052*5486feefSafresh1                    D_FMT D_T_FMT T_FMT);
1053*5486feefSafresh1                no warnings "uninitialized";
1054*5486feefSafresh1                join "|",
1055*5486feefSafresh1                    map { langinfo($_) }
1056*5486feefSafresh1                        ABDAY_1,ABDAY_2,ABDAY_3,ABDAY_4,ABDAY_5,
1057*5486feefSafresh1                        ABDAY_6,ABDAY_7,
1058*5486feefSafresh1                        ABMON_1,ABMON_2,ABMON_3,ABMON_4,ABMON_5,
1059*5486feefSafresh1                        ABMON_6, ABMON_7,ABMON_8,ABMON_9,ABMON_10,
1060*5486feefSafresh1                        ABMON_11,ABMON_12,
1061*5486feefSafresh1                        DAY_1,DAY_2,DAY_3,DAY_4,DAY_5,DAY_6,DAY_7,
1062*5486feefSafresh1                        MON_1,MON_2,MON_3,MON_4,MON_5,MON_6, MON_7,
1063*5486feefSafresh1                        MON_8,MON_9,MON_10,MON_11,MON_12,
1064*5486feefSafresh1                        D_FMT,D_T_FMT,T_FMT;
1065*5486feefSafresh1                END_OF_CODE
1066*5486feefSafresh1            next;
1067*5486feefSafresh1        }
1068*5486feefSafresh1    } # End of creating test cases.
1069*5486feefSafresh1
1070*5486feefSafresh1
1071*5486feefSafresh1    # Now analyze the test cases
1072*5486feefSafresh1    my %all_tests;
1073*5486feefSafresh1    foreach my $category (keys %distincts) {
1074*5486feefSafresh1        my %results;
1075*5486feefSafresh1        my %distinct_results_count;
1076*5486feefSafresh1
1077*5486feefSafresh1        # Find just the distinct test operations; sort for repeatibility
1078*5486feefSafresh1        my %distinct_ops;
1079*5486feefSafresh1        for my $op_result (sort keys $distincts{$category}->%*) {
1080*5486feefSafresh1            my ($op, $result) = unpack_op_result($op_result);
1081*5486feefSafresh1
1082*5486feefSafresh1            $distinct_ops{$op}++;
1083*5486feefSafresh1            push $results{$op}->@*, $result;
1084*5486feefSafresh1            $distinct_results_count{$result} +=
1085*5486feefSafresh1                        scalar $distincts{$category}{$op_result}{locales}->@*;
1086*5486feefSafresh1        }
1087*5486feefSafresh1
1088*5486feefSafresh1        # And get a sorted list of all the test operations
1089*5486feefSafresh1        my @ops = sort keys %distinct_ops;
1090*5486feefSafresh1
1091*5486feefSafresh1        sub gen_combinations {
1092*5486feefSafresh1
1093*5486feefSafresh1            # Generate all the non-empty combinations of operations and
1094*5486feefSafresh1            # results (for the current category) possible on this platform.
1095*5486feefSafresh1            # That is, if a category has N operations, it will generate a list
1096*5486feefSafresh1            # of entries.  Each entry will itself have N elements, one for
1097*5486feefSafresh1            # each operation, and when all the entries are considered
1098*5486feefSafresh1            # together, every possible outcome is represented.
1099*5486feefSafresh1
1100*5486feefSafresh1            my $op_ref = shift;         # Reference to list of operations
1101*5486feefSafresh1            my $results_ref = shift;    # Reference to hash; key is operation;
1102*5486feefSafresh1                                        # value is an array of all possible
1103*5486feefSafresh1                                        # outcomes of this operation.
1104*5486feefSafresh1            my $distincts_ref = shift;  # Reference to %distincts of this
1105*5486feefSafresh1                                        # category
1106*5486feefSafresh1
1107*5486feefSafresh1            # Get the first operation on the list
1108*5486feefSafresh1            my $op = shift $op_ref->@*;
1109*5486feefSafresh1
1110*5486feefSafresh1            # The return starts out as a list of hashes of all possible
1111*5486feefSafresh1            # outcomes for executing 'op'.  Each hash has two keys:
1112*5486feefSafresh1            #   'op_results' is an array of one element: 'op => result',
1113*5486feefSafresh1            #                packed into a string.
1114*5486feefSafresh1            #   'locales'    is an array of all the locales which have the
1115*5486feefSafresh1            #                same result for 'op'
1116*5486feefSafresh1            my @return;
1117*5486feefSafresh1            foreach my $result ($results_ref->{$op}->@*) {
1118*5486feefSafresh1                my $op_result = pack_op_result($op, $result);
1119*5486feefSafresh1                push @return, {
1120*5486feefSafresh1                            op_results => [ $op_result ],
1121*5486feefSafresh1                            locales    => $distincts_ref->{$op_result}{locales},
1122*5486feefSafresh1                          };
1123*5486feefSafresh1            }
1124*5486feefSafresh1
1125*5486feefSafresh1            # If this is the final element of the list, we are done.
1126*5486feefSafresh1            return (\@return) unless $op_ref->@*;
1127*5486feefSafresh1
1128*5486feefSafresh1            # Otherwise recurse to generate the combinations for the remainder
1129*5486feefSafresh1            # of the list.
1130*5486feefSafresh1            my $recurse_return = &gen_combinations($op_ref,
1131*5486feefSafresh1                                                   $results_ref,
1132*5486feefSafresh1                                                   $distincts_ref);
1133*5486feefSafresh1            # Now we have to generate the combinations of the current item
1134*5486feefSafresh1            # with the ones returned by the recursion.  Each element of the
1135*5486feefSafresh1            # current item is combined with each element of the recursed.
1136*5486feefSafresh1            my @combined;
1137*5486feefSafresh1            foreach my $this (@return) {
1138*5486feefSafresh1                my @this_locales = $this->{locales}->@*;
1139*5486feefSafresh1                foreach my $recursed ($recurse_return->@*) {
1140*5486feefSafresh1                    my @recursed_locales = $recursed->{locales}->@*;
1141*5486feefSafresh1
1142*5486feefSafresh1                    # @this_locales is a list of locales this op => result is
1143*5486feefSafresh1                    # valid for.  @recursed_locales is similarly a list of the
1144*5486feefSafresh1                    # valid ones for the recursed return.  Their intersection
1145*5486feefSafresh1                    # is a list of the locales valid for this combination.
1146*5486feefSafresh1                    my %seen;
1147*5486feefSafresh1                    $seen{$_}++ foreach @this_locales, @recursed_locales;
1148*5486feefSafresh1                    my @intersection = grep $seen{$_} == 2, keys %seen;
1149*5486feefSafresh1
1150*5486feefSafresh1                    # An alternative intersection algorithm:
1151*5486feefSafresh1                    # my (%set1, %set2);
1152*5486feefSafresh1                    # @set1{@list1} = ();
1153*5486feefSafresh1                    # @set2{@list2} = ();
1154*5486feefSafresh1                    # my @intersection = grep exists $set1{$_}, keys %set2;
1155*5486feefSafresh1
1156*5486feefSafresh1                    # If the intersection is empty, this combination can't
1157*5486feefSafresh1                    # actually happen on this platform.
1158*5486feefSafresh1                    next unless @intersection;
1159*5486feefSafresh1
1160*5486feefSafresh1                    # Append the recursed list to the current list to form the
1161*5486feefSafresh1                    # combined list.
1162*5486feefSafresh1                    my @combined_result = $this->{op_results}->@*;
1163*5486feefSafresh1                    push @combined_result, $recursed->{op_results}->@*;
1164*5486feefSafresh1                    # And create the hash for the combined result, including
1165*5486feefSafresh1                    # the locales it is valid for
1166*5486feefSafresh1                    push @combined, {
1167*5486feefSafresh1                                      op_results => \@combined_result,
1168*5486feefSafresh1                                      locales    => \@intersection,
1169*5486feefSafresh1                                    };
1170*5486feefSafresh1                }
1171*5486feefSafresh1            }
1172*5486feefSafresh1
1173*5486feefSafresh1            return \@combined;
1174*5486feefSafresh1        } # End of gen_combinations() definition
1175*5486feefSafresh1
1176*5486feefSafresh1        # The result of calling gen_combinations() will be an array of hashes.
1177*5486feefSafresh1        #
1178*5486feefSafresh1        # The main value in each hash is an array (whose key is 'op_results')
1179*5486feefSafresh1        # containing all the tests for this category for a thread.  If there
1180*5486feefSafresh1        # were N calls to 'add_trial' for this category, there will be 'N'
1181*5486feefSafresh1        # elements in the array.  Each element is a string packed with the
1182*5486feefSafresh1        # operation to eval in a thread and the operation's expected result.
1183*5486feefSafresh1        #
1184*5486feefSafresh1        # The other data structure in each hash is an array with the key
1185*5486feefSafresh1        # 'locales'.  That array is a list of every locale which yields the
1186*5486feefSafresh1        # identical results in 'op_results'.
1187*5486feefSafresh1        #
1188*5486feefSafresh1        # Effectively, each hash gives all the tests for this category for a
1189*5486feefSafresh1        # thread.  The total array of hashes gives the complete list of
1190*5486feefSafresh1        # distinct tests possible on this system.  So later, a thread will
1191*5486feefSafresh1        # pluck the next available one from the array..
1192*5486feefSafresh1        my $combinations_ref = gen_combinations(\@ops, \%results,
1193*5486feefSafresh1                                                $distincts{$category});
1194*5486feefSafresh1
1195*5486feefSafresh1        # Fix up the entries ...
1196*5486feefSafresh1        foreach my $test ($combinations_ref->@*) {
1197*5486feefSafresh1
1198*5486feefSafresh1            # Sort the locale names; this makes it work for later comparisons
1199*5486feefSafresh1            # to look at just the first element of each list.
1200*5486feefSafresh1            $test->{locales}->@* =
1201*5486feefSafresh1                                sort sort_by_hashed_locale $test->{locales}->@*;
1202*5486feefSafresh1
1203*5486feefSafresh1            # And for each test, calculate and store how many locales have the
1204*5486feefSafresh1            # same result (saves recomputation later in a sort).  This adds
1205*5486feefSafresh1            # another data structure to each hash in the main array.
1206*5486feefSafresh1            my @individual_tests = $test->{op_results}->@*;
1207*5486feefSafresh1            my @in_common_locale_counts;
1208*5486feefSafresh1            foreach my $this_test (@individual_tests) {
1209*5486feefSafresh1
1210*5486feefSafresh1                # Each test came from %distincts, and there we have stored the
1211*5486feefSafresh1                # list of all locales that yield the same result
1212*5486feefSafresh1                push @in_common_locale_counts,
1213*5486feefSafresh1                        scalar $distincts{$category}{$this_test}{locales}->@*;
1214*5486feefSafresh1            }
1215*5486feefSafresh1            push $test->{in_common_locale_counts}->@*, @in_common_locale_counts;
1216*5486feefSafresh1        }
1217*5486feefSafresh1
1218*5486feefSafresh1        # Make a copy
1219*5486feefSafresh1        my @cat_tests = $combinations_ref->@*;
1220*5486feefSafresh1
1221*5486feefSafresh1        # This sorts the test cases so that the ones with the least overlap
1222*5486feefSafresh1        # with other cases are first.
1223*5486feefSafresh1        sub sort_test_order {
1224*5486feefSafresh1            my $a_tests_count = scalar $a->{in_common_locale_counts}->@*;
1225*5486feefSafresh1            my $b_tests_count = scalar $b->{in_common_locale_counts}->@*;
1226*5486feefSafresh1            my $tests_count = min($a_tests_count, $b_tests_count);
1227*5486feefSafresh1
1228*5486feefSafresh1            # Choose the one that is most distinctive (least overlap); that is
1229*5486feefSafresh1            # the one that has the most tests whose results are not shared by
1230*5486feefSafresh1            # any other locale.
1231*5486feefSafresh1            my $a_nondistincts = 0;
1232*5486feefSafresh1            my $b_nondistincts = 0;
1233*5486feefSafresh1            for my $i (0 .. $tests_count - 1) {
1234*5486feefSafresh1                $a_nondistincts += ($a->{in_common_locale_counts}[$i] != 1);
1235*5486feefSafresh1                $b_nondistincts += ($b->{in_common_locale_counts}[$i] != 1);
1236*5486feefSafresh1            }
1237*5486feefSafresh1
1238*5486feefSafresh1            my $cmp = $a_nondistincts <=> $b_nondistincts;
1239*5486feefSafresh1            return $cmp if $cmp;
1240*5486feefSafresh1
1241*5486feefSafresh1            # If they have the same number of those, choose the one with the
1242*5486feefSafresh1            # fewest total number of locales that have the same result
1243*5486feefSafresh1            my $a_count = 0;
1244*5486feefSafresh1            my $b_count = 0;
1245*5486feefSafresh1            for my $i (0 .. $tests_count - 1) {
1246*5486feefSafresh1                $a_count += $a->{in_common_locale_counts}[$i];
1247*5486feefSafresh1                $b_count += $b->{in_common_locale_counts}[$i];
1248*5486feefSafresh1            }
1249*5486feefSafresh1
1250*5486feefSafresh1            $cmp = $a_count <=> $b_count;
1251*5486feefSafresh1            return $cmp if $cmp;
1252*5486feefSafresh1
1253*5486feefSafresh1            # If that still doesn't yield a winner, use the general sort order.
1254*5486feefSafresh1            local $a = $a->{locales}[0];
1255*5486feefSafresh1            local $b = $b->{locales}[0];
1256*5486feefSafresh1            return sort_by_hashed_locale;
1257*5486feefSafresh1        }
1258*5486feefSafresh1
1259*5486feefSafresh1        # Actually perform the sort.
1260*5486feefSafresh1        @cat_tests = sort sort_test_order @cat_tests;
1261*5486feefSafresh1
1262*5486feefSafresh1        # This category will now have all the distinct tests possible for it
1263*5486feefSafresh1        # on this platform, with the first test being the one with the least
1264*5486feefSafresh1        # overlap with other test cases
1265*5486feefSafresh1        push $all_tests{$category}->@*, @cat_tests;
1266*5486feefSafresh1    }     # End of loop through the categories creating and sorting the test
1267*5486feefSafresh1          # cases
1268*5486feefSafresh1
1269*5486feefSafresh1    my %thread_already_used_locales;
1270*5486feefSafresh1
1271*5486feefSafresh1    # Now generate the tests for each thread.
1272*5486feefSafresh1    my @tests_by_thread;
1273*5486feefSafresh1    for my $i (0 .. $thread_count - 1) {
1274*5486feefSafresh1        foreach my $category (sort keys %all_tests) {
1275*5486feefSafresh1            my $skipped = 0;    # Used below to not loop infinitely
1276*5486feefSafresh1
1277*5486feefSafresh1            # Get the next test case
1278*5486feefSafresh1          NEXT_CANDIDATE:
1279*5486feefSafresh1            my $candidate = shift $all_tests{$category}->@*;
1280*5486feefSafresh1
1281*5486feefSafresh1            my $locale_name = $candidate->{locales}[0];
1282*5486feefSafresh1
1283*5486feefSafresh1            # Avoid, if possible, using the same locale name twice (for
1284*5486feefSafresh1            # different categories) in the same thread.
1285*5486feefSafresh1            if (defined $thread_already_used_locales{$locale_name =~ s/\W.*//r})
1286*5486feefSafresh1            {
1287*5486feefSafresh1                # Look through the synonyms of this locale for an
1288*5486feefSafresh1                # as-yet-unused one
1289*5486feefSafresh1                for my $j (1 .. $candidate->{locales}->@* - 1) {
1290*5486feefSafresh1                    my $synonym = $candidate->{locales}[$j];
1291*5486feefSafresh1                    next if defined $thread_already_used_locales{$synonym =~
1292*5486feefSafresh1                                                                    s/\W.*//r};
1293*5486feefSafresh1                    $locale_name = $synonym;
1294*5486feefSafresh1                    goto found_synonym;
1295*5486feefSafresh1                }
1296*5486feefSafresh1
1297*5486feefSafresh1                # Here, no synonym was found.  If we haven't cycled through
1298*5486feefSafresh1                # all the possible tests, try another (putting this one at the
1299*5486feefSafresh1                # end as a last resort in the future).
1300*5486feefSafresh1                $skipped++;
1301*5486feefSafresh1                if ($skipped < scalar $all_tests{$category}->@*) {
1302*5486feefSafresh1                    push $all_tests{$category}->@*, $candidate;
1303*5486feefSafresh1                    goto NEXT_CANDIDATE;
1304*5486feefSafresh1                }
1305*5486feefSafresh1
1306*5486feefSafresh1                # Here no synonym was found, this test has already been used,
1307*5486feefSafresh1                # but there are no unused ones, so have to re-use it.
1308*5486feefSafresh1
1309*5486feefSafresh1              found_synonym:
1310*5486feefSafresh1            }
1311*5486feefSafresh1
1312*5486feefSafresh1            # Here, we have found a test case.  The thread needs to know what
1313*5486feefSafresh1            # locale to use,
1314*5486feefSafresh1            $tests_by_thread[$i]->{$category}{locale_name} = $locale_name;
1315*5486feefSafresh1
1316*5486feefSafresh1            # And it needs to know each test to run, and the expected result.
1317*5486feefSafresh1            my @cases;
1318*5486feefSafresh1            for my $j (0 .. $candidate->{op_results}->@* - 1) {
1319*5486feefSafresh1                my ($op, $result) =
1320*5486feefSafresh1                             unpack_op_result($candidate->{op_results}[$j]);
1321*5486feefSafresh1                push @cases, { op => $op, expected => $result };
1322*5486feefSafresh1            }
1323*5486feefSafresh1            push $tests_by_thread[$i]->{$category}{locale_tests}->@*, @cases;
1324*5486feefSafresh1
1325*5486feefSafresh1            # Done with this category in this thread.  Setup for subsequent
1326*5486feefSafresh1            # categories in this thread, and subsequent threads.
1327*5486feefSafresh1            #
1328*5486feefSafresh1            # It's best to not have two categories in a thread use the same
1329*5486feefSafresh1            # locale.  Save this locale name so that later iterations handling
1330*5486feefSafresh1            # other categories can avoid using it, if possible.
1331*5486feefSafresh1            $thread_already_used_locales{$locale_name =~ s/\W.*//r} = 1;
1332*5486feefSafresh1
1333*5486feefSafresh1            # In pursuit of using as many different locales as possible, the
1334*5486feefSafresh1            # first shall be last in line next time, and eventually the last
1335*5486feefSafresh1            # shall be first
1336*5486feefSafresh1            push $candidate->{locales}->@*, shift $candidate->{locales}->@*;
1337*5486feefSafresh1
1338*5486feefSafresh1            # Similarly, this test case is added back at the end of the list,
1339*5486feefSafresh1            # so will be used only as a last resort in the next thread, and as
1340*5486feefSafresh1            # the penultimate resort in the thread following that, etc. as the
1341*5486feefSafresh1            # test cases are cycled through.
1342*5486feefSafresh1            push $all_tests{$category}->@*, $candidate;
1343*5486feefSafresh1        } # End of looping through the categories for this thread
1344*5486feefSafresh1    } # End of generating all threads
1345*5486feefSafresh1
1346*5486feefSafresh1    # Now reformat the tests to a form convenient for the actual test file
1347*5486feefSafresh1    # script to use; minimizing the amount of ancillary work it needs to do.
1348*5486feefSafresh1    my @cooked_tests;
1349*5486feefSafresh1    for my $i (0 .. $#tests_by_thread) {
1350*5486feefSafresh1
1351*5486feefSafresh1        my $this_tests = $tests_by_thread[$i];
1352*5486feefSafresh1        my @this_cooked_tests;
1353*5486feefSafresh1        my (@this_categories, @this_locales);    # Parallel arrays
1354*5486feefSafresh1
1355*5486feefSafresh1        # Every so often we use LC_ALL instead of individual locales, provided
1356*5486feefSafresh1        # it is available on the platform
1357*5486feefSafresh1        if (   ($i % $lc_all_frequency == $lc_all_frequency - 1)
1358*5486feefSafresh1            && $LC_ALL_string eq 'LC_ALL')
1359*5486feefSafresh1        {
1360*5486feefSafresh1            my $lc_all= "";
1361*5486feefSafresh1            my $category_number;
1362*5486feefSafresh1
1363*5486feefSafresh1            # Compute the LC_ALL string for the syntax accepted by this
1364*5486feefSafresh1            # platform from the locale each category is to be set to.
1365*5486feefSafresh1            while (defined($category_number = get_next_category())) {
1366*5486feefSafresh1                my $category_name =
1367*5486feefSafresh1                                $map_category_number_to_name{$category_number};
1368*5486feefSafresh1                my $locale = $this_tests->{$category_name}{locale_name};
1369*5486feefSafresh1                $locale = "C" unless defined $locale;
1370*5486feefSafresh1                $category_name =~ s/\@/\\@/g;
1371*5486feefSafresh1
1372*5486feefSafresh1                $lc_all .= $lc_all_separator if $lc_all ne "";
1373*5486feefSafresh1
1374*5486feefSafresh1                if ($use_name_value_pairs) {
1375*5486feefSafresh1                    $lc_all .= $category_name . "=";
1376*5486feefSafresh1                }
1377*5486feefSafresh1
1378*5486feefSafresh1                $lc_all .= $locale;
1379*5486feefSafresh1            }
1380*5486feefSafresh1
1381*5486feefSafresh1            $this_categories[0] = $LC_ALL;
1382*5486feefSafresh1            $this_locales[0] = $lc_all;
1383*5486feefSafresh1        }
1384*5486feefSafresh1        else {  # The other times, just set each category to its locale
1385*5486feefSafresh1                # individually
1386*5486feefSafresh1            foreach my $category_name (sort keys $this_tests->%*) {
1387*5486feefSafresh1                push @this_categories,
1388*5486feefSafresh1                                $map_category_name_to_number{$category_name};
1389*5486feefSafresh1                push @this_locales,
1390*5486feefSafresh1                            $this_tests->{$category_name}{locale_name};
1391*5486feefSafresh1            }
1392*5486feefSafresh1        }
1393*5486feefSafresh1
1394*5486feefSafresh1        while (keys $this_tests->%*) {
1395*5486feefSafresh1            foreach my $category_name (sort keys $this_tests->%*) {
1396*5486feefSafresh1                my $this_category_tests = $this_tests->{$category_name};
1397*5486feefSafresh1                my $test = shift
1398*5486feefSafresh1                                $this_category_tests->{locale_tests}->@*;
1399*5486feefSafresh1                print STDERR __FILE__, ': ', __LINE__, ': ', Dumper $test
1400*5486feefSafresh1                                                                    if $debug;
1401*5486feefSafresh1                if (! $test) {
1402*5486feefSafresh1                    delete $this_tests->{$category_name};
1403*5486feefSafresh1                    next;
1404*5486feefSafresh1                }
1405*5486feefSafresh1
1406*5486feefSafresh1                $test->{category_name} = $category_name;
1407*5486feefSafresh1                my $locale_name = $this_category_tests->{locale_name};
1408*5486feefSafresh1                $test->{locale_name} = $locale_name;
1409*5486feefSafresh1                $test->{codeset} =
1410*5486feefSafresh1                                $locale_name_to_object{$locale_name}{codeset};
1411*5486feefSafresh1
1412*5486feefSafresh1                push @this_cooked_tests, $test;
1413*5486feefSafresh1            }
1414*5486feefSafresh1        }
1415*5486feefSafresh1
1416*5486feefSafresh1        push @cooked_tests, {
1417*5486feefSafresh1                              thread => $i,
1418*5486feefSafresh1                              categories => \@this_categories,
1419*5486feefSafresh1                              locales => \@this_locales,
1420*5486feefSafresh1                              tests => \@this_cooked_tests,
1421*5486feefSafresh1                            };
1422*5486feefSafresh1    }
1423*5486feefSafresh1
1424*5486feefSafresh1    my $all_tests_ref = \@cooked_tests;
1425*5486feefSafresh1    my $all_tests_file = tempfile();
1426*5486feefSafresh1
1427*5486feefSafresh1    # Store the tests into a file, retrievable by the subprocess
1428*5486feefSafresh1    use Storable;
1429*5486feefSafresh1    if (! defined store($all_tests_ref, $all_tests_file)) {
1430*5486feefSafresh1        die "Could not save the built-up data structure";
1431*5486feefSafresh1    }
1432*5486feefSafresh1
1433*5486feefSafresh1    my $category_number_to_name = Data::Dumper->Dump(
1434*5486feefSafresh1                                            [ \%map_category_number_to_name ],
1435*5486feefSafresh1                                            [  'map_category_number_to_name']);
1436*5486feefSafresh1
1437*5486feefSafresh1    my $switches = "";
1438*5486feefSafresh1    $switches = "switches => [ -DLv ]" if $debug > 2;
1439*5486feefSafresh1
1440*5486feefSafresh1    # Build up the program to run.  This stresses locale thread safety.  We
1441*5486feefSafresh1    # start a bunch of threads.  Each sets the locale of each category being
1442*5486feefSafresh1    # tested to the value determined in the code above.  Then each sleeps to a
1443*5486feefSafresh1    # common start time, at which point they awaken and iterate their
1444*5486feefSafresh1    # respective loops.  Each iteration runs a set of tests and checks that
1445*5486feefSafresh1    # the results are as expected.  This should catch any instances of other
1446*5486feefSafresh1    # threads interfering.  Every so often, each thread shifts to instead use
1447*5486feefSafresh1    # the locales and tests of another thread.  This catches bugs dealing with
1448*5486feefSafresh1    # changing the locale on the fly.
1449*5486feefSafresh1    #
1450*5486feefSafresh1    # The code above has set up things so that each thread has as disparate
1451*5486feefSafresh1    # results from the other threads as possible, so to more likely catch any
1452*5486feefSafresh1    # bleed-through.
1453*5486feefSafresh1    my $program = <<EOT;
1454*5486feefSafresh1
1455*5486feefSafresh1    BEGIN { \$| = 1; }
1456*5486feefSafresh1    my \$debug = $debug;
1457*5486feefSafresh1    my \$thread_count = $thread_count;
1458*5486feefSafresh1    my \$iterations_per_test_set = $iterations_per_test_set;
1459*5486feefSafresh1    my \$iterations = $iterations;
1460*5486feefSafresh1    my \$die_on_negative_sleep = $die_on_negative_sleep;
1461*5486feefSafresh1    my \$per_thread_startup = $per_thread_startup;
1462*5486feefSafresh1    my \$all_tests_file = $all_tests_file;
1463*5486feefSafresh1    my \$alarm_clock = $alarm_clock;
1464*5486feefSafresh1EOT
1465*5486feefSafresh1
1466*5486feefSafresh1    $program .= <<'EOT';
14675759b3d2Safresh1    use threads;
14685759b3d2Safresh1    use strict;
14695759b3d2Safresh1    use warnings;
14705759b3d2Safresh1    use POSIX qw(locale_h);
1471*5486feefSafresh1    use utf8;
1472*5486feefSafresh1    use Time::HiRes qw(time usleep);
1473*5486feefSafresh1    $|=1;
14745759b3d2Safresh1
1475*5486feefSafresh1    use Data::Dumper;
1476*5486feefSafresh1    $Data::Dumper::Sortkeys=1;
1477*5486feefSafresh1    $Data::Dumper::Useqq = 1;
1478*5486feefSafresh1    $Data::Dumper::Deepcopy = 1;
14795759b3d2Safresh1
1480*5486feefSafresh1    # Get the tests stored for us by the setup process
1481*5486feefSafresh1    use Storable;
1482*5486feefSafresh1    my $all_tests_ref = retrieve($all_tests_file);
1483*5486feefSafresh1    if (! defined $all_tests_ref) {
1484*5486feefSafresh1        die "Could not restore the built-up data structure";
14855759b3d2Safresh1    }
14865759b3d2Safresh1
1487*5486feefSafresh1    my %corrects;
1488*5486feefSafresh1
1489*5486feefSafresh1    sub output_test_failure_prefix {
1490*5486feefSafresh1        my ($iteration, $category_name, $test) = @_;
1491*5486feefSafresh1        my $tid = threads->tid();
1492*5486feefSafresh1        print STDERR "\nthread ", $tid,
1493*5486feefSafresh1                     " failed in iteration $iteration",
1494*5486feefSafresh1                     " for locale $test->{locale_name}",
1495*5486feefSafresh1                     " codeset='$test->{codeset}'",
1496*5486feefSafresh1                     " $category_name",
1497*5486feefSafresh1                     "\nop='$test->{op}'",
1498*5486feefSafresh1                     "\nafter getting ", ($corrects{$category_name}
1499*5486feefSafresh1                                          {$test->{locale_name}}
1500*5486feefSafresh1                                          {all} // 0),
1501*5486feefSafresh1                     " previous correct results for this category and",
1502*5486feefSafresh1                     " locale,\nincluding ", ($corrects{$category_name}
1503*5486feefSafresh1                                              {$test->{locale_name}}
1504*5486feefSafresh1                                              {$tid} // 0),
1505*5486feefSafresh1                     " in this thread\n";
1506*5486feefSafresh1    }
1507*5486feefSafresh1
1508*5486feefSafresh1    sub output_test_result($$$) {
1509*5486feefSafresh1        my ($type, $result, $utf8_matches) = @_;
1510*5486feefSafresh1
1511*5486feefSafresh1        no locale;
1512*5486feefSafresh1
1513*5486feefSafresh1        print STDERR "$type";
1514*5486feefSafresh1
1515*5486feefSafresh1        my $copy = $result;
1516*5486feefSafresh1        if (! $utf8_matches) {
1517*5486feefSafresh1            if (utf8::is_utf8($copy)) {
1518*5486feefSafresh1                print STDERR " (result already was in UTF-8)";
1519*5486feefSafresh1            }
1520*5486feefSafresh1            else {
1521*5486feefSafresh1                utf8::upgrade($copy);
1522*5486feefSafresh1                print STDERR " (result wasn't in UTF-8; converted for easier",
1523*5486feefSafresh1                             " comparison)";
1524*5486feefSafresh1            }
1525*5486feefSafresh1        }
1526*5486feefSafresh1        print STDERR ":\n";
1527*5486feefSafresh1
1528*5486feefSafresh1        use Devel::Peek;
1529*5486feefSafresh1        Dump $copy;
1530*5486feefSafresh1    }
1531*5486feefSafresh1
1532*5486feefSafresh1    sub iterate {       # Run some chunk of iterations of the tests
1533*5486feefSafresh1        my ($tid,                  # Which thread
1534*5486feefSafresh1            $initial_iteration,    # The number of the first iteration
1535*5486feefSafresh1            $count,                # How many
1536*5486feefSafresh1            $tests_ref)            # The tests
1537*5486feefSafresh1            = @_;
1538*5486feefSafresh1
1539*5486feefSafresh1        my $iteration = $initial_iteration;
1540*5486feefSafresh1        $count += $initial_iteration;
1541*5486feefSafresh1
1542*5486feefSafresh1        # Repeatedly ...
1543*5486feefSafresh1        while ($iteration < $count) {
1544*5486feefSafresh1            my $errors = 0;
1545*5486feefSafresh1
1546*5486feefSafresh1            use locale;
1547*5486feefSafresh1
1548*5486feefSafresh1            # ... execute the tests
1549*5486feefSafresh1            foreach my $test ($tests_ref->@*) {
1550*5486feefSafresh1
1551*5486feefSafresh1                # We know what we are expecting
1552*5486feefSafresh1                my $expected = $test->{expected};
1553*5486feefSafresh1
1554*5486feefSafresh1                my $category_name = $test->{category_name};
1555*5486feefSafresh1
1556*5486feefSafresh1                # And do the test.
1557*5486feefSafresh1                my $got = eval $test->{op};
1558*5486feefSafresh1
1559*5486feefSafresh1                if (! defined $got) {
1560*5486feefSafresh1                    output_test_failure_prefix($iteration,
1561*5486feefSafresh1                                               $category_name,
1562*5486feefSafresh1                                               $test);
1563*5486feefSafresh1                    output_test_result("expected", $expected,
1564*5486feefSafresh1                                        1 # utf8ness matches, since only one
1565*5486feefSafresh1                                      );
1566*5486feefSafresh1                    $errors++;
1567*5486feefSafresh1                    next;
1568*5486feefSafresh1                }
1569*5486feefSafresh1
1570*5486feefSafresh1                my $utf8ness_matches = (   utf8::is_utf8($got)
1571*5486feefSafresh1                                        == utf8::is_utf8($expected));
1572*5486feefSafresh1
1573*5486feefSafresh1                my $matched = ($got eq $expected);
1574*5486feefSafresh1                if ($matched) {
1575*5486feefSafresh1                    if ($utf8ness_matches) {
1576*5486feefSafresh1                        no warnings 'uninitialized';
1577*5486feefSafresh1                        $corrects{$category_name}{$test->{locale_name}}{all}++;
1578*5486feefSafresh1                        $corrects{$category_name}{$test->{locale_name}}{$tid}++;
1579*5486feefSafresh1                        next;   # Complete success!
1580*5486feefSafresh1                    }
1581*5486feefSafresh1                }
1582*5486feefSafresh1
1583*5486feefSafresh1                $errors++;
1584*5486feefSafresh1                output_test_failure_prefix($iteration, $category_name, $test);
1585*5486feefSafresh1
1586*5486feefSafresh1                if ($matched) {
1587*5486feefSafresh1                    print STDERR "Only difference is UTF8ness of results\n";
1588*5486feefSafresh1                }
1589*5486feefSafresh1                output_test_result("expected", $expected, $utf8ness_matches);
1590*5486feefSafresh1                output_test_result("got", $got, $utf8ness_matches);
1591*5486feefSafresh1
1592*5486feefSafresh1            } # Loop to do the remaining tests for this iteration
1593*5486feefSafresh1
1594*5486feefSafresh1            return 0 if $errors;
1595*5486feefSafresh1
1596*5486feefSafresh1            $iteration++;
1597*5486feefSafresh1
1598*5486feefSafresh1            # A way to set a gdb break point pp_study
1599*5486feefSafresh1            #study if $iteration % 10 == 0;
1600*5486feefSafresh1
1601*5486feefSafresh1            threads->yield();
16025759b3d2Safresh1        }
16035759b3d2Safresh1
16045759b3d2Safresh1        return 1;
1605*5486feefSafresh1    } # End of iterate() definition
16065759b3d2Safresh1
1607*5486feefSafresh1EOT
1608*5486feefSafresh1
1609*5486feefSafresh1    $program .= "my $category_number_to_name\n";
1610*5486feefSafresh1
1611*5486feefSafresh1    $program .= <<'EOT';
1612*5486feefSafresh1    sub setlocales {
1613*5486feefSafresh1        # Set each category to the appropriate locale for this test set
1614*5486feefSafresh1        my ($categories, $locales) = @_;
1615*5486feefSafresh1        for my $i (0 .. $categories->@* - 1) {
1616*5486feefSafresh1            if (! setlocale($categories->[$i], $locales->[$i])) {
1617*5486feefSafresh1                my $category_name =
1618*5486feefSafresh1                            $map_category_number_to_name->{$categories->[$i]};
1619*5486feefSafresh1                print STDERR "\nthread ", threads->tid(),
1620*5486feefSafresh1                             " setlocale($category_name ($categories->[$i]),",
1621*5486feefSafresh1                             " $locales->[$i]) failed\n";
1622*5486feefSafresh1                return 0;
1623*5486feefSafresh1            }
16245759b3d2Safresh1        }
16255759b3d2Safresh1
1626*5486feefSafresh1        return 1;
1627*5486feefSafresh1    }
1628*5486feefSafresh1
1629*5486feefSafresh1    my $startup_insurance = 1;
1630*5486feefSafresh1    my $future = $startup_insurance + $thread_count * $per_thread_startup;
1631*5486feefSafresh1    my $starting_time = time() + $future;
1632*5486feefSafresh1
1633*5486feefSafresh1    sub wait_until_time {
1634*5486feefSafresh1
1635*5486feefSafresh1        # Sleep until the time when all the threads are due to wake up, so
1636*5486feefSafresh1        # they run as simultaneously as we can make it.
1637*5486feefSafresh1        my $sleep_time = ($starting_time - time());
1638*5486feefSafresh1        #printf STDERR "thread %d started, sleeping %g sec\n",
1639*5486feefSafresh1        #              threads->tid, $sleep_time;
1640*5486feefSafresh1        if ($sleep_time < 0 && $die_on_negative_sleep) {
1641*5486feefSafresh1            # What the start time should have been
1642*5486feefSafresh1            my $a_better_future = $future - $sleep_time;
1643*5486feefSafresh1
1644*5486feefSafresh1            my $better_per_thread =
1645*5486feefSafresh1                        ($a_better_future - $startup_insurance) / $thread_count;
1646*5486feefSafresh1            printf STDERR "$per_thread_startup would need to be %g",
1647*5486feefSafresh1                          " for thread %d to have started\nin sync with",
1648*5486feefSafresh1                          " the other threads\n",
1649*5486feefSafresh1                          $better_per_thread, threads->tid;
1650*5486feefSafresh1            die "Thread started too late";
1651*5486feefSafresh1        }
1652*5486feefSafresh1        else {
1653*5486feefSafresh1            usleep($sleep_time * 1_000_000) if $sleep_time > 0;
1654*5486feefSafresh1        }
1655*5486feefSafresh1    }
1656*5486feefSafresh1
1657*5486feefSafresh1    # Create all the subthreads: 1..n
1658*5486feefSafresh1    my @threads = map +threads->create(sub {
1659*5486feefSafresh1        $SIG{'KILL'} = sub { threads->exit(); };
1660*5486feefSafresh1
1661*5486feefSafresh1        my $thread = shift;
1662*5486feefSafresh1
1663*5486feefSafresh1        # Start out with the set of tests whose number is the same as the
1664*5486feefSafresh1        # thread number
1665*5486feefSafresh1        my $test_set = $thread;
1666*5486feefSafresh1
1667*5486feefSafresh1        wait_until_time();
1668*5486feefSafresh1
1669*5486feefSafresh1        # Loop through all the iterations for this thread
1670*5486feefSafresh1        my $this_iteration_start = 1;
1671*5486feefSafresh1        do {
1672*5486feefSafresh1             # Set up each category with its locale;
1673*5486feefSafresh1            my $this_ref = $all_tests_ref->[$test_set];
1674*5486feefSafresh1            return 0 unless setlocales($this_ref->{categories},
1675*5486feefSafresh1                                       $this_ref->{locales});
1676*5486feefSafresh1            # Then run one batch of iterations
1677*5486feefSafresh1            my $result = iterate($thread,
1678*5486feefSafresh1                                 $this_iteration_start,
1679*5486feefSafresh1                                 $iterations_per_test_set,
1680*5486feefSafresh1                                 $this_ref->{tests});
1681*5486feefSafresh1            return 0 if $result == 0;   # Quit if failed
1682*5486feefSafresh1
1683*5486feefSafresh1            # Next iteration will shift to use a different set of locales for
1684*5486feefSafresh1            # each category
1685*5486feefSafresh1            $test_set++;
1686*5486feefSafresh1            $test_set = 0 if $test_set >= $thread_count;
1687*5486feefSafresh1            $this_iteration_start += $iterations_per_test_set;
1688*5486feefSafresh1        } while ($this_iteration_start <= $iterations);
1689*5486feefSafresh1
1690*5486feefSafresh1        return 1;   # Success
1691*5486feefSafresh1
1692*5486feefSafresh1    }, $_), (1..$thread_count - 1);     # For each non-0 thread
1693*5486feefSafresh1
1694*5486feefSafresh1    # Here is thread 0.  We do a smaller chunk of iterations in it; then
1695*5486feefSafresh1    # join whatever threads have finished so far, then do another chunk.
1696*5486feefSafresh1    # This tests for bugs that arise as a result of joining.
1697*5486feefSafresh1
1698*5486feefSafresh1    my %thread0_corrects = ();
1699*5486feefSafresh1    my $this_iteration_start = 1;
1700*5486feefSafresh1    my $result = 1;    # So far, everything is ok
1701*5486feefSafresh1    my $test_set = -1;  # Start with 0th test set
1702*5486feefSafresh1
1703*5486feefSafresh1    wait_until_time();
1704*5486feefSafresh1    alarm($alarm_clock);    # Guard against hangs
1705*5486feefSafresh1
1706*5486feefSafresh1    do {
1707*5486feefSafresh1        # Next time, we'll use the next test set
1708*5486feefSafresh1        $test_set++;
1709*5486feefSafresh1        $test_set = 0 if $test_set >= $thread_count;
1710*5486feefSafresh1
1711*5486feefSafresh1        my $this_ref = $all_tests_ref->[$test_set];
1712*5486feefSafresh1
1713*5486feefSafresh1        # set the locales for this test set.  Do this even if we
1714*5486feefSafresh1        # are going to bail, so that it will be set correctly for the final
1715*5486feefSafresh1        # batch after the loop.
1716*5486feefSafresh1        $result &= setlocales($this_ref->{categories}, $this_ref->{locales});
1717*5486feefSafresh1
1718*5486feefSafresh1        if ($debug > 1) {
1719*5486feefSafresh1            my @joinable = threads->list(threads::joinable);
1720*5486feefSafresh1            if (@joinable) {
1721*5486feefSafresh1                print STDERR "In thread 0, before iteration ",
1722*5486feefSafresh1                             $this_iteration_start,
1723*5486feefSafresh1                             " these threads are done: ",
1724*5486feefSafresh1                             join (", ", map { $_->tid() } @joinable),
1725*5486feefSafresh1                             "\n";
1726*5486feefSafresh1            }
1727*5486feefSafresh1        }
1728*5486feefSafresh1
1729*5486feefSafresh1        # Join anything already finished.
1730*5486feefSafresh1        for my $thread (threads->list(threads::joinable)) {
1731*5486feefSafresh1            my $thread_result = $thread->join;
1732*5486feefSafresh1            if ($debug > 1) {
1733*5486feefSafresh1                print STDERR "In thread 0, before iteration ",
1734*5486feefSafresh1                             $this_iteration_start,
1735*5486feefSafresh1                             " joining thread ", $thread->tid(),
1736*5486feefSafresh1                             "; result=", ((defined $thread_result)
1737*5486feefSafresh1                                           ? $thread_result
1738*5486feefSafresh1                                           : "undef"),
1739*5486feefSafresh1                             "\n";
1740*5486feefSafresh1            }
1741*5486feefSafresh1
1742*5486feefSafresh1            # If the thread failed badly, stop testing anything else.
1743*5486feefSafresh1            if (! defined $thread_result) {
1744*5486feefSafresh1                $_->kill('KILL')->detach() for threads->list();
1745*5486feefSafresh1                print 0;
1746*5486feefSafresh1                exit;
1747*5486feefSafresh1            }
1748*5486feefSafresh1
1749*5486feefSafresh1            # Update the status
1750*5486feefSafresh1            $result &= $thread_result;
1751*5486feefSafresh1        }
1752*5486feefSafresh1
1753*5486feefSafresh1        # Do a chunk of iterations on this thread 0.
1754*5486feefSafresh1        $result &= iterate(0,
1755*5486feefSafresh1                           $this_iteration_start,
1756*5486feefSafresh1                           $iterations_per_test_set,
1757*5486feefSafresh1                           $this_ref->{tests},
1758*5486feefSafresh1                           \%thread0_corrects);
1759*5486feefSafresh1        $this_iteration_start += $iterations_per_test_set;
1760*5486feefSafresh1
1761*5486feefSafresh1        # And repeat as long as there are other tests
1762*5486feefSafresh1    } while (threads->list(threads::all));
1763*5486feefSafresh1
1764*5486feefSafresh1    print $result;
1765*5486feefSafresh1EOT
1766*5486feefSafresh1
1767*5486feefSafresh1    # Finally ready to run the test.
1768*5486feefSafresh1    fresh_perl_is($program,
1769*5486feefSafresh1        1,
1770*5486feefSafresh1        { eval $switches },
1771*5486feefSafresh1        "Verify there were no failures with simultaneous running threads"
1772*5486feefSafresh1    );
1773*5486feefSafresh1}
1774