1#!./perl -wT
2
3use strict;
4use warnings;
5
6# This tests plain 'use locale' and adorned 'use locale ":not_characters"'
7# Because these pragmas are compile time, and I (khw) am trying to test
8# without using 'eval' as much as possible, which might cloud the issue,  the
9# crucial parts of the code are duplicated in a block for each pragma.
10
11# Unfortunately, many systems have defective locale definitions.  This test
12# file looks for both perl bugs and bugs in the system's locale definitions.
13# It can be difficult to tease apart which is which.  For the latter, there
14# are tests that are based on the POSIX standard.  A character isn't supposed
15# to be both a space and graphic, for example.  Another example is if a
16# character is the uppercase of another, that other should be the lowercase of
17# the first.  Including tests for these allows you to test for defective
18# locales, as described in perllocale.  The way this file distinguishes
19# between defective locales, and perl bugs is to see what percentage of
20# locales fail a given test.  If it's a lot, then it's more likely to be a
21# perl bug; only a few, those particular locales are likely defective.  In
22# that case the failing tests are marked TODO.  (They should be reported to
23# the vendor, however; but it's not perl's problem.)  In some cases, this
24# script has caused tickets to be filed against perl which turn out to be the
25# platform's bug, but a higher percentage of locales are failing than the
26# built-in cut-off point.  For those platforms, code has been added to
27# increase the cut-off, so those platforms don't trigger failing test reports.
28# Ideally, the platforms would get fixed and that code would be changed to
29# only kick-in when run on versions that are earlier than the fixed one.  But,
30# this rarely happens in practice.
31
32# To make a TODO test, add the string 'TODO' to its %test_names value
33
34my $is_ebcdic = ord("A") == 193;
35my $os = lc $^O;
36
37no warnings 'locale';  # We test even weird locales; and do some scary things
38                       # in ok locales
39
40binmode STDOUT, ':utf8';
41binmode STDERR, ':utf8';
42
43BEGIN {
44    chdir 't' if -d 't';
45    @INC = '../lib';
46    unshift @INC, '.';
47    require './loc_tools.pl';
48    unless (locales_enabled('LC_CTYPE')) {
49	print "1..0\n";
50	exit;
51    }
52    $| = 1;
53    require Config; import Config;
54}
55
56use feature 'fc';
57
58# =1 adds debugging output; =2 increases the verbosity somewhat
59our $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0;
60
61# Certain tests have been shown to be problematical for a few locales.  Don't
62# fail them unless at least this percentage of the tested locales fail.
63# On AIX machines, many locales call a no-break space a graphic.
64# (There aren't 1000 locales currently in existence, so 99.9 works)
65# EBCDIC os390 has more locales fail than normal, because it has locales that
66# move various critical characters like '['.
67my $acceptable_failure_percentage = ($os =~ / ^ ( aix ) $ /x)
68                                     ? 99.9
69                                     : ($os =~ / ^ ( os390 ) $ /x)
70                                       ? 10
71                                       : 5;
72
73# The list of test numbers of the problematic tests.
74my %problematical_tests;
75
76# If any %problematical_tests fails in one of these locales, it is
77# considered a TODO.
78my %known_bad_locales = (
79                          irix => qr/ ^ (?: cs | hu | sk ) $/x,
80                          darwin => qr/ ^ lt_LT.ISO8859 /ix,
81                          os390 => qr/ ^ italian /ix,
82                          netbsd => qr/\bISO8859-2\b/i,
83
84                          # This may be the same bug as the cygwin below; it's
85                          # generating malformed UTF-8 on the radix being
86                          # mulit-byte
87                          solaris => qr/ ^ ( ar_ | pa_ ) /x,
88                        );
89
90# cygwin isn't returning proper radix length in this locale, but supposedly to
91# be fixed in later versions.
92if ($os eq 'cygwin' && version->new(($Config{osvers} =~ /^(\d+(?:\.\d+)+)/)[0]) le v2.4.1) {
93    $known_bad_locales{'cygwin'} = qr/ ^ ps_AF /ix;
94}
95
96use Dumpvalue;
97
98my $dumper = Dumpvalue->new(
99                            tick => qq{"},
100                            quoteHighBit => 0,
101                            unctrl => "quote"
102                           );
103
104sub debug {
105  return unless $debug;
106  my($mess) = join "", '# ', @_;
107  chomp $mess;
108  print STDERR $dumper->stringify($mess,1), "\n";
109}
110
111sub note {
112    local $debug = 1;
113    debug @_;
114}
115
116sub debug_more {
117  return unless $debug > 1;
118  return debug(@_);
119}
120
121sub debugf {
122    printf STDERR @_ if $debug;
123}
124
125$a = 'abc %9';
126
127my $test_num = 0;
128
129sub ok {
130    my ($result, $message) = @_;
131    $message = "" unless defined $message;
132
133    print 'not ' unless ($result);
134    print "ok " . ++$test_num;
135    print " $message";
136    print "\n";
137    return ($result) ? 1 : 0;
138}
139
140sub skip {
141    return ok 1, "skipped: " . shift;
142}
143
144sub fail {
145    return ok 0, shift;
146}
147
148# First we'll do a lot of taint checking for locales.
149# This is the easiest to test, actually, as any locale,
150# even the default locale will taint under 'use locale'.
151
152sub is_tainted { # hello, camel two.
153    no warnings 'uninitialized' ;
154    my $dummy;
155    local $@;
156    not eval { $dummy = join("", @_), kill 0; 1 }
157}
158
159sub check_taint ($;$) {
160    my $message_tail = $_[1] // "";
161
162    # Extra blanks are so aligns with taint_not output
163    $message_tail = ":     $message_tail" if $message_tail;
164    ok is_tainted($_[0]), "verify that is tainted$message_tail";
165}
166
167sub check_taint_not ($;$) {
168    my $message_tail = $_[1] // "";
169    $message_tail = ":  $message_tail" if $message_tail;
170    ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail");
171}
172
173foreach my $category (qw(ALL COLLATE CTYPE MESSAGES MONETARY NUMERIC TIME)) {
174    my $short_result = locales_enabled($category);
175    ok ($short_result == 0 || $short_result == 1,
176        "Verify locales_enabled('$category') returns 0 or 1");
177    debug("locales_enabled('$category') returned '$short_result'");
178    my $long_result = locales_enabled("LC_$category");
179    if (! ok ($long_result == $short_result,
180              "   and locales_enabled('LC_$category') returns "
181            . "the same value")
182    ) {
183        debug("locales_enabled('LC_$category') returned $long_result");
184    }
185}
186
187"\tb\t" =~ /^m?(\s)(.*)\1$/;
188check_taint_not   $&, "not tainted outside 'use locale'";
189;
190
191use locale;	# engage locale and therefore locale taint.
192
193# BE SURE TO COPY ANYTHING YOU ADD to these tests to the block below for
194# ":notcharacters"
195
196check_taint_not   $a, '$a';
197
198check_taint       uc($a), 'uc($a)';
199check_taint       "\U$a", '"\U$a"';
200check_taint       ucfirst($a), 'ucfirst($a)';
201check_taint       "\u$a", '"\u$a"';
202check_taint       lc($a), 'lc($a)';
203check_taint       fc($a), 'fc($a)';
204check_taint       "\L$a", '"\L$a"';
205check_taint       "\F$a", '"\F$a"';
206check_taint       lcfirst($a), 'lcfirst($a)';
207check_taint       "\l$a", '"\l$a"';
208
209check_taint_not  sprintf('%e', 123.456), "sprintf('%e', 123.456)";
210check_taint_not  sprintf('%f', 123.456), "sprintf('%f', 123.456)";
211check_taint_not  sprintf('%g', 123.456), "sprintf('%g', 123.456)";
212check_taint_not  sprintf('%d', 123.456), "sprintf('%d', 123.456)";
213check_taint_not  sprintf('%x', 123.456), "sprintf('%x', 123.456)";
214
215$_ = $a;	# untaint $_
216
217$_ = uc($a);	# taint $_
218
219check_taint      $_, '$_ = uc($a)';
220
221/(\w)/;	# taint $&, $`, $', $+, $1.
222check_taint      $&, "\$& from /(\\w)/";
223check_taint      $`, "\t\$`";
224check_taint      $', "\t\$'";
225check_taint      $+, "\t\$+";
226check_taint      $1, "\t\$1";
227check_taint_not  $2, "\t\$2";
228
229/(.)/;	# untaint $&, $`, $', $+, $1.
230check_taint_not  $&, "\$& from /(.)/";
231check_taint_not  $`, "\t\$`";
232check_taint_not  $', "\t\$'";
233check_taint_not  $+, "\t\$+";
234check_taint_not  $1, "\t\$1";
235check_taint_not  $2, "\t\$2";
236
237/(\W)/;	# taint $&, $`, $', $+, $1.
238check_taint      $&, "\$& from /(\\W)/";
239check_taint      $`, "\t\$`";
240check_taint      $', "\t\$'";
241check_taint      $+, "\t\$+";
242check_taint      $1, "\t\$1";
243check_taint_not  $2, "\t\$2";
244
245/(.)/;	# untaint $&, $`, $', $+, $1.
246check_taint_not  $&, "\$& from /(.)/";
247check_taint_not  $`, "\t\$`";
248check_taint_not  $', "\t\$'";
249check_taint_not  $+, "\t\$+";
250check_taint_not  $1, "\t\$1";
251check_taint_not  $2, "\t\$2";
252
253/(\s)/;	# taint $&, $`, $', $+, $1.
254check_taint      $&, "\$& from /(\\s)/";
255check_taint      $`, "\t\$`";
256check_taint      $', "\t\$'";
257check_taint      $+, "\t\$+";
258check_taint      $1, "\t\$1";
259check_taint_not  $2, "\t\$2";
260
261/(.)/;	# untaint $&, $`, $', $+, $1.
262check_taint_not  $&, "\$& from /(.)/";
263
264/(\S)/;	# taint $&, $`, $', $+, $1.
265check_taint      $&, "\$& from /(\\S)/";
266check_taint      $`, "\t\$`";
267check_taint      $', "\t\$'";
268check_taint      $+, "\t\$+";
269check_taint      $1, "\t\$1";
270check_taint_not  $2, "\t\$2";
271
272/(.)/;	# untaint $&, $`, $', $+, $1.
273check_taint_not  $&, "\$& from /(.)/";
274
275"0" =~ /(\d)/;	# taint $&, $`, $', $+, $1.
276check_taint      $&, "\$& from /(\\d)/";
277check_taint      $`, "\t\$`";
278check_taint      $', "\t\$'";
279check_taint      $+, "\t\$+";
280check_taint      $1, "\t\$1";
281check_taint_not  $2, "\t\$2";
282
283/(.)/;	# untaint $&, $`, $', $+, $1.
284check_taint_not  $&, "\$& from /(.)/";
285
286/(\D)/;	# taint $&, $`, $', $+, $1.
287check_taint      $&, "\$& from /(\\D)/";
288check_taint      $`, "\t\$`";
289check_taint      $', "\t\$'";
290check_taint      $+, "\t\$+";
291check_taint      $1, "\t\$1";
292check_taint_not  $2, "\t\$2";
293
294/(.)/;	# untaint $&, $`, $', $+, $1.
295check_taint_not  $&, "\$& from /(.)/";
296
297/([[:alnum:]])/;	# taint $&, $`, $', $+, $1.
298check_taint      $&, "\$& from /([[:alnum:]])/";
299check_taint      $`, "\t\$`";
300check_taint      $', "\t\$'";
301check_taint      $+, "\t\$+";
302check_taint      $1, "\t\$1";
303check_taint_not  $2, "\t\$2";
304
305/(.)/;	# untaint $&, $`, $', $+, $1.
306check_taint_not  $&, "\$& from /(.)/";
307
308/([[:^alnum:]])/;	# taint $&, $`, $', $+, $1.
309check_taint      $&, "\$& from /([[:^alnum:]])/";
310check_taint      $`, "\t\$`";
311check_taint      $', "\t\$'";
312check_taint      $+, "\t\$+";
313check_taint      $1, "\t\$1";
314check_taint_not  $2, "\t\$2";
315
316"a" =~ /(a)|(\w)/;	# taint $&, $`, $', $+, $1.
317check_taint      $&, "\$& from /(a)|(\\w)/";
318check_taint      $`, "\t\$`";
319check_taint      $', "\t\$'";
320check_taint      $+, "\t\$+";
321check_taint      $1, "\t\$1";
322ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'");
323ok(! defined $2, ("\t" x 5) . "\$2 is undefined");
324check_taint_not  $2, "\t\$2";
325check_taint_not  $3, "\t\$3";
326
327/(.)/;	# untaint $&, $`, $', $+, $1.
328check_taint_not  $&, "\$& from /(.)/";
329
330"\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i;	# no tainting because no locale dependence
331check_taint_not      $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i";
332check_taint_not      $`, "\t\$`";
333check_taint_not      $', "\t\$'";
334check_taint_not      $+, "\t\$+";
335check_taint_not      $1, "\t\$1";
336ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'");
337check_taint_not      $2, "\t\$2";
338
339/(.)/;	# untaint $&, $`, $', $+, $1.
340check_taint_not  $&, "\$& from /./";
341
342"(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i;	# taints because depends on locale
343check_taint      $&, "\$& from /(\\N{KELVIN SIGN})/i";
344check_taint      $`, "\t\$`";
345check_taint      $', "\t\$'";
346check_taint      $+, "\t\$+";
347check_taint      $1, "\t\$1";
348check_taint_not      $2, "\t\$2";
349
350/(.)/;	# untaint $&, $`, $', $+, $1.
351check_taint_not  $&, "\$& from /(.)/";
352
353"a:" =~ /(.)\b(.)/;	# taint $&, $`, $', $+, $1.
354check_taint      $&, "\$& from /(.)\\b(.)/";
355check_taint      $`, "\t\$`";
356check_taint      $', "\t\$'";
357check_taint      $+, "\t\$+";
358check_taint      $1, "\t\$1";
359check_taint      $2, "\t\$2";
360check_taint_not  $3, "\t\$3";
361
362/(.)/;	# untaint $&, $`, $', $+, $1.
363check_taint_not  $&, "\$& from /./";
364
365"aa" =~ /(.)\B(.)/;	# taint $&, $`, $', $+, $1.
366check_taint      $&, "\$& from /(.)\\B(.)/";
367check_taint      $`, "\t\$`";
368check_taint      $', "\t\$'";
369check_taint      $+, "\t\$+";
370check_taint      $1, "\t\$1";
371check_taint      $2, "\t\$2";
372check_taint_not  $3, "\t\$3";
373
374/(.)/;	# untaint $&, $`, $', $+, $1.
375check_taint_not  $&, "\$& from /./";
376
377"aaa" =~ /(.).(\1)/i;	# notaint because not locale dependent
378check_taint_not      $&, "\$ & from /(.).(\\1)/";
379check_taint_not      $`, "\t\$`";
380check_taint_not      $', "\t\$'";
381check_taint_not      $+, "\t\$+";
382check_taint_not      $1, "\t\$1";
383check_taint_not      $2, "\t\$2";
384check_taint_not      $3, "\t\$3";
385
386/(.)/;	# untaint $&, $`, $', $+, $1.
387check_taint_not  $&, "\$ & from /./";
388
389$_ = $a;	# untaint $_
390
391check_taint_not  $_, 'untainting $_ works';
392
393/(b)/;		# this must not taint
394check_taint_not  $&, "\$ & from /(b)/";
395check_taint_not  $`, "\t\$`";
396check_taint_not  $', "\t\$'";
397check_taint_not  $+, "\t\$+";
398check_taint_not  $1, "\t\$1";
399check_taint_not  $2, "\t\$2";
400
401$_ = $a;	# untaint $_
402
403check_taint_not  $_, 'untainting $_ works';
404
405$b = uc($a);	# taint $b
406s/(.+)/$b/;	# this must taint only the $_
407
408check_taint      $_, '$_ (wasn\'t tainted) from s/(.+)/$b/ where $b is tainted';
409check_taint_not  $&, "\t\$&";
410check_taint_not  $`, "\t\$`";
411check_taint_not  $', "\t\$'";
412check_taint_not  $+, "\t\$+";
413check_taint_not  $1, "\t\$1";
414check_taint_not  $2, "\t\$2";
415
416$_ = $a;	# untaint $_
417
418s/(.+)/b/;	# this must not taint
419check_taint_not  $_, '$_ (wasn\'t tainted) from s/(.+)/b/';
420check_taint_not  $&, "\t\$&";
421check_taint_not  $`, "\t\$`";
422check_taint_not  $', "\t\$'";
423check_taint_not  $+, "\t\$+";
424check_taint_not  $1, "\t\$1";
425check_taint_not  $2, "\t\$2";
426
427$b = $a;	# untaint $b
428
429($b = $a) =~ s/\w/$&/;
430check_taint      $b, '$b from ($b = $a) =~ s/\w/$&/';	# $b should be tainted.
431check_taint_not  $a, '$a from ($b = $a) =~ s/\w/$&/';	# $a should be not.
432
433$_ = $a;	# untaint $_
434
435s/(\w)/\l$1/;	# this must taint
436check_taint      $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,';	# this must taint
437check_taint      $&, "\t\$&";
438check_taint      $`, "\t\$`";
439check_taint      $', "\t\$'";
440check_taint      $+, "\t\$+";
441check_taint      $1, "\t\$1";
442check_taint_not  $2, "\t\$2";
443
444$_ = $a;	# untaint $_
445
446s/(\w)/\L$1/;	# this must taint
447check_taint      $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,';
448check_taint      $&, "\t\$&";
449check_taint      $`, "\t\$`";
450check_taint      $', "\t\$'";
451check_taint      $+, "\t\$+";
452check_taint      $1, "\t\$1";
453check_taint_not  $2, "\t\$2";
454
455$_ = $a;	# untaint $_
456
457s/(\w)/\u$1/;	# this must taint
458check_taint      $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/';
459check_taint      $&, "\t\$&";
460check_taint      $`, "\t\$`";
461check_taint      $', "\t\$'";
462check_taint      $+, "\t\$+";
463check_taint      $1, "\t\$1";
464check_taint_not  $2, "\t\$2";
465
466$_ = $a;	# untaint $_
467
468s/(\w)/\U$1/;	# this must taint
469check_taint      $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/';
470check_taint      $&, "\t\$&";
471check_taint      $`, "\t\$`";
472check_taint      $', "\t\$'";
473check_taint      $+, "\t\$+";
474check_taint      $1, "\t\$1";
475check_taint_not  $2, "\t\$2";
476
477# After all this tainting $a should be cool.
478
479check_taint_not  $a, '$a still not tainted';
480
481"a" =~ /([a-z])/;
482check_taint_not $1, '"a" =~ /([a-z])/';
483"foo.bar_baz" =~ /^(.*)[._](.*?)$/;  # Bug 120675
484check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
485
486# BE SURE TO COPY ANYTHING YOU ADD to the block below
487
488{   # This is just the previous tests copied here with a different
489    # compile-time pragma.
490
491    use locale ':not_characters'; # engage restricted locale with different
492                                  # tainting rules
493    check_taint_not   $a, '$a';
494
495    check_taint_not   uc($a), 'uc($a)';
496    check_taint_not   "\U$a", '"\U$a"';
497    check_taint_not   ucfirst($a), 'ucfirst($a)';
498    check_taint_not   "\u$a", '"\u$a"';
499    check_taint_not   lc($a), 'lc($a)';
500    check_taint_not   fc($a), 'fc($a)';
501    check_taint_not   "\L$a", '"\L$a"';
502    check_taint_not   "\F$a", '"\F$a"';
503    check_taint_not   lcfirst($a), 'lcfirst($a)';
504    check_taint_not   "\l$a", '"\l$a"';
505
506    check_taint_not  sprintf('%e', 123.456), "sprintf('%e', 123.456)";
507    check_taint_not  sprintf('%f', 123.456), "sprintf('%f', 123.456)";
508    check_taint_not  sprintf('%g', 123.456), "sprintf('%g', 123.456)";
509    check_taint_not  sprintf('%d', 123.456), "sprintf('%d', 123.456)";
510    check_taint_not  sprintf('%x', 123.456), "sprintf('%x', 123.456)";
511
512    $_ = $a;	# untaint $_
513
514    $_ = uc($a);
515
516    check_taint_not  $_, '$_ = uc($a)';
517
518    /(\w)/;
519    check_taint_not  $&, "\$& from /(\\w)/";
520    check_taint_not  $`, "\t\$`";
521    check_taint_not  $', "\t\$'";
522    check_taint_not  $+, "\t\$+";
523    check_taint_not  $1, "\t\$1";
524    check_taint_not  $2, "\t\$2";
525
526    /(.)/;	# untaint $&, $`, $', $+, $1.
527    check_taint_not  $&, "\$& from /(.)/";
528    check_taint_not  $`, "\t\$`";
529    check_taint_not  $', "\t\$'";
530    check_taint_not  $+, "\t\$+";
531    check_taint_not  $1, "\t\$1";
532    check_taint_not  $2, "\t\$2";
533
534    /(\W)/;
535    check_taint_not  $&, "\$& from /(\\W)/";
536    check_taint_not  $`, "\t\$`";
537    check_taint_not  $', "\t\$'";
538    check_taint_not  $+, "\t\$+";
539    check_taint_not  $1, "\t\$1";
540    check_taint_not  $2, "\t\$2";
541
542    /(.)/;	# untaint $&, $`, $', $+, $1.
543    check_taint_not  $&, "\$& from /(.)/";
544    check_taint_not  $`, "\t\$`";
545    check_taint_not  $', "\t\$'";
546    check_taint_not  $+, "\t\$+";
547    check_taint_not  $1, "\t\$1";
548    check_taint_not  $2, "\t\$2";
549
550    /(\s)/;
551    check_taint_not  $&, "\$& from /(\\s)/";
552    check_taint_not  $`, "\t\$`";
553    check_taint_not  $', "\t\$'";
554    check_taint_not  $+, "\t\$+";
555    check_taint_not  $1, "\t\$1";
556    check_taint_not  $2, "\t\$2";
557
558    /(.)/;	# untaint $&, $`, $', $+, $1.
559    check_taint_not  $&, "\$& from /(.)/";
560
561    /(\S)/;
562    check_taint_not  $&, "\$& from /(\\S)/";
563    check_taint_not  $`, "\t\$`";
564    check_taint_not  $', "\t\$'";
565    check_taint_not  $+, "\t\$+";
566    check_taint_not  $1, "\t\$1";
567    check_taint_not  $2, "\t\$2";
568
569    /(.)/;	# untaint $&, $`, $', $+, $1.
570    check_taint_not  $&, "\$& from /(.)/";
571
572    "0" =~ /(\d)/;
573    check_taint_not  $&, "\$& from /(\\d)/";
574    check_taint_not  $`, "\t\$`";
575    check_taint_not  $', "\t\$'";
576    check_taint_not  $+, "\t\$+";
577    check_taint_not  $1, "\t\$1";
578    check_taint_not  $2, "\t\$2";
579
580    /(.)/;	# untaint $&, $`, $', $+, $1.
581    check_taint_not  $&, "\$& from /(.)/";
582
583    /(\D)/;
584    check_taint_not  $&, "\$& from /(\\D)/";
585    check_taint_not  $`, "\t\$`";
586    check_taint_not  $', "\t\$'";
587    check_taint_not  $+, "\t\$+";
588    check_taint_not  $1, "\t\$1";
589    check_taint_not  $2, "\t\$2";
590
591    /(.)/;	# untaint $&, $`, $', $+, $1.
592    check_taint_not  $&, "\$& from /(.)/";
593
594    /([[:alnum:]])/;
595    check_taint_not  $&, "\$& from /([[:alnum:]])/";
596    check_taint_not  $`, "\t\$`";
597    check_taint_not  $', "\t\$'";
598    check_taint_not  $+, "\t\$+";
599    check_taint_not  $1, "\t\$1";
600    check_taint_not  $2, "\t\$2";
601
602    /(.)/;	# untaint $&, $`, $', $+, $1.
603    check_taint_not  $&, "\$& from /(.)/";
604
605    /([[:^alnum:]])/;
606    check_taint_not  $&, "\$& from /([[:^alnum:]])/";
607    check_taint_not  $`, "\t\$`";
608    check_taint_not  $', "\t\$'";
609    check_taint_not  $+, "\t\$+";
610    check_taint_not  $1, "\t\$1";
611    check_taint_not  $2, "\t\$2";
612
613    "a" =~ /(a)|(\w)/;
614    check_taint_not  $&, "\$& from /(a)|(\\w)/";
615    check_taint_not  $`, "\t\$`";
616    check_taint_not  $', "\t\$'";
617    check_taint_not  $+, "\t\$+";
618    check_taint_not  $1, "\t\$1";
619    ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'");
620    ok(! defined $2, ("\t" x 5) . "\$2 is undefined");
621    check_taint_not  $2, "\t\$2";
622    check_taint_not  $3, "\t\$3";
623
624    /(.)/;	# untaint $&, $`, $', $+, $1.
625    check_taint_not  $&, "\$& from /(.)/";
626
627    "\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i;
628    check_taint_not      $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i";
629    check_taint_not      $`, "\t\$`";
630    check_taint_not      $', "\t\$'";
631    check_taint_not      $+, "\t\$+";
632    check_taint_not      $1, "\t\$1";
633    ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'");
634    check_taint_not      $2, "\t\$2";
635
636    /(.)/;	# untaint $&, $`, $', $+, $1.
637    check_taint_not  $&, "\$& from /./";
638
639    "(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i;
640    check_taint_not  $&, "\$& from /(\\N{KELVIN SIGN})/i";
641    check_taint_not  $`, "\t\$`";
642    check_taint_not  $', "\t\$'";
643    check_taint_not  $+, "\t\$+";
644    check_taint_not  $1, "\t\$1";
645    check_taint_not      $2, "\t\$2";
646
647    /(.)/;	# untaint $&, $`, $', $+, $1.
648    check_taint_not  $&, "\$& from /(.)/";
649
650    "a:" =~ /(.)\b(.)/;
651    check_taint_not  $&, "\$& from /(.)\\b(.)/";
652    check_taint_not  $`, "\t\$`";
653    check_taint_not  $', "\t\$'";
654    check_taint_not  $+, "\t\$+";
655    check_taint_not  $1, "\t\$1";
656    check_taint_not  $2, "\t\$2";
657    check_taint_not  $3, "\t\$3";
658
659    /(.)/;	# untaint $&, $`, $', $+, $1.
660    check_taint_not  $&, "\$& from /./";
661
662    "aa" =~ /(.)\B(.)/;
663    check_taint_not  $&, "\$& from /(.)\\B(.)/";
664    check_taint_not  $`, "\t\$`";
665    check_taint_not  $', "\t\$'";
666    check_taint_not  $+, "\t\$+";
667    check_taint_not  $1, "\t\$1";
668    check_taint_not  $2, "\t\$2";
669    check_taint_not  $3, "\t\$3";
670
671    /(.)/;	# untaint $&, $`, $', $+, $1.
672    check_taint_not  $&, "\$& from /./";
673
674    "aaa" =~ /(.).(\1)/i;	# notaint because not locale dependent
675    check_taint_not      $&, "\$ & from /(.).(\\1)/";
676    check_taint_not      $`, "\t\$`";
677    check_taint_not      $', "\t\$'";
678    check_taint_not      $+, "\t\$+";
679    check_taint_not      $1, "\t\$1";
680    check_taint_not      $2, "\t\$2";
681    check_taint_not      $3, "\t\$3";
682
683    /(.)/;	# untaint $&, $`, $', $+, $1.
684    check_taint_not  $&, "\$ & from /./";
685
686    $_ = $a;	# untaint $_
687
688    check_taint_not  $_, 'untainting $_ works';
689
690    /(b)/;
691    check_taint_not  $&, "\$ & from /(b)/";
692    check_taint_not  $`, "\t\$`";
693    check_taint_not  $', "\t\$'";
694    check_taint_not  $+, "\t\$+";
695    check_taint_not  $1, "\t\$1";
696    check_taint_not  $2, "\t\$2";
697
698    $_ = $a;	# untaint $_
699
700    check_taint_not  $_, 'untainting $_ works';
701
702    s/(.+)/b/;
703    check_taint_not  $_, '$_ (wasn\'t tainted) from s/(.+)/b/';
704    check_taint_not  $&, "\t\$&";
705    check_taint_not  $`, "\t\$`";
706    check_taint_not  $', "\t\$'";
707    check_taint_not  $+, "\t\$+";
708    check_taint_not  $1, "\t\$1";
709    check_taint_not  $2, "\t\$2";
710
711    $b = $a;	# untaint $b
712
713    ($b = $a) =~ s/\w/$&/;
714    check_taint_not     $b, '$b from ($b = $a) =~ s/\w/$&/';
715    check_taint_not  $a, '$a from ($b = $a) =~ s/\w/$&/';
716
717    $_ = $a;	# untaint $_
718
719    s/(\w)/\l$1/;
720    check_taint_not     $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,';	# this must taint
721    check_taint_not     $&, "\t\$&";
722    check_taint_not     $`, "\t\$`";
723    check_taint_not     $', "\t\$'";
724    check_taint_not     $+, "\t\$+";
725    check_taint_not     $1, "\t\$1";
726    check_taint_not  $2, "\t\$2";
727
728    $_ = $a;	# untaint $_
729
730    s/(\w)/\L$1/;
731    check_taint_not     $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,';
732    check_taint_not     $&, "\t\$&";
733    check_taint_not     $`, "\t\$`";
734    check_taint_not     $', "\t\$'";
735    check_taint_not     $+, "\t\$+";
736    check_taint_not     $1, "\t\$1";
737    check_taint_not  $2, "\t\$2";
738
739    $_ = $a;	# untaint $_
740
741    s/(\w)/\u$1/;
742    check_taint_not     $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/';
743    check_taint_not     $&, "\t\$&";
744    check_taint_not     $`, "\t\$`";
745    check_taint_not     $', "\t\$'";
746    check_taint_not     $+, "\t\$+";
747    check_taint_not     $1, "\t\$1";
748    check_taint_not  $2, "\t\$2";
749
750    $_ = $a;	# untaint $_
751
752    s/(\w)/\U$1/;
753    check_taint_not     $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/';
754    check_taint_not     $&, "\t\$&";
755    check_taint_not     $`, "\t\$`";
756    check_taint_not     $', "\t\$'";
757    check_taint_not     $+, "\t\$+";
758    check_taint_not     $1, "\t\$1";
759    check_taint_not  $2, "\t\$2";
760
761    # After all this tainting $a should be cool.
762
763    check_taint_not  $a, '$a still not tainted';
764
765    "a" =~ /([a-z])/;
766    check_taint_not $1, '"a" =~ /([a-z])/';
767    "foo.bar_baz" =~ /^(.*)[._](.*?)$/;  # Bug 120675
768    check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/';
769
770}
771
772# Here are in scope of 'use locale'
773
774# I think we've seen quite enough of taint.
775# Let us do some *real* locale work now,
776# unless setlocale() is missing (i.e. minitest).
777
778# The test number before our first setlocale()
779my $final_without_setlocale = $test_num;
780
781# Find locales.
782
783debug "Scanning for locales...\n";
784
785require POSIX; import POSIX ':locale_h';
786
787my $categories = [ 'LC_CTYPE', 'LC_NUMERIC', 'LC_ALL' ];
788debug "Scanning for just compatible";
789my @Locale = find_locales($categories);
790debug "Scanning for even incompatible";
791my @include_incompatible_locales = find_locales($categories,
792                                                'even incompatible locales');
793
794# The locales included in the incompatible list that aren't in the compatible
795# one.
796my @incompatible_locales;
797
798if (@Locale < @include_incompatible_locales) {
799    my %seen;
800    @seen{@Locale} = ();
801
802    foreach my $item (@include_incompatible_locales) {
803        push @incompatible_locales, $item unless exists $seen{$item};
804    }
805
806    # For each bad locale, switch into it to find out why it's incompatible
807    for my $bad_locale (@incompatible_locales) {
808        my @warnings;
809
810        use warnings 'locale';
811
812        local $SIG{__WARN__} = sub {
813            my $warning = $_[0];
814            chomp $warning;
815            push @warnings, ($warning =~ s/\n/\n# /sgr);
816        };
817
818        debug "Trying incompatible $bad_locale";
819        my $ret = setlocale(&POSIX::LC_CTYPE, $bad_locale);
820
821        my $message = "testing of locale '$bad_locale' is skipped";
822        if (@warnings) {
823            skip $message . ":\n# " . join "\n# ", @warnings;
824        }
825        elsif (! $ret) {
826            skip("$message:\n#"
827               . " setlocale(&POSIX::LC_CTYPE, '$bad_locale') failed");
828        }
829        else {
830            fail $message . ", because it is was found to be incompatible with"
831                          . " Perl, but could not discern reason";
832        }
833    }
834}
835
836debug "Locales =\n";
837for ( @Locale ) {
838    debug "$_\n";
839}
840
841unless (@Locale) {
842    print "1..$test_num\n";
843    exit;
844}
845
846
847setlocale(&POSIX::LC_ALL, "C");
848
849my %posixes;
850
851my %Problem;
852my %Okay;
853my %Known_bad_locale;   # Failed test for a locale known to be bad
854my %Testing;
855my @Added_alpha;   # Alphas that aren't in the C locale.
856my %test_names;
857
858sub disp_chars {
859    # This returns a display string denoting the input parameter @_, each
860    # entry of which is a single character in the range 0-255.  The first part
861    # of the output is a string of the characters in @_ that are ASCII
862    # graphics, and hence unambiguously displayable.  They are given by code
863    # point order.  The second part is the remaining code points, the ordinals
864    # of which are each displayed as 2-digit hex.  Blanks are inserted so as
865    # to keep anything from the first part looking like a 2-digit hex number.
866
867    no locale;
868    my @chars = sort { ord $a <=> ord $b } @_;
869    my $output = "";
870    my $range_start;
871    my $start_class;
872    push @chars, chr(258);  # This sentinel simplifies the loop termination
873                            # logic
874    foreach my $i (0 .. @chars - 1) {
875        my $char = $chars[$i];
876        my $range_end;
877        my $class;
878
879        # We avoid using [:posix:] classes, as these are being tested in this
880        # file.  Each equivalence class below is for things that can appear in
881        # a range; those that can't be in a range have class -1.  0 for those
882        # which should be output in hex; and >0 for the other ranges
883        if ($char =~ /[A-Z]/) {
884            $class = 2;
885        }
886        elsif ($char =~ /[a-z]/) {
887            $class = 3;
888        }
889        elsif ($char =~ /[0-9]/) {
890            $class = 4;
891        }
892        # Uncomment to get literal punctuation displayed instead of hex
893        #elsif ($char =~ /[[\]!"#\$\%&\'()*+,.\/:\\;<=>?\@\^_`{|}~-]/) {
894        #    $class = -1;    # Punct never appears in a range
895        #}
896        else {
897            $class = 0;     # Output in hex
898        }
899
900        if (! defined $range_start) {
901            if ($class < 0) {
902                $output .= " " . $char;
903            }
904            else {
905                $range_start = ord $char;
906                $start_class = $class;
907            }
908        } # A range ends if not consecutive, or the class-type changes
909        elsif (ord $char != ($range_end = ord($chars[$i-1])) + 1
910              || $class != $start_class)
911        {
912
913            # Here, the current character is not in the range.  This means the
914            # previous character must have been.  Output the range up through
915            # that one.
916            my $range_length = $range_end - $range_start + 1;
917            if ($start_class > 0) {
918                $output .= " " . chr($range_start);
919                $output .= "-" . chr($range_end) if $range_length > 1;
920            }
921            else {
922                $output .= sprintf(" %02X", $range_start);
923                $output .= sprintf("-%02X", $range_end) if $range_length > 1;
924            }
925
926            # Handle the new current character, as potentially beginning a new
927            # range
928            undef $range_start;
929            redo;
930        }
931    }
932
933    $output =~ s/^ //;
934    return $output;
935}
936
937sub disp_str ($) {
938    my $string = shift;
939
940    # Displays the string unambiguously.  ASCII printables are always output
941    # as-is, though perhaps separated by blanks from other characters.  If
942    # entirely printable ASCII, just returns the string.  Otherwise if valid
943    # UTF-8 it uses the character names for non-printable-ASCII.  Otherwise it
944    # outputs hex for each non-ASCII-printable byte.
945
946    return $string if $string =~ / ^ [[:print:]]* $/xa;
947
948    my $result = "";
949    my $prev_was_punct = 1; # Beginning is considered punct
950    if (utf8::valid($string) && utf8::is_utf8($string)) {
951        use charnames ();
952        foreach my $char (split "", $string) {
953
954            # Keep punctuation adjacent to other characters; otherwise
955            # separate them with a blank
956            if ($char =~ /[[:punct:]]/a) {
957                $result .= $char;
958                $prev_was_punct = 1;
959            }
960            elsif ($char =~ /[[:print:]]/a) {
961                $result .= "  " unless $prev_was_punct;
962                $result .= $char;
963                $prev_was_punct = 0;
964            }
965            else {
966                $result .= "  " unless $prev_was_punct;
967                my $name = charnames::viacode(ord $char);
968                $result .= (defined $name) ? $name : ':unknown:';
969                $prev_was_punct = 0;
970            }
971        }
972    }
973    else {
974        use bytes;
975        foreach my $char (split "", $string) {
976            if ($char =~ /[[:punct:]]/a) {
977                $result .= $char;
978                $prev_was_punct = 1;
979            }
980            elsif ($char =~ /[[:print:]]/a) {
981                $result .= " " unless $prev_was_punct;
982                $result .= $char;
983                $prev_was_punct = 0;
984            }
985            else {
986                $result .= " " unless $prev_was_punct;
987                $result .= sprintf("%02X", ord $char);
988                $prev_was_punct = 0;
989            }
990        }
991    }
992
993    return $result;
994}
995
996sub report_result {
997    my ($Locale, $i, $pass_fail, $message) = @_;
998    if ($pass_fail) {
999	push @{$Okay{$i}}, $Locale;
1000    }
1001    else {
1002        $message //= "";
1003        $message = "  ($message)" if $message;
1004	$Known_bad_locale{$i}{$Locale} = 1 if exists $known_bad_locales{$os}
1005                                         && $Locale =~ $known_bad_locales{$os};
1006	$Problem{$i}{$Locale} = 1;
1007	debug "failed $i ($test_names{$i}) with locale '$Locale'$message\n";
1008    }
1009}
1010
1011sub report_multi_result {
1012    my ($Locale, $i, $results_ref) = @_;
1013
1014    # $results_ref points to an array, each element of which is a character that was
1015    # in error for this test numbered '$i'.  If empty, the test passed
1016
1017    my $message = "";
1018    if (@$results_ref) {
1019        $message = join " ", "for", disp_chars(@$results_ref);
1020    }
1021    report_result($Locale, $i, @$results_ref == 0, $message);
1022}
1023
1024my $first_locales_test_number = $final_without_setlocale
1025                              + 1 + @incompatible_locales;
1026my $locales_test_number;
1027my $not_necessarily_a_problem_test_number;
1028my $first_casing_test_number;
1029my %setlocale_failed;   # List of locales that setlocale() didn't work on
1030
1031foreach my $Locale (@Locale) {
1032    $locales_test_number = $first_locales_test_number - 1;
1033    debug "\n";
1034    debug "Locale = $Locale\n";
1035
1036    unless (setlocale(&POSIX::LC_ALL, $Locale)) {
1037        $setlocale_failed{$Locale} = $Locale;
1038	next;
1039    }
1040
1041    # We test UTF-8 locales only under ':not_characters';  It is easier to
1042    # test them in other test files than here.  Non- UTF-8 locales are tested
1043    # only under plain 'use locale', as otherwise we would have to convert
1044    # everything in them to Unicode.
1045
1046    my %UPPER = ();     # All alpha X for which uc(X) == X and lc(X) != X
1047    my %lower = ();     # All alpha X for which lc(X) == X and uc(X) != X
1048    my %BoThCaSe = ();  # All alpha X for which uc(X) == lc(X) == X
1049
1050    my $is_utf8_locale = is_locale_utf8($Locale);
1051
1052    debug "is utf8 locale? = $is_utf8_locale\n";
1053
1054    debug "radix = " . disp_str(localeconv()->{decimal_point}) . "\n";
1055
1056    if (! $is_utf8_locale) {
1057        use locale;
1058        @{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
1059        @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
1060        @{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
1061        @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
1062        @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
1063        @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
1064        @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
1065        @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
1066        @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
1067        @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
1068        @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
1069        @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
1070        @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
1071        @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
1072        @{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255;
1073
1074        # Sieve the uppercase and the lowercase.
1075
1076        for (@{$posixes{'word'}}) {
1077            if (/[^\d_]/) { # skip digits and the _
1078                if (uc($_) eq $_) {
1079                    $UPPER{$_} = $_;
1080                }
1081                if (lc($_) eq $_) {
1082                    $lower{$_} = $_;
1083                }
1084            }
1085        }
1086    }
1087    else {
1088        use locale ':not_characters';
1089        @{$posixes{'word'}} = grep /\w/, map { chr } 0..255;
1090        @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255;
1091        @{$posixes{'space'}} = grep /\s/, map { chr } 0..255;
1092        @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255;
1093        @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255;
1094        @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255;
1095        @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255;
1096        @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255;
1097        @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255;
1098        @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255;
1099        @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255;
1100        @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
1101        @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
1102        @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
1103        @{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255;
1104        for (@{$posixes{'word'}}) {
1105            if (/[^\d_]/) { # skip digits and the _
1106                if (uc($_) eq $_) {
1107                    $UPPER{$_} = $_;
1108                }
1109                if (lc($_) eq $_) {
1110                    $lower{$_} = $_;
1111                }
1112            }
1113        }
1114    }
1115
1116    # Ordered, where possible,  in groups of "this is a subset of the next
1117    # one"
1118    debug ":upper:  = ", disp_chars(@{$posixes{'upper'}}), "\n";
1119    debug ":lower:  = ", disp_chars(@{$posixes{'lower'}}), "\n";
1120    debug ":cased:  = ", disp_chars(@{$posixes{'cased'}}), "\n";
1121    debug ":alpha:  = ", disp_chars(@{$posixes{'alpha'}}), "\n";
1122    debug ":alnum:  = ", disp_chars(@{$posixes{'alnum'}}), "\n";
1123    debug ' \w      = ', disp_chars(@{$posixes{'word'}}), "\n";
1124    debug ":graph:  = ", disp_chars(@{$posixes{'graph'}}), "\n";
1125    debug ":print:  = ", disp_chars(@{$posixes{'print'}}), "\n";
1126    debug ' \d      = ', disp_chars(@{$posixes{'digit'}}), "\n";
1127    debug ":xdigit: = ", disp_chars(@{$posixes{'xdigit'}}), "\n";
1128    debug ":blank:  = ", disp_chars(@{$posixes{'blank'}}), "\n";
1129    debug ' \s      = ', disp_chars(@{$posixes{'space'}}), "\n";
1130    debug ":punct:  = ", disp_chars(@{$posixes{'punct'}}), "\n";
1131    debug ":cntrl:  = ", disp_chars(@{$posixes{'cntrl'}}), "\n";
1132    debug ":ascii:  = ", disp_chars(@{$posixes{'ascii'}}), "\n";
1133
1134    foreach (keys %UPPER) {
1135
1136	$BoThCaSe{$_}++ if exists $lower{$_};
1137    }
1138    foreach (keys %lower) {
1139	$BoThCaSe{$_}++ if exists $UPPER{$_};
1140    }
1141    foreach (keys %BoThCaSe) {
1142	delete $UPPER{$_};
1143	delete $lower{$_};
1144    }
1145
1146    my %Unassigned;
1147    foreach my $ord ( 0 .. 255 ) {
1148        $Unassigned{chr $ord} = 1;
1149    }
1150    foreach my $class (keys %posixes) {
1151        foreach my $char (@{$posixes{$class}}) {
1152            delete $Unassigned{$char};
1153        }
1154    }
1155
1156    debug "UPPER    = ", disp_chars(sort { ord $a <=> ord $b } keys %UPPER), "\n";
1157    debug "lower    = ", disp_chars(sort { ord $a <=> ord $b } keys %lower), "\n";
1158    debug "BoThCaSe = ", disp_chars(sort { ord $a <=> ord $b } keys %BoThCaSe), "\n";
1159    debug "Unassigned = ", disp_chars(sort { ord $a <=> ord $b } keys %Unassigned), "\n";
1160
1161    my @failures;
1162    my @fold_failures;
1163    foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) {
1164        my $ok;
1165        my $fold_ok;
1166        if ($is_utf8_locale) {
1167            use locale ':not_characters';
1168            $ok = $x =~ /[[:upper:]]/;
1169            $fold_ok = $x =~ /[[:lower:]]/i;
1170        }
1171        else {
1172            use locale;
1173            $ok = $x =~ /[[:upper:]]/;
1174            $fold_ok = $x =~ /[[:lower:]]/i;
1175        }
1176        push @failures, $x unless $ok;
1177        push @fold_failures, $x unless $fold_ok;
1178    }
1179    $locales_test_number++;
1180    $first_casing_test_number = $locales_test_number;
1181    $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/ matches all alpha X for which uc(X) == X and lc(X) != X';
1182    report_multi_result($Locale, $locales_test_number, \@failures);
1183
1184    $locales_test_number++;
1185
1186    $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i matches all alpha X for which uc(X) == X and lc(X) != X';
1187    report_multi_result($Locale, $locales_test_number, \@fold_failures);
1188
1189    undef @failures;
1190    undef @fold_failures;
1191
1192    foreach my $x (sort { ord $a <=> ord $b } keys %lower) {
1193        my $ok;
1194        my $fold_ok;
1195        if ($is_utf8_locale) {
1196            use locale ':not_characters';
1197            $ok = $x =~ /[[:lower:]]/;
1198            $fold_ok = $x =~ /[[:upper:]]/i;
1199        }
1200        else {
1201            use locale;
1202            $ok = $x =~ /[[:lower:]]/;
1203            $fold_ok = $x =~ /[[:upper:]]/i;
1204        }
1205        push @failures, $x unless $ok;
1206        push @fold_failures, $x unless $fold_ok;
1207    }
1208
1209    $locales_test_number++;
1210    $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/ matches all alpha X for which lc(X) == X and uc(X) != X';
1211    report_multi_result($Locale, $locales_test_number, \@failures);
1212
1213    $locales_test_number++;
1214    $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/i matches all alpha X for which lc(X) == X and uc(X) != X';
1215    report_multi_result($Locale, $locales_test_number, \@fold_failures);
1216
1217    {   # Find the alphabetic characters that are not considered alphabetics
1218        # in the default (C) locale.
1219
1220	no locale;
1221
1222	@Added_alpha = ();
1223	for (keys %UPPER, keys %lower, keys %BoThCaSe) {
1224	    push(@Added_alpha, $_) if (/\W/);
1225	}
1226    }
1227
1228    @Added_alpha = sort { ord $a <=> ord $b } @Added_alpha;
1229
1230    debug "Added_alpha = ", disp_chars(@Added_alpha), "\n";
1231
1232    # Cross-check the whole 8-bit character set.
1233
1234    ++$locales_test_number;
1235    my @f;
1236    $test_names{$locales_test_number} = 'Verify that \w and [:word:] are identical';
1237    for (map { chr } 0..255) {
1238        if ($is_utf8_locale) {
1239            use locale ':not_characters';
1240            push @f, $_ unless /[[:word:]]/ == /\w/;
1241        }
1242        else {
1243            push @f, $_ unless /[[:word:]]/ == /\w/;
1244        }
1245    }
1246    report_multi_result($Locale, $locales_test_number, \@f);
1247
1248    ++$locales_test_number;
1249    undef @f;
1250    $test_names{$locales_test_number} = 'Verify that \d and [:digit:] are identical';
1251    for (map { chr } 0..255) {
1252        if ($is_utf8_locale) {
1253            use locale ':not_characters';
1254            push @f, $_ unless /[[:digit:]]/ == /\d/;
1255        }
1256        else {
1257            push @f, $_ unless /[[:digit:]]/ == /\d/;
1258        }
1259    }
1260    report_multi_result($Locale, $locales_test_number, \@f);
1261
1262    ++$locales_test_number;
1263    undef @f;
1264    $test_names{$locales_test_number} = 'Verify that \s and [:space:] are identical';
1265    for (map { chr } 0..255) {
1266        if ($is_utf8_locale) {
1267            use locale ':not_characters';
1268            push @f, $_ unless /[[:space:]]/ == /\s/;
1269        }
1270        else {
1271            push @f, $_ unless /[[:space:]]/ == /\s/;
1272        }
1273    }
1274    report_multi_result($Locale, $locales_test_number, \@f);
1275
1276    ++$locales_test_number;
1277    undef @f;
1278    $test_names{$locales_test_number} = 'Verify that [:posix:] and [:^posix:] are mutually exclusive';
1279    for (map { chr } 0..255) {
1280        if ($is_utf8_locale) {
1281            use locale ':not_characters';
1282            push @f, $_ unless   (/[[:alpha:]]/ xor /[[:^alpha:]]/)   ||
1283                    (/[[:alnum:]]/ xor /[[:^alnum:]]/)   ||
1284                    (/[[:ascii:]]/ xor /[[:^ascii:]]/)   ||
1285                    (/[[:blank:]]/ xor /[[:^blank:]]/)   ||
1286                    (/[[:cntrl:]]/ xor /[[:^cntrl:]]/)   ||
1287                    (/[[:digit:]]/ xor /[[:^digit:]]/)   ||
1288                    (/[[:graph:]]/ xor /[[:^graph:]]/)   ||
1289                    (/[[:lower:]]/ xor /[[:^lower:]]/)   ||
1290                    (/[[:print:]]/ xor /[[:^print:]]/)   ||
1291                    (/[[:space:]]/ xor /[[:^space:]]/)   ||
1292                    (/[[:upper:]]/ xor /[[:^upper:]]/)   ||
1293                    (/[[:word:]]/  xor /[[:^word:]]/)    ||
1294                    (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
1295
1296                    # effectively is what [:cased:] would be if it existed.
1297                    (/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i);
1298        }
1299        else {
1300            push @f, $_ unless   (/[[:alpha:]]/ xor /[[:^alpha:]]/)   ||
1301                    (/[[:alnum:]]/ xor /[[:^alnum:]]/)   ||
1302                    (/[[:ascii:]]/ xor /[[:^ascii:]]/)   ||
1303                    (/[[:blank:]]/ xor /[[:^blank:]]/)   ||
1304                    (/[[:cntrl:]]/ xor /[[:^cntrl:]]/)   ||
1305                    (/[[:digit:]]/ xor /[[:^digit:]]/)   ||
1306                    (/[[:graph:]]/ xor /[[:^graph:]]/)   ||
1307                    (/[[:lower:]]/ xor /[[:^lower:]]/)   ||
1308                    (/[[:print:]]/ xor /[[:^print:]]/)   ||
1309                    (/[[:space:]]/ xor /[[:^space:]]/)   ||
1310                    (/[[:upper:]]/ xor /[[:^upper:]]/)   ||
1311                    (/[[:word:]]/  xor /[[:^word:]]/)    ||
1312                    (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
1313                    (/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i);
1314        }
1315    }
1316    report_multi_result($Locale, $locales_test_number, \@f);
1317
1318    # The rules for the relationships are given in:
1319    # http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
1320
1321
1322    ++$locales_test_number;
1323    undef @f;
1324    $test_names{$locales_test_number} = 'Verify that [:lower:] contains at least a-z';
1325    for ('a' .. 'z') {
1326        if ($is_utf8_locale) {
1327            use locale ':not_characters';
1328            push @f, $_  unless /[[:lower:]]/;
1329        }
1330        else {
1331            push @f, $_  unless /[[:lower:]]/;
1332        }
1333    }
1334    report_multi_result($Locale, $locales_test_number, \@f);
1335
1336    ++$locales_test_number;
1337    undef @f;
1338    $test_names{$locales_test_number} = 'Verify that [:lower:] is a subset of [:alpha:]';
1339    for (map { chr } 0..255) {
1340        if ($is_utf8_locale) {
1341            use locale ':not_characters';
1342            push @f, $_  if /[[:lower:]]/ and ! /[[:alpha:]]/;
1343        }
1344        else {
1345            push @f, $_  if /[[:lower:]]/ and ! /[[:alpha:]]/;
1346        }
1347    }
1348    report_multi_result($Locale, $locales_test_number, \@f);
1349
1350    ++$locales_test_number;
1351    undef @f;
1352    $test_names{$locales_test_number} = 'Verify that [:upper:] contains at least A-Z';
1353    for ('A' .. 'Z') {
1354        if ($is_utf8_locale) {
1355            use locale ':not_characters';
1356            push @f, $_  unless /[[:upper:]]/;
1357        }
1358        else {
1359            push @f, $_  unless /[[:upper:]]/;
1360        }
1361    }
1362    report_multi_result($Locale, $locales_test_number, \@f);
1363
1364    ++$locales_test_number;
1365    undef @f;
1366    $test_names{$locales_test_number} = 'Verify that [:upper:] is a subset of [:alpha:]';
1367    for (map { chr } 0..255) {
1368        if ($is_utf8_locale) {
1369            use locale ':not_characters';
1370            push @f, $_  if /[[:upper:]]/ and ! /[[:alpha:]]/;
1371        }
1372        else {
1373            push @f, $_ if /[[:upper:]]/  and ! /[[:alpha:]]/;
1374        }
1375    }
1376    report_multi_result($Locale, $locales_test_number, \@f);
1377
1378    ++$locales_test_number;
1379    undef @f;
1380    $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i is a subset of [:alpha:]';
1381    for (map { chr } 0..255) {
1382        if ($is_utf8_locale) {
1383            use locale ':not_characters';
1384            push @f, $_ if /[[:lower:]]/i  and ! /[[:alpha:]]/;
1385        }
1386        else {
1387            push @f, $_ if /[[:lower:]]/i  and ! /[[:alpha:]]/;
1388        }
1389    }
1390    report_multi_result($Locale, $locales_test_number, \@f);
1391
1392    ++$locales_test_number;
1393    undef @f;
1394    $test_names{$locales_test_number} = 'Verify that [:alpha:] is a subset of [:alnum:]';
1395    for (map { chr } 0..255) {
1396        if ($is_utf8_locale) {
1397            use locale ':not_characters';
1398            push @f, $_ if /[[:alpha:]]/  and ! /[[:alnum:]]/;
1399        }
1400        else {
1401            push @f, $_ if /[[:alpha:]]/  and ! /[[:alnum:]]/;
1402        }
1403    }
1404    report_multi_result($Locale, $locales_test_number, \@f);
1405
1406    ++$locales_test_number;
1407    undef @f;
1408    $test_names{$locales_test_number} = 'Verify that [:digit:] contains at least 0-9';
1409    for ('0' .. '9') {
1410        if ($is_utf8_locale) {
1411            use locale ':not_characters';
1412            push @f, $_  unless /[[:digit:]]/;
1413        }
1414        else {
1415            push @f, $_  unless /[[:digit:]]/;
1416        }
1417    }
1418    report_multi_result($Locale, $locales_test_number, \@f);
1419
1420    ++$locales_test_number;
1421    undef @f;
1422    $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:alnum:]';
1423    for (map { chr } 0..255) {
1424        if ($is_utf8_locale) {
1425            use locale ':not_characters';
1426            push @f, $_ if /[[:digit:]]/  and ! /[[:alnum:]]/;
1427        }
1428        else {
1429            push @f, $_ if /[[:digit:]]/  and ! /[[:alnum:]]/;
1430        }
1431    }
1432    report_multi_result($Locale, $locales_test_number, \@f);
1433
1434    ++$locales_test_number;
1435    undef @f;
1436    $test_names{$locales_test_number} = 'Verify that [:digit:] matches either 10 or 20 code points';
1437    report_result($Locale, $locales_test_number, @{$posixes{'digit'}} == 10 || @{$posixes{'digit'}} == 20);
1438
1439    ++$locales_test_number;
1440    undef @f;
1441    $test_names{$locales_test_number} = 'Verify that if there is a second set of digits in [:digit:], they are consecutive';
1442    if (@{$posixes{'digit'}} == 20) {
1443        my $previous_ord;
1444        for (map { chr } 0..255) {
1445            next unless /[[:digit:]]/;
1446            next if /[0-9]/;
1447            if (defined $previous_ord) {
1448                if ($is_utf8_locale) {
1449                    use locale ':not_characters';
1450                    push @f, $_ if ord $_ != $previous_ord + 1;
1451                }
1452                else {
1453                    push @f, $_ if ord $_ != $previous_ord + 1;
1454                }
1455            }
1456            $previous_ord = ord $_;
1457        }
1458    }
1459    report_multi_result($Locale, $locales_test_number, \@f);
1460
1461    ++$locales_test_number;
1462    undef @f;
1463    my @xdigit_digits;  # :digit: & :xdigit:
1464    $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains one or two blocks of 10 consecutive [:digit:] chars';
1465    for (map { chr } 0..255) {
1466        if ($is_utf8_locale) {
1467            use locale ':not_characters';
1468            # For utf8 locales, we actually use a stricter test: that :digit:
1469            # is a subset of :xdigit:, as we know that only 0-9 should match
1470            push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/;
1471        }
1472        else {
1473            push @xdigit_digits, $_ if /[[:digit:]]/ and /[[:xdigit:]]/;
1474        }
1475    }
1476    if (! $is_utf8_locale) {
1477
1478        # For non-utf8 locales, @xdigit_digits is a list of the characters
1479        # that are both :xdigit: and :digit:.  Because :digit: is stored in
1480        # increasing code point order (unless the tests above failed),
1481        # @xdigit_digits is as well.  There should be exactly 10 or
1482        # 20 of these.
1483        if (@xdigit_digits != 10 && @xdigit_digits != 20) {
1484            @f = @xdigit_digits;
1485        }
1486        else {
1487
1488            # Look for contiguity in the series, adding any wrong ones to @f
1489            my @temp = @xdigit_digits;
1490            while (@temp > 1) {
1491                push @f, $temp[1] if ($temp[0] != $temp[1] - 1)
1492
1493                                     # Skip this test for the 0th character of
1494                                     # the second block of 10, as it won't be
1495                                     # contiguous with the previous block
1496                                     && (! defined $xdigit_digits[10]
1497                                         || $temp[1] != $xdigit_digits[10]);
1498                shift @temp;
1499            }
1500        }
1501    }
1502
1503    report_multi_result($Locale, $locales_test_number, \@f);
1504
1505    ++$locales_test_number;
1506    undef @f;
1507    $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains at least A-F, a-f';
1508    for ('A' .. 'F', 'a' .. 'f') {
1509        if ($is_utf8_locale) {
1510            use locale ':not_characters';
1511            push @f, $_  unless /[[:xdigit:]]/;
1512        }
1513        else {
1514            push @f, $_  unless /[[:xdigit:]]/;
1515        }
1516    }
1517    report_multi_result($Locale, $locales_test_number, \@f);
1518
1519    ++$locales_test_number;
1520    undef @f;
1521    $test_names{$locales_test_number} = 'Verify that any additional members of [:xdigit:], are in groups of 6 consecutive code points';
1522    my $previous_ord;
1523    my $count = 0;
1524    for my $chr (map { chr } 0..255) {
1525        next unless $chr =~ /[[:xdigit:]]/;
1526        if ($is_utf8_locale) {
1527            next if $chr =~ /[[:digit:]]/;
1528        }
1529        else {
1530            next if grep { $chr eq $_ } @xdigit_digits;
1531        }
1532        next if $chr =~ /[A-Fa-f]/;
1533        if (defined $previous_ord) {
1534            if ($is_utf8_locale) {
1535                use locale ':not_characters';
1536                push @f, $chr if ord $chr != $previous_ord + 1;
1537            }
1538            else {
1539                push @f, $chr if ord $chr != $previous_ord + 1;
1540            }
1541        }
1542        $count++;
1543        if ($count == 6) {
1544            undef $previous_ord;
1545        }
1546        else {
1547            $previous_ord = ord $chr;
1548        }
1549    }
1550    report_multi_result($Locale, $locales_test_number, \@f);
1551
1552    ++$locales_test_number;
1553    undef @f;
1554    $test_names{$locales_test_number} = 'Verify that [:xdigit:] is a subset of [:graph:]';
1555    for (map { chr } 0..255) {
1556        if ($is_utf8_locale) {
1557            use locale ':not_characters';
1558            push @f, $_ if /[[:xdigit:]]/  and ! /[[:graph:]]/;
1559        }
1560        else {
1561            push @f, $_ if /[[:xdigit:]]/  and ! /[[:graph:]]/;
1562        }
1563    }
1564    report_multi_result($Locale, $locales_test_number, \@f);
1565
1566    # Note that xdigit doesn't have to be a subset of alnum
1567
1568    ++$locales_test_number;
1569    undef @f;
1570    $test_names{$locales_test_number} = 'Verify that [:punct:] is a subset of [:graph:]';
1571    for (map { chr } 0..255) {
1572        if ($is_utf8_locale) {
1573            use locale ':not_characters';
1574            push @f, $_ if /[[:punct:]]/  and ! /[[:graph:]]/;
1575        }
1576        else {
1577            push @f, $_ if /[[:punct:]]/  and ! /[[:graph:]]/;
1578        }
1579    }
1580    report_multi_result($Locale, $locales_test_number, \@f);
1581
1582    ++$locales_test_number;
1583    undef @f;
1584    $test_names{$locales_test_number} = 'Verify that the space character is not in [:graph:]';
1585    if ($is_utf8_locale) {
1586        use locale ':not_characters';
1587        push @f, " " if " " =~ /[[:graph:]]/;
1588    }
1589    else {
1590        push @f, " " if " " =~ /[[:graph:]]/;
1591    }
1592    report_multi_result($Locale, $locales_test_number, \@f);
1593
1594    ++$locales_test_number;
1595    undef @f;
1596    $test_names{$locales_test_number} = 'Verify that [:space:] contains at least [\f\n\r\t\cK ]';
1597    for (' ', "\f", "\n", "\r", "\t", "\cK") {
1598        if ($is_utf8_locale) {
1599            use locale ':not_characters';
1600            push @f, $_  unless /[[:space:]]/;
1601        }
1602        else {
1603            push @f, $_  unless /[[:space:]]/;
1604        }
1605    }
1606    report_multi_result($Locale, $locales_test_number, \@f);
1607
1608    ++$locales_test_number;
1609    undef @f;
1610    $test_names{$locales_test_number} = 'Verify that [:blank:] contains at least [\t ]';
1611    for (' ', "\t") {
1612        if ($is_utf8_locale) {
1613            use locale ':not_characters';
1614            push @f, $_  unless /[[:blank:]]/;
1615        }
1616        else {
1617            push @f, $_  unless /[[:blank:]]/;
1618        }
1619    }
1620    report_multi_result($Locale, $locales_test_number, \@f);
1621
1622    ++$locales_test_number;
1623    undef @f;
1624    $test_names{$locales_test_number} = 'Verify that [:blank:] is a subset of [:space:]';
1625    for (map { chr } 0..255) {
1626        if ($is_utf8_locale) {
1627            use locale ':not_characters';
1628            push @f, $_ if /[[:blank:]]/  and ! /[[:space:]]/;
1629        }
1630        else {
1631            push @f, $_ if /[[:blank:]]/  and ! /[[:space:]]/;
1632        }
1633    }
1634    report_multi_result($Locale, $locales_test_number, \@f);
1635
1636    ++$locales_test_number;
1637    undef @f;
1638    $test_names{$locales_test_number} = 'Verify that [:graph:] is a subset of [:print:]';
1639    for (map { chr } 0..255) {
1640        if ($is_utf8_locale) {
1641            use locale ':not_characters';
1642            push @f, $_ if /[[:graph:]]/  and ! /[[:print:]]/;
1643        }
1644        else {
1645            push @f, $_ if /[[:graph:]]/  and ! /[[:print:]]/;
1646        }
1647    }
1648    report_multi_result($Locale, $locales_test_number, \@f);
1649
1650    ++$locales_test_number;
1651    undef @f;
1652    $test_names{$locales_test_number} = 'Verify that the space character is in [:print:]';
1653    if ($is_utf8_locale) {
1654        use locale ':not_characters';
1655        push @f, " " if " " !~ /[[:print:]]/;
1656    }
1657    else {
1658        push @f, " " if " " !~ /[[:print:]]/;
1659    }
1660    report_multi_result($Locale, $locales_test_number, \@f);
1661
1662    ++$locales_test_number;
1663    undef @f;
1664    $test_names{$locales_test_number} = 'Verify that isn\'t both [:cntrl:] and [:print:]';
1665    for (map { chr } 0..255) {
1666        if ($is_utf8_locale) {
1667            use locale ':not_characters';
1668            push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
1669        }
1670        else {
1671            push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/);
1672        }
1673    }
1674    report_multi_result($Locale, $locales_test_number, \@f);
1675
1676    ++$locales_test_number;
1677    undef @f;
1678    $test_names{$locales_test_number} = 'Verify that isn\'t both [:alpha:] and [:digit:]';
1679    for (map { chr } 0..255) {
1680        if ($is_utf8_locale) {
1681            use locale ':not_characters';
1682            push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
1683        }
1684        else {
1685            push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/;
1686        }
1687    }
1688    report_multi_result($Locale, $locales_test_number, \@f);
1689
1690    ++$locales_test_number;
1691    undef @f;
1692    $test_names{$locales_test_number} = 'Verify that isn\'t both [:alnum:] and [:punct:]';
1693    for (map { chr } 0..255) {
1694        if ($is_utf8_locale) {
1695            use locale ':not_characters';
1696            push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
1697        }
1698        else {
1699            push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/;
1700        }
1701    }
1702    report_multi_result($Locale, $locales_test_number, \@f);
1703
1704    ++$locales_test_number;
1705    undef @f;
1706    $test_names{$locales_test_number} = 'Verify that isn\'t both [:xdigit:] and [:punct:]';
1707    for (map { chr } 0..255) {
1708        if ($is_utf8_locale) {
1709            use locale ':not_characters';
1710            push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
1711        }
1712        else {
1713            push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/);
1714        }
1715    }
1716    report_multi_result($Locale, $locales_test_number, \@f);
1717
1718    ++$locales_test_number;
1719    undef @f;
1720    $test_names{$locales_test_number} = 'Verify that isn\'t both [:graph:] and [:space:]';
1721    for (map { chr } 0..255) {
1722        if ($is_utf8_locale) {
1723            use locale ':not_characters';
1724            push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
1725        }
1726        else {
1727            push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/);
1728        }
1729    }
1730    report_multi_result($Locale, $locales_test_number, \@f);
1731
1732    foreach ($first_casing_test_number..$locales_test_number) {
1733        $problematical_tests{$_} = 1;
1734    }
1735
1736
1737    # Test for read-only scalars' locale vs non-locale comparisons.
1738
1739    {
1740        no locale;
1741        my $ok;
1742        $a = "qwerty";
1743        if ($is_utf8_locale) {
1744            use locale ':not_characters';
1745            $ok = ($a cmp "qwerty") == 0;
1746        }
1747        else {
1748            use locale;
1749            $ok = ($a cmp "qwerty") == 0;
1750        }
1751        report_result($Locale, ++$locales_test_number, $ok);
1752        $test_names{$locales_test_number} = 'Verify that cmp works with a read-only scalar; no- vs locale';
1753    }
1754
1755    {
1756        my ($from, $to, $lesser, $greater,
1757            @test, %test, $test, $yes, $no, $sign);
1758
1759        ++$locales_test_number;
1760        $test_names{$locales_test_number} = 'Verify that "le", "ne", etc work';
1761        $not_necessarily_a_problem_test_number = $locales_test_number;
1762        for (0..9) {
1763            # Select a slice.
1764            $from = int(($_*@{$posixes{'word'}})/10);
1765            $to = $from + int(@{$posixes{'word'}}/10);
1766            $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
1767            $lesser  = join('', @{$posixes{'word'}}[$from..$to]);
1768            # Select a slice one character on.
1769            $from++; $to++;
1770            $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}});
1771            $greater = join('', @{$posixes{'word'}}[$from..$to]);
1772            if ($is_utf8_locale) {
1773                use locale ':not_characters';
1774                ($yes, $no, $sign) = ($lesser lt $greater
1775                                    ? ("    ", "not ", 1)
1776                                    : ("not ", "    ", -1));
1777            }
1778            else {
1779                use locale;
1780                ($yes, $no, $sign) = ($lesser lt $greater
1781                                    ? ("    ", "not ", 1)
1782                                    : ("not ", "    ", -1));
1783            }
1784            # all these tests should FAIL (return 0).  Exact lt or gt cannot
1785            # be tested because in some locales, say, eacute and E may test
1786            # equal.
1787            @test =
1788                (
1789                    $no.'    ($lesser  le $greater)',  # 1
1790                    'not      ($lesser  ne $greater)', # 2
1791                    '         ($lesser  eq $greater)', # 3
1792                    $yes.'    ($lesser  ge $greater)', # 4
1793                    $yes.'    ($lesser  ge $greater)', # 5
1794                    $yes.'    ($greater le $lesser )', # 7
1795                    'not      ($greater ne $lesser )', # 8
1796                    '         ($greater eq $lesser )', # 9
1797                    $no.'     ($greater ge $lesser )', # 10
1798                    'not (($lesser cmp $greater) == -($sign))' # 11
1799                    );
1800            @test{@test} = 0 x @test;
1801            $test = 0;
1802            for my $ti (@test) {
1803                if ($is_utf8_locale) {
1804                    use locale ':not_characters';
1805                    $test{$ti} = eval $ti;
1806                }
1807                else {
1808                    # Already in 'use locale';
1809                    $test{$ti} = eval $ti;
1810                }
1811                $test ||= $test{$ti}
1812            }
1813            report_result($Locale, $locales_test_number, $test == 0);
1814            if ($test) {
1815                debug "lesser  = '$lesser'\n";
1816                debug "greater = '$greater'\n";
1817                debug "lesser cmp greater = ",
1818                        $lesser cmp $greater, "\n";
1819                debug "greater cmp lesser = ",
1820                        $greater cmp $lesser, "\n";
1821                debug "(greater) from = $from, to = $to\n";
1822                for my $ti (@test) {
1823                    debugf("# %-40s %-4s", $ti,
1824                            $test{$ti} ? 'FAIL' : 'ok');
1825                    if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) {
1826                        debugf("(%s == %4d)", $1, eval $1);
1827                    }
1828                    debugf("\n#");
1829                }
1830
1831                last;
1832            }
1833        }
1834
1835        use locale;
1836
1837        my @sorted_controls;
1838
1839        ++$locales_test_number;
1840        $test_names{$locales_test_number}
1841                = 'Skip in locales where there are no controls;'
1842                . ' otherwise verify that \0 sorts before any (other) control';
1843        if (! $posixes{'cntrl'}) {
1844            report_result($Locale, $locales_test_number, 1);
1845
1846            # We use all code points for the tests below since there aren't
1847            # any controls
1848            push @sorted_controls, chr $_ for 1..255;
1849            @sorted_controls = sort @sorted_controls;
1850        }
1851        else {
1852            @sorted_controls = @{$posixes{'cntrl'}};
1853            push @sorted_controls, "\0",
1854                                unless grep { $_ eq "\0" } @sorted_controls;
1855            @sorted_controls = sort @sorted_controls;
1856            my $output = "";
1857            for my $control (@sorted_controls) {
1858                $output .= " " . disp_chars($control);
1859            }
1860            debug "sorted :cntrl: (plus NUL) = $output\n";
1861            my $ok = $sorted_controls[0] eq "\0";
1862            report_result($Locale, $locales_test_number, $ok);
1863
1864            shift @sorted_controls if $ok;
1865        }
1866
1867        my $lowest_control = $sorted_controls[0];
1868
1869        ++$locales_test_number;
1870        $test_names{$locales_test_number}
1871            = 'Skip in locales where all controls have primary sorting weight; '
1872            . 'otherwise verify that \0 doesn\'t have primary sorting weight';
1873        if ("a${lowest_control}c" lt "ab") {
1874            report_result($Locale, $locales_test_number, 1);
1875        }
1876        else {
1877            my $ok = "ab" lt "a\0c";
1878            report_result($Locale, $locales_test_number, $ok);
1879        }
1880
1881        ++$locales_test_number;
1882        $test_names{$locales_test_number}
1883                            = 'Verify that strings with embedded NUL collate';
1884        my $ok = "a\0a\0a" lt "a${lowest_control}a${lowest_control}a";
1885        report_result($Locale, $locales_test_number, $ok);
1886
1887        ++$locales_test_number;
1888        $test_names{$locales_test_number}
1889                            = 'Verify that strings with embedded NUL and '
1890                            . 'extra trailing NUL collate';
1891        $ok = "a\0a\0" lt "a${lowest_control}a${lowest_control}";
1892        report_result($Locale, $locales_test_number, $ok);
1893
1894        ++$locales_test_number;
1895        $test_names{$locales_test_number}
1896                            = 'Verify that empty strings collate';
1897        $ok = "" le "";
1898        report_result($Locale, $locales_test_number, $ok);
1899
1900        ++$locales_test_number;
1901        $test_names{$locales_test_number}
1902            = "Skip in non-UTF-8 locales; otherwise verify that UTF8ness "
1903            . "doesn't matter with collation";
1904        if (! $is_utf8_locale) {
1905            report_result($Locale, $locales_test_number, 1);
1906        }
1907        else {
1908
1909            # khw can't think of anything better.  Start with a string that is
1910            # higher than its UTF-8 representation in both EBCDIC and ASCII
1911            my $string = chr utf8::unicode_to_native(0xff);
1912            my $utf8_string = $string;
1913            utf8::upgrade($utf8_string);
1914
1915            # 8 should be lt 9 in all locales (except ones that aren't
1916            # ASCII-based, which might fail this)
1917            $ok = ("a${string}8") lt ("a${utf8_string}9");
1918            report_result($Locale, $locales_test_number, $ok);
1919        }
1920
1921        ++$locales_test_number;
1922        $test_names{$locales_test_number}
1923            = "Skip in UTF-8 locales; otherwise verify that single byte "
1924            . "collates before 0x100 and above";
1925        if ($is_utf8_locale) {
1926            report_result($Locale, $locales_test_number, 1);
1927        }
1928        else {
1929            my $max_collating = chr 0;  # Find byte that collates highest
1930            for my $i (0 .. 255) {
1931                my $char = chr $i;
1932                $max_collating = $char if $char gt $max_collating;
1933            }
1934            $ok = $max_collating lt chr 0x100;
1935            report_result($Locale, $locales_test_number, $ok);
1936        }
1937
1938        ++$locales_test_number;
1939        $test_names{$locales_test_number}
1940            = "Skip in UTF-8 locales; otherwise verify that 0x100 and "
1941            . "above collate in code point order";
1942        if ($is_utf8_locale) {
1943            report_result($Locale, $locales_test_number, 1);
1944        }
1945        else {
1946            $ok = chr 0x100 lt chr 0x101;
1947            report_result($Locale, $locales_test_number, $ok);
1948        }
1949    }
1950
1951    my $ok1;
1952    my $ok2;
1953    my $ok3;
1954    my $ok4;
1955    my $ok5;
1956    my $ok6;
1957    my $ok7;
1958    my $ok8;
1959    my $ok9;
1960    my $ok10;
1961    my $ok11;
1962    my $ok12;
1963    my $ok13;
1964    my $ok14;
1965    my $ok14_5;
1966    my $ok15;
1967    my $ok16;
1968    my $ok17;
1969    my $ok18;
1970    my $ok19;
1971    my $ok20;
1972    my $ok21;
1973
1974    my $c;
1975    my $d;
1976    my $e;
1977    my $f;
1978    my $g;
1979    my $h;
1980    my $i;
1981    my $j;
1982
1983    if (! $is_utf8_locale) {
1984        use locale;
1985
1986        my ($x, $y) = (1.23, 1.23);
1987
1988        $a = "$x";
1989        printf ''; # printf used to reset locale to "C"
1990        $b = "$y";
1991        $ok1 = $a eq $b;
1992
1993        $c = "$x";
1994        my $z = sprintf ''; # sprintf used to reset locale to "C"
1995        $d = "$y";
1996        $ok2 = $c eq $d;
1997        {
1998
1999            use warnings;
2000            my $w = 0;
2001            local $SIG{__WARN__} =
2002                sub {
2003                    print "# @_\n";
2004                    $w++;
2005                };
2006
2007            # The == (among other ops) used to warn for locales
2008            # that had something else than "." as the radix character.
2009
2010            $ok3 = $c == 1.23;
2011            $ok4 = $c == $x;
2012            $ok5 = $c == $d;
2013            {
2014                no locale;
2015
2016                $e = "$x";
2017
2018                $ok6 = $e == 1.23;
2019                $ok7 = $e == $x;
2020                $ok8 = $e == $c;
2021            }
2022
2023            $f = "1.23";
2024            $g = 2.34;
2025            $h = 1.5;
2026            $i = 1.25;
2027            $j = "$h:$i";
2028
2029            $ok9 = $f == 1.23;
2030            $ok10 = $f == $x;
2031            $ok11 = $f == $c;
2032            $ok12 = abs(($f + $g) - 3.57) < 0.01;
2033            $ok13 = $w == 0;
2034            $ok14 = $ok14_5 = $ok15 = $ok16 = 1;  # Skip for non-utf8 locales
2035        }
2036        {
2037            no locale;
2038            $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
2039        }
2040        $ok18 = $j eq sprintf("%g:%g", $h, $i);
2041    }
2042    else {
2043        use locale ':not_characters';
2044
2045        my ($x, $y) = (1.23, 1.23);
2046        $a = "$x";
2047        printf ''; # printf used to reset locale to "C"
2048        $b = "$y";
2049        $ok1 = $a eq $b;
2050
2051        $c = "$x";
2052        my $z = sprintf ''; # sprintf used to reset locale to "C"
2053        $d = "$y";
2054        $ok2 = $c eq $d;
2055        {
2056            use warnings;
2057            my $w = 0;
2058            local $SIG{__WARN__} =
2059                sub {
2060                    print "# @_\n";
2061                    $w++;
2062                };
2063            $ok3 = $c == 1.23;
2064            $ok4 = $c == $x;
2065            $ok5 = $c == $d;
2066            {
2067                no locale;
2068                $e = "$x";
2069
2070                $ok6 = $e == 1.23;
2071                $ok7 = $e == $x;
2072                $ok8 = $e == $c;
2073            }
2074
2075            $f = "1.23";
2076            $g = 2.34;
2077            $h = 1.5;
2078            $i = 1.25;
2079            $j = "$h:$i";
2080
2081            $ok9 = $f == 1.23;
2082            $ok10 = $f == $x;
2083            $ok11 = $f == $c;
2084            $ok12 = abs(($f + $g) - 3.57) < 0.01;
2085            $ok13 = $w == 0;
2086
2087            # Look for non-ASCII error messages, and verify that the first
2088            # such is in UTF-8 (the others almost certainly will be like the
2089            # first).  This is only done if the current locale has LC_MESSAGES
2090            $ok14 = 1;
2091            $ok14_5 = 1;
2092            if (   locales_enabled('LC_MESSAGES')
2093                && setlocale(&POSIX::LC_MESSAGES, $Locale))
2094            {
2095                foreach my $err (keys %!) {
2096                    use Errno;
2097                    $! = eval "&Errno::$err";   # Convert to strerror() output
2098                    my $errnum = 0+$!;
2099                    my $strerror = "$!";
2100                    if ("$strerror" =~ /\P{ASCII}/) {
2101                        $ok14 = utf8::is_utf8($strerror);
2102                        no locale;
2103                        $ok14_5 = "$!" !~ /\P{ASCII}/;
2104                        debug( disp_str(
2105                        "non-ASCII \$! for error $errnum='$strerror'"))
2106                                                                   if ! $ok14_5;
2107                        last;
2108                    }
2109                }
2110            }
2111
2112            # Similarly, we verify that a non-ASCII radix is in UTF-8.  This
2113            # also catches if there is a disparity between sprintf and
2114            # stringification.
2115
2116            my $string_g = "$g";
2117            my $sprintf_g = sprintf("%g", $g);
2118
2119            $ok15 = $string_g =~ / ^ \p{ASCII}+ $ /x || utf8::is_utf8($string_g);
2120            $ok16 = $sprintf_g eq $string_g;
2121        }
2122        {
2123            no locale;
2124            $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i);
2125        }
2126        $ok18 = $j eq sprintf("%g:%g", $h, $i);
2127    }
2128
2129    $ok19 = $ok20 = 1;
2130    if (setlocale(&POSIX::LC_TIME, $Locale)) { # These tests aren't affected by
2131                                               # :not_characters
2132        my @times = CORE::localtime();
2133
2134        use locale;
2135        $ok19 = POSIX::strftime("%p", @times) ne "%p"; # [perl #119425]
2136        my $date = POSIX::strftime("'%A'  '%B'  '%Z'  '%p'", @times);
2137        debug("'Day' 'Month' 'TZ' 'am/pm' = ", disp_str($date));
2138
2139        # If there is any non-ascii, it better be UTF-8 in a UTF-8 locale, and
2140        # not UTF-8 if the locale isn't UTF-8.
2141        $ok20 = $date =~ / ^ \p{ASCII}+ $ /x
2142                || $is_utf8_locale == utf8::is_utf8($date);
2143    }
2144
2145    $ok21 = 1;
2146    if (locales_enabled('LC_MESSAGES')) {
2147        foreach my $err (keys %!) {
2148            no locale;
2149            use Errno;
2150            $! = eval "&Errno::$err";   # Convert to strerror() output
2151            my $strerror = "$!";
2152            if ($strerror =~ /\P{ASCII}/) {
2153                $ok21 = 0;
2154                debug(disp_str("non-ASCII strerror=$strerror"));
2155                last;
2156            }
2157        }
2158    }
2159
2160    report_result($Locale, ++$locales_test_number, $ok1);
2161    $test_names{$locales_test_number} = 'Verify that an intervening printf doesn\'t change assignment results';
2162    my $first_a_test = $locales_test_number;
2163
2164    debug "$first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n";
2165
2166    report_result($Locale, ++$locales_test_number, $ok2);
2167    $test_names{$locales_test_number} = 'Verify that an intervening sprintf doesn\'t change assignment results';
2168
2169    my $first_c_test = $locales_test_number;
2170
2171    $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant';
2172    report_result($Locale, $locales_test_number, $ok3);
2173    $problematical_tests{$locales_test_number} = 1;
2174
2175    $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar';
2176    report_result($Locale, $locales_test_number, $ok4);
2177    $problematical_tests{$locales_test_number} = 1;
2178
2179    report_result($Locale, ++$locales_test_number, $ok5);
2180    $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar and an intervening sprintf';
2181    $problematical_tests{$locales_test_number} = 1;
2182
2183    debug "$first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n";
2184
2185    report_result($Locale, ++$locales_test_number, $ok6);
2186    $test_names{$locales_test_number} = 'Verify that can assign stringified under inner no-locale block';
2187    my $first_e_test = $locales_test_number;
2188
2189    report_result($Locale, ++$locales_test_number, $ok7);
2190    $test_names{$locales_test_number} = 'Verify that "==" with a scalar still works in inner no locale';
2191
2192    $test_names{++$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale';
2193    report_result($Locale, $locales_test_number, $ok8);
2194    $problematical_tests{$locales_test_number} = 1;
2195
2196    debug "$first_e_test..$locales_test_number: \$e = $e, no locale\n";
2197
2198    report_result($Locale, ++$locales_test_number, $ok9);
2199    $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a constant';
2200    $problematical_tests{$locales_test_number} = 1;
2201    my $first_f_test = $locales_test_number;
2202
2203    report_result($Locale, ++$locales_test_number, $ok10);
2204    $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar';
2205    $problematical_tests{$locales_test_number} = 1;
2206
2207    $test_names{++$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf';
2208    report_result($Locale, $locales_test_number, $ok11);
2209    $problematical_tests{$locales_test_number} = 1;
2210
2211    report_result($Locale, ++$locales_test_number, $ok12);
2212    $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix can participate in an addition and function call as numeric';
2213    $problematical_tests{$locales_test_number} = 1;
2214
2215    report_result($Locale, ++$locales_test_number, $ok13);
2216    $test_names{$locales_test_number} = 'Verify that don\'t get warning under "==" even if radix is not a dot';
2217    $problematical_tests{$locales_test_number} = 1;
2218
2219    report_result($Locale, ++$locales_test_number, $ok14);
2220    $test_names{$locales_test_number} = 'Verify that non-ASCII UTF-8 error messages are in UTF-8';
2221
2222    report_result($Locale, ++$locales_test_number, $ok14_5);
2223    $test_names{$locales_test_number} = '... and are ASCII outside "use locale"';
2224
2225    report_result($Locale, ++$locales_test_number, $ok15);
2226    $test_names{$locales_test_number} = 'Verify that a number with a UTF-8 radix has a UTF-8 stringification';
2227    $problematical_tests{$locales_test_number} = 1;
2228
2229    report_result($Locale, ++$locales_test_number, $ok16);
2230    $test_names{$locales_test_number} = 'Verify that a sprintf of a number with a UTF-8 radix yields UTF-8';
2231    $problematical_tests{$locales_test_number} = 1;
2232
2233    report_result($Locale, ++$locales_test_number, $ok17);
2234    $test_names{$locales_test_number} = 'Verify that a sprintf of a number outside locale scope uses a dot radix';
2235
2236    report_result($Locale, ++$locales_test_number, $ok18);
2237    $test_names{$locales_test_number} = 'Verify that a sprintf of a number back within locale scope uses locale radix';
2238    $problematical_tests{$locales_test_number} = 1;
2239
2240    report_result($Locale, ++$locales_test_number, $ok19);
2241    $test_names{$locales_test_number} = 'Verify that strftime doesn\'t return "%p" in locales where %p is empty';
2242
2243    report_result($Locale, ++$locales_test_number, $ok20);
2244    $test_names{$locales_test_number} = 'Verify that strftime returns date with UTF-8 flag appropriately set';
2245    $problematical_tests{$locales_test_number} = 1;   # This is broken in
2246                                                      # OS X 10.9.3
2247
2248    report_result($Locale, ++$locales_test_number, $ok21);
2249    $test_names{$locales_test_number} = '"$!" is ASCII-only outside of locale scope';
2250
2251    debug "$first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n";
2252
2253    # Does taking lc separately differ from taking
2254    # the lc "in-line"?  (This was the bug 19990704.002 (#965), change #3568.)
2255    # The bug was in the caching of the 'o'-magic.
2256    if (! $is_utf8_locale) {
2257	use locale;
2258
2259	sub lcA {
2260	    my $lc0 = lc $_[0];
2261	    my $lc1 = lc $_[1];
2262	    return $lc0 cmp $lc1;
2263	}
2264
2265        sub lcB {
2266	    return lc($_[0]) cmp lc($_[1]);
2267	}
2268
2269        my $x = "ab";
2270        my $y = "aa";
2271        my $z = "AB";
2272
2273        report_result($Locale, ++$locales_test_number,
2274		    lcA($x, $y) == 1 && lcB($x, $y) == 1 ||
2275		    lcA($x, $z) == 0 && lcB($x, $z) == 0);
2276    }
2277    else {
2278	use locale ':not_characters';
2279
2280	sub lcC {
2281	    my $lc0 = lc $_[0];
2282	    my $lc1 = lc $_[1];
2283	    return $lc0 cmp $lc1;
2284	}
2285
2286        sub lcD {
2287	    return lc($_[0]) cmp lc($_[1]);
2288	}
2289
2290        my $x = "ab";
2291        my $y = "aa";
2292        my $z = "AB";
2293
2294        report_result($Locale, ++$locales_test_number,
2295		    lcC($x, $y) == 1 && lcD($x, $y) == 1 ||
2296		    lcC($x, $z) == 0 && lcD($x, $z) == 0);
2297    }
2298    $test_names{$locales_test_number} = 'Verify "lc(foo) cmp lc(bar)" is the same as using intermediaries for the cmp';
2299
2300    # Does lc of an UPPER (if different from the UPPER) match
2301    # case-insensitively the UPPER, and does the UPPER match
2302    # case-insensitively the lc of the UPPER.  And vice versa.
2303    {
2304        use locale;
2305        no utf8;
2306        my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/;
2307
2308        my @f = ();
2309        ++$locales_test_number;
2310        $test_names{$locales_test_number} = 'Verify case insensitive matching works';
2311        foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) {
2312            if (! $is_utf8_locale) {
2313                my $y = lc $x;
2314                next unless uc $y eq $x;
2315                debug_more( "UPPER=", disp_chars(($x)),
2316                            "; lc=", disp_chars(($y)), "; ",
2317                            "; fc=", disp_chars((fc $x)), "; ",
2318                            disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2319                            $x =~ /\Q$y/i ? 1 : 0,
2320                            "; ",
2321                            disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2322                            $y =~ /\Q$x/i ? 1 : 0,
2323                            "\n");
2324                #
2325                # If $x and $y contain regular expression characters
2326                # AND THEY lowercase (/i) to regular expression characters,
2327                # regcomp() will be mightily confused.  No, the \Q doesn't
2328                # help here (maybe regex engine internal lowercasing
2329                # is done after the \Q?)  An example of this happening is
2330                # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS):
2331                # the chr(173) (the "[") is the lowercase of the chr(235).
2332                #
2333                # Similarly losing EBCDIC locales include cs_cz, cs_CZ,
2334                # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!),
2335                # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT,
2336                # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037,
2337                # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU,
2338                # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR.
2339                #
2340                # Similar things can happen even under (bastardised)
2341                # non-EBCDIC locales: in many European countries before the
2342                # advent of ISO 8859-x nationally customised versions of
2343                # ISO 646 were devised, reusing certain punctuation
2344                # characters for modified characters needed by the
2345                # country/language.  For example, the "|" might have
2346                # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS.
2347                #
2348                if ($x =~ $re || $y =~ $re) {
2349                    print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
2350                    next;
2351                }
2352                push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
2353
2354                # fc is not a locale concept, so Perl uses lc for it.
2355                push @f, $x unless lc $x eq fc $x;
2356            }
2357            else {
2358                use locale ':not_characters';
2359                my $y = lc $x;
2360                next unless uc $y eq $x;
2361                debug_more( "UPPER=", disp_chars(($x)),
2362                            "; lc=", disp_chars(($y)), "; ",
2363                            "; fc=", disp_chars((fc $x)), "; ",
2364                            disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2365                            $x =~ /\Q$y/i ? 1 : 0,
2366                            "; ",
2367                            disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2368                            $y =~ /\Q$x/i ? 1 : 0,
2369                            "\n");
2370
2371                push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
2372
2373                # The places where Unicode's lc is different from fc are
2374                # skipped here by virtue of the 'next unless uc...' line above
2375                push @f, $x unless lc $x eq fc $x;
2376            }
2377        }
2378
2379	foreach my $x (sort { ord $a <=> ord $b } keys %lower) {
2380            if (! $is_utf8_locale) {
2381                my $y = uc $x;
2382                next unless lc $y eq $x;
2383                debug_more( "lower=", disp_chars(($x)),
2384                            "; uc=", disp_chars(($y)), "; ",
2385                            "; fc=", disp_chars((fc $x)), "; ",
2386                            disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2387                            $x =~ /\Q$y/i ? 1 : 0,
2388                            "; ",
2389                            disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2390                            $y =~ /\Q$x/i ? 1 : 0,
2391                            "\n");
2392                if ($x =~ $re || $y =~ $re) { # See above.
2393                    print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n";
2394                    next;
2395                }
2396                push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
2397
2398                push @f, $x unless lc $x eq fc $x;
2399            }
2400            else {
2401                use locale ':not_characters';
2402                my $y = uc $x;
2403                next unless lc $y eq $x;
2404                debug_more( "lower=", disp_chars(($x)),
2405                            "; uc=", disp_chars(($y)), "; ",
2406                            "; fc=", disp_chars((fc $x)), "; ",
2407                            disp_chars(($x)), "=~/", disp_chars(($y)), "/i=",
2408                            $x =~ /\Q$y/i ? 1 : 0,
2409                            "; ",
2410                            disp_chars(($y)), "=~/", disp_chars(($x)), "/i=",
2411                            $y =~ /\Q$x/i ? 1 : 0,
2412                            "\n");
2413                push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i;
2414
2415                push @f, $x unless lc $x eq fc $x;
2416            }
2417	}
2418	report_multi_result($Locale, $locales_test_number, \@f);
2419        $problematical_tests{$locales_test_number} = 1;
2420    }
2421
2422    # [perl #109318]
2423    {
2424        my @f = ();
2425        ++$locales_test_number;
2426        $test_names{$locales_test_number} = 'Verify atof with locale radix and negative exponent';
2427        $problematical_tests{$locales_test_number} = 1;
2428
2429        my $radix = POSIX::localeconv()->{decimal_point};
2430        my @nums = (
2431             "3.14e+9",  "3${radix}14e+9",  "3.14e-9",  "3${radix}14e-9",
2432            "-3.14e+9", "-3${radix}14e+9", "-3.14e-9", "-3${radix}14e-9",
2433        );
2434
2435        if (! $is_utf8_locale) {
2436            use locale;
2437            for my $num (@nums) {
2438                push @f, $num
2439                    unless sprintf("%g", $num) =~ /3.+14/;
2440            }
2441        }
2442        else {
2443            use locale ':not_characters';
2444            for my $num (@nums) {
2445                push @f, $num
2446                    unless sprintf("%g", $num) =~ /3.+14/;
2447            }
2448        }
2449
2450        report_result($Locale, $locales_test_number, @f == 0);
2451        if (@f) {
2452            print "# failed $locales_test_number locale '$Locale' numbers @f\n"
2453	}
2454    }
2455}
2456
2457my $final_locales_test_number = $locales_test_number;
2458
2459# Recount the errors.
2460
2461TEST_NUM:
2462foreach $test_num ($first_locales_test_number..$final_locales_test_number) {
2463    my $has_non_global_failure = $Problem{$test_num}
2464                            || ! defined $Okay{$test_num}
2465                            || ! @{$Okay{$test_num}};
2466    print "not " if %setlocale_failed || $has_non_global_failure;
2467    print "ok $test_num";
2468    $test_names{$test_num} = "" unless defined $test_names{$test_num};
2469
2470    # If TODO is in the test name, make it thus
2471    my $todo = $test_names{$test_num} =~ s/\s*TODO\s*//;
2472    print " $test_names{$test_num}";
2473    if ($todo) {
2474        print " # TODO\n";
2475    }
2476    elsif (%setlocale_failed || ! $has_non_global_failure) {
2477        print "\n";
2478    }
2479    elsif ($has_non_global_failure) {
2480
2481        # If there are any locales that pass this test, or are known-bad, it
2482        # may be that there are enough passes that we TODO the failure, but
2483        # only for tests that we have decided can be problematical.
2484        if (  ($Okay{$test_num} || $Known_bad_locale{$test_num})
2485            && grep { $_ == $test_num } keys %problematical_tests)
2486        {
2487            # Don't count the known-bad failures when calculating the
2488            # percentage that fail.
2489            my $known_failures = (exists $Known_bad_locale{$test_num})
2490                                  ? scalar(keys $Known_bad_locale{$test_num}->%*)
2491                                  : 0;
2492            my $adjusted_failures = scalar(keys $Problem{$test_num}->%*)
2493                                    - $known_failures;
2494
2495            # Specially handle failures where only known-bad locales fail.
2496            # This makes the diagnositics clearer.
2497            if ($adjusted_failures <= 0) {
2498                print " # TODO fails only on known bad locales: ",
2499                      join " ", keys $Known_bad_locale{$test_num}->%*, "\n";
2500                next TEST_NUM;
2501            }
2502
2503            # Round to nearest .1%
2504            my $percent_fail = (int(.5 + (1000 * $adjusted_failures
2505                                          / scalar(@Locale))))
2506                               / 10;
2507            $todo = $percent_fail < $acceptable_failure_percentage;
2508            print " # TODO" if $todo;
2509            print "\n";
2510
2511            if ($debug) {
2512                print "# $percent_fail% of locales (",
2513                      scalar(keys $Problem{$test_num}->%*),
2514                      " of ",
2515                      scalar(@Locale),
2516                      ") fail the above test (TODO cut-off is ",
2517                      $acceptable_failure_percentage,
2518                      "%)\n";
2519            }
2520            elsif ($todo) {
2521                print "# ", 100 - $percent_fail, "% of locales not known to be problematic on this platform\n";
2522                print "# pass the above test, so it is likely that the failures\n";
2523                print "# are errors in the locale definitions.  The test is marked TODO, as the\n";
2524                print "# problem is not likely to be Perl's\n";
2525            }
2526        }
2527
2528        if ($debug) {
2529            print "# The code points that had this failure are given above.  Look for lines\n";
2530            print "# that match 'failed $test_num'\n";
2531        }
2532        else {
2533            print "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
2534            print "# Then look at that output for lines that match 'failed $test_num'\n";
2535        }
2536	if (defined $not_necessarily_a_problem_test_number
2537            && $test_num == $not_necessarily_a_problem_test_number)
2538        {
2539	    print "# The failure of test $not_necessarily_a_problem_test_number is not necessarily fatal.\n";
2540	    print "# It usually indicates a problem in the environment,\n";
2541	    print "# not in Perl itself.\n";
2542	}
2543    }
2544}
2545
2546$test_num = $final_locales_test_number;
2547
2548if ( ! defined $Config{d_setlocale_accepts_any_locale_name}) {
2549    # perl #115808
2550    use warnings;
2551    my $warned = 0;
2552    local $SIG{__WARN__} = sub {
2553        $warned = $_[0] =~ /uninitialized/;
2554    };
2555    my $z = "y" . setlocale(&POSIX::LC_ALL, "xyzzy");
2556    ok($warned, "variable set to setlocale(\"invalid locale name\") is considered uninitialized");
2557}
2558
2559# Test that tainting and case changing works on utf8 strings.  These tests are
2560# placed last to avoid disturbing the hard-coded test numbers that existed at
2561# the time these were added above this in this file.
2562# This also tests that locale overrides unicode_strings in the same scope for
2563# non-utf8 strings.
2564setlocale(&POSIX::LC_ALL, "C");
2565{
2566    use locale;
2567    use feature 'unicode_strings';
2568
2569    foreach my $function ("uc", "ucfirst", "lc", "lcfirst", "fc") {
2570        my @list;   # List of code points to test for $function
2571
2572        # Used to calculate the changed case for ASCII characters by using the
2573        # ord, instead of using one of the functions under test.
2574        my $ascii_case_change_delta;
2575        my $above_latin1_case_change_delta; # Same for the specific ords > 255
2576                                            # that we use
2577
2578        # We test an ASCII character, which should change case;
2579        # a Latin1 character, which shouldn't change case under this C locale,
2580        # an above-Latin1 character that when the case is changed would cross
2581        #   the 255/256 boundary, so doesn't change case
2582        #   (the \x{149} is one of these, but changes into 2 characters, the
2583        #   first one of which doesn't cross the boundary.
2584        # the final one in each list is an above-Latin1 character whose case
2585        #   does change.  The code below uses its position in its list as a
2586        #   marker to indicate that it, unlike the other code points above
2587        #   ASCII, has a successful case change
2588        #
2589        # All casing operations under locale (but not :not_characters) should
2590        # taint
2591        if ($function =~ /^u/) {
2592            @list = ("", "a",
2593                     chr(utf8::unicode_to_native(0xe0)),
2594                     chr(utf8::unicode_to_native(0xff)),
2595                     "\x{fb00}", "\x{149}", "\x{101}");
2596            $ascii_case_change_delta = ($is_ebcdic) ? +64 : -32;
2597            $above_latin1_case_change_delta = -1;
2598        }
2599        else {
2600            @list = ("", "A",
2601                     chr(utf8::unicode_to_native(0xC0)),
2602                     "\x{17F}", "\x{100}");
2603            $ascii_case_change_delta = ($is_ebcdic) ? -64 : +32;
2604            $above_latin1_case_change_delta = +1;
2605        }
2606        foreach my $is_utf8_locale (0 .. 1) {
2607            foreach my $j (0 .. $#list) {
2608                my $char = $list[$j];
2609
2610                for my $encoded_in_utf8 (0 .. 1) {
2611                    my $should_be;
2612                    my $changed;
2613                    if (! $is_utf8_locale) {
2614                        no warnings 'locale';
2615                        $should_be = ($j == $#list)
2616                            ? chr(ord($char) + $above_latin1_case_change_delta)
2617                            : (length $char == 0 || utf8::native_to_unicode(ord($char)) > 127)
2618                              ? $char
2619                              : chr(ord($char) + $ascii_case_change_delta);
2620
2621                        # This monstrosity is in order to avoid using an eval,
2622                        # which might perturb the results
2623                        $changed = ($function eq "uc")
2624                                    ? uc($char)
2625                                    : ($function eq "ucfirst")
2626                                      ? ucfirst($char)
2627                                      : ($function eq "lc")
2628                                        ? lc($char)
2629                                        : ($function eq "lcfirst")
2630                                          ? lcfirst($char)
2631                                          : ($function eq "fc")
2632                                            ? fc($char)
2633                                            : die("Unexpected function \"$function\"");
2634                    }
2635                    else {
2636                        {
2637                            no locale;
2638
2639                            # For utf8-locales the case changing functions
2640                            # should work just like they do outside of locale.
2641                            # Can use eval here because not testing it when
2642                            # not in locale.
2643                            $should_be = eval "$function('$char')";
2644                            die "Unexpected eval error $@ from 'eval \"$function('$char')\"'" if  $@;
2645
2646                        }
2647                        use locale ':not_characters';
2648                        $changed = ($function eq "uc")
2649                                    ? uc($char)
2650                                    : ($function eq "ucfirst")
2651                                      ? ucfirst($char)
2652                                      : ($function eq "lc")
2653                                        ? lc($char)
2654                                        : ($function eq "lcfirst")
2655                                          ? lcfirst($char)
2656                                          : ($function eq "fc")
2657                                            ? fc($char)
2658                                            : die("Unexpected function \"$function\"");
2659                    }
2660                    ok($changed eq $should_be,
2661                        "$function(\"$char\") in C locale "
2662                        . (($is_utf8_locale)
2663                            ? "(use locale ':not_characters'"
2664                            : "(use locale")
2665                        . (($encoded_in_utf8)
2666                            ? "; encoded in utf8)"
2667                            : "; not encoded in utf8)")
2668                        . " should be \"$should_be\", got \"$changed\"");
2669
2670                    # Tainting shouldn't happen for use locale :not_character
2671                    # (a utf8 locale)
2672                    (! $is_utf8_locale)
2673                    ? check_taint($changed)
2674                    : check_taint_not($changed);
2675
2676                    # Use UTF-8 next time through the loop
2677                    utf8::upgrade($char);
2678                }
2679            }
2680        }
2681    }
2682}
2683
2684# Give final advice.
2685
2686my $didwarn = 0;
2687
2688foreach ($first_locales_test_number..$final_locales_test_number) {
2689    if ($Problem{$_}) {
2690	my @f = sort keys %{ $Problem{$_} };
2691
2692        # Don't list the failures caused by known-bad locales.
2693        if (exists $known_bad_locales{$os}) {
2694            @f = grep { $_ !~ $known_bad_locales{$os} } @f;
2695            next unless @f;
2696        }
2697	my $f = join(" ", @f);
2698	$f =~ s/(.{50,60}) /$1\n#\t/g;
2699	print
2700	    "#\n",
2701            "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n",
2702	    "#\t", $f, "\n#\n",
2703	    "# on your system may have errors because the locale test $_\n",
2704	    "# \"$test_names{$_}\"\n",
2705            "# failed in ", (@f == 1 ? "that locale" : "those locales"),
2706            ".\n";
2707	print <<EOW;
2708#
2709# If your users are not using these locales you are safe for the moment,
2710# but please report this failure first to perlbug\@perl.org using the
2711# perlbug script (as described in the INSTALL file) so that the exact
2712# details of the failures can be sorted out first and then your operating
2713# system supplier can be alerted about these anomalies.
2714#
2715EOW
2716	$didwarn = 1;
2717    }
2718}
2719
2720# Tell which locales were okay and which were not.
2721
2722if ($didwarn) {
2723    my (@s, @F);
2724
2725    foreach my $l (@Locale) {
2726	my $p = 0;
2727        if ($setlocale_failed{$l}) {
2728            $p++;
2729        }
2730        else {
2731            foreach my $t
2732                        ($first_locales_test_number..$final_locales_test_number)
2733            {
2734                $p++ if $Problem{$t}{$l};
2735            }
2736	}
2737	push @s, $l if $p == 0;
2738        push @F, $l unless $p == 0;
2739    }
2740
2741    if (@s) {
2742        my $s = join(" ", @s);
2743        $s =~ s/(.{50,60}) /$1\n#\t/g;
2744
2745        print
2746            "# The following locales\n#\n",
2747            "#\t", $s, "\n#\n",
2748	    "# tested okay.\n#\n",
2749    } else {
2750        print "# None of your locales were fully okay.\n";
2751    }
2752
2753    if (@F) {
2754        my $F = join(" ", @F);
2755        $F =~ s/(.{50,60}) /$1\n#\t/g;
2756
2757        my $details = "";
2758        unless ($debug) {
2759            $details = "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n";
2760        }
2761        elsif ($debug == 1) {
2762            $details = "# For even more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=2.\n";
2763        }
2764
2765        print
2766          "# The following locales\n#\n",
2767          "#\t", $F, "\n#\n",
2768          "# had problems.\n#\n",
2769          $details;
2770    } else {
2771        print "# None of your locales were broken.\n";
2772    }
2773}
2774
2775if (exists $known_bad_locales{$os} && ! %Known_bad_locale) {
2776    $test_num++;
2777    print "ok $test_num $^O no longer has known bad locales # TODO\n";
2778}
2779
2780print "1..$test_num\n";
2781
2782# eof
2783