1#!./perl -wT 2 3use strict; 4use warnings; 5 6# This tests plain 'use locale' and adorned 'use locale ":not_characters"' 7# Because these pragmas are compile time, and I (khw) am trying to test 8# without using 'eval' as much as possible, which might cloud the issue, the 9# crucial parts of the code are duplicated in a block for each pragma. 10 11# Unfortunately, many systems have defective locale definitions. This test 12# file looks for both perl bugs and bugs in the system's locale definitions. 13# It can be difficult to tease apart which is which. For the latter, there 14# are tests that are based on the POSIX standard. A character isn't supposed 15# to be both a space and graphic, for example. Another example is if a 16# character is the uppercase of another, that other should be the lowercase of 17# the first. Including tests for these allows you to test for defective 18# locales, as described in perllocale. The way this file distinguishes 19# between defective locales, and perl bugs is to see what percentage of 20# locales fail a given test. If it's a lot, then it's more likely to be a 21# perl bug; only a few, those particular locales are likely defective. In 22# that case the failing tests are marked TODO. (They should be reported to 23# the vendor, however; but it's not perl's problem.) In some cases, this 24# script has caused tickets to be filed against perl which turn out to be the 25# platform's bug, but a higher percentage of locales are failing than the 26# built-in cut-off point. For those platforms, code has been added to 27# increase the cut-off, so those platforms don't trigger failing test reports. 28# Ideally, the platforms would get fixed and that code would be changed to 29# only kick-in when run on versions that are earlier than the fixed one. But, 30# this rarely happens in practice. 31 32# To make a TODO test, add the string 'TODO' to its %test_names value 33 34my $is_ebcdic = ord("A") == 193; 35my $os = lc $^O; 36 37no warnings 'locale'; # We test even weird locales; and do some scary things 38 # in ok locales 39 40binmode STDOUT, ':utf8'; 41binmode STDERR, ':utf8'; 42 43BEGIN { 44 chdir 't' if -d 't'; 45 @INC = '../lib'; 46 unshift @INC, '.'; 47 require './loc_tools.pl'; 48 unless (locales_enabled('LC_CTYPE')) { 49 print "1..0\n"; 50 exit; 51 } 52 $| = 1; 53 require Config; import Config; 54} 55 56use feature 'fc'; 57 58# =1 adds debugging output; =2 increases the verbosity somewhat 59our $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0; 60 61# Certain tests have been shown to be problematical for a few locales. Don't 62# fail them unless at least this percentage of the tested locales fail. 63# On AIX machines, many locales call a no-break space a graphic. 64# (There aren't 1000 locales currently in existence, so 99.9 works) 65# EBCDIC os390 has more locales fail than normal, because it has locales that 66# move various critical characters like '['. 67my $acceptable_failure_percentage = ($os =~ / ^ ( aix ) $ /x) 68 ? 99.9 69 : ($os =~ / ^ ( os390 ) $ /x) 70 ? 10 71 : 5; 72 73# The list of test numbers of the problematic tests. 74my %problematical_tests; 75 76# If any %problematical_tests fails in one of these locales, it is 77# considered a TODO. 78my %known_bad_locales = ( 79 irix => qr/ ^ (?: cs | hu | sk ) $/x, 80 darwin => qr/ ^ lt_LT.ISO8859 /ix, 81 os390 => qr/ ^ italian /ix, 82 netbsd => qr/\bISO8859-2\b/i, 83 84 # This may be the same bug as the cygwin below; it's 85 # generating malformed UTF-8 on the radix being 86 # mulit-byte 87 solaris => qr/ ^ ( ar_ | pa_ ) /x, 88 ); 89 90# cygwin isn't returning proper radix length in this locale, but supposedly to 91# be fixed in later versions. 92if ($os eq 'cygwin' && version->new(($Config{osvers} =~ /^(\d+(?:\.\d+)+)/)[0]) le v2.4.1) { 93 $known_bad_locales{'cygwin'} = qr/ ^ ps_AF /ix; 94} 95 96use Dumpvalue; 97 98my $dumper = Dumpvalue->new( 99 tick => qq{"}, 100 quoteHighBit => 0, 101 unctrl => "quote" 102 ); 103 104sub debug { 105 return unless $debug; 106 my($mess) = join "", '# ', @_; 107 chomp $mess; 108 print STDERR $dumper->stringify($mess,1), "\n"; 109} 110 111sub note { 112 local $debug = 1; 113 debug @_; 114} 115 116sub debug_more { 117 return unless $debug > 1; 118 return debug(@_); 119} 120 121sub debugf { 122 printf STDERR @_ if $debug; 123} 124 125$a = 'abc %9'; 126 127my $test_num = 0; 128 129sub ok { 130 my ($result, $message) = @_; 131 $message = "" unless defined $message; 132 133 print 'not ' unless ($result); 134 print "ok " . ++$test_num; 135 print " $message"; 136 print "\n"; 137 return ($result) ? 1 : 0; 138} 139 140sub skip { 141 return ok 1, "skipped: " . shift; 142} 143 144sub fail { 145 return ok 0, shift; 146} 147 148# First we'll do a lot of taint checking for locales. 149# This is the easiest to test, actually, as any locale, 150# even the default locale will taint under 'use locale'. 151 152sub is_tainted { # hello, camel two. 153 no warnings 'uninitialized' ; 154 my $dummy; 155 local $@; 156 not eval { $dummy = join("", @_), kill 0; 1 } 157} 158 159sub check_taint ($;$) { 160 my $message_tail = $_[1] // ""; 161 162 # Extra blanks are so aligns with taint_not output 163 $message_tail = ": $message_tail" if $message_tail; 164 ok is_tainted($_[0]), "verify that is tainted$message_tail"; 165} 166 167sub check_taint_not ($;$) { 168 my $message_tail = $_[1] // ""; 169 $message_tail = ": $message_tail" if $message_tail; 170 ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail"); 171} 172 173foreach my $category (qw(ALL COLLATE CTYPE MESSAGES MONETARY NUMERIC TIME)) { 174 my $short_result = locales_enabled($category); 175 ok ($short_result == 0 || $short_result == 1, 176 "Verify locales_enabled('$category') returns 0 or 1"); 177 debug("locales_enabled('$category') returned '$short_result'"); 178 my $long_result = locales_enabled("LC_$category"); 179 if (! ok ($long_result == $short_result, 180 " and locales_enabled('LC_$category') returns " 181 . "the same value") 182 ) { 183 debug("locales_enabled('LC_$category') returned $long_result"); 184 } 185} 186 187"\tb\t" =~ /^m?(\s)(.*)\1$/; 188check_taint_not $&, "not tainted outside 'use locale'"; 189; 190 191use locale; # engage locale and therefore locale taint. 192 193# BE SURE TO COPY ANYTHING YOU ADD to these tests to the block below for 194# ":notcharacters" 195 196check_taint_not $a, '$a'; 197 198check_taint uc($a), 'uc($a)'; 199check_taint "\U$a", '"\U$a"'; 200check_taint ucfirst($a), 'ucfirst($a)'; 201check_taint "\u$a", '"\u$a"'; 202check_taint lc($a), 'lc($a)'; 203check_taint fc($a), 'fc($a)'; 204check_taint "\L$a", '"\L$a"'; 205check_taint "\F$a", '"\F$a"'; 206check_taint lcfirst($a), 'lcfirst($a)'; 207check_taint "\l$a", '"\l$a"'; 208 209check_taint_not sprintf('%e', 123.456), "sprintf('%e', 123.456)"; 210check_taint_not sprintf('%f', 123.456), "sprintf('%f', 123.456)"; 211check_taint_not sprintf('%g', 123.456), "sprintf('%g', 123.456)"; 212check_taint_not sprintf('%d', 123.456), "sprintf('%d', 123.456)"; 213check_taint_not sprintf('%x', 123.456), "sprintf('%x', 123.456)"; 214 215$_ = $a; # untaint $_ 216 217$_ = uc($a); # taint $_ 218 219check_taint $_, '$_ = uc($a)'; 220 221/(\w)/; # taint $&, $`, $', $+, $1. 222check_taint $&, "\$& from /(\\w)/"; 223check_taint $`, "\t\$`"; 224check_taint $', "\t\$'"; 225check_taint $+, "\t\$+"; 226check_taint $1, "\t\$1"; 227check_taint_not $2, "\t\$2"; 228 229/(.)/; # untaint $&, $`, $', $+, $1. 230check_taint_not $&, "\$& from /(.)/"; 231check_taint_not $`, "\t\$`"; 232check_taint_not $', "\t\$'"; 233check_taint_not $+, "\t\$+"; 234check_taint_not $1, "\t\$1"; 235check_taint_not $2, "\t\$2"; 236 237/(\W)/; # taint $&, $`, $', $+, $1. 238check_taint $&, "\$& from /(\\W)/"; 239check_taint $`, "\t\$`"; 240check_taint $', "\t\$'"; 241check_taint $+, "\t\$+"; 242check_taint $1, "\t\$1"; 243check_taint_not $2, "\t\$2"; 244 245/(.)/; # untaint $&, $`, $', $+, $1. 246check_taint_not $&, "\$& from /(.)/"; 247check_taint_not $`, "\t\$`"; 248check_taint_not $', "\t\$'"; 249check_taint_not $+, "\t\$+"; 250check_taint_not $1, "\t\$1"; 251check_taint_not $2, "\t\$2"; 252 253/(\s)/; # taint $&, $`, $', $+, $1. 254check_taint $&, "\$& from /(\\s)/"; 255check_taint $`, "\t\$`"; 256check_taint $', "\t\$'"; 257check_taint $+, "\t\$+"; 258check_taint $1, "\t\$1"; 259check_taint_not $2, "\t\$2"; 260 261/(.)/; # untaint $&, $`, $', $+, $1. 262check_taint_not $&, "\$& from /(.)/"; 263 264/(\S)/; # taint $&, $`, $', $+, $1. 265check_taint $&, "\$& from /(\\S)/"; 266check_taint $`, "\t\$`"; 267check_taint $', "\t\$'"; 268check_taint $+, "\t\$+"; 269check_taint $1, "\t\$1"; 270check_taint_not $2, "\t\$2"; 271 272/(.)/; # untaint $&, $`, $', $+, $1. 273check_taint_not $&, "\$& from /(.)/"; 274 275"0" =~ /(\d)/; # taint $&, $`, $', $+, $1. 276check_taint $&, "\$& from /(\\d)/"; 277check_taint $`, "\t\$`"; 278check_taint $', "\t\$'"; 279check_taint $+, "\t\$+"; 280check_taint $1, "\t\$1"; 281check_taint_not $2, "\t\$2"; 282 283/(.)/; # untaint $&, $`, $', $+, $1. 284check_taint_not $&, "\$& from /(.)/"; 285 286/(\D)/; # taint $&, $`, $', $+, $1. 287check_taint $&, "\$& from /(\\D)/"; 288check_taint $`, "\t\$`"; 289check_taint $', "\t\$'"; 290check_taint $+, "\t\$+"; 291check_taint $1, "\t\$1"; 292check_taint_not $2, "\t\$2"; 293 294/(.)/; # untaint $&, $`, $', $+, $1. 295check_taint_not $&, "\$& from /(.)/"; 296 297/([[:alnum:]])/; # taint $&, $`, $', $+, $1. 298check_taint $&, "\$& from /([[:alnum:]])/"; 299check_taint $`, "\t\$`"; 300check_taint $', "\t\$'"; 301check_taint $+, "\t\$+"; 302check_taint $1, "\t\$1"; 303check_taint_not $2, "\t\$2"; 304 305/(.)/; # untaint $&, $`, $', $+, $1. 306check_taint_not $&, "\$& from /(.)/"; 307 308/([[:^alnum:]])/; # taint $&, $`, $', $+, $1. 309check_taint $&, "\$& from /([[:^alnum:]])/"; 310check_taint $`, "\t\$`"; 311check_taint $', "\t\$'"; 312check_taint $+, "\t\$+"; 313check_taint $1, "\t\$1"; 314check_taint_not $2, "\t\$2"; 315 316"a" =~ /(a)|(\w)/; # taint $&, $`, $', $+, $1. 317check_taint $&, "\$& from /(a)|(\\w)/"; 318check_taint $`, "\t\$`"; 319check_taint $', "\t\$'"; 320check_taint $+, "\t\$+"; 321check_taint $1, "\t\$1"; 322ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'"); 323ok(! defined $2, ("\t" x 5) . "\$2 is undefined"); 324check_taint_not $2, "\t\$2"; 325check_taint_not $3, "\t\$3"; 326 327/(.)/; # untaint $&, $`, $', $+, $1. 328check_taint_not $&, "\$& from /(.)/"; 329 330"\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i; # no tainting because no locale dependence 331check_taint_not $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i"; 332check_taint_not $`, "\t\$`"; 333check_taint_not $', "\t\$'"; 334check_taint_not $+, "\t\$+"; 335check_taint_not $1, "\t\$1"; 336ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'"); 337check_taint_not $2, "\t\$2"; 338 339/(.)/; # untaint $&, $`, $', $+, $1. 340check_taint_not $&, "\$& from /./"; 341 342"(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i; # taints because depends on locale 343check_taint $&, "\$& from /(\\N{KELVIN SIGN})/i"; 344check_taint $`, "\t\$`"; 345check_taint $', "\t\$'"; 346check_taint $+, "\t\$+"; 347check_taint $1, "\t\$1"; 348check_taint_not $2, "\t\$2"; 349 350/(.)/; # untaint $&, $`, $', $+, $1. 351check_taint_not $&, "\$& from /(.)/"; 352 353"a:" =~ /(.)\b(.)/; # taint $&, $`, $', $+, $1. 354check_taint $&, "\$& from /(.)\\b(.)/"; 355check_taint $`, "\t\$`"; 356check_taint $', "\t\$'"; 357check_taint $+, "\t\$+"; 358check_taint $1, "\t\$1"; 359check_taint $2, "\t\$2"; 360check_taint_not $3, "\t\$3"; 361 362/(.)/; # untaint $&, $`, $', $+, $1. 363check_taint_not $&, "\$& from /./"; 364 365"aa" =~ /(.)\B(.)/; # taint $&, $`, $', $+, $1. 366check_taint $&, "\$& from /(.)\\B(.)/"; 367check_taint $`, "\t\$`"; 368check_taint $', "\t\$'"; 369check_taint $+, "\t\$+"; 370check_taint $1, "\t\$1"; 371check_taint $2, "\t\$2"; 372check_taint_not $3, "\t\$3"; 373 374/(.)/; # untaint $&, $`, $', $+, $1. 375check_taint_not $&, "\$& from /./"; 376 377"aaa" =~ /(.).(\1)/i; # notaint because not locale dependent 378check_taint_not $&, "\$ & from /(.).(\\1)/"; 379check_taint_not $`, "\t\$`"; 380check_taint_not $', "\t\$'"; 381check_taint_not $+, "\t\$+"; 382check_taint_not $1, "\t\$1"; 383check_taint_not $2, "\t\$2"; 384check_taint_not $3, "\t\$3"; 385 386/(.)/; # untaint $&, $`, $', $+, $1. 387check_taint_not $&, "\$ & from /./"; 388 389$_ = $a; # untaint $_ 390 391check_taint_not $_, 'untainting $_ works'; 392 393/(b)/; # this must not taint 394check_taint_not $&, "\$ & from /(b)/"; 395check_taint_not $`, "\t\$`"; 396check_taint_not $', "\t\$'"; 397check_taint_not $+, "\t\$+"; 398check_taint_not $1, "\t\$1"; 399check_taint_not $2, "\t\$2"; 400 401$_ = $a; # untaint $_ 402 403check_taint_not $_, 'untainting $_ works'; 404 405$b = uc($a); # taint $b 406s/(.+)/$b/; # this must taint only the $_ 407 408check_taint $_, '$_ (wasn\'t tainted) from s/(.+)/$b/ where $b is tainted'; 409check_taint_not $&, "\t\$&"; 410check_taint_not $`, "\t\$`"; 411check_taint_not $', "\t\$'"; 412check_taint_not $+, "\t\$+"; 413check_taint_not $1, "\t\$1"; 414check_taint_not $2, "\t\$2"; 415 416$_ = $a; # untaint $_ 417 418s/(.+)/b/; # this must not taint 419check_taint_not $_, '$_ (wasn\'t tainted) from s/(.+)/b/'; 420check_taint_not $&, "\t\$&"; 421check_taint_not $`, "\t\$`"; 422check_taint_not $', "\t\$'"; 423check_taint_not $+, "\t\$+"; 424check_taint_not $1, "\t\$1"; 425check_taint_not $2, "\t\$2"; 426 427$b = $a; # untaint $b 428 429($b = $a) =~ s/\w/$&/; 430check_taint $b, '$b from ($b = $a) =~ s/\w/$&/'; # $b should be tainted. 431check_taint_not $a, '$a from ($b = $a) =~ s/\w/$&/'; # $a should be not. 432 433$_ = $a; # untaint $_ 434 435s/(\w)/\l$1/; # this must taint 436check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint 437check_taint $&, "\t\$&"; 438check_taint $`, "\t\$`"; 439check_taint $', "\t\$'"; 440check_taint $+, "\t\$+"; 441check_taint $1, "\t\$1"; 442check_taint_not $2, "\t\$2"; 443 444$_ = $a; # untaint $_ 445 446s/(\w)/\L$1/; # this must taint 447check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,'; 448check_taint $&, "\t\$&"; 449check_taint $`, "\t\$`"; 450check_taint $', "\t\$'"; 451check_taint $+, "\t\$+"; 452check_taint $1, "\t\$1"; 453check_taint_not $2, "\t\$2"; 454 455$_ = $a; # untaint $_ 456 457s/(\w)/\u$1/; # this must taint 458check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/'; 459check_taint $&, "\t\$&"; 460check_taint $`, "\t\$`"; 461check_taint $', "\t\$'"; 462check_taint $+, "\t\$+"; 463check_taint $1, "\t\$1"; 464check_taint_not $2, "\t\$2"; 465 466$_ = $a; # untaint $_ 467 468s/(\w)/\U$1/; # this must taint 469check_taint $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/'; 470check_taint $&, "\t\$&"; 471check_taint $`, "\t\$`"; 472check_taint $', "\t\$'"; 473check_taint $+, "\t\$+"; 474check_taint $1, "\t\$1"; 475check_taint_not $2, "\t\$2"; 476 477# After all this tainting $a should be cool. 478 479check_taint_not $a, '$a still not tainted'; 480 481"a" =~ /([a-z])/; 482check_taint_not $1, '"a" =~ /([a-z])/'; 483"foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675 484check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/'; 485 486# BE SURE TO COPY ANYTHING YOU ADD to the block below 487 488{ # This is just the previous tests copied here with a different 489 # compile-time pragma. 490 491 use locale ':not_characters'; # engage restricted locale with different 492 # tainting rules 493 check_taint_not $a, '$a'; 494 495 check_taint_not uc($a), 'uc($a)'; 496 check_taint_not "\U$a", '"\U$a"'; 497 check_taint_not ucfirst($a), 'ucfirst($a)'; 498 check_taint_not "\u$a", '"\u$a"'; 499 check_taint_not lc($a), 'lc($a)'; 500 check_taint_not fc($a), 'fc($a)'; 501 check_taint_not "\L$a", '"\L$a"'; 502 check_taint_not "\F$a", '"\F$a"'; 503 check_taint_not lcfirst($a), 'lcfirst($a)'; 504 check_taint_not "\l$a", '"\l$a"'; 505 506 check_taint_not sprintf('%e', 123.456), "sprintf('%e', 123.456)"; 507 check_taint_not sprintf('%f', 123.456), "sprintf('%f', 123.456)"; 508 check_taint_not sprintf('%g', 123.456), "sprintf('%g', 123.456)"; 509 check_taint_not sprintf('%d', 123.456), "sprintf('%d', 123.456)"; 510 check_taint_not sprintf('%x', 123.456), "sprintf('%x', 123.456)"; 511 512 $_ = $a; # untaint $_ 513 514 $_ = uc($a); 515 516 check_taint_not $_, '$_ = uc($a)'; 517 518 /(\w)/; 519 check_taint_not $&, "\$& from /(\\w)/"; 520 check_taint_not $`, "\t\$`"; 521 check_taint_not $', "\t\$'"; 522 check_taint_not $+, "\t\$+"; 523 check_taint_not $1, "\t\$1"; 524 check_taint_not $2, "\t\$2"; 525 526 /(.)/; # untaint $&, $`, $', $+, $1. 527 check_taint_not $&, "\$& from /(.)/"; 528 check_taint_not $`, "\t\$`"; 529 check_taint_not $', "\t\$'"; 530 check_taint_not $+, "\t\$+"; 531 check_taint_not $1, "\t\$1"; 532 check_taint_not $2, "\t\$2"; 533 534 /(\W)/; 535 check_taint_not $&, "\$& from /(\\W)/"; 536 check_taint_not $`, "\t\$`"; 537 check_taint_not $', "\t\$'"; 538 check_taint_not $+, "\t\$+"; 539 check_taint_not $1, "\t\$1"; 540 check_taint_not $2, "\t\$2"; 541 542 /(.)/; # untaint $&, $`, $', $+, $1. 543 check_taint_not $&, "\$& from /(.)/"; 544 check_taint_not $`, "\t\$`"; 545 check_taint_not $', "\t\$'"; 546 check_taint_not $+, "\t\$+"; 547 check_taint_not $1, "\t\$1"; 548 check_taint_not $2, "\t\$2"; 549 550 /(\s)/; 551 check_taint_not $&, "\$& from /(\\s)/"; 552 check_taint_not $`, "\t\$`"; 553 check_taint_not $', "\t\$'"; 554 check_taint_not $+, "\t\$+"; 555 check_taint_not $1, "\t\$1"; 556 check_taint_not $2, "\t\$2"; 557 558 /(.)/; # untaint $&, $`, $', $+, $1. 559 check_taint_not $&, "\$& from /(.)/"; 560 561 /(\S)/; 562 check_taint_not $&, "\$& from /(\\S)/"; 563 check_taint_not $`, "\t\$`"; 564 check_taint_not $', "\t\$'"; 565 check_taint_not $+, "\t\$+"; 566 check_taint_not $1, "\t\$1"; 567 check_taint_not $2, "\t\$2"; 568 569 /(.)/; # untaint $&, $`, $', $+, $1. 570 check_taint_not $&, "\$& from /(.)/"; 571 572 "0" =~ /(\d)/; 573 check_taint_not $&, "\$& from /(\\d)/"; 574 check_taint_not $`, "\t\$`"; 575 check_taint_not $', "\t\$'"; 576 check_taint_not $+, "\t\$+"; 577 check_taint_not $1, "\t\$1"; 578 check_taint_not $2, "\t\$2"; 579 580 /(.)/; # untaint $&, $`, $', $+, $1. 581 check_taint_not $&, "\$& from /(.)/"; 582 583 /(\D)/; 584 check_taint_not $&, "\$& from /(\\D)/"; 585 check_taint_not $`, "\t\$`"; 586 check_taint_not $', "\t\$'"; 587 check_taint_not $+, "\t\$+"; 588 check_taint_not $1, "\t\$1"; 589 check_taint_not $2, "\t\$2"; 590 591 /(.)/; # untaint $&, $`, $', $+, $1. 592 check_taint_not $&, "\$& from /(.)/"; 593 594 /([[:alnum:]])/; 595 check_taint_not $&, "\$& from /([[:alnum:]])/"; 596 check_taint_not $`, "\t\$`"; 597 check_taint_not $', "\t\$'"; 598 check_taint_not $+, "\t\$+"; 599 check_taint_not $1, "\t\$1"; 600 check_taint_not $2, "\t\$2"; 601 602 /(.)/; # untaint $&, $`, $', $+, $1. 603 check_taint_not $&, "\$& from /(.)/"; 604 605 /([[:^alnum:]])/; 606 check_taint_not $&, "\$& from /([[:^alnum:]])/"; 607 check_taint_not $`, "\t\$`"; 608 check_taint_not $', "\t\$'"; 609 check_taint_not $+, "\t\$+"; 610 check_taint_not $1, "\t\$1"; 611 check_taint_not $2, "\t\$2"; 612 613 "a" =~ /(a)|(\w)/; 614 check_taint_not $&, "\$& from /(a)|(\\w)/"; 615 check_taint_not $`, "\t\$`"; 616 check_taint_not $', "\t\$'"; 617 check_taint_not $+, "\t\$+"; 618 check_taint_not $1, "\t\$1"; 619 ok($1 eq 'a', ("\t" x 5) . "\$1 is 'a'"); 620 ok(! defined $2, ("\t" x 5) . "\$2 is undefined"); 621 check_taint_not $2, "\t\$2"; 622 check_taint_not $3, "\t\$3"; 623 624 /(.)/; # untaint $&, $`, $', $+, $1. 625 check_taint_not $&, "\$& from /(.)/"; 626 627 "\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i; 628 check_taint_not $&, "\$& from /(\\N{CYRILLIC CAPITAL LETTER A})/i"; 629 check_taint_not $`, "\t\$`"; 630 check_taint_not $', "\t\$'"; 631 check_taint_not $+, "\t\$+"; 632 check_taint_not $1, "\t\$1"; 633 ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\t\$1 is 'small cyrillic a'"); 634 check_taint_not $2, "\t\$2"; 635 636 /(.)/; # untaint $&, $`, $', $+, $1. 637 check_taint_not $&, "\$& from /./"; 638 639 "(\N{KELVIN SIGN})" =~ /(\N{KELVIN SIGN})/i; 640 check_taint_not $&, "\$& from /(\\N{KELVIN SIGN})/i"; 641 check_taint_not $`, "\t\$`"; 642 check_taint_not $', "\t\$'"; 643 check_taint_not $+, "\t\$+"; 644 check_taint_not $1, "\t\$1"; 645 check_taint_not $2, "\t\$2"; 646 647 /(.)/; # untaint $&, $`, $', $+, $1. 648 check_taint_not $&, "\$& from /(.)/"; 649 650 "a:" =~ /(.)\b(.)/; 651 check_taint_not $&, "\$& from /(.)\\b(.)/"; 652 check_taint_not $`, "\t\$`"; 653 check_taint_not $', "\t\$'"; 654 check_taint_not $+, "\t\$+"; 655 check_taint_not $1, "\t\$1"; 656 check_taint_not $2, "\t\$2"; 657 check_taint_not $3, "\t\$3"; 658 659 /(.)/; # untaint $&, $`, $', $+, $1. 660 check_taint_not $&, "\$& from /./"; 661 662 "aa" =~ /(.)\B(.)/; 663 check_taint_not $&, "\$& from /(.)\\B(.)/"; 664 check_taint_not $`, "\t\$`"; 665 check_taint_not $', "\t\$'"; 666 check_taint_not $+, "\t\$+"; 667 check_taint_not $1, "\t\$1"; 668 check_taint_not $2, "\t\$2"; 669 check_taint_not $3, "\t\$3"; 670 671 /(.)/; # untaint $&, $`, $', $+, $1. 672 check_taint_not $&, "\$& from /./"; 673 674 "aaa" =~ /(.).(\1)/i; # notaint because not locale dependent 675 check_taint_not $&, "\$ & from /(.).(\\1)/"; 676 check_taint_not $`, "\t\$`"; 677 check_taint_not $', "\t\$'"; 678 check_taint_not $+, "\t\$+"; 679 check_taint_not $1, "\t\$1"; 680 check_taint_not $2, "\t\$2"; 681 check_taint_not $3, "\t\$3"; 682 683 /(.)/; # untaint $&, $`, $', $+, $1. 684 check_taint_not $&, "\$ & from /./"; 685 686 $_ = $a; # untaint $_ 687 688 check_taint_not $_, 'untainting $_ works'; 689 690 /(b)/; 691 check_taint_not $&, "\$ & from /(b)/"; 692 check_taint_not $`, "\t\$`"; 693 check_taint_not $', "\t\$'"; 694 check_taint_not $+, "\t\$+"; 695 check_taint_not $1, "\t\$1"; 696 check_taint_not $2, "\t\$2"; 697 698 $_ = $a; # untaint $_ 699 700 check_taint_not $_, 'untainting $_ works'; 701 702 s/(.+)/b/; 703 check_taint_not $_, '$_ (wasn\'t tainted) from s/(.+)/b/'; 704 check_taint_not $&, "\t\$&"; 705 check_taint_not $`, "\t\$`"; 706 check_taint_not $', "\t\$'"; 707 check_taint_not $+, "\t\$+"; 708 check_taint_not $1, "\t\$1"; 709 check_taint_not $2, "\t\$2"; 710 711 $b = $a; # untaint $b 712 713 ($b = $a) =~ s/\w/$&/; 714 check_taint_not $b, '$b from ($b = $a) =~ s/\w/$&/'; 715 check_taint_not $a, '$a from ($b = $a) =~ s/\w/$&/'; 716 717 $_ = $a; # untaint $_ 718 719 s/(\w)/\l$1/; 720 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\l$1/,'; # this must taint 721 check_taint_not $&, "\t\$&"; 722 check_taint_not $`, "\t\$`"; 723 check_taint_not $', "\t\$'"; 724 check_taint_not $+, "\t\$+"; 725 check_taint_not $1, "\t\$1"; 726 check_taint_not $2, "\t\$2"; 727 728 $_ = $a; # untaint $_ 729 730 s/(\w)/\L$1/; 731 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\L$1/,'; 732 check_taint_not $&, "\t\$&"; 733 check_taint_not $`, "\t\$`"; 734 check_taint_not $', "\t\$'"; 735 check_taint_not $+, "\t\$+"; 736 check_taint_not $1, "\t\$1"; 737 check_taint_not $2, "\t\$2"; 738 739 $_ = $a; # untaint $_ 740 741 s/(\w)/\u$1/; 742 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\u$1/'; 743 check_taint_not $&, "\t\$&"; 744 check_taint_not $`, "\t\$`"; 745 check_taint_not $', "\t\$'"; 746 check_taint_not $+, "\t\$+"; 747 check_taint_not $1, "\t\$1"; 748 check_taint_not $2, "\t\$2"; 749 750 $_ = $a; # untaint $_ 751 752 s/(\w)/\U$1/; 753 check_taint_not $_, '$_ (wasn\'t tainted) from s/(\w)/\U$1/'; 754 check_taint_not $&, "\t\$&"; 755 check_taint_not $`, "\t\$`"; 756 check_taint_not $', "\t\$'"; 757 check_taint_not $+, "\t\$+"; 758 check_taint_not $1, "\t\$1"; 759 check_taint_not $2, "\t\$2"; 760 761 # After all this tainting $a should be cool. 762 763 check_taint_not $a, '$a still not tainted'; 764 765 "a" =~ /([a-z])/; 766 check_taint_not $1, '"a" =~ /([a-z])/'; 767 "foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675 768 check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/'; 769 770} 771 772# Here are in scope of 'use locale' 773 774# I think we've seen quite enough of taint. 775# Let us do some *real* locale work now, 776# unless setlocale() is missing (i.e. minitest). 777 778# The test number before our first setlocale() 779my $final_without_setlocale = $test_num; 780 781# Find locales. 782 783debug "Scanning for locales...\n"; 784 785require POSIX; import POSIX ':locale_h'; 786 787my $categories = [ 'LC_CTYPE', 'LC_NUMERIC', 'LC_ALL' ]; 788debug "Scanning for just compatible"; 789my @Locale = find_locales($categories); 790debug "Scanning for even incompatible"; 791my @include_incompatible_locales = find_locales($categories, 792 'even incompatible locales'); 793 794# The locales included in the incompatible list that aren't in the compatible 795# one. 796my @incompatible_locales; 797 798if (@Locale < @include_incompatible_locales) { 799 my %seen; 800 @seen{@Locale} = (); 801 802 foreach my $item (@include_incompatible_locales) { 803 push @incompatible_locales, $item unless exists $seen{$item}; 804 } 805 806 # For each bad locale, switch into it to find out why it's incompatible 807 for my $bad_locale (@incompatible_locales) { 808 my @warnings; 809 810 use warnings 'locale'; 811 812 local $SIG{__WARN__} = sub { 813 my $warning = $_[0]; 814 chomp $warning; 815 push @warnings, ($warning =~ s/\n/\n# /sgr); 816 }; 817 818 debug "Trying incompatible $bad_locale"; 819 my $ret = setlocale(&POSIX::LC_CTYPE, $bad_locale); 820 821 my $message = "testing of locale '$bad_locale' is skipped"; 822 if (@warnings) { 823 skip $message . ":\n# " . join "\n# ", @warnings; 824 } 825 elsif (! $ret) { 826 skip("$message:\n#" 827 . " setlocale(&POSIX::LC_CTYPE, '$bad_locale') failed"); 828 } 829 else { 830 fail $message . ", because it is was found to be incompatible with" 831 . " Perl, but could not discern reason"; 832 } 833 } 834} 835 836debug "Locales =\n"; 837for ( @Locale ) { 838 debug "$_\n"; 839} 840 841unless (@Locale) { 842 print "1..$test_num\n"; 843 exit; 844} 845 846 847setlocale(&POSIX::LC_ALL, "C"); 848 849my %posixes; 850 851my %Problem; 852my %Okay; 853my %Known_bad_locale; # Failed test for a locale known to be bad 854my %Testing; 855my @Added_alpha; # Alphas that aren't in the C locale. 856my %test_names; 857 858sub disp_chars { 859 # This returns a display string denoting the input parameter @_, each 860 # entry of which is a single character in the range 0-255. The first part 861 # of the output is a string of the characters in @_ that are ASCII 862 # graphics, and hence unambiguously displayable. They are given by code 863 # point order. The second part is the remaining code points, the ordinals 864 # of which are each displayed as 2-digit hex. Blanks are inserted so as 865 # to keep anything from the first part looking like a 2-digit hex number. 866 867 no locale; 868 my @chars = sort { ord $a <=> ord $b } @_; 869 my $output = ""; 870 my $range_start; 871 my $start_class; 872 push @chars, chr(258); # This sentinel simplifies the loop termination 873 # logic 874 foreach my $i (0 .. @chars - 1) { 875 my $char = $chars[$i]; 876 my $range_end; 877 my $class; 878 879 # We avoid using [:posix:] classes, as these are being tested in this 880 # file. Each equivalence class below is for things that can appear in 881 # a range; those that can't be in a range have class -1. 0 for those 882 # which should be output in hex; and >0 for the other ranges 883 if ($char =~ /[A-Z]/) { 884 $class = 2; 885 } 886 elsif ($char =~ /[a-z]/) { 887 $class = 3; 888 } 889 elsif ($char =~ /[0-9]/) { 890 $class = 4; 891 } 892 # Uncomment to get literal punctuation displayed instead of hex 893 #elsif ($char =~ /[[\]!"#\$\%&\'()*+,.\/:\\;<=>?\@\^_`{|}~-]/) { 894 # $class = -1; # Punct never appears in a range 895 #} 896 else { 897 $class = 0; # Output in hex 898 } 899 900 if (! defined $range_start) { 901 if ($class < 0) { 902 $output .= " " . $char; 903 } 904 else { 905 $range_start = ord $char; 906 $start_class = $class; 907 } 908 } # A range ends if not consecutive, or the class-type changes 909 elsif (ord $char != ($range_end = ord($chars[$i-1])) + 1 910 || $class != $start_class) 911 { 912 913 # Here, the current character is not in the range. This means the 914 # previous character must have been. Output the range up through 915 # that one. 916 my $range_length = $range_end - $range_start + 1; 917 if ($start_class > 0) { 918 $output .= " " . chr($range_start); 919 $output .= "-" . chr($range_end) if $range_length > 1; 920 } 921 else { 922 $output .= sprintf(" %02X", $range_start); 923 $output .= sprintf("-%02X", $range_end) if $range_length > 1; 924 } 925 926 # Handle the new current character, as potentially beginning a new 927 # range 928 undef $range_start; 929 redo; 930 } 931 } 932 933 $output =~ s/^ //; 934 return $output; 935} 936 937sub disp_str ($) { 938 my $string = shift; 939 940 # Displays the string unambiguously. ASCII printables are always output 941 # as-is, though perhaps separated by blanks from other characters. If 942 # entirely printable ASCII, just returns the string. Otherwise if valid 943 # UTF-8 it uses the character names for non-printable-ASCII. Otherwise it 944 # outputs hex for each non-ASCII-printable byte. 945 946 return $string if $string =~ / ^ [[:print:]]* $/xa; 947 948 my $result = ""; 949 my $prev_was_punct = 1; # Beginning is considered punct 950 if (utf8::valid($string) && utf8::is_utf8($string)) { 951 use charnames (); 952 foreach my $char (split "", $string) { 953 954 # Keep punctuation adjacent to other characters; otherwise 955 # separate them with a blank 956 if ($char =~ /[[:punct:]]/a) { 957 $result .= $char; 958 $prev_was_punct = 1; 959 } 960 elsif ($char =~ /[[:print:]]/a) { 961 $result .= " " unless $prev_was_punct; 962 $result .= $char; 963 $prev_was_punct = 0; 964 } 965 else { 966 $result .= " " unless $prev_was_punct; 967 my $name = charnames::viacode(ord $char); 968 $result .= (defined $name) ? $name : ':unknown:'; 969 $prev_was_punct = 0; 970 } 971 } 972 } 973 else { 974 use bytes; 975 foreach my $char (split "", $string) { 976 if ($char =~ /[[:punct:]]/a) { 977 $result .= $char; 978 $prev_was_punct = 1; 979 } 980 elsif ($char =~ /[[:print:]]/a) { 981 $result .= " " unless $prev_was_punct; 982 $result .= $char; 983 $prev_was_punct = 0; 984 } 985 else { 986 $result .= " " unless $prev_was_punct; 987 $result .= sprintf("%02X", ord $char); 988 $prev_was_punct = 0; 989 } 990 } 991 } 992 993 return $result; 994} 995 996sub report_result { 997 my ($Locale, $i, $pass_fail, $message) = @_; 998 if ($pass_fail) { 999 push @{$Okay{$i}}, $Locale; 1000 } 1001 else { 1002 $message //= ""; 1003 $message = " ($message)" if $message; 1004 $Known_bad_locale{$i}{$Locale} = 1 if exists $known_bad_locales{$os} 1005 && $Locale =~ $known_bad_locales{$os}; 1006 $Problem{$i}{$Locale} = 1; 1007 debug "failed $i ($test_names{$i}) with locale '$Locale'$message\n"; 1008 } 1009} 1010 1011sub report_multi_result { 1012 my ($Locale, $i, $results_ref) = @_; 1013 1014 # $results_ref points to an array, each element of which is a character that was 1015 # in error for this test numbered '$i'. If empty, the test passed 1016 1017 my $message = ""; 1018 if (@$results_ref) { 1019 $message = join " ", "for", disp_chars(@$results_ref); 1020 } 1021 report_result($Locale, $i, @$results_ref == 0, $message); 1022} 1023 1024my $first_locales_test_number = $final_without_setlocale 1025 + 1 + @incompatible_locales; 1026my $locales_test_number; 1027my $not_necessarily_a_problem_test_number; 1028my $first_casing_test_number; 1029my %setlocale_failed; # List of locales that setlocale() didn't work on 1030 1031foreach my $Locale (@Locale) { 1032 $locales_test_number = $first_locales_test_number - 1; 1033 debug "\n"; 1034 debug "Locale = $Locale\n"; 1035 1036 unless (setlocale(&POSIX::LC_ALL, $Locale)) { 1037 $setlocale_failed{$Locale} = $Locale; 1038 next; 1039 } 1040 1041 # We test UTF-8 locales only under ':not_characters'; It is easier to 1042 # test them in other test files than here. Non- UTF-8 locales are tested 1043 # only under plain 'use locale', as otherwise we would have to convert 1044 # everything in them to Unicode. 1045 1046 my %UPPER = (); # All alpha X for which uc(X) == X and lc(X) != X 1047 my %lower = (); # All alpha X for which lc(X) == X and uc(X) != X 1048 my %BoThCaSe = (); # All alpha X for which uc(X) == lc(X) == X 1049 1050 my $is_utf8_locale = is_locale_utf8($Locale); 1051 1052 debug "is utf8 locale? = $is_utf8_locale\n"; 1053 1054 debug "radix = " . disp_str(localeconv()->{decimal_point}) . "\n"; 1055 1056 if (! $is_utf8_locale) { 1057 use locale; 1058 @{$posixes{'word'}} = grep /\w/, map { chr } 0..255; 1059 @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255; 1060 @{$posixes{'space'}} = grep /\s/, map { chr } 0..255; 1061 @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255; 1062 @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255; 1063 @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255; 1064 @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255; 1065 @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255; 1066 @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255; 1067 @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255; 1068 @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255; 1069 @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255; 1070 @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255; 1071 @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255; 1072 @{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255; 1073 1074 # Sieve the uppercase and the lowercase. 1075 1076 for (@{$posixes{'word'}}) { 1077 if (/[^\d_]/) { # skip digits and the _ 1078 if (uc($_) eq $_) { 1079 $UPPER{$_} = $_; 1080 } 1081 if (lc($_) eq $_) { 1082 $lower{$_} = $_; 1083 } 1084 } 1085 } 1086 } 1087 else { 1088 use locale ':not_characters'; 1089 @{$posixes{'word'}} = grep /\w/, map { chr } 0..255; 1090 @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255; 1091 @{$posixes{'space'}} = grep /\s/, map { chr } 0..255; 1092 @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255; 1093 @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255; 1094 @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255; 1095 @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255; 1096 @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255; 1097 @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255; 1098 @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255; 1099 @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255; 1100 @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255; 1101 @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255; 1102 @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255; 1103 @{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255; 1104 for (@{$posixes{'word'}}) { 1105 if (/[^\d_]/) { # skip digits and the _ 1106 if (uc($_) eq $_) { 1107 $UPPER{$_} = $_; 1108 } 1109 if (lc($_) eq $_) { 1110 $lower{$_} = $_; 1111 } 1112 } 1113 } 1114 } 1115 1116 # Ordered, where possible, in groups of "this is a subset of the next 1117 # one" 1118 debug ":upper: = ", disp_chars(@{$posixes{'upper'}}), "\n"; 1119 debug ":lower: = ", disp_chars(@{$posixes{'lower'}}), "\n"; 1120 debug ":cased: = ", disp_chars(@{$posixes{'cased'}}), "\n"; 1121 debug ":alpha: = ", disp_chars(@{$posixes{'alpha'}}), "\n"; 1122 debug ":alnum: = ", disp_chars(@{$posixes{'alnum'}}), "\n"; 1123 debug ' \w = ', disp_chars(@{$posixes{'word'}}), "\n"; 1124 debug ":graph: = ", disp_chars(@{$posixes{'graph'}}), "\n"; 1125 debug ":print: = ", disp_chars(@{$posixes{'print'}}), "\n"; 1126 debug ' \d = ', disp_chars(@{$posixes{'digit'}}), "\n"; 1127 debug ":xdigit: = ", disp_chars(@{$posixes{'xdigit'}}), "\n"; 1128 debug ":blank: = ", disp_chars(@{$posixes{'blank'}}), "\n"; 1129 debug ' \s = ', disp_chars(@{$posixes{'space'}}), "\n"; 1130 debug ":punct: = ", disp_chars(@{$posixes{'punct'}}), "\n"; 1131 debug ":cntrl: = ", disp_chars(@{$posixes{'cntrl'}}), "\n"; 1132 debug ":ascii: = ", disp_chars(@{$posixes{'ascii'}}), "\n"; 1133 1134 foreach (keys %UPPER) { 1135 1136 $BoThCaSe{$_}++ if exists $lower{$_}; 1137 } 1138 foreach (keys %lower) { 1139 $BoThCaSe{$_}++ if exists $UPPER{$_}; 1140 } 1141 foreach (keys %BoThCaSe) { 1142 delete $UPPER{$_}; 1143 delete $lower{$_}; 1144 } 1145 1146 my %Unassigned; 1147 foreach my $ord ( 0 .. 255 ) { 1148 $Unassigned{chr $ord} = 1; 1149 } 1150 foreach my $class (keys %posixes) { 1151 foreach my $char (@{$posixes{$class}}) { 1152 delete $Unassigned{$char}; 1153 } 1154 } 1155 1156 debug "UPPER = ", disp_chars(sort { ord $a <=> ord $b } keys %UPPER), "\n"; 1157 debug "lower = ", disp_chars(sort { ord $a <=> ord $b } keys %lower), "\n"; 1158 debug "BoThCaSe = ", disp_chars(sort { ord $a <=> ord $b } keys %BoThCaSe), "\n"; 1159 debug "Unassigned = ", disp_chars(sort { ord $a <=> ord $b } keys %Unassigned), "\n"; 1160 1161 my @failures; 1162 my @fold_failures; 1163 foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) { 1164 my $ok; 1165 my $fold_ok; 1166 if ($is_utf8_locale) { 1167 use locale ':not_characters'; 1168 $ok = $x =~ /[[:upper:]]/; 1169 $fold_ok = $x =~ /[[:lower:]]/i; 1170 } 1171 else { 1172 use locale; 1173 $ok = $x =~ /[[:upper:]]/; 1174 $fold_ok = $x =~ /[[:lower:]]/i; 1175 } 1176 push @failures, $x unless $ok; 1177 push @fold_failures, $x unless $fold_ok; 1178 } 1179 $locales_test_number++; 1180 $first_casing_test_number = $locales_test_number; 1181 $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/ matches all alpha X for which uc(X) == X and lc(X) != X'; 1182 report_multi_result($Locale, $locales_test_number, \@failures); 1183 1184 $locales_test_number++; 1185 1186 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i matches all alpha X for which uc(X) == X and lc(X) != X'; 1187 report_multi_result($Locale, $locales_test_number, \@fold_failures); 1188 1189 undef @failures; 1190 undef @fold_failures; 1191 1192 foreach my $x (sort { ord $a <=> ord $b } keys %lower) { 1193 my $ok; 1194 my $fold_ok; 1195 if ($is_utf8_locale) { 1196 use locale ':not_characters'; 1197 $ok = $x =~ /[[:lower:]]/; 1198 $fold_ok = $x =~ /[[:upper:]]/i; 1199 } 1200 else { 1201 use locale; 1202 $ok = $x =~ /[[:lower:]]/; 1203 $fold_ok = $x =~ /[[:upper:]]/i; 1204 } 1205 push @failures, $x unless $ok; 1206 push @fold_failures, $x unless $fold_ok; 1207 } 1208 1209 $locales_test_number++; 1210 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/ matches all alpha X for which lc(X) == X and uc(X) != X'; 1211 report_multi_result($Locale, $locales_test_number, \@failures); 1212 1213 $locales_test_number++; 1214 $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/i matches all alpha X for which lc(X) == X and uc(X) != X'; 1215 report_multi_result($Locale, $locales_test_number, \@fold_failures); 1216 1217 { # Find the alphabetic characters that are not considered alphabetics 1218 # in the default (C) locale. 1219 1220 no locale; 1221 1222 @Added_alpha = (); 1223 for (keys %UPPER, keys %lower, keys %BoThCaSe) { 1224 push(@Added_alpha, $_) if (/\W/); 1225 } 1226 } 1227 1228 @Added_alpha = sort { ord $a <=> ord $b } @Added_alpha; 1229 1230 debug "Added_alpha = ", disp_chars(@Added_alpha), "\n"; 1231 1232 # Cross-check the whole 8-bit character set. 1233 1234 ++$locales_test_number; 1235 my @f; 1236 $test_names{$locales_test_number} = 'Verify that \w and [:word:] are identical'; 1237 for (map { chr } 0..255) { 1238 if ($is_utf8_locale) { 1239 use locale ':not_characters'; 1240 push @f, $_ unless /[[:word:]]/ == /\w/; 1241 } 1242 else { 1243 push @f, $_ unless /[[:word:]]/ == /\w/; 1244 } 1245 } 1246 report_multi_result($Locale, $locales_test_number, \@f); 1247 1248 ++$locales_test_number; 1249 undef @f; 1250 $test_names{$locales_test_number} = 'Verify that \d and [:digit:] are identical'; 1251 for (map { chr } 0..255) { 1252 if ($is_utf8_locale) { 1253 use locale ':not_characters'; 1254 push @f, $_ unless /[[:digit:]]/ == /\d/; 1255 } 1256 else { 1257 push @f, $_ unless /[[:digit:]]/ == /\d/; 1258 } 1259 } 1260 report_multi_result($Locale, $locales_test_number, \@f); 1261 1262 ++$locales_test_number; 1263 undef @f; 1264 $test_names{$locales_test_number} = 'Verify that \s and [:space:] are identical'; 1265 for (map { chr } 0..255) { 1266 if ($is_utf8_locale) { 1267 use locale ':not_characters'; 1268 push @f, $_ unless /[[:space:]]/ == /\s/; 1269 } 1270 else { 1271 push @f, $_ unless /[[:space:]]/ == /\s/; 1272 } 1273 } 1274 report_multi_result($Locale, $locales_test_number, \@f); 1275 1276 ++$locales_test_number; 1277 undef @f; 1278 $test_names{$locales_test_number} = 'Verify that [:posix:] and [:^posix:] are mutually exclusive'; 1279 for (map { chr } 0..255) { 1280 if ($is_utf8_locale) { 1281 use locale ':not_characters'; 1282 push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) || 1283 (/[[:alnum:]]/ xor /[[:^alnum:]]/) || 1284 (/[[:ascii:]]/ xor /[[:^ascii:]]/) || 1285 (/[[:blank:]]/ xor /[[:^blank:]]/) || 1286 (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) || 1287 (/[[:digit:]]/ xor /[[:^digit:]]/) || 1288 (/[[:graph:]]/ xor /[[:^graph:]]/) || 1289 (/[[:lower:]]/ xor /[[:^lower:]]/) || 1290 (/[[:print:]]/ xor /[[:^print:]]/) || 1291 (/[[:space:]]/ xor /[[:^space:]]/) || 1292 (/[[:upper:]]/ xor /[[:^upper:]]/) || 1293 (/[[:word:]]/ xor /[[:^word:]]/) || 1294 (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) || 1295 1296 # effectively is what [:cased:] would be if it existed. 1297 (/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i); 1298 } 1299 else { 1300 push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) || 1301 (/[[:alnum:]]/ xor /[[:^alnum:]]/) || 1302 (/[[:ascii:]]/ xor /[[:^ascii:]]/) || 1303 (/[[:blank:]]/ xor /[[:^blank:]]/) || 1304 (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) || 1305 (/[[:digit:]]/ xor /[[:^digit:]]/) || 1306 (/[[:graph:]]/ xor /[[:^graph:]]/) || 1307 (/[[:lower:]]/ xor /[[:^lower:]]/) || 1308 (/[[:print:]]/ xor /[[:^print:]]/) || 1309 (/[[:space:]]/ xor /[[:^space:]]/) || 1310 (/[[:upper:]]/ xor /[[:^upper:]]/) || 1311 (/[[:word:]]/ xor /[[:^word:]]/) || 1312 (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) || 1313 (/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i); 1314 } 1315 } 1316 report_multi_result($Locale, $locales_test_number, \@f); 1317 1318 # The rules for the relationships are given in: 1319 # http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html 1320 1321 1322 ++$locales_test_number; 1323 undef @f; 1324 $test_names{$locales_test_number} = 'Verify that [:lower:] contains at least a-z'; 1325 for ('a' .. 'z') { 1326 if ($is_utf8_locale) { 1327 use locale ':not_characters'; 1328 push @f, $_ unless /[[:lower:]]/; 1329 } 1330 else { 1331 push @f, $_ unless /[[:lower:]]/; 1332 } 1333 } 1334 report_multi_result($Locale, $locales_test_number, \@f); 1335 1336 ++$locales_test_number; 1337 undef @f; 1338 $test_names{$locales_test_number} = 'Verify that [:lower:] is a subset of [:alpha:]'; 1339 for (map { chr } 0..255) { 1340 if ($is_utf8_locale) { 1341 use locale ':not_characters'; 1342 push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/; 1343 } 1344 else { 1345 push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/; 1346 } 1347 } 1348 report_multi_result($Locale, $locales_test_number, \@f); 1349 1350 ++$locales_test_number; 1351 undef @f; 1352 $test_names{$locales_test_number} = 'Verify that [:upper:] contains at least A-Z'; 1353 for ('A' .. 'Z') { 1354 if ($is_utf8_locale) { 1355 use locale ':not_characters'; 1356 push @f, $_ unless /[[:upper:]]/; 1357 } 1358 else { 1359 push @f, $_ unless /[[:upper:]]/; 1360 } 1361 } 1362 report_multi_result($Locale, $locales_test_number, \@f); 1363 1364 ++$locales_test_number; 1365 undef @f; 1366 $test_names{$locales_test_number} = 'Verify that [:upper:] is a subset of [:alpha:]'; 1367 for (map { chr } 0..255) { 1368 if ($is_utf8_locale) { 1369 use locale ':not_characters'; 1370 push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/; 1371 } 1372 else { 1373 push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/; 1374 } 1375 } 1376 report_multi_result($Locale, $locales_test_number, \@f); 1377 1378 ++$locales_test_number; 1379 undef @f; 1380 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i is a subset of [:alpha:]'; 1381 for (map { chr } 0..255) { 1382 if ($is_utf8_locale) { 1383 use locale ':not_characters'; 1384 push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/; 1385 } 1386 else { 1387 push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/; 1388 } 1389 } 1390 report_multi_result($Locale, $locales_test_number, \@f); 1391 1392 ++$locales_test_number; 1393 undef @f; 1394 $test_names{$locales_test_number} = 'Verify that [:alpha:] is a subset of [:alnum:]'; 1395 for (map { chr } 0..255) { 1396 if ($is_utf8_locale) { 1397 use locale ':not_characters'; 1398 push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/; 1399 } 1400 else { 1401 push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/; 1402 } 1403 } 1404 report_multi_result($Locale, $locales_test_number, \@f); 1405 1406 ++$locales_test_number; 1407 undef @f; 1408 $test_names{$locales_test_number} = 'Verify that [:digit:] contains at least 0-9'; 1409 for ('0' .. '9') { 1410 if ($is_utf8_locale) { 1411 use locale ':not_characters'; 1412 push @f, $_ unless /[[:digit:]]/; 1413 } 1414 else { 1415 push @f, $_ unless /[[:digit:]]/; 1416 } 1417 } 1418 report_multi_result($Locale, $locales_test_number, \@f); 1419 1420 ++$locales_test_number; 1421 undef @f; 1422 $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:alnum:]'; 1423 for (map { chr } 0..255) { 1424 if ($is_utf8_locale) { 1425 use locale ':not_characters'; 1426 push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/; 1427 } 1428 else { 1429 push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/; 1430 } 1431 } 1432 report_multi_result($Locale, $locales_test_number, \@f); 1433 1434 ++$locales_test_number; 1435 undef @f; 1436 $test_names{$locales_test_number} = 'Verify that [:digit:] matches either 10 or 20 code points'; 1437 report_result($Locale, $locales_test_number, @{$posixes{'digit'}} == 10 || @{$posixes{'digit'}} == 20); 1438 1439 ++$locales_test_number; 1440 undef @f; 1441 $test_names{$locales_test_number} = 'Verify that if there is a second set of digits in [:digit:], they are consecutive'; 1442 if (@{$posixes{'digit'}} == 20) { 1443 my $previous_ord; 1444 for (map { chr } 0..255) { 1445 next unless /[[:digit:]]/; 1446 next if /[0-9]/; 1447 if (defined $previous_ord) { 1448 if ($is_utf8_locale) { 1449 use locale ':not_characters'; 1450 push @f, $_ if ord $_ != $previous_ord + 1; 1451 } 1452 else { 1453 push @f, $_ if ord $_ != $previous_ord + 1; 1454 } 1455 } 1456 $previous_ord = ord $_; 1457 } 1458 } 1459 report_multi_result($Locale, $locales_test_number, \@f); 1460 1461 ++$locales_test_number; 1462 undef @f; 1463 my @xdigit_digits; # :digit: & :xdigit: 1464 $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains one or two blocks of 10 consecutive [:digit:] chars'; 1465 for (map { chr } 0..255) { 1466 if ($is_utf8_locale) { 1467 use locale ':not_characters'; 1468 # For utf8 locales, we actually use a stricter test: that :digit: 1469 # is a subset of :xdigit:, as we know that only 0-9 should match 1470 push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/; 1471 } 1472 else { 1473 push @xdigit_digits, $_ if /[[:digit:]]/ and /[[:xdigit:]]/; 1474 } 1475 } 1476 if (! $is_utf8_locale) { 1477 1478 # For non-utf8 locales, @xdigit_digits is a list of the characters 1479 # that are both :xdigit: and :digit:. Because :digit: is stored in 1480 # increasing code point order (unless the tests above failed), 1481 # @xdigit_digits is as well. There should be exactly 10 or 1482 # 20 of these. 1483 if (@xdigit_digits != 10 && @xdigit_digits != 20) { 1484 @f = @xdigit_digits; 1485 } 1486 else { 1487 1488 # Look for contiguity in the series, adding any wrong ones to @f 1489 my @temp = @xdigit_digits; 1490 while (@temp > 1) { 1491 push @f, $temp[1] if ($temp[0] != $temp[1] - 1) 1492 1493 # Skip this test for the 0th character of 1494 # the second block of 10, as it won't be 1495 # contiguous with the previous block 1496 && (! defined $xdigit_digits[10] 1497 || $temp[1] != $xdigit_digits[10]); 1498 shift @temp; 1499 } 1500 } 1501 } 1502 1503 report_multi_result($Locale, $locales_test_number, \@f); 1504 1505 ++$locales_test_number; 1506 undef @f; 1507 $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains at least A-F, a-f'; 1508 for ('A' .. 'F', 'a' .. 'f') { 1509 if ($is_utf8_locale) { 1510 use locale ':not_characters'; 1511 push @f, $_ unless /[[:xdigit:]]/; 1512 } 1513 else { 1514 push @f, $_ unless /[[:xdigit:]]/; 1515 } 1516 } 1517 report_multi_result($Locale, $locales_test_number, \@f); 1518 1519 ++$locales_test_number; 1520 undef @f; 1521 $test_names{$locales_test_number} = 'Verify that any additional members of [:xdigit:], are in groups of 6 consecutive code points'; 1522 my $previous_ord; 1523 my $count = 0; 1524 for my $chr (map { chr } 0..255) { 1525 next unless $chr =~ /[[:xdigit:]]/; 1526 if ($is_utf8_locale) { 1527 next if $chr =~ /[[:digit:]]/; 1528 } 1529 else { 1530 next if grep { $chr eq $_ } @xdigit_digits; 1531 } 1532 next if $chr =~ /[A-Fa-f]/; 1533 if (defined $previous_ord) { 1534 if ($is_utf8_locale) { 1535 use locale ':not_characters'; 1536 push @f, $chr if ord $chr != $previous_ord + 1; 1537 } 1538 else { 1539 push @f, $chr if ord $chr != $previous_ord + 1; 1540 } 1541 } 1542 $count++; 1543 if ($count == 6) { 1544 undef $previous_ord; 1545 } 1546 else { 1547 $previous_ord = ord $chr; 1548 } 1549 } 1550 report_multi_result($Locale, $locales_test_number, \@f); 1551 1552 ++$locales_test_number; 1553 undef @f; 1554 $test_names{$locales_test_number} = 'Verify that [:xdigit:] is a subset of [:graph:]'; 1555 for (map { chr } 0..255) { 1556 if ($is_utf8_locale) { 1557 use locale ':not_characters'; 1558 push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/; 1559 } 1560 else { 1561 push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/; 1562 } 1563 } 1564 report_multi_result($Locale, $locales_test_number, \@f); 1565 1566 # Note that xdigit doesn't have to be a subset of alnum 1567 1568 ++$locales_test_number; 1569 undef @f; 1570 $test_names{$locales_test_number} = 'Verify that [:punct:] is a subset of [:graph:]'; 1571 for (map { chr } 0..255) { 1572 if ($is_utf8_locale) { 1573 use locale ':not_characters'; 1574 push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/; 1575 } 1576 else { 1577 push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/; 1578 } 1579 } 1580 report_multi_result($Locale, $locales_test_number, \@f); 1581 1582 ++$locales_test_number; 1583 undef @f; 1584 $test_names{$locales_test_number} = 'Verify that the space character is not in [:graph:]'; 1585 if ($is_utf8_locale) { 1586 use locale ':not_characters'; 1587 push @f, " " if " " =~ /[[:graph:]]/; 1588 } 1589 else { 1590 push @f, " " if " " =~ /[[:graph:]]/; 1591 } 1592 report_multi_result($Locale, $locales_test_number, \@f); 1593 1594 ++$locales_test_number; 1595 undef @f; 1596 $test_names{$locales_test_number} = 'Verify that [:space:] contains at least [\f\n\r\t\cK ]'; 1597 for (' ', "\f", "\n", "\r", "\t", "\cK") { 1598 if ($is_utf8_locale) { 1599 use locale ':not_characters'; 1600 push @f, $_ unless /[[:space:]]/; 1601 } 1602 else { 1603 push @f, $_ unless /[[:space:]]/; 1604 } 1605 } 1606 report_multi_result($Locale, $locales_test_number, \@f); 1607 1608 ++$locales_test_number; 1609 undef @f; 1610 $test_names{$locales_test_number} = 'Verify that [:blank:] contains at least [\t ]'; 1611 for (' ', "\t") { 1612 if ($is_utf8_locale) { 1613 use locale ':not_characters'; 1614 push @f, $_ unless /[[:blank:]]/; 1615 } 1616 else { 1617 push @f, $_ unless /[[:blank:]]/; 1618 } 1619 } 1620 report_multi_result($Locale, $locales_test_number, \@f); 1621 1622 ++$locales_test_number; 1623 undef @f; 1624 $test_names{$locales_test_number} = 'Verify that [:blank:] is a subset of [:space:]'; 1625 for (map { chr } 0..255) { 1626 if ($is_utf8_locale) { 1627 use locale ':not_characters'; 1628 push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/; 1629 } 1630 else { 1631 push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/; 1632 } 1633 } 1634 report_multi_result($Locale, $locales_test_number, \@f); 1635 1636 ++$locales_test_number; 1637 undef @f; 1638 $test_names{$locales_test_number} = 'Verify that [:graph:] is a subset of [:print:]'; 1639 for (map { chr } 0..255) { 1640 if ($is_utf8_locale) { 1641 use locale ':not_characters'; 1642 push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/; 1643 } 1644 else { 1645 push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/; 1646 } 1647 } 1648 report_multi_result($Locale, $locales_test_number, \@f); 1649 1650 ++$locales_test_number; 1651 undef @f; 1652 $test_names{$locales_test_number} = 'Verify that the space character is in [:print:]'; 1653 if ($is_utf8_locale) { 1654 use locale ':not_characters'; 1655 push @f, " " if " " !~ /[[:print:]]/; 1656 } 1657 else { 1658 push @f, " " if " " !~ /[[:print:]]/; 1659 } 1660 report_multi_result($Locale, $locales_test_number, \@f); 1661 1662 ++$locales_test_number; 1663 undef @f; 1664 $test_names{$locales_test_number} = 'Verify that isn\'t both [:cntrl:] and [:print:]'; 1665 for (map { chr } 0..255) { 1666 if ($is_utf8_locale) { 1667 use locale ':not_characters'; 1668 push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/); 1669 } 1670 else { 1671 push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/); 1672 } 1673 } 1674 report_multi_result($Locale, $locales_test_number, \@f); 1675 1676 ++$locales_test_number; 1677 undef @f; 1678 $test_names{$locales_test_number} = 'Verify that isn\'t both [:alpha:] and [:digit:]'; 1679 for (map { chr } 0..255) { 1680 if ($is_utf8_locale) { 1681 use locale ':not_characters'; 1682 push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/; 1683 } 1684 else { 1685 push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/; 1686 } 1687 } 1688 report_multi_result($Locale, $locales_test_number, \@f); 1689 1690 ++$locales_test_number; 1691 undef @f; 1692 $test_names{$locales_test_number} = 'Verify that isn\'t both [:alnum:] and [:punct:]'; 1693 for (map { chr } 0..255) { 1694 if ($is_utf8_locale) { 1695 use locale ':not_characters'; 1696 push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/; 1697 } 1698 else { 1699 push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/; 1700 } 1701 } 1702 report_multi_result($Locale, $locales_test_number, \@f); 1703 1704 ++$locales_test_number; 1705 undef @f; 1706 $test_names{$locales_test_number} = 'Verify that isn\'t both [:xdigit:] and [:punct:]'; 1707 for (map { chr } 0..255) { 1708 if ($is_utf8_locale) { 1709 use locale ':not_characters'; 1710 push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/); 1711 } 1712 else { 1713 push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/); 1714 } 1715 } 1716 report_multi_result($Locale, $locales_test_number, \@f); 1717 1718 ++$locales_test_number; 1719 undef @f; 1720 $test_names{$locales_test_number} = 'Verify that isn\'t both [:graph:] and [:space:]'; 1721 for (map { chr } 0..255) { 1722 if ($is_utf8_locale) { 1723 use locale ':not_characters'; 1724 push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/); 1725 } 1726 else { 1727 push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/); 1728 } 1729 } 1730 report_multi_result($Locale, $locales_test_number, \@f); 1731 1732 foreach ($first_casing_test_number..$locales_test_number) { 1733 $problematical_tests{$_} = 1; 1734 } 1735 1736 1737 # Test for read-only scalars' locale vs non-locale comparisons. 1738 1739 { 1740 no locale; 1741 my $ok; 1742 $a = "qwerty"; 1743 if ($is_utf8_locale) { 1744 use locale ':not_characters'; 1745 $ok = ($a cmp "qwerty") == 0; 1746 } 1747 else { 1748 use locale; 1749 $ok = ($a cmp "qwerty") == 0; 1750 } 1751 report_result($Locale, ++$locales_test_number, $ok); 1752 $test_names{$locales_test_number} = 'Verify that cmp works with a read-only scalar; no- vs locale'; 1753 } 1754 1755 { 1756 my ($from, $to, $lesser, $greater, 1757 @test, %test, $test, $yes, $no, $sign); 1758 1759 ++$locales_test_number; 1760 $test_names{$locales_test_number} = 'Verify that "le", "ne", etc work'; 1761 $not_necessarily_a_problem_test_number = $locales_test_number; 1762 for (0..9) { 1763 # Select a slice. 1764 $from = int(($_*@{$posixes{'word'}})/10); 1765 $to = $from + int(@{$posixes{'word'}}/10); 1766 $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}}); 1767 $lesser = join('', @{$posixes{'word'}}[$from..$to]); 1768 # Select a slice one character on. 1769 $from++; $to++; 1770 $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}}); 1771 $greater = join('', @{$posixes{'word'}}[$from..$to]); 1772 if ($is_utf8_locale) { 1773 use locale ':not_characters'; 1774 ($yes, $no, $sign) = ($lesser lt $greater 1775 ? (" ", "not ", 1) 1776 : ("not ", " ", -1)); 1777 } 1778 else { 1779 use locale; 1780 ($yes, $no, $sign) = ($lesser lt $greater 1781 ? (" ", "not ", 1) 1782 : ("not ", " ", -1)); 1783 } 1784 # all these tests should FAIL (return 0). Exact lt or gt cannot 1785 # be tested because in some locales, say, eacute and E may test 1786 # equal. 1787 @test = 1788 ( 1789 $no.' ($lesser le $greater)', # 1 1790 'not ($lesser ne $greater)', # 2 1791 ' ($lesser eq $greater)', # 3 1792 $yes.' ($lesser ge $greater)', # 4 1793 $yes.' ($lesser ge $greater)', # 5 1794 $yes.' ($greater le $lesser )', # 7 1795 'not ($greater ne $lesser )', # 8 1796 ' ($greater eq $lesser )', # 9 1797 $no.' ($greater ge $lesser )', # 10 1798 'not (($lesser cmp $greater) == -($sign))' # 11 1799 ); 1800 @test{@test} = 0 x @test; 1801 $test = 0; 1802 for my $ti (@test) { 1803 if ($is_utf8_locale) { 1804 use locale ':not_characters'; 1805 $test{$ti} = eval $ti; 1806 } 1807 else { 1808 # Already in 'use locale'; 1809 $test{$ti} = eval $ti; 1810 } 1811 $test ||= $test{$ti} 1812 } 1813 report_result($Locale, $locales_test_number, $test == 0); 1814 if ($test) { 1815 debug "lesser = '$lesser'\n"; 1816 debug "greater = '$greater'\n"; 1817 debug "lesser cmp greater = ", 1818 $lesser cmp $greater, "\n"; 1819 debug "greater cmp lesser = ", 1820 $greater cmp $lesser, "\n"; 1821 debug "(greater) from = $from, to = $to\n"; 1822 for my $ti (@test) { 1823 debugf("# %-40s %-4s", $ti, 1824 $test{$ti} ? 'FAIL' : 'ok'); 1825 if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) { 1826 debugf("(%s == %4d)", $1, eval $1); 1827 } 1828 debugf("\n#"); 1829 } 1830 1831 last; 1832 } 1833 } 1834 1835 use locale; 1836 1837 my @sorted_controls; 1838 1839 ++$locales_test_number; 1840 $test_names{$locales_test_number} 1841 = 'Skip in locales where there are no controls;' 1842 . ' otherwise verify that \0 sorts before any (other) control'; 1843 if (! $posixes{'cntrl'}) { 1844 report_result($Locale, $locales_test_number, 1); 1845 1846 # We use all code points for the tests below since there aren't 1847 # any controls 1848 push @sorted_controls, chr $_ for 1..255; 1849 @sorted_controls = sort @sorted_controls; 1850 } 1851 else { 1852 @sorted_controls = @{$posixes{'cntrl'}}; 1853 push @sorted_controls, "\0", 1854 unless grep { $_ eq "\0" } @sorted_controls; 1855 @sorted_controls = sort @sorted_controls; 1856 my $output = ""; 1857 for my $control (@sorted_controls) { 1858 $output .= " " . disp_chars($control); 1859 } 1860 debug "sorted :cntrl: (plus NUL) = $output\n"; 1861 my $ok = $sorted_controls[0] eq "\0"; 1862 report_result($Locale, $locales_test_number, $ok); 1863 1864 shift @sorted_controls if $ok; 1865 } 1866 1867 my $lowest_control = $sorted_controls[0]; 1868 1869 ++$locales_test_number; 1870 $test_names{$locales_test_number} 1871 = 'Skip in locales where all controls have primary sorting weight; ' 1872 . 'otherwise verify that \0 doesn\'t have primary sorting weight'; 1873 if ("a${lowest_control}c" lt "ab") { 1874 report_result($Locale, $locales_test_number, 1); 1875 } 1876 else { 1877 my $ok = "ab" lt "a\0c"; 1878 report_result($Locale, $locales_test_number, $ok); 1879 } 1880 1881 ++$locales_test_number; 1882 $test_names{$locales_test_number} 1883 = 'Verify that strings with embedded NUL collate'; 1884 my $ok = "a\0a\0a" lt "a${lowest_control}a${lowest_control}a"; 1885 report_result($Locale, $locales_test_number, $ok); 1886 1887 ++$locales_test_number; 1888 $test_names{$locales_test_number} 1889 = 'Verify that strings with embedded NUL and ' 1890 . 'extra trailing NUL collate'; 1891 $ok = "a\0a\0" lt "a${lowest_control}a${lowest_control}"; 1892 report_result($Locale, $locales_test_number, $ok); 1893 1894 ++$locales_test_number; 1895 $test_names{$locales_test_number} 1896 = 'Verify that empty strings collate'; 1897 $ok = "" le ""; 1898 report_result($Locale, $locales_test_number, $ok); 1899 1900 ++$locales_test_number; 1901 $test_names{$locales_test_number} 1902 = "Skip in non-UTF-8 locales; otherwise verify that UTF8ness " 1903 . "doesn't matter with collation"; 1904 if (! $is_utf8_locale) { 1905 report_result($Locale, $locales_test_number, 1); 1906 } 1907 else { 1908 1909 # khw can't think of anything better. Start with a string that is 1910 # higher than its UTF-8 representation in both EBCDIC and ASCII 1911 my $string = chr utf8::unicode_to_native(0xff); 1912 my $utf8_string = $string; 1913 utf8::upgrade($utf8_string); 1914 1915 # 8 should be lt 9 in all locales (except ones that aren't 1916 # ASCII-based, which might fail this) 1917 $ok = ("a${string}8") lt ("a${utf8_string}9"); 1918 report_result($Locale, $locales_test_number, $ok); 1919 } 1920 1921 ++$locales_test_number; 1922 $test_names{$locales_test_number} 1923 = "Skip in UTF-8 locales; otherwise verify that single byte " 1924 . "collates before 0x100 and above"; 1925 if ($is_utf8_locale) { 1926 report_result($Locale, $locales_test_number, 1); 1927 } 1928 else { 1929 my $max_collating = chr 0; # Find byte that collates highest 1930 for my $i (0 .. 255) { 1931 my $char = chr $i; 1932 $max_collating = $char if $char gt $max_collating; 1933 } 1934 $ok = $max_collating lt chr 0x100; 1935 report_result($Locale, $locales_test_number, $ok); 1936 } 1937 1938 ++$locales_test_number; 1939 $test_names{$locales_test_number} 1940 = "Skip in UTF-8 locales; otherwise verify that 0x100 and " 1941 . "above collate in code point order"; 1942 if ($is_utf8_locale) { 1943 report_result($Locale, $locales_test_number, 1); 1944 } 1945 else { 1946 $ok = chr 0x100 lt chr 0x101; 1947 report_result($Locale, $locales_test_number, $ok); 1948 } 1949 } 1950 1951 my $ok1; 1952 my $ok2; 1953 my $ok3; 1954 my $ok4; 1955 my $ok5; 1956 my $ok6; 1957 my $ok7; 1958 my $ok8; 1959 my $ok9; 1960 my $ok10; 1961 my $ok11; 1962 my $ok12; 1963 my $ok13; 1964 my $ok14; 1965 my $ok14_5; 1966 my $ok15; 1967 my $ok16; 1968 my $ok17; 1969 my $ok18; 1970 my $ok19; 1971 my $ok20; 1972 my $ok21; 1973 1974 my $c; 1975 my $d; 1976 my $e; 1977 my $f; 1978 my $g; 1979 my $h; 1980 my $i; 1981 my $j; 1982 1983 if (! $is_utf8_locale) { 1984 use locale; 1985 1986 my ($x, $y) = (1.23, 1.23); 1987 1988 $a = "$x"; 1989 printf ''; # printf used to reset locale to "C" 1990 $b = "$y"; 1991 $ok1 = $a eq $b; 1992 1993 $c = "$x"; 1994 my $z = sprintf ''; # sprintf used to reset locale to "C" 1995 $d = "$y"; 1996 $ok2 = $c eq $d; 1997 { 1998 1999 use warnings; 2000 my $w = 0; 2001 local $SIG{__WARN__} = 2002 sub { 2003 print "# @_\n"; 2004 $w++; 2005 }; 2006 2007 # The == (among other ops) used to warn for locales 2008 # that had something else than "." as the radix character. 2009 2010 $ok3 = $c == 1.23; 2011 $ok4 = $c == $x; 2012 $ok5 = $c == $d; 2013 { 2014 no locale; 2015 2016 $e = "$x"; 2017 2018 $ok6 = $e == 1.23; 2019 $ok7 = $e == $x; 2020 $ok8 = $e == $c; 2021 } 2022 2023 $f = "1.23"; 2024 $g = 2.34; 2025 $h = 1.5; 2026 $i = 1.25; 2027 $j = "$h:$i"; 2028 2029 $ok9 = $f == 1.23; 2030 $ok10 = $f == $x; 2031 $ok11 = $f == $c; 2032 $ok12 = abs(($f + $g) - 3.57) < 0.01; 2033 $ok13 = $w == 0; 2034 $ok14 = $ok14_5 = $ok15 = $ok16 = 1; # Skip for non-utf8 locales 2035 } 2036 { 2037 no locale; 2038 $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i); 2039 } 2040 $ok18 = $j eq sprintf("%g:%g", $h, $i); 2041 } 2042 else { 2043 use locale ':not_characters'; 2044 2045 my ($x, $y) = (1.23, 1.23); 2046 $a = "$x"; 2047 printf ''; # printf used to reset locale to "C" 2048 $b = "$y"; 2049 $ok1 = $a eq $b; 2050 2051 $c = "$x"; 2052 my $z = sprintf ''; # sprintf used to reset locale to "C" 2053 $d = "$y"; 2054 $ok2 = $c eq $d; 2055 { 2056 use warnings; 2057 my $w = 0; 2058 local $SIG{__WARN__} = 2059 sub { 2060 print "# @_\n"; 2061 $w++; 2062 }; 2063 $ok3 = $c == 1.23; 2064 $ok4 = $c == $x; 2065 $ok5 = $c == $d; 2066 { 2067 no locale; 2068 $e = "$x"; 2069 2070 $ok6 = $e == 1.23; 2071 $ok7 = $e == $x; 2072 $ok8 = $e == $c; 2073 } 2074 2075 $f = "1.23"; 2076 $g = 2.34; 2077 $h = 1.5; 2078 $i = 1.25; 2079 $j = "$h:$i"; 2080 2081 $ok9 = $f == 1.23; 2082 $ok10 = $f == $x; 2083 $ok11 = $f == $c; 2084 $ok12 = abs(($f + $g) - 3.57) < 0.01; 2085 $ok13 = $w == 0; 2086 2087 # Look for non-ASCII error messages, and verify that the first 2088 # such is in UTF-8 (the others almost certainly will be like the 2089 # first). This is only done if the current locale has LC_MESSAGES 2090 $ok14 = 1; 2091 $ok14_5 = 1; 2092 if ( locales_enabled('LC_MESSAGES') 2093 && setlocale(&POSIX::LC_MESSAGES, $Locale)) 2094 { 2095 foreach my $err (keys %!) { 2096 use Errno; 2097 $! = eval "&Errno::$err"; # Convert to strerror() output 2098 my $errnum = 0+$!; 2099 my $strerror = "$!"; 2100 if ("$strerror" =~ /\P{ASCII}/) { 2101 $ok14 = utf8::is_utf8($strerror); 2102 no locale; 2103 $ok14_5 = "$!" !~ /\P{ASCII}/; 2104 debug( disp_str( 2105 "non-ASCII \$! for error $errnum='$strerror'")) 2106 if ! $ok14_5; 2107 last; 2108 } 2109 } 2110 } 2111 2112 # Similarly, we verify that a non-ASCII radix is in UTF-8. This 2113 # also catches if there is a disparity between sprintf and 2114 # stringification. 2115 2116 my $string_g = "$g"; 2117 my $sprintf_g = sprintf("%g", $g); 2118 2119 $ok15 = $string_g =~ / ^ \p{ASCII}+ $ /x || utf8::is_utf8($string_g); 2120 $ok16 = $sprintf_g eq $string_g; 2121 } 2122 { 2123 no locale; 2124 $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i); 2125 } 2126 $ok18 = $j eq sprintf("%g:%g", $h, $i); 2127 } 2128 2129 $ok19 = $ok20 = 1; 2130 if (setlocale(&POSIX::LC_TIME, $Locale)) { # These tests aren't affected by 2131 # :not_characters 2132 my @times = CORE::localtime(); 2133 2134 use locale; 2135 $ok19 = POSIX::strftime("%p", @times) ne "%p"; # [perl #119425] 2136 my $date = POSIX::strftime("'%A' '%B' '%Z' '%p'", @times); 2137 debug("'Day' 'Month' 'TZ' 'am/pm' = ", disp_str($date)); 2138 2139 # If there is any non-ascii, it better be UTF-8 in a UTF-8 locale, and 2140 # not UTF-8 if the locale isn't UTF-8. 2141 $ok20 = $date =~ / ^ \p{ASCII}+ $ /x 2142 || $is_utf8_locale == utf8::is_utf8($date); 2143 } 2144 2145 $ok21 = 1; 2146 if (locales_enabled('LC_MESSAGES')) { 2147 foreach my $err (keys %!) { 2148 no locale; 2149 use Errno; 2150 $! = eval "&Errno::$err"; # Convert to strerror() output 2151 my $strerror = "$!"; 2152 if ($strerror =~ /\P{ASCII}/) { 2153 $ok21 = 0; 2154 debug(disp_str("non-ASCII strerror=$strerror")); 2155 last; 2156 } 2157 } 2158 } 2159 2160 report_result($Locale, ++$locales_test_number, $ok1); 2161 $test_names{$locales_test_number} = 'Verify that an intervening printf doesn\'t change assignment results'; 2162 my $first_a_test = $locales_test_number; 2163 2164 debug "$first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n"; 2165 2166 report_result($Locale, ++$locales_test_number, $ok2); 2167 $test_names{$locales_test_number} = 'Verify that an intervening sprintf doesn\'t change assignment results'; 2168 2169 my $first_c_test = $locales_test_number; 2170 2171 $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant'; 2172 report_result($Locale, $locales_test_number, $ok3); 2173 $problematical_tests{$locales_test_number} = 1; 2174 2175 $test_names{++$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar'; 2176 report_result($Locale, $locales_test_number, $ok4); 2177 $problematical_tests{$locales_test_number} = 1; 2178 2179 report_result($Locale, ++$locales_test_number, $ok5); 2180 $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar and an intervening sprintf'; 2181 $problematical_tests{$locales_test_number} = 1; 2182 2183 debug "$first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n"; 2184 2185 report_result($Locale, ++$locales_test_number, $ok6); 2186 $test_names{$locales_test_number} = 'Verify that can assign stringified under inner no-locale block'; 2187 my $first_e_test = $locales_test_number; 2188 2189 report_result($Locale, ++$locales_test_number, $ok7); 2190 $test_names{$locales_test_number} = 'Verify that "==" with a scalar still works in inner no locale'; 2191 2192 $test_names{++$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale'; 2193 report_result($Locale, $locales_test_number, $ok8); 2194 $problematical_tests{$locales_test_number} = 1; 2195 2196 debug "$first_e_test..$locales_test_number: \$e = $e, no locale\n"; 2197 2198 report_result($Locale, ++$locales_test_number, $ok9); 2199 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a constant'; 2200 $problematical_tests{$locales_test_number} = 1; 2201 my $first_f_test = $locales_test_number; 2202 2203 report_result($Locale, ++$locales_test_number, $ok10); 2204 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar'; 2205 $problematical_tests{$locales_test_number} = 1; 2206 2207 $test_names{++$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf'; 2208 report_result($Locale, $locales_test_number, $ok11); 2209 $problematical_tests{$locales_test_number} = 1; 2210 2211 report_result($Locale, ++$locales_test_number, $ok12); 2212 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix can participate in an addition and function call as numeric'; 2213 $problematical_tests{$locales_test_number} = 1; 2214 2215 report_result($Locale, ++$locales_test_number, $ok13); 2216 $test_names{$locales_test_number} = 'Verify that don\'t get warning under "==" even if radix is not a dot'; 2217 $problematical_tests{$locales_test_number} = 1; 2218 2219 report_result($Locale, ++$locales_test_number, $ok14); 2220 $test_names{$locales_test_number} = 'Verify that non-ASCII UTF-8 error messages are in UTF-8'; 2221 2222 report_result($Locale, ++$locales_test_number, $ok14_5); 2223 $test_names{$locales_test_number} = '... and are ASCII outside "use locale"'; 2224 2225 report_result($Locale, ++$locales_test_number, $ok15); 2226 $test_names{$locales_test_number} = 'Verify that a number with a UTF-8 radix has a UTF-8 stringification'; 2227 $problematical_tests{$locales_test_number} = 1; 2228 2229 report_result($Locale, ++$locales_test_number, $ok16); 2230 $test_names{$locales_test_number} = 'Verify that a sprintf of a number with a UTF-8 radix yields UTF-8'; 2231 $problematical_tests{$locales_test_number} = 1; 2232 2233 report_result($Locale, ++$locales_test_number, $ok17); 2234 $test_names{$locales_test_number} = 'Verify that a sprintf of a number outside locale scope uses a dot radix'; 2235 2236 report_result($Locale, ++$locales_test_number, $ok18); 2237 $test_names{$locales_test_number} = 'Verify that a sprintf of a number back within locale scope uses locale radix'; 2238 $problematical_tests{$locales_test_number} = 1; 2239 2240 report_result($Locale, ++$locales_test_number, $ok19); 2241 $test_names{$locales_test_number} = 'Verify that strftime doesn\'t return "%p" in locales where %p is empty'; 2242 2243 report_result($Locale, ++$locales_test_number, $ok20); 2244 $test_names{$locales_test_number} = 'Verify that strftime returns date with UTF-8 flag appropriately set'; 2245 $problematical_tests{$locales_test_number} = 1; # This is broken in 2246 # OS X 10.9.3 2247 2248 report_result($Locale, ++$locales_test_number, $ok21); 2249 $test_names{$locales_test_number} = '"$!" is ASCII-only outside of locale scope'; 2250 2251 debug "$first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n"; 2252 2253 # Does taking lc separately differ from taking 2254 # the lc "in-line"? (This was the bug 19990704.002 (#965), change #3568.) 2255 # The bug was in the caching of the 'o'-magic. 2256 if (! $is_utf8_locale) { 2257 use locale; 2258 2259 sub lcA { 2260 my $lc0 = lc $_[0]; 2261 my $lc1 = lc $_[1]; 2262 return $lc0 cmp $lc1; 2263 } 2264 2265 sub lcB { 2266 return lc($_[0]) cmp lc($_[1]); 2267 } 2268 2269 my $x = "ab"; 2270 my $y = "aa"; 2271 my $z = "AB"; 2272 2273 report_result($Locale, ++$locales_test_number, 2274 lcA($x, $y) == 1 && lcB($x, $y) == 1 || 2275 lcA($x, $z) == 0 && lcB($x, $z) == 0); 2276 } 2277 else { 2278 use locale ':not_characters'; 2279 2280 sub lcC { 2281 my $lc0 = lc $_[0]; 2282 my $lc1 = lc $_[1]; 2283 return $lc0 cmp $lc1; 2284 } 2285 2286 sub lcD { 2287 return lc($_[0]) cmp lc($_[1]); 2288 } 2289 2290 my $x = "ab"; 2291 my $y = "aa"; 2292 my $z = "AB"; 2293 2294 report_result($Locale, ++$locales_test_number, 2295 lcC($x, $y) == 1 && lcD($x, $y) == 1 || 2296 lcC($x, $z) == 0 && lcD($x, $z) == 0); 2297 } 2298 $test_names{$locales_test_number} = 'Verify "lc(foo) cmp lc(bar)" is the same as using intermediaries for the cmp'; 2299 2300 # Does lc of an UPPER (if different from the UPPER) match 2301 # case-insensitively the UPPER, and does the UPPER match 2302 # case-insensitively the lc of the UPPER. And vice versa. 2303 { 2304 use locale; 2305 no utf8; 2306 my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/; 2307 2308 my @f = (); 2309 ++$locales_test_number; 2310 $test_names{$locales_test_number} = 'Verify case insensitive matching works'; 2311 foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) { 2312 if (! $is_utf8_locale) { 2313 my $y = lc $x; 2314 next unless uc $y eq $x; 2315 debug_more( "UPPER=", disp_chars(($x)), 2316 "; lc=", disp_chars(($y)), "; ", 2317 "; fc=", disp_chars((fc $x)), "; ", 2318 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", 2319 $x =~ /\Q$y/i ? 1 : 0, 2320 "; ", 2321 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", 2322 $y =~ /\Q$x/i ? 1 : 0, 2323 "\n"); 2324 # 2325 # If $x and $y contain regular expression characters 2326 # AND THEY lowercase (/i) to regular expression characters, 2327 # regcomp() will be mightily confused. No, the \Q doesn't 2328 # help here (maybe regex engine internal lowercasing 2329 # is done after the \Q?) An example of this happening is 2330 # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS): 2331 # the chr(173) (the "[") is the lowercase of the chr(235). 2332 # 2333 # Similarly losing EBCDIC locales include cs_cz, cs_CZ, 2334 # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!), 2335 # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT, 2336 # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037, 2337 # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU, 2338 # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR. 2339 # 2340 # Similar things can happen even under (bastardised) 2341 # non-EBCDIC locales: in many European countries before the 2342 # advent of ISO 8859-x nationally customised versions of 2343 # ISO 646 were devised, reusing certain punctuation 2344 # characters for modified characters needed by the 2345 # country/language. For example, the "|" might have 2346 # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS. 2347 # 2348 if ($x =~ $re || $y =~ $re) { 2349 print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n"; 2350 next; 2351 } 2352 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i; 2353 2354 # fc is not a locale concept, so Perl uses lc for it. 2355 push @f, $x unless lc $x eq fc $x; 2356 } 2357 else { 2358 use locale ':not_characters'; 2359 my $y = lc $x; 2360 next unless uc $y eq $x; 2361 debug_more( "UPPER=", disp_chars(($x)), 2362 "; lc=", disp_chars(($y)), "; ", 2363 "; fc=", disp_chars((fc $x)), "; ", 2364 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", 2365 $x =~ /\Q$y/i ? 1 : 0, 2366 "; ", 2367 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", 2368 $y =~ /\Q$x/i ? 1 : 0, 2369 "\n"); 2370 2371 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i; 2372 2373 # The places where Unicode's lc is different from fc are 2374 # skipped here by virtue of the 'next unless uc...' line above 2375 push @f, $x unless lc $x eq fc $x; 2376 } 2377 } 2378 2379 foreach my $x (sort { ord $a <=> ord $b } keys %lower) { 2380 if (! $is_utf8_locale) { 2381 my $y = uc $x; 2382 next unless lc $y eq $x; 2383 debug_more( "lower=", disp_chars(($x)), 2384 "; uc=", disp_chars(($y)), "; ", 2385 "; fc=", disp_chars((fc $x)), "; ", 2386 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", 2387 $x =~ /\Q$y/i ? 1 : 0, 2388 "; ", 2389 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", 2390 $y =~ /\Q$x/i ? 1 : 0, 2391 "\n"); 2392 if ($x =~ $re || $y =~ $re) { # See above. 2393 print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n"; 2394 next; 2395 } 2396 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i; 2397 2398 push @f, $x unless lc $x eq fc $x; 2399 } 2400 else { 2401 use locale ':not_characters'; 2402 my $y = uc $x; 2403 next unless lc $y eq $x; 2404 debug_more( "lower=", disp_chars(($x)), 2405 "; uc=", disp_chars(($y)), "; ", 2406 "; fc=", disp_chars((fc $x)), "; ", 2407 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", 2408 $x =~ /\Q$y/i ? 1 : 0, 2409 "; ", 2410 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", 2411 $y =~ /\Q$x/i ? 1 : 0, 2412 "\n"); 2413 push @f, $x unless $x =~ /\Q$y/i && $y =~ /\Q$x/i; 2414 2415 push @f, $x unless lc $x eq fc $x; 2416 } 2417 } 2418 report_multi_result($Locale, $locales_test_number, \@f); 2419 $problematical_tests{$locales_test_number} = 1; 2420 } 2421 2422 # [perl #109318] 2423 { 2424 my @f = (); 2425 ++$locales_test_number; 2426 $test_names{$locales_test_number} = 'Verify atof with locale radix and negative exponent'; 2427 $problematical_tests{$locales_test_number} = 1; 2428 2429 my $radix = POSIX::localeconv()->{decimal_point}; 2430 my @nums = ( 2431 "3.14e+9", "3${radix}14e+9", "3.14e-9", "3${radix}14e-9", 2432 "-3.14e+9", "-3${radix}14e+9", "-3.14e-9", "-3${radix}14e-9", 2433 ); 2434 2435 if (! $is_utf8_locale) { 2436 use locale; 2437 for my $num (@nums) { 2438 push @f, $num 2439 unless sprintf("%g", $num) =~ /3.+14/; 2440 } 2441 } 2442 else { 2443 use locale ':not_characters'; 2444 for my $num (@nums) { 2445 push @f, $num 2446 unless sprintf("%g", $num) =~ /3.+14/; 2447 } 2448 } 2449 2450 report_result($Locale, $locales_test_number, @f == 0); 2451 if (@f) { 2452 print "# failed $locales_test_number locale '$Locale' numbers @f\n" 2453 } 2454 } 2455} 2456 2457my $final_locales_test_number = $locales_test_number; 2458 2459# Recount the errors. 2460 2461TEST_NUM: 2462foreach $test_num ($first_locales_test_number..$final_locales_test_number) { 2463 my $has_non_global_failure = $Problem{$test_num} 2464 || ! defined $Okay{$test_num} 2465 || ! @{$Okay{$test_num}}; 2466 print "not " if %setlocale_failed || $has_non_global_failure; 2467 print "ok $test_num"; 2468 $test_names{$test_num} = "" unless defined $test_names{$test_num}; 2469 2470 # If TODO is in the test name, make it thus 2471 my $todo = $test_names{$test_num} =~ s/\s*TODO\s*//; 2472 print " $test_names{$test_num}"; 2473 if ($todo) { 2474 print " # TODO\n"; 2475 } 2476 elsif (%setlocale_failed || ! $has_non_global_failure) { 2477 print "\n"; 2478 } 2479 elsif ($has_non_global_failure) { 2480 2481 # If there are any locales that pass this test, or are known-bad, it 2482 # may be that there are enough passes that we TODO the failure, but 2483 # only for tests that we have decided can be problematical. 2484 if ( ($Okay{$test_num} || $Known_bad_locale{$test_num}) 2485 && grep { $_ == $test_num } keys %problematical_tests) 2486 { 2487 # Don't count the known-bad failures when calculating the 2488 # percentage that fail. 2489 my $known_failures = (exists $Known_bad_locale{$test_num}) 2490 ? scalar(keys $Known_bad_locale{$test_num}->%*) 2491 : 0; 2492 my $adjusted_failures = scalar(keys $Problem{$test_num}->%*) 2493 - $known_failures; 2494 2495 # Specially handle failures where only known-bad locales fail. 2496 # This makes the diagnositics clearer. 2497 if ($adjusted_failures <= 0) { 2498 print " # TODO fails only on known bad locales: ", 2499 join " ", keys $Known_bad_locale{$test_num}->%*, "\n"; 2500 next TEST_NUM; 2501 } 2502 2503 # Round to nearest .1% 2504 my $percent_fail = (int(.5 + (1000 * $adjusted_failures 2505 / scalar(@Locale)))) 2506 / 10; 2507 $todo = $percent_fail < $acceptable_failure_percentage; 2508 print " # TODO" if $todo; 2509 print "\n"; 2510 2511 if ($debug) { 2512 print "# $percent_fail% of locales (", 2513 scalar(keys $Problem{$test_num}->%*), 2514 " of ", 2515 scalar(@Locale), 2516 ") fail the above test (TODO cut-off is ", 2517 $acceptable_failure_percentage, 2518 "%)\n"; 2519 } 2520 elsif ($todo) { 2521 print "# ", 100 - $percent_fail, "% of locales not known to be problematic on this platform\n"; 2522 print "# pass the above test, so it is likely that the failures\n"; 2523 print "# are errors in the locale definitions. The test is marked TODO, as the\n"; 2524 print "# problem is not likely to be Perl's\n"; 2525 } 2526 } 2527 2528 if ($debug) { 2529 print "# The code points that had this failure are given above. Look for lines\n"; 2530 print "# that match 'failed $test_num'\n"; 2531 } 2532 else { 2533 print "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n"; 2534 print "# Then look at that output for lines that match 'failed $test_num'\n"; 2535 } 2536 if (defined $not_necessarily_a_problem_test_number 2537 && $test_num == $not_necessarily_a_problem_test_number) 2538 { 2539 print "# The failure of test $not_necessarily_a_problem_test_number is not necessarily fatal.\n"; 2540 print "# It usually indicates a problem in the environment,\n"; 2541 print "# not in Perl itself.\n"; 2542 } 2543 } 2544} 2545 2546$test_num = $final_locales_test_number; 2547 2548if ( ! defined $Config{d_setlocale_accepts_any_locale_name}) { 2549 # perl #115808 2550 use warnings; 2551 my $warned = 0; 2552 local $SIG{__WARN__} = sub { 2553 $warned = $_[0] =~ /uninitialized/; 2554 }; 2555 my $z = "y" . setlocale(&POSIX::LC_ALL, "xyzzy"); 2556 ok($warned, "variable set to setlocale(\"invalid locale name\") is considered uninitialized"); 2557} 2558 2559# Test that tainting and case changing works on utf8 strings. These tests are 2560# placed last to avoid disturbing the hard-coded test numbers that existed at 2561# the time these were added above this in this file. 2562# This also tests that locale overrides unicode_strings in the same scope for 2563# non-utf8 strings. 2564setlocale(&POSIX::LC_ALL, "C"); 2565{ 2566 use locale; 2567 use feature 'unicode_strings'; 2568 2569 foreach my $function ("uc", "ucfirst", "lc", "lcfirst", "fc") { 2570 my @list; # List of code points to test for $function 2571 2572 # Used to calculate the changed case for ASCII characters by using the 2573 # ord, instead of using one of the functions under test. 2574 my $ascii_case_change_delta; 2575 my $above_latin1_case_change_delta; # Same for the specific ords > 255 2576 # that we use 2577 2578 # We test an ASCII character, which should change case; 2579 # a Latin1 character, which shouldn't change case under this C locale, 2580 # an above-Latin1 character that when the case is changed would cross 2581 # the 255/256 boundary, so doesn't change case 2582 # (the \x{149} is one of these, but changes into 2 characters, the 2583 # first one of which doesn't cross the boundary. 2584 # the final one in each list is an above-Latin1 character whose case 2585 # does change. The code below uses its position in its list as a 2586 # marker to indicate that it, unlike the other code points above 2587 # ASCII, has a successful case change 2588 # 2589 # All casing operations under locale (but not :not_characters) should 2590 # taint 2591 if ($function =~ /^u/) { 2592 @list = ("", "a", 2593 chr(utf8::unicode_to_native(0xe0)), 2594 chr(utf8::unicode_to_native(0xff)), 2595 "\x{fb00}", "\x{149}", "\x{101}"); 2596 $ascii_case_change_delta = ($is_ebcdic) ? +64 : -32; 2597 $above_latin1_case_change_delta = -1; 2598 } 2599 else { 2600 @list = ("", "A", 2601 chr(utf8::unicode_to_native(0xC0)), 2602 "\x{17F}", "\x{100}"); 2603 $ascii_case_change_delta = ($is_ebcdic) ? -64 : +32; 2604 $above_latin1_case_change_delta = +1; 2605 } 2606 foreach my $is_utf8_locale (0 .. 1) { 2607 foreach my $j (0 .. $#list) { 2608 my $char = $list[$j]; 2609 2610 for my $encoded_in_utf8 (0 .. 1) { 2611 my $should_be; 2612 my $changed; 2613 if (! $is_utf8_locale) { 2614 no warnings 'locale'; 2615 $should_be = ($j == $#list) 2616 ? chr(ord($char) + $above_latin1_case_change_delta) 2617 : (length $char == 0 || utf8::native_to_unicode(ord($char)) > 127) 2618 ? $char 2619 : chr(ord($char) + $ascii_case_change_delta); 2620 2621 # This monstrosity is in order to avoid using an eval, 2622 # which might perturb the results 2623 $changed = ($function eq "uc") 2624 ? uc($char) 2625 : ($function eq "ucfirst") 2626 ? ucfirst($char) 2627 : ($function eq "lc") 2628 ? lc($char) 2629 : ($function eq "lcfirst") 2630 ? lcfirst($char) 2631 : ($function eq "fc") 2632 ? fc($char) 2633 : die("Unexpected function \"$function\""); 2634 } 2635 else { 2636 { 2637 no locale; 2638 2639 # For utf8-locales the case changing functions 2640 # should work just like they do outside of locale. 2641 # Can use eval here because not testing it when 2642 # not in locale. 2643 $should_be = eval "$function('$char')"; 2644 die "Unexpected eval error $@ from 'eval \"$function('$char')\"'" if $@; 2645 2646 } 2647 use locale ':not_characters'; 2648 $changed = ($function eq "uc") 2649 ? uc($char) 2650 : ($function eq "ucfirst") 2651 ? ucfirst($char) 2652 : ($function eq "lc") 2653 ? lc($char) 2654 : ($function eq "lcfirst") 2655 ? lcfirst($char) 2656 : ($function eq "fc") 2657 ? fc($char) 2658 : die("Unexpected function \"$function\""); 2659 } 2660 ok($changed eq $should_be, 2661 "$function(\"$char\") in C locale " 2662 . (($is_utf8_locale) 2663 ? "(use locale ':not_characters'" 2664 : "(use locale") 2665 . (($encoded_in_utf8) 2666 ? "; encoded in utf8)" 2667 : "; not encoded in utf8)") 2668 . " should be \"$should_be\", got \"$changed\""); 2669 2670 # Tainting shouldn't happen for use locale :not_character 2671 # (a utf8 locale) 2672 (! $is_utf8_locale) 2673 ? check_taint($changed) 2674 : check_taint_not($changed); 2675 2676 # Use UTF-8 next time through the loop 2677 utf8::upgrade($char); 2678 } 2679 } 2680 } 2681 } 2682} 2683 2684# Give final advice. 2685 2686my $didwarn = 0; 2687 2688foreach ($first_locales_test_number..$final_locales_test_number) { 2689 if ($Problem{$_}) { 2690 my @f = sort keys %{ $Problem{$_} }; 2691 2692 # Don't list the failures caused by known-bad locales. 2693 if (exists $known_bad_locales{$os}) { 2694 @f = grep { $_ !~ $known_bad_locales{$os} } @f; 2695 next unless @f; 2696 } 2697 my $f = join(" ", @f); 2698 $f =~ s/(.{50,60}) /$1\n#\t/g; 2699 print 2700 "#\n", 2701 "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n", 2702 "#\t", $f, "\n#\n", 2703 "# on your system may have errors because the locale test $_\n", 2704 "# \"$test_names{$_}\"\n", 2705 "# failed in ", (@f == 1 ? "that locale" : "those locales"), 2706 ".\n"; 2707 print <<EOW; 2708# 2709# If your users are not using these locales you are safe for the moment, 2710# but please report this failure first to perlbug\@perl.org using the 2711# perlbug script (as described in the INSTALL file) so that the exact 2712# details of the failures can be sorted out first and then your operating 2713# system supplier can be alerted about these anomalies. 2714# 2715EOW 2716 $didwarn = 1; 2717 } 2718} 2719 2720# Tell which locales were okay and which were not. 2721 2722if ($didwarn) { 2723 my (@s, @F); 2724 2725 foreach my $l (@Locale) { 2726 my $p = 0; 2727 if ($setlocale_failed{$l}) { 2728 $p++; 2729 } 2730 else { 2731 foreach my $t 2732 ($first_locales_test_number..$final_locales_test_number) 2733 { 2734 $p++ if $Problem{$t}{$l}; 2735 } 2736 } 2737 push @s, $l if $p == 0; 2738 push @F, $l unless $p == 0; 2739 } 2740 2741 if (@s) { 2742 my $s = join(" ", @s); 2743 $s =~ s/(.{50,60}) /$1\n#\t/g; 2744 2745 print 2746 "# The following locales\n#\n", 2747 "#\t", $s, "\n#\n", 2748 "# tested okay.\n#\n", 2749 } else { 2750 print "# None of your locales were fully okay.\n"; 2751 } 2752 2753 if (@F) { 2754 my $F = join(" ", @F); 2755 $F =~ s/(.{50,60}) /$1\n#\t/g; 2756 2757 my $details = ""; 2758 unless ($debug) { 2759 $details = "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n"; 2760 } 2761 elsif ($debug == 1) { 2762 $details = "# For even more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=2.\n"; 2763 } 2764 2765 print 2766 "# The following locales\n#\n", 2767 "#\t", $F, "\n#\n", 2768 "# had problems.\n#\n", 2769 $details; 2770 } else { 2771 print "# None of your locales were broken.\n"; 2772 } 2773} 2774 2775if (exists $known_bad_locales{$os} && ! %Known_bad_locale) { 2776 $test_num++; 2777 print "ok $test_num $^O no longer has known bad locales # TODO\n"; 2778} 2779 2780print "1..$test_num\n"; 2781 2782# eof 2783