1#!./perl -wT 2 3# This tests plain 'use locale' and adorned 'use locale ":not_characters"' 4# Because these pragmas are compile time, and I (khw) am trying to test 5# without using 'eval' as much as possible, which might cloud the issue, the 6# crucial parts of the code are duplicated in a block for each pragma. 7 8# To make a TODO test, add the string 'TODO' to its %test_names value 9 10binmode STDOUT, ':utf8'; 11binmode STDERR, ':utf8'; 12 13BEGIN { 14 chdir 't' if -d 't'; 15 @INC = '../lib'; 16 unshift @INC, '.'; 17 require Config; import Config; 18 if (!$Config{d_setlocale} || $Config{ccflags} =~ /\bD?NO_LOCALE(_|\b)/) { 19 print "1..0\n"; 20 exit; 21 } 22 require './loc_tools.pl'; 23 $| = 1; 24} 25 26use strict; 27use feature 'fc'; 28 29# =1 adds debugging output; =2 increases the verbosity somewhat 30my $debug = $ENV{PERL_DEBUG_FULL_TEST} // 0; 31 32# Certain tests have been shown to be problematical for a few locales. Don't 33# fail them unless at least this percentage of the tested locales fail. 34# Some Windows machines are defective in every locale but the C, calling \t 35# printable; superscripts to be digits, etc. See 36# http://markmail.org/message/5jwam4xsx4amsdnv. Also on AIX machines, many 37# locales call a no-break space a graphic. 38# (There aren't 1000 locales currently in existence, so 99.9 works) 39my $acceptable_failure_percentage = ($^O =~ / ^ ( MSWin32 | AIX ) $ /ix) 40 ? 99.9 41 : 5; 42 43# The list of test numbers of the problematic tests. 44my %problematical_tests; 45 46 47use Dumpvalue; 48 49my $dumper = Dumpvalue->new( 50 tick => qq{"}, 51 quoteHighBit => 0, 52 unctrl => "quote" 53 ); 54sub debug { 55 return unless $debug; 56 my($mess) = join "", @_; 57 chop $mess; 58 print $dumper->stringify($mess,1), "\n"; 59} 60 61sub debug_more { 62 return unless $debug > 1; 63 return debug(@_); 64} 65 66sub debugf { 67 printf @_ if $debug; 68} 69 70$a = 'abc %'; 71 72my $test_num = 0; 73 74sub ok { 75 my ($result, $message) = @_; 76 $message = "" unless defined $message; 77 78 print 'not ' unless ($result); 79 print "ok " . ++$test_num; 80 print " $message"; 81 print "\n"; 82} 83 84# First we'll do a lot of taint checking for locales. 85# This is the easiest to test, actually, as any locale, 86# even the default locale will taint under 'use locale'. 87 88sub is_tainted { # hello, camel two. 89 no warnings 'uninitialized' ; 90 my $dummy; 91 local $@; 92 not eval { $dummy = join("", @_), kill 0; 1 } 93} 94 95sub check_taint ($;$) { 96 my $message_tail = $_[1] // ""; 97 $message_tail = ": $message_tail" if $message_tail; 98 ok is_tainted($_[0]), "verify that is tainted$message_tail"; 99} 100 101sub check_taint_not ($;$) { 102 my $message_tail = $_[1] // ""; 103 $message_tail = ": $message_tail" if $message_tail; 104 ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail"); 105} 106 107"\tb\t" =~ /^m?(\s)(.*)\1$/; 108check_taint_not $&, "not tainted outside 'use locale'"; 109; 110 111use locale; # engage locale and therefore locale taint. 112 113check_taint_not $a, "\t\$a"; 114 115check_taint uc($a); 116check_taint "\U$a"; 117check_taint ucfirst($a); 118check_taint "\u$a"; 119check_taint lc($a); 120check_taint fc($a); 121check_taint "\L$a"; 122check_taint "\F$a"; 123check_taint lcfirst($a); 124check_taint "\l$a"; 125 126check_taint_not sprintf('%e', 123.456); 127check_taint_not sprintf('%f', 123.456); 128check_taint_not sprintf('%g', 123.456); 129check_taint_not sprintf('%d', 123.456); 130check_taint_not sprintf('%x', 123.456); 131 132$_ = $a; # untaint $_ 133 134$_ = uc($a); # taint $_ 135 136check_taint $_, "\t\$_"; 137 138/(\w)/; # taint $&, $`, $', $+, $1. 139check_taint $&, "\t/(\\w)/ \$&"; 140check_taint $`, "\t\$`"; 141check_taint $', "\t\$'"; 142check_taint $+, "\t\$+"; 143check_taint $1, "\t\$1"; 144check_taint_not $2, "\t\$2"; 145 146/(.)/; # untaint $&, $`, $', $+, $1. 147check_taint_not $&, "\t/(.)/ \$&"; 148check_taint_not $`, "\t\$`"; 149check_taint_not $', "\t\$'"; 150check_taint_not $+, "\t\$+"; 151check_taint_not $1, "\t\$1"; 152check_taint_not $2, "\t\$2"; 153 154/(\W)/; # taint $&, $`, $', $+, $1. 155check_taint $&, "\t/(\\W)/ \$&"; 156check_taint $`, "\t\$`"; 157check_taint $', "\t\$'"; 158check_taint $+, "\t\$+"; 159check_taint $1, "\t\$1"; 160check_taint_not $2, "\t\$2"; 161 162/(.)/; # untaint $&, $`, $', $+, $1. 163check_taint_not $&, "\t/(.)/ \$&"; 164check_taint_not $`, "\t\$`"; 165check_taint_not $', "\t\$'"; 166check_taint_not $+, "\t\$+"; 167check_taint_not $1, "\t\$1"; 168check_taint_not $2, "\t\$2"; 169 170/(\s)/; # taint $&, $`, $', $+, $1. 171check_taint $&, "\t/(\\s)/ \$&"; 172check_taint $`, "\t\$`"; 173check_taint $', "\t\$'"; 174check_taint $+, "\t\$+"; 175check_taint $1, "\t\$1"; 176check_taint_not $2, "\t\$2"; 177 178/(.)/; # untaint $&, $`, $', $+, $1. 179check_taint_not $&, "\t/(.)/ \$&"; 180 181/(\S)/; # taint $&, $`, $', $+, $1. 182check_taint $&, "\t/(\\S)/ \$&"; 183check_taint $`, "\t\$`"; 184check_taint $', "\t\$'"; 185check_taint $+, "\t\$+"; 186check_taint $1, "\t\$1"; 187check_taint_not $2, "\t\$2"; 188 189/(.)/; # untaint $&, $`, $', $+, $1. 190check_taint_not $&, "\t/(.)/ \$&"; 191 192"a" =~ /(a)|(\w)/; # taint $&, $`, $', $+, $1. 193check_taint $&, "\t/(a)|(\\w)/ \$&"; 194check_taint $`, "\t\$`"; 195check_taint $', "\t\$'"; 196check_taint $+, "\t\$+"; 197check_taint $1, "\t\$1"; 198ok($1 eq 'a', ("\t" x 4) . "\$1 is 'a'"); 199ok(! defined $2, ("\t" x 4) . "\$2 is undefined"); 200check_taint_not $2, "\t\$2"; 201check_taint_not $3, "\t\$3"; 202 203/(.)/; # untaint $&, $`, $', $+, $1. 204check_taint_not $&, "\t/(.)/ \$&"; 205 206"\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i; # no tainting because no locale dependence 207check_taint_not $&, "\t/(\\N{CYRILLIC CAPITAL LETTER A})/i \$&"; 208check_taint_not $`, "\t\$`"; 209check_taint_not $', "\t\$'"; 210check_taint_not $+, "\t\$+"; 211check_taint_not $1, "\t\$1"; 212ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\$1 is 'small cyrillic a'"); 213check_taint_not $2, "\t\$2"; 214 215/(.)/; # untaint $&, $`, $', $+, $1. 216check_taint_not $&, "\t/./ \$&"; 217 218/(.)/; # untaint $&, $`, $', $+, $1. 219check_taint_not $&, "\t/(.)/ \$&"; 220 221"a:" =~ /(.)\b(.)/; # taint $&, $`, $', $+, $1. 222check_taint $&, "\t/(.)\\b(.)/ \$&"; 223check_taint $`, "\t\$`"; 224check_taint $', "\t\$'"; 225check_taint $+, "\t\$+"; 226check_taint $1, "\t\$1"; 227check_taint $2, "\t\$2"; 228check_taint_not $3, "\t\$3"; 229 230/(.)/; # untaint $&, $`, $', $+, $1. 231check_taint_not $&, "\t/./ \$&"; 232 233"aa" =~ /(.)\B(.)/; # taint $&, $`, $', $+, $1. 234check_taint $&, "\t/(.)\\B(.)/ \$&"; 235check_taint $`, "\t\$`"; 236check_taint $', "\t\$'"; 237check_taint $+, "\t\$+"; 238check_taint $1, "\t\$1"; 239check_taint $2, "\t\$2"; 240check_taint_not $3, "\t\$3"; 241 242/(.)/; # untaint $&, $`, $', $+, $1. 243check_taint_not $&, "\t/./ \$&"; 244 245"aaa" =~ /(.).(\1)/i; # notaint because not locale dependent 246check_taint_not $&, "\t/(.).(\\1)/ \$&"; 247check_taint_not $`, "\t\$`"; 248check_taint_not $', "\t\$'"; 249check_taint_not $+, "\t\$+"; 250check_taint_not $1, "\t\$1"; 251check_taint_not $2, "\t\$2"; 252check_taint_not $3, "\t\$3"; 253 254/(.)/; # untaint $&, $`, $', $+, $1. 255check_taint_not $&, "\t/./ \$&"; 256 257$_ = $a; # untaint $_ 258 259check_taint_not $_, "\t\$_"; 260 261/(b)/; # this must not taint 262check_taint_not $&, "\t/(b)/ \$&"; 263check_taint_not $`, "\t\$`"; 264check_taint_not $', "\t\$'"; 265check_taint_not $+, "\t\$+"; 266check_taint_not $1, "\t\$1"; 267check_taint_not $2, "\t\$2"; 268 269$_ = $a; # untaint $_ 270 271check_taint_not $_, "\t\$_"; 272 273$b = uc($a); # taint $b 274s/(.+)/$b/; # this must taint only the $_ 275 276check_taint $_, "\t\$_"; 277check_taint_not $&, "\t\$&"; 278check_taint_not $`, "\t\$`"; 279check_taint_not $', "\t\$'"; 280check_taint_not $+, "\t\$+"; 281check_taint_not $1, "\t\$1"; 282check_taint_not $2, "\t\$2"; 283 284$_ = $a; # untaint $_ 285 286s/(.+)/b/; # this must not taint 287check_taint_not $_, "\t\$_"; 288check_taint_not $&, "\t\$&"; 289check_taint_not $`, "\t\$`"; 290check_taint_not $', "\t\$'"; 291check_taint_not $+, "\t\$+"; 292check_taint_not $1, "\t\$1"; 293check_taint_not $2, "\t\$2"; 294 295$b = $a; # untaint $b 296 297($b = $a) =~ s/\w/$&/; 298check_taint $b, "\t\$b"; # $b should be tainted. 299check_taint_not $a, "\t\$a"; # $a should be not. 300 301$_ = $a; # untaint $_ 302 303s/(\w)/\l$1/; # this must taint 304check_taint $_, "\t\$_"; 305check_taint $&, "\t\$&"; 306check_taint $`, "\t\$`"; 307check_taint $', "\t\$'"; 308check_taint $+, "\t\$+"; 309check_taint $1, "\t\$1"; 310check_taint_not $2, "\t\$2"; 311 312$_ = $a; # untaint $_ 313 314s/(\w)/\L$1/; # this must taint 315check_taint $_, "\t\$_"; 316check_taint $&, "\t\$&"; 317check_taint $`, "\t\$`"; 318check_taint $', "\t\$'"; 319check_taint $+, "\t\$+"; 320check_taint $1, "\t\$1"; 321check_taint_not $2, "\t\$2"; 322 323$_ = $a; # untaint $_ 324 325s/(\w)/\u$1/; # this must taint 326check_taint $_, "\t\$_"; 327check_taint $&, "\t\$&"; 328check_taint $`, "\t\$`"; 329check_taint $', "\t\$'"; 330check_taint $+, "\t\$+"; 331check_taint $1, "\t\$1"; 332check_taint_not $2, "\t\$2"; 333 334$_ = $a; # untaint $_ 335 336s/(\w)/\U$1/; # this must taint 337check_taint $_, "\t\$_"; 338check_taint $&, "\t\$&"; 339check_taint $`, "\t\$`"; 340check_taint $', "\t\$'"; 341check_taint $+, "\t\$+"; 342check_taint $1, "\t\$1"; 343check_taint_not $2, "\t\$2"; 344 345# After all this tainting $a should be cool. 346 347check_taint_not $a, "\t\$a"; 348 349"a" =~ /([a-z])/; 350check_taint_not $1, '"a" =~ /([a-z])/'; 351"foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675 352check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/'; 353 354# BE SURE TO COPY ANYTHING YOU ADD to the block below 355 356{ # This is just the previous tests copied here with a different 357 # compile-time pragma. 358 359 use locale ':not_characters'; # engage restricted locale with different 360 # tainting rules 361 362 check_taint_not $a; 363 364 check_taint_not uc($a); 365 check_taint_not "\U$a"; 366 check_taint_not ucfirst($a); 367 check_taint_not "\u$a"; 368 check_taint_not lc($a); 369 check_taint_not fc($a); 370 check_taint_not "\L$a"; 371 check_taint_not "\F$a"; 372 check_taint_not lcfirst($a); 373 check_taint_not "\l$a"; 374 375 check_taint_not sprintf('%e', 123.456); 376 check_taint_not sprintf('%f', 123.456); 377 check_taint_not sprintf('%g', 123.456); 378 check_taint_not sprintf('%d', 123.456); 379 check_taint_not sprintf('%x', 123.456); 380 381 $_ = $a; # untaint $_ 382 383 $_ = uc($a); # taint $_ 384 385 check_taint_not $_; 386 387 /(\w)/; # taint $&, $`, $', $+, $1. 388 check_taint_not $&; 389 check_taint_not $`; 390 check_taint_not $'; 391 check_taint_not $+; 392 check_taint_not $1; 393 check_taint_not $2; 394 395 /(.)/; # untaint $&, $`, $', $+, $1. 396 check_taint_not $&; 397 check_taint_not $`; 398 check_taint_not $'; 399 check_taint_not $+; 400 check_taint_not $1; 401 check_taint_not $2; 402 403 /(\W)/; # taint $&, $`, $', $+, $1. 404 check_taint_not $&; 405 check_taint_not $`; 406 check_taint_not $'; 407 check_taint_not $+; 408 check_taint_not $1; 409 check_taint_not $2; 410 411 /(\s)/; # taint $&, $`, $', $+, $1. 412 check_taint_not $&; 413 check_taint_not $`; 414 check_taint_not $'; 415 check_taint_not $+; 416 check_taint_not $1; 417 check_taint_not $2; 418 419 /(\S)/; # taint $&, $`, $', $+, $1. 420 check_taint_not $&; 421 check_taint_not $`; 422 check_taint_not $'; 423 check_taint_not $+; 424 check_taint_not $1; 425 check_taint_not $2; 426 427 $_ = $a; # untaint $_ 428 429 check_taint_not $_; 430 431 /(b)/; # this must not taint 432 check_taint_not $&; 433 check_taint_not $`; 434 check_taint_not $'; 435 check_taint_not $+; 436 check_taint_not $1; 437 check_taint_not $2; 438 439 $_ = $a; # untaint $_ 440 441 check_taint_not $_; 442 443 $b = uc($a); # taint $b 444 s/(.+)/$b/; # this must taint only the $_ 445 446 check_taint_not $_; 447 check_taint_not $&; 448 check_taint_not $`; 449 check_taint_not $'; 450 check_taint_not $+; 451 check_taint_not $1; 452 check_taint_not $2; 453 454 $_ = $a; # untaint $_ 455 456 s/(.+)/b/; # this must not taint 457 check_taint_not $_; 458 check_taint_not $&; 459 check_taint_not $`; 460 check_taint_not $'; 461 check_taint_not $+; 462 check_taint_not $1; 463 check_taint_not $2; 464 465 $b = $a; # untaint $b 466 467 ($b = $a) =~ s/\w/$&/; 468 check_taint_not $b; # $b should be tainted. 469 check_taint_not $a; # $a should be not. 470 471 $_ = $a; # untaint $_ 472 473 s/(\w)/\l$1/; # this must taint 474 check_taint_not $_; 475 check_taint_not $&; 476 check_taint_not $`; 477 check_taint_not $'; 478 check_taint_not $+; 479 check_taint_not $1; 480 check_taint_not $2; 481 482 $_ = $a; # untaint $_ 483 484 s/(\w)/\L$1/; # this must taint 485 check_taint_not $_; 486 check_taint_not $&; 487 check_taint_not $`; 488 check_taint_not $'; 489 check_taint_not $+; 490 check_taint_not $1; 491 check_taint_not $2; 492 493 $_ = $a; # untaint $_ 494 495 s/(\w)/\u$1/; # this must taint 496 check_taint_not $_; 497 check_taint_not $&; 498 check_taint_not $`; 499 check_taint_not $'; 500 check_taint_not $+; 501 check_taint_not $1; 502 check_taint_not $2; 503 504 $_ = $a; # untaint $_ 505 506 s/(\w)/\U$1/; # this must taint 507 check_taint_not $_; 508 check_taint_not $&; 509 check_taint_not $`; 510 check_taint_not $'; 511 check_taint_not $+; 512 check_taint_not $1; 513 check_taint_not $2; 514 515 # After all this tainting $a should be cool. 516 517 check_taint_not $a; 518 519 "a" =~ /([a-z])/; 520 check_taint_not $1, '"a" =~ /([a-z])/'; 521 "foo.bar_baz" =~ /^(.*)[._](.*?)$/; # Bug 120675 522 check_taint_not $1, '"foo.bar_baz" =~ /^(.*)[._](.*?)$/'; 523} 524 525# Here are in scope of 'use locale' 526 527# I think we've seen quite enough of taint. 528# Let us do some *real* locale work now, 529# unless setlocale() is missing (i.e. minitest). 530 531# The test number before our first setlocale() 532my $final_without_setlocale = $test_num; 533 534# Find locales. 535 536debug "# Scanning for locales...\n"; 537 538require POSIX; import POSIX ':locale_h'; 539 540my @Locale = find_locales([ &POSIX::LC_CTYPE, &POSIX::LC_ALL ]); 541 542debug "# Locales =\n"; 543for ( @Locale ) { 544 debug "# $_\n"; 545} 546 547unless (@Locale) { 548 print "1..$test_num\n"; 549 exit; 550} 551 552 553setlocale(&POSIX::LC_ALL, "C"); 554 555my %posixes; 556 557my %Problem; 558my %Okay; 559my %Testing; 560my @Added_alpha; # Alphas that aren't in the C locale. 561my %test_names; 562 563sub disp_chars { 564 # This returns a display string denoting the input parameter @_, each 565 # entry of which is a single character in the range 0-255. The first part 566 # of the output is a string of the characters in @_ that are ASCII 567 # graphics, and hence unambiguously displayable. They are given by code 568 # point order. The second part is the remaining code points, the ordinals 569 # of which are each displayed as 2-digit hex. Blanks are inserted so as 570 # to keep anything from the first part looking like a 2-digit hex number. 571 572 no locale; 573 my @chars = sort { ord $a <=> ord $b } @_; 574 my $output = ""; 575 my $range_start; 576 my $start_class; 577 push @chars, chr(258); # This sentinel simplifies the loop termination 578 # logic 579 foreach my $i (0 .. @chars - 1) { 580 my $char = $chars[$i]; 581 my $range_end; 582 my $class; 583 584 # We avoid using [:posix:] classes, as these are being tested in this 585 # file. Each equivalence class below is for things that can appear in 586 # a range; those that can't be in a range have class -1. 0 for those 587 # which should be output in hex; and >0 for the other ranges 588 if ($char =~ /[A-Z]/) { 589 $class = 2; 590 } 591 elsif ($char =~ /[a-z]/) { 592 $class = 3; 593 } 594 elsif ($char =~ /[0-9]/) { 595 $class = 4; 596 } 597 # Uncomment to get literal punctuation displayed instead of hex 598 #elsif ($char =~ /[[\]!"#\$\%&\'()*+,.\/:\\;<=>?\@\^_`{|}~-]/) { 599 # $class = -1; # Punct never appears in a range 600 #} 601 else { 602 $class = 0; # Output in hex 603 } 604 605 if (! defined $range_start) { 606 if ($class < 0) { 607 $output .= " " . $char; 608 } 609 else { 610 $range_start = ord $char; 611 $start_class = $class; 612 } 613 } # A range ends if not consecutive, or the class-type changes 614 elsif (ord $char != ($range_end = ord($chars[$i-1])) + 1 615 || $class != $start_class) 616 { 617 618 # Here, the current character is not in the range. This means the 619 # previous character must have been. Output the range up through 620 # that one. 621 my $range_length = $range_end - $range_start + 1; 622 if ($start_class > 0) { 623 $output .= " " . chr($range_start); 624 $output .= "-" . chr($range_end) if $range_length > 1; 625 } 626 else { 627 $output .= sprintf(" %02X", $range_start); 628 $output .= sprintf("-%02X", $range_end) if $range_length > 1; 629 } 630 631 # Handle the new current character, as potentially beginning a new 632 # range 633 undef $range_start; 634 redo; 635 } 636 } 637 638 $output =~ s/^ //; 639 return $output; 640} 641 642sub report_result { 643 my ($Locale, $i, $pass_fail, $message) = @_; 644 $message //= ""; 645 $message = " ($message)" if $message; 646 unless ($pass_fail) { 647 $Problem{$i}{$Locale} = 1; 648 debug "# failed $i ($test_names{$i}) with locale '$Locale'$message\n"; 649 } else { 650 push @{$Okay{$i}}, $Locale; 651 } 652} 653 654sub report_multi_result { 655 my ($Locale, $i, $results_ref) = @_; 656 657 # $results_ref points to an array, each element of which is a character that was 658 # in error for this test numbered '$i'. If empty, the test passed 659 660 my $message = ""; 661 if (@$results_ref) { 662 $message = join " ", "for", disp_chars(@$results_ref); 663 } 664 report_result($Locale, $i, @$results_ref == 0, $message); 665} 666 667my $first_locales_test_number = $final_without_setlocale + 1; 668my $locales_test_number; 669my $not_necessarily_a_problem_test_number; 670my $first_casing_test_number; 671my %setlocale_failed; # List of locales that setlocale() didn't work on 672 673foreach my $Locale (@Locale) { 674 $locales_test_number = $first_locales_test_number - 1; 675 debug "#\n"; 676 debug "# Locale = $Locale\n"; 677 678 unless (setlocale(&POSIX::LC_ALL, $Locale)) { 679 $setlocale_failed{$Locale} = $Locale; 680 next; 681 } 682 683 # We test UTF-8 locales only under ':not_characters'; It is easier to 684 # test them in other test files than here. Non- UTF-8 locales are tested 685 # only under plain 'use locale', as otherwise we would have to convert 686 # everything in them to Unicode. 687 688 my %UPPER = (); # All alpha X for which uc(X) == X and lc(X) != X 689 my %lower = (); # All alpha X for which lc(X) == X and uc(X) != X 690 my %BoThCaSe = (); # All alpha X for which uc(X) == lc(X) == X 691 692 my $is_utf8_locale = is_locale_utf8($Locale); 693 694 debug "# is utf8 locale? = $is_utf8_locale\n"; 695 696 my $radix = localeconv()->{decimal_point}; 697 if ($radix !~ / ^ [[:ascii:]] + $/x) { 698 use bytes; 699 $radix = disp_chars(split "", $radix); 700 } 701 debug "# radix = $radix\n"; 702 703 if (! $is_utf8_locale) { 704 use locale; 705 @{$posixes{'word'}} = grep /\w/, map { chr } 0..255; 706 @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255; 707 @{$posixes{'space'}} = grep /\s/, map { chr } 0..255; 708 @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255; 709 @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255; 710 @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255; 711 @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255; 712 @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255; 713 @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255; 714 @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255; 715 @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255; 716 @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255; 717 @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255; 718 @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255; 719 @{$posixes{'cased'}} = grep /[[:upper:]]/i, map {chr } 0..255; 720 721 # Sieve the uppercase and the lowercase. 722 723 for (@{$posixes{'word'}}) { 724 if (/[^\d_]/) { # skip digits and the _ 725 if (uc($_) eq $_) { 726 $UPPER{$_} = $_; 727 } 728 if (lc($_) eq $_) { 729 $lower{$_} = $_; 730 } 731 } 732 } 733 } 734 else { 735 use locale ':not_characters'; 736 @{$posixes{'word'}} = grep /\w/, map { chr } 0..255; 737 @{$posixes{'digit'}} = grep /\d/, map { chr } 0..255; 738 @{$posixes{'space'}} = grep /\s/, map { chr } 0..255; 739 @{$posixes{'alpha'}} = grep /[[:alpha:]]/, map {chr } 0..255; 740 @{$posixes{'alnum'}} = grep /[[:alnum:]]/, map {chr } 0..255; 741 @{$posixes{'ascii'}} = grep /[[:ascii:]]/, map {chr } 0..255; 742 @{$posixes{'blank'}} = grep /[[:blank:]]/, map {chr } 0..255; 743 @{$posixes{'cntrl'}} = grep /[[:cntrl:]]/, map {chr } 0..255; 744 @{$posixes{'graph'}} = grep /[[:graph:]]/, map {chr } 0..255; 745 @{$posixes{'lower'}} = grep /[[:lower:]]/, map {chr } 0..255; 746 @{$posixes{'print'}} = grep /[[:print:]]/, map {chr } 0..255; 747 @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255; 748 @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255; 749 @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255; 750 @{$posixes{'cased'}} = grep /[[:upper:]]/i, map {chr } 0..255; 751 for (@{$posixes{'word'}}) { 752 if (/[^\d_]/) { # skip digits and the _ 753 if (uc($_) eq $_) { 754 $UPPER{$_} = $_; 755 } 756 if (lc($_) eq $_) { 757 $lower{$_} = $_; 758 } 759 } 760 } 761 } 762 763 # Ordered, where possible, in groups of "this is a subset of the next 764 # one" 765 debug "# :upper: = ", disp_chars(@{$posixes{'upper'}}), "\n"; 766 debug "# :lower: = ", disp_chars(@{$posixes{'lower'}}), "\n"; 767 debug "# :cased: = ", disp_chars(@{$posixes{'cased'}}), "\n"; 768 debug "# :alpha: = ", disp_chars(@{$posixes{'alpha'}}), "\n"; 769 debug "# :alnum: = ", disp_chars(@{$posixes{'alnum'}}), "\n"; 770 debug "# w = ", disp_chars(@{$posixes{'word'}}), "\n"; 771 debug "# :graph: = ", disp_chars(@{$posixes{'graph'}}), "\n"; 772 debug "# :print: = ", disp_chars(@{$posixes{'print'}}), "\n"; 773 debug "# d = ", disp_chars(@{$posixes{'digit'}}), "\n"; 774 debug "# :xdigit: = ", disp_chars(@{$posixes{'xdigit'}}), "\n"; 775 debug "# :blank: = ", disp_chars(@{$posixes{'blank'}}), "\n"; 776 debug "# s = ", disp_chars(@{$posixes{'space'}}), "\n"; 777 debug "# :punct: = ", disp_chars(@{$posixes{'punct'}}), "\n"; 778 debug "# :cntrl: = ", disp_chars(@{$posixes{'cntrl'}}), "\n"; 779 debug "# :ascii: = ", disp_chars(@{$posixes{'ascii'}}), "\n"; 780 781 foreach (keys %UPPER) { 782 783 $BoThCaSe{$_}++ if exists $lower{$_}; 784 } 785 foreach (keys %lower) { 786 $BoThCaSe{$_}++ if exists $UPPER{$_}; 787 } 788 foreach (keys %BoThCaSe) { 789 delete $UPPER{$_}; 790 delete $lower{$_}; 791 } 792 793 my %Unassigned; 794 foreach my $ord ( 0 .. 255 ) { 795 $Unassigned{chr $ord} = 1; 796 } 797 foreach my $class (keys %posixes) { 798 foreach my $char (@{$posixes{$class}}) { 799 delete $Unassigned{$char}; 800 } 801 } 802 803 debug "# UPPER = ", disp_chars(sort { ord $a <=> ord $b } keys %UPPER), "\n"; 804 debug "# lower = ", disp_chars(sort { ord $a <=> ord $b } keys %lower), "\n"; 805 debug "# BoThCaSe = ", disp_chars(sort { ord $a <=> ord $b } keys %BoThCaSe), "\n"; 806 debug "# Unassigned = ", disp_chars(sort { ord $a <=> ord $b } keys %Unassigned), "\n"; 807 808 my @failures; 809 my @fold_failures; 810 foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) { 811 my $ok; 812 my $fold_ok; 813 if ($is_utf8_locale) { 814 use locale ':not_characters'; 815 $ok = $x =~ /[[:upper:]]/; 816 $fold_ok = $x =~ /[[:lower:]]/i; 817 } 818 else { 819 use locale; 820 $ok = $x =~ /[[:upper:]]/; 821 $fold_ok = $x =~ /[[:lower:]]/i; 822 } 823 push @failures, $x unless $ok; 824 push @fold_failures, $x unless $fold_ok; 825 } 826 $locales_test_number++; 827 $first_casing_test_number = $locales_test_number; 828 $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/ matches all alpha X for which uc(X) == X and lc(X) != X'; 829 report_multi_result($Locale, $locales_test_number, \@failures); 830 831 $locales_test_number++; 832 833 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i matches all alpha X for which uc(X) == X and lc(X) != X'; 834 report_multi_result($Locale, $locales_test_number, \@fold_failures); 835 836 undef @failures; 837 undef @fold_failures; 838 839 foreach my $x (sort { ord $a <=> ord $b } keys %lower) { 840 my $ok; 841 my $fold_ok; 842 if ($is_utf8_locale) { 843 use locale ':not_characters'; 844 $ok = $x =~ /[[:lower:]]/; 845 $fold_ok = $x =~ /[[:upper:]]/i; 846 } 847 else { 848 use locale; 849 $ok = $x =~ /[[:lower:]]/; 850 $fold_ok = $x =~ /[[:upper:]]/i; 851 } 852 push @failures, $x unless $ok; 853 push @fold_failures, $x unless $fold_ok; 854 } 855 856 $locales_test_number++; 857 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/ matches all alpha X for which lc(X) == X and uc(X) != X'; 858 report_multi_result($Locale, $locales_test_number, \@failures); 859 860 $locales_test_number++; 861 $test_names{$locales_test_number} = 'Verify that /[[:upper:]]/i matches all alpha X for which lc(X) == X and uc(X) != X'; 862 report_multi_result($Locale, $locales_test_number, \@fold_failures); 863 864 { # Find the alphabetic characters that are not considered alphabetics 865 # in the default (C) locale. 866 867 no locale; 868 869 @Added_alpha = (); 870 for (keys %UPPER, keys %lower, keys %BoThCaSe) { 871 push(@Added_alpha, $_) if (/\W/); 872 } 873 } 874 875 @Added_alpha = sort { ord $a <=> ord $b } @Added_alpha; 876 877 debug "# Added_alpha = ", disp_chars(@Added_alpha), "\n"; 878 879 # Cross-check the whole 8-bit character set. 880 881 ++$locales_test_number; 882 my @f; 883 $test_names{$locales_test_number} = 'Verify that \w and [:word:] are identical'; 884 for (map { chr } 0..255) { 885 if ($is_utf8_locale) { 886 use locale ':not_characters'; 887 push @f, $_ unless /[[:word:]]/ == /\w/; 888 } 889 else { 890 push @f, $_ unless /[[:word:]]/ == /\w/; 891 } 892 } 893 report_multi_result($Locale, $locales_test_number, \@f); 894 895 ++$locales_test_number; 896 undef @f; 897 $test_names{$locales_test_number} = 'Verify that \d and [:digit:] are identical'; 898 for (map { chr } 0..255) { 899 if ($is_utf8_locale) { 900 use locale ':not_characters'; 901 push @f, $_ unless /[[:digit:]]/ == /\d/; 902 } 903 else { 904 push @f, $_ unless /[[:digit:]]/ == /\d/; 905 } 906 } 907 report_multi_result($Locale, $locales_test_number, \@f); 908 909 ++$locales_test_number; 910 undef @f; 911 $test_names{$locales_test_number} = 'Verify that \s and [:space:] are identical'; 912 for (map { chr } 0..255) { 913 if ($is_utf8_locale) { 914 use locale ':not_characters'; 915 push @f, $_ unless /[[:space:]]/ == /\s/; 916 } 917 else { 918 push @f, $_ unless /[[:space:]]/ == /\s/; 919 } 920 } 921 report_multi_result($Locale, $locales_test_number, \@f); 922 923 ++$locales_test_number; 924 undef @f; 925 $test_names{$locales_test_number} = 'Verify that [:posix:] and [:^posix:] are mutually exclusive'; 926 for (map { chr } 0..255) { 927 if ($is_utf8_locale) { 928 use locale ':not_characters'; 929 push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) || 930 (/[[:alnum:]]/ xor /[[:^alnum:]]/) || 931 (/[[:ascii:]]/ xor /[[:^ascii:]]/) || 932 (/[[:blank:]]/ xor /[[:^blank:]]/) || 933 (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) || 934 (/[[:digit:]]/ xor /[[:^digit:]]/) || 935 (/[[:graph:]]/ xor /[[:^graph:]]/) || 936 (/[[:lower:]]/ xor /[[:^lower:]]/) || 937 (/[[:print:]]/ xor /[[:^print:]]/) || 938 (/[[:space:]]/ xor /[[:^space:]]/) || 939 (/[[:upper:]]/ xor /[[:^upper:]]/) || 940 (/[[:word:]]/ xor /[[:^word:]]/) || 941 (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) || 942 943 # effectively is what [:cased:] would be if it existed. 944 (/[[:upper:]]/i xor /[[:^upper:]]/i); 945 } 946 else { 947 push @f, $_ unless (/[[:alpha:]]/ xor /[[:^alpha:]]/) || 948 (/[[:alnum:]]/ xor /[[:^alnum:]]/) || 949 (/[[:ascii:]]/ xor /[[:^ascii:]]/) || 950 (/[[:blank:]]/ xor /[[:^blank:]]/) || 951 (/[[:cntrl:]]/ xor /[[:^cntrl:]]/) || 952 (/[[:digit:]]/ xor /[[:^digit:]]/) || 953 (/[[:graph:]]/ xor /[[:^graph:]]/) || 954 (/[[:lower:]]/ xor /[[:^lower:]]/) || 955 (/[[:print:]]/ xor /[[:^print:]]/) || 956 (/[[:space:]]/ xor /[[:^space:]]/) || 957 (/[[:upper:]]/ xor /[[:^upper:]]/) || 958 (/[[:word:]]/ xor /[[:^word:]]/) || 959 (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) || 960 (/[[:upper:]]/i xor /[[:^upper:]]/i); 961 } 962 } 963 report_multi_result($Locale, $locales_test_number, \@f); 964 965 # The rules for the relationships are given in: 966 # http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html 967 968 969 ++$locales_test_number; 970 undef @f; 971 $test_names{$locales_test_number} = 'Verify that [:lower:] contains at least a-z'; 972 for ('a' .. 'z') { 973 if ($is_utf8_locale) { 974 use locale ':not_characters'; 975 push @f, $_ unless /[[:lower:]]/; 976 } 977 else { 978 push @f, $_ unless /[[:lower:]]/; 979 } 980 } 981 report_multi_result($Locale, $locales_test_number, \@f); 982 983 ++$locales_test_number; 984 undef @f; 985 $test_names{$locales_test_number} = 'Verify that [:lower:] is a subset of [:alpha:]'; 986 for (map { chr } 0..255) { 987 if ($is_utf8_locale) { 988 use locale ':not_characters'; 989 push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/; 990 } 991 else { 992 push @f, $_ if /[[:lower:]]/ and ! /[[:alpha:]]/; 993 } 994 } 995 report_multi_result($Locale, $locales_test_number, \@f); 996 997 ++$locales_test_number; 998 undef @f; 999 $test_names{$locales_test_number} = 'Verify that [:upper:] contains at least A-Z'; 1000 for ('A' .. 'Z') { 1001 if ($is_utf8_locale) { 1002 use locale ':not_characters'; 1003 push @f, $_ unless /[[:upper:]]/; 1004 } 1005 else { 1006 push @f, $_ unless /[[:upper:]]/; 1007 } 1008 } 1009 report_multi_result($Locale, $locales_test_number, \@f); 1010 1011 ++$locales_test_number; 1012 undef @f; 1013 $test_names{$locales_test_number} = 'Verify that [:upper:] is a subset of [:alpha:]'; 1014 for (map { chr } 0..255) { 1015 if ($is_utf8_locale) { 1016 use locale ':not_characters'; 1017 push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/; 1018 } 1019 else { 1020 push @f, $_ if /[[:upper:]]/ and ! /[[:alpha:]]/; 1021 } 1022 } 1023 report_multi_result($Locale, $locales_test_number, \@f); 1024 1025 ++$locales_test_number; 1026 undef @f; 1027 $test_names{$locales_test_number} = 'Verify that /[[:lower:]]/i is a subset of [:alpha:]'; 1028 for (map { chr } 0..255) { 1029 if ($is_utf8_locale) { 1030 use locale ':not_characters'; 1031 push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/; 1032 } 1033 else { 1034 push @f, $_ if /[[:lower:]]/i and ! /[[:alpha:]]/; 1035 } 1036 } 1037 report_multi_result($Locale, $locales_test_number, \@f); 1038 1039 ++$locales_test_number; 1040 undef @f; 1041 $test_names{$locales_test_number} = 'Verify that [:alpha:] is a subset of [:alnum:]'; 1042 for (map { chr } 0..255) { 1043 if ($is_utf8_locale) { 1044 use locale ':not_characters'; 1045 push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/; 1046 } 1047 else { 1048 push @f, $_ if /[[:alpha:]]/ and ! /[[:alnum:]]/; 1049 } 1050 } 1051 report_multi_result($Locale, $locales_test_number, \@f); 1052 1053 ++$locales_test_number; 1054 undef @f; 1055 $test_names{$locales_test_number} = 'Verify that [:digit:] contains at least 0-9'; 1056 for ('0' .. '9') { 1057 if ($is_utf8_locale) { 1058 use locale ':not_characters'; 1059 push @f, $_ unless /[[:digit:]]/; 1060 } 1061 else { 1062 push @f, $_ unless /[[:digit:]]/; 1063 } 1064 } 1065 report_multi_result($Locale, $locales_test_number, \@f); 1066 1067 ++$locales_test_number; 1068 undef @f; 1069 $test_names{$locales_test_number} = 'Verify that [:digit:] is a subset of [:alnum:]'; 1070 for (map { chr } 0..255) { 1071 if ($is_utf8_locale) { 1072 use locale ':not_characters'; 1073 push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/; 1074 } 1075 else { 1076 push @f, $_ if /[[:digit:]]/ and ! /[[:alnum:]]/; 1077 } 1078 } 1079 report_multi_result($Locale, $locales_test_number, \@f); 1080 1081 ++$locales_test_number; 1082 undef @f; 1083 $test_names{$locales_test_number} = 'Verify that [:digit:] matches either 10 or 20 code points'; 1084 report_result($Locale, $locales_test_number, @{$posixes{'digit'}} == 10 || @{$posixes{'digit'}} == 20); 1085 1086 ++$locales_test_number; 1087 undef @f; 1088 $test_names{$locales_test_number} = 'Verify that if there is a second set of digits in [:digit:], they are consecutive'; 1089 if (@{$posixes{'digit'}} == 20) { 1090 my $previous_ord; 1091 for (map { chr } 0..255) { 1092 next unless /[[:digit:]]/; 1093 next if /[0-9]/; 1094 if (defined $previous_ord) { 1095 if ($is_utf8_locale) { 1096 use locale ':not_characters'; 1097 push @f, $_ if ord $_ != $previous_ord + 1; 1098 } 1099 else { 1100 push @f, $_ if ord $_ != $previous_ord + 1; 1101 } 1102 } 1103 $previous_ord = ord $_; 1104 } 1105 } 1106 report_multi_result($Locale, $locales_test_number, \@f); 1107 1108 ++$locales_test_number; 1109 undef @f; 1110 my @xdigit_digits; # :digit: & :xdigit: 1111 $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains one or two blocks of 10 consecutive [:digit:] chars'; 1112 for (map { chr } 0..255) { 1113 if ($is_utf8_locale) { 1114 use locale ':not_characters'; 1115 # For utf8 locales, we actually use a stricter test: that :digit: 1116 # is a subset of :xdigit:, as we know that only 0-9 should match 1117 push @f, $_ if /[[:digit:]]/ and ! /[[:xdigit:]]/; 1118 } 1119 else { 1120 push @xdigit_digits, $_ if /[[:digit:]]/ and /[[:xdigit:]]/; 1121 } 1122 } 1123 if (! $is_utf8_locale) { 1124 1125 # For non-utf8 locales, @xdigit_digits is a list of the characters 1126 # that are both :xdigit: and :digit:. Because :digit: is stored in 1127 # increasing code point order (unless the tests above failed), 1128 # @xdigit_digits is as well. There should be exactly 10 or 1129 # 20 of these. 1130 if (@xdigit_digits != 10 && @xdigit_digits != 20) { 1131 @f = @xdigit_digits; 1132 } 1133 else { 1134 1135 # Look for contiguity in the series, adding any wrong ones to @f 1136 my @temp = @xdigit_digits; 1137 while (@temp > 1) { 1138 push @f, $temp[1] if ($temp[0] != $temp[1] - 1) 1139 1140 # Skip this test for the 0th character of 1141 # the second block of 10, as it won't be 1142 # contiguous with the previous block 1143 && (! defined $xdigit_digits[10] 1144 || $temp[1] != $xdigit_digits[10]); 1145 shift @temp; 1146 } 1147 } 1148 } 1149 1150 report_multi_result($Locale, $locales_test_number, \@f); 1151 1152 ++$locales_test_number; 1153 undef @f; 1154 $test_names{$locales_test_number} = 'Verify that [:xdigit:] contains at least A-F, a-f'; 1155 for ('A' .. 'F', 'a' .. 'f') { 1156 if ($is_utf8_locale) { 1157 use locale ':not_characters'; 1158 push @f, $_ unless /[[:xdigit:]]/; 1159 } 1160 else { 1161 push @f, $_ unless /[[:xdigit:]]/; 1162 } 1163 } 1164 report_multi_result($Locale, $locales_test_number, \@f); 1165 1166 ++$locales_test_number; 1167 undef @f; 1168 $test_names{$locales_test_number} = 'Verify that any additional members of [:xdigit:], are in groups of 6 consecutive code points'; 1169 my $previous_ord; 1170 my $count = 0; 1171 for my $chr (map { chr } 0..255) { 1172 next unless $chr =~ /[[:xdigit:]]/; 1173 if ($is_utf8_locale) { 1174 next if $chr =~ /[[:digit:]]/; 1175 } 1176 else { 1177 next if grep { $chr eq $_ } @xdigit_digits; 1178 } 1179 next if $chr =~ /[A-Fa-f]/; 1180 if (defined $previous_ord) { 1181 if ($is_utf8_locale) { 1182 use locale ':not_characters'; 1183 push @f, $chr if ord $chr != $previous_ord + 1; 1184 } 1185 else { 1186 push @f, $chr if ord $chr != $previous_ord + 1; 1187 } 1188 } 1189 $count++; 1190 if ($count == 6) { 1191 undef $previous_ord; 1192 } 1193 else { 1194 $previous_ord = ord $chr; 1195 } 1196 } 1197 report_multi_result($Locale, $locales_test_number, \@f); 1198 1199 ++$locales_test_number; 1200 undef @f; 1201 $test_names{$locales_test_number} = 'Verify that [:xdigit:] is a subset of [:graph:]'; 1202 for (map { chr } 0..255) { 1203 if ($is_utf8_locale) { 1204 use locale ':not_characters'; 1205 push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/; 1206 } 1207 else { 1208 push @f, $_ if /[[:xdigit:]]/ and ! /[[:graph:]]/; 1209 } 1210 } 1211 report_multi_result($Locale, $locales_test_number, \@f); 1212 1213 # Note that xdigit doesn't have to be a subset of alnum 1214 1215 ++$locales_test_number; 1216 undef @f; 1217 $test_names{$locales_test_number} = 'Verify that [:punct:] is a subset of [:graph:]'; 1218 for (map { chr } 0..255) { 1219 if ($is_utf8_locale) { 1220 use locale ':not_characters'; 1221 push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/; 1222 } 1223 else { 1224 push @f, $_ if /[[:punct:]]/ and ! /[[:graph:]]/; 1225 } 1226 } 1227 report_multi_result($Locale, $locales_test_number, \@f); 1228 1229 ++$locales_test_number; 1230 undef @f; 1231 $test_names{$locales_test_number} = 'Verify that the space character is not in [:graph:]'; 1232 if ($is_utf8_locale) { 1233 use locale ':not_characters'; 1234 push @f, " " if " " =~ /[[:graph:]]/; 1235 } 1236 else { 1237 push @f, " " if " " =~ /[[:graph:]]/; 1238 } 1239 report_multi_result($Locale, $locales_test_number, \@f); 1240 1241 ++$locales_test_number; 1242 undef @f; 1243 $test_names{$locales_test_number} = 'Verify that [:space:] contains at least [\f\n\r\t\cK ]'; 1244 for (' ', "\f", "\n", "\r", "\t", "\cK") { 1245 if ($is_utf8_locale) { 1246 use locale ':not_characters'; 1247 push @f, $_ unless /[[:space:]]/; 1248 } 1249 else { 1250 push @f, $_ unless /[[:space:]]/; 1251 } 1252 } 1253 report_multi_result($Locale, $locales_test_number, \@f); 1254 1255 ++$locales_test_number; 1256 undef @f; 1257 $test_names{$locales_test_number} = 'Verify that [:blank:] contains at least [\t ]'; 1258 for (' ', "\t") { 1259 if ($is_utf8_locale) { 1260 use locale ':not_characters'; 1261 push @f, $_ unless /[[:blank:]]/; 1262 } 1263 else { 1264 push @f, $_ unless /[[:blank:]]/; 1265 } 1266 } 1267 report_multi_result($Locale, $locales_test_number, \@f); 1268 1269 ++$locales_test_number; 1270 undef @f; 1271 $test_names{$locales_test_number} = 'Verify that [:blank:] is a subset of [:space:]'; 1272 for (map { chr } 0..255) { 1273 if ($is_utf8_locale) { 1274 use locale ':not_characters'; 1275 push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/; 1276 } 1277 else { 1278 push @f, $_ if /[[:blank:]]/ and ! /[[:space:]]/; 1279 } 1280 } 1281 report_multi_result($Locale, $locales_test_number, \@f); 1282 1283 ++$locales_test_number; 1284 undef @f; 1285 $test_names{$locales_test_number} = 'Verify that [:graph:] is a subset of [:print:]'; 1286 for (map { chr } 0..255) { 1287 if ($is_utf8_locale) { 1288 use locale ':not_characters'; 1289 push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/; 1290 } 1291 else { 1292 push @f, $_ if /[[:graph:]]/ and ! /[[:print:]]/; 1293 } 1294 } 1295 report_multi_result($Locale, $locales_test_number, \@f); 1296 1297 ++$locales_test_number; 1298 undef @f; 1299 $test_names{$locales_test_number} = 'Verify that the space character is in [:print:]'; 1300 if ($is_utf8_locale) { 1301 use locale ':not_characters'; 1302 push @f, " " if " " !~ /[[:print:]]/; 1303 } 1304 else { 1305 push @f, " " if " " !~ /[[:print:]]/; 1306 } 1307 report_multi_result($Locale, $locales_test_number, \@f); 1308 1309 ++$locales_test_number; 1310 undef @f; 1311 $test_names{$locales_test_number} = 'Verify that isn\'t both [:cntrl:] and [:print:]'; 1312 for (map { chr } 0..255) { 1313 if ($is_utf8_locale) { 1314 use locale ':not_characters'; 1315 push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/); 1316 } 1317 else { 1318 push @f, $_ if (/[[:print:]]/ and /[[:cntrl:]]/); 1319 } 1320 } 1321 report_multi_result($Locale, $locales_test_number, \@f); 1322 1323 ++$locales_test_number; 1324 undef @f; 1325 $test_names{$locales_test_number} = 'Verify that isn\'t both [:alpha:] and [:digit:]'; 1326 for (map { chr } 0..255) { 1327 if ($is_utf8_locale) { 1328 use locale ':not_characters'; 1329 push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/; 1330 } 1331 else { 1332 push @f, $_ if /[[:alpha:]]/ and /[[:digit:]]/; 1333 } 1334 } 1335 report_multi_result($Locale, $locales_test_number, \@f); 1336 1337 ++$locales_test_number; 1338 undef @f; 1339 $test_names{$locales_test_number} = 'Verify that isn\'t both [:alnum:] and [:punct:]'; 1340 for (map { chr } 0..255) { 1341 if ($is_utf8_locale) { 1342 use locale ':not_characters'; 1343 push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/; 1344 } 1345 else { 1346 push @f, $_ if /[[:alnum:]]/ and /[[:punct:]]/; 1347 } 1348 } 1349 report_multi_result($Locale, $locales_test_number, \@f); 1350 1351 ++$locales_test_number; 1352 undef @f; 1353 $test_names{$locales_test_number} = 'Verify that isn\'t both [:xdigit:] and [:punct:]'; 1354 for (map { chr } 0..255) { 1355 if ($is_utf8_locale) { 1356 use locale ':not_characters'; 1357 push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/); 1358 } 1359 else { 1360 push @f, $_ if (/[[:punct:]]/ and /[[:xdigit:]]/); 1361 } 1362 } 1363 report_multi_result($Locale, $locales_test_number, \@f); 1364 1365 ++$locales_test_number; 1366 undef @f; 1367 $test_names{$locales_test_number} = 'Verify that isn\'t both [:graph:] and [:space:]'; 1368 for (map { chr } 0..255) { 1369 if ($is_utf8_locale) { 1370 use locale ':not_characters'; 1371 push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/); 1372 } 1373 else { 1374 push @f, $_ if (/[[:graph:]]/ and /[[:space:]]/); 1375 } 1376 } 1377 report_multi_result($Locale, $locales_test_number, \@f); 1378 1379 foreach ($first_casing_test_number..$locales_test_number) { 1380 $problematical_tests{$_} = 1; 1381 } 1382 1383 1384 # Test for read-only scalars' locale vs non-locale comparisons. 1385 1386 { 1387 no locale; 1388 my $ok; 1389 $a = "qwerty"; 1390 if ($is_utf8_locale) { 1391 use locale ':not_characters'; 1392 $ok = ($a cmp "qwerty") == 0; 1393 } 1394 else { 1395 use locale; 1396 $ok = ($a cmp "qwerty") == 0; 1397 } 1398 report_result($Locale, ++$locales_test_number, $ok); 1399 $test_names{$locales_test_number} = 'Verify that cmp works with a read-only scalar; no- vs locale'; 1400 } 1401 1402 { 1403 my ($from, $to, $lesser, $greater, 1404 @test, %test, $test, $yes, $no, $sign); 1405 1406 ++$locales_test_number; 1407 $test_names{$locales_test_number} = 'Verify that "le", "ne", etc work'; 1408 $not_necessarily_a_problem_test_number = $locales_test_number; 1409 for (0..9) { 1410 # Select a slice. 1411 $from = int(($_*@{$posixes{'word'}})/10); 1412 $to = $from + int(@{$posixes{'word'}}/10); 1413 $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}}); 1414 $lesser = join('', @{$posixes{'word'}}[$from..$to]); 1415 # Select a slice one character on. 1416 $from++; $to++; 1417 $to = $#{$posixes{'word'}} if ($to > $#{$posixes{'word'}}); 1418 $greater = join('', @{$posixes{'word'}}[$from..$to]); 1419 if ($is_utf8_locale) { 1420 use locale ':not_characters'; 1421 ($yes, $no, $sign) = ($lesser lt $greater 1422 ? (" ", "not ", 1) 1423 : ("not ", " ", -1)); 1424 } 1425 else { 1426 use locale; 1427 ($yes, $no, $sign) = ($lesser lt $greater 1428 ? (" ", "not ", 1) 1429 : ("not ", " ", -1)); 1430 } 1431 # all these tests should FAIL (return 0). Exact lt or gt cannot 1432 # be tested because in some locales, say, eacute and E may test 1433 # equal. 1434 @test = 1435 ( 1436 $no.' ($lesser le $greater)', # 1 1437 'not ($lesser ne $greater)', # 2 1438 ' ($lesser eq $greater)', # 3 1439 $yes.' ($lesser ge $greater)', # 4 1440 $yes.' ($lesser ge $greater)', # 5 1441 $yes.' ($greater le $lesser )', # 7 1442 'not ($greater ne $lesser )', # 8 1443 ' ($greater eq $lesser )', # 9 1444 $no.' ($greater ge $lesser )', # 10 1445 'not (($lesser cmp $greater) == -($sign))' # 11 1446 ); 1447 @test{@test} = 0 x @test; 1448 $test = 0; 1449 for my $ti (@test) { 1450 if ($is_utf8_locale) { 1451 use locale ':not_characters'; 1452 $test{$ti} = eval $ti; 1453 } 1454 else { 1455 # Already in 'use locale'; 1456 $test{$ti} = eval $ti; 1457 } 1458 $test ||= $test{$ti} 1459 } 1460 report_result($Locale, $locales_test_number, $test == 0); 1461 if ($test) { 1462 debug "# lesser = '$lesser'\n"; 1463 debug "# greater = '$greater'\n"; 1464 debug "# lesser cmp greater = ", 1465 $lesser cmp $greater, "\n"; 1466 debug "# greater cmp lesser = ", 1467 $greater cmp $lesser, "\n"; 1468 debug "# (greater) from = $from, to = $to\n"; 1469 for my $ti (@test) { 1470 debugf("# %-40s %-4s", $ti, 1471 $test{$ti} ? 'FAIL' : 'ok'); 1472 if ($ti =~ /\(\.*(\$.+ +cmp +\$[^\)]+)\.*\)/) { 1473 debugf("(%s == %4d)", $1, eval $1); 1474 } 1475 debug "\n#"; 1476 } 1477 1478 last; 1479 } 1480 } 1481 } 1482 1483 my $ok1; 1484 my $ok2; 1485 my $ok3; 1486 my $ok4; 1487 my $ok5; 1488 my $ok6; 1489 my $ok7; 1490 my $ok8; 1491 my $ok9; 1492 my $ok10; 1493 my $ok11; 1494 my $ok12; 1495 my $ok13; 1496 my $ok14; 1497 my $ok15; 1498 my $ok16; 1499 my $ok17; 1500 my $ok18; 1501 1502 my $c; 1503 my $d; 1504 my $e; 1505 my $f; 1506 my $g; 1507 my $h; 1508 my $i; 1509 my $j; 1510 1511 if (! $is_utf8_locale) { 1512 use locale; 1513 1514 my ($x, $y) = (1.23, 1.23); 1515 1516 $a = "$x"; 1517 printf ''; # printf used to reset locale to "C" 1518 $b = "$y"; 1519 $ok1 = $a eq $b; 1520 1521 $c = "$x"; 1522 my $z = sprintf ''; # sprintf used to reset locale to "C" 1523 $d = "$y"; 1524 $ok2 = $c eq $d; 1525 { 1526 1527 use warnings; 1528 my $w = 0; 1529 local $SIG{__WARN__} = 1530 sub { 1531 print "# @_\n"; 1532 $w++; 1533 }; 1534 1535 # The == (among other ops) used to warn for locales 1536 # that had something else than "." as the radix character. 1537 1538 $ok3 = $c == 1.23; 1539 $ok4 = $c == $x; 1540 $ok5 = $c == $d; 1541 { 1542 no locale; 1543 1544 $e = "$x"; 1545 1546 $ok6 = $e == 1.23; 1547 $ok7 = $e == $x; 1548 $ok8 = $e == $c; 1549 } 1550 1551 $f = "1.23"; 1552 $g = 2.34; 1553 $h = 1.5; 1554 $i = 1.25; 1555 $j = "$h:$i"; 1556 1557 $ok9 = $f == 1.23; 1558 $ok10 = $f == $x; 1559 $ok11 = $f == $c; 1560 $ok12 = abs(($f + $g) - 3.57) < 0.01; 1561 $ok13 = $w == 0; 1562 $ok14 = $ok15 = $ok16 = 1; # Skip for non-utf8 locales 1563 } 1564 { 1565 no locale; 1566 $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i); 1567 } 1568 $ok18 = $j eq sprintf("%g:%g", $h, $i); 1569 } 1570 else { 1571 use locale ':not_characters'; 1572 1573 my ($x, $y) = (1.23, 1.23); 1574 $a = "$x"; 1575 printf ''; # printf used to reset locale to "C" 1576 $b = "$y"; 1577 $ok1 = $a eq $b; 1578 1579 $c = "$x"; 1580 my $z = sprintf ''; # sprintf used to reset locale to "C" 1581 $d = "$y"; 1582 $ok2 = $c eq $d; 1583 { 1584 use warnings; 1585 my $w = 0; 1586 local $SIG{__WARN__} = 1587 sub { 1588 print "# @_\n"; 1589 $w++; 1590 }; 1591 $ok3 = $c == 1.23; 1592 $ok4 = $c == $x; 1593 $ok5 = $c == $d; 1594 { 1595 no locale; 1596 $e = "$x"; 1597 1598 $ok6 = $e == 1.23; 1599 $ok7 = $e == $x; 1600 $ok8 = $e == $c; 1601 } 1602 1603 $f = "1.23"; 1604 $g = 2.34; 1605 $h = 1.5; 1606 $i = 1.25; 1607 $j = "$h:$i"; 1608 1609 $ok9 = $f == 1.23; 1610 $ok10 = $f == $x; 1611 $ok11 = $f == $c; 1612 $ok12 = abs(($f + $g) - 3.57) < 0.01; 1613 $ok13 = $w == 0; 1614 1615 # Look for non-ASCII error messages, and verify that the first 1616 # such is NOT in UTF-8 (the others almost certainly will be like 1617 # the first) See [perl #119499]. 1618 $ok14 = 1; 1619 foreach my $err (keys %!) { 1620 use Errno; 1621 $! = eval "&Errno::$err"; # Convert to strerror() output 1622 my $strerror = "$!"; 1623 if ("$strerror" =~ /\P{ASCII}/) { 1624 $ok14 = ! utf8::is_utf8($strerror); 1625 last; 1626 } 1627 } 1628 1629 # Similarly, we verify that a non-ASCII radix is in UTF-8. This 1630 # also catches if there is a disparity between sprintf and 1631 # stringification. 1632 1633 my $string_g = "$g"; 1634 my $sprintf_g = sprintf("%g", $g); 1635 1636 $ok15 = $string_g =~ / ^ \p{ASCII}+ $ /x || utf8::is_utf8($string_g); 1637 $ok16 = $sprintf_g eq $string_g; 1638 } 1639 { 1640 no locale; 1641 $ok17 = "1.5:1.25" eq sprintf("%g:%g", $h, $i); 1642 } 1643 $ok18 = $j eq sprintf("%g:%g", $h, $i); 1644 } 1645 1646 report_result($Locale, ++$locales_test_number, $ok1); 1647 $test_names{$locales_test_number} = 'Verify that an intervening printf doesn\'t change assignment results'; 1648 my $first_a_test = $locales_test_number; 1649 1650 debug "# $first_a_test..$locales_test_number: \$a = $a, \$b = $b, Locale = $Locale\n"; 1651 1652 report_result($Locale, ++$locales_test_number, $ok2); 1653 $test_names{$locales_test_number} = 'Verify that an intervening sprintf doesn\'t change assignment results'; 1654 1655 my $first_c_test = $locales_test_number; 1656 1657 report_result($Locale, ++$locales_test_number, $ok3); 1658 $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a constant'; 1659 $problematical_tests{$locales_test_number} = 1; 1660 1661 report_result($Locale, ++$locales_test_number, $ok4); 1662 $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar'; 1663 $problematical_tests{$locales_test_number} = 1; 1664 1665 report_result($Locale, ++$locales_test_number, $ok5); 1666 $test_names{$locales_test_number} = 'Verify that a different locale radix works when doing "==" with a scalar and an intervening sprintf'; 1667 $problematical_tests{$locales_test_number} = 1; 1668 1669 debug "# $first_c_test..$locales_test_number: \$c = $c, \$d = $d, Locale = $Locale\n"; 1670 1671 report_result($Locale, ++$locales_test_number, $ok6); 1672 $test_names{$locales_test_number} = 'Verify that can assign stringified under inner no-locale block'; 1673 my $first_e_test = $locales_test_number; 1674 1675 report_result($Locale, ++$locales_test_number, $ok7); 1676 $test_names{$locales_test_number} = 'Verify that "==" with a scalar still works in inner no locale'; 1677 1678 report_result($Locale, ++$locales_test_number, $ok8); 1679 $test_names{$locales_test_number} = 'Verify that "==" with a scalar and an intervening sprintf still works in inner no locale'; 1680 $problematical_tests{$locales_test_number} = 1; 1681 1682 debug "# $first_e_test..$locales_test_number: \$e = $e, no locale\n"; 1683 1684 report_result($Locale, ++$locales_test_number, $ok9); 1685 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a constant'; 1686 $problematical_tests{$locales_test_number} = 1; 1687 my $first_f_test = $locales_test_number; 1688 1689 report_result($Locale, ++$locales_test_number, $ok10); 1690 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar'; 1691 $problematical_tests{$locales_test_number} = 1; 1692 1693 report_result($Locale, ++$locales_test_number, $ok11); 1694 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix still works when doing "==" with a scalar and an intervening sprintf'; 1695 $problematical_tests{$locales_test_number} = 1; 1696 1697 report_result($Locale, ++$locales_test_number, $ok12); 1698 $test_names{$locales_test_number} = 'Verify that after a no-locale block, a different locale radix can participate in an addition and function call as numeric'; 1699 $problematical_tests{$locales_test_number} = 1; 1700 1701 report_result($Locale, ++$locales_test_number, $ok13); 1702 $test_names{$locales_test_number} = 'Verify that don\'t get warning under "==" even if radix is not a dot'; 1703 $problematical_tests{$locales_test_number} = 1; 1704 1705 report_result($Locale, ++$locales_test_number, $ok14); 1706 $test_names{$locales_test_number} = 'Verify that non-ASCII UTF-8 error messages are NOT in UTF-8'; 1707 1708 report_result($Locale, ++$locales_test_number, $ok15); 1709 $test_names{$locales_test_number} = 'Verify that a number with a UTF-8 radix has a UTF-8 stringification'; 1710 1711 report_result($Locale, ++$locales_test_number, $ok16); 1712 $test_names{$locales_test_number} = 'Verify that a sprintf of a number with a UTF-8 radix yields UTF-8'; 1713 1714 report_result($Locale, ++$locales_test_number, $ok17); 1715 $test_names{$locales_test_number} = 'Verify that a sprintf of a number outside locale scope uses a dot radix'; 1716 1717 report_result($Locale, ++$locales_test_number, $ok18); 1718 $test_names{$locales_test_number} = 'Verify that a sprintf of a number back within locale scope uses locale radix'; 1719 1720 debug "# $first_f_test..$locales_test_number: \$f = $f, \$g = $g, back to locale = $Locale\n"; 1721 1722 # Does taking lc separately differ from taking 1723 # the lc "in-line"? (This was the bug 19990704.002, change #3568.) 1724 # The bug was in the caching of the 'o'-magic. 1725 if (! $is_utf8_locale) { 1726 use locale; 1727 1728 sub lcA { 1729 my $lc0 = lc $_[0]; 1730 my $lc1 = lc $_[1]; 1731 return $lc0 cmp $lc1; 1732 } 1733 1734 sub lcB { 1735 return lc($_[0]) cmp lc($_[1]); 1736 } 1737 1738 my $x = "ab"; 1739 my $y = "aa"; 1740 my $z = "AB"; 1741 1742 report_result($Locale, ++$locales_test_number, 1743 lcA($x, $y) == 1 && lcB($x, $y) == 1 || 1744 lcA($x, $z) == 0 && lcB($x, $z) == 0); 1745 } 1746 else { 1747 use locale ':not_characters'; 1748 1749 sub lcC { 1750 my $lc0 = lc $_[0]; 1751 my $lc1 = lc $_[1]; 1752 return $lc0 cmp $lc1; 1753 } 1754 1755 sub lcD { 1756 return lc($_[0]) cmp lc($_[1]); 1757 } 1758 1759 my $x = "ab"; 1760 my $y = "aa"; 1761 my $z = "AB"; 1762 1763 report_result($Locale, ++$locales_test_number, 1764 lcC($x, $y) == 1 && lcD($x, $y) == 1 || 1765 lcC($x, $z) == 0 && lcD($x, $z) == 0); 1766 } 1767 $test_names{$locales_test_number} = 'Verify "lc(foo) cmp lc(bar)" is the same as using intermediaries for the cmp'; 1768 1769 # Does lc of an UPPER (if different from the UPPER) match 1770 # case-insensitively the UPPER, and does the UPPER match 1771 # case-insensitively the lc of the UPPER. And vice versa. 1772 { 1773 use locale; 1774 no utf8; 1775 my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/; 1776 1777 my @f = (); 1778 ++$locales_test_number; 1779 $test_names{$locales_test_number} = 'Verify case insensitive matching works'; 1780 foreach my $x (sort { ord $a <=> ord $b } keys %UPPER) { 1781 if (! $is_utf8_locale) { 1782 my $y = lc $x; 1783 next unless uc $y eq $x; 1784 debug_more( "# UPPER=", disp_chars(($x)), 1785 "; lc=", disp_chars(($y)), "; ", 1786 "; fc=", disp_chars((fc $x)), "; ", 1787 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", 1788 $x =~ /$y/i ? 1 : 0, 1789 "; ", 1790 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", 1791 $y =~ /$x/i ? 1 : 0, 1792 "\n"); 1793 # 1794 # If $x and $y contain regular expression characters 1795 # AND THEY lowercase (/i) to regular expression characters, 1796 # regcomp() will be mightily confused. No, the \Q doesn't 1797 # help here (maybe regex engine internal lowercasing 1798 # is done after the \Q?) An example of this happening is 1799 # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS): 1800 # the chr(173) (the "[") is the lowercase of the chr(235). 1801 # 1802 # Similarly losing EBCDIC locales include cs_cz, cs_CZ, 1803 # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!), 1804 # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT, 1805 # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037, 1806 # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU, 1807 # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR. 1808 # 1809 # Similar things can happen even under (bastardised) 1810 # non-EBCDIC locales: in many European countries before the 1811 # advent of ISO 8859-x nationally customised versions of 1812 # ISO 646 were devised, reusing certain punctuation 1813 # characters for modified characters needed by the 1814 # country/language. For example, the "|" might have 1815 # stood for U+00F6 or LATIN SMALL LETTER O WITH DIAERESIS. 1816 # 1817 if ($x =~ $re || $y =~ $re) { 1818 print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n"; 1819 next; 1820 } 1821 push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; 1822 1823 # fc is not a locale concept, so Perl uses lc for it. 1824 push @f, $x unless lc $x eq fc $x; 1825 } 1826 else { 1827 use locale ':not_characters'; 1828 my $y = lc $x; 1829 next unless uc $y eq $x; 1830 debug_more( "# UPPER=", disp_chars(($x)), 1831 "; lc=", disp_chars(($y)), "; ", 1832 "; fc=", disp_chars((fc $x)), "; ", 1833 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", 1834 $x =~ /$y/i ? 1 : 0, 1835 "; ", 1836 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", 1837 $y =~ /$x/i ? 1 : 0, 1838 "\n"); 1839 1840 push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; 1841 1842 # The places where Unicode's lc is different from fc are 1843 # skipped here by virtue of the 'next unless uc...' line above 1844 push @f, $x unless lc $x eq fc $x; 1845 } 1846 } 1847 1848 foreach my $x (sort { ord $a <=> ord $b } keys %lower) { 1849 if (! $is_utf8_locale) { 1850 my $y = uc $x; 1851 next unless lc $y eq $x; 1852 debug_more( "# lower=", disp_chars(($x)), 1853 "; uc=", disp_chars(($y)), "; ", 1854 "; fc=", disp_chars((fc $x)), "; ", 1855 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", 1856 $x =~ /$y/i ? 1 : 0, 1857 "; ", 1858 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", 1859 $y =~ /$x/i ? 1 : 0, 1860 "\n"); 1861 if ($x =~ $re || $y =~ $re) { # See above. 1862 print "# Regex characters in '$x' or '$y', skipping test $locales_test_number for locale '$Locale'\n"; 1863 next; 1864 } 1865 push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; 1866 1867 push @f, $x unless lc $x eq fc $x; 1868 } 1869 else { 1870 use locale ':not_characters'; 1871 my $y = uc $x; 1872 next unless lc $y eq $x; 1873 debug_more( "# lower=", disp_chars(($x)), 1874 "; uc=", disp_chars(($y)), "; ", 1875 "; fc=", disp_chars((fc $x)), "; ", 1876 disp_chars(($x)), "=~/", disp_chars(($y)), "/i=", 1877 $x =~ /$y/i ? 1 : 0, 1878 "; ", 1879 disp_chars(($y)), "=~/", disp_chars(($x)), "/i=", 1880 $y =~ /$x/i ? 1 : 0, 1881 "\n"); 1882 push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; 1883 1884 push @f, $x unless lc $x eq fc $x; 1885 } 1886 } 1887 report_multi_result($Locale, $locales_test_number, \@f); 1888 $problematical_tests{$locales_test_number} = 1; 1889 } 1890 1891 # [perl #109318] 1892 { 1893 my @f = (); 1894 ++$locales_test_number; 1895 $test_names{$locales_test_number} = 'Verify atof with locale radix and negative exponent'; 1896 $problematical_tests{$locales_test_number} = 1; 1897 1898 my $radix = POSIX::localeconv()->{decimal_point}; 1899 my @nums = ( 1900 "3.14e+9", "3${radix}14e+9", "3.14e-9", "3${radix}14e-9", 1901 "-3.14e+9", "-3${radix}14e+9", "-3.14e-9", "-3${radix}14e-9", 1902 ); 1903 1904 if (! $is_utf8_locale) { 1905 use locale; 1906 for my $num (@nums) { 1907 push @f, $num 1908 unless sprintf("%g", $num) =~ /3.+14/; 1909 } 1910 } 1911 else { 1912 use locale ':not_characters'; 1913 for my $num (@nums) { 1914 push @f, $num 1915 unless sprintf("%g", $num) =~ /3.+14/; 1916 } 1917 } 1918 1919 report_result($Locale, $locales_test_number, @f == 0); 1920 if (@f) { 1921 print "# failed $locales_test_number locale '$Locale' numbers @f\n" 1922 } 1923 } 1924} 1925 1926my $final_locales_test_number = $locales_test_number; 1927 1928# Recount the errors. 1929 1930foreach $test_num ($first_locales_test_number..$final_locales_test_number) { 1931 if (%setlocale_failed) { 1932 print "not "; 1933 } 1934 elsif ($Problem{$test_num} || !defined $Okay{$test_num} || !@{$Okay{$test_num}}) { 1935 if (defined $not_necessarily_a_problem_test_number 1936 && $test_num == $not_necessarily_a_problem_test_number) 1937 { 1938 print "# The failure of test $not_necessarily_a_problem_test_number is not necessarily fatal.\n"; 1939 print "# It usually indicates a problem in the environment,\n"; 1940 print "# not in Perl itself.\n"; 1941 } 1942 if ($Okay{$test_num} && grep { $_ == $test_num } keys %problematical_tests) { 1943 no warnings 'experimental::autoderef'; 1944 # Round to nearest .1% 1945 my $percent_fail = (int(.5 + (1000 * scalar(keys $Problem{$test_num}) 1946 / scalar(@Locale)))) 1947 / 10; 1948 if (! $debug && $percent_fail < $acceptable_failure_percentage) 1949 { 1950 $test_names{$test_num} .= 'TODO'; 1951 print "# ", 100 - $percent_fail, "% of locales pass the following test, so it is likely that the failures\n"; 1952 print "# are errors in the locale definitions. The test is marked TODO, as the\n"; 1953 print "# problem is not likely to be Perl's\n"; 1954 } 1955 } 1956 print "#\n"; 1957 if ($debug) { 1958 print "# The code points that had this failure are given above. Look for lines\n"; 1959 print "# that match 'failed $test_num'\n"; 1960 } 1961 else { 1962 print "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n"; 1963 print "# Then look at that output for lines that match 'failed $test_num'\n"; 1964 } 1965 print "not "; 1966 } 1967 print "ok $test_num"; 1968 if (defined $test_names{$test_num}) { 1969 # If TODO is in the test name, make it thus 1970 my $todo = $test_names{$test_num} =~ s/TODO\s*//; 1971 print " $test_names{$test_num}"; 1972 print " # TODO" if $todo; 1973 } 1974 print "\n"; 1975} 1976 1977$test_num = $final_locales_test_number; 1978 1979unless ( $^O =~ m!^(dragonfly|openbsd|bitrig|mirbsd)$! ) { 1980 # perl #115808 1981 use warnings; 1982 my $warned = 0; 1983 local $SIG{__WARN__} = sub { 1984 $warned = $_[0] =~ /uninitialized/; 1985 }; 1986 my $z = "y" . setlocale(&POSIX::LC_ALL, "xyzzy"); 1987 ok($warned, "variable set to setlocale(BAD LOCALE) is considered uninitialized"); 1988} 1989 1990# Test that tainting and case changing works on utf8 strings. These tests are 1991# placed last to avoid disturbing the hard-coded test numbers that existed at 1992# the time these were added above this in this file. 1993# This also tests that locale overrides unicode_strings in the same scope for 1994# non-utf8 strings. 1995setlocale(&POSIX::LC_ALL, "C"); 1996{ 1997 use locale; 1998 use feature 'unicode_strings'; 1999 2000 foreach my $function ("uc", "ucfirst", "lc", "lcfirst", "fc") { 2001 my @list; # List of code points to test for $function 2002 2003 # Used to calculate the changed case for ASCII characters by using the 2004 # ord, instead of using one of the functions under test. 2005 my $ascii_case_change_delta; 2006 my $above_latin1_case_change_delta; # Same for the specific ords > 255 2007 # that we use 2008 2009 # We test an ASCII character, which should change case; 2010 # a Latin1 character, which shouldn't change case under this C locale, 2011 # an above-Latin1 character that when the case is changed would cross 2012 # the 255/256 boundary, so doesn't change case 2013 # (the \x{149} is one of these, but changes into 2 characters, the 2014 # first one of which doesn't cross the boundary. 2015 # the final one in each list is an above-Latin1 character whose case 2016 # does change. The code below uses its position in its list as a 2017 # marker to indicate that it, unlike the other code points above 2018 # ASCII, has a successful case change 2019 # 2020 # All casing operations under locale (but not :not_characters) should 2021 # taint 2022 if ($function =~ /^u/) { 2023 @list = ("", "a", "\xe0", "\xff", "\x{fb00}", "\x{149}", "\x{101}"); 2024 $ascii_case_change_delta = -32; 2025 $above_latin1_case_change_delta = -1; 2026 } 2027 else { 2028 @list = ("", "A", "\xC0", "\x{17F}", "\x{100}"); 2029 $ascii_case_change_delta = +32; 2030 $above_latin1_case_change_delta = +1; 2031 } 2032 foreach my $is_utf8_locale (0 .. 1) { 2033 foreach my $j (0 .. $#list) { 2034 my $char = $list[$j]; 2035 2036 for my $encoded_in_utf8 (0 .. 1) { 2037 my $should_be; 2038 my $changed; 2039 if (! $is_utf8_locale) { 2040 $should_be = ($j == $#list) 2041 ? chr(ord($char) + $above_latin1_case_change_delta) 2042 : (length $char == 0 || ord($char) > 127) 2043 ? $char 2044 : chr(ord($char) + $ascii_case_change_delta); 2045 2046 # This monstrosity is in order to avoid using an eval, 2047 # which might perturb the results 2048 $changed = ($function eq "uc") 2049 ? uc($char) 2050 : ($function eq "ucfirst") 2051 ? ucfirst($char) 2052 : ($function eq "lc") 2053 ? lc($char) 2054 : ($function eq "lcfirst") 2055 ? lcfirst($char) 2056 : ($function eq "fc") 2057 ? fc($char) 2058 : die("Unexpected function \"$function\""); 2059 } 2060 else { 2061 { 2062 no locale; 2063 2064 # For utf8-locales the case changing functions 2065 # should work just like they do outside of locale. 2066 # Can use eval here because not testing it when 2067 # not in locale. 2068 $should_be = eval "$function('$char')"; 2069 die "Unexpected eval error $@ from 'eval \"$function('$char')\"'" if $@; 2070 2071 } 2072 use locale ':not_characters'; 2073 $changed = ($function eq "uc") 2074 ? uc($char) 2075 : ($function eq "ucfirst") 2076 ? ucfirst($char) 2077 : ($function eq "lc") 2078 ? lc($char) 2079 : ($function eq "lcfirst") 2080 ? lcfirst($char) 2081 : ($function eq "fc") 2082 ? fc($char) 2083 : die("Unexpected function \"$function\""); 2084 } 2085 ok($changed eq $should_be, 2086 "$function(\"$char\") in C locale " 2087 . (($is_utf8_locale) 2088 ? "(use locale ':not_characters'" 2089 : "(use locale") 2090 . (($encoded_in_utf8) 2091 ? "; encoded in utf8)" 2092 : "; not encoded in utf8)") 2093 . " should be \"$should_be\", got \"$changed\""); 2094 2095 # Tainting shouldn't happen for use locale :not_character 2096 # (a utf8 locale) 2097 (! $is_utf8_locale) 2098 ? check_taint($changed) 2099 : check_taint_not($changed); 2100 2101 # Use UTF-8 next time through the loop 2102 utf8::upgrade($char); 2103 } 2104 } 2105 } 2106 } 2107} 2108 2109# Give final advice. 2110 2111my $didwarn = 0; 2112 2113foreach ($first_locales_test_number..$final_locales_test_number) { 2114 if ($Problem{$_}) { 2115 my @f = sort keys %{ $Problem{$_} }; 2116 my $f = join(" ", @f); 2117 $f =~ s/(.{50,60}) /$1\n#\t/g; 2118 print 2119 "#\n", 2120 "# The locale ", (@f == 1 ? "definition" : "definitions"), "\n#\n", 2121 "#\t", $f, "\n#\n", 2122 "# on your system may have errors because the locale test $_\n", 2123 "# \"$test_names{$_}\"\n", 2124 "# failed in ", (@f == 1 ? "that locale" : "those locales"), 2125 ".\n"; 2126 print <<EOW; 2127# 2128# If your users are not using these locales you are safe for the moment, 2129# but please report this failure first to perlbug\@perl.com using the 2130# perlbug script (as described in the INSTALL file) so that the exact 2131# details of the failures can be sorted out first and then your operating 2132# system supplier can be alerted about these anomalies. 2133# 2134EOW 2135 $didwarn = 1; 2136 } 2137} 2138 2139# Tell which locales were okay and which were not. 2140 2141if ($didwarn) { 2142 my (@s, @F); 2143 2144 foreach my $l (@Locale) { 2145 my $p = 0; 2146 if ($setlocale_failed{$l}) { 2147 $p++; 2148 } 2149 else { 2150 foreach my $t 2151 ($first_locales_test_number..$final_locales_test_number) 2152 { 2153 $p++ if $Problem{$t}{$l}; 2154 } 2155 } 2156 push @s, $l if $p == 0; 2157 push @F, $l unless $p == 0; 2158 } 2159 2160 if (@s) { 2161 my $s = join(" ", @s); 2162 $s =~ s/(.{50,60}) /$1\n#\t/g; 2163 2164 print 2165 "# The following locales\n#\n", 2166 "#\t", $s, "\n#\n", 2167 "# tested okay.\n#\n", 2168 } else { 2169 print "# None of your locales were fully okay.\n"; 2170 } 2171 2172 if (@F) { 2173 my $F = join(" ", @F); 2174 $F =~ s/(.{50,60}) /$1\n#\t/g; 2175 2176 my $details = ""; 2177 unless ($debug) { 2178 $details = "# For more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=1.\n"; 2179 } 2180 elsif ($debug == 1) { 2181 $details = "# For even more details, rerun, with environment variable PERL_DEBUG_FULL_TEST=2.\n"; 2182 } 2183 2184 print 2185 "# The following locales\n#\n", 2186 "#\t", $F, "\n#\n", 2187 "# had problems.\n#\n", 2188 $details; 2189 } else { 2190 print "# None of your locales were broken.\n"; 2191 } 2192} 2193 2194print "1..$test_num\n"; 2195 2196# eof 2197