1use warnings; 2use strict; 3 4BEGIN { 5 chdir 't' if -d 't'; 6 @INC = '../lib'; 7 require './test.pl'; 8} 9 10plan(20736); # Determined by experimentation 11 12# In this section, test the upper/lower/title case mappings for all characters 13# 0-255. 14 15# First compute the case mappings without resorting to the functions we're 16# testing. 17 18# Initialize the arrays so each $i maps to itself. 19my @posix_to_upper; 20for my $i (0 .. 255) { 21 $posix_to_upper[$i] = chr($i); 22} 23my @posix_to_lower 24= my @posix_to_title 25= my @latin1_to_upper 26= my @latin1_to_lower 27= my @latin1_to_title 28= @posix_to_upper; 29 30# Override the elements in the to_lower arrays that have different standard 31# lower case mappings. (standard meaning they are 32 numbers apart) 32for my $i (0x41 .. 0x5A, 0xC0 .. 0xD6, 0xD8 .. 0xDE) { 33 my $upper_ord = utf8::unicode_to_native $i; 34 my $lower_ord = utf8::unicode_to_native($i + 32); 35 36 $latin1_to_lower[$upper_ord] = chr($lower_ord); 37 38 next if $i > 127; 39 40 $posix_to_lower[$upper_ord] = chr($lower_ord); 41} 42 43# Same for upper and title 44for my $i (0x61 .. 0x7A, 0xE0 .. 0xF6, 0xF8 .. 0xFE) { 45 my $lower_ord = utf8::unicode_to_native $i; 46 my $upper_ord = utf8::unicode_to_native($i - 32); 47 48 $latin1_to_upper[$lower_ord] = chr($upper_ord); 49 $latin1_to_title[$lower_ord] = chr($upper_ord); 50 51 next if $i > 127; 52 53 $posix_to_upper[$lower_ord] = chr($upper_ord); 54 $posix_to_title[$lower_ord] = chr($upper_ord); 55} 56 57# Override the abnormal cases. 58$latin1_to_upper[utf8::unicode_to_native 0xB5] = chr(0x39C); 59$latin1_to_title[utf8::unicode_to_native 0xB5] = chr(0x39C); 60$latin1_to_upper[utf8::unicode_to_native 0xDF] = 'SS'; 61$latin1_to_title[utf8::unicode_to_native 0xDF] = 'Ss'; 62$latin1_to_upper[utf8::unicode_to_native 0xFF] = chr(0x178); 63$latin1_to_title[utf8::unicode_to_native 0xFF] = chr(0x178); 64 65my $repeat = 25; # Length to make strings. 66 67# Create hashes of strings in several ranges, both for uc and lc. 68my %posix; 69$posix{'uc'} = 'A' x $repeat; 70$posix{'lc'} = 'a' x $repeat ; 71 72my %cyrillic; 73$cyrillic{'uc'} = chr(0x42F) x $repeat; 74$cyrillic{'lc'} = chr(0x44F) x $repeat; 75 76my %latin1; 77$latin1{'uc'} = chr(utf8::unicode_to_native 0xD8) x $repeat; 78$latin1{'lc'} = chr(utf8::unicode_to_native 0xF8) x $repeat; 79 80my %empty; 81$empty{'lc'} = $empty{'uc'} = ""; 82 83# Loop so prefix each character being tested with nothing, and the various 84# strings; then loop for suffixes of those strings as well. 85for my $prefix (\%empty, \%posix, \%cyrillic, \%latin1) { 86 for my $suffix (\%empty, \%posix, \%cyrillic, \%latin1) { 87 for my $i (0 .. 255) { # For each possible posix or latin1 character 88 my $cp = sprintf "U+%04X", $i; 89 90 # First try using latin1 (Unicode) semantics. 91 use feature "unicode_strings"; 92 93 my $phrase = 'in uni8bit'; 94 my $char = chr($i); 95 my $pre_lc = $prefix->{'lc'}; 96 my $pre_uc = $prefix->{'uc'}; 97 my $post_lc = $suffix->{'lc'}; 98 my $post_uc = $suffix->{'uc'}; 99 my $to_upper = $pre_lc . $char . $post_lc; 100 my $expected_upper = $pre_uc . $latin1_to_upper[$i] . $post_uc; 101 my $to_lower = $pre_uc . $char . $post_uc; 102 my $expected_lower = $pre_lc . $latin1_to_lower[$i] . $post_lc; 103 104 is (uc($to_upper), $expected_upper, 105 display("$cp: $phrase: Verify uc($to_upper) eq $expected_upper")); 106 is (lc($to_lower), $expected_lower, 107 display("$cp: $phrase: Verify lc($to_lower) eq $expected_lower")); 108 109 if ($pre_uc eq "") { # Title case if null prefix. 110 my $expected_title = $latin1_to_title[$i] . $post_lc; 111 is (ucfirst($to_upper), $expected_title, 112 display("$cp: $phrase: Verify ucfirst($to_upper) eq $expected_title")); 113 my $expected_lcfirst = $latin1_to_lower[$i] . $post_uc; 114 is (lcfirst($to_lower), $expected_lcfirst, 115 display("$cp: $phrase: Verify lcfirst($to_lower) eq $expected_lcfirst")); 116 } 117 118 # Then try with posix semantics. 119 no feature "unicode_strings"; 120 $phrase = 'no uni8bit'; 121 122 # These don't contribute anything in this case. 123 next if $suffix == \%cyrillic; 124 next if $suffix == \%latin1; 125 next if $prefix == \%cyrillic; 126 next if $prefix == \%latin1; 127 128 $expected_upper = $pre_uc . $posix_to_upper[$i] . $post_uc; 129 $expected_lower = $pre_lc . $posix_to_lower[$i] . $post_lc; 130 131 is (uc($to_upper), $expected_upper, 132 display("$cp: $phrase: Verify uc($to_upper) eq $expected_upper")); 133 is (lc($to_lower), $expected_lower, 134 display("$cp: $phrase: Verify lc($to_lower) eq $expected_lower")); 135 136 if ($pre_uc eq "") { 137 my $expected_title = $posix_to_title[$i] . $post_lc; 138 is (ucfirst($to_upper), $expected_title, 139 display("$cp: $phrase: Verify ucfirst($to_upper) eq $expected_title")); 140 my $expected_lcfirst = $posix_to_lower[$i] . $post_uc; 141 is (lcfirst($to_lower), $expected_lcfirst, 142 display("$cp: $phrase: Verify lcfirst($to_lower) eq $expected_lcfirst")); 143 } 144 } 145 } 146} 147 148# In this section test that \w, \s, and \b (and complements) work correctly. 149# These are the only character classes affected by this pragma. Above ASCII 150# range Latin-1 characters are in \w and \s iff the pragma is on. 151 152# Construct the expected full Latin1 values without using anything we're 153# testing. All these were determined manually by looking in the manual. 154# Boolean: is w[$i] a \w character? 155my @w = (0) x 256; 156for my $i ( 0x30 .. 0x39, # 0-9 157 0x41 .. 0x5a, # A-Z 158 0x61 .. 0x7a, # a-z 159 0x5F, # _ 160 0xAA, # FEMININE ORDINAL INDICATOR 161 0xB5, # MICRO SIGN 162 0xBA, # MASCULINE ORDINAL INDICATOR 163 0xC0 .. 0xD6, # various 164 0xD8 .. 0xF6, # various 165 0xF8 .. 0xFF, # various 166 ) 167{ 168 $w[utf8::unicode_to_native $i] = 1; 169} 170 171# Boolean: is s[$i] a \s character? 172my @s = (0) x 256; 173$s[utf8::unicode_to_native 0x09] = 1; # Tab 174$s[utf8::unicode_to_native 0x0A] = 1; # LF 175$s[utf8::unicode_to_native 0x0B] = 1; # VT 176$s[utf8::unicode_to_native 0x0C] = 1; # FF 177$s[utf8::unicode_to_native 0x0D] = 1; # CR 178$s[utf8::unicode_to_native 0x20] = 1; # SPACE 179$s[utf8::unicode_to_native 0x85] = 1; # NEL 180$s[utf8::unicode_to_native 0xA0] = 1; # NO BREAK SPACE 181 182for my $i (0 .. 255) { 183 my $char = chr($i); 184 my $hex_i = sprintf "%02X", $i; 185 foreach my $which (\@s, \@w) { 186 my $basic_name; 187 if ($which == \@s) { 188 $basic_name = 's'; 189 } else { 190 $basic_name = 'w' 191 } 192 193 # Test \w \W \s \S 194 foreach my $complement (0, 1) { 195 my $name = '\\' . (($complement) ? uc($basic_name) : $basic_name); 196 197 # in and out of [...] 198 foreach my $charclass (0, 1) { 199 200 # And like [^...] or just plain [...] 201 foreach my $complement_class (0, 1) { 202 next if ! $charclass && $complement_class; 203 204 # Start with the boolean as to if the character is in the 205 # class, and then complement as needed. 206 my $expect_success = $which->[$i]; 207 $expect_success = ! $expect_success if $complement; 208 $expect_success = ! $expect_success if $complement_class; 209 210 my $test = $name; 211 $test = "^$test" if $complement_class; 212 $test = "[$test]" if $charclass; 213 $test = "^$test\$"; 214 215 use feature 'unicode_strings'; 216 my $prefix = "in uni8bit; Verify chr(0x$hex_i)"; 217 if ($expect_success) { 218 like($char, qr/$test/, display("$prefix =~ qr/$test/")); 219 } else { 220 unlike($char, qr/$test/, display("$prefix !~ qr/$test/")); 221 } 222 223 no feature 'unicode_strings'; 224 $prefix = "no uni8bit; Verify chr(0x$hex_i)"; 225 226 # With the legacy, nothing above 128 should be in the 227 # class 228 if (utf8::native_to_unicode($i) >= 128) { 229 $expect_success = 0; 230 $expect_success = ! $expect_success if $complement; 231 $expect_success = ! $expect_success if $complement_class; 232 } 233 if ($expect_success) { 234 like($char, qr/$test/, display("$prefix =~ qr/$test/")); 235 } else { 236 unlike($char, qr/$test/, display("$prefix !~ qr/$test/")); 237 } 238 } 239 } 240 } 241 } 242 243 # Similarly for \b and \B. 244 foreach my $complement (0, 1) { 245 my $name = '\\' . (($complement) ? 'B' : 'b'); 246 my $expect_success = ! $w[$i]; # \b is complement of \w 247 $expect_success = ! $expect_success if $complement; 248 249 my $string = "a$char"; 250 my $test = "(^a$name\\x{$hex_i}\$)"; 251 252 use feature 'unicode_strings'; 253 my $prefix = "in uni8bit; Verify $string"; 254 if ($expect_success) { 255 like($string, qr/$test/, display("$prefix =~ qr/$test/")); 256 } else { 257 unlike($string, qr/$test/, display("$prefix !~ qr/$test/")); 258 } 259 260 no feature 'unicode_strings'; 261 $prefix = "no uni8bit; Verify $string"; 262 if (utf8::native_to_unicode($i) >= 128) { 263 $expect_success = 1; 264 $expect_success = ! $expect_success if $complement; 265 } 266 if ($expect_success) { 267 like($string, qr/$test/, display("$prefix =~ qr/$test/")); 268 like($string, qr/$test/, display("$prefix =~ qr/$test/")); 269 } else { 270 unlike($string, qr/$test/, display("$prefix !~ qr/$test/")); 271 } 272 } 273} 274