1use warnings;
2use strict;
3
4BEGIN {
5    chdir 't' if -d 't';
6    @INC = '../lib';
7    require './test.pl';
8}
9
10plan(20736);    # Determined by experimentation
11
12# In this section, test the upper/lower/title case mappings for all characters
13# 0-255.
14
15# First compute the case mappings without resorting to the functions we're
16# testing.
17
18# Initialize the arrays so each $i maps to itself.
19my @posix_to_upper;
20for my $i (0 .. 255) {
21    $posix_to_upper[$i] = chr($i);
22}
23my @posix_to_lower
24= my @posix_to_title
25= my @latin1_to_upper
26= my @latin1_to_lower
27= my @latin1_to_title
28= @posix_to_upper;
29
30# Override the elements in the to_lower arrays that have different standard
31# lower case mappings.  (standard meaning they are 32 numbers apart)
32for my $i (0x41 .. 0x5A, 0xC0 .. 0xD6, 0xD8 .. 0xDE) {
33    my $upper_ord = utf8::unicode_to_native $i;
34    my $lower_ord = utf8::unicode_to_native($i + 32);
35
36    $latin1_to_lower[$upper_ord] = chr($lower_ord);
37
38    next if $i > 127;
39
40    $posix_to_lower[$upper_ord] = chr($lower_ord);
41}
42
43# Same for upper and title
44for my $i (0x61 .. 0x7A, 0xE0 .. 0xF6, 0xF8 .. 0xFE) {
45    my $lower_ord = utf8::unicode_to_native $i;
46    my $upper_ord = utf8::unicode_to_native($i - 32);
47
48    $latin1_to_upper[$lower_ord] = chr($upper_ord);
49    $latin1_to_title[$lower_ord] = chr($upper_ord);
50
51    next if $i > 127;
52
53    $posix_to_upper[$lower_ord] = chr($upper_ord);
54    $posix_to_title[$lower_ord] = chr($upper_ord);
55}
56
57# Override the abnormal cases.
58$latin1_to_upper[utf8::unicode_to_native 0xB5] = chr(0x39C);
59$latin1_to_title[utf8::unicode_to_native 0xB5] = chr(0x39C);
60$latin1_to_upper[utf8::unicode_to_native 0xDF] = 'SS';
61$latin1_to_title[utf8::unicode_to_native 0xDF] = 'Ss';
62$latin1_to_upper[utf8::unicode_to_native 0xFF] = chr(0x178);
63$latin1_to_title[utf8::unicode_to_native 0xFF] = chr(0x178);
64
65my $repeat = 25;    # Length to make strings.
66
67# Create hashes of strings in several ranges, both for uc and lc.
68my %posix;
69$posix{'uc'} = 'A' x $repeat;
70$posix{'lc'} = 'a' x $repeat ;
71
72my %cyrillic;
73$cyrillic{'uc'} = chr(0x42F) x $repeat;
74$cyrillic{'lc'} = chr(0x44F) x $repeat;
75
76my %latin1;
77$latin1{'uc'} = chr(utf8::unicode_to_native 0xD8) x $repeat;
78$latin1{'lc'} = chr(utf8::unicode_to_native 0xF8) x $repeat;
79
80my %empty;
81$empty{'lc'} = $empty{'uc'} = "";
82
83# Loop so prefix each character being tested with nothing, and the various
84# strings; then loop for suffixes of those strings as well.
85for my  $prefix (\%empty, \%posix, \%cyrillic, \%latin1) {
86    for my  $suffix (\%empty, \%posix, \%cyrillic, \%latin1) {
87        for my $i (0 .. 255) {  # For each possible posix or latin1 character
88            my $cp = sprintf "U+%04X", $i;
89
90            # First try using latin1 (Unicode) semantics.
91            use feature "unicode_strings";
92
93            my $phrase = 'in uni8bit';
94            my $char = chr($i);
95            my $pre_lc = $prefix->{'lc'};
96            my $pre_uc = $prefix->{'uc'};
97            my $post_lc = $suffix->{'lc'};
98            my $post_uc = $suffix->{'uc'};
99            my $to_upper = $pre_lc . $char . $post_lc;
100            my $expected_upper = $pre_uc . $latin1_to_upper[$i] . $post_uc;
101            my $to_lower = $pre_uc . $char . $post_uc;
102            my $expected_lower = $pre_lc . $latin1_to_lower[$i] . $post_lc;
103
104            is (uc($to_upper), $expected_upper,
105                display("$cp: $phrase: Verify uc($to_upper) eq $expected_upper"));
106            is (lc($to_lower), $expected_lower,
107                display("$cp: $phrase: Verify lc($to_lower) eq $expected_lower"));
108
109            if ($pre_uc eq "") {    # Title case if null prefix.
110                my $expected_title = $latin1_to_title[$i] . $post_lc;
111                is (ucfirst($to_upper), $expected_title,
112                    display("$cp: $phrase: Verify ucfirst($to_upper) eq $expected_title"));
113                my $expected_lcfirst = $latin1_to_lower[$i] . $post_uc;
114                is (lcfirst($to_lower), $expected_lcfirst,
115                    display("$cp: $phrase: Verify lcfirst($to_lower) eq $expected_lcfirst"));
116            }
117
118            # Then try with posix semantics.
119            no feature "unicode_strings";
120            $phrase = 'no uni8bit';
121
122            # These don't contribute anything in this case.
123            next if $suffix == \%cyrillic;
124            next if $suffix == \%latin1;
125            next if $prefix == \%cyrillic;
126            next if $prefix == \%latin1;
127
128            $expected_upper = $pre_uc . $posix_to_upper[$i] . $post_uc;
129            $expected_lower = $pre_lc . $posix_to_lower[$i] . $post_lc;
130
131            is (uc($to_upper), $expected_upper,
132                display("$cp: $phrase: Verify uc($to_upper) eq $expected_upper"));
133            is (lc($to_lower), $expected_lower,
134                display("$cp: $phrase: Verify lc($to_lower) eq $expected_lower"));
135
136            if ($pre_uc eq "") {
137                my $expected_title = $posix_to_title[$i] . $post_lc;
138                is (ucfirst($to_upper), $expected_title,
139                    display("$cp: $phrase: Verify ucfirst($to_upper) eq $expected_title"));
140                my $expected_lcfirst = $posix_to_lower[$i] . $post_uc;
141                is (lcfirst($to_lower), $expected_lcfirst,
142                    display("$cp: $phrase: Verify lcfirst($to_lower) eq $expected_lcfirst"));
143            }
144        }
145    }
146}
147
148# In this section test that \w, \s, and \b (and complements) work correctly.
149# These are the only character classes affected by this pragma.  Above ASCII
150# range Latin-1 characters are in \w and \s iff the pragma is on.
151
152# Construct the expected full Latin1 values without using anything we're
153# testing.  All these were determined manually by looking in the manual.
154# Boolean: is w[$i] a \w character?
155my @w = (0) x 256;
156for my $i ( 0x30 .. 0x39,   # 0-9
157            0x41 .. 0x5a,   # A-Z
158            0x61 .. 0x7a,   # a-z
159            0x5F,           # _
160            0xAA,           # FEMININE ORDINAL INDICATOR
161            0xB5,           # MICRO SIGN
162            0xBA,           # MASCULINE ORDINAL INDICATOR
163            0xC0 .. 0xD6,   # various
164            0xD8 .. 0xF6,   # various
165            0xF8 .. 0xFF,   # various
166        )
167{
168    $w[utf8::unicode_to_native $i] = 1;
169}
170
171# Boolean: is s[$i] a \s character?
172my @s = (0) x 256;
173$s[utf8::unicode_to_native 0x09] = 1;   # Tab
174$s[utf8::unicode_to_native 0x0A] = 1;   # LF
175$s[utf8::unicode_to_native 0x0B] = 1;   # VT
176$s[utf8::unicode_to_native 0x0C] = 1;   # FF
177$s[utf8::unicode_to_native 0x0D] = 1;   # CR
178$s[utf8::unicode_to_native 0x20] = 1;   # SPACE
179$s[utf8::unicode_to_native 0x85] = 1;   # NEL
180$s[utf8::unicode_to_native 0xA0] = 1;   # NO BREAK SPACE
181
182for my $i (0 .. 255) {
183    my $char = chr($i);
184    my $hex_i = sprintf "%02X", $i;
185    foreach my $which (\@s, \@w) {
186        my $basic_name;
187        if ($which == \@s) {
188            $basic_name = 's';
189        } else {
190            $basic_name = 'w'
191        }
192
193        # Test \w \W \s \S
194        foreach my $complement (0, 1) {
195            my $name = '\\' . (($complement) ? uc($basic_name) : $basic_name);
196
197            # in and out of [...]
198            foreach my $charclass (0, 1) {
199
200                # And like [^...] or just plain [...]
201                foreach my $complement_class (0, 1) {
202                    next if ! $charclass && $complement_class;
203
204                    # Start with the boolean as to if the character is in the
205                    # class, and then complement as needed.
206                    my $expect_success = $which->[$i];
207                    $expect_success = ! $expect_success if $complement;
208                    $expect_success = ! $expect_success if $complement_class;
209
210                    my $test = $name;
211                    $test = "^$test" if $complement_class;
212                    $test = "[$test]" if $charclass;
213                    $test = "^$test\$";
214
215                    use feature 'unicode_strings';
216                    my $prefix = "in uni8bit; Verify chr(0x$hex_i)";
217                    if ($expect_success) {
218                        like($char, qr/$test/, display("$prefix =~ qr/$test/"));
219                    } else {
220                        unlike($char, qr/$test/, display("$prefix !~ qr/$test/"));
221                    }
222
223                    no feature 'unicode_strings';
224                    $prefix = "no uni8bit; Verify chr(0x$hex_i)";
225
226                    # With the legacy, nothing above 128 should be in the
227                    # class
228                    if (utf8::native_to_unicode($i) >= 128) {
229                        $expect_success = 0;
230                        $expect_success = ! $expect_success if $complement;
231                        $expect_success = ! $expect_success if $complement_class;
232                    }
233                    if ($expect_success) {
234                        like($char, qr/$test/, display("$prefix =~ qr/$test/"));
235                    } else {
236                        unlike($char, qr/$test/, display("$prefix !~ qr/$test/"));
237                    }
238                }
239            }
240        }
241    }
242
243    # Similarly for \b and \B.
244    foreach my $complement (0, 1) {
245        my $name = '\\' . (($complement) ? 'B' : 'b');
246        my $expect_success = ! $w[$i];  # \b is complement of \w
247        $expect_success = ! $expect_success if $complement;
248
249        my $string = "a$char";
250        my $test = "(^a$name\\x{$hex_i}\$)";
251
252        use feature 'unicode_strings';
253        my $prefix = "in uni8bit; Verify $string";
254        if ($expect_success) {
255            like($string, qr/$test/, display("$prefix =~ qr/$test/"));
256        } else {
257            unlike($string, qr/$test/, display("$prefix !~ qr/$test/"));
258        }
259
260        no feature 'unicode_strings';
261        $prefix = "no uni8bit; Verify $string";
262        if (utf8::native_to_unicode($i) >= 128) {
263            $expect_success = 1;
264            $expect_success = ! $expect_success if $complement;
265        }
266        if ($expect_success) {
267            like($string, qr/$test/, display("$prefix =~ qr/$test/"));
268            like($string, qr/$test/, display("$prefix =~ qr/$test/"));
269        } else {
270            unlike($string, qr/$test/, display("$prefix !~ qr/$test/"));
271        }
272    }
273}
274