1#!perl -w 2 3BEGIN { 4 chdir 't'; 5 @INC = '../lib'; 6 require './test.pl'; 7} 8 9plan(tests => 215); 10 11package UTF8Toggle; 12use strict; 13 14use overload '""' => 'stringify', fallback => 1; 15 16sub new { 17 my $class = shift; 18 my $value = shift; 19 my $state = shift||0; 20 return bless [$value, $state], $class; 21} 22 23sub stringify { 24 my $self = shift; 25 $self->[1] = ! $self->[1]; 26 if ($self->[1]) { 27 utf8::downgrade($self->[0]); 28 } else { 29 utf8::upgrade($self->[0]); 30 } 31 $self->[0]; 32} 33 34package main; 35 36# These tests are based on characters 128-255 not having latin1, and hence 37# Unicode, semantics 38# no feature "unicode_strings"; 39 40# Bug 34297 41foreach my $t ("ASCII", "B\366se") { 42 my $length = length $t; 43 44 my $u = UTF8Toggle->new($t); 45 is (length $u, $length, "length of '$t'"); 46 is (length $u, $length, "length of '$t'"); 47 is (length $u, $length, "length of '$t'"); 48 is (length $u, $length, "length of '$t'"); 49} 50 51my $u = UTF8Toggle->new("\311"); 52my $lc = lc $u; 53is (length $lc, 1); 54is ($lc, "\311", "E acute -> e acute"); 55$lc = lc $u; 56is (length $lc, 1); 57is ($lc, "\351", "E acute -> e acute"); 58$lc = lc $u; 59is (length $lc, 1); 60is ($lc, "\311", "E acute -> e acute"); 61 62$u = UTF8Toggle->new("\351"); 63my $uc = uc $u; 64is (length $uc, 1); 65is ($uc, "\351", "e acute -> E acute"); 66$uc = uc $u; 67is (length $uc, 1); 68is ($uc, "\311", "e acute -> E acute"); 69$uc = uc $u; 70is (length $uc, 1); 71is ($uc, "\351", "e acute -> E acute"); 72 73$u = UTF8Toggle->new("\311"); 74$lc = lcfirst $u; 75is (length $lc, 1); 76is ($lc, "\311", "E acute -> e acute"); 77$lc = lcfirst $u; 78is (length $lc, 1); 79is ($lc, "\351", "E acute -> e acute"); 80$lc = lcfirst $u; 81is (length $lc, 1); 82is ($lc, "\311", "E acute -> e acute"); 83 84$u = UTF8Toggle->new("\351"); 85$uc = ucfirst $u; 86is (length $uc, 1); 87is ($uc, "\351", "e acute -> E acute"); 88$uc = ucfirst $u; 89is (length $uc, 1); 90is ($uc, "\311", "e acute -> E acute"); 91$uc = ucfirst $u; 92is (length $uc, 1); 93is ($uc, "\351", "e acute -> E acute"); 94 95my $have_setlocale = 0; 96eval { 97 require POSIX; 98 import POSIX ':locale_h'; 99 $have_setlocale++; 100}; 101 102SKIP: { 103 if (!$have_setlocale) { 104 skip "No setlocale", 24; 105 } elsif (!setlocale(&POSIX::LC_ALL, "en_GB.ISO8859-1")) { 106 skip "Could not setlocale to en_GB.ISO8859-1", 24; 107 } elsif ($^O eq 'dec_osf' || $^O eq 'VMS') { 108 skip "$^O has broken en_GB.ISO8859-1 locale", 24; 109 } else { 110 use locale; 111 my $u = UTF8Toggle->new("\311"); 112 my $lc = lc $u; 113 is (length $lc, 1); 114 is ($lc, "\351", "E acute -> e acute"); 115 $lc = lc $u; 116 is (length $lc, 1); 117 is ($lc, "\351", "E acute -> e acute"); 118 $lc = lc $u; 119 is (length $lc, 1); 120 is ($lc, "\351", "E acute -> e acute"); 121 122 $u = UTF8Toggle->new("\351"); 123 my $uc = uc $u; 124 is (length $uc, 1); 125 is ($uc, "\311", "e acute -> E acute"); 126 $uc = uc $u; 127 is (length $uc, 1); 128 is ($uc, "\311", "e acute -> E acute"); 129 $uc = uc $u; 130 is (length $uc, 1); 131 is ($uc, "\311", "e acute -> E acute"); 132 133 $u = UTF8Toggle->new("\311"); 134 $lc = lcfirst $u; 135 is (length $lc, 1); 136 is ($lc, "\351", "E acute -> e acute"); 137 $lc = lcfirst $u; 138 is (length $lc, 1); 139 is ($lc, "\351", "E acute -> e acute"); 140 $lc = lcfirst $u; 141 is (length $lc, 1); 142 is ($lc, "\351", "E acute -> e acute"); 143 144 $u = UTF8Toggle->new("\351"); 145 $uc = ucfirst $u; 146 is (length $uc, 1); 147 is ($uc, "\311", "e acute -> E acute"); 148 $uc = ucfirst $u; 149 is (length $uc, 1); 150 is ($uc, "\311", "e acute -> E acute"); 151 $uc = ucfirst $u; 152 is (length $uc, 1); 153 is ($uc, "\311", "e acute -> E acute"); 154 } 155} 156 157my $tmpfile = tempfile(); 158 159foreach my $operator ('print', 'syswrite', 'syswrite len', 'syswrite off', 160 'syswrite len off') { 161 foreach my $layer ('', ':utf8') { 162 open my $fh, "+>$layer", $tmpfile or die $!; 163 my $pad = $operator =~ /\boff\b/ ? "\243" : ""; 164 my $trail = $operator =~ /\blen\b/ ? "!" : ""; 165 my $u = UTF8Toggle->new("$pad\311\n$trail"); 166 my $l = UTF8Toggle->new("$pad\351\n$trail", 1); 167 if ($operator eq 'print') { 168 no warnings 'utf8'; 169 print $fh $u; 170 print $fh $u; 171 print $fh $u; 172 print $fh $l; 173 print $fh $l; 174 print $fh $l; 175 } elsif ($operator eq 'syswrite') { 176 syswrite $fh, $u; 177 syswrite $fh, $u; 178 syswrite $fh, $u; 179 syswrite $fh, $l; 180 syswrite $fh, $l; 181 syswrite $fh, $l; 182 } elsif ($operator eq 'syswrite len') { 183 syswrite $fh, $u, 2; 184 syswrite $fh, $u, 2; 185 syswrite $fh, $u, 2; 186 syswrite $fh, $l, 2; 187 syswrite $fh, $l, 2; 188 syswrite $fh, $l, 2; 189 } elsif ($operator eq 'syswrite off' 190 || $operator eq 'syswrite len off') { 191 syswrite $fh, $u, 2, 1; 192 syswrite $fh, $u, 2, 1; 193 syswrite $fh, $u, 2, 1; 194 syswrite $fh, $l, 2, 1; 195 syswrite $fh, $l, 2, 1; 196 syswrite $fh, $l, 2, 1; 197 } else { 198 die $operator; 199 } 200 201 seek $fh, 0, 0 or die $!; 202 my $line; 203 chomp ($line = <$fh>); 204 is ($line, "\311", "$operator $layer"); 205 chomp ($line = <$fh>); 206 is ($line, "\311", "$operator $layer"); 207 chomp ($line = <$fh>); 208 is ($line, "\311", "$operator $layer"); 209 chomp ($line = <$fh>); 210 is ($line, "\351", "$operator $layer"); 211 chomp ($line = <$fh>); 212 is ($line, "\351", "$operator $layer"); 213 chomp ($line = <$fh>); 214 is ($line, "\351", "$operator $layer"); 215 216 close $fh or die $!; 217 } 218} 219 220my $little = "\243\243"; 221my $big = " \243 $little ! $little ! $little \243 "; 222my $right = rindex $big, $little; 223my $right1 = rindex $big, $little, 11; 224my $left = index $big, $little; 225my $left1 = index $big, $little, 4; 226 227cmp_ok ($right, ">", $right1, "Sanity check our rindex tests"); 228cmp_ok ($left, "<", $left1, "Sanity check our index tests"); 229 230foreach my $b ($big, UTF8Toggle->new($big)) { 231 foreach my $l ($little, UTF8Toggle->new($little), 232 UTF8Toggle->new($little, 1)) { 233 is (rindex ($b, $l), $right, "rindex"); 234 is (rindex ($b, $l), $right, "rindex"); 235 is (rindex ($b, $l), $right, "rindex"); 236 237 is (rindex ($b, $l, 11), $right1, "rindex 11"); 238 is (rindex ($b, $l, 11), $right1, "rindex 11"); 239 is (rindex ($b, $l, 11), $right1, "rindex 11"); 240 241 is (index ($b, $l), $left, "index"); 242 is (index ($b, $l), $left, "index"); 243 is (index ($b, $l), $left, "index"); 244 245 is (index ($b, $l, 4), $left1, "index 4"); 246 is (index ($b, $l, 4), $left1, "index 4"); 247 is (index ($b, $l, 4), $left1, "index 4"); 248 } 249} 250 251my $bits = "\311"; 252foreach my $pieces ($bits, UTF8Toggle->new($bits)) { 253 like ($bits ^ $pieces, qr/\A\0+\z/, "something xor itself is zeros"); 254 like ($bits ^ $pieces, qr/\A\0+\z/, "something xor itself is zeros"); 255 like ($bits ^ $pieces, qr/\A\0+\z/, "something xor itself is zeros"); 256 257 like ($pieces ^ $bits, qr/\A\0+\z/, "something xor itself is zeros"); 258 like ($pieces ^ $bits, qr/\A\0+\z/, "something xor itself is zeros"); 259 like ($pieces ^ $bits, qr/\A\0+\z/, "something xor itself is zeros"); 260} 261 262foreach my $value ("\243", UTF8Toggle->new("\243")) { 263 is (pack ("A/A", $value), pack ("A/A", "\243"), 264 "pack copes with overloading"); 265 is (pack ("A/A", $value), pack ("A/A", "\243")); 266 is (pack ("A/A", $value), pack ("A/A", "\243")); 267} 268 269foreach my $value ("\243", UTF8Toggle->new("\243")) { 270 my $v; 271 $v = substr $value, 0, 1; 272 is ($v, "\243"); 273 $v = substr $value, 0, 1; 274 is ($v, "\243"); 275 $v = substr $value, 0, 1; 276 is ($v, "\243"); 277} 278 279{ 280 package RT69422; 281 use overload '""' => sub { $_[0]->{data} } 282} 283 284{ 285 my $text = bless { data => "\x{3075}" }, 'RT69422'; 286 my $p = substr $text, 0, 1; 287 is ($p, "\x{3075}"); 288} 289