1#!/usr/bin/perl 2 3use strict; 4use warnings; 5use charnames ":full"; 6 7use Test::More; 8$| = 1; 9 10BEGIN { 11 $] < 5.008002 and 12 plan skip_all => "UTF8 tests useless in this ancient perl version"; 13 } 14 15my @tests; 16my $ebcdic = ord ("A") == 0xC1; 17 18BEGIN { 19 delete $ENV{PERLIO}; 20 21 my $pu = $ENV{PERL_UNICODE}; 22 $pu = defined $pu && ($pu eq "" || $pu =~ m/[oD]/ || ($pu =~ m/^[0-9]+$/ && $pu & 16)); 23 24 my $euro_ch = "\x{20ac}"; 25 26 utf8::encode (my $bytes = $euro_ch); 27 utf8::downgrade (my $bytes_dn = $bytes); 28 utf8::upgrade (my $bytes_up = $bytes); 29 30 @tests = ( 31 # $test $perlio $data, $encoding $expect_w 32 # ---------------------------- ------------------- ----------- --------- ---------- 33 [ "Unicode default", "", $euro_ch, "utf8", $pu ? "no warn" : "warn" ], 34 [ "Unicode binmode", "[binmode]", $euro_ch, "utf8", "warn", ], 35 [ "Unicode :utf8", ":utf8", $euro_ch, "utf8", "no warn", ], 36 [ "Unicode :encoding(utf8)", ":encoding(utf8)", $euro_ch, "utf8", "no warn", ], 37 [ "Unicode :encoding(UTF-8)", ":encoding(UTF-8)", $euro_ch, "utf8", "no warn", ], 38 39 [ "bytes dn default", "", $bytes_dn, "[none]", "no warn", ], 40 [ "bytes dn binmode", "[binmode]", $bytes_dn, "[none]", "no warn", ], 41 [ "bytes dn :utf8", ":utf8", $bytes_dn, "utf8", "no warn", ], 42 [ "bytes dn :encoding(utf8)", ":encoding(utf8)", $bytes_dn, "utf8", "no warn", ], 43 [ "bytes dn :encoding(UTF-8)", ":encoding(UTF-8)", $bytes_dn, "utf8", "no warn", ], 44 45 [ "bytes up default", "", $bytes_up, "[none]", "no warn", ], 46 [ "bytes up binmode", "[binmode]", $bytes_up, "[none]", "no warn", ], 47 [ "bytes up :utf8", ":utf8", $bytes_up, "utf8", "no warn", ], 48 [ "bytes up :encoding(utf8)", ":encoding(utf8)", $bytes_up, "utf8", "no warn", ], 49 [ "bytes up :encoding(UTF-8)", ":encoding(UTF-8)", $bytes_up, "utf8", "no warn", ], 50 ); 51 52 my $builder = Test::More->builder; 53 binmode $builder->output, ":encoding(utf8)"; 54 binmode $builder->failure_output, ":encoding(utf8)"; 55 binmode $builder->todo_output, ":encoding(utf8)"; 56 57 plan tests => 11 + 6 * @tests + 4 * 22 + 6 + 10 + 2; 58 } 59 60BEGIN { 61 use_ok "Text::CSV_XS", ("csv"); 62 plan skip_all => "Cannot load Text::CSV_XS" if $@; 63 require "./t/util.pl"; 64 } 65 66sub hexify { join " ", map { sprintf "%02x", $_ } unpack "C*", @_ } 67sub warned { length ($_[0]) ? "warn" : "no warn" } 68 69my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 }); 70 71for (@tests) { 72 my ($test, $perlio, $data, $enc, $expect_w) = @$_; 73 74 my $expect = qq{"$data"}; 75 $enc eq "utf8" and utf8::encode ($expect); 76 77 my ($p_out, $p_fh) = (""); 78 my ($c_out, $c_fh) = (""); 79 80 if ($perlio eq "[binmode]") { 81 open $p_fh, ">", \$p_out or die "IO: $!\n"; binmode $p_fh; 82 open $c_fh, ">", \$c_out or die "IO: $!\n"; binmode $c_fh; 83 } 84 else { 85 open $p_fh, ">$perlio", \$p_out or die "IO: $!\n"; 86 open $c_fh, ">$perlio", \$c_out or die "IO: $!\n"; 87 } 88 89 my $p_warn = ""; 90 { local $SIG{__WARN__} = sub { $p_warn .= join "", @_ }; 91 ok ((print $p_fh qq{"$data"}), "$test perl print"); 92 close $p_fh; 93 } 94 95 my $c_warn = ""; 96 { local $SIG{__WARN__} = sub { $c_warn .= join "", @_ }; 97 ok ($csv->print ($c_fh, [ $data ]), "$test csv print"); 98 close $c_fh; 99 } 100 101 is (hexify ($c_out), hexify ($p_out), "$test against Perl"); 102 is (hexify ($c_out), hexify ($expect), "$test against expected"); 103 104 is (warned ($c_warn), warned ($p_warn), "$test against Perl warning"); 105 is (warned ($c_warn), $expect_w, "$test against expected warning"); 106 } 107 108# Test automatic upgrades for valid UTF-8 109{ my $blob = pack "C*", 0..255; $blob =~ tr/",//d; 110 # perl-5.10.x has buggy SvCUR () on blob 111 $] >= 5.010000 && $] <= 5.012001 and $blob =~ tr/\0//d; 112 my $b1 = "\x{b6}"; # PILCROW SIGN in ISO-8859-1 113 my $b2 = $ebcdic # ARABIC COMMA in UTF-8 114 ? "\x{b8}\x{57}\x{53}" 115 : "\x{d8}\x{8c}"; 116 my @data = ( 117 qq[1,aap,3], # No diac 118 qq[1,a${b1}p,3], # Single-byte 119 qq[1,a${b2}p,3], # Multi-byte 120 qq[1,"$blob",3], # Binary shit 121 ) x 2; 122 my $data = join "\n" => @data; 123 my @expect = ("aap", "a\266p", "a\x{060c}p", $blob) x 2; 124 125 my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 }); 126 127 foreach my $bc (undef, 3) { 128 my @read; 129 130 # Using getline () 131 open my $fh, "<", \$data or die "IO: $!\n"; binmode $fh; 132 $bc and $csv->bind_columns (\my ($f1, $f2, $f3)); 133 is (scalar $csv->bind_columns, $bc, "Columns_bound?"); 134 while (my $row = $csv->getline ($fh)) { 135 push @read, $bc ? $f2 : $row->[1]; 136 } 137 close $fh; 138 is_deeply (\@read, \@expect, "Set and reset UTF-8 ".($bc?"no bind":"bind_columns")); 139 is_deeply ([ map { utf8::is_utf8 ($_) } @read ], 140 [ "", "", 1, "", "", "", 1, "" ], "UTF8 flags"); 141 142 # Using parse () 143 @read = map { 144 $csv->parse ($_); 145 $bc ? $f2 : ($csv->fields)[1]; 146 } @data; 147 is_deeply (\@read, \@expect, "Set and reset UTF-8 ".($bc?"no bind":"bind_columns")); 148 is_deeply ([ map { utf8::is_utf8 ($_) } @read ], 149 [ "", "", 1, "", "", "", 1, "" ], "UTF8 flags"); 150 } 151 } 152 153my $sep = "\x{2665}";#"\N{INVISIBLE SEPARATOR}"; 154my $quo = "\x{2661}";#"\N{FULLWIDTH QUOTATION MARK}"; 155foreach my $new (0, 1, 2, 3) { 156 my %attr = ( 157 binary => 1, 158 always_quote => 1, 159 );; 160 $new & 1 and $attr{sep} = $sep; 161 $new & 2 and $attr{quote} = $quo; 162 my $csv = Text::CSV_XS->new (\%attr); 163 164 my $s = $attr{sep} || ','; 165 my $q = $attr{quote} || '"'; 166 167 note ("Test SEP: '$s', QUO: '$q'") if $Test::More::VERSION > 0.81; 168 is ($csv->sep, $s, "sep"); 169 is ($csv->quote, $q, "quote"); 170 171 foreach my $data ( 172 [ 1, 2 ], 173 [ "\N{EURO SIGN}", "\N{SNOWMAN}" ], 174# [ $sep, $quo ], 175 ) { 176 177 my $exp8 = join $s => map { qq{$q$_$q} } @$data; 178 utf8::encode (my $expb = $exp8); 179 my @exp = ($expb, $exp8); 180 181 ok ($csv->combine (@$data), "combine"); 182 my $x = $csv->string; 183 is ($csv->string, $exp8, "string"); 184 185 open my $fh, ">:encoding(utf8)", \(my $out = "") or die "IO: $!\n"; 186 ok ($csv->print ($fh, $data), "print with UTF8 sep"); 187 close $fh; 188 189 is ($out, $expb, "output"); 190 191 ok ($csv->parse ($expb), "parse"); 192 is_deeply ([ $csv->fields ], $data, "fields"); 193 194 open $fh, "<", \$expb or die "IO: $!\n"; binmode $fh; 195 is_deeply ($csv->getline ($fh), $data, "data from getline ()"); 196 close $fh; 197 198 $expb =~ tr/"//d; 199 200 ok ($csv->parse ($expb), "parse"); 201 is_deeply ([ $csv->fields ], $data, "fields"); 202 203 open $fh, "<", \$expb or die "IO: $!\n"; binmode $fh; 204 is_deeply ($csv->getline ($fh), $data, "data from getline ()"); 205 close $fh; 206 } 207 } 208 209{ my $h = "\N{WHITE HEART SUIT}"; 210 my $H = "\N{BLACK HEART SUIT}"; 211 my $str = "${h}I$h$H${h}L\"${h}ve$h$H${h}Perl$h"; 212 utf8::encode ($str); 213 ok (my $aoa = csv (in => \$str, sep => $H, quote => $h), "Hearts"); 214 is_deeply ($aoa, [[ "I", "L${h}ve", "Perl"]], "I $H Perl"); 215 216 ok (my $csv = Text::CSV_XS->new ({ 217 binary => 1, sep => $H, quote => $h }), "new hearts"); 218 ok ($csv->combine (@{$aoa->[0]}), "combine"); 219 ok ($str = $csv->string, "string"); 220 utf8::decode ($str); 221 is ($str, "I${H}${h}L\"${h}ve${h}${H}Perl", "Correct quotation"); 222 } 223 224# Tests pulled from tests in Raku 225{ my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 }); 226 my $h = pack "C*", 224, 34, 204, 182; 227 ok ($csv->combine (1, $h, 3)); 228 ok (my $s = $csv->string, "String"); 229 my $b = $h; 230 utf8::encode ($b); 231 ok ($csv->combine (1, $b, 3)); 232 ok ($s = $csv->string, "String"); 233 } 234 235{ my $h = qq{\x{10fffd}xE0"}; #" 236 my $b = $h; 237 ok ($csv->combine (1, $b, 3)); 238 ok (my $s = $csv->string, "String"); 239 $b = $h; 240 utf8::encode ($b); 241 ok ($csv->combine (1, $b, 3)); 242 ok ($s = $csv->string, "String"); 243 $b = $h; 244 utf8::encode ($b); 245 ok ($csv->combine (1, $b, 3)); 246 ok ($s = $csv->string, "String"); 247 } 248 249{ my $file = "Eric,\N{LATIN CAPITAL LETTER E WITH ACUTE}RIC\n"; 250 utf8::encode ($file); 251 open my $fh, "<", \$file or die $!; 252 253 my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 2 }); 254 is_deeply ( 255 [ $csv->header ($fh) ], 256 [ "eric", "\N{LATIN SMALL LETTER E WITH ACUTE}ric" ], 257 "Lowercase unicode header"); 258 } 259 260{ my $file = "Eric,\N{LATIN SMALL LETTER E WITH ACUTE}ric\n"; 261 utf8::encode ($file); 262 open my $fh, "<", \$file or die $!; 263 264 my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 2 }); 265 is_deeply ( 266 [ $csv->header ($fh, { munge => "uc" }) ], 267 [ "ERIC", "\N{LATIN CAPITAL LETTER E WITH ACUTE}RIC" ], 268 "Uppercase unicode header"); 269 } 270