1#!/usr/bin/perl 2 3use strict; 4$^W = 1; 5 6use Test::More; 7 8my $ebcdic = ord ("A") == 0xC1; 9my $pu; 10BEGIN { 11 $pu = $ENV{PERL_UNICODE}; 12 $pu = defined $pu && ($pu eq "" || $pu =~ m/[oD]/ || ($pu =~ m/^[0-9]+$/ && $pu & 16)); 13 14 if ($] < 5.008002) { 15 plan skip_all => "This test unit requires perl-5.8.2 or higher"; 16 } 17 else { 18 my $n = 1448; 19 $pu and $n -= 120; 20 plan tests => $n; 21 } 22 23 $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0; 24 25 use_ok "Text::CSV", "csv"; 26 # Encode up to and including 2.01 have an error in a regex: 27 # False [] range "\s-" in regex; marked by <-- HERE in m/\bkoi8[\s- <-- HERE _]*([ru])$/ 28 # in Encode::Alias. This however does not influence this test, as then *all* encodings 29 # are skipped as unsupported 30 require Encode; 31 require "./t/util.pl"; 32 } 33 34$| = 1; 35 36ok (my $csv = Text::CSV->new, "new for header tests"); 37is ($csv->sep_char, ",", "Sep = ,"); 38 39my $hdr_lc = [qw( bar foo )]; 40 41foreach my $sep (",", ";") { 42 my $data = "bAr,foo\n1,2\n3,4,5\n"; 43 $data =~ s/,/$sep/g; 44 45 $csv->column_names (undef); 46 { open my $fh, "<", \$data; 47 ok (my $slf = $csv->header ($fh), "header"); 48 is ($slf, $csv, "Return self"); 49 is ($csv->sep_char, $sep, "Sep = $sep"); 50 is_deeply ([ $csv->column_names ], $hdr_lc, "headers"); 51 is_deeply ($csv->getline ($fh), [ 1, 2 ], "Line 1"); 52 is_deeply ($csv->getline ($fh), [ 3, 4, 5 ], "Line 2"); 53 close $fh; 54 } 55 56 $csv->column_names (undef); 57 { open my $fh, "<", \$data; 58 ok (my @hdr = $csv->header ($fh), "header"); 59 is_deeply (\@hdr, $hdr_lc, "Return headers"); 60 close $fh; 61 } 62 63 $csv->column_names (undef); 64 { open my $fh, "<", \$data; 65 ok (my $slf = $csv->header ($fh), "header"); 66 is ($slf, $csv, "Return self"); 67 is ($csv->sep_char, $sep, "Sep = $sep"); 68 is_deeply ([ $csv->column_names ], $hdr_lc, "headers"); 69 is_deeply ($csv->getline_hr ($fh), { bar => 1, foo => 2 }, "Line 1"); 70 is_deeply ($csv->getline_hr ($fh), { bar => 3, foo => 4 }, "Line 2"); 71 close $fh; 72 } 73 74 { open my $fh, "<", \$data; 75 is_deeply (csv (in => $fh, bom => 1), 76 [{ bar => 1, foo => 2 }, { bar => 3, foo => 4 }], 77 "use header () from csv () with $sep"); 78 } 79 80 { open my $fh, "<", \$data; 81 is_deeply (csv (in => $fh, seps => [ ",", ";" ]), 82 [{ bar => 1, foo => 2 }, { bar => 3, foo => 4 }], 83 "use header () from csv () with $sep"); 84 } 85 86 { open my $fh, "<", \$data; 87 is_deeply (csv (in => $fh, bom => 1, key => "bar"), 88 { 1 => { bar => 1, foo => 2 }, 3 => { bar => 3, foo => 4 }}, 89 "use header () from csv (key) with $sep"); 90 } 91 92 { open my $fh, "<", \$data; 93 is_deeply (csv (in => $fh, munge => "uc", key => "BAR"), 94 { 1 => { BAR => 1, FOO => 2 }, 3 => { BAR => 3, FOO => 4 }}, 95 "use header () from csv (key, uc) with $sep"); 96 } 97 98 { open my $fh, "<", \$data; 99 is_deeply (csv (in => $fh, set_column_names => 0), 100 [[ "bar", "foo" ], [ 1, 2 ], [ 3, 4, 5 ]], 101 "use header () from csv () with $sep to ARRAY not setting column names"); 102 } 103 { open my $fh, "<", \$data; 104 is_deeply (csv (in => $fh, set_column_names => 0, munge => "none"), 105 [[ "bAr", "foo" ], [ 1, 2 ], [ 3, 4, 5 ]], 106 "use header () from csv () with $sep to ARRAY not setting column names not lc"); 107 } 108 } 109 110my $sep_utf = byte_utf8a_to_utf8n ("\xe2\x81\xa3"); # U+2063 INVISIBLE SEPARATOR 111my $sep_ok = [ "\t", "|", ",", ";", "##", $sep_utf ]; 112unless ($pu) { 113 foreach my $sep (@$sep_ok) { 114 my $data = "bAr,foo\n1,2\n3,4,5\n"; 115 $data =~ s/,/$sep/g; 116 117 $csv->column_names (undef); 118 { open my $fh, "<", \$data; 119 ok (my $slf = $csv->header ($fh, $sep_ok), "header with specific sep set"); 120 is ($slf, $csv, "Return self"); 121 is (Encode::encode ("utf-8", $csv->sep), $sep, "Sep = $sep"); 122 is_deeply ([ $csv->column_names ], $hdr_lc, "headers"); 123 is_deeply ($csv->getline ($fh), [ 1, 2 ], "Line 1"); 124 is_deeply ($csv->getline ($fh), [ 3, 4, 5 ], "Line 2"); 125 close $fh; 126 } 127 128 $csv->column_names (undef); 129 { open my $fh, "<", \$data; 130 ok (my @hdr = $csv->header ($fh, $sep_ok), "header with specific sep set"); 131 is_deeply (\@hdr, $hdr_lc, "Return headers"); 132 close $fh; 133 } 134 135 $csv->column_names (undef); 136 { open my $fh, "<", \$data; 137 ok (my $slf = $csv->header ($fh, { sep_set => $sep_ok }), "header with specific sep set as opt"); 138 is ($slf, $csv, "Return self"); 139 is (Encode::encode ("utf-8", $csv->sep), $sep, "Sep = $sep"); 140 is_deeply ([ $csv->column_names ], $hdr_lc, "headers"); 141 is_deeply ($csv->getline ($fh), [ 1, 2 ], "Line 1"); 142 is_deeply ($csv->getline ($fh), [ 3, 4, 5 ], "Line 2"); 143 close $fh; 144 } 145 146 $csv->column_names (undef); 147 { open my $fh, "<", \$data; 148 ok (my $slf = $csv->header ($fh, $sep_ok), "header with specific sep set"); 149 is ($slf, $csv, "Return self"); 150 is (Encode::encode ("utf-8", $csv->sep), $sep, "Sep = $sep"); 151 is_deeply ([ $csv->column_names ], $hdr_lc, "headers"); 152 is_deeply ($csv->getline_hr ($fh), { bar => 1, foo => 2 }, "Line 1"); 153 is_deeply ($csv->getline_hr ($fh), { bar => 3, foo => 4 }, "Line 2"); 154 close $fh; 155 } 156 } 157 } 158 159for ( [ 1010, 0, qq{} ], # Empty header 160 [ 1011, 0, qq{a,b;c,d} ], # Multiple allowed separators 161 [ 1012, 0, qq{a,,b} ], # Empty header field 162 [ 1013, 0, qq{a,a,b} ], # Non-unique headers 163 [ 2027, 1, qq{a,"b\nc",c} ], # Embedded newline binary on 164 [ 2021, 0, qq{a,"b\nc",c} ], # Embedded newline binary off 165 ) { 166 my ($err, $bin, $data) = @$_; 167 $csv->binary ($bin); 168 open my $fh, "<", \$data; 169 my $self = eval { $csv->header ($fh); }; 170 is ($self, undef, "FAIL for '$data'"); 171 ok ($@, "Error"); 172 is (0 + $csv->error_diag, $err, "Error code $err"); 173 close $fh; 174 } 175{ open my $fh, "<", \"bar,bAr,bAR,BAR\n1,2,3,4"; 176 $csv->column_names (undef); 177 ok ($csv->header ($fh, { munge_column_names => "none", detect_bom => 0 }), "non-unique unfolded headers"); 178 is_deeply ([ $csv->column_names ], [qw( bar bAr bAR BAR )], "Headers"); 179 close $fh; 180 } 181{ open my $fh, "<", \"bar,bAr,bAR,BAR\n1,2,3,4"; 182 $csv->column_names (undef); 183 ok (my @hdr = $csv->header ($fh, { munge_column_names => "none" }), "non-unique unfolded headers"); 184 is_deeply (\@hdr, [qw( bar bAr bAR BAR )], "Headers from method"); 185 is_deeply ([ $csv->column_names ], [qw( bar bAr bAR BAR )], "Headers from column_names"); 186 close $fh; 187 } 188 189foreach my $sep (",", ";") { 190 my $data = "bAr,foo\n1,2\n3,4,5\n"; 191 $data =~ s/,/$sep/g; 192 193 $csv->column_names (undef); 194 { open my $fh, "<", \$data; 195 ok (my $slf = $csv->header ($fh, { set_column_names => 0 }), "Header without column setting"); 196 is ($slf, $csv, "Return self"); 197 is ($csv->sep_char, $sep, "Sep = $sep"); 198 is_deeply ([ $csv->column_names ], [], "headers"); 199 is_deeply ($csv->getline ($fh), [ 1, 2 ], "Line 1"); 200 is_deeply ($csv->getline ($fh), [ 3, 4, 5 ], "Line 2"); 201 close $fh; 202 } 203 $csv->column_names (undef); 204 { open my $fh, "<", \$data; 205 ok (my @hdr = $csv->header ($fh, { set_column_names => 0 }), "Header without column setting"); 206 is_deeply (\@hdr, $hdr_lc, "Headers from method"); 207 is_deeply ([ $csv->column_names ], [], "Headers from column_names"); 208 close $fh; 209 } 210 } 211 212foreach my $ss ("", "bad", sub { 1; }, \*STDOUT, +{}) { 213 my $dta = "a,b\n1,2\n"; 214 open my $fh, "<", \$dta; 215 my @hdr = eval { $csv->header ($fh, { sep_set => $ss }) }; 216 is (scalar @hdr, 0, "No header on invalid sep_set"); 217 is (0 + $csv->error_diag, 1500, "Error code"); 218 } 219 220foreach my $dta ("", "\xfe\xff", "\xf7\x64\x4c", "\xdd\x73\x66\x73", 221 "\x0e\xfe\xff", "\xfb\xee\x28", "\x84\x31\x95\x33") { 222 open my $fh, "<", \$dta; 223 my @hdr = eval { $csv->header ($fh) }; 224 is (scalar @hdr, 0, "No header on empty stream"); 225 is (0 + $csv->error_diag, 1010, "Error code"); 226 } 227 228my $n; 229for ([ undef, "_bar" ], [ "lc", "_bar" ], [ "uc", "_BAR" ], [ "none", "_bAr" ], 230 [ sub { "column_".$n++ }, "column_0" ], [ "db", "bar" ]) { 231 my ($munge, $hdr) = @$_; 232 233 my $data = "_bAr,foo\n1,2\n3,4,5\n"; 234 my $how = defined $munge ? ref $munge ? "CB" : $munge : "undef"; 235 236 $n = 0; 237 $csv->column_names (undef); 238 open my $fh, "<", \$data; 239 ok (my $slf = $csv->header ($fh, { munge_column_names => $munge }), "munge header with $how"); 240 is (($csv->column_names)[0], $hdr, "folded header to $hdr"); 241 close $fh; 242 243 $n = 0; 244 $csv->column_names (undef); 245 open $fh, "<", \$data; 246 ok (my @hdr = $csv->header ($fh, { munge_column_names => $munge }), "munge header with $how"); 247 is ($hdr[0], $hdr, "folded header to $hdr"); 248 close $fh; 249 } 250 251my $fnm = "_85hdr.csv"; END { unlink $fnm; } 252 253my $a_ring = chr (utf8::unicode_to_native (0xe5)); 254foreach my $irs ("\n", chr (utf8::unicode_to_native (0xaa))) { 255 local $/ = $irs; 256 foreach my $eol ("\n", "\r\n", "\r") { 257 my $str = join $eol => 258 qq{zoo,b${a_ring}r}, 259 qq{1,"1 \x{20ac} each"}, 260 ""; 261 for ( [ "none" => "" ], 262 [ "utf-8" => "\xef\xbb\xbf" ], 263 [ "utf-16be" => "\xfe\xff" ], 264 [ "utf-16le" => "\xff\xfe" ], 265 [ "utf-32be" => "\x00\x00\xfe\xff" ], 266 [ "utf-32le" => "\xff\xfe\x00\x00" ], 267 # Below 5 not (yet) supported by Encode 268 [ "utf-1" => "\xf7\x64\x4c" ], 269 [ "utf-ebcdic" => "\xdd\x73\x66\x73" ], 270 [ "scsu" => "\x0e\xfe\xff" ], 271 [ "bocu-1" => "\xfb\xee\x28" ], 272 [ "gb-18030" => "\x84\x31\x95" ], 273 # 274 [ "UTF-8" => "\x{feff}" ], 275 ) { 276 my ($enc, $bom) = @$_; 277 my ($enx, $box, $has_enc) = ($enc, $bom, 0); 278 $enc eq "UTF-8" || $enc eq "none" or 279 $box = eval { Encode::encode ($enc, chr (0xfeff)) }; 280 $enc eq "none" and $enx = "utf-8"; 281 282 # On os390, Encode only supports the following EBCDIC 283 # cp37, cp500, cp875, cp1026, cp1047, and posix-bc 284 # utf-ebcdic is not in the list 285 eval { 286 no warnings "utf8"; 287 open my $fh, ">", $fnm; 288 binmode $fh; 289 if (defined $box) { 290 print $fh byte_utf8a_to_utf8n ($box); 291 print $fh Encode::encode ($enx, $str); 292 $has_enc = 1; 293 } 294 else { 295 print $fh Encode::encode ("utf-8", $str); 296 } 297 298 close $fh; 299 }; 300 #$ebcdic and $has_enc = 0; # TODO 301 302 $csv = Text::CSV->new ({ binary => 1, auto_diag => 9 }); 303 304 SKIP: { 305 $has_enc or skip "Encoding $enc not supported", $enc =~ m/^utf/ ? 10 : 9; 306 $csv->column_names (undef); 307 open my $fh, "<", $fnm; 308 binmode $fh; 309 ok (1, "$fnm opened for enc $enc"); 310 ok ($csv->header ($fh), "headers with BOM for $enc"); 311 $enc =~ m/^utf/ and is ($csv->{ENCODING}, uc $enc, "Encoding inquirable"); 312 313 is (($csv->column_names)[1], "b${a_ring}r", "column name was decoded"); 314 ok (my $row = $csv->getline_hr ($fh), "getline_hr"); 315 is ($row->{"b${a_ring}r"}, "1 \x{20ac} each", "Returned in Unicode"); 316 close $fh; 317 318 my $aoh; 319 ok ($aoh = csv (in => $fnm, bom => 1), "csv (bom => 1)"); 320 is_deeply ($aoh, 321 [{ zoo => 1, "b${a_ring}r" => "1 \x{20ac} each" }], "Returned data bom = 1"); 322 323 ok ($aoh = csv (in => $fnm, encoding => "auto"), "csv (encoding => auto)"); 324 is_deeply ($aoh, 325 [{ zoo => 1, "b${a_ring}r" => "1 \x{20ac} each" }], "Returned data auto"); 326 } 327 328 SKIP: { 329 $has_enc or skip "Encoding $enc not supported", 7; 330 $csv->column_names (undef); 331 open my $fh, "<", $fnm; 332 $enc eq "none" or binmode $fh, ":encoding($enc)"; 333 ok (1, "$fnm opened for enc $enc"); 334 ok ($csv->header ($fh), "headers with BOM for $enc"); 335 is (($csv->column_names)[1], "b${a_ring}r", "column name was decoded"); 336 ok (my $row = $csv->getline_hr ($fh), "getline_hr"); 337 is ($row->{"b${a_ring}r"}, "1 \x{20ac} each", "Returned in Unicode"); 338 close $fh; 339 340 ok (my $aoh = csv (in => $fnm, bom => 1), "csv (bom => 1)"); 341 is_deeply ($aoh, 342 [{ zoo => 1, "b${a_ring}r" => "1 \x{20ac} each" }], "Returned data"); 343 } 344 345 unlink $fnm; 346 } 347 } 348 } 349 350{ # Header after first line with sep= 351 open my $fh, ">", $fnm or die "$fnm: $!"; 352 print $fh "sep=;\n"; 353 print $fh "a;b 1;c\n"; 354 print $fh "1;2;3\n"; 355 close $fh; 356 ok (my $aoh = csv (in => $fnm, munge => "db"), "Read header with sep=;"); 357 is_deeply ($aoh, [{ a => 1, "b_1" => 2, c => 3 }], "Munged to db with sep"); 358 } 359