1#!/usr/bin/perl
2
3use strict;
4$^W = 1;
5
6use Test::More;
7
8my $ebcdic = ord ("A") == 0xC1;
9my $pu;
10BEGIN {
11    $pu = $ENV{PERL_UNICODE};
12    $pu = defined $pu && ($pu eq "" || $pu =~ m/[oD]/ || ($pu =~ m/^[0-9]+$/ && $pu & 16));
13
14    if ($] < 5.008002) {
15        plan skip_all => "This test unit requires perl-5.8.2 or higher";
16        }
17    else {
18	my $n = 1448;
19	$pu and $n -= 120;
20	plan tests => $n;
21	}
22
23    $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
24
25    use_ok "Text::CSV", "csv";
26    # Encode up to and including 2.01 have an error in a regex:
27    # False [] range "\s-" in regex; marked by <-- HERE in m/\bkoi8[\s- <-- HERE _]*([ru])$/
28    # in Encode::Alias. This however does not influence this test, as then *all* encodings
29    # are skipped as unsupported
30    require Encode;
31    require "./t/util.pl";
32    }
33
34$| = 1;
35
36ok (my $csv = Text::CSV->new, "new for header tests");
37is ($csv->sep_char, ",", "Sep = ,");
38
39my $hdr_lc = [qw( bar foo )];
40
41foreach my $sep (",", ";") {
42    my $data = "bAr,foo\n1,2\n3,4,5\n";
43    $data =~ s/,/$sep/g;
44
45    $csv->column_names (undef);
46    {   open my $fh, "<", \$data;
47	ok (my $slf = $csv->header ($fh), "header");
48	is ($slf, $csv, "Return self");
49	is ($csv->sep_char, $sep, "Sep = $sep");
50	is_deeply ([ $csv->column_names ], $hdr_lc, "headers");
51	is_deeply ($csv->getline ($fh), [ 1, 2 ],    "Line 1");
52	is_deeply ($csv->getline ($fh), [ 3, 4, 5 ], "Line 2");
53	close $fh;
54	}
55
56    $csv->column_names (undef);
57    {   open my $fh, "<", \$data;
58	ok (my @hdr = $csv->header ($fh), "header");
59	is_deeply (\@hdr, $hdr_lc, "Return headers");
60	close $fh;
61	}
62
63    $csv->column_names (undef);
64    {   open my $fh, "<", \$data;
65	ok (my $slf = $csv->header ($fh), "header");
66	is ($slf, $csv, "Return self");
67	is ($csv->sep_char, $sep, "Sep = $sep");
68	is_deeply ([ $csv->column_names ], $hdr_lc, "headers");
69	is_deeply ($csv->getline_hr ($fh), { bar => 1, foo => 2 }, "Line 1");
70	is_deeply ($csv->getline_hr ($fh), { bar => 3, foo => 4 }, "Line 2");
71	close $fh;
72	}
73
74    {   open my $fh, "<", \$data;
75	is_deeply (csv (in => $fh, bom => 1),
76	    [{ bar => 1, foo => 2 }, { bar => 3, foo => 4 }],
77	    "use header () from csv () with $sep");
78	}
79
80    {   open my $fh, "<", \$data;
81	is_deeply (csv (in => $fh, seps => [ ",", ";" ]),
82	    [{ bar => 1, foo => 2 }, { bar => 3, foo => 4 }],
83	    "use header () from csv () with $sep");
84	}
85
86    {   open my $fh, "<", \$data;
87	is_deeply (csv (in => $fh, bom => 1, key => "bar"),
88	    { 1 => { bar => 1, foo => 2 }, 3 => { bar => 3, foo => 4 }},
89	    "use header () from csv (key) with $sep");
90	}
91
92    {   open my $fh, "<", \$data;
93	is_deeply (csv (in => $fh, munge => "uc", key => "BAR"),
94	    { 1 => { BAR => 1, FOO => 2 }, 3 => { BAR => 3, FOO => 4 }},
95	    "use header () from csv (key, uc) with $sep");
96	}
97
98    {   open my $fh, "<", \$data;
99	is_deeply (csv (in => $fh, set_column_names => 0),
100	    [[ "bar", "foo" ], [ 1, 2 ], [ 3, 4, 5 ]],
101	    "use header () from csv () with $sep to ARRAY not setting column names");
102	}
103    {   open my $fh, "<", \$data;
104	is_deeply (csv (in => $fh, set_column_names => 0, munge => "none"),
105	    [[ "bAr", "foo" ], [ 1, 2 ], [ 3, 4, 5 ]],
106	    "use header () from csv () with $sep to ARRAY not setting column names not lc");
107	}
108    }
109
110my $sep_utf = byte_utf8a_to_utf8n ("\xe2\x81\xa3"); # U+2063 INVISIBLE SEPARATOR
111my $sep_ok = [ "\t", "|", ",", ";", "##", $sep_utf ];
112unless ($pu) {
113    foreach my $sep (@$sep_ok) {
114	my $data = "bAr,foo\n1,2\n3,4,5\n";
115	$data =~ s/,/$sep/g;
116
117	$csv->column_names (undef);
118	{   open my $fh, "<", \$data;
119	    ok (my $slf = $csv->header ($fh, $sep_ok), "header with specific sep set");
120	    is ($slf, $csv, "Return self");
121	    is (Encode::encode ("utf-8", $csv->sep), $sep, "Sep = $sep");
122	    is_deeply ([ $csv->column_names ], $hdr_lc, "headers");
123	    is_deeply ($csv->getline ($fh), [ 1, 2 ],    "Line 1");
124	    is_deeply ($csv->getline ($fh), [ 3, 4, 5 ], "Line 2");
125	    close $fh;
126	    }
127
128	$csv->column_names (undef);
129	{   open my $fh, "<", \$data;
130	    ok (my @hdr = $csv->header ($fh, $sep_ok), "header with specific sep set");
131	    is_deeply (\@hdr, $hdr_lc, "Return headers");
132	    close $fh;
133	    }
134
135	$csv->column_names (undef);
136	{   open my $fh, "<", \$data;
137	    ok (my $slf = $csv->header ($fh, { sep_set => $sep_ok }), "header with specific sep set as opt");
138	    is ($slf, $csv, "Return self");
139	    is (Encode::encode ("utf-8", $csv->sep), $sep, "Sep = $sep");
140	    is_deeply ([ $csv->column_names ], $hdr_lc, "headers");
141	    is_deeply ($csv->getline ($fh), [ 1, 2 ],    "Line 1");
142	    is_deeply ($csv->getline ($fh), [ 3, 4, 5 ], "Line 2");
143	    close $fh;
144	    }
145
146	$csv->column_names (undef);
147	{   open my $fh, "<", \$data;
148	    ok (my $slf = $csv->header ($fh, $sep_ok), "header with specific sep set");
149	    is ($slf, $csv, "Return self");
150	    is (Encode::encode ("utf-8", $csv->sep), $sep, "Sep = $sep");
151	    is_deeply ([ $csv->column_names ], $hdr_lc, "headers");
152	    is_deeply ($csv->getline_hr ($fh), { bar => 1, foo => 2 }, "Line 1");
153	    is_deeply ($csv->getline_hr ($fh), { bar => 3, foo => 4 }, "Line 2");
154	    close $fh;
155	    }
156	}
157    }
158
159for ( [ 1010, 0, qq{}		],	# Empty header
160      [ 1011, 0, qq{a,b;c,d}	],	# Multiple allowed separators
161      [ 1012, 0, qq{a,,b}	],	# Empty header field
162      [ 1013, 0, qq{a,a,b}	],	# Non-unique headers
163      [ 2027, 1, qq{a,"b\nc",c}	],	# Embedded newline binary on
164      [ 2021, 0, qq{a,"b\nc",c}	],	# Embedded newline binary off
165      ) {
166    my ($err, $bin, $data) = @$_;
167    $csv->binary ($bin);
168    open my $fh, "<", \$data;
169    my $self = eval { $csv->header ($fh); };
170    is ($self, undef, "FAIL for '$data'");
171    ok ($@, "Error");
172    is (0 + $csv->error_diag, $err, "Error code $err");
173    close $fh;
174    }
175{   open my $fh, "<", \"bar,bAr,bAR,BAR\n1,2,3,4";
176    $csv->column_names (undef);
177    ok ($csv->header ($fh, { munge_column_names => "none", detect_bom => 0 }), "non-unique unfolded headers");
178    is_deeply ([ $csv->column_names ], [qw( bar bAr bAR BAR )], "Headers");
179    close $fh;
180    }
181{   open my $fh, "<", \"bar,bAr,bAR,BAR\n1,2,3,4";
182    $csv->column_names (undef);
183    ok (my @hdr = $csv->header ($fh, { munge_column_names => "none" }), "non-unique unfolded headers");
184    is_deeply (\@hdr, [qw( bar bAr bAR BAR )], "Headers from method");
185    is_deeply ([ $csv->column_names ], [qw( bar bAr bAR BAR )], "Headers from column_names");
186    close $fh;
187    }
188
189foreach my $sep (",", ";") {
190    my $data = "bAr,foo\n1,2\n3,4,5\n";
191    $data =~ s/,/$sep/g;
192
193    $csv->column_names (undef);
194    {   open my $fh, "<", \$data;
195	ok (my $slf = $csv->header ($fh, { set_column_names => 0 }), "Header without column setting");
196	is ($slf, $csv, "Return self");
197	is ($csv->sep_char, $sep, "Sep = $sep");
198	is_deeply ([ $csv->column_names ], [], "headers");
199	is_deeply ($csv->getline ($fh), [ 1, 2 ],    "Line 1");
200	is_deeply ($csv->getline ($fh), [ 3, 4, 5 ], "Line 2");
201	close $fh;
202	}
203    $csv->column_names (undef);
204    {   open my $fh, "<", \$data;
205	ok (my @hdr = $csv->header ($fh, { set_column_names => 0 }), "Header without column setting");
206	is_deeply (\@hdr, $hdr_lc, "Headers from method");
207	is_deeply ([ $csv->column_names ], [], "Headers from column_names");
208	close $fh;
209	}
210    }
211
212foreach my $ss ("", "bad", sub { 1; }, \*STDOUT, +{}) {
213    my $dta = "a,b\n1,2\n";
214    open my $fh, "<", \$dta;
215    my @hdr = eval { $csv->header ($fh, { sep_set => $ss }) };
216    is (scalar @hdr, 0, "No header on invalid sep_set");
217    is (0 + $csv->error_diag, 1500, "Error code");
218    }
219
220foreach my $dta ("", "\xfe\xff", "\xf7\x64\x4c", "\xdd\x73\x66\x73",
221	"\x0e\xfe\xff", "\xfb\xee\x28", "\x84\x31\x95\x33") {
222    open my $fh, "<", \$dta;
223    my @hdr = eval { $csv->header ($fh) };
224    is (scalar @hdr, 0, "No header on empty stream");
225    is (0 + $csv->error_diag, 1010, "Error code");
226    }
227
228my $n;
229for ([ undef, "_bar" ], [ "lc", "_bar" ], [ "uc", "_BAR" ], [ "none", "_bAr" ],
230     [ sub { "column_".$n++ }, "column_0" ], [ "db", "bar" ]) {
231    my ($munge, $hdr) = @$_;
232
233    my $data = "_bAr,foo\n1,2\n3,4,5\n";
234    my $how  = defined $munge ? ref $munge ? "CB" : $munge : "undef";
235
236    $n = 0;
237    $csv->column_names (undef);
238    open my $fh, "<", \$data;
239    ok (my $slf = $csv->header ($fh, { munge_column_names => $munge }), "munge header with $how");
240    is (($csv->column_names)[0], $hdr, "folded header to $hdr");
241    close $fh;
242
243    $n = 0;
244    $csv->column_names (undef);
245    open $fh, "<", \$data;
246    ok (my @hdr = $csv->header ($fh, { munge_column_names => $munge }), "munge header with $how");
247    is ($hdr[0], $hdr, "folded header to $hdr");
248    close $fh;
249    }
250
251my $fnm = "_85hdr.csv"; END { unlink $fnm; }
252
253my $a_ring = chr (utf8::unicode_to_native (0xe5));
254foreach my $irs ("\n", chr (utf8::unicode_to_native (0xaa))) {
255    local $/ = $irs;
256    foreach my $eol ("\n", "\r\n", "\r") {
257	my $str = join $eol =>
258	    qq{zoo,b${a_ring}r},
259	    qq{1,"1 \x{20ac} each"},
260	    "";
261	for (   [ "none"       => ""			],
262		[ "utf-8"      => "\xef\xbb\xbf"	],
263		[ "utf-16be"   => "\xfe\xff"		],
264		[ "utf-16le"   => "\xff\xfe"		],
265		[ "utf-32be"   => "\x00\x00\xfe\xff"	],
266		[ "utf-32le"   => "\xff\xfe\x00\x00"	],
267		# Below 5 not (yet) supported by Encode
268		[ "utf-1"      => "\xf7\x64\x4c"	],
269		[ "utf-ebcdic" => "\xdd\x73\x66\x73"	],
270		[ "scsu"       => "\x0e\xfe\xff"	],
271		[ "bocu-1"     => "\xfb\xee\x28"	],
272		[ "gb-18030"   => "\x84\x31\x95"	],
273		#
274		[ "UTF-8"      => "\x{feff}"		],
275		) {
276	    my ($enc, $bom) = @$_;
277	    my ($enx, $box, $has_enc) = ($enc, $bom, 0);
278	    $enc eq "UTF-8" || $enc eq "none" or
279		$box = eval { Encode::encode ($enc, chr (0xfeff)) };
280	    $enc eq "none" and $enx = "utf-8";
281
282	    # On os390, Encode only supports the following EBCDIC
283	    #  cp37, cp500, cp875, cp1026, cp1047, and posix-bc
284	    # utf-ebcdic is not in the list
285	    eval {
286		no warnings "utf8";
287		open my $fh, ">", $fnm;
288		binmode $fh;
289		if (defined $box) {
290		    print $fh byte_utf8a_to_utf8n ($box);
291		    print $fh Encode::encode ($enx, $str);
292		    $has_enc = 1;
293		    }
294		else {
295		    print $fh Encode::encode ("utf-8", $str);
296		    }
297
298		close $fh;
299		};
300	    #$ebcdic and $has_enc = 0; # TODO
301
302	    $csv = Text::CSV->new ({ binary => 1, auto_diag => 9 });
303
304	    SKIP: {
305		$has_enc or skip "Encoding $enc not supported", $enc =~ m/^utf/ ? 10 : 9;
306		$csv->column_names (undef);
307		open my $fh, "<", $fnm;
308		binmode $fh;
309		ok (1, "$fnm opened for enc $enc");
310		ok ($csv->header ($fh), "headers with BOM for $enc");
311		$enc =~ m/^utf/ and is ($csv->{ENCODING}, uc $enc, "Encoding inquirable");
312
313		is (($csv->column_names)[1], "b${a_ring}r", "column name was decoded");
314		ok (my $row = $csv->getline_hr ($fh), "getline_hr");
315		is ($row->{"b${a_ring}r"}, "1 \x{20ac} each", "Returned in Unicode");
316		close $fh;
317
318		my $aoh;
319		ok ($aoh = csv (in => $fnm, bom => 1), "csv (bom => 1)");
320		is_deeply ($aoh,
321		    [{ zoo => 1, "b${a_ring}r" => "1 \x{20ac} each" }], "Returned data bom = 1");
322
323		ok ($aoh = csv (in => $fnm, encoding => "auto"), "csv (encoding => auto)");
324		is_deeply ($aoh,
325		    [{ zoo => 1, "b${a_ring}r" => "1 \x{20ac} each" }], "Returned data auto");
326		}
327
328	    SKIP: {
329		$has_enc or skip "Encoding $enc not supported", 7;
330		$csv->column_names (undef);
331		open my $fh, "<", $fnm;
332		$enc eq "none" or binmode $fh, ":encoding($enc)";
333		ok (1, "$fnm opened for enc $enc");
334		ok ($csv->header ($fh), "headers with BOM for $enc");
335		is (($csv->column_names)[1], "b${a_ring}r", "column name was decoded");
336		ok (my $row = $csv->getline_hr ($fh), "getline_hr");
337		is ($row->{"b${a_ring}r"}, "1 \x{20ac} each", "Returned in Unicode");
338		close $fh;
339
340		ok (my $aoh = csv (in => $fnm, bom => 1), "csv (bom => 1)");
341		is_deeply ($aoh,
342		    [{ zoo => 1, "b${a_ring}r" => "1 \x{20ac} each" }], "Returned data");
343		}
344
345	    unlink $fnm;
346	    }
347	}
348    }
349
350{   # Header after first line with sep=
351    open my $fh, ">", $fnm or die "$fnm: $!";
352    print $fh "sep=;\n";
353    print $fh "a;b 1;c\n";
354    print $fh "1;2;3\n";
355    close $fh;
356    ok (my $aoh = csv (in => $fnm, munge => "db"), "Read header with sep=;");
357    is_deeply ($aoh, [{ a => 1, "b_1" => 2, c => 3 }], "Munged to db with sep");
358    }
359