1#!/usr/bin/perl
2
3use strict;
4use warnings;
5use charnames ":full";
6
7use Test::More;
8$| = 1;
9
10BEGIN {
11    $] < 5.008002 and
12	plan skip_all => "UTF8 tests useless in this ancient perl version";
13    }
14
15my @tests;
16my $ebcdic = ord ("A") == 0xC1;
17
18BEGIN {
19    delete $ENV{PERLIO};
20
21    my $pu = $ENV{PERL_UNICODE};
22    $pu = defined $pu && ($pu eq "" || $pu =~ m/[oD]/ || ($pu =~ m/^[0-9]+$/ && $pu & 16));
23
24    my $euro_ch = "\x{20ac}";
25
26    utf8::encode    (my $bytes = $euro_ch);
27    utf8::downgrade (my $bytes_dn = $bytes);
28    utf8::upgrade   (my $bytes_up = $bytes);
29
30    @tests = (
31	# $test                        $perlio             $data,      $encoding $expect_w
32	# ---------------------------- ------------------- ----------- --------- ----------
33	[ "Unicode  default",          "",                 $euro_ch,   "utf8",   $pu ? "no warn" : "warn" ],
34	[ "Unicode  binmode",          "[binmode]",        $euro_ch,   "utf8",   "warn",    ],
35	[ "Unicode  :utf8",            ":utf8",            $euro_ch,   "utf8",   "no warn", ],
36	[ "Unicode  :encoding(utf8)",  ":encoding(utf8)",  $euro_ch,   "utf8",   "no warn", ],
37	[ "Unicode  :encoding(UTF-8)", ":encoding(UTF-8)", $euro_ch,   "utf8",   "no warn", ],
38
39	[ "bytes dn default",          "",                 $bytes_dn,  "[none]", "no warn", ],
40	[ "bytes dn binmode",          "[binmode]",        $bytes_dn,  "[none]", "no warn", ],
41	[ "bytes dn :utf8",            ":utf8",            $bytes_dn,  "utf8",   "no warn", ],
42	[ "bytes dn :encoding(utf8)",  ":encoding(utf8)",  $bytes_dn,  "utf8",   "no warn", ],
43	[ "bytes dn :encoding(UTF-8)", ":encoding(UTF-8)", $bytes_dn,  "utf8",   "no warn", ],
44
45	[ "bytes up default",          "",                 $bytes_up,  "[none]", "no warn", ],
46	[ "bytes up binmode",          "[binmode]",        $bytes_up,  "[none]", "no warn", ],
47	[ "bytes up :utf8",            ":utf8",            $bytes_up,  "utf8",   "no warn", ],
48	[ "bytes up :encoding(utf8)",  ":encoding(utf8)",  $bytes_up,  "utf8",   "no warn", ],
49	[ "bytes up :encoding(UTF-8)", ":encoding(UTF-8)", $bytes_up,  "utf8",   "no warn", ],
50	);
51
52    my $builder = Test::More->builder;
53    binmode $builder->output,         ":encoding(utf8)";
54    binmode $builder->failure_output, ":encoding(utf8)";
55    binmode $builder->todo_output,    ":encoding(utf8)";
56
57    plan tests => 11 + 6 * @tests + 4 * 22 + 6 + 10 + 2;
58    }
59
60BEGIN {
61    use_ok "Text::CSV_XS", ("csv");
62    plan skip_all => "Cannot load Text::CSV_XS" if $@;
63    require "./t/util.pl";
64    }
65
66sub hexify { join " ", map { sprintf "%02x", $_ } unpack "C*", @_ }
67sub warned { length ($_[0]) ? "warn" : "no warn" }
68
69my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
70
71for (@tests) {
72    my ($test, $perlio, $data, $enc, $expect_w) = @$_;
73
74    my $expect = qq{"$data"};
75    $enc eq "utf8" and utf8::encode ($expect);
76
77    my ($p_out, $p_fh) = ("");
78    my ($c_out, $c_fh) = ("");
79
80    if ($perlio eq "[binmode]") {
81	open $p_fh, ">",        \$p_out or die "IO: $!\n"; binmode $p_fh;
82	open $c_fh, ">",        \$c_out or die "IO: $!\n"; binmode $c_fh;
83	}
84    else {
85	open $p_fh, ">$perlio", \$p_out or die "IO: $!\n";
86	open $c_fh, ">$perlio", \$c_out or die "IO: $!\n";
87	}
88
89    my $p_warn = "";
90    {	local $SIG{__WARN__} = sub { $p_warn .= join "", @_ };
91	ok ((print $p_fh qq{"$data"}),        "$test perl print");
92	close $p_fh;
93	}
94
95    my $c_warn = "";
96    {	local $SIG{__WARN__} = sub { $c_warn .= join "", @_ };
97	ok ($csv->print ($c_fh, [ $data ]),   "$test csv print");
98	close $c_fh;
99	}
100
101    is (hexify ($c_out), hexify ($p_out),   "$test against Perl");
102    is (hexify ($c_out), hexify ($expect),  "$test against expected");
103
104    is (warned ($c_warn), warned ($p_warn), "$test against Perl warning");
105    is (warned ($c_warn), $expect_w,        "$test against expected warning");
106    }
107
108# Test automatic upgrades for valid UTF-8
109{   my $blob = pack "C*", 0..255; $blob =~ tr/",//d;
110    # perl-5.10.x has buggy SvCUR () on blob
111    $] >= 5.010000 && $] <= 5.012001 and $blob =~ tr/\0//d;
112    my $b1 = "\x{b6}";		# PILCROW SIGN in ISO-8859-1
113    my $b2 = $ebcdic		# ARABIC COMMA in UTF-8
114	? "\x{b8}\x{57}\x{53}"
115	: "\x{d8}\x{8c}";
116    my @data = (
117	qq[1,aap,3],		# No diac
118	qq[1,a${b1}p,3],	# Single-byte
119	qq[1,a${b2}p,3],	# Multi-byte
120	qq[1,"$blob",3],	# Binary shit
121	) x 2;
122    my $data = join "\n" => @data;
123    my @expect = ("aap", "a\266p", "a\x{060c}p", $blob) x 2;
124
125    my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
126
127    foreach my $bc (undef, 3) {
128	my @read;
129
130	# Using getline ()
131	open my $fh, "<", \$data or die "IO: $!\n"; binmode $fh;
132	$bc and $csv->bind_columns (\my ($f1, $f2, $f3));
133	is (scalar $csv->bind_columns, $bc, "Columns_bound?");
134	while (my $row = $csv->getline ($fh)) {
135	    push @read, $bc ? $f2 : $row->[1];
136	    }
137	close $fh;
138	is_deeply (\@read, \@expect, "Set and reset UTF-8 ".($bc?"no bind":"bind_columns"));
139	is_deeply ([ map { utf8::is_utf8 ($_) } @read ],
140	    [ "", "", 1, "", "", "", 1, "" ], "UTF8 flags");
141
142	# Using parse ()
143	@read = map {
144	    $csv->parse ($_);
145	    $bc ? $f2 : ($csv->fields)[1];
146	    } @data;
147	is_deeply (\@read, \@expect, "Set and reset UTF-8 ".($bc?"no bind":"bind_columns"));
148	is_deeply ([ map { utf8::is_utf8 ($_) } @read ],
149	    [ "", "", 1, "", "", "", 1, "" ], "UTF8 flags");
150	}
151    }
152
153my $sep = "\x{2665}";#"\N{INVISIBLE SEPARATOR}";
154my $quo = "\x{2661}";#"\N{FULLWIDTH QUOTATION MARK}";
155foreach my $new (0, 1, 2, 3) {
156    my %attr = (
157	binary       => 1,
158	always_quote => 1,
159	);;
160    $new & 1 and $attr{sep}   = $sep;
161    $new & 2 and $attr{quote} = $quo;
162    my $csv = Text::CSV_XS->new (\%attr);
163
164    my $s = $attr{sep}   || ',';
165    my $q = $attr{quote} || '"';
166
167    note ("Test SEP: '$s', QUO: '$q'") if $Test::More::VERSION > 0.81;
168    is ($csv->sep,   $s, "sep");
169    is ($csv->quote, $q, "quote");
170
171    foreach my $data (
172	    [ 1,		2		],
173	    [ "\N{EURO SIGN}",	"\N{SNOWMAN}"	],
174#	    [ $sep,		$quo		],
175	    ) {
176
177	my $exp8 = join $s => map { qq{$q$_$q} } @$data;
178	utf8::encode (my $expb = $exp8);
179	my @exp = ($expb, $exp8);
180
181	ok ($csv->combine (@$data),		"combine");
182	my $x = $csv->string;
183	is ($csv->string, $exp8,		"string");
184
185	open my $fh, ">:encoding(utf8)", \(my $out = "") or die "IO: $!\n";
186	ok ($csv->print ($fh, $data),		"print with UTF8 sep");
187	close $fh;
188
189	is ($out, $expb,			"output");
190
191	ok ($csv->parse ($expb),		"parse");
192	is_deeply ([ $csv->fields ],    $data,	"fields");
193
194	open $fh, "<", \$expb or die "IO: $!\n"; binmode $fh;
195	is_deeply ($csv->getline ($fh), $data,	"data from getline ()");
196	close $fh;
197
198	$expb =~ tr/"//d;
199
200	ok ($csv->parse ($expb),		"parse");
201	is_deeply ([ $csv->fields ],    $data,	"fields");
202
203	open $fh, "<", \$expb or die "IO: $!\n"; binmode $fh;
204	is_deeply ($csv->getline ($fh), $data,	"data from getline ()");
205	close $fh;
206	}
207    }
208
209{   my $h = "\N{WHITE HEART SUIT}";
210    my $H = "\N{BLACK HEART SUIT}";
211    my $str = "${h}I$h$H${h}L\"${h}ve$h$H${h}Perl$h";
212    utf8::encode ($str);
213    ok (my $aoa = csv (in => \$str, sep => $H, quote => $h),	"Hearts");
214    is_deeply ($aoa, [[ "I", "L${h}ve", "Perl"]],		"I $H Perl");
215
216    ok (my $csv = Text::CSV_XS->new ({
217			binary => 1, sep => $H, quote => $h }),	"new hearts");
218    ok ($csv->combine (@{$aoa->[0]}),				"combine");
219    ok ($str = $csv->string,					"string");
220    utf8::decode ($str);
221    is ($str, "I${H}${h}L\"${h}ve${h}${H}Perl", "Correct quotation");
222    }
223
224# Tests pulled from tests in Raku
225{   my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
226    my $h = pack "C*", 224, 34, 204, 182;
227    ok ($csv->combine (1, $h, 3));
228    ok (my $s = $csv->string, "String");
229    my $b = $h;
230    utf8::encode ($b);
231    ok ($csv->combine (1, $b, 3));
232    ok ($s = $csv->string, "String");
233    }
234
235{   my $h = qq{\x{10fffd}xE0"}; #"
236    my $b = $h;
237    ok ($csv->combine (1, $b, 3));
238    ok (my $s = $csv->string, "String");
239    $b = $h;
240    utf8::encode ($b);
241    ok ($csv->combine (1, $b, 3));
242    ok ($s = $csv->string, "String");
243    $b = $h;
244    utf8::encode ($b);
245    ok ($csv->combine (1, $b, 3));
246    ok ($s = $csv->string, "String");
247    }
248
249{   my $file = "Eric,\N{LATIN CAPITAL LETTER E WITH ACUTE}RIC\n";
250    utf8::encode ($file);
251    open my $fh, "<", \$file or die $!;
252
253    my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 2 });
254    is_deeply (
255	[ $csv->header ($fh) ],
256	[ "eric", "\N{LATIN SMALL LETTER E WITH ACUTE}ric" ],
257	"Lowercase unicode header");
258    }
259
260{   my $file = "Eric,\N{LATIN SMALL LETTER E WITH ACUTE}ric\n";
261    utf8::encode ($file);
262    open my $fh, "<", \$file or die $!;
263
264    my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 2 });
265    is_deeply (
266	[ $csv->header ($fh, { munge => "uc" }) ],
267	[ "ERIC", "\N{LATIN CAPITAL LETTER E WITH ACUTE}RIC" ],
268	"Uppercase unicode header");
269    }
270