1package Text::CSV_XS;
2
3# Copyright (c) 2007-2021 H.Merijn Brand.  All rights reserved.
4# Copyright (c) 1998-2001 Jochen Wiedmann. All rights reserved.
5# Copyright (c) 1997 Alan Citterman.       All rights reserved.
6#
7# This program is free software; you can redistribute it and/or
8# modify it under the same terms as Perl itself.
9
10# HISTORY
11#
12# 0.24 -
13#    H.Merijn Brand (h.m.brand@xs4all.nl)
14# 0.10 - 0.23
15#    Jochen Wiedmann <joe@ispsoft.de>
16# Based on (the original) Text::CSV by:
17#    Alan Citterman <alan@mfgrtl.com>
18
19require 5.006001;
20
21use strict;
22use warnings;
23
24require Exporter;
25use XSLoader;
26use Carp;
27
28use vars   qw( $VERSION @ISA @EXPORT_OK );
29$VERSION   = "1.47";
30@ISA       = qw( Exporter );
31@EXPORT_OK = qw( csv );
32XSLoader::load ("Text::CSV_XS", $VERSION);
33
# Type codes: PV = 0, IV = 1, NV = 2.
# NOTE(review): presumably the values meant for the list given to types ().
sub PV { return 0 }
sub IV { return 1 }
sub NV { return 2 }
37
# On perl versions before 5.8.2, utf8::decode is replaced by a no-op so the
# unconditional utf8::decode () calls in this file do nothing there.
if ($] < 5.008002) {
    no warnings "redefine";
    *utf8::decode = sub {};
    }
42
# version
#
#   Class/object method expecting no arguments and returning the version
#   number of Text::CSV_XS. There are no side-effects.
47
sub version {
    # Usable as class or object method; any arguments are ignored.
    $VERSION;
    } # version
51
# new
#
#   Class/object method expecting an optional hash reference with attributes
#   and returning a reference to a newly created Text::CSV_XS object, or
#   nothing on failure.
56
# Default value for every attribute a new object starts with. new () merges
# the caller's attributes over these and rejects any key that is not listed
# here.
my %def_attr = (
    'eol'			=> '',
    'sep_char'			=> ',',
    'quote_char'		=> '"',
    'escape_char'		=> '"',
    'binary'			=> 0,
    'decode_utf8'		=> 1,
    'auto_diag'			=> 0,
    'diag_verbose'		=> 0,
    'strict'			=> 0,
    'blank_is_undef'		=> 0,
    'empty_is_undef'		=> 0,
    'allow_whitespace'		=> 0,
    'allow_loose_quotes'	=> 0,
    'allow_loose_escapes'	=> 0,
    'allow_unquoted_escape'	=> 0,
    'always_quote'		=> 0,
    'quote_empty'		=> 0,
    'quote_space'		=> 1,
    'quote_binary'		=> 1,
    'escape_null'		=> 1,
    'keep_meta_info'		=> 0,
    'verbatim'			=> 0,
    'formula'			=> 0,
    'skip_empty_rows'		=> 0,
    'undef_str'			=> undef,
    'comment_str'		=> undef,
    'types'			=> undef,
    'callbacks'			=> undef,

    # Internal state. Keys with a leading underscore are filtered out by
    # known_attributes () and cannot be passed to new ().
    '_EOF'			=> "",
    '_RECNO'			=> 0,
    '_STATUS'			=> undef,
    '_FIELDS'			=> undef,
    '_FFLAGS'			=> undef,
    '_STRING'			=> undef,
    '_ERROR_INPUT'		=> undef,
    '_COLUMN_NAMES'		=> undef,
    '_BOUND_COLUMNS'		=> undef,
    '_AHEAD'			=> undef,
    '_FORMULA_CB'		=> undef,

    # Set by header () to the upper-cased name of a detected BOM encoding
    'ENCODING'			=> undef,
    );
# Alternative attribute names accepted by new (), mapped to the real name
my %attr_alias = (
    'quote_always'		=> "always_quote",
    'verbose_diag'		=> "diag_verbose",
    'quote_null'		=> "escape_null",
    'escape'			=> "escape_char",
    'comment'			=> "comment_str",
    );
# Diagnostic of the most recent new () call; error_diag () reports this
# value when it has no object-level diagnostic to report.
my $last_new_err = Text::CSV_XS->SetDiag (0);
my $ebcdic       = ord ("A") == 0xC1;	# Faster than $Config{'ebcdic'}
110
111# NOT a method: is also used before bless
# Returns 1002 when allow_whitespace ($allow_ws) is requested while the quote
# or the escape character starts with a space or a tab, and 0 otherwise.
# $self only needs to be a hash reference; it does not have to be blessed.
sub _unhealthy_whitespace {
    my ($self, $allow_ws) = @_;
    return 0 unless $allow_ws; # no checks needed without allow_whitespace

    # A non-empty multi-character quote takes precedence over quote_char
    my $quote = $self->{'quote'};
    $quote = $self->{'quote_char'} unless defined $quote && length ($quote);

    for my $chr ($quote, $self->{'escape_char'}) {
        return 1002 if defined $chr && $chr =~ m/^[ \t]/;
        }

    return 0;
    } # _unhealthy_whitespace
125
# _check_sanity
#
#   Validates the separator, quote, escape and eol settings held in $self,
#   which may be a plain (not yet blessed) hash reference. A non-empty
#   'sep' / 'quote' takes precedence over 'sep_char' / 'quote_char'.
#   Returns 0 when everything is sane, otherwise the numeric code of the
#   first problem found:
#     1008 - separator is undefined or empty
#     1006 - separator is longer than 16 characters
#     1003 - separator, quote or escape contains \r or \n
#     1001 - quote or escape is equal to the separator
#     1007 - quote is longer than 16 characters
#     1005 - eol is longer than 16 characters
#     1002 - as reported by _unhealthy_whitespace ()
sub _check_sanity {
    my $self = shift;

    my $eol = $self->{'eol'};
    my $sep = $self->{'sep'};
    defined $sep && length ($sep) or $sep = $self->{'sep_char'};
    my $quo = $self->{'quote'};
    defined $quo && length ($quo) or $quo = $self->{'quote_char'};
    my $esc = $self->{'escape_char'};

#    use DP;::diag ("SEP: '", DPeek ($sep),
#	        "', QUO: '", DPeek ($quo),
#	        "', ESC: '", DPeek ($esc),"'");

    # sep_char should not be undefined or empty. The defined test comes
    # first so an undefined separator yields 1008 without an
    # "uninitialized value" warning from the string comparison.
    defined $sep && $sep ne ""	or  return 1008;
    length ($sep) > 16		and return 1006;
    $sep =~ m/[\r\n]/		and return 1003;

    if (defined $quo) {
	$quo eq $sep		and return 1001;
	length ($quo) > 16	and return 1007;
	$quo =~ m/[\r\n]/	and return 1003;
	}
    if (defined $esc) {
	$esc eq $sep		and return 1001;
	$esc =~ m/[\r\n]/	and return 1003;
	}
    if (defined $eol) {
	length ($eol) > 16	and return 1005;
	}

    return _unhealthy_whitespace ($self, $self->{'allow_whitespace'});
    } # _check_sanity
160
# Returns the sorted names of all attributes in %def_attr plus "sep" and
# "quote", leaving out the internal names that start with an underscore.
sub known_attributes {
    my @public = grep { $_ !~ m/^_/ } "sep", "quote", keys %def_attr;
    return sort @public;
    } # known_attributes
164
# Constructor. Accepts an optional hash reference of attributes and returns
# the blessed object. On any failure it returns nothing and leaves the reason
# in $last_new_err, where error_diag () picks it up.
sub new {
    # Preset the "usage" diagnostic so the early returns below report it
    $last_new_err = Text::CSV_XS->SetDiag (1000,
	"usage: my \$csv = Text::CSV_XS->new ([{ option => value, ... }]);");

    my $proto = shift;
    my $class = ref $proto || $proto	or  return;
    @_ > 0 &&   ref $_[0] ne "HASH"	and return;
    my $attr  = shift || {};
    # Normalize the keys: lower-case plain identifiers and resolve aliases
    my %attr  = map {
	my $k = m/^[a-zA-Z]\w+$/ ? lc $_ : $_;
	exists $attr_alias{$k} and $k = $attr_alias{$k};
	($k => $attr->{$_});
	} keys %{$attr};

    # 'sep' and 'quote' are validated under their *_char names; the flags
    # remember that the longer form was used so it can be restored below.
    my $sep_aliased = 0;
    if (exists $attr{'sep'}) {
	$attr{'sep_char'} = delete $attr{'sep'};
	$sep_aliased = 1;
	}
    my $quote_aliased = 0;
    if (exists $attr{'quote'}) {
	$attr{'quote_char'} = delete $attr{'quote'};
	$quote_aliased = 1;
	}
    # 'formula' is taken out here and applied after bless (see the end)
    exists $attr{'formula_handling'} and
	$attr{'formula'} = delete $attr{'formula_handling'};
    my $attr_formula = delete $attr{'formula'};

    # Every remaining key must be a known, public attribute
    for (keys %attr) {
	if (m/^[a-z]/ && exists $def_attr{$_}) {
	    # uncoverable condition false
	    defined $attr{$_} && m/_char$/ and utf8::decode ($attr{$_});
	    next;
	    }
#	croak?
	$last_new_err = Text::CSV_XS->SetDiag (1000, "INI - Unknown attribute '$_'");
	$attr{'auto_diag'} and error_diag ();
	return;
	}
    # A value that unpacks to more than one element is stored in 'sep' (or
    # 'quote' below) and the matching *_char is set to "\0".
    if ($sep_aliased) {
	my @b = unpack "U0C*", $attr{'sep_char'};
	if (@b > 1) {
	    $attr{'sep'} = $attr{'sep_char'};
	    $attr{'sep_char'} = "\0";
	    }
	else {
	    $attr{'sep'} = undef;
	    }
	}
    if ($quote_aliased and defined $attr{'quote_char'}) {
	my @b = unpack "U0C*", $attr{'quote_char'};
	if (@b > 1) {
	    $attr{'quote'} = $attr{'quote_char'};
	    $attr{'quote_char'} = "\0";
	    }
	else {
	    $attr{'quote'} = undef;
	    }
	}

    # Defaults first, caller-supplied values override them
    my $self = { %def_attr, %attr };
    if (my $ec = _check_sanity ($self)) {
	$last_new_err = Text::CSV_XS->SetDiag ($ec);
	$attr{'auto_diag'} and error_diag ();
	return;
	}
    if (defined $self->{'callbacks'} && ref $self->{'callbacks'} ne "HASH") {
	carp ("The 'callbacks' attribute is set but is not a hash: ignored\n");
	$self->{'callbacks'} = undef;
	}

    # Success: clear the diagnostic that was preset at the top
    $last_new_err = Text::CSV_XS->SetDiag (0);
    # Without an explicit 'eol' attribute, a defined $\ becomes the eol
    defined $\ && !exists $attr{'eol'} and $self->{'eol'} = $\;
    bless $self, $class;
    defined $self->{'types'} and $self->types ($self->{'types'});
    defined $attr_formula  and $self->{'formula'} = _supported_formula ($self, $attr_formula);
    $self;
    } # new
243
244# Keep in sync with XS!
# Maps an attribute name to the index passed as first argument to
# _cache_set () by the setters in this file.
my %_cache_id = ( # Only expose what is accessed from within PM
    'quote_char'		=>  0,
    'escape_char'		=>  1,
    'sep_char'			=>  2,
    'sep'			=> 39,	# 39 .. 55
    'binary'			=>  3,
    'keep_meta_info'		=>  4,
    'always_quote'		=>  5,
    'allow_loose_quotes'	=>  6,
    'allow_loose_escapes'	=>  7,
    'allow_unquoted_escape'	=>  8,
    'allow_whitespace'		=>  9,
    'blank_is_undef'		=> 10,
    'eol'			=> 11,
    'quote'			=> 15,
    'verbatim'			=> 22,
    'empty_is_undef'		=> 23,
    'auto_diag'			=> 24,
    'diag_verbose'		=> 33,
    'quote_space'		=> 25,
    'quote_empty'		=> 37,
    'quote_binary'		=> 32,
    'escape_null'		=> 31,
    'decode_utf8'		=> 35,
    '_has_ahead'		=> 30,
    '_has_hooks'		=> 36,
    '_is_bound'			=> 26,	# 26 .. 29
    'formula'			=> 38,
    'strict'			=> 42,
    'skip_empty_rows'		=> 43,
    'undef_str'			=> 46,
    'comment_str'		=> 54,
    'types'			=> 62,
    );
279
280# A `character'
# Stores a character-type attribute: the value is utf8-decoded (when
# defined), saved in the object, sanity-checked (croaks with the diagnostic
# on failure) and finally pushed through _cache_set ().
sub _set_attr_C {
    my ($self, $name, $val) = @_;
    utf8::decode ($val) if defined $val;
    $self->{$name} = $val;
    if (my $err = _check_sanity ($self)) {
        croak ($self->SetDiag ($err));
        }
    return $self->_cache_set ($_cache_id{$name}, $val);
    } # _set_attr_C
288
289# A flag
# Stores a flag attribute; an undefined value is stored as 0. The value is
# passed on to _cache_set () in numeric form.
sub _set_attr_X {
    my ($self, $name, $val) = @_;
    $val = 0 unless defined $val;
    $self->{$name} = $val;
    return $self->_cache_set ($_cache_id{$name}, 0 + $val);
    } # _set_attr_X
296
297# A number
# Stores a numeric attribute as given and passes its numeric value on to
# _cache_set ().
sub _set_attr_N {
    my ($self, $name, $val) = @_;
    $self->{$name} = $val;
    my $num = 0 + $val;
    return $self->_cache_set ($_cache_id{$name}, $num);
    } # _set_attr_N
303
304# Accessor methods.
305#   It is unwise to change them halfway through a single file!
# Get or set the single quote character. Setting it also clears the cached
# multi-character quote.
sub quote_char {
    my ($self, @new) = @_;
    if (@new) {
        $self->_set_attr_C ("quote_char", $new[0]);
        $self->_cache_set ($_cache_id{'quote'}, "");
        }
    return $self->{'quote_char'};
    } # quote_char
314
# Get or set the quotation. A value that unpacks ("U0C*") to more than one
# element is kept in 'quote' with quote_char set to "\0"; anything else is
# handed to quote_char () and 'quote' is emptied. Croaks with 1007 for more
# than 16 elements, or with whatever _check_sanity () reports.
# Returns 'quote' when it is non-empty, otherwise 'quote_char'.
sub quote {
    my $self = shift;
    if (@_) {
        my $new = shift;
        $new = "" unless defined $new;
        utf8::decode ($new);
        my $width = () = unpack "U0C*", $new;
        if ($width > 1) {
            croak ($self->SetDiag (1007)) if $width > 16;
            $self->quote_char ("\0");
            }
        else {
            $self->quote_char ($new);
            $new = "";
            }
        $self->{'quote'} = $new;

        if (my $err = _check_sanity ($self)) {
            croak ($self->SetDiag ($err));
            }

        $self->_cache_set ($_cache_id{'quote'}, $new);
        }
    my $cur = $self->{'quote'};
    return defined $cur && length ($cur) ? $cur : $self->{'quote_char'};
    } # quote
340
# Get or set the escape character. Setting it to a false value also switches
# escape_null off.
sub escape_char {
    my ($self, @new) = @_;
    if (@new) {
        my $esc = $new[0];
        $self->_set_attr_C ("escape_char", $esc);
        $self->_set_attr_X ("escape_null", 0) unless $esc;
        }
    return $self->{'escape_char'};
    } # escape_char
350
# Get or set the single separator character. Setting it also clears the
# cached multi-character separator.
sub sep_char {
    my ($self, @new) = @_;
    if (@new) {
        $self->_set_attr_C ("sep_char", $new[0]);
        $self->_cache_set ($_cache_id{'sep'}, "");
        }
    return $self->{'sep_char'};
    } # sep_char
359
# Get or set the separation. A value that unpacks ("U0C*") to more than one
# element is kept in 'sep' with sep_char set to "\0"; anything else is handed
# to sep_char () and 'sep' is emptied. Croaks with 1006 for more than 16
# elements, or with whatever _check_sanity () reports.
# Returns 'sep' when it is non-empty, otherwise 'sep_char'.
sub sep {
    my $self = shift;
    if (@_) {
        my $new = shift;
        $new = "" unless defined $new;
        utf8::decode ($new);
        my $width = () = unpack "U0C*", $new;
        if ($width > 1) {
            croak ($self->SetDiag (1006)) if $width > 16;
            $self->sep_char ("\0");
            }
        else {
            $self->sep_char ($new);
            $new = "";
            }
        $self->{'sep'} = $new;

        if (my $err = _check_sanity ($self)) {
            croak ($self->SetDiag ($err));
            }

        $self->_cache_set ($_cache_id{'sep'}, $new);
        }
    my $cur = $self->{'sep'};
    return defined $cur && length ($cur) ? $cur : $self->{'sep_char'};
    } # sep
385
# Get or set the end-of-line string. An undefined value is stored as the
# empty string; more than 16 characters croaks with 1005.
sub eol {
    my ($self, @new) = @_;
    if (@new) {
        my $eol = defined $new[0] ? $new[0] : "";
        croak ($self->SetDiag (1005)) if length ($eol) > 16;
        $self->{'eol'} = $eol;
        $self->_cache_set ($_cache_id{'eol'}, $eol);
        }
    return $self->{'eol'};
    } # eol
397
# Get or set the always_quote flag
sub always_quote {
    my ($self, @new) = @_;
    $self->_set_attr_X ("always_quote", $new[0]) if @new;
    return $self->{'always_quote'};
    } # always_quote
403
# Get or set the quote_space flag
sub quote_space {
    my ($self, @new) = @_;
    $self->_set_attr_X ("quote_space", $new[0]) if @new;
    return $self->{'quote_space'};
    } # quote_space
409
# Get or set the quote_empty flag
sub quote_empty {
    my ($self, @new) = @_;
    $self->_set_attr_X ("quote_empty", $new[0]) if @new;
    return $self->{'quote_empty'};
    } # quote_empty
415
# Get or set the escape_null flag
sub escape_null {
    my ($self, @new) = @_;
    $self->_set_attr_X ("escape_null", $new[0]) if @new;
    return $self->{'escape_null'};
    } # escape_null
# Alternative name for escape_null (); control is handed over with the
# caller's arguments untouched.
sub quote_null {
    goto &escape_null;
    } # quote_null
422
# Get or set the quote_binary flag
sub quote_binary {
    my ($self, @new) = @_;
    $self->_set_attr_X ("quote_binary", $new[0]) if @new;
    return $self->{'quote_binary'};
    } # quote_binary
428
# Get or set the binary flag
sub binary {
    my ($self, @new) = @_;
    $self->_set_attr_X ("binary", $new[0]) if @new;
    return $self->{'binary'};
    } # binary
434
# Get or set the strict flag
sub strict {
    my ($self, @new) = @_;
    $self->_set_attr_X ("strict", $new[0]) if @new;
    return $self->{'strict'};
    } # strict
440
# Get or set the skip_empty_rows flag
sub skip_empty_rows {
    my ($self, @new) = @_;
    $self->_set_attr_X ("skip_empty_rows", $new[0]) if @new;
    return $self->{'skip_empty_rows'};
    } # skip_empty_rows
446
# Sets diagnostic $err and returns its message combined with the extra text
# $msg. A message that is just a number is dropped and $msg is prefixed with
# "# " instead. Parts without three consecutive non-blank characters are
# left out of the result.
sub _SetDiagInfo {
    my ($self, $err, $msg) = @_;
    $self->SetDiag ($err);
    my $diag = $self->error_diag ();
    if ($diag =~ s/^\d+$//) {
        $msg =~ s/^/# /;
        }
    # A message ending in ";" or a newline gets the extra text on a new line
    my $glue  = $diag =~ m/[;\n]$/ ? "\n\t" : ": ";
    my @parts = grep { m/\S\S\S/ } $diag, $msg;
    return join $glue, @parts;
    } # _SetDiagInfo
455
# Translates a formula-handling specification into its numeric code:
#   0/none 1/die 2/croak 3/diag 4/empty (or "") 5/undef 6/cb
# An undefined value gives 5. A code reference (with a true $self) is stored
# in _FORMULA_CB and gives 6. Anything else croaks with diagnostic 1500.
sub _supported_formula {
    my ($self, $f) = @_;
    return 5 unless defined $f;
    if ($self && $f && ref $f && ref $f eq "CODE") {
        $self->{'_FORMULA_CB'} = $f;
        return 6;
        }

    # The position in this list is the code that is returned
    my @known = (
        qr/^(?: 0 | none    )$/xi,
        qr/^(?: 1 | die     )$/xi,
        qr/^(?: 2 | croak   )$/xi,
        qr/^(?: 3 | diag    )$/xi,
        qr/^(?: 4 | empty | )$/xi,
        qr/^(?: 5 | undef   )$/xi,
        qr/^(?: 6 | cb      )$/xi,
        );
    for my $code (0 .. $#known) {
        return $code if $f =~ $known[$code];
        }

    $self ||= "Text::CSV_XS";
    croak ($self->_SetDiagInfo (1500, "formula-handling '$f' is not supported"));
    } # _supported_formula
474
# Get or set the formula handling. Returns its name (none, die, croak, diag,
# empty, undef or cb). The stored callback is dropped unless the handling is
# 6 (cb).
sub formula {
    my ($self, @new) = @_;
    if (@new) {
        my $code = _supported_formula ($self, $new[0]);
        $self->_set_attr_N ("formula", $code);
        }
    $self->{'_FORMULA_CB'} = undef unless $self->{'formula'} == 6;
    my @label = qw( none die croak diag empty undef cb );
    return $label[_supported_formula ($self, $self->{'formula'})];
    } # formula
# Long name for formula (): all arguments are passed on as they are
sub formula_handling {
    my ($self, @args) = @_;
    return $self->formula (@args);
    } # formula_handling
485
# Get or set the decode_utf8 flag
sub decode_utf8 {
    my ($self, @new) = @_;
    $self->_set_attr_X ("decode_utf8", $new[0]) if @new;
    return $self->{'decode_utf8'};
    } # decode_utf8
491
# Get or set keep_meta_info. Undefined and empty values become 0, values
# starting with a digit are kept as they are, "false" (any case) becomes 0
# and every other string becomes 1.
sub keep_meta_info {
    my ($self, @new) = @_;
    if (@new) {
        my $flag = $new[0];
        if (!defined $flag || $flag eq "") {
            $flag = 0;
            }
        elsif ($flag !~ m/^[0-9]/) {
            $flag = lc ($flag) eq "false" ? 0 : 1; # true/truth = 1
            }
        $self->_set_attr_X ("keep_meta_info", $flag);
        }
    return $self->{'keep_meta_info'};
    } # keep_meta_info
502
# Get or set the allow_loose_quotes flag
sub allow_loose_quotes {
    my ($self, @new) = @_;
    $self->_set_attr_X ("allow_loose_quotes", $new[0]) if @new;
    return $self->{'allow_loose_quotes'};
    } # allow_loose_quotes
508
# Get or set the allow_loose_escapes flag
sub allow_loose_escapes {
    my ($self, @new) = @_;
    $self->_set_attr_X ("allow_loose_escapes", $new[0]) if @new;
    return $self->{'allow_loose_escapes'};
    } # allow_loose_escapes
514
# Get or set the allow_whitespace flag. Croaks with 1002 when
# _unhealthy_whitespace () objects to the new value.
sub allow_whitespace {
    my ($self, @new) = @_;
    if (@new) {
        my $aw = $new[0];
        if (_unhealthy_whitespace ($self, $aw)) {
            croak ($self->SetDiag (1002));
            }
        $self->_set_attr_X ("allow_whitespace", $aw);
        }
    return $self->{'allow_whitespace'};
    } # allow_whitespace
525
# Get or set the allow_unquoted_escape flag
sub allow_unquoted_escape {
    my ($self, @new) = @_;
    $self->_set_attr_X ("allow_unquoted_escape", $new[0]) if @new;
    return $self->{'allow_unquoted_escape'};
    } # allow_unquoted_escape
531
# Get or set the blank_is_undef flag
sub blank_is_undef {
    my ($self, @new) = @_;
    $self->_set_attr_X ("blank_is_undef", $new[0]) if @new;
    return $self->{'blank_is_undef'};
    } # blank_is_undef
537
# Get or set the empty_is_undef flag
sub empty_is_undef {
    my ($self, @new) = @_;
    $self->_set_attr_X ("empty_is_undef", $new[0]) if @new;
    return $self->{'empty_is_undef'};
    } # empty_is_undef
543
# Get or set the verbatim flag
sub verbatim {
    my ($self, @new) = @_;
    $self->_set_attr_X ("verbatim", $new[0]) if @new;
    return $self->{'verbatim'};
    } # verbatim
549
# Get or set undef_str. A defined value is stored in stringified form;
# undef is stored as undef.
sub undef_str {
    my ($self, @new) = @_;
    if (@new) {
        my $str = $new[0];
        $self->{'undef_str'} = defined $str ? "$str" : undef;
        $self->_cache_set ($_cache_id{'undef_str'}, $self->{'undef_str'});
        }
    return $self->{'undef_str'};
    } # undef_str
559
# Get or set comment_str. A defined value is stored in stringified form;
# undef is stored as undef.
sub comment_str {
    my ($self, @new) = @_;
    if (@new) {
        my $str = $new[0];
        $self->{'comment_str'} = defined $str ? "$str" : undef;
        $self->_cache_set ($_cache_id{'comment_str'}, $self->{'comment_str'});
        }
    return $self->{'comment_str'};
    } # comment_str
569
# Get or set auto_diag. Undefined and empty values become 0, values starting
# with a digit are kept as they are, "false" (any case) becomes 0 and every
# other string becomes 1.
sub auto_diag {
    my ($self, @new) = @_;
    if (@new) {
        my $level = $new[0];
        if (!defined $level || $level eq "") {
            $level = 0;
            }
        elsif ($level !~ m/^[0-9]/) {
            $level = lc ($level) eq "false" ? 0 : 1; # true/truth = 1
            }
        $self->_set_attr_X ("auto_diag", $level);
        }
    return $self->{'auto_diag'};
    } # auto_diag
580
# Get or set diag_verbose. Undefined and empty values become 0, values
# starting with a digit are kept as they are, "false" (any case) becomes 0
# and every other string becomes 1.
sub diag_verbose {
    my ($self, @new) = @_;
    if (@new) {
        my $level = $new[0];
        if (!defined $level || $level eq "") {
            $level = 0;
            }
        elsif ($level !~ m/^[0-9]/) {
            $level = lc ($level) eq "false" ? 0 : 1; # true/truth = 1
            }
        $self->_set_attr_X ("diag_verbose", $level);
        }
    return $self->{'diag_verbose'};
    } # diag_verbose
591
592# status
593#
594#   object method returning the success or failure of the most recent
595#   combine () or parse ().  there are no side-effects.
596
sub status {
    my ($self) = @_;
    # Stored by combine () and parse ()
    $self->{'_STATUS'};
    } # status
601
# Returns the object's stored _EOF value
sub eof {
    my ($self) = @_;
    $self->{'_EOF'};
    } # eof
606
# Get or set the column types. Without arguments the stored array reference
# is returned. A true argument (an array reference of type codes) is stored
# and also kept as a packed string with one character per code; a false
# argument removes both and returns undef.
sub types {
    my ($self, @args) = @_;
    @args or return $self->{'types'};

    my $types = $args[0];
    unless ($types) {
        delete $self->{'types'};
        delete $self->{'_types'};
        $self->_cache_set ($_cache_id{'types'}, undef);
        return undef;
        }

    $self->{'_types'} = join "", map { chr } @{$types};
    $self->{'types'}  = $types;
    return $self->_cache_set ($_cache_id{'types'}, $self->{'_types'});
    } # types
626
# Get or set the callbacks. Accepts a single hash reference, a flat list of
# name => code pairs, or a single undef to remove all callbacks. Croaks with
# 1004 on undefined list members, an odd number of arguments, names that do
# not match [\w.]+ or values that are not code references.
# The _has_hooks mask gets 0x01 for error, 0x02 for after_parse and 0x04 for
# before_print.
sub callbacks {
    my ($self, @args) = @_;
    if (@args) {
        my $hooks;
        my $mask = 0x00;
        if (defined $args[0]) {
            for my $arg (@args) {
                defined $arg or croak ($self->SetDiag (1004));
                }
            if (@args == 1 && ref $args[0] eq "HASH") {
                $hooks = $args[0];
                }
            elsif (@args % 2 == 0) {
                $hooks = { @args };
                }
            else {
                croak ($self->SetDiag (1004));
                }
            for my $name (keys %{$hooks}) {
                # A key cannot be a ref. That would be stored as the *string*
                # 'SCALAR(0x1f3e710)' or 'ARRAY(0x1a5ae18)'
                unless ($name =~ m/^[\w.]+$/ && ref $hooks->{$name} eq "CODE") {
                    croak ($self->SetDiag (1004));
                    }
                }
            $mask |= 0x01 if exists $hooks->{'error'};
            $mask |= 0x02 if exists $hooks->{'after_parse'};
            $mask |= 0x04 if exists $hooks->{'before_print'};
            }
        elsif (@args > 1) {
            # (undef, whatever)
            croak ($self->SetDiag (1004));
            }
        $self->_set_attr_X ("_has_hooks", $mask);
        $self->{'callbacks'} = $hooks;
        }
    return $self->{'callbacks'};
    } # callbacks
656
657# error_diag
658#
659#   If (and only if) an error occurred, this function returns a code that
660#   indicates the reason of failure
661
# Reports the last diagnostic. The behaviour depends on the calling context:
#   list   - (code, message, position, record number, field number)
#   scalar - the diagnostic value itself
#   void   - warns (or dies, see below) when there is an error to report
# Without an object, or with an object that has no _ERROR_DIAG, the
# diagnostic of the last new () is used.
sub error_diag {
    my $self = shift;
    # Start from the diagnostic of the last new ().
    # NOTE(review): used both numerically and as a string, so presumably a
    # dualvar holding code and message - confirm against SetDiag ().
    my @diag = (0 + $last_new_err, $last_new_err, 0, 0, 0);

    # Docs state to NEVER use UNIVERSAL::isa, because it will *never* call an
    # overridden isa method in any class. Well, that is exacly what I want here
    if ($self && ref $self and # Not a class method or direct call
	 UNIVERSAL::isa ($self, __PACKAGE__) && exists $self->{'_ERROR_DIAG'}) {
	$diag[0] = 0 + $self->{'_ERROR_DIAG'};
	$diag[1] =     $self->{'_ERROR_DIAG'};
	$diag[2] = 1 + $self->{'_ERROR_POS'} if exists $self->{'_ERROR_POS'};
	$diag[3] =     $self->{'_RECNO'};
	$diag[4] =     $self->{'_ERROR_FLD'} if exists $self->{'_ERROR_FLD'};

	# An 'error' callback takes over the reporting of a real error
	$diag[0] && $self->{'callbacks'} && $self->{'callbacks'}{'error'} and
	    return $self->{'callbacks'}{'error'}->(@diag);
	}

    my $context = wantarray;
    unless (defined $context) {	# Void context, auto-diag
	# Code 2012 is never reported from void context
	if ($diag[0] && $diag[0] != 2012) {
	    my $msg = "# CSV_XS ERROR: $diag[0] - $diag[1] \@ rec $diag[3] pos $diag[2]\n";
	    # s/$/.../ puts the field info in front of the trailing newline
	    $diag[4] and $msg =~ s/$/ field $diag[4]/;

	    unless ($self && ref $self) {	# auto_diag
		# called without args in void context
		warn $msg;
		return;
		}

	    # Verbose: add the offending input with a marker under the
	    # error position
	    $self->{'diag_verbose'} && $self->{'_ERROR_INPUT'} and
		$msg .= $self->{'_ERROR_INPUT'}."\n".
			(" " x ($diag[2] - 1))."^\n";

	    # auto_diag above 1 dies instead of warns; an autodie/Fatal hint
	    # found two call frames up raises the level by one.
	    my $lvl = $self->{'auto_diag'};
	    if ($lvl < 2) {
		my @c = caller (2);
		if (@c >= 11 && $c[10] && ref $c[10] eq "HASH") {
		    my $hints = $c[10];
		    (exists $hints->{'autodie'} && $hints->{'autodie'} or
		     exists $hints->{'guard Fatal'} &&
		    !exists $hints->{'no Fatal'}) and
			$lvl++;
		    # Future releases of autodie will probably set $^H{autodie}
		    #  to "autodie @args", like "autodie :all" or "autodie open"
		    #  so we can/should check for "open" or "new"
		    }
		}
	    $lvl > 1 ? die $msg : warn $msg;
	    }
	return;
	}
    return $context ? @diag : $diag[1];
    } # error_diag
716
# Returns the object's stored record number (_RECNO)
sub record_number {
    my ($self) = @_;
    $self->{'_RECNO'};
    } # record_number
721
722# string
723#
724#   object method returning the result of the most recent combine () or the
725#   input to the most recent parse (), whichever is more recent.  there are
726#   no side-effects.
727
sub string {
    my ($self) = @_;
    # _STRING holds a reference to the string, or nothing at all
    my $ref = $self->{'_STRING'};
    return ref $ref ? ${$ref} : undef;
    } # string
732
733# fields
734#
735#   object method returning the result of the most recent parse () or the
736#   input to the most recent combine (), whichever is more recent.  there
737#   are no side-effects.
738
sub fields {
    my ($self) = @_;
    # _FIELDS holds an array reference, or nothing at all (undef is returned)
    my $fields = $self->{'_FIELDS'};
    return ref $fields ? @{$fields} : undef;
    } # fields
743
744# meta_info
745#
746#   object method returning the result of the most recent parse () or the
747#   input to the most recent combine (), whichever is more recent.  there
748#   are no side-effects. meta_info () returns (if available)  some of the
749#   field's properties
750
sub meta_info {
    my ($self) = @_;
    # _FFLAGS holds an array reference, or nothing at all (undef is returned)
    my $flags = $self->{'_FFLAGS'};
    return ref $flags ? @{$flags} : undef;
    } # meta_info
755
# Returns 1 or 0 for bit 0x0001 in the flags of field $idx. Returns nothing
# when there are no flags or when $idx is outside their range.
sub is_quoted {
    my ($self, $idx) = @_;
    my $flags = $self->{'_FFLAGS'};
    return unless ref $flags;
    return if $idx < 0 || $idx >= @{$flags};
    return $flags->[$idx] & 0x0001 ? 1 : 0;
    } # is_quoted
762
# Returns 1 or 0 for bit 0x0002 in the flags of field $idx. Returns nothing
# when there are no flags or when $idx is outside their range.
sub is_binary {
    my ($self, $idx) = @_;
    my $flags = $self->{'_FFLAGS'};
    return unless ref $flags;
    return if $idx < 0 || $idx >= @{$flags};
    return $flags->[$idx] & 0x0002 ? 1 : 0;
    } # is_binary
769
# Returns 1 or 0 for bit 0x0010 in the flags of field $idx. An index beyond
# the last flag counts as missing (1). Returns nothing for a negative index
# or when there are no flags.
sub is_missing {
    my ($self, $idx) = @_;
    return if $idx < 0;
    my $flags = $self->{'_FFLAGS'};
    return unless ref $flags;
    return 1 if $idx >= @{$flags};
    return $flags->[$idx] & 0x0010 ? 1 : 0;
    } # is_missing
776
777# combine
778#
779#  Object method returning success or failure. The given arguments are
780#  combined into a single comma-separated value. Failure can be the
781#  result of no arguments or an argument containing an invalid character.
782#  side-effects include:
783#      setting status ()
784#      setting fields ()
785#      setting string ()
786#      setting error_input ()
787
sub combine {
    my $self = shift;
    my $buf  = "";
    # The argument list itself is kept (by reference) as the fields
    $self->{'_FIELDS'} = \@_;
    # No fields at all is a failure; Combine () is not called in that case
    my $ok = @_ > 0;
    $ok &&= $self->Combine (\$buf, \@_, 0);
    $self->{'_STATUS'} = $ok;
    $self->{'_STRING'} = \$buf;
    return $self->{'_STATUS'};
    } # combine
796
797# parse
798#
799#  Object method returning success or failure. The given argument is
800#  expected to be a valid comma-separated value. Failure can be the
801#  result of no arguments or an argument containing an invalid sequence
802#  of characters. Side-effects include:
803#      setting status ()
804#      setting fields ()
805#      setting meta_info ()
806#      setting string ()
807#      setting error_input ()
808
sub parse {
    my ($self, $str) = @_;

    # Only plain strings can be parsed
    croak ($self->SetDiag (1500)) if ref $str;

    my ($fields, $fflags) = ([], []);
    $self->{'_STRING'} = \$str;
    my $ok = defined $str && $self->Parse ($str, $fields, $fflags) ? 1 : 0;
    # Fields and flags are only kept for a successful parse
    $self->{'_FIELDS'} = $ok ? $fields : undef;
    $self->{'_FFLAGS'} = $ok ? $fflags : undef;
    $self->{'_STATUS'} = $ok;
    return $self->{'_STATUS'};
    } # parse
829
# Get or set the column names.
#   no arguments   - returns the current names (an empty list when unset)
#   a single undef - removes the names
#   an array ref   - its elements are the new names
#   a list         - the new names; any reference in it croaks with 3001
# Croaks with 3003 when the number of names differs from the number of bound
# columns. Undefined names are stored as "\cAUNDEF\cA".
sub column_names {
    my ($self, @keys) = @_;
    unless (@keys) {
        my $cur = $self->{'_COLUMN_NAMES'};
        return defined $cur ? @{$cur} : ();
        }

    if (@keys == 1 && !defined $keys[0]) {
        return $self->{'_COLUMN_NAMES'} = undef;
        }

    if (@keys == 1 && ref $keys[0] eq "ARRAY") {
        @keys = @{$keys[0]};
        }
    elsif (join "", map { defined $_ ? ref $_ : "" } @keys) {
        croak ($self->SetDiag (3001));
        }

    my $bound = $self->{'_BOUND_COLUMNS'};
    if ($bound && @keys != @{$bound}) {
        croak ($self->SetDiag (3003));
        }

    my @names = map { defined $_ ? $_ : "\cAUNDEF\cA" } @keys;
    $self->{'_COLUMN_NAMES'} = \@names;
    return @{$self->{'_COLUMN_NAMES'}};
    } # column_names
851
# header
#
#   Reads the first line of $fh as a header line. It picks the separator
#   from a candidate set, optionally detects and strips a BOM, parses the
#   line into column names, munges them and (by default) installs them with
#   column_names ().
#   Extra arguments: array refs add separator candidates, a hash ref holds
#   the options (munge / munge_column_names, detect_bom, set_column_names,
#   sep_set); anything else croaks with the usage message.
#   Returns the list of names in list context, the object otherwise.
#   Croaks with 1014 (no handle), 1010 (empty header), 1011 (more than one
#   candidate separator present), 1012 (empty name) or 1013 (duplicate names).
sub header {
    my ($self, $fh, @args) = @_;

    $fh or croak ($self->SetDiag (1014));

    my (@seps, %args);
    for (@args) {
	if (ref $_ eq "ARRAY") {
	    push @seps, @{$_};
	    next;
	    }
	if (ref $_ eq "HASH") {
	    # A later hash replaces an earlier one completely
	    %args = %{$_};
	    next;
	    }
	croak ('usage: $csv->header ($fh, [ seps ], { options })');
	}

    defined $args{'munge'} && !defined $args{'munge_column_names'} and
	$args{'munge_column_names'} = $args{'munge'}; # munge as alias
    defined $args{'detect_bom'}         or $args{'detect_bom'}         = 1;
    defined $args{'set_column_names'}   or $args{'set_column_names'}   = 1;
    defined $args{'munge_column_names'} or $args{'munge_column_names'} = "lc";

    # Reset any previous leftovers
    $self->{'_RECNO'}		= 0;
    $self->{'_AHEAD'}		= undef;
    $self->{'_COLUMN_NAMES'}	= undef if $args{'set_column_names'};
    $self->{'_BOUND_COLUMNS'}	= undef if $args{'set_column_names'};

    # A sep_set option replaces the candidates given as array refs
    if (defined $args{'sep_set'}) {
	ref $args{'sep_set'} eq "ARRAY" or
	    croak ($self->_SetDiagInfo (1500, "sep_set should be an array ref"));
	@seps =  @{$args{'sep_set'}};
	}

    $^O eq "MSWin32" and binmode $fh;
    my $hdr = <$fh>;
    # check if $hdr can be empty here, I don't think so
    defined $hdr && $hdr ne "" or croak ($self->SetDiag (1010));

    # Record which of the candidate separators occur in the header line
    my %sep;
    @seps or @seps = (",", ";");
    foreach my $sep (@seps) {
	index ($hdr, $sep) >= 0 and $sep{$sep}++;
	}

    keys %sep >= 2 and croak ($self->SetDiag (1011));

    # With no candidate found the key list is empty and sep () is only read
    $self->sep (keys %sep);
    my $enc = "";
    if ($args{'detect_bom'}) { # UTF-7 is not supported
	   if ($hdr =~ s/^\x00\x00\xfe\xff//) { $enc = "utf-32be"   }
	elsif ($hdr =~ s/^\xff\xfe\x00\x00//) { $enc = "utf-32le"   }
	elsif ($hdr =~ s/^\xfe\xff//)         { $enc = "utf-16be"   }
	elsif ($hdr =~ s/^\xff\xfe//)         { $enc = "utf-16le"   }
	elsif ($hdr =~ s/^\xef\xbb\xbf//)     { $enc = "utf-8"      }
	elsif ($hdr =~ s/^\xf7\x64\x4c//)     { $enc = "utf-1"      }
	elsif ($hdr =~ s/^\xdd\x73\x66\x73//) { $enc = "utf-ebcdic" }
	elsif ($hdr =~ s/^\x0e\xfe\xff//)     { $enc = "scsu"       }
	elsif ($hdr =~ s/^\xfb\xee\x28//)     { $enc = "bocu-1"     }
	elsif ($hdr =~ s/^\x84\x31\x95\x33//) { $enc = "gb-18030"   }
	elsif ($hdr =~ s/^\x{feff}//)         { $enc = ""           }

	$self->{'ENCODING'} = $enc ? uc $enc : undef;

	# Nothing but a BOM on the line
	$hdr eq "" and croak ($self->SetDiag (1010));

	if ($enc) {
	    # No decoding is done for UTF-EBCDIC on an EBCDIC system
	    $ebcdic && $enc eq "utf-ebcdic" and $enc = "";
	    # utf-16le / utf-32le: append 1 or 3 NUL bytes to the header and
	    # consume as many bytes from the handle.
	    # NOTE(review): presumably this completes the line ending that
	    # the byte-wise readline cut in half - confirm.
	    if ($enc =~ m/([13]).le$/) {
		my $l = 0 + $1;
		my $x;
		$hdr .= "\0" x $l;
		read $fh, $x, $l;
		}
	    if ($enc) {
		if ($enc ne "utf-8") {
		    require Encode;
		    $hdr = Encode::decode ($enc, $hdr);
		    }
		# Whatever is read from $fh after this is decoded by the layer
		binmode $fh, ":encoding($enc)";
		}
	    }
	}

    my ($ahead, $eol);
    # A leading "sep=X" line sets the separator; if that was the whole line,
    # the real header is the next line.
    if ($hdr and $hdr =~ s/\Asep=(\S)([\r\n]+)//i) { # Also look in xs:Parse
	$self->sep ($1);
	length $hdr or $hdr = <$fh>;
	}
    # If more than one line was read at once, keep only the first as header
    # and remember the line ending and the remainder.
    if ($hdr =~ s/^([^\r\n]+)([\r\n]+)([^\r\n].+)\z/$1/s) {
	$eol   = $2;
	$ahead = $3;
	}

    # Parse the header line through an in-memory file handle
    my $hr = \$hdr; # Will cause croak on perl-5.6.x
    open my $h, "<", $hr or croak ($self->SetDiag (1010));

    my $row = $self->getline ($h) or croak ();
    close $h;

    if (   $args{'munge_column_names'} eq "lc") {
	$_ = lc for @{$row};
	}
    elsif ($args{'munge_column_names'} eq "uc") {
	$_ = uc for @{$row};
	}
    elsif ($args{'munge_column_names'} eq "db") {
	# Runs of non-word characters become "_", leading "_" are dropped
	# and the result is lower-cased
	for (@{$row}) {
	    s/\W+/_/g;
	    s/^_+//;
	    $_ = lc;
	    }
	}

    if ($ahead) { # Must be after getline, which creates the cache
	$self->_cache_set ($_cache_id{'_has_ahead'}, 1);
	$self->{'_AHEAD'} = $ahead;
	# A line ending starting with a \r that is not followed by \n is
	# set as eol
	$eol =~ m/^\r([^\n]|\z)/ and $self->eol ($eol);
	}

    my @hdr = @{$row};
    # munge_column_names may also be a code ref (called for each name) or a
    # hash ref (name lookup, falling back to the name itself)
    ref $args{'munge_column_names'} eq "CODE" and
	@hdr = map { $args{'munge_column_names'}->($_)       } @hdr;
    ref $args{'munge_column_names'} eq "HASH" and
	@hdr = map { $args{'munge_column_names'}->{$_} || $_ } @hdr;
    # Names have to be non-empty and unique
    my %hdr; $hdr{$_}++ for @hdr;
    exists $hdr{""} and croak ($self->SetDiag (1012));
    unless (keys %hdr == @hdr) {
	croak ($self->_SetDiagInfo (1013, join ", " =>
	    map { "$_ ($hdr{$_})" } grep { $hdr{$_} > 1 } keys %hdr));
	}
    $args{'set_column_names'} and $self->column_names (@hdr);
    wantarray ? @hdr : $self;
    } # header
988
# Get or set the bound columns.
#   no arguments   - returns the current references, or undef when unset
#   a single undef - removes the bound columns and the column names
#   a list         - scalar references to bind; anything else croaks (3004)
# Croaks with 3003 when the count differs from the number of column names.
sub bind_columns {
    my ($self, @refs) = @_;
    unless (@refs) {
        my $cur = $self->{'_BOUND_COLUMNS'};
        return defined $cur ? @{$cur} : undef;
        }

    if (@refs == 1 && !defined $refs[0]) {
        $self->{'_COLUMN_NAMES'} = undef;
        return $self->{'_BOUND_COLUMNS'} = undef;
        }

    my $names = $self->{'_COLUMN_NAMES'};
    if ($names && @refs != @{$names}) {
        croak ($self->SetDiag (3003));
        }

    if (grep { ref $_ ne "SCALAR" } @refs) {
        croak ($self->SetDiag (3004));
        }

    $self->_set_attr_N ("_is_bound", scalar @refs);
    $self->{'_BOUND_COLUMNS'} = [ @refs ];
    return @refs;
    } # bind_columns
1009
# Reads the next record with getline () and returns it as a hash reference
# keyed on the column names. Croaks with 3002 when no column names are set;
# returns nothing when getline () returns false.
sub getline_hr {
    my ($self, @args) = @_;
    $self->{'_COLUMN_NAMES'} or croak ($self->SetDiag (3002));
    my $row = $self->getline (@args);
    return unless $row;

    my $names = $self->{'_COLUMN_NAMES'};
    my $flags = $self->{'_FFLAGS'};
    if (ref $flags) { # missing
        # Columns beyond the fields that were read are flagged as missing
        for my $i (scalar @{$row} .. $#{$names}) {
            $flags->[$i] = 0x0010;
            }
        # So is the first column of a record that is one empty field
        if (@{$row} == 1 && (!defined $row->[0] || $row->[0] eq "")) {
            $flags->[0] ||= 0x0010;
            }
        }

    my %hr;
    @hr{@{$names}} = @{$row};
    return \%hr;
    } # getline_hr
1023
# Returns a reference to a list with one hash reference (keyed on the column
# names) for every record that getline_all () returns. Croaks with 3002 when
# no column names are set.
sub getline_hr_all {
    my ($self, @args) = @_;
    $self->{'_COLUMN_NAMES'} or croak ($self->SetDiag (3002));
    my @names = @{$self->{'_COLUMN_NAMES'}};
    my @records;
    for my $row (@{$self->getline_all (@args)}) {
        my %rec;
        @rec{@names} = @{$row};
        push @records, \%rec;
        }
    return \@records;
    } # getline_hr_all
1030
# Like print (), but when eol is empty it is set to $\ (or else $/) for the
# duration of the call. The original eol is put back afterwards and the
# result of print () is returned.
sub say {
    my ($self, $io, @f) = @_;
    my $saved = $self->eol ();
    if ($saved eq "") {
        $self->eol ($\ || $/);
        }
    # say ($fh, undef) does not propagate actual undef to print ()
    my @out   = (@f == 1 && !defined $f[0]) ? (undef) : @f;
    my $state = $self->print ($io, @out);
    $self->eol ($saved);
    return $state;
    } # say
1040
# Prints the values of hash reference $hr in the order of the column names.
# Croaks with 3009 when no column names are set and with 3010 when $hr is
# not a hash reference.
sub print_hr {
    my ($self, $io, $hr) = @_;
    $self->{'_COLUMN_NAMES'} or croak ($self->SetDiag (3009));
    ref $hr eq "HASH"      or croak ($self->SetDiag (3010));
    my @row = @{$hr}{$self->column_names ()};
    return $self->print ($io, \@row);
    } # print_hr
1047
# fragment
#
#   object method that reads records from $io using getline () and returns
#   a reference to a list with only the selected parts of the data.
#
#   $spec is "row=...", "col=..." or "cell=...", optionally preceded by "#".
#   Rows and columns are counted from 1. For row and col the selection is a
#   ";"-separated list of "N", "N-M" or "N-*" (up to the end). For cell it
#   is a ";"-separated list of "R,C" or "R,C-R,C", where the bottom-right
#   row and/or column may be "*".
#
#   Every selected record is a reference to a list of fields or, if column
#   names are set, a reference to a hash keyed on those names.
#   An invalid specification croaks with the diagnostic of SetDiag (2013).
#
#   Multiple selections are combined: an open-ended range ("N-*") stays
#   open-ended, whatever other ranges are listed before or after it.

sub fragment {
    my ($self, $io, $spec) = @_;

    my $qd = qr{\s* [0-9]+ \s* }x;		# digit
    my $qs = qr{\s* (?: [0-9]+ | \* ) \s*}x;	# digit or star
    my $qr = qr{$qd (?: - $qs )?}x;		# range
    my $qc = qr{$qr (?: ; $qr )*}x;		# list
    defined $spec && $spec =~ m{^ \s*
	\x23 ? \s*				# optional leading #
	( row | col | cell ) \s* =
	( $qc					# for row and col
	| $qd , $qd (?: - $qs , $qs)?		# for cell (ranges)
	  (?: ; $qd , $qd (?: - $qs , $qs)? )*	# and cell (range) lists
	) \s* $}xi or croak ($self->SetDiag (2013));
    my ($type, $range) = (lc $1, $2);

    my @h = $self->column_names ();

    my @c;

    # Add one selected record to the result: as hash keyed on the column
    # names if these are set, as the list of fields itself otherwise
    my $store = sub {
	my ($fields) = @_;
	unless (@h) {
	    push @c, $fields;
	    return;
	    }
	my %h; @h{@h} = @{$fields};
	push @c, \%h;
	};

    if ($type eq "cell") {
	my @spec;
	my $min_row;
	my $max_row = 0;
	for (split m/\s*;\s*/ => $range) {
	    my ($tlr, $tlc, $brr, $brc) = (m{
		    ^ \s* ([0-9]+     ) \s* , \s* ([0-9]+     ) \s*
		(?: - \s* ([0-9]+ | \*) \s* , \s* ([0-9]+ | \*) \s* )?
		    $}x) or croak ($self->SetDiag (2013));
	    # A single cell is a range that starts and ends in that cell
	    defined $brr or ($brr, $brc) = ($tlr, $tlc);
	    $tlr == 0 || $tlc == 0 ||
		($brr ne "*" && ($brr == 0 || $brr < $tlr)) ||
		($brc ne "*" && ($brc == 0 || $brc < $tlc))
		    and croak ($self->SetDiag (2013));
	    # Columns are used as (0-based) index, rows are counted from 1
	    $tlc--;
	    $brc-- unless $brc eq "*";
	    defined $min_row or $min_row = $tlr;
	    $tlr < $min_row and $min_row = $tlr;
	    # Once a range runs to the last row ("*"), a later finite range
	    # may not lower $max_row again (nor compare "*" numerically)
	    $max_row ne "*" && ($brr eq "*" || $brr > $max_row) and
		$max_row = $brr;
	    push @spec, [ $tlr, $tlc, $brr, $brc ];
	    }
	my $r = 0;
	while (my $row = $self->getline ($io)) {
	    ++$r < $min_row and next;
	    # Collect the fields of all ranges that cover this row by index,
	    # so overlapping ranges select a field just once
	    my %row;
	    foreach my $s (@spec) {
		my ($tlr, $tlc, $brr, $brc) = @{$s};
		$r <  $tlr || ($brr ne "*" && $r > $brr) and next;
		my $rr = $brc eq "*" ? $#{$row} : $brc;
		$row{$_} = $row->[$_] for $tlc .. $rr;
		}
	    $store->([ @row{sort { $a <=> $b } keys %row } ]);
	    # No need to read beyond the last row any range asked for
	    $max_row ne "*" && $r == $max_row and last;
	    }
	return \@c;
	}

    # row or col
    my @r;		# $r[N] is true if row/column N was explicitly selected
    my $eod = 0;	# lowest N of all "N-*" ranges, 0 if there is none
    for (split m/\s*;\s*/ => $range) {
	my ($from, $to) = m/^\s* ([0-9]+) (?: \s* - \s* ([0-9]+ | \* ))? \s* $/x
	    or croak ($self->SetDiag (2013));
	$to ||= $from;
	if ($to eq "*") {
	    !$eod || $from < $eod and $eod = $from;
	    $to = $from;
	    }
	# $to cannot be <= 0 due to regex and ||=
	$from <= 0 || $to < $from and croak ($self->SetDiag (2013));
	$r[$_] = 1 for $from .. $to;
	}

    my $r = 0;
    # Columns are tested by (0-based) index, rows by their number
    $type eq "col" and shift @r;
    $_ ||= 0 for @r;
    while (my $row = $self->getline ($io)) {
	$r++;
	if ($type eq "row") {
	    $r[$r] || ($eod && $r >= $eod) and $store->($row);
	    next;
	    }
	$store->([ map { $r[$_] || ($eod && $_ + 1 >= $eod) ? $row->[$_] : () } 0 .. $#{$row} ]);
	}

    return \@c;
    } # fragment
1148
# Message to croak with when csv () is called without arguments or without
# a value for "in" (or "file")
my $csv_usage = q{usage: my $aoa = csv (in => $file);};
1150
# _csv_attr
#
#   Internal helper for csv (). Takes the arguments of csv (), either as a
#   single hash reference or as a list of key/value pairs, and separates
#   the options that csv () deals with itself (in, out, encoding, headers,
#   fragment, key, value, filter, callbacks, ...) from the attributes that
#   are passed to Text::CSV_XS->new ().
#
#   Opens the input and/or output when these are not already handles or
#   data structures. Returns a reference to a hash holding the CSV object
#   ('csv'), the remaining attributes ('attr'), the file handle ('fh'),
#   whether that handle has to be closed ('cls') and all extracted options.
#
#   Croaks if no arguments or no input were passed, if both in and out are
#   plain strings, if a file cannot be opened, or if the CSV object cannot
#   be created.

sub _csv_attr {
    my %attr = (@_ == 1 && ref $_[0] eq "HASH" ? %{$_[0]} : @_) or croak ();

    # The binary attribute is always enabled, whatever was passed
    $attr{'binary'} = 1;

    # enc/encoding: "auto" is no encoding but a request to detect the BOM.
    # Anything from the first ":" that is followed by a word character is
    # taken off and appended again after a bare encoding name has been
    # turned into an ":encoding(...)" layer
    my $enc = delete $attr{'enc'} || delete $attr{'encoding'} || "";
    $enc eq "auto" and ($attr{'detect_bom'}, $enc) = (1, "");
    my $stack = $enc =~ s/(:\w.*)// ? $1 : "";
    $enc =~ m/^[-\w.]+$/ and $enc = ":encoding($enc)";
    $enc .= $stack;

    my $fh;
    my $sink = 0;
    my $cls  = 0;	# If I open a file, I have to close it
    my $in   = delete $attr{'in'}  || delete $attr{'file'} or croak ($csv_usage);
    # An "out" that exists but is false is replaced with \"skip"
    my $out  = exists $attr{'out'} && !$attr{'out'} ? \"skip"
	     : delete $attr{'out'} || delete $attr{'file'};

    # Input passed as code or array reference defaults to output on STDOUT
    ref $in eq "CODE" || ref $in eq "ARRAY" and $out ||= \*STDOUT;

    $in && $out && !ref $in && !ref $out and croak (join "\n" =>
	qq{Cannot use a string for both in and out. Instead use:},
	qq{ csv (in => csv (in => "$in"), out => "$out");\n});

    if ($out) {
	if (ref $out and ("ARRAY" eq ref $out or "HASH" eq ref $out)) {
	    # Output into a data structure: nothing to open
	    delete $attr{'out'};
	    $sink = 1;
	    }
	elsif ((ref $out and "SCALAR" ne ref $out) or "GLOB" eq ref \$out) {
	    # Any other reference but a scalar reference, or a glob: use it
	    # as the handle to write to
	    $fh = $out;
	    }
	elsif (ref $out and "SCALAR" eq ref $out and defined ${$out} and ${$out} eq "skip") {
	    # \"skip": no output handle is needed
	    delete $attr{'out'};
	    $sink = 1;
	    }
	else {
	    # Everything else is passed to open () as target to write to
	    open $fh, ">", $out or croak ("$out: $!");
	    $cls = 1;
	    }
	if ($fh) {
	    if ($enc) {
		binmode $fh, $enc;
		my $fn = fileno $fh; # This is a workaround for a bug in PerlIO::via::gzip
		}
	    # Without a defined eol, default to "\r\n", or to "\n" if the
	    # handle has a layer matching "crlf"
	    unless (defined $attr{'eol'}) {
		my @layers = eval { PerlIO::get_layers ($fh) };
		$attr{'eol'} = (grep m/crlf/ => @layers) ? "\n" : "\r\n";
		}
	    }
	}

    if (   ref $in eq "CODE" or ref $in eq "ARRAY") {
	# All done
	}
    elsif (ref $in eq "SCALAR") {
	# Strings with code points over 0xFF may not be mapped into in-memory file handles
	# "<$enc" does not change that :(
	open $fh, "<", $in or croak ("Cannot open from SCALAR using PerlIO");
	$cls = 1;
	}
    elsif (ref $in or "GLOB" eq ref \$in) {
	# Any other reference, or a glob: use it as the handle to read from
	if (!ref $in && $] < 5.008005) {
	    $fh = \*{$in}; # uncoverable statement ancient perl version required
	    }
	else {
	    $fh = $in;
	    }
	}
    else {
	# A plain string is opened for reading, using the encoding layer(s)
	open $fh, "<$enc", $in or croak ("$in: $!");
	$cls = 1;
	}
    $fh || $sink or croak (qq{No valid source passed. "in" is required});

    # Take out all options that are handled by csv () itself. Of options
    # with aliases, the first one with a true value is used
    my $hdrs = delete $attr{'headers'};
    my $frag = delete $attr{'fragment'};
    my $key  = delete $attr{'key'};
    my $val  = delete $attr{'value'};
    my $kh   = delete $attr{'keep_headers'}		||
	       delete $attr{'keep_column_names'}	||
	       delete $attr{'kh'};

    # These callbacks are invoked by csv () itself
    my $cbai = delete $attr{'callbacks'}{'after_in'}	||
	       delete $attr{'after_in'}			||
	       delete $attr{'callbacks'}{'after_parse'}	||
	       delete $attr{'after_parse'};
    my $cbbo = delete $attr{'callbacks'}{'before_out'}	||
	       delete $attr{'before_out'};
    my $cboi = delete $attr{'callbacks'}{'on_in'}	||
	       delete $attr{'on_in'};

    # Options that csv () uses for its call to header ()
    my $hd_s = delete $attr{'sep_set'}			||
	       delete $attr{'seps'};
    my $hd_b = delete $attr{'detect_bom'}		||
	       delete $attr{'bom'};
    my $hd_m = delete $attr{'munge'}			||
	       delete $attr{'munge_column_names'};
    my $hd_c = delete $attr{'set_column_names'};

    # Rename shortcuts to the longer name, unless that is already present.
    # Due to the order, "esc" ends up as "escape_char" via "escape"
    for ([ 'quo'    => "quote"		],
	 [ 'esc'    => "escape"		],
	 [ 'escape' => "escape_char"	],
	 ) {
	my ($f, $t) = @{$_};
	exists $attr{$f} and !exists $attr{$t} and $attr{$t} = delete $attr{$f};
	}

    # A filter is normalized to a hash of code references: the name of a
    # predefined filter or a single code reference is stored under key 0,
    # anything that is no hash after that is dropped
    my $fltr = delete $attr{'filter'};
    my %fltr = (
	'not_blank' => sub { @{$_[1]} > 1 or defined $_[1][0] && $_[1][0] ne "" },
	'not_empty' => sub { grep { defined && $_ ne "" } @{$_[1]} },
	'filled'    => sub { grep { defined && m/\S/    } @{$_[1]} },
	);
    defined $fltr && !ref $fltr && exists $fltr{$fltr} and
	$fltr = { '0' => $fltr{$fltr} };
    ref $fltr eq "CODE" and $fltr = { 0 => $fltr };
    ref $fltr eq "HASH" or  $fltr = undef;

    # formula is not passed to new () but set with the formula () method
    # once the object exists
    my $form = delete $attr{'formula'};

    # Defaults for csv () if the caller did not pass a defined value
    defined $attr{'auto_diag'}   or $attr{'auto_diag'}   = 1;
    defined $attr{'escape_null'} or $attr{'escape_null'} = 0;
    # Use the CSV object that was passed or create one from what is left
    my $csv = delete $attr{'csv'} || Text::CSV_XS->new (\%attr)
	or croak ($last_new_err);
    defined $form and $csv->formula ($form);

    return {
	'csv'  => $csv,
	'attr' => { %attr },
	'fh'   => $fh,
	'cls'  => $cls,
	'in'   => $in,
	'sink' => $sink,
	'out'  => $out,
	'enc'  => $enc,
	'hdrs' => $hdrs,
	'key'  => $key,
	'val'  => $val,
	'kh'   => $kh,
	'frag' => $frag,
	'fltr' => $fltr,
	'cbai' => $cbai,
	'cbbo' => $cbbo,
	'cboi' => $cboi,
	'hd_s' => $hd_s,
	'hd_b' => $hd_b,
	'hd_m' => $hd_m,
	'hd_c' => $hd_c,
	};
    } # _csv_attr
1302
# csv
#
#   Function (exported on request) that reads or writes CSV in one call.
#   The arguments are passed as hash reference or as list of key/value
#   pairs and are analyzed by _csv_attr (). When the first argument is a
#   Text::CSV_XS object, that object is used as the "csv" attribute.
#
#   Writing: if "out" is set and is no data structure or \"skip", the data
#   in "in" (a code reference returning rows, a list of lists, or a list of
#   hashes) is printed to the output and 1 is returned.
#
#   Reading: otherwise all data is read from the input and returned as a
#   reference to a list of lists, to a list of hashes (when headers are
#   used), or to a hash (when "key" is used). With "out" being a reference
#   to an array or hash, the data is added to that and "out" is returned.
#   In void context the data read is passed to csv () again as "in".
#
#   Croaks without arguments and on invalid combinations of arguments.

sub csv {
    # Called as method: pass the object on as value of the "csv" attribute
    @_ && ref $_[0] eq __PACKAGE__ and splice @_, 0, 0, "csv";
    @_ or croak ($csv_usage);

    my $c = _csv_attr (@_);

    my ($csv, $in, $fh, $hdrs) = @{$c}{qw( csv in fh hdrs )};
    my %hdr;
    if (ref $hdrs eq "HASH") {
	# A hash maps header names to the names to use; the headers
	# themselves are then handled as "auto"
	%hdr  = %{$hdrs};
	$hdrs = "auto";
	}

    # Generate CSV from the data passed as "in"
    if ($c->{'out'} && !$c->{'sink'}) {
	if (ref $in eq "CODE") {
	    # Call the code until it returns false. Rows returned as list
	    # are printed as is; for rows returned as hash, a header line
	    # is printed before the first of them
	    my $hdr = 1;
	    while (my $row = $in->($csv)) {
		if (ref $row eq "ARRAY") {
		    $csv->print ($fh, $row);
		    next;
		    }
		if (ref $row eq "HASH") {
		    if ($hdr) {
			$hdrs ||= [ map { $hdr{$_} || $_ } keys %{$row} ];
			$csv->print ($fh, $hdrs);
			$hdr = 0;
			}
		    $csv->print ($fh, [ @{$row}{@{$hdrs}} ]);
		    }
		}
	    }
	elsif (@{$in} == 0 or ref $in->[0] eq "ARRAY") { # aoa
	    ref $hdrs and $csv->print ($fh, $hdrs);
	    for (@{$in}) {
		$c->{'cboi'} and $c->{'cboi'}->($csv, $_);
		$c->{'cbbo'} and $c->{'cbbo'}->($csv, $_);
		$csv->print ($fh, $_);
		}
	    }
	else { # aoh
	    # Without a list of headers, use the keys of the first record
	    my @hdrs = ref $hdrs ? @{$hdrs} : keys %{$in->[0]};
	    defined $hdrs or $hdrs = "auto";
	    ref $hdrs || $hdrs eq "auto" and @hdrs and
		$csv->print ($fh, [ map { $hdr{$_} || $_ } @hdrs ]);
	    for (@{$in}) {
		# Make the record available as %_ inside the callbacks
		local %_;
		*_ = $_;
		$c->{'cboi'} and $c->{'cboi'}->($csv, $_);
		$c->{'cbbo'} and $c->{'cbbo'}->($csv, $_);
		$csv->print ($fh, [ @{$_}{@hdrs} ]);
		}
	    }

	$c->{'cls'} and close $fh;
	return 1;
	}

    # Parse CSV. If any of the options for header () was passed, call it
    # with just those options, using the names the method knows them by
    my @row1;
    if (defined $c->{'hd_s'} || defined $c->{'hd_b'} || defined $c->{'hd_m'} || defined $c->{'hd_c'}) {
	my %harg;
	defined $c->{'hd_s'} and $harg{'sep_set'}            = $c->{'hd_s'};
	defined $c->{'hd_b'} and $harg{'detect_bom'}         = $c->{'hd_b'};
	# Explicit headers overrule munging and setting the column names
	defined $c->{'hd_m'} and $harg{'munge_column_names'} = $hdrs ? "none" : $c->{'hd_m'};
	defined $c->{'hd_c'} and $harg{'set_column_names'}   = $hdrs ? 0      : $c->{'hd_c'};
	@row1 = $csv->header ($fh, \%harg);
	my @hdr = $csv->column_names ();
	@hdr and $hdrs ||= \@hdr;
	}

    # keep_headers, key and named filters all require headers
    if ($c->{'kh'}) {
	ref $c->{'kh'} eq "ARRAY" or croak ($csv->SetDiag (1501));
	$hdrs ||= "auto";
	}

    my $key = $c->{'key'};
    if ($key) {
	!ref $key or ref $key eq "ARRAY" && @{$key} > 1 or croak ($csv->SetDiag (1501));
	$hdrs ||= "auto";
	}
    my $val = $c->{'val'};
    if ($val) {
	$key					      or croak ($csv->SetDiag (1502));
	!ref $val or ref $val eq "ARRAY" && @{$val} > 0 or croak ($csv->SetDiag (1503));
	}

    $c->{'fltr'} && grep m/\D/ => keys %{$c->{'fltr'}} and $hdrs ||= "auto";
    if (defined $hdrs) {
	# Unless the headers were passed as list, the first line is read to
	# be skipped or to be used as headers, optionally after conversion
	if (!ref $hdrs) {
	    if ($hdrs eq "skip") {
		$csv->getline ($fh); # discard;
		}
	    elsif ($hdrs eq "auto") {
		my $h = $csv->getline ($fh) or return;
		$hdrs = [ map {      $hdr{$_} || $_ } @{$h} ];
		}
	    elsif ($hdrs eq "lc") {
		my $h = $csv->getline ($fh) or return;
		$hdrs = [ map { lc ($hdr{$_} || $_) } @{$h} ];
		}
	    elsif ($hdrs eq "uc") {
		my $h = $csv->getline ($fh) or return;
		$hdrs = [ map { uc ($hdr{$_} || $_) } @{$h} ];
		}
	    }
	elsif (ref $hdrs eq "CODE") {
	    my $h  = $csv->getline ($fh) or return;
	    my $cr = $hdrs;
	    $hdrs  = [ map {  $cr->($hdr{$_} || $_) } @{$h} ];
	    }
	# Pass the headers back to the caller through keep_headers
	$c->{'kh'} and $hdrs and @{$c->{'kh'}} = @{$hdrs};
	}

    if ($c->{'fltr'}) {
	my %f = %{$c->{'fltr'}};
	# convert headers to index
	my @hdr;
	if (ref $hdrs) {
	    @hdr = @{$hdrs};
	    for (0 .. $#hdr) {
		exists $f{$hdr[$_]} and $f{$_ + 1} = delete $f{$hdr[$_]};
		}
	    }
	# The filters run as after_parse callback: each filter gets its
	# field in $_ and the record in %_ (if there are headers). A false
	# result skips the record; changes to $_ are stored in the record
	$csv->callbacks ('after_parse' => sub {
	    my ($CSV, $ROW) = @_; # lexical sub-variables in caps
	    foreach my $FLD (sort keys %f) {
		local $_ = $ROW->[$FLD - 1];
		local %_;
		@hdr and @_{@hdr} = @{$ROW};
		$f{$FLD}->($CSV, $ROW) or return \"skip";
		$ROW->[$FLD - 1] = $_;
		}
	    });
	}

    my $frag = $c->{'frag'};
    my $ref = ref $hdrs
	? # aoh
	  do {
	    # Header names have to be unique and may not be empty
	    my @h = $csv->column_names ($hdrs);
	    my %h; $h{$_}++ for @h;
	    exists $h{""} and croak ($csv->SetDiag (1012));
	    unless (keys %h == @h) {
		croak ($csv->_SetDiagInfo (1013, join ", " =>
		    map { "$_ ($h{$_})" } grep { $h{$_} > 1 } keys %h));
		}
	    $frag ? $csv->fragment ($fh, $frag) :
	    $key  ? do {
			# key is a single header name, or a list holding the
			# string to join with followed by the header names
			my ($k, $j, @f) = ref $key ? (undef, @{$key}) : ($key);
			if (my @mk = grep { !exists $h{$_} } grep { defined } $k, @f) {
			    croak ($csv->_SetDiagInfo (4001, join ", " => @mk));
			    }
			# The value stored is the complete record, or what
			# value selects: a single field or a hash of fields
			+{ map {
			    my $r = $_;
			    my $K = defined $k ? $r->{$k} : join $j => @{$r}{@f};
			    ( $K => (
			    $val
				? ref $val
				    ? { map { $_ => $r->{$_} } @{$val} }
				    : $r->{$val}
			        : $r ));
			    } @{$csv->getline_hr_all ($fh)} }
			}
		  : $csv->getline_hr_all ($fh);
	    }
	: # aoa
	    $frag ? $csv->fragment ($fh, $frag)
		  : $csv->getline_all ($fh);
    if ($ref) {
	# The line read by header () is data if it was not used as headers
	@row1 && !$c->{'hd_c'} && !ref $hdrs and unshift @{$ref}, \@row1;
	}
    else {
	Text::CSV_XS->auto_diag ();
	}
    $c->{'cls'} and close $fh;
    if ($ref and $c->{'cbai'} || $c->{'cboi'}) {
	# Default is ARRAYref, but with key =>, you'll get a hashref
	foreach my $r (ref $ref eq "ARRAY" ? @{$ref} : values %{$ref}) {
	    local %_;
	    ref $r eq "HASH" and *_ = $r;
	    $c->{'cbai'} and $c->{'cbai'}->($csv, $r);
	    $c->{'cboi'} and $c->{'cboi'}->($csv, $r);
	    }
	}

    if ($c->{'sink'}) {
	my $ro = ref $c->{'out'} or return;

	$ro eq "SCALAR" && ${$c->{'out'}} eq "skip" and
	    return;

	# The data read can only be added to a structure of the same type
	$ro eq ref $ref or
	    croak ($csv->_SetDiagInfo (5001, "Output type mismatch"));

	if ($ro eq "ARRAY") {
	    if (@{$c->{'out'}} and @$ref and ref $c->{'out'}[0] eq ref $ref->[0]) {
		push @{$c->{'out'}} => @$ref;
		return $c->{'out'};
		}
	    croak ($csv->_SetDiagInfo (5001, "Output type mismatch"));
	    }

	if ($ro eq "HASH") {
	    @{$c->{'out'}}{keys %{$ref}} = values %{$ref};
	    return $c->{'out'};
	    }

	croak ($csv->_SetDiagInfo (5002, "Unsupported output type"));
	}

    # In void context, pass on what was read as input for generating CSV
    defined wantarray or
	return csv (
	    'in'      => $ref,
	    'headers' => $hdrs,
	    %{$c->{'attr'}},
	    );

    return $ref;
    } # csv
1521
# A module has to return a true value for require/use to succeed
1;
1523
1524__END__
1525
1526=encoding utf-8
1527
1528=head1 NAME
1529
1530Text::CSV_XS - comma-separated values manipulation routines
1531
1532=head1 SYNOPSIS
1533
1534 # Functional interface
1535 use Text::CSV_XS qw( csv );
1536
1537 # Read whole file in memory
1538 my $aoa = csv (in => "data.csv");    # as array of array
1539 my $aoh = csv (in => "data.csv",
1540                headers => "auto");   # as array of hash
1541
1542 # Write array of arrays as csv file
1543 csv (in => $aoa, out => "file.csv", sep_char=> ";");
1544
1545 # Only show lines where "code" is odd
1546 csv (in => "data.csv", filter => { code => sub { $_ % 2 }});
1547
1548
1549 # Object interface
1550 use Text::CSV_XS;
1551
1552 my @rows;
1553 # Read/parse CSV
1554 my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
1555 open my $fh, "<:encoding(utf8)", "test.csv" or die "test.csv: $!";
1556 while (my $row = $csv->getline ($fh)) {
1557     $row->[2] =~ m/pattern/ or next; # 3rd field should match
1558     push @rows, $row;
1559     }
1560 close $fh;
1561
1562 # and write as CSV
1563 open $fh, ">:encoding(utf8)", "new.csv" or die "new.csv: $!";
1564 $csv->say ($fh, $_) for @rows;
1565 close $fh or die "new.csv: $!";
1566
1567=head1 DESCRIPTION
1568
1569Text::CSV_XS  provides facilities for the composition  and decomposition of
1570comma-separated values.  An instance of the Text::CSV_XS class will combine
1571fields into a C<CSV> string and parse a C<CSV> string into fields.
1572
The module accepts either strings or files as input and supports the use of
1574user-specified characters for delimiters, separators, and escapes.
1575
1576=head2 Embedded newlines
1577
1578B<Important Note>:  The default behavior is to accept only ASCII characters
1579in the range from C<0x20> (space) to C<0x7E> (tilde).   This means that the
1580fields can not contain newlines. If your data contains newlines embedded in
1581fields, or characters above C<0x7E> (tilde), or binary data, you B<I<must>>
1582set C<< binary => 1 >> in the call to L</new>. To cover the widest range of
1583parsing options, you will always want to set binary.
1584
1585But you still have the problem  that you have to pass a correct line to the
1586L</parse> method, which is more complicated from the usual point of usage:
1587
1588 my $csv = Text::CSV_XS->new ({ binary => 1, eol => $/ });
1589 while (<>) {		#  WRONG!
1590     $csv->parse ($_);
1591     my @fields = $csv->fields ();
1592     }
1593
1594this will break, as the C<while> might read broken lines:  it does not care
1595about the quoting. If you need to support embedded newlines,  the way to go
1596is to  B<not>  pass L<C<eol>|/eol> in the parser  (it accepts C<\n>, C<\r>,
1597B<and> C<\r\n> by default) and then
1598
1599 my $csv = Text::CSV_XS->new ({ binary => 1 });
1600 open my $fh, "<", $file or die "$file: $!";
1601 while (my $row = $csv->getline ($fh)) {
1602     my @fields = @$row;
1603     }
1604
1605The old(er) way of using global file handles is still supported
1606
1607 while (my $row = $csv->getline (*ARGV)) { ... }
1608
1609=head2 Unicode
1610
1611Unicode is only tested to work with perl-5.8.2 and up.
1612
1613See also L</BOM>.
1614
1615The simplest way to ensure the correct encoding is used for  in- and output
1616is by either setting layers on the filehandles, or setting the L</encoding>
1617argument for L</csv>.
1618
1619 open my $fh, "<:encoding(UTF-8)", "in.csv"  or die "in.csv: $!";
1620or
1621 my $aoa = csv (in => "in.csv",     encoding => "UTF-8");
1622
1623 open my $fh, ">:encoding(UTF-8)", "out.csv" or die "out.csv: $!";
1624or
1625 csv (in => $aoa, out => "out.csv", encoding => "UTF-8");
1626
1627On parsing (both for  L</getline> and  L</parse>),  if the source is marked
1628being UTF8, then all fields that are marked binary will also be marked UTF8.
1629
1630On combining (L</print>  and  L</combine>):  if any of the combining fields
was marked UTF8, the resulting string will be marked as UTF8.  Note however
that all fields  I<before>  the first field marked UTF8  that contain 8-bit
characters that were not upgraded to UTF8 will be C<bytes> in the resulting
string too,  possibly causing  unexpected errors.   If you pass data
1635of different encoding,  or you don't know if there is  different  encoding,
1636force it to be upgraded before you pass them on:
1637
1638 $csv->print ($fh, [ map { utf8::upgrade (my $x = $_); $x } @data ]);
1639
1640For complete control over encoding, please use L<Text::CSV::Encoded>:
1641
1642 use Text::CSV::Encoded;
1643 my $csv = Text::CSV::Encoded->new ({
1644     encoding_in  => "iso-8859-1", # the encoding comes into   Perl
1645     encoding_out => "cp1252",     # the encoding comes out of Perl
1646     });
1647
1648 $csv = Text::CSV::Encoded->new ({ encoding  => "utf8" });
1649 # combine () and print () accept *literally* utf8 encoded data
1650 # parse () and getline () return *literally* utf8 encoded data
1651
1652 $csv = Text::CSV::Encoded->new ({ encoding  => undef }); # default
1653 # combine () and print () accept UTF8 marked data
1654 # parse () and getline () return UTF8 marked data
1655
1656=head2 BOM
1657
1658BOM  (or Byte Order Mark)  handling is available only inside the L</header>
1659method.   This method supports the following encodings: C<utf-8>, C<utf-1>,
1660C<utf-32be>, C<utf-32le>, C<utf-16be>, C<utf-16le>, C<utf-ebcdic>, C<scsu>,
1661C<bocu-1>, and C<gb-18030>. See L<Wikipedia|https://en.wikipedia.org/wiki/Byte_order_mark>.
1662
1663If a file has a BOM, the easiest way to deal with that is
1664
1665 my $aoh = csv (in => $file, detect_bom => 1);
1666
1667All records will be encoded based on the detected BOM.
1668
1669This implies a call to the  L</header>  method,  which defaults to also set
1670the L</column_names>. So this is B<not> the same as
1671
1672 my $aoh = csv (in => $file, headers => "auto");
1673
1674which only reads the first record to set  L</column_names>  but ignores any
meaning of a possibly present BOM.
1676
1677=head1 SPECIFICATION
1678
1679While no formal specification for CSV exists, L<RFC 4180|https://datatracker.ietf.org/doc/html/rfc4180>
1680(I<1>) describes the common format and establishes  C<text/csv> as the MIME
1681type registered with the IANA. L<RFC 7111|https://datatracker.ietf.org/doc/html/rfc7111>
1682(I<2>) adds fragments to CSV.
1683
1684Many informal documents exist that describe the C<CSV> format.   L<"How To:
1685The Comma Separated Value (CSV) File Format"|http://creativyst.com/Doc/Articles/CSV/CSV01.shtml>
1686(I<3>)  provides an overview of the  C<CSV>  format in the most widely used
1687applications and explains how it can best be used and supported.
1688
1689 1) https://datatracker.ietf.org/doc/html/rfc4180
1690 2) https://datatracker.ietf.org/doc/html/rfc7111
1691 3) http://creativyst.com/Doc/Articles/CSV/CSV01.shtml
1692
1693The basic rules are as follows:
1694
1695B<CSV>  is a delimited data format that has fields/columns separated by the
1696comma character and records/rows separated by newlines. Fields that contain
1697a special character (comma, newline, or double quote),  must be enclosed in
1698double quotes. However, if a line contains a single entry that is the empty
1699string, it may be enclosed in double quotes.  If a field's value contains a
1700double quote character it is escaped by placing another double quote
1701character next to it. The C<CSV> file format does not require a specific
1702character encoding, byte order, or line terminator format.
1703
1704=over 2
1705
1706=item *
1707
1708Each record is a single line ended by a line feed  (ASCII/C<LF>=C<0x0A>) or
1709a carriage return and line feed pair (ASCII/C<CRLF>=C<0x0D 0x0A>), however,
1710line-breaks may be embedded.
1711
1712=item *
1713
1714Fields are separated by commas.
1715
1716=item *
1717
1718Allowable characters within a C<CSV> field include C<0x09> (C<TAB>) and the
1719inclusive range of C<0x20> (space) through C<0x7E> (tilde).  In binary mode
1720all characters are accepted, at least in quoted fields.
1721
1722=item *
1723
1724A field within  C<CSV>  must be surrounded by  double-quotes to  contain  a
1725separator character (comma).
1726
1727=back
1728
1729Though this is the most clear and restrictive definition,  Text::CSV_XS  is
1730way more liberal than this, and allows extension:
1731
1732=over 2
1733
1734=item *
1735
1736Line termination by a single carriage return is accepted by default
1737
1738=item *
1739
The separation-,  quote-, and escape- characters can be any ASCII character
1741in the range from  C<0x20> (space) to  C<0x7E> (tilde).  Characters outside
1742this range may or may not work as expected.  Multibyte characters, like UTF
1743C<U+060C> (ARABIC COMMA),   C<U+FF0C> (FULLWIDTH COMMA),  C<U+241B> (SYMBOL
1744FOR ESCAPE), C<U+2424> (SYMBOL FOR NEWLINE), C<U+FF02> (FULLWIDTH QUOTATION
1745MARK), and C<U+201C> (LEFT DOUBLE QUOTATION MARK) (to give some examples of
1746what might look promising) work for newer versions of perl for C<sep_char>,
1747and C<quote_char> but not for C<escape_char>.
1748
1749If you use perl-5.8.2 or higher these three attributes are utf8-decoded, to
1750increase the likelihood of success. This way C<U+00FE> will be allowed as a
1751quote character.
1752
1753=item *
1754
1755A field in  C<CSV>  must be surrounded by double-quotes to make an embedded
1756double-quote, represented by a pair of consecutive double-quotes, valid. In
1757binary mode you may additionally use the sequence  C<"0> for representation
1758of a NULL byte. Using C<0x00> in binary mode is just as valid.
1759
1760=item *
1761
1762Several violations of the above specification may be lifted by passing some
1763options as attributes to the object constructor.
1764
1765=back
1766
1767=head1 METHODS
1768
1769=head2 version
1770X<version>
1771
1772(Class method) Returns the current module version.
1773
1774=head2 new
1775X<new>
1776
1777(Class method) Returns a new instance of class Text::CSV_XS. The attributes
1778are described by the (optional) hash ref C<\%attr>.
1779
1780 my $csv = Text::CSV_XS->new ({ attributes ... });
1781
1782The following attributes are available:
1783
1784=head3 eol
1785X<eol>
1786
1787 my $csv = Text::CSV_XS->new ({ eol => $/ });
1788           $csv->eol (undef);
1789 my $eol = $csv->eol;
1790
1791The end-of-line string to add to rows for L</print> or the record separator
1792for L</getline>.
1793
1794When not passed in a B<parser> instance,  the default behavior is to accept
1795C<\n>, C<\r>, and C<\r\n>, so it is probably safer to not specify C<eol> at
all. Passing C<undef> or the empty string behaves the same.
1797
1798When not passed in a B<generating> instance,  records are not terminated at
1799all, so it is probably wise to pass something you expect. A safe choice for
1800C<eol> on output is either C<$/> or C<\r\n>.
1801
1802Common values for C<eol> are C<"\012"> (C<\n> or Line Feed),  C<"\015\012">
1803(C<\r\n> or Carriage Return, Line Feed),  and C<"\015">  (C<\r> or Carriage
1804Return). The L<C<eol>|/eol> attribute cannot exceed 7 (ASCII) characters.
1805
1806If both C<$/> and L<C<eol>|/eol> equal C<"\015">, parsing lines that end on
only a Carriage Return without Line Feed, will be L</parse>d correctly.
1808
1809=head3 sep_char
1810X<sep_char>
1811
1812 my $csv = Text::CSV_XS->new ({ sep_char => ";" });
1813         $csv->sep_char (";");
1814 my $c = $csv->sep_char;
1815
The char used to separate fields, by default a comma (C<,>).   Limited to a
1817single-byte character, usually in the range from C<0x20> (space) to C<0x7E>
1818(tilde). When longer sequences are required, use L<C<sep>|/sep>.
1819
1820The separation character can not be equal to the quote character  or to the
1821escape character.
1822
1823See also L</CAVEATS>
1824
1825=head3 sep
1826X<sep>
1827
1828 my $csv = Text::CSV_XS->new ({ sep => "\N{FULLWIDTH COMMA}" });
1829           $csv->sep (";");
1830 my $sep = $csv->sep;
1831
1832The chars used to separate fields, by default undefined. Limited to 8 bytes.
1833
1834When set, overrules L<C<sep_char>|/sep_char>.  If its length is one byte it
1835acts as an alias to L<C<sep_char>|/sep_char>.
1836
1837See also L</CAVEATS>
1838
1839=head3 quote_char
1840X<quote_char>
1841
1842 my $csv = Text::CSV_XS->new ({ quote_char => "'" });
1843         $csv->quote_char (undef);
1844 my $c = $csv->quote_char;
1845
1846The character to quote fields containing blanks or binary data,  by default
1847the double quote character (C<">).  A value of undef suppresses quote chars
1848(for simple cases only). Limited to a single-byte character, usually in the
1849range from  C<0x20> (space) to  C<0x7E> (tilde).  When longer sequences are
1850required, use L<C<quote>|/quote>.
1851
1852C<quote_char> can not be equal to L<C<sep_char>|/sep_char>.
1853
1854=head3 quote
1855X<quote>
1856
1857 my $csv = Text::CSV_XS->new ({ quote => "\N{FULLWIDTH QUOTATION MARK}" });
1858             $csv->quote ("'");
1859 my $quote = $csv->quote;
1860
1861The chars used to quote fields, by default undefined. Limited to 8 bytes.
1862
1863When set, overrules L<C<quote_char>|/quote_char>. If its length is one byte
1864it acts as an alias to L<C<quote_char>|/quote_char>.
1865
1866This method does not support C<undef>.  Use L<C<quote_char>|/quote_char> to
1867disable quotation.
1868
1869See also L</CAVEATS>
1870
1871=head3 escape_char
1872X<escape_char>
1873
1874 my $csv = Text::CSV_XS->new ({ escape_char => "\\" });
1875         $csv->escape_char (":");
1876 my $c = $csv->escape_char;
1877
1878The character to  escape  certain characters inside quoted fields.  This is
1879limited to a  single-byte  character,  usually  in the  range from  C<0x20>
1880(space) to C<0x7E> (tilde).
1881
1882The C<escape_char> defaults to being the double-quote mark (C<">). In other
1883words the same as the default L<C<quote_char>|/quote_char>. This means that
1884doubling the quote mark in a field escapes it:
1885
1886 "foo","bar","Escape ""quote mark"" with two ""quote marks""","baz"
1887
1888If  you  change  the   L<C<quote_char>|/quote_char>  without  changing  the
1889C<escape_char>,  the  C<escape_char> will still be the double-quote (C<">).
1890If instead you want to escape the  L<C<quote_char>|/quote_char> by doubling
1891it you will need to also change the  C<escape_char>  to be the same as what
1892you have changed the L<C<quote_char>|/quote_char> to.
1893
Setting C<escape_char> to C<undef> or C<""> disables escaping completely
1895and is greatly discouraged. This will also disable C<escape_null>.
1896
1897The escape character can not be equal to the separation character.
1898
1899=head3 binary
1900X<binary>
1901
1902 my $csv = Text::CSV_XS->new ({ binary => 1 });
1903         $csv->binary (0);
1904 my $f = $csv->binary;
1905
1906If this attribute is C<1>,  you may use binary characters in quoted fields,
1907including line feeds, carriage returns and C<NULL> bytes. (The latter could
1908be escaped as C<"0>.) By default this feature is off.
1909
1910If a string is marked UTF8,  C<binary> will be turned on automatically when
1911binary characters other than C<CR> and C<NL> are encountered.   Note that a
1912simple string like C<"\x{00a0}"> might still be binary, but not marked UTF8,
1913so setting C<< { binary => 1 } >> is still a wise option.
1914
1915=head3 strict
1916X<strict>
1917
1918 my $csv = Text::CSV_XS->new ({ strict => 1 });
1919         $csv->strict (0);
1920 my $f = $csv->strict;
1921
1922If this attribute is set to C<1>, any row that parses to a different number
1923of fields than the previous row will cause the parser to throw error 2014.
1924
1925=head3 skip_empty_rows
1926X<skip_empty_rows>
1927
1928 my $csv = Text::CSV_XS->new ({ skip_empty_rows => 1 });
1929         $csv->skip_empty_rows (0);
1930 my $f = $csv->skip_empty_rows;
1931
1932If this attribute is set to C<1>,  any row that has an  L</eol> immediately
1933following the start of line will be skipped.  Default behavior is to return
1934one single empty field.
1935
1936This attribute is only used in parsing.
1937
1938=head3 formula_handling
1939
1940=head3 formula
1941X<formula_handling>
1942X<formula>
1943
1944 my $csv = Text::CSV_XS->new ({ formula => "none" });
1945         $csv->formula ("none");
1946 my $f = $csv->formula;
1947
1948This defines the behavior of fields containing I<formulas>. As formulas are
1949considered dangerous in spreadsheets, this attribute can define an optional
1950action to be taken if a field starts with an equal sign (C<=>).
1951
1952For purpose of code-readability, this can also be written as
1953
1954 my $csv = Text::CSV_XS->new ({ formula_handling => "none" });
1955         $csv->formula_handling ("none");
1956 my $f = $csv->formula_handling;
1957
1958Possible values for this attribute are
1959
1960=over 2
1961
1962=item none
1963
1964Take no specific action. This is the default.
1965
1966 $csv->formula ("none");
1967
1968=item die
1969
1970Cause the process to C<die> whenever a leading C<=> is encountered.
1971
1972 $csv->formula ("die");
1973
1974=item croak
1975
1976Cause the process to C<croak> whenever a leading C<=> is encountered.  (See
1977L<Carp>)
1978
1979 $csv->formula ("croak");
1980
1981=item diag
1982
1983Report position and content of the field whenever a leading  C<=> is found.
1984The value of the field is unchanged.
1985
1986 $csv->formula ("diag");
1987
1988=item empty
1989
1990Replace the content of fields that start with a C<=> with the empty string.
1991
1992 $csv->formula ("empty");
1993 $csv->formula ("");
1994
1995=item undef
1996
1997Replace the content of fields that start with a C<=> with C<undef>.
1998
1999 $csv->formula ("undef");
2000 $csv->formula (undef);
2001
2002=item a callback
2003
2004Modify the content of fields that start with a  C<=>  with the return-value
2005of the callback.  The original content of the field is available inside the
2006callback as C<$_>;
2007
2008 # Replace all formulas with 42
2009 $csv->formula (sub { 42; });
2010
2011 # same as $csv->formula ("empty") but slower
2012 $csv->formula (sub { "" });
2013
2014 # Allow =4+12
2015 $csv->formula (sub { s/^=(\d+\+\d+)$/$1/eer });
2016
2017 # Allow more complex calculations
2018 $csv->formula (sub { eval { s{^=([-+*/0-9()]+)$}{$1}ee }; $_ });
2019
2020=back
2021
2022All other values will give a warning and then fall back to C<diag>.
2023
2024=head3 decode_utf8
2025X<decode_utf8>
2026
2027 my $csv = Text::CSV_XS->new ({ decode_utf8 => 1 });
2028         $csv->decode_utf8 (0);
2029 my $f = $csv->decode_utf8;
2030
2031This attribute defaults to TRUE.
2032
2033While I<parsing>,  fields that are valid UTF-8, are automatically set to be
2034UTF-8, so that
2035
2036  $csv->parse ("\xC4\xA8\n");
2037
2038results in
2039
2040  PV("\304\250"\0) [UTF8 "\x{128}"]
2041
2042Sometimes it might not be a desired action.  To prevent those upgrades, set
2043this attribute to false, and the result will be
2044
2045  PV("\304\250"\0)
2046
2047=head3 auto_diag
2048X<auto_diag>
2049
2050 my $csv = Text::CSV_XS->new ({ auto_diag => 1 });
2051         $csv->auto_diag (2);
2052 my $l = $csv->auto_diag;
2053
2054Setting this attribute to a number between  C<1>  and  C<9>  causes
2055L</error_diag> to be automatically called in void context upon errors.
2056
2057In case of error C<2012 - EOF>, this call will be void.
2058
2059If C<auto_diag> is set to a numeric value greater than C<1>, it will C<die>
2060on errors instead of C<warn>.  If set to anything unrecognized,  it will be
2061silently ignored.
2062
2063Future extensions to this feature will include more reliable auto-detection
2064of C<autodie>  being active in the scope in which the error occurred, which
2065will increment the value of C<auto_diag> with  C<1> the moment the error is
2066detected.
2067
2068=head3 diag_verbose
2069X<diag_verbose>
2070
2071 my $csv = Text::CSV_XS->new ({ diag_verbose => 1 });
2072         $csv->diag_verbose (2);
2073 my $l = $csv->diag_verbose;
2074
2075Set the verbosity of the output triggered by C<auto_diag>.   Currently only
2076adds the current  input-record-number  (if known)  to the diagnostic output
2077with an indication of the position of the error.
2078
2079=head3 blank_is_undef
2080X<blank_is_undef>
2081
2082 my $csv = Text::CSV_XS->new ({ blank_is_undef => 1 });
2083         $csv->blank_is_undef (0);
2084 my $f = $csv->blank_is_undef;
2085
2086Under normal circumstances, C<CSV> data makes no distinction between quoted-
2087and unquoted empty fields.  These both end up in an empty string field once
2088read, thus
2089
2090 1,"",," ",2
2091
2092is read as
2093
2094 ("1", "", "", " ", "2")
2095
2096When I<writing>  C<CSV> files with either  L<C<always_quote>|/always_quote>
2097or  L<C<quote_empty>|/quote_empty> set, the unquoted  I<empty> field is the
2098result of an undefined value.   To enable this distinction when  I<reading>
2099C<CSV>  data,  the  C<blank_is_undef>  attribute will cause  unquoted empty
2100fields to be set to C<undef>, causing the above to be parsed as
2101
2102 ("1", "", undef, " ", "2")
2103
2104Note that this is specifically important when loading  C<CSV> fields into a
2105database that allows C<NULL> values,  as the perl equivalent for C<NULL> is
2106C<undef> in L<DBI> land.
2107
2108=head3 empty_is_undef
2109X<empty_is_undef>
2110
2111 my $csv = Text::CSV_XS->new ({ empty_is_undef => 1 });
2112         $csv->empty_is_undef (0);
2113 my $f = $csv->empty_is_undef;
2114
2115Going one  step  further  than  L<C<blank_is_undef>|/blank_is_undef>,  this
2116attribute converts all empty fields to C<undef>, so
2117
2118 1,"",," ",2
2119
2120is read as
2121
2122 ("1", undef, undef, " ", "2")
2123
2124Note that this affects only fields that are  originally  empty,  not fields
2125that are empty after stripping allowed whitespace. YMMV.
2126
2127=head3 allow_whitespace
2128X<allow_whitespace>
2129
2130 my $csv = Text::CSV_XS->new ({ allow_whitespace => 1 });
2131         $csv->allow_whitespace (0);
2132 my $f = $csv->allow_whitespace;
2133
2134When this option is set to true,  the whitespace  (C<TAB>'s and C<SPACE>'s)
2135surrounding  the  separation character  is removed when parsing.  If either
2136C<TAB> or C<SPACE> is one of the three characters L<C<sep_char>|/sep_char>,
2137L<C<quote_char>|/quote_char>, or L<C<escape_char>|/escape_char> it will not
2138be considered whitespace.
2139
2140Now lines like:
2141
2142 1 , "foo" , bar , 3 , zapp
2143
2144are parsed as valid C<CSV>, even though they violate the C<CSV> specs.
2145
2146Note that  B<all>  whitespace is stripped from both  start and  end of each
2147field.  That would make it  I<more> than a I<feature> to enable parsing bad
2148C<CSV> lines, as
2149
2150 1,   2.0,  3,   ape  , monkey
2151
2152will now be parsed as
2153
2154 ("1", "2.0", "3", "ape", "monkey")
2155
2156even if the original line was perfectly acceptable C<CSV>.
2157
2158=head3 allow_loose_quotes
2159X<allow_loose_quotes>
2160
2161 my $csv = Text::CSV_XS->new ({ allow_loose_quotes => 1 });
2162         $csv->allow_loose_quotes (0);
2163 my $f = $csv->allow_loose_quotes;
2164
2165By default, parsing unquoted fields containing L<C<quote_char>|/quote_char>
2166characters like
2167
2168 1,foo "bar" baz,42
2169
2170would result in parse error 2034.  Though it is still bad practice to allow
2171this format,  we  cannot  help  the  fact  that  some  vendors  make  their
2172applications spit out lines styled this way.
2173
2174If there is B<really> bad C<CSV> data, like
2175
2176 1,"foo "bar" baz",42
2177
2178or
2179
2180 1,""foo bar baz"",42
2181
2182there is a way to get this data-line parsed and leave the quotes inside the
2183quoted field as-is.  This can be achieved by setting  C<allow_loose_quotes>
2184B<AND> making sure that the L<C<escape_char>|/escape_char> is  I<not> equal
2185to L<C<quote_char>|/quote_char>.
2186
2187=head3 allow_loose_escapes
2188X<allow_loose_escapes>
2189
2190 my $csv = Text::CSV_XS->new ({ allow_loose_escapes => 1 });
2191         $csv->allow_loose_escapes (0);
2192 my $f = $csv->allow_loose_escapes;
2193
2194Parsing fields  that  have  L<C<escape_char>|/escape_char>  characters that
2195escape characters that do not need to be escaped, like:
2196
2197 my $csv = Text::CSV_XS->new ({ escape_char => "\\" });
2198 $csv->parse (q{1,"my bar\'s",baz,42});
2199
2200would result in parse error 2025.   Though it is bad practice to allow this
2201format,  this attribute enables you to treat all escape character sequences
2202equal.
2203
2204=head3 allow_unquoted_escape
2205X<allow_unquoted_escape>
2206
2207 my $csv = Text::CSV_XS->new ({ allow_unquoted_escape => 1 });
2208         $csv->allow_unquoted_escape (0);
2209 my $f = $csv->allow_unquoted_escape;
2210
2211A backward compatibility issue where L<C<escape_char>|/escape_char> differs
2212from L<C<quote_char>|/quote_char>  prevents  L<C<escape_char>|/escape_char>
2213from being in the first position of a field. If L<C<quote_char>|/quote_char>
2214is equal to the default C<"> and  L<C<escape_char>|/escape_char>  is set to
2215C<\>, this would be illegal:
2216
2217 1,\0,2
2218
2219Setting this attribute to C<1>  might help to overcome issues with backward
2220compatibility and allow this style.
2221
2222=head3 always_quote
2223X<always_quote>
2224
2225 my $csv = Text::CSV_XS->new ({ always_quote => 1 });
2226         $csv->always_quote (0);
2227 my $f = $csv->always_quote;
2228
2229By default the generated fields are quoted only if they I<need> to be.  For
2230example, if they contain the separator character. If you set this attribute
2231to C<1> then I<all> defined fields will be quoted. (C<undef> fields are not
2232quoted, see L</blank_is_undef>). This makes it quite often easier to handle
2233exported data in external applications.  (Poor creatures who would be better
2234off using Text::CSV_XS. :)
2235
2236=head3 quote_space
2237X<quote_space>
2238
2239 my $csv = Text::CSV_XS->new ({ quote_space => 1 });
2240         $csv->quote_space (0);
2241 my $f = $csv->quote_space;
2242
2243By default,  a space in a field would trigger quotation.  As no rule exists
2244that forces this in C<CSV>,  nor any for the opposite,  the default is true
2245for safety.   You can exclude the space  from this trigger  by setting this
2246attribute to 0.
2247
2248=head3 quote_empty
2249X<quote_empty>
2250
2251 my $csv = Text::CSV_XS->new ({ quote_empty => 1 });
2252         $csv->quote_empty (0);
2253 my $f = $csv->quote_empty;
2254
2255By default the generated fields are quoted only if they I<need> to be.   An
2256empty (defined) field does not need quotation. If you set this attribute to
2257C<1> then I<empty> defined fields will be quoted.  (C<undef> fields are not
2258quoted, see L</blank_is_undef>). See also L<C<always_quote>|/always_quote>.
2259
2260=head3 quote_binary
2261X<quote_binary>
2262
2263 my $csv = Text::CSV_XS->new ({ quote_binary => 1 });
2264         $csv->quote_binary (0);
2265 my $f = $csv->quote_binary;
2266
2267By default,  all "unsafe" bytes inside a string cause the combined field to
2268be quoted.  By setting this attribute to C<0>, you can disable that trigger
2269for bytes >= C<0x7F>.
2270
2271=head3 escape_null
2272X<escape_null>
2273X<quote_null>
2274
2275 my $csv = Text::CSV_XS->new ({ escape_null => 1 });
2276         $csv->escape_null (0);
2277 my $f = $csv->escape_null;
2278
2279By default, a C<NULL> byte in a field would be escaped. This option enables
2280you to treat the  C<NULL>  byte as a simple binary character in binary mode
2281(the C<< { binary => 1 } >> is set).  The default is true.  You can prevent
2282C<NULL> escapes by setting this attribute to C<0>.
2283
2284When the C<escape_char> attribute is set to undefined,  this attribute will
2285be set to false.
2286
2287The default setting will encode "=\x00=" as
2288
2289 "="0="
2290
2291With C<escape_null> set to C<0>, this will result in
2292
2293 "=\x00="
2294
2295The default when using the C<csv> function is C<false>.
2296
2297For backward compatibility reasons,  the deprecated old name  C<quote_null>
2298is still recognized.
2299
2300=head3 keep_meta_info
2301X<keep_meta_info>
2302
2303 my $csv = Text::CSV_XS->new ({ keep_meta_info => 1 });
2304         $csv->keep_meta_info (0);
2305 my $f = $csv->keep_meta_info;
2306
2307By default, the parsing of input records is as simple and fast as possible.
2308However,  some parsing information - like quotation of the original field -
2309is lost in that process.  Setting this flag to true enables retrieving that
2310information after parsing with  the methods  L</meta_info>,  L</is_quoted>,
2311and L</is_binary> described below.  Default is false for performance.
2312
2313If you set this attribute to a value greater than 9,   then you can control
2314output quotation style like it was used in the input of the  last  parsed
2315record (unless quotation was added because of other reasons).
2316
2317 my $csv = Text::CSV_XS->new ({
2318    binary         => 1,
2319    keep_meta_info => 1,
2320    quote_space    => 0,
2321    });
2322
2323 $csv->parse (q{1,,"", ," ",f,"g","h""h",help,"help"});
2324 my @row = $csv->fields;
2325 $csv->print (*STDOUT, \@row);
2326 # 1,,, , ,f,g,"h""h",help,help
2327 $csv->keep_meta_info (11);
2328 $csv->print (*STDOUT, \@row);
2329 # 1,,"", ," ",f,"g","h""h",help,"help"
2330
2331=head3 undef_str
2332X<undef_str>
2333
2334 my $csv = Text::CSV_XS->new ({ undef_str => "\\N" });
2335         $csv->undef_str (undef);
2336 my $s = $csv->undef_str;
2337
2338This attribute optionally defines the output of undefined fields. The value
2339passed is not changed at all, so if it needs quotation, the quotation needs
2340to be included in the value of the attribute.  Use with caution, as passing
2341a value like  C<",",,,,""">  will for sure mess up your output. The default
2342for this attribute is C<undef>, meaning no special treatment.
2343
2344This attribute is useful when exporting  CSV data  to be imported in custom
2345loaders, like for MySQL, that recognize special sequences for C<NULL> data.
2346
2347This attribute has no meaning when parsing CSV data.
2348
2349=head3 comment_str
2350X<comment_str>
2351
2352 my $csv = Text::CSV_XS->new ({ comment_str => "#" });
2353         $csv->comment_str (undef);
2354 my $s = $csv->comment_str;
2355
2356This attribute optionally defines a string to be recognized as comment.  If
2357this attribute is defined,   all lines starting with this sequence will not
2358be parsed as CSV but skipped as comment.
2359
2360This attribute has no meaning when generating CSV.
2361
2362Comment strings that start with any of the special characters/sequences are
2363not supported (so it cannot start with any of L</sep_char>, L</quote_char>,
2364L</escape_char>, L</sep>, L</quote>, or L</eol>).
2365
2366For convenience, C<comment> is an alias for C<comment_str>.
2367
2368=head3 verbatim
2369X<verbatim>
2370
2371 my $csv = Text::CSV_XS->new ({ verbatim => 1 });
2372         $csv->verbatim (0);
2373 my $f = $csv->verbatim;
2374
2375This is a quite controversial attribute to set,  but makes some hard things
2376possible.
2377
2378The rationale behind this attribute is to tell the parser that the normally
2379special characters newline (C<NL>) and Carriage Return (C<CR>)  will not be
2380special when this flag is set,  and be dealt with  as being ordinary binary
2381characters. This will ease working with data with embedded newlines.
2382
2383When  C<verbatim>  is used with  L</getline>,  L</getline>  auto-C<chomp>'s
2384every line.
2385
2386Imagine a file format like
2387
2388 M^^Hans^Janssen^Klas 2\n2A^Ja^11-06-2007#\r\n
2389
2390where the line ending is a very specific C<"#\r\n">,  and the sep_char is a
2391C<^> (caret).   None of the fields is quoted,   but embedded binary data is
2392likely to be present. With the specific line ending, this should not be too
2393hard to detect.
2394
2395By default,  Text::CSV_XS'  parse function is instructed to only know about
2396C<"\n"> and C<"\r">  to be legal line endings,  and so has to deal with the
2397embedded newline as a real C<end-of-line>,  so it can scan the next line if
2398binary is true, and the newline is inside a quoted field. With this option,
2399we tell L</parse> to parse the line as if C<"\n"> is just nothing more than
2400a binary character.
2401
2402For L</parse> this means that the parser has no more idea about line ending
2403and L</getline> C<chomp>s line endings on reading.
2404
2405=head3 types
2406
2407A set of column types; the attribute is immediately passed to the L</types>
2408method.
2409
2410=head3 callbacks
2411X<callbacks>
2412
2413See the L</Callbacks> section below.
2414
2415=head3 accessors
2416
2417To sum it up,
2418
2419 $csv = Text::CSV_XS->new ();
2420
2421is equivalent to
2422
2423 $csv = Text::CSV_XS->new ({
2424     eol                   => undef, # \r, \n, or \r\n
2425     sep_char              => ',',
2426     sep                   => undef,
2427     quote_char            => '"',
2428     quote                 => undef,
2429     escape_char           => '"',
2430     binary                => 0,
2431     decode_utf8           => 1,
2432     auto_diag             => 0,
2433     diag_verbose          => 0,
2434     blank_is_undef        => 0,
2435     empty_is_undef        => 0,
2436     allow_whitespace      => 0,
2437     allow_loose_quotes    => 0,
2438     allow_loose_escapes   => 0,
2439     allow_unquoted_escape => 0,
2440     always_quote          => 0,
2441     quote_empty           => 0,
2442     quote_space           => 1,
2443     escape_null           => 1,
2444     quote_binary          => 1,
2445     keep_meta_info        => 0,
2446     strict                => 0,
2447     skip_empty_rows       => 0,
2448     formula               => 0,
2449     verbatim              => 0,
2450     undef_str             => undef,
2451     comment_str           => undef,
2452     types                 => undef,
2453     callbacks             => undef,
2454     });
2455
2456For all of the above mentioned flags, an accessor method is available where
2457you can inquire the current value, or change the value
2458
2459 my $quote = $csv->quote_char;
2460 $csv->binary (1);
2461
2462It is not wise to change these settings halfway through writing C<CSV> data
2463to a stream. If however you want to create a new stream using the available
2464C<CSV> object, there is no harm in changing them.
2465
2466If the L</new> constructor call fails,  it returns C<undef>,  and makes the
2467fail reason available through the L</error_diag> method.
2468
2469 $csv = Text::CSV_XS->new ({ ecs_char => 1 }) or
2470     die "".Text::CSV_XS->error_diag ();
2471
2472L</error_diag> will return a string like
2473
2474 "INI - Unknown attribute 'ecs_char'"
2475
2476=head2 known_attributes
2477X<known_attributes>
2478
2479 @attr = Text::CSV_XS->known_attributes;
2480 @attr = Text::CSV_XS::known_attributes;
2481 @attr = $csv->known_attributes;
2482
2483This method will return an ordered list of all the supported  attributes as
2484described above.   This can be useful for knowing what attributes are valid
2485in classes that use or extend Text::CSV_XS.
2486
2487=head2 print
2488X<print>
2489
2490 $status = $csv->print ($fh, $colref);
2491
2492Similar to  L</combine> + L</string> + L</print>,  but much more efficient.
2493It expects an array ref as input  (not an array!)  and the resulting string
2494is not really  created,  but  immediately  written  to the  C<$fh>  object,
2495typically an IO handle or any other object that offers a L</print> method.
2496
2497For performance reasons  C<print>  does not create a result string,  so all
2498L</string>, L</status>, L</fields>, and L</error_input> methods will return
2499undefined information after executing this method.
2500
2501If C<$colref> is C<undef>  (explicit,  not through a variable argument) and
2502L</bind_columns>  was used to specify fields to be printed,  it is possible
2503to make performance improvements, as otherwise data would have to be copied
2504as arguments to the method call:
2505
2506 $csv->bind_columns (\($foo, $bar));
2507 $status = $csv->print ($fh, undef);
2508
2509A short benchmark
2510
2511 my @data = ("aa" .. "zz");
2512 $csv->bind_columns (\(@data));
2513
2514 $csv->print ($fh, [ @data ]);   # 11800 recs/sec
2515 $csv->print ($fh,  \@data  );   # 57600 recs/sec
2516 $csv->print ($fh,   undef  );   # 48500 recs/sec
2517
2518=head2 say
2519X<say>
2520
2521 $status = $csv->say ($fh, $colref);
2522
2523Like L<C<print>|/print>, but L<C<eol>|/eol> defaults to C<$\>.
2524
2525=head2 print_hr
2526X<print_hr>
2527
2528 $csv->print_hr ($fh, $ref);
2529
2530Provides an easy way  to print a  C<$ref>  (as fetched with L</getline_hr>)
2531provided the column names are set with L</column_names>.
2532
2533It is just a wrapper method with basic parameter checks over
2534
2535 $csv->print ($fh, [ map { $ref->{$_} } $csv->column_names ]);
2536
2537=head2 combine
2538X<combine>
2539
2540 $status = $csv->combine (@fields);
2541
2542This method constructs a C<CSV> record from  C<@fields>,  returning success
2543or failure.   Failure can result from lack of arguments or an argument that
2544contains an invalid character.   Upon success,  L</string> can be called to
2545retrieve the resultant C<CSV> string.  Upon failure,  the value returned by
2546L</string> is undefined and L</error_input> could be called to retrieve the
2547invalid argument.
2548
2549=head2 string
2550X<string>
2551
2552 $line = $csv->string ();
2553
2554This method returns the input to  L</parse>  or the resultant C<CSV> string
2555of L</combine>, whichever was called more recently.
2556
2557=head2 getline
2558X<getline>
2559
2560 $colref = $csv->getline ($fh);
2561
2562This is the counterpart to  L</print>,  as L</parse>  is the counterpart to
2563L</combine>:  it parses a row from the C<$fh>  handle using the  C<getline>
2564method associated with C<$fh>  and parses this row into an array ref.  This
2565array ref is returned by the function or C<undef> for failure.  When C<$fh>
2566does not support C<getline>, you are likely to hit errors.
2567
2568When fields are bound with L</bind_columns> the return value is a reference
2569to an empty list.
2570
2571The L</string>, L</fields>, and L</status> methods are meaningless again.
2572
2573=head2 getline_all
2574X<getline_all>
2575
2576 $arrayref = $csv->getline_all ($fh);
2577 $arrayref = $csv->getline_all ($fh, $offset);
2578 $arrayref = $csv->getline_all ($fh, $offset, $length);
2579
2580This will return a reference to a list of L<getline ($fh)|/getline> results.
2581In this call, C<keep_meta_info> is disabled.  If C<$offset> is negative, as
2582with C<splice>, only the last  C<abs ($offset)> records of C<$fh> are taken
2583into consideration.
2584
2585Given a CSV file with 10 lines:
2586
2587 lines call
2588 ----- ---------------------------------------------------------
2589 0..9  $csv->getline_all ($fh)         # all
2590 0..9  $csv->getline_all ($fh,  0)     # all
2591 8..9  $csv->getline_all ($fh,  8)     # start at 8
2592 -     $csv->getline_all ($fh,  0,  0) # start at 0 first 0 rows
2593 0..4  $csv->getline_all ($fh,  0,  5) # start at 0 first 5 rows
2594 4..5  $csv->getline_all ($fh,  4,  2) # start at 4 first 2 rows
2595 8..9  $csv->getline_all ($fh, -2)     # last 2 rows
2596 6..7  $csv->getline_all ($fh, -4,  2) # first 2 of last  4 rows
2597
2598=head2 getline_hr
2599X<getline_hr>
2600
2601The L</getline_hr> and L</column_names> methods work together  to allow you
2602to have rows returned as hashrefs.  You must call L</column_names> first to
2603declare your column names.
2604
2605 $csv->column_names (qw( code name price description ));
2606 $hr = $csv->getline_hr ($fh);
2607 print "Price for $hr->{name} is $hr->{price} EUR\n";
2608
2609L</getline_hr> will croak if called before L</column_names>.
2610
2611Note that  L</getline_hr>  creates a hashref for every row and will be much
2612slower than the combined use of L</bind_columns>  and L</getline> but still
2613offering the same easy to use hashref inside the loop:
2614
2615 my @cols = @{$csv->getline ($fh)};
2616 $csv->column_names (@cols);
2617 while (my $row = $csv->getline_hr ($fh)) {
2618     print $row->{price};
2619     }
2620
2621Could easily be rewritten to the much faster:
2622
2623 my @cols = @{$csv->getline ($fh)};
2624 my $row = {};
2625 $csv->bind_columns (\@{$row}{@cols});
2626 while ($csv->getline ($fh)) {
2627     print $row->{price};
2628     }
2629
2630Your mileage may vary for the size of the data and the number of rows. With
2631perl-5.14.2 the comparison for a 100_000 line file with 14 columns:
2632
2633            Rate hashrefs getlines
2634 hashrefs 1.00/s       --     -76%
2635 getlines 4.15/s     313%       --
2636
2637=head2 getline_hr_all
2638X<getline_hr_all>
2639
2640 $arrayref = $csv->getline_hr_all ($fh);
2641 $arrayref = $csv->getline_hr_all ($fh, $offset);
2642 $arrayref = $csv->getline_hr_all ($fh, $offset, $length);
2643
2644This will return a reference to a list of   L<getline_hr ($fh)|/getline_hr>
2645results.  In this call, L<C<keep_meta_info>|/keep_meta_info> is disabled.
2646
2647=head2 parse
2648X<parse>
2649
2650 $status = $csv->parse ($line);
2651
2652This method decomposes a  C<CSV>  string into fields,  returning success or
2653failure.  Failure can result from a lack of argument or from the given C<CSV>
2654string being improperly formatted. Upon success, L</fields> can be called to
2655retrieve the decomposed fields. Upon failure calling L</fields> will return
2656undefined data and  L</error_input>  can be called to retrieve  the invalid
2657argument.
2658
2659You may use the L</types>  method for setting column types.  See L</types>'
2660description below.
2661
2662The C<$line> argument is supposed to be a simple scalar. Everything else is
2663supposed to croak and set error 1500.
2664
2665=head2 fragment
2666X<fragment>
2667
2668This function tries to implement RFC7111  (URI Fragment Identifiers for the
2669text/csv Media Type) - https://datatracker.ietf.org/doc/html/rfc7111
2670
2671 my $AoA = $csv->fragment ($fh, $spec);
2672
2673In specifications,  C<*> is used to specify the I<last> item, a dash (C<->)
2674to indicate a range.   All indices are C<1>-based:  the first row or column
2675has index C<1>. Selections can be combined with the semi-colon (C<;>).
2676
2677When using this method in combination with  L</column_names>,  the returned
2678reference  will point to a  list of hashes  instead of a  list of lists.  A
2679disjointed  cell-based combined selection  might return rows with different
2680number of columns making the use of hashes unpredictable.
2681
2682 $csv->column_names ("Name", "Age");
2683 my $AoH = $csv->fragment ($fh, "col=3;8");
2684
2685If the L</after_parse> callback is active,  it is also called on every line
2686parsed and skipped before the fragment.
2687
2688=over 2
2689
2690=item row
2691
2692 row=4
2693 row=5-7
2694 row=6-*
2695 row=1-2;4;6-*
2696
2697=item col
2698
2699 col=2
2700 col=1-3
2701 col=4-*
2702 col=1-2;4;7-*
2703
2704=item cell
2705
2706In cell-based selection, the comma (C<,>) is used to pair row and column
2707
2708 cell=4,1
2709
2710The range operator (C<->) using C<cell>s can be used to define top-left and
2711bottom-right C<cell> location
2712
2713 cell=3,1-4,6
2714
2715The C<*> is only allowed in the second part of a pair
2716
2717 cell=3,2-*,2    # row 3 till end, only column 2
2718 cell=3,2-3,*    # column 2 till end, only row 3
2719 cell=3,2-*,*    # strip row 1 and 2, and column 1
2720
2721Cells and cell ranges may be combined with C<;>, possibly resulting in rows
2722with different numbers of columns
2723
2724 cell=1,1-2,2;3,3-4,4;1,4;4,1
2725
2726Disjointed selections will only return selected cells.   The cells that are
2727not  specified  will  not  be  included  in the  returned set,  not even as
2728C<undef>.  As an example given a C<CSV> like
2729
2730 11,12,13,...19
2731 21,22,...28,29
2732 :            :
2733 91,...97,98,99
2734
2735with C<cell=1,1-2,2;3,3-4,4;1,4;4,1> will return:
2736
2737 11,12,14
2738 21,22
2739 33,34
2740 41,43,44
2741
2742Overlapping cell-specs will return those cells only once, so
2743C<cell=1,1-3,3;2,2-4,4;2,3;4,2> will return:
2744
2745 11,12,13
2746 21,22,23,24
2747 31,32,33,34
2748 42,43,44
2749
2750=back
2751
2752L<RFC7111|https://datatracker.ietf.org/doc/html/rfc7111> does  B<not>  allow different
2753types of specs to be combined   (either C<row> I<or> C<col> I<or> C<cell>).
2754Passing an invalid fragment specification will croak and set error 2013.
2755
2756=head2 column_names
2757X<column_names>
2758
2759Set the "keys" that will be used in the  L</getline_hr>  calls.  If no keys
2760(column names) are passed, it will return the current setting as a list.
2761
2762L</column_names> accepts a list of scalars  (the column names)  or a single
2763array_ref, so you can pass the return value from L</getline> too:
2764
2765 $csv->column_names ($csv->getline ($fh));
2766
2767L</column_names> does B<no> checking on duplicates at all, which might lead
2768to unexpected results.   Undefined entries will be replaced with the string
2769C<"\cAUNDEF\cA">, so
2770
2771 $csv->column_names (undef, "", "name", "name");
2772 $hr = $csv->getline_hr ($fh);
2773
2774will set C<< $hr->{"\cAUNDEF\cA"} >> to the 1st field,  C<< $hr->{""} >> to
2775the 2nd field, and C<< $hr->{name} >> to the 4th field,  discarding the 3rd
2776field.
2777
2778L</column_names> croaks on invalid arguments.
2779
2780=head2 header
2781
2782This method does NOT work in perl-5.6.x
2783
2784Parse the CSV header and set L<C<sep>|/sep>, column_names and encoding.
2785
2786 my @hdr = $csv->header ($fh);
2787 $csv->header ($fh, { sep_set => [ ";", ",", "|", "\t" ] });
2788 $csv->header ($fh, { detect_bom => 1, munge_column_names => "lc" });
2789
2790The first argument should be a file handle.
2791
2792This method resets some object properties,  as it is supposed to be invoked
2793only once per file or stream.  It will leave attributes C<column_names> and
2794C<bound_columns> alone if setting column names is disabled. Reading headers
2795on previously processed objects might fail on perl-5.8.0 and older.
2796
2797Assuming that the file opened for parsing has a header, and the header does
2798not contain problematic characters like embedded newlines,   read the first
2799line from the open handle then auto-detect whether the header separates the
2800column names with a character from the allowed separator list.
2801
2802If any of the allowed separators matches,  and none of the I<other> allowed
2803separators match,  set  L<C<sep>|/sep>  to that  separator  for the current
2804CSV_XS instance and use it to parse the first line, map those to lowercase,
2805and use that to set the instance L</column_names>:
2806
2807 my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
2808 open my $fh, "<", "file.csv";
2809 binmode $fh; # for Windows
2810 $csv->header ($fh);
2811 while (my $row = $csv->getline_hr ($fh)) {
2812     ...
2813     }
2814
2815If the header is empty,  contains more than one unique separator out of the
2816allowed set,  contains empty fields,   or contains identical fields  (after
2817folding), it will croak with error 1010, 1011, 1012, or 1013 respectively.
2818
2819If the header contains embedded newlines or is not valid  CSV  in any other
2820way, this method will croak and leave the parse error untouched.
2821
2822A successful call to C<header>  will always set the  L<C<sep>|/sep>  of the
2823C<$csv> object. This behavior can not be disabled.
2824
2825=head3 return value
2826
2827On error this method will croak.
2828
2829In list context,  the headers will be returned whether they are used to set
2830L</column_names> or not.
2831
2832In scalar context, the instance itself is returned.  B<Note>: the values as
2833found in the header will effectively be  B<lost> if  C<set_column_names> is
2834false.
2835
2836=head3 Options
2837
2838=over 2
2839
2840=item sep_set
2841X<sep_set>
2842
2843 $csv->header ($fh, { sep_set => [ ";", ",", "|", "\t" ] });
2844
2845The list of legal separators defaults to C<[ ";", "," ]> and can be changed
2846by this option.  As this is probably the most often used option,  it can be
2847passed on its own as an unnamed argument:
2848
2849 $csv->header ($fh, [ ";", ",", "|", "\t", "::", "\x{2063}" ]);
2850
2851Multi-byte  sequences are allowed,  both multi-character and  Unicode.  See
2852L<C<sep>|/sep>.
2853
2854=item detect_bom
2855X<detect_bom>
2856
2857 $csv->header ($fh, { detect_bom => 1 });
2858
2859The default behavior is to detect if the header line starts with a BOM.  If
2860the header has a BOM, use that to set the encoding of C<$fh>.  This default
2861behavior can be disabled by passing a false value to C<detect_bom>.
2862
2863Supported encodings from BOM are: UTF-8, UTF-16BE, UTF-16LE, UTF-32BE,  and
2864UTF-32LE. BOM also supports UTF-1, UTF-EBCDIC, SCSU, BOCU-1,  and GB-18030
2865but L<Encode> does not (yet). UTF-7 is not supported.
2866
2867If a supported BOM was detected as start of the stream, it is stored in the
2868object attribute C<ENCODING>.
2869
2870 my $enc = $csv->{ENCODING};
2871
2872The encoding is used with C<binmode> on C<$fh>.
2873
2874If the handle was opened in a (correct) encoding,  this method will  B<not>
2875alter the encoding, as it checks the leading B<bytes> of the first line. In
2876case the stream starts with a decoded BOM (C<U+FEFF>), C<{ENCODING}> will be
2877C<""> (empty) instead of the default C<undef>.
2878
2879=item munge_column_names
2880X<munge_column_names>
2881
2882This option offers the means to modify the column names into something that
2883is most useful to the application.   The default is to map all column names
2884to lower case.
2885
2886 $csv->header ($fh, { munge_column_names => "lc" });
2887
2888The following values are available:
2889
2890  lc     - lower case
2891  uc     - upper case
2892  db     - valid DB field names
2893  none   - do not change
2894  \%hash - supply a mapping
2895  \&cb   - supply a callback
2896
2897=over 2
2898
2899=item Lower case
2900
2901 $csv->header ($fh, { munge_column_names => "lc" });
2902
2903The header is changed to all lower-case
2904
2905 $_ = lc;
2906
2907=item Upper case
2908
2909 $csv->header ($fh, { munge_column_names => "uc" });
2910
2911The header is changed to all upper-case
2912
2913 $_ = uc;
2914
2915=item Literal
2916
2917 $csv->header ($fh, { munge_column_names => "none" });
2918
2919=item Hash
2920
2921 $csv->header ($fh, { munge_column_names => { foo => "sombrero" }});
2922
2923If a value does not exist, the original value is used unchanged.
2924
2925=item Database
2926
2927 $csv->header ($fh, { munge_column_names => "db" });
2928
2929=over 2
2930
2931=item -
2932
2933lower-case
2934
2935=item -
2936
2937all sequences of non-word characters are replaced with an underscore
2938
2939=item -
2940
2941all leading underscores are removed
2942
2943=back
2944
2945 $_ = lc (s/\W+/_/gr =~ s/^_+//r);
2946
2947=item Callback
2948
2949 $csv->header ($fh, { munge_column_names => sub { fc } });
2950 $csv->header ($fh, { munge_column_names => sub { "column_".$col++ } });
2951 $csv->header ($fh, { munge_column_names => sub { lc (s/\W+/_/gr) } });
2952
2953As this callback is called in a C<map>, you can use C<$_> directly.
2954
2955=back
2956
2957=item set_column_names
2958X<set_column_names>
2959
2960 $csv->header ($fh, { set_column_names => 1 });
2961
2962The default is to set the instance's column names using L</column_names> if
2963the method is successful,  so subsequent calls to L</getline_hr> can return
2964a hash. Disabling the setting of the header can be forced by using a false
2965value for this option.
2966
2967As described in L</return value> above, content is lost in scalar context.
2968
2969=back
2970
2971=head3 Validation
2972
2973When receiving CSV files from external sources,  this method can be used to
2974protect against changes in the layout by restricting to known headers  (and
2975typos in the header fields).
2976
2977 my %known = (
2978     "record key" => "c_rec",
2979     "rec id"     => "c_rec",
2980     "id_rec"     => "c_rec",
2981     "kode"       => "code",
2982     "code"       => "code",
2983     "vaule"      => "value",
2984     "value"      => "value",
2985     );
2986 my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
2987 open my $fh, "<", $source or die "$source: $!";
2988 $csv->header ($fh, { munge_column_names => sub {
2989     s/\s+$//;
2990     s/^\s+//;
2991     $known{lc $_} or die "Unknown column '$_' in $source";
2992     }});
2993 while (my $row = $csv->getline_hr ($fh)) {
2994     say join "\t", $row->{c_rec}, $row->{code}, $row->{value};
2995     }
2996
2997=head2 bind_columns
2998X<bind_columns>
2999
3000Takes a list of scalar references to be used for output with  L</print>  or
3001to store in the fields fetched by L</getline>.  When you do not pass enough
3002references to store the fetched fields in, L</getline> will fail with error
3003C<3006>.  If you pass more than there are fields to return,  the content of
3004the remaining references is left untouched.
3005
3006 $csv->bind_columns (\$code, \$name, \$price, \$description);
3007 while ($csv->getline ($fh)) {
3008     print "The price of a $name is \x{20ac} $price\n";
3009     }
3010
3011To reset or clear all column binding, call L</bind_columns> with the single
3012argument C<undef>. This will also clear column names.
3013
3014 $csv->bind_columns (undef);
3015
3016If no arguments are passed at all, L</bind_columns> will return the list of
3017current bindings or C<undef> if no binds are active.
3018
3019Note that in parsing with  C<bind_columns>,  the fields are set on the fly.
3020That implies that if the third field of a row causes an error  (or this row
3021has just two fields where the previous row had more),  the first two fields
3022already have been assigned the values of the current row, while the rest of
3023the fields will still hold the values of the previous row.  If you want the
3024parser to fail in these cases, use the L<C<strict>|/strict> attribute.
3025
3026=head2 eof
3027X<eof>
3028
3029 $eof = $csv->eof ();
3030
3031If L</parse> or  L</getline>  was used with an IO stream,  this method will
3032return true (1) if the last call hit end of file,  otherwise it will return
3033false ('').  This is useful to see the difference between a failure and end
3034of file.
3035
3036Note that if the parsing of the last line caused an error,  C<eof> is still
3037true.  That means that if you are I<not> using L</auto_diag>, an idiom like
3038
3039 while (my $row = $csv->getline ($fh)) {
3040     # ...
3041     }
3042 $csv->eof or $csv->error_diag;
3043
3044will I<not> report the error. You would have to change that to
3045
3046 while (my $row = $csv->getline ($fh)) {
3047     # ...
3048     }
3049 +$csv->error_diag and $csv->error_diag;
3050
3051=head2 types
3052X<types>
3053
3054 $csv->types (\@tref);
3055
3056This method is used to force that  (all)  columns are of a given type.  For
3057example, if you have an integer column,  two  columns  with  doubles  and a
3058string column, then you might do a
3059
3060 $csv->types ([Text::CSV_XS::IV (),
3061               Text::CSV_XS::NV (),
3062               Text::CSV_XS::NV (),
3063               Text::CSV_XS::PV ()]);
3064
3065Column types are used only for I<decoding> columns while parsing,  in other
3066words by the L</parse> and L</getline> methods.
3067
3068You can unset column types by doing a
3069
3070 $csv->types (undef);
3071
3072or fetch the current type settings with
3073
3074 $types = $csv->types ();
3075
3076=over 4
3077
3078=item IV
3079X<IV>
3080
3081Set field type to integer.
3082
3083=item NV
3084X<NV>
3085
3086Set field type to numeric/float.
3087
3088=item PV
3089X<PV>
3090
3091Set field type to string.
3092
3093=back
3094
3095=head2 fields
3096X<fields>
3097
3098 @columns = $csv->fields ();
3099
3100This method returns the input to   L</combine>  or the resultant decomposed
3101fields of a successful L</parse>, whichever was called more recently.
3102
3103Note that the return value is undefined after using L</getline>, which does
3104not fill the data structures returned by L</parse>.
3105
3106=head2 meta_info
3107X<meta_info>
3108
3109 @flags = $csv->meta_info ();
3110
3111This method returns the "flags" of the input to L</combine> or the flags of
3112the resultant  decomposed fields of  L</parse>,   whichever was called more
3113recently.
3114
3115For each field,  a meta_info field will hold  flags that  inform  something
3116about  the  field  returned  by  the  L</fields>  method or  passed to  the
3117L</combine> method. The flags are bit-wise-C<or>'d like:
3118
3119=over 2
3120
3121=item C< >0x0001
3122
3123The field was quoted.
3124
3125=item C< >0x0002
3126
3127The field was binary.
3128
3129=back
3130
3131See the C<is_***> methods below.
3132
3133=head2 is_quoted
3134X<is_quoted>
3135
3136 my $quoted = $csv->is_quoted ($column_idx);
3137
3138where  C<$column_idx> is the  (zero-based)  index of the column in the last
3139result of L</parse>.
3140
3141This returns a true value  if the data in the indicated column was enclosed
3142in L<C<quote_char>|/quote_char> quotes.  This might be important for fields
3143where content C<,20070108,> is to be treated as a numeric value,  and where
3144C<,"20070108",> is explicitly marked as character string data.
3145
3146This method is only valid when L</keep_meta_info> is set to a true value.
3147
3148=head2 is_binary
3149X<is_binary>
3150
3151 my $binary = $csv->is_binary ($column_idx);
3152
3153where  C<$column_idx> is the  (zero-based)  index of the column in the last
3154result of L</parse>.
3155
3156This returns a true value if the data in the indicated column contained any
3157byte in the range C<[\x00-\x08,\x10-\x1F,\x7F-\xFF]>.
3158
3159This method is only valid when L</keep_meta_info> is set to a true value.
3160
3161=head2 is_missing
3162X<is_missing>
3163
3164 my $missing = $csv->is_missing ($column_idx);
3165
3166where  C<$column_idx> is the  (zero-based)  index of the column in the last
3167result of L</getline_hr>.
3168
3169 $csv->keep_meta_info (1);
3170 while (my $hr = $csv->getline_hr ($fh)) {
3171     $csv->is_missing (0) and next; # This was an empty line
3172     }
3173
3174When using  L</getline_hr>,  it is impossible to tell if the  parsed fields
3175are C<undef> because they were  not filled in the C<CSV> stream  or because
3176they were not read at all, as B<all> the fields defined by L</column_names>
3177are set in the hash-ref.    If you still need to know if all fields in each
3178row are provided, you should enable L<C<keep_meta_info>|/keep_meta_info> so
3179you can check the flags.
3180
3181If  L<C<keep_meta_info>|/keep_meta_info>  is C<false>,  C<is_missing>  will
3182always return C<undef>, regardless of C<$column_idx> being valid or not. If
3183this attribute is C<true> it will return either C<0> (the field is present)
3184or C<1> (the field is missing).
3185
3186A special case is the empty line.  If the line is completely empty -  after
3187dealing with the flags - this is still a valid CSV line:  it is a record of
3188just one single empty field. However, if C<keep_meta_info> is set, invoking
3189C<is_missing> with index C<0> will now return true.
3190
3191=head2 status
3192X<status>
3193
3194 $status = $csv->status ();
3195
3196This method returns the status of the last invoked L</combine> or L</parse>
3197call. Status is success (true: C<1>) or failure (false: C<undef> or C<0>).
3198
3199Note that as this only keeps track of the status of above mentioned methods,
3200you are probably looking for L<C<error_diag>|/error_diag> instead.
3201
3202=head2 error_input
3203X<error_input>
3204
3205 $bad_argument = $csv->error_input ();
3206
3207This method returns the erroneous argument (if it exists) of L</combine> or
3208L</parse>,  whichever was called more recently.  If the last invocation was
3209successful, C<error_input> will return C<undef>.
3210
3211Depending on the type of error, it I<might> also hold the data for the last
3212error-input of L</getline>.
3213
3214=head2 error_diag
3215X<error_diag>
3216
3217 Text::CSV_XS->error_diag ();
3218 $csv->error_diag ();
3219 $error_code               = 0  + $csv->error_diag ();
3220 $error_str                = "" . $csv->error_diag ();
3221 ($cde, $str, $pos, $rec, $fld) = $csv->error_diag ();
3222
3223If (and only if) an error occurred,  this function returns  the diagnostics
3224of that error.
3225
3226If called in void context,  this will print the internal error code and the
3227associated error message to STDERR.
3228
3229If called in list context,  this will return  the error code  and the error
3230message in that order.  If the last error was from parsing, the rest of the
3231values returned are a best guess at the location  within the line  that was
3232being parsed. Their values are 1-based.  The position currently is index of
3233the byte at which the parsing failed in the current record. It might change
3234to be the index of the current character in a later release.  The record is
3235the index of the record parsed by the csv instance. The field number is the
3236index of the field the parser thinks it is currently  trying to  parse. See
3237F<examples/csv-check> for how this can be used.
3238
3239If called in  scalar context,  it will return  the diagnostics  in a single
3240scalar, a-la C<$!>.  It will contain the error code in numeric context, and
3241the diagnostics message in string context.
3242
3243When called as a class method or a  direct function call,  the  diagnostics
3244are that of the last L</new> call.
3245
3246=head2 record_number
3247X<record_number>
3248
3249 $recno = $csv->record_number ();
3250
3251Returns the number of records parsed by this csv instance.  This value
3252should be more accurate than C<$.> when embedded newlines come in play.
3253Records written by this instance are not counted.
3254
3255=head2 SetDiag
3256X<SetDiag>
3257
3258 $csv->SetDiag (0);
3259
3260Use to reset the diagnostics if you are dealing with errors.
3261
3262=head1 FUNCTIONS
3263
3264=head2 csv
3265X<csv>
3266
3267This function is not exported by default and should be explicitly requested:
3268
3269 use Text::CSV_XS qw( csv );
3270
3271This is a high-level function that aims at simple (user) interfaces.  This
3272can be used to read/parse a C<CSV> file or stream (the default behavior) or
3273to produce a file or write to a stream (define the  C<out>  attribute).  It
3274returns an array- or hash-reference on parsing (or C<undef> on fail) or the
3275numeric value of  L</error_diag>  on writing.  When this function fails you
3276can get to the error using the class call to L</error_diag>
3277
3278 my $aoa = csv (in => "test.csv") or
3279     die Text::CSV_XS->error_diag;
3280
3281This function takes the arguments as key-value pairs. This can be passed as
3282a list or as an anonymous hash:
3283
3284 my $aoa = csv (  in => "test.csv", sep_char => ";");
3285 my $aoh = csv ({ in => $fh, headers => "auto" });
3286
3287The arguments passed consist of two parts:  the arguments to L</csv> itself
3288and the optional attributes to the  C<CSV>  object used inside the function
3289as enumerated and explained in L</new>.
3290
3291If not overridden, the default options used for CSV are
3292
3293 auto_diag   => 1
3294 escape_null => 0
3295
3296The option that is always set and cannot be altered is
3297
3298 binary      => 1
3299
3300As this function will likely be used in one-liners,  it allows  C<quote> to
3301be abbreviated as C<quo>,  and  C<escape_char> to be abbreviated as  C<esc>
3302or C<escape>.
3303
3304Alternative invocations:
3305
3306 my $aoa = Text::CSV_XS::csv (in => "file.csv");
3307
3308 my $csv = Text::CSV_XS->new ();
3309 my $aoa = $csv->csv (in => "file.csv");
3310
3311In the latter case, the object attributes are used from the existing object
3312and the attribute arguments in the function call are ignored:
3313
3314 my $csv = Text::CSV_XS->new ({ sep_char => ";" });
3315 my $aoh = $csv->csv (in => "file.csv", bom => 1);
3316
3317will parse using C<;> as C<sep_char>, not C<,>.
3318
3319=head3 in
3320X<in>
3321
3322Used to specify the source.  C<in> can be a file name (e.g. C<"file.csv">),
3323which will be  opened for reading  and closed when finished,  a file handle
3324(e.g.  C<$fh> or C<FH>),  a reference to a glob (e.g. C<\*ARGV>),  the glob
3325itself (e.g. C<*STDIN>), or a reference to a scalar (e.g. C<\q{1,2,"csv"}>).
3326
3327When used with L</out>, C<in> should be a reference to a CSV structure (AoA
3328or AoH)  or a CODE-ref that returns an array-reference or a hash-reference.
3329The code-ref will be invoked with no arguments.
3330
3331 my $aoa = csv (in => "file.csv");
3332
3333 open my $fh, "<", "file.csv";
3334 my $aoa = csv (in => $fh);
3335
3336 my $csv = [ [qw( Foo Bar )], [ 1, 2 ], [ 2, 3 ]];
3337 my $err = csv (in => $csv, out => "file.csv");
3338
3339If called in void context without the L</out> attribute, the resulting ref
3340will be used as input to a subsequent call to csv:
3341
3342 csv (in => "file.csv", filter => { 2 => sub { length > 2 }})
3343
3344will be a shortcut to
3345
3346 csv (in => csv (in => "file.csv", filter => { 2 => sub { length > 2 }}))
3347
3348where, in the absence of the C<out> attribute, this is a shortcut to
3349
3350 csv (in  => csv (in => "file.csv", filter => { 2 => sub { length > 2 }}),
3351      out => *STDOUT)
3352
3353=head3 out
3354X<out>
3355
3356 csv (in => $aoa, out => "file.csv");
3357 csv (in => $aoa, out => $fh);
3358 csv (in => $aoa, out =>   STDOUT);
3359 csv (in => $aoa, out =>  *STDOUT);
3360 csv (in => $aoa, out => \*STDOUT);
3361 csv (in => $aoa, out => \my $data);
3362 csv (in => $aoa, out =>  undef);
3363 csv (in => $aoa, out => \"skip");
3364
3365 csv (in => $fh,  out => \@aoa);
3366 csv (in => $fh,  out => \@aoh, bom => 1);
3367 csv (in => $fh,  out => \%hsh, key => "key");
3368
3369In output mode, the default CSV options when producing CSV are
3370
3371 eol       => "\r\n"
3372
3373The L</fragment> attribute is ignored in output mode.
3374
3375C<out> can be a file name  (e.g.  C<"file.csv">),  which will be opened for
3376writing and closed when finished,  a file handle (e.g. C<$fh> or C<FH>),  a
3377reference to a glob (e.g. C<\*STDOUT>),  the glob itself (e.g. C<*STDOUT>),
3378or a reference to a scalar (e.g. C<\my $data>).
3379
3380 csv (in => sub { $sth->fetch },            out => "dump.csv");
3381 csv (in => sub { $sth->fetchrow_hashref }, out => "dump.csv",
3382      headers => $sth->{NAME_lc});
3383
3384When a code-ref is used for C<in>, the output is generated  per invocation,
3385so no buffering is involved. This implies that there is no size restriction
3386on the number of records. The C<csv> function ends when the coderef returns
3387a false value.
3388
3389If C<out> is set to a reference of the literal string C<"skip">, the output
3390will be suppressed completely,  which might be useful in combination with a
3391filter for side effects only.
3392
3393 my %cache;
3394 csv (in    => "dump.csv",
3395      out   => \"skip",
3396      on_in => sub { $cache{$_[1][1]}++ });
3397
3398Currently,  setting C<out> to any false value  (C<undef>, C<"">, 0) will be
3399equivalent to C<\"skip">.
3400
3401If the C<in> argument points to something to parse, and the C<out> is set to
3402a reference to an C<ARRAY> or a C<HASH>, the output is appended to the data
3403in the existing reference. The result of the parse should match what exists
3404in the reference passed.  This might come in handy when you have to parse a
3405set of files with similar content (like data stored per period) and you want
3406to collect that into a single data structure:
3407
3408 my %hash;
3409 csv (in => $_, out => \%hash, key => "id") for sort glob "foo-[0-9]*.csv";
3410
3411 my @list; # List of arrays
3412 csv (in => $_, out => \@list)              for sort glob "foo-[0-9]*.csv";
3413
3414 my @list; # List of hashes
3415 csv (in => $_, out => \@list, bom => 1)    for sort glob "foo-[0-9]*.csv";
3416
3417=head3 encoding
3418X<encoding>
3419
3420If passed,  it should be an encoding accepted by the  C<:encoding()> option
3421to C<open>. There is no default value. This attribute does not work in perl
34225.6.x.  C<encoding> can be abbreviated to C<enc> for ease of use in command
3423line invocations.
3424
3425If C<encoding> is set to the literal value C<"auto">, the method L</header>
3426will be invoked on the opened stream to check if there is a BOM and set the
3427encoding accordingly.   This is equal to passing a true value in the option
3428L<C<detect_bom>|/detect_bom>.
3429
3430Encodings can be stacked, as supported by C<binmode>:
3431
3432 # Using PerlIO::via::gzip
3433 csv (in       => \@csv,
3434      out      => "test.csv:via.gz",
3435      encoding => ":via(gzip):encoding(utf-8)",
3436      );
3437 $aoa = csv (in => "test.csv:via.gz",  encoding => ":via(gzip)");
3438
3439 # Using PerlIO::gzip
3440 csv (in       => \@csv,
3441      out      => "test.csv:gzip.gz",
3442      encoding => ":gzip:encoding(utf-8)",
3443      );
3444 $aoa = csv (in => "test.csv:gzip.gz", encoding => ":gzip");
3445
3446=head3 detect_bom
3447X<detect_bom>
3448
3449If  C<detect_bom>  is given, the method  L</header>  will be invoked on the
3450opened stream to check if there is a BOM and set the encoding accordingly.
3451
3452C<detect_bom> can be abbreviated to C<bom>.
3453
3454This is the same as setting L<C<encoding>|/encoding> to C<"auto">.
3455
3456Note that as the method  L</header> is invoked,  its default is to also set
3457the headers.
3458
3459=head3 headers
3460X<headers>
3461
3462If this attribute is not given, the default behavior is to produce an array
3463of arrays.
3464
3465If C<headers> is supplied,  it should be an anonymous list of column names,
3466an anonymous hashref, a coderef, or a literal flag:  C<auto>, C<lc>, C<uc>,
3467or C<skip>.
3468
3469=over 2
3470
3471=item skip
3472X<skip>
3473
3474When C<skip> is used, the header will not be included in the output.
3475
3476 my $aoa = csv (in => $fh, headers => "skip");
3477
3478=item auto
3479X<auto>
3480
3481If C<auto> is used, the first line of the C<CSV> source will be read as the
3482list of field headers and used to produce an array of hashes.
3483
3484 my $aoh = csv (in => $fh, headers => "auto");
3485
3486=item lc
3487X<lc>
3488
3489If C<lc> is used,  the first line of the  C<CSV> source will be read as the
3490list of field headers mapped to  lower case and used to produce an array of
3491hashes. This is a variation of C<auto>.
3492
3493 my $aoh = csv (in => $fh, headers => "lc");
3494
3495=item uc
3496X<uc>
3497
3498If C<uc> is used,  the first line of the  C<CSV> source will be read as the
3499list of field headers mapped to  upper case and used to produce an array of
3500hashes. This is a variation of C<auto>.
3501
3502 my $aoh = csv (in => $fh, headers => "uc");
3503
3504=item CODE
3505X<CODE>
3506
3507If a coderef is used,  the first line of the  C<CSV> source will be read as
3508the list of mangled field headers in which each field is passed as the only
3509argument to the coderef. This list is used to produce an array of hashes.
3510
3511 my $aoh = csv (in      => $fh,
3512                headers => sub { lc ($_[0]) =~ s/kode/code/gr });
3513
3514This example is a variation of using C<lc> where all occurrences of C<kode>
3515are replaced with C<code>.
3516
3517=item ARRAY
3518X<ARRAY>
3519
3520If  C<headers>  is an anonymous list,  the entries in the list will be used
3521as field names. The first line is considered data instead of headers.
3522
3523 my $aoh = csv (in => $fh, headers => [qw( Foo Bar )]);
3524 csv (in => $aoa, out => $fh, headers => [qw( code description price )]);
3525
3526=item HASH
3527X<HASH>
3528
3529If C<headers> is a hash reference, this implies C<auto>, but header fields
3530that exist as key in the hashref will be replaced by the value for that
3531key. Given a CSV file like
3532
3533 post-kode,city,name,id number,fubble
3534 1234AA,Duckstad,Donald,13,"X313DF"
3535
3536using
3537
3538 csv (headers => { "post-kode" => "pc", "id number" => "ID" }, ...
3539
3540will return an entry like
3541
3542 { pc     => "1234AA",
3543   city   => "Duckstad",
3544   name   => "Donald",
3545   ID     => "13",
3546   fubble => "X313DF",
3547   }
3548
3549=back
3550
3551See also L<C<munge_column_names>|/munge_column_names> and
3552L<C<set_column_names>|/set_column_names>.
3553
3554=head3 munge_column_names
3555X<munge_column_names>
3556
3557If C<munge_column_names> is set,  the method  L</header>  is invoked on the
3558opened stream with all matching arguments to detect and set the headers.
3559
3560C<munge_column_names> can be abbreviated to C<munge>.
3561
3562=head3 key
3563X<key>
3564
3565If passed,  will default  L<C<headers>|/headers>  to C<"auto"> and return a
3566hashref instead of an array of hashes. Allowed values are simple scalars or
3567array-references where the first element is the joiner and the rest are the
3568fields to join to combine the key.
3569
3570 my $ref = csv (in => "test.csv", key => "code");
3571 my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ]);
3572
3573with test.csv like
3574
3575 code,product,price,color
3576 1,pc,850,gray
3577 2,keyboard,12,white
3578 3,mouse,5,black
3579
3580the first example will return
3581
3582  { 1   => {
3583        code    => 1,
3584        color   => 'gray',
3585        price   => 850,
3586        product => 'pc'
3587        },
3588    2   => {
3589        code    => 2,
3590        color   => 'white',
3591        price   => 12,
3592        product => 'keyboard'
3593        },
3594    3   => {
3595        code    => 3,
3596        color   => 'black',
3597        price   => 5,
3598        product => 'mouse'
3599        }
3600    }
3601
3602the second example will return
3603
3604  { "1:gray"    => {
3605        code    => 1,
3606        color   => 'gray',
3607        price   => 850,
3608        product => 'pc'
3609        },
3610    "2:white"   => {
3611        code    => 2,
3612        color   => 'white',
3613        price   => 12,
3614        product => 'keyboard'
3615        },
3616    "3:black"   => {
3617        code    => 3,
3618        color   => 'black',
3619        price   => 5,
3620        product => 'mouse'
3621        }
3622    }
3623
3624The C<key> attribute can be combined with L<C<headers>|/headers> for C<CSV>
3625data that has no header line, like
3626
3627 my $ref = csv (
3628     in      => "foo.csv",
3629     headers => [qw( c_foo foo bar description stock )],
3630     key     =>     "c_foo",
3631     );
3632
3633=head3 value
3634X<value>
3635
3636Used to create key-value hashes.
3637
3638Only allowed when C<key> is valid. A C<value> can be either a single column
3639label or an anonymous list of column labels.  In the first case,  the value
3640will be a simple scalar value, in the latter case, it will be a hashref.
3641
3642 my $ref = csv (in => "test.csv", key   => "code",
3643                                  value => "price");
3644 my $ref = csv (in => "test.csv", key   => "code",
3645                                  value => [ "product", "price" ]);
3646 my $ref = csv (in => "test.csv", key   => [ ":" => "code", "color" ],
3647                                  value => "price");
3648 my $ref = csv (in => "test.csv", key   => [ ":" => "code", "color" ],
3649                                  value => [ "product", "price" ]);
3650
3651with test.csv like
3652
3653 code,product,price,color
3654 1,pc,850,gray
3655 2,keyboard,12,white
3656 3,mouse,5,black
3657
3658the first example will return
3659
3660  { 1 => 850,
3661    2 =>  12,
3662    3 =>   5,
3663    }
3664
3665the second example will return
3666
3667  { 1   => {
3668        price   => 850,
3669        product => 'pc'
3670        },
3671    2   => {
3672        price   => 12,
3673        product => 'keyboard'
3674        },
3675    3   => {
3676        price   => 5,
3677        product => 'mouse'
3678        }
3679    }
3680
3681the third example will return
3682
3683  { "1:gray"    => 850,
3684    "2:white"   =>  12,
3685    "3:black"   =>   5,
3686    }
3687
3688the fourth example will return
3689
3690  { "1:gray"    => {
3691        price   => 850,
3692        product => 'pc'
3693        },
3694    "2:white"   => {
3695        price   => 12,
3696        product => 'keyboard'
3697        },
3698    "3:black"   => {
3699        price   => 5,
3700        product => 'mouse'
3701        }
3702    }
3703
3704=head3 keep_headers
3705X<keep_headers>
3706X<keep_column_names>
3707X<kh>
3708
3709When using hashes, store the column names in the arrayref passed,  so all
3710headers are available after the call in the original order.
3711
3712 my $aoh = csv (in => "file.csv", keep_headers => \my @hdr);
3713
3714This attribute can be abbreviated to C<kh> or passed as C<keep_column_names>.
3715
3716This attribute implies a default of C<auto> for the C<headers> attribute.
3717
3718=head3 fragment
3719X<fragment>
3720
3721Only output the fragment as defined in the L</fragment> method. This option
3722is ignored when I<generating> C<CSV>. See L</out>.
3723
3724Combining all of them could give something like
3725
3726 use Text::CSV_XS qw( csv );
3727 my $aoh = csv (
3728     in       => "test.txt",
3729     encoding => "utf-8",
3730     headers  => "auto",
3731     sep_char => "|",
3732     fragment => "row=3;6-9;15-*",
3733     );
3734 say $aoh->[15]{Foo};
3735
3736=head3 sep_set
3737X<sep_set>
3738X<seps>
3739
3740If C<sep_set> is set, the method L</header> is invoked on the opened stream
3741to detect and set L<C<sep_char>|/sep_char> with the given set.
3742
3743C<sep_set> can be abbreviated to C<seps>.
3744
3745Note that as the  L</header> method is invoked,  its default is to also set
3746the headers.
3747
3748=head3 set_column_names
3749X<set_column_names>
3750
3751If  C<set_column_names> is passed,  the method L</header> is invoked on the
3752opened stream with all arguments meant for L</header>.
3753
3754If C<set_column_names> is passed as a false value, the content of the first
3755row is only preserved if the output is AoA:
3756
3757With an input-file like
3758
3759 bAr,foo
3760 1,2
3761 3,4,5
3762
3763This call
3764
3765 my $aoa = csv (in => $file, set_column_names => 0);
3766
3767will result in
3768
3769 [[ "bar", "foo"     ],
3770  [ "1",   "2"       ],
3771  [ "3",   "4",  "5" ]]
3772
3773and
3774
3775 my $aoa = csv (in => $file, set_column_names => 0, munge => "none");
3776
3777will result in
3778
3779 [[ "bAr", "foo"     ],
3780  [ "1",   "2"       ],
3781  [ "3",   "4",  "5" ]]
3782
3783=head2 Callbacks
3784X<Callbacks>
3785
3786Callbacks enable actions triggered from the I<inside> of Text::CSV_XS.
3787
3788While most of what this enables  can easily be done in an  unrolled loop as
3789described in the L</SYNOPSIS>, callbacks can be used to meet special demands
3790or enhance the L</csv> function.
3791
3792=over 2
3793
3794=item error
3795X<error>
3796
3797 $csv->callbacks (error => sub { $csv->SetDiag (0) });
3798
3799The C<error>  callback is invoked when an error occurs,  but  I<only>  when
3800L</auto_diag> is set to a true value. A callback is invoked with the values
3801returned by L</error_diag>:
3802
3803 my ($c, $s);
3804
3805 sub ignore3006 {
3806     my ($err, $msg, $pos, $recno, $fldno) = @_;
3807     if ($err == 3006) {
3808         # ignore this error
3809         ($c, $s) = (undef, undef);
3810         Text::CSV_XS->SetDiag (0);
3811         }
3812     # Any other error
3813     return;
3814     } # ignore3006
3815
3816 $csv->callbacks (error => \&ignore3006);
3817 $csv->bind_columns (\$c, \$s);
3818 while ($csv->getline ($fh)) {
3819     # Error 3006 will not stop the loop
3820     }
3821
3822=item after_parse
3823X<after_parse>
3824
3825 $csv->callbacks (after_parse => sub { push @{$_[1]}, "NEW" });
3826 while (my $row = $csv->getline ($fh)) {
3827     $row->[-1] eq "NEW";
3828     }
3829
3830This callback is invoked after parsing with  L</getline>  only if no  error
3831occurred.  The callback is invoked with two arguments:   the current C<CSV>
3832parser object and an array reference to the fields parsed.
3833
3834The return code of the callback is ignored  unless it is a reference to the
3835string "skip", in which case the record will be skipped in L</getline_all>.
3836
3837 sub add_from_db {
3838     my ($csv, $row) = @_;
3839     $sth->execute ($row->[4]);
3840     push @$row, $sth->fetchrow_array;
3841     } # add_from_db
3842
3843 my $aoa = csv (in => "file.csv", callbacks => {
3844     after_parse => \&add_from_db });
3845
3846This hook can be used for validation:
3847X<data_validation>
3848
3849=over 2
3850
3851=item FAIL
3852
3853Die if any of the records does not validate a rule:
3854
3855 after_parse => sub {
3856     $_[1][4] =~ m/^[0-9]{4}\s?[A-Z]{2}$/ or
3857         die "5th field does not have a valid Dutch zipcode";
3858     }
3859
3860=item DEFAULT
3861
3862Replace invalid fields with a default value:
3863
3864 after_parse => sub { $_[1][2] =~ m/^\d+$/ or $_[1][2] = 0 }
3865
3866=item SKIP
3867
3868Skip records that have invalid fields (only applies to L</getline_all>):
3869
3870 after_parse => sub { $_[1][0] =~ m/^\d+$/ or return \"skip"; }
3871
3872=back
3873
3874=item before_print
3875X<before_print>
3876
3877 my $idx = 1;
3878 $csv->callbacks (before_print => sub { $_[1][0] = $idx++ });
3879 $csv->print (*STDOUT, [ 0, $_ ]) for @members;
3880
3881This callback is invoked  before printing with  L</print>  only if no error
3882occurred.  The callback is invoked with two arguments:  the current  C<CSV>
3883parser object and an array reference to the fields passed.
3884
3885The return code of the callback is ignored.
3886
3887 sub max_4_fields {
3888     my ($csv, $row) = @_;
3889     @$row > 4 and splice @$row, 4;
3890     } # max_4_fields
3891
3892 csv (in => csv (in => "file.csv"), out => *STDOUT,
3893     callbacks => { before_print => \&max_4_fields });
3894
3895This callback is not active for L</combine>.
3896
3897=back
3898
3899=head3 Callbacks for csv ()
3900
3901The L</csv> function allows for some callbacks that are not integrated in the
3902XS internals but are only available in the L</csv> function itself.
3903
3904  csv (in        => "file.csv",
3905       callbacks => {
3906           filter       => { 6 => sub { $_ > 15 } },    # first
3907           after_parse  => sub { say "AFTER PARSE";  }, # first
3908           after_in     => sub { say "AFTER IN";     }, # second
3909           on_in        => sub { say "ON IN";        }, # third
3910           },
3911       );
3912
3913  csv (in        => $aoh,
3914       out       => "file.csv",
3915       callbacks => {
3916           on_in        => sub { say "ON IN";        }, # first
3917           before_out   => sub { say "BEFORE OUT";   }, # second
3918           before_print => sub { say "BEFORE PRINT"; }, # third
3919           },
3920       );
3921
3922=over 2
3923
3924=item filter
3925X<filter>
3926
3927This callback can be used to filter records.  It is called just after a new
3928record has been scanned.  The callback accepts a:
3929
3930=over 2
3931
3932=item hashref
3933
3934The keys are the index to the row (the field name or field number, 1-based)
3935and the values are subs to return a true or false value.
3936
3937 csv (in => "file.csv", filter => {
3938            3 => sub { m/a/ },       # third field should contain an "a"
3939            5 => sub { length > 4 }, # length of the 5th field minimal 5
3940            });
3941
3942 csv (in => "file.csv", filter => { foo => sub { $_ > 4 }});
3943
3944If the keys to the filter hash contain any character that is not a digit it
3945will also implicitly set L</headers> to C<"auto">  unless  L</headers>  was
3946already passed as argument.  When headers are active, returning an array of
3947hashes, the filter is not applicable to the header itself.
3948
3949All sub results should match, as in AND.
3950
3951The context of the callback sets  C<$_> localized to the field indicated by
3952the filter. The two arguments are as with all other callbacks, so the other
3953fields in the current row can be seen:
3954
3955 filter => { 3 => sub { $_ > 100 ? $_[1][1] =~ m/A/ : $_[1][6] =~ m/B/ }}
3956
3957If the context is set to return a list of hashes  (L</headers> is defined),
3958the current record will also be available in the localized C<%_>:
3959
3960 filter => { 3 => sub { $_ > 100 && $_{foo} =~ m/A/ && $_{bar} < 1000  }}
3961
3962If the filter is used to I<alter> the content by changing C<$_>,  make sure
3963that the sub returns true in order not to have that record skipped:
3964
3965 filter => { 2 => sub { $_ = uc }}
3966
3967will upper-case the second field, and then skip it if the resulting content
3968evaluates to false. To always accept, end with truth:
3969
3970 filter => { 2 => sub { $_ = uc; 1 }}
3971
3972=item coderef
3973
3974 csv (in => "file.csv", filter => sub { $n++; 0; });
3975
3976If the argument to C<filter> is a coderef,  it is an alias or shortcut to a
3977filter on column 0:
3978
3979 csv (filter => sub { $n++; 0 });
3980
3981is equal to
3982
3983 csv (filter => { 0 => sub { $n++; 0 } });
3984
3985=item filter-name
3986
3987 csv (in => "file.csv", filter => "not_blank");
3988 csv (in => "file.csv", filter => "not_empty");
3989 csv (in => "file.csv", filter => "filled");
3990
3991These are predefined filters
3992
3993Given a file like (line numbers prefixed for doc purpose only):
3994
3995 1:1,2,3
3996 2:
3997 3:,
3998 4:""
3999 5:,,
4000 6:, ,
4001 7:"",
4002 8:" "
4003 9:4,5,6
4004
4005=over 2
4006
4007=item not_blank
4008
4009Filter out the blank lines
4010
4011This filter is a shortcut for
4012
4013 filter => { 0 => sub { @{$_[1]} > 1 or
4014             defined $_[1][0] && $_[1][0] ne "" } }
4015
4016Due to the implementation,  it is currently impossible to also filter lines
4017that consist only of a quoted empty field.  These lines are also considered
4018blank lines.
4019
4020With the given example, lines 2 and 4 will be skipped.
4021
4022=item not_empty
4023
4024Filter out lines where all the fields are empty.
4025
4026This filter is a shortcut for
4027
4028 filter => { 0 => sub { grep { defined && $_ ne "" } @{$_[1]} } }
4029
4030A space is not regarded as being empty, so given the example data, lines 2, 3,
40314, 5, and 7 are skipped.
4032
4033=item filled
4034
4035Filter out lines that have no visible data
4036
4037This filter is a shortcut for
4038
4039 filter => { 0 => sub { grep { defined && m/\S/ } @{$_[1]} } }
4040
4041This filter rejects all lines that do I<not> have at least one field that
4042contains a non-whitespace character.
4043
4044With the given example data, this filter would skip lines 2 through 8.
4045
4046=back
4047
4048=back
4049
4050One could also use modules like L<Types::Standard>:
4051
4052 use Types::Standard -types;
4053
4054 my $type   = Tuple[Str, Str, Int, Bool, Optional[Num]];
4055 my $check  = $type->compiled_check;
4056
4057 # filter with compiled check and warnings
4058 my $aoa = csv (
4059    in     => \$data,
4060    filter => {
4061        0 => sub {
4062            my $ok = $check->($_[1]) or
4063                warn $type->get_message ($_[1]), "\n";
4064            return $ok;
4065            },
4066        },
4067    );
4068
4069=item after_in
4070X<after_in>
4071
4072This callback is invoked for each record after all records have been parsed
4073but before returning the reference to the caller.  The hook is invoked with
4074two arguments:  the current  C<CSV>  parser object  and a  reference to the
4075record.   The reference can be a reference to a  HASH  or a reference to an
4076ARRAY as determined by the arguments.
4077
4078This callback can also be passed as  an attribute without the  C<callbacks>
4079wrapper.
4080
4081=item before_out
4082X<before_out>
4083
4084This callback is invoked for each record before the record is printed.  The
4085hook is invoked with two arguments:  the current C<CSV> parser object and a
4086reference to the record.   The reference can be a reference to a  HASH or a
4087reference to an ARRAY as determined by the arguments.
4088
4089This callback can also be passed as an attribute  without the  C<callbacks>
4090wrapper.
4091
4092This callback makes the row available in C<%_> if the row is a hashref.  In
4093this case C<%_> is writable and will change the original row.
4094
4095=item on_in
4096X<on_in>
4097
4098This callback acts exactly as the L</after_in> or the L</before_out> hooks.
4099
4100This callback can also be passed as an attribute  without the  C<callbacks>
4101wrapper.
4102
4103This callback makes the row available in C<%_> if the row is a hashref.  In
4104this case C<%_> is writable and will change the original row. So e.g. with
4105
4106  my $aoh = csv (
4107      in      => \"foo\n1\n2\n",
4108      headers => "auto",
4109      on_in   => sub { $_{bar} = 2; },
4110      );
4111
4112C<$aoh> will be:
4113
4114  [ { foo => 1,
4115      bar => 2,
4116      }
4117    { foo => 2,
4118      bar => 2,
4119      }
4120    ]
4121
4122=item csv
4123
4124The I<function>  L</csv> can also be called as a method or with an existing
4125Text::CSV_XS object. This could help if the function is to be invoked a lot
4126of times and the overhead of creating the object internally over  and  over
4127again would be prevented by passing an existing instance.
4128
4129 my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
4130
4131 my $aoa = $csv->csv (in => $fh);
4132 my $aoa = csv (in => $fh, csv => $csv);
4133
4134both act the same. Running this 20000 times on a 20 lines CSV file,  showed
4135a 53% speedup.
4136
4137=back
4138
4139=head1 INTERNALS
4140
4141=over 4
4142
4143=item Combine (...)
4144
4145=item Parse (...)
4146
4147=back
4148
4149The arguments to these internal functions are deliberately not described or
4150documented in order to enable the module authors to change them  when they
4151feel the need for it.  Using them is  highly  discouraged  as  the  API may
4152change in future releases.
4153
4154=head1 EXAMPLES
4155
4156=head2 Reading a CSV file line by line:
4157
4158 my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
4159 open my $fh, "<", "file.csv" or die "file.csv: $!";
4160 while (my $row = $csv->getline ($fh)) {
4161     # do something with @$row
4162     }
4163 close $fh or die "file.csv: $!";
4164
4165or
4166
4167 my $aoa = csv (in => "file.csv", on_in => sub {
4168     # do something with %_
4169     });
4170
4171=head3 Reading only a single column
4172
4173 my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
4174 open my $fh, "<", "file.csv" or die "file.csv: $!";
4175 # get only the 4th column
4176 my @column = map { $_->[3] } @{$csv->getline_all ($fh)};
4177 close $fh or die "file.csv: $!";
4178
4179with L</csv>, you could do
4180
4181 my @column = map { $_->[0] }
4182     @{csv (in => "file.csv", fragment => "col=4")};
4183
4184=head2 Parsing CSV strings:
4185
4186 my $csv = Text::CSV_XS->new ({ keep_meta_info => 1, binary => 1 });
4187
4188 my $sample_input_string =
4189     qq{"I said, ""Hi!""",Yes,"",2.34,,"1.09","\x{20ac}",};
4190 if ($csv->parse ($sample_input_string)) {
4191     my @field = $csv->fields;
4192     foreach my $col (0 .. $#field) {
4193         my $quo = $csv->is_quoted ($col) ? $csv->{quote_char} : "";
4194         printf "%2d: %s%s%s\n", $col, $quo, $field[$col], $quo;
4195         }
4196     }
4197 else {
4198     print STDERR "parse () failed on argument: ",
4199         $csv->error_input, "\n";
4200     $csv->error_diag ();
4201     }
4202
4203=head3 Parsing CSV from memory
4204
4205Given a complete CSV data-set in scalar C<$data>,  generate a list of lists
4206to represent the rows and fields
4207
4208 # The data
4209 my $data = join "\r\n" => map { join "," => 0 .. 5 } 0 .. 5;
4210
4211 # in a loop
4212 my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
4213 open my $fh, "<", \$data;
4214 my @foo;
4215 while (my $row = $csv->getline ($fh)) {
4216     push @foo, $row;
4217     }
4218 close $fh;
4219
4220 # a single call
4221 my $foo = csv (in => \$data);
4222
4223=head2 Printing CSV data
4224
4225=head3 The fast way: using L</print>
4226
4227An example for creating C<CSV> files using the L</print> method:
4228
4229 my $csv = Text::CSV_XS->new ({ binary => 1, eol => $/ });
4230 open my $fh, ">", "foo.csv" or die "foo.csv: $!";
4231 for (1 .. 10) {
4232     $csv->print ($fh, [ $_, "$_" ]) or $csv->error_diag;
4233     }
4234 close $fh or die "foo.csv: $!";
4235
4236=head3 The slow way: using L</combine> and L</string>
4237
4238or using the slower L</combine> and L</string> methods:
4239
4240 my $csv = Text::CSV_XS->new;
4241
4242 open my $csv_fh, ">", "hello.csv" or die "hello.csv: $!";
4243
4244 my @sample_input_fields = (
4245     'You said, "Hello!"',   5.67,
4246     '"Surely"',   '',   '3.14159');
4247 if ($csv->combine (@sample_input_fields)) {
4248     print $csv_fh $csv->string, "\n";
4249     }
4250 else {
4251     print "combine () failed on argument: ",
4252         $csv->error_input, "\n";
4253     }
4254 close $csv_fh or die "hello.csv: $!";
4255
4256=head3 Generating CSV into memory
4257
4258Format a data-set (C<@foo>) into a scalar value in memory (C<$data>):
4259
4260 # The data
4261 my @foo = map { [ 0 .. 5 ] } 0 .. 3;
4262
4263 # in a loop
4264 my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1, eol => "\r\n" });
4265 open my $fh, ">", \my $data;
4266 $csv->print ($fh, $_) for @foo;
4267 close $fh;
4268
4269 # a single call
4270 csv (in => \@foo, out => \my $data);
4271
4272=head2 Rewriting CSV
4273
4274Rewrite C<CSV> files with C<;> as separator character to well-formed C<CSV>:
4275
4276 use Text::CSV_XS qw( csv );
4277 csv (in => csv (in => "bad.csv", sep_char => ";"), out => *STDOUT);
4278
4279As C<STDOUT> is now default in L</csv>, a one-liner converting a UTF-16 CSV
4280file with BOM and TAB-separation to valid UTF-8 CSV could be:
4281
4282 $ perl -C3 -MText::CSV_XS=csv -we\
4283    'csv(in=>"utf16tab.csv",encoding=>"utf16",sep=>"\t")' >utf8.csv
4284
4285=head2 Dumping database tables to CSV
4286
4287Dumping a database table can be as simple as this (TIMTOWTDI):
4288
4289 my $dbh = DBI->connect (...);
4290 my $sql = "select * from foo";
4291
4292 # using your own loop
4293 open my $fh, ">", "foo.csv" or die "foo.csv: $!\n";
4294 my $csv = Text::CSV_XS->new ({ binary => 1, eol => "\r\n" });
4295 my $sth = $dbh->prepare ($sql); $sth->execute;
4296 $csv->print ($fh, $sth->{NAME_lc});
4297 while (my $row = $sth->fetch) {
4298     $csv->print ($fh, $row);
4299     }
4300
4301 # using the csv function, all in memory
4302 csv (out => "foo.csv", in => $dbh->selectall_arrayref ($sql));
4303
4304 # using the csv function, streaming with callbacks
4305 my $sth = $dbh->prepare ($sql); $sth->execute;
4306 csv (out => "foo.csv", in => sub { $sth->fetch            });
4307 csv (out => "foo.csv", in => sub { $sth->fetchrow_hashref });
4308
4309Note that this does not discriminate between "empty" values and NULL-values
4310from the database,  as both will be the same empty field in CSV.  To enable
4311distinction between the two, use L<C<quote_empty>|/quote_empty>.
4312
4313 csv (out => "foo.csv", in => sub { $sth->fetch }, quote_empty => 1);
4314
4315If the database import utility supports special sequences to insert C<NULL>
4316values into the database,  like MySQL/MariaDB supports C<\N>,  use a filter
4317or a map
4318
4319 csv (out => "foo.csv", in => sub { $sth->fetch },
4320                     on_in => sub { $_ //= "\\N" for @{$_[1]} });
4321
4322 while (my $row = $sth->fetch) {
4323     $csv->print ($fh, [ map { $_ // "\\N" } @$row ]);
4324     }
4325
4326Note that this will not work as expected when choosing the backslash (C<\>)
4327as C<escape_char>, as that will cause the C<\> to need to be escaped by yet
4328another C<\>,  which will cause the field to need quotation and thus ending
4329up as C<"\\N"> instead of C<\N>. See also L<C<undef_str>|/undef_str>.
4330
4331 csv (out => "foo.csv", in => sub { $sth->fetch }, undef_str => "\\N");
4332
4333These special sequences are not recognized by  Text::CSV_XS  on parsing the
4334CSV generated like this, but map and filter are your friends again
4335
4336 while (my $row = $csv->getline ($fh)) {
4337     $sth->execute (map { $_ eq "\\N" ? undef : $_ } @$row);
4338     }
4339
4340 csv (in => "foo.csv", filter => { 1 => sub {
4341     $sth->execute (map { $_ eq "\\N" ? undef : $_ } @{$_[1]}); 0; }});
4342
4343=head2 Converting CSV to JSON
4344
4345 use Text::CSV_XS qw( csv );
4346 use JSON; # or Cpanel::JSON::XS for better performance
4347
4348 # AoA (no header interpretation)
4349 say encode_json (csv (in => "file.csv"));
4350
4351 # AoH (convert to structures)
4352 say encode_json (csv (in => "file.csv", bom => 1));
4353
4354Yes, it is that simple.
4355
4356=head2 The examples folder
4357
4358For more extended examples, see the F<examples/> sub-directory C<1> in the
4359original distribution or the git repository C<2>.
4360
4361 1. https://github.com/Tux/Text-CSV_XS/tree/master/examples
4362 2. https://github.com/Tux/Text-CSV_XS
4363
4364The following files can be found there:
4365
4366=over 2
4367
4368=item parser-xs.pl
4369X<parser-xs.pl>
4370
4371This can be used as a boilerplate to parse invalid C<CSV>  and parse beyond
4372(expected) errors, as an alternative to using the L</error> callback.
4373
4374 $ perl examples/parser-xs.pl bad.csv >good.csv
4375
4376=item csv-check
4377X<csv-check>
4378
4379This is a command-line tool that uses parser-xs.pl  techniques to check the
4380C<CSV> file and report on its content.
4381
4382 $ csv-check files/utf8.csv
4383 Checked files/utf8.csv  with csv-check 1.9
4384 using Text::CSV_XS 1.32 with perl 5.26.0 and Unicode 9.0.0
4385 OK: rows: 1, columns: 2
4386     sep = <,>, quo = <">, bin = <1>, eol = <"\n">
4387
4388=item csv-split
4389X<csv-split>
4390
4391This command splits C<CSV> files into smaller files,  keeping (part of) the
4392header.  Options include maximum number of (data) rows per file and maximum
4393number of columns per file or a combination of the two.
4394
4395=item csv2xls
4396X<csv2xls>
4397
4398A script to convert C<CSV> to Microsoft Excel (C<XLS>). This requires extra
4399modules L<Date::Calc> and L<Spreadsheet::WriteExcel>. The converter accepts
4400various options and can produce UTF-8 compliant Excel files.
4401
4402=item csv2xlsx
4403X<csv2xlsx>
4404
4405A script to convert C<CSV> to Microsoft Excel (C<XLSX>).  This requires the
4406modules L<Date::Calc> and L<Spreadsheet::Writer::XLSX>.  The converter does
4407accept various options including merging several C<CSV> files into a single
4408Excel file.
4409
4410=item csvdiff
4411X<csvdiff>
4412
4413A script that provides colorized diff on sorted CSV files,  assuming  first
4414line is header and first field is the key. Output options include colorized
4415ANSI escape codes or HTML.
4416
4417 $ csvdiff --html --output=diff.html file1.csv file2.csv
4418
4419=item rewrite.pl
4420X<rewrite.pl>
4421
4422A script to rewrite (in)valid CSV into valid CSV files.  Script has options
4423to generate confusing CSV files or CSV files that conform to Dutch MS-Excel
4424exports (using C<;> as separation).
4425
4426Script - by default - honors BOM  and auto-detects separation converting it
4427to default standard CSV with C<,> as separator.
4428
4429=back
4430
4431=head1 CAVEATS
4432
4433Text::CSV_XS  is I<not> designed to detect the characters used to quote and
4434separate fields.  The parsing is done using predefined  (default) settings.
4435In the examples  sub-directory,  you can find scripts  that demonstrate how
4436you could try to detect these characters yourself.
4437
4438=head2 Microsoft Excel
4439
4440The import/export from Microsoft Excel is a I<risky task>, according to the
4441documentation in C<Text::CSV::Separator>.  Microsoft uses the system's list
4442separator defined in the regional settings, which happens to be a semicolon
4443for Dutch, German and Spanish (and probably some others as well).   For the
4444English locale,  the default is a comma.   In Windows however,  the user is
4445free to choose a  predefined locale,  and then change  I<every>  individual
4446setting in it, so checking the locale is no solution.
4447
4448As of version 1.17, a lone first line with just
4449
4450  sep=;
4451
4452will be recognized and honored when parsing with L</getline>.
4453
4454=head1 TODO
4455
4456=over 2
4457
4458=item More Errors & Warnings
4459
4460New extensions ought to be  clear and concise  in reporting what  error has
4461occurred where and why, and maybe also offer a remedy to the problem.
4462
4463L</error_diag> is a (very) good start, but there is more work to be done in
4464this area.
4465
4466Basic calls  should croak or warn on  illegal parameters.  Errors should be
4467documented.
4468
4469=item setting meta info
4470
4471Future extensions might include extending the L</meta_info>, L</is_quoted>,
4472and  L</is_binary>  to accept setting these  flags for  fields,  so you can
4473specify which fields are quoted in the L</combine>/L</string> combination.
4474
4475 $csv->meta_info (0, 1, 1, 3, 0, 0);
4476 $csv->is_quoted (3, 1);
4477
4478L<Metadata Vocabulary for Tabular Data|http://w3c.github.io/csvw/metadata/>
4479(a W3C editor's draft) could be an example for supporting more metadata.
4480
4481=item Parse the whole file at once
4482
4483Implement new methods or functions  that enable parsing of a  complete file
4484at once, returning a list of hashes. Possible extension to this could be to
4485enable a column selection on the call:
4486
4487 my @AoH = $csv->parse_file ($filename, { cols => [ 1, 4..8, 12 ]});
4488
4489returning something like
4490
4491 [ { fields => [ 1, 2, "foo", 4.5, undef, "", 8 ],
4492     flags  => [ ... ],
4493     },
4494   { fields => [ ... ],
4495     .
4496     },
4497   ]
4498
4499Note that the L</csv> function already supports most of this,  but does not
4500return flags. L</getline_all> returns all rows for an open stream, but this
4501will not return flags either.  L</fragment>  can reduce the  required  rows
4502I<or> columns, but cannot combine them.
4503
4504=item Cookbook
4505
4506Write a document that has recipes for  most known  non-standard  (and maybe
4507some standard)  C<CSV> formats,  including formats that use  C<TAB>,  C<;>,
4508C<|>, or other non-comma separators.
4509
4510Examples could be taken from W3C's L<CSV on the Web: Use Cases and
4511Requirements|http://w3c.github.io/csvw/use-cases-and-requirements/index.html>
4512
4513=item Steal
4514
4515Steal good new ideas and features from L<PapaParse|http://papaparse.com> or
4516L<csvkit|http://csvkit.readthedocs.org>.
4517
4518=item Raku support
4519
4520Raku support can be found L<here|https://github.com/Tux/CSV>. The interface
4521is richer in support than the Perl5 API, as Raku supports more types.
4522
4523The Raku version does not (yet) support pure binary CSV datasets.
4524
4525=back
4526
4527=head2 NOT TODO
4528
4529=over 2
4530
4531=item combined methods
4532
4533Requests for adding means (methods) that combine L</combine> and L</string>
4534in a single call will B<not> be honored (use L</print> instead).   Likewise
4535for L</parse> and L</fields>  (use L</getline> instead), given the problems
4536with embedded newlines.
4537
4538=back
4539
4540=head2 Release plan
4541
4542No guarantees, but this is what I had in mind some time ago:
4543
4544=over 2
4545
4546=item *
4547
4548DIAGNOSTICS section in pod to *describe* the errors (see below)
4549
4550=back
4551
4552=head1 EBCDIC
4553
4554Everything should now work on native EBCDIC systems.   As the test does not
4555cover all possible codepoints and L<Encode> does not support C<utf-ebcdic>,
4556there is no guarantee that all handling of Unicode is done correctly.
4557
4558Opening C<EBCDIC> encoded files on  C<ASCII>+  systems is likely to succeed
4559using Encode's C<cp37>, C<cp1047>, or C<posix-bc>:
4560
4561 open my $fh, "<:encoding(cp1047)", "ebcdic_file.csv" or die "...";
4562
4563=head1 DIAGNOSTICS
4564
4565Still under construction ...
4566
4567If an error occurs,  C<< $csv->error_diag >> can be used to get information
4568on the cause of the failure. Note that for speed reasons the internal value
4569is never cleared on success,  so using the value returned by L</error_diag>
4570in normal cases - when no error occurred - may cause unexpected results.
4571
4572If the constructor failed, the cause can be found using L</error_diag> as a
4573class method, like C<< Text::CSV_XS->error_diag >>.
4574
4575The C<< $csv->error_diag >> method is automatically invoked upon error when
4576the constructor was called with  L<C<auto_diag>|/auto_diag>  set to  C<1> or
4577C<2>, or when L<autodie> is in effect.  When set to C<1>, this will cause a
4578C<warn> with the error message,  when set to C<2>, it will C<die>. C<2012 -
4579EOF> is excluded from L<C<auto_diag>|/auto_diag> reports.
4580
4581Errors can be (individually) caught using the L</error> callback.
4582
4583The errors as described below are available. I have tried to make the error
4584itself explanatory enough, but more descriptions will be added. For most of
4585these errors, the first three capitals describe the error category:
4586
4587=over 2
4588
4589=item *
4590INI
4591
4592Initialization error or option conflict.
4593
4594=item *
4595ECR
4596
4597Carriage-Return related parse error.
4598
4599=item *
4600EOF
4601
4602End-Of-File related parse error.
4603
4604=item *
4605EIQ
4606
4607Parse error inside quotation.
4608
4609=item *
4610EIF
4611
4612Parse error inside field.
4613
4614=item *
4615ECB
4616
4617Combine error.
4618
4619=item *
4620EHR
4621
4622HashRef parse related error.
4623
4624=back
4625
4626And below should be the complete list of error codes that can be returned:
4627
4628=over 2
4629
4630=item *
46311001 "INI - sep_char is equal to quote_char or escape_char"
4632X<1001>
4633
4634The  L<separation character|/sep_char>  cannot be equal to  L<the quotation
4635character|/quote_char> or to L<the escape character|/escape_char>,  as this
4636would invalidate all parsing rules.
4637
4638=item *
46391002 "INI - allow_whitespace with escape_char or quote_char SP or TAB"
4640X<1002>
4641
4642Using the  L<C<allow_whitespace>|/allow_whitespace>  attribute  when either
4643L<C<quote_char>|/quote_char> or L<C<escape_char>|/escape_char>  is equal to
4644C<SPACE> or C<TAB> is too ambiguous to allow.
4645
4646=item *
46471003 "INI - \r or \n in main attr not allowed"
4648X<1003>
4649
4650Using default L<C<eol>|/eol> characters in either L<C<sep_char>|/sep_char>,
4651L<C<quote_char>|/quote_char>,   or  L<C<escape_char>|/escape_char>  is  not
4652allowed.
4653
4654=item *
46551004 "INI - callbacks should be undef or a hashref"
4656X<1004>
4657
4658The value of the L<C<callbacks>|/Callbacks> attribute must be C<undef> or
4659a hash reference.
4660
4661=item *
46621005 "INI - EOL too long"
4663X<1005>
4664
4665The value passed for EOL is exceeding its maximum length (16).
4666
4667=item *
46681006 "INI - SEP too long"
4669X<1006>
4670
4671The value passed for SEP is exceeding its maximum length (16).
4672
4673=item *
46741007 "INI - QUOTE too long"
4675X<1007>
4676
4677The value passed for QUOTE is exceeding its maximum length (16).
4678
4679=item *
46801008 "INI - SEP undefined"
4681X<1008>
4682
4683The value passed for SEP should be defined and not empty.
4684
4685=item *
46861010 "INI - the header is empty"
4687X<1010>
4688
4689The header line parsed in the L</header> is empty.
4690
4691=item *
46921011 "INI - the header contains more than one valid separator"
4693X<1011>
4694
4695The header line parsed in the  L</header>  contains more than one  (unique)
4696separator character out of the allowed set of separators.
4697
4698=item *
46991012 "INI - the header contains an empty field"
4700X<1012>
4701
4702The header line parsed in the L</header> contains an empty field.
4703
4704=item *
47051013 "INI - the header contains nun-unique fields"
4706X<1013>
4707
4708The header line parsed in the  L</header>  contains at least  two identical
4709fields.
4710
4711=item *
47121014 "INI - header called on undefined stream"
4713X<1014>
4714
4715The header line cannot be parsed from an undefined source.
4716
4717=item *
47181500 "PRM - Invalid/unsupported argument(s)"
4719X<1500>
4720
4721Function or method called with invalid argument(s) or parameter(s).
4722
4723=item *
47241501 "PRM - The key attribute is passed as an unsupported type"
4725X<1501>
4726
4727The C<key> attribute is of an unsupported type.
4728
4729=item *
47301502 "PRM - The value attribute is passed without the key attribute"
4731X<1502>
4732
4733The C<value> attribute is only allowed when a valid key is given.
4734
4735=item *
47361503 "PRM - The value attribute is passed as an unsupported type"
4737X<1503>
4738
4739The C<value> attribute is of an unsupported type.
4740
4741=item *
47422010 "ECR - QUO char inside quotes followed by CR not part of EOL"
4743X<2010>
4744
4745When  L<C<eol>|/eol>  has  been  set  to  anything  but the  default,  like
4746C<"\r\t\n">,  and  the  C<"\r">  is  following  the   B<second>   (closing)
4747L<C<quote_char>|/quote_char>, where the characters following the C<"\r"> do
4748not make up the L<C<eol>|/eol> sequence, this is an error.
4749
4750=item *
47512011 "ECR - Characters after end of quoted field"
4752X<2011>
4753
4754Sequences like C<1,foo,"bar"baz,22,1> are not allowed. C<"bar"> is a quoted
4755field and after the closing double-quote, there should be either a new-line
4756sequence or a separation character.
4757
4758=item *
47592012 "EOF - End of data in parsing input stream"
4760X<2012>
4761
4762Self-explaining.  End-of-file while parsing a stream.   Can happen only
4763when reading from streams with L</getline>,  as using  L</parse> is done on
4764strings that are not required to have a trailing L<C<eol>|/eol>.
4765
4766=item *
47672013 "INI - Specification error for fragments RFC7111"
4768X<2013>
4769
4770Invalid specification for URI L</fragment> specification.
4771
4772=item *
47732014 "ENF - Inconsistent number of fields"
4774X<2014>
4775
4776Inconsistent number of fields under strict parsing.
4777
4778=item *
47792021 "EIQ - NL char inside quotes, binary off"
4780X<2021>
4781
4782Sequences like C<1,"foo\nbar",22,1> are allowed only when the binary option
4783has been selected with the constructor.
4784
4785=item *
47862022 "EIQ - CR char inside quotes, binary off"
4787X<2022>
4788
4789Sequences like C<1,"foo\rbar",22,1> are allowed only when the binary option
4790has been selected with the constructor.
4791
4792=item *
47932023 "EIQ - QUO character not allowed"
4794X<2023>
4795
4796Sequences like C<"foo "bar" baz",qu> and C<2023,",2008-04-05,"Foo, Bar",\n>
4797will cause this error.
4798
4799=item *
48002024 "EIQ - EOF cannot be escaped, not even inside quotes"
4801X<2024>
4802
4803The escape character is not allowed as last character in an input stream.
4804
4805=item *
48062025 "EIQ - Loose unescaped escape"
4807X<2025>
4808
4809An escape character should escape only characters that need escaping.
4810
4811Allowing  the escape  for other characters  is possible  with the attribute
4812L</allow_loose_escapes>.
4813
4814=item *
48152026 "EIQ - Binary character inside quoted field, binary off"
4816X<2026>
4817
4818Binary characters are not allowed by default.    Exceptions are fields that
4819contain valid UTF-8,  that will automatically be upgraded if the content is
4820valid UTF-8. Set L<C<binary>|/binary> to C<1> to accept binary data.
4821
4822=item *
48232027 "EIQ - Quoted field not terminated"
4824X<2027>
4825
4826When parsing a field that started with a quotation character,  the field is
4827expected to be closed with a quotation character.   When the parsed line is
4828exhausted before the quote is found, that field is not terminated.
4829
4830=item *
48312030 "EIF - NL char inside unquoted verbatim, binary off"
4832X<2030>
4833
4834=item *
48352031 "EIF - CR char is first char of field, not part of EOL"
4836X<2031>
4837
4838=item *
48392032 "EIF - CR char inside unquoted, not part of EOL"
4840X<2032>
4841
4842=item *
48432034 "EIF - Loose unescaped quote"
4844X<2034>
4845
4846=item *
48472035 "EIF - Escaped EOF in unquoted field"
4848X<2035>
4849
4850=item *
48512036 "EIF - ESC error"
4852X<2036>
4853
4854=item *
48552037 "EIF - Binary character in unquoted field, binary off"
4856X<2037>
4857
4858=item *
48592110 "ECB - Binary character in Combine, binary off"
4860X<2110>
4861
4862=item *
48632200 "EIO - print to IO failed. See errno"
4864X<2200>
4865
4866=item *
48673001 "EHR - Unsupported syntax for column_names ()"
4868X<3001>
4869
4870=item *
48713002 "EHR - getline_hr () called before column_names ()"
4872X<3002>
4873
4874=item *
48753003 "EHR - bind_columns () and column_names () fields count mismatch"
4876X<3003>
4877
4878=item *
48793004 "EHR - bind_columns () only accepts refs to scalars"
4880X<3004>
4881
4882=item *
48833006 "EHR - bind_columns () did not pass enough refs for parsed fields"
4884X<3006>
4885
4886=item *
48873007 "EHR - bind_columns needs refs to writable scalars"
4888X<3007>
4889
4890=item *
48913008 "EHR - unexpected error in bound fields"
4892X<3008>
4893
4894=item *
48953009 "EHR - print_hr () called before column_names ()"
4896X<3009>
4897
4898=item *
48993010 "EHR - print_hr () called with invalid arguments"
4900X<3010>
4901
4902=back
4903
4904=head1 SEE ALSO
4905
4906L<IO::File>,  L<IO::Handle>,  L<IO::Wrap>,  L<Text::CSV>,  L<Text::CSV_PP>,
4907L<Text::CSV::Encoded>,     L<Text::CSV::Separator>,    L<Text::CSV::Slurp>,
4908L<Spreadsheet::CSV> and L<Spreadsheet::Read>, and of course L<perl>.
4909
4910If you are using Raku,  have a look at C<Text::CSV> in the Raku ecosystem,
4911offering the same features.
4912
4913=head3 non-perl
4914
4915A CSV parser in JavaScript,  also used by L<W3C|http://www.w3.org>,  is the
4916multi-threaded in-browser L<PapaParse|http://papaparse.com/>.
4917
4918L<csvkit|http://csvkit.readthedocs.org> is a Python CSV parsing toolkit.
4919
4920=head1 AUTHOR
4921
4922Alan Citterman F<E<lt>alan@mfgrtl.comE<gt>> wrote the original Perl module.
4923Please don't send mail concerning Text::CSV_XS to Alan, who is not involved
4924in the C/XS part that is now the main part of the module.
4925
4926Jochen Wiedmann F<E<lt>joe@ispsoft.deE<gt>> rewrote the en- and decoding in
4927C by implementing a simple finite-state machine.   He added variable quote,
4928escape and separator characters, the binary mode and the print and getline
4929methods. See F<ChangeLog> releases 0.10 through 0.23.
4930
4931H.Merijn Brand F<E<lt>h.m.brand@xs4all.nlE<gt>> cleaned up the code,  added
4932the field flags methods,  wrote the major part of the test suite, completed
4933the documentation,   fixed most RT bugs,  added all the allow flags and the
4934L</csv> function. See F<ChangeLog> releases 0.25 and on.
4935
4936=head1 COPYRIGHT AND LICENSE
4937
4938 Copyright (C) 2007-2021 H.Merijn Brand.  All rights reserved.
4939 Copyright (C) 1998-2001 Jochen Wiedmann. All rights reserved.
4940 Copyright (C) 1997      Alan Citterman.  All rights reserved.
4941
4942This library is free software; you can redistribute it and/or modify it
4943under the same terms as Perl itself.
4944
4945=cut
4946
4947=for elvis
4948:ex:se gw=75|color guide #ff0000:
4949
4950=cut
4951