package Text::CSV;

# Thin front-end for the Text::CSV_XS-compatible backends. At load time it
# picks one backend (Text::CSV_XS, or Text::CSV_PP as fallback), appends it
# to @ISA and copies a handful of its functions into this namespace.

use strict;
use Exporter;
use Carp ();

# Package globals; declared without assignment so that the only line that
# assigns $VERSION is the one inside the BEGIN block below.
our ( $VERSION, $DEBUG, @ISA, @EXPORT_OK );
@ISA       = qw( Exporter );
@EXPORT_OK = qw( csv );

BEGIN {
    $VERSION = '2.01';
    $DEBUG   = 0;
}

# Backend module names. $XS_Version is the version handed to
# "use Text::CSV_XS VERSION" when the XS backend is loaded.
my $Module_XS  = 'Text::CSV_XS';
my $Module_PP  = 'Text::CSV_PP';
my $XS_Version = '1.46';

my $Is_Dynamic = 0;

# Backend functions that _load() aliases into the Text::CSV namespace.
my @PublicMethods = qw/
    version error_diag error_input
    known_attributes csv
    PV IV NV
/;

# Check the environment variable to decide worker module.
#
# PERL_TEXT_CSV values handled here:
#   '0' or 'Text::CSV_PP'               - pure-Perl backend only
#   '1' or 'Text::CSV_XS,Text::CSV_PP'  - XS first, fall back on pure-Perl
#   '2' or 'Text::CSV_XS'               - XS backend only
#   (not set)                           - XS first, fall back on pure-Perl
# Anything else croaks. Nothing is loaded when $Text::CSV::Worker is
# already set to a true value.
unless ($Text::CSV::Worker) {
    $Text::CSV::DEBUG and Carp::carp("Check used worker module...");

    if ( exists $ENV{PERL_TEXT_CSV} ) {
        my $requested = $ENV{PERL_TEXT_CSV};

        if ( $requested eq '0' or $requested eq 'Text::CSV_PP' ) {
            _load_pp() or Carp::croak $@;
        }
        elsif ( $requested eq '1' or $requested =~ /Text::CSV_XS\s*,\s*Text::CSV_PP/ ) {
            _load_xs() or _load_pp() or Carp::croak $@;
        }
        elsif ( $requested eq '2' or $requested eq 'Text::CSV_XS' ) {
            _load_xs() or Carp::croak $@;
        }
        else {
            Carp::croak "The value of environmental variable 'PERL_TEXT_CSV' is invalid.";
        }
    }
    else {
        _load_xs() or _load_pp() or Carp::croak $@;
    }
}

# new (normal mode)
#
# Builds an object through the selected backend, records the backend name in
# $obj->{_MODULE} and reblesses the object into the invoking class.
# Returns nothing (undef / empty list) when the backend constructor fails.
sub new {
    my $proto = shift;
    my $class = ref($proto) || $proto;

    # A false invocant, as in Text::CSV::new(0): call the backend's new() as
    # a plain function with that same false value. The block eval keeps the
    # call from propagating an exception, so a failure yields undef / the
    # empty list, and it avoids compiling a code string on every such call.
    unless ($proto) {
        return eval { $Text::CSV::Worker->can('new')->($proto) };
    }

    #if (ref $_[0] and $_[0]->{module}) {
    #    Carp::croak("Can't set 'module' in non dynamic mode.");
    #}

    my $obj = $Text::CSV::Worker->new(@_) or return;

    $obj->{_MODULE} = $Text::CSV::Worker;
    return bless $obj, $class;
}

# Version passed to "use Text::CSV_XS VERSION" when loading the XS backend.
sub require_xs_version { return $XS_Version; }

# module
#
# Class call:  returns the name of the backend selected at load time.
# Object call: returns the backend recorded in the object's _MODULE slot
#              (its class name when that slot holds a reference).
sub module {
    my $proto = shift;

    return $Text::CSV::Worker unless ref $proto;

    my $backend = $proto->{_MODULE};
    return ref($backend) ? ref($backend) : $backend;
}

# "backend" is another name for "module".
*backend = *module;

# True when the backend in use is Text::CSV_XS.
sub is_xs {
    my ($self) = @_;
    return $self->module eq $Module_XS;
}

# True when the backend in use is Text::CSV_PP.
sub is_pp {
    my ($self) = @_;
    return $self->module eq $Module_PP;
}

# Returns the (constant, false) dynamic-mode flag.
sub is_dynamic { return $Is_Dynamic; }

# Try to load the XS backend with the required version; true on success.
sub _load_xs { return _load( $Module_XS, $XS_Version ); }

# Try to load the pure-Perl backend; true on success.
sub _load_pp { return _load($Module_PP); }

# _load ($module [, $version])
#
# Loads $module (optionally with a version for "use" to check), appends it
# to @Text::CSV::ISA, records it as $Text::CSV::Worker and aliases the
# functions listed in @PublicMethods into the Text::CSV namespace.
# Returns 1 on success. On failure returns nothing and leaves the load
# error in $@ for the caller to report.
sub _load {
    my ( $module, $version ) = @_;
    $version ||= '';

    $Text::CSV::DEBUG and Carp::carp "Load $module.";

    # "use MODULE VERSION" needs bareword arguments, hence the string eval.
    # $module and $version only ever come from the constants in this file.
    eval qq| use $module $version |;

    return if $@;

    push @Text::CSV::ISA, $module;
    $Text::CSV::Worker = $module;

    local $^W;    # switch off -w warnings while the globs are assigned
    no strict qw(refs);

    for my $method (@PublicMethods) {
        *{"Text::CSV::$method"} = \&{"$module\::$method"};
    }
    return 1;
}

1;
__END__

=pod

=head1 NAME

Text::CSV - comma-separated values manipulator (using XS or PurePerl)


=head1 SYNOPSIS

This section is taken from Text::CSV_XS.

 # Functional interface
 use Text::CSV qw( csv );

 # Read whole file in memory
 my $aoa = csv (in => "data.csv");    # as array of array
 my $aoh = csv (in => "data.csv",
                headers => "auto");   # as array of hash

 # Write array of arrays as csv file
 csv (in => $aoa, out => "file.csv", sep_char=> ";");

 # Only show lines where "code" is odd
 csv (in => "data.csv", filter => { code => sub { $_ % 2 }});

 # Object interface
 use Text::CSV;

 my @rows;
 # Read/parse CSV
 my $csv = Text::CSV->new ({ binary => 1, auto_diag => 1 });
 open my $fh, "<:encoding(utf8)", "test.csv" or die "test.csv: $!";
 while (my $row = $csv->getline ($fh)) {
     $row->[2] =~ m/pattern/ or next; # 3rd field should match
     push @rows, $row;
     }
 close $fh;

 # and write as CSV
 open $fh, ">:encoding(utf8)", "new.csv" or die "new.csv: $!";
 $csv->say ($fh, $_) for @rows;
 close $fh or die "new.csv: $!";

=head1 DESCRIPTION

Text::CSV is a thin wrapper for L<Text::CSV_XS>-compatible modules now.
All the backend modules provide facilities for the composition and
decomposition of comma-separated values. Text::CSV uses Text::CSV_XS
by default, and when Text::CSV_XS is not available, falls back on
L<Text::CSV_PP>, which is bundled in the same distribution as this module.

=head1 CHOOSING BACKEND

This module respects an environmental variable called C<PERL_TEXT_CSV>
when it decides which backend module to use. If this environmental variable
is not set, it tries to load Text::CSV_XS, and if Text::CSV_XS is not
available, falls back on Text::CSV_PP.

If you never want it to fall back on Text::CSV_PP, set the variable
like this (C<export> may be C<setenv>, C<set> and the like, depending
on your environment):

    > export PERL_TEXT_CSV=Text::CSV_XS

If you prefer Text::CSV_XS but still want the fallback on Text::CSV_PP
(this is the default behavior), then:

    > export PERL_TEXT_CSV=Text::CSV_XS,Text::CSV_PP

You may also want to set this variable at the top of your test files, in order
not to be bothered with incompatibilities between backends (you need to wrap
this in C<BEGIN>, and set it before actually C<use>-ing the Text::CSV module,
as it decides its backend as soon as it's loaded):

    BEGIN { $ENV{PERL_TEXT_CSV}='Text::CSV_PP'; }
    use Text::CSV;

=head1 NOTES

This section is also taken from Text::CSV_XS.

=head2 Embedded newlines

B<Important Note>: The default behavior is to accept only ASCII characters
in the range from C<0x20> (space) to C<0x7E> (tilde). This means that the
fields can not contain newlines. If your data contains newlines embedded in
fields, or characters above C<0x7E> (tilde), or binary data, you B<I<must>>
set C<< binary => 1 >> in the call to L</new>. To cover the widest range of
parsing options, you will always want to set binary.
223 224But you still have the problem that you have to pass a correct line to the 225L</parse> method, which is more complicated from the usual point of usage: 226 227 my $csv = Text::CSV->new ({ binary => 1, eol => $/ }); 228 while (<>) { # WRONG! 229 $csv->parse ($_); 230 my @fields = $csv->fields (); 231 } 232 233this will break, as the C<while> might read broken lines: it does not care 234about the quoting. If you need to support embedded newlines, the way to go 235is to B<not> pass L<C<eol>|/eol> in the parser (it accepts C<\n>, C<\r>, 236B<and> C<\r\n> by default) and then 237 238 my $csv = Text::CSV->new ({ binary => 1 }); 239 open my $fh, "<", $file or die "$file: $!"; 240 while (my $row = $csv->getline ($fh)) { 241 my @fields = @$row; 242 } 243 244The old(er) way of using global file handles is still supported 245 246 while (my $row = $csv->getline (*ARGV)) { ... } 247 248=head2 Unicode 249 250Unicode is only tested to work with perl-5.8.2 and up. 251 252See also L</BOM>. 253 254The simplest way to ensure the correct encoding is used for in- and output 255is by either setting layers on the filehandles, or setting the L</encoding> 256argument for L</csv>. 257 258 open my $fh, "<:encoding(UTF-8)", "in.csv" or die "in.csv: $!"; 259or 260 my $aoa = csv (in => "in.csv", encoding => "UTF-8"); 261 262 open my $fh, ">:encoding(UTF-8)", "out.csv" or die "out.csv: $!"; 263or 264 csv (in => $aoa, out => "out.csv", encoding => "UTF-8"); 265 266On parsing (both for L</getline> and L</parse>), if the source is marked 267being UTF8, then all fields that are marked binary will also be marked UTF8. 268 269On combining (L</print> and L</combine>): if any of the combining fields 270was marked UTF8, the resulting string will be marked as UTF8. Note however 271that all fields I<before> the first field marked UTF8 and contained 8-bit 272characters that were not upgraded to UTF8, these will be C<bytes> in the 273resulting string too, possibly causing unexpected errors. 
If you pass data 274of different encoding, or you don't know if there is different encoding, 275force it to be upgraded before you pass them on: 276 277 $csv->print ($fh, [ map { utf8::upgrade (my $x = $_); $x } @data ]); 278 279For complete control over encoding, please use L<Text::CSV::Encoded>: 280 281 use Text::CSV::Encoded; 282 my $csv = Text::CSV::Encoded->new ({ 283 encoding_in => "iso-8859-1", # the encoding comes into Perl 284 encoding_out => "cp1252", # the encoding comes out of Perl 285 }); 286 287 $csv = Text::CSV::Encoded->new ({ encoding => "utf8" }); 288 # combine () and print () accept *literally* utf8 encoded data 289 # parse () and getline () return *literally* utf8 encoded data 290 291 $csv = Text::CSV::Encoded->new ({ encoding => undef }); # default 292 # combine () and print () accept UTF8 marked data 293 # parse () and getline () return UTF8 marked data 294 295=head2 BOM 296 297BOM (or Byte Order Mark) handling is available only inside the L</header> 298method. This method supports the following encodings: C<utf-8>, C<utf-1>, 299C<utf-32be>, C<utf-32le>, C<utf-16be>, C<utf-16le>, C<utf-ebcdic>, C<scsu>, 300C<bocu-1>, and C<gb-18030>. See L<Wikipedia|https://en.wikipedia.org/wiki/Byte_order_mark>. 301 302If a file has a BOM, the easiest way to deal with that is 303 304 my $aoh = csv (in => $file, detect_bom => 1); 305 306All records will be encoded based on the detected BOM. 307 308This implies a call to the L</header> method, which defaults to also set 309the L</column_names>. So this is B<not> the same as 310 311 my $aoh = csv (in => $file, headers => "auto"); 312 313which only reads the first record to set L</column_names> but ignores any 314meaning of possible present BOM. 315 316=head1 METHODS 317 318This section is also taken from Text::CSV_XS. 319 320=head2 version 321 322(Class method) Returns the current module version. 323 324=head2 new 325 326(Class method) Returns a new instance of class Text::CSV. 
The attributes
are described by the (optional) hash ref C<\%attr>.

 my $csv = Text::CSV->new ({ attributes ... });

The following attributes are available:

=head3 eol

 my $csv = Text::CSV->new ({ eol => $/ });
           $csv->eol (undef);
 my $eol = $csv->eol;

The end-of-line string to add to rows for L</print> or the record separator
for L</getline>.

When not passed in a B<parser> instance, the default behavior is to accept
C<\n>, C<\r>, and C<\r\n>, so it is probably safer to not specify C<eol> at
all. Passing C<undef> or the empty string behaves the same.

When not passed in a B<generating> instance, records are not terminated at
all, so it is probably wise to pass something you expect. A safe choice for
C<eol> on output is either C<$/> or C<\r\n>.

Common values for C<eol> are C<"\012"> (C<\n> or Line Feed), C<"\015\012">
(C<\r\n> or Carriage Return, Line Feed), and C<"\015"> (C<\r> or Carriage
Return). The L<C<eol>|/eol> attribute cannot exceed 7 (ASCII) characters.

If both C<$/> and L<C<eol>|/eol> equal C<"\015">, lines that end on
only a Carriage Return without Line Feed will be L</parse>d correctly.

=head3 sep_char

 my $csv = Text::CSV->new ({ sep_char => ";" });
         $csv->sep_char (";");
 my $c = $csv->sep_char;

The char used to separate fields, by default a comma (C<,>). Limited to a
single-byte character, usually in the range from C<0x20> (space) to C<0x7E>
(tilde). When longer sequences are required, use L<C<sep>|/sep>.

The separation character can not be equal to the quote character or to the
escape character.

=head3 sep

 my $csv = Text::CSV->new ({ sep => "\N{FULLWIDTH COMMA}" });
           $csv->sep (";");
 my $sep = $csv->sep;

The chars used to separate fields, by default undefined. Limited to 8 bytes.

When set, overrules L<C<sep_char>|/sep_char>.
If its length is one byte it 379acts as an alias to L<C<sep_char>|/sep_char>. 380 381=head3 quote_char 382 383 my $csv = Text::CSV->new ({ quote_char => "'" }); 384 $csv->quote_char (undef); 385 my $c = $csv->quote_char; 386 387The character to quote fields containing blanks or binary data, by default 388the double quote character (C<">). A value of undef suppresses quote chars 389(for simple cases only). Limited to a single-byte character, usually in the 390range from C<0x20> (space) to C<0x7E> (tilde). When longer sequences are 391required, use L<C<quote>|/quote>. 392 393C<quote_char> can not be equal to L<C<sep_char>|/sep_char>. 394 395=head3 quote 396 397 my $csv = Text::CSV->new ({ quote => "\N{FULLWIDTH QUOTATION MARK}" }); 398 $csv->quote ("'"); 399 my $quote = $csv->quote; 400 401The chars used to quote fields, by default undefined. Limited to 8 bytes. 402 403When set, overrules L<C<quote_char>|/quote_char>. If its length is one byte 404it acts as an alias to L<C<quote_char>|/quote_char>. 405 406This method does not support C<undef>. Use L<C<quote_char>|/quote_char> to 407disable quotation. 408 409=head3 escape_char 410 411 my $csv = Text::CSV->new ({ escape_char => "\\" }); 412 $csv->escape_char (":"); 413 my $c = $csv->escape_char; 414 415The character to escape certain characters inside quoted fields. This is 416limited to a single-byte character, usually in the range from C<0x20> 417(space) to C<0x7E> (tilde). 418 419The C<escape_char> defaults to being the double-quote mark (C<">). In other 420words the same as the default L<C<quote_char>|/quote_char>. This means that 421doubling the quote mark in a field escapes it: 422 423 "foo","bar","Escape ""quote mark"" with two ""quote marks""","baz" 424 425If you change the L<C<quote_char>|/quote_char> without changing the 426C<escape_char>, the C<escape_char> will still be the double-quote (C<">). 
If instead you want to escape the L<C<quote_char>|/quote_char> by doubling
it you will need to also change the C<escape_char> to be the same as what
you have changed the L<C<quote_char>|/quote_char> to.

Setting C<escape_char> to C<undef> or C<""> will disable escaping completely
and is greatly discouraged. This will also disable C<escape_null>.

The escape character can not be equal to the separation character.

=head3 binary

 my $csv = Text::CSV->new ({ binary => 1 });
         $csv->binary (0);
 my $f = $csv->binary;

If this attribute is C<1>, you may use binary characters in quoted fields,
including line feeds, carriage returns and C<NULL> bytes. (The latter could
be escaped as C<"0>.) By default this feature is off.

If a string is marked UTF8, C<binary> will be turned on automatically when
binary characters other than C<CR> and C<NL> are encountered. Note that a
simple string like C<"\x{00a0}"> might still be binary, but not marked UTF8,
so setting C<< { binary => 1 } >> is still a wise option.

=head3 strict

 my $csv = Text::CSV->new ({ strict => 1 });
         $csv->strict (0);
 my $f = $csv->strict;

If this attribute is set to C<1>, any row that parses to a different number
of fields than the previous row will cause the parser to throw error 2014.

=head3 skip_empty_rows

 my $csv = Text::CSV->new ({ skip_empty_rows => 1 });
         $csv->skip_empty_rows (0);
 my $f = $csv->skip_empty_rows;

If this attribute is set to C<1>, any row that has an L</eol> immediately
following the start of line will be skipped. Default behavior is to return
one single empty field.

This attribute is only used in parsing.

=head3 formula_handling

=head3 formula

 my $csv = Text::CSV->new ({ formula => "none" });
         $csv->formula ("none");
 my $f = $csv->formula;

This defines the behavior of fields containing I<formulas>.
As formulas are 481considered dangerous in spreadsheets, this attribute can define an optional 482action to be taken if a field starts with an equal sign (C<=>). 483 484For purpose of code-readability, this can also be written as 485 486 my $csv = Text::CSV->new ({ formula_handling => "none" }); 487 $csv->formula_handling ("none"); 488 my $f = $csv->formula_handling; 489 490Possible values for this attribute are 491 492=over 2 493 494=item none 495 496Take no specific action. This is the default. 497 498 $csv->formula ("none"); 499 500=item die 501 502Cause the process to C<die> whenever a leading C<=> is encountered. 503 504 $csv->formula ("die"); 505 506=item croak 507 508Cause the process to C<croak> whenever a leading C<=> is encountered. (See 509L<Carp>) 510 511 $csv->formula ("croak"); 512 513=item diag 514 515Report position and content of the field whenever a leading C<=> is found. 516The value of the field is unchanged. 517 518 $csv->formula ("diag"); 519 520=item empty 521 522Replace the content of fields that start with a C<=> with the empty string. 523 524 $csv->formula ("empty"); 525 $csv->formula (""); 526 527=item undef 528 529Replace the content of fields that start with a C<=> with C<undef>. 530 531 $csv->formula ("undef"); 532 $csv->formula (undef); 533 534=item a callback 535 536Modify the content of fields that start with a C<=> with the return-value 537of the callback. The original content of the field is available inside the 538callback as C<$_>; 539 540 # Replace all formula's with 42 541 $csv->formula (sub { 42; }); 542 543 # same as $csv->formula ("empty") but slower 544 $csv->formula (sub { "" }); 545 546 # Allow =4+12 547 $csv->formula (sub { s/^=(\d+\+\d+)$/$1/eer }); 548 549 # Allow more complex calculations 550 $csv->formula (sub { eval { s{^=([-+*/0-9()]+)$}{$1}ee }; $_ }); 551 552=back 553 554All other values will give a warning and then fallback to C<diag>. 

=head3 decode_utf8

 my $csv = Text::CSV->new ({ decode_utf8 => 1 });
         $csv->decode_utf8 (0);
 my $f = $csv->decode_utf8;

This attribute defaults to TRUE.

While I<parsing>, fields that are valid UTF-8 are automatically set to be
UTF-8, so that

 $csv->parse ("\xC4\xA8\n");

results in

 PV("\304\250"\0) [UTF8 "\x{128}"]

Sometimes it might not be a desired action. To prevent those upgrades, set
this attribute to false, and the result will be

 PV("\304\250"\0)

=head3 auto_diag

 my $csv = Text::CSV->new ({ auto_diag => 1 });
         $csv->auto_diag (2);
 my $l = $csv->auto_diag;

Setting this attribute to a number between C<1> and C<9> causes L</error_diag>
to be automatically called in void context upon errors.

In case of error C<2012 - EOF>, this call will be void.

If C<auto_diag> is set to a numeric value greater than C<1>, it will C<die>
on errors instead of C<warn>. If set to anything unrecognized, it will be
silently ignored.

Future extensions to this feature will include more reliable auto-detection
of C<autodie> being active in the scope of which the error occurred which
will increment the value of C<auto_diag> with C<1> the moment the error is
detected.

=head3 diag_verbose

 my $csv = Text::CSV->new ({ diag_verbose => 1 });
         $csv->diag_verbose (2);
 my $l = $csv->diag_verbose;

Set the verbosity of the output triggered by C<auto_diag>. Currently only
adds the current input-record-number (if known) to the diagnostic output
with an indication of the position of the error.

=head3 blank_is_undef

 my $csv = Text::CSV->new ({ blank_is_undef => 1 });
         $csv->blank_is_undef (0);
 my $f = $csv->blank_is_undef;

Under normal circumstances, C<CSV> data makes no distinction between quoted-
and unquoted empty fields.
These both end up in an empty string field once 616read, thus 617 618 1,"",," ",2 619 620is read as 621 622 ("1", "", "", " ", "2") 623 624When I<writing> C<CSV> files with either L<C<always_quote>|/always_quote> 625or L<C<quote_empty>|/quote_empty> set, the unquoted I<empty> field is the 626result of an undefined value. To enable this distinction when I<reading> 627C<CSV> data, the C<blank_is_undef> attribute will cause unquoted empty 628fields to be set to C<undef>, causing the above to be parsed as 629 630 ("1", "", undef, " ", "2") 631 632Note that this is specifically important when loading C<CSV> fields into a 633database that allows C<NULL> values, as the perl equivalent for C<NULL> is 634C<undef> in L<DBI> land. 635 636=head3 empty_is_undef 637 638 my $csv = Text::CSV->new ({ empty_is_undef => 1 }); 639 $csv->empty_is_undef (0); 640 my $f = $csv->empty_is_undef; 641 642Going one step further than L<C<blank_is_undef>|/blank_is_undef>, this 643attribute converts all empty fields to C<undef>, so 644 645 1,"",," ",2 646 647is read as 648 649 (1, undef, undef, " ", 2) 650 651Note that this affects only fields that are originally empty, not fields 652that are empty after stripping allowed whitespace. YMMV. 653 654=head3 allow_whitespace 655 656 my $csv = Text::CSV->new ({ allow_whitespace => 1 }); 657 $csv->allow_whitespace (0); 658 my $f = $csv->allow_whitespace; 659 660When this option is set to true, the whitespace (C<TAB>'s and C<SPACE>'s) 661surrounding the separation character is removed when parsing. If either 662C<TAB> or C<SPACE> is one of the three characters L<C<sep_char>|/sep_char>, 663L<C<quote_char>|/quote_char>, or L<C<escape_char>|/escape_char> it will not 664be considered whitespace. 665 666Now lines like: 667 668 1 , "foo" , bar , 3 , zapp 669 670are parsed as valid C<CSV>, even though it violates the C<CSV> specs. 671 672Note that B<all> whitespace is stripped from both start and end of each 673field. 
That would make it I<more> than a I<feature> to enable parsing bad 674C<CSV> lines, as 675 676 1, 2.0, 3, ape , monkey 677 678will now be parsed as 679 680 ("1", "2.0", "3", "ape", "monkey") 681 682even if the original line was perfectly acceptable C<CSV>. 683 684=head3 allow_loose_quotes 685 686 my $csv = Text::CSV->new ({ allow_loose_quotes => 1 }); 687 $csv->allow_loose_quotes (0); 688 my $f = $csv->allow_loose_quotes; 689 690By default, parsing unquoted fields containing L<C<quote_char>|/quote_char> 691characters like 692 693 1,foo "bar" baz,42 694 695would result in parse error 2034. Though it is still bad practice to allow 696this format, we cannot help the fact that some vendors make their 697applications spit out lines styled this way. 698 699If there is B<really> bad C<CSV> data, like 700 701 1,"foo "bar" baz",42 702 703or 704 705 1,""foo bar baz"",42 706 707there is a way to get this data-line parsed and leave the quotes inside the 708quoted field as-is. This can be achieved by setting C<allow_loose_quotes> 709B<AND> making sure that the L<C<escape_char>|/escape_char> is I<not> equal 710to L<C<quote_char>|/quote_char>. 711 712=head3 allow_loose_escapes 713 714 my $csv = Text::CSV->new ({ allow_loose_escapes => 1 }); 715 $csv->allow_loose_escapes (0); 716 my $f = $csv->allow_loose_escapes; 717 718Parsing fields that have L<C<escape_char>|/escape_char> characters that 719escape characters that do not need to be escaped, like: 720 721 my $csv = Text::CSV->new ({ escape_char => "\\" }); 722 $csv->parse (qq{1,"my bar\'s",baz,42}); 723 724would result in parse error 2025. Though it is bad practice to allow this 725format, this attribute enables you to treat all escape character sequences 726equal. 
727 728=head3 allow_unquoted_escape 729 730 my $csv = Text::CSV->new ({ allow_unquoted_escape => 1 }); 731 $csv->allow_unquoted_escape (0); 732 my $f = $csv->allow_unquoted_escape; 733 734A backward compatibility issue where L<C<escape_char>|/escape_char> differs 735from L<C<quote_char>|/quote_char> prevents L<C<escape_char>|/escape_char> 736to be in the first position of a field. If L<C<quote_char>|/quote_char> is 737equal to the default C<"> and L<C<escape_char>|/escape_char> is set to C<\>, 738this would be illegal: 739 740 1,\0,2 741 742Setting this attribute to C<1> might help to overcome issues with backward 743compatibility and allow this style. 744 745=head3 always_quote 746 747 my $csv = Text::CSV->new ({ always_quote => 1 }); 748 $csv->always_quote (0); 749 my $f = $csv->always_quote; 750 751By default the generated fields are quoted only if they I<need> to be. For 752example, if they contain the separator character. If you set this attribute 753to C<1> then I<all> defined fields will be quoted. (C<undef> fields are not 754quoted, see L</blank_is_undef>). This makes it quite often easier to handle 755exported data in external applications. 756 757=head3 quote_space 758 759 my $csv = Text::CSV->new ({ quote_space => 1 }); 760 $csv->quote_space (0); 761 my $f = $csv->quote_space; 762 763By default, a space in a field would trigger quotation. As no rule exists 764this to be forced in C<CSV>, nor any for the opposite, the default is true 765for safety. You can exclude the space from this trigger by setting this 766attribute to 0. 767 768=head3 quote_empty 769 770 my $csv = Text::CSV->new ({ quote_empty => 1 }); 771 $csv->quote_empty (0); 772 my $f = $csv->quote_empty; 773 774By default the generated fields are quoted only if they I<need> to be. An 775empty (defined) field does not need quotation. If you set this attribute to 776C<1> then I<empty> defined fields will be quoted. (C<undef> fields are not 777quoted, see L</blank_is_undef>). 
See also L<C<always_quote>|/always_quote>.

=head3 quote_binary

 my $csv = Text::CSV->new ({ quote_binary => 1 });
         $csv->quote_binary (0);
 my $f = $csv->quote_binary;

By default, all "unsafe" bytes inside a string cause the combined field to
be quoted. By setting this attribute to C<0>, you can disable that trigger
for bytes >= C<0x7F>.

=head3 escape_null

 my $csv = Text::CSV->new ({ escape_null => 1 });
         $csv->escape_null (0);
 my $f = $csv->escape_null;

By default, a C<NULL> byte in a field would be escaped. This option enables
you to treat the C<NULL> byte as a simple binary character in binary mode
(the C<< { binary => 1 } >> is set). The default is true. You can prevent
C<NULL> escapes by setting this attribute to C<0>.

When the C<escape_char> attribute is set to undefined, this attribute will
be set to false.

The default setting will encode "=\x00=" as

 "="0="

With C<escape_null> set to C<0>, this will result in

 "=\x00="

The default when using the C<csv> function is C<false>.

For backward compatibility reasons, the deprecated old name C<quote_null>
is still recognized.

=head3 keep_meta_info

 my $csv = Text::CSV->new ({ keep_meta_info => 1 });
         $csv->keep_meta_info (0);
 my $f = $csv->keep_meta_info;

By default, the parsing of input records is as simple and fast as possible.
However, some parsing information - like quotation of the original field -
is lost in that process. Setting this flag to true enables retrieving that
information after parsing with the methods L</meta_info>, L</is_quoted>,
and L</is_binary> described below. Default is false for performance.

If you set this attribute to a value greater than 9, then you can control
output quotation style like it was used in the input of the last parsed
record (unless quotation was added because of other reasons).
831 832 my $csv = Text::CSV->new ({ 833 binary => 1, 834 keep_meta_info => 1, 835 quote_space => 0, 836 }); 837 838 my $row = $csv->parse (q{1,,"", ," ",f,"g","h""h",help,"help"}); 839 840 $csv->print (*STDOUT, \@row); 841 # 1,,, , ,f,g,"h""h",help,help 842 $csv->keep_meta_info (11); 843 $csv->print (*STDOUT, \@row); 844 # 1,,"", ," ",f,"g","h""h",help,"help" 845 846=head3 undef_str 847 848 my $csv = Text::CSV->new ({ undef_str => "\\N" }); 849 $csv->undef_str (undef); 850 my $s = $csv->undef_str; 851 852This attribute optionally defines the output of undefined fields. The value 853passed is not changed at all, so if it needs quotation, the quotation needs 854to be included in the value of the attribute. Use with caution, as passing 855a value like C<",",,,,"""> will for sure mess up your output. The default 856for this attribute is C<undef>, meaning no special treatment. 857 858This attribute is useful when exporting CSV data to be imported in custom 859loaders, like for MySQL, that recognize special sequences for C<NULL> data. 860 861This attribute has no meaning when parsing CSV data. 862 863=head3 comment_str 864 865 my $csv = Text::CSV->new ({ comment_str => "#" }); 866 $csv->comment_str (undef); 867 my $s = $csv->comment_str; 868 869This attribute optionally defines a string to be recognized as comment. If 870this attribute is defined, all lines starting with this sequence will not 871be parsed as CSV but skipped as comment. 872 873This attribute has no meaning when generating CSV. 874 875Comment strings that start with any of the special characters/sequences are 876not supported (so it cannot start with any of L</sep_char>, L</quote_char>, 877L</escape_char>, L</sep>, L</quote>, or L</eol>). 878 879For convenience, C<comment> is an alias for C<comment_str>. 

=head3 verbatim

 my $csv = Text::CSV->new ({ verbatim => 1 });
         $csv->verbatim (0);
 my $f = $csv->verbatim;

This is a quite controversial attribute to set, but makes some hard things
possible.

The rationale behind this attribute is to tell the parser that the normally
special characters newline (C<NL>) and Carriage Return (C<CR>) will not be
special when this flag is set, and be dealt with as being ordinary binary
characters. This will ease working with data with embedded newlines.

When C<verbatim> is used with L</getline>, L</getline> auto-C<chomp>'s
every line.

Imagine a file format like

 M^^Hans^Janssen^Klas 2\n2A^Ja^11-06-2007#\r\n

where the line ending is a very specific C<"#\r\n">, and the sep_char is a
C<^> (caret). None of the fields is quoted, but embedded binary data is
likely to be present. With the specific line ending, this should not be too
hard to detect.

By default, Text::CSV's parse function is instructed to only know about
C<"\n"> and C<"\r"> to be legal line endings, and so has to deal with the
embedded newline as a real C<end-of-line>, so it can scan the next line if
binary is true, and the newline is inside a quoted field. With this option,
we tell L</parse> to parse the line as if C<"\n"> is just nothing more than
a binary character.

For L</parse> this means that the parser has no more idea about line ending
and L</getline> C<chomp>s line endings on reading.

=head3 types

A set of column types; the attribute is immediately passed to the L</types>
method.

=head3 callbacks

See the L</Callbacks> section below.
925 926=head3 accessors 927 928To sum it up, 929 930 $csv = Text::CSV->new (); 931 932is equivalent to 933 934 $csv = Text::CSV->new ({ 935 eol => undef, # \r, \n, or \r\n 936 sep_char => ',', 937 sep => undef, 938 quote_char => '"', 939 quote => undef, 940 escape_char => '"', 941 binary => 0, 942 decode_utf8 => 1, 943 auto_diag => 0, 944 diag_verbose => 0, 945 blank_is_undef => 0, 946 empty_is_undef => 0, 947 allow_whitespace => 0, 948 allow_loose_quotes => 0, 949 allow_loose_escapes => 0, 950 allow_unquoted_escape => 0, 951 always_quote => 0, 952 quote_empty => 0, 953 quote_space => 1, 954 escape_null => 1, 955 quote_binary => 1, 956 keep_meta_info => 0, 957 strict => 0, 958 skip_empty_rows => 0, 959 formula => 0, 960 verbatim => 0, 961 undef_str => undef, 962 comment_str => undef, 963 types => undef, 964 callbacks => undef, 965 }); 966 967For all of the above mentioned flags, an accessor method is available where 968you can inquire the current value, or change the value 969 970 my $quote = $csv->quote_char; 971 $csv->binary (1); 972 973It is not wise to change these settings halfway through writing C<CSV> data 974to a stream. If however you want to create a new stream using the available 975C<CSV> object, there is no harm in changing them. 976 977If the L</new> constructor call fails, it returns C<undef>, and makes the 978fail reason available through the L</error_diag> method. 979 980 $csv = Text::CSV->new ({ ecs_char => 1 }) or 981 die "".Text::CSV->error_diag (); 982 983L</error_diag> will return a string like 984 985 "INI - Unknown attribute 'ecs_char'" 986 987=head2 known_attributes 988 989 @attr = Text::CSV->known_attributes; 990 @attr = Text::CSV::known_attributes; 991 @attr = $csv->known_attributes; 992 993This method will return an ordered list of all the supported attributes as 994described above. This can be useful for knowing what attributes are valid 995in classes that use or extend Text::CSV. 
996 997=head2 print 998 999 $status = $csv->print ($fh, $colref); 1000 1001Similar to L</combine> + L</string> + L</print>, but much more efficient. 1002It expects an array ref as input (not an array!) and the resulting string 1003is not really created, but immediately written to the C<$fh> object, 1004typically an IO handle or any other object that offers a L</print> method. 1005 1006For performance reasons C<print> does not create a result string, so all 1007L</string>, L</status>, L</fields>, and L</error_input> methods will return 1008undefined information after executing this method. 1009 1010If C<$colref> is C<undef> (explicit, not through a variable argument) and 1011L</bind_columns> was used to specify fields to be printed, it is possible 1012to make performance improvements, as otherwise data would have to be copied 1013as arguments to the method call: 1014 1015 $csv->bind_columns (\($foo, $bar)); 1016 $status = $csv->print ($fh, undef); 1017 1018A short benchmark 1019 1020 my @data = ("aa" .. "zz"); 1021 $csv->bind_columns (\(@data)); 1022 1023 $csv->print ($fh, [ @data ]); # 11800 recs/sec 1024 $csv->print ($fh, \@data ); # 57600 recs/sec 1025 $csv->print ($fh, undef ); # 48500 recs/sec 1026 1027=head2 say 1028 1029 $status = $csv->say ($fh, $colref); 1030 1031Like L<C<print>|/print>, but L<C<eol>|/eol> defaults to C<$\>. 1032 1033=head2 print_hr 1034 1035 $csv->print_hr ($fh, $ref); 1036 1037Provides an easy way to print a C<$ref> (as fetched with L</getline_hr>) 1038provided the column names are set with L</column_names>. 1039 1040It is just a wrapper method with basic parameter checks over 1041 1042 $csv->print ($fh, [ map { $ref->{$_} } $csv->column_names ]); 1043 1044=head2 combine 1045 1046 $status = $csv->combine (@fields); 1047 1048This method constructs a C<CSV> record from C<@fields>, returning success 1049or failure. Failure can result from lack of arguments or an argument that 1050contains an invalid character. 
Upon success, L</string> can be called to 1051retrieve the resultant C<CSV> string. Upon failure, the value returned by 1052L</string> is undefined and L</error_input> could be called to retrieve the 1053invalid argument. 1054 1055=head2 string 1056 1057 $line = $csv->string (); 1058 1059This method returns the input to L</parse> or the resultant C<CSV> string 1060of L</combine>, whichever was called more recently. 1061 1062=head2 getline 1063 1064 $colref = $csv->getline ($fh); 1065 1066This is the counterpart to L</print>, as L</parse> is the counterpart to 1067L</combine>: it parses a row from the C<$fh> handle using the L</getline> 1068method associated with C<$fh> and parses this row into an array ref. This 1069array ref is returned by the function or C<undef> for failure. When C<$fh> 1070does not support C<getline>, you are likely to hit errors. 1071 1072When fields are bound with L</bind_columns> the return value is a reference 1073to an empty list. 1074 1075The L</string>, L</fields>, and L</status> methods are meaningless again. 1076 1077=head2 getline_all 1078 1079 $arrayref = $csv->getline_all ($fh); 1080 $arrayref = $csv->getline_all ($fh, $offset); 1081 $arrayref = $csv->getline_all ($fh, $offset, $length); 1082 1083This will return a reference to a list of L<getline ($fh)|/getline> results. 1084In this call, C<keep_meta_info> is disabled. If C<$offset> is negative, as 1085with C<splice>, only the last C<abs ($offset)> records of C<$fh> are taken 1086into consideration. 
1087 1088Given a CSV file with 10 lines: 1089 1090 lines call 1091 ----- --------------------------------------------------------- 1092 0..9 $csv->getline_all ($fh) # all 1093 0..9 $csv->getline_all ($fh, 0) # all 1094 8..9 $csv->getline_all ($fh, 8) # start at 8 1095 - $csv->getline_all ($fh, 0, 0) # start at 0 first 0 rows 1096 0..4 $csv->getline_all ($fh, 0, 5) # start at 0 first 5 rows 1097 4..5 $csv->getline_all ($fh, 4, 2) # start at 4 first 2 rows 1098 8..9 $csv->getline_all ($fh, -2) # last 2 rows 1099 6..7 $csv->getline_all ($fh, -4, 2) # first 2 of last 4 rows 1100 1101=head2 getline_hr 1102 1103The L</getline_hr> and L</column_names> methods work together to allow you 1104to have rows returned as hashrefs. You must call L</column_names> first to 1105declare your column names. 1106 1107 $csv->column_names (qw( code name price description )); 1108 $hr = $csv->getline_hr ($fh); 1109 print "Price for $hr->{name} is $hr->{price} EUR\n"; 1110 1111L</getline_hr> will croak if called before L</column_names>. 1112 1113Note that L</getline_hr> creates a hashref for every row and will be much 1114slower than the combined use of L</bind_columns> and L</getline> but still 1115offering the same easy to use hashref inside the loop: 1116 1117 my @cols = @{$csv->getline ($fh)}; 1118 $csv->column_names (@cols); 1119 while (my $row = $csv->getline_hr ($fh)) { 1120 print $row->{price}; 1121 } 1122 1123Could easily be rewritten to the much faster: 1124 1125 my @cols = @{$csv->getline ($fh)}; 1126 my $row = {}; 1127 $csv->bind_columns (\@{$row}{@cols}); 1128 while ($csv->getline ($fh)) { 1129 print $row->{price}; 1130 } 1131 1132Your mileage may vary for the size of the data and the number of rows. 
With 1133perl-5.14.2 the comparison for a 100_000 line file with 14 columns: 1134 1135 Rate hashrefs getlines 1136 hashrefs 1.00/s -- -76% 1137 getlines 4.15/s 313% -- 1138 1139=head2 getline_hr_all 1140 1141 $arrayref = $csv->getline_hr_all ($fh); 1142 $arrayref = $csv->getline_hr_all ($fh, $offset); 1143 $arrayref = $csv->getline_hr_all ($fh, $offset, $length); 1144 1145This will return a reference to a list of L<getline_hr ($fh)|/getline_hr> 1146results. In this call, L<C<keep_meta_info>|/keep_meta_info> is disabled. 1147 1148=head2 parse 1149 1150 $status = $csv->parse ($line); 1151 1152This method decomposes a C<CSV> string into fields, returning success or 1153failure. Failure can result from a lack of argument or the given C<CSV> 1154string is improperly formatted. Upon success, L</fields> can be called to 1155retrieve the decomposed fields. Upon failure calling L</fields> will return 1156undefined data and L</error_input> can be called to retrieve the invalid 1157argument. 1158 1159You may use the L</types> method for setting column types. See L</types>' 1160description below. 1161 1162The C<$line> argument is supposed to be a simple scalar. Everything else is 1163supposed to croak and set error 1500. 1164 1165=head2 fragment 1166 1167This function tries to implement RFC7111 (URI Fragment Identifiers for the 1168text/csv Media Type) - http://tools.ietf.org/html/rfc7111 1169 1170 my $AoA = $csv->fragment ($fh, $spec); 1171 1172In specifications, C<*> is used to specify the I<last> item, a dash (C<->) 1173to indicate a range. All indices are C<1>-based: the first row or column 1174has index C<1>. Selections can be combined with the semi-colon (C<;>). 1175 1176When using this method in combination with L</column_names>, the returned 1177reference will point to a list of hashes instead of a list of lists. A 1178disjointed cell-based combined selection might return rows with different 1179number of columns making the use of hashes unpredictable. 
1180 1181 $csv->column_names ("Name", "Age"); 1182 my $AoH = $csv->fragment ($fh, "col=3;8"); 1183 1184If the L</after_parse> callback is active, it is also called on every line 1185parsed and skipped before the fragment. 1186 1187=over 2 1188 1189=item row 1190 1191 row=4 1192 row=5-7 1193 row=6-* 1194 row=1-2;4;6-* 1195 1196=item col 1197 1198 col=2 1199 col=1-3 1200 col=4-* 1201 col=1-2;4;7-* 1202 1203=item cell 1204 1205In cell-based selection, the comma (C<,>) is used to pair row and column 1206 1207 cell=4,1 1208 1209The range operator (C<->) using C<cell>s can be used to define top-left and 1210bottom-right C<cell> location 1211 1212 cell=3,1-4,6 1213 1214The C<*> is only allowed in the second part of a pair 1215 1216 cell=3,2-*,2 # row 3 till end, only column 2 1217 cell=3,2-3,* # column 2 till end, only row 3 1218 cell=3,2-*,* # strip row 1 and 2, and column 1 1219 1220Cells and cell ranges may be combined with C<;>, possibly resulting in rows 1221with different numbers of columns 1222 1223 cell=1,1-2,2;3,3-4,4;1,4;4,1 1224 1225Disjointed selections will only return selected cells. The cells that are 1226not specified will not be included in the returned set, not even as 1227C<undef>. As an example given a C<CSV> like 1228 1229 11,12,13,...19 1230 21,22,...28,29 1231 : : 1232 91,...97,98,99 1233 1234with C<cell=1,1-2,2;3,3-4,4;1,4;4,1> will return: 1235 1236 11,12,14 1237 21,22 1238 33,34 1239 41,43,44 1240 1241Overlapping cell-specs will return those cells only once, So 1242C<cell=1,1-3,3;2,2-4,4;2,3;4,2> will return: 1243 1244 11,12,13 1245 21,22,23,24 1246 31,32,33,34 1247 42,43,44 1248 1249=back 1250 1251L<RFC7111|http://tools.ietf.org/html/rfc7111> does B<not> allow different 1252types of specs to be combined (either C<row> I<or> C<col> I<or> C<cell>). 1253Passing an invalid fragment specification will croak and set error 2013. 1254 1255=head2 column_names 1256 1257Set the "keys" that will be used in the L</getline_hr> calls. 
If no keys 1258(column names) are passed, it will return the current setting as a list. 1259 1260L</column_names> accepts a list of scalars (the column names) or a single 1261array_ref, so you can pass the return value from L</getline> too: 1262 1263 $csv->column_names ($csv->getline ($fh)); 1264 1265L</column_names> does B<no> checking on duplicates at all, which might lead 1266to unexpected results. Undefined entries will be replaced with the string 1267C<"\cAUNDEF\cA">, so 1268 1269 $csv->column_names (undef, "", "name", "name"); 1270 $hr = $csv->getline_hr ($fh); 1271 1272will set C<< $hr->{"\cAUNDEF\cA"} >> to the 1st field, C<< $hr->{""} >> to 1273the 2nd field, and C<< $hr->{name} >> to the 4th field, discarding the 3rd 1274field. 1275 1276L</column_names> croaks on invalid arguments. 1277 1278=head2 header 1279 1280This method does NOT work in perl-5.6.x 1281 1282Parse the CSV header and set L<C<sep>|/sep>, column_names and encoding. 1283 1284 my @hdr = $csv->header ($fh); 1285 $csv->header ($fh, { sep_set => [ ";", ",", "|", "\t" ] }); 1286 $csv->header ($fh, { detect_bom => 1, munge_column_names => "lc" }); 1287 1288The first argument should be a file handle. 1289 1290This method resets some object properties, as it is supposed to be invoked 1291only once per file or stream. It will leave attributes C<column_names> and 1292C<bound_columns> alone if setting column names is disabled. Reading headers 1293on previously processed objects might fail on perl-5.8.0 and older. 1294 1295Assuming that the file opened for parsing has a header, and the header does 1296not contain problematic characters like embedded newlines, read the first 1297line from the open handle then auto-detect whether the header separates the 1298column names with a character from the allowed separator list.
1299 1300If any of the allowed separators matches, and none of the I<other> allowed 1301separators match, set L<C<sep>|/sep> to that separator for the current 1302CSV instance and use it to parse the first line, map those to lowercase, 1303and use that to set the instance L</column_names>: 1304 1305 my $csv = Text::CSV->new ({ binary => 1, auto_diag => 1 }); 1306 open my $fh, "<", "file.csv"; 1307 binmode $fh; # for Windows 1308 $csv->header ($fh); 1309 while (my $row = $csv->getline_hr ($fh)) { 1310 ... 1311 } 1312 1313If the header is empty, contains more than one unique separator out of the 1314allowed set, contains empty fields, or contains identical fields (after 1315folding), it will croak with error 1010, 1011, 1012, or 1013 respectively. 1316 1317If the header contains embedded newlines or is not valid CSV in any other 1318way, this method will croak and leave the parse error untouched. 1319 1320A successful call to C<header> will always set the L<C<sep>|/sep> of the 1321C<$csv> object. This behavior can not be disabled. 1322 1323=head3 return value 1324 1325On error this method will croak. 1326 1327In list context, the headers will be returned whether they are used to set 1328L</column_names> or not. 1329 1330In scalar context, the instance itself is returned. B<Note>: the values as 1331found in the header will effectively be B<lost> if C<set_column_names> is 1332false. 1333 1334=head3 Options 1335 1336=over 2 1337 1338=item sep_set 1339 1340 $csv->header ($fh, { sep_set => [ ";", ",", "|", "\t" ] }); 1341 1342The list of legal separators defaults to C<[ ";", "," ]> and can be changed 1343by this option. As this is probably the most often used option, it can be 1344passed on its own as an unnamed argument: 1345 1346 $csv->header ($fh, [ ";", ",", "|", "\t", "::", "\x{2063}" ]); 1347 1348Multi-byte sequences are allowed, both multi-character and Unicode. See 1349L<C<sep>|/sep>. 
1350 1351=item detect_bom 1352 1353 $csv->header ($fh, { detect_bom => 1 }); 1354 1355The default behavior is to detect if the header line starts with a BOM. If 1356the header has a BOM, use that to set the encoding of C<$fh>. This default 1357behavior can be disabled by passing a false value to C<detect_bom>. 1358 1359Supported encodings from BOM are: UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, and 1360UTF-32LE. BOM also supports UTF-1, UTF-EBCDIC, SCSU, BOCU-1, and GB-18030 1361but L<Encode> does not (yet). UTF-7 is not supported. 1362 1363If a supported BOM was detected as start of the stream, it is stored in the 1364object attribute C<ENCODING>. 1365 1366 my $enc = $csv->{ENCODING}; 1367 1368The encoding is used with C<binmode> on C<$fh>. 1369 1370If the handle was opened in a (correct) encoding, this method will B<not> 1371alter the encoding, as it checks the leading B<bytes> of the first line. In 1372case the stream starts with a decoded BOM (C<U+FEFF>), C<{ENCODING}> will be 1373C<""> (empty) instead of the default C<undef>. 1374 1375=item munge_column_names 1376 1377This option offers the means to modify the column names into something that 1378is most useful to the application. The default is to map all column names 1379to lower case. 
1380 1381 $csv->header ($fh, { munge_column_names => "lc" }); 1382 1383The following values are available: 1384 1385 lc - lower case 1386 uc - upper case 1387 db - valid DB field names 1388 none - do not change 1389 \%hash - supply a mapping 1390 \&cb - supply a callback 1391 1392=over 2 1393 1394=item Lower case 1395 1396 $csv->header ($fh, { munge_column_names => "lc" }); 1397 1398The header is changed to all lower-case 1399 1400 $_ = lc; 1401 1402=item Upper case 1403 1404 $csv->header ($fh, { munge_column_names => "uc" }); 1405 1406The header is changed to all upper-case 1407 1408 $_ = uc; 1409 1410=item Literal 1411 1412 $csv->header ($fh, { munge_column_names => "none" }); 1413 1414=item Hash 1415 1416 $csv->header ($fh, { munge_column_names => { foo => "sombrero" }}); 1417 1418If a value does not exist, the original value is used unchanged 1419 1420=item Database 1421 1422 $csv->header ($fh, { munge_column_names => "db" }); 1423 1424=over 2 1425 1426=item - 1427 1428lower-case 1429 1430=item - 1431 1432all sequences of non-word characters are replaced with an underscore 1433 1434=item - 1435 1436all leading underscores are removed 1437 1438=back 1439 1440 $_ = lc (s/\W+/_/gr =~ s/^_+//r); 1441 1442=item Callback 1443 1444 $csv->header ($fh, { munge_column_names => sub { fc } }); 1445 $csv->header ($fh, { munge_column_names => sub { "column_".$col++ } }); 1446 $csv->header ($fh, { munge_column_names => sub { lc (s/\W+/_/gr) } }); 1447 1448As this callback is called in a C<map>, you can use C<$_> directly. 1449 1450=back 1451 1452=item set_column_names 1453 1454 $csv->header ($fh, { set_column_names => 1 }); 1455 1456The default is to set the instance's column names using L</column_names> if 1457the method is successful, so subsequent calls to L</getline_hr> can return 1458a hash. Disabling setting the header can be forced by using a false value for 1459this option. 1460 1461As described in L</return value> above, content is lost in scalar context.
1462 1463=back 1464 1465=head3 Validation 1466 1467When receiving CSV files from external sources, this method can be used to 1468protect against changes in the layout by restricting to known headers (and 1469typos in the header fields). 1470 1471 my %known = ( 1472 "record key" => "c_rec", 1473 "rec id" => "c_rec", 1474 "id_rec" => "c_rec", 1475 "kode" => "code", 1476 "code" => "code", 1477 "vaule" => "value", 1478 "value" => "value", 1479 ); 1480 my $csv = Text::CSV->new ({ binary => 1, auto_diag => 1 }); 1481 open my $fh, "<", $source or die "$source: $!"; 1482 $csv->header ($fh, { munge_column_names => sub { 1483 s/\s+$//; 1484 s/^\s+//; 1485 $known{lc $_} or die "Unknown column '$_' in $source"; 1486 }}); 1487 while (my $row = $csv->getline_hr ($fh)) { 1488 say join "\t", $row->{c_rec}, $row->{code}, $row->{value}; 1489 } 1490 1491=head2 bind_columns 1492 1493Takes a list of scalar references to be used for output with L</print> or 1494to store in the fields fetched by L</getline>. When you do not pass enough 1495references to store the fetched fields in, L</getline> will fail with error 1496C<3006>. If you pass more than there are fields to return, the content of 1497the remaining references is left untouched. 1498 1499 $csv->bind_columns (\$code, \$name, \$price, \$description); 1500 while ($csv->getline ($fh)) { 1501 print "The price of a $name is \x{20ac} $price\n"; 1502 } 1503 1504To reset or clear all column binding, call L</bind_columns> with the single 1505argument C<undef>. This will also clear column names. 1506 1507 $csv->bind_columns (undef); 1508 1509If no arguments are passed at all, L</bind_columns> will return the list of 1510current bindings or C<undef> if no binds are active. 1511 1512Note that in parsing with C<bind_columns>, the fields are set on the fly. 
1513That implies that if the third field of a row causes an error (or this row 1514has just two fields where the previous row had more), the first two fields 1515already have been assigned the values of the current row, while the rest of 1516the fields will still hold the values of the previous row. If you want the 1517parser to fail in these cases, use the L<C<strict>|/strict> attribute. 1518 1519=head2 eof 1520 1521 $eof = $csv->eof (); 1522 1523If L</parse> or L</getline> was used with an IO stream, this method will 1524return true (1) if the last call hit end of file, otherwise it will return 1525false (''). This is useful to see the difference between a failure and end 1526of file. 1527 1528Note that if the parsing of the last line caused an error, C<eof> is still 1529true. That means that if you are I<not> using L</auto_diag>, an idiom like 1530 1531 while (my $row = $csv->getline ($fh)) { 1532 # ... 1533 } 1534 $csv->eof or $csv->error_diag; 1535 1536will I<not> report the error. You would have to change that to 1537 1538 while (my $row = $csv->getline ($fh)) { 1539 # ... 1540 } 1541 +$csv->error_diag and $csv->error_diag; 1542 1543=head2 types 1544 1545 $csv->types (\@tref); 1546 1547This method is used to force that (all) columns are of a given type. For 1548example, if you have an integer column, two columns with doubles and a 1549string column, then you might do a 1550 1551 $csv->types ([Text::CSV::IV (), 1552 Text::CSV::NV (), 1553 Text::CSV::NV (), 1554 Text::CSV::PV ()]); 1555 1556Column types are used only for I<decoding> columns while parsing, in other 1557words by the L</parse> and L</getline> methods. 1558 1559You can unset column types by doing a 1560 1561 $csv->types (undef); 1562 1563or fetch the current type settings with 1564 1565 $types = $csv->types (); 1566 1567=over 4 1568 1569=item IV 1570 1571Set field type to integer. 1572 1573=item NV 1574 1575Set field type to numeric/float. 1576 1577=item PV 1578 1579Set field type to string. 
1580 1581=back 1582 1583=head2 fields 1584 1585 @columns = $csv->fields (); 1586 1587This method returns the input to L</combine> or the resultant decomposed 1588fields of a successful L</parse>, whichever was called more recently. 1589 1590Note that the return value is undefined after using L</getline>, which does 1591not fill the data structures returned by L</parse>. 1592 1593=head2 meta_info 1594 1595 @flags = $csv->meta_info (); 1596 1597This method returns the "flags" of the input to L</combine> or the flags of 1598the resultant decomposed fields of L</parse>, whichever was called more 1599recently. 1600 1601For each field, a meta_info field will hold flags that inform something 1602about the field returned by the L</fields> method or passed to the 1603L</combine> method. The flags are bit-wise-C<or>'d like: 1604 1605=over 2 1606 1607=item C< >0x0001 1608 1609The field was quoted. 1610 1611=item C< >0x0002 1612 1613The field was binary. 1614 1615=back 1616 1617See the C<is_***> methods below. 1618 1619=head2 is_quoted 1620 1621 my $quoted = $csv->is_quoted ($column_idx); 1622 1623where C<$column_idx> is the (zero-based) index of the column in the last 1624result of L</parse>. 1625 1626This returns a true value if the data in the indicated column was enclosed 1627in L<C<quote_char>|/quote_char> quotes. This might be important for fields 1628where content C<,20070108,> is to be treated as a numeric value, and where 1629C<,"20070108",> is explicitly marked as character string data. 1630 1631This method is only valid when L</keep_meta_info> is set to a true value. 1632 1633=head2 is_binary 1634 1635 my $binary = $csv->is_binary ($column_idx); 1636 1637where C<$column_idx> is the (zero-based) index of the column in the last 1638result of L</parse>. 1639 1640This returns a true value if the data in the indicated column contained any 1641byte in the range C<[\x00-\x08,\x10-\x1F,\x7F-\xFF]>. 
1642 1643This method is only valid when L</keep_meta_info> is set to a true value. 1644 1645=head2 is_missing 1646 1647 my $missing = $csv->is_missing ($column_idx); 1648 1649where C<$column_idx> is the (zero-based) index of the column in the last 1650result of L</getline_hr>. 1651 1652 $csv->keep_meta_info (1); 1653 while (my $hr = $csv->getline_hr ($fh)) { 1654 $csv->is_missing (0) and next; # This was an empty line 1655 } 1656 1657When using L</getline_hr>, it is impossible to tell if the parsed fields 1658are C<undef> because they were not filled in the C<CSV> stream or because 1659they were not read at all, as B<all> the fields defined by L</column_names> 1660are set in the hash-ref. If you still need to know if all fields in each 1661row are provided, you should enable L<C<keep_meta_info>|/keep_meta_info> so 1662you can check the flags. 1663 1664If L<C<keep_meta_info>|/keep_meta_info> is C<false>, C<is_missing> will 1665always return C<undef>, regardless of C<$column_idx> being valid or not. If 1666this attribute is C<true> it will return either C<0> (the field is present) 1667or C<1> (the field is missing). 1668 1669A special case is the empty line. If the line is completely empty - after 1670dealing with the flags - this is still a valid CSV line: it is a record of 1671just one single empty field. However, if C<keep_meta_info> is set, invoking 1672C<is_missing> with index C<0> will now return true. 1673 1674=head2 status 1675 1676 $status = $csv->status (); 1677 1678This method returns the status of the last invoked L</combine> or L</parse> 1679call. Status is success (true: C<1>) or failure (false: C<undef> or C<0>). 1680 1681Note that as this only keeps track of the status of above mentioned methods, 1682you are probably looking for L<C<error_diag>|/error_diag> instead.
1683 1684=head2 error_input 1685 1686 $bad_argument = $csv->error_input (); 1687 1688This method returns the erroneous argument (if it exists) of L</combine> or 1689L</parse>, whichever was called more recently. If the last invocation was 1690successful, C<error_input> will return C<undef>. 1691 1692Depending on the type of error, it I<might> also hold the data for the last 1693error-input of L</getline>. 1694 1695=head2 error_diag 1696 1697 Text::CSV->error_diag (); 1698 $csv->error_diag (); 1699 $error_code = 0 + $csv->error_diag (); 1700 $error_str = "" . $csv->error_diag (); 1701 ($cde, $str, $pos, $rec, $fld) = $csv->error_diag (); 1702 1703If (and only if) an error occurred, this function returns the diagnostics 1704of that error. 1705 1706If called in void context, this will print the internal error code and the 1707associated error message to STDERR. 1708 1709If called in list context, this will return the error code and the error 1710message in that order. If the last error was from parsing, the rest of the 1711values returned are a best guess at the location within the line that was 1712being parsed. Their values are 1-based. The position currently is the index of 1713the byte at which the parsing failed in the current record. It might change 1714to be the index of the current character in a later release. The record number is 1715the index of the record parsed by the csv instance. The field number is the 1716index of the field the parser thinks it is currently trying to parse. See 1717F<examples/csv-check> for how this can be used. 1718 1719If called in scalar context, it will return the diagnostics in a single 1720scalar, a-la C<$!>. It will contain the error code in numeric context, and 1721the diagnostics message in string context. 1722 1723When called as a class method or a direct function call, the diagnostics 1724are that of the last L</new> call.
1725 1726=head2 record_number 1727 1728 $recno = $csv->record_number (); 1729 1730Returns the records parsed by this csv instance. This value should be more 1731accurate than C<$.> when embedded newlines come in play. Records written by 1732this instance are not counted. 1733 1734=head2 SetDiag 1735 1736 $csv->SetDiag (0); 1737 1738Use to reset the diagnostics if you are dealing with errors. 1739 1740=head1 ADDITIONAL METHODS 1741 1742=over 1743 1744=item backend 1745 1746Returns the backend module name called by Text::CSV. 1747C<module> is an alias. 1748 1749=item is_xs 1750 1751Returns true value if Text::CSV uses an XS backend. 1752 1753=item is_pp 1754 1755Returns true value if Text::CSV uses a pure-Perl backend. 1756 1757=back 1758 1759=head1 FUNCTIONS 1760 1761This section is also taken from Text::CSV_XS. 1762 1763=head2 csv 1764 1765This function is not exported by default and should be explicitly requested: 1766 1767 use Text::CSV qw( csv ); 1768 1769This is a high-level function that aims at simple (user) interfaces. This 1770can be used to read/parse a C<CSV> file or stream (the default behavior) or 1771to produce a file or write to a stream (define the C<out> attribute). It 1772returns an array- or hash-reference on parsing (or C<undef> on fail) or the 1773numeric value of L</error_diag> on writing. When this function fails you 1774can get to the error using the class call to L</error_diag> 1775 1776 my $aoa = csv (in => "test.csv") or 1777 die Text::CSV->error_diag; 1778 1779This function takes the arguments as key-value pairs. This can be passed as 1780a list or as an anonymous hash: 1781 1782 my $aoa = csv ( in => "test.csv", sep_char => ";"); 1783 my $aoh = csv ({ in => $fh, headers => "auto" }); 1784 1785The arguments passed consist of two parts: the arguments to L</csv> itself 1786and the optional attributes to the C<CSV> object used inside the function 1787as enumerated and explained in L</new>. 
1788 1789If not overridden, the default option used for CSV is 1790 1791 auto_diag => 1 1792 escape_null => 0 1793 1794The option that is always set and cannot be altered is 1795 1796 binary => 1 1797 1798As this function will likely be used in one-liners, it allows C<quote> to 1799be abbreviated as C<quo>, and C<escape_char> to be abbreviated as C<esc> 1800or C<escape>. 1801 1802Alternative invocations: 1803 1804 my $aoa = Text::CSV::csv (in => "file.csv"); 1805 1806 my $csv = Text::CSV->new (); 1807 my $aoa = $csv->csv (in => "file.csv"); 1808 1809In the latter case, the object attributes are used from the existing object 1810and the attribute arguments in the function call are ignored: 1811 1812 my $csv = Text::CSV->new ({ sep_char => ";" }); 1813 my $aoh = $csv->csv (in => "file.csv", bom => 1); 1814 1815will parse using C<;> as C<sep_char>, not C<,>. 1816 1817=head3 in 1818 1819Used to specify the source. C<in> can be a file name (e.g. C<"file.csv">), 1820which will be opened for reading and closed when finished, a file handle 1821(e.g. C<$fh> or C<FH>), a reference to a glob (e.g. C<\*ARGV>), the glob 1822itself (e.g. C<*STDIN>), or a reference to a scalar (e.g. C<\q{1,2,"csv"}>). 1823 1824When used with L</out>, C<in> should be a reference to a CSV structure (AoA 1825or AoH) or a CODE-ref that returns an array-reference or a hash-reference. 1826The code-ref will be invoked with no arguments. 
1827 1828 my $aoa = csv (in => "file.csv"); 1829 1830 open my $fh, "<", "file.csv"; 1831 my $aoa = csv (in => $fh); 1832 1833 my $csv = [ [qw( Foo Bar )], [ 1, 2 ], [ 2, 3 ]]; 1834 my $err = csv (in => $csv, out => "file.csv"); 1835 1836If called in void context without the L</out> attribute, the resulting ref 1837will be used as input to a subsequent call to csv: 1838 1839 csv (in => "file.csv", filter => { 2 => sub { length > 2 }}) 1840 1841will be a shortcut to 1842 1843 csv (in => csv (in => "file.csv", filter => { 2 => sub { length > 2 }})) 1844 1845where, in the absence of the C<out> attribute, this is a shortcut to 1846 1847 csv (in => csv (in => "file.csv", filter => { 2 => sub { length > 2 }}), 1848 out => *STDOUT) 1849 1850=head3 out 1851 1852 csv (in => $aoa, out => "file.csv"); 1853 csv (in => $aoa, out => $fh); 1854 csv (in => $aoa, out => STDOUT); 1855 csv (in => $aoa, out => *STDOUT); 1856 csv (in => $aoa, out => \*STDOUT); 1857 csv (in => $aoa, out => \my $data); 1858 csv (in => $aoa, out => undef); 1859 csv (in => $aoa, out => \"skip"); 1860 1861 csv (in => $fh, out => \@aoa); 1862 csv (in => $fh, out => \@aoh, bom => 1); 1863 csv (in => $fh, out => \%hsh, key => "key"); 1864 1865In output mode, the default CSV options when producing CSV are 1866 1867 eol => "\r\n" 1868 1869The L</fragment> attribute is ignored in output mode. 1870 1871C<out> can be a file name (e.g. C<"file.csv">), which will be opened for 1872writing and closed when finished, a file handle (e.g. C<$fh> or C<FH>), a 1873reference to a glob (e.g. C<\*STDOUT>), the glob itself (e.g. C<*STDOUT>), 1874or a reference to a scalar (e.g. C<\my $data>). 1875 1876 csv (in => sub { $sth->fetch }, out => "dump.csv"); 1877 csv (in => sub { $sth->fetchrow_hashref }, out => "dump.csv", 1878 headers => $sth->{NAME_lc}); 1879 1880When a code-ref is used for C<in>, the output is generated per invocation, 1881so no buffering is involved. 
This implies that there is no size restriction 1882on the number of records. The C<csv> function ends when the coderef returns 1883a false value. 1884 1885If C<out> is set to a reference to the literal string C<"skip">, the output 1886will be suppressed completely, which might be useful in combination with a 1887filter for side effects only. 1888 1889 my %cache; 1890 csv (in => "dump.csv", 1891 out => \"skip", 1892 on_in => sub { $cache{$_[1][1]}++ }); 1893 1894Currently, setting C<out> to any false value (C<undef>, C<"">, 0) will be 1895equivalent to C<\"skip">. 1896 1897If the C<in> argument points to something to parse, and the C<out> is set to 1898a reference to an C<ARRAY> or a C<HASH>, the output is appended to the data 1899in the existing reference. The result of the parse should match what exists 1900in the reference passed. This might come in handy when you have to parse a set 1901of files with similar content (like data stored per period) and you want to 1902collect that into a single data structure: 1903 1904 my %hash; 1905 csv (in => $_, out => \%hash, key => "id") for sort glob "foo-[0-9]*.csv"; 1906 1907 my @list; # List of arrays 1908 csv (in => $_, out => \@list) for sort glob "foo-[0-9]*.csv"; 1909 1910 my @list; # List of hashes 1911 csv (in => $_, out => \@list, bom => 1) for sort glob "foo-[0-9]*.csv"; 1912 1913=head3 encoding 1914 1915If passed, it should be an encoding accepted by the C<:encoding()> option 1916to C<open>. There is no default value. This attribute does not work in perl 19175.6.x. C<encoding> can be abbreviated to C<enc> for ease of use in command 1918line invocations. 1919 1920If C<encoding> is set to the literal value C<"auto">, the method L</header> 1921will be invoked on the opened stream to check if there is a BOM and set the 1922encoding accordingly. This is equal to passing a true value in the option 1923L<C<detect_bom>|/detect_bom>.
1924 1925Encodings can be stacked, as supported by C<binmode>: 1926 1927 # Using PerlIO::via::gzip 1928 csv (in => \@csv, 1929 out => "test.csv:via.gz", 1930 encoding => ":via(gzip):encoding(utf-8)", 1931 ); 1932 $aoa = csv (in => "test.csv:via.gz", encoding => ":via(gzip)"); 1933 1934 # Using PerlIO::gzip 1935 csv (in => \@csv, 1936 out => "test.csv:gzip.gz", 1937 encoding => ":gzip:encoding(utf-8)", 1938 ); 1939 $aoa = csv (in => "test.csv:gzip.gz", encoding => ":gzip"); 1940 1941=head3 detect_bom 1942 1943If C<detect_bom> is given, the method L</header> will be invoked on the 1944opened stream to check if there is a BOM and set the encoding accordingly. 1945 1946C<detect_bom> can be abbreviated to C<bom>. 1947 1948This is the same as setting L<C<encoding>|/encoding> to C<"auto">. 1949 1950Note that as the method L</header> is invoked, its default is to also set 1951the headers. 1952 1953=head3 headers 1954 1955If this attribute is not given, the default behavior is to produce an array 1956of arrays. 1957 1958If C<headers> is supplied, it should be an anonymous list of column names, 1959an anonymous hashref, a coderef, or a literal flag: C<auto>, C<lc>, C<uc>, 1960or C<skip>. 1961 1962=over 2 1963 1964=item skip 1965 1966When C<skip> is used, the header will not be included in the output. 1967 1968 my $aoa = csv (in => $fh, headers => "skip"); 1969 1970=item auto 1971 1972If C<auto> is used, the first line of the C<CSV> source will be read as the 1973list of field headers and used to produce an array of hashes. 1974 1975 my $aoh = csv (in => $fh, headers => "auto"); 1976 1977=item lc 1978 1979If C<lc> is used, the first line of the C<CSV> source will be read as the 1980list of field headers mapped to lower case and used to produce an array of 1981hashes. This is a variation of C<auto>. 
1982 1983 my $aoh = csv (in => $fh, headers => "lc"); 1984 1985=item uc 1986 1987If C<uc> is used, the first line of the C<CSV> source will be read as the 1988list of field headers mapped to upper case and used to produce an array of 1989hashes. This is a variation of C<auto>. 1990 1991 my $aoh = csv (in => $fh, headers => "uc"); 1992 1993=item CODE 1994 1995If a coderef is used, the first line of the C<CSV> source will be read as 1996the list of mangled field headers in which each field is passed as the only 1997argument to the coderef. This list is used to produce an array of hashes. 1998 1999 my $aoh = csv (in => $fh, 2000 headers => sub { lc ($_[0]) =~ s/kode/code/gr }); 2001 2002this example is a variation of using C<lc> where all occurrences of C<kode> 2003are replaced with C<code>. 2004 2005=item ARRAY 2006 2007If C<headers> is an anonymous list, the entries in the list will be used 2008as field names. The first line is considered data instead of headers. 2009 2010 my $aoh = csv (in => $fh, headers => [qw( Foo Bar )]); 2011 csv (in => $aoa, out => $fh, headers => [qw( code description price )]); 2012 2013=item HASH 2014 2015If C<headers> is a hash reference, this implies C<auto>, but header fields 2016that exist as key in the hashref will be replaced by the value for that 2017key. Given a CSV file like 2018 2019 post-kode,city,name,id number,fubble 2020 1234AA,Duckstad,Donald,13,"X313DF" 2021 2022using 2023 2024 csv (headers => { "post-kode" => "pc", "id number" => "ID" }, ... 2025 2026will return an entry like 2027 2028 { pc => "1234AA", 2029 city => "Duckstad", 2030 name => "Donald", 2031 ID => "13", 2032 fubble => "X313DF", 2033 } 2034 2035=back 2036 2037See also L<C<munge_column_names>|/munge_column_names> and 2038L<C<set_column_names>|/set_column_names>. 2039 2040=head3 munge_column_names 2041 2042If C<munge_column_names> is set, the method L</header> is invoked on the 2043opened stream with all matching arguments to detect and set the headers. 
2044 2045C<munge_column_names> can be abbreviated to C<munge>. 2046 2047=head3 key 2048 2049If passed, will default L<C<headers>|/headers> to C<"auto"> and return a 2050hashref instead of an array of hashes. Allowed values are simple scalars or 2051array-references where the first element is the joiner and the rest are the 2052fields to join to combine the key. 2053 2054 my $ref = csv (in => "test.csv", key => "code"); 2055 my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ]); 2056 2057with test.csv like 2058 2059 code,product,price,color 2060 1,pc,850,gray 2061 2,keyboard,12,white 2062 3,mouse,5,black 2063 2064the first example will return 2065 2066 { 1 => { 2067 code => 1, 2068 color => 'gray', 2069 price => 850, 2070 product => 'pc' 2071 }, 2072 2 => { 2073 code => 2, 2074 color => 'white', 2075 price => 12, 2076 product => 'keyboard' 2077 }, 2078 3 => { 2079 code => 3, 2080 color => 'black', 2081 price => 5, 2082 product => 'mouse' 2083 } 2084 } 2085 2086the second example will return 2087 2088 { "1:gray" => { 2089 code => 1, 2090 color => 'gray', 2091 price => 850, 2092 product => 'pc' 2093 }, 2094 "2:white" => { 2095 code => 2, 2096 color => 'white', 2097 price => 12, 2098 product => 'keyboard' 2099 }, 2100 "3:black" => { 2101 code => 3, 2102 color => 'black', 2103 price => 5, 2104 product => 'mouse' 2105 } 2106 } 2107 2108The C<key> attribute can be combined with L<C<headers>|/headers> for C<CSV> 2109data that has no header line, like 2110 2111 my $ref = csv ( 2112 in => "foo.csv", 2113 headers => [qw( c_foo foo bar description stock )], 2114 key => "c_foo", 2115 ); 2116 2117=head3 value 2118 2119Used to create key-value hashes. 2120 2121Only allowed when C<key> is valid. A C<value> can be either a single column 2122label or an anonymous list of column labels. In the first case, the value 2123will be a simple scalar value, in the latter case, it will be a hashref. 
2124 2125 my $ref = csv (in => "test.csv", key => "code", 2126 value => "price"); 2127 my $ref = csv (in => "test.csv", key => "code", 2128 value => [ "product", "price" ]); 2129 my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ], 2130 value => "price"); 2131 my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ], 2132 value => [ "product", "price" ]); 2133 2134with test.csv like 2135 2136 code,product,price,color 2137 1,pc,850,gray 2138 2,keyboard,12,white 2139 3,mouse,5,black 2140 2141the first example will return 2142 2143 { 1 => 850, 2144 2 => 12, 2145 3 => 5, 2146 } 2147 2148the second example will return 2149 2150 { 1 => { 2151 price => 850, 2152 product => 'pc' 2153 }, 2154 2 => { 2155 price => 12, 2156 product => 'keyboard' 2157 }, 2158 3 => { 2159 price => 5, 2160 product => 'mouse' 2161 } 2162 } 2163 2164the third example will return 2165 2166 { "1:gray" => 850, 2167 "2:white" => 12, 2168 "3:black" => 5, 2169 } 2170 2171the fourth example will return 2172 2173 { "1:gray" => { 2174 price => 850, 2175 product => 'pc' 2176 }, 2177 "2:white" => { 2178 price => 12, 2179 product => 'keyboard' 2180 }, 2181 "3:black" => { 2182 price => 5, 2183 product => 'mouse' 2184 } 2185 } 2186 2187=head3 keep_headers 2188 2189When using hashes, keep the column names into the arrayref passed, so all 2190headers are available after the call in the original order. 2191 2192 my $aoh = csv (in => "file.csv", keep_headers => \my @hdr); 2193 2194This attribute can be abbreviated to C<kh> or passed as C<keep_column_names>. 2195 2196This attribute implies a default of C<auto> for the C<headers> attribute. 2197 2198=head3 fragment 2199 2200Only output the fragment as defined in the L</fragment> method. This option 2201is ignored when I<generating> C<CSV>. See L</out>. 
2202 2203Combining all of them could give something like 2204 2205 use Text::CSV qw( csv ); 2206 my $aoh = csv ( 2207 in => "test.txt", 2208 encoding => "utf-8", 2209 headers => "auto", 2210 sep_char => "|", 2211 fragment => "row=3;6-9;15-*", 2212 ); 2213 say $aoh->[15]{Foo}; 2214 2215=head3 sep_set 2216 2217If C<sep_set> is set, the method L</header> is invoked on the opened stream 2218to detect and set L<C<sep_char>|/sep_char> with the given set. 2219 2220C<sep_set> can be abbreviated to C<seps>. 2221 2222Note that as the L</header> method is invoked, its default is to also set 2223the headers. 2224 2225=head3 set_column_names 2226 2227If C<set_column_names> is passed, the method L</header> is invoked on the 2228opened stream with all arguments meant for L</header>. 2229 2230If C<set_column_names> is passed as a false value, the content of the first 2231row is only preserved if the output is AoA: 2232 2233With an input-file like 2234 2235 bAr,foo 2236 1,2 2237 3,4,5 2238 2239This call 2240 2241 my $aoa = csv (in => $file, set_column_names => 0); 2242 2243will result in 2244 2245 [[ "bar", "foo" ], 2246 [ "1", "2" ], 2247 [ "3", "4", "5" ]] 2248 2249and 2250 2251 my $aoa = csv (in => $file, set_column_names => 0, munge => "none"); 2252 2253will result in 2254 2255 [[ "bAr", "foo" ], 2256 [ "1", "2" ], 2257 [ "3", "4", "5" ]] 2258 2259=head2 Callbacks 2260 2261Callbacks enable actions triggered from the I<inside> of Text::CSV. 2262 2263While most of what this enables can easily be done in an unrolled loop as 2264described in the L</SYNOPSIS> callbacks can be used to meet special demands 2265or enhance the L</csv> function. 2266 2267=over 2 2268 2269=item error 2270 2271 $csv->callbacks (error => sub { $csv->SetDiag (0) }); 2272 2273the C<error> callback is invoked when an error occurs, but I<only> when 2274L</auto_diag> is set to a true value. 
A callback is invoked with the values 2275returned by L</error_diag>: 2276 2277 my ($c, $s); 2278 2279 sub ignore3006 { 2280 my ($err, $msg, $pos, $recno, $fldno) = @_; 2281 if ($err == 3006) { 2282 # ignore this error 2283 ($c, $s) = (undef, undef); 2284 Text::CSV->SetDiag (0); 2285 } 2286 # Any other error 2287 return; 2288 } # ignore3006 2289 2290 $csv->callbacks (error => \&ignore3006); 2291 $csv->bind_columns (\$c, \$s); 2292 while ($csv->getline ($fh)) { 2293 # Error 3006 will not stop the loop 2294 } 2295 2296=item after_parse 2297 2298 $csv->callbacks (after_parse => sub { push @{$_[1]}, "NEW" }); 2299 while (my $row = $csv->getline ($fh)) { 2300 $row->[-1] eq "NEW"; 2301 } 2302 2303This callback is invoked after parsing with L</getline> only if no error 2304occurred. The callback is invoked with two arguments: the current C<CSV> 2305parser object and an array reference to the fields parsed. 2306 2307The return code of the callback is ignored unless it is a reference to the 2308string "skip", in which case the record will be skipped in L</getline_all>. 
2309 2310 sub add_from_db { 2311 my ($csv, $row) = @_; 2312 $sth->execute ($row->[4]); 2313 push @$row, $sth->fetchrow_array; 2314 } # add_from_db 2315 2316 my $aoa = csv (in => "file.csv", callbacks => { 2317 after_parse => \&add_from_db }); 2318 2319This hook can be used for validation: 2320 2321=over 2 2322 2323=item FAIL 2324 2325Die if any of the records does not validate a rule: 2326 2327 after_parse => sub { 2328 $_[1][4] =~ m/^[0-9]{4}\s?[A-Z]{2}$/ or 2329 die "5th field does not have a valid Dutch zipcode"; 2330 } 2331 2332=item DEFAULT 2333 2334Replace invalid fields with a default value: 2335 2336 after_parse => sub { $_[1][2] =~ m/^\d+$/ or $_[1][2] = 0 } 2337 2338=item SKIP 2339 2340Skip records that have invalid fields (only applies to L</getline_all>): 2341 2342 after_parse => sub { $_[1][0] =~ m/^\d+$/ or return \"skip"; } 2343 2344=back 2345 2346=item before_print 2347 2348 my $idx = 1; 2349 $csv->callbacks (before_print => sub { $_[1][0] = $idx++ }); 2350 $csv->print (*STDOUT, [ 0, $_ ]) for @members; 2351 2352This callback is invoked before printing with L</print> only if no error 2353occurred. The callback is invoked with two arguments: the current C<CSV> 2354parser object and an array reference to the fields passed. 2355 2356The return code of the callback is ignored. 2357 2358 sub max_4_fields { 2359 my ($csv, $row) = @_; 2360 @$row > 4 and splice @$row, 4; 2361 } # max_4_fields 2362 2363 csv (in => csv (in => "file.csv"), out => *STDOUT, 2364 callbacks => { before_print => \&max_4_fields }); 2365 2366This callback is not active for L</combine>. 2367 2368=back 2369 2370=head3 Callbacks for csv () 2371 2372The L</csv> allows for some callbacks that do not integrate in XS internals 2373but only feature the L</csv> function. 
2374 2375 csv (in => "file.csv", 2376 callbacks => { 2377 filter => { 6 => sub { $_ > 15 } }, # first 2378 after_parse => sub { say "AFTER PARSE"; }, # first 2379 after_in => sub { say "AFTER IN"; }, # second 2380 on_in => sub { say "ON IN"; }, # third 2381 }, 2382 ); 2383 2384 csv (in => $aoh, 2385 out => "file.csv", 2386 callbacks => { 2387 on_in => sub { say "ON IN"; }, # first 2388 before_out => sub { say "BEFORE OUT"; }, # second 2389 before_print => sub { say "BEFORE PRINT"; }, # third 2390 }, 2391 ); 2392 2393=over 2 2394 2395=item filter 2396 2397This callback can be used to filter records. It is called just after a new 2398record has been scanned. The callback accepts a: 2399 2400=over 2 2401 2402=item hashref 2403 2404The keys are the index to the row (the field name or field number, 1-based) 2405and the values are subs to return a true or false value. 2406 2407 csv (in => "file.csv", filter => { 2408 3 => sub { m/a/ }, # third field should contain an "a" 2409 5 => sub { length > 4 }, # length of the 5th field minimal 5 2410 }); 2411 2412 csv (in => "file.csv", filter => { foo => sub { $_ > 4 }}); 2413 2414If the keys to the filter hash contain any character that is not a digit it 2415will also implicitly set L</headers> to C<"auto"> unless L</headers> was 2416already passed as argument. When headers are active, returning an array of 2417hashes, the filter is not applicable to the header itself. 2418 2419All sub results should match, as in AND. 2420 2421The context of the callback sets C<$_> localized to the field indicated by 2422the filter. The two arguments are as with all other callbacks, so the other 2423fields in the current row can be seen: 2424 2425 filter => { 3 => sub { $_ > 100 ? 
$_[1][1] =~ m/A/ : $_[1][6] =~ m/B/ }} 2426 2427If the context is set to return a list of hashes (L</headers> is defined), 2428the current record will also be available in the localized C<%_>: 2429 2430 filter => { 3 => sub { $_ > 100 && $_{foo} =~ m/A/ && $_{bar} < 1000 }} 2431 2432If the filter is used to I<alter> the content by changing C<$_>, make sure 2433that the sub returns true in order not to have that record skipped: 2434 2435 filter => { 2 => sub { $_ = uc }} 2436 2437will upper-case the second field, and then skip it if the resulting content 2438evaluates to false. To always accept, end with truth: 2439 2440 filter => { 2 => sub { $_ = uc; 1 }} 2441 2442=item coderef 2443 2444 csv (in => "file.csv", filter => sub { $n++; 0; }); 2445 2446If the argument to C<filter> is a coderef, it is an alias or shortcut to a 2447filter on column 0: 2448 2449 csv (filter => sub { $n++; 0 }); 2450 2451is equal to 2452 2453 csv (filter => { 0 => sub { $n++; 0 }}); 2454 2455=item filter-name 2456 2457 csv (in => "file.csv", filter => "not_blank"); 2458 csv (in => "file.csv", filter => "not_empty"); 2459 csv (in => "file.csv", filter => "filled"); 2460 2461These are predefined filters 2462 2463Given a file like (line numbers prefixed for doc purpose only): 2464 2465 1:1,2,3 2466 2: 2467 3:, 2468 4:"" 2469 5:,, 2470 6:, , 2471 7:"", 2472 8:" " 2473 9:4,5,6 2474 2475=over 2 2476 2477=item not_blank 2478 2479Filter out the blank lines 2480 2481This filter is a shortcut for 2482 2483 filter => { 0 => sub { @{$_[1]} > 1 or 2484 defined $_[1][0] && $_[1][0] ne "" } } 2485 2486Due to the implementation, it is currently impossible to also filter lines 2487that consist only of a quoted empty field. These lines are also considered 2488blank lines. 2489 2490With the given example, lines 2 and 4 will be skipped. 2491 2492=item not_empty 2493 2494Filter out lines where all the fields are empty. 
2495 2496This filter is a shortcut for 2497 2498 filter => { 0 => sub { grep { defined && $_ ne "" } @{$_[1]} } } 2499 2500A space is not regarded as being empty, so given the example data, lines 2, 3, 25014, 5, and 7 are skipped. 2502 2503=item filled 2504 2505Filter out lines that have no visible data 2506 2507This filter is a shortcut for 2508 2509 filter => { 0 => sub { grep { defined && m/\S/ } @{$_[1]} } } 2510 2511This filter rejects all lines that do I<not> have at least one field that does 2512not evaluate to the empty string. 2513 2514With the given example data, this filter would skip lines 2 through 8. 2515 2516=back 2517 2518=back 2519 2520One could also use modules like L<Types::Standard>: 2521 2522 use Types::Standard -types; 2523 2524 my $type = Tuple[Str, Str, Int, Bool, Optional[Num]]; 2525 my $check = $type->compiled_check; 2526 2527 # filter with compiled check and warnings 2528 my $aoa = csv ( 2529 in => \$data, 2530 filter => { 2531 0 => sub { 2532 my $ok = $check->($_[1]) or 2533 warn $type->get_message ($_[1]), "\n"; 2534 return $ok; 2535 }, 2536 }, 2537 ); 2538 2539=item after_in 2540 2541This callback is invoked for each record after all records have been parsed 2542but before returning the reference to the caller. The hook is invoked with 2543two arguments: the current C<CSV> parser object and a reference to the 2544record. The reference can be a reference to a HASH or a reference to an 2545ARRAY as determined by the arguments. 2546 2547This callback can also be passed as an attribute without the C<callbacks> 2548wrapper. 2549 2550=item before_out 2551 2552This callback is invoked for each record before the record is printed. The 2553hook is invoked with two arguments: the current C<CSV> parser object and a 2554reference to the record. The reference can be a reference to a HASH or a 2555reference to an ARRAY as determined by the arguments. 2556 2557This callback can also be passed as an attribute without the C<callbacks> 2558wrapper. 
2559 2560This callback makes the row available in C<%_> if the row is a hashref. In 2561this case C<%_> is writable and will change the original row. 2562 2563=item on_in 2564 2565This callback acts exactly as the L</after_in> or the L</before_out> hooks. 2566 2567This callback can also be passed as an attribute without the C<callbacks> 2568wrapper. 2569 2570This callback makes the row available in C<%_> if the row is a hashref. In 2571this case C<%_> is writable and will change the original row. So e.g. with 2572 2573 my $aoh = csv ( 2574 in => \"foo\n1\n2\n", 2575 headers => "auto", 2576 on_in => sub { $_{bar} = 2; }, 2577 ); 2578 2579C<$aoh> will be: 2580 2581 [ { foo => 1, 2582 bar => 2, 2583 }, 2584 { foo => 2, 2585 bar => 2, 2586 } 2587 ] 2588 2589=item csv 2590 2591The I<function> L</csv> can also be called as a method or with an existing 2592Text::CSV object. This could help if the function is to be invoked a lot 2593of times and the overhead of creating the object internally over and over 2594again would be prevented by passing an existing instance. 2595 2596 my $csv = Text::CSV->new ({ binary => 1, auto_diag => 1 }); 2597 2598 my $aoa = $csv->csv (in => $fh); 2599 my $aoa = csv (in => $fh, csv => $csv); 2600 2601both act the same. Running this 20000 times on a 20-line CSV file showed 2602a 53% speedup. 2603 2604=back 2605 2606=head1 DIAGNOSTICS 2607 2608This section is also taken from Text::CSV_XS. 2609 2610Still under construction ... 2611 2612If an error occurs, C<< $csv->error_diag >> can be used to get information 2613on the cause of the failure. Note that for speed reasons the internal value 2614is never cleared on success, so using the value returned by L</error_diag> 2615in normal cases - when no error occurred - may cause unexpected results. 2616 2617If the constructor failed, the cause can be found using L</error_diag> as a 2618class method, like C<< Text::CSV->error_diag >>. 
2619 2620The C<< $csv->error_diag >> method is automatically invoked upon error when 2621the constructor was called with L<C<auto_diag>|/auto_diag> set to C<1> or 2622C<2>, or when L<autodie> is in effect. When set to C<1>, this will cause a 2623C<warn> with the error message, when set to C<2>, it will C<die>. C<2012 - 2624EOF> is excluded from L<C<auto_diag>|/auto_diag> reports. 2625 2626Errors can be (individually) caught using the L</error> callback. 2627 2628The errors as described below are available. I have tried to make the error 2629itself explanatory enough, but more descriptions will be added. For most of 2630these errors, the first three capitals describe the error category: 2631 2632=over 2 2633 2634=item * 2635INI 2636 2637Initialization error or option conflict. 2638 2639=item * 2640ECR 2641 2642Carriage-Return related parse error. 2643 2644=item * 2645EOF 2646 2647End-Of-File related parse error. 2648 2649=item * 2650EIQ 2651 2652Parse error inside quotation. 2653 2654=item * 2655EIF 2656 2657Parse error inside field. 2658 2659=item * 2660ECB 2661 2662Combine error. 2663 2664=item * 2665EHR 2666 2667HashRef parse related error. 2668 2669=back 2670 2671And below should be the complete list of error codes that can be returned: 2672 2673=over 2 2674 2675=item * 26761001 "INI - sep_char is equal to quote_char or escape_char" 2677 2678The L<separation character|/sep_char> cannot be equal to L<the quotation 2679character|/quote_char> or to L<the escape character|/escape_char>, as this 2680would invalidate all parsing rules. 2681 2682=item * 26831002 "INI - allow_whitespace with escape_char or quote_char SP or TAB" 2684 2685Using the L<C<allow_whitespace>|/allow_whitespace> attribute when either 2686L<C<quote_char>|/quote_char> or L<C<escape_char>|/escape_char> is equal to 2687C<SPACE> or C<TAB> is too ambiguous to allow. 
2688 2689=item * 26901003 "INI - \r or \n in main attr not allowed" 2691 2692Using default L<C<eol>|/eol> characters in either L<C<sep_char>|/sep_char>, 2693L<C<quote_char>|/quote_char>, or L<C<escape_char>|/escape_char> is not 2694allowed. 2695 2696=item * 26971004 "INI - callbacks should be undef or a hashref" 2698 2699The L<C<callbacks>|/Callbacks> attribute only allows one to be C<undef> or 2700a hash reference. 2701 2702=item * 27031005 "INI - EOL too long" 2704 2705The value passed for EOL is exceeding its maximum length (16). 2706 2707=item * 27081006 "INI - SEP too long" 2709 2710The value passed for SEP is exceeding its maximum length (16). 2711 2712=item * 27131007 "INI - QUOTE too long" 2714 2715The value passed for QUOTE is exceeding its maximum length (16). 2716 2717=item * 27181008 "INI - SEP undefined" 2719 2720The value passed for SEP should be defined and not empty. 2721 2722=item * 27231010 "INI - the header is empty" 2724 2725The header line parsed in the L</header> is empty. 2726 2727=item * 27281011 "INI - the header contains more than one valid separator" 2729 2730The header line parsed in the L</header> contains more than one (unique) 2731separator character out of the allowed set of separators. 2732 2733=item * 27341012 "INI - the header contains an empty field" 2735 2736The header line parsed in the L</header> contains an empty field. 2737 2738=item * 27391013 "INI - the header contains nun-unique fields" 2740 2741The header line parsed in the L</header> contains at least two identical 2742fields. 2743 2744=item * 27451014 "INI - header called on undefined stream" 2746 2747The header line cannot be parsed from an undefined source. 2748 2749=item * 27501500 "PRM - Invalid/unsupported argument(s)" 2751 2752Function or method called with invalid argument(s) or parameter(s). 2753 2754=item * 27551501 "PRM - The key attribute is passed as an unsupported type" 2756 2757The C<key> attribute is of an unsupported type. 
2758 2759=item * 27601502 "PRM - The value attribute is passed without the key attribute" 2761 2762The C<value> attribute is only allowed when a valid key is given. 2763 2764=item * 27651503 "PRM - The value attribute is passed as an unsupported type" 2766 2767The C<value> attribute is of an unsupported type. 2768 2769=item * 27702010 "ECR - QUO char inside quotes followed by CR not part of EOL" 2771 2772When L<C<eol>|/eol> has been set to anything but the default, like 2773C<"\r\t\n">, and the C<"\r"> is following the B<second> (closing) 2774L<C<quote_char>|/quote_char>, where the characters following the C<"\r"> do 2775not make up the L<C<eol>|/eol> sequence, this is an error. 2776 2777=item * 27782011 "ECR - Characters after end of quoted field" 2779 2780Sequences like C<1,foo,"bar"baz,22,1> are not allowed. C<"bar"> is a quoted 2781field and after the closing double-quote, there should be either a new-line 2782sequence or a separation character. 2783 2784=item * 27852012 "EOF - End of data in parsing input stream" 2786 2787Self-explaining. End-of-file while inside parsing a stream. Can happen only 2788when reading from streams with L</getline>, as using L</parse> is done on 2789strings that are not required to have a trailing L<C<eol>|/eol>. 2790 2791=item * 27922013 "INI - Specification error for fragments RFC7111" 2793 2794Invalid specification for URI L</fragment> specification. 2795 2796=item * 27972014 "ENF - Inconsistent number of fields" 2798 2799Inconsistent number of fields under strict parsing. 2800 2801=item * 28022021 "EIQ - NL char inside quotes, binary off" 2803 2804Sequences like C<1,"foo\nbar",22,1> are allowed only when the binary option 2805has been selected with the constructor. 2806 2807=item * 28082022 "EIQ - CR char inside quotes, binary off" 2809 2810Sequences like C<1,"foo\rbar",22,1> are allowed only when the binary option 2811has been selected with the constructor. 
2812 2813=item * 28142023 "EIQ - QUO character not allowed" 2815 2816Sequences like C<"foo "bar" baz",qu> and C<2023,",2008-04-05,"Foo, Bar",\n> 2817will cause this error. 2818 2819=item * 28202024 "EIQ - EOF cannot be escaped, not even inside quotes" 2821 2822The escape character is not allowed as last character in an input stream. 2823 2824=item * 28252025 "EIQ - Loose unescaped escape" 2826 2827An escape character should escape only characters that need escaping. 2828 2829Allowing the escape for other characters is possible with the attribute 2830L</allow_loose_escapes>. 2831 2832=item * 28332026 "EIQ - Binary character inside quoted field, binary off" 2834 2835Binary characters are not allowed by default. Exceptions are fields that 2836contain valid UTF-8, that will automatically be upgraded if the content is 2837valid UTF-8. Set L<C<binary>|/binary> to C<1> to accept binary data. 2838 2839=item * 28402027 "EIQ - Quoted field not terminated" 2841 2842When parsing a field that started with a quotation character, the field is 2843expected to be closed with a quotation character. When the parsed line is 2844exhausted before the quote is found, that field is not terminated. 2845 2846=item * 28472030 "EIF - NL char inside unquoted verbatim, binary off" 2848 2849=item * 28502031 "EIF - CR char is first char of field, not part of EOL" 2851 2852=item * 28532032 "EIF - CR char inside unquoted, not part of EOL" 2854 2855=item * 28562034 "EIF - Loose unescaped quote" 2857 2858=item * 28592035 "EIF - Escaped EOF in unquoted field" 2860 2861=item * 28622036 "EIF - ESC error" 2863 2864=item * 28652037 "EIF - Binary character in unquoted field, binary off" 2866 2867=item * 28682110 "ECB - Binary character in Combine, binary off" 2869 2870=item * 28712200 "EIO - print to IO failed. 
See errno" 2872 2873=item * 28743001 "EHR - Unsupported syntax for column_names ()" 2875 2876=item * 28773002 "EHR - getline_hr () called before column_names ()" 2878 2879=item * 28803003 "EHR - bind_columns () and column_names () fields count mismatch" 2881 2882=item * 28833004 "EHR - bind_columns () only accepts refs to scalars" 2884 2885=item * 28863006 "EHR - bind_columns () did not pass enough refs for parsed fields" 2887 2888=item * 28893007 "EHR - bind_columns needs refs to writable scalars" 2890 2891=item * 28923008 "EHR - unexpected error in bound fields" 2893 2894=item * 28953009 "EHR - print_hr () called before column_names ()" 2896 2897=item * 28983010 "EHR - print_hr () called with invalid arguments" 2899 2900=back 2901 2902=head1 SEE ALSO 2903 2904L<Text::CSV_PP>, L<Text::CSV_XS> and L<Text::CSV::Encoded>. 2905 2906 2907=head1 AUTHORS and MAINTAINERS 2908 2909Alan Citterman F<E<lt>alan[at]mfgrtl.comE<gt>> wrote the original Perl 2910module. Please don't send mail concerning Text::CSV to Alan, as 2911he's not a present maintainer. 2912 2913Jochen Wiedmann F<E<lt>joe[at]ispsoft.deE<gt>> rewrote the encoding and 2914decoding in C by implementing a simple finite-state machine and added 2915the variable quote, escape and separator characters, the binary mode 2916and the print and getline methods. See ChangeLog releases 0.10 through 29170.23. 2918 2919H.Merijn Brand F<E<lt>h.m.brand[at]xs4all.nlE<gt>> cleaned up the code, 2920added the field flags methods, wrote the major part of the test suite, 2921completed the documentation, fixed some RT bugs. See ChangeLog releases 29220.25 and on. 2923 2924Makamaka Hannyaharamitu, E<lt>makamaka[at]cpan.orgE<gt> wrote Text::CSV_PP 2925which is the pure-Perl version of Text::CSV_XS. 2926 2927New Text::CSV (since 0.99) is maintained by Makamaka, and Kenichi Ishigaki 2928since 1.91. 2929 2930 2931=head1 COPYRIGHT AND LICENSE 2932 2933Text::CSV 2934 2935Copyright (C) 1997 Alan Citterman. All rights reserved. 
2936Copyright (C) 2007-2015 Makamaka Hannyaharamitu. 2937Copyright (C) 2017- Kenichi Ishigaki 2938A large portion of the doc is taken from Text::CSV_XS. See below. 2939 2940Text::CSV_PP: 2941 2942Copyright (C) 2005-2015 Makamaka Hannyaharamitu. 2943Copyright (C) 2017- Kenichi Ishigaki 2944A large portion of the code/doc are also taken from Text::CSV_XS. See below. 2945 2946Text::CSV_XS: 2947 2948Copyright (C) 2007-2016 H.Merijn Brand for PROCURA B.V. 2949Copyright (C) 1998-2001 Jochen Wiedmann. All rights reserved. 2950Portions Copyright (C) 1997 Alan Citterman. All rights reserved. 2951 2952 2953This library is free software; you can redistribute it and/or modify 2954it under the same terms as Perl itself. 2955 2956=cut 2957