1package Text::xSV;
2$VERSION = 0.21;
3use strict;
4use Carp;
5
# Make the bound field $from also reachable under the name $to.
# Headers must already be bound.  If they are not, or if $from is not a
# known field, the problem is reported through error_handler and whatever
# that returns is returned.  On success returns the aliased entry.
sub alias {
  my ($self, $from, $to) = @_;
  my $positions = $self->{field_pos};
  unless ($positions) {
    return $self->error_handler(
      "Can't call alias before headers are bound");
  }
  return $self->error_handler("'$from' is not available to alias")
    unless exists $positions->{$from};
  # The alias shares the same position (or compute) as the original name.
  $positions->{$to} = $positions->{$from};
}
16
# Register a computed field: $name will be produced by calling the code
# reference $compute instead of reading a column position.
# Reports through error_handler (returning its result) when headers are
# not bound yet or when $compute is not a code reference.
sub add_compute {
  my ($self, $name, $compute) = @_;
  my $positions = $self->{field_pos};
  if (not $positions) {
    return $self->error_handler(
      "Can't call add_compute before headers are bound");
  }
  if (not UNIVERSAL::isa($compute, "CODE")) {
    return $self->error_handler(
      'Usage: $csv->add_compute("name", sub {FUNCTION});');
  }
  # Computes live in the same table as positions; extract tells them
  # apart by checking whether the entry is a reference.
  $positions->{$name} = $compute;
}
28
# Remember the column position of each field name given, in order.
# Replaces any previous binding (including aliases and computes).  When a
# name appears more than once the last occurrence wins.
sub bind_fields {
  my ($self, @names) = @_;
  my %position_of;
  @position_of{@names} = (0 .. $#names);
  $self->{field_pos} = \%position_of;
}
37
# Read the next row with get_row and bind its values as the field names.
sub bind_header {
  my ($self) = @_;
  my @names = $self->get_row();
  $self->bind_fields(@names);
}

# Alternate names for bind_header.
*read_header = \&bind_header;
*read_headers = \&bind_header;
45
# Forget the named fields (positions, aliases or computes).
# Headers must be bound first.  Each name that is not currently known is
# reported through error_handler; the remaining names are still processed
# if that handler returns.
sub delete {
  my ($self, @doomed) = @_;
  my $positions = $self->{field_pos};
  unless ($positions) {
    return $self->error_handler(
      "Can't call delete before headers are bound");
  }
  foreach my $name (@doomed) {
    unless (exists $positions->{$name}) {
      $self->error_handler(
        "Cannot delete field '$name': it doesn't exist");
      next;
    }
    delete $positions->{$name};
  }
}
61
# Pass the given message(s) to the configured error handler callback and
# return whatever it returns.
sub error_handler {
  my $self = shift;
  my $handler = $self->{error_handler};
  return $handler->(@_);
}
66
# Extract the named fields from the current row.
#
# Each name is looked up in the bound field table.  Plain fields are read
# from the row by position; computed fields are calculated by calling the
# stored code reference with the object as its only argument, and the
# result is cached in $self->{cached} for later lookups.
#
# Returns the values as a list in list context, or as an array reference
# otherwise.  The result always has one entry per requested name: a name
# that cannot be resolved (unknown field, or a compute that recursively
# asks for itself) is reported through error_handler and, if the handler
# returns, contributes undef so later values keep their positions.
#
# Reports through error_handler (returning its result) when there is no
# current row or no field binding.
sub extract {
  my $self = shift;
  my $cached_results = $self->{cached} ||= {};
  my $in_compute = $self->{in_compute} ||= {};
  my $row = $self->{row} or return $self->error_handler(
    "No row found (did you call get_row())?");
  my $lookup = $self->{field_pos}
    or return $self->error_handler(
      "Can't find field info (did you bind_fields or read_header?)");
  my @data;
  foreach my $field (@_) {
    unless (exists $lookup->{$field}) {
      my @allowed = sort keys %$lookup;
      $self->error_handler(
        "Invalid field $field for file '$self->{filename}'.\n" .
        "Valid fields are: (@allowed)\n"
      );
      # Only reached when the handler does not die: keep alignment.
      push @data, undef;
      next;
    }
    my $position_or_compute = $lookup->{$field};
    if (not ref($position_or_compute)) {
      # A plain column position.
      push @data, $row->[$position_or_compute];
    }
    elsif (exists $cached_results->{$field}) {
      push @data, $cached_results->{$field};
    }
    elsif ($in_compute->{$field}) {
      $self->error_handler(
        "Infinite recursion detected in computing '$field'");
      # Only reached when the handler does not die: keep alignment.
      push @data, undef;
    }
    else {
      # Have to do compute.  The in-progress marker is localized so that
      # it is removed again when this block is left, even if the compute
      # dies; otherwise a retry after a caught exception would be
      # misreported as infinite recursion.
      local $in_compute->{$field} = 1;
      $cached_results->{$field} = $position_or_compute->($self);
      push @data, $cached_results->{$field};
    }
  }
  return wantarray ? @data : \@data;
}
107
# Extract fields from the current row into a hash keyed by field name.
# With no arguments every field reported by get_fields is extracted.
# Returns the hash in list context and a hash reference otherwise.
sub extract_hash {
  my ($self, @wanted) = @_;
  @wanted = $self->get_fields() unless @wanted;
  my %value_of;
  @value_of{@wanted} = $self->extract(@wanted);
  return %value_of if wantarray;
  return \%value_of;
}
115
# Read the next row and return it via extract_hash (same arguments and
# context rules).  Returns nothing when get_row yields no row.
sub fetchrow_hash {
  my ($self, @wanted) = @_;
  $self->get_row() or return;
  return $self->extract_hash(@wanted);
}
121
# Format one output row from a hash of field => value pairs.
#
# The values are placed into a fresh row according to the bound field
# positions, that row becomes the current row, and the fields named in
# the header are then extract()ed (running any computes) and passed to
# format_row.  Returns the formatted line.
#
# Keys that are not bound fields are reported through warning_handler and
# ignored, as are values supplied for computed fields (a compute has no
# column to store a value in).
#
# Reports through error_handler, returning its result, when no fields are
# bound or no header has been set.
sub format_data {
  my $self = shift;
  my %data = @_;
  my @row;
  my $field_pos = $self->{field_pos}
    or return $self->error_handler(
      "Can't find field info (did you bind_fields or read_header?)"
    );
  while (my ($field, $value) = each %data) {
    my $pos = $field_pos->{$field};
    if (not defined($pos)) {
      $self->warning_handler("Ignoring unknown field '$field'");
    }
    elsif (ref($pos)) {
      # A compute is a code reference; using it as an array index would
      # index by its numeric address.
      $self->warning_handler(
        "Ignoring value supplied for computed field '$field'");
    }
    else {
      $row[$pos] = $value;
    }
  }
  $self->{row} = \@row;
  # The row changed, so compute results cached for the previous row (and
  # any leftover recursion markers) no longer apply.
  delete $self->{cached};
  delete $self->{in_compute};
  my $header = $self->{header}
    or return $self->error_handler(
      "Cannot format_data when no header is set");
  return $self->format_row( $self->extract( @$header ));
}
143
# Return the header fields formatted as an output line by format_row.
# Reports through error_handler (returning its result) when no header has
# been set.
sub format_header {
  my ($self) = @_;
  my $header = $self->{header};
  return $self->format_row(@$header) if $header;
  $self->error_handler("Cannot format_header when no header is set");
}

# Alternate name for format_header.
*format_headers = \&format_header;
155
# Turn a list of values into one output line: fields quoted as needed,
# joined with the separator, terminated by a newline.
#
# Increments the row_num counter.  When row_size_warning is set, the
# first formatted row fixes row_size (if not already set) and later rows
# with a different field count are reported through warning_handler.
sub format_row {
  my ($self, @values) = @_;

  $self->{row_num}++;

  if ($self->{row_size_warning}) {
    my $count = @values;
    if (not exists $self->{row_size}) {
      $self->{row_size} = $count;
    }
    elsif ($count != $self->{row_size}) {
      $self->warning_handler(
        "Formatting $count fields at row $self->{row_num}, "
        . "expected $self->{row_size}"
      );
    }
  }

  my $sep = $self->{sep};
  my ($quote_all, $dont_quote) = @{$self}{qw(quote_all dont_quote)};
  my @cells;
  foreach my $value (@values) {
    my $cell;
    if (not defined($value)) {
      # undef is written as an empty field
      $cell = $quote_all ? qq("") : "";
    }
    elsif ("" eq $value) {
      # The empty string has to be quoted unless dont_quote is set
      $cell = $dont_quote ? "" : qq{""};
    }
    elsif ($value =~ /\s|\Q$sep\E|"/) {
      # Whitespace, the separator or a quote: double embedded quotes,
      # then wrap in quotes unless dont_quote asks for the bare data
      # (the 'proper' quoted format breaks MS SQL Server's bulk insert
      # on date values).
      (my $escaped = $value) =~ s/"/""/g;
      $cell = $dont_quote ? $escaped : qq{"$escaped"};
    }
    else {
      # Unquoted is fine (that is, unless the quote_all option is set)
      $cell = $quote_all ? qq("$value") : $value;
    }
    push @cells, $cell;
  }
  return join($sep, @cells) . "\n";
}
202
# Return the names of all bound fields (including aliases and computes),
# in hash order.  Reports through error_handler, returning its result,
# when headers are not bound.
sub get_fields {
  my ($self) = @_;
  my $positions = $self->{field_pos};
  unless ($positions) {
    return $self->error_handler(
      "Can't call get_fields before headers are bound");
  }
  return keys %$positions;
}
210
# Private block for the variables shared by the small "parse engine".
# The concept here is to use pos to step through a string.
# This is the real engine, all else is syntactic sugar.
#
# NOTE: $self, $fh, $line and $is_error are shared by the three subs in
# this block and are only reassigned by get_row, so the engine is not
# reentrant, and the object most recently passed to get_row stays
# referenced from here until get_row is called again.
{
  my ($self, $fh, $line, $is_error);

  # Read and parse the next row of input.
  #
  # The input handle is $self->{fh}; when that is unset it is opened from
  # $self->{filename} via open_file, or defaults to ARGV (recording
  # "ARGV" as the filename).  Each line read is passed through the filter
  # callback, if any, and chomped before parsing.
  #
  # Returns the fields as a list in list context or as a new array
  # reference otherwise, and stores the row in $self->{row}.  Returns
  # nothing when no handle is available or the input is exhausted.
  # The first row read fixes row_size if it is not already set; later rows
  # of a different size are reported through warning_handler when
  # row_size_warning is set.
  sub get_row {
    $self = shift;
    $is_error = 0;
    # A new row invalidates the previous row and its compute state.
    delete $self->{row};
    delete $self->{cached};
    delete $self->{in_compute};
    $fh = ($self->{fh}
      ||= $self->{filename}
        ? $self->open_file($self->{filename}, "<")
        : ($self->{filename} = "ARGV", \*ARGV)
        # Sorry for the above convoluted way to sneak in defining filename.
    );
    return unless $fh;
    defined($line = <$fh>) or return;
    if ($self->{filter}) {
      $line = $self->{filter}->($line);
    }
    chomp($line);
    my @row = _get_row();
    if ($is_error) {
      # The parser reported an error and the handler returned: hand back
      # whatever it produced without recording a row.
      return @row[0..$#row];
    }
    if (not exists $self->{row_size}) {
      $self->{row_size} = @row;
    }
    elsif (not $self->{row_size_warning}) {
      # The user asked not to get this warning, so don't issue it.
    }
    elsif ($self->{row_size} != @row) {
      my $new = @row;
      my $where = "Line $., file $self->{filename}";
      $self->warning_handler(
        "$where had $new fields, expected $self->{row_size}" );
    }
    $self->{row} = \@row;
    return wantarray ? @row : [@row];
  }

  # Split the current $line into fields, returning them as a list.
  # Quoted fields are delegated to _get_quoted (which may read further
  # lines); empty unquoted fields become undef.  When a field is not
  # followed by the separator or the end of the line, strict mode reports
  # an error through error_handler, while non-strict mode warns and tries
  # to continue the field as if the quote had been literal.
  sub _get_row {
    my @row;
    my $q_sep = quotemeta($self->{sep});
    my $match_sep = qr/\G$q_sep/;
    my $start_field = qr/\G(")/;
    my $start_field_ms = qr/\G([^"$q_sep]*)/;

    # This loop is the heart of the engine
    while ($line =~ /$start_field/gc or $line =~ /$start_field_ms/gc ) {
      if ($1 eq '"') {
        push @row, _get_quoted();
      }
      else {
        # Needed for Microsoft compatibility
        push @row, length($1) ? $1 : undef;
      }
      # Remember where the field ended: the separator match below has no
      # /c modifier, so a failure resets pos($line).
      my $pos = pos($line);
      if ($line !~ /$match_sep/g) {
        if ($pos == length($line)) {
          return @row;
        }
        elsif ($self->{strict}) {
          my $expected = "Expected '$self->{sep}'";
          $is_error = 1;
          return $self->error_handler(
            "$expected at $self->{filename}, line $., char $pos");
        }
        else {
          TRY: {
            my $expected = "Expected '$self->{sep}'";
            $self->warning_handler(
              "$expected at $self->{filename}, line $., char $pos");

            # Assume we are in non-strict mode and encountered a single "
            # so we need to recover and finish my quoted field.
            # Resume where the field stopped; without restoring pos the
            # scan would start over at the beginning of the line.
            pos($line) = $pos;
            $row[-1] .= '"' . _get_quoted();
            $pos = pos($line);
            if ($line !~ /$match_sep/g) {
              if ($pos == length($line)) {
                return @row;
              }
              else {
                redo TRY;
              }
            }
          }
        }
      }
    }
    $is_error = 1;
    $self->error_handler(
      "I have no idea how parsing $self->{filename} left me here!");
  }

  # Read the remainder of a quoted field, starting just after its opening
  # quote at pos($line), and return the unquoted text.  A doubled quote
  # stands for one literal quote.  If the line ends before the closing
  # quote, $/ is appended and the next line is read (filtered and chomped
  # like in get_row).  Croaks if the input ends inside the field.
  sub _get_quoted {
    my $piece = "";
    my $start_line = $.;
    my $start_pos = pos($line);

    # The loop is only left by the return or by croak.
    while(1) {
      if ($line =~ /\G([^"]+)/gc) {
        # sequence of non-quote characters
        $piece .= $1;
      } elsif ($line =~ /\G""/gc) {
        # replace "" with "
        $piece .= '"';
      } elsif ($line =~ /\G"/g) {
        # closing quote
        return $piece;  # EXIT HERE
      }
      else {
        # Must be at end of line
        $piece .= $/;
        unless(defined($line = <$fh>)) {
          croak(
            "File $self->{filename} ended inside a quoted field\n"
              . "Field started at char $start_pos, line $start_line\n"
          );
        }
        if ($self->{filter}) {
          $line = $self->{filter}->($line);
        }
        chomp($line);
      }
    }
  }
}
345
# Attributes that get a plain set_NAME method which stores its argument
# under NAME and returns it.
my @normal_accessors = qw(
  close_fh error_handler warning_handler filename filter fh
  row_size row_size_warning strict
);
for my $field (@normal_accessors) {
  my $setter = sub {
    my ($self, $value) = @_;
    $self->{$field} = $value;
  };
  no strict 'refs';
  *{"set_$field"} = $setter;
}

# These two are mutually exclusive: their setters store the value and then
# report through error_handler if both options are now set.
for my $flag (qw(dont_quote quote_all)) {
  my $setter = sub {
    my ($self, $value) = @_;
    $self->{$flag} = $value;
    if ($self->{dont_quote} and $self->{quote_all}) {
      $self->error_handler("Can't set both dont_quote and quote_all");
    }
  };
  no strict 'refs';
  *{"set_$flag"} = $setter;
}
368
# Constructor.  Takes a hash of options; each option NAME is applied by
# calling the matching set_NAME method.  Croaks on an option that is not
# in the allowed list.
sub new {
  my $class = shift;
  my $self = bless({}, $class);

  my %allowed = map { $_ => 1 }
    @normal_accessors, qw(header headers row sep dont_quote quote_all);

  # Defaults first, then the caller's options so that they override.
  my %args = (
    error_handler => \&confess,
    filter => sub {
      # Strip a trailing carriage return from the line.
      (my $line = shift) =~ s/\r$//;
      $line;
    },
    sep => ",",
    row_size_warning => 1,
    close_fh => 0,
    strict => 1,
    @_
  );

  # Note, must set error_handler and warning_handler first because they
  # might get called while processing the other args.
  my @ordered = ('error_handler', 'warning_handler', keys %args);
  foreach my $arg (@ordered) {
    if (not exists $allowed{$arg}) {
      my @allowed = sort keys %allowed;
      croak("Invalid argument '$arg', allowed args: (@allowed)");
    }
    my $setter = "set_$arg";
    $self->$setter($args{$arg});
  }
  return $self;
}
398
# Open a file and make it this object's filehandle.
#
# Note the undocumented third argument for the mode.  It is only applied
# when the filename contains none of the characters | < >, so most of the
# time this will do what is wanted without requiring Perl 5.6 or better,
# and users who supply their own metacharacters will not be surprised at
# the result.
#
# Records the filename, and on success sets close_fh and fh and returns
# the handle.  A missing filename is reported through error_handler and
# its result returned.  A failed open is reported through error_handler
# and then 0 is returned, because the user's error handler cannot be
# assumed to die.
sub open_file {
  my $self = shift;
  my $name = shift;
  return $self->error_handler(
    "No filename specified at open_file"
  ) unless $name;
  my $file = $self->{filename} = $name;
  if (@_ and $file !~ /\||<|>/) {
    my $mode = shift;
    $file = "$mode $file";
  }
  my $fh = do {local *FH}; # Old trick, not needed in 5.6
  if (open ($fh, $file)) {
    $self->{close_fh} = 1;
    return $self->{fh} = $fh;
  }
  $self->error_handler("Cannot open '$file': $!");
  return 0;
}
420
# Print the arguments to the output handle and count the call in row_out.
# When no handle is set yet it is opened for writing from the filename via
# open_file, or defaults to STDOUT (recording "STDOUT" as the filename).
# Returns nothing if no handle could be obtained; a failed print is
# reported through error_handler.
sub print {
  my $self = shift;
  $self->{row_out}++;
  my $fh = $self->{fh};
  unless ($fh) {
    if ($self->{filename}) {
      $fh = $self->open_file($self->{filename}, ">");
    }
    else {
      $self->{filename} = "STDOUT";
      $fh = \*STDOUT;
    }
    $self->{fh} = $fh;
  }
  return unless $fh;
  print $fh @_ or $self->error_handler( "Print #$self->{row_out}: $!" );
}
433
# Format a hash of field => value pairs with format_data and print it.
sub print_data {
  my ($self, @pairs) = @_;
  my @formatted = $self->format_data(@pairs);
  return $self->print(@formatted);
}
438
# Format the header with format_header and print it.
sub print_header {
  my ($self, @args) = @_;
  my @formatted = $self->format_header(@args);
  return $self->print(@formatted);
}

# Alternate name for print_header.
*print_headers = \&print_header;
445
# Format a list of values with format_row and print the resulting line.
sub print_row {
  my ($self, @values) = @_;
  my @formatted = $self->format_row(@values);
  return $self->print(@formatted);
}
450
# Set the header: the ordered list of field names used by format_header
# and format_data.  Accepts either a single array reference, which is
# stored as given, or a plain list of names, which is copied.
# If no fields have been bound yet the header is also bound with
# bind_fields.  The return value is not meaningful.
sub set_header {
  my $self = shift;
  if (1 == @_ and UNIVERSAL::isa($_[0], 'ARRAY')) {
    $self->{header} = $_[0];
  }
  else {
    # Copy the names: a reference to @_ itself would keep its elements
    # aliased to the caller's variables, so a later change to one of
    # those variables would silently change the header.
    $self->{header} = [@_];
  }
  if (not exists $self->{field_pos}) {
    $self->bind_fields(@{$self->{header}});
  }
}

# Alternate name for set_header.
*set_headers = \&set_header;
465
# Set the field separator, which must be exactly one character long.
# Anything else is reported through error_handler and not stored.
sub set_sep {
  my ($self, $sep) = @_;
  # The reason for this limitation is so that $start_field in _get_row
  # will do what it is supposed to.  (A negative lookahead there would
  # lift it.)
  if (1 != length($sep)) {
    $self->error_handler("The separator '$sep' is not of length 1");
  }
  else {
    $self->{sep} = $sep;
  }
}
479
# Deliver a warning.  If a warning handler callback is configured it is
# called with the message(s) and its result returned.  Otherwise the error
# handler is run inside eval and whatever it died with is emitted through
# warn.
sub warning_handler {
  my $self = shift;
  my $handler = $self->{warning_handler};
  return $handler->(@_) if $handler;
  eval { $self->{error_handler}->(@_) };
  warn $@ if $@;
}
490
# Destructor: close the filehandle when close_fh is set.
# A failed close is reported through error_handler.
sub DESTROY {
  my $self = shift;
  # Keep the close below from clobbering the caller's error and status
  # variables; a destructor can run in the middle of other code.
  local ($., $@, $!, $^E, $?);
  # Nothing to do unless we were asked to close and a handle exists
  # (close_fh can be set without a handle ever having been opened).
  return unless $self->{close_fh} and defined $self->{fh};
  close($self->{fh}) or $self->error_handler(
    $! ? "Cannot close '$self->{filename}': $!"
       : "Exit status $? closing '$self->{filename}'"
  );
}
500
5011;
502
503__END__
504
505=head1 NAME
506
507Text::xSV - read character separated files
508
509=head1 SYNOPSIS
510
511  use Text::xSV;
512  my $csv = new Text::xSV;
513  $csv->open_file("foo.csv");
514  $csv->read_header();
515  # Make the headers case insensitive
516  foreach my $field ($csv->get_fields) {
517    if (lc($field) ne $field) {
518      $csv->alias($field, lc($field));
519    }
520  }
521
522  $csv->add_compute("message", sub {
523    my $csv = shift;
524    my ($name, $age) = $csv->extract(qw(name age));
525    return "$name is $age years old\n";
526  });
527
528  while ($csv->get_row()) {
529    my ($name, $age) = $csv->extract(qw(name age));
530    print "$name is $age years old\n";
531    # Same as
532    #   print $csv->extract("message");
533  }
534
535  # The file above could have been created with:
536  my $csv = Text::xSV->new(
537    filename => "foo.csv",
538    header   => ["Name", "Age", "Sex"],
539  );
540  $csv->print_header();
541  $csv->print_row("Ben Tilly", 34, "M");
542  # Same thing.
543  $csv->print_data(
544    Age  => 34,
545    Name => "Ben Tilly",
546    Sex  => "M",
547  );
548
549=head1 DESCRIPTION
550
551This module is for reading and writing a common variation of character
552separated data.  The most common example is comma-separated.  However
553that is far from the only possibility, the same basic format is
554exported by Microsoft products using tabs, colons, or other characters.
555
556The format is a series of rows separated by returns.  Within each row
557you have a series of fields separated by your character separator.
558Fields may either be unquoted, in which case they do not contain a
559double-quote, separator, or return, or they are quoted, in which case
560they may contain anything, and will encode double-quotes by pairing
561them.  In Microsoft products, quoted fields are strings and unquoted
562fields can be interpreted as being of various datatypes based on a
563set of heuristics.  By and large this fact is irrelevant in Perl
because Perl is largely untyped.  The one exception that this module
handles is that empty unquoted fields are treated as nulls, which are
represented in Perl as undefined values.  If you want a zero-length
string, quote it.
568
569People usually naively solve this with split.  A next step up is to
570read a line and parse it.  Unfortunately this choice of interface
571(which is made by Text::CSV on CPAN) makes it difficult to handle
572returns embedded in a field.  (Earlier versions of this document
573claimed impossible.  That is false.  But the calling code has to
574supply the logic to add lines until you have a valid row.  To the
575extent that you don't do this consistently, your code will be buggy.)
Therefore it is good for the parsing logic to have access to the
whole file.
578
This module solves the problem by creating an xSV object with access to
the filehandle.  If in parsing it notices that a new line is needed, it
can read at will.
582
583=head1 USAGE
584
First you set up and initialize an object, then you read the xSV file
through it.  The constructor can also perform multiple initializations.
Here are the available methods:
588
589=over 4
590
591=item C<new>
592
593This is the constructor.  It takes a hash of optional arguments.
594They correspond to the following set_* methods without the set_ prefix.
595For instance if you pass filename=>... in, then set_filename will be
596called.
597
598=over 8
599
600=item C<set_sep>
601
602Sets the one character separator that divides fields.  Defaults to a
603comma.
604
605=item C<set_filename>
606
607The filename of the xSV file that you are reading.  Used heavily in
608error reporting.  If fh is not set and filename is, then fh will be
609set to the result of calling open on filename.
610
611=item C<set_fh>
612
613Sets the fh that this Text::xSV object will read from or write to.  If it
614is not set, it will be set to the result of opening filename if that
615is set, otherwise it will default to ARGV (ie acts like <>) or STDOUT,
616depending on whether you first try to read or write.  The old default
617used to be STDIN.
618
619=item C<set_header>
620
621Sets the internal header array of fields that is referred to in
622arranging data on the *_data output methods.  If C<bind_fields> has
623not been called, also calls that on the assumption that the fields
624that you want to output matches the fields that you will provide.
625
626The return from this function is inconsistent and should not be
627relied on to be anything useful.
628
629=item C<set_headers>
630
631An alias to C<set_header>.
632
633=item C<set_error_handler>
634
635The error handler is an anonymous function which is expected to
636take an error message and do something useful with it.  The
637default error handler is Carp::confess.  Error handlers that do
638not trip exceptions (eg with die) are less tested and may not work
639perfectly in all circumstances.
640
641=item C<set_warning_handler>
642
643The warning handler is an anonymous function which is expected to
644take a warning and do something useful with it.  If no warning
645handler is supplied, the error handler is wrapped with C<eval>
646and the trapped error is warned.
647
648=item C<set_filter>
649
650The filter is an anonymous function which is expected to
651accept a line of input, and return a filtered line of output.  The
652default filter removes \r so that Windows files can be read under
653Unix.  This could also be used to, eg, strip out Microsoft smart
654quotes.
655
=item C<set_quote_all>
657
658The quote_all option simply puts every output field into
659double quotation marks.  This can't be set if C<dont_quote> is.
660
661=item C<set_dont_quote>
662
663The dont_quote option turns off the otherwise mandatory quotation marks
664that bracket the data fields when there are separator characters, spaces
665or other non-printable characters in the data field.  This is perhaps a
666bit antithetical to the idea of safely enclosing data fields in
667quotation marks, but some applications, for instance Microsoft SQL
668Server's BULK INSERT, can't handle them.  This can't be set if
669C<quote_all> is.
670
671=item C<set_row_size>
672
673The number of elements that you expect to see in each row.  It
674defaults to the size of the first row read or set.  If
675row_size_warning is true and the size of the row read or formatted
676does not match, then a warning is issued.
677
678=item C<set_row_size_warning>
679
680Determines whether or not to issue warnings when the row read or set
681has a number of fields different than the expected number.  Defaults
682to true.  Whether or not this is on, missing fields are always read
683as undef, and extra fields are ignored.
684
685=item C<set_close_fh>
686
687Whether or not to close fh when the object is DESTROYed.  Defaults
688to false if fh was passed in, or true if the object has to open its
689own fh.  (This may be removed in a future version.)
690
691=item C<set_strict>
692
693In strict mode a single " within a quoted field is an error.  In
694non-strict mode it is a warning.  The default is strict.
695
696=back
697
698=item C<open_file>
699
700Takes the name of a file, opens it, then sets the filename and fh.
701
702=item C<bind_fields>
703
704Takes an array of fieldnames, memorizes the field positions for later
705use.  C<read_header> is preferred.
706
707=item C<read_header>
708
709Reads a row from the file as a header line and memorizes the positions
710of the fields for later use.  File formats that carry field information
711tend to be far more robust than ones which do not, so this is the
712preferred function.
713
714=item C<read_headers>
715
716An alias for C<read_header>.  (If I'm going to keep on typing the plural,
717I'll just make it work...)
718
719=item C<bind_header>
720
721Another alias for C<read_header> maintained for backwards compatibility.
722Deprecated because the name doesn't distinguish it well enough from the
723unrelated C<set_header>.
724
725=item C<get_row>
726
727Reads a row from the file.  Returns an array or reference to an array
728depending on context.  Will also store the row in the row property for
729later access.
730
731=item C<extract>
732
733Extracts a list of fields out of the last row read.  In list context
734returns the list, in scalar context returns an anonymous array.
735
736=item C<extract_hash>
737
738Extracts fields into a hash.  If a list of fields is passed, that is
739the list of fields that go into the hash.  If no list, it extracts all
740fields that it knows about.  In list context returns the hash.  In
741scalar context returns a reference to the hash.
742
743=item C<fetchrow_hash>
744
745Combines C<get_row> and C<extract_hash> to fetch the next row and return a
746hash or hashref depending on context.
747
748=item C<alias>
749
750Makes an existing field available under a new name.
751
752  $csv->alias($old_name, $new_name);
753
754=item C<get_fields>
755
756Returns a list of all known fields in no particular order.
757
758=item C<add_compute>
759
760Adds an arbitrary compute.  A compute is an arbitrary anonymous
761function.  When the computed field is extracted, Text::xSV will call
762the compute in scalar context with the Text::xSV object as the only
763argument.
764
765Text::xSV caches results in case computes call other computes.  It
766will also catch infinite recursion with a hopefully useful message.
767
768=item C<format_row>
769
770Takes a list of fields, and returns them quoted as necessary, joined with
771sep, with a newline at the end.
772
773=item C<format_header>
774
775Returns the formatted header row based on what was submitted with
776C<set_header>.  Will cause an error if C<set_header> was not called.
777
778=item C<format_headers>
779
780Continuing the meme, an alias for format_header.
781
782=item C<format_data>
783
784Takes a hash of data.  Sets internal data, and then formats
785the result of C<extract>ing out the fields corresponding to the
786headers.  Note that if you called C<bind_fields> and then defined
787some more fields with C<add_compute>, computes would be done for you
788on the fly.
789
790=item C<print>
791
792Prints the arguments directly to fh.  If fh is not supplied but filename
793is, first sets fh to the result of opening filename.  Otherwise it
794defaults fh to STDOUT.  You probably don't want to use this directly.
795Instead use one of the other print methods.
796
797=item C<print_row>
798
799Does a C<print> of C<format_row>.  Convenient when you wish to maintain
800your knowledge of the field order.
801
802=item C<print_header>
803
804Does a C<print> of C<format_header>.  Makes sense when you will be
805using print_data for your actual data because the field order is
806guaranteed to match up.
807
808=item C<print_headers>
809
810An alias to C<print_header>.
811
812=item C<print_data>
813
814Does a C<print> of C<format_data>.  Relieves you from having to
815synchronize field order in your code.
816
817=back
818
819=head1 TODO
820
821Add utility interfaces.  (Suggested by Ken Clark.)
822
823Offer an option for working around the broken tab-delimited output
824that some versions of Excel present for cut-and-paste.
825
826Add tests for the output half of the module.
827
828=head1 BUGS
829
830When I say single character separator, I mean it.
831
832Performance could be better.  That is largely because the API was
833chosen for simplicity of a "proof of concept", rather than for
performance.  One idea to speed it up would be to provide an
835API where you bind the requested fields once and then fetch many
836times rather than binding the request for every row.
837
838Also note that should you ever play around with the special variables
839$`, $&, or $', you will find that it can get much, much slower.  The
840cause of this problem is that Perl only calculates those if it has
841ever seen one of those.  This does many, many matches and calculating
842those is slow.
843
844I need to find out what conversions are done by Microsoft products
845that Perl won't do on the fly upon trying to use the values.
846
847=head1 ACKNOWLEDGEMENTS
848
849My thanks to people who have given me feedback on how they would like
850to use this module, and particularly to Klaus Weidner for his patch
851fixing a nasty segmentation fault from a stack overflow in the regular
852expression engine on large fields.
853
854Rob Kinyon (dragonchild) motivated me to do the writing interface, and
855gave me useful feedback on what it should look like.  I'm not sure that
856he likes the result, but it is how I understood what he said...
857
858Jess Robinson (castaway) convinced me that ARGV was a better default
859input handle than STDIN.  I hope that switching that default doesn't
860inconvenience anyone.
861
862Gyepi SAM noticed that fetchrow_hash complained about missing data at
863the end of the loop and sent a patch.  Applied.
864
865shotgunefx noticed that bind_header changed its return between versions.
866It is actually worse than that, it changes its return if you call it
867twice.  Documented that its return should not be relied upon.
868
869Fred Steinberg found that writes did not happen promptly upon closing
870the object.  This turned out to be a self-reference causing a DESTROY
871bug.  I fixed it.
872
873Carey Drake and Steve Caldwell noticed that the default
874warning_handler expected different arguments than it got.  Both
875suggested the same fix that I implemented.
876
877Geoff Gariepy suggested adding dont_quote and quote_all.  Then found a
878silly bug in my first implementation.
879
880Ryan Martin improved read performance over 75% with a small patch.
881
882Bauernhaus Panoramablick and Geoff Gariepy convinced me to add the
883ability to get non-strict mode.
884
885=head1 AUTHOR AND COPYRIGHT
886
Ben Tilly (btilly@gmail.com).  Originally posted at
http://www.perlmonks.org/?node_id=65094.
889
890Copyright 2001-2009.  This may be modified and distributed on the same
891terms as Perl.
892