1=head1 NAME
2
3Text::Delimited - Module for parsing delimited text files
4
5=head1 SYNOPSIS
6
7Text::Delimited provides a programattical interface to data stored in
8delimited text files. It is dependant upon the first row of the text
9file containing header information for each corresponding "column" in the
10remainder of the file.
11
12After instancing, for each call to Read the next row's data is returned as
13a hash reference. The individual elements are keyed by their corresonding
14column headings.
15
16=head1 USAGE
17
18A short example of usage is detailed below. It opens a pipe delimited file
19called 'infile.txt', reads through every row and prints out the data from
20"COLUMN1" in that row. It then closes the file.
21
22  my $file = new Text::Delimited;
23  $file->delimiter('|');
24  $file->open('infile.txt');
25
26  my @header = $file->fields;
27
28  while ( my $row = $file->read ) {
29    print $row->{COLUMN1}, "\n";
30  }
31
32  $file->close;
33
34The close() method is atuomatically called when the object passes out of
35scope. However, you should not depend on this. Use close() when
36approrpiate.
37
38Other informational methods are also available. They are listed blow:
39
40=head1 METHODS:
41
42=over
43
44=item close()
45
46Closes the file or connection, and cleans up various bits.
47
48=item delimiter(delimiter)
49
50Allows you to set the delimiter if a value is given. The default
51delimiter is a tab. Returns the delimiter.
52
53=item fields()
54
55Returns an array (or arrayref, depending on the requested context) with
56the column header fields in the order specified by the source file.
57
58=item filename()
59
60If open() was given a filename, this function will return that value.
61
62=item linenumber()
63
64This returns the line number of the last line read. If no calls to Read
65have been made, will be 0. After the first call to Read, this will return
661, etc.
67
68=item new([filename|filepointer],[enumerate])
69
70Creates a new Text::Delimited object. Takes optional parameter that is either
71a filename or a globbed filehandle. Files specified by filename must
72already exist.
73
74Can optionally take a second argument. If this argument evaluates to true,
75Text::Delimited will append a _NUM to the end of all fields with duplicate names.
76That is, if your header row contains 2 columns named "NAME", one will be
77changed to NAME_1, the other to NAME_2.
78
79=item open([filename|filepointer], [enumerate])
80
81Opens the given filename or globbed filehandle and reads the header line.
82Returns 0 if the operation failed. Returns the file object if succeeds.
83
84Can optionally take a second argument. If this argument evaluates to true,
85Text::Delimited will append a _NUM to the end of all fields with duplicate names.
86That is, if your header row contains 2 columns named "NAME", one will be
87changed to NAME_1, the other to NAME_2.
88
89=item read()
90
91Returns a hashref with the next record of data. The hash keys are determined
92by the header line.
93
94__DATA__ and __LINE__ are also returned as keys.
95
96__DATA__ is an arrayref with the record values in order.
97
98__LINE__ is a string with the original tab-separated record.
99
100This method returns undef if there is no more data to be read.
101
102=item setmode(encoding)
103
104Set the given encoding scheme on the input file to allow for reading files
105encoded in standards other than ASCII.
106
107=back
108
109=head1 EXPORTABLE METHODS
110
111For convienience, the following methods are exportable. These are handy
112for quickly writing output delimited files.
113
114=over
115
116=item d_join(@STUFF)
117
118Delimited Join. Returns the given array as a string joined with the
119current delimiter.
120
121=item d_line(@STUFF)
122
123Delimited Line. Returns the given array as a string joined with the
124current delimiter and with newline appended.
125
126=back
127
128=head1 BUGS AND SOURCE
129
130	Bug tracking for this module: https://rt.cpan.org/Dist/Display.html?Name=Text-Delimited
131
132	Source hosting: http://www.github.com/bennie/perl-Text-Delimited
133
134=head1 VERSION
135
136    Text::Delimited v2.11 (2014/04/30)
137
138=head1 COPYRIGHT
139
140    (c) 2004-2014, Phillip Pollard <bennie@cpan.org>
141
142=head1 LICENSE
143
144This source code is released under the "Perl Artistic License 2.0," the text of
145which is included in the LICENSE file of this distribution. It may also be
146reviewed here: http://opensource.org/licenses/artistic-license-2.0
147
148=head1 AUTHORSHIP
149
150    I'd like to thank PetBlvd for sponsoring continued work on this module.
151    http://www.petblvd.com/
152
153    Additional contributions by Kristina Davis <krd@menagerie.tf>
154    Based upon the original module by Andrew Barnett <abarnett@hmsonline.com>
155
156    Originally derived from Util::TabFile 1.9 2003/11/05
157    With permission granted from Health Market Science, Inc.
158
159=cut
160
161package Text::Delimited;
162
163use Symbol;
164
165use 5.006001;
166use warnings;
167use strict;
168
169$Text::Delimited::VERSION = '2.11';
170
171### Private mthods
172
173sub DESTROY {
174  return $_[0]->Close;
175}
176
177sub _line {
178  my $self = shift @_;
179  $self->{CURRENT_LINE} = readline($self->{FP});
180  $self->{CURRENT_LINE} =~ s/[\r\n]+$//;
181  $self->{CURRENT_DATA} = [ split /\Q$self->{DELIMITER}\E/o, $self->{CURRENT_LINE} ];
182  $self->{LINE_NUMBER}++;
183  return $self->{CURRENT_DATA};
184}
185
186### Public Methods
187
188sub Close {
189  my $self = shift @_;
190  return $self->close(@_);
191}
192
193sub close {
194  my $self = shift @_;
195  close $self->{FP} if $self->{FP};
196
197  $self->{CURRENT_DATA} = $self->{CURRENT_LINE} = $self->{FILENAME} =
198  $self->{FP} = $self->{HDR} = $self->{LINE_NUMBER} = undef;
199
200  return 1;
201}
202
203sub Delimiter {
204  my $self = shift @_;
205  return $self->delimiter(@_);
206}
207
208sub delimiter {
209  my $self = shift @_;
210  my $new  = shift @_;
211
212  if ( $new and $self->{LINE_NUMBER} > 0 ) {
213    warn "You cannot change the delimiter after you have opened the file for processing.\n";
214  } elsif ( $new ) {
215    $self->{DELIMITER} = $new;
216  }
217
218  return $self->{DELIMITER};
219}
220
221sub Fields {
222  my $self = shift @_;
223  return $self->fields(@_);
224}
225
226sub fields {
227  my $self = shift @_;
228  return wantarray ? @{$self->{HDR}} : $self->{HDR};
229}
230
231sub FileName {
232  my $self = shift @_;
233  return $self->filename(@_);
234}
235
236sub filename {
237  return $_[0]->{FILENAME};
238}
239
240sub LineNumber {
241  my $self = shift @_;
242  return $self->linenumber(@_);
243}
244
245sub linenumber {
246  return $_[0]->{LINE_NUMBER};
247}
248
249sub new {
250  my $class = shift @_;
251  my $file  = shift @_;
252  my $enumerate = shift @_;
253
254  my $self = {
255    DELIMITER   => "\t",
256    LINE_NUMBER => 0
257  };
258
259  bless $self, $class;
260
261  $self->_init;
262
263  my $status = $self->open($file, $enumerate) if $file;
264
265  return $self;
266}
267
268sub _init { }
269
270sub Open {
271  my $self = shift @_;
272  return $self->open(@_);
273}
274
275sub open {
276    my $self = shift @_;
277    my $file = shift @_;
278    my $enumerate = shift @_;
279
280    $self->{ENUMERATE} = $enumerate;
281
282    if ( ref($file) eq 'GLOB' || not $file ) {
283      $self->{FP} = $file || \*STDIN;
284      $self->{FILENAME} = 'GLOB';
285    } elsif ( -r $file ) {
286      $self->{FP} = gensym;
287      open $self->{FP}, $file or die "Can't open the file $file\n";
288      $self->{FILENAME} = $file;
289    } else {
290      die "$file is neither a filehandle or an existing, readable file.";
291    }
292
293    $self->{HDR} = $self->_line;
294
295    my %fields = ( );
296    my %dupes = ( );
297    for (my $i = 0; $i < scalar @{$self->{HDR}}; $i++) {
298	my $field = ${$self->{HDR}}[$i];
299	if ($fields{$field}) {
300	    if ($self->{ENUMERATE} > 0) {
301		$dupes{$field} += 1;
302		${$self->{HDR}}[$i] = "$field\_$dupes{$field}";
303	    }
304	    else {
305		die "ERROR: There is a duplicate column name: $field.  This is not good.\n";
306	    }
307	}
308	$fields{$field} = 1;
309    }
310    return 1;
311}
312
313sub Read {
314  my $self = shift @_;
315  return $self->read(@_);
316}
317
318sub read {
319  my $self = shift @_;
320  my $out  = {};
321
322  my $data = $self->_line;
323
324  return undef unless scalar @$data > 0;
325
326  my $i = 0;
327  for my $val ( @$data ) {
328    $val =~ s/^\s+|\s+$//g;
329    $out->{$self->{HDR}->[$i++]} = $val;
330  }
331
332  $out->{__DATA__} = $self->{CURRENT_DATA};
333  $out->{__LINE__} = $self->{CURRENT_LINE};
334
335  return $out;
336}
337
338sub setMode {
339  my $self = shift @_;
340  return $self->setMode(@_);
341}
342
343sub setmode {
344  my $self = shift @_;
345  my $mode = shift @_;
346  return binmode $self->{FP}, $mode;
347}
348
349### Exportable methods
350
351sub d_join {
352  if ( ref($_[0]) ) {
353    my $self = shift @_;
354    return join($self->{DELIMITER},map {defined($_)?$_:''} @_);
355  } else {
356    return join("\t",map {defined($_)?$_:''} @_);
357  }
358}
359
360sub d_line {
361  if ( ref($_[0]) ) {
362    my $self = shift @_;
363    return join($self->{DELIMITER},map {defined($_)?$_:''} @_) . "\n";
364  } else {
365    return join("\t",map {defined($_)?$_:''} @_) . "\n";
366  }
367}
368
3691;
370