1package MIME::Parser::Reader;
2
3=head1 NAME
4
5MIME::Parser::Reader - a line-oriented reader for a MIME::Parser
6
7
8=head1 SYNOPSIS
9
10This module is used internally by MIME::Parser; you probably
11don't need to be looking at it at all.  But just in case...
12
13    ### Create a top-level reader, where chunks end at EOF:
14    $rdr = MIME::Parser::Reader->new();
15
16    ### Spawn a child reader, where chunks also end at a boundary:
17    $subrdr = $rdr->spawn->add_boundary($bound);
18
19    ### Spawn a child reader, where chunks also end at a given string:
20    $subrdr = $rdr->spawn->add_terminator($string);
21
22    ### Read until boundary or terminator:
23    $subrdr->read_chunk($in, $out);
24
25
26=head1 DESCRIPTION
27
28A line-oriented reader which can deal with virtual end-of-stream
29defined by a collection of boundaries.
30
31B<Warning:> this is a private class solely for use by MIME::Parser.
32This class has no official public interface
33
34=cut
35
36use strict;
37
38### All possible end-of-line sequences.
39### Note that "" is included because last line of stream may have no newline!
40my @EOLs = ("", "\r", "\n", "\r\n", "\n\r");
41
42### Long line:
43my $LONGLINE = ' ' x 1000;
44
45
46#------------------------------
47#
48# new
49#
50# I<Class method.>
51# Construct an empty (top-level) reader.
52#
53sub new {
54    my ($class) = @_;
55    my $eos;
56    return bless {
57	Bounds => [],
58	BH     => {},
59	TH     => {},
60	EOS    => \$eos,
61    }, $class;
62}
63
64#------------------------------
65#
66# spawn
67#
68# I<Instance method.>
69# Return a reader which is mostly a duplicate, except that the EOS
70# accumulator is shared.
71#
72sub spawn {
73    my $self = shift;
74    my $dup = bless {}, ref($self);
75    $dup->{Bounds} = [ @{$self->{Bounds}} ];  ### deep copy
76    $dup->{BH}     = { %{$self->{BH}} };      ### deep copy
77    $dup->{TH}     = { %{$self->{TH}} };      ### deep copy
78    $dup->{EOS}    = $self->{EOS};            ### shallow copy; same ref!
79    $dup;
80}
81
82#------------------------------
83#
84# add_boundary BOUND
85#
86# I<Instance method.>
87# Let BOUND be the new innermost boundary.  Returns self.
88#
89sub add_boundary {
90    my ($self, $bound) = @_;
91    unshift @{$self->{Bounds}}, $bound;   ### now at index 0
92    $self->{BH}{"--$bound"}   = "DELIM $bound";
93    $self->{BH}{"--$bound--"} = "CLOSE $bound";
94    $self;
95}
96
97#------------------------------
98#
99# add_terminator LINE
100#
101# I<Instance method.>
102# Let LINE be another terminator.  Returns self.
103#
104sub add_terminator {
105    my ($self, $line) = @_;
106    foreach (@EOLs) {
107	$self->{TH}{"$line$_"} = "DONE $line";
108    }
109    $self;
110}
111
112#------------------------------
113#
114# has_bounds
115#
116# I<Instance method.>
117# Are there boundaries to contend with?
118#
119sub has_bounds {
120    scalar(@{shift->{Bounds}});
121}
122
123#------------------------------
124#
125# depth
126#
127# I<Instance method.>
128# How many levels are there?
129#
130sub depth {
131    scalar(@{shift->{Bounds}});
132}
133
134#------------------------------
135#
136# eos [EOS]
137#
138# I<Instance method.>
139# Return the last end-of-stream token seen.
140# See read_chunk() for what these might be.
141#
142sub eos {
143    my $self = shift;
144    ${$self->{EOS}} = $_[0] if @_;
145    ${$self->{EOS}};
146}
147
148#------------------------------
149#
150# eos_type [EOSTOKEN]
151#
152# I<Instance method.>
153# Return the high-level type of the given token (defaults to our token).
154#
155#    DELIM       saw an innermost boundary like --xyz
156#    CLOSE       saw an innermost boundary like --xyz--
157#    DONE        callback returned false
158#    EOF         end of file
159#    EXT         saw boundary of some higher-level
160#
161sub eos_type {
162    my ($self, $eos) = @_;
163    $eos = $self->eos if (@_ == 1);
164
165    if    ($eos =~ /^(DONE|EOF)/) {
166	return $1;
167    }
168    elsif ($eos =~ /^(DELIM|CLOSE) (.*)$/) {
169	return (($2 eq $self->{Bounds}[0]) ? $1 : 'EXT');
170    }
171    else {
172	die("internal error: unable to classify boundary token ($eos)");
173    }
174}
175
176#------------------------------
177#
178# native_handle HANDLE
179#
180# I<Function.>
181# Can we do native i/o on HANDLE?  If true, returns the handle
182# that will respond to native I/O calls; else, returns undef.
183#
184sub native_handle {
185    my $fh = shift;
186    return $fh if ($fh->isa('IO::File') || $fh->isa('IO::Handle'));
187    return $fh if (ref $fh eq 'GLOB');
188    undef;
189}
190
191#------------------------------
192#
193# read_chunk INHANDLE, OUTHANDLE
194#
195# I<Instance method.>
196# Get lines until end-of-stream.
197# Returns the terminating-condition token:
198#
199#    DELIM xyz   saw boundary line "--xyz"
200#    CLOSE xyz   saw boundary line "--xyz--"
201#    DONE xyz    saw terminator line "xyz"
202#    EOF         end of file
203
204# Parse up to (and including) the boundary, and dump output.
205# Follows the RFC 2046 specification, that the CRLF immediately preceding
206# the boundary is part of the boundary, NOT part of the input!
207#
208# NOTE: while parsing bodies, we take care to remember the EXACT end-of-line
209# sequence.  This is because we *may* be handling 'binary' encoded data, and
210# in that case we can't just massage \r\n into \n!  Don't worry... if the
211# data is styled as '7bit' or '8bit', the "decoder" will massage the CRLF
212# for us.  For now, we're just trying to chop up the data stream.
213
214# NBK - Oct 12, 1999
215# The CRLF at the end of the current line is considered part
216# of the boundary.  I buffer the current line and output the
217# last.  I strip the last CRLF when I hit the boundary.
218
219sub read_chunk {
220    my ($self, $in, $out, $keep_newline, $normalize_newlines) = @_;
221
222    # If we're parsing a preamble or epilogue, we need to keep the blank line
223    # that precedes the boundary line.
224    $keep_newline ||= 0;
225
226    $normalize_newlines ||= 0;
227    ### Init:
228    my %bh = %{$self->{BH}};
229    my %th = %{$self->{TH}}; my $thx = keys %th;
230    local $_ = $LONGLINE;
231    my $maybe;
232    my $last = '';
233    my $eos  = '';
234
235    ### Determine types:
236    my $n_in  = native_handle($in);
237    my $n_out = native_handle($out);
238
239    ### Handle efficiently by type:
240    if ($n_in) {
241	if ($n_out) {            ### native input, native output [fastest]
242	    while (<$n_in>) {
243		# Normalize line ending
244		$_ =~ s/(?:\n\r|\r\n|\r)$/\n/ if $normalize_newlines;
245		if (substr($_, 0, 2) eq '--') {
246		    ($maybe = $_) =~ s/[ \t\r\n]+\Z//;
247		    $bh{$maybe} and do { $eos = $bh{$maybe}; last };
248		}
249		$thx and $th{$_} and do { $eos = $th{$_}; last };
250		print $n_out $last; $last = $_;
251	    }
252	}
253	else {                   ### native input, OO output [slower]
254	    while (<$n_in>) {
255		# Normalize line ending
256		$_ =~ s/(?:\n\r|\r\n|\r)$/\n/ if $normalize_newlines;
257		if (substr($_, 0, 2) eq '--') {
258		    ($maybe = $_) =~ s/[ \t\r\n]+\Z//;
259		    $bh{$maybe} and do { $eos = $bh{$maybe}; last };
260		}
261		$thx and $th{$_} and do { $eos = $th{$_}; last };
262		$out->print($last); $last = $_;
263	    }
264	}
265    }
266    else {
267	if ($n_out) {            ### OO input, native output [even slower]
268	    while (defined($_ = $in->getline)) {
269		# Normalize line ending
270		$_ =~ s/(?:\n\r|\r\n|\r)$/\n/ if $normalize_newlines;
271		if (substr($_, 0, 2) eq '--') {
272		    ($maybe = $_) =~ s/[ \t\r\n]+\Z//;
273		    $bh{$maybe} and do { $eos = $bh{$maybe}; last };
274		}
275		$thx and $th{$_} and do { $eos = $th{$_}; last };
276		print $n_out $last; $last = $_;
277	    }
278	}
279	else {                   ### OO input, OO output [slowest]
280	    while (defined($_ = $in->getline)) {
281		# Normalize line ending
282		$_ =~ s/(?:\n\r|\r\n|\r)$/\n/ if $normalize_newlines;
283		if (substr($_, 0, 2) eq '--') {
284		    ($maybe = $_) =~ s/[ \t\r\n]+\Z//;
285		    $bh{$maybe} and do { $eos = $bh{$maybe}; last };
286		}
287		$thx and $th{$_} and do { $eos = $th{$_}; last };
288		$out->print($last); $last = $_;
289	    }
290	}
291    }
292
293    # Write out last held line, removing terminating CRLF if ended on bound,
294    # unless the line consists only of CRLF and we're wanting to keep the
295    # preceding blank line (as when parsing a preamble)
296    $last =~ s/[\r\n]+\Z// if ($eos =~ /^(DELIM|CLOSE)/ && !($keep_newline && $last =~ m/^[\r\n]\z/));
297    $out->print($last);
298
299    ### Save and return what we finished on:
300    ${$self->{EOS}} = ($eos || 'EOF');
301    1;
302}
303
304#------------------------------
305#
306# read_lines INHANDLE, \@OUTLINES
307#
308# I<Instance method.>
309# Read lines into the given array.
310#
311sub read_lines {
312    my ($self, $in, $outlines) = @_;
313
314    my $data = '';
315    open(my $fh, '>', \$data) or die $!;
316    $self->read_chunk($in, $fh);
317    @$outlines =  split(/^/, $data);
318    close $fh;
319
320    1;
321}
322
3231;
324__END__
325
326=head1 SEE ALSO
327
328L<MIME::Tools>, L<MIME::Parser>
329