1package MIME::Parser::Reader; 2 3=head1 NAME 4 5MIME::Parser::Reader - a line-oriented reader for a MIME::Parser 6 7 8=head1 SYNOPSIS 9 10This module is used internally by MIME::Parser; you probably 11don't need to be looking at it at all. But just in case... 12 13 ### Create a top-level reader, where chunks end at EOF: 14 $rdr = MIME::Parser::Reader->new(); 15 16 ### Spawn a child reader, where chunks also end at a boundary: 17 $subrdr = $rdr->spawn->add_boundary($bound); 18 19 ### Spawn a child reader, where chunks also end at a given string: 20 $subrdr = $rdr->spawn->add_terminator($string); 21 22 ### Read until boundary or terminator: 23 $subrdr->read_chunk($in, $out); 24 25 26=head1 DESCRIPTION 27 28A line-oriented reader which can deal with virtual end-of-stream 29defined by a collection of boundaries. 30 31B<Warning:> this is a private class solely for use by MIME::Parser. 32This class has no official public interface 33 34=cut 35 36use strict; 37 38### All possible end-of-line sequences. 39### Note that "" is included because last line of stream may have no newline! 40my @EOLs = ("", "\r", "\n", "\r\n", "\n\r"); 41 42### Long line: 43my $LONGLINE = ' ' x 1000; 44 45 46#------------------------------ 47# 48# new 49# 50# I<Class method.> 51# Construct an empty (top-level) reader. 52# 53sub new { 54 my ($class) = @_; 55 my $eos; 56 return bless { 57 Bounds => [], 58 BH => {}, 59 TH => {}, 60 EOS => \$eos, 61 }, $class; 62} 63 64#------------------------------ 65# 66# spawn 67# 68# I<Instance method.> 69# Return a reader which is mostly a duplicate, except that the EOS 70# accumulator is shared. 71# 72sub spawn { 73 my $self = shift; 74 my $dup = bless {}, ref($self); 75 $dup->{Bounds} = [ @{$self->{Bounds}} ]; ### deep copy 76 $dup->{BH} = { %{$self->{BH}} }; ### deep copy 77 $dup->{TH} = { %{$self->{TH}} }; ### deep copy 78 $dup->{EOS} = $self->{EOS}; ### shallow copy; same ref! 79 $dup; 80} 81 82#------------------------------ 83# 84# add_boundary BOUND 85# 86# I<Instance method.> 87# Let BOUND be the new innermost boundary. Returns self. 88# 89sub add_boundary { 90 my ($self, $bound) = @_; 91 unshift @{$self->{Bounds}}, $bound; ### now at index 0 92 $self->{BH}{"--$bound"} = "DELIM $bound"; 93 $self->{BH}{"--$bound--"} = "CLOSE $bound"; 94 $self; 95} 96 97#------------------------------ 98# 99# add_terminator LINE 100# 101# I<Instance method.> 102# Let LINE be another terminator. Returns self. 103# 104sub add_terminator { 105 my ($self, $line) = @_; 106 foreach (@EOLs) { 107 $self->{TH}{"$line$_"} = "DONE $line"; 108 } 109 $self; 110} 111 112#------------------------------ 113# 114# has_bounds 115# 116# I<Instance method.> 117# Are there boundaries to contend with? 118# 119sub has_bounds { 120 scalar(@{shift->{Bounds}}); 121} 122 123#------------------------------ 124# 125# depth 126# 127# I<Instance method.> 128# How many levels are there? 129# 130sub depth { 131 scalar(@{shift->{Bounds}}); 132} 133 134#------------------------------ 135# 136# eos [EOS] 137# 138# I<Instance method.> 139# Return the last end-of-stream token seen. 140# See read_chunk() for what these might be. 141# 142sub eos { 143 my $self = shift; 144 ${$self->{EOS}} = $_[0] if @_; 145 ${$self->{EOS}}; 146} 147 148#------------------------------ 149# 150# eos_type [EOSTOKEN] 151# 152# I<Instance method.> 153# Return the high-level type of the given token (defaults to our token). 154# 155# DELIM saw an innermost boundary like --xyz 156# CLOSE saw an innermost boundary like --xyz-- 157# DONE callback returned false 158# EOF end of file 159# EXT saw boundary of some higher-level 160# 161sub eos_type { 162 my ($self, $eos) = @_; 163 $eos = $self->eos if (@_ == 1); 164 165 if ($eos =~ /^(DONE|EOF)/) { 166 return $1; 167 } 168 elsif ($eos =~ /^(DELIM|CLOSE) (.*)$/) { 169 return (($2 eq $self->{Bounds}[0]) ? $1 : 'EXT'); 170 } 171 else { 172 die("internal error: unable to classify boundary token ($eos)"); 173 } 174} 175 176#------------------------------ 177# 178# native_handle HANDLE 179# 180# I<Function.> 181# Can we do native i/o on HANDLE? If true, returns the handle 182# that will respond to native I/O calls; else, returns undef. 183# 184sub native_handle { 185 my $fh = shift; 186 return $fh if ($fh->isa('IO::File') || $fh->isa('IO::Handle')); 187 return $fh if (ref $fh eq 'GLOB'); 188 undef; 189} 190 191#------------------------------ 192# 193# read_chunk INHANDLE, OUTHANDLE 194# 195# I<Instance method.> 196# Get lines until end-of-stream. 197# Returns the terminating-condition token: 198# 199# DELIM xyz saw boundary line "--xyz" 200# CLOSE xyz saw boundary line "--xyz--" 201# DONE xyz saw terminator line "xyz" 202# EOF end of file 203 204# Parse up to (and including) the boundary, and dump output. 205# Follows the RFC 2046 specification, that the CRLF immediately preceding 206# the boundary is part of the boundary, NOT part of the input! 207# 208# NOTE: while parsing bodies, we take care to remember the EXACT end-of-line 209# sequence. This is because we *may* be handling 'binary' encoded data, and 210# in that case we can't just massage \r\n into \n! Don't worry... if the 211# data is styled as '7bit' or '8bit', the "decoder" will massage the CRLF 212# for us. For now, we're just trying to chop up the data stream. 213 214# NBK - Oct 12, 1999 215# The CRLF at the end of the current line is considered part 216# of the boundary. I buffer the current line and output the 217# last. I strip the last CRLF when I hit the boundary. 218 219sub read_chunk { 220 my ($self, $in, $out, $keep_newline, $normalize_newlines) = @_; 221 222 # If we're parsing a preamble or epilogue, we need to keep the blank line 223 # that precedes the boundary line. 224 $keep_newline ||= 0; 225 226 $normalize_newlines ||= 0; 227 ### Init: 228 my %bh = %{$self->{BH}}; 229 my %th = %{$self->{TH}}; my $thx = keys %th; 230 local $_ = $LONGLINE; 231 my $maybe; 232 my $last = ''; 233 my $eos = ''; 234 235 ### Determine types: 236 my $n_in = native_handle($in); 237 my $n_out = native_handle($out); 238 239 ### Handle efficiently by type: 240 if ($n_in) { 241 if ($n_out) { ### native input, native output [fastest] 242 while (<$n_in>) { 243 # Normalize line ending 244 $_ =~ s/(?:\n\r|\r\n|\r)$/\n/ if $normalize_newlines; 245 if (substr($_, 0, 2) eq '--') { 246 ($maybe = $_) =~ s/[ \t\r\n]+\Z//; 247 $bh{$maybe} and do { $eos = $bh{$maybe}; last }; 248 } 249 $thx and $th{$_} and do { $eos = $th{$_}; last }; 250 print $n_out $last; $last = $_; 251 } 252 } 253 else { ### native input, OO output [slower] 254 while (<$n_in>) { 255 # Normalize line ending 256 $_ =~ s/(?:\n\r|\r\n|\r)$/\n/ if $normalize_newlines; 257 if (substr($_, 0, 2) eq '--') { 258 ($maybe = $_) =~ s/[ \t\r\n]+\Z//; 259 $bh{$maybe} and do { $eos = $bh{$maybe}; last }; 260 } 261 $thx and $th{$_} and do { $eos = $th{$_}; last }; 262 $out->print($last); $last = $_; 263 } 264 } 265 } 266 else { 267 if ($n_out) { ### OO input, native output [even slower] 268 while (defined($_ = $in->getline)) { 269 # Normalize line ending 270 $_ =~ s/(?:\n\r|\r\n|\r)$/\n/ if $normalize_newlines; 271 if (substr($_, 0, 2) eq '--') { 272 ($maybe = $_) =~ s/[ \t\r\n]+\Z//; 273 $bh{$maybe} and do { $eos = $bh{$maybe}; last }; 274 } 275 $thx and $th{$_} and do { $eos = $th{$_}; last }; 276 print $n_out $last; $last = $_; 277 } 278 } 279 else { ### OO input, OO output [slowest] 280 while (defined($_ = $in->getline)) { 281 # Normalize line ending 282 $_ =~ s/(?:\n\r|\r\n|\r)$/\n/ if $normalize_newlines; 283 if (substr($_, 0, 2) eq '--') { 284 ($maybe = $_) =~ s/[ \t\r\n]+\Z//; 285 $bh{$maybe} and do { $eos = $bh{$maybe}; last }; 286 } 287 $thx and $th{$_} and do { $eos = $th{$_}; last }; 288 $out->print($last); $last = $_; 289 } 290 } 291 } 292 293 # Write out last held line, removing terminating CRLF if ended on bound, 294 # unless the line consists only of CRLF and we're wanting to keep the 295 # preceding blank line (as when parsing a preamble) 296 $last =~ s/[\r\n]+\Z// if ($eos =~ /^(DELIM|CLOSE)/ && !($keep_newline && $last =~ m/^[\r\n]\z/)); 297 $out->print($last); 298 299 ### Save and return what we finished on: 300 ${$self->{EOS}} = ($eos || 'EOF'); 301 1; 302} 303 304#------------------------------ 305# 306# read_lines INHANDLE, \@OUTLINES 307# 308# I<Instance method.> 309# Read lines into the given array. 310# 311sub read_lines { 312 my ($self, $in, $outlines) = @_; 313 314 my $data = ''; 315 open(my $fh, '>', \$data) or die $!; 316 $self->read_chunk($in, $fh); 317 @$outlines = split(/^/, $data); 318 close $fh; 319 320 1; 321} 322 3231; 324__END__ 325 326=head1 SEE ALSO 327 328L<MIME::Tools>, L<MIME::Parser> 329