1#!@PERL@ 2# @configure_input@ 3# @file 4# @brief Extract HTML from an RFC-822 email 5# 6# Copyright (C) 2010,2015,2018 Olly Betts 7# 8# Permission is hereby granted, free of charge, to any person obtaining a copy 9# of this software and associated documentation files (the "Software"), to 10# deal in the Software without restriction, including without limitation the 11# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 12# sell copies of the Software, and to permit persons to whom the Software is 13# furnished to do so, subject to the following conditions: 14# 15# The above copyright notice and this permission notice shall be included in 16# all copies or substantial portions of the Software. 17# 18# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 24# IN THE SOFTWARE. 25 26use strict; 27eval { 28 require MIME::Parser; 29 require MIME::WordDecoder; 30 require HTML::Entities; 31 # In core since Perl 5.9.5: 32 require Time::Piece; 33}; 34if ($@) { 35 print STDERR $@; 36 # Exit with code 127 which omindex interprets as "filter not installed" 37 # and won't try further .msg files. 38 exit 127; 39} 40 41my $in = shift @ARGV; 42my $parser = new MIME::Parser; 43# Keep data in memory rather than spraying files onto disk. 44$parser->output_to_core(1); 45$parser->tmp_to_core(1); 46open IN, '<', $in or die "Couldn't open '$in' ($?)\n"; 47my $ent = $parser->parse(\*IN) or die "Failed to parse '$in' as MIME message\n"; 48 49my $head = $ent->head; 50print "<head>\n<title>"; 51print do_header($head, 'Subject'); 52print "</title>\n<meta name=\"author\" content=\""; 53print do_header($head, 'From'); 54print "\">\n"; 55 56my $date = do_header($head, 'Date'); 57chomp $date; 58eval { 59 eval { 60 $date = Time::Piece->strptime($date, '%a, %d %b %Y %T %z'); 61 }; 62 # The "%a, " part is optional in RFC822 and RFC2822. 63 $date = Time::Piece->strptime($date, '%d %b %Y %T %z') if $@; 64 my $iso8601_date = $date->datetime; 65 print "<meta name=\"created\" content=\"$iso8601_date\">\n"; 66}; 67 68print "</head>\n"; 69 70handle_mimepart($ent); 71 72sub do_header { 73 my ($head, $header) = @_; 74 my $s = MIME::WordDecoder::mime_to_perl_string($head->get($header, 0)); 75 chomp($s); 76 return HTML::Entities::encode_entities($s); 77} 78 79sub handle_mimepart { 80 my $e = shift; 81 my ($type, $sub) = ((lc $e->mime_type) =~ m,^(.*?)/(.*?)(?:;.*)?$,); 82 if ($type eq 'multipart') { 83 if ($sub eq 'alternative') { 84 # Take the first mime part which we get text from. 85 for my $s ($e->parts) { 86 my $res = handle_mimepart($s); 87 return $res if $res; 88 } 89 } else { 90 my $res = 0; 91 for my $s ($e->parts) { 92 $res += handle_mimepart($s); 93 } 94 return $res; 95 } 96 } elsif ($type eq 'text') { 97 if ($sub eq 'plain') { 98 my $m = $e->bodyhandle->as_string; 99 print "<pre>", HTML::Entities::encode_entities($m), "</pre>\n"; 100 return 1; 101 } elsif ($sub eq 'html') { 102 my $m = $e->bodyhandle->as_string; 103 $m =~ s!</?body[^>]*>!\n!gi; 104 print $m, "\n"; 105 return 1; 106 } 107 } 108 return 0; 109} 110