1package Email::MIME::RFC2047::Encoder; 2$Email::MIME::RFC2047::Encoder::VERSION = '0.97'; 3use strict; 4use warnings; 5 6# ABSTRACT: Encoding of non-ASCII MIME headers 7 8use Encode (); 9use MIME::Base64 (); 10 11my $rfc_specials = '()<>\[\]:;\@\\,."'; 12 13sub new { 14 my $package = shift; 15 my $options = ref($_[0]) ? $_[0] : { @_ }; 16 17 my ($encoding, $method) = ($options->{encoding}, $options->{method}); 18 19 if (!defined($encoding)) { 20 $encoding = 'utf-8'; 21 $method = 'Q' if !defined($method); 22 } 23 else { 24 $method = 'B' if !defined($method); 25 } 26 27 my $encoder = Encode::find_encoding($encoding) 28 or die("encoding '$encoding' not found"); 29 30 my $self = { 31 encoding => $encoding, 32 encoder => $encoder, 33 method => uc($method), 34 }; 35 36 return bless($self, $package); 37} 38 39sub encode_text { 40 my ($self, $string) = @_; 41 42 return $self->_encode('text', $string); 43} 44 45sub encode_phrase { 46 my ($self, $string) = @_; 47 48 return $self->_encode('phrase', $string); 49} 50 51sub _encode { 52 my ($self, $mode, $string) = @_; 53 54 my $encoder = $self->{encoder}; 55 my $result = ''; 56 57 # $string is split on whitespace. Each $word is categorized into 58 # 'mime', 'quoted' or 'text'. The intermediate result of the conversion of 59 # consecutive words of the same types is accumulated in $buffer. 60 # The type of the buffer is tracked in $buffer_type. 61 # The method _finish_buffer is called to finish the encoding of the 62 # buffered content and append to the result. 63 my $buffer = ''; 64 my $buffer_type; 65 66 for my $word (split(/\s+/, $string)) { 67 next if $word eq ''; # ignore leading white space 68 69 $word =~ s/[\x00-\x1f\x7f]//g; # better remove control chars 70 71 my $word_type; 72 73 if ($word =~ /[\x80-\x{10ffff}]|(^=\?.*\?=\z)/s) { 74 # also encode any word that starts with '=?' and ends with '?=' 75 $word_type = 'mime'; 76 } 77 elsif ($mode eq 'phrase') { 78 $word_type = 'quoted'; 79 } 80 else { 81 $word_type = 'text'; 82 } 83 84 $self->_finish_buffer(\$result, $buffer_type, \$buffer) 85 if $buffer ne '' && $buffer_type ne $word_type; 86 $buffer_type = $word_type; 87 88 if ($word_type eq 'text') { 89 $result .= ' ' if $result ne ''; 90 $result .= $word; 91 } 92 elsif ($word_type eq 'quoted') { 93 $buffer .= ' ' if $buffer ne ''; 94 $buffer .= $word; 95 } 96 else { 97 my $max_len = 75 - 7 - length($self->{encoding}); 98 $max_len = 3 * ($max_len >> 2) if $self->{method} eq 'B'; 99 100 my @chars; 101 push(@chars, ' ') if $buffer ne ''; 102 push(@chars, split(//, $word)); 103 104 for my $char (@chars) { 105 my $chunk; 106 107 if ($self->{method} eq 'B') { 108 $chunk = $encoder->encode($char); 109 } 110 elsif ($char =~ /[()<>@,;:\\".\[\]=?_]/) { 111 # special character 112 $chunk = sprintf('=%02x', ord($char)); 113 } 114 elsif ($char =~ /[\x80-\x{10ffff}]/) { 115 # non-ASCII character 116 117 my $enc_char = $encoder->encode($char); 118 $chunk = ''; 119 120 for my $byte (unpack('C*', $enc_char)) { 121 $chunk .= sprintf('=%02x', $byte); 122 } 123 } 124 elsif ($char eq ' ') { 125 $chunk = '_'; 126 } 127 else { 128 $chunk = $char; 129 } 130 131 if (length($buffer) + length($chunk) <= $max_len) { 132 $buffer .= $chunk; 133 } 134 else { 135 $self->_finish_buffer(\$result, 'mime', \$buffer); 136 $buffer = $chunk; 137 } 138 } 139 } 140 } 141 142 $self->_finish_buffer(\$result, $buffer_type, \$buffer) 143 if $buffer ne ''; 144 145 return $result; 146} 147 148sub _finish_buffer { 149 my ($self, $result, $buffer_type, $buffer) = @_; 150 151 $$result .= ' ' if $$result ne ''; 152 153 if ($buffer_type eq 'quoted') { 154 if ($$buffer =~ /[$rfc_specials]/) { 155 # use quoted string if buffer contains special chars 156 $$buffer =~ s/[\\"]/\\$&/g; 157 158 $$result .= qq("$$buffer"); 159 } 160 else { 161 $$result .= $$buffer; 162 } 163 } 164 elsif ($buffer_type eq 'mime') { 165 $$result .= "=?$self->{encoding}?$self->{method}?"; 166 167 if ($self->{method} eq 'B') { 168 $$result .= MIME::Base64::encode_base64($$buffer, ''); 169 } 170 else { 171 $$result .= $$buffer; 172 } 173 174 $$result .= '?='; 175 } 176 177 $$buffer = ''; 178 179 return; 180} 181 1821; 183 184__END__ 185 186=pod 187 188=encoding UTF-8 189 190=head1 NAME 191 192Email::MIME::RFC2047::Encoder - Encoding of non-ASCII MIME headers 193 194=head1 VERSION 195 196version 0.97 197 198=head1 SYNOPSIS 199 200 use Email::MIME::RFC2047::Encoder; 201 202 my $encoder = Email::MIME::RFC2047::Encoder->new( 203 encoding => 'utf-8', 204 method => 'Q', 205 ); 206 207 my $encoded_text = $encoder->encode_text($string); 208 my $encoded_phrase = $encoder->encode_phrase($string); 209 210=head1 DESCRIPTION 211 212This module encodes non-ASCII text for MIME email message headers according to 213RFC 2047. 214 215=head1 CONSTRUCTOR 216 217=head2 new 218 219 my $encoder = Email::MIME::RFC2047::Encoder->new( 220 encoding => $encoding, 221 method => $method, 222 ); 223 224Creates a new encoder object. 225 226I<encoding> specifies the encoding ("character set" in the RFC) to use. This 227is passed to L<Encode>. See L<Encode::Supported> for supported encodings. 228 229I<method> specifies the encoding method ("encoding" in the RFC). Must be 230either C<'B'> or C<'Q'>. 231 232If both I<encoding> and I<method> are omitted, encoding defaults to 233C<'utf-8'> and method to C<'Q'>. If only I<encoding> is omitted, it defaults 234to C<'utf-8'>. If only I<method> is omitted, it defaults to C<'B'>. 235 236=head1 METHODS 237 238=head2 encode_text 239 240 my $encoded_text = $encoder->encode_text($string); 241 242Encodes a string for use in any I<Subject> or I<Comments> header field, any 243extension message header field, or any MIME body part field for which the 244field body is defined as C<unstructured> (RFC 2822) or C<*text> (RFC 822). 245C<$string> is expected to be an unencoded Perl string. 246 247This method tries to use the MIME encoding for as few characters of the 248input string as possible. So the result may consist of a mix of encoded and 249unencoded words. 250 251The source string is trimmed and any whitespace is collapsed. The words in 252the result are separated by single space characters without folding of long 253lines. 254 255=head2 encode_phrase 256 257 my $encoded_phrase = $encoder->encode_phrase($string); 258 259Encodes a string that may replace a C<phrase> token (as defined by RFC 2822), 260for example, one that precedes an address in a I<From>, I<To>, or I<Cc> 261header. 262 263This method works like I<encode_text> but additionally converts remaining 264text that contains special characters to C<quoted-string>s. 265 266=head1 AUTHOR 267 268Nick Wellnhofer <wellnhofer@aevum.de> 269 270=head1 COPYRIGHT AND LICENSE 271 272This software is copyright (c) 2017 by Nick Wellnhofer. 273 274This is free software; you can redistribute it and/or modify it under 275the same terms as the Perl 5 programming language system itself. 276 277=cut 278