1package Email::MIME::RFC2047::Encoder;
2$Email::MIME::RFC2047::Encoder::VERSION = '0.97';
3use strict;
4use warnings;
5
6# ABSTRACT: Encoding of non-ASCII MIME headers
7
8use Encode ();
9use MIME::Base64 ();
10
11my $rfc_specials = '()<>\[\]:;\@\\,."';
12
13sub new {
14    my $package = shift;
15    my $options = ref($_[0]) ? $_[0] : { @_ };
16
17    my ($encoding, $method) = ($options->{encoding}, $options->{method});
18
19    if (!defined($encoding)) {
20        $encoding = 'utf-8';
21        $method = 'Q' if !defined($method);
22    }
23    else {
24        $method = 'B' if !defined($method);
25    }
26
27    my $encoder = Encode::find_encoding($encoding)
28        or die("encoding '$encoding' not found");
29
30    my $self = {
31        encoding => $encoding,
32        encoder  => $encoder,
33        method   => uc($method),
34    };
35
36    return bless($self, $package);
37}
38
39sub encode_text {
40    my ($self, $string) = @_;
41
42    return $self->_encode('text', $string);
43}
44
45sub encode_phrase {
46    my ($self, $string) = @_;
47
48    return $self->_encode('phrase', $string);
49}
50
51sub _encode {
52    my ($self, $mode, $string) = @_;
53
54    my $encoder = $self->{encoder};
55    my $result = '';
56
57    # $string is split on whitespace. Each $word is categorized into
58    # 'mime', 'quoted' or 'text'. The intermediate result of the conversion of
59    # consecutive words of the same types is accumulated in $buffer.
60    # The type of the buffer is tracked in $buffer_type.
61    # The method _finish_buffer is called to finish the encoding of the
62    # buffered content and append to the result.
63    my $buffer = '';
64    my $buffer_type;
65
66    for my $word (split(/\s+/, $string)) {
67        next if $word eq ''; # ignore leading white space
68
69        $word =~ s/[\x00-\x1f\x7f]//g; # better remove control chars
70
71        my $word_type;
72
73        if ($word =~ /[\x80-\x{10ffff}]|(^=\?.*\?=\z)/s) {
74            # also encode any word that starts with '=?' and ends with '?='
75            $word_type = 'mime';
76        }
77        elsif ($mode eq 'phrase') {
78            $word_type = 'quoted';
79        }
80        else {
81            $word_type = 'text';
82        }
83
84        $self->_finish_buffer(\$result, $buffer_type, \$buffer)
85            if $buffer ne '' && $buffer_type ne $word_type;
86        $buffer_type = $word_type;
87
88        if ($word_type eq 'text') {
89            $result .= ' ' if $result ne '';
90            $result .= $word;
91        }
92        elsif ($word_type eq 'quoted') {
93            $buffer .= ' ' if $buffer ne '';
94            $buffer .= $word;
95        }
96        else {
97            my $max_len = 75 - 7 - length($self->{encoding});
98            $max_len = 3 * ($max_len >> 2) if $self->{method} eq 'B';
99
100            my @chars;
101            push(@chars, ' ') if $buffer ne '';
102            push(@chars, split(//, $word));
103
104            for my $char (@chars) {
105                my $chunk;
106
107                if ($self->{method} eq 'B') {
108                    $chunk = $encoder->encode($char);
109                }
110                elsif ($char =~ /[()<>@,;:\\".\[\]=?_]/) {
111                    # special character
112                    $chunk = sprintf('=%02x', ord($char));
113                }
114                elsif ($char =~ /[\x80-\x{10ffff}]/) {
115                    # non-ASCII character
116
117                    my $enc_char = $encoder->encode($char);
118                    $chunk = '';
119
120                    for my $byte (unpack('C*', $enc_char)) {
121                        $chunk .= sprintf('=%02x', $byte);
122                    }
123                }
124                elsif ($char eq ' ') {
125                    $chunk = '_';
126                }
127                else {
128                    $chunk = $char;
129                }
130
131                if (length($buffer) + length($chunk) <= $max_len) {
132                    $buffer .= $chunk;
133                }
134                else {
135                    $self->_finish_buffer(\$result, 'mime', \$buffer);
136                    $buffer = $chunk;
137                }
138            }
139        }
140    }
141
142    $self->_finish_buffer(\$result, $buffer_type, \$buffer)
143        if $buffer ne '';
144
145    return $result;
146}
147
148sub _finish_buffer {
149    my ($self, $result, $buffer_type, $buffer) = @_;
150
151    $$result .= ' ' if $$result ne '';
152
153    if ($buffer_type eq 'quoted') {
154        if ($$buffer =~ /[$rfc_specials]/) {
155            # use quoted string if buffer contains special chars
156            $$buffer =~ s/[\\"]/\\$&/g;
157
158            $$result .= qq("$$buffer");
159        }
160        else {
161            $$result .= $$buffer;
162        }
163    }
164    elsif ($buffer_type eq 'mime') {
165        $$result .= "=?$self->{encoding}?$self->{method}?";
166
167        if ($self->{method} eq 'B') {
168            $$result .= MIME::Base64::encode_base64($$buffer, '');
169        }
170        else {
171            $$result .= $$buffer;
172        }
173
174        $$result .= '?=';
175    }
176
177    $$buffer = '';
178
179    return;
180}
181
1821;
183
184__END__
185
186=pod
187
188=encoding UTF-8
189
190=head1 NAME
191
192Email::MIME::RFC2047::Encoder - Encoding of non-ASCII MIME headers
193
194=head1 VERSION
195
196version 0.97
197
198=head1 SYNOPSIS
199
200    use Email::MIME::RFC2047::Encoder;
201
202    my $encoder = Email::MIME::RFC2047::Encoder->new(
203        encoding => 'utf-8',
204        method   => 'Q',
205    );
206
207    my $encoded_text   = $encoder->encode_text($string);
208    my $encoded_phrase = $encoder->encode_phrase($string);
209
210=head1 DESCRIPTION
211
212This module encodes non-ASCII text for MIME email message headers according to
213RFC 2047.
214
215=head1 CONSTRUCTOR
216
217=head2 new
218
219    my $encoder = Email::MIME::RFC2047::Encoder->new(
220        encoding => $encoding,
221        method   => $method,
222    );
223
224Creates a new encoder object.
225
226I<encoding> specifies the encoding ("character set" in the RFC) to use. This
227is passed to L<Encode>. See L<Encode::Supported> for supported encodings.
228
229I<method> specifies the encoding method ("encoding" in the RFC). Must be
230either C<'B'> or C<'Q'>.
231
232If both I<encoding> and I<method> are omitted, encoding defaults to
233C<'utf-8'> and method to C<'Q'>. If only I<encoding> is omitted, it defaults
234to C<'utf-8'>. If only I<method> is omitted, it defaults to C<'B'>.
235
236=head1 METHODS
237
238=head2 encode_text
239
240    my $encoded_text = $encoder->encode_text($string);
241
242Encodes a string for use in any I<Subject> or I<Comments> header field, any
243extension message header field, or any MIME body part field for which the
244field body is defined as C<unstructured> (RFC 2822) or C<*text> (RFC 822).
245C<$string> is expected to be an unencoded Perl string.
246
247This method tries to use the MIME encoding for as few characters of the
248input string as possible. So the result may consist of a mix of encoded and
249unencoded words.
250
251The source string is trimmed and any whitespace is collapsed. The words in
252the result are separated by single space characters without folding of long
253lines.
254
255=head2 encode_phrase
256
257    my $encoded_phrase = $encoder->encode_phrase($string);
258
259Encodes a string that may replace a C<phrase> token (as defined by RFC 2822),
260for example, one that precedes an address in a I<From>, I<To>, or I<Cc>
261header.
262
263This method works like I<encode_text> but additionally converts remaining
264text that contains special characters to C<quoted-string>s.
265
266=head1 AUTHOR
267
268Nick Wellnhofer <wellnhofer@aevum.de>
269
270=head1 COPYRIGHT AND LICENSE
271
272This software is copyright (c) 2017 by Nick Wellnhofer.
273
274This is free software; you can redistribute it and/or modify it under
275the same terms as the Perl 5 programming language system itself.
276
277=cut
278