1# Copyright (C) 2013 MURATA Yasuhisa
2# This program is free software; you can redistribute it and/or
3# modify it under the same terms as Perl itself.
4
5package MIME::EcoEncode::Param;
6
7use 5.008005;
8use strict;
9use warnings;
10
11require Exporter;
12
13our @ISA = qw(Exporter);
14our @EXPORT_OK = qw($VERSION);
15our @EXPORT = qw(mime_eco_param mime_deco_param);
16our $VERSION = '0.95';
17
18our $HEAD; # head string
19our $HTL;  # head + tail length
20our $LF;   # line feed
21our $BPL;  # bytes per line
22our $UTF8;
23our $REG_W;
24
25sub mime_eco_param {
26    my $str = shift;
27
28    return '' unless defined $str;
29    return '' if $str eq '';
30
31    my ($trailing_crlf) = ($str =~ /(\x0d?\x0a|\x0d)$/);
32    $str =~ tr/\n\r//d;
33    if ($str =~ /^\s*$/) {
34       return $trailing_crlf ? $str . $trailing_crlf : $str;
35    }
36
37    my $charset = shift || 'UTF-8';
38
39    our $HEAD; # head string
40
41    my $cs;
42    my $type; # 0: RFC 2231, 1: "Q", 2: "B"
43    if ($charset =~ /^([-0-9A-Za-z_]+)(\'[^\']*\')?$/i) {
44	$cs = lc($1);
45	$type = 0;
46	$HEAD = $2 ? $charset : $charset . "''";
47    }
48    elsif ($charset =~ /^([-0-9A-Za-z_]+)(\*[^\?]*)?(\?[QB])?$/i) {
49	$cs = lc($1);
50	if (defined $3) {
51	    $type = (lc($3) eq '?q') ? 1 : 2;
52	    $HEAD = '=?' . $charset . '?';
53	}
54	else {
55	    $type = 2;
56	    $HEAD = '=?' . $charset . '?B?';
57	}
58    }
59    else { # invalid option
60	return undef;
61    }
62
63    our $HTL;  # head + tail length
64    our $LF  = shift || "\n"; # line feed
65    our $BPL = shift || 76;   # bytes per line
66    our $UTF8 = 1;
67    our $REG_W = qr/(.)/;
68
69    my $jp = 0;
70    my $np;
71
72    $HTL = length($HEAD) + 2;
73
74    if ($cs ne 'utf-8') {
75	$UTF8 = 0;
76	if ($cs eq 'iso-2022-jp') {
77	    $jp = 1;
78	}
79	elsif ($cs eq 'shift_jis') {
80	    # range of 2nd byte : [\x40-\x7e\x80-\xfc]
81	    $REG_W = qr/([\x81-\x9f\xe0-\xfc]?.)/;
82	}
83	elsif ($cs eq 'gb2312') { # Simplified Chinese
84	    # range of 2nd byte : [\xa1-\xfe]
85	    $REG_W = qr/([\xa1-\xfe]?.)/;
86	}
87	elsif ($cs eq 'euc-kr') { # Korean
88	    # range of 2nd byte : [\xa1-\xfe]
89	    $REG_W = qr/([\xa1-\xfe]?.)/;
90	}
91	elsif ($cs eq 'big5') { # Traditional Chinese
92	    # range of 2nd byte : [\x40-\x7e\xa1-\xfe]
93	    $REG_W = qr/([\x81-\xfe]?.)/;
94	}
95	else { # Single Byte (Latin, Cyrillic, ...)
96	    ;
97	}
98    }
99
100    $str =~ s/^(\s*)//; # leading whitespace
101    my $sps = $1;
102    my ($param, $value) = split('=', $str, 2);
103
104    unless (defined $value) {
105        return $trailing_crlf ? $str . $trailing_crlf : $str;
106    }
107
108    my $quote = 0;
109
110    if ($value =~ s/^\s*"(.*)"$/$1/) {
111        $quote = 1;
112    }
113    if ($value eq '') {
114        return $trailing_crlf ? $str . $trailing_crlf : $str;
115    }
116
117    my $result = "$sps$param=";
118    my $v_len = length($value);
119    my $ll_len = length($result);
120
121    if (!$quote && $value !~ /[^\w!#\$&\+-\.\^`\{\|}~]/) { # regular token
122        if ($type or $ll_len + $v_len <= $BPL) {
123            $result .= $value;
124            return $trailing_crlf ? $result . $trailing_crlf : $result;
125        }
126
127        my $n = 0;
128        my $c;
129        my $p_str;
130
131        $result = "$sps$param\*0=";
132        $ll_len += 2;
133        while ($value =~ /(.)/g) {
134            $c = $1;
135            if ($ll_len + 1 > $BPL) {
136                $n++;
137                $p_str = " $param\*$n=";
138                $result .= "$LF$p_str$c";
139                $ll_len = 1 + length($p_str);
140            }
141            else {
142                $result .= $c;
143                $ll_len++;
144            }
145        }
146        return $trailing_crlf ? $result . $trailing_crlf : $result;
147    }
148    if ($quote && $value !~ /[^\t\x20-\x7e]/) { # regular quoted-string
149        if ($type or $ll_len + $v_len + 2 <= $BPL) {
150            $result .= "\"$value\"";
151            return $trailing_crlf ? $result . $trailing_crlf : $result;
152        }
153
154        my $n = 0;
155        my $vc;
156        my $vc_len;
157        my $p_str;
158
159        $result = "$sps$param\*0=\"";
160        $ll_len += 3;
161        while ($value =~ /(\\.|.)/g) {
162            $vc = $1;
163            $vc_len = length($vc);
164            if ($ll_len + $vc_len + 1 > $BPL) {
165                $n++;
166                $p_str = " $param\*$n=\"";
167                $result .= "\"$LF$p_str$vc";
168                $ll_len = $vc_len + length($p_str);
169            }
170            else {
171                $result .= $vc;
172                $ll_len += $vc_len;
173            }
174        }
175        $result .= '"';
176        return $trailing_crlf ? $result . $trailing_crlf : $result;
177    }
178
179    #
180    # extended parameter (contain regular parameter)
181    #
182
183    if ($jp) {
184	if ($type == 0) {
185	    return param_enc_jp($param, $value, $sps, $trailing_crlf, $quote);
186	}
187
188	if ($type == 1) { # "Q" encoding
189	    require MIME::EcoEncode::JP_Q;
190	    $MIME::EcoEncode::JP_Q::HEAD  = $HEAD;
191	    $MIME::EcoEncode::JP_Q::HTL   = $HTL;
192	    $MIME::EcoEncode::JP_Q::LF    = $LF;
193	    $MIME::EcoEncode::JP_Q::BPL   = $BPL;
194	    $MIME::EcoEncode::JP_Q::MODE  = 0;
195
196	    my $enc =
197		MIME::EcoEncode::JP_Q::add_ew_jp_q($value,
198						   length($result) + 1,
199						   \$np, 1, 1);
200	    if ($enc eq ' ') {
201		$enc =
202		    MIME::EcoEncode::JP_Q::add_ew_jp_q($value, 2, \$np, 1);
203		$result .= "$LF \"$enc\"";
204	    }
205	    else {
206		$result .= "\"$enc\"";
207	    }
208	    return $trailing_crlf ? $result . $trailing_crlf : $result;
209	}
210	else { # "B" encoding
211	    require MIME::EcoEncode::JP_B;
212	    $MIME::EcoEncode::JP_B::HEAD  = $HEAD;
213	    $MIME::EcoEncode::JP_B::HTL   = $HTL;
214	    $MIME::EcoEncode::JP_B::LF    = $LF;
215	    $MIME::EcoEncode::JP_B::BPL   = $BPL;
216
217	    my $enc =
218		MIME::EcoEncode::JP_B::add_ew_jp_b($value,
219						   length($result) + 1,
220						   \$np, 1, 1);
221	    if ($enc eq ' ') {
222		$enc =
223		    MIME::EcoEncode::JP_B::add_ew_jp_b($value, 2, \$np, 1);
224		$result .= "$LF \"$enc\"";
225	    }
226	    else {
227		$result .= "\"$enc\"";
228	    }
229	    return $trailing_crlf ? $result . $trailing_crlf : $result;
230	}
231    }
232
233    if ($type == 0) {
234	return param_enc($param, $value, $sps, $trailing_crlf, $quote);
235    }
236    if ($type == 1) { # "Q" encoding
237	require MIME::EcoEncode;
238        $MIME::EcoEncode::HEAD  = $HEAD;
239        $MIME::EcoEncode::HTL   = $HTL;
240        $MIME::EcoEncode::LF    = $LF;
241        $MIME::EcoEncode::BPL   = $BPL;
242        $MIME::EcoEncode::REG_W = $REG_W;
243
244        my $enc =
245	    MIME::EcoEncode::add_ew_q($value, length($result) + 1,
246                                         \$np, 1, 1);
247        if ($enc eq ' ') {
248            $enc =
249		MIME::EcoEncode::add_ew_q($value, 2, \$np, 1);
250            $result .= "$LF \"$enc\"";
251        }
252        else {
253            $result .= "\"$enc\"";
254        }
255        return $trailing_crlf ? $result . $trailing_crlf : $result;
256    }
257    else { # "B" encoding
258	require MIME::EcoEncode;
259        $MIME::EcoEncode::HEAD  = $HEAD;
260        $MIME::EcoEncode::HTL   = $HTL;
261        $MIME::EcoEncode::LF    = $LF;
262        $MIME::EcoEncode::BPL   = $BPL;
263        $MIME::EcoEncode::REG_W = $REG_W;
264
265        my $enc =
266	    MIME::EcoEncode::add_ew_b($value, length($result) + 1,
267                                         \$np, 1, 1);
268        if ($enc eq ' ') {
269            $enc =
270		MIME::EcoEncode::add_ew_b($value, 2, \$np, 1);
271            $result .= "$LF \"$enc\"";
272        }
273        else {
274            $result .= "\"$enc\"";
275        }
276        return $trailing_crlf ? $result . $trailing_crlf : $result;
277    }
278}
279
280
281sub param_enc {
282    my $param = shift;
283    my $value = shift;
284    my $sps = shift;
285    my $trailing_crlf = shift;
286    my $quote = shift;
287
288    my $result;
289    my $ll_len;
290
291    our $UTF8;
292    our $REG_W;
293    our $HEAD;
294
295    $value = "\"$value\"" if $quote;
296    my $vstr = $value;
297
298    $value =~ s/([^\w!#\$&\+-\.\^`\{\|}~])/
299        sprintf("%%%X",ord($1))/egox;
300
301    $result = "$sps$param\*=$HEAD";
302    if (length($result) + length($value) <= $BPL) {
303        $result .= $value;
304        return $trailing_crlf ? $result . $trailing_crlf : $result;
305    }
306
307    my $n = 0;
308    my $nn = 1;
309    my $w1;
310    my $p_str;
311    my $w;
312    my $w_len;
313    my $chunk = '';
314    my $ascii = 1;
315
316    $result = "$sps$param\*0\*=$HEAD";
317    $ll_len = length($result);
318
319    utf8::decode($vstr) if $UTF8; # UTF8 flag on
320
321    while ($vstr =~ /$REG_W/g) {
322        $w1 = $1;
323	utf8::encode($w1) if $UTF8; # UTF8 flag off
324        $w_len = length($w1); # size of one character
325
326        $value =~ /((?:%..|.){$w_len})/g;
327        $w = $1;
328        $w_len = length($w);
329
330        $ascii = 0 if $w_len > 1;
331
332        # 1 is ';'
333        if ($ll_len + $w_len + 1 > $BPL) {
334            $p_str = " $param\*$nn\*=";
335            if ($ascii) {
336                if ($n == 0) {
337                    $result = "$sps$param\*0=$HEAD$chunk$w;";
338                }
339                else {
340                    $result .= "$LF $param\*$n=$chunk$w;";
341                }
342                $ll_len = length($p_str);
343                $chunk = '';
344            }
345            else {
346                if ($n == 0) {
347                    $result = "$result$chunk;";
348                }
349                else {
350                    $result .= "$LF $param\*$n\*=$chunk;";
351                }
352                $ll_len = length($p_str) + $w_len;
353                $chunk = $w;
354            }
355            $ascii = 1 if $w_len == 1;
356            $n = $nn;
357            $nn++;
358        }
359        else {
360            $chunk .= $w;
361            $ll_len += $w_len;
362        }
363    }
364    if ($ascii) {
365        if ($chunk eq '') {
366            chop($result);
367        }
368        else {
369            $result .= "$LF $param\*$n=$chunk";
370        }
371    }
372    else {
373        $result .= "$LF $param\*$n\*=$chunk";
374    }
375    return $trailing_crlf ? $result . $trailing_crlf : $result;
376}
377
378
379sub param_enc_jp {
380    my $param = shift;
381    my $value = shift;
382    my $sps = shift;
383    my $trailing_crlf = shift;
384    my $quote = shift;
385
386    my $result;
387    my $ll_len;
388
389    our $HEAD;
390
391    $value = "\"$value\"" if $quote;
392    my $vstr = $value;
393
394    $value =~ s/([^\w!#\$&\+-\.\^`\{\|}~])/
395        sprintf("%%%X",ord($1))/egox;
396
397    $result = "$sps$param\*=$HEAD";
398    if (length($result) + length($value) <= $BPL) {
399        $result .= $value;
400        return $trailing_crlf ? $result . $trailing_crlf : $result;
401    }
402
403    my $n = 0;
404    my $nn = 1;
405    my $p_str;
406    my $ascii = 1;
407
408    my $ee_str = '%1B%28B';
409    my $ee_len = 7;
410
411    my $vstr_len = length($vstr);
412
413    my $k_in = 0; # ascii: 0, zen: 1 or 2, han: 9
414    my $k_in_bak = 0;
415    my $ec;
416    my ($w, $w_len) = ('', 0);
417    my ($chunk, $chunk_len) = ('', 0);
418    my ($w1, $w1_bak);
419    my $enc_len;
420
421    $vstr =~ s/\e\(B$//;
422    $result = "$sps$param\*0\*=$HEAD";
423    $ll_len = length($result);
424
425    while ($vstr =~ /\e(..)|./g) {
426        $ec = $1;
427        $value =~ /(%1B(?:%..|.)(?:%..|.)|(?:%..|.))/g;
428        $w1 = $1;
429        $w .= $w1;
430        if (defined $ec) {
431            $w1_bak = $w1;
432            if ($ec eq '(B') {
433                $k_in = 0;
434            }
435            elsif ($ec eq '$B') {
436                $k_in = 1;
437            }
438            else {
439                $k_in = 9;
440            }
441            next;
442        }
443        else {
444            if ($k_in == 1) {
445                $k_in = 2;
446                next;
447            }
448            elsif ($k_in == 2) {
449                $k_in = 1;
450            }
451        }
452        $w_len = length($w);
453        $enc_len = $w_len + ($k_in ? $ee_len : 0);
454        $ascii = 0 if $w_len > 1;
455
456        # 1 is ';'
457        if ($ll_len + $enc_len + 1 > $BPL) {
458            $p_str = " $param\*$nn\*=";
459            if ($ascii) {
460                if ($n == 0) {
461                    $result = "$sps$param\*0=$HEAD$chunk$w;";
462                }
463                else {
464                    $result .= "$LF $param\*$n=$chunk$w;";
465                }
466                $ll_len = length($p_str);
467                $chunk = '';
468            }
469            else {
470                if ($k_in_bak) {
471                    $chunk .= $ee_str;
472                    if ($k_in) {
473                        if ($k_in_bak == $k_in) {
474                            $w = $w1_bak . $w;
475                            $w_len += length($w1_bak);
476                        }
477                    }
478                    else {
479                        $w = $w1;
480                        $w_len = length($w1);
481                    }
482                }
483                if ($n == 0) {
484                    $result = "$result$chunk;";
485                }
486                else {
487                    $result .= "$LF $param\*$n\*=$chunk;";
488                }
489                $ll_len = length($p_str) + $w_len;
490                $chunk = $w;
491            }
492            $ascii = 1 if $w_len == 1;
493            $n = $nn;
494            $nn++;
495        }
496        else {
497            $chunk .= $w;
498            $ll_len += $w_len;
499        }
500        $k_in_bak = $k_in;
501        $w = '';
502        $w_len = 0;
503    }
504    if ($ascii) {
505        if ($chunk eq '') {
506            chop($result);
507        }
508        else {
509            $result .= "$LF $param\*$n=$chunk";
510        }
511    }
512    else {
513        $chunk .= $ee_str if $k_in_bak;
514        $result .= "$LF $param\*$n\*=$chunk";
515    }
516    return $trailing_crlf ? $result . $trailing_crlf : $result;
517}
518
519
520sub mime_deco_param {
521    my $str = shift;
522    if ((!defined $str) || $str eq '') {
523        return ('') x 5 if wantarray;
524        return '';
525    }
526
527    my ($trailing_crlf) = ($str =~ /(\x0d?\x0a|\x0d)$/);
528    $str =~ tr/\n\r//d;
529    if ($str =~ /^\s*$/) {
530        return ($trailing_crlf ? $str . $trailing_crlf : $str,
531                ('') x 4) if wantarray;
532        return $trailing_crlf ? $str . $trailing_crlf : $str;
533    }
534
535    $str =~ s/^(\s*)//; # leading whitespace
536    my $sps = $1;
537
538    my $result = '';
539    my ($param, $value, $charset, $lang);
540    my ($param0, $value0, $charset0, $lang0) = ('') x 4;
541
542    my $bq_on = shift; # "B/Q" decode ON/OFF
543    $bq_on = 1 unless defined $bq_on;
544
545    if ($bq_on) {
546	$str =~ /([^=]*)=\s*"(.*?[^\\])"\s*/;
547	($param, $value) = ($1, $2);
548
549	my $reg_ew =
550	    qr{^
551	       =\?
552	       ([-0-9A-Za-z_]+)                         # charset
553	       (?:\*([A-Za-z]{1,8}                      # language
554		       (?:-[A-Za-z]{1,8})*))?           # (RFC 2231 section 5)
555	       \?
556	       (?:
557		   [Bb]\?([0-9A-Za-z\+\/]+={0,2})\?=    # "B" encoding
558	       |
559		   [Qq]\?([\x21-\x3e\x40-\x7e]+)\?=     # "Q" encoding
560	       )}x;
561
562	if ($value and $value =~ qr/$reg_ew(\s|$)/) { # "B" or "Q"
563	    ($charset0, $lang0) = ($1, $2);
564	    $lang0 = '' unless defined $lang0;
565	    $param0 = $param;
566
567	    require MIME::Base64;
568	    MIME::Base64->import();
569
570	    require MIME::QuotedPrint;
571	    MIME::QuotedPrint->import();
572
573	    my ($b_enc, $q_enc);
574
575	    for my $w (split /\s+/, $value) {
576		if ($w =~ qr/$reg_ew$/o) {
577		    ($charset, $lang, $b_enc, $q_enc) = ($1, $2, $3, $4);
578		    if (defined $q_enc) {
579			$q_enc =~ tr/_/ /;
580			$value0 .= decode_qp($q_enc);
581		    }
582		    else {
583			$value0 .= decode_base64($b_enc);
584		    }
585		}
586	    }
587	    if (lc($charset0) eq
588		'iso-2022-jp') { # remove redundant ESC sequences
589		$value0 =~ s/(\e..)([^\e]+)\e\(B(?=\1)/$1$2\n/g;
590		$value0 =~ s/\n\e..//g;
591		$value0 =~ s/\e\(B(\e..)/$1/g;
592	    }
593	    $result = "$sps$param0=\"$value0\"";
594	    if (wantarray) {
595		return ($trailing_crlf ? $result . $trailing_crlf : $result,
596			$param0, $charset0, $lang0, $value0);
597	    }
598	    return $trailing_crlf ? $result . $trailing_crlf : $result;
599	}
600    }
601
602    my ($param0_init, $cs_init, $quote) = (0) x 3;
603    my %params;
604
605    while ($str =~ /([^=]*)=(\s*".*?[^\\]";?|\S*)\s*/g) {
606        ($param, $value) = ($1, $2);
607        $value =~ s/;$//;
608        if ($value =~ s/^\s*"(.*)"$/$1/) {
609            $quote = 1;
610        }
611        if ($param =~ s/\*$//) {
612            if (!$cs_init) {
613                if ($value =~ /^(.*?)'(.*?)'(.*)/) {
614                    ($charset0, $lang0, $value) = ($1, $2, $3);
615                }
616                $cs_init = 1;
617            }
618            $value =~ s/%([0-9A-Fa-f][0-9A-Fa-f])/pack('H2', $1)/eg;
619        }
620        if (!$param0_init) {
621            $param =~ s/\*0$//;
622            $param0 = $param;
623            $param0_init = 1;
624        }
625        $params{$param} = $value;
626    }
627
628    my $n = keys %params;
629
630    $result = ($n == 0) ? "$sps$str" : "$sps$param0=";
631    $value0 = $params{$param0};
632    $value0 = '' unless defined $value0;
633    if ($n > 1) {
634        for (my $i = 1; $i < $n; $i++) {
635            $value = $params{$param0 . "\*$i"};
636            $value0 .= $value if defined $value;
637        }
638    }
639    if (lc($charset0) eq 'iso-2022-jp') { # remove redundant ESC sequences
640        $value0 =~ s/(\e..)([^\e]+)\e\(B(?=\1)/$1$2\n/g;
641        $value0 =~ s/\n\e..//g;
642        $value0 =~ s/\e\(B(\e..)/$1/g;
643    }
644    $result .= ($quote ? "\"$value0\"" : $value0);
645    if (wantarray) {
646        if (!$cs_init and $quote) {
647            $value0 =~ s/\\(.)/$1/g;
648        }
649        return ($trailing_crlf ? $result . $trailing_crlf : $result,
650                $param0, $charset0, $lang0, $value0);
651    }
652    return $trailing_crlf ? $result . $trailing_crlf : $result;
653}
654
6551;
656