1##---------------------------------------------------------------------------##
2##  File:
3##	$Id: readmail.pl,v 2.45 2014/04/22 02:33:10 ehood Exp $
4##  Author:
5##      Earl Hood       mhonarc AT mhonarc DOT org
6##  Description:
7##      Library defining routines to parse MIME e-mail messages.  The
8##	library is designed so it may be reused for other e-mail
9##	filtering programs.  The default behavior is for mail->html
10##	filtering, however, the defaults can be overridden to allow
11##	mail->whatever filtering.
12##
13##	Public Functions:
14##	----------------
15##	$data 		= MAILdecode_1522_str($str);
16##	($data, @files) = MAILread_body($fields_hash_ref, $body_ref);
17##	$hash_ref 	= MAILread_file_header($handle);
18##	$hash_ref 	= MAILread_header($mesg_str_ref);
19##
20##	($disp, $file, $raw, $html_name)  =
21##			  MAILhead_get_disposition($fields_hash_ref, $do_html);
22##	$boolean 	= MAILis_excluded($content_type);
23##	$parm_hash_ref  = MAILparse_parameter_str($header_field);
24##	$parm_hash_ref  = MAILparse_parameter_str($header_field, 1);
25##
26##---------------------------------------------------------------------------##
27##    Copyright (C) 1996-2003	Earl Hood, mhonarc AT mhonarc DOT org
28##
29##    This program is free software; you can redistribute it and/or modify
30##    it under the terms of the GNU General Public License as published by
31##    the Free Software Foundation; either version 2 of the License, or
32##    (at your option) any later version.
33##
34##    This program is distributed in the hope that it will be useful,
35##    but WITHOUT ANY WARRANTY; without even the implied warranty of
36##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
37##    GNU General Public License for more details.
38##
39##    You should have received a copy of the GNU General Public License
40##    along with this program; if not, write to the Free Software
41##    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
42##    02111-1307, USA
43##---------------------------------------------------------------------------##
44
45package readmail;
46
47no warnings qw(deprecated);
48
49$DEBUG = 0;
50
51###############################################################################
52##	Private Globals							     ##
53###############################################################################
54
55#my $Url	  = '(\w+://|\w+:)';
56my @_MIMEAltPrefs = ();
57my %_MIMEAltPrefs = ();
58
59###############################################################################
60##	Public Globals							     ##
61###############################################################################
62
63##---------------------------------------------------------------------------##
64##	Constants
65##
66
67## String for matching the start of a URL: It seems unnecessary to
68#  try to recognize all valid schemes, so we use a simplier regex.
69#  Keep the old one around just in case we need to resurrect it.
70#$UrlRxStr	  =
71#    '(?:(?:https?|ftp|afs|wais|telnet|ldap|gopher|z39\.50[rs]|vemmi|imap|'.
72#          'nfs|acap|rtspu?|tip|pop|sip|(?:soap|xmlrpc)\.beeps?|go|ipp|'.
73#          'tftp)://|'.
74#       'news:(?://)?|'.
75#       '(?:nntp|mid|cid|mailto|prospero|data|service|tel|fax|modem|h\.323):)';
76$UrlRxStr = '(?:(?:https?|ftp|ldap|gopher)://|news:(?://)?|(?:nntp|mailto):)';
77
78##  Constants for use as second argument to MAILdecode_1522_str().
79sub JUST_DECODE() { 1; }
80sub DECODE_ALL()  { 2; }
81sub TEXT_ENCODE() { 3; }
82
83##---------------------------------------------------------------------------##
84
85##---------------------------------------------------------------------------##
86##	Scalar Variables
87##
88
89##  Flag if message headers are decoded in the parse header routines:
90##  MAILread_header, MAILread_file_header.  This only affects the
91##  values of the field hash created.  The original header is still
92##  passed as the return value.
93##
94##  The only 1522 data that will be decoded is data encoded with charsets
95##  set to "-decode-" in the %MIMECharSetConverters hash.
96
97$DecodeHeader = 0;
98
99##---------------------------------------------------------------------------##
100##	Variables for holding information related to the functions used
101##	for processing MIME data.  Variables are defined in the scope
102##	of main.
103
104## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
105##  %MIMEDecoders is the associative array for storing functions for
106##  decoding mime data.
107##
108##	Keys => content-transfer-encoding (should be in lowercase)
109##	Values => function name.
110##
111##  Function names should be qualified with package identifiers.
112##  Functions are called as follows:
113##
114##	$decoded_data = &function($data);
115##
116##  The value "as-is" may be used to allow the data to be passed without
117##  decoding to the registered filter, but the decoded flag will be
118##  set to true.
119
120%MIMEDecoders = ()
121    unless %MIMEDecoders;
122%MIMEDecodersSrc = ()
123    unless %MIMEDecodersSrc;
124
125## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
126##  %MIMECharSetConverters is the associative array for storing functions
127##  for converting data in a particular charset to a destination format
128##  within the MAILdecode_1522_str() routine. Destination format is defined
129##  by the function.
130##
131##	Keys => charset (should be in lowercase)
132##	Values => function name.
133##
134##  Charset values take on a form like "iso-8859-1" or "us-ascii".
135##              NOTE: Values need to be in lower-case.
136##
137##  The key "default" can be assigned to define the default function
138##  to call if no explicit charset function is defined.
139##
140##  The key "plain" can be set to a function for decoded regular text not
141##  encoded in 1522 format.
142##
143##  Function names are name of defined perl function and should be
144##  qualified with package identifiers. Functions are called as follows:
145##
146##	$converted_data = &function($data, $charset);
147##
148##  A function called "-decode-" implies that the data should be
149##  decoded, but no converter is to be invoked.
150##
151##  A function called "-ignore-" implies that the data should
152##  not be decoded and converted.  Ie.  For the specified charset,
153##  the encoding will stay unprocessed and passed back in the return
154##  string.
155
156%MIMECharSetConverters = ()
157    unless %MIMECharSetConverters;
158%MIMECharSetConvertersSrc = ()
159    unless %MIMECharSetConvertersSrc;
160
161## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
162##  %MIMEFilters is the associative array for storing functions that
163##  process various content-types in the MAILread_body routine.
164##
165##	Keys => Content-type (should be in lowercase)
166##	Values => function name.
167##
168##  Function names should be qualified with package identifiers.
169##  Functions are called as follows:
170##
171##	$converted_data = &function($header, *parsed_header_assoc_array,
172##				    *message_data, $decoded_flag,
173##				    $optional_filter_arguments);
174##
175##  Functions can be registered for base types.  Example:
176##
177##	$MIMEFilters{"image/*"} = "mypackage'function";
178##
179##  IMPORTANT: If a function specified is not defined when MAILread_body
180##  tries to invoke it, MAILread_body will silently ignore.  Make sure
181##  that all functions are defined before invoking MAILread_body.
182
183%MIMEFilters = ()
184    unless %MIMEFilters;
185%MIMEFiltersSrc = ()
186    unless %MIMEFiltersSrc;
187
188## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
189##  %MIMEFiltersArgs is the associative array for storing any optional
190##  arguments to functions specified in MIMEFilters (the
191##  $optional_filter_arguments from above).
192##
193##	Keys => Either one of the following: content-type, function name.
194##	Values => Argument string (format determined by filter function).
195##
196##  Arguments listed for a content-type will be used over arguments
197##  listed for a function if both are applicable.
198
199%MIMEFiltersArgs = ()
200    unless %MIMEFiltersArgs;
201
202## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
203##  %MIMEExcs is the associative array listing which data types
204##  should be auto-excluded during parsing:
205##
206##	Keys => content-type, or base-type
207##	Values => <should evaluate to a true expression>
208
209%MIMEExcs = ()
210    unless %MIMEExcs;
211
212## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
213##  %MIMEIncs is the associative array listing which data types
214##  should be auto-included during parsing:
215##
216##	Keys => content-type, or base-type
217##	Values => <should evaluate to a true expression>
218##
219##  If there are any keys defined in %MIMEIncs, then any content-type
220##  not in the hash is automatically excluded.  I.e.  %MIMEIncs can
221##  be used to only allow a well-defined set of content-types.
222
223%MIMEIncs = ()
224    unless %MIMEIncs;
225
226## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
227##  %MIMECharsetAliases is a mapping of charset names to charset names.
228##  The MAILset_charset_aliases() routine should be used to set the
229##  values of this hash.
230##
231##	Keys => charset name
232##	Values => real charset name
233##
234%MIMECharsetAliases = ()
235    unless %MIMECharsetAliases;
236
237##---------------------------------------------------------------------------
238##	Text entity-related variables
239##
240
241##  Default character set if none specified.
242$TextDefCharset = 'us-ascii'
243    unless defined($TextDefCharset);
244
245##  Destination character encoding for text entities.
246$TextEncode = undef
247    unless defined($TextEncode);
248##  Text encoding function.
249$TextEncoderFunc = undef
250    unless defined($TextEncoderFunc);
251##  Text encoding function source file.
252$TextEncoderSrc = undef
253    unless defined($TextEncoderSrc);
254
255##  Prefilter function
256$TextPreFilter = undef
257    unless defined($TextPreFilter);
258
259##---------------------------------------------------------------------------
260##	Variables holding functions for generating processed output
261##	for MAILread_body().  The default functions generate HTML.
262##	However, the variables can be set to functions that generate
263##	a different type of output.
264##
265##	$FormatHeaderFunc has no default, and must be defined by
266##	the calling program.
267##
268##  Function that returns a message when failing to process a part of a
269##  a multipart message.  The content-type of the message is passed
270##  as an argument.
271
272$CantProcessPartFunc = \&cantProcessPart
273    unless (defined($CantProcessPartFunc));
274
275##  Function that returns a message when a part is excluded via %MIMEExcs.
276
277$ExcludedPartFunc = \&excludedPart
278    unless (defined($ExcludedPartFunc));
279
280##  Function that returns a message when a part is unrecognized in a
281##  multipart/alternative message.  I.e. No part could be processed.
282##  No arguments are passed to function.
283
284$UnrecognizedAltPartFunc = \&unrecognizedAltPart
285    unless (defined($UnrecognizedAltPartFunc));
286
287##  Function that returns a string to go before any data generated generating
288##  from processing an embedded message (message/rfc822 or message/news).
289##  No arguments are passed to function.
290
291$BeginEmbeddedMesgFunc = \&beginEmbeddedMesg
292    unless (defined($BeginEmbeddedMesgFunc));
293
294##  Function that returns a string to go after any data generated generating
295##  from processing an embedded message (message/rfc822 or message/news).
296##  No arguments are passed to function.
297
298$EndEmbeddedMesgFunc = \&endEmbeddedMesg
299    unless (defined($EndEmbeddedMesgFunc));
300
301##  Function to return a string that is a result of the functions
302##  processing of a message header.  The function is called for
303##  embedded messages (message/rfc822 and message/news).  The
304##  arguments to function are:
305##
306##   1.	Pointer to associative array representing message header
307##	contents with the keys as field labels (in all lower-case)
308##	and the values as field values of the labels.
309##
310##   2. Pointer to associative array mapping lower-case keys of
311##	argument 1 to original case.
312##
313##  Prototype: $return_data = &function(*fields, *lower2orig_fields);
314
315$FormatHeaderFunc = undef
316    unless (defined($FormatHeaderFunc));
317
318###############################################################################
319##	Public Routines							     ##
320###############################################################################
321##---------------------------------------------------------------------------##
322##	MAILdecode_1522_str() decodes a string encoded in a format
323##	specified by RFC 1522.  The decoded string is the return value.
324##	If no MIMECharSetConverters is registered for a charset, then
325##	the decoded data is returned "as-is".
326##
327##	Usage:
328##
329##	    $ret_data = &MAILdecode_1522_str($str, $dec_flag);
330##
331##	If $dec_flag is JUST_DECODE, $str will be decoded for only
332##	the charsets specified as "-decode-".  If it is equal to
333##	DECODE_ALL, all encoded data is decoded without any conversion.
334##	If $dec_flag is TEXT_ENCODE, then all data will be converted
335##	and encoded according to $readmail::TextEncode and
336##	$readmail::TextEncoderFunc.
337##
338sub MAILdecode_1522_str {
339    my $str      = shift;
340    my $dec_flag = shift || 0;
341    my $ret      = ('');
342    my ($charset, $encoding,     $pos,      $dec,
343        $charcnv, $real_charset, $plaincnv, $plain_real_charset,
344        $strtxt,  $str_before
345    );
346
347    # Get text encoder
348    my $encfunc = undef;
349    if ($dec_flag == TEXT_ENCODE) {
350        $encfunc = load_textencoder();
351        if (!defined($encfunc)) {
352            $encfunc = undef unless defined($encfunc);
353            $dec_flag = 0;
354        }
355    }
356
357    # Get plain converter
358    ($plaincnv, $plain_real_charset) = MAILload_charset_converter('plain');
359    $plain_real_charset = 'us-ascii' if $plain_real_charset eq 'plain';
360
361    # Decode string
362    my $firsttime = 1;
363    while ($str =~ /(=\?([^?]+)\?(.)\?([^?]*)\?=)/g) {
364        # Grab components
365        $pos = pos($str);
366        ($charset, $encoding, $strtxt) = (lc($2), lc($3), $4);
367        $str_before = substr($str, 0, $pos - length($1));
368        substr($str, 0, $pos) = '';
369        pos($str) = 0;
370
371        # Check encoding method and grab proper decoder
372        if ($encoding eq 'b') {
373            $dec = &load_decoder('base64');
374        } else {
375            $dec = &load_decoder('quoted-printable');
376        }
377
378        # Convert before (unencoded) text
379        if ($firsttime || $str_before =~ /\S/) {
380            if (defined($encfunc)) {    # encoding
381                &$encfunc(\$str_before, $plain_real_charset, $TextEncode);
382                $ret .= $str_before;
383            } elsif ($dec_flag) {       # ignore if just decode
384                $ret .= $str_before;
385            } elsif (defined(&$plaincnv)) {    # decode and convert
386                $ret .= &$plaincnv($str_before, $plain_real_charset);
387            } else {                           # ignore
388                $ret .= $str_before;
389            }
390        }
391        $firsttime = 0;
392
393        # Encoding text
394        if (defined($encfunc)) {
395            $real_charset =
396                  $MIMECharsetAliases{$charset}
397                ? $MIMECharsetAliases{$charset}
398                : $charset;
399            $strtxt =~ s/_/ /g;
400            $strtxt = &$dec($strtxt);
401            $strtxt =~ s/[\r\n]/ /g;
402            &$encfunc(\$strtxt, $real_charset, $TextEncode);
403            $ret .= $strtxt;
404
405            # Regular conversion
406        } else {
407            if ($dec_flag == DECODE_ALL) {
408                $charcnv = '-decode-';
409            } else {
410                ($charcnv, $real_charset) =
411                    MAILload_charset_converter($charset);
412            }
413            # Decode only
414            if ($charcnv eq '-decode-') {
415                $strtxt =~ s/_/ /g;
416                $strtxt = &$dec($strtxt);
417                $strtxt =~ s/[\r\n]/ /g;
418                $ret .= $strtxt;
419
420                # Ignore if just decoding
421            } elsif ($dec_flag) {
422                $ret .= "=?$charset?$encoding?$strtxt?=";
423
424                # Decode and convert
425            } elsif (defined(&$charcnv)) {
426                $strtxt =~ s/_/ /g;
427                $strtxt = &$dec($strtxt);
428                $strtxt =~ s/[\r\n]/ /g;
429                $ret .= &$charcnv($strtxt, $real_charset);
430
431                # Fallback is to ignore
432            } else {
433                $ret .= "=?$charset?$encoding?$strtxt?=";
434            }
435        }
436    }
437
438    # Convert left-over unencoded text
439    if (defined($encfunc)) {    # encoding
440        &$encfunc(\$str, $plain_real_charset, $TextEncode);
441        $ret .= $str;
442    } elsif ($dec_flag) {       # ignore if just decode
443        $ret .= $str;
444    } elsif (defined(&$plaincnv)) {    # decode and convert
445        $ret .= &$plaincnv($str, $plain_real_charset);
446    } else {                           # ignore
447        $ret .= $str;
448    }
449
450    $ret;
451}
452
453##---------------------------------------------------------------------------##
454##	MAILread_body() parses a MIME message body.
455##	Usage:
456##	  ($data, @files) =
457##	      MAILread_body($fields_hash_ref, $body_data_ref);
458##
459##	Parameters:
460##	  $fields_hash_ref
461##		      A reference to hash of message/part header
462##		      fields.  Keys are field names in lowercase
463##		      and values are array references containing the
464##		      field values.  For example, to obtain the
465##		      content-type, if defined, one would do:
466##
467##			$fields_hash_ref->{'content-type'}[0]
468##
469##		      Values for a fields are stored in arrays since
470##		      duplication of fields are possible.  For example,
471##		      the Received: header field is typically repeated
472##		      multiple times.  For fields that only occur once,
473##		      then array for the field will only contain one
474##		      item.
475##
476##	  $body_data_ref
477##		      Reference to body data.  It is okay for the
478##		      filter to modify the text in-place.
479##
480##	Return:
481##	  The first item in the return list is the text that should
482##	  printed to the message page.	Any other items in the return
483##	  list are derived filenames created.
484##
485##	See Also:
486##	  MAILread_header(), MAILread_file_header()
487##
488sub MAILread_body {
489    my ($fields,    # Parsed header hash
490        $body,      # Reference to raw body text
491        $inaltArg
492    ) = @_;         # Flag if in multipart/alternative
493
494    my ($type,  $subtype, $boundary, $content,
495        $ctype, $pos,     $encoding, $decodefunc,
496        $args,  $part,    $uribase
497    );
498    my (@parts) = ();
499    my (@files) = ();
500    my (@array) = ();
501    my $ret     = "";
502
503    ## Get type/subtype
504    if (defined($fields->{'content-type'})) {
505        $content = $fields->{'content-type'}->[0];
506    }
507    $content = 'text/plain' unless $content;
508    ($ctype) = $content =~ m%^\s*([\w\-\./]+)%;    # Extract content-type
509    $ctype =~ tr/A-Z/a-z/;                         # Convert to lowercase
510    if ($ctype =~ m%/%) {    # Extract base and sub types
511        ($type, $subtype) = split(/\//, $ctype, 2);
512    } elsif ($ctype =~ /text/i) {
513        $ctype   = 'text/plain';
514        $type    = 'text';
515        $subtype = 'plain';
516    } else {
517        $type = $subtype = '';
518    }
519    $fields->{'x-mha-content-type'} = $ctype;
520
521    ## Check if type is excluded
522    if (MAILis_excluded($ctype)) {
523        return (&$ExcludedPartFunc($ctype));
524    }
525
526    ## Get entity URI base
527    if (defined($fields->{'content-base'})
528        && ($uribase = $fields->{'content-base'}[0])) {
529        $uribase =~ s/['"\s]//g;
530    } elsif (defined($fields->{'content-location'})
531        && ($uribase = $fields->{'content-location'}[0])) {
532        $uribase =~ s/['"\s]//g;
533    }
534    $uribase =~ s|(.*/).*|$1| if $uribase;
535
536    ## Load content-type filter
537    if (   (!defined($filter = &load_filter($ctype)) || !defined(&$filter))
538        && (!defined($filter = &load_filter("$type/*")) || !defined(&$filter))
539        && (   !$inaltArg
540            && (!defined($filter = &load_filter('*/*')) || !defined(&$filter))
541            && $ctype !~ m^\bmessage/(?:rfc822|news)\b^i
542            && $type !~ /\bmultipart\b/)
543    ) {
544        warn qq|Warning: Unrecognized content-type, "$ctype", |,
545            qq|assuming "application/octet-stream"\n|;
546        $filter = &load_filter('application/octet-stream');
547    }
548
549    ## Check for filter arguments
550    $args = get_filter_args($ctype, "$type/*", $filter);
551
552    ## Check encoding
553    if (defined($fields->{'content-transfer-encoding'})) {
554        $encoding = lc $fields->{'content-transfer-encoding'}[0];
555        $encoding =~ s/\s//g;
556        $decodefunc = &load_decoder($encoding);
557    } else {
558        $encoding   = undef;
559        $decodefunc = undef;
560    }
561    my $decoded = 0;
562    if (defined($decodefunc) && defined(&$decodefunc)) {
563        $$body   = &$decodefunc($$body);
564        $decoded = 1;
565    } elsif ($decodefunc =~ /as-is/i) {
566        $decoded = 1;
567    }
568
569    ## Convert text encoding
570    if ($type eq 'text') {
571        my $charset = extract_charset($content, $subtype, $body);
572        $fields->{'x-mha-charset'} = $charset;
573        my $textfunc = load_textencoder();
574        if (defined($textfunc)) {
575            if ($DEBUG) {
576                print STDERR "MAILread_body: have textfunc: $textfunc\n";
577            }
578            $fields->{'x-mha-charset'} = $TextEncode
579                if defined(&$textfunc($body, $charset, $TextEncode));
580        }
581        if (defined($TextPreFilter) && defined(&$TextPreFilter)) {
582            if ($DEBUG) {
583                print STDERR 'MAILread_body: have TextPreFilter: ',
584                    $TextPreFilter, "\n";
585            }
586            &$TextPreFilter($fields, $body);
587        }
588    } else {
589        # define x-mha-charset in case text filter associated with
590        # a non-text type
591        $fields->{'x-mha-charset'} = $TextDefCharset;
592    }
593    if ($DEBUG) {
594        print STDERR 'MAILread_body: charset: ', $fields->{'x-mha-charset'},
595            "\n";
596    }
597
598    ## A filter is defined for given content-type
599    if ($filter && defined(&$filter)) {
600        @array = &$filter($fields, $body, $decoded, $args);
601        ## Setup return variables
602        $ret = shift @array;    # Return string
603        push(@files, @array);   # Derived files
604
605        ## No filter defined for given content-type
606    } else {
607        ## If multipart, recursively process each part
608        if ($type =~ /\bmultipart\b/i) {
609            local (%Cid) = () unless scalar(caller) eq 'readmail';
610            my ($isalt) = $subtype =~ /\balternative\b/i;
611
612            ## Get boundary
613            $boundary = "";
614            if ($content =~ m/\bboundary\s*=\s*"([^"]*)"/i) {
615                $boundary = $1;
616            } else {
617                ($boundary) = $content =~ m/\bboundary\s*=\s*([^\s;]+)/i;
618                $boundary =~ s/;$//;    # chop ';' if grabbed
619            }
620
621            ## If boundary defined, split body into parts
622            if ($boundary =~ /\S/) {
623                my $found     = 0;
624                my $have_end  = 0;
625                my $start_pos = 0;
626                substr($$body,    0, 0) = "\n";
627                substr($boundary, 0, 0) = "\n--";
628                my $blen = length($boundary);
629                my $bchkstr;
630
631                while (($pos = index($$body, $boundary, $start_pos)) > -1) {
632                    # have to check for case when boundary is a substring
633                    #	of another boundary, yuck!
634                    $bchkstr = substr($$body, $pos + $blen, 2);
635                    unless ($bchkstr =~ /\A\r?\n/ || $bchkstr =~ /\A--/) {
636                        # incomplete match, continue search
637                        $start_pos = $pos + $blen;
638                        next;
639                    }
640                    $found = 1;
641                    push(@parts, substr($$body, 0, $pos));
642                    $parts[$#parts] =~ s/^\r//;
643
644                    # prune out part data just grabbed
645                    substr($$body, 0, $pos + $blen) = "";
646
647                    # check if hit end
648                    if ($$body =~ /\A--/) {
649                        $have_end = 1;
650                        last;
651                    }
652
653                    # remove EOL at the beginning
654                    $$body =~ s/\A\r?\n//;
655                    $start_pos = 0;
656                }
657                if ($found) {
658                    if (!$have_end) {
659                        warn qq/Warning: No end boundary delimiter found in /,
660                            qq/message body\n/;
661                        push(@parts, $$body);
662                        $parts[$#parts] =~ s/^\r//;
663                        $$body = "";
664                    } else {
665                        # discard front-matter
666                        shift(@parts);
667                    }
668                } else {
669                    # no boundary separators in message!
670                    warn qq/Warning: No boundary delimiters found in /,
671                        qq/multipart body\n/;
672                    if ($$body =~ m/\A\n[\w\-]+:\s/) {
673                        # remove \n added above if part looks like it has
674                        # headers.  we keep if it does not to avoid body
675                        # data being parsed as a header below.
676                        substr($$body, 0, 1) = "";
677                    }
678                    push(@parts, $$body);
679                }
680
681                ## Else treat body as one part
682            } else {
683                @parts = ($$body);
684            }
685
686            ## Process parts
687            my (@entity) = ();
688            my ($cid, $href, $pctype);
689            my %alt_exc        = ();
690            my $have_alt_prefs = $isalt && scalar(@_MIMEAltPrefs);
691            my $partno         = 0;
692            @parts = \(@parts);
693            while (defined($part = shift(@parts))) {
694                $href = {};
695                $partfields = $href->{'fields'} = (MAILread_header($part))[0];
696                $href->{'body'}                    = $part;
697                $href->{'filtered'}                = 0;
698                $partfields->{'x-mha-part-number'} = ++$partno;
699                $pctype =
700                    extract_ctype($partfields->{'content-type'}, $ctype);
701
702                ## check alternative preferences
703                if ($have_alt_prefs) {
704                    next if ($alt_exc{$pctype});
705                    my $pos = $_MIMEAltPrefs{$pctype};
706                    if (defined($pos)) {
707                        for (++$pos; $pos <= $#_MIMEAltPrefs; ++$pos) {
708                            $alt_exc{$_MIMEAltPrefs[$pos]} = 1;
709                        }
710                    }
711                }
712
713                ## only add to %Cid if not excluded
714                if (!&MAILis_excluded($pctype)) {
715                    if ($isalt) {
716                        unshift(@entity, $href);
717                    } else {
718                        push(@entity, $href);
719                    }
720                    $cid = $partfields->{'content-id'}[0]
721                        || $partfields->{'message-id'}[0];
722                    if (defined($cid)) {
723                        $cid =~ s/[\s<>]//g;
724                        $Cid{"cid:$cid"} = $href if $cid =~ /\S/;
725                    }
726                    $cid = undef;
727                    if (defined($partfields->{'content-location'})
728                        && ($cid = $partfields->{'content-location'}[0])) {
729                        my $partbase = $uribase;
730                        $cid =~ s/['"\s]//g;
731                        if (defined($partfields->{'content-base'})) {
732                            $partbase = $partfields->{'content-base'}[0];
733                        }
734                        $cid = apply_base_url($partbase, $cid);
735                        if ($cid =~ /\S/ && !$Cid{$cid}) {
736                            $Cid{$cid} = $href;
737                        }
738                    }
739                    if ($cid) {
740                        $partfields->{'content-location'} = [$cid];
741                    } elsif (!defined($partfields->{'content-base'})) {
742                        $partfields->{'content-base'} = [$uribase];
743                    }
744
745                    $partfields->{'x-mha-parent-header'} = $fields;
746                }
747            }
748
749            my ($entity);
750        ENTITY: foreach $entity (@entity) {
751                if ($entity->{'filtered'}) {
752                    next ENTITY;
753                }
754
755                ## If content-type not defined for part, then determine
756                ## content-type based upon multipart subtype.
757                $partfields = $entity->{'fields'};
758                if (!defined($partfields->{'content-type'})) {
759                    $partfields->{'content-type'} = [
760                        ($subtype =~ /digest/)
761                        ? 'message/rfc822'
762                        : 'text/plain'
763                    ];
764                }
765
766                ## Process part
767                @array =
768                    MAILread_body($partfields, $entity->{'body'}, $isalt);
769
770                ## Only use last filterable part in alternate
771                if ($isalt) {
772                    $ret = shift @array;
773                    if ($ret) {
774                        push(@files, @array);
775                        $entity->{'filtered'} = 1;
776                        last ENTITY;
777                    }
778                } else {
779                    if (!$array[0]) {
780                        $array[0] =
781                            &$CantProcessPartFunc(
782                            $partfields->{'content-type'}[0]);
783                    }
784                    $ret .= shift @array;
785                }
786                push(@files, @array);
787                $entity->{'filtered'} = 1;
788            }
789
790            ## Check if multipart/alternative, and no success
791            if (!$ret && $isalt) {
792                warn
793                    qq|Warning: No recognized part in multipart/alternative; |,
794                    qq|will try to decode last part\n|;
795                $entity = $entity[0];
796                @array =
797                    &MAILread_body($entity->{'fields'}, $entity->{'body'});
798                $ret = shift @array;
799                if ($ret) {
800                    push(@files, @array);
801                } else {
802                    $ret = &$UnrecognizedAltPartFunc();
803                }
804            }
805
806            ## Aid garbage collection(?)
807            foreach $entity (@entity) {
808                delete $entity->{'fields'}{'x-mha-parent-header'};
809            }
810
811            ## Else if message/rfc822 or message/news
812        } elsif ($ctype =~ m^\bmessage/(?:rfc822|news)\b^i) {
813            $partfields = (MAILread_header($body))[0];
814
815            # propogate parent and part no to message/* header
816            $partfields->{'x-mha-parent-header'} =
817                $fields->{'x-mha-parent-header'};
818            $partfields->{'x-mha-part-number'} =
819                $fields->{'x-mha-part-number'};
820
821            $ret = &$BeginEmbeddedMesgFunc();
822            if ($FormatHeaderFunc && defined(&$FormatHeaderFunc)) {
823                $ret .= &$FormatHeaderFunc($partfields);
824            } else {
825                warn "Warning: readmail: No message header formatting ",
826                    "function defined\n";
827            }
828            @array = MAILread_body($partfields, $body);
829            $ret .= shift @array
830                || &$CantProcessPartFunc($partfields->{'content-type'}[0]
831                    || 'text/plain');
832            $ret .= &$EndEmbeddedMesgFunc();
833
834            push(@files, @array);
835            delete $partfields->{'x-mha-parent-header'};
836
837            ## Else cannot handle type
838        } else {
839            $ret = '';
840        }
841    }
842
843    ($ret, @files);
844}
845
846##---------------------------------------------------------------------------##
847##	MAILread_header reads (and strips) a mail message header from the
848##	variable $mesg.  $mesg is a reference to the mail message in
849##	a string.
850##
851##	$fields is a reference to a hash to put field values indexed by
852##	field labels that have been converted to all lowercase.
853##	Field values are array references to the values
854##	for each field.
855##
856##	($fields_hash_ref, $header_txt) = MAILread_header($mesg_data);
857##
858sub MAILread_header {
859    my $mesg = shift;
860
861    my $fields = {};
862    my $label  = '';
863    my $header = '';
864    my ($value, $tmp, $pos);
865
866    ## Read a line at a time.
867    for ($pos = 0; $pos >= 0;) {
868        $pos = index($$mesg, "\n");
869        if ($pos >= 0) {
870            $tmp = substr($$mesg, 0, $pos + 1);
871            substr($$mesg, 0, $pos + 1) = "";
872            last if $tmp =~ /^\r?$/;    # Done if blank line
873
874            $header .= $tmp;
875            chop $tmp;                  # Chop newline
876            $tmp =~ s/\r$//;            # Delete <CR> characters
877        } else {
878            $tmp = $$mesg;
879            $header .= $tmp;
880        }
881
882        ## Check for continuation of a field
883        if ($tmp =~ /^\s/) {
884            $fields->{$label}[-1] .= $tmp if $label;
885            next;
886        }
887
888        ## Separate head from field text
889        if ($tmp =~ /^([^:\s]+):\s*([\s\S]*)$/) {
890            ($label, $value) = (lc($1), $2);
891            if ($fields->{$label}) {
892                push(@{$fields->{$label}}, $value);
893            } else {
894                $fields->{$label} = [$value];
895            }
896        }
897    }
898    decode_1522_fields($fields);
899    ($fields, $header);
900}
901
902##---------------------------------------------------------------------------##
903##	MAILread_file_header reads (and strips) a mail message header
904##	from the filehandle $handle.  The routine behaves in the
905##	same manner as MAILread_header;
906##
907##	($fields_hash, $header_text) = MAILread_file_header($filehandle);
908##
909sub MAILread_file_header {
910    my $handle = shift;
911    my $encode = shift;
912
913    my $label  = '';
914    my $header = '';
915    my $fields = {};
916    local $/ = "\n";
917
918    my ($value, $tmp);
919    while (($tmp = <$handle>) !~ /^[\r]?$/) {
920        ## Save raw text
921        $header .= $tmp;
922
923        ## Delete eol characters
924        $tmp =~ s/[\r\n]//g;
925
926        ## Check for continuation of a field
927        if ($tmp =~ /^\s/) {
928            $fields->{$label}[-1] .= $tmp if $label;
929            next;
930        }
931
932        ## Separate head from field text
933        if ($tmp =~ /^([^:\s]+):\s*([\s\S]*)$/) {
934            ($label, $value) = (lc($1), $2);
935            if (defined($fields->{$label})) {
936                push(@{$fields->{$label}}, $value);
937            } else {
938                $fields->{$label} = [$value];
939            }
940        }
941    }
942    decode_1522_fields($fields);
943    ($fields, $header);
944}
945
946##---------------------------------------------------------------------------##
947##	MAILis_excluded() checks if specified content-type has been
948##	specified to be excluded.
949##
950sub MAILis_excluded {
951    my $ctype = lc($_[0]) || 'text/plain';
952    my $btype = undef;
953
954    $ctype =~ s/\/x-/\//;
955    if ($ctype =~ m|([^/]+)/|) {
956        $btype = $1;
957    }
958
959MIMEINCS: {
960        # Treat multipart special: It is always included unless present
961        # in MIMEExcs.
962        last MIMEINCS if ($ctype =~ /^multipart\b/);
963
964        if (%MIMEIncs) {
965            if ($MIMEIncs{$ctype} || (defined($btype) && $MIMEIncs{$btype})) {
966                last MIMEINCS;
967            } else {
968                return 1;
969            }
970        }
971    }
972    if ($MIMEExcs{$ctype} || (defined($btype) && $MIMEExcs{$btype})) {
973        return 1;
974    }
975    0;
976}
977
978##---------------------------------------------------------------------------##
979##	MAILhead_get_disposition gets the content disposition and
980##	filename from $hfields, $hfields is a hash produced by the
981##	MAILread_header and MAILread_file_header routines.
982##
983sub MAILhead_get_disposition {
984    my $hfields = shift;
985    my $do_html = shift;
986
987    my ($disp, $filename, $raw) = ('', '', '');
988    my $html_name = undef;
989    local ($_);
990
991    if (defined($hfields->{'content-disposition'})
992        && ($_ = $hfields->{'content-disposition'}->[0])) {
993        ($disp) = /^\s*([^\s;]+)/;
994        if (/filename="([^"]+)"/i) {
995            $raw = $1;
996        } elsif (/filename=(\S+)/i) {
997            ($raw = $1) =~ s/;\s*$//g;
998        }
999    }
1000    if (!$raw && defined($_ = $hfields->{'content-type'}[0])) {
1001        if (/name="([^"]+)"/i) {
1002            $raw = $1;
1003        } elsif (/name=(\S+)/i) {
1004            ($raw = $1) =~ s/;\s*$//g;
1005        }
1006    }
1007    $filename = MAILdecode_1522_str($raw, DECODE_ALL);
1008    $filename =~ s%.*[/\\:]%%;    # Remove any path component
1009    $filename =~ s/^\s+//;        # Remove leading whitespace
1010    $filename =~ s/\s+$//;        # Remove trailing whitespace
1011    $filename =~ tr/\0-\40\t\n\r?:*"'<>|\177-\377/_/;
1012    # Remove questionable/invalid characters
1013
1014    # Only provide HTML display version if requested
1015    $html_name = MAILdecode_1522_str($raw) if $do_html;
1016
1017    ($disp, $filename, $raw, $html_name);
1018}
1019
1020##---------------------------------------------------------------------------##
1021##	MAILparse_parameter_str(): parses a parameter/value string.
1022##	Support for RFC 2184 extensions exists.  The $hasmain flag tells
1023##	the method if there is an intial main value for the sting.  For
1024##      example:
1025##
1026##          text/plain; charset=us-ascii
1027##      ----^^^^^^^^^^
1028##
1029##      The "text/plain" part is not a parameter/value pair, but having
1030##      an initial value is common among some header fields that can have
1031##      parameter/value pairs (egs: Content-Type, Content-Disposition).
1032##
1033##	Return Value:
1034##	    Reference to a hash.  Each key is the attribute name.
1035##	    The special key, 'x-main', is the main value if the
1036##	    $hasmain flag is set.
1037##
1038##	    Each hash value is a hash reference with three keys:
1039##	    'charset', 'lang', 'value'.  'charset' and 'lang' may be
1040##	    undef if character set or language information is not
1041##	    specified.
1042##
1043##	Example Usage:
1044##
1045##	    $content_type_field = 'text/plain; charset=us-ascii';
1046##	    $parms = MAILparse_parameter_str($content_type_field, 1);
1047##	    $ctype = $parms->{'x-main'};
1048##	    $mesg_body_charset = $parms->{'charset'}{'value'};
1049##
1050sub MAILparse_parameter_str {
1051    my $str     = shift;    # Input string
1052    my $hasmain = shift;    # Flag if there is a main value to extract
1053
1054    require MHonArc::RFC822;
1055
1056    my $parm = {};
1057    my @toks = MHonArc::RFC822::uncomment($str);
1058    my ($tok, $name, $value, $charset, $lang, $isPart);
1059
1060    $parm->{'x-main'} = shift @toks if $hasmain;
1061
1062    ## Loop thru token list
1063    while ($tok = shift @toks) {
1064        next if $tok eq ";";
1065        ($name, $value) = split(/=/, $tok, 2);
1066        ## Check if charset/lang specified
1067        if ($name =~ s/\*$//) {
1068            if ($value =~ s/^([^']*)'([^']*)'//) {
1069                ($charset, $lang) = ($1, $2);
1070            } else {
1071                ($charset, $lang) = (undef, undef);
1072            }
1073        }
1074        ## Check if parameter is only part
1075        if ($name =~ s/\*(\d+)$//) {
1076            $isPart = 1;
1077        } else {
1078            $isPart = 0;
1079        }
1080        ## Set values for parameter
1081        $name = lc $name;
1082        $parm->{$name} = {} unless defined($parm->{$name});
1083        $parm->{$name}{'charset'} = $charset;
1084        $parm->{$name}{'lang'}    = $lang;
1085        ## Check if value is next token
1086        if ($value eq "") {
1087            ## If value next token, than it must be quoted
1088            $value = shift @toks;
1089            $value =~ s/^"//;
1090            $value =~ s/"$//;
1091            $value =~ s/\\//g;
1092        }
1093        if ($isPart && defined($parm->{$name}{'vlist'})) {
1094            push(@{$parm->{$name}{'vlist'}}, $value);
1095        } else {
1096            $parm->{$name}{'vlist'} = [$value];
1097        }
1098    }
1099
1100    ## Now we loop thru each parameter and define the final values from
1101    ## the parts
1102    foreach $name (keys %$parm) {
1103        next if $name eq 'x-main';
1104        $parm->{$name}{'value'} = join("", @{$parm->{$name}{'vlist'}});
1105    }
1106
1107    $parm;
1108}
1109
1110##---------------------------------------------------------------------------##
1111##	MAILset_alternative_prefs() is used to set content-type
1112##	preferences for multipart/alternative entities.  The list
1113##	specified will supercede the prefered format as denoted by
1114##	the ording of parts in the entity.
1115##
1116##	A content-type listed earlier in the array will be prefered
1117##	over one later.  For example:
1118##
1119##	  MAILset_alternative_prefs('text/plain', 'text/html');
1120##
1121##	States that if a multipart/alternative entity contains a
1122##	text/plain part and a text/html part, the text/plain part will
1123##	be prefered over the text/html part.
1124##
1125sub MAILset_alternative_prefs {
1126    @_MIMEAltPrefs = map {lc} @_;
1127    %_MIMEAltPrefs = ();
1128    my $i = 0;
1129    my $ctype;
1130    foreach $ctype (@_MIMEAltPrefs) {
1131        $_MIMEAltPrefs{$ctype} = $i++;
1132    }
1133}
1134
1135##---------------------------------------------------------------------------##
1136##	MAILset_charset_aliases() is used to define name aliases for
1137##	charset names.
1138##
1139##	Example usage:
1140##	  MAILset_charset_aliases( {
1141##	    'iso-8859-1' =>  [ 'latin1', 'iso_8859_1', '8859-1' ],
1142##	    'iso-8859-15' => [ 'latin9', 'iso_8859_15', '8859-15' ],
1143##	  }, $override );
1144##
1145sub MAILset_charset_aliases {
1146    my $map      = shift;
1147    my $override = shift;
1148
1149    %MIMECharsetAliases = () if $override;
1150    my ($charset, $aliases, $alias);
1151    while (($charset, $aliases) = each(%$map)) {
1152        $charset = lc $charset;
1153        foreach $alias (@$aliases) {
1154            $MIMECharsetAliases{lc $alias} = $charset;
1155        }
1156    }
1157}
1158
1159##---------------------------------------------------------------------------##
1160##	MAILload_charset_converter() loads the charset converter function
1161##	associated with given charset name.
1162##
1163##	Example usage:
1164##	  ($func, $real_charset) = MAILload_charset_converter($charset);
1165##
1166##	$func is the reference to the converter function, which may be
1167##	undef.  $real_charset is the real charset name that should be
1168##	used when invoking the function.
1169##
1170sub MAILload_charset_converter {
1171    my $charset = lc shift;
1172    $charset = $MIMECharsetAliases{$charset} if $MIMECharsetAliases{$charset};
1173    my $func = load_charset($charset);
1174    if (!defined($func) || !defined(&$func)) {
1175        $func = load_charset('default');
1176    }
1177    ($func, $charset);
1178}
1179
1180###############################################################################
1181##	Private Routines
1182###############################################################################
1183
1184##---------------------------------------------------------------------------##
1185##	Default function for unable to process a part of a multipart
1186##	message.
1187##
1188sub cantProcessPart {
1189    my ($ctype) = $_[0];
1190    warn "Warning: Could not process part with given Content-Type: ",
1191        "$ctype\n";
1192    "<br><tt>&lt;&lt;&lt; $ctype: Unrecognized &gt;&gt;&gt;</tt><br>\n";
1193}
1194##---------------------------------------------------------------------------##
1195##	Default function returning message for content-types excluded.
1196##
1197sub excludedPart {
1198    my ($ctype) = $_[0];
1199    "<br><tt>&lt;&lt;&lt; $ctype: EXCLUDED &gt;&gt;&gt;</tt><br>\n";
1200}
1201##---------------------------------------------------------------------------##
1202##	Default function for unrecognizeable part in multipart/alternative.
1203##
1204sub unrecognizedAltPart {
1205    warn "Warning: No recognizable part in multipart/alternative\n";
1206    "<br><tt>&lt;&lt;&lt; multipart/alternative: "
1207        . "No recognizable part &gt;&gt;&gt;</tt><br>\n";
1208}
1209##---------------------------------------------------------------------------##
1210##	Default function for beggining of embedded message
1211##	(ie message/rfc822 or message/news).
1212##
1213sub beginEmbeddedMesg {
1214    qq|<blockquote><small>---&nbsp;<i>Begin&nbsp;Message</i>&nbsp;---</small>\n|;
1215}
1216##---------------------------------------------------------------------------##
1217##	Default function for end of embedded message
1218##	(ie message/rfc822 or message/news).
1219##
1220sub endEmbeddedMesg {
1221    qq|<br><small>---&nbsp;<i>End Message</i>&nbsp;---</small></blockquote>\n|;
1222}
1223
1224##---------------------------------------------------------------------------##
1225
1226sub load_charset {
1227    require $MIMECharSetConvertersSrc{$_[0]}
1228        if defined($MIMECharSetConvertersSrc{$_[0]})
1229        && $MIMECharSetConvertersSrc{$_[0]};
1230    $MIMECharSetConverters{$_[0]};
1231}
1232
1233sub load_decoder {
1234    my $enc = lc shift;
1235    $enc =~ s/\s//;
1236    require $MIMEDecodersSrc{$enc}
1237        if defined($MIMEDecodersSrc{$enc})
1238        && $MIMEDecodersSrc{$enc};
1239    $MIMEDecoders{$enc};
1240}
1241
1242sub load_filter {
1243    require $MIMEFiltersSrc{$_[0]}
1244        if defined($MIMEFiltersSrc{$_[0]})
1245        && $MIMEFiltersSrc{$_[0]};
1246    $MIMEFilters{$_[0]};
1247}
1248
1249sub get_filter_args {
1250    my $args = '';
1251    my $s;
1252    foreach $s (@_) {
1253        next unless defined $s;
1254        $args = $MIMEFiltersArgs{$s};
1255        last if defined($args) && ($args ne '');
1256    }
1257    $args;
1258}
1259
1260sub load_textencoder {
1261    return undef unless $TextEncode;
1262TRY: {
1263        if (!defined($TextEncoderFunc)) {
1264            last TRY;
1265        }
1266        if (defined(&$TextEncoderFunc)) {
1267            return $TextEncoderFunc;
1268        }
1269        if (!defined($TextEncoderSrc)) {
1270            last TRY;
1271        }
1272        require $TextEncoderSrc;
1273        if (defined(&$TextEncoderFunc)) {
1274            return $TextEncoderFunc;
1275        }
1276    }
1277    warn qq/Warning: Unable to load text encode for "$TextEncode"\n/;
1278    $TextEncode      = undef;
1279    $TextEncoderFunc = undef;
1280    $TextEncoderSrc  = undef;
1281}
1282
1283##---------------------------------------------------------------------------##
1284##	extract_ctype() extracts the content-type specification from
1285##	the beginning of given string.
1286##
1287sub extract_ctype {
1288    if (   !defined($_[0])
1289        || (ref($_[0]) && ($_[0][0] !~ /\S/))
1290        || ($_[0] !~ /\S/)) {
1291        return 'message/rfc822'
1292            if (defined($_[1]) && ($_[1] eq 'multipart/digest'));
1293        return 'text/plain';
1294    }
1295    if (ref($_[0])) {
1296        $_[0][0] =~ m|^\s*([\w\-\./]+)|;
1297        return lc($1);
1298    }
1299    $_[0] =~ m|^\s*([\w\-\./]+)|;
1300    lc($1);
1301}
1302
1303##---------------------------------------------------------------------------##
1304##	apply_base_url(): Convert a relative URL to a full URL with
1305##	specific base;
1306##
1307sub apply_base_url {
1308    my $b = shift;    # Base URL
1309    my $u = shift;    # URL to apply base to
1310
1311    ## If no base, nothing to do
1312    return $u if !defined($b) || $b !~ /\S/;
1313
1314    ## If absolute URL or scroll link; do nothing
1315    $u =~ s/^\s+//;
1316    if ($u =~ /^$UrlRxStr/o || $u =~ m/^#/) {
1317        return $u;
1318    }
1319
1320    ## Check if base URL allows relative resolution
1321    my ($host_part, $scheme);
1322    if ($b =~ s{^((https?|ftp|file|nfs|acap|tftp)://[\w\-:\d.\@\%=~&]+)/?}{})
1323    {
1324        $host_part = $1;
1325        $scheme    = lc $2;
1326    } else {
1327        warn qq/Warning: Invalid base url, "$b" to apply to "$u"\n/;
1328        return $u;
1329    }
1330
1331    ## If "/---", just use hostname:port of base.
1332    if ($u =~ /^\//) {
1333        return $host_part . $u;
1334    }
1335
1336    ## Clean up base URL
1337SCHEME: {
1338        if ($scheme eq 'http' || $scheme eq 'https' || $scheme eq 'acap') {
1339            $b =~ s/\?.*$//;
1340            last SCHEME;
1341        }
1342        if ($scheme eq 'ftp') {
1343            $b =~ s/;type=.$//;
1344            last SCHEME;
1345        }
1346        if ($scheme eq 'tftp') {
1347            $b =~ s/;mode=\w+$//;
1348            last SCHEME;
1349        }
1350    }
1351    $b =~ s/\/$//;    # strip any trailing '/' (we add it back later)
1352
1353    ## "./---" or "../---": Need to remove and adjust base accordingly.
1354    my $cnt = 0;
1355    while ($u =~ s|^(\.{1,2})/||) { ++$cnt if length($1) == 2; }
1356
1357    if ($b eq '') {
1358        # base is just host
1359        return join('/', $host_part, $u);
1360    }
1361    if ($cnt > 0) {
1362        # trim path
1363        my @a = split(/\//, $b);
1364        if ($cnt <= scalar(@a)) {
1365            splice(@a, -$cnt);
1366            return join('/', $host_part, @a, $u);
1367        }
1368        # invalid relative path, tries to go past root
1369        return join('/', $host_part, $u);
1370
1371    }
1372    return join('/', $host_part, $b, $u);
1373}
1374
1375##---------------------------------------------------------------------------##
1376
1377sub extract_charset {
1378    my $content = shift;             # Content-type string of entity
1379    my $subtype = shift;             # Text sub-type
1380    my $body    = shift;             # Reference to entity text
1381    my $charset = $TextDefCharset;
1382
1383    if ($content =~ /\bcharset\s*=\s*([^\s;]+)/i) {
1384        $charset = lc $1;
1385        $charset =~ s/['";\s]//g;
1386    }
1387
1388    # If HTML, check <meta http-equiv=content-type> tag since it
1389    # can be different than what is specified in the entity header.
1390    if (($subtype eq 'html' || $subtype eq 'x-html')
1391        && ($$body =~ m/(<meta\s+http-equiv\s*=\s*['"]?
1392		     content-type\b[^>]*>)/xi
1393        )
1394    ) {
1395        my $meta = $1;
1396        if ($meta =~ m/\bcharset\s*=\s*['"]?([\w\.\-]+)/i) {
1397            $charset = lc $1;
1398        }
1399    }
1400    $charset = $MIMECharsetAliases{$charset}
1401        if $MIMECharsetAliases{$charset};
1402
1403    # If us-ascii, but 8-bit chars in body, we change to iso-8859-1
1404    if ($charset eq 'us-ascii') {
1405        $charset = 'iso-8859-1' if $$body =~ /[\x80-\xFF]/;
1406    }
1407    $charset;
1408}
1409
1410##---------------------------------------------------------------------------##
1411##	gen_full_part_number creates a full part number of an entity
1412##	from the given entity header.
1413##
1414sub gen_full_part_number {
1415    my $fields = shift;
1416    my @number = ();
1417    while (defined($fields->{'x-mha-parent-header'})) {
1418        unshift(@number, ($fields->{'x-mha-part-number'} || '1'));
1419        $fields = $fields->{'x-mha-parent-header'};
1420    }
1421    if (!scalar(@number)) {
1422        return $fields->{'x-mha-part-number'} || '1';
1423    }
1424    join('.', @number);
1425}
1426
1427##---------------------------------------------------------------------------##
1428
1429sub decode_1522_fields {
1430    my $fields  = shift;
1431    my $encfunc = load_textencoder();
1432    my $mode =
1433        defined($encfunc) ? TEXT_ENCODE : ($DecodeHeader ? JUST_DECODE : -1);
1434    if ($mode == -1) {
1435        return $fields;
1436    }
1437    my ($label, $value, $v);
1438    while (($label, $value) = each %$fields) {
1439        foreach $v (@$value) {
1440            $v = &MAILdecode_1522_str($v, $mode);
1441        }
1442    }
1443    $fields;
1444}
1445
1446##---------------------------------------------------------------------------##
1447
1448sub dump_header {
1449    my $fh     = shift;
1450    my $fields = shift;
1451    my ($key, $a, $value);
1452    foreach $key (sort keys %$fields) {
1453        $a = $fields->{$key};
1454        if (ref($a)) {
1455            foreach $value (@$a) {
1456                print $fh "$key: $value\n";
1457            }
1458        } else {
1459            print $fh "$key: $a\n";
1460        }
1461    }
1462}
1463
1464##---------------------------------------------------------------------------##
14651;    # for require
1466