1##---------------------------------------------------------------------------## 2## File: 3## $Id: readmail.pl,v 2.45 2014/04/22 02:33:10 ehood Exp $ 4## Author: 5## Earl Hood mhonarc AT mhonarc DOT org 6## Description: 7## Library defining routines to parse MIME e-mail messages. The 8## library is designed so it may be reused for other e-mail 9## filtering programs. The default behavior is for mail->html 10## filtering, however, the defaults can be overridden to allow 11## mail->whatever filtering. 12## 13## Public Functions: 14## ---------------- 15## $data = MAILdecode_1522_str($str); 16## ($data, @files) = MAILread_body($fields_hash_ref, $body_ref); 17## $hash_ref = MAILread_file_header($handle); 18## $hash_ref = MAILread_header($mesg_str_ref); 19## 20## ($disp, $file, $raw, $html_name) = 21## MAILhead_get_disposition($fields_hash_ref, $do_html); 22## $boolean = MAILis_excluded($content_type); 23## $parm_hash_ref = MAILparse_parameter_str($header_field); 24## $parm_hash_ref = MAILparse_parameter_str($header_field, 1); 25## 26##---------------------------------------------------------------------------## 27## Copyright (C) 1996-2003 Earl Hood, mhonarc AT mhonarc DOT org 28## 29## This program is free software; you can redistribute it and/or modify 30## it under the terms of the GNU General Public License as published by 31## the Free Software Foundation; either version 2 of the License, or 32## (at your option) any later version. 33## 34## This program is distributed in the hope that it will be useful, 35## but WITHOUT ANY WARRANTY; without even the implied warranty of 36## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 37## GNU General Public License for more details. 38## 39## You should have received a copy of the GNU General Public License 40## along with this program; if not, write to the Free Software 41## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 42## 02111-1307, USA 43##---------------------------------------------------------------------------## 44 45package readmail; 46 47no warnings qw(deprecated); 48 49$DEBUG = 0; 50 51############################################################################### 52## Private Globals ## 53############################################################################### 54 55#my $Url = '(\w+://|\w+:)'; 56my @_MIMEAltPrefs = (); 57my %_MIMEAltPrefs = (); 58 59############################################################################### 60## Public Globals ## 61############################################################################### 62 63##---------------------------------------------------------------------------## 64## Constants 65## 66 67## String for matching the start of a URL: It seems unnecessary to 68# try to recognize all valid schemes, so we use a simplier regex. 69# Keep the old one around just in case we need to resurrect it. 70#$UrlRxStr = 71# '(?:(?:https?|ftp|afs|wais|telnet|ldap|gopher|z39\.50[rs]|vemmi|imap|'. 72# 'nfs|acap|rtspu?|tip|pop|sip|(?:soap|xmlrpc)\.beeps?|go|ipp|'. 73# 'tftp)://|'. 74# 'news:(?://)?|'. 75# '(?:nntp|mid|cid|mailto|prospero|data|service|tel|fax|modem|h\.323):)'; 76$UrlRxStr = '(?:(?:https?|ftp|ldap|gopher)://|news:(?://)?|(?:nntp|mailto):)'; 77 78## Constants for use as second argument to MAILdecode_1522_str(). 79sub JUST_DECODE() { 1; } 80sub DECODE_ALL() { 2; } 81sub TEXT_ENCODE() { 3; } 82 83##---------------------------------------------------------------------------## 84 85##---------------------------------------------------------------------------## 86## Scalar Variables 87## 88 89## Flag if message headers are decoded in the parse header routines: 90## MAILread_header, MAILread_file_header. This only affects the 91## values of the field hash created. The original header is still 92## passed as the return value. 93## 94## The only 1522 data that will be decoded is data encoded with charsets 95## set to "-decode-" in the %MIMECharSetConverters hash. 96 97$DecodeHeader = 0; 98 99##---------------------------------------------------------------------------## 100## Variables for holding information related to the functions used 101## for processing MIME data. Variables are defined in the scope 102## of main. 103 104## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 105## %MIMEDecoders is the associative array for storing functions for 106## decoding mime data. 107## 108## Keys => content-transfer-encoding (should be in lowercase) 109## Values => function name. 110## 111## Function names should be qualified with package identifiers. 112## Functions are called as follows: 113## 114## $decoded_data = &function($data); 115## 116## The value "as-is" may be used to allow the data to be passed without 117## decoding to the registered filter, but the decoded flag will be 118## set to true. 119 120%MIMEDecoders = () 121 unless %MIMEDecoders; 122%MIMEDecodersSrc = () 123 unless %MIMEDecodersSrc; 124 125## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 126## %MIMECharSetConverters is the associative array for storing functions 127## for converting data in a particular charset to a destination format 128## within the MAILdecode_1522_str() routine. Destination format is defined 129## by the function. 130## 131## Keys => charset (should be in lowercase) 132## Values => function name. 133## 134## Charset values take on a form like "iso-8859-1" or "us-ascii". 135## NOTE: Values need to be in lower-case. 136## 137## The key "default" can be assigned to define the default function 138## to call if no explicit charset function is defined. 139## 140## The key "plain" can be set to a function for decoded regular text not 141## encoded in 1522 format. 142## 143## Function names are name of defined perl function and should be 144## qualified with package identifiers. Functions are called as follows: 145## 146## $converted_data = &function($data, $charset); 147## 148## A function called "-decode-" implies that the data should be 149## decoded, but no converter is to be invoked. 150## 151## A function called "-ignore-" implies that the data should 152## not be decoded and converted. Ie. For the specified charset, 153## the encoding will stay unprocessed and passed back in the return 154## string. 155 156%MIMECharSetConverters = () 157 unless %MIMECharSetConverters; 158%MIMECharSetConvertersSrc = () 159 unless %MIMECharSetConvertersSrc; 160 161## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 162## %MIMEFilters is the associative array for storing functions that 163## process various content-types in the MAILread_body routine. 164## 165## Keys => Content-type (should be in lowercase) 166## Values => function name. 167## 168## Function names should be qualified with package identifiers. 169## Functions are called as follows: 170## 171## $converted_data = &function($header, *parsed_header_assoc_array, 172## *message_data, $decoded_flag, 173## $optional_filter_arguments); 174## 175## Functions can be registered for base types. Example: 176## 177## $MIMEFilters{"image/*"} = "mypackage'function"; 178## 179## IMPORTANT: If a function specified is not defined when MAILread_body 180## tries to invoke it, MAILread_body will silently ignore. Make sure 181## that all functions are defined before invoking MAILread_body. 182 183%MIMEFilters = () 184 unless %MIMEFilters; 185%MIMEFiltersSrc = () 186 unless %MIMEFiltersSrc; 187 188## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 189## %MIMEFiltersArgs is the associative array for storing any optional 190## arguments to functions specified in MIMEFilters (the 191## $optional_filter_arguments from above). 192## 193## Keys => Either one of the following: content-type, function name. 194## Values => Argument string (format determined by filter function). 195## 196## Arguments listed for a content-type will be used over arguments 197## listed for a function if both are applicable. 198 199%MIMEFiltersArgs = () 200 unless %MIMEFiltersArgs; 201 202## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 203## %MIMEExcs is the associative array listing which data types 204## should be auto-excluded during parsing: 205## 206## Keys => content-type, or base-type 207## Values => <should evaluate to a true expression> 208 209%MIMEExcs = () 210 unless %MIMEExcs; 211 212## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 213## %MIMEIncs is the associative array listing which data types 214## should be auto-included during parsing: 215## 216## Keys => content-type, or base-type 217## Values => <should evaluate to a true expression> 218## 219## If there are any keys defined in %MIMEIncs, then any content-type 220## not in the hash is automatically excluded. I.e. %MIMEIncs can 221## be used to only allow a well-defined set of content-types. 222 223%MIMEIncs = () 224 unless %MIMEIncs; 225 226## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 227## %MIMECharsetAliases is a mapping of charset names to charset names. 228## The MAILset_charset_aliases() routine should be used to set the 229## values of this hash. 230## 231## Keys => charset name 232## Values => real charset name 233## 234%MIMECharsetAliases = () 235 unless %MIMECharsetAliases; 236 237##--------------------------------------------------------------------------- 238## Text entity-related variables 239## 240 241## Default character set if none specified. 242$TextDefCharset = 'us-ascii' 243 unless defined($TextDefCharset); 244 245## Destination character encoding for text entities. 246$TextEncode = undef 247 unless defined($TextEncode); 248## Text encoding function. 249$TextEncoderFunc = undef 250 unless defined($TextEncoderFunc); 251## Text encoding function source file. 252$TextEncoderSrc = undef 253 unless defined($TextEncoderSrc); 254 255## Prefilter function 256$TextPreFilter = undef 257 unless defined($TextPreFilter); 258 259##--------------------------------------------------------------------------- 260## Variables holding functions for generating processed output 261## for MAILread_body(). The default functions generate HTML. 262## However, the variables can be set to functions that generate 263## a different type of output. 264## 265## $FormatHeaderFunc has no default, and must be defined by 266## the calling program. 267## 268## Function that returns a message when failing to process a part of a 269## a multipart message. The content-type of the message is passed 270## as an argument. 271 272$CantProcessPartFunc = \&cantProcessPart 273 unless (defined($CantProcessPartFunc)); 274 275## Function that returns a message when a part is excluded via %MIMEExcs. 276 277$ExcludedPartFunc = \&excludedPart 278 unless (defined($ExcludedPartFunc)); 279 280## Function that returns a message when a part is unrecognized in a 281## multipart/alternative message. I.e. No part could be processed. 282## No arguments are passed to function. 283 284$UnrecognizedAltPartFunc = \&unrecognizedAltPart 285 unless (defined($UnrecognizedAltPartFunc)); 286 287## Function that returns a string to go before any data generated generating 288## from processing an embedded message (message/rfc822 or message/news). 289## No arguments are passed to function. 290 291$BeginEmbeddedMesgFunc = \&beginEmbeddedMesg 292 unless (defined($BeginEmbeddedMesgFunc)); 293 294## Function that returns a string to go after any data generated generating 295## from processing an embedded message (message/rfc822 or message/news). 296## No arguments are passed to function. 297 298$EndEmbeddedMesgFunc = \&endEmbeddedMesg 299 unless (defined($EndEmbeddedMesgFunc)); 300 301## Function to return a string that is a result of the functions 302## processing of a message header. The function is called for 303## embedded messages (message/rfc822 and message/news). The 304## arguments to function are: 305## 306## 1. Pointer to associative array representing message header 307## contents with the keys as field labels (in all lower-case) 308## and the values as field values of the labels. 309## 310## 2. Pointer to associative array mapping lower-case keys of 311## argument 1 to original case. 312## 313## Prototype: $return_data = &function(*fields, *lower2orig_fields); 314 315$FormatHeaderFunc = undef 316 unless (defined($FormatHeaderFunc)); 317 318############################################################################### 319## Public Routines ## 320############################################################################### 321##---------------------------------------------------------------------------## 322## MAILdecode_1522_str() decodes a string encoded in a format 323## specified by RFC 1522. The decoded string is the return value. 324## If no MIMECharSetConverters is registered for a charset, then 325## the decoded data is returned "as-is". 326## 327## Usage: 328## 329## $ret_data = &MAILdecode_1522_str($str, $dec_flag); 330## 331## If $dec_flag is JUST_DECODE, $str will be decoded for only 332## the charsets specified as "-decode-". If it is equal to 333## DECODE_ALL, all encoded data is decoded without any conversion. 334## If $dec_flag is TEXT_ENCODE, then all data will be converted 335## and encoded according to $readmail::TextEncode and 336## $readmail::TextEncoderFunc. 337## 338sub MAILdecode_1522_str { 339 my $str = shift; 340 my $dec_flag = shift || 0; 341 my $ret = (''); 342 my ($charset, $encoding, $pos, $dec, 343 $charcnv, $real_charset, $plaincnv, $plain_real_charset, 344 $strtxt, $str_before 345 ); 346 347 # Get text encoder 348 my $encfunc = undef; 349 if ($dec_flag == TEXT_ENCODE) { 350 $encfunc = load_textencoder(); 351 if (!defined($encfunc)) { 352 $encfunc = undef unless defined($encfunc); 353 $dec_flag = 0; 354 } 355 } 356 357 # Get plain converter 358 ($plaincnv, $plain_real_charset) = MAILload_charset_converter('plain'); 359 $plain_real_charset = 'us-ascii' if $plain_real_charset eq 'plain'; 360 361 # Decode string 362 my $firsttime = 1; 363 while ($str =~ /(=\?([^?]+)\?(.)\?([^?]*)\?=)/g) { 364 # Grab components 365 $pos = pos($str); 366 ($charset, $encoding, $strtxt) = (lc($2), lc($3), $4); 367 $str_before = substr($str, 0, $pos - length($1)); 368 substr($str, 0, $pos) = ''; 369 pos($str) = 0; 370 371 # Check encoding method and grab proper decoder 372 if ($encoding eq 'b') { 373 $dec = &load_decoder('base64'); 374 } else { 375 $dec = &load_decoder('quoted-printable'); 376 } 377 378 # Convert before (unencoded) text 379 if ($firsttime || $str_before =~ /\S/) { 380 if (defined($encfunc)) { # encoding 381 &$encfunc(\$str_before, $plain_real_charset, $TextEncode); 382 $ret .= $str_before; 383 } elsif ($dec_flag) { # ignore if just decode 384 $ret .= $str_before; 385 } elsif (defined(&$plaincnv)) { # decode and convert 386 $ret .= &$plaincnv($str_before, $plain_real_charset); 387 } else { # ignore 388 $ret .= $str_before; 389 } 390 } 391 $firsttime = 0; 392 393 # Encoding text 394 if (defined($encfunc)) { 395 $real_charset = 396 $MIMECharsetAliases{$charset} 397 ? $MIMECharsetAliases{$charset} 398 : $charset; 399 $strtxt =~ s/_/ /g; 400 $strtxt = &$dec($strtxt); 401 $strtxt =~ s/[\r\n]/ /g; 402 &$encfunc(\$strtxt, $real_charset, $TextEncode); 403 $ret .= $strtxt; 404 405 # Regular conversion 406 } else { 407 if ($dec_flag == DECODE_ALL) { 408 $charcnv = '-decode-'; 409 } else { 410 ($charcnv, $real_charset) = 411 MAILload_charset_converter($charset); 412 } 413 # Decode only 414 if ($charcnv eq '-decode-') { 415 $strtxt =~ s/_/ /g; 416 $strtxt = &$dec($strtxt); 417 $strtxt =~ s/[\r\n]/ /g; 418 $ret .= $strtxt; 419 420 # Ignore if just decoding 421 } elsif ($dec_flag) { 422 $ret .= "=?$charset?$encoding?$strtxt?="; 423 424 # Decode and convert 425 } elsif (defined(&$charcnv)) { 426 $strtxt =~ s/_/ /g; 427 $strtxt = &$dec($strtxt); 428 $strtxt =~ s/[\r\n]/ /g; 429 $ret .= &$charcnv($strtxt, $real_charset); 430 431 # Fallback is to ignore 432 } else { 433 $ret .= "=?$charset?$encoding?$strtxt?="; 434 } 435 } 436 } 437 438 # Convert left-over unencoded text 439 if (defined($encfunc)) { # encoding 440 &$encfunc(\$str, $plain_real_charset, $TextEncode); 441 $ret .= $str; 442 } elsif ($dec_flag) { # ignore if just decode 443 $ret .= $str; 444 } elsif (defined(&$plaincnv)) { # decode and convert 445 $ret .= &$plaincnv($str, $plain_real_charset); 446 } else { # ignore 447 $ret .= $str; 448 } 449 450 $ret; 451} 452 453##---------------------------------------------------------------------------## 454## MAILread_body() parses a MIME message body. 455## Usage: 456## ($data, @files) = 457## MAILread_body($fields_hash_ref, $body_data_ref); 458## 459## Parameters: 460## $fields_hash_ref 461## A reference to hash of message/part header 462## fields. Keys are field names in lowercase 463## and values are array references containing the 464## field values. For example, to obtain the 465## content-type, if defined, one would do: 466## 467## $fields_hash_ref->{'content-type'}[0] 468## 469## Values for a fields are stored in arrays since 470## duplication of fields are possible. For example, 471## the Received: header field is typically repeated 472## multiple times. For fields that only occur once, 473## then array for the field will only contain one 474## item. 475## 476## $body_data_ref 477## Reference to body data. It is okay for the 478## filter to modify the text in-place. 479## 480## Return: 481## The first item in the return list is the text that should 482## printed to the message page. Any other items in the return 483## list are derived filenames created. 484## 485## See Also: 486## MAILread_header(), MAILread_file_header() 487## 488sub MAILread_body { 489 my ($fields, # Parsed header hash 490 $body, # Reference to raw body text 491 $inaltArg 492 ) = @_; # Flag if in multipart/alternative 493 494 my ($type, $subtype, $boundary, $content, 495 $ctype, $pos, $encoding, $decodefunc, 496 $args, $part, $uribase 497 ); 498 my (@parts) = (); 499 my (@files) = (); 500 my (@array) = (); 501 my $ret = ""; 502 503 ## Get type/subtype 504 if (defined($fields->{'content-type'})) { 505 $content = $fields->{'content-type'}->[0]; 506 } 507 $content = 'text/plain' unless $content; 508 ($ctype) = $content =~ m%^\s*([\w\-\./]+)%; # Extract content-type 509 $ctype =~ tr/A-Z/a-z/; # Convert to lowercase 510 if ($ctype =~ m%/%) { # Extract base and sub types 511 ($type, $subtype) = split(/\//, $ctype, 2); 512 } elsif ($ctype =~ /text/i) { 513 $ctype = 'text/plain'; 514 $type = 'text'; 515 $subtype = 'plain'; 516 } else { 517 $type = $subtype = ''; 518 } 519 $fields->{'x-mha-content-type'} = $ctype; 520 521 ## Check if type is excluded 522 if (MAILis_excluded($ctype)) { 523 return (&$ExcludedPartFunc($ctype)); 524 } 525 526 ## Get entity URI base 527 if (defined($fields->{'content-base'}) 528 && ($uribase = $fields->{'content-base'}[0])) { 529 $uribase =~ s/['"\s]//g; 530 } elsif (defined($fields->{'content-location'}) 531 && ($uribase = $fields->{'content-location'}[0])) { 532 $uribase =~ s/['"\s]//g; 533 } 534 $uribase =~ s|(.*/).*|$1| if $uribase; 535 536 ## Load content-type filter 537 if ( (!defined($filter = &load_filter($ctype)) || !defined(&$filter)) 538 && (!defined($filter = &load_filter("$type/*")) || !defined(&$filter)) 539 && ( !$inaltArg 540 && (!defined($filter = &load_filter('*/*')) || !defined(&$filter)) 541 && $ctype !~ m^\bmessage/(?:rfc822|news)\b^i 542 && $type !~ /\bmultipart\b/) 543 ) { 544 warn qq|Warning: Unrecognized content-type, "$ctype", |, 545 qq|assuming "application/octet-stream"\n|; 546 $filter = &load_filter('application/octet-stream'); 547 } 548 549 ## Check for filter arguments 550 $args = get_filter_args($ctype, "$type/*", $filter); 551 552 ## Check encoding 553 if (defined($fields->{'content-transfer-encoding'})) { 554 $encoding = lc $fields->{'content-transfer-encoding'}[0]; 555 $encoding =~ s/\s//g; 556 $decodefunc = &load_decoder($encoding); 557 } else { 558 $encoding = undef; 559 $decodefunc = undef; 560 } 561 my $decoded = 0; 562 if (defined($decodefunc) && defined(&$decodefunc)) { 563 $$body = &$decodefunc($$body); 564 $decoded = 1; 565 } elsif ($decodefunc =~ /as-is/i) { 566 $decoded = 1; 567 } 568 569 ## Convert text encoding 570 if ($type eq 'text') { 571 my $charset = extract_charset($content, $subtype, $body); 572 $fields->{'x-mha-charset'} = $charset; 573 my $textfunc = load_textencoder(); 574 if (defined($textfunc)) { 575 if ($DEBUG) { 576 print STDERR "MAILread_body: have textfunc: $textfunc\n"; 577 } 578 $fields->{'x-mha-charset'} = $TextEncode 579 if defined(&$textfunc($body, $charset, $TextEncode)); 580 } 581 if (defined($TextPreFilter) && defined(&$TextPreFilter)) { 582 if ($DEBUG) { 583 print STDERR 'MAILread_body: have TextPreFilter: ', 584 $TextPreFilter, "\n"; 585 } 586 &$TextPreFilter($fields, $body); 587 } 588 } else { 589 # define x-mha-charset in case text filter associated with 590 # a non-text type 591 $fields->{'x-mha-charset'} = $TextDefCharset; 592 } 593 if ($DEBUG) { 594 print STDERR 'MAILread_body: charset: ', $fields->{'x-mha-charset'}, 595 "\n"; 596 } 597 598 ## A filter is defined for given content-type 599 if ($filter && defined(&$filter)) { 600 @array = &$filter($fields, $body, $decoded, $args); 601 ## Setup return variables 602 $ret = shift @array; # Return string 603 push(@files, @array); # Derived files 604 605 ## No filter defined for given content-type 606 } else { 607 ## If multipart, recursively process each part 608 if ($type =~ /\bmultipart\b/i) { 609 local (%Cid) = () unless scalar(caller) eq 'readmail'; 610 my ($isalt) = $subtype =~ /\balternative\b/i; 611 612 ## Get boundary 613 $boundary = ""; 614 if ($content =~ m/\bboundary\s*=\s*"([^"]*)"/i) { 615 $boundary = $1; 616 } else { 617 ($boundary) = $content =~ m/\bboundary\s*=\s*([^\s;]+)/i; 618 $boundary =~ s/;$//; # chop ';' if grabbed 619 } 620 621 ## If boundary defined, split body into parts 622 if ($boundary =~ /\S/) { 623 my $found = 0; 624 my $have_end = 0; 625 my $start_pos = 0; 626 substr($$body, 0, 0) = "\n"; 627 substr($boundary, 0, 0) = "\n--"; 628 my $blen = length($boundary); 629 my $bchkstr; 630 631 while (($pos = index($$body, $boundary, $start_pos)) > -1) { 632 # have to check for case when boundary is a substring 633 # of another boundary, yuck! 634 $bchkstr = substr($$body, $pos + $blen, 2); 635 unless ($bchkstr =~ /\A\r?\n/ || $bchkstr =~ /\A--/) { 636 # incomplete match, continue search 637 $start_pos = $pos + $blen; 638 next; 639 } 640 $found = 1; 641 push(@parts, substr($$body, 0, $pos)); 642 $parts[$#parts] =~ s/^\r//; 643 644 # prune out part data just grabbed 645 substr($$body, 0, $pos + $blen) = ""; 646 647 # check if hit end 648 if ($$body =~ /\A--/) { 649 $have_end = 1; 650 last; 651 } 652 653 # remove EOL at the beginning 654 $$body =~ s/\A\r?\n//; 655 $start_pos = 0; 656 } 657 if ($found) { 658 if (!$have_end) { 659 warn qq/Warning: No end boundary delimiter found in /, 660 qq/message body\n/; 661 push(@parts, $$body); 662 $parts[$#parts] =~ s/^\r//; 663 $$body = ""; 664 } else { 665 # discard front-matter 666 shift(@parts); 667 } 668 } else { 669 # no boundary separators in message! 670 warn qq/Warning: No boundary delimiters found in /, 671 qq/multipart body\n/; 672 if ($$body =~ m/\A\n[\w\-]+:\s/) { 673 # remove \n added above if part looks like it has 674 # headers. we keep if it does not to avoid body 675 # data being parsed as a header below. 676 substr($$body, 0, 1) = ""; 677 } 678 push(@parts, $$body); 679 } 680 681 ## Else treat body as one part 682 } else { 683 @parts = ($$body); 684 } 685 686 ## Process parts 687 my (@entity) = (); 688 my ($cid, $href, $pctype); 689 my %alt_exc = (); 690 my $have_alt_prefs = $isalt && scalar(@_MIMEAltPrefs); 691 my $partno = 0; 692 @parts = \(@parts); 693 while (defined($part = shift(@parts))) { 694 $href = {}; 695 $partfields = $href->{'fields'} = (MAILread_header($part))[0]; 696 $href->{'body'} = $part; 697 $href->{'filtered'} = 0; 698 $partfields->{'x-mha-part-number'} = ++$partno; 699 $pctype = 700 extract_ctype($partfields->{'content-type'}, $ctype); 701 702 ## check alternative preferences 703 if ($have_alt_prefs) { 704 next if ($alt_exc{$pctype}); 705 my $pos = $_MIMEAltPrefs{$pctype}; 706 if (defined($pos)) { 707 for (++$pos; $pos <= $#_MIMEAltPrefs; ++$pos) { 708 $alt_exc{$_MIMEAltPrefs[$pos]} = 1; 709 } 710 } 711 } 712 713 ## only add to %Cid if not excluded 714 if (!&MAILis_excluded($pctype)) { 715 if ($isalt) { 716 unshift(@entity, $href); 717 } else { 718 push(@entity, $href); 719 } 720 $cid = $partfields->{'content-id'}[0] 721 || $partfields->{'message-id'}[0]; 722 if (defined($cid)) { 723 $cid =~ s/[\s<>]//g; 724 $Cid{"cid:$cid"} = $href if $cid =~ /\S/; 725 } 726 $cid = undef; 727 if (defined($partfields->{'content-location'}) 728 && ($cid = $partfields->{'content-location'}[0])) { 729 my $partbase = $uribase; 730 $cid =~ s/['"\s]//g; 731 if (defined($partfields->{'content-base'})) { 732 $partbase = $partfields->{'content-base'}[0]; 733 } 734 $cid = apply_base_url($partbase, $cid); 735 if ($cid =~ /\S/ && !$Cid{$cid}) { 736 $Cid{$cid} = $href; 737 } 738 } 739 if ($cid) { 740 $partfields->{'content-location'} = [$cid]; 741 } elsif (!defined($partfields->{'content-base'})) { 742 $partfields->{'content-base'} = [$uribase]; 743 } 744 745 $partfields->{'x-mha-parent-header'} = $fields; 746 } 747 } 748 749 my ($entity); 750 ENTITY: foreach $entity (@entity) { 751 if ($entity->{'filtered'}) { 752 next ENTITY; 753 } 754 755 ## If content-type not defined for part, then determine 756 ## content-type based upon multipart subtype. 757 $partfields = $entity->{'fields'}; 758 if (!defined($partfields->{'content-type'})) { 759 $partfields->{'content-type'} = [ 760 ($subtype =~ /digest/) 761 ? 'message/rfc822' 762 : 'text/plain' 763 ]; 764 } 765 766 ## Process part 767 @array = 768 MAILread_body($partfields, $entity->{'body'}, $isalt); 769 770 ## Only use last filterable part in alternate 771 if ($isalt) { 772 $ret = shift @array; 773 if ($ret) { 774 push(@files, @array); 775 $entity->{'filtered'} = 1; 776 last ENTITY; 777 } 778 } else { 779 if (!$array[0]) { 780 $array[0] = 781 &$CantProcessPartFunc( 782 $partfields->{'content-type'}[0]); 783 } 784 $ret .= shift @array; 785 } 786 push(@files, @array); 787 $entity->{'filtered'} = 1; 788 } 789 790 ## Check if multipart/alternative, and no success 791 if (!$ret && $isalt) { 792 warn 793 qq|Warning: No recognized part in multipart/alternative; |, 794 qq|will try to decode last part\n|; 795 $entity = $entity[0]; 796 @array = 797 &MAILread_body($entity->{'fields'}, $entity->{'body'}); 798 $ret = shift @array; 799 if ($ret) { 800 push(@files, @array); 801 } else { 802 $ret = &$UnrecognizedAltPartFunc(); 803 } 804 } 805 806 ## Aid garbage collection(?) 807 foreach $entity (@entity) { 808 delete $entity->{'fields'}{'x-mha-parent-header'}; 809 } 810 811 ## Else if message/rfc822 or message/news 812 } elsif ($ctype =~ m^\bmessage/(?:rfc822|news)\b^i) { 813 $partfields = (MAILread_header($body))[0]; 814 815 # propogate parent and part no to message/* header 816 $partfields->{'x-mha-parent-header'} = 817 $fields->{'x-mha-parent-header'}; 818 $partfields->{'x-mha-part-number'} = 819 $fields->{'x-mha-part-number'}; 820 821 $ret = &$BeginEmbeddedMesgFunc(); 822 if ($FormatHeaderFunc && defined(&$FormatHeaderFunc)) { 823 $ret .= &$FormatHeaderFunc($partfields); 824 } else { 825 warn "Warning: readmail: No message header formatting ", 826 "function defined\n"; 827 } 828 @array = MAILread_body($partfields, $body); 829 $ret .= shift @array 830 || &$CantProcessPartFunc($partfields->{'content-type'}[0] 831 || 'text/plain'); 832 $ret .= &$EndEmbeddedMesgFunc(); 833 834 push(@files, @array); 835 delete $partfields->{'x-mha-parent-header'}; 836 837 ## Else cannot handle type 838 } else { 839 $ret = ''; 840 } 841 } 842 843 ($ret, @files); 844} 845 846##---------------------------------------------------------------------------## 847## MAILread_header reads (and strips) a mail message header from the 848## variable $mesg. $mesg is a reference to the mail message in 849## a string. 850## 851## $fields is a reference to a hash to put field values indexed by 852## field labels that have been converted to all lowercase. 853## Field values are array references to the values 854## for each field. 855## 856## ($fields_hash_ref, $header_txt) = MAILread_header($mesg_data); 857## 858sub MAILread_header { 859 my $mesg = shift; 860 861 my $fields = {}; 862 my $label = ''; 863 my $header = ''; 864 my ($value, $tmp, $pos); 865 866 ## Read a line at a time. 867 for ($pos = 0; $pos >= 0;) { 868 $pos = index($$mesg, "\n"); 869 if ($pos >= 0) { 870 $tmp = substr($$mesg, 0, $pos + 1); 871 substr($$mesg, 0, $pos + 1) = ""; 872 last if $tmp =~ /^\r?$/; # Done if blank line 873 874 $header .= $tmp; 875 chop $tmp; # Chop newline 876 $tmp =~ s/\r$//; # Delete <CR> characters 877 } else { 878 $tmp = $$mesg; 879 $header .= $tmp; 880 } 881 882 ## Check for continuation of a field 883 if ($tmp =~ /^\s/) { 884 $fields->{$label}[-1] .= $tmp if $label; 885 next; 886 } 887 888 ## Separate head from field text 889 if ($tmp =~ /^([^:\s]+):\s*([\s\S]*)$/) { 890 ($label, $value) = (lc($1), $2); 891 if ($fields->{$label}) { 892 push(@{$fields->{$label}}, $value); 893 } else { 894 $fields->{$label} = [$value]; 895 } 896 } 897 } 898 decode_1522_fields($fields); 899 ($fields, $header); 900} 901 902##---------------------------------------------------------------------------## 903## MAILread_file_header reads (and strips) a mail message header 904## from the filehandle $handle. The routine behaves in the 905## same manner as MAILread_header; 906## 907## ($fields_hash, $header_text) = MAILread_file_header($filehandle); 908## 909sub MAILread_file_header { 910 my $handle = shift; 911 my $encode = shift; 912 913 my $label = ''; 914 my $header = ''; 915 my $fields = {}; 916 local $/ = "\n"; 917 918 my ($value, $tmp); 919 while (($tmp = <$handle>) !~ /^[\r]?$/) { 920 ## Save raw text 921 $header .= $tmp; 922 923 ## Delete eol characters 924 $tmp =~ s/[\r\n]//g; 925 926 ## Check for continuation of a field 927 if ($tmp =~ /^\s/) { 928 $fields->{$label}[-1] .= $tmp if $label; 929 next; 930 } 931 932 ## Separate head from field text 933 if ($tmp =~ /^([^:\s]+):\s*([\s\S]*)$/) { 934 ($label, $value) = (lc($1), $2); 935 if (defined($fields->{$label})) { 936 push(@{$fields->{$label}}, $value); 937 } else { 938 $fields->{$label} = [$value]; 939 } 940 } 941 } 942 decode_1522_fields($fields); 943 ($fields, $header); 944} 945 946##---------------------------------------------------------------------------## 947## MAILis_excluded() checks if specified content-type has been 948## specified to be excluded. 949## 950sub MAILis_excluded { 951 my $ctype = lc($_[0]) || 'text/plain'; 952 my $btype = undef; 953 954 $ctype =~ s/\/x-/\//; 955 if ($ctype =~ m|([^/]+)/|) { 956 $btype = $1; 957 } 958 959MIMEINCS: { 960 # Treat multipart special: It is always included unless present 961 # in MIMEExcs. 962 last MIMEINCS if ($ctype =~ /^multipart\b/); 963 964 if (%MIMEIncs) { 965 if ($MIMEIncs{$ctype} || (defined($btype) && $MIMEIncs{$btype})) { 966 last MIMEINCS; 967 } else { 968 return 1; 969 } 970 } 971 } 972 if ($MIMEExcs{$ctype} || (defined($btype) && $MIMEExcs{$btype})) { 973 return 1; 974 } 975 0; 976} 977 978##---------------------------------------------------------------------------## 979## MAILhead_get_disposition gets the content disposition and 980## filename from $hfields, $hfields is a hash produced by the 981## MAILread_header and MAILread_file_header routines. 982## 983sub MAILhead_get_disposition { 984 my $hfields = shift; 985 my $do_html = shift; 986 987 my ($disp, $filename, $raw) = ('', '', ''); 988 my $html_name = undef; 989 local ($_); 990 991 if (defined($hfields->{'content-disposition'}) 992 && ($_ = $hfields->{'content-disposition'}->[0])) { 993 ($disp) = /^\s*([^\s;]+)/; 994 if (/filename="([^"]+)"/i) { 995 $raw = $1; 996 } elsif (/filename=(\S+)/i) { 997 ($raw = $1) =~ s/;\s*$//g; 998 } 999 } 1000 if (!$raw && defined($_ = $hfields->{'content-type'}[0])) { 1001 if (/name="([^"]+)"/i) { 1002 $raw = $1; 1003 } elsif (/name=(\S+)/i) { 1004 ($raw = $1) =~ s/;\s*$//g; 1005 } 1006 } 1007 $filename = MAILdecode_1522_str($raw, DECODE_ALL); 1008 $filename =~ s%.*[/\\:]%%; # Remove any path component 1009 $filename =~ s/^\s+//; # Remove leading whitespace 1010 $filename =~ s/\s+$//; # Remove trailing whitespace 1011 $filename =~ tr/\0-\40\t\n\r?:*"'<>|\177-\377/_/; 1012 # Remove questionable/invalid characters 1013 1014 # Only provide HTML display version if requested 1015 $html_name = MAILdecode_1522_str($raw) if $do_html; 1016 1017 ($disp, $filename, $raw, $html_name); 1018} 1019 1020##---------------------------------------------------------------------------## 1021## MAILparse_parameter_str(): parses a parameter/value string. 1022## Support for RFC 2184 extensions exists. The $hasmain flag tells 1023## the method if there is an intial main value for the sting. For 1024## example: 1025## 1026## text/plain; charset=us-ascii 1027## ----^^^^^^^^^^ 1028## 1029## The "text/plain" part is not a parameter/value pair, but having 1030## an initial value is common among some header fields that can have 1031## parameter/value pairs (egs: Content-Type, Content-Disposition). 1032## 1033## Return Value: 1034## Reference to a hash. Each key is the attribute name. 1035## The special key, 'x-main', is the main value if the 1036## $hasmain flag is set. 1037## 1038## Each hash value is a hash reference with three keys: 1039## 'charset', 'lang', 'value'. 'charset' and 'lang' may be 1040## undef if character set or language information is not 1041## specified. 1042## 1043## Example Usage: 1044## 1045## $content_type_field = 'text/plain; charset=us-ascii'; 1046## $parms = MAILparse_parameter_str($content_type_field, 1); 1047## $ctype = $parms->{'x-main'}; 1048## $mesg_body_charset = $parms->{'charset'}{'value'}; 1049## 1050sub MAILparse_parameter_str { 1051 my $str = shift; # Input string 1052 my $hasmain = shift; # Flag if there is a main value to extract 1053 1054 require MHonArc::RFC822; 1055 1056 my $parm = {}; 1057 my @toks = MHonArc::RFC822::uncomment($str); 1058 my ($tok, $name, $value, $charset, $lang, $isPart); 1059 1060 $parm->{'x-main'} = shift @toks if $hasmain; 1061 1062 ## Loop thru token list 1063 while ($tok = shift @toks) { 1064 next if $tok eq ";"; 1065 ($name, $value) = split(/=/, $tok, 2); 1066 ## Check if charset/lang specified 1067 if ($name =~ s/\*$//) { 1068 if ($value =~ s/^([^']*)'([^']*)'//) { 1069 ($charset, $lang) = ($1, $2); 1070 } else { 1071 ($charset, $lang) = (undef, undef); 1072 } 1073 } 1074 ## Check if parameter is only part 1075 if ($name =~ s/\*(\d+)$//) { 1076 $isPart = 1; 1077 } else { 1078 $isPart = 0; 1079 } 1080 ## Set values for parameter 1081 $name = lc $name; 1082 $parm->{$name} = {} unless defined($parm->{$name}); 1083 $parm->{$name}{'charset'} = $charset; 1084 $parm->{$name}{'lang'} = $lang; 1085 ## Check if value is next token 1086 if ($value eq "") { 1087 ## If value next token, than it must be quoted 1088 $value = shift @toks; 1089 $value =~ s/^"//; 1090 $value =~ s/"$//; 1091 $value =~ s/\\//g; 1092 } 1093 if ($isPart && defined($parm->{$name}{'vlist'})) { 1094 push(@{$parm->{$name}{'vlist'}}, $value); 1095 } else { 1096 $parm->{$name}{'vlist'} = [$value]; 1097 } 1098 } 1099 1100 ## Now we loop thru each parameter and define the final values from 1101 ## the parts 1102 foreach $name (keys %$parm) { 1103 next if $name eq 'x-main'; 1104 $parm->{$name}{'value'} = join("", @{$parm->{$name}{'vlist'}}); 1105 } 1106 1107 $parm; 1108} 1109 1110##---------------------------------------------------------------------------## 1111## MAILset_alternative_prefs() is used to set content-type 1112## preferences for multipart/alternative entities. The list 1113## specified will supercede the prefered format as denoted by 1114## the ording of parts in the entity. 1115## 1116## A content-type listed earlier in the array will be prefered 1117## over one later. For example: 1118## 1119## MAILset_alternative_prefs('text/plain', 'text/html'); 1120## 1121## States that if a multipart/alternative entity contains a 1122## text/plain part and a text/html part, the text/plain part will 1123## be prefered over the text/html part. 1124## 1125sub MAILset_alternative_prefs { 1126 @_MIMEAltPrefs = map {lc} @_; 1127 %_MIMEAltPrefs = (); 1128 my $i = 0; 1129 my $ctype; 1130 foreach $ctype (@_MIMEAltPrefs) { 1131 $_MIMEAltPrefs{$ctype} = $i++; 1132 } 1133} 1134 1135##---------------------------------------------------------------------------## 1136## MAILset_charset_aliases() is used to define name aliases for 1137## charset names. 1138## 1139## Example usage: 1140## MAILset_charset_aliases( { 1141## 'iso-8859-1' => [ 'latin1', 'iso_8859_1', '8859-1' ], 1142## 'iso-8859-15' => [ 'latin9', 'iso_8859_15', '8859-15' ], 1143## }, $override ); 1144## 1145sub MAILset_charset_aliases { 1146 my $map = shift; 1147 my $override = shift; 1148 1149 %MIMECharsetAliases = () if $override; 1150 my ($charset, $aliases, $alias); 1151 while (($charset, $aliases) = each(%$map)) { 1152 $charset = lc $charset; 1153 foreach $alias (@$aliases) { 1154 $MIMECharsetAliases{lc $alias} = $charset; 1155 } 1156 } 1157} 1158 1159##---------------------------------------------------------------------------## 1160## MAILload_charset_converter() loads the charset converter function 1161## associated with given charset name. 1162## 1163## Example usage: 1164## ($func, $real_charset) = MAILload_charset_converter($charset); 1165## 1166## $func is the reference to the converter function, which may be 1167## undef. $real_charset is the real charset name that should be 1168## used when invoking the function. 1169## 1170sub MAILload_charset_converter { 1171 my $charset = lc shift; 1172 $charset = $MIMECharsetAliases{$charset} if $MIMECharsetAliases{$charset}; 1173 my $func = load_charset($charset); 1174 if (!defined($func) || !defined(&$func)) { 1175 $func = load_charset('default'); 1176 } 1177 ($func, $charset); 1178} 1179 1180############################################################################### 1181## Private Routines 1182############################################################################### 1183 1184##---------------------------------------------------------------------------## 1185## Default function for unable to process a part of a multipart 1186## message. 1187## 1188sub cantProcessPart { 1189 my ($ctype) = $_[0]; 1190 warn "Warning: Could not process part with given Content-Type: ", 1191 "$ctype\n"; 1192 "<br><tt><<< $ctype: Unrecognized >>></tt><br>\n"; 1193} 1194##---------------------------------------------------------------------------## 1195## Default function returning message for content-types excluded. 1196## 1197sub excludedPart { 1198 my ($ctype) = $_[0]; 1199 "<br><tt><<< $ctype: EXCLUDED >>></tt><br>\n"; 1200} 1201##---------------------------------------------------------------------------## 1202## Default function for unrecognizeable part in multipart/alternative. 1203## 1204sub unrecognizedAltPart { 1205 warn "Warning: No recognizable part in multipart/alternative\n"; 1206 "<br><tt><<< multipart/alternative: " 1207 . "No recognizable part >>></tt><br>\n"; 1208} 1209##---------------------------------------------------------------------------## 1210## Default function for beggining of embedded message 1211## (ie message/rfc822 or message/news). 1212## 1213sub beginEmbeddedMesg { 1214 qq|<blockquote><small>--- <i>Begin Message</i> ---</small>\n|; 1215} 1216##---------------------------------------------------------------------------## 1217## Default function for end of embedded message 1218## (ie message/rfc822 or message/news). 1219## 1220sub endEmbeddedMesg { 1221 qq|<br><small>--- <i>End Message</i> ---</small></blockquote>\n|; 1222} 1223 1224##---------------------------------------------------------------------------## 1225 1226sub load_charset { 1227 require $MIMECharSetConvertersSrc{$_[0]} 1228 if defined($MIMECharSetConvertersSrc{$_[0]}) 1229 && $MIMECharSetConvertersSrc{$_[0]}; 1230 $MIMECharSetConverters{$_[0]}; 1231} 1232 1233sub load_decoder { 1234 my $enc = lc shift; 1235 $enc =~ s/\s//; 1236 require $MIMEDecodersSrc{$enc} 1237 if defined($MIMEDecodersSrc{$enc}) 1238 && $MIMEDecodersSrc{$enc}; 1239 $MIMEDecoders{$enc}; 1240} 1241 1242sub load_filter { 1243 require $MIMEFiltersSrc{$_[0]} 1244 if defined($MIMEFiltersSrc{$_[0]}) 1245 && $MIMEFiltersSrc{$_[0]}; 1246 $MIMEFilters{$_[0]}; 1247} 1248 1249sub get_filter_args { 1250 my $args = ''; 1251 my $s; 1252 foreach $s (@_) { 1253 next unless defined $s; 1254 $args = $MIMEFiltersArgs{$s}; 1255 last if defined($args) && ($args ne ''); 1256 } 1257 $args; 1258} 1259 1260sub load_textencoder { 1261 return undef unless $TextEncode; 1262TRY: { 1263 if (!defined($TextEncoderFunc)) { 1264 last TRY; 1265 } 1266 if (defined(&$TextEncoderFunc)) { 1267 return $TextEncoderFunc; 1268 } 1269 if (!defined($TextEncoderSrc)) { 1270 last TRY; 1271 } 1272 require $TextEncoderSrc; 1273 if (defined(&$TextEncoderFunc)) { 1274 return $TextEncoderFunc; 1275 } 1276 } 1277 warn qq/Warning: Unable to load text encode for "$TextEncode"\n/; 1278 $TextEncode = undef; 1279 $TextEncoderFunc = undef; 1280 $TextEncoderSrc = undef; 1281} 1282 1283##---------------------------------------------------------------------------## 1284## extract_ctype() extracts the content-type specification from 1285## the beginning of given string. 1286## 1287sub extract_ctype { 1288 if ( !defined($_[0]) 1289 || (ref($_[0]) && ($_[0][0] !~ /\S/)) 1290 || ($_[0] !~ /\S/)) { 1291 return 'message/rfc822' 1292 if (defined($_[1]) && ($_[1] eq 'multipart/digest')); 1293 return 'text/plain'; 1294 } 1295 if (ref($_[0])) { 1296 $_[0][0] =~ m|^\s*([\w\-\./]+)|; 1297 return lc($1); 1298 } 1299 $_[0] =~ m|^\s*([\w\-\./]+)|; 1300 lc($1); 1301} 1302 1303##---------------------------------------------------------------------------## 1304## apply_base_url(): Convert a relative URL to a full URL with 1305## specific base; 1306## 1307sub apply_base_url { 1308 my $b = shift; # Base URL 1309 my $u = shift; # URL to apply base to 1310 1311 ## If no base, nothing to do 1312 return $u if !defined($b) || $b !~ /\S/; 1313 1314 ## If absolute URL or scroll link; do nothing 1315 $u =~ s/^\s+//; 1316 if ($u =~ /^$UrlRxStr/o || $u =~ m/^#/) { 1317 return $u; 1318 } 1319 1320 ## Check if base URL allows relative resolution 1321 my ($host_part, $scheme); 1322 if ($b =~ s{^((https?|ftp|file|nfs|acap|tftp)://[\w\-:\d.\@\%=~&]+)/?}{}) 1323 { 1324 $host_part = $1; 1325 $scheme = lc $2; 1326 } else { 1327 warn qq/Warning: Invalid base url, "$b" to apply to "$u"\n/; 1328 return $u; 1329 } 1330 1331 ## If "/---", just use hostname:port of base. 1332 if ($u =~ /^\//) { 1333 return $host_part . $u; 1334 } 1335 1336 ## Clean up base URL 1337SCHEME: { 1338 if ($scheme eq 'http' || $scheme eq 'https' || $scheme eq 'acap') { 1339 $b =~ s/\?.*$//; 1340 last SCHEME; 1341 } 1342 if ($scheme eq 'ftp') { 1343 $b =~ s/;type=.$//; 1344 last SCHEME; 1345 } 1346 if ($scheme eq 'tftp') { 1347 $b =~ s/;mode=\w+$//; 1348 last SCHEME; 1349 } 1350 } 1351 $b =~ s/\/$//; # strip any trailing '/' (we add it back later) 1352 1353 ## "./---" or "../---": Need to remove and adjust base accordingly. 1354 my $cnt = 0; 1355 while ($u =~ s|^(\.{1,2})/||) { ++$cnt if length($1) == 2; } 1356 1357 if ($b eq '') { 1358 # base is just host 1359 return join('/', $host_part, $u); 1360 } 1361 if ($cnt > 0) { 1362 # trim path 1363 my @a = split(/\//, $b); 1364 if ($cnt <= scalar(@a)) { 1365 splice(@a, -$cnt); 1366 return join('/', $host_part, @a, $u); 1367 } 1368 # invalid relative path, tries to go past root 1369 return join('/', $host_part, $u); 1370 1371 } 1372 return join('/', $host_part, $b, $u); 1373} 1374 1375##---------------------------------------------------------------------------## 1376 1377sub extract_charset { 1378 my $content = shift; # Content-type string of entity 1379 my $subtype = shift; # Text sub-type 1380 my $body = shift; # Reference to entity text 1381 my $charset = $TextDefCharset; 1382 1383 if ($content =~ /\bcharset\s*=\s*([^\s;]+)/i) { 1384 $charset = lc $1; 1385 $charset =~ s/['";\s]//g; 1386 } 1387 1388 # If HTML, check <meta http-equiv=content-type> tag since it 1389 # can be different than what is specified in the entity header. 1390 if (($subtype eq 'html' || $subtype eq 'x-html') 1391 && ($$body =~ m/(<meta\s+http-equiv\s*=\s*['"]? 1392 content-type\b[^>]*>)/xi 1393 ) 1394 ) { 1395 my $meta = $1; 1396 if ($meta =~ m/\bcharset\s*=\s*['"]?([\w\.\-]+)/i) { 1397 $charset = lc $1; 1398 } 1399 } 1400 $charset = $MIMECharsetAliases{$charset} 1401 if $MIMECharsetAliases{$charset}; 1402 1403 # If us-ascii, but 8-bit chars in body, we change to iso-8859-1 1404 if ($charset eq 'us-ascii') { 1405 $charset = 'iso-8859-1' if $$body =~ /[\x80-\xFF]/; 1406 } 1407 $charset; 1408} 1409 1410##---------------------------------------------------------------------------## 1411## gen_full_part_number creates a full part number of an entity 1412## from the given entity header. 1413## 1414sub gen_full_part_number { 1415 my $fields = shift; 1416 my @number = (); 1417 while (defined($fields->{'x-mha-parent-header'})) { 1418 unshift(@number, ($fields->{'x-mha-part-number'} || '1')); 1419 $fields = $fields->{'x-mha-parent-header'}; 1420 } 1421 if (!scalar(@number)) { 1422 return $fields->{'x-mha-part-number'} || '1'; 1423 } 1424 join('.', @number); 1425} 1426 1427##---------------------------------------------------------------------------## 1428 1429sub decode_1522_fields { 1430 my $fields = shift; 1431 my $encfunc = load_textencoder(); 1432 my $mode = 1433 defined($encfunc) ? TEXT_ENCODE : ($DecodeHeader ? JUST_DECODE : -1); 1434 if ($mode == -1) { 1435 return $fields; 1436 } 1437 my ($label, $value, $v); 1438 while (($label, $value) = each %$fields) { 1439 foreach $v (@$value) { 1440 $v = &MAILdecode_1522_str($v, $mode); 1441 } 1442 } 1443 $fields; 1444} 1445 1446##---------------------------------------------------------------------------## 1447 1448sub dump_header { 1449 my $fh = shift; 1450 my $fields = shift; 1451 my ($key, $a, $value); 1452 foreach $key (sort keys %$fields) { 1453 $a = $fields->{$key}; 1454 if (ref($a)) { 1455 foreach $value (@$a) { 1456 print $fh "$key: $value\n"; 1457 } 1458 } else { 1459 print $fh "$key: $a\n"; 1460 } 1461 } 1462} 1463 1464##---------------------------------------------------------------------------## 14651; # for require 1466