<?php

/**
 +-----------------------------------------------------------------------+
 | This file is part of the Roundcube Webmail client                     |
 |                                                                       |
 | Copyright (C) The Roundcube Dev Team                                  |
 | Copyright (C) Kolab Systems AG                                        |
 |                                                                       |
 | Licensed under the GNU General Public License version 3 or            |
 | any later version with exceptions for skins & plugins.                |
 | See the README file for a full license statement.                     |
 |                                                                       |
 | PURPOSE:                                                              |
 |   MIME message parsing utilities                                      |
 +-----------------------------------------------------------------------+
 | Author: Thomas Bruederli <roundcube@gmail.com>                        |
 | Author: Aleksander Machniak <alec@alec.pl>                            |
 +-----------------------------------------------------------------------+
*/

/**
 * Class for parsing MIME messages
 *
 * All helpers are static. The only state is the default charset, which is
 * kept in a static property and can be set through the constructor.
 *
 * @package    Framework
 * @subpackage Storage
 */
class rcube_mime
{
    /**
     * Charset returned by get_charset() when set (shared by all instances)
     *
     * @var string|null
     */
    private static $default_charset;


    /**
     * Object constructor.
     *
     * Note: the charset is stored in a static property, so creating an object
     * changes the default charset used by the static methods as well.
     *
     * @param string|null $default_charset Default character set name
     */
    public function __construct($default_charset = null)
    {
        self::$default_charset = $default_charset;
    }

    /**
     * Returns message/object character set name
     *
     * Order of precedence: charset set via the constructor, the
     * 'default_charset' config option, the RCUBE_CHARSET constant.
     *
     * @return string Character set name
     */
    public static function get_charset()
    {
        if (self::$default_charset) {
            return self::$default_charset;
        }

        if ($charset = rcube::get_instance()->config->get('default_charset')) {
            return $charset;
        }

        return RCUBE_CHARSET;
    }

    /**
     * Parse the given raw message source and return a structure
     * of rcube_message_part objects.
     *
     * It makes use of the rcube_mime_decode library
     *
     * @param string $raw_body The message source
     *
     * @return object rcube_message_part The message structure
     */
    public static function parse_message($raw_body)
    {
        $conf = [
            'include_bodies'  => true,
            'decode_bodies'   => true,
            'decode_headers'  => false,
            'default_charset' => self::get_charset(),
        ];

        $mime = new rcube_mime_decode($conf);

        return $mime->decode($raw_body);
    }

    /**
     * Split an address list into a structured array list
     *
     * The returned list is indexed starting from 1. Unless $addronly is set
     * every item is an array with 'name', 'mailto' and 'string' keys.
     *
     * @param string|array $input    Input string (or list of strings)
     * @param int          $max      List only this number of addresses
     * @param bool         $decode   Decode address strings
     * @param string       $fallback Fallback charset if none specified
     * @param bool         $addronly Return flat array with e-mail addresses only
     *
     * @return array Indexed list of addresses
     */
    public static function decode_address_list($input, $max = null, $decode = true, $fallback = null, $addronly = false)
    {
        // A common case when the same header is used many times in a mail message
        if (is_array($input)) {
            $input = implode(', ', $input);
        }

        $a   = self::parse_address_list($input, $decode, $fallback);
        $out = [];
        $j   = 0;

        // Special chars as defined by RFC 822 need to be in a quoted string (or escaped).
        $special_chars = '[\(\)\<\>\\\.\[\]@,;:"]';

        if (!is_array($a)) {
            return $out;
        }

        foreach ($a as $val) {
            $j++;
            $address = trim($val['address']);

            if ($addronly) {
                $out[$j] = $address;
            }
            else {
                $name   = trim($val['name']);
                $string = '';

                if ($name && $address && $name != $address) {
                    // quote the display name only when it contains special characters
                    $string = sprintf('%s <%s>', preg_match("/$special_chars/", $name) ? '"'.addcslashes($name, '"').'"' : $name, $address);
                }
                else if ($address) {
                    $string = $address;
                }
                else if ($name) {
                    $string = $name;
                }

                $out[$j] = ['name' => $name, 'mailto' => $address, 'string' => $string];
            }

            if ($max && $j == $max) {
                break;
            }
        }

        return $out;
    }

    /**
     * Decode a message header value
     *
     * The input is cast to string and passed to decode_mime_string().
     *
     * @param string $input    Header value
     * @param string $fallback Fallback charset if none specified
     *
     * @return string Decoded string
     */
    public static function decode_header($input, $fallback = null)
    {
        $str = self::decode_mime_string((string)$input, $fallback);

        return $str;
    }

    /**
     * Decode a mime-encoded string to internal charset
     *
     * Text outside of encoded-words is converted from the fallback charset
     * (or get_charset() if no fallback is given), encoded-words are converted
     * from the charset they declare.
     *
     * @param string $input    Header value
     * @param string $fallback Fallback charset if none specified
     *
     * @return string Decoded string
     */
    public static function decode_mime_string($input, $fallback = null)
    {
        $default_charset = $fallback ?: self::get_charset();

        // rfc: all line breaks or other characters not found
        // in the Base64 Alphabet must be ignored by decoding software
        // delete all blanks between MIME-lines, differently we can
        // receive unnecessary blanks and broken utf-8 symbols
        $input = preg_replace("/\?=\s+=\?/", '?==?', $input);

        // encoded-word regexp
        $re = '/=\?([^?]+)\?([BbQq])\?([^\n]*?)\?=/';

        // Find all RFC2047's encoded words
        if (preg_match_all($re, $input, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER)) {
            // Initialize variables
            $tmp   = [];
            $out   = '';
            $start = 0;

            foreach ($matches as $idx => $m) {
                $pos      = $m[0][1];
                $charset  = $m[1][0];
                $encoding = $m[2][0];
                $text     = $m[3][0];
                $length   = strlen($m[0][0]);

                // Append everything that is before the text to be decoded
                if ($start != $pos) {
                    $substr = substr($input, $start, $pos-$start);
                    $out   .= rcube_charset::convert($substr, $default_charset);
                    $start  = $pos;
                }
                $start += $length;

                // Per RFC2047, each string part "MUST represent an integral number
                // of characters . A multi-octet character may not be split across
                // adjacent encoded-words." However, some mailers break this, so we
                // try to handle characters spanned across parts anyway by iterating
                // through and aggregating sequential encoded parts with the same
                // character set and encoding, then perform the decoding on the
                // aggregation as a whole.

                $tmp[] = $text;
                if (!empty($matches[$idx+1]) && ($next_match = $matches[$idx+1])) {
                    if ($next_match[0][1] == $start
                        && $next_match[1][0] == $charset
                        && $next_match[2][0] == $encoding
                    ) {
                        continue;
                    }
                }

                $count = count($tmp);
                $text  = '';

                // Decode and join encoded-word's chunks
                if ($encoding == 'B' || $encoding == 'b') {
                    $rest = '';
                    // base64 must be decoded a segment at a time.
                    // However, there are broken implementations that continue
                    // in the following word, we'll handle that (#6048)
                    for ($i=0; $i<$count; $i++) {
                        $chunk = $rest . $tmp[$i];
                        // the carried-over remainder is consumed now, it must
                        // not be prepended again to any later chunk
                        $rest   = '';
                        $length = strlen($chunk);
                        $extra  = $length % 4;

                        if ($extra) {
                            // keep the incomplete quantum for the next word
                            $length -= $extra;
                            $rest    = substr($chunk, $length);
                            $chunk   = substr($chunk, 0, $length);
                        }

                        $text .= base64_decode($chunk);
                    }

                    // the last word ended with an incomplete (unpadded) quantum,
                    // decode what is left instead of silently dropping it
                    if ($rest !== '') {
                        $text .= base64_decode($rest);
                    }
                }
                else { // if ($encoding == 'Q' || $encoding == 'q') {
                    // quoted printable can be combined and processed at once
                    for ($i=0; $i<$count; $i++) {
                        $text .= $tmp[$i];
                    }

                    $text = str_replace('_', ' ', $text);
                    $text = quoted_printable_decode($text);
                }

                $out .= rcube_charset::convert($text, $charset);
                $tmp = [];
            }

            // add the last part of the input string
            if ($start != strlen($input)) {
                $out .= rcube_charset::convert(substr($input, $start), $default_charset);
            }

            // return the results
            return $out;
        }

        // no encoding information, use fallback
        return rcube_charset::convert($input, $default_charset);
    }

    /**
     * Decode a mime part
     *
     * Supported encodings: quoted-printable, base64 and uuencode (in a few
     * spellings). Anything else is returned unchanged.
     *
     * @param string $input    Input string
     * @param string $encoding Part encoding
     *
     * @return string Decoded string
     */
    public static function decode($input, $encoding = '7bit')
    {
        switch (strtolower($encoding)) {
        case 'quoted-printable':
            return quoted_printable_decode($input);
        case 'base64':
            return base64_decode($input);
        case 'x-uuencode':
        case 'x-uue':
        case 'uue':
        case 'uuencode':
            return convert_uudecode($input);
        case '7bit':
        default:
            return $input;
        }
    }

    /**
     * Split RFC822 header string into an associative array
     *
     * Folded lines are unfolded first. Header names are lower-cased, when
     * a header occurs more than once the last value wins. Lines without
     * a ": " separator and headers with an empty value are skipped.
     *
     * @param string $headers Headers block
     *
     * @return array Hash array with header name => value
     */
    public static function parse_headers($headers)
    {
        $result  = [];
        $headers = preg_replace('/\r?\n(\t| )+/', ' ', $headers);
        $lines   = explode("\n", $headers);
        $count   = count($lines);

        for ($i=0; $i<$count; $i++) {
            if ($p = strpos($lines[$i], ': ')) {
                $field = strtolower(substr($lines[$i], 0, $p));
                $value = trim(substr($lines[$i], $p+1));

                // Note: do not use empty() here, "0" is a valid header value
                if ($value !== '') {
                    $result[$field] = $value;
                }
            }
        }

        return $result;
    }

    /**
     * E-mail address list parser
     *
     * @param string $str      Address list string
     * @param bool   $decode   Decode (MIME) the names
     * @param string $fallback Fallback charset used when decoding names
     *
     * @return array List of arrays with 'name' and 'address' keys
     */
    private static function parse_address_list($str, $decode = true, $fallback = null)
    {
        // remove any newlines and carriage returns before
        $str = preg_replace('/\r?\n(\s|\t)?/', ' ', $str);

        // extract list items, remove comments
        $str = self::explode_header_string(',;', $str, true);

        // simplified regexp, supporting quoted local part
        $email_rx = '([^\s:]+|("\s*(?:[^"\f\n\r\t\v\b\s]+\s*)+"))@\S+';

        $result = [];

        foreach ($str as $key => $val) {
            $name    = '';
            $address = '';
            $val     = trim($val);

            // First token might be a group name, ignore it
            $tokens = self::explode_header_string(' ', $val);
            if (isset($tokens[0]) && $tokens[0][strlen($tokens[0])-1] == ':') {
                $val = substr($val, strlen($tokens[0]));
            }

            if (preg_match('/(.*)<('.$email_rx.')$/', $val, $m)) {
                // Note: There are cases like "Test<test@domain.tld" with no closing bracket,
                // therefor we do not include it in the regexp above, but we have to
                // remove it later, because $email_rx will catch it (#8164)
                $address = rtrim($m[2], '>');
                $name    = trim($m[1]);
            }
            else if (preg_match('/^('.$email_rx.')$/', $val, $m)) {
                $address = $m[1];
                $name    = '';
            }
            // special case (#1489092)
            else if (preg_match('/(\s*<MAILER-DAEMON>)$/', $val, $m)) {
                $address = 'MAILER-DAEMON';
                $name    = substr($val, 0, -strlen($m[1]));
            }
            else if (preg_match('/('.$email_rx.')/', $val, $m)) {
                $name = $m[1];
            }
            else {
                $name = $val;
            }

            // unquote and/or decode name
            if ($name) {
                // An unquoted name ending with colon is a address group name, ignore it
                if ($name[strlen($name)-1] == ':') {
                    $name = '';
                }

                if (strlen($name) > 1 && $name[0] == '"' && $name[strlen($name)-1] == '"') {
                    $name = substr($name, 1, -1);
                    $name = stripslashes($name);
                }

                if ($decode) {
                    $name = self::decode_header($name, $fallback);
                    // some clients encode addressee name with quotes around it
                    if (strlen($name) > 1 && $name[0] == '"' && $name[strlen($name)-1] == '"') {
                        $name = substr($name, 1, -1);
                    }
                }
            }

            // only a name was found, use it as the address
            if (!$address && $name) {
                $address = $name;
                $name    = '';
            }

            if ($address) {
                $address      = self::fix_email($address);
                $result[$key] = ['name' => $name, 'address' => $address];
            }
        }

        return $result;
    }

    /**
     * Explodes header (e.g. address-list) string into array of strings
     * using specified separator characters with proper handling
     * of quoted-strings and comments (RFC2822)
     *
     * Empty items are skipped. Items are not trimmed.
     *
     * @param string $separator       String containing separator characters
     * @param string $str             Header string
     * @param bool   $remove_comments Enable to remove comments
     *
     * @return array Header items
     */
    public static function explode_header_string($separator, $str, $remove_comments = false)
    {
        $length  = strlen($str);
        $result  = [];
        $quoted  = false;
        $comment = 0;
        $out     = '';

        for ($i=0; $i<$length; $i++) {
            // we're inside a quoted string
            if ($quoted) {
                if ($str[$i] == '"') {
                    $quoted = false;
                }
                else if ($str[$i] == "\\") {
                    if ($comment <= 0) {
                        $out .= "\\";
                    }
                    // skip to the escaped character, it is appended below
                    $i++;
                }
            }
            // we are inside a comment string
            else if ($comment > 0) {
                if ($str[$i] == ')') {
                    $comment--;
                }
                else if ($str[$i] == '(') {
                    $comment++;
                }
                else if ($str[$i] == "\\") {
                    $i++;
                }
                continue;
            }
            // separator, add to result array
            else if (strpos($separator, $str[$i]) !== false) {
                // Note: compare with '', an item "0" is not an empty item
                if ($out !== '') {
                    $result[] = $out;
                }
                $out = '';
                continue;
            }
            // start of quoted string
            else if ($str[$i] == '"') {
                $quoted = true;
            }
            // start of comment
            else if ($remove_comments && $str[$i] == '(') {
                $comment++;
            }

            // isset() protects against a backslash being the last character
            // of the input (the index was advanced past the end above)
            if ($comment <= 0 && isset($str[$i])) {
                $out .= $str[$i];
            }
        }

        if ($out !== '' && $comment <= 0) {
            $result[] = $out;
        }

        return $result;
    }

    /**
     * Interpret a format=flowed message body according to RFC 2646
     *
     * @param string $text  Raw body formatted as flowed text
     * @param string $mark  Mark each flowed line with specified character
     * @param bool   $delsp Remove the trailing space of each flowed line
     *
     * @return string Interpreted text with unwrapped lines and stuffed space removed
     */
    public static function unfold_flowed($text, $mark = null, $delsp = false)
    {
        $text    = preg_split('/\r?\n/', $text);
        $last    = -1;  // index of the line following lines may be joined to
        $q_level = 0;   // quoting level of the previous line
        $marks   = [];

        foreach ($text as $idx => $line) {
            if ($q = strspn($line, '>')) {
                // remove quote chars
                $line = substr($line, $q);
                // remove (optional) space-stuffing
                if (isset($line[0]) && $line[0] === ' ') {
                    $line = substr($line, 1);
                }

                // The same paragraph (We join current line with the previous one) when:
                // - the same level of quoting
                // - previous line was flowed
                // - previous line contains more than only one single space (and quote char(s))
                if ($q == $q_level
                    && isset($text[$last]) && $text[$last][strlen($text[$last])-1] == ' '
                    && !preg_match('/^>+ {0,1}$/', $text[$last])
                ) {
                    if ($delsp) {
                        $text[$last] = substr($text[$last], 0, -1);
                    }
                    $text[$last] .= $line;
                    unset($text[$idx]);

                    if ($mark) {
                        $marks[$last] = true;
                    }
                }
                else {
                    $last = $idx;
                }
            }
            else {
                // signature separator is never joined
                if ($line == '-- ') {
                    $last = $idx;
                }
                else {
                    // remove space-stuffing
                    if (isset($line[0]) && $line[0] === ' ') {
                        $line = substr($line, 1);
                    }

                    $last_len = isset($text[$last]) ? strlen($text[$last]) : 0;

                    if (
                        $last_len && $line && !$q_level && $text[$last] != '-- '
                        && isset($text[$last][$last_len-1]) && $text[$last][$last_len-1] == ' '
                    ) {
                        if ($delsp) {
                            $text[$last] = substr($text[$last], 0, -1);
                        }
                        $text[$last] .= $line;
                        unset($text[$idx]);

                        if ($mark) {
                            $marks[$last] = true;
                        }
                    }
                    else {
                        $text[$idx] = $line;
                        $last = $idx;
                    }
                }
            }
            $q_level = $q;
        }

        // prepend the mark to every line that had other lines joined to it
        if (!empty($marks)) {
            foreach (array_keys($marks) as $mk) {
                $text[$mk] = $mark . $text[$mk];
            }
        }

        return implode("\r\n", $text);
    }

    /**
     * Wrap the given text to comply with RFC 2646
     *
     * @param string $text    Text to wrap
     * @param int    $length  Length
     * @param string $charset Character encoding of $text
     *
     * @return string Wrapped text
     */
    public static function format_flowed($text, $length = 72, $charset = null)
    {
        $text = preg_split('/\r?\n/', $text);

        foreach ($text as $idx => $line) {
            // signature separator is left untouched
            if ($line != '-- ') {
                if ($level = strspn($line, '>')) {
                    // remove quote chars
                    $line = substr($line, $level);
                    // remove (optional) space-stuffing and spaces before the line end
                    $line = rtrim($line, ' ');
                    if (isset($line[0]) && $line[0] === ' ') {
                        $line = substr($line, 1);
                    }

                    $prefix = str_repeat('>', $level) . ' ';
                    $line   = $prefix . self::wordwrap($line, $length - $level - 2, " \r\n$prefix", false, $charset);
                }
                else if ($line) {
                    $line = self::wordwrap(rtrim($line), $length - 2, " \r\n", false, $charset);
                    // space-stuffing
                    $line = preg_replace('/(^|\r\n)(From| |>)/', '\\1 \\2', $line);
                }

                $text[$idx] = $line;
            }
        }

        return implode("\r\n", $text);
    }

    /**
     * Improved wordwrap function with multibyte support.
     * The code is based on Zend_Text_MultiByte::wordWrap().
     *
     * When $charset is given and differs from RCUBE_CHARSET the mbstring
     * internal encoding is switched for the time of processing and set
     * to RCUBE_CHARSET afterwards.
     *
     * @param string $string      Text to wrap
     * @param int    $width       Line width
     * @param string $break       Line separator
     * @param bool   $cut         Enable to cut word
     * @param string $charset     Charset of $string
     * @param bool   $wrap_quoted When enabled quoted lines will not be wrapped
     *
     * @return string Text
     */
    public static function wordwrap($string, $width = 75, $break = "\n", $cut = false, $charset = null, $wrap_quoted = true)
    {
        // Note: Never try to use iconv instead of mbstring functions here
        //       Iconv's substr/strlen are 100x slower (#1489113)

        if ($charset && $charset != RCUBE_CHARSET) {
            $charset = rcube_charset::parse_charset($charset);
            mb_internal_encoding($charset);
        }

        // Convert \r\n to \n, this is our line-separator
        $string    = str_replace("\r\n", "\n", $string);
        $separator = "\n"; // must be 1 character length
        $result    = [];

        while (($stringLength = mb_strlen($string)) > 0) {
            $breakPos = mb_strpos($string, $separator, 0);

            // quoted line (do not wrap)
            if ($wrap_quoted && $string[0] == '>') {
                if ($breakPos === $stringLength - 1 || $breakPos === false) {
                    $subString = $string;
                    $cutLength = null;
                }
                else {
                    $subString = mb_substr($string, 0, $breakPos);
                    $cutLength = $breakPos + 1;
                }
            }
            // next line found and current line is shorter than the limit
            else if ($breakPos !== false && $breakPos < $width) {
                if ($breakPos === $stringLength - 1) {
                    $subString = $string;
                    $cutLength = null;
                }
                else {
                    $subString = mb_substr($string, 0, $breakPos);
                    $cutLength = $breakPos + 1;
                }
            }
            else {
                $subString = mb_substr($string, 0, $width);

                // last line
                if ($breakPos === false && $subString === $string) {
                    $cutLength = null;
                }
                else {
                    $nextChar = mb_substr($string, $width, 1);

                    if ($nextChar === ' ' || $nextChar === $separator) {
                        $afterNextChar = mb_substr($string, $width + 1, 1);

                        // Note: mb_substr() does never return False
                        if ($afterNextChar === false || $afterNextChar === '') {
                            $subString .= $nextChar;
                        }

                        $cutLength = mb_strlen($subString) + 1;
                    }
                    else {
                        $spacePos = mb_strrpos($subString, ' ', 0);

                        if ($spacePos !== false) {
                            $subString = mb_substr($subString, 0, $spacePos);
                            $cutLength = $spacePos + 1;
                        }
                        else if ($cut === false) {
                            // the word is longer than the limit, do not cut it,
                            // break at the first space or line separator instead
                            $spacePos = mb_strpos($string, ' ', 0);

                            if ($spacePos !== false && ($breakPos === false || $spacePos < $breakPos)) {
                                $subString = mb_substr($string, 0, $spacePos);
                                $cutLength = $spacePos + 1;
                            }
                            else if ($breakPos === false) {
                                $subString = $string;
                                $cutLength = null;
                            }
                            else {
                                $subString = mb_substr($string, 0, $breakPos);
                                $cutLength = $breakPos + 1;
                            }
                        }
                        else {
                            $cutLength = $width;
                        }
                    }
                }
            }

            $result[] = $subString;

            if ($cutLength !== null) {
                $string = mb_substr($string, $cutLength, ($stringLength - $cutLength));
            }
            else {
                break;
            }
        }

        if ($charset && $charset != RCUBE_CHARSET) {
            mb_internal_encoding(RCUBE_CHARSET);
        }

        return implode($break, $result);
    }

    /**
     * A method to guess the mime_type of an attachment.
     *
     * Detection order: file name suffix (unless $skip_suffix is set),
     * fileinfo extension, mime_content_type() (files only), $failover.
     *
     * @param string $path        Path to the file or file contents
     * @param string $name        File name (with suffix)
     * @param string $failover    Mime type supplied for failover
     * @param bool   $is_stream   Set to True if $path contains file contents
     * @param bool   $skip_suffix Set to True if the config/mimetypes.php map should be ignored
     *
     * @return string
     * @author Till Klampaeckel <till@php.net>
     * @see    http://de2.php.net/manual/en/ref.fileinfo.php
     * @see    http://de2.php.net/mime_content_type
     */
    public static function file_content_type($path, $name, $failover = 'application/octet-stream', $is_stream = false, $skip_suffix = false)
    {
        $mime_type = null;
        $config    = rcube::get_instance()->config;

        // Detect mimetype using filename extension
        if (!$skip_suffix) {
            $mime_type = self::file_ext_type($name);
        }

        // try fileinfo extension if available
        if (!$mime_type && function_exists('finfo_open')) {
            $mime_magic = $config->get('mime_magic');
            // null as a 2nd argument should be the same as no argument
            // this however is not true on all systems/versions
            if ($mime_magic) {
                $finfo = finfo_open(FILEINFO_MIME, $mime_magic);
            }
            else {
                $finfo = finfo_open(FILEINFO_MIME);
            }

            if ($finfo) {
                $func      = $is_stream ? 'finfo_buffer' : 'finfo_file';
                $mime_type = $func($finfo, $path, FILEINFO_MIME_TYPE);
                finfo_close($finfo);
            }
        }

        // try PHP's mime_content_type
        if (!$mime_type && !$is_stream && function_exists('mime_content_type')) {
            $mime_type = @mime_content_type($path);
        }

        // fall back to user-submitted string
        if (!$mime_type) {
            $mime_type = $failover;
        }

        return $mime_type;
    }

    /**
     * File type detection based on file name only.
     *
     * The suffix => mimetype map is loaded from the mimetypes.php config
     * file(s) on first use and kept in a static variable.
     *
     * @param string $filename File name or path (only the suffix is used)
     *
     * @return string|null Mimetype label, Null if the suffix is not known
     */
    public static function file_ext_type($filename)
    {
        static $mime_ext = [];

        if (empty($mime_ext)) {
            foreach (rcube::get_instance()->config->resolve_paths('mimetypes.php') as $fpath) {
                $mime_ext = array_merge($mime_ext, (array) @include($fpath));
            }
        }

        // use file name suffix with hard-coded mime-type map
        if (!empty($mime_ext) && $filename) {
            $ext = strtolower(pathinfo($filename, PATHINFO_EXTENSION));
            if ($ext && !empty($mime_ext[$ext])) {
                return $mime_ext[$ext];
            }
        }

        return null;
    }

    /**
     * Get mimetype => file extension mapping
     *
     * The mapping is built on the first call and cached in static variables.
     *
     * @param string $mimetype Mime-Type to get extensions for
     *
     * @return array List of extensions matching the given mimetype or a hash array
     *               with ext -> mimetype mappings if $mimetype is not given
     */
    public static function get_mime_extensions($mimetype = null)
    {
        static $mime_types, $mime_extensions;

        // return cached data
        if (is_array($mime_types)) {
            if ($mimetype) {
                // unknown type gives an empty list, the same as on the first call
                return !empty($mime_types[$mimetype]) ? $mime_types[$mimetype] : [];
            }

            return $mime_extensions;
        }

        // load mapping file
        $file_paths = [];

        if ($mime_types = rcube::get_instance()->config->get('mime_types')) {
            $file_paths[] = $mime_types;
        }

        // try common locations
        if (strtoupper(substr(PHP_OS, 0, 3)) == 'WIN') {
            // NOTE(review): the trailing dot in this path looks unintended - confirm
            $file_paths[] = 'C:/xampp/apache/conf/mime.types.';
        }
        else {
            $file_paths[] = '/etc/mime.types';
            $file_paths[] = '/etc/httpd/mime.types';
            $file_paths[] = '/etc/httpd2/mime.types';
            $file_paths[] = '/etc/apache/mime.types';
            $file_paths[] = '/etc/apache2/mime.types';
            $file_paths[] = '/etc/nginx/mime.types';
            $file_paths[] = '/usr/local/etc/httpd/conf/mime.types';
            $file_paths[] = '/usr/local/etc/apache/conf/mime.types';
            $file_paths[] = '/usr/local/etc/apache24/mime.types';
        }

        $mime_types      = [];
        $mime_extensions = [];
        $lines           = [];
        $regex           = "/([\w\+\-\.\/]+)\s+([\w\s]+)/i";

        // only the first readable file is used
        foreach ($file_paths as $fp) {
            if (@is_readable($fp)) {
                $lines = file($fp, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
                break;
            }
        }

        foreach ($lines as $line) {
            // skip comments or mime types w/o any extensions
            if ($line[0] == '#' || !preg_match($regex, $line, $matches)) {
                continue;
            }

            $mime = $matches[1];

            foreach (explode(' ', $matches[2]) as $ext) {
                $ext = trim($ext);

                // repeated or trailing whitespace produces empty items,
                // do not register an empty extension
                if ($ext === '') {
                    continue;
                }

                $mime_types[$mime][]   = $ext;
                $mime_extensions[$ext] = $mime;
            }
        }

        // fallback to some well-known types most important for daily emails
        if (empty($mime_types)) {
            foreach (rcube::get_instance()->config->resolve_paths('mimetypes.php') as $fpath) {
                $mime_extensions = array_merge($mime_extensions, (array) @include($fpath));
            }

            foreach ($mime_extensions as $ext => $mime) {
                $mime_types[$mime][] = $ext;
            }
        }

        // Add some known aliases that aren't included by some mime.types (#1488891)
        // the order is important here so standard extensions have higher prio
        $aliases = [
            'image/gif'      => ['gif'],
            'image/png'      => ['png'],
            'image/x-png'    => ['png'],
            'image/jpeg'     => ['jpg', 'jpeg', 'jpe'],
            'image/jpg'      => ['jpg', 'jpeg', 'jpe'],
            'image/pjpeg'    => ['jpg', 'jpeg', 'jpe'],
            'image/tiff'     => ['tif'],
            'image/bmp'      => ['bmp'],
            'image/x-ms-bmp' => ['bmp'],
            'message/rfc822' => ['eml'],
            'text/x-mail'    => ['eml'],
        ];

        foreach ($aliases as $mime => $exts) {
            if (isset($mime_types[$mime])) {
                $mime_types[$mime] = array_unique(array_merge((array) $mime_types[$mime], $exts));
            }
            else {
                $mime_types[$mime] = $exts;
            }

            foreach ($exts as $ext) {
                if (!isset($mime_extensions[$ext])) {
                    $mime_extensions[$ext] = $mime;
                }
            }
        }

        if ($mimetype) {
            return !empty($mime_types[$mimetype]) ? $mime_types[$mimetype] : [];
        }

        return $mime_extensions;
    }

    /**
     * Detect image type of the given binary data by checking magic numbers.
     *
     * Recognized signatures: PNG, GIF and ICO.
     *
     * @param string $data Binary file content
     *
     * @return string Detected mime-type or jpeg as fallback
     */
    public static function image_content_type($data)
    {
        $type = 'jpeg';
        if      (preg_match('/^\x89\x50\x4E\x47/', $data)) $type = 'png';
        else if (preg_match('/^\x47\x49\x46\x38/', $data)) $type = 'gif';
        else if (preg_match('/^\x00\x00\x01\x00/', $data)) $type = 'ico';
    //  else if (preg_match('/^\xFF\xD8\xFF\xE0/', $data)) $type = 'jpeg';

        return 'image/' . $type;
    }

    /**
     * Try to fix invalid email addresses
     *
     * Currently it only removes redundant quoting of address parts.
     *
     * @param string $email Email address
     *
     * @return string Fixed email address
     */
    public static function fix_email($email)
    {
        $parts = rcube_utils::explode_quoted_string('@', $email);

        foreach ($parts as $idx => $part) {
            // remove redundant quoting (#1490040)
            // isset() prevents reading an offset of an empty part
            if (isset($part[0]) && $part[0] == '"' && preg_match('/^"([a-zA-Z0-9._+=-]+)"$/', $part, $m)) {
                $parts[$idx] = $m[1];
            }
        }

        return implode('@', $parts);
    }

    /**
     * Fix mimetype name.
     *
     * The name is trimmed and lower-cased, then known aliases and
     * garbage variants are replaced with the standard name.
     *
     * @param string $type Mimetype
     *
     * @return string Mimetype
     */
    public static function fix_mimetype($type)
    {
        $type    = strtolower(trim($type));
        $aliases = [
            'image/x-ms-bmp' => 'image/bmp',       // #4771
            'pdf'            => 'application/pdf', // #6816
        ];

        if (!empty($aliases[$type])) {
            return $aliases[$type];
        }

        // Some versions of Outlook create garbage Content-Type:
        // application/pdf.A520491B_3BF7_494D_8855_7FAC2C6C0608
        if (preg_match('/^application\/pdf.+/', $type)) {
            return 'application/pdf';
        }

        // treat image/pjpeg (image/pjpg, image/jpg) as image/jpeg (#4196)
        if (preg_match('/^image\/p?jpe?g$/', $type)) {
            return 'image/jpeg';
        }

        return $type;
    }
}