1 #![forbid(unsafe_code)]
2 
3 extern crate base64;
4 extern crate charset;
5 extern crate quoted_printable;
6 
7 use std::borrow::Cow;
8 use std::collections::{BTreeMap, HashMap};
9 use std::error;
10 use std::fmt;
11 
12 use charset::{Charset, decode_latin1};
13 
14 mod addrparse;
15 pub mod body;
16 mod dateparse;
17 mod header;
18 pub mod headers;
19 mod msgidparse;
20 
21 pub use crate::addrparse::{
22     addrparse, addrparse_header, GroupInfo, MailAddr, MailAddrList, SingleInfo,
23 };
24 use crate::body::Body;
25 pub use crate::dateparse::dateparse;
26 use crate::header::HeaderToken;
27 use crate::headers::Headers;
28 pub use crate::msgidparse::{msgidparse, MessageIdList};
29 
30 /// An error type that represents the different kinds of errors that may be
31 /// encountered during message parsing.
32 #[derive(Debug)]
33 pub enum MailParseError {
34     /// Data that was specified as being in the quoted-printable transfer-encoding
35     /// could not be successfully decoded as quoted-printable data.
36     QuotedPrintableDecodeError(quoted_printable::QuotedPrintableError),
37     /// Data that was specified as being in the base64 transfer-encoding could
38     /// not be successfully decoded as base64 data.
39     Base64DecodeError(base64::DecodeError),
40     /// An error occurred when converting the raw byte data to Rust UTF-8 string
41     /// format using the charset specified in the message.
42     EncodingError(std::borrow::Cow<'static, str>),
43     /// Some other error occurred while parsing the message; the description string
44     /// provides additional details.
45     Generic(&'static str),
46 }
47 
48 impl fmt::Display for MailParseError {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result49     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
50         match *self {
51             MailParseError::QuotedPrintableDecodeError(ref err) => {
52                 write!(f, "QuotedPrintable decode error: {}", err)
53             }
54             MailParseError::Base64DecodeError(ref err) => write!(f, "Base64 decode error: {}", err),
55             MailParseError::EncodingError(ref err) => write!(f, "Encoding error: {}", err),
56             MailParseError::Generic(ref description) => write!(f, "{}", description),
57         }
58     }
59 }
60 
61 impl error::Error for MailParseError {
cause(&self) -> Option<&dyn error::Error>62     fn cause(&self) -> Option<&dyn error::Error> {
63         match *self {
64             MailParseError::QuotedPrintableDecodeError(ref err) => Some(err),
65             MailParseError::Base64DecodeError(ref err) => Some(err),
66             _ => None,
67         }
68     }
69 
source(&self) -> Option<&(dyn error::Error + 'static)>70     fn source(&self) -> Option<&(dyn error::Error + 'static)> {
71         match *self {
72             MailParseError::QuotedPrintableDecodeError(ref err) => Some(err),
73             MailParseError::Base64DecodeError(ref err) => Some(err),
74             _ => None,
75         }
76     }
77 }
78 
79 impl From<quoted_printable::QuotedPrintableError> for MailParseError {
from(err: quoted_printable::QuotedPrintableError) -> MailParseError80     fn from(err: quoted_printable::QuotedPrintableError) -> MailParseError {
81         MailParseError::QuotedPrintableDecodeError(err)
82     }
83 }
84 
85 impl From<base64::DecodeError> for MailParseError {
from(err: base64::DecodeError) -> MailParseError86     fn from(err: base64::DecodeError) -> MailParseError {
87         MailParseError::Base64DecodeError(err)
88     }
89 }
90 
91 impl From<std::borrow::Cow<'static, str>> for MailParseError {
from(err: std::borrow::Cow<'static, str>) -> MailParseError92     fn from(err: std::borrow::Cow<'static, str>) -> MailParseError {
93         MailParseError::EncodingError(err)
94     }
95 }
96 
/// One header (a key/value pair) of a message.
///
/// Both fields are slices into the raw byte array handed to parse_mail, so a
/// `MailHeader` must not outlive that raw input. Accessor methods on this
/// struct extract the data as Rust strings.
pub struct MailHeader<'a> {
    /// Raw bytes of the header name.
    key: &'a [u8],
    /// Raw bytes of the header value.
    value: &'a [u8],
}
106 
107 /// Custom Debug trait for better formatting and printing of MailHeader items.
108 impl<'a> fmt::Debug for MailHeader<'a> {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result109     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
110         f.debug_struct("MailHeader")
111             .field("key", &String::from_utf8_lossy(&self.key))
112             .field("value", &String::from_utf8_lossy(&self.value))
113             .finish()
114     }
115 }
116 
/// Searches `line` for `key`, starting at byte offset `ix_start`, and returns
/// the byte index of the first match measured from the start of `line`.
///
/// Slicing at `ix_start` panics if the offset is past the end of `line` or
/// does not fall on a character boundary.
pub(crate) fn find_from(line: &str, ix_start: usize, key: &str) -> Option<usize> {
    let tail = &line[ix_start..];
    let offset_in_tail = tail.find(key)?;
    Some(ix_start + offset_in_tail)
}
120 
/// Searches the byte slice `line` for the byte sequence `key`, starting at
/// offset `ix_start`, and returns the index of the first match measured from
/// the start of `line`.
///
/// A match that ends exactly at the last byte of `line` is found. `None` is
/// returned when there is no match, which includes a start offset at or
/// beyond the end of `line`.
///
/// # Panics
/// Panics if `key` is empty.
fn find_from_u8(line: &[u8], ix_start: usize, key: &[u8]) -> Option<usize> {
    assert!(!key.is_empty());
    // `get` turns an out-of-range start into `None` instead of a panic, so a
    // caller that has already consumed the whole input gets a clean miss.
    let haystack = line.get(ix_start..)?;
    // `windows` visits every candidate position, including the final one, and
    // yields nothing when fewer than `key.len()` bytes remain.
    haystack
        .windows(key.len())
        .position(|candidate| candidate == key)
        .map(|offset| ix_start + offset)
}

#[test]
fn test_find_from_u8() {
    assert_eq!(find_from_u8(b"hello world", 0, b"hell"), Some(0));
    assert_eq!(find_from_u8(b"hello world", 0, b"o"), Some(4));
    assert_eq!(find_from_u8(b"hello world", 4, b"o"), Some(4));
    assert_eq!(find_from_u8(b"hello world", 5, b"o"), Some(7));
    assert_eq!(find_from_u8(b"hello world", 8, b"o"), None);
    assert_eq!(find_from_u8(b"hello world", 10, b"d"), Some(10));
    assert_eq!(find_from_u8(b"hello world", 6, b"world"), Some(6));
    assert_eq!(find_from_u8(b"hello world", 11, b"d"), None);
    assert_eq!(find_from_u8(b"hi", 0, b"hello"), None);
}
156 
157 impl<'a> MailHeader<'a> {
158     /// Get the name of the header. Note that header names are case-insensitive.
159     /// Prefer using get_key_ref where possible for better performance.
get_key(&self) -> String160     pub fn get_key(&self) -> String {
161         decode_latin1(self.key).into_owned()
162     }
163 
164     /// Get the name of the header, borrowing if it's ASCII-only.
165     /// Note that header names are case-insensitive.
get_key_ref(&self) -> Cow<str>166     pub fn get_key_ref(&self) -> Cow<str> {
167         decode_latin1(self.key)
168     }
169 
170     /// Get the value of the header. Any sequences of newlines characters followed
171     /// by whitespace are collapsed into a single space. In effect, header values
172     /// wrapped across multiple lines are compacted back into one line, while
173     /// discarding the extra whitespace required by the MIME format. Additionally,
174     /// any quoted-printable words in the value are decoded.
175     ///
176     /// # Examples
177     /// ```
178     ///     use mailparse::parse_header;
179     ///     let (parsed, _) = parse_header(b"Subject: =?iso-8859-1?Q?=A1Hola,_se=F1or!?=").unwrap();
180     ///     assert_eq!(parsed.get_key(), "Subject");
181     ///     assert_eq!(parsed.get_value(), "\u{a1}Hola, se\u{f1}or!");
182     /// ```
get_value(&self) -> String183     pub fn get_value(&self) -> String {
184         let mut result = String::new();
185 
186         let chars = decode_latin1(self.value);
187         for tok in header::normalized_tokens(&chars) {
188             match tok {
189                 HeaderToken::Text(t) => {
190                     result.push_str(t);
191                 }
192                 HeaderToken::Whitespace(ws) => {
193                     result.push_str(ws);
194                 }
195                 HeaderToken::Newline(Some(ws)) => {
196                     result.push_str(&ws);
197                 }
198                 HeaderToken::Newline(None) => {}
199                 HeaderToken::DecodedWord(dw) => {
200                     result.push_str(&dw);
201                 }
202             }
203         }
204 
205         result
206     }
207 
208     /// Get the raw, unparsed value of the header key.
209     ///
210     /// # Examples
211     /// ```
212     ///     use mailparse::parse_header;
213     ///     let (parsed, _) = parse_header(b"SuBJect : =?iso-8859-1?Q?=A1Hola,_se=F1or!?=").unwrap();
214     ///     assert_eq!(parsed.get_key_raw(), "SuBJect ".as_bytes());
215     /// ```
get_key_raw(&self) -> &[u8]216     pub fn get_key_raw(&self) -> &[u8] {
217         self.key
218     }
219 
220     /// Get the raw, unparsed value of the header value.
221     ///
222     /// # Examples
223     /// ```
224     ///     use mailparse::parse_header;
225     ///     let (parsed, _) = parse_header(b"Subject: =?iso-8859-1?Q?=A1Hola,_se=F1or!?=").unwrap();
226     ///     assert_eq!(parsed.get_key(), "Subject");
227     ///     assert_eq!(parsed.get_value_raw(), "=?iso-8859-1?Q?=A1Hola,_se=F1or!?=".as_bytes());
228     /// ```
get_value_raw(&self) -> &[u8]229     pub fn get_value_raw(&self) -> &[u8] {
230         self.value
231     }
232 }
233 
/// The states of the byte-by-byte state machine used by `parse_header`.
#[derive(Debug)]
enum HeaderParseState {
    /// Nothing examined yet; a leading space is rejected in this state.
    Initial,
    /// Reading the header key, up to a `:` (or a newline).
    Key,
    /// Just past the `:`; spaces are skipped until the value begins.
    PreValue,
    /// Inside the header value.
    Value,
    /// Immediately after a newline inside the value; a following space or
    /// tab continues the value, anything else ends the header.
    ValueNewline,
}
242 
243 /// Parse a single header from the raw data given.
244 /// This function takes raw byte data, and starts parsing it, expecting there
245 /// to be a MIME header key-value pair right at the beginning. It parses that
246 /// header and returns it, along with the index at which the next header is
247 /// expected to start. If you just want to parse a single header, you can ignore
248 /// the second component of the tuple, which is the index of the next header.
249 /// Error values are returned if the data could not be successfully interpreted
250 /// as a MIME key-value pair.
251 ///
252 /// # Examples
253 /// ```
254 ///     use mailparse::parse_header;
255 ///     let (parsed, _) = parse_header(concat!(
256 ///             "Subject: Hello, sir,\n",
257 ///             "   I am multiline\n",
258 ///             "Next:Header").as_bytes())
259 ///         .unwrap();
260 ///     assert_eq!(parsed.get_key(), "Subject");
261 ///     assert_eq!(parsed.get_value(), "Hello, sir, I am multiline");
262 /// ```
parse_header(raw_data: &[u8]) -> Result<(MailHeader, usize), MailParseError>263 pub fn parse_header(raw_data: &[u8]) -> Result<(MailHeader, usize), MailParseError> {
264     let mut it = raw_data.iter();
265     let mut ix = 0;
266     let mut c = match it.next() {
267         None => return Err(MailParseError::Generic("Empty string provided")),
268         Some(v) => *v,
269     };
270 
271     let mut ix_key_end = None;
272     let mut ix_value_start = 0;
273     let mut ix_value_end = 0;
274 
275     let mut state = HeaderParseState::Initial;
276     loop {
277         match state {
278             HeaderParseState::Initial => {
279                 if c == b' ' {
280                     return Err(MailParseError::Generic(
281                         "Header cannot start with a space; it is \
282                          likely an overhanging line from a \
283                          previous header",
284                     ));
285                 };
286                 state = HeaderParseState::Key;
287                 continue;
288             }
289             HeaderParseState::Key => {
290                 if c == b':' {
291                     ix_key_end = Some(ix);
292                     state = HeaderParseState::PreValue;
293                 } else if c == b'\n' {
294                     // Technically this is invalid. We'll handle it gracefully
295                     // since it does appear to happen in the wild and other
296                     // MTAs deal with it. Our handling is to just treat everything
297                     // encountered so far on this line as the header key, and
298                     // leave the value empty.
299                     ix_key_end = Some(ix);
300                     ix_value_start = ix;
301                     ix_value_end = ix;
302                     ix += 1;
303                     break;
304                 }
305             }
306             HeaderParseState::PreValue => {
307                 if c != b' ' {
308                     ix_value_start = ix;
309                     ix_value_end = ix;
310                     state = HeaderParseState::Value;
311                     continue;
312                 }
313             }
314             HeaderParseState::Value => {
315                 if c == b'\n' {
316                     state = HeaderParseState::ValueNewline;
317                 } else if c != b'\r' {
318                     ix_value_end = ix + 1;
319                 }
320             }
321             HeaderParseState::ValueNewline => {
322                 if c == b' ' || c == b'\t' {
323                     state = HeaderParseState::Value;
324                     continue;
325                 } else {
326                     break;
327                 }
328             }
329         }
330         ix += 1;
331         c = match it.next() {
332             None => break,
333             Some(v) => *v,
334         };
335     }
336     match ix_key_end {
337         Some(v) => Ok((
338             MailHeader {
339                 key: &raw_data[0..v],
340                 value: &raw_data[ix_value_start..ix_value_end],
341             },
342             ix,
343         )),
344 
345         None => Ok((
346             // Technically this is invalid. We'll handle it gracefully
347             // since we handle the analogous situation above. Our handling
348             // is to just treat everything encountered on this line as
349             // the header key, and leave the value empty.
350             MailHeader {
351                 key: &raw_data[0..ix],
352                 value: &raw_data[ix..ix],
353             },
354             ix,
355         )),
356     }
357 }
358 
359 /// A trait that is implemented by the [MailHeader] slice. These functions are
360 /// also available on Vec<MailHeader> which is returned by the parse_headers
361 /// function. It provides a map-like interface to look up header values by their
362 /// name.
363 pub trait MailHeaderMap {
364     /// Look through the list of headers and return the value of the first one
365     /// that matches the provided key. It returns Ok(None) if the no matching
366     /// header was found. Header names are matched case-insensitively.
367     ///
368     /// # Examples
369     /// ```
370     ///     use mailparse::{parse_mail, MailHeaderMap};
371     ///     let headers = parse_mail(concat!(
372     ///             "Subject: Test\n",
373     ///             "\n",
374     ///             "This is a test message").as_bytes())
375     ///         .unwrap().headers;
376     ///     assert_eq!(headers.get_first_value("Subject"), Some("Test".to_string()));
377     /// ```
get_first_value(&self, key: &str) -> Option<String>378     fn get_first_value(&self, key: &str) -> Option<String>;
379 
380     /// Similar to `get_first_value`, except it returns a reference to the
381     /// MailHeader struct instead of just extracting the value.
get_first_header(&self, key: &str) -> Option<&MailHeader>382     fn get_first_header(&self, key: &str) -> Option<&MailHeader>;
383 
384     /// Look through the list of headers and return the values of all headers
385     /// matching the provided key. Returns an empty vector if no matching headers
386     /// were found. The order of the returned values is the same as the order
387     /// of the matching headers in the message. Header names are matched
388     /// case-insensitively.
389     ///
390     /// # Examples
391     /// ```
392     ///     use mailparse::{parse_mail, MailHeaderMap};
393     ///     let headers = parse_mail(concat!(
394     ///             "Key: Value1\n",
395     ///             "Key: Value2").as_bytes())
396     ///         .unwrap().headers;
397     ///     assert_eq!(headers.get_all_values("Key"),
398     ///         vec!["Value1".to_string(), "Value2".to_string()]);
399     /// ```
get_all_values(&self, key: &str) -> Vec<String>400     fn get_all_values(&self, key: &str) -> Vec<String>;
401 
402     /// Similar to `get_all_values`, except it returns references to the
403     /// MailHeader structs instead of just extracting the values.
get_all_headers(&self, key: &str) -> Vec<&MailHeader>404     fn get_all_headers(&self, key: &str) -> Vec<&MailHeader>;
405 }
406 
407 impl<'a> MailHeaderMap for [MailHeader<'a>] {
get_first_value(&self, key: &str) -> Option<String>408     fn get_first_value(&self, key: &str) -> Option<String> {
409         for x in self {
410             if x.get_key_ref().eq_ignore_ascii_case(key) {
411                 return Some(x.get_value());
412             }
413         }
414         None
415     }
416 
get_first_header(&self, key: &str) -> Option<&MailHeader>417     fn get_first_header(&self, key: &str) -> Option<&MailHeader> {
418         for x in self {
419             if x.get_key_ref().eq_ignore_ascii_case(key) {
420                 return Some(x);
421             }
422         }
423         None
424     }
425 
get_all_values(&self, key: &str) -> Vec<String>426     fn get_all_values(&self, key: &str) -> Vec<String> {
427         let mut values: Vec<String> = Vec::new();
428         for x in self {
429             if x.get_key_ref().eq_ignore_ascii_case(key) {
430                 values.push(x.get_value());
431             }
432         }
433         values
434     }
435 
get_all_headers(&self, key: &str) -> Vec<&MailHeader>436     fn get_all_headers(&self, key: &str) -> Vec<&MailHeader> {
437         let mut headers: Vec<&MailHeader> = Vec::new();
438         for x in self {
439             if x.get_key_ref().eq_ignore_ascii_case(key) {
440                 headers.push(x);
441             }
442         }
443         headers
444     }
445 }
446 
447 /// Parses all the headers from the raw data given.
448 /// This function takes raw byte data, and starts parsing it, expecting there
449 /// to be zero or more MIME header key-value pair right at the beginning,
450 /// followed by two consecutive newlines (i.e. a blank line). It parses those
451 /// headers and returns them in a vector. The normal vector functions can be
452 /// used to access the headers linearly, or the MailHeaderMap trait can be used
453 /// to access them in a map-like fashion. Along with this vector, the function
454 /// returns the index at which the message body is expected to start. If you
455 /// just care about the headers, you can ignore the second component of the
456 /// returned tuple.
457 /// Error values are returned if there was some sort of parsing error.
458 ///
459 /// # Examples
460 /// ```
461 ///     use mailparse::{parse_headers, MailHeaderMap};
462 ///     let (headers, _) = parse_headers(concat!(
463 ///             "Subject: Test\n",
464 ///             "From: me@myself.com\n",
465 ///             "To: you@yourself.com").as_bytes())
466 ///         .unwrap();
467 ///     assert_eq!(headers[1].get_key(), "From");
468 ///     assert_eq!(headers.get_first_value("To"), Some("you@yourself.com".to_string()));
469 /// ```
parse_headers(raw_data: &[u8]) -> Result<(Vec<MailHeader>, usize), MailParseError>470 pub fn parse_headers(raw_data: &[u8]) -> Result<(Vec<MailHeader>, usize), MailParseError> {
471     let mut headers: Vec<MailHeader> = Vec::new();
472     let mut ix = 0;
473     loop {
474         if ix >= raw_data.len() {
475             break;
476         } else if raw_data[ix] == b'\n' {
477             ix += 1;
478             break;
479         } else if raw_data[ix] == b'\r' {
480             if ix + 1 < raw_data.len() && raw_data[ix + 1] == b'\n' {
481                 ix += 2;
482                 break;
483             } else {
484                 return Err(MailParseError::Generic(
485                     "Headers were followed by an unexpected lone \
486                      CR character!",
487                 ));
488             }
489         }
490         let (header, ix_next) = parse_header(&raw_data[ix..])?;
491         headers.push(header);
492         ix += ix_next;
493     }
494     Ok((headers, ix))
495 }
496 
/// Structured form of a Content-Type header. It exists mainly for
/// convenience, because this metadata is usually required to interpret the
/// message body correctly.
#[derive(Debug)]
pub struct ParsedContentType {
    /// The media type of the data, e.g. "text/plain" or "application/pdf".
    pub mimetype: String,
    /// The charset to use when decoding the raw byte data, e.g. "iso-8859-1"
    /// or "utf-8".
    pub charset: String,
    /// Any further Content-Type parameters, such as filename and boundary.
    /// Keys are lowercased and values have enclosing quotes stripped.
    pub params: BTreeMap<String, String>,
}
512 
513 impl Default for ParsedContentType {
default() -> Self514     fn default() -> Self {
515         ParsedContentType {
516             mimetype: "text/plain".to_string(),
517             charset: "us-ascii".to_string(),
518             params: BTreeMap::new(),
519         }
520     }
521 }
522 
523 /// Helper method to parse a header value as a Content-Type header. Note that
524 /// the returned object's `params` map will contain a charset key if a charset
525 /// was explicitly specified in the header; otherwise the `params` map will not
526 /// contain a charset key. Regardless, the `charset` field will contain a
527 /// charset - either the one explicitly specified or the default of "us-ascii".
528 ///
529 /// # Examples
530 /// ```
531 ///     use mailparse::{parse_header, parse_content_type};
532 ///     let (parsed, _) = parse_header(
533 ///             b"Content-Type: text/html; charset=foo; boundary=\"quotes_are_removed\"")
534 ///         .unwrap();
535 ///     let ctype = parse_content_type(&parsed.get_value());
536 ///     assert_eq!(ctype.mimetype, "text/html");
537 ///     assert_eq!(ctype.charset, "foo");
538 ///     assert_eq!(ctype.params.get("boundary"), Some(&"quotes_are_removed".to_string()));
539 ///     assert_eq!(ctype.params.get("charset"), Some(&"foo".to_string()));
540 /// ```
541 /// ```
542 ///     use mailparse::{parse_header, parse_content_type};
543 ///     let (parsed, _) = parse_header(b"Content-Type: bogus").unwrap();
544 ///     let ctype = parse_content_type(&parsed.get_value());
545 ///     assert_eq!(ctype.mimetype, "bogus");
546 ///     assert_eq!(ctype.charset, "us-ascii");
547 ///     assert_eq!(ctype.params.get("boundary"), None);
548 ///     assert_eq!(ctype.params.get("charset"), None);
549 /// ```
550 /// ```
551 ///     use mailparse::{parse_header, parse_content_type};
552 ///     let (parsed, _) = parse_header(br#"Content-Type: application/octet-stream;name="=?utf8?B?6L+O5ai255m95a+M576O?=";charset="utf8""#).unwrap();
553 ///     let ctype = parse_content_type(&parsed.get_value());
554 ///     assert_eq!(ctype.mimetype, "application/octet-stream");
555 ///     assert_eq!(ctype.charset, "utf8");
556 ///     assert_eq!(ctype.params.get("boundary"), None);
557 ///     assert_eq!(ctype.params.get("name"), Some(&"迎娶白富美".to_string()));
558 /// ```
parse_content_type(header: &str) -> ParsedContentType559 pub fn parse_content_type(header: &str) -> ParsedContentType {
560     let params = parse_param_content(header);
561     let mimetype = params.value.to_lowercase();
562     let charset = params
563         .params
564         .get("charset")
565         .cloned()
566         .unwrap_or_else(|| "us-ascii".to_string());
567 
568     ParsedContentType {
569         mimetype,
570         charset,
571         params: params.params,
572     }
573 }
574 
/// The disposition types a Content-Disposition header can carry. Only the
/// types most commonly seen in email messages are enumerated here; every
/// other type is held by the `Extension` variant. A more comprehensive list
/// of IANA-recognized types is available at
/// https://www.iana.org/assignments/cont-disp/cont-disp.xhtml.
#[derive(Debug, Clone, PartialEq)]
pub enum DispositionType {
    /// The default: the content is to be displayed inline, as part of the
    /// enclosing document.
    Inline,
    /// The content is not meant for inline display, but can be accessed
    /// for use.
    Attachment,
    /// The content contains a form submission.
    FormData,
    /// Any disposition that is not explicitly enumerated above.
    Extension(String),
}
593 
594 impl Default for DispositionType {
default() -> Self595     fn default() -> Self {
596         DispositionType::Inline
597     }
598 }
599 
600 /// Convert the string represented disposition type to enum.
parse_disposition_type(disposition: &str) -> DispositionType601 fn parse_disposition_type(disposition: &str) -> DispositionType {
602     match &disposition.to_lowercase()[..] {
603         "inline" => DispositionType::Inline,
604         "attachment" => DispositionType::Attachment,
605         "form-data" => DispositionType::FormData,
606         extension => DispositionType::Extension(extension.to_string()),
607     }
608 }
609 
610 /// A struct to hold a more structured representation of the Content-Disposition header.
611 /// This is provided mostly as a convenience since this metadata is usually
612 /// needed to interpret the message body properly.
613 #[derive(Debug, Default)]
614 pub struct ParsedContentDisposition {
615     /// The disposition type of the Content-Disposition header. If this
616     /// is an extension type, the string will be lowercased.
617     pub disposition: DispositionType,
618     /// The additional params of Content-Disposition, e.g. filename. The
619     /// keys in the map will be lowercased, and the values will have any
620     /// enclosing quotes stripped.
621     pub params: BTreeMap<String, String>,
622 }
623 
624 /// Helper method to parse a header value as a Content-Disposition header. The disposition
625 /// defaults to "inline" if no disposition parameter is provided in the header
626 /// value.
627 ///
628 /// # Examples
629 /// ```
630 ///     use mailparse::{parse_header, parse_content_disposition, DispositionType};
631 ///     let (parsed, _) = parse_header(
632 ///             b"Content-Disposition: attachment; filename=\"yummy dummy\"")
633 ///         .unwrap();
634 ///     let dis = parse_content_disposition(&parsed.get_value());
635 ///     assert_eq!(dis.disposition, DispositionType::Attachment);
636 ///     assert_eq!(dis.params.get("name"), None);
637 ///     assert_eq!(dis.params.get("filename"), Some(&"yummy dummy".to_string()));
638 /// ```
parse_content_disposition(header: &str) -> ParsedContentDisposition639 pub fn parse_content_disposition(header: &str) -> ParsedContentDisposition {
640     let params = parse_param_content(header);
641     let disposition = parse_disposition_type(&params.value);
642     ParsedContentDisposition {
643         disposition,
644         params: params.params,
645     }
646 }
647 
648 /// Struct that holds the structured representation of the message. Note that
649 /// since MIME allows for nested multipart messages, a tree-like structure is
650 /// necessary to represent it properly. This struct accomplishes that by holding
651 /// a vector of other ParsedMail structures for the subparts.
652 #[derive(Debug)]
653 pub struct ParsedMail<'a> {
654     /// The raw bytes that make up the header block for this message (or subpart).
655     header_bytes: &'a [u8],
656     /// The headers for the message (or message subpart).
657     pub headers: Vec<MailHeader<'a>>,
658     /// The Content-Type information for the message (or message subpart).
659     pub ctype: ParsedContentType,
660     /// The raw bytes that make up the body of the message (or message subpart).
661     body_bytes: &'a [u8],
662     /// The subparts of this message or subpart. This vector is only non-empty
663     /// if ctype.mimetype starts with "multipart/".
664     pub subparts: Vec<ParsedMail<'a>>,
665 }
666 
667 impl<'a> ParsedMail<'a> {
668     /// Get the body of the message as a Rust string. This function tries to
669     /// unapply the Content-Transfer-Encoding if there is one, and then converts
670     /// the result into a Rust UTF-8 string using the charset in the Content-Type
671     /// (or "us-ascii" if the charset was missing or not recognized). Note that
672     /// in some cases the body may be binary data that doesn't make sense as a
673     /// Rust string - it is up to the caller to handle those cases gracefully.
674     /// These cases may occur in particular when the body is of a "binary"
675     /// Content-Transfer-Encoding (i.e. where `get_body_encoded()` returns a
676     /// `Body::Binary` variant) but may also occur in other cases because of the
677     /// messiness of the real world and non-compliant mail implementations.
678     ///
679     /// # Examples
680     /// ```
681     ///     use mailparse::parse_mail;
682     ///     let p = parse_mail(concat!(
683     ///             "Subject: test\n",
684     ///             "\n",
685     ///             "This is the body").as_bytes())
686     ///         .unwrap();
687     ///     assert_eq!(p.get_body().unwrap(), "This is the body");
688     /// ```
get_body(&self) -> Result<String, MailParseError>689     pub fn get_body(&self) -> Result<String, MailParseError> {
690         match self.get_body_encoded() {
691             Body::Base64(body) | Body::QuotedPrintable(body) => body.get_decoded_as_string(),
692             Body::SevenBit(body) | Body::EightBit(body) => body.get_as_string(),
693             Body::Binary(body) => body.get_as_string(),
694         }
695     }
696 
697     /// Get the body of the message as a Rust Vec<u8>. This function tries to
698     /// unapply the Content-Transfer-Encoding if there is one, but won't do
699     /// any charset decoding.
700     ///
701     /// # Examples
702     /// ```
703     ///     use mailparse::parse_mail;
704     ///     let p = parse_mail(concat!(
705     ///             "Subject: test\n",
706     ///             "\n",
707     ///             "This is the body").as_bytes())
708     ///         .unwrap();
709     ///     assert_eq!(p.get_body_raw().unwrap(), b"This is the body");
710     /// ```
get_body_raw(&self) -> Result<Vec<u8>, MailParseError>711     pub fn get_body_raw(&self) -> Result<Vec<u8>, MailParseError> {
712         match self.get_body_encoded() {
713             Body::Base64(body) | Body::QuotedPrintable(body) => body.get_decoded(),
714             Body::SevenBit(body) | Body::EightBit(body) => Ok(Vec::<u8>::from(body.get_raw())),
715             Body::Binary(body) => Ok(Vec::<u8>::from(body.get_raw())),
716         }
717     }
718 
719     /// Get the body of the message.
720     /// This function returns the original body without attempting to
721     /// unapply the Content-Transfer-Encoding. The returned object
722     /// contains information that allows the caller to control decoding
723     /// as desired.
724     ///
725     /// # Examples
726     /// ```
727     ///     use mailparse::parse_mail;
728     ///     use mailparse::body::Body;
729     ///
730     ///     let mail = parse_mail(b"Content-Transfer-Encoding: base64\r\n\r\naGVsbG 8gd\r\n29ybGQ=").unwrap();
731     ///
732     ///     match mail.get_body_encoded() {
733     ///         Body::Base64(body) => {
734     ///             assert_eq!(body.get_raw(), b"aGVsbG 8gd\r\n29ybGQ=");
735     ///             assert_eq!(body.get_decoded().unwrap(), b"hello world");
736     ///             assert_eq!(body.get_decoded_as_string().unwrap(), "hello world");
737     ///         },
738     ///         _ => assert!(false),
739     ///     };
740     ///
741     ///
742     ///     // An email whose body encoding is not known upfront
743     ///     let another_mail = parse_mail(b"").unwrap();
744     ///
745     ///     match another_mail.get_body_encoded() {
746     ///         Body::Base64(body) | Body::QuotedPrintable(body) => {
747     ///             println!("mail body encoded: {:?}", body.get_raw());
748     ///             println!("mail body decoded: {:?}", body.get_decoded().unwrap());
749     ///             println!("mail body decoded as string: {}", body.get_decoded_as_string().unwrap());
750     ///         },
751     ///         Body::SevenBit(body) | Body::EightBit(body) => {
752     ///             println!("mail body: {:?}", body.get_raw());
753     ///             println!("mail body as string: {}", body.get_as_string().unwrap());
754     ///         },
755     ///         Body::Binary(body) => {
756     ///             println!("mail body binary: {:?}", body.get_raw());
757     ///         }
758     ///     }
759     /// ```
get_body_encoded(&'a self) -> Body<'a>760     pub fn get_body_encoded(&'a self) -> Body<'a> {
761         let transfer_encoding = self
762             .headers
763             .get_first_value("Content-Transfer-Encoding")
764             .map(|s| s.to_lowercase());
765 
766         Body::new(self.body_bytes, &self.ctype, &transfer_encoding)
767     }
768 
769     /// Returns a struct that wraps the headers for this message.
770     /// The struct provides utility methods to read the individual headers.
get_headers(&'a self) -> Headers<'a>771     pub fn get_headers(&'a self) -> Headers<'a> {
772         Headers::new(&self.header_bytes, &self.headers)
773     }
774 
775     /// Returns a struct containing a parsed representation of the
776     /// Content-Disposition header. The first header with this name
777     /// is used, if there are multiple. See the `parse_content_disposition`
778     /// method documentation for more details on the semantics of the
779     /// returned object.
get_content_disposition(&self) -> ParsedContentDisposition780     pub fn get_content_disposition(&self) -> ParsedContentDisposition {
781         let disposition = self
782             .headers
783             .get_first_value("Content-Disposition")
784             .map(|s| parse_content_disposition(&s))
785             .unwrap_or_default();
786         disposition
787     }
788 }
789 
790 /// The main mail-parsing entry point.
791 /// This function takes the raw data making up the message body and returns a
792 /// structured version of it, which allows easily accessing the header and body
793 /// information as needed.
794 ///
795 /// # Examples
796 /// ```
797 ///     use mailparse::*;
798 ///     let parsed = parse_mail(concat!(
799 ///             "Subject: This is a test email\n",
800 ///             "Content-Type: multipart/alternative; boundary=foobar\n",
801 ///             "Date: Sun, 02 Oct 2016 07:06:22 -0700 (PDT)\n",
802 ///             "\n",
803 ///             "--foobar\n",
804 ///             "Content-Type: text/plain; charset=utf-8\n",
805 ///             "Content-Transfer-Encoding: quoted-printable\n",
806 ///             "\n",
807 ///             "This is the plaintext version, in utf-8. Proof by Euro: =E2=82=AC\n",
808 ///             "--foobar\n",
809 ///             "Content-Type: text/html\n",
810 ///             "Content-Transfer-Encoding: base64\n",
811 ///             "\n",
812 ///             "PGh0bWw+PGJvZHk+VGhpcyBpcyB0aGUgPGI+SFRNTDwvYj4gdmVyc2lvbiwgaW4g \n",
813 ///             "dXMtYXNjaWkuIFByb29mIGJ5IEV1cm86ICZldXJvOzwvYm9keT48L2h0bWw+Cg== \n",
814 ///             "--foobar--\n",
815 ///             "After the final boundary stuff gets ignored.\n").as_bytes())
816 ///         .unwrap();
817 ///     assert_eq!(parsed.headers.get_first_value("Subject"),
818 ///         Some("This is a test email".to_string()));
819 ///     assert_eq!(parsed.subparts.len(), 2);
820 ///     assert_eq!(parsed.subparts[0].get_body().unwrap(),
821 ///         "This is the plaintext version, in utf-8. Proof by Euro: \u{20AC}");
822 ///     assert_eq!(parsed.subparts[1].headers[1].get_value(), "base64");
823 ///     assert_eq!(parsed.subparts[1].ctype.mimetype, "text/html");
824 ///     assert!(parsed.subparts[1].get_body().unwrap().starts_with("<html>"));
825 ///     assert_eq!(dateparse(parsed.headers.get_first_value("Date").unwrap().as_str()).unwrap(), 1475417182);
826 /// ```
parse_mail(raw_data: &[u8]) -> Result<ParsedMail, MailParseError>827 pub fn parse_mail(raw_data: &[u8]) -> Result<ParsedMail, MailParseError> {
828     let (headers, ix_body) = parse_headers(raw_data)?;
829     let ctype = headers
830         .get_first_value("Content-Type")
831         .map(|s| parse_content_type(&s))
832         .unwrap_or_default();
833 
834     let mut result = ParsedMail {
835         header_bytes: &raw_data[0..ix_body],
836         headers,
837         ctype,
838         body_bytes: &raw_data[ix_body..],
839         subparts: Vec::<ParsedMail>::new(),
840     };
841     if result.ctype.mimetype.starts_with("multipart/")
842         && result.ctype.params.get("boundary").is_some()
843         && raw_data.len() > ix_body
844     {
845         let boundary = String::from("--") + &result.ctype.params["boundary"];
846         if let Some(ix_body_end) = find_from_u8(raw_data, ix_body, boundary.as_bytes()) {
847             result.body_bytes = &raw_data[ix_body..ix_body_end];
848             let mut ix_boundary_end = ix_body_end + boundary.len();
849             while let Some(ix_part_start) =
850                 find_from_u8(raw_data, ix_boundary_end, b"\n").map(|v| v + 1)
851             {
852                 // if there is no terminating boundary, assume the part end is the end of the email
853                 let ix_part_end = find_from_u8(raw_data, ix_part_start, boundary.as_bytes())
854                     .unwrap_or_else(|| raw_data.len());
855 
856                 result
857                     .subparts
858                     .push(parse_mail(&raw_data[ix_part_start..ix_part_end])?);
859                 ix_boundary_end = ix_part_end + boundary.len();
860                 if ix_boundary_end + 2 > raw_data.len()
861                     || (raw_data[ix_boundary_end] == b'-' && raw_data[ix_boundary_end + 1] == b'-')
862                 {
863                     break;
864                 }
865             }
866         }
867     }
868     Ok(result)
869 }
870 
/// Used to store params for content-type and content-disposition
struct ParamContent {
    /// The part of the header value that precedes the first ';', with
    /// surrounding whitespace trimmed (e.g. `multipart/alternative`).
    value: String,
    /// The parameters that follow the value, keyed by lower-cased
    /// parameter name.
    params: BTreeMap<String, String>,
}
876 
877 /// Parse parameterized header values such as that for Content-Type
878 /// e.g. `multipart/alternative; boundary=foobar`
879 /// Note: this function is not made public as it may require
880 /// significant changes to be fully correct. For instance,
881 /// it does not handle quoted parameter values containing the
882 /// semicolon (';') character. It also produces a BTreeMap,
883 /// which implicitly does not support multiple parameters with
884 /// the same key. Also, the parameter values may contain language
885 /// information in a format specified by RFC 2184 which is thrown
886 /// away. The format for parameterized header values doesn't
887 /// appear to be strongly specified anywhere.
parse_param_content(content: &str) -> ParamContent888 fn parse_param_content(content: &str) -> ParamContent {
889     let mut tokens = content.split(';');
890     // There must be at least one token produced by split, even if it's empty.
891     let value = tokens.next().unwrap().trim();
892     let mut map: BTreeMap<String, String> = tokens
893         .filter_map(|kv| {
894             kv.find('=').map(|idx| {
895                 let key = kv[0..idx].trim().to_lowercase();
896                 let mut value = kv[idx + 1..].trim();
897                 if value.starts_with('"') && value.ends_with('"') && value.len() > 1 {
898                     value = &value[1..value.len() - 1];
899                 }
900                 (key, value.to_string())
901             })
902         })
903         .collect();
904 
905     // Decode charset encoding, as described in RFC 2184, Section 4.
906     let decode_key_list: Vec<String> = map
907         .keys()
908         .filter_map(|k| k.strip_suffix("*"))
909         .map(String::from)
910         // Skip encoded keys where there is already an equivalent decoded key in the map
911         .filter(|k| !map.contains_key(k))
912         .collect();
913     let encodings = compute_parameter_encodings(&map, &decode_key_list);
914     // Note that when we get here, we might still have entries in `encodings` for continuation segments
915     // that didn't have a *0 segment at all. These shouldn't exist per spec so we can do whatever we want,
916     // as long as we don't panic.
917     for (k, (e, strip)) in encodings {
918         if let Some(charset) = Charset::for_label_no_replacement(e.as_bytes()) {
919             let key = format!("{}*", k);
920             let percent_encoded_value = map.remove(&key).unwrap();
921             let encoded_value = if strip {
922                 percent_decode(percent_encoded_value.splitn(3, '\'').nth(2).unwrap_or(""))
923             } else {
924                 percent_decode(&percent_encoded_value)
925             };
926             let decoded_value = charset.decode_without_bom_handling(&encoded_value).0;
927             map.insert(k, decoded_value.to_string());
928         }
929     }
930 
931     // Unwrap parameter value continuations, as described in RFC 2184, Section 3.
932     let unwrap_key_list: Vec<String> = map
933         .keys()
934         .filter_map(|k| k.strip_suffix("*0"))
935         .map(String::from)
936         // Skip wrapped keys where there is already an unwrapped equivalent in the map
937         .filter(|k| !map.contains_key(k))
938         .collect();
939     for unwrap_key in unwrap_key_list {
940         let mut unwrapped_value = String::new();
941         let mut index = 0;
942         while let Some(wrapped_value_part) = map.remove(&format!("{}*{}", &unwrap_key, index)) {
943             index = index + 1;
944             unwrapped_value.push_str(&wrapped_value_part);
945         }
946         let old_value = map.insert(unwrap_key, unwrapped_value);
947         assert!(old_value.is_none());
948     }
949 
950     ParamContent {
951         value: value.into(),
952         params: map,
953     }
954 }
955 
/// Computes the charset label that applies to every encoded parameter.
///
/// In the returned map, the key is one of the entries from the decode_key_list,
/// (i.e. the parameter key with the trailing '*' stripped). The value is a tuple
/// containing the encoding (or empty string for no encoding found) and a flag
/// that indicates if the encoding needs to be stripped from the value. This is
/// set to true for non-continuation parameter values and for the initial `*0`
/// segment of a continuation; it is false for the later segments.
///
/// # Panics
/// Panics if `map` has no `"<key>*"` entry for a key in `decode_key_list`
/// that is looked up (every key that is not a continuation segment already
/// covered by its `*0` segment).
fn compute_parameter_encodings(
    map: &BTreeMap<String, String>,
    decode_key_list: &[String],
) -> HashMap<String, (String, bool)> {
    /// Returns the text before the first `'` of the `"<decode_key>*"` entry,
    /// which is where the charset label is located.
    fn charset_label<'m>(map: &'m BTreeMap<String, String>, decode_key: &str) -> &'m str {
        map.get(&format!("{}*", decode_key))
            .expect("every decode key must have a matching '*'-suffixed map entry")
            .split('\'')
            .next()
            .unwrap_or("")
    }

    // To handle section 4.1 (combining encodings with continuations), we first
    // compute the encoding for each parameter value or parameter value segment
    // that is encoded. For continuation segments the encoding from the *0 segment
    // overwrites the continuation segment's encoding, if there is one.
    let mut encodings: HashMap<String, (String, bool)> = HashMap::new();
    for decode_key in decode_key_list {
        if let Some(unwrap_key) = decode_key.strip_suffix("*0") {
            let encoding = charset_label(map, decode_key);
            let continuation_prefix = format!("{}*", unwrap_key);
            let segments = decode_key_list
                .iter()
                .filter(|key| key.starts_with(&continuation_prefix));
            for continuation_key in segments {
                // This may (intentionally) overwrite encodings previously found for the
                // continuation segments (which are bogus). In those cases, the flag
                // in the tuple should get updated from true to false.
                encodings.insert(
                    continuation_key.clone(),
                    (encoding.to_string(), continuation_key == decode_key),
                );
            }
        } else if !encodings.contains_key(decode_key) {
            let encoding = charset_label(map, decode_key).to_string();
            let old_value = encodings.insert(decode_key.clone(), (encoding, true));
            assert!(old_value.is_none());
        }
        // else this is a continuation segment and the encoding has already been populated
        // by the initial *0 segment, so we can ignore it.
    }
    encodings
}
991 
/// Decodes `%XX` escapes (two hexadecimal digits, either case) in `encoded`
/// into the bytes they denote and returns the resulting byte sequence.
///
/// Every other byte is copied through unchanged. A `%` that is not followed
/// by two hexadecimal digits is not an error: it is emitted literally and
/// decoding resumes with the byte right after it.
fn percent_decode(encoded: &str) -> Vec<u8> {
    let mut decoded = Vec::with_capacity(encoded.len());
    let mut rest = encoded.as_bytes();
    while let Some((&byte, tail)) = rest.split_first() {
        match (byte, tail) {
            (b'%', [top, bottom, after @ ..])
                if top.is_ascii_hexdigit() && bottom.is_ascii_hexdigit() =>
            {
                decoded.push((hex_to_nybble(*top) << 4) | hex_to_nybble(*bottom));
                rest = after;
            }
            _ => {
                // Ordinary byte, or a '%' without a complete escape after it.
                decoded.push(byte);
                rest = tail;
            }
        }
    }
    decoded
}

/// Converts an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`) to its
/// numeric value in the range 0..=15.
///
/// # Panics
/// Panics if `byte` is not a hexadecimal digit.
fn hex_to_nybble(byte: u8) -> u8 {
    match (byte as char).to_digit(16) {
        // The digit is always below 16, so the narrowing cast cannot truncate.
        Some(digit) => digit as u8,
        None => panic!("Not a hex character!"),
    }
}
1037 
1038 #[cfg(test)]
1039 mod tests {
1040     use super::*;
1041 
    // Exercises `parse_header` on single, unencoded header lines: whitespace
    // handling, folded (multi-line) values, line endings and malformed input.
    #[test]
    fn parse_basic_header() {
        // Plain "Key: Value" pair.
        let (parsed, _) = parse_header(b"Key: Value").unwrap();
        assert_eq!(parsed.key, b"Key");
        assert_eq!(parsed.get_key(), "Key");
        assert_eq!(parsed.get_key_ref(), "Key");
        assert_eq!(parsed.value, b"Value");
        assert_eq!(parsed.get_value(), "Value");
        assert_eq!(parsed.get_value_raw(), "Value".as_bytes());

        // Whitespace before the colon stays in the key; leading whitespace of
        // the value is dropped while trailing whitespace is kept.
        let (parsed, _) = parse_header(b"Key :  Value ").unwrap();
        assert_eq!(parsed.key, b"Key ");
        assert_eq!(parsed.value, b"Value ");
        assert_eq!(parsed.get_value(), "Value ");
        assert_eq!(parsed.get_value_raw(), "Value ".as_bytes());

        // Key with an empty value.
        let (parsed, _) = parse_header(b"Key:").unwrap();
        assert_eq!(parsed.key, b"Key");
        assert_eq!(parsed.value, b"");

        // Both key and value empty.
        let (parsed, _) = parse_header(b":\n").unwrap();
        assert_eq!(parsed.key, b"");
        assert_eq!(parsed.value, b"");

        // Folded value: the raw value keeps the line break, `get_value` does not.
        let (parsed, _) = parse_header(b"Key:Multi-line\n value").unwrap();
        assert_eq!(parsed.key, b"Key");
        assert_eq!(parsed.value, b"Multi-line\n value");
        assert_eq!(parsed.get_value(), "Multi-line value");
        assert_eq!(parsed.get_value_raw(), "Multi-line\n value".as_bytes());

        // Value folded over several lines, with a trailing newline.
        let (parsed, _) = parse_header(b"Key:  Multi\n  line\n value\n").unwrap();
        assert_eq!(parsed.key, b"Key");
        assert_eq!(parsed.value, b"Multi\n  line\n value");
        assert_eq!(parsed.get_value(), "Multi line value");
        assert_eq!(parsed.get_value_raw(), "Multi\n  line\n value".as_bytes());

        // Only the first header is parsed when another one follows.
        let (parsed, _) = parse_header(b"Key: One\nKey2: Two").unwrap();
        assert_eq!(parsed.key, b"Key");
        assert_eq!(parsed.value, b"One");

        // A tab also marks a continuation line.
        let (parsed, _) = parse_header(b"Key: One\n\tOverhang").unwrap();
        assert_eq!(parsed.key, b"Key");
        assert_eq!(parsed.value, b"One\n\tOverhang");
        assert_eq!(parsed.get_value(), "One Overhang");
        assert_eq!(parsed.get_value_raw(), "One\n\tOverhang".as_bytes());

        // A raw non-ASCII byte (0xAE) comes out of `get_value` as U+00AE.
        let (parsed, _) = parse_header(b"SPAM: VIAGRA \xAE").unwrap();
        assert_eq!(parsed.key, b"SPAM");
        assert_eq!(parsed.value, b"VIAGRA \xAE");
        assert_eq!(parsed.get_value(), "VIAGRA \u{ae}");
        assert_eq!(parsed.get_value_raw(), b"VIAGRA \xAE");

        // A header may not start with whitespace.
        parse_header(b" Leading: Space").unwrap_err();

        // No colon at all: the whole line becomes the key, the value is empty.
        let (parsed, _) = parse_header(b"Just a string").unwrap();
        assert_eq!(parsed.key, b"Just a string");
        assert_eq!(parsed.value, b"");
        assert_eq!(parsed.get_value(), "");
        assert_eq!(parsed.get_value_raw(), b"");

        // A key line without a colon ends at the line break.
        let (parsed, _) = parse_header(b"Key\nBroken: Value").unwrap();
        assert_eq!(parsed.key, b"Key");
        assert_eq!(parsed.value, b"");
        assert_eq!(parsed.get_value(), "");
        assert_eq!(parsed.get_value_raw(), b"");

        // CRLF line ending is not part of the value.
        let (parsed, _) = parse_header(b"Key: With CRLF\r\n").unwrap();
        assert_eq!(parsed.key, b"Key");
        assert_eq!(parsed.value, b"With CRLF");
        assert_eq!(parsed.get_value(), "With CRLF");
        assert_eq!(parsed.get_value_raw(), b"With CRLF");

        // Several CRs in front of the LF are all dropped.
        let (parsed, _) = parse_header(b"Key: With spurious CRs\r\r\r\n").unwrap();
        assert_eq!(parsed.value, b"With spurious CRs");
        assert_eq!(parsed.get_value(), "With spurious CRs");
        assert_eq!(parsed.get_value_raw(), b"With spurious CRs");

        // A CR in the middle of the value is preserved.
        let (parsed, _) = parse_header(b"Key: With \r mixed CR\r\n").unwrap();
        assert_eq!(parsed.value, b"With \r mixed CR");
        assert_eq!(parsed.get_value(), "With \r mixed CR");
        assert_eq!(parsed.get_value_raw(), b"With \r mixed CR");

        // The value starts on a continuation line right after the colon.
        let (parsed, _) = parse_header(b"Key:\r\n Value after linebreak").unwrap();
        assert_eq!(parsed.value, b"\r\n Value after linebreak");
        assert_eq!(parsed.get_value(), " Value after linebreak");
        assert_eq!(parsed.get_value_raw(), b"\r\n Value after linebreak");
    }
1129 
    // Exercises decoding of `=?charset?encoding?text?=` encoded words in
    // header values: Q and B encodings, several charsets, adjacent encoded
    // words, and malformed input that must be passed through unchanged.
    #[test]
    fn parse_encoded_headers() {
        // Q-encoded ISO-8859-1 word.
        let (parsed, _) = parse_header(b"Subject: =?iso-8859-1?Q?=A1Hola,_se=F1or!?=").unwrap();
        assert_eq!(parsed.get_key(), "Subject");
        assert_eq!(parsed.get_key_ref(), "Subject");
        assert_eq!(parsed.get_value(), "\u{a1}Hola, se\u{f1}or!");
        assert_eq!(
            parsed.get_value_raw(),
            "=?iso-8859-1?Q?=A1Hola,_se=F1or!?=".as_bytes()
        );

        // The same text split into two encoded words on separate lines.
        let (parsed, _) = parse_header(
            b"Subject: =?iso-8859-1?Q?=A1Hola,?=\n \
                                        =?iso-8859-1?Q?_se=F1or!?=",
        )
        .unwrap();
        assert_eq!(parsed.get_key(), "Subject");
        assert_eq!(parsed.get_key_ref(), "Subject");
        assert_eq!(parsed.get_value(), "\u{a1}Hola, se\u{f1}or!");
        assert_eq!(
            parsed.get_value_raw(),
            "=?iso-8859-1?Q?=A1Hola,?=\n \
                                          =?iso-8859-1?Q?_se=F1or!?="
                .as_bytes()
        );

        // Q-encoded multi-byte UTF-8 sequence (euro sign).
        let (parsed, _) = parse_header(b"Euro: =?utf-8?Q?=E2=82=AC?=").unwrap();
        assert_eq!(parsed.get_key(), "Euro");
        assert_eq!(parsed.get_key_ref(), "Euro");
        assert_eq!(parsed.get_value(), "\u{20ac}");
        assert_eq!(parsed.get_value_raw(), "=?utf-8?Q?=E2=82=AC?=".as_bytes());

        // B (base64) encoded word.
        let (parsed, _) = parse_header(b"HelloWorld: =?utf-8?B?aGVsbG8gd29ybGQ=?=").unwrap();
        assert_eq!(parsed.get_value(), "hello world");
        assert_eq!(
            parsed.get_value_raw(),
            "=?utf-8?B?aGVsbG8gd29ybGQ=?=".as_bytes()
        );

        // Encoded word with empty text.
        let (parsed, _) = parse_header(b"Empty: =?utf-8?Q??=").unwrap();
        assert_eq!(parsed.get_value(), "");
        assert_eq!(parsed.get_value_raw(), "=?utf-8?Q??=".as_bytes());

        // A lone "=?" is not an encoded word and is returned as-is.
        let (parsed, _) = parse_header(b"Incomplete: =?").unwrap();
        assert_eq!(parsed.get_value(), "=?");
        assert_eq!(parsed.get_value_raw(), "=?".as_bytes());

        // Unknown charset label: the word is left undecoded.
        let (parsed, _) = parse_header(b"BadEncoding: =?garbage?Q??=").unwrap();
        assert_eq!(parsed.get_value(), "=?garbage?Q??=");
        assert_eq!(parsed.get_value_raw(), "=?garbage?Q??=".as_bytes());

        // Invalid UTF-8 byte sequence decodes to the replacement character.
        let (parsed, _) = parse_header(b"Invalid: =?utf-8?Q?=E2=AC?=").unwrap();
        assert_eq!(parsed.get_value(), "\u{fffd}");

        // A line break inside an encoded word prevents decoding.
        let (parsed, _) = parse_header(b"LineBreak: =?utf-8?Q?=E2=82\n =AC?=").unwrap();
        assert_eq!(parsed.get_value(), "=?utf-8?Q?=E2=82 =AC?=");

        // Encoded-word syntax glued to preceding text is not decoded.
        let (parsed, _) = parse_header(b"NotSeparateWord: hello=?utf-8?Q?world?=").unwrap();
        assert_eq!(parsed.get_value(), "hello=?utf-8?Q?world?=");

        // Encoded-word syntax glued to following text is not decoded either.
        let (parsed, _) = parse_header(b"NotSeparateWord2: =?utf-8?Q?hello?=world").unwrap();
        assert_eq!(parsed.get_value(), "=?utf-8?Q?hello?=world");

        // An encoded word enclosed in double quotes is decoded.
        let (parsed, _) = parse_header(b"Key: \"=?utf-8?Q?value?=\"").unwrap();
        assert_eq!(parsed.get_value(), "\"value\"");

        // Two encoded words on consecutive lines are joined without a space
        // ("a q" + "uick survey").
        let (parsed, _) = parse_header(b"Subject: =?utf-8?q?=5BOntario_Builder=5D_Understanding_home_shopping_=E2=80=93_a_q?=\n \
                                        =?utf-8?q?uick_survey?=")
            .unwrap();
        assert_eq!(parsed.get_key(), "Subject");
        assert_eq!(parsed.get_key_ref(), "Subject");
        assert_eq!(
            parsed.get_value(),
            "[Ontario Builder] Understanding home shopping \u{2013} a quick survey"
        );

        // Encoded word followed by plain words and a folded continuation.
        let (parsed, _) = parse_header(
            b"Subject: =?utf-8?q?=5BOntario_Builder=5D?= non-qp words\n \
             and the subject continues",
        )
        .unwrap();
        assert_eq!(
            parsed.get_value(),
            "[Ontario Builder] non-qp words and the subject continues"
        );

        // Whitespace between an encoded word and folded plain text is kept.
        let (parsed, _) = parse_header(
            b"Subject: =?utf-8?q?=5BOntario_Builder=5D?= \n \
             and the subject continues",
        )
        .unwrap();
        assert_eq!(
            parsed.get_value(),
            "[Ontario Builder]  and the subject continues"
        );
        assert_eq!(
            parsed.get_value_raw(),
            "=?utf-8?q?=5BOntario_Builder=5D?= \n \
               and the subject continues"
                .as_bytes()
        );

        // B-encoded ISO-2022-JP words on tab-folded lines.
        let (parsed, _) = parse_header(b"Subject: =?ISO-2022-JP?B?GyRCRnwbKEI=?=\n\t=?ISO-2022-JP?B?GyRCS1wbKEI=?=\n\t=?ISO-2022-JP?B?GyRCOGwbKEI=?=")
            .unwrap();
        assert_eq!(parsed.get_key(), "Subject");
        assert_eq!(parsed.get_key_ref(), "Subject");
        assert_eq!(parsed.get_key_raw(), "Subject".as_bytes());
        assert_eq!(parsed.get_value(), "\u{65E5}\u{672C}\u{8A9E}");
        assert_eq!(parsed.get_value_raw(), "=?ISO-2022-JP?B?GyRCRnwbKEI=?=\n\t=?ISO-2022-JP?B?GyRCS1wbKEI=?=\n\t=?ISO-2022-JP?B?GyRCOGwbKEI=?=".as_bytes());

        // The same text as Q-encoded ISO-2022-JP words.
        let (parsed, _) = parse_header(b"Subject: =?ISO-2022-JP?Q?=1B\x24\x42\x46\x7C=1B\x28\x42?=\n\t=?ISO-2022-JP?Q?=1B\x24\x42\x4B\x5C=1B\x28\x42?=\n\t=?ISO-2022-JP?Q?=1B\x24\x42\x38\x6C=1B\x28\x42?=")
            .unwrap();
        assert_eq!(parsed.get_key(), "Subject");
        assert_eq!(parsed.get_key_ref(), "Subject");
        assert_eq!(parsed.get_key_raw(), "Subject".as_bytes());
        assert_eq!(parsed.get_value(), "\u{65E5}\u{672C}\u{8A9E}");
        assert_eq!(parsed.get_value_raw(), "=?ISO-2022-JP?Q?=1B\x24\x42\x46\x7C=1B\x28\x42?=\n\t=?ISO-2022-JP?Q?=1B\x24\x42\x4B\x5C=1B\x28\x42?=\n\t=?ISO-2022-JP?Q?=1B\x24\x42\x38\x6C=1B\x28\x42?=".as_bytes());

        // UTF-7 charset.
        let (parsed, _) = parse_header(b"Subject: =?UTF-7?Q?+JgM-?=").unwrap();
        assert_eq!(parsed.get_key(), "Subject");
        assert_eq!(parsed.get_key_ref(), "Subject");
        assert_eq!(parsed.get_key_raw(), "Subject".as_bytes());
        assert_eq!(parsed.get_value(), "\u{2603}");
        assert_eq!(parsed.get_value_raw(), b"=?UTF-7?Q?+JgM-?=");

        // Encoded word inside a quoted parameter value.
        let (parsed, _) =
            parse_header(b"Content-Type: image/jpeg; name=\"=?UTF-8?B?MDY2MTM5ODEuanBn?=\"")
                .unwrap();
        assert_eq!(parsed.get_key(), "Content-Type");
        assert_eq!(parsed.get_key_ref(), "Content-Type");
        assert_eq!(parsed.get_key_raw(), "Content-Type".as_bytes());
        assert_eq!(parsed.get_value(), "image/jpeg; name=\"06613981.jpg\"");
        assert_eq!(
            parsed.get_value_raw(),
            "image/jpeg; name=\"=?UTF-8?B?MDY2MTM5ODEuanBn?=\"".as_bytes()
        );

        // Encoded word containing double quotes, directly followed by an
        // angle-bracketed address.
        let (parsed, _) = parse_header(
            b"From: =?UTF-8?Q?\"Motorola_Owners=E2=80=99_Forums\"_?=<forums@motorola.com>",
        )
        .unwrap();
        assert_eq!(parsed.get_key(), "From");
        assert_eq!(parsed.get_key_ref(), "From");
        assert_eq!(parsed.get_key_raw(), "From".as_bytes());
        assert_eq!(
            parsed.get_value(),
            "\"Motorola Owners\u{2019} Forums\" <forums@motorola.com>"
        );
    }
1279 
1280     #[test]
encoded_words_and_spaces()1281     fn encoded_words_and_spaces() {
1282         let (parsed, _) = parse_header(b"K: an =?utf-8?q?encoded?=\n word").unwrap();
1283         assert_eq!(parsed.get_value(), "an encoded word");
1284         assert_eq!(
1285             parsed.get_value_raw(),
1286             "an =?utf-8?q?encoded?=\n word".as_bytes()
1287         );
1288 
1289         let (parsed, _) = parse_header(b"K: =?utf-8?q?glue?= =?utf-8?q?these?= \n words").unwrap();
1290         assert_eq!(parsed.get_value(), "gluethese  words");
1291         assert_eq!(
1292             parsed.get_value_raw(),
1293             "=?utf-8?q?glue?= =?utf-8?q?these?= \n words".as_bytes()
1294         );
1295 
1296         let (parsed, _) = parse_header(b"K: =?utf-8?q?glue?= \n =?utf-8?q?again?=").unwrap();
1297         assert_eq!(parsed.get_value(), "glueagain");
1298         assert_eq!(
1299             parsed.get_value_raw(),
1300             "=?utf-8?q?glue?= \n =?utf-8?q?again?=".as_bytes()
1301         );
1302     }
1303 
    // Exercises `parse_headers` on blocks of several headers: folded values,
    // the blank line that ends the header section, repeated keys, CRLF line
    // endings and lines without a colon.
    #[test]
    fn parse_multiple_headers() {
        // Two headers, no trailing newline.
        let (parsed, _) = parse_headers(b"Key: Value\nTwo: Second").unwrap();
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[0].key, b"Key");
        assert_eq!(parsed[0].value, b"Value");
        assert_eq!(parsed[1].key, b"Two");
        assert_eq!(parsed[1].value, b"Second");

        // A continuation line belongs to the header before it.
        let (parsed, _) =
            parse_headers(b"Key: Value\n Overhang\nTwo: Second\nThree: Third").unwrap();
        assert_eq!(parsed.len(), 3);
        assert_eq!(parsed[0].key, b"Key");
        assert_eq!(parsed[0].value, b"Value\n Overhang");
        assert_eq!(parsed[1].key, b"Two");
        assert_eq!(parsed[1].value, b"Second");
        assert_eq!(parsed[2].key, b"Three");
        assert_eq!(parsed[2].value, b"Third");

        // Parsing stops at the blank line; the body is not parsed as a header.
        let (parsed, _) = parse_headers(b"Key: Value\nTwo: Second\n\nBody").unwrap();
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[0].key, b"Key");
        assert_eq!(parsed[0].value, b"Value");
        assert_eq!(parsed[1].key, b"Two");
        assert_eq!(parsed[1].value, b"Second");

        // A larger header block including a folded Received header.
        let (parsed, _) = parse_headers(
            concat!(
                "Return-Path: <kats@foobar.staktrace.com>\n",
                "X-Original-To: kats@baz.staktrace.com\n",
                "Delivered-To: kats@baz.staktrace.com\n",
                "Received: from foobar.staktrace.com (localhost [127.0.0.1])\n",
                "    by foobar.staktrace.com (Postfix) with ESMTP id \
                 139F711C1C34\n",
                "    for <kats@baz.staktrace.com>; Fri, 27 May 2016 02:34:26 \
                 -0400 (EDT)\n",
                "Date: Fri, 27 May 2016 02:34:25 -0400\n",
                "To: kats@baz.staktrace.com\n",
                "From: kats@foobar.staktrace.com\n",
                "Subject: test Fri, 27 May 2016 02:34:25 -0400\n",
                "X-Mailer: swaks v20130209.0 jetmore.org/john/code/swaks/\n",
                "Message-Id: \
                 <20160527063426.139F711C1C34@foobar.staktrace.com>\n",
                "\n",
                "This is a test mailing\n"
            )
            .as_bytes(),
        )
        .unwrap();
        assert_eq!(parsed.len(), 10);
        assert_eq!(parsed[0].key, b"Return-Path");
        assert_eq!(parsed[9].key, b"Message-Id");

        // Repeated keys: `get_first_value` returns the first occurrence,
        // `get_all_values` returns every occurrence in order.
        let (parsed, _) =
            parse_headers(b"Key: Value\nAnotherKey: AnotherValue\nKey: Value2\nKey: Value3\n")
                .unwrap();
        assert_eq!(parsed.len(), 4);
        assert_eq!(parsed.get_first_value("Key"), Some("Value".to_string()));
        assert_eq!(
            parsed.get_all_values("Key"),
            vec!["Value", "Value2", "Value3"]
        );
        assert_eq!(
            parsed.get_first_value("AnotherKey"),
            Some("AnotherValue".to_string())
        );
        assert_eq!(parsed.get_all_values("AnotherKey"), vec!["AnotherValue"]);
        assert_eq!(parsed.get_first_value("NoKey"), None);
        assert_eq!(parsed.get_all_values("NoKey"), Vec::<String>::new());

        // CRLF line endings, including the blank separator line.
        let (parsed, _) = parse_headers(b"Key: value\r\nWith: CRLF\r\n\r\nBody").unwrap();
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed.get_first_value("Key"), Some("value".to_string()));
        assert_eq!(parsed.get_first_value("With"), Some("CRLF".to_string()));

        // Lines without a colon become headers with an empty value.
        let (parsed, _) = parse_headers(b"Bad\nKey\n").unwrap();
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed.get_first_value("Bad"), Some("".to_string()));
        assert_eq!(parsed.get_first_value("Key"), Some("".to_string()));

        // The same when such lines follow a well-formed header.
        let (parsed, _) = parse_headers(b"K:V\nBad\nKey").unwrap();
        assert_eq!(parsed.len(), 3);
        assert_eq!(parsed.get_first_value("K"), Some("V".to_string()));
        assert_eq!(parsed.get_first_value("Bad"), Some("".to_string()));
        assert_eq!(parsed.get_first_value("Key"), Some("".to_string()));
    }
1390 
1391     #[test]
test_parse_content_type()1392     fn test_parse_content_type() {
1393         let ctype = parse_content_type("text/html; charset=utf-8");
1394         assert_eq!(ctype.mimetype, "text/html");
1395         assert_eq!(ctype.charset, "utf-8");
1396         assert_eq!(ctype.params.get("boundary"), None);
1397 
1398         let ctype = parse_content_type(" foo/bar; x=y; charset=\"fake\" ; x2=y2");
1399         assert_eq!(ctype.mimetype, "foo/bar");
1400         assert_eq!(ctype.charset, "fake");
1401         assert_eq!(ctype.params.get("boundary"), None);
1402 
1403         let ctype = parse_content_type(" multipart/bar; boundary=foo ");
1404         assert_eq!(ctype.mimetype, "multipart/bar");
1405         assert_eq!(ctype.charset, "us-ascii");
1406         assert_eq!(ctype.params.get("boundary").unwrap(), "foo");
1407     }
1408 
1409     #[test]
test_parse_content_disposition()1410     fn test_parse_content_disposition() {
1411         let dis = parse_content_disposition("inline");
1412         assert_eq!(dis.disposition, DispositionType::Inline);
1413         assert_eq!(dis.params.get("name"), None);
1414         assert_eq!(dis.params.get("filename"), None);
1415 
1416         let dis = parse_content_disposition(
1417             " attachment; x=y; charset=\"fake\" ; x2=y2; name=\"King Joffrey.death\"",
1418         );
1419         assert_eq!(dis.disposition, DispositionType::Attachment);
1420         assert_eq!(
1421             dis.params.get("name"),
1422             Some(&"King Joffrey.death".to_string())
1423         );
1424         assert_eq!(dis.params.get("filename"), None);
1425 
1426         let dis = parse_content_disposition(" form-data");
1427         assert_eq!(dis.disposition, DispositionType::FormData);
1428         assert_eq!(dis.params.get("name"), None);
1429         assert_eq!(dis.params.get("filename"), None);
1430     }
1431 
1432     #[test]
test_parse_mail()1433     fn test_parse_mail() {
1434         let mail = parse_mail(b"Key: value\r\n\r\nSome body stuffs").unwrap();
1435         assert_eq!(mail.header_bytes, b"Key: value\r\n\r\n");
1436         assert_eq!(mail.headers.len(), 1);
1437         assert_eq!(mail.headers[0].get_key(), "Key");
1438         assert_eq!(mail.headers[0].get_key_ref(), "Key");
1439         assert_eq!(mail.headers[0].get_value(), "value");
1440         assert_eq!(mail.ctype.mimetype, "text/plain");
1441         assert_eq!(mail.ctype.charset, "us-ascii");
1442         assert_eq!(mail.ctype.params.get("boundary"), None);
1443         assert_eq!(mail.body_bytes, b"Some body stuffs");
1444         assert_eq!(mail.get_body_raw().unwrap(), b"Some body stuffs");
1445         assert_eq!(mail.get_body().unwrap(), "Some body stuffs");
1446         assert_eq!(mail.subparts.len(), 0);
1447 
1448         let mail = parse_mail(
1449             concat!(
1450                 "Content-Type: MULTIpart/alternative; bounDAry=myboundary\r\n\r\n",
1451                 "--myboundary\r\n",
1452                 "Content-Type: text/plain\r\n\r\n",
1453                 "This is the plaintext version.\r\n",
1454                 "--myboundary\r\n",
1455                 "Content-Type: text/html;chARset=utf-8\r\n\r\n",
1456                 "This is the <b>HTML</b> version with fake --MYBOUNDARY.\r\n",
1457                 "--myboundary--"
1458             )
1459             .as_bytes(),
1460         )
1461         .unwrap();
1462         assert_eq!(mail.headers.len(), 1);
1463         assert_eq!(mail.headers[0].get_key(), "Content-Type");
1464         assert_eq!(mail.headers[0].get_key_ref(), "Content-Type");
1465         assert_eq!(mail.ctype.mimetype, "multipart/alternative");
1466         assert_eq!(mail.ctype.charset, "us-ascii");
1467         assert_eq!(mail.ctype.params.get("boundary").unwrap(), "myboundary");
1468         assert_eq!(mail.subparts.len(), 2);
1469         assert_eq!(mail.subparts[0].headers.len(), 1);
1470         assert_eq!(mail.subparts[0].ctype.mimetype, "text/plain");
1471         assert_eq!(mail.subparts[0].ctype.charset, "us-ascii");
1472         assert_eq!(mail.subparts[0].ctype.params.get("boundary"), None);
1473         assert_eq!(mail.subparts[1].ctype.mimetype, "text/html");
1474         assert_eq!(mail.subparts[1].ctype.charset, "utf-8");
1475         assert_eq!(mail.subparts[1].ctype.params.get("boundary"), None);
1476 
1477         let mail =
1478             parse_mail(b"Content-Transfer-Encoding: base64\r\n\r\naGVsbG 8gd\r\n29ybGQ=").unwrap();
1479         assert_eq!(mail.get_body_raw().unwrap(), b"hello world");
1480         assert_eq!(mail.get_body().unwrap(), "hello world");
1481 
1482         let mail =
1483             parse_mail(b"Content-Type: text/plain; charset=x-unknown\r\n\r\nhello world").unwrap();
1484         assert_eq!(mail.get_body_raw().unwrap(), b"hello world");
1485         assert_eq!(mail.get_body().unwrap(), "hello world");
1486 
1487         let mail = parse_mail(b"ConTENT-tyPE: text/html\r\n\r\nhello world").unwrap();
1488         assert_eq!(mail.ctype.mimetype, "text/html");
1489         assert_eq!(mail.get_body_raw().unwrap(), b"hello world");
1490         assert_eq!(mail.get_body().unwrap(), "hello world");
1491 
1492         let mail = parse_mail(
1493             b"Content-Type: text/plain; charset=UTF-7\r\nContent-Transfer-Encoding: quoted-printable\r\n\r\n+JgM-",
1494         ).unwrap();
1495         assert_eq!(mail.get_body_raw().unwrap(), b"+JgM-");
1496         assert_eq!(mail.get_body().unwrap(), "\u{2603}");
1497 
1498         let mail = parse_mail(b"Content-Type: text/plain; charset=UTF-7\r\n\r\n+JgM-").unwrap();
1499         assert_eq!(mail.get_body_raw().unwrap(), b"+JgM-");
1500         assert_eq!(mail.get_body().unwrap(), "\u{2603}");
1501     }
1502 
1503     #[test]
test_missing_terminating_boundary()1504     fn test_missing_terminating_boundary() {
1505         let mail = parse_mail(
1506             concat!(
1507                 "Content-Type: multipart/alternative; boundary=myboundary\r\n\r\n",
1508                 "--myboundary\r\n",
1509                 "Content-Type: text/plain\r\n\r\n",
1510                 "part0\r\n",
1511                 "--myboundary\r\n",
1512                 "Content-Type: text/html\r\n\r\n",
1513                 "part1\r\n"
1514             )
1515             .as_bytes(),
1516         )
1517         .unwrap();
1518         assert_eq!(mail.subparts[0].get_body().unwrap(), "part0\r\n");
1519         assert_eq!(mail.subparts[1].get_body().unwrap(), "part1\r\n");
1520     }
1521 
1522     #[test]
test_missing_body()1523     fn test_missing_body() {
1524         let parsed =
1525             parse_mail("Content-Type: multipart/related; boundary=\"----=_\"\n".as_bytes())
1526                 .unwrap();
1527         assert_eq!(parsed.headers[0].get_key(), "Content-Type");
1528         assert_eq!(parsed.get_body_raw().unwrap(), b"");
1529         assert_eq!(parsed.get_body().unwrap(), "");
1530     }
1531 
1532     #[test]
test_no_headers_in_subpart()1533     fn test_no_headers_in_subpart() {
1534         let mail = parse_mail(
1535             concat!(
1536                 "Content-Type: multipart/report; report-type=delivery-status;\n",
1537                 "\tboundary=\"1404630116.22555.postech.q0.x.x.x\"\n",
1538                 "\n",
1539                 "--1404630116.22555.postech.q0.x.x.x\n",
1540                 "\n",
1541                 "--1404630116.22555.postech.q0.x.x.x--\n"
1542             )
1543             .as_bytes(),
1544         )
1545         .unwrap();
1546         assert_eq!(mail.ctype.mimetype, "multipart/report");
1547         assert_eq!(mail.subparts[0].headers.len(), 0);
1548         assert_eq!(mail.subparts[0].ctype.mimetype, "text/plain");
1549         assert_eq!(mail.subparts[0].get_body_raw().unwrap(), b"");
1550         assert_eq!(mail.subparts[0].get_body().unwrap(), "");
1551     }
1552 
1553     #[test]
test_empty()1554     fn test_empty() {
1555         let mail = parse_mail("".as_bytes()).unwrap();
1556         assert_eq!(mail.get_body_raw().unwrap(), b"");
1557         assert_eq!(mail.get_body().unwrap(), "");
1558     }
1559 
1560     #[test]
test_dont_panic_for_value_with_new_lines()1561     fn test_dont_panic_for_value_with_new_lines() {
1562         let parsed = parse_param_content(r#"application/octet-stream; name=""#);
1563         assert_eq!(parsed.params["name"], "\"");
1564     }
1565 
1566     #[test]
test_parameter_value_continuations()1567     fn test_parameter_value_continuations() {
1568         let parsed = parse_param_content("attachment;\n\tfilename*0=\"X\";\n\tfilename*1=\"Y.pdf\"");
1569         assert_eq!(parsed.value, "attachment");
1570         assert_eq!(parsed.params["filename"], "XY.pdf");
1571         assert_eq!(parsed.params.contains_key("filename*0"), false);
1572         assert_eq!(parsed.params.contains_key("filename*1"), false);
1573 
1574         let parsed = parse_param_content("attachment;\n\tfilename=XX.pdf;\n\tfilename*0=\"X\";\n\tfilename*1=\"Y.pdf\"");
1575         assert_eq!(parsed.value, "attachment");
1576         assert_eq!(parsed.params["filename"], "XX.pdf");
1577         assert_eq!(parsed.params["filename*0"], "X");
1578         assert_eq!(parsed.params["filename*1"], "Y.pdf");
1579 
1580         let parsed = parse_param_content("attachment; filename*1=\"Y.pdf\"");
1581         assert_eq!(parsed.params["filename*1"], "Y.pdf");
1582         assert_eq!(parsed.params.contains_key("filename"), false);
1583     }
1584 
1585     #[test]
test_parameter_encodings()1586     fn test_parameter_encodings() {
1587         let parsed = parse_param_content("attachment;\n\tfilename*0*=us-ascii''%28X%29%20801%20-%20X;\n\tfilename*1*=%20%E2%80%93%20X%20;\n\tfilename*2*=X%20X%2Epdf");
1588         // Note this is a real-world case from mutt, but it's wrong. The original filename had an en dash \u{2013} but mutt
1589         // declared us-ascii as the encoding instead of utf-8 for some reason.
1590         assert_eq!(parsed.params["filename"], "(X) 801 - X \u{00E2}\u{20AC}\u{201C} X X X.pdf");
1591         assert_eq!(parsed.params.contains_key("filename*0*"), false);
1592         assert_eq!(parsed.params.contains_key("filename*0"), false);
1593         assert_eq!(parsed.params.contains_key("filename*1*"), false);
1594         assert_eq!(parsed.params.contains_key("filename*1"), false);
1595         assert_eq!(parsed.params.contains_key("filename*2*"), false);
1596         assert_eq!(parsed.params.contains_key("filename*2"), false);
1597 
1598         // Here is the corrected version.
1599         let parsed = parse_param_content("attachment;\n\tfilename*0*=utf-8''%28X%29%20801%20-%20X;\n\tfilename*1*=%20%E2%80%93%20X%20;\n\tfilename*2*=X%20X%2Epdf");
1600         assert_eq!(parsed.params["filename"], "(X) 801 - X \u{2013} X X X.pdf");
1601         assert_eq!(parsed.params.contains_key("filename*0*"), false);
1602         assert_eq!(parsed.params.contains_key("filename*0"), false);
1603         assert_eq!(parsed.params.contains_key("filename*1*"), false);
1604         assert_eq!(parsed.params.contains_key("filename*1"), false);
1605         assert_eq!(parsed.params.contains_key("filename*2*"), false);
1606         assert_eq!(parsed.params.contains_key("filename*2"), false);
1607         let parsed = parse_param_content("attachment; filename*=utf-8'en'%e2%80%A1.bin");
1608         assert_eq!(parsed.params["filename"], "\u{2021}.bin");
1609         assert_eq!(parsed.params.contains_key("filename*"), false);
1610 
1611         let parsed = parse_param_content("attachment; filename*='foo'%e2%80%A1.bin");
1612         assert_eq!(parsed.params["filename*"], "'foo'%e2%80%A1.bin");
1613         assert_eq!(parsed.params.contains_key("filename"), false);
1614 
1615         let parsed = parse_param_content("attachment; filename*=nonexistent'foo'%e2%80%a1.bin");
1616         assert_eq!(parsed.params["filename*"], "nonexistent'foo'%e2%80%a1.bin");
1617         assert_eq!(parsed.params.contains_key("filename"), false);
1618 
1619         let parsed = parse_param_content("attachment; filename*0*=utf-8'en'%e2%80%a1; filename*1*=%e2%80%A1.bin");
1620         assert_eq!(parsed.params["filename"], "\u{2021}\u{2021}.bin");
1621         assert_eq!(parsed.params.contains_key("filename*0*"), false);
1622         assert_eq!(parsed.params.contains_key("filename*0"), false);
1623         assert_eq!(parsed.params.contains_key("filename*1*"), false);
1624         assert_eq!(parsed.params.contains_key("filename*1"), false);
1625 
1626         let parsed = parse_param_content("attachment; filename*0*=utf-8'en'%e2%80%a1; filename*1=%20.bin");
1627         assert_eq!(parsed.params["filename"], "\u{2021}%20.bin");
1628         assert_eq!(parsed.params.contains_key("filename*0*"), false);
1629         assert_eq!(parsed.params.contains_key("filename*0"), false);
1630         assert_eq!(parsed.params.contains_key("filename*1*"), false);
1631         assert_eq!(parsed.params.contains_key("filename*1"), false);
1632 
1633         let parsed = parse_param_content("attachment; filename*0*=utf-8'en'%e2%80%a1; filename*2*=%20.bin");
1634         assert_eq!(parsed.params["filename"], "\u{2021}");
1635         assert_eq!(parsed.params["filename*2"], " .bin");
1636         assert_eq!(parsed.params.contains_key("filename*0*"), false);
1637         assert_eq!(parsed.params.contains_key("filename*0"), false);
1638         assert_eq!(parsed.params.contains_key("filename*2*"), false);
1639 
1640         let parsed = parse_param_content("attachment; filename*0*=utf-8'en'%e2%80%a1; filename*0=foo.bin");
1641         assert_eq!(parsed.params["filename"], "foo.bin");
1642         assert_eq!(parsed.params["filename*0*"], "utf-8'en'%e2%80%a1");
1643         assert_eq!(parsed.params.contains_key("filename*0"), false);
1644 
1645     }
1646 
1647     #[test]
test_default_content_encoding()1648     fn test_default_content_encoding() {
1649         let mail = parse_mail(b"Content-Type: text/plain; charset=UTF-7\r\n\r\n+JgM-").unwrap();
1650         let body = mail.get_body_encoded();
1651         match body {
1652             Body::SevenBit(body) => {
1653                 assert_eq!(body.get_raw(), b"+JgM-");
1654                 assert_eq!(body.get_as_string().unwrap(), "\u{2603}");
1655             }
1656             _ => assert!(false),
1657         };
1658     }
1659 
1660     #[test]
test_7bit_content_encoding()1661     fn test_7bit_content_encoding() {
1662         let mail = parse_mail(b"Content-Type: text/plain; charset=UTF-7\r\nContent-Transfer-Encoding: 7bit\r\n\r\n+JgM-").unwrap();
1663         let body = mail.get_body_encoded();
1664         match body {
1665             Body::SevenBit(body) => {
1666                 assert_eq!(body.get_raw(), b"+JgM-");
1667                 assert_eq!(body.get_as_string().unwrap(), "\u{2603}");
1668             }
1669             _ => assert!(false),
1670         };
1671     }
1672 
1673     #[test]
test_8bit_content_encoding()1674     fn test_8bit_content_encoding() {
1675         let mail = parse_mail(b"Content-Type: text/plain; charset=UTF-7\r\nContent-Transfer-Encoding: 8bit\r\n\r\n+JgM-").unwrap();
1676         let body = mail.get_body_encoded();
1677         match body {
1678             Body::EightBit(body) => {
1679                 assert_eq!(body.get_raw(), b"+JgM-");
1680                 assert_eq!(body.get_as_string().unwrap(), "\u{2603}");
1681             }
1682             _ => assert!(false),
1683         };
1684     }
1685 
1686     #[test]
test_quoted_printable_content_encoding()1687     fn test_quoted_printable_content_encoding() {
1688         let mail = parse_mail(
1689             b"Content-Type: text/plain; charset=UTF-7\r\nContent-Transfer-Encoding: quoted-printable\r\n\r\n+JgM-",
1690         ).unwrap();
1691         match mail.get_body_encoded() {
1692             Body::QuotedPrintable(body) => {
1693                 assert_eq!(body.get_raw(), b"+JgM-");
1694                 assert_eq!(body.get_decoded().unwrap(), b"+JgM-");
1695                 assert_eq!(body.get_decoded_as_string().unwrap(), "\u{2603}");
1696             }
1697             _ => assert!(false),
1698         };
1699     }
1700 
1701     #[test]
test_base64_content_encoding()1702     fn test_base64_content_encoding() {
1703         let mail =
1704             parse_mail(b"Content-Transfer-Encoding: base64\r\n\r\naGVsbG 8gd\r\n29ybGQ=").unwrap();
1705         match mail.get_body_encoded() {
1706             Body::Base64(body) => {
1707                 assert_eq!(body.get_raw(), b"aGVsbG 8gd\r\n29ybGQ=");
1708                 assert_eq!(body.get_decoded().unwrap(), b"hello world");
1709                 assert_eq!(body.get_decoded_as_string().unwrap(), "hello world");
1710             }
1711             _ => assert!(false),
1712         };
1713     }
1714 
1715     #[test]
test_binary_content_encoding()1716     fn test_binary_content_encoding() {
1717         let mail = parse_mail(b"Content-Transfer-Encoding: binary\r\n\r\n######").unwrap();
1718         let body = mail.get_body_encoded();
1719         match body {
1720             Body::Binary(body) => {
1721                 assert_eq!(body.get_raw(), b"######");
1722             }
1723             _ => assert!(false),
1724         };
1725     }
1726 
1727     #[test]
test_body_content_encoding_with_multipart()1728     fn test_body_content_encoding_with_multipart() {
1729         let mail_filepath = "./tests/files/test_email_01.txt";
1730         let mail = std::fs::read(mail_filepath)
1731             .expect(&format!("Unable to open the file [{}]", mail_filepath));
1732         let mail = parse_mail(&mail).unwrap();
1733 
1734         let subpart_0 = mail.subparts.get(0).unwrap();
1735         match subpart_0.get_body_encoded() {
1736             Body::SevenBit(body) => {
1737                 assert_eq!(
1738                     body.get_as_string().unwrap().trim(),
1739                     "<html>Test with attachments</html>"
1740                 );
1741             }
1742             _ => assert!(false),
1743         };
1744 
1745         let subpart_1 = mail.subparts.get(1).unwrap();
1746         match subpart_1.get_body_encoded() {
1747             Body::Base64(body) => {
1748                 let pdf_filepath = "./tests/files/test_email_01_sample.pdf";
1749                 let original_pdf = std::fs::read(pdf_filepath)
1750                     .expect(&format!("Unable to open the file [{}]", pdf_filepath));
1751                 assert_eq!(body.get_decoded().unwrap(), original_pdf);
1752             }
1753             _ => assert!(false),
1754         };
1755 
1756         let subpart_2 = mail.subparts.get(2).unwrap();
1757         match subpart_2.get_body_encoded() {
1758             Body::Base64(body) => {
1759                 assert_eq!(
1760                     body.get_decoded_as_string().unwrap(),
1761                     "txt file context for email collector\n1234567890987654321\n"
1762                 );
1763             }
1764             _ => assert!(false),
1765         };
1766     }
1767 
1768     #[test]
test_fuzzer_testcase()1769     fn test_fuzzer_testcase() {
1770         const INPUT: &'static str = "U3ViamVjdDplcy1UeXBlOiBtdW50ZW50LVV5cGU6IW11bAAAAAAAAAAAamVjdDplcy1UeXBlOiBtdW50ZW50LVV5cGU6IG11bAAAAAAAAAAAAAAAAABTTUFZdWJqZf86OiP/dCBTdWJqZWN0Ol8KRGF0ZTog/////////////////////wAAAAAAAAAAAHQgYnJmAHQgYnJmZXItRW5jeXBlOnY9NmU3OjA2OgAAAAAAAAAAAAAAADEAAAAAAP/8mAAAAAAAAAAA+f///wAAAAAAAP8AAAAAAAAAAAAAAAAAAAAAAAAAPT0/PzEAAAEAAA==";
1771 
1772         if let Ok(parsed) = parse_mail(&base64::decode(INPUT).unwrap()) {
1773             if let Some(date) = parsed.headers.get_first_value("Date") {
1774                 let _ = dateparse(&date);
1775             }
1776         }
1777     }
1778 
1779     #[test]
test_fuzzer_testcase_2()1780     fn test_fuzzer_testcase_2() {
1781         const INPUT: &'static str = "U3ViamVjdDogVGhpcyBpcyBhIHRlc3QgZW1haWwKQ29udGVudC1UeXBlOiBtdWx0aXBhcnQvYWx0ZXJuYXRpdmU7IGJvdW5kYXJ5PczMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMZm9vYmFyCkRhdGU6IFN1biwgMDIgT2MKCi1TdWJqZWMtZm9vYmFydDo=";
1782         if let Ok(parsed) = parse_mail(&base64::decode(INPUT).unwrap()) {
1783             if let Some(date) = parsed.headers.get_first_value("Date") {
1784                 let _ = dateparse(&date);
1785             }
1786         }
1787     }
1788 
1789     #[test]
test_header_split()1790     fn test_header_split() {
1791         let mail = parse_mail(
1792             b"Content-Type: text/plain;\r\ncharset=\"utf-8\"\r\nContent-Transfer-Encoding: 8bit\r\n\r\n",
1793         ).unwrap();
1794         assert_eq!(mail.ctype.mimetype, "text/plain");
1795         assert_eq!(mail.ctype.charset, "us-ascii");
1796     }
1797 
1798     #[test]
test_percent_decoder()1799     fn test_percent_decoder() {
1800         assert_eq!(percent_decode("hi %0d%0A%%2A%zz%"), b"hi \r\n%*%zz%");
1801     }
1802 }
1803