1 #![forbid(unsafe_code)]
2 
3 use std::fmt;
4 use std::error;
5 
/// Maximum number of bytes allowed on one encoded line, not counting the
/// terminating CRLF. The decoder rejects longer lines in strict mode and the
/// encoder inserts soft line breaks so that it never exceeds this length.
const LINE_LENGTH_LIMIT: usize = 76;
7 
/// Uppercase hexadecimal digits, indexed by the value (0-15) of the nibble
/// each digit represents.
static HEX_CHARS: &[char] = &[
    '0', '1', '2', '3', '4', '5', '6', '7', // 0x0 - 0x7
    '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', // 0x8 - 0xF
];
26 
/// A flag that allows control over the decoding strictness.
///
/// The type is `Copy`, so a single value can be reused for any number of
/// `decode` calls.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParseMode {
    /// Perform strict checking over the input, and return an error if any
    /// input appears malformed.
    Strict,
    /// Perform robust parsing, and gracefully handle any malformed input. This
    /// can result in the decoded output being different than what was intended.
    Robust,
}
38 
/// An error type that represents different kinds of decoding errors.
///
/// Values are plain, field-less variants, so they can be copied and compared
/// directly (for example with `assert_eq!` in tests).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QuotedPrintableError {
    /// A byte was found in the input that was outside of the allowed range. The
    /// allowed range is the horizontal tab (ASCII 0x09), CR/LF characters (ASCII
    /// 0x0D and 0x0A), and anything in the ASCII range 0x20 to 0x7E, inclusive.
    InvalidByte,
    /// A line was found in the input that exceeded 76 bytes in length,
    /// excluding the terminating CRLF.
    LineTooLong,
    /// An '=' character was found in the input without the proper number of
    /// hex-characters following it. This includes '=' characters followed
    /// by a single character and then the CRLF pair, for example.
    IncompleteHexOctet,
    /// An '=' character was found with two following characters, but they were
    /// not hex characters. '=Hi' for example would be an invalid encoding.
    InvalidHexOctet,
    /// An '=' character was found with two following hex characters, but the
    /// hex characters were lowercase rather than uppercase. The spec explicitly
    /// requires uppercase hex to be used, so this is considered an error.
    LowercaseHexOctet,
}
61 
62 impl fmt::Display for QuotedPrintableError {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result63     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
64         match *self {
65             QuotedPrintableError::InvalidByte => {
66                 write!(
67                     f,
68                     "A unallowed byte was found in the quoted-printable input"
69                 )
70             }
71             QuotedPrintableError::LineTooLong => {
72                 write!(
73                     f,
74                     "A line length in the quoted-printed input exceeded 76 bytes"
75                 )
76             }
77             QuotedPrintableError::IncompleteHexOctet => {
78                 write!(
79                     f,
80                     "A '=' followed by only one character was found in the input"
81                 )
82             }
83             QuotedPrintableError::InvalidHexOctet => {
84                 write!(
85                     f,
86                     "A '=' followed by non-hex characters was found in the input"
87                 )
88             }
89             QuotedPrintableError::LowercaseHexOctet => {
90                 write!(f, "A '=' was followed by lowercase hex characters")
91             }
92         }
93     }
94 }
95 
96 impl error::Error for QuotedPrintableError {
description(&self) -> &str97     fn description(&self) -> &str {
98         "invalid quoted-printable input"
99     }
100 
cause(&self) -> Option<&dyn error::Error>101     fn cause(&self) -> Option<&dyn error::Error> {
102         None
103     }
104 }
105 
106 
107 /// Decodes a piece of quoted-printable data.
108 ///
109 /// The quoted-printable transfer-encoding is defined in IETF RFC 2045, section
110 /// 6.7. This function attempts to decode input that is conformant with that
111 /// spec. Note that quoted-printable encoding is independent of charset, and so
112 /// this function returns a Vec<u8> of bytes upon success. It is up to the caller
113 /// to convert that to a String if desired; the charset required to do so must
114 /// come from somewhere else.
115 ///
116 /// # Examples
117 ///
118 /// ```
119 ///     use quoted_printable::{decode, ParseMode};
120 ///     let decoded = decode("hello=3Dworld=0D=0A".as_bytes(), ParseMode::Robust).unwrap();
121 ///     assert_eq!("hello=world\r\n", String::from_utf8(decoded).unwrap());
122 /// ```
123 ///
124 /// # Errors
125 ///
126 /// If this function is called with ParseMode::Strict, then it may return
127 /// a QuotedPrintableError if it detects that the input does not strictly conform
128 /// to the quoted-printable spec. If this function is called with ParseMode::Robust,
129 /// then it will attempt to gracefully handle any errors that arise. This might
130 /// result in input bytes being stripped out and ignored in some cases. Refer
131 /// to IETF RFC 2045, section 6.7 for details on what constitutes valid and
132 /// invalid input, and what a "robust" implementation would do in the face of
133 /// invalid input.
134 #[inline(always)]
decode<R: AsRef<[u8]>>(input: R, mode: ParseMode) -> Result<Vec<u8>, QuotedPrintableError>135 pub fn decode<R: AsRef<[u8]>>(input: R, mode: ParseMode) -> Result<Vec<u8>, QuotedPrintableError> {
136     _decode(input.as_ref(), mode)
137 }
138 
_decode(input: &[u8], mode: ParseMode) -> Result<Vec<u8>, QuotedPrintableError>139 fn _decode(input: &[u8], mode: ParseMode) -> Result<Vec<u8>, QuotedPrintableError> {
140     let filtered = input
141         .into_iter()
142         .filter_map(|&c| match c {
143             b'\t' | b'\r' | b'\n' | b' '..=b'~' => Some(c as char),
144             _ => None,
145         })
146         .collect::<String>();
147     if mode == ParseMode::Strict && filtered.len() != input.len() {
148         return Err(QuotedPrintableError::InvalidByte);
149     }
150     let mut decoded = Vec::new();
151     let mut lines = filtered.lines();
152     let mut add_line_break = None;
153     loop {
154         let mut bytes = match lines.next() {
155             Some(v) => v.trim_end().bytes(),
156             None => {
157                 if mode == ParseMode::Strict && add_line_break == Some(false) {
158                     return Err(QuotedPrintableError::IncompleteHexOctet);
159                 }
160                 break;
161             }
162         };
163 
164         if mode == ParseMode::Strict && bytes.len() > LINE_LENGTH_LIMIT {
165             return Err(QuotedPrintableError::LineTooLong);
166         }
167 
168         if add_line_break == Some(true) {
169             decoded.push(b'\r');
170             decoded.push(b'\n');
171             add_line_break = Some(false);
172         }
173 
174         loop {
175             let byte = match bytes.next() {
176                 Some(v) => v,
177                 None => {
178                     add_line_break = Some(true);
179                     break;
180                 }
181             };
182 
183             if byte == b'=' {
184                 let upper = match bytes.next() {
185                     Some(v) => v,
186                     None => break,
187                 };
188                 let lower = match bytes.next() {
189                     Some(v) => v,
190                     None => {
191                         if mode == ParseMode::Strict {
192                             return Err(QuotedPrintableError::IncompleteHexOctet);
193                         }
194                         decoded.push(byte);
195                         decoded.push(upper);
196                         add_line_break = Some(true);
197                         break;
198                     }
199                 };
200                 let upper_char = upper as char;
201                 let lower_char = lower as char;
202                 if upper_char.is_digit(16) && lower_char.is_digit(16) {
203                     if mode == ParseMode::Strict {
204                         if upper_char.to_uppercase().next() != Some(upper_char) ||
205                             lower_char.to_uppercase().next() != Some(lower_char)
206                         {
207                             return Err(QuotedPrintableError::LowercaseHexOctet);
208                         }
209                     }
210                     let combined = upper_char.to_digit(16).unwrap() << 4 |
211                         lower_char.to_digit(16).unwrap();
212                     decoded.push(combined as u8);
213                 } else {
214                     if mode == ParseMode::Strict {
215                         return Err(QuotedPrintableError::InvalidHexOctet);
216                     }
217                     decoded.push(byte);
218                     decoded.push(upper);
219                     decoded.push(lower);
220                 }
221             } else {
222                 decoded.push(byte);
223             }
224         }
225     }
226     Ok(decoded)
227 }
228 
append( result: &mut String, to_append: &[char], bytes_on_line: &mut usize, backup_pos: &mut usize, )229 fn append(
230     result: &mut String,
231     to_append: &[char],
232     bytes_on_line: &mut usize,
233     backup_pos: &mut usize,
234 ) {
235     if *bytes_on_line + to_append.len() > LINE_LENGTH_LIMIT {
236         if *bytes_on_line == LINE_LENGTH_LIMIT {
237             // We're already at the max length, so inserting the '=' in the soft
238             // line break would put us over. Instead, we insert the soft line
239             // break at the backup pos, which is just before the last thing
240             // appended.
241             *bytes_on_line = result.len() - *backup_pos;
242             result.insert_str(*backup_pos, "=\r\n");
243         } else {
244             result.push_str("=\r\n");
245             *bytes_on_line = 0;
246         }
247     }
248     result.extend(to_append);
249     *bytes_on_line = *bytes_on_line + to_append.len();
250     *backup_pos = result.len() - to_append.len();
251 }
252 
253 /// Encodes some bytes into quoted-printable format.
254 ///
255 /// The quoted-printable transfer-encoding is defined in IETF RFC 2045, section
256 /// 6.7. This function encodes a set of raw bytes into a format conformant with
257 /// that spec. The output contains CRLF pairs as needed so that each line is
258 /// wrapped to 76 characters or less (not including the CRLF).
259 ///
260 /// # Examples
261 ///
262 /// ```
263 ///     use quoted_printable::encode;
264 ///     let encoded = encode("hello, \u{20ac} zone!");
265 ///     assert_eq!("hello, =E2=82=AC zone!", String::from_utf8(encoded).unwrap());
266 /// ```
267 #[inline(always)]
encode<R: AsRef<[u8]>>(input: R) -> Vec<u8>268 pub fn encode<R: AsRef<[u8]>>(input: R) -> Vec<u8> {
269     let encoded_as_string = _encode(input.as_ref());
270     encoded_as_string.into()
271 }
272 
_encode(input: &[u8]) -> String273 fn _encode(input: &[u8]) -> String {
274     let mut result = String::new();
275     let mut on_line: usize = 0;
276     let mut backup_pos: usize = 0;
277     let mut was_cr = false;
278     let mut it = input.iter();
279 
280     while let Some(&byte) = it.next() {
281         if was_cr {
282             if byte == b'\n' {
283                 result.push_str("\r\n");
284                 on_line = 0;
285                 was_cr = false;
286                 continue;
287             }
288             // encode the CR ('\r') we skipped over before
289             append(&mut result, &['=', '0', 'D'], &mut on_line, &mut backup_pos);
290         }
291         if byte == b'\r' {
292             // remember we had a CR ('\r') but do not encode it yet
293             was_cr = true;
294             continue;
295         } else {
296             was_cr = false;
297         }
298         encode_byte(&mut result, byte, &mut on_line, &mut backup_pos);
299     }
300 
301     // we haven't yet encoded the last CR ('\r') so do it now
302     if was_cr {
303         append(&mut result, &['=', '0', 'D'], &mut on_line, &mut backup_pos);
304     }
305 
306     result
307 }
308 
309 /// Encodes some bytes into quoted-printable format.
310 ///
311 /// The difference to `encode` is that this function returns a `String`.
312 ///
313 /// The quoted-printable transfer-encoding is defined in IETF RFC 2045, section
314 /// 6.7. This function encodes a set of raw bytes into a format conformant with
315 /// that spec. The output contains CRLF pairs as needed so that each line is
316 /// wrapped to 76 characters or less (not including the CRLF).
317 ///
318 /// # Examples
319 ///
320 /// ```
321 ///     use quoted_printable::encode_to_str;
322 ///     let encoded = encode_to_str("hello, \u{20ac} zone!");
323 ///     assert_eq!("hello, =E2=82=AC zone!", encoded);
324 /// ```
325 #[inline(always)]
encode_to_str<R: AsRef<[u8]>>(input: R) -> String326 pub fn encode_to_str<R: AsRef<[u8]>>(input: R) -> String {
327     _encode(input.as_ref())
328 }
329 
330 #[inline]
encode_byte(result: &mut String, to_append: u8, on_line: &mut usize, backup_pos: &mut usize)331 fn encode_byte(result: &mut String, to_append: u8, on_line: &mut usize, backup_pos: &mut usize) {
332     match to_append {
333         b'=' => append(result, &['=', '3', 'D'], on_line, backup_pos),
334         b'\t' | b' '..=b'~' => append(result, &[char::from(to_append)], on_line, backup_pos),
335         _ => append(result, &hex_encode_byte(to_append), on_line, backup_pos),
336     }
337 }
338 
339 #[inline(always)]
hex_encode_byte(byte: u8) -> [char; 3]340 fn hex_encode_byte(byte: u8) -> [char; 3] {
341     [
342         '=',
343         lower_nibble_to_hex(byte >> 4),
344         lower_nibble_to_hex(byte),
345     ]
346 }
347 
348 #[inline(always)]
lower_nibble_to_hex(half_byte: u8) -> char349 fn lower_nibble_to_hex(half_byte: u8) -> char {
350     HEX_CHARS[(half_byte & 0x0F) as usize]
351 }
352 
#[cfg(test)]
mod tests {
    use super::*;

    /// Decodes `input` in the given mode and returns the result as text,
    /// panicking if decoding fails or the output is not valid UTF-8.
    fn decode_text(input: &str, mode: ParseMode) -> String {
        String::from_utf8(decode(input, mode).unwrap()).unwrap()
    }

    /// Asserts that strict decoding of `input` fails with the error variant
    /// whose `Debug` name is `expected_variant`. Checking the variant (rather
    /// than just `is_err()`) catches input that is rejected for the wrong
    /// reason.
    fn assert_strict_error(input: &str, expected_variant: &str) {
        let err = decode(input, ParseMode::Strict).unwrap_err();
        assert_eq!(expected_variant, format!("{:?}", err), "input: {:?}", input);
    }

    #[test]
    fn test_decode() {
        // Well-formed input.
        assert_eq!("hello world", decode_text("hello world", ParseMode::Strict));
        assert_eq!(
            "Now's the time for all folk to come to the aid of their country.",
            decode_text(
                "Now's the time =\r\nfor all folk to come=\r\n to the aid of their country.",
                ParseMode::Strict,
            )
        );
        assert_eq!(
            "\r\nhello=world",
            decode_text("=0D=0Ahello=3Dworld", ParseMode::Strict)
        );
        assert_eq!(
            "hello world\r\ngoodbye world",
            decode_text("hello world\r\ngoodbye world", ParseMode::Strict)
        );

        // Trailing whitespace is dropped, also after a soft line break.
        assert_eq!(
            "hello world\r\ngoodbye world",
            decode_text("hello world   \r\ngoodbye world   ", ParseMode::Strict)
        );
        assert_eq!(
            "hello world\r\ngoodbye world x",
            decode_text("hello world   \r\ngoodbye world =  \r\nx", ParseMode::Strict)
        );

        // Malformed input: strict mode names the problem, robust mode copes.
        assert_strict_error("hello world=x", "IncompleteHexOctet");
        assert_eq!(
            "hello world=x",
            decode_text("hello world=x", ParseMode::Robust)
        );

        assert_strict_error("hello =world=", "InvalidHexOctet");
        assert_eq!(
            "hello =world",
            decode_text("hello =world=", ParseMode::Robust)
        );

        assert_strict_error("hello world=3d", "LowercaseHexOctet");
        assert_eq!(
            "hello world=",
            decode_text("hello world=3d", ParseMode::Robust)
        );

        assert_strict_error("hello world=3m", "InvalidHexOctet");
        assert_eq!(
            "hello world=3m",
            decode_text("hello world=3m", ParseMode::Robust)
        );

        assert_strict_error("hello\u{FF}world", "InvalidByte");
        assert_eq!(
            "helloworld",
            decode_text("hello\u{FF}world", ParseMode::Robust)
        );

        // Line length limit: 77 characters is one too many, 76 is fine.
        let digits_77 = "1234567890".repeat(7) + "1234567";
        let digits_76 = &digits_77[..76];
        assert_eq!(77, digits_77.len());
        assert_strict_error(&digits_77, "LineTooLong");
        assert_eq!(digits_77, decode_text(&digits_77, ParseMode::Robust));
        assert_eq!(digits_76, decode_text(digits_76, ParseMode::Strict));
    }

    #[test]
    fn test_encode() {
        assert_eq!("hello, world!", encode_to_str("hello, world!".as_bytes()));
        assert_eq!(
            "hello,=0Cworld!",
            encode_to_str("hello,\u{c}world!".as_bytes())
        );
        assert_eq!(
            "this=00is=C3=BFa=3Dlong=0Dstring=0Athat gets wrapped and stuff, woohoo!=C3=\r\n=89",
            encode_to_str(
                "this\u{0}is\u{FF}a=long\rstring\nthat gets wrapped and stuff, woohoo!\u{c9}",
            )
        );
        assert_eq!(
            "this=00is=C3=BFa=3Dlong=0Dstring=0Athat just fits in a line,   woohoo!=C3=89",
            encode_to_str(
                "this\u{0}is\u{FF}a=long\rstring\nthat just fits in a line,   woohoo!\u{c9}",
            )
        );
        assert_eq!(
            "this \r\nhas linebreaks\r\n built right in.",
            encode_to_str("this \r\nhas linebreaks\r\n built right in.")
        );

        let xs = |count: usize| "X".repeat(count);

        // Test that soft line breaks get inserted at the right place
        // (76 characters fit on a line, the 77th forces a break).
        let exactly_76 = xs(75) + "Y";
        assert_eq!(76, exactly_76.len());
        assert_eq!(exactly_76, encode_to_str(&exactly_76));
        assert_eq!(xs(75) + "=\r\nXY", encode_to_str(xs(76) + "Y"));
        assert_eq!(xs(75) + "=\r\nXXY", encode_to_str(xs(77) + "Y"));

        // Test that soft line breaks don't break up an encoded octet
        assert_eq!(xs(72) + "=00Y", encode_to_str(xs(72) + "\u{0}Y"));
        assert_eq!(xs(73) + "=\r\n=00Y", encode_to_str(xs(73) + "\u{0}Y"));
        assert_eq!(xs(74) + "=\r\n=00Y", encode_to_str(xs(74) + "\u{0}Y"));
        assert_eq!(xs(75) + "=\r\n=00Y", encode_to_str(xs(75) + "\u{0}Y"));

        // A CR is only a line break when it is followed by LF.
        assert_eq!("=0D=3D", encode_to_str("\r="));
        assert_eq!("=0D\r\n", encode_to_str("\r\r\n"));
        assert_eq!("a=0D\r\nb", encode_to_str("a\r\r\nb"));
        assert_eq!("=0D", encode_to_str("\r"));
        assert_eq!("=0D=0D", encode_to_str("\r\r"));
    }

    #[test]
    fn test_lower_nibble_to_hex() {
        // (byte, hex digit of its high nibble, hex digit of its low nibble)
        let cases: &[(u8, char, char)] = &[
            (0, '0', '0'),
            (1, '0', '1'),
            (9, '0', '9'),
            (10, '0', 'A'),
            (15, '0', 'F'),
            (16, '1', '0'),
            (255, 'F', 'F'),
        ];

        for &(byte, high, low) in cases {
            assert_eq!(high, lower_nibble_to_hex(byte >> 4), "high nibble of {}", byte);
            assert_eq!(low, lower_nibble_to_hex(byte), "low nibble of {}", byte);
        }
    }
}
553