1 #![forbid(unsafe_code)]
2
3 use std::fmt;
4 use std::error;
5
/// Maximum number of bytes allowed on a single encoded line, not counting the
/// terminating CRLF. Used both to validate input lines in strict decoding and
/// to decide where the encoder inserts soft line breaks.
const LINE_LENGTH_LIMIT: usize = 76;
7
/// Uppercase hexadecimal digits, indexed by nibble value (0 through 15).
static HEX_CHARS: &[char] = &[
    '0', '1', '2', '3', '4', '5', '6', '7', // 0x0 - 0x7
    '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', // 0x8 - 0xF
];
26
/// A flag that allows control over the decoding strictness.
///
/// The flag carries no data, so it is `Copy`: the same value can be passed to
/// several `decode` calls without being moved.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParseMode {
    /// Perform strict checking over the input, and return an error if any
    /// input appears malformed.
    Strict,
    /// Perform robust parsing, and gracefully handle any malformed input. This
    /// can result in the decoded output being different than what was intended.
    Robust,
}
38
/// An error type that represents different kinds of decoding errors.
///
/// The variants carry no payload, so values can be cloned and compared
/// directly (for example `assert_eq!(err, QuotedPrintableError::LineTooLong)`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum QuotedPrintableError {
    /// A byte was found in the input that was outside of the allowed range. The
    /// allowed range is the horizontal tab (ASCII 0x09), CR/LF characters (ASCII
    /// 0x0D and 0x0A), and anything in the ASCII range 0x20 to 0x7E, inclusive.
    InvalidByte,
    /// Lines were found in the input that exceeded 76 bytes in length, excluding
    /// the terminating CRLF.
    LineTooLong,
    /// An '=' character was found in the input without the proper number of
    /// hex-characters following it. This includes '=' characters followed
    /// by a single character and then the CRLF pair, for example.
    IncompleteHexOctet,
    /// An '=' character was found with two following characters, but they were
    /// not hex characters. '=Hi' for example would be an invalid encoding.
    InvalidHexOctet,
    /// An '=' character was found with two following hex characters, but the
    /// hex characters were lowercase rather than uppercase. The spec explicitly
    /// requires uppercase hex to be used, so this is considered an error.
    LowercaseHexOctet,
}
61
62 impl fmt::Display for QuotedPrintableError {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result63 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
64 match *self {
65 QuotedPrintableError::InvalidByte => {
66 write!(
67 f,
68 "A unallowed byte was found in the quoted-printable input"
69 )
70 }
71 QuotedPrintableError::LineTooLong => {
72 write!(
73 f,
74 "A line length in the quoted-printed input exceeded 76 bytes"
75 )
76 }
77 QuotedPrintableError::IncompleteHexOctet => {
78 write!(
79 f,
80 "A '=' followed by only one character was found in the input"
81 )
82 }
83 QuotedPrintableError::InvalidHexOctet => {
84 write!(
85 f,
86 "A '=' followed by non-hex characters was found in the input"
87 )
88 }
89 QuotedPrintableError::LowercaseHexOctet => {
90 write!(f, "A '=' was followed by lowercase hex characters")
91 }
92 }
93 }
94 }
95
96 impl error::Error for QuotedPrintableError {
description(&self) -> &str97 fn description(&self) -> &str {
98 "invalid quoted-printable input"
99 }
100
cause(&self) -> Option<&dyn error::Error>101 fn cause(&self) -> Option<&dyn error::Error> {
102 None
103 }
104 }
105
106
107 /// Decodes a piece of quoted-printable data.
108 ///
109 /// The quoted-printable transfer-encoding is defined in IETF RFC 2045, section
110 /// 6.7. This function attempts to decode input that is conformant with that
111 /// spec. Note that quoted-printable encoding is independent of charset, and so
112 /// this function returns a Vec<u8> of bytes upon success. It is up to the caller
113 /// to convert that to a String if desired; the charset required to do so must
114 /// come from somewhere else.
115 ///
116 /// # Examples
117 ///
118 /// ```
119 /// use quoted_printable::{decode, ParseMode};
120 /// let decoded = decode("hello=3Dworld=0D=0A".as_bytes(), ParseMode::Robust).unwrap();
121 /// assert_eq!("hello=world\r\n", String::from_utf8(decoded).unwrap());
122 /// ```
123 ///
124 /// # Errors
125 ///
126 /// If this function is called with ParseMode::Strict, then it may return
127 /// a QuotedPrintableError if it detects that the input does not strictly conform
128 /// to the quoted-printable spec. If this function is called with ParseMode::Robust,
129 /// then it will attempt to gracefully handle any errors that arise. This might
130 /// result in input bytes being stripped out and ignored in some cases. Refer
131 /// to IETF RFC 2045, section 6.7 for details on what constitutes valid and
132 /// invalid input, and what a "robust" implementation would do in the face of
133 /// invalid input.
134 #[inline(always)]
decode<R: AsRef<[u8]>>(input: R, mode: ParseMode) -> Result<Vec<u8>, QuotedPrintableError>135 pub fn decode<R: AsRef<[u8]>>(input: R, mode: ParseMode) -> Result<Vec<u8>, QuotedPrintableError> {
136 _decode(input.as_ref(), mode)
137 }
138
_decode(input: &[u8], mode: ParseMode) -> Result<Vec<u8>, QuotedPrintableError>139 fn _decode(input: &[u8], mode: ParseMode) -> Result<Vec<u8>, QuotedPrintableError> {
140 let filtered = input
141 .into_iter()
142 .filter_map(|&c| match c {
143 b'\t' | b'\r' | b'\n' | b' '..=b'~' => Some(c as char),
144 _ => None,
145 })
146 .collect::<String>();
147 if mode == ParseMode::Strict && filtered.len() != input.len() {
148 return Err(QuotedPrintableError::InvalidByte);
149 }
150 let mut decoded = Vec::new();
151 let mut lines = filtered.lines();
152 let mut add_line_break = None;
153 loop {
154 let mut bytes = match lines.next() {
155 Some(v) => v.trim_end().bytes(),
156 None => {
157 if mode == ParseMode::Strict && add_line_break == Some(false) {
158 return Err(QuotedPrintableError::IncompleteHexOctet);
159 }
160 break;
161 }
162 };
163
164 if mode == ParseMode::Strict && bytes.len() > LINE_LENGTH_LIMIT {
165 return Err(QuotedPrintableError::LineTooLong);
166 }
167
168 if add_line_break == Some(true) {
169 decoded.push(b'\r');
170 decoded.push(b'\n');
171 add_line_break = Some(false);
172 }
173
174 loop {
175 let byte = match bytes.next() {
176 Some(v) => v,
177 None => {
178 add_line_break = Some(true);
179 break;
180 }
181 };
182
183 if byte == b'=' {
184 let upper = match bytes.next() {
185 Some(v) => v,
186 None => break,
187 };
188 let lower = match bytes.next() {
189 Some(v) => v,
190 None => {
191 if mode == ParseMode::Strict {
192 return Err(QuotedPrintableError::IncompleteHexOctet);
193 }
194 decoded.push(byte);
195 decoded.push(upper);
196 add_line_break = Some(true);
197 break;
198 }
199 };
200 let upper_char = upper as char;
201 let lower_char = lower as char;
202 if upper_char.is_digit(16) && lower_char.is_digit(16) {
203 if mode == ParseMode::Strict {
204 if upper_char.to_uppercase().next() != Some(upper_char) ||
205 lower_char.to_uppercase().next() != Some(lower_char)
206 {
207 return Err(QuotedPrintableError::LowercaseHexOctet);
208 }
209 }
210 let combined = upper_char.to_digit(16).unwrap() << 4 |
211 lower_char.to_digit(16).unwrap();
212 decoded.push(combined as u8);
213 } else {
214 if mode == ParseMode::Strict {
215 return Err(QuotedPrintableError::InvalidHexOctet);
216 }
217 decoded.push(byte);
218 decoded.push(upper);
219 decoded.push(lower);
220 }
221 } else {
222 decoded.push(byte);
223 }
224 }
225 }
226 Ok(decoded)
227 }
228
append( result: &mut String, to_append: &[char], bytes_on_line: &mut usize, backup_pos: &mut usize, )229 fn append(
230 result: &mut String,
231 to_append: &[char],
232 bytes_on_line: &mut usize,
233 backup_pos: &mut usize,
234 ) {
235 if *bytes_on_line + to_append.len() > LINE_LENGTH_LIMIT {
236 if *bytes_on_line == LINE_LENGTH_LIMIT {
237 // We're already at the max length, so inserting the '=' in the soft
238 // line break would put us over. Instead, we insert the soft line
239 // break at the backup pos, which is just before the last thing
240 // appended.
241 *bytes_on_line = result.len() - *backup_pos;
242 result.insert_str(*backup_pos, "=\r\n");
243 } else {
244 result.push_str("=\r\n");
245 *bytes_on_line = 0;
246 }
247 }
248 result.extend(to_append);
249 *bytes_on_line = *bytes_on_line + to_append.len();
250 *backup_pos = result.len() - to_append.len();
251 }
252
253 /// Encodes some bytes into quoted-printable format.
254 ///
255 /// The quoted-printable transfer-encoding is defined in IETF RFC 2045, section
256 /// 6.7. This function encodes a set of raw bytes into a format conformant with
257 /// that spec. The output contains CRLF pairs as needed so that each line is
258 /// wrapped to 76 characters or less (not including the CRLF).
259 ///
260 /// # Examples
261 ///
262 /// ```
263 /// use quoted_printable::encode;
264 /// let encoded = encode("hello, \u{20ac} zone!");
265 /// assert_eq!("hello, =E2=82=AC zone!", String::from_utf8(encoded).unwrap());
266 /// ```
267 #[inline(always)]
encode<R: AsRef<[u8]>>(input: R) -> Vec<u8>268 pub fn encode<R: AsRef<[u8]>>(input: R) -> Vec<u8> {
269 let encoded_as_string = _encode(input.as_ref());
270 encoded_as_string.into()
271 }
272
_encode(input: &[u8]) -> String273 fn _encode(input: &[u8]) -> String {
274 let mut result = String::new();
275 let mut on_line: usize = 0;
276 let mut backup_pos: usize = 0;
277 let mut was_cr = false;
278 let mut it = input.iter();
279
280 while let Some(&byte) = it.next() {
281 if was_cr {
282 if byte == b'\n' {
283 result.push_str("\r\n");
284 on_line = 0;
285 was_cr = false;
286 continue;
287 }
288 // encode the CR ('\r') we skipped over before
289 append(&mut result, &['=', '0', 'D'], &mut on_line, &mut backup_pos);
290 }
291 if byte == b'\r' {
292 // remember we had a CR ('\r') but do not encode it yet
293 was_cr = true;
294 continue;
295 } else {
296 was_cr = false;
297 }
298 encode_byte(&mut result, byte, &mut on_line, &mut backup_pos);
299 }
300
301 // we haven't yet encoded the last CR ('\r') so do it now
302 if was_cr {
303 append(&mut result, &['=', '0', 'D'], &mut on_line, &mut backup_pos);
304 }
305
306 result
307 }
308
309 /// Encodes some bytes into quoted-printable format.
310 ///
311 /// The difference to `encode` is that this function returns a `String`.
312 ///
313 /// The quoted-printable transfer-encoding is defined in IETF RFC 2045, section
314 /// 6.7. This function encodes a set of raw bytes into a format conformant with
315 /// that spec. The output contains CRLF pairs as needed so that each line is
316 /// wrapped to 76 characters or less (not including the CRLF).
317 ///
318 /// # Examples
319 ///
320 /// ```
321 /// use quoted_printable::encode_to_str;
322 /// let encoded = encode_to_str("hello, \u{20ac} zone!");
323 /// assert_eq!("hello, =E2=82=AC zone!", encoded);
324 /// ```
325 #[inline(always)]
encode_to_str<R: AsRef<[u8]>>(input: R) -> String326 pub fn encode_to_str<R: AsRef<[u8]>>(input: R) -> String {
327 _encode(input.as_ref())
328 }
329
330 #[inline]
encode_byte(result: &mut String, to_append: u8, on_line: &mut usize, backup_pos: &mut usize)331 fn encode_byte(result: &mut String, to_append: u8, on_line: &mut usize, backup_pos: &mut usize) {
332 match to_append {
333 b'=' => append(result, &['=', '3', 'D'], on_line, backup_pos),
334 b'\t' | b' '..=b'~' => append(result, &[char::from(to_append)], on_line, backup_pos),
335 _ => append(result, &hex_encode_byte(to_append), on_line, backup_pos),
336 }
337 }
338
339 #[inline(always)]
hex_encode_byte(byte: u8) -> [char; 3]340 fn hex_encode_byte(byte: u8) -> [char; 3] {
341 [
342 '=',
343 lower_nibble_to_hex(byte >> 4),
344 lower_nibble_to_hex(byte),
345 ]
346 }
347
348 #[inline(always)]
lower_nibble_to_hex(half_byte: u8) -> char349 fn lower_nibble_to_hex(half_byte: u8) -> char {
350 HEX_CHARS[(half_byte & 0x0F) as usize]
351 }
352
/// Unit tests for the decoder, the encoder and the hex helper.
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises `decode` in both parse modes: plain text, soft line breaks,
    /// trailing whitespace, malformed escapes, disallowed bytes and the line
    /// length limit.
    #[test]
    fn test_decode() {
        // Plain text passes through unchanged.
        assert_eq!(
            "hello world",
            String::from_utf8(decode("hello world", ParseMode::Strict).unwrap()).unwrap()
        );
        // Soft line breaks ("=" at the end of a line) join lines without
        // producing a CRLF in the output.
        assert_eq!(
            "Now's the time for all folk to come to the aid of their country.",
            String::from_utf8(
                decode(
                    "Now's the time =\r\nfor all folk to come=\r\n \
                     to the aid of their country.",
                    ParseMode::Strict,
                ).unwrap(),
            ).unwrap()
        );
        // Escaped CR, LF and '=' are decoded to the raw bytes.
        assert_eq!(
            "\r\nhello=world",
            String::from_utf8(decode("=0D=0Ahello=3Dworld", ParseMode::Strict).unwrap()).unwrap()
        );
        // Hard line breaks between lines are preserved as CRLF.
        assert_eq!(
            "hello world\r\ngoodbye world",
            String::from_utf8(
                decode("hello world\r\ngoodbye world", ParseMode::Strict).unwrap(),
            ).unwrap()
        );
        // Trailing whitespace on a line is stripped.
        assert_eq!(
            "hello world\r\ngoodbye world",
            String::from_utf8(
                decode("hello world \r\ngoodbye world ", ParseMode::Strict).unwrap(),
            ).unwrap()
        );
        // Whitespace after the "=" of a soft line break is stripped as well.
        assert_eq!(
            "hello world\r\ngoodbye world x",
            String::from_utf8(
                decode(
                    "hello world \r\ngoodbye world = \r\nx",
                    ParseMode::Strict,
                ).unwrap(),
            ).unwrap()
        );

        // "=" followed by a single character: strict rejects it, robust
        // passes it through unchanged.
        assert_eq!(true, decode("hello world=x", ParseMode::Strict).is_err());
        assert_eq!(
            "hello world=x",
            String::from_utf8(decode("hello world=x", ParseMode::Robust).unwrap()).unwrap()
        );

        // "=wo" is not a hex octet; robust keeps it literally and treats the
        // final "=" as a soft line break.
        assert_eq!(true, decode("hello =world=", ParseMode::Strict).is_err());
        assert_eq!(
            "hello =world",
            String::from_utf8(decode("hello =world=", ParseMode::Robust).unwrap()).unwrap()
        );

        // Lowercase hex digits: an error in strict mode, decoded in robust mode.
        assert_eq!(true, decode("hello world=3d", ParseMode::Strict).is_err());
        assert_eq!(
            "hello world=",
            String::from_utf8(decode("hello world=3d", ParseMode::Robust).unwrap()).unwrap()
        );

        // Non-hex characters after "=": strict rejects, robust passes through.
        assert_eq!(true, decode("hello world=3m", ParseMode::Strict).is_err());
        assert_eq!(
            "hello world=3m",
            String::from_utf8(decode("hello world=3m", ParseMode::Robust).unwrap()).unwrap()
        );

        // Bytes outside the allowed range: strict rejects, robust drops them.
        assert_eq!(true, decode("hello\u{FF}world", ParseMode::Strict).is_err());
        assert_eq!(
            "helloworld",
            String::from_utf8(decode("hello\u{FF}world", ParseMode::Robust).unwrap()).unwrap()
        );

        // A 77-character line exceeds the limit: strict rejects it, robust
        // decodes it anyway.
        assert_eq!(
            true,
            decode(
                "12345678901234567890123456789012345678901234567890123456789012345678901234567",
                ParseMode::Strict,
            ).is_err()
        );
        assert_eq!(
            "12345678901234567890123456789012345678901234567890123456789012345678901234567",
            String::from_utf8(
                decode(
                    "12345678901234567890123456789012345678901234567890123456789012345678901234567",
                    ParseMode::Robust,
                ).unwrap(),
            ).unwrap()
        );
        // A 76-character line is exactly at the limit and accepted in strict mode.
        assert_eq!(
            "1234567890123456789012345678901234567890123456789012345678901234567890123456",
            String::from_utf8(
                decode(
                    "1234567890123456789012345678901234567890123456789012345678901234567890123456",
                    ParseMode::Strict,
                ).unwrap(),
            ).unwrap()
        );
    }

    /// Exercises `encode_to_str`: escaping, hard line breaks, soft line break
    /// placement and handling of carriage returns.
    #[test]
    fn test_encode() {
        assert_eq!("hello, world!", encode_to_str("hello, world!".as_bytes()));
        // Control characters are hex-escaped.
        assert_eq!(
            "hello,=0Cworld!",
            encode_to_str("hello,\u{c}world!".as_bytes())
        );
        // Output longer than one line gets a soft line break.
        assert_eq!(
            "this=00is=C3=BFa=3Dlong=0Dstring=0Athat gets wrapped and stuff, \
             woohoo!=C3=\r\n=89",
            encode_to_str(
                "this\u{0}is\u{FF}a=long\rstring\nthat gets \
                 wrapped and stuff, woohoo!\u{c9}",
            )
        );
        assert_eq!(
            "this=00is=C3=BFa=3Dlong=0Dstring=0Athat just fits in a line, woohoo!=C3=89",
            encode_to_str(
                "this\u{0}is\u{FF}a=long\rstring\nthat just fits \
                 in a line, woohoo!\u{c9}",
            )
        );
        // CRLF pairs in the input are kept as hard line breaks.
        assert_eq!(
            "this \r\nhas linebreaks\r\n built right in.",
            encode_to_str("this \r\nhas linebreaks\r\n built right in.")
        );
        // Test that soft line breaks get inserted at the right place
        assert_eq!(
            "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXY",
            encode_to_str(
                "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXY",
            )
        );
        assert_eq!(
            "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX=\r\nXY",
            encode_to_str(
                "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXY",
            )
        );
        assert_eq!(
            "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX=\r\nXXY",
            encode_to_str(
                "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXY",
            )
        );
        // Test that soft line breaks don't break up an encoded octet
        assert_eq!(
            "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX=00Y",
            encode_to_str(
                "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\u{0}Y",
            )
        );
        assert_eq!(
            "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX=\r\n=00Y",
            encode_to_str(
                "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\u{0}Y",
            )
        );
        assert_eq!(
            "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX=\r\n=00Y",
            encode_to_str(
                "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\u{0}Y",
            )
        );
        assert_eq!(
            "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX=\r\n=00Y",
            encode_to_str(
                "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\u{0}Y",
            )
        );
        // A CR that is not followed by LF is escaped; a CR that is part of a
        // CRLF pair is kept as a hard line break.
        assert_eq!("=0D=3D", encode_to_str("\r="));
        assert_eq!("=0D\r\n", encode_to_str("\r\r\n"));
        assert_eq!("a=0D\r\nb", encode_to_str("a\r\r\nb"));
        assert_eq!("=0D", encode_to_str("\r"));
        assert_eq!("=0D=0D", encode_to_str("\r\r"));
    }

    /// Checks the high and low hex digits produced for a range of byte values.
    #[test]
    fn test_lower_nibble_to_hex() {
        // (byte value, expected high digit, expected low digit)
        let test_data: &[(u8, char, char)] = &[
            (0, '0', '0'),
            (1, '0', '1'),
            (9, '0', '9'),
            (10, '0', 'A'),
            (15, '0', 'F'),
            (16, '1', '0'),
            (255, 'F', 'F'),
        ];

        for &(nr, high, low) in test_data.iter() {
            let got_high = lower_nibble_to_hex(nr >> 4);
            assert_eq!(high, got_high);
            let got_low = lower_nibble_to_hex(nr);
            assert_eq!(low, got_low);
        }
    }
}
553