1 // Copyright 2016 `multipart` Crate Developers
2 //
3 // Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4 // http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5 // http://opensource.org/licenses/MIT>, at your option. This file may not be
6 // copied, modified, or distributed except according to those terms.
7 
8 //! Boundary parsing for `multipart` requests.
9 
10 use ::safemem;
11 
12 use super::buf_redux::BufReader;
13 use super::buf_redux::policy::MinBuffered;
14 use super::twoway;
15 
16 use std::cmp;
17 use std::borrow::Borrow;
18 
19 use std::io;
20 use std::io::prelude::*;
21 
22 use self::State::*;
23 
/// Value passed to the `MinBuffered` policy that `BoundaryReader::from_reader` installs
/// on its internal buffered reader.
pub const MIN_BUF_SIZE: usize = 1024;
25 
/// Progress of a `BoundaryReader` through the boundary-delimited stream.
#[derive(Debug, PartialEq, Eq)]
enum State {
    /// The next boundary has not been located in the buffer yet.
    Searching,
    /// A boundary has been located in the buffer but has not been consumed yet.
    BoundaryRead,
    /// A boundary followed by `--` has been consumed; no further fields follow.
    AtEnd,
}
32 
/// A struct implementing `Read` and `BufRead` that will yield bytes until it sees a given sequence.
#[derive(Debug)]
pub struct BoundaryReader<R> {
    // Buffered wrapper around the underlying reader; all data is read through it.
    source: BufReader<R, MinBuffered>,
    // The byte sequence searched for: the caller's boundary with `--` prepended.
    boundary: Vec<u8>,
    // Offset into the current buffer; bytes before it are what reads may yield.
    search_idx: usize,
    // Whether a boundary is still being searched for, has been found, or the end was reached.
    state: State,
}
41 
42 impl<R> BoundaryReader<R> where R: Read {
43     /// Internal API
from_reader<B: Into<Vec<u8>>>(reader: R, boundary: B) -> BoundaryReader<R>44     pub fn from_reader<B: Into<Vec<u8>>>(reader: R, boundary: B) -> BoundaryReader<R> {
45         let mut boundary = boundary.into();
46         safemem::prepend(b"--", &mut boundary);
47         let source = BufReader::new(reader).set_policy(MinBuffered(MIN_BUF_SIZE));
48 
49         BoundaryReader {
50             source,
51             boundary,
52             search_idx: 0,
53             state: Searching,
54         }
55     }
56 
read_to_boundary(&mut self) -> io::Result<&[u8]>57     fn read_to_boundary(&mut self) -> io::Result<&[u8]> {
58         let buf = self.source.fill_buf()?;
59 
60         trace!("Buf: {:?}", String::from_utf8_lossy(buf));
61 
62         debug!("Before search Buf len: {} Search idx: {} State: {:?}",
63                buf.len(), self.search_idx, self.state);
64 
65         if self.state == BoundaryRead || self.state == AtEnd {
66             return Ok(&buf[..self.search_idx])
67         }
68 
69         if self.state == Searching && self.search_idx < buf.len() {
70             let lookahead = &buf[self.search_idx..];
71 
72             // Look for the boundary, or if it isn't found, stop near the end.
73             match find_boundary(lookahead, &self.boundary) {
74                 Ok(found_idx) => {
75                     self.search_idx += found_idx;
76                     self.state = BoundaryRead;
77                 },
78                 Err(yield_len) => {
79                     self.search_idx += yield_len;
80                 }
81             }
82         }
83 
84         debug!("After search Buf len: {} Search idx: {} State: {:?}",
85                buf.len(), self.search_idx, self.state);
86 
87         // back up the cursor to before the boundary's preceding CRLF if we haven't already
88         if self.search_idx >= 2 && !buf[self.search_idx..].starts_with(b"\r\n") {
89             let two_bytes_before = &buf[self.search_idx - 2 .. self.search_idx];
90 
91             trace!("Two bytes before: {:?} ({:?}) (\"\\r\\n\": {:?})",
92                    String::from_utf8_lossy(two_bytes_before), two_bytes_before, b"\r\n");
93 
94             if two_bytes_before == *b"\r\n" {
95                 debug!("Subtract two!");
96                 self.search_idx -= 2;
97             }
98         }
99 
100         let ret_buf = &buf[..self.search_idx];
101 
102         trace!("Returning buf: {:?}", String::from_utf8_lossy(ret_buf));
103 
104         Ok(ret_buf)
105     }
106 
set_min_buf_size(&mut self, min_buf_size: usize)107     pub fn set_min_buf_size(&mut self, min_buf_size: usize) {
108         // ensure the minimum buf size is at least enough to find a boundary with some extra
109         let min_buf_size = cmp::max(self.boundary.len() * 2, min_buf_size);
110 
111         self.source.policy_mut().0 = min_buf_size;
112     }
113 
consume_boundary(&mut self) -> io::Result<bool>114     pub fn consume_boundary(&mut self) -> io::Result<bool> {
115         if self.state == AtEnd {
116             return Ok(false);
117         }
118 
119         while self.state == Searching {
120             debug!("Boundary not found yet");
121 
122             let buf_len = self.read_to_boundary()?.len();
123 
124             if buf_len == 0 && self.state == Searching {
125                 return Err(io::Error::new(io::ErrorKind::UnexpectedEof,
126                                           "unexpected end of request body"));
127             }
128 
129             debug!("Discarding {} bytes", buf_len);
130 
131             self.consume(buf_len);
132         }
133 
134         let consume_amt = {
135             let buf = self.source.fill_buf()?;
136 
137             // if the boundary is found we should have at least this much in-buffer
138             let mut consume_amt = self.search_idx + self.boundary.len();
139 
140             // we don't care about data before the cursor
141             let bnd_segment = &buf[self.search_idx..];
142 
143             if bnd_segment.starts_with(b"\r\n") {
144                 // preceding CRLF needs to be consumed as well
145                 consume_amt += 2;
146 
147                 // assert that we've found the boundary after the CRLF
148                 debug_assert_eq!(*self.boundary, bnd_segment[2 .. self.boundary.len() + 2]);
149             } else {
150                 // assert that we've found the boundary
151                 debug_assert_eq!(*self.boundary, bnd_segment[..self.boundary.len()]);
152             }
153 
154             // include the trailing CRLF or --
155             consume_amt += 2;
156 
157             if buf.len() < consume_amt {
158                 return Err(io::Error::new(io::ErrorKind::UnexpectedEof,
159                                           "not enough bytes to verify boundary"));
160             }
161 
162             // we have enough bytes to verify
163             self.state = Searching;
164 
165             let last_two = &buf[consume_amt - 2 .. consume_amt];
166 
167             match last_two {
168                 b"\r\n" => self.state = Searching,
169                 b"--" => self.state = AtEnd,
170                 _ => return Err(io::Error::new(
171                     io::ErrorKind::InvalidData,
172                     format!("unexpected bytes following multipart boundary: {:X} {:X}",
173                             last_two[0], last_two[1])
174                 )),
175             }
176 
177             consume_amt
178         };
179 
180         trace!("Consuming {} bytes, remaining buf: {:?}",
181                consume_amt,
182                String::from_utf8_lossy(self.source.buffer()));
183 
184         self.source.consume(consume_amt);
185 
186         if cfg!(debug_assertions) {
187 
188         }
189 
190         self.search_idx = 0;
191 
192         trace!("Consumed boundary (state: {:?}), remaining buf: {:?}", self.state,
193                String::from_utf8_lossy(self.source.buffer()));
194 
195         Ok(self.state != AtEnd)
196     }
197 }
198 
199 /// Find the boundary occurrence or the highest length to safely yield
find_boundary(buf: &[u8], boundary: &[u8]) -> Result<usize, usize>200 fn find_boundary(buf: &[u8], boundary: &[u8]) -> Result<usize, usize> {
201     if let Some(idx) = twoway::find_bytes(buf, boundary) {
202         return Ok(idx);
203     }
204 
205     let search_start = buf.len().saturating_sub(boundary.len());
206 
207     // search for just the boundary fragment
208     for i in search_start .. buf.len() {
209         if boundary.starts_with(&buf[i..]) {
210             return Err(i);
211         }
212     }
213 
214     Err(buf.len())
215 }
216 
#[cfg(feature = "bench")]
impl<'a> BoundaryReader<io::Cursor<&'a [u8]>> {
    /// Creates a reader over an in-memory byte slice.
    fn new_with_bytes(bytes: &'a [u8], boundary: &str) -> Self {
        Self::from_reader(io::Cursor::new(bytes), boundary)
    }

    /// Rewinds the reader to the start of its input so the same bytes can be read again.
    ///
    /// # Panics
    ///
    /// Panics if seeking the source to offset 0 fails.
    fn reset(&mut self) {
        // Dump buffer and reset cursor
        self.source.seek(io::SeekFrom::Start(0))
            .expect("seeking the in-memory source back to offset 0 failed");
        self.state = Searching;
        self.search_idx = 0;
    }
}
230 
231 impl<R> Borrow<R> for BoundaryReader<R> {
borrow(&self) -> &R232     fn borrow(&self) -> &R {
233         self.source.get_ref()
234     }
235 }
236 
237 impl<R> Read for BoundaryReader<R> where R: Read {
read(&mut self, out: &mut [u8]) -> io::Result<usize>238     fn read(&mut self, out: &mut [u8]) -> io::Result<usize> {
239         let read = {
240             let mut buf = self.read_to_boundary()?;
241             // This shouldn't ever be an error so unwrapping is fine.
242             buf.read(out).unwrap()
243         };
244 
245         self.consume(read);
246         Ok(read)
247     }
248 }
249 
250 impl<R> BufRead for BoundaryReader<R> where R: Read {
fill_buf(&mut self) -> io::Result<&[u8]>251     fn fill_buf(&mut self) -> io::Result<&[u8]> {
252         self.read_to_boundary()
253     }
254 
consume(&mut self, amt: usize)255     fn consume(&mut self, amt: usize) {
256         let true_amt = cmp::min(amt, self.search_idx);
257 
258         debug!("Consume! amt: {} true amt: {}", amt, true_amt);
259 
260         self.source.consume(true_amt);
261         self.search_idx -= true_amt;
262     }
263 }
264 
265 #[cfg(test)]
266 mod test {
267     use super::BoundaryReader;
268 
269     use std::io;
270     use std::io::prelude::*;
271 
272     const BOUNDARY: &'static str = "boundary";
273     const TEST_VAL: &'static str = "--boundary\r\n\
274                                     dashed-value-1\r\n\
275                                     --boundary\r\n\
276                                     dashed-value-2\r\n\
277                                     --boundary--";
278 
279     #[test]
test_boundary()280     fn test_boundary() {
281         ::init_log();
282 
283         debug!("Testing boundary (no split)");
284 
285         let src = &mut TEST_VAL.as_bytes();
286         let mut reader = BoundaryReader::from_reader(src, BOUNDARY);
287 
288         let mut buf = String::new();
289 
290         test_boundary_reader(&mut reader, &mut buf);
291     }
292 
293     struct SplitReader<'a> {
294         left: &'a [u8],
295         right: &'a [u8],
296     }
297 
298     impl<'a> SplitReader<'a> {
split(data: &'a [u8], at: usize) -> SplitReader<'a>299         fn split(data: &'a [u8], at: usize) -> SplitReader<'a> {
300             let (left, right) = data.split_at(at);
301 
302             SplitReader {
303                 left: left,
304                 right: right,
305             }
306         }
307     }
308 
309     impl<'a> Read for SplitReader<'a> {
read(&mut self, dst: &mut [u8]) -> io::Result<usize>310         fn read(&mut self, dst: &mut [u8]) -> io::Result<usize> {
311             fn copy_bytes_partial(src: &mut &[u8], dst: &mut [u8]) -> usize {
312                 src.read(dst).unwrap()
313             }
314 
315             let mut copy_amt = copy_bytes_partial(&mut self.left, dst);
316 
317             if copy_amt == 0 {
318                 copy_amt = copy_bytes_partial(&mut self.right, dst)
319             };
320 
321             Ok(copy_amt)
322         }
323     }
324 
325     #[test]
test_split_boundary()326     fn test_split_boundary() {
327         ::init_log();
328 
329         debug!("Testing boundary (split)");
330 
331         let mut buf = String::new();
332 
333         // Substitute for `.step_by()` being unstable.
334         for split_at in 0 .. TEST_VAL.len(){
335             debug!("Testing split at: {}", split_at);
336 
337             let src = SplitReader::split(TEST_VAL.as_bytes(), split_at);
338             let mut reader = BoundaryReader::from_reader(src, BOUNDARY);
339             test_boundary_reader(&mut reader, &mut buf);
340         }
341     }
342 
test_boundary_reader<R: Read>(reader: &mut BoundaryReader<R>, buf: &mut String)343     fn test_boundary_reader<R: Read>(reader: &mut BoundaryReader<R>, buf: &mut String) {
344         buf.clear();
345 
346         debug!("Read 1");
347         let _ = reader.read_to_string(buf).unwrap();
348         assert!(buf.is_empty(), "Buffer not empty: {:?}", buf);
349         buf.clear();
350 
351         debug!("Consume 1");
352         reader.consume_boundary().unwrap();
353 
354         debug!("Read 2");
355         let _ = reader.read_to_string(buf).unwrap();
356         assert_eq!(buf, "dashed-value-1");
357         buf.clear();
358 
359         debug!("Consume 2");
360         reader.consume_boundary().unwrap();
361 
362         debug!("Read 3");
363         let _ = reader.read_to_string(buf).unwrap();
364         assert_eq!(buf, "dashed-value-2");
365         buf.clear();
366 
367         debug!("Consume 3");
368         reader.consume_boundary().unwrap();
369 
370         debug!("Read 4");
371         let _ = reader.read_to_string(buf).unwrap();
372         assert_eq!(buf, "");
373     }
374 
375     #[test]
test_empty_body()376     fn test_empty_body() {
377         ::init_log();
378 
379         // empty body contains closing boundary only
380         let mut body: &[u8] = b"--boundary--";
381 
382         let ref mut buf = String::new();
383         let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);
384 
385         debug!("Consume 1");
386         assert_eq!(reader.consume_boundary().unwrap(), false);
387 
388         debug!("Read 1");
389         let _ = reader.read_to_string(buf).unwrap();
390         assert_eq!(buf, "");
391         buf.clear();
392 
393         debug!("Consume 2");
394         assert_eq!(reader.consume_boundary().unwrap(), false);
395     }
396 
397     #[test]
test_leading_crlf()398     fn test_leading_crlf() {
399         ::init_log();
400 
401         let mut body: &[u8] = b"\r\n\r\n--boundary\r\n\
402                          asdf1234\
403                          \r\n\r\n--boundary--";
404 
405         let ref mut buf = String::new();
406         let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);
407 
408 
409         debug!("Consume 1");
410         assert_eq!(reader.consume_boundary().unwrap(), true);
411 
412         debug!("Read 1");
413         let _ = reader.read_to_string(buf).unwrap();
414         assert_eq!(buf, "asdf1234\r\n");
415         buf.clear();
416 
417         debug!("Consume 2");
418         assert_eq!(reader.consume_boundary().unwrap(), false);
419 
420         debug!("Read 2 (empty)");
421         let _ = reader.read_to_string(buf).unwrap();
422         assert_eq!(buf, "");
423     }
424 
425     #[test]
test_trailing_crlf()426     fn test_trailing_crlf() {
427         ::init_log();
428 
429         let mut body: &[u8] = b"--boundary\r\n\
430                          asdf1234\
431                          \r\n\r\n--boundary\r\n\
432                          hjkl5678\r\n--boundary--";
433 
434         let ref mut buf = String::new();
435         let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);
436 
437         debug!("Consume 1");
438         assert_eq!(reader.consume_boundary().unwrap(), true);
439 
440         debug!("Read 1");
441 
442         // Repro for https://github.com/abonander/multipart/issues/93
443         // These two reads should produce the same buffer
444         let buf1 = reader.read_to_boundary().unwrap().to_owned();
445         let buf2 = reader.read_to_boundary().unwrap().to_owned();
446         assert_eq!(buf1, buf2);
447 
448         let _ = reader.read_to_string(buf).unwrap();
449         assert_eq!(buf, "asdf1234\r\n");
450         buf.clear();
451 
452         debug!("Consume 2");
453         assert_eq!(reader.consume_boundary().unwrap(), true);
454 
455         debug!("Read 2");
456         let _ = reader.read_to_string(buf).unwrap();
457         assert_eq!(buf, "hjkl5678");
458         buf.clear();
459 
460         debug!("Consume 3");
461         assert_eq!(reader.consume_boundary().unwrap(), false);
462 
463         debug!("Read 3 (empty)");
464         let _ = reader.read_to_string(buf).unwrap();
465         assert_eq!(buf, "");
466     }
467 
468     // https://github.com/abonander/multipart/issues/93#issuecomment-343610587
469     #[test]
test_trailing_lflf()470     fn test_trailing_lflf() {
471         ::init_log();
472 
473         let mut body: &[u8] = b"--boundary\r\n\
474                          asdf1234\
475                          \n\n\r\n--boundary\r\n\
476                          hjkl5678\r\n--boundary--";
477 
478         let ref mut buf = String::new();
479         let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);
480 
481         debug!("Consume 1");
482         assert_eq!(reader.consume_boundary().unwrap(), true);
483 
484         debug!("Read 1");
485 
486         // same as above
487         let buf1 = reader.read_to_boundary().unwrap().to_owned();
488         let buf2 = reader.read_to_boundary().unwrap().to_owned();
489         assert_eq!(buf1, buf2);
490 
491         let _ = reader.read_to_string(buf).unwrap();
492         assert_eq!(buf, "asdf1234\n\n");
493         buf.clear();
494 
495         debug!("Consume 2");
496         assert_eq!(reader.consume_boundary().unwrap(), true);
497 
498         debug!("Read 2");
499         let _ = reader.read_to_string(buf).unwrap();
500         assert_eq!(buf, "hjkl5678");
501         buf.clear();
502 
503         debug!("Consume 3");
504         assert_eq!(reader.consume_boundary().unwrap(), false);
505 
506         debug!("Read 3 (empty)");
507         let _ = reader.read_to_string(buf).unwrap();
508         assert_eq!(buf, "");
509     }
510 
511     // https://github.com/abonander/multipart/issues/104
512     #[test]
test_unterminated_body()513     fn test_unterminated_body() {
514         ::init_log();
515 
516         let mut body: &[u8] = b"--boundary\r\n\
517                          asdf1234\
518                          \n\n\r\n--boundary\r\n\
519                          hjkl5678  ";
520 
521         let ref mut buf = String::new();
522         let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);
523 
524         debug!("Consume 1");
525         assert_eq!(reader.consume_boundary().unwrap(), true);
526 
527         debug!("Read 1");
528 
529         // same as above
530         let buf1 = reader.read_to_boundary().unwrap().to_owned();
531         let buf2 = reader.read_to_boundary().unwrap().to_owned();
532         assert_eq!(buf1, buf2);
533 
534         let _ = reader.read_to_string(buf).unwrap();
535         assert_eq!(buf, "asdf1234\n\n");
536         buf.clear();
537 
538         debug!("Consume 2");
539         assert_eq!(reader.consume_boundary().unwrap(), true);
540 
541         debug!("Read 2");
542         let _ = reader.read_to_string(buf).unwrap();
543         assert_eq!(buf, "hjkl5678  ");
544         buf.clear();
545 
546         debug!("Consume 3 - expecting error");
547         reader.consume_boundary().unwrap_err();
548     }
549 
550     #[test]
test_lone_boundary()551     fn test_lone_boundary() {
552         let mut body: &[u8] = b"--boundary";
553         let mut reader = BoundaryReader::from_reader(&mut body, "boundary");
554         reader.consume_boundary().unwrap_err();
555     }
556 
557     #[test]
test_invalid_boundary()558     fn test_invalid_boundary() {
559         let mut body: &[u8] = b"--boundary\x00\x00";
560         let mut reader = BoundaryReader::from_reader(&mut body, "boundary");
561         reader.consume_boundary().unwrap_err();
562     }
563 
564     #[test]
test_skip_field()565     fn test_skip_field() {
566         let mut body: &[u8] = b"--boundary\r\nfield1\r\n--boundary\r\nfield2\r\n--boundary--";
567         let mut reader = BoundaryReader::from_reader(&mut body, "boundary");
568 
569         assert_eq!(reader.consume_boundary().unwrap(), true);
570         // skip `field1`
571         assert_eq!(reader.consume_boundary().unwrap(), true);
572 
573         let mut buf = String::new();
574         reader.read_to_string(&mut buf).unwrap();
575         assert_eq!(buf, "field2");
576 
577         assert_eq!(reader.consume_boundary().unwrap(), false);
578     }
579 
580     #[cfg(feature = "bench")]
581     mod bench {
582         extern crate test;
583         use self::test::Bencher;
584 
585         use super::*;
586 
587         #[bench]
bench_boundary_reader(b: &mut Bencher)588         fn bench_boundary_reader(b: &mut Bencher) {
589             let mut reader = BoundaryReader::new_with_bytes(TEST_VAL.as_bytes(), BOUNDARY);
590             let mut buf = String::with_capacity(256);
591 
592             b.iter(|| {
593                 reader.reset();
594                 test_boundary_reader(&mut reader, &mut buf);
595             });
596         }
597     }
598 }
599