1 // Copyright 2016 `multipart` Crate Developers
2 //
3 // Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4 // http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5 // http://opensource.org/licenses/MIT>, at your option. This file may not be
6 // copied, modified, or distributed except according to those terms.
7
8 //! Boundary parsing for `multipart` requests.
9
10 use ::safemem;
11
12 use super::buf_redux::BufReader;
13 use super::buf_redux::policy::MinBuffered;
14 use super::twoway;
15
16 use std::cmp;
17 use std::borrow::Borrow;
18
19 use std::io;
20 use std::io::prelude::*;
21
22 use self::State::*;
23
/// Default minimum number of bytes (1 KiB) to keep buffered from the underlying reader.
///
/// `BoundaryReader::from_reader` hands this value to the `MinBuffered` policy of its internal
/// buffer; `BoundaryReader::set_min_buf_size` can change it afterwards.
pub const MIN_BUF_SIZE: usize = 1 << 10;
25
/// Progress of a `BoundaryReader` relative to the next boundary in the stream.
#[derive(Debug, PartialEq, Eq)]
enum State {
    /// No boundary has been located in the buffered data yet; reads keep scanning for one.
    Searching,
    /// A boundary was located; reads stop short of it until `consume_boundary` is called.
    BoundaryRead,
    /// The closing boundary (the one followed by `--`) has been consumed.
    AtEnd,
}
32
33 /// A struct implementing `Read` and `BufRead` that will yield bytes until it sees a given sequence.
34 #[derive(Debug)]
35 pub struct BoundaryReader<R> {
36 source: BufReader<R, MinBuffered>,
37 boundary: Vec<u8>,
38 search_idx: usize,
39 state: State,
40 }
41
42 impl<R> BoundaryReader<R> where R: Read {
43 /// Internal API
from_reader<B: Into<Vec<u8>>>(reader: R, boundary: B) -> BoundaryReader<R>44 pub fn from_reader<B: Into<Vec<u8>>>(reader: R, boundary: B) -> BoundaryReader<R> {
45 let mut boundary = boundary.into();
46 safemem::prepend(b"--", &mut boundary);
47 let source = BufReader::new(reader).set_policy(MinBuffered(MIN_BUF_SIZE));
48
49 BoundaryReader {
50 source,
51 boundary,
52 search_idx: 0,
53 state: Searching,
54 }
55 }
56
read_to_boundary(&mut self) -> io::Result<&[u8]>57 fn read_to_boundary(&mut self) -> io::Result<&[u8]> {
58 let buf = self.source.fill_buf()?;
59
60 trace!("Buf: {:?}", String::from_utf8_lossy(buf));
61
62 debug!("Before search Buf len: {} Search idx: {} State: {:?}",
63 buf.len(), self.search_idx, self.state);
64
65 if self.state == BoundaryRead || self.state == AtEnd {
66 return Ok(&buf[..self.search_idx])
67 }
68
69 if self.state == Searching && self.search_idx < buf.len() {
70 let lookahead = &buf[self.search_idx..];
71
72 // Look for the boundary, or if it isn't found, stop near the end.
73 match find_boundary(lookahead, &self.boundary) {
74 Ok(found_idx) => {
75 self.search_idx += found_idx;
76 self.state = BoundaryRead;
77 },
78 Err(yield_len) => {
79 self.search_idx += yield_len;
80 }
81 }
82 }
83
84 debug!("After search Buf len: {} Search idx: {} State: {:?}",
85 buf.len(), self.search_idx, self.state);
86
87 // back up the cursor to before the boundary's preceding CRLF if we haven't already
88 if self.search_idx >= 2 && !buf[self.search_idx..].starts_with(b"\r\n") {
89 let two_bytes_before = &buf[self.search_idx - 2 .. self.search_idx];
90
91 trace!("Two bytes before: {:?} ({:?}) (\"\\r\\n\": {:?})",
92 String::from_utf8_lossy(two_bytes_before), two_bytes_before, b"\r\n");
93
94 if two_bytes_before == *b"\r\n" {
95 debug!("Subtract two!");
96 self.search_idx -= 2;
97 }
98 }
99
100 let ret_buf = &buf[..self.search_idx];
101
102 trace!("Returning buf: {:?}", String::from_utf8_lossy(ret_buf));
103
104 Ok(ret_buf)
105 }
106
set_min_buf_size(&mut self, min_buf_size: usize)107 pub fn set_min_buf_size(&mut self, min_buf_size: usize) {
108 // ensure the minimum buf size is at least enough to find a boundary with some extra
109 let min_buf_size = cmp::max(self.boundary.len() * 2, min_buf_size);
110
111 self.source.policy_mut().0 = min_buf_size;
112 }
113
consume_boundary(&mut self) -> io::Result<bool>114 pub fn consume_boundary(&mut self) -> io::Result<bool> {
115 if self.state == AtEnd {
116 return Ok(false);
117 }
118
119 while self.state == Searching {
120 debug!("Boundary not found yet");
121
122 let buf_len = self.read_to_boundary()?.len();
123
124 if buf_len == 0 && self.state == Searching {
125 return Err(io::Error::new(io::ErrorKind::UnexpectedEof,
126 "unexpected end of request body"));
127 }
128
129 debug!("Discarding {} bytes", buf_len);
130
131 self.consume(buf_len);
132 }
133
134 let consume_amt = {
135 let buf = self.source.fill_buf()?;
136
137 // if the boundary is found we should have at least this much in-buffer
138 let mut consume_amt = self.search_idx + self.boundary.len();
139
140 // we don't care about data before the cursor
141 let bnd_segment = &buf[self.search_idx..];
142
143 if bnd_segment.starts_with(b"\r\n") {
144 // preceding CRLF needs to be consumed as well
145 consume_amt += 2;
146
147 // assert that we've found the boundary after the CRLF
148 debug_assert_eq!(*self.boundary, bnd_segment[2 .. self.boundary.len() + 2]);
149 } else {
150 // assert that we've found the boundary
151 debug_assert_eq!(*self.boundary, bnd_segment[..self.boundary.len()]);
152 }
153
154 // include the trailing CRLF or --
155 consume_amt += 2;
156
157 if buf.len() < consume_amt {
158 return Err(io::Error::new(io::ErrorKind::UnexpectedEof,
159 "not enough bytes to verify boundary"));
160 }
161
162 // we have enough bytes to verify
163 self.state = Searching;
164
165 let last_two = &buf[consume_amt - 2 .. consume_amt];
166
167 match last_two {
168 b"\r\n" => self.state = Searching,
169 b"--" => self.state = AtEnd,
170 _ => return Err(io::Error::new(
171 io::ErrorKind::InvalidData,
172 format!("unexpected bytes following multipart boundary: {:X} {:X}",
173 last_two[0], last_two[1])
174 )),
175 }
176
177 consume_amt
178 };
179
180 trace!("Consuming {} bytes, remaining buf: {:?}",
181 consume_amt,
182 String::from_utf8_lossy(self.source.buffer()));
183
184 self.source.consume(consume_amt);
185
186 if cfg!(debug_assertions) {
187
188 }
189
190 self.search_idx = 0;
191
192 trace!("Consumed boundary (state: {:?}), remaining buf: {:?}", self.state,
193 String::from_utf8_lossy(self.source.buffer()));
194
195 Ok(self.state != AtEnd)
196 }
197 }
198
199 /// Find the boundary occurrence or the highest length to safely yield
find_boundary(buf: &[u8], boundary: &[u8]) -> Result<usize, usize>200 fn find_boundary(buf: &[u8], boundary: &[u8]) -> Result<usize, usize> {
201 if let Some(idx) = twoway::find_bytes(buf, boundary) {
202 return Ok(idx);
203 }
204
205 let search_start = buf.len().saturating_sub(boundary.len());
206
207 // search for just the boundary fragment
208 for i in search_start .. buf.len() {
209 if boundary.starts_with(&buf[i..]) {
210 return Err(i);
211 }
212 }
213
214 Err(buf.len())
215 }
216
#[cfg(feature = "bench")]
impl<'a> BoundaryReader<io::Cursor<&'a [u8]>> {
    /// Build a reader over an in-memory byte slice; `boundary` is given without the leading `--`.
    fn new_with_bytes(bytes: &'a [u8], boundary: &str) -> Self {
        Self::from_reader(io::Cursor::new(bytes), boundary)
    }

    /// Rewind to the start of the byte slice so the same reader can be reused.
    ///
    /// # Panics
    /// Panics if the seek fails, instead of silently continuing from a stale position.
    fn reset(&mut self) {
        // Dump buffer and reset cursor
        self.source.seek(io::SeekFrom::Start(0))
            .expect("seeking an in-memory cursor back to its start should not fail");
        self.state = Searching;
        self.search_idx = 0;
    }
}
230
231 impl<R> Borrow<R> for BoundaryReader<R> {
borrow(&self) -> &R232 fn borrow(&self) -> &R {
233 self.source.get_ref()
234 }
235 }
236
237 impl<R> Read for BoundaryReader<R> where R: Read {
read(&mut self, out: &mut [u8]) -> io::Result<usize>238 fn read(&mut self, out: &mut [u8]) -> io::Result<usize> {
239 let read = {
240 let mut buf = self.read_to_boundary()?;
241 // This shouldn't ever be an error so unwrapping is fine.
242 buf.read(out).unwrap()
243 };
244
245 self.consume(read);
246 Ok(read)
247 }
248 }
249
250 impl<R> BufRead for BoundaryReader<R> where R: Read {
fill_buf(&mut self) -> io::Result<&[u8]>251 fn fill_buf(&mut self) -> io::Result<&[u8]> {
252 self.read_to_boundary()
253 }
254
consume(&mut self, amt: usize)255 fn consume(&mut self, amt: usize) {
256 let true_amt = cmp::min(amt, self.search_idx);
257
258 debug!("Consume! amt: {} true amt: {}", amt, true_amt);
259
260 self.source.consume(true_amt);
261 self.search_idx -= true_amt;
262 }
263 }
264
#[cfg(test)]
mod test {
    use super::BoundaryReader;

    use std::io;
    use std::io::prelude::*;

    /// Boundary token shared by the fixtures (without the leading `--`).
    const BOUNDARY: &str = "boundary";

    /// Body with two fields, terminated by the closing boundary.
    const TEST_VAL: &str = "--boundary\r\n\
                            dashed-value-1\r\n\
                            --boundary\r\n\
                            dashed-value-2\r\n\
                            --boundary--";

    #[test]
    fn test_boundary() {
        ::init_log();

        debug!("Testing boundary (no split)");

        let mut src = TEST_VAL.as_bytes();
        let mut reader = BoundaryReader::from_reader(&mut src, BOUNDARY);

        let mut buf = String::new();

        test_boundary_reader(&mut reader, &mut buf);
    }

    /// Reader that serves `left` first and `right` afterwards, never both in a single `read`.
    struct SplitReader<'a> {
        left: &'a [u8],
        right: &'a [u8],
    }

    impl<'a> SplitReader<'a> {
        /// Cut `data` in two at byte offset `at`.
        fn split(data: &'a [u8], at: usize) -> SplitReader<'a> {
            let (left, right) = data.split_at(at);

            SplitReader { left, right }
        }
    }

    impl<'a> Read for SplitReader<'a> {
        fn read(&mut self, dst: &mut [u8]) -> io::Result<usize> {
            // Reading from a byte slice cannot fail.
            let from_left = self.left.read(dst).unwrap();

            if from_left != 0 {
                return Ok(from_left);
            }

            // Left half is exhausted (or `dst` is empty): fall through to the right half.
            Ok(self.right.read(dst).unwrap())
        }
    }

    #[test]
    fn test_split_boundary() {
        ::init_log();

        debug!("Testing boundary (split)");

        let mut buf = String::new();

        // Try every possible position at which the body can be split across two reads.
        for split_at in 0 .. TEST_VAL.len() {
            debug!("Testing split at: {}", split_at);

            let src = SplitReader::split(TEST_VAL.as_bytes(), split_at);
            let mut reader = BoundaryReader::from_reader(src, BOUNDARY);
            test_boundary_reader(&mut reader, &mut buf);
        }
    }

    /// Drive `reader` over `TEST_VAL` and check each field; `buf` is scratch space.
    fn test_boundary_reader<R: Read>(reader: &mut BoundaryReader<R>, buf: &mut String) {
        buf.clear();

        // Nothing precedes the first boundary.
        debug!("Read 1");
        let _ = reader.read_to_string(buf).unwrap();
        assert!(buf.is_empty(), "Buffer not empty: {:?}", buf);

        // What each read is expected to produce after consuming boundary 1, 2 and 3.
        let expected_fields = ["dashed-value-1", "dashed-value-2", ""];

        for (idx, expected) in expected_fields.iter().enumerate() {
            debug!("Consume {}", idx + 1);
            reader.consume_boundary().unwrap();

            buf.clear();

            debug!("Read {}", idx + 2);
            let _ = reader.read_to_string(buf).unwrap();
            assert_eq!(buf.as_str(), *expected);
        }
    }

    #[test]
    fn test_empty_body() {
        ::init_log();

        // empty body contains closing boundary only
        let mut body: &[u8] = b"--boundary--";

        let mut buf = String::new();
        let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);

        debug!("Consume 1");
        assert_eq!(reader.consume_boundary().unwrap(), false);

        debug!("Read 1");
        let _ = reader.read_to_string(&mut buf).unwrap();
        assert_eq!(buf, "");
        buf.clear();

        debug!("Consume 2");
        assert_eq!(reader.consume_boundary().unwrap(), false);
    }

    #[test]
    fn test_leading_crlf() {
        ::init_log();

        let mut body: &[u8] = b"\r\n\r\n--boundary\r\n\
                                asdf1234\
                                \r\n\r\n--boundary--";

        let mut buf = String::new();
        let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);

        debug!("Consume 1");
        assert_eq!(reader.consume_boundary().unwrap(), true);

        debug!("Read 1");
        let _ = reader.read_to_string(&mut buf).unwrap();
        assert_eq!(buf, "asdf1234\r\n");
        buf.clear();

        debug!("Consume 2");
        assert_eq!(reader.consume_boundary().unwrap(), false);

        debug!("Read 2 (empty)");
        let _ = reader.read_to_string(&mut buf).unwrap();
        assert_eq!(buf, "");
    }

    #[test]
    fn test_trailing_crlf() {
        ::init_log();

        let mut body: &[u8] = b"--boundary\r\n\
                                asdf1234\
                                \r\n\r\n--boundary\r\n\
                                hjkl5678\r\n--boundary--";

        let mut buf = String::new();
        let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);

        debug!("Consume 1");
        assert_eq!(reader.consume_boundary().unwrap(), true);

        debug!("Read 1");

        // Repro for https://github.com/abonander/multipart/issues/93
        // These two reads should produce the same buffer
        let first_peek = reader.read_to_boundary().unwrap().to_owned();
        let second_peek = reader.read_to_boundary().unwrap().to_owned();
        assert_eq!(first_peek, second_peek);

        let _ = reader.read_to_string(&mut buf).unwrap();
        assert_eq!(buf, "asdf1234\r\n");
        buf.clear();

        debug!("Consume 2");
        assert_eq!(reader.consume_boundary().unwrap(), true);

        debug!("Read 2");
        let _ = reader.read_to_string(&mut buf).unwrap();
        assert_eq!(buf, "hjkl5678");
        buf.clear();

        debug!("Consume 3");
        assert_eq!(reader.consume_boundary().unwrap(), false);

        debug!("Read 3 (empty)");
        let _ = reader.read_to_string(&mut buf).unwrap();
        assert_eq!(buf, "");
    }

    // https://github.com/abonander/multipart/issues/93#issuecomment-343610587
    #[test]
    fn test_trailing_lflf() {
        ::init_log();

        let mut body: &[u8] = b"--boundary\r\n\
                                asdf1234\
                                \n\n\r\n--boundary\r\n\
                                hjkl5678\r\n--boundary--";

        let mut buf = String::new();
        let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);

        debug!("Consume 1");
        assert_eq!(reader.consume_boundary().unwrap(), true);

        debug!("Read 1");

        // Repeated peeks without consuming must return the same bytes (see issue 93).
        let first_peek = reader.read_to_boundary().unwrap().to_owned();
        let second_peek = reader.read_to_boundary().unwrap().to_owned();
        assert_eq!(first_peek, second_peek);

        let _ = reader.read_to_string(&mut buf).unwrap();
        assert_eq!(buf, "asdf1234\n\n");
        buf.clear();

        debug!("Consume 2");
        assert_eq!(reader.consume_boundary().unwrap(), true);

        debug!("Read 2");
        let _ = reader.read_to_string(&mut buf).unwrap();
        assert_eq!(buf, "hjkl5678");
        buf.clear();

        debug!("Consume 3");
        assert_eq!(reader.consume_boundary().unwrap(), false);

        debug!("Read 3 (empty)");
        let _ = reader.read_to_string(&mut buf).unwrap();
        assert_eq!(buf, "");
    }

    // https://github.com/abonander/multipart/issues/104
    #[test]
    fn test_unterminated_body() {
        ::init_log();

        let mut body: &[u8] = b"--boundary\r\n\
                                asdf1234\
                                \n\n\r\n--boundary\r\n\
                                hjkl5678 ";

        let mut buf = String::new();
        let mut reader = BoundaryReader::from_reader(&mut body, BOUNDARY);

        debug!("Consume 1");
        assert_eq!(reader.consume_boundary().unwrap(), true);

        debug!("Read 1");

        // Repeated peeks without consuming must return the same bytes (see issue 93).
        let first_peek = reader.read_to_boundary().unwrap().to_owned();
        let second_peek = reader.read_to_boundary().unwrap().to_owned();
        assert_eq!(first_peek, second_peek);

        let _ = reader.read_to_string(&mut buf).unwrap();
        assert_eq!(buf, "asdf1234\n\n");
        buf.clear();

        debug!("Consume 2");
        assert_eq!(reader.consume_boundary().unwrap(), true);

        debug!("Read 2");
        let _ = reader.read_to_string(&mut buf).unwrap();
        assert_eq!(buf, "hjkl5678 ");
        buf.clear();

        // The body stops without a closing boundary, so there is nothing left to consume.
        debug!("Consume 3 - expecting error");
        reader.consume_boundary().unwrap_err();
    }

    #[test]
    fn test_lone_boundary() {
        // A boundary with nothing after it cannot be verified.
        let mut body: &[u8] = b"--boundary";
        let mut reader = BoundaryReader::from_reader(&mut body, "boundary");
        reader.consume_boundary().unwrap_err();
    }

    #[test]
    fn test_invalid_boundary() {
        // The bytes after the boundary are neither CRLF nor `--`.
        let mut body: &[u8] = b"--boundary\x00\x00";
        let mut reader = BoundaryReader::from_reader(&mut body, "boundary");
        reader.consume_boundary().unwrap_err();
    }

    #[test]
    fn test_skip_field() {
        let mut body: &[u8] = b"--boundary\r\nfield1\r\n--boundary\r\nfield2\r\n--boundary--";
        let mut reader = BoundaryReader::from_reader(&mut body, "boundary");

        assert_eq!(reader.consume_boundary().unwrap(), true);
        // skip `field1`
        assert_eq!(reader.consume_boundary().unwrap(), true);

        let mut field = String::new();
        reader.read_to_string(&mut field).unwrap();
        assert_eq!(field, "field2");

        assert_eq!(reader.consume_boundary().unwrap(), false);
    }

    #[cfg(feature = "bench")]
    mod bench {
        extern crate test;
        use self::test::Bencher;

        use super::*;

        #[bench]
        fn bench_boundary_reader(b: &mut Bencher) {
            let mut reader = BoundaryReader::new_with_bytes(TEST_VAL.as_bytes(), BOUNDARY);
            let mut buf = String::with_capacity(256);

            // Rewind and replay the same body on every iteration.
            b.iter(|| {
                reader.reset();
                test_boundary_reader(&mut reader, &mut buf);
            });
        }
    }
}
599