1 // Copyright 2016 `multipart` Crate Developers
2 //
3 // Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4 // http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5 // http://opensource.org/licenses/MIT>, at your option. This file may not be
6 // copied, modified, or distributed except according to those terms.
7 
8 //! `multipart` field header parsing.
9 use mime::{Mime, TopLevel, SubLevel};
10 
11 use quick_error::ResultExt;
12 
13 use std::error::Error;
14 use std::io::{self, Read, BufRead};
15 use std::{str, fmt};
16 
17 // The AsciiExt import is needed for Rust older than 1.23.0. These two lines can
18 // be removed when supporting older Rust is no longer needed.
19 #[allow(deprecated, unused_imports)]
20 use std::ascii::AsciiExt;
21 
22 use super::httparse::{self, EMPTY_HEADER, Header, Status, Error as HttparseError};
23 
24 use self::ReadEntryResult::*;
25 
26 use super::save::SaveBuilder;
27 
28 use super::ArcStr;
29 
30 const EMPTY_STR_HEADER: StrHeader<'static> = StrHeader {
31     name: "",
32     val: "",
33 };
34 
/// Return early from the enclosing function with
/// `ParseHeaderError::InvalidContDisp`.
///
/// `$reason` is a static description of what was wrong and `$cause` is the
/// offending text, which is stored as an owned `String` via `to_string()`.
///
/// The expansion intentionally has no trailing semicolon, which makes it a plain
/// expression of type `!`. It can therefore be used both as a statement
/// (`invalid_cont_disp!(..);`) and directly as a match arm without tripping
/// rustc's `semicolon_in_expressions_from_macros` lint.
macro_rules! invalid_cont_disp {
    ($reason: expr, $cause: expr) => {
        return Err(
            ParseHeaderError::InvalidContDisp($reason, $cause.to_string())
        )
    }
}
42 
/// A single header represented as a pair of borrowed string slices.
///
/// Not exposed
#[derive(Copy, Clone, Debug)]
pub struct StrHeader<'a> {
    /// The header's name.
    name: &'a str,
    /// The header's value.
    val: &'a str,
}
49 
50 struct DisplayHeaders<'s, 'a: 's>(&'s [StrHeader<'a>]);
51 
52 impl <'s, 'a: 's> fmt::Display for  DisplayHeaders<'s, 'a> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result53     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
54         for hdr in self.0 {
55             writeln!(f, "{}: {}", hdr.name, hdr.val)?;
56         }
57 
58         Ok(())
59     }
60 }
61 
with_headers<R, F, Ret>(r: &mut R, closure: F) -> Result<Ret, ParseHeaderError> where R: BufRead, F: FnOnce(&[StrHeader]) -> Ret62 fn with_headers<R, F, Ret>(r: &mut R, closure: F) -> Result<Ret, ParseHeaderError>
63 where R: BufRead, F: FnOnce(&[StrHeader]) -> Ret {
64     const HEADER_LEN: usize = 4;
65 
66     let consume;
67     let ret;
68 
69     let mut last_len = 0;
70 
71     loop {
72         // this should return a larger buffer each time
73         let buf = r.fill_buf()?;
74 
75         // buffer has stopped growing
76         if buf.len() == last_len {
77             return Err(ParseHeaderError::TooLarge);
78         }
79 
80         let mut raw_headers = [EMPTY_HEADER; HEADER_LEN];
81 
82         match httparse::parse_headers(buf, &mut raw_headers)? {
83             // read more and try again
84             Status::Partial => last_len = buf.len(),
85             Status::Complete((consume_, raw_headers)) => {
86                 let mut headers = [EMPTY_STR_HEADER; HEADER_LEN];
87                 let headers = copy_headers(raw_headers, &mut headers)?;
88                 debug!("Parsed headers: {:?}", headers);
89                 consume = consume_;
90                 ret = closure(headers);
91                 break;
92             },
93         }
94     }
95 
96     r.consume(consume);
97     Ok(ret)
98 }
99 
copy_headers<'h, 'b: 'h>(raw: &[Header<'b>], headers: &'h mut [StrHeader<'b>]) -> io::Result<&'h [StrHeader<'b>]>100 fn copy_headers<'h, 'b: 'h>(raw: &[Header<'b>], headers: &'h mut [StrHeader<'b>]) -> io::Result<&'h [StrHeader<'b>]> {
101     for (raw, header) in raw.iter().zip(&mut *headers) {
102         header.name = raw.name;
103         header.val = io_str_utf8(raw.value)?;
104     }
105 
106     Ok(&headers[..raw.len()])
107 }
108 
109 /// The headers that (may) appear before a `multipart/form-data` field.
110 ///
111 /// ### Warning: Values are Client-Provided
112 /// Everything in this struct are values from the client and should be considered **untrustworthy**.
113 /// This crate makes no effort to validate or sanitize any client inputs.
114 #[derive(Clone, Debug)]
115 pub struct FieldHeaders {
116     /// The field's name from the form.
117     pub name: ArcStr,
118 
119     /// The filename of this entry, if supplied. This is not guaranteed to match the original file
120     /// or even to be a valid filename for the current platform.
121     pub filename: Option<String>,
122 
123     /// The MIME type (`Content-Type` value) of this file, if supplied by the client.
124     ///
125     /// If this is not supplied, the content-type of the field should default to `text/plain` as
126     /// per [IETF RFC 7578, section 4.4](https://tools.ietf.org/html/rfc7578#section-4.4), but this
127     /// should not be implicitly trusted. This crate makes no attempt to identify or validate
128     /// the content-type of the actual field data.
129     pub content_type: Option<Mime>,
130 }
131 
132 impl FieldHeaders {
133     /// Parse the field headers from the passed `BufRead`, consuming the relevant bytes.
read_from<R: BufRead>(r: &mut R) -> Result<Self, ParseHeaderError>134     fn read_from<R: BufRead>(r: &mut R) -> Result<Self, ParseHeaderError> {
135         with_headers(r, Self::parse)?
136     }
137 
parse(headers: &[StrHeader]) -> Result<FieldHeaders, ParseHeaderError>138     fn parse(headers: &[StrHeader]) -> Result<FieldHeaders, ParseHeaderError> {
139         let cont_disp = ContentDisp::parse_required(headers)?;
140 
141         Ok(FieldHeaders {
142             name: cont_disp.field_name.into(),
143             filename: cont_disp.filename,
144             content_type: parse_content_type(headers)?,
145         })
146     }
147 }
148 
149 /// The `Content-Disposition` header.
150 struct ContentDisp {
151     /// The name of the `multipart/form-data` field.
152     field_name: String,
153     /// The optional filename for this field.
154     filename: Option<String>,
155 }
156 
157 impl ContentDisp {
parse_required(headers: &[StrHeader]) -> Result<ContentDisp, ParseHeaderError>158     fn parse_required(headers: &[StrHeader]) -> Result<ContentDisp, ParseHeaderError> {
159         let header = if let Some(header) = find_header(headers, "Content-Disposition") {
160             header
161         } else {
162             return Err(ParseHeaderError::MissingContentDisposition(
163                 DisplayHeaders(headers).to_string()
164             ));
165         };
166 
167         // Content-Disposition: ?
168         let after_disp_type = match split_once(header.val, ';') {
169             Some((disp_type, after_disp_type)) => {
170                 // assert Content-Disposition: form-data
171                 // but needs to be parsed out to trim the spaces (allowed by spec IIRC)
172                 if disp_type.trim() != "form-data" {
173                     invalid_cont_disp!("unexpected Content-Disposition value", disp_type);
174                 }
175                 after_disp_type
176             },
177             None => invalid_cont_disp!("expected additional data after Content-Disposition type",
178                                        header.val),
179         };
180 
181         // Content-Disposition: form-data; name=?
182         let (field_name, filename) = match get_str_after("name=", ';', after_disp_type) {
183             None => invalid_cont_disp!("expected field name and maybe filename, got",
184                                        after_disp_type),
185             // Content-Disposition: form-data; name={field_name}; filename=?
186             Some((field_name, after_field_name)) => {
187                 let field_name = trim_quotes(field_name);
188                 let filename = get_str_after("filename=", ';', after_field_name)
189                     .map(|(filename, _)| trim_quotes(filename).to_owned());
190                 (field_name, filename)
191             },
192         };
193 
194         Ok(ContentDisp { field_name: field_name.to_owned(), filename })
195     }
196 }
197 
parse_content_type(headers: &[StrHeader]) -> Result<Option<Mime>, ParseHeaderError>198 fn parse_content_type(headers: &[StrHeader]) -> Result<Option<Mime>, ParseHeaderError> {
199     if let Some(header) = find_header(headers, "Content-Type") {
200         // Boundary parameter will be parsed into the `Mime`
201         debug!("Found Content-Type: {:?}", header.val);
202         Ok(Some(header.val.parse::<Mime>()
203             .map_err(|_| ParseHeaderError::MimeError(header.val.into()))?))
204     } else {
205         Ok(None)
206     }
207 }
208 
209 /// A field in a multipart request with its associated headers and data.
210 #[derive(Debug)]
211 pub struct MultipartField<M: ReadEntry> {
212     /// The headers for this field, including the name, filename, and content-type, if provided.
213     ///
214     /// ### Warning: Values are Client-Provided
215     /// Everything in this struct are values from the client and should be considered **untrustworthy**.
216     /// This crate makes no effort to validate or sanitize any client inputs.
217     pub headers: FieldHeaders,
218 
219     /// The field's data.
220     pub data: MultipartData<M>,
221 }
222 
223 impl<M: ReadEntry> MultipartField<M> {
224     /// Returns `true` if this field has no content-type or the content-type is `text/...`.
225     ///
226     /// This typically means it can be read to a string, but it could still be using an unsupported
227     /// character encoding, so decoding to `String` needs to ensure that the data is valid UTF-8.
228     ///
229     /// Note also that the field contents may be too large to reasonably fit in memory.
230     /// The `.save()` adapter can be used to enforce a size limit.
231     ///
232     /// Detecting character encodings by any means is (currently) beyond the scope of this crate.
is_text(&self) -> bool233     pub fn is_text(&self) -> bool {
234         self.headers.content_type.as_ref().map_or(true, |ct| ct.0 == TopLevel::Text)
235     }
236 
237     /// Read the next entry in the request.
next_entry(self) -> ReadEntryResult<M>238     pub fn next_entry(self) -> ReadEntryResult<M> {
239         self.data.into_inner().read_entry()
240     }
241 
242     /// Update `self` as the next entry.
243     ///
244     /// Returns `Ok(Some(self))` if another entry was read, `Ok(None)` if the end of the body was
245     /// reached, and `Err(e)` for any errors that occur.
next_entry_inplace(&mut self) -> io::Result<Option<&mut Self>> where for<'a> &'a mut M: ReadEntry246     pub fn next_entry_inplace(&mut self) -> io::Result<Option<&mut Self>> where for<'a> &'a mut M: ReadEntry {
247         let multipart = self.data.take_inner();
248 
249         match multipart.read_entry() {
250             Entry(entry) => {
251                 *self = entry;
252                 Ok(Some(self))
253             },
254             End(multipart) => {
255                 self.data.give_inner(multipart);
256                 Ok(None)
257             },
258             Error(multipart, err) => {
259                 self.data.give_inner(multipart);
260                 Err(err)
261             }
262         }
263     }
264 }
265 
266 /// The data of a field in a `multipart/form-data` request.
267 ///
268 /// You can read it to EOF, or use the `save()` adaptor to save it to disk/memory.
269 #[derive(Debug)]
270 pub struct MultipartData<M> {
271     inner: Option<M>,
272 }
273 
274 const DATA_INNER_ERR: &str = "MultipartFile::inner taken and not replaced; this is likely \
275                               caused by a logic error in `multipart` or by resuming after \
276                               a previously caught panic.\nPlease open an issue with the \
277                               relevant backtrace and debug logs at \
278                               https://github.com/abonander/multipart";
279 
280 impl<M> MultipartData<M> where M: ReadEntry {
281     /// Get a builder type which can save the field with or without a size limit.
save(&mut self) -> SaveBuilder<&mut Self>282     pub fn save(&mut self) -> SaveBuilder<&mut Self> {
283         SaveBuilder::new(self)
284     }
285 
286     /// Take the inner `Multipart` or `&mut Multipart`
into_inner(self) -> M287     pub fn into_inner(self) -> M {
288         self.inner.expect(DATA_INNER_ERR)
289     }
290 
291     /// Set the minimum buffer size that `BufRead::fill_buf(self)` will return
292     /// until the end of the stream is reached. Set this as small as you can tolerate
293     /// to minimize `read()` calls (`read()` won't be called again until the buffer
294     /// is smaller than this).
295     ///
296     /// This value is reset between fields.
set_min_buf_size(&mut self, min_buf_size: usize)297     pub fn set_min_buf_size(&mut self, min_buf_size: usize) {
298         self.inner_mut().set_min_buf_size(min_buf_size)
299     }
300 
inner_mut(&mut self) -> &mut M301     fn inner_mut(&mut self) -> &mut M {
302         self.inner.as_mut().expect(DATA_INNER_ERR)
303     }
304 
take_inner(&mut self) -> M305     fn take_inner(&mut self) -> M {
306         self.inner.take().expect(DATA_INNER_ERR)
307     }
308 
give_inner(&mut self, inner: M)309     fn give_inner(&mut self, inner: M) {
310         self.inner = Some(inner);
311     }
312 }
313 
314 impl<M: ReadEntry> Read for MultipartData<M> {
read(&mut self, buf: &mut [u8]) -> io::Result<usize>315     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize>{
316         self.inner_mut().source_mut().read(buf)
317     }
318 }
319 
320 /// In this implementation, `fill_buf()` can return more data with each call.
321 ///
322 /// Use `set_min_buf_size()` if you require a minimum buffer length.
323 impl<M: ReadEntry> BufRead for MultipartData<M> {
fill_buf(&mut self) -> io::Result<&[u8]>324     fn fill_buf(&mut self) -> io::Result<&[u8]> {
325         self.inner_mut().source_mut().fill_buf()
326     }
327 
consume(&mut self, amt: usize)328     fn consume(&mut self, amt: usize) {
329         self.inner_mut().source_mut().consume(amt)
330     }
331 }
332 
/// Split `s` at the first occurrence of `delim`.
///
/// The first half is everything before `delim`; the second half starts *with*
/// `delim` itself. Returns `None` if `delim` does not occur in `s`.
fn split_once(s: &str, delim: char) -> Option<(&str, &str)> {
    match s.find(delim) {
        Some(delim_idx) => Some(s.split_at(delim_idx)),
        None => None,
    }
}
336 
/// Strip every leading and trailing `"` from `s`; quotes in the middle are kept.
fn trim_quotes(s: &str) -> &str {
    s.trim_matches(|c: char| c == '"')
}
340 
/// Get the string after `needle` in `haystack`, stopping before `end_val_delim`.
///
/// `needle` only matches where a parameter can start: at the very beginning of
/// `haystack`, or directly after `end_val_delim` or whitespace. This keeps a
/// needle such as `name=` from matching the tail of `filename=`.
///
/// Returns `(value, rest)` where `rest` begins at the delimiter which ended the
/// value (or is empty if the value ran to the end of `haystack`), or `None` if
/// `needle` does not occur at a parameter boundary.
fn get_str_after<'a>(needle: &str, end_val_delim: char, haystack: &'a str) -> Option<(&'a str, &'a str)> {
    let mut search_from = 0;
    let val_start_idx;

    loop {
        let found = match haystack[search_from..].find(needle) {
            Some(rel_idx) => search_from + rel_idx,
            None => return None,
        };

        // The character preceding the match decides whether this is the start
        // of a parameter or just the tail of a longer name.
        let at_boundary = haystack[..found].chars().next_back()
            .map_or(true, |prev| prev == end_val_delim || prev.is_whitespace());

        if at_boundary {
            val_start_idx = found + needle.len();
            break;
        }

        // Resume just past the first character of the rejected match. A match at
        // index 0 is always accepted, so `needle` cannot be empty here.
        search_from = found + needle.chars().next().map_or(1, |c| c.len_utf8());
    }

    let val_end_idx = haystack[val_start_idx..].find(end_val_delim)
        .map_or(haystack.len(), |end_idx| end_idx + val_start_idx);

    Some((&haystack[val_start_idx..val_end_idx], &haystack[val_end_idx..]))
}
348 
/// Interpret `buf` as UTF-8, reporting invalid data as an
/// `io::ErrorKind::InvalidData` error that wraps the `Utf8Error`.
fn io_str_utf8(buf: &[u8]) -> io::Result<&str> {
    match str::from_utf8(buf) {
        Ok(text) => Ok(text),
        Err(utf8_err) => Err(io::Error::new(io::ErrorKind::InvalidData, utf8_err)),
    }
}
352 
find_header<'a, 'b>(headers: &'a [StrHeader<'b>], name: &str) -> Option<&'a StrHeader<'b>>353 fn find_header<'a, 'b>(headers: &'a [StrHeader<'b>], name: &str) -> Option<&'a StrHeader<'b>> {
354     // Field names are case insensitive and consist of ASCII characters
355     // only (see https://tools.ietf.org/html/rfc822#section-3.2).
356     headers.iter().find(|header| header.name.eq_ignore_ascii_case(name))
357 }
358 
359 /// Common trait for `Multipart` and `&mut Multipart`
360 pub trait ReadEntry: PrivReadEntry + Sized {
361     /// Attempt to read the next entry in the multipart stream.
read_entry(mut self) -> ReadEntryResult<Self>362     fn read_entry(mut self) -> ReadEntryResult<Self> {
363         self.set_min_buf_size(super::boundary::MIN_BUF_SIZE);
364 
365         debug!("ReadEntry::read_entry()");
366 
367         if !try_read_entry!(self; self.consume_boundary()) {
368             return End(self);
369         }
370 
371         let field_headers: FieldHeaders = try_read_entry!(self; self.read_headers());
372 
373         if let Some(ct) = field_headers.content_type.as_ref() {
374             if ct.0 == TopLevel::Multipart {
375                 // fields of this type are sent by (supposedly) no known clients
376                 // (https://tools.ietf.org/html/rfc7578#appendix-A) so I'd be fascinated
377                 // to hear about any in the wild
378                 info!("Found nested multipart field: {:?}:\r\n\
379                        Please report this client's User-Agent and any other available details \
380                        at https://github.com/abonander/multipart/issues/56",
381                        field_headers);
382             }
383         }
384 
385         Entry(
386             MultipartField {
387                 headers: field_headers,
388                 data: MultipartData {
389                     inner: Some(self),
390                 },
391             }
392         )
393     }
394 
395     /// Equivalent to `read_entry()` but takes `&mut self`
read_entry_mut(&mut self) -> ReadEntryResult<&mut Self>396     fn read_entry_mut(&mut self) -> ReadEntryResult<&mut Self> {
397         ReadEntry::read_entry(self)
398     }
399 }
400 
401 impl<T> ReadEntry for T where T: PrivReadEntry {}
402 
403 /// Public trait but not re-exported.
404 pub trait PrivReadEntry {
405     type Source: BufRead;
406 
source_mut(&mut self) -> &mut Self::Source407     fn source_mut(&mut self) -> &mut Self::Source;
408 
set_min_buf_size(&mut self, min_buf_size: usize)409     fn set_min_buf_size(&mut self, min_buf_size: usize);
410 
411     /// Consume the next boundary.
412     /// Returns `true` if a field should follow, `false` otherwise.
consume_boundary(&mut self) -> io::Result<bool>413     fn consume_boundary(&mut self) -> io::Result<bool>;
414 
read_headers(&mut self) -> Result<FieldHeaders, io::Error>415     fn read_headers(&mut self) -> Result<FieldHeaders, io::Error> {
416         FieldHeaders::read_from(self.source_mut())
417             .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
418     }
419 
read_to_string(&mut self) -> io::Result<String>420     fn read_to_string(&mut self) -> io::Result<String> {
421         let mut buf = String::new();
422 
423         match self.source_mut().read_to_string(&mut buf) {
424             Ok(_) => Ok(buf),
425             Err(err) => Err(err),
426         }
427     }
428 }
429 
430 impl<'a, M: ReadEntry> PrivReadEntry for &'a mut M {
431     type Source = M::Source;
432 
source_mut(&mut self) -> &mut M::Source433     fn source_mut(&mut self) -> &mut M::Source {
434         (**self).source_mut()
435     }
436 
set_min_buf_size(&mut self, min_buf_size: usize)437     fn set_min_buf_size(&mut self, min_buf_size: usize) {
438         (**self).set_min_buf_size(min_buf_size)
439     }
440 
consume_boundary(&mut self) -> io::Result<bool>441     fn consume_boundary(&mut self) -> io::Result<bool> {
442         (**self).consume_boundary()
443     }
444 }
445 
446 /// Ternary result type returned by `ReadEntry::next_entry()`,
447 /// `Multipart::into_entry()` and `MultipartField::next_entry()`.
448 pub enum ReadEntryResult<M: ReadEntry, Entry = MultipartField<M>> {
449     /// The next entry was found.
450     Entry(Entry),
451     /// No  more entries could be read.
452     End(M),
453     /// An error occurred.
454     Error(M, io::Error),
455 }
456 
457 impl<M: ReadEntry, Entry> ReadEntryResult<M, Entry> {
458     /// Convert `self` into `Result<Option<Entry>>` as follows:
459     ///
460     /// * `Entry(entry) -> Ok(Some(entry))`
461     /// * `End(_) -> Ok(None)`
462     /// * `Error(_, err) -> Err(err)`
into_result(self) -> io::Result<Option<Entry>>463     pub fn into_result(self) -> io::Result<Option<Entry>> {
464         match self {
465             ReadEntryResult::Entry(entry) => Ok(Some(entry)),
466             ReadEntryResult::End(_) => Ok(None),
467             ReadEntryResult::Error(_, err) => Err(err),
468         }
469     }
470 
471     /// Attempt to unwrap `Entry`, panicking if this is `End` or `Error`.
unwrap(self) -> Entry472     pub fn unwrap(self) -> Entry {
473         self.expect_alt("`ReadEntryResult::unwrap()` called on `End` value",
474                         "`ReadEntryResult::unwrap()` called on `Error` value: {:?}")
475     }
476 
477     /// Attempt to unwrap `Entry`, panicking if this is `End` or `Error`
478     /// with the given message. Adds the error's message in the `Error` case.
expect(self, msg: &str) -> Entry479     pub fn expect(self, msg: &str) -> Entry {
480         self.expect_alt(msg, msg)
481     }
482 
483     /// Attempt to unwrap `Entry`, panicking if this is `End` or `Error`.
484     /// If this is `End`, panics with `end_msg`; if `Error`, panics with `err_msg`
485     /// as well as the error's message.
expect_alt(self, end_msg: &str, err_msg: &str) -> Entry486     pub fn expect_alt(self, end_msg: &str, err_msg: &str) -> Entry {
487         match self {
488             Entry(entry) => entry,
489             End(_) => panic!("{}", end_msg),
490             Error(_, err) => panic!("{}: {:?}", err_msg, err),
491         }
492     }
493 
494     /// Attempt to unwrap as `Option<Entry>`, panicking in the `Error` case.
unwrap_opt(self) -> Option<Entry>495     pub fn unwrap_opt(self) -> Option<Entry> {
496         self.expect_opt("`ReadEntryResult::unwrap_opt()` called on `Error` value")
497     }
498 
499     /// Attempt to unwrap as `Option<Entry>`, panicking in the `Error` case
500     /// with the given message as well as the error's message.
expect_opt(self, msg: &str) -> Option<Entry>501     pub fn expect_opt(self, msg: &str) -> Option<Entry> {
502         match self {
503             Entry(entry) => Some(entry),
504             End(_) => None,
505             Error(_, err) => panic!("{}: {:?}", msg, err),
506         }
507     }
508 }
509 
510 const GENERIC_PARSE_ERR: &str = "an error occurred while parsing field headers";
511 
512 quick_error! {
513     #[derive(Debug)]
514     enum ParseHeaderError {
515         /// The `Content-Disposition` header was not found
516         MissingContentDisposition(headers: String) {
517             display(x) -> ("{}:\n{}", x.description(), headers)
518             description("\"Content-Disposition\" header not found in field headers")
519         }
520         InvalidContDisp(reason: &'static str, cause: String) {
521             display(x) -> ("{}: {}: {}", x.description(), reason, cause)
522             description("invalid \"Content-Disposition\" header")
523         }
524         /// The header was found but could not be parsed
525         TokenizeError(err: HttparseError) {
526             description(GENERIC_PARSE_ERR)
527             display(x) -> ("{}: {}", x.description(), err)
528             cause(err)
529             from()
530         }
531         MimeError(cont_type: String) {
532             description("Failed to parse Content-Type")
533             display(this) -> ("{}: {}", this.description(), cont_type)
534         }
535         TooLarge {
536             description("field headers section ridiculously long or missing trailing CRLF-CRLF")
537         }
538         /// IO error
539         Io(err: io::Error) {
540             description("an io error occurred while parsing the headers")
541             display(x) -> ("{}: {}", x.description(), err)
542             cause(err)
543             from()
544         }
545     }
546 }
547 
/// `find_header()` must match header names regardless of ASCII case.
#[test]
fn test_find_header() {
    let headers = [
        StrHeader { name: "Content-Type", val: "text/plain" },
        StrHeader { name: "Content-disposition", val: "form-data" },
        StrHeader { name: "content-transfer-encoding", val: "binary" }
    ];

    let expected = [
        ("Content-Type", "text/plain"),
        ("Content-Disposition", "form-data"),
        ("Content-Transfer-Encoding", "binary"),
    ];

    for &(name, val) in expected.iter() {
        let found = find_header(&headers, name).map(|header| header.val);
        assert_eq!(found, Some(val));
    }
}
560