1 // Copyright 2016 `multipart` Crate Developers
2 //
3 // Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4 // http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5 // http://opensource.org/licenses/MIT>, at your option. This file may not be
6 // copied, modified, or distributed except according to those terms.
7
8 //! `multipart` field header parsing.
9 use mime::{Mime, TopLevel, SubLevel};
10
11 use quick_error::ResultExt;
12
13 use std::error::Error;
14 use std::io::{self, Read, BufRead};
15 use std::{str, fmt};
16
17 // The AsciiExt import is needed for Rust older than 1.23.0. These two lines can
18 // be removed when supporting older Rust is no longer needed.
19 #[allow(deprecated, unused_imports)]
20 use std::ascii::AsciiExt;
21
22 use super::httparse::{self, EMPTY_HEADER, Header, Status, Error as HttparseError};
23
24 use self::ReadEntryResult::*;
25
26 use super::save::SaveBuilder;
27
28 use super::ArcStr;
29
/// Placeholder header with an empty name and an empty value.
///
/// Used to initialize the fixed-size `StrHeader` array in `with_headers()`
/// before the parsed headers are copied into it.
const EMPTY_STR_HEADER: StrHeader<'static> = StrHeader {
    name: "",
    val: "",
};
34
// Return early from the enclosing function with
// `Err(ParseHeaderError::InvalidContDisp(..))`.
//
// `$reason` is passed through unchanged as the variant's first field;
// `$cause` is converted with `.to_string()` to form the second field.
macro_rules! invalid_cont_disp {
    ($reason: expr, $cause: expr) => {
        return Err(
            ParseHeaderError::InvalidContDisp($reason, $cause.to_string())
        );
    }
}
42
/// A single header whose name and value are both borrowed string slices.
///
/// Not exposed.
#[derive(Copy, Clone, Debug)]
pub struct StrHeader<'a> {
    /// The header's name, e.g. `Content-Type`.
    name: &'a str,
    /// The header's value, already validated as UTF-8 (see `copy_headers()`).
    val: &'a str,
}
49
50 struct DisplayHeaders<'s, 'a: 's>(&'s [StrHeader<'a>]);
51
52 impl <'s, 'a: 's> fmt::Display for DisplayHeaders<'s, 'a> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result53 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
54 for hdr in self.0 {
55 writeln!(f, "{}: {}", hdr.name, hdr.val)?;
56 }
57
58 Ok(())
59 }
60 }
61
with_headers<R, F, Ret>(r: &mut R, closure: F) -> Result<Ret, ParseHeaderError> where R: BufRead, F: FnOnce(&[StrHeader]) -> Ret62 fn with_headers<R, F, Ret>(r: &mut R, closure: F) -> Result<Ret, ParseHeaderError>
63 where R: BufRead, F: FnOnce(&[StrHeader]) -> Ret {
64 const HEADER_LEN: usize = 4;
65
66 let consume;
67 let ret;
68
69 let mut last_len = 0;
70
71 loop {
72 // this should return a larger buffer each time
73 let buf = r.fill_buf()?;
74
75 // buffer has stopped growing
76 if buf.len() == last_len {
77 return Err(ParseHeaderError::TooLarge);
78 }
79
80 let mut raw_headers = [EMPTY_HEADER; HEADER_LEN];
81
82 match httparse::parse_headers(buf, &mut raw_headers)? {
83 // read more and try again
84 Status::Partial => last_len = buf.len(),
85 Status::Complete((consume_, raw_headers)) => {
86 let mut headers = [EMPTY_STR_HEADER; HEADER_LEN];
87 let headers = copy_headers(raw_headers, &mut headers)?;
88 debug!("Parsed headers: {:?}", headers);
89 consume = consume_;
90 ret = closure(headers);
91 break;
92 },
93 }
94 }
95
96 r.consume(consume);
97 Ok(ret)
98 }
99
copy_headers<'h, 'b: 'h>(raw: &[Header<'b>], headers: &'h mut [StrHeader<'b>]) -> io::Result<&'h [StrHeader<'b>]>100 fn copy_headers<'h, 'b: 'h>(raw: &[Header<'b>], headers: &'h mut [StrHeader<'b>]) -> io::Result<&'h [StrHeader<'b>]> {
101 for (raw, header) in raw.iter().zip(&mut *headers) {
102 header.name = raw.name;
103 header.val = io_str_utf8(raw.value)?;
104 }
105
106 Ok(&headers[..raw.len()])
107 }
108
/// The headers that (may) appear before a `multipart/form-data` field.
///
/// ### Warning: Values are Client-Provided
/// All values in this struct come from the client and should be considered **untrustworthy**.
/// This crate makes no effort to validate or sanitize any client inputs.
#[derive(Clone, Debug)]
pub struct FieldHeaders {
    /// The field's name from the form.
    pub name: ArcStr,

    /// The filename of this entry, if supplied. This is not guaranteed to match the original file
    /// or even to be a valid filename for the current platform.
    pub filename: Option<String>,

    /// The MIME type (`Content-Type` value) of this file, if supplied by the client.
    ///
    /// If this is not supplied, the content-type of the field should default to `text/plain` as
    /// per [IETF RFC 7578, section 4.4](https://tools.ietf.org/html/rfc7578#section-4.4), but this
    /// should not be implicitly trusted. This crate makes no attempt to identify or validate
    /// the content-type of the actual field data.
    pub content_type: Option<Mime>,
}
131
132 impl FieldHeaders {
133 /// Parse the field headers from the passed `BufRead`, consuming the relevant bytes.
read_from<R: BufRead>(r: &mut R) -> Result<Self, ParseHeaderError>134 fn read_from<R: BufRead>(r: &mut R) -> Result<Self, ParseHeaderError> {
135 with_headers(r, Self::parse)?
136 }
137
parse(headers: &[StrHeader]) -> Result<FieldHeaders, ParseHeaderError>138 fn parse(headers: &[StrHeader]) -> Result<FieldHeaders, ParseHeaderError> {
139 let cont_disp = ContentDisp::parse_required(headers)?;
140
141 Ok(FieldHeaders {
142 name: cont_disp.field_name.into(),
143 filename: cont_disp.filename,
144 content_type: parse_content_type(headers)?,
145 })
146 }
147 }
148
/// The parsed contents of a field's `Content-Disposition` header.
struct ContentDisp {
    /// The name of the `multipart/form-data` field (the `name` parameter).
    field_name: String,
    /// The optional filename for this field (the `filename` parameter).
    filename: Option<String>,
}
156
157 impl ContentDisp {
parse_required(headers: &[StrHeader]) -> Result<ContentDisp, ParseHeaderError>158 fn parse_required(headers: &[StrHeader]) -> Result<ContentDisp, ParseHeaderError> {
159 let header = if let Some(header) = find_header(headers, "Content-Disposition") {
160 header
161 } else {
162 return Err(ParseHeaderError::MissingContentDisposition(
163 DisplayHeaders(headers).to_string()
164 ));
165 };
166
167 // Content-Disposition: ?
168 let after_disp_type = match split_once(header.val, ';') {
169 Some((disp_type, after_disp_type)) => {
170 // assert Content-Disposition: form-data
171 // but needs to be parsed out to trim the spaces (allowed by spec IIRC)
172 if disp_type.trim() != "form-data" {
173 invalid_cont_disp!("unexpected Content-Disposition value", disp_type);
174 }
175 after_disp_type
176 },
177 None => invalid_cont_disp!("expected additional data after Content-Disposition type",
178 header.val),
179 };
180
181 // Content-Disposition: form-data; name=?
182 let (field_name, filename) = match get_str_after("name=", ';', after_disp_type) {
183 None => invalid_cont_disp!("expected field name and maybe filename, got",
184 after_disp_type),
185 // Content-Disposition: form-data; name={field_name}; filename=?
186 Some((field_name, after_field_name)) => {
187 let field_name = trim_quotes(field_name);
188 let filename = get_str_after("filename=", ';', after_field_name)
189 .map(|(filename, _)| trim_quotes(filename).to_owned());
190 (field_name, filename)
191 },
192 };
193
194 Ok(ContentDisp { field_name: field_name.to_owned(), filename })
195 }
196 }
197
parse_content_type(headers: &[StrHeader]) -> Result<Option<Mime>, ParseHeaderError>198 fn parse_content_type(headers: &[StrHeader]) -> Result<Option<Mime>, ParseHeaderError> {
199 if let Some(header) = find_header(headers, "Content-Type") {
200 // Boundary parameter will be parsed into the `Mime`
201 debug!("Found Content-Type: {:?}", header.val);
202 Ok(Some(header.val.parse::<Mime>()
203 .map_err(|_| ParseHeaderError::MimeError(header.val.into()))?))
204 } else {
205 Ok(None)
206 }
207 }
208
/// A field in a multipart request with its associated headers and data.
#[derive(Debug)]
pub struct MultipartField<M: ReadEntry> {
    /// The headers for this field, including the name, filename, and content-type, if provided.
    ///
    /// ### Warning: Values are Client-Provided
    /// All values in this struct come from the client and should be considered **untrustworthy**.
    /// This crate makes no effort to validate or sanitize any client inputs.
    pub headers: FieldHeaders,

    /// The field's data.
    pub data: MultipartData<M>,
}
222
223 impl<M: ReadEntry> MultipartField<M> {
224 /// Returns `true` if this field has no content-type or the content-type is `text/...`.
225 ///
226 /// This typically means it can be read to a string, but it could still be using an unsupported
227 /// character encoding, so decoding to `String` needs to ensure that the data is valid UTF-8.
228 ///
229 /// Note also that the field contents may be too large to reasonably fit in memory.
230 /// The `.save()` adapter can be used to enforce a size limit.
231 ///
232 /// Detecting character encodings by any means is (currently) beyond the scope of this crate.
is_text(&self) -> bool233 pub fn is_text(&self) -> bool {
234 self.headers.content_type.as_ref().map_or(true, |ct| ct.0 == TopLevel::Text)
235 }
236
237 /// Read the next entry in the request.
next_entry(self) -> ReadEntryResult<M>238 pub fn next_entry(self) -> ReadEntryResult<M> {
239 self.data.into_inner().read_entry()
240 }
241
242 /// Update `self` as the next entry.
243 ///
244 /// Returns `Ok(Some(self))` if another entry was read, `Ok(None)` if the end of the body was
245 /// reached, and `Err(e)` for any errors that occur.
next_entry_inplace(&mut self) -> io::Result<Option<&mut Self>> where for<'a> &'a mut M: ReadEntry246 pub fn next_entry_inplace(&mut self) -> io::Result<Option<&mut Self>> where for<'a> &'a mut M: ReadEntry {
247 let multipart = self.data.take_inner();
248
249 match multipart.read_entry() {
250 Entry(entry) => {
251 *self = entry;
252 Ok(Some(self))
253 },
254 End(multipart) => {
255 self.data.give_inner(multipart);
256 Ok(None)
257 },
258 Error(multipart, err) => {
259 self.data.give_inner(multipart);
260 Err(err)
261 }
262 }
263 }
264 }
265
/// The data of a field in a `multipart/form-data` request.
///
/// You can read it to EOF, or use the `save()` adaptor to save it to disk/memory.
#[derive(Debug)]
pub struct MultipartData<M> {
    /// The underlying entry reader. It is only `None` while temporarily taken
    /// out via `take_inner()`; accessors panic with `DATA_INNER_ERR` in that state.
    inner: Option<M>,
}
273
/// Panic message used when `MultipartData::inner` is found to be `None`.
// NOTE(review): the message refers to `MultipartFile`, while the type in this
// file is `MultipartData` — presumably a leftover from an earlier name; confirm
// before changing the user-visible text.
const DATA_INNER_ERR: &str = "MultipartFile::inner taken and not replaced; this is likely \
                              caused by a logic error in `multipart` or by resuming after \
                              a previously caught panic.\nPlease open an issue with the \
                              relevant backtrace and debug logs at \
                              https://github.com/abonander/multipart";
279
280 impl<M> MultipartData<M> where M: ReadEntry {
281 /// Get a builder type which can save the field with or without a size limit.
save(&mut self) -> SaveBuilder<&mut Self>282 pub fn save(&mut self) -> SaveBuilder<&mut Self> {
283 SaveBuilder::new(self)
284 }
285
286 /// Take the inner `Multipart` or `&mut Multipart`
into_inner(self) -> M287 pub fn into_inner(self) -> M {
288 self.inner.expect(DATA_INNER_ERR)
289 }
290
291 /// Set the minimum buffer size that `BufRead::fill_buf(self)` will return
292 /// until the end of the stream is reached. Set this as small as you can tolerate
293 /// to minimize `read()` calls (`read()` won't be called again until the buffer
294 /// is smaller than this).
295 ///
296 /// This value is reset between fields.
set_min_buf_size(&mut self, min_buf_size: usize)297 pub fn set_min_buf_size(&mut self, min_buf_size: usize) {
298 self.inner_mut().set_min_buf_size(min_buf_size)
299 }
300
inner_mut(&mut self) -> &mut M301 fn inner_mut(&mut self) -> &mut M {
302 self.inner.as_mut().expect(DATA_INNER_ERR)
303 }
304
take_inner(&mut self) -> M305 fn take_inner(&mut self) -> M {
306 self.inner.take().expect(DATA_INNER_ERR)
307 }
308
give_inner(&mut self, inner: M)309 fn give_inner(&mut self, inner: M) {
310 self.inner = Some(inner);
311 }
312 }
313
314 impl<M: ReadEntry> Read for MultipartData<M> {
read(&mut self, buf: &mut [u8]) -> io::Result<usize>315 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize>{
316 self.inner_mut().source_mut().read(buf)
317 }
318 }
319
320 /// In this implementation, `fill_buf()` can return more data with each call.
321 ///
322 /// Use `set_min_buf_size()` if you require a minimum buffer length.
323 impl<M: ReadEntry> BufRead for MultipartData<M> {
fill_buf(&mut self) -> io::Result<&[u8]>324 fn fill_buf(&mut self) -> io::Result<&[u8]> {
325 self.inner_mut().source_mut().fill_buf()
326 }
327
consume(&mut self, amt: usize)328 fn consume(&mut self, amt: usize) {
329 self.inner_mut().source_mut().consume(amt)
330 }
331 }
332
/// Split `s` at the first occurrence of `delim`.
///
/// Returns the text before the delimiter and the remainder; the remainder
/// still starts with `delim` itself. Returns `None` if `delim` does not occur.
fn split_once(s: &str, delim: char) -> Option<(&str, &str)> {
    match s.find(delim) {
        Some(delim_idx) => Some((&s[..delim_idx], &s[delim_idx..])),
        None => None,
    }
}
336
/// Strip every leading and trailing double quote (`"`) from `s`.
fn trim_quotes(s: &str) -> &str {
    s.trim_matches(|c: char| c == '"')
}
340
/// Find the value that follows `needle` in `haystack`.
///
/// The value starts right after the first occurrence of `needle` and runs up to
/// (not including) the next `end_val_delim`, or to the end of `haystack` if the
/// delimiter does not occur. Returns the value together with the rest of
/// `haystack` starting at the delimiter, or `None` if `needle` is not found.
fn get_str_after<'a>(needle: &str, end_val_delim: char, haystack: &'a str) -> Option<(&'a str, &'a str)> {
    let val_start = match haystack.find(needle) {
        Some(needle_idx) => needle_idx + needle.len(),
        None => return None,
    };

    let (_, from_val) = haystack.split_at(val_start);
    let val_len = from_val.find(end_val_delim).unwrap_or(from_val.len());

    Some(from_val.split_at(val_len))
}
348
/// Interpret `buf` as UTF-8, reporting invalid data as an `io::Error` of kind `InvalidData`.
fn io_str_utf8(buf: &[u8]) -> io::Result<&str> {
    match str::from_utf8(buf) {
        Ok(text) => Ok(text),
        Err(utf8_err) => Err(io::Error::new(io::ErrorKind::InvalidData, utf8_err)),
    }
}
352
find_header<'a, 'b>(headers: &'a [StrHeader<'b>], name: &str) -> Option<&'a StrHeader<'b>>353 fn find_header<'a, 'b>(headers: &'a [StrHeader<'b>], name: &str) -> Option<&'a StrHeader<'b>> {
354 // Field names are case insensitive and consist of ASCII characters
355 // only (see https://tools.ietf.org/html/rfc822#section-3.2).
356 headers.iter().find(|header| header.name.eq_ignore_ascii_case(name))
357 }
358
359 /// Common trait for `Multipart` and `&mut Multipart`
360 pub trait ReadEntry: PrivReadEntry + Sized {
361 /// Attempt to read the next entry in the multipart stream.
read_entry(mut self) -> ReadEntryResult<Self>362 fn read_entry(mut self) -> ReadEntryResult<Self> {
363 self.set_min_buf_size(super::boundary::MIN_BUF_SIZE);
364
365 debug!("ReadEntry::read_entry()");
366
367 if !try_read_entry!(self; self.consume_boundary()) {
368 return End(self);
369 }
370
371 let field_headers: FieldHeaders = try_read_entry!(self; self.read_headers());
372
373 if let Some(ct) = field_headers.content_type.as_ref() {
374 if ct.0 == TopLevel::Multipart {
375 // fields of this type are sent by (supposedly) no known clients
376 // (https://tools.ietf.org/html/rfc7578#appendix-A) so I'd be fascinated
377 // to hear about any in the wild
378 info!("Found nested multipart field: {:?}:\r\n\
379 Please report this client's User-Agent and any other available details \
380 at https://github.com/abonander/multipart/issues/56",
381 field_headers);
382 }
383 }
384
385 Entry(
386 MultipartField {
387 headers: field_headers,
388 data: MultipartData {
389 inner: Some(self),
390 },
391 }
392 )
393 }
394
395 /// Equivalent to `read_entry()` but takes `&mut self`
read_entry_mut(&mut self) -> ReadEntryResult<&mut Self>396 fn read_entry_mut(&mut self) -> ReadEntryResult<&mut Self> {
397 ReadEntry::read_entry(self)
398 }
399 }
400
401 impl<T> ReadEntry for T where T: PrivReadEntry {}
402
403 /// Public trait but not re-exported.
404 pub trait PrivReadEntry {
405 type Source: BufRead;
406
source_mut(&mut self) -> &mut Self::Source407 fn source_mut(&mut self) -> &mut Self::Source;
408
set_min_buf_size(&mut self, min_buf_size: usize)409 fn set_min_buf_size(&mut self, min_buf_size: usize);
410
411 /// Consume the next boundary.
412 /// Returns `true` if a field should follow, `false` otherwise.
consume_boundary(&mut self) -> io::Result<bool>413 fn consume_boundary(&mut self) -> io::Result<bool>;
414
read_headers(&mut self) -> Result<FieldHeaders, io::Error>415 fn read_headers(&mut self) -> Result<FieldHeaders, io::Error> {
416 FieldHeaders::read_from(self.source_mut())
417 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
418 }
419
read_to_string(&mut self) -> io::Result<String>420 fn read_to_string(&mut self) -> io::Result<String> {
421 let mut buf = String::new();
422
423 match self.source_mut().read_to_string(&mut buf) {
424 Ok(_) => Ok(buf),
425 Err(err) => Err(err),
426 }
427 }
428 }
429
430 impl<'a, M: ReadEntry> PrivReadEntry for &'a mut M {
431 type Source = M::Source;
432
source_mut(&mut self) -> &mut M::Source433 fn source_mut(&mut self) -> &mut M::Source {
434 (**self).source_mut()
435 }
436
set_min_buf_size(&mut self, min_buf_size: usize)437 fn set_min_buf_size(&mut self, min_buf_size: usize) {
438 (**self).set_min_buf_size(min_buf_size)
439 }
440
consume_boundary(&mut self) -> io::Result<bool>441 fn consume_boundary(&mut self) -> io::Result<bool> {
442 (**self).consume_boundary()
443 }
444 }
445
/// Ternary result type returned by `ReadEntry::read_entry()`,
/// `Multipart::into_entry()` and `MultipartField::next_entry()`.
///
/// Unlike `Result`, the `End` and `Error` variants hand the reader `M` back
/// to the caller.
pub enum ReadEntryResult<M: ReadEntry, Entry = MultipartField<M>> {
    /// The next entry was found.
    Entry(Entry),
    /// No more entries could be read.
    End(M),
    /// An error occurred.
    Error(M, io::Error),
}
456
457 impl<M: ReadEntry, Entry> ReadEntryResult<M, Entry> {
458 /// Convert `self` into `Result<Option<Entry>>` as follows:
459 ///
460 /// * `Entry(entry) -> Ok(Some(entry))`
461 /// * `End(_) -> Ok(None)`
462 /// * `Error(_, err) -> Err(err)`
into_result(self) -> io::Result<Option<Entry>>463 pub fn into_result(self) -> io::Result<Option<Entry>> {
464 match self {
465 ReadEntryResult::Entry(entry) => Ok(Some(entry)),
466 ReadEntryResult::End(_) => Ok(None),
467 ReadEntryResult::Error(_, err) => Err(err),
468 }
469 }
470
471 /// Attempt to unwrap `Entry`, panicking if this is `End` or `Error`.
unwrap(self) -> Entry472 pub fn unwrap(self) -> Entry {
473 self.expect_alt("`ReadEntryResult::unwrap()` called on `End` value",
474 "`ReadEntryResult::unwrap()` called on `Error` value: {:?}")
475 }
476
477 /// Attempt to unwrap `Entry`, panicking if this is `End` or `Error`
478 /// with the given message. Adds the error's message in the `Error` case.
expect(self, msg: &str) -> Entry479 pub fn expect(self, msg: &str) -> Entry {
480 self.expect_alt(msg, msg)
481 }
482
483 /// Attempt to unwrap `Entry`, panicking if this is `End` or `Error`.
484 /// If this is `End`, panics with `end_msg`; if `Error`, panics with `err_msg`
485 /// as well as the error's message.
expect_alt(self, end_msg: &str, err_msg: &str) -> Entry486 pub fn expect_alt(self, end_msg: &str, err_msg: &str) -> Entry {
487 match self {
488 Entry(entry) => entry,
489 End(_) => panic!("{}", end_msg),
490 Error(_, err) => panic!("{}: {:?}", err_msg, err),
491 }
492 }
493
494 /// Attempt to unwrap as `Option<Entry>`, panicking in the `Error` case.
unwrap_opt(self) -> Option<Entry>495 pub fn unwrap_opt(self) -> Option<Entry> {
496 self.expect_opt("`ReadEntryResult::unwrap_opt()` called on `Error` value")
497 }
498
499 /// Attempt to unwrap as `Option<Entry>`, panicking in the `Error` case
500 /// with the given message as well as the error's message.
expect_opt(self, msg: &str) -> Option<Entry>501 pub fn expect_opt(self, msg: &str) -> Option<Entry> {
502 match self {
503 Entry(entry) => Some(entry),
504 End(_) => None,
505 Error(_, err) => panic!("{}: {:?}", msg, err),
506 }
507 }
508 }
509
/// Description used by `ParseHeaderError::TokenizeError`.
const GENERIC_PARSE_ERR: &str = "an error occurred while parsing field headers";

// Errors that can occur while reading and interpreting a field's headers.
// `read_headers()` wraps these in an `io::Error` of kind `InvalidData`.
quick_error! {
    #[derive(Debug)]
    enum ParseHeaderError {
        /// The `Content-Disposition` header was not found
        MissingContentDisposition(headers: String) {
            display(x) -> ("{}:\n{}", x.description(), headers)
            description("\"Content-Disposition\" header not found in field headers")
        }
        /// The `Content-Disposition` header was found but its value was not acceptable;
        /// `reason` says what was wrong and `cause` holds the offending text
        InvalidContDisp(reason: &'static str, cause: String) {
            display(x) -> ("{}: {}: {}", x.description(), reason, cause)
            description("invalid \"Content-Disposition\" header")
        }
        /// The header was found but could not be parsed
        TokenizeError(err: HttparseError) {
            description(GENERIC_PARSE_ERR)
            display(x) -> ("{}: {}", x.description(), err)
            cause(err)
            from()
        }
        /// The `Content-Type` value (carried in `cont_type`) could not be parsed as a MIME type
        MimeError(cont_type: String) {
            description("Failed to parse Content-Type")
            display(this) -> ("{}: {}", this.description(), cont_type)
        }
        /// The reader's buffer stopped growing before a complete header section was seen
        TooLarge {
            description("field headers section ridiculously long or missing trailing CRLF-CRLF")
        }
        /// IO error
        Io(err: io::Error) {
            description("an io error occurred while parsing the headers")
            display(x) -> ("{}: {}", x.description(), err)
            cause(err)
            from()
        }
    }
}
547
/// `find_header()` must match header names regardless of ASCII case.
#[test]
fn test_find_header() {
    let headers = [
        StrHeader { name: "Content-Type", val: "text/plain" },
        StrHeader { name: "Content-disposition", val: "form-data" },
        StrHeader { name: "content-transfer-encoding", val: "binary" }
    ];

    // (name to look up, expected value)
    let cases = [
        ("Content-Type", "text/plain"),
        ("Content-Disposition", "form-data"),
        ("Content-Transfer-Encoding", "binary"),
    ];

    for &(lookup, expected) in cases.iter() {
        let found = find_header(&headers, lookup).unwrap();
        assert_eq!(found.val, expected);
    }
}
560