1 //! # Mime
2 //!
3 //! Mime is now Media Type, technically, but `Mime` is more immediately
4 //! understandable, so the main type here is `Mime`.
5 //!
6 //! ## What is Mime?
7 //!
8 //! Example mime string: `text/plain;charset=utf-8`
9 //!
10 //! ```rust
11 //! # #[macro_use] extern crate mime;
12 //! # fn main() {
13 //! let plain_text: mime::Mime = "text/plain;charset=utf-8".parse().unwrap();
14 //! assert_eq!(plain_text, mime!(Text/Plain; Charset=Utf8));
15 //! # }
16 //! ```
17 
18 #![doc(html_root_url = "https://hyperium.github.io/mime.rs")]
19 #![cfg_attr(test, deny(warnings))]
20 #![cfg_attr(all(feature = "nightly", test), feature(test))]
21 
22 #[macro_use]
23 extern crate log;
24 
25 #[cfg(feature = "nightly")]
26 #[cfg(test)]
27 extern crate test;
28 
29 #[cfg(feature = "serde")]
30 extern crate serde;
31 
32 #[cfg(feature = "serde")]
33 #[cfg(test)]
34 extern crate serde_json;
35 
36 #[cfg(feature = "heapsize")]
37 extern crate heapsize;
38 
39 use std::ascii::AsciiExt;
40 use std::fmt;
41 use std::iter::Enumerate;
42 use std::str::{FromStr, Chars};
43 
/// Mime, or Media Type. Encapsulates common registered types.
///
/// Consider that a traditional mime type contains a "top level type",
/// a "sub level type", and 0-N "parameters". And they're all strings.
/// Strings everywhere. Strings mean typos. Rust has type safety. We should
/// use types!
///
/// So, Mime bundles together this data into types so the compiler can catch
/// your typos.
///
/// The three public fields are, in order: the top level type, the sub level
/// type, and the parameter list. The parameter list can be any container
/// that exposes a `[Param]` slice through `AsRef`; it defaults to
/// `Vec<Param>`.
///
/// This improves things so you use match without Strings:
///
/// ```rust
/// use mime::{Mime, TopLevel, SubLevel};
///
/// let mime: Mime = "application/json".parse().unwrap();
///
/// match mime {
///     Mime(TopLevel::Application, SubLevel::Json, _) => println!("matched json!"),
///     _ => ()
/// }
/// ```
#[derive(Clone, Debug, Eq, Hash, Ord, PartialOrd)]
pub struct Mime<T: AsRef<[Param]> = Vec<Param>>(pub TopLevel, pub SubLevel, pub T);
68 
#[cfg(feature = "heapsize")]
impl<T: AsRef<[Param]> + heapsize::HeapSizeOf> heapsize::HeapSizeOf for Mime<T> {
    /// Sums the heap usage reported by the top level type, the sub level
    /// type and the parameter container, in that order.
    fn heap_size_of_children(&self) -> usize {
        let top_level_bytes = self.0.heap_size_of_children();
        let sub_level_bytes = self.1.heap_size_of_children();
        let params_bytes = self.2.heap_size_of_children();
        top_level_bytes + sub_level_bytes + params_bytes
    }
}
77 
78 impl<LHS: AsRef<[Param]>, RHS: AsRef<[Param]>> PartialEq<Mime<RHS>> for Mime<LHS> {
79     #[inline]
eq(&self, other: &Mime<RHS>) -> bool80     fn eq(&self, other: &Mime<RHS>) -> bool {
81         self.0 == other.0 && self.1 == other.1 && self.2.as_ref() == other.2.as_ref()
82     }
83 }
84 
/// Build a `Mime` value from a compact `Top/Sub; Attr=Value, ...` notation,
/// without importing the individual enums.
///
/// Each position accepts a variant name (`Text`), `_` for the wildcard
/// (`Star`) variant, or a parenthesised expression (`("x-custom")`) for an
/// extension value.
///
/// # Example
///
/// ```
/// # #[macro_use] extern crate mime;
///
/// # fn main() {
/// let json = mime!(Application/Json);
/// let plain = mime!(Text/Plain; Charset=Utf8);
/// let text = mime!(Text/Html; Charset=("bar"), ("baz")=("quux"));
/// let img = mime!(Image/_);
/// # }
/// ```
#[macro_export]
macro_rules! mime {
    // `Top/Sub`: a media type without parameters.
    ($toplevel:tt / $sublevel:tt) => (
        $crate::Mime(
            __mime__ident_or_ext!(TopLevel::$toplevel),
            __mime__ident_or_ext!(SubLevel::$sublevel),
            vec![]
        )
    );

    // `Top/Sub; Attr=Value, ...`: a media type followed by its parameters.
    ($toplevel:tt / $sublevel:tt ; $($name:tt = $value:tt),*) => (
        $crate::Mime(
            __mime__ident_or_ext!(TopLevel::$toplevel),
            __mime__ident_or_ext!(SubLevel::$sublevel),
            vec![
                $(
                    (
                        __mime__ident_or_ext!(Attr::$name),
                        __mime__ident_or_ext!(Value::$value)
                    )
                ),*
            ]
        )
    );
}
113 
/// Implementation detail of `mime!`: turns one `Enum::token` fragment into
/// the matching enum value of this crate.
#[doc(hidden)]
#[macro_export]
macro_rules! __mime__ident_or_ext {
    // `_` selects the wildcard variant.
    ($kind:ident :: _) => {
        $crate::$kind::Star
    };
    // A parenthesised expression becomes an extension value holding its
    // string form.
    ($kind:ident :: ($text:expr)) => {
        $crate::$kind::Ext($text.to_string())
    };
    // A bare identifier names a variant directly.
    ($kind:ident :: $variant:ident) => {
        $crate::$kind::$variant
    };
}
127 
// Generates a string-backed enum: a list of unit variants, each tied to a
// fixed text, plus one extension variant carrying an arbitrary `String`.
//
// Invocation shape:
//
//     enoom! {
//         pub enum Name;
//         ExtVariant;
//         VariantA, "text-a";
//         VariantB, "text-b";
//     }
//
// Besides the enum itself this emits `as_str`, `Deref<Target = str>`,
// `Display`, `FromStr` (which never fails: unknown text becomes the
// extension variant), comparisons against `str` / `&str` / `String` in both
// directions, and, behind the `heapsize` feature, `HeapSizeOf`.
//
// Equality is textual: an extension variant holding "text-a" compares equal
// to `VariantA`. `Hash` is therefore implemented by hand over the same text
// rather than derived, so equal values always hash equally.
// `Ord` / `PartialOrd` remain derived and follow variant declaration order.
macro_rules! enoom {
    (pub enum $en:ident; $ext:ident; $($ty:ident, $text:expr;)*) => (

        /// A set of well-known names, plus an extension variant that holds
        /// any other name as a `String`.
        #[derive(Clone, Debug, Eq, Ord, PartialOrd)]
        pub enum $en {
            $($ty),*,
            $ext(String)
        }

        impl $en {
            /// Returns the textual form of this value: the fixed text of a
            /// known variant, or the stored string of the extension variant.
            pub fn as_str(&self) -> &str {
                match *self {
                    $($en::$ty => $text),*,
                    $en::$ext(ref s) => &s[..]
                }
            }
        }

        impl ::std::ops::Deref for $en {
            type Target = str;
            fn deref(&self) -> &str {
                self.as_str()
            }
        }

        impl PartialEq for $en {
            #[inline]
            fn eq(&self, other: &$en) -> bool {
                match (self, other) {
                    // Same known variant: equal without touching strings.
                    $( (&$en::$ty, &$en::$ty) => true ),*,
                    (&$en::$ext(ref a), &$en::$ext(ref b)) => a == b,
                    // Mixed pairs (including known-vs-extension) are equal
                    // when their text is.
                    (_, _) => self.as_str() == other.as_str(),
                }
            }
        }

        // Must agree with `PartialEq` above, which compares by text: values
        // that are equal have the same `as_str()`, so hashing that text
        // keeps `a == b` implying `hash(a) == hash(b)`.
        impl ::std::hash::Hash for $en {
            #[inline]
            fn hash<H: ::std::hash::Hasher>(&self, state: &mut H) {
                ::std::hash::Hash::hash(self.as_str(), state)
            }
        }

        impl PartialEq<String> for $en {
            fn eq(&self, other: &String) -> bool {
                self.as_str() == other
            }
        }

        impl PartialEq<str> for $en {
            fn eq(&self, other: &str) -> bool {
                self.as_str() == other
            }
        }

        impl<'a> PartialEq<&'a str> for $en {
            fn eq(&self, other: &&'a str) -> bool {
                self.as_str() == *other
            }
        }

        impl PartialEq<$en> for String {
            fn eq(&self, other: &$en) -> bool {
                self == other.as_str()
            }
        }

        impl PartialEq<$en> for str {
            fn eq(&self, other: &$en) -> bool {
                self == other.as_str()
            }
        }

        impl<'a> PartialEq<$en> for &'a str {
            fn eq(&self, other: &$en) -> bool {
                *self == other.as_str()
            }
        }

        impl fmt::Display for $en {
            #[inline]
            fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
                fmt.write_str(self.as_str())
            }
        }

        impl FromStr for $en {
            type Err = ();
            /// Maps a known text to its variant and anything else to the
            /// extension variant. The comparison is exact, so callers that
            /// want case-insensitivity must normalise `s` first.
            fn from_str(s: &str) -> Result<$en, ()> {
                Ok(match s {
                    $(_s if _s == $text => $en::$ty),*,
                    s => $en::$ext(s.to_string())
                })
            }
        }

        #[cfg(feature = "heapsize")]
        impl heapsize::HeapSizeOf for $en {
            fn heap_size_of_children(&self) -> usize {
                match *self {
                    // Only the extension variant owns heap memory.
                    $en::$ext(ref ext) => ext.heap_size_of_children(),
                    _ => 0,
                }
            }
        }
    )
}
231 
// Top level media types: the part of a mime string before the `/`.
// `Star` is the `*` wildcard; `Ext` holds any name not listed here.
enoom! {
    pub enum TopLevel;
    Ext;
    Star, "*";
    Text, "text";
    Image, "image";
    Audio, "audio";
    Video, "video";
    Application, "application";
    Multipart, "multipart";
    Message, "message";
    Model, "model";
}
245 
// Sub level media types: the part of a mime string after the `/`.
// `Star` is the `*` wildcard; `Ext` holds any name not listed here.
// The grouping comments below only indicate where each name is commonly
// seen; any sub level can be combined with any top level.
enoom! {
    pub enum SubLevel;
    Ext;
    Star, "*";

    // common text/*
    Plain, "plain";
    Html, "html";
    Xml, "xml";
    Javascript, "javascript";
    Css, "css";
    EventStream, "event-stream";

    // common application/*
    Json, "json";
    WwwFormUrlEncoded, "x-www-form-urlencoded";
    Msgpack, "msgpack";
    OctetStream, "octet-stream";

    // multipart/*
    FormData, "form-data";

    // common image/*
    Png, "png";
    Gif, "gif";
    Bmp, "bmp";
    Jpeg, "jpeg";

    // audio/*
    Mpeg, "mpeg";
    Mp4, "mp4";
    Ogg, "ogg";
}
279 
// Parameter names (the left-hand side of `name=value`).
// `Ext` holds any name not listed here.
enoom! {
    pub enum Attr;
    Ext;
    Charset, "charset";
    Boundary, "boundary";
    Q, "q";
}
287 
// Parameter values (the right-hand side of `name=value`).
// Only `utf-8` has a dedicated variant; every other value is held by `Ext`.
enoom! {
    pub enum Value;
    Ext;
    Utf8, "utf-8";
}
293 
/// A single media type parameter: an attribute name paired with its value.
pub type Param = (Attr, Value);
295 
296 impl<T: AsRef<[Param]>> fmt::Display for Mime<T> {
297     #[inline]
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result298     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
299         // It's much faster to write a single string, as opposed to push
300         // several parts through f.write_str(). So, check for the most common
301         // mime types, and fast track them.
302         if let TopLevel::Text = self.0 {
303             if let SubLevel::Plain = self.1 {
304                 let attrs = self.2.as_ref();
305                 if attrs.len() == 0 {
306                     return f.write_str("text/plain");
307                 } else if &[(Attr::Charset, Value::Utf8)] == attrs {
308                     return f.write_str("text/plain; charset=utf-8");
309                 }
310             }
311         } else if let TopLevel::Application = self.0 {
312             if let SubLevel::Json = self.1 {
313                 let attrs = self.2.as_ref();
314                 if attrs.len() == 0 {
315                     return f.write_str("application/json");
316                 }
317             }
318         } else if let TopLevel::Star = self.0 {
319             if let SubLevel::Star = self.1 {
320                 let attrs = self.2.as_ref();
321                 if attrs.len() == 0 {
322                     return f.write_str("*/*");
323                 }
324             }
325         }
326 
327         // slower general purpose fmt
328         try!(fmt::Display::fmt(&self.0, f));
329         try!(f.write_str("/"));
330         try!(fmt::Display::fmt(&self.1, f));
331         for param in self.2.as_ref() {
332             try!(f.write_str("; "));
333             try!(fmt::Display::fmt(&param.0, f));
334             try!(f.write_str("="));
335             try!(fmt::Display::fmt(&param.1, f));
336         }
337         Ok(())
338     }
339 }
340 
341 impl<P: AsRef<[Param]>> Mime<P> {
get_param<A: PartialEq<Attr>>(&self, attr: A) -> Option<&Value>342     pub fn get_param<A: PartialEq<Attr>>(&self, attr: A) -> Option<&Value> {
343         self.2.as_ref().iter().find(|&&(ref name, _)| attr == *name).map(|&(_, ref value)| value)
344     }
345 }
346 
347 impl FromStr for Mime {
348     type Err = ();
from_str(raw: &str) -> Result<Mime, ()>349     fn from_str(raw: &str) -> Result<Mime, ()> {
350         if raw == "*/*" {
351             return Ok(mime!(Star/Star));
352         }
353 
354         let ascii = raw.to_ascii_lowercase(); // lifetimes :(
355         let len = ascii.len();
356         let mut iter = ascii.chars().enumerate();
357         let mut params = vec![];
358         // toplevel
359         let mut start;
360         let top;
361         loop {
362             match iter.next() {
363                 Some((0, c)) if is_restricted_name_first_char(c) => (),
364                 Some((i, c)) if i > 0 && is_restricted_name_char(c) => (),
365                 Some((i, '/')) if i > 0 => match FromStr::from_str(&ascii[..i]) {
366                     Ok(t) => {
367                         top = t;
368                         start = i + 1;
369                         break;
370                     }
371                     Err(_) => return Err(())
372                 },
373                 _ => return Err(()) // EOF and no toplevel is no Mime
374             };
375 
376         }
377 
378         // sublevel
379         let sub;
380         let mut sub_star = false;
381         loop {
382             match iter.next() {
383                 Some((i, '*')) if i == start => {
384                     sub_star = true;
385                 },
386                 Some((i, c)) if i == start && is_restricted_name_first_char(c) => (),
387                 Some((i, c)) if !sub_star && i > start && is_restricted_name_char(c) => (),
388                 Some((i, ';')) if i > start => match FromStr::from_str(&ascii[start..i]) {
389                     Ok(s) => {
390                         sub = s;
391                         start = i + 1;
392                         break;
393                     }
394                     Err(_) => return Err(())
395                 },
396                 None => match FromStr::from_str(&ascii[start..]) {
397                     Ok(s) => return Ok(Mime(top, s, params)),
398                     Err(_) => return Err(())
399                 },
400                 _ => return Err(())
401             };
402         }
403 
404         // params
405         debug!("starting params, len={}", len);
406         loop {
407             match param_from_str(raw, &ascii, &mut iter, start) {
408                 Some((p, end)) => {
409                     params.push(p);
410                     start = end;
411                     if start >= len {
412                         break;
413                     }
414                 }
415                 None => break
416             }
417         }
418 
419         Ok(Mime(top, sub, params))
420     }
421 }
422 
#[cfg(feature = "serde")]
impl serde::ser::Serialize for Mime {
    /// Serializes the mime as a single string, using its `Display` form.
    fn serialize<S>(&self, serializer: &mut S) -> Result<(), S::Error>
        where S: serde::ser::Serializer
    {
        let rendered = self.to_string();
        serializer.serialize_str(&rendered)
    }
}
431 
432 #[cfg(feature = "serde")]
433 impl serde::de::Deserialize for Mime {
deserialize<D>(deserializer: &mut D) -> Result<Self, D::Error> where D: serde::de::Deserializer434     fn deserialize<D>(deserializer: &mut D) -> Result<Self, D::Error>
435         where D: serde::de::Deserializer
436     {
437         let string: String = try!(serde::Deserialize::deserialize(deserializer));
438         let mime: Mime = match FromStr::from_str(&*string) {
439             Ok(mime) => mime,
440             Err(_) => return Err(serde::de::Error::custom("Invalid serialized mime")),
441         };
442         Ok(mime)
443     }
444 }
445 
param_from_str(raw: &str, ascii: &str, iter: &mut Enumerate<Chars>, mut start: usize) -> Option<(Param, usize)>446 fn param_from_str(raw: &str, ascii: &str, iter: &mut Enumerate<Chars>, mut start: usize) -> Option<(Param, usize)> {
447     let attr;
448     debug!("param_from_str, start={}", start);
449     loop {
450         match iter.next() {
451             Some((i, ' ')) if i == start => start = i + 1,
452             Some((i, c)) if i == start && is_restricted_name_first_char(c) => (),
453             Some((i, c)) if i > start && is_restricted_name_char(c) => (),
454             Some((i, '=')) if i > start => match FromStr::from_str(&ascii[start..i]) {
455                 Ok(a) => {
456                     attr = a;
457                     start = i + 1;
458                     break;
459                 },
460                 Err(_) => return None
461             },
462             _ => return None
463         }
464     }
465 
466     let value;
467     // values must be restrict-name-char or "anything goes"
468     let mut is_quoted = false;
469 
470     {
471         let substr = |a,b| { if attr==Attr::Charset { &ascii[a..b] } else { &raw[a..b] } };
472         let endstr = |a| { if attr==Attr::Charset { &ascii[a..] } else { &raw[a..] } };
473         loop {
474             match iter.next() {
475                 Some((i, '"')) if i == start => {
476                     debug!("quoted");
477                     is_quoted = true;
478                     start = i + 1;
479                 },
480                 Some((i, c)) if i == start && is_restricted_name_first_char(c) => (),
481                 Some((i, '"')) if i > start && is_quoted => match FromStr::from_str(substr(start,i)) {
482                     Ok(v) => {
483                         value = v;
484                         start = i + 1;
485                         break;
486                     },
487                     Err(_) => return None
488                 },
489                 Some((i, c)) if i > start && is_quoted || is_restricted_name_char(c) => (),
490                 Some((i, ';')) if i > start => match FromStr::from_str(substr(start,i)) {
491                     Ok(v) => {
492                         value = v;
493                         start = i + 1;
494                         break;
495                     },
496                     Err(_) => return None
497                 },
498                 None => match FromStr::from_str(endstr(start)) {
499                     Ok(v) => {
500                         value = v;
501                         start = raw.len();
502                         break;
503                     },
504                     Err(_) => return None
505                 },
506 
507                 _ => return None
508             }
509         }
510     }
511 
512     Some(((attr, value), start))
513 }
514 
515 // From [RFC6838](http://tools.ietf.org/html/rfc6838#section-4.2):
516 //
517 // > All registered media types MUST be assigned top-level type and
518 // > subtype names.  The combination of these names serves to uniquely
519 // > identify the media type, and the subtype name facet (or the absence
520 // > of one) identifies the registration tree.  Both top-level type and
521 // > subtype names are case-insensitive.
522 // >
523 // > Type and subtype names MUST conform to the following ABNF:
524 // >
525 // >     type-name = restricted-name
526 // >     subtype-name = restricted-name
527 // >
528 // >     restricted-name = restricted-name-first *126restricted-name-chars
529 // >     restricted-name-first  = ALPHA / DIGIT
530 // >     restricted-name-chars  = ALPHA / DIGIT / "!" / "#" /
531 // >                              "$" / "&" / "-" / "^" / "_"
532 // >     restricted-name-chars =/ "." ; Characters before first dot always
533 // >                                  ; specify a facet name
534 // >     restricted-name-chars =/ "+" ; Characters after last plus always
535 // >                                  ; specify a structured syntax suffix
536 //
/// Returns `true` when `c` may start a restricted name: a lowercase ASCII
/// letter or an ASCII digit. Uppercase letters are not accepted here.
fn is_restricted_name_first_char(c: char) -> bool {
    let is_lower_alpha = 'a' <= c && c <= 'z';
    let is_digit = '0' <= c && c <= '9';
    is_lower_alpha || is_digit
}
544 
is_restricted_name_char(c: char) -> bool545 fn is_restricted_name_char(c: char) -> bool {
546     if is_restricted_name_first_char(c) {
547         true
548     } else {
549         match c {
550             '!' |
551             '#' |
552             '$' |
553             '&' |
554             '-' |
555             '^' |
556             '.' |
557             '+' |
558             '_' => true,
559             _ => false
560         }
561     }
562 }
563 
// Unit tests for formatting, parsing and parameter lookup, plus optional
// serde round-trip and nightly-only benchmarks.
#[cfg(test)]
mod tests {
    use std::str::FromStr;
    #[cfg(feature = "nightly")]
    use test::Bencher;
    use super::{Mime, Value, Attr};

    // `Display` output for the fast-tracked `text/plain` forms.
    #[test]
    fn test_mime_show() {
        let mime = mime!(Text/Plain);
        assert_eq!(mime.to_string(), "text/plain".to_string());
        let mime = mime!(Text/Plain; Charset=Utf8);
        assert_eq!(mime.to_string(), "text/plain; charset=utf-8".to_string());
    }

    // Parsing: case-insensitivity, parameters, quoting, wildcards, and
    // rejection of misplaced `*`.
    #[test]
    fn test_mime_from_str() {
        assert_eq!(Mime::from_str("text/plain").unwrap(), mime!(Text/Plain));
        assert_eq!(Mime::from_str("TEXT/PLAIN").unwrap(), mime!(Text/Plain));
        assert_eq!(Mime::from_str("text/plain; charset=utf-8").unwrap(), mime!(Text/Plain; Charset=Utf8));
        assert_eq!(Mime::from_str("text/plain;charset=\"utf-8\"").unwrap(), mime!(Text/Plain; Charset=Utf8));
        assert_eq!(Mime::from_str("text/plain; charset=utf-8; foo=bar").unwrap(),
            mime!(Text/Plain; Charset=Utf8, ("foo")=("bar")));
        assert_eq!("*/*".parse::<Mime>().unwrap(), mime!(Star/Star));
        assert_eq!("image/*".parse::<Mime>().unwrap(), mime!(Image/Star));
        assert_eq!("text/*; charset=utf-8".parse::<Mime>().unwrap(), mime!(Text/Star; Charset=Utf8));
        assert!("*/png".parse::<Mime>().is_err());
        assert!("*image/png".parse::<Mime>().is_err());
        assert!("text/*plain".parse::<Mime>().is_err());
    }

    // Parameter values keep their case, except `charset`, which is
    // lowercased.
    #[test]
    fn test_case_sensitive_values() {
        assert_eq!(Mime::from_str("multipart/form-data; boundary=ABCDEFG").unwrap(),
                   mime!(Multipart/FormData; Boundary=("ABCDEFG")));
        assert_eq!(Mime::from_str("multipart/form-data; charset=BASE64; boundary=ABCDEFG").unwrap(),
                   mime!(Multipart/FormData; Charset=("base64"), Boundary=("ABCDEFG")));
    }

    // Lookup by `Attr` and by string, for known and extension parameters.
    #[test]
    fn test_get_param() {
        let mime = Mime::from_str("text/plain; charset=utf-8; foo=bar").unwrap();
        assert_eq!(mime.get_param(Attr::Charset), Some(&Value::Utf8));
        assert_eq!(mime.get_param("charset"), Some(&Value::Utf8));
        assert_eq!(mime.get_param("foo").unwrap(), "bar");
        assert_eq!(mime.get_param("baz"), None);
    }

    #[test]
    fn test_value_as_str() {
        assert_eq!(Value::Utf8.as_str(), "utf-8");
    }

    // Comparison with string slices works in both directions.
    #[test]
    fn test_value_eq_str() {
        assert_eq!(Value::Utf8, "utf-8");
        assert_eq!("utf-8", Value::Utf8);
    }

    // A mime survives a JSON serialize/deserialize round trip.
    #[cfg(feature = "serde")]
    #[test]
    fn test_serialize_deserialize() {
        use serde_json;

        let mime = Mime::from_str("text/plain; charset=utf-8; foo=bar").unwrap();
        let serialized = serde_json::to_string(&mime).unwrap();
        let deserialized: Mime = serde_json::from_str(&serialized).unwrap();
        assert_eq!(mime, deserialized);
    }

    // Measures `Display` into a reused buffer.
    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_fmt(b: &mut Bencher) {
        use std::fmt::Write;
        let mime = mime!(Text/Plain; Charset=Utf8);
        b.bytes = mime.to_string().as_bytes().len() as u64;
        let mut s = String::with_capacity(64);
        b.iter(|| {
            let _ = write!(s, "{}", mime);
            ::test::black_box(&s);
            // Empty the buffer for the next iteration; `clear` keeps the
            // allocated capacity, so no reallocation is measured.
            s.clear();
        })
    }

    // Measures parsing of a mime with two parameters.
    #[cfg(feature = "nightly")]
    #[bench]
    fn bench_from_str(b: &mut Bencher) {
        let s = "text/plain; charset=utf-8; foo=bar";
        b.bytes = s.as_bytes().len() as u64;
        b.iter(|| s.parse::<Mime>())
    }
}
656