1 #[allow(unused, deprecated)]
2 use std::ascii::AsciiExt;
3 use std::error::Error;
4 use std::fmt;
5 use std::iter::Enumerate;
6 use std::str::Bytes;
7 
8 use super::{Mime, Source, ParamSource, Indexed, CHARSET, UTF_8};
9 
/// Reasons a string can be rejected by the media type parser in this module.
#[derive(Debug)]
pub enum ParseError {
    /// The input ended before a slash (`/`) separating the type from the
    /// subtype was found.
    MissingSlash,
    /// The input ended inside a parameter name, before the equals sign (`=`)
    /// that separates it from its value.
    MissingEqual,
    /// The input ended inside a quoted parameter value, before the closing
    /// quote (`"`).
    MissingQuote,
    /// A byte was found that is not allowed at its position.
    InvalidToken {
        /// Byte offset of the offending byte within the parsed string.
        pos: usize,
        /// The offending byte itself.
        byte: u8,
    },
}
20 
21 impl ParseError {
s(&self) -> &str22     fn s(&self) -> &str {
23         use self::ParseError::*;
24 
25         match *self {
26             MissingSlash => "a slash (/) was missing between the type and subtype",
27             MissingEqual => "an equals sign (=) was missing between a parameter and its value",
28             MissingQuote => "a quote (\") was missing from a parameter value",
29             InvalidToken { .. } => "an invalid token was encountered",
30         }
31     }
32 }
33 
34 impl fmt::Display for ParseError {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result35     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
36         if let ParseError::InvalidToken { pos, byte } = *self {
37             write!(f, "{}, {:X} at position {}", self.s(), byte, pos)
38         } else {
39             f.write_str(self.s())
40         }
41     }
42 }
43 
44 impl Error for ParseError {
45     // Minimum Rust is 1.15, Error::description was still required then
46     #[allow(deprecated)]
description(&self) -> &str47     fn description(&self) -> &str {
48         self.s()
49     }
50 }
51 
parse(s: &str) -> Result<Mime, ParseError>52 pub fn parse(s: &str) -> Result<Mime, ParseError> {
53     if s == "*/*" {
54         return Ok(::STAR_STAR);
55     }
56 
57     let mut iter = s.bytes().enumerate();
58     // toplevel
59     let mut start;
60     let slash;
61     loop {
62         match iter.next() {
63             Some((_, c)) if is_token(c) => (),
64             Some((i, b'/')) if i > 0 => {
65                 slash = i;
66                 start = i + 1;
67                 break;
68             },
69             None => return Err(ParseError::MissingSlash), // EOF and no toplevel is no Mime
70             Some((pos, byte)) => return Err(ParseError::InvalidToken {
71                 pos: pos,
72                 byte: byte,
73             })
74         };
75 
76     }
77 
78     // sublevel
79     let mut plus = None;
80     loop {
81         match iter.next() {
82             Some((i, b'+')) if i > start => {
83                 plus = Some(i);
84             },
85             Some((i, b';')) if i > start => {
86                 start = i;
87                 break;
88             },
89             Some((_, c)) if is_token(c) => (),
90             None => {
91                 return Ok(Mime {
92                     source: Source::Dynamic(s.to_ascii_lowercase()),
93                     slash: slash,
94                     plus: plus,
95                     params: ParamSource::None,
96                 });
97             },
98             Some((pos, byte)) => return Err(ParseError::InvalidToken {
99                 pos: pos,
100                 byte: byte,
101             })
102         };
103     }
104 
105     // params
106     let params = params_from_str(s, &mut iter, start)?;
107 
108     let src = match params {
109         ParamSource::Utf8(_)  => s.to_ascii_lowercase(),
110         ParamSource::Custom(semicolon, ref indices) => lower_ascii_with_params(s, semicolon, indices),
111         ParamSource::None => {
112             // Chop off the empty list
113             s[..start].to_ascii_lowercase()
114         }
115     };
116 
117     Ok(Mime {
118         source: Source::Dynamic(src),
119         slash: slash,
120         plus: plus,
121         params: params,
122     })
123 }
124 
125 
params_from_str(s: &str, iter: &mut Enumerate<Bytes>, mut start: usize) -> Result<ParamSource, ParseError>126 fn params_from_str(s: &str, iter: &mut Enumerate<Bytes>, mut start: usize) -> Result<ParamSource, ParseError> {
127     let semicolon = start;
128     start += 1;
129     let mut params = ParamSource::None;
130     'params: while start < s.len() {
131         let name;
132         // name
133         'name: loop {
134             match iter.next() {
135                 Some((i, b' ')) if i == start => {
136                     start = i + 1;
137                     continue 'params;
138                 },
139                 Some((_, c)) if is_token(c) => (),
140                 Some((i, b'=')) if i > start => {
141                     name = Indexed(start, i);
142                     start = i + 1;
143                     break 'name;
144                 },
145                 None => return Err(ParseError::MissingEqual),
146                 Some((pos, byte)) => return Err(ParseError::InvalidToken {
147                     pos: pos,
148                     byte: byte,
149                 }),
150             }
151         }
152 
153         let value;
154         // values must be restrict-name-char or "anything goes"
155         let mut is_quoted = false;
156 
157         'value: loop {
158             if is_quoted {
159                 match iter.next() {
160                     Some((i, b'"')) if i > start => {
161                         value = Indexed(start, i);
162                         break 'value;
163                     },
164                     Some((_, c)) if is_restricted_quoted_char(c) => (),
165                     None => return Err(ParseError::MissingQuote),
166                     Some((pos, byte)) => return Err(ParseError::InvalidToken {
167                         pos: pos,
168                         byte: byte,
169                     }),
170                 }
171             } else {
172                 match iter.next() {
173                     Some((i, b'"')) if i == start => {
174                         is_quoted = true;
175                         start = i + 1;
176                     },
177                     Some((_, c)) if is_token(c) => (),
178                     Some((i, b';')) if i > start => {
179                         value = Indexed(start, i);
180                         start = i + 1;
181                         break 'value;
182                     }
183                     None => {
184                         value = Indexed(start, s.len());
185                         start = s.len();
186                         break 'value;
187                     },
188 
189                     Some((pos, byte)) => return Err(ParseError::InvalidToken {
190                         pos: pos,
191                         byte: byte,
192                     }),
193                 }
194             }
195         }
196 
197         if is_quoted {
198             'ws: loop {
199                 match iter.next() {
200                     Some((i, b';')) => {
201                         // next param
202                         start = i + 1;
203                         break 'ws;
204                     },
205                     Some((_, b' ')) => {
206                         // skip whitespace
207                     },
208                     None => {
209                         // eof
210                         start = s.len();
211                         break 'ws;
212                     },
213                     Some((pos, byte)) => return Err(ParseError::InvalidToken {
214                         pos: pos,
215                         byte: byte,
216                     }),
217                 }
218             }
219         }
220 
221         match params {
222             ParamSource::Utf8(i) => {
223                 let i = i + 2;
224                 let charset = Indexed(i, "charset".len() + i);
225                 let utf8 = Indexed(charset.1 + 1, charset.1 + "utf-8".len() + 1);
226                 params = ParamSource::Custom(semicolon, vec![
227                     (charset, utf8),
228                     (name, value),
229                 ]);
230             },
231             ParamSource::Custom(_, ref mut vec) => {
232                 vec.push((name, value));
233             },
234             ParamSource::None => {
235                 if semicolon + 2 == name.0 && CHARSET == &s[name.0..name.1] {
236                     if UTF_8 == &s[value.0..value.1] {
237                         params = ParamSource::Utf8(semicolon);
238                         continue 'params;
239                     }
240                 }
241                 params = ParamSource::Custom(semicolon, vec![(name, value)]);
242             },
243         }
244     }
245     Ok(params)
246 }
247 
lower_ascii_with_params(s: &str, semi: usize, params: &[(Indexed, Indexed)]) -> String248 fn lower_ascii_with_params(s: &str, semi: usize, params: &[(Indexed, Indexed)]) -> String {
249     let mut owned = s.to_owned();
250     owned[..semi].make_ascii_lowercase();
251 
252     for &(ref name, ref value) in params {
253         owned[name.0..name.1].make_ascii_lowercase();
254         // Since we just converted this part of the string to lowercase,
255         // we can skip the `Name == &str` unicase check and do a faster
256         // memcmp instead.
257         if &owned[name.0..name.1] == CHARSET.source {
258             owned[value.0..value.1].make_ascii_lowercase();
259         }
260     }
261 
262     owned
263 }
264 
265 // From [RFC6838](http://tools.ietf.org/html/rfc6838#section-4.2):
266 //
267 // > All registered media types MUST be assigned top-level type and
268 // > subtype names.  The combination of these names serves to uniquely
269 // > identify the media type, and the subtype name facet (or the absence
270 // > of one) identifies the registration tree.  Both top-level type and
271 // > subtype names are case-insensitive.
272 // >
273 // > Type and subtype names MUST conform to the following ABNF:
274 // >
275 // >     type-name = restricted-name
276 // >     subtype-name = restricted-name
277 // >
278 // >     restricted-name = restricted-name-first *126restricted-name-chars
279 // >     restricted-name-first  = ALPHA / DIGIT
280 // >     restricted-name-chars  = ALPHA / DIGIT / "!" / "#" /
281 // >                              "$" / "&" / "-" / "^" / "_"
282 // >     restricted-name-chars =/ "." ; Characters before first dot always
283 // >                                  ; specify a facet name
284 // >     restricted-name-chars =/ "+" ; Characters after last plus always
285 // >                                  ; specify a structured syntax suffix
286 
287 // However, [HTTP](https://tools.ietf.org/html/rfc7231#section-3.1.1.1):
288 //
289 // >     media-type = type "/" subtype *( OWS ";" OWS parameter )
290 // >     type       = token
291 // >     subtype    = token
292 // >     parameter  = token "=" ( token / quoted-string )
293 //
294 // Where token is defined as:
295 //
296 // >     token = 1*tchar
297 // >     tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
298 // >        "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
299 //
// So the two grammars clearly disagree ¯\_(ツ)_/¯ -- the lookup table below accepts HTTP's `tchar` set.
301 
// Expands a comma-terminated list of integer flags into an array of `bool`s,
// one element per flag, where every non-zero flag becomes `true`.
macro_rules! byte_map {
    ( $( $flag:expr, )* ) => {
        [ $( $flag != 0, )* ]
    };
}
307 
/// Lookup table indexed by byte value: an entry is `true` when that byte may
/// appear in a token (ASCII letters, digits and the punctuation
/// ``!#$%&'*+-.^_`|~``). Each row below covers 16 consecutive byte values.
static TOKEN_MAP: [bool; 256] = byte_map![
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00-0x0F: control characters
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10-0x1F: control characters
    0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, // 0x20-0x2F: SP ! " # $ % & ' ( ) * + , - . /
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 0x30-0x3F: 0-9 : ; < = > ?
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40-0x4F: @ A-O
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, // 0x50-0x5F: P-Z [ \ ] ^ _
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60-0x6F: ` a-o
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, // 0x70-0x7F: p-z { | } ~ DEL
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80-0x8F: non-ASCII
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90-0x9F: non-ASCII
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0-0xAF: non-ASCII
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0-0xBF: non-ASCII
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0-0xCF: non-ASCII
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0-0xDF: non-ASCII
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0-0xEF: non-ASCII
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0-0xFF: non-ASCII
];
326 
is_token(c: u8) -> bool327 fn is_token(c: u8) -> bool {
328     TOKEN_MAP[c as usize]
329 }
330 
/// Reports whether `c` may appear inside a quoted parameter value: any byte
/// except the ASCII control characters (0x00-0x1F) and DEL (0x7F).
fn is_restricted_quoted_char(c: u8) -> bool {
    let is_control = c < 0x20 || c == 0x7F;
    !is_control
}
334 
/// Checks every entry of `TOKEN_MAP` against the token character set: ASCII
/// letters, ASCII digits and the punctuation listed below.
#[test]
fn test_lookup_tables() {
    // Plain comparisons are used instead of range patterns so that the test
    // builds without warnings on both old and current compilers.
    const TOKEN_PUNCTUATION: &'static [u8] = b"!#$%&'*+-.^_`|~";

    for (i, &valid) in TOKEN_MAP.iter().enumerate() {
        let i = i as u8;
        let is_letter = (b'a' <= i && i <= b'z') || (b'A' <= i && i <= b'Z');
        let is_digit = b'0' <= i && i <= b'9';
        let should = is_letter || is_digit || TOKEN_PUNCTUATION.contains(&i);
        assert_eq!(valid, should, "{:?} ({}) should be {}", i as char, i, should);
    }
}
364