1 // Copyright 2013-2016 The rust-url developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8 
9 //! Parser and serializer for the [`application/x-www-form-urlencoded` syntax](
10 //! http://url.spec.whatwg.org/#application/x-www-form-urlencoded),
11 //! as used by HTML forms.
12 //!
//! Converts between a string (such as a URL’s query string)
14 //! and a sequence of (name, value) pairs.
15 
16 use percent_encoding::{percent_decode, percent_encode_byte};
17 use query_encoding::{self, decode_utf8_lossy, EncodingOverride};
18 use std::borrow::{Borrow, Cow};
19 use std::str;
20 
/// Convert a byte string in the `application/x-www-form-urlencoded` syntax
/// into an iterator of (name, value) pairs.
///
/// Use `parse(input.as_bytes())` to parse a `&str` string.
///
/// The names and values are percent-decoded. For instance, `%23first=%25try%25` will be
/// converted to `[("#first", "%try%")]`.
///
/// This function only wraps `input`; pairs are split and decoded
/// as the returned iterator is advanced.
#[inline]
pub fn parse(input: &[u8]) -> Parse {
    Parse { input }
}
/// The return type of `parse()`.
///
/// Iterating yields one `(name, value)` pair per non-empty `&`-separated sequence.
#[derive(Copy, Clone)]
pub struct Parse<'a> {
    // The part of the input that has not been consumed by `next()` yet.
    input: &'a [u8],
}
37 
38 impl<'a> Iterator for Parse<'a> {
39     type Item = (Cow<'a, str>, Cow<'a, str>);
40 
next(&mut self) -> Option<Self::Item>41     fn next(&mut self) -> Option<Self::Item> {
42         loop {
43             if self.input.is_empty() {
44                 return None;
45             }
46             let mut split2 = self.input.splitn(2, |&b| b == b'&');
47             let sequence = split2.next().unwrap();
48             self.input = split2.next().unwrap_or(&[][..]);
49             if sequence.is_empty() {
50                 continue;
51             }
52             let mut split2 = sequence.splitn(2, |&b| b == b'=');
53             let name = split2.next().unwrap();
54             let value = split2.next().unwrap_or(&[][..]);
55             return Some((decode(name), decode(value)));
56         }
57     }
58 }
59 
decode(input: &[u8]) -> Cow<str>60 fn decode(input: &[u8]) -> Cow<str> {
61     let replaced = replace_plus(input);
62     decode_utf8_lossy(match percent_decode(&replaced).into() {
63         Cow::Owned(vec) => Cow::Owned(vec),
64         Cow::Borrowed(_) => replaced,
65     })
66 }
67 
/// Replace every b'+' with b' '.
///
/// The input is returned borrowed when it contains no b'+';
/// otherwise a converted copy is returned.
fn replace_plus(input: &[u8]) -> Cow<[u8]> {
    if !input.contains(&b'+') {
        return Cow::Borrowed(input);
    }
    let spaced: Vec<u8> = input
        .iter()
        .map(|&byte| if byte == b'+' { b' ' } else { byte })
        .collect();
    Cow::Owned(spaced)
}
84 
impl<'a> Parse<'a> {
    /// Return a new iterator that yields pairs of `String` instead of pairs of `Cow<str>`.
    ///
    /// The returned iterator wraps this one and continues from its current position.
    pub fn into_owned(self) -> ParseIntoOwned<'a> {
        ParseIntoOwned { inner: self }
    }
}
91 
/// Like `Parse`, but yields pairs of `String` instead of pairs of `Cow<str>`.
///
/// Created by `Parse::into_owned()`.
pub struct ParseIntoOwned<'a> {
    // The borrowing iterator whose items are converted to owned strings.
    inner: Parse<'a>,
}
96 
97 impl<'a> Iterator for ParseIntoOwned<'a> {
98     type Item = (String, String);
99 
next(&mut self) -> Option<Self::Item>100     fn next(&mut self) -> Option<Self::Item> {
101         self.inner
102             .next()
103             .map(|(k, v)| (k.into_owned(), v.into_owned()))
104     }
105 }
106 
/// The [`application/x-www-form-urlencoded` byte serializer](
/// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer).
///
/// Return an iterator of `&str` slices.
///
/// ASCII alphanumerics and `*`, `-`, `.`, `_` are yielded unchanged,
/// a space is yielded as `+`, and every other byte is percent-encoded.
pub fn byte_serialize(input: &[u8]) -> ByteSerialize {
    ByteSerialize { bytes: input }
}
114 
/// Return value of `byte_serialize()`.
///
/// An iterator of `&str` slices.
#[derive(Debug)]
pub struct ByteSerialize<'a> {
    // The bytes that have not been serialized yet.
    bytes: &'a [u8],
}
120 
/// Whether `byte` is emitted as-is by the byte serializer:
/// ASCII letters and digits, plus `*`, `-`, `.` and `_`.
fn byte_serialized_unchanged(byte: u8) -> bool {
    match byte {
        b'*' | b'-' | b'.' | b'_' => true,
        _ => byte.is_ascii_alphanumeric(),
    }
}
124 
125 impl<'a> Iterator for ByteSerialize<'a> {
126     type Item = &'a str;
127 
next(&mut self) -> Option<&'a str>128     fn next(&mut self) -> Option<&'a str> {
129         if let Some((&first, tail)) = self.bytes.split_first() {
130             if !byte_serialized_unchanged(first) {
131                 self.bytes = tail;
132                 return Some(if first == b' ' {
133                     "+"
134                 } else {
135                     percent_encode_byte(first)
136                 });
137             }
138             let position = tail.iter().position(|&b| !byte_serialized_unchanged(b));
139             let (unchanged_slice, remaining) = match position {
140                 // 1 for first_byte + i unchanged in tail
141                 Some(i) => self.bytes.split_at(1 + i),
142                 None => (self.bytes, &[][..]),
143             };
144             self.bytes = remaining;
145             Some(unsafe { str::from_utf8_unchecked(unchanged_slice) })
146         } else {
147             None
148         }
149     }
150 
size_hint(&self) -> (usize, Option<usize>)151     fn size_hint(&self) -> (usize, Option<usize>) {
152         if self.bytes.is_empty() {
153             (0, Some(0))
154         } else {
155             (1, Some(self.bytes.len()))
156         }
157     }
158 }
159 
/// The [`application/x-www-form-urlencoded` serializer](
/// https://url.spec.whatwg.org/#concept-urlencoded-serializer).
pub struct Serializer<'a, T: Target> {
    // The output target; `None` once `finish()` has taken it.
    target: Option<T>,
    // Byte offset in the target string at which the serialized pairs begin.
    start_position: usize,
    // Encoding override applied to names and values before percent-encoding.
    encoding: EncodingOverride<'a>,
}
167 
/// An output that a `Serializer` can append name/value pairs to.
pub trait Target {
    /// Borrow the string that serialized pairs are written into.
    fn as_mut_string(&mut self) -> &mut String;
    /// Consume the target and produce the value returned by `Serializer::finish`.
    fn finish(self) -> Self::Finished;
    /// The type returned by `finish`.
    type Finished;
}
173 
// An owned `String` is its own output buffer and is handed back unchanged by `finish`.
impl Target for String {
    fn as_mut_string(&mut self) -> &mut String {
        self
    }
    fn finish(self) -> Self {
        self
    }
    type Finished = Self;
}
183 
// A borrowed `String` is written to in place; `finish` returns the same borrow.
impl<'a> Target for &'a mut String {
    fn as_mut_string(&mut self) -> &mut String {
        // Reborrow through both reference layers (`&mut &'a mut String`).
        &mut **self
    }
    fn finish(self) -> Self {
        self
    }
    type Finished = Self;
}
193 
// `as_mut_string` here exposes the internal serialization of an `Url`,
// which should not be exposed to users.
// We achieve that by not giving users direct access to `UrlQuery`:
// * Its fields are private
//   (and so can not be constructed with struct literal syntax outside of this crate),
// * It has no constructor
// * It is only visible (on the type level) to users in the return type of
//   `Url::query_pairs_mut` which is `Serializer<UrlQuery>`
// * `Serializer` keeps its target in a private field
// * Unlike in other `Target` impls, `UrlQuery::finish` does not return `Self`.
impl<'a> Target for ::UrlQuery<'a> {
    // Pairs are written directly into the URL's own serialization string.
    // Panics if `url` has already been taken (see `finish`).
    fn as_mut_string(&mut self) -> &mut String {
        &mut self.url.as_mut().unwrap().serialization
    }

    // Take the URL out of `self`, pass it whatever `fragment` currently holds,
    // and return the URL.
    // NOTE(review): presumably `fragment` is the URL's fragment, set aside while
    // the query was being edited; confirm against `Url::query_pairs_mut`.
    fn finish(mut self) -> &'a mut ::Url {
        let url = self.url.take().unwrap();
        url.restore_already_parsed_fragment(self.fragment.take());
        url
    }

    type Finished = &'a mut ::Url;
}
217 
218 impl<'a, T: Target> Serializer<'a, T> {
219     /// Create a new `application/x-www-form-urlencoded` serializer for the given target.
220     ///
221     /// If the target is non-empty,
222     /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
new(target: T) -> Self223     pub fn new(target: T) -> Self {
224         Self::for_suffix(target, 0)
225     }
226 
227     /// Create a new `application/x-www-form-urlencoded` serializer
228     /// for a suffix of the given target.
229     ///
230     /// If that suffix is non-empty,
231     /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
for_suffix(mut target: T, start_position: usize) -> Self232     pub fn for_suffix(mut target: T, start_position: usize) -> Self {
233         &target.as_mut_string()[start_position..]; // Panic if out of bounds
234         Serializer {
235             target: Some(target),
236             start_position,
237             encoding: None,
238         }
239     }
240 
241     /// Remove any existing name/value pair.
242     ///
243     /// Panics if called after `.finish()`.
clear(&mut self) -> &mut Self244     pub fn clear(&mut self) -> &mut Self {
245         string(&mut self.target).truncate(self.start_position);
246         self
247     }
248 
249     /// Set the character encoding to be used for names and values before percent-encoding.
encoding_override(&mut self, new: EncodingOverride<'a>) -> &mut Self250     pub fn encoding_override(&mut self, new: EncodingOverride<'a>) -> &mut Self {
251         self.encoding = new;
252         self
253     }
254 
255     /// Serialize and append a name/value pair.
256     ///
257     /// Panics if called after `.finish()`.
append_pair(&mut self, name: &str, value: &str) -> &mut Self258     pub fn append_pair(&mut self, name: &str, value: &str) -> &mut Self {
259         append_pair(
260             string(&mut self.target),
261             self.start_position,
262             self.encoding,
263             name,
264             value,
265         );
266         self
267     }
268 
269     /// Serialize and append a number of name/value pairs.
270     ///
271     /// This simply calls `append_pair` repeatedly.
272     /// This can be more convenient, so the user doesn’t need to introduce a block
273     /// to limit the scope of `Serializer`’s borrow of its string.
274     ///
275     /// Panics if called after `.finish()`.
extend_pairs<I, K, V>(&mut self, iter: I) -> &mut Self where I: IntoIterator, I::Item: Borrow<(K, V)>, K: AsRef<str>, V: AsRef<str>,276     pub fn extend_pairs<I, K, V>(&mut self, iter: I) -> &mut Self
277     where
278         I: IntoIterator,
279         I::Item: Borrow<(K, V)>,
280         K: AsRef<str>,
281         V: AsRef<str>,
282     {
283         {
284             let string = string(&mut self.target);
285             for pair in iter {
286                 let &(ref k, ref v) = pair.borrow();
287                 append_pair(
288                     string,
289                     self.start_position,
290                     self.encoding,
291                     k.as_ref(),
292                     v.as_ref(),
293                 );
294             }
295         }
296         self
297     }
298 
299     /// If this serializer was constructed with a string, take and return that string.
300     ///
301     /// ```rust
302     /// use url::form_urlencoded;
303     /// let encoded: String = form_urlencoded::Serializer::new(String::new())
304     ///     .append_pair("foo", "bar & baz")
305     ///     .append_pair("saison", "Été+hiver")
306     ///     .finish();
307     /// assert_eq!(encoded, "foo=bar+%26+baz&saison=%C3%89t%C3%A9%2Bhiver");
308     /// ```
309     ///
310     /// Panics if called more than once.
finish(&mut self) -> T::Finished311     pub fn finish(&mut self) -> T::Finished {
312         self.target
313             .take()
314             .expect("url::form_urlencoded::Serializer double finish")
315             .finish()
316     }
317 }
318 
/// Push a `&` separator when `string` already has content after `start_position`,
/// i.e. when at least one pair has been written.
fn append_separator_if_needed(string: &mut String, start_position: usize) {
    let has_content = string.len() > start_position;
    if has_content {
        string.push('&');
    }
}
324 
string<T: Target>(target: &mut Option<T>) -> &mut String325 fn string<T: Target>(target: &mut Option<T>) -> &mut String {
326     target
327         .as_mut()
328         .expect("url::form_urlencoded::Serializer finished")
329         .as_mut_string()
330 }
331 
// Append one serialized `name=value` pair to `string`.
//
// A `&` separator is written first when `string` already has content after
// `start_position`. Both `name` and `value` go through `append_encoded`
// with the given `encoding`.
fn append_pair(
    string: &mut String,
    start_position: usize,
    encoding: EncodingOverride,
    name: &str,
    value: &str,
) {
    append_separator_if_needed(string, start_position);
    append_encoded(name, string, encoding);
    string.push('=');
    append_encoded(value, string, encoding);
}
344 
append_encoded(s: &str, string: &mut String, encoding: EncodingOverride)345 fn append_encoded(s: &str, string: &mut String, encoding: EncodingOverride) {
346     string.extend(byte_serialize(&query_encoding::encode(encoding, s.into())))
347 }
348