1 use std::str;
2 use std::fmt;
3 use std::borrow::Cow;
4 use std::io;
5 
/// Wrapper type that implements `Display`. Formatting it encodes on the fly,
/// without building an intermediate string.
///
/// Percent-encodes every byte except ASCII alphanumerics and `-`, `_`, `.`, `~`.
/// Assumes UTF-8 encoding.
///
/// The wrapped value is a public field, so `Encoded(data)` constructs the wrapper
/// directly and `.0` gives the original data back.
///
/// ```rust
/// use urlencoding::Encoded;
/// assert_eq!("hello%21", format!("{}", Encoded("hello!")));
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
#[repr(transparent)]
pub struct Encoded<Str>(pub Str);
16 
17 impl<Str: AsRef<[u8]>> Encoded<Str> {
18     /// Long way of writing `Encoded(data)`
19     ///
20     /// Takes any string-like type or a slice of bytes, either owned or borrowed.
21     #[inline(always)]
new(string: Str) -> Self22     pub fn new(string: Str) -> Self {
23         Self(string)
24     }
25 
26     #[inline(always)]
to_str(&self) -> Cow<str>27     pub fn to_str(&self) -> Cow<str> {
28         encode_binary(self.0.as_ref())
29     }
30 
31     /// Perform urlencoding to a string
32     #[inline]
33     #[allow(clippy::inherent_to_string_shadow_display)]
to_string(&self) -> String34     pub fn to_string(&self) -> String {
35         self.to_str().into_owned()
36     }
37 
38     /// Perform urlencoding into a writer
39     #[inline]
write<W: io::Write>(&self, writer: &mut W) -> io::Result<()>40     pub fn write<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
41         encode_into(self.0.as_ref(), false, |s| writer.write_all(s.as_bytes()))?;
42         Ok(())
43     }
44 
45     /// Perform urlencoding into a string
46     #[inline]
append_to(&self, string: &mut String)47     pub fn append_to(&self, string: &mut String) {
48         append_string(&self.0.as_ref(), string, false);
49     }
50 }
51 
52 impl<String: AsRef<[u8]>> fmt::Display for Encoded<String> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result53     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
54         encode_into(self.0.as_ref(), false, |s| f.write_str(s))?;
55         Ok(())
56     }
57 }
58 
59 /// Percent-encodes every byte except alphanumerics and `-`, `_`, `.`, `~`. Assumes UTF-8 encoding.
60 #[inline]
encode(data: &str) -> String61 pub fn encode(data: &str) -> String {
62     encode_binary(data.as_bytes()).into_owned()
63 }
64 
65 /// Percent-encodes every byte except alphanumerics and `-`, `_`, `.`, `~`.
66 #[inline]
encode_binary(data: &[u8]) -> Cow<str>67 pub fn encode_binary(data: &[u8]) -> Cow<str> {
68     // add maybe extra capacity, but try not to exceed allocator's bucket size
69     let mut escaped = String::with_capacity(data.len() | 15);
70     let unmodified = append_string(data, &mut escaped, true);
71     if unmodified {
72         return Cow::Borrowed(unsafe {
73             // encode_into has checked it's ASCII
74             str::from_utf8_unchecked(data)
75         });
76     }
77     Cow::Owned(escaped)
78 }
79 
append_string(data: &[u8], escaped: &mut String, may_skip: bool) -> bool80 fn append_string(data: &[u8], escaped: &mut String, may_skip: bool) -> bool {
81     encode_into(data, may_skip, |s| Ok::<_, std::convert::Infallible>(escaped.push_str(s))).unwrap()
82 }
83 
encode_into<E>(mut data: &[u8], may_skip_write: bool, mut push_str: impl FnMut(&str) -> Result<(), E>) -> Result<bool, E>84 fn encode_into<E>(mut data: &[u8], may_skip_write: bool, mut push_str: impl FnMut(&str) -> Result<(), E>) -> Result<bool, E> {
85     let mut pushed = false;
86     loop {
87         // Fast path to skip over safe chars at the beginning of the remaining string
88         let ascii_len = data.iter()
89             .take_while(|&&c| matches!(c, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' |  b'-' | b'.' | b'_' | b'~')).count();
90 
91         let (safe, rest) = if ascii_len >= data.len() {
92             if !pushed && may_skip_write {
93                 return Ok(true);
94             }
95             (data, &[][..]) // redundatnt to optimize out a panic in split_at
96         } else {
97             data.split_at(ascii_len)
98         };
99         pushed = true;
100         if !safe.is_empty() {
101             push_str(unsafe { str::from_utf8_unchecked(safe) })?;
102         }
103         if rest.is_empty() {
104             break;
105         }
106 
107         match rest.split_first() {
108             Some((byte, rest)) => {
109                 let enc = &[b'%', to_hex_digit(byte >> 4), to_hex_digit(byte & 15)];
110                 push_str(unsafe { str::from_utf8_unchecked(enc) })?;
111                 data = rest;
112             }
113             None => break,
114         };
115     }
116     Ok(false)
117 }
118 
/// Maps a nibble to its uppercase ASCII hexadecimal digit.
///
/// Values `0..=9` give `b'0'..=b'9'` and `10..=15` give `b'A'..=b'F'`.
/// Callers are expected to pass a value in `0..=15`.
#[inline]
fn to_hex_digit(digit: u8) -> u8 {
    if digit < 10 {
        b'0' + digit
    } else {
        b'A' + (digit - 10)
    }
}
126 
/// Exercises every `Encoded` output path, both for input that passes through
/// untouched and for input that has to be escaped.
#[test]
fn lazy_writer() {
    // Input made only of unreserved characters passes through unchanged.
    let mut s = "he".to_string();
    Encoded("llo").append_to(&mut s);
    assert_eq!("hello", s);

    assert_eq!("hello", Encoded("hello").to_string());
    assert_eq!("hello", format!("{}", Encoded("hello")));
    assert_eq!("hello", Encoded("hello").to_str());
    assert!(matches!(Encoded("hello").to_str(), Cow::Borrowed(_)));

    // Empty input is also returned borrowed.
    assert_eq!("", Encoded("").to_str());
    assert!(matches!(Encoded("").to_str(), Cow::Borrowed(_)));

    // Input that needs escaping takes the owned path on every API.
    let mut s = "q=".to_string();
    Encoded("a b").append_to(&mut s);
    assert_eq!("q=a%20b", s);

    assert_eq!("hello%21", Encoded("hello!").to_string());
    assert_eq!("hello%21", format!("{}", Encoded("hello!")));
    assert_eq!("hello%21", Encoded("hello!").to_str());
    assert!(matches!(Encoded("hello!").to_str(), Cow::Owned(_)));

    // `write` produces the same bytes through an `io::Write` sink.
    let mut out = Vec::new();
    Encoded("a/b").write(&mut out).unwrap();
    assert_eq!(&b"a%2Fb"[..], &out[..]);

    // Arbitrary (non-UTF-8) bytes are accepted and escaped byte by byte.
    assert_eq!("%FFa", Encoded(&[0xFFu8, b'a'][..]).to_string());
}
138