1 use std::{
2     borrow::Borrow,
3     cmp::{self, Ordering},
4     fmt, hash, iter,
5     ops::Deref,
6     sync::Arc,
7 };
8 
/// A `SmolStr` is a string type that has the following properties:
///
/// * `size_of::<SmolStr>() == size_of::<String>()`
/// * `Clone` is `O(1)`
/// * Strings are stack-allocated if they are:
///     * Up to 22 bytes long
///     * Longer than 22 bytes, but substrings of `WS` (see below). Such strings consist
///       solely of consecutive newlines (at most 32), followed by consecutive spaces
///       (at most 128)
/// * If a string does not satisfy the aforementioned conditions, it is heap-allocated
///
/// Unlike `String`, however, `SmolStr` is immutable. The primary use case for
/// `SmolStr` is a good enough default storage for tokens of typical programming
/// languages. Strings consisting of a series of newlines, followed by a series of
/// spaces are a typical pattern in computer programs because of indentation.
/// Note that a specialized interner might be a better solution for some use cases.
#[derive(Clone)]
pub struct SmolStr(Repr);
26 
27 impl SmolStr {
28     #[deprecated = "Use `new_inline` instead"]
new_inline_from_ascii(len: usize, bytes: &[u8]) -> SmolStr29     pub const fn new_inline_from_ascii(len: usize, bytes: &[u8]) -> SmolStr {
30         let _len_is_short = [(); INLINE_CAP + 1][len];
31 
32         const ZEROS: &[u8] = &[0; INLINE_CAP];
33 
34         let mut buf = [0; INLINE_CAP];
35         macro_rules! s {
36             ($($idx:literal),*) => ( $(s!(set $idx);)* );
37             (set $idx:literal) => ({
38                 let src: &[u8] = [ZEROS, bytes][($idx < len) as usize];
39                 let byte = src[$idx];
40                 let _is_ascii = [(); 128][byte as usize];
41                 buf[$idx] = byte
42             });
43         }
44         s!(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21);
45         SmolStr(Repr::Inline {
46             len: len as u8,
47             buf,
48         })
49     }
50 
51     /// Constructs inline variant of `SmolStr`.
52     ///
53     /// Panics if `text.len() > 22`.
54     #[inline]
new_inline(text: &str) -> SmolStr55     pub const fn new_inline(text: &str) -> SmolStr {
56         let mut buf = [0; INLINE_CAP];
57         let mut i = 0;
58         while i < text.len() {
59             buf[i] = text.as_bytes()[i];
60             i += 1
61         }
62         SmolStr(Repr::Inline {
63             len: text.len() as u8,
64             buf,
65         })
66     }
67 
new<T>(text: T) -> SmolStr where T: AsRef<str>,68     pub fn new<T>(text: T) -> SmolStr
69     where
70         T: AsRef<str>,
71     {
72         SmolStr(Repr::new(text))
73     }
74 
75     #[inline(always)]
as_str(&self) -> &str76     pub fn as_str(&self) -> &str {
77         self.0.as_str()
78     }
79 
80     #[inline(always)]
to_string(&self) -> String81     pub fn to_string(&self) -> String {
82         self.as_str().to_string()
83     }
84 
85     #[inline(always)]
len(&self) -> usize86     pub fn len(&self) -> usize {
87         self.0.len()
88     }
89 
90     #[inline(always)]
is_empty(&self) -> bool91     pub fn is_empty(&self) -> bool {
92         self.0.is_empty()
93     }
94 
95     #[inline(always)]
is_heap_allocated(&self) -> bool96     pub fn is_heap_allocated(&self) -> bool {
97         match self.0 {
98             Repr::Heap(..) => true,
99             _ => false,
100         }
101     }
102 
from_char_iter<I: iter::Iterator<Item = char>>(mut iter: I) -> SmolStr103     fn from_char_iter<I: iter::Iterator<Item = char>>(mut iter: I) -> SmolStr {
104         let (min_size, _) = iter.size_hint();
105         if min_size > INLINE_CAP {
106             let heap: String = iter.collect();
107             return SmolStr(Repr::Heap(heap.into_boxed_str().into()));
108         }
109         let mut len = 0;
110         let mut buf = [0u8; INLINE_CAP];
111         while let Some(ch) = iter.next() {
112             let size = ch.len_utf8();
113             if size + len > INLINE_CAP {
114                 let (min_remaining, _) = iter.size_hint();
115                 let mut heap = String::with_capacity(size + len + min_remaining);
116                 heap.push_str(std::str::from_utf8(&buf[..len]).unwrap());
117                 heap.push(ch);
118                 heap.extend(iter);
119                 return SmolStr(Repr::Heap(heap.into_boxed_str().into()));
120             }
121             ch.encode_utf8(&mut buf[len..]);
122             len += size;
123         }
124         SmolStr(Repr::Inline {
125             len: len as u8,
126             buf,
127         })
128     }
129 }
130 
131 impl Default for SmolStr {
default() -> SmolStr132     fn default() -> SmolStr {
133         SmolStr::new("")
134     }
135 }
136 
137 impl Deref for SmolStr {
138     type Target = str;
139 
deref(&self) -> &str140     fn deref(&self) -> &str {
141         self.as_str()
142     }
143 }
144 
145 impl PartialEq<SmolStr> for SmolStr {
eq(&self, other: &SmolStr) -> bool146     fn eq(&self, other: &SmolStr) -> bool {
147         self.as_str() == other.as_str()
148     }
149 }
150 
// Equality on `SmolStr` is equality of the underlying `str` contents, which is
// a full equivalence relation.
impl Eq for SmolStr {}
152 
153 impl PartialEq<str> for SmolStr {
eq(&self, other: &str) -> bool154     fn eq(&self, other: &str) -> bool {
155         self.as_str() == other
156     }
157 }
158 
159 impl PartialEq<SmolStr> for str {
eq(&self, other: &SmolStr) -> bool160     fn eq(&self, other: &SmolStr) -> bool {
161         other == self
162     }
163 }
164 
165 impl<'a> PartialEq<&'a str> for SmolStr {
eq(&self, other: &&'a str) -> bool166     fn eq(&self, other: &&'a str) -> bool {
167         self == *other
168     }
169 }
170 
171 impl<'a> PartialEq<SmolStr> for &'a str {
eq(&self, other: &SmolStr) -> bool172     fn eq(&self, other: &SmolStr) -> bool {
173         *self == other
174     }
175 }
176 
177 impl PartialEq<String> for SmolStr {
eq(&self, other: &String) -> bool178     fn eq(&self, other: &String) -> bool {
179         self.as_str() == other
180     }
181 }
182 
183 impl PartialEq<SmolStr> for String {
eq(&self, other: &SmolStr) -> bool184     fn eq(&self, other: &SmolStr) -> bool {
185         other == self
186     }
187 }
188 
189 impl<'a> PartialEq<&'a String> for SmolStr {
eq(&self, other: &&'a String) -> bool190     fn eq(&self, other: &&'a String) -> bool {
191         self == *other
192     }
193 }
194 
195 impl<'a> PartialEq<SmolStr> for &'a String {
eq(&self, other: &SmolStr) -> bool196     fn eq(&self, other: &SmolStr) -> bool {
197         *self == other
198     }
199 }
200 
201 impl Ord for SmolStr {
cmp(&self, other: &SmolStr) -> Ordering202     fn cmp(&self, other: &SmolStr) -> Ordering {
203         self.as_str().cmp(other.as_str())
204     }
205 }
206 
207 impl PartialOrd for SmolStr {
partial_cmp(&self, other: &SmolStr) -> Option<Ordering>208     fn partial_cmp(&self, other: &SmolStr) -> Option<Ordering> {
209         Some(self.cmp(other))
210     }
211 }
212 
213 impl hash::Hash for SmolStr {
hash<H: hash::Hasher>(&self, hasher: &mut H)214     fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
215         self.as_str().hash(hasher)
216     }
217 }
218 
219 impl fmt::Debug for SmolStr {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result220     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
221         fmt::Debug::fmt(self.as_str(), f)
222     }
223 }
224 
225 impl fmt::Display for SmolStr {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result226     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
227         fmt::Display::fmt(self.as_str(), f)
228     }
229 }
230 
231 impl iter::FromIterator<char> for SmolStr {
from_iter<I: iter::IntoIterator<Item = char>>(iter: I) -> SmolStr232     fn from_iter<I: iter::IntoIterator<Item = char>>(iter: I) -> SmolStr {
233         let iter = iter.into_iter();
234         Self::from_char_iter(iter)
235     }
236 }
237 
build_from_str_iter<T>(mut iter: impl Iterator<Item = T>) -> SmolStr where T: AsRef<str>, String: iter::Extend<T>,238 fn build_from_str_iter<T>(mut iter: impl Iterator<Item = T>) -> SmolStr
239 where
240     T: AsRef<str>,
241     String: iter::Extend<T>,
242 {
243     let mut len = 0;
244     let mut buf = [0u8; INLINE_CAP];
245     while let Some(slice) = iter.next() {
246         let slice = slice.as_ref();
247         let size = slice.len();
248         if size + len > INLINE_CAP {
249             let mut heap = String::with_capacity(size + len);
250             heap.push_str(std::str::from_utf8(&buf[..len]).unwrap());
251             heap.push_str(&slice);
252             heap.extend(iter);
253             return SmolStr(Repr::Heap(heap.into_boxed_str().into()));
254         }
255         (&mut buf[len..][..size]).copy_from_slice(slice.as_bytes());
256         len += size;
257     }
258     SmolStr(Repr::Inline {
259         len: len as u8,
260         buf,
261     })
262 }
263 
264 impl iter::FromIterator<String> for SmolStr {
from_iter<I: iter::IntoIterator<Item = String>>(iter: I) -> SmolStr265     fn from_iter<I: iter::IntoIterator<Item = String>>(iter: I) -> SmolStr {
266         build_from_str_iter(iter.into_iter())
267     }
268 }
269 
270 impl<'a> iter::FromIterator<&'a String> for SmolStr {
from_iter<I: iter::IntoIterator<Item = &'a String>>(iter: I) -> SmolStr271     fn from_iter<I: iter::IntoIterator<Item = &'a String>>(iter: I) -> SmolStr {
272         SmolStr::from_iter(iter.into_iter().map(|x| x.as_str()))
273     }
274 }
275 
276 impl<'a> iter::FromIterator<&'a str> for SmolStr {
from_iter<I: iter::IntoIterator<Item = &'a str>>(iter: I) -> SmolStr277     fn from_iter<I: iter::IntoIterator<Item = &'a str>>(iter: I) -> SmolStr {
278         build_from_str_iter(iter.into_iter())
279     }
280 }
281 
282 impl<T> From<T> for SmolStr
283 where
284     T: Into<String> + AsRef<str>,
285 {
from(text: T) -> Self286     fn from(text: T) -> Self {
287         Self::new(text)
288     }
289 }
290 
291 impl From<SmolStr> for String {
from(text: SmolStr) -> Self292     fn from(text: SmolStr) -> Self {
293         text.as_str().into()
294     }
295 }
296 
297 impl Borrow<str> for SmolStr {
borrow(&self) -> &str298     fn borrow(&self) -> &str {
299         self.as_str()
300     }
301 }
302 
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for SmolStr {
    /// Generates an arbitrary `&str` from `u` and copies it into a `SmolStr`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> Result<Self, arbitrary::Error> {
        <&str>::arbitrary(u).map(SmolStr::new)
    }
}
310 
/// Maximum number of bytes that `Repr::Inline` can hold in place.
const INLINE_CAP: usize = 22;
/// Number of newline characters at the start of `WS`.
const N_NEWLINES: usize = 32;
/// Number of space characters that follow the newlines in `WS`.
const N_SPACES: usize = 128;
/// `N_NEWLINES` newlines followed by `N_SPACES` spaces. `Repr::Substring`
/// values are rendered as slices of this string.
const WS: &str =
    "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n                                                                                                                                ";
316 
/// Internal storage of a `SmolStr`.
#[derive(Clone, Debug)]
enum Repr {
    /// Contents held in a reference-counted `str`.
    Heap(Arc<str>),
    /// Contents stored in place: the first `len` bytes of `buf`.
    Inline { len: u8, buf: [u8; INLINE_CAP] },
    /// Contents are a slice of `WS`: its last `newlines` newline characters
    /// followed by its first `spaces` space characters.
    Substring { newlines: usize, spaces: usize },
}
323 
324 impl Repr {
new<T>(text: T) -> Self where T: AsRef<str>,325     fn new<T>(text: T) -> Self
326     where
327         T: AsRef<str>,
328     {
329         {
330             let text = text.as_ref();
331 
332             let len = text.len();
333             if len <= INLINE_CAP {
334                 let mut buf = [0; INLINE_CAP];
335                 buf[..len].copy_from_slice(text.as_bytes());
336                 return Repr::Inline {
337                     len: len as u8,
338                     buf,
339                 };
340             }
341 
342             if len <= N_NEWLINES + N_SPACES {
343                 let bytes = text.as_bytes();
344                 let possible_newline_count = cmp::min(len, N_NEWLINES);
345                 let newlines = bytes[..possible_newline_count]
346                     .iter()
347                     .take_while(|&&b| b == b'\n')
348                     .count();
349                 let possible_space_count = len - newlines;
350                 if possible_space_count <= N_SPACES && bytes[newlines..].iter().all(|&b| b == b' ')
351                 {
352                     let spaces = possible_space_count;
353                     return Repr::Substring { newlines, spaces };
354                 }
355             }
356         }
357 
358         Repr::Heap(text.as_ref().into())
359     }
360 
361     #[inline(always)]
len(&self) -> usize362     fn len(&self) -> usize {
363         match self {
364             Repr::Heap(data) => data.len(),
365             Repr::Inline { len, .. } => *len as usize,
366             Repr::Substring { newlines, spaces } => *newlines + *spaces,
367         }
368     }
369 
370     #[inline(always)]
is_empty(&self) -> bool371     fn is_empty(&self) -> bool {
372         match self {
373             Repr::Heap(data) => data.is_empty(),
374             Repr::Inline { len, .. } => *len == 0,
375             // A substring isn't created for an empty string.
376             Repr::Substring { .. } => false,
377         }
378     }
379 
380     #[inline]
as_str(&self) -> &str381     fn as_str(&self) -> &str {
382         match self {
383             Repr::Heap(data) => &*data,
384             Repr::Inline { len, buf } => {
385                 let len = *len as usize;
386                 let buf = &buf[..len];
387                 unsafe { ::std::str::from_utf8_unchecked(buf) }
388             }
389             Repr::Substring { newlines, spaces } => {
390                 let newlines = *newlines;
391                 let spaces = *spaces;
392                 assert!(newlines <= N_NEWLINES && spaces <= N_SPACES);
393                 &WS[N_NEWLINES - newlines..N_NEWLINES + spaces]
394             }
395         }
396     }
397 }
398 
#[cfg(feature = "serde")]
mod serde {
    use super::SmolStr;
    use ::serde::de::{Deserializer, Error, Unexpected, Visitor};
    use std::fmt;

    /// Deserializes a `SmolStr` from whatever string or byte form the
    /// deserializer passes to the visitor; bytes must be valid UTF-8.
    // https://github.com/serde-rs/serde/blob/629802f2abfd1a54a6072992888fea7ca5bc209f/serde/src/private/de.rs#L56-L125
    fn smol_str<'de: 'a, 'a, D>(deserializer: D) -> Result<SmolStr, D::Error>
    where
        D: Deserializer<'de>,
    {
        /// Visitor that turns every accepted input into a `SmolStr`.
        struct SmolStrVisitor;

        impl<'a> Visitor<'a> for SmolStrVisitor {
            type Value = SmolStr;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("a string")
            }

            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Ok(v.into())
            }

            fn visit_borrowed_str<E>(self, v: &'a str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Ok(v.into())
            }

            fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Ok(v.into())
            }

            fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
            where
                E: Error,
            {
                // Invalid UTF-8 is reported with the offending bytes attached.
                std::str::from_utf8(v)
                    .map(SmolStr::from)
                    .map_err(|_| Error::invalid_value(Unexpected::Bytes(v), &self))
            }

            fn visit_borrowed_bytes<E>(self, v: &'a [u8]) -> Result<Self::Value, E>
            where
                E: Error,
            {
                std::str::from_utf8(v)
                    .map(SmolStr::from)
                    .map_err(|_| Error::invalid_value(Unexpected::Bytes(v), &self))
            }

            fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E>
            where
                E: Error,
            {
                // On failure the original buffer is recovered from the error
                // so it can be included in the report.
                String::from_utf8(v).map(SmolStr::from).map_err(|e| {
                    Error::invalid_value(Unexpected::Bytes(&e.into_bytes()), &self)
                })
            }
        }

        deserializer.deserialize_str(SmolStrVisitor)
    }

    impl serde::Serialize for SmolStr {
        /// Serializes the contents as a plain string.
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: serde::Serializer,
        {
            serializer.serialize_str(self.as_str())
        }
    }

    impl<'de> serde::Deserialize<'de> for SmolStr {
        /// Deserializes through the string visitor defined in `smol_str`.
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: serde::Deserializer<'de>,
        {
            smol_str(deserializer)
        }
    }
}
495