1 use std::{
2 borrow::Borrow,
3 cmp::{self, Ordering},
4 fmt, hash, iter,
5 ops::Deref,
6 sync::Arc,
7 };
8
9 /// A `SmolStr` is a string type that has the following properties:
10 ///
11 /// * `size_of::<SmolStr>() == size_of::<String>()`
12 /// * `Clone` is `O(1)`
13 /// * Strings are stack-allocated if they are:
14 /// * Up to 22 bytes long
15 /// * Longer than 22 bytes, but substrings of `WS` (see below). Such strings consist
16 /// solely of consecutive newlines, followed by consecutive spaces
17 /// * If a string does not satisfy the aforementioned conditions, it is heap-allocated
18 ///
19 /// Unlike `String`, however, `SmolStr` is immutable. The primary use case for
20 /// `SmolStr` is a good enough default storage for tokens of typical programming
21 /// languages. Strings consisting of a series of newlines, followed by a series of
22 /// whitespace are a typical pattern in computer programs because of indentation.
23 /// Note that a specialized interner might be a better solution for some use cases.
24 #[derive(Clone)]
25 pub struct SmolStr(Repr);
26
27 impl SmolStr {
28 #[deprecated = "Use `new_inline` instead"]
new_inline_from_ascii(len: usize, bytes: &[u8]) -> SmolStr29 pub const fn new_inline_from_ascii(len: usize, bytes: &[u8]) -> SmolStr {
30 let _len_is_short = [(); INLINE_CAP + 1][len];
31
32 const ZEROS: &[u8] = &[0; INLINE_CAP];
33
34 let mut buf = [0; INLINE_CAP];
35 macro_rules! s {
36 ($($idx:literal),*) => ( $(s!(set $idx);)* );
37 (set $idx:literal) => ({
38 let src: &[u8] = [ZEROS, bytes][($idx < len) as usize];
39 let byte = src[$idx];
40 let _is_ascii = [(); 128][byte as usize];
41 buf[$idx] = byte
42 });
43 }
44 s!(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21);
45 SmolStr(Repr::Inline {
46 len: len as u8,
47 buf,
48 })
49 }
50
51 /// Constructs inline variant of `SmolStr`.
52 ///
53 /// Panics if `text.len() > 22`.
54 #[inline]
new_inline(text: &str) -> SmolStr55 pub const fn new_inline(text: &str) -> SmolStr {
56 let mut buf = [0; INLINE_CAP];
57 let mut i = 0;
58 while i < text.len() {
59 buf[i] = text.as_bytes()[i];
60 i += 1
61 }
62 SmolStr(Repr::Inline {
63 len: text.len() as u8,
64 buf,
65 })
66 }
67
new<T>(text: T) -> SmolStr where T: AsRef<str>,68 pub fn new<T>(text: T) -> SmolStr
69 where
70 T: AsRef<str>,
71 {
72 SmolStr(Repr::new(text))
73 }
74
75 #[inline(always)]
as_str(&self) -> &str76 pub fn as_str(&self) -> &str {
77 self.0.as_str()
78 }
79
80 #[inline(always)]
to_string(&self) -> String81 pub fn to_string(&self) -> String {
82 self.as_str().to_string()
83 }
84
85 #[inline(always)]
len(&self) -> usize86 pub fn len(&self) -> usize {
87 self.0.len()
88 }
89
90 #[inline(always)]
is_empty(&self) -> bool91 pub fn is_empty(&self) -> bool {
92 self.0.is_empty()
93 }
94
95 #[inline(always)]
is_heap_allocated(&self) -> bool96 pub fn is_heap_allocated(&self) -> bool {
97 match self.0 {
98 Repr::Heap(..) => true,
99 _ => false,
100 }
101 }
102
from_char_iter<I: iter::Iterator<Item = char>>(mut iter: I) -> SmolStr103 fn from_char_iter<I: iter::Iterator<Item = char>>(mut iter: I) -> SmolStr {
104 let (min_size, _) = iter.size_hint();
105 if min_size > INLINE_CAP {
106 let heap: String = iter.collect();
107 return SmolStr(Repr::Heap(heap.into_boxed_str().into()));
108 }
109 let mut len = 0;
110 let mut buf = [0u8; INLINE_CAP];
111 while let Some(ch) = iter.next() {
112 let size = ch.len_utf8();
113 if size + len > INLINE_CAP {
114 let (min_remaining, _) = iter.size_hint();
115 let mut heap = String::with_capacity(size + len + min_remaining);
116 heap.push_str(std::str::from_utf8(&buf[..len]).unwrap());
117 heap.push(ch);
118 heap.extend(iter);
119 return SmolStr(Repr::Heap(heap.into_boxed_str().into()));
120 }
121 ch.encode_utf8(&mut buf[len..]);
122 len += size;
123 }
124 SmolStr(Repr::Inline {
125 len: len as u8,
126 buf,
127 })
128 }
129 }
130
131 impl Default for SmolStr {
default() -> SmolStr132 fn default() -> SmolStr {
133 SmolStr::new("")
134 }
135 }
136
137 impl Deref for SmolStr {
138 type Target = str;
139
deref(&self) -> &str140 fn deref(&self) -> &str {
141 self.as_str()
142 }
143 }
144
145 impl PartialEq<SmolStr> for SmolStr {
eq(&self, other: &SmolStr) -> bool146 fn eq(&self, other: &SmolStr) -> bool {
147 self.as_str() == other.as_str()
148 }
149 }
150
151 impl Eq for SmolStr {}
152
153 impl PartialEq<str> for SmolStr {
eq(&self, other: &str) -> bool154 fn eq(&self, other: &str) -> bool {
155 self.as_str() == other
156 }
157 }
158
159 impl PartialEq<SmolStr> for str {
eq(&self, other: &SmolStr) -> bool160 fn eq(&self, other: &SmolStr) -> bool {
161 other == self
162 }
163 }
164
165 impl<'a> PartialEq<&'a str> for SmolStr {
eq(&self, other: &&'a str) -> bool166 fn eq(&self, other: &&'a str) -> bool {
167 self == *other
168 }
169 }
170
171 impl<'a> PartialEq<SmolStr> for &'a str {
eq(&self, other: &SmolStr) -> bool172 fn eq(&self, other: &SmolStr) -> bool {
173 *self == other
174 }
175 }
176
177 impl PartialEq<String> for SmolStr {
eq(&self, other: &String) -> bool178 fn eq(&self, other: &String) -> bool {
179 self.as_str() == other
180 }
181 }
182
183 impl PartialEq<SmolStr> for String {
eq(&self, other: &SmolStr) -> bool184 fn eq(&self, other: &SmolStr) -> bool {
185 other == self
186 }
187 }
188
189 impl<'a> PartialEq<&'a String> for SmolStr {
eq(&self, other: &&'a String) -> bool190 fn eq(&self, other: &&'a String) -> bool {
191 self == *other
192 }
193 }
194
195 impl<'a> PartialEq<SmolStr> for &'a String {
eq(&self, other: &SmolStr) -> bool196 fn eq(&self, other: &SmolStr) -> bool {
197 *self == other
198 }
199 }
200
201 impl Ord for SmolStr {
cmp(&self, other: &SmolStr) -> Ordering202 fn cmp(&self, other: &SmolStr) -> Ordering {
203 self.as_str().cmp(other.as_str())
204 }
205 }
206
207 impl PartialOrd for SmolStr {
partial_cmp(&self, other: &SmolStr) -> Option<Ordering>208 fn partial_cmp(&self, other: &SmolStr) -> Option<Ordering> {
209 Some(self.cmp(other))
210 }
211 }
212
213 impl hash::Hash for SmolStr {
hash<H: hash::Hasher>(&self, hasher: &mut H)214 fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
215 self.as_str().hash(hasher)
216 }
217 }
218
219 impl fmt::Debug for SmolStr {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result220 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
221 fmt::Debug::fmt(self.as_str(), f)
222 }
223 }
224
225 impl fmt::Display for SmolStr {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result226 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
227 fmt::Display::fmt(self.as_str(), f)
228 }
229 }
230
231 impl iter::FromIterator<char> for SmolStr {
from_iter<I: iter::IntoIterator<Item = char>>(iter: I) -> SmolStr232 fn from_iter<I: iter::IntoIterator<Item = char>>(iter: I) -> SmolStr {
233 let iter = iter.into_iter();
234 Self::from_char_iter(iter)
235 }
236 }
237
build_from_str_iter<T>(mut iter: impl Iterator<Item = T>) -> SmolStr where T: AsRef<str>, String: iter::Extend<T>,238 fn build_from_str_iter<T>(mut iter: impl Iterator<Item = T>) -> SmolStr
239 where
240 T: AsRef<str>,
241 String: iter::Extend<T>,
242 {
243 let mut len = 0;
244 let mut buf = [0u8; INLINE_CAP];
245 while let Some(slice) = iter.next() {
246 let slice = slice.as_ref();
247 let size = slice.len();
248 if size + len > INLINE_CAP {
249 let mut heap = String::with_capacity(size + len);
250 heap.push_str(std::str::from_utf8(&buf[..len]).unwrap());
251 heap.push_str(&slice);
252 heap.extend(iter);
253 return SmolStr(Repr::Heap(heap.into_boxed_str().into()));
254 }
255 (&mut buf[len..][..size]).copy_from_slice(slice.as_bytes());
256 len += size;
257 }
258 SmolStr(Repr::Inline {
259 len: len as u8,
260 buf,
261 })
262 }
263
264 impl iter::FromIterator<String> for SmolStr {
from_iter<I: iter::IntoIterator<Item = String>>(iter: I) -> SmolStr265 fn from_iter<I: iter::IntoIterator<Item = String>>(iter: I) -> SmolStr {
266 build_from_str_iter(iter.into_iter())
267 }
268 }
269
270 impl<'a> iter::FromIterator<&'a String> for SmolStr {
from_iter<I: iter::IntoIterator<Item = &'a String>>(iter: I) -> SmolStr271 fn from_iter<I: iter::IntoIterator<Item = &'a String>>(iter: I) -> SmolStr {
272 SmolStr::from_iter(iter.into_iter().map(|x| x.as_str()))
273 }
274 }
275
276 impl<'a> iter::FromIterator<&'a str> for SmolStr {
from_iter<I: iter::IntoIterator<Item = &'a str>>(iter: I) -> SmolStr277 fn from_iter<I: iter::IntoIterator<Item = &'a str>>(iter: I) -> SmolStr {
278 build_from_str_iter(iter.into_iter())
279 }
280 }
281
282 impl<T> From<T> for SmolStr
283 where
284 T: Into<String> + AsRef<str>,
285 {
from(text: T) -> Self286 fn from(text: T) -> Self {
287 Self::new(text)
288 }
289 }
290
291 impl From<SmolStr> for String {
from(text: SmolStr) -> Self292 fn from(text: SmolStr) -> Self {
293 text.as_str().into()
294 }
295 }
296
297 impl Borrow<str> for SmolStr {
borrow(&self) -> &str298 fn borrow(&self) -> &str {
299 self.as_str()
300 }
301 }
302
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for SmolStr {
    /// Generates an arbitrary `&str` from the unstructured input and wraps it
    /// in a `SmolStr`; any error from generating the `&str` is passed through.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> Result<Self, arbitrary::Error> {
        <&str>::arbitrary(u).map(SmolStr::new)
    }
}
310
/// Maximum number of bytes an inline `SmolStr` can hold.
const INLINE_CAP: usize = 22;
/// Number of `'\n'` bytes at the start of `WS`.
const N_NEWLINES: usize = 32;
/// Number of `' '` bytes that follow the newlines in `WS`.
const N_SPACES: usize = 128;
/// `N_NEWLINES` line feeds followed by `N_SPACES` spaces.
///
/// Whitespace-only strings are served as sub-slices of this constant, so its
/// layout has to match the two counts above exactly. The spaces are spelled as
/// `\x20` escapes so that tools which trim or collapse runs of blanks cannot
/// silently shorten the literal.
const WS: &str = concat!(
    // 4 x 8 = 32 newlines
    "\n\n\n\n\n\n\n\n",
    "\n\n\n\n\n\n\n\n",
    "\n\n\n\n\n\n\n\n",
    "\n\n\n\n\n\n\n\n",
    // 16 x 8 = 128 spaces
    "\x20\x20\x20\x20\x20\x20\x20\x20",
    "\x20\x20\x20\x20\x20\x20\x20\x20",
    "\x20\x20\x20\x20\x20\x20\x20\x20",
    "\x20\x20\x20\x20\x20\x20\x20\x20",
    "\x20\x20\x20\x20\x20\x20\x20\x20",
    "\x20\x20\x20\x20\x20\x20\x20\x20",
    "\x20\x20\x20\x20\x20\x20\x20\x20",
    "\x20\x20\x20\x20\x20\x20\x20\x20",
    "\x20\x20\x20\x20\x20\x20\x20\x20",
    "\x20\x20\x20\x20\x20\x20\x20\x20",
    "\x20\x20\x20\x20\x20\x20\x20\x20",
    "\x20\x20\x20\x20\x20\x20\x20\x20",
    "\x20\x20\x20\x20\x20\x20\x20\x20",
    "\x20\x20\x20\x20\x20\x20\x20\x20",
    "\x20\x20\x20\x20\x20\x20\x20\x20",
    "\x20\x20\x20\x20\x20\x20\x20\x20",
);

// Compile-time guard: the two array types only agree when `WS` is exactly
// `N_NEWLINES + N_SPACES` bytes long, so a miscounted literal breaks the build
// instead of causing an out-of-bounds panic when a slice of `WS` is taken.
const _: [(); N_NEWLINES + N_SPACES] = [(); WS.len()];
316
317 #[derive(Clone, Debug)]
318 enum Repr {
319 Heap(Arc<str>),
320 Inline { len: u8, buf: [u8; INLINE_CAP] },
321 Substring { newlines: usize, spaces: usize },
322 }
323
324 impl Repr {
new<T>(text: T) -> Self where T: AsRef<str>,325 fn new<T>(text: T) -> Self
326 where
327 T: AsRef<str>,
328 {
329 {
330 let text = text.as_ref();
331
332 let len = text.len();
333 if len <= INLINE_CAP {
334 let mut buf = [0; INLINE_CAP];
335 buf[..len].copy_from_slice(text.as_bytes());
336 return Repr::Inline {
337 len: len as u8,
338 buf,
339 };
340 }
341
342 if len <= N_NEWLINES + N_SPACES {
343 let bytes = text.as_bytes();
344 let possible_newline_count = cmp::min(len, N_NEWLINES);
345 let newlines = bytes[..possible_newline_count]
346 .iter()
347 .take_while(|&&b| b == b'\n')
348 .count();
349 let possible_space_count = len - newlines;
350 if possible_space_count <= N_SPACES && bytes[newlines..].iter().all(|&b| b == b' ')
351 {
352 let spaces = possible_space_count;
353 return Repr::Substring { newlines, spaces };
354 }
355 }
356 }
357
358 Repr::Heap(text.as_ref().into())
359 }
360
361 #[inline(always)]
len(&self) -> usize362 fn len(&self) -> usize {
363 match self {
364 Repr::Heap(data) => data.len(),
365 Repr::Inline { len, .. } => *len as usize,
366 Repr::Substring { newlines, spaces } => *newlines + *spaces,
367 }
368 }
369
370 #[inline(always)]
is_empty(&self) -> bool371 fn is_empty(&self) -> bool {
372 match self {
373 Repr::Heap(data) => data.is_empty(),
374 Repr::Inline { len, .. } => *len == 0,
375 // A substring isn't created for an empty string.
376 Repr::Substring { .. } => false,
377 }
378 }
379
380 #[inline]
as_str(&self) -> &str381 fn as_str(&self) -> &str {
382 match self {
383 Repr::Heap(data) => &*data,
384 Repr::Inline { len, buf } => {
385 let len = *len as usize;
386 let buf = &buf[..len];
387 unsafe { ::std::str::from_utf8_unchecked(buf) }
388 }
389 Repr::Substring { newlines, spaces } => {
390 let newlines = *newlines;
391 let spaces = *spaces;
392 assert!(newlines <= N_NEWLINES && spaces <= N_SPACES);
393 &WS[N_NEWLINES - newlines..N_NEWLINES + spaces]
394 }
395 }
396 }
397 }
398
#[cfg(feature = "serde")]
mod serde {
    use super::SmolStr;
    use ::serde::de::{Deserializer, Error, Unexpected, Visitor};
    use std::fmt;

    // https://github.com/serde-rs/serde/blob/629802f2abfd1a54a6072992888fea7ca5bc209f/serde/src/private/de.rs#L56-L125
    /// Deserializes a `SmolStr` from string or byte input.
    ///
    /// Byte input must be valid UTF-8; otherwise an `invalid_value` error
    /// carrying the offending bytes is returned.
    fn smol_str<'de: 'a, 'a, D>(deserializer: D) -> Result<SmolStr, D::Error>
    where
        D: Deserializer<'de>,
    {
        struct SmolStrVisitor;

        impl<'a> Visitor<'a> for SmolStrVisitor {
            type Value = SmolStr;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("a string")
            }

            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Ok(SmolStr::new(v))
            }

            fn visit_borrowed_str<E>(self, v: &'a str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Ok(SmolStr::new(v))
            }

            fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Ok(SmolStr::new(v))
            }

            fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
            where
                E: Error,
            {
                std::str::from_utf8(v)
                    .map(SmolStr::new)
                    .map_err(|_| Error::invalid_value(Unexpected::Bytes(v), &self))
            }

            fn visit_borrowed_bytes<E>(self, v: &'a [u8]) -> Result<Self::Value, E>
            where
                E: Error,
            {
                std::str::from_utf8(v)
                    .map(SmolStr::new)
                    .map_err(|_| Error::invalid_value(Unexpected::Bytes(v), &self))
            }

            fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E>
            where
                E: Error,
            {
                // On failure the rejected buffer is recovered from the error
                // so that it can be reported back to the caller.
                String::from_utf8(v).map(SmolStr::new).map_err(|e| {
                    Error::invalid_value(Unexpected::Bytes(&e.into_bytes()), &self)
                })
            }
        }

        deserializer.deserialize_str(SmolStrVisitor)
    }

    impl ::serde::Serialize for SmolStr {
        /// Serializes the `SmolStr` the same way its `str` contents serialize.
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: ::serde::Serializer,
        {
            self.as_str().serialize(serializer)
        }
    }

    impl<'de> ::serde::Deserialize<'de> for SmolStr {
        /// Deserializes through the string visitor defined in `smol_str`.
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: ::serde::Deserializer<'de>,
        {
            smol_str(deserializer)
        }
    }
}
495