1 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
2 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
3 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
4 // option. This file may not be copied, modified, or distributed
5 // except according to those terms.
6 
7 use std::borrow::Borrow;
8 use std::cell::{Cell, UnsafeCell};
9 use std::cmp::Ordering;
10 use std::default::Default;
11 use std::fmt as strfmt;
12 use std::iter::FromIterator;
13 use std::marker::PhantomData;
14 use std::num::NonZeroUsize;
15 use std::ops::{Deref, DerefMut};
16 use std::sync::atomic::Ordering as AtomicOrdering;
17 use std::sync::atomic::{self, AtomicUsize};
18 use std::{hash, io, mem, ptr, str, u32};
19 
20 #[cfg(feature = "encoding")]
21 use encoding::{self, DecoderTrap, EncoderTrap, EncodingRef};
22 
23 use buf32::{self, Buf32};
24 use fmt::imp::Fixup;
25 use fmt::{self, Slice};
26 use util::{copy_and_advance, copy_lifetime, copy_lifetime_mut, unsafe_slice, unsafe_slice_mut};
27 use OFLOW;
28 
29 const MAX_INLINE_LEN: usize = 8;
30 const MAX_INLINE_TAG: usize = 0xF;
31 const EMPTY_TAG: usize = 0xF;
32 
33 #[inline(always)]
inline_tag(len: u32) -> NonZeroUsize34 fn inline_tag(len: u32) -> NonZeroUsize {
35     debug_assert!(len <= MAX_INLINE_LEN as u32);
36     unsafe { NonZeroUsize::new_unchecked(if len == 0 { EMPTY_TAG } else { len as usize }) }
37 }
38 
/// The multithreadedness of a tendril.
///
/// Exactly two types implement this trait:
///
/// - `Atomic`: use this in your tendril and you will have a `Send` tendril which works
///   across threads; this is akin to `Arc`.
///
/// - `NonAtomic`: use this in your tendril and you will have a tendril which is neither
///   `Send` nor `Sync` but should be a tad faster; this is akin to `Rc`.
///
/// The layout of any implementing type is also mandated to be that of a `usize`,
/// for it is used for reference counting.
pub unsafe trait Atomicity: 'static {
    /// Create a fresh counter. Both implementations in this file start it at 1.
    #[doc(hidden)]
    fn new() -> Self;

    /// Add one to the counter and return the value it held before the addition.
    #[doc(hidden)]
    fn increment(&self) -> usize;

    /// Subtract one from the counter and return the value it held before the
    /// subtraction (so a return value of 1 means the count just reached zero).
    #[doc(hidden)]
    fn decrement(&self) -> usize;

    /// Issue whatever acquire fence the counter needs before the buffer is
    /// destroyed; a no-op for `NonAtomic`.
    #[doc(hidden)]
    fn fence_acquire();
}
64 
/// A marker of a non-atomic tendril.
///
/// This is the default for the second type parameter of a `Tendril`
/// and so doesn't typically need to be written.
///
/// This is akin to using `Rc` for reference counting.
///
/// The count lives in a `Cell`, so it can be updated through a shared
/// reference but only from a single thread.
#[repr(C)]
pub struct NonAtomic(Cell<PackedUsize>);
73 
/// A `usize` wrapped in a packed struct; this is the value held by the
/// `Cell` inside `NonAtomic`. Being packed, it has an alignment of 1.
#[repr(C, packed)]
#[derive(Copy, Clone)]
struct PackedUsize(usize);
77 
78 unsafe impl Atomicity for NonAtomic {
79     #[inline]
new() -> Self80     fn new() -> Self {
81         NonAtomic(Cell::new(PackedUsize(1)))
82     }
83 
84     #[inline]
increment(&self) -> usize85     fn increment(&self) -> usize {
86         let value = self.0.get().0;
87         self.0.set(PackedUsize(value.checked_add(1).expect(OFLOW)));
88         value
89     }
90 
91     #[inline]
decrement(&self) -> usize92     fn decrement(&self) -> usize {
93         let value = self.0.get().0;
94         self.0.set(PackedUsize(value - 1));
95         value
96     }
97 
98     #[inline]
fence_acquire()99     fn fence_acquire() {}
100 }
101 
/// A marker of an atomic (and hence concurrent) tendril.
///
/// This is used as the second, optional type parameter of a `Tendril`;
/// `Tendril<F, Atomic>` thus implements `Send`.
///
/// This is akin to using `Arc` for reference counting.
pub struct Atomic(AtomicUsize);
109 
110 unsafe impl Atomicity for Atomic {
111     #[inline]
new() -> Self112     fn new() -> Self {
113         Atomic(AtomicUsize::new(1))
114     }
115 
116     #[inline]
increment(&self) -> usize117     fn increment(&self) -> usize {
118         // Relaxed is OK because we have a reference already.
119         self.0.fetch_add(1, AtomicOrdering::Relaxed)
120     }
121 
122     #[inline]
decrement(&self) -> usize123     fn decrement(&self) -> usize {
124         self.0.fetch_sub(1, AtomicOrdering::Release)
125     }
126 
127     #[inline]
fence_acquire()128     fn fence_acquire() {
129         atomic::fence(AtomicOrdering::Acquire);
130     }
131 }
132 
133 struct Header<A: Atomicity> {
134     refcount: A,
135     cap: u32,
136 }
137 
138 impl<A> Header<A>
139 where
140     A: Atomicity,
141 {
142     #[inline(always)]
new() -> Header<A>143     unsafe fn new() -> Header<A> {
144         Header {
145             refcount: A::new(),
146             cap: 0,
147         }
148     }
149 }
150 
/// Errors that can occur when slicing a `Tendril`.
#[derive(Copy, Clone, Hash, Debug, PartialEq, Eq)]
pub enum SubtendrilError {
    /// The requested offset or length reaches past the end of the tendril.
    OutOfBounds,
    /// The requested bytes are in bounds but do not pass the format's
    /// validation.
    ValidationFailed,
}
157 
/// Compact string type for zero-copy parsing.
///
/// `Tendril`s have the semantics of owned strings, but are sometimes views
/// into shared buffers. When you mutate a `Tendril`, an owned copy is made
/// if necessary. Further mutations occur in-place until the string becomes
/// shared, e.g. with `clone()` or `subtendril()`.
///
/// Buffer sharing is accomplished through reference counting. With the
/// default `NonAtomic` marker the count is non-atomic, which has very low
/// overhead, and the Rust type system will prevent you at compile time from
/// sending such a `Tendril` between threads. Use the `Atomic` marker (see
/// the `A` parameter below) or `into_send` when a tendril has to cross
/// threads.
///
/// Whereas `String` allocates in the heap for any non-empty string, `Tendril`
/// can store small strings (up to 8 bytes) in-line, without a heap allocation.
/// `Tendril` is also smaller than `String` on 64-bit platforms — 16 bytes
/// versus 24.
///
/// The type parameter `F` specifies the format of the tendril, for example
/// UTF-8 text or uninterpreted bytes. The parameter will be instantiated
/// with one of the marker types from `tendril::fmt`. See the `StrTendril`
/// and `ByteTendril` type aliases for two examples.
///
/// The type parameter `A` indicates the atomicity of the tendril; it is by
/// default `NonAtomic`, but can be specified as `Atomic` to get a tendril
/// which implements `Send` (viz. a thread-safe tendril).
///
/// The maximum length of a `Tendril` is 4 GB. The library will panic if
/// you attempt to go over the limit.
#[repr(C)]
pub struct Tendril<F, A = NonAtomic>
where
    F: fmt::Format,
    A: Atomicity,
{
    // Tag word. Values up to `MAX_INLINE_TAG` denote an inline tendril (the
    // value encodes the length, with `EMPTY_TAG` standing for zero). Larger
    // values denote a heap buffer, with the low bit set when it is shared.
    ptr: Cell<NonZeroUsize>,
    // Either the inline bytes or the `len`/`aux` pair of a heap tendril.
    buf: UnsafeCell<Buffer>,
    // Ties the type to its format `F` without storing a value of it.
    marker: PhantomData<*mut F>,
    // Ties the type to its atomicity `A` without storing a value of it.
    refcount_marker: PhantomData<A>,
}
197 
/// The second word of a `Tendril`: interpreted as `heap` for heap-backed
/// tendrils and as `inline` for tendrils whose bytes are stored in place.
/// Which view is valid is decided by the tendril's tag word.
#[repr(C)]
union Buffer {
    // Length and auxiliary value of a heap-backed tendril.
    heap: Heap,
    // Storage for the bytes of an inline tendril.
    inline: [u8; 8],
}
203 
/// The `Buffer` view used by heap-backed tendrils.
#[derive(Copy, Clone)]
#[repr(C)]
struct Heap {
    // Length of the tendril in bytes.
    len: u32,
    // NOTE(review): presumably the start offset into a shared buffer
    // (`unsafe_subtendril` passes `self.aux() + offset` when slicing) —
    // confirm against the accessors further down the file.
    aux: u32,
}
210 
// A tendril may move to another thread only when its reference-count type
// `A` is `Sync`. Of the two markers in this file, `Atomic` wraps an
// `AtomicUsize`, whereas `NonAtomic` wraps a `Cell` and so is not `Sync`.
unsafe impl<F, A> Send for Tendril<F, A>
where
    F: fmt::Format,
    A: Atomicity + Sync,
{
}
217 
/// `Tendril` for storing native Rust strings.
///
/// Uses the default, non-atomic reference count.
pub type StrTendril = Tendril<fmt::UTF8>;

/// `Tendril` for storing binary data.
///
/// Uses the default, non-atomic reference count.
pub type ByteTendril = Tendril<fmt::Bytes>;
223 
224 impl<F, A> Clone for Tendril<F, A>
225 where
226     F: fmt::Format,
227     A: Atomicity,
228 {
229     #[inline]
clone(&self) -> Tendril<F, A>230     fn clone(&self) -> Tendril<F, A> {
231         unsafe {
232             if self.ptr.get().get() > MAX_INLINE_TAG {
233                 self.make_buf_shared();
234                 self.incref();
235             }
236 
237             ptr::read(self)
238         }
239     }
240 }
241 
242 impl<F, A> Drop for Tendril<F, A>
243 where
244     F: fmt::Format,
245     A: Atomicity,
246 {
247     #[inline]
drop(&mut self)248     fn drop(&mut self) {
249         unsafe {
250             let p = self.ptr.get().get();
251             if p <= MAX_INLINE_TAG {
252                 return;
253             }
254 
255             let (buf, shared, _) = self.assume_buf();
256             if shared {
257                 let header = self.header();
258                 if (*header).refcount.decrement() == 1 {
259                     A::fence_acquire();
260                     buf.destroy();
261                 }
262             } else {
263                 buf.destroy();
264             }
265         }
266     }
267 }
268 
// Expands to a `FromIterator::from_iter` body for items of type `$ty`:
// start from `Self::new()` and hand the whole iterator to `extend`.
macro_rules! from_iter_method {
    ($ty:ty) => {
        #[inline]
        fn from_iter<I>(iterable: I) -> Self
        where
            I: IntoIterator<Item = $ty>,
        {
            let mut collected = Self::new();
            collected.extend(iterable);
            collected
        }
    };
}
282 
283 impl<A> Extend<char> for Tendril<fmt::UTF8, A>
284 where
285     A: Atomicity,
286 {
287     #[inline]
extend<I>(&mut self, iterable: I) where I: IntoIterator<Item = char>,288     fn extend<I>(&mut self, iterable: I)
289     where
290         I: IntoIterator<Item = char>,
291     {
292         let iterator = iterable.into_iter();
293         self.force_reserve(iterator.size_hint().0 as u32);
294         for c in iterator {
295             self.push_char(c);
296         }
297     }
298 }
299 
impl<A> FromIterator<char> for Tendril<fmt::UTF8, A>
where
    A: Atomicity,
{
    // `from_iter` comes from `from_iter_method!`: it starts from an empty
    // tendril and feeds the iterator to the `Extend<char>` impl.
    from_iter_method!(char);
}
306 
307 impl<A> Extend<u8> for Tendril<fmt::Bytes, A>
308 where
309     A: Atomicity,
310 {
311     #[inline]
extend<I>(&mut self, iterable: I) where I: IntoIterator<Item = u8>,312     fn extend<I>(&mut self, iterable: I)
313     where
314         I: IntoIterator<Item = u8>,
315     {
316         let iterator = iterable.into_iter();
317         self.force_reserve(iterator.size_hint().0 as u32);
318         for b in iterator {
319             self.push_slice(&[b]);
320         }
321     }
322 }
323 
impl<A> FromIterator<u8> for Tendril<fmt::Bytes, A>
where
    A: Atomicity,
{
    // `from_iter` comes from `from_iter_method!`: it starts from an empty
    // tendril and feeds the iterator to the `Extend<u8>` impl.
    from_iter_method!(u8);
}
330 
331 impl<'a, A> Extend<&'a u8> for Tendril<fmt::Bytes, A>
332 where
333     A: Atomicity,
334 {
335     #[inline]
extend<I>(&mut self, iterable: I) where I: IntoIterator<Item = &'a u8>,336     fn extend<I>(&mut self, iterable: I)
337     where
338         I: IntoIterator<Item = &'a u8>,
339     {
340         let iterator = iterable.into_iter();
341         self.force_reserve(iterator.size_hint().0 as u32);
342         for &b in iterator {
343             self.push_slice(&[b]);
344         }
345     }
346 }
347 
impl<'a, A> FromIterator<&'a u8> for Tendril<fmt::Bytes, A>
where
    A: Atomicity,
{
    // `from_iter` comes from `from_iter_method!`: it starts from an empty
    // tendril and feeds the iterator to the `Extend<&u8>` impl.
    from_iter_method!(&'a u8);
}
354 
355 impl<'a, A> Extend<&'a str> for Tendril<fmt::UTF8, A>
356 where
357     A: Atomicity,
358 {
359     #[inline]
extend<I>(&mut self, iterable: I) where I: IntoIterator<Item = &'a str>,360     fn extend<I>(&mut self, iterable: I)
361     where
362         I: IntoIterator<Item = &'a str>,
363     {
364         for s in iterable {
365             self.push_slice(s);
366         }
367     }
368 }
369 
impl<'a, A> FromIterator<&'a str> for Tendril<fmt::UTF8, A>
where
    A: Atomicity,
{
    // `from_iter` comes from `from_iter_method!`: it starts from an empty
    // tendril and feeds the iterator to the `Extend<&str>` impl.
    from_iter_method!(&'a str);
}
376 
377 impl<'a, A> Extend<&'a [u8]> for Tendril<fmt::Bytes, A>
378 where
379     A: Atomicity,
380 {
381     #[inline]
extend<I>(&mut self, iterable: I) where I: IntoIterator<Item = &'a [u8]>,382     fn extend<I>(&mut self, iterable: I)
383     where
384         I: IntoIterator<Item = &'a [u8]>,
385     {
386         for s in iterable {
387             self.push_slice(s);
388         }
389     }
390 }
391 
impl<'a, A> FromIterator<&'a [u8]> for Tendril<fmt::Bytes, A>
where
    A: Atomicity,
{
    // `from_iter` comes from `from_iter_method!`: it starts from an empty
    // tendril and feeds the iterator to the `Extend<&[u8]>` impl.
    from_iter_method!(&'a [u8]);
}
398 
399 impl<'a, F, A> Extend<&'a Tendril<F, A>> for Tendril<F, A>
400 where
401     F: fmt::Format + 'a,
402     A: Atomicity,
403 {
404     #[inline]
extend<I>(&mut self, iterable: I) where I: IntoIterator<Item = &'a Tendril<F, A>>,405     fn extend<I>(&mut self, iterable: I)
406     where
407         I: IntoIterator<Item = &'a Tendril<F, A>>,
408     {
409         for t in iterable {
410             self.push_tendril(t);
411         }
412     }
413 }
414 
impl<'a, F, A> FromIterator<&'a Tendril<F, A>> for Tendril<F, A>
where
    F: fmt::Format + 'a,
    A: Atomicity,
{
    // `from_iter` comes from `from_iter_method!`: it starts from an empty
    // tendril and feeds the iterator to the `Extend<&Tendril>` impl.
    from_iter_method!(&'a Tendril<F, A>);
}
422 
423 impl<F, A> Deref for Tendril<F, A>
424 where
425     F: fmt::SliceFormat,
426     A: Atomicity,
427 {
428     type Target = F::Slice;
429 
430     #[inline]
deref(&self) -> &F::Slice431     fn deref(&self) -> &F::Slice {
432         unsafe { F::Slice::from_bytes(self.as_byte_slice()) }
433     }
434 }
435 
436 impl<F, A> DerefMut for Tendril<F, A>
437 where
438     F: fmt::SliceFormat,
439     A: Atomicity,
440 {
441     #[inline]
deref_mut(&mut self) -> &mut F::Slice442     fn deref_mut(&mut self) -> &mut F::Slice {
443         unsafe { F::Slice::from_mut_bytes(self.as_mut_byte_slice()) }
444     }
445 }
446 
447 impl<F, A> Borrow<[u8]> for Tendril<F, A>
448 where
449     F: fmt::SliceFormat,
450     A: Atomicity,
451 {
borrow(&self) -> &[u8]452     fn borrow(&self) -> &[u8] {
453         self.as_byte_slice()
454     }
455 }
456 
457 // Why not impl Borrow<str> for Tendril<fmt::UTF8>? str and [u8] hash differently,
458 // and so a HashMap<StrTendril, _> would silently break if we indexed by str. Ick.
459 // https://github.com/rust-lang/rust/issues/27108
460 
461 impl<F, A> PartialEq for Tendril<F, A>
462 where
463     F: fmt::Format,
464     A: Atomicity,
465 {
466     #[inline]
eq(&self, other: &Self) -> bool467     fn eq(&self, other: &Self) -> bool {
468         self.as_byte_slice() == other.as_byte_slice()
469     }
470 
471     #[inline]
ne(&self, other: &Self) -> bool472     fn ne(&self, other: &Self) -> bool {
473         self.as_byte_slice() != other.as_byte_slice()
474     }
475 }
476 
// Equality is byte-slice equality (see the `PartialEq` impl), so the `Eq`
// marker is implemented for every format.
impl<F, A> Eq for Tendril<F, A>
where
    F: fmt::Format,
    A: Atomicity,
{
}
483 
484 impl<F, A> PartialOrd for Tendril<F, A>
485 where
486     F: fmt::SliceFormat,
487     <F as fmt::SliceFormat>::Slice: PartialOrd,
488     A: Atomicity,
489 {
490     #[inline]
partial_cmp(&self, other: &Self) -> Option<Ordering>491     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
492         PartialOrd::partial_cmp(&**self, &**other)
493     }
494 }
495 
496 impl<F, A> Ord for Tendril<F, A>
497 where
498     F: fmt::SliceFormat,
499     <F as fmt::SliceFormat>::Slice: Ord,
500     A: Atomicity,
501 {
502     #[inline]
cmp(&self, other: &Self) -> Ordering503     fn cmp(&self, other: &Self) -> Ordering {
504         Ord::cmp(&**self, &**other)
505     }
506 }
507 
508 impl<F, A> Default for Tendril<F, A>
509 where
510     F: fmt::Format,
511     A: Atomicity,
512 {
513     #[inline(always)]
default() -> Tendril<F, A>514     fn default() -> Tendril<F, A> {
515         Tendril::new()
516     }
517 }
518 
519 impl<F, A> strfmt::Debug for Tendril<F, A>
520 where
521     F: fmt::SliceFormat + Default + strfmt::Debug,
522     <F as fmt::SliceFormat>::Slice: strfmt::Debug,
523     A: Atomicity,
524 {
525     #[inline]
fmt(&self, f: &mut strfmt::Formatter) -> strfmt::Result526     fn fmt(&self, f: &mut strfmt::Formatter) -> strfmt::Result {
527         let kind = match self.ptr.get().get() {
528             p if p <= MAX_INLINE_TAG => "inline",
529             p if p & 1 == 1 => "shared",
530             _ => "owned",
531         };
532 
533         write!(f, "Tendril<{:?}>({}: ", <F as Default>::default(), kind)?;
534         <<F as fmt::SliceFormat>::Slice as strfmt::Debug>::fmt(&**self, f)?;
535         write!(f, ")")
536     }
537 }
538 
539 impl<F, A> hash::Hash for Tendril<F, A>
540 where
541     F: fmt::Format,
542     A: Atomicity,
543 {
544     #[inline]
hash<H: hash::Hasher>(&self, hasher: &mut H)545     fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
546         self.as_byte_slice().hash(hasher)
547     }
548 }
549 
550 impl<F, A> Tendril<F, A>
551 where
552     F: fmt::Format,
553     A: Atomicity,
554 {
555     /// Create a new, empty `Tendril` in any format.
556     #[inline(always)]
new() -> Tendril<F, A>557     pub fn new() -> Tendril<F, A> {
558         unsafe { Tendril::inline(&[]) }
559     }
560 
561     /// Create a new, empty `Tendril` with a specified capacity.
562     #[inline]
with_capacity(capacity: u32) -> Tendril<F, A>563     pub fn with_capacity(capacity: u32) -> Tendril<F, A> {
564         let mut t: Tendril<F, A> = Tendril::new();
565         if capacity > MAX_INLINE_LEN as u32 {
566             unsafe {
567                 t.make_owned_with_capacity(capacity);
568             }
569         }
570         t
571     }
572 
573     /// Reserve space for additional bytes.
574     ///
575     /// This is only a suggestion. There are cases where `Tendril` will
576     /// decline to allocate until the buffer is actually modified.
577     #[inline]
reserve(&mut self, additional: u32)578     pub fn reserve(&mut self, additional: u32) {
579         if !self.is_shared() {
580             // Don't grow a shared tendril because we'd have to copy
581             // right away.
582             self.force_reserve(additional);
583         }
584     }
585 
586     /// Reserve space for additional bytes, even for shared buffers.
587     #[inline]
force_reserve(&mut self, additional: u32)588     fn force_reserve(&mut self, additional: u32) {
589         let new_len = self.len32().checked_add(additional).expect(OFLOW);
590         if new_len > MAX_INLINE_LEN as u32 {
591             unsafe {
592                 self.make_owned_with_capacity(new_len);
593             }
594         }
595     }
596 
597     /// Get the length of the `Tendril`.
598     ///
599     /// This is named not to conflict with `len()` on the underlying
600     /// slice, if any.
601     #[inline(always)]
len32(&self) -> u32602     pub fn len32(&self) -> u32 {
603         match self.ptr.get().get() {
604             EMPTY_TAG => 0,
605             n if n <= MAX_INLINE_LEN => n as u32,
606             _ => unsafe { self.raw_len() },
607         }
608     }
609 
610     /// Is the backing buffer shared?
611     #[inline]
is_shared(&self) -> bool612     pub fn is_shared(&self) -> bool {
613         let n = self.ptr.get().get();
614 
615         (n > MAX_INLINE_TAG) && ((n & 1) == 1)
616     }
617 
618     /// Is the backing buffer shared with this other `Tendril`?
619     #[inline]
is_shared_with(&self, other: &Tendril<F, A>) -> bool620     pub fn is_shared_with(&self, other: &Tendril<F, A>) -> bool {
621         let n = self.ptr.get().get();
622 
623         (n > MAX_INLINE_TAG) && (n == other.ptr.get().get())
624     }
625 
626     /// Truncate to length 0 without discarding any owned storage.
627     #[inline]
clear(&mut self)628     pub fn clear(&mut self) {
629         if self.ptr.get().get() <= MAX_INLINE_TAG {
630             self.ptr
631                 .set(unsafe { NonZeroUsize::new_unchecked(EMPTY_TAG) });
632         } else {
633             let (_, shared, _) = unsafe { self.assume_buf() };
634             if shared {
635                 // No need to keep a reference alive for a 0-size slice.
636                 *self = Tendril::new();
637             } else {
638                 unsafe { self.set_len(0) };
639             }
640         }
641     }
642 
643     /// Build a `Tendril` by copying a byte slice, if it conforms to the format.
644     #[inline]
try_from_byte_slice(x: &[u8]) -> Result<Tendril<F, A>, ()>645     pub fn try_from_byte_slice(x: &[u8]) -> Result<Tendril<F, A>, ()> {
646         match F::validate(x) {
647             true => Ok(unsafe { Tendril::from_byte_slice_without_validating(x) }),
648             false => Err(()),
649         }
650     }
651 
652     /// View as uninterpreted bytes.
653     #[inline(always)]
as_bytes(&self) -> &Tendril<fmt::Bytes, A>654     pub fn as_bytes(&self) -> &Tendril<fmt::Bytes, A> {
655         unsafe { mem::transmute(self) }
656     }
657 
658     /// Convert into uninterpreted bytes.
659     #[inline(always)]
into_bytes(self) -> Tendril<fmt::Bytes, A>660     pub fn into_bytes(self) -> Tendril<fmt::Bytes, A> {
661         unsafe { mem::transmute(self) }
662     }
663 
664     /// Convert `self` into a type which is `Send`.
665     ///
666     /// If the tendril is owned or inline, this is free,
667     /// but if it's shared this will entail a copy of the contents.
668     #[inline]
into_send(mut self) -> SendTendril<F>669     pub fn into_send(mut self) -> SendTendril<F> {
670         self.make_owned();
671         SendTendril {
672             // This changes the header.refcount from A to NonAtomic, but that's
673             // OK because we have defined the format of A as a usize.
674             tendril: unsafe { mem::transmute(self) },
675         }
676     }
677 
678     /// View as a superset format, for free.
679     #[inline(always)]
as_superset<Super>(&self) -> &Tendril<Super, A> where F: fmt::SubsetOf<Super>, Super: fmt::Format,680     pub fn as_superset<Super>(&self) -> &Tendril<Super, A>
681     where
682         F: fmt::SubsetOf<Super>,
683         Super: fmt::Format,
684     {
685         unsafe { mem::transmute(self) }
686     }
687 
688     /// Convert into a superset format, for free.
689     #[inline(always)]
into_superset<Super>(self) -> Tendril<Super, A> where F: fmt::SubsetOf<Super>, Super: fmt::Format,690     pub fn into_superset<Super>(self) -> Tendril<Super, A>
691     where
692         F: fmt::SubsetOf<Super>,
693         Super: fmt::Format,
694     {
695         unsafe { mem::transmute(self) }
696     }
697 
698     /// View as a subset format, if the `Tendril` conforms to that subset.
699     #[inline]
try_as_subset<Sub>(&self) -> Result<&Tendril<Sub, A>, ()> where Sub: fmt::SubsetOf<F>,700     pub fn try_as_subset<Sub>(&self) -> Result<&Tendril<Sub, A>, ()>
701     where
702         Sub: fmt::SubsetOf<F>,
703     {
704         match Sub::revalidate_subset(self.as_byte_slice()) {
705             true => Ok(unsafe { mem::transmute(self) }),
706             false => Err(()),
707         }
708     }
709 
710     /// Convert into a subset format, if the `Tendril` conforms to that subset.
711     #[inline]
try_into_subset<Sub>(self) -> Result<Tendril<Sub, A>, Self> where Sub: fmt::SubsetOf<F>,712     pub fn try_into_subset<Sub>(self) -> Result<Tendril<Sub, A>, Self>
713     where
714         Sub: fmt::SubsetOf<F>,
715     {
716         match Sub::revalidate_subset(self.as_byte_slice()) {
717             true => Ok(unsafe { mem::transmute(self) }),
718             false => Err(self),
719         }
720     }
721 
722     /// View as another format, if the bytes of the `Tendril` are valid for
723     /// that format.
724     #[inline]
try_reinterpret_view<Other>(&self) -> Result<&Tendril<Other, A>, ()> where Other: fmt::Format,725     pub fn try_reinterpret_view<Other>(&self) -> Result<&Tendril<Other, A>, ()>
726     where
727         Other: fmt::Format,
728     {
729         match Other::validate(self.as_byte_slice()) {
730             true => Ok(unsafe { mem::transmute(self) }),
731             false => Err(()),
732         }
733     }
734 
735     /// Convert into another format, if the `Tendril` conforms to that format.
736     ///
737     /// This only re-validates the existing bytes under the new format. It
738     /// will *not* change the byte content of the tendril!
739     ///
740     /// See the `encode` and `decode` methods for character encoding conversion.
741     #[inline]
try_reinterpret<Other>(self) -> Result<Tendril<Other, A>, Self> where Other: fmt::Format,742     pub fn try_reinterpret<Other>(self) -> Result<Tendril<Other, A>, Self>
743     where
744         Other: fmt::Format,
745     {
746         match Other::validate(self.as_byte_slice()) {
747             true => Ok(unsafe { mem::transmute(self) }),
748             false => Err(self),
749         }
750     }
751 
752     /// Push some bytes onto the end of the `Tendril`, if they conform to the
753     /// format.
754     #[inline]
try_push_bytes(&mut self, buf: &[u8]) -> Result<(), ()>755     pub fn try_push_bytes(&mut self, buf: &[u8]) -> Result<(), ()> {
756         match F::validate(buf) {
757             true => unsafe {
758                 self.push_bytes_without_validating(buf);
759                 Ok(())
760             },
761             false => Err(()),
762         }
763     }
764 
    /// Push another `Tendril` onto the end of this one.
    ///
    /// When both tendrils are shared views of the same buffer and
    /// `other.aux() == self.aux() + self.raw_len()`, `self` is simply
    /// lengthened and no bytes are copied. Otherwise the bytes of `other`
    /// are appended.
    ///
    /// Panics if the combined length overflows a `u32`.
    #[inline]
    pub fn push_tendril(&mut self, other: &Tendril<F, A>) {
        // Computed first, so an overflow panics before anything is modified.
        let new_len = self.len32().checked_add(other.len32()).expect(OFLOW);

        unsafe {
            // The no-copy path only applies when both sides are heap-backed.
            if (self.ptr.get().get() > MAX_INLINE_TAG) && (other.ptr.get().get() > MAX_INLINE_TAG) {
                let (self_buf, self_shared, _) = self.assume_buf();
                let (other_buf, other_shared, _) = other.assume_buf();

                // Same buffer, both shared, and `other` picks up where
                // `self` leaves off: just lengthen `self`.
                if self_shared
                    && other_shared
                    && (self_buf.data_ptr() == other_buf.data_ptr())
                    && other.aux() == self.aux() + self.raw_len()
                {
                    self.set_len(new_len);
                    return;
                }
            }

            // General case: append a copy of `other`'s bytes.
            self.push_bytes_without_validating(other.as_byte_slice())
        }
    }
788 
789     /// Attempt to slice this `Tendril` as a new `Tendril`.
790     ///
791     /// This will share the buffer when possible. Mutating a shared buffer
792     /// will copy the contents.
793     ///
794     /// The offset and length are in bytes. The function will return
795     /// `Err` if these are out of bounds, or if the resulting slice
796     /// does not conform to the format.
797     #[inline]
try_subtendril( &self, offset: u32, length: u32, ) -> Result<Tendril<F, A>, SubtendrilError>798     pub fn try_subtendril(
799         &self,
800         offset: u32,
801         length: u32,
802     ) -> Result<Tendril<F, A>, SubtendrilError> {
803         let self_len = self.len32();
804         if offset > self_len || length > (self_len - offset) {
805             return Err(SubtendrilError::OutOfBounds);
806         }
807 
808         unsafe {
809             let byte_slice = unsafe_slice(self.as_byte_slice(), offset as usize, length as usize);
810             if !F::validate_subseq(byte_slice) {
811                 return Err(SubtendrilError::ValidationFailed);
812             }
813 
814             Ok(self.unsafe_subtendril(offset, length))
815         }
816     }
817 
818     /// Slice this `Tendril` as a new `Tendril`.
819     ///
820     /// Panics on bounds or validity check failure.
821     #[inline]
subtendril(&self, offset: u32, length: u32) -> Tendril<F, A>822     pub fn subtendril(&self, offset: u32, length: u32) -> Tendril<F, A> {
823         self.try_subtendril(offset, length).unwrap()
824     }
825 
826     /// Try to drop `n` bytes from the front.
827     ///
828     /// Returns `Err` if the bytes are not available, or the suffix fails
829     /// validation.
830     #[inline]
try_pop_front(&mut self, n: u32) -> Result<(), SubtendrilError>831     pub fn try_pop_front(&mut self, n: u32) -> Result<(), SubtendrilError> {
832         if n == 0 {
833             return Ok(());
834         }
835         let old_len = self.len32();
836         if n > old_len {
837             return Err(SubtendrilError::OutOfBounds);
838         }
839         let new_len = old_len - n;
840 
841         unsafe {
842             if !F::validate_suffix(unsafe_slice(
843                 self.as_byte_slice(),
844                 n as usize,
845                 new_len as usize,
846             )) {
847                 return Err(SubtendrilError::ValidationFailed);
848             }
849 
850             self.unsafe_pop_front(n);
851             Ok(())
852         }
853     }
854 
855     /// Drop `n` bytes from the front.
856     ///
857     /// Panics if the bytes are not available, or the suffix fails
858     /// validation.
859     #[inline]
pop_front(&mut self, n: u32)860     pub fn pop_front(&mut self, n: u32) {
861         self.try_pop_front(n).unwrap()
862     }
863 
864     /// Drop `n` bytes from the back.
865     ///
866     /// Returns `Err` if the bytes are not available, or the prefix fails
867     /// validation.
868     #[inline]
try_pop_back(&mut self, n: u32) -> Result<(), SubtendrilError>869     pub fn try_pop_back(&mut self, n: u32) -> Result<(), SubtendrilError> {
870         if n == 0 {
871             return Ok(());
872         }
873         let old_len = self.len32();
874         if n > old_len {
875             return Err(SubtendrilError::OutOfBounds);
876         }
877         let new_len = old_len - n;
878 
879         unsafe {
880             if !F::validate_prefix(unsafe_slice(self.as_byte_slice(), 0, new_len as usize)) {
881                 return Err(SubtendrilError::ValidationFailed);
882             }
883 
884             self.unsafe_pop_back(n);
885             Ok(())
886         }
887     }
888 
889     /// Drop `n` bytes from the back.
890     ///
891     /// Panics if the bytes are not available, or the prefix fails
892     /// validation.
893     #[inline]
pop_back(&mut self, n: u32)894     pub fn pop_back(&mut self, n: u32) {
895         self.try_pop_back(n).unwrap()
896     }
897 
898     /// View as another format, without validating.
899     #[inline(always)]
reinterpret_view_without_validating<Other>(&self) -> &Tendril<Other, A> where Other: fmt::Format,900     pub unsafe fn reinterpret_view_without_validating<Other>(&self) -> &Tendril<Other, A>
901     where
902         Other: fmt::Format,
903     {
904         mem::transmute(self)
905     }
906 
    /// Convert into another format, without validating.
    ///
    /// Only the format marker changes; the tendril is reinterpreted in place.
    ///
    /// # Safety
    ///
    /// No check is made that the bytes are valid for `Other`; that is left
    /// to the caller.
    #[inline(always)]
    pub unsafe fn reinterpret_without_validating<Other>(self) -> Tendril<Other, A>
    where
        Other: fmt::Format,
    {
        mem::transmute(self)
    }
915 
916     /// Build a `Tendril` by copying a byte slice, without validating.
917     #[inline]
from_byte_slice_without_validating(x: &[u8]) -> Tendril<F, A>918     pub unsafe fn from_byte_slice_without_validating(x: &[u8]) -> Tendril<F, A> {
919         assert!(x.len() <= buf32::MAX_LEN);
920         if x.len() <= MAX_INLINE_LEN {
921             Tendril::inline(x)
922         } else {
923             Tendril::owned_copy(x)
924         }
925     }
926 
927     /// Push some bytes onto the end of the `Tendril`, without validating.
928     #[inline]
push_bytes_without_validating(&mut self, buf: &[u8])929     pub unsafe fn push_bytes_without_validating(&mut self, buf: &[u8]) {
930         assert!(buf.len() <= buf32::MAX_LEN);
931 
932         let Fixup {
933             drop_left,
934             drop_right,
935             insert_len,
936             insert_bytes,
937         } = F::fixup(self.as_byte_slice(), buf);
938 
939         // FIXME: think more about overflow
940         let adj_len = self.len32() + insert_len - drop_left;
941 
942         let new_len = adj_len.checked_add(buf.len() as u32).expect(OFLOW) - drop_right;
943 
944         let drop_left = drop_left as usize;
945         let drop_right = drop_right as usize;
946 
947         if new_len <= MAX_INLINE_LEN as u32 {
948             let mut tmp = [0_u8; MAX_INLINE_LEN];
949             {
950                 let old = self.as_byte_slice();
951                 let mut dest = tmp.as_mut_ptr();
952                 copy_and_advance(&mut dest, unsafe_slice(old, 0, old.len() - drop_left));
953                 copy_and_advance(
954                     &mut dest,
955                     unsafe_slice(&insert_bytes, 0, insert_len as usize),
956                 );
957                 copy_and_advance(
958                     &mut dest,
959                     unsafe_slice(buf, drop_right, buf.len() - drop_right),
960                 );
961             }
962             *self = Tendril::inline(&tmp[..new_len as usize]);
963         } else {
964             self.make_owned_with_capacity(new_len);
965             let (owned, _, _) = self.assume_buf();
966             let mut dest = owned
967                 .data_ptr()
968                 .offset((owned.len as usize - drop_left) as isize);
969             copy_and_advance(
970                 &mut dest,
971                 unsafe_slice(&insert_bytes, 0, insert_len as usize),
972             );
973             copy_and_advance(
974                 &mut dest,
975                 unsafe_slice(buf, drop_right, buf.len() - drop_right),
976             );
977             self.set_len(new_len);
978         }
979     }
980 
981     /// Slice this `Tendril` as a new `Tendril`.
982     ///
983     /// Does not check validity or bounds!
984     #[inline]
unsafe_subtendril(&self, offset: u32, length: u32) -> Tendril<F, A>985     pub unsafe fn unsafe_subtendril(&self, offset: u32, length: u32) -> Tendril<F, A> {
986         if length <= MAX_INLINE_LEN as u32 {
987             Tendril::inline(unsafe_slice(
988                 self.as_byte_slice(),
989                 offset as usize,
990                 length as usize,
991             ))
992         } else {
993             self.make_buf_shared();
994             self.incref();
995             let (buf, _, _) = self.assume_buf();
996             Tendril::shared(buf, self.aux() + offset, length)
997         }
998     }
999 
1000     /// Drop `n` bytes from the front.
1001     ///
1002     /// Does not check validity or bounds!
1003     #[inline]
unsafe_pop_front(&mut self, n: u32)1004     pub unsafe fn unsafe_pop_front(&mut self, n: u32) {
1005         let new_len = self.len32() - n;
1006         if new_len <= MAX_INLINE_LEN as u32 {
1007             *self = Tendril::inline(unsafe_slice(
1008                 self.as_byte_slice(),
1009                 n as usize,
1010                 new_len as usize,
1011             ));
1012         } else {
1013             self.make_buf_shared();
1014             self.set_aux(self.aux() + n);
1015             let len = self.raw_len();
1016             self.set_len(len - n);
1017         }
1018     }
1019 
1020     /// Drop `n` bytes from the back.
1021     ///
1022     /// Does not check validity or bounds!
1023     #[inline]
unsafe_pop_back(&mut self, n: u32)1024     pub unsafe fn unsafe_pop_back(&mut self, n: u32) {
1025         let new_len = self.len32() - n;
1026         if new_len <= MAX_INLINE_LEN as u32 {
1027             *self = Tendril::inline(unsafe_slice(self.as_byte_slice(), 0, new_len as usize));
1028         } else {
1029             self.make_buf_shared();
1030             let len = self.raw_len();
1031             self.set_len(len - n);
1032         }
1033     }
1034 
1035     #[inline]
incref(&self)1036     unsafe fn incref(&self) {
1037         (*self.header()).refcount.increment();
1038     }
1039 
1040     #[inline]
make_buf_shared(&self)1041     unsafe fn make_buf_shared(&self) {
1042         let p = self.ptr.get().get();
1043         if p & 1 == 0 {
1044             let header = p as *mut Header<A>;
1045             (*header).cap = self.aux();
1046 
1047             self.ptr.set(NonZeroUsize::new_unchecked(p | 1));
1048             self.set_aux(0);
1049         }
1050     }
1051 
1052     // This is not public as it is of no practical value to users.
1053     // By and large they shouldn't need to worry about the distinction at all,
1054     // and going out of your way to make it owned is pointless.
1055     #[inline]
make_owned(&mut self)1056     fn make_owned(&mut self) {
1057         unsafe {
1058             let ptr = self.ptr.get().get();
1059             if ptr <= MAX_INLINE_TAG || (ptr & 1) == 1 {
1060                 *self = Tendril::owned_copy(self.as_byte_slice());
1061             }
1062         }
1063     }
1064 
1065     #[inline]
make_owned_with_capacity(&mut self, cap: u32)1066     unsafe fn make_owned_with_capacity(&mut self, cap: u32) {
1067         self.make_owned();
1068         let mut buf = self.assume_buf().0;
1069         buf.grow(cap);
1070         self.ptr.set(NonZeroUsize::new_unchecked(buf.ptr as usize));
1071         self.set_aux(buf.cap);
1072     }
1073 
1074     #[inline(always)]
header(&self) -> *mut Header<A>1075     unsafe fn header(&self) -> *mut Header<A> {
1076         (self.ptr.get().get() & !1) as *mut Header<A>
1077     }
1078 
1079     #[inline]
assume_buf(&self) -> (Buf32<Header<A>>, bool, u32)1080     unsafe fn assume_buf(&self) -> (Buf32<Header<A>>, bool, u32) {
1081         let ptr = self.ptr.get().get();
1082         let header = self.header();
1083         let shared = (ptr & 1) == 1;
1084         let (cap, offset) = match shared {
1085             true => ((*header).cap, self.aux()),
1086             false => (self.aux(), 0),
1087         };
1088 
1089         (
1090             Buf32 {
1091                 ptr: header,
1092                 len: offset + self.len32(),
1093                 cap: cap,
1094             },
1095             shared,
1096             offset,
1097         )
1098     }
1099 
1100     #[inline]
inline(x: &[u8]) -> Tendril<F, A>1101     unsafe fn inline(x: &[u8]) -> Tendril<F, A> {
1102         let len = x.len();
1103         let t = Tendril {
1104             ptr: Cell::new(inline_tag(len as u32)),
1105             buf: UnsafeCell::new(Buffer { inline: [0; 8] }),
1106             marker: PhantomData,
1107             refcount_marker: PhantomData,
1108         };
1109         ptr::copy_nonoverlapping(x.as_ptr(), (*t.buf.get()).inline.as_mut_ptr(), len);
1110         t
1111     }
1112 
1113     #[inline]
owned(x: Buf32<Header<A>>) -> Tendril<F, A>1114     unsafe fn owned(x: Buf32<Header<A>>) -> Tendril<F, A> {
1115         Tendril {
1116             ptr: Cell::new(NonZeroUsize::new_unchecked(x.ptr as usize)),
1117             buf: UnsafeCell::new(Buffer {
1118                 heap: Heap {
1119                     len: x.len,
1120                     aux: x.cap,
1121                 },
1122             }),
1123             marker: PhantomData,
1124             refcount_marker: PhantomData,
1125         }
1126     }
1127 
1128     #[inline]
owned_copy(x: &[u8]) -> Tendril<F, A>1129     unsafe fn owned_copy(x: &[u8]) -> Tendril<F, A> {
1130         let len32 = x.len() as u32;
1131         let mut b = Buf32::with_capacity(len32, Header::new());
1132         ptr::copy_nonoverlapping(x.as_ptr(), b.data_ptr(), x.len());
1133         b.len = len32;
1134         Tendril::owned(b)
1135     }
1136 
1137     #[inline]
shared(buf: Buf32<Header<A>>, off: u32, len: u32) -> Tendril<F, A>1138     unsafe fn shared(buf: Buf32<Header<A>>, off: u32, len: u32) -> Tendril<F, A> {
1139         Tendril {
1140             ptr: Cell::new(NonZeroUsize::new_unchecked((buf.ptr as usize) | 1)),
1141             buf: UnsafeCell::new(Buffer {
1142                 heap: Heap { len, aux: off },
1143             }),
1144             marker: PhantomData,
1145             refcount_marker: PhantomData,
1146         }
1147     }
1148 
1149     #[inline]
as_byte_slice<'a>(&'a self) -> &'a [u8]1150     fn as_byte_slice<'a>(&'a self) -> &'a [u8] {
1151         unsafe {
1152             match self.ptr.get().get() {
1153                 EMPTY_TAG => &[],
1154                 n if n <= MAX_INLINE_LEN => (*self.buf.get()).inline.get_unchecked(..n),
1155                 _ => {
1156                     let (buf, _, offset) = self.assume_buf();
1157                     copy_lifetime(
1158                         self,
1159                         unsafe_slice(buf.data(), offset as usize, self.len32() as usize),
1160                     )
1161                 }
1162             }
1163         }
1164     }
1165 
1166     // There's no need to worry about locking on an atomic Tendril, because it makes it unique as
1167     // soon as you do that.
1168     #[inline]
as_mut_byte_slice<'a>(&'a mut self) -> &'a mut [u8]1169     fn as_mut_byte_slice<'a>(&'a mut self) -> &'a mut [u8] {
1170         unsafe {
1171             match self.ptr.get().get() {
1172                 EMPTY_TAG => &mut [],
1173                 n if n <= MAX_INLINE_LEN => (*self.buf.get()).inline.get_unchecked_mut(..n),
1174                 _ => {
1175                     self.make_owned();
1176                     let (mut buf, _, offset) = self.assume_buf();
1177                     let len = self.len32() as usize;
1178                     copy_lifetime_mut(self, unsafe_slice_mut(buf.data_mut(), offset as usize, len))
1179                 }
1180             }
1181         }
1182     }
1183 
raw_len(&self) -> u321184     unsafe fn raw_len(&self) -> u32 {
1185         (*self.buf.get()).heap.len
1186     }
1187 
set_len(&mut self, len: u32)1188     unsafe fn set_len(&mut self, len: u32) {
1189         (*self.buf.get()).heap.len = len;
1190     }
1191 
aux(&self) -> u321192     unsafe fn aux(&self) -> u32 {
1193         (*self.buf.get()).heap.aux
1194     }
1195 
set_aux(&self, aux: u32)1196     unsafe fn set_aux(&self, aux: u32) {
1197         (*self.buf.get()).heap.aux = aux;
1198     }
1199 }
1200 
1201 impl<F, A> Tendril<F, A>
1202 where
1203     F: fmt::SliceFormat,
1204     A: Atomicity,
1205 {
1206     /// Build a `Tendril` by copying a slice.
1207     #[inline]
from_slice(x: &F::Slice) -> Tendril<F, A>1208     pub fn from_slice(x: &F::Slice) -> Tendril<F, A> {
1209         unsafe { Tendril::from_byte_slice_without_validating(x.as_bytes()) }
1210     }
1211 
1212     /// Push a slice onto the end of the `Tendril`.
1213     #[inline]
push_slice(&mut self, x: &F::Slice)1214     pub fn push_slice(&mut self, x: &F::Slice) {
1215         unsafe { self.push_bytes_without_validating(x.as_bytes()) }
1216     }
1217 }
1218 
1219 /// A simple wrapper to make `Tendril` `Send`.
1220 ///
1221 /// Although there is a certain subset of the operations on a `Tendril` that a `SendTendril` could
1222 /// reasonably implement, in order to clearly separate concerns this type is deliberately
1223 /// minimalist, acting as a safe encapsulation around the invariants which permit `Send`ness and
1224 /// behaving as an opaque object.
1225 ///
1226 /// A `SendTendril` may be produced by `Tendril.into_send()` or `SendTendril::from(tendril)`,
1227 /// and may be returned to a `Tendril` by `Tendril::from(self)`.
1228 #[derive(Clone)]
1229 pub struct SendTendril<F>
1230 where
1231     F: fmt::Format,
1232 {
1233     tendril: Tendril<F>,
1234 }
1235 
1236 unsafe impl<F> Send for SendTendril<F> where F: fmt::Format {}
1237 
1238 impl<F, A> From<Tendril<F, A>> for SendTendril<F>
1239 where
1240     F: fmt::Format,
1241     A: Atomicity,
1242 {
1243     #[inline]
from(tendril: Tendril<F, A>) -> SendTendril<F>1244     fn from(tendril: Tendril<F, A>) -> SendTendril<F> {
1245         tendril.into_send()
1246     }
1247 }
1248 
1249 impl<F, A> From<SendTendril<F>> for Tendril<F, A>
1250 where
1251     F: fmt::Format,
1252     A: Atomicity,
1253 {
1254     #[inline]
from(send: SendTendril<F>) -> Tendril<F, A>1255     fn from(send: SendTendril<F>) -> Tendril<F, A> {
1256         unsafe { mem::transmute(send.tendril) }
1257         // header.refcount may have been initialised as an Atomic or a NonAtomic, but the value
1258         // will be the same (1) regardless, because the layout is defined.
1259         // Thus we don't need to fiddle about resetting it or anything like that.
1260     }
1261 }
1262 
1263 /// `Tendril`-related methods for Rust slices.
1264 pub trait SliceExt<F>: fmt::Slice
1265 where
1266     F: fmt::SliceFormat<Slice = Self>,
1267 {
1268     /// Make a `Tendril` from this slice.
1269     #[inline]
to_tendril(&self) -> Tendril<F>1270     fn to_tendril(&self) -> Tendril<F> {
1271         // It should be done thusly, but at the time of writing the defaults don't help inference:
1272         //fn to_tendril<A = NonAtomic>(&self) -> Tendril<Self::Format, A>
1273         //    where A: Atomicity,
1274         //{
1275         Tendril::from_slice(self)
1276     }
1277 }
1278 
// `str` yields UTF-8 tendrils and `[u8]` yields byte tendrils; both use the
// default `to_tendril` provided by `SliceExt`.
impl SliceExt<fmt::UTF8> for str {}
impl SliceExt<fmt::Bytes> for [u8] {}
1281 
1282 impl<F, A> Tendril<F, A>
1283 where
1284     F: for<'a> fmt::CharFormat<'a>,
1285     A: Atomicity,
1286 {
1287     /// Remove and return the first character, if any.
1288     #[inline]
pop_front_char<'a>(&'a mut self) -> Option<char>1289     pub fn pop_front_char<'a>(&'a mut self) -> Option<char> {
1290         unsafe {
1291             let next_char; // first char in iterator
1292             let mut skip = 0; // number of bytes to skip, or 0 to clear
1293 
1294             {
1295                 // <--+
1296                 //  |  Creating an iterator borrows self, so introduce a
1297                 //  +- scope to contain the borrow (that way we can mutate
1298                 //     self below, after this scope exits).
1299 
1300                 let mut iter = F::char_indices(self.as_byte_slice());
1301                 match iter.next() {
1302                     Some((_, c)) => {
1303                         next_char = Some(c);
1304                         if let Some((n, _)) = iter.next() {
1305                             skip = n as u32;
1306                         }
1307                     }
1308                     None => {
1309                         next_char = None;
1310                     }
1311                 }
1312             }
1313 
1314             if skip != 0 {
1315                 self.unsafe_pop_front(skip);
1316             } else {
1317                 self.clear();
1318             }
1319 
1320             next_char
1321         }
1322     }
1323 
1324     /// Remove and return a run of characters at the front of the `Tendril`
1325     /// which are classified the same according to the function `classify`.
1326     ///
1327     /// Returns `None` on an empty string.
1328     #[inline]
pop_front_char_run<'a, C, R>(&'a mut self, mut classify: C) -> Option<(Tendril<F, A>, R)> where C: FnMut(char) -> R, R: PartialEq,1329     pub fn pop_front_char_run<'a, C, R>(&'a mut self, mut classify: C) -> Option<(Tendril<F, A>, R)>
1330     where
1331         C: FnMut(char) -> R,
1332         R: PartialEq,
1333     {
1334         let (class, first_mismatch);
1335         {
1336             let mut chars = unsafe { F::char_indices(self.as_byte_slice()) };
1337             let (_, first) = unwrap_or_return!(chars.next(), None);
1338             class = classify(first);
1339             first_mismatch = chars.find(|&(_, ch)| &classify(ch) != &class);
1340         }
1341 
1342         match first_mismatch {
1343             Some((idx, _)) => unsafe {
1344                 let t = self.unsafe_subtendril(0, idx as u32);
1345                 self.unsafe_pop_front(idx as u32);
1346                 Some((t, class))
1347             },
1348             None => {
1349                 let t = self.clone();
1350                 self.clear();
1351                 Some((t, class))
1352             }
1353         }
1354     }
1355 
1356     /// Push a character, if it can be represented in this format.
1357     #[inline]
try_push_char(&mut self, c: char) -> Result<(), ()>1358     pub fn try_push_char(&mut self, c: char) -> Result<(), ()> {
1359         F::encode_char(c, |b| unsafe {
1360             self.push_bytes_without_validating(b);
1361         })
1362     }
1363 }
1364 
/// Extension trait for `io::Read`.
pub trait ReadExt: io::Read {
    /// Read from `self` into the byte tendril `buf`.
    ///
    /// The blanket implementation for every `io::Read` type appends bytes to
    /// `buf` until EOF and returns the number of bytes appended.
    fn read_to_tendril<A>(&mut self, buf: &mut Tendril<fmt::Bytes, A>) -> io::Result<usize>
    where
        A: Atomicity;
}
1371 
1372 impl<T> ReadExt for T
1373 where
1374     T: io::Read,
1375 {
1376     /// Read all bytes until EOF.
read_to_tendril<A>(&mut self, buf: &mut Tendril<fmt::Bytes, A>) -> io::Result<usize> where A: Atomicity,1377     fn read_to_tendril<A>(&mut self, buf: &mut Tendril<fmt::Bytes, A>) -> io::Result<usize>
1378     where
1379         A: Atomicity,
1380     {
1381         // Adapted from libstd/io/mod.rs.
1382         const DEFAULT_BUF_SIZE: u32 = 64 * 1024;
1383 
1384         let start_len = buf.len();
1385         let mut len = start_len;
1386         let mut new_write_size = 16;
1387         let ret;
1388         loop {
1389             if len == buf.len() {
1390                 if new_write_size < DEFAULT_BUF_SIZE {
1391                     new_write_size *= 2;
1392                 }
1393                 // FIXME: this exposes uninitialized bytes to a generic R type
1394                 // this is fine for R=File which never reads these bytes,
1395                 // but user-defined types might.
1396                 // The standard library pushes zeros to `Vec<u8>` for that reason.
1397                 unsafe {
1398                     buf.push_uninitialized(new_write_size);
1399                 }
1400             }
1401 
1402             match self.read(&mut buf[len..]) {
1403                 Ok(0) => {
1404                     ret = Ok(len - start_len);
1405                     break;
1406                 }
1407                 Ok(n) => len += n,
1408                 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}
1409                 Err(e) => {
1410                     ret = Err(e);
1411                     break;
1412                 }
1413             }
1414         }
1415 
1416         let buf_len = buf.len32();
1417         buf.pop_back(buf_len - (len as u32));
1418         ret
1419     }
1420 }
1421 
1422 impl<A> io::Write for Tendril<fmt::Bytes, A>
1423 where
1424     A: Atomicity,
1425 {
1426     #[inline]
write(&mut self, buf: &[u8]) -> io::Result<usize>1427     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
1428         self.push_slice(buf);
1429         Ok(buf.len())
1430     }
1431 
1432     #[inline]
write_all(&mut self, buf: &[u8]) -> io::Result<()>1433     fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
1434         self.push_slice(buf);
1435         Ok(())
1436     }
1437 
1438     #[inline(always)]
flush(&mut self) -> io::Result<()>1439     fn flush(&mut self) -> io::Result<()> {
1440         Ok(())
1441     }
1442 }
1443 
#[cfg(feature = "encoding")]
impl<A> encoding::ByteWriter for Tendril<fmt::Bytes, A>
where
    A: Atomicity,
{
    /// Append a single byte.
    #[inline]
    fn write_byte(&mut self, b: u8) {
        let single = [b];
        self.push_slice(&single);
    }

    /// Append a run of bytes.
    #[inline]
    fn write_bytes(&mut self, v: &[u8]) {
        self.push_slice(v);
    }

    /// Reserve room for `additional` more bytes, capped at `u32::MAX`.
    #[inline]
    fn writer_hint(&mut self, additional: usize) {
        let capped = ::std::cmp::min(u32::MAX as usize, additional);
        self.reserve(capped as u32);
    }
}
1464 
1465 impl<F, A> Tendril<F, A>
1466 where
1467     A: Atomicity,
1468     F: fmt::SliceFormat<Slice = [u8]>,
1469 {
1470     /// Decode from some character encoding into UTF-8.
1471     ///
1472     /// See the [rust-encoding docs](https://lifthrasiir.github.io/rust-encoding/encoding/)
1473     /// for more information.
1474     #[inline]
1475     #[cfg(feature = "encoding")]
decode( &self, encoding: EncodingRef, trap: DecoderTrap, ) -> Result<Tendril<fmt::UTF8, A>, ::std::borrow::Cow<'static, str>>1476     pub fn decode(
1477         &self,
1478         encoding: EncodingRef,
1479         trap: DecoderTrap,
1480     ) -> Result<Tendril<fmt::UTF8, A>, ::std::borrow::Cow<'static, str>> {
1481         let mut ret = Tendril::new();
1482         encoding.decode_to(&*self, trap, &mut ret).map(|_| ret)
1483     }
1484 
1485     /// Push "uninitialized bytes" onto the end.
1486     ///
1487     /// Really, this grows the tendril without writing anything to the new area.
1488     /// It's only defined for byte tendrils because it's only useful if you
1489     /// plan to then mutate the buffer.
1490     #[inline]
push_uninitialized(&mut self, n: u32)1491     pub unsafe fn push_uninitialized(&mut self, n: u32) {
1492         let new_len = self.len32().checked_add(n).expect(OFLOW);
1493         if new_len <= MAX_INLINE_LEN as u32 && self.ptr.get().get() <= MAX_INLINE_TAG {
1494             self.ptr.set(inline_tag(new_len))
1495         } else {
1496             self.make_owned_with_capacity(new_len);
1497             self.set_len(new_len);
1498         }
1499     }
1500 }
1501 
1502 impl<A> strfmt::Display for Tendril<fmt::UTF8, A>
1503 where
1504     A: Atomicity,
1505 {
1506     #[inline]
fmt(&self, f: &mut strfmt::Formatter) -> strfmt::Result1507     fn fmt(&self, f: &mut strfmt::Formatter) -> strfmt::Result {
1508         <str as strfmt::Display>::fmt(&**self, f)
1509     }
1510 }
1511 
1512 impl<A> str::FromStr for Tendril<fmt::UTF8, A>
1513 where
1514     A: Atomicity,
1515 {
1516     type Err = ();
1517 
1518     #[inline]
from_str(s: &str) -> Result<Self, ()>1519     fn from_str(s: &str) -> Result<Self, ()> {
1520         Ok(Tendril::from_slice(s))
1521     }
1522 }
1523 
1524 impl<A> strfmt::Write for Tendril<fmt::UTF8, A>
1525 where
1526     A: Atomicity,
1527 {
1528     #[inline]
write_str(&mut self, s: &str) -> strfmt::Result1529     fn write_str(&mut self, s: &str) -> strfmt::Result {
1530         self.push_slice(s);
1531         Ok(())
1532     }
1533 }
1534 
#[cfg(feature = "encoding")]
impl<A> encoding::StringWriter for Tendril<fmt::UTF8, A>
where
    A: Atomicity,
{
    /// Append a single character.
    #[inline]
    fn write_char(&mut self, c: char) {
        Tendril::push_char(self, c);
    }

    /// Append a string slice.
    #[inline]
    fn write_str(&mut self, s: &str) {
        Tendril::push_slice(self, s);
    }

    /// Reserve room for `additional` more bytes, capped at `u32::MAX`.
    #[inline]
    fn writer_hint(&mut self, additional: usize) {
        let capped = ::std::cmp::min(u32::MAX as usize, additional);
        self.reserve(capped as u32);
    }
}
1555 
1556 impl<A> Tendril<fmt::UTF8, A>
1557 where
1558     A: Atomicity,
1559 {
1560     /// Encode from UTF-8 into some other character encoding.
1561     ///
1562     /// See the [rust-encoding docs](https://lifthrasiir.github.io/rust-encoding/encoding/)
1563     /// for more information.
1564     #[inline]
1565     #[cfg(feature = "encoding")]
encode( &self, encoding: EncodingRef, trap: EncoderTrap, ) -> Result<Tendril<fmt::Bytes, A>, ::std::borrow::Cow<'static, str>>1566     pub fn encode(
1567         &self,
1568         encoding: EncodingRef,
1569         trap: EncoderTrap,
1570     ) -> Result<Tendril<fmt::Bytes, A>, ::std::borrow::Cow<'static, str>> {
1571         let mut ret = Tendril::new();
1572         encoding.encode_to(&*self, trap, &mut ret).map(|_| ret)
1573     }
1574 
1575     /// Push a character onto the end.
1576     #[inline]
push_char(&mut self, c: char)1577     pub fn push_char(&mut self, c: char) {
1578         unsafe {
1579             self.push_bytes_without_validating(c.encode_utf8(&mut [0_u8; 4]).as_bytes());
1580         }
1581     }
1582 
1583     /// Create a `Tendril` from a single character.
1584     #[inline]
from_char(c: char) -> Tendril<fmt::UTF8, A>1585     pub fn from_char(c: char) -> Tendril<fmt::UTF8, A> {
1586         let mut t: Tendril<fmt::UTF8, A> = Tendril::new();
1587         t.push_char(c);
1588         t
1589     }
1590 
1591     /// Helper for the `format_tendril!` macro.
1592     #[inline]
format(args: strfmt::Arguments) -> Tendril<fmt::UTF8, A>1593     pub fn format(args: strfmt::Arguments) -> Tendril<fmt::UTF8, A> {
1594         use std::fmt::Write;
1595         let mut output: Tendril<fmt::UTF8, A> = Tendril::new();
1596         let _ = write!(&mut output, "{}", args);
1597         output
1598     }
1599 }
1600 
/// Create a `StrTendril` through string formatting.
///
/// Accepts the same arguments as the standard `format!` macro and forwards
/// them to `StrTendril::format`.
#[macro_export]
macro_rules! format_tendril {
    ($($arg:tt)*) => {
        $crate::StrTendril::format(format_args!($($arg)*))
    };
}
1608 
1609 impl<'a, F, A> From<&'a F::Slice> for Tendril<F, A>
1610 where
1611     F: fmt::SliceFormat,
1612     A: Atomicity,
1613 {
1614     #[inline]
from(input: &F::Slice) -> Tendril<F, A>1615     fn from(input: &F::Slice) -> Tendril<F, A> {
1616         Tendril::from_slice(input)
1617     }
1618 }
1619 
1620 impl<A> From<String> for Tendril<fmt::UTF8, A>
1621 where
1622     A: Atomicity,
1623 {
1624     #[inline]
from(input: String) -> Tendril<fmt::UTF8, A>1625     fn from(input: String) -> Tendril<fmt::UTF8, A> {
1626         Tendril::from_slice(&*input)
1627     }
1628 }
1629 
1630 impl<F, A> AsRef<F::Slice> for Tendril<F, A>
1631 where
1632     F: fmt::SliceFormat,
1633     A: Atomicity,
1634 {
1635     #[inline]
as_ref(&self) -> &F::Slice1636     fn as_ref(&self) -> &F::Slice {
1637         &**self
1638     }
1639 }
1640 
1641 impl<A> From<Tendril<fmt::UTF8, A>> for String
1642 where
1643     A: Atomicity,
1644 {
1645     #[inline]
from(input: Tendril<fmt::UTF8, A>) -> String1646     fn from(input: Tendril<fmt::UTF8, A>) -> String {
1647         String::from(&*input)
1648     }
1649 }
1650 
1651 impl<'a, A> From<&'a Tendril<fmt::UTF8, A>> for String
1652 where
1653     A: Atomicity,
1654 {
1655     #[inline]
from(input: &'a Tendril<fmt::UTF8, A>) -> String1656     fn from(input: &'a Tendril<fmt::UTF8, A>) -> String {
1657         String::from(&**input)
1658     }
1659 }
1660 
// Benchmarks are loaded from `bench.rs` and only compiled for test builds
// with the `bench` feature enabled.
#[cfg(all(test, feature = "bench"))]
#[path = "bench.rs"]
mod bench;
1664 
1665 #[cfg(test)]
1666 mod test {
1667     use super::{
1668         Atomic, ByteTendril, Header, NonAtomic, ReadExt, SendTendril, SliceExt, StrTendril, Tendril,
1669     };
1670     use fmt;
1671     use std::iter;
1672     use std::thread;
1673 
assert_send<T: Send>()1674     fn assert_send<T: Send>() {}
1675 
1676     #[test]
smoke_test()1677     fn smoke_test() {
1678         assert_eq!("", &*"".to_tendril());
1679         assert_eq!("abc", &*"abc".to_tendril());
1680         assert_eq!("Hello, world!", &*"Hello, world!".to_tendril());
1681 
1682         assert_eq!(b"", &*b"".to_tendril());
1683         assert_eq!(b"abc", &*b"abc".to_tendril());
1684         assert_eq!(b"Hello, world!", &*b"Hello, world!".to_tendril());
1685     }
1686 
1687     #[test]
assert_sizes()1688     fn assert_sizes() {
1689         use std::mem;
1690         struct EmptyWithDrop;
1691         impl Drop for EmptyWithDrop {
1692             fn drop(&mut self) {}
1693         }
1694         let compiler_uses_inline_drop_flags = mem::size_of::<EmptyWithDrop>() > 0;
1695 
1696         let correct = mem::size_of::<*const ()>()
1697             + 8
1698             + if compiler_uses_inline_drop_flags {
1699                 1
1700             } else {
1701                 0
1702             };
1703 
1704         assert_eq!(correct, mem::size_of::<ByteTendril>());
1705         assert_eq!(correct, mem::size_of::<StrTendril>());
1706 
1707         assert_eq!(correct, mem::size_of::<Option<ByteTendril>>());
1708         assert_eq!(correct, mem::size_of::<Option<StrTendril>>());
1709 
1710         assert_eq!(
1711             mem::size_of::<*const ()>() * 2,
1712             mem::size_of::<Header<Atomic>>(),
1713         );
1714         assert_eq!(
1715             mem::size_of::<*const ()>() + 4,
1716             mem::size_of::<Header<NonAtomic>>(),
1717         );
1718     }
1719 
1720     #[test]
validate_utf8()1721     fn validate_utf8() {
1722         assert!(ByteTendril::try_from_byte_slice(b"\xFF").is_ok());
1723         assert!(StrTendril::try_from_byte_slice(b"\xFF").is_err());
1724         assert!(StrTendril::try_from_byte_slice(b"\xEA\x99\xFF").is_err());
1725         assert!(StrTendril::try_from_byte_slice(b"\xEA\x99").is_err());
1726         assert!(StrTendril::try_from_byte_slice(b"\xEA\x99\xAE\xEA").is_err());
1727         assert_eq!(
1728             "\u{a66e}",
1729             &*StrTendril::try_from_byte_slice(b"\xEA\x99\xAE").unwrap()
1730         );
1731 
1732         let mut t = StrTendril::new();
1733         assert!(t.try_push_bytes(b"\xEA\x99").is_err());
1734         assert!(t.try_push_bytes(b"\xAE").is_err());
1735         assert!(t.try_push_bytes(b"\xEA\x99\xAE").is_ok());
1736         assert_eq!("\u{a66e}", &*t);
1737     }
1738 
1739     #[test]
share_and_unshare()1740     fn share_and_unshare() {
1741         let s = b"foobarbaz".to_tendril();
1742         assert_eq!(b"foobarbaz", &*s);
1743         assert!(!s.is_shared());
1744 
1745         let mut t = s.clone();
1746         assert_eq!(s.as_ptr(), t.as_ptr());
1747         assert!(s.is_shared());
1748         assert!(t.is_shared());
1749 
1750         t.push_slice(b"quux");
1751         assert_eq!(b"foobarbaz", &*s);
1752         assert_eq!(b"foobarbazquux", &*t);
1753         assert!(s.as_ptr() != t.as_ptr());
1754         assert!(!t.is_shared());
1755     }
1756 
1757     #[test]
format_display()1758     fn format_display() {
1759         assert_eq!("foobar", &*format!("{}", "foobar".to_tendril()));
1760 
1761         let mut s = "foo".to_tendril();
1762         assert_eq!("foo", &*format!("{}", s));
1763 
1764         let t = s.clone();
1765         assert_eq!("foo", &*format!("{}", s));
1766         assert_eq!("foo", &*format!("{}", t));
1767 
1768         s.push_slice("barbaz!");
1769         assert_eq!("foobarbaz!", &*format!("{}", s));
1770         assert_eq!("foo", &*format!("{}", t));
1771     }
1772 
1773     #[test]
format_debug()1774     fn format_debug() {
1775         assert_eq!(
1776             r#"Tendril<UTF8>(inline: "foobar")"#,
1777             &*format!("{:?}", "foobar".to_tendril())
1778         );
1779         assert_eq!(
1780             r#"Tendril<Bytes>(inline: [102, 111, 111, 98, 97, 114])"#,
1781             &*format!("{:?}", b"foobar".to_tendril())
1782         );
1783 
1784         let t = "anextralongstring".to_tendril();
1785         assert_eq!(
1786             r#"Tendril<UTF8>(owned: "anextralongstring")"#,
1787             &*format!("{:?}", t)
1788         );
1789         let _ = t.clone();
1790         assert_eq!(
1791             r#"Tendril<UTF8>(shared: "anextralongstring")"#,
1792             &*format!("{:?}", t)
1793         );
1794     }
1795 
1796     #[test]
subtendril()1797     fn subtendril() {
1798         assert_eq!("foo".to_tendril(), "foo-bar".to_tendril().subtendril(0, 3));
1799         assert_eq!("bar".to_tendril(), "foo-bar".to_tendril().subtendril(4, 3));
1800 
1801         let mut t = "foo-bar".to_tendril();
1802         t.pop_front(2);
1803         assert_eq!("o-bar".to_tendril(), t);
1804         t.pop_back(1);
1805         assert_eq!("o-ba".to_tendril(), t);
1806 
1807         assert_eq!(
1808             "foo".to_tendril(),
1809             "foo-a-longer-string-bar-baz".to_tendril().subtendril(0, 3)
1810         );
1811         assert_eq!(
1812             "oo-a-".to_tendril(),
1813             "foo-a-longer-string-bar-baz".to_tendril().subtendril(1, 5)
1814         );
1815         assert_eq!(
1816             "bar".to_tendril(),
1817             "foo-a-longer-string-bar-baz".to_tendril().subtendril(20, 3)
1818         );
1819 
1820         let mut t = "another rather long string".to_tendril();
1821         t.pop_front(2);
1822         assert!(t.starts_with("other rather"));
1823         t.pop_back(1);
1824         assert_eq!("other rather long strin".to_tendril(), t);
1825         assert!(t.is_shared());
1826     }
1827 
1828     #[test]
subtendril_invalid()1829     fn subtendril_invalid() {
1830         assert!("\u{a66e}".to_tendril().try_subtendril(0, 2).is_err());
1831         assert!("\u{a66e}".to_tendril().try_subtendril(1, 2).is_err());
1832 
1833         assert!("\u{1f4a9}".to_tendril().try_subtendril(0, 3).is_err());
1834         assert!("\u{1f4a9}".to_tendril().try_subtendril(0, 2).is_err());
1835         assert!("\u{1f4a9}".to_tendril().try_subtendril(0, 1).is_err());
1836         assert!("\u{1f4a9}".to_tendril().try_subtendril(1, 3).is_err());
1837         assert!("\u{1f4a9}".to_tendril().try_subtendril(1, 2).is_err());
1838         assert!("\u{1f4a9}".to_tendril().try_subtendril(1, 1).is_err());
1839         assert!("\u{1f4a9}".to_tendril().try_subtendril(2, 2).is_err());
1840         assert!("\u{1f4a9}".to_tendril().try_subtendril(2, 1).is_err());
1841         assert!("\u{1f4a9}".to_tendril().try_subtendril(3, 1).is_err());
1842 
1843         let mut t = "\u{1f4a9}zzzzzz".to_tendril();
1844         assert!(t.try_pop_front(1).is_err());
1845         assert!(t.try_pop_front(2).is_err());
1846         assert!(t.try_pop_front(3).is_err());
1847         assert!(t.try_pop_front(4).is_ok());
1848         assert_eq!("zzzzzz", &*t);
1849 
1850         let mut t = "zzzzzz\u{1f4a9}".to_tendril();
1851         assert!(t.try_pop_back(1).is_err());
1852         assert!(t.try_pop_back(2).is_err());
1853         assert!(t.try_pop_back(3).is_err());
1854         assert!(t.try_pop_back(4).is_ok());
1855         assert_eq!("zzzzzz", &*t);
1856     }
1857 
1858     #[test]
conversion()1859     fn conversion() {
1860         assert_eq!(
1861             &[0x66, 0x6F, 0x6F].to_tendril(),
1862             "foo".to_tendril().as_bytes()
1863         );
1864         assert_eq!(
1865             [0x66, 0x6F, 0x6F].to_tendril(),
1866             "foo".to_tendril().into_bytes()
1867         );
1868 
1869         let ascii: Tendril<fmt::ASCII> = b"hello".to_tendril().try_reinterpret().unwrap();
1870         assert_eq!(&"hello".to_tendril(), ascii.as_superset());
1871         assert_eq!("hello".to_tendril(), ascii.clone().into_superset());
1872 
1873         assert!(b"\xFF"
1874             .to_tendril()
1875             .try_reinterpret::<fmt::ASCII>()
1876             .is_err());
1877 
1878         let t = "hello".to_tendril();
1879         let ascii: &Tendril<fmt::ASCII> = t.try_as_subset().unwrap();
1880         assert_eq!(b"hello", &**ascii.as_bytes());
1881 
1882         assert!("ő"
1883             .to_tendril()
1884             .try_reinterpret_view::<fmt::ASCII>()
1885             .is_err());
1886         assert!("ő".to_tendril().try_as_subset::<fmt::ASCII>().is_err());
1887 
1888         let ascii: Tendril<fmt::ASCII> = "hello".to_tendril().try_into_subset().unwrap();
1889         assert_eq!(b"hello", &**ascii.as_bytes());
1890 
1891         assert!("ő".to_tendril().try_reinterpret::<fmt::ASCII>().is_err());
1892         assert!("ő".to_tendril().try_into_subset::<fmt::ASCII>().is_err());
1893     }
1894 
1895     #[test]
clear()1896     fn clear() {
1897         let mut t = "foo-".to_tendril();
1898         t.clear();
1899         assert_eq!(t.len(), 0);
1900         assert_eq!(t.len32(), 0);
1901         assert_eq!(&*t, "");
1902 
1903         let mut t = "much longer".to_tendril();
1904         let s = t.clone();
1905         t.clear();
1906         assert_eq!(t.len(), 0);
1907         assert_eq!(t.len32(), 0);
1908         assert_eq!(&*t, "");
1909         assert_eq!(&*s, "much longer");
1910     }
1911 
1912     #[test]
push_tendril()1913     fn push_tendril() {
1914         let mut t = "abc".to_tendril();
1915         t.push_tendril(&"xyz".to_tendril());
1916         assert_eq!("abcxyz", &*t);
1917     }
1918 
1919     #[test]
wtf8()1920     fn wtf8() {
1921         assert!(Tendril::<fmt::WTF8>::try_from_byte_slice(b"\xED\xA0\xBD").is_ok());
1922         assert!(Tendril::<fmt::WTF8>::try_from_byte_slice(b"\xED\xB2\xA9").is_ok());
1923         assert!(Tendril::<fmt::WTF8>::try_from_byte_slice(b"\xED\xA0\xBD\xED\xB2\xA9").is_err());
1924 
1925         let t: Tendril<fmt::WTF8> =
1926             Tendril::try_from_byte_slice(b"\xED\xA0\xBD\xEA\x99\xAE").unwrap();
1927         assert!(b"\xED\xA0\xBD".to_tendril().try_reinterpret().unwrap() == t.subtendril(0, 3));
1928         assert!(b"\xEA\x99\xAE".to_tendril().try_reinterpret().unwrap() == t.subtendril(3, 3));
1929         assert!(t.try_reinterpret_view::<fmt::UTF8>().is_err());
1930 
1931         assert!(t.try_subtendril(0, 1).is_err());
1932         assert!(t.try_subtendril(0, 2).is_err());
1933         assert!(t.try_subtendril(1, 1).is_err());
1934 
1935         assert!(t.try_subtendril(3, 1).is_err());
1936         assert!(t.try_subtendril(3, 2).is_err());
1937         assert!(t.try_subtendril(4, 1).is_err());
1938 
1939         // paired surrogates
1940         let mut t: Tendril<fmt::WTF8> = Tendril::try_from_byte_slice(b"\xED\xA0\xBD").unwrap();
1941         assert!(t.try_push_bytes(b"\xED\xB2\xA9").is_ok());
1942         assert_eq!(b"\xF0\x9F\x92\xA9", t.as_byte_slice());
1943         assert!(t.try_reinterpret_view::<fmt::UTF8>().is_ok());
1944 
1945         // unpaired surrogates
1946         let mut t: Tendril<fmt::WTF8> = Tendril::try_from_byte_slice(b"\xED\xA0\xBB").unwrap();
1947         assert!(t.try_push_bytes(b"\xED\xA0").is_err());
1948         assert!(t.try_push_bytes(b"\xED").is_err());
1949         assert!(t.try_push_bytes(b"\xA0").is_err());
1950         assert!(t.try_push_bytes(b"\xED\xA0\xBD").is_ok());
1951         assert_eq!(b"\xED\xA0\xBB\xED\xA0\xBD", t.as_byte_slice());
1952         assert!(t.try_push_bytes(b"\xED\xB2\xA9").is_ok());
1953         assert_eq!(b"\xED\xA0\xBB\xF0\x9F\x92\xA9", t.as_byte_slice());
1954         assert!(t.try_reinterpret_view::<fmt::UTF8>().is_err());
1955     }
1956 
1957     #[test]
front_char()1958     fn front_char() {
1959         let mut t = "".to_tendril();
1960         assert_eq!(None, t.pop_front_char());
1961         assert_eq!(None, t.pop_front_char());
1962 
1963         let mut t = "abc".to_tendril();
1964         assert_eq!(Some('a'), t.pop_front_char());
1965         assert_eq!(Some('b'), t.pop_front_char());
1966         assert_eq!(Some('c'), t.pop_front_char());
1967         assert_eq!(None, t.pop_front_char());
1968         assert_eq!(None, t.pop_front_char());
1969 
1970         let mut t = "főo-a-longer-string-bar-baz".to_tendril();
1971         assert_eq!(28, t.len());
1972         assert_eq!(Some('f'), t.pop_front_char());
1973         assert_eq!(Some('ő'), t.pop_front_char());
1974         assert_eq!(Some('o'), t.pop_front_char());
1975         assert_eq!(Some('-'), t.pop_front_char());
1976         assert_eq!(23, t.len());
1977     }
1978 
1979     #[test]
char_run()1980     fn char_run() {
1981         for &(s, exp) in &[
1982             ("", None),
1983             (" ", Some((" ", true))),
1984             ("x", Some(("x", false))),
1985             ("  \t  \n", Some(("  \t  \n", true))),
1986             ("xyzzy", Some(("xyzzy", false))),
1987             ("   xyzzy", Some(("   ", true))),
1988             ("xyzzy   ", Some(("xyzzy", false))),
1989             ("   xyzzy  ", Some(("   ", true))),
1990             ("xyzzy   hi", Some(("xyzzy", false))),
1991             ("中 ", Some(("中", false))),
1992             (" 中 ", Some((" ", true))),
1993             ("  中 ", Some(("  ", true))),
1994             ("   中 ", Some(("   ", true))),
1995         ] {
1996             let mut t = s.to_tendril();
1997             let res = t.pop_front_char_run(char::is_whitespace);
1998             match exp {
1999                 None => assert!(res.is_none()),
2000                 Some((es, ec)) => {
2001                     let (rt, rc) = res.unwrap();
2002                     assert_eq!(es, &*rt);
2003                     assert_eq!(ec, rc);
2004                 }
2005             }
2006         }
2007     }
2008 
2009     #[test]
deref_mut_inline()2010     fn deref_mut_inline() {
2011         let mut t = "xyő".to_tendril().into_bytes();
2012         t[3] = 0xff;
2013         assert_eq!(b"xy\xC5\xFF", &*t);
2014         assert!(t.try_reinterpret_view::<fmt::UTF8>().is_err());
2015         t[3] = 0x8b;
2016         assert_eq!("xyŋ", &**t.try_reinterpret_view::<fmt::UTF8>().unwrap());
2017 
2018         unsafe {
2019             t.push_uninitialized(3);
2020             t[4] = 0xEA;
2021             t[5] = 0x99;
2022             t[6] = 0xAE;
2023             assert_eq!(
2024                 "xyŋ\u{a66e}",
2025                 &**t.try_reinterpret_view::<fmt::UTF8>().unwrap()
2026             );
2027             t.push_uninitialized(20);
2028             t.pop_back(20);
2029             assert_eq!(
2030                 "xyŋ\u{a66e}",
2031                 &**t.try_reinterpret_view::<fmt::UTF8>().unwrap()
2032             );
2033         }
2034     }
2035 
2036     #[test]
deref_mut()2037     fn deref_mut() {
2038         let mut t = b"0123456789".to_tendril();
2039         let u = t.clone();
2040         assert!(t.is_shared());
2041         t[9] = 0xff;
2042         assert!(!t.is_shared());
2043         assert_eq!(b"0123456789", &*u);
2044         assert_eq!(b"012345678\xff", &*t);
2045     }
2046 
2047     #[test]
push_char()2048     fn push_char() {
2049         let mut t = "xyz".to_tendril();
2050         t.push_char('o');
2051         assert_eq!("xyzo", &*t);
2052         t.push_char('ő');
2053         assert_eq!("xyzoő", &*t);
2054         t.push_char('\u{a66e}');
2055         assert_eq!("xyzoő\u{a66e}", &*t);
2056         t.push_char('\u{1f4a9}');
2057         assert_eq!("xyzoő\u{a66e}\u{1f4a9}", &*t);
2058         assert_eq!(t.len(), 13);
2059     }
2060 
2061     #[test]
2062     #[cfg(feature = "encoding")]
encode()2063     fn encode() {
2064         use encoding::{all, EncoderTrap};
2065 
2066         let t = "안녕하세요 러스트".to_tendril();
2067         assert_eq!(
2068             b"\xbe\xc8\xb3\xe7\xc7\xcf\xbc\xbc\xbf\xe4\x20\xb7\xaf\xbd\xba\xc6\xae",
2069             &*t.encode(all::WINDOWS_949, EncoderTrap::Strict).unwrap()
2070         );
2071 
2072         let t = "Энергия пробуждения ия-я-я! \u{a66e}".to_tendril();
2073         assert_eq!(
2074             b"\xfc\xce\xc5\xd2\xc7\xc9\xd1 \xd0\xd2\xcf\xc2\xd5\xd6\xc4\xc5\xce\
2075                      \xc9\xd1 \xc9\xd1\x2d\xd1\x2d\xd1\x21 ?",
2076             &*t.encode(all::KOI8_U, EncoderTrap::Replace).unwrap()
2077         );
2078 
2079         let t = "\u{1f4a9}".to_tendril();
2080         assert!(t.encode(all::WINDOWS_1252, EncoderTrap::Strict).is_err());
2081     }
2082 
2083     #[test]
2084     #[cfg(feature = "encoding")]
decode()2085     fn decode() {
2086         use encoding::{all, DecoderTrap};
2087 
2088         let t = b"\xbe\xc8\xb3\xe7\xc7\xcf\xbc\xbc\
2089                   \xbf\xe4\x20\xb7\xaf\xbd\xba\xc6\xae"
2090             .to_tendril();
2091         assert_eq!(
2092             "안녕하세요 러스트",
2093             &*t.decode(all::WINDOWS_949, DecoderTrap::Strict).unwrap()
2094         );
2095 
2096         let t = b"\xfc\xce\xc5\xd2\xc7\xc9\xd1 \xd0\xd2\xcf\xc2\xd5\xd6\xc4\xc5\xce\
2097                   \xc9\xd1 \xc9\xd1\x2d\xd1\x2d\xd1\x21"
2098             .to_tendril();
2099         assert_eq!(
2100             "Энергия пробуждения ия-я-я!",
2101             &*t.decode(all::KOI8_U, DecoderTrap::Replace).unwrap()
2102         );
2103 
2104         let t = b"x \xff y".to_tendril();
2105         assert!(t.decode(all::UTF_8, DecoderTrap::Strict).is_err());
2106 
2107         let t = b"x \xff y".to_tendril();
2108         assert_eq!(
2109             "x \u{fffd} y",
2110             &*t.decode(all::UTF_8, DecoderTrap::Replace).unwrap()
2111         );
2112     }
2113 
2114     #[test]
ascii()2115     fn ascii() {
2116         fn mk(x: &[u8]) -> Tendril<fmt::ASCII> {
2117             x.to_tendril().try_reinterpret().unwrap()
2118         }
2119 
2120         let mut t = mk(b"xyz");
2121         assert_eq!(Some('x'), t.pop_front_char());
2122         assert_eq!(Some('y'), t.pop_front_char());
2123         assert_eq!(Some('z'), t.pop_front_char());
2124         assert_eq!(None, t.pop_front_char());
2125 
2126         let mut t = mk(b" \t xyz");
2127         assert!(Some((mk(b" \t "), true)) == t.pop_front_char_run(char::is_whitespace));
2128         assert!(Some((mk(b"xyz"), false)) == t.pop_front_char_run(char::is_whitespace));
2129         assert!(t.pop_front_char_run(char::is_whitespace).is_none());
2130 
2131         let mut t = Tendril::<fmt::ASCII>::new();
2132         assert!(t.try_push_char('x').is_ok());
2133         assert!(t.try_push_char('\0').is_ok());
2134         assert!(t.try_push_char('\u{a0}').is_err());
2135         assert_eq!(b"x\0", t.as_byte_slice());
2136     }
2137 
2138     #[test]
latin1()2139     fn latin1() {
2140         fn mk(x: &[u8]) -> Tendril<fmt::Latin1> {
2141             x.to_tendril().try_reinterpret().unwrap()
2142         }
2143 
2144         let mut t = mk(b"\xd8_\xd8");
2145         assert_eq!(Some('Ø'), t.pop_front_char());
2146         assert_eq!(Some('_'), t.pop_front_char());
2147         assert_eq!(Some('Ø'), t.pop_front_char());
2148         assert_eq!(None, t.pop_front_char());
2149 
2150         let mut t = mk(b" \t \xfe\xa7z");
2151         assert!(Some((mk(b" \t "), true)) == t.pop_front_char_run(char::is_whitespace));
2152         assert!(Some((mk(b"\xfe\xa7z"), false)) == t.pop_front_char_run(char::is_whitespace));
2153         assert!(t.pop_front_char_run(char::is_whitespace).is_none());
2154 
2155         let mut t = Tendril::<fmt::Latin1>::new();
2156         assert!(t.try_push_char('x').is_ok());
2157         assert!(t.try_push_char('\0').is_ok());
2158         assert!(t.try_push_char('\u{a0}').is_ok());
2159         assert!(t.try_push_char('ő').is_err());
2160         assert!(t.try_push_char('я').is_err());
2161         assert!(t.try_push_char('\u{a66e}').is_err());
2162         assert!(t.try_push_char('\u{1f4a9}').is_err());
2163         assert_eq!(b"x\0\xa0", t.as_byte_slice());
2164     }
2165 
2166     #[test]
format()2167     fn format() {
2168         assert_eq!("", &*format_tendril!(""));
2169         assert_eq!(
2170             "two and two make 4",
2171             &*format_tendril!("two and two make {}", 2 + 2)
2172         );
2173     }
2174 
2175     #[test]
merge_shared()2176     fn merge_shared() {
2177         let t = "012345678901234567890123456789".to_tendril();
2178         let a = t.subtendril(10, 20);
2179         assert!(a.is_shared());
2180         assert_eq!("01234567890123456789", &*a);
2181         let mut b = t.subtendril(0, 10);
2182         assert!(b.is_shared());
2183         assert_eq!("0123456789", &*b);
2184 
2185         b.push_tendril(&a);
2186         assert!(b.is_shared());
2187         assert!(a.is_shared());
2188         assert!(a.is_shared_with(&b));
2189         assert!(b.is_shared_with(&a));
2190         assert_eq!("012345678901234567890123456789", &*b);
2191 
2192         assert!(t.is_shared());
2193         assert!(t.is_shared_with(&a));
2194         assert!(t.is_shared_with(&b));
2195     }
2196 
2197     #[test]
merge_cant_share()2198     fn merge_cant_share() {
2199         let t = "012345678901234567890123456789".to_tendril();
2200         let mut b = t.subtendril(0, 10);
2201         assert!(b.is_shared());
2202         assert_eq!("0123456789", &*b);
2203 
2204         b.push_tendril(&"abcd".to_tendril());
2205         assert!(!b.is_shared());
2206         assert_eq!("0123456789abcd", &*b);
2207     }
2208 
2209     #[test]
shared_doesnt_reserve()2210     fn shared_doesnt_reserve() {
2211         let mut t = "012345678901234567890123456789".to_tendril();
2212         let a = t.subtendril(1, 10);
2213 
2214         assert!(t.is_shared());
2215         t.reserve(10);
2216         assert!(t.is_shared());
2217 
2218         let _ = a;
2219     }
2220 
2221     #[test]
out_of_bounds()2222     fn out_of_bounds() {
2223         assert!("".to_tendril().try_subtendril(0, 1).is_err());
2224         assert!("abc".to_tendril().try_subtendril(0, 4).is_err());
2225         assert!("abc".to_tendril().try_subtendril(3, 1).is_err());
2226         assert!("abc".to_tendril().try_subtendril(7, 1).is_err());
2227 
2228         let mut t = "".to_tendril();
2229         assert!(t.try_pop_front(1).is_err());
2230         assert!(t.try_pop_front(5).is_err());
2231         assert!(t.try_pop_front(500).is_err());
2232         assert!(t.try_pop_back(1).is_err());
2233         assert!(t.try_pop_back(5).is_err());
2234         assert!(t.try_pop_back(500).is_err());
2235 
2236         let mut t = "abcd".to_tendril();
2237         assert!(t.try_pop_front(1).is_ok());
2238         assert!(t.try_pop_front(4).is_err());
2239         assert!(t.try_pop_front(500).is_err());
2240         assert!(t.try_pop_back(1).is_ok());
2241         assert!(t.try_pop_back(3).is_err());
2242         assert!(t.try_pop_back(500).is_err());
2243     }
2244 
2245     #[test]
compare()2246     fn compare() {
2247         for &a in &[
2248             "indiscretions",
2249             "validity",
2250             "hallucinogenics",
2251             "timelessness",
2252             "original",
2253             "microcosms",
2254             "boilers",
2255             "mammoth",
2256         ] {
2257             for &b in &[
2258                 "intrepidly",
2259                 "frigid",
2260                 "spa",
2261                 "cardigans",
2262                 "guileful",
2263                 "evaporated",
2264                 "unenthusiastic",
2265                 "legitimate",
2266             ] {
2267                 let ta = a.to_tendril();
2268                 let tb = b.to_tendril();
2269 
2270                 assert_eq!(a.eq(b), ta.eq(&tb));
2271                 assert_eq!(a.ne(b), ta.ne(&tb));
2272                 assert_eq!(a.lt(b), ta.lt(&tb));
2273                 assert_eq!(a.le(b), ta.le(&tb));
2274                 assert_eq!(a.gt(b), ta.gt(&tb));
2275                 assert_eq!(a.ge(b), ta.ge(&tb));
2276                 assert_eq!(a.partial_cmp(b), ta.partial_cmp(&tb));
2277                 assert_eq!(a.cmp(b), ta.cmp(&tb));
2278             }
2279         }
2280     }
2281 
2282     #[test]
extend_and_from_iterator()2283     fn extend_and_from_iterator() {
2284         // Testing Extend<T> and FromIterator<T> for the various Ts.
2285 
2286         // Tendril<F>
2287         let mut t = "Hello".to_tendril();
2288         t.extend(None::<&Tendril<_>>.into_iter());
2289         assert_eq!("Hello", &*t);
2290         t.extend(&[", ".to_tendril(), "world".to_tendril(), "!".to_tendril()]);
2291         assert_eq!("Hello, world!", &*t);
2292         assert_eq!(
2293             "Hello, world!",
2294             &*[
2295                 "Hello".to_tendril(),
2296                 ", ".to_tendril(),
2297                 "world".to_tendril(),
2298                 "!".to_tendril()
2299             ]
2300             .iter()
2301             .collect::<StrTendril>()
2302         );
2303 
2304         // &str
2305         let mut t = "Hello".to_tendril();
2306         t.extend(None::<&str>.into_iter());
2307         assert_eq!("Hello", &*t);
2308         t.extend([", ", "world", "!"].iter().map(|&s| s));
2309         assert_eq!("Hello, world!", &*t);
2310         assert_eq!(
2311             "Hello, world!",
2312             &*["Hello", ", ", "world", "!"]
2313                 .iter()
2314                 .map(|&s| s)
2315                 .collect::<StrTendril>()
2316         );
2317 
2318         // &[u8]
2319         let mut t = b"Hello".to_tendril();
2320         t.extend(None::<&[u8]>.into_iter());
2321         assert_eq!(b"Hello", &*t);
2322         t.extend(
2323             [b", ".as_ref(), b"world".as_ref(), b"!".as_ref()]
2324                 .iter()
2325                 .map(|&s| s),
2326         );
2327         assert_eq!(b"Hello, world!", &*t);
2328         assert_eq!(
2329             b"Hello, world!",
2330             &*[
2331                 b"Hello".as_ref(),
2332                 b", ".as_ref(),
2333                 b"world".as_ref(),
2334                 b"!".as_ref()
2335             ]
2336             .iter()
2337             .map(|&s| s)
2338             .collect::<ByteTendril>()
2339         );
2340 
2341         let string = "the quick brown fox jumps over the lazy dog";
2342         let string_expected = string.to_tendril();
2343         let bytes = string.as_bytes();
2344         let bytes_expected = bytes.to_tendril();
2345 
2346         // char
2347         assert_eq!(string_expected, string.chars().collect());
2348         let mut tendril = StrTendril::new();
2349         tendril.extend(string.chars());
2350         assert_eq!(string_expected, tendril);
2351 
2352         // &u8
2353         assert_eq!(bytes_expected, bytes.iter().collect());
2354         let mut tendril = ByteTendril::new();
2355         tendril.extend(bytes);
2356         assert_eq!(bytes_expected, tendril);
2357 
2358         // u8
2359         assert_eq!(bytes_expected, bytes.iter().map(|&b| b).collect());
2360         let mut tendril = ByteTendril::new();
2361         tendril.extend(bytes.iter().map(|&b| b));
2362         assert_eq!(bytes_expected, tendril);
2363     }
2364 
2365     #[test]
from_str()2366     fn from_str() {
2367         use std::str::FromStr;
2368         let t: Tendril<_> = FromStr::from_str("foo bar baz").unwrap();
2369         assert_eq!("foo bar baz", &*t);
2370     }
2371 
2372     #[test]
from_char()2373     fn from_char() {
2374         assert_eq!("o", &*StrTendril::from_char('o'));
2375         assert_eq!("ő", &*StrTendril::from_char('ő'));
2376         assert_eq!("\u{a66e}", &*StrTendril::from_char('\u{a66e}'));
2377         assert_eq!("\u{1f4a9}", &*StrTendril::from_char('\u{1f4a9}'));
2378     }
2379 
2380     #[test]
2381     #[cfg_attr(miri, ignore)] // slow
read()2382     fn read() {
2383         fn check(x: &[u8]) {
2384             use std::io::Cursor;
2385             let mut t = ByteTendril::new();
2386             assert_eq!(x.len(), Cursor::new(x).read_to_tendril(&mut t).unwrap());
2387             assert_eq!(x, &*t);
2388         }
2389 
2390         check(b"");
2391         check(b"abcd");
2392 
2393         let long: Vec<u8> = iter::repeat(b'x').take(1_000_000).collect();
2394         check(&long);
2395     }
2396 
2397     #[test]
hash_map_key()2398     fn hash_map_key() {
2399         use std::collections::HashMap;
2400 
2401         // As noted with Borrow, indexing on HashMap<StrTendril, _> is byte-based because of
2402         // https://github.com/rust-lang/rust/issues/27108.
2403         let mut map = HashMap::new();
2404         map.insert("foo".to_tendril(), 1);
2405         assert_eq!(map.get(b"foo".as_ref()), Some(&1));
2406         assert_eq!(map.get(b"bar".as_ref()), None);
2407 
2408         let mut map = HashMap::new();
2409         map.insert(b"foo".to_tendril(), 1);
2410         assert_eq!(map.get(b"foo".as_ref()), Some(&1));
2411         assert_eq!(map.get(b"bar".as_ref()), None);
2412     }
2413 
2414     #[test]
atomic()2415     fn atomic() {
2416         assert_send::<Tendril<fmt::UTF8, Atomic>>();
2417         let s: Tendril<fmt::UTF8, Atomic> = Tendril::from_slice("this is a string");
2418         assert!(!s.is_shared());
2419         let mut t = s.clone();
2420         assert!(s.is_shared());
2421         let sp = s.as_ptr() as usize;
2422         thread::spawn(move || {
2423             assert!(t.is_shared());
2424             t.push_slice(" extended");
2425             assert_eq!("this is a string extended", &*t);
2426             assert!(t.as_ptr() as usize != sp);
2427             assert!(!t.is_shared());
2428         })
2429         .join()
2430         .unwrap();
2431         assert!(s.is_shared());
2432         assert_eq!("this is a string", &*s);
2433     }
2434 
2435     #[test]
send()2436     fn send() {
2437         assert_send::<SendTendril<fmt::UTF8>>();
2438         let s = "this is a string".to_tendril();
2439         let t = s.clone();
2440         let s2 = s.into_send();
2441         thread::spawn(move || {
2442             let s = StrTendril::from(s2);
2443             assert!(!s.is_shared());
2444             assert_eq!("this is a string", &*s);
2445         })
2446         .join()
2447         .unwrap();
2448         assert_eq!("this is a string", &*t);
2449     }
2450 
2451     #[test]
inline_send()2452     fn inline_send() {
2453         let s = "x".to_tendril();
2454         let t = s.clone();
2455         let s2 = s.into_send();
2456         thread::spawn(move || {
2457             let s = StrTendril::from(s2);
2458             assert!(!s.is_shared());
2459             assert_eq!("x", &*s);
2460         })
2461         .join()
2462         .unwrap();
2463         assert_eq!("x", &*t);
2464     }
2465 }
2466