1 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
2 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
3 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
4 // option. This file may not be copied, modified, or distributed
5 // except according to those terms.
6
7 use std::borrow::Borrow;
8 use std::cell::{Cell, UnsafeCell};
9 use std::cmp::Ordering;
10 use std::default::Default;
11 use std::fmt as strfmt;
12 use std::iter::FromIterator;
13 use std::marker::PhantomData;
14 use std::num::NonZeroUsize;
15 use std::ops::{Deref, DerefMut};
16 use std::sync::atomic::Ordering as AtomicOrdering;
17 use std::sync::atomic::{self, AtomicUsize};
18 use std::{hash, io, mem, ptr, str, u32};
19
20 #[cfg(feature = "encoding")]
21 use encoding::{self, DecoderTrap, EncoderTrap, EncodingRef};
22
23 use buf32::{self, Buf32};
24 use fmt::imp::Fixup;
25 use fmt::{self, Slice};
26 use util::{copy_and_advance, copy_lifetime, copy_lifetime_mut, unsafe_slice, unsafe_slice_mut};
27 use OFLOW;
28
/// Largest number of bytes that can be stored in-line, without a heap buffer.
const MAX_INLINE_LEN: usize = 8;
/// Any `ptr` value up to and including this one is an inline tag, not a pointer.
const MAX_INLINE_TAG: usize = 0xF;
/// Tag used for the empty tendril (a length of zero cannot be stored in a
/// `NonZeroUsize`).
const EMPTY_TAG: usize = 0xF;

/// Compute the tag stored in `Tendril::ptr` for an inline tendril of `len` bytes.
///
/// A non-zero length is its own tag; zero maps to `EMPTY_TAG`.
/// Debug builds assert that `len` does not exceed `MAX_INLINE_LEN`.
#[inline(always)]
fn inline_tag(len: u32) -> NonZeroUsize {
    debug_assert!(len <= MAX_INLINE_LEN as u32);
    let tag = match len {
        0 => EMPTY_TAG,
        nonzero => nonzero as usize,
    };
    // `tag` is either `EMPTY_TAG` (0xF) or a non-zero length, so never zero.
    unsafe { NonZeroUsize::new_unchecked(tag) }
}
38
/// The multithreadedness of a tendril.
///
/// Exactly two types implement this trait:
///
/// - `Atomic`: use this in your tendril and you will have a `Send` tendril which works
///   across threads; this is akin to `Arc`.
///
/// - `NonAtomic`: use this in your tendril and you will have a tendril which is neither
///   `Send` nor `Sync` but should be a tad faster; this is akin to `Rc`.
///
/// The layout of this trait is also mandated to be that of a `usize`,
/// for it is used for reference counting.
pub unsafe trait Atomicity: 'static {
    /// Create a fresh counter. Both implementations in this file start it at 1.
    #[doc(hidden)]
    fn new() -> Self;

    /// Add one to the counter and return the value it held *before* the increment.
    #[doc(hidden)]
    fn increment(&self) -> usize;

    /// Subtract one from the counter and return the value it held *before* the
    /// decrement; `Drop for Tendril` frees the buffer when this returns 1.
    #[doc(hidden)]
    fn decrement(&self) -> usize;

    /// Synchronisation point executed before a shared buffer is destroyed.
    /// `Atomic` issues an acquire fence; `NonAtomic` does nothing.
    #[doc(hidden)]
    fn fence_acquire();
}
64
/// A marker of a non-atomic tendril.
///
/// This is the default for the second type parameter of a `Tendril`
/// and so doesn't typically need to be written.
///
/// This is akin to using `Rc` for reference counting.
///
/// The counter lives in a `Cell`, so it is updated through `&self` without
/// any synchronisation.
#[repr(C)]
pub struct NonAtomic(Cell<PackedUsize>);
73
// A `usize` wrapped in a packed struct; `packed` lowers its alignment to 1.
// NOTE(review): presumably this relaxes the alignment required of
// `Header<NonAtomic>` inside the heap buffer — confirm against `Buf32`.
#[repr(C, packed)]
#[derive(Copy, Clone)]
struct PackedUsize(usize);
77
78 unsafe impl Atomicity for NonAtomic {
79 #[inline]
new() -> Self80 fn new() -> Self {
81 NonAtomic(Cell::new(PackedUsize(1)))
82 }
83
84 #[inline]
increment(&self) -> usize85 fn increment(&self) -> usize {
86 let value = self.0.get().0;
87 self.0.set(PackedUsize(value.checked_add(1).expect(OFLOW)));
88 value
89 }
90
91 #[inline]
decrement(&self) -> usize92 fn decrement(&self) -> usize {
93 let value = self.0.get().0;
94 self.0.set(PackedUsize(value - 1));
95 value
96 }
97
98 #[inline]
fence_acquire()99 fn fence_acquire() {}
100 }
101
/// A marker of an atomic (and hence concurrent) tendril.
///
/// This is used as the second, optional type parameter of a `Tendril`;
/// `Tendril<F, Atomic>` thus implements `Send`.
///
/// This is akin to using `Arc` for reference counting.
pub struct Atomic(AtomicUsize);
109
110 unsafe impl Atomicity for Atomic {
111 #[inline]
new() -> Self112 fn new() -> Self {
113 Atomic(AtomicUsize::new(1))
114 }
115
116 #[inline]
increment(&self) -> usize117 fn increment(&self) -> usize {
118 // Relaxed is OK because we have a reference already.
119 self.0.fetch_add(1, AtomicOrdering::Relaxed)
120 }
121
122 #[inline]
decrement(&self) -> usize123 fn decrement(&self) -> usize {
124 self.0.fetch_sub(1, AtomicOrdering::Release)
125 }
126
127 #[inline]
fence_acquire()128 fn fence_acquire() {
129 atomic::fence(AtomicOrdering::Acquire);
130 }
131 }
132
// Bookkeeping stored at the start of a heap buffer; `Tendril::ptr` points at it.
struct Header<A: Atomicity> {
    // Reference count of the buffer.
    refcount: A,
    // Buffer capacity. Only written when the buffer becomes shared (see
    // `make_buf_shared`); an owned tendril keeps its capacity in `Heap::aux`.
    cap: u32,
}
137
138 impl<A> Header<A>
139 where
140 A: Atomicity,
141 {
142 #[inline(always)]
new() -> Header<A>143 unsafe fn new() -> Header<A> {
144 Header {
145 refcount: A::new(),
146 cap: 0,
147 }
148 }
149 }
150
/// Errors that can occur when slicing a `Tendril`.
#[derive(Copy, Clone, Hash, Debug, PartialEq, Eq)]
pub enum SubtendrilError {
    /// The requested offset/length does not lie within the tendril.
    OutOfBounds,
    /// The requested bytes are in bounds but do not conform to the format.
    ValidationFailed,
}
157
/// Compact string type for zero-copy parsing.
///
/// `Tendril`s have the semantics of owned strings, but are sometimes views
/// into shared buffers. When you mutate a `Tendril`, an owned copy is made
/// if necessary. Further mutations occur in-place until the string becomes
/// shared, e.g. with `clone()` or `subtendril()`.
///
/// By default, buffer sharing is accomplished through thread-local
/// (non-atomic) reference counting, which has very low overhead. The Rust
/// type system will prevent you at compile time from sending such a
/// `Tendril` between threads. Use `Atomic` for the `A` parameter (see below)
/// to get a tendril that can be sent across threads.
///
/// Whereas `String` allocates in the heap for any non-empty string, `Tendril`
/// can store small strings (up to 8 bytes) in-line, without a heap allocation.
/// `Tendril` is also smaller than `String` on 64-bit platforms — 16 bytes
/// versus 24.
///
/// The type parameter `F` specifies the format of the tendril, for example
/// UTF-8 text or uninterpreted bytes. The parameter will be instantiated
/// with one of the marker types from `tendril::fmt`. See the `StrTendril`
/// and `ByteTendril` type aliases for two examples.
///
/// The type parameter `A` indicates the atomicity of the tendril; it is by
/// default `NonAtomic`, but can be specified as `Atomic` to get a tendril
/// which implements `Send` (viz. a thread-safe tendril).
///
/// The maximum length of a `Tendril` is 4 GB. The library will panic if
/// you attempt to go over the limit.
#[repr(C)]
pub struct Tendril<F, A = NonAtomic>
where
    F: fmt::Format,
    A: Atomicity,
{
    // Tag or pointer. Values up to `MAX_INLINE_TAG` are inline tags (the
    // length, or `EMPTY_TAG` for the empty tendril). Anything larger is the
    // address of a `Header<A>`, with the low bit set when the buffer is shared.
    ptr: Cell<NonZeroUsize>,
    // Inline bytes, or the `Heap` length/aux pair, depending on `ptr`.
    buf: UnsafeCell<Buffer>,
    marker: PhantomData<*mut F>,
    refcount_marker: PhantomData<A>,
}
197
// Second word of a `Tendril`; which field is live is decided by `Tendril::ptr`.
#[repr(C)]
union Buffer {
    // Live when `ptr` holds a header address.
    heap: Heap,
    // Live when `ptr` holds an inline tag; only the first `len` bytes matter.
    inline: [u8; 8],
}
203
// Length and auxiliary word of a heap-backed tendril.
#[derive(Copy, Clone)]
#[repr(C)]
struct Heap {
    // Length of this tendril's contents in bytes.
    len: u32,
    // For an owned buffer: its capacity. For a shared buffer: the offset of
    // this tendril's first byte within the buffer (see `assume_buf`).
    aux: u32,
}
210
211 unsafe impl<F, A> Send for Tendril<F, A>
212 where
213 F: fmt::Format,
214 A: Atomicity + Sync,
215 {
216 }
217
/// `Tendril` for storing native Rust strings.
///
/// Uses the UTF-8 format and the default (`NonAtomic`) reference counting.
pub type StrTendril = Tendril<fmt::UTF8>;

/// `Tendril` for storing binary data.
///
/// Uses the uninterpreted-bytes format and the default (`NonAtomic`)
/// reference counting.
pub type ByteTendril = Tendril<fmt::Bytes>;
223
224 impl<F, A> Clone for Tendril<F, A>
225 where
226 F: fmt::Format,
227 A: Atomicity,
228 {
229 #[inline]
clone(&self) -> Tendril<F, A>230 fn clone(&self) -> Tendril<F, A> {
231 unsafe {
232 if self.ptr.get().get() > MAX_INLINE_TAG {
233 self.make_buf_shared();
234 self.incref();
235 }
236
237 ptr::read(self)
238 }
239 }
240 }
241
242 impl<F, A> Drop for Tendril<F, A>
243 where
244 F: fmt::Format,
245 A: Atomicity,
246 {
247 #[inline]
drop(&mut self)248 fn drop(&mut self) {
249 unsafe {
250 let p = self.ptr.get().get();
251 if p <= MAX_INLINE_TAG {
252 return;
253 }
254
255 let (buf, shared, _) = self.assume_buf();
256 if shared {
257 let header = self.header();
258 if (*header).refcount.decrement() == 1 {
259 A::fence_acquire();
260 buf.destroy();
261 }
262 } else {
263 buf.destroy();
264 }
265 }
266 }
267 }
268
// Expands to a `FromIterator::from_iter` body for item type `$ty`: start from
// an empty value and defer to the matching `Extend` implementation.
macro_rules! from_iter_method {
    ($ty:ty) => {
        #[inline]
        fn from_iter<I>(iterable: I) -> Self
        where
            I: IntoIterator<Item = $ty>,
        {
            let mut collected = Self::new();
            collected.extend(iterable);
            collected
        }
    };
}
282
283 impl<A> Extend<char> for Tendril<fmt::UTF8, A>
284 where
285 A: Atomicity,
286 {
287 #[inline]
extend<I>(&mut self, iterable: I) where I: IntoIterator<Item = char>,288 fn extend<I>(&mut self, iterable: I)
289 where
290 I: IntoIterator<Item = char>,
291 {
292 let iterator = iterable.into_iter();
293 self.force_reserve(iterator.size_hint().0 as u32);
294 for c in iterator {
295 self.push_char(c);
296 }
297 }
298 }
299
300 impl<A> FromIterator<char> for Tendril<fmt::UTF8, A>
301 where
302 A: Atomicity,
303 {
304 from_iter_method!(char);
305 }
306
307 impl<A> Extend<u8> for Tendril<fmt::Bytes, A>
308 where
309 A: Atomicity,
310 {
311 #[inline]
extend<I>(&mut self, iterable: I) where I: IntoIterator<Item = u8>,312 fn extend<I>(&mut self, iterable: I)
313 where
314 I: IntoIterator<Item = u8>,
315 {
316 let iterator = iterable.into_iter();
317 self.force_reserve(iterator.size_hint().0 as u32);
318 for b in iterator {
319 self.push_slice(&[b]);
320 }
321 }
322 }
323
324 impl<A> FromIterator<u8> for Tendril<fmt::Bytes, A>
325 where
326 A: Atomicity,
327 {
328 from_iter_method!(u8);
329 }
330
331 impl<'a, A> Extend<&'a u8> for Tendril<fmt::Bytes, A>
332 where
333 A: Atomicity,
334 {
335 #[inline]
extend<I>(&mut self, iterable: I) where I: IntoIterator<Item = &'a u8>,336 fn extend<I>(&mut self, iterable: I)
337 where
338 I: IntoIterator<Item = &'a u8>,
339 {
340 let iterator = iterable.into_iter();
341 self.force_reserve(iterator.size_hint().0 as u32);
342 for &b in iterator {
343 self.push_slice(&[b]);
344 }
345 }
346 }
347
348 impl<'a, A> FromIterator<&'a u8> for Tendril<fmt::Bytes, A>
349 where
350 A: Atomicity,
351 {
352 from_iter_method!(&'a u8);
353 }
354
355 impl<'a, A> Extend<&'a str> for Tendril<fmt::UTF8, A>
356 where
357 A: Atomicity,
358 {
359 #[inline]
extend<I>(&mut self, iterable: I) where I: IntoIterator<Item = &'a str>,360 fn extend<I>(&mut self, iterable: I)
361 where
362 I: IntoIterator<Item = &'a str>,
363 {
364 for s in iterable {
365 self.push_slice(s);
366 }
367 }
368 }
369
370 impl<'a, A> FromIterator<&'a str> for Tendril<fmt::UTF8, A>
371 where
372 A: Atomicity,
373 {
374 from_iter_method!(&'a str);
375 }
376
377 impl<'a, A> Extend<&'a [u8]> for Tendril<fmt::Bytes, A>
378 where
379 A: Atomicity,
380 {
381 #[inline]
extend<I>(&mut self, iterable: I) where I: IntoIterator<Item = &'a [u8]>,382 fn extend<I>(&mut self, iterable: I)
383 where
384 I: IntoIterator<Item = &'a [u8]>,
385 {
386 for s in iterable {
387 self.push_slice(s);
388 }
389 }
390 }
391
392 impl<'a, A> FromIterator<&'a [u8]> for Tendril<fmt::Bytes, A>
393 where
394 A: Atomicity,
395 {
396 from_iter_method!(&'a [u8]);
397 }
398
399 impl<'a, F, A> Extend<&'a Tendril<F, A>> for Tendril<F, A>
400 where
401 F: fmt::Format + 'a,
402 A: Atomicity,
403 {
404 #[inline]
extend<I>(&mut self, iterable: I) where I: IntoIterator<Item = &'a Tendril<F, A>>,405 fn extend<I>(&mut self, iterable: I)
406 where
407 I: IntoIterator<Item = &'a Tendril<F, A>>,
408 {
409 for t in iterable {
410 self.push_tendril(t);
411 }
412 }
413 }
414
415 impl<'a, F, A> FromIterator<&'a Tendril<F, A>> for Tendril<F, A>
416 where
417 F: fmt::Format + 'a,
418 A: Atomicity,
419 {
420 from_iter_method!(&'a Tendril<F, A>);
421 }
422
423 impl<F, A> Deref for Tendril<F, A>
424 where
425 F: fmt::SliceFormat,
426 A: Atomicity,
427 {
428 type Target = F::Slice;
429
430 #[inline]
deref(&self) -> &F::Slice431 fn deref(&self) -> &F::Slice {
432 unsafe { F::Slice::from_bytes(self.as_byte_slice()) }
433 }
434 }
435
436 impl<F, A> DerefMut for Tendril<F, A>
437 where
438 F: fmt::SliceFormat,
439 A: Atomicity,
440 {
441 #[inline]
deref_mut(&mut self) -> &mut F::Slice442 fn deref_mut(&mut self) -> &mut F::Slice {
443 unsafe { F::Slice::from_mut_bytes(self.as_mut_byte_slice()) }
444 }
445 }
446
447 impl<F, A> Borrow<[u8]> for Tendril<F, A>
448 where
449 F: fmt::SliceFormat,
450 A: Atomicity,
451 {
borrow(&self) -> &[u8]452 fn borrow(&self) -> &[u8] {
453 self.as_byte_slice()
454 }
455 }
456
457 // Why not impl Borrow<str> for Tendril<fmt::UTF8>? str and [u8] hash differently,
458 // and so a HashMap<StrTendril, _> would silently break if we indexed by str. Ick.
459 // https://github.com/rust-lang/rust/issues/27108
460
461 impl<F, A> PartialEq for Tendril<F, A>
462 where
463 F: fmt::Format,
464 A: Atomicity,
465 {
466 #[inline]
eq(&self, other: &Self) -> bool467 fn eq(&self, other: &Self) -> bool {
468 self.as_byte_slice() == other.as_byte_slice()
469 }
470
471 #[inline]
ne(&self, other: &Self) -> bool472 fn ne(&self, other: &Self) -> bool {
473 self.as_byte_slice() != other.as_byte_slice()
474 }
475 }
476
477 impl<F, A> Eq for Tendril<F, A>
478 where
479 F: fmt::Format,
480 A: Atomicity,
481 {
482 }
483
484 impl<F, A> PartialOrd for Tendril<F, A>
485 where
486 F: fmt::SliceFormat,
487 <F as fmt::SliceFormat>::Slice: PartialOrd,
488 A: Atomicity,
489 {
490 #[inline]
partial_cmp(&self, other: &Self) -> Option<Ordering>491 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
492 PartialOrd::partial_cmp(&**self, &**other)
493 }
494 }
495
496 impl<F, A> Ord for Tendril<F, A>
497 where
498 F: fmt::SliceFormat,
499 <F as fmt::SliceFormat>::Slice: Ord,
500 A: Atomicity,
501 {
502 #[inline]
cmp(&self, other: &Self) -> Ordering503 fn cmp(&self, other: &Self) -> Ordering {
504 Ord::cmp(&**self, &**other)
505 }
506 }
507
508 impl<F, A> Default for Tendril<F, A>
509 where
510 F: fmt::Format,
511 A: Atomicity,
512 {
513 #[inline(always)]
default() -> Tendril<F, A>514 fn default() -> Tendril<F, A> {
515 Tendril::new()
516 }
517 }
518
519 impl<F, A> strfmt::Debug for Tendril<F, A>
520 where
521 F: fmt::SliceFormat + Default + strfmt::Debug,
522 <F as fmt::SliceFormat>::Slice: strfmt::Debug,
523 A: Atomicity,
524 {
525 #[inline]
fmt(&self, f: &mut strfmt::Formatter) -> strfmt::Result526 fn fmt(&self, f: &mut strfmt::Formatter) -> strfmt::Result {
527 let kind = match self.ptr.get().get() {
528 p if p <= MAX_INLINE_TAG => "inline",
529 p if p & 1 == 1 => "shared",
530 _ => "owned",
531 };
532
533 write!(f, "Tendril<{:?}>({}: ", <F as Default>::default(), kind)?;
534 <<F as fmt::SliceFormat>::Slice as strfmt::Debug>::fmt(&**self, f)?;
535 write!(f, ")")
536 }
537 }
538
539 impl<F, A> hash::Hash for Tendril<F, A>
540 where
541 F: fmt::Format,
542 A: Atomicity,
543 {
544 #[inline]
hash<H: hash::Hasher>(&self, hasher: &mut H)545 fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
546 self.as_byte_slice().hash(hasher)
547 }
548 }
549
550 impl<F, A> Tendril<F, A>
551 where
552 F: fmt::Format,
553 A: Atomicity,
554 {
555 /// Create a new, empty `Tendril` in any format.
556 #[inline(always)]
new() -> Tendril<F, A>557 pub fn new() -> Tendril<F, A> {
558 unsafe { Tendril::inline(&[]) }
559 }
560
561 /// Create a new, empty `Tendril` with a specified capacity.
562 #[inline]
with_capacity(capacity: u32) -> Tendril<F, A>563 pub fn with_capacity(capacity: u32) -> Tendril<F, A> {
564 let mut t: Tendril<F, A> = Tendril::new();
565 if capacity > MAX_INLINE_LEN as u32 {
566 unsafe {
567 t.make_owned_with_capacity(capacity);
568 }
569 }
570 t
571 }
572
573 /// Reserve space for additional bytes.
574 ///
575 /// This is only a suggestion. There are cases where `Tendril` will
576 /// decline to allocate until the buffer is actually modified.
577 #[inline]
reserve(&mut self, additional: u32)578 pub fn reserve(&mut self, additional: u32) {
579 if !self.is_shared() {
580 // Don't grow a shared tendril because we'd have to copy
581 // right away.
582 self.force_reserve(additional);
583 }
584 }
585
586 /// Reserve space for additional bytes, even for shared buffers.
587 #[inline]
force_reserve(&mut self, additional: u32)588 fn force_reserve(&mut self, additional: u32) {
589 let new_len = self.len32().checked_add(additional).expect(OFLOW);
590 if new_len > MAX_INLINE_LEN as u32 {
591 unsafe {
592 self.make_owned_with_capacity(new_len);
593 }
594 }
595 }
596
597 /// Get the length of the `Tendril`.
598 ///
599 /// This is named not to conflict with `len()` on the underlying
600 /// slice, if any.
601 #[inline(always)]
len32(&self) -> u32602 pub fn len32(&self) -> u32 {
603 match self.ptr.get().get() {
604 EMPTY_TAG => 0,
605 n if n <= MAX_INLINE_LEN => n as u32,
606 _ => unsafe { self.raw_len() },
607 }
608 }
609
610 /// Is the backing buffer shared?
611 #[inline]
is_shared(&self) -> bool612 pub fn is_shared(&self) -> bool {
613 let n = self.ptr.get().get();
614
615 (n > MAX_INLINE_TAG) && ((n & 1) == 1)
616 }
617
618 /// Is the backing buffer shared with this other `Tendril`?
619 #[inline]
is_shared_with(&self, other: &Tendril<F, A>) -> bool620 pub fn is_shared_with(&self, other: &Tendril<F, A>) -> bool {
621 let n = self.ptr.get().get();
622
623 (n > MAX_INLINE_TAG) && (n == other.ptr.get().get())
624 }
625
626 /// Truncate to length 0 without discarding any owned storage.
627 #[inline]
clear(&mut self)628 pub fn clear(&mut self) {
629 if self.ptr.get().get() <= MAX_INLINE_TAG {
630 self.ptr
631 .set(unsafe { NonZeroUsize::new_unchecked(EMPTY_TAG) });
632 } else {
633 let (_, shared, _) = unsafe { self.assume_buf() };
634 if shared {
635 // No need to keep a reference alive for a 0-size slice.
636 *self = Tendril::new();
637 } else {
638 unsafe { self.set_len(0) };
639 }
640 }
641 }
642
643 /// Build a `Tendril` by copying a byte slice, if it conforms to the format.
644 #[inline]
try_from_byte_slice(x: &[u8]) -> Result<Tendril<F, A>, ()>645 pub fn try_from_byte_slice(x: &[u8]) -> Result<Tendril<F, A>, ()> {
646 match F::validate(x) {
647 true => Ok(unsafe { Tendril::from_byte_slice_without_validating(x) }),
648 false => Err(()),
649 }
650 }
651
652 /// View as uninterpreted bytes.
653 #[inline(always)]
as_bytes(&self) -> &Tendril<fmt::Bytes, A>654 pub fn as_bytes(&self) -> &Tendril<fmt::Bytes, A> {
655 unsafe { mem::transmute(self) }
656 }
657
658 /// Convert into uninterpreted bytes.
659 #[inline(always)]
into_bytes(self) -> Tendril<fmt::Bytes, A>660 pub fn into_bytes(self) -> Tendril<fmt::Bytes, A> {
661 unsafe { mem::transmute(self) }
662 }
663
664 /// Convert `self` into a type which is `Send`.
665 ///
666 /// If the tendril is owned or inline, this is free,
667 /// but if it's shared this will entail a copy of the contents.
668 #[inline]
into_send(mut self) -> SendTendril<F>669 pub fn into_send(mut self) -> SendTendril<F> {
670 self.make_owned();
671 SendTendril {
672 // This changes the header.refcount from A to NonAtomic, but that's
673 // OK because we have defined the format of A as a usize.
674 tendril: unsafe { mem::transmute(self) },
675 }
676 }
677
678 /// View as a superset format, for free.
679 #[inline(always)]
as_superset<Super>(&self) -> &Tendril<Super, A> where F: fmt::SubsetOf<Super>, Super: fmt::Format,680 pub fn as_superset<Super>(&self) -> &Tendril<Super, A>
681 where
682 F: fmt::SubsetOf<Super>,
683 Super: fmt::Format,
684 {
685 unsafe { mem::transmute(self) }
686 }
687
688 /// Convert into a superset format, for free.
689 #[inline(always)]
into_superset<Super>(self) -> Tendril<Super, A> where F: fmt::SubsetOf<Super>, Super: fmt::Format,690 pub fn into_superset<Super>(self) -> Tendril<Super, A>
691 where
692 F: fmt::SubsetOf<Super>,
693 Super: fmt::Format,
694 {
695 unsafe { mem::transmute(self) }
696 }
697
698 /// View as a subset format, if the `Tendril` conforms to that subset.
699 #[inline]
try_as_subset<Sub>(&self) -> Result<&Tendril<Sub, A>, ()> where Sub: fmt::SubsetOf<F>,700 pub fn try_as_subset<Sub>(&self) -> Result<&Tendril<Sub, A>, ()>
701 where
702 Sub: fmt::SubsetOf<F>,
703 {
704 match Sub::revalidate_subset(self.as_byte_slice()) {
705 true => Ok(unsafe { mem::transmute(self) }),
706 false => Err(()),
707 }
708 }
709
710 /// Convert into a subset format, if the `Tendril` conforms to that subset.
711 #[inline]
try_into_subset<Sub>(self) -> Result<Tendril<Sub, A>, Self> where Sub: fmt::SubsetOf<F>,712 pub fn try_into_subset<Sub>(self) -> Result<Tendril<Sub, A>, Self>
713 where
714 Sub: fmt::SubsetOf<F>,
715 {
716 match Sub::revalidate_subset(self.as_byte_slice()) {
717 true => Ok(unsafe { mem::transmute(self) }),
718 false => Err(self),
719 }
720 }
721
722 /// View as another format, if the bytes of the `Tendril` are valid for
723 /// that format.
724 #[inline]
try_reinterpret_view<Other>(&self) -> Result<&Tendril<Other, A>, ()> where Other: fmt::Format,725 pub fn try_reinterpret_view<Other>(&self) -> Result<&Tendril<Other, A>, ()>
726 where
727 Other: fmt::Format,
728 {
729 match Other::validate(self.as_byte_slice()) {
730 true => Ok(unsafe { mem::transmute(self) }),
731 false => Err(()),
732 }
733 }
734
735 /// Convert into another format, if the `Tendril` conforms to that format.
736 ///
737 /// This only re-validates the existing bytes under the new format. It
738 /// will *not* change the byte content of the tendril!
739 ///
740 /// See the `encode` and `decode` methods for character encoding conversion.
741 #[inline]
try_reinterpret<Other>(self) -> Result<Tendril<Other, A>, Self> where Other: fmt::Format,742 pub fn try_reinterpret<Other>(self) -> Result<Tendril<Other, A>, Self>
743 where
744 Other: fmt::Format,
745 {
746 match Other::validate(self.as_byte_slice()) {
747 true => Ok(unsafe { mem::transmute(self) }),
748 false => Err(self),
749 }
750 }
751
752 /// Push some bytes onto the end of the `Tendril`, if they conform to the
753 /// format.
754 #[inline]
try_push_bytes(&mut self, buf: &[u8]) -> Result<(), ()>755 pub fn try_push_bytes(&mut self, buf: &[u8]) -> Result<(), ()> {
756 match F::validate(buf) {
757 true => unsafe {
758 self.push_bytes_without_validating(buf);
759 Ok(())
760 },
761 false => Err(()),
762 }
763 }
764
765 /// Push another `Tendril` onto the end of this one.
766 #[inline]
push_tendril(&mut self, other: &Tendril<F, A>)767 pub fn push_tendril(&mut self, other: &Tendril<F, A>) {
768 let new_len = self.len32().checked_add(other.len32()).expect(OFLOW);
769
770 unsafe {
771 if (self.ptr.get().get() > MAX_INLINE_TAG) && (other.ptr.get().get() > MAX_INLINE_TAG) {
772 let (self_buf, self_shared, _) = self.assume_buf();
773 let (other_buf, other_shared, _) = other.assume_buf();
774
775 if self_shared
776 && other_shared
777 && (self_buf.data_ptr() == other_buf.data_ptr())
778 && other.aux() == self.aux() + self.raw_len()
779 {
780 self.set_len(new_len);
781 return;
782 }
783 }
784
785 self.push_bytes_without_validating(other.as_byte_slice())
786 }
787 }
788
789 /// Attempt to slice this `Tendril` as a new `Tendril`.
790 ///
791 /// This will share the buffer when possible. Mutating a shared buffer
792 /// will copy the contents.
793 ///
794 /// The offset and length are in bytes. The function will return
795 /// `Err` if these are out of bounds, or if the resulting slice
796 /// does not conform to the format.
797 #[inline]
try_subtendril( &self, offset: u32, length: u32, ) -> Result<Tendril<F, A>, SubtendrilError>798 pub fn try_subtendril(
799 &self,
800 offset: u32,
801 length: u32,
802 ) -> Result<Tendril<F, A>, SubtendrilError> {
803 let self_len = self.len32();
804 if offset > self_len || length > (self_len - offset) {
805 return Err(SubtendrilError::OutOfBounds);
806 }
807
808 unsafe {
809 let byte_slice = unsafe_slice(self.as_byte_slice(), offset as usize, length as usize);
810 if !F::validate_subseq(byte_slice) {
811 return Err(SubtendrilError::ValidationFailed);
812 }
813
814 Ok(self.unsafe_subtendril(offset, length))
815 }
816 }
817
818 /// Slice this `Tendril` as a new `Tendril`.
819 ///
820 /// Panics on bounds or validity check failure.
821 #[inline]
subtendril(&self, offset: u32, length: u32) -> Tendril<F, A>822 pub fn subtendril(&self, offset: u32, length: u32) -> Tendril<F, A> {
823 self.try_subtendril(offset, length).unwrap()
824 }
825
826 /// Try to drop `n` bytes from the front.
827 ///
828 /// Returns `Err` if the bytes are not available, or the suffix fails
829 /// validation.
830 #[inline]
try_pop_front(&mut self, n: u32) -> Result<(), SubtendrilError>831 pub fn try_pop_front(&mut self, n: u32) -> Result<(), SubtendrilError> {
832 if n == 0 {
833 return Ok(());
834 }
835 let old_len = self.len32();
836 if n > old_len {
837 return Err(SubtendrilError::OutOfBounds);
838 }
839 let new_len = old_len - n;
840
841 unsafe {
842 if !F::validate_suffix(unsafe_slice(
843 self.as_byte_slice(),
844 n as usize,
845 new_len as usize,
846 )) {
847 return Err(SubtendrilError::ValidationFailed);
848 }
849
850 self.unsafe_pop_front(n);
851 Ok(())
852 }
853 }
854
855 /// Drop `n` bytes from the front.
856 ///
857 /// Panics if the bytes are not available, or the suffix fails
858 /// validation.
859 #[inline]
pop_front(&mut self, n: u32)860 pub fn pop_front(&mut self, n: u32) {
861 self.try_pop_front(n).unwrap()
862 }
863
864 /// Drop `n` bytes from the back.
865 ///
866 /// Returns `Err` if the bytes are not available, or the prefix fails
867 /// validation.
868 #[inline]
try_pop_back(&mut self, n: u32) -> Result<(), SubtendrilError>869 pub fn try_pop_back(&mut self, n: u32) -> Result<(), SubtendrilError> {
870 if n == 0 {
871 return Ok(());
872 }
873 let old_len = self.len32();
874 if n > old_len {
875 return Err(SubtendrilError::OutOfBounds);
876 }
877 let new_len = old_len - n;
878
879 unsafe {
880 if !F::validate_prefix(unsafe_slice(self.as_byte_slice(), 0, new_len as usize)) {
881 return Err(SubtendrilError::ValidationFailed);
882 }
883
884 self.unsafe_pop_back(n);
885 Ok(())
886 }
887 }
888
889 /// Drop `n` bytes from the back.
890 ///
891 /// Panics if the bytes are not available, or the prefix fails
892 /// validation.
893 #[inline]
pop_back(&mut self, n: u32)894 pub fn pop_back(&mut self, n: u32) {
895 self.try_pop_back(n).unwrap()
896 }
897
898 /// View as another format, without validating.
899 #[inline(always)]
reinterpret_view_without_validating<Other>(&self) -> &Tendril<Other, A> where Other: fmt::Format,900 pub unsafe fn reinterpret_view_without_validating<Other>(&self) -> &Tendril<Other, A>
901 where
902 Other: fmt::Format,
903 {
904 mem::transmute(self)
905 }
906
907 /// Convert into another format, without validating.
908 #[inline(always)]
reinterpret_without_validating<Other>(self) -> Tendril<Other, A> where Other: fmt::Format,909 pub unsafe fn reinterpret_without_validating<Other>(self) -> Tendril<Other, A>
910 where
911 Other: fmt::Format,
912 {
913 mem::transmute(self)
914 }
915
916 /// Build a `Tendril` by copying a byte slice, without validating.
917 #[inline]
from_byte_slice_without_validating(x: &[u8]) -> Tendril<F, A>918 pub unsafe fn from_byte_slice_without_validating(x: &[u8]) -> Tendril<F, A> {
919 assert!(x.len() <= buf32::MAX_LEN);
920 if x.len() <= MAX_INLINE_LEN {
921 Tendril::inline(x)
922 } else {
923 Tendril::owned_copy(x)
924 }
925 }
926
927 /// Push some bytes onto the end of the `Tendril`, without validating.
928 #[inline]
push_bytes_without_validating(&mut self, buf: &[u8])929 pub unsafe fn push_bytes_without_validating(&mut self, buf: &[u8]) {
930 assert!(buf.len() <= buf32::MAX_LEN);
931
932 let Fixup {
933 drop_left,
934 drop_right,
935 insert_len,
936 insert_bytes,
937 } = F::fixup(self.as_byte_slice(), buf);
938
939 // FIXME: think more about overflow
940 let adj_len = self.len32() + insert_len - drop_left;
941
942 let new_len = adj_len.checked_add(buf.len() as u32).expect(OFLOW) - drop_right;
943
944 let drop_left = drop_left as usize;
945 let drop_right = drop_right as usize;
946
947 if new_len <= MAX_INLINE_LEN as u32 {
948 let mut tmp = [0_u8; MAX_INLINE_LEN];
949 {
950 let old = self.as_byte_slice();
951 let mut dest = tmp.as_mut_ptr();
952 copy_and_advance(&mut dest, unsafe_slice(old, 0, old.len() - drop_left));
953 copy_and_advance(
954 &mut dest,
955 unsafe_slice(&insert_bytes, 0, insert_len as usize),
956 );
957 copy_and_advance(
958 &mut dest,
959 unsafe_slice(buf, drop_right, buf.len() - drop_right),
960 );
961 }
962 *self = Tendril::inline(&tmp[..new_len as usize]);
963 } else {
964 self.make_owned_with_capacity(new_len);
965 let (owned, _, _) = self.assume_buf();
966 let mut dest = owned
967 .data_ptr()
968 .offset((owned.len as usize - drop_left) as isize);
969 copy_and_advance(
970 &mut dest,
971 unsafe_slice(&insert_bytes, 0, insert_len as usize),
972 );
973 copy_and_advance(
974 &mut dest,
975 unsafe_slice(buf, drop_right, buf.len() - drop_right),
976 );
977 self.set_len(new_len);
978 }
979 }
980
981 /// Slice this `Tendril` as a new `Tendril`.
982 ///
983 /// Does not check validity or bounds!
984 #[inline]
unsafe_subtendril(&self, offset: u32, length: u32) -> Tendril<F, A>985 pub unsafe fn unsafe_subtendril(&self, offset: u32, length: u32) -> Tendril<F, A> {
986 if length <= MAX_INLINE_LEN as u32 {
987 Tendril::inline(unsafe_slice(
988 self.as_byte_slice(),
989 offset as usize,
990 length as usize,
991 ))
992 } else {
993 self.make_buf_shared();
994 self.incref();
995 let (buf, _, _) = self.assume_buf();
996 Tendril::shared(buf, self.aux() + offset, length)
997 }
998 }
999
1000 /// Drop `n` bytes from the front.
1001 ///
1002 /// Does not check validity or bounds!
1003 #[inline]
unsafe_pop_front(&mut self, n: u32)1004 pub unsafe fn unsafe_pop_front(&mut self, n: u32) {
1005 let new_len = self.len32() - n;
1006 if new_len <= MAX_INLINE_LEN as u32 {
1007 *self = Tendril::inline(unsafe_slice(
1008 self.as_byte_slice(),
1009 n as usize,
1010 new_len as usize,
1011 ));
1012 } else {
1013 self.make_buf_shared();
1014 self.set_aux(self.aux() + n);
1015 let len = self.raw_len();
1016 self.set_len(len - n);
1017 }
1018 }
1019
1020 /// Drop `n` bytes from the back.
1021 ///
1022 /// Does not check validity or bounds!
1023 #[inline]
unsafe_pop_back(&mut self, n: u32)1024 pub unsafe fn unsafe_pop_back(&mut self, n: u32) {
1025 let new_len = self.len32() - n;
1026 if new_len <= MAX_INLINE_LEN as u32 {
1027 *self = Tendril::inline(unsafe_slice(self.as_byte_slice(), 0, new_len as usize));
1028 } else {
1029 self.make_buf_shared();
1030 let len = self.raw_len();
1031 self.set_len(len - n);
1032 }
1033 }
1034
1035 #[inline]
incref(&self)1036 unsafe fn incref(&self) {
1037 (*self.header()).refcount.increment();
1038 }
1039
1040 #[inline]
make_buf_shared(&self)1041 unsafe fn make_buf_shared(&self) {
1042 let p = self.ptr.get().get();
1043 if p & 1 == 0 {
1044 let header = p as *mut Header<A>;
1045 (*header).cap = self.aux();
1046
1047 self.ptr.set(NonZeroUsize::new_unchecked(p | 1));
1048 self.set_aux(0);
1049 }
1050 }
1051
1052 // This is not public as it is of no practical value to users.
1053 // By and large they shouldn't need to worry about the distinction at all,
1054 // and going out of your way to make it owned is pointless.
1055 #[inline]
make_owned(&mut self)1056 fn make_owned(&mut self) {
1057 unsafe {
1058 let ptr = self.ptr.get().get();
1059 if ptr <= MAX_INLINE_TAG || (ptr & 1) == 1 {
1060 *self = Tendril::owned_copy(self.as_byte_slice());
1061 }
1062 }
1063 }
1064
1065 #[inline]
make_owned_with_capacity(&mut self, cap: u32)1066 unsafe fn make_owned_with_capacity(&mut self, cap: u32) {
1067 self.make_owned();
1068 let mut buf = self.assume_buf().0;
1069 buf.grow(cap);
1070 self.ptr.set(NonZeroUsize::new_unchecked(buf.ptr as usize));
1071 self.set_aux(buf.cap);
1072 }
1073
1074 #[inline(always)]
header(&self) -> *mut Header<A>1075 unsafe fn header(&self) -> *mut Header<A> {
1076 (self.ptr.get().get() & !1) as *mut Header<A>
1077 }
1078
1079 #[inline]
assume_buf(&self) -> (Buf32<Header<A>>, bool, u32)1080 unsafe fn assume_buf(&self) -> (Buf32<Header<A>>, bool, u32) {
1081 let ptr = self.ptr.get().get();
1082 let header = self.header();
1083 let shared = (ptr & 1) == 1;
1084 let (cap, offset) = match shared {
1085 true => ((*header).cap, self.aux()),
1086 false => (self.aux(), 0),
1087 };
1088
1089 (
1090 Buf32 {
1091 ptr: header,
1092 len: offset + self.len32(),
1093 cap: cap,
1094 },
1095 shared,
1096 offset,
1097 )
1098 }
1099
1100 #[inline]
inline(x: &[u8]) -> Tendril<F, A>1101 unsafe fn inline(x: &[u8]) -> Tendril<F, A> {
1102 let len = x.len();
1103 let t = Tendril {
1104 ptr: Cell::new(inline_tag(len as u32)),
1105 buf: UnsafeCell::new(Buffer { inline: [0; 8] }),
1106 marker: PhantomData,
1107 refcount_marker: PhantomData,
1108 };
1109 ptr::copy_nonoverlapping(x.as_ptr(), (*t.buf.get()).inline.as_mut_ptr(), len);
1110 t
1111 }
1112
1113 #[inline]
owned(x: Buf32<Header<A>>) -> Tendril<F, A>1114 unsafe fn owned(x: Buf32<Header<A>>) -> Tendril<F, A> {
1115 Tendril {
1116 ptr: Cell::new(NonZeroUsize::new_unchecked(x.ptr as usize)),
1117 buf: UnsafeCell::new(Buffer {
1118 heap: Heap {
1119 len: x.len,
1120 aux: x.cap,
1121 },
1122 }),
1123 marker: PhantomData,
1124 refcount_marker: PhantomData,
1125 }
1126 }
1127
1128 #[inline]
owned_copy(x: &[u8]) -> Tendril<F, A>1129 unsafe fn owned_copy(x: &[u8]) -> Tendril<F, A> {
1130 let len32 = x.len() as u32;
1131 let mut b = Buf32::with_capacity(len32, Header::new());
1132 ptr::copy_nonoverlapping(x.as_ptr(), b.data_ptr(), x.len());
1133 b.len = len32;
1134 Tendril::owned(b)
1135 }
1136
1137 #[inline]
shared(buf: Buf32<Header<A>>, off: u32, len: u32) -> Tendril<F, A>1138 unsafe fn shared(buf: Buf32<Header<A>>, off: u32, len: u32) -> Tendril<F, A> {
1139 Tendril {
1140 ptr: Cell::new(NonZeroUsize::new_unchecked((buf.ptr as usize) | 1)),
1141 buf: UnsafeCell::new(Buffer {
1142 heap: Heap { len, aux: off },
1143 }),
1144 marker: PhantomData,
1145 refcount_marker: PhantomData,
1146 }
1147 }
1148
1149 #[inline]
as_byte_slice<'a>(&'a self) -> &'a [u8]1150 fn as_byte_slice<'a>(&'a self) -> &'a [u8] {
1151 unsafe {
1152 match self.ptr.get().get() {
1153 EMPTY_TAG => &[],
1154 n if n <= MAX_INLINE_LEN => (*self.buf.get()).inline.get_unchecked(..n),
1155 _ => {
1156 let (buf, _, offset) = self.assume_buf();
1157 copy_lifetime(
1158 self,
1159 unsafe_slice(buf.data(), offset as usize, self.len32() as usize),
1160 )
1161 }
1162 }
1163 }
1164 }
1165
1166 // There's no need to worry about locking on an atomic Tendril, because it makes it unique as
1167 // soon as you do that.
1168 #[inline]
as_mut_byte_slice<'a>(&'a mut self) -> &'a mut [u8]1169 fn as_mut_byte_slice<'a>(&'a mut self) -> &'a mut [u8] {
1170 unsafe {
1171 match self.ptr.get().get() {
1172 EMPTY_TAG => &mut [],
1173 n if n <= MAX_INLINE_LEN => (*self.buf.get()).inline.get_unchecked_mut(..n),
1174 _ => {
1175 self.make_owned();
1176 let (mut buf, _, offset) = self.assume_buf();
1177 let len = self.len32() as usize;
1178 copy_lifetime_mut(self, unsafe_slice_mut(buf.data_mut(), offset as usize, len))
1179 }
1180 }
1181 }
1182 }
1183
raw_len(&self) -> u321184 unsafe fn raw_len(&self) -> u32 {
1185 (*self.buf.get()).heap.len
1186 }
1187
set_len(&mut self, len: u32)1188 unsafe fn set_len(&mut self, len: u32) {
1189 (*self.buf.get()).heap.len = len;
1190 }
1191
aux(&self) -> u321192 unsafe fn aux(&self) -> u32 {
1193 (*self.buf.get()).heap.aux
1194 }
1195
set_aux(&self, aux: u32)1196 unsafe fn set_aux(&self, aux: u32) {
1197 (*self.buf.get()).heap.aux = aux;
1198 }
1199 }
1200
1201 impl<F, A> Tendril<F, A>
1202 where
1203 F: fmt::SliceFormat,
1204 A: Atomicity,
1205 {
1206 /// Build a `Tendril` by copying a slice.
1207 #[inline]
from_slice(x: &F::Slice) -> Tendril<F, A>1208 pub fn from_slice(x: &F::Slice) -> Tendril<F, A> {
1209 unsafe { Tendril::from_byte_slice_without_validating(x.as_bytes()) }
1210 }
1211
1212 /// Push a slice onto the end of the `Tendril`.
1213 #[inline]
push_slice(&mut self, x: &F::Slice)1214 pub fn push_slice(&mut self, x: &F::Slice) {
1215 unsafe { self.push_bytes_without_validating(x.as_bytes()) }
1216 }
1217 }
1218
/// A simple wrapper to make `Tendril` `Send`.
///
/// Although there is a certain subset of the operations on a `Tendril` that a `SendTendril` could
/// reasonably implement, in order to clearly separate concerns this type is deliberately
/// minimalist, acting as a safe encapsulation around the invariants which permit `Send`ness and
/// behaving as an opaque object.
///
/// A `SendTendril` may be produced by `Tendril.into_send()` or `SendTendril::from(tendril)`,
/// and may be returned to a `Tendril` by `Tendril::from(self)`.
#[derive(Clone)]
pub struct SendTendril<F>
where
    F: fmt::Format,
{
    // The wrapped tendril, stored with the default atomicity parameter. It is
    // private so the only ways in and out are the `From` conversions.
    tendril: Tendril<F>,
}
1235
// NOTE(review): soundness rests on how a `SendTendril` is constructed (via
// `Tendril::into_send`, not shown here); presumably that makes the buffer
// uniquely owned before wrapping it -- confirm against `into_send`.
unsafe impl<F> Send for SendTendril<F> where F: fmt::Format {}
1237
1238 impl<F, A> From<Tendril<F, A>> for SendTendril<F>
1239 where
1240 F: fmt::Format,
1241 A: Atomicity,
1242 {
1243 #[inline]
from(tendril: Tendril<F, A>) -> SendTendril<F>1244 fn from(tendril: Tendril<F, A>) -> SendTendril<F> {
1245 tendril.into_send()
1246 }
1247 }
1248
1249 impl<F, A> From<SendTendril<F>> for Tendril<F, A>
1250 where
1251 F: fmt::Format,
1252 A: Atomicity,
1253 {
1254 #[inline]
from(send: SendTendril<F>) -> Tendril<F, A>1255 fn from(send: SendTendril<F>) -> Tendril<F, A> {
1256 unsafe { mem::transmute(send.tendril) }
1257 // header.refcount may have been initialised as an Atomic or a NonAtomic, but the value
1258 // will be the same (1) regardless, because the layout is defined.
1259 // Thus we don't need to fiddle about resetting it or anything like that.
1260 }
1261 }
1262
/// `Tendril`-related methods for Rust slices.
///
/// `F` is the tendril format whose slice type is `Self`.
pub trait SliceExt<F>: fmt::Slice
where
    F: fmt::SliceFormat<Slice = Self>,
{
    /// Make a `Tendril` from this slice.
    ///
    /// Copies the slice via `Tendril::from_slice`; the result uses `Tendril`'s
    /// default atomicity parameter.
    #[inline]
    fn to_tendril(&self) -> Tendril<F> {
        // Ideally the atomicity would be a defaulted type parameter, as sketched
        // below, but at the time of writing defaults don't help inference:
        //fn to_tendril<A = NonAtomic>(&self) -> Tendril<Self::Format, A>
        //    where A: Atomicity,
        //{
        Tendril::from_slice(self)
    }
}
1278
// `str` converts to a UTF-8 tendril and `[u8]` to a byte tendril; both rely on
// the trait's default `to_tendril`.
impl SliceExt<fmt::UTF8> for str {}
impl SliceExt<fmt::Bytes> for [u8] {}
1281
1282 impl<F, A> Tendril<F, A>
1283 where
1284 F: for<'a> fmt::CharFormat<'a>,
1285 A: Atomicity,
1286 {
1287 /// Remove and return the first character, if any.
1288 #[inline]
pop_front_char<'a>(&'a mut self) -> Option<char>1289 pub fn pop_front_char<'a>(&'a mut self) -> Option<char> {
1290 unsafe {
1291 let next_char; // first char in iterator
1292 let mut skip = 0; // number of bytes to skip, or 0 to clear
1293
1294 {
1295 // <--+
1296 // | Creating an iterator borrows self, so introduce a
1297 // +- scope to contain the borrow (that way we can mutate
1298 // self below, after this scope exits).
1299
1300 let mut iter = F::char_indices(self.as_byte_slice());
1301 match iter.next() {
1302 Some((_, c)) => {
1303 next_char = Some(c);
1304 if let Some((n, _)) = iter.next() {
1305 skip = n as u32;
1306 }
1307 }
1308 None => {
1309 next_char = None;
1310 }
1311 }
1312 }
1313
1314 if skip != 0 {
1315 self.unsafe_pop_front(skip);
1316 } else {
1317 self.clear();
1318 }
1319
1320 next_char
1321 }
1322 }
1323
1324 /// Remove and return a run of characters at the front of the `Tendril`
1325 /// which are classified the same according to the function `classify`.
1326 ///
1327 /// Returns `None` on an empty string.
1328 #[inline]
pop_front_char_run<'a, C, R>(&'a mut self, mut classify: C) -> Option<(Tendril<F, A>, R)> where C: FnMut(char) -> R, R: PartialEq,1329 pub fn pop_front_char_run<'a, C, R>(&'a mut self, mut classify: C) -> Option<(Tendril<F, A>, R)>
1330 where
1331 C: FnMut(char) -> R,
1332 R: PartialEq,
1333 {
1334 let (class, first_mismatch);
1335 {
1336 let mut chars = unsafe { F::char_indices(self.as_byte_slice()) };
1337 let (_, first) = unwrap_or_return!(chars.next(), None);
1338 class = classify(first);
1339 first_mismatch = chars.find(|&(_, ch)| &classify(ch) != &class);
1340 }
1341
1342 match first_mismatch {
1343 Some((idx, _)) => unsafe {
1344 let t = self.unsafe_subtendril(0, idx as u32);
1345 self.unsafe_pop_front(idx as u32);
1346 Some((t, class))
1347 },
1348 None => {
1349 let t = self.clone();
1350 self.clear();
1351 Some((t, class))
1352 }
1353 }
1354 }
1355
1356 /// Push a character, if it can be represented in this format.
1357 #[inline]
try_push_char(&mut self, c: char) -> Result<(), ()>1358 pub fn try_push_char(&mut self, c: char) -> Result<(), ()> {
1359 F::encode_char(c, |b| unsafe {
1360 self.push_bytes_without_validating(b);
1361 })
1362 }
1363 }
1364
/// Extension trait for `io::Read`.
///
/// A blanket implementation covers every `io::Read` type.
pub trait ReadExt: io::Read {
    /// Read from `self` into the byte tendril `buf`, returning a byte count on
    /// success (the blanket implementation reads until EOF and appends).
    fn read_to_tendril<A>(&mut self, buf: &mut Tendril<fmt::Bytes, A>) -> io::Result<usize>
    where
        A: Atomicity;
}
1371
1372 impl<T> ReadExt for T
1373 where
1374 T: io::Read,
1375 {
1376 /// Read all bytes until EOF.
read_to_tendril<A>(&mut self, buf: &mut Tendril<fmt::Bytes, A>) -> io::Result<usize> where A: Atomicity,1377 fn read_to_tendril<A>(&mut self, buf: &mut Tendril<fmt::Bytes, A>) -> io::Result<usize>
1378 where
1379 A: Atomicity,
1380 {
1381 // Adapted from libstd/io/mod.rs.
1382 const DEFAULT_BUF_SIZE: u32 = 64 * 1024;
1383
1384 let start_len = buf.len();
1385 let mut len = start_len;
1386 let mut new_write_size = 16;
1387 let ret;
1388 loop {
1389 if len == buf.len() {
1390 if new_write_size < DEFAULT_BUF_SIZE {
1391 new_write_size *= 2;
1392 }
1393 // FIXME: this exposes uninitialized bytes to a generic R type
1394 // this is fine for R=File which never reads these bytes,
1395 // but user-defined types might.
1396 // The standard library pushes zeros to `Vec<u8>` for that reason.
1397 unsafe {
1398 buf.push_uninitialized(new_write_size);
1399 }
1400 }
1401
1402 match self.read(&mut buf[len..]) {
1403 Ok(0) => {
1404 ret = Ok(len - start_len);
1405 break;
1406 }
1407 Ok(n) => len += n,
1408 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}
1409 Err(e) => {
1410 ret = Err(e);
1411 break;
1412 }
1413 }
1414 }
1415
1416 let buf_len = buf.len32();
1417 buf.pop_back(buf_len - (len as u32));
1418 ret
1419 }
1420 }
1421
1422 impl<A> io::Write for Tendril<fmt::Bytes, A>
1423 where
1424 A: Atomicity,
1425 {
1426 #[inline]
write(&mut self, buf: &[u8]) -> io::Result<usize>1427 fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
1428 self.push_slice(buf);
1429 Ok(buf.len())
1430 }
1431
1432 #[inline]
write_all(&mut self, buf: &[u8]) -> io::Result<()>1433 fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
1434 self.push_slice(buf);
1435 Ok(())
1436 }
1437
1438 #[inline(always)]
flush(&mut self) -> io::Result<()>1439 fn flush(&mut self) -> io::Result<()> {
1440 Ok(())
1441 }
1442 }
1443
#[cfg(feature = "encoding")]
impl<A> encoding::ByteWriter for Tendril<fmt::Bytes, A>
where
    A: Atomicity,
{
    /// Append a single byte.
    #[inline]
    fn write_byte(&mut self, b: u8) {
        let single = [b];
        self.push_slice(&single);
    }

    /// Append a run of bytes.
    #[inline]
    fn write_bytes(&mut self, v: &[u8]) {
        self.push_slice(v);
    }

    /// Reserve room for `additional` more bytes, clamped to `u32::MAX` because
    /// `reserve` takes a `u32`.
    #[inline]
    fn writer_hint(&mut self, additional: usize) {
        let clamped = additional.min(u32::MAX as usize) as u32;
        self.reserve(clamped);
    }
}
1464
1465 impl<F, A> Tendril<F, A>
1466 where
1467 A: Atomicity,
1468 F: fmt::SliceFormat<Slice = [u8]>,
1469 {
1470 /// Decode from some character encoding into UTF-8.
1471 ///
1472 /// See the [rust-encoding docs](https://lifthrasiir.github.io/rust-encoding/encoding/)
1473 /// for more information.
1474 #[inline]
1475 #[cfg(feature = "encoding")]
decode( &self, encoding: EncodingRef, trap: DecoderTrap, ) -> Result<Tendril<fmt::UTF8, A>, ::std::borrow::Cow<'static, str>>1476 pub fn decode(
1477 &self,
1478 encoding: EncodingRef,
1479 trap: DecoderTrap,
1480 ) -> Result<Tendril<fmt::UTF8, A>, ::std::borrow::Cow<'static, str>> {
1481 let mut ret = Tendril::new();
1482 encoding.decode_to(&*self, trap, &mut ret).map(|_| ret)
1483 }
1484
1485 /// Push "uninitialized bytes" onto the end.
1486 ///
1487 /// Really, this grows the tendril without writing anything to the new area.
1488 /// It's only defined for byte tendrils because it's only useful if you
1489 /// plan to then mutate the buffer.
1490 #[inline]
push_uninitialized(&mut self, n: u32)1491 pub unsafe fn push_uninitialized(&mut self, n: u32) {
1492 let new_len = self.len32().checked_add(n).expect(OFLOW);
1493 if new_len <= MAX_INLINE_LEN as u32 && self.ptr.get().get() <= MAX_INLINE_TAG {
1494 self.ptr.set(inline_tag(new_len))
1495 } else {
1496 self.make_owned_with_capacity(new_len);
1497 self.set_len(new_len);
1498 }
1499 }
1500 }
1501
1502 impl<A> strfmt::Display for Tendril<fmt::UTF8, A>
1503 where
1504 A: Atomicity,
1505 {
1506 #[inline]
fmt(&self, f: &mut strfmt::Formatter) -> strfmt::Result1507 fn fmt(&self, f: &mut strfmt::Formatter) -> strfmt::Result {
1508 <str as strfmt::Display>::fmt(&**self, f)
1509 }
1510 }
1511
1512 impl<A> str::FromStr for Tendril<fmt::UTF8, A>
1513 where
1514 A: Atomicity,
1515 {
1516 type Err = ();
1517
1518 #[inline]
from_str(s: &str) -> Result<Self, ()>1519 fn from_str(s: &str) -> Result<Self, ()> {
1520 Ok(Tendril::from_slice(s))
1521 }
1522 }
1523
1524 impl<A> strfmt::Write for Tendril<fmt::UTF8, A>
1525 where
1526 A: Atomicity,
1527 {
1528 #[inline]
write_str(&mut self, s: &str) -> strfmt::Result1529 fn write_str(&mut self, s: &str) -> strfmt::Result {
1530 self.push_slice(s);
1531 Ok(())
1532 }
1533 }
1534
#[cfg(feature = "encoding")]
impl<A> encoding::StringWriter for Tendril<fmt::UTF8, A>
where
    A: Atomicity,
{
    /// Append a single character.
    #[inline]
    fn write_char(&mut self, c: char) {
        Tendril::push_char(self, c);
    }

    /// Append a string slice.
    #[inline]
    fn write_str(&mut self, s: &str) {
        Tendril::push_slice(self, s);
    }

    /// Reserve room for `additional` more bytes, clamped to `u32::MAX` because
    /// `reserve` takes a `u32`.
    #[inline]
    fn writer_hint(&mut self, additional: usize) {
        let clamped = additional.min(u32::MAX as usize) as u32;
        self.reserve(clamped);
    }
}
1555
1556 impl<A> Tendril<fmt::UTF8, A>
1557 where
1558 A: Atomicity,
1559 {
1560 /// Encode from UTF-8 into some other character encoding.
1561 ///
1562 /// See the [rust-encoding docs](https://lifthrasiir.github.io/rust-encoding/encoding/)
1563 /// for more information.
1564 #[inline]
1565 #[cfg(feature = "encoding")]
encode( &self, encoding: EncodingRef, trap: EncoderTrap, ) -> Result<Tendril<fmt::Bytes, A>, ::std::borrow::Cow<'static, str>>1566 pub fn encode(
1567 &self,
1568 encoding: EncodingRef,
1569 trap: EncoderTrap,
1570 ) -> Result<Tendril<fmt::Bytes, A>, ::std::borrow::Cow<'static, str>> {
1571 let mut ret = Tendril::new();
1572 encoding.encode_to(&*self, trap, &mut ret).map(|_| ret)
1573 }
1574
1575 /// Push a character onto the end.
1576 #[inline]
push_char(&mut self, c: char)1577 pub fn push_char(&mut self, c: char) {
1578 unsafe {
1579 self.push_bytes_without_validating(c.encode_utf8(&mut [0_u8; 4]).as_bytes());
1580 }
1581 }
1582
1583 /// Create a `Tendril` from a single character.
1584 #[inline]
from_char(c: char) -> Tendril<fmt::UTF8, A>1585 pub fn from_char(c: char) -> Tendril<fmt::UTF8, A> {
1586 let mut t: Tendril<fmt::UTF8, A> = Tendril::new();
1587 t.push_char(c);
1588 t
1589 }
1590
1591 /// Helper for the `format_tendril!` macro.
1592 #[inline]
format(args: strfmt::Arguments) -> Tendril<fmt::UTF8, A>1593 pub fn format(args: strfmt::Arguments) -> Tendril<fmt::UTF8, A> {
1594 use std::fmt::Write;
1595 let mut output: Tendril<fmt::UTF8, A> = Tendril::new();
1596 let _ = write!(&mut output, "{}", args);
1597 output
1598 }
1599 }
1600
/// Create a `StrTendril` through string formatting.
///
/// Works just like the standard `format!` macro: the arguments are passed to
/// `format_args!` and rendered by `StrTendril::format`.
#[macro_export]
macro_rules! format_tendril {
    ($($arg:tt)*) => ($crate::StrTendril::format(format_args!($($arg)*)))
}
1608
1609 impl<'a, F, A> From<&'a F::Slice> for Tendril<F, A>
1610 where
1611 F: fmt::SliceFormat,
1612 A: Atomicity,
1613 {
1614 #[inline]
from(input: &F::Slice) -> Tendril<F, A>1615 fn from(input: &F::Slice) -> Tendril<F, A> {
1616 Tendril::from_slice(input)
1617 }
1618 }
1619
1620 impl<A> From<String> for Tendril<fmt::UTF8, A>
1621 where
1622 A: Atomicity,
1623 {
1624 #[inline]
from(input: String) -> Tendril<fmt::UTF8, A>1625 fn from(input: String) -> Tendril<fmt::UTF8, A> {
1626 Tendril::from_slice(&*input)
1627 }
1628 }
1629
1630 impl<F, A> AsRef<F::Slice> for Tendril<F, A>
1631 where
1632 F: fmt::SliceFormat,
1633 A: Atomicity,
1634 {
1635 #[inline]
as_ref(&self) -> &F::Slice1636 fn as_ref(&self) -> &F::Slice {
1637 &**self
1638 }
1639 }
1640
1641 impl<A> From<Tendril<fmt::UTF8, A>> for String
1642 where
1643 A: Atomicity,
1644 {
1645 #[inline]
from(input: Tendril<fmt::UTF8, A>) -> String1646 fn from(input: Tendril<fmt::UTF8, A>) -> String {
1647 String::from(&*input)
1648 }
1649 }
1650
1651 impl<'a, A> From<&'a Tendril<fmt::UTF8, A>> for String
1652 where
1653 A: Atomicity,
1654 {
1655 #[inline]
from(input: &'a Tendril<fmt::UTF8, A>) -> String1656 fn from(input: &'a Tendril<fmt::UTF8, A>) -> String {
1657 String::from(&**input)
1658 }
1659 }
1660
// Benchmarks live in `bench.rs` and are only compiled for test builds with the
// `bench` feature enabled.
#[cfg(all(test, feature = "bench"))]
#[path = "bench.rs"]
mod bench;
1664
1665 #[cfg(test)]
1666 mod test {
1667 use super::{
1668 Atomic, ByteTendril, Header, NonAtomic, ReadExt, SendTendril, SliceExt, StrTendril, Tendril,
1669 };
1670 use fmt;
1671 use std::iter;
1672 use std::thread;
1673
assert_send<T: Send>()1674 fn assert_send<T: Send>() {}
1675
1676 #[test]
smoke_test()1677 fn smoke_test() {
1678 assert_eq!("", &*"".to_tendril());
1679 assert_eq!("abc", &*"abc".to_tendril());
1680 assert_eq!("Hello, world!", &*"Hello, world!".to_tendril());
1681
1682 assert_eq!(b"", &*b"".to_tendril());
1683 assert_eq!(b"abc", &*b"abc".to_tendril());
1684 assert_eq!(b"Hello, world!", &*b"Hello, world!".to_tendril());
1685 }
1686
    // Pin the memory footprint of tendrils and their headers.
    #[test]
    fn assert_sizes() {
        use std::mem;
        // Probe for compilers that store a drop flag inside the value: a
        // zero-field type with a `Drop` impl would then have a non-zero size.
        struct EmptyWithDrop;
        impl Drop for EmptyWithDrop {
            fn drop(&mut self) {}
        }
        let compiler_uses_inline_drop_flags = mem::size_of::<EmptyWithDrop>() > 0;

        // Expected size: one pointer word plus 8 bytes of storage, plus one byte
        // when inline drop flags are in use.
        let correct = mem::size_of::<*const ()>()
            + 8
            + if compiler_uses_inline_drop_flags {
                1
            } else {
                0
            };

        assert_eq!(correct, mem::size_of::<ByteTendril>());
        assert_eq!(correct, mem::size_of::<StrTendril>());

        // Wrapping in `Option` must not cost any extra space.
        assert_eq!(correct, mem::size_of::<Option<ByteTendril>>());
        assert_eq!(correct, mem::size_of::<Option<StrTendril>>());

        // Header sizes differ between the two atomicity flavours.
        assert_eq!(
            mem::size_of::<*const ()>() * 2,
            mem::size_of::<Header<Atomic>>(),
        );
        assert_eq!(
            mem::size_of::<*const ()>() + 4,
            mem::size_of::<Header<NonAtomic>>(),
        );
    }
1719
    // Byte tendrils accept anything; string tendrils reject invalid, truncated
    // or trailing-garbage UTF-8.
    #[test]
    fn validate_utf8() {
        assert!(ByteTendril::try_from_byte_slice(b"\xFF").is_ok());
        assert!(StrTendril::try_from_byte_slice(b"\xFF").is_err());
        assert!(StrTendril::try_from_byte_slice(b"\xEA\x99\xFF").is_err());
        assert!(StrTendril::try_from_byte_slice(b"\xEA\x99").is_err());
        assert!(StrTendril::try_from_byte_slice(b"\xEA\x99\xAE\xEA").is_err());
        // The complete three-byte sequence decodes to U+A66E.
        assert_eq!(
            "\u{a66e}",
            &*StrTendril::try_from_byte_slice(b"\xEA\x99\xAE").unwrap()
        );

        // Each push is validated on its own: the two halves of a character
        // pushed separately are both rejected, and the final assert shows the
        // rejected pushes left nothing behind.
        let mut t = StrTendril::new();
        assert!(t.try_push_bytes(b"\xEA\x99").is_err());
        assert!(t.try_push_bytes(b"\xAE").is_err());
        assert!(t.try_push_bytes(b"\xEA\x99\xAE").is_ok());
        assert_eq!("\u{a66e}", &*t);
    }
1738
    // Cloning shares the buffer; pushing onto the clone gives it its own,
    // unshared buffer and leaves the original contents alone.
    #[test]
    fn share_and_unshare() {
        let s = b"foobarbaz".to_tendril();
        assert_eq!(b"foobarbaz", &*s);
        assert!(!s.is_shared());

        // After the clone both handles point at the same data and report shared.
        let mut t = s.clone();
        assert_eq!(s.as_ptr(), t.as_ptr());
        assert!(s.is_shared());
        assert!(t.is_shared());

        // The push moves `t` to different storage; `s` is unchanged.
        t.push_slice(b"quux");
        assert_eq!(b"foobarbaz", &*s);
        assert_eq!(b"foobarbazquux", &*t);
        assert!(s.as_ptr() != t.as_ptr());
        assert!(!t.is_shared());
    }
1756
    // `Display` prints the text itself, before and after cloning and pushing.
    #[test]
    fn format_display() {
        assert_eq!("foobar", &*format!("{}", "foobar".to_tendril()));

        let mut s = "foo".to_tendril();
        assert_eq!("foo", &*format!("{}", s));

        let t = s.clone();
        assert_eq!("foo", &*format!("{}", s));
        assert_eq!("foo", &*format!("{}", t));

        // Growing `s` must not affect what its clone `t` displays.
        s.push_slice("barbaz!");
        assert_eq!("foobarbaz!", &*format!("{}", s));
        assert_eq!("foo", &*format!("{}", t));
    }
1772
    // `Debug` output names the format and the storage kind (inline / owned /
    // shared) as well as the contents.
    #[test]
    fn format_debug() {
        assert_eq!(
            r#"Tendril<UTF8>(inline: "foobar")"#,
            &*format!("{:?}", "foobar".to_tendril())
        );
        assert_eq!(
            r#"Tendril<Bytes>(inline: [102, 111, 111, 98, 97, 114])"#,
            &*format!("{:?}", b"foobar".to_tendril())
        );

        let t = "anextralongstring".to_tendril();
        assert_eq!(
            r#"Tendril<UTF8>(owned: "anextralongstring")"#,
            &*format!("{:?}", t)
        );
        // The clone is dropped immediately, yet `t` is expected to keep
        // reporting `shared` afterwards.
        let _ = t.clone();
        assert_eq!(
            r#"Tendril<UTF8>(shared: "anextralongstring")"#,
            &*format!("{:?}", t)
        );
    }
1795
    // `subtendril(offset, len)`, `pop_front` and `pop_back` on short and on
    // longer strings.
    #[test]
    fn subtendril() {
        assert_eq!("foo".to_tendril(), "foo-bar".to_tendril().subtendril(0, 3));
        assert_eq!("bar".to_tendril(), "foo-bar".to_tendril().subtendril(4, 3));

        let mut t = "foo-bar".to_tendril();
        t.pop_front(2);
        assert_eq!("o-bar".to_tendril(), t);
        t.pop_back(1);
        assert_eq!("o-ba".to_tendril(), t);

        assert_eq!(
            "foo".to_tendril(),
            "foo-a-longer-string-bar-baz".to_tendril().subtendril(0, 3)
        );
        assert_eq!(
            "oo-a-".to_tendril(),
            "foo-a-longer-string-bar-baz".to_tendril().subtendril(1, 5)
        );
        assert_eq!(
            "bar".to_tendril(),
            "foo-a-longer-string-bar-baz".to_tendril().subtendril(20, 3)
        );

        // Popping from a longer string is expected to leave the tendril in the
        // shared representation.
        let mut t = "another rather long string".to_tendril();
        t.pop_front(2);
        assert!(t.starts_with("other rather"));
        t.pop_back(1);
        assert_eq!("other rather long strin".to_tendril(), t);
        assert!(t.is_shared());
    }
1827
    // The `try_` variants must refuse any range that would cut through a
    // multi-byte character.
    #[test]
    fn subtendril_invalid() {
        // U+A66E: every two-byte range is rejected.
        assert!("\u{a66e}".to_tendril().try_subtendril(0, 2).is_err());
        assert!("\u{a66e}".to_tendril().try_subtendril(1, 2).is_err());

        // U+1F4A9: every proper sub-range is rejected.
        assert!("\u{1f4a9}".to_tendril().try_subtendril(0, 3).is_err());
        assert!("\u{1f4a9}".to_tendril().try_subtendril(0, 2).is_err());
        assert!("\u{1f4a9}".to_tendril().try_subtendril(0, 1).is_err());
        assert!("\u{1f4a9}".to_tendril().try_subtendril(1, 3).is_err());
        assert!("\u{1f4a9}".to_tendril().try_subtendril(1, 2).is_err());
        assert!("\u{1f4a9}".to_tendril().try_subtendril(1, 1).is_err());
        assert!("\u{1f4a9}".to_tendril().try_subtendril(2, 2).is_err());
        assert!("\u{1f4a9}".to_tendril().try_subtendril(2, 1).is_err());
        assert!("\u{1f4a9}".to_tendril().try_subtendril(3, 1).is_err());

        // Popping from the front succeeds only for the whole four-byte
        // character; the final assert shows the failed attempts changed nothing.
        let mut t = "\u{1f4a9}zzzzzz".to_tendril();
        assert!(t.try_pop_front(1).is_err());
        assert!(t.try_pop_front(2).is_err());
        assert!(t.try_pop_front(3).is_err());
        assert!(t.try_pop_front(4).is_ok());
        assert_eq!("zzzzzz", &*t);

        // Same from the back.
        let mut t = "zzzzzz\u{1f4a9}".to_tendril();
        assert!(t.try_pop_back(1).is_err());
        assert!(t.try_pop_back(2).is_err());
        assert!(t.try_pop_back(3).is_err());
        assert!(t.try_pop_back(4).is_ok());
        assert_eq!("zzzzzz", &*t);
    }
1857
    // Conversions between formats: viewing as bytes, reinterpreting, and
    // moving to a superset or subset format.
    #[test]
    fn conversion() {
        // "foo" is the bytes 0x66 0x6F 0x6F, by reference and by value.
        assert_eq!(
            &[0x66, 0x6F, 0x6F].to_tendril(),
            "foo".to_tendril().as_bytes()
        );
        assert_eq!(
            [0x66, 0x6F, 0x6F].to_tendril(),
            "foo".to_tendril().into_bytes()
        );

        // Bytes -> ASCII succeeds for ASCII data, and ASCII widens to UTF-8.
        let ascii: Tendril<fmt::ASCII> = b"hello".to_tendril().try_reinterpret().unwrap();
        assert_eq!(&"hello".to_tendril(), ascii.as_superset());
        assert_eq!("hello".to_tendril(), ascii.clone().into_superset());

        // A non-ASCII byte cannot be reinterpreted as ASCII.
        assert!(b"\xFF"
            .to_tendril()
            .try_reinterpret::<fmt::ASCII>()
            .is_err());

        // UTF-8 -> ASCII by reference.
        let t = "hello".to_tendril();
        let ascii: &Tendril<fmt::ASCII> = t.try_as_subset().unwrap();
        assert_eq!(b"hello", &**ascii.as_bytes());

        assert!("ő"
            .to_tendril()
            .try_reinterpret_view::<fmt::ASCII>()
            .is_err());
        assert!("ő".to_tendril().try_as_subset::<fmt::ASCII>().is_err());

        // UTF-8 -> ASCII by value.
        let ascii: Tendril<fmt::ASCII> = "hello".to_tendril().try_into_subset().unwrap();
        assert_eq!(b"hello", &**ascii.as_bytes());

        assert!("ő".to_tendril().try_reinterpret::<fmt::ASCII>().is_err());
        assert!("ő".to_tendril().try_into_subset::<fmt::ASCII>().is_err());
    }
1894
    // `clear` empties the tendril, for a short string and for a longer one that
    // has a live clone.
    #[test]
    fn clear() {
        let mut t = "foo-".to_tendril();
        t.clear();
        assert_eq!(t.len(), 0);
        assert_eq!(t.len32(), 0);
        assert_eq!(&*t, "");

        // Clearing one handle must leave the clone's contents intact.
        let mut t = "much longer".to_tendril();
        let s = t.clone();
        t.clear();
        assert_eq!(t.len(), 0);
        assert_eq!(t.len32(), 0);
        assert_eq!(&*t, "");
        assert_eq!(&*s, "much longer");
    }
1911
1912 #[test]
push_tendril()1913 fn push_tendril() {
1914 let mut t = "abc".to_tendril();
1915 t.push_tendril(&"xyz".to_tendril());
1916 assert_eq!("abcxyz", &*t);
1917 }
1918
    // WTF-8 behaviour around surrogates.
    #[test]
    fn wtf8() {
        // Each encoded surrogate is accepted on its own, but the two together in
        // one input are rejected.
        assert!(Tendril::<fmt::WTF8>::try_from_byte_slice(b"\xED\xA0\xBD").is_ok());
        assert!(Tendril::<fmt::WTF8>::try_from_byte_slice(b"\xED\xB2\xA9").is_ok());
        assert!(Tendril::<fmt::WTF8>::try_from_byte_slice(b"\xED\xA0\xBD\xED\xB2\xA9").is_err());

        // A surrogate followed by an ordinary character: it splits cleanly at
        // the boundary and is not valid UTF-8.
        let t: Tendril<fmt::WTF8> =
            Tendril::try_from_byte_slice(b"\xED\xA0\xBD\xEA\x99\xAE").unwrap();
        assert!(b"\xED\xA0\xBD".to_tendril().try_reinterpret().unwrap() == t.subtendril(0, 3));
        assert!(b"\xEA\x99\xAE".to_tendril().try_reinterpret().unwrap() == t.subtendril(3, 3));
        assert!(t.try_reinterpret_view::<fmt::UTF8>().is_err());

        // Ranges inside the first three-byte sequence are rejected...
        assert!(t.try_subtendril(0, 1).is_err());
        assert!(t.try_subtendril(0, 2).is_err());
        assert!(t.try_subtendril(1, 1).is_err());

        // ...and so are ranges inside the second.
        assert!(t.try_subtendril(3, 1).is_err());
        assert!(t.try_subtendril(3, 2).is_err());
        assert!(t.try_subtendril(4, 1).is_err());

        // paired surrogates
        // Pushing the second surrogate after the first is expected to merge the
        // pair into a single four-byte sequence, which is then valid UTF-8.
        let mut t: Tendril<fmt::WTF8> = Tendril::try_from_byte_slice(b"\xED\xA0\xBD").unwrap();
        assert!(t.try_push_bytes(b"\xED\xB2\xA9").is_ok());
        assert_eq!(b"\xF0\x9F\x92\xA9", t.as_byte_slice());
        assert!(t.try_reinterpret_view::<fmt::UTF8>().is_ok());

        // unpaired surrogates
        // Incomplete sequences are rejected. A second \xED\xA0-prefixed
        // surrogate is appended as-is, and only the following \xED\xB2\xA9
        // merges with it; the leading surrogate stays, so the result is not
        // valid UTF-8.
        let mut t: Tendril<fmt::WTF8> = Tendril::try_from_byte_slice(b"\xED\xA0\xBB").unwrap();
        assert!(t.try_push_bytes(b"\xED\xA0").is_err());
        assert!(t.try_push_bytes(b"\xED").is_err());
        assert!(t.try_push_bytes(b"\xA0").is_err());
        assert!(t.try_push_bytes(b"\xED\xA0\xBD").is_ok());
        assert_eq!(b"\xED\xA0\xBB\xED\xA0\xBD", t.as_byte_slice());
        assert!(t.try_push_bytes(b"\xED\xB2\xA9").is_ok());
        assert_eq!(b"\xED\xA0\xBB\xF0\x9F\x92\xA9", t.as_byte_slice());
        assert!(t.try_reinterpret_view::<fmt::UTF8>().is_err());
    }
1956
1957 #[test]
front_char()1958 fn front_char() {
1959 let mut t = "".to_tendril();
1960 assert_eq!(None, t.pop_front_char());
1961 assert_eq!(None, t.pop_front_char());
1962
1963 let mut t = "abc".to_tendril();
1964 assert_eq!(Some('a'), t.pop_front_char());
1965 assert_eq!(Some('b'), t.pop_front_char());
1966 assert_eq!(Some('c'), t.pop_front_char());
1967 assert_eq!(None, t.pop_front_char());
1968 assert_eq!(None, t.pop_front_char());
1969
1970 let mut t = "főo-a-longer-string-bar-baz".to_tendril();
1971 assert_eq!(28, t.len());
1972 assert_eq!(Some('f'), t.pop_front_char());
1973 assert_eq!(Some('ő'), t.pop_front_char());
1974 assert_eq!(Some('o'), t.pop_front_char());
1975 assert_eq!(Some('-'), t.pop_front_char());
1976 assert_eq!(23, t.len());
1977 }
1978
    // Table-driven check of `pop_front_char_run` with `char::is_whitespace` as
    // the classifier. Each row is (input, expected (run, class)), or `None` for
    // the empty input.
    // NOTE(review): several rows below look identical; confirm that runs of
    // spaces inside these literals have not been collapsed by a tool.
    #[test]
    fn char_run() {
        for &(s, exp) in &[
            ("", None),
            (" ", Some((" ", true))),
            ("x", Some(("x", false))),
            (" \t \n", Some((" \t \n", true))),
            ("xyzzy", Some(("xyzzy", false))),
            (" xyzzy", Some((" ", true))),
            ("xyzzy ", Some(("xyzzy", false))),
            (" xyzzy ", Some((" ", true))),
            ("xyzzy hi", Some(("xyzzy", false))),
            ("中 ", Some(("中", false))),
            (" 中 ", Some((" ", true))),
            (" 中 ", Some((" ", true))),
            (" 中 ", Some((" ", true))),
        ] {
            let mut t = s.to_tendril();
            let res = t.pop_front_char_run(char::is_whitespace);
            match exp {
                None => assert!(res.is_none()),
                Some((es, ec)) => {
                    let (rt, rc) = res.unwrap();
                    assert_eq!(es, &*rt);
                    assert_eq!(ec, rc);
                }
            }
        }
    }
2008
    // Mutating a short (inline-sized) byte tendril in place through indexing,
    // then growing it with `push_uninitialized`.
    #[test]
    fn deref_mut_inline() {
        // "xyő" is four bytes: x, y, 0xC5, 0x91.
        let mut t = "xyő".to_tendril().into_bytes();
        // Breaking the last byte makes the contents invalid UTF-8...
        t[3] = 0xff;
        assert_eq!(b"xy\xC5\xFF", &*t);
        assert!(t.try_reinterpret_view::<fmt::UTF8>().is_err());
        // ...and 0xC5 0x8B is a valid sequence again ('ŋ').
        t[3] = 0x8b;
        assert_eq!("xyŋ", &**t.try_reinterpret_view::<fmt::UTF8>().unwrap());

        unsafe {
            // Grow by three bytes and fill them with the encoding of U+A66E.
            t.push_uninitialized(3);
            t[4] = 0xEA;
            t[5] = 0x99;
            t[6] = 0xAE;
            assert_eq!(
                "xyŋ\u{a66e}",
                &**t.try_reinterpret_view::<fmt::UTF8>().unwrap()
            );
            // Growing well past the inline limit and shrinking back must
            // preserve the existing bytes.
            t.push_uninitialized(20);
            t.pop_back(20);
            assert_eq!(
                "xyŋ\u{a66e}",
                &**t.try_reinterpret_view::<fmt::UTF8>().unwrap()
            );
        }
    }
2035
2036 #[test]
deref_mut()2037 fn deref_mut() {
2038 let mut t = b"0123456789".to_tendril();
2039 let u = t.clone();
2040 assert!(t.is_shared());
2041 t[9] = 0xff;
2042 assert!(!t.is_shared());
2043 assert_eq!(b"0123456789", &*u);
2044 assert_eq!(b"012345678\xff", &*t);
2045 }
2046
2047 #[test]
push_char()2048 fn push_char() {
2049 let mut t = "xyz".to_tendril();
2050 t.push_char('o');
2051 assert_eq!("xyzo", &*t);
2052 t.push_char('ő');
2053 assert_eq!("xyzoő", &*t);
2054 t.push_char('\u{a66e}');
2055 assert_eq!("xyzoő\u{a66e}", &*t);
2056 t.push_char('\u{1f4a9}');
2057 assert_eq!("xyzoő\u{a66e}\u{1f4a9}", &*t);
2058 assert_eq!(t.len(), 13);
2059 }
2060
    // Encoding UTF-8 tendrils into legacy encodings (needs the `encoding`
    // feature).
    #[test]
    #[cfg(feature = "encoding")]
    fn encode() {
        use encoding::{all, EncoderTrap};

        let t = "안녕하세요 러스트".to_tendril();
        assert_eq!(
            b"\xbe\xc8\xb3\xe7\xc7\xcf\xbc\xbc\xbf\xe4\x20\xb7\xaf\xbd\xba\xc6\xae",
            &*t.encode(all::WINDOWS_949, EncoderTrap::Strict).unwrap()
        );

        // With `Replace`, the unencodable U+A66E at the end comes out as '?'.
        let t = "Энергия пробуждения ия-я-я! \u{a66e}".to_tendril();
        assert_eq!(
            b"\xfc\xce\xc5\xd2\xc7\xc9\xd1 \xd0\xd2\xcf\xc2\xd5\xd6\xc4\xc5\xce\
              \xc9\xd1 \xc9\xd1\x2d\xd1\x2d\xd1\x21 ?",
            &*t.encode(all::KOI8_U, EncoderTrap::Replace).unwrap()
        );

        // With `Strict`, an unencodable character is an error.
        let t = "\u{1f4a9}".to_tendril();
        assert!(t.encode(all::WINDOWS_1252, EncoderTrap::Strict).is_err());
    }
2082
    // Decoding byte tendrils from legacy encodings into UTF-8 (needs the
    // `encoding` feature).
    #[test]
    #[cfg(feature = "encoding")]
    fn decode() {
        use encoding::{all, DecoderTrap};

        let t = b"\xbe\xc8\xb3\xe7\xc7\xcf\xbc\xbc\
                  \xbf\xe4\x20\xb7\xaf\xbd\xba\xc6\xae"
            .to_tendril();
        assert_eq!(
            "안녕하세요 러스트",
            &*t.decode(all::WINDOWS_949, DecoderTrap::Strict).unwrap()
        );

        let t = b"\xfc\xce\xc5\xd2\xc7\xc9\xd1 \xd0\xd2\xcf\xc2\xd5\xd6\xc4\xc5\xce\
                  \xc9\xd1 \xc9\xd1\x2d\xd1\x2d\xd1\x21"
            .to_tendril();
        assert_eq!(
            "Энергия пробуждения ия-я-я!",
            &*t.decode(all::KOI8_U, DecoderTrap::Replace).unwrap()
        );

        // An invalid byte is an error under `Strict`...
        let t = b"x \xff y".to_tendril();
        assert!(t.decode(all::UTF_8, DecoderTrap::Strict).is_err());

        // ...and becomes U+FFFD under `Replace`.
        let t = b"x \xff y".to_tendril();
        assert_eq!(
            "x \u{fffd} y",
            &*t.decode(all::UTF_8, DecoderTrap::Replace).unwrap()
        );
    }
2113
    // Character-level operations on ASCII tendrils.
    #[test]
    fn ascii() {
        // Helper: build an ASCII tendril from bytes, panicking on non-ASCII.
        fn mk(x: &[u8]) -> Tendril<fmt::ASCII> {
            x.to_tendril().try_reinterpret().unwrap()
        }

        let mut t = mk(b"xyz");
        assert_eq!(Some('x'), t.pop_front_char());
        assert_eq!(Some('y'), t.pop_front_char());
        assert_eq!(Some('z'), t.pop_front_char());
        assert_eq!(None, t.pop_front_char());

        // A whitespace run, then a non-whitespace run, then nothing left.
        let mut t = mk(b" \t xyz");
        assert!(Some((mk(b" \t "), true)) == t.pop_front_char_run(char::is_whitespace));
        assert!(Some((mk(b"xyz"), false)) == t.pop_front_char_run(char::is_whitespace));
        assert!(t.pop_front_char_run(char::is_whitespace).is_none());

        // U+00A0 is outside ASCII, so pushing it fails and appends nothing.
        let mut t = Tendril::<fmt::ASCII>::new();
        assert!(t.try_push_char('x').is_ok());
        assert!(t.try_push_char('\0').is_ok());
        assert!(t.try_push_char('\u{a0}').is_err());
        assert_eq!(b"x\0", t.as_byte_slice());
    }
2137
/// Exercises char popping, char-run popping and char pushing on a
/// Latin-1-format tendril.
#[test]
fn latin1() {
    // Builds a Latin-1 tendril from raw bytes; panics if reinterpretation fails.
    fn mk(bytes: &[u8]) -> Tendril<fmt::Latin1> {
        let raw = bytes.to_tendril();
        raw.try_reinterpret().unwrap()
    }

    // Byte 0xd8 comes back as 'Ø'.
    let mut t = mk(b"\xd8_\xd8");
    for &expected in &[Some('Ø'), Some('_'), Some('Ø'), None] {
        assert_eq!(expected, t.pop_front_char());
    }

    // Runs are split by the whitespace predicate and tagged with its result.
    let mut t = mk(b" \t \xfe\xa7z");
    let blanks = t.pop_front_char_run(char::is_whitespace);
    assert!(Some((mk(b" \t "), true)) == blanks);
    let rest = t.pop_front_char_run(char::is_whitespace);
    assert!(Some((mk(b"\xfe\xa7z"), false)) == rest);
    let exhausted = t.pop_front_char_run(char::is_whitespace);
    assert!(exhausted.is_none());

    // The first three chars are accepted; the remaining ones are rejected
    // and leave the content untouched.
    let mut t: Tendril<fmt::Latin1> = Tendril::new();
    for &accepted in &['x', '\0', '\u{a0}'] {
        assert!(t.try_push_char(accepted).is_ok());
    }
    for &rejected in &['ő', 'я', '\u{a66e}', '\u{1f4a9}'] {
        assert!(t.try_push_char(rejected).is_err());
    }
    assert_eq!(b"x\0\xa0", t.as_byte_slice());
}
2165
/// Checks `format_tendril!` with an empty template and with one argument.
#[test]
fn format() {
    let empty = format_tendril!("");
    assert_eq!("", &*empty);

    let sentence = format_tendril!("two and two make {}", 2 + 2);
    assert_eq!("two and two make 4", &*sentence);
}
2174
/// Pushing a subtendril onto the subtendril that precedes it in the same
/// source keeps every tendril involved shared.
#[test]
fn merge_shared() {
    let whole = "012345678901234567890123456789".to_tendril();

    let tail = whole.subtendril(10, 20);
    assert!(tail.is_shared());
    assert_eq!("01234567890123456789", &*tail);

    let mut head = whole.subtendril(0, 10);
    assert!(head.is_shared());
    assert_eq!("0123456789", &*head);

    head.push_tendril(&tail);
    assert!(head.is_shared());
    assert!(tail.is_shared());
    assert!(tail.is_shared_with(&head));
    assert!(head.is_shared_with(&tail));
    assert_eq!("012345678901234567890123456789", &*head);

    // The source tendril still shares with both pieces.
    assert!(whole.is_shared());
    for piece in &[&tail, &head] {
        assert!(whole.is_shared_with(piece));
    }
}
2196
/// Pushing an unrelated tendril onto a shared subtendril leaves the
/// result unshared.
#[test]
fn merge_cant_share() {
    let whole = "012345678901234567890123456789".to_tendril();

    let mut head = whole.subtendril(0, 10);
    assert!(head.is_shared());
    assert_eq!("0123456789", &*head);

    {
        // Scoped so the suffix is gone before the sharing check below.
        let suffix = "abcd".to_tendril();
        head.push_tendril(&suffix);
    }
    assert!(!head.is_shared());
    assert_eq!("0123456789abcd", &*head);
}
2208
/// `reserve` on a shared tendril leaves it shared.
#[test]
fn shared_doesnt_reserve() {
    let mut whole = "012345678901234567890123456789".to_tendril();
    let sibling = whole.subtendril(1, 10);

    assert!(whole.is_shared());
    whole.reserve(10);
    assert!(whole.is_shared());

    // The subtendril has to stay alive across the checks above.
    drop(sibling);
}
2220
/// The `try_*` slicing and popping methods return errors for
/// out-of-range requests.
#[test]
fn out_of_bounds() {
    // (text, offset, length) triples that must all be rejected.
    for &(text, offset, length) in &[("", 0, 1), ("abc", 0, 4), ("abc", 3, 1), ("abc", 7, 1)] {
        assert!(text.to_tendril().try_subtendril(offset, length).is_err());
    }

    // Nothing can be popped from an empty tendril, at either end.
    let mut empty = "".to_tendril();
    for &count in &[1, 5, 500] {
        assert!(empty.try_pop_front(count).is_err());
    }
    for &count in &[1, 5, 500] {
        assert!(empty.try_pop_back(count).is_err());
    }

    // Each successful pop removes one byte; the oversized requests fail.
    let mut word = "abcd".to_tendril();
    assert!(word.try_pop_front(1).is_ok());
    for &count in &[4, 500] {
        assert!(word.try_pop_front(count).is_err());
    }
    assert!(word.try_pop_back(1).is_ok());
    for &count in &[3, 500] {
        assert!(word.try_pop_back(count).is_err());
    }
}
2244
/// Every comparison method on tendrils must agree with the same method
/// on the underlying `&str`, for every pairing of the two word lists.
#[test]
fn compare() {
    let lefts = [
        "indiscretions",
        "validity",
        "hallucinogenics",
        "timelessness",
        "original",
        "microcosms",
        "boilers",
        "mammoth",
    ];
    let rights = [
        "intrepidly",
        "frigid",
        "spa",
        "cardigans",
        "guileful",
        "evaporated",
        "unenthusiastic",
        "legitimate",
    ];

    for &a in lefts.iter() {
        let ta = a.to_tendril();

        for &b in rights.iter() {
            let tb = b.to_tendril();

            // Equality.
            assert_eq!(a.eq(b), ta.eq(&tb));
            assert_eq!(a.ne(b), ta.ne(&tb));

            // Ordering predicates.
            assert_eq!(a.lt(b), ta.lt(&tb));
            assert_eq!(a.le(b), ta.le(&tb));
            assert_eq!(a.gt(b), ta.gt(&tb));
            assert_eq!(a.ge(b), ta.ge(&tb));

            // Full orderings.
            assert_eq!(a.partial_cmp(b), ta.partial_cmp(&tb));
            assert_eq!(a.cmp(b), ta.cmp(&tb));
        }
    }
}
2281
/// Exercises `Extend<T>` and `FromIterator<T>` for each supported item
/// type: tendril references, `&str`, `&[u8]`, `char`, `&u8` and `u8`.
#[test]
fn extend_and_from_iterator() {
    // Tendril<F>
    let mut greeting = "Hello".to_tendril();
    greeting.extend(None::<&Tendril<_>>.into_iter());
    assert_eq!("Hello", &*greeting);
    let rest = [", ".to_tendril(), "world".to_tendril(), "!".to_tendril()];
    greeting.extend(&rest);
    assert_eq!("Hello, world!", &*greeting);

    let pieces = [
        "Hello".to_tendril(),
        ", ".to_tendril(),
        "world".to_tendril(),
        "!".to_tendril(),
    ];
    let joined = pieces.iter().collect::<StrTendril>();
    assert_eq!("Hello, world!", &*joined);

    // &str
    let mut greeting = "Hello".to_tendril();
    greeting.extend(None::<&str>.into_iter());
    assert_eq!("Hello", &*greeting);
    let rest = [", ", "world", "!"];
    greeting.extend(rest.iter().cloned());
    assert_eq!("Hello, world!", &*greeting);

    let pieces = ["Hello", ", ", "world", "!"];
    let joined = pieces.iter().cloned().collect::<StrTendril>();
    assert_eq!("Hello, world!", &*joined);

    // &[u8]
    let mut greeting = b"Hello".to_tendril();
    greeting.extend(None::<&[u8]>.into_iter());
    assert_eq!(b"Hello", &*greeting);
    let rest: [&[u8]; 3] = [b", ", b"world", b"!"];
    greeting.extend(rest.iter().cloned());
    assert_eq!(b"Hello, world!", &*greeting);

    let pieces: [&[u8]; 4] = [b"Hello", b", ", b"world", b"!"];
    let joined = pieces.iter().cloned().collect::<ByteTendril>();
    assert_eq!(b"Hello, world!", &*joined);

    // Shared fixtures for the per-element item types below.
    let string = "the quick brown fox jumps over the lazy dog";
    let string_expected = string.to_tendril();
    let bytes = string.as_bytes();
    let bytes_expected = bytes.to_tendril();

    // char
    let collected: StrTendril = string.chars().collect();
    assert_eq!(string_expected, collected);
    let mut extended = StrTendril::new();
    extended.extend(string.chars());
    assert_eq!(string_expected, extended);

    // &u8
    let collected: ByteTendril = bytes.iter().collect();
    assert_eq!(bytes_expected, collected);
    let mut extended = ByteTendril::new();
    extended.extend(bytes);
    assert_eq!(bytes_expected, extended);

    // u8
    let collected: ByteTendril = bytes.iter().cloned().collect();
    assert_eq!(bytes_expected, collected);
    let mut extended = ByteTendril::new();
    extended.extend(bytes.iter().cloned());
    assert_eq!(bytes_expected, extended);
}
2364
/// A tendril can be produced from a string slice through `FromStr`.
#[test]
fn from_str() {
    // `str::parse` dispatches to the target type's `FromStr` implementation.
    let parsed: Tendril<_> = "foo bar baz".parse().unwrap();
    assert_eq!("foo bar baz", &*parsed);
}
2371
/// `StrTendril::from_char` yields the one-character string for each of
/// the sample characters.
#[test]
fn from_char() {
    let samples = [
        ("o", 'o'),
        ("ő", 'ő'),
        ("\u{a66e}", '\u{a66e}'),
        ("\u{1f4a9}", '\u{1f4a9}'),
    ];

    for &(expected, c) in samples.iter() {
        assert_eq!(expected, &*StrTendril::from_char(c));
    }
}
2379
/// `read_to_tendril` reports the number of bytes read and fills the
/// tendril with exactly the reader's content.
#[test]
#[cfg_attr(miri, ignore)] // slow
fn read() {
    // Reads `x` through a cursor into a fresh tendril and checks both the
    // returned count and the resulting bytes.
    fn check(x: &[u8]) {
        use std::io::Cursor;

        let mut sink = ByteTendril::new();
        let count = Cursor::new(x).read_to_tendril(&mut sink).unwrap();
        assert_eq!(x.len(), count);
        assert_eq!(x, &*sink);
    }

    check(b"");
    check(b"abcd");

    // One million bytes of input.
    let long = iter::repeat(b'x').take(1_000_000).collect::<Vec<u8>>();
    check(&long);
}
2396
/// Tendrils work as `HashMap` keys and can be looked up by byte slice.
#[test]
fn hash_map_key() {
    use std::collections::HashMap;

    let present: &[u8] = b"foo";
    let absent: &[u8] = b"bar";

    // As noted with Borrow, indexing on HashMap<StrTendril, _> is byte-based because of
    // https://github.com/rust-lang/rust/issues/27108.
    let mut by_str = HashMap::new();
    by_str.insert("foo".to_tendril(), 1);
    assert_eq!(by_str.get(present), Some(&1));
    assert_eq!(by_str.get(absent), None);

    // Same lookups against a map keyed by byte tendrils.
    let mut by_bytes = HashMap::new();
    by_bytes.insert(b"foo".to_tendril(), 1);
    assert_eq!(by_bytes.get(present), Some(&1));
    assert_eq!(by_bytes.get(absent), None);
}
2413
/// An `Atomic` tendril can be cloned, moved to another thread and
/// modified there without affecting the original.
#[test]
fn atomic() {
    assert_send::<Tendril<fmt::UTF8, Atomic>>();

    let original = Tendril::<fmt::UTF8, Atomic>::from_slice("this is a string");
    assert!(!original.is_shared());

    let mut copy = original.clone();
    assert!(original.is_shared());

    // Address of the original's data, compared against inside the worker.
    let original_addr = original.as_ptr() as usize;

    let worker = thread::spawn(move || {
        assert!(copy.is_shared());
        copy.push_slice(" extended");
        assert_eq!("this is a string extended", &*copy);
        // After the push the clone's data sits at a different address
        // and the clone is no longer shared.
        assert!(copy.as_ptr() as usize != original_addr);
        assert!(!copy.is_shared());
    });
    worker.join().unwrap();

    assert!(original.is_shared());
    assert_eq!("this is a string", &*original);
}
2434
/// A tendril converted with `into_send` can cross a thread boundary and
/// be turned back into a `StrTendril` there.
#[test]
fn send() {
    assert_send::<SendTendril<fmt::UTF8>>();

    let source = "this is a string".to_tendril();
    let kept = source.clone();
    let sendable = source.into_send();

    let worker = thread::spawn(move || {
        let received = StrTendril::from(sendable);
        assert!(!received.is_shared());
        assert_eq!("this is a string", &*received);
    });
    worker.join().unwrap();

    // The clone kept on this thread still reads the same.
    assert_eq!("this is a string", &*kept);
}
2450
/// Same round trip as the `send` test, with a one-byte string.
#[test]
fn inline_send() {
    let source = "x".to_tendril();
    let kept = source.clone();
    let sendable = source.into_send();

    let worker = thread::spawn(move || {
        let received = StrTendril::from(sendable);
        assert!(!received.is_shared());
        assert_eq!("x", &*received);
    });
    worker.join().unwrap();

    // The clone kept on this thread still reads the same.
    assert_eq!("x", &*kept);
}
2465 }
2466