1 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
2 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
3 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
4 // option. This file may not be copied, modified, or distributed
5 // except according to those terms.
6
7 use std::{ptr, mem, hash, str, u32, io, slice};
8 use std::sync::atomic::{self, AtomicUsize};
9 use std::sync::atomic::Ordering as AtomicOrdering;
10 use std::borrow::Borrow;
11 use std::marker::PhantomData;
12 use std::cell::Cell;
13 use std::ops::{Deref, DerefMut};
14 use std::iter::FromIterator;
15 use std::io::Write;
16 use std::default::Default;
17 use std::cmp::Ordering;
18 use std::fmt as strfmt;
19
20 #[cfg(feature = "encoding")] use encoding::{self, EncodingRef, DecoderTrap, EncoderTrap};
21
22
23 use buf32::{self, Buf32};
24 use fmt::{self, Slice};
25 use fmt::imp::Fixup;
26 use util::{unsafe_slice, unsafe_slice_mut, copy_and_advance, copy_lifetime_mut, copy_lifetime,
27 NonZeroUsize};
28 use OFLOW;
29
/// Longest payload, in bytes, that is kept inline instead of in a heap buffer.
const MAX_INLINE_LEN: usize = 8;

/// Largest value of the `ptr` field that is interpreted as a tag; anything
/// above it is treated as the address of a heap buffer.
const MAX_INLINE_TAG: usize = 0xF;

/// Tag used for the empty tendril. A length of zero cannot be stored
/// directly because the tag lives in a non-zero integer.
const EMPTY_TAG: usize = 0xF;
33
34 #[inline(always)]
inline_tag(len: u32) -> NonZeroUsize35 fn inline_tag(len: u32) -> NonZeroUsize {
36 debug_assert!(len <= MAX_INLINE_LEN as u32);
37 unsafe {
38 NonZeroUsize::new(if len == 0 {
39 EMPTY_TAG
40 } else {
41 len as usize
42 })
43 }
44 }
45
/// The multithreadedness of a tendril.
///
/// Exactly two types implement this trait:
///
/// - `Atomic`: use this in your tendril and you will have a `Send` tendril
///   which works across threads; this is akin to `Arc`.
///
/// - `NonAtomic`: use this in your tendril and you will have a tendril which
///   is neither `Send` nor `Sync` but should be a tad faster; this is akin
///   to `Rc`.
///
/// The layout of an implementing type is also mandated to be that of a
/// `usize`, for it is used for reference counting.
pub unsafe trait Atomicity: 'static {
    // Build a fresh reference count.
    #[doc(hidden)]
    fn new() -> Self;

    // Bump the count by one. Both implementations in this file return the
    // value the count had before the bump.
    #[doc(hidden)]
    fn increment(&self) -> usize;

    // Lower the count by one. Both implementations in this file return the
    // value the count had before the change.
    #[doc(hidden)]
    fn decrement(&self) -> usize;

    // Issue whatever acquire fence the implementation needs (possibly none).
    #[doc(hidden)]
    fn fence_acquire();
}
71
/// Marker selecting non-atomic reference counting for a tendril.
///
/// It is the default for the second type parameter of a `Tendril`, so it
/// rarely needs to be spelled out.
///
/// Comparable to choosing `Rc` for reference counting.
pub struct NonAtomic(Cell<usize>);
79
80 unsafe impl Atomicity for NonAtomic {
81 #[inline]
new() -> Self82 fn new() -> Self {
83 NonAtomic(Cell::new(1))
84 }
85
86 #[inline]
increment(&self) -> usize87 fn increment(&self) -> usize {
88 let value = self.0.get();
89 self.0.set(value.checked_add(1).expect(OFLOW));
90 value
91 }
92
93 #[inline]
decrement(&self) -> usize94 fn decrement(&self) -> usize {
95 let value = self.0.get();
96 self.0.set(value - 1);
97 value
98 }
99
100 #[inline]
fence_acquire()101 fn fence_acquire() {
102 }
103 }
104
/// Marker selecting atomic (and hence concurrent) reference counting.
///
/// Pass it as the second, optional type parameter of a `Tendril`;
/// `Tendril<F, Atomic>` thus implements `Send`.
///
/// Comparable to choosing `Arc` for reference counting.
pub struct Atomic(AtomicUsize);
112
113 unsafe impl Atomicity for Atomic {
114 #[inline]
new() -> Self115 fn new() -> Self {
116 Atomic(AtomicUsize::new(1))
117 }
118
119 #[inline]
increment(&self) -> usize120 fn increment(&self) -> usize {
121 // Relaxed is OK because we have a reference already.
122 self.0.fetch_add(1, AtomicOrdering::Relaxed)
123 }
124
125 #[inline]
decrement(&self) -> usize126 fn decrement(&self) -> usize {
127 self.0.fetch_sub(1, AtomicOrdering::Release)
128 }
129
130 #[inline]
fence_acquire()131 fn fence_acquire() {
132 atomic::fence(AtomicOrdering::Acquire);
133 }
134 }
135
136 #[repr(packed)]
137 struct Header<A: Atomicity> {
138 refcount: A,
139 cap: u32,
140 }
141
142 impl<A> Header<A>
143 where A: Atomicity,
144 {
145 #[inline(always)]
new() -> Header<A>146 unsafe fn new() -> Header<A> {
147 Header {
148 refcount: A::new(),
149 cap: mem::uninitialized(),
150 }
151 }
152 }
153
/// The ways slicing a `Tendril` can fail.
#[derive(Copy, Clone, Hash, Debug, PartialEq, Eq)]
pub enum SubtendrilError {
    /// The requested range does not lie within the tendril.
    OutOfBounds,
    /// The requested range is in bounds but failed the format's validation.
    ValidationFailed,
}
160
161 /// Compact string type for zero-copy parsing.
162 ///
163 /// `Tendril`s have the semantics of owned strings, but are sometimes views
164 /// into shared buffers. When you mutate a `Tendril`, an owned copy is made
165 /// if necessary. Further mutations occur in-place until the string becomes
166 /// shared, e.g. with `clone()` or `subtendril()`.
167 ///
168 /// Buffer sharing is accomplished through thread-local (non-atomic) reference
169 /// counting, which has very low overhead. The Rust type system will prevent
170 /// you at compile time from sending a `Tendril` between threads. We plan to
171 /// relax this restriction in the future; see `README.md`.
172 ///
173 /// Whereas `String` allocates in the heap for any non-empty string, `Tendril`
174 /// can store small strings (up to 8 bytes) in-line, without a heap allocation.
175 /// `Tendril` is also smaller than `String` on 64-bit platforms — 16 bytes
176 /// versus 24.
177 ///
178 /// The type parameter `F` specifies the format of the tendril, for example
179 /// UTF-8 text or uninterpreted bytes. The parameter will be instantiated
180 /// with one of the marker types from `tendril::fmt`. See the `StrTendril`
181 /// and `ByteTendril` type aliases for two examples.
182 ///
183 /// The type parameter `A` indicates the atomicity of the tendril; it is by
184 /// default `NonAtomic`, but can be specified as `Atomic` to get a tendril
185 /// which implements `Send` (viz. a thread-safe tendril).
186 ///
187 /// The maximum length of a `Tendril` is 4 GB. The library will panic if
188 /// you attempt to go over the limit.
189 #[repr(C)]
190 pub struct Tendril<F, A = NonAtomic>
191 where F: fmt::Format,
192 A: Atomicity,
193 {
194 ptr: Cell<NonZeroUsize>,
195 len: u32,
196 aux: Cell<u32>,
197 marker: PhantomData<*mut F>,
198 refcount_marker: PhantomData<A>,
199 }
200
201 unsafe impl<F, A> Send for Tendril<F, A> where F: fmt::Format, A: Atomicity + Sync { }
202
203 /// `Tendril` for storing native Rust strings.
204 pub type StrTendril = Tendril<fmt::UTF8>;
205
206 /// `Tendril` for storing binary data.
207 pub type ByteTendril = Tendril<fmt::Bytes>;
208
209 impl<F, A> Clone for Tendril<F, A>
210 where F: fmt::Format,
211 A: Atomicity,
212 {
213 #[inline]
clone(&self) -> Tendril<F, A>214 fn clone(&self) -> Tendril<F, A> {
215 unsafe {
216 if self.ptr.get().get() > MAX_INLINE_TAG {
217 self.make_buf_shared();
218 self.incref();
219 }
220
221 ptr::read(self)
222 }
223 }
224 }
225
226 impl<F, A> Drop for Tendril<F, A>
227 where F: fmt::Format,
228 A: Atomicity,
229 {
230 #[inline]
drop(&mut self)231 fn drop(&mut self) {
232 unsafe {
233 let p = self.ptr.get().get();
234 if p <= MAX_INLINE_TAG {
235 return;
236 }
237
238 let (buf, shared, _) = self.assume_buf();
239 if shared {
240 let header = self.header();
241 if (*header).refcount.decrement() == 1 {
242 A::fence_acquire();
243 buf.destroy();
244 }
245 } else {
246 buf.destroy();
247 }
248 }
249 }
250 }
251
// Expands to a `from_iter` method that starts from `Self::new()` and feeds
// the whole iterable through `extend`. `$item` is the iterator's item type.
macro_rules! from_iter_method {
    ($item:ty) => {
        #[inline]
        fn from_iter<I>(iterable: I) -> Self
            where I: IntoIterator<Item = $item>
        {
            let mut collected = Self::new();
            collected.extend(iterable);
            collected
        }
    }
}
264
265 impl<A> Extend<char> for Tendril<fmt::UTF8, A>
266 where A: Atomicity,
267 {
268 #[inline]
extend<I>(&mut self, iterable: I) where I: IntoIterator<Item = char>,269 fn extend<I>(&mut self, iterable: I)
270 where I: IntoIterator<Item = char>,
271 {
272 let iterator = iterable.into_iter();
273 self.force_reserve(iterator.size_hint().0 as u32);
274 for c in iterator {
275 self.push_char(c);
276 }
277 }
278 }
279
280 impl<A> FromIterator<char> for Tendril<fmt::UTF8, A>
281 where A: Atomicity,
282 {
283 from_iter_method!(char);
284 }
285
286 impl<A> Extend<u8> for Tendril<fmt::Bytes, A>
287 where A: Atomicity,
288 {
289 #[inline]
extend<I>(&mut self, iterable: I) where I: IntoIterator<Item = u8>,290 fn extend<I>(&mut self, iterable: I)
291 where I: IntoIterator<Item = u8>,
292 {
293 let iterator = iterable.into_iter();
294 self.force_reserve(iterator.size_hint().0 as u32);
295 for b in iterator {
296 self.push_slice(&[b]);
297 }
298 }
299 }
300
301 impl<A> FromIterator<u8> for Tendril<fmt::Bytes, A>
302 where A: Atomicity,
303 {
304 from_iter_method!(u8);
305 }
306
307 impl<'a, A> Extend<&'a u8> for Tendril<fmt::Bytes, A>
308 where A: Atomicity,
309 {
310 #[inline]
extend<I>(&mut self, iterable: I) where I: IntoIterator<Item = &'a u8>,311 fn extend<I>(&mut self, iterable: I)
312 where I: IntoIterator<Item = &'a u8>,
313 {
314 let iterator = iterable.into_iter();
315 self.force_reserve(iterator.size_hint().0 as u32);
316 for &b in iterator {
317 self.push_slice(&[b]);
318 }
319 }
320 }
321
322 impl<'a, A> FromIterator<&'a u8> for Tendril<fmt::Bytes, A>
323 where A: Atomicity,
324 {
325 from_iter_method!(&'a u8);
326 }
327
328 impl<'a, A> Extend<&'a str> for Tendril<fmt::UTF8, A>
329 where A: Atomicity,
330 {
331 #[inline]
extend<I>(&mut self, iterable: I) where I: IntoIterator<Item = &'a str>,332 fn extend<I>(&mut self, iterable: I)
333 where I: IntoIterator<Item = &'a str>,
334 {
335 for s in iterable {
336 self.push_slice(s);
337 }
338 }
339 }
340
341 impl<'a, A> FromIterator<&'a str> for Tendril<fmt::UTF8, A>
342 where A: Atomicity,
343 {
344 from_iter_method!(&'a str);
345 }
346
347 impl<'a, A> Extend<&'a [u8]> for Tendril<fmt::Bytes, A>
348 where A: Atomicity,
349 {
350 #[inline]
extend<I>(&mut self, iterable: I) where I: IntoIterator<Item = &'a [u8]>,351 fn extend<I>(&mut self, iterable: I)
352 where I: IntoIterator<Item = &'a [u8]>,
353 {
354 for s in iterable {
355 self.push_slice(s);
356 }
357 }
358 }
359
360 impl<'a, A> FromIterator<&'a [u8]> for Tendril<fmt::Bytes, A>
361 where A: Atomicity,
362 {
363 from_iter_method!(&'a [u8]);
364 }
365
366 impl<'a, F, A> Extend<&'a Tendril<F, A>> for Tendril<F, A>
367 where F: fmt::Format + 'a,
368 A: Atomicity,
369 {
370 #[inline]
extend<I>(&mut self, iterable: I) where I: IntoIterator<Item = &'a Tendril<F, A>>,371 fn extend<I>(&mut self, iterable: I)
372 where I: IntoIterator<Item = &'a Tendril<F, A>>,
373 {
374 for t in iterable {
375 self.push_tendril(t);
376 }
377 }
378 }
379
380 impl<'a, F, A> FromIterator<&'a Tendril<F, A>> for Tendril<F, A>
381 where F: fmt::Format + 'a,
382 A: Atomicity,
383 {
384 from_iter_method!(&'a Tendril<F, A>);
385 }
386
387 impl<F, A> Deref for Tendril<F, A>
388 where F: fmt::SliceFormat,
389 A: Atomicity,
390 {
391 type Target = F::Slice;
392
393 #[inline]
deref(&self) -> &F::Slice394 fn deref(&self) -> &F::Slice {
395 unsafe {
396 F::Slice::from_bytes(self.as_byte_slice())
397 }
398 }
399 }
400
401 impl<F, A> DerefMut for Tendril<F, A>
402 where F: fmt::SliceFormat,
403 A: Atomicity,
404 {
405 #[inline]
deref_mut(&mut self) -> &mut F::Slice406 fn deref_mut(&mut self) -> &mut F::Slice {
407 unsafe {
408 F::Slice::from_mut_bytes(self.as_mut_byte_slice())
409 }
410 }
411 }
412
413 impl<F, A> Borrow<[u8]> for Tendril<F, A>
414 where F: fmt::SliceFormat,
415 A: Atomicity,
416 {
borrow(&self) -> &[u8]417 fn borrow(&self) -> &[u8] {
418 self.as_byte_slice()
419 }
420 }
421
422 // Why not impl Borrow<str> for Tendril<fmt::UTF8>? str and [u8] hash differently,
423 // and so a HashMap<StrTendril, _> would silently break if we indexed by str. Ick.
424 // https://github.com/rust-lang/rust/issues/27108
425
426 impl<F, A> PartialEq for Tendril<F, A>
427 where F: fmt::Format,
428 A: Atomicity,
429 {
430 #[inline]
eq(&self, other: &Self) -> bool431 fn eq(&self, other: &Self) -> bool {
432 self.as_byte_slice() == other.as_byte_slice()
433 }
434
435 #[inline]
ne(&self, other: &Self) -> bool436 fn ne(&self, other: &Self) -> bool {
437 self.as_byte_slice() != other.as_byte_slice()
438 }
439 }
440
441 impl<F, A> Eq for Tendril<F, A>
442 where F: fmt::Format,
443 A: Atomicity,
444 { }
445
446 impl<F, A> PartialOrd for Tendril<F, A>
447 where F: fmt::SliceFormat,
448 <F as fmt::SliceFormat>::Slice: PartialOrd,
449 A: Atomicity,
450 {
451 #[inline]
partial_cmp(&self, other: &Self) -> Option<Ordering>452 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
453 PartialOrd::partial_cmp(&**self, &**other)
454 }
455 }
456
457 impl<F, A> Ord for Tendril<F, A>
458 where F: fmt::SliceFormat,
459 <F as fmt::SliceFormat>::Slice: Ord,
460 A: Atomicity,
461 {
462 #[inline]
cmp(&self, other: &Self) -> Ordering463 fn cmp(&self, other: &Self) -> Ordering {
464 Ord::cmp(&**self, &**other)
465 }
466 }
467
468 impl<F, A> Default for Tendril<F, A>
469 where F: fmt::Format,
470 A: Atomicity,
471 {
472 #[inline(always)]
default() -> Tendril<F, A>473 fn default() -> Tendril<F, A> {
474 Tendril::new()
475 }
476 }
477
478 impl<F, A> strfmt::Debug for Tendril<F, A>
479 where F: fmt::SliceFormat + Default + strfmt::Debug,
480 <F as fmt::SliceFormat>::Slice: strfmt::Debug,
481 A: Atomicity,
482 {
483 #[inline]
fmt(&self, f: &mut strfmt::Formatter) -> strfmt::Result484 fn fmt(&self, f: &mut strfmt::Formatter) -> strfmt::Result {
485 let kind = match self.ptr.get().get() {
486 p if p <= MAX_INLINE_TAG => "inline",
487 p if p & 1 == 1 => "shared",
488 _ => "owned",
489 };
490
491 try!(write!(f, "Tendril<{:?}>({}: ", <F as Default>::default(), kind));
492 try!(<<F as fmt::SliceFormat>::Slice as strfmt::Debug>::fmt(&**self, f));
493 write!(f, ")")
494 }
495 }
496
497 impl<F, A> hash::Hash for Tendril<F, A>
498 where F: fmt::Format,
499 A: Atomicity,
500 {
501 #[inline]
hash<H: hash::Hasher>(&self, hasher: &mut H)502 fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
503 self.as_byte_slice().hash(hasher)
504 }
505 }
506
507 impl<F, A> Tendril<F, A>
508 where F: fmt::Format,
509 A: Atomicity,
510 {
511 /// Create a new, empty `Tendril` in any format.
512 #[inline(always)]
new() -> Tendril<F, A>513 pub fn new() -> Tendril<F, A> {
514 unsafe {
515 Tendril::inline(&[])
516 }
517 }
518
519 /// Create a new, empty `Tendril` with a specified capacity.
520 #[inline]
with_capacity(capacity: u32) -> Tendril<F, A>521 pub fn with_capacity(capacity: u32) -> Tendril<F, A> {
522 let mut t: Tendril<F, A> = Tendril::new();
523 if capacity > MAX_INLINE_LEN as u32 {
524 unsafe {
525 t.make_owned_with_capacity(capacity);
526 }
527 }
528 t
529 }
530
531 /// Reserve space for additional bytes.
532 ///
533 /// This is only a suggestion. There are cases where `Tendril` will
534 /// decline to allocate until the buffer is actually modified.
535 #[inline]
reserve(&mut self, additional: u32)536 pub fn reserve(&mut self, additional: u32) {
537 if !self.is_shared() {
538 // Don't grow a shared tendril because we'd have to copy
539 // right away.
540 self.force_reserve(additional);
541 }
542 }
543
544 /// Reserve space for additional bytes, even for shared buffers.
545 #[inline]
force_reserve(&mut self, additional: u32)546 fn force_reserve(&mut self, additional: u32) {
547 let new_len = self.len32().checked_add(additional).expect(OFLOW);
548 if new_len > MAX_INLINE_LEN as u32 {
549 unsafe {
550 self.make_owned_with_capacity(new_len);
551 }
552 }
553 }
554
555 /// Get the length of the `Tendril`.
556 ///
557 /// This is named not to conflict with `len()` on the underlying
558 /// slice, if any.
559 #[inline(always)]
len32(&self) -> u32560 pub fn len32(&self) -> u32 {
561 match self.ptr.get().get() {
562 EMPTY_TAG => 0,
563 n if n <= MAX_INLINE_LEN => n as u32,
564 _ => self.len,
565 }
566 }
567
568 /// Is the backing buffer shared?
569 #[inline]
is_shared(&self) -> bool570 pub fn is_shared(&self) -> bool {
571 let n = self.ptr.get().get();
572
573 (n > MAX_INLINE_TAG) && ((n & 1) == 1)
574 }
575
576 /// Is the backing buffer shared with this other `Tendril`?
577 #[inline]
is_shared_with(&self, other: &Tendril<F, A>) -> bool578 pub fn is_shared_with(&self, other: &Tendril<F, A>) -> bool {
579 let n = self.ptr.get().get();
580
581 (n > MAX_INLINE_TAG) && (n == other.ptr.get().get())
582 }
583
584 /// Truncate to length 0 without discarding any owned storage.
585 #[inline]
clear(&mut self)586 pub fn clear(&mut self) {
587 if self.ptr.get().get() <= MAX_INLINE_TAG {
588 self.ptr.set(unsafe { NonZeroUsize::new(EMPTY_TAG) });
589 } else {
590 let (_, shared, _) = unsafe { self.assume_buf() };
591 if shared {
592 // No need to keep a reference alive for a 0-size slice.
593 *self = Tendril::new();
594 } else {
595 self.len = 0;
596 }
597 }
598 }
599
600 /// Build a `Tendril` by copying a byte slice, if it conforms to the format.
601 #[inline]
try_from_byte_slice(x: &[u8]) -> Result<Tendril<F, A>, ()>602 pub fn try_from_byte_slice(x: &[u8]) -> Result<Tendril<F, A>, ()> {
603 match F::validate(x) {
604 true => Ok(unsafe { Tendril::from_byte_slice_without_validating(x) }),
605 false => Err(()),
606 }
607 }
608
609 /// View as uninterpreted bytes.
610 #[inline(always)]
as_bytes(&self) -> &Tendril<fmt::Bytes, A>611 pub fn as_bytes(&self) -> &Tendril<fmt::Bytes, A> {
612 unsafe { mem::transmute(self) }
613 }
614
615 /// Convert into uninterpreted bytes.
616 #[inline(always)]
into_bytes(self) -> Tendril<fmt::Bytes, A>617 pub fn into_bytes(self) -> Tendril<fmt::Bytes, A> {
618 unsafe { mem::transmute(self) }
619 }
620
621 /// Convert `self` into a type which is `Send`.
622 ///
623 /// If the tendril is owned or inline, this is free,
624 /// but if it's shared this will entail a copy of the contents.
625 #[inline]
into_send(mut self) -> SendTendril<F>626 pub fn into_send(mut self) -> SendTendril<F> {
627 self.make_owned();
628 SendTendril {
629 // This changes the header.refcount from A to NonAtomic, but that's
630 // OK because we have defined the format of A as a usize.
631 tendril: unsafe { mem::transmute(self) },
632 }
633 }
634
635 /// View as a superset format, for free.
636 #[inline(always)]
as_superset<Super>(&self) -> &Tendril<Super, A> where F: fmt::SubsetOf<Super>, Super: fmt::Format,637 pub fn as_superset<Super>(&self) -> &Tendril<Super, A>
638 where F: fmt::SubsetOf<Super>,
639 Super: fmt::Format,
640 {
641 unsafe { mem::transmute(self) }
642 }
643
644 /// Convert into a superset format, for free.
645 #[inline(always)]
into_superset<Super>(self) -> Tendril<Super, A> where F: fmt::SubsetOf<Super>, Super: fmt::Format,646 pub fn into_superset<Super>(self) -> Tendril<Super, A>
647 where F: fmt::SubsetOf<Super>,
648 Super: fmt::Format,
649 {
650 unsafe { mem::transmute(self) }
651 }
652
653 /// View as a subset format, if the `Tendril` conforms to that subset.
654 #[inline]
try_as_subset<Sub>(&self) -> Result<&Tendril<Sub, A>, ()> where Sub: fmt::SubsetOf<F>,655 pub fn try_as_subset<Sub>(&self) -> Result<&Tendril<Sub, A>, ()>
656 where Sub: fmt::SubsetOf<F>,
657 {
658 match Sub::revalidate_subset(self.as_byte_slice()) {
659 true => Ok(unsafe { mem::transmute(self) }),
660 false => Err(()),
661 }
662 }
663
664 /// Convert into a subset format, if the `Tendril` conforms to that subset.
665 #[inline]
try_into_subset<Sub>(self) -> Result<Tendril<Sub, A>, Self> where Sub: fmt::SubsetOf<F>,666 pub fn try_into_subset<Sub>(self) -> Result<Tendril<Sub, A>, Self>
667 where Sub: fmt::SubsetOf<F>,
668 {
669 match Sub::revalidate_subset(self.as_byte_slice()) {
670 true => Ok(unsafe { mem::transmute(self) }),
671 false => Err(self),
672 }
673 }
674
675 /// View as another format, if the bytes of the `Tendril` are valid for
676 /// that format.
677 #[inline]
try_reinterpret_view<Other>(&self) -> Result<&Tendril<Other, A>, ()> where Other: fmt::Format,678 pub fn try_reinterpret_view<Other>(&self) -> Result<&Tendril<Other, A>, ()>
679 where Other: fmt::Format,
680 {
681 match Other::validate(self.as_byte_slice()) {
682 true => Ok(unsafe { mem::transmute(self) }),
683 false => Err(()),
684 }
685 }
686
687 /// Convert into another format, if the `Tendril` conforms to that format.
688 ///
689 /// This only re-validates the existing bytes under the new format. It
690 /// will *not* change the byte content of the tendril!
691 ///
692 /// See the `encode` and `decode` methods for character encoding conversion.
693 #[inline]
try_reinterpret<Other>(self) -> Result<Tendril<Other, A>, Self> where Other: fmt::Format,694 pub fn try_reinterpret<Other>(self) -> Result<Tendril<Other, A>, Self>
695 where Other: fmt::Format,
696 {
697 match Other::validate(self.as_byte_slice()) {
698 true => Ok(unsafe { mem::transmute(self) }),
699 false => Err(self),
700 }
701 }
702
703 /// Push some bytes onto the end of the `Tendril`, if they conform to the
704 /// format.
705 #[inline]
try_push_bytes(&mut self, buf: &[u8]) -> Result<(), ()>706 pub fn try_push_bytes(&mut self, buf: &[u8]) -> Result<(), ()> {
707 match F::validate(buf) {
708 true => unsafe {
709 self.push_bytes_without_validating(buf);
710 Ok(())
711 },
712 false => Err(()),
713 }
714 }
715
716 /// Push another `Tendril` onto the end of this one.
717 #[inline]
push_tendril(&mut self, other: &Tendril<F, A>)718 pub fn push_tendril(&mut self, other: &Tendril<F, A>) {
719 let new_len = self.len32().checked_add(other.len32()).expect(OFLOW);
720
721 unsafe {
722 if (self.ptr.get().get() > MAX_INLINE_TAG) && (other.ptr.get().get() > MAX_INLINE_TAG) {
723 let (self_buf, self_shared, _) = self.assume_buf();
724 let (other_buf, other_shared, _) = other.assume_buf();
725
726 if self_shared && other_shared
727 && (self_buf.data_ptr() == other_buf.data_ptr())
728 && (other.aux.get() == self.aux.get() + self.len)
729 {
730 self.len = new_len;
731 return;
732 }
733 }
734
735 self.push_bytes_without_validating(other.as_byte_slice())
736 }
737 }
738
739 /// Attempt to slice this `Tendril` as a new `Tendril`.
740 ///
741 /// This will share the buffer when possible. Mutating a shared buffer
742 /// will copy the contents.
743 ///
744 /// The offset and length are in bytes. The function will return
745 /// `Err` if these are out of bounds, or if the resulting slice
746 /// does not conform to the format.
747 #[inline]
try_subtendril(&self, offset: u32, length: u32) -> Result<Tendril<F, A>, SubtendrilError>748 pub fn try_subtendril(&self, offset: u32, length: u32)
749 -> Result<Tendril<F, A>, SubtendrilError>
750 {
751 let self_len = self.len32();
752 if offset > self_len || length > (self_len - offset) {
753 return Err(SubtendrilError::OutOfBounds);
754 }
755
756 unsafe {
757 let byte_slice = unsafe_slice(self.as_byte_slice(),
758 offset as usize, length as usize);
759 if !F::validate_subseq(byte_slice) {
760 return Err(SubtendrilError::ValidationFailed);
761 }
762
763 Ok(self.unsafe_subtendril(offset, length))
764 }
765 }
766
767 /// Slice this `Tendril` as a new `Tendril`.
768 ///
769 /// Panics on bounds or validity check failure.
770 #[inline]
subtendril(&self, offset: u32, length: u32) -> Tendril<F, A>771 pub fn subtendril(&self, offset: u32, length: u32) -> Tendril<F, A> {
772 self.try_subtendril(offset, length).unwrap()
773 }
774
775 /// Try to drop `n` bytes from the front.
776 ///
777 /// Returns `Err` if the bytes are not available, or the suffix fails
778 /// validation.
779 #[inline]
try_pop_front(&mut self, n: u32) -> Result<(), SubtendrilError>780 pub fn try_pop_front(&mut self, n: u32) -> Result<(), SubtendrilError> {
781 if n == 0 {
782 return Ok(());
783 }
784 let old_len = self.len32();
785 if n > old_len {
786 return Err(SubtendrilError::OutOfBounds);
787 }
788 let new_len = old_len - n;
789
790 unsafe {
791 if !F::validate_suffix(unsafe_slice(self.as_byte_slice(),
792 n as usize, new_len as usize)) {
793 return Err(SubtendrilError::ValidationFailed);
794 }
795
796 self.unsafe_pop_front(n);
797 Ok(())
798 }
799 }
800
801 /// Drop `n` bytes from the front.
802 ///
803 /// Panics if the bytes are not available, or the suffix fails
804 /// validation.
805 #[inline]
pop_front(&mut self, n: u32)806 pub fn pop_front(&mut self, n: u32) {
807 self.try_pop_front(n).unwrap()
808 }
809
810 /// Drop `n` bytes from the back.
811 ///
812 /// Returns `Err` if the bytes are not available, or the prefix fails
813 /// validation.
814 #[inline]
try_pop_back(&mut self, n: u32) -> Result<(), SubtendrilError>815 pub fn try_pop_back(&mut self, n: u32) -> Result<(), SubtendrilError> {
816 if n == 0 {
817 return Ok(());
818 }
819 let old_len = self.len32();
820 if n > old_len {
821 return Err(SubtendrilError::OutOfBounds);
822 }
823 let new_len = old_len - n;
824
825 unsafe {
826 if !F::validate_prefix(unsafe_slice(self.as_byte_slice(),
827 0, new_len as usize)) {
828 return Err(SubtendrilError::ValidationFailed);
829 }
830
831 self.unsafe_pop_back(n);
832 Ok(())
833 }
834 }
835
836 /// Drop `n` bytes from the back.
837 ///
838 /// Panics if the bytes are not available, or the prefix fails
839 /// validation.
840 #[inline]
pop_back(&mut self, n: u32)841 pub fn pop_back(&mut self, n: u32) {
842 self.try_pop_back(n).unwrap()
843 }
844
845 /// View as another format, without validating.
846 #[inline(always)]
reinterpret_view_without_validating<Other>(&self) -> &Tendril<Other, A> where Other: fmt::Format,847 pub unsafe fn reinterpret_view_without_validating<Other>(&self) -> &Tendril<Other, A>
848 where Other: fmt::Format,
849 {
850 mem::transmute(self)
851 }
852
853 /// Convert into another format, without validating.
854 #[inline(always)]
reinterpret_without_validating<Other>(self) -> Tendril<Other, A> where Other: fmt::Format,855 pub unsafe fn reinterpret_without_validating<Other>(self) -> Tendril<Other, A>
856 where Other: fmt::Format,
857 {
858 mem::transmute(self)
859 }
860
861 /// Build a `Tendril` by copying a byte slice, without validating.
862 #[inline]
from_byte_slice_without_validating(x: &[u8]) -> Tendril<F, A>863 pub unsafe fn from_byte_slice_without_validating(x: &[u8]) -> Tendril<F, A> {
864 assert!(x.len() <= buf32::MAX_LEN);
865 if x.len() <= MAX_INLINE_LEN {
866 Tendril::inline(x)
867 } else {
868 Tendril::owned_copy(x)
869 }
870 }
871
872 /// Push some bytes onto the end of the `Tendril`, without validating.
873 #[inline]
push_bytes_without_validating(&mut self, buf: &[u8])874 pub unsafe fn push_bytes_without_validating(&mut self, buf: &[u8]) {
875 assert!(buf.len() <= buf32::MAX_LEN);
876
877 let Fixup { drop_left, drop_right, insert_len, insert_bytes }
878 = F::fixup(self.as_byte_slice(), buf);
879
880 // FIXME: think more about overflow
881 let adj_len = self.len32() + insert_len - drop_left;
882
883 let new_len = adj_len.checked_add(buf.len() as u32).expect(OFLOW)
884 - drop_right;
885
886 let drop_left = drop_left as usize;
887 let drop_right = drop_right as usize;
888
889 if new_len <= MAX_INLINE_LEN as u32 {
890 let mut tmp: [u8; MAX_INLINE_LEN] = mem::uninitialized();
891 {
892 let old = self.as_byte_slice();
893 let mut dest = tmp.as_mut_ptr();
894 copy_and_advance(&mut dest, unsafe_slice(old, 0, old.len() - drop_left));
895 copy_and_advance(&mut dest, unsafe_slice(&insert_bytes, 0, insert_len as usize));
896 copy_and_advance(&mut dest, unsafe_slice(buf, drop_right, buf.len() - drop_right));
897 }
898 *self = Tendril::inline(&tmp[..new_len as usize]);
899 } else {
900 self.make_owned_with_capacity(new_len);
901 let (owned, _, _) = self.assume_buf();
902 let mut dest = owned.data_ptr().offset((owned.len as usize - drop_left) as isize);
903 copy_and_advance(&mut dest, unsafe_slice(&insert_bytes, 0, insert_len as usize));
904 copy_and_advance(&mut dest, unsafe_slice(buf, drop_right, buf.len() - drop_right));
905 self.len = new_len;
906 }
907 }
908
909 /// Slice this `Tendril` as a new `Tendril`.
910 ///
911 /// Does not check validity or bounds!
912 #[inline]
unsafe_subtendril(&self, offset: u32, length: u32) -> Tendril<F, A>913 pub unsafe fn unsafe_subtendril(&self, offset: u32, length: u32) -> Tendril<F, A> {
914 if length <= MAX_INLINE_LEN as u32 {
915 Tendril::inline(unsafe_slice(self.as_byte_slice(),
916 offset as usize, length as usize))
917 } else {
918 self.make_buf_shared();
919 self.incref();
920 let (buf, _, _) = self.assume_buf();
921 Tendril::shared(buf, self.aux.get() + offset, length)
922 }
923 }
924
925 /// Drop `n` bytes from the front.
926 ///
927 /// Does not check validity or bounds!
928 #[inline]
unsafe_pop_front(&mut self, n: u32)929 pub unsafe fn unsafe_pop_front(&mut self, n: u32) {
930 let new_len = self.len32() - n;
931 if new_len <= MAX_INLINE_LEN as u32 {
932 *self = Tendril::inline(unsafe_slice(self.as_byte_slice(),
933 n as usize, new_len as usize));
934 } else {
935 self.make_buf_shared();
936 self.aux.set(self.aux.get() + n);
937 self.len -= n;
938 }
939 }
940
941 /// Drop `n` bytes from the back.
942 ///
943 /// Does not check validity or bounds!
944 #[inline]
unsafe_pop_back(&mut self, n: u32)945 pub unsafe fn unsafe_pop_back(&mut self, n: u32) {
946 let new_len = self.len32() - n;
947 if new_len <= MAX_INLINE_LEN as u32 {
948 *self = Tendril::inline(unsafe_slice(self.as_byte_slice(),
949 0, new_len as usize));
950 } else {
951 self.make_buf_shared();
952 self.len -= n;
953 }
954 }
955
956 #[inline]
incref(&self)957 unsafe fn incref(&self) {
958 (*self.header()).refcount.increment();
959 }
960
961 #[inline]
make_buf_shared(&self)962 unsafe fn make_buf_shared(&self) {
963 let p = self.ptr.get().get();
964 if p & 1 == 0 {
965 let header = p as *mut Header<A>;
966 (*header).cap = self.aux.get();
967
968 self.ptr.set(NonZeroUsize::new(p | 1));
969 self.aux.set(0);
970 }
971 }
972
973 // This is not public as it is of no practical value to users.
974 // By and large they shouldn't need to worry about the distinction at all,
975 // and going out of your way to make it owned is pointless.
976 #[inline]
make_owned(&mut self)977 fn make_owned(&mut self) {
978 unsafe {
979 let ptr = self.ptr.get().get();
980 if ptr <= MAX_INLINE_TAG || (ptr & 1) == 1 {
981 *self = Tendril::owned_copy(self.as_byte_slice());
982 }
983 }
984 }
985
986 #[inline]
make_owned_with_capacity(&mut self, cap: u32)987 unsafe fn make_owned_with_capacity(&mut self, cap: u32) {
988 self.make_owned();
989 let mut buf = self.assume_buf().0;
990 buf.grow(cap);
991 self.ptr.set(NonZeroUsize::new(buf.ptr as usize));
992 self.aux.set(buf.cap);
993 }
994
995 #[inline(always)]
header(&self) -> *mut Header<A>996 unsafe fn header(&self) -> *mut Header<A> {
997 (self.ptr.get().get() & !1) as *mut Header<A>
998 }
999
1000 #[inline]
assume_buf(&self) -> (Buf32<Header<A>>, bool, u32)1001 unsafe fn assume_buf(&self) -> (Buf32<Header<A>>, bool, u32) {
1002 let ptr = self.ptr.get().get();
1003 let header = self.header();
1004 let shared = (ptr & 1) == 1;
1005 let (cap, offset) = match shared {
1006 true => ((*header).cap, self.aux.get()),
1007 false => (self.aux.get(), 0),
1008 };
1009
1010 (Buf32 {
1011 ptr: header,
1012 len: offset + self.len32(),
1013 cap: cap,
1014 }, shared, offset)
1015 }
1016
1017 #[inline]
inline(x: &[u8]) -> Tendril<F, A>1018 unsafe fn inline(x: &[u8]) -> Tendril<F, A> {
1019 let len = x.len();
1020 let mut t = Tendril {
1021 ptr: Cell::new(inline_tag(len as u32)),
1022 len: mem::uninitialized(),
1023 aux: mem::uninitialized(),
1024 marker: PhantomData,
1025 refcount_marker: PhantomData,
1026 };
1027 ptr::copy_nonoverlapping(x.as_ptr(), &mut t.len as *mut u32 as *mut u8, len);
1028 t
1029 }
1030
1031 #[inline]
owned(x: Buf32<Header<A>>) -> Tendril<F, A>1032 unsafe fn owned(x: Buf32<Header<A>>) -> Tendril<F, A> {
1033 Tendril {
1034 ptr: Cell::new(NonZeroUsize::new(x.ptr as usize)),
1035 len: x.len,
1036 aux: Cell::new(x.cap),
1037 marker: PhantomData,
1038 refcount_marker: PhantomData,
1039 }
1040 }
1041
1042 #[inline]
owned_copy(x: &[u8]) -> Tendril<F, A>1043 unsafe fn owned_copy(x: &[u8]) -> Tendril<F, A> {
1044 let len32 = x.len() as u32;
1045 let mut b = Buf32::with_capacity(len32, Header::new());
1046 ptr::copy_nonoverlapping(x.as_ptr(), b.data_ptr(), x.len());
1047 b.len = len32;
1048 Tendril::owned(b)
1049 }
1050
1051 #[inline]
shared(buf: Buf32<Header<A>>, off: u32, len: u32) -> Tendril<F, A>1052 unsafe fn shared(buf: Buf32<Header<A>>, off: u32, len: u32) -> Tendril<F, A> {
1053 Tendril {
1054 ptr: Cell::new(NonZeroUsize::new((buf.ptr as usize) | 1)),
1055 len: len,
1056 aux: Cell::new(off),
1057 marker: PhantomData,
1058 refcount_marker: PhantomData,
1059 }
1060 }
1061
1062 #[inline]
as_byte_slice<'a>(&'a self) -> &'a [u8]1063 fn as_byte_slice<'a>(&'a self) -> &'a [u8] {
1064 unsafe {
1065 match self.ptr.get().get() {
1066 EMPTY_TAG => &[],
1067 n if n <= MAX_INLINE_LEN => {
1068 slice::from_raw_parts(&self.len as *const u32 as *const u8, n)
1069 }
1070 _ => {
1071 let (buf, _, offset) = self.assume_buf();
1072 copy_lifetime(self, unsafe_slice(buf.data(),
1073 offset as usize, self.len32() as usize))
1074 }
1075 }
1076 }
1077 }
1078
1079 // There's no need to worry about locking on an atomic Tendril, because it makes it unique as
1080 // soon as you do that.
1081 #[inline]
as_mut_byte_slice<'a>(&'a mut self) -> &'a mut [u8]1082 fn as_mut_byte_slice<'a>(&'a mut self) -> &'a mut [u8] {
1083 unsafe {
1084 match self.ptr.get().get() {
1085 EMPTY_TAG => &mut [],
1086 n if n <= MAX_INLINE_LEN => {
1087 slice::from_raw_parts_mut(&mut self.len as *mut u32 as *mut u8, n)
1088 }
1089 _ => {
1090 self.make_owned();
1091 let (mut buf, _, offset) = self.assume_buf();
1092 let len = self.len32() as usize;
1093 copy_lifetime_mut(self, unsafe_slice_mut(buf.data_mut(), offset as usize, len))
1094 }
1095 }
1096 }
1097 }
1098 }
1099
/// Constructors and mutators that take the format's native slice type
/// (`F::Slice`).
impl<F, A> Tendril<F, A>
    where F: fmt::SliceFormat,
          A: Atomicity,
{
    /// Build a `Tendril` by copying a slice.
    #[inline]
    pub fn from_slice(x: &F::Slice) -> Tendril<F, A> {
        // Validation is skipped here; presumably the bytes of an `F::Slice`
        // are already valid for format `F` -- confirm against `fmt::SliceFormat`.
        unsafe {
            Tendril::from_byte_slice_without_validating(x.as_bytes())
        }
    }

    /// Push a slice onto the end of the `Tendril`.
    #[inline]
    pub fn push_slice(&mut self, x: &F::Slice) {
        // Same reasoning as `from_slice`: the bytes are appended unvalidated.
        unsafe {
            self.push_bytes_without_validating(x.as_bytes())
        }
    }
}
1120
/// A simple wrapper to make `Tendril` `Send`.
///
/// Although there is a certain subset of the operations on a `Tendril` that a `SendTendril` could
/// reasonably implement, in order to clearly separate concerns this type is deliberately
/// minimalist, acting as a safe encapsulation around the invariants which permit `Send`ness and
/// behaving as an opaque object.
///
/// A `SendTendril` may be produced by `Tendril.into_send()` or `SendTendril::from(tendril)`,
/// and may be returned to a `Tendril` by `Tendril::from(self)`.
#[derive(Clone)]
pub struct SendTendril<F>
    where F: fmt::Format,
{
    // The wrapped tendril, stored with `Tendril`'s default atomicity
    // parameter. It is private, so it can only be reached through the `From`
    // conversions.
    tendril: Tendril<F>,
}
1136
// SAFETY: per the type's documentation, `SendTendril` encapsulates the
// invariants that permit sending a tendril to another thread.
// NOTE(review): those invariants are established by `Tendril::into_send`,
// which is defined elsewhere -- confirm there.
unsafe impl<F> Send for SendTendril<F> where F: fmt::Format { }
1138
/// Converts any `Tendril` into a `SendTendril`, whatever its atomicity `A`.
impl<F, A> From<Tendril<F, A>> for SendTendril<F>
    where F: fmt::Format,
          A: Atomicity,
{
    /// Delegates to `Tendril::into_send`.
    #[inline]
    fn from(tendril: Tendril<F, A>) -> SendTendril<F> {
        tendril.into_send()
    }
}
1148
/// Unwraps a `SendTendril` back into a `Tendril` of any atomicity `A`.
impl<F, A> From<SendTendril<F>> for Tendril<F, A>
    where F: fmt::Format,
          A: Atomicity,
{
    /// Reinterprets the wrapped `Tendril<F>` as a `Tendril<F, A>` without
    /// touching its contents.
    #[inline]
    fn from(send: SendTendril<F>) -> Tendril<F, A> {
        // The transmute only changes the atomicity type parameter.
        unsafe {
            mem::transmute(send.tendril)
        }
        // header.refcount may have been initialised as an Atomic or a NonAtomic, but the value
        // will be the same (1) regardless, because the layout is defined.
        // Thus we don't need to fiddle about resetting it or anything like that.
    }
}
1163
/// `Tendril`-related methods for Rust slices.
///
/// `F` is the tendril format whose slice type is `Self`.
pub trait SliceExt<F>: fmt::Slice where F: fmt::SliceFormat<Slice=Self> {
    /// Make a `Tendril` from this slice.
    ///
    /// The result always uses `Tendril`'s default atomicity parameter; see
    /// the comment in the body for why it is not generic.
    #[inline]
    fn to_tendril(&self) -> Tendril<F> {
        // It should be done thusly, but at the time of writing the defaults don't help inference:
        //fn to_tendril<A = NonAtomic>(&self) -> Tendril<Self::Format, A>
        //    where A: Atomicity,
        //{
        Tendril::from_slice(self)
    }
}
1176
// `str` slices convert to `fmt::UTF8` tendrils.
impl SliceExt<fmt::UTF8> for str { }
// Byte slices convert to `fmt::Bytes` tendrils.
impl SliceExt<fmt::Bytes> for [u8] { }
1179
1180 impl<F, A> Tendril<F, A>
1181 where F: for<'a> fmt::CharFormat<'a>,
1182 A: Atomicity,
1183 {
1184 /// Remove and return the first character, if any.
1185 #[inline]
pop_front_char<'a>(&'a mut self) -> Option<char>1186 pub fn pop_front_char<'a>(&'a mut self) -> Option<char> {
1187 unsafe {
1188 let next_char; // first char in iterator
1189 let mut skip = 0; // number of bytes to skip, or 0 to clear
1190
1191 { // <--+
1192 // | Creating an iterator borrows self, so introduce a
1193 // +- scope to contain the borrow (that way we can mutate
1194 // self below, after this scope exits).
1195
1196 let mut iter = F::char_indices(self.as_byte_slice());
1197 match iter.next() {
1198 Some((_, c)) => {
1199 next_char = Some(c);
1200 if let Some((n, _)) = iter.next() {
1201 skip = n as u32;
1202 }
1203 }
1204 None => {
1205 next_char = None;
1206 }
1207 }
1208 }
1209
1210 if skip != 0 {
1211 self.unsafe_pop_front(skip);
1212 } else {
1213 self.clear();
1214 }
1215
1216 next_char
1217 }
1218 }
1219
1220 /// Remove and return a run of characters at the front of the `Tendril`
1221 /// which are classified the same according to the function `classify`.
1222 ///
1223 /// Returns `None` on an empty string.
1224 #[inline]
pop_front_char_run<'a, C, R>(&'a mut self, mut classify: C) -> Option<(Tendril<F, A>, R)> where C: FnMut(char) -> R, R: PartialEq,1225 pub fn pop_front_char_run<'a, C, R>(&'a mut self, mut classify: C)
1226 -> Option<(Tendril<F, A>, R)>
1227 where C: FnMut(char) -> R,
1228 R: PartialEq,
1229 {
1230 let (class, first_mismatch);
1231 {
1232 let mut chars = unsafe {
1233 F::char_indices(self.as_byte_slice())
1234 };
1235 let (_, first) = unwrap_or_return!(chars.next(), None);
1236 class = classify(first);
1237 first_mismatch = chars.find(|&(_, ch)| &classify(ch) != &class);
1238 }
1239
1240 match first_mismatch {
1241 Some((idx, _)) => unsafe {
1242 let t = self.unsafe_subtendril(0, idx as u32);
1243 self.unsafe_pop_front(idx as u32);
1244 Some((t, class))
1245 },
1246 None => {
1247 let t = self.clone();
1248 self.clear();
1249 Some((t, class))
1250 }
1251 }
1252 }
1253
1254 /// Push a character, if it can be represented in this format.
1255 #[inline]
try_push_char(&mut self, c: char) -> Result<(), ()>1256 pub fn try_push_char(&mut self, c: char) -> Result<(), ()> {
1257 F::encode_char(c, |b| unsafe {
1258 self.push_bytes_without_validating(b);
1259 })
1260 }
1261 }
1262
/// Extension trait for `io::Read`.
pub trait ReadExt: io::Read {
    /// Read from `self` into the end of the byte tendril `buf`.
    ///
    /// The blanket implementation in this file reads until EOF and returns
    /// the number of bytes appended, or the first non-`Interrupted` I/O error.
    fn read_to_tendril<A>(&mut self, buf: &mut Tendril<fmt::Bytes, A>) -> io::Result<usize>
        where A: Atomicity;
}
1268
1269 impl<T> ReadExt for T
1270 where T: io::Read
1271 {
1272 /// Read all bytes until EOF.
read_to_tendril<A>(&mut self, buf: &mut Tendril<fmt::Bytes, A>) -> io::Result<usize> where A: Atomicity,1273 fn read_to_tendril<A>(&mut self, buf: &mut Tendril<fmt::Bytes, A>) -> io::Result<usize>
1274 where A: Atomicity,
1275 {
1276 // Adapted from libstd/io/mod.rs.
1277 const DEFAULT_BUF_SIZE: u32 = 64 * 1024;
1278
1279 let start_len = buf.len();
1280 let mut len = start_len;
1281 let mut new_write_size = 16;
1282 let ret;
1283 loop {
1284 if len == buf.len() {
1285 if new_write_size < DEFAULT_BUF_SIZE {
1286 new_write_size *= 2;
1287 }
1288 // FIXME: this exposes uninitialized bytes to a generic R type
1289 // this is fine for R=File which never reads these bytes,
1290 // but user-defined types might.
1291 // The standard library pushes zeros to `Vec<u8>` for that reason.
1292 unsafe {
1293 buf.push_uninitialized(new_write_size);
1294 }
1295 }
1296
1297 match self.read(&mut buf[len..]) {
1298 Ok(0) => {
1299 ret = Ok(len - start_len);
1300 break;
1301 }
1302 Ok(n) => len += n,
1303 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}
1304 Err(e) => {
1305 ret = Err(e);
1306 break;
1307 }
1308 }
1309 }
1310
1311 let buf_len = buf.len32();
1312 buf.pop_back(buf_len - (len as u32));
1313 ret
1314 }
1315 }
1316
/// Lets a byte tendril be used as an `io::Write` sink; every write appends
/// to the tendril.
impl<A> io::Write for Tendril<fmt::Bytes, A>
    where A: Atomicity,
{
    /// Appends all of `buf` and reports the whole slice as written.
    #[inline]
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        self.push_slice(buf);
        Ok(buf.len())
    }

    /// Appends all of `buf`; always returns `Ok(())`.
    #[inline]
    fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
        self.push_slice(buf);
        Ok(())
    }

    /// Does nothing: there is no intermediate buffer to flush.
    #[inline(always)]
    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}
1337
/// Lets a byte tendril receive the output of `encoding` encoders.
#[cfg(feature = "encoding")]
impl<A> encoding::ByteWriter for Tendril<fmt::Bytes, A>
    where A: Atomicity,
{
    /// Appends a single byte.
    #[inline]
    fn write_byte(&mut self, b: u8) {
        let single = [b];
        self.push_slice(&single);
    }

    /// Appends a run of bytes.
    #[inline]
    fn write_bytes(&mut self, v: &[u8]) {
        self.push_slice(v);
    }

    /// Reserves room for `additional` more bytes, saturating at `u32::MAX`.
    #[inline]
    fn writer_hint(&mut self, additional: usize) {
        let capped = if additional > u32::MAX as usize {
            u32::MAX
        } else {
            additional as u32
        };
        self.reserve(capped);
    }
}
1357
1358 impl<F, A> Tendril<F, A>
1359 where A: Atomicity,
1360 F: fmt::SliceFormat<Slice=[u8]>
1361 {
1362 /// Decode from some character encoding into UTF-8.
1363 ///
1364 /// See the [rust-encoding docs](https://lifthrasiir.github.io/rust-encoding/encoding/)
1365 /// for more information.
1366 #[inline]
1367 #[cfg(feature = "encoding")]
decode(&self, encoding: EncodingRef, trap: DecoderTrap) -> Result<Tendril<fmt::UTF8, A>, ::std::borrow::Cow<'static, str>>1368 pub fn decode(&self, encoding: EncodingRef, trap: DecoderTrap)
1369 -> Result<Tendril<fmt::UTF8, A>, ::std::borrow::Cow<'static, str>>
1370 {
1371 let mut ret = Tendril::new();
1372 encoding.decode_to(&*self, trap, &mut ret).map(|_| ret)
1373 }
1374
1375 /// Push "uninitialized bytes" onto the end.
1376 ///
1377 /// Really, this grows the tendril without writing anything to the new area.
1378 /// It's only defined for byte tendrils because it's only useful if you
1379 /// plan to then mutate the buffer.
1380 #[inline]
push_uninitialized(&mut self, n: u32)1381 pub unsafe fn push_uninitialized(&mut self, n: u32) {
1382 let new_len = self.len32().checked_add(n).expect(OFLOW);
1383 if new_len <= MAX_INLINE_LEN as u32
1384 && self.ptr.get().get() <= MAX_INLINE_TAG
1385 {
1386 self.ptr.set(inline_tag(new_len))
1387 } else {
1388 self.make_owned_with_capacity(new_len);
1389 self.len = new_len;
1390 }
1391 }
1392 }
1393
1394 impl<A> strfmt::Display for Tendril<fmt::UTF8, A>
1395 where A: Atomicity,
1396 {
1397 #[inline]
fmt(&self, f: &mut strfmt::Formatter) -> strfmt::Result1398 fn fmt(&self, f: &mut strfmt::Formatter) -> strfmt::Result {
1399 <str as strfmt::Display>::fmt(&**self, f)
1400 }
1401 }
1402
/// Parsing a UTF-8 tendril from a `&str` copies the string.
impl<A> str::FromStr for Tendril<fmt::UTF8, A>
    where A: Atomicity,
{
    // This implementation never returns `Err`.
    type Err = ();

    /// Copies `s` into a new tendril; always returns `Ok`.
    #[inline]
    fn from_str(s: &str) -> Result<Self, ()> {
        Ok(Tendril::from_slice(s))
    }
}
1413
/// Lets a UTF-8 tendril be the target of `write!`-style formatting.
impl<A> strfmt::Write for Tendril<fmt::UTF8, A>
    where A: Atomicity,
{
    /// Appends `s` to the tendril; always returns `Ok(())`.
    #[inline]
    fn write_str(&mut self, s: &str) -> strfmt::Result {
        self.push_slice(s);
        Ok(())
    }
}
1423
/// Lets a UTF-8 tendril receive the output of `encoding` decoders.
#[cfg(feature = "encoding")]
impl<A> encoding::StringWriter for Tendril<fmt::UTF8, A>
    where A: Atomicity,
{
    /// Appends a single character.
    #[inline]
    fn write_char(&mut self, c: char) {
        self.push_char(c);
    }

    /// Appends a string slice.
    #[inline]
    fn write_str(&mut self, s: &str) {
        self.push_slice(s);
    }

    /// Reserves room for `additional` more bytes, saturating at `u32::MAX`.
    #[inline]
    fn writer_hint(&mut self, additional: usize) {
        let capped = if additional > u32::MAX as usize {
            u32::MAX
        } else {
            additional as u32
        };
        self.reserve(capped);
    }
}
1443
1444 impl<A> Tendril<fmt::UTF8, A>
1445 where A: Atomicity,
1446 {
1447 /// Encode from UTF-8 into some other character encoding.
1448 ///
1449 /// See the [rust-encoding docs](https://lifthrasiir.github.io/rust-encoding/encoding/)
1450 /// for more information.
1451 #[inline]
1452 #[cfg(feature = "encoding")]
encode(&self, encoding: EncodingRef, trap: EncoderTrap) -> Result<Tendril<fmt::Bytes, A>, ::std::borrow::Cow<'static, str>>1453 pub fn encode(&self, encoding: EncodingRef, trap: EncoderTrap)
1454 -> Result<Tendril<fmt::Bytes, A>, ::std::borrow::Cow<'static, str>>
1455 {
1456 let mut ret = Tendril::new();
1457 encoding.encode_to(&*self, trap, &mut ret).map(|_| ret)
1458 }
1459
1460 /// Push a character onto the end.
1461 #[inline]
push_char(&mut self, c: char)1462 pub fn push_char(&mut self, c: char) {
1463 unsafe {
1464 let mut utf_8: [u8; 4] = mem::uninitialized();
1465 let bytes_written = {
1466 let mut buffer = &mut utf_8[..];
1467 write!(buffer, "{}", c).ok().expect("Tendril::push_char: internal error");
1468 debug_assert!(buffer.len() <= 4);
1469 4 - buffer.len()
1470 };
1471 self.push_bytes_without_validating(unsafe_slice(&utf_8, 0, bytes_written));
1472 }
1473 }
1474
1475 /// Create a `Tendril` from a single character.
1476 #[inline]
from_char(c: char) -> Tendril<fmt::UTF8, A>1477 pub fn from_char(c: char) -> Tendril<fmt::UTF8, A> {
1478 let mut t: Tendril<fmt::UTF8, A> = Tendril::new();
1479 t.push_char(c);
1480 t
1481 }
1482
1483 /// Helper for the `format_tendril!` macro.
1484 #[inline]
format(args: strfmt::Arguments) -> Tendril<fmt::UTF8, A>1485 pub fn format(args: strfmt::Arguments) -> Tendril<fmt::UTF8, A> {
1486 use std::fmt::Write;
1487 let mut output: Tendril<fmt::UTF8, A> = Tendril::new();
1488 let _ = write!(&mut output, "{}", args);
1489 output
1490 }
1491 }
1492
/// Create a `StrTendril` through string formatting.
///
/// Works just like the standard `format!` macro.
#[macro_export]
macro_rules! format_tendril {
    // All tokens are forwarded to `format_args!`; the result is handed to
    // `StrTendril::format`.
    ($($arg:tt)*) => ($crate::StrTendril::format(format_args!($($arg)*)))
}
1500
1501
/// Builds a tendril from a borrowed slice of the format's native slice type.
impl<'a, F, A> From<&'a F::Slice> for Tendril<F, A>
    where F: fmt::SliceFormat,
          A: Atomicity,
{
    /// Copies `input`; equivalent to `Tendril::from_slice(input)`.
    #[inline]
    fn from(input: &F::Slice) -> Tendril<F, A> {
        Tendril::from_slice(input)
    }
}
1511
/// Builds a UTF-8 tendril from an owned `String`.
impl<A> From<String> for Tendril<fmt::UTF8, A>
    where A: Atomicity,
{
    /// Goes through `Tendril::from_slice`, i.e. builds the tendril from the
    /// string's contents rather than taking over the `String` itself.
    #[inline]
    fn from(input: String) -> Tendril<fmt::UTF8, A> {
        Tendril::from_slice(&*input)
    }
}
1520
/// Borrows a tendril as its format's native slice type.
impl<F, A> AsRef<F::Slice> for Tendril<F, A>
    where F: fmt::SliceFormat,
          A: Atomicity,
{
    /// Returns the same slice the tendril dereferences to.
    #[inline]
    fn as_ref(&self) -> &F::Slice {
        &**self
    }
}
1530
/// Converts an owned UTF-8 tendril into a `String`.
impl<A> From<Tendril<fmt::UTF8, A>> for String
    where A: Atomicity,
{
    /// Builds the `String` from the tendril's `str` contents.
    #[inline]
    fn from(input: Tendril<fmt::UTF8, A>) -> String {
        String::from(&*input)
    }
}
1539
/// Converts a borrowed UTF-8 tendril into a `String`.
impl<'a, A> From<&'a Tendril<fmt::UTF8, A>> for String
    where A: Atomicity,
{
    /// Builds the `String` from the tendril's `str` contents.
    #[inline]
    fn from(input: &'a Tendril<fmt::UTF8, A>) -> String {
        String::from(&**input)
    }
}
1548
1549
// Benchmarks live in `bench.rs` and are only compiled for test builds with
// the `bench` feature enabled.
#[cfg(all(test, feature = "bench"))]
#[path="bench.rs"]
mod bench;
1553
1554 #[cfg(test)]
1555 mod test {
1556 use super::{Tendril, ByteTendril, StrTendril, SendTendril,
1557 ReadExt, SliceExt, Header, NonAtomic, Atomic};
1558 use fmt;
1559 use std::iter;
1560 use std::thread;
1561
assert_send<T: Send>()1562 fn assert_send<T: Send>() { }
1563
    /// Round-trips empty, short and longer literals through `to_tendril`,
    /// for both `str` and byte slices.
    #[test]
    fn smoke_test() {
        assert_eq!("", &*"".to_tendril());
        assert_eq!("abc", &*"abc".to_tendril());
        assert_eq!("Hello, world!", &*"Hello, world!".to_tendril());

        assert_eq!(b"", &*b"".to_tendril());
        assert_eq!(b"abc", &*b"abc".to_tendril());
        assert_eq!(b"Hello, world!", &*b"Hello, world!".to_tendril());
    }
1574
    /// Pins the in-memory size of tendrils, `Option<Tendril>` and `Header`.
    #[test]
    fn assert_sizes() {
        use std::mem;
        // Probe type: it has non-zero size only if the compiler stores an
        // inline drop flag in types that implement `Drop`.
        struct EmptyWithDrop;
        impl Drop for EmptyWithDrop {
            fn drop(&mut self) {}
        }
        let compiler_uses_inline_drop_flags = mem::size_of::<EmptyWithDrop>() > 0;

        // One pointer plus 8 bytes, plus a drop flag where applicable.
        let correct = mem::size_of::<*const ()>() + 8 +
            if compiler_uses_inline_drop_flags { 1 } else { 0 };

        assert_eq!(correct, mem::size_of::<ByteTendril>());
        assert_eq!(correct, mem::size_of::<StrTendril>());

        // Wrapping in `Option` must not add any space.
        assert_eq!(correct, mem::size_of::<Option<ByteTendril>>());
        assert_eq!(correct, mem::size_of::<Option<StrTendril>>());

        // The header size must not depend on the atomicity.
        let correct_header = mem::size_of::<*const ()>() + 4;
        assert_eq!(correct_header, mem::size_of::<Header<Atomic>>());
        assert_eq!(correct_header, mem::size_of::<Header<NonAtomic>>());
    }
1597
    /// Byte tendrils accept anything; string tendrils reject invalid or
    /// truncated UTF-8, both on construction and on push.
    #[test]
    fn validate_utf8() {
        assert!(ByteTendril::try_from_byte_slice(b"\xFF").is_ok());
        assert!(StrTendril::try_from_byte_slice(b"\xFF").is_err());
        assert!(StrTendril::try_from_byte_slice(b"\xEA\x99\xFF").is_err());
        assert!(StrTendril::try_from_byte_slice(b"\xEA\x99").is_err());
        assert!(StrTendril::try_from_byte_slice(b"\xEA\x99\xAE\xEA").is_err());
        assert_eq!("\u{a66e}", &*StrTendril::try_from_byte_slice(b"\xEA\x99\xAE").unwrap());

        // A character cannot be pushed in pieces: each piece is rejected,
        // and the tendril ends up holding only the complete character.
        let mut t = StrTendril::new();
        assert!(t.try_push_bytes(b"\xEA\x99").is_err());
        assert!(t.try_push_bytes(b"\xAE").is_err());
        assert!(t.try_push_bytes(b"\xEA\x99\xAE").is_ok());
        assert_eq!("\u{a66e}", &*t);
    }
1613
    /// Cloning shares the buffer; pushing to one clone unshares it and
    /// leaves the other untouched.
    #[test]
    fn share_and_unshare() {
        let s = b"foobarbaz".to_tendril();
        assert_eq!(b"foobarbaz", &*s);
        assert!(!s.is_shared());

        // After the clone both tendrils point at the same data.
        let mut t = s.clone();
        assert_eq!(s.as_ptr(), t.as_ptr());
        assert!(s.is_shared());
        assert!(t.is_shared());

        // Mutating `t` gives it its own data.
        t.push_slice(b"quux");
        assert_eq!(b"foobarbaz", &*s);
        assert_eq!(b"foobarbazquux", &*t);
        assert!(s.as_ptr() != t.as_ptr());
        assert!(!t.is_shared());
    }
1631
    /// `Display` output matches the contents before and after cloning, and
    /// after one of the clones is extended.
    #[test]
    fn format_display() {
        assert_eq!("foobar", &*format!("{}", "foobar".to_tendril()));

        let mut s = "foo".to_tendril();
        assert_eq!("foo", &*format!("{}", s));

        let t = s.clone();
        assert_eq!("foo", &*format!("{}", s));
        assert_eq!("foo", &*format!("{}", t));

        // Extending `s` must not be visible through `t`.
        s.push_slice("barbaz!");
        assert_eq!("foobarbaz!", &*format!("{}", s));
        assert_eq!("foo", &*format!("{}", t));
    }
1647
    /// `Debug` output names the format and the storage kind
    /// (inline / owned / shared) along with the contents.
    #[test]
    fn format_debug() {
        assert_eq!(r#"Tendril<UTF8>(inline: "foobar")"#,
                   &*format!("{:?}", "foobar".to_tendril()));
        assert_eq!(r#"Tendril<Bytes>(inline: [102, 111, 111, 98, 97, 114])"#,
                   &*format!("{:?}", b"foobar".to_tendril()));

        let t = "anextralongstring".to_tendril();
        assert_eq!(r#"Tendril<UTF8>(owned: "anextralongstring")"#,
                   &*format!("{:?}", t));
        // Cloning switches `t` to the shared representation, even though the
        // clone itself is discarded right away.
        let _ = t.clone();
        assert_eq!(r#"Tendril<UTF8>(shared: "anextralongstring")"#,
                   &*format!("{:?}", t));
    }
1662
    /// `subtendril`, `pop_front` and `pop_back` on both short and long
    /// strings.
    #[test]
    fn subtendril() {
        assert_eq!("foo".to_tendril(), "foo-bar".to_tendril().subtendril(0, 3));
        assert_eq!("bar".to_tendril(), "foo-bar".to_tendril().subtendril(4, 3));

        let mut t = "foo-bar".to_tendril();
        t.pop_front(2);
        assert_eq!("o-bar".to_tendril(), t);
        t.pop_back(1);
        assert_eq!("o-ba".to_tendril(), t);

        assert_eq!("foo".to_tendril(),
            "foo-a-longer-string-bar-baz".to_tendril().subtendril(0, 3));
        assert_eq!("oo-a-".to_tendril(),
            "foo-a-longer-string-bar-baz".to_tendril().subtendril(1, 5));
        assert_eq!("bar".to_tendril(),
            "foo-a-longer-string-bar-baz".to_tendril().subtendril(20, 3));

        // Popping from a long tendril leaves it in the shared state.
        let mut t = "another rather long string".to_tendril();
        t.pop_front(2);
        assert!(t.starts_with("other rather"));
        t.pop_back(1);
        assert_eq!("other rather long strin".to_tendril(), t);
        assert!(t.is_shared());
    }
1688
    /// Slicing or popping in the middle of a multi-byte UTF-8 character is
    /// rejected.
    #[test]
    fn subtendril_invalid() {
        // U+A66E is three bytes long.
        assert!("\u{a66e}".to_tendril().try_subtendril(0, 2).is_err());
        assert!("\u{a66e}".to_tendril().try_subtendril(1, 2).is_err());

        // U+1F4A9 is four bytes long; every proper sub-range is invalid.
        assert!("\u{1f4a9}".to_tendril().try_subtendril(0, 3).is_err());
        assert!("\u{1f4a9}".to_tendril().try_subtendril(0, 2).is_err());
        assert!("\u{1f4a9}".to_tendril().try_subtendril(0, 1).is_err());
        assert!("\u{1f4a9}".to_tendril().try_subtendril(1, 3).is_err());
        assert!("\u{1f4a9}".to_tendril().try_subtendril(1, 2).is_err());
        assert!("\u{1f4a9}".to_tendril().try_subtendril(1, 1).is_err());
        assert!("\u{1f4a9}".to_tendril().try_subtendril(2, 2).is_err());
        assert!("\u{1f4a9}".to_tendril().try_subtendril(2, 1).is_err());
        assert!("\u{1f4a9}".to_tendril().try_subtendril(3, 1).is_err());

        // Only popping the whole four-byte character from the front works.
        let mut t = "\u{1f4a9}zzzzzz".to_tendril();
        assert!(t.try_pop_front(1).is_err());
        assert!(t.try_pop_front(2).is_err());
        assert!(t.try_pop_front(3).is_err());
        assert!(t.try_pop_front(4).is_ok());
        assert_eq!("zzzzzz", &*t);

        // Likewise from the back.
        let mut t = "zzzzzz\u{1f4a9}".to_tendril();
        assert!(t.try_pop_back(1).is_err());
        assert!(t.try_pop_back(2).is_err());
        assert!(t.try_pop_back(3).is_err());
        assert!(t.try_pop_back(4).is_ok());
        assert_eq!("zzzzzz", &*t);
    }
1718
    /// Conversions between formats: to bytes, to a superset, and the
    /// fallible reinterpretations to a subset.
    #[test]
    fn conversion() {
        assert_eq!(&[0x66, 0x6F, 0x6F].to_tendril(), "foo".to_tendril().as_bytes());
        assert_eq!([0x66, 0x6F, 0x6F].to_tendril(), "foo".to_tendril().into_bytes());

        let ascii: Tendril<fmt::ASCII> = b"hello".to_tendril().try_reinterpret().unwrap();
        assert_eq!(&"hello".to_tendril(), ascii.as_superset());
        assert_eq!("hello".to_tendril(), ascii.clone().into_superset());

        // 0xFF is not ASCII.
        assert!(b"\xFF".to_tendril().try_reinterpret::<fmt::ASCII>().is_err());

        let t = "hello".to_tendril();
        let ascii: &Tendril<fmt::ASCII> = t.try_as_subset().unwrap();
        assert_eq!(b"hello", &**ascii.as_bytes());

        // A non-ASCII character blocks every route to an ASCII tendril.
        assert!("ő".to_tendril().try_reinterpret_view::<fmt::ASCII>().is_err());
        assert!("ő".to_tendril().try_as_subset::<fmt::ASCII>().is_err());

        let ascii: Tendril<fmt::ASCII> = "hello".to_tendril().try_into_subset().unwrap();
        assert_eq!(b"hello", &**ascii.as_bytes());

        assert!("ő".to_tendril().try_reinterpret::<fmt::ASCII>().is_err());
        assert!("ő".to_tendril().try_into_subset::<fmt::ASCII>().is_err());
    }
1743
    /// `clear` empties a short tendril and a longer, cloned one; the clone
    /// keeps its contents.
    #[test]
    fn clear() {
        let mut t = "foo-".to_tendril();
        t.clear();
        assert_eq!(t.len(), 0);
        assert_eq!(t.len32(), 0);
        assert_eq!(&*t, "");

        let mut t = "much longer".to_tendril();
        let s = t.clone();
        t.clear();
        assert_eq!(t.len(), 0);
        assert_eq!(t.len32(), 0);
        assert_eq!(&*t, "");
        assert_eq!(&*s, "much longer");
    }
1760
    /// Appending one tendril to another concatenates their contents.
    #[test]
    fn push_tendril() {
        let mut t = "abc".to_tendril();
        t.push_tendril(&"xyz".to_tendril());
        assert_eq!("abcxyz", &*t);
    }
1767
    /// WTF-8 handling: lone surrogates are accepted, an already-encoded
    /// surrogate pair is rejected, and pushing a trail surrogate after a
    /// lead surrogate merges them into one four-byte character.
    #[test]
    fn wtf8() {
        assert!(Tendril::<fmt::WTF8>::try_from_byte_slice(b"\xED\xA0\xBD").is_ok());
        assert!(Tendril::<fmt::WTF8>::try_from_byte_slice(b"\xED\xB2\xA9").is_ok());
        assert!(Tendril::<fmt::WTF8>::try_from_byte_slice(b"\xED\xA0\xBD\xED\xB2\xA9").is_err());

        let t: Tendril<fmt::WTF8>
            = Tendril::try_from_byte_slice(b"\xED\xA0\xBD\xEA\x99\xAE").unwrap();
        assert!(b"\xED\xA0\xBD".to_tendril().try_reinterpret().unwrap()
            == t.subtendril(0, 3));
        assert!(b"\xEA\x99\xAE".to_tendril().try_reinterpret().unwrap()
            == t.subtendril(3, 3));
        // A lone surrogate is not valid UTF-8.
        assert!(t.try_reinterpret_view::<fmt::UTF8>().is_err());

        // Sub-ranges that cut through the first three-byte sequence.
        assert!(t.try_subtendril(0, 1).is_err());
        assert!(t.try_subtendril(0, 2).is_err());
        assert!(t.try_subtendril(1, 1).is_err());

        // Sub-ranges that cut through the second three-byte sequence.
        assert!(t.try_subtendril(3, 1).is_err());
        assert!(t.try_subtendril(3, 2).is_err());
        assert!(t.try_subtendril(4, 1).is_err());

        // paired surrogates
        let mut t: Tendril<fmt::WTF8> = Tendril::try_from_byte_slice(b"\xED\xA0\xBD").unwrap();
        assert!(t.try_push_bytes(b"\xED\xB2\xA9").is_ok());
        assert_eq!(b"\xF0\x9F\x92\xA9", t.as_byte_slice());
        assert!(t.try_reinterpret_view::<fmt::UTF8>().is_ok());

        // unpaired surrogates
        let mut t: Tendril<fmt::WTF8> = Tendril::try_from_byte_slice(b"\xED\xA0\xBB").unwrap();
        assert!(t.try_push_bytes(b"\xED\xA0").is_err());
        assert!(t.try_push_bytes(b"\xED").is_err());
        assert!(t.try_push_bytes(b"\xA0").is_err());
        assert!(t.try_push_bytes(b"\xED\xA0\xBD").is_ok());
        assert_eq!(b"\xED\xA0\xBB\xED\xA0\xBD", t.as_byte_slice());
        assert!(t.try_push_bytes(b"\xED\xB2\xA9").is_ok());
        assert_eq!(b"\xED\xA0\xBB\xF0\x9F\x92\xA9", t.as_byte_slice());
        assert!(t.try_reinterpret_view::<fmt::UTF8>().is_err());
    }
1807
    /// `pop_front_char` on an empty, a short and a longer tendril.
    #[test]
    fn front_char() {
        let mut t = "".to_tendril();
        assert_eq!(None, t.pop_front_char());
        assert_eq!(None, t.pop_front_char());

        let mut t = "abc".to_tendril();
        assert_eq!(Some('a'), t.pop_front_char());
        assert_eq!(Some('b'), t.pop_front_char());
        assert_eq!(Some('c'), t.pop_front_char());
        assert_eq!(None, t.pop_front_char());
        assert_eq!(None, t.pop_front_char());

        // 'ő' takes two bytes, so four characters remove five bytes.
        let mut t = "főo-a-longer-string-bar-baz".to_tendril();
        assert_eq!(28, t.len());
        assert_eq!(Some('f'), t.pop_front_char());
        assert_eq!(Some('ő'), t.pop_front_char());
        assert_eq!(Some('o'), t.pop_front_char());
        assert_eq!(Some('-'), t.pop_front_char());
        assert_eq!(23, t.len());
    }
1829
    /// Table-driven check of `pop_front_char_run` with `char::is_whitespace`
    /// as the classifier.
    #[test]
    fn char_run() {
        // Each entry is (input, expected (run, class)); `None` means the
        // input is empty and no run is returned.
        for &(s, exp) in &[
            ("", None),
            (" ", Some((" ", true))),
            ("x", Some(("x", false))),
            (" \t \n", Some((" \t \n", true))),
            ("xyzzy", Some(("xyzzy", false))),
            (" xyzzy", Some((" ", true))),
            ("xyzzy ", Some(("xyzzy", false))),
            (" xyzzy ", Some((" ", true))),
            ("xyzzy hi", Some(("xyzzy", false))),
            ("中 ", Some(("中", false))),
            (" 中 ", Some((" ", true))),
            (" 中 ", Some((" ", true))),
            (" 中 ", Some((" ", true))),
        ] {
            let mut t = s.to_tendril();
            let res = t.pop_front_char_run(char::is_whitespace);
            match exp {
                None => assert!(res.is_none()),
                Some((es, ec)) => {
                    let (rt, rc) = res.unwrap();
                    assert_eq!(es, &*rt);
                    assert_eq!(ec, rc);
                }
            }
        }
    }
1859
    /// Mutating a short byte tendril in place through indexing, and growing
    /// it with `push_uninitialized`.
    #[test]
    fn deref_mut_inline() {
        // "xyő" is four bytes: 'x', 'y', 0xC5, 0x91.
        let mut t = "xyő".to_tendril().into_bytes();
        t[3] = 0xff;
        assert_eq!(b"xy\xC5\xFF", &*t);
        assert!(t.try_reinterpret_view::<fmt::UTF8>().is_err());
        // 0xC5 0x8B is 'ŋ', so the bytes are valid UTF-8 again.
        t[3] = 0x8b;
        assert_eq!("xyŋ", &**t.try_reinterpret_view::<fmt::UTF8>().unwrap());

        unsafe {
            // Grow by three bytes and fill them with the encoding of U+A66E.
            t.push_uninitialized(3);
            t[4] = 0xEA;
            t[5] = 0x99;
            t[6] = 0xAE;
            assert_eq!("xyŋ\u{a66e}", &**t.try_reinterpret_view::<fmt::UTF8>().unwrap());
            // Growing by 20 and popping the same amount keeps the contents.
            t.push_uninitialized(20);
            t.pop_back(20);
            assert_eq!("xyŋ\u{a66e}", &**t.try_reinterpret_view::<fmt::UTF8>().unwrap());
        }
    }
1880
    /// Writing through a shared tendril unshares it and leaves the other
    /// clone unchanged.
    #[test]
    fn deref_mut() {
        let mut t = b"0123456789".to_tendril();
        let u = t.clone();
        assert!(t.is_shared());
        t[9] = 0xff;
        assert!(!t.is_shared());
        assert_eq!(b"0123456789", &*u);
        assert_eq!(b"012345678\xff", &*t);
    }
1891
    /// `push_char` with characters of 1, 2, 3 and 4 UTF-8 bytes.
    #[test]
    fn push_char() {
        let mut t = "xyz".to_tendril();
        t.push_char('o');
        assert_eq!("xyzo", &*t);
        t.push_char('ő');
        assert_eq!("xyzoő", &*t);
        t.push_char('\u{a66e}');
        assert_eq!("xyzoő\u{a66e}", &*t);
        t.push_char('\u{1f4a9}');
        assert_eq!("xyzoő\u{a66e}\u{1f4a9}", &*t);
        // 3 + 1 + 2 + 3 + 4 bytes.
        assert_eq!(t.len(), 13);
    }
1905
    /// Encoding UTF-8 tendrils into legacy encodings, in strict and
    /// replacing modes.
    #[test]
    #[cfg(feature = "encoding")]
    fn encode() {
        use encoding::{all, EncoderTrap};

        let t = "안녕하세요 러스트".to_tendril();
        assert_eq!(b"\xbe\xc8\xb3\xe7\xc7\xcf\xbc\xbc\xbf\xe4\x20\xb7\xaf\xbd\xba\xc6\xae",
                   &*t.encode(all::WINDOWS_949, EncoderTrap::Strict).unwrap());

        // The trailing U+A66E comes out as '?' under `EncoderTrap::Replace`.
        let t = "Энергия пробуждения ия-я-я! \u{a66e}".to_tendril();
        assert_eq!(b"\xfc\xce\xc5\xd2\xc7\xc9\xd1 \xd0\xd2\xcf\xc2\xd5\xd6\xc4\xc5\xce\
                     \xc9\xd1 \xc9\xd1\x2d\xd1\x2d\xd1\x21 ?",
                   &*t.encode(all::KOI8_U, EncoderTrap::Replace).unwrap());

        // Strict mode fails on a character the target encoding lacks.
        let t = "\u{1f4a9}".to_tendril();
        assert!(t.encode(all::WINDOWS_1252, EncoderTrap::Strict).is_err());
    }
1923
    /// Decoding byte tendrils from legacy encodings and from invalid UTF-8,
    /// in strict and replacing modes.
    #[test]
    #[cfg(feature = "encoding")]
    fn decode() {
        use encoding::{all, DecoderTrap};

        let t = b"\xbe\xc8\xb3\xe7\xc7\xcf\xbc\xbc\
                  \xbf\xe4\x20\xb7\xaf\xbd\xba\xc6\xae".to_tendril();
        assert_eq!("안녕하세요 러스트",
                   &*t.decode(all::WINDOWS_949, DecoderTrap::Strict).unwrap());

        let t = b"\xfc\xce\xc5\xd2\xc7\xc9\xd1 \xd0\xd2\xcf\xc2\xd5\xd6\xc4\xc5\xce\
                  \xc9\xd1 \xc9\xd1\x2d\xd1\x2d\xd1\x21".to_tendril();
        assert_eq!("Энергия пробуждения ия-я-я!",
                   &*t.decode(all::KOI8_U, DecoderTrap::Replace).unwrap());

        // Strict mode rejects the invalid byte...
        let t = b"x \xff y".to_tendril();
        assert!(t.decode(all::UTF_8, DecoderTrap::Strict).is_err());

        // ...while replace mode substitutes U+FFFD for it.
        let t = b"x \xff y".to_tendril();
        assert_eq!("x \u{fffd} y",
                   &*t.decode(all::UTF_8, DecoderTrap::Replace).unwrap());
    }
1946
    /// Character operations on ASCII tendrils.
    #[test]
    fn ascii() {
        // Helper: build an ASCII tendril from bytes, panicking if they are
        // not valid ASCII.
        fn mk(x: &[u8]) -> Tendril<fmt::ASCII> {
            x.to_tendril().try_reinterpret().unwrap()
        }

        let mut t = mk(b"xyz");
        assert_eq!(Some('x'), t.pop_front_char());
        assert_eq!(Some('y'), t.pop_front_char());
        assert_eq!(Some('z'), t.pop_front_char());
        assert_eq!(None, t.pop_front_char());

        let mut t = mk(b" \t xyz");
        assert!(Some((mk(b" \t "), true))
            == t.pop_front_char_run(char::is_whitespace));
        assert!(Some((mk(b"xyz"), false))
            == t.pop_front_char_run(char::is_whitespace));
        assert!(t.pop_front_char_run(char::is_whitespace).is_none());

        // U+00A0 is outside ASCII, so pushing it fails and leaves the
        // contents unchanged.
        let mut t = Tendril::<fmt::ASCII>::new();
        assert!(t.try_push_char('x').is_ok());
        assert!(t.try_push_char('\0').is_ok());
        assert!(t.try_push_char('\u{a0}').is_err());
        assert_eq!(b"x\0", t.as_byte_slice());
    }
1972
    /// Character operations on Latin-1 tendrils.
    #[test]
    fn latin1() {
        // Helper: build a Latin-1 tendril from bytes.
        fn mk(x: &[u8]) -> Tendril<fmt::Latin1> {
            x.to_tendril().try_reinterpret().unwrap()
        }

        // Byte 0xD8 decodes to 'Ø'.
        let mut t = mk(b"\xd8_\xd8");
        assert_eq!(Some('Ø'), t.pop_front_char());
        assert_eq!(Some('_'), t.pop_front_char());
        assert_eq!(Some('Ø'), t.pop_front_char());
        assert_eq!(None, t.pop_front_char());

        let mut t = mk(b" \t \xfe\xa7z");
        assert!(Some((mk(b" \t "), true))
            == t.pop_front_char_run(char::is_whitespace));
        assert!(Some((mk(b"\xfe\xa7z"), false))
            == t.pop_front_char_run(char::is_whitespace));
        assert!(t.pop_front_char_run(char::is_whitespace).is_none());

        // Characters up to U+00A0 are accepted here; the larger ones below
        // are rejected and leave the contents unchanged.
        let mut t = Tendril::<fmt::Latin1>::new();
        assert!(t.try_push_char('x').is_ok());
        assert!(t.try_push_char('\0').is_ok());
        assert!(t.try_push_char('\u{a0}').is_ok());
        assert!(t.try_push_char('ő').is_err());
        assert!(t.try_push_char('я').is_err());
        assert!(t.try_push_char('\u{a66e}').is_err());
        assert!(t.try_push_char('\u{1f4a9}').is_err());
        assert_eq!(b"x\0\xa0", t.as_byte_slice());
    }
2002
    /// `format_tendril!` with an empty format string and with an argument.
    #[test]
    fn format() {
        assert_eq!("", &*format_tendril!(""));
        assert_eq!("two and two make 4", &*format_tendril!("two and two make {}", 2+2));
    }
2008
    /// Pushing an adjacent subtendril of the same buffer keeps everything
    /// shared with the original buffer.
    #[test]
    fn merge_shared() {
        let t = "012345678901234567890123456789".to_tendril();
        let a = t.subtendril(10, 20);
        assert!(a.is_shared());
        assert_eq!("01234567890123456789", &*a);
        let mut b = t.subtendril(0, 10);
        assert!(b.is_shared());
        assert_eq!("0123456789", &*b);

        // `a` starts exactly where `b` ends in the common buffer.
        b.push_tendril(&a);
        assert!(b.is_shared());
        assert!(a.is_shared());
        assert!(a.is_shared_with(&b));
        assert!(b.is_shared_with(&a));
        assert_eq!("012345678901234567890123456789", &*b);

        assert!(t.is_shared());
        assert!(t.is_shared_with(&a));
        assert!(t.is_shared_with(&b));
    }
2030
/// Pushes an unrelated tendril onto a shared subtendril and checks that the
/// result is no longer shared.
#[test]
fn merge_cant_share() {
    let whole = "012345678901234567890123456789".to_tendril();
    let mut head = whole.subtendril(0, 10);
    assert!(head.is_shared());
    assert_eq!("0123456789", &*head);

    {
        // Scoped so the pushed tendril is gone before the checks below.
        let suffix = "abcd".to_tendril();
        head.push_tendril(&suffix);
    }

    assert!(!head.is_shared());
    assert_eq!("0123456789abcd", &*head);
}
2042
/// Checks that calling `reserve` on a shared tendril leaves it shared.
#[test]
fn shared_doesnt_reserve() {
    let mut whole = "012345678901234567890123456789".to_tendril();
    let piece = whole.subtendril(1, 10);

    assert!(whole.is_shared());
    whole.reserve(10);
    assert!(whole.is_shared());

    // `piece` is only needed to keep a second handle alive up to this point.
    drop(piece);
}
2054
/// Checks that the `try_*` range operations return `Err` for out-of-range
/// requests and `Ok` for requests that fit.
#[test]
fn out_of_bounds() {
    // (source text, offset, length) triples that must all be rejected.
    let bad_ranges = [("", 0, 1), ("abc", 0, 4), ("abc", 3, 1), ("abc", 7, 1)];
    for &(text, offset, length) in bad_ranges.iter() {
        assert!(text.to_tendril().try_subtendril(offset, length).is_err());
    }

    // Nothing can be popped from either end of an empty tendril.
    let mut empty = "".to_tendril();
    for &count in &[1, 5, 500] {
        assert!(empty.try_pop_front(count).is_err());
    }
    for &count in &[1, 5, 500] {
        assert!(empty.try_pop_back(count).is_err());
    }

    // Four bytes: one pop from the front succeeds, then asking for more than
    // the remainder fails; likewise from the back.
    let mut four = "abcd".to_tendril();
    assert!(four.try_pop_front(1).is_ok());
    for &count in &[4, 500] {
        assert!(four.try_pop_front(count).is_err());
    }
    assert!(four.try_pop_back(1).is_ok());
    for &count in &[3, 500] {
        assert!(four.try_pop_back(count).is_err());
    }
}
2079
/// Checks that every comparison operator on tendrils agrees with the same
/// operator on the `&str` values they were built from.
#[test]
fn compare() {
    let lefts = ["indiscretions", "validity", "hallucinogenics", "timelessness",
                 "original", "microcosms", "boilers", "mammoth"];
    let rights = ["intrepidly", "frigid", "spa", "cardigans",
                  "guileful", "evaporated", "unenthusiastic", "legitimate"];

    for &a in lefts.iter() {
        let left = a.to_tendril();
        for &b in rights.iter() {
            let right = b.to_tendril();

            // Equality.
            assert_eq!(a.eq(b), left.eq(&right));
            assert_eq!(a.ne(b), left.ne(&right));
            // Ordering predicates.
            assert_eq!(a.lt(b), left.lt(&right));
            assert_eq!(a.le(b), left.le(&right));
            assert_eq!(a.gt(b), left.gt(&right));
            assert_eq!(a.ge(b), left.ge(&right));
            // Full comparisons.
            assert_eq!(a.partial_cmp(b), left.partial_cmp(&right));
            assert_eq!(a.cmp(b), left.cmp(&right));
        }
    }
}
2100
/// Exercises `Extend<T>` and `FromIterator<T>` on tendrils for each item type
/// used below: `&Tendril`, `&str`, `&[u8]`, `char`, `&u8` and `u8`.
#[test]
fn extend_and_from_iterator() {
    // Items: &Tendril<F>
    let mut greeting = "Hello".to_tendril();
    greeting.extend(None::<&Tendril<_>>.into_iter());
    assert_eq!("Hello", &*greeting);
    let rest = [", ".to_tendril(), "world".to_tendril(), "!".to_tendril()];
    greeting.extend(&rest);
    assert_eq!("Hello, world!", &*greeting);
    let parts = ["Hello".to_tendril(), ", ".to_tendril(),
                 "world".to_tendril(), "!".to_tendril()];
    let joined = parts.iter().collect::<StrTendril>();
    assert_eq!("Hello, world!", &*joined);

    // Items: &str
    let mut greeting = "Hello".to_tendril();
    greeting.extend(None::<&str>.into_iter());
    assert_eq!("Hello", &*greeting);
    let rest = [", ", "world", "!"];
    greeting.extend(rest.iter().cloned());
    assert_eq!("Hello, world!", &*greeting);
    let parts = ["Hello", ", ", "world", "!"];
    let joined: StrTendril = parts.iter().cloned().collect();
    assert_eq!("Hello, world!", &*joined);

    // Items: &[u8]
    let mut greeting = b"Hello".to_tendril();
    greeting.extend(None::<&[u8]>.into_iter());
    assert_eq!(b"Hello", &*greeting);
    let rest: [&[u8]; 3] = [b", ", b"world", b"!"];
    greeting.extend(rest.iter().cloned());
    assert_eq!(b"Hello, world!", &*greeting);
    let parts: [&[u8]; 4] = [b"Hello", b", ", b"world", b"!"];
    let joined: ByteTendril = parts.iter().cloned().collect();
    assert_eq!(b"Hello, world!", &*joined);

    // Shared fixtures for the per-element cases.
    let string = "the quick brown fox jumps over the lazy dog";
    let string_expected = string.to_tendril();
    let bytes = string.as_bytes();
    let bytes_expected = bytes.to_tendril();

    // Items: char
    let collected: StrTendril = string.chars().collect();
    assert_eq!(string_expected, collected);
    let mut extended = StrTendril::new();
    extended.extend(string.chars());
    assert_eq!(string_expected, extended);

    // Items: &u8
    let collected: ByteTendril = bytes.iter().collect();
    assert_eq!(bytes_expected, collected);
    let mut extended = ByteTendril::new();
    extended.extend(bytes);
    assert_eq!(bytes_expected, extended);

    // Items: u8
    let collected: ByteTendril = bytes.iter().cloned().collect();
    assert_eq!(bytes_expected, collected);
    let mut extended = ByteTendril::new();
    extended.extend(bytes.iter().cloned());
    assert_eq!(bytes_expected, extended);
}
2157
/// Checks that a tendril built through `FromStr` holds the source text.
#[test]
fn from_str() {
    use std::str::FromStr;

    let parsed: StrTendril = FromStr::from_str("foo bar baz").unwrap();
    assert_eq!("foo bar baz", &*parsed);
}
2164
/// Checks `StrTendril::from_char` against the matching string literal for each
/// of the characters listed below.
#[test]
fn from_char() {
    // (expected text, input char) pairs.
    let cases = [("o", 'o'), ("ő", 'ő'), ("\u{a66e}", '\u{a66e}'), ("\u{1f4a9}", '\u{1f4a9}')];
    for &(expected, c) in cases.iter() {
        assert_eq!(expected, &*StrTendril::from_char(c));
    }
}
2172
/// Checks `read_to_tendril` on empty, short, and one-million-byte inputs.
#[test]
fn read() {
    // Reads all of `x` through a `Cursor` into a fresh byte tendril and checks
    // both the reported byte count and the resulting content.
    fn check(x: &[u8]) {
        use std::io::Cursor;
        let mut sink = ByteTendril::new();
        let count = Cursor::new(x).read_to_tendril(&mut sink).unwrap();
        assert_eq!(x.len(), count);
        assert_eq!(x, &*sink);
    }

    check(b"");
    check(b"abcd");

    let million = iter::repeat(b'x').take(1_000_000).collect::<Vec<u8>>();
    check(&million);
}
2188
/// Checks that tendrils used as `HashMap` keys can be looked up by byte slice.
#[test]
fn hash_map_key() {
    use std::collections::HashMap;

    let present: &[u8] = b"foo";
    let absent: &[u8] = b"bar";

    // As noted with Borrow, indexing on HashMap<StrTendril, _> is byte-based because of
    // https://github.com/rust-lang/rust/issues/27108.
    let mut by_str = HashMap::new();
    by_str.insert("foo".to_tendril(), 1);
    assert_eq!(by_str.get(present), Some(&1));
    assert_eq!(by_str.get(absent), None);

    // Same lookups with a byte tendril as the key.
    let mut by_bytes = HashMap::new();
    by_bytes.insert(b"foo".to_tendril(), 1);
    assert_eq!(by_bytes.get(present), Some(&1));
    assert_eq!(by_bytes.get(absent), None);
}
2205
/// Moves a clone of an `Atomic` tendril into another thread, appends to it
/// there, and checks the sharing state and contents seen on both sides.
#[test]
fn atomic() {
    assert_send::<Tendril<fmt::UTF8, Atomic>>();

    let original: Tendril<fmt::UTF8, Atomic> = Tendril::from_slice("this is a string");
    assert!(!original.is_shared());

    // Cloning marks the original as shared.
    let mut copy = original.clone();
    assert!(original.is_shared());

    // Address of the original's data, carried into the thread as a plain integer.
    let original_addr = original.as_ptr() as usize;

    let worker = thread::spawn(move || {
        assert!(copy.is_shared());
        copy.push_slice(" extended");
        assert_eq!("this is a string extended", &*copy);
        // After the push the copy's data lives elsewhere and is no longer shared.
        assert!(copy.as_ptr() as usize != original_addr);
        assert!(!copy.is_shared());
    });
    worker.join().unwrap();

    // The original still reports shared and its content is unchanged.
    assert!(original.is_shared());
    assert_eq!("this is a string", &*original);
}
2224
/// Converts a tendril that has a live clone into a `SendTendril`, rebuilds a
/// `StrTendril` from it on another thread, and checks both ends.
#[test]
fn send() {
    assert_send::<SendTendril<fmt::UTF8>>();

    let original = "this is a string".to_tendril();
    let kept = original.clone();
    let sendable = original.into_send();

    let worker = thread::spawn(move || {
        let received = StrTendril::from(sendable);
        assert!(!received.is_shared());
        assert_eq!("this is a string", &*received);
    });
    worker.join().unwrap();

    // The clone left on this thread still reads the same text.
    assert_eq!("this is a string", &*kept);
}
2238
/// Same round trip as a `SendTendril` across a thread, using the one-byte
/// string "x" as the payload.
#[test]
fn inline_send() {
    let original = "x".to_tendril();
    let kept = original.clone();
    let sendable = original.into_send();

    let worker = thread::spawn(move || {
        let received = StrTendril::from(sendable);
        assert!(!received.is_shared());
        assert_eq!("x", &*received);
    });
    worker.join().unwrap();

    // The clone left on this thread still reads the same text.
    assert_eq!("x", &*kept);
}
2251 }
2252