1 #![allow(unused_imports)]
2 
3 use std::borrow::Cow;
4 use std::error;
5 use std::ffi::{OsStr, OsString};
6 use std::fmt;
7 use std::iter;
8 use std::ops;
9 use std::path::{Path, PathBuf};
10 use std::ptr;
11 use std::str;
12 use std::vec;
13 
14 use ext_slice::ByteSlice;
15 use utf8::{self, Utf8Error};
16 
17 /// Concatenate the elements given by the iterator together into a single
18 /// `Vec<u8>`.
19 ///
20 /// The elements may be any type that can be cheaply converted into an `&[u8]`.
21 /// This includes, but is not limited to, `&str`, `&BStr` and `&[u8]` itself.
22 ///
23 /// # Examples
24 ///
25 /// Basic usage:
26 ///
27 /// ```
28 /// use bstr;
29 ///
30 /// let s = bstr::concat(&["foo", "bar", "baz"]);
31 /// assert_eq!(s, "foobarbaz".as_bytes());
32 /// ```
33 #[inline]
concat<T, I>(elements: I) -> Vec<u8> where T: AsRef<[u8]>, I: IntoIterator<Item = T>,34 pub fn concat<T, I>(elements: I) -> Vec<u8>
35 where
36     T: AsRef<[u8]>,
37     I: IntoIterator<Item = T>,
38 {
39     let mut dest = vec![];
40     for element in elements {
41         dest.push_str(element);
42     }
43     dest
44 }
45 
46 /// Join the elements given by the iterator with the given separator into a
47 /// single `Vec<u8>`.
48 ///
49 /// Both the separator and the elements may be any type that can be cheaply
50 /// converted into an `&[u8]`. This includes, but is not limited to,
51 /// `&str`, `&BStr` and `&[u8]` itself.
52 ///
53 /// # Examples
54 ///
55 /// Basic usage:
56 ///
57 /// ```
58 /// use bstr;
59 ///
60 /// let s = bstr::join(",", &["foo", "bar", "baz"]);
61 /// assert_eq!(s, "foo,bar,baz".as_bytes());
62 /// ```
63 #[inline]
join<B, T, I>(separator: B, elements: I) -> Vec<u8> where B: AsRef<[u8]>, T: AsRef<[u8]>, I: IntoIterator<Item = T>,64 pub fn join<B, T, I>(separator: B, elements: I) -> Vec<u8>
65 where
66     B: AsRef<[u8]>,
67     T: AsRef<[u8]>,
68     I: IntoIterator<Item = T>,
69 {
70     let mut it = elements.into_iter();
71     let mut dest = vec![];
72     match it.next() {
73         None => return dest,
74         Some(first) => {
75             dest.push_str(first);
76         }
77     }
78     for element in it {
79         dest.push_str(&separator);
80         dest.push_str(element);
81     }
82     dest
83 }
84 
85 impl ByteVec for Vec<u8> {
as_vec(&self) -> &Vec<u8>86     fn as_vec(&self) -> &Vec<u8> {
87         self
88     }
as_vec_mut(&mut self) -> &mut Vec<u8>89     fn as_vec_mut(&mut self) -> &mut Vec<u8> {
90         self
91     }
into_vec(self) -> Vec<u8>92     fn into_vec(self) -> Vec<u8> {
93         self
94     }
95 }
96 
/// Ensure that callers cannot implement `ByteVec` by making an
/// unimplementable trait its super trait.
pub trait Sealed {}
impl Sealed for Vec<u8> {}
101 
/// A trait that extends `Vec<u8>` with string oriented methods.
103 pub trait ByteVec: Sealed {
    /// A method for accessing the raw vector bytes of this type. This is
    /// always a no-op and callers shouldn't care about it. This only exists
    /// for making the extension trait work: the provided methods of this
    /// trait use it to reach the underlying `Vec<u8>` by shared reference.
    #[doc(hidden)]
    fn as_vec(&self) -> &Vec<u8>;
109 
    /// A method for accessing the raw vector bytes of this type, mutably. This
    /// is always a no-op and callers shouldn't care about it. This only exists
    /// for making the extension trait work: the provided methods of this
    /// trait use it to reach the underlying `Vec<u8>` by mutable reference.
    #[doc(hidden)]
    fn as_vec_mut(&mut self) -> &mut Vec<u8>;
115 
    /// A method for consuming ownership of this vector. This is always a no-op
    /// and callers shouldn't care about it. This only exists for making the
    /// extension trait work: the consuming provided methods of this trait use
    /// it to obtain the underlying `Vec<u8>` by value.
    #[doc(hidden)]
    fn into_vec(self) -> Vec<u8>
    where
        Self: Sized;
123 
124     /// Create a new owned byte string from the given byte slice.
125     ///
126     /// # Examples
127     ///
128     /// Basic usage:
129     ///
130     /// ```
131     /// use bstr::{B, ByteVec};
132     ///
133     /// let s = <Vec<u8>>::from_slice(b"abc");
134     /// assert_eq!(s, B("abc"));
135     /// ```
from_slice<B: AsRef<[u8]>>(bytes: B) -> Vec<u8>136     fn from_slice<B: AsRef<[u8]>>(bytes: B) -> Vec<u8> {
137         bytes.as_ref().to_vec()
138     }
139 
140     /// Create a new byte string from an owned OS string.
141     ///
142     /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
143     /// this returns the original OS string if it is not valid UTF-8.
144     ///
145     /// # Examples
146     ///
147     /// Basic usage:
148     ///
149     /// ```
150     /// use std::ffi::OsString;
151     ///
152     /// use bstr::{B, ByteVec};
153     ///
154     /// let os_str = OsString::from("foo");
155     /// let bs = Vec::from_os_string(os_str).expect("valid UTF-8");
156     /// assert_eq!(bs, B("foo"));
157     /// ```
158     #[inline]
from_os_string(os_str: OsString) -> Result<Vec<u8>, OsString>159     fn from_os_string(os_str: OsString) -> Result<Vec<u8>, OsString> {
160         #[cfg(unix)]
161         #[inline]
162         fn imp(os_str: OsString) -> Result<Vec<u8>, OsString> {
163             use std::os::unix::ffi::OsStringExt;
164 
165             Ok(Vec::from(os_str.into_vec()))
166         }
167 
168         #[cfg(not(unix))]
169         #[inline]
170         fn imp(os_str: OsString) -> Result<Vec<u8>, OsString> {
171             os_str.into_string().map(Vec::from)
172         }
173 
174         imp(os_str)
175     }
176 
177     /// Lossily create a new byte string from an OS string slice.
178     ///
179     /// On Unix, this always succeeds, is zero cost and always returns a slice.
180     /// On non-Unix systems, this does a UTF-8 check. If the given OS string
181     /// slice is not valid UTF-8, then it is lossily decoded into valid UTF-8
182     /// (with invalid bytes replaced by the Unicode replacement codepoint).
183     ///
184     /// # Examples
185     ///
186     /// Basic usage:
187     ///
188     /// ```
189     /// use std::ffi::OsStr;
190     ///
191     /// use bstr::{B, ByteVec};
192     ///
193     /// let os_str = OsStr::new("foo");
194     /// let bs = Vec::from_os_str_lossy(os_str);
195     /// assert_eq!(bs, B("foo"));
196     /// ```
197     #[inline]
from_os_str_lossy<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]>198     fn from_os_str_lossy<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
199         #[cfg(unix)]
200         #[inline]
201         fn imp<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
202             use std::os::unix::ffi::OsStrExt;
203 
204             Cow::Borrowed(os_str.as_bytes())
205         }
206 
207         #[cfg(not(unix))]
208         #[inline]
209         fn imp<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
210             match os_str.to_string_lossy() {
211                 Cow::Borrowed(x) => Cow::Borrowed(x.as_bytes()),
212                 Cow::Owned(x) => Cow::Owned(Vec::from(x)),
213             }
214         }
215 
216         imp(os_str)
217     }
218 
219     /// Create a new byte string from an owned file path.
220     ///
221     /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
222     /// this returns the original path if it is not valid UTF-8.
223     ///
224     /// # Examples
225     ///
226     /// Basic usage:
227     ///
228     /// ```
229     /// use std::path::PathBuf;
230     ///
231     /// use bstr::{B, ByteVec};
232     ///
233     /// let path = PathBuf::from("foo");
234     /// let bs = Vec::from_path_buf(path).expect("must be valid UTF-8");
235     /// assert_eq!(bs, B("foo"));
236     /// ```
237     #[inline]
from_path_buf(path: PathBuf) -> Result<Vec<u8>, PathBuf>238     fn from_path_buf(path: PathBuf) -> Result<Vec<u8>, PathBuf> {
239         Vec::from_os_string(path.into_os_string()).map_err(PathBuf::from)
240     }
241 
242     /// Lossily create a new byte string from a file path.
243     ///
244     /// On Unix, this always succeeds, is zero cost and always returns a slice.
245     /// On non-Unix systems, this does a UTF-8 check. If the given path is not
246     /// valid UTF-8, then it is lossily decoded into valid UTF-8 (with invalid
247     /// bytes replaced by the Unicode replacement codepoint).
248     ///
249     /// # Examples
250     ///
251     /// Basic usage:
252     ///
253     /// ```
254     /// use std::path::Path;
255     ///
256     /// use bstr::{B, ByteVec};
257     ///
258     /// let path = Path::new("foo");
259     /// let bs = Vec::from_path_lossy(path);
260     /// assert_eq!(bs, B("foo"));
261     /// ```
262     #[inline]
from_path_lossy<'a>(path: &'a Path) -> Cow<'a, [u8]>263     fn from_path_lossy<'a>(path: &'a Path) -> Cow<'a, [u8]> {
264         Vec::from_os_str_lossy(path.as_os_str())
265     }
266 
267     /// Appends the given byte to the end of this byte string.
268     ///
269     /// Note that this is equivalent to the generic `Vec::push` method. This
270     /// method is provided to permit callers to explicitly differentiate
271     /// between pushing bytes, codepoints and strings.
272     ///
273     /// # Examples
274     ///
275     /// Basic usage:
276     ///
277     /// ```
278     /// use bstr::ByteVec;
279     ///
280     /// let mut s = <Vec<u8>>::from("abc");
281     /// s.push_byte(b'\xE2');
282     /// s.push_byte(b'\x98');
283     /// s.push_byte(b'\x83');
284     /// assert_eq!(s, "abc☃".as_bytes());
285     /// ```
286     #[inline]
push_byte(&mut self, byte: u8)287     fn push_byte(&mut self, byte: u8) {
288         self.as_vec_mut().push(byte);
289     }
290 
291     /// Appends the given `char` to the end of this byte string.
292     ///
293     /// # Examples
294     ///
295     /// Basic usage:
296     ///
297     /// ```
298     /// use bstr::ByteVec;
299     ///
300     /// let mut s = <Vec<u8>>::from("abc");
301     /// s.push_char('1');
302     /// s.push_char('2');
303     /// s.push_char('3');
304     /// assert_eq!(s, "abc123".as_bytes());
305     /// ```
306     #[inline]
push_char(&mut self, ch: char)307     fn push_char(&mut self, ch: char) {
308         if ch.len_utf8() == 1 {
309             self.push_byte(ch as u8);
310             return;
311         }
312         self.as_vec_mut()
313             .extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes());
314     }
315 
316     /// Appends the given slice to the end of this byte string. This accepts
317     /// any type that be converted to a `&[u8]`. This includes, but is not
318     /// limited to, `&str`, `&BStr`, and of course, `&[u8]` itself.
319     ///
320     /// # Examples
321     ///
322     /// Basic usage:
323     ///
324     /// ```
325     /// use bstr::ByteVec;
326     ///
327     /// let mut s = <Vec<u8>>::from("abc");
328     /// s.push_str(b"123");
329     /// assert_eq!(s, "abc123".as_bytes());
330     /// ```
331     #[inline]
push_str<B: AsRef<[u8]>>(&mut self, bytes: B)332     fn push_str<B: AsRef<[u8]>>(&mut self, bytes: B) {
333         self.as_vec_mut().extend_from_slice(bytes.as_ref());
334     }
335 
336     /// Converts a `Vec<u8>` into a `String` if and only if this byte string is
337     /// valid UTF-8.
338     ///
339     /// If it is not valid UTF-8, then a
340     /// [`FromUtf8Error`](struct.FromUtf8Error.html)
341     /// is returned. (This error can be used to examine why UTF-8 validation
342     /// failed, or to regain the original byte string.)
343     ///
344     /// # Examples
345     ///
346     /// Basic usage:
347     ///
348     /// ```
349     /// use bstr::ByteVec;
350     ///
351     /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
352     /// let bytes = Vec::from("hello");
353     /// let string = bytes.into_string()?;
354     ///
355     /// assert_eq!("hello", string);
356     /// # Ok(()) }; example().unwrap()
357     /// ```
358     ///
359     /// If this byte string is not valid UTF-8, then an error will be returned.
360     /// That error can then be used to inspect the location at which invalid
361     /// UTF-8 was found, or to regain the original byte string:
362     ///
363     /// ```
364     /// use bstr::{B, ByteVec};
365     ///
366     /// let bytes = Vec::from_slice(b"foo\xFFbar");
367     /// let err = bytes.into_string().unwrap_err();
368     ///
369     /// assert_eq!(err.utf8_error().valid_up_to(), 3);
370     /// assert_eq!(err.utf8_error().error_len(), Some(1));
371     ///
372     /// // At no point in this example is an allocation performed.
373     /// let bytes = Vec::from(err.into_vec());
374     /// assert_eq!(bytes, B(b"foo\xFFbar"));
375     /// ```
376     #[inline]
into_string(self) -> Result<String, FromUtf8Error> where Self: Sized,377     fn into_string(self) -> Result<String, FromUtf8Error>
378     where
379         Self: Sized,
380     {
381         match utf8::validate(self.as_vec()) {
382             Err(err) => {
383                 Err(FromUtf8Error { original: self.into_vec(), err: err })
384             }
385             Ok(()) => {
386                 // SAFETY: This is safe because of the guarantees provided by
387                 // utf8::validate.
388                 unsafe { Ok(self.into_string_unchecked()) }
389             }
390         }
391     }
392 
393     /// Lossily converts a `Vec<u8>` into a `String`. If this byte string
394     /// contains invalid UTF-8, then the invalid bytes are replaced with the
395     /// Unicode replacement codepoint.
396     ///
397     /// # Examples
398     ///
399     /// Basic usage:
400     ///
401     /// ```
402     /// use bstr::ByteVec;
403     ///
404     /// let bytes = Vec::from_slice(b"foo\xFFbar");
405     /// let string = bytes.into_string_lossy();
406     /// assert_eq!(string, "foo\u{FFFD}bar");
407     /// ```
408     #[inline]
into_string_lossy(self) -> String where Self: Sized,409     fn into_string_lossy(self) -> String
410     where
411         Self: Sized,
412     {
413         let v = self.as_vec();
414         if let Ok(allutf8) = v.to_str() {
415             return allutf8.to_string();
416         }
417         let mut dst = String::with_capacity(v.len());
418         for ch in v.chars() {
419             dst.push(ch);
420         }
421         dst
422     }
423 
424     /// Unsafely convert this byte string into a `String`, without checking for
425     /// valid UTF-8.
426     ///
427     /// # Safety
428     ///
429     /// Callers *must* ensure that this byte string is valid UTF-8 before
430     /// calling this method. Converting a byte string into a `String` that is
431     /// not valid UTF-8 is considered undefined behavior.
432     ///
433     /// This routine is useful in performance sensitive contexts where the
434     /// UTF-8 validity of the byte string is already known and it is
435     /// undesirable to pay the cost of an additional UTF-8 validation check
436     /// that [`into_string`](#method.into_string) performs.
437     ///
438     /// # Examples
439     ///
440     /// Basic usage:
441     ///
442     /// ```
443     /// use bstr::ByteVec;
444     ///
445     /// // SAFETY: This is safe because string literals are guaranteed to be
446     /// // valid UTF-8 by the Rust compiler.
447     /// let s = unsafe { Vec::from("☃βツ").into_string_unchecked() };
448     /// assert_eq!("☃βツ", s);
449     /// ```
into_string_unchecked(self) -> String where Self: Sized,450     unsafe fn into_string_unchecked(self) -> String
451     where
452         Self: Sized,
453     {
454         String::from_utf8_unchecked(self.into_vec())
455     }
456 
457     /// Converts this byte string into an OS string, in place.
458     ///
459     /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
460     /// this returns the original byte string if it is not valid UTF-8.
461     ///
462     /// # Examples
463     ///
464     /// Basic usage:
465     ///
466     /// ```
467     /// use std::ffi::OsStr;
468     ///
469     /// use bstr::ByteVec;
470     ///
471     /// let bs = Vec::from("foo");
472     /// let os_str = bs.into_os_string().expect("should be valid UTF-8");
473     /// assert_eq!(os_str, OsStr::new("foo"));
474     /// ```
475     #[inline]
into_os_string(self) -> Result<OsString, Vec<u8>> where Self: Sized,476     fn into_os_string(self) -> Result<OsString, Vec<u8>>
477     where
478         Self: Sized,
479     {
480         #[cfg(unix)]
481         #[inline]
482         fn imp(v: Vec<u8>) -> Result<OsString, Vec<u8>> {
483             use std::os::unix::ffi::OsStringExt;
484 
485             Ok(OsString::from_vec(v))
486         }
487 
488         #[cfg(not(unix))]
489         #[inline]
490         fn imp(v: Vec<u8>) -> Result<OsString, Vec<u8>> {
491             match v.into_string() {
492                 Ok(s) => Ok(OsString::from(s)),
493                 Err(err) => Err(err.into_vec()),
494             }
495         }
496 
497         imp(self.into_vec())
498     }
499 
500     /// Lossily converts this byte string into an OS string, in place.
501     ///
502     /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
503     /// this will perform a UTF-8 check and lossily convert this byte string
504     /// into valid UTF-8 using the Unicode replacement codepoint.
505     ///
506     /// Note that this can prevent the correct roundtripping of file paths on
507     /// non-Unix systems such as Windows, where file paths are an arbitrary
508     /// sequence of 16-bit integers.
509     ///
510     /// # Examples
511     ///
512     /// Basic usage:
513     ///
514     /// ```
515     /// use bstr::ByteVec;
516     ///
517     /// let bs = Vec::from_slice(b"foo\xFFbar");
518     /// let os_str = bs.into_os_string_lossy();
519     /// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
520     /// ```
521     #[inline]
into_os_string_lossy(self) -> OsString where Self: Sized,522     fn into_os_string_lossy(self) -> OsString
523     where
524         Self: Sized,
525     {
526         #[cfg(unix)]
527         #[inline]
528         fn imp(v: Vec<u8>) -> OsString {
529             use std::os::unix::ffi::OsStringExt;
530 
531             OsString::from_vec(v)
532         }
533 
534         #[cfg(not(unix))]
535         #[inline]
536         fn imp(v: Vec<u8>) -> OsString {
537             OsString::from(v.into_string_lossy())
538         }
539 
540         imp(self.into_vec())
541     }
542 
543     /// Converts this byte string into an owned file path, in place.
544     ///
545     /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
546     /// this returns the original byte string if it is not valid UTF-8.
547     ///
548     /// # Examples
549     ///
550     /// Basic usage:
551     ///
552     /// ```
553     /// use bstr::ByteVec;
554     ///
555     /// let bs = Vec::from("foo");
556     /// let path = bs.into_path_buf().expect("should be valid UTF-8");
557     /// assert_eq!(path.as_os_str(), "foo");
558     /// ```
559     #[inline]
into_path_buf(self) -> Result<PathBuf, Vec<u8>> where Self: Sized,560     fn into_path_buf(self) -> Result<PathBuf, Vec<u8>>
561     where
562         Self: Sized,
563     {
564         self.into_os_string().map(PathBuf::from)
565     }
566 
567     /// Lossily converts this byte string into an owned file path, in place.
568     ///
569     /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
570     /// this will perform a UTF-8 check and lossily convert this byte string
571     /// into valid UTF-8 using the Unicode replacement codepoint.
572     ///
573     /// Note that this can prevent the correct roundtripping of file paths on
574     /// non-Unix systems such as Windows, where file paths are an arbitrary
575     /// sequence of 16-bit integers.
576     ///
577     /// # Examples
578     ///
579     /// Basic usage:
580     ///
581     /// ```
582     /// use bstr::ByteVec;
583     ///
584     /// let bs = Vec::from_slice(b"foo\xFFbar");
585     /// let path = bs.into_path_buf_lossy();
586     /// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
587     /// ```
588     #[inline]
into_path_buf_lossy(self) -> PathBuf where Self: Sized,589     fn into_path_buf_lossy(self) -> PathBuf
590     where
591         Self: Sized,
592     {
593         PathBuf::from(self.into_os_string_lossy())
594     }
595 
596     /// Removes the last byte from this `Vec<u8>` and returns it.
597     ///
598     /// If this byte string is empty, then `None` is returned.
599     ///
600     /// If the last codepoint in this byte string is not ASCII, then removing
601     /// the last byte could make this byte string contain invalid UTF-8.
602     ///
603     /// Note that this is equivalent to the generic `Vec::pop` method. This
604     /// method is provided to permit callers to explicitly differentiate
605     /// between popping bytes and codepoints.
606     ///
607     /// # Examples
608     ///
609     /// Basic usage:
610     ///
611     /// ```
612     /// use bstr::ByteVec;
613     ///
614     /// let mut s = Vec::from("foo");
615     /// assert_eq!(s.pop_byte(), Some(b'o'));
616     /// assert_eq!(s.pop_byte(), Some(b'o'));
617     /// assert_eq!(s.pop_byte(), Some(b'f'));
618     /// assert_eq!(s.pop_byte(), None);
619     /// ```
620     #[inline]
pop_byte(&mut self) -> Option<u8>621     fn pop_byte(&mut self) -> Option<u8> {
622         self.as_vec_mut().pop()
623     }
624 
625     /// Removes the last codepoint from this `Vec<u8>` and returns it.
626     ///
627     /// If this byte string is empty, then `None` is returned. If the last
628     /// bytes of this byte string do not correspond to a valid UTF-8 code unit
629     /// sequence, then the Unicode replacement codepoint is yielded instead in
630     /// accordance with the
631     /// [replacement codepoint substitution policy](index.html#handling-of-invalid-utf8-8).
632     ///
633     /// # Examples
634     ///
635     /// Basic usage:
636     ///
637     /// ```
638     /// use bstr::ByteVec;
639     ///
640     /// let mut s = Vec::from("foo");
641     /// assert_eq!(s.pop_char(), Some('o'));
642     /// assert_eq!(s.pop_char(), Some('o'));
643     /// assert_eq!(s.pop_char(), Some('f'));
644     /// assert_eq!(s.pop_char(), None);
645     /// ```
646     ///
647     /// This shows the replacement codepoint substitution policy. Note that
648     /// the first pop yields a replacement codepoint but actually removes two
649     /// bytes. This is in contrast with subsequent pops when encountering
650     /// `\xFF` since `\xFF` is never a valid prefix for any valid UTF-8
651     /// code unit sequence.
652     ///
653     /// ```
654     /// use bstr::ByteVec;
655     ///
656     /// let mut s = Vec::from_slice(b"f\xFF\xFF\xFFoo\xE2\x98");
657     /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
658     /// assert_eq!(s.pop_char(), Some('o'));
659     /// assert_eq!(s.pop_char(), Some('o'));
660     /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
661     /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
662     /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
663     /// assert_eq!(s.pop_char(), Some('f'));
664     /// assert_eq!(s.pop_char(), None);
665     /// ```
666     #[inline]
pop_char(&mut self) -> Option<char>667     fn pop_char(&mut self) -> Option<char> {
668         let (ch, size) = utf8::decode_last_lossy(self.as_vec());
669         if size == 0 {
670             return None;
671         }
672         let new_len = self.as_vec().len() - size;
673         self.as_vec_mut().truncate(new_len);
674         Some(ch)
675     }
676 
677     /// Removes a `char` from this `Vec<u8>` at the given byte position and
678     /// returns it.
679     ///
680     /// If the bytes at the given position do not lead to a valid UTF-8 code
681     /// unit sequence, then a
682     /// [replacement codepoint is returned instead](index.html#handling-of-invalid-utf8-8).
683     ///
684     /// # Panics
685     ///
686     /// Panics if `at` is larger than or equal to this byte string's length.
687     ///
688     /// # Examples
689     ///
690     /// Basic usage:
691     ///
692     /// ```
693     /// use bstr::ByteVec;
694     ///
695     /// let mut s = Vec::from("foo☃bar");
696     /// assert_eq!(s.remove_char(3), '☃');
697     /// assert_eq!(s, b"foobar");
698     /// ```
699     ///
700     /// This example shows how the Unicode replacement codepoint policy is
701     /// used:
702     ///
703     /// ```
704     /// use bstr::ByteVec;
705     ///
706     /// let mut s = Vec::from_slice(b"foo\xFFbar");
707     /// assert_eq!(s.remove_char(3), '\u{FFFD}');
708     /// assert_eq!(s, b"foobar");
709     /// ```
710     #[inline]
remove_char(&mut self, at: usize) -> char711     fn remove_char(&mut self, at: usize) -> char {
712         let (ch, size) = utf8::decode_lossy(&self.as_vec()[at..]);
713         assert!(
714             size > 0,
715             "expected {} to be less than {}",
716             at,
717             self.as_vec().len(),
718         );
719         self.as_vec_mut().drain(at..at + size);
720         ch
721     }
722 
723     /// Inserts the given codepoint into this `Vec<u8>` at a particular byte
724     /// position.
725     ///
726     /// This is an `O(n)` operation as it may copy a number of elements in this
727     /// byte string proportional to its length.
728     ///
729     /// # Panics
730     ///
731     /// Panics if `at` is larger than the byte string's length.
732     ///
733     /// # Examples
734     ///
735     /// Basic usage:
736     ///
737     /// ```
738     /// use bstr::ByteVec;
739     ///
740     /// let mut s = Vec::from("foobar");
741     /// s.insert_char(3, '☃');
742     /// assert_eq!(s, "foo☃bar".as_bytes());
743     /// ```
744     #[inline]
insert_char(&mut self, at: usize, ch: char)745     fn insert_char(&mut self, at: usize, ch: char) {
746         self.insert_str(at, ch.encode_utf8(&mut [0; 4]).as_bytes());
747     }
748 
    /// Inserts the given byte string into this byte string at a particular
    /// byte position.
    ///
    /// The bytes previously at `at..` are shifted towards the end to make
    /// room, so this is an `O(n)` operation: it may copy a number of elements
    /// in this byte string proportional to its length.
    ///
    /// The given byte string may be any type that can be cheaply converted
    /// into a `&[u8]`. This includes, but is not limited to, `&str` and
    /// `&[u8]`.
    ///
    /// # Panics
    ///
    /// Panics if `at` is larger than the byte string's length.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use bstr::ByteVec;
    ///
    /// let mut s = Vec::from("foobar");
    /// s.insert_str(3, "☃☃☃");
    /// assert_eq!(s, "foo☃☃☃bar".as_bytes());
    /// ```
    #[inline]
    fn insert_str<B: AsRef<[u8]>>(&mut self, at: usize, bytes: B) {
        let bytes = bytes.as_ref();
        // Remember the length before growing; it is the basis for every
        // offset computed below.
        let len = self.as_vec().len();
        assert!(at <= len, "expected {} to be <= {}", at, len);

        // SAFETY: We'd like to efficiently splice in the given bytes into
        // this byte string. Since we are only working with `u8` elements here,
        // we only need to consider whether our bounds are correct and whether
        // our byte string has enough space. The assertion above establishes
        // `at <= len`, and the `reserve` call below makes room for at least
        // `bytes.len()` additional bytes before any pointer is formed.
        self.as_vec_mut().reserve(bytes.len());
        unsafe {
            // Shift bytes after `at` over by the length of `bytes` to make
            // room for it. This requires referencing two regions of memory
            // that may overlap, so we use ptr::copy.
            ptr::copy(
                self.as_vec().as_ptr().add(at),
                self.as_vec_mut().as_mut_ptr().add(at + bytes.len()),
                len - at,
            );
            // Now copy the bytes given into the room we made above. In this
            // case, we know that the given bytes cannot possibly overlap
            // with this byte string since we have a mutable borrow of the
            // latter. Thus, we can use a nonoverlapping copy.
            ptr::copy_nonoverlapping(
                bytes.as_ptr(),
                self.as_vec_mut().as_mut_ptr().add(at),
                bytes.len(),
            );
            // Every byte in `0..len + bytes.len()` has now been written, so
            // the new length can be published.
            self.as_vec_mut().set_len(len + bytes.len());
        }
    }
806 
807     /// Removes the specified range in this byte string and replaces it with
808     /// the given bytes. The given bytes do not need to have the same length
809     /// as the range provided.
810     ///
811     /// # Panics
812     ///
813     /// Panics if the given range is invalid.
814     ///
815     /// # Examples
816     ///
817     /// Basic usage:
818     ///
819     /// ```
820     /// use bstr::ByteVec;
821     ///
822     /// let mut s = Vec::from("foobar");
823     /// s.replace_range(2..4, "xxxxx");
824     /// assert_eq!(s, "foxxxxxar".as_bytes());
825     /// ```
826     #[inline]
replace_range<R, B>(&mut self, range: R, replace_with: B) where R: ops::RangeBounds<usize>, B: AsRef<[u8]>,827     fn replace_range<R, B>(&mut self, range: R, replace_with: B)
828     where
829         R: ops::RangeBounds<usize>,
830         B: AsRef<[u8]>,
831     {
832         self.as_vec_mut().splice(range, replace_with.as_ref().iter().cloned());
833     }
834 
835     /// Creates a draining iterator that removes the specified range in this
836     /// `Vec<u8>` and yields each of the removed bytes.
837     ///
838     /// Note that the elements specified by the given range are removed
839     /// regardless of whether the returned iterator is fully exhausted.
840     ///
841     /// Also note that is is unspecified how many bytes are removed from the
842     /// `Vec<u8>` if the `DrainBytes` iterator is leaked.
843     ///
844     /// # Panics
845     ///
846     /// Panics if the given range is not valid.
847     ///
848     /// # Examples
849     ///
850     /// Basic usage:
851     ///
852     /// ```
853     /// use bstr::ByteVec;
854     ///
855     /// let mut s = Vec::from("foobar");
856     /// {
857     ///     let mut drainer = s.drain_bytes(2..4);
858     ///     assert_eq!(drainer.next(), Some(b'o'));
859     ///     assert_eq!(drainer.next(), Some(b'b'));
860     ///     assert_eq!(drainer.next(), None);
861     /// }
862     /// assert_eq!(s, "foar".as_bytes());
863     /// ```
864     #[inline]
drain_bytes<R>(&mut self, range: R) -> DrainBytes where R: ops::RangeBounds<usize>,865     fn drain_bytes<R>(&mut self, range: R) -> DrainBytes
866     where
867         R: ops::RangeBounds<usize>,
868     {
869         DrainBytes { it: self.as_vec_mut().drain(range) }
870     }
871 }
872 
/// A draining byte oriented iterator for `Vec<u8>`.
///
/// This iterator is created by
/// [`ByteVec::drain_bytes`](trait.ByteVec.html#method.drain_bytes).
///
/// It is a thin wrapper around the standard library's `vec::Drain<u8>`; every
/// iterator method it implements forwards directly to that inner iterator.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::ByteVec;
///
/// let mut s = Vec::from("foobar");
/// {
///     let mut drainer = s.drain_bytes(2..4);
///     assert_eq!(drainer.next(), Some(b'o'));
///     assert_eq!(drainer.next(), Some(b'b'));
///     assert_eq!(drainer.next(), None);
/// }
/// assert_eq!(s, "foar".as_bytes());
/// ```
#[derive(Debug)]
pub struct DrainBytes<'a> {
    // The wrapped standard-library drain that does the actual work.
    it: vec::Drain<'a, u8>,
}

impl<'a> iter::FusedIterator for DrainBytes<'a> {}

impl<'a> Iterator for DrainBytes<'a> {
    type Item = u8;

    #[inline]
    fn next(&mut self) -> Option<u8> {
        self.it.next()
    }

    // `ExactSizeIterator` requires `size_hint` to report the exact remaining
    // length. The default implementation returns `(0, None)`, so forward to
    // the inner drain, which knows the precise count.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.it.size_hint()
    }
}

impl<'a> DoubleEndedIterator for DrainBytes<'a> {
    #[inline]
    fn next_back(&mut self) -> Option<u8> {
        self.it.next_back()
    }
}

impl<'a> ExactSizeIterator for DrainBytes<'a> {
    #[inline]
    fn len(&self) -> usize {
        self.it.len()
    }
}
923 
924 /// An error that may occur when converting a `Vec<u8>` to a `String`.
925 ///
926 /// This error includes the original `Vec<u8>` that failed to convert to a
927 /// `String`. This permits callers to recover the allocation used even if it
928 /// it not valid UTF-8.
929 ///
930 /// # Examples
931 ///
932 /// Basic usage:
933 ///
934 /// ```
935 /// use bstr::{B, ByteVec};
936 ///
937 /// let bytes = Vec::from_slice(b"foo\xFFbar");
938 /// let err = bytes.into_string().unwrap_err();
939 ///
940 /// assert_eq!(err.utf8_error().valid_up_to(), 3);
941 /// assert_eq!(err.utf8_error().error_len(), Some(1));
942 ///
943 /// // At no point in this example is an allocation performed.
944 /// let bytes = Vec::from(err.into_vec());
945 /// assert_eq!(bytes, B(b"foo\xFFbar"));
946 /// ```
947 #[derive(Debug, Eq, PartialEq)]
948 pub struct FromUtf8Error {
949     original: Vec<u8>,
950     err: Utf8Error,
951 }
952 
953 impl FromUtf8Error {
954     /// Return the original bytes as a slice that failed to convert to a
955     /// `String`.
956     ///
957     /// # Examples
958     ///
959     /// Basic usage:
960     ///
961     /// ```
962     /// use bstr::{B, ByteVec};
963     ///
964     /// let bytes = Vec::from_slice(b"foo\xFFbar");
965     /// let err = bytes.into_string().unwrap_err();
966     ///
967     /// // At no point in this example is an allocation performed.
968     /// assert_eq!(err.as_bytes(), B(b"foo\xFFbar"));
969     /// ```
970     #[inline]
as_bytes(&self) -> &[u8]971     pub fn as_bytes(&self) -> &[u8] {
972         &self.original
973     }
974 
975     /// Consume this error and return the original byte string that failed to
976     /// convert to a `String`.
977     ///
978     /// # Examples
979     ///
980     /// Basic usage:
981     ///
982     /// ```
983     /// use bstr::{B, ByteVec};
984     ///
985     /// let bytes = Vec::from_slice(b"foo\xFFbar");
986     /// let err = bytes.into_string().unwrap_err();
987     /// let original = err.into_vec();
988     ///
989     /// // At no point in this example is an allocation performed.
990     /// assert_eq!(original, B(b"foo\xFFbar"));
991     /// ```
992     #[inline]
into_vec(self) -> Vec<u8>993     pub fn into_vec(self) -> Vec<u8> {
994         self.original
995     }
996 
997     /// Return the underlying UTF-8 error that occurred. This error provides
998     /// information on the nature and location of the invalid UTF-8 detected.
999     ///
1000     /// # Examples
1001     ///
1002     /// Basic usage:
1003     ///
1004     /// ```
1005     /// use bstr::{B, ByteVec};
1006     ///
1007     /// let bytes = Vec::from_slice(b"foo\xFFbar");
1008     /// let err = bytes.into_string().unwrap_err();
1009     ///
1010     /// assert_eq!(err.utf8_error().valid_up_to(), 3);
1011     /// assert_eq!(err.utf8_error().error_len(), Some(1));
1012     /// ```
1013     #[inline]
utf8_error(&self) -> &Utf8Error1014     pub fn utf8_error(&self) -> &Utf8Error {
1015         &self.err
1016     }
1017 }
1018 
1019 impl error::Error for FromUtf8Error {
1020     #[inline]
description(&self) -> &str1021     fn description(&self) -> &str {
1022         "invalid UTF-8 vector"
1023     }
1024 }
1025 
1026 impl fmt::Display for FromUtf8Error {
1027     #[inline]
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result1028     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1029         write!(f, "{}", self.err)
1030     }
1031 }
1032 
#[cfg(test)]
mod tests {
    use ext_slice::B;
    use ext_vec::ByteVec;

    // `insert_str` must splice bytes in at the front, in the middle and at
    // the end of a vector, including into an empty one.
    #[test]
    fn insert() {
        // (initial contents, insertion index, inserted text, expected result)
        let cases: &[(&str, usize, &str, &str)] = &[
            ("", 0, "foo", "foo"),
            ("a", 0, "foo", "fooa"),
            ("a", 1, "foo", "afoo"),
            ("foobar", 3, "quux", "fooquuxbar"),
            ("foobar", 3, "x", "fooxbar"),
            ("foobar", 0, "x", "xfoobar"),
            ("foobar", 6, "x", "foobarx"),
            ("foobar", 3, "quuxbazquux", "fooquuxbazquuxbar"),
        ];

        for &(initial, at, piece, expected) in cases {
            let mut buf: Vec<u8> = Vec::from(initial);
            buf.insert_str(at, piece);
            assert_eq!(buf, expected.as_bytes());
        }
    }

    // Index 1 is past the end of an empty vector.
    #[test]
    #[should_panic]
    fn insert_fail1() {
        let mut empty: Vec<u8> = Vec::new();
        empty.insert_str(1, "foo");
    }

    // Index 2 is past the end of a one-byte vector.
    #[test]
    #[should_panic]
    fn insert_fail2() {
        let mut single: Vec<u8> = Vec::from("a");
        single.insert_str(2, "foo");
    }

    // Index 7 is past the end of a six-byte vector.
    #[test]
    #[should_panic]
    fn insert_fail3() {
        let mut word: Vec<u8> = Vec::from("foobar");
        word.insert_str(7, "foo");
    }
}
1094