1 #![allow(unused_imports)]
2
3 use std::borrow::Cow;
4 use std::error;
5 use std::ffi::{OsStr, OsString};
6 use std::fmt;
7 use std::iter;
8 use std::ops;
9 use std::path::{Path, PathBuf};
10 use std::ptr;
11 use std::str;
12 use std::vec;
13
14 use ext_slice::ByteSlice;
15 use utf8::{self, Utf8Error};
16
17 /// Concatenate the elements given by the iterator together into a single
18 /// `Vec<u8>`.
19 ///
20 /// The elements may be any type that can be cheaply converted into an `&[u8]`.
21 /// This includes, but is not limited to, `&str`, `&BStr` and `&[u8]` itself.
22 ///
23 /// # Examples
24 ///
25 /// Basic usage:
26 ///
27 /// ```
28 /// use bstr;
29 ///
30 /// let s = bstr::concat(&["foo", "bar", "baz"]);
31 /// assert_eq!(s, "foobarbaz".as_bytes());
32 /// ```
#[inline]
pub fn concat<T, I>(elements: I) -> Vec<u8>
where
    T: AsRef<[u8]>,
    I: IntoIterator<Item = T>,
{
    // Fold each element's bytes onto a single growing buffer, preserving the
    // iterator's order. An empty iterator yields an empty vector.
    elements.into_iter().fold(Vec::<u8>::new(), |mut joined, piece| {
        joined.extend_from_slice(piece.as_ref());
        joined
    })
}
45
46 /// Join the elements given by the iterator with the given separator into a
47 /// single `Vec<u8>`.
48 ///
49 /// Both the separator and the elements may be any type that can be cheaply
50 /// converted into an `&[u8]`. This includes, but is not limited to,
51 /// `&str`, `&BStr` and `&[u8]` itself.
52 ///
53 /// # Examples
54 ///
55 /// Basic usage:
56 ///
57 /// ```
58 /// use bstr;
59 ///
60 /// let s = bstr::join(",", &["foo", "bar", "baz"]);
61 /// assert_eq!(s, "foo,bar,baz".as_bytes());
62 /// ```
#[inline]
pub fn join<B, T, I>(separator: B, elements: I) -> Vec<u8>
where
    B: AsRef<[u8]>,
    T: AsRef<[u8]>,
    I: IntoIterator<Item = T>,
{
    let mut joined = Vec::<u8>::new();
    // The separator goes *between* elements, so it is written before every
    // element except the first one.
    let mut needs_separator = false;
    for piece in elements {
        if needs_separator {
            joined.extend_from_slice(separator.as_ref());
        }
        joined.extend_from_slice(piece.as_ref());
        needs_separator = true;
    }
    joined
}
84
85 impl ByteVec for Vec<u8> {
as_vec(&self) -> &Vec<u8>86 fn as_vec(&self) -> &Vec<u8> {
87 self
88 }
as_vec_mut(&mut self) -> &mut Vec<u8>89 fn as_vec_mut(&mut self) -> &mut Vec<u8> {
90 self
91 }
into_vec(self) -> Vec<u8>92 fn into_vec(self) -> Vec<u8> {
93 self
94 }
95 }
96
/// Ensure that callers cannot implement `ByteVec` by making an
/// unimplementable trait its super trait.
pub trait Sealed {}
// `Vec<u8>` is the type given a `Sealed` impl here, which is what allows it
// to implement `ByteVec`.
impl Sealed for Vec<u8> {}
101
/// A trait that extends `Vec<u8>` with string oriented methods.
103 pub trait ByteVec: Sealed {
/// A method for accessing the raw vector bytes of this type. This is
/// always a no-op and callers shouldn't care about it. This only exists
/// for making the extension trait work.
///
/// The default methods of this trait call it whenever they need shared
/// access to the underlying `Vec<u8>`.
#[doc(hidden)]
fn as_vec(&self) -> &Vec<u8>;
109
/// A method for accessing the raw vector bytes of this type, mutably. This
/// is always a no-op and callers shouldn't care about it. This only exists
/// for making the extension trait work.
///
/// The default methods of this trait call it whenever they need to modify
/// the underlying `Vec<u8>`.
#[doc(hidden)]
fn as_vec_mut(&mut self) -> &mut Vec<u8>;
115
/// A method for consuming ownership of this vector. This is always a no-op
/// and callers shouldn't care about it. This only exists for making the
/// extension trait work.
///
/// The consuming default methods of this trait (the `into_*` conversions)
/// call it to take the underlying `Vec<u8>` by value.
#[doc(hidden)]
fn into_vec(self) -> Vec<u8>
where
    Self: Sized;
123
124 /// Create a new owned byte string from the given byte slice.
125 ///
126 /// # Examples
127 ///
128 /// Basic usage:
129 ///
130 /// ```
131 /// use bstr::{B, ByteVec};
132 ///
133 /// let s = <Vec<u8>>::from_slice(b"abc");
134 /// assert_eq!(s, B("abc"));
135 /// ```
fn from_slice<B: AsRef<[u8]>>(bytes: B) -> Vec<u8> {
    // Borrow the input as a byte slice, then copy it into a new vector.
    let source: &[u8] = bytes.as_ref();
    source.to_owned()
}
139
140 /// Create a new byte string from an owned OS string.
141 ///
142 /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
143 /// this returns the original OS string if it is not valid UTF-8.
144 ///
145 /// # Examples
146 ///
147 /// Basic usage:
148 ///
149 /// ```
150 /// use std::ffi::OsString;
151 ///
152 /// use bstr::{B, ByteVec};
153 ///
154 /// let os_str = OsString::from("foo");
155 /// let bs = Vec::from_os_string(os_str).expect("valid UTF-8");
156 /// assert_eq!(bs, B("foo"));
157 /// ```
#[inline]
fn from_os_string(os_str: OsString) -> Result<Vec<u8>, OsString> {
    // Unix: the OS string's bytes are taken over directly, so this branch
    // never fails.
    #[cfg(unix)]
    #[inline]
    fn convert(os_str: OsString) -> Result<Vec<u8>, OsString> {
        use std::os::unix::ffi::OsStringExt;

        let raw: Vec<u8> = os_str.into_vec();
        Ok(raw)
    }

    // Everywhere else: conversion goes through `String`, so an OS string
    // that is not valid UTF-8 is handed back unchanged as the error.
    #[cfg(not(unix))]
    #[inline]
    fn convert(os_str: OsString) -> Result<Vec<u8>, OsString> {
        os_str.into_string().map(String::into_bytes)
    }

    convert(os_str)
}
176
177 /// Lossily create a new byte string from an OS string slice.
178 ///
179 /// On Unix, this always succeeds, is zero cost and always returns a slice.
180 /// On non-Unix systems, this does a UTF-8 check. If the given OS string
181 /// slice is not valid UTF-8, then it is lossily decoded into valid UTF-8
182 /// (with invalid bytes replaced by the Unicode replacement codepoint).
183 ///
184 /// # Examples
185 ///
186 /// Basic usage:
187 ///
188 /// ```
189 /// use std::ffi::OsStr;
190 ///
191 /// use bstr::{B, ByteVec};
192 ///
193 /// let os_str = OsStr::new("foo");
194 /// let bs = Vec::from_os_str_lossy(os_str);
195 /// assert_eq!(bs, B("foo"));
196 /// ```
#[inline]
fn from_os_str_lossy<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
    // Unix: borrow the OS string's bytes as they are.
    #[cfg(unix)]
    #[inline]
    fn convert<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
        use std::os::unix::ffi::OsStrExt;

        let raw: &'a [u8] = os_str.as_bytes();
        Cow::Borrowed(raw)
    }

    // Everywhere else: go through the lossy string conversion, keeping the
    // borrow when one was possible and taking the repaired buffer otherwise.
    #[cfg(not(unix))]
    #[inline]
    fn convert<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
        match os_str.to_string_lossy() {
            Cow::Owned(repaired) => Cow::Owned(repaired.into_bytes()),
            Cow::Borrowed(valid) => Cow::Borrowed(valid.as_bytes()),
        }
    }

    convert(os_str)
}
218
219 /// Create a new byte string from an owned file path.
220 ///
221 /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
222 /// this returns the original path if it is not valid UTF-8.
223 ///
224 /// # Examples
225 ///
226 /// Basic usage:
227 ///
228 /// ```
229 /// use std::path::PathBuf;
230 ///
231 /// use bstr::{B, ByteVec};
232 ///
233 /// let path = PathBuf::from("foo");
234 /// let bs = Vec::from_path_buf(path).expect("must be valid UTF-8");
235 /// assert_eq!(bs, B("foo"));
236 /// ```
237 #[inline]
from_path_buf(path: PathBuf) -> Result<Vec<u8>, PathBuf>238 fn from_path_buf(path: PathBuf) -> Result<Vec<u8>, PathBuf> {
239 Vec::from_os_string(path.into_os_string()).map_err(PathBuf::from)
240 }
241
242 /// Lossily create a new byte string from a file path.
243 ///
244 /// On Unix, this always succeeds, is zero cost and always returns a slice.
245 /// On non-Unix systems, this does a UTF-8 check. If the given path is not
246 /// valid UTF-8, then it is lossily decoded into valid UTF-8 (with invalid
247 /// bytes replaced by the Unicode replacement codepoint).
248 ///
249 /// # Examples
250 ///
251 /// Basic usage:
252 ///
253 /// ```
254 /// use std::path::Path;
255 ///
256 /// use bstr::{B, ByteVec};
257 ///
258 /// let path = Path::new("foo");
259 /// let bs = Vec::from_path_lossy(path);
260 /// assert_eq!(bs, B("foo"));
261 /// ```
262 #[inline]
from_path_lossy<'a>(path: &'a Path) -> Cow<'a, [u8]>263 fn from_path_lossy<'a>(path: &'a Path) -> Cow<'a, [u8]> {
264 Vec::from_os_str_lossy(path.as_os_str())
265 }
266
267 /// Appends the given byte to the end of this byte string.
268 ///
269 /// Note that this is equivalent to the generic `Vec::push` method. This
270 /// method is provided to permit callers to explicitly differentiate
271 /// between pushing bytes, codepoints and strings.
272 ///
273 /// # Examples
274 ///
275 /// Basic usage:
276 ///
277 /// ```
278 /// use bstr::ByteVec;
279 ///
280 /// let mut s = <Vec<u8>>::from("abc");
281 /// s.push_byte(b'\xE2');
282 /// s.push_byte(b'\x98');
283 /// s.push_byte(b'\x83');
284 /// assert_eq!(s, "abc☃".as_bytes());
285 /// ```
286 #[inline]
push_byte(&mut self, byte: u8)287 fn push_byte(&mut self, byte: u8) {
288 self.as_vec_mut().push(byte);
289 }
290
291 /// Appends the given `char` to the end of this byte string.
292 ///
293 /// # Examples
294 ///
295 /// Basic usage:
296 ///
297 /// ```
298 /// use bstr::ByteVec;
299 ///
300 /// let mut s = <Vec<u8>>::from("abc");
301 /// s.push_char('1');
302 /// s.push_char('2');
303 /// s.push_char('3');
304 /// assert_eq!(s, "abc123".as_bytes());
305 /// ```
306 #[inline]
push_char(&mut self, ch: char)307 fn push_char(&mut self, ch: char) {
308 if ch.len_utf8() == 1 {
309 self.push_byte(ch as u8);
310 return;
311 }
312 self.as_vec_mut()
313 .extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes());
314 }
315
316 /// Appends the given slice to the end of this byte string. This accepts
/// any type that can be converted to a `&[u8]`. This includes, but is not
318 /// limited to, `&str`, `&BStr`, and of course, `&[u8]` itself.
319 ///
320 /// # Examples
321 ///
322 /// Basic usage:
323 ///
324 /// ```
325 /// use bstr::ByteVec;
326 ///
327 /// let mut s = <Vec<u8>>::from("abc");
328 /// s.push_str(b"123");
329 /// assert_eq!(s, "abc123".as_bytes());
330 /// ```
331 #[inline]
push_str<B: AsRef<[u8]>>(&mut self, bytes: B)332 fn push_str<B: AsRef<[u8]>>(&mut self, bytes: B) {
333 self.as_vec_mut().extend_from_slice(bytes.as_ref());
334 }
335
336 /// Converts a `Vec<u8>` into a `String` if and only if this byte string is
337 /// valid UTF-8.
338 ///
339 /// If it is not valid UTF-8, then a
340 /// [`FromUtf8Error`](struct.FromUtf8Error.html)
341 /// is returned. (This error can be used to examine why UTF-8 validation
342 /// failed, or to regain the original byte string.)
343 ///
344 /// # Examples
345 ///
346 /// Basic usage:
347 ///
348 /// ```
349 /// use bstr::ByteVec;
350 ///
351 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
352 /// let bytes = Vec::from("hello");
353 /// let string = bytes.into_string()?;
354 ///
355 /// assert_eq!("hello", string);
356 /// # Ok(()) }; example().unwrap()
357 /// ```
358 ///
359 /// If this byte string is not valid UTF-8, then an error will be returned.
360 /// That error can then be used to inspect the location at which invalid
361 /// UTF-8 was found, or to regain the original byte string:
362 ///
363 /// ```
364 /// use bstr::{B, ByteVec};
365 ///
366 /// let bytes = Vec::from_slice(b"foo\xFFbar");
367 /// let err = bytes.into_string().unwrap_err();
368 ///
369 /// assert_eq!(err.utf8_error().valid_up_to(), 3);
370 /// assert_eq!(err.utf8_error().error_len(), Some(1));
371 ///
372 /// // At no point in this example is an allocation performed.
373 /// let bytes = Vec::from(err.into_vec());
374 /// assert_eq!(bytes, B(b"foo\xFFbar"));
375 /// ```
376 #[inline]
into_string(self) -> Result<String, FromUtf8Error> where Self: Sized,377 fn into_string(self) -> Result<String, FromUtf8Error>
378 where
379 Self: Sized,
380 {
381 match utf8::validate(self.as_vec()) {
382 Err(err) => {
383 Err(FromUtf8Error { original: self.into_vec(), err: err })
384 }
385 Ok(()) => {
386 // SAFETY: This is safe because of the guarantees provided by
387 // utf8::validate.
388 unsafe { Ok(self.into_string_unchecked()) }
389 }
390 }
391 }
392
393 /// Lossily converts a `Vec<u8>` into a `String`. If this byte string
394 /// contains invalid UTF-8, then the invalid bytes are replaced with the
395 /// Unicode replacement codepoint.
396 ///
397 /// # Examples
398 ///
399 /// Basic usage:
400 ///
401 /// ```
402 /// use bstr::ByteVec;
403 ///
404 /// let bytes = Vec::from_slice(b"foo\xFFbar");
405 /// let string = bytes.into_string_lossy();
406 /// assert_eq!(string, "foo\u{FFFD}bar");
407 /// ```
408 #[inline]
into_string_lossy(self) -> String where Self: Sized,409 fn into_string_lossy(self) -> String
410 where
411 Self: Sized,
412 {
413 let v = self.as_vec();
414 if let Ok(allutf8) = v.to_str() {
415 return allutf8.to_string();
416 }
417 let mut dst = String::with_capacity(v.len());
418 for ch in v.chars() {
419 dst.push(ch);
420 }
421 dst
422 }
423
424 /// Unsafely convert this byte string into a `String`, without checking for
425 /// valid UTF-8.
426 ///
427 /// # Safety
428 ///
429 /// Callers *must* ensure that this byte string is valid UTF-8 before
430 /// calling this method. Converting a byte string into a `String` that is
431 /// not valid UTF-8 is considered undefined behavior.
432 ///
433 /// This routine is useful in performance sensitive contexts where the
434 /// UTF-8 validity of the byte string is already known and it is
435 /// undesirable to pay the cost of an additional UTF-8 validation check
436 /// that [`into_string`](#method.into_string) performs.
437 ///
438 /// # Examples
439 ///
440 /// Basic usage:
441 ///
442 /// ```
443 /// use bstr::ByteVec;
444 ///
445 /// // SAFETY: This is safe because string literals are guaranteed to be
446 /// // valid UTF-8 by the Rust compiler.
447 /// let s = unsafe { Vec::from("☃βツ").into_string_unchecked() };
448 /// assert_eq!("☃βツ", s);
449 /// ```
into_string_unchecked(self) -> String where Self: Sized,450 unsafe fn into_string_unchecked(self) -> String
451 where
452 Self: Sized,
453 {
454 String::from_utf8_unchecked(self.into_vec())
455 }
456
457 /// Converts this byte string into an OS string, in place.
458 ///
459 /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
460 /// this returns the original byte string if it is not valid UTF-8.
461 ///
462 /// # Examples
463 ///
464 /// Basic usage:
465 ///
466 /// ```
467 /// use std::ffi::OsStr;
468 ///
469 /// use bstr::ByteVec;
470 ///
471 /// let bs = Vec::from("foo");
472 /// let os_str = bs.into_os_string().expect("should be valid UTF-8");
473 /// assert_eq!(os_str, OsStr::new("foo"));
474 /// ```
475 #[inline]
into_os_string(self) -> Result<OsString, Vec<u8>> where Self: Sized,476 fn into_os_string(self) -> Result<OsString, Vec<u8>>
477 where
478 Self: Sized,
479 {
480 #[cfg(unix)]
481 #[inline]
482 fn imp(v: Vec<u8>) -> Result<OsString, Vec<u8>> {
483 use std::os::unix::ffi::OsStringExt;
484
485 Ok(OsString::from_vec(v))
486 }
487
488 #[cfg(not(unix))]
489 #[inline]
490 fn imp(v: Vec<u8>) -> Result<OsString, Vec<u8>> {
491 match v.into_string() {
492 Ok(s) => Ok(OsString::from(s)),
493 Err(err) => Err(err.into_vec()),
494 }
495 }
496
497 imp(self.into_vec())
498 }
499
500 /// Lossily converts this byte string into an OS string, in place.
501 ///
502 /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
503 /// this will perform a UTF-8 check and lossily convert this byte string
504 /// into valid UTF-8 using the Unicode replacement codepoint.
505 ///
506 /// Note that this can prevent the correct roundtripping of file paths on
507 /// non-Unix systems such as Windows, where file paths are an arbitrary
508 /// sequence of 16-bit integers.
509 ///
510 /// # Examples
511 ///
512 /// Basic usage:
513 ///
514 /// ```
515 /// use bstr::ByteVec;
516 ///
517 /// let bs = Vec::from_slice(b"foo\xFFbar");
518 /// let os_str = bs.into_os_string_lossy();
519 /// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
520 /// ```
521 #[inline]
into_os_string_lossy(self) -> OsString where Self: Sized,522 fn into_os_string_lossy(self) -> OsString
523 where
524 Self: Sized,
525 {
526 #[cfg(unix)]
527 #[inline]
528 fn imp(v: Vec<u8>) -> OsString {
529 use std::os::unix::ffi::OsStringExt;
530
531 OsString::from_vec(v)
532 }
533
534 #[cfg(not(unix))]
535 #[inline]
536 fn imp(v: Vec<u8>) -> OsString {
537 OsString::from(v.into_string_lossy())
538 }
539
540 imp(self.into_vec())
541 }
542
543 /// Converts this byte string into an owned file path, in place.
544 ///
545 /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
546 /// this returns the original byte string if it is not valid UTF-8.
547 ///
548 /// # Examples
549 ///
550 /// Basic usage:
551 ///
552 /// ```
553 /// use bstr::ByteVec;
554 ///
555 /// let bs = Vec::from("foo");
556 /// let path = bs.into_path_buf().expect("should be valid UTF-8");
557 /// assert_eq!(path.as_os_str(), "foo");
558 /// ```
559 #[inline]
into_path_buf(self) -> Result<PathBuf, Vec<u8>> where Self: Sized,560 fn into_path_buf(self) -> Result<PathBuf, Vec<u8>>
561 where
562 Self: Sized,
563 {
564 self.into_os_string().map(PathBuf::from)
565 }
566
567 /// Lossily converts this byte string into an owned file path, in place.
568 ///
569 /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
570 /// this will perform a UTF-8 check and lossily convert this byte string
571 /// into valid UTF-8 using the Unicode replacement codepoint.
572 ///
573 /// Note that this can prevent the correct roundtripping of file paths on
574 /// non-Unix systems such as Windows, where file paths are an arbitrary
575 /// sequence of 16-bit integers.
576 ///
577 /// # Examples
578 ///
579 /// Basic usage:
580 ///
581 /// ```
582 /// use bstr::ByteVec;
583 ///
584 /// let bs = Vec::from_slice(b"foo\xFFbar");
585 /// let path = bs.into_path_buf_lossy();
586 /// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
587 /// ```
588 #[inline]
into_path_buf_lossy(self) -> PathBuf where Self: Sized,589 fn into_path_buf_lossy(self) -> PathBuf
590 where
591 Self: Sized,
592 {
593 PathBuf::from(self.into_os_string_lossy())
594 }
595
596 /// Removes the last byte from this `Vec<u8>` and returns it.
597 ///
598 /// If this byte string is empty, then `None` is returned.
599 ///
600 /// If the last codepoint in this byte string is not ASCII, then removing
601 /// the last byte could make this byte string contain invalid UTF-8.
602 ///
603 /// Note that this is equivalent to the generic `Vec::pop` method. This
604 /// method is provided to permit callers to explicitly differentiate
605 /// between popping bytes and codepoints.
606 ///
607 /// # Examples
608 ///
609 /// Basic usage:
610 ///
611 /// ```
612 /// use bstr::ByteVec;
613 ///
614 /// let mut s = Vec::from("foo");
615 /// assert_eq!(s.pop_byte(), Some(b'o'));
616 /// assert_eq!(s.pop_byte(), Some(b'o'));
617 /// assert_eq!(s.pop_byte(), Some(b'f'));
618 /// assert_eq!(s.pop_byte(), None);
619 /// ```
620 #[inline]
pop_byte(&mut self) -> Option<u8>621 fn pop_byte(&mut self) -> Option<u8> {
622 self.as_vec_mut().pop()
623 }
624
625 /// Removes the last codepoint from this `Vec<u8>` and returns it.
626 ///
627 /// If this byte string is empty, then `None` is returned. If the last
628 /// bytes of this byte string do not correspond to a valid UTF-8 code unit
629 /// sequence, then the Unicode replacement codepoint is yielded instead in
630 /// accordance with the
631 /// [replacement codepoint substitution policy](index.html#handling-of-invalid-utf8-8).
632 ///
633 /// # Examples
634 ///
635 /// Basic usage:
636 ///
637 /// ```
638 /// use bstr::ByteVec;
639 ///
640 /// let mut s = Vec::from("foo");
641 /// assert_eq!(s.pop_char(), Some('o'));
642 /// assert_eq!(s.pop_char(), Some('o'));
643 /// assert_eq!(s.pop_char(), Some('f'));
644 /// assert_eq!(s.pop_char(), None);
645 /// ```
646 ///
647 /// This shows the replacement codepoint substitution policy. Note that
648 /// the first pop yields a replacement codepoint but actually removes two
649 /// bytes. This is in contrast with subsequent pops when encountering
650 /// `\xFF` since `\xFF` is never a valid prefix for any valid UTF-8
651 /// code unit sequence.
652 ///
653 /// ```
654 /// use bstr::ByteVec;
655 ///
656 /// let mut s = Vec::from_slice(b"f\xFF\xFF\xFFoo\xE2\x98");
657 /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
658 /// assert_eq!(s.pop_char(), Some('o'));
659 /// assert_eq!(s.pop_char(), Some('o'));
660 /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
661 /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
662 /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
663 /// assert_eq!(s.pop_char(), Some('f'));
664 /// assert_eq!(s.pop_char(), None);
665 /// ```
666 #[inline]
pop_char(&mut self) -> Option<char>667 fn pop_char(&mut self) -> Option<char> {
668 let (ch, size) = utf8::decode_last_lossy(self.as_vec());
669 if size == 0 {
670 return None;
671 }
672 let new_len = self.as_vec().len() - size;
673 self.as_vec_mut().truncate(new_len);
674 Some(ch)
675 }
676
677 /// Removes a `char` from this `Vec<u8>` at the given byte position and
678 /// returns it.
679 ///
680 /// If the bytes at the given position do not lead to a valid UTF-8 code
681 /// unit sequence, then a
682 /// [replacement codepoint is returned instead](index.html#handling-of-invalid-utf8-8).
683 ///
684 /// # Panics
685 ///
686 /// Panics if `at` is larger than or equal to this byte string's length.
687 ///
688 /// # Examples
689 ///
690 /// Basic usage:
691 ///
692 /// ```
693 /// use bstr::ByteVec;
694 ///
695 /// let mut s = Vec::from("foo☃bar");
696 /// assert_eq!(s.remove_char(3), '☃');
697 /// assert_eq!(s, b"foobar");
698 /// ```
699 ///
700 /// This example shows how the Unicode replacement codepoint policy is
701 /// used:
702 ///
703 /// ```
704 /// use bstr::ByteVec;
705 ///
706 /// let mut s = Vec::from_slice(b"foo\xFFbar");
707 /// assert_eq!(s.remove_char(3), '\u{FFFD}');
708 /// assert_eq!(s, b"foobar");
709 /// ```
710 #[inline]
remove_char(&mut self, at: usize) -> char711 fn remove_char(&mut self, at: usize) -> char {
712 let (ch, size) = utf8::decode_lossy(&self.as_vec()[at..]);
713 assert!(
714 size > 0,
715 "expected {} to be less than {}",
716 at,
717 self.as_vec().len(),
718 );
719 self.as_vec_mut().drain(at..at + size);
720 ch
721 }
722
723 /// Inserts the given codepoint into this `Vec<u8>` at a particular byte
724 /// position.
725 ///
726 /// This is an `O(n)` operation as it may copy a number of elements in this
727 /// byte string proportional to its length.
728 ///
729 /// # Panics
730 ///
731 /// Panics if `at` is larger than the byte string's length.
732 ///
733 /// # Examples
734 ///
735 /// Basic usage:
736 ///
737 /// ```
738 /// use bstr::ByteVec;
739 ///
740 /// let mut s = Vec::from("foobar");
741 /// s.insert_char(3, '☃');
742 /// assert_eq!(s, "foo☃bar".as_bytes());
743 /// ```
744 #[inline]
insert_char(&mut self, at: usize, ch: char)745 fn insert_char(&mut self, at: usize, ch: char) {
746 self.insert_str(at, ch.encode_utf8(&mut [0; 4]).as_bytes());
747 }
748
749 /// Inserts the given byte string into this byte string at a particular
750 /// byte position.
751 ///
752 /// This is an `O(n)` operation as it may copy a number of elements in this
753 /// byte string proportional to its length.
754 ///
755 /// The given byte string may be any type that can be cheaply converted
756 /// into a `&[u8]`. This includes, but is not limited to, `&str` and
757 /// `&[u8]`.
758 ///
759 /// # Panics
760 ///
761 /// Panics if `at` is larger than the byte string's length.
762 ///
763 /// # Examples
764 ///
765 /// Basic usage:
766 ///
767 /// ```
768 /// use bstr::ByteVec;
769 ///
770 /// let mut s = Vec::from("foobar");
771 /// s.insert_str(3, "☃☃☃");
772 /// assert_eq!(s, "foo☃☃☃bar".as_bytes());
773 /// ```
#[inline]
fn insert_str<B: AsRef<[u8]>>(&mut self, at: usize, bytes: B) {
    let bytes = bytes.as_ref();
    // Snapshot the current length: it is the bounds-check limit, the basis
    // for the size of the tail to shift, and the basis for the final length.
    let len = self.as_vec().len();
    // `at == len` is permitted and appends to the end; anything larger
    // panics here, before any unsafe code runs.
    assert!(at <= len, "expected {} to be <= {}", at, len);

    // SAFETY: We'd like to efficiently splice in the given bytes into
    // this byte string. Since we are only working with `u8` elements here,
    // we only need to consider whether our bounds are correct and whether
    // our byte string has enough space.
    //
    // Bounds: `at <= len` was asserted above, so `len - at` cannot
    // underflow. Space: the `reserve` call makes room for `bytes.len()`
    // additional bytes, so every write below stays within the allocation.
    self.as_vec_mut().reserve(bytes.len());
    unsafe {
        // Shift bytes after `at` over by the length of `bytes` to make
        // room for it. This requires referencing two regions of memory
        // that may overlap, so we use ptr::copy.
        ptr::copy(
            self.as_vec().as_ptr().add(at),
            self.as_vec_mut().as_mut_ptr().add(at + bytes.len()),
            len - at,
        );
        // Now copy the bytes given into the room we made above. In this
        // case, we know that the given bytes cannot possibly overlap
        // with this byte string since we have a mutable borrow of the
        // latter. Thus, we can use a nonoverlapping copy.
        ptr::copy_nonoverlapping(
            bytes.as_ptr(),
            self.as_vec_mut().as_mut_ptr().add(at),
            bytes.len(),
        );
        // Only now publish the new length: all `len + bytes.len()` bytes
        // have been written by the two copies above.
        self.as_vec_mut().set_len(len + bytes.len());
    }
}
806
807 /// Removes the specified range in this byte string and replaces it with
808 /// the given bytes. The given bytes do not need to have the same length
809 /// as the range provided.
810 ///
811 /// # Panics
812 ///
813 /// Panics if the given range is invalid.
814 ///
815 /// # Examples
816 ///
817 /// Basic usage:
818 ///
819 /// ```
820 /// use bstr::ByteVec;
821 ///
822 /// let mut s = Vec::from("foobar");
823 /// s.replace_range(2..4, "xxxxx");
824 /// assert_eq!(s, "foxxxxxar".as_bytes());
825 /// ```
826 #[inline]
replace_range<R, B>(&mut self, range: R, replace_with: B) where R: ops::RangeBounds<usize>, B: AsRef<[u8]>,827 fn replace_range<R, B>(&mut self, range: R, replace_with: B)
828 where
829 R: ops::RangeBounds<usize>,
830 B: AsRef<[u8]>,
831 {
832 self.as_vec_mut().splice(range, replace_with.as_ref().iter().cloned());
833 }
834
835 /// Creates a draining iterator that removes the specified range in this
836 /// `Vec<u8>` and yields each of the removed bytes.
837 ///
838 /// Note that the elements specified by the given range are removed
839 /// regardless of whether the returned iterator is fully exhausted.
840 ///
/// Also note that it is unspecified how many bytes are removed from the
842 /// `Vec<u8>` if the `DrainBytes` iterator is leaked.
843 ///
844 /// # Panics
845 ///
846 /// Panics if the given range is not valid.
847 ///
848 /// # Examples
849 ///
850 /// Basic usage:
851 ///
852 /// ```
853 /// use bstr::ByteVec;
854 ///
855 /// let mut s = Vec::from("foobar");
856 /// {
857 /// let mut drainer = s.drain_bytes(2..4);
858 /// assert_eq!(drainer.next(), Some(b'o'));
859 /// assert_eq!(drainer.next(), Some(b'b'));
860 /// assert_eq!(drainer.next(), None);
861 /// }
862 /// assert_eq!(s, "foar".as_bytes());
863 /// ```
864 #[inline]
drain_bytes<R>(&mut self, range: R) -> DrainBytes where R: ops::RangeBounds<usize>,865 fn drain_bytes<R>(&mut self, range: R) -> DrainBytes
866 where
867 R: ops::RangeBounds<usize>,
868 {
869 DrainBytes { it: self.as_vec_mut().drain(range) }
870 }
871 }
872
873 /// A draining byte oriented iterator for `Vec<u8>`.
874 ///
875 /// This iterator is created by
876 /// [`ByteVec::drain_bytes`](trait.ByteVec.html#method.drain_bytes).
877 ///
878 /// # Examples
879 ///
880 /// Basic usage:
881 ///
882 /// ```
883 /// use bstr::ByteVec;
884 ///
885 /// let mut s = Vec::from("foobar");
886 /// {
887 /// let mut drainer = s.drain_bytes(2..4);
888 /// assert_eq!(drainer.next(), Some(b'o'));
889 /// assert_eq!(drainer.next(), Some(b'b'));
890 /// assert_eq!(drainer.next(), None);
891 /// }
892 /// assert_eq!(s, "foar".as_bytes());
893 /// ```
#[derive(Debug)]
pub struct DrainBytes<'a> {
    // The underlying `Vec<u8>` drain. Every iterator method below forwards
    // to it.
    it: vec::Drain<'a, u8>,
}

impl<'a> iter::FusedIterator for DrainBytes<'a> {}

impl<'a> Iterator for DrainBytes<'a> {
    type Item = u8;

    #[inline]
    fn next(&mut self) -> Option<u8> {
        self.it.next()
    }

    // `ExactSizeIterator` requires `size_hint` to report the exact remaining
    // length. Without this forward, the default `(0, None)` would be
    // returned, which breaks that contract and prevents consumers such as
    // `collect` from preallocating.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.it.size_hint()
    }
}

impl<'a> DoubleEndedIterator for DrainBytes<'a> {
    #[inline]
    fn next_back(&mut self) -> Option<u8> {
        self.it.next_back()
    }
}

impl<'a> ExactSizeIterator for DrainBytes<'a> {
    #[inline]
    fn len(&self) -> usize {
        self.it.len()
    }
}
923
924 /// An error that may occur when converting a `Vec<u8>` to a `String`.
925 ///
926 /// This error includes the original `Vec<u8>` that failed to convert to a
927 /// `String`. This permits callers to recover the allocation used even if it
/// is not valid UTF-8.
929 ///
930 /// # Examples
931 ///
932 /// Basic usage:
933 ///
934 /// ```
935 /// use bstr::{B, ByteVec};
936 ///
937 /// let bytes = Vec::from_slice(b"foo\xFFbar");
938 /// let err = bytes.into_string().unwrap_err();
939 ///
940 /// assert_eq!(err.utf8_error().valid_up_to(), 3);
941 /// assert_eq!(err.utf8_error().error_len(), Some(1));
942 ///
943 /// // At no point in this example is an allocation performed.
944 /// let bytes = Vec::from(err.into_vec());
945 /// assert_eq!(bytes, B(b"foo\xFFbar"));
946 /// ```
#[derive(Debug, Eq, PartialEq)]
pub struct FromUtf8Error {
    // The bytes that failed to convert. Kept so callers can inspect them
    // (`as_bytes`) or take them back without copying (`into_vec`).
    original: Vec<u8>,
    // The validation error describing the failure; exposed through
    // `utf8_error` and used as this error's `Display` output.
    err: Utf8Error,
}
952
953 impl FromUtf8Error {
954 /// Return the original bytes as a slice that failed to convert to a
955 /// `String`.
956 ///
957 /// # Examples
958 ///
959 /// Basic usage:
960 ///
961 /// ```
962 /// use bstr::{B, ByteVec};
963 ///
964 /// let bytes = Vec::from_slice(b"foo\xFFbar");
965 /// let err = bytes.into_string().unwrap_err();
966 ///
967 /// // At no point in this example is an allocation performed.
968 /// assert_eq!(err.as_bytes(), B(b"foo\xFFbar"));
969 /// ```
970 #[inline]
as_bytes(&self) -> &[u8]971 pub fn as_bytes(&self) -> &[u8] {
972 &self.original
973 }
974
975 /// Consume this error and return the original byte string that failed to
976 /// convert to a `String`.
977 ///
978 /// # Examples
979 ///
980 /// Basic usage:
981 ///
982 /// ```
983 /// use bstr::{B, ByteVec};
984 ///
985 /// let bytes = Vec::from_slice(b"foo\xFFbar");
986 /// let err = bytes.into_string().unwrap_err();
987 /// let original = err.into_vec();
988 ///
989 /// // At no point in this example is an allocation performed.
990 /// assert_eq!(original, B(b"foo\xFFbar"));
991 /// ```
992 #[inline]
into_vec(self) -> Vec<u8>993 pub fn into_vec(self) -> Vec<u8> {
994 self.original
995 }
996
997 /// Return the underlying UTF-8 error that occurred. This error provides
998 /// information on the nature and location of the invalid UTF-8 detected.
999 ///
1000 /// # Examples
1001 ///
1002 /// Basic usage:
1003 ///
1004 /// ```
1005 /// use bstr::{B, ByteVec};
1006 ///
1007 /// let bytes = Vec::from_slice(b"foo\xFFbar");
1008 /// let err = bytes.into_string().unwrap_err();
1009 ///
1010 /// assert_eq!(err.utf8_error().valid_up_to(), 3);
1011 /// assert_eq!(err.utf8_error().error_len(), Some(1));
1012 /// ```
1013 #[inline]
utf8_error(&self) -> &Utf8Error1014 pub fn utf8_error(&self) -> &Utf8Error {
1015 &self.err
1016 }
1017 }
1018
impl error::Error for FromUtf8Error {
    // A fixed summary of the failure. The detailed message is produced by
    // the `Display` impl, which prints the wrapped UTF-8 error.
    #[inline]
    fn description(&self) -> &str {
        "invalid UTF-8 vector"
    }
}
1025
1026 impl fmt::Display for FromUtf8Error {
1027 #[inline]
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result1028 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1029 write!(f, "{}", self.err)
1030 }
1031 }
1032
#[cfg(test)]
mod tests {
    use ext_slice::B;
    use ext_vec::ByteVec;

    /// Builds a byte string from `initial`, inserts `insertion` at byte
    /// offset `at`, and returns the result.
    fn inserted(initial: &str, at: usize, insertion: &str) -> Vec<u8> {
        let mut s = Vec::from(initial);
        s.insert_str(at, insertion);
        s
    }

    #[test]
    fn insert() {
        // (initial contents, insertion offset, inserted text, expected result)
        let cases: &[(&str, usize, &str, &str)] = &[
            ("", 0, "foo", "foo"),
            ("a", 0, "foo", "fooa"),
            ("a", 1, "foo", "afoo"),
            ("foobar", 3, "quux", "fooquuxbar"),
            ("foobar", 3, "x", "fooxbar"),
            ("foobar", 0, "x", "xfoobar"),
            ("foobar", 6, "x", "foobarx"),
            ("foobar", 3, "quuxbazquux", "fooquuxbazquuxbar"),
        ];
        for &(initial, at, insertion, expected) in cases {
            assert_eq!(inserted(initial, at, insertion), expected.as_bytes());
        }
    }

    // The remaining tests insert one past the end of the byte string, which
    // must panic.

    #[test]
    #[should_panic]
    fn insert_fail1() {
        let _ = inserted("", 1, "foo");
    }

    #[test]
    #[should_panic]
    fn insert_fail2() {
        let _ = inserted("a", 2, "foo");
    }

    #[test]
    #[should_panic]
    fn insert_fail3() {
        let _ = inserted("foobar", 7, "foo");
    }
}
1094