1 #[cfg(feature = "std")]
2 use std::borrow::Cow;
3 #[cfg(feature = "std")]
4 use std::ffi::OsStr;
5 #[cfg(feature = "std")]
6 use std::path::Path;
7 
8 use core::{iter, ops, ptr, slice, str};
9 use memchr::{memchr, memmem, memrchr};
10 
11 use crate::ascii;
X(const X<T> &)12 use crate::bstr::BStr;
13 use crate::byteset;
14 #[cfg(feature = "std")]
15 use crate::ext_vec::ByteVec;
16 #[cfg(feature = "unicode")]
~X()17 use crate::unicode::{
18     whitespace_len_fwd, whitespace_len_rev, GraphemeIndices, Graphemes,
19     SentenceIndices, Sentences, WordIndices, Words, WordsWithBreakIndices,
20     WordsWithBreaks,
21 };
22 use crate::utf8::{self, CharIndices, Chars, Utf8Chunks, Utf8Error};
f()23 
24 /// A short-hand constructor for building a `&[u8]`.
25 ///
26 /// This idiosyncratic constructor is useful for concisely building byte string
27 /// slices. Its primary utility is in conveniently writing byte string literals
28 /// in a uniform way. For example, consider this code that does not compile:
29 ///
30 /// ```ignore
31 /// let strs = vec![b"a", b"xy"];
32 /// ```
33 ///
34 /// The above code doesn't compile because the type of the byte string literal
35 /// `b"a"` is `&'static [u8; 1]`, and the type of `b"xy"` is
36 /// `&'static [u8; 2]`. Since their types aren't the same, they can't be stored
37 /// in the same `Vec`. (This is dissimilar from normal Unicode string slices,
38 /// where both `"a"` and `"xy"` have the same type of `&'static str`.)
39 ///
40 /// One way of getting the above code to compile is to convert byte strings to
41 /// slices. You might try this:
42 ///
43 /// ```ignore
44 /// let strs = vec![&b"a", &b"xy"];
45 /// ```
46 ///
47 /// But this just creates values with type `& &'static [u8; 1]` and
48 /// `& &'static [u8; 2]`. Instead, you need to force the issue like so:
49 ///
50 /// ```
51 /// let strs = vec![&b"a"[..], &b"xy"[..]];
52 /// // or
53 /// let strs = vec![b"a".as_ref(), b"xy".as_ref()];
54 /// ```
55 ///
56 /// But neither of these are particularly convenient to type, especially when
57 /// it's something as common as a string literal. Thus, this constructor
58 /// permits writing the following instead:
59 ///
60 /// ```
61 /// use bstr::B;
62 ///
63 /// let strs = vec![B("a"), B(b"xy")];
64 /// ```
65 ///
66 /// Notice that this also lets you mix and match both string literals and byte
67 /// string literals. This can be quite convenient!
// The upper-case name is deliberate: it lets `B("...")` read like a literal
// prefix, hence the lint exemption.
#[allow(non_snake_case)]
#[inline]
pub fn B<'a, B: ?Sized + AsRef<[u8]>>(bytes: &'a B) -> &'a [u8] {
    // Spell out the `AsRef<[u8]>` conversion explicitly; the returned slice
    // borrows from `bytes` for the same lifetime `'a`.
    <B as AsRef<[u8]>>::as_ref(bytes)
}
73 
74 impl ByteSlice for [u8] {
75     #[inline]
76     fn as_bytes(&self) -> &[u8] {
77         self
78     }
79 
80     #[inline]
81     fn as_bytes_mut(&mut self) -> &mut [u8] {
82         self
83     }
84 }
85 
/// Ensure that callers cannot implement `ByteSlice` by making an
/// unimplementable trait its super trait.
///
/// `ByteSlice` names this trait as its super trait, so a type can only
/// implement `ByteSlice` if it also implements `Sealed`.
pub trait Sealed {}
// `[u8]` is the type that `ByteSlice` is implemented for in this file.
impl Sealed for [u8] {}
90 
91 /// A trait that extends `&[u8]` with string oriented methods.
92 pub trait ByteSlice: Sealed {
    /// A method for accessing the raw bytes of this type.
    ///
    /// This is plumbing for the extension trait: the provided methods reach
    /// the underlying data through it. The `[u8]` implementation returns the
    /// slice itself, so this is always a no-op and callers shouldn't care
    /// about it.
    #[doc(hidden)]
    fn as_bytes(&self) -> &[u8];
98 
    /// A method for accessing the raw bytes of this type, mutably.
    ///
    /// This is plumbing for the extension trait: provided methods that need
    /// mutable access go through it. The `[u8]` implementation returns the
    /// slice itself, so this is always a no-op and callers shouldn't care
    /// about it.
    #[doc(hidden)]
    fn as_bytes_mut(&mut self) -> &mut [u8];
104 
105     /// Return this byte slice as a `&BStr`.
106     ///
    /// Using `&BStr` is useful because of its `fmt::Debug` representation
108     /// and various other trait implementations (such as `PartialEq` and
109     /// `PartialOrd`). In particular, the `Debug` implementation for `BStr`
110     /// shows its bytes as a normal string. For invalid UTF-8, hex escape
111     /// sequences are used.
112     ///
113     /// # Examples
114     ///
115     /// Basic usage:
116     ///
117     /// ```
118     /// use bstr::ByteSlice;
119     ///
120     /// println!("{:?}", b"foo\xFFbar".as_bstr());
121     /// ```
122     #[inline]
123     fn as_bstr(&self) -> &BStr {
124         BStr::new(self.as_bytes())
125     }
126 
127     /// Return this byte slice as a `&mut BStr`.
128     ///
    /// Using `&mut BStr` is useful because of its `fmt::Debug` representation
130     /// and various other trait implementations (such as `PartialEq` and
131     /// `PartialOrd`). In particular, the `Debug` implementation for `BStr`
132     /// shows its bytes as a normal string. For invalid UTF-8, hex escape
133     /// sequences are used.
134     ///
135     /// # Examples
136     ///
137     /// Basic usage:
138     ///
139     /// ```
140     /// use bstr::ByteSlice;
141     ///
142     /// let mut bytes = *b"foo\xFFbar";
143     /// println!("{:?}", &mut bytes.as_bstr_mut());
144     /// ```
145     #[inline]
146     fn as_bstr_mut(&mut self) -> &mut BStr {
147         BStr::new_mut(self.as_bytes_mut())
148     }
149 
150     /// Create an immutable byte string from an OS string slice.
151     ///
152     /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
153     /// this returns `None` if the given OS string is not valid UTF-8. (For
154     /// example, on Windows, file paths are allowed to be a sequence of
155     /// arbitrary 16-bit integers. Not all such sequences can be transcoded to
156     /// valid UTF-8.)
157     ///
158     /// # Examples
159     ///
160     /// Basic usage:
161     ///
162     /// ```
163     /// use std::ffi::OsStr;
164     ///
165     /// use bstr::{B, ByteSlice};
166     ///
167     /// let os_str = OsStr::new("foo");
168     /// let bs = <[u8]>::from_os_str(os_str).expect("should be valid UTF-8");
169     /// assert_eq!(bs, B("foo"));
170     /// ```
171     #[cfg(feature = "std")]
172     #[inline]
173     fn from_os_str(os_str: &OsStr) -> Option<&[u8]> {
174         #[cfg(unix)]
175         #[inline]
176         fn imp(os_str: &OsStr) -> Option<&[u8]> {
177             use std::os::unix::ffi::OsStrExt;
178 
179             Some(os_str.as_bytes())
180         }
181 
182         #[cfg(not(unix))]
183         #[inline]
184         fn imp(os_str: &OsStr) -> Option<&[u8]> {
185             os_str.to_str().map(|s| s.as_bytes())
186         }
187 
188         imp(os_str)
189     }
190 
191     /// Create an immutable byte string from a file path.
192     ///
193     /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
194     /// this returns `None` if the given path is not valid UTF-8. (For example,
195     /// on Windows, file paths are allowed to be a sequence of arbitrary 16-bit
196     /// integers. Not all such sequences can be transcoded to valid UTF-8.)
197     ///
198     /// # Examples
199     ///
200     /// Basic usage:
201     ///
202     /// ```
203     /// use std::path::Path;
204     ///
205     /// use bstr::{B, ByteSlice};
206     ///
207     /// let path = Path::new("foo");
208     /// let bs = <[u8]>::from_path(path).expect("should be valid UTF-8");
209     /// assert_eq!(bs, B("foo"));
210     /// ```
211     #[cfg(feature = "std")]
212     #[inline]
213     fn from_path(path: &Path) -> Option<&[u8]> {
214         Self::from_os_str(path.as_os_str())
215     }
216 
217     /// Safely convert this byte string into a `&str` if it's valid UTF-8.
218     ///
219     /// If this byte string is not valid UTF-8, then an error is returned. The
220     /// error returned indicates the first invalid byte found and the length
221     /// of the error.
222     ///
223     /// In cases where a lossy conversion to `&str` is acceptable, then use one
224     /// of the [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) or
225     /// [`to_str_lossy_into`](trait.ByteSlice.html#method.to_str_lossy_into)
226     /// methods.
227     ///
228     /// # Examples
229     ///
230     /// Basic usage:
231     ///
232     /// ```
233     /// use bstr::{B, ByteSlice, ByteVec};
234     ///
235     /// # fn example() -> Result<(), bstr::Utf8Error> {
236     /// let s = B("☃βツ").to_str()?;
237     /// assert_eq!("☃βツ", s);
238     ///
239     /// let mut bstring = <Vec<u8>>::from("☃βツ");
240     /// bstring.push(b'\xFF');
241     /// let err = bstring.to_str().unwrap_err();
242     /// assert_eq!(8, err.valid_up_to());
243     /// # Ok(()) }; example().unwrap()
244     /// ```
245     #[inline]
246     fn to_str(&self) -> Result<&str, Utf8Error> {
247         utf8::validate(self.as_bytes()).map(|_| {
248             // SAFETY: This is safe because of the guarantees provided by
249             // utf8::validate.
250             unsafe { str::from_utf8_unchecked(self.as_bytes()) }
251         })
252     }
253 
254     /// Unsafely convert this byte string into a `&str`, without checking for
255     /// valid UTF-8.
256     ///
257     /// # Safety
258     ///
259     /// Callers *must* ensure that this byte string is valid UTF-8 before
260     /// calling this method. Converting a byte string into a `&str` that is
261     /// not valid UTF-8 is considered undefined behavior.
262     ///
263     /// This routine is useful in performance sensitive contexts where the
264     /// UTF-8 validity of the byte string is already known and it is
265     /// undesirable to pay the cost of an additional UTF-8 validation check
266     /// that [`to_str`](trait.ByteSlice.html#method.to_str) performs.
267     ///
268     /// # Examples
269     ///
270     /// Basic usage:
271     ///
272     /// ```
273     /// use bstr::{B, ByteSlice};
274     ///
275     /// // SAFETY: This is safe because string literals are guaranteed to be
276     /// // valid UTF-8 by the Rust compiler.
277     /// let s = unsafe { B("☃βツ").to_str_unchecked() };
278     /// assert_eq!("☃βツ", s);
279     /// ```
280     #[inline]
281     unsafe fn to_str_unchecked(&self) -> &str {
282         str::from_utf8_unchecked(self.as_bytes())
283     }
284 
285     /// Convert this byte string to a valid UTF-8 string by replacing invalid
286     /// UTF-8 bytes with the Unicode replacement codepoint (`U+FFFD`).
287     ///
288     /// If the byte string is already valid UTF-8, then no copying or
    /// allocation is performed and a borrowed string slice is returned. If
290     /// the byte string is not valid UTF-8, then an owned string buffer is
291     /// returned with invalid bytes replaced by the replacement codepoint.
292     ///
293     /// This method uses the "substitution of maximal subparts" (Unicode
294     /// Standard, Chapter 3, Section 9) strategy for inserting the replacement
295     /// codepoint. Specifically, a replacement codepoint is inserted whenever a
296     /// byte is found that cannot possibly lead to a valid code unit sequence.
297     /// If there were previous bytes that represented a prefix of a well-formed
298     /// code unit sequence, then all of those bytes are substituted with a
299     /// single replacement codepoint. The "substitution of maximal subparts"
300     /// strategy is the same strategy used by
301     /// [W3C's Encoding standard](https://www.w3.org/TR/encoding/).
302     /// For a more precise description of the maximal subpart strategy, see
303     /// the Unicode Standard, Chapter 3, Section 9. See also
304     /// [Public Review Issue #121](http://www.unicode.org/review/pr-121.html).
305     ///
306     /// N.B. Rust's standard library also appears to use the same strategy,
307     /// but it does not appear to be an API guarantee.
308     ///
309     /// # Examples
310     ///
311     /// Basic usage:
312     ///
313     /// ```
314     /// use std::borrow::Cow;
315     ///
316     /// use bstr::ByteSlice;
317     ///
318     /// let mut bstring = <Vec<u8>>::from("☃βツ");
319     /// assert_eq!(Cow::Borrowed("☃βツ"), bstring.to_str_lossy());
320     ///
321     /// // Add a byte that makes the sequence invalid.
322     /// bstring.push(b'\xFF');
323     /// assert_eq!(Cow::Borrowed("☃βツ\u{FFFD}"), bstring.to_str_lossy());
324     /// ```
325     ///
326     /// This demonstrates the "maximal subpart" substitution logic.
327     ///
328     /// ```
329     /// use bstr::{B, ByteSlice};
330     ///
331     /// // \x61 is the ASCII codepoint for 'a'.
332     /// // \xF1\x80\x80 is a valid 3-byte code unit prefix.
333     /// // \xE1\x80 is a valid 2-byte code unit prefix.
334     /// // \xC2 is a valid 1-byte code unit prefix.
335     /// // \x62 is the ASCII codepoint for 'b'.
336     /// //
337     /// // In sum, each of the prefixes is replaced by a single replacement
338     /// // codepoint since none of the prefixes are properly completed. This
339     /// // is in contrast to other strategies that might insert a replacement
340     /// // codepoint for every single byte.
341     /// let bs = B(b"\x61\xF1\x80\x80\xE1\x80\xC2\x62");
342     /// assert_eq!("a\u{FFFD}\u{FFFD}\u{FFFD}b", bs.to_str_lossy());
343     /// ```
344     #[cfg(feature = "std")]
345     #[inline]
346     fn to_str_lossy(&self) -> Cow<'_, str> {
347         match utf8::validate(self.as_bytes()) {
348             Ok(()) => {
349                 // SAFETY: This is safe because of the guarantees provided by
350                 // utf8::validate.
351                 unsafe {
352                     Cow::Borrowed(str::from_utf8_unchecked(self.as_bytes()))
353                 }
354             }
355             Err(err) => {
356                 let mut lossy = String::with_capacity(self.as_bytes().len());
357                 let (valid, after) =
358                     self.as_bytes().split_at(err.valid_up_to());
359                 // SAFETY: This is safe because utf8::validate guarantees
360                 // that all of `valid` is valid UTF-8.
361                 lossy.push_str(unsafe { str::from_utf8_unchecked(valid) });
362                 lossy.push_str("\u{FFFD}");
363                 if let Some(len) = err.error_len() {
364                     after[len..].to_str_lossy_into(&mut lossy);
365                 }
366                 Cow::Owned(lossy)
367             }
368         }
369     }
370 
371     /// Copy the contents of this byte string into the given owned string
372     /// buffer, while replacing invalid UTF-8 code unit sequences with the
373     /// Unicode replacement codepoint (`U+FFFD`).
374     ///
375     /// This method uses the same "substitution of maximal subparts" strategy
376     /// for inserting the replacement codepoint as the
377     /// [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) method.
378     ///
379     /// This routine is useful for amortizing allocation. However, unlike
380     /// `to_str_lossy`, this routine will _always_ copy the contents of this
381     /// byte string into the destination buffer, even if this byte string is
382     /// valid UTF-8.
383     ///
384     /// # Examples
385     ///
386     /// Basic usage:
387     ///
388     /// ```
389     /// use std::borrow::Cow;
390     ///
391     /// use bstr::ByteSlice;
392     ///
393     /// let mut bstring = <Vec<u8>>::from("☃βツ");
394     /// // Add a byte that makes the sequence invalid.
395     /// bstring.push(b'\xFF');
396     ///
397     /// let mut dest = String::new();
398     /// bstring.to_str_lossy_into(&mut dest);
399     /// assert_eq!("☃βツ\u{FFFD}", dest);
400     /// ```
401     #[cfg(feature = "std")]
402     #[inline]
403     fn to_str_lossy_into(&self, dest: &mut String) {
404         let mut bytes = self.as_bytes();
405         dest.reserve(bytes.len());
406         loop {
407             match utf8::validate(bytes) {
408                 Ok(()) => {
409                     // SAFETY: This is safe because utf8::validate guarantees
410                     // that all of `bytes` is valid UTF-8.
411                     dest.push_str(unsafe { str::from_utf8_unchecked(bytes) });
412                     break;
413                 }
414                 Err(err) => {
415                     let (valid, after) = bytes.split_at(err.valid_up_to());
416                     // SAFETY: This is safe because utf8::validate guarantees
417                     // that all of `valid` is valid UTF-8.
418                     dest.push_str(unsafe { str::from_utf8_unchecked(valid) });
419                     dest.push_str("\u{FFFD}");
420                     match err.error_len() {
421                         None => break,
422                         Some(len) => bytes = &after[len..],
423                     }
424                 }
425             }
426         }
427     }
428 
429     /// Create an OS string slice from this byte string.
430     ///
431     /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
432     /// this returns a UTF-8 decoding error if this byte string is not valid
433     /// UTF-8. (For example, on Windows, file paths are allowed to be a
434     /// sequence of arbitrary 16-bit integers. There is no obvious mapping from
435     /// an arbitrary sequence of 8-bit integers to an arbitrary sequence of
436     /// 16-bit integers.)
437     ///
438     /// # Examples
439     ///
440     /// Basic usage:
441     ///
442     /// ```
443     /// use bstr::{B, ByteSlice};
444     ///
445     /// let os_str = b"foo".to_os_str().expect("should be valid UTF-8");
446     /// assert_eq!(os_str, "foo");
447     /// ```
448     #[cfg(feature = "std")]
449     #[inline]
450     fn to_os_str(&self) -> Result<&OsStr, Utf8Error> {
451         #[cfg(unix)]
452         #[inline]
453         fn imp(bytes: &[u8]) -> Result<&OsStr, Utf8Error> {
454             use std::os::unix::ffi::OsStrExt;
455 
456             Ok(OsStr::from_bytes(bytes))
457         }
458 
459         #[cfg(not(unix))]
460         #[inline]
461         fn imp(bytes: &[u8]) -> Result<&OsStr, Utf8Error> {
462             bytes.to_str().map(OsStr::new)
463         }
464 
465         imp(self.as_bytes())
466     }
467 
468     /// Lossily create an OS string slice from this byte string.
469     ///
470     /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
471     /// this will perform a UTF-8 check and lossily convert this byte string
472     /// into valid UTF-8 using the Unicode replacement codepoint.
473     ///
474     /// Note that this can prevent the correct roundtripping of file paths on
475     /// non-Unix systems such as Windows, where file paths are an arbitrary
476     /// sequence of 16-bit integers.
477     ///
478     /// # Examples
479     ///
480     /// Basic usage:
481     ///
482     /// ```
483     /// use bstr::ByteSlice;
484     ///
485     /// let os_str = b"foo\xFFbar".to_os_str_lossy();
486     /// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
487     /// ```
488     #[cfg(feature = "std")]
489     #[inline]
490     fn to_os_str_lossy(&self) -> Cow<'_, OsStr> {
491         #[cfg(unix)]
492         #[inline]
493         fn imp(bytes: &[u8]) -> Cow<'_, OsStr> {
494             use std::os::unix::ffi::OsStrExt;
495 
496             Cow::Borrowed(OsStr::from_bytes(bytes))
497         }
498 
499         #[cfg(not(unix))]
500         #[inline]
501         fn imp(bytes: &[u8]) -> Cow<OsStr> {
502             use std::ffi::OsString;
503 
504             match bytes.to_str_lossy() {
505                 Cow::Borrowed(x) => Cow::Borrowed(OsStr::new(x)),
506                 Cow::Owned(x) => Cow::Owned(OsString::from(x)),
507             }
508         }
509 
510         imp(self.as_bytes())
511     }
512 
513     /// Create a path slice from this byte string.
514     ///
515     /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
516     /// this returns a UTF-8 decoding error if this byte string is not valid
517     /// UTF-8. (For example, on Windows, file paths are allowed to be a
518     /// sequence of arbitrary 16-bit integers. There is no obvious mapping from
519     /// an arbitrary sequence of 8-bit integers to an arbitrary sequence of
520     /// 16-bit integers.)
521     ///
522     /// # Examples
523     ///
524     /// Basic usage:
525     ///
526     /// ```
527     /// use bstr::ByteSlice;
528     ///
529     /// let path = b"foo".to_path().expect("should be valid UTF-8");
530     /// assert_eq!(path.as_os_str(), "foo");
531     /// ```
532     #[cfg(feature = "std")]
533     #[inline]
534     fn to_path(&self) -> Result<&Path, Utf8Error> {
535         self.to_os_str().map(Path::new)
536     }
537 
538     /// Lossily create a path slice from this byte string.
539     ///
540     /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
541     /// this will perform a UTF-8 check and lossily convert this byte string
542     /// into valid UTF-8 using the Unicode replacement codepoint.
543     ///
544     /// Note that this can prevent the correct roundtripping of file paths on
545     /// non-Unix systems such as Windows, where file paths are an arbitrary
546     /// sequence of 16-bit integers.
547     ///
548     /// # Examples
549     ///
550     /// Basic usage:
551     ///
552     /// ```
553     /// use bstr::ByteSlice;
554     ///
555     /// let bs = b"foo\xFFbar";
556     /// let path = bs.to_path_lossy();
557     /// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
558     /// ```
559     #[cfg(feature = "std")]
560     #[inline]
561     fn to_path_lossy(&self) -> Cow<'_, Path> {
562         use std::path::PathBuf;
563 
564         match self.to_os_str_lossy() {
565             Cow::Borrowed(x) => Cow::Borrowed(Path::new(x)),
566             Cow::Owned(x) => Cow::Owned(PathBuf::from(x)),
567         }
568     }
569 
570     /// Create a new byte string by repeating this byte string `n` times.
571     ///
572     /// # Panics
573     ///
574     /// This function panics if the capacity of the new byte string would
575     /// overflow.
576     ///
577     /// # Examples
578     ///
579     /// Basic usage:
580     ///
581     /// ```
582     /// use bstr::{B, ByteSlice};
583     ///
584     /// assert_eq!(b"foo".repeatn(4), B("foofoofoofoo"));
585     /// assert_eq!(b"foo".repeatn(0), B(""));
586     /// ```
587     #[cfg(feature = "std")]
588     #[inline]
589     fn repeatn(&self, n: usize) -> Vec<u8> {
590         let bs = self.as_bytes();
591         let mut dst = vec![0; bs.len() * n];
592         for i in 0..n {
593             dst[i * bs.len()..(i + 1) * bs.len()].copy_from_slice(bs);
594         }
595         dst
596     }
597 
598     /// Returns true if and only if this byte string contains the given needle.
599     ///
600     /// # Examples
601     ///
602     /// Basic usage:
603     ///
604     /// ```
605     /// use bstr::ByteSlice;
606     ///
607     /// assert!(b"foo bar".contains_str("foo"));
608     /// assert!(b"foo bar".contains_str("bar"));
609     /// assert!(!b"foo".contains_str("foobar"));
610     /// ```
611     #[inline]
612     fn contains_str<B: AsRef<[u8]>>(&self, needle: B) -> bool {
613         self.find(needle).is_some()
614     }
615 
616     /// Returns true if and only if this byte string has the given prefix.
617     ///
618     /// # Examples
619     ///
620     /// Basic usage:
621     ///
622     /// ```
623     /// use bstr::ByteSlice;
624     ///
625     /// assert!(b"foo bar".starts_with_str("foo"));
626     /// assert!(!b"foo bar".starts_with_str("bar"));
627     /// assert!(!b"foo".starts_with_str("foobar"));
628     /// ```
629     #[inline]
630     fn starts_with_str<B: AsRef<[u8]>>(&self, prefix: B) -> bool {
631         self.as_bytes().starts_with(prefix.as_ref())
632     }
633 
634     /// Returns true if and only if this byte string has the given suffix.
635     ///
636     /// # Examples
637     ///
638     /// Basic usage:
639     ///
640     /// ```
641     /// use bstr::ByteSlice;
642     ///
643     /// assert!(b"foo bar".ends_with_str("bar"));
644     /// assert!(!b"foo bar".ends_with_str("foo"));
645     /// assert!(!b"bar".ends_with_str("foobar"));
646     /// ```
647     #[inline]
648     fn ends_with_str<B: AsRef<[u8]>>(&self, suffix: B) -> bool {
649         self.as_bytes().ends_with(suffix.as_ref())
650     }
651 
652     /// Returns the index of the first occurrence of the given needle.
653     ///
654     /// The needle may be any type that can be cheaply converted into a
655     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
656     ///
    /// Note that if you are searching for the same needle in many
658     /// different small haystacks, it may be faster to initialize a
659     /// [`Finder`](struct.Finder.html) once, and reuse it for each search.
660     ///
661     /// # Complexity
662     ///
663     /// This routine is guaranteed to have worst case linear time complexity
664     /// with respect to both the needle and the haystack. That is, this runs
665     /// in `O(needle.len() + haystack.len())` time.
666     ///
667     /// This routine is also guaranteed to have worst case constant space
668     /// complexity.
669     ///
670     /// # Examples
671     ///
672     /// Basic usage:
673     ///
674     /// ```
675     /// use bstr::ByteSlice;
676     ///
677     /// let s = b"foo bar baz";
678     /// assert_eq!(Some(0), s.find("foo"));
679     /// assert_eq!(Some(4), s.find("bar"));
680     /// assert_eq!(None, s.find("quux"));
681     /// ```
682     #[inline]
683     fn find<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> {
684         Finder::new(needle.as_ref()).find(self.as_bytes())
685     }
686 
687     /// Returns the index of the last occurrence of the given needle.
688     ///
689     /// The needle may be any type that can be cheaply converted into a
690     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
691     ///
    /// Note that if you are searching for the same needle in many
693     /// different small haystacks, it may be faster to initialize a
694     /// [`FinderReverse`](struct.FinderReverse.html) once, and reuse it for
695     /// each search.
696     ///
697     /// # Complexity
698     ///
699     /// This routine is guaranteed to have worst case linear time complexity
700     /// with respect to both the needle and the haystack. That is, this runs
701     /// in `O(needle.len() + haystack.len())` time.
702     ///
703     /// This routine is also guaranteed to have worst case constant space
704     /// complexity.
705     ///
706     /// # Examples
707     ///
708     /// Basic usage:
709     ///
710     /// ```
711     /// use bstr::ByteSlice;
712     ///
713     /// let s = b"foo bar baz";
714     /// assert_eq!(Some(0), s.rfind("foo"));
715     /// assert_eq!(Some(4), s.rfind("bar"));
716     /// assert_eq!(Some(8), s.rfind("ba"));
717     /// assert_eq!(None, s.rfind("quux"));
718     /// ```
719     #[inline]
720     fn rfind<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> {
721         FinderReverse::new(needle.as_ref()).rfind(self.as_bytes())
722     }
723 
724     /// Returns an iterator of the non-overlapping occurrences of the given
725     /// needle. The iterator yields byte offset positions indicating the start
726     /// of each match.
727     ///
728     /// # Complexity
729     ///
730     /// This routine is guaranteed to have worst case linear time complexity
731     /// with respect to both the needle and the haystack. That is, this runs
732     /// in `O(needle.len() + haystack.len())` time.
733     ///
734     /// This routine is also guaranteed to have worst case constant space
735     /// complexity.
736     ///
737     /// # Examples
738     ///
739     /// Basic usage:
740     ///
741     /// ```
742     /// use bstr::ByteSlice;
743     ///
744     /// let s = b"foo bar foo foo quux foo";
745     /// let matches: Vec<usize> = s.find_iter("foo").collect();
746     /// assert_eq!(matches, vec![0, 8, 12, 21]);
747     /// ```
748     ///
749     /// An empty string matches at every position, including the position
750     /// immediately following the last byte:
751     ///
752     /// ```
753     /// use bstr::ByteSlice;
754     ///
755     /// let matches: Vec<usize> = b"foo".find_iter("").collect();
756     /// assert_eq!(matches, vec![0, 1, 2, 3]);
757     ///
758     /// let matches: Vec<usize> = b"".find_iter("").collect();
759     /// assert_eq!(matches, vec![0]);
760     /// ```
761     #[inline]
762     fn find_iter<'a, B: ?Sized + AsRef<[u8]>>(
763         &'a self,
764         needle: &'a B,
765     ) -> Find<'a> {
766         Find::new(self.as_bytes(), needle.as_ref())
767     }
768 
769     /// Returns an iterator of the non-overlapping occurrences of the given
770     /// needle in reverse. The iterator yields byte offset positions indicating
771     /// the start of each match.
772     ///
773     /// # Complexity
774     ///
775     /// This routine is guaranteed to have worst case linear time complexity
776     /// with respect to both the needle and the haystack. That is, this runs
777     /// in `O(needle.len() + haystack.len())` time.
778     ///
779     /// This routine is also guaranteed to have worst case constant space
780     /// complexity.
781     ///
782     /// # Examples
783     ///
784     /// Basic usage:
785     ///
786     /// ```
787     /// use bstr::ByteSlice;
788     ///
789     /// let s = b"foo bar foo foo quux foo";
790     /// let matches: Vec<usize> = s.rfind_iter("foo").collect();
791     /// assert_eq!(matches, vec![21, 12, 8, 0]);
792     /// ```
793     ///
794     /// An empty string matches at every position, including the position
795     /// immediately following the last byte:
796     ///
797     /// ```
798     /// use bstr::ByteSlice;
799     ///
800     /// let matches: Vec<usize> = b"foo".rfind_iter("").collect();
801     /// assert_eq!(matches, vec![3, 2, 1, 0]);
802     ///
803     /// let matches: Vec<usize> = b"".rfind_iter("").collect();
804     /// assert_eq!(matches, vec![0]);
805     /// ```
806     #[inline]
807     fn rfind_iter<'a, B: ?Sized + AsRef<[u8]>>(
808         &'a self,
809         needle: &'a B,
810     ) -> FindReverse<'a> {
811         FindReverse::new(self.as_bytes(), needle.as_ref())
812     }
813 
814     /// Returns the index of the first occurrence of the given byte. If the
815     /// byte does not occur in this byte string, then `None` is returned.
816     ///
817     /// # Examples
818     ///
819     /// Basic usage:
820     ///
821     /// ```
822     /// use bstr::ByteSlice;
823     ///
824     /// assert_eq!(Some(10), b"foo bar baz".find_byte(b'z'));
825     /// assert_eq!(None, b"foo bar baz".find_byte(b'y'));
826     /// ```
827     #[inline]
828     fn find_byte(&self, byte: u8) -> Option<usize> {
829         memchr(byte, self.as_bytes())
830     }
831 
832     /// Returns the index of the last occurrence of the given byte. If the
833     /// byte does not occur in this byte string, then `None` is returned.
834     ///
835     /// # Examples
836     ///
837     /// Basic usage:
838     ///
839     /// ```
840     /// use bstr::ByteSlice;
841     ///
842     /// assert_eq!(Some(10), b"foo bar baz".rfind_byte(b'z'));
843     /// assert_eq!(None, b"foo bar baz".rfind_byte(b'y'));
844     /// ```
845     #[inline]
846     fn rfind_byte(&self, byte: u8) -> Option<usize> {
847         memrchr(byte, self.as_bytes())
848     }
849 
850     /// Returns the index of the first occurrence of the given codepoint.
851     /// If the codepoint does not occur in this byte string, then `None` is
852     /// returned.
853     ///
854     /// Note that if one searches for the replacement codepoint, `\u{FFFD}`,
855     /// then only explicit occurrences of that encoding will be found. Invalid
856     /// UTF-8 sequences will not be matched.
857     ///
858     /// # Examples
859     ///
860     /// Basic usage:
861     ///
862     /// ```
863     /// use bstr::{B, ByteSlice};
864     ///
865     /// assert_eq!(Some(10), b"foo bar baz".find_char('z'));
866     /// assert_eq!(Some(4), B("αβγγδ").find_char('γ'));
867     /// assert_eq!(None, b"foo bar baz".find_char('y'));
868     /// ```
869     #[inline]
870     fn find_char(&self, ch: char) -> Option<usize> {
871         self.find(ch.encode_utf8(&mut [0; 4]))
872     }
873 
874     /// Returns the index of the last occurrence of the given codepoint.
875     /// If the codepoint does not occur in this byte string, then `None` is
876     /// returned.
877     ///
878     /// Note that if one searches for the replacement codepoint, `\u{FFFD}`,
879     /// then only explicit occurrences of that encoding will be found. Invalid
880     /// UTF-8 sequences will not be matched.
881     ///
882     /// # Examples
883     ///
884     /// Basic usage:
885     ///
886     /// ```
887     /// use bstr::{B, ByteSlice};
888     ///
889     /// assert_eq!(Some(10), b"foo bar baz".rfind_char('z'));
890     /// assert_eq!(Some(6), B("αβγγδ").rfind_char('γ'));
891     /// assert_eq!(None, b"foo bar baz".rfind_char('y'));
892     /// ```
893     #[inline]
894     fn rfind_char(&self, ch: char) -> Option<usize> {
895         self.rfind(ch.encode_utf8(&mut [0; 4]))
896     }
897 
898     /// Returns the index of the first occurrence of any of the bytes in the
899     /// provided set.
900     ///
901     /// The `byteset` may be any type that can be cheaply converted into a
902     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
903     /// note that passing a `&str` which contains multibyte characters may not
904     /// behave as you expect: each byte in the `&str` is treated as an
905     /// individual member of the byte set.
906     ///
907     /// Note that order is irrelevant for the `byteset` parameter, and
908     /// duplicate bytes present in its body are ignored.
909     ///
910     /// # Complexity
911     ///
912     /// This routine is guaranteed to have worst case linear time complexity
913     /// with respect to both the set of bytes and the haystack. That is, this
914     /// runs in `O(byteset.len() + haystack.len())` time.
915     ///
916     /// This routine is also guaranteed to have worst case constant space
917     /// complexity.
918     ///
919     /// # Examples
920     ///
921     /// Basic usage:
922     ///
923     /// ```
924     /// use bstr::ByteSlice;
925     ///
926     /// assert_eq!(b"foo bar baz".find_byteset(b"zr"), Some(6));
927     /// assert_eq!(b"foo baz bar".find_byteset(b"bzr"), Some(4));
928     /// assert_eq!(None, b"foo baz bar".find_byteset(b"\t\n"));
929     /// ```
930     #[inline]
931     fn find_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
932         byteset::find(self.as_bytes(), byteset.as_ref())
933     }
934 
935     /// Returns the index of the first occurrence of a byte that is not a member
936     /// of the provided set.
937     ///
938     /// The `byteset` may be any type that can be cheaply converted into a
939     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
940     /// note that passing a `&str` which contains multibyte characters may not
941     /// behave as you expect: each byte in the `&str` is treated as an
942     /// individual member of the byte set.
943     ///
944     /// Note that order is irrelevant for the `byteset` parameter, and
945     /// duplicate bytes present in its body are ignored.
946     ///
947     /// # Complexity
948     ///
949     /// This routine is guaranteed to have worst case linear time complexity
950     /// with respect to both the set of bytes and the haystack. That is, this
951     /// runs in `O(byteset.len() + haystack.len())` time.
952     ///
953     /// This routine is also guaranteed to have worst case constant space
954     /// complexity.
955     ///
956     /// # Examples
957     ///
958     /// Basic usage:
959     ///
960     /// ```
961     /// use bstr::ByteSlice;
962     ///
963     /// assert_eq!(b"foo bar baz".find_not_byteset(b"fo "), Some(4));
964     /// assert_eq!(b"\t\tbaz bar".find_not_byteset(b" \t\r\n"), Some(2));
965     /// assert_eq!(b"foo\nbaz\tbar".find_not_byteset(b"\t\n"), Some(0));
966     /// ```
967     #[inline]
968     fn find_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
969         byteset::find_not(self.as_bytes(), byteset.as_ref())
970     }
971 
972     /// Returns the index of the last occurrence of any of the bytes in the
973     /// provided set.
974     ///
975     /// The `byteset` may be any type that can be cheaply converted into a
976     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
977     /// note that passing a `&str` which contains multibyte characters may not
978     /// behave as you expect: each byte in the `&str` is treated as an
979     /// individual member of the byte set.
980     ///
981     /// Note that order is irrelevant for the `byteset` parameter, and duplicate
982     /// bytes present in its body are ignored.
983     ///
984     /// # Complexity
985     ///
986     /// This routine is guaranteed to have worst case linear time complexity
987     /// with respect to both the set of bytes and the haystack. That is, this
988     /// runs in `O(byteset.len() + haystack.len())` time.
989     ///
990     /// This routine is also guaranteed to have worst case constant space
991     /// complexity.
992     ///
993     /// # Examples
994     ///
995     /// Basic usage:
996     ///
997     /// ```
998     /// use bstr::ByteSlice;
999     ///
1000     /// assert_eq!(b"foo bar baz".rfind_byteset(b"agb"), Some(9));
1001     /// assert_eq!(b"foo baz bar".rfind_byteset(b"rabz "), Some(10));
1002     /// assert_eq!(b"foo baz bar".rfind_byteset(b"\n123"), None);
1003     /// ```
1004     #[inline]
1005     fn rfind_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
1006         byteset::rfind(self.as_bytes(), byteset.as_ref())
1007     }
1008 
1009     /// Returns the index of the last occurrence of a byte that is not a member
1010     /// of the provided set.
1011     ///
1012     /// The `byteset` may be any type that can be cheaply converted into a
1013     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
1014     /// note that passing a `&str` which contains multibyte characters may not
1015     /// behave as you expect: each byte in the `&str` is treated as an
1016     /// individual member of the byte set.
1017     ///
1018     /// Note that order is irrelevant for the `byteset` parameter, and
1019     /// duplicate bytes present in its body are ignored.
1020     ///
1021     /// # Complexity
1022     ///
1023     /// This routine is guaranteed to have worst case linear time complexity
1024     /// with respect to both the set of bytes and the haystack. That is, this
1025     /// runs in `O(byteset.len() + haystack.len())` time.
1026     ///
1027     /// This routine is also guaranteed to have worst case constant space
1028     /// complexity.
1029     ///
1030     /// # Examples
1031     ///
1032     /// Basic usage:
1033     ///
1034     /// ```
1035     /// use bstr::ByteSlice;
1036     ///
1037     /// assert_eq!(b"foo bar baz,\t".rfind_not_byteset(b",\t"), Some(10));
1038     /// assert_eq!(b"foo baz bar".rfind_not_byteset(b"rabz "), Some(2));
1039     /// assert_eq!(None, b"foo baz bar".rfind_not_byteset(b"barfoz "));
1040     /// ```
1041     #[inline]
1042     fn rfind_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
1043         byteset::rfind_not(self.as_bytes(), byteset.as_ref())
1044     }
1045 
1046     /// Returns an iterator over the fields in a byte string, separated by
1047     /// contiguous whitespace.
1048     ///
1049     /// # Example
1050     ///
1051     /// Basic usage:
1052     ///
1053     /// ```
1054     /// use bstr::{B, ByteSlice};
1055     ///
1056     /// let s = B("  foo\tbar\t\u{2003}\nquux   \n");
1057     /// let fields: Vec<&[u8]> = s.fields().collect();
1058     /// assert_eq!(fields, vec![B("foo"), B("bar"), B("quux")]);
1059     /// ```
1060     ///
1061     /// A byte string consisting of just whitespace yields no elements:
1062     ///
1063     /// ```
1064     /// use bstr::{B, ByteSlice};
1065     ///
1066     /// assert_eq!(0, B("  \n\t\u{2003}\n  \t").fields().count());
1067     /// ```
1068     #[inline]
1069     fn fields(&self) -> Fields<'_> {
1070         Fields::new(self.as_bytes())
1071     }
1072 
1073     /// Returns an iterator over the fields in a byte string, separated by
1074     /// contiguous codepoints satisfying the given predicate.
1075     ///
1076     /// If this byte string is not valid UTF-8, then the given closure will
1077     /// be called with a Unicode replacement codepoint when invalid UTF-8
1078     /// bytes are seen.
1079     ///
1080     /// # Example
1081     ///
1082     /// Basic usage:
1083     ///
1084     /// ```
1085     /// use bstr::{B, ByteSlice};
1086     ///
1087     /// let s = b"123foo999999bar1quux123456";
1088     /// let fields: Vec<&[u8]> = s.fields_with(|c| c.is_numeric()).collect();
1089     /// assert_eq!(fields, vec![B("foo"), B("bar"), B("quux")]);
1090     /// ```
1091     ///
1092     /// A byte string consisting of all codepoints satisfying the predicate
1093     /// yields no elements:
1094     ///
1095     /// ```
1096     /// use bstr::ByteSlice;
1097     ///
1098     /// assert_eq!(0, b"1911354563".fields_with(|c| c.is_numeric()).count());
1099     /// ```
1100     #[inline]
1101     fn fields_with<F: FnMut(char) -> bool>(&self, f: F) -> FieldsWith<'_, F> {
1102         FieldsWith::new(self.as_bytes(), f)
1103     }
1104 
1105     /// Returns an iterator over substrings of this byte string, separated
1106     /// by the given byte string. Each element yielded is guaranteed not to
1107     /// include the splitter substring.
1108     ///
1109     /// The splitter may be any type that can be cheaply converted into a
1110     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1111     ///
1112     /// # Examples
1113     ///
1114     /// Basic usage:
1115     ///
1116     /// ```
1117     /// use bstr::{B, ByteSlice};
1118     ///
1119     /// let x: Vec<&[u8]> = b"Mary had a little lamb".split_str(" ").collect();
1120     /// assert_eq!(x, vec![
1121     ///     B("Mary"), B("had"), B("a"), B("little"), B("lamb"),
1122     /// ]);
1123     ///
1124     /// let x: Vec<&[u8]> = b"".split_str("X").collect();
1125     /// assert_eq!(x, vec![b""]);
1126     ///
1127     /// let x: Vec<&[u8]> = b"lionXXtigerXleopard".split_str("X").collect();
1128     /// assert_eq!(x, vec![B("lion"), B(""), B("tiger"), B("leopard")]);
1129     ///
1130     /// let x: Vec<&[u8]> = b"lion::tiger::leopard".split_str("::").collect();
1131     /// assert_eq!(x, vec![B("lion"), B("tiger"), B("leopard")]);
1132     /// ```
1133     ///
1134     /// If a string contains multiple contiguous separators, you will end up
1135     /// with empty strings yielded by the iterator:
1136     ///
1137     /// ```
1138     /// use bstr::{B, ByteSlice};
1139     ///
1140     /// let x: Vec<&[u8]> = b"||||a||b|c".split_str("|").collect();
1141     /// assert_eq!(x, vec![
1142     ///     B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"),
1143     /// ]);
1144     ///
1145     /// let x: Vec<&[u8]> = b"(///)".split_str("/").collect();
1146     /// assert_eq!(x, vec![B("("), B(""), B(""), B(")")]);
1147     /// ```
1148     ///
1149     /// Separators at the start or end of a string are neighbored by empty
1150     /// strings.
1151     ///
1152     /// ```
1153     /// use bstr::{B, ByteSlice};
1154     ///
1155     /// let x: Vec<&[u8]> = b"010".split_str("0").collect();
1156     /// assert_eq!(x, vec![B(""), B("1"), B("")]);
1157     /// ```
1158     ///
1159     /// When the empty string is used as a separator, it splits every **byte**
1160     /// in the byte string, along with the beginning and end of the byte
1161     /// string.
1162     ///
1163     /// ```
1164     /// use bstr::{B, ByteSlice};
1165     ///
1166     /// let x: Vec<&[u8]> = b"rust".split_str("").collect();
1167     /// assert_eq!(x, vec![
1168     ///     B(""), B("r"), B("u"), B("s"), B("t"), B(""),
1169     /// ]);
1170     ///
1171     /// // Splitting by an empty string is not UTF-8 aware. Elements yielded
1172     /// // may not be valid UTF-8!
1173     /// let x: Vec<&[u8]> = B("☃").split_str("").collect();
1174     /// assert_eq!(x, vec![
1175     ///     B(""), B(b"\xE2"), B(b"\x98"), B(b"\x83"), B(""),
1176     /// ]);
1177     /// ```
1178     ///
1179     /// Contiguous separators, especially whitespace, can lead to possibly
1180     /// surprising behavior. For example, this code is correct:
1181     ///
1182     /// ```
1183     /// use bstr::{B, ByteSlice};
1184     ///
1185     /// let x: Vec<&[u8]> = b"    a  b c".split_str(" ").collect();
1186     /// assert_eq!(x, vec![
1187     ///     B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"),
1188     /// ]);
1189     /// ```
1190     ///
1191     /// It does *not* give you `["a", "b", "c"]`. For that behavior, use
1192     /// [`fields`](#method.fields) instead.
1193     #[inline]
1194     fn split_str<'a, B: ?Sized + AsRef<[u8]>>(
1195         &'a self,
1196         splitter: &'a B,
1197     ) -> Split<'a> {
1198         Split::new(self.as_bytes(), splitter.as_ref())
1199     }
1200 
1201     /// Returns an iterator over substrings of this byte string, separated by
1202     /// the given byte string, in reverse. Each element yielded is guaranteed
1203     /// not to include the splitter substring.
1204     ///
1205     /// The splitter may be any type that can be cheaply converted into a
1206     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1207     ///
1208     /// # Examples
1209     ///
1210     /// Basic usage:
1211     ///
1212     /// ```
1213     /// use bstr::{B, ByteSlice};
1214     ///
1215     /// let x: Vec<&[u8]> =
1216     ///     b"Mary had a little lamb".rsplit_str(" ").collect();
1217     /// assert_eq!(x, vec![
1218     ///     B("lamb"), B("little"), B("a"), B("had"), B("Mary"),
1219     /// ]);
1220     ///
1221     /// let x: Vec<&[u8]> = b"".rsplit_str("X").collect();
1222     /// assert_eq!(x, vec![b""]);
1223     ///
1224     /// let x: Vec<&[u8]> = b"lionXXtigerXleopard".rsplit_str("X").collect();
1225     /// assert_eq!(x, vec![B("leopard"), B("tiger"), B(""), B("lion")]);
1226     ///
1227     /// let x: Vec<&[u8]> = b"lion::tiger::leopard".rsplit_str("::").collect();
1228     /// assert_eq!(x, vec![B("leopard"), B("tiger"), B("lion")]);
1229     /// ```
1230     ///
1231     /// If a string contains multiple contiguous separators, you will end up
1232     /// with empty strings yielded by the iterator:
1233     ///
1234     /// ```
1235     /// use bstr::{B, ByteSlice};
1236     ///
1237     /// let x: Vec<&[u8]> = b"||||a||b|c".rsplit_str("|").collect();
1238     /// assert_eq!(x, vec![
1239     ///     B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""),
1240     /// ]);
1241     ///
1242     /// let x: Vec<&[u8]> = b"(///)".rsplit_str("/").collect();
1243     /// assert_eq!(x, vec![B(")"), B(""), B(""), B("(")]);
1244     /// ```
1245     ///
1246     /// Separators at the start or end of a string are neighbored by empty
1247     /// strings.
1248     ///
1249     /// ```
1250     /// use bstr::{B, ByteSlice};
1251     ///
1252     /// let x: Vec<&[u8]> = b"010".rsplit_str("0").collect();
1253     /// assert_eq!(x, vec![B(""), B("1"), B("")]);
1254     /// ```
1255     ///
1256     /// When the empty string is used as a separator, it splits every **byte**
1257     /// in the byte string, along with the beginning and end of the byte
1258     /// string.
1259     ///
1260     /// ```
1261     /// use bstr::{B, ByteSlice};
1262     ///
1263     /// let x: Vec<&[u8]> = b"rust".rsplit_str("").collect();
1264     /// assert_eq!(x, vec![
1265     ///     B(""), B("t"), B("s"), B("u"), B("r"), B(""),
1266     /// ]);
1267     ///
1268     /// // Splitting by an empty string is not UTF-8 aware. Elements yielded
1269     /// // may not be valid UTF-8!
1270     /// let x: Vec<&[u8]> = B("☃").rsplit_str("").collect();
1271     /// assert_eq!(x, vec![B(""), B(b"\x83"), B(b"\x98"), B(b"\xE2"), B("")]);
1272     /// ```
1273     ///
1274     /// Contiguous separators, especially whitespace, can lead to possibly
1275     /// surprising behavior. For example, this code is correct:
1276     ///
1277     /// ```
1278     /// use bstr::{B, ByteSlice};
1279     ///
1280     /// let x: Vec<&[u8]> = b"    a  b c".rsplit_str(" ").collect();
1281     /// assert_eq!(x, vec![
1282     ///     B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""),
1283     /// ]);
1284     /// ```
1285     ///
1286     /// It does *not* give you `["a", "b", "c"]`.
1287     #[inline]
1288     fn rsplit_str<'a, B: ?Sized + AsRef<[u8]>>(
1289         &'a self,
1290         splitter: &'a B,
1291     ) -> SplitReverse<'a> {
1292         SplitReverse::new(self.as_bytes(), splitter.as_ref())
1293     }
1294 
1295     /// Returns an iterator of at most `limit` substrings of this byte string,
1296     /// separated by the given byte string. If `limit` substrings are yielded,
1297     /// then the last substring will contain the remainder of this byte string.
1298     ///
    /// The splitter may be any type that can be cheaply converted into a
1300     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1301     ///
1302     /// # Examples
1303     ///
1304     /// Basic usage:
1305     ///
1306     /// ```
1307     /// use bstr::{B, ByteSlice};
1308     ///
1309     /// let x: Vec<_> = b"Mary had a little lamb".splitn_str(3, " ").collect();
1310     /// assert_eq!(x, vec![B("Mary"), B("had"), B("a little lamb")]);
1311     ///
1312     /// let x: Vec<_> = b"".splitn_str(3, "X").collect();
1313     /// assert_eq!(x, vec![b""]);
1314     ///
1315     /// let x: Vec<_> = b"lionXXtigerXleopard".splitn_str(3, "X").collect();
1316     /// assert_eq!(x, vec![B("lion"), B(""), B("tigerXleopard")]);
1317     ///
1318     /// let x: Vec<_> = b"lion::tiger::leopard".splitn_str(2, "::").collect();
1319     /// assert_eq!(x, vec![B("lion"), B("tiger::leopard")]);
1320     ///
1321     /// let x: Vec<_> = b"abcXdef".splitn_str(1, "X").collect();
1322     /// assert_eq!(x, vec![B("abcXdef")]);
1323     ///
1324     /// let x: Vec<_> = b"abcdef".splitn_str(2, "X").collect();
1325     /// assert_eq!(x, vec![B("abcdef")]);
1326     ///
1327     /// let x: Vec<_> = b"abcXdef".splitn_str(0, "X").collect();
1328     /// assert!(x.is_empty());
1329     /// ```
1330     #[inline]
1331     fn splitn_str<'a, B: ?Sized + AsRef<[u8]>>(
1332         &'a self,
1333         limit: usize,
1334         splitter: &'a B,
1335     ) -> SplitN<'a> {
1336         SplitN::new(self.as_bytes(), splitter.as_ref(), limit)
1337     }
1338 
1339     /// Returns an iterator of at most `limit` substrings of this byte string,
1340     /// separated by the given byte string, in reverse. If `limit` substrings
1341     /// are yielded, then the last substring will contain the remainder of this
1342     /// byte string.
1343     ///
    /// The splitter may be any type that can be cheaply converted into a
1345     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1346     ///
1347     /// # Examples
1348     ///
1349     /// Basic usage:
1350     ///
1351     /// ```
1352     /// use bstr::{B, ByteSlice};
1353     ///
1354     /// let x: Vec<_> =
1355     ///     b"Mary had a little lamb".rsplitn_str(3, " ").collect();
1356     /// assert_eq!(x, vec![B("lamb"), B("little"), B("Mary had a")]);
1357     ///
1358     /// let x: Vec<_> = b"".rsplitn_str(3, "X").collect();
1359     /// assert_eq!(x, vec![b""]);
1360     ///
1361     /// let x: Vec<_> = b"lionXXtigerXleopard".rsplitn_str(3, "X").collect();
1362     /// assert_eq!(x, vec![B("leopard"), B("tiger"), B("lionX")]);
1363     ///
1364     /// let x: Vec<_> = b"lion::tiger::leopard".rsplitn_str(2, "::").collect();
1365     /// assert_eq!(x, vec![B("leopard"), B("lion::tiger")]);
1366     ///
1367     /// let x: Vec<_> = b"abcXdef".rsplitn_str(1, "X").collect();
1368     /// assert_eq!(x, vec![B("abcXdef")]);
1369     ///
1370     /// let x: Vec<_> = b"abcdef".rsplitn_str(2, "X").collect();
1371     /// assert_eq!(x, vec![B("abcdef")]);
1372     ///
1373     /// let x: Vec<_> = b"abcXdef".rsplitn_str(0, "X").collect();
1374     /// assert!(x.is_empty());
1375     /// ```
1376     #[inline]
1377     fn rsplitn_str<'a, B: ?Sized + AsRef<[u8]>>(
1378         &'a self,
1379         limit: usize,
1380         splitter: &'a B,
1381     ) -> SplitNReverse<'a> {
1382         SplitNReverse::new(self.as_bytes(), splitter.as_ref(), limit)
1383     }
1384 
    /// Replace all matches of the given needle with the given replacement, and
    /// return the result as a new `Vec<u8>`.
1387     ///
1388     /// This routine is useful as a convenience. If you need to reuse an
1389     /// allocation, use [`replace_into`](#method.replace_into) instead.
1390     ///
1391     /// # Examples
1392     ///
1393     /// Basic usage:
1394     ///
1395     /// ```
1396     /// use bstr::ByteSlice;
1397     ///
1398     /// let s = b"this is old".replace("old", "new");
1399     /// assert_eq!(s, "this is new".as_bytes());
1400     /// ```
1401     ///
1402     /// When the pattern doesn't match:
1403     ///
1404     /// ```
1405     /// use bstr::ByteSlice;
1406     ///
1407     /// let s = b"this is old".replace("nada nada", "limonada");
1408     /// assert_eq!(s, "this is old".as_bytes());
1409     /// ```
1410     ///
1411     /// When the needle is an empty string:
1412     ///
1413     /// ```
1414     /// use bstr::ByteSlice;
1415     ///
1416     /// let s = b"foo".replace("", "Z");
1417     /// assert_eq!(s, "ZfZoZoZ".as_bytes());
1418     /// ```
1419     #[cfg(feature = "std")]
1420     #[inline]
1421     fn replace<N: AsRef<[u8]>, R: AsRef<[u8]>>(
1422         &self,
1423         needle: N,
1424         replacement: R,
1425     ) -> Vec<u8> {
1426         let mut dest = Vec::with_capacity(self.as_bytes().len());
1427         self.replace_into(needle, replacement, &mut dest);
1428         dest
1429     }
1430 
    /// Replace up to `limit` matches of the given needle with the given
    /// replacement, and return the result as a new `Vec<u8>`.
1433     ///
1434     /// This routine is useful as a convenience. If you need to reuse an
1435     /// allocation, use [`replacen_into`](#method.replacen_into) instead.
1436     ///
1437     /// # Examples
1438     ///
1439     /// Basic usage:
1440     ///
1441     /// ```
1442     /// use bstr::ByteSlice;
1443     ///
1444     /// let s = b"foofoo".replacen("o", "z", 2);
1445     /// assert_eq!(s, "fzzfoo".as_bytes());
1446     /// ```
1447     ///
1448     /// When the pattern doesn't match:
1449     ///
1450     /// ```
1451     /// use bstr::ByteSlice;
1452     ///
1453     /// let s = b"foofoo".replacen("a", "z", 2);
1454     /// assert_eq!(s, "foofoo".as_bytes());
1455     /// ```
1456     ///
1457     /// When the needle is an empty string:
1458     ///
1459     /// ```
1460     /// use bstr::ByteSlice;
1461     ///
1462     /// let s = b"foo".replacen("", "Z", 2);
1463     /// assert_eq!(s, "ZfZoo".as_bytes());
1464     /// ```
1465     #[cfg(feature = "std")]
1466     #[inline]
1467     fn replacen<N: AsRef<[u8]>, R: AsRef<[u8]>>(
1468         &self,
1469         needle: N,
1470         replacement: R,
1471         limit: usize,
1472     ) -> Vec<u8> {
1473         let mut dest = Vec::with_capacity(self.as_bytes().len());
1474         self.replacen_into(needle, replacement, limit, &mut dest);
1475         dest
1476     }
1477 
1478     /// Replace all matches of the given needle with the given replacement,
1479     /// and write the result into the provided `Vec<u8>`.
1480     ///
1481     /// This does **not** clear `dest` before writing to it.
1482     ///
    /// This routine is useful for reusing an allocation. For a more convenient
1484     /// API, use [`replace`](#method.replace) instead.
1485     ///
1486     /// # Examples
1487     ///
1488     /// Basic usage:
1489     ///
1490     /// ```
1491     /// use bstr::ByteSlice;
1492     ///
1493     /// let s = b"this is old";
1494     ///
1495     /// let mut dest = vec![];
1496     /// s.replace_into("old", "new", &mut dest);
1497     /// assert_eq!(dest, "this is new".as_bytes());
1498     /// ```
1499     ///
1500     /// When the pattern doesn't match:
1501     ///
1502     /// ```
1503     /// use bstr::ByteSlice;
1504     ///
1505     /// let s = b"this is old";
1506     ///
1507     /// let mut dest = vec![];
1508     /// s.replace_into("nada nada", "limonada", &mut dest);
1509     /// assert_eq!(dest, "this is old".as_bytes());
1510     /// ```
1511     ///
1512     /// When the needle is an empty string:
1513     ///
1514     /// ```
1515     /// use bstr::ByteSlice;
1516     ///
1517     /// let s = b"foo";
1518     ///
1519     /// let mut dest = vec![];
1520     /// s.replace_into("", "Z", &mut dest);
1521     /// assert_eq!(dest, "ZfZoZoZ".as_bytes());
1522     /// ```
1523     #[cfg(feature = "std")]
1524     #[inline]
1525     fn replace_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
1526         &self,
1527         needle: N,
1528         replacement: R,
1529         dest: &mut Vec<u8>,
1530     ) {
1531         let (needle, replacement) = (needle.as_ref(), replacement.as_ref());
1532 
1533         let mut last = 0;
1534         for start in self.find_iter(needle) {
1535             dest.push_str(&self.as_bytes()[last..start]);
1536             dest.push_str(replacement);
1537             last = start + needle.len();
1538         }
1539         dest.push_str(&self.as_bytes()[last..]);
1540     }
1541 
1542     /// Replace up to `limit` matches of the given needle with the given
1543     /// replacement, and write the result into the provided `Vec<u8>`.
1544     ///
1545     /// This does **not** clear `dest` before writing to it.
1546     ///
    /// This routine is useful for reusing an allocation. For a more convenient
1548     /// API, use [`replacen`](#method.replacen) instead.
1549     ///
1550     /// # Examples
1551     ///
1552     /// Basic usage:
1553     ///
1554     /// ```
1555     /// use bstr::ByteSlice;
1556     ///
1557     /// let s = b"foofoo";
1558     ///
1559     /// let mut dest = vec![];
1560     /// s.replacen_into("o", "z", 2, &mut dest);
1561     /// assert_eq!(dest, "fzzfoo".as_bytes());
1562     /// ```
1563     ///
1564     /// When the pattern doesn't match:
1565     ///
1566     /// ```
1567     /// use bstr::ByteSlice;
1568     ///
1569     /// let s = b"foofoo";
1570     ///
1571     /// let mut dest = vec![];
1572     /// s.replacen_into("a", "z", 2, &mut dest);
1573     /// assert_eq!(dest, "foofoo".as_bytes());
1574     /// ```
1575     ///
1576     /// When the needle is an empty string:
1577     ///
1578     /// ```
1579     /// use bstr::ByteSlice;
1580     ///
1581     /// let s = b"foo";
1582     ///
1583     /// let mut dest = vec![];
1584     /// s.replacen_into("", "Z", 2, &mut dest);
1585     /// assert_eq!(dest, "ZfZoo".as_bytes());
1586     /// ```
1587     #[cfg(feature = "std")]
1588     #[inline]
1589     fn replacen_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
1590         &self,
1591         needle: N,
1592         replacement: R,
1593         limit: usize,
1594         dest: &mut Vec<u8>,
1595     ) {
1596         let (needle, replacement) = (needle.as_ref(), replacement.as_ref());
1597 
1598         let mut last = 0;
1599         for start in self.find_iter(needle).take(limit) {
1600             dest.push_str(&self.as_bytes()[last..start]);
1601             dest.push_str(replacement);
1602             last = start + needle.len();
1603         }
1604         dest.push_str(&self.as_bytes()[last..]);
1605     }
1606 
1607     /// Returns an iterator over the bytes in this byte string.
1608     ///
1609     /// # Examples
1610     ///
1611     /// Basic usage:
1612     ///
1613     /// ```
1614     /// use bstr::ByteSlice;
1615     ///
1616     /// let bs = b"foobar";
1617     /// let bytes: Vec<u8> = bs.bytes().collect();
1618     /// assert_eq!(bytes, bs);
1619     /// ```
1620     #[inline]
1621     fn bytes(&self) -> Bytes<'_> {
1622         Bytes { it: self.as_bytes().iter() }
1623     }
1624 
1625     /// Returns an iterator over the Unicode scalar values in this byte string.
1626     /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
1627     /// is yielded instead.
1628     ///
1629     /// # Examples
1630     ///
1631     /// Basic usage:
1632     ///
1633     /// ```
1634     /// use bstr::ByteSlice;
1635     ///
1636     /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1637     /// let chars: Vec<char> = bs.chars().collect();
    /// assert_eq!(vec!['☃', '\u{FFFD}', '\u{1D783}', '\u{FFFD}', 'a'], chars);
1639     /// ```
1640     ///
1641     /// Codepoints can also be iterated over in reverse:
1642     ///
1643     /// ```
1644     /// use bstr::ByteSlice;
1645     ///
1646     /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1647     /// let chars: Vec<char> = bs.chars().rev().collect();
    /// assert_eq!(vec!['a', '\u{FFFD}', '\u{1D783}', '\u{FFFD}', '☃'], chars);
1649     /// ```
1650     #[inline]
1651     fn chars(&self) -> Chars<'_> {
1652         Chars::new(self.as_bytes())
1653     }
1654 
1655     /// Returns an iterator over the Unicode scalar values in this byte string
1656     /// along with their starting and ending byte index positions. If invalid
1657     /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1658     /// instead.
1659     ///
1660     /// Note that this is slightly different from the `CharIndices` iterator
1661     /// provided by the standard library. Aside from working on possibly
1662     /// invalid UTF-8, this iterator provides both the corresponding starting
1663     /// and ending byte indices of each codepoint yielded. The ending position
1664     /// is necessary to slice the original byte string when invalid UTF-8 bytes
1665     /// are converted into a Unicode replacement codepoint, since a single
1666     /// replacement codepoint can substitute anywhere from 1 to 3 invalid bytes
1667     /// (inclusive).
1668     ///
1669     /// # Examples
1670     ///
1671     /// Basic usage:
1672     ///
1673     /// ```
1674     /// use bstr::ByteSlice;
1675     ///
1676     /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1677     /// let chars: Vec<(usize, usize, char)> = bs.char_indices().collect();
1678     /// assert_eq!(chars, vec![
1679     ///     (0, 3, '☃'),
1680     ///     (3, 4, '\u{FFFD}'),
    ///     (4, 8, '\u{1D783}'),
1682     ///     (8, 10, '\u{FFFD}'),
1683     ///     (10, 11, 'a'),
1684     /// ]);
1685     /// ```
1686     ///
1687     /// Codepoints can also be iterated over in reverse:
1688     ///
1689     /// ```
1690     /// use bstr::ByteSlice;
1691     ///
1692     /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1693     /// let chars: Vec<(usize, usize, char)> = bs
1694     ///     .char_indices()
1695     ///     .rev()
1696     ///     .collect();
1697     /// assert_eq!(chars, vec![
1698     ///     (10, 11, 'a'),
1699     ///     (8, 10, '\u{FFFD}'),
    ///     (4, 8, '\u{1D783}'),
1701     ///     (3, 4, '\u{FFFD}'),
1702     ///     (0, 3, '☃'),
1703     /// ]);
1704     /// ```
1705     #[inline]
1706     fn char_indices(&self) -> CharIndices<'_> {
1707         CharIndices::new(self.as_bytes())
1708     }
1709 
1710     /// Iterate over chunks of valid UTF-8.
1711     ///
1712     /// The iterator returned yields chunks of valid UTF-8 separated by invalid
1713     /// UTF-8 bytes, if they exist. Invalid UTF-8 bytes are always 1-3 bytes,
1714     /// which are determined via the "substitution of maximal subparts"
1715     /// strategy described in the docs for the
1716     /// [`ByteSlice::to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy)
1717     /// method.
1718     ///
1719     /// # Examples
1720     ///
1721     /// This example shows how to gather all valid and invalid chunks from a
1722     /// byte slice:
1723     ///
1724     /// ```
1725     /// use bstr::{ByteSlice, Utf8Chunk};
1726     ///
1727     /// let bytes = b"foo\xFD\xFEbar\xFF";
1728     ///
1729     /// let (mut valid_chunks, mut invalid_chunks) = (vec![], vec![]);
1730     /// for chunk in bytes.utf8_chunks() {
1731     ///     if !chunk.valid().is_empty() {
1732     ///         valid_chunks.push(chunk.valid());
1733     ///     }
1734     ///     if !chunk.invalid().is_empty() {
1735     ///         invalid_chunks.push(chunk.invalid());
1736     ///     }
1737     /// }
1738     ///
1739     /// assert_eq!(valid_chunks, vec!["foo", "bar"]);
1740     /// assert_eq!(invalid_chunks, vec![b"\xFD", b"\xFE", b"\xFF"]);
1741     /// ```
1742     #[inline]
1743     fn utf8_chunks(&self) -> Utf8Chunks<'_> {
1744         Utf8Chunks { bytes: self.as_bytes() }
1745     }
1746 
1747     /// Returns an iterator over the grapheme clusters in this byte string.
1748     /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
1749     /// is yielded instead.
1750     ///
1751     /// # Examples
1752     ///
1753     /// This example shows how multiple codepoints can combine to form a
1754     /// single grapheme cluster:
1755     ///
1756     /// ```
1757     /// use bstr::ByteSlice;
1758     ///
1759     /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
1760     /// let graphemes: Vec<&str> = bs.graphemes().collect();
1761     /// assert_eq!(vec!["à̖", "����"], graphemes);
1762     /// ```
1763     ///
1764     /// This shows that graphemes can be iterated over in reverse:
1765     ///
1766     /// ```
1767     /// use bstr::ByteSlice;
1768     ///
1769     /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
1770     /// let graphemes: Vec<&str> = bs.graphemes().rev().collect();
1771     /// assert_eq!(vec!["����", "à̖"], graphemes);
1772     /// ```
1773     #[cfg(feature = "unicode")]
1774     #[inline]
1775     fn graphemes(&self) -> Graphemes<'_> {
1776         Graphemes::new(self.as_bytes())
1777     }
1778 
1779     /// Returns an iterator over the grapheme clusters in this byte string
1780     /// along with their starting and ending byte index positions. If invalid
1781     /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1782     /// instead.
1783     ///
1784     /// # Examples
1785     ///
1786     /// This example shows how to get the byte offsets of each individual
1787     /// grapheme cluster:
1788     ///
1789     /// ```
1790     /// use bstr::ByteSlice;
1791     ///
1792     /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
1793     /// let graphemes: Vec<(usize, usize, &str)> =
1794     ///     bs.grapheme_indices().collect();
1795     /// assert_eq!(vec![(0, 5, "à̖"), (5, 13, "����")], graphemes);
1796     /// ```
1797     ///
1798     /// This example shows what happens when invalid UTF-8 is enountered. Note
1799     /// that the offsets are valid indices into the original string, and do
1800     /// not necessarily correspond to the length of the `&str` returned!
1801     ///
1802     /// ```
1803     /// use bstr::{ByteSlice, ByteVec};
1804     ///
1805     /// let mut bytes = vec![];
1806     /// bytes.push_str("a\u{0300}\u{0316}");
1807     /// bytes.push(b'\xFF');
1808     /// bytes.push_str("\u{1F1FA}\u{1F1F8}");
1809     ///
1810     /// let graphemes: Vec<(usize, usize, &str)> =
1811     ///     bytes.grapheme_indices().collect();
1812     /// assert_eq!(
1813     ///     graphemes,
1814     ///     vec![(0, 5, "à̖"), (5, 6, "\u{FFFD}"), (6, 14, "����")]
1815     /// );
1816     /// ```
1817     #[cfg(feature = "unicode")]
1818     #[inline]
1819     fn grapheme_indices(&self) -> GraphemeIndices<'_> {
1820         GraphemeIndices::new(self.as_bytes())
1821     }
1822 
1823     /// Returns an iterator over the words in this byte string. If invalid
1824     /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1825     /// instead.
1826     ///
1827     /// This is similar to
1828     /// [`words_with_breaks`](trait.ByteSlice.html#method.words_with_breaks),
1829     /// except it only returns elements that contain a "word" character. A word
1830     /// character is defined by UTS #18 (Annex C) to be the combination of the
1831     /// `Alphabetic` and `Join_Control` properties, along with the
1832     /// `Decimal_Number`, `Mark` and `Connector_Punctuation` general
1833     /// categories.
1834     ///
1835     /// Since words are made up of one or more codepoints, this iterator
1836     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1837     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1838     ///
1839     /// # Examples
1840     ///
1841     /// Basic usage:
1842     ///
1843     /// ```
1844     /// use bstr::ByteSlice;
1845     ///
1846     /// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#;
1847     /// let words: Vec<&str> = bs.words().collect();
1848     /// assert_eq!(words, vec![
1849     ///     "The", "quick", "brown", "fox", "can't",
1850     ///     "jump", "32.3", "feet", "right",
1851     /// ]);
1852     /// ```
1853     #[cfg(feature = "unicode")]
1854     #[inline]
1855     fn words(&self) -> Words<'_> {
1856         Words::new(self.as_bytes())
1857     }
1858 
1859     /// Returns an iterator over the words in this byte string along with
1860     /// their starting and ending byte index positions.
1861     ///
1862     /// This is similar to
1863     /// [`words_with_break_indices`](trait.ByteSlice.html#method.words_with_break_indices),
1864     /// except it only returns elements that contain a "word" character. A word
1865     /// character is defined by UTS #18 (Annex C) to be the combination of the
1866     /// `Alphabetic` and `Join_Control` properties, along with the
1867     /// `Decimal_Number`, `Mark` and `Connector_Punctuation` general
1868     /// categories.
1869     ///
1870     /// Since words are made up of one or more codepoints, this iterator
1871     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1872     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1873     ///
1874     /// # Examples
1875     ///
1876     /// This example shows how to get the byte offsets of each individual
1877     /// word:
1878     ///
1879     /// ```
1880     /// use bstr::ByteSlice;
1881     ///
1882     /// let bs = b"can't jump 32.3 feet";
1883     /// let words: Vec<(usize, usize, &str)> = bs.word_indices().collect();
1884     /// assert_eq!(words, vec![
1885     ///     (0, 5, "can't"),
1886     ///     (6, 10, "jump"),
1887     ///     (11, 15, "32.3"),
1888     ///     (16, 20, "feet"),
1889     /// ]);
1890     /// ```
1891     #[cfg(feature = "unicode")]
1892     #[inline]
1893     fn word_indices(&self) -> WordIndices<'_> {
1894         WordIndices::new(self.as_bytes())
1895     }
1896 
1897     /// Returns an iterator over the words in this byte string, along with
1898     /// all breaks between the words. Concatenating all elements yielded by
1899     /// the iterator results in the original string (modulo Unicode replacement
1900     /// codepoint substitutions if invalid UTF-8 is encountered).
1901     ///
1902     /// Since words are made up of one or more codepoints, this iterator
1903     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1904     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1905     ///
1906     /// # Examples
1907     ///
1908     /// Basic usage:
1909     ///
1910     /// ```
1911     /// use bstr::ByteSlice;
1912     ///
1913     /// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#;
1914     /// let words: Vec<&str> = bs.words_with_breaks().collect();
1915     /// assert_eq!(words, vec![
1916     ///     "The", " ", "quick", " ", "(", "\"", "brown", "\"", ")",
1917     ///     " ", "fox", " ", "can't", " ", "jump", " ", "32.3", " ", "feet",
1918     ///     ",", " ", "right", "?",
1919     /// ]);
1920     /// ```
1921     #[cfg(feature = "unicode")]
1922     #[inline]
1923     fn words_with_breaks(&self) -> WordsWithBreaks<'_> {
1924         WordsWithBreaks::new(self.as_bytes())
1925     }
1926 
1927     /// Returns an iterator over the words and their byte offsets in this
1928     /// byte string, along with all breaks between the words. Concatenating
1929     /// all elements yielded by the iterator results in the original string
1930     /// (modulo Unicode replacement codepoint substitutions if invalid UTF-8 is
1931     /// encountered).
1932     ///
1933     /// Since words are made up of one or more codepoints, this iterator
1934     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1935     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1936     ///
1937     /// # Examples
1938     ///
1939     /// This example shows how to get the byte offsets of each individual
1940     /// word:
1941     ///
1942     /// ```
1943     /// use bstr::ByteSlice;
1944     ///
1945     /// let bs = b"can't jump 32.3 feet";
1946     /// let words: Vec<(usize, usize, &str)> =
1947     ///     bs.words_with_break_indices().collect();
1948     /// assert_eq!(words, vec![
1949     ///     (0, 5, "can't"),
1950     ///     (5, 6, " "),
1951     ///     (6, 10, "jump"),
1952     ///     (10, 11, " "),
1953     ///     (11, 15, "32.3"),
1954     ///     (15, 16, " "),
1955     ///     (16, 20, "feet"),
1956     /// ]);
1957     /// ```
1958     #[cfg(feature = "unicode")]
1959     #[inline]
1960     fn words_with_break_indices(&self) -> WordsWithBreakIndices<'_> {
1961         WordsWithBreakIndices::new(self.as_bytes())
1962     }
1963 
1964     /// Returns an iterator over the sentences in this byte string.
1965     ///
1966     /// Typically, a sentence will include its trailing punctuation and
1967     /// whitespace. Concatenating all elements yielded by the iterator
1968     /// results in the original string (modulo Unicode replacement codepoint
1969     /// substitutions if invalid UTF-8 is encountered).
1970     ///
1971     /// Since sentences are made up of one or more codepoints, this iterator
1972     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1973     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1974     ///
1975     /// # Examples
1976     ///
1977     /// Basic usage:
1978     ///
1979     /// ```
1980     /// use bstr::ByteSlice;
1981     ///
1982     /// let bs = b"I want this. Not that. Right now.";
1983     /// let sentences: Vec<&str> = bs.sentences().collect();
1984     /// assert_eq!(sentences, vec![
1985     ///     "I want this. ",
1986     ///     "Not that. ",
1987     ///     "Right now.",
1988     /// ]);
1989     /// ```
1990     #[cfg(feature = "unicode")]
1991     #[inline]
1992     fn sentences(&self) -> Sentences<'_> {
1993         Sentences::new(self.as_bytes())
1994     }
1995 
1996     /// Returns an iterator over the sentences in this byte string along with
1997     /// their starting and ending byte index positions.
1998     ///
1999     /// Typically, a sentence will include its trailing punctuation and
2000     /// whitespace. Concatenating all elements yielded by the iterator
2001     /// results in the original string (modulo Unicode replacement codepoint
2002     /// substitutions if invalid UTF-8 is encountered).
2003     ///
2004     /// Since sentences are made up of one or more codepoints, this iterator
2005     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2006     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2007     ///
2008     /// # Examples
2009     ///
2010     /// Basic usage:
2011     ///
2012     /// ```
2013     /// use bstr::ByteSlice;
2014     ///
2015     /// let bs = b"I want this. Not that. Right now.";
2016     /// let sentences: Vec<(usize, usize, &str)> =
2017     ///     bs.sentence_indices().collect();
2018     /// assert_eq!(sentences, vec![
2019     ///     (0, 13, "I want this. "),
2020     ///     (13, 23, "Not that. "),
2021     ///     (23, 33, "Right now."),
2022     /// ]);
2023     /// ```
2024     #[cfg(feature = "unicode")]
2025     #[inline]
2026     fn sentence_indices(&self) -> SentenceIndices<'_> {
2027         SentenceIndices::new(self.as_bytes())
2028     }
2029 
2030     /// An iterator over all lines in a byte string, without their
2031     /// terminators.
2032     ///
2033     /// For this iterator, the only line terminators recognized are `\r\n` and
2034     /// `\n`.
2035     ///
2036     /// # Examples
2037     ///
2038     /// Basic usage:
2039     ///
2040     /// ```
2041     /// use bstr::{B, ByteSlice};
2042     ///
2043     /// let s = b"\
2044     /// foo
2045     ///
2046     /// bar\r
2047     /// baz
2048     ///
2049     ///
2050     /// quux";
2051     /// let lines: Vec<&[u8]> = s.lines().collect();
2052     /// assert_eq!(lines, vec![
2053     ///     B("foo"), B(""), B("bar"), B("baz"), B(""), B(""), B("quux"),
2054     /// ]);
2055     /// ```
2056     #[inline]
2057     fn lines(&self) -> Lines<'_> {
2058         Lines::new(self.as_bytes())
2059     }
2060 
2061     /// An iterator over all lines in a byte string, including their
2062     /// terminators.
2063     ///
2064     /// For this iterator, the only line terminator recognized is `\n`. (Since
2065     /// line terminators are included, this also handles `\r\n` line endings.)
2066     ///
2067     /// Line terminators are only included if they are present in the original
2068     /// byte string. For example, the last line in a byte string may not end
2069     /// with a line terminator.
2070     ///
2071     /// Concatenating all elements yielded by this iterator is guaranteed to
2072     /// yield the original byte string.
2073     ///
2074     /// # Examples
2075     ///
2076     /// Basic usage:
2077     ///
2078     /// ```
2079     /// use bstr::{B, ByteSlice};
2080     ///
2081     /// let s = b"\
2082     /// foo
2083     ///
2084     /// bar\r
2085     /// baz
2086     ///
2087     ///
2088     /// quux";
2089     /// let lines: Vec<&[u8]> = s.lines_with_terminator().collect();
2090     /// assert_eq!(lines, vec![
2091     ///     B("foo\n"),
2092     ///     B("\n"),
2093     ///     B("bar\r\n"),
2094     ///     B("baz\n"),
2095     ///     B("\n"),
2096     ///     B("\n"),
2097     ///     B("quux"),
2098     /// ]);
2099     /// ```
2100     #[inline]
2101     fn lines_with_terminator(&self) -> LinesWithTerminator<'_> {
2102         LinesWithTerminator::new(self.as_bytes())
2103     }
2104 
2105     /// Return a byte string slice with leading and trailing whitespace
2106     /// removed.
2107     ///
2108     /// Whitespace is defined according to the terms of the `White_Space`
2109     /// Unicode property.
2110     ///
2111     /// # Examples
2112     ///
2113     /// Basic usage:
2114     ///
2115     /// ```
2116     /// use bstr::{B, ByteSlice};
2117     ///
2118     /// let s = B(" foo\tbar\t\u{2003}\n");
2119     /// assert_eq!(s.trim(), B("foo\tbar"));
2120     /// ```
2121     #[cfg(feature = "unicode")]
2122     #[inline]
2123     fn trim(&self) -> &[u8] {
2124         self.trim_start().trim_end()
2125     }
2126 
2127     /// Return a byte string slice with leading whitespace removed.
2128     ///
2129     /// Whitespace is defined according to the terms of the `White_Space`
2130     /// Unicode property.
2131     ///
2132     /// # Examples
2133     ///
2134     /// Basic usage:
2135     ///
2136     /// ```
2137     /// use bstr::{B, ByteSlice};
2138     ///
2139     /// let s = B(" foo\tbar\t\u{2003}\n");
2140     /// assert_eq!(s.trim_start(), B("foo\tbar\t\u{2003}\n"));
2141     /// ```
2142     #[cfg(feature = "unicode")]
2143     #[inline]
2144     fn trim_start(&self) -> &[u8] {
2145         let start = whitespace_len_fwd(self.as_bytes());
2146         &self.as_bytes()[start..]
2147     }
2148 
2149     /// Return a byte string slice with trailing whitespace removed.
2150     ///
2151     /// Whitespace is defined according to the terms of the `White_Space`
2152     /// Unicode property.
2153     ///
2154     /// # Examples
2155     ///
2156     /// Basic usage:
2157     ///
2158     /// ```
2159     /// use bstr::{B, ByteSlice};
2160     ///
2161     /// let s = B(" foo\tbar\t\u{2003}\n");
2162     /// assert_eq!(s.trim_end(), B(" foo\tbar"));
2163     /// ```
2164     #[cfg(feature = "unicode")]
2165     #[inline]
2166     fn trim_end(&self) -> &[u8] {
2167         let end = whitespace_len_rev(self.as_bytes());
2168         &self.as_bytes()[..end]
2169     }
2170 
2171     /// Return a byte string slice with leading and trailing characters
2172     /// satisfying the given predicate removed.
2173     ///
2174     /// # Examples
2175     ///
2176     /// Basic usage:
2177     ///
2178     /// ```
2179     /// use bstr::{B, ByteSlice};
2180     ///
2181     /// let s = b"123foo5bar789";
2182     /// assert_eq!(s.trim_with(|c| c.is_numeric()), B("foo5bar"));
2183     /// ```
2184     #[inline]
2185     fn trim_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
2186         self.trim_start_with(&mut trim).trim_end_with(&mut trim)
2187     }
2188 
2189     /// Return a byte string slice with leading characters satisfying the given
2190     /// predicate removed.
2191     ///
2192     /// # Examples
2193     ///
2194     /// Basic usage:
2195     ///
2196     /// ```
2197     /// use bstr::{B, ByteSlice};
2198     ///
2199     /// let s = b"123foo5bar789";
2200     /// assert_eq!(s.trim_start_with(|c| c.is_numeric()), B("foo5bar789"));
2201     /// ```
2202     #[inline]
2203     fn trim_start_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
2204         for (s, _, ch) in self.char_indices() {
2205             if !trim(ch) {
2206                 return &self.as_bytes()[s..];
2207             }
2208         }
2209         b""
2210     }
2211 
2212     /// Return a byte string slice with trailing characters satisfying the
2213     /// given predicate removed.
2214     ///
2215     /// # Examples
2216     ///
2217     /// Basic usage:
2218     ///
2219     /// ```
2220     /// use bstr::{B, ByteSlice};
2221     ///
2222     /// let s = b"123foo5bar789";
2223     /// assert_eq!(s.trim_end_with(|c| c.is_numeric()), B("123foo5bar"));
2224     /// ```
2225     #[inline]
2226     fn trim_end_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
2227         for (_, e, ch) in self.char_indices().rev() {
2228             if !trim(ch) {
2229                 return &self.as_bytes()[..e];
2230             }
2231         }
2232         b""
2233     }
2234 
2235     /// Returns a new `Vec<u8>` containing the lowercase equivalent of this
2236     /// byte string.
2237     ///
2238     /// In this case, lowercase is defined according to the `Lowercase` Unicode
2239     /// property.
2240     ///
2241     /// If invalid UTF-8 is seen, or if a character has no lowercase variant,
2242     /// then it is written to the given buffer unchanged.
2243     ///
2244     /// Note that some characters in this byte string may expand into multiple
2245     /// characters when changing the case, so the number of bytes written to
2246     /// the given byte string may not be equivalent to the number of bytes in
2247     /// this byte string.
2248     ///
2249     /// If you'd like to reuse an allocation for performance reasons, then use
2250     /// [`to_lowercase_into`](#method.to_lowercase_into) instead.
2251     ///
2252     /// # Examples
2253     ///
2254     /// Basic usage:
2255     ///
2256     /// ```
2257     /// use bstr::{B, ByteSlice};
2258     ///
2259     /// let s = B("HELLO Β");
2260     /// assert_eq!("hello β".as_bytes(), s.to_lowercase().as_bytes());
2261     /// ```
2262     ///
2263     /// Scripts without case are not changed:
2264     ///
2265     /// ```
2266     /// use bstr::{B, ByteSlice};
2267     ///
2268     /// let s = B("农历新年");
2269     /// assert_eq!("农历新年".as_bytes(), s.to_lowercase().as_bytes());
2270     /// ```
2271     ///
2272     /// Invalid UTF-8 remains as is:
2273     ///
2274     /// ```
2275     /// use bstr::{B, ByteSlice};
2276     ///
2277     /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
2278     /// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), s.to_lowercase().as_bytes());
2279     /// ```
2280     #[cfg(all(feature = "std", feature = "unicode"))]
2281     #[inline]
2282     fn to_lowercase(&self) -> Vec<u8> {
2283         let mut buf = vec![];
2284         self.to_lowercase_into(&mut buf);
2285         buf
2286     }
2287 
2288     /// Writes the lowercase equivalent of this byte string into the given
2289     /// buffer. The buffer is not cleared before written to.
2290     ///
2291     /// In this case, lowercase is defined according to the `Lowercase`
2292     /// Unicode property.
2293     ///
2294     /// If invalid UTF-8 is seen, or if a character has no lowercase variant,
2295     /// then it is written to the given buffer unchanged.
2296     ///
2297     /// Note that some characters in this byte string may expand into multiple
2298     /// characters when changing the case, so the number of bytes written to
2299     /// the given byte string may not be equivalent to the number of bytes in
2300     /// this byte string.
2301     ///
2302     /// If you don't need to amortize allocation and instead prefer
2303     /// convenience, then use [`to_lowercase`](#method.to_lowercase) instead.
2304     ///
2305     /// # Examples
2306     ///
2307     /// Basic usage:
2308     ///
2309     /// ```
2310     /// use bstr::{B, ByteSlice};
2311     ///
2312     /// let s = B("HELLO Β");
2313     ///
2314     /// let mut buf = vec![];
2315     /// s.to_lowercase_into(&mut buf);
2316     /// assert_eq!("hello β".as_bytes(), buf.as_bytes());
2317     /// ```
2318     ///
2319     /// Scripts without case are not changed:
2320     ///
2321     /// ```
2322     /// use bstr::{B, ByteSlice};
2323     ///
2324     /// let s = B("农历新年");
2325     ///
2326     /// let mut buf = vec![];
2327     /// s.to_lowercase_into(&mut buf);
2328     /// assert_eq!("农历新年".as_bytes(), buf.as_bytes());
2329     /// ```
2330     ///
2331     /// Invalid UTF-8 remains as is:
2332     ///
2333     /// ```
2334     /// use bstr::{B, ByteSlice};
2335     ///
2336     /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
2337     ///
2338     /// let mut buf = vec![];
2339     /// s.to_lowercase_into(&mut buf);
2340     /// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), buf.as_bytes());
2341     /// ```
2342     #[cfg(all(feature = "std", feature = "unicode"))]
2343     #[inline]
2344     fn to_lowercase_into(&self, buf: &mut Vec<u8>) {
2345         // TODO: This is the best we can do given what std exposes I think.
2346         // If we roll our own case handling, then we might be able to do this
2347         // a bit faster. We shouldn't roll our own case handling unless we
2348         // need to, e.g., for doing caseless matching or case folding.
2349 
2350         // TODO(BUG): This doesn't handle any special casing rules.
2351 
2352         buf.reserve(self.as_bytes().len());
2353         for (s, e, ch) in self.char_indices() {
2354             if ch == '\u{FFFD}' {
2355                 buf.push_str(&self.as_bytes()[s..e]);
2356             } else if ch.is_ascii() {
2357                 buf.push_char(ch.to_ascii_lowercase());
2358             } else {
2359                 for upper in ch.to_lowercase() {
2360                     buf.push_char(upper);
2361                 }
2362             }
2363         }
2364     }
2365 
2366     /// Returns a new `Vec<u8>` containing the ASCII lowercase equivalent of
2367     /// this byte string.
2368     ///
2369     /// In this case, lowercase is only defined in ASCII letters. Namely, the
2370     /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
2371     /// In particular, the length of the byte string returned is always
2372     /// equivalent to the length of this byte string.
2373     ///
2374     /// If you'd like to reuse an allocation for performance reasons, then use
2375     /// [`make_ascii_lowercase`](#method.make_ascii_lowercase) to perform
2376     /// the conversion in place.
2377     ///
2378     /// # Examples
2379     ///
2380     /// Basic usage:
2381     ///
2382     /// ```
2383     /// use bstr::{B, ByteSlice};
2384     ///
2385     /// let s = B("HELLO Β");
2386     /// assert_eq!("hello Β".as_bytes(), s.to_ascii_lowercase().as_bytes());
2387     /// ```
2388     ///
2389     /// Invalid UTF-8 remains as is:
2390     ///
2391     /// ```
2392     /// use bstr::{B, ByteSlice};
2393     ///
2394     /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
2395     /// assert_eq!(s.to_ascii_lowercase(), B(b"foo\xFFbar\xE2\x98baz"));
2396     /// ```
2397     #[cfg(feature = "std")]
2398     #[inline]
2399     fn to_ascii_lowercase(&self) -> Vec<u8> {
2400         self.as_bytes().to_ascii_lowercase()
2401     }
2402 
2403     /// Convert this byte string to its lowercase ASCII equivalent in place.
2404     ///
2405     /// In this case, lowercase is only defined in ASCII letters. Namely, the
2406     /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
2407     ///
2408     /// If you don't need to do the conversion in
2409     /// place and instead prefer convenience, then use
2410     /// [`to_ascii_lowercase`](#method.to_ascii_lowercase) instead.
2411     ///
2412     /// # Examples
2413     ///
2414     /// Basic usage:
2415     ///
2416     /// ```
2417     /// use bstr::ByteSlice;
2418     ///
2419     /// let mut s = <Vec<u8>>::from("HELLO Β");
2420     /// s.make_ascii_lowercase();
2421     /// assert_eq!(s, "hello Β".as_bytes());
2422     /// ```
2423     ///
2424     /// Invalid UTF-8 remains as is:
2425     ///
2426     /// ```
2427     /// use bstr::{B, ByteSlice, ByteVec};
2428     ///
2429     /// let mut s = <Vec<u8>>::from_slice(b"FOO\xFFBAR\xE2\x98BAZ");
2430     /// s.make_ascii_lowercase();
2431     /// assert_eq!(s, B(b"foo\xFFbar\xE2\x98baz"));
2432     /// ```
2433     #[inline]
2434     fn make_ascii_lowercase(&mut self) {
2435         self.as_bytes_mut().make_ascii_lowercase();
2436     }
2437 
2438     /// Returns a new `Vec<u8>` containing the uppercase equivalent of this
2439     /// byte string.
2440     ///
2441     /// In this case, uppercase is defined according to the `Uppercase`
2442     /// Unicode property.
2443     ///
2444     /// If invalid UTF-8 is seen, or if a character has no uppercase variant,
2445     /// then it is written to the given buffer unchanged.
2446     ///
2447     /// Note that some characters in this byte string may expand into multiple
2448     /// characters when changing the case, so the number of bytes written to
2449     /// the given byte string may not be equivalent to the number of bytes in
2450     /// this byte string.
2451     ///
2452     /// If you'd like to reuse an allocation for performance reasons, then use
2453     /// [`to_uppercase_into`](#method.to_uppercase_into) instead.
2454     ///
2455     /// # Examples
2456     ///
2457     /// Basic usage:
2458     ///
2459     /// ```
2460     /// use bstr::{B, ByteSlice};
2461     ///
2462     /// let s = B("hello β");
2463     /// assert_eq!(s.to_uppercase(), B("HELLO Β"));
2464     /// ```
2465     ///
2466     /// Scripts without case are not changed:
2467     ///
2468     /// ```
2469     /// use bstr::{B, ByteSlice};
2470     ///
2471     /// let s = B("农历新年");
2472     /// assert_eq!(s.to_uppercase(), B("农历新年"));
2473     /// ```
2474     ///
2475     /// Invalid UTF-8 remains as is:
2476     ///
2477     /// ```
2478     /// use bstr::{B, ByteSlice};
2479     ///
2480     /// let s = B(b"foo\xFFbar\xE2\x98baz");
2481     /// assert_eq!(s.to_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ"));
2482     /// ```
2483     #[cfg(all(feature = "std", feature = "unicode"))]
2484     #[inline]
2485     fn to_uppercase(&self) -> Vec<u8> {
2486         let mut buf = vec![];
2487         self.to_uppercase_into(&mut buf);
2488         buf
2489     }
2490 
2491     /// Writes the uppercase equivalent of this byte string into the given
2492     /// buffer. The buffer is not cleared before written to.
2493     ///
2494     /// In this case, uppercase is defined according to the `Uppercase`
2495     /// Unicode property.
2496     ///
2497     /// If invalid UTF-8 is seen, or if a character has no uppercase variant,
2498     /// then it is written to the given buffer unchanged.
2499     ///
2500     /// Note that some characters in this byte string may expand into multiple
2501     /// characters when changing the case, so the number of bytes written to
2502     /// the given byte string may not be equivalent to the number of bytes in
2503     /// this byte string.
2504     ///
2505     /// If you don't need to amortize allocation and instead prefer
2506     /// convenience, then use [`to_uppercase`](#method.to_uppercase) instead.
2507     ///
2508     /// # Examples
2509     ///
2510     /// Basic usage:
2511     ///
2512     /// ```
2513     /// use bstr::{B, ByteSlice};
2514     ///
2515     /// let s = B("hello β");
2516     ///
2517     /// let mut buf = vec![];
2518     /// s.to_uppercase_into(&mut buf);
2519     /// assert_eq!(buf, B("HELLO Β"));
2520     /// ```
2521     ///
2522     /// Scripts without case are not changed:
2523     ///
2524     /// ```
2525     /// use bstr::{B, ByteSlice};
2526     ///
2527     /// let s = B("农历新年");
2528     ///
2529     /// let mut buf = vec![];
2530     /// s.to_uppercase_into(&mut buf);
2531     /// assert_eq!(buf, B("农历新年"));
2532     /// ```
2533     ///
2534     /// Invalid UTF-8 remains as is:
2535     ///
2536     /// ```
2537     /// use bstr::{B, ByteSlice};
2538     ///
2539     /// let s = B(b"foo\xFFbar\xE2\x98baz");
2540     ///
2541     /// let mut buf = vec![];
2542     /// s.to_uppercase_into(&mut buf);
2543     /// assert_eq!(buf, B(b"FOO\xFFBAR\xE2\x98BAZ"));
2544     /// ```
2545     #[cfg(all(feature = "std", feature = "unicode"))]
2546     #[inline]
2547     fn to_uppercase_into(&self, buf: &mut Vec<u8>) {
2548         // TODO: This is the best we can do given what std exposes I think.
2549         // If we roll our own case handling, then we might be able to do this
2550         // a bit faster. We shouldn't roll our own case handling unless we
2551         // need to, e.g., for doing caseless matching or case folding.
2552         buf.reserve(self.as_bytes().len());
2553         for (s, e, ch) in self.char_indices() {
2554             if ch == '\u{FFFD}' {
2555                 buf.push_str(&self.as_bytes()[s..e]);
2556             } else if ch.is_ascii() {
2557                 buf.push_char(ch.to_ascii_uppercase());
2558             } else {
2559                 for upper in ch.to_uppercase() {
2560                     buf.push_char(upper);
2561                 }
2562             }
2563         }
2564     }
2565 
2566     /// Returns a new `Vec<u8>` containing the ASCII uppercase equivalent of
2567     /// this byte string.
2568     ///
2569     /// In this case, uppercase is only defined in ASCII letters. Namely, the
2570     /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
2571     /// In particular, the length of the byte string returned is always
2572     /// equivalent to the length of this byte string.
2573     ///
2574     /// If you'd like to reuse an allocation for performance reasons, then use
2575     /// [`make_ascii_uppercase`](#method.make_ascii_uppercase) to perform
2576     /// the conversion in place.
2577     ///
2578     /// # Examples
2579     ///
2580     /// Basic usage:
2581     ///
2582     /// ```
2583     /// use bstr::{B, ByteSlice};
2584     ///
2585     /// let s = B("hello β");
2586     /// assert_eq!(s.to_ascii_uppercase(), B("HELLO β"));
2587     /// ```
2588     ///
2589     /// Invalid UTF-8 remains as is:
2590     ///
2591     /// ```
2592     /// use bstr::{B, ByteSlice};
2593     ///
2594     /// let s = B(b"foo\xFFbar\xE2\x98baz");
2595     /// assert_eq!(s.to_ascii_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ"));
2596     /// ```
2597     #[cfg(feature = "std")]
2598     #[inline]
2599     fn to_ascii_uppercase(&self) -> Vec<u8> {
2600         self.as_bytes().to_ascii_uppercase()
2601     }
2602 
2603     /// Convert this byte string to its uppercase ASCII equivalent in place.
2604     ///
2605     /// In this case, uppercase is only defined in ASCII letters. Namely, the
2606     /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
2607     ///
2608     /// If you don't need to do the conversion in
2609     /// place and instead prefer convenience, then use
2610     /// [`to_ascii_uppercase`](#method.to_ascii_uppercase) instead.
2611     ///
2612     /// # Examples
2613     ///
2614     /// Basic usage:
2615     ///
2616     /// ```
2617     /// use bstr::{B, ByteSlice};
2618     ///
2619     /// let mut s = <Vec<u8>>::from("hello β");
2620     /// s.make_ascii_uppercase();
2621     /// assert_eq!(s, B("HELLO β"));
2622     /// ```
2623     ///
2624     /// Invalid UTF-8 remains as is:
2625     ///
2626     /// ```
2627     /// use bstr::{B, ByteSlice, ByteVec};
2628     ///
2629     /// let mut s = <Vec<u8>>::from_slice(b"foo\xFFbar\xE2\x98baz");
2630     /// s.make_ascii_uppercase();
2631     /// assert_eq!(s, B(b"FOO\xFFBAR\xE2\x98BAZ"));
2632     /// ```
2633     #[inline]
2634     fn make_ascii_uppercase(&mut self) {
2635         self.as_bytes_mut().make_ascii_uppercase();
2636     }
2637 
2638     /// Reverse the bytes in this string, in place.
2639     ///
2640     /// This is not necessarily a well formed operation! For example, if this
2641     /// byte string contains valid UTF-8 that isn't ASCII, then reversing the
2642     /// string will likely result in invalid UTF-8 and otherwise non-sensical
2643     /// content.
2644     ///
2645     /// Note that this is equivalent to the generic `[u8]::reverse` method.
2646     /// This method is provided to permit callers to explicitly differentiate
2647     /// between reversing bytes, codepoints and graphemes.
2648     ///
2649     /// # Examples
2650     ///
2651     /// Basic usage:
2652     ///
2653     /// ```
2654     /// use bstr::ByteSlice;
2655     ///
2656     /// let mut s = <Vec<u8>>::from("hello");
2657     /// s.reverse_bytes();
2658     /// assert_eq!(s, "olleh".as_bytes());
2659     /// ```
2660     #[inline]
2661     fn reverse_bytes(&mut self) {
2662         self.as_bytes_mut().reverse();
2663     }
2664 
2665     /// Reverse the codepoints in this string, in place.
2666     ///
2667     /// If this byte string is valid UTF-8, then its reversal by codepoint
2668     /// is also guaranteed to be valid UTF-8.
2669     ///
2670     /// This operation is equivalent to the following, but without allocating:
2671     ///
2672     /// ```
2673     /// use bstr::ByteSlice;
2674     ///
2675     /// let mut s = <Vec<u8>>::from("foo☃bar");
2676     ///
2677     /// let mut chars: Vec<char> = s.chars().collect();
2678     /// chars.reverse();
2679     ///
2680     /// let reversed: String = chars.into_iter().collect();
2681     /// assert_eq!(reversed, "rab☃oof");
2682     /// ```
2683     ///
2684     /// Note that this is not necessarily a well formed operation. For example,
2685     /// if this byte string contains grapheme clusters with more than one
2686     /// codepoint, then those grapheme clusters will not necessarily be
2687     /// preserved. If you'd like to preserve grapheme clusters, then use
2688     /// [`reverse_graphemes`](#method.reverse_graphemes) instead.
2689     ///
2690     /// # Examples
2691     ///
2692     /// Basic usage:
2693     ///
2694     /// ```
2695     /// use bstr::ByteSlice;
2696     ///
2697     /// let mut s = <Vec<u8>>::from("foo☃bar");
2698     /// s.reverse_chars();
2699     /// assert_eq!(s, "rab☃oof".as_bytes());
2700     /// ```
2701     ///
2702     /// This example shows that not all reversals lead to a well formed string.
2703     /// For example, in this case, combining marks are used to put accents over
2704     /// some letters, and those accent marks must appear after the codepoints
2705     /// they modify.
2706     ///
2707     /// ```
2708     /// use bstr::{B, ByteSlice};
2709     ///
2710     /// let mut s = <Vec<u8>>::from("résumé");
2711     /// s.reverse_chars();
2712     /// assert_eq!(s, B(b"\xCC\x81emus\xCC\x81er"));
2713     /// ```
2714     ///
2715     /// A word of warning: the above example relies on the fact that
2716     /// `résumé` is in decomposed normal form, which means there are separate
2717     /// codepoints for the accents above `e`. If it is instead in composed
2718     /// normal form, then the example works:
2719     ///
2720     /// ```
2721     /// use bstr::{B, ByteSlice};
2722     ///
2723     /// let mut s = <Vec<u8>>::from("résumé");
2724     /// s.reverse_chars();
2725     /// assert_eq!(s, B("émusér"));
2726     /// ```
2727     ///
2728     /// The point here is to be cautious and not assume that just because
2729     /// `reverse_chars` works in one case, that it therefore works in all
2730     /// cases.
2731     #[inline]
2732     fn reverse_chars(&mut self) {
2733         let mut i = 0;
2734         loop {
2735             let (_, size) = utf8::decode(&self.as_bytes()[i..]);
2736             if size == 0 {
2737                 break;
2738             }
2739             if size > 1 {
2740                 self.as_bytes_mut()[i..i + size].reverse_bytes();
2741             }
2742             i += size;
2743         }
2744         self.reverse_bytes();
2745     }
2746 
2747     /// Reverse the graphemes in this string, in place.
2748     ///
2749     /// If this byte string is valid UTF-8, then its reversal by grapheme
2750     /// is also guaranteed to be valid UTF-8.
2751     ///
2752     /// This operation is equivalent to the following, but without allocating:
2753     ///
2754     /// ```
2755     /// use bstr::ByteSlice;
2756     ///
2757     /// let mut s = <Vec<u8>>::from("foo☃bar");
2758     ///
2759     /// let mut graphemes: Vec<&str> = s.graphemes().collect();
2760     /// graphemes.reverse();
2761     ///
2762     /// let reversed = graphemes.concat();
2763     /// assert_eq!(reversed, "rab☃oof");
2764     /// ```
2765     ///
2766     /// # Examples
2767     ///
2768     /// Basic usage:
2769     ///
2770     /// ```
2771     /// use bstr::ByteSlice;
2772     ///
2773     /// let mut s = <Vec<u8>>::from("foo☃bar");
2774     /// s.reverse_graphemes();
2775     /// assert_eq!(s, "rab☃oof".as_bytes());
2776     /// ```
2777     ///
2778     /// This example shows how this correctly handles grapheme clusters,
2779     /// unlike `reverse_chars`.
2780     ///
2781     /// ```
2782     /// use bstr::ByteSlice;
2783     ///
2784     /// let mut s = <Vec<u8>>::from("résumé");
2785     /// s.reverse_graphemes();
2786     /// assert_eq!(s, "émusér".as_bytes());
2787     /// ```
2788     #[cfg(feature = "unicode")]
2789     #[inline]
2790     fn reverse_graphemes(&mut self) {
2791         use crate::unicode::decode_grapheme;
2792 
2793         let mut i = 0;
2794         loop {
2795             let (_, size) = decode_grapheme(&self.as_bytes()[i..]);
2796             if size == 0 {
2797                 break;
2798             }
2799             if size > 1 {
2800                 self.as_bytes_mut()[i..i + size].reverse_bytes();
2801             }
2802             i += size;
2803         }
2804         self.reverse_bytes();
2805     }
2806 
2807     /// Returns true if and only if every byte in this byte string is ASCII.
2808     ///
2809     /// ASCII is an encoding that defines 128 codepoints. A byte corresponds to
2810     /// an ASCII codepoint if and only if it is in the inclusive range
2811     /// `[0, 127]`.
2812     ///
2813     /// # Examples
2814     ///
2815     /// Basic usage:
2816     ///
2817     /// ```
2818     /// use bstr::{B, ByteSlice};
2819     ///
2820     /// assert!(B("abc").is_ascii());
2821     /// assert!(!B("☃βツ").is_ascii());
2822     /// assert!(!B(b"\xFF").is_ascii());
2823     /// ```
2824     #[inline]
2825     fn is_ascii(&self) -> bool {
2826         ascii::first_non_ascii_byte(self.as_bytes()) == self.as_bytes().len()
2827     }
2828 
2829     /// Returns true if and only if the entire byte string is valid UTF-8.
2830     ///
2831     /// If you need location information about where a byte string's first
2832     /// invalid UTF-8 byte is, then use the [`to_str`](#method.to_str) method.
2833     ///
2834     /// # Examples
2835     ///
2836     /// Basic usage:
2837     ///
2838     /// ```
2839     /// use bstr::{B, ByteSlice};
2840     ///
2841     /// assert!(B("abc").is_utf8());
2842     /// assert!(B("☃βツ").is_utf8());
2843     /// // invalid bytes
2844     /// assert!(!B(b"abc\xFF").is_utf8());
2845     /// // surrogate encoding
2846     /// assert!(!B(b"\xED\xA0\x80").is_utf8());
2847     /// // incomplete sequence
2848     /// assert!(!B(b"\xF0\x9D\x9Ca").is_utf8());
2849     /// // overlong sequence
2850     /// assert!(!B(b"\xF0\x82\x82\xAC").is_utf8());
2851     /// ```
2852     #[inline]
2853     fn is_utf8(&self) -> bool {
2854         utf8::validate(self.as_bytes()).is_ok()
2855     }
2856 
2857     /// Returns the last byte in this byte string, if it's non-empty. If this
2858     /// byte string is empty, this returns `None`.
2859     ///
2860     /// Note that this is like the generic `[u8]::last`, except this returns
2861     /// the byte by value instead of a reference to the byte.
2862     ///
2863     /// # Examples
2864     ///
2865     /// Basic usage:
2866     ///
2867     /// ```
2868     /// use bstr::ByteSlice;
2869     ///
2870     /// assert_eq!(Some(b'z'), b"baz".last_byte());
2871     /// assert_eq!(None, b"".last_byte());
2872     /// ```
2873     #[inline]
2874     fn last_byte(&self) -> Option<u8> {
2875         let bytes = self.as_bytes();
2876         bytes.get(bytes.len().saturating_sub(1)).map(|&b| b)
2877     }
2878 
2879     /// Returns the index of the first non-ASCII byte in this byte string (if
2880     /// any such indices exist). Specifically, it returns the index of the
2881     /// first byte with a value greater than or equal to `0x80`.
2882     ///
2883     /// # Examples
2884     ///
2885     /// Basic usage:
2886     ///
2887     /// ```
2888     /// use bstr::{ByteSlice, B};
2889     ///
2890     /// assert_eq!(Some(3), b"abc\xff".find_non_ascii_byte());
2891     /// assert_eq!(None, b"abcde".find_non_ascii_byte());
2892     /// assert_eq!(Some(0), B("��").find_non_ascii_byte());
2893     /// ```
2894     #[inline]
2895     fn find_non_ascii_byte(&self) -> Option<usize> {
2896         let index = ascii::first_non_ascii_byte(self.as_bytes());
2897         if index == self.as_bytes().len() {
2898             None
2899         } else {
2900             Some(index)
2901         }
2902     }
2903 
2904     /// Copies elements from one part of the slice to another part of itself,
2905     /// where the parts may be overlapping.
2906     ///
2907     /// `src` is the range within this byte string to copy from, while `dest`
2908     /// is the starting index of the range within this byte string to copy to.
2909     /// The length indicated by `src` must be less than or equal to the number
2910     /// of bytes from `dest` to the end of the byte string.
2911     ///
2912     /// # Panics
2913     ///
2914     /// Panics if either range is out of bounds, or if `src` is too big to fit
2915     /// into `dest`, or if the end of `src` is before the start.
2916     ///
2917     /// # Examples
2918     ///
2919     /// Copying four bytes within a byte string:
2920     ///
2921     /// ```
2922     /// use bstr::{B, ByteSlice};
2923     ///
2924     /// let mut buf = *b"Hello, World!";
2925     /// let s = &mut buf;
2926     /// s.copy_within_str(1..5, 8);
2927     /// assert_eq!(s, B("Hello, Wello!"));
2928     /// ```
2929     #[inline]
2930     fn copy_within_str<R>(&mut self, src: R, dest: usize)
2931     where
2932         R: ops::RangeBounds<usize>,
2933     {
2934         // TODO: Deprecate this once slice::copy_within stabilizes.
2935         let src_start = match src.start_bound() {
2936             ops::Bound::Included(&n) => n,
2937             ops::Bound::Excluded(&n) => {
2938                 n.checked_add(1).expect("attempted to index slice beyond max")
2939             }
2940             ops::Bound::Unbounded => 0,
2941         };
2942         let src_end = match src.end_bound() {
2943             ops::Bound::Included(&n) => {
2944                 n.checked_add(1).expect("attempted to index slice beyond max")
2945             }
2946             ops::Bound::Excluded(&n) => n,
2947             ops::Bound::Unbounded => self.as_bytes().len(),
2948         };
2949         assert!(src_start <= src_end, "src end is before src start");
2950         assert!(src_end <= self.as_bytes().len(), "src is out of bounds");
2951         let count = src_end - src_start;
2952         assert!(
2953             dest <= self.as_bytes().len() - count,
2954             "dest is out of bounds",
2955         );
2956 
2957         // SAFETY: This is safe because we use ptr::copy to handle overlapping
2958         // copies, and is also safe because we've checked all the bounds above.
2959         // Finally, we are only dealing with u8 data, which is Copy, which
2960         // means we can copy without worrying about ownership/destructors.
2961         unsafe {
2962             ptr::copy(
2963                 self.as_bytes().get_unchecked(src_start),
2964                 self.as_bytes_mut().get_unchecked_mut(dest),
2965                 count,
2966             );
2967         }
2968     }
2969 }
2970 
2971 /// A single substring searcher fixed to a particular needle.
2972 ///
2973 /// The purpose of this type is to permit callers to construct a substring
2974 /// searcher that can be used to search haystacks without the overhead of
2975 /// constructing the searcher in the first place. This is a somewhat niche
2976 /// concern when it's necessary to re-use the same needle to search multiple
2977 /// different haystacks with as little overhead as possible. In general, using
2978 /// [`ByteSlice::find`](trait.ByteSlice.html#method.find)
2979 /// or
2980 /// [`ByteSlice::find_iter`](trait.ByteSlice.html#method.find_iter)
2981 /// is good enough, but `Finder` is useful when you can meaningfully observe
2982 /// searcher construction time in a profile.
2983 ///
2984 /// When the `std` feature is enabled, then this type has an `into_owned`
2985 /// version which permits building a `Finder` that is not connected to the
2986 /// lifetime of its needle.
2987 #[derive(Clone, Debug)]
2988 pub struct Finder<'a>(memmem::Finder<'a>);
2989 
2990 impl<'a> Finder<'a> {
2991     /// Create a new finder for the given needle.
2992     #[inline]
2993     pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> Finder<'a> {
2994         Finder(memmem::Finder::new(needle.as_ref()))
2995     }
2996 
2997     /// Convert this finder into its owned variant, such that it no longer
2998     /// borrows the needle.
2999     ///
3000     /// If this is already an owned finder, then this is a no-op. Otherwise,
3001     /// this copies the needle.
3002     ///
3003     /// This is only available when the `std` feature is enabled.
3004     #[cfg(feature = "std")]
3005     #[inline]
3006     pub fn into_owned(self) -> Finder<'static> {
3007         Finder(self.0.into_owned())
3008     }
3009 
3010     /// Returns the needle that this finder searches for.
3011     ///
3012     /// Note that the lifetime of the needle returned is tied to the lifetime
3013     /// of the finder, and may be shorter than the `'a` lifetime. Namely, a
3014     /// finder's needle can be either borrowed or owned, so the lifetime of the
3015     /// needle returned must necessarily be the shorter of the two.
3016     #[inline]
3017     pub fn needle(&self) -> &[u8] {
3018         self.0.needle()
3019     }
3020 
3021     /// Returns the index of the first occurrence of this needle in the given
3022     /// haystack.
3023     ///
3024     /// The haystack may be any type that can be cheaply converted into a
3025     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
3026     ///
3027     /// # Complexity
3028     ///
3029     /// This routine is guaranteed to have worst case linear time complexity
3030     /// with respect to both the needle and the haystack. That is, this runs
3031     /// in `O(needle.len() + haystack.len())` time.
3032     ///
3033     /// This routine is also guaranteed to have worst case constant space
3034     /// complexity.
3035     ///
3036     /// # Examples
3037     ///
3038     /// Basic usage:
3039     ///
3040     /// ```
3041     /// use bstr::Finder;
3042     ///
3043     /// let haystack = "foo bar baz";
3044     /// assert_eq!(Some(0), Finder::new("foo").find(haystack));
3045     /// assert_eq!(Some(4), Finder::new("bar").find(haystack));
3046     /// assert_eq!(None, Finder::new("quux").find(haystack));
3047     /// ```
3048     #[inline]
3049     pub fn find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
3050         self.0.find(haystack.as_ref())
3051     }
3052 }
3053 
3054 /// A single substring reverse searcher fixed to a particular needle.
3055 ///
3056 /// The purpose of this type is to permit callers to construct a substring
3057 /// searcher that can be used to search haystacks without the overhead of
3058 /// constructing the searcher in the first place. This is a somewhat niche
3059 /// concern when it's necessary to re-use the same needle to search multiple
3060 /// different haystacks with as little overhead as possible. In general, using
3061 /// [`ByteSlice::rfind`](trait.ByteSlice.html#method.rfind)
3062 /// or
3063 /// [`ByteSlice::rfind_iter`](trait.ByteSlice.html#method.rfind_iter)
3064 /// is good enough, but `FinderReverse` is useful when you can meaningfully
3065 /// observe searcher construction time in a profile.
3066 ///
3067 /// When the `std` feature is enabled, then this type has an `into_owned`
3068 /// version which permits building a `FinderReverse` that is not connected to
3069 /// the lifetime of its needle.
3070 #[derive(Clone, Debug)]
3071 pub struct FinderReverse<'a>(memmem::FinderRev<'a>);
3072 
3073 impl<'a> FinderReverse<'a> {
3074     /// Create a new reverse finder for the given needle.
3075     #[inline]
3076     pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> FinderReverse<'a> {
3077         FinderReverse(memmem::FinderRev::new(needle.as_ref()))
3078     }
3079 
3080     /// Convert this finder into its owned variant, such that it no longer
3081     /// borrows the needle.
3082     ///
3083     /// If this is already an owned finder, then this is a no-op. Otherwise,
3084     /// this copies the needle.
3085     ///
3086     /// This is only available when the `std` feature is enabled.
3087     #[cfg(feature = "std")]
3088     #[inline]
3089     pub fn into_owned(self) -> FinderReverse<'static> {
3090         FinderReverse(self.0.into_owned())
3091     }
3092 
3093     /// Returns the needle that this finder searches for.
3094     ///
3095     /// Note that the lifetime of the needle returned is tied to the lifetime
3096     /// of this finder, and may be shorter than the `'a` lifetime. Namely,
3097     /// a finder's needle can be either borrowed or owned, so the lifetime of
3098     /// the needle returned must necessarily be the shorter of the two.
3099     #[inline]
3100     pub fn needle(&self) -> &[u8] {
3101         self.0.needle()
3102     }
3103 
3104     /// Returns the index of the last occurrence of this needle in the given
3105     /// haystack.
3106     ///
3107     /// The haystack may be any type that can be cheaply converted into a
3108     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
3109     ///
3110     /// # Complexity
3111     ///
3112     /// This routine is guaranteed to have worst case linear time complexity
3113     /// with respect to both the needle and the haystack. That is, this runs
3114     /// in `O(needle.len() + haystack.len())` time.
3115     ///
3116     /// This routine is also guaranteed to have worst case constant space
3117     /// complexity.
3118     ///
3119     /// # Examples
3120     ///
3121     /// Basic usage:
3122     ///
3123     /// ```
3124     /// use bstr::FinderReverse;
3125     ///
3126     /// let haystack = "foo bar baz";
3127     /// assert_eq!(Some(0), FinderReverse::new("foo").rfind(haystack));
3128     /// assert_eq!(Some(4), FinderReverse::new("bar").rfind(haystack));
3129     /// assert_eq!(None, FinderReverse::new("quux").rfind(haystack));
3130     /// ```
3131     #[inline]
3132     pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
3133         self.0.rfind(haystack.as_ref())
3134     }
3135 }
3136 
3137 /// An iterator over non-overlapping substring matches.
3138 ///
3139 /// Matches are reported by the byte offset at which they begin.
3140 ///
3141 /// `'a` is the shorter of two lifetimes: the byte string being searched or the
3142 /// byte string being looked for.
3143 #[derive(Debug)]
3144 pub struct Find<'a> {
3145     it: memmem::FindIter<'a, 'a>,
3146     haystack: &'a [u8],
3147     needle: &'a [u8],
3148 }
3149 
3150 impl<'a> Find<'a> {
3151     fn new(haystack: &'a [u8], needle: &'a [u8]) -> Find<'a> {
3152         Find { it: memmem::find_iter(haystack, needle), haystack, needle }
3153     }
3154 }
3155 
3156 impl<'a> Iterator for Find<'a> {
3157     type Item = usize;
3158 
3159     #[inline]
3160     fn next(&mut self) -> Option<usize> {
3161         self.it.next()
3162     }
3163 }
3164 
3165 /// An iterator over non-overlapping substring matches in reverse.
3166 ///
3167 /// Matches are reported by the byte offset at which they begin.
3168 ///
3169 /// `'a` is the shorter of two lifetimes: the byte string being searched or the
3170 /// byte string being looked for.
3171 #[derive(Debug)]
3172 pub struct FindReverse<'a> {
3173     it: memmem::FindRevIter<'a, 'a>,
3174     haystack: &'a [u8],
3175     needle: &'a [u8],
3176 }
3177 
3178 impl<'a> FindReverse<'a> {
3179     fn new(haystack: &'a [u8], needle: &'a [u8]) -> FindReverse<'a> {
3180         FindReverse {
3181             it: memmem::rfind_iter(haystack, needle),
3182             haystack,
3183             needle,
3184         }
3185     }
3186 
3187     fn haystack(&self) -> &'a [u8] {
3188         self.haystack
3189     }
3190 
3191     fn needle(&self) -> &[u8] {
3192         self.needle
3193     }
3194 }
3195 
3196 impl<'a> Iterator for FindReverse<'a> {
3197     type Item = usize;
3198 
3199     #[inline]
3200     fn next(&mut self) -> Option<usize> {
3201         self.it.next()
3202     }
3203 }
3204 
/// An iterator that yields each byte of a byte string by value.
///
/// `'a` is the lifetime of the byte string being traversed.
#[derive(Clone, Debug)]
pub struct Bytes<'a> {
    /// The wrapped slice iterator; it yields references, which this type
    /// turns into plain bytes.
    it: slice::Iter<'a, u8>,
}

impl<'a> Bytes<'a> {
    /// Returns the bytes that have not been yielded yet, as a subslice of
    /// the original data.
    ///
    /// The subslice carries the lifetime of the original slice, so the
    /// iterator can keep being used while the returned slice is alive.
    #[inline]
    pub fn as_slice(&self) -> &'a [u8] {
        let remaining: &'a [u8] = self.it.as_slice();
        remaining
    }
}

impl<'a> Iterator for Bytes<'a> {
    type Item = u8;

    #[inline]
    fn next(&mut self) -> Option<u8> {
        self.it.next().copied()
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        // The wrapped slice iterator knows its remaining length.
        self.it.size_hint()
    }
}

impl<'a> DoubleEndedIterator for Bytes<'a> {
    #[inline]
    fn next_back(&mut self) -> Option<u8> {
        self.it.next_back().copied()
    }
}

impl<'a> ExactSizeIterator for Bytes<'a> {
    #[inline]
    fn len(&self) -> usize {
        self.it.len()
    }
}

impl<'a> iter::FusedIterator for Bytes<'a> {}
3252 
3253 /// An iterator over the fields in a byte string, separated by whitespace.
3254 ///
3255 /// This iterator splits on contiguous runs of whitespace, such that the fields
3256 /// in `foo\t\t\n  \nbar` are `foo` and `bar`.
3257 ///
3258 /// `'a` is the lifetime of the byte string being split.
3259 #[derive(Debug)]
3260 pub struct Fields<'a> {
3261     it: FieldsWith<'a, fn(char) -> bool>,
3262 }
3263 
3264 impl<'a> Fields<'a> {
3265     fn new(bytes: &'a [u8]) -> Fields<'a> {
3266         Fields { it: bytes.fields_with(|ch| ch.is_whitespace()) }
3267     }
3268 }
3269 
3270 impl<'a> Iterator for Fields<'a> {
3271     type Item = &'a [u8];
3272 
3273     #[inline]
3274     fn next(&mut self) -> Option<&'a [u8]> {
3275         self.it.next()
3276     }
3277 }
3278 
3279 /// An iterator over fields in the byte string, separated by a predicate over
3280 /// codepoints.
3281 ///
3282 /// This iterator splits a byte string based on its predicate function such
3283 /// that the elements returned are separated by contiguous runs of codepoints
3284 /// for which the predicate returns true.
3285 ///
3286 /// `'a` is the lifetime of the byte string being split, while `F` is the type
3287 /// of the predicate, i.e., `FnMut(char) -> bool`.
3288 #[derive(Debug)]
3289 pub struct FieldsWith<'a, F> {
3290     f: F,
3291     bytes: &'a [u8],
3292     chars: CharIndices<'a>,
3293 }
3294 
3295 impl<'a, F: FnMut(char) -> bool> FieldsWith<'a, F> {
3296     fn new(bytes: &'a [u8], f: F) -> FieldsWith<'a, F> {
3297         FieldsWith { f, bytes, chars: bytes.char_indices() }
3298     }
3299 }
3300 
3301 impl<'a, F: FnMut(char) -> bool> Iterator for FieldsWith<'a, F> {
3302     type Item = &'a [u8];
3303 
3304     #[inline]
3305     fn next(&mut self) -> Option<&'a [u8]> {
3306         let (start, mut end);
3307         loop {
3308             match self.chars.next() {
3309                 None => return None,
3310                 Some((s, e, ch)) => {
3311                     if !(self.f)(ch) {
3312                         start = s;
3313                         end = e;
3314                         break;
3315                     }
3316                 }
3317             }
3318         }
3319         while let Some((_, e, ch)) = self.chars.next() {
3320             if (self.f)(ch) {
3321                 break;
3322             }
3323             end = e;
3324         }
3325         Some(&self.bytes[start..end])
3326     }
3327 }
3328 
3329 /// An iterator over substrings in a byte string, split by a separator.
3330 ///
3331 /// `'a` is the lifetime of the byte string being split.
3332 #[derive(Debug)]
3333 pub struct Split<'a> {
3334     finder: Find<'a>,
3335     /// The end position of the previous match of our splitter. The element
3336     /// we yield corresponds to the substring starting at `last` up to the
3337     /// beginning of the next match of the splitter.
3338     last: usize,
3339     /// Only set when iteration is complete. A corner case here is when a
3340     /// splitter is matched at the end of the haystack. At that point, we still
3341     /// need to yield an empty string following it.
3342     done: bool,
3343 }
3344 
3345 impl<'a> Split<'a> {
3346     fn new(haystack: &'a [u8], splitter: &'a [u8]) -> Split<'a> {
3347         let finder = haystack.find_iter(splitter);
3348         Split { finder, last: 0, done: false }
3349     }
3350 }
3351 
3352 impl<'a> Iterator for Split<'a> {
3353     type Item = &'a [u8];
3354 
3355     #[inline]
3356     fn next(&mut self) -> Option<&'a [u8]> {
3357         let haystack = self.finder.haystack;
3358         match self.finder.next() {
3359             Some(start) => {
3360                 let next = &haystack[self.last..start];
3361                 self.last = start + self.finder.needle.len();
3362                 Some(next)
3363             }
3364             None => {
3365                 if self.last >= haystack.len() {
3366                     if !self.done {
3367                         self.done = true;
3368                         Some(b"")
3369                     } else {
3370                         None
3371                     }
3372                 } else {
3373                     let s = &haystack[self.last..];
3374                     self.last = haystack.len();
3375                     self.done = true;
3376                     Some(s)
3377                 }
3378             }
3379         }
3380     }
3381 }
3382 
3383 /// An iterator over substrings in a byte string, split by a separator, in
3384 /// reverse.
3385 ///
3386 /// `'a` is the lifetime of the byte string being split, while `F` is the type
3387 /// of the predicate, i.e., `FnMut(char) -> bool`.
3388 #[derive(Debug)]
3389 pub struct SplitReverse<'a> {
3390     finder: FindReverse<'a>,
3391     /// The end position of the previous match of our splitter. The element
3392     /// we yield corresponds to the substring starting at `last` up to the
3393     /// beginning of the next match of the splitter.
3394     last: usize,
3395     /// Only set when iteration is complete. A corner case here is when a
3396     /// splitter is matched at the end of the haystack. At that point, we still
3397     /// need to yield an empty string following it.
3398     done: bool,
3399 }
3400 
3401 impl<'a> SplitReverse<'a> {
3402     fn new(haystack: &'a [u8], splitter: &'a [u8]) -> SplitReverse<'a> {
3403         let finder = haystack.rfind_iter(splitter);
3404         SplitReverse { finder, last: haystack.len(), done: false }
3405     }
3406 }
3407 
3408 impl<'a> Iterator for SplitReverse<'a> {
3409     type Item = &'a [u8];
3410 
3411     #[inline]
3412     fn next(&mut self) -> Option<&'a [u8]> {
3413         let haystack = self.finder.haystack();
3414         match self.finder.next() {
3415             Some(start) => {
3416                 let nlen = self.finder.needle().len();
3417                 let next = &haystack[start + nlen..self.last];
3418                 self.last = start;
3419                 Some(next)
3420             }
3421             None => {
3422                 if self.last == 0 {
3423                     if !self.done {
3424                         self.done = true;
3425                         Some(b"")
3426                     } else {
3427                         None
3428                     }
3429                 } else {
3430                     let s = &haystack[..self.last];
3431                     self.last = 0;
3432                     self.done = true;
3433                     Some(s)
3434                 }
3435             }
3436         }
3437     }
3438 }
3439 
3440 /// An iterator over at most `n` substrings in a byte string, split by a
3441 /// separator.
3442 ///
3443 /// `'a` is the lifetime of the byte string being split, while `F` is the type
3444 /// of the predicate, i.e., `FnMut(char) -> bool`.
3445 #[derive(Debug)]
3446 pub struct SplitN<'a> {
3447     split: Split<'a>,
3448     limit: usize,
3449     count: usize,
3450 }
3451 
3452 impl<'a> SplitN<'a> {
3453     fn new(
3454         haystack: &'a [u8],
3455         splitter: &'a [u8],
3456         limit: usize,
3457     ) -> SplitN<'a> {
3458         let split = haystack.split_str(splitter);
3459         SplitN { split, limit, count: 0 }
3460     }
3461 }
3462 
3463 impl<'a> Iterator for SplitN<'a> {
3464     type Item = &'a [u8];
3465 
3466     #[inline]
3467     fn next(&mut self) -> Option<&'a [u8]> {
3468         self.count += 1;
3469         if self.count > self.limit || self.split.done {
3470             None
3471         } else if self.count == self.limit {
3472             Some(&self.split.finder.haystack[self.split.last..])
3473         } else {
3474             self.split.next()
3475         }
3476     }
3477 }
3478 
/// An iterator over at most `n` substrings in a byte string, split by a
/// separator, in reverse.
///
/// Substrings are yielded starting from the end of the byte string. When the
/// limit is reached, the final substring yielded is the unsplit leading
/// portion of the byte string.
///
/// `'a` is the lifetime of the byte string being split and of the separator.
#[derive(Debug)]
pub struct SplitNReverse<'a> {
    // The reverse splitter that `next` delegates to until the limit is
    // reached.
    split: SplitReverse<'a>,
    // The maximum number of substrings to yield.
    limit: usize,
    // The number of times `next` has been called so far.
    count: usize,
}
3490 
3491 impl<'a> SplitNReverse<'a> {
3492     fn new(
3493         haystack: &'a [u8],
3494         splitter: &'a [u8],
3495         limit: usize,
3496     ) -> SplitNReverse<'a> {
3497         let split = haystack.rsplit_str(splitter);
3498         SplitNReverse { split, limit, count: 0 }
3499     }
3500 }
3501 
3502 impl<'a> Iterator for SplitNReverse<'a> {
3503     type Item = &'a [u8];
3504 
3505     #[inline]
3506     fn next(&mut self) -> Option<&'a [u8]> {
3507         self.count += 1;
3508         if self.count > self.limit || self.split.done {
3509             None
3510         } else if self.count == self.limit {
3511             Some(&self.split.finder.haystack()[..self.split.last])
3512         } else {
3513             self.split.next()
3514         }
3515     }
3516 }
3517 
/// An iterator over all lines in a byte string, without their terminators.
///
/// For this iterator, the only line terminators recognized are `\r\n` and
/// `\n`. A `\r` that is not immediately followed by `\n` is left in the line.
///
/// `'a` is the lifetime of the byte string being iterated over.
pub struct Lines<'a> {
    // Produces each line with its terminator still attached; the terminator
    // is trimmed off by this type's `Iterator::next`.
    it: LinesWithTerminator<'a>,
}
3527 
3528 impl<'a> Lines<'a> {
3529     fn new(bytes: &'a [u8]) -> Lines<'a> {
3530         Lines { it: LinesWithTerminator::new(bytes) }
3531     }
3532 }
3533 
3534 impl<'a> Iterator for Lines<'a> {
3535     type Item = &'a [u8];
3536 
3537     #[inline]
3538     fn next(&mut self) -> Option<&'a [u8]> {
3539         let mut line = self.it.next()?;
3540         if line.last_byte() == Some(b'\n') {
3541             line = &line[..line.len() - 1];
3542             if line.last_byte() == Some(b'\r') {
3543                 line = &line[..line.len() - 1];
3544             }
3545         }
3546         Some(line)
3547     }
3548 }
3549 
/// An iterator over all lines in a byte string, including their terminators.
///
/// For this iterator, the only line terminator recognized is `\n`. (Since
/// line terminators are included, this also handles `\r\n` line endings.)
///
/// Line terminators are only included if they are present in the original
/// byte string. For example, the last line in a byte string may not end with
/// a line terminator.
///
/// Concatenating all elements yielded by this iterator is guaranteed to yield
/// the original byte string.
///
/// `'a` is the lifetime of the byte string being iterated over.
#[derive(Debug)]
pub struct LinesWithTerminator<'a> {
    // The portion of the byte string that has not been yielded yet.
    bytes: &'a [u8],
}
3566 
3567 impl<'a> LinesWithTerminator<'a> {
3568     fn new(bytes: &'a [u8]) -> LinesWithTerminator<'a> {
3569         LinesWithTerminator { bytes }
3570     }
3571 }
3572 
3573 impl<'a> Iterator for LinesWithTerminator<'a> {
3574     type Item = &'a [u8];
3575 
3576     #[inline]
3577     fn next(&mut self) -> Option<&'a [u8]> {
3578         match self.bytes.find_byte(b'\n') {
3579             None if self.bytes.is_empty() => None,
3580             None => {
3581                 let line = self.bytes;
3582                 self.bytes = b"";
3583                 Some(line)
3584             }
3585             Some(end) => {
3586                 let line = &self.bytes[..end + 1];
3587                 self.bytes = &self.bytes[end + 1..];
3588                 Some(line)
3589             }
3590         }
3591     }
3592 }
3593 
#[cfg(test)]
mod tests {
    use crate::ext_slice::{ByteSlice, B};
    use crate::tests::LOSSY_TESTS;

    // Each `LOSSY_TESTS` entry is destructured as `(expected, input)`. All
    // three lossy conversion routes must reproduce `expected` byte for byte.
    #[test]
    fn to_str_lossy() {
        for (idx, &(want, given)) in LOSSY_TESTS.iter().enumerate() {
            // Route 1: the conversion that returns its result directly.
            let lossy = B(given).to_str_lossy();
            assert_eq!(
                want.as_bytes(),
                lossy.as_bytes(),
                "to_str_lossy(ith: {:?}, given: {:?})",
                idx,
                given,
            );

            // Route 2: the conversion that appends into a caller-supplied
            // `String`.
            let mut buf = String::new();
            B(given).to_str_lossy_into(&mut buf);
            assert_eq!(want.as_bytes(), buf.as_bytes(), "to_str_lossy_into");

            // Route 3: the standard library, as a cross-check of the table.
            let std_lossy = String::from_utf8_lossy(given);
            assert_eq!(want.as_bytes(), std_lossy.as_bytes(), "std");
        }
    }

    // The four tests below each expect `copy_within_str` to panic.
    // NOTE(review): the stated reasons are inferred from the arguments only;
    // confirm against `copy_within_str` itself.

    // Presumably: two bytes copied to offset 5 overrun a 6-byte buffer.
    #[test]
    #[should_panic]
    fn copy_within_fail1() {
        let mut bytes = *b"foobar";
        bytes[..].copy_within_str(0..2, 5);
    }

    // Presumably: the source range starts after it ends.
    #[test]
    #[should_panic]
    fn copy_within_fail2() {
        let mut bytes = *b"foobar";
        bytes[..].copy_within_str(3..2, 0);
    }

    // Presumably: the source range ends past the 6-byte buffer.
    #[test]
    #[should_panic]
    fn copy_within_fail3() {
        let mut bytes = *b"foobar";
        bytes[..].copy_within_str(5..7, 0);
    }

    // Presumably: one byte copied to offset 6 overruns a 6-byte buffer.
    #[test]
    #[should_panic]
    fn copy_within_fail4() {
        let mut bytes = *b"foobar";
        bytes[..].copy_within_str(0..1, 6);
    }
}
3656