1 use std::borrow::Borrow;
2 use std::borrow::Cow;
3 use std::borrow::ToOwned;
4 use std::ffi::OsStr;
5 use std::ffi::OsString;
6 use std::fmt;
7 use std::fmt::Debug;
8 use std::fmt::Display;
9 use std::fmt::Formatter;
10 use std::mem;
11 use std::ops::Deref;
12 use std::ops::Index;
13 use std::ops::Range;
14 use std::ops::RangeFrom;
15 use std::ops::RangeFull;
16 use std::ops::RangeInclusive;
17 use std::ops::RangeTo;
18 use std::ops::RangeToInclusive;
19 use std::str;
20
21 #[cfg(feature = "memchr")]
22 use memchr::memmem::find;
23 #[cfg(feature = "memchr")]
24 use memchr::memmem::rfind;
25
26 use super::imp::raw;
27 use super::iter::Split;
28 use super::pattern::Encoded as EncodedPattern;
29 use super::OsStrBytes;
30 use super::OsStringBytes;
31 use super::Pattern;
32
/// Fallback forward substring search used when the "memchr" feature is
/// disabled.
///
/// Returns the lowest index at which `pat` occurs in `string`, or [`None`]
/// when it does not occur. A pattern longer than `string` never matches, and
/// an empty pattern matches at index 0.
#[cfg(not(feature = "memchr"))]
fn find(string: &[u8], pat: &[u8]) -> Option<usize> {
    // The last index at which a match could begin; there is none when the
    // pattern does not fit inside the string.
    let last_start = string.len().checked_sub(pat.len())?;
    (0..=last_start).find(|&start| string[start..].starts_with(pat))
}
42
/// Fallback backward substring search used when the "memchr" feature is
/// disabled.
///
/// Returns the highest index at which `pat` occurs in `string`, or [`None`]
/// when it does not occur. An empty pattern matches at `string.len()`.
#[cfg(not(feature = "memchr"))]
fn rfind(string: &[u8], pat: &[u8]) -> Option<usize> {
    // Candidate match *ends* are tried from the back of the string; the range
    // is empty when the pattern is longer than the string.
    (pat.len()..=string.len())
        .rev()
        .find(|&end| string[..end].ends_with(pat))
        .map(|end| end - pat.len())
}
52
// Shared body of the `trim_*_matches` methods.
//
// `$strip_method` is the slice method (`strip_prefix` or `strip_suffix`) that
// removes one occurrence of the pattern. It is applied until it no longer
// matches. An empty pattern returns `$self` unchanged, since stripping it
// would never make progress.
macro_rules! impl_trim_matches {
    ( $self:ident , $pat:expr , $strip_method:ident ) => {{
        let encoded = $pat.__encode();
        let needle = encoded.__get();
        if needle.is_empty() {
            return $self;
        }

        let mut remaining = &$self.0;
        while let Some(stripped) = remaining.$strip_method(needle) {
            remaining = stripped;
        }
        Self::from_raw_bytes_unchecked(remaining)
    }};
}
68
// Shared body of the `*split_once_raw` methods.
//
// `$find_fn` locates the pattern (from the front or from the back); the
// string is then divided into the part before the match and the part after
// it, with the match itself dropped. Evaluates to `None` (via `?`) when the
// pattern is not found.
macro_rules! impl_split_once_raw {
    ( $self:ident , $pat:expr , $find_fn:expr ) => {{
        let needle = $pat.__get();

        let start = $find_fn(&$self.0, needle)?;
        let (head, rest) = $self.0.split_at(start);
        // `rest` begins with the match, so skip over it.
        let tail = &rest[needle.len()..];
        Some((
            Self::from_raw_bytes_unchecked(head),
            Self::from_raw_bytes_unchecked(tail),
        ))
    }};
}
82
/// A container for the byte strings converted by [`OsStrBytes`].
///
/// This wrapper is intended to prevent violating the invariants of the
/// [unspecified encoding] used by this crate and minimize encoding
/// conversions.
///
/// Although this type is annotated with `#[repr(transparent)]`, the inner
/// representation is not stable. Transmuting between this type and any other
/// causes immediate undefined behavior.
///
/// Equality, ordering, and hashing are derived, so they operate on the
/// stored byte string.
///
/// # Indices
///
/// Methods of this struct that accept indices require that the index lie on a
/// UTF-8 boundary. Although it is possible to manipulate platform strings
/// based on other indices, this crate currently does not support them for
/// slicing methods. They would add significant complication to the
/// implementation and are generally not necessary. However, all indices
/// returned by this struct can be used for slicing.
///
/// On Unix, all indices are permitted, to avoid false positives. However,
/// relying on this implementation detail is discouraged. Platform-specific
/// indices are error-prone.
///
/// # Complexity
///
/// All searching methods have worst-case multiplicative time complexity (i.e.,
/// `O(self.raw_len() * pat.len())`). Enabling the "memchr" feature allows
/// these methods to instead run in linear time in the worst case (documented
/// for [`memchr::memmem::find`][memchr complexity]).
///
/// [memchr complexity]: memchr::memmem::find#complexity
/// [unspecified encoding]: super#encoding
#[derive(Eq, Hash, Ord, PartialEq, PartialOrd)]
#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "raw_os_str")))]
#[repr(transparent)]
pub struct RawOsStr([u8]);
119
120 impl RawOsStr {
from_raw_bytes_unchecked(string: &[u8]) -> &Self121 fn from_raw_bytes_unchecked(string: &[u8]) -> &Self {
122 // SAFETY: This struct has a layout that makes this operation safe.
123 unsafe { mem::transmute(string) }
124 }
125
126 /// Converts a platform-native string into a representation that can be
127 /// more easily manipulated.
128 ///
129 /// This method performs the necessary conversion immediately, so it can be
130 /// expensive to call. It is recommended to continue using the returned
131 /// instance as long as possible (instead of the original [`OsStr`]), to
132 /// avoid repeated conversions.
133 ///
134 /// # Examples
135 ///
136 /// ```
137 /// use std::env;
138 /// # use std::io;
139 ///
140 /// use os_str_bytes::RawOsStr;
141 ///
142 /// let os_string = env::current_exe()?.into_os_string();
143 /// println!("{:?}", RawOsStr::new(&os_string));
144 /// #
145 /// # Ok::<_, io::Error>(())
146 /// ```
147 #[inline]
148 #[must_use]
new(string: &OsStr) -> Cow<'_, Self>149 pub fn new(string: &OsStr) -> Cow<'_, Self> {
150 match string.to_raw_bytes() {
151 Cow::Borrowed(string) => {
152 Cow::Borrowed(Self::from_raw_bytes_unchecked(string))
153 }
154 Cow::Owned(string) => Cow::Owned(RawOsString(string)),
155 }
156 }
157
158 /// Wraps a string, without copying or encoding conversion.
159 ///
160 /// This method is much more efficient than [`RawOsStr::new`], since the
161 /// [encoding] used by this crate is compatible with UTF-8.
162 ///
163 /// # Examples
164 ///
165 /// ```
166 /// use os_str_bytes::RawOsStr;
167 ///
168 /// let string = "foobar";
169 /// let raw = RawOsStr::from_str(string);
170 /// assert_eq!(string, raw);
171 /// ```
172 ///
173 /// [encoding]: super#encoding
174 #[allow(clippy::should_implement_trait)]
175 #[inline]
176 #[must_use]
from_str(string: &str) -> &Self177 pub fn from_str(string: &str) -> &Self {
178 Self::from_raw_bytes_unchecked(string.as_bytes())
179 }
180
181 /// Returns the byte string stored by this container.
182 ///
183 /// The result will match what would be returned by
184 /// [`OsStrBytes::to_raw_bytes`] for the same string.
185 ///
186 /// # Examples
187 ///
188 /// ```
189 /// use std::env;
190 /// # use std::io;
191 ///
192 /// use os_str_bytes::OsStrBytes;
193 /// use os_str_bytes::RawOsStr;
194 ///
195 /// let os_string = env::current_exe()?.into_os_string();
196 /// let raw = RawOsStr::new(&os_string);
197 /// assert_eq!(os_string.to_raw_bytes(), raw.as_raw_bytes());
198 /// #
199 /// # Ok::<_, io::Error>(())
200 /// ```
201 #[inline]
202 #[must_use]
as_raw_bytes(&self) -> &[u8]203 pub fn as_raw_bytes(&self) -> &[u8] {
204 &self.0
205 }
206
207 /// Equivalent to [`str::contains`].
208 ///
209 /// # Panics
210 ///
211 /// Panics if the pattern is a byte outside of the ASCII range.
212 ///
213 /// # Examples
214 ///
215 /// ```
216 /// use os_str_bytes::RawOsStr;
217 ///
218 /// let raw = RawOsStr::from_str("foobar");
219 /// assert!(raw.contains("oo"));
220 /// assert!(!raw.contains("of"));
221 /// ```
222 #[inline]
223 #[must_use]
contains<P>(&self, pat: P) -> bool where P: Pattern,224 pub fn contains<P>(&self, pat: P) -> bool
225 where
226 P: Pattern,
227 {
228 self.find(pat).is_some()
229 }
230
231 /// Equivalent to [`str::ends_with`].
232 ///
233 /// # Panics
234 ///
235 /// Panics if the pattern is a byte outside of the ASCII range.
236 ///
237 /// # Examples
238 ///
239 /// ```
240 /// use os_str_bytes::RawOsStr;
241 ///
242 /// let raw = RawOsStr::from_str("foobar");
243 /// assert!(raw.ends_with("bar"));
244 /// assert!(!raw.ends_with("foo"));
245 /// ```
246 #[inline]
247 #[must_use]
ends_with<P>(&self, pat: P) -> bool where P: Pattern,248 pub fn ends_with<P>(&self, pat: P) -> bool
249 where
250 P: Pattern,
251 {
252 let pat = pat.__encode();
253 let pat = pat.__get();
254
255 self.0.ends_with(pat)
256 }
257
258 /// Equivalent to [`str::ends_with`] but accepts this type for the pattern.
259 ///
260 /// # Panics
261 ///
262 /// Panics if the pattern is a byte outside of the ASCII range.
263 ///
264 /// # Examples
265 ///
266 /// ```
267 /// use os_str_bytes::RawOsStr;
268 ///
269 /// let raw = RawOsStr::from_str("foobar");
270 /// assert!(raw.ends_with_os(RawOsStr::from_str("bar")));
271 /// assert!(!raw.ends_with_os(RawOsStr::from_str("foo")));
272 /// ```
273 #[inline]
274 #[must_use]
ends_with_os(&self, pat: &Self) -> bool275 pub fn ends_with_os(&self, pat: &Self) -> bool {
276 raw::ends_with(&self.0, &pat.0)
277 }
278
279 /// Equivalent to [`str::find`].
280 ///
281 /// # Panics
282 ///
283 /// Panics if the pattern is a byte outside of the ASCII range.
284 ///
285 /// # Examples
286 ///
287 /// ```
288 /// use os_str_bytes::RawOsStr;
289 ///
290 /// let raw = RawOsStr::from_str("foobar");
291 /// assert_eq!(Some(1), raw.find("o"));
292 /// assert_eq!(None, raw.find("of"));
293 /// ```
294 #[inline]
295 #[must_use]
find<P>(&self, pat: P) -> Option<usize> where P: Pattern,296 pub fn find<P>(&self, pat: P) -> Option<usize>
297 where
298 P: Pattern,
299 {
300 let pat = pat.__encode();
301 let pat = pat.__get();
302
303 find(&self.0, pat)
304 }
305
306 /// Equivalent to [`str::is_empty`].
307 ///
308 /// # Examples
309 ///
310 /// ```
311 /// use os_str_bytes::RawOsStr;
312 ///
313 /// assert!(RawOsStr::from_str("").is_empty());
314 /// assert!(!RawOsStr::from_str("foobar").is_empty());
315 /// ```
316 #[inline]
317 #[must_use]
is_empty(&self) -> bool318 pub fn is_empty(&self) -> bool {
319 self.0.is_empty()
320 }
321
322 /// Returns the length of the byte string stored by this container.
323 ///
324 /// Only the following assumptions can be made about the result:
325 /// - The length of any Unicode character is the length of its UTF-8
326 /// representation (i.e., [`char::len_utf8`]).
327 /// - Splitting a string at a UTF-8 boundary will return two strings with
328 /// lengths that sum to the length of the original string.
329 ///
330 /// This method may return a different result than would [`OsStr::len`]
331 /// when called on same string, since [`OsStr`] uses an unspecified
332 /// encoding.
333 ///
334 /// # Examples
335 ///
336 /// ```
337 /// use os_str_bytes::RawOsStr;
338 ///
339 /// assert_eq!(6, RawOsStr::from_str("foobar").raw_len());
340 /// assert_eq!(0, RawOsStr::from_str("").raw_len());
341 /// ```
342 #[inline]
343 #[must_use]
raw_len(&self) -> usize344 pub fn raw_len(&self) -> usize {
345 self.0.len()
346 }
347
348 /// Equivalent to [`str::rfind`].
349 ///
350 /// # Panics
351 ///
352 /// Panics if the pattern is a byte outside of the ASCII range.
353 ///
354 /// # Examples
355 ///
356 /// ```
357 /// use os_str_bytes::RawOsStr;
358 ///
359 /// let raw = RawOsStr::from_str("foobar");
360 /// assert_eq!(Some(2), raw.rfind("o"));
361 /// assert_eq!(None, raw.rfind("of"));
362 /// ```
363 #[inline]
364 #[must_use]
rfind<P>(&self, pat: P) -> Option<usize> where P: Pattern,365 pub fn rfind<P>(&self, pat: P) -> Option<usize>
366 where
367 P: Pattern,
368 {
369 let pat = pat.__encode();
370 let pat = pat.__get();
371
372 rfind(&self.0, pat)
373 }
374
// Splits this string around the last occurrence of an already-encoded
// pattern, returning the parts before and after the match, or `None` when
// the pattern does not occur. The pattern is taken pre-encoded, so the
// encoding step performed by `rsplit_once` is not repeated here.
pub(super) fn rsplit_once_raw<P>(&self, pat: &P) -> Option<(&Self, &Self)>
where
    P: EncodedPattern,
{
    impl_split_once_raw!(self, pat, rfind)
}
381
382 /// Equivalent to [`str::rsplit_once`].
383 ///
384 /// # Panics
385 ///
386 /// Panics if the pattern is a byte outside of the ASCII range.
387 ///
388 /// # Examples
389 ///
390 /// ```
391 /// use os_str_bytes::RawOsStr;
392 ///
393 /// let raw = RawOsStr::from_str("foobar");
394 /// assert_eq!(
395 /// Some((RawOsStr::from_str("fo"), RawOsStr::from_str("bar"))),
396 /// raw.rsplit_once("o"),
397 /// );
398 /// assert_eq!(None, raw.rsplit_once("of"));
399 /// ```
400 #[inline]
401 #[must_use]
rsplit_once<P>(&self, pat: P) -> Option<(&Self, &Self)> where P: Pattern,402 pub fn rsplit_once<P>(&self, pat: P) -> Option<(&Self, &Self)>
403 where
404 P: Pattern,
405 {
406 self.rsplit_once_raw(&pat.__encode())
407 }
408
// https://github.com/rust-lang/rust/blob/49c68bd53f90e375bfb3cbba8c1c67a9e0adb9c0/src/libcore/str/mod.rs#L2184-L2221
//
// Panics with a message explaining that `index` is not a valid boundary. The
// message names the code point that `index` falls inside and the byte range
// that code point occupies. The byte at `index` is expected to be a
// continuation byte; this is only verified in debug builds.
#[cold]
#[inline(never)]
#[track_caller]
fn index_boundary_error(&self, index: usize) -> ! {
    debug_assert!(raw::is_continuation(self.0[index]));

    // Search backward for the closest byte before `index` that is not a
    // continuation byte; that is where the reported code point starts.
    let start = self.0[..index]
        .iter()
        .rposition(|&x| !raw::is_continuation(x))
        .expect("invalid raw bytes");
    // Search forward, starting just after `index`, for the next byte that is
    // not a continuation byte. If there is none, the code point runs to the
    // end of the string.
    let mut end = index + 1;
    end += self.0[end..]
        .iter()
        .position(|&x| !raw::is_continuation(x))
        .unwrap_or_else(|| self.raw_len() - end);
    let code_point = raw::decode_code_point(&self.0[start..end]);
    panic!(
        "byte index {} is not a valid boundary; it is inside U+{:04X} \
         (bytes {}..{})",
        index, code_point, start, end,
    );
}
432
433 #[track_caller]
check_bound(&self, index: usize)434 fn check_bound(&self, index: usize) {
435 if let Some(&byte) = self.0.get(index) {
436 if raw::is_continuation(byte) {
437 self.index_boundary_error(index);
438 }
439 }
440 }
441
/// Equivalent to [`str::split`], but empty patterns are not accepted.
///
/// The returned [`Split`] iterator is constructed from this string and the
/// given pattern.
///
/// # Panics
///
/// Panics if the pattern is a byte outside of the ASCII range or empty.
///
/// # Examples
///
/// ```
/// use os_str_bytes::RawOsStr;
///
/// let raw = RawOsStr::from_str("foobar");
/// assert_eq!(["f", "", "bar"], *raw.split("o").collect::<Vec<_>>());
/// ```
#[inline]
#[must_use]
pub fn split<P>(&self, pat: P) -> Split<'_, P>
where
    P: Pattern,
{
    Split::new(self, pat)
}
464
465 /// Equivalent to [`str::split_at`].
466 ///
467 /// # Panics
468 ///
469 /// Panics if the index is not a [valid boundary].
470 ///
471 /// # Examples
472 ///
473 /// ```
474 /// use os_str_bytes::RawOsStr;
475 ///
476 /// let raw = RawOsStr::from_str("foobar");
477 /// assert_eq!(
478 /// ((RawOsStr::from_str("fo"), RawOsStr::from_str("obar"))),
479 /// raw.split_at(2),
480 /// );
481 /// ```
482 ///
483 /// [valid boundary]: #indices
484 #[inline]
485 #[must_use]
split_at(&self, mid: usize) -> (&Self, &Self)486 pub fn split_at(&self, mid: usize) -> (&Self, &Self) {
487 self.check_bound(mid);
488
489 let (prefix, suffix) = self.0.split_at(mid);
490 (
491 Self::from_raw_bytes_unchecked(prefix),
492 Self::from_raw_bytes_unchecked(suffix),
493 )
494 }
495
// Splits this string around the first occurrence of an already-encoded
// pattern, returning the parts before and after the match, or `None` when
// the pattern does not occur. The pattern is taken pre-encoded, so the
// encoding step performed by `split_once` is not repeated here.
pub(super) fn split_once_raw<P>(&self, pat: &P) -> Option<(&Self, &Self)>
where
    P: EncodedPattern,
{
    impl_split_once_raw!(self, pat, find)
}
502
503 /// Equivalent to [`str::split_once`].
504 ///
505 /// # Panics
506 ///
507 /// Panics if the pattern is a byte outside of the ASCII range.
508 ///
509 /// # Examples
510 ///
511 /// ```
512 /// use os_str_bytes::RawOsStr;
513 ///
514 /// let raw = RawOsStr::from_str("foobar");
515 /// assert_eq!(
516 /// Some((RawOsStr::from_str("f"), RawOsStr::from_str("obar"))),
517 /// raw.split_once("o"),
518 /// );
519 /// assert_eq!(None, raw.split_once("of"));
520 /// ```
521 #[inline]
522 #[must_use]
split_once<P>(&self, pat: P) -> Option<(&Self, &Self)> where P: Pattern,523 pub fn split_once<P>(&self, pat: P) -> Option<(&Self, &Self)>
524 where
525 P: Pattern,
526 {
527 self.split_once_raw(&pat.__encode())
528 }
529
530 /// Equivalent to [`str::starts_with`].
531 ///
532 /// # Panics
533 ///
534 /// Panics if the pattern is a byte outside of the ASCII range.
535 ///
536 /// # Examples
537 ///
538 /// ```
539 /// use os_str_bytes::RawOsStr;
540 ///
541 /// let raw = RawOsStr::from_str("foobar");
542 /// assert!(raw.starts_with("foo"));
543 /// assert!(!raw.starts_with("bar"));
544 /// ```
545 #[inline]
546 #[must_use]
starts_with<P>(&self, pat: P) -> bool where P: Pattern,547 pub fn starts_with<P>(&self, pat: P) -> bool
548 where
549 P: Pattern,
550 {
551 let pat = pat.__encode();
552 let pat = pat.__get();
553
554 self.0.starts_with(pat)
555 }
556
557 /// Equivalent to [`str::starts_with`] but accepts this type for the
558 /// pattern.
559 ///
560 /// # Panics
561 ///
562 /// Panics if the pattern is a byte outside of the ASCII range.
563 ///
564 /// # Examples
565 ///
566 /// ```
567 /// use os_str_bytes::RawOsStr;
568 ///
569 /// let raw = RawOsStr::from_str("foobar");
570 /// assert!(raw.starts_with_os(RawOsStr::from_str("foo")));
571 /// assert!(!raw.starts_with_os(RawOsStr::from_str("bar")));
572 /// ```
573 #[inline]
574 #[must_use]
starts_with_os(&self, pat: &Self) -> bool575 pub fn starts_with_os(&self, pat: &Self) -> bool {
576 raw::starts_with(&self.0, &pat.0)
577 }
578
579 /// Equivalent to [`str::strip_prefix`].
580 ///
581 /// # Panics
582 ///
583 /// Panics if the pattern is a byte outside of the ASCII range.
584 ///
585 /// # Examples
586 ///
587 /// ```
588 /// use os_str_bytes::RawOsStr;
589 ///
590 /// let raw = RawOsStr::from_str("111foo1bar111");
591 /// assert_eq!(
592 /// Some(RawOsStr::from_str("11foo1bar111")),
593 /// raw.strip_prefix("1"),
594 /// );
595 /// assert_eq!(None, raw.strip_prefix("o"));
596 /// ```
597 #[inline]
598 #[must_use]
strip_prefix<P>(&self, pat: P) -> Option<&Self> where P: Pattern,599 pub fn strip_prefix<P>(&self, pat: P) -> Option<&Self>
600 where
601 P: Pattern,
602 {
603 let pat = pat.__encode();
604 let pat = pat.__get();
605
606 self.0.strip_prefix(pat).map(Self::from_raw_bytes_unchecked)
607 }
608
609 /// Equivalent to [`str::strip_suffix`].
610 ///
611 /// # Panics
612 ///
613 /// Panics if the pattern is a byte outside of the ASCII range.
614 ///
615 /// # Examples
616 ///
617 /// ```
618 /// use os_str_bytes::RawOsStr;
619 ///
620 /// let raw = RawOsStr::from_str("111foo1bar111");
621 /// assert_eq!(
622 /// Some(RawOsStr::from_str("111foo1bar11")),
623 /// raw.strip_suffix("1"),
624 /// );
625 /// assert_eq!(None, raw.strip_suffix("o"));
626 /// ```
627 #[inline]
628 #[must_use]
strip_suffix<P>(&self, pat: P) -> Option<&Self> where P: Pattern,629 pub fn strip_suffix<P>(&self, pat: P) -> Option<&Self>
630 where
631 P: Pattern,
632 {
633 let pat = pat.__encode();
634 let pat = pat.__get();
635
636 self.0.strip_suffix(pat).map(Self::from_raw_bytes_unchecked)
637 }
638
639 /// Converts this representation back to a platform-native string.
640 ///
641 /// # Examples
642 ///
643 /// ```
644 /// use std::env;
645 /// # use std::io;
646 ///
647 /// use os_str_bytes::RawOsStr;
648 ///
649 /// let os_string = env::current_exe()?.into_os_string();
650 /// let raw = RawOsStr::new(&os_string);
651 /// assert_eq!(os_string, raw.to_os_str());
652 /// #
653 /// # Ok::<_, io::Error>(())
654 /// ```
655 #[inline]
656 #[must_use]
to_os_str(&self) -> Cow<'_, OsStr>657 pub fn to_os_str(&self) -> Cow<'_, OsStr> {
658 OsStr::from_raw_bytes(&self.0).expect("invalid raw bytes")
659 }
660
661 /// Equivalent to [`OsStr::to_str`].
662 ///
663 /// # Examples
664 ///
665 /// ```
666 /// use os_str_bytes::RawOsStr;
667 ///
668 /// let string = "foobar";
669 /// let raw = RawOsStr::from_str(string);
670 /// assert_eq!(Some(string), raw.to_str());
671 /// ```
672 #[inline]
673 #[must_use]
to_str(&self) -> Option<&str>674 pub fn to_str(&self) -> Option<&str> {
675 str::from_utf8(&self.0).ok()
676 }
677
678 /// Converts this string to the best UTF-8 representation possible.
679 ///
680 /// Invalid sequences will be replaced with
681 /// [`char::REPLACEMENT_CHARACTER`].
682 ///
683 /// This method may return a different result than would
684 /// [`OsStr::to_string_lossy`] when called on same string, since [`OsStr`]
685 /// uses an unspecified encoding.
686 ///
687 /// # Examples
688 ///
689 /// ```
690 /// use std::env;
691 /// # use std::io;
692 ///
693 /// use os_str_bytes::RawOsStr;
694 ///
695 /// let os_string = env::current_exe()?.into_os_string();
696 /// let raw = RawOsStr::new(&os_string);
697 /// println!("{}", raw.to_str_lossy());
698 /// #
699 /// # Ok::<_, io::Error>(())
700 /// ```
701 #[inline]
702 #[must_use]
to_str_lossy(&self) -> Cow<'_, str>703 pub fn to_str_lossy(&self) -> Cow<'_, str> {
704 String::from_utf8_lossy(&self.0)
705 }
706
/// Equivalent to [`str::trim_end_matches`].
///
/// The pattern is removed from the end of the string repeatedly, until the
/// string no longer ends with it. If the pattern is empty, the string is
/// returned unchanged.
///
/// # Panics
///
/// Panics if the pattern is a byte outside of the ASCII range.
///
/// # Examples
///
/// ```
/// use os_str_bytes::RawOsStr;
///
/// let raw = RawOsStr::from_str("111foo1bar111");
/// assert_eq!("111foo1bar", raw.trim_end_matches("1"));
/// assert_eq!("111foo1bar111", raw.trim_end_matches("o"));
/// ```
#[must_use]
pub fn trim_end_matches<P>(&self, pat: P) -> &Self
where
    P: Pattern,
{
    impl_trim_matches!(self, pat, strip_suffix)
}
729
/// Equivalent to [`str::trim_start_matches`].
///
/// The pattern is removed from the start of the string repeatedly, until
/// the string no longer starts with it. If the pattern is empty, the string
/// is returned unchanged.
///
/// # Panics
///
/// Panics if the pattern is a byte outside of the ASCII range.
///
/// # Examples
///
/// ```
/// use os_str_bytes::RawOsStr;
///
/// let raw = RawOsStr::from_str("111foo1bar111");
/// assert_eq!("foo1bar111", raw.trim_start_matches("1"));
/// assert_eq!("111foo1bar111", raw.trim_start_matches("o"));
/// ```
#[must_use]
pub fn trim_start_matches<P>(&self, pat: P) -> &Self
where
    P: Pattern,
{
    impl_trim_matches!(self, pat, strip_prefix)
}
752 }
753
// Identity conversion, so that `&RawOsStr` can be passed where
// `AsRef<RawOsStr>` is required.
impl AsRef<Self> for RawOsStr {
    #[inline]
    fn as_ref(&self) -> &Self {
        self
    }
}
760
761 impl AsRef<RawOsStr> for str {
762 #[inline]
as_ref(&self) -> &RawOsStr763 fn as_ref(&self) -> &RawOsStr {
764 RawOsStr::from_str(self)
765 }
766 }
767
768 impl AsRef<RawOsStr> for String {
769 #[inline]
as_ref(&self) -> &RawOsStr770 fn as_ref(&self) -> &RawOsStr {
771 (**self).as_ref()
772 }
773 }
774
775 impl Default for &RawOsStr {
776 #[inline]
default() -> Self777 fn default() -> Self {
778 RawOsStr::from_str("")
779 }
780 }
781
782 impl<'a> From<&'a RawOsStr> for Cow<'a, RawOsStr> {
783 #[inline]
from(other: &'a RawOsStr) -> Self784 fn from(other: &'a RawOsStr) -> Self {
785 Cow::Borrowed(other)
786 }
787 }
788
789 macro_rules! r#impl {
790 (
791 $index_type:ty
792 $(, $index_var:ident , $first_bound:expr $(, $second_bound:expr)?)?
793 ) => {
794 impl Index<$index_type> for RawOsStr {
795 type Output = Self;
796
797 #[inline]
index(&self, idx: $index_type) -> &Self::Output798 fn index(&self, idx: $index_type) -> &Self::Output {
799 $(
800 let $index_var = &idx;
801 self.check_bound($first_bound);
802 $(self.check_bound($second_bound);)?
803 )?
804
805 Self::from_raw_bytes_unchecked(&self.0[idx])
806 }
807 }
808 };
809 }
810 r#impl!(Range<usize>, x, x.start, x.end);
811 r#impl!(RangeFrom<usize>, x, x.start);
812 r#impl!(RangeFull);
813 // [usize::MAX] will always be a valid inclusive end index.
814 #[rustfmt::skip]
815 r#impl!(RangeInclusive<usize>, x, *x.start(), x.end().wrapping_add(1));
816 r#impl!(RangeTo<usize>, x, x.end);
817 r#impl!(RangeToInclusive<usize>, x, x.end.wrapping_add(1));
818
819 impl ToOwned for RawOsStr {
820 type Owned = RawOsString;
821
822 #[inline]
to_owned(&self) -> Self::Owned823 fn to_owned(&self) -> Self::Owned {
824 RawOsString(self.0.to_owned())
825 }
826 }
827
/// A container for the byte strings converted by [`OsStringBytes`].
///
/// This is the owned counterpart of [`RawOsStr`]; it dereferences to that
/// type, so the borrowed type's methods are available on it. The bytes are
/// stored in the [unspecified encoding] used by this crate.
///
/// For more information, see [`RawOsStr`].
///
/// [unspecified encoding]: super#encoding
#[derive(Clone, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "raw_os_str")))]
pub struct RawOsString(Vec<u8>);
836
837 impl RawOsString {
838 /// Converts a platform-native string into a representation that can be
839 /// more easily manipulated.
840 ///
841 /// For more information, see [`RawOsStr::new`].
842 ///
843 /// # Examples
844 ///
845 /// ```
846 /// use std::env;
847 /// # use std::io;
848 ///
849 /// use os_str_bytes::RawOsString;
850 ///
851 /// let os_string = env::current_exe()?.into_os_string();
852 /// println!("{:?}", RawOsString::new(os_string));
853 /// #
854 /// # Ok::<_, io::Error>(())
855 /// ```
856 #[inline]
857 #[must_use]
new(string: OsString) -> Self858 pub fn new(string: OsString) -> Self {
859 Self(string.into_raw_vec())
860 }
861
862 /// Wraps a string, without copying or encoding conversion.
863 ///
864 /// This method is much more efficient than [`RawOsString::new`], since the
865 /// [encoding] used by this crate is compatible with UTF-8.
866 ///
867 /// # Examples
868 ///
869 /// ```
870 /// use os_str_bytes::RawOsString;
871 ///
872 /// let string = "foobar".to_owned();
873 /// let raw = RawOsString::from_string(string.clone());
874 /// assert_eq!(string, raw);
875 /// ```
876 ///
877 /// [encoding]: super#encoding
878 #[inline]
879 #[must_use]
from_string(string: String) -> Self880 pub fn from_string(string: String) -> Self {
881 Self(string.into_bytes())
882 }
883
884 /// Converts this representation back to a platform-native string.
885 ///
886 /// # Examples
887 ///
888 /// ```
889 /// use std::env;
890 /// # use std::io;
891 ///
892 /// use os_str_bytes::RawOsString;
893 ///
894 /// let os_string = env::current_exe()?.into_os_string();
895 /// let raw = RawOsString::new(os_string.clone());
896 /// assert_eq!(os_string, raw.into_os_string());
897 /// #
898 /// # Ok::<_, io::Error>(())
899 /// ```
900 #[inline]
901 #[must_use]
into_os_string(self) -> OsString902 pub fn into_os_string(self) -> OsString {
903 OsString::from_raw_vec(self.0).expect("invalid raw bytes")
904 }
905
906 /// Returns the byte string stored by this container.
907 ///
908 /// The result will match what would be returned by
909 /// [`OsStringBytes::into_raw_vec`] for the same string.
910 ///
911 /// # Examples
912 ///
913 /// ```
914 /// use std::env;
915 /// # use std::io;
916 ///
917 /// use os_str_bytes::OsStringBytes;
918 /// use os_str_bytes::RawOsString;
919 ///
920 /// let os_string = env::current_exe()?.into_os_string();
921 /// let raw = RawOsString::new(os_string.clone());
922 /// assert_eq!(os_string.into_raw_vec(), raw.into_raw_vec());
923 /// #
924 /// # Ok::<_, io::Error>(())
925 /// ```
926 #[inline]
927 #[must_use]
into_raw_vec(self) -> Vec<u8>928 pub fn into_raw_vec(self) -> Vec<u8> {
929 self.0
930 }
931
932 /// Equivalent to [`OsString::into_string`].
933 ///
934 /// # Examples
935 ///
936 /// ```
937 /// use os_str_bytes::RawOsString;
938 ///
939 /// let string = "foobar".to_owned();
940 /// let raw = RawOsString::from_string(string.clone());
941 /// assert_eq!(Ok(string), raw.into_string());
942 /// ```
943 #[inline]
into_string(self) -> Result<String, Self>944 pub fn into_string(self) -> Result<String, Self> {
945 String::from_utf8(self.0).map_err(|x| Self(x.into_bytes()))
946 }
947 }
948
949 impl AsRef<RawOsStr> for RawOsString {
950 #[inline]
as_ref(&self) -> &RawOsStr951 fn as_ref(&self) -> &RawOsStr {
952 self
953 }
954 }
955
956 impl Borrow<RawOsStr> for RawOsString {
957 #[inline]
borrow(&self) -> &RawOsStr958 fn borrow(&self) -> &RawOsStr {
959 self
960 }
961 }
962
963 impl Deref for RawOsString {
964 type Target = RawOsStr;
965
966 #[inline]
deref(&self) -> &Self::Target967 fn deref(&self) -> &Self::Target {
968 RawOsStr::from_raw_bytes_unchecked(&self.0)
969 }
970 }
971
972 impl From<String> for RawOsString {
973 #[inline]
from(other: String) -> Self974 fn from(other: String) -> Self {
975 Self::from_string(other)
976 }
977 }
978
979 impl From<RawOsString> for Cow<'_, RawOsStr> {
980 #[inline]
from(other: RawOsString) -> Self981 fn from(other: RawOsString) -> Self {
982 Cow::Owned(other)
983 }
984 }
985
986 macro_rules! r#impl {
987 ( $index_type:ty ) => {
988 impl Index<$index_type> for RawOsString {
989 type Output = RawOsStr;
990
991 #[inline]
index(&self, idx: $index_type) -> &Self::Output992 fn index(&self, idx: $index_type) -> &Self::Output {
993 &(**self)[idx]
994 }
995 }
996 };
997 }
998 r#impl!(Range<usize>);
999 r#impl!(RangeFrom<usize>);
1000 r#impl!(RangeFull);
1001 r#impl!(RangeInclusive<usize>);
1002 r#impl!(RangeTo<usize>);
1003 r#impl!(RangeToInclusive<usize>);
1004
// Borrowed view of raw bytes, used only to give them a quoted `Debug`
// representation.
struct Buffer<'a>(&'a [u8]);

// Writes the bytes between double quotes. Runs of valid UTF-8 are written
// with `str::escape_debug`; the bytes between those runs are handed to
// `raw::debug`.
impl Debug for Buffer<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        f.write_str("\"")?;

        // `string` is the part that has not been written yet, and
        // `invalid_length` is how many of its leading bytes are already
        // known not to be valid UTF-8.
        let mut string = self.0;
        let mut invalid_length = 0;
        while !string.is_empty() {
            let (invalid, substring) = string.split_at(invalid_length);

            let valid = match str::from_utf8(substring) {
                Ok(valid) => {
                    // Everything after the invalid prefix is valid, so this
                    // is the final iteration.
                    string = &[];
                    valid
                }
                Err(error) => {
                    let (valid, substring) =
                        substring.split_at(error.valid_up_to());

                    // `error_len` is `None` when the input ended in the
                    // middle of a sequence; the rest is then all invalid.
                    let invalid_char_length =
                        error.error_len().unwrap_or_else(|| substring.len());
                    if valid.is_empty() {
                        // Nothing valid separates this invalid sequence from
                        // the previous ones, so extend the invalid prefix and
                        // retry without writing anything yet.
                        invalid_length += invalid_char_length;
                        continue;
                    }
                    // Resume after the valid text on the next iteration,
                    // with the invalid sequence just found as the new prefix.
                    string = substring;
                    invalid_length = invalid_char_length;

                    // SAFETY: This slice was validated to be UTF-8.
                    unsafe { str::from_utf8_unchecked(valid) }
                }
            };

            raw::debug(invalid, f)?;
            Display::fmt(&valid.escape_debug(), f)?;
        }

        f.write_str("\"")
    }
}
1046
1047 macro_rules! r#impl {
1048 ( $type:ty ) => {
1049 impl Debug for $type {
1050 #[inline]
fmt(&self, f: &mut Formatter<'_>) -> fmt::Result1051 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
1052 f.debug_tuple(stringify!($type))
1053 .field(&Buffer(&self.0))
1054 .finish()
1055 }
1056 }
1057 };
1058 }
1059 r#impl!(RawOsStr);
1060 r#impl!(RawOsString);
1061
1062 macro_rules! r#impl {
1063 ( $type:ty , $other_type:ty ) => {
1064 impl PartialEq<$other_type> for $type {
1065 #[inline]
eq(&self, other: &$other_type) -> bool1066 fn eq(&self, other: &$other_type) -> bool {
1067 let raw: &RawOsStr = self;
1068 let other: &RawOsStr = other.as_ref();
1069 raw == other
1070 }
1071 }
1072
1073 impl PartialEq<$type> for $other_type {
1074 #[inline]
eq(&self, other: &$type) -> bool1075 fn eq(&self, other: &$type) -> bool {
1076 other == self
1077 }
1078 }
1079 };
1080 }
1081 r#impl!(RawOsStr, RawOsString);
1082 r#impl!(&RawOsStr, RawOsString);
1083 r#impl!(RawOsStr, str);
1084 r#impl!(RawOsStr, String);
1085 r#impl!(&RawOsStr, String);
1086 r#impl!(RawOsString, str);
1087 r#impl!(RawOsString, &str);
1088 r#impl!(RawOsString, String);
1089
// Integration with the `print_bytes` crate, compiled only when the
// "print_bytes" feature is enabled.
#[cfg(feature = "print_bytes")]
#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "print_bytes")))]
mod print_bytes {
    use print_bytes::ByteStr;
    use print_bytes::ToBytes;
    #[cfg(windows)]
    use print_bytes::WideStr;

    #[cfg(windows)]
    use crate::imp::raw;

    use super::RawOsStr;
    use super::RawOsString;

    impl ToBytes for RawOsStr {
        // Forwards to the `ToBytes` implementation of the stored bytes.
        #[inline]
        fn to_bytes(&self) -> ByteStr<'_> {
            self.0.to_bytes()
        }

        // Windows only: builds a wide string from the output of
        // `raw::encode_wide_unchecked` for the stored bytes.
        #[cfg(windows)]
        #[inline]
        fn to_wide(&self) -> Option<WideStr> {
            Some(WideStr::new(raw::encode_wide_unchecked(&self.0).collect()))
        }
    }

    impl ToBytes for RawOsString {
        // Forwards to the `RawOsStr` implementation.
        #[inline]
        fn to_bytes(&self) -> ByteStr<'_> {
            (**self).to_bytes()
        }

        // Windows only: forwards to the `RawOsStr` implementation.
        #[cfg(windows)]
        #[inline]
        fn to_wide(&self) -> Option<WideStr> {
            (**self).to_wide()
        }
    }
}
1130
// Integration with the `uniquote` crate, compiled only when the "uniquote"
// feature is enabled.
#[cfg(feature = "uniquote")]
#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "uniquote")))]
mod uniquote {
    use uniquote::Formatter;
    use uniquote::Quote;
    use uniquote::Result;

    use crate::imp::raw;

    use super::RawOsStr;
    use super::RawOsString;

    impl Quote for RawOsStr {
        // Escaping is delegated to the platform-specific implementation.
        #[inline]
        fn escape(&self, f: &mut Formatter<'_>) -> Result {
            raw::uniquote::escape(&self.0, f)
        }
    }

    impl Quote for RawOsString {
        // Forwards to the `RawOsStr` implementation.
        #[inline]
        fn escape(&self, f: &mut Formatter<'_>) -> Result {
            (**self).escape(f)
        }
    }
}
1157