1 use crate::{UChar, WideChar};
2 use core::slice;
3 
4 #[cfg(all(feature = "alloc", not(feature = "std")))]
5 use alloc::{
6     borrow::ToOwned,
7     boxed::Box,
8     string::{FromUtf16Error, String},
9     vec::Vec,
10 };
11 #[cfg(feature = "std")]
12 use std::{
13     borrow::ToOwned,
14     boxed::Box,
15     string::{FromUtf16Error, String},
16     vec::Vec,
17 };
18 
/// Error produced by `UCString` and `UCStr` when a terminating nul value is missing.
///
/// When the `alloc` feature is enabled the error can hold the offending vector, so that a
/// caller who handed over an owned vector is able to get it back.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct MissingNulError<C> {
    // Without `alloc` there is nothing to store; the marker only keeps `C` in use.
    #[cfg(not(feature = "alloc"))]
    _p: core::marker::PhantomData<C>,
    // The rejected vector, when one was owned at the point of failure.
    #[cfg(feature = "alloc")]
    pub(crate) inner: Option<Vec<C>>,
}
30 
31 impl<C: UChar> MissingNulError<C> {
32     #[cfg(feature = "alloc")]
empty() -> Self33     fn empty() -> Self {
34         Self { inner: None }
35     }
36 
37     #[cfg(not(feature = "alloc"))]
empty() -> Self38     fn empty() -> Self {
39         Self {
40             _p: core::marker::PhantomData,
41         }
42     }
43 
44     /// Consumes this error, returning the underlying vector of `u16` values which generated the
45     /// error in the first place.
46     #[cfg(feature = "alloc")]
into_vec(self) -> Option<Vec<C>>47     pub fn into_vec(self) -> Option<Vec<C>> {
48         self.inner
49     }
50 }
51 
52 impl<C: UChar> core::fmt::Display for MissingNulError<C> {
fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result53     fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
54         write!(f, "missing terminating nul value")
55     }
56 }
57 
// Only available with `std`, because the `Error` trait is taken from `std::error`.
#[cfg(feature = "std")]
impl<C: UChar> std::error::Error for MissingNulError<C> {
    // NOTE(review): `Error::description` is deprecated in the standard library in favour of
    // `Display`; this override returns the same text that the `Display` impl writes.
    fn description(&self) -> &str {
        "missing terminating nul value"
    }
}
64 
65 /// C-style wide string reference for `UCString`.
66 ///
67 /// `UCStr` is aware of nul values. Unless unchecked conversions are used, all `UCStr`
68 /// strings end with a nul-terminator in the underlying buffer and contain no internal nul values.
69 /// The strings may still contain invalid or ill-formed UTF-16 or UTF-32 data. These strings are
70 /// intended to be used with FFI functions such as Windows API that may require nul-terminated
71 /// strings.
72 ///
73 /// `UCStr` can be converted to and from many other string types, including `UString`,
74 /// `OsString`, and `String`, making proper Unicode FFI safe and easy.
75 ///
76 /// Please prefer using the type aliases `U16CStr` or `U32CStr` or `WideCStr` to using
77 /// this type directly.
78 #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
79 pub struct UCStr<C: UChar> {
80     inner: [C],
81 }
82 
83 impl<C: UChar> UCStr<C> {
84     /// Coerces a value into a `UCStr`.
new<S: AsRef<UCStr<C>> + ?Sized>(s: &S) -> &Self85     pub fn new<S: AsRef<UCStr<C>> + ?Sized>(s: &S) -> &Self {
86         s.as_ref()
87     }
88 
    /// Constructs a `UCStr` from a nul-terminated string pointer.
90     ///
91     /// This will scan for nul values beginning with `p`. The first nul value will be used as the
92     /// nul terminator for the string, similar to how libc string functions such as `strlen` work.
93     ///
94     /// # Safety
95     ///
96     /// This function is unsafe as there is no guarantee that the given pointer is valid or has a
97     /// nul terminator, and the function could scan past the underlying buffer.
98     ///
99     /// `p` must be non-null.
100     ///
101     /// # Panics
102     ///
103     /// This function panics if `p` is null.
104     ///
105     /// # Caveat
106     ///
107     /// The lifetime for the returned string is inferred from its usage. To prevent accidental
108     /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the
109     /// context, such as by providing a helper function taking the lifetime of a host value for the
110     /// string, or by explicit annotation.
from_ptr_str<'a>(p: *const C) -> &'a Self111     pub unsafe fn from_ptr_str<'a>(p: *const C) -> &'a Self {
112         assert!(!p.is_null());
113         let mut i: isize = 0;
114         while *p.offset(i) != UChar::NUL {
115             i += 1;
116         }
117         let ptr: *const [C] = slice::from_raw_parts(p, i as usize + 1);
118         &*(ptr as *const UCStr<C>)
119     }
120 
121     /// Constructs a `UStr` from a pointer and a length.
122     ///
123     /// The `len` argument is the number of elements, **not** the number of bytes, and does
124     /// **not** include the nul terminator of the string. Thus, a `len` of 0 is valid and means that
125     /// `p` is a pointer directly to the nul terminator of the string.
126     ///
127     /// # Safety
128     ///
129     /// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
130     /// elements.
131     ///
132     /// `p` must be non-null, even for zero `len`.
133     ///
134     /// The interior values of the pointer are not scanned for nul. Any interior nul values will
135     /// result in an invalid `UCStr`.
136     ///
137     /// # Panics
138     ///
139     /// This function panics if `p` is null or if a nul value is not found at offset `len` of `p`.
140     /// Only pointers with a nul terminator are valid.
141     ///
142     /// # Caveat
143     ///
144     /// The lifetime for the returned string is inferred from its usage. To prevent accidental
145     /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the
146     /// context, such as by providing a helper function taking the lifetime of a host value for the
147     /// string, or by explicit annotation.
from_ptr_with_nul<'a>(p: *const C, len: usize) -> &'a Self148     pub unsafe fn from_ptr_with_nul<'a>(p: *const C, len: usize) -> &'a Self {
149         assert!(*p.add(len) == UChar::NUL);
150         let ptr: *const [C] = slice::from_raw_parts(p, len + 1);
151         &*(ptr as *const UCStr<C>)
152     }
153 
154     /// Constructs a `UCStr` from a slice of values that has a nul terminator.
155     ///
156     /// The slice will be scanned for nul values. When a nul value is found, it is treated as the
157     /// terminator for the string, and the `UCStr` slice will be truncated to that nul.
158     ///
159     /// # Failure
160     ///
    /// If there are no nul values in the slice, an error is returned.
from_slice_with_nul(slice: &[C]) -> Result<&Self, MissingNulError<C>>162     pub fn from_slice_with_nul(slice: &[C]) -> Result<&Self, MissingNulError<C>> {
163         match slice.iter().position(|x| *x == UChar::NUL) {
164             None => Err(MissingNulError::empty()),
165             Some(i) => Ok(unsafe { UCStr::from_slice_with_nul_unchecked(&slice[..i + 1]) }),
166         }
167     }
168 
169     /// Constructs a `UCStr` from a slice of values that has a nul terminator. No
170     /// checking for nul values is performed.
171     ///
172     /// # Safety
173     ///
174     /// This function is unsafe because it can lead to invalid `UCStr` values when the slice
175     /// is missing a terminating nul value or there are non-terminating interior nul values
176     /// in the slice.
from_slice_with_nul_unchecked(slice: &[C]) -> &Self177     pub unsafe fn from_slice_with_nul_unchecked(slice: &[C]) -> &Self {
178         let ptr: *const [C] = slice;
179         &*(ptr as *const UCStr<C>)
180     }
181 
    /// Copies the wide string to a new owned `UCString`.
183     #[cfg(feature = "alloc")]
to_ucstring(&self) -> crate::UCString<C>184     pub fn to_ucstring(&self) -> crate::UCString<C> {
185         unsafe { crate::UCString::from_vec_with_nul_unchecked(self.inner.to_owned()) }
186     }
187 
188     /// Copies the wide string to a new owned `UString`.
189     ///
190     /// The `UString` will **not** have a nul terminator.
191     ///
192     /// # Examples
193     ///
194     /// ```rust
195     /// use widestring::U16CString;
196     /// let wcstr = U16CString::from_str("MyString").unwrap();
197     /// // Convert U16CString to a U16String
198     /// let wstr = wcstr.to_ustring();
199     ///
200     /// // U16CString will have a terminating nul
201     /// let wcvec = wcstr.into_vec_with_nul();
202     /// assert_eq!(wcvec[wcvec.len()-1], 0);
203     /// // The resulting U16String will not have the terminating nul
204     /// let wvec = wstr.into_vec();
205     /// assert_ne!(wvec[wvec.len()-1], 0);
206     /// ```
207     ///
208     /// ```rust
209     /// use widestring::U32CString;
210     /// let wcstr = U32CString::from_str("MyString").unwrap();
211     /// // Convert U32CString to a U32String
212     /// let wstr = wcstr.to_ustring();
213     ///
214     /// // U32CString will have a terminating nul
215     /// let wcvec = wcstr.into_vec_with_nul();
216     /// assert_eq!(wcvec[wcvec.len()-1], 0);
217     /// // The resulting U32String will not have the terminating nul
218     /// let wvec = wstr.into_vec();
219     /// assert_ne!(wvec[wvec.len()-1], 0);
220     /// ```
221     #[cfg(feature = "alloc")]
to_ustring(&self) -> crate::UString<C>222     pub fn to_ustring(&self) -> crate::UString<C> {
223         crate::UString::from_vec(self.as_slice())
224     }
225 
226     /// Converts to a slice of the wide string.
227     ///
228     /// The slice will **not** include the nul terminator.
as_slice(&self) -> &[C]229     pub fn as_slice(&self) -> &[C] {
230         &self.inner[..self.len()]
231     }
232 
233     /// Converts to a slice of the wide string, including the nul terminator.
as_slice_with_nul(&self) -> &[C]234     pub fn as_slice_with_nul(&self) -> &[C] {
235         &self.inner
236     }
237 
238     /// Returns a raw pointer to the wide string.
239     ///
240     /// The pointer is valid only as long as the lifetime of this reference.
as_ptr(&self) -> *const C241     pub fn as_ptr(&self) -> *const C {
242         self.inner.as_ptr()
243     }
244 
245     /// Returns the length of the wide string as number of elements (**not** number of bytes)
246     /// **not** including nul terminator.
len(&self) -> usize247     pub fn len(&self) -> usize {
248         self.inner.len() - 1
249     }
250 
251     /// Returns whether this wide string contains no data (i.e. is only the nul terminator).
is_empty(&self) -> bool252     pub fn is_empty(&self) -> bool {
253         self.len() == 0
254     }
255 
256     /// Converts a `Box<UCStr>` into a `UCString` without copying or allocating.
257     ///
258     /// # Examples
259     ///
260     /// ```
261     /// use widestring::U16CString;
262     ///
263     /// let v = vec![102u16, 111u16, 111u16]; // "foo"
264     /// let c_string = U16CString::new(v.clone()).unwrap();
265     /// let boxed = c_string.into_boxed_ucstr();
266     /// assert_eq!(boxed.into_ucstring(), U16CString::new(v).unwrap());
267     /// ```
268     ///
269     /// ```
270     /// use widestring::U32CString;
271     ///
272     /// let v = vec![102u32, 111u32, 111u32]; // "foo"
273     /// let c_string = U32CString::new(v.clone()).unwrap();
274     /// let boxed = c_string.into_boxed_ucstr();
275     /// assert_eq!(boxed.into_ucstring(), U32CString::new(v).unwrap());
276     /// ```
277     #[cfg(feature = "alloc")]
into_ucstring(self: Box<Self>) -> crate::UCString<C>278     pub fn into_ucstring(self: Box<Self>) -> crate::UCString<C> {
279         let raw = Box::into_raw(self) as *mut [C];
280         crate::UCString {
281             inner: unsafe { Box::from_raw(raw) },
282         }
283     }
284 
285     #[cfg(feature = "alloc")]
from_inner(slice: &[C]) -> &UCStr<C>286     pub(crate) fn from_inner(slice: &[C]) -> &UCStr<C> {
287         let ptr: *const [C] = slice;
288         unsafe { &*(ptr as *const UCStr<C>) }
289     }
290 }
291 
292 impl UCStr<u16> {
293     /// Decodes a wide string to an owned `OsString`.
294     ///
295     /// This makes a string copy of the `U16CStr`. Since `U16CStr` makes no guarantees that it is
296     /// valid UTF-16, there is no guarantee that the resulting `OsString` will be valid data. The
297     /// `OsString` will **not** have a nul terminator.
298     ///
299     /// # Examples
300     ///
301     /// ```rust
302     /// use widestring::U16CString;
303     /// use std::ffi::OsString;
304     /// let s = "MyString";
305     /// // Create a wide string from the string
306     /// let wstr = U16CString::from_str(s).unwrap();
307     /// // Create an OsString from the wide string
308     /// let osstr = wstr.to_os_string();
309     ///
310     /// assert_eq!(osstr, OsString::from(s));
311     /// ```
312     #[cfg(feature = "std")]
to_os_string(&self) -> std::ffi::OsString313     pub fn to_os_string(&self) -> std::ffi::OsString {
314         crate::platform::os_from_wide(self.as_slice())
315     }
316 
317     /// Copies the wide string to a `String` if it contains valid UTF-16 data.
318     ///
319     /// # Failures
320     ///
321     /// Returns an error if the string contains any invalid UTF-16 data.
322     ///
323     /// # Examples
324     ///
325     /// ```rust
326     /// use widestring::U16CString;
327     /// let s = "MyString";
328     /// // Create a wide string from the string
329     /// let wstr = U16CString::from_str(s).unwrap();
330     /// // Create a regular string from the wide string
331     /// let s2 = wstr.to_string().unwrap();
332     ///
333     /// assert_eq!(s2, s);
334     /// ```
335     #[cfg(feature = "alloc")]
to_string(&self) -> Result<String, FromUtf16Error>336     pub fn to_string(&self) -> Result<String, FromUtf16Error> {
337         String::from_utf16(self.as_slice())
338     }
339 
340     /// Copies the wide string to a `String`.
341     ///
342     /// Any non-Unicode sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
343     ///
344     /// # Examples
345     ///
346     /// ```rust
347     /// use widestring::U16CString;
348     /// let s = "MyString";
349     /// // Create a wide string from the string
350     /// let wstr = U16CString::from_str(s).unwrap();
351     /// // Create a regular string from the wide string
352     /// let s2 = wstr.to_string_lossy();
353     ///
354     /// assert_eq!(s2, s);
355     /// ```
356     #[cfg(feature = "alloc")]
to_string_lossy(&self) -> String357     pub fn to_string_lossy(&self) -> String {
358         String::from_utf16_lossy(self.as_slice())
359     }
360 }
361 
362 impl UCStr<u32> {
    /// Constructs a `U32CStr` from a `char` nul-terminated string pointer.
364     ///
365     /// This will scan for nul values beginning with `p`. The first nul value will be used as the
366     /// nul terminator for the string, similar to how libc string functions such as `strlen` work.
367     ///
368     /// # Safety
369     ///
370     /// This function is unsafe as there is no guarantee that the given pointer is valid or has a
371     /// nul terminator, and the function could scan past the underlying buffer.
372     ///
373     /// `p` must be non-null.
374     ///
375     /// # Panics
376     ///
377     /// This function panics if `p` is null.
378     ///
379     /// # Caveat
380     ///
381     /// The lifetime for the returned string is inferred from its usage. To prevent accidental
382     /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the
383     /// context, such as by providing a helper function taking the lifetime of a host value for the
384     /// string, or by explicit annotation.
from_char_ptr_str<'a>(p: *const char) -> &'a Self385     pub unsafe fn from_char_ptr_str<'a>(p: *const char) -> &'a Self {
386         UCStr::from_ptr_str(p as *const u32)
387     }
388 
    /// Constructs a `U32CStr` from a `char` pointer and a length.
390     ///
391     /// The `len` argument is the number of `char` elements, **not** the number of bytes, and does
392     /// **not** include the nul terminator of the string. Thus, a `len` of 0 is valid and means that
393     /// `p` is a pointer directly to the nul terminator of the string.
394     ///
395     /// # Safety
396     ///
397     /// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
398     /// elements.
399     ///
400     /// `p` must be non-null, even for zero `len`.
401     ///
402     /// The interior values of the pointer are not scanned for nul. Any interior nul values will
403     /// result in an invalid `U32CStr`.
404     ///
405     /// # Panics
406     ///
407     /// This function panics if `p` is null or if a nul value is not found at offset `len` of `p`.
408     /// Only pointers with a nul terminator are valid.
409     ///
410     /// # Caveat
411     ///
412     /// The lifetime for the returned string is inferred from its usage. To prevent accidental
413     /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the
414     /// context, such as by providing a helper function taking the lifetime of a host value for the
415     /// string, or by explicit annotation.
from_char_ptr_with_nul<'a>(p: *const char, len: usize) -> &'a Self416     pub unsafe fn from_char_ptr_with_nul<'a>(p: *const char, len: usize) -> &'a Self {
417         UCStr::from_ptr_with_nul(p as *const u32, len)
418     }
419 
420     /// Constructs a `U32CStr` from a slice of `char` values that has a nul terminator.
421     ///
422     /// The slice will be scanned for nul values. When a nul value is found, it is treated as the
423     /// terminator for the string, and the `U32CStr` slice will be truncated to that nul.
424     ///
425     /// # Failure
426     ///
    /// If there are no nul values in `slice`, an error is returned.
from_char_slice_with_nul(slice: &[char]) -> Result<&Self, MissingNulError<u32>>428     pub fn from_char_slice_with_nul(slice: &[char]) -> Result<&Self, MissingNulError<u32>> {
429         let ptr: *const [char] = slice;
430         UCStr::from_slice_with_nul(unsafe { &*(ptr as *const [u32]) })
431     }
432 
433     /// Constructs a `U32CStr` from a slice of `char` values that has a nul terminator. No
434     /// checking for nul values is performed.
435     ///
436     /// # Safety
437     ///
438     /// This function is unsafe because it can lead to invalid `U32CStr` values when `slice`
439     /// is missing a terminating nul value or there are non-terminating interior nul values
440     /// in the slice.
from_char_slice_with_nul_unchecked(slice: &[char]) -> &Self441     pub unsafe fn from_char_slice_with_nul_unchecked(slice: &[char]) -> &Self {
442         let ptr: *const [char] = slice;
443         UCStr::from_slice_with_nul_unchecked(&*(ptr as *const [u32]))
444     }
445 
446     /// Decodes a wide string to an owned `OsString`.
447     ///
448     /// This makes a string copy of the `U32CStr`. Since `U32CStr` makes no guarantees that it is
449     /// valid UTF-32, there is no guarantee that the resulting `OsString` will be valid data. The
450     /// `OsString` will **not** have a nul terminator.
451     ///
452     /// # Examples
453     ///
454     /// ```rust
455     /// use widestring::U32CString;
456     /// use std::ffi::OsString;
457     /// let s = "MyString";
458     /// // Create a wide string from the string
459     /// let wstr = U32CString::from_str(s).unwrap();
460     /// // Create an OsString from the wide string
461     /// let osstr = wstr.to_os_string();
462     ///
463     /// assert_eq!(osstr, OsString::from(s));
464     /// ```
465     #[cfg(feature = "std")]
to_os_string(&self) -> std::ffi::OsString466     pub fn to_os_string(&self) -> std::ffi::OsString {
467         self.to_ustring().to_os_string()
468     }
469 
470     /// Copies the wide string to a `String` if it contains valid UTF-32 data.
471     ///
472     /// # Failures
473     ///
474     /// Returns an error if the string contains any invalid UTF-32 data.
475     ///
476     /// # Examples
477     ///
478     /// ```rust
479     /// use widestring::U32CString;
480     /// let s = "MyString";
481     /// // Create a wide string from the string
482     /// let wstr = U32CString::from_str(s).unwrap();
483     /// // Create a regular string from the wide string
484     /// let s2 = wstr.to_string().unwrap();
485     ///
486     /// assert_eq!(s2, s);
487     /// ```
488     #[cfg(feature = "alloc")]
to_string(&self) -> Result<String, crate::FromUtf32Error>489     pub fn to_string(&self) -> Result<String, crate::FromUtf32Error> {
490         self.to_ustring().to_string()
491     }
492 
493     /// Copies the wide string to a `String`.
494     ///
495     /// Any non-Unicode sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
496     ///
497     /// # Examples
498     ///
499     /// ```rust
500     /// use widestring::U32CString;
501     /// let s = "MyString";
502     /// // Create a wide string from the string
503     /// let wstr = U32CString::from_str(s).unwrap();
504     /// // Create a regular string from the wide string
505     /// let s2 = wstr.to_string_lossy();
506     ///
507     /// assert_eq!(s2, s);
508     /// ```
509     #[cfg(feature = "alloc")]
to_string_lossy(&self) -> String510     pub fn to_string_lossy(&self) -> String {
511         self.to_ustring().to_string_lossy()
512     }
513 }
514 
/// C-style wide string reference for `U16CString`.
///
/// `U16CStr` is aware of nul values. Unless unchecked conversions are used, all `U16CStr`
/// strings end with a nul-terminator in the underlying buffer and contain no internal nul values.
/// The strings may still contain invalid or ill-formed UTF-16 data. These strings are intended to
/// be used with FFI functions such as Windows API that may require nul-terminated strings.
///
/// `U16CStr` can be converted to and from many other string types, including `U16String`,
/// `OsString`, and `String`, making proper Unicode FFI safe and easy.
///
/// This is an alias for `UCStr<u16>`.
pub type U16CStr = UCStr<u16>;

/// C-style wide string reference for `U32CString`.
///
/// `U32CStr` is aware of nul values. Unless unchecked conversions are used, all `U32CStr`
/// strings end with a nul-terminator in the underlying buffer and contain no internal nul values.
/// The strings may still contain invalid or ill-formed UTF-32 data. These strings are intended to
/// be used with FFI functions such as Windows API that may require nul-terminated strings.
///
/// `U32CStr` can be converted to and from many other string types, including `U32String`,
/// `OsString`, and `String`, making proper Unicode FFI safe and easy.
///
/// This is an alias for `UCStr<u32>`.
pub type U32CStr = UCStr<u32>;

/// Alias for `U16CStr` or `U32CStr` depending on platform. Intended to match typical C
/// `wchar_t` size on platform.
///
/// The element type is whatever `WideChar` resolves to.
pub type WideCStr = UCStr<WideChar>;
539