1 use crate::{UChar, WideChar}; 2 use core::slice; 3 4 #[cfg(all(feature = "alloc", not(feature = "std")))] 5 use alloc::{ 6 borrow::ToOwned, 7 boxed::Box, 8 string::{FromUtf16Error, String}, 9 vec::Vec, 10 }; 11 #[cfg(feature = "std")] 12 use std::{ 13 borrow::ToOwned, 14 boxed::Box, 15 string::{FromUtf16Error, String}, 16 vec::Vec, 17 }; 18 19 /// An error returned from `UCString` and `UCStr` to indicate that a terminating nul value 20 /// was missing. 21 /// 22 /// The error optionally returns the ownership of the invalid vector whenever a vector was owned. 23 #[derive(Debug, Clone, PartialEq, Eq)] 24 pub struct MissingNulError<C> { 25 #[cfg(feature = "alloc")] 26 pub(crate) inner: Option<Vec<C>>, 27 #[cfg(not(feature = "alloc"))] 28 _p: core::marker::PhantomData<C>, 29 } 30 31 impl<C: UChar> MissingNulError<C> { 32 #[cfg(feature = "alloc")] empty() -> Self33 fn empty() -> Self { 34 Self { inner: None } 35 } 36 37 #[cfg(not(feature = "alloc"))] empty() -> Self38 fn empty() -> Self { 39 Self { 40 _p: core::marker::PhantomData, 41 } 42 } 43 44 /// Consumes this error, returning the underlying vector of `u16` values which generated the 45 /// error in the first place. 46 #[cfg(feature = "alloc")] into_vec(self) -> Option<Vec<C>>47 pub fn into_vec(self) -> Option<Vec<C>> { 48 self.inner 49 } 50 } 51 52 impl<C: UChar> core::fmt::Display for MissingNulError<C> { fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result53 fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { 54 write!(f, "missing terminating nul value") 55 } 56 } 57 58 #[cfg(feature = "std")] 59 impl<C: UChar> std::error::Error for MissingNulError<C> { description(&self) -> &str60 fn description(&self) -> &str { 61 "missing terminating nul value" 62 } 63 } 64 65 /// C-style wide string reference for `UCString`. 66 /// 67 /// `UCStr` is aware of nul values. Unless unchecked conversions are used, all `UCStr` 68 /// strings end with a nul-terminator in the underlying buffer and contain no internal nul values. 69 /// The strings may still contain invalid or ill-formed UTF-16 or UTF-32 data. These strings are 70 /// intended to be used with FFI functions such as Windows API that may require nul-terminated 71 /// strings. 72 /// 73 /// `UCStr` can be converted to and from many other string types, including `UString`, 74 /// `OsString`, and `String`, making proper Unicode FFI safe and easy. 75 /// 76 /// Please prefer using the type aliases `U16CStr` or `U32CStr` or `WideCStr` to using 77 /// this type directly. 78 #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] 79 pub struct UCStr<C: UChar> { 80 inner: [C], 81 } 82 83 impl<C: UChar> UCStr<C> { 84 /// Coerces a value into a `UCStr`. new<S: AsRef<UCStr<C>> + ?Sized>(s: &S) -> &Self85 pub fn new<S: AsRef<UCStr<C>> + ?Sized>(s: &S) -> &Self { 86 s.as_ref() 87 } 88 89 /// Constructs a `UStr` from a nul-terminated string pointer. 90 /// 91 /// This will scan for nul values beginning with `p`. The first nul value will be used as the 92 /// nul terminator for the string, similar to how libc string functions such as `strlen` work. 93 /// 94 /// # Safety 95 /// 96 /// This function is unsafe as there is no guarantee that the given pointer is valid or has a 97 /// nul terminator, and the function could scan past the underlying buffer. 98 /// 99 /// `p` must be non-null. 100 /// 101 /// # Panics 102 /// 103 /// This function panics if `p` is null. 104 /// 105 /// # Caveat 106 /// 107 /// The lifetime for the returned string is inferred from its usage. To prevent accidental 108 /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the 109 /// context, such as by providing a helper function taking the lifetime of a host value for the 110 /// string, or by explicit annotation. from_ptr_str<'a>(p: *const C) -> &'a Self111 pub unsafe fn from_ptr_str<'a>(p: *const C) -> &'a Self { 112 assert!(!p.is_null()); 113 let mut i: isize = 0; 114 while *p.offset(i) != UChar::NUL { 115 i += 1; 116 } 117 let ptr: *const [C] = slice::from_raw_parts(p, i as usize + 1); 118 &*(ptr as *const UCStr<C>) 119 } 120 121 /// Constructs a `UStr` from a pointer and a length. 122 /// 123 /// The `len` argument is the number of elements, **not** the number of bytes, and does 124 /// **not** include the nul terminator of the string. Thus, a `len` of 0 is valid and means that 125 /// `p` is a pointer directly to the nul terminator of the string. 126 /// 127 /// # Safety 128 /// 129 /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` 130 /// elements. 131 /// 132 /// `p` must be non-null, even for zero `len`. 133 /// 134 /// The interior values of the pointer are not scanned for nul. Any interior nul values will 135 /// result in an invalid `UCStr`. 136 /// 137 /// # Panics 138 /// 139 /// This function panics if `p` is null or if a nul value is not found at offset `len` of `p`. 140 /// Only pointers with a nul terminator are valid. 141 /// 142 /// # Caveat 143 /// 144 /// The lifetime for the returned string is inferred from its usage. To prevent accidental 145 /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the 146 /// context, such as by providing a helper function taking the lifetime of a host value for the 147 /// string, or by explicit annotation. from_ptr_with_nul<'a>(p: *const C, len: usize) -> &'a Self148 pub unsafe fn from_ptr_with_nul<'a>(p: *const C, len: usize) -> &'a Self { 149 assert!(*p.add(len) == UChar::NUL); 150 let ptr: *const [C] = slice::from_raw_parts(p, len + 1); 151 &*(ptr as *const UCStr<C>) 152 } 153 154 /// Constructs a `UCStr` from a slice of values that has a nul terminator. 155 /// 156 /// The slice will be scanned for nul values. When a nul value is found, it is treated as the 157 /// terminator for the string, and the `UCStr` slice will be truncated to that nul. 158 /// 159 /// # Failure 160 /// 161 /// If there are no no nul values in the slice, an error is returned. from_slice_with_nul(slice: &[C]) -> Result<&Self, MissingNulError<C>>162 pub fn from_slice_with_nul(slice: &[C]) -> Result<&Self, MissingNulError<C>> { 163 match slice.iter().position(|x| *x == UChar::NUL) { 164 None => Err(MissingNulError::empty()), 165 Some(i) => Ok(unsafe { UCStr::from_slice_with_nul_unchecked(&slice[..i + 1]) }), 166 } 167 } 168 169 /// Constructs a `UCStr` from a slice of values that has a nul terminator. No 170 /// checking for nul values is performed. 171 /// 172 /// # Safety 173 /// 174 /// This function is unsafe because it can lead to invalid `UCStr` values when the slice 175 /// is missing a terminating nul value or there are non-terminating interior nul values 176 /// in the slice. from_slice_with_nul_unchecked(slice: &[C]) -> &Self177 pub unsafe fn from_slice_with_nul_unchecked(slice: &[C]) -> &Self { 178 let ptr: *const [C] = slice; 179 &*(ptr as *const UCStr<C>) 180 } 181 182 /// Copies the wide string to an new owned `UString`. 183 #[cfg(feature = "alloc")] to_ucstring(&self) -> crate::UCString<C>184 pub fn to_ucstring(&self) -> crate::UCString<C> { 185 unsafe { crate::UCString::from_vec_with_nul_unchecked(self.inner.to_owned()) } 186 } 187 188 /// Copies the wide string to a new owned `UString`. 189 /// 190 /// The `UString` will **not** have a nul terminator. 191 /// 192 /// # Examples 193 /// 194 /// ```rust 195 /// use widestring::U16CString; 196 /// let wcstr = U16CString::from_str("MyString").unwrap(); 197 /// // Convert U16CString to a U16String 198 /// let wstr = wcstr.to_ustring(); 199 /// 200 /// // U16CString will have a terminating nul 201 /// let wcvec = wcstr.into_vec_with_nul(); 202 /// assert_eq!(wcvec[wcvec.len()-1], 0); 203 /// // The resulting U16String will not have the terminating nul 204 /// let wvec = wstr.into_vec(); 205 /// assert_ne!(wvec[wvec.len()-1], 0); 206 /// ``` 207 /// 208 /// ```rust 209 /// use widestring::U32CString; 210 /// let wcstr = U32CString::from_str("MyString").unwrap(); 211 /// // Convert U32CString to a U32String 212 /// let wstr = wcstr.to_ustring(); 213 /// 214 /// // U32CString will have a terminating nul 215 /// let wcvec = wcstr.into_vec_with_nul(); 216 /// assert_eq!(wcvec[wcvec.len()-1], 0); 217 /// // The resulting U32String will not have the terminating nul 218 /// let wvec = wstr.into_vec(); 219 /// assert_ne!(wvec[wvec.len()-1], 0); 220 /// ``` 221 #[cfg(feature = "alloc")] to_ustring(&self) -> crate::UString<C>222 pub fn to_ustring(&self) -> crate::UString<C> { 223 crate::UString::from_vec(self.as_slice()) 224 } 225 226 /// Converts to a slice of the wide string. 227 /// 228 /// The slice will **not** include the nul terminator. as_slice(&self) -> &[C]229 pub fn as_slice(&self) -> &[C] { 230 &self.inner[..self.len()] 231 } 232 233 /// Converts to a slice of the wide string, including the nul terminator. as_slice_with_nul(&self) -> &[C]234 pub fn as_slice_with_nul(&self) -> &[C] { 235 &self.inner 236 } 237 238 /// Returns a raw pointer to the wide string. 239 /// 240 /// The pointer is valid only as long as the lifetime of this reference. as_ptr(&self) -> *const C241 pub fn as_ptr(&self) -> *const C { 242 self.inner.as_ptr() 243 } 244 245 /// Returns the length of the wide string as number of elements (**not** number of bytes) 246 /// **not** including nul terminator. len(&self) -> usize247 pub fn len(&self) -> usize { 248 self.inner.len() - 1 249 } 250 251 /// Returns whether this wide string contains no data (i.e. is only the nul terminator). is_empty(&self) -> bool252 pub fn is_empty(&self) -> bool { 253 self.len() == 0 254 } 255 256 /// Converts a `Box<UCStr>` into a `UCString` without copying or allocating. 257 /// 258 /// # Examples 259 /// 260 /// ``` 261 /// use widestring::U16CString; 262 /// 263 /// let v = vec![102u16, 111u16, 111u16]; // "foo" 264 /// let c_string = U16CString::new(v.clone()).unwrap(); 265 /// let boxed = c_string.into_boxed_ucstr(); 266 /// assert_eq!(boxed.into_ucstring(), U16CString::new(v).unwrap()); 267 /// ``` 268 /// 269 /// ``` 270 /// use widestring::U32CString; 271 /// 272 /// let v = vec![102u32, 111u32, 111u32]; // "foo" 273 /// let c_string = U32CString::new(v.clone()).unwrap(); 274 /// let boxed = c_string.into_boxed_ucstr(); 275 /// assert_eq!(boxed.into_ucstring(), U32CString::new(v).unwrap()); 276 /// ``` 277 #[cfg(feature = "alloc")] into_ucstring(self: Box<Self>) -> crate::UCString<C>278 pub fn into_ucstring(self: Box<Self>) -> crate::UCString<C> { 279 let raw = Box::into_raw(self) as *mut [C]; 280 crate::UCString { 281 inner: unsafe { Box::from_raw(raw) }, 282 } 283 } 284 285 #[cfg(feature = "alloc")] from_inner(slice: &[C]) -> &UCStr<C>286 pub(crate) fn from_inner(slice: &[C]) -> &UCStr<C> { 287 let ptr: *const [C] = slice; 288 unsafe { &*(ptr as *const UCStr<C>) } 289 } 290 } 291 292 impl UCStr<u16> { 293 /// Decodes a wide string to an owned `OsString`. 294 /// 295 /// This makes a string copy of the `U16CStr`. Since `U16CStr` makes no guarantees that it is 296 /// valid UTF-16, there is no guarantee that the resulting `OsString` will be valid data. The 297 /// `OsString` will **not** have a nul terminator. 298 /// 299 /// # Examples 300 /// 301 /// ```rust 302 /// use widestring::U16CString; 303 /// use std::ffi::OsString; 304 /// let s = "MyString"; 305 /// // Create a wide string from the string 306 /// let wstr = U16CString::from_str(s).unwrap(); 307 /// // Create an OsString from the wide string 308 /// let osstr = wstr.to_os_string(); 309 /// 310 /// assert_eq!(osstr, OsString::from(s)); 311 /// ``` 312 #[cfg(feature = "std")] to_os_string(&self) -> std::ffi::OsString313 pub fn to_os_string(&self) -> std::ffi::OsString { 314 crate::platform::os_from_wide(self.as_slice()) 315 } 316 317 /// Copies the wide string to a `String` if it contains valid UTF-16 data. 318 /// 319 /// # Failures 320 /// 321 /// Returns an error if the string contains any invalid UTF-16 data. 322 /// 323 /// # Examples 324 /// 325 /// ```rust 326 /// use widestring::U16CString; 327 /// let s = "MyString"; 328 /// // Create a wide string from the string 329 /// let wstr = U16CString::from_str(s).unwrap(); 330 /// // Create a regular string from the wide string 331 /// let s2 = wstr.to_string().unwrap(); 332 /// 333 /// assert_eq!(s2, s); 334 /// ``` 335 #[cfg(feature = "alloc")] to_string(&self) -> Result<String, FromUtf16Error>336 pub fn to_string(&self) -> Result<String, FromUtf16Error> { 337 String::from_utf16(self.as_slice()) 338 } 339 340 /// Copies the wide string to a `String`. 341 /// 342 /// Any non-Unicode sequences are replaced with U+FFFD REPLACEMENT CHARACTER. 343 /// 344 /// # Examples 345 /// 346 /// ```rust 347 /// use widestring::U16CString; 348 /// let s = "MyString"; 349 /// // Create a wide string from the string 350 /// let wstr = U16CString::from_str(s).unwrap(); 351 /// // Create a regular string from the wide string 352 /// let s2 = wstr.to_string_lossy(); 353 /// 354 /// assert_eq!(s2, s); 355 /// ``` 356 #[cfg(feature = "alloc")] to_string_lossy(&self) -> String357 pub fn to_string_lossy(&self) -> String { 358 String::from_utf16_lossy(self.as_slice()) 359 } 360 } 361 362 impl UCStr<u32> { 363 /// Constructs a `U32Str` from a `char` nul-terminated string pointer. 364 /// 365 /// This will scan for nul values beginning with `p`. The first nul value will be used as the 366 /// nul terminator for the string, similar to how libc string functions such as `strlen` work. 367 /// 368 /// # Safety 369 /// 370 /// This function is unsafe as there is no guarantee that the given pointer is valid or has a 371 /// nul terminator, and the function could scan past the underlying buffer. 372 /// 373 /// `p` must be non-null. 374 /// 375 /// # Panics 376 /// 377 /// This function panics if `p` is null. 378 /// 379 /// # Caveat 380 /// 381 /// The lifetime for the returned string is inferred from its usage. To prevent accidental 382 /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the 383 /// context, such as by providing a helper function taking the lifetime of a host value for the 384 /// string, or by explicit annotation. from_char_ptr_str<'a>(p: *const char) -> &'a Self385 pub unsafe fn from_char_ptr_str<'a>(p: *const char) -> &'a Self { 386 UCStr::from_ptr_str(p as *const u32) 387 } 388 389 /// Constructs a `U32Str` from a `char` pointer and a length. 390 /// 391 /// The `len` argument is the number of `char` elements, **not** the number of bytes, and does 392 /// **not** include the nul terminator of the string. Thus, a `len` of 0 is valid and means that 393 /// `p` is a pointer directly to the nul terminator of the string. 394 /// 395 /// # Safety 396 /// 397 /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` 398 /// elements. 399 /// 400 /// `p` must be non-null, even for zero `len`. 401 /// 402 /// The interior values of the pointer are not scanned for nul. Any interior nul values will 403 /// result in an invalid `U32CStr`. 404 /// 405 /// # Panics 406 /// 407 /// This function panics if `p` is null or if a nul value is not found at offset `len` of `p`. 408 /// Only pointers with a nul terminator are valid. 409 /// 410 /// # Caveat 411 /// 412 /// The lifetime for the returned string is inferred from its usage. To prevent accidental 413 /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the 414 /// context, such as by providing a helper function taking the lifetime of a host value for the 415 /// string, or by explicit annotation. from_char_ptr_with_nul<'a>(p: *const char, len: usize) -> &'a Self416 pub unsafe fn from_char_ptr_with_nul<'a>(p: *const char, len: usize) -> &'a Self { 417 UCStr::from_ptr_with_nul(p as *const u32, len) 418 } 419 420 /// Constructs a `U32CStr` from a slice of `char` values that has a nul terminator. 421 /// 422 /// The slice will be scanned for nul values. When a nul value is found, it is treated as the 423 /// terminator for the string, and the `U32CStr` slice will be truncated to that nul. 424 /// 425 /// # Failure 426 /// 427 /// If there are no no nul values in `slice`, an error is returned. from_char_slice_with_nul(slice: &[char]) -> Result<&Self, MissingNulError<u32>>428 pub fn from_char_slice_with_nul(slice: &[char]) -> Result<&Self, MissingNulError<u32>> { 429 let ptr: *const [char] = slice; 430 UCStr::from_slice_with_nul(unsafe { &*(ptr as *const [u32]) }) 431 } 432 433 /// Constructs a `U32CStr` from a slice of `char` values that has a nul terminator. No 434 /// checking for nul values is performed. 435 /// 436 /// # Safety 437 /// 438 /// This function is unsafe because it can lead to invalid `U32CStr` values when `slice` 439 /// is missing a terminating nul value or there are non-terminating interior nul values 440 /// in the slice. from_char_slice_with_nul_unchecked(slice: &[char]) -> &Self441 pub unsafe fn from_char_slice_with_nul_unchecked(slice: &[char]) -> &Self { 442 let ptr: *const [char] = slice; 443 UCStr::from_slice_with_nul_unchecked(&*(ptr as *const [u32])) 444 } 445 446 /// Decodes a wide string to an owned `OsString`. 447 /// 448 /// This makes a string copy of the `U32CStr`. Since `U32CStr` makes no guarantees that it is 449 /// valid UTF-32, there is no guarantee that the resulting `OsString` will be valid data. The 450 /// `OsString` will **not** have a nul terminator. 451 /// 452 /// # Examples 453 /// 454 /// ```rust 455 /// use widestring::U32CString; 456 /// use std::ffi::OsString; 457 /// let s = "MyString"; 458 /// // Create a wide string from the string 459 /// let wstr = U32CString::from_str(s).unwrap(); 460 /// // Create an OsString from the wide string 461 /// let osstr = wstr.to_os_string(); 462 /// 463 /// assert_eq!(osstr, OsString::from(s)); 464 /// ``` 465 #[cfg(feature = "std")] to_os_string(&self) -> std::ffi::OsString466 pub fn to_os_string(&self) -> std::ffi::OsString { 467 self.to_ustring().to_os_string() 468 } 469 470 /// Copies the wide string to a `String` if it contains valid UTF-32 data. 471 /// 472 /// # Failures 473 /// 474 /// Returns an error if the string contains any invalid UTF-32 data. 475 /// 476 /// # Examples 477 /// 478 /// ```rust 479 /// use widestring::U32CString; 480 /// let s = "MyString"; 481 /// // Create a wide string from the string 482 /// let wstr = U32CString::from_str(s).unwrap(); 483 /// // Create a regular string from the wide string 484 /// let s2 = wstr.to_string().unwrap(); 485 /// 486 /// assert_eq!(s2, s); 487 /// ``` 488 #[cfg(feature = "alloc")] to_string(&self) -> Result<String, crate::FromUtf32Error>489 pub fn to_string(&self) -> Result<String, crate::FromUtf32Error> { 490 self.to_ustring().to_string() 491 } 492 493 /// Copies the wide string to a `String`. 494 /// 495 /// Any non-Unicode sequences are replaced with U+FFFD REPLACEMENT CHARACTER. 496 /// 497 /// # Examples 498 /// 499 /// ```rust 500 /// use widestring::U32CString; 501 /// let s = "MyString"; 502 /// // Create a wide string from the string 503 /// let wstr = U32CString::from_str(s).unwrap(); 504 /// // Create a regular string from the wide string 505 /// let s2 = wstr.to_string_lossy(); 506 /// 507 /// assert_eq!(s2, s); 508 /// ``` 509 #[cfg(feature = "alloc")] to_string_lossy(&self) -> String510 pub fn to_string_lossy(&self) -> String { 511 self.to_ustring().to_string_lossy() 512 } 513 } 514 515 /// C-style wide string reference for `U16CString`. 516 /// 517 /// `U16CStr` is aware of nul values. Unless unchecked conversions are used, all `U16CStr` 518 /// strings end with a nul-terminator in the underlying buffer and contain no internal nul values. 519 /// The strings may still contain invalid or ill-formed UTF-16 data. These strings are intended to 520 /// be used with FFI functions such as Windows API that may require nul-terminated strings. 521 /// 522 /// `U16CStr` can be converted to and from many other string types, including `U16String`, 523 /// `OsString`, and `String`, making proper Unicode FFI safe and easy. 524 pub type U16CStr = UCStr<u16>; 525 526 /// C-style wide string reference for `U32CString`. 527 /// 528 /// `U32CStr` is aware of nul values. Unless unchecked conversions are used, all `U32CStr` 529 /// strings end with a nul-terminator in the underlying buffer and contain no internal nul values. 530 /// The strings may still contain invalid or ill-formed UTF-32 data. These strings are intended to 531 /// be used with FFI functions such as Windows API that may require nul-terminated strings. 532 /// 533 /// `U32CStr` can be converted to and from many other string types, including `U32String`, 534 /// `OsString`, and `String`, making proper Unicode FFI safe and easy. 535 pub type U32CStr = UCStr<u32>; 536 537 /// Alias for `U16CStr` or `U32CStr` depending on platform. Intended to match typical C `wchar_t` size on platform. 538 pub type WideCStr = UCStr<WideChar>; 539