// Copyright 2013-2014 The rust-url developers. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! Abstraction that conditionally compiles either to rust-encoding, //! or to only support UTF-8. #[cfg(feature = "query_encoding")] extern crate encoding; use std::borrow::Cow; #[cfg(feature = "query_encoding")] use std::fmt::{self, Debug, Formatter}; #[cfg(feature = "query_encoding")] use self::encoding::types::{DecoderTrap, EncoderTrap}; #[cfg(feature = "query_encoding")] use self::encoding::label::encoding_from_whatwg_label; #[cfg(feature = "query_encoding")] pub use self::encoding::types::EncodingRef; #[cfg(feature = "query_encoding")] #[derive(Copy, Clone)] pub struct EncodingOverride { /// `None` means UTF-8. encoding: Option } #[cfg(feature = "query_encoding")] impl EncodingOverride { pub fn from_opt_encoding(encoding: Option) -> Self { encoding.map(Self::from_encoding).unwrap_or_else(Self::utf8) } pub fn from_encoding(encoding: EncodingRef) -> Self { EncodingOverride { encoding: if encoding.name() == "utf-8" { None } else { Some(encoding) } } } #[inline] pub fn utf8() -> Self { EncodingOverride { encoding: None } } pub fn lookup(label: &[u8]) -> Option { // Don't use String::from_utf8_lossy since no encoding label contains U+FFFD // https://encoding.spec.whatwg.org/#names-and-labels ::std::str::from_utf8(label) .ok() .and_then(encoding_from_whatwg_label) .map(Self::from_encoding) } /// https://encoding.spec.whatwg.org/#get-an-output-encoding pub fn to_output_encoding(self) -> Self { if let Some(encoding) = self.encoding { if matches!(encoding.name(), "utf-16le" | "utf-16be") { return Self::utf8() } } self } pub fn is_utf8(&self) -> bool { self.encoding.is_none() } pub fn name(&self) -> &'static str { match self.encoding { Some(encoding) => encoding.name(), None => "utf-8", } } pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> { match self.encoding { // `encoding.decode` never returns `Err` when called with `DecoderTrap::Replace` Some(encoding) => encoding.decode(&input, DecoderTrap::Replace).unwrap().into(), None => decode_utf8_lossy(input), } } pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> { match self.encoding { // `encoding.encode` never returns `Err` when called with `EncoderTrap::NcrEscape` Some(encoding) => Cow::Owned(encoding.encode(&input, EncoderTrap::NcrEscape).unwrap()), None => encode_utf8(input) } } } #[cfg(feature = "query_encoding")] impl Debug for EncodingOverride { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "EncodingOverride {{ encoding: ")?; match self.encoding { Some(e) => write!(f, "{} }}", e.name()), None => write!(f, "None }}") } } } #[cfg(not(feature = "query_encoding"))] #[derive(Copy, Clone, Debug)] pub struct EncodingOverride; #[cfg(not(feature = "query_encoding"))] impl EncodingOverride { #[inline] pub fn utf8() -> Self { EncodingOverride } pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> { decode_utf8_lossy(input) } pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> { encode_utf8(input) } } pub fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow { match input { Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes), Cow::Owned(bytes) => { let raw_utf8: *const [u8]; match String::from_utf8_lossy(&bytes) { Cow::Borrowed(utf8) => raw_utf8 = utf8.as_bytes(), Cow::Owned(s) => return s.into(), } // from_utf8_lossy returned a borrow of `bytes` unchanged. debug_assert!(raw_utf8 == &*bytes as *const [u8]); // Reuse the existing `Vec` allocation. unsafe { String::from_utf8_unchecked(bytes) }.into() } } } pub fn encode_utf8(input: Cow) -> Cow<[u8]> { match input { Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()), Cow::Owned(s) => Cow::Owned(s.into_bytes()) } }