// Copyright 2013-2014 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.


//! Abstraction that conditionally compiles either to rust-encoding,
//! or to only support UTF-8.

#[cfg(feature = "query_encoding")] extern crate encoding;

use std::borrow::Cow;
#[cfg(feature = "query_encoding")] use std::fmt::{self, Debug, Formatter};

#[cfg(feature = "query_encoding")] use self::encoding::types::{DecoderTrap, EncoderTrap};
#[cfg(feature = "query_encoding")] use self::encoding::label::encoding_from_whatwg_label;
#[cfg(feature = "query_encoding")] pub use self::encoding::types::EncodingRef;

/// An optional character-encoding override, backed by rust-encoding.
///
/// This variant is compiled only when the `query_encoding` feature is enabled.
#[cfg(feature = "query_encoding")]
#[derive(Copy, Clone)]
pub struct EncodingOverride {
    /// `None` means UTF-8.
    encoding: Option<EncodingRef>
}

#[cfg(feature = "query_encoding")]
impl EncodingOverride {
    /// Builds an override from an optional encoding; `None` yields the UTF-8 override.
    pub fn from_opt_encoding(encoding: Option<EncodingRef>) -> Self {
        encoding.map(Self::from_encoding).unwrap_or_else(Self::utf8)
    }

    /// Builds an override from `encoding`.
    ///
    /// An encoding whose name is `"utf-8"` is normalized to the `None` representation,
    /// so that `is_utf8` only has to check for `None`.
    pub fn from_encoding(encoding: EncodingRef) -> Self {
        EncodingOverride {
            encoding: if encoding.name() == "utf-8" { None } else { Some(encoding) }
        }
    }

    /// Returns the override that stands for plain UTF-8.
    #[inline]
    pub fn utf8() -> Self {
        EncodingOverride { encoding: None }
    }

    /// Looks up an encoding by its WHATWG label.
    ///
    /// Returns `None` when `label` is not valid UTF-8 or is not a known label.
    pub fn lookup(label: &[u8]) -> Option<Self> {
        // Don't use String::from_utf8_lossy since no encoding label contains U+FFFD
        // https://encoding.spec.whatwg.org/#names-and-labels
        ::std::str::from_utf8(label)
        .ok()
        .and_then(encoding_from_whatwg_label)
        .map(Self::from_encoding)
    }

    /// https://encoding.spec.whatwg.org/#get-an-output-encoding
    ///
    /// Encodings named `"utf-16le"` or `"utf-16be"` are replaced by UTF-8;
    /// every other override is returned unchanged.
    pub fn to_output_encoding(self) -> Self {
        if let Some(encoding) = self.encoding {
            if matches!(encoding.name(), "utf-16le" | "utf-16be") {
                return Self::utf8()
            }
        }
        self
    }

    /// Returns `true` when this override stands for UTF-8.
    pub fn is_utf8(&self) -> bool {
        self.encoding.is_none()
    }

    /// Returns the name of the wrapped encoding, or `"utf-8"` when there is none.
    pub fn name(&self) -> &'static str {
        match self.encoding {
            Some(encoding) => encoding.name(),
            None => "utf-8",
        }
    }

    /// Decodes `input` to text, using the wrapped encoding or lossy UTF-8.
    pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> {
        match self.encoding {
            // `encoding.decode` never returns `Err` when called with `DecoderTrap::Replace`
            Some(encoding) => encoding.decode(&input, DecoderTrap::Replace).unwrap().into(),
            None => decode_utf8_lossy(input),
        }
    }

    /// Encodes `input` to bytes, using the wrapped encoding or UTF-8.
    pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> {
        match self.encoding {
            // `encoding.encode` never returns `Err` when called with `EncoderTrap::NcrEscape`
            Some(encoding) => Cow::Owned(encoding.encode(&input, EncoderTrap::NcrEscape).unwrap()),
            None => encode_utf8(input)
        }
    }
}

#[cfg(feature = "query_encoding")]
impl Debug for EncodingOverride {
    /// Formats as `EncodingOverride { encoding: <name> }`, or `None` in place of the
    /// name when no encoding is wrapped.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let label = match self.encoding {
            Some(e) => e.name(),
            None => "None",
        };
        write!(f, "EncodingOverride {{ encoding: {} }}", label)
    }
}

/// UTF-8-only stand-in used when the `query_encoding` feature is disabled.
#[cfg(not(feature = "query_encoding"))]
#[derive(Copy, Clone, Debug)]
pub struct EncodingOverride;

#[cfg(not(feature = "query_encoding"))]
impl EncodingOverride {
    /// Returns the (only) override, which stands for UTF-8.
    #[inline]
    pub fn utf8() -> Self {
        EncodingOverride
    }

    /// Decodes `input` as UTF-8, replacing invalid sequences with U+FFFD.
    pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> {
        decode_utf8_lossy(input)
    }

    /// Returns the UTF-8 bytes of `input`.
    pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> {
        encode_utf8(input)
    }
}

/// Converts bytes to text, replacing invalid UTF-8 sequences with U+FFFD.
///
/// Borrowed input stays borrowed when it is already valid UTF-8. Owned input that is
/// valid UTF-8 is converted in place, so its `Vec` allocation is reused.
pub fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow<str> {
    match input {
        Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
        Cow::Owned(bytes) => match String::from_utf8(bytes) {
            // Valid UTF-8: `from_utf8` hands the same buffer back as a `String`.
            Ok(text) => Cow::Owned(text),
            // Invalid UTF-8: recover the bytes and build the lossy replacement string.
            Err(err) => Cow::Owned(String::from_utf8_lossy(&err.into_bytes()).into_owned()),
        },
    }
}

/// Converts text to its UTF-8 bytes, keeping borrowed input borrowed and owned input owned.
pub fn encode_utf8(input: Cow<str>) -> Cow<[u8]> {
    match input {
        Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
        Cow::Owned(s) => Cow::Owned(s.into_bytes())
    }
}