1 // Copyright 2013-2014 The rust-url developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8 
9 
10 //! Abstraction that conditionally compiles either to rust-encoding,
11 //! or to only support UTF-8.
12 
13 #[cfg(feature = "query_encoding")] extern crate encoding;
14 
15 use std::borrow::Cow;
16 #[cfg(feature = "query_encoding")] use std::fmt::{self, Debug, Formatter};
17 
18 #[cfg(feature = "query_encoding")] use self::encoding::types::{DecoderTrap, EncoderTrap};
19 #[cfg(feature = "query_encoding")] use self::encoding::label::encoding_from_whatwg_label;
20 #[cfg(feature = "query_encoding")] pub use self::encoding::types::EncodingRef;
21 
/// Wraps an optional `EncodingRef`; the absence of one stands for UTF-8.
#[cfg(feature = "query_encoding")]
#[derive(Clone, Copy)]
pub struct EncodingOverride {
    /// The selected non-UTF-8 encoding. `None` means UTF-8.
    encoding: Option<EncodingRef>,
}
28 
#[cfg(feature = "query_encoding")]
impl EncodingOverride {
    /// Builds an override from an optional encoding.
    ///
    /// `None` produces the UTF-8 override; `Some` is forwarded to
    /// `from_encoding`.
    pub fn from_opt_encoding(encoding: Option<EncodingRef>) -> Self {
        match encoding {
            Some(encoding) => Self::from_encoding(encoding),
            None => Self::utf8(),
        }
    }

    /// Builds an override for `encoding`.
    ///
    /// An encoding whose name is `"utf-8"` is stored as `None`, so UTF-8 has
    /// exactly one representation inside this type.
    pub fn from_encoding(encoding: EncodingRef) -> Self {
        let stored = match encoding.name() {
            "utf-8" => None,
            _ => Some(encoding),
        };
        EncodingOverride { encoding: stored }
    }

    /// Returns the override that stands for UTF-8.
    #[inline]
    pub fn utf8() -> Self {
        EncodingOverride { encoding: None }
    }

    /// Looks up an encoding from a label given as raw bytes.
    ///
    /// Returns `None` when `label` is not valid UTF-8 or when
    /// `encoding_from_whatwg_label` does not recognise it.
    pub fn lookup(label: &[u8]) -> Option<Self> {
        // Don't use String::from_utf8_lossy since no encoding label contains U+FFFD
        // https://encoding.spec.whatwg.org/#names-and-labels
        let label = match ::std::str::from_utf8(label) {
            Ok(label) => label,
            Err(_) => return None,
        };
        encoding_from_whatwg_label(label).map(Self::from_encoding)
    }

    /// https://encoding.spec.whatwg.org/#get-an-output-encoding
    ///
    /// Encodings named `"utf-16le"` or `"utf-16be"` are replaced by UTF-8;
    /// every other override is returned unchanged.
    pub fn to_output_encoding(self) -> Self {
        match self.encoding.map(|encoding| encoding.name()) {
            Some("utf-16le") | Some("utf-16be") => Self::utf8(),
            _ => self,
        }
    }

    /// Returns `true` when this override stands for UTF-8.
    pub fn is_utf8(&self) -> bool {
        self.encoding.is_none()
    }

    /// Returns the name of the selected encoding, or `"utf-8"` when none is stored.
    pub fn name(&self) -> &'static str {
        self.encoding.map_or("utf-8", |encoding| encoding.name())
    }

    /// Decodes `input` with the selected encoding using `DecoderTrap::Replace`,
    /// or with `decode_utf8_lossy` when the override is UTF-8.
    pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> {
        if let Some(encoding) = self.encoding {
            // `encoding.decode` never returns `Err` when called with `DecoderTrap::Replace`
            let decoded = encoding.decode(&input, DecoderTrap::Replace).unwrap();
            return Cow::Owned(decoded);
        }
        decode_utf8_lossy(input)
    }

    /// Encodes `input` with the selected encoding using `EncoderTrap::NcrEscape`,
    /// or with `encode_utf8` when the override is UTF-8.
    pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> {
        if let Some(encoding) = self.encoding {
            // `encoding.encode` never returns `Err` when called with `EncoderTrap::NcrEscape`
            let encoded = encoding.encode(&input, EncoderTrap::NcrEscape).unwrap();
            return Cow::Owned(encoded);
        }
        encode_utf8(input)
    }
}
92 
/// Formats as `EncodingOverride { encoding: <name> }`, printing `None` when
/// no encoding is stored.
#[cfg(feature = "query_encoding")]
impl Debug for EncodingOverride {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        write!(f, "EncodingOverride {{ encoding: ")?;
        if let Some(encoding) = self.encoding {
            // Only the encoding's name is printed.
            return write!(f, "{} }}", encoding.name());
        }
        write!(f, "None }}")
    }
}
103 
/// Field-less stand-in used when the `query_encoding` feature is disabled;
/// its `decode` and `encode` methods only handle UTF-8.
#[cfg(not(feature = "query_encoding"))]
#[derive(Clone, Copy, Debug)]
pub struct EncodingOverride;
107 
108 #[cfg(not(feature = "query_encoding"))]
109 impl EncodingOverride {
110     #[inline]
111     pub fn utf8() -> Self {
112         EncodingOverride
get_errno_symbol(int errnum)113     }
114 
115     pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> {
116         decode_utf8_lossy(input)
117     }
118 
119     pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> {
120         encode_utf8(input)
121     }
122 }
123 
/// Decodes `input` as UTF-8, replacing invalid sequences the same way
/// `String::from_utf8_lossy` does.
///
/// Borrowed input is handed straight to `String::from_utf8_lossy`. Owned
/// input that is already valid UTF-8 is converted with `String::from_utf8`,
/// which takes over the existing `Vec` allocation instead of copying it.
/// Owned input that is not valid UTF-8 is decoded lossily into a new `String`.
pub fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow<str> {
    match input {
        Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
        Cow::Owned(bytes) => match String::from_utf8(bytes) {
            // Valid UTF-8 (including an empty buffer): reuse the allocation.
            Ok(text) => Cow::Owned(text),
            // Invalid UTF-8: take the bytes back and build the lossy copy.
            Err(error) => Cow::Owned(String::from_utf8_lossy(&error.into_bytes()).into_owned()),
        },
    }
}
140 
/// Returns the UTF-8 bytes of `input` without copying.
///
/// A borrowed string yields a borrowed byte slice; an owned `String` is
/// converted into its underlying `Vec<u8>`.
pub fn encode_utf8(input: Cow<str>) -> Cow<[u8]> {
    let text = match input {
        Cow::Owned(owned) => return Cow::Owned(owned.into_bytes()),
        Cow::Borrowed(text) => text,
    };
    Cow::Borrowed(text.as_bytes())
}
147