1 // Copyright Mozilla Foundation. See the COPYRIGHT 2 // file at the top-level directory of this distribution. 3 // 4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or 5 // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license 6 // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your 7 // option. This file may not be copied, modified, or distributed 8 // except according to those terms. 9 10 use super::*; 11 use crate::handles::*; 12 use crate::variant::*; 13 14 cfg_if! { 15 if #[cfg(feature = "simd-accel")] { 16 use simd_funcs::*; 17 use packed_simd::u16x8; 18 19 #[inline(always)] 20 fn shift_upper(unpacked: u16x8) -> u16x8 { 21 let highest_ascii = u16x8::splat(0x7F); 22 unpacked + unpacked.gt(highest_ascii).select(u16x8::splat(0xF700), u16x8::splat(0)) } 23 } else { 24 } 25 } 26 27 pub struct UserDefinedDecoder; 28 29 impl UserDefinedDecoder { new() -> VariantDecoder30 pub fn new() -> VariantDecoder { 31 VariantDecoder::UserDefined(UserDefinedDecoder) 32 } 33 max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize>34 pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize> { 35 Some(byte_length) 36 } 37 max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize>38 pub fn max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize> { 39 byte_length.checked_mul(3) 40 } 41 max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize>42 pub fn max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize> { 43 byte_length.checked_mul(3) 44 } 45 46 decoder_function!( 47 {}, 48 {}, 49 {}, 50 { 51 if b < 0x80 { 52 // ASCII run not optimized, because binary data expected 53 destination_handle.write_ascii(b); 54 continue; 55 } 56 destination_handle.write_upper_bmp(u16::from(b) + 0xF700); 57 continue; 58 }, 59 self, 60 src_consumed, 61 dest, 62 source, 63 b, 64 destination_handle, 65 _unread_handle, 66 check_space_bmp, 67 decode_to_utf8_raw, 68 
u8, 69 Utf8Destination 70 ); 71 72 #[cfg(not(feature = "simd-accel"))] decode_to_utf16_raw( &mut self, src: &[u8], dst: &mut [u16], _last: bool, ) -> (DecoderResult, usize, usize)73 pub fn decode_to_utf16_raw( 74 &mut self, 75 src: &[u8], 76 dst: &mut [u16], 77 _last: bool, 78 ) -> (DecoderResult, usize, usize) { 79 let (pending, length) = if dst.len() < src.len() { 80 (DecoderResult::OutputFull, dst.len()) 81 } else { 82 (DecoderResult::InputEmpty, src.len()) 83 }; 84 let src_trim = &src[..length]; 85 let dst_trim = &mut dst[..length]; 86 src_trim 87 .iter() 88 .zip(dst_trim.iter_mut()) 89 .for_each(|(from, to)| { 90 *to = { 91 let unit = *from; 92 if unit < 0x80 { 93 u16::from(unit) 94 } else { 95 u16::from(unit) + 0xF700 96 } 97 } 98 }); 99 (pending, length, length) 100 } 101 102 #[cfg(feature = "simd-accel")] decode_to_utf16_raw( &mut self, src: &[u8], dst: &mut [u16], _last: bool, ) -> (DecoderResult, usize, usize)103 pub fn decode_to_utf16_raw( 104 &mut self, 105 src: &[u8], 106 dst: &mut [u16], 107 _last: bool, 108 ) -> (DecoderResult, usize, usize) { 109 let (pending, length) = if dst.len() < src.len() { 110 (DecoderResult::OutputFull, dst.len()) 111 } else { 112 (DecoderResult::InputEmpty, src.len()) 113 }; 114 // Not bothering with alignment 115 let tail_start = length & !0xF; 116 let simd_iterations = length >> 4; 117 let src_ptr = src.as_ptr(); 118 let dst_ptr = dst.as_mut_ptr(); 119 for i in 0..simd_iterations { 120 let input = unsafe { load16_unaligned(src_ptr.add(i * 16)) }; 121 let (first, second) = simd_unpack(input); 122 unsafe { 123 store8_unaligned(dst_ptr.add(i * 16), shift_upper(first)); 124 store8_unaligned(dst_ptr.add((i * 16) + 8), shift_upper(second)); 125 } 126 } 127 let src_tail = &src[tail_start..length]; 128 let dst_tail = &mut dst[tail_start..length]; 129 src_tail 130 .iter() 131 .zip(dst_tail.iter_mut()) 132 .for_each(|(from, to)| { 133 *to = { 134 let unit = *from; 135 if unit < 0x80 { 136 u16::from(unit) 137 } else { 138 
u16::from(unit) + 0xF700 139 } 140 } 141 }); 142 (pending, length, length) 143 } 144 } 145 146 pub struct UserDefinedEncoder; 147 148 impl UserDefinedEncoder { new(encoding: &'static Encoding) -> Encoder149 pub fn new(encoding: &'static Encoding) -> Encoder { 150 Encoder::new(encoding, VariantEncoder::UserDefined(UserDefinedEncoder)) 151 } 152 max_buffer_length_from_utf16_without_replacement( &self, u16_length: usize, ) -> Option<usize>153 pub fn max_buffer_length_from_utf16_without_replacement( 154 &self, 155 u16_length: usize, 156 ) -> Option<usize> { 157 Some(u16_length) 158 } 159 max_buffer_length_from_utf8_without_replacement( &self, byte_length: usize, ) -> Option<usize>160 pub fn max_buffer_length_from_utf8_without_replacement( 161 &self, 162 byte_length: usize, 163 ) -> Option<usize> { 164 Some(byte_length) 165 } 166 167 encoder_functions!( 168 {}, 169 { 170 if c <= '\u{7F}' { 171 // TODO optimize ASCII run 172 destination_handle.write_one(c as u8); 173 continue; 174 } 175 if c < '\u{F780}' || c > '\u{F7FF}' { 176 return ( 177 EncoderResult::Unmappable(c), 178 unread_handle.consumed(), 179 destination_handle.written(), 180 ); 181 } 182 destination_handle.write_one((u32::from(c) - 0xF700) as u8); 183 continue; 184 }, 185 self, 186 src_consumed, 187 source, 188 dest, 189 c, 190 destination_handle, 191 unread_handle, 192 check_space_one 193 ); 194 } 195 196 // Any copyright to the test code below this comment is dedicated to the 197 // Public Domain. 
// http://creativecommons.org/publicdomain/zero/1.0/

#[cfg(test)]
mod tests {
    use super::super::testing::*;
    use super::super::*;

    /// Decodes `bytes` as x-user-defined via the shared `decode` test helper
    /// and compares the result with `expect`.
    fn decode_x_user_defined(bytes: &[u8], expect: &str) {
        decode(X_USER_DEFINED, bytes, expect);
    }

    /// Encodes `string` as x-user-defined via the shared `encode` test helper
    /// and compares the result with `expect`.
    fn encode_x_user_defined(string: &str, expect: &[u8]) {
        encode(X_USER_DEFINED, string, expect);
    }

    #[test]
    fn test_x_user_defined_decode() {
        // Empty
        decode_x_user_defined(b"", "");

        // ASCII
        decode_x_user_defined(b"\x61\x62", "\u{0061}\u{0062}");

        // Lowest and highest upper-half bytes
        decode_x_user_defined(b"\x80\xFF", "\u{F780}\u{F7FF}");

        // 20 bytes: longer than 16, so both the vector path and the scalar
        // tail run when SIMD acceleration is enabled
        decode_x_user_defined(b"\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62", "\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}");
    }

    #[test]
    fn test_x_user_defined_encode() {
        // Empty
        encode_x_user_defined("", b"");

        // ASCII
        encode_x_user_defined("\u{0061}\u{0062}", b"\x61\x62");

        // Lowest and highest mappable non-ASCII code points
        encode_x_user_defined("\u{F780}\u{F7FF}", b"\x80\xFF");

        // The code points just outside the mappable range are unmappable.
        // The expectation is their decimal numeric character references
        // (0xF77F = 63359, 0xF800 = 63488), spelled out in ASCII because a
        // byte-string literal cannot hold the raw characters.
        encode_x_user_defined("\u{F77F}\u{F800}", b"&#63359;&#63488;");
    }

    #[test]
    fn test_x_user_defined_from_two_low_surrogates() {
        // Each unpaired low surrogate is expected to turn into U+FFFD, which
        // x-user-defined cannot represent, so the output is the numeric
        // character reference for U+FFFD, twice.
        let expectation = b"&#65533;&#65533;";
        let mut output = [0u8; 40];
        let mut encoder = X_USER_DEFINED.new_encoder();
        let (result, read, written, had_errors) =
            encoder.encode_from_utf16(&[0xDC00u16, 0xDEDEu16], &mut output[..], true);
        assert_eq!(result, CoderResult::InputEmpty);
        assert_eq!(read, 2);
        assert_eq!(written, expectation.len());
        assert!(had_errors);
        assert_eq!(&output[..written], expectation);
    }
}