1 // Copyright 2015-2016 Mozilla Foundation. See the COPYRIGHT
2 // file at the top-level directory of this distribution.
3 //
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
9 
10 use super::*;
11 use handles::*;
12 use variant::*;
13 
14 cfg_if! {
15     if #[cfg(feature = "simd-accel")] {
16         use simd_funcs::*;
17         use packed_simd::u16x8;
18 
19         #[inline(always)]
20         fn shift_upper(unpacked: u16x8) -> u16x8 {
21             let highest_ascii = u16x8::splat(0x7F);
22             unpacked + unpacked.gt(highest_ascii).select(u16x8::splat(0xF700), u16x8::splat(0))        }
23     } else {
24     }
25 }
26 
27 pub struct UserDefinedDecoder;
28 
29 impl UserDefinedDecoder {
new() -> VariantDecoder30     pub fn new() -> VariantDecoder {
31         VariantDecoder::UserDefined(UserDefinedDecoder)
32     }
33 
max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize>34     pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize> {
35         Some(byte_length)
36     }
37 
max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize>38     pub fn max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize> {
39         byte_length.checked_mul(3)
40     }
41 
max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize>42     pub fn max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize> {
43         byte_length.checked_mul(3)
44     }
45 
46     decoder_function!(
47         {},
48         {},
49         {},
50         {
51             if b < 0x80 {
52                 // ASCII run not optimized, because binary data expected
53                 destination_handle.write_ascii(b);
54                 continue;
55             }
56             destination_handle.write_upper_bmp(u16::from(b) + 0xF700);
57             continue;
58         },
59         self,
60         src_consumed,
61         dest,
62         source,
63         b,
64         destination_handle,
65         _unread_handle,
66         check_space_bmp,
67         decode_to_utf8_raw,
68         u8,
69         Utf8Destination
70     );
71 
72     #[cfg(not(feature = "simd-accel"))]
decode_to_utf16_raw( &mut self, src: &[u8], dst: &mut [u16], _last: bool, ) -> (DecoderResult, usize, usize)73     pub fn decode_to_utf16_raw(
74         &mut self,
75         src: &[u8],
76         dst: &mut [u16],
77         _last: bool,
78     ) -> (DecoderResult, usize, usize) {
79         let (pending, length) = if dst.len() < src.len() {
80             (DecoderResult::OutputFull, dst.len())
81         } else {
82             (DecoderResult::InputEmpty, src.len())
83         };
84         let src_trim = &src[..length];
85         let dst_trim = &mut dst[..length];
86         src_trim
87             .iter()
88             .zip(dst_trim.iter_mut())
89             .for_each(|(from, to)| {
90                 *to = {
91                     let unit = *from;
92                     if unit < 0x80 {
93                         u16::from(unit)
94                     } else {
95                         u16::from(unit) + 0xF700
96                     }
97                 }
98             });
99         (pending, length, length)
100     }
101 
102     #[cfg(feature = "simd-accel")]
decode_to_utf16_raw( &mut self, src: &[u8], dst: &mut [u16], _last: bool, ) -> (DecoderResult, usize, usize)103     pub fn decode_to_utf16_raw(
104         &mut self,
105         src: &[u8],
106         dst: &mut [u16],
107         _last: bool,
108     ) -> (DecoderResult, usize, usize) {
109         let (pending, length) = if dst.len() < src.len() {
110             (DecoderResult::OutputFull, dst.len())
111         } else {
112             (DecoderResult::InputEmpty, src.len())
113         };
114         // Not bothering with alignment
115         let tail_start = length & !0xF;
116         let simd_iterations = length >> 4;
117         let src_ptr = src.as_ptr();
118         let dst_ptr = dst.as_mut_ptr();
119         for i in 0..simd_iterations {
120             let input = unsafe { load16_unaligned(src_ptr.add(i * 16)) };
121             let (first, second) = simd_unpack(input);
122             unsafe {
123                 store8_unaligned(dst_ptr.add(i * 16), shift_upper(first));
124                 store8_unaligned(dst_ptr.add((i * 16) + 8), shift_upper(second));
125             }
126         }
127         let src_tail = &src[tail_start..length];
128         let dst_tail = &mut dst[tail_start..length];
129         src_tail
130             .iter()
131             .zip(dst_tail.iter_mut())
132             .for_each(|(from, to)| {
133                 *to = {
134                     let unit = *from;
135                     if unit < 0x80 {
136                         u16::from(unit)
137                     } else {
138                         u16::from(unit) + 0xF700
139                     }
140                 }
141             });
142         (pending, length, length)
143     }
144 }
145 
146 pub struct UserDefinedEncoder;
147 
148 impl UserDefinedEncoder {
new(encoding: &'static Encoding) -> Encoder149     pub fn new(encoding: &'static Encoding) -> Encoder {
150         Encoder::new(encoding, VariantEncoder::UserDefined(UserDefinedEncoder))
151     }
152 
max_buffer_length_from_utf16_without_replacement( &self, u16_length: usize, ) -> Option<usize>153     pub fn max_buffer_length_from_utf16_without_replacement(
154         &self,
155         u16_length: usize,
156     ) -> Option<usize> {
157         Some(u16_length)
158     }
159 
max_buffer_length_from_utf8_without_replacement( &self, byte_length: usize, ) -> Option<usize>160     pub fn max_buffer_length_from_utf8_without_replacement(
161         &self,
162         byte_length: usize,
163     ) -> Option<usize> {
164         Some(byte_length)
165     }
166 
167     encoder_functions!(
168         {},
169         {
170             if c <= '\u{7F}' {
171                 // TODO optimize ASCII run
172                 destination_handle.write_one(c as u8);
173                 continue;
174             }
175             if c < '\u{F780}' || c > '\u{F7FF}' {
176                 return (
177                     EncoderResult::Unmappable(c),
178                     unread_handle.consumed(),
179                     destination_handle.written(),
180                 );
181             }
182             destination_handle.write_one((u32::from(c) - 0xF700) as u8);
183             continue;
184         },
185         self,
186         src_consumed,
187         source,
188         dest,
189         c,
190         destination_handle,
191         unread_handle,
192         check_space_one
193     );
194 }
195 
196 // Any copyright to the test code below this comment is dedicated to the
197 // Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
198 
#[cfg(test)]
mod tests {
    use super::super::testing::*;
    use super::super::*;

    /// Runs the shared `decode` test helper with `X_USER_DEFINED`.
    fn decode_x_user_defined(bytes: &[u8], expect: &str) {
        decode(X_USER_DEFINED, bytes, expect);
    }

    /// Runs the shared `encode` test helper with `X_USER_DEFINED`.
    fn encode_x_user_defined(string: &str, expect: &[u8]) {
        encode(X_USER_DEFINED, string, expect);
    }

    #[test]
    fn test_x_user_defined_decode() {
        let cases: [(&[u8], &str); 4] = [
            // Empty
            (b"", ""),
            // ASCII
            (b"\x61\x62", "\u{0061}\u{0062}"),
            // Both ends of the upper half
            (b"\x80\xFF", "\u{F780}\u{F7FF}"),
            // Twenty mixed bytes, i.e. more than one 16-byte group
            (
                b"\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62",
                "\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}",
            ),
        ];
        for &(bytes, expect) in cases.iter() {
            decode_x_user_defined(bytes, expect);
        }
    }

    #[test]
    fn test_x_user_defined_encode() {
        let cases: [(&str, &[u8]); 4] = [
            // Empty
            ("", b""),
            // ASCII
            ("\u{0061}\u{0062}", b"\x61\x62"),
            // Both ends of the mappable private-use range
            ("\u{F780}\u{F7FF}", b"\x80\xFF"),
            // Immediate neighbours of that range come out as numeric
            // character references
            ("\u{F77F}\u{F800}", b"&#63359;&#63488;"),
        ];
        for &(string, expect) in cases.iter() {
            encode_x_user_defined(string, expect);
        }
    }

    #[test]
    fn test_x_user_defined_from_two_low_surrogates() {
        // Two unpaired low surrogates are each expected to be emitted as
        // the numeric character reference for U+FFFD.
        let expected = b"&#65533;&#65533;";
        let input = [0xDC00u16, 0xDEDEu16];
        let mut buffer = [0u8; 40];
        let mut encoder = X_USER_DEFINED.new_encoder();
        let (result, read, written, had_errors) =
            encoder.encode_from_utf16(&input, &mut buffer[..], true);
        assert_eq!(result, CoderResult::InputEmpty);
        assert_eq!(read, 2);
        assert_eq!(written, expected.len());
        assert!(had_errors);
        assert_eq!(&buffer[..written], expected);
    }
}
250