// This is a part of rust-encoding.
// Copyright (c) 2013-2015, Kang Seonghoon.
// See README.md and LICENSE.txt for details.

//! UTF-16.

use std::convert::Into;
use std::marker::PhantomData;
use util::as_char;
use types::*;

/// A marker type selecting the little-endian byte order.
///
/// Can be used as a type parameter to `UTF16Encoding`, `UTF16Encoder` and `UTF16Decoder`.
#[derive(Debug, Clone, Copy)]
pub struct Little;

/// A marker type selecting the big-endian byte order.
///
/// Can be used as a type parameter to `UTF16Encoding`, `UTF16Encoder` and `UTF16Decoder`.
#[derive(Debug, Clone, Copy)]
pub struct Big;

24 /// An internal trait used to customize UTF-16 implementations.
25 #[doc(hidden)] // XXX never intended to be used publicly, should be gone later
26 pub trait Endian: Clone + 'static {
name() -> &'static str27     fn name() -> &'static str;
whatwg_name() -> Option<&'static str>28     fn whatwg_name() -> Option<&'static str>;
write_two_bytes(output: &mut ByteWriter, msb: u8, lsb: u8)29     fn write_two_bytes(output: &mut ByteWriter, msb: u8, lsb: u8);
concat_two_bytes(lead: u16, trail: u8) -> u1630     fn concat_two_bytes(lead: u16, trail: u8) -> u16;
31 }
32 
33 impl Endian for Little {
name() -> &'static str34     fn name() -> &'static str { "utf-16le" }
whatwg_name() -> Option<&'static str>35     fn whatwg_name() -> Option<&'static str> { Some("utf-16le") }
write_two_bytes(output: &mut ByteWriter, msb: u8, lsb: u8)36     fn write_two_bytes(output: &mut ByteWriter, msb: u8, lsb: u8) {
37         output.write_byte(lsb);
38         output.write_byte(msb);
39     }
concat_two_bytes(lead: u16, trail: u8) -> u1640     fn concat_two_bytes(lead: u16, trail: u8) -> u16 {
41         lead | ((trail as u16) << 8)
42     }
43 }
44 
45 impl Endian for Big {
name() -> &'static str46     fn name() -> &'static str { "utf-16be" }
whatwg_name() -> Option<&'static str>47     fn whatwg_name() -> Option<&'static str> { Some("utf-16be") }
write_two_bytes(output: &mut ByteWriter, msb: u8, lsb: u8)48     fn write_two_bytes(output: &mut ByteWriter, msb: u8, lsb: u8) {
49         output.write_byte(msb);
50         output.write_byte(lsb);
51     }
concat_two_bytes(lead: u16, trail: u8) -> u1652     fn concat_two_bytes(lead: u16, trail: u8) -> u16 {
53         (lead << 8) | trail as u16
54     }
55 }
56 
/**
 * UTF-16 (UCS Transformation Format, 16-bit).
 *
 * This is a Unicode encoding where one codepoint uses either
 * 2 bytes (up to U+FFFF) or 4 bytes (up to U+10FFFF) depending on its value.
 * Non-BMP codepoints are encoded with the "surrogate" mechanism:
 * each one is represented as an upper surrogate (U+D800..DBFF)
 * followed by a lower surrogate (U+DC00..DFFF).
 * As a result, surrogate characters (U+D800..DFFF) cannot appear alone
 * and cannot be included in a valid Unicode string.
 *
 * ## Specialization
 *
 * This type is specialized with endianness type `E`,
 * which should be either `Little` (little endian) or `Big` (big endian).
 */
#[derive(Debug, Clone, Copy)]
pub struct UTF16Encoding<E> {
    // Zero-sized; only records the endianness type parameter.
    _marker: PhantomData<E>
}

77 /// A type for UTF-16 in little endian.
78 pub type UTF16LEEncoding = UTF16Encoding<Little>;
79 /// A type for UTF-16 in big endian.
80 pub type UTF16BEEncoding = UTF16Encoding<Big>;
81 
82 /// An instance for UTF-16 in little endian.
83 pub const UTF_16LE_ENCODING: UTF16LEEncoding = UTF16Encoding { _marker: PhantomData };
84 /// An instance for UTF-16 in big endian.
85 pub const UTF_16BE_ENCODING: UTF16BEEncoding = UTF16Encoding { _marker: PhantomData };
86 
87 impl<E: Endian> Encoding for UTF16Encoding<E> {
name(&self) -> &'static str88     fn name(&self) -> &'static str { <E as Endian>::name() }
whatwg_name(&self) -> Option<&'static str>89     fn whatwg_name(&self) -> Option<&'static str> { <E as Endian>::whatwg_name() }
raw_encoder(&self) -> Box<RawEncoder>90     fn raw_encoder(&self) -> Box<RawEncoder> { UTF16Encoder::<E>::new() }
raw_decoder(&self) -> Box<RawDecoder>91     fn raw_decoder(&self) -> Box<RawDecoder> { UTF16Decoder::<E>::new() }
92 }
93 
/**
 * An encoder for UTF-16.
 *
 * The encoder keeps no state between calls; its only field records
 * the endianness type parameter.
 *
 * ## Specialization
 *
 * This type is specialized with endianness type `E`,
 * which should be either `Little` (little endian) or `Big` (big endian).
 */
#[derive(Debug, Clone, Copy)]
pub struct UTF16Encoder<E> {
    _marker: PhantomData<E>
}

107 impl<E: Endian> UTF16Encoder<E> {
new() -> Box<RawEncoder>108     fn new() -> Box<RawEncoder> {
109         Box::new(UTF16Encoder::<E> { _marker: PhantomData })
110     }
111 }
112 
113 impl<E: Endian> RawEncoder for UTF16Encoder<E> {
from_self(&self) -> Box<RawEncoder>114     fn from_self(&self) -> Box<RawEncoder> { UTF16Encoder::<E>::new() }
115 
raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (usize, Option<CodecError>)116     fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (usize, Option<CodecError>) {
117         output.writer_hint(input.len() * 2);
118 
119         let write_two_bytes = |output: &mut ByteWriter, msb: u8, lsb: u8|
120             <E as Endian>::write_two_bytes(output, msb, lsb);
121 
122         for ch in input.chars() {
123             match ch {
124                 '\u{0}'...'\u{d7ff}' | '\u{e000}'...'\u{ffff}' => {
125                     let ch = ch as u32;
126                     write_two_bytes(output, (ch >> 8) as u8, (ch & 0xff) as u8);
127                 }
128                 '\u{10000}'...'\u{10ffff}' => {
129                     let ch = ch as u32 - 0x10000;
130                     write_two_bytes(output, (0xd8 | (ch >> 18)) as u8,
131                                             ((ch >> 10) & 0xff) as u8);
132                     write_two_bytes(output, (0xdc | ((ch >> 8) & 0x3)) as u8,
133                                             (ch & 0xff) as u8);
134                 }
135                 _ => unreachable!() // XXX Rust issue #12483, this is redundant
136             }
137         }
138         (input.len(), None)
139     }
140 
raw_finish(&mut self, _output: &mut ByteWriter) -> Option<CodecError>141     fn raw_finish(&mut self, _output: &mut ByteWriter) -> Option<CodecError> {
142         None
143     }
144 }
145 
/**
 * A decoder for UTF-16.
 *
 * The decoder is stateful: bytes of an incomplete code unit or surrogate pair
 * at the end of one `raw_feed` call are remembered for the next call.
 *
 * ## Specialization
 *
 * This type is specialized with endianness type `E`,
 * which should be either `Little` (little endian) or `Big` (big endian).
 */
pub struct UTF16Decoder<E> {
    /// The first byte of a code unit whose second byte has not arrived yet,
    /// or `0xffff` when no byte is pending.
    leadbyte: u16,
    /// An upper surrogate (U+D800..DBFF) still waiting for its lower surrogate,
    /// or `0xffff` when none is pending.
    leadsurrogate: u16,
    _marker: PhantomData<E>
}

160 impl<E: Endian> UTF16Decoder<E> {
new() -> Box<RawDecoder>161     pub fn new() -> Box<RawDecoder> {
162         Box::new(UTF16Decoder::<E> { leadbyte: 0xffff, leadsurrogate: 0xffff,
163                                      _marker: PhantomData })
164     }
165 }
166 
167 impl<E: Endian> RawDecoder for UTF16Decoder<E> {
from_self(&self) -> Box<RawDecoder>168     fn from_self(&self) -> Box<RawDecoder> { UTF16Decoder::<E>::new() }
169 
raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option<CodecError>)170     fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option<CodecError>) {
171         output.writer_hint(input.len() / 2); // when every codepoint is U+0000..007F
172 
173         let concat_two_bytes = |lead: u16, trail: u8|
174             <E as Endian>::concat_two_bytes(lead, trail);
175 
176         let mut i = 0;
177         let mut processed = 0;
178         let len = input.len();
179 
180         if i >= len { return (processed, None); }
181 
182         if self.leadbyte != 0xffff {
183             let ch = concat_two_bytes(self.leadbyte, input[i]);
184             i += 1;
185             self.leadbyte = 0xffff;
186             if self.leadsurrogate != 0xffff { // `ch` is lower surrogate
187                 let upper = self.leadsurrogate;
188                 self.leadsurrogate = 0xffff;
189                 match ch {
190                     0xdc00...0xdfff => {
191                         let ch = ((upper as u32 - 0xd800) << 10) + (ch as u32 - 0xdc00);
192                         output.write_char(as_char(ch + 0x10000));
193                         processed = i;
194                     }
195                     _ => {
196                         return (processed, Some(CodecError {
197                             upto: i as isize - 2, cause: "invalid sequence".into()
198                         }));
199                     }
200                 }
201             } else {
202                 match ch {
203                     0xd800...0xdbff => {
204                         self.leadsurrogate = ch;
205                         // pass through
206                     }
207                     0xdc00...0xdfff => {
208                         return (processed, Some(CodecError {
209                             upto: i as isize, cause: "invalid sequence".into()
210                         }));
211                     }
212                     _ => {
213                         output.write_char(as_char(ch as u32));
214                         processed = i;
215                     }
216                 }
217             }
218             if i >= len { return (processed, None); }
219         }
220 
221         if self.leadsurrogate != 0xffff {
222             i += 1;
223             if i >= len {
224                 self.leadbyte = input[i-1] as u16;
225                 return (processed, None);
226             }
227             let upper = self.leadsurrogate;
228             let ch = concat_two_bytes(input[i-1] as u16, input[i]);
229             i += 1;
230             match ch {
231                 0xdc00...0xdfff => {
232                     let ch = ((upper as u32 - 0xd800) << 10) + (ch as u32 - 0xdc00);
233                     output.write_char(as_char(ch + 0x10000));
234                 }
235                 _ => {
236                     self.leadbyte = 0xffff;
237                     self.leadsurrogate = 0xffff;
238                     return (processed, Some(CodecError {
239                         upto: i as isize - 2, cause: "invalid sequence".into()
240                     }));
241                 }
242             }
243         }
244 
245         self.leadbyte = 0xffff;
246         self.leadsurrogate = 0xffff;
247         processed = i;
248         while i < len {
249             i += 1;
250             if i >= len {
251                 self.leadbyte = input[i-1] as u16;
252                 break;
253             }
254             let ch = concat_two_bytes(input[i-1] as u16, input[i]);
255             match ch {
256                 0xd800...0xdbff => {
257                     i += 2;
258                     if i >= len {
259                         self.leadsurrogate = ch;
260                         if i-1 < len { self.leadbyte = input[i-1] as u16; }
261                         break;
262                     }
263                     let ch2 = concat_two_bytes(input[i-1] as u16, input[i]);
264                     match ch2 {
265                         0xdc00...0xdfff => {
266                             let ch = ((ch as u32 - 0xd800) << 10) + (ch2 as u32 - 0xdc00);
267                             output.write_char(as_char(ch + 0x10000));
268                         }
269                         _ => {
270                             return (processed, Some(CodecError {
271                                 upto: i as isize - 1, cause: "invalid sequence".into()
272                             }));
273                         }
274                     }
275                 }
276                 0xdc00...0xdfff => {
277                     return (processed, Some(CodecError {
278                         upto: i as isize + 1, cause: "invalid sequence".into()
279                     }));
280                 }
281                 _ => {
282                     output.write_char(as_char(ch as u32));
283                 }
284             }
285             i += 1;
286             processed = i;
287         }
288         (processed, None)
289     }
290 
raw_finish(&mut self, _output: &mut StringWriter) -> Option<CodecError>291     fn raw_finish(&mut self, _output: &mut StringWriter) -> Option<CodecError> {
292         let leadbyte = self.leadbyte;
293         let leadsurrogate = self.leadsurrogate;
294         self.leadbyte = 0xffff;
295         self.leadsurrogate = 0xffff;
296         if leadbyte != 0xffff || leadsurrogate != 0xffff {
297             Some(CodecError { upto: 0, cause: "incomplete sequence".into() })
298         } else {
299             None
300         }
301     }
302 }
303 
#[cfg(test)]
mod tests {
    // little endian and big endian are symmetric to each other, there's no need to test both.
    // since big endian is easier to inspect we test UTF_16BE only.
    //
    // NOTE(review): the `assert_feed_ok!`, `assert_feed_err!` and `assert_finish_*!` macros
    // are defined outside this file. From their use here the arguments appear to be
    // (codec, input that gets processed, input left unprocessed, expected output) for
    // `assert_feed_ok!`, and (codec, [backup,] processed, problematic, remaining, expected
    // output) for `assert_feed_err!` -- confirm against the macro definitions.

    use super::UTF_16BE_ENCODING as UTF_16BE;
    use types::*;

    #[test]
    fn test_encoder_valid() {
        let mut e = UTF_16BE.raw_encoder();
        assert_feed_ok!(e, "\u{0}\
                            \u{1}\u{02}\u{004}\u{0008}\
                            \u{10}\u{020}\u{0040}\u{80}\
                            \u{100}\u{0200}\u{400}\u{800}\
                            \u{1000}\u{2000}\u{4000}\u{8000}\
                            \u{ffff}", "",
                        [0x00, 0x00,
                         0x00, 0x01, 0x00, 0x02, 0x00, 0x04, 0x00, 0x08,
                         0x00, 0x10, 0x00, 0x20, 0x00, 0x40, 0x00, 0x80,
                         0x01, 0x00, 0x02, 0x00, 0x04, 0x00, 0x08, 0x00,
                         0x10, 0x00, 0x20, 0x00, 0x40, 0x00, 0x80, 0x00,
                         0xff, 0xff]);
        assert_feed_ok!(e, "\u{10000}\
                            \u{10001}\u{010002}\
                            \u{10004}\u{010008}\
                            \u{10010}\u{010020}\
                            \u{10040}\u{010080}\
                            \u{10100}\u{010200}\
                            \u{10400}\u{010800}\
                            \u{11000}\u{012000}\
                            \u{14000}\u{018000}\
                            \u{20000}\u{030000}\
                            \u{50000}\u{090000}\
                            \u{10FFFF}", "",
                        [0xd8, 0x00, 0xdc, 0x00,
                         0xd8, 0x00, 0xdc, 0x01, 0xd8, 0x00, 0xdc, 0x02,
                         0xd8, 0x00, 0xdc, 0x04, 0xd8, 0x00, 0xdc, 0x08,
                         0xd8, 0x00, 0xdc, 0x10, 0xd8, 0x00, 0xdc, 0x20,
                         0xd8, 0x00, 0xdc, 0x40, 0xd8, 0x00, 0xdc, 0x80,
                         0xd8, 0x00, 0xdd, 0x00, 0xd8, 0x00, 0xde, 0x00,
                         0xd8, 0x01, 0xdc, 0x00, 0xd8, 0x02, 0xdc, 0x00,
                         0xd8, 0x04, 0xdc, 0x00, 0xd8, 0x08, 0xdc, 0x00,
                         0xd8, 0x10, 0xdc, 0x00, 0xd8, 0x20, 0xdc, 0x00,
                         0xd8, 0x40, 0xdc, 0x00, 0xd8, 0x80, 0xdc, 0x00,
                         0xd9, 0x00, 0xdc, 0x00, 0xda, 0x00, 0xdc, 0x00,
                         0xdb, 0xff, 0xdf, 0xff]);
        assert_finish_ok!(e, []);
    }

    #[test]
    fn test_decoder_valid() {
        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [0x00, 0x00,
                            0x00, 0x01, 0x00, 0x02, 0x00, 0x04, 0x00, 0x08,
                            0x00, 0x10, 0x00, 0x20, 0x00, 0x40, 0x00, 0x80,
                            0x01, 0x00, 0x02, 0x00, 0x04, 0x00, 0x08, 0x00,
                            0x10, 0x00, 0x20, 0x00, 0x40, 0x00, 0x80, 0x00,
                            0xff, 0xff], [],
                        "\u{0}\
                         \u{1}\u{02}\u{004}\u{0008}\
                         \u{10}\u{020}\u{0040}\u{80}\
                         \u{100}\u{0200}\u{400}\u{800}\
                         \u{1000}\u{2000}\u{4000}\u{8000}\
                         \u{ffff}");
        assert_feed_ok!(d, [0xd8, 0x00, 0xdc, 0x00,
                            0xd8, 0x00, 0xdc, 0x01, 0xd8, 0x00, 0xdc, 0x02,
                            0xd8, 0x00, 0xdc, 0x04, 0xd8, 0x00, 0xdc, 0x08,
                            0xd8, 0x00, 0xdc, 0x10, 0xd8, 0x00, 0xdc, 0x20,
                            0xd8, 0x00, 0xdc, 0x40, 0xd8, 0x00, 0xdc, 0x80,
                            0xd8, 0x00, 0xdd, 0x00, 0xd8, 0x00, 0xde, 0x00,
                            0xd8, 0x01, 0xdc, 0x00, 0xd8, 0x02, 0xdc, 0x00,
                            0xd8, 0x04, 0xdc, 0x00, 0xd8, 0x08, 0xdc, 0x00,
                            0xd8, 0x10, 0xdc, 0x00, 0xd8, 0x20, 0xdc, 0x00,
                            0xd8, 0x40, 0xdc, 0x00, 0xd8, 0x80, 0xdc, 0x00,
                            0xd9, 0x00, 0xdc, 0x00, 0xda, 0x00, 0xdc, 0x00,
                            0xdb, 0xff, 0xdf, 0xff], [],
                        "\u{10000}\
                         \u{10001}\u{010002}\
                         \u{10004}\u{010008}\
                         \u{10010}\u{010020}\
                         \u{10040}\u{010080}\
                         \u{10100}\u{010200}\
                         \u{10400}\u{010800}\
                         \u{11000}\u{012000}\
                         \u{14000}\u{018000}\
                         \u{20000}\u{030000}\
                         \u{50000}\u{090000}\
                         \u{10FFFF}");
        assert_finish_ok!(d, "");
    }

    // A BMP code unit split across feeds: the first byte stays pending until the second arrives.
    #[test]
    fn test_decoder_valid_partial_bmp() {
        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0x12], "");
        assert_feed_ok!(d, [0x34], [], "\u{1234}");
        assert_feed_ok!(d, [], [0x56], "");
        assert_feed_ok!(d, [0x78], [], "\u{5678}");
        assert_finish_ok!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0x12], "");
        assert_feed_ok!(d, [0x34], [0x56], "\u{1234}");
        assert_feed_ok!(d, [0x78, 0xab, 0xcd], [], "\u{5678}\u{abcd}");
        assert_finish_ok!(d, "");
    }

    // A surrogate pair split across feeds at every possible byte boundary.
    #[test]
    fn test_decoder_valid_partial_non_bmp() {
        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xd8], "");
        assert_feed_ok!(d, [], [0x08], "");
        assert_feed_ok!(d, [], [0xdf], "");
        assert_feed_ok!(d, [0x45], [0xd9], "\u{12345}");
        assert_feed_ok!(d, [], [0x5e], "");
        assert_feed_ok!(d, [], [0xdc], "");
        assert_feed_ok!(d, [0x90], [], "\u{67890}");
        assert_finish_ok!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xd8], "");
        assert_feed_ok!(d, [], [0x08, 0xdf], "");
        assert_feed_ok!(d, [0x45], [0xd9, 0x5e], "\u{12345}");
        assert_feed_ok!(d, [0xdc, 0x90], [], "\u{67890}");
        assert_finish_ok!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xd8, 0x08, 0xdf], "");
        assert_feed_ok!(d, [0x45], [0xd9, 0x5e, 0xdc], "\u{12345}");
        assert_feed_ok!(d, [0x90], [], "\u{67890}");
        assert_finish_ok!(d, "");
    }

    // Finishing with one, two or three pending bytes is an error.
    #[test]
    fn test_decoder_invalid_partial() {
        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0x12], "");
        assert_finish_err!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xd8], "");
        assert_finish_err!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xd8, 0x08], "");
        assert_finish_err!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xd8, 0x08, 0xdf], "");
        assert_finish_err!(d, "");
    }

    // An upper surrogate followed by anything but a lower surrogate is an error.
    #[test]
    fn test_decoder_invalid_lone_upper_surrogate() {
        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xd8, 0x00], "");
        assert_feed_err!(d, [], [], [0x12, 0x34], "");
        assert_feed_err!(d, [], [0xd8, 0x00], [0x56, 0x78], "");
        assert_feed_ok!(d, [], [0xd8, 0x00], "");
        assert_feed_err!(d, [], [], [0xd8, 0x00], "");
        assert_feed_ok!(d, [], [0xd8, 0x00], "");
        assert_finish_err!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xdb, 0xff], "");
        assert_feed_err!(d, [], [], [0x12, 0x34], "");
        assert_feed_err!(d, [], [0xdb, 0xff], [0x56, 0x78], "");
        assert_feed_ok!(d, [], [0xdb, 0xff], "");
        assert_feed_err!(d, [], [], [0xdb, 0xff], "");
        assert_feed_ok!(d, [], [0xdb, 0xff], "");
        assert_finish_err!(d, "");
    }

    // Same as above, with the surrogate and the following unit split across feeds.
    #[test]
    fn test_decoder_invalid_lone_upper_surrogate_partial() {
        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xd8], "");
        assert_feed_err!(d, [], [0x00], [0x12, 0x34], "");
        assert_feed_ok!(d, [], [0xd8, 0x00, 0x56], "");
        assert_feed_err!(d, -1, [], [], [0x56, 0x78], "");
        assert_feed_ok!(d, [], [0xd8], "");
        assert_feed_err!(d, [], [0x00], [0xd8, 0x00], "");
        assert_feed_ok!(d, [], [0xd8, 0x00, 0xdb], "");
        assert_feed_err!(d, -1, [], [], [0xdb, 0xff], "");
        assert_feed_ok!(d, [], [0xd8], "");
        assert_finish_err!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xdb], "");
        assert_feed_err!(d, [], [0xff], [0x12, 0x34], "");
        assert_feed_ok!(d, [], [0xdb, 0xff, 0x56], "");
        assert_feed_err!(d, -1, [], [], [0x56, 0x78], "");
        assert_feed_ok!(d, [], [0xdb], "");
        assert_feed_err!(d, [], [0xff], [0xdb, 0xff], "");
        assert_feed_ok!(d, [], [0xdb, 0xff, 0xd8], "");
        assert_feed_err!(d, -1, [], [], [0xd8, 0x00], "");
        assert_feed_ok!(d, [], [0xdb], "");
        assert_finish_err!(d, "");
    }

    // A lower surrogate without a preceding upper surrogate is an error.
    #[test]
    fn test_decoder_invalid_lone_lower_surrogate() {
        let mut d = UTF_16BE.raw_decoder();
        assert_feed_err!(d, [], [0xdc, 0x00], [], "");
        assert_feed_err!(d, [0x12, 0x34], [0xdc, 0x00], [0x56, 0x78], "\u{1234}");
        assert_finish_ok!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_err!(d, [], [0xdf, 0xff], [], "");
        assert_feed_err!(d, [0x12, 0x34], [0xdf, 0xff], [0x56, 0x78], "\u{1234}");
        assert_finish_ok!(d, "");
    }

    #[test]
    fn test_decoder_invalid_lone_lower_surrogate_partial() {
        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xdc], "");
        assert_feed_err!(d, [], [0x00], [], "");
        assert_feed_ok!(d, [0x12, 0x34], [0xdc], "\u{1234}");
        assert_feed_err!(d, [], [0x00], [0x56, 0x78], "");
        assert_finish_ok!(d, "");

        // The same decoder is reused here after a successful finish.
        assert_feed_ok!(d, [], [0xdf], "");
        assert_feed_err!(d, [], [0xff], [], "");
        assert_feed_ok!(d, [0x12, 0x34], [0xdf], "\u{1234}");
        assert_feed_err!(d, [], [0xff], [0x56, 0x78], "");
        assert_finish_ok!(d, "");
    }

    #[test]
    fn test_decoder_invalid_one_byte_before_finish() {
        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0x12], "");
        assert_finish_err!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [0x12, 0x34], [0x56], "\u{1234}");
        assert_finish_err!(d, "");
    }

    #[test]
    fn test_decoder_invalid_three_bytes_before_finish() {
        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xd8, 0x00, 0xdc], "");
        assert_finish_err!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [0x12, 0x34], [0xd8, 0x00, 0xdc], "\u{1234}");
        assert_finish_err!(d, "");
    }

    #[test]
    fn test_decoder_invalid_three_bytes_before_finish_partial() {
        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [], [0xd8], "");
        assert_feed_ok!(d, [], [0x00], "");
        assert_feed_ok!(d, [], [0xdc], "");
        assert_finish_err!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [0x12, 0x34], [0xd8], "\u{1234}");
        assert_feed_ok!(d, [], [0x00, 0xdc], "");
        assert_finish_err!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [0x12, 0x34], [0xd8, 0x00], "\u{1234}");
        assert_feed_ok!(d, [], [0xdc], "");
        assert_finish_err!(d, "");
    }

    // After a failed finish the decoder must start from a clean state again.
    #[test]
    fn test_decoder_feed_after_finish() {
        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [0x12, 0x34], [0x12], "\u{1234}");
        assert_finish_err!(d, "");
        assert_feed_ok!(d, [0x12, 0x34], [], "\u{1234}");
        assert_finish_ok!(d, "");

        let mut d = UTF_16BE.raw_decoder();
        assert_feed_ok!(d, [0xd8, 0x08, 0xdf, 0x45], [0xd8, 0x08, 0xdf], "\u{12345}");
        assert_finish_err!(d, "");
        assert_feed_ok!(d, [0xd8, 0x08, 0xdf, 0x45], [0xd8, 0x08], "\u{12345}");
        assert_finish_err!(d, "");
        assert_feed_ok!(d, [0xd8, 0x08, 0xdf, 0x45], [0xd8], "\u{12345}");
        assert_finish_err!(d, "");
        assert_feed_ok!(d, [0xd8, 0x08, 0xdf, 0x45], [], "\u{12345}");
        assert_finish_ok!(d, "");
    }
}