1 // Copyright 2015-2016 Mozilla Foundation. See the COPYRIGHT
2 // file at the top-level directory of this distribution.
3 //
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
9 
10 use handles::*;
11 use data::*;
12 use variant::*;
13 use super::*;
14 // Rust 1.14.0 requires the following despite the asterisk above.
15 use super::in_inclusive_range16;
16 
/// States of the ISO-2022-JP decoder state machine.
///
/// The first four variants are also the values that
/// `Iso2022JpDecoder::output_state` can take; the remaining three are
/// only used while a multi-byte unit or an escape sequence is in progress.
#[derive(Clone, Copy)]
enum Iso2022JpDecoderState {
    /// ASCII is selected (`ESC ( B`); this is the initial state.
    Ascii,
    /// The Roman set is selected (`ESC ( J`): like ASCII, except that
    /// 0x5C decodes to U+00A5 and 0x7E decodes to U+203E.
    Roman,
    /// Half-width katakana is selected (`ESC ( I`).
    Katakana,
    /// JIS X 0208 is selected (`ESC $ @` or `ESC $ B`) and a lead byte
    /// is expected next.
    LeadByte,
    /// A JIS X 0208 lead byte has been stored; its trail byte is expected.
    TrailByte,
    /// The byte 0x1B (ESC) has been seen.
    EscapeStart,
    /// ESC followed by `$` (0x24) or `(` (0x28) has been seen.
    Escape,
}
27 
28 pub struct Iso2022JpDecoder {
29     decoder_state: Iso2022JpDecoderState,
30     output_state: Iso2022JpDecoderState, // only takes 1 of first 4 values
31     lead: u8,
32     output_flag: bool,
33     pending_prepended: bool,
34 }
35 
36 impl Iso2022JpDecoder {
new() -> VariantDecoder37     pub fn new() -> VariantDecoder {
38         VariantDecoder::Iso2022Jp(
39             Iso2022JpDecoder {
40                 decoder_state: Iso2022JpDecoderState::Ascii,
41                 output_state: Iso2022JpDecoderState::Ascii,
42                 lead: 0u8,
43                 output_flag: false,
44                 pending_prepended: false,
45             }
46         )
47     }
48 
extra_to_input_from_state(&self, byte_length: usize) -> Option<usize>49     fn extra_to_input_from_state(&self, byte_length: usize) -> Option<usize> {
50         byte_length.checked_add(
51             if self.lead == 0 || self.pending_prepended {
52                 0
53             } else {
54                 1
55             } +
56             match self.decoder_state {
57                 Iso2022JpDecoderState::Escape |
58                 Iso2022JpDecoderState::EscapeStart => 1,
59                 _ => 0,
60             }
61         )
62     }
63 
extra_to_output_from_state(&self) -> usize64     fn extra_to_output_from_state(&self) -> usize {
65         if self.lead != 0 && self.pending_prepended {
66             1 + self.output_flag as usize
67         } else {
68             self.output_flag as usize
69         }
70     }
71 
max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize>72     pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize> {
73         checked_add(
74             self.extra_to_output_from_state(),
75             self.extra_to_input_from_state(byte_length),
76         )
77     }
78 
max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize>79     pub fn max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize> {
80         // worst case: 1 to 3 (half-width katakana)
81         self.max_utf8_buffer_length(byte_length)
82     }
83 
max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize>84     pub fn max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize> {
85         checked_mul(
86             3,
87             checked_add(
88                 self.extra_to_output_from_state(),
89                 self.extra_to_input_from_state(byte_length),
90             ),
91         )
92     }
93 
94     decoder_functions!(
95         {
96             if self.pending_prepended {
97                 // lead was set in EscapeStart and "prepended"
98                 // in Escape.
99                 debug_assert!(self.lead == 0x24u8 || self.lead == 0x28u8);
100                 match dest.check_space_bmp() {
101                     Space::Full(_) => {
102                         return (DecoderResult::OutputFull, 0, 0);
103                     }
104                     Space::Available(destination_handle) => {
105                         self.pending_prepended = false;
106                         self.output_flag = false;
107                         match self.decoder_state {
108                             Iso2022JpDecoderState::Ascii |
109                             Iso2022JpDecoderState::Roman => {
110                                 destination_handle.write_ascii(self.lead);
111                                 self.lead = 0x0u8;
112                             }
113                             Iso2022JpDecoderState::Katakana => {
114                                 destination_handle
115                                     .write_upper_bmp(self.lead as u16 - 0x21u16 + 0xFF61u16);
116                                 self.lead = 0x0u8;
117                             }
118                             Iso2022JpDecoderState::LeadByte => {
119                                 self.decoder_state = Iso2022JpDecoderState::TrailByte;
120                             }
121                             _ => unreachable!(),
122                         }
123                     }
124                 }
125             }
126         },
127         {},
128         {
129             match self.decoder_state {
130                 Iso2022JpDecoderState::TrailByte |
131                 Iso2022JpDecoderState::EscapeStart => {
132                     self.decoder_state = self.output_state;
133                     return (DecoderResult::Malformed(1, 0), src_consumed, dest.written());
134                 }
135                 Iso2022JpDecoderState::Escape => {
136                     self.pending_prepended = true;
137                     self.decoder_state = self.output_state;
138                     return (DecoderResult::Malformed(1, 1), src_consumed, dest.written());
139                 }
140                 _ => {}
141             }
142         },
143         {
144             match self.decoder_state {
145                 Iso2022JpDecoderState::Ascii => {
146                     if b == 0x1Bu8 {
147                         self.decoder_state = Iso2022JpDecoderState::EscapeStart;
148                         continue;
149                     }
150                     self.output_flag = false;
151                     if b > 0x7Fu8 || b == 0x0Eu8 || b == 0x0Fu8 {
152                         return (DecoderResult::Malformed(1, 0),
153                                 unread_handle.consumed(),
154                                 destination_handle.written());
155                     }
156                     destination_handle.write_ascii(b);
157                     continue;
158                 }
159                 Iso2022JpDecoderState::Roman => {
160                     if b == 0x1Bu8 {
161                         self.decoder_state = Iso2022JpDecoderState::EscapeStart;
162                         continue;
163                     }
164                     self.output_flag = false;
165                     if b == 0x5Cu8 {
166                         destination_handle.write_mid_bmp(0x00A5u16);
167                         continue;
168                     }
169                     if b == 0x7Eu8 {
170                         destination_handle.write_upper_bmp(0x203Eu16);
171                         continue;
172                     }
173                     if b > 0x7Fu8 || b == 0x0Eu8 || b == 0x0Fu8 {
174                         return (DecoderResult::Malformed(1, 0),
175                                 unread_handle.consumed(),
176                                 destination_handle.written());
177                     }
178                     destination_handle.write_ascii(b);
179                     continue;
180                 }
181                 Iso2022JpDecoderState::Katakana => {
182                     if b == 0x1Bu8 {
183                         self.decoder_state = Iso2022JpDecoderState::EscapeStart;
184                         continue;
185                     }
186                     self.output_flag = false;
187                     if b >= 0x21u8 && b <= 0x5Fu8 {
188                         destination_handle.write_upper_bmp(b as u16 - 0x21u16 + 0xFF61u16);
189                         continue;
190                     }
191                     return (DecoderResult::Malformed(1, 0),
192                             unread_handle.consumed(),
193                             destination_handle.written());
194                 }
195                 Iso2022JpDecoderState::LeadByte => {
196                     if b == 0x1Bu8 {
197                         self.decoder_state = Iso2022JpDecoderState::EscapeStart;
198                         continue;
199                     }
200                     self.output_flag = false;
201                     if b >= 0x21u8 && b <= 0x7Eu8 {
202                         self.lead = b;
203                         self.decoder_state = Iso2022JpDecoderState::TrailByte;
204                         continue;
205                     }
206                     return (DecoderResult::Malformed(1, 0),
207                             unread_handle.consumed(),
208                             destination_handle.written());
209                 }
210                 Iso2022JpDecoderState::TrailByte => {
211                     if b == 0x1Bu8 {
212                         self.decoder_state = Iso2022JpDecoderState::EscapeStart;
213                         // The byte in error is the previous
214                         // lead byte.
215                         return (DecoderResult::Malformed(1, 1),
216                                 unread_handle.consumed(),
217                                 destination_handle.written());
218                     }
219                     self.decoder_state = Iso2022JpDecoderState::LeadByte;
220                     let jis0208_lead_minus_offset = self.lead - 0x21;
221                     let byte = b;
222                     let handle = destination_handle;
223                     // The code below uses else after continue in
224                     // order to retain the structure seen in EUC-JP.
225                     let trail_minus_offset = byte.wrapping_sub(0x21);
226                     // Fast-track Hiragana (60% according to Lunde)
227                     // and Katakana (10% acconding to Lunde).
228                     if jis0208_lead_minus_offset == 0x03 && trail_minus_offset < 0x53 {
229                         // Hiragana
230                         handle.write_upper_bmp(0x3041 + trail_minus_offset as u16);
231                         continue;
232                     } else if jis0208_lead_minus_offset == 0x04 && trail_minus_offset < 0x56 {
233                         // Katakana
234                         handle.write_upper_bmp(0x30A1 + trail_minus_offset as u16);
235                         continue;
236                     } else if trail_minus_offset > (0xFE - 0xA1) {
237                         return (DecoderResult::Malformed(2, 0),
238                                 unread_handle.consumed(),
239                                 handle.written());
240                     } else {
241                         let pointer = mul_94(jis0208_lead_minus_offset) +
242                                       trail_minus_offset as usize;
243                         let level1_pointer = pointer.wrapping_sub(1410);
244                         if level1_pointer < JIS0208_LEVEL1_KANJI.len() {
245                             handle.write_upper_bmp(JIS0208_LEVEL1_KANJI[level1_pointer]);
246                             continue;
247                         } else {
248                             let level2_pointer = pointer.wrapping_sub(4418);
249                             if level2_pointer < JIS0208_LEVEL2_AND_ADDITIONAL_KANJI.len() {
250                                 handle.write_upper_bmp(
251                                     JIS0208_LEVEL2_AND_ADDITIONAL_KANJI[level2_pointer],
252                                 );
253                                 continue;
254                             } else {
255                                 let ibm_pointer = pointer.wrapping_sub(8272);
256                                 if ibm_pointer < IBM_KANJI.len() {
257                                     handle.write_upper_bmp(IBM_KANJI[ibm_pointer]);
258                                     continue;
259                                 } else if let Some(bmp) = jis0208_symbol_decode(pointer) {
260                                     handle.write_bmp_excl_ascii(bmp);
261                                     continue;
262                                 } else if let Some(bmp) = jis0208_range_decode(pointer) {
263                                     handle.write_bmp_excl_ascii(bmp);
264                                     continue;
265                                 } else {
266                                     return (DecoderResult::Malformed(2, 0),
267                                             unread_handle.consumed(),
268                                             handle.written());
269                                 }
270                             }
271                         }
272                     }
273                 }
274                 Iso2022JpDecoderState::EscapeStart => {
275                     if b == 0x24u8 || b == 0x28u8 {
276                         self.lead = b;
277                         self.decoder_state = Iso2022JpDecoderState::Escape;
278                         continue;
279                     }
280                     self.output_flag = false;
281                     self.decoder_state = self.output_state;
282                     return (DecoderResult::Malformed(1, 0),
283                             unread_handle.unread(),
284                             destination_handle.written());
285                 }
286                 Iso2022JpDecoderState::Escape => {
287                     let mut state: Option<Iso2022JpDecoderState> = None;
288                     if self.lead == 0x28u8 && b == 0x42u8 {
289                         state = Some(Iso2022JpDecoderState::Ascii);
290                     } else if self.lead == 0x28u8 && b == 0x4Au8 {
291                         state = Some(Iso2022JpDecoderState::Roman);
292                     } else if self.lead == 0x28u8 && b == 0x49u8 {
293                         state = Some(Iso2022JpDecoderState::Katakana);
294                     } else if self.lead == 0x24u8 && (b == 0x40u8 || b == 0x42u8) {
295                         state = Some(Iso2022JpDecoderState::LeadByte);
296                     }
297                     match state {
298                         Some(s) => {
299                             self.lead = 0x0u8;
300                             self.decoder_state = s;
301                             self.output_state = s;
302                             let flag = self.output_flag;
303                             self.output_flag = true;
304                             if flag {
305                                 // We had an escape sequence
306                                 // immediately following another
307                                 // escape sequence. Therefore,
308                                 // the first one of these was
309                                 // useless.
310                                 return (DecoderResult::Malformed(3, 3),
311                                         unread_handle.consumed(),
312                                         destination_handle.written());
313                             }
314                             continue;
315                         }
316                         None => {
317                             // self.lead is still the previous
318                             // byte. It will be processed in
319                             // the preabmle upon next call.
320                             self.pending_prepended = true;
321                             self.output_flag = false;
322                             self.decoder_state = self.output_state;
323                             // The byte in error is not the
324                             // current or the previous byte but
325                             // the one before those (lone 0x1B).
326                             return (DecoderResult::Malformed(1, 1),
327                                     unread_handle.unread(),
328                                     destination_handle.written());
329                         }
330                     }
331                 }
332             }
333         },
334         self,
335         src_consumed,
336         dest,
337         source,
338         b,
339         destination_handle,
340         unread_handle,
341         check_space_bmp
342     );
343 }
344 
345 
346 #[cfg_attr(feature = "cargo-clippy", allow(if_let_redundant_pattern_matching, if_same_then_else))]
is_mapped_for_two_byte_encode(bmp: u16) -> bool347 fn is_mapped_for_two_byte_encode(bmp: u16) -> bool {
348     // The code below uses else after return to
349     // keep the same structure as in EUC-JP.
350     // Lunde says 60% Hiragana, 30% Kanji, 10% Katakana
351     let bmp_minus_hiragana = bmp.wrapping_sub(0x3041);
352     if bmp_minus_hiragana < 0x53 {
353         true
354     } else if in_inclusive_range16(bmp, 0x4E00, 0x9FA0) {
355         if 0x4EDD == bmp {
356             true
357         } else if let Some(_) = jis0208_level1_kanji_shift_jis_encode(bmp) {
358             // Use the shift_jis variant, because we don't care about the
359             // byte values here.
360             true
361         } else if let Some(_) = jis0208_level2_and_additional_kanji_encode(bmp) {
362             true
363         } else if let Some(_) = position(&IBM_KANJI[..], bmp) {
364             true
365         } else {
366             false
367         }
368     } else {
369         let bmp_minus_katakana = bmp.wrapping_sub(0x30A1);
370         if bmp_minus_katakana < 0x56 {
371             true
372         } else {
373             let bmp_minus_space = bmp.wrapping_sub(0x3000);
374             if bmp_minus_space < 3 {
375                 // fast-track common punctuation
376                 true
377             } else if in_inclusive_range16(bmp, 0xFF61, 0xFF9F) {
378                 true
379             } else if bmp == 0x2212 {
380                 true
381             } else if let Some(_) = jis0208_range_encode(bmp) {
382                 true
383             } else if in_inclusive_range16(bmp, 0xFA0E, 0xFA2D) || bmp == 0xF929 || bmp == 0xF9DC {
384                 true
385             } else if let Some(_) = ibm_symbol_encode(bmp) {
386                 true
387             } else if let Some(_) = jis0208_symbol_encode(bmp) {
388                 true
389             } else {
390                 false
391             }
392         }
393     }
394 }
395 
/// The character set that the encoder output is currently switched to.
enum Iso2022JpEncoderState {
    /// ASCII (entered with `ESC ( B`); this is the initial state.
    Ascii,
    /// The Roman set (entered with `ESC ( J`), in which U+00A5 is
    /// written as 0x5C and U+203E as 0x7E.
    Roman,
    /// Two-byte JIS X 0208 (entered with `ESC $ B`).
    Jis0208,
}
401 
402 pub struct Iso2022JpEncoder {
403     state: Iso2022JpEncoderState,
404 }
405 
406 impl Iso2022JpEncoder {
new(encoding: &'static Encoding) -> Encoder407     pub fn new(encoding: &'static Encoding) -> Encoder {
408         Encoder::new(
409             encoding,
410             VariantEncoder::Iso2022Jp(Iso2022JpEncoder { state: Iso2022JpEncoderState::Ascii }),
411         )
412     }
413 
has_pending_state(&self) -> bool414     pub fn has_pending_state(&self) -> bool {
415         match self.state {
416             Iso2022JpEncoderState::Ascii => false,
417             _ => true,
418         }
419     }
420 
421 
max_buffer_length_from_utf16_without_replacement(&self, u16_length: usize) -> Option<usize>422     pub fn max_buffer_length_from_utf16_without_replacement(&self,
423                                                             u16_length: usize)
424                                                             -> Option<usize> {
425         // Worst case: every other character is ASCII/Roman and every other
426         // JIS0208.
427         // Two UTF-16 input units:
428         // Transition to Roman: 3
429         // Roman/ASCII: 1
430         // Transition to JIS0208: 3
431         // JIS0208: 2
432         // End transition: 3
433         checked_add_opt(
434             checked_add(3, u16_length.checked_mul(4)),
435             checked_div(u16_length.checked_add(1), 2),
436         )
437     }
438 
max_buffer_length_from_utf8_without_replacement(&self, byte_length: usize) -> Option<usize>439     pub fn max_buffer_length_from_utf8_without_replacement(&self,
440                                                            byte_length: usize)
441                                                            -> Option<usize> {
442         // Worst case: every other character is ASCII/Roman and every other
443         // JIS0208.
444         // Three UTF-8 input units: 1 ASCII, 2 JIS0208
445         // Transition to ASCII: 3
446         // Roman/ASCII: 1
447         // Transition to JIS0208: 3
448         // JIS0208: 2
449         // End transition: 3
450         checked_add(3, byte_length.checked_mul(3))
451     }
452 
453     encoder_functions!(
454         {
455             match self.state {
456                 Iso2022JpEncoderState::Ascii => {}
457                 _ => {
458                     match dest.check_space_three() {
459                         Space::Full(dst_written) => {
460                             return (EncoderResult::OutputFull, src_consumed, dst_written);
461                         }
462                         Space::Available(destination_handle) => {
463                             self.state = Iso2022JpEncoderState::Ascii;
464                             destination_handle.write_three(0x1Bu8, 0x28u8, 0x42u8);
465                         }
466                     }
467                 }
468             }
469         },
470         {
471             match self.state {
472                 Iso2022JpEncoderState::Ascii => {
473                     if c == '\u{0E}' || c == '\u{0F}' || c == '\u{1B}' {
474                         return (EncoderResult::Unmappable('\u{FFFD}'),
475                                 unread_handle.consumed(),
476                                 destination_handle.written());
477                     }
478                     if c <= '\u{7F}' {
479                         destination_handle.write_one(c as u8);
480                         continue;
481                     }
482                     if c == '\u{A5}' || c == '\u{203E}' {
483                         self.state = Iso2022JpEncoderState::Roman;
484                         destination_handle.write_three(0x1Bu8, 0x28u8, 0x4Au8);
485                         unread_handle.unread();
486                         continue;
487                     }
488                     if c > '\u{FFFF}' {
489                         return (EncoderResult::Unmappable(c),
490                                 unread_handle.consumed(),
491                                 destination_handle.written());
492                     }
493                     // Yes, if c is in index, we'll search
494                     // again in the Jis0208 state, but this
495                     // encoder is not worth optimizing.
496                     if is_mapped_for_two_byte_encode(c as u16) {
497                         self.state = Iso2022JpEncoderState::Jis0208;
498                         destination_handle.write_three(0x1Bu8, 0x24u8, 0x42u8);
499                         unread_handle.unread();
500                         continue;
501                     }
502                     return (EncoderResult::Unmappable(c),
503                             unread_handle.consumed(),
504                             destination_handle.written());
505                 }
506                 Iso2022JpEncoderState::Roman => {
507                     if c == '\u{0E}' || c == '\u{0F}' || c == '\u{1B}' {
508                         return (EncoderResult::Unmappable('\u{FFFD}'),
509                                 unread_handle.consumed(),
510                                 destination_handle.written());
511                     }
512                     if c == '\u{5C}' || c == '\u{7E}' {
513                         self.state = Iso2022JpEncoderState::Ascii;
514                         destination_handle.write_three(0x1Bu8, 0x28u8, 0x42u8);
515                         unread_handle.unread();
516                         continue;
517                     }
518                     if c <= '\u{7F}' {
519                         destination_handle.write_one(c as u8);
520                         continue;
521                     }
522                     if c == '\u{A5}' {
523                         destination_handle.write_one(0x5Cu8);
524                         continue;
525                     }
526                     if c == '\u{203E}' {
527                         destination_handle.write_one(0x7Eu8);
528                         continue;
529                     }
530                     if c > '\u{FFFF}' {
531                         return (EncoderResult::Unmappable(c),
532                                 unread_handle.consumed(),
533                                 destination_handle.written());
534                     }
535                     // Yes, if c is in index, we'll search
536                     // again in the Jis0208 state, but this
537                     // encoder is not worth optimizing.
538                     if is_mapped_for_two_byte_encode(c as u16) {
539                         self.state = Iso2022JpEncoderState::Jis0208;
540                         destination_handle.write_three(0x1Bu8, 0x24u8, 0x42u8);
541                         unread_handle.unread();
542                         continue;
543                     }
544                     return (EncoderResult::Unmappable(c),
545                             unread_handle.consumed(),
546                             destination_handle.written());
547                 }
548                 Iso2022JpEncoderState::Jis0208 => {
549                     if c <= '\u{7F}' {
550                         self.state = Iso2022JpEncoderState::Ascii;
551                         destination_handle.write_three(0x1Bu8, 0x28u8, 0x42u8);
552                         unread_handle.unread();
553                         continue;
554                     }
555                     if c == '\u{A5}' || c == '\u{203E}' {
556                         self.state = Iso2022JpEncoderState::Roman;
557                         destination_handle.write_three(0x1Bu8, 0x28u8, 0x4Au8);
558                         unread_handle.unread();
559                         continue;
560                     }
561                     if c > '\u{FFFF}' {
562                         // Transition to ASCII here in order
563                         // not to make it the responsibility
564                         // of the caller.
565                         self.state = Iso2022JpEncoderState::Ascii;
566                         return (EncoderResult::Unmappable(c),
567                                 unread_handle.consumed(),
568                                 destination_handle
569                                     .write_three_return_written(0x1Bu8, 0x28u8, 0x42u8));
570                     }
571                     let bmp = c as u16;
572                     let handle = destination_handle;
573                     // The code below uses else after continue to
574                     // keep the same structure as in EUC-JP.
575                     // Lunde says 60% Hiragana, 30% Kanji, 10% Katakana
576                     let bmp_minus_hiragana = bmp.wrapping_sub(0x3041);
577                     if bmp_minus_hiragana < 0x53 {
578                         handle.write_two(0x24, 0x21 + bmp_minus_hiragana as u8);
579                         continue;
580                     } else if in_inclusive_range16(bmp, 0x4E00, 0x9FA0) {
581                         if 0x4EDD == bmp {
582                             // Ideograph on the symbol row!
583                             handle.write_two(0x21, 0xB8 - 0x80);
584                             continue;
585                         } else if let Some((lead, trail)) =
586                             jis0208_level1_kanji_iso_2022_jp_encode(bmp) {
587                             handle.write_two(lead, trail);
588                             continue;
589                         } else if let Some(pos) = jis0208_level2_and_additional_kanji_encode(bmp) {
590                             let lead = (pos / 94) + (0xD0 - 0x80);
591                             let trail = (pos % 94) + 0x21;
592                             handle.write_two(lead as u8, trail as u8);
593                             continue;
594                         } else if let Some(pos) = position(&IBM_KANJI[..], bmp) {
595                             let lead = (pos / 94) + (0xF9 - 0x80);
596                             let trail = (pos % 94) + 0x21;
597                             handle.write_two(lead as u8, trail as u8);
598                             continue;
599                         } else {
600                             self.state = Iso2022JpEncoderState::Ascii;
601                             return (EncoderResult::Unmappable(c),
602                                     unread_handle.consumed(),
603                                     handle.write_three_return_written(0x1Bu8, 0x28u8, 0x42u8));
604                         }
605                     } else {
606                         let bmp_minus_katakana = bmp.wrapping_sub(0x30A1);
607                         if bmp_minus_katakana < 0x56 {
608                             handle.write_two(0x25, 0x21 + bmp_minus_katakana as u8);
609                             continue;
610                         } else {
611                             let bmp_minus_space = bmp.wrapping_sub(0x3000);
612                             if bmp_minus_space < 3 {
613                                 // fast-track common punctuation
614                                 handle.write_two(0x21, 0x21 + bmp_minus_space as u8);
615                                 continue;
616                             }
617                             let bmp_minus_half_width = bmp.wrapping_sub(0xFF61);
618                             if bmp_minus_half_width <= (0xFF9F - 0xFF61) {
619                                 // We have half-width katakana. The lead is either
620                                 // row 1 or 5 of JIS X 0208, so the lookup table
621                                 // only stores the trail.
622                                 let lead = if bmp != 0xFF70 &&
623                                               in_inclusive_range16(bmp, 0xFF66, 0xFF9D) {
624                                     0x25u8
625                                 } else {
626                                     0x21u8
627                                 };
628                                 let trail = ISO_2022_JP_HALF_WIDTH_TRAIL[bmp_minus_half_width as
629                                 usize];
630                                 handle.write_two(lead, trail);
631                                 continue;
632                             } else if bmp == 0x2212 {
633                                 handle.write_two(0x21, 0x5D);
634                                 continue;
635                             } else if let Some(pointer) = jis0208_range_encode(bmp) {
636                                 let lead = (pointer / 94) + 0x21;
637                                 let trail = (pointer % 94) + 0x21;
638                                 handle.write_two(lead as u8, trail as u8);
639                                 continue;
640                             } else if in_inclusive_range16(bmp, 0xFA0E, 0xFA2D) || bmp == 0xF929 ||
641                                       bmp == 0xF9DC {
642                                 // Guaranteed to be found in IBM_KANJI
643                                 let pos = position(&IBM_KANJI[..], bmp).unwrap();
644                                 let lead = (pos / 94) + (0xF9 - 0x80);
645                                 let trail = (pos % 94) + 0x21;
646                                 handle.write_two(lead as u8, trail as u8);
647                                 continue;
648                             } else if let Some(pointer) = ibm_symbol_encode(bmp) {
649                                 let lead = (pointer / 94) + 0x21;
650                                 let trail = (pointer % 94) + 0x21;
651                                 handle.write_two(lead as u8, trail as u8);
652                                 continue;
653                             } else if let Some(pointer) = jis0208_symbol_encode(bmp) {
654                                 let lead = (pointer / 94) + 0x21;
655                                 let trail = (pointer % 94) + 0x21;
656                                 handle.write_two(lead as u8, trail as u8);
657                                 continue;
658                             } else {
659                                 self.state = Iso2022JpEncoderState::Ascii;
660                                 return (EncoderResult::Unmappable(c),
661                                         unread_handle.consumed(),
662                                         handle.write_three_return_written(0x1Bu8, 0x28u8, 0x42u8));
663                             }
664                         }
665                     }
666                 }
667             }
668         },
669         self,
670         src_consumed,
671         source,
672         dest,
673         c,
674         destination_handle,
675         unread_handle,
676         check_space_three
677     );
678 }
679 
680 // Any copyright to the test code below this comment is dedicated to the
681 // Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
682 
683 #[cfg(test)]
684 mod tests {
685     use super::super::testing::*;
686     use super::super::*;
687 
    /// Test helper: forwards `bytes` and the expected text `expect` to the
    /// shared `decode` helper with `ISO_2022_JP` as the encoding under test.
    ///
    /// NOTE(review): `decode` is presumably provided by the glob-imported
    /// `testing` module and presumably asserts that decoding `bytes` yields
    /// `expect` -- confirm against that module.
    fn decode_iso_2022_jp(bytes: &[u8], expect: &str) {
        decode(ISO_2022_JP, bytes, expect);
    }
691 
    /// Test helper: forwards the input text `string` and the expected bytes
    /// `expect` to the shared `encode` helper with `ISO_2022_JP` as the
    /// encoding under test.
    ///
    /// NOTE(review): `encode` is presumably provided by the glob-imported
    /// `testing` module and presumably asserts that encoding `string` yields
    /// `expect` -- confirm against that module.
    fn encode_iso_2022_jp(string: &str, expect: &[u8]) {
        encode(ISO_2022_JP, string, expect);
    }
695 
696     #[test]
test_iso_2022_jp_decode()697     fn test_iso_2022_jp_decode() {
698         // Empty
699         decode_iso_2022_jp(b"", &"");
700 
701         // ASCII
702         decode_iso_2022_jp(b"\x61\x62", "\u{0061}\u{0062}");
703         decode_iso_2022_jp(b"\x7F\x0E\x0F", "\u{007F}\u{FFFD}\u{FFFD}");
704 
705         // Partial escapes
706         decode_iso_2022_jp(b"\x1B", "\u{FFFD}");
707         decode_iso_2022_jp(b"\x1B$", "\u{FFFD}$");
708         decode_iso_2022_jp(b"\x1B(", "\u{FFFD}(");
709         decode_iso_2022_jp(b"\x1B.", "\u{FFFD}.");
710 
711         // ISO escapes
712         decode_iso_2022_jp(b"\x1B(B", ""); // ASCII
713         decode_iso_2022_jp(b"\x1B(J", ""); // Roman
714         decode_iso_2022_jp(b"\x1B$@", ""); // 0208
715         decode_iso_2022_jp(b"\x1B$B", ""); // 0208
716         decode_iso_2022_jp(b"\x1B$(D", "\u{FFFD}$(D"); // 2012
717         decode_iso_2022_jp(b"\x1B$A", "\u{FFFD}$A"); // GB2312
718         decode_iso_2022_jp(b"\x1B$(C", "\u{FFFD}$(C"); // KR
719         decode_iso_2022_jp(b"\x1B.A", "\u{FFFD}.A"); // Latin-1
720         decode_iso_2022_jp(b"\x1B.F", "\u{FFFD}.F"); // Greek
721         decode_iso_2022_jp(b"\x1B(I", ""); // Half-width Katakana
722         decode_iso_2022_jp(b"\x1B$(O", "\u{FFFD}$(O"); // 2013
723         decode_iso_2022_jp(b"\x1B$(P", "\u{FFFD}$(P"); // 2013
724         decode_iso_2022_jp(b"\x1B$(Q", "\u{FFFD}$(Q"); // 2013
725         decode_iso_2022_jp(b"\x1B$)C", "\u{FFFD}$)C"); // KR
726         decode_iso_2022_jp(b"\x1B$)A", "\u{FFFD}$)A"); // GB2312
727         decode_iso_2022_jp(b"\x1B$)G", "\u{FFFD}$)G"); // CNS
728         decode_iso_2022_jp(b"\x1B$*H", "\u{FFFD}$*H"); // CNS
729         decode_iso_2022_jp(b"\x1B$)E", "\u{FFFD}$)E"); // IR
730         decode_iso_2022_jp(b"\x1B$+I", "\u{FFFD}$+I"); // CNS
731         decode_iso_2022_jp(b"\x1B$+J", "\u{FFFD}$+J"); // CNS
732         decode_iso_2022_jp(b"\x1B$+K", "\u{FFFD}$+K"); // CNS
733         decode_iso_2022_jp(b"\x1B$+L", "\u{FFFD}$+L"); // CNS
734         decode_iso_2022_jp(b"\x1B$+M", "\u{FFFD}$+M"); // CNS
735         decode_iso_2022_jp(b"\x1B$(@", "\u{FFFD}$(@"); // 0208
736         decode_iso_2022_jp(b"\x1B$(A", "\u{FFFD}$(A"); // GB2312
737         decode_iso_2022_jp(b"\x1B$(B", "\u{FFFD}$(B"); // 0208
738         decode_iso_2022_jp(b"\x1B%G", "\u{FFFD}%G"); // UTF-8
739 
740         // ASCII
741         decode_iso_2022_jp(b"\x5B", "\u{005B}");
742         decode_iso_2022_jp(b"\x5C", "\u{005C}");
743         decode_iso_2022_jp(b"\x7E", "\u{007E}");
744         decode_iso_2022_jp(b"\x0E", "\u{FFFD}");
745         decode_iso_2022_jp(b"\x0F", "\u{FFFD}");
746         decode_iso_2022_jp(b"\x80", "\u{FFFD}");
747         decode_iso_2022_jp(b"\xFF", "\u{FFFD}");
748         decode_iso_2022_jp(b"\x1B(B\x5B", "\u{005B}");
749         decode_iso_2022_jp(b"\x1B(B\x5C", "\u{005C}");
750         decode_iso_2022_jp(b"\x1B(B\x7E", "\u{007E}");
751         decode_iso_2022_jp(b"\x1B(B\x0E", "\u{FFFD}");
752         decode_iso_2022_jp(b"\x1B(B\x0F", "\u{FFFD}");
753         decode_iso_2022_jp(b"\x1B(B\x80", "\u{FFFD}");
754         decode_iso_2022_jp(b"\x1B(B\xFF", "\u{FFFD}");
755 
756         // Roman
757         decode_iso_2022_jp(b"\x1B(J\x5B", "\u{005B}");
758         decode_iso_2022_jp(b"\x1B(J\x5C", "\u{00A5}");
759         decode_iso_2022_jp(b"\x1B(J\x7E", "\u{203E}");
760         decode_iso_2022_jp(b"\x1B(J\x0E", "\u{FFFD}");
761         decode_iso_2022_jp(b"\x1B(J\x0F", "\u{FFFD}");
762         decode_iso_2022_jp(b"\x1B(J\x80", "\u{FFFD}");
763         decode_iso_2022_jp(b"\x1B(J\xFF", "\u{FFFD}");
764 
765         // Katakana
766         decode_iso_2022_jp(b"\x1B(I\x20", "\u{FFFD}");
767         decode_iso_2022_jp(b"\x1B(I\x21", "\u{FF61}");
768         decode_iso_2022_jp(b"\x1B(I\x5F", "\u{FF9F}");
769         decode_iso_2022_jp(b"\x1B(I\x60", "\u{FFFD}");
770         decode_iso_2022_jp(b"\x1B(I\x0E", "\u{FFFD}");
771         decode_iso_2022_jp(b"\x1B(I\x0F", "\u{FFFD}");
772         decode_iso_2022_jp(b"\x1B(I\x80", "\u{FFFD}");
773         decode_iso_2022_jp(b"\x1B(I\xFF", "\u{FFFD}");
774 
775         // 0208 differences from 1978 to 1983
776         decode_iso_2022_jp(b"\x1B$@\x54\x64", "\u{58FA}");
777         decode_iso_2022_jp(b"\x1B$@\x44\x5B", "\u{58F7}");
778         decode_iso_2022_jp(b"\x1B$@\x74\x21", "\u{582F}");
779         decode_iso_2022_jp(b"\x1B$@\x36\x46", "\u{5C2D}");
780         decode_iso_2022_jp(b"\x1B$@\x28\x2E", "\u{250F}");
781         decode_iso_2022_jp(b"\x1B$B\x54\x64", "\u{58FA}");
782         decode_iso_2022_jp(b"\x1B$B\x44\x5B", "\u{58F7}");
783         decode_iso_2022_jp(b"\x1B$B\x74\x21", "\u{582F}");
784         decode_iso_2022_jp(b"\x1B$B\x36\x46", "\u{5C2D}");
785         decode_iso_2022_jp(b"\x1B$B\x28\x2E", "\u{250F}");
786 
787         // Broken 0208
788         decode_iso_2022_jp(b"\x1B$B\x28\x41", "\u{FFFD}");
789         decode_iso_2022_jp(b"\x1B$@\x80\x54\x64", "\u{FFFD}\u{58FA}");
790         decode_iso_2022_jp(b"\x1B$B\x28\x80", "\u{FFFD}");
791 
792         // Transitions
793         decode_iso_2022_jp(b"\x1B(B\x5C\x1B(J\x5C", "\u{005C}\u{00A5}");
794         decode_iso_2022_jp(b"\x1B(B\x5C\x1B(I\x21", "\u{005C}\u{FF61}");
795         decode_iso_2022_jp(b"\x1B(B\x5C\x1B$@\x54\x64", "\u{005C}\u{58FA}");
796         decode_iso_2022_jp(b"\x1B(B\x5C\x1B$B\x54\x64", "\u{005C}\u{58FA}");
797 
798         decode_iso_2022_jp(b"\x1B(J\x5C\x1B(B\x5C", "\u{00A5}\u{005C}");
799         decode_iso_2022_jp(b"\x1B(J\x5C\x1B(I\x21", "\u{00A5}\u{FF61}");
800         decode_iso_2022_jp(b"\x1B(J\x5C\x1B$@\x54\x64", "\u{00A5}\u{58FA}");
801         decode_iso_2022_jp(b"\x1B(J\x5C\x1B$B\x54\x64", "\u{00A5}\u{58FA}");
802 
803         decode_iso_2022_jp(b"\x1B(I\x21\x1B(J\x5C", "\u{FF61}\u{00A5}");
804         decode_iso_2022_jp(b"\x1B(I\x21\x1B(B\x5C", "\u{FF61}\u{005C}");
805         decode_iso_2022_jp(b"\x1B(I\x21\x1B$@\x54\x64", "\u{FF61}\u{58FA}");
806         decode_iso_2022_jp(b"\x1B(I\x21\x1B$B\x54\x64", "\u{FF61}\u{58FA}");
807 
808         decode_iso_2022_jp(b"\x1B$@\x54\x64\x1B(J\x5C", "\u{58FA}\u{00A5}");
809         decode_iso_2022_jp(b"\x1B$@\x54\x64\x1B(I\x21", "\u{58FA}\u{FF61}");
810         decode_iso_2022_jp(b"\x1B$@\x54\x64\x1B(B\x5C", "\u{58FA}\u{005C}");
811         decode_iso_2022_jp(b"\x1B$@\x54\x64\x1B$B\x54\x64", "\u{58FA}\u{58FA}");
812 
813         decode_iso_2022_jp(b"\x1B$B\x54\x64\x1B(J\x5C", "\u{58FA}\u{00A5}");
814         decode_iso_2022_jp(b"\x1B$B\x54\x64\x1B(I\x21", "\u{58FA}\u{FF61}");
815         decode_iso_2022_jp(b"\x1B$B\x54\x64\x1B$@\x54\x64", "\u{58FA}\u{58FA}");
816         decode_iso_2022_jp(b"\x1B$B\x54\x64\x1B(B\x5C", "\u{58FA}\u{005C}");
817 
818         // Empty transitions
819         decode_iso_2022_jp(b"\x1B(B\x1B(J", "\u{FFFD}");
820         decode_iso_2022_jp(b"\x1B(B\x1B(I", "\u{FFFD}");
821         decode_iso_2022_jp(b"\x1B(B\x1B$@", "\u{FFFD}");
822         decode_iso_2022_jp(b"\x1B(B\x1B$B", "\u{FFFD}");
823 
824         decode_iso_2022_jp(b"\x1B(J\x1B(B", "\u{FFFD}");
825         decode_iso_2022_jp(b"\x1B(J\x1B(I", "\u{FFFD}");
826         decode_iso_2022_jp(b"\x1B(J\x1B$@", "\u{FFFD}");
827         decode_iso_2022_jp(b"\x1B(J\x1B$B", "\u{FFFD}");
828 
829         decode_iso_2022_jp(b"\x1B(I\x1B(J", "\u{FFFD}");
830         decode_iso_2022_jp(b"\x1B(I\x1B(B", "\u{FFFD}");
831         decode_iso_2022_jp(b"\x1B(I\x1B$@", "\u{FFFD}");
832         decode_iso_2022_jp(b"\x1B(I\x1B$B", "\u{FFFD}");
833 
834         decode_iso_2022_jp(b"\x1B$@\x1B(J", "\u{FFFD}");
835         decode_iso_2022_jp(b"\x1B$@\x1B(I", "\u{FFFD}");
836         decode_iso_2022_jp(b"\x1B$@\x1B(B", "\u{FFFD}");
837         decode_iso_2022_jp(b"\x1B$@\x1B$B", "\u{FFFD}");
838 
839         decode_iso_2022_jp(b"\x1B$B\x1B(J", "\u{FFFD}");
840         decode_iso_2022_jp(b"\x1B$B\x1B(I", "\u{FFFD}");
841         decode_iso_2022_jp(b"\x1B$B\x1B$@", "\u{FFFD}");
842         decode_iso_2022_jp(b"\x1B$B\x1B(B", "\u{FFFD}");
843 
844         // Transitions to self
845         decode_iso_2022_jp(b"\x1B(B\x5C\x1B(B\x5C", "\u{005C}\u{005C}");
846         decode_iso_2022_jp(b"\x1B(J\x5C\x1B(J\x5C", "\u{00A5}\u{00A5}");
847         decode_iso_2022_jp(b"\x1B(I\x21\x1B(I\x21", "\u{FF61}\u{FF61}");
848         decode_iso_2022_jp(b"\x1B$@\x54\x64\x1B$@\x54\x64", "\u{58FA}\u{58FA}");
849         decode_iso_2022_jp(b"\x1B$B\x54\x64\x1B$B\x54\x64", "\u{58FA}\u{58FA}");
850     }
851 
852     #[test]
test_iso_2022_jp_encode()853     fn test_iso_2022_jp_encode() {
854         // Empty
855         encode_iso_2022_jp("", b"");
856 
857         // ASCII
858         encode_iso_2022_jp("ab", b"ab");
859         encode_iso_2022_jp("\u{1F4A9}", b"&#128169;");
860         encode_iso_2022_jp("\x1B", b"&#65533;");
861         encode_iso_2022_jp("\x0E", b"&#65533;");
862         encode_iso_2022_jp("\x0F", b"&#65533;");
863 
864         // Roman
865         encode_iso_2022_jp("a\u{00A5}b", b"a\x1B(J\x5Cb\x1B(B");
866         encode_iso_2022_jp("a\u{203E}b", b"a\x1B(J\x7Eb\x1B(B");
867         encode_iso_2022_jp("a\u{00A5}b\x5C", b"a\x1B(J\x5Cb\x1B(B\x5C");
868         encode_iso_2022_jp("a\u{203E}b\x7E", b"a\x1B(J\x7Eb\x1B(B\x7E");
869         encode_iso_2022_jp("\u{00A5}\u{1F4A9}", b"\x1B(J\x5C&#128169;\x1B(B");
870         encode_iso_2022_jp("\u{00A5}\x1B", b"\x1B(J\x5C&#65533;\x1B(B");
871         encode_iso_2022_jp("\u{00A5}\x0E", b"\x1B(J\x5C&#65533;\x1B(B");
872         encode_iso_2022_jp("\u{00A5}\x0F", b"\x1B(J\x5C&#65533;\x1B(B");
873         encode_iso_2022_jp("\u{00A5}\u{58FA}", b"\x1B(J\x5C\x1B$B\x54\x64\x1B(B");
874 
875         // Half-width Katakana
876         encode_iso_2022_jp("\u{FF61}", b"\x1B$B\x21\x23\x1B(B");
877         encode_iso_2022_jp("\u{FF65}", b"\x1B$B\x21\x26\x1B(B");
878         encode_iso_2022_jp("\u{FF66}", b"\x1B$B\x25\x72\x1B(B");
879         encode_iso_2022_jp("\u{FF70}", b"\x1B$B\x21\x3C\x1B(B");
880         encode_iso_2022_jp("\u{FF9D}", b"\x1B$B\x25\x73\x1B(B");
881         encode_iso_2022_jp("\u{FF9E}", b"\x1B$B\x21\x2B\x1B(B");
882         encode_iso_2022_jp("\u{FF9F}", b"\x1B$B\x21\x2C\x1B(B");
883 
884         // 0208
885         encode_iso_2022_jp("\u{58FA}", b"\x1B$B\x54\x64\x1B(B");
886         encode_iso_2022_jp("\u{58FA}\u{250F}", b"\x1B$B\x54\x64\x28\x2E\x1B(B");
887         encode_iso_2022_jp("\u{58FA}\u{1F4A9}", b"\x1B$B\x54\x64\x1B(B&#128169;");
888         encode_iso_2022_jp("\u{58FA}\x1B", b"\x1B$B\x54\x64\x1B(B&#65533;");
889         encode_iso_2022_jp("\u{58FA}\x0E", b"\x1B$B\x54\x64\x1B(B&#65533;");
890         encode_iso_2022_jp("\u{58FA}\x0F", b"\x1B$B\x54\x64\x1B(B&#65533;");
891         encode_iso_2022_jp("\u{58FA}\u{00A5}", b"\x1B$B\x54\x64\x1B(J\x5C\x1B(B");
892         encode_iso_2022_jp("\u{58FA}a", b"\x1B$B\x54\x64\x1B(Ba");
893 
894     }
895 
896     #[test]
test_iso_2022_jp_decode_all()897     fn test_iso_2022_jp_decode_all() {
898         let input = include_bytes!("test_data/iso_2022_jp_in.txt");
899         let expectation = include_str!("test_data/iso_2022_jp_in_ref.txt");
900         let (cow, had_errors) = ISO_2022_JP.decode_without_bom_handling(input);
901         assert!(had_errors, "Should have had errors.");
902         assert_eq!(&cow[..], expectation);
903     }
904 
905     #[test]
test_iso_2022_jp_encode_all()906     fn test_iso_2022_jp_encode_all() {
907         let input = include_str!("test_data/iso_2022_jp_out.txt");
908         let expectation = include_bytes!("test_data/iso_2022_jp_out_ref.txt");
909         let (cow, encoding, had_errors) = ISO_2022_JP.encode(input);
910         assert!(!had_errors, "Should not have had errors.");
911         assert_eq!(encoding, ISO_2022_JP);
912         assert_eq!(&cow[..], &expectation[..]);
913     }
914 
915     #[test]
test_iso_2022_jp_half_width_katakana_length()916     fn test_iso_2022_jp_half_width_katakana_length() {
917         let mut output = [0u8; 20];
918         let mut decoder = ISO_2022_JP.new_decoder();
919         {
920             let (result, read, written) =
921                 decoder.decode_to_utf8_without_replacement(b"\x1B\x28\x49", &mut output, false);
922             assert_eq!(result, DecoderResult::InputEmpty);
923             assert_eq!(read, 3);
924             assert_eq!(written, 0);
925         }
926         {
927             let needed = decoder
928                 .max_utf8_buffer_length_without_replacement(1)
929                 .unwrap();
930             let (result, read, written) =
931                 decoder.decode_to_utf8_without_replacement(b"\x21", &mut output[..needed], true);
932             assert_eq!(result, DecoderResult::InputEmpty);
933             assert_eq!(read, 1);
934             assert_eq!(written, 3);
935             assert_eq!(output[0], 0xEF);
936             assert_eq!(output[1], 0xBD);
937             assert_eq!(output[2], 0xA1);
938         }
939     }
940 
941     #[test]
test_iso_2022_jp_length_after_escape()942     fn test_iso_2022_jp_length_after_escape() {
943         let mut output = [0u16; 20];
944         let mut decoder = ISO_2022_JP.new_decoder();
945         {
946             let (result, read, written, had_errors) =
947                 decoder.decode_to_utf16(b"\x1B", &mut output, false);
948             assert_eq!(result, CoderResult::InputEmpty);
949             assert_eq!(read, 1);
950             assert_eq!(written, 0);
951             assert!(!had_errors);
952         }
953         {
954             let needed = decoder.max_utf16_buffer_length(1).unwrap();
955             let (result, read, written, had_errors) =
956                 decoder.decode_to_utf16(b"A", &mut output[..needed], true);
957             assert_eq!(result, CoderResult::InputEmpty);
958             assert_eq!(read, 1);
959             assert_eq!(written, 2);
960             assert!(had_errors);
961             assert_eq!(output[0], 0xFFFD);
962             assert_eq!(output[1], 0x0041);
963         }
964     }
965 
966     #[test]
test_iso_2022_jp_encode_from_two_low_surrogates()967     fn test_iso_2022_jp_encode_from_two_low_surrogates() {
968         let expectation = b"&#65533;&#65533;";
969         let mut output = [0u8; 40];
970         let mut encoder = ISO_2022_JP.new_encoder();
971         let (result, read, written, had_errors) =
972             encoder.encode_from_utf16(&[0xDC00u16, 0xDEDEu16], &mut output[..], true);
973         assert_eq!(result, CoderResult::InputEmpty);
974         assert_eq!(read, 2);
975         assert_eq!(written, expectation.len());
976         assert!(had_errors);
977         assert_eq!(&output[..written], expectation);
978     }
979 
980 }
981