1 // Copyright 2015-2016 Mozilla Foundation. See the COPYRIGHT 2 // file at the top-level directory of this distribution. 3 // 4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or 5 // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license 6 // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your 7 // option. This file may not be copied, modified, or distributed 8 // except according to those terms. 9 10 use super::*; 11 use ascii::*; 12 use data::position; 13 use handles::*; 14 use variant::*; 15 16 pub struct SingleByteDecoder { 17 table: &'static [u16; 128], 18 } 19 20 impl SingleByteDecoder { new(data: &'static [u16; 128]) -> VariantDecoder21 pub fn new(data: &'static [u16; 128]) -> VariantDecoder { 22 VariantDecoder::SingleByte(SingleByteDecoder { table: data }) 23 } 24 max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize>25 pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize> { 26 Some(byte_length) 27 } 28 max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize>29 pub fn max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize> { 30 byte_length.checked_mul(3) 31 } 32 max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize>33 pub fn max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize> { 34 byte_length.checked_mul(3) 35 } 36 decode_to_utf8_raw( &mut self, src: &[u8], dst: &mut [u8], _last: bool, ) -> (DecoderResult, usize, usize)37 pub fn decode_to_utf8_raw( 38 &mut self, 39 src: &[u8], 40 dst: &mut [u8], 41 _last: bool, 42 ) -> (DecoderResult, usize, usize) { 43 let mut source = ByteSource::new(src); 44 let mut dest = Utf8Destination::new(dst); 45 'outermost: loop { 46 match dest.copy_ascii_from_check_space_bmp(&mut source) { 47 CopyAsciiResult::Stop(ret) => return ret, 48 CopyAsciiResult::GoOn((mut non_ascii, mut handle)) => 'middle: loop { 49 // Start non-boilerplate 50 // 51 // Since the non-ASCIIness of 
`non_ascii` is hidden from 52 // the optimizer, it can't figure out that it's OK to 53 // statically omit the bound check when accessing 54 // `[u16; 128]` with an index 55 // `non_ascii as usize - 0x80usize`. 56 let mapped = 57 unsafe { *(self.table.get_unchecked(non_ascii as usize - 0x80usize)) }; 58 // let mapped = self.table[non_ascii as usize - 0x80usize]; 59 if mapped == 0u16 { 60 return ( 61 DecoderResult::Malformed(1, 0), 62 source.consumed(), 63 handle.written(), 64 ); 65 } 66 let dest_again = handle.write_bmp_excl_ascii(mapped); 67 // End non-boilerplate 68 match source.check_available() { 69 Space::Full(src_consumed) => { 70 return ( 71 DecoderResult::InputEmpty, 72 src_consumed, 73 dest_again.written(), 74 ); 75 } 76 Space::Available(source_handle) => { 77 match dest_again.check_space_bmp() { 78 Space::Full(dst_written) => { 79 return ( 80 DecoderResult::OutputFull, 81 source_handle.consumed(), 82 dst_written, 83 ); 84 } 85 Space::Available(mut destination_handle) => { 86 let (mut b, unread_handle) = source_handle.read(); 87 let source_again = unread_handle.commit(); 88 'innermost: loop { 89 if b > 127 { 90 non_ascii = b; 91 handle = destination_handle; 92 continue 'middle; 93 } 94 // Testing on Haswell says that we should write the 95 // byte unconditionally instead of trying to unread it 96 // to make it part of the next SIMD stride. 
97 let dest_again_again = destination_handle.write_ascii(b); 98 if b < 60 { 99 // We've got punctuation 100 match source_again.check_available() { 101 Space::Full(src_consumed_again) => { 102 return ( 103 DecoderResult::InputEmpty, 104 src_consumed_again, 105 dest_again_again.written(), 106 ); 107 } 108 Space::Available(source_handle_again) => { 109 match dest_again_again.check_space_bmp() { 110 Space::Full(dst_written_again) => { 111 return ( 112 DecoderResult::OutputFull, 113 source_handle_again.consumed(), 114 dst_written_again, 115 ); 116 } 117 Space::Available( 118 destination_handle_again, 119 ) => { 120 let (b_again, _unread_handle_again) = 121 source_handle_again.read(); 122 b = b_again; 123 destination_handle = 124 destination_handle_again; 125 continue 'innermost; 126 } 127 } 128 } 129 } 130 } 131 // We've got markup or ASCII text 132 continue 'outermost; 133 } 134 } 135 } 136 } 137 } 138 }, 139 } 140 } 141 } 142 decode_to_utf16_raw( &mut self, src: &[u8], dst: &mut [u16], _last: bool, ) -> (DecoderResult, usize, usize)143 pub fn decode_to_utf16_raw( 144 &mut self, 145 src: &[u8], 146 dst: &mut [u16], 147 _last: bool, 148 ) -> (DecoderResult, usize, usize) { 149 let (pending, length) = if dst.len() < src.len() { 150 (DecoderResult::OutputFull, dst.len()) 151 } else { 152 (DecoderResult::InputEmpty, src.len()) 153 }; 154 let mut converted = 0usize; 155 'outermost: loop { 156 match unsafe { 157 ascii_to_basic_latin( 158 src.as_ptr().add(converted), 159 dst.as_mut_ptr().add(converted), 160 length - converted, 161 ) 162 } { 163 None => { 164 return (pending, length, length); 165 } 166 Some((mut non_ascii, consumed)) => { 167 converted += consumed; 168 'middle: loop { 169 // `converted` doesn't count the reading of `non_ascii` yet. 
170 // Since the non-ASCIIness of `non_ascii` is hidden from 171 // the optimizer, it can't figure out that it's OK to 172 // statically omit the bound check when accessing 173 // `[u16; 128]` with an index 174 // `non_ascii as usize - 0x80usize`. 175 let mapped = 176 unsafe { *(self.table.get_unchecked(non_ascii as usize - 0x80usize)) }; 177 // let mapped = self.table[non_ascii as usize - 0x80usize]; 178 if mapped == 0u16 { 179 return ( 180 DecoderResult::Malformed(1, 0), 181 converted + 1, // +1 `for non_ascii` 182 converted, 183 ); 184 } 185 unsafe { 186 // The bound check has already been performed 187 *(dst.get_unchecked_mut(converted)) = mapped; 188 } 189 converted += 1; 190 // Next, handle ASCII punctuation and non-ASCII without 191 // going back to ASCII acceleration. Non-ASCII scripts 192 // use ASCII punctuation, so this avoid going to 193 // acceleration just for punctuation/space and then 194 // failing. This is a significant boost to non-ASCII 195 // scripts. 196 // TODO: Split out Latin converters without this part 197 // this stuff makes Latin script-conversion slower. 198 if converted == length { 199 return (pending, length, length); 200 } 201 let mut b = unsafe { *(src.get_unchecked(converted)) }; 202 'innermost: loop { 203 if b > 127 { 204 non_ascii = b; 205 continue 'middle; 206 } 207 // Testing on Haswell says that we should write the 208 // byte unconditionally instead of trying to unread it 209 // to make it part of the next SIMD stride. 
210 unsafe { 211 *(dst.get_unchecked_mut(converted)) = u16::from(b); 212 } 213 converted += 1; 214 if b < 60 { 215 // We've got punctuation 216 if converted == length { 217 return (pending, length, length); 218 } 219 b = unsafe { *(src.get_unchecked(converted)) }; 220 continue 'innermost; 221 } 222 // We've got markup or ASCII text 223 continue 'outermost; 224 } 225 } 226 } 227 } 228 } 229 } 230 latin1_byte_compatible_up_to(&self, buffer: &[u8]) -> usize231 pub fn latin1_byte_compatible_up_to(&self, buffer: &[u8]) -> usize { 232 let mut bytes = buffer; 233 let mut total = 0; 234 loop { 235 if let Some((non_ascii, offset)) = validate_ascii(bytes) { 236 total += offset; 237 let mapped = unsafe { *(self.table.get_unchecked(non_ascii as usize - 0x80usize)) }; 238 if mapped != u16::from(non_ascii) { 239 return total; 240 } 241 total += 1; 242 bytes = &bytes[offset + 1..]; 243 } else { 244 return total; 245 } 246 } 247 } 248 } 249 250 pub struct SingleByteEncoder { 251 table: &'static [u16; 128], 252 run_bmp_offset: usize, 253 run_byte_offset: usize, 254 run_length: usize, 255 } 256 257 impl SingleByteEncoder { new( encoding: &'static Encoding, data: &'static [u16; 128], run_bmp_offset: u16, run_byte_offset: u8, run_length: u8, ) -> Encoder258 pub fn new( 259 encoding: &'static Encoding, 260 data: &'static [u16; 128], 261 run_bmp_offset: u16, 262 run_byte_offset: u8, 263 run_length: u8, 264 ) -> Encoder { 265 Encoder::new( 266 encoding, 267 VariantEncoder::SingleByte(SingleByteEncoder { 268 table: data, 269 run_bmp_offset: run_bmp_offset as usize, 270 run_byte_offset: run_byte_offset as usize, 271 run_length: run_length as usize, 272 }), 273 ) 274 } 275 max_buffer_length_from_utf16_without_replacement( &self, u16_length: usize, ) -> Option<usize>276 pub fn max_buffer_length_from_utf16_without_replacement( 277 &self, 278 u16_length: usize, 279 ) -> Option<usize> { 280 Some(u16_length) 281 } 282 max_buffer_length_from_utf8_without_replacement( &self, byte_length: usize, ) 
-> Option<usize>283 pub fn max_buffer_length_from_utf8_without_replacement( 284 &self, 285 byte_length: usize, 286 ) -> Option<usize> { 287 Some(byte_length) 288 } 289 290 #[inline(always)] encode_u16(&self, code_unit: u16) -> Option<u8>291 fn encode_u16(&self, code_unit: u16) -> Option<u8> { 292 // First, we see if the code unit falls into a run of consecutive 293 // code units that can be mapped by offset. This is very efficient 294 // for most non-Latin encodings as well as Latin1-ish encodings. 295 // 296 // For encodings that don't fit this pattern, the run (which may 297 // have the length of just one) just establishes the starting point 298 // for the next rule. 299 // 300 // Next, we do a forward linear search in the part of the index 301 // after the run. Even in non-Latin1-ish Latin encodings (except 302 // macintosh), the lower case letters are here. 303 // 304 // Next, we search the third quadrant up to the start of the run 305 // (upper case letters in Latin encodings except macintosh, in 306 // Greek and in KOI encodings) and then the second quadrant, 307 // except if the run stared before the third quadrant, we search 308 // the second quadrant up to the run. 309 // 310 // Last, we search the first quadrant, which has unused controls 311 // or punctuation in most encodings. This is bad for macintosh 312 // and IBM866, but those are rare. 
313 314 // Run of consecutive units 315 let unit_as_usize = code_unit as usize; 316 let offset = unit_as_usize.wrapping_sub(self.run_bmp_offset); 317 if offset < self.run_length { 318 return Some((128 + self.run_byte_offset + offset) as u8); 319 } 320 321 // Search after the run 322 let tail_start = self.run_byte_offset + self.run_length; 323 if let Some(pos) = position(&self.table[tail_start..], code_unit) { 324 return Some((128 + tail_start + pos) as u8); 325 } 326 327 if self.run_byte_offset >= 64 { 328 // Search third quadrant before the run 329 if let Some(pos) = position(&self.table[64..self.run_byte_offset], code_unit) { 330 return Some(((128 + 64) + pos) as u8); 331 } 332 333 // Search second quadrant 334 if let Some(pos) = position(&self.table[32..64], code_unit) { 335 return Some(((128 + 32) + pos) as u8); 336 } 337 } else if let Some(pos) = position(&self.table[32..self.run_byte_offset], code_unit) { 338 // windows-1252, windows-874, ISO-8859-15 and ISO-8859-5 339 // Search second quadrant before the run 340 return Some(((128 + 32) + pos) as u8); 341 } 342 343 // Search first quadrant 344 if let Some(pos) = position(&self.table[..32], code_unit) { 345 return Some((128 + pos) as u8); 346 } 347 348 None 349 } 350 351 ascii_compatible_bmp_encoder_function!( 352 { 353 match self.encode_u16(bmp) { 354 Some(byte) => handle.write_one(byte), 355 None => { 356 return ( 357 EncoderResult::unmappable_from_bmp(bmp), 358 source.consumed(), 359 handle.written(), 360 ); 361 } 362 } 363 }, 364 bmp, 365 self, 366 source, 367 handle, 368 copy_ascii_to_check_space_one, 369 check_space_one, 370 encode_from_utf8_raw, 371 str, 372 Utf8Source, 373 true 374 ); 375 encode_from_utf16_raw( &mut self, src: &[u16], dst: &mut [u8], _last: bool, ) -> (EncoderResult, usize, usize)376 pub fn encode_from_utf16_raw( 377 &mut self, 378 src: &[u16], 379 dst: &mut [u8], 380 _last: bool, 381 ) -> (EncoderResult, usize, usize) { 382 let (pending, length) = if dst.len() < src.len() { 383 
(EncoderResult::OutputFull, dst.len()) 384 } else { 385 (EncoderResult::InputEmpty, src.len()) 386 }; 387 let mut converted = 0usize; 388 'outermost: loop { 389 match unsafe { 390 basic_latin_to_ascii( 391 src.as_ptr().add(converted), 392 dst.as_mut_ptr().add(converted), 393 length - converted, 394 ) 395 } { 396 None => { 397 return (pending, length, length); 398 } 399 Some((mut non_ascii, consumed)) => { 400 converted += consumed; 401 'middle: loop { 402 // `converted` doesn't count the reading of `non_ascii` yet. 403 match self.encode_u16(non_ascii) { 404 Some(byte) => { 405 unsafe { 406 *(dst.get_unchecked_mut(converted)) = byte; 407 } 408 converted += 1; 409 } 410 None => { 411 // At this point, we need to know if we 412 // have a surrogate. 413 let high_bits = non_ascii & 0xFC00u16; 414 if high_bits == 0xD800u16 { 415 // high surrogate 416 if converted + 1 == length { 417 // End of buffer. This surrogate is unpaired. 418 return ( 419 EncoderResult::Unmappable('\u{FFFD}'), 420 converted + 1, // +1 `for non_ascii` 421 converted, 422 ); 423 } 424 let second = 425 u32::from(unsafe { *src.get_unchecked(converted + 1) }); 426 if second & 0xFC00u32 != 0xDC00u32 { 427 return ( 428 EncoderResult::Unmappable('\u{FFFD}'), 429 converted + 1, // +1 `for non_ascii` 430 converted, 431 ); 432 } 433 // The next code unit is a low surrogate. 
434 let astral: char = unsafe { 435 ::std::char::from_u32_unchecked( 436 (u32::from(non_ascii) << 10) + second 437 - (((0xD800u32 << 10) - 0x1_0000u32) + 0xDC00u32), 438 ) 439 }; 440 return ( 441 EncoderResult::Unmappable(astral), 442 converted + 2, // +2 `for non_ascii` and `second` 443 converted, 444 ); 445 } 446 if high_bits == 0xDC00u16 { 447 // Unpaired low surrogate 448 return ( 449 EncoderResult::Unmappable('\u{FFFD}'), 450 converted + 1, // +1 `for non_ascii` 451 converted, 452 ); 453 } 454 return ( 455 EncoderResult::unmappable_from_bmp(non_ascii), 456 converted + 1, // +1 `for non_ascii` 457 converted, 458 ); 459 } 460 } 461 // Next, handle ASCII punctuation and non-ASCII without 462 // going back to ASCII acceleration. Non-ASCII scripts 463 // use ASCII punctuation, so this avoid going to 464 // acceleration just for punctuation/space and then 465 // failing. This is a significant boost to non-ASCII 466 // scripts. 467 // TODO: Split out Latin converters without this part 468 // this stuff makes Latin script-conversion slower. 469 if converted == length { 470 return (pending, length, length); 471 } 472 let mut unit = unsafe { *(src.get_unchecked(converted)) }; 473 'innermost: loop { 474 if unit > 127 { 475 non_ascii = unit; 476 continue 'middle; 477 } 478 // Testing on Haswell says that we should write the 479 // byte unconditionally instead of trying to unread it 480 // to make it part of the next SIMD stride. 481 unsafe { 482 *(dst.get_unchecked_mut(converted)) = unit as u8; 483 } 484 converted += 1; 485 if unit < 60 { 486 // We've got punctuation 487 if converted == length { 488 return (pending, length, length); 489 } 490 unit = unsafe { *(src.get_unchecked(converted)) }; 491 continue 'innermost; 492 } 493 // We've got markup or ASCII text 494 continue 'outermost; 495 } 496 } 497 } 498 } 499 } 500 } 501 } 502 503 // Any copyright to the test code below this comment is dedicated to the 504 // Public Domain. 
// http://creativecommons.org/publicdomain/zero/1.0/

#[cfg(test)]
mod tests {
    use super::super::testing::*;
    use super::super::*;

    #[test]
    fn test_windows_1255_ca() {
        decode(WINDOWS_1255, b"\xCA", "\u{05BA}");
        encode(WINDOWS_1255, "\u{05BA}", b"\xCA");
    }

    #[test]
    fn test_ascii_punctuation() {
        let bytes = b"\xC1\xF5\xF4\xFC \xE5\xDF\xED\xE1\xE9 \xDD\xED\xE1 \xF4\xE5\xF3\xF4. \xC1\xF5\xF4\xFC \xE5\xDF\xED\xE1\xE9 \xDD\xED\xE1 \xF4\xE5\xF3\xF4.";
        let characters = "\u{0391}\u{03C5}\u{03C4}\u{03CC} \
                          \u{03B5}\u{03AF}\u{03BD}\u{03B1}\u{03B9} \u{03AD}\u{03BD}\u{03B1} \
                          \u{03C4}\u{03B5}\u{03C3}\u{03C4}. \u{0391}\u{03C5}\u{03C4}\u{03CC} \
                          \u{03B5}\u{03AF}\u{03BD}\u{03B1}\u{03B9} \u{03AD}\u{03BD}\u{03B1} \
                          \u{03C4}\u{03B5}\u{03C3}\u{03C4}.";
        decode(WINDOWS_1253, bytes, characters);
        encode(WINDOWS_1253, characters, bytes);
    }

    #[test]
    fn test_decode_malformed() {
        decode(
            WINDOWS_1253,
            b"\xC1\xF5\xD2\xF4\xFC",
            "\u{0391}\u{03C5}\u{FFFD}\u{03C4}\u{03CC}",
        );
    }

    // Unmappable characters are expected to come out as decimal numeric
    // character references, which is why the expectations below are spelled
    // with ASCII `&#...;` sequences (byte string literals must be ASCII).
    #[test]
    fn test_encode_unmappables() {
        encode(
            WINDOWS_1253,
            "\u{0391}\u{03C5}\u{2603}\u{03C4}\u{03CC}",
            b"\xC1\xF5&#9731;\xF4\xFC",
        );
        encode(
            WINDOWS_1253,
            "\u{0391}\u{03C5}\u{1F4A9}\u{03C4}\u{03CC}",
            b"\xC1\xF5&#128169;\xF4\xFC",
        );
    }

    // Unpaired surrogates are reported as U+FFFD, i.e. `&#65533;`.
    #[test]
    fn test_encode_unpaired_surrogates() {
        encode_from_utf16(
            WINDOWS_1253,
            &[0x0391u16, 0x03C5u16, 0xDCA9u16, 0x03C4u16, 0x03CCu16],
            b"\xC1\xF5&#65533;\xF4\xFC",
        );
        encode_from_utf16(
            WINDOWS_1253,
            &[0x0391u16, 0x03C5u16, 0xD83Du16, 0x03C4u16, 0x03CCu16],
            b"\xC1\xF5&#65533;\xF4\xFC",
        );
        encode_from_utf16(
            WINDOWS_1253,
            &[0x0391u16, 0x03C5u16, 0x03C4u16, 0x03CCu16, 0xD83Du16],
            b"\xC1\xF5\xF4\xFC&#65533;",
        );
    }

    /// All byte values of the upper half, in ascending order.
    pub const HIGH_BYTES: &[u8; 128] = &[
        0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E,
        0x8F, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D,
        0x9E, 0x9F, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC,
        0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB,
        0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA,
        0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9,
        0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8,
        0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
        0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
    ];

    /// Decodes every upper-half byte and compares against `data`, with
    /// unmapped (zero) slots expected to decode to U+FFFD.
    fn decode_single_byte(encoding: &'static Encoding, data: &'static [u16; 128]) {
        let mut with_replacement = [0u16; 128];
        for (slot, &code_point) in with_replacement.iter_mut().zip(data.iter()) {
            *slot = if code_point == 0 { 0xFFFD } else { code_point };
        }

        decode_to_utf16(encoding, HIGH_BYTES, &with_replacement[..]);
    }

    /// Encodes the whole table and expects each mapped slot to produce its
    /// upper-half byte; zero slots (U+0000) are expected to produce 0.
    fn encode_single_byte(encoding: &'static Encoding, data: &'static [u16; 128]) {
        let mut with_zeros = [0u8; 128];
        for ((slot, &code_point), &byte) in
            with_zeros.iter_mut().zip(data.iter()).zip(HIGH_BYTES.iter())
        {
            if code_point != 0 {
                *slot = byte;
            }
        }

        encode_from_utf16(encoding, data, &with_zeros[..]);
    }

    #[test]
    fn test_single_byte_from_two_low_surrogates() {
        let expectation = b"&#65533;&#65533;";
        let mut output = [0u8; 40];
        let mut encoder = WINDOWS_1253.new_encoder();
        let (result, read, written, had_errors) =
            encoder.encode_from_utf16(&[0xDC00u16, 0xDEDEu16], &mut output[..], true);
        assert_eq!(result, CoderResult::InputEmpty);
        assert_eq!(read, 2);
        assert_eq!(written, expectation.len());
        assert!(had_errors);
        assert_eq!(&output[..written], expectation);
    }

    // These tests are so self-referential that they are pretty useless.

    // BEGIN GENERATED CODE. PLEASE DO NOT EDIT.
    // Instead, please regenerate using generate-encoding-data.py

    #[test]
    fn test_single_byte_decode() {
        decode_single_byte(IBM866, &data::SINGLE_BYTE_DATA.ibm866);
        decode_single_byte(ISO_8859_10, &data::SINGLE_BYTE_DATA.iso_8859_10);
        decode_single_byte(ISO_8859_13, &data::SINGLE_BYTE_DATA.iso_8859_13);
        decode_single_byte(ISO_8859_14, &data::SINGLE_BYTE_DATA.iso_8859_14);
        decode_single_byte(ISO_8859_15, &data::SINGLE_BYTE_DATA.iso_8859_15);
        decode_single_byte(ISO_8859_16, &data::SINGLE_BYTE_DATA.iso_8859_16);
        decode_single_byte(ISO_8859_2, &data::SINGLE_BYTE_DATA.iso_8859_2);
        decode_single_byte(ISO_8859_3, &data::SINGLE_BYTE_DATA.iso_8859_3);
        decode_single_byte(ISO_8859_4, &data::SINGLE_BYTE_DATA.iso_8859_4);
        decode_single_byte(ISO_8859_5, &data::SINGLE_BYTE_DATA.iso_8859_5);
        decode_single_byte(ISO_8859_6, &data::SINGLE_BYTE_DATA.iso_8859_6);
        decode_single_byte(ISO_8859_7, &data::SINGLE_BYTE_DATA.iso_8859_7);
        decode_single_byte(ISO_8859_8, &data::SINGLE_BYTE_DATA.iso_8859_8);
        decode_single_byte(KOI8_R, &data::SINGLE_BYTE_DATA.koi8_r);
        decode_single_byte(KOI8_U, &data::SINGLE_BYTE_DATA.koi8_u);
        decode_single_byte(MACINTOSH, &data::SINGLE_BYTE_DATA.macintosh);
        decode_single_byte(WINDOWS_1250, &data::SINGLE_BYTE_DATA.windows_1250);
        decode_single_byte(WINDOWS_1251, &data::SINGLE_BYTE_DATA.windows_1251);
        decode_single_byte(WINDOWS_1252, &data::SINGLE_BYTE_DATA.windows_1252);
        decode_single_byte(WINDOWS_1253, &data::SINGLE_BYTE_DATA.windows_1253);
        decode_single_byte(WINDOWS_1254, &data::SINGLE_BYTE_DATA.windows_1254);
        decode_single_byte(WINDOWS_1255, &data::SINGLE_BYTE_DATA.windows_1255);
        decode_single_byte(WINDOWS_1256, &data::SINGLE_BYTE_DATA.windows_1256);
        decode_single_byte(WINDOWS_1257, &data::SINGLE_BYTE_DATA.windows_1257);
        decode_single_byte(WINDOWS_1258, &data::SINGLE_BYTE_DATA.windows_1258);
        decode_single_byte(WINDOWS_874, &data::SINGLE_BYTE_DATA.windows_874);
        decode_single_byte(X_MAC_CYRILLIC, &data::SINGLE_BYTE_DATA.x_mac_cyrillic);
    }

    #[test]
    fn test_single_byte_encode() {
        encode_single_byte(IBM866, &data::SINGLE_BYTE_DATA.ibm866);
        encode_single_byte(ISO_8859_10, &data::SINGLE_BYTE_DATA.iso_8859_10);
        encode_single_byte(ISO_8859_13, &data::SINGLE_BYTE_DATA.iso_8859_13);
        encode_single_byte(ISO_8859_14, &data::SINGLE_BYTE_DATA.iso_8859_14);
        encode_single_byte(ISO_8859_15, &data::SINGLE_BYTE_DATA.iso_8859_15);
        encode_single_byte(ISO_8859_16, &data::SINGLE_BYTE_DATA.iso_8859_16);
        encode_single_byte(ISO_8859_2, &data::SINGLE_BYTE_DATA.iso_8859_2);
        encode_single_byte(ISO_8859_3, &data::SINGLE_BYTE_DATA.iso_8859_3);
        encode_single_byte(ISO_8859_4, &data::SINGLE_BYTE_DATA.iso_8859_4);
        encode_single_byte(ISO_8859_5, &data::SINGLE_BYTE_DATA.iso_8859_5);
        encode_single_byte(ISO_8859_6, &data::SINGLE_BYTE_DATA.iso_8859_6);
        encode_single_byte(ISO_8859_7, &data::SINGLE_BYTE_DATA.iso_8859_7);
        encode_single_byte(ISO_8859_8, &data::SINGLE_BYTE_DATA.iso_8859_8);
        encode_single_byte(KOI8_R, &data::SINGLE_BYTE_DATA.koi8_r);
        encode_single_byte(KOI8_U, &data::SINGLE_BYTE_DATA.koi8_u);
        encode_single_byte(MACINTOSH, &data::SINGLE_BYTE_DATA.macintosh);
        encode_single_byte(WINDOWS_1250, &data::SINGLE_BYTE_DATA.windows_1250);
        encode_single_byte(WINDOWS_1251, &data::SINGLE_BYTE_DATA.windows_1251);
        encode_single_byte(WINDOWS_1252, &data::SINGLE_BYTE_DATA.windows_1252);
        encode_single_byte(WINDOWS_1253, &data::SINGLE_BYTE_DATA.windows_1253);
        encode_single_byte(WINDOWS_1254, &data::SINGLE_BYTE_DATA.windows_1254);
        encode_single_byte(WINDOWS_1255, &data::SINGLE_BYTE_DATA.windows_1255);
        encode_single_byte(WINDOWS_1256, &data::SINGLE_BYTE_DATA.windows_1256);
        encode_single_byte(WINDOWS_1257, &data::SINGLE_BYTE_DATA.windows_1257);
        encode_single_byte(WINDOWS_1258, &data::SINGLE_BYTE_DATA.windows_1258);
        encode_single_byte(WINDOWS_874, &data::SINGLE_BYTE_DATA.windows_874);
        encode_single_byte(X_MAC_CYRILLIC, &data::SINGLE_BYTE_DATA.x_mac_cyrillic);
    }
    // END GENERATED CODE
}