1 // This is a part of rust-encoding. 2 // Copyright (c) 2013-2015, Kang Seonghoon. 3 // See README.md and LICENSE.txt for details. 4 5 //! Common codec implementation for single-byte encodings. 6 7 use std::convert::Into; 8 use util::{as_char, StrCharIndex}; 9 use types::*; 10 11 /// A common framework for single-byte encodings based on ASCII. 12 #[derive(Copy, Clone)] 13 pub struct SingleByteEncoding { 14 pub name: &'static str, 15 pub whatwg_name: Option<&'static str>, 16 pub index_forward: extern "Rust" fn(u8) -> u16, 17 pub index_backward: extern "Rust" fn(u32) -> u8, 18 } 19 20 impl Encoding for SingleByteEncoding { name(&self) -> &'static str21 fn name(&self) -> &'static str { self.name } whatwg_name(&self) -> Option<&'static str>22 fn whatwg_name(&self) -> Option<&'static str> { self.whatwg_name } raw_encoder(&self) -> Box<RawEncoder>23 fn raw_encoder(&self) -> Box<RawEncoder> { SingleByteEncoder::new(self.index_backward) } raw_decoder(&self) -> Box<RawDecoder>24 fn raw_decoder(&self) -> Box<RawDecoder> { SingleByteDecoder::new(self.index_forward) } 25 } 26 27 /// An encoder for single-byte encodings based on ASCII. 
28 #[derive(Clone, Copy)] 29 pub struct SingleByteEncoder { 30 index_backward: extern "Rust" fn(u32) -> u8, 31 } 32 33 impl SingleByteEncoder { new(index_backward: extern "Rust" fn(u32) -> u8) -> Box<RawEncoder>34 pub fn new(index_backward: extern "Rust" fn(u32) -> u8) -> Box<RawEncoder> { 35 Box::new(SingleByteEncoder { index_backward: index_backward }) 36 } 37 } 38 39 impl RawEncoder for SingleByteEncoder { from_self(&self) -> Box<RawEncoder>40 fn from_self(&self) -> Box<RawEncoder> { SingleByteEncoder::new(self.index_backward) } is_ascii_compatible(&self) -> bool41 fn is_ascii_compatible(&self) -> bool { true } 42 raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (usize, Option<CodecError>)43 fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (usize, Option<CodecError>) { 44 output.writer_hint(input.len()); 45 46 for ((i,j), ch) in input.index_iter() { 47 if ch <= '\u{7f}' { 48 output.write_byte(ch as u8); 49 continue; 50 } else { 51 let index = (self.index_backward)(ch as u32); 52 if index != 0 { 53 output.write_byte(index); 54 } else { 55 return (i, Some(CodecError { 56 upto: j as isize, cause: "unrepresentable character".into() 57 })); 58 } 59 } 60 } 61 (input.len(), None) 62 } 63 raw_finish(&mut self, _output: &mut ByteWriter) -> Option<CodecError>64 fn raw_finish(&mut self, _output: &mut ByteWriter) -> Option<CodecError> { 65 None 66 } 67 } 68 69 /// A decoder for single-byte encodings based on ASCII. 
70 #[derive(Clone, Copy)] 71 pub struct SingleByteDecoder { 72 index_forward: extern "Rust" fn(u8) -> u16, 73 } 74 75 impl SingleByteDecoder { new(index_forward: extern "Rust" fn(u8) -> u16) -> Box<RawDecoder>76 pub fn new(index_forward: extern "Rust" fn(u8) -> u16) -> Box<RawDecoder> { 77 Box::new(SingleByteDecoder { index_forward: index_forward }) 78 } 79 } 80 81 impl RawDecoder for SingleByteDecoder { from_self(&self) -> Box<RawDecoder>82 fn from_self(&self) -> Box<RawDecoder> { SingleByteDecoder::new(self.index_forward) } is_ascii_compatible(&self) -> bool83 fn is_ascii_compatible(&self) -> bool { true } 84 raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option<CodecError>)85 fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option<CodecError>) { 86 output.writer_hint(input.len()); 87 88 let mut i = 0; 89 let len = input.len(); 90 while i < len { 91 if input[i] <= 0x7f { 92 output.write_char(input[i] as char); 93 } else { 94 let ch = (self.index_forward)(input[i]); 95 if ch != 0xffff { 96 output.write_char(as_char(ch as u32)); 97 } else { 98 return (i, Some(CodecError { 99 upto: i as isize + 1, cause: "invalid sequence".into() 100 })); 101 } 102 } 103 i += 1; 104 } 105 (i, None) 106 } 107 raw_finish(&mut self, _output: &mut StringWriter) -> Option<CodecError>108 fn raw_finish(&mut self, _output: &mut StringWriter) -> Option<CodecError> { 109 None 110 } 111 } 112 113 /// Algorithmic mapping for ISO 8859-1. 
pub mod iso_8859_1 {
    /// Widens the byte to 16 bits; the numeric value is returned unchanged.
    #[inline]
    pub fn forward(code: u8) -> u16 {
        code as u16
    }

    /// Returns `code` as a byte when it lies in 0x80..=0xff, and 0 otherwise.
    ///
    /// Values below 0x80 also yield 0: the single-byte encoder in this file writes ASCII
    /// itself and only consults the backward lookup for characters above U+007F.
    #[inline]
    pub fn backward(code: u32) -> u8 {
        let in_upper_half = 0x80 <= code && code <= 0xff;
        if in_upper_half {
            code as u8
        } else {
            0
        }
    }
}

#[cfg(test)]
mod tests {
    use all::ISO_8859_2;
    use types::*;

    // U+FFFF is the last code point of the Basic Multilingual Plane and U+10000 the first
    // one beyond it; both are fed between "A" and "B", with [0x41] as the expected bytes.
    #[test]
    fn test_encoder_non_bmp() {
        let mut e = ISO_8859_2.raw_encoder();
        assert_feed_err!(e, "A", "\u{FFFF}", "B", [0x41]);
        assert_feed_err!(e, "A", "\u{10000}", "B", [0x41]);
    }
}
