1 // This is a part of rust-encoding.
2 // Copyright (c) 2013-2015, Kang Seonghoon.
3 // See README.md and LICENSE.txt for details.
4 
5 //! Common codec implementation for single-byte encodings.
6 
7 use std::convert::Into;
8 use util::{as_char, StrCharIndex};
9 use types::*;
10 
/// Descriptor shared by all ASCII-based single-byte encodings.
///
/// It bundles the encoding's names with the two lookup functions from which
/// the encoder and decoder in this module are built.
#[derive(Clone, Copy)]
pub struct SingleByteEncoding {
    /// Name returned by `Encoding::name`.
    pub name: &'static str,
    /// Name returned by `Encoding::whatwg_name`, if any.
    pub whatwg_name: Option<&'static str>,
    /// Byte-to-code lookup given to the decoder, which consults it only for
    /// bytes above `0x7f` and treats a result of `0xffff` as "no mapping".
    pub index_forward: extern "Rust" fn(u8) -> u16,
    /// Code-point-to-byte lookup given to the encoder, which consults it only
    /// for characters above U+007F and treats a result of `0` as "no mapping".
    pub index_backward: extern "Rust" fn(u32) -> u8,
}
19 
20 impl Encoding for SingleByteEncoding {
name(&self) -> &'static str21     fn name(&self) -> &'static str { self.name }
whatwg_name(&self) -> Option<&'static str>22     fn whatwg_name(&self) -> Option<&'static str> { self.whatwg_name }
raw_encoder(&self) -> Box<RawEncoder>23     fn raw_encoder(&self) -> Box<RawEncoder> { SingleByteEncoder::new(self.index_backward) }
raw_decoder(&self) -> Box<RawDecoder>24     fn raw_decoder(&self) -> Box<RawDecoder> { SingleByteDecoder::new(self.index_forward) }
25 }
26 
/// Encoder half of an ASCII-based single-byte encoding.
///
/// The only state it carries is the code-point-to-byte lookup function.
#[derive(Copy, Clone)]
pub struct SingleByteEncoder {
    /// Lookup used for characters above U+007F; a result of `0` is treated
    /// as "no mapping" by `raw_feed`.
    index_backward: extern "Rust" fn(u32) -> u8,
}
32 
33 impl SingleByteEncoder {
new(index_backward: extern "Rust" fn(u32) -> u8) -> Box<RawEncoder>34     pub fn new(index_backward: extern "Rust" fn(u32) -> u8) -> Box<RawEncoder> {
35         Box::new(SingleByteEncoder { index_backward: index_backward })
36     }
37 }
38 
39 impl RawEncoder for SingleByteEncoder {
from_self(&self) -> Box<RawEncoder>40     fn from_self(&self) -> Box<RawEncoder> { SingleByteEncoder::new(self.index_backward) }
is_ascii_compatible(&self) -> bool41     fn is_ascii_compatible(&self) -> bool { true }
42 
raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (usize, Option<CodecError>)43     fn raw_feed(&mut self, input: &str, output: &mut ByteWriter) -> (usize, Option<CodecError>) {
44         output.writer_hint(input.len());
45 
46         for ((i,j), ch) in input.index_iter() {
47             if ch <= '\u{7f}' {
48                 output.write_byte(ch as u8);
49                 continue;
50             } else {
51                 let index = (self.index_backward)(ch as u32);
52                 if index != 0 {
53                     output.write_byte(index);
54                 } else {
55                     return (i, Some(CodecError {
56                         upto: j as isize, cause: "unrepresentable character".into()
57                     }));
58                 }
59             }
60         }
61         (input.len(), None)
62     }
63 
raw_finish(&mut self, _output: &mut ByteWriter) -> Option<CodecError>64     fn raw_finish(&mut self, _output: &mut ByteWriter) -> Option<CodecError> {
65         None
66     }
67 }
68 
/// Decoder half of an ASCII-based single-byte encoding.
///
/// The only state it carries is the byte-to-code lookup function.
#[derive(Copy, Clone)]
pub struct SingleByteDecoder {
    /// Lookup used for bytes above `0x7f`; a result of `0xffff` is treated
    /// as "no mapping" by `raw_feed`.
    index_forward: extern "Rust" fn(u8) -> u16,
}
74 
75 impl SingleByteDecoder {
new(index_forward: extern "Rust" fn(u8) -> u16) -> Box<RawDecoder>76     pub fn new(index_forward: extern "Rust" fn(u8) -> u16) -> Box<RawDecoder> {
77         Box::new(SingleByteDecoder { index_forward: index_forward })
78     }
79 }
80 
81 impl RawDecoder for SingleByteDecoder {
from_self(&self) -> Box<RawDecoder>82     fn from_self(&self) -> Box<RawDecoder> { SingleByteDecoder::new(self.index_forward) }
is_ascii_compatible(&self) -> bool83     fn is_ascii_compatible(&self) -> bool { true }
84 
raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option<CodecError>)85     fn raw_feed(&mut self, input: &[u8], output: &mut StringWriter) -> (usize, Option<CodecError>) {
86         output.writer_hint(input.len());
87 
88         let mut i = 0;
89         let len = input.len();
90         while i < len {
91             if input[i] <= 0x7f {
92                 output.write_char(input[i] as char);
93             } else {
94                 let ch = (self.index_forward)(input[i]);
95                 if ch != 0xffff {
96                     output.write_char(as_char(ch as u32));
97                 } else {
98                     return (i, Some(CodecError {
99                         upto: i as isize + 1, cause: "invalid sequence".into()
100                     }));
101                 }
102             }
103             i += 1;
104         }
105         (i, None)
106     }
107 
raw_finish(&mut self, _output: &mut StringWriter) -> Option<CodecError>108     fn raw_finish(&mut self, _output: &mut StringWriter) -> Option<CodecError> {
109         None
110     }
111 }
112 
/// Algorithmic (table-free) mapping for ISO 8859-1.
pub mod iso_8859_1 {
    /// Maps a byte to the code with the same numeric value.
    #[inline]
    pub fn forward(code: u8) -> u16 {
        code as u16
    }

    /// Maps a code in `0x80..=0xff` to the same-valued byte.
    ///
    /// Every other input, including the ASCII range, yields `0`.
    #[inline]
    pub fn backward(code: u32) -> u8 {
        let in_upper_half = 0x80 <= code && code <= 0xff;
        if in_upper_half {
            code as u8
        } else {
            0
        }
    }
}
118 
#[cfg(test)]
mod tests {
    use all::ISO_8859_2;
    use types::*;

    /// Feeds the encoder characters that ISO 8859-2 is expected to reject.
    ///
    /// U+FFFF is the last code point of the BMP and U+10000 is the first one
    /// beyond it.
    // NOTE(review): `assert_feed_err!` is defined elsewhere; presumably its
    // arguments are the accepted prefix, the rejected part, the unprocessed
    // rest and the expected output bytes -- confirm against the macro.
    #[test]
    fn test_encoder_non_bmp() {
        let mut encoder = ISO_8859_2.raw_encoder();
        assert_feed_err!(encoder, "A", "\u{FFFF}", "B", [0x41]);
        assert_feed_err!(encoder, "A", "\u{10000}", "B", [0x41]);
    }
}
131 
132