use std::fmt;
use std::io::{self, Read};
use std::str;

/// Error produced while reading a single UTF-8 character from a byte stream.
#[derive(Debug)]
pub enum CharReadError {
    /// The stream ended after some bytes of a character had been read, but before the
    /// character was complete.
    UnexpectedEof,
    /// The bytes that were read are not valid UTF-8.
    Utf8(str::Utf8Error),
    /// The underlying reader returned an I/O error.
    Io(io::Error),
}

12 impl From<str::Utf8Error> for CharReadError {
from(e: str::Utf8Error) -> CharReadError13     fn from(e: str::Utf8Error) -> CharReadError {
14         CharReadError::Utf8(e)
15     }
16 }
17 
18 impl From<io::Error> for CharReadError {
from(e: io::Error) -> CharReadError19     fn from(e: io::Error) -> CharReadError {
20         CharReadError::Io(e)
21     }
22 }
23 
24 impl fmt::Display for CharReadError {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result25     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
26         use self::CharReadError::*;
27         match *self {
28             UnexpectedEof => write!(f, "unexpected end of stream"),
29             Utf8(ref e) => write!(f, "UTF-8 decoding error: {}", e),
30             Io(ref e) => write!(f, "I/O error: {}", e)
31         }
32     }
33 }
34 
next_char_from<R: Read>(source: &mut R) -> Result<Option<char>, CharReadError>35 pub fn next_char_from<R: Read>(source: &mut R) -> Result<Option<char>, CharReadError> {
36     const MAX_CODEPOINT_LEN: usize = 4;
37 
38     let mut bytes = source.bytes();
39     let mut buf = [0u8; MAX_CODEPOINT_LEN];
40     let mut pos = 0;
41 
42     loop {
43         let next = match bytes.next() {
44             Some(Ok(b)) => b,
45             Some(Err(e)) => return Err(e.into()),
46             None if pos == 0 => return Ok(None),
47             None => return Err(CharReadError::UnexpectedEof)
48         };
49         buf[pos] = next;
50         pos += 1;
51 
52         match str::from_utf8(&buf[..pos]) {
53             Ok(s) => return Ok(s.chars().next()),  // always Some(..)
54             Err(_) if pos < MAX_CODEPOINT_LEN => {},
55             Err(e) => return Err(e.into())
56         }
57     }
58 }
59 
#[cfg(test)]
mod tests {
    use std::io;

    /// Reader whose every `read` call fails with a fixed error.
    struct ErrorReader;

    impl io::Read for ErrorReader {
        fn read(&mut self, _buf: &mut [u8]) -> io::Result<usize> {
            Err(io::Error::new(io::ErrorKind::Other, "test error"))
        }
    }

    #[test]
    fn test_next_char_from() {
        let mut bytes: &[u8] = "correct".as_bytes(); // correct ASCII
        assert_eq!(super::next_char_from(&mut bytes).unwrap(), Some('c'));

        let mut bytes: &[u8] = "правильно".as_bytes(); // correct BMP
        assert_eq!(super::next_char_from(&mut bytes).unwrap(), Some('п'));

        // correct non-BMP (U+1F60A, encoded as F0 9F 98 8A)
        let mut bytes: &[u8] = "\u{1F60A}".as_bytes();
        assert_eq!(
            super::next_char_from(&mut bytes).unwrap(),
            Some('\u{1F60A}')
        );

        let mut bytes: &[u8] = b""; // empty
        assert_eq!(super::next_char_from(&mut bytes).unwrap(), None);

        let mut bytes: &[u8] = b"\xf0\x9f\x98"; // incomplete code point
        match super::next_char_from(&mut bytes).unwrap_err() {
            super::CharReadError::UnexpectedEof => {}
            e => panic!("Unexpected result: {:?}", e),
        };

        let mut bytes: &[u8] = b"\xff\x9f\x98\x32"; // invalid code point
        match super::next_char_from(&mut bytes).unwrap_err() {
            super::CharReadError::Utf8(_) => {}
            e => panic!("Unexpected result: {:?}", e),
        };

        // error during read
        let mut r = ErrorReader;
        match super::next_char_from(&mut r).unwrap_err() {
            super::CharReadError::Io(ref e)
                if e.kind() == io::ErrorKind::Other && e.to_string() == "test error" => {}
            e => panic!("Unexpected result: {:?}", e),
        }
    }
}