1 /* Copyright 2018 The encode_unicode Developers
2  *
3  * Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4  * http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5  * http://opensource.org/licenses/MIT>, at your option. This file may not be
6  * copied, modified, or distributed except according to those terms.
7  */
8 
9 //! Iterator tests
10 
11 #![cfg(feature="std")]
12 
13 extern crate encode_unicode;
14 
15 use encode_unicode::{IterExt, SliceExt, CharExt};
16 use encode_unicode::iterator::Utf8CharSplitter;
17 use encode_unicode::error::InvalidUtf8Slice::*;
18 use encode_unicode::error::InvalidUtf8::*;
19 use encode_unicode::error::InvalidUtf8FirstByte::*;
20 use encode_unicode::error::InvalidCodepoint::*;
21 use encode_unicode::error::Utf16PairError::*;
22 use std::io::Read;
23 use std::cmp::min;
24 
/// Exercises the iterator returned by `to_utf8chars()` on a byte sequence
/// that is not valid UTF-8: checks `size_hint()`, the `Debug` output,
/// the reported errors and what `into_inner()` hands back.
#[test] fn utf8charmerger() {
    let bytes = b"\xf0\xa1\x92X\xcc\xbb";
    let mut merger = bytes.iter().to_utf8chars();

    // Before anything is consumed the buffer is empty
    // and the inner iterator still covers all six bytes.
    let untouched = format!("Utf8CharMerger {{ buffered: [], inner: {:?} }}", bytes.iter());
    assert_eq!(merger.size_hint(), (1, Some(6)));
    assert_eq!(format!("{:?}", merger), untouched);

    // Byte 3 (`X`) is not a continuation byte.
    let first = merger.next();
    assert_eq!(first, Some(Err(Utf8(NotAContinuationByte(3)))));
    assert_eq!(merger.size_hint(), (0, Some(5)));
    // The three bytes after the lead byte are now shown as buffered,
    // and the inner iterator has advanced to the fifth byte.
    let after_error = format!(
        "Utf8CharMerger {{ buffered: [161, 146, 88], inner: {:?} }}",
        bytes[4..].iter()
    );
    assert_eq!(format!("{:?}", merger), after_error);

    // The second error is reported without advancing the inner iterator:
    // it still yields 0xcc afterwards.
    let second = merger.next();
    assert_eq!(second, Some(Err(Utf8(FirstByte(ContinuationByte)))));
    let mut rest = merger.into_inner();
    assert_eq!(rest.next(), Some(&b'\xcc'));
}
42 
/// Runs `utf8char_indices()` over bytes that begin with a too-high codepoint
/// and checks `size_hint()`, the `Debug` output and the number of items produced.
#[test] fn utf8chardecoder() {
    let bytes = b"\xf4\xbf\x80\x80XY\xcc\xbbZ_";
    let mut decoder = bytes.utf8char_indices();

    // Initial state: nothing consumed yet
    assert_eq!(decoder.size_hint(), (2, Some(10)));
    let at_start = format!("Utf8CharDecoder {{ bytes[0..]: {:?} }}", &bytes);
    assert_eq!(format!("{:?}", decoder), at_start);

    // The first item is an error at index 0 with a reported length of one byte
    let first = decoder.next();
    assert_eq!(first, Some((0, Err(Codepoint(TooHigh)), 1)));
    let after_first = format!("Utf8CharDecoder {{ bytes[1..]: {:?} }}", &bytes[1..]);
    assert_eq!(format!("{:?}", decoder), after_first);
    assert_eq!(decoder.size_hint(), (2, Some(9)));

    // `count()` consumes the decoder, so it has to come last
    let remaining = decoder.count();
    assert_eq!(remaining, 8);
}
60 
/// Exercises the iterator returned by `to_utf16chars()` on units containing
/// an unmatched leading surrogate: checks `size_hint()`, the `Debug` output
/// and what `into_inner()` hands back.
#[test] fn utf16charmerger() {
    let units = [0xd800, 'x' as u16, 0xd900, 0xdfff, 'λ' as u16];
    let mut merger = units.iter().to_utf16chars();

    // Initial state: nothing buffered, inner iterator covers all five units
    let untouched = format!("Utf16CharMerger {{ buffered: None, inner: {:?} }}", units.iter());
    assert_eq!(merger.size_hint(), (2, Some(5)));
    assert_eq!(format!("{:?}", merger), untouched);

    // 0xd800 is followed by 'x' instead of a trailing surrogate
    let first = merger.next();
    assert_eq!(first, Some(Err(UnmatchedLeadingSurrogate)));
    assert_eq!(merger.size_hint(), (1, Some(4)));
    // 'x' (120) is now shown as buffered and the inner iterator starts at the third unit
    let after_error = format!(
        "Utf16CharMerger {{ buffered: Some(120), inner: {:?} }}",
        units[2..].iter()
    );
    assert_eq!(format!("{:?}", merger), after_error);

    let mut rest = merger.into_inner();
    assert_eq!(rest.next(), Some(&0xd900));
}
77 
/// Runs `utf16char_indices()` over units that begin with an unmatched leading
/// surrogate and checks `size_hint()`, the `Debug` output and the item count.
#[test] fn utf16chardecoder() {
    let units = [0xd800, 'x' as u16, 0xd900, 0xdfff, 'λ' as u16];
    let mut decoder = units.utf16char_indices();

    // Initial state: nothing consumed yet
    assert_eq!(decoder.size_hint(), (2, Some(5)));
    let at_start = format!("Utf16CharDecoder {{ units[0..]: {:?} }}", &units);
    assert_eq!(format!("{:?}", decoder), at_start);

    // The first item is an error at index 0 with a reported length of one unit
    let first = decoder.next();
    assert_eq!(first, Some((0, Err(UnmatchedLeadingSurrogate), 1)));
    let after_first = format!("Utf16CharDecoder {{ units[1..]: {:?} }}", &units[1..]);
    assert_eq!(format!("{:?}", decoder), after_first);
    assert_eq!(decoder.size_hint(), (2, Some(4)));

    // `count()` consumes the decoder, so it has to come last
    let remaining = decoder.count();
    assert_eq!(remaining, 3);
}
95 
96 
97 
98 /// Tests for ensuring that iterators which also implement Read support
99 /// interleaving calls of `read()` and `next()`, and that they implement Read
100 /// correctly (support any buffer size at any time).
101 
/// Reads a one-byte `Utf8Char` through the `Read` implementation of its
/// iterator, with request sizes from 1 to 4 bytes and a zero-length read
/// before every real read.
#[test] fn read_single_ascii() {
    let uc = 'a'.to_utf8();
    assert_eq!(uc.len(), 1);
    let encoded = AsRef::<[u8]>::as_ref(&uc);
    for chunk in 1..5 {
        let mut buf = [b'E'; 6];
        let mut reader = uc.into_iter();
        let mut filled = 0;
        for _ in 0..4 {
            // A zero-length read must report zero bytes
            let from_empty = reader.read(&mut buf[..0]).unwrap();
            assert_eq!(from_empty, 0);

            // Never more than what is left of the character or what was asked for
            let expected = min(1-filled, chunk);
            let got = reader.read(&mut buf[filled..filled+chunk]).unwrap();
            assert_eq!(got, expected);
            filled += got;

            // Everything past the reported length must still hold the filler byte
            for &untouched in &buf[filled..] {
                assert_eq!(untouched, b'E');
            }
            assert_eq!(buf[..filled], encoded[..filled]);
        }
        assert_eq!(filled, 1);
    }
}
120 
/// Reads a two-byte `Utf8Char` through the `Read` implementation of its
/// iterator, with request sizes from 1 to 4 bytes and a zero-length read
/// before every real read.
#[test] fn read_single_nonascii() {
    let uc = 'ä'.to_utf8();
    assert_eq!(uc.len(), 2);
    let encoded = AsRef::<[u8]>::as_ref(&uc);
    for chunk in 1..5 {
        let mut buf = [b'E'; 6];
        let mut reader = uc.into_iter();
        let mut filled = 0;
        for _ in 0..4 {
            // A zero-length read must report zero bytes
            let from_empty = reader.read(&mut buf[..0]).unwrap();
            assert_eq!(from_empty, 0);

            // Never more than what is left of the character or what was asked for
            let expected = min(2-filled, chunk);
            let got = reader.read(&mut buf[filled..filled+chunk]).unwrap();
            assert_eq!(got, expected);
            filled += got;

            // Everything past the reported length must still hold the filler byte
            for &untouched in &buf[filled..] {
                assert_eq!(untouched, b'E');
            }
            assert_eq!(buf[..filled], encoded[..filled]);
        }
        assert_eq!(filled, 2);
    }
}
139 
140 
/// Reads a string through `Utf8CharSplitter`'s `Read` implementation in
/// fixed-size requests (2 to 5 bytes) and checks after every request that
/// the reported length and the bytes received so far match the string.
#[test] fn utf8charsplitter_read_all_sizes() {
    let s = "1111\u{104444}\u{222}1\u{833}1111\u{100004}";
    let expected = s.as_bytes();
    // presumably ensures that at least one chunk size leaves a short final chunk
    assert_eq!(expected.len() % 3, 1);
    // NOTE(review): a leftover fragment suggested the upper bound was once meant
    // to be `s.len()+4`; confirm whether larger request sizes should be covered.
    for size in 2..6 {
        // Use a pristine buffer for every size: a buffer shared between runs
        // already contains the right bytes from the previous run, which would
        // let the content comparison pass even if `read()` wrote nothing.
        let mut buf = vec![b'E'; expected.len()+6];
        let mut reader = Utf8CharSplitter::from(s.chars().map(|c| c.to_utf8() ));
        for (offset, part) in expected.chunks(size).enumerate() {
            let start = offset*size;
            let end = start+part.len();
            // For a short final chunk, offer the rest of the buffer
            // to check that the reader does not return more than is left.
            let read_to = if part.len() == size {start+size} else {buf.len()};
            assert_eq!(reader.read(&mut buf[start..read_to]).unwrap(), part.len());
            assert_eq!(&buf[..end], &expected[..end]);
        }
        // The reader is exhausted and nothing was written past the string
        assert_eq!(reader.read(&mut buf[..]).unwrap(), 0);
        assert!(buf[expected.len()..].iter().all(|&b| b==b'E' ));
    }
}
156 
/// Drains a `Utf8CharSplitter` one byte at a time, alternating between
/// `next()` and `read()`; the second run swaps which bytes are fetched
/// through which of the two.
#[test] fn utf8charsplitter_alternate_iter_read() {
    let s = "1111\u{104444}\u{222}1\u{833}1111\u{100004}";
    let mut buf = [b'0'; 10];
    for n in 0..2 {
        // Collect first so that size_hint() can be tested:
        // chars().size_hint() returns ((bytes+3)/4, Some(bytes))
        let u8chars: Vec<_> = s.chars().map(|c| c.to_utf8() ).collect();
        let mut splitter = Utf8CharSplitter::from(u8chars.into_iter());
        for (i, byte) in s.bytes().enumerate() {
            // Number of continuation bytes before the next character starts
            let tail = &s.as_bytes()[i..];
            let until_next = tail.iter().take_while(|&&b| (b>>6)==0b10u8 ).count();
            let remaining_chars = s[i+until_next..].chars().count();
            println!("{}. run: byte {:02} of {}, remaining: {:02}+{}: 0b{:08b} = {:?}",
                     n, i, s.len(), remaining_chars, until_next, byte, byte as char);

            // A zero-length read must report zero bytes
            let mut nothing: [u8; 0] = [];
            assert_eq!(splitter.read(&mut nothing).unwrap(), 0);

            let via_next = i % 2 == n;
            if via_next {
                assert_eq!(splitter.next(), Some(byte));
            } else {
                let got = splitter.read(&mut buf[..1]).unwrap();
                assert_eq!(got, 1);
                assert_eq!(buf[0], byte);
            }
        }
        // Exhausted through both interfaces
        assert_eq!(splitter.size_hint(), (0, Some(0)));
        assert_eq!(splitter.next(), None);
        assert_eq!(splitter.read(&mut buf[..]).unwrap(), 0);
    }
}
183