1 /* Copyright 2016 The encode_unicode Developers
2  *
3  * Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4  * http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5  * http://opensource.org/licenses/MIT>, at your option. This file may not be
6  * copied, modified, or distributed except according to those terms.
7  */
8 
//! Tests that the methods return the correct errors.
//! Some tests also check a bit more because it's easy.
11 
12 extern crate core;
13 use core::char;
14 extern crate encode_unicode;
15 use encode_unicode::*;
16 use encode_unicode::error::*;
17 use encode_unicode::error::InvalidUtf8Array as a;
18 use encode_unicode::error::InvalidUtf8Slice as s;
19 use encode_unicode::error::InvalidCodepoint::*;
20 use encode_unicode::error::InvalidUtf8::*;
21 use encode_unicode::error::InvalidUtf8FirstByte::*;
22 
23 
/// Checks that `char::from_u32_detailed()` reports the expected error for
/// values that are not valid codepoints.
#[test]
fn from_u32() {
    // Every value in 0xd800..0xe000 is expected to give `Utf16Reserved`.
    for surrogate in 0xd800..0xe000 {
        assert_eq!(char::from_u32_detailed(surrogate), Err(Utf16Reserved));
    }
    // Values starting at 0x11_00_00 are expected to give `TooHigh`.
    // Don't test every value: step by 0x10_11_11 and stop once the
    // addition would overflow. (Range.step_by() is unstable)
    let mut sample = Some(0x11_00_00);
    while let Some(too_high) = sample {
        assert_eq!(char::from_u32_detailed(too_high), Err(TooHigh));
        sample = too_high.checked_add(0x10_11_11);
    }
}
39 
/// Checks the result of `extra_utf8_bytes()` for every possible `u8` value.
#[test]
fn utf8_extra_bytes() {
    for byte in 0..256 {
        // The expected result depends only on the leading bits of the byte.
        let expected = match byte {
            0b_0000_0000...0b_0111_1111 => Ok(0),
            0b_1000_0000...0b_1011_1111 => Err(ContinuationByte),
            0b_1100_0000...0b_1101_1111 => Ok(1),
            0b_1110_0000...0b_1110_1111 => Ok(2),
            0b_1111_0000...0b_1111_0111 => Ok(3),
            0b_1111_1000...0b_1111_1111 => Err(TooLongSeqence),
            // The loop only produces 0..256, which the arms above cover.
            _ => unreachable!(),
        };
        assert_eq!((byte as u8).extra_utf8_bytes(), expected);
    }
}
54 
/// Checks the result of `utf16_needs_extra_unit()` for every possible `u16`
/// value.
#[test]
fn utf16_extra_unit() {
    for unit in 0..0x1_00_00 {
        let expected = match unit {
            0x0000...0xd7ff => Ok(false),
            // 0b_1101_10xx_xxxx_xxxx: another unit is needed.
            0xd800...0xdbff => Ok(true),
            // 0b_1101_11xx_xxxx_xxxx: not accepted as a first unit.
            0xdc00...0xdfff => Err(InvalidUtf16FirstUnit),
            0xe000...0xffff => Ok(false),
            // The loop only produces 0..0x1_00_00, which the arms above cover.
            _ => unreachable!(),
        };
        assert_eq!((unit as u16).utf16_needs_extra_unit(), expected);
    }
}
67 
68 
/// Checks the errors returned by `char::from_utf16_tuple()`.
#[test]
fn from_utf16_tuple() {
    use encode_unicode::error::InvalidUtf16Tuple::*;

    // A first unit in 0xdc00..0xe000 is expected to give
    // `FirstIsTrailingSurrogate`, whether or not a second unit is supplied.
    for first in 0xdc00..0xe000 {
        let nearby = match first % 3 {
            0 => first - 100,
            _ => first + 100,
        };
        let second = match first % 2 {
            0 => Some(nearby),
            _ => None,
        };
        assert_eq!(char::from_utf16_tuple((first, second)), Err(FirstIsTrailingSurrogate));
    }

    // A first unit outside 0xd800..0xe000 combined with any second unit is
    // expected to give `SuperfluousSecond`.
    for unit in (0..0xd800).chain(0xe000..0x10000) {
        let first = unit as u16;
        // The cast truncates, so the second unit wraps around for the highest values.
        let second = (0x100+unit) as u16;
        assert_eq!(char::from_utf16_tuple((first, Some(second))), Err(SuperfluousSecond));
    }

    // A first unit in 0xd800..0xdc00 needs a second unit,
    // and `first - 0x2ff` always stays below 0xdc00.
    for first in 0xd800..0xdc00 {
        assert_eq!(char::from_utf16_tuple((first, None)), Err(MissingSecond));
        let below_dc00 = first - 0x2ff;
        assert_eq!(char::from_utf16_tuple((first, Some(below_dc00))), Err(InvalidSecond));
    }
}
88 
/// Checks the errors returned by `char::from_utf16_slice_start()`.
#[test]
fn from_utf16_slice_start() {
    use encode_unicode::error::InvalidUtf16Slice::*;

    assert_eq!(char::from_utf16_slice_start(&[]), Err(EmptySlice));

    let mut units = [0; 6];

    // First unit in 0xd800..0xdc00: alone it lacks its second unit, and
    // followed by a copy of itself the second unit is not in 0xdc00..0xe000.
    for first in 0xd800..0xdc00 {
        units[0] = first;
        assert_eq!(char::from_utf16_slice_start(&units[..1]), Err(MissingSecond));
        units[1] = first;
        // Vary the length of the slice between 2 and 5 units.
        let len = 2 + ((first as usize) % (units.len()-2));
        assert_eq!(char::from_utf16_slice_start(&units[..len]), Err(SecondNotLowSurrogate));
    }

    // First unit in 0xdc00..0xe000: expected to give `FirstLowSurrogate`
    // for slice lengths between 1 and 5.
    for first in 0xdc00..0xe000 {
        units[0] = first;
        let nearby = if first%3 == 0 {first-100} else {first+100};
        let len = 1 + ((first as usize) % (units.len()-1));
        // This writes just past the end of the slice passed below, so the
        // value can only show up in the slices of later iterations.
        units[len] = nearby;
        assert_eq!(char::from_utf16_slice_start(&units[..len]), Err(FirstLowSurrogate));
    }
}
109 
/// Checks that the listed two-byte starts give `OverLong` from all four
/// UTF-8 decoding functions.
#[test]
fn utf8_overlong() {
    // The first two bytes of each sequence under test.
    let overlong_starts = [
        [0xf0,0x8f], [0xf0,0x87], [0xf0,0x80], // 4-byte
        [0xe0,0x9f], [0xe0,0x8f], [0xe0,0x80], // 3-byte
        [0xc1,0xbf], [0xc1,0x92], [0xc1,0x80], // 2-byte
        [0xc0,0xbf], [0xc0,0x9f], [0xc0,0x80], // 2-byte
    ];
    for start in &overlong_starts {
        // Fill the remaining two bytes with 0x80 and then with 0xbf.
        for &filler in &[0x80, 0xbf] {
            let bytes = [start[0], start[1], filler, filler];
            assert_eq!(char::from_utf8_slice_start(&bytes), Err(s::Utf8(OverLong)));
            assert_eq!(char::from_utf8_array(bytes), Err(a::Utf8(OverLong)));
            assert_eq!(Utf8Char::from_slice_start(&bytes), Err(s::Utf8(OverLong)));
            assert_eq!(Utf8Char::from_array(bytes), Err(a::Utf8(OverLong)));
        }
    }
}
128 
/// Checks that `from_str_start()` gives `EmptyStrError` for an empty string,
/// for both `Utf8Char` and `Utf16Char`.
#[test]
fn from_str_start() {
    let empty = "";
    assert_eq!(Utf8Char::from_str_start(empty), Err(EmptyStrError));
    assert_eq!(Utf16Char::from_str_start(empty), Err(EmptyStrError));
}
134 
/// Checks that 4-byte sequences starting with 0xf4 0x90 or with 0xf5 give
/// `TooHigh` from all four UTF-8 decoding functions.
#[test]
fn utf8_codepoint_is_too_high() {
    // The payload bits of these bytes add up to 0x11_00_00.
    let just_above = [0xf4, 0x90, 0x80, 0x80];
    assert_eq!(Utf8Char::from_array(just_above), Err(a::Codepoint(TooHigh)));
    assert_eq!(char::from_utf8_array(just_above), Err(a::Codepoint(TooHigh)));
    assert_eq!(Utf8Char::from_slice_start(&just_above), Err(s::Codepoint(TooHigh)));
    assert_eq!(char::from_utf8_slice_start(&just_above), Err(s::Codepoint(TooHigh)));

    // First byte 0xf5, with different continuation bytes for each function.
    assert_eq!(
        Utf8Char::from_array([0xf5, 0x88, 0x99, 0xaa]),
        Err(a::Codepoint(TooHigh))
    );
    assert_eq!(
        char::from_utf8_array([0xf5, 0xaa, 0xbb, 0x88]),
        Err(a::Codepoint(TooHigh))
    );
    assert_eq!(
        Utf8Char::from_slice_start(&[0xf5, 0x99, 0xaa, 0xbb]),
        Err(s::Codepoint(TooHigh))
    );
    assert_eq!(
        char::from_utf8_slice_start(&[0xf5, 0xbb, 0x88, 0x99]),
        Err(s::Codepoint(TooHigh))
    );
}
146 
/// Checks that sequences starting with 0xed 0xa0 or 0xed 0xbf give
/// `Utf16Reserved` from all four UTF-8 decoding functions.
#[test]
fn utf8_codepoint_is_utf16_reserved() {
    // The same four assertions run for each second byte; bytes after the
    // third one differ per call.
    for &second in &[0xa0, 0xbf] {
        assert_eq!(
            Utf8Char::from_array([0xed, second, 0x80, 0xff]),
            Err(a::Codepoint(Utf16Reserved))
        );
        assert_eq!(
            char::from_utf8_array([0xed, second, 0x8f, 0x00]),
            Err(a::Codepoint(Utf16Reserved))
        );
        assert_eq!(
            Utf8Char::from_slice_start(&[0xed, second, 0xbe, 0xa5]),
            Err(s::Codepoint(Utf16Reserved))
        );
        assert_eq!(
            char::from_utf8_slice_start(&[0xed, second, 0xbf]),
            Err(s::Codepoint(Utf16Reserved))
        );
    }
}
157 
/// Checks that every first byte in 0x80..0xc0 gives
/// `FirstByte(ContinuationByte)` from all four UTF-8 decoding functions.
#[test]
fn utf8_first_is_continuation_byte() {
    for lead in 0x80..0xc0 {
        // The following bytes are derived from the first one by shifting.
        let bytes = [lead, lead<<2, lead<<4, lead<<6];
        assert_eq!(Utf8Char::from_array(bytes), Err(a::Utf8(FirstByte(ContinuationByte))));
        assert_eq!(char::from_utf8_array(bytes), Err(a::Utf8(FirstByte(ContinuationByte))));

        // Vary the length of the slice between 1 and 3 bytes.
        let len = (1 + lead%3) as usize;
        let prefix = &bytes[..len];
        assert_eq!(Utf8Char::from_slice_start(prefix), Err(s::Utf8(FirstByte(ContinuationByte))));
        assert_eq!(char::from_utf8_slice_start(prefix), Err(s::Utf8(FirstByte(ContinuationByte))));
    }
}
168 
/// Checks that every first byte in 0xf8..=0xff gives
/// `FirstByte(TooLongSeqence)` from all four UTF-8 decoding functions.
#[test]
fn utf8_too_long() {
    for first in 0xf8..0x100 {
        let lead = first as u8;

        let four = [lead, 0x88, 0x80, 0x80];
        assert_eq!(Utf8Char::from_array(four), Err(a::Utf8(FirstByte(TooLongSeqence))));
        assert_eq!(char::from_utf8_array(four), Err(a::Utf8(FirstByte(TooLongSeqence))));

        // Even first bytes are passed alone, odd ones with seven more bytes.
        let eight = [lead, 0x88, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80];
        let len = if (first & 1) == 0 {1} else {8};
        let slice = &eight[..len];
        assert_eq!(Utf8Char::from_slice_start(slice), Err(s::Utf8(FirstByte(TooLongSeqence))));
        assert_eq!(char::from_utf8_slice_start(slice), Err(s::Utf8(FirstByte(TooLongSeqence))));
    }
}
180 
/// Checks that replacing a continuation byte with a byte outside 0x80..0xc0
/// gives `NotAContinuationByte(index)` from all four UTF-8 decoding functions.
///
/// NOTE(review): `1..extra` excludes index `extra`, so the last continuation
/// byte is never corrupted and no assertion runs for first bytes with one
/// extra byte. The first byte 0xf4 is also outside the tested range.
/// Confirm whether this is intended.
#[test]
fn utf8_not_continuation_byte() {
    for first in 0xc2..0xf4 {
        let mut bytes = [first, 0x90, 0xa0, 0xb0];
        let extra = first.extra_utf8_bytes().unwrap();
        // Even first bytes get a slice of exactly the sequence length,
        // odd ones get the whole array.
        let slice_len = if first % 2 == 0 {1+extra} else {bytes.len()};
        // Go from the back: corrupted bytes are not restored, so the lowest
        // corrupted index is always the current one.
        for corrupt in (1..extra).rev() {
            let expected = NotAContinuationByte(corrupt);
            for &bad in &[0x00, 0x3f,  0x40, 0x7f,  0xc0, 0xff] {
                bytes[corrupt] = bad;
                assert_eq!(Utf8Char::from_array(bytes), Err(a::Utf8(expected)), "{:?}", bytes);
                assert_eq!(char::from_utf8_array(bytes), Err(a::Utf8(expected)));
                let slice = &bytes[..slice_len];
                assert_eq!(Utf8Char::from_slice_start(slice), Err(s::Utf8(expected)), "{:?}", slice);
                assert_eq!(char::from_utf8_slice_start(slice), Err(s::Utf8(expected)));
            }
        }
    }
}
198