1 // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10 
11 use super::UnicodeSegmentation;
12 
13 use std::prelude::v1::*;
14 
#[test]
fn test_graphemes() {
    use crate::testdata::{TEST_DIFF, TEST_SAME};

    // Extra cases whose segmentation differs between the two modes:
    // (input, clusters with extended = true, clusters with extended = false).
    const EXTRA_DIFF: &[(&str, &[&str], &[&str])] = &[
        // Official test suite doesn't include two Prepend chars between two other chars.
        ("\u{20}\u{600}\u{600}\u{20}",
         &["\u{20}", "\u{600}\u{600}\u{20}"],
         &["\u{20}", "\u{600}", "\u{600}", "\u{20}"]),

        // Test for Prepend followed by two Any chars
        ("\u{600}\u{20}\u{20}",
         &["\u{600}\u{20}", "\u{20}"],
         &["\u{600}", "\u{20}", "\u{20}"]),
    ];

    // Extra cases that segment identically in both modes: (input, clusters).
    const EXTRA_SAME: &[(&str, &[&str])] = &[
        // family emoji (more than two emoji joined by ZWJ)
        ("\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}",
         &["\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}"]),
        // cartwheel emoji followed by two fitzpatrick skin tone modifiers
        // (test case from issue #19)
        ("\u{1F938}\u{1F3FE}\u{1F3FE}",
         &["\u{1F938}\u{1F3FE}\u{1F3FE}"]),
    ];

    /// Asserts that `s` splits into `expected` when iterated forwards, and into
    /// the reverse of `expected` when iterated backwards, for the given mode.
    ///
    /// Results are collected into vectors so that a failure prints the input
    /// and both cluster lists instead of a bare `assertion failed`.
    fn check(s: &str, extended: bool, expected: &[&str]) {
        let forward: Vec<&str> = UnicodeSegmentation::graphemes(s, extended).collect();
        assert_eq!(forward, expected,
                   "Forward graphemes (extended: {}) test for testcase {:?} failed.",
                   extended, s);

        let backward: Vec<&str> = UnicodeSegmentation::graphemes(s, extended).rev().collect();
        let expected_rev: Vec<&str> = expected.iter().rev().cloned().collect();
        assert_eq!(backward, expected_rev,
                   "Reverse graphemes (extended: {}) test for testcase {:?} failed.",
                   extended, s);
    }

    for &(s, g) in TEST_SAME.iter().chain(EXTRA_SAME) {
        check(s, true, g);
        check(s, false, g);
    }

    for &(s, gt, gf) in TEST_DIFF.iter().chain(EXTRA_DIFF) {
        check(s, true, gt);
        check(s, false, gf);
    }

    // test the indices iterators
    //
    // The input is spelled with escapes because the expected byte offsets below
    // only hold for the decomposed form (base letter + combining marks), which
    // cannot be told apart from the precomposed form when typed literally.
    let s = "a\u{310}e\u{301}o\u{308}\u{332}\r\n";
    let gr_inds = UnicodeSegmentation::grapheme_indices(s, true).collect::<Vec<(usize, &str)>>();
    let b: &[_] = &[(0, "a\u{310}"), (3, "e\u{301}"), (6, "o\u{308}\u{332}"), (11, "\r\n")];
    assert_eq!(gr_inds, b);
    let gr_inds = UnicodeSegmentation::grapheme_indices(s, true).rev().collect::<Vec<(usize, &str)>>();
    let b: &[_] = &[(11, "\r\n"), (6, "o\u{308}\u{332}"), (3, "e\u{301}"), (0, "a\u{310}")];
    assert_eq!(gr_inds, b);

    // size_hint before and after draining the iterator through `by_ref`;
    // the initial upper bound matches the byte length of `s` (13).
    let mut gr_inds_iter = UnicodeSegmentation::grapheme_indices(s, true);
    {
        let gr_inds = gr_inds_iter.by_ref();
        assert_eq!(gr_inds.size_hint(), (1, Some(13)));
        assert_eq!(gr_inds.count(), 4);
    }
    assert_eq!(gr_inds_iter.size_hint(), (0, Some(0)));

    // make sure the reverse iterator does the right thing with "\n" at beginning of string
    let s = "\n\r\n\r";
    let gr = UnicodeSegmentation::graphemes(s, true).rev().collect::<Vec<&str>>();
    let b: &[_] = &["\r", "\r\n", "\n"];
    assert_eq!(gr, b);
}
88 
#[test]
fn test_words() {
    use crate::testdata::TEST_WORD;

    // Unicode's official tests don't really test longer chains of flag emoji
    // TODO This could be improved with more tests like flag emoji with interspersed Extend chars and ZWJ
    //
    // Regional indicators (U+1F1E6..=U+1F1FF) and emoji are written as escapes so
    // the data survives tooling that cannot round-trip non-BMP characters.
    // NOTE(review): these literals were rebuilt from text damaged into U+FFFD runs.
    // The grouping is fixed by the expected word lists (the words concatenate back
    // to the input); the exact indicator letters should be confirmed against upstream.
    const EXTRA_TESTS: &[(&str, &[&str])] = &[
        // seven complete flags: AF AX AL DZ AS AD AO
        ("\u{1F1E6}\u{1F1EB}\u{1F1E6}\u{1F1FD}\u{1F1E6}\u{1F1F1}\u{1F1E9}\u{1F1FF}\u{1F1E6}\u{1F1F8}\u{1F1E6}\u{1F1E9}\u{1F1E6}\u{1F1F4}",
         &["\u{1F1E6}\u{1F1EB}", "\u{1F1E6}\u{1F1FD}", "\u{1F1E6}\u{1F1F1}", "\u{1F1E9}\u{1F1FF}",
           "\u{1F1E6}\u{1F1F8}", "\u{1F1E6}\u{1F1E9}", "\u{1F1E6}\u{1F1F4}"]),
        // six complete flags followed by one unpaired indicator
        ("\u{1F1E6}\u{1F1EB}\u{1F1E6}\u{1F1FD}\u{1F1E6}\u{1F1F1}\u{1F1E9}\u{1F1FF}\u{1F1E6}\u{1F1F8}\u{1F1E6}\u{1F1E9}\u{1F1E6}",
         &["\u{1F1E6}\u{1F1EB}", "\u{1F1E6}\u{1F1FD}", "\u{1F1E6}\u{1F1F1}", "\u{1F1E9}\u{1F1FF}",
           "\u{1F1E6}\u{1F1F8}", "\u{1F1E6}\u{1F1E9}", "\u{1F1E6}"]),
        // ASCII letters interrupt the pairing, so it restarts after each one
        ("\u{1F1E6}a\u{1F1EB}\u{1F1E6}\u{1F1FD}a\u{1F1E6}\u{1F1F1}\u{1F1E9}\u{1F1FF}\u{1F1E6}\u{1F1F8}\u{1F1E6}\u{1F1E9}\u{1F1E6}",
         &["\u{1F1E6}", "a", "\u{1F1EB}\u{1F1E6}", "\u{1F1FD}", "a", "\u{1F1E6}\u{1F1F1}",
           "\u{1F1E9}\u{1F1FF}", "\u{1F1E6}\u{1F1F8}", "\u{1F1E6}\u{1F1E9}", "\u{1F1E6}"]),
        ("\u{1f468}\u{200d}\u{1f468}\u{200d}\u{1f466}",  &["\u{1f468}\u{200d}\u{1f468}\u{200d}\u{1f466}"]),
        // emoji followed by an emoji carrying a skin-tone modifier
        ("\u{1F60C}\u{1F44E}\u{1F3FC}",  &["\u{1F60C}", "\u{1F44E}\u{1F3FC}"]),
        // perhaps wrong, spaces should not be included?
        ("hello world", &["hello", " ", "world"]),
        // three flags (CA CH ZM), one unpaired indicator, then plain text
        ("\u{1F1E8}\u{1F1E6}\u{1F1E8}\u{1F1ED}\u{1F1FF}\u{1F1F2}\u{1F1FF} hi",
         &["\u{1F1E8}\u{1F1E6}", "\u{1F1E8}\u{1F1ED}", "\u{1F1FF}\u{1F1F2}", "\u{1F1FF}", " ", "hi"]),
    ];

    for &(s, w) in TEST_WORD.iter().chain(EXTRA_TESTS.iter()) {
        // Defined inside the loop so the failure message can name the current
        // test case (`s`, `w`) without passing it at every call site.
        macro_rules! assert_ {
            ($test:expr, $exp:expr, $name:expr) => {
                // collect into vector for better diagnostics in failure case
                let testing = $test.collect::<Vec<_>>();
                let expected = $exp.collect::<Vec<_>>();
                assert_eq!(testing, expected, "{} test for testcase ({:?}, {:?}) failed.", $name, s, w)
            }
        }

        // test forward iterator
        assert_!(s.split_word_bounds(),
                 w.iter().cloned(),
                 "Forward word boundaries");

        // test reverse iterator
        assert_!(s.split_word_bounds().rev(),
                 w.iter().rev().cloned(),
                 "Reverse word boundaries");

        // Start offset of every expected word: a running sum of the byte
        // lengths of the words that precede it.
        let indices: Vec<usize> = w
            .iter()
            .scan(0, |offset, word| {
                let start = *offset;
                *offset += word.len();
                Some(start)
            })
            .collect();

        // test forward indices iterator
        assert_!(s.split_word_bound_indices().map(|(l, _)| l),
                 indices.iter().cloned(),
                 "Forward word indices");

        // test backward indices iterator
        assert_!(s.split_word_bound_indices().rev().map(|(l, _)| l),
                 indices.iter().rev().cloned(),
                 "Reverse word indices");
    }
}
143 
144 
#[test]
fn test_sentences() {
    use crate::testdata::TEST_SENTENCE;

    // Each table entry pairs an input string with the sentences it is expected
    // to split into when iterated front to back.
    for &(s, w) in TEST_SENTENCE.iter() {
        // Materialise both sides so a mismatch prints the full lists.
        let testing: Vec<_> = s.split_sentence_bounds().collect();
        let expected: Vec<_> = w.iter().cloned().collect();

        assert_eq!(
            testing,
            expected,
            "{} test for testcase ({:?}, {:?}) failed.",
            "Forward sentence boundaries",
            s,
            w
        );
    }
}
164 
165 quickcheck! {
166     fn quickcheck_forward_reverse_graphemes_extended(s: String) -> bool {
167         let a = s.graphemes(true).collect::<Vec<_>>();
168         let mut b = s.graphemes(true).rev().collect::<Vec<_>>();
169         b.reverse();
170         a == b
171     }
172 
173     fn quickcheck_forward_reverse_graphemes_legacy(s: String) -> bool {
174         let a = s.graphemes(false).collect::<Vec<_>>();
175         let mut b = s.graphemes(false).rev().collect::<Vec<_>>();
176         b.reverse();
177         a == b
178     }
179 
180     fn quickcheck_join_graphemes(s: String) -> bool {
181         let a = s.graphemes(true).collect::<String>();
182         let b = s.graphemes(false).collect::<String>();
183         a == s && b == s
184     }
185 
186     fn quickcheck_forward_reverse_words(s: String) -> bool {
187         let a = s.split_word_bounds().collect::<Vec<_>>();
188         let mut b = s.split_word_bounds().rev().collect::<Vec<_>>();
189         b.reverse();
190         a == b
191     }
192 
193     fn quickcheck_join_words(s: String) -> bool {
194         let a = s.split_word_bounds().collect::<String>();
195         a == s
196     }
197 }
198