1 // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 use super::UnicodeSegmentation;
12
13 use std::prelude::v1::*;
14
15 #[test]
test_graphemes()16 fn test_graphemes() {
17 use crate::testdata::{TEST_SAME, TEST_DIFF};
18
19 pub const EXTRA_DIFF: &'static [(&'static str,
20 &'static [&'static str],
21 &'static [&'static str])] = &[
22 // Official test suite doesn't include two Prepend chars between two other chars.
23 ("\u{20}\u{600}\u{600}\u{20}",
24 &["\u{20}", "\u{600}\u{600}\u{20}"],
25 &["\u{20}", "\u{600}", "\u{600}", "\u{20}"]),
26
27 // Test for Prepend followed by two Any chars
28 ("\u{600}\u{20}\u{20}",
29 &["\u{600}\u{20}", "\u{20}"],
30 &["\u{600}", "\u{20}", "\u{20}"]),
31 ];
32
33 pub const EXTRA_SAME: &'static [(&'static str, &'static [&'static str])] = &[
34 // family emoji (more than two emoji joined by ZWJ)
35 ("\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}",
36 &["\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}"]),
37 // cartwheel emoji followed by two fitzpatrick skin tone modifiers
38 // (test case from issue #19)
39 ("\u{1F938}\u{1F3FE}\u{1F3FE}",
40 &["\u{1F938}\u{1F3FE}\u{1F3FE}"]),
41 ];
42
43 for &(s, g) in TEST_SAME.iter().chain(EXTRA_SAME) {
44 // test forward iterator
45 assert!(UnicodeSegmentation::graphemes(s, true).eq(g.iter().cloned()));
46 assert!(UnicodeSegmentation::graphemes(s, false).eq(g.iter().cloned()));
47
48 // test reverse iterator
49 assert!(UnicodeSegmentation::graphemes(s, true).rev().eq(g.iter().rev().cloned()));
50 assert!(UnicodeSegmentation::graphemes(s, false).rev().eq(g.iter().rev().cloned()));
51 }
52
53 for &(s, gt, gf) in TEST_DIFF.iter().chain(EXTRA_DIFF) {
54 // test forward iterator
55 assert!(UnicodeSegmentation::graphemes(s, true).eq(gt.iter().cloned()));
56 assert!(UnicodeSegmentation::graphemes(s, false).eq(gf.iter().cloned()));
57
58 // test reverse iterator
59 assert!(UnicodeSegmentation::graphemes(s, true).rev().eq(gt.iter().rev().cloned()));
60 assert!(UnicodeSegmentation::graphemes(s, false).rev().eq(gf.iter().rev().cloned()));
61 }
62
63 // test the indices iterators
64 let s = "a̐éö̲\r\n";
65 let gr_inds = UnicodeSegmentation::grapheme_indices(s, true).collect::<Vec<(usize, &str)>>();
66 let b: &[_] = &[(0, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
67 assert_eq!(gr_inds, b);
68 let gr_inds = UnicodeSegmentation::grapheme_indices(s, true).rev().collect::<Vec<(usize, &str)>>();
69 let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0, "a̐")];
70 assert_eq!(gr_inds, b);
71 let mut gr_inds_iter = UnicodeSegmentation::grapheme_indices(s, true);
72 {
73 let gr_inds = gr_inds_iter.by_ref();
74 let e1 = gr_inds.size_hint();
75 assert_eq!(e1, (1, Some(13)));
76 let c = gr_inds.count();
77 assert_eq!(c, 4);
78 }
79 let e2 = gr_inds_iter.size_hint();
80 assert_eq!(e2, (0, Some(0)));
81
82 // make sure the reverse iterator does the right thing with "\n" at beginning of string
83 let s = "\n\r\n\r";
84 let gr = UnicodeSegmentation::graphemes(s, true).rev().collect::<Vec<&str>>();
85 let b: &[_] = &["\r", "\r\n", "\n"];
86 assert_eq!(gr, b);
87 }
88
89 #[test]
test_words()90 fn test_words() {
91 use crate::testdata::TEST_WORD;
92
93 // Unicode's official tests don't really test longer chains of flag emoji
94 // TODO This could be improved with more tests like flag emoji with interspersed Extend chars and ZWJ
95 const EXTRA_TESTS: &'static [(&'static str, &'static [&'static str])] = &[
96 ("", &["", "", "", "", "", "", ""]),
97 ("", &["", "", "", "", "", "", ""]),
98 ("aa", &["", "a", "", "", "a", "", "", "", "", ""]),
99 ("\u{1f468}\u{200d}\u{1f468}\u{200d}\u{1f466}", &["\u{1f468}\u{200d}\u{1f468}\u{200d}\u{1f466}"]),
100 ("", &["", ""]),
101 // perhaps wrong, spaces should not be included?
102 ("hello world", &["hello", " ", "world"]),
103 (" hi", &["", "", "", "", " ", "hi"]),
104 ];
105 for &(s, w) in TEST_WORD.iter().chain(EXTRA_TESTS.iter()) {
106 macro_rules! assert_ {
107 ($test:expr, $exp:expr, $name:expr) => {
108 // collect into vector for better diagnostics in failure case
109 let testing = $test.collect::<Vec<_>>();
110 let expected = $exp.collect::<Vec<_>>();
111 assert_eq!(testing, expected, "{} test for testcase ({:?}, {:?}) failed.", $name, s, w)
112 }
113 }
114 // test forward iterator
115 assert_!(s.split_word_bounds(),
116 w.iter().cloned(),
117 "Forward word boundaries");
118
119 // test reverse iterator
120 assert_!(s.split_word_bounds().rev(),
121 w.iter().rev().cloned(),
122 "Reverse word boundaries");
123
124 // generate offsets from word string lengths
125 let mut indices = vec![0];
126 for i in w.iter().cloned().map(|s| s.len()).scan(0, |t, n| { *t += n; Some(*t) }) {
127 indices.push(i);
128 }
129 indices.pop();
130 let indices = indices;
131
132 // test forward indices iterator
133 assert_!(s.split_word_bound_indices().map(|(l,_)| l),
134 indices.iter().cloned(),
135 "Forward word indices");
136
137 // test backward indices iterator
138 assert_!(s.split_word_bound_indices().rev().map(|(l,_)| l),
139 indices.iter().rev().cloned(),
140 "Reverse word indices");
141 }
142 }
143
144
145 #[test]
test_sentences()146 fn test_sentences() {
147 use crate::testdata::TEST_SENTENCE;
148
149 for &(s, w) in TEST_SENTENCE.iter() {
150 macro_rules! assert_ {
151 ($test:expr, $exp:expr, $name:expr) => {
152 // collect into vector for better diagnostics in failure case
153 let testing = $test.collect::<Vec<_>>();
154 let expected = $exp.collect::<Vec<_>>();
155 assert_eq!(testing, expected, "{} test for testcase ({:?}, {:?}) failed.", $name, s, w)
156 }
157 }
158
159 assert_!(s.split_sentence_bounds(),
160 w.iter().cloned(),
161 "Forward sentence boundaries");
162 }
163 }
164
165 quickcheck! {
166 fn quickcheck_forward_reverse_graphemes_extended(s: String) -> bool {
167 let a = s.graphemes(true).collect::<Vec<_>>();
168 let mut b = s.graphemes(true).rev().collect::<Vec<_>>();
169 b.reverse();
170 a == b
171 }
172
173 fn quickcheck_forward_reverse_graphemes_legacy(s: String) -> bool {
174 let a = s.graphemes(false).collect::<Vec<_>>();
175 let mut b = s.graphemes(false).rev().collect::<Vec<_>>();
176 b.reverse();
177 a == b
178 }
179
180 fn quickcheck_join_graphemes(s: String) -> bool {
181 let a = s.graphemes(true).collect::<String>();
182 let b = s.graphemes(false).collect::<String>();
183 a == s && b == s
184 }
185
186 fn quickcheck_forward_reverse_words(s: String) -> bool {
187 let a = s.split_word_bounds().collect::<Vec<_>>();
188 let mut b = s.split_word_bounds().rev().collect::<Vec<_>>();
189 b.reverse();
190 a == b
191 }
192
193 fn quickcheck_join_words(s: String) -> bool {
194 let a = s.split_word_bounds().collect::<String>();
195 a == s
196 }
197 }
198