1 // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10 
11 use super::UnicodeSegmentation;
12 
13 use std::prelude::v1::*;
14 
15 #[test]
test_graphemes()16 fn test_graphemes() {
17     use testdata::{TEST_SAME, TEST_DIFF};
18 
19     pub const EXTRA_DIFF: &'static [(&'static str,
20                                      &'static [&'static str],
21                                      &'static [&'static str])] = &[
22         // Official test suite doesn't include two Prepend chars between two other chars.
23         ("\u{20}\u{600}\u{600}\u{20}",
24          &["\u{20}", "\u{600}\u{600}\u{20}"],
25          &["\u{20}", "\u{600}", "\u{600}", "\u{20}"]),
26 
27         // Test for Prepend followed by two Any chars
28         ("\u{600}\u{20}\u{20}",
29          &["\u{600}\u{20}", "\u{20}"],
30          &["\u{600}", "\u{20}", "\u{20}"]),
31     ];
32 
33     pub const EXTRA_SAME: &'static [(&'static str, &'static [&'static str])] = &[
34         // family emoji (more than two emoji joined by ZWJ)
35         ("\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}",
36          &["\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}"]),
37     ];
38 
39     for &(s, g) in TEST_SAME.iter().chain(EXTRA_SAME) {
40         // test forward iterator
41         assert!(UnicodeSegmentation::graphemes(s, true).eq(g.iter().cloned()));
42         assert!(UnicodeSegmentation::graphemes(s, false).eq(g.iter().cloned()));
43 
44         // test reverse iterator
45         assert!(UnicodeSegmentation::graphemes(s, true).rev().eq(g.iter().rev().cloned()));
46         assert!(UnicodeSegmentation::graphemes(s, false).rev().eq(g.iter().rev().cloned()));
47     }
48 
49     for &(s, gt, gf) in TEST_DIFF.iter().chain(EXTRA_DIFF) {
50         // test forward iterator
51         assert!(UnicodeSegmentation::graphemes(s, true).eq(gt.iter().cloned()));
52         assert!(UnicodeSegmentation::graphemes(s, false).eq(gf.iter().cloned()));
53 
54         // test reverse iterator
55         assert!(UnicodeSegmentation::graphemes(s, true).rev().eq(gt.iter().rev().cloned()));
56         assert!(UnicodeSegmentation::graphemes(s, false).rev().eq(gf.iter().rev().cloned()));
57     }
58 
59     // test the indices iterators
60     let s = "a̐éö̲\r\n";
61     let gr_inds = UnicodeSegmentation::grapheme_indices(s, true).collect::<Vec<(usize, &str)>>();
62     let b: &[_] = &[(0, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
63     assert_eq!(gr_inds, b);
64     let gr_inds = UnicodeSegmentation::grapheme_indices(s, true).rev().collect::<Vec<(usize, &str)>>();
65     let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0, "a̐")];
66     assert_eq!(gr_inds, b);
67     let mut gr_inds_iter = UnicodeSegmentation::grapheme_indices(s, true);
68     {
69         let gr_inds = gr_inds_iter.by_ref();
70         let e1 = gr_inds.size_hint();
71         assert_eq!(e1, (1, Some(13)));
72         let c = gr_inds.count();
73         assert_eq!(c, 4);
74     }
75     let e2 = gr_inds_iter.size_hint();
76     assert_eq!(e2, (0, Some(0)));
77 
78     // make sure the reverse iterator does the right thing with "\n" at beginning of string
79     let s = "\n\r\n\r";
80     let gr = UnicodeSegmentation::graphemes(s, true).rev().collect::<Vec<&str>>();
81     let b: &[_] = &["\r", "\r\n", "\n"];
82     assert_eq!(gr, b);
83 }
84 
85 #[test]
test_words()86 fn test_words() {
87     use testdata::TEST_WORD;
88 
89     // Unicode's official tests don't really test longer chains of flag emoji
90     // TODO This could be improved with more tests like flag emoji with interspersed Extend chars and ZWJ
91     const EXTRA_TESTS: &'static [(&'static str, &'static [&'static str])] = &[
92         ("����������������������������", &["����", "����", "����", "����", "����", "����", "����"]),
93         ("��������������������������", &["����", "����", "����", "����", "����", "����", "��"]),
94         ("��a������a������������������", &["��", "a", "����", "��", "a", "����", "����", "����", "����", "��"]),
95         ("\u{1f468}\u{200d}\u{1f468}\u{200d}\u{1f466}",  &["\u{1f468}\u{200d}\u{1f468}\u{200d}\u{1f466}"]),
96         ("������",  &["��", "����"]),
97         // perhaps wrong, spaces should not be included?
98         ("hello world", &["hello", " ", "world"]),
99         ("�������������� hi", &["����", "����", "����", "��", " ", "hi"]),
100     ];
101     for &(s, w) in TEST_WORD.iter().chain(EXTRA_TESTS.iter()) {
102         macro_rules! assert_ {
103             ($test:expr, $exp:expr, $name:expr) => {
104                 // collect into vector for better diagnostics in failure case
105                 let testing = $test.collect::<Vec<_>>();
106                 let expected = $exp.collect::<Vec<_>>();
107                 assert_eq!(testing, expected, "{} test for testcase ({:?}, {:?}) failed.", $name, s, w)
108             }
109         }
110         // test forward iterator
111         assert_!(s.split_word_bounds(),
112                 w.iter().cloned(),
113                 "Forward word boundaries");
114 
115         // test reverse iterator
116         assert_!(s.split_word_bounds().rev(),
117                 w.iter().rev().cloned(),
118                 "Reverse word boundaries");
119 
120         // generate offsets from word string lengths
121         let mut indices = vec![0];
122         for i in w.iter().cloned().map(|s| s.len()).scan(0, |t, n| { *t += n; Some(*t) }) {
123             indices.push(i);
124         }
125         indices.pop();
126         let indices = indices;
127 
128         // test forward indices iterator
129         assert_!(s.split_word_bound_indices().map(|(l,_)| l),
130                  indices.iter().cloned(),
131                  "Forward word indices");
132 
133         // test backward indices iterator
134         assert_!(s.split_word_bound_indices().rev().map(|(l,_)| l),
135                  indices.iter().rev().cloned(),
136                  "Reverse word indices");
137     }
138 }
139 
140 quickcheck! {
141     fn quickcheck_forward_reverse_graphemes_extended(s: String) -> bool {
142         let a = s.graphemes(true).collect::<Vec<_>>();
143         let mut b = s.graphemes(true).rev().collect::<Vec<_>>();
144         b.reverse();
145         a == b
146     }
147 
148     fn quickcheck_forward_reverse_graphemes_legacy(s: String) -> bool {
149         let a = s.graphemes(false).collect::<Vec<_>>();
150         let mut b = s.graphemes(false).rev().collect::<Vec<_>>();
151         b.reverse();
152         a == b
153     }
154 
155     fn quickcheck_join_graphemes(s: String) -> bool {
156         let a = s.graphemes(true).collect::<String>();
157         let b = s.graphemes(false).collect::<String>();
158         a == s && b == s
159     }
160 
161     fn quickcheck_forward_reverse_words(s: String) -> bool {
162         let a = s.split_word_bounds().collect::<Vec<_>>();
163         let mut b = s.split_word_bounds().rev().collect::<Vec<_>>();
164         b.reverse();
165         a == b
166     }
167 
168     fn quickcheck_join_words(s: String) -> bool {
169         let a = s.split_word_bounds().collect::<String>();
170         a == s
171     }
172 }
173