1 extern crate once_cell;
2 extern crate quickcheck;
3 extern crate unicode_segmentation;
4
5 use std::fs::File;
6 use std::io::BufReader;
7 use std::path::Path;
8
9 use once_cell::sync::Lazy;
10 use quickcheck::{quickcheck, TestResult};
11
12 extern crate hyphenation;
13 extern crate hyphenation_commons;
14 use hyphenation::extended::*;
15 use hyphenation::Language::*;
16 use hyphenation::*;
17
fiat_std(lang : Language) -> Standard18 fn fiat_std(lang : Language) -> Standard {
19 let filename = format!("{}.standard.bincode", lang.code());
20 let file = File::open(Path::new("dictionaries").join(filename)).unwrap();
21 Standard::from_reader(lang, &mut BufReader::new(file)).unwrap()
22 }
23
fiat_ext(lang : Language) -> Extended24 fn fiat_ext(lang : Language) -> Extended {
25 let filename = format!("{}.extended.bincode", lang.code());
26 let file = File::open(Path::new("dictionaries").join(filename)).unwrap();
27 Extended::from_reader(lang, &mut BufReader::new(file)).unwrap()
28 }
29
30 static EN_US : Lazy<Standard> = Lazy::new(|| fiat_std(EnglishUS));
31 static HU : Lazy<Extended> = Lazy::new(|| fiat_ext(Hungarian));
32 static TR : Lazy<Standard> = Lazy::new(|| fiat_std(Turkish));
33
34
/// Property: concatenating the unmarked segments of a hyphenated string
/// reproduces the input string exactly.
#[test]
fn collected_equals_original() {
    fn roundtrips(text: String) -> bool {
        let word = EN_US.hyphenate(&text);
        let rejoined: String = word.iter().segments().collect();

        rejoined == text
    }

    quickcheck(roundtrips as fn(String) -> bool);
}
45
/// Property: every opportunity reported for a string lies between the byte
/// offset of the char at position `l_min` and the byte offset of the char at
/// position `len - r_min`, where `(l_min, r_min)` are the `EnglishUS` minima.
#[test]
fn opportunities_within_bounds() {
    fn stays_within_minima(s: String) -> TestResult {
        let chars: Vec<(usize, char)> = s.char_indices().collect();
        let (l_min, r_min) = EnglishUS.minima();
        let char_count = chars.len();
        // Strings with fewer chars than the two minima combined are not tested.
        if char_count < l_min + r_min {
            return TestResult::discard();
        }

        let opportunities: Vec<_> = EN_US.opportunities(&s);
        let first_allowed = chars[l_min].0;
        let last_allowed = chars[char_count - r_min].0;
        let allowed = first_allowed..=last_allowed;

        TestResult::from_bool(opportunities.iter().all(|i| allowed.contains(i)))
    }

    quickcheck(stays_within_minima as fn(String) -> TestResult);
}
65
/// Checks segment contents and `size_hint` values for a handful of English
/// words, both before and after the segment iterators are advanced.
#[test]
fn basics_standard() {
    // Standard hyphenation; "hypha" is the minimum hyphenable length.
    let long_word = EN_US.hyphenate("anfractuous");
    let short_word = EN_US.hyphenate("hypha");
    // Exceptions; "bevies" is unhyphenable (by exception).
    let excepted_word = EN_US.hyphenate("hyphenation");
    let unbroken_word = EN_US.hyphenate("bevies");

    let long_segs = long_word.iter().segments();
    let excepted_segs = excepted_word.iter().segments();
    let mut unbroken_segs = unbroken_word.iter().segments();
    let mut short_segs = short_word.iter().segments();

    // Untouched iterators report their full segment count.
    assert_eq!(long_segs.size_hint(), (4, Some(4)));
    assert_eq!(excepted_segs.size_hint(), (4, Some(4)));
    assert_eq!(unbroken_segs.size_hint(), (1, Some(1)));
    assert_eq!(short_segs.size_hint(), (2, Some(2)));

    // The last two iterators are advanced further down, so collect from clones.
    assert_eq!(
        long_segs.collect::<Vec<&str>>(),
        vec!["an", "frac", "tu", "ous"]
    );
    assert_eq!(
        excepted_segs.collect::<Vec<&str>>(),
        vec!["hy", "phen", "a", "tion"]
    );
    assert_eq!(unbroken_segs.clone().collect::<Vec<&str>>(), vec!["bevies"]);
    assert_eq!(short_segs.clone().collect::<Vec<&str>>(), vec!["hy", "pha"]);

    // Additional size checks for partially consumed iterators.
    unbroken_segs.next();
    assert_eq!(unbroken_segs.size_hint(), (0, Some(0)));
    unbroken_segs.next();
    assert_eq!(unbroken_segs.size_hint(), (0, Some(0)));

    short_segs.next();
    assert_eq!(short_segs.size_hint(), (1, Some(1)));
    short_segs.next();
    assert_eq!(short_segs.size_hint(), (0, Some(0)));
}
113
/// Checks the segments produced by the Hungarian extended dictionary for two
/// words whose expected segments differ from plain substrings of the input.
#[test]
fn basics_extended() {
    // (word, expected segments)
    let cases = [
        ("asszonnyal", vec!["asz", "szony", "nyal"]),
        ("esszé", vec!["esz", "szé"]),
    ];

    for (word, expected) in cases.iter() {
        let segments: Vec<_> = HU.hyphenate(word).into_iter().segments().collect();
        assert_eq!(segments, *expected);
    }
}
125
/// Checks the segmentation of words containing the capital letter `İ`, using
/// both the Turkish and the American English dictionaries.
#[test]
fn special_casing() {
    // (dictionary, word, expected segments), checked in this order. The
    // dictionaries are stored as references to the lazy statics, so each one
    // is only loaded when its first case is reached.
    let cases = [
        (&TR, "İbrahim", vec!["İb", "ra", "him"]),
        (&TR, "İLGİNÇ", vec!["İL", "GİNÇ"]),
        (&EN_US, "MİCRO", vec!["Mİ", "CRO"]),
        (&EN_US, "İDİOM", vec!["İD", "İOM"]),
        (&EN_US, "MUCİLAGİNOUS", vec!["MU", "CİLAGİ", "NOUS"]),
    ];

    for (dictionary, word, expected) in cases.iter() {
        let segments: Vec<_> = dictionary
            .hyphenate(word)
            .into_iter()
            .segments()
            .collect();
        assert_eq!(segments, *expected);
    }
}
148
/// Reading the `mul-ethi` dictionary file while requesting `EnglishUS` must
/// yield an error.
#[test]
fn language_mismatch_on_load() {
    let path = "./dictionaries/mul-ethi.standard.bincode";
    let mut reader = BufReader::new(File::open(path).unwrap());

    let outcome = Standard::from_reader(EnglishUS, &mut reader);
    assert!(outcome.is_err());
}
155
/// Hyphenates running text word by word and compares the re-joined result
/// with the expected marked-up string.
#[test]
fn text() {
    use unicode_segmentation::UnicodeSegmentation;

    // Split on word bounds, hyphenate each piece, and append the marked
    // segments in order.
    let hyphenate_text = |text: &str| {
        let mut joined = String::new();
        for word in text.split_word_bounds() {
            for piece in EN_US.hyphenate(word).into_iter() {
                joined.push_str(&piece);
            }
        }
        joined
    };

    // (input, expected output)
    let cases = [
        (
            "I know noble accents / And lucid, inescapable rhythms; […]",
            "I know no-ble ac-cents / And lu-cid, in-escapable rhythms; […]",
        ),
        (
            "ever-burning sulphur unconsumed",
            "ever-burn-ing sul-phur un-con-sumed",
        ),
    ];

    for &(input, expected) in cases.iter() {
        assert_eq!(hyphenate_text(input), expected);
    }
}
176
/// Compares the plain exception lookup with a lookup over the explicit range
/// `(0, len)`: the former is expected to omit the final break at index 9.
#[test]
fn bounded_exception() {
    // an-iso-trop-ic, by exception
    let word = "anisotropic";
    let whole_word = (0, word.len());

    assert_eq!(EN_US.exception(word), Some(vec![2, 5]));
    assert_eq!(
        EN_US.exception_within(word, whole_word),
        Some(vec![2, 5, 9])
    );
}
187
/// Checks break indices, marked segments, unmarked segments, and an all-caps
/// word, using the American English dictionary.
#[test]
fn readme_examples() {
    let word = EN_US.hyphenate("hyphenation");

    // Break indices recorded on the hyphenated word.
    assert_eq!(word.breaks, vec![2, 6, 7]);

    // Plain iteration yields owned segments with a trailing mark.
    let with_marks: Vec<String> = word.iter().collect();
    assert_eq!(with_marks, vec!["hy-", "phen-", "a-", "tion"]);

    // `segments()` yields the same pieces as borrowed slices without marks.
    let without_marks: Vec<&str> = word.iter().segments().collect();
    assert_eq!(without_marks, vec!["hy", "phen", "a", "tion"]);

    let capitals: Vec<_> = EN_US
        .hyphenate("CAPITAL")
        .into_iter()
        .segments()
        .collect();
    assert_eq!(capitals, vec!["CAP", "I", "TAL"]);
}
206