1 extern crate once_cell;
2 extern crate quickcheck;
3 extern crate unicode_segmentation;
4 
5 use std::fs::File;
6 use std::io::BufReader;
7 use std::path::Path;
8 
9 use once_cell::sync::Lazy;
10 use quickcheck::{quickcheck, TestResult};
11 
12 extern crate hyphenation;
13 extern crate hyphenation_commons;
14 use hyphenation::extended::*;
15 use hyphenation::Language::*;
16 use hyphenation::*;
17 
fiat_std(lang : Language) -> Standard18 fn fiat_std(lang : Language) -> Standard {
19     let filename = format!("{}.standard.bincode", lang.code());
20     let file = File::open(Path::new("dictionaries").join(filename)).unwrap();
21     Standard::from_reader(lang, &mut BufReader::new(file)).unwrap()
22 }
23 
fiat_ext(lang : Language) -> Extended24 fn fiat_ext(lang : Language) -> Extended {
25     let filename = format!("{}.extended.bincode", lang.code());
26     let file = File::open(Path::new("dictionaries").join(filename)).unwrap();
27     Extended::from_reader(lang, &mut BufReader::new(file)).unwrap()
28 }
29 
30 static EN_US : Lazy<Standard> = Lazy::new(|| fiat_std(EnglishUS));
31 static HU : Lazy<Extended> = Lazy::new(|| fiat_ext(Hungarian));
32 static TR : Lazy<Standard> = Lazy::new(|| fiat_std(Turkish));
33 
34 
/// Property: concatenating the unmarked segments of a hyphenated string
/// yields the input string again.
#[test]
fn collected_equals_original() {
    fn property(original : String) -> bool {
        let hyphenated = EN_US.hyphenate(&original);
        let rejoined = hyphenated.iter().segments().collect::<String>();

        rejoined == original
    }

    quickcheck(property as fn(String) -> bool);
}
45 
/// Property: every opportunity reported for a string lies between the byte
/// offset of the `l_min`-th character and the byte offset of the character
/// `r_min` positions from the end, where `(l_min, r_min)` are the minima
/// reported for `EnglishUS`.
#[test]
fn opportunities_within_bounds() {
    fn property(s : String) -> TestResult {
        let (l_min, r_min) = EnglishUS.minima();

        // Byte offset at which each character of `s` starts.
        let offsets : Vec<usize> = s.char_indices().map(|(offset, _)| offset).collect();
        let char_count = offsets.len();

        // Strings shorter than the combined minima are not exercised.
        if char_count < l_min + r_min {
            return TestResult::discard();
        }

        let found = EN_US.opportunities(&s);
        let lower = offsets[l_min];
        let upper = offsets[char_count - r_min];

        let all_inside = found.iter().all(|&i| lower <= i && i <= upper);
        TestResult::from_bool(all_inside)
    }

    quickcheck(property as fn(String) -> TestResult);
}
65 
/// Checks segment contents and `size_hint` values of the segment iterator
/// for a handful of English words, both before and during consumption.
#[test]
fn basics_standard() {
    // Standard hyphenation
    let pattern_word = EN_US.hyphenate("anfractuous");
    let minimal_word = EN_US.hyphenate("hypha"); // minimum hyphenable length
    // Exceptions
    let exception_word = EN_US.hyphenate("hyphenation");
    let solid_word = EN_US.hyphenate("bevies"); // unhyphenable (by exception)

    let pattern_segs = pattern_word.iter().segments();
    let exception_segs = exception_word.iter().segments();
    let solid_segs = solid_word.iter().segments();
    let minimal_segs = minimal_word.iter().segments();

    // A fresh iterator reports exactly the number of segments it will yield.
    assert_eq!(pattern_segs.size_hint(), (4, Some(4)));
    assert_eq!(exception_segs.size_hint(), (4, Some(4)));
    assert_eq!(solid_segs.size_hint(), (1, Some(1)));
    assert_eq!(minimal_segs.size_hint(), (2, Some(2)));

    // Collect from clones so the originals stay unconsumed for the checks below.
    let pattern_parts = pattern_segs.clone().collect::<Vec<&str>>();
    let exception_parts = exception_segs.clone().collect::<Vec<&str>>();
    let solid_parts = solid_segs.clone().collect::<Vec<&str>>();
    let minimal_parts = minimal_segs.clone().collect::<Vec<&str>>();

    assert_eq!(pattern_parts, ["an", "frac", "tu", "ous"]);
    assert_eq!(exception_parts, ["hy", "phen", "a", "tion"]);
    assert_eq!(solid_parts, ["bevies"]);
    assert_eq!(minimal_parts, ["hy", "pha"]);

    // Additional size checks for partially consumed iterators.
    let mut solid_rest = solid_segs;
    solid_rest.next();
    assert_eq!(solid_rest.size_hint(), (0, Some(0)));
    // Advancing an exhausted iterator leaves the hint at zero.
    solid_rest.next();
    assert_eq!(solid_rest.size_hint(), (0, Some(0)));

    let mut minimal_rest = minimal_segs;
    minimal_rest.next();
    assert_eq!(minimal_rest.size_hint(), (1, Some(1)));
    minimal_rest.next();
    assert_eq!(minimal_rest.size_hint(), (0, Some(0)));
}
113 
/// Checks the segments produced by the Hungarian extended dictionary for two
/// words whose expected segments differ from plain substrings of the input.
#[test]
fn basics_extended() {
    let first = HU.hyphenate("asszonnyal");
    let first_parts : Vec<_> = first.into_iter().segments().collect();

    let second = HU.hyphenate("esszé");
    let second_parts : Vec<_> = second.into_iter().segments().collect();

    assert_eq!(first_parts, ["asz", "szony", "nyal"]);
    assert_eq!(second_parts, ["esz", "szé"]);
}
125 
/// Checks hyphenation of words containing the dotted capital `İ`, using both
/// the Turkish and the English (US) dictionaries.
#[test]
fn special_casing() {
    // (dictionary, word, expected segments), checked in order.
    let cases = [
        (&*TR, "İbrahim", vec!["İb", "ra", "him"]),
        (&*TR, "İLGİNÇ", vec!["İL", "GİNÇ"]),
        (&*EN_US, "MİCRO", vec!["Mİ", "CRO"]),
        (&*EN_US, "İDİOM", vec!["İD", "İOM"]),
        (&*EN_US, "MUCİLAGİNOUS", vec!["MU", "CİLAGİ", "NOUS"]),
    ];

    for (dictionary, word, expected) in cases {
        let segments : Vec<_> = dictionary.hyphenate(word).into_iter().segments().collect();
        assert_eq!(segments, expected);
    }
}
148 
/// Loading the `mul-ethi` dictionary file while requesting `EnglishUS` must
/// be reported as an error.
#[test]
fn language_mismatch_on_load() {
    let mut reader = File::open("./dictionaries/mul-ethi.standard.bincode")
        .map(BufReader::new)
        .unwrap();

    let loaded = Standard::from_reader(EnglishUS, &mut reader);
    assert!(loaded.is_err());
}
155 
/// Hyphenates running text by splitting it at Unicode word boundaries,
/// hyphenating each piece with the English (US) dictionary, and joining the
/// marked results back into one string.
#[test]
fn text() {
    use unicode_segmentation::UnicodeSegmentation;

    let hyphenate_text = |text : &str| -> String {
        let mut marked = String::new();
        for word in text.split_word_bounds() {
            marked.extend(EN_US.hyphenate(word).into_iter());
        }
        marked
    };

    let verse = "I know noble accents / And lucid, inescapable rhythms; […]";
    let verse_marked = "I know no-ble ac-cents / And lu-cid, in-escapable rhythms; […]";
    assert_eq!(hyphenate_text(verse), verse_marked);

    let phrase = "ever-burning sulphur unconsumed";
    let phrase_marked = "ever-burn-ing sul-phur un-con-sumed";
    assert_eq!(hyphenate_text(phrase), phrase_marked);
}
176 
/// Compares the default exception lookup with a lookup whose bounds span the
/// whole word: the default lookup omits the final break at index 9.
#[test]
fn bounded_exception() {
    let word = "anisotropic"; // an-iso-trop-ic, by exception
    let whole_word = (0, word.len());

    let default_breaks = EN_US.exception(word);
    let all_breaks = EN_US.exception_within(word, whole_word);

    assert_eq!(default_breaks, Some(vec![2, 5]));
    assert_eq!(all_breaks, Some(vec![2, 5, 9]));
}
187 
/// Exercises the usage examples: break indices, hyphen-marked segments,
/// unmarked segments, and an all-uppercase word.
#[test]
fn readme_examples() {
    let word = EN_US.hyphenate("hyphenation");

    // Byte indices at which the word may be broken.
    assert_eq!(word.breaks, vec![2, 6, 7]);

    // Iterating the word directly yields segments with a trailing hyphen.
    let with_hyphens = word.iter().collect::<Vec<String>>();
    assert_eq!(with_hyphens, ["hy-", "phen-", "a-", "tion"]);

    // `segments()` yields the same pieces without the hyphen marks.
    let plain = word.iter().segments().collect::<Vec<&str>>();
    assert_eq!(plain, ["hy", "phen", "a", "tion"]);

    let capital = EN_US.hyphenate("CAPITAL");
    let capital_parts : Vec<_> = capital.into_iter().segments().collect();
    assert_eq!(capital_parts, ["CAP", "I", "TAL"]);
}
206