1 extern crate fst;
2 extern crate fst_levenshtein;
3 extern crate fst_regex;
4
5 use fst_levenshtein::Levenshtein;
6 use fst_regex::Regex;
7
8 use fst::{Automaton, IntoStreamer, Streamer};
9 use fst::automaton::{Str, Subsequence};
10 use fst::raw::{Builder, Fst, Output};
11 use fst::set::{Set, OpBuilder};
12
13 static WORDS: &'static str = include_str!("../data/words-10000");
14
get_set() -> Set15 fn get_set() -> Set {
16 Set::from_iter(WORDS.lines()).unwrap()
17 }
18
fst_set<I, S>(ss: I) -> Fst where I: IntoIterator<Item=S>, S: AsRef<[u8]>19 fn fst_set<I, S>(ss: I) -> Fst
20 where I: IntoIterator<Item=S>, S: AsRef<[u8]> {
21 let mut bfst = Builder::memory();
22 let mut ss: Vec<Vec<u8>> =
23 ss.into_iter().map(|s| s.as_ref().to_vec()).collect();
24 ss.sort();
25 for s in ss.iter().into_iter() {
26 bfst.add(s).unwrap();
27 }
28 let fst = Fst::from_bytes(bfst.into_inner().unwrap()).unwrap();
29 ss.dedup();
30 assert_eq!(fst.len(), ss.len());
31 fst
32 }
33
/// A regex search bounded by `ge("abd")` and `lt("ax")` is expected to
/// produce exactly one entry, "abd", with a zero output.
#[test]
fn regex_simple() {
    let fst = fst_set(vec!["abc", "abd", "ayz", "za"]);
    let pattern = Regex::new("a[a-z]*").unwrap();
    let mut matches = fst
        .search(&pattern)
        .ge("abd")
        .lt("ax")
        .into_stream();

    assert_eq!(matches.next(), Some((&b"abd"[..], Output::zero())));
    assert!(matches.next().is_none());
}
42
/// Searching with `Levenshtein::new("woog", 1)` is expected to return
/// "wood" and "woof" (in that order) and to leave "banana" out.
#[test]
fn levenshtein_simple() {
    let fst = fst_set(vec!["woof", "wood", "banana"]);
    let query = Levenshtein::new("woog", 1).unwrap();

    let found = fst.search(&query).into_stream().into_byte_keys();
    let expected = vec!["wood".as_bytes(), "woof".as_bytes()];
    assert_eq!(found, expected);
}
50
/// Searching with `Levenshtein::new("snoman", 3)` is expected to return only
/// the key containing non-ASCII characters, "☃snowman☃".
#[test]
fn levenshtein_unicode() {
    let fst = fst_set(vec!["woof", "wood", "banana", "☃snowman☃"]);
    let query = Levenshtein::new("snoman", 3).unwrap();

    let found = fst.search(&query).into_stream().into_byte_keys();
    let expected = vec!["☃snowman☃".as_bytes()];
    assert_eq!(found, expected);
}
58
/// Searching with the complement of `Levenshtein::new("foo", 1)` is expected
/// to return only "fa", "focus" and "foul".
#[test]
fn complement_small() {
    let set = Set::from_iter(vec![
        "fa", "fo", "fob", "focus", "foo", "food", "foul",
    ])
    .unwrap();
    let not_lev = Levenshtein::new("foo", 1).unwrap().complement();

    let found = set.search(not_lev).into_stream().into_strs().unwrap();
    assert_eq!(found, vec!["fa", "focus", "foul"]);
}
69
/// Searching with `Levenshtein::new("foo", 1).starts_with()` is expected to
/// return the keys listed below; the empty key, "fa" and "fritter" are in
/// the set but are not expected in the results.
#[test]
fn startswith_small() {
    let set = Set::from_iter(vec![
        "", "cooing", "fa", "fo", "fob", "focus", "foo", "food", "foul",
        "fritter", "frothing",
    ])
    .unwrap();
    let prefix_lev = Levenshtein::new("foo", 1).unwrap().starts_with();

    let found = set.search(prefix_lev).into_stream().into_strs().unwrap();
    let expected = vec![
        "cooing", "fo", "fob", "focus", "foo", "food", "foul", "frothing",
    ];
    assert_eq!(found, expected);
}
85
/// Intersecting `Levenshtein::new("foo", 1)` with the regex `(..)*` is
/// expected to return only "fo" and "food".
#[test]
fn intersection_small() {
    let set = Set::from_iter(vec![
        "fa", "fo", "fob", "focus", "foo", "food", "foul",
    ])
    .unwrap();
    let lev = Levenshtein::new("foo", 1).unwrap();
    let reg = Regex::new("(..)*").unwrap();
    let both = lev.intersection(reg);

    let found = set.search(both).into_stream().into_strs().unwrap();
    assert_eq!(found, vec!["fo", "food"]);
}
97
/// The union of `Levenshtein::new("foo", 1)` and the regex `(..)*` is
/// expected to return every key in the set except "focus".
#[test]
fn union_small() {
    let set = Set::from_iter(vec![
        "fa", "fo", "fob", "focus", "foo", "food", "foul",
    ])
    .unwrap();
    let lev = Levenshtein::new("foo", 1).unwrap();
    let reg = Regex::new("(..)*").unwrap();
    let either = lev.union(reg);

    let found = set.search(either).into_stream().into_strs().unwrap();
    assert_eq!(found, vec!["fa", "fo", "fob", "foo", "food", "foul"]);
}
109
/// Checks, on the full word list, that searching with a combined
/// `intersection` automaton yields the same keys as intersecting the two
/// separate search streams through `OpBuilder`.
#[test]
fn intersection_large() {
    let set = get_set();
    let lev = Levenshtein::new("foo", 3).unwrap();
    let reg = Regex::new("(..)*").unwrap();

    // Both automata are borrowed so they can be reused for `stream2` below.
    let mut stream1 = set.search((&lev).intersection(&reg)).into_stream();
    let mut stream2 = OpBuilder::new()
        .add(set.search(&lev))
        .add(set.search(&reg))
        .intersection();

    // Walk both streams in lockstep; every key from `stream1` must be the
    // next key of `stream2`.
    while let Some(key1) = stream1.next() {
        assert_eq!(stream2.next(), Some(key1));
    }
    // `stream2` must not contain anything beyond what `stream1` produced.
    assert_eq!(stream2.next(), None);
}
125
/// Checks, on the full word list, that searching with a combined `union`
/// automaton yields the same keys as taking the union of the two separate
/// search streams through `OpBuilder`.
#[test]
fn union_large() {
    let set = get_set();
    let lev = Levenshtein::new("foo", 3).unwrap();
    let reg = Regex::new("(..)*").unwrap();

    // Both automata are borrowed so they can be reused for `stream2` below.
    let mut stream1 = set.search((&lev).union(&reg)).into_stream();
    let mut stream2 = OpBuilder::new()
        .add(set.search(&lev))
        .add(set.search(&reg))
        .union();

    // Walk both streams in lockstep; every key from `stream1` must be the
    // next key of `stream2`.
    while let Some(key1) = stream1.next() {
        assert_eq!(stream2.next(), Some(key1));
    }
    // `stream2` must not contain anything beyond what `stream1` produced.
    assert_eq!(stream2.next(), None);
}
141
/// Exercises the `Str` automaton against the full word list: an exact match,
/// an exact miss, and a prefix search through `starts_with`.
#[test]
fn str() {
    let set = get_set();

    // Exact match: "vatican" is expected to be the single result.
    let exact = Str::new("vatican");
    let mut hits = set.search(&exact).into_stream();
    assert_eq!(hits.next().unwrap(), b"vatican");
    assert_eq!(hits.next(), None);

    // Exact miss: "abracadabra" is expected to produce no results.
    let exact_mismatch = Str::new("abracadabra");
    let mut hits = set.search(&exact_mismatch).into_stream();
    assert_eq!(hits.next(), None);

    // Prefix search: "vati" is expected to find "vatican" then "vation".
    let starts_with = Str::new("vati").starts_with();
    let mut hits = set.search(&starts_with).into_stream();
    assert_eq!(hits.next().unwrap(), b"vatican");
    assert_eq!(hits.next().unwrap(), b"vation");
    assert_eq!(hits.next(), None);
}
161
/// Checks, on the full word list, that `Subsequence::new("aab")` yields the
/// same keys as the regex `.*a.*a.*b.*`.
#[test]
fn subsequence() {
    let set = get_set();
    let subseq = Subsequence::new("aab");
    let regex = Regex::new(".*a.*a.*b.*").unwrap();

    let mut stream1 = set.search(&subseq).into_stream();
    let mut stream2 = set.search(&regex).into_stream();

    // Walk both streams in lockstep; every key from `stream1` must be the
    // next key of `stream2`.
    while let Some(key1) = stream1.next() {
        assert_eq!(stream2.next(), Some(key1));
    }
    // `stream2` must not contain anything beyond what `stream1` produced.
    assert_eq!(stream2.next(), None);
}
174
/// `fst::Map` and `fst::Set` both implement `Default`, and the default
/// values are expected to be empty.
#[test]
fn implements_default() {
    let default_map = fst::Map::default();
    assert!(default_map.is_empty());

    let default_set = fst::Set::default();
    assert!(default_set.is_empty());
}
183