1
2 /******************************************************
3 * Presage, an extensible predictive text entry system
4 * ---------------------------------------------------
5 *
6 * Copyright (C) 2008 Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License along
19 with this program; if not, write to the Free Software Foundation, Inc.,
20 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 **********(*)*/
23
24
25 #include "newSmoothedNgramPredictorTest.h"
26 #include "../common/stringstreamPresageCallback.h"
27
28 #include "core/predictorRegistry.h"
29
30 #include <cstdio> // for remove()
31
32 CPPUNIT_TEST_SUITE_REGISTRATION( NewSmoothedNgramPredictorTest );
33
34 const char* NewSmoothedNgramPredictorTest::DATABASE = "new_database.db";
35 const size_t NewSmoothedNgramPredictorTest::CARDINALITY = 3;
36 const bool NewSmoothedNgramPredictorTest::READ_WRITE_MODE = true;
37
38 const int NewSmoothedNgramPredictorTest::SIZE = 20;
39
setUp()40 void NewSmoothedNgramPredictorTest::setUp()
41 {
42 remove(DATABASE);
43
44 // prepare database
45 SqliteDatabaseConnector db(DATABASE, CARDINALITY, READ_WRITE_MODE);
46 db.createUnigramTable();
47 db.createBigramTable();
48 db.createTrigramTable();
49
50 config = new Configuration();
51 // set context tracker config variables
52 config->insert ("Presage.ContextTracker.LOGGER", "ERROR");
53 config->insert ("Presage.ContextTracker.SLIDING_WINDOW_SIZE", "80");
54 config->insert ("Presage.ContextTracker.LOWERCASE_MODE", "no");
55 config->insert ("Presage.ContextTracker.ONLINE_LEARNING", "yes");
56
57 // set predictor registry config variables
58 config->insert ("Presage.PredictorRegistry.LOGGER", "ERROR");
59 config->insert ("Presage.PredictorRegistry.PREDICTORS", "SmoothedNgramPredictor");
60 // set predictor config variables
61 config->insert ("Presage.Predictors.SmoothedNgramPredictor.PREDICTOR", "SmoothedNgramPredictor");
62 config->insert ("Presage.Predictors.SmoothedNgramPredictor.LOGGER", "ERROR");
63 config->insert ("Presage.Predictors.SmoothedNgramPredictor.DELTAS", "0.001 0.01 0.889");
64 config->insert ("Presage.Predictors.SmoothedNgramPredictor.DBFILENAME", DATABASE);
65 config->insert ("Presage.Predictors.SmoothedNgramPredictor.LEARN", "true");
66 config->insert ("Presage.Predictors.SmoothedNgramPredictor.DatabaseConnector.LOGGER", "ERROR");
67
68 predictorRegistry = new PredictorRegistry(config);
69 stream = new std::stringstream();
70 callback = new StringstreamPresageCallback(*stream);
71 ct = new ContextTracker(config, predictorRegistry, callback);
72 }
73
tearDown()74 void NewSmoothedNgramPredictorTest::tearDown()
75 {
76 delete ct;
77 delete callback;
78 delete predictorRegistry;
79 delete config;
80
81 remove(DATABASE);
82 }
83
testOnlineLearning()84 void NewSmoothedNgramPredictorTest::testOnlineLearning()
85 {
86 // get pointer to predictor
87 Predictor* predictor = predictorRegistry->iterator().next();
88
89 {
90 *stream << "f";
91 ct->update();
92 Prediction actual = predictor->predict(SIZE, 0);
93 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(0), actual.size());
94 }
95
96 {
97 *stream << "o";
98 ct->update();
99 Prediction actual = predictor->predict(SIZE, 0);
100 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(0), actual.size());
101 }
102
103 {
104 *stream << "o ";
105 ct->update();
106 Prediction actual = predictor->predict(SIZE, 0);
107 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), actual.size());
108 CPPUNIT_ASSERT_EQUAL(std::string("foo"), actual.getSuggestion(0).getWord());
109 ct->update();
110 }
111
112 {
113 *stream << "bar";
114 ct->update();
115 Prediction actual = predictor->predict(SIZE, 0);
116 }
117
118 {
119 *stream << " ";
120 ct->update();
121 Prediction actual = predictor->predict(SIZE, 0);
122 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), actual.size());
123 CPPUNIT_ASSERT_EQUAL(std::string("foo"), actual.getSuggestion(0).getWord());
124 CPPUNIT_ASSERT_EQUAL(std::string("bar"), actual.getSuggestion(1).getWord());
125 }
126
127 {
128 *stream << "foobar ";
129 ct->update();
130 Prediction actual = predictor->predict(SIZE, 0);
131 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(3), actual.size());
132 CPPUNIT_ASSERT_EQUAL(std::string("foobar"), actual.getSuggestion(0).getWord());
133 CPPUNIT_ASSERT_EQUAL(std::string("foo"), actual.getSuggestion(1).getWord());
134 CPPUNIT_ASSERT_EQUAL(std::string("bar"), actual.getSuggestion(2).getWord());
135 }
136
137 {
138 *stream << "f";
139 ct->update();
140 Prediction actual = predictor->predict(SIZE, 0);
141 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), actual.size());
142 CPPUNIT_ASSERT_EQUAL(std::string("foobar"), actual.getSuggestion(0).getWord());
143 CPPUNIT_ASSERT_EQUAL(std::string("foo"), actual.getSuggestion(1).getWord());
144 }
145 }
146
testOfflineLearning()147 void NewSmoothedNgramPredictorTest::testOfflineLearning()
148 {
149 // turns off online learning
150 config->find("Presage.ContextTracker.ONLINE_LEARNING")->set_value("no");
151
152 // get pointer to predictor
153 Predictor* predictor = predictorRegistry->iterator().next();
154
155 {
156 *stream << "f";
157 ct->update();
158 Prediction actual = predictor->predict(SIZE, 0);
159 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(0), actual.size());
160 }
161
162 {
163 *stream << "o";
164 ct->update();
165 Prediction actual = predictor->predict(SIZE, 0);
166 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(0), actual.size());
167 }
168
169 {
170 *stream << "o ";
171 ct->update();
172 Prediction actual = predictor->predict(SIZE, 0);
173 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(0), actual.size());
174 }
175
176 {
177 *stream << "bar";
178 ct->update();
179 Prediction actual = predictor->predict(SIZE, 0);
180 }
181
182 {
183 *stream << " ";
184 ct->update();
185 Prediction actual = predictor->predict(SIZE, 0);
186 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(0), actual.size());
187 }
188
189 {
190 *stream << "foobar ";
191 ct->update();
192 Prediction actual = predictor->predict(SIZE, 0);
193 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(0), actual.size());
194 }
195
196 {
197 ct->learn(stream->str());
198 Prediction actual = predictor->predict(SIZE, 0);
199 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(3), actual.size());
200 CPPUNIT_ASSERT_EQUAL(std::string("foobar"), actual.getSuggestion(0).getWord());
201 CPPUNIT_ASSERT_EQUAL(std::string("foo"), actual.getSuggestion(1).getWord());
202 CPPUNIT_ASSERT_EQUAL(std::string("bar"), actual.getSuggestion(2).getWord());
203 }
204
205 {
206 *stream << "f";
207 ct->update();
208 Prediction actual = predictor->predict(SIZE, 0);
209 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), actual.size());
210 CPPUNIT_ASSERT_EQUAL(std::string("foobar"), actual.getSuggestion(0).getWord());
211 CPPUNIT_ASSERT_EQUAL(std::string("foo"), actual.getSuggestion(1).getWord());
212 }
213 }
214
testFilter()215 void NewSmoothedNgramPredictorTest::testFilter()
216 {
217 // get pointer to predictor
218 Predictor* predictor = predictorRegistry->iterator().next();
219
220 std::vector<std::string> change;
221 change.push_back("foo");
222 change.push_back("bar");
223 change.push_back("foobar");
224 change.push_back("foz");
225 change.push_back("baz");
226 change.push_back("fozbaz");
227 change.push_back("roo");
228 change.push_back("rar");
229 change.push_back("roobar");
230
231 // Learn some context so that we have data to create non-empty
232 // predictions
233 //
234 predictor->learn(change);
235
236 // Alternatively, predictor could have learnt thus...
237 // *stream << "foo bar foobar foz baz fozbaz roo rar roobar ";
238 // ct->update();
239
240 {
241 Prediction actual = predictor->predict(SIZE, 0);
242 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(9), actual.size());
243 }
244
245 {
246 const char* filters[] = {"f", 0};
247 Prediction actual = predictor->predict(SIZE, filters);
248 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(4), actual.size());
249 }
250
251 {
252 const char* filters[] = {"b", 0};
253 Prediction actual = predictor->predict(SIZE, filters);
254 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), actual.size());
255 }
256
257 {
258 const char* filters[] = {"r", 0};
259 Prediction actual = predictor->predict(SIZE, filters);
260 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(3), actual.size());
261 }
262
263 {
264 const char* filters[] = {"f", "b", 0};
265 Prediction actual = predictor->predict(SIZE, filters);
266 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(6), actual.size());
267 }
268
269 {
270 const char* filters[] = {"f", "r", 0};
271 Prediction actual = predictor->predict(SIZE, filters);
272 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(7), actual.size());
273 }
274
275 {
276 const char* filters[] = {"f", "b", "r", 0};
277 Prediction actual = predictor->predict(SIZE, filters);
278 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(9), actual.size());
279 }
280
281 {
282 const char* filters[] = {"fo", 0};
283 Prediction actual = predictor->predict(SIZE, filters);
284 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(4), actual.size());
285 }
286
287 {
288 const char* filters[] = {"foo", 0};
289 Prediction actual = predictor->predict(SIZE, filters);
290 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), actual.size());
291 }
292
293 {
294 const char* filters[] = {"fo", "ba", 0};
295 Prediction actual = predictor->predict(SIZE, filters);
296 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(6), actual.size());
297 }
298
299 {
300 const char* filters[] = {"fo", "ba", "ro", 0};
301 Prediction actual = predictor->predict(SIZE, filters);
302 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(8), actual.size());
303 }
304
305 {
306 const char* filters[] = {"foo", "bar", 0};
307 Prediction actual = predictor->predict(SIZE, filters);
308 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(3), actual.size());
309 }
310
311 {
312 const char* filters[] = {"foobar", "fozba", "roo", 0};
313 Prediction actual = predictor->predict(SIZE, filters);
314 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(4), actual.size());
315 }
316
317 {
318 const char* filters[] = {"foobar", "fozbaz", "roobar", 0};
319 Prediction actual = predictor->predict(SIZE, filters);
320 CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(3), actual.size());
321 }
322
323 }
324