1 
2 /******************************************************
3  *  Presage, an extensible predictive text entry system
4  *  ---------------------------------------------------
5  *
6  *  Copyright (C) 2008  Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
7 
8     This program is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation; either version 2 of the License, or
11     (at your option) any later version.
12 
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU General Public License for more details.
17 
18     You should have received a copy of the GNU General Public License along
19     with this program; if not, write to the Free Software Foundation, Inc.,
20     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21                                                                              *
22                                                                 **********(*)*/
23 
24 
25 #include "newSmoothedNgramPredictorTest.h"
26 #include "../common/stringstreamPresageCallback.h"
27 
28 #include "core/predictorRegistry.h"
29 
30 #include <cstdio>  // for remove()
31 
32 CPPUNIT_TEST_SUITE_REGISTRATION( NewSmoothedNgramPredictorTest );
33 
34 const char*  NewSmoothedNgramPredictorTest::DATABASE = "new_database.db";
35 const size_t NewSmoothedNgramPredictorTest::CARDINALITY = 3;
36 const bool   NewSmoothedNgramPredictorTest::READ_WRITE_MODE = true;
37 
38 const int    NewSmoothedNgramPredictorTest::SIZE     = 20;
39 
setUp()40 void NewSmoothedNgramPredictorTest::setUp()
41 {
42     remove(DATABASE);
43 
44     // prepare database
45     SqliteDatabaseConnector db(DATABASE, CARDINALITY, READ_WRITE_MODE);
46     db.createUnigramTable();
47     db.createBigramTable();
48     db.createTrigramTable();
49 
50     config = new Configuration();
51     // set context tracker config variables
52     config->insert ("Presage.ContextTracker.LOGGER", "ERROR");
53     config->insert ("Presage.ContextTracker.SLIDING_WINDOW_SIZE", "80");
54     config->insert ("Presage.ContextTracker.LOWERCASE_MODE", "no");
55     config->insert ("Presage.ContextTracker.ONLINE_LEARNING", "yes");
56 
57     // set predictor registry config variables
58     config->insert ("Presage.PredictorRegistry.LOGGER", "ERROR");
59     config->insert ("Presage.PredictorRegistry.PREDICTORS", "SmoothedNgramPredictor");
60     // set predictor config variables
61     config->insert ("Presage.Predictors.SmoothedNgramPredictor.PREDICTOR", "SmoothedNgramPredictor");
62     config->insert ("Presage.Predictors.SmoothedNgramPredictor.LOGGER", "ERROR");
63     config->insert ("Presage.Predictors.SmoothedNgramPredictor.DELTAS", "0.001 0.01 0.889");
64     config->insert ("Presage.Predictors.SmoothedNgramPredictor.DBFILENAME", DATABASE);
65     config->insert ("Presage.Predictors.SmoothedNgramPredictor.LEARN", "true");
66     config->insert ("Presage.Predictors.SmoothedNgramPredictor.DatabaseConnector.LOGGER", "ERROR");
67 
68     predictorRegistry = new PredictorRegistry(config);
69     stream = new std::stringstream();
70     callback = new StringstreamPresageCallback(*stream);
71     ct = new ContextTracker(config, predictorRegistry, callback);
72 }
73 
tearDown()74 void NewSmoothedNgramPredictorTest::tearDown()
75 {
76     delete ct;
77     delete callback;
78     delete predictorRegistry;
79     delete config;
80 
81     remove(DATABASE);
82 }
83 
testOnlineLearning()84 void NewSmoothedNgramPredictorTest::testOnlineLearning()
85 {
86     // get pointer to predictor
87     Predictor* predictor = predictorRegistry->iterator().next();
88 
89     {
90 	*stream << "f";
91 	ct->update();
92 	Prediction actual = predictor->predict(SIZE, 0);
93 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(0), actual.size());
94     }
95 
96     {
97 	*stream << "o";
98 	ct->update();
99 	Prediction actual = predictor->predict(SIZE, 0);
100 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(0), actual.size());
101     }
102 
103     {
104 	*stream << "o ";
105 	ct->update();
106 	Prediction actual = predictor->predict(SIZE, 0);
107 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), actual.size());
108 	CPPUNIT_ASSERT_EQUAL(std::string("foo"), actual.getSuggestion(0).getWord());
109 	ct->update();
110     }
111 
112     {
113 	*stream << "bar";
114 	ct->update();
115 	Prediction actual = predictor->predict(SIZE, 0);
116     }
117 
118     {
119 	*stream << " ";
120 	ct->update();
121 	Prediction actual = predictor->predict(SIZE, 0);
122 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), actual.size());
123 	CPPUNIT_ASSERT_EQUAL(std::string("foo"), actual.getSuggestion(0).getWord());
124 	CPPUNIT_ASSERT_EQUAL(std::string("bar"), actual.getSuggestion(1).getWord());
125     }
126 
127     {
128 	*stream << "foobar ";
129 	ct->update();
130 	Prediction actual = predictor->predict(SIZE, 0);
131 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(3), actual.size());
132 	CPPUNIT_ASSERT_EQUAL(std::string("foobar"), actual.getSuggestion(0).getWord());
133 	CPPUNIT_ASSERT_EQUAL(std::string("foo"), actual.getSuggestion(1).getWord());
134 	CPPUNIT_ASSERT_EQUAL(std::string("bar"), actual.getSuggestion(2).getWord());
135     }
136 
137     {
138 	*stream << "f";
139 	ct->update();
140 	Prediction actual = predictor->predict(SIZE, 0);
141 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), actual.size());
142 	CPPUNIT_ASSERT_EQUAL(std::string("foobar"), actual.getSuggestion(0).getWord());
143 	CPPUNIT_ASSERT_EQUAL(std::string("foo"), actual.getSuggestion(1).getWord());
144     }
145 }
146 
testOfflineLearning()147 void NewSmoothedNgramPredictorTest::testOfflineLearning()
148 {
149     // turns off online learning
150     config->find("Presage.ContextTracker.ONLINE_LEARNING")->set_value("no");
151 
152     // get pointer to predictor
153     Predictor* predictor = predictorRegistry->iterator().next();
154 
155     {
156 	*stream << "f";
157 	ct->update();
158 	Prediction actual = predictor->predict(SIZE, 0);
159 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(0), actual.size());
160     }
161 
162     {
163 	*stream << "o";
164 	ct->update();
165 	Prediction actual = predictor->predict(SIZE, 0);
166 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(0), actual.size());
167     }
168 
169     {
170 	*stream << "o ";
171 	ct->update();
172 	Prediction actual = predictor->predict(SIZE, 0);
173 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(0), actual.size());
174     }
175 
176     {
177 	*stream << "bar";
178 	ct->update();
179 	Prediction actual = predictor->predict(SIZE, 0);
180     }
181 
182     {
183 	*stream << " ";
184 	ct->update();
185 	Prediction actual = predictor->predict(SIZE, 0);
186 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(0), actual.size());
187     }
188 
189     {
190 	*stream << "foobar ";
191 	ct->update();
192 	Prediction actual = predictor->predict(SIZE, 0);
193 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(0), actual.size());
194     }
195 
196     {
197 	ct->learn(stream->str());
198 	Prediction actual = predictor->predict(SIZE, 0);
199 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(3), actual.size());
200 	CPPUNIT_ASSERT_EQUAL(std::string("foobar"), actual.getSuggestion(0).getWord());
201 	CPPUNIT_ASSERT_EQUAL(std::string("foo"), actual.getSuggestion(1).getWord());
202 	CPPUNIT_ASSERT_EQUAL(std::string("bar"), actual.getSuggestion(2).getWord());
203     }
204 
205     {
206 	*stream << "f";
207 	ct->update();
208 	Prediction actual = predictor->predict(SIZE, 0);
209 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), actual.size());
210 	CPPUNIT_ASSERT_EQUAL(std::string("foobar"), actual.getSuggestion(0).getWord());
211 	CPPUNIT_ASSERT_EQUAL(std::string("foo"), actual.getSuggestion(1).getWord());
212     }
213 }
214 
testFilter()215 void NewSmoothedNgramPredictorTest::testFilter()
216 {
217     // get pointer to predictor
218     Predictor* predictor = predictorRegistry->iterator().next();
219 
220     std::vector<std::string> change;
221     change.push_back("foo");
222     change.push_back("bar");
223     change.push_back("foobar");
224     change.push_back("foz");
225     change.push_back("baz");
226     change.push_back("fozbaz");
227     change.push_back("roo");
228     change.push_back("rar");
229     change.push_back("roobar");
230 
231     // Learn some context so that we have data to create non-empty
232     // predictions
233     //
234     predictor->learn(change);
235 
236     // Alternatively, predictor could have learnt thus...
237     //    *stream << "foo bar foobar foz baz fozbaz roo rar roobar ";
238     //    ct->update();
239 
240     {
241 	Prediction actual = predictor->predict(SIZE, 0);
242 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(9), actual.size());
243     }
244 
245     {
246 	const char* filters[] = {"f", 0};
247 	Prediction actual = predictor->predict(SIZE, filters);
248 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(4), actual.size());
249     }
250 
251     {
252 	const char* filters[] = {"b", 0};
253 	Prediction actual = predictor->predict(SIZE, filters);
254 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), actual.size());
255     }
256 
257     {
258 	const char* filters[] = {"r", 0};
259 	Prediction actual = predictor->predict(SIZE, filters);
260 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(3), actual.size());
261     }
262 
263     {
264 	const char* filters[] = {"f", "b", 0};
265 	Prediction actual = predictor->predict(SIZE, filters);
266 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(6), actual.size());
267     }
268 
269     {
270 	const char* filters[] = {"f", "r", 0};
271 	Prediction actual = predictor->predict(SIZE, filters);
272 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(7), actual.size());
273     }
274 
275     {
276 	const char* filters[] = {"f", "b", "r", 0};
277 	Prediction actual = predictor->predict(SIZE, filters);
278 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(9), actual.size());
279     }
280 
281     {
282 	const char* filters[] = {"fo", 0};
283 	Prediction actual = predictor->predict(SIZE, filters);
284 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(4), actual.size());
285     }
286 
287     {
288 	const char* filters[] = {"foo", 0};
289 	Prediction actual = predictor->predict(SIZE, filters);
290 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), actual.size());
291     }
292 
293     {
294 	const char* filters[] = {"fo", "ba", 0};
295 	Prediction actual = predictor->predict(SIZE, filters);
296 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(6), actual.size());
297     }
298 
299     {
300 	const char* filters[] = {"fo", "ba", "ro", 0};
301 	Prediction actual = predictor->predict(SIZE, filters);
302 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(8), actual.size());
303     }
304 
305     {
306 	const char* filters[] = {"foo", "bar", 0};
307 	Prediction actual = predictor->predict(SIZE, filters);
308 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(3), actual.size());
309     }
310 
311     {
312 	const char* filters[] = {"foobar", "fozba", "roo", 0};
313 	Prediction actual = predictor->predict(SIZE, filters);
314 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(4), actual.size());
315     }
316 
317     {
318 	const char* filters[] = {"foobar", "fozbaz", "roobar", 0};
319 	Prediction actual = predictor->predict(SIZE, filters);
320 	CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(3), actual.size());
321     }
322 
323 }
324