1
2 /******************************************************
3 * Presage, an extensible predictive text entry system
4 * ---------------------------------------------------
5 *
6 * Copyright (C) 2008 Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License along
19 with this program; if not, write to the Free Software Foundation, Inc.,
20 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 **********(*)*/
23
24
25 #include "recencyPredictor.h"
26
27 #include <math.h> // for exp()
28
RecencyPredictor(Configuration * config,ContextTracker * ct,const char * name)29 RecencyPredictor::RecencyPredictor(Configuration* config, ContextTracker* ct, const char* name)
30 : Predictor(config,
31 ct,
32 name,
33 "RecencyPredictor, a statistical recency promotion predictor",
34 "RecencyPredictor, based on a recency promotion principle, generates predictions by assigning exponentially decaying probability values to previously encountered tokens. Tokens are assigned a probability value that decays exponentially with their distance from the current token, thereby promoting context recency." ),
35 dispatcher (this)
36 {
37 // RecencyPredictor config variables
38 LOGGER = PREDICTORS + name + ".LOGGER";
39 LAMBDA = PREDICTORS + name + ".LAMBDA";
40 N_0 = PREDICTORS + name + ".N_0";
41 CUTOFF_THRESHOLD = PREDICTORS + name + ".CUTOFF_THRESHOLD";
42
43 // init default values
44 lambda = 1;
45 n_0 = 1;
46 cutoff_threshold = 20;
47
48 dispatcher.map(config->find (LOGGER), &RecencyPredictor::set_logger);
49 dispatcher.map(config->find (LAMBDA), &RecencyPredictor::set_lambda);
50 dispatcher.map(config->find (N_0), &RecencyPredictor::set_n_0);
51 dispatcher.map(config->find (CUTOFF_THRESHOLD), &RecencyPredictor::set_cutoff_threshold);
52 }
53
~RecencyPredictor()54 RecencyPredictor::~RecencyPredictor()
55 {
56 // complete
57 }
58
set_lambda(const std::string & value)59 void RecencyPredictor::set_lambda (const std::string& value)
60 {
61 lambda = Utility::toDouble(value);
62 logger << INFO << "LAMBDA: " << value << endl;
63 }
64
set_n_0(const std::string & value)65 void RecencyPredictor::set_n_0 (const std::string& value)
66 {
67 n_0 = Utility::toDouble (value);
68 logger << INFO << "N_0: " << value << endl;
69 }
70
71
set_cutoff_threshold(const std::string & value)72 void RecencyPredictor::set_cutoff_threshold (const std::string& value)
73 {
74 cutoff_threshold = Utility::toInt (value);
75 logger << INFO << "CUTOFF_THRESHOLD: " << value << endl;
76 }
77
78
predict(const size_t max,const char ** filter) const79 Prediction RecencyPredictor::predict (const size_t max, const char** filter) const
80 {
81 Prediction result;
82
83 std::string prefix = contextTracker->getPrefix();
84 logger << INFO << "prefix: " << prefix << endl;
85 if (!prefix.empty()) {
86 // Only build recency prediction if prefix is not empty: when
87 // prefix is empty, all previosly seen tokens are candidates
88 // for prediction. This is not desirable, because it means
89 // that recency prediction reduces to repetion of max previous
90 // tokens (i.e. the prediction would contain the most recent
91 // tokens in reverse order).
92 //
93 Suggestion suggestion;
94 size_t index = 1;
95 std::string token = contextTracker->getToken(index);
96 double prob = 0;
97 while (!token.empty() // context history exhausted
98 && result.size() < max // need only max suggestions
99 && index <= cutoff_threshold // look back only as far as cutoff
100 ) {
101 logger << INFO << "token: " << token << endl;
102
103 if (token.find(prefix) == 0) { // if token starts with prefix
104
105 if (token_satisfies_filter (token, prefix, filter)) {
106 // compute probability according to exponential decay
107 // formula
108 //
109 prob = n_0 * exp(-(lambda * (index - 1)));
110 logger << INFO << "probability: " << prob << endl;
111 suggestion.setWord(token);
112 suggestion.setProbability(prob);
113 result.addSuggestion(suggestion);
114 }
115
116 }
117
118 index++;
119 token = contextTracker->getToken(index);
120 }
121 }
122
123 return result;
124 }
125
// Learning is intentionally a no-op for this predictor: recency
// information is read directly from the context tracker at prediction
// time, so there is no model state to update here.
void RecencyPredictor::learn(const std::vector<std::string>& change)
{}
128
// Observer callback, invoked when a watched configuration variable
// changes; forwards the variable to the dispatcher, which routes it to
// the setter registered in the constructor.
void RecencyPredictor::update (const Observable* var)
{
    logger << DEBUG << "About to invoke dispatcher: " << var->get_name () << " - " << var->get_value() << endl;
    dispatcher.dispatch (var);
}
134