1 
2 /******************************************************
3  *  Presage, an extensible predictive text entry system
4  *  ---------------------------------------------------
5  *
6  *  Copyright (C) 2008  Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
7 
8     This program is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation; either version 2 of the License, or
11     (at your option) any later version.
12 
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU General Public License for more details.
17 
18     You should have received a copy of the GNU General Public License along
19     with this program; if not, write to the Free Software Foundation, Inc.,
20     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21                                                                              *
22                                                                 **********(*)*/
23 
24 
25 #include "recencyPredictor.h"
26 
27 #include <math.h>  // for exp()
28 
RecencyPredictor(Configuration * config,ContextTracker * ct,const char * name)29 RecencyPredictor::RecencyPredictor(Configuration* config, ContextTracker* ct, const char* name)
30     : Predictor(config,
31 		ct,
32 		name,
33 		"RecencyPredictor, a statistical recency promotion predictor",
34 		"RecencyPredictor, based on a recency promotion principle, generates predictions by assigning exponentially decaying probability values to previously encountered tokens. Tokens are assigned a probability value that decays exponentially with their distance from the current token, thereby promoting context recency." ),
35       dispatcher (this)
36 {
37     // RecencyPredictor config variables
38     LOGGER           = PREDICTORS + name + ".LOGGER";
39     LAMBDA           = PREDICTORS + name + ".LAMBDA";
40     N_0              = PREDICTORS + name + ".N_0";
41     CUTOFF_THRESHOLD = PREDICTORS + name + ".CUTOFF_THRESHOLD";
42 
43     // init default values
44     lambda = 1;
45     n_0 = 1;
46     cutoff_threshold = 20;
47 
48     dispatcher.map(config->find (LOGGER),            &RecencyPredictor::set_logger);
49     dispatcher.map(config->find (LAMBDA),            &RecencyPredictor::set_lambda);
50     dispatcher.map(config->find (N_0),               &RecencyPredictor::set_n_0);
51     dispatcher.map(config->find (CUTOFF_THRESHOLD),  &RecencyPredictor::set_cutoff_threshold);
52 }
53 
// Destructor. No owned resources to release: the dispatcher is a
// value member and the Configuration/ContextTracker pointers are
// not owned by this predictor.
RecencyPredictor::~RecencyPredictor()
{
    // complete
}
58 
set_lambda(const std::string & value)59 void RecencyPredictor::set_lambda (const std::string& value)
60 {
61     lambda = Utility::toDouble(value);
62     logger << INFO << "LAMBDA: " << value << endl;
63 }
64 
set_n_0(const std::string & value)65 void RecencyPredictor::set_n_0 (const std::string& value)
66 {
67     n_0 = Utility::toDouble (value);
68     logger << INFO << "N_0: " << value << endl;
69 }
70 
71 
set_cutoff_threshold(const std::string & value)72 void RecencyPredictor::set_cutoff_threshold (const std::string& value)
73 {
74     cutoff_threshold = Utility::toInt (value);
75     logger << INFO << "CUTOFF_THRESHOLD: " << value << endl;
76 }
77 
78 
predict(const size_t max,const char ** filter) const79 Prediction RecencyPredictor::predict (const size_t max, const char** filter) const
80 {
81     Prediction result;
82 
83     std::string prefix = contextTracker->getPrefix();
84     logger << INFO << "prefix: " << prefix << endl;
85     if (!prefix.empty()) {
86         // Only build recency prediction if prefix is not empty: when
87         // prefix is empty, all previosly seen tokens are candidates
88         // for prediction. This is not desirable, because it means
89         // that recency prediction reduces to repetion of max previous
90         // tokens (i.e. the prediction would contain the most recent
91         // tokens in reverse order).
92         //
93         Suggestion  suggestion;
94         size_t      index = 1;
95         std::string token = contextTracker->getToken(index);
96 	double      prob = 0;
97         while (!token.empty()                // context history exhausted
98 	       && result.size() < max        // need only max suggestions
99 	       && index <= cutoff_threshold  // look back only as far as cutoff
100 	    ) {
101 	    logger << INFO << "token: " << token << endl;
102 
103             if (token.find(prefix) == 0) { // if token starts with prefix
104 
105 		if (token_satisfies_filter (token, prefix, filter)) {
106 		    // compute probability according to exponential decay
107 		    // formula
108 		    //
109 		    prob = n_0 * exp(-(lambda * (index - 1)));
110 		    logger << INFO << "probability: " << prob << endl;
111 		    suggestion.setWord(token);
112 		    suggestion.setProbability(prob);
113 		    result.addSuggestion(suggestion);
114 		}
115 
116             }
117 
118             index++;
119             token = contextTracker->getToken(index);
120         }
121     }
122 
123     return result;
124 }
125 
// Intentionally a no-op: this predictor keeps no model of its own —
// predict() reads tokens straight from the context tracker each time,
// so there is nothing to learn from `change`.
void RecencyPredictor::learn(const std::vector<std::string>& change)
{}
128 
update(const Observable * var)129 void RecencyPredictor::update (const Observable* var)
130 {
131     logger << DEBUG << "About to invoke dispatcher: " << var->get_name () << " - " << var->get_value() << endl;
132     dispatcher.dispatch (var);
133 }
134