1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1998 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black (and lots of books) */
34 /* Date : January 1998 */
35 /*-----------------------------------------------------------------------*/
36 /* Ordinary Least Squares/Linear regression */
37 /* */
38 /*=======================================================================*/
39 #include <cmath>
40 #include "EST_multistats.h"
41 #include "EST_simplestats.h"
42
43 static void ols_load_selected_feats(const EST_FMatrix &X,
44 const EST_IVector &included,
45 EST_FMatrix &Xl);
46 static int ols_stepwise_find_best(const EST_FMatrix &X,
47 const EST_FMatrix &Y,
48 EST_IVector &included,
49 EST_FMatrix &coeffs,
50 float &bscore,
51 int &best_feat,
52 const EST_FMatrix &Xtest,
53 const EST_FMatrix &Ytest,
54 const EST_StrList &feat_names
55 );
56
ols(const EST_FMatrix & X,const EST_FMatrix & Y,EST_FMatrix & coeffs)57 int ols(const EST_FMatrix &X,const EST_FMatrix &Y, EST_FMatrix &coeffs)
58 {
59 // Ordinary least squares, X contains the samples with 1 (for intercept)
60 // in column 0, Y contains the single values.
61 EST_FMatrix Xplus;
62
63 if (!pseudo_inverse(X,Xplus))
64 return FALSE;
65
66 multiply(Xplus,Y,coeffs);
67
68 return TRUE;
69 }
70
robust_ols(const EST_FMatrix & X,const EST_FMatrix & Y,EST_FMatrix & coeffs)71 int robust_ols(const EST_FMatrix &X,
72 const EST_FMatrix &Y,
73 EST_FMatrix &coeffs)
74 {
75 EST_IVector included;
76 int i;
77
78 included.resize(X.num_columns());
79 for (i=0; i<included.length(); i++)
80 included.a_no_check(i) = TRUE;
81
82 return robust_ols(X,Y,included,coeffs);
83 }
84
robust_ols(const EST_FMatrix & X,const EST_FMatrix & Y,EST_IVector & included,EST_FMatrix & coeffs)85 int robust_ols(const EST_FMatrix &X,
86 const EST_FMatrix &Y,
87 EST_IVector &included,
88 EST_FMatrix &coeffs)
89 {
90 // as with ols but if the pseudo inverse fails remove the offending
91 // features and try again until it works, this can be costly but
92 // its saves *you* from finding the singularity
93 // This expands the output and puts weights of 0 for omitted features
94 EST_FMatrix Xl;
95 EST_FMatrix coeffsl;
96 EST_FMatrix Xplus;
97 int i,j,singularity=-1;
98
99 if (X.num_rows() <= X.num_columns())
100 {
101 cerr << "OLS: less rows than columns, so cannot find solution."
102 << endl;
103 return FALSE;
104 }
105 if (X.num_columns() != included.length())
106 {
107 cerr << "OLS: `included' list wrong size: internal error."
108 << endl;
109 return FALSE;
110 }
111
112 while (TRUE)
113 {
114 ols_load_selected_feats(X,included,Xl);
115 if (pseudo_inverse(Xl,Xplus,singularity))
116 {
117 multiply(Xplus,Y,coeffsl);
118 break;
119 }
120 else
121 { // found a singularity so try again without that column
122 // remap singularity position back to X
123 int s;
124 for (s=i=0; i<singularity; i++)
125 {
126 s++;
127 while ((included(s) == FALSE) ||
128 (included(s) == OLS_IGNORE))
129 s++;
130 }
131 if (included(s) == FALSE)
132 { // oops
133 cerr << "OLS: found singularity twice, shouldn't happen"
134 << endl;
135 return FALSE;
136 }
137 else
138 {
139 cerr << "OLS: omitting singularity in column " << s << endl;
140 included[s] = FALSE;
141 }
142 }
143 }
144
145 // Map coefficients back, making coefficient 0 for singular cols
146 coeffs.resize(X.num_columns(),1);
147 for (j=i=0; i<X.num_columns(); i++)
148 if (included(i))
149 {
150 coeffs.a_no_check(i,0) = coeffsl(j,0);
151 j++;
152 }
153 else
154 coeffs.a_no_check(i,0) = 0.0;
155
156
157 return TRUE;
158
159 }
160
ols_load_selected_feats(const EST_FMatrix & X,const EST_IVector & included,EST_FMatrix & Xl)161 static void ols_load_selected_feats(const EST_FMatrix &X,
162 const EST_IVector &included,
163 EST_FMatrix &Xl)
164 {
165 int i,j,k,width;
166
167 for (width=i=0; i<included.length(); i++)
168 if (included(i) == TRUE)
169 width++;
170
171 Xl.resize(X.num_rows(),width);
172
173 for (i=0; i<X.num_rows(); i++)
174 for (k=j=0; j < X.num_columns(); j++)
175 if (included(j) == TRUE)
176 {
177 Xl.a_no_check(i,k) = X.a_no_check(i,j);
178 k++;
179 }
180
181 }
182
ols_apply(const EST_FMatrix & samples,const EST_FMatrix & coeffs,EST_FMatrix & res)183 int ols_apply(const EST_FMatrix &samples,
184 const EST_FMatrix &coeffs,
185 EST_FMatrix &res)
186 {
187 // Apply coefficients to samples for res.
188
189 if (samples.num_columns() != coeffs.num_rows())
190 return FALSE;
191
192 multiply(samples,coeffs,res);
193
194 return TRUE;
195 }
196
stepwise_ols(const EST_FMatrix & X,const EST_FMatrix & Y,const EST_StrList & feat_names,float limit,EST_FMatrix & coeffs,const EST_FMatrix & Xtest,const EST_FMatrix & Ytest,EST_IVector & included)197 int stepwise_ols(const EST_FMatrix &X,
198 const EST_FMatrix &Y,
199 const EST_StrList &feat_names,
200 float limit,
201 EST_FMatrix &coeffs,
202 const EST_FMatrix &Xtest,
203 const EST_FMatrix &Ytest,
204 EST_IVector &included)
205 {
206 // Find the features that contribute to the correlation using a
207 // a greedy algorithm
208
209 EST_FMatrix coeffsl;
210 float best_score=0.0,bscore;
211 int i,best_feat;
212 int nf=1; // for nice printing of progress
213
214 for (i=1; i < X.num_columns(); i++)
215 {
216 if (!ols_stepwise_find_best(X,Y,included,coeffsl,
217 bscore,best_feat,Xtest,Ytest,
218 feat_names))
219 {
220 cerr << "OLS: stepwise failed" << endl;
221 return FALSE;
222 }
223 if ((bscore - (bscore * (limit/100))) <= best_score)
224 break;
225 else
226 {
227 best_score = bscore;
228 coeffs = coeffsl;
229 included[best_feat] = TRUE;
230 printf("FEATURE %d %s: %2.4f\n",
231 nf,
232 (const char *)feat_names.nth(best_feat),
233 best_score);
234 fflush(stdout);
235 nf++;
236 }
237 }
238
239 return TRUE;
240 }
241
ols_stepwise_find_best(const EST_FMatrix & X,const EST_FMatrix & Y,EST_IVector & included,EST_FMatrix & coeffs,float & bscore,int & best_feat,const EST_FMatrix & Xtest,const EST_FMatrix & Ytest,const EST_StrList & feat_names)242 static int ols_stepwise_find_best(const EST_FMatrix &X,
243 const EST_FMatrix &Y,
244 EST_IVector &included,
245 EST_FMatrix &coeffs,
246 float &bscore,
247 int &best_feat,
248 const EST_FMatrix &Xtest,
249 const EST_FMatrix &Ytest,
250 const EST_StrList &feat_names
251 )
252 {
253 EST_FMatrix coeffsl;
254 bscore = 0;
255 best_feat = -1;
256 int i;
257
258 for (i=0; i < included.length(); i++)
259 {
260 if (included.a_no_check(i) == FALSE)
261 {
262 float cor, rmse;
263 EST_FMatrix pred;
264 included.a_no_check(i) = TRUE;
265 if (!robust_ols(X,Y,included,coeffsl))
266 return FALSE; // failed for some reason
267 ols_apply(Xtest,coeffsl,pred);
268 ols_test(Ytest,pred,cor,rmse);
269 printf("tested %d %s %f best %f\n",
270 i,(const char *)feat_names.nth(i),cor,bscore);
271 if (fabs(cor) > bscore)
272 {
273 bscore = fabs(cor);
274 coeffs = coeffsl;
275 best_feat = i;
276 }
277 included.a_no_check(i) = FALSE;
278 }
279 }
280
281 return TRUE;
282 }
283
ols_test(const EST_FMatrix & real,const EST_FMatrix & predicted,float & correlation,float & rmse)284 int ols_test(const EST_FMatrix &real,
285 const EST_FMatrix &predicted,
286 float &correlation,
287 float &rmse)
288 {
289 // Others probably want this function too
290 // return correlation and RMSE for col 0 in real and predicted
291 int i;
292 float p,r;
293 EST_SuffStats x,y,xx,yy,xy,se,e;
294 double error;
295 double v1,v2,v3;
296
297 if (real.num_rows() != predicted.num_rows())
298 return FALSE; // can't do this
299
300 for (i=0; i < real.num_rows(); i++)
301 {
302 r = real(i,0);
303 p = predicted(i,0);
304 x += p;
305 y += r;
306 error = p-r;
307 se += error*error;
308 e += fabs(error);
309 xx += p*p;
310 yy += r*r;
311 xy += p*r;
312 }
313
314 rmse = sqrt(se.mean());
315
316 v1 = xx.mean()-(x.mean()*x.mean());
317 v2 = yy.mean()-(y.mean()*y.mean());
318
319 v3 = v1*v2;
320
321 if (v3 <= 0)
322 { // happens when there's very little variation in x
323 correlation = 0;
324 rmse = se.mean();
325 return FALSE;
326 }
327 // Pearson's product moment correlation coefficient
328 correlation = (xy.mean() - (x.mean()*y.mean()))/ sqrt(v3);
329
330 // I hate to have to do this but it is necessary.
331 // When the the variation of X is very small v1*v2 approaches
332 // 0 (the negative and equals case is caught above) but that
333 // may not be enough when v1 or v2 are very small but positive.
334 // So I catch it here. If I knew more math I'd be able to describe
335 // this better but the code would remain the same.
336 if ((correlation <= 1.0) && (correlation >= -1.0))
337 return TRUE;
338 else
339 {
340 correlation = 0;
341 return FALSE;
342 }
343 }
344