1
2 ///////////////////////////////////////////////////////////
3 // //
4 // SAGA //
5 // //
6 // System for Automated Geoscientific Analyses //
7 // //
8 // Application Programming Interface //
9 // //
10 // Library: SAGA_API //
11 // //
12 //-------------------------------------------------------//
13 // //
14 // mat_regression_multiple.cpp //
15 // //
16 // Copyright (C) 2005 by Olaf Conrad //
17 // //
18 //-------------------------------------------------------//
19 // //
20 // This file is part of 'SAGA - System for Automated //
21 // Geoscientific Analyses'. //
22 // //
23 // This library is free software; you can redistribute //
24 // it and/or modify it under the terms of the GNU Lesser //
25 // General Public License as published by the Free //
26 // Software Foundation, either version 2.1 of the //
27 // License, or (at your option) any later version. //
28 // //
29 // This library is distributed in the hope that it will //
30 // be useful, but WITHOUT ANY WARRANTY; without even the //
31 // implied warranty of MERCHANTABILITY or FITNESS FOR A //
32 // PARTICULAR PURPOSE. See the GNU Lesser General Public //
33 // License for more details. //
34 // //
35 // You should have received a copy of the GNU Lesser //
36 // General Public License along with this program; if //
37 // not, see <http://www.gnu.org/licenses/>. //
38 // //
39 //-------------------------------------------------------//
40 // //
41 // contact: Olaf Conrad //
42 // Institute of Geography //
43 // University of Goettingen //
44 // Goldschmidtstr. 5 //
//                37077 Goettingen                       //
46 // Germany //
47 // //
48 // e-mail: oconrad@saga-gis.org //
49 // //
50 ///////////////////////////////////////////////////////////
51
52 //---------------------------------------------------------
53 #include "mat_tools.h"
54
55
56 ///////////////////////////////////////////////////////////
57 // //
58 // //
59 // //
60 ///////////////////////////////////////////////////////////
61
62 //---------------------------------------------------------
CSG_Regression_Weighted(void)63 CSG_Regression_Weighted::CSG_Regression_Weighted(void)
64 {
65 m_r2 = -1.0;
66
67 m_Log_maxIter = 30;
68 m_Log_Epsilon = 0.001;
69 m_Log_Difference = 1000.;
70 }
71
72 //---------------------------------------------------------
// Destructor: releases all sample and coefficient storage via Destroy().
CSG_Regression_Weighted::~CSG_Regression_Weighted(void)
{
	Destroy();
}
77
78 //---------------------------------------------------------
Destroy(void)79 bool CSG_Regression_Weighted::Destroy(void)
80 {
81 m_r2 = -1.0;
82
83 m_y.Destroy();
84 m_w.Destroy();
85 m_X.Destroy();
86 m_b.Destroy();
87
88 return( true );
89 }
90
91
92 ///////////////////////////////////////////////////////////
93 // //
94 ///////////////////////////////////////////////////////////
95
96 //---------------------------------------------------------
Add_Sample(double Weight,double Dependent,const CSG_Vector & Predictors)97 bool CSG_Regression_Weighted::Add_Sample(double Weight, double Dependent, const CSG_Vector &Predictors)
98 {
99 if( m_X.Get_NRows() == 0 )
100 {
101 m_X.Create(Predictors.Get_N() + 1, 1);
102 }
103 else if( m_X.Get_NCols() == Predictors.Get_N() + 1 )
104 {
105 m_X.Add_Row();
106 }
107 else
108 {
109 return( false );
110 }
111
112 m_w.Add_Row(Weight );
113 m_y.Add_Row(Dependent);
114
115 double *y = m_X[m_X.Get_NRows() - 1];
116
117 y[0] = 1.0;
118
119 for(int i=0; i<Predictors.Get_N(); i++)
120 {
121 y[i + 1] = Predictors[i];
122 }
123
124 return( true );
125 }
126
127
128 ///////////////////////////////////////////////////////////
129 // //
130 ///////////////////////////////////////////////////////////
131
132 //---------------------------------------------------------
Calculate(const CSG_Vector & Weights,const CSG_Vector & Dependents,const CSG_Matrix & Predictors,bool bLogistic)133 bool CSG_Regression_Weighted::Calculate(const CSG_Vector &Weights, const CSG_Vector &Dependents, const CSG_Matrix &Predictors, bool bLogistic)
134 {
135 Destroy();
136
137 if( Weights.Get_N() == Dependents.Get_N() && Weights.Get_N() == Predictors.Get_NRows() )
138 {
139 for(int i=0; i<Weights.Get_N(); i++)
140 {
141 Add_Sample(Weights[i], Dependents[i], Predictors.Get_Row(i));
142
143 return( Calculate(bLogistic) );
144 }
145 }
146
147 return( false );
148 }
149
150 //---------------------------------------------------------
// Fits the model to the samples collected via Add_Sample().
// Linear case: weighted least squares, b = inv(X'W X) * (X'W y).
// Logistic case: iteratively reweighted least squares (_Log_Get_Beta).
// On success m_b holds the coefficients (intercept first) and m_r2
// the weighted coefficient of determination; returns false otherwise.
bool CSG_Regression_Weighted::Calculate(bool bLogistic)
{
	//-----------------------------------------------------
	int i, nSamples, nPredictors;

	// need strictly more samples than coefficients, and at least two
	if( (nSamples = m_w.Get_N()) <= (nPredictors = m_X.Get_NCols() - 1) || nSamples < 2 )
	{
		return( false );
	}

	//-----------------------------------------------------
	if( bLogistic )
	{
		m_b = _Log_Get_Beta(m_X, m_y, m_w);

		if( m_b.Get_N() == 0 )	// iteration produced no usable coefficients
		{
			return( false );
		}
	}
	else
	{
		// YtW = X'W (weighted transpose of the design matrix);
		// note the CSG_Matrix(nCols, nRows) argument order, so
		// YtW[j][i] addresses row j (coefficient), column i (sample)
		CSG_Matrix YtW(nSamples, 1 + nPredictors);

		for(i=0; i<nSamples; i++)
		{
			YtW[0][i] = m_w[i];	// intercept column of X is all ones (see Add_Sample)

			for(int j=1; j<=nPredictors; j++)
			{
				YtW[j][i] = m_w[i] * m_X[i][j];
			}
		}

		// normal equations: b = inv(X'W X) * (X'W y)
		m_b = (YtW * m_X).Get_Inverse() * (YtW * m_y);
	}

	//-----------------------------------------------------
	// goodness of fit: weighted residual (rss) and total (tss) sums
	// of squares about the dependents' mean (note: mean is unweighted)
	CSG_Simple_Statistics yStats(m_y);

	double rss = 0.0, tss = 0.0;

	for(i=0; i<nSamples; i++)
	{
		double yr = m_b[0];	// prediction, starting with the intercept

		for(int j=1; j<=nPredictors; j++)
		{
			yr += m_b[j] * m_X[i][j];
		}

		if( bLogistic )
		{
			yr = 1. / (1. + exp(-yr));	// logistic link function
		}

		rss += m_w[i] * SG_Get_Square(m_y[i] - yr);
		tss += m_w[i] * SG_Get_Square(m_y[i] - yStats.Get_Mean());
	}

	//-----------------------------------------------------
	if( tss > 0.0 && tss >= rss )
	{
		m_r2 = fabs(tss - rss) / tss;	// explained fraction of variance

		return( true );
	}

	m_r2 = -1.0;	// degenerate fit (no variance, or worse than the mean)

	return( false );
}
223
224
225 ///////////////////////////////////////////////////////////
226 // //
227 ///////////////////////////////////////////////////////////
228
229 //---------------------------------------------------------
230 // Zhang, D., Ren, N., and Hou, X. (2018):
231 // An improved logistic regression model based on a spatially weighted technique
232 // (ILRBSWT v1.0) and its application to mineral prospectivity mapping.
233 // Geosci. Model Dev., 11, 2525-2539, https://doi.org/10.5194/gmd-11-2525-2018.
234
235 //---------------------------------------------------------
Set_Log_maxIter(int maxIter)236 bool CSG_Regression_Weighted::Set_Log_maxIter(int maxIter)
237 {
238 if( maxIter > 0 )
239 {
240 m_Log_maxIter = maxIter;
241
242 return( true );
243 }
244
245 return( false );
246 }
247
248 //---------------------------------------------------------
Set_Log_Epsilon(double Epsilon)249 bool CSG_Regression_Weighted::Set_Log_Epsilon(double Epsilon)
250 {
251 if( Epsilon >= 0. )
252 {
253 m_Log_Epsilon = Epsilon;
254
255 return( true );
256 }
257
258 return( false );
259 }
260
261 //---------------------------------------------------------
Set_Log_Difference(double Difference)262 bool CSG_Regression_Weighted::Set_Log_Difference(double Difference)
263 {
264 if( Difference > 0. )
265 {
266 m_Log_Difference = Difference;
267
268 return( true );
269 }
270
271 return( false );
272 }
273
274 //---------------------------------------------------------
// Estimates logistic regression coefficients by iteratively
// reweighted least squares (Zhang et al. 2018, ILRBSWT). Starts from
// beta = 0, performs at most m_Log_maxIter update steps and returns
// the last stable estimate; an empty vector signals that not even the
// first step succeeded.
CSG_Vector CSG_Regression_Weighted::_Log_Get_Beta(const CSG_Matrix &X, const CSG_Vector &y, const CSG_Vector &w)
{
	CSG_Vector b(X.Get_NCols()), b_best;	// b starts zero-initialized; b_best lags one step behind

	CSG_Vector p = _Log_Get_Props(X, b);	// predicted probabilities (0.5 everywhere for b = 0)

	for(int i=0; i<m_Log_maxIter; ++i)
	{
		CSG_Vector b_new = _Log_Get_Beta(b, X, y, w, p);

		if( b_new.Get_N() == 0 )	// step failed (e.g. matrix not invertible)
		{
			return( b_best );
		}

		for(int j=0; j<b_new.Get_N(); ++j)
		{
			if( SG_is_NaN(b_new[j]) )	// numerical breakdown: keep last good estimate
			{
				return( b_best );
			}
		}

		if( _Log_NoChange(b, b_new) )	// converged: the new estimate is final
		{
			return( b_new );
		}

		if( _Log_OutOfControl(b, b_new) )	// diverging: fall back to last good estimate
		{
			return( b_best );
		}

		p = _Log_Get_Props(X, b_new);
		b = b_new;
		b_best = b;
	}

	return( b_best );	// iteration limit reached
}
315
316 //---------------------------------------------------------
CSG_Vector CSG_Regression_Weighted::_Log_Get_Beta(const CSG_Vector &b, const CSG_Matrix &X, const CSG_Vector &y, const CSG_Vector &w, const CSG_Vector &p)
{
	// One update step of the iteratively reweighted least squares:
	//   delta = inv(X'* W * V * X) * X'* W * (y - p)
	// and the new estimate is b + delta. An empty vector signals
	// failure (dimension mismatch, e.g. the matrix was not invertible).
	CSG_Matrix Xt = X.Get_Transpose();

	CSG_Vector Delta = (Xt * _Log_Get_Xwp(p, X, w)).Get_Inverse() * Xt * _Log_Get_Ywp(p, y, w);

	if( Delta.Get_N() != b.Get_N() )
	{
		return( CSG_Vector() );
	}

	return( b + Delta );
}
331
332 //---------------------------------------------------------
CSG_Matrix CSG_Regression_Weighted::_Log_Get_Xwp(const CSG_Vector &p, const CSG_Matrix &X, const CSG_Vector &w)
{
	// Computes W(u) * V(u) * X, i.e. each row of X scaled by its
	// weight times the logistic variance p * (1 - p). Returns an
	// empty matrix when p does not match X row-wise.
	CSG_Matrix Xwp;

	if( p.Get_N() == X.Get_NRows() && Xwp.Create(X.Get_NCols(), X.Get_NRows()) )
	{
		for(int iRow=0; iRow<X.Get_NRows(); ++iRow)
		{
			double Scale = w[iRow] * p[iRow] * (1. - p[iRow]);	// diagonal entry of W * V

			for(int iCol=0; iCol<X.Get_NCols(); ++iCol)
			{
				Xwp[iRow][iCol] = Scale * X[iRow][iCol];
			}
		}
	}

	return( Xwp );
}
350
351 //---------------------------------------------------------
CSG_Vector CSG_Regression_Weighted::_Log_Get_Ywp(const CSG_Vector &p, const CSG_Vector &y, const CSG_Vector &w)
{
	// Computes the weighted residual vector W(u) * (y - p). Returns
	// an all-zero vector when p and y differ in size.
	CSG_Vector Ywp(y.Get_N());

	if( p.Get_N() == y.Get_N() && Ywp.Create(y.Get_N()) )
	{
		for(int i=0; i<Ywp.Get_N(); i++)
		{
			Ywp[i] = (y[i] - p[i]) * w[i];
		}
	}

	return( Ywp );
}
366
367 //---------------------------------------------------------
CSG_Vector CSG_Regression_Weighted::_Log_Get_Props(const CSG_Matrix &X, const CSG_Vector &b)
{
	// Predicted probabilities p = 1 / (1 + exp(-X*b)) for each sample.
	CSG_Vector p(X.Get_NRows());

	for(int iRow=0; iRow<X.Get_NRows(); ++iRow)
	{
		double xb = 0.;	// linear predictor for this sample

		for(int iCol=0; iCol<X.Get_NCols(); ++iCol)
		{
			xb += b[iCol] * X[iRow][iCol];
		}

		p[iRow] = 1. / (1. + exp(-xb));
	}

	return( p );
}
386
387 //---------------------------------------------------------
_Log_NoChange(const CSG_Vector & b_old,const CSG_Vector & b_new)388 bool CSG_Regression_Weighted::_Log_NoChange(const CSG_Vector &b_old, const CSG_Vector &b_new)
389 {
390 for(int i=0; i<b_old.Get_N(); ++i)
391 {
392 if( fabs(b_old[i] - b_new[i]) > m_Log_Epsilon )
393 {
394 return( false );
395 }
396 }
397
398 return( true ); // if the new beta is equal to the old one under certain accuracy
399 }
400
401 //---------------------------------------------------------
_Log_OutOfControl(const CSG_Vector & b_old,const CSG_Vector & b_new)402 bool CSG_Regression_Weighted::_Log_OutOfControl(const CSG_Vector &b_old, const CSG_Vector &b_new)
403 {
404 for(int i=0; i<b_old.Get_N(); ++i)
405 {
406 if( b_old[i] == 0.0 )
407 {
408 return( false );
409 }
410
411 if( fabs(b_old[i] - b_new[i]) / fabs(b_old[i]) > m_Log_Difference )
412 {
413 return( true );
414 }
415 }
416
417 return( false ); // if the new beta has obvious difference with the old one
418 }
419
420
421 ///////////////////////////////////////////////////////////
422 // //
423 ///////////////////////////////////////////////////////////
424
425 //---------------------------------------------------------
// NOTE(review): cross-validation is currently disabled -- the whole
// implementation below is commented out and this method always
// returns true without computing anything. Callers must not rely on
// any CV statistics until the implementation is finished (the
// disabled draft also references members like m_Samples_Model,
// m_nPredictors and m_CV_* that are not used elsewhere in this file
// -- verify they exist before re-enabling).
bool CSG_Regression_Weighted::Get_CrossValidation(int nSubSamples)
{
/*	if( Get_Predictor_Count() <= 1 )
	{
		return( false );
	}

	//-----------------------------------------------------
	CSG_Regression_Weighted Model;
	CSG_Simple_Statistics   Stats, SR, SE;

	int i, nModels = 0;

	for(i=0; i<m_Samples_Model.Get_NRows(); i++)
	{
		Stats += m_Samples_Model[i][0];
	}

	//-----------------------------------------------------
	// leave-one-out cross validation (LOOCV)
	if( nSubSamples <= 1 || nSubSamples > m_Samples_Model.Get_NRows() / 2 )
	{
		for(i=0; i<m_Samples_Model.Get_NRows() && SG_UI_Process_Get_Okay(); i++)
		{
			CSG_Vector w(m_w); w.Del_Row(i); Model
			CSG_Vector x(m_y); x.Del_Row(i);
			CSG_Matrix Y(m_X); Y.Del_Row(i);

			if( Model.Calculate(w, x, Y) )
			{
				nModels++;

				double dObsrv = m_Samples_Model[i][0];
				double dModel = Model.Get_Value(CSG_Vector(m_nPredictors, m_Samples_Model[i] + 1));

				SE += SG_Get_Square(dModel - dObsrv);
				SR += SG_Get_Square(dModel - (Stats.Get_Sum() - dObsrv) / Samples.Get_NRows());
			}
		}
	}

	//-----------------------------------------------------
	// k-fold cross validation
	else
	{
		int *SubSet = new int[m_Samples_Model.Get_NRows()];

		for(i=0; i<m_Samples_Model.Get_NRows(); i++)
		{
			SubSet[i] = i % nSubSamples;
		}

		//-------------------------------------------------
		for(int iSubSet=0; iSubSet<nSubSamples && SG_UI_Process_Get_Okay(); iSubSet++)
		{
			CSG_Simple_Statistics Samples_Stats;
			CSG_Matrix Samples(m_Samples_Model), Validation;

			for(i=Samples.Get_NRows()-1; i>=0; i--)
			{
				if( SubSet[i] == iSubSet )
				{
					Validation.Add_Row(Samples.Get_Row(i));
					Samples   .Del_Row(i);
				}
				else
				{
					Samples_Stats += Samples[i][0];
				}
			}

			//---------------------------------------------
			if( Model.Get_Model(Samples) )
			{
				nModels++;

				for(i=0; i<Validation.Get_NRows(); i++)
				{
					double dObsrv = Validation[i][0];
					double dModel = Model.Get_Value(CSG_Vector(m_nPredictors, Validation[i] + 1));

					SE += SG_Get_Square(dModel - dObsrv);
					SR += SG_Get_Square(dModel - Samples_Stats.Get_Mean());
				}
			}
		}

		delete[](SubSet);
	}

	//-----------------------------------------------------
	m_CV_RMSE     = sqrt(SE.Get_Mean());
	m_CV_NRMSE    = sqrt(SE.Get_Mean()) / Stats.Get_Range();
	m_CV_R2       = SR.Get_Sum() / (SR.Get_Sum() + SE.Get_Sum());
	m_CV_nSamples = nModels;
*/
	//-----------------------------------------------------
	return( true );
}
525
526
527 ///////////////////////////////////////////////////////////
528 // //
529 // //
530 // //
531 ///////////////////////////////////////////////////////////
532
533 //---------------------------------------------------------
534