1 /**********************************************************
2 * Version $Id$
3 *********************************************************/
4
5 ///////////////////////////////////////////////////////////
6 // //
7 // SAGA //
8 // //
9 // System for Automated Geoscientific Analyses //
10 // //
11 // Application Programming Interface //
12 // //
13 // Library: SAGA_API //
14 // //
15 //-------------------------------------------------------//
16 // //
17 // mat_regression_multiple.cpp //
18 // //
19 // Copyright (C) 2005 by Olaf Conrad //
20 // //
21 //-------------------------------------------------------//
22 // //
23 // This file is part of 'SAGA - System for Automated //
24 // Geoscientific Analyses'. //
25 // //
26 // This library is free software; you can redistribute //
27 // it and/or modify it under the terms of the GNU Lesser //
28 // General Public License as published by the Free //
29 // Software Foundation, either version 2.1 of the //
30 // License, or (at your option) any later version. //
31 // //
32 // This library is distributed in the hope that it will //
33 // be useful, but WITHOUT ANY WARRANTY; without even the //
34 // implied warranty of MERCHANTABILITY or FITNESS FOR A //
35 // PARTICULAR PURPOSE. See the GNU Lesser General Public //
36 // License for more details. //
37 // //
38 // You should have received a copy of the GNU Lesser //
39 // General Public License along with this program; if //
40 // not, see <http://www.gnu.org/licenses/>. //
41 // //
42 //-------------------------------------------------------//
43 // //
44 // contact: Olaf Conrad //
45 // Institute of Geography //
46 // University of Goettingen //
47 // Goldschmidtstr. 5 //
48 // 37077 Goettingen //
49 // Germany //
50 // //
51 // e-mail: oconrad@saga-gis.org //
52 // //
53 ///////////////////////////////////////////////////////////
54
55 //---------------------------------------------------------
56
57
58 ///////////////////////////////////////////////////////////
59 // //
60 // //
61 // //
62 ///////////////////////////////////////////////////////////
63
64 //---------------------------------------------------------
65 #include "mat_tools.h"
66 #include "table.h"
67
68
69 ///////////////////////////////////////////////////////////
70 // //
71 // //
72 // //
73 ///////////////////////////////////////////////////////////
74
75 //---------------------------------------------------------
76 enum ESG_Multiple_Regression_Info_Model
77 {
78 MLR_MODEL_R2 = 0,
79 MLR_MODEL_R2_ADJ,
80 MLR_MODEL_SE,
81 MLR_MODEL_SSR,
82 MLR_MODEL_SSE,
83 MLR_MODEL_SST,
84 MLR_MODEL_MSR,
85 MLR_MODEL_MSE,
86 MLR_MODEL_F,
87 MLR_MODEL_SIG,
88 MLR_MODEL_NPREDICT,
89 MLR_MODEL_NSAMPLES,
90 MLR_MODEL_CV_MSE,
91 MLR_MODEL_CV_RMSE,
92 MLR_MODEL_CV_NRMSE,
93 MLR_MODEL_CV_R2,
94 MLR_MODEL_CV_NSAMPLES
95 };
96
97 //---------------------------------------------------------
98 enum ESG_Multiple_Regression_Info_Steps
99 {
100 MLR_STEP_NR = 0,
101 MLR_STEP_R,
102 MLR_STEP_R2,
103 MLR_STEP_R2_ADJ,
104 MLR_STEP_SE,
105 MLR_STEP_SSR,
106 MLR_STEP_SSE,
107 MLR_STEP_MSR,
108 MLR_STEP_MSE,
109 MLR_STEP_DF,
110 MLR_STEP_F,
111 MLR_STEP_SIG,
112 MLR_STEP_VAR_F,
113 MLR_STEP_VAR_SIG,
114 MLR_STEP_DIR,
115 MLR_STEP_VAR
116 };
117
118
119 ///////////////////////////////////////////////////////////
120 // //
121 // //
122 // //
123 ///////////////////////////////////////////////////////////
124
125 //---------------------------------------------------------
CSG_Regression_Multiple(bool bIntercept)126 CSG_Regression_Multiple::CSG_Regression_Multiple(bool bIntercept)
127 {
128 m_pRegression = new CSG_Table;
129
130 m_pRegression ->Add_Field("VAR_ID" , SG_DATATYPE_Int); // MLR_VAR_ID
131 m_pRegression ->Add_Field("VAR_NAME" , SG_DATATYPE_String); // MLR_VAR_NAME
132 m_pRegression ->Add_Field("REGCOEFF" , SG_DATATYPE_Double); // MLR_VAR_RCOEFF
133 m_pRegression ->Add_Field("R" , SG_DATATYPE_Double); // MLR_VAR_R
134 m_pRegression ->Add_Field("R2" , SG_DATATYPE_Double); // MLR_VAR_R2
135 m_pRegression ->Add_Field("R2_ADJ" , SG_DATATYPE_Double); // MLR_VAR_R2_ADJ
136 m_pRegression ->Add_Field("STD_ERROR" , SG_DATATYPE_Double); // MLR_VAR_SE
137 m_pRegression ->Add_Field("T" , SG_DATATYPE_Double); // MLR_VAR_T
138 m_pRegression ->Add_Field("SIG" , SG_DATATYPE_Double); // MLR_VAR_SIG
139 m_pRegression ->Add_Field("P" , SG_DATATYPE_Double); // MLR_VAR_P
140
141 //-----------------------------------------------------
142 m_pSteps = new CSG_Table;
143
144 m_pSteps ->Add_Field("MODEL" , SG_DATATYPE_Int); // MLR_STEP_NR
145 m_pSteps ->Add_Field("R" , SG_DATATYPE_Double); // MLR_STEP_R
146 m_pSteps ->Add_Field("R2" , SG_DATATYPE_Double); // MLR_STEP_R2
147 m_pSteps ->Add_Field("R2_ADJ" , SG_DATATYPE_Double); // MLR_STEP_R2_ADJ
148 m_pSteps ->Add_Field("STD_ERROR" , SG_DATATYPE_Double); // MLR_STEP_SE
149 m_pSteps ->Add_Field("SSR" , SG_DATATYPE_Double); // MLR_STEP_SSR
150 m_pSteps ->Add_Field("SSE" , SG_DATATYPE_Double); // MLR_STEP_SSE
151 m_pSteps ->Add_Field("MSR" , SG_DATATYPE_Double); // MLR_STEP_MSR
152 m_pSteps ->Add_Field("MSE" , SG_DATATYPE_Double); // MLR_STEP_MSE
153 m_pSteps ->Add_Field("DF" , SG_DATATYPE_Double); // MLR_STEP_DF
154 m_pSteps ->Add_Field("F" , SG_DATATYPE_Double); // MLR_STEP_F
155 m_pSteps ->Add_Field("SIG" , SG_DATATYPE_Double); // MLR_STEP_SIG
156 m_pSteps ->Add_Field("VAR_F" , SG_DATATYPE_Double); // MLR_STEP_VAR_F
157 m_pSteps ->Add_Field("VAR_SIG" , SG_DATATYPE_Double); // MLR_STEP_VAR_SIG
158 m_pSteps ->Add_Field("DIR" , SG_DATATYPE_String); // MLR_STEP_DIR
159 m_pSteps ->Add_Field("VARIABLE" , SG_DATATYPE_String); // MLR_STEP_VAR
160
161 //-----------------------------------------------------
162 m_pModel = new CSG_Table;
163
164 m_pModel ->Add_Field("PARAMETER" , SG_DATATYPE_String);
165 m_pModel ->Add_Field("VALUE" , SG_DATATYPE_Double);
166
167 m_pModel ->Add_Record()->Set_Value(0, SG_T("R2" )); // MLR_MODEL_R2
168 m_pModel ->Add_Record()->Set_Value(0, SG_T("R2_ADJ" )); // MLR_MODEL_R2_ADJ
169 m_pModel ->Add_Record()->Set_Value(0, SG_T("STD_ERROR" )); // MLR_MODEL_SE
170 m_pModel ->Add_Record()->Set_Value(0, SG_T("SSR" )); // MLR_MODEL_SSR
171 m_pModel ->Add_Record()->Set_Value(0, SG_T("SSE" )); // MLR_MODEL_SSE
172 m_pModel ->Add_Record()->Set_Value(0, SG_T("SST" )); // MLR_MODEL_SST
173 m_pModel ->Add_Record()->Set_Value(0, SG_T("MSR" )); // MLR_MODEL_MSR
174 m_pModel ->Add_Record()->Set_Value(0, SG_T("MSE" )); // MLR_MODEL_MSE
175 m_pModel ->Add_Record()->Set_Value(0, SG_T("F" )); // MLR_MODEL_F
176 m_pModel ->Add_Record()->Set_Value(0, SG_T("SIG" )); // MLR_MODEL_SIG
177 m_pModel ->Add_Record()->Set_Value(0, SG_T("PREDICTORS" )); // MLR_MODEL_NPREDICT
178 m_pModel ->Add_Record()->Set_Value(0, SG_T("SAMPLES" )); // MLR_MODEL_NSAMPLES
179 m_pModel ->Add_Record()->Set_Value(0, SG_T("CV_MSE" )); // MLR_MODEL_CV_MSE
180 m_pModel ->Add_Record()->Set_Value(0, SG_T("CV_RMSE" )); // MLR_MODEL_CV_RMSE
181 m_pModel ->Add_Record()->Set_Value(0, SG_T("CV_NRMSE" )); // MLR_MODEL_CV_RMSE
182 m_pModel ->Add_Record()->Set_Value(0, SG_T("CV_R2" )); // MLR_MODEL_CV_R2
183 m_pModel ->Add_Record()->Set_Value(0, SG_T("CV_SAMPLES" )); // MLR_MODEL_CV_NSAMPLES
184
185 //-----------------------------------------------------
186 m_Predictor = NULL;
187 m_nPredictors = 0;
188
189 m_bIntercept = bIntercept;
190 }
191
192 //---------------------------------------------------------
~CSG_Regression_Multiple(void)193 CSG_Regression_Multiple::~CSG_Regression_Multiple(void)
194 {
195 Destroy();
196
197 delete(m_pRegression);
198 delete(m_pModel);
199 delete(m_pSteps);
200 }
201
202
203 ///////////////////////////////////////////////////////////
204 // //
205 // //
206 // //
207 ///////////////////////////////////////////////////////////
208
209 //---------------------------------------------------------
Destroy(void)210 void CSG_Regression_Multiple::Destroy(void)
211 {
212 m_Names .Clear ();
213 m_Samples .Destroy();
214 m_Samples_Model.Destroy();
215
216 m_pRegression ->Del_Records();
217 m_pSteps ->Del_Records();
218
219 for(int i=0; i<m_pModel->Get_Count(); i++)
220 {
221 m_pModel->Get_Record(i)->Set_NoData(1);
222 }
223
224 if( m_Predictor )
225 {
226 delete[](m_bIncluded);
227 delete[](m_Predictor);
228
229 m_Predictor = NULL;
230 m_nPredictors = 0;
231 }
232 }
233
234
235 ///////////////////////////////////////////////////////////
236 // //
237 // //
238 // //
239 ///////////////////////////////////////////////////////////
240
241 //---------------------------------------------------------
Set_Data(const CSG_Matrix & Samples,CSG_Strings * pNames)242 bool CSG_Regression_Multiple::Set_Data(const CSG_Matrix &Samples, CSG_Strings *pNames)
243 {
244 Destroy();
245
246 int i, nPredictors = Samples.Get_NCols() - 1;
247
248 if( nPredictors < 1 || Samples.Get_NRows() <= nPredictors )
249 {
250 return( false );
251 }
252
253 //-------------------------------------------------
254 for(i=0; i<=nPredictors; i++)
255 {
256 m_Names += pNames && pNames->Get_Count() == Samples.Get_NCols() ? pNames->Get_String(i) : i == 0
257 ? CSG_String::Format(SG_T( "%s"), _TL("Dependent"))
258 : CSG_String::Format(SG_T("%d. %s"), i + 1, _TL("Predictor"));
259 }
260
261 m_Samples = Samples;
262
263 m_bIncluded = new int[nPredictors];
264 m_Predictor = new int[nPredictors];
265
266 //-------------------------------------------------
267 return( true );
268 }
269
270 //---------------------------------------------------------
_Initialize(bool bInclude)271 bool CSG_Regression_Multiple::_Initialize(bool bInclude)
272 {
273 int i, nPredictors = m_Samples.Get_NCols() - 1;
274
275 if( nPredictors < 1 || m_Samples.Get_NRows() <= nPredictors )
276 {
277 return( false );
278 }
279
280 //-------------------------------------------------
281 if( bInclude )
282 {
283 m_nPredictors = nPredictors;
284 m_Samples_Model.Create(m_Samples);
285 }
286 else
287 {
288 m_nPredictors = 0;
289 m_Samples_Model.Set_Col(m_Samples.Get_Col(0));
290 }
291
292 for(i=0; i<nPredictors; i++)
293 {
294 m_Predictor[i] = i;
295 m_bIncluded[i] = bInclude;
296 }
297
298 for(i=0; i<m_pModel->Get_Count(); i++)
299 {
300 m_pModel->Get_Record(i)->Set_NoData(1);
301 }
302
303 //-------------------------------------------------
304 return( true );
305 }
306
307
308 ///////////////////////////////////////////////////////////
309 // //
310 // //
311 // //
312 ///////////////////////////////////////////////////////////
313
314 //---------------------------------------------------------
Get_Model(const CSG_Matrix & Samples,CSG_Strings * pNames)315 bool CSG_Regression_Multiple::Get_Model(const CSG_Matrix &Samples, CSG_Strings *pNames)
316 {
317 return( Set_Data(Samples, pNames) && Get_Model() );
318 }
319
Get_Model_Forward(const CSG_Matrix & Samples,double P_in,CSG_Strings * pNames)320 bool CSG_Regression_Multiple::Get_Model_Forward(const CSG_Matrix &Samples, double P_in, CSG_Strings *pNames)
321 {
322 return( Set_Data(Samples, pNames) && Get_Model_Forward(P_in) );
323 }
324
Get_Model_Backward(const CSG_Matrix & Samples,double P_out,CSG_Strings * pNames)325 bool CSG_Regression_Multiple::Get_Model_Backward(const CSG_Matrix &Samples, double P_out, CSG_Strings *pNames)
326 {
327 return( Set_Data(Samples, pNames) && Get_Model_Backward(P_out) );
328 }
329
Get_Model_Stepwise(const CSG_Matrix & Samples,double P_in,double P_out,CSG_Strings * pNames)330 bool CSG_Regression_Multiple::Get_Model_Stepwise(const CSG_Matrix &Samples, double P_in, double P_out, CSG_Strings *pNames)
331 {
332 return( Set_Data(Samples, pNames) && Get_Model_Stepwise(P_in, P_out) );
333 }
334
335 //---------------------------------------------------------
Get_Model(void)336 bool CSG_Regression_Multiple::Get_Model(void)
337 {
338 return( _Initialize(true) && _Get_Regression(m_Samples) );
339 }
340
341 //---------------------------------------------------------
Get_Model_Forward(double P_in)342 bool CSG_Regression_Multiple::Get_Model_Forward(double P_in)
343 {
344 if( _Initialize(false) )
345 {
346 double R2 = 0.0;
347
348 while( _Get_Step_In(m_Samples_Model, P_in, R2, m_Samples) >= 0 );
349
350 return( _Set_Step_Info(m_Samples_Model) );
351 }
352
353 return( false );
354 }
355
356 //---------------------------------------------------------
Get_Model_Backward(double P_out)357 bool CSG_Regression_Multiple::Get_Model_Backward(double P_out)
358 {
359 if( _Initialize(true) )
360 {
361 double R2 = 0.0;
362
363 while( _Get_Step_Out(m_Samples_Model, P_out, R2) >= 0 );
364
365 return( _Set_Step_Info(m_Samples_Model) );
366 }
367
368 return( false );
369 }
370
371 //---------------------------------------------------------
Get_Model_Stepwise(double P_in,double P_out)372 bool CSG_Regression_Multiple::Get_Model_Stepwise(double P_in, double P_out)
373 {
374 if( _Initialize(false) )
375 {
376 double R2 = 0.0;
377
378 if( P_out <= P_in )
379 {
380 P_out = P_in + 0.001;
381 }
382
383 while( _Get_Step_In(m_Samples_Model, P_in, R2, m_Samples) >= 0 && SG_UI_Process_Get_Okay() )
384 {
385 if( m_nPredictors > 1 )
386 {
387 _Get_Step_Out(m_Samples_Model, P_out, R2);
388 }
389 }
390
391 return( _Set_Step_Info(m_Samples_Model) );
392 }
393
394 return( false );
395 }
396
397
398 ///////////////////////////////////////////////////////////
399 // //
400 // //
401 // //
402 ///////////////////////////////////////////////////////////
403
404 //---------------------------------------------------------
Get_CrossValidation(int nSubSamples)405 bool CSG_Regression_Multiple::Get_CrossValidation(int nSubSamples)
406 {
407 if( m_Samples_Model.Get_NCols() <= 1 )
408 {
409 return( false );
410 }
411
412 //-----------------------------------------------------
413 CSG_Regression_Multiple Model(m_bIntercept);
414 CSG_Simple_Statistics Stats, SR, SE;
415
416 int i, nModels = 0;
417
418 for(i=0; i<m_Samples_Model.Get_NRows(); i++)
419 {
420 Stats += m_Samples_Model[i][0];
421 }
422
423 //-----------------------------------------------------
424 // leave-one-out cross validation (LOOCV)
425 if( nSubSamples <= 1 || nSubSamples > m_Samples_Model.Get_NRows() / 2 )
426 {
427 for(i=0; i<m_Samples_Model.Get_NRows() && SG_UI_Process_Get_Okay(); i++)
428 {
429 CSG_Matrix Samples(m_Samples_Model);
430 Samples.Del_Row(i);
431
432 if( Model.Get_Model(Samples) )
433 {
434 nModels++;
435
436 double dObsrv = m_Samples_Model[i][0];
437 double dModel = Model.Get_Value(CSG_Vector(m_nPredictors, m_Samples_Model[i] + 1));
438
439 SE += SG_Get_Square(dModel - dObsrv);
440 SR += SG_Get_Square(dModel - (Stats.Get_Sum() - dObsrv) / Samples.Get_NRows());
441 }
442 }
443 }
444
445 //-----------------------------------------------------
446 // k-fold cross validation
447 else
448 {
449 int *SubSet = new int[m_Samples_Model.Get_NRows()];
450
451 for(i=0; i<m_Samples_Model.Get_NRows(); i++)
452 {
453 SubSet[i] = i % nSubSamples;
454 }
455
456 //-------------------------------------------------
457 for(int iSubSet=0; iSubSet<nSubSamples && SG_UI_Process_Get_Okay(); iSubSet++)
458 {
459 CSG_Simple_Statistics Samples_Stats;
460 CSG_Matrix Samples(m_Samples_Model), Validation;
461
462 for(i=Samples.Get_NRows()-1; i>=0; i--)
463 {
464 if( SubSet[i] == iSubSet )
465 {
466 Validation.Add_Row(Samples.Get_Row(i));
467 Samples .Del_Row(i);
468 }
469 else
470 {
471 Samples_Stats += Samples[i][0];
472 }
473 }
474
475 //---------------------------------------------
476 if( Model.Get_Model(Samples) )
477 {
478 nModels++;
479
480 for(i=0; i<Validation.Get_NRows(); i++)
481 {
482 double dObsrv = Validation[i][0];
483 double dModel = Model.Get_Value(CSG_Vector(m_nPredictors, Validation[i] + 1));
484
485 SE += SG_Get_Square(dModel - dObsrv);
486 SR += SG_Get_Square(dModel - Samples_Stats.Get_Mean());
487 }
488 }
489 }
490
491 delete[](SubSet);
492 }
493
494 //-----------------------------------------------------
495 m_pModel->Get_Record(MLR_MODEL_CV_MSE )->Set_Value(1, SE.Get_Mean());
496 m_pModel->Get_Record(MLR_MODEL_CV_RMSE )->Set_Value(1, sqrt(SE.Get_Mean()));
497 m_pModel->Get_Record(MLR_MODEL_CV_NRMSE )->Set_Value(1, sqrt(SE.Get_Mean()) / Stats.Get_Range());
498 m_pModel->Get_Record(MLR_MODEL_CV_R2 )->Set_Value(1, SR.Get_Sum() / (SR.Get_Sum() + SE.Get_Sum()));
499 m_pModel->Get_Record(MLR_MODEL_CV_NSAMPLES)->Set_Value(1, nModels);
500
501 //-----------------------------------------------------
502 return( true );
503 }
504
505
506 ///////////////////////////////////////////////////////////
507 // //
508 // //
509 // //
510 ///////////////////////////////////////////////////////////
511
512 //---------------------------------------------------------
Get_Value(const CSG_Vector & Predictors) const513 double CSG_Regression_Multiple::Get_Value(const CSG_Vector &Predictors) const
514 {
515 double Value;
516
517 Get_Value(Predictors, Value);
518
519 return( Value );
520 }
521
Get_Value(const CSG_Vector & Predictors,double & Value) const522 bool CSG_Regression_Multiple::Get_Value(const CSG_Vector &Predictors, double &Value) const
523 {
524 if( m_nPredictors == Predictors.Get_N() )
525 {
526 Value = Get_RConst();
527
528 for(int i=0; i<m_nPredictors; i++)
529 {
530 Value += Get_RCoeff(i) * Predictors(i);
531 }
532
533 return( true );
534 }
535
536 Value = 0.0;
537
538 return( false );
539 }
540
541 //---------------------------------------------------------
Get_Residual(int iSample) const542 double CSG_Regression_Multiple::Get_Residual(int iSample) const
543 {
544 double Value;
545
546 Get_Residual(iSample, Value);
547
548 return( Value );
549 }
550
Get_Residual(int iSample,double & Residual) const551 bool CSG_Regression_Multiple::Get_Residual(int iSample, double &Residual) const
552 {
553 if( iSample >= 0 && iSample < m_Samples_Model.Get_NRows() )
554 {
555 Residual = Get_RConst();
556
557 for(int i=0; i<m_nPredictors; i++)
558 {
559 Residual += Get_RCoeff(i) * m_Samples_Model[iSample][1 + i];
560 }
561
562 Residual -= m_Samples_Model[iSample][0];
563
564 return( true );
565 }
566
567 Residual = 0.0;
568
569 return( false );
570 }
571
572 //---------------------------------------------------------
Get_Residuals(CSG_Vector & Residuals) const573 bool CSG_Regression_Multiple::Get_Residuals(CSG_Vector &Residuals) const
574 {
575 Residuals.Create(m_Samples_Model.Get_NRows());
576
577 for(int i=0; i<Residuals.Get_N(); i++)
578 {
579 Get_Residual(i, Residuals[i]);
580 }
581
582 return( Residuals.Get_N() > 0 );
583 }
584
585
586 ///////////////////////////////////////////////////////////
587 // //
588 // //
589 // //
590 ///////////////////////////////////////////////////////////
591
592 //---------------------------------------------------------
_Get_F(int nPredictors,int nSamples,double r2_full,double r2_reduced)593 double CSG_Regression_Multiple::_Get_F(int nPredictors, int nSamples, double r2_full, double r2_reduced)
594 {
595 return( (nSamples - nPredictors - 1) * (r2_full - r2_reduced) / (1.0 - r2_full) );
596 }
597
598 //---------------------------------------------------------
_Get_P(int nPredictors,int nSamples,double r2_full,double r2_reduced)599 double CSG_Regression_Multiple::_Get_P(int nPredictors, int nSamples, double r2_full, double r2_reduced)
600 {
601 double f = (nSamples - nPredictors - 1) * (r2_full - r2_reduced) / (1.0 - r2_full);
602
603 return( CSG_Test_Distribution::Get_F_Tail(f, nPredictors, nSamples - nPredictors - 1, TESTDIST_TYPE_Right) );
604 }
605
606
607 ///////////////////////////////////////////////////////////
608 // //
609 // //
610 // //
611 ///////////////////////////////////////////////////////////
612
613 //---------------------------------------------------------
_Get_Regression(const CSG_Matrix & Samples)614 bool CSG_Regression_Multiple::_Get_Regression(const CSG_Matrix &Samples)
615 {
616 int nPredictors = Samples.Get_NX() - 1;
617 int nSamples = Samples.Get_NY();
618
619 //-----------------------------------------------------
620 int i, j;
621 double Ym, SSR, SSE, SST, MSR, MSE, SE, R2, F;
622 CSG_Vector Y, Yr, B;
623 CSG_Matrix X, Xt, C;
624
625 Y.Create(nSamples);
626 X.Create(nPredictors + (m_bIntercept ? 1 : 0), nSamples);
627
628 //-----------------------------------------------------
629 for(i=0, Ym=0.0; i<nSamples; i++)
630 {
631 Ym += Y[i] = Samples[i][0];
632
633 if( m_bIntercept )
634 {
635 X [i][0] = 1.0;
636
637 for(j=1; j<=nPredictors; j++)
638 {
639 X[i][j] = Samples[i][j];
640 }
641 }
642 else
643 {
644 for(j=0; j<nPredictors; j++)
645 {
646 X[i][j] = Samples[i][j + 1];
647 }
648 }
649 }
650
651 Ym /= nSamples;
652
653 //-----------------------------------------------------
654 Xt = X.Get_Transpose();
655
656 C = (Xt * X).Get_Inverse();
657
658 B = C * (Xt * Y);
659
660 //-----------------------------------------------------
661 Yr = X * B;
662
663 for(i=0, SSE=0.0, SSR=0.0, SST=0.0; i<nSamples; i++)
664 {
665 SSE += SG_Get_Square(Yr[i] - Y[i]);
666 SSR += SG_Get_Square(Yr[i] - Ym);
667 // SST += SG_Get_Square(Y [i] - Ym);
668 }
669
670 // SSE = SST - SSR;
671 SST = SSR + SSE;
672 MSR = SSR / nPredictors;
673 MSE = SSE / (nSamples - nPredictors - 1);
674 SE = sqrt(SSE / (nSamples - nPredictors));
675 R2 = SSR / SST;
676 F = MSR / MSE; // = (nSamples - nPredictors - 1) * (R2 / nPredictors) / (1.0 - R2);
677
678 //-----------------------------------------------------
679 m_pModel->Get_Record(MLR_MODEL_R2 )->Set_Value(1, R2);
680 m_pModel->Get_Record(MLR_MODEL_R2_ADJ )->Set_Value(1, SG_Regression_Get_Adjusted_R2(R2, nSamples, nPredictors));
681 m_pModel->Get_Record(MLR_MODEL_SE )->Set_Value(1, SE);
682 m_pModel->Get_Record(MLR_MODEL_SSR )->Set_Value(1, SSR);
683 m_pModel->Get_Record(MLR_MODEL_SSE )->Set_Value(1, SSE);
684 m_pModel->Get_Record(MLR_MODEL_SST )->Set_Value(1, SST);
685 m_pModel->Get_Record(MLR_MODEL_MSR )->Set_Value(1, MSR);
686 m_pModel->Get_Record(MLR_MODEL_MSE )->Set_Value(1, MSE);
687 m_pModel->Get_Record(MLR_MODEL_F )->Set_Value(1, F);
688 m_pModel->Get_Record(MLR_MODEL_SIG )->Set_Value(1, CSG_Test_Distribution::Get_F_Tail_from_R2(R2, nPredictors, nSamples));
689 m_pModel->Get_Record(MLR_MODEL_NPREDICT)->Set_Value(1, nPredictors);
690 m_pModel->Get_Record(MLR_MODEL_NSAMPLES)->Set_Value(1, nSamples);
691
692 //-----------------------------------------------------
693 CSG_Matrix P = SG_Get_Correlation_Matrix(Samples, true).Get_Inverse(); // get partial correlation
694
695 if( !m_bIntercept )
696 {
697 m_pRegression->Add_Record()->Set_Value(MLR_VAR_NAME, m_Names[0]);
698 }
699
700 for(j=0; j<B.Get_N(); j++)
701 {
702 double se = SE * sqrt(fabs(C[j][j]));
703 double b = B[j];
704 double t = b / se;
705 int k = (m_bIntercept ? j : j + 1);
706 double r = -P[k][0] / sqrt(P[k][k] * P[0][0]);
707
708 CSG_Table_Record *pRecord = m_pRegression->Add_Record();
709
710 pRecord->Set_Value(MLR_VAR_ID , m_bIntercept ? j - 1 : j);
711 pRecord->Set_Value(MLR_VAR_NAME , m_Names[k]);
712 pRecord->Set_Value(MLR_VAR_RCOEFF , b);
713 pRecord->Set_Value(MLR_VAR_R , r);
714 pRecord->Set_Value(MLR_VAR_R2 , r*r);
715 pRecord->Set_Value(MLR_VAR_R2_ADJ , SG_Regression_Get_Adjusted_R2(r*r, nSamples, nPredictors));
716 pRecord->Set_Value(MLR_VAR_SE , se);
717 pRecord->Set_Value(MLR_VAR_T , t);
718 pRecord->Set_Value(MLR_VAR_SIG , CSG_Test_Distribution::Get_T_Tail(t, nSamples - nPredictors, TESTDIST_TYPE_TwoTail));
719 }
720
721 //-----------------------------------------------------
722 return( true );
723 }
724
725
726 ///////////////////////////////////////////////////////////
727 // //
728 // //
729 // //
730 ///////////////////////////////////////////////////////////
731
732 //---------------------------------------------------------
_Get_Step_In(CSG_Matrix & X,double P_in,double & R2,const CSG_Matrix & Samples)733 int CSG_Regression_Multiple::_Get_Step_In(CSG_Matrix &X, double P_in, double &R2, const CSG_Matrix &Samples)
734 {
735 int iBest, iPredictor;
736 double rBest;
737
738 CSG_Regression_Multiple R(m_bIntercept);
739
740 X.Add_Cols(1);
741
742 //-----------------------------------------------------
743 for(iPredictor=0, iBest=-1, rBest=0.0; iPredictor<Samples.Get_NX()-1; iPredictor++)
744 {
745 if( !m_bIncluded[iPredictor] )
746 {
747 X.Set_Col(1 + m_nPredictors, Samples.Get_Col(1 + iPredictor));
748
749 if( R.Get_Model(X) && (iBest < 0 || rBest < R.Get_R2()) )
750 {
751 iBest = iPredictor;
752 rBest = R.Get_R2();
753 }
754 }
755 }
756
757 //-----------------------------------------------------
758 if( iBest >= 0 && _Get_P(1, Samples.Get_NY() - m_nPredictors, rBest, R2) <= P_in )
759 {
760 m_bIncluded[iBest] = true;
761 m_Predictor[m_nPredictors] = iBest;
762
763 m_nPredictors++;
764
765 X.Set_Col(m_nPredictors, Samples.Get_Col(1 + iBest));
766 _Set_Step_Info(X, R2, iBest, true);
767 R2 = rBest;
768
769 return( iBest );
770 }
771
772 X.Del_Col(X.Get_NX() - 1);
773
774 return( -1 );
775 }
776
777 //---------------------------------------------------------
_Get_Step_Out(CSG_Matrix & X,double P_out,double & R2)778 int CSG_Regression_Multiple::_Get_Step_Out(CSG_Matrix &X, double P_out, double &R2)
779 {
780 int iBest, iPredictor;
781 double rBest;
782
783 CSG_Regression_Multiple R(m_bIntercept);
784
785 if( R2 <= 0.0 )
786 {
787 R.Get_Model(X);
788
789 R2 = R.Get_R2();
790 }
791
792 //-----------------------------------------------------
793 for(iPredictor=0, iBest=-1, rBest=0.0; iPredictor<m_nPredictors; iPredictor++)
794 {
795 CSG_Matrix X_reduced(X);
796
797 X_reduced.Del_Col(1 + iPredictor);
798
799 if( R.Get_Model(X_reduced) && (iBest < 0 || rBest < R.Get_R2()) )
800 {
801 iBest = iPredictor;
802 rBest = R.Get_R2();
803 }
804 }
805
806 //-----------------------------------------------------
807 if( iBest >= 0 && _Get_P(1, X.Get_NY() - (m_nPredictors - 1), R2, rBest) > P_out )
808 {
809 m_nPredictors--;
810
811 X.Del_Col(1 + iBest);
812 _Set_Step_Info(X, R2, m_Predictor[iBest], false);
813 R2 = rBest;
814
815 m_bIncluded[m_Predictor[iBest]] = false;
816
817 for(iPredictor=iBest; iPredictor<m_nPredictors; iPredictor++)
818 {
819 m_Predictor[iPredictor] = m_Predictor[iPredictor + 1];
820 }
821
822 return( iBest );
823 }
824
825 return( -1 );
826 }
827
828
829 ///////////////////////////////////////////////////////////
830 // //
831 // //
832 // //
833 ///////////////////////////////////////////////////////////
834
835 //---------------------------------------------------------
_Set_Step_Info(const CSG_Matrix & X)836 bool CSG_Regression_Multiple::_Set_Step_Info(const CSG_Matrix &X)
837 {
838 CSG_Regression_Multiple R(m_bIntercept);
839
840 if( m_nPredictors > 0 && R.Get_Model(X) )
841 {
842 m_pModel ->Assign(R.m_pModel);
843 m_pRegression ->Assign(R.m_pRegression);
844
845 m_pRegression->Get_Record(0)->Set_Value(MLR_VAR_NAME, m_Names[0]);
846
847 for(int i=0; i<m_nPredictors; i++)
848 {
849 CSG_Table_Record *pRecord = m_pRegression->Get_Record(1 + i);
850
851 pRecord->Set_Value(MLR_VAR_ID , m_Predictor[i]);
852 pRecord->Set_Value(MLR_VAR_NAME, m_Names[1 + m_Predictor[i]]);
853 }
854
855 return( true );
856 }
857
858 return( false );
859 }
860
861 //---------------------------------------------------------
_Set_Step_Info(const CSG_Matrix & X,double R2_prev,int iVariable,bool bIn)862 bool CSG_Regression_Multiple::_Set_Step_Info(const CSG_Matrix &X, double R2_prev, int iVariable, bool bIn)
863 {
864 CSG_Regression_Multiple R(m_bIntercept);
865
866 R.Get_Model(X);
867
868 CSG_Table_Record *pRecord = m_pSteps->Add_Record();
869
870 pRecord->Set_Value(MLR_STEP_NR , m_pSteps->Get_Count());
871 pRecord->Set_Value(MLR_STEP_R , sqrt(R.Get_R2()));
872 pRecord->Set_Value(MLR_STEP_R2 , R.Get_R2());
873 pRecord->Set_Value(MLR_STEP_R2_ADJ , R.Get_R2_Adj());
874 pRecord->Set_Value(MLR_STEP_SE , R.Get_StdError());
875 pRecord->Set_Value(MLR_STEP_SSR , R.m_pModel->Get_Record(MLR_MODEL_SSR)->asDouble(1));
876 pRecord->Set_Value(MLR_STEP_SSE , R.m_pModel->Get_Record(MLR_MODEL_SSE)->asDouble(1));
877 pRecord->Set_Value(MLR_STEP_MSR , R.m_pModel->Get_Record(MLR_MODEL_MSR)->asDouble(1));
878 pRecord->Set_Value(MLR_STEP_MSE , R.m_pModel->Get_Record(MLR_MODEL_MSE)->asDouble(1));
879 pRecord->Set_Value(MLR_STEP_DF , X.Get_NRows() - m_nPredictors - 1);
880 pRecord->Set_Value(MLR_STEP_F , R.m_pModel->Get_Record(MLR_MODEL_F )->asDouble(1));
881 pRecord->Set_Value(MLR_STEP_SIG , R.m_pModel->Get_Record(MLR_MODEL_SIG)->asDouble(1));
882 pRecord->Set_Value(MLR_STEP_VAR_F , _Get_F(1, X.Get_NRows() - (m_nPredictors - 1), bIn ? R.Get_R2() : R2_prev, bIn ? R2_prev : R.Get_R2()));
883 pRecord->Set_Value(MLR_STEP_VAR_SIG , _Get_P(1, X.Get_NRows() - (m_nPredictors - 1), bIn ? R.Get_R2() : R2_prev, bIn ? R2_prev : R.Get_R2()));
884 pRecord->Set_Value(MLR_STEP_DIR , bIn ? SG_T(">>") : SG_T("<<"));
885 pRecord->Set_Value(MLR_STEP_VAR , m_Names[1 + iVariable]);
886
887 return( true );
888 }
889
890
891 ///////////////////////////////////////////////////////////
892 // //
893 // //
894 // //
895 ///////////////////////////////////////////////////////////
896
897 //---------------------------------------------------------
Get_R2(void) const898 double CSG_Regression_Multiple::Get_R2 (void) const { return( m_pModel->Get_Record(MLR_MODEL_R2 )->asDouble(1) ); }
Get_R2_Adj(void) const899 double CSG_Regression_Multiple::Get_R2_Adj (void) const { return( m_pModel->Get_Record(MLR_MODEL_R2_ADJ )->asDouble(1) ); }
Get_StdError(void) const900 double CSG_Regression_Multiple::Get_StdError (void) const { return( m_pModel->Get_Record(MLR_MODEL_SE )->asDouble(1) ); }
Get_F(void) const901 double CSG_Regression_Multiple::Get_F (void) const { return( m_pModel->Get_Record(MLR_MODEL_F )->asDouble(1) ); }
Get_P(void) const902 double CSG_Regression_Multiple::Get_P (void) const { return( m_pModel->Get_Record(MLR_MODEL_SIG )->asDouble(1) ); }
Get_CV_RMSE(void) const903 double CSG_Regression_Multiple::Get_CV_RMSE (void) const { return( m_pModel->Get_Record(MLR_MODEL_CV_RMSE )->asDouble(1) ); }
Get_CV_NRMSE(void) const904 double CSG_Regression_Multiple::Get_CV_NRMSE (void) const { return( m_pModel->Get_Record(MLR_MODEL_CV_NRMSE )->asDouble(1) ); }
Get_CV_R2(void) const905 double CSG_Regression_Multiple::Get_CV_R2 (void) const { return( m_pModel->Get_Record(MLR_MODEL_CV_R2 )->asDouble(1) ); }
Get_CV_nSamples(void) const906 int CSG_Regression_Multiple::Get_CV_nSamples (void) const { return( m_pModel->Get_Record(MLR_MODEL_CV_NSAMPLES)->asInt (1) ); }
Get_nPredictors(void) const907 int CSG_Regression_Multiple::Get_nPredictors (void) const { return( m_pModel->Get_Record(MLR_MODEL_NPREDICT )->asInt (1) ); }
Get_nSamples(void) const908 int CSG_Regression_Multiple::Get_nSamples (void) const { return( m_pModel->Get_Record(MLR_MODEL_NSAMPLES )->asInt (1) ); }
Get_DegFreedom(void) const909 int CSG_Regression_Multiple::Get_DegFreedom (void) const { return( Get_nSamples() - Get_nPredictors() - 1 ); }
910
911 //---------------------------------------------------------
Get_Name(int iVariable) const912 const SG_Char * CSG_Regression_Multiple::Get_Name(int iVariable) const
913 {
914 if( iVariable >= 0 && iVariable < m_pRegression->Get_Count() - 1 )
915 {
916 return( m_pRegression->Get_Record(1 + iVariable)->asString(MLR_VAR_NAME) );
917 }
918
919 return( SG_T("") );
920 }
921
922 //---------------------------------------------------------
Get_RConst(void) const923 double CSG_Regression_Multiple::Get_RConst(void) const
924 {
925 if( m_pRegression->Get_Count() > 0 )
926 {
927 return( m_pRegression->Get_Record(0)->asDouble(MLR_VAR_RCOEFF) );
928 }
929
930 return( 0.0 );
931 }
932
933 //---------------------------------------------------------
Get_Parameter(int iVariable,int Parameter) const934 double CSG_Regression_Multiple::Get_Parameter(int iVariable, int Parameter) const
935 {
936 if( iVariable >= 0 && iVariable < m_pRegression->Get_Count() - 1 && Parameter >= 0 && Parameter <= MLR_VAR_P )
937 {
938 return( m_pRegression->Get_Record(1 + iVariable)->asDouble(Parameter) );
939 }
940
941 return( 0.0 );
942 }
943
944
945 ///////////////////////////////////////////////////////////
946 // //
947 // //
948 // //
949 ///////////////////////////////////////////////////////////
950
951 //---------------------------------------------------------
Get_Info(void) const952 CSG_String CSG_Regression_Multiple::Get_Info(void) const
953 {
954 CSG_String s;
955
956 if( Get_nPredictors() < 1 )
957 {
958 return( s );
959 }
960
961 //-----------------------------------------------------
962 if( m_pSteps->Get_Count() > 0 )
963 {
964 s += CSG_String::Format("\n%s:\n\n", _TL("Steps"));
965 s += CSG_String::Format("No. \tR \tR2 \tR2 adj\tStdErr\tF \tP \tF step\tP step\tVariable\n");
966 s += CSG_String::Format("------\t------\t------\t------\t------\t------\t------\t------\t------\t------\n");
967
968 for(int i=0; i<m_pSteps->Get_Count(); i++)
969 {
970 CSG_Table_Record *pRecord = m_pSteps->Get_Record(i);
971
972 s += CSG_String::Format("%d.\t%.2f\t%.2f\t%.2f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%s %s\n",
973 pRecord->asInt (MLR_STEP_NR ),
974 pRecord->asDouble(MLR_STEP_R ),
975 pRecord->asDouble(MLR_STEP_R2 ) * 100.0,
976 pRecord->asDouble(MLR_STEP_R2_ADJ ) * 100.0,
977 pRecord->asDouble(MLR_STEP_SE ),
978 pRecord->asDouble(MLR_STEP_F ),
979 pRecord->asDouble(MLR_STEP_SIG ) * 100.0,
980 pRecord->asDouble(MLR_STEP_VAR_F ),
981 pRecord->asDouble(MLR_STEP_VAR_SIG) * 100.0,
982 pRecord->asString(MLR_STEP_DIR ),
983 pRecord->asString(MLR_STEP_VAR )
984 );
985 }
986 }
987
988 //-----------------------------------------------------
989 s += CSG_String::Format("\n%s:\n\n", _TL("Correlation"));
990 s += CSG_String::Format("No. \tR \tR2 \tR2 adj\tStdErr\tt \tSig. \tb \t\tVariable\n");
991 s += CSG_String::Format("------\t------\t------\t------\t------\t------\t---------\t---------\t\t---------\n");
992
993 for(int i=0; i<m_pRegression->Get_Count(); i++)
994 {
995 CSG_Table_Record *pRecord = m_pRegression->Get_Record(i);
996
997 s += CSG_String::Format("%d.\t%.2f\t%.2f\t%.2f\t%.3f\t%.3f\t%.6f\t%.6f\t%s\n",
998 i,
999 pRecord->asDouble(MLR_VAR_R ),
1000 pRecord->asDouble(MLR_VAR_R2 ) * 100.0,
1001 pRecord->asDouble(MLR_VAR_R2_ADJ) * 100.0,
1002 pRecord->asDouble(MLR_VAR_SE ),
1003 pRecord->asDouble(MLR_VAR_T ),
1004 pRecord->asDouble(MLR_VAR_SIG ) * 100.0,
1005 pRecord->asDouble(MLR_VAR_RCOEFF),
1006 pRecord->asString(MLR_VAR_NAME )
1007 );
1008 }
1009
1010 //-----------------------------------------------------
1011 s += CSG_String::Format("\n%s: %g", _TL("Formula"), Get_RConst());
1012
1013 for(int i=0; i<Get_nPredictors(); i++)
1014 {
1015 double b = Get_RCoeff(i);
1016
1017 s += CSG_String::Format(" %c %g * X%d", b < 0. ? '-' : '+', fabs(b), i + 1);
1018 }
1019
1020 //-----------------------------------------------------
1021 s += "\n\n";
1022
1023 s += CSG_String::Format("%s: %f (%s: %d)\n", _TL("Residual standard error"), Get_StdError(), _TL("degrees of freedom"), Get_DegFreedom());
1024 s += CSG_String::Format("%s: %f (%s: %f)\n", _TL("Multiple R-squared"), 100.0 * Get_R2(), _TL("adjusted"), 100.0 * Get_R2_Adj());
1025 s += CSG_String::Format("%s: %f (%d/%d DF), %s: %g\n", _TL("F-statistic"), Get_F(), Get_nPredictors(), Get_DegFreedom(), _TL("p-value"), Get_P());
1026
1027 //-----------------------------------------------------
1028 return( s );
1029 }
1030
1031
1032 ///////////////////////////////////////////////////////////
1033 // //
1034 // //
1035 // //
1036 ///////////////////////////////////////////////////////////
1037
1038 //---------------------------------------------------------
1039