1 /* $Id: $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's offical duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================*/
25
26 /*****************************************************************************
27
28 File name: sls_alp_regression.cpp
29
30 Author: Sergey Sheetlin
31
32 Contents: Regression methods
33
34 ******************************************************************************/
35
36 #include "sls_alp_regression.hpp"
37
38 using namespace Sls;
39 using namespace std;
40
41
42 //-------------------------Solution of arbitrary equations----------------------------
find_tetta_general(function_type * func_,void * func_pointer_,double a_,double b_,long int n_partition_,double eps_,std::vector<double> & res_)43 void alp_reg::find_tetta_general(
44 function_type *func_,
45 void* func_pointer_,
46 double a_,//[a,b] is the interval for search of equation solution
47 double b_,
48 long int n_partition_,
49 double eps_,
50 std::vector<double> &res_
51 )
52 {
53 res_.resize(0);
54 vector<long int> intervals(0);
55
56 if(n_partition_<=0)
57 {
58 throw error("Error in alp_reg::find_tetta_general\n",4);
59
60 };
61
62 long int i;
63 double h=(b_-a_)/n_partition_;
64 double x1,x2 = 0.0;
65 for(i=0;i<n_partition_;i++)
66 {
67 if(i==0)
68 {
69 x1=(*func_)(//calculation of E(exp(tetta*X))
70 a_+i*h,func_pointer_);
71 if(fabs(x1)<eps_)
72 {
73 res_.push_back(a_+i*h);
74 };
75
76 }
77 else
78 {
79 x1=x2;
80 };
81
82
83 x2=(*func_)(//calculation of E(exp(tetta*X))
84 a_+(i+1)*h,
85 func_pointer_);
86
87 if(fabs(x2)<eps_)
88 {
89 res_.push_back(a_+(i+1)*h);
90 };
91
92 if((x1*x2<0)&&(fabs(x1)>=eps_&&fabs(x2)>=eps_))
93 {
94 intervals.push_back(i);
95 };
96 };
97
98 for(i=0;i<(long int)intervals.size();i++)
99 {
100 double sol=find_single_tetta_general(
101 func_,
102 func_pointer_,
103 a_+intervals[i]*h,//[a,b] is the interval for search of equation solution
104 a_+(1+intervals[i])*h,
105 eps_);
106 res_.push_back(sol);
107 };
108
109 sort(res_.begin(),res_.end());
110
111 return;
112 }
113
find_single_tetta_general(function_type * func_,void * func_pointer_,double a_,double b_,double eps_)114 double alp_reg::find_single_tetta_general(
115 function_type *func_,
116 void* func_pointer_,
117 double a_,//[a,b] is the interval for search of equation solution
118 double b_,
119 double eps_
120 )
121 {
122 if(b_<a_)
123 {
124 throw error("Error in alp_reg::find_single_tetta_general\n",4);
125 };
126
127 double x1=a_;
128 double x2=b_;
129 double precision=(x2-x1)/2;
130
131 double y1=(*func_)(//calculation of E(exp(tetta*X))
132 x1,
133 func_pointer_);
134 if(fabs(y1)<eps_)
135 {
136 return x1;
137 };
138
139
140 double y2=(*func_)(//calculation of E(exp(tetta*X))
141 x2,
142 func_pointer_);
143 if(fabs(y2)<eps_)
144 {
145 return x2;
146 };
147
148 while(precision>eps_)
149 {
150 double x12=(x1+x2)/2;
151
152 double y12=(*func_)(//calculation of E(exp(tetta*X))
153 x12,
154 func_pointer_);
155
156 if(fabs(y12)<eps_)
157 {
158 return x12;
159 };
160
161 if(y12*y1<0)
162 {
163 x2=x12;
164 y2=y12;
165 }
166 else
167 {
168 x1=x12;
169 y1=y12;
170 };
171 precision=(x2-x1)/2;
172 };
173
174 return (x1+x2)/2;
175 }
176
177 //------------regression-----------------------------------
178
correction_of_errors(double * errors_,long int number_of_elements_)179 void alp_reg::correction_of_errors(
180 double *errors_,
181 long int number_of_elements_)
182 {
183
184 if(number_of_elements_<=0)
185 {
186 throw error("Unexpected error\n",4);
187 };
188
189
190 double average_error=0;
191 long int i;
192 for(i=0;i<number_of_elements_;i++)
193 {
194 if(errors_[i]<0)
195 {
196 throw error("Error in alp_reg::correction_of_errors: input error in the regression model is less than 0\n",4);
197 };
198
199 average_error+=errors_[i];
200 };
201
202 average_error/=(double)number_of_elements_;
203
204 double error_eps;
205
206 if(average_error<=0)
207 {
208 error_eps=1e-50;
209 }
210 else
211 {
212 error_eps=average_error;
213 };
214
215
216 for(i=0;i<number_of_elements_;i++)
217 {
218 if(errors_[i]==0)
219 {
220 errors_[i]=error_eps;
221 };
222 };
223 }
224
robust_regression_sum_with_cut_LSM(long int min_length_,long int number_of_elements_,double * values_,double * errors_,bool cut_left_tail_,bool cut_right_tail_,double y_,double & beta0_,double & beta1_,double & beta0_error_,double & beta1_error_,long int & k1_opt_,long int & k2_opt_,bool & res_was_calculated_)225 void alp_reg::robust_regression_sum_with_cut_LSM(
226 long int min_length_,
227 long int number_of_elements_,
228 double *values_,
229 double *errors_,
230 bool cut_left_tail_,
231 bool cut_right_tail_,
232 double y_,
233 double &beta0_,
234 double &beta1_,
235 double &beta0_error_,
236 double &beta1_error_,
237 long int &k1_opt_,
238 long int &k2_opt_,
239 bool &res_was_calculated_)
240 {
241
242 if(number_of_elements_<2)
243 {
244 throw error("Unexpected error\n",4);
245 };
246
247 correction_of_errors(errors_,number_of_elements_);
248
249 //minization of the function
250 double c=y_*y_;
251
252
253 long int k1_start,k1_end;
254 long int k2_start,k2_end;
255
256 if(cut_left_tail_&&cut_right_tail_)
257 {
258 k1_start=0;
259 k1_end=number_of_elements_-1;
260
261 k2_start=0;
262 k2_end=number_of_elements_-1;
263
264 }
265 else
266 {
267 if(cut_left_tail_&&!cut_right_tail_)
268 {
269 k1_start=0;
270 k1_end=number_of_elements_-1;
271
272 k2_start=number_of_elements_-1;
273 k2_end=number_of_elements_-1;
274
275 }
276 else
277 {
278 if(!cut_left_tail_&&cut_right_tail_)
279 {
280 k1_start=0;
281 k1_end=0;
282
283 k2_start=0;
284 k2_end=number_of_elements_-1;
285 }
286 else
287 {
288 k1_start=0;
289 k1_end=0;
290
291 k2_start=number_of_elements_-1;
292 k2_end=number_of_elements_-1;
293
294 };
295 };
296 };
297
298 long int k1_opt=0,k2_opt=0;
299
300 double func_opt=DBL_MAX;
301 double beta0_opt=0;
302 double beta1_opt=0;
303 double beta0_opt_error=0;
304 double beta1_opt_error=0;
305
306 long int k1,k2;
307
308
309 res_was_calculated_=false;
310
311
312 for(k1=k1_start;k1<=k1_end;k1++)
313 {
314
315 for(k2=sls_basic::Tmax(k1+(long int)1,sls_basic::Tmax(k1,k2_start)+min_length_);k2<=k2_end;k2++)
316 {
317 double beta0_opt_tmp,beta1_opt_tmp,beta0_opt_error_tmp,beta1_opt_error_tmp;
318 bool res_was_calculated;
319
320 double tmp=function_for_robust_regression_sum_with_cut_LSM(
321 values_+k1,
322 errors_+k1,
323 k2-k1+1,
324 k1,
325 c,
326 beta0_opt_tmp,
327 beta1_opt_tmp,
328 beta0_opt_error_tmp,
329 beta1_opt_error_tmp,
330 res_was_calculated);
331
332
333
334 if(tmp<func_opt&&res_was_calculated)
335 {
336 func_opt=tmp;
337 beta0_opt=beta0_opt_tmp;
338 beta1_opt=beta1_opt_tmp;
339 beta0_opt_error=beta0_opt_error_tmp;
340 beta1_opt_error=beta1_opt_error_tmp;
341 k1_opt=k1;
342 k2_opt=k2;
343 res_was_calculated_=true;
344 };
345
346 };
347 };
348
349
350
351
352 if(res_was_calculated_)
353 {
354 beta0_=beta0_opt;
355 beta1_=beta1_opt;
356 beta0_error_=beta0_opt_error;
357 beta1_error_=beta1_opt_error;
358 k1_opt_=k1_opt;
359 k2_opt_=k2_opt;
360 };
361
362
363 }
364
function_for_robust_regression_sum_with_cut_LSM(double * values_,double * errors_,long int number_of_elements_,long int k_start_,double c_,double & beta0_,double & beta1_,double & beta0_error_,double & beta1_error_,bool & res_was_calculated_)365 double alp_reg::function_for_robust_regression_sum_with_cut_LSM(
366 double *values_,
367 double *errors_,
368 long int number_of_elements_,
369 long int k_start_,
370 double c_,
371 double &beta0_,
372 double &beta1_,
373 double &beta0_error_,
374 double &beta1_error_,
375 bool &res_was_calculated_)
376 {
377 long int i;
378 double a11=0;
379 double a12=0;
380 double a21=0;
381 double a22=0;
382 double y1=0;
383 double y2=0;
384
385 double y1_error=0;
386 double y2_error=0;
387
388 for(i=0;i<number_of_elements_;i++)
389 {
390 if(errors_[i]!=0)
391 {
392 double tmp=1.0/(errors_[i]*errors_[i]);
393
394 a11+=tmp;
395 a12+=(double)(k_start_+i)*tmp;
396 a22+=(double)((k_start_+i)*(k_start_+i))*tmp;
397 y1+=values_[i]*tmp;
398 y1_error+=tmp*tmp*errors_[i]*errors_[i];
399 y2+=(double)(k_start_+i)*values_[i]*tmp;
400 y2_error+=(double)(k_start_+i)*(double)(k_start_+i)*tmp*tmp*errors_[i]*errors_[i];
401 };
402 };
403
404 a21=a12;
405 y1_error=alp_reg::sqrt_for_errors(y1_error);
406 y2_error=alp_reg::sqrt_for_errors(y2_error);
407
408 double eps=1e-10*sls_basic::Tmax(fabs(a11*a22),fabs(a21*a12));
409
410 double den=a11*a22-a21*a12;
411 if(fabs(den)<=eps)
412 {
413 res_was_calculated_=false;
414 return 0;
415 }
416 else
417 {
418 res_was_calculated_=true;
419 };
420
421 beta0_=(y1*a22-a12*y2)/den;
422 beta1_=(a11*y2-a21*y1)/den;
423
424 beta0_error_=sqrt(y1_error*y1_error*a22*a22+a12*a12*y2_error*y2_error)/den;
425 beta1_error_=sqrt(a11*a11*y2_error*y2_error+a21*a21*y1_error*y1_error)/den;
426
427
428 double res=0;
429 for(i=0;i<number_of_elements_;i++)
430 {
431 if(errors_[i]!=0)
432 {
433 double tmp=(beta0_+beta1_*(i+k_start_)-values_[i])/errors_[i];
434 res+=tmp*tmp-c_;
435 };
436 };
437
438 return res;
439 }
440
robust_regression_sum_with_cut_LSM_beta1_is_defined(long int min_length_,long int number_of_elements_,double * values_,double * errors_,bool cut_left_tail_,bool cut_right_tail_,double y_,double & beta0_,double beta1_,double & beta0_error_,double beta1_error_,long int & k1_opt_,long int & k2_opt_,bool & res_was_calculated_)441 void alp_reg::robust_regression_sum_with_cut_LSM_beta1_is_defined(
442 long int min_length_,
443 long int number_of_elements_,
444 double *values_,
445 double *errors_,
446 bool cut_left_tail_,
447 bool cut_right_tail_,
448 double y_,
449 double &beta0_,
450 double beta1_,
451 double &beta0_error_,
452 double beta1_error_,
453 long int &k1_opt_,
454 long int &k2_opt_,
455 bool &res_was_calculated_)
456 {
457
458 correction_of_errors(errors_,number_of_elements_);
459
460 //minization of the function
461 double c=y_*y_;
462
463
464 long int k1_start,k1_end;
465 long int k2_start,k2_end;
466
467 if(cut_left_tail_&&cut_right_tail_)
468 {
469 k1_start=0;
470 k1_end=number_of_elements_-1;
471
472 k2_start=0;
473 k2_end=number_of_elements_-1;
474
475 }
476 else
477 {
478 if(cut_left_tail_&&!cut_right_tail_)
479 {
480 k1_start=0;
481 k1_end=number_of_elements_-1;
482
483 k2_start=number_of_elements_-1;
484 k2_end=number_of_elements_-1;
485
486 }
487 else
488 {
489 if(!cut_left_tail_&&cut_right_tail_)
490 {
491 k1_start=0;
492 k1_end=0;
493
494 k2_start=0;
495 k2_end=number_of_elements_-1;
496 }
497 else
498 {
499 k1_start=0;
500 k1_end=0;
501
502 k2_start=number_of_elements_-1;
503 k2_end=number_of_elements_-1;
504
505 };
506 };
507 };
508
509 long int k1_opt=0,k2_opt=0;
510
511 double func_opt=DBL_MAX;
512 double beta0_opt=0;
513 double beta0_opt_error=0;
514
515 long int k1,k2;
516
517 res_was_calculated_=false;
518
519
520 for(k1=k1_start;k1<=k1_end;k1++)
521 {
522
523 for(k2=sls_basic::Tmax(k1,k2_start)+min_length_;k2<=k2_end;k2++)
524 {
525 double beta0_opt_tmp,beta1_opt_tmp,beta0_opt_error_tmp,beta1_opt_error_tmp;
526 bool res_was_calculated;
527
528 beta1_opt_tmp=beta1_;
529 beta1_opt_error_tmp=beta1_error_;
530
531 double tmp=function_for_robust_regression_sum_with_cut_LSM_beta1_is_defined(
532 values_+k1,
533 errors_+k1,
534 k2-k1+1,
535 k1,
536 c,
537 beta0_opt_tmp,
538 beta1_opt_tmp,
539 beta0_opt_error_tmp,
540 beta1_opt_error_tmp,
541 res_was_calculated);
542
543 if(tmp<func_opt&&res_was_calculated)
544 {
545 func_opt=tmp;
546 beta0_opt=beta0_opt_tmp;
547 beta0_opt_error=beta0_opt_error_tmp;
548 k1_opt=k1;
549 k2_opt=k2;
550 res_was_calculated_=true;
551 };
552
553 };
554 };
555
556 if(res_was_calculated_)
557 {
558 beta0_=beta0_opt;
559 beta0_error_=beta0_opt_error;
560 k1_opt_=k1_opt;
561 k2_opt_=k2_opt;
562 };
563
564
565 }
566
function_for_robust_regression_sum_with_cut_LSM_beta1_is_defined(double * values_,double * errors_,long int number_of_elements_,long int k_start_,double c_,double & beta0_,double beta1_,double & beta0_error_,double beta1_error_,bool & res_was_calculated_)567 double alp_reg::function_for_robust_regression_sum_with_cut_LSM_beta1_is_defined(
568 double *values_,
569 double *errors_,
570 long int number_of_elements_,
571 long int k_start_,
572 double c_,
573 double &beta0_,
574 double beta1_,
575 double &beta0_error_,
576 double beta1_error_,
577 bool &res_was_calculated_)
578 {
579 long int i;
580 double a11=0;
581 double y1=0;
582
583 double y1_error=0;
584
585
586 for(i=0;i<number_of_elements_;i++)
587 {
588 if(errors_[i]!=0)
589 {
590 double tmp=1.0/(errors_[i]*errors_[i]);
591
592 a11+=tmp;
593 y1+=(values_[i]-(double)(k_start_+i)*beta1_)*tmp;
594 double error_tmp=errors_[i]*errors_[i]+(double)(k_start_+i)*(double)(k_start_+i)*beta1_error_*beta1_error_;
595 y1_error+=tmp*tmp*error_tmp;
596 };
597 };
598
599 y1_error=sqrt(y1_error);
600
601 double eps=1e-10*fabs(a11);
602
603 double den=a11;
604 if(fabs(den)<=eps)
605 {
606 res_was_calculated_=false;
607 return 0;
608 }
609 else
610 {
611 res_was_calculated_=true;
612 };
613
614 beta0_=y1/den;
615
616 beta0_error_=y1_error/den;
617
618
619 double res=0;
620 for(i=0;i<number_of_elements_;i++)
621 {
622 if(errors_[i]!=0)
623 {
624 double tmp=(beta0_+beta1_*(i+k_start_)-values_[i])/errors_[i];
625 res+=tmp*tmp-c_;
626 };
627 };
628
629 return res;
630
631 }
632
median(long int dim_,double * array_)633 double alp_reg::median(
634 long int dim_,
635 double *array_)
636 {
637 vector<double> array_vect(dim_);
638 long int i;
639 for(i=0;i<dim_;i++)
640 {
641 array_vect[i]=array_[i];
642 };
643 sort(array_vect.begin(),array_vect.end());
644 if(dim_%2==0)
645 {
646 long int k=(long int)sls_basic::round((double)dim_/2.0);
647 return 0.5*(array_vect[k-1]+array_vect[k]);
648 }
649 else
650 {
651 long int k=(long int)sls_basic::round((double)(dim_-1.0)/2.0);
652 return array_vect[k];
653
654 };
655 }
656
robust_sum(double * values,long int dim,long int N_points,bool * & remove_flag)657 double alp_reg::robust_sum(
658 double *values,
659 long int dim,
660 long int N_points,
661 bool *&remove_flag)
662 {
663 remove_flag=NULL;
664
665 try
666 {
667 if(dim<=N_points)
668 {
669 throw error("Unexpected error\n",4);
670 };
671
672 long int i;
673 remove_flag=new bool[dim];
674 sls_basic::assert_mem(remove_flag);
675 for(i=0;i<dim;i++)
676 {
677 remove_flag[i]=true;
678 };
679
680
681 double med_val=alp_reg::median(
682 dim,
683 values);
684
685 vector<pair<double,long int> > array_vect(dim);
686
687 for(i=0;i<dim;i++)
688 {
689 pair<double,long int> P;
690 P.first=-fabs(values[i]-med_val);
691 P.second=i;
692 array_vect[i]=P;
693 };
694
695 sort(array_vect.begin(),array_vect.end());
696
697 for(i=0;i<N_points;i++)
698 {
699 remove_flag[array_vect[i].second]=false;
700 };
701
702 double res=0.0;
703
704 for(i=0;i<dim;i++)
705 {
706 if(remove_flag[i])
707 {
708 res+=values[i];
709 };
710 };
711
712 res/=(double)(dim-N_points);
713 return res;
714
715 }
716 catch (...)
717 {
718 delete[]remove_flag;remove_flag=NULL;
719 throw;
720 };
721
722 }
723
724