1 /*
2 * regression.c: Statistical regression functions.
3 *
4 * Authors:
5 * Morten Welinder <terra@gnome.org>
6 * Andrew Chatham <andrew.chatham@duke.edu>
7 * Daniel Carrera <dcarrera@math.toronto.edu>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License as
11 * published by the Free Software Foundation; either version 2 of the
12 * License, or (at your option) version 3.
13 *
14 * This library is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Library General Public License for more details.
18 *
19 * You should have received a copy of the GNU Library General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22 * USA.
23 */
24
25 #include <goffice/goffice-config.h>
26 #include "go-regression.h"
27 #include "go-rangefunc.h"
28 #include "go-math.h"
29
30 #include <math.h>
31 #include <string.h>
32 #include <stdlib.h>
33 #include <stdio.h>
34
35 /**
36 * GORegressionResult:
37 * @GO_REG_ok: success.
38 * @GO_REG_invalid_dimensions: invalid dimensions.
 * @GO_REG_invalid_data: invalid data.
40 * @GO_REG_not_enough_data: not enough data.
41 * @GO_REG_near_singular_good: probably good result.
42 * @GO_REG_near_singular_bad: probably bad result.
43 * @GO_REG_singular: singularity found.
44 **/
45
46 /**
47 * go_regression_stat_t:
48 * @se: SE for each parameter estimator.
49 * @t: t values for each parameter estimator.
 * @sqr_r: squared R.
 * @adj_sqr_r: the adjusted squared R.
 * @se_y: the Standard Error of Y.
 * @F: the F statistic of the regression.
 * @df_reg: regression degrees of freedom.
 * @df_resid: residual degrees of freedom.
 * @df_total: total degrees of freedom.
 * @ss_reg: regression sum of squares.
 * @ss_resid: residual sum of squares.
 * @ss_total: total sum of squares.
 * @ms_reg: regression mean square.
 * @ms_resid: residual mean square.
 * @ybar: mean of the dependent data.
 * @xbar: means of the independent data vectors.
 * @var: the variance of the entire regression: sum(errors^2)/(n-xdim).
65 **/
66
67 /**
68 * go_regression_stat_tl:
69 * @se: SE for each parameter estimator.
70 * @t: t values for each parameter estimator.
71 * @sqr_r: squared R.
72 * @adj_sqr_r:
73 * @se_y: the Standard Error of Y.
74 * @F:
75 * @df_reg:
76 * @df_resid:
77 * @df_total:
78 * @ss_reg:
79 * @ss_resid:
80 * @ss_total:
81 * @ms_reg:
82 * @ms_resid:
83 * @ybar:
84 * @xbar:
85 * @var: the variance of the entire regression: sum(errors^2)/(n-xdim).
86 **/
87
88 /* ------------------------------------------------------------------------- */
89 /* Self-inclusion magic. */
90
91 #ifndef DOUBLE
92
93 #define DEFAULT_THRESHOLD (256 * DOUBLE_EPS)
94
95
96 #define DEFINE_COMMON
97 #define DOUBLE double
98 #define DOUBLE_EPS DBL_EPSILON
99 #define SUFFIX(_n) _n
100 #define FORMAT_f "f"
101 #define FORMAT_g "g"
102
103 #ifdef GOFFICE_WITH_LONG_DOUBLE
104
105 #include "go-regression.c"
106 #undef DEFINE_COMMON
107 #undef DOUBLE
108 #undef DOUBLE_EPS
109 #undef SUFFIX
110 #undef FORMAT_f
111 #undef FORMAT_g
112 #ifdef HAVE_SUNMATH_H
113 #include <sunmath.h>
114 #endif
115 #define DOUBLE long double
116 #define DOUBLE_EPS LDBL_EPSILON
117 #define SUFFIX(_n) _n ## l
118 #define FORMAT_f "Lf"
119 #define FORMAT_g "Lg"
120 #else
121 /* It appears that gtk-doc is too dumb to handle this file. Provide
122 a dummy type getter to make things work. */
123 GType go_regression_statl_get_type (void);
go_regression_statl_get_type(void)124 GType go_regression_statl_get_type (void) { return G_TYPE_NONE; }
125 #endif
126
127 #endif
128
129 /* Boxed types code */
130
SUFFIX(go_regression_stat_t)131 static SUFFIX(go_regression_stat_t) *
132 SUFFIX(go_regression_stat_ref) (SUFFIX(go_regression_stat_t)* state)
133 {
134 state->ref_count++;
135 return state;
136 }
137
138 GType
139 #ifdef DEFINE_COMMON
go_regression_stat_get_type(void)140 go_regression_stat_get_type (void)
141 #else
142 go_regression_statl_get_type (void)
143 #endif
144 {
145 static GType t = 0;
146
147 if (t == 0) {
148 t = g_boxed_type_register_static (
149 #ifdef DEFINE_COMMON
150 "go_regression_stat_t",
151 #else
152 "go_regression_stat_tl",
153 #endif
154 (GBoxedCopyFunc)SUFFIX(go_regression_stat_ref),
155 (GBoxedFreeFunc)SUFFIX(go_regression_stat_destroy));
156 }
157 return t;
158 }
159
160 /* ------------------------------------------------------------------------- */
161
#ifdef DEFINE_COMMON

/* Plain and quad-precision matrix types; the CONST variants promise not
   to modify the pointed-to data. */
#define MATRIX DOUBLE **
#define CONSTMATRIX DOUBLE *const *const
#define QUAD SUFFIX(GOQuad)
#define QMATRIX QUAD **
#define CONSTQMATRIX QUAD *const *const

#undef DEBUG_REFINEMENT

/* Allocate a dim1-by-dim2 matrix of TYPE as an array of row pointers. */
#define ALLOC_MATRIX2(var,dim1,dim2,TYPE)		\
  do { int _i, _d1, _d2;				\
       _d1 = (dim1);					\
       _d2 = (dim2);					\
       (var) = g_new (TYPE *, _d1);			\
       for (_i = 0; _i < _d1; _i++)			\
	       (var)[_i] = g_new (TYPE, _d2);		\
  } while (0)
/* Allocate a dim1-by-dim2 matrix of DOUBLE. */
#define ALLOC_MATRIX(var,dim1,dim2) ALLOC_MATRIX2(var,dim1,dim2,DOUBLE)

/* Free a matrix allocated with ALLOC_MATRIX[2]; dim2 is not needed. */
#define FREE_MATRIX(var,dim1,dim2)			\
  do { int _i, _d1;					\
       _d1 = (dim1);					\
       for (_i = 0; _i < _d1; _i++)			\
	       g_free ((var)[_i]);			\
       g_free (var);					\
  } while (0)

/* Element-wise copy of a dim1-by-dim2 matrix. */
#define COPY_MATRIX(dst,src,dim1,dim2)			\
  do { int _i, _j, _d1, _d2;				\
       _d1 = (dim1);					\
       _d2 = (dim2);					\
       for (_i = 0; _i < _d1; _i++)			\
	       for (_j = 0; _j < _d2; _j++)		\
		       (dst)[_i][_j] = (src)[_i][_j];	\
  } while (0)

/* Fill a vector of length dim with zeros. */
#define ZERO_VECTOR(dst,dim)			\
  do { int _i, _d;				\
       _d = (dim);				\
       for (_i = 0; _i < _d; _i++)		\
	       (dst)[_i] = 0;			\
  } while (0)

/* Element-wise copy of a vector of length dim. */
#define COPY_VECTOR(dst,src,dim)		\
  do { int _i, _d;				\
       _d = (dim);				\
       for (_i = 0; _i < _d; _i++)		\
	       (dst)[_i] = (src)[_i];		\
  } while (0)

/* Debug helper: print a DOUBLE matrix to stderr. */
#define PRINT_MATRIX(var,dim1,dim2)					\
  do {									\
	int _i, _j, _d1, _d2;						\
	_d1 = (dim1);							\
	_d2 = (dim2);							\
	for (_i = 0; _i < _d1; _i++) {					\
		for (_j = 0; _j < _d2; _j++) {				\
			g_printerr (" %12.8" FORMAT_g, (var)[_i][_j]);	\
		}							\
		g_printerr ("\n");					\
	}								\
  } while (0)

/* Debug helper: print a quad-precision matrix to stderr, rounded to DOUBLE. */
#define PRINT_QMATRIX(var,dim1,dim2)					\
  do {									\
	int _i, _j, _d1, _d2;						\
	_d1 = (dim1);							\
	_d2 = (dim2);							\
	for (_i = 0; _i < _d1; _i++) {					\
		for (_j = 0; _j < _d2; _j++) {				\
			g_printerr (" %12.8" FORMAT_g, SUFFIX(go_quad_value) (&(var)[_i][_j])); \
		}							\
		g_printerr ("\n");					\
	}								\
  } while (0)


#endif
241
242 /*
243 * ---> j
244 *
245 * | ********
246 * | ********
247 * | ******** A[i][j]
248 * v ********
249 * ********
250 * i ********
251 * ********
252 * ********
253 *
254 */
255
SUFFIX(GOQuadMatrix)256 static SUFFIX(GOQuadMatrix)
257 *
258 SUFFIX(quad_matrix_from_matrix) (CONSTMATRIX A, int m, int n, DOUBLE *scale)
259 {
260 int i, j;
261 SUFFIX(GOQuadMatrix) *qA = SUFFIX(go_quad_matrix_new) (m, n);
262
263 for (i = 0; i < m; i++) {
264 for (j = 0; j < n; j++) {
265 DOUBLE x = scale ? A[i][j] / scale[j] : A[i][j];
266 SUFFIX(go_quad_init) (&qA->data[i][j], x);
267 }
268 }
269
270 return qA;
271 }
272
273 static void
SUFFIX(copy_quad_matrix_to_matrix)274 SUFFIX(copy_quad_matrix_to_matrix) (MATRIX A, const SUFFIX(GOQuadMatrix) *qA)
275 {
276 int i, j;
277
278 for (i = 0; i < qA->m; i++)
279 for (j = 0; j < qA->n; j++)
280 A[i][j] = SUFFIX(go_quad_value) (&qA->data[i][j]);
281
282 }
283
284 /* ------------------------------------------------------------------------- */
285
#if 0
/* Debug-only helper (compiled out): materializes the implicit Q factor
   by applying multiply_Qt to each unit vector, then prints it.
   NOTE(review): references SUFFIX(multiply_Qt), which is not defined in
   the visible portion of this file -- verify before enabling. */
static void
SUFFIX(print_Q) (CONSTQMATRIX V, int m, int n)
{
	QMATRIX Q;
	int i, j;
	QUAD *x;

	ALLOC_MATRIX2 (Q, m, m, QUAD);
	x = g_new (QUAD, m);

	for (j = 0; j < m; j++) {
		for (i = 0; i < m; i++)
			SUFFIX(go_quad_init)(&x[i], i == j ? 1 : 0);

		SUFFIX(multiply_Qt)(x, V, m, n);
		for (i = 0; i < m; i++)
			Q[j][i] = x[i];
	}

	PRINT_QMATRIX (Q, m, m);

	FREE_MATRIX (Q, m, m);
	g_free (x);
}
#endif
312
313 static DOUBLE
SUFFIX(calc_residual)314 SUFFIX(calc_residual) (CONSTMATRIX AT, const DOUBLE *b, int m, int n,
315 const DOUBLE *y)
316 {
317 int i, j;
318 QUAD N2 = SUFFIX(go_quad_zero);
319
320 for (i = 0; i < m; i++) {
321 QUAD d;
322
323 SUFFIX(go_quad_init) (&d, b[i]);
324
325 for (j = 0; j < n; j++) {
326 QUAD e;
327 SUFFIX(go_quad_mul12) (&e, AT[j][i], y[j]);
328 SUFFIX(go_quad_sub) (&d, &d, &e);
329 }
330
331 SUFFIX(go_quad_mul) (&d, &d, &d);
332 SUFFIX(go_quad_add) (&N2, &N2, &d);
333 }
334
335 return SUFFIX(go_quad_value) (&N2);
336 }
337
338 /* ------------------------------------------------------------------------- */
339
340 /*
341 * Solve AX=B where A is n-times-n and B and X are both n-times-bn.
342 * X is stored back into B.
343 */
344
GORegressionResult
SUFFIX(go_linear_solve_multiple) (CONSTMATRIX A, MATRIX B, int n, int bn)
{
	void *state;
	GORegressionResult regres = GO_REG_ok;
	SUFFIX(GOQuadMatrix) *qA;
	const SUFFIX(GOQuadMatrix) *R;
	SUFFIX(GOQuadQR) *qr;
	QUAD *QTb, *x;
	int i, j;

	if (n < 1 || bn < 1)
		return GO_REG_not_enough_data;

	/* Special case. */
	/* 1x1 system: just divide every right-hand side by the scalar. */
	if (n == 1) {
		int i;
		DOUBLE d = A[0][0];
		if (d == 0)
			return GO_REG_singular;

		for (i = 0; i < bn; i++)
			B[0][i] /= d;
		return GO_REG_ok;
	}

	state = SUFFIX(go_quad_start) ();

	/* QR-factorize a quad-precision copy of A. */
	qA = SUFFIX(quad_matrix_from_matrix) (A, n, n, NULL);
	qr = SUFFIX(go_quad_qr_new) (qA);
	if (!qr) {
		regres = GO_REG_invalid_data;
		goto done;
	}
	R = SUFFIX(go_quad_qr_r) (qr);

	QTb = g_new (QUAD, n);
	x = g_new (QUAD, n);

	/* Solve one column of B at a time and overwrite it with x. */
	for (j = 0; j < bn; j++) {
		/* Compute Q^T b. */
		for (i = 0; i < n; i++)
			SUFFIX(go_quad_init)(&QTb[i], B[i][j]);
		SUFFIX(go_quad_qr_multiply_qt)(qr, QTb);

		/* Solve R x = Q^T b */
		if (SUFFIX(go_quad_matrix_back_solve) (R, x, QTb, FALSE))
			regres = GO_REG_singular;

		/* Round for output. */
		for (i = 0; i < n; i++)
			B[i][j] = SUFFIX(go_quad_value) (&x[i]);
	}

	SUFFIX(go_quad_qr_free) (qr);
	g_free (x);
	g_free (QTb);
done:
	SUFFIX(go_quad_matrix_free) (qA);

	SUFFIX(go_quad_end) (state);

	return regres;
}
409
410 GORegressionResult
SUFFIX(go_linear_solve)411 SUFFIX(go_linear_solve) (CONSTMATRIX A, const DOUBLE *b, int n, DOUBLE *res)
412 {
413 MATRIX B;
414 int i;
415 GORegressionResult regres;
416
417 if (n < 1)
418 return GO_REG_not_enough_data;
419
420 ALLOC_MATRIX (B, n, 1);
421 for (i = 0; i < n; i++)
422 B[i][0] = b[i];
423
424 regres = SUFFIX(go_linear_solve_multiple) (A, B, n, 1);
425
426 for (i = 0; i < n; i++)
427 res[i] = B[i][0];
428
429 FREE_MATRIX (B, n, 1);
430
431 return regres;
432 }
433
434
435 gboolean
SUFFIX(go_matrix_invert)436 SUFFIX(go_matrix_invert) (MATRIX A, int n)
437 {
438 SUFFIX(GOQuadMatrix) *qA, *qZ;
439 DOUBLE threshold = DEFAULT_THRESHOLD;
440 void *state;
441 gboolean ok;
442
443 state = SUFFIX(go_quad_start) ();
444 qA = SUFFIX(quad_matrix_from_matrix) (A, n, n, NULL);
445 qZ = SUFFIX(go_quad_matrix_inverse) (qA, threshold);
446 ok = (qZ != NULL);
447 SUFFIX(go_quad_matrix_free) (qA);
448 if (qZ) {
449 SUFFIX(copy_quad_matrix_to_matrix) (A, qZ);
450 SUFFIX(go_quad_matrix_free) (qZ);
451 }
452 SUFFIX(go_quad_end) (state);
453 return ok;
454 }
455
456 DOUBLE
SUFFIX(go_matrix_determinant)457 SUFFIX(go_matrix_determinant) (CONSTMATRIX A, int n)
458 {
459 DOUBLE res;
460 QUAD qres;
461 void *state;
462 SUFFIX(GOQuadMatrix) *qA;
463
464 if (n < 1)
465 return 0;
466
467 state = SUFFIX(go_quad_start) ();
468
469 qA = SUFFIX(quad_matrix_from_matrix) (A, n, n, NULL);
470 SUFFIX(go_quad_matrix_determinant) (qA, &qres);
471 SUFFIX(go_quad_matrix_free) (qA);
472 res = SUFFIX(go_quad_value) (&qres);
473
474 SUFFIX(go_quad_end) (state);
475
476 return res;
477 }
478
479 static DOUBLE
SUFFIX(calc_scale)480 SUFFIX(calc_scale) (const DOUBLE *xs, int m)
481 {
482 DOUBLE M;
483 int e;
484
485 if (SUFFIX(go_range_maxabs) (xs, m, &M) || M == 0)
486 return 1;
487
488 /*
489 * Pick a scale such that the largest value will be in the
490 * range [1;2[. The scale will be a power of 2 so scaling
491 * doesn't introduce rounding errors of it own.
492 */
493 (void) SUFFIX(frexp) (M, &e);
494 return SUFFIX(ldexp) (1.0, e - 1);
495 }
496
497 /* ------------------------------------------------------------------------- */
498
/* Note, that this function takes a transposed matrix xssT. */
/*
 * Core least-squares engine shared by all go_*_regression wrappers.
 *
 * xssT:   transposed design matrix -- n rows (one per parameter), each
 *         of length m (one entry per observation).
 * n:      number of parameters to estimate.
 * ys:     observed dependent values, length m.
 * m:      number of observations.
 * result: (out) the n fitted parameters.
 * stat_:  optional (may be NULL) storage for regression statistics.
 * affine: whether a constant term is included (affects ss_total and
 *         the degrees of freedom).
 * threshold: relative size below which a diagonal element of R is
 *         treated as degenerate.
 *
 * The system is solved via a QR decomposition carried out in quad
 * precision, with per-column power-of-2 scaling for stability.
 */
static GORegressionResult
SUFFIX(general_linear_regression) (CONSTMATRIX xssT, int n,
				   const DOUBLE *ys, int m,
				   DOUBLE *result,
				   SUFFIX(go_regression_stat_t) *stat_,
				   gboolean affine,
				   DOUBLE threshold)
{
	GORegressionResult regerr;
	SUFFIX(GOQuadMatrix) *xss;
	SUFFIX(GOQuadQR) *qr;
	int i, j, k;
	gboolean has_result;
	void *state;
	gboolean has_stat;
	int err;
	QUAD N2;
	DOUBLE *xscale;
	gboolean debug_scale = FALSE;

	ZERO_VECTOR (result, n);

	/* Use a scratch stat block if the caller did not supply one. */
	has_stat = (stat_ != NULL);
	if (!has_stat)
		stat_ = SUFFIX(go_regression_stat_new)();

	/* More parameters than observations: underdetermined. */
	if (n > m) {
		regerr = GO_REG_not_enough_data;
		goto out;
	}

	state = SUFFIX(go_quad_start) ();

	/* Build the scaled m-by-n design matrix in quad precision.  Each
	   column is divided by a power-of-2 scale chosen by calc_scale;
	   the scale is divided back out of the results below. */
	xscale = g_new (DOUBLE, n);
	xss = SUFFIX(go_quad_matrix_new) (m, n);
	for (j = 0; j < n; j++) {
		xscale[j] = SUFFIX(calc_scale) (xssT[j], m);
		if (debug_scale)
			g_printerr ("Scale %d: %" FORMAT_g "\n", j, xscale[j]);
		for (i = 0; i < m; i++)
			SUFFIX(go_quad_init) (&xss->data[i][j], xssT[j][i] / xscale[j]);
	}
	qr = SUFFIX(go_quad_qr_new) (xss);
	SUFFIX(go_quad_matrix_free) (xss);

	has_result = (qr != NULL);

	if (has_result) {
		const SUFFIX(GOQuadMatrix) *R = SUFFIX(go_quad_qr_r) (qr);
		QUAD *qresult = g_new0 (QUAD, n);
		QUAD *QTy = g_new (QUAD, m);
		QUAD *inv = g_new (QUAD, n);
		DOUBLE emax;
		int df_resid = m - n;
		int df_reg = n - (affine ? 1 : 0);

		regerr = GO_REG_ok;

		/* Mark columns whose diagonal element of R is tiny
		   relative to the largest eigenvalue as degenerate; each
		   such column moves one degree of freedom from the
		   regression to the residuals.  The negated ">=" also
		   catches NaN. */
		SUFFIX(go_quad_matrix_eigen_range) (R, NULL, &emax);
		for (i = 0; i < n; i++) {
			DOUBLE ei = SUFFIX(go_quad_value)(&R->data[i][i]);
			gboolean degenerate =
				!(SUFFIX(fabs)(ei) >= emax * threshold);
			if (degenerate) {
				SUFFIX(go_quad_qr_mark_degenerate) (qr, i);
				df_resid++;
				df_reg--;
			}
		}

		/* Compute Q^T ys. */
		for (i = 0; i < m; i++)
			SUFFIX(go_quad_init)(&QTy[i], ys[i]);
		SUFFIX(go_quad_qr_multiply_qt)(qr, QTy);

		if (0)
			SUFFIX(go_quad_matrix_dump) (R, "%10.5" FORMAT_g);

		/* Solve R res = Q^T ys */
		/* NOTE(review): a failure here only clears has_result;
		   the statistics below are still computed and regerr
		   stays GO_REG_ok -- confirm this is intended. */
		if (SUFFIX(go_quad_matrix_back_solve) (R, qresult, QTy, TRUE))
			has_result = FALSE;

		/* Divide the column scale back out of the estimates. */
		for (i = 0; i < n; i++)
			result[i] = SUFFIX(go_quad_value) (&qresult[i]) / xscale[i];

		/* This should not fail since n >= 1. */
		err = SUFFIX(go_range_average) (ys, m, &stat_->ybar);
		g_assert (err == 0);

		/* FIXME: we ought to have a devsq variant that does not
		   recompute the mean.  */
		if (affine)
			err = SUFFIX(go_range_devsq) (ys, m, &stat_->ss_total);
		else
			err = SUFFIX(go_range_sumsq) (ys, m, &stat_->ss_total);
		g_assert (err == 0);

		stat_->xbar = g_new (DOUBLE, n);
		for (i = 0; i < n; i++) {
			int err = SUFFIX(go_range_average) (xssT[i], m, &stat_->xbar[i]);
			g_assert (err == 0);
		}

		;
		stat_->ss_resid =
			SUFFIX(calc_residual) (xssT, ys, m, n, result);

		stat_->sqr_r = (stat_->ss_total == 0)
			? 1
			: 1 - stat_->ss_resid / stat_->ss_total;
		if (stat_->sqr_r < 0) {
			/*
			 * This is an indication that something has gone wrong
			 * numerically.
			 */
			regerr = GO_REG_near_singular_bad;
		}

		/* FIXME: we want to guard against division by zero.  */
		stat_->adj_sqr_r = 1 - stat_->ss_resid * (m - 1) /
			(df_resid * stat_->ss_total);
		/* Residual variance N2 = ss_resid / df_resid (0 when
		   df_resid is 0), kept in quad precision for the SEs. */
		if (df_resid == 0)
			N2 = SUFFIX(go_quad_zero);
		else {
			QUAD d;
			SUFFIX(go_quad_init) (&d, df_resid);
			SUFFIX(go_quad_init) (&N2, stat_->ss_resid);
			SUFFIX(go_quad_div) (&N2, &N2, &d);
		}
		stat_->var = SUFFIX(go_quad_value) (&N2);

		/* Standard error of each estimate:
		   sqrt (var * e_k^T (R^T R)^-1 e_k) / xscale[k]. */
		stat_->se = g_new0 (DOUBLE, n);
		for (k = 0; k < n; k++) {
			QUAD p, N;

			/* inv = e_k */
			for (i = 0; i < n; i++)
				SUFFIX(go_quad_init) (&inv[i], i == k ? 1 : 0);

			/* Solve R^T inv = e_k */
			if (SUFFIX(go_quad_matrix_fwd_solve) (R, inv, inv, TRUE)) {
				regerr = GO_REG_singular;
				break;
			}

			SUFFIX(go_quad_dot_product) (&N, inv, inv, n);
			SUFFIX(go_quad_mul) (&p, &N2, &N);
			SUFFIX(go_quad_sqrt) (&p, &p);
			stat_->se[k] = SUFFIX(go_quad_value) (&p) / xscale[k];
		}

		/* t statistic of each estimate; infinite when se is 0. */
		stat_->t = g_new (DOUBLE, n);

		for (i = 0; i < n; i++)
			stat_->t[i] = (stat_->se[i] == 0)
				? SUFFIX(go_pinf)
				: result[i] / stat_->se[i];

		stat_->df_resid = df_resid;
		stat_->df_reg = df_reg;
		stat_->df_total = stat_->df_resid + df_reg;

		stat_->F = (stat_->sqr_r == 1)
			? SUFFIX(go_pinf)
			: ((stat_->sqr_r / df_reg) /
			   (1 - stat_->sqr_r) * stat_->df_resid);

		stat_->ss_reg = stat_->ss_total - stat_->ss_resid;
		stat_->se_y = SUFFIX(sqrt) (stat_->ss_total / m);
		stat_->ms_reg = (df_reg == 0)
			? 0
			: stat_->ss_reg / stat_->df_reg;
		stat_->ms_resid = (df_resid == 0)
			? 0
			: stat_->ss_resid / df_resid;

		g_free (QTy);
		g_free (qresult);
		g_free (inv);
	} else
		regerr = GO_REG_invalid_data;

	SUFFIX(go_quad_end) (state);

	if (qr) SUFFIX(go_quad_qr_free) (qr);
	g_free (xscale);
out:
	if (!has_stat)
		SUFFIX(go_regression_stat_destroy) (stat_);

	return regerr;
}
692
693 /* ------------------------------------------------------------------------- */
694
/* Summary measures of an (x,y) point cloud used by the log-fitting code. */
typedef struct {
	DOUBLE min_x;	/* smallest x value */
	DOUBLE max_x;	/* largest x value */
	DOUBLE min_y;	/* smallest y value */
	DOUBLE max_y;	/* largest y value */
	DOUBLE mean_y;	/* average of the y values */
} SUFFIX(point_cloud_measure_type);
702
703 /* Takes the current 'sign' (res[0]) and 'c' (res[3]) from the calling
704 * function, transforms xs to ln(sign*(x-c)), performs a simple
705 * linear regression to find the best fitting 'a' (res[1]) and 'b'
706 * (res[2]) for ys and transformed xs, and computes the sum of squared
707 * residuals.
708 * Needs 'sign' (i.e. +1 or -1) and 'c' so adjusted that (sign*(x-c)) is
709 * positive for all xs. n must be > 0. These conditions are trusted to be
710 * checked by the calling functions.
711 * Is called often, so do not make it too slow.
712 */
713
714 static int
SUFFIX(transform_x_and_linear_regression_log_fitting)715 SUFFIX(transform_x_and_linear_regression_log_fitting)
716 (DOUBLE *xs,
717 DOUBLE *transf_xs,
718 const DOUBLE *ys, int n,
719 DOUBLE *res,
720 SUFFIX(point_cloud_measure_type)
721 *point_cloud)
722 {
723 int i;
724 int result = GO_REG_ok;
725 DOUBLE mean_transf_x, diff_x, resid_y;
726 DOUBLE sum1 = 0;
727 DOUBLE sum2 = 0;
728
729 /* log (always > 0) */
730 for (i=0; i<n; i++)
731 transf_xs[i] = SUFFIX(log) (res[0] * (xs[i] - res[3]));
732 SUFFIX(go_range_average) (transf_xs, n, &mean_transf_x);
733 for (i=0; i<n; i++) {
734 diff_x = transf_xs[i] - mean_transf_x;
735 sum1 += diff_x * (ys[i] - point_cloud->mean_y);
736 sum2 += diff_x * diff_x;
737 }
738 res[2] = sum1 / sum2;
739 res[1] = point_cloud->mean_y - (res[2] * mean_transf_x);
740 res[4] = 0;
741 for (i=0; i<n; i++) {
742 resid_y = res[1] + (res[2] * transf_xs[i]) - ys[i];
743 res[4] += resid_y * resid_y;
744 }
745 return result; /* not used for error checking for the sake of speed */
746 }
747
/*
 * Fit "y = a + b * ln (sign * (x - c))" by searching over c.  res holds
 * sign[0], a[1], b[2], c[3] and the sum of squared residuals[4]; a and
 * b are recomputed by simple linear regression for every candidate c
 * via the helper above.
 */
static int
SUFFIX(log_fitting) (DOUBLE *xs, const DOUBLE *ys, int n,
		     DOUBLE *res, SUFFIX(point_cloud_measure_type) *point_cloud)
{
	int result = GO_REG_ok;
	gboolean sign_plus_ok = 1, sign_minus_ok = 1;
	DOUBLE x_range, c_step, c_accuracy_int, c_offset, c_accuracy;
	DOUBLE c_range, c_start, c_end, c_dist;
	DOUBLE *temp_res;
	DOUBLE *transf_xs;

	temp_res = g_new (DOUBLE, 5);
	x_range = (point_cloud->max_x) - (point_cloud->min_x);
	/* Not needed here, but allocate it once for all subfunction calls */
	transf_xs = g_new (DOUBLE, n);
	/* Choose final accuracy of c with respect to range of xs.
	 * Make accuracy be a whole power of 10. */
	c_accuracy = SUFFIX(log10) (x_range);
	if (c_accuracy < 0)
		if (SUFFIX(modf) (c_accuracy, &c_accuracy_int) != 0)
			c_accuracy--;
	SUFFIX(modf) (c_accuracy, &c_accuracy_int);
	c_accuracy = c_accuracy_int;
	c_accuracy = SUFFIX(pow) (10, c_accuracy);
	c_accuracy *= GO_LOGFIT_C_ACCURACY;

	/* Determine sign. Take a c which is ``much to small'' since the part
	 * of the curve cutting the point cloud is almost not bent.
	 * If making c still smaller does not make things still worse,
	 * assume that we have to change the direction of curve bending
	 * by changing sign.
	 */
	c_step = x_range * GO_LOGFIT_C_STEP_FACTOR;
	c_range = x_range * GO_LOGFIT_C_RANGE_FACTOR;
	res[0] = 1; /* sign */
	res[3] = point_cloud->min_x - c_range;
	temp_res[0] = 1;
	temp_res[3] = res[3] - c_step;
	SUFFIX(transform_x_and_linear_regression_log_fitting) (xs, transf_xs, ys, n,
						       res, point_cloud);
	SUFFIX(transform_x_and_linear_regression_log_fitting) (xs, transf_xs, ys, n,
						       temp_res, point_cloud);
	if (temp_res[4] <= res[4])
		sign_plus_ok = 0;
	/* check again with new sign */
	res[0] = -1; /* sign */
	res[3] = point_cloud->max_x + c_range;
	temp_res[0] = -1;
	temp_res[3] = res[3] + c_step;
	SUFFIX(transform_x_and_linear_regression_log_fitting) (xs, transf_xs, ys, n,
						       res, point_cloud);
	SUFFIX(transform_x_and_linear_regression_log_fitting) (xs, transf_xs, ys, n,
						       temp_res, point_cloud);
	if (temp_res[4] <= res[4])
		sign_minus_ok = 0;
	/* If not exactly one of plus or minus works, give up.
	 * This happens in point clouds which are very weakly bent.
	 */
	if (sign_plus_ok && !sign_minus_ok)
		res[0] = 1;
	else if (sign_minus_ok && !sign_plus_ok)
		res[0] = -1;
	else {
		result = GO_REG_invalid_data;
		goto out;
	}

	/* Start of fitted c-range. Rounded to final accuracy of c. */
	c_offset = (res[0] == 1) ? point_cloud->min_x : point_cloud->max_x;
	c_offset = c_accuracy * ((res[0] == 1) ?
				 SUFFIX(floor) (c_offset / c_accuracy)
				 : SUFFIX(ceil) (c_offset /c_accuracy));

	/* Now the adapting of c starts. Find a local minimum of sum
	 * of squared residuals. */

	/* First, catch some unsuitably shaped point clouds. */
	res[3] = c_offset - res[0] * c_accuracy;
	temp_res[3] = c_offset - res[0] * 2 * c_accuracy;
	temp_res[0] = res[0];
	SUFFIX(transform_x_and_linear_regression_log_fitting) (xs, transf_xs, ys, n,
						       res, point_cloud);
	SUFFIX(transform_x_and_linear_regression_log_fitting) (xs, transf_xs, ys, n,
						       temp_res, point_cloud);
	if (temp_res[4] >= res[4]) {
		result = GO_REG_invalid_data;
		goto out;
	}
	/* After the above check, any minimum reached will be NOT at
	 * the start of c-range (c_offset - sign * c_accuracy) */
	c_start = c_offset;
	c_end = c_start - res[0] * c_range;
	c_dist = res[0] * (c_start - c_end) / 2;
	res[3] = c_end + res[0] * c_dist;
	/* Bisection-style search: halve the step, try both directions,
	 * and keep whichever candidate does not increase the residual. */
	do {
		c_dist /= 2;
		SUFFIX(transform_x_and_linear_regression_log_fitting) (xs, transf_xs,
							       ys, n, res,
							       point_cloud);
		temp_res[3] = res[3] + res[0] * c_dist;
		SUFFIX(transform_x_and_linear_regression_log_fitting) (xs, transf_xs,
							       ys, n, temp_res,
							       point_cloud);
		if (temp_res[4] <= res[4])
			COPY_VECTOR (res, temp_res, 5);
		else {
			temp_res[3] = res[3] - res[0] * c_dist;
			SUFFIX(transform_x_and_linear_regression_log_fitting) (xs,
							               transf_xs,
								       ys, n,
								       temp_res,
								       point_cloud);
			if (temp_res[4] <= res[4])
				COPY_VECTOR (res, temp_res, 5);
		}
	} while (c_dist > c_accuracy);

	/* Round c to its final accuracy and recompute a, b and the
	 * residual for the rounded value. */
	res[3] = c_accuracy * SUFFIX(go_fake_round) (res[3] / c_accuracy);
	SUFFIX(transform_x_and_linear_regression_log_fitting) (xs, transf_xs, ys, n,
						       res, point_cloud);

	if ((res[0] * (res[3] - c_end)) < (1.1 * c_accuracy)) {
		/* Allowing for some inaccuracy, we are at the end of the
		 * range, so this is probably no local minimum.
		 * The start of the range has been checked above. */
		result = GO_REG_invalid_data;
		goto out;
	}

 out:
	g_free (transf_xs);
	g_free (temp_res);
	return result;
}
882
883 /**
884 * go_linear_regression:
885 * @xss: x-vectors (i.e. independent data)
886 * @dim: number of x-vectors.
887 * @ys: y-vector. (Dependent data.)
888 * @n: number of data points.
889 * @affine: if true, a non-zero constant is allowed.
890 * @res: (out): place for constant[0] and slope1[1], slope2[2],... There will be dim+1 results.
891 * @stat_: (out): non-NULL storage for additional results.
892 *
893 * Performs multi-dimensional linear regressions on the input points.
894 * Fits to "y = b + a1 * x1 + ... ad * xd".
895 *
896 * Returns: #GORegressionResult as above.
897 **/
898 GORegressionResult
SUFFIX(go_linear_regression)899 SUFFIX(go_linear_regression) (MATRIX xss, int dim,
900 const DOUBLE *ys, int n,
901 gboolean affine,
902 DOUBLE *res,
903 SUFFIX(go_regression_stat_t) *stat_)
904 {
905 GORegressionResult result;
906 DOUBLE threshold = DEFAULT_THRESHOLD;
907
908 g_return_val_if_fail (dim >= 1, GO_REG_invalid_dimensions);
909 g_return_val_if_fail (n >= 1, GO_REG_invalid_dimensions);
910
911 if (affine) {
912 int i;
913 DOUBLE **xss2 = g_new (DOUBLE *, dim + 1);
914
915 xss2[0] = g_new (DOUBLE, n);
916 for (i = 0; i < n; i++)
917 xss2[0][i] = 1;
918 memcpy (xss2 + 1, xss, dim * sizeof (DOUBLE *));
919
920 result = SUFFIX(general_linear_regression)
921 (xss2, dim + 1, ys, n,
922 res, stat_, affine, threshold);
923 g_free (xss2[0]);
924 g_free (xss2);
925 } else {
926 res[0] = 0;
927 result = SUFFIX(general_linear_regression)
928 (xss, dim, ys, n,
929 res + 1, stat_, affine, threshold);
930 }
931 return result;
932 }
933
934 /**
935 * go_exponential_regression:
936 * @xss: x-vectors (i.e. independent data)
937 * @dim: number of x-vectors
938 * @ys: y-vector (dependent data)
939 * @n: number of data points
940 * @affine: if %TRUE, a non-one multiplier is allowed
941 * @res: output place for constant[0] and root1[1], root2[2],... There will be dim+1 results.
942 * @stat_: non-NULL storage for additional results.
943 *
944 * Performs one-dimensional linear regressions on the input points.
945 * Fits to "y = b * m1^x1 * ... * md^xd " or equivalently to
946 * "log y = log b + x1 * log m1 + ... + xd * log md".
947 *
948 * Returns: #GORegressionResult as above.
949 **/
950 GORegressionResult
SUFFIX(go_exponential_regression)951 SUFFIX(go_exponential_regression) (MATRIX xss, int dim,
952 const DOUBLE *ys, int n,
953 gboolean affine,
954 DOUBLE *res,
955 SUFFIX(go_regression_stat_t) *stat_)
956 {
957 GORegressionResult result = SUFFIX(go_exponential_regression_as_log) (xss, dim, ys, n, affine, res, stat_);
958 int i;
959
960 if (result == GO_REG_ok || result == GO_REG_near_singular_good)
961 for (i = 0; i < dim + 1; i++)
962 res[i] = SUFFIX(exp) (res[i]);
963
964 return result;
965 }
966
967 /**
968 * go_exponential_regression_as_log:
969 * @xss: x-vectors (i.e. independent data)
970 * @dim: number of x-vectors
971 * @ys: y-vector (dependent data)
972 * @n: number of data points
973 * @affine: if %TRUE, a non-one multiplier is allowed
974 * @res: output place for constant[0] and root1[1], root2[2],... There will be dim+1 results.
975 * @stat_: non-NULL storage for additional results.
976 *
977 * Performs one-dimensional linear regressions on the input points as
978 * go_exponential_regression, but returns the logarithm of the coefficients instead
979 * or the coefficients themselves.
980 * Fits to "y = b * exp (m1*x1) * ... * exp (md*xd) " or equivalently to
981 * "ln y = ln b + x1 * m1 + ... + xd * md".
982 *
983 * Returns: #GORegressionResult as above.
984 **/
985 GORegressionResult
SUFFIX(go_exponential_regression_as_log)986 SUFFIX(go_exponential_regression_as_log) (MATRIX xss, int dim,
987 const DOUBLE *ys, int n,
988 gboolean affine,
989 DOUBLE *res,
990 SUFFIX(go_regression_stat_t) *stat_)
991 {
992 DOUBLE *log_ys;
993 GORegressionResult result;
994 int i;
995 DOUBLE threshold = DEFAULT_THRESHOLD;
996
997 g_return_val_if_fail (dim >= 1, GO_REG_invalid_dimensions);
998 g_return_val_if_fail (n >= 1, GO_REG_invalid_dimensions);
999
1000 log_ys = g_new (DOUBLE, n);
1001 for (i = 0; i < n; i++)
1002 if (ys[i] > 0)
1003 log_ys[i] = SUFFIX(log) (ys[i]);
1004 else {
1005 result = GO_REG_invalid_data;
1006 goto out;
1007 }
1008
1009 if (affine) {
1010 int i;
1011 DOUBLE **xss2 = g_new (DOUBLE *, dim + 1);
1012
1013 xss2[0] = g_new (DOUBLE, n);
1014 for (i = 0; i < n; i++)
1015 xss2[0][i] = 1;
1016 memcpy (xss2 + 1, xss, dim * sizeof (DOUBLE *));
1017
1018 result = SUFFIX(general_linear_regression)
1019 (xss2, dim + 1, log_ys,
1020 n, res, stat_, affine, threshold);
1021 g_free (xss2[0]);
1022 g_free (xss2);
1023 } else {
1024 res[0] = 0;
1025 result = SUFFIX(general_linear_regression)
1026 (xss, dim, log_ys, n,
1027 res + 1, stat_, affine, threshold);
1028 }
1029
1030 out:
1031 g_free (log_ys);
1032 return result;
1033 }
1034
1035 /**
1036 * go_power_regression:
1037 * @xss: x-vectors (i.e. independent data)
1038 * @dim: number of x-vectors
1039 * @ys: y-vector (dependent data)
1040 * @n: number of data points
1041 * @affine: if %TRUE, a non-one multiplier is allowed
1042 * @res: output place for constant[0] and root1[1], root2[2],... There will be dim+1 results.
1043 * @stat_: non-NULL storage for additional results.
1044 *
1045 * Performs one-dimensional linear regressions on the input points.
1046 * Fits to "y = b * x1^m1 * ... * xd^md " or equivalently to
1047 * "log y = log b + m1 * log x1 + ... + md * log xd".
1048 *
1049 * Returns: #GORegressionResult as above.
1050 **/
GORegressionResult
SUFFIX(go_power_regression) (MATRIX xss, int dim,
			     const DOUBLE *ys, int n,
			     gboolean affine,
			     DOUBLE *res,
			     SUFFIX(go_regression_stat_t) *stat_)
{
	DOUBLE *log_ys, **log_xss = NULL;
	GORegressionResult result;
	int i, j;
	DOUBLE threshold = DEFAULT_THRESHOLD;

	g_return_val_if_fail (dim >= 1, GO_REG_invalid_dimensions);
	g_return_val_if_fail (n >= 1, GO_REG_invalid_dimensions);

	/* The model is linear in log-space, so all ys must be positive. */
	log_ys = g_new (DOUBLE, n);
	for (i = 0; i < n; i++)
		if (ys[i] > 0)
			log_ys[i] = SUFFIX(log) (ys[i]);
		else {
			result = GO_REG_invalid_data;
			goto out;
		}

	/* ... and likewise all x values. */
	ALLOC_MATRIX (log_xss, dim, n);
	for (i = 0; i < dim; i++)
		for (j = 0; j < n; j++)
			if (xss[i][j] > 0)
				log_xss[i][j] = SUFFIX(log) (xss[i][j]);
			else {
				result = GO_REG_invalid_data;
				goto out;
			}

	if (affine) {
		int i;
		DOUBLE **log_xss2 = g_new (DOUBLE *, dim + 1);

		/* Prepend an all-ones row for the constant term. */
		log_xss2[0] = g_new (DOUBLE, n);
		for (i = 0; i < n; i++)
			log_xss2[0][i] = 1;
		memcpy (log_xss2 + 1, log_xss, dim * sizeof (DOUBLE *));

		result = SUFFIX(general_linear_regression)
			(log_xss2, dim + 1, log_ys,
			 n, res, stat_, affine, threshold);
		g_free (log_xss2[0]);
		g_free (log_xss2);
	} else {
		res[0] = 0;
		result = SUFFIX(general_linear_regression)
			(log_xss, dim, log_ys, n,
			 res + 1, stat_, affine, threshold);
	}

 out:
	/* log_xss is still NULL if a non-positive y aborted early. */
	if (log_xss != NULL)
		FREE_MATRIX (log_xss, dim, n);
	g_free (log_ys);
	return result;
}
1112
1113
1114 /**
1115 * go_logarithmic_regression:
1116 * @xss: x-vectors (i.e. independent data)
1117 * @dim: number of x-vectors
1118 * @ys: y-vector (dependent data)
1119 * @n: number of data points
1120 * @affine: if %TRUE, a non-zero constant is allowed
1121 * @res: output place for constant[0] and factor1[1], factor2[2],... There will be dim+1 results.
1122 * @stat_: non-NULL storage for additional results.
1123 *
1124 * This is almost a copy of linear_regression and produces multi-dimensional
1125 * linear regressions on the input points after transforming xss to ln(xss).
1126 * Fits to "y = b + a1 * z1 + ... ad * zd" with "zi = ln (xi)".
1127 * Problems with arrays in the calling function: see comment to
1128 * gnumeric_linest, which is also valid for gnumeric_logreg.
1129 *
1130 * (Errors: less than two points, all points on a vertical line, non-positive x data.)
1131 *
1132 * Returns: #GORegressionResult as above.
1133 **/
1134 GORegressionResult
SUFFIX(go_logarithmic_regression)1135 SUFFIX(go_logarithmic_regression) (MATRIX xss, int dim,
1136 const DOUBLE *ys, int n,
1137 gboolean affine,
1138 DOUBLE *res,
1139 SUFFIX(go_regression_stat_t) *stat_)
1140 {
1141 MATRIX log_xss;
1142 GORegressionResult result;
1143 int i, j;
1144 DOUBLE threshold = DEFAULT_THRESHOLD;
1145
1146 g_return_val_if_fail (dim >= 1, GO_REG_invalid_dimensions);
1147 g_return_val_if_fail (n >= 1, GO_REG_invalid_dimensions);
1148
1149 ALLOC_MATRIX (log_xss, dim, n);
1150 for (i = 0; i < dim; i++)
1151 for (j = 0; j < n; j++)
1152 if (xss[i][j] > 0)
1153 log_xss[i][j] = SUFFIX(log) (xss[i][j]);
1154 else {
1155 result = GO_REG_invalid_data;
1156 goto out;
1157 }
1158
1159
1160 if (affine) {
1161 int i;
1162 DOUBLE **log_xss2 = g_new (DOUBLE *, dim + 1);
1163
1164 log_xss2[0] = g_new (DOUBLE, n);
1165 for (i = 0; i < n; i++)
1166 log_xss2[0][i] = 1;
1167 memcpy (log_xss2 + 1, log_xss, dim * sizeof (DOUBLE *));
1168
1169 result = SUFFIX(general_linear_regression)
1170 (log_xss2, dim + 1, ys, n,
1171 res, stat_, affine, threshold);
1172 g_free (log_xss2[0]);
1173 g_free (log_xss2);
1174 } else {
1175 res[0] = 0;
1176 result = SUFFIX(general_linear_regression)
1177 (log_xss, dim, ys, n,
1178 res + 1, stat_, affine, threshold);
1179 }
1180
1181 out:
1182 FREE_MATRIX (log_xss, dim, n);
1183 return result;
1184 }
1185
1186 /**
1187 * go_logarithmic_fit:
1188 * @xs: x-vector (i.e. independent data)
1189 * @ys: y-vector (dependent data)
1190 * @n: number of data points
1191 * @res: output place for sign[0], a[1], b[2], c[3], and sum of squared residuals[4].
1192 *
1193 * Performs a two-dimensional non-linear fitting on the input points.
1194 * Fits to "y = a + b * ln (sign * (x - c))", with sign in {-1, +1}.
1195 * The graph is a logarithmic curve moved horizontally by c and possibly
1196 * mirrored across the y-axis (if sign = -1).
1197 *
1198 * Fits c (and sign) by iterative trials, but seems to be fast enough even
1199 * for automatic recomputation.
1200 *
1201 * Adapts c until a local minimum of squared residuals is reached. For each
1202 * new c tried out the corresponding a and b are calculated by linear
1203 * regression. If no local minimum is found, an error is returned. If there
1204 * is more than one local minimum, the one found is not necessarily the
1205 * smallest (i.e., there might be cases in which the returned fit is not the
1206 * best possible). If the shape of the point cloud is to different from
1207 * ``logarithmic'', either sign can not be determined (error returned) or no
1208 * local minimum will be found.
1209 *
1210 * (Requires: at least 3 different x values, at least 3 different y values.)
1211 *
1212 * Returns: #GORegressionResult as above.
1213 */
1214
1215 GORegressionResult
SUFFIX(go_logarithmic_fit)1216 SUFFIX(go_logarithmic_fit) (DOUBLE *xs, const DOUBLE *ys, int n, DOUBLE *res)
1217 {
1218 SUFFIX(point_cloud_measure_type) point_cloud_measures;
1219 int i, result;
1220 gboolean more_2_y = 0, more_2_x = 0;
1221
1222 /* Store useful measures for using them here and in subfunctions.
1223 * The checking of n is paranoid -- the calling function should
1224 * have cared for that. */
1225 g_return_val_if_fail (n > 2, GO_REG_invalid_dimensions);
1226 result = SUFFIX(go_range_min) (xs, n, &(point_cloud_measures.min_x));
1227 result = SUFFIX(go_range_max) (xs, n, &(point_cloud_measures.max_x));
1228 result = SUFFIX(go_range_min) (ys, n, &(point_cloud_measures.min_y));
1229 result = SUFFIX(go_range_max) (ys, n, &(point_cloud_measures.max_y));
1230 result = SUFFIX(go_range_average) (ys, n, &(point_cloud_measures.mean_y));
1231 /* Checking of error conditions. */
1232 /* less than 2 different ys or less than 2 different xs */
1233 g_return_val_if_fail (((point_cloud_measures.min_y !=
1234 point_cloud_measures.max_y) &&
1235 (point_cloud_measures.min_x !=
1236 point_cloud_measures.max_x)),
1237 GO_REG_invalid_data);
1238 /* less than 3 different ys */
1239 for (i=0; i<n; i++) {
1240 if ((ys[i] != point_cloud_measures.min_y) &&
1241 (ys[i] != point_cloud_measures.max_y)) {
1242 more_2_y = 1;
1243 break;
1244 }
1245 }
1246 g_return_val_if_fail (more_2_y, GO_REG_invalid_data);
1247 /* less than 3 different xs */
1248 for (i=0; i<n; i++) {
1249 if ((xs[i] != point_cloud_measures.min_x) &&
1250 (xs[i] != point_cloud_measures.max_x)) {
1251 more_2_x = 1;
1252 break;
1253 }
1254 }
1255 g_return_val_if_fail (more_2_x, GO_REG_invalid_data);
1256
1257 /* no errors */
1258 result = SUFFIX(log_fitting) (xs, ys, n, res, &point_cloud_measures);
1259 return result;
1260 }
1261
1262 /* ------------------------------------------------------------------------- */
1263
SUFFIX(go_regression_stat_t)1264 SUFFIX(go_regression_stat_t) *
1265 SUFFIX(go_regression_stat_new) (void)
1266 {
1267 SUFFIX(go_regression_stat_t) * stat_ =
1268 g_new0 (SUFFIX(go_regression_stat_t), 1);
1269
1270 stat_->se = NULL;
1271 stat_->t = NULL;
1272 stat_->xbar = NULL;
1273 stat_->ref_count = 1;
1274
1275 return stat_;
1276 }
1277
1278 /* ------------------------------------------------------------------------- */
1279
1280 void
SUFFIX(go_regression_stat_destroy)1281 SUFFIX(go_regression_stat_destroy) (SUFFIX(go_regression_stat_t) *stat_)
1282 {
1283 if (!stat_)
1284 return;
1285
1286 g_return_if_fail (stat_->ref_count > 0);
1287
1288 stat_->ref_count--;
1289 if (stat_->ref_count > 0)
1290 return;
1291
1292 g_free (stat_->se);
1293 g_free (stat_->t);
1294 g_free (stat_->xbar);
1295 g_free (stat_);
1296 }
1297
1298 /* ------------------------------------------------------------------------- */
1299
1300 #ifdef DEFINE_COMMON
1301 #define DELTA 0.01
1302 /* FIXME: I pulled this number out of my hat.
1303 * I need some testing to pick a sensible value.
1304 */
1305 #define MAX_STEPS 200
1306 #endif
1307
1308 /*
1309 * SYNOPSIS:
1310 * result = derivative( f, &df, x, par, i)
1311 *
1312 * Approximates the partial derivative of a given function, at (x;params)
1313 * with respect to the parameter indicated by ith parameter. The resulst
1314 * is stored in 'df'.
1315 *
1316 * See the header file for more information.
1317 */
1318 static GORegressionResult
SUFFIX(derivative)1319 SUFFIX(derivative) (SUFFIX(GORegressionFunction) f,
1320 DOUBLE *df,
1321 DOUBLE *x, /* Only one point, not the whole data set. */
1322 DOUBLE *par,
1323 int index)
1324 {
1325 DOUBLE y1, y2;
1326 GORegressionResult result;
1327 DOUBLE par_save = par[index];
1328
1329 par[index] = par_save - DELTA;
1330 result = (*f) (x, par, &y1);
1331 if (result != GO_REG_ok) {
1332 par[index] = par_save;
1333 return result;
1334 }
1335
1336 par[index] = par_save + DELTA;
1337 result = (*f) (x, par, &y2);
1338 if (result != GO_REG_ok) {
1339 par[index] = par_save;
1340 return result;
1341 }
1342
1343 #ifdef DEBUG
1344 g_printerr ("y1 = %lf\n", y1);
1345 g_printerr ("y2 = %lf\n", y2);
1346 g_printerr ("DELTA = %lf\n",DELTA);
1347 #endif
1348
1349 *df = (y2 - y1) / (2 * DELTA);
1350 par[index] = par_save;
1351 return GO_REG_ok;
1352 }
1353
1354 /*
1355 * SYNOPSIS:
1356 * result = chi_squared (f, xvals, par, yvals, sigmas, x_dim, &chisq)
1357 *
1358 * / y - f(x ; par) \ 2
1359 * 2 | i i |
1360 * Chi == Sum ( | ------------------ | )
1361 * \ sigma /
1362 * i
1363 *
1364 * sigmas -> Measurement errors in the dataset (along the y-axis).
1365 * NULL means "no errors available", so they are all set to 1.
1366 *
1367 * x_dim -> Number of data points.
1368 *
1369 * This value is not very meaningful without the sigmas. However, it is
1370 * still useful for the fit.
1371 */
1372 static GORegressionResult
SUFFIX(chi_squared)1373 SUFFIX(chi_squared) (SUFFIX(GORegressionFunction) f,
1374 CONSTMATRIX xvals, /* The entire data set. */
1375 DOUBLE *par,
1376 DOUBLE *yvals, /* Ditto. */
1377 DOUBLE *sigmas, /* Ditto. */
1378 int x_dim, /* Number of data points. */
1379 DOUBLE *chisq) /* Chi Squared */
1380 {
1381 int i;
1382 GORegressionResult result;
1383 DOUBLE tmp, y;
1384 *chisq = 0;
1385
1386 for (i = 0; i < x_dim; i++) {
1387 result = f (xvals[i], par, &y);
1388 if (result != GO_REG_ok)
1389 return result;
1390
1391 tmp = (yvals[i] - y ) / (sigmas ? sigmas[i] : 1);
1392
1393 *chisq += tmp * tmp;
1394 }
1395
1396 return GO_REG_ok;
1397 }
1398
1399
1400 /*
1401 * SYNOPSIS:
1402 * result = chi_derivative (f, &dchi, xvals, par, i, yvals,
1403 * sigmas, x_dim)
1404 *
1405 * This is a simple adaptation of the derivative() function specific to
1406 * the Chi Squared.
1407 */
1408 static GORegressionResult
SUFFIX(chi_derivative)1409 SUFFIX(chi_derivative) (SUFFIX(GORegressionFunction) f,
1410 DOUBLE *dchi,
1411 MATRIX xvals, /* The entire data set. */
1412 DOUBLE *par,
1413 int index,
1414 DOUBLE *yvals, /* Ditto. */
1415 DOUBLE *sigmas, /* Ditto. */
1416 int x_dim)
1417 {
1418 DOUBLE y1, y2;
1419 GORegressionResult result;
1420 DOUBLE par_save = par[index];
1421
1422 par[index] = par_save - DELTA;
1423 result = SUFFIX(chi_squared) (f, xvals, par, yvals, sigmas, x_dim, &y1);
1424 if (result != GO_REG_ok) {
1425 par[index] = par_save;
1426 return result;
1427 }
1428
1429 par[index] = par_save + DELTA;
1430 result = SUFFIX(chi_squared) (f, xvals, par, yvals, sigmas, x_dim, &y2);
1431 if (result != GO_REG_ok) {
1432 par[index] = par_save;
1433 return result;
1434 }
1435
1436 #ifdef DEBUG
1437 g_printerr ("y1 = %lf\n", y1);
1438 g_printerr ("y2 = %lf\n", y2);
1439 g_printerr ("DELTA = %lf\n", DELTA);
1440 #endif
1441
1442 *dchi = (y2 - y1) / (2 * DELTA);
1443 par[index] = par_save;
1444 return GO_REG_ok;
1445 }
1446
1447 /*
1448 * SYNOPSIS:
1449 * result = coefficient_matrix (A, f, xvals, par, yvals, sigmas,
1450 * x_dim, p_dim, r)
1451 *
1452 * RETURNS:
1453 * The coefficient matrix of the LM method.
1454 *
1455 * DETAIS:
1456 * The coefficient matrix is defined by
1457 *
1458 * N 1 df df
1459 * A = Sum ( ------- -- -- ( i == j ? 1 + r : 1 ) a)
1460 * ij k=1 sigma^2 dp dp
1461 * k i j
1462 *
1463 * A -> p_dim X p_dim coefficient matrix. MUST ALREADY BE ALLOCATED.
1464 *
1465 * sigmas -> Measurement errors in the dataset (along the y-axis).
1466 * NULL means "no errors available", so they are all set to 1.
1467 *
1468 * x_dim -> Number of data points.
1469 *
1470 * p_dim -> Number of parameters.
1471 *
1472 * r -> Positive constant. Its value is altered during the LM procedure.
1473 */
1474
1475 static GORegressionResult
SUFFIX(coefficient_matrix)1476 SUFFIX(coefficient_matrix) (MATRIX A, /* Output matrix. */
1477 SUFFIX(GORegressionFunction) f,
1478 MATRIX xvals, /* The entire data set. */
1479 DOUBLE *par,
1480 DOUBLE *yvals, /* Ditto. */
1481 DOUBLE *sigmas, /* Ditto. */
1482 int x_dim, /* Number of data points. */
1483 int p_dim, /* Number of parameters. */
1484 DOUBLE r)
1485 {
1486 int i, j, k;
1487 GORegressionResult result;
1488 DOUBLE df_i, df_j;
1489 DOUBLE sum, sigma;
1490
1491 /* Notice that the matrix is symmetric. */
1492 for (i = 0; i < p_dim; i++) {
1493 for (j = 0; j <= i; j++) {
1494 sum = 0;
1495 for (k = 0; k < x_dim; k++) {
1496 result = SUFFIX(derivative) (f, &df_i, xvals[k],
1497 par, i);
1498 if (result != GO_REG_ok)
1499 return result;
1500
1501 result = SUFFIX(derivative) (f, &df_j, xvals[k],
1502 par, j);
1503 if (result != GO_REG_ok)
1504 return result;
1505
1506 sigma = (sigmas ? sigmas[k] : 1);
1507
1508 sum += (df_i * df_j) / (sigma * sigma) *
1509 (i == j ? 1 + r : 1) ;
1510 }
1511 A[i][j] = A[j][i] = sum;
1512 }
1513 }
1514
1515 return GO_REG_ok;
1516 }
1517
1518
1519 /*
1520 * SYNOPSIS:
1521 * result = parameter_errors (f, xvals, par, yvals, sigmas,
1522 * x_dim, p_dim, errors)
1523 *
1524 * Returns the errors associated with the parameters.
1525 * If an error is infinite, it is set to -1.
1526 *
1527 * sigmas -> Measurement errors in the dataset (along the y-axis).
1528 * NULL means "no errors available", so they are all set to 1.
1529 *
1530 * x_dim -> Number of data points.
1531 *
1532 * p_dim -> Number of parameters.
1533 *
1534 * errors -> MUST ALREADY BE ALLOCATED.
1535 */
1536
1537 /* FIXME: I am not happy with the behaviour with infinite errors. */
1538 static GORegressionResult
SUFFIX(parameter_errors)1539 SUFFIX(parameter_errors) (SUFFIX(GORegressionFunction) f,
1540 MATRIX xvals, /* The entire data set. */
1541 DOUBLE *par,
1542 DOUBLE *yvals, /* Ditto. */
1543 DOUBLE *sigmas, /* Ditto. */
1544 int x_dim, /* Number of data points. */
1545 int p_dim, /* Number of parameters. */
1546 DOUBLE *errors)
1547 {
1548 GORegressionResult result;
1549 MATRIX A;
1550 int i;
1551
1552 ALLOC_MATRIX (A, p_dim, p_dim);
1553
1554 result = SUFFIX(coefficient_matrix) (A, f, xvals, par, yvals, sigmas,
1555 x_dim, p_dim, 0);
1556 if (result == GO_REG_ok) {
1557 for (i = 0; i < p_dim; i++)
1558 /* FIXME: these were "[i][j]" which makes no sense. */
1559 errors[i] = (A[i][i] != 0
1560 ? 1 / SUFFIX(sqrt) (A[i][i])
1561 : -1);
1562 }
1563
1564 FREE_MATRIX (A, p_dim, p_dim);
1565 return result;
1566 }
1567
1568 /**
1569 * go_non_linear_regression:
1570 * @f: (scope call): the model function
1571 * @xvals: independent values.
1572 * @par: model parameters.
1573 * @yvals: dependent values.
1574 * @sigmas: stahdard deviations for the dependent values.
1575 * @x_dim: Number of data points.
1576 * @p_dim: Number of parameters.
1577 * @chi: Chi Squared of the final result. This value is not very
1578 * meaningful without the sigmas.
1579 * @errors: MUST ALREADY BE ALLOCATED. These are the approximated standard
1580 * deviation for each parameter.
1581 *
1582 * SYNOPSIS:
1583 * result = non_linear_regression (f, xvals, par, yvals, sigmas,
1584 * x_dim, p_dim, &chi, errors)
1585 * Non linear regression.
1586 * Returns: the results of the non-linear regression from the given initial
1587 * values.
1588 * The resulting parameters are placed back into @par.
1589 **/
1590 /**
1591 * go_non_linear_regressionl:
1592 * @f: (scope call): the model function
1593 * @xvals: independent values.
1594 * @par: model parameters.
1595 * @yvals: dependent values.
1596 * @sigmas: stahdard deviations for the dependent values.
1597 * @x_dim: Number of data points.
1598 * @p_dim: Number of parameters.
1599 * @chi: Chi Squared of the final result. This value is not very
1600 * meaningful without the sigmas.
1601 * @errors: MUST ALREADY BE ALLOCATED. These are the approximated standard
1602 * deviation for each parameter.
1603 *
1604 * SYNOPSIS:
1605 * result = non_linear_regression (f, xvals, par, yvals, sigmas,
1606 * x_dim, p_dim, &chi, errors)
1607 * Non linear regression.
1608 * Returns: the results of the non-linear regression from the given initial
1609 * values.
1610 * The resulting parameters are placed back into @par.
1611 **/
GORegressionResult
SUFFIX(go_non_linear_regression) (SUFFIX(GORegressionFunction) f,
				  MATRIX xvals, /* The entire data set. */
				  DOUBLE *par,
				  DOUBLE *yvals,  /* Ditto. */
				  DOUBLE *sigmas, /* Ditto. */
				  int x_dim,      /* Number of data points. */
				  int p_dim,      /* Number of parameters. */
				  DOUBLE *chi,
				  DOUBLE *errors)
{
	/* Damping factor: a large r biases each step towards steepest
	 * descent, a small r towards a Gauss-Newton step
	 * (Levenberg-Marquardt style iteration). */
	DOUBLE r = 0.001; /* Pick a conservative initial value. */
	DOUBLE *b, **A;
	DOUBLE *dpar;
	DOUBLE *tmp_par;
	DOUBLE chi_pre, chi_pos, dchi;
	GORegressionResult result;
	int i, count;

	/* Chi-squared of the caller's initial parameter guess. */
	result = SUFFIX(chi_squared) (f, xvals, par, yvals, sigmas, x_dim, &chi_pre);
	if (result != GO_REG_ok)
		return result;

	ALLOC_MATRIX (A, p_dim, p_dim);
	dpar = g_new (DOUBLE, p_dim);
	tmp_par = g_new (DOUBLE, p_dim);
	b = g_new (DOUBLE, p_dim);
#ifdef DEBUG
	g_printerr ("Chi Squared : %lf", chi_pre);
#endif

	for (count = 0; count < MAX_STEPS; count++) {
		for (i = 0; i < p_dim; i++) {
			/*
			 *          d Chi
			 *  b   ==  -----
			 *   k       d p
			 *              k
			 */
			result = SUFFIX(chi_derivative) (f, &dchi, xvals, par, i,
						 yvals, sigmas, x_dim);
			if (result != GO_REG_ok)
				goto out;

			b[i] = - dchi;
		}

		/* Damped coefficient (curvature) matrix for the current r. */
		result = SUFFIX(coefficient_matrix) (A, f, xvals, par, yvals,
					     sigmas, x_dim, p_dim, r);
		if (result != GO_REG_ok)
			goto out;

		/* Solve A * dpar = b for the proposed parameter step. */
		result = SUFFIX(go_linear_solve) (A, b, p_dim, dpar);
		if (result != GO_REG_ok)
			goto out;

		/* Trial parameters: current parameters plus the step. */
		for(i = 0; i < p_dim; i++)
			tmp_par[i] = par[i] + dpar[i];

		result = SUFFIX(chi_squared) (f, xvals, tmp_par, yvals, sigmas,
				      x_dim, &chi_pos);
		if (result != GO_REG_ok)
			goto out;

#ifdef DEBUG
		g_printerr ("Chi Squared : %lf", chi_pre);
		g_printerr ("Chi Squared : %lf", chi_pos);
		g_printerr ("r : %lf", r);
#endif

		if (chi_pos <= chi_pre + DELTA / 2) {
			/* There is improvement */
			/* Accept the step and relax the damping. */
			r /= 10;
			for(i = 0; i < p_dim; i++)
				par[i] = tmp_par[i];

			/* Converged: the change in chi-squared has become
			 * negligible. */
			if (SUFFIX(fabs) (chi_pos - chi_pre) < DELTA)
				break;

			chi_pre = chi_pos;
		} else {
			/* The step made things worse: increase the damping
			 * and try again from the same parameters. */
			r *= 10;
		}
	}

	/* NOTE(review): if the loop runs the full MAX_STEPS without
	 * converging, we still fall through and report success, and *chi
	 * below holds the last trial's chi-squared -- which may belong to
	 * a rejected step.  Confirm this is the intended behavior. */
	result = SUFFIX(parameter_errors) (f, xvals, par, yvals, sigmas,
				   x_dim, p_dim, errors);
	if (result != GO_REG_ok)
		goto out;

	*chi = chi_pos;

 out:
	FREE_MATRIX (A, p_dim, p_dim);
	g_free (dpar);
	g_free (tmp_par);
	g_free (b);

	return result;
}
1712
1713 /*
1714 * Compute the diagonal of A (AT A)^-1 AT
1715 *
1716 * A is full-rank m-times-n.
1717 * d is output for m-element diagonal.
1718 */
1719
GORegressionResult
SUFFIX(go_linear_regression_leverage) (MATRIX A, DOUBLE *d, int m, int n)
{
	SUFFIX(GOQuadMatrix) *qA;
	SUFFIX(GOQuadQR) *qr;
	void *state = SUFFIX(go_quad_start) ();
	GORegressionResult regres;
	DOUBLE threshold = DEFAULT_THRESHOLD;
	DOUBLE *xscale, *Aj;
	int i, j;

	/*
	 * Leverages are independent of column scaling, so each column is
	 * scaled -- presumably to improve the conditioning of the QR
	 * decomposition below (TODO: confirm against calc_scale).
	 */
	xscale = g_new (DOUBLE, n);
	Aj = g_new (DOUBLE, m);
	for (j = 0; j < n; j++) {
		/* Copy column j into a contiguous buffer for calc_scale. */
		for (i = 0; i < m; i++)
			Aj[i] = A[i][j];
		xscale[j] = SUFFIX(calc_scale) (Aj, m);
	}
	g_free (Aj);

	/* Work in quad precision: factor the scaled A as QR. */
	qA = SUFFIX(quad_matrix_from_matrix) (A, m, n, xscale);
	qr = SUFFIX(go_quad_qr_new) (qA);
	if (qr) {
		int k;
		QUAD *b = g_new (QUAD, n);
		const SUFFIX(GOQuadMatrix) *R = SUFFIX(go_quad_qr_r) (qr);
		DOUBLE emin, emax;

		/* Flag near-singularity from the eigenvalue range of R;
		 * the diagonal elements may still be solvable below. */
		SUFFIX(go_quad_matrix_eigen_range) (R, &emin, &emax);
		regres = (emin > emax * threshold)
			? GO_REG_ok
			: GO_REG_singular;

		/* For each row k compute the k-th diagonal element of
		 * A (AT A)^-1 AT, i.e. the k-th leverage. */
		for (k = 0; k < m; k++) {
			int i;
			QUAD acc = SUFFIX(go_quad_zero);

			/* b = AT e_k */
			for (i = 0; i < n; i++)
				b[i] = qA->data[k][i];

			/* Solve R^T b = AT e_k */
			if (SUFFIX(go_quad_matrix_fwd_solve) (R, b, b, FALSE)) {
				regres = GO_REG_singular;
				break;
			}

			/* Solve R newb = b */
			if (SUFFIX(go_quad_matrix_back_solve) (R, b, b, FALSE)) {
				regres = GO_REG_singular;
				break;
			}

			/* acc = (Ab)_k */
			for (i = 0; i < n; i++) {
				QUAD p;
				SUFFIX(go_quad_mul) (&p, &qA->data[k][i], &b[i]);
				SUFFIX(go_quad_add) (&acc, &acc, &p);
			}

			d[k] = SUFFIX(go_quad_value) (&acc);
		}

		g_free (b);
		SUFFIX(go_quad_qr_free) (qr);
	} else
		regres = GO_REG_invalid_data;

	SUFFIX(go_quad_matrix_free) (qA);
	g_free (xscale);

	SUFFIX(go_quad_end) (state);

	return regres;
}
1798
1799 /*
1800 * Compute the pseudo-inverse of a matrix, see
1801 * http://en.wikipedia.org/wiki/Moore%E2%80%93Penrose_pseudoinverse
1802 *
1803 * Think of this as A+ = (AT A)^-1 AT, except that the pseudo-inverse
1804 * is defined even if AT*A isn't invertible.
1805 *
1806 * Properties:
1807 *
1808 * A A+ A = A
1809 * A+ A A+ = A+
1810 * (A A+)^T = A A+
1811 * (A+ A)^T = A+ A
1812 *
1813 * assuming threshold==0 and no rounding errors.
1814 */
1815
1816 void
SUFFIX(go_matrix_pseudo_inverse)1817 SUFFIX(go_matrix_pseudo_inverse) (CONSTMATRIX A, int m, int n,
1818 DOUBLE threshold,
1819 MATRIX B)
1820 {
1821 SUFFIX(GOQuadMatrix) *qA, *qZ;
1822 void *state;
1823
1824 state = SUFFIX(go_quad_start) ();
1825 qA = SUFFIX(quad_matrix_from_matrix) (A, m, n, NULL);
1826 qZ = SUFFIX(go_quad_matrix_pseudo_inverse) (qA, threshold);
1827 SUFFIX(go_quad_matrix_free) (qA);
1828 if (qZ) {
1829 SUFFIX(copy_quad_matrix_to_matrix) (B, qZ);
1830 SUFFIX(go_quad_matrix_free) (qZ);
1831 }
1832 SUFFIX(go_quad_end) (state);
1833 }
1834