1 /*
2  * regression.c:  Statistical regression functions.
3  *
4  * Authors:
5  *   Morten Welinder <terra@gnome.org>
6  *   Andrew Chatham <andrew.chatham@duke.edu>
7  *   Daniel Carrera <dcarrera@math.toronto.edu>
8  *
9  * This library is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 2 of the
12  * License, or (at your option) version 3.
13  *
14  * This library is distributed in the hope that it will be useful, but
15  * WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Library General Public License for more details.
18  *
19  * You should have received a copy of the GNU Library General Public
20  * License along with this library; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
22  * USA.
23  */
24 
25 #include <goffice/goffice-config.h>
26 #include "go-regression.h"
27 #include "go-rangefunc.h"
28 #include "go-math.h"
29 
30 #include <math.h>
31 #include <string.h>
32 #include <stdlib.h>
33 #include <stdio.h>
34 
35 /**
36  * GORegressionResult:
37  * @GO_REG_ok: success.
38  * @GO_REG_invalid_dimensions: invalid dimensions.
 * @GO_REG_invalid_data: invalid data.
40  * @GO_REG_not_enough_data: not enough data.
41  * @GO_REG_near_singular_good: probably good result.
42  * @GO_REG_near_singular_bad: probably bad result.
43  * @GO_REG_singular: singularity found.
44  **/
45 
46 /**
47  * go_regression_stat_t:
48  * @se: SE for each parameter estimator.
49  * @t: t values for each parameter estimator.
50  * @sqr_r: squared R.
51  * @adj_sqr_r:
52  * @se_y: the Standard Error of Y.
53  * @F:
54  * @df_reg:
55  * @df_resid:
56  * @df_total:
57  * @ss_reg:
58  * @ss_resid:
59  * @ss_total:
60  * @ms_reg:
61  * @ms_resid:
62  * @ybar:
63  * @xbar:
64  * @var: the variance of the entire regression: sum(errors^2)/(n-xdim).
65  **/
66 
67 /**
68  * go_regression_stat_tl:
69  * @se: SE for each parameter estimator.
70  * @t: t values for each parameter estimator.
71  * @sqr_r: squared R.
72  * @adj_sqr_r:
73  * @se_y: the Standard Error of Y.
74  * @F:
75  * @df_reg:
76  * @df_resid:
77  * @df_total:
78  * @ss_reg:
79  * @ss_resid:
80  * @ss_total:
81  * @ms_reg:
82  * @ms_resid:
83  * @ybar:
84  * @xbar:
85  * @var: the variance of the entire regression: sum(errors^2)/(n-xdim).
86  **/
87 
88 /* ------------------------------------------------------------------------- */
89 /* Self-inclusion magic.  */
90 
91 #ifndef DOUBLE
92 
93 #define DEFAULT_THRESHOLD (256 * DOUBLE_EPS)
94 
95 
96 #define DEFINE_COMMON
97 #define DOUBLE double
98 #define DOUBLE_EPS DBL_EPSILON
99 #define SUFFIX(_n) _n
100 #define FORMAT_f "f"
101 #define FORMAT_g "g"
102 
103 #ifdef GOFFICE_WITH_LONG_DOUBLE
104 
105 #include "go-regression.c"
106 #undef DEFINE_COMMON
107 #undef DOUBLE
108 #undef DOUBLE_EPS
109 #undef SUFFIX
110 #undef FORMAT_f
111 #undef FORMAT_g
112 #ifdef HAVE_SUNMATH_H
113 #include <sunmath.h>
114 #endif
115 #define DOUBLE long double
116 #define DOUBLE_EPS LDBL_EPSILON
117 #define SUFFIX(_n) _n ## l
118 #define FORMAT_f "Lf"
119 #define FORMAT_g "Lg"
120 #else
121 /* It appears that gtk-doc is too dumb to handle this file.  Provide
122    a dummy type getter to make things work.  */
123 GType go_regression_statl_get_type (void);
go_regression_statl_get_type(void)124 GType go_regression_statl_get_type (void) { return G_TYPE_NONE; }
125 #endif
126 
127 #endif
128 
129 /* Boxed types code */
130 
SUFFIX(go_regression_stat_t)131 static SUFFIX(go_regression_stat_t) *
132 SUFFIX(go_regression_stat_ref) (SUFFIX(go_regression_stat_t)* state)
133 {
134 	state->ref_count++;
135 	return state;
136 }
137 
138 GType
139 #ifdef DEFINE_COMMON
go_regression_stat_get_type(void)140 go_regression_stat_get_type (void)
141 #else
142 go_regression_statl_get_type (void)
143 #endif
144 {
145 	static GType t = 0;
146 
147 	if (t == 0) {
148 		t = g_boxed_type_register_static (
149 #ifdef DEFINE_COMMON
150 		     "go_regression_stat_t",
151 #else
152 		     "go_regression_stat_tl",
153 #endif
154 			 (GBoxedCopyFunc)SUFFIX(go_regression_stat_ref),
155 			 (GBoxedFreeFunc)SUFFIX(go_regression_stat_destroy));
156 	}
157 	return t;
158 }
159 
160 /* ------------------------------------------------------------------------- */
161 
162 #ifdef DEFINE_COMMON
163 
164 #define MATRIX DOUBLE **
165 #define CONSTMATRIX DOUBLE *const *const
166 #define QUAD SUFFIX(GOQuad)
167 #define QMATRIX QUAD **
168 #define CONSTQMATRIX QUAD *const *const
169 
170 #undef DEBUG_REFINEMENT
171 
172 #define ALLOC_MATRIX2(var,dim1,dim2,TYPE)		\
173   do { int _i, _d1, _d2;				\
174        _d1 = (dim1);					\
175        _d2 = (dim2);					\
176        (var) = g_new (TYPE *, _d1);			\
177        for (_i = 0; _i < _d1; _i++)			\
178 	       (var)[_i] = g_new (TYPE, _d2);		\
179   } while (0)
180 #define ALLOC_MATRIX(var,dim1,dim2) ALLOC_MATRIX2(var,dim1,dim2,DOUBLE)
181 
182 #define FREE_MATRIX(var,dim1,dim2)			\
183   do { int _i, _d1;					\
184        _d1 = (dim1);					\
185        for (_i = 0; _i < _d1; _i++)			\
186 	       g_free ((var)[_i]);			\
187        g_free (var);					\
188   } while (0)
189 
190 #define COPY_MATRIX(dst,src,dim1,dim2)		\
191   do { int _i, _j, _d1, _d2;			\
192        _d1 = (dim1);				\
193        _d2 = (dim2);				\
194        for (_i = 0; _i < _d1; _i++)		\
195 	 for (_j = 0; _j < _d2; _j++)		\
196 	   (dst)[_i][_j] = (src)[_i][_j];	\
197   } while (0)
198 
199 #define ZERO_VECTOR(dst,dim)			\
200   do { int _i, _d;				\
201        _d = (dim);				\
202        for (_i = 0; _i < _d; _i++)		\
203 	       (dst)[_i] = 0;			\
204   } while (0)
205 
206 #define COPY_VECTOR(dst,src,dim)		\
207   do { int _i, _d;				\
208        _d = (dim);				\
209        for (_i = 0; _i < _d; _i++)		\
210 	       (dst)[_i] = (src)[_i];		\
211   } while (0)
212 
213 #define PRINT_MATRIX(var,dim1,dim2)				\
214   do {								\
215 	int _i, _j, _d1, _d2;					\
216 	_d1 = (dim1);						\
217 	_d2 = (dim2);						\
218 	for (_i = 0; _i < _d1; _i++) {				\
219 	  for (_j = 0; _j < _d2; _j++) {			\
220 		  g_printerr (" %12.8" FORMAT_g, (var)[_i][_j]); \
221 	  }							\
222 	  g_printerr ("\n");					\
223 	}							\
224   } while (0)
225 
226 #define PRINT_QMATRIX(var,dim1,dim2)				\
227   do {								\
228 	int _i, _j, _d1, _d2;					\
229 	_d1 = (dim1);						\
230 	_d2 = (dim2);						\
231 	for (_i = 0; _i < _d1; _i++) {				\
232 	  for (_j = 0; _j < _d2; _j++) {			\
233 		  g_printerr (" %12.8" FORMAT_g, SUFFIX(go_quad_value) (&(var)[_i][_j])); \
234 	  }							\
235 	  g_printerr ("\n");					\
236 	}							\
237   } while (0)
238 
239 
240 #endif
241 
242 /*
243  *       ---> j
244  *
245  *  |    ********
246  *  |    ********
247  *  |    ********        A[i][j]
248  *  v    ********
249  *       ********
250  *  i    ********
251  *       ********
252  *       ********
253  *
254  */
255 
SUFFIX(GOQuadMatrix)256 static SUFFIX(GOQuadMatrix)
257 *
258 SUFFIX(quad_matrix_from_matrix) (CONSTMATRIX A, int m, int n, DOUBLE *scale)
259 {
260 	int i, j;
261 	SUFFIX(GOQuadMatrix) *qA = SUFFIX(go_quad_matrix_new) (m, n);
262 
263 	for (i = 0; i < m; i++) {
264 		for (j = 0; j < n; j++) {
265 			DOUBLE x = scale ? A[i][j] / scale[j] : A[i][j];
266 			SUFFIX(go_quad_init) (&qA->data[i][j], x);
267 		}
268 	}
269 
270 	return qA;
271 }
272 
273 static void
SUFFIX(copy_quad_matrix_to_matrix)274 SUFFIX(copy_quad_matrix_to_matrix) (MATRIX A, const SUFFIX(GOQuadMatrix) *qA)
275 {
276 	int i, j;
277 
278 	for (i = 0; i < qA->m; i++)
279 		for (j = 0; j < qA->n; j++)
280 			A[i][j] = SUFFIX(go_quad_value) (&qA->data[i][j]);
281 
282 }
283 
284 /* ------------------------------------------------------------------------- */
285 
286 #if 0
287 static void
288 SUFFIX(print_Q) (CONSTQMATRIX V, int m, int n)
289 {
290 	QMATRIX Q;
291 	int i, j;
292 	QUAD *x;
293 
294 	ALLOC_MATRIX2 (Q, m, m, QUAD);
295 	x = g_new (QUAD, m);
296 
297 	for (j = 0; j < m; j++) {
298 		for (i = 0; i < m; i++)
299 			SUFFIX(go_quad_init)(&x[i], i == j ? 1 : 0);
300 
301 		SUFFIX(multiply_Qt)(x, V, m, n);
302 		for (i = 0; i < m; i++)
303 			Q[j][i] = x[i];
304 	}
305 
306 	PRINT_QMATRIX (Q, m, m);
307 
308 	FREE_MATRIX (Q, m, m);
309 	g_free (x);
310 }
311 #endif
312 
313 static DOUBLE
SUFFIX(calc_residual)314 SUFFIX(calc_residual) (CONSTMATRIX AT, const DOUBLE *b, int m, int n,
315 		       const DOUBLE *y)
316 {
317 	int i, j;
318 	QUAD N2 = SUFFIX(go_quad_zero);
319 
320 	for (i = 0; i < m; i++) {
321 		QUAD d;
322 
323 		SUFFIX(go_quad_init) (&d, b[i]);
324 
325 		for (j = 0; j < n; j++) {
326 			QUAD e;
327 			SUFFIX(go_quad_mul12) (&e, AT[j][i], y[j]);
328 			SUFFIX(go_quad_sub) (&d, &d, &e);
329 		}
330 
331 		SUFFIX(go_quad_mul) (&d, &d, &d);
332 		SUFFIX(go_quad_add) (&N2, &N2, &d);
333 	}
334 
335 	return SUFFIX(go_quad_value) (&N2);
336 }
337 
338 /* ------------------------------------------------------------------------- */
339 
340 /*
341  * Solve AX=B where A is n-times-n and B and X are both n-times-bn.
342  * X is stored back into B.
343  */
344 
345 GORegressionResult
SUFFIX(go_linear_solve_multiple)346 SUFFIX(go_linear_solve_multiple) (CONSTMATRIX A, MATRIX B, int n, int bn)
347 {
348 	void *state;
349 	GORegressionResult regres = GO_REG_ok;
350 	SUFFIX(GOQuadMatrix) *qA;
351 	const SUFFIX(GOQuadMatrix) *R;
352 	SUFFIX(GOQuadQR) *qr;
353 	QUAD *QTb, *x;
354 	int i, j;
355 
356 	if (n < 1 || bn < 1)
357 		return GO_REG_not_enough_data;
358 
359 	/* Special case.  */
360 	if (n == 1) {
361 		int i;
362 		DOUBLE d = A[0][0];
363 		if (d == 0)
364 			return GO_REG_singular;
365 
366 		for (i = 0; i < bn; i++)
367 			B[0][i] /= d;
368 		return GO_REG_ok;
369 	}
370 
371 	state = SUFFIX(go_quad_start) ();
372 
373 	qA = SUFFIX(quad_matrix_from_matrix) (A, n, n, NULL);
374 	qr = SUFFIX(go_quad_qr_new) (qA);
375 	if (!qr) {
376 		regres = GO_REG_invalid_data;
377 		goto done;
378 	}
379 	R = SUFFIX(go_quad_qr_r) (qr);
380 
381 	QTb = g_new (QUAD, n);
382 	x = g_new (QUAD, n);
383 
384 	for (j = 0; j < bn; j++) {
385 		/* Compute Q^T b.  */
386 		for (i = 0; i < n; i++)
387 			SUFFIX(go_quad_init)(&QTb[i], B[i][j]);
388 		SUFFIX(go_quad_qr_multiply_qt)(qr, QTb);
389 
390 		/* Solve R x = Q^T b  */
391 		if (SUFFIX(go_quad_matrix_back_solve) (R, x, QTb, FALSE))
392 			regres = GO_REG_singular;
393 
394 		/* Round for output.  */
395 		for (i = 0; i < n; i++)
396 			B[i][j] = SUFFIX(go_quad_value) (&x[i]);
397 	}
398 
399 	SUFFIX(go_quad_qr_free) (qr);
400 	g_free (x);
401 	g_free (QTb);
402 done:
403 	SUFFIX(go_quad_matrix_free) (qA);
404 
405 	SUFFIX(go_quad_end) (state);
406 
407 	return regres;
408 }
409 
410 GORegressionResult
SUFFIX(go_linear_solve)411 SUFFIX(go_linear_solve) (CONSTMATRIX A, const DOUBLE *b, int n, DOUBLE *res)
412 {
413 	MATRIX B;
414 	int i;
415 	GORegressionResult regres;
416 
417 	if (n < 1)
418 		return GO_REG_not_enough_data;
419 
420 	ALLOC_MATRIX (B, n, 1);
421 	for (i = 0; i < n; i++)
422 		B[i][0] = b[i];
423 
424 	regres = SUFFIX(go_linear_solve_multiple) (A, B, n, 1);
425 
426 	for (i = 0; i < n; i++)
427 		res[i] = B[i][0];
428 
429 	FREE_MATRIX (B, n, 1);
430 
431 	return regres;
432 }
433 
434 
435 gboolean
SUFFIX(go_matrix_invert)436 SUFFIX(go_matrix_invert) (MATRIX A, int n)
437 {
438 	SUFFIX(GOQuadMatrix) *qA, *qZ;
439 	DOUBLE threshold = DEFAULT_THRESHOLD;
440 	void *state;
441 	gboolean ok;
442 
443 	state = SUFFIX(go_quad_start) ();
444 	qA = SUFFIX(quad_matrix_from_matrix) (A, n, n, NULL);
445 	qZ = SUFFIX(go_quad_matrix_inverse) (qA, threshold);
446 	ok = (qZ != NULL);
447 	SUFFIX(go_quad_matrix_free) (qA);
448 	if (qZ) {
449 		SUFFIX(copy_quad_matrix_to_matrix) (A, qZ);
450 		SUFFIX(go_quad_matrix_free) (qZ);
451 	}
452 	SUFFIX(go_quad_end) (state);
453 	return ok;
454 }
455 
456 DOUBLE
SUFFIX(go_matrix_determinant)457 SUFFIX(go_matrix_determinant) (CONSTMATRIX A, int n)
458 {
459 	DOUBLE res;
460 	QUAD qres;
461 	void *state;
462 	SUFFIX(GOQuadMatrix) *qA;
463 
464 	if (n < 1)
465 		return 0;
466 
467 	state = SUFFIX(go_quad_start) ();
468 
469 	qA = SUFFIX(quad_matrix_from_matrix) (A, n, n, NULL);
470 	SUFFIX(go_quad_matrix_determinant) (qA, &qres);
471 	SUFFIX(go_quad_matrix_free) (qA);
472 	res = SUFFIX(go_quad_value) (&qres);
473 
474 	SUFFIX(go_quad_end) (state);
475 
476 	return res;
477 }
478 
479 static DOUBLE
SUFFIX(calc_scale)480 SUFFIX(calc_scale) (const DOUBLE *xs, int m)
481 {
482 	DOUBLE M;
483 	int e;
484 
485 	if (SUFFIX(go_range_maxabs) (xs, m, &M) || M == 0)
486 		return 1;
487 
488 	/*
489 	 * Pick a scale such that the largest value will be in the
490 	 * range [1;2[.  The scale will be a power of 2 so scaling
491 	 * doesn't introduce rounding errors of it own.
492 	 */
493 	(void) SUFFIX(frexp) (M, &e);
494 	return SUFFIX(ldexp) (1.0, e - 1);
495 }
496 
497 /* ------------------------------------------------------------------------- */
498 
499 /* Note, that this function takes a transposed matrix xssT.  */
500 static GORegressionResult
SUFFIX(general_linear_regression)501 SUFFIX(general_linear_regression) (CONSTMATRIX xssT, int n,
502 				   const DOUBLE *ys, int m,
503 				   DOUBLE *result,
504 				   SUFFIX(go_regression_stat_t) *stat_,
505 				   gboolean affine,
506 				   DOUBLE threshold)
507 {
508 	GORegressionResult regerr;
509 	SUFFIX(GOQuadMatrix) *xss;
510 	SUFFIX(GOQuadQR) *qr;
511 	int i, j, k;
512 	gboolean has_result;
513 	void *state;
514 	gboolean has_stat;
515 	int err;
516 	QUAD N2;
517 	DOUBLE *xscale;
518 	gboolean debug_scale = FALSE;
519 
520 	ZERO_VECTOR (result, n);
521 
522 	has_stat = (stat_ != NULL);
523 	if (!has_stat)
524 		stat_ = SUFFIX(go_regression_stat_new)();
525 
526 	if (n > m) {
527 		regerr = GO_REG_not_enough_data;
528 		goto out;
529 	}
530 
531 	state = SUFFIX(go_quad_start) ();
532 
533 	xscale = g_new (DOUBLE, n);
534 	xss = SUFFIX(go_quad_matrix_new) (m, n);
535 	for (j = 0; j < n; j++) {
536 		xscale[j] = SUFFIX(calc_scale) (xssT[j], m);
537 		if (debug_scale)
538 			g_printerr ("Scale %d: %" FORMAT_g "\n", j, xscale[j]);
539 		for (i = 0; i < m; i++)
540 			SUFFIX(go_quad_init) (&xss->data[i][j], xssT[j][i] / xscale[j]);
541 	}
542 	qr = SUFFIX(go_quad_qr_new) (xss);
543 	SUFFIX(go_quad_matrix_free) (xss);
544 
545 	has_result = (qr != NULL);
546 
547 	if (has_result) {
548 		const SUFFIX(GOQuadMatrix) *R = SUFFIX(go_quad_qr_r) (qr);
549 		QUAD *qresult = g_new0 (QUAD, n);
550 		QUAD *QTy = g_new (QUAD, m);
551 		QUAD *inv = g_new (QUAD, n);
552 		DOUBLE emax;
553 		int df_resid = m - n;
554 		int df_reg = n - (affine ? 1 : 0);
555 
556 		regerr = GO_REG_ok;
557 
558 		SUFFIX(go_quad_matrix_eigen_range) (R, NULL, &emax);
559 		for (i = 0; i < n; i++) {
560 			DOUBLE ei = SUFFIX(go_quad_value)(&R->data[i][i]);
561 			gboolean degenerate =
562 				!(SUFFIX(fabs)(ei) >= emax * threshold);
563 			if (degenerate) {
564 				SUFFIX(go_quad_qr_mark_degenerate) (qr, i);
565 				df_resid++;
566 				df_reg--;
567 			}
568 		}
569 
570 		/* Compute Q^T ys.  */
571 		for (i = 0; i < m; i++)
572 			SUFFIX(go_quad_init)(&QTy[i], ys[i]);
573 		SUFFIX(go_quad_qr_multiply_qt)(qr, QTy);
574 
575 		if (0)
576 			SUFFIX(go_quad_matrix_dump) (R, "%10.5" FORMAT_g);
577 
578 		/* Solve R res = Q^T ys */
579 		if (SUFFIX(go_quad_matrix_back_solve) (R, qresult, QTy, TRUE))
580 			has_result = FALSE;
581 
582 		for (i = 0; i < n; i++)
583 			result[i] = SUFFIX(go_quad_value) (&qresult[i]) / xscale[i];
584 
585 		/* This should not fail since n >= 1.  */
586 		err = SUFFIX(go_range_average) (ys, m, &stat_->ybar);
587 		g_assert (err == 0);
588 
589 		/* FIXME: we ought to have a devsq variant that does not
590 		   recompute the mean.  */
591 		if (affine)
592 			err = SUFFIX(go_range_devsq) (ys, m, &stat_->ss_total);
593 		else
594 			err = SUFFIX(go_range_sumsq) (ys, m, &stat_->ss_total);
595 		g_assert (err == 0);
596 
597 		stat_->xbar = g_new (DOUBLE, n);
598 		for (i = 0; i < n; i++) {
599 			int err = SUFFIX(go_range_average) (xssT[i], m, &stat_->xbar[i]);
600 			g_assert (err == 0);
601 		}
602 
603 		;
604 		stat_->ss_resid =
605 			SUFFIX(calc_residual) (xssT, ys, m, n, result);
606 
607 		stat_->sqr_r = (stat_->ss_total == 0)
608 			? 1
609 			: 1 - stat_->ss_resid / stat_->ss_total;
610 		if (stat_->sqr_r < 0) {
611 			/*
612 			 * This is an indication that something has gone wrong
613 			 * numerically.
614 			 */
615 			regerr = GO_REG_near_singular_bad;
616 		}
617 
618 		/* FIXME: we want to guard against division by zero.  */
619 		stat_->adj_sqr_r = 1 - stat_->ss_resid * (m - 1) /
620 			(df_resid * stat_->ss_total);
621 		if (df_resid == 0)
622 			N2 = SUFFIX(go_quad_zero);
623 		else {
624 			QUAD d;
625 			SUFFIX(go_quad_init) (&d, df_resid);
626 			SUFFIX(go_quad_init) (&N2, stat_->ss_resid);
627 			SUFFIX(go_quad_div) (&N2, &N2, &d);
628 		}
629 		stat_->var = SUFFIX(go_quad_value) (&N2);
630 
631 		stat_->se = g_new0 (DOUBLE, n);
632 		for (k = 0; k < n; k++) {
633 			QUAD p, N;
634 
635 			/* inv = e_k */
636 			for (i = 0; i < n; i++)
637 				SUFFIX(go_quad_init) (&inv[i], i == k ? 1 : 0);
638 
639 			/* Solve R^T inv = e_k */
640 			if (SUFFIX(go_quad_matrix_fwd_solve) (R, inv, inv, TRUE)) {
641 				regerr = GO_REG_singular;
642 				break;
643 			}
644 
645 			SUFFIX(go_quad_dot_product) (&N, inv, inv, n);
646 			SUFFIX(go_quad_mul) (&p, &N2, &N);
647 			SUFFIX(go_quad_sqrt) (&p, &p);
648 			stat_->se[k] = SUFFIX(go_quad_value) (&p) / xscale[k];
649 		}
650 
651 		stat_->t = g_new (DOUBLE, n);
652 
653 		for (i = 0; i < n; i++)
654 			stat_->t[i] = (stat_->se[i] == 0)
655 				? SUFFIX(go_pinf)
656 				: result[i] / stat_->se[i];
657 
658 		stat_->df_resid = df_resid;
659 		stat_->df_reg = df_reg;
660 		stat_->df_total = stat_->df_resid + df_reg;
661 
662 		stat_->F = (stat_->sqr_r == 1)
663 			? SUFFIX(go_pinf)
664 			: ((stat_->sqr_r / df_reg) /
665 			   (1 - stat_->sqr_r) * stat_->df_resid);
666 
667 		stat_->ss_reg =  stat_->ss_total - stat_->ss_resid;
668 		stat_->se_y = SUFFIX(sqrt) (stat_->ss_total / m);
669 		stat_->ms_reg = (df_reg == 0)
670 			? 0
671 			: stat_->ss_reg / stat_->df_reg;
672 		stat_->ms_resid = (df_resid == 0)
673 			? 0
674 			: stat_->ss_resid / df_resid;
675 
676 		g_free (QTy);
677 		g_free (qresult);
678 		g_free (inv);
679 	} else
680 		regerr = GO_REG_invalid_data;
681 
682 	SUFFIX(go_quad_end) (state);
683 
684 	if (qr) SUFFIX(go_quad_qr_free) (qr);
685 	g_free (xscale);
686 out:
687 	if (!has_stat)
688 		SUFFIX(go_regression_stat_destroy) (stat_);
689 
690 	return regerr;
691 }
692 
693 /* ------------------------------------------------------------------------- */
694 
695 typedef struct {
696 	DOUBLE min_x;
697 	DOUBLE max_x;
698 	DOUBLE min_y;
699 	DOUBLE max_y;
700 	DOUBLE mean_y;
701 } SUFFIX(point_cloud_measure_type);
702 
703 /* Takes the current 'sign' (res[0]) and 'c' (res[3]) from the calling
704  * function, transforms xs to ln(sign*(x-c)), performs a simple
705  * linear regression to find the best fitting 'a' (res[1]) and 'b'
706  * (res[2]) for ys and transformed xs, and computes the sum of squared
707  * residuals.
708  * Needs 'sign' (i.e. +1 or -1) and 'c' so adjusted that (sign*(x-c)) is
709  * positive for all xs. n must be > 0. These conditions are trusted to be
710  * checked by the calling functions.
711  * Is called often, so do not make it too slow.
712  */
713 
714 static int
SUFFIX(transform_x_and_linear_regression_log_fitting)715 SUFFIX(transform_x_and_linear_regression_log_fitting)
716 	(DOUBLE *xs,
717 	 DOUBLE *transf_xs,
718 	 const DOUBLE *ys, int n,
719 	 DOUBLE *res,
720 	 SUFFIX(point_cloud_measure_type)
721 	 *point_cloud)
722 {
723         int i;
724 	int result = GO_REG_ok;
725 	DOUBLE mean_transf_x, diff_x, resid_y;
726 	DOUBLE sum1 = 0;
727 	DOUBLE sum2 = 0;
728 
729 	/* log (always > 0) */
730 	for (i=0; i<n; i++)
731 	        transf_xs[i] = SUFFIX(log) (res[0] * (xs[i] - res[3]));
732 	SUFFIX(go_range_average) (transf_xs, n, &mean_transf_x);
733 	for (i=0; i<n; i++) {
734 	        diff_x = transf_xs[i] - mean_transf_x;
735 		sum1 += diff_x * (ys[i] - point_cloud->mean_y);
736 		sum2 += diff_x * diff_x;
737 	}
738 	res[2] = sum1 / sum2;
739 	res[1] = point_cloud->mean_y - (res[2] * mean_transf_x);
740 	res[4] = 0;
741 	for (i=0; i<n; i++) {
742 	        resid_y = res[1] + (res[2] * transf_xs[i]) - ys[i];
743 		res[4] += resid_y * resid_y;
744 	}
745 	return result; /* not used for error checking for the sake of speed */
746 }
747 
748 static int
SUFFIX(log_fitting)749 SUFFIX(log_fitting) (DOUBLE *xs, const DOUBLE *ys, int n,
750 		     DOUBLE *res, SUFFIX(point_cloud_measure_type) *point_cloud)
751 {
752         int result = GO_REG_ok;
753 	gboolean sign_plus_ok = 1, sign_minus_ok = 1;
754 	DOUBLE x_range, c_step, c_accuracy_int, c_offset, c_accuracy;
755 	DOUBLE c_range, c_start, c_end, c_dist;
756 	DOUBLE *temp_res;
757         DOUBLE *transf_xs;
758 
759 	temp_res = g_new (DOUBLE, 5);
760 	x_range = (point_cloud->max_x) - (point_cloud->min_x);
761 	/* Not needed here, but allocate it once for all subfunction calls */
762 	transf_xs = g_new (DOUBLE, n);
763 	/* Choose final accuracy of c with respect to range of xs.
764 	 * Make accuracy be a whole power of 10. */
765 	c_accuracy = SUFFIX(log10) (x_range);
766 	if (c_accuracy < 0)
767 	        if (SUFFIX(modf) (c_accuracy, &c_accuracy_int) != 0)
768 		        c_accuracy--;
769 	SUFFIX(modf) (c_accuracy, &c_accuracy_int);
770 	c_accuracy = c_accuracy_int;
771 	c_accuracy = SUFFIX(pow) (10, c_accuracy);
772 	c_accuracy *= GO_LOGFIT_C_ACCURACY;
773 
774 	/* Determine sign. Take a c which is ``much to small'' since the part
775 	 * of the curve cutting the point cloud is almost not bent.
776 	 * If making c still smaller does not make things still worse,
777 	 * assume that we have to change the direction of curve bending
778 	 * by changing sign.
779 	 */
780 	c_step = x_range * GO_LOGFIT_C_STEP_FACTOR;
781 	c_range = x_range * GO_LOGFIT_C_RANGE_FACTOR;
782 	res[0] = 1; /* sign */
783 	res[3] = point_cloud->min_x - c_range;
784 	temp_res[0] = 1;
785 	temp_res[3] = res[3] - c_step;
786 	SUFFIX(transform_x_and_linear_regression_log_fitting) (xs, transf_xs, ys, n,
787 						       res, point_cloud);
788 	SUFFIX(transform_x_and_linear_regression_log_fitting) (xs, transf_xs, ys, n,
789 						       temp_res, point_cloud);
790 	if (temp_res[4] <= res[4])
791 	        sign_plus_ok = 0;
792         /* check again with new sign */
793 	res[0] = -1; /* sign */
794 	res[3] = point_cloud->max_x + c_range;
795 	temp_res[0] = -1;
796 	temp_res[3] = res[3] + c_step;
797 	SUFFIX(transform_x_and_linear_regression_log_fitting) (xs, transf_xs, ys, n,
798 						       res, point_cloud);
799 	SUFFIX(transform_x_and_linear_regression_log_fitting) (xs, transf_xs, ys, n,
800 						       temp_res, point_cloud);
801 	if (temp_res[4] <= res[4])
802 	        sign_minus_ok = 0;
803 	/* If not exactly one of plus or minus works, give up.
804 	 * This happens in point clouds which are very weakly bent.
805 	 */
806 	if (sign_plus_ok && !sign_minus_ok)
807 	        res[0] = 1;
808 	else if (sign_minus_ok && !sign_plus_ok)
809 	        res[0] = -1;
810 	else {
811 	        result = GO_REG_invalid_data;
812 		goto out;
813 	}
814 
815 	/* Start of fitted c-range. Rounded to final accuracy of c. */
816 	c_offset = (res[0] == 1) ? point_cloud->min_x : point_cloud->max_x;
817 	c_offset = c_accuracy * ((res[0] == 1) ?
818 				 SUFFIX(floor) (c_offset / c_accuracy)
819 				 : SUFFIX(ceil) (c_offset /c_accuracy));
820 
821 	/* Now the adapting of c starts. Find a local minimum of sum
822 	 * of squared residuals. */
823 
824 	/* First, catch some unsuitably shaped point clouds. */
825 	res[3] = c_offset - res[0] * c_accuracy;
826 	temp_res[3] = c_offset - res[0] * 2 * c_accuracy;
827 	temp_res[0] = res[0];
828 	SUFFIX(transform_x_and_linear_regression_log_fitting) (xs, transf_xs, ys, n,
829 						       res, point_cloud);
830 	SUFFIX(transform_x_and_linear_regression_log_fitting) (xs, transf_xs, ys, n,
831 						       temp_res, point_cloud);
832 	if (temp_res[4] >= res[4]) {
833 	        result = GO_REG_invalid_data;
834 		goto out;
835 	}
836 	/* After the above check, any minimum reached will be NOT  at
837 	 * the start of c-range (c_offset - sign * c_accuracy) */
838 	c_start = c_offset;
839 	c_end = c_start - res[0] * c_range;
840 	c_dist = res[0] * (c_start - c_end) / 2;
841 	res[3] = c_end + res[0] * c_dist;
842 	do {
843 	    c_dist /= 2;
844 		SUFFIX(transform_x_and_linear_regression_log_fitting) (xs, transf_xs,
845 							       ys, n, res,
846 							       point_cloud);
847 		temp_res[3] = res[3] + res[0] * c_dist;
848 		SUFFIX(transform_x_and_linear_regression_log_fitting) (xs, transf_xs,
849 							       ys, n, temp_res,
850 							       point_cloud);
851 		if (temp_res[4] <= res[4])
852 			COPY_VECTOR (res, temp_res, 5);
853 		else {
854 		        temp_res[3] = res[3] - res[0] * c_dist;
855 			SUFFIX(transform_x_and_linear_regression_log_fitting) (xs,
856 							          transf_xs,
857 								  ys, n,
858 								  temp_res,
859 							          point_cloud);
860 			if (temp_res[4] <= res[4])
861 			        COPY_VECTOR (res, temp_res, 5);
862 		}
863 	} while (c_dist > c_accuracy);
864 
865 	res[3] = c_accuracy * SUFFIX(go_fake_round) (res[3] / c_accuracy);
866 	SUFFIX(transform_x_and_linear_regression_log_fitting) (xs, transf_xs, ys, n,
867 						       res, point_cloud);
868 
869 	if ((res[0] * (res[3] - c_end)) < (1.1 * c_accuracy)) {
870 	        /* Allowing for some inaccuracy, we are at the end of the
871 		 * range, so this is probably no local minimum.
872 		 * The start of the range has been checked above. */
873 	        result = GO_REG_invalid_data;
874 		goto out;
875 	}
876 
877  out:
878 	g_free (transf_xs);
879 	g_free (temp_res);
880 	return result;
881 }
882 
883 /**
884  * go_linear_regression:
885  * @xss: x-vectors (i.e. independent data)
886  * @dim: number of x-vectors.
887  * @ys: y-vector.  (Dependent data.)
888  * @n: number of data points.
889  * @affine: if true, a non-zero constant is allowed.
890  * @res: (out): place for constant[0] and slope1[1], slope2[2],... There will be dim+1 results.
891  * @stat_: (out): non-NULL storage for additional results.
892  *
893  * Performs multi-dimensional linear regressions on the input points.
894  * Fits to "y = b + a1 * x1 + ... ad * xd".
895  *
896  * Returns: #GORegressionResult as above.
897  **/
898 GORegressionResult
SUFFIX(go_linear_regression)899 SUFFIX(go_linear_regression) (MATRIX xss, int dim,
900 			      const DOUBLE *ys, int n,
901 			      gboolean affine,
902 			      DOUBLE *res,
903 			      SUFFIX(go_regression_stat_t) *stat_)
904 {
905 	GORegressionResult result;
906 	DOUBLE threshold = DEFAULT_THRESHOLD;
907 
908 	g_return_val_if_fail (dim >= 1, GO_REG_invalid_dimensions);
909 	g_return_val_if_fail (n >= 1, GO_REG_invalid_dimensions);
910 
911 	if (affine) {
912 		int i;
913 		DOUBLE **xss2 = g_new (DOUBLE *, dim + 1);
914 
915 		xss2[0] = g_new (DOUBLE, n);
916 		for (i = 0; i < n; i++)
917 			xss2[0][i] = 1;
918 		memcpy (xss2 + 1, xss, dim * sizeof (DOUBLE *));
919 
920 		result = SUFFIX(general_linear_regression)
921 			(xss2, dim + 1, ys, n,
922 			 res, stat_, affine, threshold);
923 		g_free (xss2[0]);
924 		g_free (xss2);
925 	} else {
926 		res[0] = 0;
927 		result = SUFFIX(general_linear_regression)
928 			(xss, dim, ys, n,
929 			 res + 1, stat_, affine, threshold);
930 	}
931 	return result;
932 }
933 
934 /**
935  * go_exponential_regression:
936  * @xss: x-vectors (i.e. independent data)
937  * @dim: number of x-vectors
938  * @ys: y-vector (dependent data)
939  * @n: number of data points
940  * @affine: if %TRUE, a non-one multiplier is allowed
941  * @res: output place for constant[0] and root1[1], root2[2],... There will be dim+1 results.
942  * @stat_: non-NULL storage for additional results.
943  *
 * Performs multi-dimensional linear regression on the input points.
945  * Fits to "y = b * m1^x1 * ... * md^xd " or equivalently to
946  * "log y = log b + x1 * log m1 + ... + xd * log md".
947  *
948  * Returns: #GORegressionResult as above.
949  **/
950 GORegressionResult
SUFFIX(go_exponential_regression)951 SUFFIX(go_exponential_regression) (MATRIX xss, int dim,
952 				   const DOUBLE *ys, int n,
953 				   gboolean affine,
954 				   DOUBLE *res,
955 				   SUFFIX(go_regression_stat_t) *stat_)
956 {
957 	GORegressionResult result = SUFFIX(go_exponential_regression_as_log) (xss, dim, ys, n, affine, res, stat_);
958 	int i;
959 
960 	if (result == GO_REG_ok || result == GO_REG_near_singular_good)
961 		for (i = 0; i < dim + 1; i++)
962 			res[i] = SUFFIX(exp) (res[i]);
963 
964 	return result;
965 }
966 
967 /**
968  * go_exponential_regression_as_log:
969  * @xss: x-vectors (i.e. independent data)
970  * @dim: number of x-vectors
971  * @ys: y-vector (dependent data)
972  * @n: number of data points
973  * @affine: if %TRUE, a non-one multiplier is allowed
974  * @res: output place for constant[0] and root1[1], root2[2],... There will be dim+1 results.
975  * @stat_: non-NULL storage for additional results.
976  *
 * Performs multi-dimensional linear regression on the input points as
 * go_exponential_regression, but returns the logarithm of the coefficients
 * instead of the coefficients themselves.
980  * Fits to "y = b * exp (m1*x1) * ... * exp (md*xd) " or equivalently to
981  * "ln y = ln b + x1 * m1 + ... + xd * md".
982  *
983  * Returns: #GORegressionResult as above.
984  **/
985 GORegressionResult
SUFFIX(go_exponential_regression_as_log)986 SUFFIX(go_exponential_regression_as_log) (MATRIX xss, int dim,
987 				   const DOUBLE *ys, int n,
988 				   gboolean affine,
989 				   DOUBLE *res,
990 				   SUFFIX(go_regression_stat_t) *stat_)
991 {
992 	DOUBLE *log_ys;
993 	GORegressionResult result;
994 	int i;
995 	DOUBLE threshold = DEFAULT_THRESHOLD;
996 
997 	g_return_val_if_fail (dim >= 1, GO_REG_invalid_dimensions);
998 	g_return_val_if_fail (n >= 1, GO_REG_invalid_dimensions);
999 
1000 	log_ys = g_new (DOUBLE, n);
1001 	for (i = 0; i < n; i++)
1002 		if (ys[i] > 0)
1003 			log_ys[i] = SUFFIX(log) (ys[i]);
1004 		else {
1005 			result = GO_REG_invalid_data;
1006 			goto out;
1007 		}
1008 
1009 	if (affine) {
1010 		int i;
1011 		DOUBLE **xss2 = g_new (DOUBLE *, dim + 1);
1012 
1013 		xss2[0] = g_new (DOUBLE, n);
1014 		for (i = 0; i < n; i++)
1015 			xss2[0][i] = 1;
1016 		memcpy (xss2 + 1, xss, dim * sizeof (DOUBLE *));
1017 
1018 		result = SUFFIX(general_linear_regression)
1019 			(xss2, dim + 1, log_ys,
1020 			 n, res, stat_, affine, threshold);
1021 		g_free (xss2[0]);
1022 		g_free (xss2);
1023 	} else {
1024 		res[0] = 0;
1025 		result = SUFFIX(general_linear_regression)
1026 			(xss, dim, log_ys, n,
1027 			 res + 1, stat_, affine, threshold);
1028 	}
1029 
1030  out:
1031 	g_free (log_ys);
1032 	return result;
1033 }
1034 
1035 /**
1036  * go_power_regression:
1037  * @xss: x-vectors (i.e. independent data)
1038  * @dim: number of x-vectors
1039  * @ys: y-vector (dependent data)
1040  * @n: number of data points
1041  * @affine: if %TRUE, a non-one multiplier is allowed
1042  * @res: output place for constant[0] and root1[1], root2[2],... There will be dim+1 results.
1043  * @stat_: non-NULL storage for additional results.
1044  *
 * Performs multi-dimensional linear regression on the input points.
1046  * Fits to "y = b * x1^m1 * ... * xd^md " or equivalently to
1047  * "log y = log b + m1 * log x1 + ... + md * log xd".
1048  *
1049  * Returns: #GORegressionResult as above.
1050  **/
1051 GORegressionResult
SUFFIX(go_power_regression)1052 SUFFIX(go_power_regression) (MATRIX xss, int dim,
1053 			     const DOUBLE *ys, int n,
1054 			     gboolean affine,
1055 			     DOUBLE *res,
1056 			     SUFFIX(go_regression_stat_t) *stat_)
1057 {
1058 	DOUBLE *log_ys, **log_xss = NULL;
1059 	GORegressionResult result;
1060 	int i, j;
1061 	DOUBLE threshold = DEFAULT_THRESHOLD;
1062 
1063 	g_return_val_if_fail (dim >= 1, GO_REG_invalid_dimensions);
1064 	g_return_val_if_fail (n >= 1, GO_REG_invalid_dimensions);
1065 
1066 	log_ys = g_new (DOUBLE, n);
1067 	for (i = 0; i < n; i++)
1068 		if (ys[i] > 0)
1069 			log_ys[i] = SUFFIX(log) (ys[i]);
1070 		else {
1071 			result = GO_REG_invalid_data;
1072 			goto out;
1073 		}
1074 
1075 	ALLOC_MATRIX (log_xss, dim, n);
1076 	for (i = 0; i < dim; i++)
1077 	        for (j = 0; j < n; j++)
1078 		        if (xss[i][j] > 0)
1079 		                log_xss[i][j] = SUFFIX(log) (xss[i][j]);
1080 			else {
1081 			        result = GO_REG_invalid_data;
1082 				goto out;
1083 			}
1084 
1085 	if (affine) {
1086 		int i;
1087 		DOUBLE **log_xss2 = g_new (DOUBLE *, dim + 1);
1088 
1089 		log_xss2[0] = g_new (DOUBLE, n);
1090 		for (i = 0; i < n; i++)
1091 			log_xss2[0][i] = 1;
1092 		memcpy (log_xss2 + 1, log_xss, dim * sizeof (DOUBLE *));
1093 
1094 		result = SUFFIX(general_linear_regression)
1095 			(log_xss2, dim + 1, log_ys,
1096 			 n, res, stat_, affine, threshold);
1097 		g_free (log_xss2[0]);
1098 		g_free (log_xss2);
1099 	} else {
1100 		res[0] = 0;
1101 		result = SUFFIX(general_linear_regression)
1102 			(log_xss, dim, log_ys, n,
1103 			 res + 1, stat_, affine, threshold);
1104 	}
1105 
1106  out:
1107 	if (log_xss != NULL)
1108 		FREE_MATRIX (log_xss, dim, n);
1109 	g_free (log_ys);
1110 	return result;
1111 }
1112 
1113 
1114 /**
1115  * go_logarithmic_regression:
1116  * @xss: x-vectors (i.e. independent data)
1117  * @dim: number of x-vectors
1118  * @ys: y-vector (dependent data)
1119  * @n: number of data points
1120  * @affine: if %TRUE, a non-zero constant is allowed
1121  * @res: output place for constant[0] and factor1[1], factor2[2],... There will be dim+1 results.
1122  * @stat_: non-NULL storage for additional results.
1123  *
1124  * This is almost a copy of linear_regression and produces multi-dimensional
1125  * linear regressions on the input points after transforming xss to ln(xss).
 * Fits to "y = b + a1 * z1 + ... + ad * zd" with "zi = ln (xi)".
1127  * Problems with arrays in the calling function: see comment to
1128  * gnumeric_linest, which is also valid for gnumeric_logreg.
1129  *
1130  * (Errors: less than two points, all points on a vertical line, non-positive x data.)
1131  *
1132  * Returns: #GORegressionResult as above.
1133  **/
1134 GORegressionResult
SUFFIX(go_logarithmic_regression)1135 SUFFIX(go_logarithmic_regression) (MATRIX xss, int dim,
1136 				   const DOUBLE *ys, int n,
1137 				   gboolean affine,
1138 				   DOUBLE *res,
1139 				   SUFFIX(go_regression_stat_t) *stat_)
1140 {
1141         MATRIX log_xss;
1142 	GORegressionResult result;
1143 	int i, j;
1144 	DOUBLE threshold = DEFAULT_THRESHOLD;
1145 
1146 	g_return_val_if_fail (dim >= 1, GO_REG_invalid_dimensions);
1147 	g_return_val_if_fail (n >= 1, GO_REG_invalid_dimensions);
1148 
1149 	ALLOC_MATRIX (log_xss, dim, n);
1150 	for (i = 0; i < dim; i++)
1151 	        for (j = 0; j < n; j++)
1152 		        if (xss[i][j] > 0)
1153 		                log_xss[i][j] = SUFFIX(log) (xss[i][j]);
1154 			else {
1155 			        result = GO_REG_invalid_data;
1156 				goto out;
1157 			}
1158 
1159 
1160 	if (affine) {
1161 		int i;
1162 		DOUBLE **log_xss2 = g_new (DOUBLE *, dim + 1);
1163 
1164 		log_xss2[0] = g_new (DOUBLE, n);
1165 		for (i = 0; i < n; i++)
1166 			log_xss2[0][i] = 1;
1167 		memcpy (log_xss2 + 1, log_xss, dim * sizeof (DOUBLE *));
1168 
1169 		result = SUFFIX(general_linear_regression)
1170 			(log_xss2, dim + 1, ys, n,
1171 			 res, stat_, affine, threshold);
1172 		g_free (log_xss2[0]);
1173 		g_free (log_xss2);
1174 	} else {
1175 		res[0] = 0;
1176 		result = SUFFIX(general_linear_regression)
1177 			(log_xss, dim, ys, n,
1178 			 res + 1, stat_, affine, threshold);
1179 	}
1180 
1181  out:
1182 	FREE_MATRIX (log_xss, dim, n);
1183 	return result;
1184 }
1185 
1186 /**
1187  * go_logarithmic_fit:
1188  * @xs: x-vector (i.e. independent data)
1189  * @ys: y-vector (dependent data)
1190  * @n: number of data points
1191  * @res: output place for sign[0], a[1], b[2], c[3], and sum of squared residuals[4].
1192  *
1193  * Performs a two-dimensional non-linear fitting on the input points.
1194  * Fits to "y = a + b * ln (sign * (x - c))", with sign in {-1, +1}.
1195  * The graph is a logarithmic curve moved horizontally by c and possibly
1196  * mirrored across the y-axis (if sign = -1).
1197  *
1198  * Fits c (and sign) by iterative trials, but seems to be fast enough even
1199  * for automatic recomputation.
1200  *
1201  * Adapts c until a local minimum of squared residuals is reached. For each
1202  * new c tried out the corresponding a and b are calculated by linear
1203  * regression. If no local minimum is found, an error is returned. If there
1204  * is more than one local minimum, the one found is not necessarily the
1205  * smallest (i.e., there might be cases in which the returned fit is not the
 * best possible). If the shape of the point cloud is too different from
 * ``logarithmic'', either sign cannot be determined (error returned) or no
1208  * local minimum will be found.
1209  *
1210  * (Requires: at least 3 different x values, at least 3 different y values.)
1211  *
1212  * Returns: #GORegressionResult as above.
1213  */
1214 
1215 GORegressionResult
SUFFIX(go_logarithmic_fit)1216 SUFFIX(go_logarithmic_fit) (DOUBLE *xs, const DOUBLE *ys, int n, DOUBLE *res)
1217 {
1218 	SUFFIX(point_cloud_measure_type) point_cloud_measures;
1219 	int i, result;
1220 	gboolean more_2_y = 0, more_2_x = 0;
1221 
1222 	/* Store useful measures for using them here and in subfunctions.
1223 	 * The checking of n is paranoid -- the calling function should
1224 	 * have cared for that. */
1225 	g_return_val_if_fail (n > 2, GO_REG_invalid_dimensions);
1226 	result = SUFFIX(go_range_min) (xs, n, &(point_cloud_measures.min_x));
1227 	result = SUFFIX(go_range_max) (xs, n, &(point_cloud_measures.max_x));
1228 	result = SUFFIX(go_range_min) (ys, n, &(point_cloud_measures.min_y));
1229 	result = SUFFIX(go_range_max) (ys, n, &(point_cloud_measures.max_y));
1230 	result = SUFFIX(go_range_average) (ys, n, &(point_cloud_measures.mean_y));
1231 	/* Checking of error conditions. */
1232 	/* less than 2 different ys or less than 2 different xs */
1233 	g_return_val_if_fail (((point_cloud_measures.min_y !=
1234 				point_cloud_measures.max_y) &&
1235 			       (point_cloud_measures.min_x !=
1236 				point_cloud_measures.max_x)),
1237 			      GO_REG_invalid_data);
1238 	/* less than 3 different ys */
1239 	for (i=0; i<n; i++) {
1240 	        if ((ys[i] != point_cloud_measures.min_y) &&
1241 		    (ys[i] != point_cloud_measures.max_y)) {
1242 		        more_2_y = 1;
1243 			break;
1244 		}
1245 	}
1246 	g_return_val_if_fail (more_2_y, GO_REG_invalid_data);
1247 	/* less than 3 different xs */
1248 	for (i=0; i<n; i++) {
1249 	        if ((xs[i] != point_cloud_measures.min_x) &&
1250 		    (xs[i] != point_cloud_measures.max_x)) {
1251 		        more_2_x = 1;
1252 			break;
1253 		}
1254 	}
1255 	g_return_val_if_fail (more_2_x, GO_REG_invalid_data);
1256 
1257 	/* no errors */
1258 	result = SUFFIX(log_fitting) (xs, ys, n, res, &point_cloud_measures);
1259 	return result;
1260 }
1261 
1262 /* ------------------------------------------------------------------------- */
1263 
SUFFIX(go_regression_stat_t)1264 SUFFIX(go_regression_stat_t) *
1265 SUFFIX(go_regression_stat_new) (void)
1266 {
1267 	SUFFIX(go_regression_stat_t) * stat_ =
1268 		g_new0 (SUFFIX(go_regression_stat_t), 1);
1269 
1270 	stat_->se = NULL;
1271 	stat_->t = NULL;
1272 	stat_->xbar = NULL;
1273 	stat_->ref_count = 1;
1274 
1275 	return stat_;
1276 }
1277 
1278 /* ------------------------------------------------------------------------- */
1279 
1280 void
SUFFIX(go_regression_stat_destroy)1281 SUFFIX(go_regression_stat_destroy) (SUFFIX(go_regression_stat_t) *stat_)
1282 {
1283 	if (!stat_)
1284 		return;
1285 
1286 	g_return_if_fail (stat_->ref_count > 0);
1287 
1288 	stat_->ref_count--;
1289 	if (stat_->ref_count > 0)
1290 		return;
1291 
1292 	g_free (stat_->se);
1293 	g_free (stat_->t);
1294 	g_free (stat_->xbar);
1295 	g_free (stat_);
1296 }
1297 
1298 /* ------------------------------------------------------------------------- */
1299 
1300 #ifdef DEFINE_COMMON
1301 #define DELTA      0.01
1302 /* FIXME:  I pulled this number out of my hat.
1303  * I need some testing to pick a sensible value.
1304  */
1305 #define MAX_STEPS   200
1306 #endif
1307 
1308 /*
1309  * SYNOPSIS:
1310  *      result = derivative( f, &df, x, par, i)
1311  *
1312  * Approximates the partial derivative of a given function, at (x;params)
 * with respect to the i-th parameter.  The result
 * is stored in 'df'.
1315  *
1316  * See the header file for more information.
1317  */
1318 static GORegressionResult
SUFFIX(derivative)1319 SUFFIX(derivative) (SUFFIX(GORegressionFunction) f,
1320 	    DOUBLE *df,
1321 	    DOUBLE *x, /* Only one point, not the whole data set. */
1322 	    DOUBLE *par,
1323 	    int index)
1324 {
1325 	DOUBLE y1, y2;
1326 	GORegressionResult result;
1327 	DOUBLE par_save = par[index];
1328 
1329 	par[index] = par_save - DELTA;
1330 	result = (*f) (x, par, &y1);
1331 	if (result != GO_REG_ok) {
1332 		par[index] = par_save;
1333 		return result;
1334 	}
1335 
1336 	par[index] = par_save + DELTA;
1337 	result = (*f) (x, par, &y2);
1338 	if (result != GO_REG_ok) {
1339 		par[index] = par_save;
1340 		return result;
1341 	}
1342 
1343 #ifdef DEBUG
1344 	g_printerr ("y1 = %lf\n", y1);
1345 	g_printerr ("y2 = %lf\n", y2);
1346 	g_printerr ("DELTA = %lf\n",DELTA);
1347 #endif
1348 
1349 	*df = (y2 - y1) / (2 * DELTA);
1350 	par[index] = par_save;
1351 	return GO_REG_ok;
1352 }
1353 
1354 /*
1355  * SYNOPSIS:
1356  *   result = chi_squared (f, xvals, par, yvals, sigmas, x_dim, &chisq)
1357  *
1358  *                            /  y  - f(x ; par) \ 2
1359  *            2              |    i      i        |
1360  *         Chi   ==   Sum (  | ------------------ |   )
1361  *                            \     sigma        /
1362  *                                       i
1363  *
1364  * sigmas  -> Measurement errors in the dataset (along the y-axis).
1365  *            NULL means "no errors available", so they are all set to 1.
1366  *
1367  * x_dim   -> Number of data points.
1368  *
1369  * This value is not very meaningful without the sigmas.  However, it is
1370  * still useful for the fit.
1371  */
1372 static GORegressionResult
SUFFIX(chi_squared)1373 SUFFIX(chi_squared) (SUFFIX(GORegressionFunction) f,
1374 		     CONSTMATRIX xvals,    /* The entire data set. */
1375 		     DOUBLE *par,
1376 		     DOUBLE *yvals,   /* Ditto. */
1377 		     DOUBLE *sigmas,  /* Ditto. */
1378 		     int x_dim,       /* Number of data points. */
1379 		     DOUBLE *chisq)   /* Chi Squared */
1380 {
1381 	int i;
1382 	GORegressionResult result;
1383 	DOUBLE tmp, y;
1384 	*chisq = 0;
1385 
1386 	for (i = 0; i < x_dim; i++) {
1387 		result = f (xvals[i], par, &y);
1388 		if (result != GO_REG_ok)
1389 			return result;
1390 
1391 		tmp = (yvals[i] - y ) / (sigmas ? sigmas[i] : 1);
1392 
1393 		*chisq += tmp * tmp;
1394 	}
1395 
1396 	return GO_REG_ok;
1397 }
1398 
1399 
1400 /*
1401  * SYNOPSIS:
1402  *      result = chi_derivative (f, &dchi, xvals, par, i, yvals,
1403  *                               sigmas, x_dim)
1404  *
1405  * This is a simple adaptation of the derivative() function specific to
1406  * the Chi Squared.
1407  */
1408 static GORegressionResult
SUFFIX(chi_derivative)1409 SUFFIX(chi_derivative) (SUFFIX(GORegressionFunction) f,
1410 			DOUBLE *dchi,
1411 			MATRIX xvals, /* The entire data set. */
1412 			DOUBLE *par,
1413 			int index,
1414 			DOUBLE *yvals,  /* Ditto. */
1415 			DOUBLE *sigmas, /* Ditto. */
1416 			int x_dim)
1417 {
1418 	DOUBLE y1, y2;
1419 	GORegressionResult result;
1420 	DOUBLE par_save = par[index];
1421 
1422 	par[index] = par_save - DELTA;
1423 	result = SUFFIX(chi_squared) (f, xvals, par, yvals, sigmas, x_dim, &y1);
1424 	if (result != GO_REG_ok) {
1425 		par[index] = par_save;
1426 		return result;
1427 	}
1428 
1429 	par[index] = par_save + DELTA;
1430 	result = SUFFIX(chi_squared) (f, xvals, par, yvals, sigmas, x_dim, &y2);
1431 	if (result != GO_REG_ok) {
1432                 par[index] = par_save;
1433 		return result;
1434 	}
1435 
1436 #ifdef DEBUG
1437 	g_printerr ("y1 = %lf\n", y1);
1438 	g_printerr ("y2 = %lf\n", y2);
1439 	g_printerr ("DELTA = %lf\n", DELTA);
1440 #endif
1441 
1442 	*dchi = (y2 - y1) / (2 * DELTA);
1443 	par[index] = par_save;
1444 	return GO_REG_ok;
1445 }
1446 
1447 /*
1448  * SYNOPSIS:
1449  *   result = coefficient_matrix (A, f, xvals, par, yvals, sigmas,
1450  *                                x_dim, p_dim, r)
1451  *
1452  * RETURNS:
1453  *   The coefficient matrix of the LM method.
1454  *
 * DETAILS:
1456  *   The coefficient matrix is defined by
1457  *
1458  *            N        1      df  df
1459  *     A   = Sum  ( -------   --  --  ( i == j ? 1 + r : 1 ) a)
1460  *      ij   k=1    sigma^2   dp  dp
1461  *                       k      i   j
1462  *
1463  * A      -> p_dim X p_dim coefficient matrix.  MUST ALREADY BE ALLOCATED.
1464  *
1465  * sigmas -> Measurement errors in the dataset (along the y-axis).
1466  *           NULL means "no errors available", so they are all set to 1.
1467  *
1468  * x_dim  -> Number of data points.
1469  *
1470  * p_dim  -> Number of parameters.
1471  *
1472  * r      -> Positive constant.  Its value is altered during the LM procedure.
1473  */
1474 
1475 static GORegressionResult
SUFFIX(coefficient_matrix)1476 SUFFIX(coefficient_matrix) (MATRIX A, /* Output matrix. */
1477 			    SUFFIX(GORegressionFunction) f,
1478 			    MATRIX xvals, /* The entire data set. */
1479 			    DOUBLE *par,
1480 			    DOUBLE *yvals,  /* Ditto. */
1481 			    DOUBLE *sigmas, /* Ditto. */
1482 			    int x_dim,      /* Number of data points. */
1483 			    int p_dim,      /* Number of parameters.  */
1484 			    DOUBLE r)
1485 {
1486 	int i, j, k;
1487 	GORegressionResult result;
1488 	DOUBLE df_i, df_j;
1489 	DOUBLE sum, sigma;
1490 
1491 	/* Notice that the matrix is symmetric.  */
1492 	for (i = 0; i < p_dim; i++) {
1493 		for (j = 0; j <= i; j++) {
1494 			sum = 0;
1495 			for (k = 0; k < x_dim; k++) {
1496 				result = SUFFIX(derivative) (f, &df_i, xvals[k],
1497 						     par, i);
1498 				if (result != GO_REG_ok)
1499 					return result;
1500 
1501 				result = SUFFIX(derivative) (f, &df_j, xvals[k],
1502 						     par, j);
1503 				if (result != GO_REG_ok)
1504 					return result;
1505 
1506 				sigma = (sigmas ? sigmas[k] : 1);
1507 
1508 				sum += (df_i * df_j) / (sigma * sigma) *
1509 					(i == j ? 1 + r : 1) ;
1510 			}
1511 			A[i][j] = A[j][i] = sum;
1512 		}
1513 	}
1514 
1515 	return GO_REG_ok;
1516 }
1517 
1518 
1519 /*
1520  * SYNOPSIS:
1521  *   result = parameter_errors (f, xvals, par, yvals, sigmas,
1522  *                              x_dim, p_dim, errors)
1523  *
1524  * Returns the errors associated with the parameters.
1525  * If an error is infinite, it is set to -1.
1526  *
1527  * sigmas  -> Measurement errors in the dataset (along the y-axis).
1528  *            NULL means "no errors available", so they are all set to 1.
1529  *
1530  * x_dim   -> Number of data points.
1531  *
1532  * p_dim   -> Number of parameters.
1533  *
1534  * errors  -> MUST ALREADY BE ALLOCATED.
1535  */
1536 
1537 /* FIXME:  I am not happy with the behaviour with infinite errors.  */
1538 static GORegressionResult
SUFFIX(parameter_errors)1539 SUFFIX(parameter_errors) (SUFFIX(GORegressionFunction) f,
1540 			  MATRIX xvals, /* The entire data set. */
1541 			  DOUBLE *par,
1542 			  DOUBLE *yvals,  /* Ditto. */
1543 			  DOUBLE *sigmas, /* Ditto. */
1544 			  int x_dim,      /* Number of data points. */
1545 			  int p_dim,      /* Number of parameters.  */
1546 			  DOUBLE *errors)
1547 {
1548 	GORegressionResult result;
1549 	MATRIX A;
1550 	int i;
1551 
1552 	ALLOC_MATRIX (A, p_dim, p_dim);
1553 
1554 	result = SUFFIX(coefficient_matrix) (A, f, xvals, par, yvals, sigmas,
1555 				     x_dim, p_dim, 0);
1556 	if (result == GO_REG_ok) {
1557 		for (i = 0; i < p_dim; i++)
1558 			/* FIXME: these were "[i][j]" which makes no sense.  */
1559 			errors[i] = (A[i][i] != 0
1560 				     ? 1 / SUFFIX(sqrt) (A[i][i])
1561 				     : -1);
1562 	}
1563 
1564 	FREE_MATRIX (A, p_dim, p_dim);
1565 	return result;
1566 }
1567 
1568 /**
1569  * go_non_linear_regression:
1570  * @f: (scope call): the model function
1571  * @xvals: independent values.
1572  * @par: model parameters.
1573  * @yvals: dependent values.
 * @sigmas: standard deviations for the dependent values.
1575  * @x_dim: Number of data points.
1576  * @p_dim: Number of parameters.
1577  * @chi: Chi Squared of the final result.  This value is not very
1578  * meaningful without the sigmas.
1579  * @errors: MUST ALREADY BE ALLOCATED.  These are the approximated standard
1580  * deviation for each parameter.
1581  *
1582  * SYNOPSIS:
1583  *   result = non_linear_regression (f, xvals, par, yvals, sigmas,
1584  *                                   x_dim, p_dim, &chi, errors)
1585  * Non linear regression.
1586  * Returns: the results of the non-linear regression from the given initial
1587  * values.
1588  * The resulting parameters are placed back into @par.
1589  **/
1590 /**
1591  * go_non_linear_regressionl:
1592  * @f: (scope call): the model function
1593  * @xvals: independent values.
1594  * @par: model parameters.
1595  * @yvals: dependent values.
 * @sigmas: standard deviations for the dependent values.
1597  * @x_dim: Number of data points.
1598  * @p_dim: Number of parameters.
1599  * @chi: Chi Squared of the final result.  This value is not very
1600  * meaningful without the sigmas.
1601  * @errors: MUST ALREADY BE ALLOCATED.  These are the approximated standard
1602  * deviation for each parameter.
1603  *
1604  * SYNOPSIS:
1605  *   result = non_linear_regression (f, xvals, par, yvals, sigmas,
1606  *                                   x_dim, p_dim, &chi, errors)
1607  * Non linear regression.
1608  * Returns: the results of the non-linear regression from the given initial
1609  * values.
1610  * The resulting parameters are placed back into @par.
1611  **/
1612 GORegressionResult
SUFFIX(go_non_linear_regression)1613 SUFFIX(go_non_linear_regression) (SUFFIX(GORegressionFunction) f,
1614 				  MATRIX xvals, /* The entire data set. */
1615 				  DOUBLE *par,
1616 				  DOUBLE *yvals,  /* Ditto. */
1617 				  DOUBLE *sigmas, /* Ditto. */
1618 				  int x_dim,      /* Number of data points. */
1619 				  int p_dim,      /* Number of parameters.  */
1620 				  DOUBLE *chi,
1621 				  DOUBLE *errors)
1622 {
1623 	DOUBLE r = 0.001; /* Pick a conservative initial value. */
1624 	DOUBLE *b, **A;
1625 	DOUBLE *dpar;
1626 	DOUBLE *tmp_par;
1627 	DOUBLE chi_pre, chi_pos, dchi;
1628 	GORegressionResult result;
1629 	int i, count;
1630 
1631 	result = SUFFIX(chi_squared) (f, xvals, par, yvals, sigmas, x_dim, &chi_pre);
1632 	if (result != GO_REG_ok)
1633 		return result;
1634 
1635 	ALLOC_MATRIX (A, p_dim, p_dim);
1636 	dpar    = g_new (DOUBLE, p_dim);
1637 	tmp_par = g_new (DOUBLE, p_dim);
1638 	b       = g_new (DOUBLE, p_dim);
1639 #ifdef DEBUG
1640 	g_printerr ("Chi Squared : %lf", chi_pre);
1641 #endif
1642 
1643 	for (count = 0; count < MAX_STEPS; count++) {
1644 		for (i = 0; i < p_dim; i++) {
1645 			/*
1646 			 *          d Chi
1647 			 *  b   ==  -----
1648 			 *   k       d p
1649 			 *              k
1650 			 */
1651 			result = SUFFIX(chi_derivative) (f, &dchi, xvals, par, i,
1652 						 yvals, sigmas, x_dim);
1653 			if (result != GO_REG_ok)
1654 				goto out;
1655 
1656 			b[i] = - dchi;
1657 		}
1658 
1659 		result = SUFFIX(coefficient_matrix) (A, f, xvals, par, yvals,
1660 					     sigmas, x_dim, p_dim, r);
1661 		if (result != GO_REG_ok)
1662 			goto out;
1663 
1664 		result = SUFFIX(go_linear_solve) (A, b, p_dim, dpar);
1665 		if (result != GO_REG_ok)
1666 			goto out;
1667 
1668 		for(i = 0; i < p_dim; i++)
1669 			tmp_par[i] = par[i] + dpar[i];
1670 
1671 		result = SUFFIX(chi_squared) (f, xvals, tmp_par, yvals, sigmas,
1672 				      x_dim, &chi_pos);
1673 		if (result != GO_REG_ok)
1674 			goto out;
1675 
1676 #ifdef DEBUG
1677 		g_printerr ("Chi Squared : %lf", chi_pre);
1678 		g_printerr ("Chi Squared : %lf", chi_pos);
1679 		g_printerr ("r  :  %lf", r);
1680 #endif
1681 
1682 		if (chi_pos <= chi_pre + DELTA / 2) {
1683 			/* There is improvement */
1684 			r /= 10;
1685 			for(i = 0; i < p_dim; i++)
1686 				par[i] = tmp_par[i];
1687 
1688 			if (SUFFIX(fabs) (chi_pos - chi_pre) < DELTA)
1689 				break;
1690 
1691 			chi_pre = chi_pos;
1692 		} else {
1693 			r *= 10;
1694 		}
1695 	}
1696 
1697 	result = SUFFIX(parameter_errors) (f, xvals, par, yvals, sigmas,
1698 					   x_dim, p_dim, errors);
1699 	if (result != GO_REG_ok)
1700 		goto out;
1701 
1702 	*chi = chi_pos;
1703 
1704  out:
1705 	FREE_MATRIX (A, p_dim, p_dim);
1706 	g_free (dpar);
1707 	g_free (tmp_par);
1708 	g_free (b);
1709 
1710 	return result;
1711 }
1712 
1713 /*
1714  * Compute the diagonal of A (AT A)^-1 AT
1715  *
1716  * A is full-rank m-times-n.
1717  * d is output for m-element diagonal.
1718  */
1719 
GORegressionResult
SUFFIX(go_linear_regression_leverage) (MATRIX A, DOUBLE *d, int m, int n)
{
	SUFFIX(GOQuadMatrix) *qA;
	SUFFIX(GOQuadQR) *qr;
	void *state = SUFFIX(go_quad_start) ();	/* begin quad-precision section */
	GORegressionResult regres;
	DOUBLE threshold = DEFAULT_THRESHOLD;
	DOUBLE *xscale, *Aj;
	int i, j;

	/*
	 * Leverages are independent of column scaling, so each column is
	 * rescaled (see calc_scale) for numerical benefit.
	 */
	xscale = g_new (DOUBLE, n);
	Aj = g_new (DOUBLE, m);
	for (j = 0; j < n; j++) {
		/* Copy column j into a contiguous buffer for calc_scale.  */
		for (i = 0; i < m; i++)
			Aj[i] = A[i][j];
		xscale[j] = SUFFIX(calc_scale) (Aj, m);
	}
	g_free (Aj);

	/* QR-decompose the scaled matrix in quad precision.  */
	qA = SUFFIX(quad_matrix_from_matrix) (A, m, n, xscale);
	qr = SUFFIX(go_quad_qr_new) (qA);
	if (qr) {
		int k;
		QUAD *b = g_new (QUAD, n);
		const SUFFIX(GOQuadMatrix) *R = SUFFIX(go_quad_qr_r) (qr);
		DOUBLE emin, emax;

		/* Use the eigenvalue range of R to flag near-singularity;
		 * the computation continues either way.  */
		SUFFIX(go_quad_matrix_eigen_range) (R, &emin, &emax);
		regres = (emin > emax * threshold)
			? GO_REG_ok
			: GO_REG_singular;

		/* For each data row k, compute d[k] = (A (AT A)^-1 AT)_kk
		 * via two triangular solves against R.  */
		for (k = 0; k < m; k++) {
			int i;
			QUAD acc = SUFFIX(go_quad_zero);

			/* b = AT e_k  */
			for (i = 0; i < n; i++)
				b[i] = qA->data[k][i];

			/* Solve R^T b = AT e_k */
			if (SUFFIX(go_quad_matrix_fwd_solve) (R, b, b, FALSE)) {
				regres = GO_REG_singular;
				break;
			}

			/* Solve R newb = b */
			if (SUFFIX(go_quad_matrix_back_solve) (R, b, b, FALSE)) {
				regres = GO_REG_singular;
				break;
			}

			/* acc = (Ab)_k */
			for (i = 0; i < n; i++) {
				QUAD p;
				SUFFIX(go_quad_mul) (&p, &qA->data[k][i], &b[i]);
				SUFFIX(go_quad_add) (&acc, &acc, &p);
			}

			d[k] = SUFFIX(go_quad_value) (&acc);
		}

		g_free (b);
		SUFFIX(go_quad_qr_free) (qr);
	} else
		regres = GO_REG_invalid_data;

	SUFFIX(go_quad_matrix_free) (qA);
	g_free (xscale);

	SUFFIX(go_quad_end) (state);

	return regres;
}
1798 
1799 /*
1800  * Compute the pseudo-inverse of a matrix, see
1801  *     http://en.wikipedia.org/wiki/Moore%E2%80%93Penrose_pseudoinverse
1802  *
1803  * Think of this as A+ = (AT A)^-1 AT, except that the pseudo-inverse
1804  * is defined even if AT*A isn't invertible.
1805  *
1806  * Properties:
1807  *
1808  *     A  A+ A  = A
1809  *     A+ A  A+ = A+
1810  *     (A A+)^T = A A+
1811  *     (A+ A)^T = A+ A
1812  *
1813  * assuming threshold==0 and no rounding errors.
1814  */
1815 
void
SUFFIX(go_matrix_pseudo_inverse) (CONSTMATRIX A, int m, int n,
				  DOUBLE threshold,
				  MATRIX B)
{
	SUFFIX(GOQuadMatrix) *qA, *qZ;
	void *state;

	/* Compute the Moore-Penrose pseudo-inverse in quad precision and
	 * copy the result into B.
	 * NOTE(review): if the quad-precision computation fails
	 * (qZ == NULL), B is silently left untouched -- confirm callers
	 * pre-initialize it.  */
	state = SUFFIX(go_quad_start) ();
	qA = SUFFIX(quad_matrix_from_matrix) (A, m, n, NULL);
	qZ = SUFFIX(go_quad_matrix_pseudo_inverse) (qA, threshold);
	SUFFIX(go_quad_matrix_free) (qA);
	if (qZ) {
		SUFFIX(copy_quad_matrix_to_matrix) (B, qZ);
		SUFFIX(go_quad_matrix_free) (qZ);
	}
	SUFFIX(go_quad_end) (state);
}
1834