/*
 * $Id: regress.i,v 1.1 2005-09-18 22:06:18 dhmunro Exp $
 * linear regression and related statistical functions
 * after Numerical Recipes (Press et al.) chapter 14
 */
/* Copyright (c) 2005, The Regents of the University of California.
 * All rights reserved.
 * This file is part of yorick (http://yorick.sourceforge.net).
 * Read the accompanying LICENSE file for details.
 */

func regress(y, x, &axes, &vals, &chi2, sigy=, rcond=)
/* DOCUMENT model = regress(y, [x1,x2,x3,...])
 *       or model = regress(y, [x1,x2,x3,...], axes, vals, chi2)
 *
 * Performs linear regression analysis, that is, a linear least squares
 * fit of the parametric model
 *    y = model(1)*x1 + model(2)*x2 + model(3)*x3 + ...
 *      = model(+) * [x1,x2,x3,...](..,+)
 * to the supplied data Y, [X1,X2,X3,...]. Y is an array, and the
 * Xi arrays should have the same dimensions as Y, or more accurately
 * [X1,X2,X3,...] must have one trailing dimension beyond the dimensions
 * of Y; the length of the trailing dimension is the number of parameters
 * in the model.
 *
 * Use the sigy= keyword to pass in the standard deviations of the Y
 * values; sigy must be conformable with Y. The returned model has
 * minimum chi2, where
 *    chi2 = sum( (y - model(+) * [x1,x2,x3,...](..,+))^2 / sigy^2 )
 * The default is sigy=1, that is, all points have equal weight in chi2.
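 *
 * For example (illustrative; dy is assumed to hold the measurement
 * errors of the y values):
 *    model = regress(y, [1,x], sigy=dy);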
 *
 * The optional AXES, VALS, and CHI2 arguments are outputs. AXES are
 * the axes of the error ellipsoid of the fitted parameters, and VALS
 * are the corresponding singular values. AXES(i,) is the vector in
 * the space of model() corresponding to VALS(i). The VALS are
 * non-negative and arranged in descending order. CHI2 is the
 * chi2 value for the returned model, divided by the number of
 * degrees of freedom in the fit, numberof(y)-sum(vals>rcond*vals(1)).
 * AXES(i,) are mutually orthogonal unit vectors; you should arrange
 * the magnitudes of X and Y so this makes sense - their scale factors
 * may matter. Usually this means arranging that the magnitudes of
 * the elements of the returned model do not differ too wildly.
 *
 * Often, and unexpectedly, the data do not permit a definitive choice
 * of the model(); there may be entire subspaces of the model space
 * which produce indistinguishably good fits to the data. The signature
 * of this problem is that some of the VALS may be zero or very small.
 * The rcond= keyword permits you to specify how small a singular
 * value, relative to the largest, VALS(1), you are willing to consider.
 * The AXES(i,) corresponding to VALS(i) smaller than rcond*VALS(1)
 * will be given zero contribution to the returned model(). The default
 * value is rcond=1.e-9. If any of the returned VALS is less than
 * rcond*VALS(1), you should either change the scales of Y or some X,
 * or remove some of the X (the ones which aren't contributing) to
 * eliminate the problem. The regress function reverses the sign of
 * any VALS which are less than rcond*VALS(1), so you can quickly
 * identify them.
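 *
 * For example (illustrative), you can retrieve the outputs and check
 * whether any components were discarded:
 *    model = regress(y, [1,x,x^2], axes, vals, chi2);
 *    if (anyof(vals < 0.)) write, "some singular values were discarded";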
 *
 * Examples:
 *    ab = regress(y, [1,x]);
 *      ab(1)+ab(2)*x is best fit line to y(x)
 *    ab = regress(y, [1,x,x^2,x^3]);
 *      poly(x,ab(1),ab(2),ab(3),ab(4)) is best fit cubic to y(x)
 *    ab = regress(y, [cos(x),sin(x)]);
 *      ab(1)*cos(x)+ab(2)*sin(x) is best fit period 2*pi sine wave to y(x)
 *
 * SEE ALSO: regress_cov
 */
{
  /* flatten the data; each column of x is one basis function
     evaluated at the points of y */
  y = double(y(*));
  x = double(x(*,));
  dims = dimsof(x);
  n = numberof(y);
  if (dims(1)!=2 || dims(2)!=n) error, "bad x dimensions";
  if (is_void(sigy)) sigy = 1. + 0.*y;
  else sigy = 1./sigy(*);
  if (is_void(rcond)) rcond = 1.e-9;
  else rcond = min(max(rcond,1.e-13),0.5);
  /* sigy now holds reciprocal errors; weighting the data and basis
     functions makes least squares minimize chi2 */
  x *= sigy;
  y *= sigy;
  /* singular value decomposition x = u * diag(vals) * axes */
  local u;
  vals = SVdec(x, u, axes);
  u = y(+) * u(+,);               /* data in left singular vector basis */
  mask = vals > rcond*vals(1);    /* keep well-conditioned components */
  np = sum(mask);
  rvals = mask / (vals + !mask);  /* pseudo-inverse of vals, 0 if masked */
  if (np < numberof(mask)) vals(where(!mask)) *= -1.;  /* flag discards */
  u *= rvals;
  model = u(+) * axes(+,);
  /* chi2 per remaining degree of freedom for the returned model */
  chi2 = model(+)*x(,+) - y;
  chi2 = sum(chi2*chi2) / max(n - np, 1);
  return model;
}

func regress_cov(axes, vals)
/* DOCUMENT cov = regress_cov(axes, vals)
 *       or cov = chi2 * regress_cov(axes, vals)
 *
 * Return the covariance matrix for the model returned by regress,
 * where AXES and VALS are the values returned by regress. This
 * is a symmetric matrix representing
 *    covariance(model(i),model(j))
 * that is, the variances (on the diagonal i=j) and correlations
 * of the model parameters.
 *
 * If you did not specify the sigy= keyword in the original call to
 * regress, then the absolute magnitudes of the covariance matrix
 * elements do not mean much. If you wish to use the quality of the
 * fit itself as an estimate of the errors sigy in the original Y
 * values, assuming they are all equal, you can multiply by the CHI2
 * returned by regress, as in the second form above, to produce an
 * estimated error in the fit. See Numerical Recipes for caveats,
 * but this is what commercial statistics software generally does.
 *
 * cov(1::numberof(vals)+1) are the variances of the model parameters
 * themselves.
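 *
 * For example (illustrative), standard errors for the parameters of an
 * unweighted fit can be estimated as:
 *    model = regress(y, [1,x], axes, vals, chi2);
 *    cov = chi2 * regress_cov(axes, vals);
 *    err = sqrt(cov(1::numberof(vals)+1));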
 *
 * SEE ALSO: regress
 */
{
  /* cov = axes^T * diag(1/vals^2) * axes, omitting any components
     whose vals were flagged (made negative or zero) by regress */
  mask = vals > 0.;
  axes *= mask / (vals + !mask);
  return axes(+,) * axes(+,);
}
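
/* Illustrative usage sketch (not part of the library; data synthetic,
 * names arbitrary):
 *    x = span(0., 10., 200);
 *    y = 1.5 + 0.3*x + 0.1*(random(200)-0.5);
 *    ab = regress(y, [1,x], axes, vals, chi2);
 *    cov = chi2 * regress_cov(axes, vals);
 * ab should recover the intercept 1.5 and slope 0.3, and
 * sqrt(cov(1::numberof(vals)+1)) gives their estimated uncertainties.
 */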