1 /* DataModeler.cpp
2 *
3 * Copyright (C) 2014-2021 David Weenink, 2017 Paul Boersma
4 *
5 * This code is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or (at
8 * your option) any later version.
9 *
10 * This code is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * ainteger with this work. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #include "DataModeler.h"
20 #include "NUM2.h"
21 #include "NUMmachar.h"
22 #include "SVD.h"
23 #include "Strings_extensions.h"
24 #include "Sound_and_LPC_robust.h"
25 #include "Table_extensions.h"
26
27 #include "oo_DESTROY.h"
28 #include "DataModeler_def.h"
29 #include "oo_COPY.h"
30 #include "DataModeler_def.h"
31 #include "oo_EQUAL.h"
32 #include "DataModeler_def.h"
33 #include "oo_CAN_WRITE_AS_ENCODING.h"
34 #include "DataModeler_def.h"
35 #include "oo_WRITE_TEXT.h"
36 #include "DataModeler_def.h"
37 #include "oo_WRITE_BINARY.h"
38 #include "DataModeler_def.h"
39 #include "oo_READ_TEXT.h"
40 #include "DataModeler_def.h"
41 #include "oo_READ_BINARY.h"
42 #include "DataModeler_def.h"
43 #include "oo_DESCRIPTION.h"
44 #include "DataModeler_def.h"
45
46 #include "enums_getText.h"
47 #include "DataModeler_enums.h"
48 #include "enums_getValue.h"
49 #include "DataModeler_enums.h"
50
51 Thing_implement (DataModeler, Function, 1);
52
v_info()53 void structDataModeler :: v_info () {
54 MelderInfo_writeLine (U" Time domain:");
55 MelderInfo_writeLine (U" Start time: ", xmin, U" seconds");
56 MelderInfo_writeLine (U" End time: ", xmax, U" seconds");
57 MelderInfo_writeLine (U" Total duration: ", xmax - xmin, U" seconds");
58 double ndf, rSquared = DataModeler_getCoefficientOfDetermination (this, nullptr, nullptr);
59 double probability, chisq = DataModeler_getChiSquaredQ (this, & probability, & ndf);
60 MelderInfo_writeLine (U" Fit:");
61 MelderInfo_writeLine (U" Number of data points: ", numberOfDataPoints);
62 MelderInfo_writeLine (U" Number of parameters: ", numberOfParameters);
63 MelderInfo_writeLine (U" Each data point has ", (weighData == kDataModelerWeights::EQUAL_WEIGHTS ? U" the same weight (estimated)." :
64 ( weighData == kDataModelerWeights::ONE_OVER_SIGMA ? U"a different weight (sigmaY)." :
65 ( weighData == kDataModelerWeights::RELATIVE_ ? U"a different relative weight (Y_value/sigmaY)." :
66 U"a different weight (SQRT(sigmaY))." ) ) ));
67 MelderInfo_writeLine (U" Chi squared: ", chisq);
68 MelderInfo_writeLine (U" Number of degrees of freedom: ", ndf);
69 MelderInfo_writeLine (U" Probability: ", probability);
70 MelderInfo_writeLine (U" R-squared: ", rSquared);
71 for (integer ipar = 1; ipar <= numberOfParameters; ipar ++) {
72 double sigma = ( parameters [ipar] .status == kDataModelerParameterStatus::FIXED_ ? 0 :
73 (isdefined (parameterCovariances -> data [ipar] [ipar]) ? sqrt (parameterCovariances -> data [ipar] [ipar]) : undefined) );
74 MelderInfo_writeLine (U" p [", ipar, U"] = ", parameters [ipar] .value, U"; sigma = ", sigma);
75 }
76 }
77
constant_evaluate(DataModeler,double,vector<structDataModelerParameter> p)78 static double constant_evaluate (DataModeler /* me */, double /* xin */, vector<structDataModelerParameter> p) {
79 return p [1].value;
80 }
81
constant_evaluateBasisFunctions(DataModeler me,double xin,VEC terms)82 static void constant_evaluateBasisFunctions (DataModeler me, double xin, VEC terms) {
83 terms <<= 1.0;
84 }
85
linear_evaluate(DataModeler me,double xin,vector<structDataModelerParameter> p)86 static double linear_evaluate (DataModeler me, double xin, vector<structDataModelerParameter> p) {
87 return undefined;
88 }
89
linear_evaluateBasisFunctions(DataModeler me,double xin,VEC terms)90 static void linear_evaluateBasisFunctions (DataModeler me, double xin, VEC terms) {
91 terms <<= undefined;
92 }
93
polynomial_evaluate(DataModeler me,double xin,vector<structDataModelerParameter> p)94 static double polynomial_evaluate (DataModeler me, double xin, vector<structDataModelerParameter> p) {
95 Melder_assert (p.size == my numberOfParameters);
96 /*
97 From domain [xmin, xmax] to domain [-(xmax -xmin)/2, (xmax-xmin)/2]
98 */
99 const double x = (2.0 * xin - my xmin - my xmax) / 2.0;
100 double xpi = 1.0, result = p [1] .value;
101 for (integer ipar = 2; ipar <= my numberOfParameters; ipar ++) {
102 xpi *= x;
103 result += p [ipar] .value * xpi;
104 }
105 return result;
106 }
107
polynome_evaluateBasisFunctions(DataModeler me,double xin,VEC term)108 static void polynome_evaluateBasisFunctions (DataModeler me, double xin, VEC term) {
109 Melder_assert (term.size == my numberOfParameters);
110 /*
111 From domain [xmin, xmax] to domain [-(xmax -xmin)/2, (xmax-xmin)/2]
112 */
113 const double x = (2.0 * xin - my xmin - my xmax) / 2.0;
114 term [1] = 1.0;
115 for (integer ipar = 2; ipar <= my numberOfParameters; ipar ++)
116 term [ipar] = term [ipar - 1] * x;
117 }
118
legendre_evaluate(DataModeler me,double xin,vector<structDataModelerParameter> p)119 static double legendre_evaluate (DataModeler me, double xin, vector<structDataModelerParameter> p) {
120 Melder_assert (p.size == my numberOfParameters);
121 /*
122 From domain [xmin, xmax] to domain [-1, 1]
123 */
124 const double x = (2.0 * xin - my xmin - my xmax) / (my xmax - my xmin);
125 double pti, ptim1, ptim2 = 1.0, result = p [1] .value;
126 if (my numberOfParameters > 1) {
127 const double twox = 2.0 * x;
128 double f2 = x, d = 1.0;
129 result += p [2] .value * (ptim1 = x);
130 for (integer ipar = 3; ipar <= my numberOfParameters; ipar ++) {
131 const double f1 = d ++;
132 f2 += twox;
133 result += p [ipar] .value * (pti = (f2 * ptim1 - f1 * ptim2) / d);
134 ptim2 = ptim1;
135 ptim1 = pti;
136 }
137 }
138 return result;
139 }
140
legendre_evaluateBasisFunctions(DataModeler me,double xin,VEC term)141 static void legendre_evaluateBasisFunctions (DataModeler me, double xin, VEC term) {
142 Melder_assert (term.size == my numberOfParameters);
143 term [1] = 1.0;
144 /*
145 transform x from domain [xmin, xmax] to domain [-1, 1]
146 */
147 const double x = (2.0 * xin - my xmin - my xmax) / (my xmax - my xmin);
148 if (my numberOfParameters > 1) {
149 const double twox = 2.0 * x;
150 double f2 = term [2] = x, d = 1.0;
151 for (integer ipar = 3; ipar <= my numberOfParameters; ipar ++) {
152 const double f1 = d ++;
153 f2 += twox;
154 term [ipar] = (f2 * term [ipar - 1] - f1 * term [ipar - 2]) / d;
155 }
156 }
157 }
158
sigmoid_plus_constant_evaluate(DataModeler me,double xin,vector<structDataModelerParameter> p)159 static double sigmoid_plus_constant_evaluate (DataModeler me, double xin, vector<structDataModelerParameter> p) {
160 Melder_assert (p.size == my numberOfParameters);
161 /*
162 From domain [xmin, xmax] to domain [-(xmax -xmin)/2, (xmax-xmin)/2]
163 No need to translate because xin and p [3] are in the same domain.
164 */
165 double result = p [1].value;
166 if (p [2].value != 0.0)
167 result += p [2].value / (1.0 + exp (- (xin - p [3].value) / p [4].value));
168 return result;
169 }
170
sigmoid_evaluate(DataModeler me,double xin,vector<structDataModelerParameter> p)171 static double sigmoid_evaluate (DataModeler me, double xin, vector<structDataModelerParameter> p) {
172 Melder_assert (p.size == my numberOfParameters);
173 /*
174 From domain [xmin, xmax] to domain [-(xmax -xmin)/2, (xmax-xmin)/2]
175 No need to translate because xin and p [3] are in the same domain.
176 */
177 const double result = p [1].value / (1.0 + exp (- (xin - p [2].value) / p [3].value));
178 return result;
179 }
180
exponential_evaluate(DataModeler me,double xin,vector<structDataModelerParameter> p)181 static double exponential_evaluate (DataModeler me, double xin, vector<structDataModelerParameter> p) {
182 Melder_assert (p.size == my numberOfParameters);
183 /*
184 From domain [xmin, xmax] to domain [-(xmax -xmin)/2, (xmax-xmin)/2]
185 */
186 const double x = xin - 0.5 * (my xmin + my xmax);
187 return p [1].value * exp (p [2].value * x);
188 }
189
exponential_plus_constant_evaluate(DataModeler me,double xin,vector<structDataModelerParameter> p)190 static double exponential_plus_constant_evaluate (DataModeler me, double xin, vector<structDataModelerParameter> p) {
191 Melder_assert (p.size >= 3);
192 /*
193 From domain [xmin, xmax] to domain [-(xmax -xmin)/2, (xmax-xmin)/2]
194 */
195 const double x = xin - 0.5 * (my xmin + my xmax);
196 return p [1].value + p [2].value * exp (p [3].value * x);
197 }
198
dummy_evaluateBasisFunctions(DataModeler me,double xin,VEC term)199 static void dummy_evaluateBasisFunctions (DataModeler me, double xin, VEC term) {
200 term <<= undefined;
201 }
202
DataModeler_solveDesign(DataModeler me,constMAT const & design,constVEC const & y,autoMAT * covariance)203 autoVEC DataModeler_solveDesign (DataModeler me, constMAT const& design, constVEC const& y, autoMAT *covariance) {
204 Melder_require (design.nrow == y.size,
205 U"The design matrix and the estimate should have the same number of rows.");
206 autoSVD svd = SVD_createFromGeneralMatrix (design);
207 if (! NUMfpp)
208 NUMmachar ();
209 SVD_zeroSmallSingularValues (svd.get(), ( my tolerance > 0.0 ? my tolerance : my numberOfDataPoints * NUMfpp -> eps ));
210 autoVEC solution = SVD_solve (svd.get(), y);
211 if (covariance) {
212 autoMAT covar = SVD_getSquared (svd.get(), true);
213 *covariance = covar.move();
214 }
215 return solution;
216 }
217
218 /*
219 Model: y [i] = a * exp (b * x [i]), i=1..n, solve for a, b.
220 log(y(x)= log(a) + b * x is linear model
221 Precondition: y [i] > 0 || y [i] < 0
222 */
exponential_fit(DataModeler me)223 static void exponential_fit (DataModeler me) {
224 if (my parameters [1].status == kDataModelerParameterStatus::FIXED_ && my parameters [2].status == kDataModelerParameterStatus::FIXED_)
225 return;
226 autoVEC weights = DataModeler_getDataPointsWeights (me, my weighData);
227 double ymin, ymax;
228 DataModeler_getExtremaY (me, & ymin, & ymax);
229 const double sign = ymin * ymax;
230 Melder_require (sign >= 0.0,
231 U"All data should have the same sign.");
232 const double xtr = 0.5 * (my xmin + my xmax);
233 if (my parameters [1].status == kDataModelerParameterStatus::FIXED_) {
234 /*
235 Model: z(x) = b * x, where z(x) = log(y) - log (a)
236 A minimization of the squared error in the log domain gives greater weight to small values.
237 To compensate, we weigh with the y value. Weisstein, Eric W. "Least Squares Fitting--Exponential." From MathWorld--A Wolfram Web Resource. https://mathworld.wolfram.com/LeastSquaresFittingExponential.html
238 */
239 autoMAT design = zero_MAT (my numberOfDataPoints, 1);
240 autoVEC yEstimate = raw_VEC (my numberOfDataPoints);
241 integer index = 0;
242 for (integer k = 1; k <= my numberOfDataPoints; k ++) {
243 if (my data [k].status != kDataModelerData::INVALID) {
244 design [++ index] [1] = (my data [k].x - xtr) * weights [k] * my data [k].y;
245 yEstimate [index] = (log (my data [k].y) - log (my parameters [1].value)) * weights [k] * my data [k].y;
246 }
247 }
248 design.resize (index, 1);
249 yEstimate.resize (index);
250 autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
251 my parameters [2].value = solution [1];
252 } else if (my parameters [2].status == kDataModelerParameterStatus::FIXED_) {
253 /*
254 Model: y(x) = a * f(x), where f(x) = exp (b * x)
255 */
256 autoMAT design = zero_MAT (my numberOfDataPoints, 1);
257 autoVEC yEstimate = raw_VEC (my numberOfDataPoints);
258 integer index = 0;
259 for (integer k = 1; k <= my numberOfDataPoints; k ++) {
260 if (my data [k].status != kDataModelerData::INVALID) {
261 design [++ index] [1] = exp (my parameters [2].value * (my data [k].x - xtr)) * weights [k];
262 yEstimate [index] = my data [k].y * weights [k];
263 }
264 }
265 design.resize (index, 1);
266 yEstimate.resize (index);
267 autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
268 my parameters [1].value = solution [1];
269 } else {
270 /*
271 Model z(x)= a + b * x, where z(x) = log(y(x))
272 A minimization of the squared error in the log domain gives greater weight to small values.
273 To compensate we weigh with the y value.
274 */
275 autoMAT design = zero_MAT (my numberOfDataPoints, 2);
276 autoVEC yEstimate = raw_VEC (my numberOfDataPoints);
277 integer index = 0;
278 for (integer k = 1; k <= my numberOfDataPoints; k ++) {
279 if (my data [k].status != kDataModelerData::INVALID) {
280 design [++ index] [1] = 1.0 * my data [k].y * weights [k];
281 design [index] [2] = (my data [k].x - xtr) * my data [k].y * weights [k];
282 yEstimate [index] = log ( sign >= 0.0 ? my data [k].y : - my data [k].y ) * my data [k].y * weights [k];
283 }
284 }
285 design.resize (index, 2);
286 yEstimate.resize (index);
287 autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
288 const double a = exp (solution [1]);
289 my parameters [1].value = ( sign >= 0.0 ? a : - a );
290 my parameters [2].value = solution [2];
291 }
292 }
293
294 /*
295 Model: y [i] = constant + b * exp (c * x [i]), i=1..n, solve for constant, b & c.
296 Solution according to Jean Jacquelin (2009), Régressions et équations intégrales, https://fr.scribd.com/doc/14674814/Regressions-et-equations-integrales,
297 pages 16-17.
298 Precondition: x [i] must be increasing.
299 */
300
linear_exponent_evaluateBasisFunctions(DataModeler me,double xin,VEC term)301 static void linear_exponent_evaluateBasisFunctions (DataModeler me, double xin, VEC term) {
302 Melder_assert (term.size >= 2); // our model is a two parameter one!
303 /*
304 From domain [xmin, xmax] to domain [-(xmax -xmin)/2, (xmax-xmin)/2]
305 */
306 const double x = xin - 0.5 * (my xmin + my xmax);
307 term [1] = 1.0;
308 term [2] = exp (my parameters [3].value * x);
309 }
310
exponential_plus_constant_fit(DataModeler me)311 static void exponential_plus_constant_fit (DataModeler me) {
312 if (my parameters [1].status == kDataModelerParameterStatus::FIXED_) {
313 if (my parameters [2].status == kDataModelerParameterStatus::FIXED_ &&
314 my parameters [3].status == kDataModelerParameterStatus::FIXED_)
315 return;
316 /*
317 Model: z(x) = b * exp (c * x), where z(x) = y(x) - a.
318 */
319 autoDataModeler thee = DataModeler_createFromDataModeler (me, 2, kDataModelerFunction::EXPONENTIAL);
320 for (integer k = 1; k <= my numberOfDataPoints; k ++) {
321 if (thy data [k].status != kDataModelerData::INVALID) {
322 thy data [k].y -= my parameters [1].value;
323 }
324 }
325 DataModeler_fit (thee.get());
326 my parameters [2].value = thy parameters [1].value;
327 my parameters [3].value = thy parameters [2].value;
328 } else if (my parameters [3].status == kDataModelerParameterStatus::FIXED_) {
329 if (my parameters [2].status == kDataModelerParameterStatus::FIXED_) {
330 /*
331 Model: z(x)= a, where z(x) = y(x) - b * exp(c * x)
332 */
333 autoDataModeler thee = DataModeler_createFromDataModeler (me, 1, kDataModelerFunction::LINEAR);
334 thy f_evaluate = constant_evaluate;
335 thy f_evaluateBasisFunctions = constant_evaluateBasisFunctions;
336 my parameters [1].value = 0.0;
337 for (integer k = 1; k <= my numberOfDataPoints; k ++) {
338 if (thy data [k].status != kDataModelerData::INVALID) {
339 thy data [k].y -= my f_evaluate (me, thy data [k].x, my parameters.get());// z(x) = y(x) - b * exp(c * x)
340 }
341 }
342 DataModeler_fit (thee.get());
343 my parameters [1].value = thy parameters [1].value;
344 } else {
345 /*
346 Model: z(x) = a + b * f(x), where f(x) = exp (c * x).
347 Fit as linear model with the third parameter fixed!
348 */
349 autoDataModeler thee = DataModeler_createFromDataModeler (me, 2, kDataModelerFunction::LINEAR);
350 thy parameters.resize (thy numberOfParameters + 1);
351 thy parameters [thy numberOfParameters + 1].value = my parameters [3].value;
352 thy parameters [thy numberOfParameters + 1].status = kDataModelerParameterStatus::FIXED_EXTRA;
353 thy f_evaluate = exponential_plus_constant_evaluate;
354 thy f_evaluateBasisFunctions = linear_exponent_evaluateBasisFunctions;
355 DataModeler_fit (thee.get());
356 my parameters [1].value = thy parameters [1].value;
357 my parameters [2].value = thy parameters [2].value;
358 }
359 } else {
360 /*
361 First we determine c.
362 Model: z(x) = A * f1(x) + B * f2(x), where z(x) = y(x) - y [1], f1(x) = x - x [1], f2(x) = integral (x1, x, y(x)dx)
363 A = - a * c, B = c
364 */
365 autoMAT design = zero_MAT (my numberOfDataPoints, 2);
366 autoVEC yEstimate = raw_VEC (my numberOfDataPoints);
367 autoVEC weights = DataModeler_getDataPointsWeights (me, my weighData);
368 const double x1 = my data [1].x, y1 = my data [1].y;
369 long double xkm1 = x1, ykm1 = y1, sk = 0.0;
370 /*
371 First row of design has only zero's, skip it.
372 */
373 integer index = 0;
374 for (integer k = 2; k <= my numberOfDataPoints; k ++) {
375 if (my data [k] .status != kDataModelerData::INVALID) {
376 const long double xk = my data [k].x, yk = my data [k].y;
377 sk += 0.5 * (yk + ykm1) * (xk - xkm1); // Jacquelin, Eq. (7)
378 design [++ index] [1] = (xk - x1) * weights [k]; // Jacquelin, Eq. (9)
379 design [index] [2] = sk * weights [k];
380 yEstimate [index] = (yk - y1) * weights [k];
381 xkm1 = xk;
382 ykm1 = yk;
383 }
384 }
385 design.resize (index, 2);
386 yEstimate.resize (index);
387 autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
388 const double c = solution [2];
389 my parameters [3].value = c;
390 if (my parameters [2].status == kDataModelerParameterStatus::FIXED_) {
391 /*
392 Model: z(x)= a, where z(x) = y(x) - b * exp(c * x)
393 */
394 autoDataModeler thee = DataModeler_createFromDataModeler (me, 1, kDataModelerFunction::LINEAR);
395 thy f_evaluate = constant_evaluate;
396 thy f_evaluateBasisFunctions = constant_evaluateBasisFunctions;
397 my parameters [1].value = 0.0;
398 for (integer k = 1; k <= my numberOfDataPoints; k ++) {
399 if (thy data [k].status != kDataModelerData::INVALID) {
400 thy data [k].y -= my f_evaluate (me, thy data [k].x, my parameters.get());// z(x) = y(x) - b * exp(c * x)
401 }
402 }
403 DataModeler_fit (thee.get());
404 my parameters [1].value = thy parameters [1].value;
405 } else {
406 /*
407 As if c were fixed
408 Model: y(x) = a + b * f(x), where f(x) = exp (c * x).
409 */
410 autoDataModeler thee = DataModeler_createFromDataModeler (me, 2, kDataModelerFunction::LINEAR);
411 thy parameters.resize (thy numberOfParameters + 1);
412 thy parameters [thy numberOfParameters + 1].value = c;
413 thy parameters [thy numberOfParameters + 1].status = kDataModelerParameterStatus::FIXED_EXTRA;
414 thy f_evaluate = exponential_plus_constant_evaluate;
415 thy f_evaluateBasisFunctions = linear_exponent_evaluateBasisFunctions;
416 DataModeler_fit (thee.get());
417 my parameters [1].value = thy parameters [1].value;
418 my parameters [2].value = thy parameters [2].value;
419 my parameters [3].value = c;
420 }
421 }
422 /*
423 error propagation ?
424 */
425 my parameterCovariances -> data.get() <<= undefined;
426 }
427
modelLinearTrendWithSigmoid(DataModeler me,double * out_lambda,double * out_sigma)428 static void modelLinearTrendWithSigmoid (DataModeler me, double *out_lambda, double *out_sigma) {
429 /*
430 Set mu in the middle of the interval and make lambda = 2 * ymean.
431 Calculate sigma such that the model goes through (xmin,model(xmin)) and (xmax, model(xmax))
432 deltaY = y(xmax) - y(xmin) = lambda / (1 + exp (- (xmax - mu) / sigma)) - lambda / (1 + exp (- (xmin - mu) / sigma))
433 Then sigma = 0.5 *(xmax - xmin) / ln ((lambda + deltaY) / (lambda - deltaY)))
434 */
435 autoDataModeler thee = DataModeler_createFromDataModeler(me, 2, kDataModelerFunction::POLYNOME);
436 DataModeler_fit (thee.get());
437 const double lambda = 2.0 * thy parameters [1].value;
438 const double yAtXmin = DataModeler_getModelValueAtX (thee.get(), thy xmin);
439 const double yAtXmax = DataModeler_getModelValueAtX (thee.get(), thy xmax);
440 const double deltaY = yAtXmax - yAtXmin, deltaX = my xmax - my xmin;
441 const double sigma = 0.5 * deltaX / log ((lambda + deltaY) / (lambda - deltaY));
442 if (out_lambda)
443 *out_lambda = lambda;
444 if (out_sigma)
445 *out_sigma = sigma;
446 }
447
448 /*
449 Function: y(x) = b / (1 + exp (- (x - mu) / sigma))
450 Model: z(x) = A * f1(x) + b * f2(x) + C * f3(x), where
451 z (x)= y (x) * ln (y (x)), f1 (x) = integral (0, x, y(x)dx), f2 (x) = x * y (x), f3 = y (x),
452 A = -1 / (lambda * sigma), B = 1 / sigma, C = ln (lambda) - ln (1 + exp ((mu - x1) / sigma))
453 Non-iterative solution according to Jean Jacquelin (2009), Régressions et équations intégrales, https://fr.scribd.com/doc/14674814/Regressions-et-equations-integrales,
454 pages 16-17.
455 Precondition: x [i] must be increasing.
456 */
sigmoid_fit(DataModeler me)457 void sigmoid_fit (DataModeler me) {
458 autoVEC yEstimate = raw_VEC (my numberOfDataPoints);
459 autoVEC weights = DataModeler_getDataPointsWeights (me, my weighData);
460 double lambda = my parameters [1].value;
461 double mu = my parameters [2].value;
462 double sigma = my parameters [3].value;
463 if (my parameters [1].status == kDataModelerParameterStatus::FIXED_) {
464 if (my parameters [2].status == kDataModelerParameterStatus::FIXED_) {
465 if (my parameters [3].status == kDataModelerParameterStatus::FIXED_)
466 return;
467 /*
468 Model: z(X)*ln(z(X) = A * f5 (X) + B * f6 (X), where f5 (X) = z (X) * X - z (X) * integral (x [1], X, z(x)dx)),
469 f6 (X) = y (X), z(X) = y (X) / lambda and X [k] = x [k] - mu - x1
470 A = 1 / sigma, B = ln (y (x1))
471 */
472 autoMAT design = zero_MAT (my numberOfDataPoints, 2);
473 long double sk = 0.0, x1 = my data [1].x ; // no need to subtract mu!
474 double xkm1 = 0.0, ykm1 = 0.0;
475 integer index = 0;
476 for (integer k = 1; k <= my numberOfDataPoints; k ++) {
477 if (my data [k] .status != kDataModelerData::INVALID) {
478 const long double xk = my data [k].x, yk = my data [k].y / lambda;
479 sk += 0.5 * (yk + ykm1) * (xk - xkm1); // invariant under translations in x
480 const double f1x = yk * sk;
481 const double f2x = (xk - mu - x1) * yk; // X [k]
482 design [++ index] [1] = (f2x - f1x) * weights [k];
483 design [index] [2] = yk * weights [k];
484 yEstimate [index] = yk * log (yk) * weights [k];
485 xkm1 = xk;
486 ykm1 = yk;
487 }
488 }
489 design.resize (index, 2);
490 yEstimate.resize (index);
491 autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
492 sigma = 1.0 /solution [1];
493 } else if (my parameters [3].status == kDataModelerParameterStatus::FIXED_) {
494 /*
495 Model: z(X)*ln(z(X) = C * f3 (X), where z(X)= y(X)/lambda + f1 (X) / sigma - f2 (X) / sigma, X [l] = x [k] - x1
496 */
497 autoMAT design = zero_MAT (my numberOfDataPoints, 1);
498 long double sk = 0.0, x1 = my data [1].x;
499 long double xkm1 = 0.0, ykm1 = 0.0;
500 integer index = 0;
501 for (integer k = 1; k <= my numberOfDataPoints; k ++) {
502 if (my data [k] .status != kDataModelerData::INVALID) {
503 const long double xk = my data [k].x, yk = my data [k].y / lambda;
504 sk += 0.5 * (yk + ykm1) * (xk - xkm1); // xk - xkm1 == X [k] - X [k-1]
505 const double f1x = yk * sk;
506 const double f2x = (xk - x1) * yk;
507 design [++ index] [1] = yk * weights [k];
508 yEstimate [index] = (yk * log (yk) + f1x / sigma - f2x / sigma) * weights [k];
509 xkm1 = xk;
510 ykm1 = yk;
511 }
512 }
513 design.resize (index, 1);
514 yEstimate.resize (index);
515 autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
516 const double lnarg = exp (-solution [1]) - 1.0;
517 if (lnarg > 0)
518 mu = x1 + sigma * log (lnarg);
519 else
520 mu = undefined;
521 } else {
522 /*
523 Model: z*ln(z) = A * f4(X) + B * f3 (X), where z(X) = y(X) / lambda, f4(X)= -f1(X) + f2(X), f3 (X) = z(X), A = 1/sigma, B = - ln (1+exp(-(mu - x1)/sigma)) and X [k] = x [k] - x [1].
524 */
525 autoMAT design = zero_MAT (my numberOfDataPoints, 2);
526 long double sk = 0.0, x1 = my data [1].x;
527 long double xkm1 = 0.0, ykm1 = 0.0;
528 integer index = 0;
529 for (integer k = 1; k <= my numberOfDataPoints; k ++) {
530 if (my data [k] .status != kDataModelerData::INVALID) {
531 const long double xk = my data [k].x, yk = my data [k].y / lambda;
532 sk += 0.5 * (yk + ykm1) * (xk - xkm1);
533 const double f1x = yk * sk;
534 const double f2x = (xk - x1) * yk;
535 design [++ index] [1] = (f2x - f1x) * weights [k];
536 design [index] [2] = yk * weights [k]; // f3
537 yEstimate [index] = yk * log (yk) * weights [k];
538 xkm1 = xk;
539 ykm1 = yk;
540 }
541 }
542 design.resize (index, 2);
543 yEstimate.resize (index);
544 autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
545 sigma = 1.0 / solution [1];
546 const double lnarg = exp (-solution [2]) - 1.0;
547 if (lnarg > 0)
548 mu = x1 + sigma * log (lnarg);
549 else
550 mu = undefined;
551 }
552 } else if (my parameters [2].status == kDataModelerParameterStatus::FIXED_ &&
553 my parameters [3].status == kDataModelerParameterStatus::FIXED_) {
554 /*
555 Model: y(x) = E * f4 (x), where f4(x) = 1 /(1 + exp (- (x - mu) / sigma))
556 */
557 autoMAT design = zero_MAT (my numberOfDataPoints, 1);
558 integer index = 0;
559 for (integer k = 1; k <= my numberOfDataPoints; k ++) {
560 if (my data [k] .status != kDataModelerData::INVALID) {
561 const double yk = my data [k].y, xk = my data [k].x;
562 const double f4x = 1.0 / (1.0 + exp (- (xk - mu) / sigma));
563 design [++ index] [1] = f4x * weights [k];
564 yEstimate [index] = yk * weights [k];
565 }
566 }
567 design.resize (index, 1);
568 yEstimate.resize (index);
569 autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
570 lambda = solution [1];
571 } else if (my parameters [3].status == kDataModelerParameterStatus::FIXED_) {
572 /*
573 Model: z(x) = A * f1 (x) + C * f3(x), where z(x) = y(x)*ln(y(x)) - f2 (x) / sigma
574 */
575 autoMAT design = zero_MAT (my numberOfDataPoints, 2);
576 double sk = 0.0, x1 = my data [1].x;
577 double xkm1 = 0.0, ykm1 = 0.0;
578 integer index = 0;
579 for (integer k = 1; k <= my numberOfDataPoints; k ++) {
580 if (my data [k] .status != kDataModelerData::INVALID) {
581 const double xk = my data [k].x - x1, yk = my data [k].y;
582 const double f2x = xk * yk;
583 sk += 0.5 * (yk + ykm1) * (xk - xkm1);
584 const double f1x = yk * sk;
585 design [++ index] [1] = f1x * weights [k];
586 design [index] [2] = yk * weights [k];
587 yEstimate [index] = (yk * log (yk) - f2x / sigma) * weights [k];
588 xkm1 = xk;
589 ykm1 = yk;
590 }
591 }
592 design.resize (index, 2);
593 yEstimate.resize (index);
594 autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
595 sigma = my parameters [3].value;
596 lambda = -1.0 / (solution [1] * sigma);
597 const double lnarg = lambda * exp (-solution [2]) - 1.0;
598 if (lnarg > 0)
599 mu = x1 + sigma * log (lnarg);
600 else
601 mu = undefined;
602 } else {
603 /*
604 Model: z(x) = A * f1 (x) + B * f2(x) + C * f3(x), where z(x) = y(x)*ln(y(x))
605 */
606 autoMAT design = zero_MAT (my numberOfDataPoints, 3);
607 long double sk = 0.0, x1 = my data [1].x;
608 long double xkm1 = 0.0, ykm1 = 0.0;
609 integer index = 0;
610 for (integer k = 1; k <= my numberOfDataPoints; k ++) {
611 if (my data [k] .status != kDataModelerData::INVALID) {
612 const long double xk = my data [k].x, yk = my data [k].y;
613 sk += 0.5 * (yk + ykm1) * (xk - xkm1);
614 const double f1x = yk * sk;
615 const double f2x = (xk - x1) * yk;
616 design [++ index] [1] = f1x * weights [k];
617 design [index] [2] = f2x * weights [k];
618 design [index] [3] = yk * weights [k];
619 yEstimate [index] = yk * log (yk) * weights [k];
620 xkm1 = xk;
621 ykm1 = yk;
622 }
623 }
624 design.resize (index, 3);
625 yEstimate.resize (index);
626 autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
627 sigma = 1.0 / solution [2];
628 lambda = - solution [2] / solution [1];
629 if (my parameters [2].status != kDataModelerParameterStatus::FIXED_) {
630 my parameters [1].value = lambda;
631 const double lnarg = lambda * exp (-solution [3]) - 1.0;
632 if (lnarg > 0)
633 mu = x1 + sigma * log (lnarg);
634 else {
635 modelLinearTrendWithSigmoid (me, & lambda, & sigma);
636 mu = 0.5 * (my xmin + my xmax);
637 }
638 }
639 }
640 my parameters [1].value = lambda;
641 my parameters [2].value = mu;
642 my parameters [3].value = sigma;
643 /*
644 error propagation ?
645 */
646 my parameterCovariances -> data.get() <<= undefined;
647 }
648
649 /*
650 Model: y(x) = gamma + lambda / (1 + exp (- (x - mu) / sigma))
651 Solution according to Jean Jacquelin (2009), Régressions et équations intégrales, https://fr.scribd.com/doc/14674814/Regressions-et-equations-integrales,
652 pages 38 and following.
653 The author makes a mistake in the derivation of the relation between the a,b,c,d and
654 gamma, lambda, mu and sigma of the model.
655 lambda = ± 1/s * sqrt (b^2-4ac) (the artile wrongly shows "sqrt (b^2+4ac)")
656 gamma = (−b ∓ sqrt (b^2 − 4ac))/ (2*a)
657 sigma = ∓ 1 / sqrt (b^2 − 4ac)
658 mu = x [1]+ sigma * ln (lambda/(d-gamma) - 1)
659 Two models are indistinguishable if:
660 gamma' = gamma + lambda
661 sigma' = -sigma
662 lambda' = - lambda
663 Precondition: x [i] are increasing order.
664 */
sigmoid_plus_constant_fit(DataModeler me)665 void sigmoid_plus_constant_fit (DataModeler me) {
666 double gamma = my parameters [1].value, lambda = my parameters [2].value;
667 double mu = my parameters [3].value, sigma = my parameters [4].value;
668 if (my parameters [1].status == kDataModelerParameterStatus::FIXED_) {
669 /*
670 Model z(x) = lambda / (1 + exp (- (x - mu) / sigma)) where z(x) = y(x) - gamma.
671 */
672 autoDataModeler thee = DataModeler_createFromDataModeler (me, 3, kDataModelerFunction::SIGMOID);
673 for (integer k = 1; k <= my numberOfDataPoints; k ++) {
674 if (my data [k] .status != kDataModelerData::INVALID) {
675 thy data [k].y -= gamma;
676 }
677 }
678 DataModeler_fit (thee.get());
679 lambda = thy parameters [1].value;
680 mu = thy parameters [2].value;
681 sigma = thy parameters [3].value;
682 } else if (my parameters [3].status == kDataModelerParameterStatus::FIXED_ &&
683 my parameters [4].status == kDataModelerParameterStatus::FIXED_) {
684 if (my parameters [2].status == kDataModelerParameterStatus::FIXED_) {
685 /*
686 Model: z(x) = gamma, where z(x) = y(x) - lambda / (1 + exp (- (x - mu)/ sigma))
687 */
688 autoMAT design = raw_MAT (my numberOfDataPoints, 1);
689 autoVEC yEstimate = raw_VEC (my numberOfDataPoints);
690 autoVEC weights = DataModeler_getDataPointsWeights (me, my weighData);
691 integer index = 0;
692 for (integer k = 1; k <= my numberOfDataPoints; k ++) {
693 if (my data [k] .status != kDataModelerData::INVALID) {
694 const long double xk = my data [k].x, yk = my data [k].y;
695 const double fx = lambda / (1 + exp (- (xk - mu) / sigma));
696 design [++ index] [1] = 1.0 * weights [k];
697 yEstimate [index] = (yk - fx) * weights [k];
698 }
699 }
700 design.resize (index, 1);
701 yEstimate.resize (index);
702 autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
703 gamma = solution [1];
704 } else {
705 /*
706 Model: y(x) = gamma + lambda * f(x), where f(x) = 1 / (1 + exp (-(x -mu)/sigma))
707 */
708 autoMAT design = raw_MAT (my numberOfDataPoints, 2);
709 autoVEC yEstimate = raw_VEC (my numberOfDataPoints);
710 autoVEC weights = DataModeler_getDataPointsWeights (me, my weighData);
711 integer index = 0;
712 for (integer k = 1; k <= my numberOfDataPoints; k ++) {
713 if (my data [k] .status != kDataModelerData::INVALID) {
714 const long double xk = my data [k].x, yk = my data [k].y;
715 const double fx = 1.0 / (1 + exp (- (xk - mu) / sigma));
716 design [++ index] [1] = 1.0 * weights [k];
717 design [index] [2] = fx * weights [k];
718 yEstimate [index] = yk * weights [k];
719 }
720 }
721 design.resize (index, 2);
722 yEstimate.resize (index);
723 autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
724 gamma = solution [1];
725 lambda = solution [2];
726 }
727 } else {
728 autoMAT design = raw_MAT (my numberOfDataPoints, 4);
729 autoVEC yEstimate = raw_VEC (my numberOfDataPoints);
730 autoVEC weights = DataModeler_getDataPointsWeights (me, my weighData);
731 long double s1k = 0.0, s2k = 0.0, x1 = my data [1].x, xkm1 = x1, ykm1 = 0.0;
732 integer index = 0;
733 for (integer k = 1; k <= my numberOfDataPoints; k ++) {
734 if (my data [k] .status != kDataModelerData::INVALID) {
735 const long double xk = my data [k].x, yk = my data [k].y;
736 s1k += 0.5 * (yk + ykm1) * (xk - xkm1);
737 s2k += 0.5 * (yk * yk + ykm1 * ykm1) * (xk - xkm1);
738 design [++ index] [1] = s2k * weights [k];
739 design [index] [2] = s1k * weights [k];
740 design [index] [3] = (xk - x1) * weights [k];
741 design [index] [4] = 1.0 * weights [k];
742 yEstimate [index] = yk * weights [k];
743 xkm1 = xk;
744 ykm1 = yk;
745 }
746 }
747 design.resize (index, 4);
748 yEstimate.resize (index);
749 autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
750 const double a = solution [1], b = solution [2];
751 const double c = solution [3], d = solution [4];
752 auto setMu = [&, d, x1] () -> double {
753 mu = undefined;
754 const double lnarg = lambda / (d - gamma) - 1.0;
755 if (lnarg > 0.0)
756 mu = x1 + sigma * log (lnarg);
757 return mu;
758 };
759 if (my parameters [2].status == kDataModelerParameterStatus::FIXED_) {
760 sigma = - 1.0 / (a * lambda);
761 gamma = 0.5 * lambda * (sigma * b - 1.0);
762 if (my parameters [3].status != kDataModelerParameterStatus::FIXED_)
763 setMu ();
764 } if (my parameters [4].status == kDataModelerParameterStatus::FIXED_) {
765 lambda = - 1.0 / (a * sigma);
766 gamma = 0.5 * lambda * (sigma * b - 1.0);
767 setMu ();
768 } else {
769 const double dissq = b * b - 4.0 * a * c;
770 bool modelFitIsBad = true;
771 if (dissq > 0.0) {
772 const double dis = sqrt (dissq);
773 modelFitIsBad = false;
774 lambda = dis / a;
775 gamma = (-b - dis) / (2.0 * a);
776 sigma = - 1.0 / dis;
777 if (my parameters [3].status != kDataModelerParameterStatus::FIXED_) {
778 if (! isdefined (setMu ())) {
779 lambda = -dis / a;
780 gamma = (-b + dis) / (2.0 * a);
781 sigma = 1.0 / dis;
782 if (! isdefined (setMu ()))
783 modelFitIsBad = true;
784 }
785 if (DataModeler_getCoefficientOfDetermination (me, nullptr, nullptr) < 0.0) // model fit is bad!
786 modelFitIsBad = true;
787 }
788 }
789 if (modelFitIsBad) {
790 modelLinearTrendWithSigmoid (me, & lambda, & sigma);
791 gamma = 0.0;
792 mu = 0.5 * (my xmin + my xmax);
793 my parameters [3].value = mu;
794 }
795 }
796 }
797 my parameters [1].value = gamma;
798 my parameters [2].value = lambda;
799 my parameters [3].value = mu;
800 my parameters [4].value = sigma;
801 /*
802 error propagation ?
803 */
804 my parameterCovariances -> data.get() <<= undefined;
805 }
806
series_fit(DataModeler me)807 void series_fit (DataModeler me) {
808 try {
809 /*
810 Count the number of free parameters to be fitted
811 */
812 const integer numberOfFreeParameters = DataModeler_getNumberOfFreeParameters (me);
813 if (numberOfFreeParameters == 0)
814 return;
815 const integer numberOfValidDataPoints = DataModeler_getNumberOfValidDataPoints (me);
816 if (numberOfValidDataPoints - numberOfFreeParameters < 0)
817 return;
818 autoVEC yEstimate = zero_VEC (numberOfValidDataPoints);
819 autoVEC term = zero_VEC (my numberOfParameters);
820 autovector<structDataModelerParameter> fixedParameters = newvectorcopy (my parameters.all());
821 autoMAT design = zero_MAT (numberOfValidDataPoints, numberOfFreeParameters);
822 autoVEC weights = DataModeler_getDataPointsWeights (me, my weighData);
823 /*
824 For function evaluation with only the FIXED parameters
825 */
826 for (integer ipar = 1; ipar <= my parameters.size; ipar ++)
827 if (my parameters [ipar] .status != kDataModelerParameterStatus::FIXED_)
828 fixedParameters [ipar].value = 0.0;
829
830 /*
831 We solve for the parameters p by minimizing the chi-squared function:
832 chiSquared = sum (i=1...n, (y [i] - sum (k=1..m, p [k] X [k] (x [i])) / sigma [i] ),
833 where n is the 'numberOfValidDataPoints', m is the 'numberOfFreeParameters',
834 - x [i] and y [i] are the i-th datapoint x and y values, respectively,
835 - sum (k=1..m, p [k] X [k] (x [i]) is the model estimation at x [i],
836 - X [k] (x [i]) is the k-th function term evaluated at x [i],
837 - and y [i] has been measured with some uncertainty sigma [i].
838 If we define the design matrix matrix A [i] [j] = X [j] (x [i]) / sigma [i] and
839 the vector b [i] = y [i] / sigma [i], the problem can be stated as
840 minimize the norm ||A.p - b|| for p.
841 This problem can be solved by SVD.
842 */
843 integer idata = 1;
844 for (integer ipoint = 1; ipoint <= my numberOfDataPoints; ipoint ++) {
845 if (my data [ipoint] .status != kDataModelerData::INVALID) {
846 const double xi = my data [ipoint].x, yi = my data [ipoint].y;
847 const double yFixed = my f_evaluate (me, xi, fixedParameters.get());
848 // individual terms of the function
849 my f_evaluateBasisFunctions (me, xi, term.get());
850 for (integer icol = 1, ipar = 1; ipar <= my numberOfParameters; ipar ++)
851 if (my parameters [ipar] .status == kDataModelerParameterStatus::FREE)
852 design [idata] [icol ++] = term [ipar] * weights [ipoint];
853 /*
854 Only 'residual variance' must be explained by the model
855 */
856 yEstimate [idata ++] = (yi - yFixed) * weights [ipoint];
857 }
858 }
859 autoMAT covar;
860 autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), & covar);
861 /*
862 Put the calculated parameters at the correct position in 'my parameters'
863 */
864 Covariance cov = my parameterCovariances.get();
865 for (integer kpar = 1, ipar = 1; ipar <= my numberOfParameters; ipar ++) {
866 if (my parameters [ipar] .status != kDataModelerParameterStatus::FIXED_)
867 my parameters [ipar] .value = solution [kpar ++];
868 cov -> centroid [ipar] = my parameters [ipar] .value;
869 }
870 cov -> numberOfObservations = numberOfValidDataPoints;
871 /*
872 Estimate covariances between parameters
873 */
874 if (numberOfFreeParameters < my numberOfParameters) {
875 cov -> data.all() <<= 0.0; // set fixed parameters variances and covariances to zero
876 for (integer irow = 1, ipar = 1; ipar <= my numberOfParameters; ipar ++) {
877 if (my parameters [ipar] .status != kDataModelerParameterStatus::FIXED_) {
878 for (integer icol = 1, jpar = 1; jpar <= my numberOfParameters; jpar ++) {
879 if (my parameters [jpar] .status != kDataModelerParameterStatus::FIXED_)
880 cov -> data [ipar] [jpar] = covar [irow] [icol ++];
881 }
882 irow ++;
883 }
884 }
885 } else {
886 my parameterCovariances -> data = covar.move();
887 }
888 } catch (MelderError) {
889 Melder_throw (U"DataModeler no fit.");
890 }
891 }
892
chisqFromZScores(VEC zscores,double * out_chisq,integer * out_numberOfValidZScores)893 static void chisqFromZScores (VEC zscores, double *out_chisq, integer *out_numberOfValidZScores) {
894 double chisq = 0.0;
895 integer numberOfValidZScores = 0;
896 for (integer ipoint = 1; ipoint <= zscores.size; ipoint ++) {
897 if (isdefined (zscores [ipoint])) {
898 chisq += zscores [ipoint] * zscores [ipoint];
899 numberOfValidZScores ++;
900 }
901 }
902 if (out_chisq)
903 *out_chisq = chisq;
904 if (out_numberOfValidZScores)
905 *out_numberOfValidZScores = numberOfValidZScores;
906 }
907
DataModeler_getModelValueAtX(DataModeler me,double x)908 double DataModeler_getModelValueAtX (DataModeler me, double x) {
909 double f = undefined;
910 if (x >= my xmin && x <= my xmax)
911 f = my f_evaluate (me, x, my parameters.get());
912 return f;
913 }
914
DataModeler_getModelValueAtIndex(DataModeler me,integer index)915 double DataModeler_getModelValueAtIndex (DataModeler me, integer index) {
916 double f = undefined;
917 if (index > 0 && index <= my numberOfDataPoints)
918 f = my f_evaluate (me, my data [index] .x, my parameters.get());
919 return f;
920 }
921
DataModeler_getExtremaY(DataModeler me,double * out_ymin,double * out_ymax)922 void DataModeler_getExtremaY (DataModeler me, double *out_ymin, double *out_ymax) {
923 MelderExtremaWithInit extrema;
924 for (integer i = 1; i <= my numberOfDataPoints; i++)
925 if (my data [i] .status != kDataModelerData::INVALID)
926 extrema.update (my data [i] .y);
927
928 if (out_ymin)
929 *out_ymin = extrema.min;
930 if (out_ymax)
931 *out_ymax = extrema.max;
932 }
933
DataModeler_getDataPointYValue(DataModeler me,integer index)934 double DataModeler_getDataPointYValue (DataModeler me, integer index) {
935 double value = undefined;
936 if (index > 0 && index <= my numberOfDataPoints && my data [index] .status != kDataModelerData::INVALID)
937 value = my data [index] .y;
938 return value;
939 }
940
DataModeler_getDataPointXValue(DataModeler me,integer index)941 double DataModeler_getDataPointXValue (DataModeler me, integer index) {
942 double value = undefined;
943 if (index > 0 && index <= my numberOfDataPoints && my data [index] .status != kDataModelerData::INVALID)
944 value = my data [index] .x;
945 return value;
946 }
947
DataModeler_setDataPointYValue(DataModeler me,integer index,double value)948 void DataModeler_setDataPointYValue (DataModeler me, integer index, double value) {
949 if (index > 0 && index <= my numberOfDataPoints)
950 my data [index] .y = value;
951 }
952
DataModeler_setDataPointXValue(DataModeler me,integer index,double value)953 void DataModeler_setDataPointXValue (DataModeler me, integer index, double value) {
954 if (index > 0 && index <= my numberOfDataPoints)
955 my data [index] .x = value;
956 }
957
DataModeler_setDataPointValues(DataModeler me,integer index,double xvalue,double yvalue)958 void DataModeler_setDataPointValues (DataModeler me, integer index, double xvalue, double yvalue) {
959 if (index > 0 && index <= my numberOfDataPoints) {
960 my data [index] .x = xvalue;
961 my data [index] .y = yvalue;
962 }
963 }
964
DataModeler_setDataPointYSigma(DataModeler me,integer index,double sigma)965 void DataModeler_setDataPointYSigma (DataModeler me, integer index, double sigma) {
966 if (index > 0 && index <= my numberOfDataPoints)
967 my data [index] .sigmaY = sigma;
968 }
969
DataModeler_getDataPointYSigma(DataModeler me,integer index)970 double DataModeler_getDataPointYSigma (DataModeler me, integer index) {
971 double sigma = undefined;
972 if (index > 0 && index <= my numberOfDataPoints)
973 sigma = my data [index] .sigmaY;
974 return sigma;
975 }
976
DataModeler_getDataPointStatus(DataModeler me,integer index)977 kDataModelerData DataModeler_getDataPointStatus (DataModeler me, integer index) {
978 kDataModelerData value = kDataModelerData::INVALID;
979 if (index > 0 && index <= my numberOfDataPoints)
980 value = my data [index] .status;
981 return value;
982 }
983
DataModeler_setDataPointStatus(DataModeler me,integer index,kDataModelerData status)984 void DataModeler_setDataPointStatus (DataModeler me, integer index, kDataModelerData status) {
985 if (index > 0 && index <= my numberOfDataPoints) {
986 if (status == kDataModelerData::VALID && isundef (my data [index] .y))
987 Melder_throw (U"Your data value is undefined. First set the value and then its status.");
988 my data [index] .status = status;
989 }
990 }
991
DataModeler_setDataPointValueAndStatus(DataModeler me,integer index,double value,kDataModelerData dataStatus)992 void DataModeler_setDataPointValueAndStatus (DataModeler me, integer index, double value, kDataModelerData dataStatus) {
993 if (index > 0 && index <= my numberOfDataPoints) {
994 my data [index] .y = value;
995 my data [index] .status = dataStatus;
996 }
997 }
998
DataModeler_setParameterValue(DataModeler me,integer index,double value,kDataModelerParameterStatus status)999 void DataModeler_setParameterValue (DataModeler me, integer index, double value, kDataModelerParameterStatus status) {
1000 if (index > 0 && index <= my numberOfParameters) {
1001 my parameters [index] .value = value;
1002 my parameters [index] .status = status;
1003 }
1004 }
1005
DataModeler_setParameterValueFixed(DataModeler me,integer index,double value)1006 void DataModeler_setParameterValueFixed (DataModeler me, integer index, double value) {
1007 Melder_require (my type == kDataModelerFunction::POLYNOME || my type == kDataModelerFunction::LEGENDRE,
1008 U"This would change the model type, which is not possible yet.");
1009 DataModeler_setParameterValue (me, index, value, kDataModelerParameterStatus::FIXED_);
1010 }
1011
DataModeler_getParameterValue(DataModeler me,integer index)1012 double DataModeler_getParameterValue (DataModeler me, integer index) {
1013 double value = undefined;
1014 if (index > 0 && index <= my numberOfParameters)
1015 value = my parameters [index] .value;
1016 return value;
1017 }
1018
DataModeler_listParameterValues(DataModeler me)1019 autoVEC DataModeler_listParameterValues (DataModeler me) {
1020 autoVEC result = raw_VEC (my numberOfParameters);
1021 for (integer k = 1; k <= my numberOfParameters; k ++)
1022 result [k] = my parameters [k] .value;
1023 return result;
1024 }
1025
DataModeler_getParameterStatus(DataModeler me,integer index)1026 kDataModelerParameterStatus DataModeler_getParameterStatus (DataModeler me, integer index) {
1027 kDataModelerParameterStatus status = kDataModelerParameterStatus::UNDEFINED;
1028 if (index > 0 && index <= my numberOfParameters)
1029 status = my parameters [index] .status;
1030 return status;
1031 }
1032
DataModeler_getParameterStandardDeviation(DataModeler me,integer index)1033 double DataModeler_getParameterStandardDeviation (DataModeler me, integer index) {
1034 double stdev = undefined;
1035 if (index > 0 && index <= my numberOfParameters)
1036 stdev = sqrt (my parameterCovariances -> data [index] [index]);
1037 return stdev;
1038 }
1039
DataModeler_getVarianceOfParameters(DataModeler me,integer fromIndex,integer toIndex,integer * out_numberOfFreeParameters)1040 double DataModeler_getVarianceOfParameters (DataModeler me, integer fromIndex, integer toIndex, integer *out_numberOfFreeParameters) {
1041 double variance = undefined;
1042 getAutoNaturalNumbersWithinRange (& fromIndex, & toIndex, my numberOfParameters, U"parameter");
1043 integer numberOfFreeParameters = 0;
1044 variance = 0;
1045 for (integer ipar = fromIndex; ipar <= toIndex; ipar ++) {
1046 if (my parameters [ipar] .status != kDataModelerParameterStatus::FIXED_) {
1047 variance += my parameterCovariances -> data [ipar] [ipar];
1048 numberOfFreeParameters ++;
1049 }
1050 }
1051 if (out_numberOfFreeParameters)
1052 *out_numberOfFreeParameters = numberOfFreeParameters;
1053 return variance;
1054 }
1055
DataModeler_setParametersFree(DataModeler me,integer fromIndex,integer toIndex)1056 void DataModeler_setParametersFree (DataModeler me, integer fromIndex, integer toIndex) {
1057 getAutoNaturalNumbersWithinRange (& fromIndex, & toIndex, my numberOfParameters, U"parameter");
1058 for (integer ipar = fromIndex; ipar <= toIndex; ipar ++)
1059 my parameters [ipar] .status = kDataModelerParameterStatus::FREE;
1060 }
1061
DataModeler_setParameterValuesToZero(DataModeler me,double numberOfSigmas)1062 void DataModeler_setParameterValuesToZero (DataModeler me, double numberOfSigmas) {
1063 integer numberOfChangedParameters = 0;
1064 for (integer ipar = my numberOfParameters; ipar > 0; ipar --) {
1065 if (my parameters [ipar] .status != kDataModelerParameterStatus::FIXED_) {
1066 const double value = my parameters [ipar] .value;
1067 double sigmas = numberOfSigmas * DataModeler_getParameterStandardDeviation (me, ipar);
1068 if ((value - sigmas) * (value + sigmas) < 0) {
1069 DataModeler_setParameterValueFixed (me, ipar, 0.0);
1070 numberOfChangedParameters ++;
1071 }
1072 }
1073 }
1074 }
1075
DataModeler_getNumberOfFreeParameters(DataModeler me)1076 integer DataModeler_getNumberOfFreeParameters (DataModeler me) {
1077 integer numberOfFreeParameters = 0;
1078 for (integer ipar = 1; ipar <= my numberOfParameters; ipar ++) {
1079 if (my parameters [ipar] .status == kDataModelerParameterStatus::FREE)
1080 numberOfFreeParameters ++;
1081 }
1082 return numberOfFreeParameters;
1083 }
1084
DataModeler_getNumberOfFixedParameters(DataModeler me)1085 integer DataModeler_getNumberOfFixedParameters (DataModeler me) {
1086 return my numberOfParameters - DataModeler_getNumberOfFreeParameters (me);
1087 }
1088
DataModeler_getNumberOfValidDataPoints(DataModeler me)1089 integer DataModeler_getNumberOfValidDataPoints (DataModeler me) {
1090 integer numberOfValidDataPoints = 0;
1091 for (integer ipoint = 1; ipoint <= my numberOfDataPoints; ipoint ++)
1092 if (my data [ipoint] .status != kDataModelerData::INVALID)
1093 numberOfValidDataPoints ++;
1094 return numberOfValidDataPoints;
1095 }
1096
DataModeler_getNumberOfInvalidDataPoints(DataModeler me)1097 integer DataModeler_getNumberOfInvalidDataPoints (DataModeler me) {
1098 return my numberOfDataPoints - DataModeler_getNumberOfValidDataPoints (me);
1099 }
1100
DataModeler_setTolerance(DataModeler me,double tolerance)1101 void DataModeler_setTolerance (DataModeler me, double tolerance) {
1102 my tolerance = ( tolerance > 0.0 ? tolerance : my numberOfDataPoints * NUMfpp -> eps );
1103 }
1104
DataModeler_getDegreesOfFreedom(DataModeler me)1105 double DataModeler_getDegreesOfFreedom (DataModeler me) {
1106 integer numberOfDataPoints = 0;
1107 for (integer ipoint = 1; ipoint <= my numberOfDataPoints; ipoint ++)
1108 if (my data [ipoint] .status != kDataModelerData::INVALID)
1109 numberOfDataPoints ++;
1110 const double ndf = numberOfDataPoints - DataModeler_getNumberOfFreeParameters (me);
1111 return ndf;
1112 }
1113
1114 /*
1115 Interpret the values in sigmaY as 1 / sigmay or 1 / sqrt (sigmaY) or y/sigmaY.
1116 If equal weighing than get the sigma form the residual sum of squares between model and data.
1117 */
DataModeler_getDataPointsWeights(DataModeler me,kDataModelerWeights weighData)1118 autoVEC DataModeler_getDataPointsWeights (DataModeler me, kDataModelerWeights weighData) {
1119 autoVEC weights = zero_VEC (my numberOfDataPoints);
1120 if (weighData == kDataModelerWeights::EQUAL_WEIGHTS) {
1121 /*
1122 We weigh with the inverse of the standard deviation of the data to give
1123 subsequent Chi squared tests a meaningful interpretation.
1124 */
1125 const double stdev = DataModeler_getDataStandardDeviation (me);
1126 Melder_require (isdefined (stdev),
1127 U"Not enough data points to calculate standard deviation.");
1128 weights.all() <<= 1.0 / stdev;
1129 } else {
1130 for (integer ipoint = 1; ipoint <= my numberOfDataPoints; ipoint ++) {
1131 if (my data [ipoint] .status == kDataModelerData::INVALID)
1132 continue; // invalid points get weight 0.
1133 double sigma = my data [ipoint] .sigmaY;
1134 double weight = 1.0;
1135 if (isdefined (sigma) && sigma > 0.0) {
1136 if (weighData == kDataModelerWeights::ONE_OVER_SIGMA)
1137 weight = 1.0 / sigma;
1138 else if (weighData == kDataModelerWeights::RELATIVE_)
1139 weight = my data [ipoint] .y / sigma;
1140 else if (weighData == kDataModelerWeights::ONE_OVER_SQRTSIGMA) {
1141 weight = 1.0 / sqrt (sigma);
1142 }
1143 }
1144 weights [ipoint] = weight;
1145 }
1146 }
1147 return weights;
1148 }
1149
DataModeler_getZScores(DataModeler me)1150 autoVEC DataModeler_getZScores (DataModeler me) {
1151 try {
1152 autoVEC zscores = raw_VEC (my numberOfDataPoints);
1153 autoVEC weights = DataModeler_getDataPointsWeights (me, my weighData);
1154 for (integer ipoint = 1; ipoint <= my numberOfDataPoints; ipoint ++) {
1155 double z = undefined;
1156 if (my data [ipoint] .status != kDataModelerData::INVALID) {
1157 const double estimate = my f_evaluate (me, my data [ipoint] .x, my parameters.get());
1158 z = (my data [ipoint] .y - estimate) * weights [ipoint]; // 1/sigma
1159 }
1160 zscores [ipoint] = z;
1161 }
1162 return zscores;
1163 } catch (MelderError) {
1164 Melder_throw (U"No z-scores calculated.");
1165 }
1166 }
1167
DataModeler_getChisqScoresFromZScores(DataModeler me,constVEC zscores,bool substituteAverage)1168 autoVEC DataModeler_getChisqScoresFromZScores (DataModeler me, constVEC zscores, bool substituteAverage) {
1169 Melder_assert (zscores.size == my numberOfDataPoints);
1170 autoVEC chisq = raw_VEC (zscores.size);
1171 integer numberOfDefined = 0;
1172 double sumchisq = 0.0;
1173 for (integer ipoint = 1; ipoint <= my numberOfDataPoints; ipoint ++) {
1174 chisq [ipoint] = undefined;
1175 if (isdefined (zscores [ipoint])) {
1176 chisq [ipoint] = zscores [ipoint] * zscores [ipoint];
1177 sumchisq += chisq [ipoint];
1178 numberOfDefined ++;
1179 }
1180 }
1181 if (substituteAverage && numberOfDefined != my numberOfDataPoints && numberOfDefined > 0) {
1182 for (integer ipoint = 1; ipoint <= my numberOfDataPoints; ipoint ++) {
1183 if (isundef (chisq [ipoint]))
1184 chisq [ipoint] = sumchisq / numberOfDefined;
1185 }
1186 }
1187 return chisq;
1188 }
1189
DataModeler_getChiSquaredQ(DataModeler me,double * out_prob,double * out_df)1190 double DataModeler_getChiSquaredQ (DataModeler me, double *out_prob, double *out_df) {
1191 double chisq;
1192 integer numberOfValidZScores;
1193 autoVEC zscores = DataModeler_getZScores (me);
1194 chisqFromZScores (zscores.get(), & chisq, & numberOfValidZScores);
1195 const double ndf = DataModeler_getDegreesOfFreedom (me);
1196
1197 if (out_prob)
1198 *out_prob = NUMchiSquareQ (chisq, ndf);
1199 if (out_df)
1200 *out_df = ndf;
1201 return chisq;
1202 }
1203
DataModeler_getWeightedMean(DataModeler me)1204 double DataModeler_getWeightedMean (DataModeler me) {
1205 double ysum = 0.0, wsum = 0.0;
1206 autoVEC weights = DataModeler_getDataPointsWeights (me, my weighData);
1207 for (integer ipoint = 1; ipoint <= my numberOfDataPoints; ipoint ++)
1208 if (my data [ipoint] .status != kDataModelerData::INVALID) {
1209 ysum += my data [ipoint] .y * weights [ipoint];
1210 wsum += weights [ipoint];
1211 }
1212 return ysum / wsum;
1213 }
1214
DataModeler_getCoefficientOfDetermination(DataModeler me,double * out_ssreg,double * out_sstot)1215 double DataModeler_getCoefficientOfDetermination (DataModeler me, double *out_ssreg, double *out_sstot) {
1216
1217 /*
1218 We cannot use the standard expressions for ss_tot, and ss_reg because our data are weighted by 1 / sigma [i].
1219 We need the weighted mean and we need to weigh all sums-of-squares accordingly;
1220 if all sigma [i] terms are equal, the formulas reduce to the standard ones.
1221 Ref: A. Buse (1973): Goodness of Fit in Generalized Least Squares Estimation, The American Statician, vol 27, 106-108
1222 */
1223
1224 const double ymean = DataModeler_getWeightedMean (me);
1225 autoVEC weights = DataModeler_getDataPointsWeights (me, my weighData);
1226 longdouble sstot = 0.0, ssreg = 0.0;
1227 for (integer ipoint = 1; ipoint <= my numberOfDataPoints; ipoint ++) {
1228 if (my data [ipoint] .status != kDataModelerData::INVALID) {
1229 double diff = (my data [ipoint] .y - ymean) * weights [ipoint];
1230 sstot += diff * diff; // total sum of squares
1231 const double estimate = my f_evaluate (me, my data [ipoint] .x, my parameters.get());
1232 diff = (estimate - my data [ipoint] .y) * weights [ipoint];
1233 ssreg += diff * diff; // regression sum of squares
1234 }
1235 }
1236 const double rSquared = ( sstot > 0.0 ? 1.0 - double (ssreg / sstot) : 1.0 );
1237
1238 if (out_ssreg)
1239 *out_ssreg = double (sstot - ssreg);
1240 if (out_sstot)
1241 *out_sstot = double (sstot);
1242 return rSquared;
1243 }
1244
DataModeler_drawBasisFunction_inside(DataModeler me,Graphics g,double xmin,double xmax,double ymin,double ymax,integer iterm,bool scale,integer numberOfPoints)1245 void DataModeler_drawBasisFunction_inside (DataModeler me, Graphics g, double xmin, double xmax, double ymin, double ymax,
1246 integer iterm, bool scale, integer numberOfPoints)
1247 {
1248 Function_unidirectionalAutowindow (me, & xmin, & xmax);
1249 autoVEC x = raw_VEC (numberOfPoints);
1250 autoVEC y = raw_VEC (numberOfPoints);
1251 autoVEC term = raw_VEC (my numberOfParameters);
1252 for (integer i = 1; i <= numberOfPoints; i ++) {
1253 x [i] = xmin + (i - 0.5) * (xmax - xmin) / numberOfPoints;
1254 my f_evaluateBasisFunctions (me, x [i], term.get());
1255 y [i] = term [iterm];
1256 y [i] = ( scale ? y [i] * my parameters [iterm] .value : y [i] );
1257 }
1258 if (ymax <= ymin) {
1259 MelderExtremaWithInit extrema;
1260 for (integer i = 1; i <= numberOfPoints; i ++)
1261 extrema.update (y [i]);
1262 ymax = extrema.max;
1263 ymin = extrema.min;
1264
1265 }
1266 Graphics_setWindow (g, xmin, xmax, ymin, ymax);
1267 for (integer i = 2; i <= numberOfPoints; i ++)
1268 Graphics_line (g, x [i-1], y [i-1], x [i], y [i]);
1269 }
1270
DataModeler_drawingSpecifiers_x(DataModeler me,double * xmin,double * xmax,integer * ixmin,integer * ixmax)1271 integer DataModeler_drawingSpecifiers_x (DataModeler me, double *xmin, double *xmax, integer *ixmin, integer *ixmax) {
1272 if (*xmax <= *xmin) {
1273 *xmin = my xmin;
1274 *xmax = my xmax;
1275 }
1276 *ixmin = 2;
1277 while (my data [*ixmin] .x < *xmin && *ixmin < my numberOfDataPoints)
1278 (*ixmin) ++;
1279 (*ixmin) --;
1280 *ixmax = my numberOfDataPoints - 1;
1281 while (my data [*ixmax] .x > *xmax && *ixmax > 1)
1282 (*ixmax) --;
1283 (*ixmax) ++;
1284 return *ixmax - *ixmin + 1;
1285 }
1286
DataModeler_drawOutliersMarked_inside(DataModeler me,Graphics g,double xmin,double xmax,double ymin,double ymax,double numberOfSigmas,conststring32 mark,double marksFontSize)1287 void DataModeler_drawOutliersMarked_inside (DataModeler me, Graphics g, double xmin, double xmax, double ymin, double ymax, double numberOfSigmas, conststring32 mark, double marksFontSize)
1288 {
1289 integer ixmin, ixmax;
1290 if (DataModeler_drawingSpecifiers_x (me, & xmin, & xmax, & ixmin, & ixmax) < 1)
1291 return;
1292 autoVEC zscores = DataModeler_getZScores (me);
1293
1294 Graphics_setWindow (g, xmin, xmax, ymin, ymax);
1295 Graphics_setFontSize (g, marksFontSize);
1296 Graphics_setTextAlignment (g, kGraphics_horizontalAlignment::CENTRE, Graphics_HALF);
1297 const double currentFontSize = Graphics_inqFontSize (g);
1298 for (integer ipoint = 1; ipoint <= my numberOfDataPoints; ipoint ++) {
1299 if (my data [ipoint]. status != kDataModelerData::INVALID) {
1300 const double x = my data [ipoint]. x, y = my data [ipoint]. y;
1301 if (x >= xmin && x <= xmax && y >= ymin && y <= ymax)
1302 if (fabs (zscores [ipoint]) > numberOfSigmas)
1303 Graphics_text (g, x, y, mark);
1304 }
1305 }
1306 Graphics_setFontSize (g, currentFontSize);
1307 }
1308
DataModeler_draw_inside(DataModeler me,Graphics g,double xmin,double xmax,double ymin,double ymax,bool estimated,integer numberOfParameters,bool errorbars,bool connectPoints,double barWidth_wc,bool drawDots)1309 void DataModeler_draw_inside (DataModeler me, Graphics g, double xmin, double xmax, double ymin, double ymax, bool estimated, integer numberOfParameters, bool errorbars, bool connectPoints, double barWidth_wc, bool drawDots)
1310 {
1311 Function_unidirectionalAutowindow (me, & xmin, & xmax);
1312
1313 integer ixmin = 1;
1314 while (ixmin <= my numberOfDataPoints && my data [ixmin]. x < xmin)
1315 ixmin ++;
1316 integer ixmax = my numberOfDataPoints;
1317 while (ixmax > 0 && my data [ixmax]. x > xmax)
1318 ixmax --;
1319 if (ixmin > ixmax)
1320 return; // nothing to draw
1321 getAutoNaturalNumberWithinRange (& numberOfParameters, my numberOfParameters);
1322
1323 Graphics_setWindow (g, xmin, xmax, ymin, ymax);
1324 double x1, y1, x2, y2;
1325 bool x1defined = false, x2defined = false;
1326 for (integer ipoint = ixmin; ipoint <= ixmax; ipoint ++) {
1327 if (my data [ipoint] .status != kDataModelerData::INVALID) {
1328 const double x = my data [ipoint]. x, y = my data [ipoint]. y;
1329 if (! x1defined) {
1330 x1 = x;
1331 y1 = ( estimated ? my f_evaluate (me, x, my parameters.get()) : y );
1332 x1defined = true;
1333 } else {
1334 x2 = x;
1335 y2 = ( estimated ? my f_evaluate (me, x, my parameters.get()) : y );
1336 x2defined = true;
1337 }
1338 if (x1defined && drawDots) {
1339 if (y >= ymin && y <= ymax)
1340 Graphics_speckle (g, x, y);
1341 }
1342 if (x2defined) { // if (x1defined && x2defined)
1343 if (connectPoints) {
1344 double xo1, yo1, xo2, yo2;
1345 if (NUMclipLineWithinRectangle (x1, y1, x2, y2,
1346 xmin, ymin, xmax, ymax, & xo1, & yo1, & xo2, & yo2)) {
1347 Graphics_line (g, xo1, yo1, xo2, yo2);
1348 }
1349 }
1350 x1 = x;
1351 y1 = y2;
1352 }
1353 const double sigma = my data [ipoint] .sigmaY;
1354 if (errorbars && isdefined (sigma) && sigma > 0 && x1defined) {
1355 const double ym = y1;
1356 double yt = ym + 0.5 * sigma, yb = ym - 0.5 * sigma;
1357 if (estimated) {
1358 yt = ( (y - y1) > 0.0 ? y : y1 );
1359 yb = ( (y - y1) > 0.0 ? y1 : y );
1360 }
1361 bool topOutside = yt > ymax, bottomOutside = yb < ymin;
1362 yt = ( topOutside ? ymax : yt );
1363 yb = ( bottomOutside ? ymin : yb );
1364 Graphics_line (g, x1, yb, x1, yt);
1365 if (barWidth_wc > 0.0 && ! estimated) {
1366 double xl = x1 - 0.5 * barWidth_wc;
1367 double xr = xl + barWidth_wc;
1368 if (! topOutside)
1369 Graphics_line (g, xl, yt, xr, yt);
1370 if (! bottomOutside)
1371 Graphics_line (g, xl, yb, xr, yb);
1372 }
1373 }
1374 }
1375 }
1376 }
1377
1378
DataModeler_drawModel_inside(DataModeler me,Graphics g,double xmin,double xmax,double ymin,double ymax,integer numberOfPoints)1379 void DataModeler_drawModel_inside (DataModeler me, Graphics g, double xmin, double xmax, double ymin, double ymax, integer numberOfPoints) {
1380 Function_bidirectionalAutowindow (me, & xmin, & xmax);
1381 autoVEC x = raw_VEC (numberOfPoints), y = raw_VEC (numberOfPoints);
1382 const double dx = (xmax - xmin) / numberOfPoints;
1383 for (integer ipoint = 1; ipoint <= numberOfPoints; ipoint ++) {
1384 x [ipoint] = xmin + (ipoint - 1) * dx;
1385 y [ipoint] = my f_evaluate (me, x [ipoint], my parameters.get());
1386 }
1387 if (ymin == 0.0 && ymax == 0.0) {
1388 ymin = NUMmin (y.get());
1389 ymax = NUMmax (y.get());
1390 }
1391 Graphics_setWindow (g, xmin, xmax, ymin, ymax);
1392 for (integer ipoint = 2; ipoint <= numberOfPoints; ipoint ++) {
1393 double segment_x1, segment_y1, segment_x2, segment_y2;
1394 if (NUMclipLineWithinRectangle (x [ipoint - 1], y [ipoint - 1], x [ipoint], y [ipoint],
1395 xmin, ymin, xmax, ymax, & segment_x1, & segment_y1, & segment_x2, & segment_y2))
1396 Graphics_line (g, segment_x1, segment_y1, segment_x2, segment_y2);
1397 }
1398 }
1399
DataModeler_drawModel(DataModeler me,Graphics g,double xmin,double xmax,double ymin,double ymax,integer numberOfPoints,bool garnish)1400 void DataModeler_drawModel (DataModeler me, Graphics g, double xmin, double xmax, double ymin, double ymax, integer numberOfPoints, bool garnish) {
1401 Function_bidirectionalAutowindow (me, & xmin, & xmax);
1402 Graphics_setInner (g);
1403 DataModeler_drawModel_inside (me, g, xmin, xmax, ymin, ymax, numberOfPoints);
1404 Graphics_unsetInner (g);
1405 if (garnish) {
1406 Graphics_drawInnerBox (g);
1407 Graphics_marksBottom (g, 2, true, true, false);
1408 Graphics_marksLeft (g, 2, true, true, false);
1409 }
1410 }
1411
DataModeler_drawTrack_inside(DataModeler me,Graphics g,double xmin,double xmax,double ymin,double ymax,bool estimated,integer numberOfParameters)1412 void DataModeler_drawTrack_inside (DataModeler me, Graphics g, double xmin, double xmax, double ymin, double ymax, bool estimated, integer numberOfParameters)
1413 {
1414 const bool errorbars = false, connectPoints = true;
1415 const double barWidth_mm = 0;
1416 DataModeler_draw_inside (me, g, xmin, xmax, ymin, ymax, estimated, numberOfParameters, errorbars, connectPoints, barWidth_mm, 0);
1417 }
1418
DataModeler_drawTrack(DataModeler me,Graphics g,double xmin,double xmax,double ymin,double ymax,bool estimated,integer numberOfParameters,bool garnish)1419 void DataModeler_drawTrack (DataModeler me, Graphics g, double xmin, double xmax, double ymin, double ymax,
1420 bool estimated, integer numberOfParameters, bool garnish) {
1421 if (ymax <= ymin)
1422 DataModeler_getExtremaY (me, & ymin, & ymax);
1423 Graphics_setInner (g);
1424 DataModeler_drawTrack_inside (me, g, xmin, xmax, ymin, ymax, estimated, numberOfParameters);
1425 Graphics_unsetInner (g);
1426 if (garnish) {
1427 Graphics_drawInnerBox (g);
1428 Graphics_marksBottom (g, 2, true, true, false);
1429 Graphics_marksLeft (g, 2, true, true, false);
1430 }
1431 }
1432
DataModeler_speckle_inside(DataModeler me,Graphics g,double xmin,double xmax,double ymin,double ymax,bool estimated,integer numberOfParameters,bool errorbars,double barWidth_wc)1433 void DataModeler_speckle_inside (DataModeler me, Graphics g, double xmin, double xmax, double ymin, double ymax, bool estimated, integer numberOfParameters, bool errorbars, double barWidth_wc) {
1434 bool connectPoints = false;
1435 DataModeler_draw_inside (me, g, xmin, xmax, ymin, ymax, estimated, numberOfParameters, errorbars, connectPoints, barWidth_wc, 1);
1436 }
1437
DataModeler_speckle(DataModeler me,Graphics g,double xmin,double xmax,double ymin,double ymax,bool estimated,integer numberOfParameters,bool errorbars,double barWidth_mm,bool garnish)1438 void DataModeler_speckle (DataModeler me, Graphics g, double xmin, double xmax, double ymin, double ymax,
1439 bool estimated, integer numberOfParameters, bool errorbars, double barWidth_mm, bool garnish)
1440 {
1441 if (ymax <= ymin)
1442 DataModeler_getExtremaY (me, & ymin, & ymax);
1443 Graphics_setInner (g);
1444 DataModeler_speckle_inside (me, g, xmin, xmax, ymin, ymax,
1445 estimated, numberOfParameters, errorbars, barWidth_mm);
1446 Graphics_unsetInner (g);
1447 if (garnish) {
1448 Graphics_drawInnerBox (g);
1449 Graphics_marksBottom (g, 2, true, true, false);
1450 Graphics_marksLeft (g, 2, true, true, false);
1451 }
1452 }
1453
DataModeler_to_Table_zscores(DataModeler me)1454 autoTable DataModeler_to_Table_zscores (DataModeler me) {
1455 try {
1456 const conststring32 columnNames [] = { U"x", U"z" };
1457 autoTable ztable = Table_createWithColumnNames (my numberOfDataPoints, ARRAY_TO_STRVEC (columnNames));
1458 autoVEC zscores = DataModeler_getZScores (me);
1459 for (integer ipoint = 1; ipoint <= my numberOfDataPoints; ipoint ++) {
1460 Table_setNumericValue (ztable.get(), ipoint, 1, my data [ipoint] .x);
1461 Table_setNumericValue (ztable.get(), ipoint, 2, zscores [ipoint]);
1462 }
1463 return ztable;
1464 } catch (MelderError) {
1465 Melder_throw (U"Table not created.");
1466 }
1467 }
1468
DataModeler_normalProbabilityPlot(DataModeler me,Graphics g,integer numberOfQuantiles,double numberOfSigmas,double labelSize,conststring32 label,bool garnish)1469 void DataModeler_normalProbabilityPlot (DataModeler me, Graphics g, integer numberOfQuantiles, double numberOfSigmas, double labelSize, conststring32 label, bool garnish) {
1470 try {
1471 autoTable thee = DataModeler_to_Table_zscores (me);
1472 Table_normalProbabilityPlot (thee.get(), g, 2, numberOfQuantiles, numberOfSigmas, labelSize, label, garnish);
1473 } catch (MelderError) {
1474 Melder_clearError ();
1475 }
1476 }
1477
DataModeler_setBasisFunctions(DataModeler me,kDataModelerFunction type)1478 void DataModeler_setBasisFunctions (DataModeler me, kDataModelerFunction type) {
1479 if (type == kDataModelerFunction::LINEAR) {
1480 my f_evaluate = linear_evaluate;
1481 my f_evaluateBasisFunctions = linear_evaluateBasisFunctions;
1482 my fit = series_fit;
1483 } else if (type == kDataModelerFunction::LEGENDRE) {
1484 my f_evaluate = legendre_evaluate;
1485 my f_evaluateBasisFunctions = legendre_evaluateBasisFunctions;
1486 my fit = series_fit;
1487 } else if (type == kDataModelerFunction::POLYNOME) {
1488 my f_evaluate = polynomial_evaluate;
1489 my f_evaluateBasisFunctions = polynome_evaluateBasisFunctions;
1490 my fit = series_fit;
1491 } else if (type == kDataModelerFunction::SIGMOID) {
1492 my f_evaluate = sigmoid_evaluate;
1493 my f_evaluateBasisFunctions = dummy_evaluateBasisFunctions;
1494 my fit = sigmoid_fit;
1495 } else if (type == kDataModelerFunction::SIGMOID_PLUS_CONSTANT) {
1496 my f_evaluate = sigmoid_plus_constant_evaluate;
1497 my f_evaluateBasisFunctions = dummy_evaluateBasisFunctions;
1498 my fit = sigmoid_plus_constant_fit;
1499 } else if (type == kDataModelerFunction::EXPONENTIAL) {
1500 my f_evaluate = exponential_evaluate;
1501 my f_evaluateBasisFunctions = dummy_evaluateBasisFunctions;
1502 my fit = exponential_fit;
1503 } else if (type == kDataModelerFunction::EXPONENTIAL_PLUS_CONSTANT) {
1504 my f_evaluate = exponential_plus_constant_evaluate;
1505 my f_evaluateBasisFunctions = dummy_evaluateBasisFunctions;
1506 my fit = exponential_plus_constant_fit;
1507 }
1508 my type = type;
1509 }
1510
DataModeler_init(DataModeler me,double xmin,double xmax,integer numberOfDataPoints,integer numberOfParameters,kDataModelerFunction type)1511 void DataModeler_init (DataModeler me, double xmin, double xmax, integer numberOfDataPoints, integer numberOfParameters, kDataModelerFunction type) {
1512 my xmin = xmin;
1513 my xmax = xmax;
1514 DataModeler_setBasisFunctions (me, type);
1515 my numberOfDataPoints = numberOfDataPoints;
1516 my data = newvectorzero<structDataModelerData> (numberOfDataPoints);
1517 my numberOfParameters = ( (type == kDataModelerFunction::EXPONENTIAL) ? 2 :
1518 (type == kDataModelerFunction::EXPONENTIAL_PLUS_CONSTANT || type == kDataModelerFunction::SIGMOID) ? 3 :
1519 (type == kDataModelerFunction::SIGMOID_PLUS_CONSTANT ? 4 : numberOfParameters) );
1520 Melder_require (my numberOfParameters > 0,
1521 U"The number of parameters should be greater than zero.");
1522 my parameters = newvectorzero<structDataModelerParameter> (my numberOfParameters);
1523 for (integer ipar = 1; ipar <= my numberOfParameters; ipar ++)
1524 my parameters [ipar]. status = kDataModelerParameterStatus::FREE;
1525 my parameterNames = Strings_createFixedLength (my numberOfParameters);
1526 my parameterCovariances = Covariance_create (my numberOfParameters);
1527 my type = type;
1528 }
1529
DataModeler_create(double xmin,double xmax,integer numberOfDataPoints,integer numberOfParameters,kDataModelerFunction type)1530 autoDataModeler DataModeler_create (double xmin, double xmax, integer numberOfDataPoints, integer numberOfParameters, kDataModelerFunction type) {
1531 try {
1532 autoDataModeler me = Thing_new (DataModeler);
1533 DataModeler_init (me.get(), xmin, xmax, numberOfDataPoints, numberOfParameters, type);
1534 return me;
1535 } catch (MelderError) {
1536 Melder_throw (U"DataModeler not created.");
1537 }
1538 }
1539
DataModeler_createSimple(double xmin,double xmax,integer numberOfDataPoints,constVECVU const & parameterValues,double gaussianNoiseStd,kDataModelerFunction type)1540 autoDataModeler DataModeler_createSimple (double xmin, double xmax,
1541 integer numberOfDataPoints, constVECVU const& parameterValues, double gaussianNoiseStd, kDataModelerFunction type)
1542 {
1543 try {
1544 Melder_require (xmin < xmax,
1545 U"The domain should be defined properly.");
1546
1547 autoDataModeler me = DataModeler_create (xmin, xmax, numberOfDataPoints, parameterValues.size, type);
1548 for (integer ipar = 1; ipar <= parameterValues.size; ipar ++)
1549 my parameters [ipar]. value = parameterValues [ipar]; // parameters status ok
1550 // generate the data that beinteger to the parameter values
1551 for (integer ipoint = 1; ipoint <= numberOfDataPoints; ipoint ++) {
1552 my data [ipoint]. x = xmin + (ipoint - 0.5) * (xmax - xmin) / numberOfDataPoints;
1553 const double modelY = my f_evaluate (me.get(), my data [ipoint]. x, my parameters.get());
1554 my data [ipoint]. y = modelY + NUMrandomGauss (0.0, gaussianNoiseStd);
1555 my data [ipoint]. sigmaY = undefined;
1556 }
1557 my weighData = kDataModelerWeights::EQUAL_WEIGHTS;
1558 return me;
1559 } catch (MelderError) {
1560 Melder_throw (U"No simple DataModeler created.");
1561 }
1562 }
1563
DataModeler_createFromDataModeler(DataModeler thee,integer numberOfParameters,kDataModelerFunction type)1564 autoDataModeler DataModeler_createFromDataModeler (DataModeler thee, integer numberOfParameters, kDataModelerFunction type) {
1565 try {
1566 autoDataModeler me = DataModeler_create (thy xmin, thy xmax, thy numberOfDataPoints, numberOfParameters, type);
1567 for (integer k = 1; k <= my numberOfDataPoints; k ++) {
1568 my data [k]. status = my data [k]. status;
1569 my data [k]. x = thy data [k]. x;
1570 my data [k]. y = thy data [k]. y;
1571 }
1572 return me;
1573 } catch (MelderError) {
1574 Melder_throw (U"No DataModeler could be created.");
1575 }
1576 }
1577
DataModeler_fit(DataModeler me)1578 void DataModeler_fit (DataModeler me) {
1579 try {
1580 my fit (me);
1581 } catch (MelderError) {
1582 Melder_throw (U"DataModeler no fit.");
1583 }
1584 }
1585
DataModeler_setDataWeighing(DataModeler me,kDataModelerWeights weighData)1586 void DataModeler_setDataWeighing (DataModeler me, kDataModelerWeights weighData) {
1587 if (my weighData != weighData) {
1588 my weighData = weighData;
1589 DataModeler_fit (me); // because sigma has changed!
1590 }
1591 }
1592
DataModeler_to_Covariance_parameters(DataModeler me)1593 autoCovariance DataModeler_to_Covariance_parameters (DataModeler me) {
1594 try {
1595 autoCovariance cov = Data_copy (my parameterCovariances.get());
1596 return cov;
1597 } catch (MelderError) {
1598 Melder_throw (U"Covariance not created.");
1599 }
1600 }
1601
Table_to_DataModeler(Table me,double xmin,double xmax,integer xcolumn,integer ycolumn,integer sigmacolumn,integer numberOfParameters,kDataModelerFunction type)1602 autoDataModeler Table_to_DataModeler (Table me, double xmin, double xmax, integer xcolumn, integer ycolumn, integer sigmacolumn, integer numberOfParameters, kDataModelerFunction type) {
1603 try {
1604 Table_checkSpecifiedColumnNumberWithinRange (me, xcolumn);
1605 Table_checkSpecifiedColumnNumberWithinRange (me, ycolumn);
1606 const bool hasSigmaColumn = ( sigmacolumn > 0 );
1607 if (hasSigmaColumn)
1608 Table_checkSpecifiedColumnNumberWithinRange (me, sigmacolumn);
1609 const integer numberOfRows = my rows.size;
1610 integer numberOfData = 0;
1611 autoVEC x = raw_VEC (numberOfRows);
1612 autoVEC y = raw_VEC (numberOfRows);
1613 autoVEC sy = raw_VEC (numberOfRows);
1614 for (integer i = 1; i <= numberOfRows; i ++) {
1615 const double val = Table_getNumericValue_Assert (me, i, xcolumn);
1616 if (isdefined (val)) {
1617 x [++ numberOfData] = val;
1618 if (numberOfData > 1) {
1619 if (val < x [numberOfData - 1]) {
1620 Melder_throw (U"Data with x-values should be sorted.");
1621 } else if (val == x [numberOfData - 1]) {
1622 Melder_throw (U"All x-values should be different.");
1623 }
1624 }
1625 y [numberOfData] = Table_getNumericValue_Assert (me, i, ycolumn);
1626 sy [numberOfData] = ( hasSigmaColumn ? Table_getNumericValue_Assert (me, i, sigmacolumn) : undefined );
1627 }
1628 }
1629 if (xmax <= xmin)
1630 NUMextrema (x.part (1, numberOfData), & xmin, & xmax);
1631 Melder_require (xmin < xmax,
1632 U"The range of the x-values is too small.");
1633
1634 integer numberOfDataPoints = 0, validData = 0;
1635 for (integer i = 1; i <= numberOfData; i ++) {
1636 if (x [i] >= xmin && x [i] <= xmax)
1637 numberOfDataPoints ++;
1638 }
1639 /*
1640 Some models have a fixed number of parameters
1641 */
1642 autoDataModeler thee = DataModeler_create (xmin, xmax, numberOfDataPoints, numberOfParameters, type);
1643 numberOfDataPoints = 0;
1644 for (integer i = 1; i <= numberOfData; i ++) {
1645 if (x [i] >= xmin && x [i] <= xmax) {
1646 thy data [++ numberOfDataPoints]. x = x [i];
1647 thy data [numberOfDataPoints]. status = kDataModelerData::INVALID;
1648 if (isdefined (y [i])) {
1649 thy data [numberOfDataPoints]. y = y [i];
1650 thy data [numberOfDataPoints]. sigmaY = sy [i];
1651 thy data [numberOfDataPoints]. status = kDataModelerData::VALID;
1652 validData ++;
1653 }
1654 }
1655 }
1656 thy numberOfDataPoints = numberOfDataPoints;
1657 Melder_require (validData >= thy numberOfParameters,
1658 U"The number of parameters should not exceed the number of data points.");
1659
1660 DataModeler_setDataWeighing (thee.get(), ( hasSigmaColumn ? kDataModelerWeights::ONE_OVER_SIGMA : kDataModelerWeights::EQUAL_WEIGHTS));
1661 thy tolerance = 1e-8;
1662 DataModeler_fit (thee.get());
1663 return thee;
1664 } catch (MelderError) {
1665 Melder_throw (U"Datamodeler not created from Table.");
1666 }
1667 }
1668
DataModeler_getResidualSumOfSquares(DataModeler me,integer * out_numberOfValidDataPoints)1669 double DataModeler_getResidualSumOfSquares (DataModeler me, integer *out_numberOfValidDataPoints) {
1670 integer numberOfValidDataPoints = 0;
1671 longdouble residualSS = 0.0;
1672 for (integer i = 1; i <= my numberOfDataPoints; i ++) {
1673 if (my data [i] .status != kDataModelerData::INVALID) {
1674 ++ numberOfValidDataPoints;
1675 residualSS += sqr (my data [i]. y - my f_evaluate (me, my data [i]. x, my parameters.get()));
1676 }
1677 }
1678 if (out_numberOfValidDataPoints)
1679 *out_numberOfValidDataPoints = numberOfValidDataPoints;
1680 return ( numberOfValidDataPoints > 0 ? (double) residualSS : undefined );
1681 }
1682
DataModeler_reportChiSquared(DataModeler me)1683 void DataModeler_reportChiSquared (DataModeler me) {
1684 MelderInfo_writeLine (U"Chi squared test:");
1685 MelderInfo_writeLine (( my weighData == kDataModelerWeights::EQUAL_WEIGHTS ? U"Standard deviation is estimated from the data." :
1686 ( my weighData == kDataModelerWeights::ONE_OVER_SIGMA ? U"Sigmas are used as estimate for local standard deviations." :
1687 ( my weighData == kDataModelerWeights::RELATIVE_ ? U"1/Q's are used as estimate for local standard deviations." :
1688 U"Sqrt sigmas are used as estimate for local standard deviations." ) ) ));
1689 double ndf, probability;
1690 const double chisq = DataModeler_getChiSquaredQ (me, & probability, & ndf);
1691 MelderInfo_writeLine (U"Chi squared = ", chisq);
1692 MelderInfo_writeLine (U"Probability = ", probability);
1693 MelderInfo_writeLine (U"Number of degrees of freedom = ", ndf);
1694 }
1695
DataModeler_getDataStandardDeviation(DataModeler me)1696 double DataModeler_getDataStandardDeviation (DataModeler me) {
1697 try {
1698 integer numberOfDataPoints = 0;
1699 autoVEC y = raw_VEC (my numberOfDataPoints);
1700 for (integer i = 1; i <= my numberOfDataPoints; i ++)
1701 if (my data [i] .status != kDataModelerData::INVALID)
1702 y [++ numberOfDataPoints] = my data [i]. y;
1703 y. resize (numberOfDataPoints); // fake shrink
1704 return NUMstdev (y.get());
1705 } catch (MelderError) {
1706 Melder_throw (U"Cannot estimate sigma.");
1707 }
1708 }
1709
1710 /* End of file DataModeler.cpp */
1711