1 /* DataModeler.cpp
2  *
3  * Copyright (C) 2014-2021 David Weenink, 2017 Paul Boersma
4  *
5  * This code is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or (at
8  * your option) any later version.
9  *
10  * This code is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * ainteger with this work. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "DataModeler.h"
20 #include "NUM2.h"
21 #include "NUMmachar.h"
22 #include "SVD.h"
23 #include "Strings_extensions.h"
24 #include "Sound_and_LPC_robust.h"
25 #include "Table_extensions.h"
26 
27 #include "oo_DESTROY.h"
28 #include "DataModeler_def.h"
29 #include "oo_COPY.h"
30 #include "DataModeler_def.h"
31 #include "oo_EQUAL.h"
32 #include "DataModeler_def.h"
33 #include "oo_CAN_WRITE_AS_ENCODING.h"
34 #include "DataModeler_def.h"
35 #include "oo_WRITE_TEXT.h"
36 #include "DataModeler_def.h"
37 #include "oo_WRITE_BINARY.h"
38 #include "DataModeler_def.h"
39 #include "oo_READ_TEXT.h"
40 #include "DataModeler_def.h"
41 #include "oo_READ_BINARY.h"
42 #include "DataModeler_def.h"
43 #include "oo_DESCRIPTION.h"
44 #include "DataModeler_def.h"
45 
46 #include "enums_getText.h"
47 #include "DataModeler_enums.h"
48 #include "enums_getValue.h"
49 #include "DataModeler_enums.h"
50 
51 Thing_implement (DataModeler, Function, 1);
52 
v_info()53 void structDataModeler :: v_info () {
54 	MelderInfo_writeLine (U"   Time domain:");
55 	MelderInfo_writeLine (U"      Start time: ", xmin, U" seconds");
56 	MelderInfo_writeLine (U"      End time: ", xmax, U" seconds");
57 	MelderInfo_writeLine (U"      Total duration: ", xmax - xmin, U" seconds");
58 	double ndf, rSquared = DataModeler_getCoefficientOfDetermination (this, nullptr, nullptr);
59 	double probability, chisq = DataModeler_getChiSquaredQ (this, & probability, & ndf);
60 	MelderInfo_writeLine (U"   Fit:");
61 	MelderInfo_writeLine (U"      Number of data points: ", numberOfDataPoints);
62 	MelderInfo_writeLine (U"      Number of parameters: ", numberOfParameters);
63 	MelderInfo_writeLine (U"      Each data point has ",  (weighData == kDataModelerWeights::EQUAL_WEIGHTS ? U" the same weight (estimated)." :
64 		( weighData == kDataModelerWeights::ONE_OVER_SIGMA ? U"a different weight (sigmaY)." :
65 		( weighData == kDataModelerWeights::RELATIVE_ ? U"a different relative weight (Y_value/sigmaY)." :
66 		U"a different weight (SQRT(sigmaY))." ) ) ));
67 	MelderInfo_writeLine (U"      Chi squared: ", chisq);
68 	MelderInfo_writeLine (U"      Number of degrees of freedom: ", ndf);
69 	MelderInfo_writeLine (U"      Probability: ", probability);
70 	MelderInfo_writeLine (U"      R-squared: ", rSquared);
71 	for (integer ipar = 1; ipar <= numberOfParameters; ipar ++) {
72 		double sigma = ( parameters [ipar] .status == kDataModelerParameterStatus::FIXED_ ? 0 :
73 			(isdefined (parameterCovariances -> data [ipar] [ipar]) ? sqrt (parameterCovariances -> data [ipar] [ipar]) : undefined) );
74 		MelderInfo_writeLine (U"      p [", ipar, U"] = ", parameters [ipar] .value, U"; sigma = ", sigma);
75 	}
76 }
77 
constant_evaluate(DataModeler,double,vector<structDataModelerParameter> p)78 static double constant_evaluate (DataModeler /* me */, double /* xin */, vector<structDataModelerParameter> p) {
79 	return p [1].value;
80 }
81 
constant_evaluateBasisFunctions(DataModeler me,double xin,VEC terms)82 static void constant_evaluateBasisFunctions (DataModeler me, double xin, VEC terms) {
83 	terms  <<=  1.0;
84 }
85 
linear_evaluate(DataModeler me,double xin,vector<structDataModelerParameter> p)86 static double linear_evaluate (DataModeler me, double xin, vector<structDataModelerParameter> p) {
87 	return undefined;
88 }
89 
linear_evaluateBasisFunctions(DataModeler me,double xin,VEC terms)90 static void linear_evaluateBasisFunctions (DataModeler me, double xin, VEC terms) {
91 	terms  <<=  undefined;
92 }
93 
polynomial_evaluate(DataModeler me,double xin,vector<structDataModelerParameter> p)94 static double polynomial_evaluate (DataModeler me, double xin, vector<structDataModelerParameter> p) {
95 	Melder_assert (p.size == my numberOfParameters);
96 	/*
97 		From domain [xmin, xmax] to domain [-(xmax -xmin)/2, (xmax-xmin)/2]
98 	*/
99 	const double x = (2.0 * xin - my xmin - my xmax) / 2.0;
100 	double xpi = 1.0, result = p [1] .value;
101 	for (integer ipar = 2; ipar <= my numberOfParameters; ipar ++) {
102 		xpi *= x;
103 		result += p [ipar] .value * xpi;
104 	}
105 	return result;
106 }
107 
polynome_evaluateBasisFunctions(DataModeler me,double xin,VEC term)108 static void polynome_evaluateBasisFunctions (DataModeler me, double xin, VEC term) {
109 	Melder_assert (term.size == my numberOfParameters);
110 	/*
111 		From domain [xmin, xmax] to domain [-(xmax -xmin)/2, (xmax-xmin)/2]
112 	*/
113 	const double x = (2.0 * xin - my xmin - my xmax) / 2.0;
114 	term [1] = 1.0;
115 	for (integer ipar = 2; ipar <= my numberOfParameters; ipar ++)
116 		term [ipar] = term [ipar - 1] * x;
117 }
118 
legendre_evaluate(DataModeler me,double xin,vector<structDataModelerParameter> p)119 static double legendre_evaluate (DataModeler me, double xin, vector<structDataModelerParameter> p) {
120 	Melder_assert (p.size == my numberOfParameters);
121 	/*
122 		From domain [xmin, xmax] to domain [-1, 1]
123 	*/
124 	const double x = (2.0 * xin - my xmin - my xmax) / (my xmax - my xmin);
125 	double pti, ptim1, ptim2 = 1.0, result = p [1] .value;
126 	if (my numberOfParameters > 1) {
127 		const double twox = 2.0 * x;
128 		double f2 = x, d = 1.0;
129 		result += p [2] .value * (ptim1 = x);
130 		for (integer ipar = 3; ipar <= my numberOfParameters; ipar ++) {
131 			const double f1 = d ++;
132 			f2 += twox;
133 			result += p [ipar] .value * (pti = (f2 * ptim1 - f1 * ptim2) / d);
134 			ptim2 = ptim1;
135 			ptim1 = pti;
136 		}
137 	}
138 	return result;
139 }
140 
legendre_evaluateBasisFunctions(DataModeler me,double xin,VEC term)141 static void legendre_evaluateBasisFunctions (DataModeler me, double xin, VEC term) {
142 	Melder_assert (term.size == my numberOfParameters);
143 	term [1] = 1.0;
144 	/*
145 		transform x from domain [xmin, xmax] to domain [-1, 1]
146 	*/
147 	const double x = (2.0 * xin - my xmin - my xmax) / (my xmax - my xmin);
148 	if (my numberOfParameters > 1) {
149 		const double twox = 2.0 * x;
150 		double f2 = term [2] = x, d = 1.0;
151 		for (integer ipar = 3; ipar <= my numberOfParameters; ipar ++) {
152 			const double f1 = d ++;
153 			f2 += twox;
154 			term [ipar] = (f2 * term [ipar - 1] - f1 * term [ipar - 2]) / d;
155 		}
156 	}
157 }
158 
sigmoid_plus_constant_evaluate(DataModeler me,double xin,vector<structDataModelerParameter> p)159 static double sigmoid_plus_constant_evaluate (DataModeler me, double xin, vector<structDataModelerParameter> p) {
160 	Melder_assert (p.size == my numberOfParameters);
161 	/*
162 		From domain [xmin, xmax] to domain [-(xmax -xmin)/2, (xmax-xmin)/2]
163 		No need to translate because xin and p [3] are in the same domain.
164 	*/
165 	double result = p [1].value;
166 	if (p [2].value != 0.0)
167 		result += p [2].value / (1.0 + exp (- (xin - p [3].value) / p [4].value));
168 	return result;
169 }
170 
sigmoid_evaluate(DataModeler me,double xin,vector<structDataModelerParameter> p)171 static double sigmoid_evaluate (DataModeler me, double xin, vector<structDataModelerParameter> p) {
172 	Melder_assert (p.size == my numberOfParameters);
173 	/*
174 		From domain [xmin, xmax] to domain [-(xmax -xmin)/2, (xmax-xmin)/2]
175 		No need to translate because xin and p [3] are in the same domain.
176 	*/
177 	const double result = p [1].value / (1.0 + exp (- (xin - p [2].value) / p [3].value));
178 	return result;
179 }
180 
exponential_evaluate(DataModeler me,double xin,vector<structDataModelerParameter> p)181 static double exponential_evaluate (DataModeler me, double xin, vector<structDataModelerParameter> p) {
182 	Melder_assert (p.size == my numberOfParameters);
183 	/*
184 		From domain [xmin, xmax] to domain [-(xmax -xmin)/2, (xmax-xmin)/2]
185 	*/
186 	const double x = xin - 0.5 * (my xmin + my xmax);
187 	return p [1].value * exp (p [2].value * x);
188 }
189 
exponential_plus_constant_evaluate(DataModeler me,double xin,vector<structDataModelerParameter> p)190 static double exponential_plus_constant_evaluate (DataModeler me, double xin, vector<structDataModelerParameter> p) {
191 	Melder_assert (p.size >= 3);
192 	/*
193 		From domain [xmin, xmax] to domain [-(xmax -xmin)/2, (xmax-xmin)/2]
194 	*/
195 	const double x = xin - 0.5 * (my xmin + my xmax);
196 	return p [1].value + p [2].value * exp (p [3].value * x);
197 }
198 
dummy_evaluateBasisFunctions(DataModeler me,double xin,VEC term)199 static void dummy_evaluateBasisFunctions (DataModeler me, double xin, VEC term) {
200 	term  <<=  undefined;
201 }
202 
DataModeler_solveDesign(DataModeler me,constMAT const & design,constVEC const & y,autoMAT * covariance)203 autoVEC DataModeler_solveDesign (DataModeler me, constMAT const& design, constVEC const& y, autoMAT *covariance) {
204 	Melder_require (design.nrow == y.size,
205 		U"The design matrix and the estimate should have the same number of rows.");
206 	autoSVD svd = SVD_createFromGeneralMatrix (design);
207 	if (! NUMfpp)
208 		NUMmachar ();
209 	SVD_zeroSmallSingularValues (svd.get(), ( my tolerance > 0.0 ? my tolerance : my numberOfDataPoints * NUMfpp -> eps ));
210 	autoVEC solution = SVD_solve (svd.get(), y);
211 	if (covariance) {
212 		autoMAT covar = SVD_getSquared (svd.get(), true);
213 		*covariance = covar.move();
214 	}
215 	return solution;
216 }
217 
218 /*
219 	Model: y [i] = a * exp (b * x [i]), i=1..n, solve for a, b.
220 	log(y(x)= log(a) + b * x is linear model
221 	Precondition: y [i] > 0 || y [i] < 0
222 */
exponential_fit(DataModeler me)223 static void exponential_fit (DataModeler me) {
224 	if (my parameters [1].status == kDataModelerParameterStatus::FIXED_ && my parameters [2].status == kDataModelerParameterStatus::FIXED_)
225 		return;
226 	autoVEC weights = DataModeler_getDataPointsWeights (me, my weighData);
227 	double ymin, ymax;
228 	DataModeler_getExtremaY (me, & ymin, & ymax);
229 	const double sign = ymin * ymax;
230 	Melder_require (sign >= 0.0,
231 		U"All data should have the same sign.");
232 	const double xtr = 0.5 * (my xmin + my xmax);
233 	if (my parameters [1].status == kDataModelerParameterStatus::FIXED_) {
234 		/*
235 			Model: z(x) = b * x, where z(x) = log(y) - log (a)
236 			A minimization of the squared error in the log domain gives greater weight to small values.
237 			To compensate, we weigh with the y value. Weisstein, Eric W. "Least Squares Fitting--Exponential." From MathWorld--A Wolfram Web Resource. https://mathworld.wolfram.com/LeastSquaresFittingExponential.html
238 		*/
239 		autoMAT design = zero_MAT (my numberOfDataPoints, 1);
240 		autoVEC yEstimate = raw_VEC (my numberOfDataPoints);
241 		integer index = 0;
242 		for (integer k = 1; k <= my numberOfDataPoints; k ++) {
243 			if (my data [k].status != kDataModelerData::INVALID) {
244 				design [++ index] [1] = (my data [k].x - xtr) * weights [k] * my data [k].y;
245 				yEstimate [index] = (log (my data [k].y) - log (my parameters [1].value)) * weights [k] * my data [k].y;
246 			}
247 		}
248 		design.resize (index, 1);
249 		yEstimate.resize (index);
250 		autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
251 		my parameters [2].value = solution [1];
252 	} else if (my parameters [2].status == kDataModelerParameterStatus::FIXED_) {
253 		/*
254 			Model: y(x) = a * f(x), where f(x) = exp (b * x)
255 		*/
256 		autoMAT design = zero_MAT (my numberOfDataPoints, 1);
257 		autoVEC yEstimate = raw_VEC (my numberOfDataPoints);
258 		integer index = 0;
259 		for (integer k = 1; k <= my numberOfDataPoints; k ++) {
260 			if (my data [k].status != kDataModelerData::INVALID) {
261 				design [++ index] [1] = exp (my parameters [2].value * (my data [k].x - xtr)) * weights [k];
262 				yEstimate [index] = my data [k].y * weights [k];
263 			}
264 		}
265 		design.resize (index, 1);
266 		yEstimate.resize (index);
267 		autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
268 		my parameters [1].value = solution [1];
269 	} else {
270 		/*
271 			Model z(x)= a + b * x, where z(x) = log(y(x))
272 			A minimization of the squared error in the log domain gives greater weight to small values.
273 			To compensate we weigh with the y value.
274 		*/
275 		autoMAT design = zero_MAT (my numberOfDataPoints, 2);
276 		autoVEC yEstimate = raw_VEC (my numberOfDataPoints);
277 		integer index = 0;
278 		for (integer k = 1; k <= my numberOfDataPoints; k ++) {
279 			if (my data [k].status != kDataModelerData::INVALID) {
280 				design [++ index] [1] = 1.0 * my data [k].y * weights [k];
281 				design [index] [2] = (my data [k].x - xtr) * my data [k].y * weights [k];
282 				yEstimate [index] = log ( sign >= 0.0 ? my data [k].y : - my data [k].y ) * my data [k].y * weights [k];
283 			}
284 		}
285 		design.resize (index, 2);
286 		yEstimate.resize (index);
287 		autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
288 		const double a = exp (solution [1]);
289 		my parameters [1].value = ( sign >= 0.0 ? a : - a );
290 		my parameters [2].value = solution [2];
291 	}
292 }
293 
294 /*
295 	Model: y [i] = constant + b * exp (c * x [i]), i=1..n, solve for constant, b & c.
296 	Solution according to Jean Jacquelin (2009), Régressions et équations intégrales, https://fr.scribd.com/doc/14674814/Regressions-et-equations-integrales,
297 	pages 16-17.
298 	Precondition: x [i] must be increasing.
299 */
300 
linear_exponent_evaluateBasisFunctions(DataModeler me,double xin,VEC term)301 static void linear_exponent_evaluateBasisFunctions (DataModeler me, double xin, VEC term) {
302 	Melder_assert (term.size >= 2); // our model is a two parameter one!
303 	/*
304 		From domain [xmin, xmax] to domain [-(xmax -xmin)/2, (xmax-xmin)/2]
305 	*/
306 	const double x = xin - 0.5 * (my xmin + my xmax);
307 	term [1] = 1.0;
308 	term [2] = exp (my parameters [3].value * x);
309 }
310 
exponential_plus_constant_fit(DataModeler me)311 static void exponential_plus_constant_fit (DataModeler me) {
312 	if (my parameters [1].status == kDataModelerParameterStatus::FIXED_) {
313 		if (my parameters [2].status == kDataModelerParameterStatus::FIXED_ &&
314 			my parameters [3].status == kDataModelerParameterStatus::FIXED_)
315 				return;
316 		/*
317 			Model: z(x) = b * exp (c * x), where z(x) = y(x) - a.
318 		*/
319 		autoDataModeler thee = DataModeler_createFromDataModeler (me, 2, kDataModelerFunction::EXPONENTIAL);
320 		for (integer k = 1; k <= my numberOfDataPoints; k ++) {
321 			if (thy data [k].status != kDataModelerData::INVALID) {
322 				thy data [k].y -= my parameters [1].value;
323 			}
324 		}
325 		DataModeler_fit (thee.get());
326 		my parameters [2].value = thy parameters [1].value;
327 		my parameters [3].value = thy parameters [2].value;
328 	} else if (my parameters [3].status == kDataModelerParameterStatus::FIXED_) {
329 		if (my parameters [2].status == kDataModelerParameterStatus::FIXED_) {
330 			/*
331 				Model: z(x)= a, where z(x) = y(x) - b * exp(c * x)
332 			*/
333 			autoDataModeler thee = DataModeler_createFromDataModeler (me, 1, kDataModelerFunction::LINEAR);
334 			thy f_evaluate = constant_evaluate;
335 			thy f_evaluateBasisFunctions = constant_evaluateBasisFunctions;
336 			my parameters [1].value = 0.0;
337 			for (integer k = 1; k <= my numberOfDataPoints; k ++) {
338 				if (thy data [k].status != kDataModelerData::INVALID) {
339 					thy data [k].y -= my f_evaluate (me, thy data [k].x, my parameters.get());// z(x) = y(x) - b * exp(c * x)
340 				}
341 			}
342 			DataModeler_fit (thee.get());
343 			my parameters [1].value = thy parameters [1].value;
344 		} else {
345 			/*
346 				Model: z(x) = a + b * f(x), where f(x) = exp (c * x).
347 				Fit as linear model with the third parameter fixed!
348 			*/
349 			autoDataModeler thee = DataModeler_createFromDataModeler (me, 2, kDataModelerFunction::LINEAR);
350 			thy parameters.resize (thy numberOfParameters + 1);
351 			thy parameters [thy numberOfParameters + 1].value = my parameters [3].value;
352 			thy parameters [thy numberOfParameters + 1].status = kDataModelerParameterStatus::FIXED_EXTRA;
353 			thy f_evaluate = exponential_plus_constant_evaluate;
354 			thy f_evaluateBasisFunctions = linear_exponent_evaluateBasisFunctions;
355 			DataModeler_fit (thee.get());
356 			my parameters [1].value = thy parameters [1].value;
357 			my parameters [2].value = thy parameters [2].value;
358 		}
359 	} else {
360 		/*
361 			First we determine c.
362 			Model: z(x) = A * f1(x) + B * f2(x), where z(x) = y(x) - y [1], f1(x) = x - x [1], f2(x) = integral (x1, x, y(x)dx)
363 			A = - a * c, B = c
364 		*/
365 		autoMAT design = zero_MAT (my numberOfDataPoints, 2);
366 		autoVEC yEstimate = raw_VEC (my numberOfDataPoints);
367 		autoVEC weights = DataModeler_getDataPointsWeights (me, my weighData);
368 		const double x1 = my data [1].x, y1 = my data [1].y;
369 		long double xkm1 = x1, ykm1 = y1, sk = 0.0;
370 		/*
371 			First row of design has only zero's, skip it.
372 		*/
373 		integer index = 0;
374 		for (integer k = 2; k <= my numberOfDataPoints; k ++) {
375 			if (my data [k] .status != kDataModelerData::INVALID) {
376 				const long double xk = my data [k].x, yk = my data [k].y;
377 				sk += 0.5 * (yk + ykm1) * (xk - xkm1); // Jacquelin, Eq. (7)
378 				design [++ index] [1] = (xk - x1) * weights [k]; // Jacquelin, Eq. (9)
379 				design [index] [2] = sk * weights [k];
380 				yEstimate [index] = (yk - y1) * weights [k];
381 				xkm1 = xk;
382 				ykm1 = yk;
383 			}
384 		}
385 		design.resize (index, 2);
386 		yEstimate.resize (index);
387 		autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
388 		const double c = solution [2];
389 		my parameters [3].value = c;
390 		if (my parameters [2].status == kDataModelerParameterStatus::FIXED_) {
391 			/*
392 				Model: z(x)= a, where z(x) = y(x) - b * exp(c * x)
393 			*/
394 			autoDataModeler thee = DataModeler_createFromDataModeler (me, 1, kDataModelerFunction::LINEAR);
395 			thy f_evaluate = constant_evaluate;
396 			thy f_evaluateBasisFunctions = constant_evaluateBasisFunctions;
397 			my parameters [1].value = 0.0;
398 			for (integer k = 1; k <= my numberOfDataPoints; k ++) {
399 				if (thy data [k].status != kDataModelerData::INVALID) {
400 					thy data [k].y -= my f_evaluate (me, thy data [k].x, my parameters.get());// z(x) = y(x) - b * exp(c * x)
401 				}
402 			}
403 			DataModeler_fit (thee.get());
404 			my parameters [1].value = thy parameters [1].value;
405 		} else {
406 			/*
407 				As if c were fixed
408 				Model: y(x) = a + b * f(x), where f(x) = exp (c * x).
409 			*/
410 			autoDataModeler thee = DataModeler_createFromDataModeler (me, 2, kDataModelerFunction::LINEAR);
411 			thy parameters.resize (thy numberOfParameters + 1);
412 			thy parameters [thy numberOfParameters + 1].value = c;
413 			thy parameters [thy numberOfParameters + 1].status = kDataModelerParameterStatus::FIXED_EXTRA;
414 			thy f_evaluate = exponential_plus_constant_evaluate;
415 			thy f_evaluateBasisFunctions = linear_exponent_evaluateBasisFunctions;
416 			DataModeler_fit (thee.get());
417 			my parameters [1].value = thy parameters [1].value;
418 			my parameters [2].value = thy parameters [2].value;
419 			my parameters [3].value = c;
420 		}
421 	}
422 	/*
423 		error propagation ?
424 	*/
425 	my parameterCovariances -> data.get()  <<=  undefined;
426 }
427 
modelLinearTrendWithSigmoid(DataModeler me,double * out_lambda,double * out_sigma)428 static void modelLinearTrendWithSigmoid (DataModeler me, double *out_lambda, double *out_sigma) {
429 		/*
430 			Set mu in the middle of the interval and make lambda = 2 * ymean.
431 			Calculate sigma such that the model goes through (xmin,model(xmin)) and (xmax, model(xmax))
432 		 	deltaY = y(xmax) - y(xmin) = lambda / (1 + exp (- (xmax - mu) / sigma)) - lambda / (1 + exp (- (xmin - mu) / sigma))
433 		 	Then sigma = 0.5 *(xmax - xmin) / ln ((lambda + deltaY) / (lambda - deltaY)))
434 		*/
435 		autoDataModeler thee = DataModeler_createFromDataModeler(me, 2, kDataModelerFunction::POLYNOME);
436 		DataModeler_fit (thee.get());
437 		const double lambda = 2.0 * thy parameters [1].value;
438 		const double yAtXmin = DataModeler_getModelValueAtX (thee.get(), thy xmin);
439 		const double yAtXmax = DataModeler_getModelValueAtX (thee.get(), thy xmax);
440 		const double deltaY = yAtXmax - yAtXmin, deltaX = my xmax - my xmin;
441 		const double sigma = 0.5 * deltaX / log ((lambda + deltaY) / (lambda - deltaY));
442 		if (out_lambda)
443 			*out_lambda = lambda;
444 		if (out_sigma)
445 			*out_sigma = sigma;
446 }
447 
448 /*
449 	Function: y(x) = b / (1 + exp (- (x - mu) / sigma))
450 	Model: z(x) = A * f1(x) + b * f2(x) + C * f3(x), where
451 		z (x)= y (x) * ln (y (x)), f1 (x) = integral (0, x, y(x)dx), f2 (x) = x * y (x), f3 = y (x),
452 		A = -1 / (lambda * sigma), B = 1 / sigma, C = ln (lambda) - ln (1 + exp ((mu - x1) / sigma))
453 	Non-iterative solution according to  Jean Jacquelin (2009), Régressions et équations intégrales, https://fr.scribd.com/doc/14674814/Regressions-et-equations-integrales,
454 	pages 16-17.
455 	Precondition: x [i] must be increasing.
456 */
sigmoid_fit(DataModeler me)457 void sigmoid_fit (DataModeler me) {
458 	autoVEC yEstimate = raw_VEC (my numberOfDataPoints);
459 	autoVEC weights = DataModeler_getDataPointsWeights (me, my weighData);
460 	double lambda = my parameters [1].value;
461 	double mu = my parameters [2].value;
462 	double sigma = my parameters [3].value;
463 	if (my parameters [1].status == kDataModelerParameterStatus::FIXED_) {
464 		if (my parameters [2].status == kDataModelerParameterStatus::FIXED_) {
465 			if (my parameters [3].status == kDataModelerParameterStatus::FIXED_)
466 				return;
467 			/*
468 				Model: z(X)*ln(z(X) = A * f5 (X) + B * f6 (X), where f5 (X) = z (X) * X - z (X) * integral (x [1], X, z(x)dx)),
469 				f6 (X) = y (X), z(X) = y (X) / lambda and X [k] = x [k] - mu - x1
470 				A = 1 / sigma, B = ln (y (x1))
471 			*/
472 			autoMAT design = zero_MAT (my numberOfDataPoints, 2);
473 			long double sk = 0.0, x1 = my data [1].x ; // no need to subtract mu!
474 			double xkm1 = 0.0, ykm1 = 0.0;
475 			integer index = 0;
476 			for (integer k = 1; k <= my numberOfDataPoints; k ++) {
477 				if (my data [k] .status != kDataModelerData::INVALID) {
478 					const long double xk = my data [k].x, yk = my data [k].y / lambda;
479 					sk += 0.5 * (yk + ykm1) * (xk - xkm1); // invariant under translations in x
480 					const double f1x = yk * sk;
481 					const double f2x = (xk - mu - x1) * yk; // X [k]
482 					design [++ index] [1] = (f2x - f1x) * weights [k];
483 					design [index] [2] = yk * weights [k];
484 					yEstimate [index] = yk * log (yk) * weights [k];
485 					xkm1 = xk;
486 					ykm1 = yk;
487 				}
488 			}
489 			design.resize (index, 2);
490 			yEstimate.resize (index);
491 			autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
492 			sigma = 1.0 /solution [1];
493 		} else if (my parameters [3].status == kDataModelerParameterStatus::FIXED_) {
494 			/*
495 				Model: z(X)*ln(z(X) = C * f3 (X), where z(X)= y(X)/lambda + f1 (X) / sigma - f2 (X) / sigma, X [l] = x [k] - x1
496 			*/
497 			autoMAT design = zero_MAT (my numberOfDataPoints, 1);
498 			long double sk = 0.0, x1 = my data [1].x;
499 			long double xkm1 = 0.0, ykm1 = 0.0;
500 			integer index = 0;
501 			for (integer k = 1; k <= my numberOfDataPoints; k ++) {
502 				if (my data [k] .status != kDataModelerData::INVALID) {
503 					const long double xk = my data [k].x, yk = my data [k].y / lambda;
504 					sk += 0.5 * (yk + ykm1) * (xk - xkm1); // xk - xkm1 ==  X [k] - X [k-1]
505 					const double f1x = yk * sk;
506 					const double f2x = (xk - x1) * yk;
507 					design [++ index] [1] = yk * weights [k];
508 					yEstimate [index] = (yk * log (yk) + f1x / sigma - f2x / sigma) * weights [k];
509 					xkm1 = xk;
510 					ykm1 = yk;
511 				}
512 			}
513 			design.resize (index, 1);
514 			yEstimate.resize (index);
515 			autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
516 			const double lnarg = exp (-solution [1]) - 1.0;
517 			if (lnarg > 0)
518 				mu = x1 + sigma * log (lnarg);
519 			else
520 				mu = undefined;
521 		} else {
522 			/*
523 				Model: z*ln(z) = A * f4(X) + B * f3 (X), where z(X) = y(X) / lambda, f4(X)= -f1(X) + f2(X), f3 (X) = z(X), A = 1/sigma, B = - ln (1+exp(-(mu - x1)/sigma)) and X [k] = x [k] - x [1].
524 			*/
525 			autoMAT design = zero_MAT (my numberOfDataPoints, 2);
526 			long double sk = 0.0, x1 = my data [1].x;
527 			long double xkm1 = 0.0, ykm1 = 0.0;
528 			integer index = 0;
529 			for (integer k = 1; k <= my numberOfDataPoints; k ++) {
530 				if (my data [k] .status != kDataModelerData::INVALID) {
531 					const long double xk = my data [k].x, yk = my data [k].y / lambda;
532 					sk += 0.5 * (yk + ykm1) * (xk - xkm1);
533 					const double f1x = yk * sk;
534 					const double f2x = (xk - x1) * yk;
535 					design [++ index] [1] = (f2x - f1x) * weights [k];
536 					design [index] [2] = yk * weights [k]; // f3
537 					yEstimate [index] = yk * log (yk) * weights [k];
538 					xkm1 = xk;
539 					ykm1 = yk;
540 				}
541 			}
542 			design.resize (index, 2);
543 			yEstimate.resize (index);
544 			autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
545 			sigma = 1.0 / solution [1];
546 			const double lnarg = exp (-solution [2]) - 1.0;
547 			if (lnarg > 0)
548 				mu = x1 + sigma * log (lnarg);
549 			else
550 				mu = undefined;
551 		}
552 	} else if (my parameters [2].status == kDataModelerParameterStatus::FIXED_ &&
553 		my parameters [3].status == kDataModelerParameterStatus::FIXED_) {
554 			/*
555 				Model: y(x) = E * f4 (x), where f4(x) = 1 /(1 + exp (- (x - mu) / sigma))
556 			*/
557 			autoMAT design = zero_MAT (my numberOfDataPoints, 1);
558 			integer index = 0;
559 			for (integer k = 1; k <= my numberOfDataPoints; k ++) {
560 				if (my data [k] .status != kDataModelerData::INVALID) {
561 					const double yk = my data [k].y, xk = my data [k].x;
562 					const double f4x = 1.0 / (1.0 + exp (- (xk - mu) / sigma));
563 					design [++ index] [1] = f4x * weights [k];
564 					yEstimate [index] = yk * weights [k];
565 				}
566 			}
567 			design.resize (index, 1);
568 			yEstimate.resize (index);
569 			autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
570 			lambda = solution [1];
571 	} else if (my parameters [3].status == kDataModelerParameterStatus::FIXED_) {
572 		/*
573 			Model: z(x) = A * f1 (x) + C * f3(x), where z(x) = y(x)*ln(y(x)) - f2 (x) / sigma
574 		*/
575 		autoMAT design = zero_MAT (my numberOfDataPoints, 2);
576 		double sk = 0.0, x1 = my data [1].x;
577 		double xkm1 = 0.0, ykm1 = 0.0;
578 		integer index = 0;
579 		for (integer k = 1; k <= my numberOfDataPoints; k ++) {
580 			if (my data [k] .status != kDataModelerData::INVALID) {
581 				const double xk = my data [k].x - x1, yk = my data [k].y;
582 				const double f2x = xk * yk;
583 				sk += 0.5 * (yk + ykm1) * (xk - xkm1);
584 				const double f1x =  yk * sk;
585 				design [++ index] [1] = f1x * weights [k];
586 				design [index] [2] = yk * weights [k];
587 				yEstimate [index] = (yk * log (yk) - f2x / sigma) * weights [k];
588 				xkm1 = xk;
589 				ykm1 = yk;
590 			}
591 		}
592 		design.resize (index, 2);
593 		yEstimate.resize (index);
594 		autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
595 		sigma = my parameters [3].value;
596 		lambda = -1.0 / (solution [1] * sigma);
597 		const double lnarg = lambda * exp (-solution [2]) - 1.0;
598 		if (lnarg > 0)
599 			mu = x1 + sigma * log (lnarg);
600 		else
601 			mu = undefined;
602 	} else {
603 		/*
604 			Model: z(x) =  A * f1 (x) + B * f2(x) + C * f3(x), where z(x) = y(x)*ln(y(x))
605 		*/
606 		autoMAT design = zero_MAT (my numberOfDataPoints, 3);
607 		long double sk = 0.0, x1 = my data [1].x;
608 		long double xkm1 = 0.0, ykm1 = 0.0;
609 		integer index = 0;
610 		for (integer k = 1; k <= my numberOfDataPoints; k ++) {
611 			if (my data [k] .status != kDataModelerData::INVALID) {
612 				const long double xk = my data [k].x, yk = my data [k].y;
613 				sk += 0.5 * (yk + ykm1) * (xk - xkm1);
614 				const double f1x = yk * sk;
615 				const double f2x = (xk - x1) * yk;
616 				design [++ index] [1] = f1x * weights [k];
617 				design [index] [2] = f2x * weights [k];
618 				design [index] [3] = yk * weights [k];
619 				yEstimate [index] = yk * log (yk) * weights [k];
620 				xkm1 = xk;
621 				ykm1 = yk;
622 			}
623 		}
624 		design.resize (index, 3);
625 		yEstimate.resize (index);
626 		autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
627 		sigma = 1.0 / solution [2];
628 		lambda = - solution [2] / solution [1];
629 		if (my parameters [2].status != kDataModelerParameterStatus::FIXED_) {
630 			my parameters [1].value = lambda;
631 			const double lnarg = lambda * exp (-solution [3]) - 1.0;
632 			if (lnarg > 0)
633 				mu = x1 + sigma * log (lnarg);
634 			else {
635 				modelLinearTrendWithSigmoid (me, & lambda, & sigma);
636 				mu = 0.5 * (my xmin + my xmax);
637 			}
638 		}
639 	}
640 	my parameters [1].value = lambda;
641 	my parameters [2].value = mu;
642 	my parameters [3].value = sigma;
643 	/*
644 		error propagation ?
645 	*/
646 	my parameterCovariances -> data.get()  <<=  undefined;
647 }
648 
649 /*
650 	Model: y(x) = gamma + lambda / (1 + exp (- (x - mu) / sigma))
651 	Solution according to  Jean Jacquelin (2009), Régressions et équations intégrales, https://fr.scribd.com/doc/14674814/Regressions-et-equations-integrales,
652 	pages 38 and following.
653 	The author makes a mistake in the derivation of the relation between the a,b,c,d and
654 	gamma, lambda, mu and sigma of the model.
655 	lambda = ± 1/s * sqrt (b^2-4ac) (the artile wrongly shows "sqrt (b^2+4ac)")
656 	gamma = (−b ∓ sqrt (b^2 − 4ac))/ (2*a)
657 	sigma = ∓ 1 / sqrt (b^2 − 4ac)
658 	mu = x [1]+ sigma * ln (lambda/(d-gamma) - 1)
659 	Two models are indistinguishable if:
660 		gamma' = gamma + lambda
661 		sigma' = -sigma
662 		lambda' = - lambda
663 	Precondition: x [i] are increasing order.
664 */
sigmoid_plus_constant_fit(DataModeler me)665 void sigmoid_plus_constant_fit (DataModeler me) {
666 	double gamma = my parameters [1].value, lambda = my parameters [2].value;
667 	double mu = my parameters [3].value, sigma = my parameters [4].value;
668 	if (my parameters [1].status == kDataModelerParameterStatus::FIXED_) {
669 		/*
670 			Model z(x) = lambda / (1 + exp (- (x - mu) / sigma)) where z(x) = y(x) - gamma.
671 		*/
672 		autoDataModeler thee = DataModeler_createFromDataModeler (me, 3, kDataModelerFunction::SIGMOID);
673 		for (integer k = 1; k <= my numberOfDataPoints; k ++) {
674 			if (my data [k] .status != kDataModelerData::INVALID) {
675 				thy data [k].y -= gamma;
676 			}
677 		}
678 		DataModeler_fit (thee.get());
679 		lambda = thy parameters [1].value;
680 		mu = thy parameters [2].value;
681 		sigma = thy parameters [3].value;
682 	} else if (my parameters [3].status == kDataModelerParameterStatus::FIXED_ &&
683 		my parameters [4].status == kDataModelerParameterStatus::FIXED_) {
684 		if (my parameters [2].status == kDataModelerParameterStatus::FIXED_) {
685 			/*
686 				Model: z(x) = gamma, where z(x) = y(x) - lambda / (1 + exp (- (x - mu)/ sigma))
687 			*/
688 			autoMAT design = raw_MAT (my numberOfDataPoints, 1);
689 			autoVEC yEstimate = raw_VEC (my numberOfDataPoints);
690 			autoVEC weights = DataModeler_getDataPointsWeights (me, my weighData);
691 			integer index = 0;
692 			for (integer k = 1; k <= my numberOfDataPoints; k ++) {
693 				if (my data [k] .status != kDataModelerData::INVALID) {
694 					const long double xk = my data [k].x, yk = my data [k].y;
695 					const double fx = lambda / (1 + exp (- (xk - mu) / sigma));
696 					design [++ index] [1] = 1.0 * weights [k];
697 					yEstimate [index] = (yk - fx) * weights [k];
698 				}
699 			}
700 			design.resize (index, 1);
701 			yEstimate.resize (index);
702 			autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
703 			gamma = solution [1];
704 		} else {
705 			/*
706 				Model: y(x) = gamma  + lambda * f(x), where f(x) = 1 / (1 + exp (-(x -mu)/sigma))
707 			*/
708 			autoMAT design = raw_MAT (my numberOfDataPoints, 2);
709 			autoVEC yEstimate = raw_VEC (my numberOfDataPoints);
710 			autoVEC weights = DataModeler_getDataPointsWeights (me, my weighData);
711 			integer index = 0;
712 			for (integer k = 1; k <= my numberOfDataPoints; k ++) {
713 				if (my data [k] .status != kDataModelerData::INVALID) {
714 					const long double xk = my data [k].x, yk = my data [k].y;
715 					const double fx = 1.0 / (1 + exp (- (xk - mu) / sigma));
716 					design [++ index] [1] = 1.0 * weights [k];
717 					design [index] [2] = fx * weights [k];
718 					yEstimate [index] = yk * weights [k];
719 				}
720 			}
721 			design.resize (index, 2);
722 			yEstimate.resize (index);
723 			autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
724 			gamma = solution [1];
725 			lambda = solution [2];
726 		}
727 	} else {
728 		autoMAT design = raw_MAT (my numberOfDataPoints, 4);
729 		autoVEC yEstimate = raw_VEC (my numberOfDataPoints);
730 		autoVEC weights = DataModeler_getDataPointsWeights (me, my weighData);
731 		long double s1k = 0.0, s2k = 0.0, x1 = my data [1].x, xkm1 = x1, ykm1 = 0.0;
732 		integer index = 0;
733 		for (integer k = 1; k <= my numberOfDataPoints; k ++) {
734 			if (my data [k] .status != kDataModelerData::INVALID) {
735 				const long double xk = my data [k].x, yk = my data [k].y;
736 				s1k += 0.5 * (yk + ykm1) * (xk - xkm1);
737 				s2k += 0.5 * (yk * yk + ykm1 * ykm1) * (xk - xkm1);
738 				design [++ index] [1] = s2k * weights [k];
739 				design [index] [2] = s1k * weights [k];
740 				design [index] [3] = (xk - x1) * weights [k];
741 				design [index] [4] = 1.0 * weights [k];
742 				yEstimate [index] = yk * weights [k];
743 				xkm1 = xk;
744 				ykm1 = yk;
745 			}
746 		}
747 		design.resize (index, 4);
748 		yEstimate.resize (index);
749 		autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), nullptr);
750 		const double a = solution [1], b = solution [2];
751 		const double c = solution [3], d = solution [4];
752 		auto setMu = [&, d, x1] () -> double {
753 			mu = undefined;
754 			const double lnarg = lambda / (d - gamma) - 1.0;
755 			if (lnarg > 0.0)
756 				mu = x1 + sigma * log (lnarg);
757 			return mu;
758 		};
759 		if (my parameters [2].status == kDataModelerParameterStatus::FIXED_) {
760 			sigma = - 1.0 / (a * lambda);
761 			gamma = 0.5 * lambda * (sigma * b - 1.0);
762 			if (my parameters [3].status != kDataModelerParameterStatus::FIXED_)
763 				setMu ();
764 		} if (my parameters [4].status == kDataModelerParameterStatus::FIXED_) {
765 			lambda = - 1.0 / (a * sigma);
766 			gamma = 0.5 * lambda * (sigma * b - 1.0);
767 			setMu ();
768 		} else {
769 			const double dissq = b * b - 4.0 * a * c;
770 			bool modelFitIsBad = true;
771 			if (dissq > 0.0) {
772 				const double dis = sqrt (dissq);
773 				modelFitIsBad = false;
774 				lambda = dis / a;
775 				gamma = (-b - dis) / (2.0 * a);
776 				sigma = - 1.0 / dis;
777 				if (my parameters [3].status != kDataModelerParameterStatus::FIXED_) {
778 					if (! isdefined (setMu ())) {
779 						lambda = -dis / a;
780 						gamma = (-b + dis) / (2.0 * a);
781 						sigma = 1.0 / dis;
782 						if (! isdefined (setMu ()))
783 							modelFitIsBad = true;
784 					}
785 					if (DataModeler_getCoefficientOfDetermination (me, nullptr, nullptr) < 0.0) // model fit is bad!
786 						modelFitIsBad = true;
787 				}
788 			}
789 			if (modelFitIsBad) {
790 				modelLinearTrendWithSigmoid (me, & lambda, & sigma);
791 				gamma = 0.0;
792 				mu = 0.5 * (my xmin + my xmax);
793 				my parameters [3].value = mu;
794 			}
795 		}
796 	}
797 	my parameters [1].value = gamma;
798 	my parameters [2].value = lambda;
799 	my parameters [3].value = mu;
800 	my parameters [4].value = sigma;
801 	/*
802 		error propagation ?
803 	*/
804 	my parameterCovariances -> data.get()  <<=  undefined;
805 }
806 
series_fit(DataModeler me)807 void series_fit (DataModeler me) {
808 	try {
809 		/*
810 			Count the number of free parameters to be fitted
811 		*/
812 		const integer numberOfFreeParameters = DataModeler_getNumberOfFreeParameters (me);
813 		if (numberOfFreeParameters == 0)
814 			return;
815 		const integer numberOfValidDataPoints = DataModeler_getNumberOfValidDataPoints (me);
816 		if (numberOfValidDataPoints - numberOfFreeParameters < 0)
817 			return;
818 		autoVEC yEstimate = zero_VEC (numberOfValidDataPoints);
819 		autoVEC term = zero_VEC (my numberOfParameters);
820 		autovector<structDataModelerParameter> fixedParameters = newvectorcopy (my parameters.all());
821 		autoMAT design = zero_MAT (numberOfValidDataPoints, numberOfFreeParameters);
822 		autoVEC weights = DataModeler_getDataPointsWeights (me, my weighData);
823 		/*
824 			For function evaluation with only the FIXED parameters
825 		*/
826 		for (integer ipar = 1; ipar <= my parameters.size; ipar ++)
827 			if (my parameters [ipar] .status != kDataModelerParameterStatus::FIXED_)
828 				fixedParameters [ipar].value = 0.0;
829 
830 		/*
831 			We solve for the parameters p by minimizing the chi-squared function:
832 			chiSquared = sum (i=1...n, (y [i] - sum (k=1..m, p [k] X [k] (x [i])) / sigma [i] ),
833 			where n is the 'numberOfValidDataPoints', m is the 'numberOfFreeParameters',
834 				- x [i] and y [i] are the i-th datapoint x and y values, respectively,
835 				- sum (k=1..m, p [k] X [k] (x [i]) is the model estimation at x [i],
836 				- X [k] (x [i]) is the k-th function term evaluated at x [i],
837 				- and y [i] has been measured with some uncertainty sigma [i].
838 			If we define the design matrix matrix A [i] [j] = X [j] (x [i]) / sigma [i] and
839 			the vector b [i] = y [i] / sigma [i], the problem can be stated as
840 			minimize the norm ||A.p - b|| for p.
841 			This problem can be solved by SVD.
842 		*/
843 		integer idata = 1;
844 		for (integer ipoint = 1; ipoint <= my numberOfDataPoints; ipoint ++) {
845 			if (my data [ipoint] .status != kDataModelerData::INVALID) {
846 				const double xi = my data [ipoint].x, yi = my data [ipoint].y;
847 				const double yFixed = my f_evaluate (me, xi, fixedParameters.get());
848 				// individual terms of the function
849 				my f_evaluateBasisFunctions (me, xi, term.get());
850 				for (integer icol = 1, ipar = 1; ipar <= my numberOfParameters; ipar ++)
851 					if (my parameters [ipar] .status == kDataModelerParameterStatus::FREE)
852 						design [idata] [icol ++] = term [ipar] * weights [ipoint];
853 				/*
854 					Only 'residual variance' must be explained by the model
855 				*/
856 				yEstimate [idata ++] = (yi - yFixed)  * weights [ipoint];
857 			}
858 		}
859 		autoMAT covar;
860 		autoVEC solution = DataModeler_solveDesign (me, design.get(), yEstimate.get(), & covar);
861 		/*
862 			Put the calculated parameters at the correct position in 'my parameters'
863 		*/
864 		Covariance cov = my parameterCovariances.get();
865 		for (integer kpar = 1, ipar = 1; ipar <= my numberOfParameters; ipar ++) {
866 			if (my parameters [ipar] .status != kDataModelerParameterStatus::FIXED_)
867 				my parameters [ipar] .value = solution [kpar ++];
868 			cov -> centroid [ipar] = my parameters [ipar] .value;
869 		}
870 		cov -> numberOfObservations = numberOfValidDataPoints;
871 		/*
872 			Estimate covariances between parameters
873 		*/
874 		if (numberOfFreeParameters < my numberOfParameters) {
875 			cov -> data.all()  <<=  0.0;   // set fixed parameters variances and covariances to zero
876 			for (integer irow = 1, ipar = 1; ipar <= my numberOfParameters; ipar ++) {
877 				if (my parameters [ipar] .status != kDataModelerParameterStatus::FIXED_) {
878 					for (integer icol = 1, jpar = 1; jpar <= my numberOfParameters; jpar ++) {
879 						if (my parameters [jpar] .status != kDataModelerParameterStatus::FIXED_)
880 							cov -> data [ipar] [jpar] = covar [irow] [icol ++];
881 					}
882 					irow ++;
883 				}
884 			}
885 		} else {
886 			my parameterCovariances -> data = covar.move();
887 		}
888 	} catch (MelderError) {
889 		Melder_throw (U"DataModeler no fit.");
890 	}
891 }
892 
chisqFromZScores(VEC zscores,double * out_chisq,integer * out_numberOfValidZScores)893 static void chisqFromZScores (VEC zscores, double *out_chisq, integer *out_numberOfValidZScores) {
894 	double chisq = 0.0;
895 	integer numberOfValidZScores = 0;
896 	for (integer ipoint = 1; ipoint <= zscores.size; ipoint ++) {
897 		if (isdefined (zscores [ipoint])) {
898 			chisq += zscores [ipoint] * zscores [ipoint];
899 			numberOfValidZScores ++;
900 		}
901 	}
902 	if (out_chisq)
903 		*out_chisq = chisq;
904 	if (out_numberOfValidZScores)
905 		*out_numberOfValidZScores = numberOfValidZScores;
906 }
907 
DataModeler_getModelValueAtX(DataModeler me,double x)908 double DataModeler_getModelValueAtX (DataModeler me, double x) {
909 	double f = undefined;
910 	if (x >= my xmin && x <= my xmax)
911 		f = my f_evaluate (me, x, my parameters.get());
912 	return f;
913 }
914 
DataModeler_getModelValueAtIndex(DataModeler me,integer index)915 double DataModeler_getModelValueAtIndex (DataModeler me, integer index) {
916 	double f = undefined;
917 	if (index > 0 && index <= my numberOfDataPoints)
918 		f = my f_evaluate (me, my data [index] .x, my parameters.get());
919 	return f;
920 }
921 
DataModeler_getExtremaY(DataModeler me,double * out_ymin,double * out_ymax)922 void DataModeler_getExtremaY (DataModeler me, double *out_ymin, double *out_ymax) {
923 	MelderExtremaWithInit extrema;
924 	for (integer i = 1; i <= my numberOfDataPoints; i++)
925 		if (my data [i] .status != kDataModelerData::INVALID)
926 			extrema.update (my data [i] .y);
927 
928 	if (out_ymin)
929 		*out_ymin = extrema.min;
930 	if (out_ymax)
931 		*out_ymax = extrema.max;
932 }
933 
DataModeler_getDataPointYValue(DataModeler me,integer index)934 double DataModeler_getDataPointYValue (DataModeler me, integer index) {
935 	double value = undefined;
936 	if (index > 0 && index <= my numberOfDataPoints && my data [index] .status != kDataModelerData::INVALID)
937 		value = my data [index] .y;
938 	return value;
939 }
940 
DataModeler_getDataPointXValue(DataModeler me,integer index)941 double DataModeler_getDataPointXValue (DataModeler me, integer index) {
942 	double value = undefined;
943 	if (index > 0 && index <= my numberOfDataPoints && my data [index] .status != kDataModelerData::INVALID)
944 		value = my data [index] .x;
945 	return value;
946 }
947 
DataModeler_setDataPointYValue(DataModeler me,integer index,double value)948 void DataModeler_setDataPointYValue (DataModeler me, integer index, double value) {
949 	if (index > 0 && index <= my numberOfDataPoints)
950 		my data [index] .y = value;
951 }
952 
DataModeler_setDataPointXValue(DataModeler me,integer index,double value)953 void DataModeler_setDataPointXValue (DataModeler me, integer index, double value) {
954 	if (index > 0 && index <= my numberOfDataPoints)
955 		my data [index] .x = value;
956 }
957 
DataModeler_setDataPointValues(DataModeler me,integer index,double xvalue,double yvalue)958 void DataModeler_setDataPointValues (DataModeler me, integer index, double xvalue, double yvalue) {
959 	if (index > 0 && index <= my numberOfDataPoints) {
960 		my data [index] .x = xvalue;
961 		my data [index] .y = yvalue;
962 	}
963 }
964 
DataModeler_setDataPointYSigma(DataModeler me,integer index,double sigma)965 void DataModeler_setDataPointYSigma (DataModeler me, integer index, double sigma) {
966 	if (index > 0 && index <= my numberOfDataPoints)
967 		my  data [index] .sigmaY = sigma;
968 }
969 
DataModeler_getDataPointYSigma(DataModeler me,integer index)970 double DataModeler_getDataPointYSigma (DataModeler me, integer index) {
971 	double sigma = undefined;
972 	if (index > 0 && index <= my numberOfDataPoints)
973 		sigma = my data [index] .sigmaY;
974 	return sigma;
975 }
976 
DataModeler_getDataPointStatus(DataModeler me,integer index)977 kDataModelerData DataModeler_getDataPointStatus (DataModeler me, integer index) {
978 	kDataModelerData value = kDataModelerData::INVALID;
979 	if (index > 0 && index <= my numberOfDataPoints)
980 		value = my data [index] .status;
981 	return value;
982 }
983 
DataModeler_setDataPointStatus(DataModeler me,integer index,kDataModelerData status)984 void DataModeler_setDataPointStatus (DataModeler me, integer index, kDataModelerData status) {
985 	if (index > 0 && index <= my numberOfDataPoints) {
986 		if (status == kDataModelerData::VALID && isundef (my data [index] .y))
987 			Melder_throw (U"Your data value is undefined. First set the value and then its status.");
988 		my data [index] .status = status;
989 	}
990 }
991 
DataModeler_setDataPointValueAndStatus(DataModeler me,integer index,double value,kDataModelerData dataStatus)992 void DataModeler_setDataPointValueAndStatus (DataModeler me, integer index, double value, kDataModelerData dataStatus) {
993 	if (index > 0 && index <= my numberOfDataPoints) {
994 		my data [index] .y = value;
995 		my data [index] .status = dataStatus;
996 	}
997 }
998 
DataModeler_setParameterValue(DataModeler me,integer index,double value,kDataModelerParameterStatus status)999 void DataModeler_setParameterValue (DataModeler me, integer index, double value, kDataModelerParameterStatus status) {
1000 	if (index > 0 && index <= my numberOfParameters) {
1001 		my parameters [index] .value = value;
1002 		my parameters [index] .status = status;
1003 	}
1004 }
1005 
DataModeler_setParameterValueFixed(DataModeler me,integer index,double value)1006 void DataModeler_setParameterValueFixed (DataModeler me, integer index, double value) {
1007 	Melder_require (my type == kDataModelerFunction::POLYNOME || my type == kDataModelerFunction::LEGENDRE,
1008 		U"This would change the model type, which is not possible yet.");
1009 	DataModeler_setParameterValue (me, index, value, kDataModelerParameterStatus::FIXED_);
1010 }
1011 
DataModeler_getParameterValue(DataModeler me,integer index)1012 double DataModeler_getParameterValue (DataModeler me, integer index) {
1013 	double value = undefined;
1014 	if (index > 0 && index <= my numberOfParameters)
1015 		value = my parameters [index] .value;
1016 	return value;
1017 }
1018 
DataModeler_listParameterValues(DataModeler me)1019 autoVEC DataModeler_listParameterValues (DataModeler me) {
1020 	autoVEC result = raw_VEC (my numberOfParameters);
1021 	for (integer k = 1; k <= my numberOfParameters; k ++)
1022 		result [k] = my parameters [k] .value;
1023 	return result;
1024 }
1025 
DataModeler_getParameterStatus(DataModeler me,integer index)1026 kDataModelerParameterStatus DataModeler_getParameterStatus (DataModeler me, integer index) {
1027 	kDataModelerParameterStatus status = kDataModelerParameterStatus::UNDEFINED;
1028 	if (index > 0 && index <= my numberOfParameters)
1029 		status = my parameters [index] .status;
1030 	return status;
1031 }
1032 
DataModeler_getParameterStandardDeviation(DataModeler me,integer index)1033 double DataModeler_getParameterStandardDeviation (DataModeler me, integer index) {
1034 	double stdev = undefined;
1035 	if (index > 0 && index <= my numberOfParameters)
1036 		stdev = sqrt (my parameterCovariances -> data [index] [index]);
1037 	return stdev;
1038 }
1039 
DataModeler_getVarianceOfParameters(DataModeler me,integer fromIndex,integer toIndex,integer * out_numberOfFreeParameters)1040 double DataModeler_getVarianceOfParameters (DataModeler me, integer fromIndex, integer toIndex, integer *out_numberOfFreeParameters) {
1041 	double variance = undefined;
1042 	getAutoNaturalNumbersWithinRange (& fromIndex, & toIndex, my numberOfParameters, U"parameter");
1043 	integer numberOfFreeParameters = 0;
1044 	variance = 0;
1045 	for (integer ipar = fromIndex; ipar <= toIndex; ipar ++) {
1046 		if (my parameters [ipar] .status != kDataModelerParameterStatus::FIXED_) {
1047 			variance += my parameterCovariances -> data [ipar] [ipar];
1048 			numberOfFreeParameters ++;
1049 		}
1050 	}
1051 	if (out_numberOfFreeParameters)
1052 		*out_numberOfFreeParameters = numberOfFreeParameters;
1053 	return variance;
1054 }
1055 
DataModeler_setParametersFree(DataModeler me,integer fromIndex,integer toIndex)1056 void DataModeler_setParametersFree (DataModeler me, integer fromIndex, integer toIndex) {
1057 	getAutoNaturalNumbersWithinRange (& fromIndex, & toIndex, my numberOfParameters, U"parameter");
1058 	for (integer ipar = fromIndex; ipar <= toIndex; ipar ++)
1059 		my parameters [ipar] .status = kDataModelerParameterStatus::FREE;
1060 }
1061 
DataModeler_setParameterValuesToZero(DataModeler me,double numberOfSigmas)1062 void DataModeler_setParameterValuesToZero (DataModeler me, double numberOfSigmas) {
1063 	integer numberOfChangedParameters = 0;
1064 	for (integer ipar = my numberOfParameters; ipar > 0; ipar --) {
1065 		if (my parameters [ipar] .status != kDataModelerParameterStatus::FIXED_) {
1066 			const double value = my parameters [ipar] .value;
1067 			double sigmas = numberOfSigmas * DataModeler_getParameterStandardDeviation (me, ipar);
1068 			if ((value - sigmas) * (value + sigmas) < 0) {
1069 				DataModeler_setParameterValueFixed (me, ipar, 0.0);
1070 				numberOfChangedParameters ++;
1071 			}
1072 		}
1073 	}
1074 }
1075 
DataModeler_getNumberOfFreeParameters(DataModeler me)1076 integer DataModeler_getNumberOfFreeParameters (DataModeler me) {
1077 	integer numberOfFreeParameters = 0;
1078 	for (integer ipar = 1; ipar <= my numberOfParameters; ipar ++) {
1079 		if (my parameters [ipar] .status == kDataModelerParameterStatus::FREE)
1080 			numberOfFreeParameters ++;
1081 	}
1082 	return numberOfFreeParameters;
1083 }
1084 
DataModeler_getNumberOfFixedParameters(DataModeler me)1085 integer DataModeler_getNumberOfFixedParameters (DataModeler me) {
1086 	return my numberOfParameters - DataModeler_getNumberOfFreeParameters (me);
1087 }
1088 
DataModeler_getNumberOfValidDataPoints(DataModeler me)1089 integer DataModeler_getNumberOfValidDataPoints (DataModeler me) {
1090 	integer numberOfValidDataPoints = 0;
1091 	for (integer ipoint = 1; ipoint <= my numberOfDataPoints; ipoint ++)
1092 		if (my data [ipoint] .status != kDataModelerData::INVALID)
1093 			numberOfValidDataPoints ++;
1094 	return numberOfValidDataPoints;
1095 }
1096 
DataModeler_getNumberOfInvalidDataPoints(DataModeler me)1097 integer DataModeler_getNumberOfInvalidDataPoints (DataModeler me) {
1098 	return my numberOfDataPoints - DataModeler_getNumberOfValidDataPoints  (me);
1099 }
1100 
DataModeler_setTolerance(DataModeler me,double tolerance)1101 void DataModeler_setTolerance (DataModeler me, double tolerance) {
1102 	my tolerance = ( tolerance > 0.0 ? tolerance : my numberOfDataPoints * NUMfpp -> eps );
1103 }
1104 
DataModeler_getDegreesOfFreedom(DataModeler me)1105 double DataModeler_getDegreesOfFreedom (DataModeler me) {
1106 	integer numberOfDataPoints = 0;
1107 	for (integer ipoint = 1; ipoint <= my numberOfDataPoints; ipoint ++)
1108 		if (my data [ipoint] .status != kDataModelerData::INVALID)
1109 			numberOfDataPoints ++;
1110 	const double ndf = numberOfDataPoints - DataModeler_getNumberOfFreeParameters (me);
1111 	return ndf;
1112 }
1113 
1114 /*
1115 	Interpret the values in sigmaY as 1 / sigmay or 1 / sqrt (sigmaY) or y/sigmaY.
1116 	If equal weighing than get the sigma form the residual sum of squares between model and data.
1117 */
DataModeler_getDataPointsWeights(DataModeler me,kDataModelerWeights weighData)1118 autoVEC DataModeler_getDataPointsWeights (DataModeler me, kDataModelerWeights weighData) {
1119 	autoVEC weights = zero_VEC (my numberOfDataPoints);
1120 	if (weighData == kDataModelerWeights::EQUAL_WEIGHTS) {
1121 			/*
1122 				We weigh with the inverse of the standard deviation of the data to give
1123 				subsequent Chi squared tests a meaningful interpretation.
1124 			*/
1125 			const double stdev = DataModeler_getDataStandardDeviation (me);
1126 			Melder_require (isdefined (stdev),
1127 				U"Not enough data points to calculate standard deviation.");
1128 			weights.all()  <<=  1.0 / stdev;
1129 	} else {
1130 		for (integer ipoint = 1; ipoint <= my numberOfDataPoints; ipoint ++) {
1131 			if (my data [ipoint] .status == kDataModelerData::INVALID)
1132 				continue; // invalid points get weight 0.
1133 			double sigma = my data [ipoint] .sigmaY;
1134 			double weight = 1.0;
1135 			if (isdefined (sigma) && sigma > 0.0) {
1136 				if (weighData == kDataModelerWeights::ONE_OVER_SIGMA)
1137 					weight = 1.0 / sigma;
1138 				else if (weighData == kDataModelerWeights::RELATIVE_)
1139 					weight = my data [ipoint] .y / sigma;
1140 				else if (weighData == kDataModelerWeights::ONE_OVER_SQRTSIGMA) {
1141 					weight = 1.0 / sqrt (sigma);
1142 				}
1143 			}
1144 			weights [ipoint] = weight;
1145 		}
1146 	}
1147 	return weights;
1148 }
1149 
DataModeler_getZScores(DataModeler me)1150 autoVEC DataModeler_getZScores (DataModeler me) {
1151 	try {
1152 		autoVEC zscores = raw_VEC (my numberOfDataPoints);
1153 		autoVEC weights = DataModeler_getDataPointsWeights (me, my weighData);
1154 		for (integer ipoint = 1; ipoint <= my numberOfDataPoints; ipoint ++) {
1155 			double z = undefined;
1156 			if (my data [ipoint] .status != kDataModelerData::INVALID) {
1157 				const double estimate = my f_evaluate (me, my data [ipoint] .x, my parameters.get());
1158 				z = (my data [ipoint] .y - estimate) * weights [ipoint]; // 1/sigma
1159 			}
1160 			zscores [ipoint] = z;
1161 		}
1162 		return zscores;
1163 	} catch (MelderError) {
1164 		Melder_throw (U"No z-scores calculated.");
1165 	}
1166 }
1167 
DataModeler_getChisqScoresFromZScores(DataModeler me,constVEC zscores,bool substituteAverage)1168 autoVEC DataModeler_getChisqScoresFromZScores (DataModeler me, constVEC zscores, bool substituteAverage) {
1169 	Melder_assert (zscores.size == my numberOfDataPoints);
1170 	autoVEC chisq = raw_VEC (zscores.size);
1171 	integer numberOfDefined = 0;
1172 	double sumchisq = 0.0;
1173 	for (integer ipoint = 1; ipoint <= my numberOfDataPoints; ipoint ++) {
1174 		chisq [ipoint] = undefined;
1175 		if (isdefined (zscores [ipoint])) {
1176 			chisq [ipoint] = zscores [ipoint] * zscores [ipoint];
1177 			sumchisq += chisq [ipoint];
1178 			numberOfDefined ++;
1179 		}
1180 	}
1181 	if (substituteAverage && numberOfDefined != my numberOfDataPoints && numberOfDefined > 0) {
1182 		for (integer ipoint = 1; ipoint <= my numberOfDataPoints; ipoint ++) {
1183 			if (isundef (chisq [ipoint]))
1184 				chisq [ipoint] = sumchisq / numberOfDefined;
1185 		}
1186 	}
1187 	return chisq;
1188 }
1189 
DataModeler_getChiSquaredQ(DataModeler me,double * out_prob,double * out_df)1190 double DataModeler_getChiSquaredQ (DataModeler me, double *out_prob, double *out_df) {
1191 	double chisq;
1192 	integer numberOfValidZScores;
1193 	autoVEC zscores = DataModeler_getZScores (me);
1194 	chisqFromZScores (zscores.get(), & chisq, & numberOfValidZScores);
1195 	const double ndf = DataModeler_getDegreesOfFreedom (me);
1196 
1197 	if (out_prob)
1198 		*out_prob = NUMchiSquareQ (chisq, ndf);
1199 	if (out_df)
1200 		*out_df = ndf;
1201 	return chisq;
1202 }
1203 
DataModeler_getWeightedMean(DataModeler me)1204 double DataModeler_getWeightedMean (DataModeler me) {
1205 	double ysum = 0.0, wsum = 0.0;
1206 	autoVEC weights = DataModeler_getDataPointsWeights (me, my weighData);
1207 	for (integer ipoint = 1; ipoint <= my numberOfDataPoints; ipoint ++)
1208 		if (my data [ipoint] .status != kDataModelerData::INVALID) {
1209 			ysum += my data [ipoint] .y * weights [ipoint];
1210 			wsum += weights [ipoint];
1211 		}
1212 	return ysum / wsum;
1213 }
1214 
DataModeler_getCoefficientOfDetermination(DataModeler me,double * out_ssreg,double * out_sstot)1215 double DataModeler_getCoefficientOfDetermination (DataModeler me, double *out_ssreg, double *out_sstot) {
1216 
1217 	/*
1218 		We cannot use the standard expressions for ss_tot, and ss_reg because our data are weighted by 1 / sigma [i].
1219 		We need the weighted mean and we need to weigh all sums-of-squares accordingly;
1220 		if all sigma [i] terms are equal, the formulas reduce to the standard ones.
1221 		Ref: A. Buse (1973): Goodness of Fit in Generalized Least Squares Estimation, The American Statician, vol 27, 106-108
1222 	 */
1223 
1224 	const double ymean = DataModeler_getWeightedMean (me);
1225 	autoVEC weights = DataModeler_getDataPointsWeights (me, my weighData);
1226 	longdouble sstot = 0.0, ssreg = 0.0;
1227 	for (integer ipoint = 1; ipoint <= my numberOfDataPoints; ipoint ++) {
1228 		if (my data [ipoint] .status != kDataModelerData::INVALID) {
1229 			double diff = (my data [ipoint] .y - ymean) * weights [ipoint];
1230 			sstot += diff * diff; // total sum of squares
1231 			const double estimate = my f_evaluate (me, my data [ipoint] .x, my parameters.get());
1232 			diff = (estimate - my data [ipoint] .y)  * weights [ipoint];
1233 			ssreg += diff * diff; // regression sum of squares
1234 		}
1235 	}
1236 	const double rSquared = ( sstot > 0.0 ? 1.0 - double (ssreg / sstot) : 1.0 );
1237 
1238 	if (out_ssreg)
1239 		*out_ssreg = double (sstot - ssreg);
1240 	if (out_sstot)
1241 		*out_sstot = double (sstot);
1242 	return rSquared;
1243 }
1244 
DataModeler_drawBasisFunction_inside(DataModeler me,Graphics g,double xmin,double xmax,double ymin,double ymax,integer iterm,bool scale,integer numberOfPoints)1245 void DataModeler_drawBasisFunction_inside (DataModeler me, Graphics g, double xmin, double xmax, double ymin, double ymax,
1246 	integer iterm, bool scale, integer numberOfPoints)
1247 {
1248 	Function_unidirectionalAutowindow (me, & xmin, & xmax);
1249 	autoVEC x = raw_VEC (numberOfPoints);
1250 	autoVEC y = raw_VEC (numberOfPoints);
1251 	autoVEC term = raw_VEC (my numberOfParameters);
1252 	for (integer i = 1; i <= numberOfPoints; i ++) {
1253 		x [i] = xmin + (i - 0.5) * (xmax - xmin) / numberOfPoints;
1254 		my f_evaluateBasisFunctions (me, x [i], term.get());
1255 		y [i] = term [iterm];
1256 		y [i] = ( scale ? y [i] * my parameters [iterm] .value : y [i] );
1257 	}
1258 	if (ymax <= ymin) {
1259 		MelderExtremaWithInit extrema;
1260 		for (integer i = 1; i <= numberOfPoints; i ++)
1261 			extrema.update (y [i]);
1262 		ymax = extrema.max;
1263 		ymin = extrema.min;
1264 
1265 	}
1266 	Graphics_setWindow (g, xmin, xmax, ymin, ymax);
1267 	for (integer i = 2; i <= numberOfPoints; i ++)
1268 		Graphics_line (g, x [i-1], y [i-1], x [i], y [i]);
1269 }
1270 
DataModeler_drawingSpecifiers_x(DataModeler me,double * xmin,double * xmax,integer * ixmin,integer * ixmax)1271 integer DataModeler_drawingSpecifiers_x (DataModeler me, double *xmin, double *xmax, integer *ixmin, integer *ixmax) {
1272 	if (*xmax <= *xmin) {
1273 		*xmin = my xmin;
1274 		*xmax = my xmax;
1275 	}
1276 	*ixmin = 2;
1277 	while (my data [*ixmin] .x < *xmin && *ixmin < my numberOfDataPoints)
1278 		(*ixmin) ++;
1279 	(*ixmin) --;
1280 	*ixmax = my numberOfDataPoints - 1;
1281 	while (my data [*ixmax] .x > *xmax && *ixmax > 1)
1282 		(*ixmax) --;
1283 	(*ixmax) ++;
1284 	return *ixmax - *ixmin + 1;
1285 }
1286 
DataModeler_drawOutliersMarked_inside(DataModeler me,Graphics g,double xmin,double xmax,double ymin,double ymax,double numberOfSigmas,conststring32 mark,double marksFontSize)1287 void DataModeler_drawOutliersMarked_inside (DataModeler me, Graphics g, double xmin, double xmax, double ymin, double ymax, double numberOfSigmas, conststring32 mark, double marksFontSize)
1288 {
1289 	integer ixmin, ixmax;
1290 	if (DataModeler_drawingSpecifiers_x (me, & xmin, & xmax, & ixmin, & ixmax) < 1)
1291 		return;
1292 	autoVEC zscores = DataModeler_getZScores (me);
1293 
1294 	Graphics_setWindow (g, xmin, xmax, ymin, ymax);
1295 	Graphics_setFontSize (g, marksFontSize);
1296 	Graphics_setTextAlignment (g, kGraphics_horizontalAlignment::CENTRE, Graphics_HALF);
1297 	const double currentFontSize = Graphics_inqFontSize (g);
1298 	for (integer ipoint = 1; ipoint <= my numberOfDataPoints; ipoint ++) {
1299 		if (my data [ipoint]. status != kDataModelerData::INVALID) {
1300 			const double x = my data [ipoint]. x, y = my data [ipoint]. y;
1301 			if (x >= xmin && x <= xmax && y >= ymin && y <= ymax)
1302 				if (fabs (zscores [ipoint]) > numberOfSigmas)
1303 					Graphics_text (g, x, y, mark);
1304 		}
1305 	}
1306 	Graphics_setFontSize (g, currentFontSize);
1307 }
1308 
DataModeler_draw_inside(DataModeler me,Graphics g,double xmin,double xmax,double ymin,double ymax,bool estimated,integer numberOfParameters,bool errorbars,bool connectPoints,double barWidth_wc,bool drawDots)1309 void DataModeler_draw_inside (DataModeler me, Graphics g, double xmin, double xmax, double ymin, double ymax, bool estimated, integer numberOfParameters, bool errorbars, bool connectPoints, double barWidth_wc, bool drawDots)
1310 {
1311 	Function_unidirectionalAutowindow (me, & xmin, & xmax);
1312 
1313 	integer ixmin = 1;
1314 	while (ixmin <= my numberOfDataPoints && my data [ixmin]. x < xmin)
1315 		ixmin ++;
1316 	integer ixmax = my numberOfDataPoints;
1317 	while (ixmax > 0 && my data [ixmax]. x > xmax)
1318 		ixmax --;
1319 	if (ixmin > ixmax)
1320 		return; // nothing to draw
1321 	getAutoNaturalNumberWithinRange (& numberOfParameters, my numberOfParameters);
1322 
1323 	Graphics_setWindow (g, xmin, xmax, ymin, ymax);
1324 	double x1, y1, x2, y2;
1325 	bool x1defined = false, x2defined = false;
1326 	for (integer ipoint = ixmin; ipoint <= ixmax; ipoint ++) {
1327 		if (my data [ipoint] .status != kDataModelerData::INVALID) {
1328 			const double x = my  data [ipoint]. x, y = my data [ipoint]. y;
1329 			if (! x1defined) {
1330 				x1 = x;
1331 				y1 = ( estimated ? my f_evaluate (me, x, my parameters.get()) : y );
1332 				x1defined = true;
1333 			} else {
1334 				x2 = x;
1335 				y2 = ( estimated ? my f_evaluate (me, x, my parameters.get()) : y );
1336 				x2defined = true;
1337 			}
1338 			if (x1defined && drawDots) {
1339 				if (y >= ymin && y <= ymax)
1340 					Graphics_speckle (g, x, y);
1341 			}
1342 			if (x2defined) { // if (x1defined && x2defined)
1343 				if (connectPoints) {
1344 					double xo1, yo1, xo2, yo2;
1345 					if (NUMclipLineWithinRectangle (x1, y1, x2, y2,
1346 						xmin, ymin, xmax, ymax, & xo1, & yo1, & xo2, & yo2)) {
1347 						Graphics_line (g, xo1, yo1, xo2, yo2);
1348 					}
1349 				}
1350 				x1 = x;
1351 				y1 = y2;
1352 			}
1353 			const double sigma = my data [ipoint] .sigmaY;
1354 			if (errorbars && isdefined (sigma) && sigma > 0 && x1defined) {
1355 				const double ym = y1;
1356 				double yt = ym + 0.5 * sigma, yb = ym - 0.5 * sigma;
1357 				if (estimated) {
1358 					yt = ( (y - y1) > 0.0 ? y : y1 );
1359 					yb = ( (y - y1) > 0.0 ? y1 : y );
1360 				}
1361 				bool topOutside = yt > ymax, bottomOutside = yb < ymin;
1362 				yt = ( topOutside ? ymax : yt );
1363 				yb = ( bottomOutside ? ymin : yb );
1364 				Graphics_line (g, x1, yb, x1, yt);
1365 				if (barWidth_wc > 0.0 && ! estimated) {
1366 					double xl = x1 - 0.5 * barWidth_wc;
1367 					double xr = xl + barWidth_wc;
1368 					if (! topOutside)
1369 						Graphics_line (g, xl, yt, xr, yt);
1370 					if (! bottomOutside)
1371 						Graphics_line (g, xl, yb, xr, yb);
1372 				}
1373 			}
1374 		}
1375 	}
1376 }
1377 
1378 
DataModeler_drawModel_inside(DataModeler me,Graphics g,double xmin,double xmax,double ymin,double ymax,integer numberOfPoints)1379 void DataModeler_drawModel_inside (DataModeler me, Graphics g, double xmin, double xmax, double ymin, double ymax, integer numberOfPoints) {
1380 	Function_bidirectionalAutowindow (me, & xmin, & xmax);
1381 	autoVEC x = raw_VEC (numberOfPoints), y = raw_VEC (numberOfPoints);
1382 	const double dx = (xmax - xmin) / numberOfPoints;
1383 	for (integer ipoint = 1; ipoint <= numberOfPoints; ipoint ++) {
1384 		x [ipoint] = xmin + (ipoint - 1) * dx;
1385 		y [ipoint] = my f_evaluate (me, x [ipoint], my parameters.get());
1386 	}
1387 	if (ymin == 0.0 && ymax == 0.0) {
1388 		ymin = NUMmin (y.get());
1389 		ymax = NUMmax (y.get());
1390 	}
1391 	Graphics_setWindow (g, xmin, xmax, ymin, ymax);
1392 	for (integer ipoint = 2; ipoint <= numberOfPoints; ipoint ++) {
1393 		double segment_x1, segment_y1, segment_x2, segment_y2;
1394 		if (NUMclipLineWithinRectangle (x [ipoint - 1], y [ipoint - 1], x [ipoint], y [ipoint],
1395 			xmin, ymin, xmax, ymax, & segment_x1, & segment_y1, & segment_x2, & segment_y2))
1396 				Graphics_line (g, segment_x1, segment_y1, segment_x2, segment_y2);
1397 	}
1398 }
1399 
DataModeler_drawModel(DataModeler me,Graphics g,double xmin,double xmax,double ymin,double ymax,integer numberOfPoints,bool garnish)1400 void DataModeler_drawModel (DataModeler me, Graphics g, double xmin, double xmax, double ymin, double ymax, integer numberOfPoints, bool garnish) {
1401 	Function_bidirectionalAutowindow (me, & xmin, & xmax);
1402 	Graphics_setInner (g);
1403 	DataModeler_drawModel_inside (me, g, xmin, xmax, ymin, ymax, numberOfPoints);
1404 	Graphics_unsetInner (g);
1405 	if (garnish) {
1406 		Graphics_drawInnerBox (g);
1407 		Graphics_marksBottom (g, 2, true, true, false);
1408 		Graphics_marksLeft (g, 2, true, true, false);
1409 	}
1410 }
1411 
DataModeler_drawTrack_inside(DataModeler me,Graphics g,double xmin,double xmax,double ymin,double ymax,bool estimated,integer numberOfParameters)1412 void DataModeler_drawTrack_inside (DataModeler me, Graphics g, double xmin, double xmax, double ymin, double ymax, bool estimated, integer numberOfParameters)
1413 {
1414 	const bool errorbars = false, connectPoints = true;
1415 	const double barWidth_mm = 0;
1416 	DataModeler_draw_inside (me, g, xmin, xmax, ymin, ymax, estimated, numberOfParameters, errorbars, connectPoints, barWidth_mm, 0);
1417 }
1418 
DataModeler_drawTrack(DataModeler me,Graphics g,double xmin,double xmax,double ymin,double ymax,bool estimated,integer numberOfParameters,bool garnish)1419 void DataModeler_drawTrack (DataModeler me, Graphics g, double xmin, double xmax, double ymin, double ymax,
1420 	bool estimated, integer numberOfParameters, bool garnish) {
1421 	if (ymax <= ymin)
1422 		DataModeler_getExtremaY (me, & ymin, & ymax);
1423 	Graphics_setInner (g);
1424 	DataModeler_drawTrack_inside (me, g, xmin, xmax, ymin, ymax, estimated, numberOfParameters);
1425 	Graphics_unsetInner (g);
1426 	if (garnish) {
1427 		Graphics_drawInnerBox (g);
1428 		Graphics_marksBottom (g, 2, true, true, false);
1429 		Graphics_marksLeft (g, 2, true, true, false);
1430 	}
1431 }
1432 
DataModeler_speckle_inside(DataModeler me,Graphics g,double xmin,double xmax,double ymin,double ymax,bool estimated,integer numberOfParameters,bool errorbars,double barWidth_wc)1433 void DataModeler_speckle_inside (DataModeler me, Graphics g, double xmin, double xmax, double ymin, double ymax, bool estimated, integer numberOfParameters, bool errorbars, double barWidth_wc) {
1434 	bool connectPoints = false;
1435 	DataModeler_draw_inside (me, g, xmin, xmax, ymin, ymax, estimated, numberOfParameters, errorbars, connectPoints, barWidth_wc, 1);
1436 }
1437 
DataModeler_speckle(DataModeler me,Graphics g,double xmin,double xmax,double ymin,double ymax,bool estimated,integer numberOfParameters,bool errorbars,double barWidth_mm,bool garnish)1438 void DataModeler_speckle (DataModeler me, Graphics g, double xmin, double xmax, double ymin, double ymax,
1439 	bool estimated, integer numberOfParameters, bool errorbars, double barWidth_mm, bool garnish)
1440 {
1441 	if (ymax <= ymin)
1442 		DataModeler_getExtremaY (me, & ymin, & ymax);
1443 	Graphics_setInner (g);
1444 	DataModeler_speckle_inside (me, g, xmin, xmax, ymin, ymax,
1445 		estimated, numberOfParameters, errorbars, barWidth_mm);
1446 	Graphics_unsetInner (g);
1447 	if (garnish) {
1448 		Graphics_drawInnerBox (g);
1449 		Graphics_marksBottom (g, 2, true, true, false);
1450 		Graphics_marksLeft (g, 2, true, true, false);
1451 	}
1452 }
1453 
DataModeler_to_Table_zscores(DataModeler me)1454 autoTable DataModeler_to_Table_zscores (DataModeler me) {
1455 	try {
1456 		const conststring32 columnNames [] = { U"x", U"z" };
1457 		autoTable ztable = Table_createWithColumnNames (my numberOfDataPoints, ARRAY_TO_STRVEC (columnNames));
1458 		autoVEC zscores = DataModeler_getZScores (me);
1459 		for (integer ipoint = 1; ipoint <= my numberOfDataPoints; ipoint ++) {
1460 			Table_setNumericValue (ztable.get(), ipoint, 1, my  data [ipoint] .x);
1461 			Table_setNumericValue (ztable.get(), ipoint, 2, zscores [ipoint]);
1462 		}
1463 		return ztable;
1464 	} catch (MelderError) {
1465 		Melder_throw (U"Table not created.");
1466 	}
1467 }
1468 
DataModeler_normalProbabilityPlot(DataModeler me,Graphics g,integer numberOfQuantiles,double numberOfSigmas,double labelSize,conststring32 label,bool garnish)1469 void DataModeler_normalProbabilityPlot (DataModeler me, Graphics g, integer numberOfQuantiles, double numberOfSigmas, double labelSize, conststring32 label, bool garnish) {
1470 	try {
1471 		autoTable thee = DataModeler_to_Table_zscores (me);
1472 		Table_normalProbabilityPlot (thee.get(), g, 2, numberOfQuantiles, numberOfSigmas, labelSize, label, garnish);
1473 	} catch (MelderError) {
1474 		Melder_clearError ();
1475 	}
1476 }
1477 
DataModeler_setBasisFunctions(DataModeler me,kDataModelerFunction type)1478 void DataModeler_setBasisFunctions (DataModeler me, kDataModelerFunction type) {
1479 	if (type == kDataModelerFunction::LINEAR) {
1480 		my f_evaluate = linear_evaluate;
1481 		my f_evaluateBasisFunctions = linear_evaluateBasisFunctions;
1482 		my fit = series_fit;
1483 	} else if (type == kDataModelerFunction::LEGENDRE) {
1484 		my f_evaluate = legendre_evaluate;
1485 		my f_evaluateBasisFunctions = legendre_evaluateBasisFunctions;
1486 		my fit = series_fit;
1487 	} else if (type == kDataModelerFunction::POLYNOME) {
1488 		my f_evaluate = polynomial_evaluate;
1489 		my f_evaluateBasisFunctions = polynome_evaluateBasisFunctions;
1490 		my fit = series_fit;
1491 	} else if (type == kDataModelerFunction::SIGMOID) {
1492 		my f_evaluate = sigmoid_evaluate;
1493 		my f_evaluateBasisFunctions = dummy_evaluateBasisFunctions;
1494 		my fit = sigmoid_fit;
1495 	} else if (type == kDataModelerFunction::SIGMOID_PLUS_CONSTANT) {
1496 		my f_evaluate = sigmoid_plus_constant_evaluate;
1497 		my f_evaluateBasisFunctions = dummy_evaluateBasisFunctions;
1498 		my fit = sigmoid_plus_constant_fit;
1499 	} else if (type == kDataModelerFunction::EXPONENTIAL) {
1500 		my f_evaluate = exponential_evaluate;
1501 		my f_evaluateBasisFunctions = dummy_evaluateBasisFunctions;
1502 		my fit = exponential_fit;
1503 	} else if (type == kDataModelerFunction::EXPONENTIAL_PLUS_CONSTANT) {
1504 		my f_evaluate = exponential_plus_constant_evaluate;
1505 		my f_evaluateBasisFunctions = dummy_evaluateBasisFunctions;
1506 		my fit = exponential_plus_constant_fit;
1507 	}
1508 	my type = type;
1509 }
1510 
DataModeler_init(DataModeler me,double xmin,double xmax,integer numberOfDataPoints,integer numberOfParameters,kDataModelerFunction type)1511 void DataModeler_init (DataModeler me, double xmin, double xmax, integer numberOfDataPoints, integer numberOfParameters, kDataModelerFunction type) {
1512 	my xmin = xmin;
1513 	my xmax = xmax;
1514 	DataModeler_setBasisFunctions (me, type);
1515 	my numberOfDataPoints = numberOfDataPoints;
1516 	my data = newvectorzero<structDataModelerData> (numberOfDataPoints);
1517 	my numberOfParameters = ( (type == kDataModelerFunction::EXPONENTIAL) ? 2 :
1518 		(type == kDataModelerFunction::EXPONENTIAL_PLUS_CONSTANT || type == kDataModelerFunction::SIGMOID) ? 3 :
1519 		(type == kDataModelerFunction::SIGMOID_PLUS_CONSTANT ? 4 : numberOfParameters) );
1520 	Melder_require (my numberOfParameters > 0,
1521 		U"The number of parameters should be greater than zero.");
1522 	my parameters = newvectorzero<structDataModelerParameter> (my numberOfParameters);
1523 	for (integer ipar = 1; ipar <= my numberOfParameters; ipar ++)
1524 		my parameters [ipar]. status = kDataModelerParameterStatus::FREE;
1525 	my parameterNames = Strings_createFixedLength (my numberOfParameters);
1526 	my parameterCovariances = Covariance_create (my numberOfParameters);
1527 	my type = type;
1528 }
1529 
DataModeler_create(double xmin,double xmax,integer numberOfDataPoints,integer numberOfParameters,kDataModelerFunction type)1530 autoDataModeler DataModeler_create (double xmin, double xmax, integer numberOfDataPoints, integer numberOfParameters, kDataModelerFunction type) {
1531 	try {
1532 		autoDataModeler me = Thing_new (DataModeler);
1533 		DataModeler_init (me.get(), xmin, xmax, numberOfDataPoints, numberOfParameters, type);
1534 		return me;
1535 	} catch (MelderError) {
1536 		Melder_throw (U"DataModeler not created.");
1537 	}
1538 }
1539 
DataModeler_createSimple(double xmin,double xmax,integer numberOfDataPoints,constVECVU const & parameterValues,double gaussianNoiseStd,kDataModelerFunction type)1540 autoDataModeler DataModeler_createSimple (double xmin, double xmax,
1541 	integer numberOfDataPoints, constVECVU const& parameterValues, double gaussianNoiseStd, kDataModelerFunction type)
1542 {
1543 	try {
1544 		Melder_require (xmin < xmax,
1545 			U"The domain should be defined properly.");
1546 
1547 		autoDataModeler me = DataModeler_create (xmin, xmax, numberOfDataPoints, parameterValues.size, type);
1548 		for (integer ipar = 1; ipar <= parameterValues.size; ipar ++)
1549 			my parameters [ipar]. value = parameterValues [ipar];   // parameters status ok
1550 		// generate the data that beinteger to the parameter values
1551 		for (integer ipoint = 1; ipoint <= numberOfDataPoints; ipoint ++) {
1552 			my data [ipoint]. x = xmin + (ipoint - 0.5) * (xmax - xmin) / numberOfDataPoints;
1553 			const double modelY = my f_evaluate (me.get(), my data [ipoint]. x, my parameters.get());
1554 			my data [ipoint]. y = modelY + NUMrandomGauss (0.0, gaussianNoiseStd);
1555 			my data [ipoint]. sigmaY = undefined;
1556 		}
1557 		my weighData = kDataModelerWeights::EQUAL_WEIGHTS;
1558 		return me;
1559 	} catch (MelderError) {
1560 		Melder_throw (U"No simple DataModeler created.");
1561 	}
1562 }
1563 
DataModeler_createFromDataModeler(DataModeler thee,integer numberOfParameters,kDataModelerFunction type)1564 autoDataModeler DataModeler_createFromDataModeler (DataModeler thee, integer numberOfParameters, kDataModelerFunction type) {
1565 	try {
1566 		autoDataModeler me = DataModeler_create (thy xmin, thy xmax, thy numberOfDataPoints, numberOfParameters, type);
1567 		for (integer k = 1; k <= my numberOfDataPoints; k ++) {
1568 			my data [k]. status = my data [k]. status;
1569 			my data [k]. x = thy data [k]. x;
1570 			my data [k]. y = thy data [k]. y;
1571 		}
1572 		return me;
1573 	} catch (MelderError) {
1574 		Melder_throw (U"No DataModeler could be created.");
1575 	}
1576 }
1577 
DataModeler_fit(DataModeler me)1578 void DataModeler_fit (DataModeler me) {
1579 	try {
1580 		my fit (me);
1581 	} catch (MelderError) {
1582 		Melder_throw (U"DataModeler no fit.");
1583 	}
1584 }
1585 
DataModeler_setDataWeighing(DataModeler me,kDataModelerWeights weighData)1586 void DataModeler_setDataWeighing (DataModeler me, kDataModelerWeights weighData) {
1587 	if (my weighData != weighData) {
1588 		my weighData = weighData;
1589 		DataModeler_fit (me); // because sigma has changed!
1590 	}
1591 }
1592 
DataModeler_to_Covariance_parameters(DataModeler me)1593 autoCovariance DataModeler_to_Covariance_parameters (DataModeler me) {
1594 	try {
1595 		autoCovariance cov = Data_copy (my parameterCovariances.get());
1596 		return cov;
1597 	} catch (MelderError) {
1598 		Melder_throw (U"Covariance not created.");
1599 	}
1600 }
1601 
Table_to_DataModeler(Table me,double xmin,double xmax,integer xcolumn,integer ycolumn,integer sigmacolumn,integer numberOfParameters,kDataModelerFunction type)1602 autoDataModeler Table_to_DataModeler (Table me, double xmin, double xmax, integer xcolumn, integer ycolumn, integer sigmacolumn, integer numberOfParameters,  kDataModelerFunction type) {
1603 	try {
1604 		Table_checkSpecifiedColumnNumberWithinRange (me, xcolumn);
1605 		Table_checkSpecifiedColumnNumberWithinRange (me, ycolumn);
1606 		const bool hasSigmaColumn = ( sigmacolumn > 0 );
1607 		if (hasSigmaColumn)
1608 			Table_checkSpecifiedColumnNumberWithinRange (me, sigmacolumn);
1609 		const integer numberOfRows = my rows.size;
1610 		integer numberOfData = 0;
1611 		autoVEC x = raw_VEC (numberOfRows);
1612 		autoVEC y = raw_VEC (numberOfRows);
1613 		autoVEC sy = raw_VEC (numberOfRows);
1614 		for (integer i = 1; i <= numberOfRows; i ++) {
1615 			const double val = Table_getNumericValue_Assert (me, i, xcolumn);
1616 			if (isdefined (val)) {
1617 				x [++ numberOfData] = val;
1618 				if (numberOfData > 1) {
1619 					if (val < x [numberOfData - 1]) {
1620 						Melder_throw (U"Data with x-values should be sorted.");
1621 					} else if (val == x [numberOfData - 1]) {
1622 						Melder_throw (U"All x-values should be different.");
1623 					}
1624 				}
1625 				y [numberOfData] = Table_getNumericValue_Assert (me, i, ycolumn);
1626 				sy [numberOfData] = ( hasSigmaColumn ? Table_getNumericValue_Assert (me, i, sigmacolumn) : undefined );
1627 			}
1628 		}
1629 		if (xmax <= xmin)
1630 			NUMextrema (x.part (1, numberOfData), & xmin, & xmax);
1631 		Melder_require (xmin < xmax,
1632 			U"The range of the x-values is too small.");
1633 
1634 		integer numberOfDataPoints = 0, validData = 0;
1635 		for (integer i = 1; i <= numberOfData; i ++) {
1636 			if (x [i] >= xmin && x [i] <= xmax)
1637 				numberOfDataPoints ++;
1638 		}
1639 		/*
1640 			Some models have a fixed number of parameters
1641 		*/
1642 		autoDataModeler thee = DataModeler_create (xmin, xmax, numberOfDataPoints, numberOfParameters, type);
1643 		numberOfDataPoints = 0;
1644 		for (integer i = 1; i <= numberOfData; i ++) {
1645 			if (x [i] >= xmin && x [i] <= xmax) {
1646 				thy data [++ numberOfDataPoints]. x = x [i];
1647 				thy data [numberOfDataPoints]. status = kDataModelerData::INVALID;
1648 				if (isdefined (y [i])) {
1649 					thy data [numberOfDataPoints]. y = y [i];
1650 					thy data [numberOfDataPoints]. sigmaY = sy [i];
1651 					thy data [numberOfDataPoints]. status = kDataModelerData::VALID;
1652 					validData ++;
1653 				}
1654 			}
1655 		}
1656 		thy numberOfDataPoints = numberOfDataPoints;
1657 		Melder_require (validData >= thy numberOfParameters,
1658 			U"The number of parameters should not exceed the number of data points.");
1659 
1660 		DataModeler_setDataWeighing (thee.get(), ( hasSigmaColumn ? kDataModelerWeights::ONE_OVER_SIGMA : kDataModelerWeights::EQUAL_WEIGHTS));
1661 		thy tolerance = 1e-8;
1662 		DataModeler_fit (thee.get());
1663 		return thee;
1664 	} catch (MelderError) {
1665 		Melder_throw (U"Datamodeler not created from Table.");
1666 	}
1667 }
1668 
DataModeler_getResidualSumOfSquares(DataModeler me,integer * out_numberOfValidDataPoints)1669 double DataModeler_getResidualSumOfSquares (DataModeler me, integer *out_numberOfValidDataPoints) {
1670 	integer numberOfValidDataPoints = 0;
1671 	longdouble residualSS = 0.0;
1672 	for (integer i = 1; i <= my numberOfDataPoints; i ++) {
1673 		if (my data [i] .status != kDataModelerData::INVALID) {
1674 			++ numberOfValidDataPoints;
1675 			residualSS += sqr (my data [i]. y - my f_evaluate (me, my data [i]. x, my parameters.get()));
1676 		}
1677 	}
1678 	if (out_numberOfValidDataPoints)
1679 		*out_numberOfValidDataPoints = numberOfValidDataPoints;
1680 	return ( numberOfValidDataPoints > 0 ? (double) residualSS : undefined );
1681 }
1682 
DataModeler_reportChiSquared(DataModeler me)1683 void DataModeler_reportChiSquared (DataModeler me) {
1684 	MelderInfo_writeLine (U"Chi squared test:");
1685 	MelderInfo_writeLine (( my weighData == kDataModelerWeights::EQUAL_WEIGHTS ? U"Standard deviation is estimated from the data." :
1686 		( my weighData == kDataModelerWeights::ONE_OVER_SIGMA ? U"Sigmas are used as estimate for local standard deviations." :
1687 		( my weighData == kDataModelerWeights::RELATIVE_ ? U"1/Q's are used as estimate for local standard deviations." :
1688 		U"Sqrt sigmas are used as estimate for local standard deviations." ) ) ));
1689 	double ndf, probability;
1690 	const double chisq = DataModeler_getChiSquaredQ (me, & probability, & ndf);
1691 	MelderInfo_writeLine (U"Chi squared = ", chisq);
1692 	MelderInfo_writeLine (U"Probability = ", probability);
1693 	MelderInfo_writeLine (U"Number of degrees of freedom = ", ndf);
1694 }
1695 
DataModeler_getDataStandardDeviation(DataModeler me)1696 double DataModeler_getDataStandardDeviation (DataModeler me) {
1697 	try {
1698 		integer numberOfDataPoints = 0;
1699 		autoVEC y = raw_VEC (my numberOfDataPoints);
1700 		for (integer i = 1; i <= my numberOfDataPoints; i ++)
1701 			if (my data [i] .status != kDataModelerData::INVALID)
1702 				y [++ numberOfDataPoints] = my data [i]. y;
1703 		y. resize (numberOfDataPoints);   // fake shrink
1704 		return NUMstdev (y.get());
1705 	} catch (MelderError) {
1706 		Melder_throw (U"Cannot estimate sigma.");
1707 	}
1708 }
1709 
1710 /* End of file DataModeler.cpp */
1711