1 /* NUM2.cpp
2  *
3  * Copyright (C) 1993-2021 David Weenink, Paul Boersma 2017,2020
4  *
5  * This code is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or (at
8  * your option) any later version.
9  *
10  * This code is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this work. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 /*
20  djmw 20020819 GPL header
21  djmw 20020819 Split nonGLP part off.
22  djmw 20001109 Changed stop criteria in NUMsvdcmp and NUMtqli.
23  djmw 20020819 Split into GPL and nonGPL part.
24  djmw 20021008 Removed SVD_sort.
25  djmw 20030619 Removed calls to NRC svd-routines.
26  djmw 20030623 Removed tqli en tred calls.
27  djmw 20030703 Replaced NUMincompleteBeta with gsl_sf_beta_inc.
28  djmw 20030710 NUMminimize_brent now also returns the minimum function value.
29  djmw 20030731 NUMridders: better approximation for small d.
30 			   NUMinvFisherQ better approximation for p < 0.5
31  djmw 20030813 Added NUMmad and NUMstatistics_huber.
32  djmw 20030825 Replaced gsl_sf_beta_inc with NUMincompleteBeta
33  pb   20030828 Improvements for invFisherQ, ridders, studentP, studentQ,
34  	invStudentQ, invChiSquareQ: modifications for 'undefined' return values.
35  djmw 20030830 Corrected a bug in NUMtriangularfilter_amplitude
36  djmw 20031111 Added NUMdmatrix_transpose, NUMdmatrix_printMatlabForm
37  djmw 20040105 Added NUMmahalanobisDistanceSquared_chi
38  djmw 20040303 Added NUMstring_containsPrintableCharacter.
39  djmw 20050406 NUMprocrutus->NUMprocrustes
40  djmw 20060319 NUMinverse_cholesky: calculation of determinant is made optional
41  djmw 20060517 Added NUMregexp_compile
42  djmw 20060518 Treat NULL string as empty string in strs_replace_regexp/literal. Don't accept empty search in replace_regexStr
43  djmw 20060626 Extra NULL argument for ExecRE.
44  djmw 20070302 NUMclipLineWithinRectangle
45  djmw 20070614 updated to version 1.30 of regular expressions.
46  djmw 20071022 Removed function NUMfvector_moment2.
47  djmw 20071201 Melder_warning<n>
48  djmw 20080110 Corrected some bugs in replace_regexStr
49  djmw 20080122 Bug in replace_regexStr
50  djmw 20080317 +NUMsinc
51  pb   20080410 FisherQ from gsl
52  djmw 20090630 NUMlogNormalP/Q from gsl
53  djmw 20090707 Rename NUMinverse_cholesky to NUMlowerCholeskyInverse,
54  	+NUMcovarianceFromColumnCentredMatrix, +NUMmultivariateKurtosis
55  djmw 20100311 +NUMsolveQuadraticEquation
56  djmw 20100426 replace wcstok by Melder_wcstok
57  djmw 20101209 removed NUMwcscmp is Melder_wcscmp now
58  djmw 20110304 Thing_new
59  djmw 20111110 use autostringvector
60 */
61 
62 #include "SVD.h"
63 #include "Eigen.h"
64 #include "NUMlapack.h"
65 #include "NUM2.h"
66 #include "NUMmachar.h"
67 #include "melder.h"
68 
69 #include "gsl_randist.h"
70 
71 #include "gsl_errno.h"
72 #include "gsl_sf_bessel.h"
73 #include "gsl_sf_gamma.h"
74 #include "gsl_sf_erf.h"
75 #include "gsl_sf_trig.h"
76 #include "gsl_poly.h"
77 #include "gsl_cdf.h"
78 
79 #define SIGN(a,b) ((b < 0) ? -fabs(a) : fabs(a))
80 
81 struct pdf1_struct {
82 	double p;
83 	double df;
84 };
85 struct pdf2_struct {
86 	double p;
87 	double df1;
88 	double df2;
89 };
90 
MATprintMatlabForm(constMATVU const & m,conststring32 name)91 void MATprintMatlabForm (constMATVU const& m, conststring32 name) {
92 	constexpr integer npc = 5;
93 	const ldiv_t n = ldiv (m.ncol, npc);
94 
95 	MelderInfo_open ();
96 	MelderInfo_write (name, U"= [");
97 	for (integer i = 1; i <= m.nrow; i ++) {
98 		for (integer j = 1; j <= n.quot; j ++) {
99 			for (integer k = 1; k <= npc; k ++)
100 				MelderInfo_write (m [i] [(j - 1) * npc + k], (k < npc ? U", " : U""));
101 			MelderInfo_write (j < n.quot ? U",\n" : U"");
102 		}
103 
104 		for (integer k = 1; k <= n.rem; k ++)
105 			MelderInfo_write (m [i] [n.quot * npc + k], (k < n.rem ? U", " : U""));
106 		MelderInfo_write (i < m.nrow ? U";\n" : U"];\n");
107 	}
108 	MelderInfo_close ();
109 }
110 
VECsmoothByMovingAverage_preallocated(VECVU const & out,constVECVU const & in,integer window)111 void VECsmoothByMovingAverage_preallocated (VECVU const& out, constVECVU const& in, integer window) {
112 	Melder_assert (out.size == in.size);
113 	Melder_require (window > 0,
114 		U"The averaging window should be larger than 0.");
115 	for (integer i = 1; i <= out.size; i ++) {
116 		integer jfrom = i - window / 2, jto = i + window / 2;
117 		if (window % 2 == 0)
118 			jto --;
119 		Melder_clipLeft (1_integer, & jfrom);
120 		Melder_clipRight (& jto, out.size);
121 		out [i] = NUMmean (in.part (jfrom, jto));
122 	}
123 }
124 
VECsmooth_gaussian(VECVU const & out,constVECVU const & in,double sigma,NUMfft_Table fftTable)125 void VECsmooth_gaussian (VECVU const& out, constVECVU const& in, double sigma, NUMfft_Table fftTable) {
126 	Melder_require (out.size == in.size,
127 		U"The sizes of the input and output vectors should be equal.");
128 	out  <<=  in;
129 	VECsmooth_gaussian_inplace (out, sigma, fftTable);
130 }
131 
VECsmooth_gaussian_inplace(VECVU const & in_out,double sigma,NUMfft_Table fftTable)132 void VECsmooth_gaussian_inplace (VECVU const& in_out, double sigma, NUMfft_Table fftTable) {
133 	Melder_require (in_out.size <= fftTable -> n,
134 		U"The dimension of the table should at least equal the length of the input vector.");
135 	autoVEC smooth = zero_VEC (fftTable -> n);
136 	smooth.part (1, in_out.size)  <<=  in_out;
137 	NUMfft_forward (fftTable, smooth.get());
138 	/*
139 		Low pass filter with a Gaussian.
140 		The Fourier Transform of h(x)= (exp(-a.x^2) is
141 			H(f) = sqrt (pi/a) exp (-pi^2.f^2/a).
142 		A Gaussian g(x)=sqrt(a/pi)h(x), where a = 1/(2s^2)
143 		The "frequency" region is divided in fftTable -> n steps,
144 		where one "frequency" unit f equals 1 / fftTable -> n.
145 	*/
146 	const double b = 2.0 * NUMpi * sigma * NUMpi * sigma ;
147 	for (integer k = 2; k <= (fftTable -> n + 1) / 2; k ++) {
148 		const double f = (k - 1) /  (double) fftTable -> n;
149 		const double weight = exp (- b * f * f);
150 		smooth [k * 2 - 2] *= weight; // re
151 		smooth [k * 2 - 1] *= weight; // im
152 	}
153 	if (fftTable -> n % 2 == 0)
154 		smooth [fftTable -> n] *= exp (- b * 1 / 2  * 1 / 2);
155 	NUMfft_backward (fftTable, smooth.get());
156 	/*
157 		backwardFT (forwardFT (data)) = n * data;
158 	*/
159 	const double scaleFacor = 1.0 / fftTable -> n;
160 	smooth.part (1, in_out.size)  *=  scaleFacor;
161 	in_out  <<=  smooth.part (1, in_out.size);
162 }
163 
VECsmooth_gaussian_inplace(VECVU const & in_out,double sigma)164 void VECsmooth_gaussian_inplace (VECVU const& in_out, double sigma) {
165 	const integer nfft = Melder_iroundUpToPowerOfTwo (in_out.size);
166 	autoNUMfft_Table fftTable;
167 	NUMfft_Table_init (& fftTable, nfft);
168 	VECsmooth_gaussian_inplace (in_out, sigma, & fftTable);
169 }
170 
MATcovarianceFromColumnCentredMatrix(constMATVU const & x,integer ndf)171 autoMAT MATcovarianceFromColumnCentredMatrix (constMATVU const& x, integer ndf) {
172 	Melder_require (ndf >= 0 && x.nrow - ndf > 0,
173 		U"Invalid arguments.");
174 	autoMAT covar = mtm_MAT (x);
175 	covar.all()  *=  1.0 / (x.nrow - ndf);
176 	return covar;
177 }
178 
MATweighRows(MATVU const & x,constVECVU const & y)179 static void MATweighRows (MATVU const& x, constVECVU const& y) {
180 	Melder_assert (x.nrow == y.size);
181 	for (integer irow = 1; irow <= x.nrow; irow ++)
182 		x.row (irow)  *=  y [irow];
183 }
184 
MATmtm_weighRows(MATVU const & result,constMATVU const & data,constVECVU const & rowWeights)185 void MATmtm_weighRows (MATVU const& result, constMATVU const& data, constVECVU const& rowWeights) {
186 	Melder_assert (data.nrow == rowWeights.size);
187 	Melder_assert (data.ncol == result.ncol);
188 	Melder_assert (result.nrow == result.ncol);
189 	result  <<=  0.0;
190 	if (true) {
191 		autoMAT outer = raw_MAT (result.ncol, result.ncol);
192 		for (integer irow = 1; irow <= data.nrow; irow ++) {
193 			outer_MAT_out (outer.all(), data.row (irow), data.row (irow));
194 			result  +=  outer.all()  *  rowWeights [irow];
195 		}
196 	} else {
197 		autoVEC w = raw_VEC (rowWeights.size);
198 		autoMAT d = copy_MAT (data);
199 		for (integer irow = 1; irow <= w.size; irow ++)
200 			w [irow] = sqrt (rowWeights [irow]);
201 		MATweighRows (d.get(), w.get());
202 		mtm_MAT_out (result, d.get());
203 	}
204 }
205 
MATmultiplyRows_inplace(MATVU const & x,constVECVU const & v)206 inline void MATmultiplyRows_inplace (MATVU const& x, constVECVU const& v) {
207 	Melder_assert (x.nrow == v.size);
208 	for (integer irow = 1; irow <= x.nrow; irow ++)
209 		x.row (irow)  *=  v [irow];  // x [i,j] * v [i]
210 }
MATmultiplyColumns_inplace(MATVU const & x,constVECVU const & v)211 inline void MATmultiplyColumns_inplace (MATVU const& x, constVECVU const& v) {
212 	Melder_assert (x.ncol == v.size);
213 	for (integer icol = 1; icol <= x.nrow; icol ++)
214 		x.column (icol)  *=  v [icol];  // x [i,j] * v [j]
215 }
216 
NUMmultivariateKurtosis(constMATVU const & m,integer method)217 double NUMmultivariateKurtosis (constMATVU const& m, integer method) {
218 	double kurt = undefined;
219 	if (m.nrow < 5)
220 		return kurt;
221 	autoMAT x = copy_MAT (m);
222 	autoVEC mean = columnMeans_VEC (x.get());
223 	x.all()  -=  mean.all();
224 	autoMAT covar = MATcovarianceFromColumnCentredMatrix (x.get(), 1);
225 
226 	if (method == 1) { // Schott (2001, page 33)
227 		kurt = 0.0;
228 		for (integer l = 1; l <= x.ncol; l ++) {
229 			const double sll2 = covar [l] [l] * covar [l] [l];
230 			double zl = 0.0, wl;
231 			for (integer j = 1; j <= x.nrow; j ++) {
232 				const double d = x [j] [l] - mean [l], d2 = d * d;
233 				zl += d2 * d2;
234 			}
235 			zl = (zl - 6.0 * sll2) / (x.nrow - 4);
236 			wl = (sll2 - zl / x.nrow) * x.nrow / (x.nrow - 1);
237 			kurt += zl / wl;
238 		}
239 		kurt = kurt / (3.0 * x.ncol) - 1.0;
240 	}
241 	return kurt;
242 }
243 
244 /*
245 	The following algorithm for monotone regession is on the average
246 	3.5 times faster than
247 	Kruskal's algorithm for monotone regression (and much simpler).
248 	Regression is ascending
249 */
newVECmonotoneRegression(constVEC x)250 autoVEC newVECmonotoneRegression (constVEC x) {
251 	autoVEC fit = copy_VEC (x);
252 	double xt = undefined;   // only to stop gcc from complaining "may be used uninitialized"
253 	for (integer i = 2; i <= x.size; i ++) {
254 		if (fit [i] >= fit [i - 1])
255 			continue;
256 		longdouble sum = fit [i];
257 		integer nt = 1;
258 		for (integer j = 1; j <= i - 1; j ++) {
259 			sum += fit [i - j];
260 			nt ++;
261 			xt = (double) sum / nt;   // i >= 2 -> xt always gets a value
262 			if (j < i - 1 && xt >= fit [i - j - 1])
263 				break;
264 		}
265 		for (integer j = i - nt + 1; j <= i; j ++)
266 			fit [j] = xt;
267 	}
268 	return fit;
269 }
270 
271 #undef TINY
272 
NUMdeterminant_fromSymmetricMatrix(constMAT m)273 double NUMdeterminant_fromSymmetricMatrix (constMAT m) {
274 	Melder_assert (m.nrow == m.ncol);
275 	autoMAT a = copy_MAT (m);
276 	/*
277 		Cholesky decomposition in lower, leave upper intact
278 	*/
279 	integer info;
280 	NUMlapack_dpotf2_ ("U", a.nrow, & a [1] [1], m.nrow, & info);
281 	Melder_require (info == 0,
282 			U"dpotf2_ failed with error = ", info);
283 	longdouble lnd = 0.0;
284 	for (integer i = 1; i <= a.nrow; i ++)
285 		lnd += log (a [i] [i]);
286 	lnd *= 2.0; // because A = L . L'
287 	return (double) lnd;
288 }
289 
newMATlowerCholesky(constMATVU const & a,double * out_lnd)290 autoMAT newMATlowerCholesky (constMATVU const& a, double *out_lnd) {
291 	Melder_assert (a.nrow == a.ncol);
292 	autoMAT result = copy_MAT (a);
293 	MATlowerCholesky_inplace (result.get(), out_lnd);
294 	for (integer irow = 1; irow <= a.nrow - 1; irow ++)
295 		for (integer icol = irow + 1; icol <= a.nrow; icol ++)
296 			result [irow] [icol] = 0.0;
297 	return result;
298 }
299 
300 
newMATlowerCholeslyInverse_fromLowerCholesky(constMAT const & m)301 autoMAT newMATlowerCholeslyInverse_fromLowerCholesky (constMAT const& m) {
302 	Melder_assert (m.nrow == m.ncol);
303 	autoMAT result = copy_MAT (m);
304 	MATlowerCholeskyInverse_inplace (result.get(), nullptr);
305 	return result;
306 }
307 
MATlowerCholesky_inplace(MAT a,double * out_lnDeterminant)308 void MATlowerCholesky_inplace (MAT a, double *out_lnDeterminant) {
309 	Melder_assert (a.nrow == a.ncol);
310 	/*
311 		Cholesky decomposition in lower, leave upper intact
312 		Fortran storage -> use uplo='U' to get 'L'.
313 	*/
314 	integer info;
315 	NUMlapack_dpotf2_ ("U", a.nrow, & a [1] [1], a.nrow, & info);
316 	Melder_require (info == 0,
317 		U"dpotf2 fails with code ", info, U".");
318 	/*
319 		Determinant from diagonal, diagonal is now sqrt (a [i] [i]) !
320 	*/
321 	if (out_lnDeterminant) {
322 		longdouble lnDeterminant = 0.0;
323 		for (integer i = 1; i <= a.nrow; i ++)
324 			lnDeterminant += log (a [i] [i]);
325 		lnDeterminant *= 2.0;   /* because A = L . L' */
326 		*out_lnDeterminant = double (lnDeterminant);
327 	}
328 }
329 
MATlowerCholeskyInverse_inplace(MAT a,double * out_lnd)330 void MATlowerCholeskyInverse_inplace (MAT a, double *out_lnd) {
331 	Melder_assert (a.nrow == a.ncol);
332 	MATlowerCholesky_inplace (a, out_lnd);
333 	/*
334 		Get the inverse
335 	*/
336 	integer info;
337 	(void) NUMlapack_dtrtri_ ("U", "N", a.nrow, & a [1] [1], a.nrow, & info);
338 	Melder_require (info == 0,
339 		U"dtrtri_ fails with code ", info, U".");
340 }
341 
newMATinverse_fromLowerCholeskyInverse(constMAT m)342 autoMAT newMATinverse_fromLowerCholeskyInverse (constMAT m) {
343 	Melder_assert (m.nrow == m.ncol);
344 	autoMAT result = raw_MAT (m.nrow, m.nrow);
345 	for (integer irow = 1; irow <= m.nrow; irow ++) {
346 		for (integer icol = 1; icol <= irow; icol ++) {
347 			longdouble sum = 0.0;
348 			for (integer k = irow; k <= m.nrow; k ++)
349 				sum += m [k] [irow] * m [k] [icol];
350 			result [irow] [icol] = result [icol] [irow] = (double) sum;
351 		}
352 	}
353 	return result;
354 }
355 
NUMmahalanobisDistanceSquared(constMAT lowerInverse,constVEC v,constVEC m)356 double NUMmahalanobisDistanceSquared (constMAT lowerInverse, constVEC v, constVEC m) {
357 	Melder_assert (lowerInverse.ncol == v.size && v.size == m.size);
358 	longdouble chisq = 0.0;
359 	if (lowerInverse.nrow == 1) { // diagonal matrix is one row matrix
360 		for (integer icol = 1; icol <= v.size; icol ++) {
361 			const double t = lowerInverse [1] [icol] * (v [icol] - m [icol]);
362 			chisq += t * t;
363 		}
364 	} else { // square matrix
365 		for (integer irow = v.size; irow > 0; irow --) {
366 			longdouble t = 0.0;
367 			for (integer icol = 1; icol <= irow; icol ++)
368 				t += lowerInverse [irow] [icol] * (v [icol] - m [icol]);
369 			chisq += t * t;
370 		}
371 	}
372 	return (double) chisq;
373 }
374 
375 /*
376 	G. Golub & C. van Loan (1996), Matrix computations, third edition,
377 	The Johns Hopkins University Press Ltd.,
378 	London, (Par. 7.3.1 The Power Method)
379 */
VECdominantEigenvector_inplace(VEC inout_q,constMAT m,double tolerance)380 double VECdominantEigenvector_inplace (VEC inout_q, constMAT m, double tolerance) {
381 	Melder_assert (m.nrow == m.ncol && inout_q.size == m.nrow);
382 
383 	double lambda0, lambda = NUMmul (inout_q, m, inout_q); //  q'. M . q
384 	Melder_require (lambda > 0.0,
385 		U"Zero matrices ??");
386 	autoVEC z = raw_VEC (m.nrow);
387 	integer maximunNumberOfIterations = 30;
388 	for (integer iter = 1; iter <= maximunNumberOfIterations; iter ++) {
389 		lambda0 = lambda;
390 		mul_VEC_out (z.get(), m, inout_q);
391 		VECnormalize_inplace (z.get(), 2.0, 1.0);
392 		lambda = NUMmul (z.get(), m, z.get()); // z'. M . z
393 		if (fabs (lambda - lambda0) < tolerance)
394 			break;
395 	}
396 	inout_q  <<=  z.all();
397 	return lambda;
398 }
399 
MATprojectColumnsOnEigenspace_preallocated(MAT projection,constMATVU const & data,constMATVU const & eigenvectors)400 void MATprojectColumnsOnEigenspace_preallocated (MAT projection, constMATVU const& data, constMATVU const& eigenvectors) {
401 	Melder_assert (data.nrow == eigenvectors.ncol && projection.nrow == eigenvectors.nrow);
402 	for (integer icol = 1; icol <= data.ncol; icol ++)
403 		for (integer irow = 1; irow <= eigenvectors.nrow; irow ++)
404 			projection [irow] [icol] = NUMinner (eigenvectors.row (irow), data.column (icol));
405 	// MATmul_tt (data.get(), eigenvectors.get()) ??
406 }
407 
NUMsolveQuadraticEquation(double a,double b,double c,double * out_x1,double * out_x2)408 integer NUMsolveQuadraticEquation (double a, double b, double c, double *out_x1, double *out_x2) {
409 	double x1, x2;
410 	integer result =  gsl_poly_solve_quadratic (a, b, c, & x1, & x2);
411 	if (out_x1)
412 		*out_x1 = x1;
413 	if (out_x2)
414 		*out_x2 = x2;
415 	return result;
416 }
417 
newVECsolve(constMATVU const & a,constVECVU const & b,double tolerance)418 autoVEC newVECsolve (constMATVU const& a, constVECVU const& b, double tolerance) {
419 	Melder_assert (a.nrow == b.size);
420 	autoSVD me = SVD_createFromGeneralMatrix (a);
421 	SVD_zeroSmallSingularValues (me.get(), tolerance);
422 	autoVEC x = SVD_solve (me.get(), b);
423 	return x;
424 }
425 
newMATsolve(constMATVU const & a,constMATVU const & b,double tolerance)426 autoMAT newMATsolve (constMATVU const& a, constMATVU const& b, double tolerance) {
427 	Melder_assert (a.nrow == b.nrow);
428 	const double tol = ( tolerance > 0.0 ? tolerance : NUMfpp -> eps * a.nrow );
429 
430 	autoSVD me = SVD_createFromGeneralMatrix (a);
431 	autoMAT x = raw_MAT (b.nrow, b.ncol);
432 
433 	SVD_zeroSmallSingularValues (me.get(), tol);
434 
435 	for (integer k = 1; k <= b.ncol; k ++) {
436 		autoVEC xt = SVD_solve (me.get(), b.column (k));
437 		x.column (k)  <<=  xt.all();
438 	}
439 	return x;
440 }
441 
442 /*
443 	Non-negative least squares: Solve Ax = y, i.e
444 	minimize || Ax - y ||^2 for vector x, where all x [i] >= 0.0
445 */
VECsolveNonnegativeLeastSquaresRegression(VECVU const & x,constMATVU const & a,constVECVU const & y,integer maximumNumberOfIterations,double tol,integer infoLevel)446 void VECsolveNonnegativeLeastSquaresRegression (VECVU const& x, constMATVU const& a, constVECVU const& y, integer maximumNumberOfIterations, double tol, integer infoLevel) {
447 	Melder_assert (a.nrow == y.size);
448 	Melder_assert (a.ncol == x.size);
449 	for (integer i = 1; i <= x.size; i ++)
450 		Melder_clipLeft (0.0, & x [i]);
451 	autoVEC r = raw_VEC (y.size);
452 	const double normSquared_y = NUMsum2 (y);
453 	integer iter = 1;
454 	bool farFromConvergence = true;
455 	double difsq, difsq_previous = 1e100; // large enough
456 	while (iter <= maximumNumberOfIterations && farFromConvergence) {
457 		/*
458 			Alternating Least Squares: Fixate all except x [icol]
459 		*/
460 		for (integer icol = 1; icol <= a.ncol; icol ++) {
461 			r.all()  <<=  y;
462 			for (integer jcol = 1; jcol <= a.ncol; jcol ++)
463 				if (jcol != icol)
464 					r.get()  -=  x [jcol] * a.column (jcol);
465 			const double ajr = NUMinner (a.column (icol), r.all());
466 			const double ajaj = NUMsum2 (a.column (icol));
467 			x [icol] = std::max (0.0, ajr / ajaj);
468 		}
469 		/*
470 			Calculate t(x) and compare with previous result.
471 		*/
472 		mul_VEC_out (r.get(), a, x);
473 		r.get()  -=  y;
474 		difsq = NUMsum2 (r.all());
475 		farFromConvergence = ( fabs (difsq - difsq_previous) > std::max (tol * normSquared_y, NUMeps) );
476 		if (infoLevel > 1)
477 			MelderInfo_writeLine (U"Iteration: ", iter, U", error: ", difsq);
478 		difsq_previous = difsq;
479 		iter ++;
480 	}
481 	iter --;
482 	if (infoLevel >= 1)
483 		MelderInfo_writeLine (U"Number of iterations: ", iter, U"; Minimum: ", difsq);
484 	if (infoLevel > 0)
485 			MelderInfo_drain();
486 }
487 
488 struct nr_struct {
489 	double *y, *delta;
490 };
491 
492 /*
493 	f (lambda) = sum (y [i]^2 delta [i] / (delta [i]-lambda)^2, i=1..3)
494 	f'(lambda) = 2 * sum (y [i]^2 delta [i] / (delta [i]-lambda)^3, i=1..3)
495 */
496 
nr_func(double x,double * df,void * data)497 static double nr_func (double x, double *df, void *data) {
498 	const struct nr_struct *me = (struct nr_struct *) data;
499 	longdouble f = 0.0, derivative = 0.0;
500 	for (integer i = 1; i <= 3; i ++) {
501 		const longdouble t1 = (my delta [i] - x);
502 		const longdouble t2 = my y [i] / t1;
503 		const longdouble t3 = t2 * t2 * my delta [i];
504 		f += t3;
505 		derivative += t3 * 2.0 / t1;
506 	}
507 	*df = (double) derivative;
508 	return (double) f;
509 }
510 
NUMsolveConstrainedLSQuadraticRegression(constMAT const & x,constVEC const & d,double * out_alpha,double * out_gamma)511 void NUMsolveConstrainedLSQuadraticRegression (constMAT const& x, constVEC const& d, double *out_alpha, double *out_gamma) {
512 	Melder_assert (x.ncol == x.nrow && d.size == x.ncol && d.size == 3);
513 	Melder_assert (x.nrow >= x.ncol);
514 	integer n3 = 3;
515 	const double eps = 1e-6;
516 	/*
517 		Construct X'.X which will be 3x3
518 	*/
519 	autoMAT xtx = mtm_MAT (x);
520 	/*
521 		Eq (2): get lower Cholesky decomposition from X'.X (3x3)
522 		X'X -> lowerCholesky * lowerCholesky.transpose()
523 		F'^-1 == lowerCholesky
524 		F^-1 == lowerCholesky.transpose()
525 		F = lowerCholeskyInverse.transpose()
526 	*/
527 	autoMAT lowerCholesky = newMATlowerCholesky (xtx.get(), nullptr);
528 	autoMAT lowerCholeskyInverse = newMATlowerCholeslyInverse_fromLowerCholesky (lowerCholesky.get());
529 	/*
530 		Construct G and its eigen-decomposition (eq. (4,5))
531 		Sort eigenvalues (& eigenvectors) ascending.
532 	*/
533 	autoMAT b = zero_MAT (n3, n3);
534 	b [3] [1] = b [1] [3] = -0.5;
535 	b [2] [2] = 1.0;
536 	/*
537 		G = F^-1 B (F')^-1 (eq. 4)
538 	*/
539 	autoMAT g = zero_MAT (n3, n3);
540 	MATmul3_XYXt (g.get(), lowerCholesky.transpose(), b.get());
541 	/*
542 		G's eigen-decomposition with eigenvalues (assumed ascending). (eq. 5)
543 	*/
544 	autoMAT p;
545 	autoVEC delta;
546 	MAT_getEigenSystemFromSymmetricMatrix (g.get(), & p, & delta, true);
547 	/*
548 		Construct y = P'*F'*O'*d = (F*P)'*(O'*d)	(page 632)
549 		We need F*P for later
550 	*/
551 	autoVEC otd = mul_VEC (x.transpose(), d);
552 	autoMAT fp = mul_MAT (p.transpose(), lowerCholeskyInverse.transpose()); // F*P !!
553 	autoVEC y = mul_VEC (fp.transpose(), otd.get()); // P'F' * O'd
554 	/*
555 		The solution (3 cases)
556 	*/
557 	autoVEC w = zero_VEC (n3);
558 	autoVEC chi = raw_VEC (n3);
559 	autoVEC diag = zero_VEC (n3);
560 
561 	if (fabs (y [1]) < eps) {
562 		/*
563 			Case 1: page 633
564 		*/
565 		const double t21 = y [2] / (delta [2] - delta [1]);
566 		const double t31 = y [3] / (delta [3] - delta [1]);
567 		w [1] = sqrt (- delta [1] * (t21 * t21 * delta [2] + t31 * t31 * delta [3]));
568 		w [2] = t21 * delta [2];
569 		w [3] = t31 * delta [3];
570 
571 		mul_VEC_out (chi.get(), fp.get(), w.get()); // chi = F*P*w
572 
573 		if (fabs (chi [3] / chi [1]) < eps) {
574 			w [1] = - w [1];
575 			mul_VEC_out (chi.get(), fp.get(), w.get());
576 		}
577 	} else if (fabs (y [2]) < eps) {
578 		/*
579 			Case 2: page 633
580 		*/
581 		const double t12 = y [1] / (delta [1] - delta [2]);
582 		const double t32 = y [3] / (delta [3] - delta [2]);
583 		double t2;
584 		w [1] = t12 * delta [1];
585 		if ( (delta [2] < delta [3] && (t2 = (t12 * t12 * delta [1] + t32 * t32 * delta [3])) <= 0.0)) {
586 			w [2] = sqrt (- delta [2] * t2); /* +- */
587 			w [3] = t32 * delta [3];
588 			mul_VEC_out (chi.get(), p.get(), w.get());
589 			if (fabs (chi [3] / chi [1]) < eps) {
590 				w [2] = -w [2];
591 				mul_VEC_out (chi.get(), fp.get(), w.get());
592 			}
593 		} else if (fabs (delta [2] - delta [3]) < eps && fabs (y [3]) < eps) {
594 			/*
595 				Choose one value for w [2] from an infinite number
596 			*/
597 			w [2] = w [1];
598 			w [3] = sqrt (- t12 * t12 * delta [1] * delta [2] - w [2] * w [2]);
599 			mul_VEC_out (chi.get(), fp.get(), w.get());
600 		} else {
601 			// should not be here
602 		}
603 	} else {
604 		/*
605 			Case 3: page 634 use Newton-Raphson root finder
606 		*/
607 		struct nr_struct me;
608 		me.y = y.asArgumentToFunctionThatExpectsOneBasedArray();
609 		me.delta = delta.asArgumentToFunctionThatExpectsOneBasedArray();
610 
611 		double lambda = NUMnrbis (nr_func, delta [1] + eps, delta [2] - eps, & me);
612 
613 		for (integer i = 1; i <= 3; i++)
614 			w [i] = y [i] / (1.0 - lambda / delta [i]);
615 		mul_VEC_out (chi.get(), fp.get(), w.get());
616 	}
617 	if (out_alpha)
618 		*out_alpha = chi [1];
619 	if (out_gamma)
620 		*out_gamma = chi [3];
621 }
622 
623 /*
624 	To find the value of b for which the derivative of psi(b) equals zero (Ten Berge, formula 4)
625 	psi (b) = delta*b - b^2 /(4*alpha) - sum (i=1..n, x [i]^2 / (c [i] - b)) (formula 4)
626 	psi'(b) = delta - b / (2 alpha) - sum (x [i]^2 / (c [i] - b)^2, i=1..n). (formula 5)
627 	We use NewTon-Raphson with bisection because we can also use psi'(b)'s derivative:
628 	f (b) = psi'(b)  = delta - b / (2 alpha) - sum (x [i]^2 / (c [i] - b)^2, i=1..n)
629 	f'(b) = psi''(b) = - 1 / (2 alpha) + 2 * sum (x [i]^2 / (c [i] - b)^3, i=1..n)
630 */
631 struct nr2_struct {
632 	integer numberOfTerms;
633 	double delta, alpha;
634 	VEC x, c;
635 };
636 
bolzanoFunction(double b,void * data)637 static double bolzanoFunction (double b, void *data) {
638 	const struct nr2_struct *me = (struct nr2_struct *) data;
639 	longdouble f = my delta - 0.5 * b / my alpha;
640 	for (integer i = 1; i <= my numberOfTerms; i ++) {
641 		const longdouble c = my x [i] / (my c [i] - b);
642 		f -= c * c;
643 	}
644 	return (double) f;
645 }
646 
NUMbolzanoSearch(double (* func)(double x,void * closure),double xmin,double xmax,void * closure)647 static double NUMbolzanoSearch (double (*func) (double x, void *closure), double xmin, double xmax, void *closure) {
648 	Melder_assert (xmin < xmax);
649 	double fleft = (*func)(xmin, closure);
650 	double fright = (*func)(xmax, closure);
651 	if (fleft == 0.0)
652 		return fleft;
653 	if (fright == 0.0)
654 		return fright;
655 	Melder_require (fleft * fright < 0.0,
656 		U"Invalid interval: the function values at the borders should have different signs.");
657 	double xdifference = fabs (xmax - xmin), xdifference_old;
658 	do {
659 		const double xmid = 0.5 * (xmax + xmin);
660 		const double fmid = (*func)(xmid, closure);
661 		if (fmid == 0.0)
662 			return fmid;
663 		if (fleft * fmid < 0.0)
664 			xmax = xmid;
665 		else
666 			xmin = xmid;
667 		xdifference_old = xdifference;
668 		xdifference = fabs (xmax - xmin);
669 	} while (xdifference < xdifference_old);
670 	return 0.5 * (xmax + xmin);
671 }
672 
newVECsolveWeaklyConstrainedLinearRegression(constMAT const & a,constVEC const & y,double alpha,double delta)673 autoVEC newVECsolveWeaklyConstrainedLinearRegression (constMAT const& a, constVEC const& y, double alpha, double delta) {
674 	Melder_assert (a.nrow == y.size);
675 	Melder_require (a.nrow >= a.ncol,
676 		U"The number of rows should not be less than the number of columns.");
677 	Melder_require (alpha >= 0.0,
678 		U"The coefficient of the penalty function should not be negative.");
679 	Melder_require (delta > 0,
680 		U"The solution's vector length should be positive.");
681 
682 	autoVEC c = zero_VEC (a.ncol);
683 	autoSVD svd = SVD_createFromGeneralMatrix (a);
684 
685 	if (alpha == 0.0) {
686 		/*
687 			Solve by standard least squares
688 		*/
689 		autoVEC result = SVD_solve (svd.get(), y);
690 		return result;
691 	}
692 	/*
693 		Step 1: (page 608)
694 		Compute U and C from the eigendecomposition F'F = UCU'
695 		U is from the SVD (F) = VCU' as in the paper (our notation would be: SVD(F)=UDV')
696 		Therefore, the paper's U is our V but since our V is stored as the transpose: U == svd -> v.transpose()!
697 	*/
698 	constMATVU u = svd -> v.transpose();
699 	for (integer i = 1; i <= a.ncol; i++) {
700 		const double ci = svd -> d [i];
701 		c [i] = ci * ci;
702 	}
703 	/*
704 		Evaluate q, the multiplicity of the smallest eigenvalue in C.
705 		The c [i] are ordered, i.e. c [i] >= c [i+1] for all i.
706 	*/
707 	integer q = 1;
708 	const double tol = 1e-6;
709 	while (q < a.ncol && (c [a.ncol - q] - c [a.ncol]) < tol)
710 		q ++;
711 	/*
712 		Step 2: x = U'F'y
713 	*/
714 	autoVEC ftphi = mul_VEC (a.transpose(), y);
715 	autoVEC x = mul_VEC (u.transpose(), ftphi.get());
716 	/*
717 		Step 3:
718 	*/
719 	struct nr2_struct me;
720 	me.numberOfTerms = a.ncol;
721 	me.delta = delta;
722 	me.alpha = alpha;
723 	me.x = x.get();
724 	me.c = c.get();
725 
726 	const double xqsq = NUMsum2 (x.part (a.ncol - q + 1, a.ncol)); // = NUMinner (xq,xq)
727 	integer r = a.ncol;
728 	if (xqsq < tol) { // Case3.b page 608
729 		r = a.ncol - q;
730 		me.numberOfTerms = r;
731 		const double fm = bolzanoFunction (c [a.ncol], & me);
732 		if (fm >= 0.0) { // step 3.b1
733 			/*
734 				Get w0 by overwriting vector x.
735 			*/
736 			x [r + 1] = sqrt (fm);
737 			for (integer j = 1; j <= r; j ++)
738 				x [j] /= c [j] - c [a.ncol]; // eq. 10
739 			if (q > 1)
740 				x.part (r + 2, a.ncol)  <<=  0.0;
741 			autoVEC result = mul_VEC (u, x.all());
742 			return result;
743 		}
744 		// else continue with r = m - q
745 	}
746 	/*
747 		Step 3a & 3b2, determine interval lower bound for root finder
748 	*/
749 	longdouble xCx = 0.0;
750 	for (integer j = 1; j <= r; j ++)
751 		xCx += x [j] * x [j] / c [j];
752 
753 	const double bmin = ( delta > 0.0 ? - xCx / delta : -2.0 * sqrt (alpha * xCx) );
754 	//const double eps = (c [a.ncol] - bmin) * tol;
755 	/*
756 		Find the root of d(psi(b)/db in interval (bmin, c [m])
757 	*/
758 	const double b0 = NUMbolzanoSearch (bolzanoFunction, bmin, c [a.ncol], & me);
759 	/*
760 				Get w0 by overwriting vector x.
761 	*/
762 	for (integer j = 1; j <= r; j ++)
763 		x [j] /= c [j] - b0; // eq. 7
764 	if (q > 1)
765 		x.part (r + 1, a.ncol)  <<=  0.0;
766 	autoVEC result = mul_VEC (u, x.all());
767 	return result;
768 }
769 
770 
NUMprocrustes(constMATVU const & x,constMATVU const & y,autoMAT * out_rotation,autoVEC * out_translation,double * out_scale)771 void NUMprocrustes (constMATVU const& x, constMATVU const& y, autoMAT *out_rotation, autoVEC *out_translation, double *out_scale) {
772 	Melder_assert (x.nrow == y.nrow && x.ncol == y.ncol);
773 	Melder_assert (x.nrow >= x.ncol);
774 	const bool orthogonal = ! out_translation || ! out_scale; // else similarity transform
775 	/*
776 		Reference: Borg & Groenen (1997), Modern multidimensional scaling,
777 		Springer
778 		1. Calculate C = X'JY (page 346) for similarity transform
779 			else X'Y for othogonal (page 341)
780 			JY amounts to centering the columns of Y.
781 	*/
782 	autoMAT yc = copy_MAT (y);
783 	if (! orthogonal)
784 		centreEachColumn_MAT_inout (yc.get());
785 	autoMAT c = mul_MAT (x.transpose(), yc.get()); // X'(JY)
786 	/*
787 		2. Decompose C by SVD: C = UDV' (our SVD has eigenvectors stored row-wise V!)
788 	*/
789 	autoSVD svd = SVD_createFromGeneralMatrix (c.get());
790 	const double trace = NUMsum (svd -> d.all());
791 	Melder_require (trace > 0.0,
792 		U"NUMprocrustes: degenerate configuration(s).");
793 	/*
794 		3. T = VU'
795 	*/
796 	autoMAT rotation = mul_MAT (svd->v.all(), svd->u.transpose());
797 
798 	if (! orthogonal) {
799 		/*
800 			4. Dilation factor s = (tr X'JYT) / (tr Y'JY)
801 			First we need YT.
802 		*/
803 		autoMAT yt = mul_MAT (y, rotation.get());
804 		/*
805 			X'J = (JX)' centering the columns of X
806 		*/
807 		autoMAT xc = copy_MAT (x);
808 		centreEachColumn_MAT_inout (xc.get());
809 		/*
810 			tr X'J YT == tr xc' yt
811 		*/
812 		const double traceXtJYT = NUMtrace2 (xc.transpose(), yt.get()); // trace (Xc'.(YT))
813 		const double traceYtJY = NUMtrace2 (y.transpose(), yc.get()); // trace (Y'.Yc)
814 		const longdouble scale = traceXtJYT / traceYtJY;
815 		/*
816 			5. Translation vector tr = (X - sYT)'1 / x.nrow
817 		*/
818 		if (out_translation) {
819 			autoVEC translation = zero_VEC (x.ncol);
820 			for (integer i = 1; i <= x.ncol; i ++) {
821 				longdouble productsum = 0.0;
822 				for (integer j = 1; j <= x.nrow; j ++)
823 					productsum += x [j] [i] - scale * yt [j] [i];
824 				translation [i] = double (productsum / x.nrow);
825 			}
826 			*out_translation = translation.move();
827 		}
828 		if (out_scale)
829 			*out_scale = (double) scale;
830 	}
831 	if (out_rotation)
832 		*out_rotation = rotation.move();
833 }
834 
NUMmspline(constVEC const & knot,integer order,integer i,double x)835 double NUMmspline (constVEC const & knot, integer order, integer i, double x) {
836 	const integer nSplines = knot.size - order;
837 
838 	Melder_require (nSplines > 0,
839 		U"No splines.");
840 	Melder_require (order > 0 && i <= nSplines,
841 		U"Combination of order and index not correct.");
842 	/*
843 		Find the interval where x is located.
844 		M-splines of order k have degree k-1.
845 		M-splines are zero outside interval [ knot [i], knot [i+order] ).
846 		First and last 'order' knots are equal, i.e.,
847 		knot [1] = ... = knot [order] && knot [knot.size-order+1] = ... knot [knot.size].
848 	*/
849 	integer jj;
850 	for (jj = order; jj <= knot.size - order + 1; jj ++)
851 		if (x < knot [jj])
852 			break;
853 
854 	if (jj < i || (jj > i + order) || jj == order || jj > (knot.size - order + 1))
855 		return 0.0;
856 	/*
857 		Calculate M [i](x|1,t) according to eq.2.
858 	*/
859 	const integer ito = i + order - 1;
860 	autoVEC m = zero_VEC (order);
861 	for (integer j = i; j <= ito; j ++)
862 		if (x >= knot [j] && x < knot [j + 1])
863 			m [j - i + 1] = 1 / (knot [j + 1] - knot [j]);
864 	/*
865 		Iterate to get M [i](x|k,t)
866 	*/
867 	for (integer k = 2; k <= order; k ++) {
868 		for (integer j = i; j <= i + order - k; j ++) {
869 			const double kj = knot [j], kjpk = knot [j + k];
870 			if (kjpk > kj)
871                 m [j - i + 1] = k * ((x - kj) * m [j - i + 1] + (kjpk - x) * m [j - i + 1 + 1]) / ((k - 1) * (kjpk - kj));
872 		}
873 	}
874 	return m [1];
875 }
876 
NUMispline(constVEC const & aknot,integer order,integer i,double x)877 double NUMispline (constVEC const & aknot, integer order, integer i, double x) {
878 	const integer orderp1 = order + 1;
879 	integer j;
880 	for (j = orderp1; j <= aknot.size - order; j ++)
881 		if (x < aknot [j])
882 			break;
883 
884 	if (-- j < i)
885 		return 0.0;
886 
887 	if (j > i + order || (j == aknot.size - order && x == aknot [j]))
888 		return 1.0;
889 
890 	/*
891 		Equation 5 in Ramsay's article contains some errors!!!
892 		1. the interval selection should be 'j-k <= i <= j' instead of
893 			j-k+1 <= i <= j'
894 		2. the summation index m starts at 'i+1' instead of 'i'
895 	*/
896 	double y = 0.0;
897 	for (integer m = i + 1; m <= j; m ++) {
898 		const double r = NUMmspline (aknot, orderp1, m, x);
899 		y += (aknot [m + orderp1] - aknot [m]) * r;
900 	}
901 	y /= orderp1;
902 	return y;
903 }
904 
NUMwilksLambda(constVEC const & lambda,integer from,integer to)905 double NUMwilksLambda (constVEC const& lambda, integer from, integer to) {
906 	Melder_assert (from > 0 && to <= lambda.size && from <= to);
907 	longdouble result = 1.0;
908 	for (integer i = from; i <= to; i ++)
909 		result *= 1.0 / (1.0 + lambda [i]);
910 	return (double) result;
911 }
912 
NUMfactln(integer n)913 double NUMfactln (integer n) {
914 	static double table [101];
915 	if (n < 0)
916 		return undefined;
917 	if (n <= 1)
918 		return 0.0;
919 	return n > 100 ? NUMlnGamma (n + 1.0) : table [n] != 0.0 ? table [n] : (table [n] = NUMlnGamma (n + 1.0));
920 }
921 
NUMnrbis(double (* f)(double x,double * dfx,void * closure),double xmin,double xmax,void * closure)922 double NUMnrbis (double (*f) (double x, double *dfx, void *closure), double xmin, double xmax, void *closure) {
923 	double df, tmp, xh, xl, tol;
924 	const integer itermax = 80;
925 	double fl = (*f) (xmin, & df, closure);
926 	if (fl == 0.0)
927 		return xmin;;
928 
929 	double fh = (*f) (xmax, & df, closure);
930 	if (fh == 0.0)
931 		return xmax;
932 
933 	if ((fl > 0.0 && fh > 0.0) || (fl < 0.0 && fh < 0.0))
934 		return undefined;
935 
936 	if (fl < 0.0) {
937 		xl = xmin;
938 		xh = xmax;
939 	} else {
940 		xh = xmin;
941 		xl = xmax;
942 	}
943 
944 	double dxold = fabs (xmax - xmin);
945 	double dx = dxold;
946 	double root = 0.5 * (xmin + xmax);
947 	double fx = (*f) (root, & df, closure);
948 
949 	for (integer iter = 1; iter <= itermax; iter ++) {
950 		if ((((root - xh) * df - fx) * ((root - xl) * df - fx) >= 0.0) || (fabs (2.0 * fx) > fabs (dxold * df))) {
951 			dxold = dx;
952 			dx = 0.5 * (xh - xl);
953 			root = xl + dx;
954 			if (xl == root)
955 				return root;
956 		} else {
957 			dxold = dx;
958 			dx = fx / df;
959 			tmp = root;
960 			root -= dx;
961 			if (tmp == root)
962 				return root;
963 		}
964 		tol = NUMfpp -> eps	* (root == 0.0 ? 1.0 : fabs (root));
965 		if (fabs (dx) < tol)
966 			return root;
967 
968 		fx = (*f) (root, & df, closure);
969 
970 		if (fx < 0.0)
971 			xl = root;
972 		else
973 			xh = root;
974 	}
975 	Melder_warning (U"NUMnrbis: maximum number of iterations (", itermax, U") exceeded.");
976 	return root;
977 }
978 
NUMridders(double (* f)(double x,void * closure),double x1,double x2,void * closure)979 double NUMridders (double (*f) (double x, void *closure), double x1, double x2, void *closure) {
980 	/* There is still a problem with this implementation:
981 		tol may be zero;
982 	*/
983 	double d, root = undefined, tol;
984 	const integer itermax = 100;
985 
986 	double f1 = f (x1, closure);
987 	if (f1 == 0.0)
988 		return x1;
989 	if (isundef (f1))
990 		return undefined;
991 
992 	double f2 = f (x2, closure);
993 	if (f2 == 0.0)
994 		return x2;
995 	if (isundef (f2))
996 		return undefined;
997 	if ((f1 < 0.0 && f2 < 0.0) || (f1 > 0.0 && f2 > 0.0))
998 		return undefined;
999 
1000 	for (integer iter = 1; iter <= itermax; iter ++) {
1001 		const double x3 = 0.5 * (x1 + x2);
1002 		const double f3 = f (x3, closure);
1003 		if (f3 == 0.0)
1004 			return x3;
1005 		if (isundef (f3))
1006 			return undefined;
1007 
1008 		// New guess: x4 = x3 + (x3 - x1) * sign(f1 - f2) * f3 / sqrt(f3^2 - f1*f2)
1009 
1010 		d = f3 * f3 - f1 * f2;
1011 		if (d < 0.0) {
1012 			Melder_warning (U"d < 0 in ridders (iter = ", iter, U").");
1013 			return undefined;
1014 		}
1015 
1016 		if (d == 0.0) {
1017 			// pb test added because f1 f2 f3 may be 1e-170 or so
1018 			tol = NUMfpp -> eps * (x3 == 0.0 ? 1.0 : fabs (x3));
1019 			if (iter > 1 && fabs (x3 - root) < tol)
1020 				return root;
1021 			root = x3;
1022 
1023 			// Perform bisection.
1024 
1025 			if (f1 > 0.0) {
1026 				// falling curve: f1 > 0, f2 < 0
1027 				if (f3 > 0.0) {
1028 					x1 = x3;
1029 					f1 = f3; // retain invariant: f1 > 0, f2 < 0
1030 				} else {
1031 					// f3 <= 0.0
1032 					x2 = x3;
1033 					f2 = f3; // retain invariant: f1 > 0, f2 < 0
1034 				}
1035 			} else {
1036 				// rising curve: f1 < 0, f2 > 0
1037 				if (f3 > 0.0) {
1038 					x2 = x3;
1039 					f2 = f3; // retain invariant: f1 < 0, f2 > 0
1040 				} else {
1041 					// f3 < 0.0
1042 					x1 = x3;
1043 					f1 = f3; // retain invariant: f1 < 0, f2 > 0
1044 				}
1045 			}
1046 		} else {
1047 			d = sqrt (d);
1048 			if (std::isnan (d)) {
1049 				// pb: square root of denormalized small number fails on some computers
1050 				tol = NUMfpp -> eps * (x3 == 0.0 ? 1.0 : fabs (x3));
1051 				if (iter > 1 && fabs (x3 - root) < tol)
1052 					return root;
1053 				root = x3;
1054 
1055 				// Perform bisection.
1056 
1057 				if (f1 > 0.0) {
1058 					// falling curve: f1 > 0, f2 < 0
1059 					if (f3 > 0.0) {
1060 						x1 = x3;
1061 						f1 = f3; // retain invariant: f1 > 0, f2 < 0
1062 					} else {
1063 						// f3 <= 0.0
1064 						x2 = x3;
1065 						f2 = f3; // retain invariant: f1 > 0, f2 < 0
1066 					}
1067 				} else {
1068 					// rising curve: f1 < 0, f2 > 0
1069 					if (f3 > 0.0) {
1070 						x2 = x3;
1071 						f2 = f3; // retain invariant: f1 < 0, f2 > 0
1072 					} else {
1073 						// f3 < 0.0
1074 						x1 = x3;
1075 						f1 = f3; // retain invariant: f1 < 0, f2 > 0 */
1076 					}
1077 				}
1078 			} else {
1079 				d = (x3 - x1) * f3 / d;
1080 				const double x4 = f1 - f2 < 0 ? x3 - d : x3 + d;
1081 				tol = NUMfpp -> eps * (x4 == 0.0 ? 1.0 : fabs (x4));
1082 				if (iter > 1 && fabs (x4 - root) < tol)
1083 					return root;
1084 				root = x4;
1085 				const double f4 = f (x4, closure);
1086 				if (f4 == 0.0)
1087 					return root;
1088 				if (isundef (f4))
1089 					return undefined;
1090 				if ((f1 > f2) == (d > 0.0)) { // pb: instead of x3 < x4
1091 					if (SIGN (f3, f4) != f3) {
1092 						x1 = x3;
1093 						f1 = f3;
1094 						x2 = x4;
1095 						f2 = f4;
1096 					} else {
1097 						x1 = x4;
1098 						f1 = f4;
1099 					}
1100 				} else {
1101 					if (SIGN (f3, f4) != f3) {
1102 						x1 = x4;
1103 						f1 = f4;
1104 						x2 = x3;
1105 						f2 = f3;
1106 					} else {
1107 						x2 = x4;
1108 						f2 = f4;
1109 					}
1110 				}
1111 			}
1112 		}
1113 		if (fabs (x1 - x2) < tol)
1114 			return root;
1115 	}
1116 
1117 	{
1118 		static integer nwarnings = 0;
1119 		nwarnings ++;
1120 		Melder_warning (U"NUMridders: maximum number of iterations (", itermax, U") exceeded.");
1121 	}
1122 	return root;
1123 }
1124 
NUMlogNormalP(double x,double zeta,double sigma)1125 double NUMlogNormalP (double x, double zeta, double sigma) {
1126 	return gsl_cdf_lognormal_P (x, zeta, sigma);
1127 }
1128 
NUMlogNormalQ(double x,double zeta,double sigma)1129 double NUMlogNormalQ (double x, double zeta, double sigma) {
1130 	return gsl_cdf_lognormal_Q (x, zeta, sigma);
1131 }
1132 
NUMstudentP(double t,double df)1133 double NUMstudentP (double t, double df) {
1134 	if (df < 1.0) return undefined;
1135 	const double ib = NUMincompleteBeta (0.5 * df, 0.5, df / (df + t * t));
1136 	if (isundef (ib))
1137 		return undefined;
1138 	return t < 0.0 ? 0.5 * ib : 1.0 - 0.5 * ib;
1139 }
1140 
NUMstudentQ(double t,double df)1141 double NUMstudentQ (double t, double df) {
1142 	if (df < 1)
1143 		return undefined;
1144 	const double ib = NUMincompleteBeta (0.5 * df, 0.5, df / (df + t * t));
1145 	if (isundef (ib))
1146 		return undefined;
1147 	return t > 0.0 ? 0.5 * ib : 1.0 - 0.5 * ib;
1148 }
1149 
NUMfisherP(double f,double df1,double df2)1150 double NUMfisherP (double f, double df1, double df2) {
1151 	if (f < 0.0 || df1 < 1.0 || df2 < 1.0)
1152 		return undefined;
1153 	const double ib = NUMincompleteBeta (0.5 * df2, 0.5 * df1, df2 / (df2 + f * df1));
1154 	if (isundef (ib))
1155 		return undefined;
1156 	return 1.0 - ib;
1157 }
1158 
NUMfisherQ(double f,double df1,double df2)1159 double NUMfisherQ (double f, double df1, double df2) {
1160 	if (f < 0.0 || df1 < 1.0 || df2 < 1.0)
1161 		return undefined;
1162 	if (Melder_debug == 28) {
1163 		return NUMincompleteBeta (0.5 * df2, 0.5 * df1, df2 / (df2 + f * df1));
1164 	} else {
1165 		const double result = gsl_cdf_fdist_Q (f, df1, df2);
1166 		if (std::isnan (result))
1167 			return undefined;
1168 		return result;
1169 	}
1170 }
1171 
NUMinvGaussQ(double p)1172 double NUMinvGaussQ (double p) {
1173 	double pc = p;
1174 	if (p <= 0.0 || p >= 1.0)
1175 		return undefined;
1176 	if (p > 0.5)
1177 		pc = 1.0 - p;
1178 	double t = sqrt (- 2.0 * log (pc));
1179 	t -= (2.515517 + (0.802853 + 0.010328 * t) * t) /
1180 		 (1.0 + (1.432788 + (0.189269 + 0.001308 * t) * t) * t);
1181 	return p > 0.5 ? -t : t;
1182 }
1183 
studentQ_func(double x,void * voidParams)1184 static double studentQ_func (double x, void *voidParams) {
1185 	const struct pdf1_struct *params = (struct pdf1_struct *) voidParams;
1186 	const double q = NUMstudentQ (x, params -> df);
1187 	return isundef (q) ? undefined : q - params -> p;
1188 }
1189 
NUMinvStudentQ(double p,double df)1190 double NUMinvStudentQ (double p, double df) {
1191 	struct pdf1_struct params;
1192 	const double pc = ( p > 0.5 ? 1.0 - p : p );
1193 
1194 	if (p < 0.0 || p >= 1.0)
1195 		return undefined;
1196 	/*
1197 		Bracket the function f(x) = NUMstudentQ (x, df) - p.
1198 	*/
1199 	double xmax = 1.0;
1200 	for (;;) {
1201 		const double q = NUMstudentQ (xmax, df);
1202 		if (isundef (q))
1203 			return undefined;
1204 		if (q < pc)
1205 			break;
1206 		xmax *= 2.0;
1207 	}
1208 
1209 	const double xmin = ( xmax > 1.0 ? xmax / 2.0 : 0.0 );
1210 	/*
1211 		Find zero of f(x) with Ridders' method.
1212 	*/
1213 	params. df = df;
1214 	params. p = pc;
1215 	const double x = NUMridders (studentQ_func, xmin, xmax, & params);
1216 	if (isundef (x))
1217 		return undefined;
1218 	return p > 0.5 ? -x : x;
1219 }
1220 
chiSquareQ_func(double x,void * voidParams)1221 static double chiSquareQ_func (double x, void *voidParams) {
1222 	const struct pdf1_struct *params = (struct pdf1_struct *) voidParams;
1223 	const double q = NUMchiSquareQ (x, params -> df);
1224 	return isundef (q) ? undefined : q - params -> p;
1225 }
1226 
NUMinvChiSquareQ(double p,double df)1227 double NUMinvChiSquareQ (double p, double df) {
1228 	struct pdf1_struct params;
1229 
1230 	if (p < 0.0 || p >= 1.0)
1231 		return undefined;
1232 	/*
1233 		Bracket the function f(x) = NUMchiSquareQ (x, df) - p.
1234 	*/
1235 	double xmax = 1.0;
1236 	for (;;) {
1237 		const double q = NUMchiSquareQ (xmax, df);
1238 		if (isundef (q))
1239 			return undefined;
1240 		if (q < p)
1241 			break;
1242 		xmax *= 2.0;
1243 	}
1244 	const double xmin = ( xmax > 1.0 ? xmax / 2.0 : 0.0 );
1245 	/*
1246 		Find zero of f(x) with Ridders' method.
1247 	*/
1248 	params. df = df;
1249 	params. p = p;
1250 	return NUMridders (chiSquareQ_func, xmin, xmax, & params);
1251 }
1252 
fisherQ_func(double x,void * voidParams)1253 static double fisherQ_func (double x, void *voidParams) {
1254 	struct pdf2_struct *params = (struct pdf2_struct *) voidParams;
1255 	const double q = NUMfisherQ (x, params -> df1, params -> df2);
1256 	return ( isundef (q) ? undefined : q - params -> p );
1257 }
1258 
NUMinvFisherQ(double p,double df1,double df2)1259 double NUMinvFisherQ (double p, double df1, double df2) {
1260 	if (p <= 0.0 || p > 1.0 || df1 < 1.0 || df2 < 1.0)
1261 		return undefined;
1262 	if (Melder_debug == 29) {
1263 		//if (p == 1.0) return 0.0;
1264 		return gsl_cdf_fdist_Qinv (p, df1, df2);
1265 	} else {
1266 		struct pdf2_struct params;
1267 		if (p == 1.0)
1268 			return 0.0;
1269 		params. p = p;
1270 		params. df1 = df1;
1271 		params. df2 = df2;
1272 		double top = 1000.0;
1273 		for (;;) {
1274 			const double q = NUMfisherQ (top, df1, df2);
1275 			if (isundef (q))
1276 				return undefined;
1277 			if (q < p)
1278 				break;
1279 			if (top > 0.9e300)
1280 				return undefined;
1281 			top *= 1e9;
1282 		}
1283 		return NUMridders (fisherQ_func, 0.0, p > 0.5 ? 2.2 : top, & params);
1284 	}
1285 }
1286 
NUMbeta2(double z,double w)1287 double NUMbeta2 (double z, double w) {
1288 	gsl_sf_result result;
1289 	const integer status = gsl_sf_beta_e (z, w, &result);
1290 	return status == GSL_SUCCESS ? result.val : undefined;
1291 }
1292 
NUMlnBeta(double a,double b)1293 double NUMlnBeta (double a, double b) {
1294 	gsl_sf_result result;
1295 	const integer status = gsl_sf_lnbeta_e (a, b, & result);
1296 	return status == GSL_SUCCESS ? result.val : undefined;
1297 }
1298 
MATscaledResiduals(MAT const & residuals,constMAT const & data,constMAT const & covariance,constVEC const & means)1299 static void MATscaledResiduals (MAT const& residuals, constMAT const& data, constMAT const& covariance, constVEC const& means) {
1300 	try {
1301 		Melder_require (residuals.nrow == data.nrow && residuals.ncol == data.ncol,
1302 			U"The data and the residuals should have the same dimensions.");
1303 		Melder_require (covariance.ncol == means.size && data.ncol == means.size,
1304 			U"The dimensions of the means and the covariance have to conform with the data.");
1305 		autoVEC dif = raw_VEC (data.ncol);
1306 		autoMAT lowerInverse = copy_MAT (covariance);
1307 		MATlowerCholeskyInverse_inplace (lowerInverse.get(), nullptr);
1308 		for (integer irow = 1; irow <= data.nrow; irow ++) {
1309 			dif.all()  <<=  data.row (irow)  -  means;
1310 			residuals.row (irow)  <<=  0.0;
1311 			if (lowerInverse.nrow == 1) { // diagonal matrix is one row matrix
1312 				residuals.row (irow)  <<=  lowerInverse.row (1)  *  dif.get();
1313 			} else {// square matrix
1314 				for (integer icol = 1; icol <= data.ncol; icol ++)
1315 					residuals [irow] [icol] = NUMinner (lowerInverse.row (icol).part (1, icol), dif.part (1, icol));
1316 			}
1317 		}
1318 	} catch (MelderError) {
1319 		Melder_throw (U"MATscaleResiduals: not performed.");
1320 	}
1321 }
1322 
1323 /*************** Hz <--> other freq reps *********************/
1324 
NUMmelToHertz3(double mel)1325 double NUMmelToHertz3 (double mel) {
1326 	if (mel < 0.0)
1327 		return undefined;
1328 	return mel < 1000.0 ? mel : 1000.0 * (exp (mel * log10 (2.0) / 1000.0) - 1.0);
1329 }
1330 
NUMhertzToMel3(double hz)1331 double NUMhertzToMel3 (double hz) {
1332 	if (hz < 0.0)
1333 		return undefined;
1334 	return hz < 1000.0 ? hz : 1000.0 * log10 (1.0 + hz / 1000.0) / log10 (2.0);
1335 }
1336 
NUMmelToHertz2(double mel)1337 double NUMmelToHertz2 (double mel) {
1338 	if (mel < 0.0)
1339 		return undefined;
1340 	return 700.0 * (pow (10.0, mel / 2595.0) - 1.0);
1341 }
1342 
NUMhertzToMel2(double hz)1343 double NUMhertzToMel2 (double hz) {
1344 	if (hz < 0.0)
1345 		return undefined;
1346 	return 2595.0 * log10 (1.0 + hz / 700.0);
1347 }
1348 
NUMhertzToBark_traunmueller(double hz)1349 double NUMhertzToBark_traunmueller (double hz) {
1350 	if (hz < 0.0)
1351 		return undefined;
1352 	return 26.81 * hz / (1960.0 + hz) - 0.53;
1353 }
1354 
NUMbarkToHertz_traunmueller(double bark)1355 double NUMbarkToHertz_traunmueller (double bark) {
1356 	if (bark < 0.0 || bark > 26.28)
1357 		return undefined;
1358 	return 1960.0 * (bark + 0.53) / (26.28 - bark);
1359 }
1360 
1361 
NUMbarkToHertz_zwickerterhardt(double hz)1362 double NUMbarkToHertz_zwickerterhardt (double hz) {
1363 	if (hz < 0.0)
1364 		return undefined;
1365 	return 13.0 * atan (0.00076 * hz) + 3.5 * atan (hz / 7500.0);
1366 }
1367 
NUMbladonlindblomfilter_amplitude(double zc,double z)1368 double NUMbladonlindblomfilter_amplitude (double zc, double z) {
1369 	const double dz = zc - z + 0.474;
1370 	return pow (10.0, 1.581 + 0.75 * dz - 1.75 * sqrt (1.0 + dz * dz));
1371 }
1372 
NUMsekeyhansonfilter_amplitude(double zc,double z)1373 double NUMsekeyhansonfilter_amplitude (double zc, double z) {
1374 	const double dz = zc - z - 0.215;
1375 	return pow (10.0, 0.7 - 0.75 * dz - 1.75 * sqrt (0.196 + dz * dz));
1376 }
1377 
NUMtriangularfilter_amplitude(double fl,double fc,double fh,double f)1378 double NUMtriangularfilter_amplitude (double fl, double fc, double fh, double f) {
1379 	double a = 0.0;
1380 	if (f > fl && f < fh) {
1381 		a = f < fc ? (f - fl) / (fc - fl) : (fh - f) / (fh - fc);
1382 
1383 		/* Normalize such that area under the filter is always 1. ???
1384 
1385 		a /= 2 * (fh - fl);*/
1386 	}
1387 	return a;
1388 }
1389 
NUMformantfilter_amplitude(double fc,double bw,double f)1390 double NUMformantfilter_amplitude (double fc, double bw, double f) {
1391 	const double dq = (fc * fc - f * f) / (bw * f);
1392 	return 1.0 / (dq * dq + 1.0);
1393 }
1394 
1395 /* Childers (1978), Modern Spectrum analysis, IEEE Press, 252-255) */
1396 /* work [1..n+n+n];
1397 b1 = & work [1];
1398 b2 = & work [n+1];
1399 aa = & work [n+n+1];
1400 for (i=1; i<=n+n+n; i ++) work [i]=0;
1401 */
VECburg(VEC const & a,constVEC const & x)1402 double VECburg (VEC const& a, constVEC const& x) {
1403 	const integer n = x.size, m = a.size;
1404 	for (integer j = 1; j <= m; j ++)
1405 		a [j] = 0.0;
1406 	if (n <= 2) {
1407 		a [1] = -1.0;
1408 		return ( n == 2 ? 0.5 * (x [1] * x [1] + x [2] * x [2]) : x [1] * x [1] );
1409 	}
1410 
1411 	autoVEC b1 = zero_VEC (n), b2 = zero_VEC (n), aa = zero_VEC (m);
1412 
1413 	// (3)
1414 
1415 	longdouble p = 0.0;
1416 	for (integer j = 1; j <= n; j ++)
1417 		p += x [j] * x [j];
1418 
1419 	longdouble xms = p / n;
1420 	if (xms <= 0.0)
1421 		return xms;	// warning empty
1422 
1423 	// (9)
1424 
1425 	b1 [1] = x [1];
1426 	if (n < 2)
1427 		return (double) xms;
1428 	b2 [n - 1] = x [n];
1429 	for (integer j = 2; j <= n - 1; j ++)
1430 		b1 [j] = b2 [j - 1] = x [j];
1431 
1432 	for (integer i = 1; i <= m; i ++) {
1433 		// (7)
1434 
1435 		longdouble num = 0.0, denum = 0.0;
1436 		for (integer j = 1; j <= n - i; j ++) {
1437 			num += b1 [j] * b2 [j];
1438 			denum += b1 [j] * b1 [j] + b2 [j] * b2 [j];
1439 		}
1440 
1441 		if (denum <= 0.0)
1442 			return 0.0;	// warning ill-conditioned
1443 
1444 		a [i] = 2.0 * num / denum;
1445 
1446 		// (10)
1447 
1448 		xms *= 1.0 - a [i] * a [i];
1449 
1450 		// (5)
1451 
1452 		for (integer j = 1; j <= i - 1; j ++)
1453 			a [j] = aa [j] - a [i] * aa [i - j];
1454 
1455 		if (i < m) {
1456 
1457 			// (8) Watch out: i -> i+1
1458 
1459 			for (integer j = 1; j <= i; j ++)
1460 				aa [j] = a [j];
1461 			for (integer j = 1; j <= n - i - 1; j ++) {
1462 				b1 [j] -= aa [i] * b2 [j];
1463 				b2 [j] = b2 [j + 1] - aa [i] * b1 [j + 1];
1464 			}
1465 		}
1466 	}
1467 	return xms;
1468 }
1469 
newVECburg(constVEC const & x,integer numberOfPredictionCoefficients,double * out_xms)1470 autoVEC newVECburg (constVEC const& x, integer numberOfPredictionCoefficients, double *out_xms) {
1471 	autoVEC a = raw_VEC (numberOfPredictionCoefficients);
1472 	const double xms = VECburg (a.get(), x);
1473 	if (out_xms)
1474 		*out_xms = xms;
1475 	return a;
1476 }
1477 
VECfilterInverse_inplace(VEC const & s,constVEC const & filter,VEC const & filterMemory)1478 void VECfilterInverse_inplace (VEC const& s, constVEC const& filter, VEC const& filterMemory) {
1479 	Melder_assert (filterMemory.size >= filter.size);
1480 	filterMemory  <<=  0.0;
1481 	for (integer i = 1; i <= s.size; i ++) {
1482 		const double y0 = s [i];
1483 		for (integer j = 1; j <= filter.size; j ++)
1484 			s [i] += filter [j] * filterMemory [j];
1485 		for (integer j = filter.size; j > 1; j --)
1486 			filterMemory [j] = filterMemory [j - 1];
1487 		filterMemory [1] = y0;
1488 	}
1489 }
1490 
NUMdmatrix_to_dBs(MAT const & m,double ref,double factor,double floor)1491 void NUMdmatrix_to_dBs (MAT const& m, double ref, double factor, double floor) {
1492 	const double factor10 = factor * 10.0;
1493 	MelderExtremaWithInit extrema;
1494 
1495 	Melder_assert (ref > 0 && factor > 0);
1496 
1497 	for (integer irow = 1; irow <= m.nrow; irow ++)
1498 		for (integer icol = 1; icol <= m.ncol; icol ++)
1499 			extrema.update (m [irow] [icol]);
1500 
1501 
1502 	Melder_require (extrema.min >= 0.0 && extrema.max >= 0.0,
1503 		U"All matrix elements should be positive.");
1504 
1505 	const double ref_db = factor10 * log10 (ref);
1506 
1507 	for (integer irow = 1; irow <= m.nrow; irow ++) {
1508 		for (integer icol = 1; icol <= m.ncol; icol ++) {
1509 			double mij = floor;
1510 			if (m [irow] [icol] > 0.0) {
1511 				mij = factor10 * log10 (m [irow] [icol]) - ref_db;
1512 				if (mij < floor)
1513 					mij = floor;
1514 			}
1515 			m [irow] [icol] = mij;
1516 		}
1517 	}
1518 }
1519 
MATcosinesTable(integer n)1520 autoMAT MATcosinesTable (integer n) {
1521 	autoMAT result = raw_MAT (n, n);
1522 	for (integer irow = 1; irow <= n; irow ++)
1523 		for (integer icol = 1; icol <= n; icol ++)
1524 			result [irow] [icol] = cos (NUMpi * (irow - 1) * (icol - 0.5) / n);
1525 	return result;
1526 }
1527 
VECcosineTransform_preallocated(VEC const & target,constVEC const & x,constMAT const & cosinesTable)1528 void VECcosineTransform_preallocated (VEC const& target, constVEC const& x, constMAT const& cosinesTable) {
1529 	Melder_assert (cosinesTable.nrow == cosinesTable.ncol);
1530 	Melder_assert (x.size == target.size && x.size == cosinesTable.nrow);
1531 	for (integer k = 1; k <= target.size; k ++)
1532 		target [k] = NUMinner (x, cosinesTable.row (k));
1533 }
1534 
VECinverseCosineTransform_preallocated(VEC const & target,constVEC const & x,constMAT const & cosinesTable)1535 void VECinverseCosineTransform_preallocated (VEC const& target, constVEC const& x, constMAT const& cosinesTable) {
1536 	Melder_assert (cosinesTable.nrow == cosinesTable.ncol);
1537 	Melder_assert (x.size == target.size && x.size == cosinesTable.nrow);
1538 	for (integer j = 1; j <= target.size; j ++) {
1539 		target [j] = 0.5 * x [1] * cosinesTable [1] [j];
1540 		for (integer k = 2; k <= target.size; k ++)
1541 			target [j] += x [k] * cosinesTable [k] [j];
1542 		target [j] *= 2.0 / target.size;
1543 	}
1544 }
1545 
NUMcubicSplineInterpolation_getSecondDerivatives(VEC const & out_y,constVEC const & x,constVEC const & y,double yp1,double ypn)1546 void NUMcubicSplineInterpolation_getSecondDerivatives (VEC const& out_y, constVEC const& x, constVEC const& y, double yp1, double ypn) {
1547 	Melder_assert (x.size == y.size && out_y.size == y.size);
1548 
1549 	autoVEC u = raw_VEC (x.size - 1);
1550 
1551 	if (yp1 > 0.99e30)
1552 		out_y [1] = u [1] = 0.0;
1553 	else {
1554 		out_y [1] = -0.5;
1555 		u [1] = (3.0 / (x [2] - x [1])) * ( (y [2] - y [1]) / (x [2] - x [1]) - yp1);
1556 	}
1557 
1558 	for (integer i = 2; i <= x.size - 1; i ++) {
1559 		const double sig = (x [i] - x [i - 1]) / (x [i + 1] - x [i - 1]);
1560 		const double p = sig * out_y [i - 1] + 2.0;
1561 		out_y [i] = (sig - 1.0) / p;
1562 		u [i] = (y [i + 1] - y [i]) / (x [i + 1] - x [i]) - (y [i] - y [i - 1]) / (x [i] - x [i - 1]);
1563 		u [i] = (6.0 * u [i] / (x [i + 1] - x [i - 1]) - sig * u [i - 1]) / p;
1564 	}
1565 
1566 	double qn, un;
1567 	if (ypn > 0.99e30)
1568 		qn = un = 0.0;
1569 	else {
1570 		qn = 0.5;
1571 		un = (3.0 / (x [x.size] - x [x.size - 1])) * (ypn - (y [x.size] - y [x.size - 1]) / (x [x.size] - x [x.size - 1]));
1572 	}
1573 
1574 	out_y [x.size] = (un - qn * u [x.size - 1]) / (qn * out_y [x.size - 1] + 1.0);
1575 	for (integer k = x.size - 1; k >= 1; k--)
1576 		out_y [k] = out_y [k] * out_y [k + 1] + u [k];
1577 }
1578 
NUMcubicSplineInterpolation(constVEC const & x,constVEC const & y,constVEC const & y2,double xin)1579 double NUMcubicSplineInterpolation (constVEC const& x, constVEC const& y, constVEC const& y2, double xin) {
1580 	Melder_assert (x.size == y.size && x.size == y2.size);
1581 	integer klo = 1, khi = x.size;
1582 	while (khi - klo > 1) {
1583 		integer k = (khi + klo) >> 1;
1584 		if (x [k] > xin)
1585 			khi = k;
1586 		else
1587 			klo = k;
1588 	}
1589 	const double h = x [khi] - x [klo];
1590 	Melder_require (h != 0.0,
1591 		U"NUMcubicSplineInterpolation: bad input value.");
1592 
1593 	const double a = (x [khi] - xin) / h;
1594 	const double b = (xin - x [klo]) / h;
1595 	const double yint = a * y [klo] + b * y [khi] + ((a * a * a - a) * y2 [klo] + (b * b * b - b) * y2 [khi]) * (h * h) / 6.0;
1596 	return yint;
1597 }
1598 
NUMsinc(const double x)1599 double NUMsinc (const double x) {
1600 	struct gsl_sf_result_struct result;
1601 	const integer status = gsl_sf_sinc_e (x / NUMpi, &result);
1602 	return status == GSL_SUCCESS ? result. val : undefined;
1603 }
1604 
NUMsincpi(const double x)1605 double NUMsincpi (const double x) {
1606 	struct gsl_sf_result_struct result;
1607 	const integer status = gsl_sf_sinc_e (x, &result);
1608 	return status == GSL_SUCCESS ? result. val : undefined;
1609 }
1610 
1611 /* Does the line segment from (x1,y1) to (x2,y2) intersect with the line segment from (x3,y3) to (x4,y4)? */
NUMdoLineSegmentsIntersect(double x1,double y1,double x2,double y2,double x3,double y3,double x4,double y4)1612 bool NUMdoLineSegmentsIntersect (double x1, double y1, double x2, double y2, double x3, double y3, double x4, double y4) {
1613 	const integer o11 = NUMgetOrientationOfPoints (x1, y1, x2, y2, x3, y3);
1614 	const integer o12 = NUMgetOrientationOfPoints (x1, y1, x2, y2, x4, y4);
1615 	const integer o21 = NUMgetOrientationOfPoints (x3, y3, x4, y4, x1, y1);
1616 	const integer o22 = NUMgetOrientationOfPoints (x3, y3, x4, y4, x2, y2);
1617 	return ((o11 * o12 < 0) && (o21 * o22 < 0)) || (o11 *o12 *o21 *o22 == 0);
1618 }
1619 
NUMgetOrientationOfPoints(double x1,double y1,double x2,double y2,double x3,double y3)1620 integer NUMgetOrientationOfPoints (double x1, double y1, double x2, double y2, double x3, double y3) {
1621 	integer orientation;
1622 	const longdouble dx2 = x2 - x1, dy2 = y2 - y1;
1623 	const longdouble dx3 = x3 - x1, dy3 = y3 - y1;
1624 	if (dx2 * dy3 > dy2 * dx3)
1625 		orientation = -1;
1626 	else if (dx2 * dy3 < dy2 * dx3)
1627 		orientation = 1;
1628 	else {
1629 		if ((dx2 * dx3 < 0) || (dy2 * dy3 < 0))
1630 			orientation = -1;
1631 		else if ((dx2 * dx2 + dy2 * dy2) >= (dx3 * dx3 + dy3 * dy3))
1632 			orientation = 0;
1633 		else
1634 			orientation = 1;
1635 	}
1636 	return orientation;
1637 }
1638 
NUMgetIntersectionsWithRectangle(double x1,double y1,double x2,double y2,double xmin,double ymin,double xmax,double ymax,double * xi,double * yi)1639 integer NUMgetIntersectionsWithRectangle (double x1, double y1, double x2, double y2, double xmin, double ymin, double xmax, double ymax, double *xi, double *yi) {
1640 	double x [6], y [6];
1641 	integer ni = 0;
1642 
1643 	x [1] = x [4] = x [5] = xmin;
1644 	x [2] = x [3] = xmax;
1645 	y [1] = y [2] = y [5] = ymin;
1646 	y [3] = y [4] = ymax;
1647 	/*
1648 		Calculate intersection of line segment through p1=(x1,y1) to p2(x2,y2) with line segment
1649 		through p3=(x3,y3) to p4=(x4,y4).
1650 		Parametrisation of the lines:
1651 		l1 = p1 + s (p2 - p1), s in (-inf,+inf)
1652 		l2 = p3 + t (p4 - p3), t in (-inf,+inf).
1653 		When s and t are in [0,1] we have line segments between the points.
1654 		At the intersection l1 == l2. We get for the x and y coordinates:
1655 			x1 + s (x2 - x1) = x3 + t (x4 - x3).............(1)
1656 			y1 + s (y2 - y1) = y3 + t (y4 - y3).............(2)
1657 		Multiply (1)*(y2 - y1) and (2)*(x2 - x1):
1658 			x1 (y2 - y1) + s (x2 - x1)(y2 - y1) = x3 (y2 - y1) + t (x4 - x3)(y2 - y1).......(3)
1659 			y1 (x2 - x1) + s (y2 - y1)(x2 - x1) = y3 (x2 - x1) + t (y4 - y3)(x2 - x1).......(4)
1660 		(3)-(4) with y21 = y2 -y1, x21 = x2 - x1, x43 = x4 - x3, ...
1661 			x1 y21 - y1 x21 = x3 y21 - y3 x21 +t (x43 y21 - y43 x21)
1662 		Combining:
1663 			y31 x21 - x31 y21 = t (x43 y21 - y43 x21)
1664 		Therefore at the intersection we have:
1665 
1666 			t = (y31 x21 - x31 y21) / (x43 y21 - y43 x21)
1667 
1668 		If (x43 y21 - y43 x21) == 0
1669 			There is no intersection.
1670 		If (t < 0 || t >= 1)
1671 			No intersection in the segment l2
1672 			To count intersections in a corner only once we have t < 0 instead of t <= 0!
1673 	*/
1674 
1675 	for (integer i = 1; i <= 4; i ++) {
1676 		const double denom = (x [i + 1] - x [i]) * (y2 - y1) - (y [i + 1] - y [i]) * (x2 - x1);
1677 		if (denom == 0.0)
1678 			continue;
1679 
1680 		// We have an intersection.
1681 
1682 		const double t = ((y [i] - y1) * (x2 - x1) - (x [i] - x1) * (y2 - y1)) / denom;
1683 		if (t < 0.0 || t >= 1.0)
1684 			continue;
1685 
1686 		// Intersection is within rectangle side.
1687 
1688 		const double x3 = x [i] + t * (x [i + 1] - x [i]);
1689 		const double y3 = y [i] + t * (y [i + 1] - y [i]);
1690 
1691 		// s must also be valid
1692 
1693 		const double s = ( x1 != x2 ? (x3 - x1) / (x2 - x1) : (y3 - y1) / (y2 - y1) );
1694 		if (s < 0.0 || s >= 1.0)
1695 			continue;
1696 
1697 		ni ++;
1698 		Melder_require (ni <= 3,
1699 			U"Too many intersections.");
1700 
1701 		xi [ni] = x3;
1702 		yi [ni] = y3;
1703 	}
1704 	return ni;
1705 }
1706 
NUMclipLineWithinRectangle(double line_x1,double line_y1,double line_x2,double line_y2,double rect_x1,double rect_y1,double rect_x2,double rect_y2,double * out_line_x1,double * out_line_y1,double * out_line_x2,double * out_line_y2)1707 bool NUMclipLineWithinRectangle (double line_x1, double line_y1, double line_x2, double line_y2, double rect_x1, double rect_y1, double rect_x2, double rect_y2, double *out_line_x1, double *out_line_y1, double *out_line_x2, double *out_line_y2) {
1708 	integer numberOfRectangleCrossings = 0;
1709 	double x, y, a, b;
1710 	double crossing_x [5], crossing_y [5], xmin, xmax, ymin, ymax;
1711 	double segment_x1 = line_x1, segment_y1 = line_y1, segment_x2 = line_x2, segment_y2 = line_y2;
1712 	/*
1713 		This test first because we expect the majority of the tested segments to be within the rectangle
1714 	*/
1715 	if (line_x1 >= rect_x1 && line_x1 <= rect_x2 && line_y1 >= rect_y1 && line_y1 <= rect_y2 &&
1716 			line_x2 >= rect_x1 && line_x2 <= rect_x2 && line_y2 >= rect_y1 && line_y2 <= rect_y2)
1717 		goto end;
1718 	/*
1719 		All lines that are completely outside the rectangle
1720 	*/
1721 	if ( (line_x1 <= rect_x1 && line_x2 <= rect_x1) || (line_x1 >= rect_x2 && line_x2 >= rect_x2) ||
1722 			(line_y1 <= rect_y1 && line_y2 <= rect_y1) || (line_y1 >= rect_y2 && line_y2 >= rect_y2))
1723 		return false;
1724 	/*
1725 		At least line spans (part of) the rectangle.
1726 		Get extremes in x and y of the line for easy testing further on.
1727 	*/
1728 	bool xswap, yswap;
1729 	if (line_x1 < line_x2) {
1730 		xmin = line_x1;
1731 		xmax = line_x2;
1732 		xswap = false;
1733 	} else {
1734 		xmin = line_x2;
1735 		xmax = line_x1;
1736 		xswap = true;
1737 	}
1738 	if (line_y1 < line_y2) {
1739 		ymin = line_y1;
1740 		ymax = line_y2;
1741 		yswap = false;
1742 	} else {
1743 		ymin = line_y2;
1744 		ymax = line_y1;
1745 		yswap = true;
1746 	}
1747 
1748 	if (line_y1 == line_y2) {
1749 		if (xmin < rect_x1)
1750 			segment_x1 = rect_x1;
1751 		if (xmax > rect_x2)
1752 			segment_x2 = rect_x2;
1753 		if (xswap)
1754 			std::swap (segment_x1, segment_x2);
1755 		goto end;
1756 	}
1757 	if (line_x1 == line_x2) {
1758 		if (ymin < rect_y1)
1759 			segment_y1 = rect_y1;
1760 		if (ymax > rect_y2)
1761 			segment_y2 = rect_y2;
1762 		if (yswap)
1763 			std::swap (segment_y1, segment_y2);
1764 		goto end;
1765 	}
1766 	/*
1767 		Now we know that the line from (line_x1,line_y1) to (line_x2,line_y2) is neither horizontal nor vertical.
1768 		We can parametrize it as y = ax + b
1769 	*/
1770 	a = (line_y1 - line_y2) / (line_x1 - line_x2);
1771 	b = line_y1 - a * line_x1;
1772 
1773 	/*
1774 		To determine the crossings we have to avoid counting the crossings in a corner twice.
1775 		Therefore we test the corners inclusive (..<=..<=..) on the vertical borders of the rectangle
1776 		and exclusive (..<..<) at the horizontal borders.
1777 	*/
1778 
1779 	y = a * rect_x1 + b; // Crossing at y with left border: x = rect_x1
1780 
1781 	if (y >= rect_y1 && y <= rect_y2 && xmin < rect_x1) { // Within vertical range?
1782 		crossing_x [++ numberOfRectangleCrossings] = rect_x1;
1783 		crossing_y [numberOfRectangleCrossings] = y;
1784 		crossing_x [2] = xmax;
1785 		crossing_y [2] = line_x1 > line_x2 ? line_y1 : line_y2;
1786 	}
1787 
1788 	x = (rect_y2 - b) / a; // Crossing at x with top border: y = rect_y2
1789 
1790 	if (x > rect_x1 && x < rect_x2 && ymax > rect_y2) { // Within horizontal range?
1791 		crossing_x [++ numberOfRectangleCrossings] = x;
1792 		crossing_y [numberOfRectangleCrossings] = rect_y2;
1793 		if (numberOfRectangleCrossings == 1) {
1794 			crossing_y [2] = ymin;
1795 			crossing_x [2] = line_y1 < line_y2 ? line_x1 : line_x2;
1796 		}
1797 	}
1798 
1799 	y = a * rect_x2 + b; // Crossing at y with right border: x = rect_x2
1800 
1801 	if (y >= rect_y1 && y <= rect_y2 && xmax > rect_x2) { // Within vertical range?
1802 		crossing_x [++ numberOfRectangleCrossings] = rect_x2;
1803 		crossing_y [numberOfRectangleCrossings] = y;
1804 		if (numberOfRectangleCrossings == 1) {
1805 			crossing_x [2] = xmin;
1806 			crossing_y [2] = line_x1 < line_x2 ? line_y1 : line_y2;
1807 		}
1808 	}
1809 
1810 	x = (rect_y1 - b) / a; // Crossing at x with bottom border: y = rect_y1
1811 
1812 	if (x > rect_x1 && x < rect_x2 && ymin < rect_y1) {
1813 		crossing_x [++ numberOfRectangleCrossings] = x;
1814 		crossing_y [numberOfRectangleCrossings] = rect_y1;
1815 		if (numberOfRectangleCrossings == 1) {
1816 			crossing_y [2] = ymax;
1817 			crossing_x [2] = line_y1 > line_y2 ? line_x1 : line_x2;
1818 		}
1819 	}
1820 	if (numberOfRectangleCrossings == 0)
1821 		return false;
1822 	Melder_require (numberOfRectangleCrossings <= 2,
1823 		U"Too many crossings found.");
1824 
1825 	/*
1826 		If start and endpoint of line are outside rectangle and numberOfRectangleCrossings == 1, than the line only touches.
1827 	*/
1828 
1829 	if (numberOfRectangleCrossings == 1 && (line_x1 < rect_x1 || line_x1 > rect_x2 || line_y1 < rect_y1 || line_y1 > rect_y2) &&
1830 		(line_x2 < rect_x1 || line_x2 > rect_x2 || line_y2 < rect_y1 || line_y2 > rect_y2))
1831 		goto end;
1832 
1833 	if ((crossing_x [1] > crossing_x [2] && ! xswap) || (crossing_x [1] < crossing_x [2] && xswap)) {
1834 		std::swap (crossing_x [1], crossing_x [2]);
1835 		std::swap (crossing_y [1], crossing_y [2]);
1836 	}
1837 	segment_x1 = crossing_x [1];
1838 	segment_y1 = crossing_y [1];
1839 	segment_x2 = crossing_x [2];
1840 	segment_y2 = crossing_y [2];
1841 
1842 end:
1843 
1844 	if (out_line_x1)
1845 		*out_line_x1 = segment_x1;
1846 	if (out_line_y1)
1847 		*out_line_y1 = segment_y1;
1848 	if (out_line_x2)
1849 		*out_line_x2 = segment_x2;
1850 	if (out_line_y2)
1851 		*out_line_y2 = segment_y2;
1852 	return true;
1853 }
1854 
NUMgetEllipseBoundingBox(double a,double b,double cospsi,double * out_width,double * out_height)1855 void NUMgetEllipseBoundingBox (double a, double b, double cospsi, double *out_width, double *out_height) {
1856 	Melder_require (cospsi >= -1.0 && cospsi <= 1.0,
1857 		U"NUMgetEllipseBoundingBox: abs (cospi) should not exceed 1.", cospsi);
1858 	double width, height;
1859 	if (cospsi == 1.0) { // a-axis along x-axis
1860 		width = a;
1861 		height = b;
1862 	} else if (cospsi == 0.0) { // a-axis along y-axis
1863 		width = b;
1864 		height = a;
1865 	} else {
1866 		const double psi = acos (cospsi), sinpsi = sin (psi);
1867 		double phi = atan2 (-b * sinpsi, a * cospsi);
1868 		width = fabs (a * cospsi * cos (phi) - b * sinpsi * sin (phi));
1869 		phi = atan2 (b * cospsi, a * sinpsi);
1870 		height = fabs (a * sinpsi * cos (phi) + b * cospsi * sin (phi));
1871 	}
1872 	if (out_width)
1873 		*out_width = width;
1874 	if (out_height)
1875 		*out_height = height;
1876 }
1877 
1878 /*
1879 	Closely modeled after the netlib code by Oleg Keselyov.
1880 */
NUMminimize_brent(double (* f)(double x,void * closure),double a,double b,void * closure,double tol,double * fx)1881 double NUMminimize_brent (double (*f) (double x, void *closure), double a, double b, void *closure, double tol, double *fx) {
1882 	double x, v, fv, w, fw;
1883 	const double golden = 1 - NUM_goldenSection;
1884 	const double sqrt_epsilon = sqrt (NUMfpp -> eps);
1885 	const integer itermax = 60;
1886 
1887 	Melder_assert (tol > 0 && a < b);
1888 
1889 	/*
1890 		First step - golden section
1891 	*/
1892 	v = a + golden * (b - a);
1893 	fv = (*f) (v, closure);
1894 	x = v;
1895 	w = v;
1896 	*fx = fv;
1897 	fw = fv;
1898 
1899 	for (integer iter = 1; iter <= itermax; iter ++) {
1900 		const double middle_range = (a + b) / 2.0;
1901 		const double tol_act = sqrt_epsilon * fabs (x) + tol / 3.0;
1902 		double range = b - a;
1903 		if (fabs (x - middle_range) + range / 2.0 <= 2.0 * tol_act)
1904 			return x;
1905 
1906 		// Obtain the golden section step
1907 
1908 		double new_step = golden * (x < middle_range ? b - x : a - x);
1909 
1910 		// Decide if the parabolic interpolation can be tried
1911 
1912 		if (fabs (x - w) >= tol_act) {
1913 			/*
1914 				Interpolation step is calculated as p/q;
1915 				division operation is delayed until last moment.
1916 			*/
1917 
1918 			const double t = (x - w) * (*fx - fv);
1919 			double q = (x - v) * (*fx - fw);
1920 			double p = (x - v) * q - (x - w) * t;
1921 			q = 2.0 * (q - t);
1922 
1923 			if (q > 0.0)
1924 				p = -p;
1925 			else
1926 				q = -q;
1927 
1928 			/*
1929 				If x+p/q falls in [a,b], not too close to a and b,
1930 				and isn't too large, it is accepted.
1931 				If p/q is too large then the golden section procedure can
1932 				reduce [a,b] range.
1933 			*/
1934 
1935 			if (fabs (p) < fabs (new_step * q) &&
1936 					p > q * (a - x + 2.0 * tol_act) &&
1937 					p < q * (b - x - 2.0 * tol_act)) {
1938 				new_step = p / q;
1939 			}
1940 		}
1941 
1942 		// Adjust the step to be not less than tolerance.
1943 
1944 		if (fabs (new_step) < tol_act)
1945 			new_step = new_step > 0.0 ? tol_act : - tol_act;
1946 
1947 		// Obtain the next approximation to min	and reduce the enveloping range
1948 
1949 		{
1950 			const double t = x + new_step;	// Tentative point for the min
1951 			const double ft = (*f) (t, closure);
1952 
1953 			/*
1954 				If t is a better approximation, reduce the range so that
1955 				t would fall within it. If x remains the best, reduce the range
1956 				so that x falls within it.
1957 			*/
1958 
1959 			if (ft <= *fx) {
1960 				if (t < x)
1961 					b = x;
1962 				else
1963 					a = x;
1964 
1965 				v = w;
1966 				w = x;
1967 				x = t;
1968 				fv = fw;
1969 				fw = *fx;
1970 				*fx = ft;
1971 			} else {
1972 				if (t < x)
1973 					a = t;
1974 				else
1975 					b = t;
1976 
1977 				if (ft <= fw || w == x) {
1978 					v = w;
1979 					w = t;
1980 					fv = fw;
1981 					fw = ft;
1982 				} else if (ft <= fv || v == x || v == w) {
1983 					v = t;
1984 					fv = ft;
1985 				}
1986 			}
1987 		}
1988 	}
1989 	Melder_warning (U"NUMminimize_brent: maximum number of iterations (", itermax, U") exceeded.");
1990 	return x;
1991 }
1992 
1993 /*
1994 	probs is probability vector, i.e. all 0 <= probs [i] <= 1 and sum(i=1...probs.size, probs [i])= 1
1995 */
NUMgetIndexFromProbability(constVEC probs,double p)1996 integer NUMgetIndexFromProbability (constVEC probs, double p) {
1997 	integer index = 1;
1998 	double psum = probs [index];
1999 	while (p > psum && index < probs.size)
2000 		psum += probs [++ index];
2001 	return index;
2002 }
2003 
2004 // straight line fitting
2005 
NUMfitLine_theil(constVEC const & x,constVEC const & y,double * out_m,double * out_intercept,bool completeMethod)2006 void NUMfitLine_theil (constVEC const& x, constVEC const& y, double *out_m, double *out_intercept, bool completeMethod) {
2007 	try {
2008 		Melder_require (x.size == y.size,
2009 			U"NUMfitLine_theil: the sizes of the two vectors should be equal.");
2010 		/*
2011 			Theil's incomplete method:
2012 			Split (x [i],y [i]) as
2013 			(x [i],y [i]), (x [N+i],y [N+i], i=1..numberOfPoints/2
2014 			m [i] = (y [N+i]-y [i])/(x [N+i]-x [i])
2015 			m = median (m [i])
2016 			b = median(y [i]-m*x [i])
2017 		 */
2018 		autoVEC lineParameters = raw_VEC (6);
2019 		NUMfitLine_theil_preallocated (lineParameters.get(), x, y, out_intercept, 0.025, completeMethod);
2020 
2021 		if (out_m)
2022 			*out_m = lineParameters [1];
2023 		if (out_intercept)
2024 			*out_intercept = lineParameters [4];
2025 	} catch (MelderError) {
2026 		Melder_throw (U"No line fit (Theil's method).");
2027 	}
2028 }
2029 
NUMfitLine_theil_preallocated(VEC const & out_lineParameters,constVEC const & x,constVEC const & y,bool wantIntercept,double oneTailedUnconfidence,bool completeMethod)2030 void NUMfitLine_theil_preallocated (VEC const& out_lineParameters, constVEC const& x, constVEC const& y, bool wantIntercept, double oneTailedUnconfidence, bool completeMethod) {
2031 	try {
2032 		Melder_require (x.size == y.size,
2033 			U"NUMfitLine_theil: the sizes of the two vectors should be equal.");
2034 		Melder_require (out_lineParameters.size == 6,
2035 			U"The line parameters vector should have size 6.");
2036 		/*
2037 			Theil's incomplete method:
2038 			Split (x [i],y [i]) as
2039 			(x [i],y [i]), (x [N+i],y [N+i], i=1..numberOfPoints/2
2040 			m [i] = (y [N+i]-y [i])/(x [N+i]-x [i])
2041 			m = median (m [i])
2042 			b = median(y [i]-m*x [i])
2043 		 */
2044 		out_lineParameters  <<=  undefined;
2045 		if (x.size == 1) {
2046 			out_lineParameters [1] = 0.0; // m
2047 			out_lineParameters [4] =  y [1]; // intercept
2048 		} else if (x.size == 2) {
2049 			const double m = out_lineParameters [1] = (y [2] - y [1]) / (x [2] - x [1]);
2050 			out_lineParameters [4] = y [1] - m * x [1];
2051 		} else {
2052 			integer numberOfCombinations;
2053 			autoVEC mbs;
2054 			if (! completeMethod) {
2055 				numberOfCombinations = x.size / 2;
2056 				mbs = zero_VEC (x.size); // allocate for the intercept calculation too
2057 				integer n2 = x.size % 2 == 1 ? numberOfCombinations + 1 : numberOfCombinations;
2058 				for (integer i = 1; i <= numberOfCombinations; i ++)
2059 					mbs [i] = (y [n2 + i] - y [i]) / (x [n2 + i] - x [i]);
2060 			} else { // use all combinations
2061 				numberOfCombinations = (x.size - 1) * x.size / 2;
2062 				mbs = zero_VEC (numberOfCombinations);
2063 				integer index = 0;
2064 				for (integer i = 1; i < x.size; i ++)
2065 					for (integer j = i + 1; j <= x.size; j ++)
2066 						mbs [++ index] = (y [j] - y [i]) / (x [j] - x [i]);
2067 				Melder_assert (index == numberOfCombinations);
2068 			}
2069 			/*
2070 				Get slope as the median of all slopes
2071 			*/
2072 			VEC mbsPart = mbs.part (1, numberOfCombinations);
2073 			sort_VEC_inout (mbsPart);
2074 			out_lineParameters [1] = NUMquantile (mbsPart, 0.5);
2075 			out_lineParameters [2] = NUMquantile (mbsPart, oneTailedUnconfidence);
2076 			out_lineParameters [3] = NUMquantile (mbsPart, 1.0 - oneTailedUnconfidence);
2077 			/*
2078 				Reuse array for intercept
2079 			*/
2080 			if (wantIntercept) {
2081 				for (integer i = 1; i <= x.size; i ++)
2082 					mbs [i] = y [i] - out_lineParameters [1] * x [i];
2083 				mbsPart = mbs.part (1, x.size);
2084 				sort_VEC_inout (mbsPart);
2085 				out_lineParameters [4] = NUMquantile (mbsPart, 0.5);
2086 				out_lineParameters [5] = NUMquantile (mbsPart, oneTailedUnconfidence);
2087 				out_lineParameters [6] = NUMquantile (mbsPart, 1.0 - oneTailedUnconfidence);
2088 			}
2089 		}
2090 	} catch (MelderError) {
2091 		Melder_throw (U"No line fit (Theil's method).");
2092 	}
2093 }
2094 
NUMfitLine_LS(constVEC const & x,constVEC const & y,double * out_m,double * out_intercept)2095 void NUMfitLine_LS (constVEC const& x, constVEC const& y, double *out_m, double *out_intercept) {
2096 	Melder_require (x.size == y.size,
2097 		U"NUMfitLine_LS: the sizes of the two vectors should be equal.");
2098 	const double sx = NUMsum (x);
2099 	const double xmean = sx / x.size;
2100 	longdouble st2 = 0.0, m = 0.0;
2101 	for (integer i = 1; i <= x.size; i ++) {
2102 		const double t = x [i] - xmean;
2103 		st2 += t * t;
2104 		m += t * y [i];
2105 	}
2106 	// y = m*x + b
2107 	m /= st2;
2108 	if (out_intercept) {
2109 		const double sy = NUMsum (y);
2110 		*out_intercept = (sy - m * sx) / x.size;
2111 	}
2112 	if (out_m)
2113 		*out_m = m;
2114 }
2115 
NUMlineFit(constVEC x,constVEC y,double * out_m,double * out_intercept,integer method)2116 void NUMlineFit (constVEC x, constVEC y, double *out_m, double *out_intercept, integer method) {
2117 	if (method == 1)
2118 		NUMfitLine_LS (x, y, out_m, out_intercept);
2119 	else if (method == 3)
2120 		NUMfitLine_theil (x, y, out_m, out_intercept, true);
2121 	else
2122 		NUMfitLine_theil (x, y, out_m, out_intercept, false);
2123 }
2124 
2125 // IEEE: Programs for digital signal processing section 4.3 LPTRN
2126 // lpc [1..n] to rc [1..n]
2127 
VECrc_from_lpc(VEC rc,constVEC lpc)2128 void VECrc_from_lpc (VEC rc, constVEC lpc) {
2129 	Melder_assert (rc.size == lpc.size);
2130 	autoVEC b = raw_VEC (lpc.size);
2131 	autoVEC a = raw_VEC (lpc.size);
2132 	a.all()  <<=  lpc;
2133 	for (integer m = lpc.size; m > 0; m--) {
2134 		rc [m] = a [m];
2135 		Melder_require (fabs (rc [m]) <= 1.0,
2136 			U"Relection coefficient [", m, U"] larger than 1.");
2137 		b.part (1, m)  <<=  a.part (1, m);
2138 		for (integer i = 1; i < m; i ++)
2139 			a [i] = (b [i] - rc [m] * b [m - i]) / (1.0 - rc [m] * rc [m]);
2140 	}
2141 }
2142 
VEClpc_from_rc(VEC lpc,constVEC rc)2143 void VEClpc_from_rc (VEC lpc, constVEC rc) {
2144 	Melder_assert (lpc.size == rc.size);
2145 	lpc  <<=  rc;
2146 	for (integer j = 2; j <= lpc.size; j ++) {
2147 		for (integer k = 1; k <= j / 2; k ++) {
2148 			double at = lpc [k] + rc [j] * lpc [j - k];
2149 			lpc [j - k] += rc [j] * lpc [k];
2150 			lpc [k] = at;
2151 		}
2152 	}
2153 }
2154 
VECarea_from_rc(VEC area,constVEC rc)2155 void VECarea_from_rc (VEC area, constVEC rc) {
2156 	Melder_assert (area.size == rc.size);
2157 	longdouble s = 0.0001; // 1.0 cm^2 at glottis
2158 	for (integer i = area.size; i > 0; i --) {
2159 		s *= (1.0 + rc [i]) / (1.0 - rc [i]);
2160 		area [i] = s;
2161 	}
2162 }
2163 
VECrc_from_area(VEC rc,constVEC area)2164 void VECrc_from_area (VEC rc, constVEC area) {
2165 	Melder_assert (rc.size == area.size);
2166 	double ar;
2167 	for (integer j = 1; j <= rc.size - 1; j ++) {
2168 		ar = area [j + 1] / area [j];
2169 		rc [j] = (1.0 - ar) / (1.0 + ar);
2170 	}
2171 	ar = 0.0001 / area [rc.size];  // 1.0 cm^2 at glottis
2172 	rc [rc.size] = (1.0 - ar) / (1.0 + ar);
2173 }
2174 
VEClpc_from_area(VEC lpc,constVEC area)2175 void VEClpc_from_area (VEC lpc, constVEC area) {
2176 	Melder_assert (lpc.size == area.size);
2177 	autoVEC rc = zero_VEC (lpc.size);
2178 	VECrc_from_area (rc.get(), area);
2179 	VEClpc_from_rc (lpc, rc.get());
2180 }
2181 
VECarea_from_lpc(VEC area,constVEC lpc)2182 void VECarea_from_lpc (VEC area, constVEC lpc) {
2183 	Melder_assert (area.size == lpc.size);
2184 	autoVEC rc = raw_VEC (lpc.size);
2185 	VECrc_from_lpc (rc.get(), lpc);
2186 	VECarea_from_rc (area, rc.get());
2187 }
2188 
2189 #if 0
2190 /*********** Begin deprecated LPC routines ***********************************/
2191 void NUMlpc_lpc_to_rc (double *lpc, integer p, double *rc) {
2192 	autoVEC b = zero_VEC (p);
2193 	autoVEC a  <<=  VEC (lpc, p);
2194 	for (integer m = p; m > 0; m--) {
2195 		rc [m] = a [m];
2196 		Melder_require (fabs (rc [m]) <= 1.0,
2197 			U"Relection coefficient [", m, U"] larger than 1.");
2198 		for (integer i = 1; i < m; i ++) {
2199 			b [i] = a [i];
2200 		}
2201 		for (integer i = 1; i < m; i ++) {
2202 			a [i] = (b [i] - rc [m] * b [m - i]) / (1.0 - rc [m] * rc [m]);
2203 		}
2204 	}
2205 }
2206 
2207 void NUMlpc_rc_to_area2 (double *rc, integer n, double *area);
2208 void NUMlpc_rc_to_area2 (double *rc, integer n, double *area) {
2209 	double s = 0.0001; /* 1.0 cm^2 at glottis */
2210 	for (integer i = n; i > 0; i--) {
2211 		s *= (1.0 + rc [i]) / (1.0 - rc [i]);
2212 		area [i] = s;
2213 	}
2214 }
2215 
2216 void NUMlpc_area_to_lpc2 (double *area, integer n, double *lpc);
2217 void NUMlpc_area_to_lpc2 (double *area, integer n, double *lpc) {
2218 	// from area to reflection coefficients
2219 	autoVEC rc =raw_VEC (n);
2220 	// normalisation: area [n+1] = 0.0001
2221 	for (integer j = n; j > 0; j--) {
2222 		double ar = area [j+1] / area [j];
2223 		rc [j] = (1 - ar) / (1 + ar);
2224 	}
2225 	// LPTRAN works from mouth to lips:
2226 	for (integer j = 1; j <= n; j ++) {
2227 		lpc [j] = rc [n - j + 1];
2228 	}
2229 	for (integer j = 2; j <= n; j ++) {
2230 		integer nh = j / 2;
2231 		double q = rc [j];
2232 		for (integer k = 1; k <= nh; k ++) {
2233 			double at = lpc [k] + q * lpc [j - k];
2234 			lpc [j - k] += q * lpc [k];
2235 			lpc [k] = at;
2236 		}
2237 	}
2238 }
2239 
2240 void NUMlpc_lpc_to_rc2 (double *lpc, integer m, double *rc);
2241 void NUMlpc_lpc_to_rc2 (double *lpc, integer m, double *rc) { // klopt nog niet
2242 	rc.part(1,m)  <<=  lpc.part (1,m)
2243 	for (integer j = 2; j <= m; j ++) {
2244 		integer jb = m + 1 - j;
2245 		integer mh = (jb + 1) / 2;
2246 		double rct = rc [jb+1];
2247 		double d = 1.0 - rct * rct;
2248 		for (integer k = 1; k <= mh; k ++) {
2249 			rc [k] *= (1 - rct) / d;
2250 		}
2251 	}
2252 }
2253 // area [1] at lips generates n+1 areas from n rc's
2254 void NUMlpc_rc_to_area (double *rc, integer m, double *area) {
2255 	area [m+1] = 0.0001; /* 1.0 cm^2 */
2256 	for (integer j = 1; j <= m; j ++) {
2257 		double ar = (1.0 - rc [m+1-j]) / (1.0 + rc [m+1-j]);
2258 		area [m+1-j] = area [m+2-j] / ar;
2259 	}
2260 }
2261 
2262 // returns m-1 reflection coefficients from m areas
2263 void NUMlpc_area_to_rc (double *area, integer m, double *rc) {
2264 	for (integer j = 1; j <= m - 1; j ++) {
2265 		double ar = area [j+1] / area [j];
2266 		rc [j] = (1.0 - ar) / (1.0 + ar);
2267 	}
2268 }
2269 
2270 void NUMlpc_rc_to_lpc (double *rc, integer m, double *lpc);
2271 void NUMlpc_rc_to_lpc (double *rc, integer m, double *lpc) {
2272 	lpc.part (1,m)  <<=  rc.part (1.m)
2273 	for (integer j = 2; j <= m; j ++) {
2274 		for (integer k = 1; k <= j / 2; k ++) {
2275 			double at = lpc [k] + rc [j] * lpc [j - k];
2276 			lpc [j - k] += rc [j] * lpc [k];
2277 			lpc [k] = at;
2278 		}
2279 	}
2280 }
2281 
2282 void NUMlpc_area_to_lpc (double *area, integer m, double *lpc) {
2283 	// from area to reflection coefficients
2284 	autoVEC rc = zero_VEC (m);
2285 	// normalisation: area [n+1] = 0.0001
2286 	NUMlpc_area_to_rc (area, m, rc.peek());
2287 	NUMlpc_rc_to_lpc (rc.peek(), m - 1, lpc); // m-1 ???
2288 }
2289 
2290 void NUMlpc_lpc_to_area (double *lpc, integer m, double *area) {
2291 	autoVEC rc = zero_VEC (m);
2292 	NUMlpc_lpc_to_rc (lpc, m, rc.peek());
2293 	NUMlpc_rc_to_area (rc.peek(), m, area);
2294 
2295 }
2296 /*********** End deprecated LPC routines ***********************************/
2297 #endif
2298 
2299 #undef SIGN
2300 
2301 #define SMALL_MEAN 14
2302 /* If n*p < SMALL_MEAN then use BINV algorithm. The ranlib implementation used cutoff=30;
2303  * but on my (Brian Gough) computer 14 works better
2304  */
2305 
2306 #define BINV_CUTOFF 110
2307 /* In BINV, do not permit ix too large */
2308 
2309 #define FAR_FROM_MEAN 20
2310 /* If ix-n*p is larger than this, then use the "squeeze" algorithm.
2311  * Ranlib used 20, and this seems to be the best choice on my (Brian Gough) machine as well.
2312  */
2313 
2314 #define LNFACT(x) gsl_sf_lnfact(x)
2315 
Stirling(double y1)2316 inline static double Stirling (double y1)
2317 {
2318 	const double y2 = y1 * y1;
2319 	const double s = (13860.0 - (462.0 - (132.0 - (99.0 - 140.0 / y2) / y2) / y2) / y2) / y1 / 166320.0;
2320 	return s;
2321 }
2322 
2323 // djmw 20121211 replaced calls to gsl_rng_uniform with NUMrandomUniform (0,1)
2324 
NUMrandomBinomial(double p,integer n)2325 integer NUMrandomBinomial (double p, integer n) {
2326 	if (p < 0.0 || p > 1.0 || n < 0) {
2327 		return -100000000;
2328 	}
2329 	integer ix;			// return value
2330 	bool flipped = false;
2331 
2332 	if (n == 0)
2333 		return 0;
2334 	if (p > 0.5) {
2335 		p = 1.0 - p;	// work with small p
2336 		flipped = true;
2337 	}
2338 
2339 	const double q = 1.0 - p;
2340 	const double s = p / q;
2341 	const double np = n * p;
2342 
2343 	/*
2344 		Inverse cdf logic for small mean (BINV in K+S)
2345 	*/
2346 
2347 	if (np < SMALL_MEAN) {
2348 		const double f0 = pow (q, n); // djmw gsl_pow_int (q, n); f(x), starting with x=0
2349 
2350 		while (1) {
2351 			/*
2352 				This while(1) loop will almost certainly only loop once; but
2353 				if u=1 to within a few epsilons of machine precision, then it
2354 				is possible for roundoff to prevent the main loop over ix to
2355 				achieve its proper value. Following the ranlib implementation,
2356 				we introduce a check for that situation, and when it occurs,
2357 				we just try again.
2358 			*/
2359 
2360 			double f = f0;
2361 			double u = NUMrandomUniform (0.0, 1.0); // djmw gsl_rng_uniform (rng);
2362 
2363 			for (ix = 0; ix <= BINV_CUTOFF; ++ ix) {
2364 				if (u < f)
2365 					goto Finish;
2366 				u -= f;
2367 				// Use recursion f(x+1) = f(x)* [(n-x)/(x+1)]* [p/(1-p)]
2368 				f *= s * (n - ix) / (ix + 1.0);
2369 			}
2370 
2371 			/*
2372 				It should be the case that the 'goto Finish' was encountered
2373 				before this point was ever reached. But if we have reached
2374 				this point, then roundoff has prevented u from decreasing
2375 				all the way to zero. This can happen only if the initial u
2376 				was very nearly equal to 1, which is a rare situation. In
2377 				that rare situation, we just try again.
2378 
2379 				Note, following the ranlib implementation, we loop ix only to
2380 				a hardcoded value of SMALL_MEAN_LARGE_N=110; we could have
2381 				looped to n, and 99.99...% of the time it won't matter. This
2382 				choice, I think is a little more robust against the rare
2383 				roundoff error. If n>LARGE_N, then it is technically
2384 				possible for ix>LARGE_N, but it is astronomically rare, and
2385 				if ix is that large, it is more likely due to roundoff than
2386 				probability, so better to nip it at LARGE_N than to take a
2387 				chance that roundoff will somehow conspire to produce an even
2388 				larger (and more improbable) ix. If n<LARGE_N, then once
2389 				ix=n, f=0, and the loop will continue until ix=LARGE_N.
2390 			*/
2391 		}
2392 	} else {
2393 
2394 		/*
2395 			For n >= SMALL_MEAN, we invoke the BTPE algorithm
2396 		*/
2397 
2398 		const double ffm = np + p;		// ffm = n*p+p
2399 		const integer m = (integer) ffm;	// m = int floor [n*p+p]
2400 		const double fm = m;				// fm = double m
2401 		const double xm = fm + 0.5;	 	// xm = half integer mean (tip of triangle)
2402 		const double npq = np * q;		// npq = n*p*q
2403 
2404 		/*
2405 			Compute cumulative area of tri, para, exp tails
2406 
2407 			p1: radius of triangle region; since height=1, also: area of region
2408 			p2: p1 + area of parallelogram region
2409 			p3: p2 + area of left tail
2410 			p4: p3 + area of right tail
2411 			pi/p4: probability of i'th area (i=1,2,3,4)
2412 
2413 			Note: magic numbers 2.195, 4.6, 0.134, 20.5, 15.3
2414 			These magic numbers are not adjustable...at least not easily!
2415 		*/
2416 
2417 		const double p1 = Melder_roundDown (2.195 * sqrt (npq) - 4.6 * q) + 0.5;
2418 
2419 		// xl, xr: left and right edges of triangle
2420 		const double xl = xm - p1;
2421 		const double xr = xm + p1;
2422 
2423 		/*
2424 			Parameter of exponential tails
2425 			Left tail:  t(x) = c*exp(-lambda_l* [xl - (x+0.5)])
2426 			Right tail: t(x) = c*exp(-lambda_r* [(x+0.5) - xr])
2427 		*/
2428 
2429 		const double c = 0.134 + 20.5 / (15.3 + fm);
2430 		const double p2 = p1 * (1.0 + c + c);
2431 		const double al = (ffm - xl) / (ffm - xl * p);
2432 		const double lambda_l = al * (1.0 + 0.5 * al);
2433 		const double ar = (xr - ffm) / (xr * q);
2434 		const double lambda_r = ar * (1.0 + 0.5 * ar);
2435 		const double p3 = p2 + c / lambda_l;
2436 		const double p4 = p3 + c / lambda_r;
2437 		double var, accept;
2438 
2439 TryAgain:
2440 
2441 		/*
2442 			Generate random variates, u specifies which region: Tri, Par, Tail
2443 		*/
2444 
2445 		const double u = p4 * NUMrandomUniform (0.0, 1.0); // djmw gsl_rng_uniform (rng) * p4;
2446 		double v = NUMrandomUniform (0.0, 1.0); // djmw gsl_rng_uniform (rng);
2447 
2448 		if (u <= p1) {
2449 			// Triangular region
2450 			ix = (integer) (xm - p1 * v + u);
2451 			goto Finish;
2452 		} else if (u <= p2) {
2453 			// Parallelogram region
2454 			const double x = xl + (u - p1) / c;
2455 			v = v * c + 1.0 - fabs (x - xm) / p1;
2456 			if (v > 1.0 || v <= 0.0)
2457 				goto TryAgain;
2458 			ix = (integer) x;
2459 		} else if (u <= p3) {
2460 			// Left tail
2461 			ix = (integer) (xl + log (v) / lambda_l);
2462 			if (ix < 0)
2463 				goto TryAgain;
2464 			v *= ((u - p2) * lambda_l);
2465 		} else {
2466 			// Right tail
2467 			ix = (integer) (xr - log (v) / lambda_r);
2468 			if (ix > (double) n)
2469 				goto TryAgain;
2470 			v *= ((u - p3) * lambda_r);
2471 		}
2472 
2473 		/*
2474 			At this point, the goal is to test whether v <= f(x)/f(m)
2475 			v <= f(x)/f(m) = (m!(n-m)! / (x!(n-x)!)) * (p/q)^{x-m}
2476 
2477 			Here is a direct test using logarithms. It is a little
2478 			slower than the various "squeezing" computations below, but
2479 			if things are working, it should give exactly the same answer
2480 			(given the same random number seed).
2481 		*/
2482 
2483 		#ifdef DIRECT
2484 		var = log (v);
2485 
2486 		accept = LNFACT (m) + LNFACT (n - m) - LNFACT (ix) - LNFACT (n - ix) + (ix - m) * log (p / q);
2487 
2488 		#else // SQUEEZE METHOD
2489 
2490 		/*
2491 			More efficient determination of whether v < f(x)/f(M)
2492 		 */
2493 
2494 		const integer k = integer_abs (ix - m);
2495 
2496 		if (k <= FAR_FROM_MEAN) {
2497 			/*
2498 				If ix near m (ie, |ix-m|<FAR_FROM_MEAN), then do
2499 				explicit evaluation using recursion relation for f(x)
2500 			*/
2501 			const double g = (n + 1) * s;
2502 			double f = 1.0;
2503 
2504 			var = v;
2505 
2506 			if (m < ix) {
2507 				for (integer i = m + 1; i <= ix; i ++)
2508 					f *= (g / i - s);
2509 			} else if (m > ix) {
2510 				for (integer i = ix + 1; i <= m; i ++)
2511 					f /= (g / i - s);
2512 			}
2513 
2514 			accept = f;
2515 		} else {
2516 			// If ix is far from the mean m: k=ABS(ix-m) large
2517 
2518 			var = log (v);
2519 
2520 			if (k < npq / 2 - 1) {
2521 				/*
2522 					"Squeeze" using upper and lower bounds on
2523 					log(f(x)) The squeeze condition was derived
2524 					under the condition k < npq/2-1
2525 				*/
2526 				const double amaxp = k / npq * ((k * (k / 3.0 + 0.625) + (1.0 / 6.0)) / npq + 0.5);
2527 				const double ynorm = -(k * k / (2.0 * npq));
2528 				if (var < ynorm - amaxp)
2529 					goto Finish;
2530 				if (var > ynorm + amaxp)
2531 					goto TryAgain;
2532 			}
2533 
2534 			/*
2535 				Now, again: do the test log(v) vs. log f(x)/f(M)
2536 			*/
2537 
2538 			#if USE_EXACT
2539 			/*
2540 				This is equivalent to the above, but is a little (~20%) slower
2541 				There are five log's vs three above, maybe that's it?
2542 			*/
2543 
2544 			accept = LNFACT (m) + LNFACT (n - m) - LNFACT (ix) - LNFACT (n - ix) + (ix - m) * log (p / q);
2545 
2546 			#else
2547 			/* USE STIRLING:
2548 				The "#define Stirling" above corresponds to the first five
2549 				terms in asymptotic formula for
2550 				log Gamma (y) - (y-0.5)log(y) + y - 0.5 log(2*pi);
2551 				See Abramowitz and Stegun, eq 6.1.40
2552 
2553 
2554 				Note below: two Stirling's are added, and two are
2555 				subtracted. In both K+S, and in the ranlib
2556 				implementation, all four are added. I (jt) believe that
2557 				is a mistake -- this has been confirmed by personal
2558 				correspondence w/ Dr. Kachitvichyanukul. Note, however,
2559 				the corrections are so small, that I couldn't find an
2560 				example where it made a difference that could be
2561 				observed, let alone tested. In fact, define'ing Stirling
2562 				to be zero gave identical results!! In practice, alv is
2563 				O(1), ranging 0 to -10 or so, while the Stirling
2564 				correction is typically O(10^{-5}) ...setting the
2565 				correction to zero gives about a 2% performance boost;
2566 				might as well keep it just to be pedantic.
2567 			*/
2568 
2569 			{
2570 				const double x1 = ix + 1.0;
2571 				const double w1 = n - ix + 1.0;
2572 				const double f1 = fm + 1.0;
2573 				const double z1 = n + 1.0 - fm;
2574 
2575 				accept = xm * log (f1 / x1) + (n - m + 0.5) * log (z1 / w1) + (ix - m) * log (w1 * p / (x1 * q))
2576 					+ Stirling (f1) + Stirling (z1) - Stirling (x1) - Stirling (w1);
2577 			}
2578 			#endif
2579 			#endif
2580 		}
2581 
2582 
2583 		if (var <= accept)
2584 			goto Finish;
2585 		else
2586 			goto TryAgain;
2587 	}
2588 
2589 Finish:
2590 
2591 	return flipped ? (n - ix) : ix;
2592 }
2593 
NUMrandomBinomial_real(double p,integer n)2594 double NUMrandomBinomial_real (double p, integer n) {
2595 	if (p < 0.0 || p > 1.0 || n < 0)
2596 		return undefined;
2597 	else
2598 		return (double) NUMrandomBinomial (p, n);
2599 }
2600 
NUMrandomWeibull(double scale_lambda,double shape_k)2601 double NUMrandomWeibull (double scale_lambda, double shape_k) {
2602 	Melder_require (scale_lambda > 0.0 && shape_k > 0.0,
2603 		U"NUMrandomWeibull: both arguments should be positive.");
2604 	const double u = NUMrandomUniform (0, 1);
2605 	return scale_lambda * pow (- log (u), 1.0 / shape_k);
2606 }
2607 
NUMrandomGamma(const double alpha,const double beta)2608 double NUMrandomGamma (const double alpha, const double beta) {
2609 	Melder_require (alpha > 0 && beta > 0,
2610 		U"NUMrandomGamma: both arguments should be positive.");
2611 	double result;
2612 	if (alpha >= 1.0) {
2613 		double x, v, d = alpha - 1.0 / 3.0;
2614 		double c = (1.0 / 3.0) / sqrt (d);
2615 		while (1) {
2616 			do {
2617 				x = NUMrandomGauss (0.0, 1.0);
2618 				v = 1.0 + c * x;
2619 			} while (v <= 0.0);
2620 			v = v * v * v;
2621 			double u = NUMrandomUniform (0.0, 1.0);
2622 			if (u < 1.0 - 0.0331 * (x * x) * (x * x))
2623 				break;
2624 			if (log(u) < 0.5 * x * x + d * (1.0 - v + log(v)))
2625 				break;
2626 		}
2627 		result = d * v / beta;
2628 	} else {
2629 		double u = NUMrandomUniform (0.0, 1.0);
2630 		result = NUMrandomGamma (alpha + 1.0, beta) * pow (u, 1.0 / alpha);
2631 	}
2632 	return result;
2633 }
2634 
NUMlngamma_complex(double zr,double zi,double * out_lnr,double * out_arg)2635 void NUMlngamma_complex (double zr, double zi, double *out_lnr, double *out_arg) {
2636 	double ln_re = undefined, ln_arg = undefined;
2637 	gsl_sf_result gsl_lnr, gsl_arg;
2638 	if (gsl_sf_lngamma_complex_e (zr, zi, & gsl_lnr, & gsl_arg)) {
2639 		ln_re = gsl_lnr.val;
2640 		ln_arg = gsl_arg.val;
2641 	}
2642 	if (out_lnr)
2643 		*out_lnr = ln_re;
2644 	if (out_arg)
2645 		*out_arg = ln_arg;
2646 }
2647 
newVECbiharmonic2DSplineInterpolation_getWeights(constVECVU const & x,constVECVU const & y,constVECVU const & z)2648 autoVEC newVECbiharmonic2DSplineInterpolation_getWeights (constVECVU const& x, constVECVU const& y, constVECVU const& z) {
2649 	Melder_assert (x.size == y.size && x.size == z.size);
2650 	autoMAT g = raw_MAT (x.size, x.size);
2651 	/*
2652 		1. Calculate the Green matrix G = |point [i]-point [j]|^2 (ln (|point [i]-point [j]|) - 1.0)
2653 		2. Solve z = G.w for w
2654 	*/
2655 	for (integer i = 1; i <= x.size; i ++) {
2656 		for (integer j = i + 1; j <= x.size; j ++) {
2657 			double dx = x [i] - x [j], dy = y [i] - y [j];
2658 			double distanceSquared = dx * dx + dy * dy;
2659 			g [i] [j] = g [j] [i] = distanceSquared * (0.5 * log (distanceSquared) - 1.0); // Green's function
2660 		}
2661 		g [i] [i] = 0.0;
2662 	}
2663 	autoVEC w = newVECsolve (g.get(), z, 0.0);
2664 	return w;
2665 }
2666 
NUMbiharmonic2DSplineInterpolation(constVECVU const & x,constVECVU const & y,constVECVU const & w,double xp,double yp)2667 double NUMbiharmonic2DSplineInterpolation (constVECVU const& x, constVECVU const& y, constVECVU const& w, double xp, double yp) {
2668 	Melder_assert (x.size == y.size && x.size == w.size);
2669 	longdouble result = 0.0;
2670 	for (integer i = 1; i <= x.size; i ++) {
2671 		const double dx = xp - x [i], dy = yp - y [i];
2672 		const double d = dx * dx + dy * dy;
2673 		result += w [i] * d * (0.5 * log (d) - 1.0);
2674 	}
2675 	return (double) result;
2676 }
2677 
NUMfixIndicesInRange(integer lowerLimit,integer upperLimit,integer * lowIndex,integer * highIndex)2678 void NUMfixIndicesInRange (integer lowerLimit, integer upperLimit, integer *lowIndex, integer *highIndex) {
2679 	Melder_require (lowerLimit <= upperLimit, U"The lower limit should not exceed the upper limit.");
2680 	if (*highIndex < *lowIndex) {
2681 		*lowIndex = lowerLimit; *highIndex = upperLimit;
2682 	} else if (*highIndex == *lowIndex) {
2683 		Melder_require (*lowIndex >= lowerLimit && *highIndex <= upperLimit,
2684 			U"Both lower and upper indices are out of range.");
2685 	} else { // low < high
2686 		Melder_require (*lowIndex < upperLimit && *highIndex > lowerLimit,
2687 			U"Both lower and upper indices are out of range.");
2688 		if (*lowIndex < lowerLimit)
2689 			*lowIndex = lowerLimit;
2690 		if (*highIndex > upperLimit)
2691 			*highIndex = upperLimit;
2692 	}
2693 }
2694 
NUMgetEntropies(constMATVU const & m,double * out_h,double * out_hx,double * out_hy,double * out_hygx,double * out_hxgy,double * out_uygx,double * out_uxgy,double * out_uxy)2695 void NUMgetEntropies (constMATVU const& m, double *out_h, double *out_hx, double *out_hy, double *out_hygx, double *out_hxgy, double *out_uygx, double *out_uxgy, double *out_uxy) {
2696 
2697 	double h = undefined, hx = undefined, hy = undefined;
2698 	double hxgy = undefined, hygx = undefined, uygx = undefined, uxgy = undefined, uxy = undefined;
2699 
2700 	// Get total sum and check if all elements are not negative.
2701 
2702 	longdouble totalSum = 0.0;
2703 	for (integer i = 1; i <= m.nrow; i ++) {
2704 		for (integer j = 1; j <= m.ncol; j++) {
2705 			Melder_require (m [i] [j] >= 0.0,
2706 				U"Matrix elements should not be negative.");
2707 			totalSum += m [i] [j];
2708 		}
2709 	}
2710 
2711 	if (totalSum > 0.0) {
2712 		longdouble hy_t = 0.0;
2713 		for (integer i = 1; i <= m.nrow; i ++) {
2714 			const double rowsum = NUMsum (m.row (i));
2715 			if (rowsum > 0.0) {
2716 				const double p = rowsum / double (totalSum);
2717 				hy_t -= p * NUMlog2 (p);
2718 			}
2719 		}
2720 		hy = (double) hy_t;
2721 
2722 		longdouble hx_t = 0.0;
2723 		for (integer j = 1; j <= m.ncol; j ++) {
2724 			const double colsum = NUMsum (m.column (j));
2725 			if (colsum > 0.0) {
2726 				const double p = colsum / double (totalSum);
2727 				hx_t -= p * NUMlog2 (p);
2728 			}
2729 		}
2730 		hx = (double) hx_t;
2731 
2732 		// Total entropy
2733 		longdouble h_t = 0.0;
2734 		for (integer i = 1; i <= m.nrow; i ++) {
2735 			for (integer j = 1; j <= m.ncol; j ++) {
2736 				if (m [i] [j] > 0.0) {
2737 					const double p = m [i] [j] / double (totalSum);
2738 					h_t -= p * NUMlog2 (p);
2739 				}
2740 			}
2741 		}
2742 		h = (double) h_t;
2743 		hygx = h - hx;
2744 		hxgy = h - hy;
2745 		uygx = (hy - hygx) / hy;
2746 		uxgy = (hx - hxgy) / hx;
2747 		uxy = 2.0 * (hx + hy - h) / (hx + hy);
2748 	}
2749 	if (out_h)
2750 		*out_h = h;
2751 	if (out_hx)
2752 		*out_hx = hx;
2753 	if (out_hy)
2754 		*out_hy = hy;
2755 	// Conditional entropies
2756 	if (out_hygx)
2757 		*out_hygx = hygx;
2758 	if (out_hxgy)
2759 		*out_hxgy = hxgy;
2760 	if (out_uygx)
2761 		*out_uygx = uygx;
2762 	if (out_uxgy)
2763 		*out_uxgy = uxgy;
2764 	if (out_uxy)
2765 		*out_uxy = uxy;
2766 }
2767 #undef TINY
2768 
NUMtrace(constMATVU const & a)2769 double NUMtrace (constMATVU const& a) {
2770 	Melder_assert (a.nrow == a.ncol);
2771 	longdouble trace = 0.0;
2772 	for (integer i = 1; i <= a.nrow; i ++)
2773 		trace += a [i] [i];
2774 	return (double) trace;
2775 }
2776 
NUMtrace2(constMATVU const & x,constMATVU const & y)2777 double NUMtrace2 (constMATVU const& x, constMATVU const& y) {
2778 	Melder_assert (x.ncol == y.nrow && x.nrow == y.ncol);
2779 	longdouble trace = 0.0;
2780 	for (integer irow = 1; irow <= x.nrow; irow ++)
2781 		for (integer k = 1; k <= x.ncol; k ++)
2782 			trace += x [irow] [k] * y [k] [irow];
2783 	return (double) trace;
2784 }
2785 
NUMeigencmp22(double a,double b,double c,double * out_rt1,double * out_rt2,double * out_cs1,double * out_sn1)2786 void NUMeigencmp22 (double a, double b, double c, double *out_rt1, double *out_rt2, double *out_cs1, double *out_sn1) {
2787 	const longdouble sm = a + c, df = a - c, adf = fabs (df);
2788 	const longdouble tb = b + b, ab = fabs (tb);
2789 	longdouble acmx = c, acmn = a;
2790 	if (fabs (a) > fabs (c)) {
2791 		acmx = a;
2792 		acmn = c;
2793 	}
2794 	longdouble rt, tn;
2795 	if (adf > ab) {
2796 		tn = ab / adf;
2797 		rt = adf * sqrt (1.0 + tn * tn);
2798 	} else if (adf < ab) {
2799 		tn = adf / ab;
2800 		rt = ab * sqrt (1.0 + tn * tn);
2801 	} else
2802 		rt = ab * sqrt (2.0);
2803 
2804 	longdouble rt1, rt2;
2805 	integer sgn1, sgn2;
2806 	if (sm < 0.0) {
2807 		rt1 = 0.5 * (sm - rt);
2808 		sgn1 = -1;
2809 		/*
2810 			Order of execution important.
2811 			To get fully accurate smaller eigenvalue,
2812 			next line needs to be executed in higher precision.
2813 		*/
2814 		rt2 = (acmx / rt1) * acmn - (b / rt1) * b;
2815 	} else if (sm > 0.0) {
2816 		rt1 = 0.5 * (sm + rt);
2817 		sgn1 = 1;
2818 		/*
2819 			Order of execution important.
2820 			To get fully accurate smaller eigenvalue,
2821 			next line needs to be executed in higher precision.
2822 		*/
2823 		rt2 = (acmx / rt1) * acmn - (b / rt1) * b;
2824 	} else {
2825 		rt1 = 0.5 * rt;
2826 		rt2 = -0.5 * rt;
2827 		sgn1 = 1;
2828 	}
2829 
2830 	// Compute the eigenvector
2831 
2832 	longdouble cs;
2833 	if (df >= 0.0) {
2834 		cs = df + rt;
2835 		sgn2 = 1;
2836 	} else {
2837 		cs = df - rt;
2838 		sgn2 = -1;
2839 	}
2840 	const longdouble acs = fabs (cs);
2841 	longdouble cs1, sn1;
2842 	if (acs > ab) {
2843 		const longdouble ct = -tb / cs;
2844 		sn1 = 1.0 / sqrt (1.0 + ct * ct);
2845 		cs1 = ct * sn1;
2846 	} else {
2847 		if (ab == 0.0) {
2848 			cs1 = 1.0;
2849 			sn1 = 0.0;
2850 		} else {
2851 			tn = -cs / tb;
2852 			cs1 = 1.0 / sqrt (1.0 + tn * tn);
2853 			sn1 = tn * cs1;
2854 		}
2855 	}
2856 	if (sgn1 == sgn2) {
2857 		tn = cs1;
2858 		cs1 = -sn1;
2859 		sn1 = tn;
2860 	}
2861 	if (fabs (cs1) > 1.0)
2862 		cs1 = copysign (1.0, cs1);
2863 	if (fabs (sn1) > 1.0)
2864 		sn1 = copysign (1.0, sn1);
2865 	if (out_rt1)
2866 		*out_rt1 = (double) rt1;
2867 	if (out_rt2)
2868 		*out_rt2 = (double) rt2;
2869 	if (out_cs1)
2870 		*out_cs1 = (double) cs1;
2871 	if (out_sn1)
2872 		*out_sn1 = (double) sn1;
2873 }
2874 
MATmul3_XYXt(MATVU const & target,constMATVU const & x,constMATVU const & y)2875 void MATmul3_XYXt (MATVU const& target, constMATVU const& x, constMATVU const& y) { // X.Y.X'
2876 	Melder_assert (x.ncol == y.nrow && y.ncol == x.ncol);
2877 	Melder_assert (target.nrow == target.ncol && target.nrow == x.nrow);
2878 	for (integer irow = 1; irow <= target.nrow; irow ++)
2879 		for (integer icol = 1; icol <= target.ncol; icol ++) {
2880 			longdouble sum = 0.0;
2881 			for (integer k = 1; k <= x.ncol; k ++)
2882 				sum += x [irow] [k] * NUMinner (y.row (k), x.row (icol));
2883 			target [irow] [icol] = double (sum);
2884 		}
2885 }
2886 
MATmul3_XYsXt(MATVU const & target,constMATVU const & x,constMATVU const & y)2887 void MATmul3_XYsXt (MATVU const& target, constMATVU const& x, constMATVU const& y) { // X.Y.X'
2888 	Melder_assert (x.ncol == y.nrow && y.ncol == x.ncol);
2889 	Melder_assert (target.nrow == target.ncol && target.nrow == x.nrow);
2890 	for (integer irow = 1; irow <= target.nrow; irow ++)
2891 		for (integer icol = irow; icol <= target.ncol; icol ++) {
2892 			longdouble sum = 0.0;
2893 			for (integer k = 1; k <= x.ncol; k ++)
2894 				sum += x [irow] [k] * NUMinner (y.row (k), x.row (icol));
2895 			target [irow] [icol] = double (sum);
2896 		}
2897 	for (integer irow = 1; irow <= target.nrow; irow ++)
2898 		for (integer icol = irow + 1; icol <= target.ncol; icol ++)
2899 			target [icol] [irow] = target [irow] [icol];
2900 }
2901 
2902 /*
2903 	1. Take absolute value of v.
2904 	2. Sort abs(v) and its index together.
2905 	3. Make all elements of v zero, except the numberOfNonZeros largest elements.
2906 	4. Set the support of these largest elements to 1 and the rest to zero.
2907 */
VECupdateDataAndSupport_inplace(VECVU const & v,BOOLVECVU const & support,integer numberOfNonZeros)2908 static void VECupdateDataAndSupport_inplace (VECVU const& v, BOOLVECVU const& support, integer numberOfNonZeros) {
2909 	Melder_assert (v.size == support.size);
2910 	autoVEC abs = newVECabs (v);
2911 	autoINTVEC index = to_INTVEC (v.size);
2912 	NUMsortTogether <double, integer> (abs.get(), index.get()); // sort is always increasing
2913 	for (integer i = 1; i <= v.size - numberOfNonZeros; i ++) {
2914 		v [index [i]] = 0.0;
2915 		support [index [i]] = false;
2916 	}
2917 	for (integer i = v.size - numberOfNonZeros + 1; i <= v.size; i ++)
2918 		support [index [i]] = true;
2919 }
2920 
update(VEC const & x_new,VEC const & y_new,BOOLVECVU const & support_new,constVECVU const & xn,double stepSize,constVEC const & gradient,constMATVU const & dictionary,constVEC const & yn,integer numberOfNonZeros,VEC buffer)2921 static double update (VEC const& x_new, VEC const& y_new, BOOLVECVU const& support_new, constVECVU const& xn, double stepSize, constVEC const& gradient, constMATVU const& dictionary, constVEC const& yn, integer numberOfNonZeros, VEC buffer) {
2922 	Melder_assert (x_new.size == xn.size && buffer.size == x_new.size);
2923 	Melder_assert (gradient.size == support_new.size && gradient.size == x_new.size);
2924 	Melder_assert (y_new.size == yn.size);
2925 	Melder_assert (dictionary.nrow == yn.size && dictionary.ncol == xn.size);
2926 
2927 	buffer  <<=  stepSize * gradient;
2928 	x_new  <<=  xn  +  buffer; // x(n) + stepSize * gradient
2929 	VECupdateDataAndSupport_inplace (x_new, support_new, numberOfNonZeros);
2930 	buffer  <<=  x_new  -  xn; // x(n+1) - x (n)
2931 	const double xdifsq = NUMsum2 (buffer); // ||x(n+1) - x (n)||^2
2932 
2933 	mul_VEC_out (y_new, dictionary, x_new); // y(n+1) = D. x(n+1)
2934 	buffer.part (1, yn.size)  <<=  y_new  -  yn; // y(n+1) - y(n) = D.(x(n+1) - x(n))
2935 	const double ydifsq = NUMsum2 (buffer.part (1, yn.size)); // ||y(n+1) - y(n)||^2
2936 	return xdifsq / ydifsq;
2937 }
2938 
newVECsolveSparse_IHT(constMATVU const & dictionary,constVECVU const & y,integer numberOfNonZeros,integer maximumNumberOfIterations,double tolerance,integer infoLevel)2939 autoVEC newVECsolveSparse_IHT (constMATVU const& dictionary, constVECVU const& y, integer numberOfNonZeros, integer maximumNumberOfIterations, double tolerance, integer infoLevel) {
2940 	try {
2941 		Melder_assert (dictionary.ncol > dictionary.nrow); // must be underdetermined system
2942 		Melder_assert (dictionary.nrow == y.size); // y = D.x + e
2943 		autoVEC result = zero_VEC (dictionary.ncol);
2944 		VECsolveSparse_IHT (result.get(), dictionary, y, numberOfNonZeros, maximumNumberOfIterations, tolerance, infoLevel);
2945 		return result;
2946 	} catch (MelderError) {
2947 		Melder_throw (U"Solution of sparse problem not found.");
2948 	}
2949 }
2950 
VECsolveSparse_IHT(VECVU const & x,constMATVU const & dictionary,constVECVU const & y,integer numberOfNonZeros,integer maximumNumberOfIterations,double tolerance,integer infoLevel)2951 void VECsolveSparse_IHT (VECVU const& x, constMATVU const& dictionary, constVECVU const& y, integer numberOfNonZeros, integer maximumNumberOfIterations, double tolerance, integer infoLevel) {
2952 	try {
2953 		Melder_assert (dictionary.ncol > dictionary.nrow); // must be underdetermined system
2954 		Melder_assert (dictionary.ncol == x.size); // we calculate D.x
2955 		Melder_assert (dictionary.nrow == y.size); // y = D.x + e
2956 
2957 		autoVEC gradient = raw_VEC (x.size);
2958 		autoVEC x_new = raw_VEC (x.size); // x(n+1), x == x(n)
2959 		autoVEC yfromx = raw_VEC (y.size); // D.x(n)
2960 		autoVEC yfromx_new = raw_VEC (y.size); // D.x(n+1)
2961 		autoVEC ydif = raw_VEC (y.size); // y - D.x(n)
2962 		autoVEC buffer = raw_VEC (x.size);
2963 		autoBOOLVEC support = raw_BOOLVEC (x.size);
2964 		autoBOOLVEC support_new = raw_BOOLVEC (x.size);
2965 
2966 		const double xnormSq = NUMsum2 (x);
2967 		const double rms_y = NUMsum2 (y) / y.size;
2968 		double rms = rms_y;
2969 
2970 		if (xnormSq == 0.0) {
2971 			/*
2972 				Start with x == 0.
2973 				Get initial support supp (Hard_K (D'y))
2974 				Hard_K (v) is a hard thresholder which only keeps the largest K elements from the vector v
2975 			*/
2976 			mul_VEC_out (buffer.get(), dictionary.transpose(), y);
2977 			VECupdateDataAndSupport_inplace (buffer.get(), support.get(), numberOfNonZeros);
2978 			yfromx.all()  <<=  0.0;
2979 			ydif.all()  <<=  y; // ydif = y - D.x(1) = y - D.0 = y
2980 		} else {
2981 			/*
2982 				We improve a current solution x
2983 			*/
2984 			VECupdateDataAndSupport_inplace (x, support.get(), numberOfNonZeros);
2985 			mul_VEC_out (yfromx.get(), dictionary, x); // D.x(n)
2986 			ydif.all()  <<=  y  -  yfromx.all(); // y - D.x(n)
2987 			rms = NUMsum2 (ydif.get()) / y.size; // ||y - D.x(n)||^2
2988 		}
2989 
2990 		bool convergence = false;
2991 		integer iter = 1;
2992 		while (iter <= maximumNumberOfIterations && not convergence) {
2993 
2994 			mul_VEC_out (gradient.get(), dictionary.transpose(), ydif.get()); // D'.(y - D.x(n))
2995 			/*
2996 				Calculate stepSize mu according to Eq. (13)
2997 				mu = || g_sparse ||^2 / || D_sparse * g_sparse ||^2
2998 				where g_sparse only contains the supported elements from the gradient and D_sparse only the supported columns from the dictionary.
2999 			*/
3000 			longdouble normsq_gs = 0.0; // squared norm of the sparse gradient
3001 			for (integer ig = 1; ig <= gradient.size; ig ++) {
3002 				if (support [ig])
3003 					normsq_gs += gradient [ig] * gradient [ig];
3004 			}
3005 			longdouble normsq_dgs = 0.0; // squared norm of the transformed sparse gradient
3006 			for (integer irow = 1; irow <= dictionary.nrow; irow ++) {
3007 				longdouble sum = 0.0;
3008 				for (integer icol = 1; icol <= dictionary.ncol; icol ++)
3009 					if (support [icol])
3010 						sum += dictionary [irow] [icol] * gradient [icol];
3011 				normsq_dgs += sum * sum;
3012 			}
3013 
3014 			double stepSize = normsq_gs / normsq_dgs;
3015 
3016 			double normsq_ratio = update (x_new.get(), yfromx_new.get(), support_new.get(), x, stepSize, gradient.get(), dictionary, yfromx.get(), numberOfNonZeros, buffer.get());
3017 
3018 			if (! NUMequal (support.get(), support_new.get())) {
3019 				double omega;
3020 				const double kappa = 2.0, c = 0.0;
3021 				while (stepSize > (omega = (1.0 - c) * normsq_ratio)) { // stepSize > omega, from Eq. 14
3022 					stepSize *= 1.0 / (kappa * (1.0 - c));
3023 					normsq_ratio = update (x_new.get(), yfromx_new.get(), support_new.get(), x, stepSize, gradient.get(), dictionary, yfromx.get(), numberOfNonZeros, buffer.get());
3024 				}
3025 			}
3026 
3027 			ydif.all()  <<=  y  -  yfromx_new.all();   // y - D.x(n+1)
3028 
3029 			const double rms_new = NUMsum2 (ydif.get()) / y.size;
3030 			const double relativeError = fabs (rms - rms_new) / rms_y;
3031 			convergence = relativeError < tolerance;
3032 			if (infoLevel > 1)
3033 				MelderInfo_writeLine (U"Iteration: ", iter, U", error: ", rms_new, U" relative: ", relativeError, U" stepSize: ", stepSize);
3034 
3035 			x  <<=  x_new.all();
3036 			support.all()  <<=  support_new.all();
3037 			yfromx.all()  <<=  yfromx_new.all();
3038 			rms = rms_new;
3039 			iter ++;
3040 		}
3041 		iter = std::min (iter, maximumNumberOfIterations);
3042 		if (infoLevel >= 1)
3043 			MelderInfo_writeLine (U"Number of iterations: ", iter, U" Difference squared: ", rms);
3044 		if (infoLevel > 0)
3045 			MelderInfo_drain();
3046 	} catch (MelderError) {
3047 		Melder_throw (U"Solution of sparse problem not found.");
3048 	}
3049 }
3050 
NUMpolynomial_recurrence(VEC const & pn,double a,double b,double c,constVEC const & pnm1,constVEC const & pnm2)3051 void NUMpolynomial_recurrence (VEC const& pn, double a, double b, double c, constVEC const& pnm1, constVEC const& pnm2) {
3052 	const integer degree = pn.size - 1;
3053 	Melder_assert (degree > 1 && pnm1.size >= pn.size && pnm2.size >= pn.size);
3054 
3055 	pn [1] = b * pnm1 [1] + c * pnm2 [1];
3056 	for (integer i = 2; i <= degree - 1; i ++)
3057 		pn [i] = a * pnm1 [i - 1] + b * pnm1 [i] + c * pnm2 [i];
3058 	pn [degree] = a * pnm1 [degree - 1] + b * pnm1 [degree];
3059 	pn [degree + 1] = a * pnm1 [degree];
3060 }
3061 
3062 /* End of file NUM2.cpp */
3063