1 /*
2    This source file based on Minpack: initially converted from
3    fortran using f2c, then rendered into relatively idiomatic
4    C with zero-based indexing throughout and pass-by-value for
5    parameters that do not function as pointers. We also rely
6    on <float.h> for the machine precision rather than Minpack's
7    dpmpar().
8 
9    See README in this directory for the Minpack Copyright.
10 
11    Allin Cottrell, Wake Forest University, April 2012
12 */
13 
14 #include "minpack.h"
15 #include <math.h>
16 #include <float.h>
17 
18 /*
19 c     lmdif:
20 c
21 c     the purpose of lmdif is to minimize the sum of the squares of
22 c     m nonlinear functions in n variables by a modification of
23 c     the levenberg-marquardt algorithm. the user must provide a
24 c     subroutine which calculates the functions. the jacobian is
25 c     then calculated by a forward-difference approximation.
26 c
27 c     the subroutine statement is
28 c
29 c       subroutine lmdif(fcn,m,n,x,fvec,ftol,xtol,gtol,maxfev,epsfcn,
30 c                        diag,mode,factor,nprint,info,nfev,fjac,
31 c                        ldfjac,ipvt,qtf,wa1,wa2,wa3,wa4)
32 c
33 c     where
34 c
35 c       fcn is the name of the user-supplied subroutine which
36 c         calculates the functions. fcn must be declared
37 c         in an external statement in the user calling
38 c         program, and should be written as follows.
39 c
40 c         subroutine fcn(m,n,x,fvec,iflag)
41 c         integer m,n,iflag
42 c         double precision x(n),fvec(m)
43 c         ----------
44 c         calculate the functions at x and
45 c         return this vector in fvec.
46 c         ----------
47 c         return
48 c         end
49 c
50 c         the value of iflag should not be changed by fcn unless
51 c         the user wants to terminate execution of lmdif.
52 c         in this case set iflag to a negative integer.
53 c
54 c       m is a positive integer input variable set to the number
55 c         of functions.
56 c
57 c       n is a positive integer input variable set to the number
58 c         of variables. n must not exceed m.
59 c
60 c       x is an array of length n. on input x must contain
61 c         an initial estimate of the solution vector. on output x
62 c         contains the final estimate of the solution vector.
63 c
64 c       fvec is an output array of length m which contains
65 c         the functions evaluated at the output x.
66 c
67 c       ftol is a nonnegative input variable. termination
68 c         occurs when both the actual and predicted relative
69 c         reductions in the sum of squares are at most ftol.
70 c         therefore, ftol measures the relative error desired
71 c         in the sum of squares.
72 c
73 c       xtol is a nonnegative input variable. termination
74 c         occurs when the relative error between two consecutive
75 c         iterates is at most xtol. therefore, xtol measures the
76 c         relative error desired in the approximate solution.
77 c
78 c       gtol is a nonnegative input variable. termination
79 c         occurs when the cosine of the angle between fvec and
80 c         any column of the jacobian is at most gtol in absolute
81 c         value. therefore, gtol measures the orthogonality
82 c         desired between the function vector and the columns
83 c         of the jacobian.
84 c
85 c       maxfev is a positive integer input variable. termination
86 c         occurs when the number of calls to fcn is at least
87 c         maxfev by the end of an iteration.
88 c
89 c       epsfcn is an input variable used in determining a suitable
90 c         step length for the forward-difference approximation. this
91 c         approximation assumes that the relative errors in the
92 c         functions are of the order of epsfcn. if epsfcn is less
93 c         than the machine precision, it is assumed that the relative
94 c         errors in the functions are of the order of the machine
95 c         precision.
96 c
97 c       diag is an array of length n. if mode = 1 (see
98 c         below), diag is internally set. if mode = 2, diag
99 c         must contain positive entries that serve as
100 c         multiplicative scale factors for the variables.
101 c
102 c       mode is an integer input variable. if mode = 1, the
103 c         variables will be scaled internally. if mode = 2,
104 c         the scaling is specified by the input diag. other
105 c         values of mode are equivalent to mode = 1.
106 c
107 c       factor is a positive input variable used in determining the
108 c         initial step bound. this bound is set to the product of
109 c         factor and the euclidean norm of diag*x if nonzero, or else
110 c         to factor itself. in most cases factor should lie in the
111 c         interval (.1,100.). 100. is a generally recommended value.
112 c
113 c       nprint is an integer input variable that enables controlled
114 c         printing of iterates if it is positive. in this case,
115 c         fcn is called with iflag = 0 at the beginning of the first
116 c         iteration and every nprint iterations thereafter and
117 c         immediately prior to return, with x and fvec available
118 c         for printing. if nprint is not positive, no special calls
119 c         of fcn with iflag = 0 are made.
120 c
121 c       info is an integer output variable. if the user has
122 c         terminated execution, info is set to the (negative)
123 c         value of iflag. see description of fcn. otherwise,
124 c         info is set as follows.
125 c
126 c         info = 0  improper input parameters.
127 c
128 c         info = 1  both actual and predicted relative reductions
129 c                   in the sum of squares are at most ftol.
130 c
131 c         info = 2  relative error between two consecutive iterates
132 c                   is at most xtol.
133 c
134 c         info = 3  conditions for info = 1 and info = 2 both hold.
135 c
136 c         info = 4  the cosine of the angle between fvec and any
137 c                   column of the jacobian is at most gtol in
138 c                   absolute value.
139 c
140 c         info = 5  number of calls to fcn has reached or
141 c                   exceeded maxfev.
142 c
143 c         info = 6  ftol is too small. no further reduction in
144 c                   the sum of squares is possible.
145 c
146 c         info = 7  xtol is too small. no further improvement in
147 c                   the approximate solution x is possible.
148 c
149 c         info = 8  gtol is too small. fvec is orthogonal to the
150 c                   columns of the jacobian to machine precision.
151 c
152 c       nfev is an integer output variable set to the number of
153 c         calls to fcn.
154 c
155 c       fjac is an output m by n array. the upper n by n submatrix
156 c         of fjac contains an upper triangular matrix r with
157 c         diagonal elements of nonincreasing magnitude such that
158 c
159 c                t     t           t
160 c               p *(jac *jac)*p = r *r,
161 c
162 c         where p is a permutation matrix and jac is the final
163 c         calculated jacobian. column j of p is column ipvt(j)
164 c         (see below) of the identity matrix. the lower trapezoidal
165 c         part of fjac contains information generated during
166 c         the computation of r.
167 c
168 c       ldfjac is a positive integer input variable not less than m
169 c         which specifies the leading dimension of the array fjac.
170 c
171 c       ipvt is an integer output array of length n. ipvt
172 c         defines a permutation matrix p such that jac*p = q*r,
173 c         where jac is the final calculated jacobian, q is
174 c         orthogonal (not stored), and r is upper triangular
175 c         with diagonal elements of nonincreasing magnitude.
176 c         column j of p is column ipvt(j) of the identity matrix.
177 c
178 c       qtf is an output array of length n which contains
179 c         the first n elements of the vector (q transpose)*fvec.
180 c
181 c       wa1, wa2, and wa3 are work arrays of length n.
182 c
183 c       wa4 is a work array of length m.
184 c
185 c     subprograms called
186 c
187 c       user-supplied ...... fcn
188 c
189 c       minpack-supplied ... enorm,fdjac2,lmpar,qrfac
190 c
191 c     argonne national laboratory. minpack project. march 1980.
192 c     burton s. garbow, kenneth e. hillstrom, jorge j. more
193 */
194 
lmdif_(S_fp fcn,int m,int n,double * x,double * fvec,double ftol,double xtol,double gtol,int maxfev,double epsfcn,double * diag,int mode,double factor,int nprint,int * info,int * nfev,double * fjac,int ldfjac,int * ipvt,double * qtf,double * wa1,double * wa2,double * wa3,double * wa4,void * p)195 int lmdif_(S_fp fcn, int m, int n, double *x, double *fvec,
196 	   double ftol, double xtol, double gtol, int maxfev,
197 	   double epsfcn, double *diag, int mode, double factor,
198 	   int nprint, int *info, int *nfev, double *fjac,
199 	   int ldfjac, int *ipvt, double *qtf, double *wa1,
200 	   double *wa2, double *wa3, double *wa4, void *p)
201 {
202     const double p1 = .1;
203     const double p5 = .5;
204     const double p25 = .25;
205     const double p75 = .75;
206     const double p0001 = 1e-4;
207     const double epsmch = DBL_EPSILON;
208     double par, sum;
209     double d, temp1, temp2;
210     double ratio, delta = 0;
211     double fnorm, gnorm, pnorm, fnorm1;
212     double actred, dirder, prered;
213     double temp = 0.0, xnorm = 0.0;
214     int iter, iflag = 0;
215     int i, j, l;
216 
217     *info = 0;
218     *nfev = 0;
219 
220     /* check the input parameters for errors */
221 
222     if (n <= 0 || m < n || ldfjac < m ||
223 	ftol < 0.0 || xtol < 0.0 || gtol < 0.0 ||
224 	maxfev <= 0 || factor <= 0.0) {
225 	goto terminate;
226     }
227 
228     if (mode == 2) {
229 	for (j = 0; j < n; ++j) {
230 	    if (diag[j] <= 0.0) {
231 		goto terminate;
232 	    }
233 	}
234     }
235 
236     /* evaluate the function at the starting point
237        and calculate its norm */
238 
239     iflag = 1;
240     (*fcn)(m, n, x, fvec, &iflag, p);
241     *nfev = 1;
242     if (iflag < 0) {
243 	goto terminate;
244     }
245     fnorm = enorm_(m, fvec);
246 
247     /* initialize Levenberg-Marquardt parameter and iteration counter */
248     par = 0.0;
249     iter = 1;
250 
251     /* beginning of the outer loop */
252  outer_start:
253 
254     /* calculate the jacobian matrix */
255 
256     iflag = 2;
257     fdjac2_((S_fp)fcn, m, n, 0, x, fvec, fjac, ldfjac, &iflag,
258 	    epsfcn, wa4, p);
259     *nfev += n;
260     if (iflag < 0) {
261 	goto terminate;
262     }
263 
264     /* if requested, call fcn to enable printing of iterates */
265 
266     if (nprint > 0) {
267 	iflag = 0;
268 	if ((iter - 1) % nprint == 0) {
269 	    (*fcn)(m, n, x, fvec, &iflag, p);
270 	}
271 	if (iflag < 0) {
272 	    goto terminate;
273 	}
274     }
275 
276     /* compute the QR factorization of the jacobian */
277     qrfac_(m, n, fjac, ldfjac, ipvt, wa1, wa2, wa3);
278 
279     /* on the first iteration and if mode is 1, scale according
280        to the norms of the columns of the initial jacobian
281     */
282     if (iter == 1) {
283 	if (mode != 2) {
284 	    for (j = 0; j < n; ++j) {
285 		diag[j] = wa2[j];
286 		if (wa2[j] == 0.0) {
287 		    diag[j] = 1.0;
288 		}
289 	    }
290 	}
291 	/* on the first iteration, calculate the norm of the scaled x
292 	   and initialize the step bound delta */
293 	for (j = 0; j < n; ++j) {
294 	    wa3[j] = diag[j] * x[j];
295 	}
296 	xnorm = enorm_(n, wa3);
297 	delta = factor * xnorm;
298 	if (delta == 0.0) {
299 	    delta = factor;
300 	}
301     }
302 
303     /* form Q'*fvec and store the first n components in qtf */
304 
305     for (i = 0; i < m; ++i) {
306 	wa4[i] = fvec[i];
307     }
308     for (j = 0; j < n; ++j) {
309 	if (fjac[j + j * ldfjac] != 0.0) {
310 	    sum = 0.0;
311 	    for (i = j; i < m; ++i) {
312 		sum += fjac[i + j * ldfjac] * wa4[i];
313 	    }
314 	    temp = -sum / fjac[j + j * ldfjac];
315 	    for (i = j; i < m; ++i) {
316 		wa4[i] += fjac[i + j * ldfjac] * temp;
317 	    }
318 	}
319 	fjac[j + j * ldfjac] = wa1[j];
320 	qtf[j] = wa4[j];
321     }
322 
323     /* compute the norm of the scaled gradient */
324 
325     gnorm = 0.0;
326     if (fnorm != 0.0) {
327 	for (j = 0; j < n; ++j) {
328 	    l = ipvt[j];
329 	    if (wa2[l] != 0.0) {
330 		sum = 0.0;
331 		for (i = 0; i <= j; ++i) {
332 		    sum += fjac[i + j * ldfjac] * (qtf[i] / fnorm);
333 		}
334 		d = fabs(sum / wa2[l]);
335 		gnorm = max(gnorm, d);
336 	    }
337 	}
338     }
339 
340     /* test for convergence of the gradient norm */
341     if (gnorm <= gtol) {
342 	*info = 4;
343     }
344     if (*info != 0) {
345 	goto terminate;
346     }
347 
348     /* rescale if necessary */
349     if (mode != 2) {
350 	for (j = 0; j < n; ++j) {
351 	    diag[j] = max(diag[j], wa2[j]);
352 	}
353     }
354 
355     /* beginning of the inner loop */
356  inner_start:
357 
358     /* determine the Levenberg-Marquardt parameter */
359     lmpar_(n, fjac, ldfjac, ipvt, diag, qtf, delta, &par,
360 	   wa1, wa2, wa3, wa4);
361 
362     /* store the direction p and x + p; calculate the norm of p */
363     for (j = 0; j < n; ++j) {
364 	wa1[j] = -wa1[j];
365 	wa2[j] = x[j] + wa1[j];
366 	wa3[j] = diag[j] * wa1[j];
367     }
368     pnorm = enorm_(n, wa3);
369 
370     /* on the first iteration, adjust the initial step bound */
371     if (iter == 1) {
372 	delta = min(delta, pnorm);
373     }
374 
375     /* evaluate the function at x + p and calculate its norm */
376     iflag = 1;
377     (*fcn)(m, n, wa2, wa4, &iflag, p);
378     *nfev += 1;
379     if (iflag < 0) {
380 	goto terminate;
381     }
382     fnorm1 = enorm_(m, wa4);
383 
384     /* compute the scaled actual reduction */
385 
386     actred = -1.0;
387     if (p1 * fnorm1 < fnorm) {
388 	d = fnorm1 / fnorm;
389 	actred = 1.0 - d * d;
390     }
391 
392     /* compute the scaled predicted reduction and */
393     /* the scaled directional derivative. */
394 
395     for (j = 0; j < n; ++j) {
396 	wa3[j] = 0.0;
397 	l = ipvt[j];
398 	temp = wa1[l];
399 	for (i = 0; i <= j; ++i) {
400 	    wa3[i] += fjac[i + j * ldfjac] * temp;
401 	}
402     }
403     temp1 = enorm_(n, wa3) / fnorm;
404     temp2 = sqrt(par) * pnorm / fnorm;
405     prered = temp1 * temp1 + temp2 * temp2 / p5;
406     dirder = -(temp1 * temp1 + temp2 * temp2);
407 
408     /* compute the ratio of the actual to the predicted
409        reduction */
410 
411     ratio = 0.0;
412     if (prered != 0.0) {
413 	ratio = actred / prered;
414     }
415 
416     /* update the step bound */
417 
418     if (ratio <= p25) {
419 	if (actred >= 0.0) {
420 	    temp = p5;
421 	}
422 	if (actred < 0.0) {
423 	    temp = p5 * dirder / (dirder + p5 * actred);
424 	}
425 	if (p1 * fnorm1 >= fnorm || temp < p1) {
426 	    temp = p1;
427 	}
428 	d = pnorm / p1;
429 	delta = temp * min(delta, d);
430 	par /= temp;
431     } else if (par == 0.0 || ratio >= p75) {
432 	delta = pnorm / p5;
433 	par = p5 * par;
434     }
435 
436     /* test for successful iteration */
437 
438     if (ratio >= p0001) {
439 	/* successful iteration: update x, fvec, and their norms */
440 	for (j = 0; j < n; ++j) {
441 	    x[j] = wa2[j];
442 	    wa2[j] = diag[j] * x[j];
443 	}
444 	for (i = 0; i < m; ++i) {
445 	    fvec[i] = wa4[i];
446 	}
447 	xnorm = enorm_(n, wa2);
448 	fnorm = fnorm1;
449 	++iter;
450     }
451 
452     /* tests for convergence */
453 
454     if (fabs(actred) <= ftol && prered <= ftol && p5 * ratio <= 1.0) {
455 	*info = 1;
456     }
457     if (delta <= xtol * xnorm) {
458 	*info = 2;
459     }
460     if (fabs(actred) <= ftol && prered <= ftol && p5 * ratio <= 1.0 && *info
461 	    == 2) {
462 	*info = 3;
463     }
464     if (*info != 0) {
465 	goto terminate;
466     }
467 
468     /* tests for termination and stringent tolerances */
469 
470     if (*nfev >= maxfev) {
471 	*info = 5;
472     }
473     if (fabs(actred) <= epsmch && prered <= epsmch && p5 * ratio <= 1.0) {
474 	*info = 6;
475     }
476     if (delta <= epsmch * xnorm) {
477 	*info = 7;
478     }
479     if (gnorm <= epsmch) {
480 	*info = 8;
481     }
482     if (*info != 0) {
483 	goto terminate;
484     }
485 
486     /* end of inner loop; repeat (only) if iteration unsuccessful */
487     if (ratio < p0001) {
488 	goto inner_start;
489     }
490 
491     /* end of the outer loop */
492     goto outer_start;
493 
494  terminate:
495 
496     /* termination, either normal or user-imposed */
497     if (iflag < 0) {
498 	*info = iflag;
499     }
500     if (nprint > 0) {
501 	iflag = 0;
502 	(*fcn)(m, n, x, fvec, &iflag, p);
503     }
504 
505     return 0;
506 }
507 
508