1 /* -------------------------------------------------------------------------- *
2 * OpenSim: Lmdif.cpp *
3 * -------------------------------------------------------------------------- *
4 * The OpenSim API is a toolkit for musculoskeletal modeling and simulation. *
5 * See http://opensim.stanford.edu and the NOTICE file for more information. *
6 * OpenSim is developed at Stanford University and supported by the US *
7 * National Institutes of Health (U54 GM072970, R24 HD065690) and by DARPA *
8 * through the Warrior Web program. *
9 * *
10 * Copyright (c) 2005-2017 Stanford University and the Authors *
11 * *
12 * Licensed under the Apache License, Version 2.0 (the "License"); you may *
13 * not use this file except in compliance with the License. You may obtain a *
14 * copy of the License at http://www.apache.org/licenses/LICENSE-2.0. *
15 * *
16 * Unless required by applicable law or agreed to in writing, software *
17 * distributed under the License is distributed on an "AS IS" BASIS, *
18 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
19 * See the License for the specific language governing permissions and *
20 * limitations under the License. *
21 * -------------------------------------------------------------------------- */
22
23 /************************lmdif*************************
24 *
25 * Solves or minimizes the sum of squares of m nonlinear
26 * functions of n variables.
27 *
28 * From public domain Fortran version
29 * of Argonne National Laboratories MINPACK
30 *
31 * C translation by Steve Moshier
32 *
33 * Passing C++ and thread-safe by Ned Phipps
34 *
35 *****************************************************/
36
37
38 #include <stdio.h>
39 #include <math.h>
40 #include "Lmdif.h"
41
42 void pmat(int m, int n, double y[]);
43 int mod(int k, int m);
44 int min0(int a, int b);
45 double dmin1(double a, double b);
46 double dmax1(double a, double b);
47
48 //#define BUG 0
49 int JACOBIAN;
50
51 void fdjac2(
52 void (*fcn)(int, int, double[], double[], int *, void *),
53 int m,
54 int n,
55 double x[],
56 double fvec[],
57 double fjac[],
58 int ldfjac,
59 int *iflag,
60 double epsfcn,
61 double wa[],
62 void *data);
63
64 double enorm(int n, double x[]);
65
66 void qrsolv(
67 int n,
68 double r[],
69 int ldr,
70 int ipvt[],
71 double diag[],
72 double qtb[],
73 double x[],
74 double sdiag[],
75 double wa[]);
76
77 void qrfac(
78 int m,
79 int n,
80 double a[],
81 int lda,
82 int pivot,
83 int ipvt[],
84 int lipvt,
85 double rdiag[],
86 double acnorm[],
87 double wa[]);
88
89 void lmpar(
90 int n,
91 double r[],
92 int ldr,
93 int ipvt[],
94 double diag[],
95 double qtb[],
96 double delta,
97 double *par,
98 double x[],
99 double sdiag[],
100 double wa1[],
101 double wa2[]);
102
103
104
105 #define MACHEP 1.2e-16
106 #define DWARF 1.0e-38
107
108
109
110 /*
111 * **********
112 *
113 * subroutine lmdif
114 *
115 * the purpose of lmdif is to minimize the sum of the squares of
116 * m nonlinear functions in n variables by a modification of
117 * the levenberg-marquardt algorithm. the user must provide a
118 * subroutine which calculates the functions. the jacobian is
119 * then calculated by a forward-difference approximation.
120 *
121 * the subroutine statement is
122 *
123 * subroutine lmdif(fcn,m,n,x,fvec,ftol,xtol,gtol,maxfev,epsfcn,
124 * diag,mode,factor,nprint,info,nfev,fjac,
125 * ldfjac,ipvt,qtf,wa1,wa2,wa3,wa4)
126 *
127 * where
128 *
129 * fcn is the name of the user-supplied subroutine which
130 * calculates the functions. fcn must be declared
131 * in an external statement in the user calling
132 * program, and should be written as follows.
133 *
134 * subroutine fcn(m,n,x,fvec,iflag)
135 * integer m,n,iflag
136 * double precision x(n),fvec(m)
137 * ----------
138 * calculate the functions at x and
139 * return this vector in fvec.
140 * ----------
141 * return
142 * end
143 *
144 * the value of iflag should not be changed by fcn unless
145 * the user wants to terminate execution of lmdif.
146 * in this case set iflag to a negative integer.
147 *
148 * m is a positive integer input variable set to the number
149 * of functions.
150 *
151 * n is a positive integer input variable set to the number
152 * of variables. n must not exceed m.
153 *
154 * x is an array of length n. on input x must contain
155 * an initial estimate of the solution vector. on output x
156 * contains the final estimate of the solution vector.
157 *
158 * fvec is an output array of length m which contains
159 * the functions evaluated at the output x.
160 *
161 * ftol is a nonnegative input variable. termination
162 * occurs when both the actual and predicted relative
163 * reductions in the sum of squares are at most ftol.
164 * therefore, ftol measures the relative error desired
165 * in the sum of squares.
166 *
167 * xtol is a nonnegative input variable. termination
168 * occurs when the relative error between two consecutive
169 * iterates is at most xtol. therefore, xtol measures the
170 * relative error desired in the approximate solution.
171 *
172 * gtol is a nonnegative input variable. termination
173 * occurs when the cosine of the angle between fvec and
174 * any column of the jacobian is at most gtol in absolute
175 * value. therefore, gtol measures the orthogonality
176 * desired between the function vector and the columns
177 * of the jacobian.
178 *
179 * maxfev is a positive integer input variable. termination
180 * occurs when the number of calls to fcn is at least
181 * maxfev by the end of an iteration.
182 *
183 * epsfcn is an input variable used in determining a suitable
184 * step length for the forward-difference approximation. this
185 * approximation assumes that the relative errors in the
186 * functions are of the order of epsfcn. if epsfcn is less
187 * than the machine precision, it is assumed that the relative
188 * errors in the functions are of the order of the machine
189 * precision.
190 *
191 * diag is an array of length n. if mode = 1 (see
192 * below), diag is internally set. if mode = 2, diag
193 * must contain positive entries that serve as
194 * multiplicative scale factors for the variables.
195 *
196 * mode is an integer input variable. if mode = 1, the
197 * variables will be scaled internally. if mode = 2,
198 * the scaling is specified by the input diag. other
199 * values of mode are equivalent to mode = 1.
200 *
201 * factor is a positive input variable used in determining the
202 * initial step bound. this bound is set to the product of
203 * factor and the euclidean norm of diag*x if nonzero, or else
204 * to factor itself. in most cases factor should lie in the
205 * interval (.1,100.). 100. is a generally recommended value.
206 *
207 * nprint is an integer input variable that enables controlled
208 * printing of iterates if it is positive. in this case,
209 * fcn is called with iflag = 0 at the beginning of the first
210 * iteration and every nprint iterations thereafter and
211 * immediately prior to return, with x and fvec available
212 * for printing. if nprint is not positive, no special calls
213 * of fcn with iflag = 0 are made.
214 *
215 * info is an integer output variable. if the user has
216 * terminated execution, info is set to the (negative)
217 * value of iflag. see description of fcn. otherwise,
218 * info is set as follows.
219 *
220 * info = 0 improper input parameters.
221 *
222 * info = 1 both actual and predicted relative reductions
223 * in the sum of squares are at most ftol.
224 *
225 * info = 2 relative error between two consecutive iterates
226 * is at most xtol.
227 *
228 * info = 3 conditions for info = 1 and info = 2 both hold.
229 *
230 * info = 4 the cosine of the angle between fvec and any
231 * column of the jacobian is at most gtol in
232 * absolute value.
233 *
234 * info = 5 number of calls to fcn has reached or
235 * exceeded maxfev.
236 *
237 * info = 6 ftol is too small. no further reduction in
238 * the sum of squares is possible.
239 *
240 * info = 7 xtol is too small. no further improvement in
241 * the approximate solution x is possible.
242 *
243 * info = 8 gtol is too small. fvec is orthogonal to the
244 * columns of the jacobian to machine precision.
245 *
246 * nfev is an integer output variable set to the number of
247 * calls to fcn.
248 *
249 * fjac is an output m by n array. the upper n by n submatrix
250 * of fjac contains an upper triangular matrix r with
251 * diagonal elements of nonincreasing magnitude such that
252 *
253 * t t t
254 * p *(jac *jac)*p = r *r,
255 *
256 * where p is a permutation matrix and jac is the final
257 * calculated jacobian. column j of p is column ipvt(j)
258 * (see below) of the identity matrix. the lower trapezoidal
259 * part of fjac contains information generated during
260 * the computation of r.
261 *
262 * ldfjac is a positive integer input variable not less than m
263 * which specifies the leading dimension of the array fjac.
264 *
265 * ipvt is an integer output array of length n. ipvt
266 * defines a permutation matrix p such that jac*p = q*r,
267 * where jac is the final calculated jacobian, q is
268 * orthogonal (not stored), and r is upper triangular
269 * with diagonal elements of nonincreasing magnitude.
270 * column j of p is column ipvt(j) of the identity matrix.
271 *
272 * qtf is an output array of length n which contains
273 * the first n elements of the vector (q transpose)*fvec.
274 *
275 * wa1, wa2, and wa3 are work arrays of length n.
276 *
277 * wa4 is a work array of length m.
278 *
279 * subprograms called
280 *
281 * user-supplied ...... fcn
282 *
283 * minpack-supplied ... dpmpar,enorm,fdjac2,lmpar,qrfac
284 *
285 * fortran-supplied ... dabs,dmax1,dmin1,dsqrt,mod
286 *
287 * argonne national laboratory. minpack project. march 1980.
288 * burton s. garbow, kenneth e. hillstrom, jorge j. more
289 *
290 * **********
291 */
lmdif_C(void (* fcn)(int,int,double[],double[],int *,void *),int m,int n,double x[],double fvec[],double ftol,double xtol,double gtol,int maxfev,double epsfcn,double diag[],int mode,double factor,int nprint,int * info,int * nfev,double fjac[],int ldfjac,int ipvt[],double qtf[],double wa1[],double wa2[],double wa3[],double wa4[],void * data)292 void lmdif_C(
293 void (*fcn)(int, int, double[], double[], int *, void *),
294 int m,
295 int n,
296 double x[],
297 double fvec[],
298 double ftol,
299 double xtol,
300 double gtol,
301 int maxfev,
302 double epsfcn,
303 double diag[],
304 int mode,
305 double factor,
306 int nprint,
307 int *info,
308 int *nfev,
309 double fjac[],
310 int ldfjac,
311 int ipvt[],
312 double qtf[],
313 double wa1[],
314 double wa2[],
315 double wa3[],
316 double wa4[],
317 void *data)
318 {
319 int i;
320 int iflag;
321 int ij;
322 int jj;
323 int iter;
324 int j;
325 int l;
326 double actred;
327 double delta;
328 double dirder;
329 double fnorm;
330 double fnorm1;
331 double gnorm;
332 double par;
333 double pnorm;
334 double prered;
335 double ratio;
336 double sum;
337 double temp;
338 double temp1;
339 double temp2;
340 double temp3;
341 double xnorm;
342 static double one = 1.0;
343 static double p1 = 0.1;
344 static double p5 = 0.5;
345 static double p25 = 0.25;
346 static double p75 = 0.75;
347 static double p0001 = 1.0e-4;
348 static double zero = 0.0;
349 //static double p05 = 0.05;
350
351 *info = 0;
352 iflag = 0;
353 *nfev = 0;
354
355 /*
356 * check the input parameters for errors.
357 */
358 if ((n <= 0) || (m < n) || (ldfjac < m) || (ftol < zero)
359 || (xtol < zero) || (gtol < zero) || (maxfev <= 0)
360 || (factor <= zero))
361 goto L300;
362
363 if (mode == 2)
364 {
365 /* scaling by diag[] */
366 for (j=0; j<n; j++)
367 {
368 if (diag[j] <= 0.0)
369 goto L300;
370 }
371 }
372
373 #ifdef BUG
374 printf( "lmdif\n" );
375 #endif
376
377 /* evaluate the function at the starting point
378 * and calculate its norm.
379 */
380 iflag = 1;
381 fcn(m,n,x,fvec,&iflag, data);
382 *nfev = 1;
383 if (iflag < 0)
384 goto L300;
385
386 fnorm = enorm(m,fvec);
387 /* initialize levenberg-marquardt parameter and iteration counter. */
388 par = zero;
389 iter = 1;
390
391 /* beginning of the outer loop. */
392 L30:
393
394 /* calculate the jacobian matrix. */
395 iflag = 2;
396 fdjac2(fcn, m,n,x,fvec,fjac,ldfjac,&iflag,epsfcn,wa4, data);
397 // commented out DKB
398 // *nfev += n;
399 if (iflag < 0)
400 goto L300;
401
402 /* if requested, call fcn to enable printing of iterates. */
403 if (nprint > 0)
404 {
405 iflag = 0;
406 if (mod(iter-1,nprint) == 0)
407 {
408 fcn(m,n,x,fvec,&iflag, data);
409 if (iflag < 0)
410 goto L300;
411 // printf( "fnorm %.15e\n", enorm(m,fvec));
412 }
413 }
414 /* compute the qr factorization of the jacobian. */
415 qrfac(m,n,fjac,ldfjac,1,ipvt,n,wa1,wa2,wa3);
416 // for (j = 0; j < n; j++)
417 // {
418 // printf("wa1[%d] = %e\n", j, wa1[j]);
419 // printf("wa2[%d] = %e\n", j, wa2[j]);
420 // printf("wa3[%d] = %e\n", j, wa3[j]);
421 // }
422 /* on the first iteration and if mode is 1, scale according
423 * to the norms of the columns of the initial jacobian.
424 */
425 if (iter == 1)
426 {
427 // printf("iter = 1, mode = %d\n", mode);
428 if (mode != 2)
429 {
430 for (j=0; j<n; j++)
431 {
432 diag[j] = wa2[j];
433 if (wa2[j] == zero)
434 diag[j] = one;
435 }
436 }
437
438 /* on the first iteration, calculate the norm of the scaled x
439 * and initialize the step bound delta.
440 */
441 for (j=0; j<n; j++)
442 wa3[j] = diag[j] * x[j];
443
444 xnorm = enorm(n,wa3);
445 delta = factor*xnorm;
446 // printf("iter1: xnorm = %e, delta = %e\n", xnorm, delta);
447 //dkb
448 if (fabs(delta) <= 1e-4)
449 // if (delta == zero)
450 delta = factor;
451 }
452
453 /* form (q transpose)*fvec and store the first n components in qtf. */
454 for (i=0; i<m; i++)
455 wa4[i] = fvec[i];
456
457 jj = 0;
458 for (j=0; j<n; j++)
459 {
460 temp3 = fjac[jj];
461 if (temp3 != zero)
462 {
463 sum = zero;
464 ij = jj;
465 for (i=j; i<m; i++)
466 {
467 sum += fjac[ij] * wa4[i];
468 ij += 1; /* fjac[i+m*j] */
469 }
470 temp = -sum / temp3;
471 ij = jj;
472 for (i=j; i<m; i++)
473 {
474 wa4[i] += fjac[ij] * temp;
475 ij += 1; /* fjac[i+m*j] */
476 }
477 }
478 fjac[jj] = wa1[j];
479 jj += m+1; /* fjac[j+m*j] */
480 qtf[j] = wa4[j];
481 }
482
483 /* compute the norm of the scaled gradient. */
484 gnorm = zero;
485 if (fnorm != zero)
486 {
487 jj = 0;
488 for (j=0; j<n; j++)
489 {
490 l = ipvt[j];
491 if (wa2[l] != zero)
492 {
493 sum = zero;
494 ij = jj;
495 for (i=0; i<=j; i++)
496 {
497 sum += fjac[ij]*(qtf[i]/fnorm);
498 ij += 1; /* fjac[i+m*j] */
499 }
500 gnorm = dmax1(gnorm,fabs(sum/wa2[l]));
501 }
502 jj += m;
503 }
504 }
505
506 /* test for convergence of the gradient norm. */
507 if (gnorm <= gtol)
508 *info = 4;
509 if (*info != 0)
510 goto L300;
511
512 //for (j = 0; j < n; j++)
513 // printf("diag[%d] = %e, wa2[%d] = %e\n", j, diag[j], j, wa2[j]);
514 /* rescale if necessary. */
515 if (mode != 2)
516 {
517 for (j=0; j<n; j++)
518 diag[j] = dmax1(diag[j],wa2[j]);
519 }
520
521 /* beginning of the inner loop. */
522 L200:
523
524 /* determine the levenberg-marquardt parameter. */
525 lmpar(n,fjac,ldfjac,ipvt,diag,qtf,delta,&par,wa1,wa2,wa3,wa4);
526 /* store the direction p and x + p. calculate the norm of p. */
527 for (j=0; j<n; j++)
528 {
529 wa1[j] = -wa1[j];
530 wa2[j] = x[j] + wa1[j];
531 wa3[j] = diag[j]*wa1[j];
532 //printf("wa2[%d] = %e + %e = %e\n", j, x[j], wa1[j], wa2[j]);
533 }
534 pnorm = enorm(n,wa3);
535 /* on the first iteration, adjust the initial step bound. */
536 if (iter == 1)
537 delta = dmin1(delta,pnorm);
538
539 /* evaluate the function at x + p and calculate its norm. */
540 iflag = 1;
541 //printf("evaluate at:\n");
542 //for (j=0; j<n; j++)
543 // printf("wa2[%d] = %e\n", j, wa2[j]);
544 fcn(m,n,wa2,wa4,&iflag, data);
545 *nfev += 1;
546 if (iflag < 0)
547 goto L300;
548
549 fnorm1 = enorm(m,wa4);
550
551 #ifdef BUG
552 printf( "pnorm %.10e fnorm1 %.10e\n", pnorm, fnorm1 );
553 #endif
554
555 /* compute the scaled actual reduction. */
556 actred = -one;
557 if ((p1*fnorm1) < fnorm)
558 {
559 temp = fnorm1/fnorm;
560 actred = one - temp * temp;
561 }
562 /* compute the scaled predicted reduction and
563 * the scaled directional derivative.
564 */
565 jj = 0;
566 for (j=0; j<n; j++)
567 {
568 wa3[j] = zero;
569 l = ipvt[j];
570 temp = wa1[l];
571 ij = jj;
572 for (i=0; i<=j; i++)
573 {
574 wa3[i] += fjac[ij]*temp;
575 ij += 1; /* fjac[i+m*j] */
576 }
577 jj += m;
578 }
579 temp1 = enorm(n,wa3)/fnorm;
580 temp2 = (sqrt(par)*pnorm)/fnorm;
581 prered = temp1*temp1 + (temp2*temp2)/p5;
582 dirder = -(temp1*temp1 + temp2*temp2);
583 /* compute the ratio of the actual to the predicted reduction. */
584 ratio = zero;
585 if (prered != zero)
586 ratio = actred/prered;
587 /* update the step bound. */
588 if (ratio <= p25)
589 {
590 if (actred >= zero)
591 temp = p5;
592 else
593 temp = p5*dirder/(dirder + p5*actred);
594 if (((p1*fnorm1) >= fnorm) || (temp < p1))
595 temp = p1;
596
597 delta = temp*dmin1(delta,pnorm/p1);
598 par = par/temp;
599 }
600 else
601 {
602 if ((par == zero) || (ratio >= p75))
603 {
604 delta = pnorm/p5;
605 par = p5*par;
606 }
607 }
608 /* test for successful iteration. */
609 if (ratio >= p0001)
610 {
611 /* successful iteration. update x, fvec, and their norms. */
612 for (j=0; j<n; j++)
613 {
614 x[j] = wa2[j];
615 wa2[j] = diag[j]*x[j];
616 }
617 for (i=0; i<m; i++)
618 fvec[i] = wa4[i];
619
620 xnorm = enorm(n,wa2);
621 fnorm = fnorm1;
622 iter += 1;
623 }
624 /* tests for convergence. */
625 if ((fabs(actred) <= ftol) && (prered <= ftol) && (p5*ratio <= one))
626 {
627 *info = 1;
628 }
629
630 if (delta <= xtol*xnorm)
631 *info = 2;
632
633 if ((fabs(actred) <= ftol) && (prered <= ftol)
634 && (p5*ratio <= one) && (*info == 2))
635 {
636 *info = 3;
637 }
638
639 if (*info != 0)
640 goto L300;
641
642 /* tests for termination and stringent tolerances. */
643 if (*nfev >= maxfev)
644 *info = 5;
645
646 if ((fabs(actred) <= MACHEP) && (prered <= MACHEP) && (p5*ratio <= one))
647 {
648 *info = 6;
649 }
650
651 if (delta <= MACHEP*xnorm)
652 *info = 7;
653
654 if (gnorm <= MACHEP)
655 *info = 8;
656
657 if (*info != 0)
658 goto L300;
659
660 /* end of the inner loop. repeat if iteration unsuccessful. */
661 if (ratio < p0001)
662 goto L200;
663
664 /* end of the outer loop. */
665 goto L30;
666
667 L300:
668
669 /* termination, either normal or user imposed. */
670 if (iflag < 0)
671 *info = iflag;
672
673 iflag = 0;
674 if (nprint > 0)
675 fcn(m,n,x,fvec,&iflag, data);
676 }
677
678
679 /* **********
680 *
681 * subroutine lmpar
682 *
683 * given an m by n matrix a, an n by n nonsingular diagonal
684 * matrix d, an m-vector b, and a positive number delta,
685 * the problem is to determine a value for the parameter
686 * par such that if x solves the system
687 *
688 * a*x = b , sqrt(par)*d*x = 0 ,
689 *
690 * in the least squares sense, and dxnorm is the euclidean
691 * norm of d*x, then either par is zero and
692 *
693 * (dxnorm-delta) .le. 0.1*delta ,
694 *
695 * or par is positive and
696 *
697 * abs(dxnorm-delta) .le. 0.1*delta .
698 *
699 * this subroutine completes the solution of the problem
700 * if it is provided with the necessary information from the
701 * qr factorization, with column pivoting, of a. that is, if
702 * a*p = q*r, where p is a permutation matrix, q has orthogonal
703 * columns, and r is an upper triangular matrix with diagonal
704 * elements of nonincreasing magnitude, then lmpar expects
705 * the full upper triangle of r, the permutation matrix p,
706 * and the first n components of (q transpose)*b. on output
707 * lmpar also provides an upper triangular matrix s such that
708 *
709 * t t t
710 * p *(a *a + par*d*d)*p = s *s .
711 *
712 * s is employed within lmpar and may be of separate interest.
713 *
714 * only a few iterations are generally needed for convergence
715 * of the algorithm. if, however, the limit of 10 iterations
716 * is reached, then the output par will contain the best
717 * value obtained so far.
718 *
719 * the subroutine statement is
720 *
721 * subroutine lmpar(n,r,ldr,ipvt,diag,qtb,delta,par,x,sdiag,
722 * wa1,wa2)
723 *
724 * where
725 *
726 * n is a positive integer input variable set to the order of r.
727 *
728 * r is an n by n array. on input the full upper triangle
729 * must contain the full upper triangle of the matrix r.
730 * on output the full upper triangle is unaltered, and the
731 * strict lower triangle contains the strict upper triangle
732 * (transposed) of the upper triangular matrix s.
733 *
734 * ldr is a positive integer input variable not less than n
735 * which specifies the leading dimension of the array r.
736 *
737 * ipvt is an integer input array of length n which defines the
738 * permutation matrix p such that a*p = q*r. column j of p
739 * is column ipvt(j) of the identity matrix.
740 *
741 * diag is an input array of length n which must contain the
742 * diagonal elements of the matrix d.
743 *
744 * qtb is an input array of length n which must contain the first
745 * n elements of the vector (q transpose)*b.
746 *
747 * delta is a positive input variable which specifies an upper
748 * bound on the euclidean norm of d*x.
749 *
750 * par is a nonnegative variable. on input par contains an
751 * initial estimate of the levenberg-marquardt parameter.
752 * on output par contains the final estimate.
753 *
754 * x is an output array of length n which contains the least
755 * squares solution of the system a*x = b, sqrt(par)*d*x = 0,
756 * for the output par.
757 *
758 * sdiag is an output array of length n which contains the
759 * diagonal elements of the upper triangular matrix s.
760 *
761 * wa1 and wa2 are work arrays of length n.
762 *
763 * subprograms called
764 *
765 * minpack-supplied ... dpmpar,enorm,qrsolv
766 *
767 * fortran-supplied ... dabs,dmax1,dmin1,dsqrt
768 *
769 * argonne national laboratory. minpack project. march 1980.
770 * burton s. garbow, kenneth e. hillstrom, jorge j. more
771 *
772 * **********
773 */
lmpar(int n,double r[],int ldr,int ipvt[],double diag[],double qtb[],double delta,double * par,double x[],double sdiag[],double wa1[],double wa2[])774 void lmpar(
775 int n,
776 double r[],
777 int ldr,
778 int ipvt[],
779 double diag[],
780 double qtb[],
781 double delta,
782 double *par,
783 double x[],
784 double sdiag[],
785 double wa1[],
786 double wa2[])
787 {
788 int i;
789 int iter;
790 int ij;
791 int jj;
792 int j;
793 int jm1;
794 int jp1;
795 int k;
796 int l;
797 int nsing;
798 double dxnorm;
799 double fp;
800 double gnorm;
801 double parc;
802 double parl;
803 double paru;
804 double sum;
805 double temp;
806 static double zero = 0.0;
807 //static double one = 1.0;
808 static double p1 = 0.1;
809 static double p001 = 0.001;
810
811 #ifdef BUG
812 printf( "lmpar\n" );
813 #endif
814
815 /* compute and store in x the gauss-newton direction. if the
816 * jacobian is rank-deficient, obtain a least squares solution.
817 */
818 nsing = n;
819 jj = 0;
820 for (j=0; j<n; j++)
821 {
822 wa1[j] = qtb[j];
823 if ((r[jj] == zero) && (nsing == n))
824 nsing = j;
825
826 if (nsing < n)
827 wa1[j] = zero;
828
829 jj += ldr+1; /* [j+ldr*j] */
830 }
831
832 #ifdef BUG
833 printf( "nsing %d ", nsing );
834 #endif
835
836 if (nsing >= 1)
837 {
838 for (k=0; k<nsing; k++)
839 {
840 j = nsing - k - 1;
841 wa1[j] = wa1[j]/r[j+ldr*j];
842 temp = wa1[j];
843 jm1 = j - 1;
844 if (jm1 >= 0)
845 {
846 ij = ldr * j;
847 for (i=0; i<=jm1; i++)
848 {
849 wa1[i] -= r[ij]*temp;
850 ij += 1;
851 }
852 }
853 }
854 }
855
856 for (j=0; j<n; j++)
857 {
858 l = ipvt[j];
859 x[l] = wa1[j];
860 }
861 /* initialize the iteration counter.
862 * evaluate the function at the origin, and test
863 * for acceptance of the gauss-newton direction.
864 */
865 iter = 0;
866 for (j=0; j<n; j++)
867 wa2[j] = diag[j]*x[j];
868
869 dxnorm = enorm(n,wa2);
870 fp = dxnorm - delta;
871 if (fp <= p1*delta)
872 {
873 #ifdef BUG
874 printf( "going to L220\n" );
875 #endif
876
877 goto L220;
878 }
879
880 /* if the jacobian is not rank deficient, the newton
881 * step provides a lower bound, parl, for the zero of
882 * the function. otherwise set this bound to zero.
883 */
884 parl = zero;
885 if (nsing >= n)
886 {
887 for (j=0; j<n; j++)
888 {
889 l = ipvt[j];
890 wa1[j] = diag[l]*(wa2[l]/dxnorm);
891 }
892 jj = 0;
893 for (j=0; j<n; j++)
894 {
895 sum = zero;
896 jm1 = j - 1;
897 if (jm1 >= 0)
898 {
899 ij = jj;
900 for (i=0; i<=jm1; i++)
901 {
902 sum += r[ij]*wa1[i];
903 ij += 1;
904 }
905 }
906 wa1[j] = (wa1[j] - sum)/r[j+ldr*j];
907 jj += ldr; /* [i+ldr*j] */
908 }
909 temp = enorm(n,wa1);
910 parl = ((fp/delta)/temp)/temp;
911 }
912 /* calculate an upper bound, paru, for the zero of the function. */
913 jj = 0;
914 for (j=0; j<n; j++)
915 {
916 sum = zero;
917 ij = jj;
918 for (i=0; i<=j; i++)
919 {
920 sum += r[ij]*qtb[i];
921 ij += 1;
922 }
923 l = ipvt[j];
924 wa1[j] = sum/diag[l];
925 jj += ldr; /* [i+ldr*j] */
926 }
927 gnorm = enorm(n,wa1);
928 paru = gnorm/delta;
929 if(paru == zero)
930 paru = DWARF/dmin1(delta,p1);
931
932 /* if the input par lies outside of the interval (parl,paru),
933 * set par to the closer endpoint.
934 */
935 *par = dmax1(*par,parl);
936 *par = dmin1(*par,paru);
937 if (*par == zero)
938 *par = gnorm/dxnorm;
939
940 #ifdef BUG
941 printf( "parl %.4e par %.4e paru %.4e\n", parl, *par, paru );
942 #endif
943
944 /* beginning of an iteration. */
945
946 L150:
947
948 iter += 1;
949 /* evaluate the function at the current value of par. */
950 if (*par == zero)
951 *par = dmax1(DWARF,p001*paru);
952
953 temp = sqrt(*par);
954 for (j=0; j<n; j++)
955 wa1[j] = temp*diag[j];
956
957 qrsolv(n,r,ldr,ipvt,wa1,qtb,x,sdiag,wa2);
958 for (j=0; j<n; j++)
959 wa2[j] = diag[j]*x[j];
960
961 dxnorm = enorm(n,wa2);
962 temp = fp;
963 fp = dxnorm - delta;
964 /* if the function is small enough, accept the current value
965 * of par. also test for the exceptional cases where parl
966 * is zero or the number of iterations has reached 10.
967 */
968 if ((fabs(fp) <= p1*delta)
969 || ((parl == zero) && (fp <= temp) && (temp < zero))
970 || (iter == 10))
971 {
972 goto L220;
973 }
974 /* compute the newton correction. */
975 for (j=0; j<n; j++)
976 {
977 l = ipvt[j];
978 wa1[j] = diag[l]*(wa2[l]/dxnorm);
979 }
980 jj = 0;
981 for (j=0; j<n; j++)
982 {
983 wa1[j] = wa1[j]/sdiag[j];
984 temp = wa1[j];
985 jp1 = j + 1;
986 if (jp1 < n)
987 {
988 ij = jp1 + jj;
989 for (i=jp1; i<n; i++)
990 {
991 wa1[i] -= r[ij]*temp;
992 ij += 1; /* [i+ldr*j] */
993 }
994 }
995 jj += ldr; /* ldr*j */
996 }
997 temp = enorm(n,wa1);
998 parc = ((fp/delta)/temp)/temp;
999
1000 /* depending on the sign of the function, update parl or paru. */
1001 if (fp > zero)
1002 parl = dmax1(parl, *par);
1003
1004 if (fp < zero)
1005 paru = dmin1(paru, *par);
1006
1007 /* compute an improved estimate for par. */
1008 *par = dmax1(parl, *par + parc);
1009
1010 /* end of an iteration. */
1011
1012 goto L150;
1013
1014 L220:
1015
1016 /* termination. */
1017
1018 if (iter == 0)
1019 *par = zero;
1020 }
1021
1022
1023 /*
1024 * **********
1025 *
1026 * subroutine qrfac
1027 *
1028 * this subroutine uses householder transformations with column
1029 * pivoting (optional) to compute a qr factorization of the
1030 * m by n matrix a. that is, qrfac determines an orthogonal
1031 * matrix q, a permutation matrix p, and an upper trapezoidal
1032 * matrix r with diagonal elements of nonincreasing magnitude,
1033 * such that a*p = q*r. the householder transformation for
1034 * column k, k = 1,2,...,min(m,n), is of the form
1035 *
1036 * t
1037 * i - (1/u(k))*u*u
1038 *
1039 * where u has zeros in the first k-1 positions. the form of
1040 * this transformation and the method of pivoting first
1041 * appeared in the corresponding linpack subroutine.
1042 *
1043 * the subroutine statement is
1044 *
1045 * subroutine qrfac(m,n,a,lda,pivot,ipvt,lipvt,rdiag,acnorm,wa)
1046 *
1047 * where
1048 *
1049 * m is a positive integer input variable set to the number
1050 * of rows of a.
1051 *
1052 * n is a positive integer input variable set to the number
1053 * of columns of a.
1054 *
1055 * a is an m by n array. on input a contains the matrix for
1056 * which the qr factorization is to be computed. on output
1057 * the strict upper trapezoidal part of a contains the strict
1058 * upper trapezoidal part of r, and the lower trapezoidal
1059 * part of a contains a factored form of q (the non-trivial
1060 * elements of the u vectors described above).
1061 *
1062 * lda is a positive integer input variable not less than m
1063 * which specifies the leading dimension of the array a.
1064 *
1065 * pivot is a logical input variable. if pivot is set true,
1066 * then column pivoting is enforced. if pivot is set false,
1067 * then no column pivoting is done.
1068 *
1069 * ipvt is an integer output array of length lipvt. ipvt
1070 * defines the permutation matrix p such that a*p = q*r.
1071 * column j of p is column ipvt(j) of the identity matrix.
1072 * if pivot is false, ipvt is not referenced.
1073 *
1074 * lipvt is a positive integer input variable. if pivot is false,
1075 * then lipvt may be as small as 1. if pivot is true, then
1076 * lipvt must be at least n.
1077 *
1078 * rdiag is an output array of length n which contains the
1079 * diagonal elements of r.
1080 *
1081 * acnorm is an output array of length n which contains the
1082 * norms of the corresponding columns of the input matrix a.
1083 * if this information is not needed, then acnorm can coincide
1084 * with rdiag.
1085 *
1086 * wa is a work array of length n. if pivot is false, then wa
1087 * can coincide with rdiag.
1088 *
1089 * subprograms called
1090 *
1091 * minpack-supplied ... dpmpar,enorm
1092 *
1093 * fortran-supplied ... dmax1,dsqrt,min0
1094 *
1095 * argonne national laboratory. minpack project. march 1980.
1096 * burton s. garbow, kenneth e. hillstrom, jorge j. more
1097 *
1098 * **********
1099 */
qrfac(int m,int n,double a[],int lda,int pivot,int ipvt[],int lipvt,double rdiag[],double acnorm[],double wa[])1100 void qrfac(int m,
1101 int n,
1102 double a[],
1103 int lda,
1104 int pivot,
1105 int ipvt[],
1106 int lipvt,
1107 double rdiag[],
1108 double acnorm[],
1109 double wa[])
1110 {
1111 int i;
1112 int ij;
1113 int jj;
1114 int j;
1115 int jp1;
1116 int k;
1117 int kmax;
1118 int minmn;
1119 double ajnorm;
1120 double sum;
1121 double temp;
1122 static double zero = 0.0;
1123 static double one = 1.0;
1124 static double p05 = 0.05;
1125
1126 /* compute the initial column norms and initialize several arrays. */
1127 //printf("\nqrfac\n");
1128 ij = 0;
1129 for (j=0; j<n; j++)
1130 {
1131 acnorm[j] = enorm(m,&a[ij]);
1132 rdiag[j] = acnorm[j];
1133 wa[j] = rdiag[j];
1134 if (pivot != 0)
1135 ipvt[j] = j;
1136
1137 ij += m; /* m*j */
1138 // printf("acnorm[%d] = %e\n", j, acnorm[j]);
1139 // printf("rdiag[%d] = %e\n", j, rdiag[j]);
1140 }
1141
1142 #ifdef BUG
1143 printf( "qrfac\n" );
1144 #endif
1145
1146 /* reduce a to r with householder transformations. */
1147
1148 minmn = min0(m,n);
1149 for (j=0; j<minmn; j++)
1150 {
1151 if (pivot == 0)
1152 goto L40;
1153
1154 /* bring the column of largest norm into the pivot position. */
1155 kmax = j;
1156 for (k=j; k<n; k++)
1157 {
1158 if (rdiag[k] > rdiag[kmax])
1159 kmax = k;
1160 }
1161 if (kmax == j)
1162 goto L40;
1163
1164 ij = m * j;
1165 jj = m * kmax;
1166 for (i=0; i<m; i++)
1167 {
1168 temp = a[ij]; /* [i+m*j] */
1169 a[ij] = a[jj]; /* [i+m*kmax] */
1170 a[jj] = temp;
1171 ij += 1;
1172 jj += 1;
1173 }
1174 rdiag[kmax] = rdiag[j];
1175 wa[kmax] = wa[j];
1176 k = ipvt[j];
1177 ipvt[j] = ipvt[kmax];
1178 ipvt[kmax] = k;
1179
1180 L40:
1181
1182 /* compute the householder transformation to reduce the
1183 * j-th column of a to a multiple of the j-th unit vector.
1184 */
1185 jj = j + m*j;
1186 ajnorm = enorm(m-j,&a[jj]);
1187 if (ajnorm == zero)
1188 goto L100;
1189
1190 if (a[jj] < zero)
1191 ajnorm = -ajnorm;
1192
1193 ij = jj;
1194 for (i=j; i<m; i++)
1195 {
1196 a[ij] /= ajnorm;
1197 ij += 1; /* [i+m*j] */
1198 }
1199 a[jj] += one;
1200
1201 /* apply the transformation to the remaining columns
1202 * and update the norms.
1203 */
1204 jp1 = j + 1;
1205 if (jp1 < n)
1206 {
1207 for (k=jp1; k<n; k++)
1208 {
1209 sum = zero;
1210 ij = j + m*k;
1211 jj = j + m*j;
1212 for (i=j; i<m; i++)
1213 {
1214 sum += a[jj]*a[ij];
1215 ij += 1; /* [i+m*k] */
1216 jj += 1; /* [i+m*j] */
1217 }
1218 temp = sum/a[j+m*j];
1219 ij = j + m*k;
1220 jj = j + m*j;
1221 for (i=j; i<m; i++)
1222 {
1223 a[ij] -= temp*a[jj];
1224 ij += 1; /* [i+m*k] */
1225 jj += 1; /* [i+m*j] */
1226 }
1227 if ((pivot != 0) && (rdiag[k] != zero))
1228 {
1229 temp = a[j+m*k]/rdiag[k];
1230 temp = dmax1(zero, one-temp*temp);
1231 rdiag[k] *= sqrt(temp);
1232 temp = rdiag[k]/wa[k];
1233 if ((p05*temp*temp) <= MACHEP)
1234 {
1235 rdiag[k] = enorm(m-j-1,&a[jp1+m*k]);
1236 wa[k] = rdiag[k];
1237 }
1238 }
1239 }
1240 }
1241
1242 L100:
1243
1244 rdiag[j] = -ajnorm;
1245 }
1246 }
1247
1248
1249 /*
1250 * **********
1251 *
1252 * subroutine qrsolv
1253 *
1254 * given an m by n matrix a, an n by n diagonal matrix d,
1255 * and an m-vector b, the problem is to determine an x which
1256 * solves the system
1257 *
1258 * a*x = b , d*x = 0 ,
1259 *
1260 * in the least squares sense.
1261 *
1262 * this subroutine completes the solution of the problem
1263 * if it is provided with the necessary information from the
1264 * qr factorization, with column pivoting, of a. that is, if
1265 * a*p = q*r, where p is a permutation matrix, q has orthogonal
1266 * columns, and r is an upper triangular matrix with diagonal
1267 * elements of nonincreasing magnitude, then qrsolv expects
1268 * the full upper triangle of r, the permutation matrix p,
1269 * and the first n components of (q transpose)*b. the system
1270 * a*x = b, d*x = 0, is then equivalent to
1271 *
1272 * t t
1273 * r*z = q *b , p *d*p*z = 0 ,
1274 *
1275 * where x = p*z. if this system does not have full rank,
1276 * then a least squares solution is obtained. on output qrsolv
1277 * also provides an upper triangular matrix s such that
1278 *
1279 * t t t
1280 * p *(a *a + d*d)*p = s *s .
1281 *
1282 * s is computed within qrsolv and may be of separate interest.
1283 *
1284 * the subroutine statement is
1285 *
1286 * subroutine qrsolv(n,r,ldr,ipvt,diag,qtb,x,sdiag,wa)
1287 *
1288 * where
1289 *
1290 * n is a positive integer input variable set to the order of r.
1291 *
1292 * r is an n by n array. on input the full upper triangle
1293 * must contain the full upper triangle of the matrix r.
1294 * on output the full upper triangle is unaltered, and the
1295 * strict lower triangle contains the strict upper triangle
1296 * (transposed) of the upper triangular matrix s.
1297 *
1298 * ldr is a positive integer input variable not less than n
1299 * which specifies the leading dimension of the array r.
1300 *
1301 * ipvt is an integer input array of length n which defines the
1302 * permutation matrix p such that a*p = q*r. column j of p
1303 * is column ipvt(j) of the identity matrix.
1304 *
1305 * diag is an input array of length n which must contain the
1306 * diagonal elements of the matrix d.
1307 *
1308 * qtb is an input array of length n which must contain the first
1309 * n elements of the vector (q transpose)*b.
1310 *
1311 * x is an output array of length n which contains the least
1312 * squares solution of the system a*x = b, d*x = 0.
1313 *
1314 * sdiag is an output array of length n which contains the
1315 * diagonal elements of the upper triangular matrix s.
1316 *
1317 * wa is a work array of length n.
1318 *
1319 * subprograms called
1320 *
1321 * fortran-supplied ... dabs,dsqrt
1322 *
1323 * argonne national laboratory. minpack project. march 1980.
1324 * burton s. garbow, kenneth e. hillstrom, jorge j. more
1325 *
1326 * **********
1327 */
1328
qrsolv(int n,double r[],int ldr,int ipvt[],double diag[],double qtb[],double x[],double sdiag[],double wa[])1329 void qrsolv(int n,
1330 double r[],
1331 int ldr,
1332 int ipvt[],
1333 double diag[],
1334 double qtb[],
1335 double x[],
1336 double sdiag[],
1337 double wa[])
1338 {
1339 int i;
1340 int ij;
1341 int ik;
1342 int kk;
1343 int j;
1344 int jp1;
1345 int k;
1346 int kp1;
1347 int l;
1348 int nsing;
1349 double cos;
1350 double cotan;
1351 double qtbpj;
1352 double sin;
1353 double sum;
1354 double tan;
1355 double temp;
1356 static double zero = 0.0;
1357 static double p25 = 0.25;
1358 static double p5 = 0.5;
1359 //double fabs(), sqrt();
1360
1361 /* copy r and (q transpose)*b to preserve input and initialize s.
1362 * in particular, save the diagonal elements of r in x.
1363 */
1364 kk = 0;
1365 for (j=0; j<n; j++)
1366 {
1367 ij = kk;
1368 ik = kk;
1369 for (i=j; i<n; i++)
1370 {
1371 r[ij] = r[ik];
1372 ij += 1; /* [i+ldr*j] */
1373 ik += ldr; /* [j+ldr*i] */
1374 }
1375 x[j] = r[kk];
1376 wa[j] = qtb[j];
1377 kk += ldr+1; /* j+ldr*j */
1378 }
1379 #ifdef BUG
1380 printf( "qrsolv\n" );
1381 #endif
1382 /* eliminate the diagonal matrix d using a givens rotation. */
1383 for (j=0; j<n; j++)
1384 {
1385 /* prepare the row of d to be eliminated, locating the
1386 * diagonal element using p from the qr factorization.
1387 */
1388 l = ipvt[j];
1389 if (diag[l] == zero)
1390 goto L90;
1391
1392 for (k=j; k<n; k++)
1393 sdiag[k] = zero;
1394
1395 sdiag[j] = diag[l];
1396
1397 /* the transformations to eliminate the row of d
1398 * modify only a single element of (q transpose)*b
1399 * beyond the first n, which is initially zero.
1400 */
1401 qtbpj = zero;
1402 for (k=j; k<n; k++)
1403 {
1404 /* determine a givens rotation which eliminates the
1405 * appropriate element in the current row of d.
1406 */
1407 if (sdiag[k] == zero)
1408 continue;
1409
1410 kk = k + ldr * k;
1411 if (fabs(r[kk]) < fabs(sdiag[k]))
1412 {
1413 cotan = r[kk]/sdiag[k];
1414 sin = p5/sqrt(p25+p25*cotan*cotan);
1415 cos = sin*cotan;
1416 }
1417 else
1418 {
1419 tan = sdiag[k]/r[kk];
1420 cos = p5/sqrt(p25+p25*tan*tan);
1421 sin = cos*tan;
1422 }
1423 /* compute the modified diagonal element of r and
1424 * the modified element of ((q transpose)*b,0).
1425 */
1426 r[kk] = cos*r[kk] + sin*sdiag[k];
1427 temp = cos*wa[k] + sin*qtbpj;
1428 qtbpj = -sin*wa[k] + cos*qtbpj;
1429 wa[k] = temp;
1430 /* accumulate the tranformation in the row of s. */
1431 kp1 = k + 1;
1432 if (n > kp1)
1433 {
1434 ik = kk + 1;
1435 for (i=kp1; i<n; i++)
1436 {
1437 temp = cos*r[ik] + sin*sdiag[i];
1438 sdiag[i] = -sin*r[ik] + cos*sdiag[i];
1439 r[ik] = temp;
1440 ik += 1; /* [i+ldr*k] */
1441 }
1442 }
1443 }
1444
1445 L90:
1446 /* store the diagonal element of s and restore
1447 * the corresponding diagonal element of r.
1448 */
1449 kk = j + ldr*j;
1450 sdiag[j] = r[kk];
1451 r[kk] = x[j];
1452 }
1453 /* solve the triangular system for z. if the system is
1454 * singular, then obtain a least squares solution.
1455 */
1456 nsing = n;
1457 for (j=0; j<n; j++)
1458 {
1459 if ((sdiag[j] == zero) && (nsing == n))
1460 nsing = j;
1461 if (nsing < n)
1462 wa[j] = zero;
1463 }
1464 if(nsing < 1)
1465 goto L150;
1466
1467 for (k=0; k<nsing; k++)
1468 {
1469 j = nsing - k - 1;
1470 sum = zero;
1471 jp1 = j + 1;
1472 if (nsing > jp1)
1473 {
1474 ij = jp1 + ldr * j;
1475 for (i=jp1; i<nsing; i++)
1476 {
1477 sum += r[ij]*wa[i];
1478 ij += 1; /* [i+ldr*j] */
1479 }
1480 }
1481 wa[j] = (wa[j] - sum)/sdiag[j];
1482 }
1483
1484 L150:
1485 /* permute the components of z back to components of x. */
1486 for (j=0; j<n; j++)
1487 {
1488 l = ipvt[j];
1489 x[l] = wa[j];
1490 }
1491 }
1492
1493
1494 /************************enorm.c*************************/
1495 /*
1496 * **********
1497 *
1498 * function enorm
1499 *
1500 * given an n-vector x, this function calculates the
1501 * euclidean norm of x.
1502 *
1503 * the euclidean norm is computed by accumulating the sum of
1504 * squares in three different sums. the sums of squares for the
1505 * small and large components are scaled so that no overflows
1506 * occur. non-destructive underflows are permitted. underflows
1507 * and overflows do not occur in the computation of the unscaled
1508 * sum of squares for the intermediate components.
1509 * the definitions of small, intermediate and large components
1510 * depend on two constants, rdwarf and rgiant. the main
1511 * restrictions on these constants are that rdwarf**2 not
1512 * underflow and rgiant**2 not overflow. the constants
1513 * given here are suitable for every known computer.
1514 *
1515 * the function statement is
1516 *
1517 * double precision function enorm(n,x)
1518 *
1519 * where
1520 *
1521 * n is a positive integer input variable.
1522 *
1523 * x is an input array of length n.
1524 *
1525 * subprograms called
1526 *
1527 * fortran-supplied ... dabs,dsqrt
1528 *
1529 * argonne national laboratory. minpack project. march 1980.
1530 * burton s. garbow, kenneth e. hillstrom, jorge j. more
1531 *
1532 * **********
1533 */
enorm(int n,double x[])1534 double enorm(int n, double x[])
1535 {
1536 int i;
1537 double agiant;
1538 double floatn;
1539 double s1;
1540 double s2;
1541 double s3;
1542 double xabs;
1543 double x1max;
1544 double x3max;
1545 double ans;
1546 double temp;
1547 static double rdwarf = 3.834e-20;
1548 static double rgiant = 1.304e19;
1549 static double zero = 0.0;
1550 static double one = 1.0;
1551 //double fabs(), sqrt();
1552
1553 s1 = zero;
1554 s2 = zero;
1555 s3 = zero;
1556 x1max = zero;
1557 x3max = zero;
1558 floatn = n;
1559 agiant = rgiant/floatn;
1560
1561 for (i=0; i<n; i++)
1562 {
1563 xabs = fabs(x[i]);
1564 if ((xabs > rdwarf) && (xabs < agiant))
1565 {
1566 /* sum for intermediate components. */
1567 s2 += xabs*xabs;
1568 continue;
1569 }
1570
1571 if (xabs > rdwarf)
1572 {
1573 /* sum for large components. */
1574 if (xabs > x1max)
1575 {
1576 temp = x1max/xabs;
1577 s1 = one + s1*temp*temp;
1578 x1max = xabs;
1579 }
1580 else
1581 {
1582 temp = xabs/x1max;
1583 s1 += temp*temp;
1584 }
1585 continue;
1586 }
1587 /* sum for small components. */
1588 if (xabs > x3max)
1589 {
1590 temp = x3max/xabs;
1591 s3 = one + s3*temp*temp;
1592 x3max = xabs;
1593 }
1594 else
1595 {
1596 if (xabs != zero)
1597 {
1598 temp = xabs/x3max;
1599 s3 += temp*temp;
1600 }
1601 }
1602 }
1603 /* calculation of norm. */
1604 if(s1 != zero)
1605 {
1606 temp = s1 + (s2/x1max)/x1max;
1607 ans = x1max*sqrt(temp);
1608 return ans;
1609 }
1610 if (s2 != zero)
1611 {
1612 if (s2 >= x3max)
1613 temp = s2*(one+(x3max/s2)*(x3max*s3));
1614 else
1615 temp = x3max*((s2/x3max)+(x3max*s3));
1616 ans = sqrt(temp);
1617 }
1618 else
1619 {
1620 ans = x3max*sqrt(s3);
1621 }
1622 return ans;
1623 }
1624
1625
1626 /*
1627 * **********
1628 *
1629 * subroutine fdjac2
1630 *
1631 * this subroutine computes a forward-difference approximation
1632 * to the m by n jacobian matrix associated with a specified
1633 * problem of m functions in n variables.
1634 *
1635 * the subroutine statement is
1636 *
1637 * subroutine fdjac2(fcn,m,n,x,fvec,fjac,ldfjac,iflag,epsfcn,wa)
1638 *
1639 * where
1640 *
1641 * fcn is the name of the user-supplied subroutine which
1642 * calculates the functions. fcn must be declared
1643 * in an external statement in the user calling
1644 * program, and should be written as follows.
1645 *
1646 * subroutine fcn(m,n,x,fvec,iflag)
1647 * integer m,n,iflag
1648 * double precision x(n),fvec(m)
1649 * ----------
1650 * calculate the functions at x and
1651 * return this vector in fvec.
1652 * ----------
1653 * return
1654 * end
1655 *
1656 * the value of iflag should not be changed by fcn unless
1657 * the user wants to terminate execution of fdjac2.
1658 * in this case set iflag to a negative integer.
1659 *
1660 * m is a positive integer input variable set to the number
1661 * of functions.
1662 *
1663 * n is a positive integer input variable set to the number
1664 * of variables. n must not exceed m.
1665 *
1666 * x is an input array of length n.
1667 *
1668 * fvec is an input array of length m which must contain the
1669 * functions evaluated at x.
1670 *
1671 * fjac is an output m by n array which contains the
1672 * approximation to the jacobian matrix evaluated at x.
1673 *
1674 * ldfjac is a positive integer input variable not less than m
1675 * which specifies the leading dimension of the array fjac.
1676 *
1677 * iflag is an integer variable which can be used to terminate
1678 * the execution of fdjac2. see description of fcn.
1679 *
1680 * epsfcn is an input variable used in determining a suitable
1681 * step length for the forward-difference approximation. this
1682 * approximation assumes that the relative errors in the
1683 * functions are of the order of epsfcn. if epsfcn is less
1684 * than the machine precision, it is assumed that the relative
1685 * errors in the functions are of the order of the machine
1686 * precision.
1687 *
1688 * wa is a work array of length m.
1689 *
1690 * subprograms called
1691 *
1692 * user-supplied ...... fcn
1693 *
1694 * minpack-supplied ... dpmpar
1695 *
1696 * fortran-supplied ... dabs,dmax1,dsqrt
1697 *
1698 * argonne national laboratory. minpack project. march 1980.
1699 * burton s. garbow, kenneth e. hillstrom, jorge j. more
1700 *
1701 **********
1702 */
1703 //#define BUG 0
fdjac2(void (* fcn)(int,int,double[],double[],int *,void *),int m,int n,double x[],double fvec[],double fjac[],int ldfjac,int * iflag,double epsfcn,double wa[],void * data)1704 void fdjac2(void (*fcn)(int, int, double[], double[], int *, void *),
1705 int m,
1706 int n,
1707 double x[],
1708 double fvec[],
1709 double fjac[],
1710 int ldfjac,
1711 int *iflag,
1712 double epsfcn,
1713 double wa[],
1714 void *data)
1715 {
1716 int i;
1717 int j;
1718 int ij;
1719 double eps;
1720 double h;
1721 double temp;
1722 //static double zero = 0.0;
1723
1724 //dkb - changed to equal TvdB solver code, use constant delta = 1e-5
1725 // temp = dmax1(epsfcn,MACHEP);
1726 // eps = sqrt(temp);
1727
1728 JACOBIAN = 1;
1729 eps = 1e-5;
1730
1731 #ifdef BUG
1732 printf( "fdjac2\n" );
1733 #endif
1734
1735 ij = 0;
1736 for (j=0; j<n; j++)
1737 {
1738 temp = x[j];
1739 // h = eps * fabs(temp);
1740 // if (h == zero)
1741 // h = eps;
1742 h = eps; // added dkb
1743 x[j] = temp + h;
1744
1745 fcn(m,n,x,wa,iflag, data);
1746 x[j] = temp;
1747 if (*iflag < 0)
1748 return;
1749
1750 for (i=0; i<m; i++)
1751 {
1752 fjac[ij] = (wa[i] - fvec[i])/h;
1753 ij += 1; /* fjac[i+m*j] */
1754 }
1755 }
1756
1757 #ifdef BUG
1758 printf("jacobian:\n");
1759 pmat( m, n, fjac );
1760 for (i = 0; i < m*n; i++)
1761 printf("%6e ", fjac[i]);
1762 #endif
1763 JACOBIAN = 0;
1764 }
1765
1766
1767 /************************lmmisc.c*************************/
1768
dmax1(double a,double b)1769 double dmax1(double a, double b)
1770 {
1771 if (a >= b)
1772 return a;
1773 else
1774 return b;
1775 }
1776
dmin1(double a,double b)1777 double dmin1(double a, double b)
1778 {
1779 if (a <= b)
1780 return a;
1781 else
1782 return b;
1783 }
1784
min0(int a,int b)1785 int min0(int a, int b)
1786 {
1787 if (a <= b)
1788 return a;
1789 else
1790 return b;
1791 }
1792
mod(int k,int m)1793 int mod(int k, int m)
1794 {
1795 return k % m;
1796 }
1797
1798
pmat(int m,int n,double y[])1799 void pmat(int m, int n, double y[])
1800 {
1801 int i;
1802 int j;
1803 int k;
1804
1805 k = 0;
1806 for (i=0; i<m; i++)
1807 {
1808 printf("row %d: ", i);
1809 for (j=0; j<n; j++)
1810 {
1811 // printf( "%.5e ", y[k] );
1812 printf("%.6f ", y[k]);
1813 k += 1;
1814 }
1815 printf( "\n" );
1816 }
1817 }
1818