1 /* -------------------------------------------------------------------------- *
2  *                            OpenSim:  Lmdif.cpp                             *
3  * -------------------------------------------------------------------------- *
4  * The OpenSim API is a toolkit for musculoskeletal modeling and simulation.  *
5  * See http://opensim.stanford.edu and the NOTICE file for more information.  *
6  * OpenSim is developed at Stanford University and supported by the US        *
7  * National Institutes of Health (U54 GM072970, R24 HD065690) and by DARPA    *
8  * through the Warrior Web program.                                           *
9  *                                                                            *
10  * Copyright (c) 2005-2017 Stanford University and the Authors                *
11  *                                                                            *
12  * Licensed under the Apache License, Version 2.0 (the "License"); you may    *
13  * not use this file except in compliance with the License. You may obtain a  *
14  * copy of the License at http://www.apache.org/licenses/LICENSE-2.0.         *
15  *                                                                            *
16  * Unless required by applicable law or agreed to in writing, software        *
17  * distributed under the License is distributed on an "AS IS" BASIS,          *
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   *
19  * See the License for the specific language governing permissions and        *
20  * limitations under the License.                                             *
21  * -------------------------------------------------------------------------- */
22 
23 /************************lmdif*************************
24  *
25  * Solves or minimizes the sum of squares of m nonlinear
26  * functions of n variables.
27  *
28  * From public domain Fortran version
29  * of Argonne National Laboratories MINPACK
30  *
31  * C translation by Steve Moshier
32  *
33  * Passing C++ and thread-safe by Ned Phipps
34  *
35  *****************************************************/
36 
37 
38 #include <stdio.h>
39 #include <math.h>
40 #include "Lmdif.h"
41 
42 void pmat(int m, int n, double y[]);
43 int mod(int k, int m);
44 int min0(int a, int b);
45 double dmin1(double a, double b);
46 double dmax1(double a, double b);
47 
48 //#define BUG 0
49 int JACOBIAN;
50 
51 void fdjac2(
52   void (*fcn)(int, int, double[], double[], int *, void *),
53   int    m,
54   int    n,
55   double x[],
56   double fvec[],
57   double fjac[],
58   int    ldfjac,
59   int   *iflag,
60   double epsfcn,
61   double wa[],
62   void *data);
63 
64 double enorm(int n, double x[]);
65 
66 void qrsolv(
67   int    n,
68   double r[],
69   int    ldr,
70   int    ipvt[],
71   double diag[],
72   double qtb[],
73   double x[],
74   double sdiag[],
75   double wa[]);
76 
77 void qrfac(
78   int    m,
79   int    n,
80   double a[],
81   int    lda,
82   int    pivot,
83   int    ipvt[],
84   int    lipvt,
85   double rdiag[],
86   double acnorm[],
87   double wa[]);
88 
89 void lmpar(
90   int    n,
91   double r[],
92   int    ldr,
93   int    ipvt[],
94   double diag[],
95   double qtb[],
96   double delta,
97   double *par,
98   double x[],
99   double sdiag[],
100   double wa1[],
101   double wa2[]);
102 
103 
104 
105 #define MACHEP 1.2e-16
106 #define DWARF  1.0e-38
107 
108 
109 
110 /*
111 *     **********
112 *
113 *     subroutine lmdif
114 *
115 *     the purpose of lmdif is to minimize the sum of the squares of
116 *     m nonlinear functions in n variables by a modification of
117 *     the levenberg-marquardt algorithm. the user must provide a
118 *     subroutine which calculates the functions. the jacobian is
119 *     then calculated by a forward-difference approximation.
120 *
121 *     the subroutine statement is
122 *
123 *   subroutine lmdif(fcn,m,n,x,fvec,ftol,xtol,gtol,maxfev,epsfcn,
124 *            diag,mode,factor,nprint,info,nfev,fjac,
125 *            ldfjac,ipvt,qtf,wa1,wa2,wa3,wa4)
126 *
127 *     where
128 *
129 *   fcn is the name of the user-supplied subroutine which
130 *     calculates the functions. fcn must be declared
131 *     in an external statement in the user calling
132 *     program, and should be written as follows.
133 *
134 *     subroutine fcn(m,n,x,fvec,iflag)
135 *     integer m,n,iflag
136 *     double precision x(n),fvec(m)
137 *     ----------
138 *     calculate the functions at x and
139 *     return this vector in fvec.
140 *     ----------
141 *     return
142 *     end
143 *
144 *     the value of iflag should not be changed by fcn unless
145 *     the user wants to terminate execution of lmdif.
146 *     in this case set iflag to a negative integer.
147 *
148 *   m is a positive integer input variable set to the number
149 *     of functions.
150 *
151 *   n is a positive integer input variable set to the number
152 *     of variables. n must not exceed m.
153 *
154 *   x is an array of length n. on input x must contain
155 *     an initial estimate of the solution vector. on output x
156 *     contains the final estimate of the solution vector.
157 *
158 *   fvec is an output array of length m which contains
159 *     the functions evaluated at the output x.
160 *
161 *   ftol is a nonnegative input variable. termination
162 *     occurs when both the actual and predicted relative
163 *     reductions in the sum of squares are at most ftol.
164 *     therefore, ftol measures the relative error desired
165 *     in the sum of squares.
166 *
167 *   xtol is a nonnegative input variable. termination
168 *     occurs when the relative error between two consecutive
169 *     iterates is at most xtol. therefore, xtol measures the
170 *     relative error desired in the approximate solution.
171 *
172 *   gtol is a nonnegative input variable. termination
173 *     occurs when the cosine of the angle between fvec and
174 *     any column of the jacobian is at most gtol in absolute
175 *     value. therefore, gtol measures the orthogonality
176 *     desired between the function vector and the columns
177 *     of the jacobian.
178 *
179 *   maxfev is a positive integer input variable. termination
180 *     occurs when the number of calls to fcn is at least
181 *     maxfev by the end of an iteration.
182 *
183 *   epsfcn is an input variable used in determining a suitable
184 *     step length for the forward-difference approximation. this
185 *     approximation assumes that the relative errors in the
186 *     functions are of the order of epsfcn. if epsfcn is less
187 *     than the machine precision, it is assumed that the relative
188 *     errors in the functions are of the order of the machine
189 *     precision.
190 *
191 *   diag is an array of length n. if mode = 1 (see
192 *     below), diag is internally set. if mode = 2, diag
193 *     must contain positive entries that serve as
194 *     multiplicative scale factors for the variables.
195 *
196 *   mode is an integer input variable. if mode = 1, the
197 *     variables will be scaled internally. if mode = 2,
198 *     the scaling is specified by the input diag. other
199 *     values of mode are equivalent to mode = 1.
200 *
201 *   factor is a positive input variable used in determining the
202 *     initial step bound. this bound is set to the product of
203 *     factor and the euclidean norm of diag*x if nonzero, or else
204 *     to factor itself. in most cases factor should lie in the
205 *     interval (.1,100.). 100. is a generally recommended value.
206 *
207 *   nprint is an integer input variable that enables controlled
208 *     printing of iterates if it is positive. in this case,
209 *     fcn is called with iflag = 0 at the beginning of the first
210 *     iteration and every nprint iterations thereafter and
211 *     immediately prior to return, with x and fvec available
212 *     for printing. if nprint is not positive, no special calls
213 *     of fcn with iflag = 0 are made.
214 *
215 *   info is an integer output variable. if the user has
216 *     terminated execution, info is set to the (negative)
217 *     value of iflag. see description of fcn. otherwise,
218 *     info is set as follows.
219 *
220 *     info = 0  improper input parameters.
221 *
222 *     info = 1  both actual and predicted relative reductions
223 *           in the sum of squares are at most ftol.
224 *
225 *     info = 2  relative error between two consecutive iterates
226 *           is at most xtol.
227 *
228 *     info = 3  conditions for info = 1 and info = 2 both hold.
229 *
230 *     info = 4  the cosine of the angle between fvec and any
231 *           column of the jacobian is at most gtol in
232 *           absolute value.
233 *
234 *     info = 5  number of calls to fcn has reached or
235 *           exceeded maxfev.
236 *
237 *     info = 6  ftol is too small. no further reduction in
238 *           the sum of squares is possible.
239 *
240 *     info = 7  xtol is too small. no further improvement in
241 *           the approximate solution x is possible.
242 *
243 *     info = 8  gtol is too small. fvec is orthogonal to the
244 *           columns of the jacobian to machine precision.
245 *
246 *   nfev is an integer output variable set to the number of
247 *     calls to fcn.
248 *
249 *   fjac is an output m by n array. the upper n by n submatrix
250 *     of fjac contains an upper triangular matrix r with
251 *     diagonal elements of nonincreasing magnitude such that
252 *
253 *        t     t       t
254 *       p *(jac *jac)*p = r *r,
255 *
256 *     where p is a permutation matrix and jac is the final
257 *     calculated jacobian. column j of p is column ipvt(j)
258 *     (see below) of the identity matrix. the lower trapezoidal
259 *     part of fjac contains information generated during
260 *     the computation of r.
261 *
262 *   ldfjac is a positive integer input variable not less than m
263 *     which specifies the leading dimension of the array fjac.
264 *
265 *   ipvt is an integer output array of length n. ipvt
266 *     defines a permutation matrix p such that jac*p = q*r,
267 *     where jac is the final calculated jacobian, q is
268 *     orthogonal (not stored), and r is upper triangular
269 *     with diagonal elements of nonincreasing magnitude.
270 *     column j of p is column ipvt(j) of the identity matrix.
271 *
272 *   qtf is an output array of length n which contains
273 *     the first n elements of the vector (q transpose)*fvec.
274 *
275 *   wa1, wa2, and wa3 are work arrays of length n.
276 *
277 *   wa4 is a work array of length m.
278 *
279 *     subprograms called
280 *
281 *   user-supplied ...... fcn
282 *
283 *   minpack-supplied ... dpmpar,enorm,fdjac2,lmpar,qrfac
284 *
285 *   fortran-supplied ... dabs,dmax1,dmin1,dsqrt,mod
286 *
287 *     argonne national laboratory. minpack project. march 1980.
288 *     burton s. garbow, kenneth e. hillstrom, jorge j. more
289 *
290 *     **********
291 */
lmdif_C(void (* fcn)(int,int,double[],double[],int *,void *),int m,int n,double x[],double fvec[],double ftol,double xtol,double gtol,int maxfev,double epsfcn,double diag[],int mode,double factor,int nprint,int * info,int * nfev,double fjac[],int ldfjac,int ipvt[],double qtf[],double wa1[],double wa2[],double wa3[],double wa4[],void * data)292 void lmdif_C(
293   void (*fcn)(int, int, double[], double[], int *, void *),
294   int    m,
295   int    n,
296   double x[],
297   double fvec[],
298   double ftol,
299   double xtol,
300   double gtol,
301   int    maxfev,
302   double epsfcn,
303   double diag[],
304   int    mode,
305   double factor,
306   int    nprint,
307   int   *info,
308   int   *nfev,
309   double fjac[],
310   int    ldfjac,
311   int    ipvt[],
312   double qtf[],
313   double wa1[],
314   double wa2[],
315   double wa3[],
316   double wa4[],
317   void *data)
318 {
319    int i;
320    int iflag;
321    int ij;
322    int jj;
323    int iter;
324    int j;
325    int l;
326    double actred;
327    double delta;
328    double dirder;
329    double fnorm;
330    double fnorm1;
331    double gnorm;
332    double par;
333    double pnorm;
334    double prered;
335    double ratio;
336    double sum;
337    double temp;
338    double temp1;
339    double temp2;
340    double temp3;
341    double xnorm;
342    static double one = 1.0;
343    static double p1 = 0.1;
344    static double p5 = 0.5;
345    static double p25 = 0.25;
346    static double p75 = 0.75;
347    static double p0001 = 1.0e-4;
348    static double zero = 0.0;
349    //static double p05 = 0.05;
350 
351    *info = 0;
352    iflag = 0;
353    *nfev = 0;
354 
355    /*
356     *     check the input parameters for errors.
357     */
358    if ((n <= 0) || (m < n) || (ldfjac < m) || (ftol < zero)
359       || (xtol < zero) || (gtol < zero) || (maxfev <= 0)
360       || (factor <= zero))
361       goto L300;
362 
363    if (mode == 2)
364    {
365       /* scaling by diag[] */
366       for (j=0; j<n; j++)
367       {
368          if (diag[j] <= 0.0)
369             goto L300;
370       }
371    }
372 
373 #ifdef BUG
374    printf( "lmdif\n" );
375 #endif
376 
377    /* evaluate the function at the starting point
378     * and calculate its norm.
379     */
380    iflag = 1;
381    fcn(m,n,x,fvec,&iflag, data);
382    *nfev = 1;
383    if (iflag < 0)
384       goto L300;
385 
386    fnorm = enorm(m,fvec);
387    /* initialize levenberg-marquardt parameter and iteration counter. */
388    par = zero;
389    iter = 1;
390 
391    /* beginning of the outer loop. */
392 L30:
393 
394    /* calculate the jacobian matrix. */
395    iflag = 2;
396    fdjac2(fcn, m,n,x,fvec,fjac,ldfjac,&iflag,epsfcn,wa4, data);
397    // commented out DKB
398    //    *nfev += n;
399    if (iflag < 0)
400       goto L300;
401 
402    /* if requested, call fcn to enable printing of iterates. */
403    if (nprint > 0)
404    {
405       iflag = 0;
406       if (mod(iter-1,nprint) == 0)
407       {
408          fcn(m,n,x,fvec,&iflag, data);
409          if (iflag < 0)
410             goto L300;
411      //    printf( "fnorm %.15e\n", enorm(m,fvec));
412       }
413    }
414    /* compute the qr factorization of the jacobian. */
415    qrfac(m,n,fjac,ldfjac,1,ipvt,n,wa1,wa2,wa3);
416 //   for (j = 0; j < n; j++)
417 //   {
418 //      printf("wa1[%d] = %e\n", j, wa1[j]);
419 //      printf("wa2[%d] = %e\n", j, wa2[j]);
420 //      printf("wa3[%d] = %e\n", j, wa3[j]);
421 //   }
422    /* on the first iteration and if mode is 1, scale according
423     * to the norms of the columns of the initial jacobian.
424     */
425    if (iter == 1)
426    {
427  //     printf("iter = 1, mode = %d\n", mode);
428       if (mode != 2)
429       {
430          for (j=0; j<n; j++)
431          {
432             diag[j] = wa2[j];
433             if (wa2[j] == zero)
434                diag[j] = one;
435          }
436       }
437 
438       /* on the first iteration, calculate the norm of the scaled x
439        * and initialize the step bound delta.
440        */
441       for (j=0; j<n; j++)
442          wa3[j] = diag[j] * x[j];
443 
444       xnorm = enorm(n,wa3);
445       delta = factor*xnorm;
446      // printf("iter1: xnorm = %e, delta = %e\n", xnorm, delta);
447       //dkb
448       if (fabs(delta) <= 1e-4)
449 //      if (delta == zero)
450          delta = factor;
451    }
452 
453    /* form (q transpose)*fvec and store the first n components in qtf. */
454    for (i=0; i<m; i++)
455       wa4[i] = fvec[i];
456 
457    jj = 0;
458    for (j=0; j<n; j++)
459    {
460       temp3 = fjac[jj];
461       if (temp3 != zero)
462       {
463          sum = zero;
464          ij = jj;
465          for (i=j; i<m; i++)
466          {
467             sum += fjac[ij] * wa4[i];
468             ij += 1;    /* fjac[i+m*j] */
469          }
470          temp = -sum / temp3;
471          ij = jj;
472          for (i=j; i<m; i++)
473          {
474             wa4[i] += fjac[ij] * temp;
475             ij += 1;    /* fjac[i+m*j] */
476          }
477       }
478       fjac[jj] = wa1[j];
479       jj += m+1;  /* fjac[j+m*j] */
480       qtf[j] = wa4[j];
481    }
482 
483    /* compute the norm of the scaled gradient. */
484    gnorm = zero;
485    if (fnorm != zero)
486    {
487       jj = 0;
488       for (j=0; j<n; j++)
489       {
490          l = ipvt[j];
491          if (wa2[l] != zero)
492          {
493             sum = zero;
494             ij = jj;
495             for (i=0; i<=j; i++)
496             {
497                sum += fjac[ij]*(qtf[i]/fnorm);
498                ij += 1; /* fjac[i+m*j] */
499             }
500             gnorm = dmax1(gnorm,fabs(sum/wa2[l]));
501          }
502          jj += m;
503       }
504    }
505 
506    /* test for convergence of the gradient norm. */
507    if (gnorm <= gtol)
508       *info = 4;
509    if (*info != 0)
510       goto L300;
511 
512 //for (j = 0; j < n; j++)
513 //   printf("diag[%d] = %e, wa2[%d] = %e\n", j, diag[j], j, wa2[j]);
514    /* rescale if necessary. */
515    if (mode != 2)
516    {
517       for (j=0; j<n; j++)
518          diag[j] = dmax1(diag[j],wa2[j]);
519    }
520 
521    /* beginning of the inner loop. */
522 L200:
523 
524    /* determine the levenberg-marquardt parameter. */
525    lmpar(n,fjac,ldfjac,ipvt,diag,qtf,delta,&par,wa1,wa2,wa3,wa4);
526    /* store the direction p and x + p. calculate the norm of p. */
527    for (j=0; j<n; j++)
528    {
529       wa1[j] = -wa1[j];
530       wa2[j] = x[j] + wa1[j];
531       wa3[j] = diag[j]*wa1[j];
532       //printf("wa2[%d] = %e + %e = %e\n", j, x[j], wa1[j], wa2[j]);
533    }
534    pnorm = enorm(n,wa3);
535    /* on the first iteration, adjust the initial step bound. */
536    if (iter == 1)
537       delta = dmin1(delta,pnorm);
538 
539    /* evaluate the function at x + p and calculate its norm. */
540    iflag = 1;
541    //printf("evaluate at:\n");
542    //for (j=0; j<n; j++)
543    //   printf("wa2[%d] = %e\n", j, wa2[j]);
544    fcn(m,n,wa2,wa4,&iflag, data);
545    *nfev += 1;
546    if (iflag < 0)
547       goto L300;
548 
549    fnorm1 = enorm(m,wa4);
550 
551 #ifdef BUG
552    printf( "pnorm %.10e  fnorm1 %.10e\n", pnorm, fnorm1 );
553 #endif
554 
555    /* compute the scaled actual reduction. */
556    actred = -one;
557    if ((p1*fnorm1) < fnorm)
558    {
559       temp = fnorm1/fnorm;
560       actred = one - temp * temp;
561    }
562    /* compute the scaled predicted reduction and
563     * the scaled directional derivative.
564     */
565    jj = 0;
566    for (j=0; j<n; j++)
567    {
568       wa3[j] = zero;
569       l = ipvt[j];
570       temp = wa1[l];
571       ij = jj;
572       for (i=0; i<=j; i++)
573       {
574          wa3[i] += fjac[ij]*temp;
575          ij += 1; /* fjac[i+m*j] */
576       }
577       jj += m;
578    }
579    temp1 = enorm(n,wa3)/fnorm;
580    temp2 = (sqrt(par)*pnorm)/fnorm;
581    prered = temp1*temp1 + (temp2*temp2)/p5;
582    dirder = -(temp1*temp1 + temp2*temp2);
583    /* compute the ratio of the actual to the predicted reduction. */
584    ratio = zero;
585    if (prered != zero)
586       ratio = actred/prered;
587    /* update the step bound. */
588    if (ratio <= p25)
589    {
590       if (actred >= zero)
591          temp = p5;
592       else
593          temp = p5*dirder/(dirder + p5*actred);
594       if (((p1*fnorm1) >= fnorm) || (temp < p1))
595          temp = p1;
596 
597       delta = temp*dmin1(delta,pnorm/p1);
598       par = par/temp;
599    }
600    else
601    {
602       if ((par == zero) || (ratio >= p75))
603       {
604          delta = pnorm/p5;
605          par = p5*par;
606       }
607    }
608    /* test for successful iteration. */
609    if (ratio >= p0001)
610    {
611       /* successful iteration. update x, fvec, and their norms. */
612       for (j=0; j<n; j++)
613       {
614          x[j] = wa2[j];
615          wa2[j] = diag[j]*x[j];
616       }
617       for (i=0; i<m; i++)
618          fvec[i] = wa4[i];
619 
620       xnorm = enorm(n,wa2);
621       fnorm = fnorm1;
622       iter += 1;
623    }
624    /* tests for convergence. */
625    if ((fabs(actred) <= ftol) && (prered <= ftol) && (p5*ratio <= one))
626    {
627       *info = 1;
628    }
629 
630    if (delta <= xtol*xnorm)
631       *info = 2;
632 
633    if ((fabs(actred) <= ftol) && (prered <= ftol)
634       && (p5*ratio <= one) && (*info == 2))
635    {
636       *info = 3;
637    }
638 
639    if (*info != 0)
640       goto L300;
641 
642    /* tests for termination and stringent tolerances. */
643    if (*nfev >= maxfev)
644       *info = 5;
645 
646    if ((fabs(actred) <= MACHEP) && (prered <= MACHEP) && (p5*ratio <= one))
647    {
648       *info = 6;
649    }
650 
651    if (delta <= MACHEP*xnorm)
652       *info = 7;
653 
654    if (gnorm <= MACHEP)
655       *info = 8;
656 
657    if (*info != 0)
658       goto L300;
659 
660    /* end of the inner loop. repeat if iteration unsuccessful. */
661    if (ratio < p0001)
662       goto L200;
663 
664    /*  end of the outer loop. */
665    goto L30;
666 
667 L300:
668 
669    /* termination, either normal or user imposed. */
670    if (iflag < 0)
671       *info = iflag;
672 
673    iflag = 0;
674    if (nprint > 0)
675       fcn(m,n,x,fvec,&iflag, data);
676 }
677 
678 
679 /*     **********
680 *
681 *     subroutine lmpar
682 *
683 *     given an m by n matrix a, an n by n nonsingular diagonal
684 *     matrix d, an m-vector b, and a positive number delta,
685 *     the problem is to determine a value for the parameter
686 *     par such that if x solves the system
687 *
688 *       a*x = b ,     sqrt(par)*d*x = 0 ,
689 *
690 *     in the least squares sense, and dxnorm is the euclidean
691 *     norm of d*x, then either par is zero and
692 *
693 *       (dxnorm-delta) .le. 0.1*delta ,
694 *
695 *     or par is positive and
696 *
697 *       abs(dxnorm-delta) .le. 0.1*delta .
698 *
699 *     this subroutine completes the solution of the problem
700 *     if it is provided with the necessary information from the
701 *     qr factorization, with column pivoting, of a. that is, if
702 *     a*p = q*r, where p is a permutation matrix, q has orthogonal
703 *     columns, and r is an upper triangular matrix with diagonal
704 *     elements of nonincreasing magnitude, then lmpar expects
705 *     the full upper triangle of r, the permutation matrix p,
706 *     and the first n components of (q transpose)*b. on output
707 *     lmpar also provides an upper triangular matrix s such that
708 *
709 *        t   t           t
710 *       p *(a *a + par*d*d)*p = s *s .
711 *
712 *     s is employed within lmpar and may be of separate interest.
713 *
714 *     only a few iterations are generally needed for convergence
715 *     of the algorithm. if, however, the limit of 10 iterations
716 *     is reached, then the output par will contain the best
717 *     value obtained so far.
718 *
719 *     the subroutine statement is
720 *
721 *   subroutine lmpar(n,r,ldr,ipvt,diag,qtb,delta,par,x,sdiag,
722 *            wa1,wa2)
723 *
724 *     where
725 *
726 *   n is a positive integer input variable set to the order of r.
727 *
728 *   r is an n by n array. on input the full upper triangle
729 *     must contain the full upper triangle of the matrix r.
730 *     on output the full upper triangle is unaltered, and the
731 *     strict lower triangle contains the strict upper triangle
732 *     (transposed) of the upper triangular matrix s.
733 *
734 *   ldr is a positive integer input variable not less than n
735 *     which specifies the leading dimension of the array r.
736 *
737 *   ipvt is an integer input array of length n which defines the
738 *     permutation matrix p such that a*p = q*r. column j of p
739 *     is column ipvt(j) of the identity matrix.
740 *
741 *   diag is an input array of length n which must contain the
742 *     diagonal elements of the matrix d.
743 *
744 *   qtb is an input array of length n which must contain the first
745 *     n elements of the vector (q transpose)*b.
746 *
747 *   delta is a positive input variable which specifies an upper
748 *     bound on the euclidean norm of d*x.
749 *
750 *   par is a nonnegative variable. on input par contains an
751 *     initial estimate of the levenberg-marquardt parameter.
752 *     on output par contains the final estimate.
753 *
754 *   x is an output array of length n which contains the least
755 *     squares solution of the system a*x = b, sqrt(par)*d*x = 0,
756 *     for the output par.
757 *
758 *   sdiag is an output array of length n which contains the
759 *     diagonal elements of the upper triangular matrix s.
760 *
761 *   wa1 and wa2 are work arrays of length n.
762 *
763 *     subprograms called
764 *
765 *   minpack-supplied ... dpmpar,enorm,qrsolv
766 *
767 *   fortran-supplied ... dabs,dmax1,dmin1,dsqrt
768 *
769 *     argonne national laboratory. minpack project. march 1980.
770 *     burton s. garbow, kenneth e. hillstrom, jorge j. more
771 *
772 *     **********
773 */
lmpar(int n,double r[],int ldr,int ipvt[],double diag[],double qtb[],double delta,double * par,double x[],double sdiag[],double wa1[],double wa2[])774 void lmpar(
775   int n,
776   double r[],
777   int ldr,
778   int ipvt[],
779   double diag[],
780   double qtb[],
781   double delta,
782   double *par,
783   double x[],
784   double sdiag[],
785   double wa1[],
786   double wa2[])
787 {
788    int i;
789    int iter;
790    int ij;
791    int jj;
792    int j;
793    int jm1;
794    int jp1;
795    int k;
796    int l;
797    int nsing;
798    double dxnorm;
799    double fp;
800    double gnorm;
801    double parc;
802    double parl;
803    double paru;
804    double sum;
805    double temp;
806    static double zero = 0.0;
807    //static double one = 1.0;
808    static double p1 = 0.1;
809    static double p001 = 0.001;
810 
811 #ifdef BUG
812    printf( "lmpar\n" );
813 #endif
814 
815    /* compute and store in x the gauss-newton direction. if the
816     * jacobian is rank-deficient, obtain a least squares solution.
817     */
818    nsing = n;
819    jj = 0;
820    for (j=0; j<n; j++)
821    {
822       wa1[j] = qtb[j];
823       if ((r[jj] == zero) && (nsing == n))
824          nsing = j;
825 
826       if (nsing < n)
827          wa1[j] = zero;
828 
829       jj += ldr+1; /* [j+ldr*j] */
830    }
831 
832 #ifdef BUG
833    printf( "nsing %d ", nsing );
834 #endif
835 
836    if (nsing >= 1)
837    {
838       for (k=0; k<nsing; k++)
839       {
840          j = nsing - k - 1;
841          wa1[j] = wa1[j]/r[j+ldr*j];
842          temp = wa1[j];
843          jm1 = j - 1;
844          if (jm1 >= 0)
845          {
846             ij = ldr * j;
847             for (i=0; i<=jm1; i++)
848             {
849                wa1[i] -= r[ij]*temp;
850                ij += 1;
851             }
852          }
853       }
854    }
855 
856    for (j=0; j<n; j++)
857    {
858       l = ipvt[j];
859       x[l] = wa1[j];
860    }
861    /* initialize the iteration counter.
862     * evaluate the function at the origin, and test
863     * for acceptance of the gauss-newton direction.
864     */
865    iter = 0;
866    for (j=0; j<n; j++)
867       wa2[j] = diag[j]*x[j];
868 
869    dxnorm = enorm(n,wa2);
870    fp = dxnorm - delta;
871    if (fp <= p1*delta)
872    {
873 #ifdef BUG
874       printf( "going to L220\n" );
875 #endif
876 
877       goto L220;
878    }
879 
880    /* if the jacobian is not rank deficient, the newton
881     * step provides a lower bound, parl, for the zero of
882     * the function. otherwise set this bound to zero.
883     */
884    parl = zero;
885    if (nsing >= n)
886    {
887       for (j=0; j<n; j++)
888       {
889          l = ipvt[j];
890          wa1[j] = diag[l]*(wa2[l]/dxnorm);
891       }
892       jj = 0;
893       for (j=0; j<n; j++)
894       {
895          sum = zero;
896          jm1 = j - 1;
897          if (jm1 >= 0)
898          {
899             ij = jj;
900             for (i=0; i<=jm1; i++)
901             {
902                sum += r[ij]*wa1[i];
903                ij += 1;
904             }
905          }
906          wa1[j] = (wa1[j] - sum)/r[j+ldr*j];
907          jj += ldr; /* [i+ldr*j] */
908       }
909       temp = enorm(n,wa1);
910       parl = ((fp/delta)/temp)/temp;
911    }
912    /* calculate an upper bound, paru, for the zero of the function. */
913    jj = 0;
914    for (j=0; j<n; j++)
915    {
916       sum = zero;
917       ij = jj;
918       for (i=0; i<=j; i++)
919       {
920          sum += r[ij]*qtb[i];
921          ij += 1;
922       }
923       l = ipvt[j];
924       wa1[j] = sum/diag[l];
925       jj += ldr; /* [i+ldr*j] */
926    }
927    gnorm = enorm(n,wa1);
928    paru = gnorm/delta;
929    if(paru == zero)
930       paru = DWARF/dmin1(delta,p1);
931 
932    /* if the input par lies outside of the interval (parl,paru),
933     * set par to the closer endpoint.
934     */
935    *par = dmax1(*par,parl);
936    *par = dmin1(*par,paru);
937    if (*par == zero)
938       *par = gnorm/dxnorm;
939 
940 #ifdef BUG
941    printf( "parl %.4e  par %.4e  paru %.4e\n", parl, *par, paru );
942 #endif
943 
944    /* beginning of an iteration. */
945 
946 L150:
947 
948    iter += 1;
949    /*  evaluate the function at the current value of par. */
950    if (*par == zero)
951       *par = dmax1(DWARF,p001*paru);
952 
953    temp = sqrt(*par);
954    for (j=0; j<n; j++)
955       wa1[j] = temp*diag[j];
956 
957    qrsolv(n,r,ldr,ipvt,wa1,qtb,x,sdiag,wa2);
958    for (j=0; j<n; j++)
959       wa2[j] = diag[j]*x[j];
960 
961    dxnorm = enorm(n,wa2);
962    temp = fp;
963    fp = dxnorm - delta;
964    /* if the function is small enough, accept the current value
965     * of par. also test for the exceptional cases where parl
966     * is zero or the number of iterations has reached 10.
967     */
968    if ((fabs(fp) <= p1*delta)
969       || ((parl == zero) && (fp <= temp) && (temp < zero))
970       || (iter == 10))
971    {
972       goto L220;
973    }
974    /* compute the newton correction. */
975    for (j=0; j<n; j++)
976    {
977       l = ipvt[j];
978       wa1[j] = diag[l]*(wa2[l]/dxnorm);
979    }
980    jj = 0;
981    for (j=0; j<n; j++)
982    {
983       wa1[j] = wa1[j]/sdiag[j];
984       temp = wa1[j];
985       jp1 = j + 1;
986       if (jp1 < n)
987       {
988          ij = jp1 + jj;
989          for (i=jp1; i<n; i++)
990          {
991             wa1[i] -= r[ij]*temp;
992             ij += 1; /* [i+ldr*j] */
993          }
994       }
995       jj += ldr; /* ldr*j */
996    }
997    temp = enorm(n,wa1);
998    parc = ((fp/delta)/temp)/temp;
999 
1000    /* depending on the sign of the function, update parl or paru. */
1001    if (fp > zero)
1002       parl = dmax1(parl, *par);
1003 
1004    if (fp < zero)
1005       paru = dmin1(paru, *par);
1006 
1007    /* compute an improved estimate for par. */
1008    *par = dmax1(parl, *par + parc);
1009 
1010    /* end of an iteration. */
1011 
1012    goto L150;
1013 
1014 L220:
1015 
1016 /* termination. */
1017 
1018    if (iter == 0)
1019       *par = zero;
1020 }
1021 
1022 
1023 /*
1024 *     **********
1025 *
1026 *     subroutine qrfac
1027 *
1028 *     this subroutine uses householder transformations with column
1029 *     pivoting (optional) to compute a qr factorization of the
1030 *     m by n matrix a. that is, qrfac determines an orthogonal
1031 *     matrix q, a permutation matrix p, and an upper trapezoidal
1032 *     matrix r with diagonal elements of nonincreasing magnitude,
1033 *     such that a*p = q*r. the householder transformation for
1034 *     column k, k = 1,2,...,min(m,n), is of the form
1035 *
1036 *               t
1037 *       i - (1/u(k))*u*u
1038 *
1039 *     where u has zeros in the first k-1 positions. the form of
1040 *     this transformation and the method of pivoting first
1041 *     appeared in the corresponding linpack subroutine.
1042 *
1043 *     the subroutine statement is
1044 *
1045 *   subroutine qrfac(m,n,a,lda,pivot,ipvt,lipvt,rdiag,acnorm,wa)
1046 *
1047 *     where
1048 *
1049 *   m is a positive integer input variable set to the number
1050 *     of rows of a.
1051 *
1052 *   n is a positive integer input variable set to the number
1053 *     of columns of a.
1054 *
1055 *   a is an m by n array. on input a contains the matrix for
1056 *     which the qr factorization is to be computed. on output
1057 *     the strict upper trapezoidal part of a contains the strict
1058 *     upper trapezoidal part of r, and the lower trapezoidal
1059 *     part of a contains a factored form of q (the non-trivial
1060 *     elements of the u vectors described above).
1061 *
1062 *   lda is a positive integer input variable not less than m
1063 *     which specifies the leading dimension of the array a.
1064 *
1065 *   pivot is a logical input variable. if pivot is set true,
1066 *     then column pivoting is enforced. if pivot is set false,
1067 *     then no column pivoting is done.
1068 *
1069 *   ipvt is an integer output array of length lipvt. ipvt
1070 *     defines the permutation matrix p such that a*p = q*r.
1071 *     column j of p is column ipvt(j) of the identity matrix.
1072 *     if pivot is false, ipvt is not referenced.
1073 *
1074 *   lipvt is a positive integer input variable. if pivot is false,
1075 *     then lipvt may be as small as 1. if pivot is true, then
1076 *     lipvt must be at least n.
1077 *
1078 *   rdiag is an output array of length n which contains the
1079 *     diagonal elements of r.
1080 *
1081 *   acnorm is an output array of length n which contains the
1082 *     norms of the corresponding columns of the input matrix a.
1083 *     if this information is not needed, then acnorm can coincide
1084 *     with rdiag.
1085 *
1086 *   wa is a work array of length n. if pivot is false, then wa
1087 *     can coincide with rdiag.
1088 *
1089 *     subprograms called
1090 *
1091 *   minpack-supplied ... dpmpar,enorm
1092 *
1093 *   fortran-supplied ... dmax1,dsqrt,min0
1094 *
1095 *     argonne national laboratory. minpack project. march 1980.
1096 *     burton s. garbow, kenneth e. hillstrom, jorge j. more
1097 *
1098 *     **********
1099 */
qrfac(int m,int n,double a[],int lda,int pivot,int ipvt[],int lipvt,double rdiag[],double acnorm[],double wa[])1100 void qrfac(int m,
1101            int n,
1102            double a[],
1103            int lda,
1104            int pivot,
1105            int ipvt[],
1106            int lipvt,
1107            double rdiag[],
1108            double acnorm[],
1109            double wa[])
1110 {
1111    int i;
1112    int ij;
1113    int jj;
1114    int j;
1115    int jp1;
1116    int k;
1117    int kmax;
1118    int minmn;
1119    double ajnorm;
1120    double sum;
1121    double temp;
1122    static double zero = 0.0;
1123    static double one = 1.0;
1124    static double p05 = 0.05;
1125 
1126    /* compute the initial column norms and initialize several arrays. */
1127    //printf("\nqrfac\n");
1128    ij = 0;
1129    for (j=0; j<n; j++)
1130    {
1131       acnorm[j] = enorm(m,&a[ij]);
1132       rdiag[j] = acnorm[j];
1133       wa[j] = rdiag[j];
1134       if (pivot != 0)
1135          ipvt[j] = j;
1136 
1137       ij += m; /* m*j */
1138      // printf("acnorm[%d] = %e\n", j, acnorm[j]);
1139      // printf("rdiag[%d] = %e\n", j, rdiag[j]);
1140    }
1141 
1142 #ifdef BUG
1143    printf( "qrfac\n" );
1144 #endif
1145 
1146    /* reduce a to r with householder transformations. */
1147 
1148    minmn = min0(m,n);
1149    for (j=0; j<minmn; j++)
1150    {
1151       if (pivot == 0)
1152          goto L40;
1153 
1154       /* bring the column of largest norm into the pivot position. */
1155       kmax = j;
1156       for (k=j; k<n; k++)
1157       {
1158          if (rdiag[k] > rdiag[kmax])
1159             kmax = k;
1160       }
1161       if (kmax == j)
1162          goto L40;
1163 
1164       ij = m * j;
1165       jj = m * kmax;
1166       for (i=0; i<m; i++)
1167       {
1168          temp = a[ij]; /* [i+m*j] */
1169          a[ij] = a[jj]; /* [i+m*kmax] */
1170          a[jj] = temp;
1171          ij += 1;
1172          jj += 1;
1173       }
1174       rdiag[kmax] = rdiag[j];
1175       wa[kmax] = wa[j];
1176       k = ipvt[j];
1177       ipvt[j] = ipvt[kmax];
1178       ipvt[kmax] = k;
1179 
1180 L40:
1181 
1182       /* compute the householder transformation to reduce the
1183        * j-th column of a to a multiple of the j-th unit vector.
1184        */
1185       jj = j + m*j;
1186       ajnorm = enorm(m-j,&a[jj]);
1187       if (ajnorm == zero)
1188          goto L100;
1189 
1190       if (a[jj] < zero)
1191          ajnorm = -ajnorm;
1192 
1193       ij = jj;
1194       for (i=j; i<m; i++)
1195       {
1196          a[ij] /= ajnorm;
1197          ij += 1; /* [i+m*j] */
1198       }
1199       a[jj] += one;
1200 
1201       /* apply the transformation to the remaining columns
1202        * and update the norms.
1203        */
1204       jp1 = j + 1;
1205       if (jp1 < n)
1206       {
1207          for (k=jp1; k<n; k++)
1208          {
1209             sum = zero;
1210             ij = j + m*k;
1211             jj = j + m*j;
1212             for (i=j; i<m; i++)
1213             {
1214                sum += a[jj]*a[ij];
1215                ij += 1; /* [i+m*k] */
1216                jj += 1; /* [i+m*j] */
1217             }
1218             temp = sum/a[j+m*j];
1219             ij = j + m*k;
1220             jj = j + m*j;
1221             for (i=j; i<m; i++)
1222             {
1223                a[ij] -= temp*a[jj];
1224                ij += 1; /* [i+m*k] */
1225                jj += 1; /* [i+m*j] */
1226             }
1227             if ((pivot != 0) && (rdiag[k] != zero))
1228             {
1229                temp = a[j+m*k]/rdiag[k];
1230                temp = dmax1(zero, one-temp*temp);
1231                rdiag[k] *= sqrt(temp);
1232                temp = rdiag[k]/wa[k];
1233                if ((p05*temp*temp) <= MACHEP)
1234                {
1235                   rdiag[k] = enorm(m-j-1,&a[jp1+m*k]);
1236                   wa[k] = rdiag[k];
1237                }
1238             }
1239          }
1240       }
1241 
1242 L100:
1243 
1244       rdiag[j] = -ajnorm;
1245    }
1246 }
1247 
1248 
1249 /*
1250 *     **********
1251 *
1252 *     subroutine qrsolv
1253 *
1254 *     given an m by n matrix a, an n by n diagonal matrix d,
1255 *     and an m-vector b, the problem is to determine an x which
1256 *     solves the system
1257 *
1258 *       a*x = b ,     d*x = 0 ,
1259 *
1260 *     in the least squares sense.
1261 *
1262 *     this subroutine completes the solution of the problem
1263 *     if it is provided with the necessary information from the
1264 *     qr factorization, with column pivoting, of a. that is, if
1265 *     a*p = q*r, where p is a permutation matrix, q has orthogonal
1266 *     columns, and r is an upper triangular matrix with diagonal
1267 *     elements of nonincreasing magnitude, then qrsolv expects
1268 *     the full upper triangle of r, the permutation matrix p,
1269 *     and the first n components of (q transpose)*b. the system
1270 *     a*x = b, d*x = 0, is then equivalent to
1271 *
1272 *          t       t
1273 *       r*z = q *b ,  p *d*p*z = 0 ,
1274 *
1275 *     where x = p*z. if this system does not have full rank,
1276 *     then a least squares solution is obtained. on output qrsolv
1277 *     also provides an upper triangular matrix s such that
1278 *
1279 *        t   t       t
1280 *       p *(a *a + d*d)*p = s *s .
1281 *
1282 *     s is computed within qrsolv and may be of separate interest.
1283 *
1284 *     the subroutine statement is
1285 *
1286 *   subroutine qrsolv(n,r,ldr,ipvt,diag,qtb,x,sdiag,wa)
1287 *
1288 *     where
1289 *
1290 *   n is a positive integer input variable set to the order of r.
1291 *
1292 *   r is an n by n array. on input the full upper triangle
1293 *     must contain the full upper triangle of the matrix r.
1294 *     on output the full upper triangle is unaltered, and the
1295 *     strict lower triangle contains the strict upper triangle
1296 *     (transposed) of the upper triangular matrix s.
1297 *
1298 *   ldr is a positive integer input variable not less than n
1299 *     which specifies the leading dimension of the array r.
1300 *
1301 *   ipvt is an integer input array of length n which defines the
1302 *     permutation matrix p such that a*p = q*r. column j of p
1303 *     is column ipvt(j) of the identity matrix.
1304 *
1305 *   diag is an input array of length n which must contain the
1306 *     diagonal elements of the matrix d.
1307 *
1308 *   qtb is an input array of length n which must contain the first
1309 *     n elements of the vector (q transpose)*b.
1310 *
1311 *   x is an output array of length n which contains the least
1312 *     squares solution of the system a*x = b, d*x = 0.
1313 *
1314 *   sdiag is an output array of length n which contains the
1315 *     diagonal elements of the upper triangular matrix s.
1316 *
1317 *   wa is a work array of length n.
1318 *
1319 *     subprograms called
1320 *
1321 *   fortran-supplied ... dabs,dsqrt
1322 *
1323 *     argonne national laboratory. minpack project. march 1980.
1324 *     burton s. garbow, kenneth e. hillstrom, jorge j. more
1325 *
1326 *     **********
1327 */
1328 
qrsolv(int n,double r[],int ldr,int ipvt[],double diag[],double qtb[],double x[],double sdiag[],double wa[])1329 void qrsolv(int n,
1330             double r[],
1331             int ldr,
1332             int ipvt[],
1333             double diag[],
1334             double qtb[],
1335             double x[],
1336             double sdiag[],
1337             double wa[])
1338 {
1339    int i;
1340    int ij;
1341    int ik;
1342    int kk;
1343    int j;
1344    int jp1;
1345    int k;
1346    int kp1;
1347    int l;
1348    int nsing;
1349    double cos;
1350    double cotan;
1351    double qtbpj;
1352    double sin;
1353    double sum;
1354    double tan;
1355    double temp;
1356    static double zero = 0.0;
1357    static double p25 = 0.25;
1358    static double p5 = 0.5;
1359    //double fabs(), sqrt();
1360 
1361    /* copy r and (q transpose)*b to preserve input and initialize s.
1362     * in particular, save the diagonal elements of r in x.
1363     */
1364    kk = 0;
1365    for (j=0; j<n; j++)
1366    {
1367       ij = kk;
1368       ik = kk;
1369       for (i=j; i<n; i++)
1370       {
1371          r[ij] = r[ik];
1372          ij += 1;   /* [i+ldr*j] */
1373          ik += ldr; /* [j+ldr*i] */
1374       }
1375       x[j] = r[kk];
1376       wa[j] = qtb[j];
1377       kk += ldr+1; /* j+ldr*j */
1378    }
1379 #ifdef BUG
1380    printf( "qrsolv\n" );
1381 #endif
1382    /* eliminate the diagonal matrix d using a givens rotation. */
1383    for (j=0; j<n; j++)
1384    {
1385       /* prepare the row of d to be eliminated, locating the
1386        * diagonal element using p from the qr factorization.
1387        */
1388       l = ipvt[j];
1389       if (diag[l] == zero)
1390          goto L90;
1391 
1392       for (k=j; k<n; k++)
1393          sdiag[k] = zero;
1394 
1395       sdiag[j] = diag[l];
1396 
1397       /* the transformations to eliminate the row of d
1398        * modify only a single element of (q transpose)*b
1399        * beyond the first n, which is initially zero.
1400        */
1401       qtbpj = zero;
1402       for (k=j; k<n; k++)
1403       {
1404          /* determine a givens rotation which eliminates the
1405           * appropriate element in the current row of d.
1406           */
1407          if (sdiag[k] == zero)
1408             continue;
1409 
1410          kk = k + ldr * k;
1411          if (fabs(r[kk]) < fabs(sdiag[k]))
1412          {
1413             cotan = r[kk]/sdiag[k];
1414             sin = p5/sqrt(p25+p25*cotan*cotan);
1415             cos = sin*cotan;
1416          }
1417          else
1418          {
1419             tan = sdiag[k]/r[kk];
1420             cos = p5/sqrt(p25+p25*tan*tan);
1421             sin = cos*tan;
1422          }
1423          /* compute the modified diagonal element of r and
1424           * the modified element of ((q transpose)*b,0).
1425           */
1426          r[kk] = cos*r[kk] + sin*sdiag[k];
1427          temp = cos*wa[k] + sin*qtbpj;
1428          qtbpj = -sin*wa[k] + cos*qtbpj;
1429          wa[k] = temp;
1430          /* accumulate the tranformation in the row of s. */
1431          kp1 = k + 1;
1432          if (n > kp1)
1433          {
1434             ik = kk + 1;
1435             for (i=kp1; i<n; i++)
1436             {
1437                temp = cos*r[ik] + sin*sdiag[i];
1438                sdiag[i] = -sin*r[ik] + cos*sdiag[i];
1439                r[ik] = temp;
1440                ik += 1; /* [i+ldr*k] */
1441             }
1442          }
1443       }
1444 
1445 L90:
1446       /* store the diagonal element of s and restore
1447        * the corresponding diagonal element of r.
1448        */
1449       kk = j + ldr*j;
1450       sdiag[j] = r[kk];
1451       r[kk] = x[j];
1452    }
1453    /* solve the triangular system for z. if the system is
1454     * singular, then obtain a least squares solution.
1455     */
1456    nsing = n;
1457    for (j=0; j<n; j++)
1458    {
1459       if ((sdiag[j] == zero) && (nsing == n))
1460          nsing = j;
1461       if (nsing < n)
1462          wa[j] = zero;
1463    }
1464    if(nsing < 1)
1465       goto L150;
1466 
1467    for (k=0; k<nsing; k++)
1468    {
1469       j = nsing - k - 1;
1470       sum = zero;
1471       jp1 = j + 1;
1472       if (nsing > jp1)
1473       {
1474          ij = jp1 + ldr * j;
1475          for (i=jp1; i<nsing; i++)
1476          {
1477             sum += r[ij]*wa[i];
1478             ij += 1; /* [i+ldr*j] */
1479          }
1480       }
1481       wa[j] = (wa[j] - sum)/sdiag[j];
1482    }
1483 
1484 L150:
1485    /* permute the components of z back to components of x. */
1486    for (j=0; j<n; j++)
1487    {
1488       l = ipvt[j];
1489       x[l] = wa[j];
1490    }
1491 }
1492 
1493 
1494 /************************enorm.c*************************/
1495 /*
1496 *     **********
1497 *
1498 *     function enorm
1499 *
1500 *     given an n-vector x, this function calculates the
1501 *     euclidean norm of x.
1502 *
1503 *     the euclidean norm is computed by accumulating the sum of
1504 *     squares in three different sums. the sums of squares for the
1505 *     small and large components are scaled so that no overflows
1506 *     occur. non-destructive underflows are permitted. underflows
1507 *     and overflows do not occur in the computation of the unscaled
1508 *     sum of squares for the intermediate components.
1509 *     the definitions of small, intermediate and large components
1510 *     depend on two constants, rdwarf and rgiant. the main
1511 *     restrictions on these constants are that rdwarf**2 not
1512 *     underflow and rgiant**2 not overflow. the constants
1513 *     given here are suitable for every known computer.
1514 *
1515 *     the function statement is
1516 *
1517 *   double precision function enorm(n,x)
1518 *
1519 *     where
1520 *
1521 *   n is a positive integer input variable.
1522 *
1523 *   x is an input array of length n.
1524 *
1525 *     subprograms called
1526 *
1527 *   fortran-supplied ... dabs,dsqrt
1528 *
1529 *     argonne national laboratory. minpack project. march 1980.
1530 *     burton s. garbow, kenneth e. hillstrom, jorge j. more
1531 *
1532 *     **********
1533 */
enorm(int n,double x[])1534 double enorm(int n, double x[])
1535 {
1536    int i;
1537    double agiant;
1538    double floatn;
1539    double s1;
1540    double s2;
1541    double s3;
1542    double xabs;
1543    double x1max;
1544    double x3max;
1545    double ans;
1546    double temp;
1547    static double rdwarf = 3.834e-20;
1548    static double rgiant = 1.304e19;
1549    static double zero = 0.0;
1550    static double one = 1.0;
1551    //double fabs(), sqrt();
1552 
1553    s1 = zero;
1554    s2 = zero;
1555    s3 = zero;
1556    x1max = zero;
1557    x3max = zero;
1558    floatn = n;
1559    agiant = rgiant/floatn;
1560 
1561    for (i=0; i<n; i++)
1562    {
1563       xabs = fabs(x[i]);
1564       if ((xabs > rdwarf) && (xabs < agiant))
1565       {
1566          /* sum for intermediate components. */
1567          s2 += xabs*xabs;
1568          continue;
1569       }
1570 
1571       if (xabs > rdwarf)
1572       {
1573          /* sum for large components. */
1574          if (xabs > x1max)
1575          {
1576             temp = x1max/xabs;
1577             s1 = one + s1*temp*temp;
1578             x1max = xabs;
1579          }
1580          else
1581          {
1582             temp = xabs/x1max;
1583             s1 += temp*temp;
1584          }
1585          continue;
1586       }
1587       /* sum for small components. */
1588       if (xabs > x3max)
1589       {
1590          temp = x3max/xabs;
1591          s3 = one + s3*temp*temp;
1592          x3max = xabs;
1593       }
1594       else
1595       {
1596          if (xabs != zero)
1597          {
1598             temp = xabs/x3max;
1599             s3 += temp*temp;
1600          }
1601       }
1602    }
1603    /* calculation of norm. */
1604    if(s1 != zero)
1605    {
1606       temp = s1 + (s2/x1max)/x1max;
1607       ans = x1max*sqrt(temp);
1608       return ans;
1609    }
1610    if (s2 != zero)
1611    {
1612       if (s2 >= x3max)
1613          temp = s2*(one+(x3max/s2)*(x3max*s3));
1614       else
1615          temp = x3max*((s2/x3max)+(x3max*s3));
1616       ans = sqrt(temp);
1617    }
1618    else
1619    {
1620       ans = x3max*sqrt(s3);
1621    }
1622    return ans;
1623 }
1624 
1625 
1626 /*
1627 *     **********
1628 *
1629 *     subroutine fdjac2
1630 *
1631 *     this subroutine computes a forward-difference approximation
1632 *     to the m by n jacobian matrix associated with a specified
1633 *     problem of m functions in n variables.
1634 *
1635 *     the subroutine statement is
1636 *
1637 *   subroutine fdjac2(fcn,m,n,x,fvec,fjac,ldfjac,iflag,epsfcn,wa)
1638 *
1639 *     where
1640 *
1641 *   fcn is the name of the user-supplied subroutine which
1642 *     calculates the functions. fcn must be declared
1643 *     in an external statement in the user calling
1644 *     program, and should be written as follows.
1645 *
1646 *     subroutine fcn(m,n,x,fvec,iflag)
1647 *     integer m,n,iflag
1648 *     double precision x(n),fvec(m)
1649 *     ----------
1650 *     calculate the functions at x and
1651 *     return this vector in fvec.
1652 *     ----------
1653 *     return
1654 *     end
1655 *
1656 *     the value of iflag should not be changed by fcn unless
1657 *     the user wants to terminate execution of fdjac2.
1658 *     in this case set iflag to a negative integer.
1659 *
1660 *   m is a positive integer input variable set to the number
1661 *     of functions.
1662 *
1663 *   n is a positive integer input variable set to the number
1664 *     of variables. n must not exceed m.
1665 *
1666 *   x is an input array of length n.
1667 *
1668 *   fvec is an input array of length m which must contain the
1669 *     functions evaluated at x.
1670 *
1671 *   fjac is an output m by n array which contains the
1672 *     approximation to the jacobian matrix evaluated at x.
1673 *
1674 *   ldfjac is a positive integer input variable not less than m
1675 *     which specifies the leading dimension of the array fjac.
1676 *
1677 *   iflag is an integer variable which can be used to terminate
1678 *     the execution of fdjac2. see description of fcn.
1679 *
1680 *   epsfcn is an input variable used in determining a suitable
1681 *     step length for the forward-difference approximation. this
1682 *     approximation assumes that the relative errors in the
1683 *     functions are of the order of epsfcn. if epsfcn is less
1684 *     than the machine precision, it is assumed that the relative
1685 *     errors in the functions are of the order of the machine
1686 *     precision.
1687 *
1688 *   wa is a work array of length m.
1689 *
1690 *     subprograms called
1691 *
1692 *   user-supplied ...... fcn
1693 *
1694 *   minpack-supplied ... dpmpar
1695 *
1696 *   fortran-supplied ... dabs,dmax1,dsqrt
1697 *
1698 *     argonne national laboratory. minpack project. march 1980.
1699 *     burton s. garbow, kenneth e. hillstrom, jorge j. more
1700 *
1701 **********
1702 */
1703 //#define BUG 0
fdjac2(void (* fcn)(int,int,double[],double[],int *,void *),int m,int n,double x[],double fvec[],double fjac[],int ldfjac,int * iflag,double epsfcn,double wa[],void * data)1704 void fdjac2(void (*fcn)(int, int, double[], double[], int *, void *),
1705             int m,
1706             int n,
1707             double x[],
1708             double fvec[],
1709             double fjac[],
1710             int ldfjac,
1711             int *iflag,
1712             double epsfcn,
1713             double wa[],
1714             void *data)
1715 {
1716    int i;
1717    int j;
1718    int ij;
1719    double eps;
1720    double h;
1721    double temp;
1722    //static double zero = 0.0;
1723 
1724    //dkb - changed to equal TvdB solver code, use constant delta = 1e-5
1725    //    temp = dmax1(epsfcn,MACHEP);
1726    //    eps = sqrt(temp);
1727 
1728    JACOBIAN = 1;
1729    eps = 1e-5;
1730 
1731 #ifdef BUG
1732    printf( "fdjac2\n" );
1733 #endif
1734 
1735    ij = 0;
1736    for (j=0; j<n; j++)
1737    {
1738       temp = x[j];
1739       //        h = eps * fabs(temp);
1740       //        if (h == zero)
1741       //            h = eps;
1742       h = eps; // added dkb
1743       x[j] = temp + h;
1744 
1745       fcn(m,n,x,wa,iflag, data);
1746       x[j] = temp;
1747       if (*iflag < 0)
1748          return;
1749 
1750       for (i=0; i<m; i++)
1751       {
1752          fjac[ij] = (wa[i] - fvec[i])/h;
1753          ij += 1;    /* fjac[i+m*j] */
1754       }
1755    }
1756 
1757 #ifdef BUG
1758    printf("jacobian:\n");
1759    pmat( m, n, fjac );
1760    for (i = 0; i < m*n; i++)
1761       printf("%6e  ", fjac[i]);
1762 #endif
1763    JACOBIAN = 0;
1764 }
1765 
1766 
1767 /************************lmmisc.c*************************/
1768 
dmax1(double a,double b)1769 double dmax1(double a, double b)
1770 {
1771    if (a >= b)
1772       return a;
1773    else
1774       return b;
1775 }
1776 
dmin1(double a,double b)1777 double dmin1(double a, double b)
1778 {
1779    if (a <= b)
1780       return a;
1781    else
1782       return b;
1783 }
1784 
min0(int a,int b)1785 int min0(int a, int b)
1786 {
1787    if (a <= b)
1788       return a;
1789    else
1790       return b;
1791 }
1792 
mod(int k,int m)1793 int mod(int k, int m)
1794 {
1795    return k % m;
1796 }
1797 
1798 
pmat(int m,int n,double y[])1799 void pmat(int m, int n, double y[])
1800 {
1801    int i;
1802    int j;
1803    int k;
1804 
1805    k = 0;
1806    for (i=0; i<m; i++)
1807    {
1808       printf("row %d: ", i);
1809       for (j=0; j<n; j++)
1810       {
1811          //  printf( "%.5e ", y[k] );
1812          printf("%.6f  ", y[k]);
1813          k += 1;
1814       }
1815       printf( "\n" );
1816    }
1817 }
1818