1 #if 0
2 #define real_EPSILON FLT_EPSILON
3 #define real_MIN FLT_MIN
4 #define real_MAX FLT_MAX
5 #else
6 #define real_EPSILON DBL_EPSILON
7 #define real_MIN DBL_MIN
8 #define real_MAX DBL_MAX
9 #endif
10 
dpmpar(int i)11 real inline dpmpar(int i)
12 {
13     switch(i) {
14     case 1:
15 	return real_EPSILON;
16     case 2:
17 	return real_MIN;
18     default:
19 	return real_MAX;
20     }
21 }
22 
23 template<int N>
enorm(const real * fvec)24 real inline enorm(const real *fvec) {
25     real s = 0;
26     for (int i = 0; i < N; i++) {
27 	s += fvec[i] * fvec[i];
28     }
29     return sqrt(s);
30 }
31 
enorm(int n,const real * fvec)32 real inline enorm(int n, const real *fvec) {
33     real s = 0;
34     for (int i = 0; i < n; i++) {
35 	s += fvec[i] * fvec[i];
36     }
37     return sqrt(s);
38 }
39 
40 template<int N>
enorm2(const real * fvec)41 real enorm2(const real *fvec) {
42     real s = 0;
43     for (int i = 0; i < N; i++) {
44 	s += fvec[i] * fvec[i];
45     }
46     return s;
47 }
48 
49 template<int N>
qform(real * q,int ldq,real * wa)50 void qform(real *q, int	ldq, real *wa)
51 {
52     /* System generated locals */
53     int q_dim1, q_offset;
54 
55     /* Local variables */
56     int i, j, k, l;
57     real sum, temp;
58 
59     /* Parameter adjustments */
60     --wa;
61     q_dim1 = ldq;
62     q_offset = 1 + q_dim1 * 1;
63     q -= q_offset;
64 
65     /* Function Body */
66 
67 /*     zero out upper triangle of q in the first min(m,n) columns. */
68 
69     if (N >= 2) {
70         for (j = 2; j <= N; ++j) {
71             for (i = 1; i <= j-1; ++i) {
72                 q[i + j * q_dim1] = 0.;
73             }
74         }
75     }
76 
77 /*     accumulate q from its factored form. */
78 
79     for (l = 1; l <= N; ++l) {
80 	k = N - l + 1;
81 	for (i = k; i <= N; ++i) {
82 	    wa[i] = q[i + k * q_dim1];
83 	    q[i + k * q_dim1] = 0.;
84 	}
85 	q[k + k * q_dim1] = 1.;
86 	if (wa[k] != 0.) {
87             for (j = k; j <= N; ++j) {
88                 sum = 0.;
89                 for (i = k; i <= N; ++i) {
90                     sum += q[i + j * q_dim1] * wa[i];
91                 }
92                 temp = sum / wa[k];
93                 for (i = k; i <= N; ++i) {
94                     q[i + j * q_dim1] -= temp * wa[i];
95                 }
96             }
97         }
98     }
99 } /* qform_ */
100 
101 template<int N>
r1updt(real * s,int ls,const real * u,real * v,real * w,int * sing)102 void r1updt(real *s, int ls, const real *u, real *v, real *w, int *sing)
103 {
104     /* Initialized data */
105 
106 #define p5 .5
107 #define p25 .25
108 
109     /* Local variables */
110     int i, j, l, jj, nm1;
111     real tan;
112     int nmj;
113     real cos, sin, tau, temp, giant, cotan;
114 
115     /* Parameter adjustments */
116     --w;
117     --u;
118     --v;
119     --s;
120     (void)ls;
121 
122     /* Function Body */
123 
124 /*     giant is the largest magnitude. */
125 
126     giant = dpmpar(3);
127 
128 /*     initialize the diagonal element pointer. */
129 
130     jj = N * ((N << 1) - N + 1) / 2 - (N - N);
131 
132 /*     move the nontrivial part of the last column of s into w. */
133 
134     l = jj;
135     for (i = N; i <= N; ++i) {
136 	w[i] = s[l];
137 	++l;
138     }
139 
140 /*     rotate the vector v into a multiple of the n-th unit vector */
141 /*     in such a way that a spike is introduced into w. */
142 
143     nm1 = N - 1;
144     if (nm1 >= 1) {
145         for (nmj = 1; nmj <= nm1; ++nmj) {
146             j = N - nmj;
147             jj -= N - j + 1;
148             w[j] = 0.;
149             if (v[j] != 0.) {
150 
151 /*        determine a givens rotation which eliminates the */
152 /*        j-th element of v. */
153 
154                 if (fabs(v[N]) < fabs(v[j])) {
155                     cotan = v[N] / v[j];
156                     sin = p5 / sqrt(p25 + p25 * (cotan * cotan));
157                     cos = sin * cotan;
158                     tau = 1.;
159                     if (fabs(cos) * giant > 1.) {
160                         tau = 1. / cos;
161                     }
162                 } else {
163                     tan = v[j] / v[N];
164                     cos = p5 / sqrt(p25 + p25 * (tan * tan));
165                     sin = cos * tan;
166                     tau = sin;
167                 }
168 
169 /*        apply the transformation to v and store the information */
170 /*        necessary to recover the givens rotation. */
171 
172                 v[N] = sin * v[j] + cos * v[N];
173                 v[j] = tau;
174 
175 /*        apply the transformation to s and extend the spike in w. */
176 
177                 l = jj;
178                 for (i = j; i <= N; ++i) {
179                     temp = cos * s[l] - sin * w[i];
180                     w[i] = sin * s[l] + cos * w[i];
181                     s[l] = temp;
182                     ++l;
183                 }
184             }
185         }
186     }
187 
188 /*     add the spike from the rank 1 update to w. */
189 
190     for (i = 1; i <= N; ++i) {
191 	w[i] += v[N] * u[i];
192     }
193 
194 /*     eliminate the spike. */
195 
196     *sing = false;
197     if (nm1 >= 1) {
198         for (j = 1; j <= nm1; ++j) {
199             if (w[j] != 0.) {
200 
201 /*        determine a givens rotation which eliminates the */
202 /*        j-th element of the spike. */
203 
204                 if (fabs(s[jj]) < fabs(w[j])) {
205                     cotan = s[jj] / w[j];
206                     sin = p5 / sqrt(p25 + p25 * (cotan * cotan));
207                     cos = sin * cotan;
208                     tau = 1.;
209                     if (fabs(cos) * giant > 1.) {
210                         tau = 1. / cos;
211                     }
212                 } else {
213                     tan = w[j] / s[jj];
214                     cos = p5 / sqrt(p25 + p25 * (tan * tan));
215                     sin = cos * tan;
216                     tau = sin;
217                 }
218 
219 /*        apply the transformation to s and reduce the spike in w. */
220 
221                 l = jj;
222                 for (i = j; i <= N; ++i) {
223                     temp = cos * s[l] + sin * w[i];
224                     w[i] = -sin * s[l] + cos * w[i];
225                     s[l] = temp;
226                     ++l;
227                 }
228 
229 /*        store the information necessary to recover the */
230 /*        givens rotation. */
231 
232                 w[j] = tau;
233             }
234 
235 /*        test for zero diagonal elements in the output s. */
236 
237             if (s[jj] == 0.) {
238                 *sing = true;
239             }
240             jj += N - j + 1;
241         }
242     }
243 
244 /*     move w back into the last column of the output s. */
245 
246     l = jj;
247     for (i = N; i <= N; ++i) {
248 	s[l] = w[i];
249 	++l;
250     }
251     if (s[jj] == 0.) {
252 	*sing = true;
253     }
254 
255 /*     last card of subroutine r1updt. */
256 
257 } /* __minpack_func__(r1updt) */
258 
259 template<int M, int N>
r1mpyq(real * a,int lda,const real * v,const real * w)260 void r1mpyq(real *a, int lda, const real *v, const real *w)
261 {
262     /* System generated locals */
263     int a_dim1, a_offset;
264 
265     /* Local variables */
266     int i, j, nm1, nmj;
267     real cos, sin, temp;
268 
269     /* Parameter adjustments */
270     --w;
271     --v;
272     a_dim1 = lda;
273     a_offset = 1 + a_dim1 * 1;
274     a -= a_offset;
275 
276     /* Function Body */
277 
278 /*     apply the first set of givens rotations to a. */
279 
280     nm1 = N - 1;
281     if (nm1 < 1) {
282         return;
283     }
284     for (nmj = 1; nmj <= nm1; ++nmj) {
285 	j = N - nmj;
286 	if (fabs(v[j]) > 1.) {
287 	    cos = 1. / v[j];
288 	    sin = sqrt(1. - cos * cos);
289 	} else {
290 	    sin = v[j];
291 	    cos = sqrt(1. - sin * sin);
292 	}
293 	for (i = 1; i <= M; ++i) {
294 	    temp = cos * a[i + j * a_dim1] - sin * a[i + N * a_dim1];
295 	    a[i + N * a_dim1] = sin * a[i + j * a_dim1] + cos * a[
296 		    i + N * a_dim1];
297 	    a[i + j * a_dim1] = temp;
298 	}
299     }
300 
301 /*     apply the second set of givens rotations to a. */
302 
303     for (j = 1; j <= nm1; ++j) {
304 	if (fabs(w[j]) > 1.) {
305 	    cos = 1. / w[j];
306 	    sin = sqrt(1. - cos * cos);
307 	} else {
308 	    sin = w[j];
309 	    cos = sqrt(1. - sin * sin);
310 	}
311 	for (i = 1; i <= M; ++i) {
312 	    temp = cos * a[i + j * a_dim1] + sin * a[i + N * a_dim1];
313 	    a[i + N * a_dim1] = -sin * a[i + j * a_dim1] + cos * a[i + N * a_dim1];
314 	    a[i + j * a_dim1] = temp;
315 	}
316     }
317 
318 /*     last card of subroutine r1mpyq. */
319 
320 } /* r1mpyq_ */
321 
322 template<int N>
fdjac1(root_fcn * fcn_nn,void * p,real * x,const real * fvec,real * fjac,int ldfjac,int ml,int mu,real epsfcn,real * wa1,real * wa2)323 int fdjac1(root_fcn *fcn_nn, void *p, real *x, const real *fvec, real *fjac, int ldfjac, int ml,
324 	int mu, real epsfcn, real *wa1, real *wa2)
325 {
326     /* System generated locals */
327     int fjac_dim1, fjac_offset;
328 
329     /* Local variables */
330     real h;
331     int i, j, k;
332     real eps, temp;
333     int msum;
334     real epsmch;
335     int iflag = 0;
336 
337     /* Parameter adjustments */
338     --wa2;
339     --wa1;
340     --fvec;
341     --x;
342     fjac_dim1 = ldfjac;
343     fjac_offset = 1 + fjac_dim1 * 1;
344     fjac -= fjac_offset;
345 
346     /* Function Body */
347 
348 /*     epsmch is the machine precision. */
349 
350     epsmch = dpmpar(1);
351 
352     eps = sqrt((std::max(epsfcn,epsmch)));
353     msum = ml + mu + 1;
354     if (msum >= N) {
355 
356 /*        computation of dense approximate jacobian. */
357 
358         for (j = 1; j <= N; ++j) {
359             temp = x[j];
360             h = eps * fabs(temp);
361             if (h == 0.) {
362                 h = eps;
363             }
364             x[j] = temp + h;
365             /* the last parameter of fcn_nn() is set to 2 to tell calls
366                made to compute the function from calls made to compute
367                the Jacobian (see fcn() in tlmfdrv.c) */
368             iflag = fcn_nn(p, &x[1], &wa1[1], 2);
369             if (iflag < 0) {
370                 return iflag;
371             }
372             x[j] = temp;
373             for (i = 1; i <= N; ++i) {
374                 fjac[i + j * fjac_dim1] = (wa1[i] - fvec[i]) / h;
375             }
376         }
377         return 0;
378     }
379 
380 /*        computation of banded approximate jacobian. */
381 
382     for (k = 1; k <= msum; ++k) {
383 	for (j = k; msum < 0 ? j >= N : j <= N; j += msum) {
384 	    wa2[j] = x[j];
385 	    h = eps * fabs(wa2[j]);
386 	    if (h == 0.) {
387 		h = eps;
388 	    }
389 	    x[j] = wa2[j] + h;
390 	}
391 	iflag = fcn_nn(p, &x[1], &wa1[1], 1);
392 	if (iflag < 0) {
393             return iflag;
394 	}
395 	for (j = k; msum < 0 ? j >= N : j <= N; j += msum) {
396 	    x[j] = wa2[j];
397 	    h = eps * fabs(wa2[j]);
398 	    if (h == 0.) {
399 		h = eps;
400 	    }
401 	    for (i = 1; i <= N; ++i) {
402 		fjac[i + j * fjac_dim1] = 0.;
403 		if (i >= j - mu && i <= j + ml) {
404 		    fjac[i + j * fjac_dim1] = (wa1[i] - fvec[i]) / h;
405 		}
406 	    }
407 	}
408     }
409     return 0;
410 
411 } /* fdjac1_ */
412 
413 template<int N>
qrfac(real * a,int lda,int pivot,int * ipvt,int lipvt,real * rdiag,real * acnorm,real * wa)414 void qrfac(real *a, int lda, int pivot, int *ipvt, int lipvt, real *rdiag,
415 	 real *acnorm, real *wa)
416 {
417 #ifdef USE_LAPACK
418     int i, j, k;
419     double t;
420     double* tau = wa;
421     const int ltau = N;
422     int lwork = -1;
423     int info = 0;
424     double* work;
425 
426     if (pivot) {
427         assert( lipvt >= N );
428         /* set all columns free */
429         memset(ipvt, 0, sizeof(int)*N);
430     }
431 
432     /* query optimal size of work */
433     lwork = -1;
434     if (pivot) {
435         dgeqp3_(&m,&n,a,&lda,ipvt,tau,tau,&lwork,&info);
436         lwork = (int)tau[0];
437         assert( lwork >= 3*n+1  );
438     } else {
439         dgeqrf_(&m,&n,a,&lda,tau,tau,&lwork,&info);
440         lwork = (int)tau[0];
441         assert( lwork >= 1 && lwork >= n );
442     }
443 
444     assert( info == 0 );
445 
446     /* alloc work area */
447     work = (double *)malloc(sizeof(double)*lwork);
448     assert(work != NULL);
449 
450     /* set acnorm first (from the doc of qrfac, acnorm may point to the same area as rdiag) */
451     if (acnorm != rdiag) {
452         for (j = 0; j < n; ++j) {
453             acnorm[j] = enorm<N>(&a[j * lda]);
454         }
455     }
456 
457     /* QR decomposition */
458     if (pivot) {
459         dgeqp3_(&m,&n,a,&lda,ipvt,tau,work,&lwork,&info);
460     } else {
461         dgeqrf_(&m,&n,a,&lda,tau,work,&lwork,&info);
462     }
463     assert(info == 0);
464 
465     /* set rdiag, before the diagonal is replaced */
466     memset(rdiag, 0, sizeof(double)*n);
467     for(i=0 ; i<n ; ++i) {
468         rdiag[i] = a[i*lda+i];
469     }
470 
471     /* modify lower trinagular part to look like qrfac's output */
472     for(i=0 ; i<ltau ; ++i) {
473         k = i*lda+i;
474         t = tau[i];
475         a[k] = t;
476         for(j=i+1 ; j<m ; j++) {
477             k++;
478             a[k] *= t;
479         }
480     }
481 
482     free(work);
483 #else /* !USE_LAPACK */
484     /* Initialized data */
485 
486 #define p05 .05
487 
488     /* System generated locals */
489     real d1;
490 
491     /* Local variables */
492     int i, j, k, jp1;
493     real sum;
494     real temp;
495     real epsmch;
496     real ajnorm;
497 
498 /*     ********** */
499 
500 /*     subroutine qrfac */
501 
502 /*     this subroutine uses householder transformations with column */
503 /*     pivoting (optional) to compute a qr factorization of the */
504 /*     m by n matrix a. that is, qrfac determines an orthogonal */
505 /*     matrix q, a permutation matrix p, and an upper trapezoidal */
506 /*     matrix r with diagonal elements of nonincreasing magnitude, */
507 /*     such that a*p = q*r. the householder transformation for */
508 /*     column k, k = 1,2,...,min(m,n), is of the form */
509 
510 /*                           t */
511 /*           i - (1/u(k))*u*u */
512 
513 /*     where u has zeros in the first k-1 positions. the form of */
514 /*     this transformation and the method of pivoting first */
515 /*     appeared in the corresponding linpack subroutine. */
516 
517 /*     the subroutine statement is */
518 
519 /*       subroutine qrfac(m,n,a,lda,pivot,ipvt,lipvt,rdiag,acnorm,wa) */
520 
521 /*     where */
522 
523 /*       m is a positive integer input variable set to the number */
524 /*         of rows of a. */
525 
526 /*       n is a positive integer input variable set to the number */
527 /*         of columns of a. */
528 
529 /*       a is an m by n array. on input a contains the matrix for */
530 /*         which the qr factorization is to be computed. on output */
531 /*         the strict upper trapezoidal part of a contains the strict */
532 /*         upper trapezoidal part of r, and the lower trapezoidal */
533 /*         part of a contains a factored form of q (the non-trivial */
534 /*         elements of the u vectors described above). */
535 
536 /*       lda is a positive integer input variable not less than m */
537 /*         which specifies the leading dimension of the array a. */
538 
539 /*       pivot is a logical input variable. if pivot is set true, */
540 /*         then column pivoting is enforced. if pivot is set false, */
541 /*         then no column pivoting is done. */
542 
543 /*       ipvt is an integer output array of length lipvt. ipvt */
544 /*         defines the permutation matrix p such that a*p = q*r. */
545 /*         column j of p is column ipvt(j) of the identity matrix. */
546 /*         if pivot is false, ipvt is not referenced. */
547 
548 /*       lipvt is a positive integer input variable. if pivot is false, */
549 /*         then lipvt may be as small as 1. if pivot is true, then */
550 /*         lipvt must be at least n. */
551 
552 /*       rdiag is an output array of length n which contains the */
553 /*         diagonal elements of r. */
554 
555 /*       acnorm is an output array of length n which contains the */
556 /*         norms of the corresponding columns of the input matrix a. */
557 /*         if this information is not needed, then acnorm can coincide */
558 /*         with rdiag. */
559 
560 /*       wa is a work array of length n. if pivot is false, then wa */
561 /*         can coincide with rdiag. */
562 
563 /*     subprograms called */
564 
565 /*       minpack-supplied ... dpmpar,enorm */
566 
567 /*       fortran-supplied ... dmax1,dsqrt,min0 */
568 
569 /*     argonne national laboratory. minpack project. march 1980. */
570 /*     burton s. garbow, kenneth e. hillstrom, jorge j. more */
571 
572 /*     ********** */
573     (void)lipvt;
574 
575 /*     epsmch is the machine precision. */
576 
577     epsmch = dpmpar(1);
578 
579 /*     compute the initial column norms and initialize several arrays. */
580 
581     for (j = 0; j < N; ++j) {
582 	acnorm[j] = enorm<N>(&a[j * lda + 0]);
583 	rdiag[j] = acnorm[j];
584 	wa[j] = rdiag[j];
585 	if (pivot) {
586 	    ipvt[j] = j+1;
587 	}
588     }
589 
590 /*     reduce a to r with householder transformations. */
591 
592     for (j = 0; j < N; ++j) {
593 	if (pivot) {
594 
595 /*        bring the column of largest norm into the pivot position. */
596 
597             int kmax = j;
598             for (k = j; k < N; ++k) {
599                 if (rdiag[k] > rdiag[kmax]) {
600                     kmax = k;
601                 }
602             }
603             if (kmax != j) {
604                 for (i = 0; i < N; ++i) {
605                     temp = a[i + j * lda];
606                     a[i + j * lda] = a[i + kmax * lda];
607                     a[i + kmax * lda] = temp;
608                 }
609                 rdiag[kmax] = rdiag[j];
610                 wa[kmax] = wa[j];
611                 k = ipvt[j];
612                 ipvt[j] = ipvt[kmax];
613                 ipvt[kmax] = k;
614             }
615         }
616 
617 /*        compute the householder transformation to reduce the */
618 /*        j-th column of a to a multiple of the j-th unit vector. */
619 
620 	ajnorm = enorm(N - (j+1) + 1, &a[j + j * lda]);
621 	if (ajnorm != 0.) {
622             if (a[j + j * lda] < 0.) {
623                 ajnorm = -ajnorm;
624             }
625             for (i = j; i < N; ++i) {
626                 a[i + j * lda] /= ajnorm;
627             }
628             a[j + j * lda] += 1.;
629 
630 /*        apply the transformation to the remaining columns */
631 /*        and update the norms. */
632 
633             jp1 = j + 1;
634             if (N > jp1) {
635                 for (k = jp1; k < N; ++k) {
636                     sum = 0.;
637                     for (i = j; i < N; ++i) {
638                         sum += a[i + j * lda] * a[i + k * lda];
639                     }
640                     temp = sum / a[j + j * lda];
641                     for (i = j; i < N; ++i) {
642                         a[i + k * lda] -= temp * a[i + j * lda];
643                     }
644                     if (pivot && rdiag[k] != 0.) {
645                         temp = a[j + k * lda] / rdiag[k];
646                         /* Computing MAX */
647                         d1 = 1. - temp * temp;
648                         rdiag[k] *= sqrt((std::max((real)0.,d1)));
649                         /* Computing 2nd power */
650                         d1 = rdiag[k] / wa[k];
651                         if (p05 * (d1 * d1) <= epsmch) {
652                             rdiag[k] = enorm(N - (j+1), &a[jp1 + k * lda]);
653                             wa[k] = rdiag[k];
654                         }
655                     }
656                 }
657             }
658         }
659 	rdiag[j] = -ajnorm;
660     }
661 
662 /*     last card of subroutine qrfac. */
663 #endif /* !USE_LAPACK */
664 } /* qrfac_ */
665 
666 template<int N>
dogleg(const real * r,int lr,const real * diag,const real * qtb,real delta,real * x,real * wa1,real * wa2)667 void dogleg(const real *r, int lr, const real *diag, const real *qtb, real delta, real *x,
668 	    real *wa1, real *wa2)
669 {
670     /* System generated locals */
671     real d1, d2, d3, d4;
672 
673     /* Local variables */
674     int i, j, k, l, jj, jp1;
675     real sum, temp, alpha, bnorm;
676     real gnorm, qnorm, epsmch;
677     real sgnorm;
678 
679     /* Parameter adjustments */
680     --wa2;
681     --wa1;
682     --x;
683     --qtb;
684     --diag;
685     --r;
686     (void)lr;
687 
688     /* Function Body */
689 
690 /*     epsmch is the machine precision. */
691 
692     epsmch = dpmpar(1);
693 
694 /*     first, calculate the gauss-newton direction. */
695 
696     jj = N * (N + 1) / 2 + 1;
697     for (k = 1; k <= N; ++k) {
698 	j = N - k + 1;
699 	jp1 = j + 1;
700 	jj -= k;
701 	l = jj + 1;
702 	sum = 0.;
703 	if (N >= jp1) {
704             for (i = jp1; i <= N; ++i) {
705                 sum += r[l] * x[i];
706                 ++l;
707             }
708         }
709 	temp = r[jj];
710 	if (temp == 0.) {
711             l = j;
712             for (i = 1; i <= j; ++i) {
713                 /* Computing MAX */
714                 d2 = fabs(r[l]);
715                 temp = std::max(temp,d2);
716                 l = l + N - i;
717             }
718             temp = epsmch * temp;
719             if (temp == 0.) {
720                 temp = epsmch;
721             }
722         }
723 	x[j] = (qtb[j] - sum) / temp;
724     }
725 
726 /*     test whether the gauss-newton direction is acceptable. */
727 
728     for (j = 1; j <= N; ++j) {
729 	wa1[j] = 0.;
730 	wa2[j] = diag[j] * x[j];
731     }
732     qnorm = enorm<N>(&wa2[1]);
733     if (qnorm <= delta) {
734         return;
735     }
736 
737 /*     the gauss-newton direction is not acceptable. */
738 /*     next, calculate the scaled gradient direction. */
739 
740     l = 1;
741     for (j = 1; j <= N; ++j) {
742 	temp = qtb[j];
743 	for (i = j; i <= N; ++i) {
744 	    wa1[i] += r[l] * temp;
745 	    ++l;
746 	}
747 	wa1[j] /= diag[j];
748     }
749 
750 /*     calculate the norm of the scaled gradient and test for */
751 /*     the special case in which the scaled gradient is zero. */
752 
753     gnorm = enorm<N>(&wa1[1]);
754     sgnorm = 0.;
755     alpha = delta / qnorm;
756     if (gnorm != 0.) {
757 
758 /*     calculate the point along the scaled gradient */
759 /*     at which the quadratic is minimized. */
760 
761         for (j = 1; j <= N; ++j) {
762             wa1[j] = wa1[j] / gnorm / diag[j];
763         }
764         l = 1;
765         for (j = 1; j <= N; ++j) {
766             sum = 0.;
767             for (i = j; i <= N; ++i) {
768                 sum += r[l] * wa1[i];
769                 ++l;
770             }
771             wa2[j] = sum;
772         }
773         temp = enorm<N>(&wa2[1]);
774         sgnorm = gnorm / temp / temp;
775 
776 /*     test whether the scaled gradient direction is acceptable. */
777 
778         alpha = 0.;
779         if (sgnorm < delta) {
780 
781 /*     the scaled gradient direction is not acceptable. */
782 /*     finally, calculate the point along the dogleg */
783 /*     at which the quadratic is minimized. */
784 
785             bnorm = enorm<N>(&qtb[1]);
786             temp = bnorm / gnorm * (bnorm / qnorm) * (sgnorm / delta);
787             /* Computing 2nd power */
788             d1 = sgnorm / delta;
789             /* Computing 2nd power */
790             d2 = temp - delta / qnorm;
791             /* Computing 2nd power */
792             d3 = delta / qnorm;
793             /* Computing 2nd power */
794             d4 = sgnorm / delta;
795             temp = temp - delta / qnorm * (d1 * d1)
796                    + sqrt(d2 * d2
797                           + (1. - d3 * d3) * (1. - d4 * d4));
798             /* Computing 2nd power */
799             d1 = sgnorm / delta;
800             alpha = delta / qnorm * (1. - d1 * d1) / temp;
801         }
802     }
803 
804 /*     form appropriate convex combination of the gauss-newton */
805 /*     direction and the scaled gradient direction. */
806 
807     temp = (1. - alpha) * std::min(sgnorm,delta);
808     for (j = 1; j <= N; ++j) {
809 	x[j] = temp * wa1[j] + alpha * x[j];
810     }
811 
812 } /* dogleg_ */
813 
814 template<int N>
hybrdX(root_fcn * fcn_nn,void * p,real * x,real * fvec,real xtol,int maxfev,int ml,int mu,real epsfcn,real * diag,int mode,real factor,int nprint,int * nfev,real * fjac,int ldfjac,real * r,int lr,real * qtf,real * wa1,real * wa2,real * wa3,real * wa4)815 int hybrdX(root_fcn *fcn_nn, void *p, real *x, real *
816 	fvec, real xtol, int maxfev, int ml, int mu,
817 	real epsfcn, real *diag, int mode, real
818 	factor, int nprint, int *nfev, real *
819 	fjac, int ldfjac, real *r, int lr, real *qtf,
820 	real *wa1, real *wa2, real *wa3, real *wa4)
821 {
822     /* Initialized data */
823 
824 #define p1 .1
825 #define p5 .5
826 #define p001 .001
827 #define p0001 1e-4
828 
829     /* System generated locals */
830     int fjac_dim1, fjac_offset, i1;
831     real d1, d2;
832 
833     /* Local variables */
834     int i, j, l, jm1, iwa[1];
835     real sum;
836     int sing;
837     int iter;
838     real temp;
839     int msum, iflag;
840     real delta = 0.;
841     int jeval;
842     int ncsuc;
843     real ratio;
844     real fnorm;
845     real pnorm, xnorm = 0., fnorm1;
846     int nslow1, nslow2;
847     int ncfail;
848     real actred, epsmch, prered;
849     int info;
850 
851     /* Parameter adjustments */
852     --wa4;
853     --wa3;
854     --wa2;
855     --wa1;
856     --qtf;
857     --diag;
858     --fvec;
859     --x;
860     fjac_dim1 = ldfjac;
861     fjac_offset = 1 + fjac_dim1 * 1;
862     fjac -= fjac_offset;
863     --r;
864 
865     /* Function Body */
866 
867 /*     epsmch is the machine precision. */
868 
869     epsmch = dpmpar(1);
870 
871     info = 0;
872     iflag = 0;
873     *nfev = 0;
874 
875 /*     check the input parameters for errors. */
876 
877     if (N <= 0 || xtol < 0. || maxfev <= 0 || ml < 0 || mu < 0 ||
878 	    factor <= 0. || ldfjac < N || lr < N * (N + 1) / 2) {
879 	goto TERMINATE;
880     }
881     if (mode == 2) {
882         for (j = 1; j <= N; ++j) {
883             if (diag[j] <= 0.) {
884                 goto TERMINATE;
885             }
886         }
887     }
888 
889 /*     evaluate the function at the starting point */
890 /*     and calculate its norm. */
891 
892     iflag = fcn_nn(p, &x[1], &fvec[1], 1);
893     *nfev = 1;
894     if (iflag < 0) {
895 	goto TERMINATE;
896     }
897     fnorm = enorm2<N>(&fvec[1]);
898 
899 /*     determine the number of calls to fcn needed to compute */
900 /*     the jacobian matrix. */
901 
902 /* Computing MIN */
903     i1 = ml + mu + 1;
904     msum = std::min(i1,N);
905 
906 /*     initialize iteration counter and monitors. */
907 
908     iter = 1;
909     ncsuc = 0;
910     ncfail = 0;
911     nslow1 = 0;
912     nslow2 = 0;
913 
914 /*     beginning of the outer loop. */
915 
916     for (;;) {
917         jeval = true;
918 
919 /*        calculate the jacobian matrix. */
920 
921         iflag = fdjac1<N>(fcn_nn, p, &x[1], &fvec[1], &fjac[fjac_offset], ldfjac,
922 			  ml, mu, epsfcn, &wa1[1], &wa2[1]);
923         *nfev += msum;
924         if (iflag < 0) {
925             goto TERMINATE;
926         }
927 
928 /*        compute the qr factorization of the jacobian. */
929 
930         qrfac<N>(&fjac[fjac_offset], ldfjac, false, iwa, 1, &wa1[1], &wa2[1], &wa3[1]);
931 
932 /*        on the first iteration and if mode is 1, scale according */
933 /*        to the norms of the columns of the initial jacobian. */
934 
935         if (iter == 1) {
936             if (mode != 2) {
937                 for (j = 1; j <= N; ++j) {
938                     diag[j] = wa2[j];
939                     if (wa2[j] == 0.) {
940                         diag[j] = 1.;
941                     }
942                 }
943             }
944 
945 /*        on the first iteration, calculate the norm of the scaled x */
946 /*        and initialize the step bound delta. */
947 
948             for (j = 1; j <= N; ++j) {
949                 wa3[j] = diag[j] * x[j];
950             }
951             xnorm = enorm<N>(&wa3[1]);
952             delta = factor * xnorm;
953             if (delta == 0.) {
954                 delta = factor;
955             }
956         }
957 
958 /*        form (q transpose)*fvec and store in qtf. */
959 
960         for (i = 1; i <= N; ++i) {
961             qtf[i] = fvec[i];
962         }
963         for (j = 1; j <= N; ++j) {
964             if (fjac[j + j * fjac_dim1] != 0.) {
965                 sum = 0.;
966                 for (i = j; i <= N; ++i) {
967                     sum += fjac[i + j * fjac_dim1] * qtf[i];
968                 }
969                 temp = -sum / fjac[j + j * fjac_dim1];
970                 for (i = j; i <= N; ++i) {
971                     qtf[i] += fjac[i + j * fjac_dim1] * temp;
972                 }
973             }
974         }
975 
976 /*        copy the triangular factor of the qr factorization into r. */
977 
978         sing = false;
979         for (j = 1; j <= N; ++j) {
980             l = j;
981             jm1 = j - 1;
982             if (jm1 >= 1) {
983                 for (i = 1; i <= jm1; ++i) {
984                     r[l] = fjac[i + j * fjac_dim1];
985                     l = l + N - i;
986                 }
987             }
988             r[l] = wa1[j];
989             if (wa1[j] == 0.) {
990                 sing = true;
991             }
992         }
993 
994 /*        accumulate the orthogonal factor in fjac. */
995 
996         qform<N>(&fjac[fjac_offset], ldfjac, &wa1[1]);
997 
998 /*        rescale if necessary. */
999 
1000         if (mode != 2) {
1001             for (j = 1; j <= N; ++j) {
1002                 /* Computing MAX */
1003                 d1 = diag[j], d2 = wa2[j];
1004                 diag[j] = std::max(d1,d2);
1005             }
1006         }
1007 
1008 /*        beginning of the inner loop. */
1009 
1010         for (;;) {
1011 
1012 /*           if requested, call fcn to enable printing of iterates. */
1013 
1014             if (nprint > 0) {
1015                 iflag = 0;
1016                 if ((iter - 1) % nprint == 0) {
1017                     iflag = fcn_nn(p, &x[1], &fvec[1], 0);
1018                 }
1019                 if (iflag < 0) {
1020                     goto TERMINATE;
1021                 }
1022             }
1023 
1024 /*           determine the direction p. */
1025 
1026             dogleg<N>(&r[1], lr, &diag[1], &qtf[1], delta, &wa1[1], &wa2[1], &wa3[1]);
1027 
1028 /*           store the direction p and x + p. calculate the norm of p. */
1029 
1030             for (j = 1; j <= N; ++j) {
1031                 wa1[j] = -wa1[j];
1032                 wa2[j] = x[j] + wa1[j];
1033                 wa3[j] = diag[j] * wa1[j];
1034             }
1035             pnorm = enorm<N>(&wa3[1]);
1036 
1037 /*           on the first iteration, adjust the initial step bound. */
1038 
1039             if (iter == 1) {
1040                 delta = std::min(delta,pnorm);
1041             }
1042 
1043 /*           evaluate the function at x + p and calculate its norm. */
1044 
1045             iflag = fcn_nn(p, &wa2[1], &wa4[1], 1);
1046             ++(*nfev);
1047             if (iflag < 0) {
1048                 goto TERMINATE;
1049             }
1050             fnorm1 = enorm2<N>(&wa4[1]);
1051 
1052 /*           compute the scaled actual reduction. */
1053 
1054             actred = -1.;
1055             if (fnorm1 < fnorm) {
1056                 /* already 2nd power */
1057                 actred = 1. - fnorm1 / fnorm;
1058             }
1059 
1060 /*           compute the scaled predicted reduction. */
1061 
1062             l = 1;
1063             for (i = 1; i <= N; ++i) {
1064                 sum = 0.;
1065                 for (j = i; j <= N; ++j) {
1066                     sum += r[l] * wa1[j];
1067                     ++l;
1068                 }
1069                 wa3[i] = qtf[i] + sum;
1070             }
1071             temp = enorm2<N>(&wa3[1]);
1072             prered = 0.;
1073             if (temp < fnorm) {
1074                 /* already 2nd power */
1075                 prered = 1. - temp / fnorm;
1076             }
1077 
1078 /*           compute the ratio of the actual to the predicted */
1079 /*           reduction. */
1080 
1081             ratio = 0.;
1082             if (prered > 0.) {
1083                 ratio = actred / prered;
1084             }
1085 
1086 /*           update the step bound. */
1087 
1088             if (ratio < p1) {
1089                 ncsuc = 0;
1090                 ++ncfail;
1091                 delta = p5 * delta;
1092             } else {
1093                 ncfail = 0;
1094                 ++ncsuc;
1095                 if (ratio >= p5 || ncsuc > 1) {
1096                     /* Computing MAX */
1097                     d1 = pnorm / p5;
1098                     delta = std::max(delta,d1);
1099                 }
1100                 if (fabs(ratio - 1.) <= p1) {
1101                     delta = pnorm / p5;
1102                 }
1103             }
1104 
1105 /*           test for successful iteration. */
1106 
1107             if (ratio >= p0001) {
1108 
1109 /*           successful iteration. update x, fvec, and their norms. */
1110 
1111                 for (j = 1; j <= N; ++j) {
1112                     x[j] = wa2[j];
1113                     wa2[j] = diag[j] * x[j];
1114                     fvec[j] = wa4[j];
1115                 }
1116                 xnorm = enorm<N>(&wa2[1]);
1117                 fnorm = fnorm1;
1118                 ++iter;
1119             }
1120 
1121 /*           determine the progress of the iteration. */
1122 
1123             ++nslow1;
1124             if (actred >= p001) {
1125                 nslow1 = 0;
1126             }
1127             if (jeval) {
1128                 ++nslow2;
1129             }
1130             if (actred >= p1) {
1131                 nslow2 = 0;
1132             }
1133 
1134 /*           test for convergence. */
1135 
1136             if (delta <= xtol * xnorm || fnorm == 0.) {
1137                 info = 1;
1138             }
1139             if (info != 0) {
1140                 goto TERMINATE;
1141             }
1142 
1143 /*           tests for termination and stringent tolerances. */
1144 
1145             if (*nfev >= maxfev) {
1146                 info = 2;
1147             }
1148             /* Computing MAX */
1149             d1 = p1 * delta;
1150             if (p1 * std::max(d1,pnorm) <= epsmch * xnorm) {
1151                 info = 3;
1152             }
1153             if (nslow2 == 5) {
1154                 info = 4;
1155             }
1156             if (nslow1 == 10) {
1157                 info = 5;
1158             }
1159             if (info != 0) {
1160                 goto TERMINATE;
1161             }
1162 
1163 /*           criterion for recalculating jacobian approximation */
1164 /*           by forward differences. */
1165 
1166             if (ncfail == 2) {
1167                 goto TERMINATE_INNER_LOOP;
1168             }
1169 
1170 /*           calculate the rank one modification to the jacobian */
1171 /*           and update qtf if necessary. */
1172 
1173             for (j = 1; j <= N; ++j) {
1174                 sum = 0.;
1175                 for (i = 1; i <= N; ++i) {
1176                     sum += fjac[i + j * fjac_dim1] * wa4[i];
1177                 }
1178                 wa2[j] = (sum - wa3[j]) / pnorm;
1179                 wa1[j] = diag[j] * (diag[j] * wa1[j] / pnorm);
1180                 if (ratio >= p0001) {
1181                     qtf[j] = sum;
1182                 }
1183             }
1184 
1185 /*           compute the qr factorization of the updated jacobian. */
1186 
1187             r1updt<N>(&r[1], lr, &wa1[1], &wa2[1], &wa3[1], &sing);
1188             r1mpyq<N, N>(&fjac[fjac_offset], ldfjac, &wa2[1], &wa3[1]);
1189             r1mpyq<1, N>(&qtf[1], 1, &wa2[1], &wa3[1]);
1190 
1191 /*           end of the inner loop. */
1192 
1193             jeval = false;
1194         }
1195 TERMINATE_INNER_LOOP:
1196         ;
1197 /*        end of the outer loop. */
1198 
1199     }
1200 TERMINATE:
1201 
1202 /*     termination, either normal or user imposed. */
1203 
1204     if (iflag < 0) {
1205 	info = iflag;
1206     }
1207     if (nprint > 0) {
1208 	fcn_nn(p, &x[1], &fvec[1], 0);
1209     }
1210     return info;
1211 
1212 /*     last card of subroutine hybrd. */
1213 
1214 } /* hybrd_ */
1215