1 #if 0
2 #define real_EPSILON FLT_EPSILON
3 #define real_MIN FLT_MIN
4 #define real_MAX FLT_MAX
5 #else
6 #define real_EPSILON DBL_EPSILON
7 #define real_MIN DBL_MIN
8 #define real_MAX DBL_MAX
9 #endif
10
dpmpar(int i)11 real inline dpmpar(int i)
12 {
13 switch(i) {
14 case 1:
15 return real_EPSILON;
16 case 2:
17 return real_MIN;
18 default:
19 return real_MAX;
20 }
21 }
22
23 template<int N>
enorm(const real * fvec)24 real inline enorm(const real *fvec) {
25 real s = 0;
26 for (int i = 0; i < N; i++) {
27 s += fvec[i] * fvec[i];
28 }
29 return sqrt(s);
30 }
31
enorm(int n,const real * fvec)32 real inline enorm(int n, const real *fvec) {
33 real s = 0;
34 for (int i = 0; i < n; i++) {
35 s += fvec[i] * fvec[i];
36 }
37 return sqrt(s);
38 }
39
40 template<int N>
enorm2(const real * fvec)41 real enorm2(const real *fvec) {
42 real s = 0;
43 for (int i = 0; i < N; i++) {
44 s += fvec[i] * fvec[i];
45 }
46 return s;
47 }
48
49 template<int N>
qform(real * q,int ldq,real * wa)50 void qform(real *q, int ldq, real *wa)
51 {
52 /* System generated locals */
53 int q_dim1, q_offset;
54
55 /* Local variables */
56 int i, j, k, l;
57 real sum, temp;
58
59 /* Parameter adjustments */
60 --wa;
61 q_dim1 = ldq;
62 q_offset = 1 + q_dim1 * 1;
63 q -= q_offset;
64
65 /* Function Body */
66
67 /* zero out upper triangle of q in the first min(m,n) columns. */
68
69 if (N >= 2) {
70 for (j = 2; j <= N; ++j) {
71 for (i = 1; i <= j-1; ++i) {
72 q[i + j * q_dim1] = 0.;
73 }
74 }
75 }
76
77 /* accumulate q from its factored form. */
78
79 for (l = 1; l <= N; ++l) {
80 k = N - l + 1;
81 for (i = k; i <= N; ++i) {
82 wa[i] = q[i + k * q_dim1];
83 q[i + k * q_dim1] = 0.;
84 }
85 q[k + k * q_dim1] = 1.;
86 if (wa[k] != 0.) {
87 for (j = k; j <= N; ++j) {
88 sum = 0.;
89 for (i = k; i <= N; ++i) {
90 sum += q[i + j * q_dim1] * wa[i];
91 }
92 temp = sum / wa[k];
93 for (i = k; i <= N; ++i) {
94 q[i + j * q_dim1] -= temp * wa[i];
95 }
96 }
97 }
98 }
99 } /* qform_ */
100
101 template<int N>
r1updt(real * s,int ls,const real * u,real * v,real * w,int * sing)102 void r1updt(real *s, int ls, const real *u, real *v, real *w, int *sing)
103 {
104 /* Initialized data */
105
106 #define p5 .5
107 #define p25 .25
108
109 /* Local variables */
110 int i, j, l, jj, nm1;
111 real tan;
112 int nmj;
113 real cos, sin, tau, temp, giant, cotan;
114
115 /* Parameter adjustments */
116 --w;
117 --u;
118 --v;
119 --s;
120 (void)ls;
121
122 /* Function Body */
123
124 /* giant is the largest magnitude. */
125
126 giant = dpmpar(3);
127
128 /* initialize the diagonal element pointer. */
129
130 jj = N * ((N << 1) - N + 1) / 2 - (N - N);
131
132 /* move the nontrivial part of the last column of s into w. */
133
134 l = jj;
135 for (i = N; i <= N; ++i) {
136 w[i] = s[l];
137 ++l;
138 }
139
140 /* rotate the vector v into a multiple of the n-th unit vector */
141 /* in such a way that a spike is introduced into w. */
142
143 nm1 = N - 1;
144 if (nm1 >= 1) {
145 for (nmj = 1; nmj <= nm1; ++nmj) {
146 j = N - nmj;
147 jj -= N - j + 1;
148 w[j] = 0.;
149 if (v[j] != 0.) {
150
151 /* determine a givens rotation which eliminates the */
152 /* j-th element of v. */
153
154 if (fabs(v[N]) < fabs(v[j])) {
155 cotan = v[N] / v[j];
156 sin = p5 / sqrt(p25 + p25 * (cotan * cotan));
157 cos = sin * cotan;
158 tau = 1.;
159 if (fabs(cos) * giant > 1.) {
160 tau = 1. / cos;
161 }
162 } else {
163 tan = v[j] / v[N];
164 cos = p5 / sqrt(p25 + p25 * (tan * tan));
165 sin = cos * tan;
166 tau = sin;
167 }
168
169 /* apply the transformation to v and store the information */
170 /* necessary to recover the givens rotation. */
171
172 v[N] = sin * v[j] + cos * v[N];
173 v[j] = tau;
174
175 /* apply the transformation to s and extend the spike in w. */
176
177 l = jj;
178 for (i = j; i <= N; ++i) {
179 temp = cos * s[l] - sin * w[i];
180 w[i] = sin * s[l] + cos * w[i];
181 s[l] = temp;
182 ++l;
183 }
184 }
185 }
186 }
187
188 /* add the spike from the rank 1 update to w. */
189
190 for (i = 1; i <= N; ++i) {
191 w[i] += v[N] * u[i];
192 }
193
194 /* eliminate the spike. */
195
196 *sing = false;
197 if (nm1 >= 1) {
198 for (j = 1; j <= nm1; ++j) {
199 if (w[j] != 0.) {
200
201 /* determine a givens rotation which eliminates the */
202 /* j-th element of the spike. */
203
204 if (fabs(s[jj]) < fabs(w[j])) {
205 cotan = s[jj] / w[j];
206 sin = p5 / sqrt(p25 + p25 * (cotan * cotan));
207 cos = sin * cotan;
208 tau = 1.;
209 if (fabs(cos) * giant > 1.) {
210 tau = 1. / cos;
211 }
212 } else {
213 tan = w[j] / s[jj];
214 cos = p5 / sqrt(p25 + p25 * (tan * tan));
215 sin = cos * tan;
216 tau = sin;
217 }
218
219 /* apply the transformation to s and reduce the spike in w. */
220
221 l = jj;
222 for (i = j; i <= N; ++i) {
223 temp = cos * s[l] + sin * w[i];
224 w[i] = -sin * s[l] + cos * w[i];
225 s[l] = temp;
226 ++l;
227 }
228
229 /* store the information necessary to recover the */
230 /* givens rotation. */
231
232 w[j] = tau;
233 }
234
235 /* test for zero diagonal elements in the output s. */
236
237 if (s[jj] == 0.) {
238 *sing = true;
239 }
240 jj += N - j + 1;
241 }
242 }
243
244 /* move w back into the last column of the output s. */
245
246 l = jj;
247 for (i = N; i <= N; ++i) {
248 s[l] = w[i];
249 ++l;
250 }
251 if (s[jj] == 0.) {
252 *sing = true;
253 }
254
255 /* last card of subroutine r1updt. */
256
257 } /* __minpack_func__(r1updt) */
258
259 template<int M, int N>
r1mpyq(real * a,int lda,const real * v,const real * w)260 void r1mpyq(real *a, int lda, const real *v, const real *w)
261 {
262 /* System generated locals */
263 int a_dim1, a_offset;
264
265 /* Local variables */
266 int i, j, nm1, nmj;
267 real cos, sin, temp;
268
269 /* Parameter adjustments */
270 --w;
271 --v;
272 a_dim1 = lda;
273 a_offset = 1 + a_dim1 * 1;
274 a -= a_offset;
275
276 /* Function Body */
277
278 /* apply the first set of givens rotations to a. */
279
280 nm1 = N - 1;
281 if (nm1 < 1) {
282 return;
283 }
284 for (nmj = 1; nmj <= nm1; ++nmj) {
285 j = N - nmj;
286 if (fabs(v[j]) > 1.) {
287 cos = 1. / v[j];
288 sin = sqrt(1. - cos * cos);
289 } else {
290 sin = v[j];
291 cos = sqrt(1. - sin * sin);
292 }
293 for (i = 1; i <= M; ++i) {
294 temp = cos * a[i + j * a_dim1] - sin * a[i + N * a_dim1];
295 a[i + N * a_dim1] = sin * a[i + j * a_dim1] + cos * a[
296 i + N * a_dim1];
297 a[i + j * a_dim1] = temp;
298 }
299 }
300
301 /* apply the second set of givens rotations to a. */
302
303 for (j = 1; j <= nm1; ++j) {
304 if (fabs(w[j]) > 1.) {
305 cos = 1. / w[j];
306 sin = sqrt(1. - cos * cos);
307 } else {
308 sin = w[j];
309 cos = sqrt(1. - sin * sin);
310 }
311 for (i = 1; i <= M; ++i) {
312 temp = cos * a[i + j * a_dim1] + sin * a[i + N * a_dim1];
313 a[i + N * a_dim1] = -sin * a[i + j * a_dim1] + cos * a[i + N * a_dim1];
314 a[i + j * a_dim1] = temp;
315 }
316 }
317
318 /* last card of subroutine r1mpyq. */
319
320 } /* r1mpyq_ */
321
322 template<int N>
fdjac1(root_fcn * fcn_nn,void * p,real * x,const real * fvec,real * fjac,int ldfjac,int ml,int mu,real epsfcn,real * wa1,real * wa2)323 int fdjac1(root_fcn *fcn_nn, void *p, real *x, const real *fvec, real *fjac, int ldfjac, int ml,
324 int mu, real epsfcn, real *wa1, real *wa2)
325 {
326 /* System generated locals */
327 int fjac_dim1, fjac_offset;
328
329 /* Local variables */
330 real h;
331 int i, j, k;
332 real eps, temp;
333 int msum;
334 real epsmch;
335 int iflag = 0;
336
337 /* Parameter adjustments */
338 --wa2;
339 --wa1;
340 --fvec;
341 --x;
342 fjac_dim1 = ldfjac;
343 fjac_offset = 1 + fjac_dim1 * 1;
344 fjac -= fjac_offset;
345
346 /* Function Body */
347
348 /* epsmch is the machine precision. */
349
350 epsmch = dpmpar(1);
351
352 eps = sqrt((std::max(epsfcn,epsmch)));
353 msum = ml + mu + 1;
354 if (msum >= N) {
355
356 /* computation of dense approximate jacobian. */
357
358 for (j = 1; j <= N; ++j) {
359 temp = x[j];
360 h = eps * fabs(temp);
361 if (h == 0.) {
362 h = eps;
363 }
364 x[j] = temp + h;
365 /* the last parameter of fcn_nn() is set to 2 to tell calls
366 made to compute the function from calls made to compute
367 the Jacobian (see fcn() in tlmfdrv.c) */
368 iflag = fcn_nn(p, &x[1], &wa1[1], 2);
369 if (iflag < 0) {
370 return iflag;
371 }
372 x[j] = temp;
373 for (i = 1; i <= N; ++i) {
374 fjac[i + j * fjac_dim1] = (wa1[i] - fvec[i]) / h;
375 }
376 }
377 return 0;
378 }
379
380 /* computation of banded approximate jacobian. */
381
382 for (k = 1; k <= msum; ++k) {
383 for (j = k; msum < 0 ? j >= N : j <= N; j += msum) {
384 wa2[j] = x[j];
385 h = eps * fabs(wa2[j]);
386 if (h == 0.) {
387 h = eps;
388 }
389 x[j] = wa2[j] + h;
390 }
391 iflag = fcn_nn(p, &x[1], &wa1[1], 1);
392 if (iflag < 0) {
393 return iflag;
394 }
395 for (j = k; msum < 0 ? j >= N : j <= N; j += msum) {
396 x[j] = wa2[j];
397 h = eps * fabs(wa2[j]);
398 if (h == 0.) {
399 h = eps;
400 }
401 for (i = 1; i <= N; ++i) {
402 fjac[i + j * fjac_dim1] = 0.;
403 if (i >= j - mu && i <= j + ml) {
404 fjac[i + j * fjac_dim1] = (wa1[i] - fvec[i]) / h;
405 }
406 }
407 }
408 }
409 return 0;
410
411 } /* fdjac1_ */
412
413 template<int N>
qrfac(real * a,int lda,int pivot,int * ipvt,int lipvt,real * rdiag,real * acnorm,real * wa)414 void qrfac(real *a, int lda, int pivot, int *ipvt, int lipvt, real *rdiag,
415 real *acnorm, real *wa)
416 {
417 #ifdef USE_LAPACK
418 int i, j, k;
419 double t;
420 double* tau = wa;
421 const int ltau = N;
422 int lwork = -1;
423 int info = 0;
424 double* work;
425
426 if (pivot) {
427 assert( lipvt >= N );
428 /* set all columns free */
429 memset(ipvt, 0, sizeof(int)*N);
430 }
431
432 /* query optimal size of work */
433 lwork = -1;
434 if (pivot) {
435 dgeqp3_(&m,&n,a,&lda,ipvt,tau,tau,&lwork,&info);
436 lwork = (int)tau[0];
437 assert( lwork >= 3*n+1 );
438 } else {
439 dgeqrf_(&m,&n,a,&lda,tau,tau,&lwork,&info);
440 lwork = (int)tau[0];
441 assert( lwork >= 1 && lwork >= n );
442 }
443
444 assert( info == 0 );
445
446 /* alloc work area */
447 work = (double *)malloc(sizeof(double)*lwork);
448 assert(work != NULL);
449
450 /* set acnorm first (from the doc of qrfac, acnorm may point to the same area as rdiag) */
451 if (acnorm != rdiag) {
452 for (j = 0; j < n; ++j) {
453 acnorm[j] = enorm<N>(&a[j * lda]);
454 }
455 }
456
457 /* QR decomposition */
458 if (pivot) {
459 dgeqp3_(&m,&n,a,&lda,ipvt,tau,work,&lwork,&info);
460 } else {
461 dgeqrf_(&m,&n,a,&lda,tau,work,&lwork,&info);
462 }
463 assert(info == 0);
464
465 /* set rdiag, before the diagonal is replaced */
466 memset(rdiag, 0, sizeof(double)*n);
467 for(i=0 ; i<n ; ++i) {
468 rdiag[i] = a[i*lda+i];
469 }
470
471 /* modify lower trinagular part to look like qrfac's output */
472 for(i=0 ; i<ltau ; ++i) {
473 k = i*lda+i;
474 t = tau[i];
475 a[k] = t;
476 for(j=i+1 ; j<m ; j++) {
477 k++;
478 a[k] *= t;
479 }
480 }
481
482 free(work);
483 #else /* !USE_LAPACK */
484 /* Initialized data */
485
486 #define p05 .05
487
488 /* System generated locals */
489 real d1;
490
491 /* Local variables */
492 int i, j, k, jp1;
493 real sum;
494 real temp;
495 real epsmch;
496 real ajnorm;
497
498 /* ********** */
499
500 /* subroutine qrfac */
501
502 /* this subroutine uses householder transformations with column */
503 /* pivoting (optional) to compute a qr factorization of the */
504 /* m by n matrix a. that is, qrfac determines an orthogonal */
505 /* matrix q, a permutation matrix p, and an upper trapezoidal */
506 /* matrix r with diagonal elements of nonincreasing magnitude, */
507 /* such that a*p = q*r. the householder transformation for */
508 /* column k, k = 1,2,...,min(m,n), is of the form */
509
510 /* t */
511 /* i - (1/u(k))*u*u */
512
513 /* where u has zeros in the first k-1 positions. the form of */
514 /* this transformation and the method of pivoting first */
515 /* appeared in the corresponding linpack subroutine. */
516
517 /* the subroutine statement is */
518
519 /* subroutine qrfac(m,n,a,lda,pivot,ipvt,lipvt,rdiag,acnorm,wa) */
520
521 /* where */
522
523 /* m is a positive integer input variable set to the number */
524 /* of rows of a. */
525
526 /* n is a positive integer input variable set to the number */
527 /* of columns of a. */
528
529 /* a is an m by n array. on input a contains the matrix for */
530 /* which the qr factorization is to be computed. on output */
531 /* the strict upper trapezoidal part of a contains the strict */
532 /* upper trapezoidal part of r, and the lower trapezoidal */
533 /* part of a contains a factored form of q (the non-trivial */
534 /* elements of the u vectors described above). */
535
536 /* lda is a positive integer input variable not less than m */
537 /* which specifies the leading dimension of the array a. */
538
539 /* pivot is a logical input variable. if pivot is set true, */
540 /* then column pivoting is enforced. if pivot is set false, */
541 /* then no column pivoting is done. */
542
543 /* ipvt is an integer output array of length lipvt. ipvt */
544 /* defines the permutation matrix p such that a*p = q*r. */
545 /* column j of p is column ipvt(j) of the identity matrix. */
546 /* if pivot is false, ipvt is not referenced. */
547
548 /* lipvt is a positive integer input variable. if pivot is false, */
549 /* then lipvt may be as small as 1. if pivot is true, then */
550 /* lipvt must be at least n. */
551
552 /* rdiag is an output array of length n which contains the */
553 /* diagonal elements of r. */
554
555 /* acnorm is an output array of length n which contains the */
556 /* norms of the corresponding columns of the input matrix a. */
557 /* if this information is not needed, then acnorm can coincide */
558 /* with rdiag. */
559
560 /* wa is a work array of length n. if pivot is false, then wa */
561 /* can coincide with rdiag. */
562
563 /* subprograms called */
564
565 /* minpack-supplied ... dpmpar,enorm */
566
567 /* fortran-supplied ... dmax1,dsqrt,min0 */
568
569 /* argonne national laboratory. minpack project. march 1980. */
570 /* burton s. garbow, kenneth e. hillstrom, jorge j. more */
571
572 /* ********** */
573 (void)lipvt;
574
575 /* epsmch is the machine precision. */
576
577 epsmch = dpmpar(1);
578
579 /* compute the initial column norms and initialize several arrays. */
580
581 for (j = 0; j < N; ++j) {
582 acnorm[j] = enorm<N>(&a[j * lda + 0]);
583 rdiag[j] = acnorm[j];
584 wa[j] = rdiag[j];
585 if (pivot) {
586 ipvt[j] = j+1;
587 }
588 }
589
590 /* reduce a to r with householder transformations. */
591
592 for (j = 0; j < N; ++j) {
593 if (pivot) {
594
595 /* bring the column of largest norm into the pivot position. */
596
597 int kmax = j;
598 for (k = j; k < N; ++k) {
599 if (rdiag[k] > rdiag[kmax]) {
600 kmax = k;
601 }
602 }
603 if (kmax != j) {
604 for (i = 0; i < N; ++i) {
605 temp = a[i + j * lda];
606 a[i + j * lda] = a[i + kmax * lda];
607 a[i + kmax * lda] = temp;
608 }
609 rdiag[kmax] = rdiag[j];
610 wa[kmax] = wa[j];
611 k = ipvt[j];
612 ipvt[j] = ipvt[kmax];
613 ipvt[kmax] = k;
614 }
615 }
616
617 /* compute the householder transformation to reduce the */
618 /* j-th column of a to a multiple of the j-th unit vector. */
619
620 ajnorm = enorm(N - (j+1) + 1, &a[j + j * lda]);
621 if (ajnorm != 0.) {
622 if (a[j + j * lda] < 0.) {
623 ajnorm = -ajnorm;
624 }
625 for (i = j; i < N; ++i) {
626 a[i + j * lda] /= ajnorm;
627 }
628 a[j + j * lda] += 1.;
629
630 /* apply the transformation to the remaining columns */
631 /* and update the norms. */
632
633 jp1 = j + 1;
634 if (N > jp1) {
635 for (k = jp1; k < N; ++k) {
636 sum = 0.;
637 for (i = j; i < N; ++i) {
638 sum += a[i + j * lda] * a[i + k * lda];
639 }
640 temp = sum / a[j + j * lda];
641 for (i = j; i < N; ++i) {
642 a[i + k * lda] -= temp * a[i + j * lda];
643 }
644 if (pivot && rdiag[k] != 0.) {
645 temp = a[j + k * lda] / rdiag[k];
646 /* Computing MAX */
647 d1 = 1. - temp * temp;
648 rdiag[k] *= sqrt((std::max((real)0.,d1)));
649 /* Computing 2nd power */
650 d1 = rdiag[k] / wa[k];
651 if (p05 * (d1 * d1) <= epsmch) {
652 rdiag[k] = enorm(N - (j+1), &a[jp1 + k * lda]);
653 wa[k] = rdiag[k];
654 }
655 }
656 }
657 }
658 }
659 rdiag[j] = -ajnorm;
660 }
661
662 /* last card of subroutine qrfac. */
663 #endif /* !USE_LAPACK */
664 } /* qrfac_ */
665
666 template<int N>
dogleg(const real * r,int lr,const real * diag,const real * qtb,real delta,real * x,real * wa1,real * wa2)667 void dogleg(const real *r, int lr, const real *diag, const real *qtb, real delta, real *x,
668 real *wa1, real *wa2)
669 {
670 /* System generated locals */
671 real d1, d2, d3, d4;
672
673 /* Local variables */
674 int i, j, k, l, jj, jp1;
675 real sum, temp, alpha, bnorm;
676 real gnorm, qnorm, epsmch;
677 real sgnorm;
678
679 /* Parameter adjustments */
680 --wa2;
681 --wa1;
682 --x;
683 --qtb;
684 --diag;
685 --r;
686 (void)lr;
687
688 /* Function Body */
689
690 /* epsmch is the machine precision. */
691
692 epsmch = dpmpar(1);
693
694 /* first, calculate the gauss-newton direction. */
695
696 jj = N * (N + 1) / 2 + 1;
697 for (k = 1; k <= N; ++k) {
698 j = N - k + 1;
699 jp1 = j + 1;
700 jj -= k;
701 l = jj + 1;
702 sum = 0.;
703 if (N >= jp1) {
704 for (i = jp1; i <= N; ++i) {
705 sum += r[l] * x[i];
706 ++l;
707 }
708 }
709 temp = r[jj];
710 if (temp == 0.) {
711 l = j;
712 for (i = 1; i <= j; ++i) {
713 /* Computing MAX */
714 d2 = fabs(r[l]);
715 temp = std::max(temp,d2);
716 l = l + N - i;
717 }
718 temp = epsmch * temp;
719 if (temp == 0.) {
720 temp = epsmch;
721 }
722 }
723 x[j] = (qtb[j] - sum) / temp;
724 }
725
726 /* test whether the gauss-newton direction is acceptable. */
727
728 for (j = 1; j <= N; ++j) {
729 wa1[j] = 0.;
730 wa2[j] = diag[j] * x[j];
731 }
732 qnorm = enorm<N>(&wa2[1]);
733 if (qnorm <= delta) {
734 return;
735 }
736
737 /* the gauss-newton direction is not acceptable. */
738 /* next, calculate the scaled gradient direction. */
739
740 l = 1;
741 for (j = 1; j <= N; ++j) {
742 temp = qtb[j];
743 for (i = j; i <= N; ++i) {
744 wa1[i] += r[l] * temp;
745 ++l;
746 }
747 wa1[j] /= diag[j];
748 }
749
750 /* calculate the norm of the scaled gradient and test for */
751 /* the special case in which the scaled gradient is zero. */
752
753 gnorm = enorm<N>(&wa1[1]);
754 sgnorm = 0.;
755 alpha = delta / qnorm;
756 if (gnorm != 0.) {
757
758 /* calculate the point along the scaled gradient */
759 /* at which the quadratic is minimized. */
760
761 for (j = 1; j <= N; ++j) {
762 wa1[j] = wa1[j] / gnorm / diag[j];
763 }
764 l = 1;
765 for (j = 1; j <= N; ++j) {
766 sum = 0.;
767 for (i = j; i <= N; ++i) {
768 sum += r[l] * wa1[i];
769 ++l;
770 }
771 wa2[j] = sum;
772 }
773 temp = enorm<N>(&wa2[1]);
774 sgnorm = gnorm / temp / temp;
775
776 /* test whether the scaled gradient direction is acceptable. */
777
778 alpha = 0.;
779 if (sgnorm < delta) {
780
781 /* the scaled gradient direction is not acceptable. */
782 /* finally, calculate the point along the dogleg */
783 /* at which the quadratic is minimized. */
784
785 bnorm = enorm<N>(&qtb[1]);
786 temp = bnorm / gnorm * (bnorm / qnorm) * (sgnorm / delta);
787 /* Computing 2nd power */
788 d1 = sgnorm / delta;
789 /* Computing 2nd power */
790 d2 = temp - delta / qnorm;
791 /* Computing 2nd power */
792 d3 = delta / qnorm;
793 /* Computing 2nd power */
794 d4 = sgnorm / delta;
795 temp = temp - delta / qnorm * (d1 * d1)
796 + sqrt(d2 * d2
797 + (1. - d3 * d3) * (1. - d4 * d4));
798 /* Computing 2nd power */
799 d1 = sgnorm / delta;
800 alpha = delta / qnorm * (1. - d1 * d1) / temp;
801 }
802 }
803
804 /* form appropriate convex combination of the gauss-newton */
805 /* direction and the scaled gradient direction. */
806
807 temp = (1. - alpha) * std::min(sgnorm,delta);
808 for (j = 1; j <= N; ++j) {
809 x[j] = temp * wa1[j] + alpha * x[j];
810 }
811
812 } /* dogleg_ */
813
814 template<int N>
hybrdX(root_fcn * fcn_nn,void * p,real * x,real * fvec,real xtol,int maxfev,int ml,int mu,real epsfcn,real * diag,int mode,real factor,int nprint,int * nfev,real * fjac,int ldfjac,real * r,int lr,real * qtf,real * wa1,real * wa2,real * wa3,real * wa4)815 int hybrdX(root_fcn *fcn_nn, void *p, real *x, real *
816 fvec, real xtol, int maxfev, int ml, int mu,
817 real epsfcn, real *diag, int mode, real
818 factor, int nprint, int *nfev, real *
819 fjac, int ldfjac, real *r, int lr, real *qtf,
820 real *wa1, real *wa2, real *wa3, real *wa4)
821 {
822 /* Initialized data */
823
824 #define p1 .1
825 #define p5 .5
826 #define p001 .001
827 #define p0001 1e-4
828
829 /* System generated locals */
830 int fjac_dim1, fjac_offset, i1;
831 real d1, d2;
832
833 /* Local variables */
834 int i, j, l, jm1, iwa[1];
835 real sum;
836 int sing;
837 int iter;
838 real temp;
839 int msum, iflag;
840 real delta = 0.;
841 int jeval;
842 int ncsuc;
843 real ratio;
844 real fnorm;
845 real pnorm, xnorm = 0., fnorm1;
846 int nslow1, nslow2;
847 int ncfail;
848 real actred, epsmch, prered;
849 int info;
850
851 /* Parameter adjustments */
852 --wa4;
853 --wa3;
854 --wa2;
855 --wa1;
856 --qtf;
857 --diag;
858 --fvec;
859 --x;
860 fjac_dim1 = ldfjac;
861 fjac_offset = 1 + fjac_dim1 * 1;
862 fjac -= fjac_offset;
863 --r;
864
865 /* Function Body */
866
867 /* epsmch is the machine precision. */
868
869 epsmch = dpmpar(1);
870
871 info = 0;
872 iflag = 0;
873 *nfev = 0;
874
875 /* check the input parameters for errors. */
876
877 if (N <= 0 || xtol < 0. || maxfev <= 0 || ml < 0 || mu < 0 ||
878 factor <= 0. || ldfjac < N || lr < N * (N + 1) / 2) {
879 goto TERMINATE;
880 }
881 if (mode == 2) {
882 for (j = 1; j <= N; ++j) {
883 if (diag[j] <= 0.) {
884 goto TERMINATE;
885 }
886 }
887 }
888
889 /* evaluate the function at the starting point */
890 /* and calculate its norm. */
891
892 iflag = fcn_nn(p, &x[1], &fvec[1], 1);
893 *nfev = 1;
894 if (iflag < 0) {
895 goto TERMINATE;
896 }
897 fnorm = enorm2<N>(&fvec[1]);
898
899 /* determine the number of calls to fcn needed to compute */
900 /* the jacobian matrix. */
901
902 /* Computing MIN */
903 i1 = ml + mu + 1;
904 msum = std::min(i1,N);
905
906 /* initialize iteration counter and monitors. */
907
908 iter = 1;
909 ncsuc = 0;
910 ncfail = 0;
911 nslow1 = 0;
912 nslow2 = 0;
913
914 /* beginning of the outer loop. */
915
916 for (;;) {
917 jeval = true;
918
919 /* calculate the jacobian matrix. */
920
921 iflag = fdjac1<N>(fcn_nn, p, &x[1], &fvec[1], &fjac[fjac_offset], ldfjac,
922 ml, mu, epsfcn, &wa1[1], &wa2[1]);
923 *nfev += msum;
924 if (iflag < 0) {
925 goto TERMINATE;
926 }
927
928 /* compute the qr factorization of the jacobian. */
929
930 qrfac<N>(&fjac[fjac_offset], ldfjac, false, iwa, 1, &wa1[1], &wa2[1], &wa3[1]);
931
932 /* on the first iteration and if mode is 1, scale according */
933 /* to the norms of the columns of the initial jacobian. */
934
935 if (iter == 1) {
936 if (mode != 2) {
937 for (j = 1; j <= N; ++j) {
938 diag[j] = wa2[j];
939 if (wa2[j] == 0.) {
940 diag[j] = 1.;
941 }
942 }
943 }
944
945 /* on the first iteration, calculate the norm of the scaled x */
946 /* and initialize the step bound delta. */
947
948 for (j = 1; j <= N; ++j) {
949 wa3[j] = diag[j] * x[j];
950 }
951 xnorm = enorm<N>(&wa3[1]);
952 delta = factor * xnorm;
953 if (delta == 0.) {
954 delta = factor;
955 }
956 }
957
958 /* form (q transpose)*fvec and store in qtf. */
959
960 for (i = 1; i <= N; ++i) {
961 qtf[i] = fvec[i];
962 }
963 for (j = 1; j <= N; ++j) {
964 if (fjac[j + j * fjac_dim1] != 0.) {
965 sum = 0.;
966 for (i = j; i <= N; ++i) {
967 sum += fjac[i + j * fjac_dim1] * qtf[i];
968 }
969 temp = -sum / fjac[j + j * fjac_dim1];
970 for (i = j; i <= N; ++i) {
971 qtf[i] += fjac[i + j * fjac_dim1] * temp;
972 }
973 }
974 }
975
976 /* copy the triangular factor of the qr factorization into r. */
977
978 sing = false;
979 for (j = 1; j <= N; ++j) {
980 l = j;
981 jm1 = j - 1;
982 if (jm1 >= 1) {
983 for (i = 1; i <= jm1; ++i) {
984 r[l] = fjac[i + j * fjac_dim1];
985 l = l + N - i;
986 }
987 }
988 r[l] = wa1[j];
989 if (wa1[j] == 0.) {
990 sing = true;
991 }
992 }
993
994 /* accumulate the orthogonal factor in fjac. */
995
996 qform<N>(&fjac[fjac_offset], ldfjac, &wa1[1]);
997
998 /* rescale if necessary. */
999
1000 if (mode != 2) {
1001 for (j = 1; j <= N; ++j) {
1002 /* Computing MAX */
1003 d1 = diag[j], d2 = wa2[j];
1004 diag[j] = std::max(d1,d2);
1005 }
1006 }
1007
1008 /* beginning of the inner loop. */
1009
1010 for (;;) {
1011
1012 /* if requested, call fcn to enable printing of iterates. */
1013
1014 if (nprint > 0) {
1015 iflag = 0;
1016 if ((iter - 1) % nprint == 0) {
1017 iflag = fcn_nn(p, &x[1], &fvec[1], 0);
1018 }
1019 if (iflag < 0) {
1020 goto TERMINATE;
1021 }
1022 }
1023
1024 /* determine the direction p. */
1025
1026 dogleg<N>(&r[1], lr, &diag[1], &qtf[1], delta, &wa1[1], &wa2[1], &wa3[1]);
1027
1028 /* store the direction p and x + p. calculate the norm of p. */
1029
1030 for (j = 1; j <= N; ++j) {
1031 wa1[j] = -wa1[j];
1032 wa2[j] = x[j] + wa1[j];
1033 wa3[j] = diag[j] * wa1[j];
1034 }
1035 pnorm = enorm<N>(&wa3[1]);
1036
1037 /* on the first iteration, adjust the initial step bound. */
1038
1039 if (iter == 1) {
1040 delta = std::min(delta,pnorm);
1041 }
1042
1043 /* evaluate the function at x + p and calculate its norm. */
1044
1045 iflag = fcn_nn(p, &wa2[1], &wa4[1], 1);
1046 ++(*nfev);
1047 if (iflag < 0) {
1048 goto TERMINATE;
1049 }
1050 fnorm1 = enorm2<N>(&wa4[1]);
1051
1052 /* compute the scaled actual reduction. */
1053
1054 actred = -1.;
1055 if (fnorm1 < fnorm) {
1056 /* already 2nd power */
1057 actred = 1. - fnorm1 / fnorm;
1058 }
1059
1060 /* compute the scaled predicted reduction. */
1061
1062 l = 1;
1063 for (i = 1; i <= N; ++i) {
1064 sum = 0.;
1065 for (j = i; j <= N; ++j) {
1066 sum += r[l] * wa1[j];
1067 ++l;
1068 }
1069 wa3[i] = qtf[i] + sum;
1070 }
1071 temp = enorm2<N>(&wa3[1]);
1072 prered = 0.;
1073 if (temp < fnorm) {
1074 /* already 2nd power */
1075 prered = 1. - temp / fnorm;
1076 }
1077
1078 /* compute the ratio of the actual to the predicted */
1079 /* reduction. */
1080
1081 ratio = 0.;
1082 if (prered > 0.) {
1083 ratio = actred / prered;
1084 }
1085
1086 /* update the step bound. */
1087
1088 if (ratio < p1) {
1089 ncsuc = 0;
1090 ++ncfail;
1091 delta = p5 * delta;
1092 } else {
1093 ncfail = 0;
1094 ++ncsuc;
1095 if (ratio >= p5 || ncsuc > 1) {
1096 /* Computing MAX */
1097 d1 = pnorm / p5;
1098 delta = std::max(delta,d1);
1099 }
1100 if (fabs(ratio - 1.) <= p1) {
1101 delta = pnorm / p5;
1102 }
1103 }
1104
1105 /* test for successful iteration. */
1106
1107 if (ratio >= p0001) {
1108
1109 /* successful iteration. update x, fvec, and their norms. */
1110
1111 for (j = 1; j <= N; ++j) {
1112 x[j] = wa2[j];
1113 wa2[j] = diag[j] * x[j];
1114 fvec[j] = wa4[j];
1115 }
1116 xnorm = enorm<N>(&wa2[1]);
1117 fnorm = fnorm1;
1118 ++iter;
1119 }
1120
1121 /* determine the progress of the iteration. */
1122
1123 ++nslow1;
1124 if (actred >= p001) {
1125 nslow1 = 0;
1126 }
1127 if (jeval) {
1128 ++nslow2;
1129 }
1130 if (actred >= p1) {
1131 nslow2 = 0;
1132 }
1133
1134 /* test for convergence. */
1135
1136 if (delta <= xtol * xnorm || fnorm == 0.) {
1137 info = 1;
1138 }
1139 if (info != 0) {
1140 goto TERMINATE;
1141 }
1142
1143 /* tests for termination and stringent tolerances. */
1144
1145 if (*nfev >= maxfev) {
1146 info = 2;
1147 }
1148 /* Computing MAX */
1149 d1 = p1 * delta;
1150 if (p1 * std::max(d1,pnorm) <= epsmch * xnorm) {
1151 info = 3;
1152 }
1153 if (nslow2 == 5) {
1154 info = 4;
1155 }
1156 if (nslow1 == 10) {
1157 info = 5;
1158 }
1159 if (info != 0) {
1160 goto TERMINATE;
1161 }
1162
1163 /* criterion for recalculating jacobian approximation */
1164 /* by forward differences. */
1165
1166 if (ncfail == 2) {
1167 goto TERMINATE_INNER_LOOP;
1168 }
1169
1170 /* calculate the rank one modification to the jacobian */
1171 /* and update qtf if necessary. */
1172
1173 for (j = 1; j <= N; ++j) {
1174 sum = 0.;
1175 for (i = 1; i <= N; ++i) {
1176 sum += fjac[i + j * fjac_dim1] * wa4[i];
1177 }
1178 wa2[j] = (sum - wa3[j]) / pnorm;
1179 wa1[j] = diag[j] * (diag[j] * wa1[j] / pnorm);
1180 if (ratio >= p0001) {
1181 qtf[j] = sum;
1182 }
1183 }
1184
1185 /* compute the qr factorization of the updated jacobian. */
1186
1187 r1updt<N>(&r[1], lr, &wa1[1], &wa2[1], &wa3[1], &sing);
1188 r1mpyq<N, N>(&fjac[fjac_offset], ldfjac, &wa2[1], &wa3[1]);
1189 r1mpyq<1, N>(&qtf[1], 1, &wa2[1], &wa3[1]);
1190
1191 /* end of the inner loop. */
1192
1193 jeval = false;
1194 }
1195 TERMINATE_INNER_LOOP:
1196 ;
1197 /* end of the outer loop. */
1198
1199 }
1200 TERMINATE:
1201
1202 /* termination, either normal or user imposed. */
1203
1204 if (iflag < 0) {
1205 info = iflag;
1206 }
1207 if (nprint > 0) {
1208 fcn_nn(p, &x[1], &fvec[1], 0);
1209 }
1210 return info;
1211
1212 /* last card of subroutine hybrd. */
1213
1214 } /* hybrd_ */
1215