1 /*
2   This is NEWUOA for unconstrained minimization. The codes were written
3   by Powell in Fortran and then translated to C with f2c. I further
4   modified the code to make it independent of libf2c and f2c.h. Please
5   refer to "The NEWUOA software for unconstrained optimization without
6   derivatives", which is available at www.damtp.cam.ac.uk, for more
7   information.
8  */
9 /*
10   The original fortran codes are distributed without restrictions. The
11   C++ codes are distributed under MIT license.
12  */
13 /* The MIT License
14 
15    Copyright (c) 2004, by M.J.D. Powell <mjdp@cam.ac.uk>
16                  2008, by Attractive Chaos <attractivechaos@aol.co.uk>
17 
18    Permission is hereby granted, free of charge, to any person obtaining
19    a copy of this software and associated documentation files (the
20    "Software"), to deal in the Software without restriction, including
21    without limitation the rights to use, copy, modify, merge, publish,
22    distribute, sublicense, and/or sell copies of the Software, and to
23    permit persons to whom the Software is furnished to do so, subject to
24    the following conditions:
25 
26    The above copyright notice and this permission notice shall be
27    included in all copies or substantial portions of the Software.
28 
29    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
30    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
32    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
33    BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
34    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
35    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
36    SOFTWARE.
37 */
38 
39 #ifndef AC_NEWUOA_HH_
40 #define AC_NEWUOA_HH_
41 #include <math.h>
42 #include <stdlib.h>
43 #include <stdio.h>
44 
45 //PARAMETER DECREASE WAS 0.1
46 #define DELTA_DECREASE 0.5
47 #define RHO_DECREASE 0.5
48 
49 
50 /*most probably:
51     TYPE is float or double
52     Func is defined as double func(int n, double *x);
53     or better as
54        class Func {
55          double operator()(int n, double *x);
56        };
57     where n is the number of parameters and x the vector parameter
58     r_start stands unknown...
59 */
60 
61 template<class TYPE, class Func>
62 TYPE min_newuoa(int n, TYPE *x, Func &func, TYPE r_start=1e7, TYPE tol=1e-8, int max_iter=5000);
63 
64 template<class TYPE, class Func>
biglag_(int n,int npt,TYPE * xopt,TYPE * xpt,TYPE * bmat,TYPE * zmat,int * idz,int * ndim,int * knew,TYPE * delta,TYPE * d__,TYPE * alpha,TYPE * hcol,TYPE * gc,TYPE * gd,TYPE * s,TYPE * w,Func &)65 static int biglag_(int n, int npt, TYPE *xopt, TYPE *xpt, TYPE *bmat, TYPE *zmat, int *idz,
66                    int *ndim, int *knew, TYPE *delta, TYPE *d__, TYPE *alpha, TYPE *hcol, TYPE *gc,
67                    TYPE *gd, TYPE *s, TYPE *w, Func & /*func*/)
68 {
69     /* N is the number of variables. NPT is the number of interpolation
70      * equations. XOPT is the best interpolation point so far. XPT
71      * contains the coordinates of the current interpolation
72      * points. BMAT provides the last N columns of H.  ZMAT and IDZ give
73      * a factorization of the first NPT by NPT submatrix of H. NDIM is
74      * the first dimension of BMAT and has the value NPT+N.  KNEW is the
75      * index of the interpolation point that is going to be moved. DEBLLTA
76      * is the current trust region bound. D will be set to the step from
77      * XOPT to the new point. ABLLPHA will be set to the KNEW-th diagonal
78      * element of the H matrix. HCOBLL, GC, GD, S and W will be used for
79      * working space. */
80     /* The step D is calculated in a way that attempts to maximize the
81      * modulus of BLLFUNC(XOPT+D), subject to the bound ||D|| .BLLE. DEBLLTA,
82      * where BLLFUNC is the KNEW-th BLLagrange function. */
83 
84     int xpt_dim1, xpt_offset, bmat_dim1, bmat_offset, zmat_dim1, zmat_offset,
85         i__1, i__2, i__, j, k, iu, nptm, iterc, isave;
86     TYPE sp, ss, cf1, cf2, cf3, cf4, cf5, dhd, cth, tau, sth, sum, temp, step,
87         angle, scale, denom, delsq, tempa, tempb, twopi, taubeg, tauold, taumax,
88         d__1, dd, gg;
89 
90     /* Parameter adjustments */
91     tempa = tempb = 0.0;
92     zmat_dim1 = npt;
93     zmat_offset = 1 + zmat_dim1;
94     zmat -= zmat_offset;
95     xpt_dim1 = npt;
96     xpt_offset = 1 + xpt_dim1;
97     xpt -= xpt_offset;
98     --xopt;
99     bmat_dim1 = *ndim;
100     bmat_offset = 1 + bmat_dim1;
101     bmat -= bmat_offset;
102     --d__; --hcol; --gc; --gd; --s; --w;
103     /* Function Body */
104     twopi = 2.0 * M_PI;
105     delsq = *delta * *delta;
106     nptm = npt - n - 1;
107     /* Set the first NPT components of HCOBLL to the leading elements of
108      * the KNEW-th column of H. */
109     iterc = 0;
110     i__1 = npt;
111     for (k = 1; k <= i__1; ++k) hcol[k] = 0;
112     i__1 = nptm;
113     for (j = 1; j <= i__1; ++j) {
114         temp = zmat[*knew + j * zmat_dim1];
115         if (j < *idz) temp = -temp;
116         i__2 = npt;
117         for (k = 1; k <= i__2; ++k)
118             hcol[k] += temp * zmat[k + j * zmat_dim1];
119     }
120     *alpha = hcol[*knew];
121     /* Set the unscaled initial direction D. Form the gradient of BLLFUNC
122      * atXOPT, and multiply D by the second derivative matrix of
123      * BLLFUNC. */
124     dd = 0;
125     i__2 = n;
126     for (i__ = 1; i__ <= i__2; ++i__) {
127         d__[i__] = xpt[*knew + i__ * xpt_dim1] - xopt[i__];
128         gc[i__] = bmat[*knew + i__ * bmat_dim1];
129         gd[i__] = 0;
130         /* Computing 2nd power */
131         d__1 = d__[i__];
132         dd += d__1 * d__1;
133     }
134     i__2 = npt;
135     for (k = 1; k <= i__2; ++k) {
136         temp = 0;
137         sum = 0;
138         i__1 = n;
139         for (j = 1; j <= i__1; ++j) {
140             temp += xpt[k + j * xpt_dim1] * xopt[j];
141             sum += xpt[k + j * xpt_dim1] * d__[j];
142         }
143         temp = hcol[k] * temp;
144         sum = hcol[k] * sum;
145         i__1 = n;
146         for (i__ = 1; i__ <= i__1; ++i__) {
147             gc[i__] += temp * xpt[k + i__ * xpt_dim1];
148             gd[i__] += sum * xpt[k + i__ * xpt_dim1];
149         }
150     }
151     /* Scale D and GD, with a sign change if required. Set S to another
152      * vector in the initial two dimensional subspace. */
153     gg = sp = dhd = 0;
154     i__1 = n;
155     for (i__ = 1; i__ <= i__1; ++i__) {
156         /* Computing 2nd power */
157         d__1 = gc[i__];
158         gg += d__1 * d__1;
159         sp += d__[i__] * gc[i__];
160         dhd += d__[i__] * gd[i__];
161     }
162     scale = *delta / sqrt(dd);
163     if (sp * dhd < 0) scale = -scale;
164     temp = 0;
165     if (sp * sp > dd * .99 * gg) temp = 1.0;
166     tau = scale * (fabs(sp) + 0.5 * scale * fabs(dhd));
167     if (gg * delsq < tau * .01 * tau) temp = 1.0;
168     i__1 = n;
169     for (i__ = 1; i__ <= i__1; ++i__) {
170         d__[i__] = scale * d__[i__];
171         gd[i__] = scale * gd[i__];
172         s[i__] = gc[i__] + temp * gd[i__];
173     }
174     /* Begin the iteration by overwriting S with a vector that has the
175      * required length and direction, except that termination occurs if
176      * the given D and S are nearly parallel. */
177     for (iterc = 0; iterc != n; ++iterc) {
178         dd = sp = ss = 0;
179         i__1 = n;
180         for (i__ = 1; i__ <= i__1; ++i__) {
181             /* Computing 2nd power */
182             d__1 = d__[i__];
183             dd += d__1 * d__1;
184             sp += d__[i__] * s[i__];
185             /* Computing 2nd power */
186             d__1 = s[i__];
187             ss += d__1 * d__1;
188         }
189         temp = dd * ss - sp * sp;
190         if (temp <= dd * 1e-8 * ss) return 0;
191         denom = sqrt(temp);
192         i__1 = n;
193         for (i__ = 1; i__ <= i__1; ++i__) {
194             s[i__] = (dd * s[i__] - sp * d__[i__]) / denom;
195             w[i__] = 0;
196         }
197         /* Calculate the coefficients of the objective function on the
198          * circle, beginning with the multiplication of S by the second
199          * derivative matrix. */
200         i__1 = npt;
201         for (k = 1; k <= i__1; ++k) {
202             sum = 0;
203             i__2 = n;
204             for (j = 1; j <= i__2; ++j)
205                 sum += xpt[k + j * xpt_dim1] * s[j];
206             sum = hcol[k] * sum;
207             i__2 = n;
208             for (i__ = 1; i__ <= i__2; ++i__)
209                 w[i__] += sum * xpt[k + i__ * xpt_dim1];
210         }
211         cf1 = cf2 = cf3 = cf4 = cf5 = 0;
212         i__2 = n;
213         for (i__ = 1; i__ <= i__2; ++i__) {
214             cf1 += s[i__] * w[i__];
215             cf2 += d__[i__] * gc[i__];
216             cf3 += s[i__] * gc[i__];
217             cf4 += d__[i__] * gd[i__];
218             cf5 += s[i__] * gd[i__];
219         }
220         cf1 = 0.5 * cf1;
221         cf4 = 0.5 * cf4 - cf1;
222         /* Seek the value of the angle that maximizes the modulus of TAU. */
223         taubeg = cf1 + cf2 + cf4;
224         taumax = tauold = taubeg;
225         isave = 0;
226         iu = 49;
227         temp = twopi / (TYPE) (iu + 1);
228         i__2 = iu;
229         for (i__ = 1; i__ <= i__2; ++i__) {
230             angle = (TYPE) i__ *temp;
231             cth = cos(angle);
232             sth = sin(angle);
233             tau = cf1 + (cf2 + cf4 * cth) * cth + (cf3 + cf5 * cth) * sth;
234             if (fabs(tau) > fabs(taumax)) {
235                 taumax = tau;
236                 isave = i__;
237                 tempa = tauold;
238             } else if (i__ == isave + 1) tempb = tau;
239             tauold = tau;
240         }
241         if (isave == 0) tempa = tau;
242         if (isave == iu) tempb = taubeg;
243         step = 0;
244         if (tempa != tempb) {
245             tempa -= taumax;
246             tempb -= taumax;
247             step = 0.5 * (tempa - tempb) / (tempa + tempb);
248         }
249         angle = temp * ((TYPE) isave + step);
250         /* Calculate the new D and GD. Then test for convergence. */
251         cth = cos(angle);
252         sth = sin(angle);
253         tau = cf1 + (cf2 + cf4 * cth) * cth + (cf3 + cf5 * cth) * sth;
254         i__2 = n;
255         for (i__ = 1; i__ <= i__2; ++i__) {
256             d__[i__] = cth * d__[i__] + sth * s[i__];
257             gd[i__] = cth * gd[i__] + sth * w[i__];
258             s[i__] = gc[i__] + gd[i__];
259         }
260         if (fabs(tau) <= fabs(taubeg) * 1.1) return 0;
261     }
262     return 0;
263 }
264 
265 template<class TYPE>
bigden_(int n,int npt,TYPE * xopt,TYPE * xpt,TYPE * bmat,TYPE * zmat,int * idz,int * ndim,int * kopt,int * knew,TYPE * d__,TYPE * w,TYPE * vlag,TYPE * beta,TYPE * s,TYPE * wvec,TYPE * prod)266 static int bigden_(int n, int npt, TYPE *xopt, TYPE *xpt, TYPE *bmat, TYPE *zmat, int *idz,
267                    int *ndim, int *kopt, int *knew, TYPE *d__, TYPE *w, TYPE *vlag, TYPE *beta,
268                    TYPE *s, TYPE *wvec, TYPE *prod)
269 {
270     /* N is the number of variables.
271      * NPT is the number of interpolation equations.
272      * XOPT is the best interpolation point so far.
273      * XPT contains the coordinates of the current interpolation points.
274      * BMAT provides the last N columns of H.
275      * ZMAT and IDZ give a factorization of the first NPT by NPT
276        submatrix of H.
277      * NDIM is the first dimension of BMAT and has the value NPT+N.
278      * KOPT is the index of the optimal interpolation point.
279      * KNEW is the index of the interpolation point that is going to be
280        moved.
281      * D will be set to the step from XOPT to the new point, and on
282        entry it should be the D that was calculated by the last call of
283        BIGBDLAG. The length of the initial D provides a trust region bound
284        on the final D.
285      * W will be set to Wcheck for the final choice of D.
286      * VBDLAG will be set to Theta*Wcheck+e_b for the final choice of D.
287      * BETA will be set to the value that will occur in the updating
288        formula when the KNEW-th interpolation point is moved to its new
289        position.
290      * S, WVEC, PROD and the private arrays DEN, DENEX and PAR will be
291        used for working space.
292      * D is calculated in a way that should provide a denominator with a
293        large modulus in the updating formula when the KNEW-th
294        interpolation point is shifted to the new position XOPT+D. */
295 
296     int xpt_dim1, xpt_offset, bmat_dim1, bmat_offset, zmat_dim1, zmat_offset,
297         wvec_dim1, wvec_offset, prod_dim1, prod_offset, i__1, i__2, i__, j, k,
298         isave, iterc, jc, ip, iu, nw, ksav, nptm;
299     TYPE dd, d__1, ds, ss, den[9], par[9], tau, sum, diff, temp, step,
300         alpha, angle, denex[9], tempa, tempb, tempc, ssden, dtest, xoptd,
301         twopi, xopts, denold, denmax, densav, dstemp, sumold, sstemp, xoptsq;
302 
303     /* Parameter adjustments */
304     zmat_dim1 = npt;
305     zmat_offset = 1 + zmat_dim1;
306     zmat -= zmat_offset;
307     xpt_dim1 = npt;
308     xpt_offset = 1 + xpt_dim1;
309     xpt -= xpt_offset;
310     --xopt;
311     prod_dim1 = *ndim;
312     prod_offset = 1 + prod_dim1;
313     prod -= prod_offset;
314     wvec_dim1 = *ndim;
315     wvec_offset = 1 + wvec_dim1;
316     wvec -= wvec_offset;
317     bmat_dim1 = *ndim;
318     bmat_offset = 1 + bmat_dim1;
319     bmat -= bmat_offset;
320     --d__; --w; --vlag; --s;
321     /* Function Body */
322     twopi = atan(1.0) * 8.;
323     nptm = npt - n - 1;
324     /* Store the first NPT elements of the KNEW-th column of H in W(N+1)
325      * to W(N+NPT). */
326     i__1 = npt;
327     for (k = 1; k <= i__1; ++k) w[n + k] = 0;
328     i__1 = nptm;
329     for (j = 1; j <= i__1; ++j) {
330         temp = zmat[*knew + j * zmat_dim1];
331         if (j < *idz) temp = -temp;
332         i__2 = npt;
333         for (k = 1; k <= i__2; ++k)
334             w[n + k] += temp * zmat[k + j * zmat_dim1];
335     }
336     alpha = w[n + *knew];
337     /* The initial search direction D is taken from the last call of
338      * BIGBDLAG, and the initial S is set below, usually to the direction
339      * from X_OPT to X_KNEW, but a different direction to an
340      * interpolation point may be chosen, in order to prevent S from
341      * being nearly parallel to D. */
342     dd = ds = ss = xoptsq = 0;
343     i__2 = n;
344     for (i__ = 1; i__ <= i__2; ++i__) {
345         /* Computing 2nd power */
346         d__1 = d__[i__];
347         dd += d__1 * d__1;
348         s[i__] = xpt[*knew + i__ * xpt_dim1] - xopt[i__];
349         ds += d__[i__] * s[i__];
350         /* Computing 2nd power */
351         d__1 = s[i__];
352         ss += d__1 * d__1;
353         /* Computing 2nd power */
354         d__1 = xopt[i__];
355         xoptsq += d__1 * d__1;
356     }
357     if (ds * ds > dd * .99 * ss) {
358         ksav = *knew;
359         dtest = ds * ds / ss;
360         i__2 = npt;
361         for (k = 1; k <= i__2; ++k) {
362             if (k != *kopt) {
363                 dstemp = 0;
364                 sstemp = 0;
365                 i__1 = n;
366                 for (i__ = 1; i__ <= i__1; ++i__) {
367                     diff = xpt[k + i__ * xpt_dim1] - xopt[i__];
368                     dstemp += d__[i__] * diff;
369                     sstemp += diff * diff;
370                 }
371                 if (dstemp * dstemp / sstemp < dtest) {
372                     ksav = k;
373                     dtest = dstemp * dstemp / sstemp;
374                     ds = dstemp;
375                     ss = sstemp;
376                 }
377             }
378         }
379         i__2 = n;
380         for (i__ = 1; i__ <= i__2; ++i__)
381             s[i__] = xpt[ksav + i__ * xpt_dim1] - xopt[i__];
382     }
383     ssden = dd * ss - ds * ds;
384     iterc = 0;
385     densav = 0;
386     /* Begin the iteration by overwriting S with a vector that has the
387      * required length and direction. */
388 BDL70:
389     ++iterc;
390     temp = 1.0 / sqrt(ssden);
391     xoptd = xopts = 0;
392     i__2 = n;
393     for (i__ = 1; i__ <= i__2; ++i__) {
394         s[i__] = temp * (dd * s[i__] - ds * d__[i__]);
395         xoptd += xopt[i__] * d__[i__];
396         xopts += xopt[i__] * s[i__];
397     }
398     /* Set the coefficients of the first 2.0 terms of BETA. */
399     tempa = 0.5 * xoptd * xoptd;
400     tempb = 0.5 * xopts * xopts;
401     den[0] = dd * (xoptsq + 0.5 * dd) + tempa + tempb;
402     den[1] = 2.0 * xoptd * dd;
403     den[2] = 2.0 * xopts * dd;
404     den[3] = tempa - tempb;
405     den[4] = xoptd * xopts;
406     for (i__ = 6; i__ <= 9; ++i__) den[i__ - 1] = 0;
407     /* Put the coefficients of Wcheck in WVEC. */
408     i__2 = npt;
409     for (k = 1; k <= i__2; ++k) {
410         tempa = tempb = tempc = 0;
411         i__1 = n;
412         for (i__ = 1; i__ <= i__1; ++i__) {
413             tempa += xpt[k + i__ * xpt_dim1] * d__[i__];
414             tempb += xpt[k + i__ * xpt_dim1] * s[i__];
415             tempc += xpt[k + i__ * xpt_dim1] * xopt[i__];
416         }
417         wvec[k + wvec_dim1] = 0.25 * (tempa * tempa + tempb * tempb);
418         wvec[k + (wvec_dim1 << 1)] = tempa * tempc;
419         wvec[k + wvec_dim1 * 3] = tempb * tempc;
420         wvec[k + (wvec_dim1 << 2)] = 0.25 * (tempa * tempa - tempb * tempb);
421         wvec[k + wvec_dim1 * 5] = 0.5 * tempa * tempb;
422     }
423     i__2 = n;
424     for (i__ = 1; i__ <= i__2; ++i__) {
425         ip = i__ + npt;
426         wvec[ip + wvec_dim1] = 0;
427         wvec[ip + (wvec_dim1 << 1)] = d__[i__];
428         wvec[ip + wvec_dim1 * 3] = s[i__];
429         wvec[ip + (wvec_dim1 << 2)] = 0;
430         wvec[ip + wvec_dim1 * 5] = 0;
431     }
432     /* Put the coefficents of THETA*Wcheck in PROD. */
433     for (jc = 1; jc <= 5; ++jc) {
434         nw = npt;
435         if (jc == 2 || jc == 3) nw = *ndim;
436         i__2 = npt;
437         for (k = 1; k <= i__2; ++k) prod[k + jc * prod_dim1] = 0;
438         i__2 = nptm;
439         for (j = 1; j <= i__2; ++j) {
440             sum = 0;
441             i__1 = npt;
442             for (k = 1; k <= i__1; ++k) sum += zmat[k + j * zmat_dim1] * wvec[k + jc * wvec_dim1];
443             if (j < *idz) sum = -sum;
444             i__1 = npt;
445             for (k = 1; k <= i__1; ++k)
446                 prod[k + jc * prod_dim1] += sum * zmat[k + j * zmat_dim1];
447         }
448         if (nw == *ndim) {
449             i__1 = npt;
450             for (k = 1; k <= i__1; ++k) {
451                 sum = 0;
452                 i__2 = n;
453                 for (j = 1; j <= i__2; ++j)
454                     sum += bmat[k + j * bmat_dim1] * wvec[npt + j + jc * wvec_dim1];
455                 prod[k + jc * prod_dim1] += sum;
456             }
457         }
458         i__1 = n;
459         for (j = 1; j <= i__1; ++j) {
460             sum = 0;
461             i__2 = nw;
462             for (i__ = 1; i__ <= i__2; ++i__)
463                 sum += bmat[i__ + j * bmat_dim1] * wvec[i__ + jc * wvec_dim1];
464             prod[npt + j + jc * prod_dim1] = sum;
465         }
466     }
467     /* Include in DEN the part of BETA that depends on THETA. */
468     i__1 = *ndim;
469     for (k = 1; k <= i__1; ++k) {
470         sum = 0;
471         for (i__ = 1; i__ <= 5; ++i__) {
472             par[i__ - 1] = 0.5 * prod[k + i__ * prod_dim1] * wvec[k + i__ * wvec_dim1];
473             sum += par[i__ - 1];
474         }
475         den[0] = den[0] - par[0] - sum;
476         tempa = prod[k + prod_dim1] * wvec[k + (wvec_dim1 << 1)] + prod[k + (
477                      prod_dim1 << 1)] * wvec[k + wvec_dim1];
478         tempb = prod[k + (prod_dim1 << 1)] * wvec[k + (wvec_dim1 << 2)] +
479             prod[k + (prod_dim1 << 2)] * wvec[k + (wvec_dim1 << 1)];
480         tempc = prod[k + prod_dim1 * 3] * wvec[k + wvec_dim1 * 5] + prod[k +
481                    prod_dim1 * 5] * wvec[k + wvec_dim1 * 3];
482         den[1] = den[1] - tempa - 0.5 * (tempb + tempc);
483         den[5] -= 0.5 * (tempb - tempc);
484         tempa = prod[k + prod_dim1] * wvec[k + wvec_dim1 * 3] + prod[k +
485                        prod_dim1 * 3] * wvec[k + wvec_dim1];
486         tempb = prod[k + (prod_dim1 << 1)] * wvec[k + wvec_dim1 * 5] + prod[k
487                   + prod_dim1 * 5] * wvec[k + (wvec_dim1 << 1)];
488         tempc = prod[k + prod_dim1 * 3] * wvec[k + (wvec_dim1 << 2)] + prod[k
489                   + (prod_dim1 << 2)] * wvec[k + wvec_dim1 * 3];
490         den[2] = den[2] - tempa - 0.5 * (tempb - tempc);
491         den[6] -= 0.5 * (tempb + tempc);
492         tempa = prod[k + prod_dim1] * wvec[k + (wvec_dim1 << 2)] + prod[k + (
493                      prod_dim1 << 2)] * wvec[k + wvec_dim1];
494         den[3] = den[3] - tempa - par[1] + par[2];
495         tempa = prod[k + prod_dim1] * wvec[k + wvec_dim1 * 5] + prod[k +
496                        prod_dim1 * 5] * wvec[k + wvec_dim1];
497         tempb = prod[k + (prod_dim1 << 1)] * wvec[k + wvec_dim1 * 3] + prod[k
498                   + prod_dim1 * 3] * wvec[k + (wvec_dim1 << 1)];
499         den[4] = den[4] - tempa - 0.5 * tempb;
500         den[7] = den[7] - par[3] + par[4];
501         tempa = prod[k + (prod_dim1 << 2)] * wvec[k + wvec_dim1 * 5] + prod[k
502                   + prod_dim1 * 5] * wvec[k + (wvec_dim1 << 2)];
503         den[8] -= 0.5 * tempa;
504     }
505     /* Extend DEN so that it holds all the coefficients of DENOM. */
506     sum = 0;
507     for (i__ = 1; i__ <= 5; ++i__) {
508         /* Computing 2nd power */
509         d__1 = prod[*knew + i__ * prod_dim1];
510         par[i__ - 1] = 0.5 * (d__1 * d__1);
511         sum += par[i__ - 1];
512     }
513     denex[0] = alpha * den[0] + par[0] + sum;
514     tempa = 2.0 * prod[*knew + prod_dim1] * prod[*knew + (prod_dim1 << 1)];
515     tempb = prod[*knew + (prod_dim1 << 1)] * prod[*knew + (prod_dim1 << 2)];
516     tempc = prod[*knew + prod_dim1 * 3] * prod[*knew + prod_dim1 * 5];
517     denex[1] = alpha * den[1] + tempa + tempb + tempc;
518     denex[5] = alpha * den[5] + tempb - tempc;
519     tempa = 2.0 * prod[*knew + prod_dim1] * prod[*knew + prod_dim1 * 3];
520     tempb = prod[*knew + (prod_dim1 << 1)] * prod[*knew + prod_dim1 * 5];
521     tempc = prod[*knew + prod_dim1 * 3] * prod[*knew + (prod_dim1 << 2)];
522     denex[2] = alpha * den[2] + tempa + tempb - tempc;
523     denex[6] = alpha * den[6] + tempb + tempc;
524     tempa = 2.0 * prod[*knew + prod_dim1] * prod[*knew + (prod_dim1 << 2)];
525     denex[3] = alpha * den[3] + tempa + par[1] - par[2];
526     tempa = 2.0 * prod[*knew + prod_dim1] * prod[*knew + prod_dim1 * 5];
527     denex[4] = alpha * den[4] + tempa + prod[*knew + (prod_dim1 << 1)] * prod[
528                              *knew + prod_dim1 * 3];
529     denex[7] = alpha * den[7] + par[3] - par[4];
530     denex[8] = alpha * den[8] + prod[*knew + (prod_dim1 << 2)] * prod[*knew +
531                                  prod_dim1 * 5];
532     /* Seek the value of the angle that maximizes the modulus of DENOM. */
533     sum = denex[0] + denex[1] + denex[3] + denex[5] + denex[7];
534     denold = denmax = sum;
535     isave = 0;
536     iu = 49;
537     temp = twopi / (TYPE) (iu + 1);
538     par[0] = 1.0;
539     i__1 = iu;
540     for (i__ = 1; i__ <= i__1; ++i__) {
541         angle = (TYPE) i__ *temp;
542         par[1] = cos(angle);
543         par[2] = sin(angle);
544         for (j = 4; j <= 8; j += 2) {
545             par[j - 1] = par[1] * par[j - 3] - par[2] * par[j - 2];
546             par[j] = par[1] * par[j - 2] + par[2] * par[j - 3];
547         }
548         sumold = sum;
549         sum = 0;
550         for (j = 1; j <= 9; ++j)
551             sum += denex[j - 1] * par[j - 1];
552         if (fabs(sum) > fabs(denmax)) {
553             denmax = sum;
554             isave = i__;
555             tempa = sumold;
556         } else if (i__ == isave + 1) {
557             tempb = sum;
558         }
559     }
560     if (isave == 0) tempa = sum;
561     if (isave == iu) tempb = denold;
562     step = 0;
563     if (tempa != tempb) {
564         tempa -= denmax;
565         tempb -= denmax;
566         step = 0.5 * (tempa - tempb) / (tempa + tempb);
567     }
568     angle = temp * ((TYPE) isave + step);
569     /* Calculate the new parameters of the denominator, the new VBDLAG
570      * vector and the new D. Then test for convergence. */
571     par[1] = cos(angle);
572     par[2] = sin(angle);
573     for (j = 4; j <= 8; j += 2) {
574         par[j - 1] = par[1] * par[j - 3] - par[2] * par[j - 2];
575         par[j] = par[1] * par[j - 2] + par[2] * par[j - 3];
576     }
577     *beta = 0;
578     denmax = 0;
579     for (j = 1; j <= 9; ++j) {
580         *beta += den[j - 1] * par[j - 1];
581         denmax += denex[j - 1] * par[j - 1];
582     }
583     i__1 = *ndim;
584     for (k = 1; k <= i__1; ++k) {
585         vlag[k] = 0;
586         for (j = 1; j <= 5; ++j)
587             vlag[k] += prod[k + j * prod_dim1] * par[j - 1];
588     }
589     tau = vlag[*knew];
590     dd = tempa = tempb = 0;
591     i__1 = n;
592     for (i__ = 1; i__ <= i__1; ++i__) {
593         d__[i__] = par[1] * d__[i__] + par[2] * s[i__];
594         w[i__] = xopt[i__] + d__[i__];
595         /* Computing 2nd power */
596         d__1 = d__[i__];
597         dd += d__1 * d__1;
598         tempa += d__[i__] * w[i__];
599         tempb += w[i__] * w[i__];
600     }
601     if (iterc >= n) goto BDL340;
602 		if (iterc > 1) densav = std::max(densav, denold);
603     if (fabs(denmax) <= fabs(densav) * 1.1) goto BDL340;
604     densav = denmax;
605     /* Set S to 0.5 the gradient of the denominator with respect to
606      * D. Then branch for the next iteration. */
607     i__1 = n;
608     for (i__ = 1; i__ <= i__1; ++i__) {
609         temp = tempa * xopt[i__] + tempb * d__[i__] - vlag[npt + i__];
610         s[i__] = tau * bmat[*knew + i__ * bmat_dim1] + alpha * temp;
611     }
612     i__1 = npt;
613     for (k = 1; k <= i__1; ++k) {
614         sum = 0;
615         i__2 = n;
616         for (j = 1; j <= i__2; ++j)
617             sum += xpt[k + j * xpt_dim1] * w[j];
618         temp = (tau * w[n + k] - alpha * vlag[k]) * sum;
619         i__2 = n;
620         for (i__ = 1; i__ <= i__2; ++i__)
621             s[i__] += temp * xpt[k + i__ * xpt_dim1];
622     }
623     ss = 0;
624     ds = 0;
625     i__2 = n;
626     for (i__ = 1; i__ <= i__2; ++i__) {
627         /* Computing 2nd power */
628         d__1 = s[i__];
629         ss += d__1 * d__1;
630         ds += d__[i__] * s[i__];
631     }
632     ssden = dd * ss - ds * ds;
633     if (ssden >= dd * 1e-8 * ss) goto BDL70;
634     /* Set the vector W before the RETURN from the subroutine. */
635 BDL340:
636     i__2 = *ndim;
637     for (k = 1; k <= i__2; ++k) {
638         w[k] = 0;
639         for (j = 1; j <= 5; ++j) w[k] += wvec[k + j * wvec_dim1] * par[j - 1];
640     }
641     vlag[*kopt] += 1.0;
642     return 0;
643 }
644 
645 template<class TYPE>
trsapp_(int n,int npt,TYPE * xopt,TYPE * xpt,TYPE * gq,TYPE * hq,TYPE * pq,TYPE * delta,TYPE * step,TYPE * d__,TYPE * g,TYPE * hd,TYPE * hs,TYPE * crvmin)646 int trsapp_(int n, int npt, TYPE * xopt, TYPE * xpt, TYPE * gq, TYPE * hq, TYPE * pq,
647             TYPE * delta, TYPE * step, TYPE * d__, TYPE * g, TYPE * hd, TYPE * hs, TYPE * crvmin)
648 {
649     /* The arguments NPT, XOPT, XPT, GQ, HQ and PQ have their usual
650      * meanings, in order to define the current quadratic model Q.
651      * DETRLTA is the trust region radius, and has to be positive. STEP
652      * will be set to the calculated trial step. The arrays D, G, HD and
653      * HS will be used for working space. CRVMIN will be set to the
654      * least curvature of H aint the conjugate directions that occur,
655      * except that it is set to 0 if STEP goes all the way to the trust
656      * region boundary. The calculation of STEP begins with the
657      * truncated conjugate gradient method. If the boundary of the trust
658      * region is reached, then further changes to STEP may be made, each
659      * one being in the 2D space spanned by the current STEP and the
660      * corresponding gradient of Q. Thus STEP should provide a
661      * substantial reduction to Q within the trust region. */
662 
663     int xpt_dim1, xpt_offset, i__1, i__2, i__, j, k, ih, iu, iterc,
664         isave, itersw, itermax;
665     TYPE d__1, d__2, dd, cf, dg, gg, ds, sg, ss, dhd, dhs,
666         cth, sgk, shs, sth, qadd, qbeg, qred, qmin, temp,
667         qsav, qnew, ggbeg, alpha, angle, reduc, ggsav, delsq,
668         tempa, tempb, bstep, ratio, twopi, angtest;
669 
670     /* Parameter adjustments */
671     tempa = tempb = shs = sg = bstep = ggbeg = gg = qred = dd = 0.0;
672     xpt_dim1 = npt;
673     xpt_offset = 1 + xpt_dim1;
674     xpt -= xpt_offset;
675     --xopt; --gq; --hq; --pq; --step; --d__; --g; --hd; --hs;
676     /* Function Body */
677     twopi = 2.0 * M_PI;
678     delsq = *delta * *delta;
679     iterc = 0;
680     itermax = n;
681     itersw = itermax;
682     i__1 = n;
683     for (i__ = 1; i__ <= i__1; ++i__) d__[i__] = xopt[i__];
684     goto TRL170;
685     /* Prepare for the first line search. */
686 TRL20:
687     qred = dd = 0;
688     i__1 = n;
689     for (i__ = 1; i__ <= i__1; ++i__) {
690         step[i__] = 0;
691         hs[i__] = 0;
692         g[i__] = gq[i__] + hd[i__];
693         d__[i__] = -g[i__];
694         /* Computing 2nd power */
695         d__1 = d__[i__];
696         dd += d__1 * d__1;
697     }
698     *crvmin = 0;
699     if (dd == 0) goto TRL160;
700     ds = ss = 0;
701     gg = dd;
702     ggbeg = gg;
703     /* Calculate the step to the trust region boundary and the product
704      * HD. */
705 TRL40:
706     ++iterc;
707     temp = delsq - ss;
708     bstep = temp / (ds + sqrt(ds * ds + dd * temp));
709     goto TRL170;
710 TRL50:
711     dhd = 0;
712     i__1 = n;
713     for (j = 1; j <= i__1; ++j) dhd += d__[j] * hd[j];
714     /* Update CRVMIN and set the step-length ATRLPHA. */
715     alpha = bstep;
716     if (dhd > 0) {
717         temp = dhd / dd;
718         if (iterc == 1) *crvmin = temp;
719         *crvmin = std::min(*crvmin, temp);
720         /* Computing MIN */
721         d__1 = alpha, d__2 = gg / dhd;
722         alpha = std::min(d__1, d__2);
723     }
724     qadd = alpha * (gg - 0.5 * alpha * dhd);
725     qred += qadd;
726     /* Update STEP and HS. */
727     ggsav = gg;
728     gg = 0;
729     i__1 = n;
730     for (i__ = 1; i__ <= i__1; ++i__) {
731         step[i__] += alpha * d__[i__];
732         hs[i__] += alpha * hd[i__];
733         /* Computing 2nd power */
734         d__1 = g[i__] + hs[i__];
735         gg += d__1 * d__1;
736     }
737     /* Begin another conjugate direction iteration if required. */
738     if (alpha < bstep) {
739         if (qadd <= qred * .01 || gg <= ggbeg * 1e-4 || iterc == itermax) goto TRL160;
740         temp = gg / ggsav;
741         dd = ds = ss = 0;
742         i__1 = n;
743         for (i__ = 1; i__ <= i__1; ++i__) {
744             d__[i__] = temp * d__[i__] - g[i__] - hs[i__];
745             /* Computing 2nd power */
746             d__1 = d__[i__];
747             dd += d__1 * d__1;
748             ds += d__[i__] * step[i__];
749             /* Computing 2nd power */
750             d__1 = step[i__];
751             ss += d__1 * d__1;
752         }
753         if (ds <= 0) goto TRL160;
754         if (ss < delsq) goto TRL40;
755     }
756     *crvmin = 0;
757     itersw = iterc;
758     /* Test whether an alternative iteration is required. */
759 TRL90:
760     if (gg <= ggbeg * 1e-4) goto TRL160;
761     sg = 0;
762     shs = 0;
763     i__1 = n;
764     for (i__ = 1; i__ <= i__1; ++i__) {
765         sg += step[i__] * g[i__];
766         shs += step[i__] * hs[i__];
767     }
768     sgk = sg + shs;
769     angtest = sgk / sqrt(gg * delsq);
770     if (angtest <= -.99) goto TRL160;
771     /* Begin the alternative iteration by calculating D and HD and some
772      * scalar products. */
773     ++iterc;
774     temp = sqrt(delsq * gg - sgk * sgk);
775     tempa = delsq / temp;
776     tempb = sgk / temp;
777     i__1 = n;
778     for (i__ = 1; i__ <= i__1; ++i__)
779         d__[i__] = tempa * (g[i__] + hs[i__]) - tempb * step[i__];
780     goto TRL170;
781 TRL120:
782     dg = dhd = dhs = 0;
783     i__1 = n;
784     for (i__ = 1; i__ <= i__1; ++i__) {
785         dg += d__[i__] * g[i__];
786         dhd += hd[i__] * d__[i__];
787         dhs += hd[i__] * step[i__];
788     }
789     /* Seek the value of the angle that minimizes Q. */
790     cf = 0.5 * (shs - dhd);
791     qbeg = sg + cf;
792     qsav = qmin = qbeg;
793     isave = 0;
794     iu = 49;
795     temp = twopi / (TYPE) (iu + 1);
796     i__1 = iu;
797     for (i__ = 1; i__ <= i__1; ++i__) {
798         angle = (TYPE) i__ *temp;
799         cth = cos(angle);
800         sth = sin(angle);
801         qnew = (sg + cf * cth) * cth + (dg + dhs * cth) * sth;
802         if (qnew < qmin) {
803             qmin = qnew;
804             isave = i__;
805             tempa = qsav;
806         } else if (i__ == isave + 1) tempb = qnew;
807         qsav = qnew;
808     }
809     if ((TYPE) isave == 0) tempa = qnew;
810     if (isave == iu) tempb = qbeg;
811     angle = 0;
812     if (tempa != tempb) {
813         tempa -= qmin;
814         tempb -= qmin;
815         angle = 0.5 * (tempa - tempb) / (tempa + tempb);
816     }
817     angle = temp * ((TYPE) isave + angle);
818     /* Calculate the new STEP and HS. Then test for convergence. */
819     cth = cos(angle);
820     sth = sin(angle);
821     reduc = qbeg - (sg + cf * cth) * cth - (dg + dhs * cth) * sth;
822     gg = 0;
823     i__1 = n;
824     for (i__ = 1; i__ <= i__1; ++i__) {
825         step[i__] = cth * step[i__] + sth * d__[i__];
826         hs[i__] = cth * hs[i__] + sth * hd[i__];
827         /* Computing 2nd power */
828         d__1 = g[i__] + hs[i__];
829         gg += d__1 * d__1;
830     }
831     qred += reduc;
832     ratio = reduc / qred;
833     if (iterc < itermax && ratio > .01) goto TRL90;
834 TRL160:
835     return 0;
836     /* The following instructions act as a subroutine for setting the
837      * vector HD to the vector D multiplied by the second derivative
838      * matrix of Q.  They are called from three different places, which
839      * are distinguished by the value of ITERC. */
840 TRL170:
841     i__1 = n;
842     for (i__ = 1; i__ <= i__1; ++i__) hd[i__] = 0;
843     i__1 = npt;
844     for (k = 1; k <= i__1; ++k) {
845         temp = 0;
846         i__2 = n;
847         for (j = 1; j <= i__2; ++j)
848             temp += xpt[k + j * xpt_dim1] * d__[j];
849         temp *= pq[k];
850         i__2 = n;
851         for (i__ = 1; i__ <= i__2; ++i__)
852             hd[i__] += temp * xpt[k + i__ * xpt_dim1];
853     }
854     ih = 0;
855     i__2 = n;
856     for (j = 1; j <= i__2; ++j) {
857         i__1 = j;
858         for (i__ = 1; i__ <= i__1; ++i__) {
859             ++ih;
860             if (i__ < j) hd[j] += hq[ih] * d__[i__];
861             hd[i__] += hq[ih] * d__[j];
862         }
863     }
864     if (iterc == 0) goto TRL20;
865     if (iterc <= itersw) goto TRL50;
866     goto TRL120;
867 }
868 
869 template<class TYPE>
update_(int n,int npt,TYPE * bmat,TYPE * zmat,int * idz,int * ndim,TYPE * vlag,TYPE * beta,int * knew,TYPE * w)870 static int update_(int n, int npt, TYPE *bmat, TYPE *zmat, int *idz, int *ndim, TYPE *vlag,
871                    TYPE *beta, int *knew, TYPE *w)
872 {
873     /* The arrays BMAT and ZMAT with IDZ are updated, in order to shift
874      * the interpolation point that has index KNEW. On entry, VLAG
875      * contains the components of the vector Theta*Wcheck+e_b of the
876      * updating formula (6.11), and BETA holds the value of the
877      * parameter that has this name. The vector W is used for working
878      * space. */
879 
880     int bmat_dim1, bmat_offset, zmat_dim1, zmat_offset, i__1, i__2, i__,
881         j, ja, jb, jl, jp, nptm, iflag;
882     TYPE d__1, d__2, tau, temp, scala, scalb, alpha, denom, tempa, tempb, tausq;
883 
884     /* Parameter adjustments */
885     tempb = 0.0;
886     zmat_dim1 = npt;
887     zmat_offset = 1 + zmat_dim1;
888     zmat -= zmat_offset;
889     bmat_dim1 = *ndim;
890     bmat_offset = 1 + bmat_dim1;
891     bmat -= bmat_offset;
892     --vlag;
893     --w;
894     /* Function Body */
895     nptm = npt - n - 1;
896     /* Apply the rotations that put zeros in the KNEW-th row of ZMAT. */
897     jl = 1;
898     i__1 = nptm;
899     for (j = 2; j <= i__1; ++j) {
900         if (j == *idz) {
901             jl = *idz;
902         } else if (zmat[*knew + j * zmat_dim1] != 0) {
903             /* Computing 2nd power */
904             d__1 = zmat[*knew + jl * zmat_dim1];
905             /* Computing 2nd power */
906             d__2 = zmat[*knew + j * zmat_dim1];
907             temp = sqrt(d__1 * d__1 + d__2 * d__2);
908             tempa = zmat[*knew + jl * zmat_dim1] / temp;
909             tempb = zmat[*knew + j * zmat_dim1] / temp;
910             i__2 = npt;
911             for (i__ = 1; i__ <= i__2; ++i__) {
912                 temp = tempa * zmat[i__ + jl * zmat_dim1] + tempb * zmat[i__
913                                + j * zmat_dim1];
914                 zmat[i__ + j * zmat_dim1] = tempa * zmat[i__ + j * zmat_dim1]
915                     - tempb * zmat[i__ + jl * zmat_dim1];
916                 zmat[i__ + jl * zmat_dim1] = temp;
917             }
918             zmat[*knew + j * zmat_dim1] = 0;
919         }
920     }
921     /* Put the first NPT components of the KNEW-th column of HLAG into
922      * W, and calculate the parameters of the updating formula. */
923     tempa = zmat[*knew + zmat_dim1];
924     if (*idz >= 2) tempa = -tempa;
925     if (jl > 1) tempb = zmat[*knew + jl * zmat_dim1];
926     i__1 = npt;
927     for (i__ = 1; i__ <= i__1; ++i__) {
928         w[i__] = tempa * zmat[i__ + zmat_dim1];
929         if (jl > 1) w[i__] += tempb * zmat[i__ + jl * zmat_dim1];
930     }
931     alpha = w[*knew];
932     tau = vlag[*knew];
933     tausq = tau * tau;
934     denom = alpha * *beta + tausq;
935     vlag[*knew] -= 1.0;
936     /* Complete the updating of ZMAT when there is only 1.0 nonzero
937      * element in the KNEW-th row of the new matrix ZMAT, but, if IFLAG
938      * is set to 1.0, then the first column of ZMAT will be exchanged
939      * with another 1.0 later. */
940     iflag = 0;
941     if (jl == 1) {
942         temp = sqrt((fabs(denom)));
943         tempb = tempa / temp;
944         tempa = tau / temp;
945         i__1 = npt;
946         for (i__ = 1; i__ <= i__1; ++i__)
947             zmat[i__ + zmat_dim1] = tempa * zmat[i__ + zmat_dim1] - tempb *
948                 vlag[i__];
949         if (*idz == 1 && temp < 0) *idz = 2;
950         if (*idz >= 2 && temp >= 0) iflag = 1;
951     } else {
952         /* Complete the updating of ZMAT in the alternative case. */
953         ja = 1;
954         if (*beta >= 0) {
955             ja = jl;
956         }
957         jb = jl + 1 - ja;
958         temp = zmat[*knew + jb * zmat_dim1] / denom;
959         tempa = temp * *beta;
960         tempb = temp * tau;
961         temp = zmat[*knew + ja * zmat_dim1];
962         scala = 1.0 / sqrt(fabs(*beta) * temp * temp + tausq);
963         scalb = scala * sqrt((fabs(denom)));
964         i__1 = npt;
965         for (i__ = 1; i__ <= i__1; ++i__) {
966             zmat[i__ + ja * zmat_dim1] = scala * (tau * zmat[i__ + ja *
967                          zmat_dim1] - temp * vlag[i__]);
968             zmat[i__ + jb * zmat_dim1] = scalb * (zmat[i__ + jb * zmat_dim1]
969                       - tempa * w[i__] - tempb * vlag[i__]);
970         }
971         if (denom <= 0) {
972             if (*beta < 0) ++(*idz);
973             if (*beta >= 0) iflag = 1;
974         }
975     }
976     /* IDZ is reduced in the following case, and usually the first
977      * column of ZMAT is exchanged with a later 1.0. */
978     if (iflag == 1) {
979         --(*idz);
980         i__1 = npt;
981         for (i__ = 1; i__ <= i__1; ++i__) {
982             temp = zmat[i__ + zmat_dim1];
983             zmat[i__ + zmat_dim1] = zmat[i__ + *idz * zmat_dim1];
984             zmat[i__ + *idz * zmat_dim1] = temp;
985         }
986     }
987     /* Finally, update the matrix BMAT. */
988     i__1 = n;
989     for (j = 1; j <= i__1; ++j) {
990         jp = npt + j;
991         w[jp] = bmat[*knew + j * bmat_dim1];
992         tempa = (alpha * vlag[jp] - tau * w[jp]) / denom;
993         tempb = (-(*beta) * w[jp] - tau * vlag[jp]) / denom;
994         i__2 = jp;
995         for (i__ = 1; i__ <= i__2; ++i__) {
996             bmat[i__ + j * bmat_dim1] = bmat[i__ + j * bmat_dim1] + tempa *
997                 vlag[i__] + tempb * w[i__];
998             if (i__ > npt) {
999                 bmat[jp + (i__ - npt) * bmat_dim1] = bmat[i__ + j *
1000                                  bmat_dim1];
1001             }
1002         }
1003     }
1004     return 0;
1005 }
1006 
1007 template<class TYPE, class Func>
newuob_(int n,int npt,TYPE * x,TYPE rhobeg,TYPE rhoend,int * ret_nf,int maxfun,TYPE * xbase,TYPE * xopt,TYPE * xnew,TYPE * xpt,TYPE * fval,TYPE * gq,TYPE * hq,TYPE * pq,TYPE * bmat,TYPE * zmat,int * ndim,TYPE * d__,TYPE * vlag,TYPE * w,Func & func)1008 static TYPE newuob_(int n, int npt, TYPE *x,
1009                     TYPE rhobeg, TYPE rhoend, int *ret_nf, int maxfun,
1010                     TYPE *xbase, TYPE *xopt, TYPE *xnew,
1011                     TYPE *xpt, TYPE *fval, TYPE *gq, TYPE *hq,
1012                     TYPE *pq, TYPE *bmat, TYPE *zmat, int *ndim,
1013                     TYPE *d__, TYPE *vlag, TYPE *w, Func &func)
1014 {
1015     /* XBASE will hold a shift of origin that should reduce the
1016        contributions from rounding errors to values of the model and
1017        Lagrange functions.
1018      * XOPT will be set to the displacement from XBASE of the vector of
1019        variables that provides the least calculated F so far.
1020      * XNEW will be set to the displacement from XBASE of the vector of
1021        variables for the current calculation of F.
1022      * XPT will contain the interpolation point coordinates relative to
1023        XBASE.
1024      * FVAL will hold the values of F at the interpolation points.
1025      * GQ will hold the gradient of the quadratic model at XBASE.
1026      * HQ will hold the explicit second derivatives of the quadratic
1027        model.
1028      * PQ will contain the parameters of the implicit second derivatives
1029        of the quadratic model.
1030      * BMAT will hold the last N columns of H.
1031      * ZMAT will hold the factorization of the leading NPT by NPT
1032        submatrix of H, this factorization being ZMAT times Diag(DZ)
1033        times ZMAT^T, where the elements of DZ are plus or minus 1.0, as
1034        specified by IDZ.
1035      * NDIM is the first dimension of BMAT and has the value NPT+N.
1036      * D is reserved for trial steps from XOPT.
1037      * VLAG will contain the values of the Lagrange functions at a new
1038        point X.  They are part of a product that requires VLAG to be of
1039        length NDIM.
1040      * The array W will be used for working space. Its length must be at
1041        least 10*NDIM = 10*(NPT+N). Set some constants. */
1042 
1043     int xpt_dim1, xpt_offset, bmat_dim1, bmat_offset, zmat_dim1, zmat_offset,
1044         i__1, i__2, i__3, i__, j, k, ih, nf, nh, ip, jp, np, nfm, idz, ipt, jpt,
1045         nfmm, knew, kopt, nptm, ksave, nfsav, itemp, ktemp, itest, nftest;
1046     TYPE d__1, d__2, d__3, f, dx, dsq, rho, sum, fbeg, diff, beta, gisq,
1047         temp, suma, sumb, fopt, bsum, gqsq, xipt, xjpt, sumz, diffa, diffb,
1048         diffc, hdiag, alpha, delta, recip, reciq, fsave, dnorm, ratio, dstep,
1049         vquad, tempq, rhosq, detrat, crvmin, distsq, xoptsq;
1050 
1051     /* Parameter adjustments */
1052     diffc = ratio = dnorm = nfsav = diffa = diffb = xoptsq = f = 0.0;
1053     rho = fbeg = fopt = xjpt = xipt = 0.0;
1054     itest = ipt = jpt = 0;
1055     alpha = dstep = 0.0;
1056     zmat_dim1 = npt;
1057     zmat_offset = 1 + zmat_dim1;
1058     zmat -= zmat_offset;
1059     xpt_dim1 = npt;
1060     xpt_offset = 1 + xpt_dim1;
1061     xpt -= xpt_offset;
1062     --x; --xbase; --xopt; --xnew; --fval; --gq; --hq; --pq;
1063     bmat_dim1 = *ndim;
1064     bmat_offset = 1 + bmat_dim1;
1065     bmat -= bmat_offset;
1066     --d__;
1067     --vlag;
1068     --w;
1069     /* Function Body */
1070     np = n + 1;
1071     nh = n * np / 2;
1072     nptm = npt - np;
1073     nftest = (maxfun > 1)? maxfun : 1;
1074     /* Set the initial elements of XPT, BMAT, HQ, PQ and ZMAT to 0. */
1075     i__1 = n;
1076     for (j = 1; j <= i__1; ++j) {
1077         xbase[j] = x[j];
1078         i__2 = npt;
1079         for (k = 1; k <= i__2; ++k)
1080             xpt[k + j * xpt_dim1] = 0;
1081         i__2 = *ndim;
1082         for (i__ = 1; i__ <= i__2; ++i__)
1083             bmat[i__ + j * bmat_dim1] = 0;
1084     }
1085     i__2 = nh;
1086     for (ih = 1; ih <= i__2; ++ih) hq[ih] = 0;
1087     i__2 = npt;
1088     for (k = 1; k <= i__2; ++k) {
1089         pq[k] = 0;
1090         i__1 = nptm;
1091         for (j = 1; j <= i__1; ++j)
1092             zmat[k + j * zmat_dim1] = 0;
1093     }
1094     /* Begin the initialization procedure. NF becomes 1.0 more than the
1095      * number of function values so far. The coordinates of the
1096      * displacement of the next initial interpolation point from XBASE
1097      * are set in XPT(NF,.). */
1098     rhosq = rhobeg * rhobeg;
1099     recip = 1.0 / rhosq;
1100     reciq = sqrt(.5) / rhosq;
1101     nf = 0;
1102 L50:
1103     nfm = nf;
1104     nfmm = nf - n;
1105     ++nf;
1106     if (nfm <= n << 1) {
1107         if (nfm >= 1 && nfm <= n) {
1108             xpt[nf + nfm * xpt_dim1] = rhobeg;
1109         } else if (nfm > n) {
1110             xpt[nf + nfmm * xpt_dim1] = -(rhobeg);
1111         }
1112     } else {
1113         itemp = (nfmm - 1) / n;
1114         jpt = nfm - itemp * n - n;
1115         ipt = jpt + itemp;
1116         if (ipt > n) {
1117             itemp = jpt;
1118             jpt = ipt - n;
1119             ipt = itemp;
1120         }
1121         xipt = rhobeg;
1122         if (fval[ipt + np] < fval[ipt + 1]) xipt = -xipt;
1123         xjpt = rhobeg;
1124         if (fval[jpt + np] < fval[jpt + 1]) xjpt = -xjpt;
1125         xpt[nf + ipt * xpt_dim1] = xipt;
1126         xpt[nf + jpt * xpt_dim1] = xjpt;
1127     }
1128     /* Calculate the next value of F, label 70 being reached immediately
1129      * after this calculation. The least function value so far and its
1130      * index are required. */
1131     i__1 = n;
1132     for (j = 1; j <= i__1; ++j)
1133         x[j] = xpt[nf + j * xpt_dim1] + xbase[j];
1134     goto L310;
1135 L70:
1136     fval[nf] = f;
1137     if (nf == 1) {
1138         fbeg = fopt = f;
1139         kopt = 1;
1140     } else if (f < fopt) {
1141         fopt = f;
1142         kopt = nf;
1143     }
1144     /* Set the non0 initial elements of BMAT and the quadratic model
1145      * in the cases when NF is at most 2*N+1. */
1146     if (nfm <= n << 1) {
1147         if (nfm >= 1 && nfm <= n) {
1148             gq[nfm] = (f - fbeg) / rhobeg;
1149             if (npt < nf + n) {
1150                 bmat[nfm * bmat_dim1 + 1] = -1.0 / rhobeg;
1151                 bmat[nf + nfm * bmat_dim1] = 1.0 / rhobeg;
1152                 bmat[npt + nfm + nfm * bmat_dim1] = -.5 * rhosq;
1153             }
1154         } else if (nfm > n) {
1155             bmat[nf - n + nfmm * bmat_dim1] = .5 / rhobeg;
1156             bmat[nf + nfmm * bmat_dim1] = -.5 / rhobeg;
1157             zmat[nfmm * zmat_dim1 + 1] = -reciq - reciq;
1158             zmat[nf - n + nfmm * zmat_dim1] = reciq;
1159             zmat[nf + nfmm * zmat_dim1] = reciq;
1160             ih = nfmm * (nfmm + 1) / 2;
1161             temp = (fbeg - f) / rhobeg;
1162             hq[ih] = (gq[nfmm] - temp) / rhobeg;
1163             gq[nfmm] = .5 * (gq[nfmm] + temp);
1164         }
1165         /* Set the off-diagonal second derivatives of the Lagrange
1166          * functions and the initial quadratic model. */
1167     } else {
1168         ih = ipt * (ipt - 1) / 2 + jpt;
1169         if (xipt < 0) ipt += n;
1170         if (xjpt < 0) jpt += n;
1171         zmat[nfmm * zmat_dim1 + 1] = recip;
1172         zmat[nf + nfmm * zmat_dim1] = recip;
1173         zmat[ipt + 1 + nfmm * zmat_dim1] = -recip;
1174         zmat[jpt + 1 + nfmm * zmat_dim1] = -recip;
1175         hq[ih] = (fbeg - fval[ipt + 1] - fval[jpt + 1] + f) / (xipt * xjpt);
1176     }
1177     if (nf < npt) goto L50;
1178     /* Begin the iterative procedure, because the initial model is
1179      * complete. */
1180     rho = rhobeg;
1181     delta = rho;
1182     idz = 1;
1183     diffa = diffb = itest = xoptsq = 0;
1184     i__1 = n;
1185     for (i__ = 1; i__ <= i__1; ++i__) {
1186         xopt[i__] = xpt[kopt + i__ * xpt_dim1];
1187         /* Computing 2nd power */
1188         d__1 = xopt[i__];
1189         xoptsq += d__1 * d__1;
1190     }
1191 L90:
1192     nfsav = nf;
1193     /* Generate the next trust region step and test its length. Set KNEW
1194      * to -1 if the purpose of the next F will be to improve the
1195      * model. */
1196 L100:
1197     knew = 0;
1198     trsapp_(n, npt, &xopt[1], &xpt[xpt_offset], &gq[1], &hq[1], &pq[1], &
1199        delta, &d__[1], &w[1], &w[np], &w[np + n], &w[np + (n << 1)], &
1200         crvmin);
1201     dsq = 0;
1202     i__1 = n;
1203     for (i__ = 1; i__ <= i__1; ++i__) {
1204         /* Computing 2nd power */
1205         d__1 = d__[i__];
1206         dsq += d__1 * d__1;
1207     }
1208     /* Computing MIN */
1209     d__1 = delta, d__2 = sqrt(dsq);
1210     dnorm = std::min(d__1, d__2);
1211     if (dnorm < .5 * rho) {
1212         knew = -1;
1213         delta = DELTA_DECREASE * delta;
1214         ratio = -1.;
1215         if (delta <= rho * 1.5) delta = rho;
1216         if (nf <= nfsav + 2) goto L460;
1217         temp = crvmin * .125 * rho * rho;
1218         /* Computing MAX */
1219         d__1 = std::max(diffa, diffb);
1220         if (temp <= std::max(d__1, diffc)) goto L460;
1221         goto L490;
1222     }
1223     /* Shift XBASE if XOPT may be too far from XBASE. First make the
1224      * changes to BMAT that do not depend on ZMAT. */
1225 L120:
1226     if (dsq <= xoptsq * .001) {
1227         tempq = xoptsq * .25;
1228         i__1 = npt;
1229         for (k = 1; k <= i__1; ++k) {
1230             sum = 0;
1231             i__2 = n;
1232             for (i__ = 1; i__ <= i__2; ++i__)
1233                 sum += xpt[k + i__ * xpt_dim1] * xopt[i__];
1234             temp = pq[k] * sum;
1235             sum -= .5 * xoptsq;
1236             w[npt + k] = sum;
1237             i__2 = n;
1238             for (i__ = 1; i__ <= i__2; ++i__) {
1239                 gq[i__] += temp * xpt[k + i__ * xpt_dim1];
1240                 xpt[k + i__ * xpt_dim1] -= .5 * xopt[i__];
1241                 vlag[i__] = bmat[k + i__ * bmat_dim1];
1242                 w[i__] = sum * xpt[k + i__ * xpt_dim1] + tempq * xopt[i__];
1243                 ip = npt + i__;
1244                 i__3 = i__;
1245                 for (j = 1; j <= i__3; ++j)
1246                     bmat[ip + j * bmat_dim1] = bmat[ip + j * bmat_dim1] +
1247                         vlag[i__] * w[j] + w[i__] * vlag[j];
1248             }
1249         }
1250         /* Then the revisions of BMAT that depend on ZMAT are
1251          * calculated. */
1252         i__3 = nptm;
1253         for (k = 1; k <= i__3; ++k) {
1254             sumz = 0;
1255             i__2 = npt;
1256             for (i__ = 1; i__ <= i__2; ++i__) {
1257                 sumz += zmat[i__ + k * zmat_dim1];
1258                 w[i__] = w[npt + i__] * zmat[i__ + k * zmat_dim1];
1259             }
1260             i__2 = n;
1261             for (j = 1; j <= i__2; ++j) {
1262                 sum = tempq * sumz * xopt[j];
1263                 i__1 = npt;
1264                 for (i__ = 1; i__ <= i__1; ++i__)
1265                     sum += w[i__] * xpt[i__ + j * xpt_dim1];
1266                 vlag[j] = sum;
1267                 if (k < idz) sum = -sum;
1268                 i__1 = npt;
1269                 for (i__ = 1; i__ <= i__1; ++i__)
1270                     bmat[i__ + j * bmat_dim1] += sum * zmat[i__ + k * zmat_dim1];
1271             }
1272             i__1 = n;
1273             for (i__ = 1; i__ <= i__1; ++i__) {
1274                 ip = i__ + npt;
1275                 temp = vlag[i__];
1276                 if (k < idz) temp = -temp;
1277                 i__2 = i__;
1278                 for (j = 1; j <= i__2; ++j)
1279                     bmat[ip + j * bmat_dim1] += temp * vlag[j];
1280             }
1281         }
1282         /* The following instructions complete the shift of XBASE,
1283          * including the changes to the parameters of the quadratic
1284          * model. */
1285         ih = 0;
1286         i__2 = n;
1287         for (j = 1; j <= i__2; ++j) {
1288             w[j] = 0;
1289             i__1 = npt;
1290             for (k = 1; k <= i__1; ++k) {
1291                 w[j] += pq[k] * xpt[k + j * xpt_dim1];
1292                 xpt[k + j * xpt_dim1] -= .5 * xopt[j];
1293             }
1294             i__1 = j;
1295             for (i__ = 1; i__ <= i__1; ++i__) {
1296                 ++ih;
1297                 if (i__ < j) gq[j] += hq[ih] * xopt[i__];
1298                 gq[i__] += hq[ih] * xopt[j];
1299                 hq[ih] = hq[ih] + w[i__] * xopt[j] + xopt[i__] * w[j];
1300                 bmat[npt + i__ + j * bmat_dim1] = bmat[npt + j + i__ *
1301                                  bmat_dim1];
1302             }
1303         }
1304         i__1 = n;
1305         for (j = 1; j <= i__1; ++j) {
1306             xbase[j] += xopt[j];
1307             xopt[j] = 0;
1308         }
1309         xoptsq = 0;
1310     }
1311     /* Pick the model step if KNEW is positive. A different choice of D
1312      * may be made later, if the choice of D by BIGLAG causes
1313      * substantial cancellation in DENOM. */
1314     if (knew > 0) {
1315         biglag_(n, npt, &xopt[1], &xpt[xpt_offset], &bmat[bmat_offset], &zmat[zmat_offset], &idz,
1316                 ndim, &knew, &dstep, &d__[1], &alpha, &vlag[1], &vlag[npt + 1], &w[1], &w[np], &w[np + n], func);
1317     }
1318     /* Calculate VLAG and BETA for the current choice of D. The first
1319      * NPT components of W_check will be held in W. */
1320     i__1 = npt;
1321     for (k = 1; k <= i__1; ++k) {
1322         suma = 0;
1323         sumb = 0;
1324         sum = 0;
1325         i__2 = n;
1326         for (j = 1; j <= i__2; ++j) {
1327             suma += xpt[k + j * xpt_dim1] * d__[j];
1328             sumb += xpt[k + j * xpt_dim1] * xopt[j];
1329             sum += bmat[k + j * bmat_dim1] * d__[j];
1330         }
1331         w[k] = suma * (.5 * suma + sumb);
1332         vlag[k] = sum;
1333     }
1334     beta = 0;
1335     i__1 = nptm;
1336     for (k = 1; k <= i__1; ++k) {
1337         sum = 0;
1338         i__2 = npt;
1339         for (i__ = 1; i__ <= i__2; ++i__)
1340             sum += zmat[i__ + k * zmat_dim1] * w[i__];
1341         if (k < idz) {
1342             beta += sum * sum;
1343             sum = -sum;
1344         } else beta -= sum * sum;
1345         i__2 = npt;
1346         for (i__ = 1; i__ <= i__2; ++i__)
1347             vlag[i__] += sum * zmat[i__ + k * zmat_dim1];
1348     }
1349     bsum = 0;
1350     dx = 0;
1351     i__2 = n;
1352     for (j = 1; j <= i__2; ++j) {
1353         sum = 0;
1354         i__1 = npt;
1355         for (i__ = 1; i__ <= i__1; ++i__)
1356             sum += w[i__] * bmat[i__ + j * bmat_dim1];
1357         bsum += sum * d__[j];
1358         jp = npt + j;
1359         i__1 = n;
1360         for (k = 1; k <= i__1; ++k)
1361             sum += bmat[jp + k * bmat_dim1] * d__[k];
1362         vlag[jp] = sum;
1363         bsum += sum * d__[j];
1364         dx += d__[j] * xopt[j];
1365     }
1366     beta = dx * dx + dsq * (xoptsq + dx + dx + .5 * dsq) + beta - bsum;
1367     vlag[kopt] += 1.0;
1368     /* If KNEW is positive and if the cancellation in DENOM is
1369      * unacceptable, then BIGDEN calculates an alternative model step,
1370      * XNEW being used for working space. */
1371     if (knew > 0) {
1372         /* Computing 2nd power */
1373         d__1 = vlag[knew];
1374         temp = 1.0 + alpha * beta / (d__1 * d__1);
1375         if (fabs(temp) <= .8) {
1376             bigden_(n, npt, &xopt[1], &xpt[xpt_offset], &bmat[bmat_offset], &
1377                 zmat[zmat_offset], &idz, ndim, &kopt, &knew, &d__[1], &w[
1378                                              1], &vlag[1], &beta, &xnew[1], &w[*ndim + 1], &w[*ndim *
1379                                     6 + 1]);
1380         }
1381     }
1382     /* Calculate the next value of the objective function. */
1383 L290:
1384     i__2 = n;
1385     for (i__ = 1; i__ <= i__2; ++i__) {
1386         xnew[i__] = xopt[i__] + d__[i__];
1387         x[i__] = xbase[i__] + xnew[i__];
1388     }
1389     ++nf;
1390 L310:
1391     if (nf > nftest) {
1392         --nf;
1393         //fprintf(stderr, "++ Return from NEWUOA because CALFUN has been called MAXFUN times.\n");
1394         goto L530;
1395     }
1396     f = func(n, &x[1]);
1397     if (nf <= npt) goto L70;
1398     if (knew == -1) goto L530;
1399     /* Use the quadratic model to predict the change in F due to the
1400      * step D, and set DIFF to the error of this prediction. */
1401     vquad = ih = 0;
1402     i__2 = n;
1403     for (j = 1; j <= i__2; ++j) {
1404         vquad += d__[j] * gq[j];
1405         i__1 = j;
1406         for (i__ = 1; i__ <= i__1; ++i__) {
1407             ++ih;
1408             temp = d__[i__] * xnew[j] + d__[j] * xopt[i__];
1409             if (i__ == j) temp = .5 * temp;
1410             vquad += temp * hq[ih];
1411         }
1412     }
1413     i__1 = npt;
1414     for (k = 1; k <= i__1; ++k) vquad += pq[k] * w[k];
1415     diff = f - fopt - vquad;
1416     diffc = diffb;
1417     diffb = diffa;
1418     diffa = fabs(diff);
1419     if (dnorm > rho) nfsav = nf;
1420     /* Update FOPT and XOPT if the new F is the least value of the
1421      * objective function so far. The branch when KNEW is positive
1422      * occurs if D is not a trust region step. */
1423     fsave = fopt;
1424     if (f < fopt) {
1425         fopt = f;
1426         xoptsq = 0;
1427         i__1 = n;
1428         for (i__ = 1; i__ <= i__1; ++i__) {
1429             xopt[i__] = xnew[i__];
1430             /* Computing 2nd power */
1431             d__1 = xopt[i__];
1432             xoptsq += d__1 * d__1;
1433         }
1434     }
1435     ksave = knew;
1436     if (knew > 0) goto L410;
1437     /* Pick the next value of DELTA after a trust region step. */
1438     if (vquad >= 0) {
1439         fprintf(stderr, "++ Return from NEWUOA because a trust region step has failed to reduce Q.\n");
1440         goto L530;
1441     }
1442     ratio = (f - fsave) / vquad;
1443     if (ratio <= 0.1) {
1444         delta = .5 * dnorm;
1445     } else if (ratio <= .7) {
1446         /* Computing MAX */
1447         d__1 = .5 * delta;
1448         delta = std::max(d__1, dnorm);
1449     } else {
1450         /* Computing MAX */
1451         d__1 = .5 * delta, d__2 = dnorm + dnorm;
1452         delta = std::max(d__1, d__2);
1453     }
1454     if (delta <= rho * 1.5) delta = rho;
1455     /* Set KNEW to the index of the next interpolation point to be
1456      * deleted. */
1457     /* Computing MAX */
1458     d__2 = 0.1 * delta;
1459     /* Computing 2nd power */
1460     d__1 = std::max(d__2, rho);
1461     rhosq = d__1 * d__1;
1462     ktemp = detrat = 0;
1463     if (f >= fsave) {
1464         ktemp = kopt;
1465         detrat = 1.0;
1466     }
1467     i__1 = npt;
1468     for (k = 1; k <= i__1; ++k) {
1469         hdiag = 0;
1470         i__2 = nptm;
1471         for (j = 1; j <= i__2; ++j) {
1472             temp = 1.0;
1473             if (j < idz) temp = -1.0;
1474             /* Computing 2nd power */
1475             d__1 = zmat[k + j * zmat_dim1];
1476             hdiag += temp * (d__1 * d__1);
1477         }
1478         /* Computing 2nd power */
1479         d__2 = vlag[k];
1480         temp = (d__1 = beta * hdiag + d__2 * d__2, fabs(d__1));
1481         distsq = 0;
1482         i__2 = n;
1483         for (j = 1; j <= i__2; ++j) {
1484             /* Computing 2nd power */
1485             d__1 = xpt[k + j * xpt_dim1] - xopt[j];
1486             distsq += d__1 * d__1;
1487         }
1488         if (distsq > rhosq) {
1489             /* Computing 3rd power */
1490             d__1 = distsq / rhosq;
1491             temp *= d__1 * (d__1 * d__1);
1492         }
1493         if (temp > detrat && k != ktemp) {
1494             detrat = temp;
1495             knew = k;
1496         }
1497     }
1498     if (knew == 0) goto L460;
1499     /* Update BMAT, ZMAT and IDZ, so that the KNEW-th interpolation
1500      * point can be moved. Begin the updating of the quadratic model,
1501      * starting with the explicit second derivative term. */
1502 L410:
1503     update_(n, npt, &bmat[bmat_offset], &zmat[zmat_offset], &idz, ndim, &vlag[1], &beta, &knew, &w[1]);
1504     fval[knew] = f;
1505     ih = 0;
1506     i__1 = n;
1507     for (i__ = 1; i__ <= i__1; ++i__) {
1508         temp = pq[knew] * xpt[knew + i__ * xpt_dim1];
1509         i__2 = i__;
1510         for (j = 1; j <= i__2; ++j) {
1511             ++ih;
1512             hq[ih] += temp * xpt[knew + j * xpt_dim1];
1513         }
1514     }
1515     pq[knew] = 0;
1516     /* Update the other second derivative parameters, and then the
1517      * gradient vector of the model. Also include the new interpolation
1518      * point. */
1519     i__2 = nptm;
1520     for (j = 1; j <= i__2; ++j) {
1521         temp = diff * zmat[knew + j * zmat_dim1];
1522         if (j < idz) temp = -temp;
1523         i__1 = npt;
1524         for (k = 1; k <= i__1; ++k) {
1525             pq[k] += temp * zmat[k + j * zmat_dim1];
1526         }
1527     }
1528     gqsq = 0;
1529     i__1 = n;
1530     for (i__ = 1; i__ <= i__1; ++i__) {
1531         gq[i__] += diff * bmat[knew + i__ * bmat_dim1];
1532         /* Computing 2nd power */
1533         d__1 = gq[i__];
1534         gqsq += d__1 * d__1;
1535         xpt[knew + i__ * xpt_dim1] = xnew[i__];
1536     }
1537     /* If a trust region step makes a small change to the objective
1538      * function, then calculate the gradient of the least Frobenius norm
1539      * interpolant at XBASE, and store it in W, using VLAG for a vector
1540      * of right hand sides. */
1541     if (ksave == 0 && delta == rho) {
1542         if (fabs(ratio) > .01) {
1543             itest = 0;
1544         } else {
1545             i__1 = npt;
1546             for (k = 1; k <= i__1; ++k)
1547                 vlag[k] = fval[k] - fval[kopt];
1548             gisq = 0;
1549             i__1 = n;
1550             for (i__ = 1; i__ <= i__1; ++i__) {
1551                 sum = 0;
1552                 i__2 = npt;
1553                 for (k = 1; k <= i__2; ++k)
1554                     sum += bmat[k + i__ * bmat_dim1] * vlag[k];
1555                 gisq += sum * sum;
1556                 w[i__] = sum;
1557             }
1558             /* Test whether to replace the new quadratic model by the
1559              * least Frobenius norm interpolant, making the replacement
1560              * if the test is satisfied. */
1561             ++itest;
1562             if (gqsq < gisq * 100.) itest = 0;
1563             if (itest >= 3) {
1564                 i__1 = n;
1565                 for (i__ = 1; i__ <= i__1; ++i__) gq[i__] = w[i__];
1566                 i__1 = nh;
1567                 for (ih = 1; ih <= i__1; ++ih) hq[ih] = 0;
1568                 i__1 = nptm;
1569                 for (j = 1; j <= i__1; ++j) {
1570                     w[j] = 0;
1571                     i__2 = npt;
1572                     for (k = 1; k <= i__2; ++k)
1573                         w[j] += vlag[k] * zmat[k + j * zmat_dim1];
1574                     if (j < idz) w[j] = -w[j];
1575                 }
1576                 i__1 = npt;
1577                 for (k = 1; k <= i__1; ++k) {
1578                     pq[k] = 0;
1579                     i__2 = nptm;
1580                     for (j = 1; j <= i__2; ++j)
1581                         pq[k] += zmat[k + j * zmat_dim1] * w[j];
1582                 }
1583                 itest = 0;
1584             }
1585         }
1586     }
1587     if (f < fsave) kopt = knew;
1588     /* If a trust region step has provided a sufficient decrease in F,
1589      * then branch for another trust region calculation. The case
1590      * KSAVE>0 occurs when the new function value was calculated by a
1591      * model step. */
1592     if (f <= fsave + 0.1 * vquad) goto L100;
1593     if (ksave > 0) goto L100;
1594     /* Alternatively, find out if the interpolation points are close
1595      * enough to the best point so far. */
1596     knew = 0;
1597 L460:
1598     distsq = delta * 4. * delta;
1599     i__2 = npt;
1600     for (k = 1; k <= i__2; ++k) {
1601         sum = 0;
1602         i__1 = n;
1603         for (j = 1; j <= i__1; ++j) {
1604             /* Computing 2nd power */
1605             d__1 = xpt[k + j * xpt_dim1] - xopt[j];
1606             sum += d__1 * d__1;
1607         }
1608         if (sum > distsq) {
1609             knew = k;
1610             distsq = sum;
1611         }
1612     }
1613     /* If KNEW is positive, then set DSTEP, and branch back for the next
1614      * iteration, which will generate a "model step". */
1615     if (knew > 0) {
1616         /* Computing MAX and MIN*/
1617         d__2 = 0.1 * sqrt(distsq), d__3 = .5 * delta;
1618         d__1 = std::min(d__2, d__3);
1619         dstep = std::max(d__1, rho);
1620         dsq = dstep * dstep;
1621         goto L120;
1622     }
1623     if (ratio > 0) goto L100;
1624     if (std::max(delta, dnorm) > rho) goto L100;
1625     /* The calculations with the current value of RHO are complete. Pick
1626      * the next values of RHO and DELTA. */
1627 L490:
1628     if (rho > rhoend) {
1629         delta = .5 * rho;
1630         ratio = rho / rhoend;
1631         if (ratio <= 16.) rho = rhoend;
1632         else if (ratio <= 250.) rho = sqrt(ratio) * rhoend;
1633         else rho = RHO_DECREASE * rho;
1634         delta = std::max(delta, rho);
1635         goto L90;
1636     }
1637     /* Return from the calculation, after another Newton-Raphson step,
1638      * if it is too short to have been tried before. */
1639     if (knew == -1) goto L290;
1640 L530:
1641     if (fopt <= f) {
1642         i__2 = n;
1643         for (i__ = 1; i__ <= i__2; ++i__)
1644             x[i__] = xbase[i__] + xopt[i__];
1645         f = fopt;
1646     }
1647     *ret_nf = nf;
1648     return f;
1649 }
1650 
1651 template<class TYPE, class Func>
newuoa_(int n,int npt,TYPE * x,TYPE rhobeg,TYPE rhoend,int * ret_nf,int maxfun,TYPE * w,Func & func)1652 static TYPE newuoa_(int n, int npt, TYPE *x, TYPE rhobeg, TYPE rhoend, int *ret_nf, int maxfun, TYPE *w, Func &func)
1653 {
1654     /* This subroutine seeks the least value of a function of many
1655      * variables, by a trust region method that forms quadratic models
1656      * by interpolation. There can be some freedom in the interpolation
1657      * conditions, which is taken up by minimizing the Frobenius norm of
1658      * the change to the second derivative of the quadratic model,
1659      * beginning with a zero matrix. The arguments of the subroutine are
1660      * as follows. */
1661 
1662     /* N must be set to the number of variables and must be at least
1663      * two. NPT is the number of interpolation conditions. Its value
1664      * must be in the interval [N+2,(N+1)(N+2)/2]. Initial values of the
1665      * variables must be set in X(1),X(2),...,X(N). They will be changed
1666      * to the values that give the least calculated F. RHOBEG and RHOEND
1667      * must be set to the initial and final values of a trust region
1668      * radius, so both must be positive with RHOEND<=RHOBEG. Typically
1669      * RHOBEG should be about one tenth of the greatest expected change
1670      * to a variable, and RHOEND should indicate the accuracy that is
1671      * required in the final values of the variables. MAXFUN must be set
1672      * to an upper bound on the number of calls of CALFUN.  The array W
1673      * will be used for working space. Its length must be at least
1674      * (NPT+13)*(NPT+N)+3*N*(N+3)/2. */
1675 
1676     /* SUBROUTINE CALFUN (N,X,F) must be provided by the user. It must
1677      * set F to the value of the objective function for the variables
1678      * X(1),X(2),...,X(N). Partition the working space array, so that
1679      * different parts of it can be treated separately by the subroutine
1680      * that performs the main calculation. */
1681 
1682     int id, np, iw, igq, ihq, ixb, ifv, ipq, ivl, ixn,
1683         ixo, ixp, ndim, nptm, ibmat, izmat;
1684 
1685     /* Parameter adjustments */
1686     --w; --x;
1687     /* Function Body */
1688     np = n + 1;
1689     nptm = npt - np;
1690     if (npt < n + 2 || npt > (n + 2) * np / 2) {
1691         fprintf(stderr, "** Return from NEWUOA because NPT is not in the required interval.\n");
1692         return 1;
1693     }
1694     ndim = npt + n;
1695     ixb = 1;
1696     ixo = ixb + n;
1697     ixn = ixo + n;
1698     ixp = ixn + n;
1699     ifv = ixp + n * npt;
1700     igq = ifv + npt;
1701     ihq = igq + n;
1702     ipq = ihq + n * np / 2;
1703     ibmat = ipq + npt;
1704     izmat = ibmat + ndim * n;
1705     id = izmat + npt * nptm;
1706     ivl = id + n;
1707     iw = ivl + ndim;
1708     /* The above settings provide a partition of W for subroutine
1709      * NEWUOB. The partition requires the first NPT*(NPT+N)+5*N*(N+3)/2
1710      * elements of W plus the space that is needed by the last array of
1711      * NEWUOB. */
1712     return newuob_(n, npt, &x[1], rhobeg, rhoend, ret_nf, maxfun, &w[ixb], &w[ixo], &w[ixn],
1713                    &w[ixp], &w[ifv], &w[igq], &w[ihq], &w[ipq], &w[ibmat], &w[izmat],
1714                    &ndim, &w[id], &w[ivl], &w[iw], func);
1715 }
1716 
1717 template<class TYPE, class Func>
min_newuoa(int n,TYPE * x,Func & func,TYPE rb,TYPE tol,int max_iter)1718 TYPE min_newuoa(int n, TYPE *x, Func &func, TYPE rb, TYPE tol, int max_iter)
1719 {
1720     int npt = 2 * n + 1, rnf;
1721     TYPE ret;
1722     TYPE *w = (TYPE*)calloc((npt+13)*(npt+n) + 3*n*(n+3)/2 + 11, sizeof(TYPE));
1723     ret = newuoa_(n, 2*n+1, x, rb, tol, &rnf, max_iter, w, func);
1724     free(w);
1725     return ret;
1726 }
1727 
1728 #endif
1729