1 #include <math.h>
2 #include <stddef.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include "cs.h"
6 #include "mrilib.h"
7 #include "f2c.h"
8 #include <time.h>
9 #include <sys/types.h>
10 #include <unistd.h>
11 
12 #undef  BIGVAL
13 #define BIGVAL 1.e+38   /* this is a big number */
14 
15 #undef MAX
16 #define MAX(a,b) (((a)<(b)) ? (b) : (a))    /* not von Sydow, not Mad */
17 
18 /****************************/
19 /** cf. powell_newuoa.[fc] **/
20 
21 #ifdef USE_OMP
22 # include "powell_newuoa.c"
23 #else
24   extern int newuoa_(integer *n, integer *npt, doublereal *x,
25                      doublereal *rhobeg, doublereal *rhoend, integer *maxfun,
26                      doublereal *w, integer *icode) ;
27 #endif
28 
29 /*---------------------------------------------------------------------------*/
30 /* Changes made Jun 2016 to make these functions work with OpenMP.
31    Macros in Aomp.h for allocating per-thread data are used in place
32    of the older malloc() calls.  The global userfun is also made
33    per-thread, so in principal different threads could be optimizing
34    different functions (not just the same function with different data).     */
35 /*---------------------------------------------------------------------------*/
36 
37 /*---------------------------------------------------------------------------*/
38 
39 static int verb = 0 ;
powell_set_verbose(int v)40 void powell_set_verbose( int v )
41 {  if( verb == v ) return ;
42    verb = v; /** INFO_message("powell_set_verbose(%d)",verb) ; **/
43    return ;
44 }
45 
46 /*---------------------------------------------------------------------------*/
47 /* Macro to periodically reduce a variable into the range 0..1:
48    for example: PRED01(1.2) == 0.8, PRED01(1.8) == 0.2, et cetera;
49    graphically
50                PRED01(x)|
51                         | /\      /\      /\      /\      /\
52                         |/  \    /  \    /  \    /  \    /
53                         |    \  /    \  /    \  /    \  /
54                         |     \/      \/      \/      \/
55                         +------------------------------------> x
56                           -3  -2  -1   0  +1  +2  +3  +4  +5
57 -----------------------------------------------------------------------------*/
58 
59 #undef  PRED01
60 #define PRED01(x) fabs( (x) - 2.0*floor(0.5*((x)+1.0)) )
61 
62 #define SC_BOX  1
63 #define SC_BALL 2
64 
65 AO_DEFINE_SCALAR(int,scalx) ;
66 AO_DEFINE_ARRAY(double,sxmin) ;
67 AO_DEFINE_ARRAY(double,sxsiz) ;
68 AO_DEFINE_ARRAY(double,sx) ;
69 
70 #if 0  /* part of 'nested' and not tested! */
71 static int      mapx = 0    ;  /* if > 0, number of total parameters */
72 static double *mapar = NULL ;  /* array holding all parameters */
73 static int    *mapin = NULL ;  /* index array for mapping */
74 #endif
75 
76 /*---------------------------------------------------------------------------*/
77 
78 /*! Pointer to user-supplied function that does actual work in calfun_(). */
79 
80 #ifndef USE_OMP
81 static double (*userfun)( int n , double *x ) = NULL ;
82 #  define SET_USERFUN(uu) userfun = uu
83 #  define GET_USERFUN     userfun
84 #else
85 static double (*userfun[AO_NTH_MAX])( int n , double *x ) ;
86 #  define SET_USERFUN(uu) userfun[AOth] = uu
87 #  define GET_USERFUN     userfun[AOth]
88 #endif
89 
90 /*---------------------------------------------------------------------------*/
91 /*! Reduce x[ii] (all in 0..1) to be inside the ball [08 Jan 2015] */
92 
xreduce(int n,double * x)93 static void xreduce( int n , double *x )
94 {
95    int ii ;
96 
97    for( ii=0 ; ii < n ; ii++ ){
98      if( !isfinite(x[ii]) || x[ii] < -9.9f || x[ii] > 9.9f ) x[ii] = 0.5f ;
99      else                                                    x[ii] = PRED01(x[ii]) ;
100    }
101    if( AO_VALUE(scalx) == SC_BALL ){
102      double rad=0.0 ;
103      for( ii=0 ; ii < n ; ii++ ) rad += (x[ii]-0.5)*(x[ii]-0.5) ;
104      if( rad > 0.25 ){
105        rad = 0.25 / rad ;
106        for( ii=0 ; ii < n ; ii++ ) x[ii] = 0.5 + (x[ii]-0.5)*rad ;
107      }
108    }
109    return ;
110    return ;
111 }
112 
113 /*---------------------------------------------------------------------------*/
114 /*! Function called by newuoa_();
115     goal is to minimize this as a function of x[0..n-1] */
116 
117 static int calfun_err=0 ;
118 
calfun_(integer * n,doublereal * x,doublereal * fun)119 int calfun_(integer *n, doublereal *x, doublereal *fun)
120 {
121    double val ;
122    double *sxmin=AO_VALUE(sxmin) ;
123    double *sxsiz=AO_VALUE(sxsiz) ;
124    double *sx   =AO_VALUE(sx) ;
125    double (*ufun)( int n , double *x ) = GET_USERFUN ;
126 
127    calfun_err = 0 ;
128 
129    if( AO_VALUE(scalx) == SC_BOX ){  /* in this case, inputs x[] are in range 0..1,  */
130      int ii ;              /* and need to be scaled to their 'true' values */
131 
132      for( ii=0 ; ii < *n ; ii++ ){
133        if( !isfinite(x[ii]) || x[ii] < -9.9 || x[ii] > 9.9 ){
134          fprintf(stderr,"** ERROR: calfun[%d]=%g --> 0\n",ii,x[ii]) ; x[ii] = 0.5 ; calfun_err++ ;
135        }
136        sx[ii] = sxmin[ii] + sxsiz[ii]*PRED01(x[ii]);
137      }
138 
139      val = ufun( (int)(*n) , sx ) ;           /* input = scaled x[] */
140 
141 #if 0
142 if( verb > 2 ){
143   fprintf(stderr," + calfun(") ;
144   for( ii=0 ; ii < *n ; ii++ ){
145     fprintf(stderr,"%7.4f",sx[ii]) ; if( ii+1 < *n ) fprintf(stderr,",") ;
146   }
147   fprintf(stderr,") = %14.7g\n",val) ;
148 }
149 #endif
150 
151    } else if( AO_VALUE(scalx) == SC_BALL ){  /* scale into a ball, not a box [08 Jan 2015] */
152      int ii ; double rad=0.0 ;
153 
154      for( ii=0 ; ii < *n ; ii++ ){
155        if( !isfinite(x[ii]) || x[ii] < -9.9 || x[ii] > 9.9 ){
156          fprintf(stderr,"** ERROR: calfun[%d]=%g --> 0\n",ii,x[ii]) ; x[ii] = 0.5 ; calfun_err++ ;
157        }
158        rad += (x[ii]-0.5)*(x[ii]-0.5) ;
159      }
160      if( rad <= 0.25 ){             /* inside the ball */
161        for( ii=0 ; ii < *n ; ii++ )
162          sx[ii] = sxmin[ii] + sxsiz[ii]*x[ii] ;
163      } else {                       /* outside the ball */
164        rad = 0.25 / rad ;
165        for( ii=0 ; ii < *n ; ii++ )
166          sx[ii] = sxmin[ii] + sxsiz[ii]*(0.5 + (x[ii]-0.5)*rad) ;
167      }
168 
169      val = ufun( (int)(*n) , sx ) ;           /* input = scaled x[] */
170 
171 #if 0
172 if( verb > 2 ){
173   fprintf(stderr," + calfun(") ;
174   for( ii=0 ; ii < *n ; ii++ ){
175     fprintf(stderr,"%7.4f",sx[ii]) ; if( ii+1 < *n ) fprintf(stderr,",") ;
176   }
177   fprintf(stderr,") = %14.7g\n",val) ;
178 }
179 #endif
180 
181 #if 0
182    } else if( mapx ){      /* in this case, the parameters given as input */
183      int ii ;              /* are just a subset of all the parameters, so */
184                            /* we must expand the array to the full size.  */
185      for( ii=0 ; ii < *n ; ii++ ) mapar[mapin[ii]] = x[ii] ;
186      val = ufun( mapx , mapar ) ;
187 #endif
188 
189    } else {
190 
191      val = ufun( (int)(*n) , (double *)x ) ;  /* input = unscaled x[] */
192    }
193 
194    if( !isfinite(val) ){ calfun_err++ ; val = BIGVAL ; } /* 28 Jan 2016 */
195    *fun = (doublereal)val ;
196    return 0 ;
197 }
198 
199 /*---------------------------------------------------------------------------*/
200 
201 static float mfac = 2.0f ;   /* default number of      */
202 static float afac = 3.0f ;   /* sample points is 2*n+3 */
203 
powell_set_mfac(float mm,float aa)204 void powell_set_mfac( float mm , float aa )
205 {
206   if( mm >= 1.0f ){ mfac = mm   ; afac = aa   ; }  /* set */
207   else            { mfac = 2.0f ; afac = 3.0f ; }  /* reset */
208 }
209 
210 /*---------------------------------------------------------------------------*/
211 /*! Driver for Powell's general purpose minimization newuoa function:
212     - ndim   = number of variables in function to be minimized
213     - x      = array [ndim] of variables (input and output)
214     - rstart = size of initial search region around initial value of x
215     - rend   = size of final search region = desired accuracy
216     - maxcall = max number of times to call ufunc
217     - ufunc   = function to minimize: inputs are number of variables (n)
218                 and current array of variables (x) at which to evaluate
219 
220     Return value is number of function calls to ufunc actually done.
221     If return is negative, something bad happened.
222 
223     MJD Powell, "The NEWUOA software for unconstrained optimization without
224     derivatives", Technical report DAMTP 2004/NA08, Cambridge University
225     Numerical Analysis Group.
226     See:  http://www.ii.uib.no/~lennart/drgrad/Powell2004.pdf
227 
228     For constrained optimization (via trickery), see powell_newuoa_con() [OLD]
229     and powell_newuoa_constrained() [NEW].
230 ------------------------------------------------------------------------------*/
231 
powell_newuoa(int ndim,double * x,double rstart,double rend,int maxcall,double (* ufunc)(int,double *))232 int powell_newuoa( int ndim , double *x ,
233                    double rstart , double rend ,
234                    int maxcall , double (*ufunc)(int,double *) )
235 {
236    integer n , npt , icode , maxfun ;
237    doublereal rhobeg , rhoend ;
238    AO_DEFINE_ARRAY(double,w) ; double *w ;
239 
240    /* check inputs */
241 
242    if( ndim < 1                         ) return -2 ;
243    if( x == NULL                        ) return -3 ;
244    if( rstart < rend || rstart <= 1.e-4 ) return -4 ;
245    if( ufunc == NULL                    ) return -5 ;
246 
247    if( rend    <= 0.0       ) rend    = 1.e-4 * rstart ;
248    if( maxcall <  10+5*ndim ) maxcall = 10+5*ndim ;
249 
250    n      = ndim ;
251    npt    = (int)(mfac*n+afac) ; if( npt < n+2   ) npt = n+2 ;
252    icode  = (n+1)*(n+2)/2      ; if( npt > icode ) npt = icode ;
253    maxfun = maxcall ;
254 
255    rhobeg = (doublereal)rstart ;
256    rhoend = (doublereal)rend   ;
257 
258    icode   = (npt+14)*(npt+n) + 3*n*(n+3)/2 + 6666 ;
259    AO_RESIZE_ARRAY(double,w,icode) ; w = AO_VALUE(w) ;
260    icode   = 0 ;
261    SET_USERFUN(ufunc) ;
262    AO_VALUE(scalx) = 0 ;
263 
264    /* do the work: best params are put back into x[] */
265 
266    (void)newuoa_( &n , &npt , (doublereal *)x ,
267                   &rhobeg , &rhoend , &maxfun , w , &icode ) ;
268 
269    return icode ;  /* number of function calls */
270 }
271 
272 /*---------------------------------------------------------------------------*/
273 
274 static int con_meth = SC_BOX ;
275 
powell_newuoa_set_con_box(void)276 void powell_newuoa_set_con_box (void){ con_meth = SC_BOX ; }
powell_newuoa_set_con_ball(void)277 void powell_newuoa_set_con_ball(void){ con_meth = SC_BALL; }
powell_newuoa_get_con(void)278 int  powell_newuoa_get_con     (void){ return con_meth ;   }
powell_newuoa_set_con(int c)279 void powell_newuoa_set_con     (int c){ con_meth = c ;     }
280 
281 /*---------------------------------------------------------------------------*/
282 /*! Similar to powell_newuoa(), but with constraints on the variables,
283     and (if nrand > 0) a random search for the starting vector (the initial
284     vector on input in x[] is also tested to see if it is 'best' for starting).
285     Note the units on rstart and rend are relative (between 0 and 1).
286 
287     Also see the newer powell_newuoa_constrained(), which has more options!
288 -----------------------------------------------------------------------------*/
289 
powell_newuoa_con(int ndim,double * x,double * xbot,double * xtop,int nrand,double rstart,double rend,int maxcall,double (* ufunc)(int,double *))290 int powell_newuoa_con( int ndim , double *x , double *xbot , double *xtop ,
291                        int nrand ,
292                        double rstart , double rend ,
293                        int maxcall , double (*ufunc)(int,double *) )
294 {
295    integer n , npt , icode , maxfun , ncall=0 ;
296    doublereal rhobeg , rhoend ;
297    int ii ;
298    AO_DEFINE_ARRAY(double,x01) ; double *x01 ;
299    AO_DEFINE_ARRAY(double,w)   ; double *w ;
300    double *sxmin,*sxsiz,*sx ;
301 
302    /* check inputs */
303 
304    if( ndim < 1                         ) return -2 ;
305    if( x == NULL                        ) return -3 ;
306    if( rstart < rend || rstart <= 1.e-4 ) return -4 ;
307    if( ufunc == NULL                    ) return -5 ;
308    if( xbot == NULL || xtop == NULL     ) return -6 ;
309 
310    if( rend    <= 0.0       ) rend    = 1.e-4 * rstart ;
311    if( maxcall <  10+5*ndim ) maxcall = 10+5*ndim ;
312 
313    n      = ndim ;
314    npt    = (int)(mfac*n+afac) ; if( npt < n+2   ) npt = n+2 ;
315    icode  = (n+1)*(n+2)/2      ; if( npt > icode ) npt = icode ;
316    maxfun = maxcall ;
317 
318    rhobeg = (doublereal)rstart ;
319    rhoend = (doublereal)rend   ;
320 
321    icode   = (npt+14)*(npt+n) + 3*n*(n+3)/2 + 666 ;
322    AO_RESIZE_ARRAY(double,w,icode) ; w = AO_VALUE(w) ;
323 
324    icode   = 0 ;
325    SET_USERFUN(ufunc) ;
326 
327    /*-- To enforce constraints:
328         (a) scale each variable to be in the range 0..1, in x01[] array;
329         (b) in calfun_(), if an input variable drifts outside the 0..1
330             range, bring it back into that range with the PRED01() macro;
331         (c) then scale that 0..1 value back to the 'true' value
332             before calling ufunc() to evaluate objective function. -------*/
333 
334    /*** The above comment describes the original SC_BOX method of
335         dealing with constraints.  The newer SC_BALL method keeps the
336         scaled parameters inside the ball of radius 0.5 centered at
337         x=0.5 -- this is the L2-ball, rather than the 'Linfinity-ball'
338         that SC_BOX enforces, and this ball is inside the SC_BOX bounds. ***/
339 
340    AO_VALUE(scalx) = con_meth ;
341    AO_RESIZE_ARRAY(double,sxmin,ndim) ; sxmin = AO_VALUE(sxmin) ;
342    AO_RESIZE_ARRAY(double,sxsiz,ndim) ; sxsiz = AO_VALUE(sxsiz) ;
343    AO_RESIZE_ARRAY(double,sx   ,ndim) ; sx    = AO_VALUE(sx) ;
344    AO_RESIZE_ARRAY(double,x01  ,ndim) ; x01   = AO_VALUE(x01) ;
345    for( ii=0 ; ii < ndim ; ii++ ){
346      sxmin[ii] = xbot[ii] ;
347      sxsiz[ii] = xtop[ii] - xbot[ii]; if( sxsiz[ii] <= 0.0 ) sxsiz[ii] = 1.0;
348      x01[ii]   = (x[ii] - sxmin[ii]) / sxsiz[ii] ;
349    }
350    xreduce( ndim , x01 ) ;  /* make sure is in the legal range */
351 
352    /*-- do a random search for the best starting vector? --*/
353 
354    if( nrand > 0 ){
355      AO_DEFINE_ARRAY(double,xbest) ;
356      AO_DEFINE_ARRAY(double,xtest) ;
357      double *xbest , *xtest , fbest , ftest ; int qq ;
358      static int seed=1 ;
359 #pragma omp critical
360      { if( seed ){ srand48((long)time(NULL)+(long)getpid()); seed=0; } }
361      AO_RESIZE_ARRAY(double,xbest,ndim) ; xbest = AO_VALUE(xbest) ;
362      AO_RESIZE_ARRAY(double,xtest,ndim) ; xtest = AO_VALUE(xtest) ;
363      AO_memcpy(xbest,x01,sizeof(double)*ndim) ;
364      (void)calfun_(&n,xbest,&fbest) ; ncall++ ;
365      for( qq=0 ; qq < nrand ; qq++ ){
366        for( ii=0 ; ii < ndim ; ii++ ) xtest[ii] = drand48() ;
367        if( AO_VALUE(scalx) != SC_BOX ) xreduce( ndim, xtest ) ;
368        (void)calfun_(&n,xtest,&ftest) ; ncall++ ;
369        if( ftest < fbest ){
370          fbest = ftest; AO_memcpy(xbest,xtest,sizeof(double)*ndim);
371        }
372      }
373      AO_memcpy(x01,xbest,sizeof(double)*ndim) ;
374    }
375 
376    /****** optimize the scaled variables ******/
377 
378    (void)newuoa_( &n , &npt , (doublereal *)x01 ,
379                   &rhobeg , &rhoend , &maxfun , w , &icode ) ;
380 
381 #if 0
382    if( icode > 0 ) icode += ncall ;
383 #endif
384 
385    /*-- Rescale output back to 'true' range --*/
386 
387    xreduce( ndim , x01 ) ;
388    for( ii=0 ; ii < ndim ; ii++ )
389      x[ii] = sxmin[ii] + x01[ii] * sxsiz[ii] ;
390 
391    if( verb ){
392      fprintf(stderr," +   output param:") ;
393      for( ii=0 ; ii < ndim ; ii++ ) fprintf(stderr," %g",x[ii]) ;
394      fprintf(stderr,"\n") ;
395    }
396 
397    AO_VALUE(scalx) = 0 ;
398    return icode ;
399 }
400 
401 /*---------------------------------------------------------------------------*/
402 /*! Driver for Powell's general purpose minimization newuoa function:
403 
404     - ndim   = number of variables in function to be minimized
405     - x      = array [ndim] of variables (input and output)
406     - cost   = pointer to value to hold output cost function: ufunc(output x[])
407     - xbot   = array [ndim] of minimum values for the x values
408     - xtop   = array [ndim] of maximum values for the x values
409     - nrand  = if nrand > 0, then it is the number of random vectors
410                inside the constraint xbot..xtop region to evaluate
411                as potential starting vectors, in addition to the vector x[]
412     - nkeep  = the number of random vectors to keep for the first round
413                of optimization (in addition to the initial x[] vector)
414     - ntry   = the number of vectors to keep for the second round of
415                optimization
416     - rstart = size of initial search region around initial value of x
417                units are relative to the search size xbot..xtop
418                (if input as 0, 0.10 will be used)
419     - rend   = size of final search region = desired accuracy
420                (if input as 0, 0.0001 will be used)
421     - maxcall= max number of times to call ufunc in any one newuoa() call
422                in the second round of optimization
423     - ufunc  = function to minimize: inputs are number of variables (n)
424                and current array of variables (x) at which to evaluate
425     - Sequence of calculations:
426       - Evaluate ufunc() at the input x[] vector
427       - Also evaluate ufunc() at nrand random vectors in the constraint
428         domain xbot..xtop
429       - Keep the nkeep+1 best random vectors plus the input x[] vector,
430         and do a little bit of NEWUOA optimization on each of them
431         (this is the first round optimization) -- however, vectors
432         that are too close to each other will not be used (cf. DTHRESH)
433       - From those nkeep+1 vectors output by the first round, do the
434         full optimization (this is the second round) on the ntry best
435         starting vectors
436       - The output vector of the second round that has the smallest value
437         of ufunc() is returned in x[]; the corresponding value of ufunc()
438         is returned in *cost
439       - To optimize ONLY starting at input x[] vector: input nrand=nkeep=ntry=0;
440         this will actually just call powell_newuoa_con() to do the work.
441       - Tentative suggestion: nrand=13*ndim nkeep=15 ntry=5; however, it
442         is hard to have a general rule, since cost functions can be funky.
443 
444     Return value is number of function calls to ufunc actually done.
445     If return is negative, something bad happened.
446 
447     The best vector found in the second round of optimization is returned
448     in the x[] array.
449 
450     MJD Powell, "The NEWUOA software for unconstrained optimization without
451     derivatives", Technical report DAMTP 2004/NA08, Cambridge University
452     Numerical Analysis Group.
453     See:  http://www.ii.uib.no/~lennart/drgrad/Powell2004.pdf
454 ------------------------------------------------------------------------------*/
455 
powell_newuoa_constrained(int ndim,double * x,double * cost,double * xbot,double * xtop,int nrand,int nkeep,int ntry,double rstart,double rend,int maxcall,double (* ufunc)(int,double *))456 int powell_newuoa_constrained( int ndim, double *x, double *cost ,
457                                double *xbot, double *xtop ,
458                                int nrand, int nkeep, int ntry ,
459                                double rstart , double rend ,
460                                int maxcall , double (*ufunc)(int,double *) )
461 {
462    integer n , npt , icode , maxfun ;
463    doublereal rhobeg , rhoend , rb,re ;
464    int ii,tt , tbest , ntot=0 , nx01 ;
465    double vbest ;
466    AO_DEFINE_ARRAY(double,w) ; double *w ;
467    AO_DEFINE_ARRAY(double,x01val) ; double *x01val ;
468    AO_DEFINE_2DARRAY(double,x01)  ; double **x01 ;
469    double *sxmin,*sxsiz,*sx ;
470 
471    /*--- check inputs for stupidity ---*/
472 
473    if( ndim < 1                     ) return -2 ;
474    if( x == NULL                    ) return -3 ;
475    if( ufunc == NULL                ) return -5 ;
476    if( xbot == NULL || xtop == NULL ) return -6 ;
477 
478    /*--- edit inputs for idiocy ---*/
479 
480    if( rstart <= rend || rstart <= 1.e-4 || rstart > 0.333 ){
481      rstart = 0.1 ; rend = 1.e-4 ;
482    }
483 
484    /* if this call is totally vanilla, then call the older function */
485 
486    if( nrand == 0 && nkeep == 0 && ntry < 2 )
487      return powell_newuoa_con( ndim , x,xbot,xtop ,
488                                0 , rstart,rend , maxcall , ufunc ) ;
489 
490    if( maxcall < 10+5*ndim ) maxcall = 10+5*ndim ;
491 
492    if( ntry  < 1 )                     ntry  = 1 ;
493    if( nkeep < 1 && nrand > 0 )        nkeep = 1 ;
494    if( nkeep > 0 && nrand <= 2*nkeep ) nrand = 2*nkeep+1 ;
495 
496    /*--- set up newuoa parameters and workspace ---*/
497 
498    n      = ndim ;
499    npt    = (int)(mfac*n+afac) ; if( npt < n+2   ) npt = n+2 ;
500    icode  = (n+1)*(n+2)/2      ; if( npt > icode ) npt = icode ;
501    maxfun = maxcall ;
502 
503    rhobeg = (doublereal)rstart ;
504    rhoend = (doublereal)rend   ;
505 
506    icode   = (npt+14)*(npt+n) + 3*n*(n+3)/2 + 666 ;
507    AO_RESIZE_ARRAY(double,w,icode) ; w = AO_VALUE(w) ;
508 
509    SET_USERFUN(ufunc) ;  /* store pointer to user's function in global variable */
510 
511    /*-- To enforce constraints:
512         (a) scale each variable to be in the range 0..1, in x01[] array;
513         (b) in calfun_(), if an input variable drifts outside the 0..1
514             range, bring it back into that range with the PRED01() macro;
515         (c) then scale that 0..1 value back to the 'true' value
516             before calling ufunc() to evaluate objective function. -------*/
517 
518    AO_VALUE(scalx) = con_meth ;                /* signal to calfun_() to apply scaling */
519    AO_RESIZE_ARRAY(double,sxmin,ndim) ; sxmin = AO_VALUE(sxmin) ;
520    AO_RESIZE_ARRAY(double,sxsiz,ndim) ; sxsiz = AO_VALUE(sxsiz) ;
521    AO_RESIZE_ARRAY(double,sx   ,ndim) ; sx    = AO_VALUE(sx) ;
522    sxmin = AO_VALUE(sxmin) ;
523    sxsiz = AO_VALUE(sxsiz) ;
524    sx    = AO_VALUE(sx) ;
525    for( ii=0 ; ii < ndim ; ii++ ){
526      sxmin[ii] = xbot[ii] ;
527      sxsiz[ii] = xtop[ii] - xbot[ii]; if( sxsiz[ii] <= 0.0 ) sxsiz[ii] = 1.0 ;
528    }
529 
530    /*-- set up the first starting vector from x[] array --*/
531 
532    nx01 = nkeep+2 ;
533 
534    AO_RESIZE_2DARRAY(double,x01,nx01,ndim) ; x01 = AO_VALUE(x01) ;
535    AO_RESIZE_ARRAY(double,x01val,nx01) ; x01val = AO_VALUE(x01val) ;
536    for( tt=0 ; tt < nx01 ; tt++ ) x01val[tt] = BIGVAL; /* mark as unready */
537 
538    /* copy x[] into x01[0], scaling to 0..1 range */
539 
540 #undef  CALFUN_ERROR
541 #define CALFUN_ERROR(xxx)                                      \
542  do{ if( calfun_err ){                                         \
543        int aa ;                                                \
544        fprintf(stderr,"** calfun error:") ;                    \
545        for( aa=0 ; aa < ndim ; aa++ )                          \
546          fprintf(stderr," %g",sxmin[aa]+xxx[aa]*sxsiz[aa]) ;   \
547        fprintf(stderr,"\n") ;                                  \
548    } } while(0)
549 
550    if( verb )
551      INFO_message("Powell: evaluation at initial guess") ;
552    for( ii=0 ; ii < ndim ; ii++ ){
553      x01[0][ii] = (x[ii] - sxmin[ii]) / sxsiz[ii] ;
554      x01[0][ii] = PRED01(x01[0][ii]) ;  /* make sure is in range 0..1 */
555    }
556    if( AO_VALUE(scalx) != SC_BOX ) xreduce( ndim, x01[0] ) ;
557    (void)calfun_(&n,x01[0],x01val+0) ;  /* value of keeper #0 = input vector */
558    ntot++ ;                           /* number of times calfun_() is called */
559    CALFUN_ERROR(x01[0]) ; if( !isfinite(x01val[0]) ) x01val[0] = BIGVAL ;
560 
561    /*-- do a random search for the best starting vector? --*/
562 
563    if( nrand > 0 ){
564      AO_DEFINE_ARRAY(double,xtest) ;
565      double *xtest , *qpar,*cpar , ftest , dist ;
566      int qq,jj ; integer mf ;
567      static int seed=1 ;
568 #pragma omp critical
569      { if( seed ){ srand48((long)time(NULL)+(long)getpid()); seed=0; } }
570      AO_RESIZE_ARRAY(double,xtest,ndim) ; xtest = AO_VALUE(xtest) ;
571 
572      /* Step 1: search nrand start vectors, keeping the
573                 nkeep-th best values we find on the way.
574                 N.B.: we do NOT displace the input vector in x01[0];
575                       therefore, there are nkeep+1 vectors being kept */
576 
577      if( verb )
578        INFO_message("Powell: random search of %d vectors in %d-dim space",nrand,ndim);
579 
580      for( qq=0 ; qq < nrand ; qq++ ){
581        for( ii=0 ; ii < ndim ; ii++ ) xtest[ii] = drand48() ;    /* random pt */
582        if( AO_VALUE(scalx) != SC_BOX ) xreduce( ndim, xtest ) ;
583        (void)calfun_(&n,xtest,&ftest) ; ntot++ ;            /* eval cost func */
584        CALFUN_ERROR(xtest) ; if( !isfinite(ftest) ) ftest = BIGVAL ;
585        for( tt=1 ; tt <= nkeep ; tt++ ){          /* is this better than what */
586          if( ftest < x01val[tt] ){                    /* we've seen thus far? */
587            for( jj=nkeep-1 ; jj >= tt ; jj-- ){    /* push those above #tt up */
588              AO_memcpy( x01[jj+1] , x01[jj] , sizeof(double)*ndim ) ; /* in list */
589              x01val[jj+1] = x01val[jj] ;
590            }
591            AO_memcpy( x01[tt] , xtest , sizeof(double)*ndim ) ;  /* save in list */
592            x01val[tt] = ftest ;
593            break ;     /* breaking out of 'tt' loop, having put xtest in list */
594          }
595        }
596      } /* end of random search loop */
597 
598      /* count number that have valid cost function results */
599 
600      for( tt=0 ; tt <= nkeep && x01val[tt] < BIGVAL ; tt++ ) ; /* nada */
601      nkeep = tt ;   /** from now on, nkeep = actual number of keepers **/
602 
603      /* Step 2a: do a little first round optimization on each of the keepers */
604 
605      if( verb )
606        INFO_message("Powell: 1st round optimization on %d vectors",nkeep);
607 
608      rb = 0.05 ; re = 0.005 ; mf = 9*ndim+7 ; tbest = 0 ; vbest = BIGVAL ;
609      for( tt=0 ; tt < nkeep ; tt++ ){
610        (void)newuoa_( &n, &npt, (doublereal *)x01[tt], &rb,&re,&mf,w,&icode ) ;
611        for( ii=0 ; ii < ndim ; ii++ ) x01[tt][ii] = PRED01(x01[tt][ii]) ;
612        if( AO_VALUE(scalx) != SC_BOX ) xreduce(ndim,x01[tt]) ;
613        (void)calfun_(&n,x01[tt],x01val+tt) ; ntot += icode+1 ;
614        CALFUN_ERROR(x01[tt]) ; if( !isfinite(x01val[tt]) ) x01val[tt] = BIGVAL ;
615        if( x01val[tt] < vbest ){ vbest = x01val[tt]; tbest = tt; }
616        if( verb > 1 )
617          ININFO_message("%2d: cost = %g %c nfunc=%d",tt,x01val[tt],(tbest==tt)?'*':' ',icode) ;
618      }
619 
620      /* Step 2b: sort results by new x01val costs */
621 
622      qsort_doublestuff( nkeep , x01val , (void **)x01 ) ;
623 
624      /* Step 2c: cast out those that are too close to better vectors
625                  in the max-norm (we always keep the best vector in x01[0]) */
626 
627 #undef  DTHRESH
628 #define DTHRESH 0.05    /* max-norm distance threshold for vector reject */
629 
630      for( tt=1 ; tt < nkeep ; tt++ ){
631        qpar = x01[tt] ;                       /* do we keep this vector? */
632        for( jj=0 ; jj < tt ; jj++ ){       /* loop over previous keepers */
633          if( x01val[jj] >= BIGVAL ) continue ;       /* already rejected */
634          cpar = x01[jj] ;                        /* compare qpar to cpar */
635          for( dist=0.0,ii=0 ; ii < ndim ; ii++ ){
636            re = fabs(cpar[ii]-qpar[ii]) ; dist = MAX(dist,re) ;
637          }
638          if( dist < DTHRESH ){              /* qpar is too close to cpar */
639            x01val[tt] = BIGVAL ; break ;           /* reject qpar vector */
640          }
641        }
642      }
643 
644      /* Step 2d: sort again (so that the rejected ones rise to the top) */
645 
646      qsort_doublestuff( nkeep , x01val , (void **)x01 ) ;
647 
648      for( tt=0 ; tt <= nkeep && x01val[tt] < BIGVAL ; tt++ ) ; /* nada */
649      nkeep = tt ;  /* number of keepers that weren't rejected above */
650      if( nkeep == 0 ) ERROR_message("Powell: nothing survived 1st round :-(") ;
651 
652      if( ntry > nkeep ) ntry = nkeep ;
653 
654    } else {       /*------ didn't do random search -----*/
655 
656      ntry = 1 ;   /* can only try the input x[] vector! */
657 
658    }
659 
660    /****** fully optimize each of the first ntry-th vectors in x01[] ******/
661 
662    if( verb )
663      INFO_message("Powell: 2nd round optimization on %d vectors",ntry) ;
664 
665    tbest = 0 ; vbest = BIGVAL ;
666    for( tt=0 ; tt < ntry ; tt++ ){
667      (void)newuoa_( &n , &npt , (doublereal *)x01[tt] ,
668                     &rhobeg , &rhoend , &maxfun , w , &icode ) ;
669      for( ii=0 ; ii < ndim ; ii++ ) x01[tt][ii] = PRED01(x01[tt][ii]) ;
670      if( AO_VALUE(scalx) != SC_BOX ) xreduce(ndim,x01[tt]) ;
671      (void)calfun_(&n,x01[tt],x01val+tt) ; ntot += icode+1 ;
672      CALFUN_ERROR(x01[tt]) ; if( !isfinite(x01val[tt]) ) x01val[tt] = BIGVAL ;
673      if( x01val[tt] < vbest ){ vbest = x01val[tt]; tbest = tt; }
674      if( verb > 1 )
675        ININFO_message("%2d: cost = %g %c  nfunc=%d",tt,x01val[tt],(tbest==tt)?'*':' ',icode) ;
676    }
677 
678    /*-- re-optimize the bestest one again --*/
679 
680    rb = 20.0 * rhoend ; if( rb > rhobeg ) rb = rhobeg ;
681    (void)newuoa_( &n , &npt , (doublereal *)x01[tbest] ,
682                   &rb , &rhoend , &maxfun , w , &icode ) ;
683    for( ii=0 ; ii < ndim ; ii++ ) x01[tbest][ii] = PRED01(x01[tbest][ii]) ;
684    if( AO_VALUE(scalx) != SC_BOX ) xreduce(ndim,x01[tbest]) ;
685    (void)calfun_(&n,x01[tbest],&vbest) ; ntot += icode+1 ;
686 
687    /*-- Rescale bestest output vector back to 'true' range --*/
688 
689    for( ii=0 ; ii < ndim ; ii++ )
690      x[ii] = sxmin[ii] + x01[tbest][ii] * sxsiz[ii] ;
691    if( cost != NULL ) *cost = vbest ;    /* save cost func */
692 
693    /*-- vamoose the ranch --*/
694 
695    AO_VALUE(scalx) = 0 ;
696 
697    return ntot ;
698 }
699 
700 /*---------------------------------------------------------------------------*/
701 /* Function for minimizing over just one parameter,
702    since powell_newuoa requires at least 2 dimensions.
703    Method: repeated subdivision into NCUT pieces, focus on the
704            best piece and the interval around it, then da capo.
705            Brute force, but it works -- usually.
706 *//*-------------------------------------------------------------------------*/
707 
708 #undef  NCUT
709 #define NCUT 25   /* note 25^5 is about 1e7, so we will refine     */
710 #undef  MLEV      /* the original interval xbot..xtop by a factor  */
711 #define MLEV 5    /* of about 10 million -- should be good enough! */
712 
minimize_in_1D(double xbot,double xtop,double (* ufunc)(int,double *))713 double minimize_in_1D( double xbot, double xtop,
714                        double (*ufunc)(int,double *) )
715 {
716   double x1, x2, dx ;
717   double xv, val, vmin ; int imin, nlev, ii;
718 
719   /* stoopid yuser? */
720 
721   if( ufunc == NULL || xbot >= xtop ) return -666.0f ;
722 
723   x1 = xbot ; x2 = xtop ;
724 
725   for( nlev=0 ; nlev < MLEV ; nlev++ ){   /* MLEV repetitions */
726     dx = (x2-x1)/NCUT ; vmin = 1.0e+38 ;
727     for( ii=0 ; ii <= NCUT ; ii++ ){      /* NCUT pieces in x1..x2 */
728       xv  = x1 + ii*dx ;
729       val = ufunc(1,&xv) ;
730       if( ii == 0 || val < vmin ){ imin = ii ; vmin = val ; }
731     }
732 #if 0
733 INFO_message("x1=%g x2=%g imin=%d xmin=%g vmin=%g",x1,x2,imin,x1+imin*dx,vmin) ;
734 #endif
735     if( nlev == MLEV-1 ) return (x1+imin*dx) ;
736 
737     if( imin == 0 ){
738       x2 = x1 + 1.5*dx ;
739     } else if( imin == NCUT ){
740       x1 = x2 - 1.5*dx ;
741     } else {
742       x1 = x1 + (imin-1)*dx*0.99 ; x2 = x1 + 1.98*dx ;
743     }
744   }
745 
746   return (0.5*(x1+x2)) ; /* should never be reached */
747 }
748 
749 /*============================================================================*/
750 /*====----- Nested optimization stuff -- not tested or working yet!! -----====*/
751 /*============================================================================*/
752 #if 0
753 /*---------------------------------------------------------------------------*/
754 
755 static int POW_gcd( int m , int n )    /* Euclid's Greatest Common Denominator */
756 {
757   while( m > 0 ){
758     if( n > m ){ int t=m; m=n; n=t; } /* swap */
759     m -= n ;
760   }
761   return n ;
762 }
763 
764 /*---------------------------------------------------------------------------*/
765 
766 static int POW_find_relprime_fixed( int n )  /* find number relatively prime to n */
767 {
768    int dj , n5=n/5 ;
769    if( n5 < 2 ) return 1 ;
770    for( dj=3 ; POW_gcd(n,dj) > 1 ; dj++ ) ; /*nada*/
771    return dj ;
772 }
773 
774 /*---------------------------------------------------------------------------*/
775 /*! Driver for Powell's general purpose minimization newuoa function:
776     - ndim   = number of variables in function to be minimized
777     - nper   = number to be minimized per round of nesting
778     - pstep  = step in parameter index to use
779     - x      = array [ndim] of variables (input and output)
780     - rstart = size of initial search region around initial value of x
781     - rend   = size of final search region = desired accuracy
782     - maxcall = max number of times to call ufunc
783     - ufunc   = function to minimize: inputs are number of variables (n)
784                 and current array of variables (x) at which to evaluate
785 
786     Return value is number of function calls to ufunc actually done.
787     If return is negative, something bad happened.
788 
789     MJD Powell, "The NEWUOA software for unconstrained optimization without
790     derivatives", Technical report DAMTP 2004/NA08, Cambridge University
791     Numerical Analysis Group.
792     See:  http://www.ii.uib.no/~lennart/drgrad/Powell2004.pdf
793 ------------------------------------------------------------------------------*/
794 
795 int powell_newuoa_nested( int ndim , int nper , int pstep , double *x ,
796                           double rstart , double rend ,
797                           int maxcall , double (*ufunc)(int,double *) )
798 {
799    integer n , npt , icode , maxfun ;
800    doublereal rhobeg , rhoend , rfac , *w , *xloc ;
801    int ii , qnum , qloop ;
802 
803    /* check inputs */
804 
805    if( ndim < 1                         ) return -2 ;
806    if( x == NULL                        ) return -3 ;
807    if( rstart < rend || rstart <= 1.e-4 ) return -4 ;
808    if( ufunc == NULL                    ) return -5 ;
809 
810    if( nper < 2 ){
811      if( ndim <= 100 ){
812        nper = ndim ;
813      } else {
814        nper = ndim / 5 ; if( nper > 100 ) nper = 100 ;
815      }
816    }
817    if( nper >= ndim ){
818      icode = powell_newuoa( ndim,x,rstart,rend,maxcall,ufunc ) ;
819      return icode ;
820    }
821    if( pstep <= 0 || pstep >= ndim ){
822      pstep = POW_find_relprime_fixed(ndim) ;
823    } else if( pstep > 1 ){
824      for( ; POW_gcd(pstep,ndim) > 1 ; pstep++ ) ; /*nada*/
825    }
826 
827    if( rend    <= 0.0        ) rend    = 1.e-2 * rstart ;
828    if( maxcall <  99+27*ndim ) maxcall = 99+27*ndim ;
829 
830    mapx  = ndim ;
831    mapar = (double *)malloc(sizeof(double)*ndim) ;
832    mapin = (int *)   malloc(sizeof(int)   *nper) ;
833    xloc  = (double *)malloc(sizeof(double)*nper) ;
834    for( ii=0 ; ii < ndim ; ii++ ) mapar[ii] = x[ii] ;
835 
836    n      = nper ;
837    npt    = (int)(mfac*n+afac) ; if( npt < n+2   ) npt = n+2 ;
838    icode  = (n+1)*(n+2)/2      ; if( npt > icode ) npt = icode ;
839    maxfun = 13*nper+1 ;
840 
841    rhobeg = (doublereal)rstart ;
842    rhoend = (doublereal)rend   ;
843 
844    icode   = (npt+14)*(npt+n) + 3*n*(n+3)/2 + 6666 ;
845    w       = (doublereal *)malloc(sizeof(doublereal)*icode) ; /* workspace */
846    icode   = 0 ;
847    SET_USERFUN(ufunc) ;
848    AO_VALUE(scalx)   = 0 ;
849 
850    qnum = 0 ; mapin[nper-1] = -pstep ;
851    rfac = pow( 0.8 , nper/(double)ndim ) ;
852 
853    do{
854 
855      mapin[0] = (mapin[nper-1] + pstep) % ndim ;
856      for( ii=1 ; ii < nper ; ii++ ) mapin[ii] = (mapin[ii-1]+pstep) % ndim ;
857      for( ii=0 ; ii < nper ; ii++ ) xloc[ii] = mapar[mapin[ii]] ;
858 
859      /* do the local work: best params are put back into xloc[] */
860 
861      (void)newuoa_( &n , &npt , (doublereal *)xloc ,
862                     &rhobeg , &rhoend , &maxfun , w , &icode ) ;
863 
864      for( ii=0 ; ii < nper ; ii++ ) mapar[mapin[ii]] = xloc[ii] ;
865      qnum += icode ; rhobeg *= rfac ;
866 
867    } while( qnum < maxcall ) ;
868 
869    for( ii=0 ; ii < ndim ; ii++ ) x[ii] = mapar[ii] ;
870 
871    free(w) ; free(xloc) ; free(mapin) ; free(mapar) ;
872 
873    return qnum ;  /* number of function calls */
874 }
875 #endif /* nested optimization */
876 /*============================================================================*/
877