1 /* Copyright (c) 2007-2014 Massachusetts Institute of Technology
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining
4  * a copy of this software and associated documentation files (the
5  * "Software"), to deal in the Software without restriction, including
6  * without limitation the rights to use, copy, modify, merge, publish,
7  * distribute, sublicense, and/or sell copies of the Software, and to
8  * permit persons to whom the Software is furnished to do so, subject to
9  * the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be
12  * included in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 /* Multi-Level Single-Linkage (MLSL) algorithm for
24    global optimization by random local optimizations (a multistart algorithm
25    with "clustering" to avoid repeated detection of the same local minimum),
26    modified to optionally use a Sobol' low-discrepancy sequence (LDS) instead
27    of pseudorandom numbers.  See:
28 
29    A. H. G. Rinnooy Kan and G. T. Timmer, "Stochastic global optimization
30    methods," Mathematical Programming, vol. 39, p. 27-78 (1987).
31        [ actually 2 papers -- part I: clustering methods (p. 27), then
32                               part II: multilevel methods (p. 57) ]
33 
34    and also:
35 
36    Sergei Kucherenko and Yury Sytsko, "Application of deterministic
37    low-discrepancy sequences in global optimization," Computational
38    Optimization and Applications, vol. 30, p. 297-318 (2005).
39 
40    Compared to the above papers, I made a couple other modifications
41    to the algorithm besides the use of a LDS.
42 
43    1) first, the original algorithm suggests discarding points
44       within a *fixed* distance of the boundaries or of previous
45       local minima; I changed this to a distance that decreases with,
46       iteration, proportional to the same threshold radius used
47       for clustering.  (In the case of the boundaries, I make
48       the proportionality constant rather small as well.)
49 
50    2) Kan and Timmer suggest using fancy "spiral-search" algorithms
51       to quickly locate nearest-neighbor points, reducing the
52       overall time for N sample points from O(N^2) to O(N log N)
53       However, recent papers seem to show that such schemes (kd trees,
54       etcetera) become inefficient for high-dimensional spaces (d > 20),
55       and finding a better approach seems to be an open question.  Therefore,
56       since I am mainly interested in MLSL for high-dimensional problems
57       (in low dimensions we have DIRECT etc.), I punted on this question
58       for now.  In practice, O(N^2) (which does not count the #points
59       evaluated in local searches) does not seem too bad if the objective
60       function is expensive.
61 
62 */
63 
64 #include <stdlib.h>
65 #include <math.h>
66 #include <string.h>
67 
68 #include "redblack.h"
69 #include "mlsl.h"
70 
71 /* data structure for each random/quasirandom point in the population */
72 typedef struct {
73      double f; /* function value at x */
74      int minimized; /* if we have already minimized starting from x */
75      double closest_pt_d; /* distance^2 to closest pt with smaller f */
76      double closest_lm_d; /* distance^2 to closest lm with smaller f*/
77      double x[1]; /* array of length n (K&R struct hack) */
78 } pt;
79 
80 /* all of the data used by the various mlsl routines...it's
81    not clear in hindsight that we need to put all of this in a data
82    structure since most of the work occurs in a single routine,
83    but it doesn't hurt us */
84 typedef struct {
85      int n; /* # dimensions */
86      const double *lb, *ub;
87      nlopt_stopping *stop; /* stopping criteria */
88      nlopt_func f; void *f_data;
89 
90      rb_tree pts; /* tree of points (k == pt), sorted by f */
91      rb_tree lms; /* tree of local minimizers, sorted by function value
92 		     (k = array of length d+1, [0] = f, [1..d] = x) */
93 
94      nlopt_sobol s; /* sobol data for LDS point generation, or NULL
95 		       to use pseudo-random numbers */
96 
97      double R_prefactor, dlm, dbound, gamma; /* parameters of MLSL */
98      int N; /* number of pts to add per iteration */
99 } mlsl_data;
100 
101 /* comparison routines to sort the red-black trees by function value */
pt_compare(rb_key p1_,rb_key p2_)102 static int pt_compare(rb_key p1_, rb_key p2_)
103 {
104      pt *p1 = (pt *) p1_;
105      pt *p2 = (pt *) p2_;
106      if (p1->f < p2->f) return -1;
107      if (p1->f > p2->f) return +1;
108      return 0;
109 }
lm_compare(double * k1,double * k2)110 static int lm_compare(double *k1, double *k2)
111 {
112      if (*k1 < *k2) return -1;
113      if (*k1 > *k2) return +1;
114      return 0;
115 }
116 
117 /* Euclidean distance |x1 - x2|^2 */
distance2(int n,const double * x1,const double * x2)118 static double distance2(int n, const double *x1, const double *x2)
119 {
120      int i;
121      double d = 0.;
122      for (i = 0; i < n; ++i) {
123 	  double dx = x1[i] - x2[i];
124 	  d += dx * dx;
125      }
126      return d;
127 }
128 
129 /* find the closest pt to p with a smaller function value;
130    this function is called when p is first added to our tree */
find_closest_pt(int n,rb_tree * pts,pt * p)131 static void find_closest_pt(int n, rb_tree *pts, pt *p)
132 {
133      rb_node *node = rb_tree_find_lt(pts, (rb_key) p);
134      double closest_d = HUGE_VAL;
135      while (node) {
136 	  double d = distance2(n, p->x, ((pt *) node->k)->x);
137 	  if (d < closest_d) closest_d = d;
138 	  node = rb_tree_pred(node);
139      }
140      p->closest_pt_d = closest_d;
141 }
142 
143 /* find the closest local minimizer (lm) to p with a smaller function value;
144    this function is called when p is first added to our tree */
find_closest_lm(int n,rb_tree * lms,pt * p)145 static void find_closest_lm(int n, rb_tree *lms, pt *p)
146 {
147      rb_node *node = rb_tree_find_lt(lms, &p->f);
148      double closest_d = HUGE_VAL;
149      while (node) {
150 	  double d = distance2(n, p->x, node->k+1);
151 	  if (d < closest_d) closest_d = d;
152 	  node = rb_tree_pred(node);
153      }
154      p->closest_lm_d = closest_d;
155 }
156 
157 /* given newpt, which presumably has just been added to our
158    tree, update all pts with a greater function value in case
159    newpt is closer to them than their previous closest_pt ...
160    we can ignore already-minimized points since we never do
161    local minimization from the same point twice */
pts_update_newpt(int n,rb_tree * pts,pt * newpt)162 static void pts_update_newpt(int n, rb_tree *pts, pt *newpt)
163 {
164      rb_node *node = rb_tree_find_gt(pts, (rb_key) newpt);
165      while (node) {
166 	  pt *p = (pt *) node->k;
167 	  if (!p->minimized) {
168 	       double d = distance2(n, newpt->x, p->x);
169 	       if (d < p->closest_pt_d) p->closest_pt_d = d;
170 	  }
171 	  node = rb_tree_succ(node);
172      }
173 }
174 
175 /* given newlm, which presumably has just been added to our
176    tree, update all pts with a greater function value in case
177    newlm is closer to them than their previous closest_lm ...
178    we can ignore already-minimized points since we never do
179    local minimization from the same point twice */
pts_update_newlm(int n,rb_tree * pts,double * newlm)180 static void pts_update_newlm(int n, rb_tree *pts, double *newlm)
181 {
182      pt tmp_pt;
183      rb_node *node;
184      tmp_pt.f = newlm[0];
185      node = rb_tree_find_gt(pts, (rb_key) &tmp_pt);
186      while (node) {
187 	  pt *p = (pt *) node->k;
188 	  if (!p->minimized) {
189 	       double d = distance2(n, newlm+1, p->x);
190 	       if (d < p->closest_lm_d) p->closest_lm_d = d;
191 	  }
192 	  node = rb_tree_succ(node);
193      }
194 }
195 
is_potential_minimizer(mlsl_data * mlsl,pt * p,double dpt_min,double dlm_min,double dbound_min)196 static int is_potential_minimizer(mlsl_data *mlsl, pt *p,
197 				  double dpt_min,
198 				  double dlm_min,
199 				  double dbound_min)
200 {
201      int i, n = mlsl->n;
202      const double *lb = mlsl->lb;
203      const double *ub = mlsl->ub;
204      const double *x = p->x;
205 
206      if (p->minimized)
207 	  return 0;
208 
209      if (p->closest_pt_d <= dpt_min*dpt_min)
210 	  return 0;
211 
212      if (p->closest_lm_d <= dlm_min*dlm_min)
213 	  return 0;
214 
215      for (i = 0; i < n; ++i)
216 	  if ((x[i] - lb[i] <= dbound_min || ub[i] - x[i] <= dbound_min)
217 	      && ub[i] - lb[i] > dbound_min)
218 	       return 0;
219 
220      return 1;
221 }
222 
223 #define K2PI (6.2831853071795864769252867665590057683943388)
224 
225 /* compute Gamma(1 + n/2)^{1/n} ... this is a constant factor
226    used in MLSL as part of the minimum-distance cutoff*/
gam(int n)227 static double gam(int n)
228 {
229      /* use Stirling's approximation:
230 	Gamma(1 + z) ~ sqrt(2*pi*z) * z^z / exp(z)
231         ... this is more than accurate enough for our purposes
232             (error in gam < 15% for d=1, < 4% for d=2, < 2% for d=3, ...)
233             and avoids overflow problems because we can combine it with
234 	    the ^{1/n} exponent */
235      double z = n/2;
236      return sqrt(pow(K2PI * z, 1.0/n) * z) * exp(-0.5);
237 }
238 
alloc_pt(int n)239 static pt *alloc_pt(int n)
240 {
241      pt *p = (pt *) malloc(sizeof(pt) + (n-1) * sizeof(double));
242      if (p) {
243 	  p->minimized = 0;
244 	  p->closest_pt_d = HUGE_VAL;
245 	  p->closest_lm_d = HUGE_VAL;
246      }
247      return p;
248 }
249 
250 /* wrapper around objective function that increments evaluation count */
fcount(unsigned n,const double * x,double * grad,void * p_)251 static double fcount(unsigned n, const double *x, double *grad, void *p_)
252 {
253      mlsl_data *p = (mlsl_data *) p_;
254      p->stop->nevals++;
255      return p->f(n, x, grad, p->f_data);
256 }
257 
get_minf(mlsl_data * d,double * minf,double * x)258 static void get_minf(mlsl_data *d, double *minf, double *x)
259 {
260      rb_node *node = rb_tree_min(&d->pts);
261      if (node) {
262 	  *minf = ((pt *) node->k)->f;
263 	  memcpy(x, ((pt *) node->k)->x, sizeof(double) * d->n);
264      }
265      node = rb_tree_min(&d->lms);
266      if (node && node->k[0] < *minf) {
267 	  *minf = node->k[0];
268 	  memcpy(x, node->k + 1, sizeof(double) * d->n);
269      }
270 }
271 
272 #define MIN(a,b) ((a) < (b) ? (a) : (b))
273 
274 #define MLSL_SIGMA 2. /* MLSL sigma parameter, using value from the papers */
275 #define MLSL_GAMMA 0.3 /* MLSL gamma parameter (FIXME: what should it be?) */
276 
mlsl_minimize(int n,nlopt_func f,void * f_data,const double * lb,const double * ub,double * x,double * minf,nlopt_stopping * stop,nlopt_opt local_opt,int Nsamples,int lds)277 nlopt_result mlsl_minimize(int n, nlopt_func f, void *f_data,
278 			   const double *lb, const double *ub, /* bounds */
279 			   double *x, /* in: initial guess, out: minimizer */
280 			   double *minf,
281 			   nlopt_stopping *stop,
282 			   nlopt_opt local_opt,
283 			   int Nsamples, /* #samples/iteration (0=default) */
284 			   int lds) /* random or low-discrepancy seq. (lds) */
285 {
286      nlopt_result ret = NLOPT_SUCCESS;
287      mlsl_data d;
288      int i;
289      pt *p;
290 
291      if (!Nsamples)
292 	  d.N = 4; /* FIXME: what is good number of samples per iteration? */
293      else
294 	  d.N = Nsamples;
295      if (d.N < 1) return NLOPT_INVALID_ARGS;
296 
297      d.n = n;
298      d.lb = lb; d.ub = ub;
299      d.stop = stop;
300      d.f = f; d.f_data = f_data;
301      rb_tree_init(&d.pts, pt_compare);
302      rb_tree_init(&d.lms, lm_compare);
303      d.s = lds ? nlopt_sobol_create((unsigned) n) : NULL;
304 
305      nlopt_set_min_objective(local_opt, fcount, &d);
306      nlopt_set_lower_bounds(local_opt, lb);
307      nlopt_set_upper_bounds(local_opt, ub);
308      nlopt_set_stopval(local_opt, stop->minf_max);
309 
310      d.gamma = MLSL_GAMMA;
311 
312      d.R_prefactor = sqrt(2./K2PI) * pow(gam(n) * MLSL_SIGMA, 1.0/n);
313      for (i = 0; i < n; ++i)
314 	  d.R_prefactor *= pow(ub[i] - lb[i], 1.0/n);
315 
316      /* MLSL also suggests setting some minimum distance from points
317 	to previous local minimiza and to the boundaries; I don't know
318 	how to choose this as a fixed number, so I set it proportional
319 	to R; see also the comments at top.  dlm and dbound are the
320 	proportionality constants. */
321      d.dlm = 1.0; /* min distance/R to local minima (FIXME: good value?) */
322      d.dbound = 1e-6; /* min distance/R to ub/lb boundaries (good value?) */
323 
324 
325      p = alloc_pt(n);
326      if (!p) { ret = NLOPT_OUT_OF_MEMORY; goto done; }
327 
328      /* FIXME: how many sobol points to skip, if any? */
329      nlopt_sobol_skip(d.s, (unsigned) (10*n+d.N), p->x);
330 
331      memcpy(p->x, x, n * sizeof(double));
332      p->f = f(n, x, NULL, f_data);
333      stop->nevals++;
334      if (!rb_tree_insert(&d.pts, (rb_key) p)) {
335 	  free(p); ret = NLOPT_OUT_OF_MEMORY;
336      }
337      if (nlopt_stop_forced(stop)) ret = NLOPT_FORCED_STOP;
338      else if (nlopt_stop_evals(stop)) ret = NLOPT_MAXEVAL_REACHED;
339      else if (nlopt_stop_time(stop)) ret = NLOPT_MAXTIME_REACHED;
340      else if (p->f < stop->minf_max) ret = NLOPT_MINF_MAX_REACHED;
341 
342      while (ret == NLOPT_SUCCESS) {
343 	  rb_node *node;
344 	  double R;
345 
346 	  get_minf(&d, minf, x);
347 
348 	  /* sampling phase: add random/quasi-random points */
349 	  for (i = 0; i < d.N && ret == NLOPT_SUCCESS; ++i) {
350 	       p = alloc_pt(n);
351 	       if (!p) { ret = NLOPT_OUT_OF_MEMORY; goto done; }
352 	       if (d.s) nlopt_sobol_next(d.s, p->x, lb, ub);
353 	       else { /* use random points instead of LDS */
354 		    int j;
355 		    for (j = 0; j < n; ++j) p->x[j] = nlopt_urand(lb[j],ub[j]);
356 	       }
357 	       p->f = f(n, p->x, NULL, f_data);
358 	       stop->nevals++;
359 	       if (!rb_tree_insert(&d.pts, (rb_key) p)) {
360 		    free(p); ret = NLOPT_OUT_OF_MEMORY;
361 	       }
362 	       if (nlopt_stop_forced(stop)) ret = NLOPT_FORCED_STOP;
363 	       else if (nlopt_stop_evals(stop)) ret = NLOPT_MAXEVAL_REACHED;
364 	       else if (nlopt_stop_time(stop)) ret = NLOPT_MAXTIME_REACHED;
365 	       else if (p->f < stop->minf_max) ret = NLOPT_MINF_MAX_REACHED;
366 	       else {
367 		    find_closest_pt(n, &d.pts, p);
368 		    find_closest_lm(n, &d.lms, p);
369 		    pts_update_newpt(n, &d.pts, p);
370 	       }
371 	  }
372 
373 	  /* distance threshhold parameter R in MLSL */
374 	  R = d.R_prefactor
375 	       * pow(log((double) d.pts.N) / d.pts.N, 1.0 / n);
376 
377 	  /* local search phase: do local opt. for promising points */
378 	  node = rb_tree_min(&d.pts);
379 	  for (i = (int) (ceil(d.gamma * d.pts.N) + 0.5);
380 	       node && i > 0 && ret == NLOPT_SUCCESS; --i) {
381 	       p = (pt *) node->k;
382 	       if (is_potential_minimizer(&d, p,
383 					  R, d.dlm*R, d.dbound*R)) {
384 		    nlopt_result lret;
385 		    double *lm;
386 		    double t = nlopt_seconds();
387 
388 		    if (nlopt_stop_forced(stop)) {
389 			 ret = NLOPT_FORCED_STOP; break;
390 		    }
391 		    if (nlopt_stop_evals(stop)) {
392                          ret = NLOPT_MAXEVAL_REACHED; break;
393 		    }
394 		    if (stop->maxtime > 0 &&
395 			t - stop->start >= stop->maxtime) {
396 			 ret = NLOPT_MAXTIME_REACHED; break;
397 		    }
398 		    lm = (double *) malloc(sizeof(double) * (n+1));
399 		    if (!lm) { ret = NLOPT_OUT_OF_MEMORY; goto done; }
400 		    memcpy(lm+1, p->x, sizeof(double) * n);
401 		    lret = nlopt_optimize_limited(local_opt, lm+1, lm,
402 						  stop->maxeval - stop->nevals,
403 						  stop->maxtime -
404 						  (t - stop->start));
405 		    p->minimized = 1;
406 		    if (lret < 0) { free(lm); ret = lret; goto done; }
407 		    if (!rb_tree_insert(&d.lms, lm)) {
408 			 free(lm); ret = NLOPT_OUT_OF_MEMORY;
409 		    }
410 		    else if (nlopt_stop_forced(stop)) ret = NLOPT_FORCED_STOP;
411 		    else if (*lm < stop->minf_max)
412 			 ret = NLOPT_MINF_MAX_REACHED;
413 		    else if (nlopt_stop_evals(stop))
414 			 ret = NLOPT_MAXEVAL_REACHED;
415 		    else if (nlopt_stop_time(stop))
416 			 ret = NLOPT_MAXTIME_REACHED;
417 		    else
418 			 pts_update_newlm(n, &d.pts, lm);
419 	       }
420 
421 	       /* TODO: additional stopping criteria based
422 		  e.g. on improvement in function values, etc? */
423 
424 	       node = rb_tree_succ(node);
425 	  }
426      }
427 
428      get_minf(&d, minf, x);
429 
430  done:
431      nlopt_sobol_destroy(d.s);
432      rb_tree_destroy_with_keys(&d.lms);
433      rb_tree_destroy_with_keys(&d.pts);
434      return ret;
435 }
436