1 /* -*- mode: c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
3 /*
4 Copyright (C) 2007 Marco Bianchetti
5 Copyright (C) 2007 François du Vignaud
6 Copyright (C) 2007 Giorgio Facchinetti
7 Copyright (C) 2012 Ralph Schreyer
8 Copyright (C) 2012 Mateusz Kapturski
9
10 This file is part of QuantLib, a free-software/open-source library
11 for financial quantitative analysts and developers - http://quantlib.org/
12
13 QuantLib is free software: you can redistribute it and/or modify it
14 under the terms of the QuantLib license. You should have received a
15 copy of the license along with this program; if not, please email
16 <quantlib-dev@lists.sf.net>. The license is also available online at
17 <http://quantlib.org/license.shtml>.
18
19 This program is distributed in the hope that it will be useful, but WITHOUT
20 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
21 FOR A PARTICULAR PURPOSE. See the license for more details.
22 */
23
24 #include "optimizers.hpp"
25 #include "utilities.hpp"
26 #include <ql/math/optimization/simplex.hpp>
27 #include <ql/math/optimization/levenbergmarquardt.hpp>
28 #include <ql/math/optimization/conjugategradient.hpp>
29 #include <ql/math/optimization/steepestdescent.hpp>
30 #include <ql/math/optimization/bfgs.hpp>
31 #include <ql/math/optimization/constraint.hpp>
32 #include <ql/math/optimization/costfunction.hpp>
33 #include <ql/math/randomnumbers/mt19937uniformrng.hpp>
34 #include <ql/math/optimization/differentialevolution.hpp>
35 #include <ql/math/optimization/goldstein.hpp>
36
37 using namespace QuantLib;
38 using namespace boost::unit_test_framework;
39
40 using std::pow;
41 using std::cos;
42
43 namespace {
44
    struct NamedOptimizationMethod;

    // File-scope fixtures populated by setup() and read by OptimizersTest::test():
    // index i across all of these vectors describes one optimization problem
    // (cost function, constraint, initial guess, end criteria, the methods to
    // try, and the analytically known minimum to compare against).
    std::vector<ext::shared_ptr<CostFunction> > costFunctions_;
    std::vector<ext::shared_ptr<Constraint> > constraints_;
    std::vector<Array> initialValues_;
    std::vector<Size> maxIterations_, maxStationaryStateIterations_;
    std::vector<Real> rootEpsilons_, functionEpsilons_, gradientNormEpsilons_;
    std::vector<ext::shared_ptr<EndCriteria> > endCriterias_;
    std::vector<std::vector<NamedOptimizationMethod> > optimizationMethods_;
    // expected location (x) and value (y) of each problem's minimum
    std::vector<Array> xMinExpected_, yMinExpected_;
55
56 class OneDimensionalPolynomialDegreeN : public CostFunction {
57 public:
OneDimensionalPolynomialDegreeN(const Array & coefficients)58 explicit OneDimensionalPolynomialDegreeN(const Array& coefficients)
59 : coefficients_(coefficients),
60 polynomialDegree_(coefficients.size()-1) {}
61
value(const Array & x) const62 Real value(const Array& x) const {
63 QL_REQUIRE(x.size()==1,"independent variable must be 1 dimensional");
64 Real y = 0;
65 for (Size i=0; i<=polynomialDegree_; ++i)
66 y += coefficients_[i]*std::pow(x[0],static_cast<int>(i));
67 return y;
68 }
69
values(const Array & x) const70 Disposable<Array> values(const Array& x) const{
71 QL_REQUIRE(x.size()==1,"independent variable must be 1 dimensional");
72 Array y(1);
73 y[0] = value(x);
74 return y;
75 }
76
77 private:
78 const Array coefficients_;
79 const Size polynomialDegree_;
80 };
81
82
83 // The goal of this cost function is simply to call another optimization inside
84 // in order to test nested optimizations
85 class OptimizationBasedCostFunction : public CostFunction {
86 public:
value(const Array &) const87 Real value(const Array&) const { return 1.0; }
88
values(const Array &) const89 Disposable<Array> values(const Array&) const{
90 // dummy nested optimization
91 Array coefficients(3, 1.0);
92 OneDimensionalPolynomialDegreeN oneDimensionalPolynomialDegreeN(coefficients);
93 NoConstraint constraint;
94 Array initialValues(1, 100.0);
95 Problem problem(oneDimensionalPolynomialDegreeN, constraint,
96 initialValues);
97 LevenbergMarquardt optimizationMethod;
98 //Simplex optimizationMethod(0.1);
99 //ConjugateGradient optimizationMethod;
100 //SteepestDescent optimizationMethod;
101 EndCriteria endCriteria(1000, 100, 1e-5, 1e-5, 1e-5);
102 optimizationMethod.minimize(problem, endCriteria);
103 // return dummy result
104 Array dummy(1,0);
105 return dummy;
106 }
107 };
108
109
    // Tags for the optimization methods exercised in these tests; mapped to
    // concrete OptimizationMethod instances by makeOptimizationMethod() below.
    enum OptimizationMethodType {simplex,
                                 levenbergMarquardt,
                                 levenbergMarquardt2,         // LM using the cost function's own jacobian
                                 conjugateGradient,
                                 conjugateGradient_goldstein, // with Goldstein line search
                                 steepestDescent,
                                 steepestDescent_goldstein,   // with Goldstein line search
                                 bfgs,
                                 bfgs_goldstein};             // with Goldstein line search
119
optimizationMethodTypeToString(OptimizationMethodType type)120 std::string optimizationMethodTypeToString(OptimizationMethodType type) {
121 switch (type) {
122 case simplex:
123 return "Simplex";
124 case levenbergMarquardt:
125 return "Levenberg Marquardt";
126 case levenbergMarquardt2:
127 return "Levenberg Marquardt (cost function's jacbobian)";
128 case conjugateGradient:
129 return "Conjugate Gradient";
130 case steepestDescent:
131 return "Steepest Descent";
132 case bfgs:
133 return "BFGS";
134 case conjugateGradient_goldstein:
135 return "Conjugate Gradient (Goldstein line search)";
136 case steepestDescent_goldstein:
137 return "Steepest Descent (Goldstein line search)";
138 case bfgs_goldstein:
139 return "BFGS (Goldstein line search)";
140 default:
141 QL_FAIL("unknown OptimizationMethod type");
142 }
143 }
144
    // An optimization method bundled with its display name
    // (as produced by optimizationMethodTypeToString).
    struct NamedOptimizationMethod {
        ext::shared_ptr<OptimizationMethod> optimizationMethod;
        std::string name;
    };
149
150
makeOptimizationMethod(OptimizationMethodType optimizationMethodType,Real simplexLambda,Real levenbergMarquardtEpsfcn,Real levenbergMarquardtXtol,Real levenbergMarquardtGtol)151 ext::shared_ptr<OptimizationMethod> makeOptimizationMethod(
152 OptimizationMethodType optimizationMethodType,
153 Real simplexLambda,
154 Real levenbergMarquardtEpsfcn,
155 Real levenbergMarquardtXtol,
156 Real levenbergMarquardtGtol) {
157 switch (optimizationMethodType) {
158 case simplex:
159 return ext::shared_ptr<OptimizationMethod>(
160 new Simplex(simplexLambda));
161 case levenbergMarquardt:
162 return ext::shared_ptr<OptimizationMethod>(
163 new LevenbergMarquardt(levenbergMarquardtEpsfcn,
164 levenbergMarquardtXtol,
165 levenbergMarquardtGtol));
166 case levenbergMarquardt2:
167 return ext::shared_ptr<OptimizationMethod>(
168 new LevenbergMarquardt(levenbergMarquardtEpsfcn,
169 levenbergMarquardtXtol,
170 levenbergMarquardtGtol,
171 true));
172 case conjugateGradient:
173 return ext::shared_ptr<OptimizationMethod>(new ConjugateGradient);
174 case steepestDescent:
175 return ext::shared_ptr<OptimizationMethod>(new SteepestDescent);
176 case bfgs:
177 return ext::shared_ptr<OptimizationMethod>(new BFGS);
178 case conjugateGradient_goldstein:
179 return ext::shared_ptr<OptimizationMethod>(new ConjugateGradient(ext::make_shared<GoldsteinLineSearch>()));
180 case steepestDescent_goldstein:
181 return ext::shared_ptr<OptimizationMethod>(new SteepestDescent(ext::make_shared<GoldsteinLineSearch>()));
182 case bfgs_goldstein:
183 return ext::shared_ptr<OptimizationMethod>(new BFGS(ext::make_shared<GoldsteinLineSearch>()));
184 default:
185 QL_FAIL("unknown OptimizationMethod type");
186 }
187 }
188
189
makeOptimizationMethods(OptimizationMethodType optimizationMethodTypes[],Size optimizationMethodNb,Real simplexLambda,Real levenbergMarquardtEpsfcn,Real levenbergMarquardtXtol,Real levenbergMarquardtGtol)190 std::vector<NamedOptimizationMethod> makeOptimizationMethods(
191 OptimizationMethodType optimizationMethodTypes[],
192 Size optimizationMethodNb,
193 Real simplexLambda,
194 Real levenbergMarquardtEpsfcn,
195 Real levenbergMarquardtXtol,
196 Real levenbergMarquardtGtol) {
197 std::vector<NamedOptimizationMethod> results;
198 for (Size i=0; i<optimizationMethodNb; ++i) {
199 NamedOptimizationMethod namedOptimizationMethod;
200 namedOptimizationMethod.optimizationMethod = makeOptimizationMethod(
201 optimizationMethodTypes[i],
202 simplexLambda,
203 levenbergMarquardtEpsfcn,
204 levenbergMarquardtXtol,
205 levenbergMarquardtGtol);
206 namedOptimizationMethod.name
207 = optimizationMethodTypeToString(optimizationMethodTypes[i]);
208 results.push_back(namedOptimizationMethod);
209 }
210 return results;
211 }
212
maxDifference(const Array & a,const Array & b)213 Real maxDifference(const Array& a, const Array& b) {
214 Array diff = a-b;
215 Real maxDiff = 0.0;
216 for (Size i=0; i<diff.size(); ++i)
217 maxDiff = std::max(maxDiff, std::fabs(diff[i]));
218 return maxDiff;
219 }
220
221 // Set up, for each cost function, all the ingredients for optimization:
222 // constraint, initial guess, end criteria, optimization methods.
setup()223 void setup() {
224
225 // Cost function n. 1: 1D polynomial of degree 2 (parabolic function y=a*x^2+b*x+c)
226 const Real a = 1; // required a > 0
227 const Real b = 1;
228 const Real c = 1;
229 Array coefficients(3);
230 coefficients[0]= c;
231 coefficients[1]= b;
232 coefficients[2]= a;
233 costFunctions_.push_back(ext::shared_ptr<CostFunction>(
234 new OneDimensionalPolynomialDegreeN(coefficients)));
235 // Set constraint for optimizers: unconstrained problem
236 constraints_.push_back(ext::shared_ptr<Constraint>(new NoConstraint()));
237 // Set initial guess for optimizer
238 Array initialValue(1);
239 initialValue[0] = -100;
240 initialValues_.push_back(initialValue);
241 // Set end criteria for optimizer
242 maxIterations_.push_back(10000); // maxIterations
243 maxStationaryStateIterations_.push_back(100); // MaxStationaryStateIterations
244 rootEpsilons_.push_back(1e-8); // rootEpsilon
245 functionEpsilons_.push_back(1e-8); // functionEpsilon
246 gradientNormEpsilons_.push_back(1e-8); // gradientNormEpsilon
247 endCriterias_.push_back(ext::make_shared<EndCriteria>(
248 maxIterations_.back(), maxStationaryStateIterations_.back(),
249 rootEpsilons_.back(), functionEpsilons_.back(),
250 gradientNormEpsilons_.back()));
251 // Set optimization methods for optimizer
252 OptimizationMethodType optimizationMethodTypes[] = {
253 simplex, levenbergMarquardt, levenbergMarquardt2, conjugateGradient,
254 bfgs //, steepestDescent
255 };
256 Real simplexLambda = 0.1; // characteristic search length for simplex
257 Real levenbergMarquardtEpsfcn = 1.0e-8; // parameters specific for Levenberg-Marquardt
258 Real levenbergMarquardtXtol = 1.0e-8; //
259 Real levenbergMarquardtGtol = 1.0e-8; //
260 optimizationMethods_.push_back(makeOptimizationMethods(
261 optimizationMethodTypes, LENGTH(optimizationMethodTypes),
262 simplexLambda, levenbergMarquardtEpsfcn, levenbergMarquardtXtol,
263 levenbergMarquardtGtol));
264 // Set expected results for optimizer
265 Array xMinExpected(1),yMinExpected(1);
266 xMinExpected[0] = -b/(2.0*a);
267 yMinExpected[0] = -(b*b-4.0*a*c)/(4.0*a);
268 xMinExpected_.push_back(xMinExpected);
269 yMinExpected_.push_back(yMinExpected);
270 }
271
272 }
273
274
test()275 void OptimizersTest::test() {
276 BOOST_TEST_MESSAGE("Testing optimizers...");
277
278 setup();
279
280 // Loop over problems (currently there is only 1 problem)
281 for (Size i=0; i<costFunctions_.size(); ++i) {
282 Problem problem(*costFunctions_[i], *constraints_[i],
283 initialValues_[i]);
284 Array initialValues = problem.currentValue();
285 // Loop over optimizers
286 for (Size j=0; j<(optimizationMethods_[i]).size(); ++j) {
287 Real rootEpsilon = endCriterias_[i]->rootEpsilon();
288 Size endCriteriaTests = 1;
289 // Loop over rootEpsilon
290 for (Size k=0; k<endCriteriaTests; ++k) {
291 problem.setCurrentValue(initialValues);
292 EndCriteria endCriteria(
293 endCriterias_[i]->maxIterations(),
294 endCriterias_[i]->maxStationaryStateIterations(),
295 rootEpsilon,
296 endCriterias_[i]->functionEpsilon(),
297 endCriterias_[i]->gradientNormEpsilon());
298 rootEpsilon *= .1;
299 EndCriteria::Type endCriteriaResult =
300 optimizationMethods_[i][j].optimizationMethod->minimize(
301 problem, endCriteria);
302 Array xMinCalculated = problem.currentValue();
303 Array yMinCalculated = problem.values(xMinCalculated);
304
305 // Check optimization results vs known solution
306 bool completed;
307 switch (endCriteriaResult) {
308 case EndCriteria::None:
309 case EndCriteria::MaxIterations:
310 case EndCriteria::Unknown:
311 completed = false;
312 break;
313 default:
314 completed = true;
315 }
316
317 Real xError = maxDifference(xMinCalculated,xMinExpected_[i]);
318 Real yError = maxDifference(yMinCalculated,yMinExpected_[i]);
319
320 bool correct = (xError <= endCriteria.rootEpsilon() ||
321 yError <= endCriteria.functionEpsilon());
322
323 if ((!completed) || (!correct))
324 BOOST_ERROR("costFunction # = " << i <<
325 "\nOptimizer: " <<
326 optimizationMethods_[i][j].name <<
327 "\n function evaluations: " <<
328 problem.functionEvaluation() <<
329 "\n gradient evaluations: " <<
330 problem.gradientEvaluation() <<
331 "\n x expected: " <<
332 xMinExpected_[i] <<
333 "\n x calculated: " <<
334 std::setprecision(9) << xMinCalculated <<
335 "\n x difference: " <<
336 xMinExpected_[i]- xMinCalculated <<
337 "\n rootEpsilon: " <<
338 std::setprecision(9) <<
339 endCriteria.rootEpsilon() <<
340 "\n y expected: " <<
341 yMinExpected_[i] <<
342 "\n y calculated: " <<
343 std::setprecision(9) << yMinCalculated <<
344 "\n y difference: " <<
345 yMinExpected_[i]- yMinCalculated <<
346 "\n functionEpsilon: " <<
347 std::setprecision(9) <<
348 endCriteria.functionEpsilon() <<
349 "\n endCriteriaResult: " <<
350 endCriteriaResult);
351 }
352 }
353 }
354 }
355
356
nestedOptimizationTest()357 void OptimizersTest::nestedOptimizationTest() {
358 BOOST_TEST_MESSAGE("Testing nested optimizations...");
359 OptimizationBasedCostFunction optimizationBasedCostFunction;
360 NoConstraint constraint;
361 Array initialValues(1, 0.0);
362 Problem problem(optimizationBasedCostFunction, constraint,
363 initialValues);
364 LevenbergMarquardt optimizationMethod;
365 //Simplex optimizationMethod(0.1);
366 //ConjugateGradient optimizationMethod;
367 //SteepestDescent optimizationMethod;
368 EndCriteria endCriteria(1000, 100, 1e-5, 1e-5, 1e-5);
369 optimizationMethod.minimize(problem, endCriteria);
370
371 }
372
373 namespace {
374
375 class FirstDeJong : public CostFunction {
376 public:
values(const Array & x) const377 Disposable<Array> values(const Array& x) const {
378 Array retVal(x.size(),value(x));
379 return retVal;
380 }
value(const Array & x) const381 Real value(const Array& x) const {
382 return DotProduct(x,x);
383 }
384 };
385
386 class SecondDeJong : public CostFunction {
387 public:
values(const Array & x) const388 Disposable<Array> values(const Array& x) const {
389 Array retVal(x.size(),value(x));
390 return retVal;
391 }
value(const Array & x) const392 Real value(const Array& x) const {
393 return 100.0*(x[0]*x[0]-x[1])*(x[0]*x[0]-x[1])
394 + (1.0-x[0])*(1.0-x[0]);
395 }
396 };
397
398 class ModThirdDeJong : public CostFunction {
399 public:
values(const Array & x) const400 Disposable<Array> values(const Array& x) const {
401 Array retVal(x.size(),value(x));
402 return retVal;
403 }
value(const Array & x) const404 Real value(const Array& x) const {
405 Real fx = 0.0;
406 for (Size i=0; i<x.size(); ++i) {
407 fx += std::floor(x[i])*std::floor(x[i]);
408 }
409 return fx;
410 }
411 };
412
413 class ModFourthDeJong : public CostFunction {
414 public:
ModFourthDeJong()415 ModFourthDeJong()
416 : uniformRng_(MersenneTwisterUniformRng(4711)) {
417 }
values(const Array & x) const418 Disposable<Array> values(const Array& x) const {
419 Array retVal(x.size(),value(x));
420 return retVal;
421 }
value(const Array & x) const422 Real value(const Array& x) const {
423 Real fx = 0.0;
424 for (Size i=0; i<x.size(); ++i) {
425 fx += (i+1.0)*pow(x[i],4.0) + uniformRng_.nextReal();
426 }
427 return fx;
428 }
429 MersenneTwisterUniformRng uniformRng_;
430 };
431
432 class Griewangk : public CostFunction {
433 public:
values(const Array & x) const434 Disposable<Array> values(const Array& x) const{
435 Array retVal(x.size(),value(x));
436 return retVal;
437 }
value(const Array & x) const438 Real value(const Array& x) const {
439 Real fx = 0.0;
440 for (Size i=0; i<x.size(); ++i) {
441 fx += x[i]*x[i]/4000.0;
442 }
443 Real p = 1.0;
444 for (Size i=0; i<x.size(); ++i) {
445 p *= cos(x[i]/sqrt(i+1.0));
446 }
447 return fx - p + 1.0;
448 }
449 };
450 }
451
testDifferentialEvolution()452 void OptimizersTest::testDifferentialEvolution() {
453 BOOST_TEST_MESSAGE("Testing differential evolution...");
454
455 /* Note:
456 *
457 * The "ModFourthDeJong" doesn't have a well defined optimum because
458 * of its noisy part. It just has to be <= 15 in our example.
459 * The concrete value might differ for a different input and
460 * different random numbers.
461 *
462 * The "Griewangk" function is an example where the adaptive
463 * version of DifferentialEvolution turns out to be more successful.
464 */
465
466 DifferentialEvolution::Configuration conf =
467 DifferentialEvolution::Configuration()
468 .withStepsizeWeight(0.4)
469 .withBounds()
470 .withCrossoverProbability(0.35)
471 .withPopulationMembers(500)
472 .withStrategy(DifferentialEvolution::BestMemberWithJitter)
473 .withCrossoverType(DifferentialEvolution::Normal)
474 .withAdaptiveCrossover()
475 .withSeed(3242);
476 DifferentialEvolution deOptim(conf);
477
478 DifferentialEvolution::Configuration conf2 =
479 DifferentialEvolution::Configuration()
480 .withStepsizeWeight(1.8)
481 .withBounds()
482 .withCrossoverProbability(0.9)
483 .withPopulationMembers(1000)
484 .withStrategy(DifferentialEvolution::Rand1SelfadaptiveWithRotation)
485 .withCrossoverType(DifferentialEvolution::Normal)
486 .withAdaptiveCrossover()
487 .withSeed(3242);
488 DifferentialEvolution deOptim2(conf2);
489
490 std::vector<DifferentialEvolution > diffEvolOptimisers;
491 diffEvolOptimisers.push_back(deOptim);
492 diffEvolOptimisers.push_back(deOptim);
493 diffEvolOptimisers.push_back(deOptim);
494 diffEvolOptimisers.push_back(deOptim);
495 diffEvolOptimisers.push_back(deOptim2);
496
497 std::vector<ext::shared_ptr<CostFunction> > costFunctions;
498 costFunctions.push_back(ext::shared_ptr<CostFunction>(new FirstDeJong));
499 costFunctions.push_back(ext::shared_ptr<CostFunction>(new SecondDeJong));
500 costFunctions.push_back(ext::shared_ptr<CostFunction>(new ModThirdDeJong));
501 costFunctions.push_back(ext::shared_ptr<CostFunction>(new ModFourthDeJong));
502 costFunctions.push_back(ext::shared_ptr<CostFunction>(new Griewangk));
503
504 std::vector<BoundaryConstraint> constraints;
505 constraints.push_back(BoundaryConstraint(-10.0, 10.0));
506 constraints.push_back(BoundaryConstraint(-10.0, 10.0));
507 constraints.push_back(BoundaryConstraint(-10.0, 10.0));
508 constraints.push_back(BoundaryConstraint(-10.0, 10.0));
509 constraints.push_back(BoundaryConstraint(-600.0, 600.0));
510
511 std::vector<Array> initialValues;
512 initialValues.push_back(Array(3, 5.0));
513 initialValues.push_back(Array(2, 5.0));
514 initialValues.push_back(Array(5, 5.0));
515 initialValues.push_back(Array(30, 5.0));
516 initialValues.push_back(Array(10, 100.0));
517
518 std::vector<EndCriteria> endCriteria;
519 endCriteria.push_back(EndCriteria(100, 10, 1e-10, 1e-8, Null<Real>()));
520 endCriteria.push_back(EndCriteria(100, 10, 1e-10, 1e-8, Null<Real>()));
521 endCriteria.push_back(EndCriteria(100, 10, 1e-10, 1e-8, Null<Real>()));
522 endCriteria.push_back(EndCriteria(500, 100, 1e-10, 1e-8, Null<Real>()));
523 endCriteria.push_back(EndCriteria(1000, 800, 1e-12, 1e-10, Null<Real>()));
524
525 std::vector<Real> minima;
526 minima.push_back(0.0);
527 minima.push_back(0.0);
528 minima.push_back(0.0);
529 minima.push_back(10.9639796558);
530 minima.push_back(0.0);
531
532 for (Size i = 0; i < costFunctions.size(); ++i) {
533 Problem problem(*costFunctions[i], constraints[i], initialValues[i]);
534 diffEvolOptimisers[i].minimize(problem, endCriteria[i]);
535
536 if (i != 3) {
537 // stable
538 if (std::fabs(problem.functionValue() - minima[i]) > 1e-8) {
539 BOOST_ERROR("costFunction # " << i
540 << "\ncalculated: " << problem.functionValue()
541 << "\nexpected: " << minima[i]);
542 }
543 } else {
544 // this case is unstable due to randomness; we're good as
545 // long as the result is below 15
546 if (problem.functionValue() > 15) {
547 BOOST_ERROR("costFunction # " << i
548 << "\ncalculated: " << problem.functionValue()
549 << "\nexpected: " << "less than 15");
550 }
551 }
552 }
553 }
554
suite(SpeedLevel speed)555 test_suite* OptimizersTest::suite(SpeedLevel speed) {
556 test_suite* suite = BOOST_TEST_SUITE("Optimizers tests");
557
558 suite->add(QUANTLIB_TEST_CASE(&OptimizersTest::test));
559 suite->add(QUANTLIB_TEST_CASE(&OptimizersTest::nestedOptimizationTest));
560
561 if (speed <= Fast) {
562 suite->add(QUANTLIB_TEST_CASE(
563 &OptimizersTest::testDifferentialEvolution));
564 }
565
566 return suite;
567 }
568
569