1 /* 2 * Scythe Statistical Library Copyright (C) 2000-2002 Andrew D. Martin 3 * and Kevin M. Quinn; 2002-present Andrew D. Martin, Kevin M. Quinn, 4 * and Daniel Pemstein. All Rights Reserved. 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify under the terms of the GNU General Public License as 8 * published by Free Software Foundation; either version 2 of the 9 * License, or (at your option) any later version. See the text files 10 * COPYING and LICENSE, distributed with this source code, for further 11 * information. 12 * -------------------------------------------------------------------- 13 * scythestat/optimize.h 14 * 15 */ 16 17 /*! 18 * \file optimize.h 19 * \brief Definitions of functions for doing numerical optimization 20 * and related operations. 21 * 22 * This file contains a number of functions that are useful for 23 * numerical optimization and maximum likelihood estimation. In 24 * addition, it contains some basic facilities for evaluating definite 25 * integrals. 26 * 27 * As is the case across Scythe, we provide both general and default 28 * template definitions for the functions in this file that return 29 * Matrix objects. The general definitions allow the user to 30 * customize the matrix_order and matrix_style of the returned Matrix, 31 * while the default versions return concrete matrices of the same 32 * matrix_order as the first (or only) Matrix argument to the 33 * function. In cases where we supply these two types of definitions, 34 * we explicitly document only the general version, although the 35 * default definition will typically appear in the function list 36 * below. 37 * 38 * \note 39 * Doxygen has some difficulty dealing with overloaded templates. 40 * Under certain circumstances it does not correctly process the 41 * definitions of default templates. In these cases, the definition 42 * for the default template will not even appear in the function list. 43 * We provide default templates for all of the Matrix-returning 44 * functions in this file. 45 * 46 */ 47 48 #ifndef SCYTHE_OPTIMIZE_H 49 #define SCYTHE_OPTIMIZE_H 50 51 #ifdef SCYTHE_COMPILE_DIRECT 52 #include "matrix.h" 53 #include "algorithm.h" 54 #include "error.h" 55 #include "rng.h" 56 #include "distributions.h" 57 #include "la.h" 58 #include "ide.h" 59 #include "smath.h" 60 #include "stat.h" 61 #else 62 #include "scythestat/matrix.h" 63 #include "scythestat/algorithm.h" 64 #include "scythestat/error.h" 65 #include "scythestat/rng.h" 66 #include "scythestat/distributions.h" 67 #include "scythestat/la.h" 68 #include "scythestat/ide.h" 69 #include "scythestat/smath.h" 70 #include "scythestat/stat.h" 71 #endif 72 73 /* We want to use an anonymous namespace to make the following consts 74 * and functions local to this file, but mingw doesn't play nice with 75 * anonymous namespaces so we do things differently when using the 76 * cross-compiler. 77 */ 78 #ifdef __MINGW32__ 79 #define SCYTHE_MINGW32_STATIC static 80 #else 81 #define SCYTHE_MINGW32_STATIC 82 #endif 83 84 namespace scythe { 85 #ifndef __MINGW32__ 86 namespace { 87 #endif 88 89 /* Functions (private to this file) that do very little... */ 90 template <typename T, matrix_order O, matrix_style S> donothing(const Matrix<T,O,S> & x)91 SCYTHE_MINGW32_STATIC T donothing (const Matrix<T,O,S>& x) 92 { 93 return (T) 0.0; 94 } 95 96 template <typename T> donothing(T & x)97 SCYTHE_MINGW32_STATIC T donothing (T& x) 98 { 99 return (T) 0.0; 100 } 101 #ifndef __MINGW32__ 102 } 103 #endif 104 105 106 /* Return the machine epsilon 107 * Notes: Algorithm taken from Sedgewick, Robert. 1992. Algorithms 108 * in C++. Addison Wesley. pg. 561 109 */ 110 /*! \brief Compute the machine epsilon. 111 * 112 * The epsilon function returns the machine epsilon: the smallest 113 * number that, when summed with 1, produces a value greater than 114 * one. 115 */ 116 template <typename T> 117 T epsilon()118 epsilon() 119 { 120 T eps, del, neweps; 121 del = (T) 0.5; 122 eps = (T) 0.0; 123 neweps = (T) 1.0; 124 125 while ( del > 0 ) { 126 if ( 1 + neweps > 1 ) { /* Then the value might be too large */ 127 eps = neweps; /* ...save the current value... */ 128 neweps -= del; /* ...and decrement a bit */ 129 } else { /* Then the value is too small */ 130 neweps += del; /* ...so increment it */ 131 } 132 del *= 0.5; /* Reduce the adjustment by half */ 133 } 134 135 return eps; 136 } 137 138 /*! \brief Calculate the definite integral of a function from a to b. 139 * 140 * This function calculates the definite integral of a univariate 141 * function on the interval \f$[a,b]\f$. 142 * 143 * \param fun The function (or functor) whose definite integral is 144 * to be calculated. This function should both take and return a 145 * single argument of type T. 146 * \param a The starting value of the interval. 147 * \param b The ending value of the interval. 148 * \param N The number of subintervals to calculate. Increasing 149 * this number will improve the accuracy of the estimate but will 150 * also increase run-time. 151 * 152 * \throw scythe_invalid_arg (Level 1) 153 * 154 * \see adaptsimp(FUNCTOR fun, T a, T b, unsigned int& N, double tol = 1e-5) 155 * \note 156 * Users will typically wish to implement \a fun in terms of a 157 * functor. Using a functor provides a generic way in which to 158 * evaluate functions with more than one parameter. Furthermore, 159 * although one can pass a function pointer to this routine, 160 * the compiler cannot inline and fully optimize code 161 * referenced by function pointers. 162 */ 163 template <typename T, typename FUNCTOR> intsimp(FUNCTOR fun,T a,T b,unsigned int N)164 T intsimp (FUNCTOR fun, T a, T b, unsigned int N) 165 { 166 SCYTHE_CHECK_10(a > b, scythe_invalid_arg, 167 "Lower limit larger than upper"); 168 169 T I = (T) 0; 170 T w = (b - a) / N; 171 for (unsigned int i = 1; i <= N; ++i) 172 I += w * (fun(a +(i - 1) *w) + 4 * fun(a - w / 2 + i * w) + 173 fun(a + i * w)) / 6; 174 175 return I; 176 } 177 178 /*! \brief Calculate the definite integral of a function from a to b. 179 * 180 * This function calculates the definite integral of a univariate 181 * function on the interval \f$[a,b]\f$. 182 * 183 * \param fun The function (or functor) whose definite integral is 184 * to be calculated. This function should both take and return a 185 * single argument of type T. 186 * \param a The starting value of the interval. 187 * \param b The ending value of the interval. 188 * \param N The number of subintervals to calculate. Increasing 189 * this number will improve the accuracy of the estimate but will 190 * also increase run-time. 191 * \param tol The accuracy required. Both accuracy and run-time 192 * decrease as this number increases. 193 * 194 * \throw scythe_invalid_arg (Level 1) 195 * 196 * \see intsimp(FUNCTOR fun, T a, T b, unsigned int& N) 197 * 198 * \note 199 * Users will typically wish to implement \a fun in terms of a 200 * functor. Using a functor provides a generic way in which to 201 * evaluate functions with more than one parameter. Furthermore, 202 * although one can pass a function pointer to this routine, 203 * the compiler cannot inline and fully optimize code 204 * referenced by function pointers. 205 */ 206 template <typename T, typename FUNCTOR> 207 T adaptsimp(FUNCTOR fun, T a, T b, unsigned int N, double tol = 1e-5) 208 { 209 SCYTHE_CHECK_10(a > b, scythe_invalid_arg, 210 "Lower limit larger than upper"); 211 212 T I = intsimp(fun, a, b, N); 213 if (std::fabs(I - intsimp(fun, a, b, N / 2)) > tol) 214 return adaptsimp(fun, a, (a + b) / 2, N, tol) 215 + adaptsimp(fun, (a + b) / 2, b, N, tol); 216 217 return I; 218 } 219 220 /*! \brief Calculate gradient of a function using a forward 221 * difference formula. 222 * 223 * This function numerically calculates the gradient of a 224 * vector-valued function at \a theta using a forward difference 225 * formula. 226 * 227 * \param fun The function to calculate the gradient of. This 228 * function should both take and return a single Matrix (vector) of 229 * type T. 230 * \param theta The column vector of values at which to calculate 231 * the gradient of the function. 232 * 233 * \see gradfdifls(FUNCTOR fun, T alpha, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p) 234 * \see jacfdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta) 235 * \see hesscdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta) 236 * 237 * \throw scythe_dimension_error (Level 1) 238 * 239 * \note 240 * Users will typically wish to implement \a fun in terms of a 241 * functor. Using a functor provides a generic way in which to 242 * evaluate functions with more than one parameter. Furthermore, 243 * although one can pass a function pointer to this routine, 244 * the compiler cannot inline and fully optimize code 245 * referenced by function pointers. 246 */ 247 template <matrix_order RO, matrix_style RS, typename T, 248 matrix_order PO, matrix_style PS, typename FUNCTOR> 249 Matrix<T, RO, RS> gradfdif(FUNCTOR fun,const Matrix<T,PO,PS> & theta)250 gradfdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta) 251 252 { 253 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error, 254 "Theta not column vector"); 255 256 unsigned int k = theta.size(); 257 T h = std::sqrt(epsilon<T>()); 258 h = std::sqrt(h); 259 260 Matrix<T,RO,RS> grad(k, 1); 261 Matrix<T,RO> e; 262 Matrix<T,RO> temp; 263 for (unsigned int i = 0; i < k; ++i) { 264 e = Matrix<T,RO>(k, 1); 265 e[i] = h; 266 temp = theta + e; 267 donothing(temp); // XXX I don't understand this 268 e = temp - theta; 269 grad[i] = (fun(theta + e) - fun(theta)) / e[i]; 270 } 271 272 return grad; 273 } 274 275 // Default template version 276 template <typename T, matrix_order O, matrix_style S, 277 typename FUNCTOR> 278 Matrix<T, O, Concrete> gradfdif(FUNCTOR fun,const Matrix<T,O,S> & theta)279 gradfdif (FUNCTOR fun, const Matrix<T,O,S>& theta) 280 { 281 return gradfdif<O,Concrete>(fun, theta); 282 } 283 284 /*! \brief Calculate the first derivative of the function using 285 * a forward difference formula. 286 * 287 * This function numerically calculates the first derivative of a 288 * function with respect to \a alpha at \f$theta + alpha \cdot p\f$ 289 * using a forward difference formula. This function is primarily 290 * useful for linesearches. 291 * 292 * \param fun The function to calculate the first derivative of. 293 * This function should take a single Matrix<T> argument and return 294 * a value of type T. 295 * \param alpha Double the step length. 296 * \param theta A Matrix (vector) of parameter values at which to 297 * calculate the gradient. 298 * \param p A direction vector. 299 * 300 * \see gradfdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta) 301 * \see jacfdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta) 302 * \see hesscdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta) 303 * 304 * \throw scythe_dimension_error (Level 1) 305 * 306 * \note 307 * Users will typically wish to implement \a fun in terms of a 308 * functor. Using a functor provides a generic way in which to 309 * evaluate functions with more than one parameter. Furthermore, 310 * although one can pass a function pointer to this routine, 311 * the compiler cannot inline and fully optimize code 312 * referenced by function pointers. 313 */ 314 template <typename T, matrix_order PO1, matrix_style PS1, 315 matrix_order PO2, matrix_style PS2, typename FUNCTOR> 316 T gradfdifls(FUNCTOR fun,T alpha,const Matrix<T,PO1,PS1> & theta,const Matrix<T,PO2,PS2> & p)317 gradfdifls (FUNCTOR fun, T alpha, const Matrix<T,PO1,PS1>& theta, 318 const Matrix<T,PO2,PS2>& p) 319 320 { 321 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error, 322 "Theta not column vector"); 323 SCYTHE_CHECK_10(! p.isColVector(), scythe_dimension_error, 324 "p not column vector"); 325 326 unsigned int k = theta.size(); 327 T h = std::sqrt(epsilon<T>()); 328 h = std::sqrt(h); 329 //T h = std::sqrt(2.2e-16); 330 331 T deriv; 332 333 for (unsigned int i = 0; i < k; ++i) { 334 T temp = alpha + h; 335 donothing(temp); 336 T e = temp - alpha; 337 deriv = (fun(theta + (alpha + e) * p) - fun(theta + alpha * p)) 338 / e; 339 } 340 341 return deriv; 342 } 343 344 /*! \brief Calculate the Jacobian of a function using a forward 345 * difference formula. 346 * 347 * This function numerically calculates the Jacobian of a 348 * vector-valued function using a forward difference formula. 349 * 350 * \param fun The function to calculate the Jacobian of. This 351 * function should both take and return a Matrix (vector) of type 352 * T. 353 * \param theta The column vector of parameter values at which to 354 * take the Jacobian of \a fun. 355 * 356 * \see gradfdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta) 357 * \see gradfdifls(FUNCTOR fun, T alpha, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p) 358 * \see hesscdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta) 359 * 360 * \throw scythe_dimension_error (Level 1) 361 * 362 * \note 363 * Users will typically wish to implement \a fun in terms of a 364 * functor. Using a functor provides a generic way in which to 365 * evaluate functions with more than one parameter. Furthermore, 366 * although one can pass a function pointer to this routine, 367 * the compiler cannot inline and fully optimize code 368 * referenced by function pointers. 369 */ 370 template <matrix_order RO, matrix_style RS, typename T, 371 matrix_order PO, matrix_style PS, typename FUNCTOR> 372 Matrix<T,RO,RS> jacfdif(FUNCTOR fun,const Matrix<T,PO,PS> & theta)373 jacfdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta) 374 { 375 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error, 376 "Theta not column vector"); 377 378 Matrix<T,RO> fval = fun(theta); 379 unsigned int k = theta.rows(); 380 unsigned int n = fval.rows(); 381 382 T h = std::sqrt(epsilon<T>()); //2.2e-16 383 h = std::sqrt(h); 384 385 Matrix<T,RO,RS> J(n,k); 386 Matrix<T,RO> e; 387 Matrix<T,RO> temp; 388 Matrix<T,RO> fthetae; 389 Matrix<T,RO> ftheta; 390 391 for (int i = 0; i < k; ++i) { 392 e = Matrix<T,RO>(k,1); 393 e[i] = h; 394 temp = theta + e; 395 donothing(temp); /// XXX ?? 396 e = temp - theta; 397 fthetae = fun(theta + e); 398 ftheta = fun(theta); 399 for (unsigned int j = 0; j < n; ++j) { 400 J(j,i) = (fthetae[j] - ftheta[j]) / e[i]; 401 } 402 } 403 404 return J; 405 } 406 407 // default template 408 template <typename T, matrix_order PO, matrix_style PS, 409 typename FUNCTOR> 410 Matrix<T,PO,PS> jacfdif(FUNCTOR fun,const Matrix<T,PO,PS> & theta)411 jacfdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta) 412 { 413 return jacfdif<PO,Concrete>(fun, theta); 414 } 415 416 417 /*! \brief Calculate the Hessian of a function using a central 418 * difference formula. 419 * 420 * This function numerically calculates the Hessian of a 421 * vector-valued function using a central difference formula. 422 * 423 * \param fun The function to calculate the Hessian of. This 424 * function should take a Matrix (vector) of type T and return a 425 * single value of type T. 426 * \param theta The column vector of parameter values at which to 427 * calculate the Hessian. 428 * 429 * \see gradfdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta) 430 * \see gradfdifls(FUNCTOR fun, T alpha, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p) 431 * \see jacfdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta) 432 * 433 * \throw scythe_dimension_error 434 * 435 * \note 436 * Users will typically wish to implement \a fun in terms of a 437 * functor. Using a functor provides a generic way in which to 438 * evaluate functions with more than one parameter. Furthermore, 439 * although one can pass a function pointer to this routine, 440 * the compiler cannot inline and fully optimize code 441 * referenced by function pointers. 442 */ 443 template <matrix_order RO, matrix_style RS, typename T, 444 matrix_order PO, matrix_style PS, typename FUNCTOR> 445 Matrix<T, RO, RS> hesscdif(FUNCTOR fun,const Matrix<T,PO,PS> & theta)446 hesscdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta) 447 { 448 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error, 449 "Theta not column vector"); 450 451 T fval = fun(theta); 452 453 //std::cout << std::endl; 454 //std::cout << "hesscdif theta = " << theta << "\n"; 455 //std::cout << "hesscdif fun(theta) = " << fval << std::endl; 456 457 unsigned int k = theta.rows(); 458 459 // stepsize CAREFUL -- THIS IS MACHINE SPECIFIC !!!! 460 T h2 = std::sqrt(epsilon<T>()); 461 //T h2 = (T) 1e-10; 462 T h = std::sqrt(h2); 463 464 Matrix<T, RO, RS> H(k,k); 465 466 //std::cout << "h2 = " << h2 << " h = " << h << std::endl; 467 468 Matrix<T,RO> ei; 469 Matrix<T,RO> ej; 470 Matrix<T,RO> temp; 471 472 for (unsigned int i = 0; i < k; ++i) { 473 ei = Matrix<T,RO>(k, 1); 474 ei[i] = h; 475 temp = theta + ei; 476 donothing(temp); // XXX Again, I'm baffled 477 ei = temp - theta; 478 for (unsigned int j = 0; j < k; ++j){ 479 ej = Matrix<T,RO>(k,1); 480 ej[j] = h; 481 temp = theta + ej; 482 donothing(temp); // XXX and again 483 ej = temp - theta; 484 485 if (i == j) { 486 H(i,i) = ( -fun(theta + 2.0 * ei) + 16.0 * 487 fun(theta + ei) - 30.0 * fval + 16.0 * 488 fun(theta - ei) - 489 fun(theta - 2.0 * ei)) / (12.0 * h2); 490 } else { 491 H(i,j) = ( fun(theta + ei + ej) - fun(theta + ei - ej) 492 - fun(theta - ei + ej) + fun(theta - ei - ej)) 493 / (4.0 * h2); 494 } 495 } 496 } 497 498 //std::cout << "end of hesscdif, H = " << H << "\n"; 499 return H; 500 } 501 502 // default template 503 template <typename T, matrix_order PO, matrix_style PS, 504 typename FUNCTOR> 505 Matrix<T,PO,PS> hesscdif(FUNCTOR fun,const Matrix<T,PO,PS> & theta)506 hesscdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta) 507 { 508 return hesscdif<PO,Concrete>(fun, theta); 509 } 510 511 /*! \brief Find the step length that minimizes an implied 1-dimensional function. 512 * 513 * This function performs a line search to find the step length 514 * that approximately minimizes an implied one dimensional 515 * function. 516 * 517 * \param fun The function to minimize. This function should take 518 * one Matrix (vector) argument of type T and return a single value 519 * of type T. 520 * \param theta A column vector of parameter values that anchor the 521 * 1-dimensional function. 522 * \param p A direction vector that creates the 1-dimensional 523 * function. 524 * 525 * \see linesearch2(FUNCTOR fun, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p, rng<RNGTYPE>& runif) 526 * \see zoom(FUNCTOR fun, T alpha_lo, T alpha_hi, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p) 527 * \see BFGS(FUNCTOR fun, const Matrix<T,PO,PS>& theta, rng<RNGTYPE>& runif, unsigned int maxit, T tolerance, bool trace = false) 528 * 529 * \throw scythe_dimension_error (Level 1) 530 * 531 * \note 532 * Users will typically wish to implement \a fun in terms of a 533 * functor. Using a functor provides a generic way in which to 534 * evaluate functions with more than one parameter. Furthermore, 535 * although one can pass a function pointer to this routine, 536 * the compiler cannot inline and fully optimize code 537 * referenced by function pointers. 538 */ 539 template <typename T, matrix_order PO1, matrix_style PS1, 540 matrix_order PO2, matrix_style PS2, typename FUNCTOR> linesearch1(FUNCTOR fun,const Matrix<T,PO1,PS1> & theta,const Matrix<T,PO2,PS2> & p)541 T linesearch1 (FUNCTOR fun, const Matrix<T,PO1,PS1>& theta, 542 const Matrix<T,PO2,PS2>& p) 543 { 544 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error, 545 "Theta not column vector"); 546 SCYTHE_CHECK_10(! p.isColVector(), scythe_dimension_error, 547 "p not column vector"); 548 549 T alpha_bar = (T) 1.0; 550 T rho = (T) 0.9; 551 T c = (T) 0.5; 552 T alpha = alpha_bar; 553 Matrix<T,PO1> fgrad = gradfdif(fun, theta); 554 555 while (fun(theta + alpha * p) > 556 (fun(theta) + c * alpha * t(fgrad) * p)[0]) { 557 alpha = rho * alpha; 558 } 559 560 return alpha; 561 } 562 563 /*! \brief Find the step length that minimizes an implied 1-dimensional function. 564 * 565 * This function performs a line search to find the step length 566 * that approximately minimizes an implied one dimensional 567 * function. 568 * 569 * \param fun The function to minimize. This function should take 570 * one Matrix (vector) argument of type T and return a single value 571 * of type T. 572 * \param theta A column vector of parameter values that anchor the 573 * 1-dimensional function. 574 * \param p A direction vector that creates the 1-dimensional 575 * function. 576 * \param runif A random uniform number generator function object 577 * (an object that returns a random uniform variate on (0,1) when 578 * its () operator is invoked). 579 * 580 * \see linesearch1(FUNCTOR fun, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p) 581 * \see zoom(FUNCTOR fun, T alpha_lo, T alpha_hi, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p) 582 * \see BFGS(FUNCTOR fun, const Matrix<T,PO,PS>& theta, rng<RNGTYPE>& runif, unsigned int maxit, T tolerance, bool trace = false) 583 * 584 * \throw scythe_dimension_error (Level 1) 585 * 586 * \note 587 * Users will typically wish to implement \a fun in terms of a 588 * functor. Using a functor provides a generic way in which to 589 * evaluate functions with more than one parameter. Furthermore, 590 * although one can pass a function pointer to this routine, 591 * the compiler cannot inline and fully optimize code 592 * referenced by function pointers. 593 */ 594 template <typename T, matrix_order PO1, matrix_style PS1, 595 matrix_order PO2, matrix_style PS2, typename FUNCTOR, 596 typename RNGTYPE> linesearch2(FUNCTOR fun,const Matrix<T,PO1,PS1> & theta,const Matrix<T,PO2,PS2> & p,rng<RNGTYPE> & runif)597 T linesearch2 (FUNCTOR fun, const Matrix<T,PO1,PS1>& theta, 598 const Matrix<T,PO2,PS2>& p, rng<RNGTYPE>& runif) 599 { 600 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error, 601 "Theta not column vector"); 602 SCYTHE_CHECK_10(! p.isColVector(), scythe_dimension_error, 603 "p not column vector"); 604 605 T alpha_last = (T) 0.0; 606 T alpha_cur = (T) 1.0; 607 T alpha_max = (T) 10.0; 608 T c1 = (T) 1e-4; 609 T c2 = (T) 0.5; 610 unsigned int max_iter = 50; 611 T fgradalpha0 = gradfdifls(fun, (T) 0, theta, p); 612 613 for (unsigned int i = 0; i < max_iter; ++i) { 614 T phi_cur = fun(theta + alpha_cur * p); 615 T phi_last = fun(theta + alpha_last * p); 616 617 if ((phi_cur > (fun(theta) + c1 * alpha_cur * fgradalpha0)) 618 || ((phi_cur >= phi_last) && (i > 0))) { 619 T alphastar = zoom(fun, alpha_last, alpha_cur, theta, p); 620 return alphastar; 621 } 622 623 T fgradalpha_cur = gradfdifls(fun, alpha_cur, theta, p); 624 if (std::fabs(fgradalpha_cur) <= -1 * c2 * fgradalpha0) 625 return alpha_cur; 626 627 if ( fgradalpha_cur >= (T) 0.0) { 628 T alphastar = zoom(fun, alpha_cur, alpha_last, theta, p); 629 return alphastar; 630 } 631 632 alpha_last = alpha_cur; 633 // runif stuff below is probably not correc KQ 12/08/2006 634 // I think it should work now DBP 01/02/2007 635 alpha_cur = runif() * (alpha_max - alpha_cur) + alpha_cur; 636 } 637 638 return 0.001; 639 } 640 641 /*! \brief Find minimum of a function once bracketed. 642 * 643 * This function finds the minimum of a function, once bracketed. 644 * 645 * \param fun The function to minimize. This function should take 646 * one Matrix (vector) argument of type T and return a single value 647 * of type T. 648 * \param alpha_lo The lower bracket. 649 * \param alpha_hi The upper bracket. 650 * \param theta A column vector of parameter values that anchor the 651 * 1-dimensional function. 652 * \param p A direction vector that creates the 1-dimensional 653 * 654 * \see linesearch1(FUNCTOR fun, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p) 655 * \see linesearch2(FUNCTOR fun, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p, rng<RNGTYPE>& runif) 656 * \see BFGS(FUNCTOR fun, const Matrix<T,PO,PS>& theta, rng<RNGTYPE>& runif, unsigned int maxit, T tolerance, bool trace = false) 657 * 658 * \throw scythe_dimension_error (Level 1) 659 * 660 * \note 661 * Users will typically wish to implement \a fun in terms of a 662 * functor. Using a functor provides a generic way in which to 663 * evaluate functions with more than one parameter. Furthermore, 664 * although one can pass a function pointer to this routine, 665 * the compiler cannot inline and fully optimize code 666 * referenced by function pointers. 667 * 668 */ 669 template <typename T, matrix_order PO1, matrix_style PS1, 670 matrix_order PO2, matrix_style PS2, typename FUNCTOR> zoom(FUNCTOR fun,T alpha_lo,T alpha_hi,const Matrix<T,PO1,PS1> & theta,const Matrix<T,PO2,PS2> & p)671 T zoom (FUNCTOR fun, T alpha_lo, T alpha_hi, 672 const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p) 673 { 674 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error, 675 "Theta not column vector"); 676 SCYTHE_CHECK_10(! p.isColVector(), scythe_dimension_error, 677 "p not column vector"); 678 679 T alpha_j = (alpha_lo + alpha_hi) / 2.0; 680 T phi_0 = fun(theta); 681 T c1 = (T) 1e-4; 682 T c2 = (T) 0.5; 683 T fgrad0 = gradfdifls(fun, (T) 0, theta, p); 684 685 unsigned int count = 0; 686 unsigned int maxit = 20; 687 while(count < maxit) { 688 T phi_j = fun(theta + alpha_j * p); 689 T phi_lo = fun(theta + alpha_lo * p); 690 691 if ((phi_j > (phi_0 + c1 * alpha_j * fgrad0)) 692 || (phi_j >= phi_lo)){ 693 alpha_hi = alpha_j; 694 } else { 695 T fgradj = gradfdifls(fun, alpha_j, theta, p); 696 if (std::fabs(fgradj) <= -1 * c2 * fgrad0){ 697 return alpha_j; 698 } 699 if ( fgradj * (alpha_hi - alpha_lo) >= 0){ 700 alpha_hi = alpha_lo; 701 } 702 alpha_lo = alpha_j; 703 } 704 ++count; 705 } 706 707 return alpha_j; 708 } 709 710 711 /*! \brief Find function minimum using the BFGS algorithm. 712 * 713 * Numerically find the minimum of a function using the BFGS 714 * algorithm. 715 * 716 * \param fun The function to minimize. This function should take 717 * one Matrix (vector) argument of type T and return a single value 718 * of type T. 719 * \param theta A column vector of parameter values that anchor the 720 * 1-dimensional function. 721 * \param runif A random uniform number generator function object 722 * (an object that returns a random uniform variate on (0,1) when 723 * its () operator is invoked). 724 * \param maxit The maximum number of iterations. 725 * \param tolerance The convergence tolerance. 726 * \param trace Boolean value determining whether BFGS should print 727 * to stdout (defaults to false). 728 * 729 * \see linesearch1(FUNCTOR fun, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p) 730 * \see linesearch2(FUNCTOR fun, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p, rng<RNGTYPE>& runif) 731 * \see zoom(FUNCTOR fun, T alpha_lo, T alpha_hi, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p) 732 * 733 * \throw scythe_dimension_error (Level 1) 734 * \throw scythe_convergence_error (Level 0) 735 * 736 * \note 737 * Users will typically wish to implement \a fun in terms of a 738 * functor. Using a functor provides a generic way in which to 739 * evaluate functions with more than one parameter. Furthermore, 740 * although one can pass a function pointer to this routine, 741 * the compiler cannot inline and fully optimize code 742 * referenced by function pointers. 743 */ 744 // there were 2 versions of linesearch1-- the latter was what we 745 // had been calling linesearch2 746 template <matrix_order RO, matrix_style RS, typename T, 747 matrix_order PO, matrix_style PS, 748 typename FUNCTOR, typename RNGTYPE> 749 Matrix<T,RO,RS> 750 BFGS (FUNCTOR fun, const Matrix<T,PO,PS>& theta, rng<RNGTYPE>& runif, 751 unsigned int maxit, T tolerance, bool trace = false) 752 { 753 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error, 754 "Theta not column vector"); 755 756 unsigned int n = theta.size(); 757 758 // H is initial inverse hessian 759 Matrix<T,RO> H = inv(hesscdif(fun, theta)); 760 761 // gradient at starting values 762 Matrix<T,RO> fgrad = gradfdif(fun, theta); 763 Matrix<T,RO> thetamin = theta; 764 Matrix<T,RO> fgrad_new = fgrad; 765 Matrix<T,RO> I = eye<T,RO>(n); 766 Matrix<T,RO> s; 767 Matrix<T,RO> y; 768 769 unsigned int count = 0; 770 while( (t(fgrad_new)*fgrad_new)[0] > tolerance) { 771 Matrix<T> p = -1.0 * H * fgrad; 772 //std::cout << "initial H * fgrad = " << H * fgrad << "\n"; 773 //std::cout << "initial p = " << p << "\n"; 774 775 T alpha = linesearch2(fun, thetamin, p, runif); 776 //T alpha = linesearch1(fun, thetamin, p); 777 778 //std::cout << "after linesearch p = " << p << "\n"; 779 780 781 Matrix<T> thetamin_new = thetamin + alpha * p; 782 fgrad_new = gradfdif(fun, thetamin_new); 783 s = thetamin_new - thetamin; 784 y = fgrad_new - fgrad; 785 T rho = 1.0 / (t(y) * s)[0]; 786 H = (I - rho * s * t(y)) * H *(I - rho * y * t(s)) 787 + rho * s * t(s); 788 789 thetamin = thetamin_new; 790 fgrad = fgrad_new; 791 ++count; 792 793 #ifndef SCYTHE_RPACK 794 if (trace) { 795 std::cout << "BFGS iteration = " << count << std::endl; 796 std::cout << "thetamin = " << (t(thetamin)) ; 797 std::cout << "gradient = " << (t(fgrad)) ; 798 std::cout << "t(gradient) * gradient = " << (t(fgrad) * fgrad) ; 799 std::cout << "function value = " << fun(thetamin) << 800 std::endl << std::endl; 801 } 802 #endif 803 //std::cout << "Hessian = " << hesscdif(fun, theta) << "\n"; 804 //std::cout << "H = " << H << "\n"; 805 //std::cout << "alpha = " << alpha << std::endl; 806 //std::cout << "p = " << p << "\n"; 807 //std::cout << "-1 * H * fgrad = " << -1.0 * H * fgrad << "\n"; 808 809 SCYTHE_CHECK(count > maxit, scythe_convergence_error, 810 "Failed to converge. Try better starting values"); 811 } 812 813 return thetamin; 814 } 815 816 // Default template 817 template <typename T, matrix_order O, matrix_style S, 818 typename FUNCTOR, typename RNGTYPE> 819 Matrix<T,O,Concrete> 820 BFGS (FUNCTOR fun, const Matrix<T,O,S>& theta, rng<RNGTYPE>& runif, 821 unsigned int maxit, T tolerance, bool trace = false) 822 { 823 return BFGS<O,Concrete> (fun, theta, runif, maxit, tolerance, trace); 824 } 825 826 827 /* Solves a system of n nonlinear equations in n unknowns of the form 828 * fun(thetastar) = 0 for thetastar given the function, starting 829 * value theta, max number of iterations, and tolerance. 830 * Uses Broyden's method. 831 */ 832 /*! \brief Solve a system of nonlinear equations. 833 * 834 * Solves a system of n nonlinear equations in n unknowns of the form 835 * \f$fun(\theta^*) = 0\f$ for \f$\theta^*\f$. 836 * 837 * \param fun The function to solve. The function should both take 838 * and return a Matrix of type T. 839 * \param theta A column vector of parameter values at which to 840 * start the solve procedure. 841 * \param maxit The maximum number of iterations. 842 * \param tolerance The convergence tolerance. 843 * 844 * \throw scythe_dimension_error (Level 1) 845 * \throw scythe_convergence_error (Level 1) 846 * 847 * \note 848 * Users will typically wish to implement \a fun in terms of a 849 * functor. Using a functor provides a generic way in which to 850 * evaluate functions with more than one parameter. Furthermore, 851 * although one can pass a function pointer to this routine, 852 * the compiler cannot inline and fully optimize code 853 * referenced by function pointers. 854 */ 855 template <matrix_order RO, matrix_style RS, typename T, 856 matrix_order PO, matrix_style PS, typename FUNCTOR> 857 Matrix<T,RO,RS> 858 nls_broyden(FUNCTOR fun, const Matrix<T,PO,PS>& theta, 859 unsigned int maxit = 5000, T tolerance = 1e-6) 860 { 861 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error, 862 "Theta not column vector"); 863 864 865 Matrix<T,RO> thetastar = theta; 866 Matrix<T,RO> B = jacfdif(fun, thetastar); 867 868 Matrix<T,RO> fthetastar; 869 Matrix<T,RO> p; 870 Matrix<T,RO> thetastar_new; 871 Matrix<T,RO> fthetastar_new; 872 Matrix<T,RO> s; 873 Matrix<T,RO> y; 874 875 for (unsigned int i = 0; i < maxit; ++i) { 876 fthetastar = fun(thetastar); 877 p = lu_solve(B, -1 * fthetastar); 878 T alpha = (T) 1.0; 879 thetastar_new = thetastar + alpha*p; 880 fthetastar_new = fun(thetastar_new); 881 s = thetastar_new - thetastar; 882 y = fthetastar_new - fthetastar; 883 B = B + ((y - B * s) * t(s)) / (t(s) * s); 884 thetastar = thetastar_new; 885 if (max(fabs(fthetastar_new)) < tolerance) 886 return thetastar; 887 } 888 889 SCYTHE_THROW_10(scythe_convergence_error, "Failed to converge. Try better starting values or increase maxit"); 890 891 return thetastar; 892 } 893 894 // default template 895 template <typename T, matrix_order O, matrix_style S, 896 typename FUNCTOR> 897 Matrix<T,O,Concrete> 898 nls_broyden (FUNCTOR fun, const Matrix<T,O,S>& theta, 899 unsigned int maxit = 5000, T tolerance = 1e-6) 900 { 901 return nls_broyden<O,Concrete>(fun, theta, maxit, tolerance); 902 } 903 904 } // namespace scythe 905 906 #endif /* SCYTHE_OPTIMIZE_H */ 907