1 /*
2  * Scythe Statistical Library Copyright (C) 2000-2002 Andrew D. Martin
3  * and Kevin M. Quinn; 2002-present Andrew D. Martin, Kevin M. Quinn,
4  * and Daniel Pemstein.  All Rights Reserved.
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify under the terms of the GNU General Public License as
8  * published by Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.  See the text files
10  * COPYING and LICENSE, distributed with this source code, for further
11  * information.
12  * --------------------------------------------------------------------
13  *  scythestat/optimize.h
14  *
15  */
16 
17 /*!
18  * \file optimize.h
19  * \brief Definitions of functions for doing numerical optimization
20  * and related operations.
21  *
22  * This file contains a number of functions that are useful for
23  * numerical optimization and maximum likelihood estimation.  In
24  * addition, it contains some basic facilities for evaluating definite
25  * integrals.
26  *
27  * As is the case across Scythe, we provide both general and default
28  * template definitions for the functions in this file that return
29  * Matrix objects.  The general definitions allow the user to
30  * customize the matrix_order and matrix_style of the returned Matrix,
31  * while the default versions return concrete matrices of the same
32  * matrix_order as the first (or only) Matrix argument to the
33  * function.  In cases where we supply these two types of definitions,
34  * we explicitly document only the general version, although the
35  * default definition will typically appear in the function list
36  * below.
37  *
38  * \note
39  * Doxygen has some difficulty dealing with overloaded templates.
40  * Under certain circumstances it does not correctly process the
41  * definitions of default templates.  In these cases, the definition
42  * for the default template will not even appear in the function list.
43  * We provide default templates for all of the Matrix-returning
44  * functions in this file.
45  *
46  */
47 
48 #ifndef SCYTHE_OPTIMIZE_H
49 #define SCYTHE_OPTIMIZE_H
50 
51 #ifdef SCYTHE_COMPILE_DIRECT
52 #include "matrix.h"
53 #include "algorithm.h"
54 #include "error.h"
55 #include "rng.h"
56 #include "distributions.h"
57 #include "la.h"
58 #include "ide.h"
59 #include "smath.h"
60 #include "stat.h"
61 #else
62 #include "scythestat/matrix.h"
63 #include "scythestat/algorithm.h"
64 #include "scythestat/error.h"
65 #include "scythestat/rng.h"
66 #include "scythestat/distributions.h"
67 #include "scythestat/la.h"
68 #include "scythestat/ide.h"
69 #include "scythestat/smath.h"
70 #include "scythestat/stat.h"
71 #endif
72 
73 /* We want to use an anonymous namespace to make the following consts
74  * and functions local to this file, but mingw doesn't play nice with
75  * anonymous namespaces so we do things differently when using the
76  * cross-compiler.
77  */
78 #ifdef __MINGW32__
79 #define SCYTHE_MINGW32_STATIC static
80 #else
81 #define SCYTHE_MINGW32_STATIC
82 #endif
83 
84 namespace scythe {
85 #ifndef __MINGW32__
86   namespace {
87 #endif
88 
89     /* Functions (private to this file) that do very little... */
90     template <typename T, matrix_order O, matrix_style S>
donothing(const Matrix<T,O,S> & x)91     SCYTHE_MINGW32_STATIC T donothing (const Matrix<T,O,S>& x)
92     {
93       return (T) 0.0;
94     }
95 
96     template <typename T>
donothing(T & x)97     SCYTHE_MINGW32_STATIC T donothing (T& x)
98     {
99       return (T) 0.0;
100     }
101 #ifndef __MINGW32__
102   }
103 #endif
104 
105 
106   /* Return the machine epsilon
107    * Notes: Algorithm taken from Sedgewick, Robert. 1992. Algorithms
108    * in C++. Addison Wesley. pg. 561
109    */
110    /*! \brief Compute the machine epsilon.
111     *
112     * The epsilon function returns the machine epsilon: the smallest
113     * number that, when summed with 1, produces a value greater than
114     * one.
115     */
116   template <typename T>
117   T
epsilon()118   epsilon()
119   {
120     T eps, del, neweps;
121     del    = (T) 0.5;
122     eps    = (T) 0.0;
123     neweps = (T) 1.0;
124 
125     while ( del > 0 ) {
126       if ( 1 + neweps > 1 ) {  /* Then the value might be too large */
127         eps = neweps;    /* ...save the current value... */
128         neweps -= del;    /* ...and decrement a bit */
129       } else {      /* Then the value is too small */
130         neweps += del;    /* ...so increment it */
131       }
132       del *= 0.5;      /* Reduce the adjustment by half */
133     }
134 
135     return eps;
136   }
137 
138    /*! \brief Calculate the definite integral of a function from a to b.
139     *
140     * This function calculates the definite integral of a univariate
141     * function on the interval \f$[a,b]\f$.
142     *
143     * \param fun The function (or functor) whose definite integral is
144     * to be calculated.  This function should both take and return a
145     * single argument of type T.
146     * \param a The starting value of the interval.
147     * \param b The ending value of the interval.
148     * \param N The number of subintervals to calculate.  Increasing
149     * this number will improve the accuracy of the estimate but will
150     * also increase run-time.
151     *
152     * \throw scythe_invalid_arg (Level 1)
153     *
154     * \see adaptsimp(FUNCTOR fun, T a, T b, unsigned int& N, double tol = 1e-5)
155     * \note
156     * Users will typically wish to implement \a fun in terms of a
157     * functor.  Using a functor provides a generic way in which to
158     * evaluate functions with more than one parameter.  Furthermore,
159     * although one can pass a function pointer to this routine,
160     * the compiler cannot inline and fully optimize code
161     * referenced by function pointers.
162     */
163   template <typename T, typename FUNCTOR>
intsimp(FUNCTOR fun,T a,T b,unsigned int N)164   T  intsimp (FUNCTOR fun, T a, T b, unsigned int N)
165   {
166     SCYTHE_CHECK_10(a > b, scythe_invalid_arg,
167         "Lower limit larger than upper");
168 
169     T I = (T) 0;
170     T w = (b - a) / N;
171     for (unsigned int i = 1; i <= N; ++i)
172       I += w * (fun(a +(i - 1) *w) + 4 * fun(a - w / 2 + i * w) +
173           fun(a + i * w)) / 6;
174 
175     return I;
176   }
177 
178    /*! \brief Calculate the definite integral of a function from a to b.
179     *
180     * This function calculates the definite integral of a univariate
181     * function on the interval \f$[a,b]\f$.
182     *
183     * \param fun The function (or functor) whose definite integral is
184     * to be calculated.  This function should both take and return a
185     * single argument of type T.
186     * \param a The starting value of the interval.
187     * \param b The ending value of the interval.
188     * \param N The number of subintervals to calculate.  Increasing
189     * this number will improve the accuracy of the estimate but will
190     * also increase run-time.
191     * \param tol The accuracy required.  Both accuracy and run-time
192     * decrease as this number increases.
193     *
194     * \throw scythe_invalid_arg (Level 1)
195     *
196     * \see intsimp(FUNCTOR fun, T a, T b, unsigned int& N)
197     *
198     * \note
199     * Users will typically wish to implement \a fun in terms of a
200     * functor.  Using a functor provides a generic way in which to
201     * evaluate functions with more than one parameter.  Furthermore,
202     * although one can pass a function pointer to this routine,
203     * the compiler cannot inline and fully optimize code
204     * referenced by function pointers.
205     */
206   template <typename T, typename FUNCTOR>
207   T adaptsimp(FUNCTOR fun, T a, T b, unsigned int N, double tol = 1e-5)
208   {
209     SCYTHE_CHECK_10(a > b, scythe_invalid_arg,
210         "Lower limit larger than upper");
211 
212     T I = intsimp(fun, a, b, N);
213     if (std::fabs(I - intsimp(fun, a, b, N / 2)) > tol)
214       return adaptsimp(fun, a, (a + b) / 2, N, tol)
215         + adaptsimp(fun, (a + b) / 2, b, N, tol);
216 
217     return I;
218   }
219 
220    /*! \brief Calculate gradient of a function using a forward
221     * difference formula.
222     *
223     * This function numerically calculates the gradient of a
224     * vector-valued function at \a theta using a forward difference
225     * formula.
226     *
227     * \param fun The function to calculate the gradient of.  This
228     * function should both take and return a single Matrix (vector) of
229     * type T.
230     * \param theta The column vector of values at which to calculate
231     * the gradient of the function.
232     *
233     * \see gradfdifls(FUNCTOR fun, T alpha, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p)
234     * \see jacfdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta)
235     * \see hesscdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta)
236     *
237     * \throw scythe_dimension_error (Level 1)
238     *
239     * \note
240     * Users will typically wish to implement \a fun in terms of a
241     * functor.  Using a functor provides a generic way in which to
242     * evaluate functions with more than one parameter.  Furthermore,
243     * although one can pass a function pointer to this routine,
244     * the compiler cannot inline and fully optimize code
245     * referenced by function pointers.
246     */
247   template <matrix_order RO, matrix_style RS, typename T,
248             matrix_order PO, matrix_style PS, typename FUNCTOR>
249   Matrix<T, RO, RS>
gradfdif(FUNCTOR fun,const Matrix<T,PO,PS> & theta)250   gradfdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta)
251 
252   {
253     SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
254         "Theta not column vector");
255 
256     unsigned int k = theta.size();
257     T h = std::sqrt(epsilon<T>());
258     h = std::sqrt(h);
259 
260     Matrix<T,RO,RS> grad(k, 1);
261     Matrix<T,RO> e;
262     Matrix<T,RO> temp;
263     for (unsigned int i = 0; i < k; ++i) {
264       e = Matrix<T,RO>(k, 1);
265       e[i] = h;
266       temp = theta + e;
267       donothing(temp); // XXX I don't understand this
268       e = temp - theta;
269       grad[i] = (fun(theta + e) - fun(theta)) / e[i];
270     }
271 
272     return grad;
273   }
274 
275   // Default template version
276   template <typename T, matrix_order O, matrix_style S,
277             typename FUNCTOR>
278   Matrix<T, O, Concrete>
gradfdif(FUNCTOR fun,const Matrix<T,O,S> & theta)279   gradfdif (FUNCTOR fun, const Matrix<T,O,S>& theta)
280   {
281     return gradfdif<O,Concrete>(fun, theta);
282   }
283 
284    /*! \brief Calculate the first derivative of the function using
285     * a forward difference formula.
286     *
287     * This function numerically calculates the first derivative of a
288     * function with respect to \a alpha at \f$theta + alpha \cdot p\f$
289     * using a forward difference formula.  This function is primarily
290     * useful for linesearches.
291     *
292     * \param fun The function to calculate the first derivative of.
293     * This function should take a single Matrix<T> argument and return
294     * a value of type T.
295     * \param alpha Double the step length.
296     * \param theta A Matrix (vector) of parameter values at which to
297     * calculate the gradient.
298     * \param p A direction vector.
299     *
300     * \see gradfdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta)
301     * \see jacfdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta)
302     * \see hesscdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta)
303     *
304     * \throw scythe_dimension_error (Level 1)
305     *
306     * \note
307     * Users will typically wish to implement \a fun in terms of a
308     * functor.  Using a functor provides a generic way in which to
309     * evaluate functions with more than one parameter.  Furthermore,
310     * although one can pass a function pointer to this routine,
311     * the compiler cannot inline and fully optimize code
312     * referenced by function pointers.
313     */
314   template <typename T, matrix_order PO1, matrix_style PS1,
315             matrix_order PO2, matrix_style PS2, typename FUNCTOR>
316   T
gradfdifls(FUNCTOR fun,T alpha,const Matrix<T,PO1,PS1> & theta,const Matrix<T,PO2,PS2> & p)317   gradfdifls (FUNCTOR fun, T alpha, const Matrix<T,PO1,PS1>& theta,
318               const Matrix<T,PO2,PS2>& p)
319 
320   {
321     SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
322         "Theta not column vector");
323     SCYTHE_CHECK_10(! p.isColVector(), scythe_dimension_error,
324         "p not column vector");
325 
326     unsigned int k = theta.size();
327     T h = std::sqrt(epsilon<T>());
328     h = std::sqrt(h);
329     //T h = std::sqrt(2.2e-16);
330 
331     T deriv;
332 
333     for (unsigned int i = 0; i < k; ++i) {
334       T temp = alpha + h;
335       donothing(temp);
336       T e = temp - alpha;
337       deriv = (fun(theta + (alpha + e) * p) - fun(theta + alpha * p))
338               / e;
339     }
340 
341     return deriv;
342   }
343 
344    /*! \brief Calculate the Jacobian of a function using a forward
345     * difference formula.
346     *
347     * This function numerically calculates the Jacobian of a
348     * vector-valued function using a forward difference formula.
349     *
350     * \param fun The function to calculate the Jacobian of.  This
351     * function should both take and return a Matrix (vector) of type
352     * T.
353     * \param theta The column vector of parameter values at which to
354     * take the Jacobian of \a fun.
355     *
356     * \see gradfdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta)
357     * \see gradfdifls(FUNCTOR fun, T alpha, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p)
358     * \see hesscdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta)
359     *
360     * \throw scythe_dimension_error (Level 1)
361     *
362     * \note
363     * Users will typically wish to implement \a fun in terms of a
364     * functor.  Using a functor provides a generic way in which to
365     * evaluate functions with more than one parameter.  Furthermore,
366     * although one can pass a function pointer to this routine,
367     * the compiler cannot inline and fully optimize code
368     * referenced by function pointers.
369     */
370   template <matrix_order RO, matrix_style RS, typename T,
371             matrix_order PO, matrix_style PS, typename FUNCTOR>
372   Matrix<T,RO,RS>
jacfdif(FUNCTOR fun,const Matrix<T,PO,PS> & theta)373   jacfdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta)
374   {
375     SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
376         "Theta not column vector");
377 
378     Matrix<T,RO> fval = fun(theta);
379     unsigned int k = theta.rows();
380     unsigned int n = fval.rows();
381 
382     T h = std::sqrt(epsilon<T>()); //2.2e-16
383     h = std::sqrt(h);
384 
385     Matrix<T,RO,RS> J(n,k);
386     Matrix<T,RO> e;
387     Matrix<T,RO> temp;
388     Matrix<T,RO> fthetae;
389     Matrix<T,RO> ftheta;
390 
391     for (int i = 0; i < k; ++i) {
392       e = Matrix<T,RO>(k,1);
393       e[i] = h;
394       temp = theta + e;
395       donothing(temp); /// XXX ??
396       e = temp - theta;
397       fthetae = fun(theta + e);
398       ftheta = fun(theta);
399       for (unsigned int j = 0; j < n; ++j) {
400         J(j,i) = (fthetae[j] - ftheta[j]) / e[i];
401       }
402     }
403 
404     return J;
405   }
406 
407   // default template
408   template <typename T, matrix_order PO, matrix_style PS,
409             typename FUNCTOR>
410   Matrix<T,PO,PS>
jacfdif(FUNCTOR fun,const Matrix<T,PO,PS> & theta)411   jacfdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta)
412   {
413     return jacfdif<PO,Concrete>(fun, theta);
414   }
415 
416 
417    /*! \brief Calculate the Hessian of a function using a central
418     * difference formula.
419     *
420     * This function numerically calculates the Hessian of a
421     * vector-valued function using a central difference formula.
422     *
423     * \param fun The function to calculate the Hessian of.  This
424     * function should take a Matrix (vector) of type T and return a
425     * single value of type T.
426     * \param theta The column vector of parameter values at which to
427     * calculate the Hessian.
428     *
429     * \see gradfdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta)
430     * \see gradfdifls(FUNCTOR fun, T alpha, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p)
431     * \see jacfdif(FUNCTOR fun, const Matrix<T,PO,PS>& theta)
432     *
433     * \throw scythe_dimension_error
434     *
435     * \note
436     * Users will typically wish to implement \a fun in terms of a
437     * functor.  Using a functor provides a generic way in which to
438     * evaluate functions with more than one parameter.  Furthermore,
439     * although one can pass a function pointer to this routine,
440     * the compiler cannot inline and fully optimize code
441     * referenced by function pointers.
442     */
443   template <matrix_order RO, matrix_style RS, typename T,
444             matrix_order PO, matrix_style PS, typename FUNCTOR>
445   Matrix<T, RO, RS>
hesscdif(FUNCTOR fun,const Matrix<T,PO,PS> & theta)446   hesscdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta)
447   {
448     SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
449         "Theta not column vector");
450 
451     T fval = fun(theta);
452 
453     //std::cout << std::endl;
454     //std::cout << "hesscdif theta = " << theta << "\n";
455     //std::cout << "hesscdif fun(theta) = " << fval << std::endl;
456 
457     unsigned int k = theta.rows();
458 
459     // stepsize CAREFUL -- THIS IS MACHINE SPECIFIC !!!!
460     T h2 = std::sqrt(epsilon<T>());
461     //T h2 = (T) 1e-10;
462     T h = std::sqrt(h2);
463 
464     Matrix<T, RO, RS> H(k,k);
465 
466     //std::cout << "h2 = " << h2 << "  h = " << h << std::endl;
467 
468     Matrix<T,RO> ei;
469     Matrix<T,RO> ej;
470     Matrix<T,RO> temp;
471 
472     for (unsigned int i = 0; i < k; ++i) {
473       ei = Matrix<T,RO>(k, 1);
474       ei[i] = h;
475       temp = theta + ei;
476       donothing(temp); // XXX Again, I'm baffled
477       ei = temp - theta;
478       for (unsigned int j = 0; j < k; ++j){
479         ej = Matrix<T,RO>(k,1);
480         ej[j] = h;
481         temp = theta + ej;
482         donothing(temp); // XXX and again
483         ej = temp - theta;
484 
485         if (i == j) {
486           H(i,i) = ( -fun(theta + 2.0 * ei) + 16.0 *
487               fun(theta + ei) - 30.0 * fval + 16.0 *
488               fun(theta - ei) -
489               fun(theta - 2.0 * ei)) / (12.0 * h2);
490         } else {
491           H(i,j) = ( fun(theta + ei + ej) - fun(theta + ei - ej)
492               - fun(theta - ei + ej) + fun(theta - ei - ej))
493             / (4.0 * h2);
494         }
495       }
496     }
497 
498     //std::cout << "end of hesscdif, H = " << H << "\n";
499     return H;
500   }
501 
502   // default template
503   template <typename T, matrix_order PO, matrix_style PS,
504             typename FUNCTOR>
505   Matrix<T,PO,PS>
hesscdif(FUNCTOR fun,const Matrix<T,PO,PS> & theta)506   hesscdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta)
507   {
508     return hesscdif<PO,Concrete>(fun, theta);
509   }
510 
511    /*! \brief Find the step length that minimizes an implied 1-dimensional function.
512     *
513     * This function performs a line search to find the step length
514     * that approximately minimizes an implied one dimensional
515     * function.
516     *
517     * \param fun The function to minimize.  This function should take
518     * one Matrix (vector) argument of type T and return a single value
519     * of type T.
520     * \param theta A column vector of parameter values that anchor the
521     * 1-dimensional function.
522     * \param p A direction vector that creates the 1-dimensional
523     * function.
524     *
525     * \see linesearch2(FUNCTOR fun, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p, rng<RNGTYPE>& runif)
526     * \see zoom(FUNCTOR fun, T alpha_lo, T alpha_hi, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p)
527     * \see BFGS(FUNCTOR fun, const Matrix<T,PO,PS>& theta, rng<RNGTYPE>& runif, unsigned int maxit, T tolerance, bool trace = false)
528     *
529     * \throw scythe_dimension_error (Level 1)
530     *
531     * \note
532     * Users will typically wish to implement \a fun in terms of a
533     * functor.  Using a functor provides a generic way in which to
534     * evaluate functions with more than one parameter.  Furthermore,
535     * although one can pass a function pointer to this routine,
536     * the compiler cannot inline and fully optimize code
537     * referenced by function pointers.
538     */
539   template <typename T, matrix_order PO1, matrix_style PS1,
540             matrix_order PO2, matrix_style PS2, typename FUNCTOR>
linesearch1(FUNCTOR fun,const Matrix<T,PO1,PS1> & theta,const Matrix<T,PO2,PS2> & p)541   T linesearch1 (FUNCTOR fun, const Matrix<T,PO1,PS1>& theta,
542                  const Matrix<T,PO2,PS2>& p)
543   {
544     SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
545         "Theta not column vector");
546     SCYTHE_CHECK_10(! p.isColVector(), scythe_dimension_error,
547         "p not column vector");
548 
549     T alpha_bar = (T) 1.0;
550     T rho = (T) 0.9;
551     T c   = (T) 0.5;
552     T alpha = alpha_bar;
553     Matrix<T,PO1> fgrad = gradfdif(fun, theta);
554 
555     while (fun(theta + alpha * p) >
556            (fun(theta) + c * alpha * t(fgrad) * p)[0]) {
557       alpha = rho * alpha;
558     }
559 
560     return alpha;
561   }
562 
563    /*! \brief Find the step length that minimizes an implied 1-dimensional function.
564     *
565     * This function performs a line search to find the step length
566     * that approximately minimizes an implied one dimensional
567     * function.
568     *
569     * \param fun The function to minimize.  This function should take
570     * one Matrix (vector) argument of type T and return a single value
571     * of type T.
572     * \param theta A column vector of parameter values that anchor the
573     * 1-dimensional function.
574     * \param p A direction vector that creates the 1-dimensional
575     * function.
576     * \param runif A random uniform number generator function object
577     * (an object that returns a random uniform variate on (0,1) when
578     * its () operator is invoked).
579     *
580     * \see linesearch1(FUNCTOR fun, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p)
581     * \see zoom(FUNCTOR fun, T alpha_lo, T alpha_hi, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p)
582     * \see BFGS(FUNCTOR fun, const Matrix<T,PO,PS>& theta, rng<RNGTYPE>& runif, unsigned int maxit, T tolerance, bool trace = false)
583     *
584     * \throw scythe_dimension_error (Level 1)
585     *
586     * \note
587     * Users will typically wish to implement \a fun in terms of a
588     * functor.  Using a functor provides a generic way in which to
589     * evaluate functions with more than one parameter.  Furthermore,
590     * although one can pass a function pointer to this routine,
591     * the compiler cannot inline and fully optimize code
592     * referenced by function pointers.
593     */
594   template <typename T, matrix_order PO1, matrix_style PS1,
595             matrix_order PO2, matrix_style PS2, typename FUNCTOR,
596             typename RNGTYPE>
linesearch2(FUNCTOR fun,const Matrix<T,PO1,PS1> & theta,const Matrix<T,PO2,PS2> & p,rng<RNGTYPE> & runif)597   T linesearch2 (FUNCTOR fun, const Matrix<T,PO1,PS1>& theta,
598                  const Matrix<T,PO2,PS2>& p, rng<RNGTYPE>& runif)
599   {
600     SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
601         "Theta not column vector");
602     SCYTHE_CHECK_10(! p.isColVector(), scythe_dimension_error,
603         "p not column vector");
604 
605     T alpha_last = (T) 0.0;
606     T alpha_cur = (T) 1.0;
607     T alpha_max = (T) 10.0;
608     T c1 = (T) 1e-4;
609     T c2 = (T) 0.5;
610     unsigned int max_iter = 50;
611     T fgradalpha0 = gradfdifls(fun, (T) 0, theta, p);
612 
613     for (unsigned int i = 0; i < max_iter; ++i) {
614       T phi_cur = fun(theta + alpha_cur * p);
615       T phi_last = fun(theta + alpha_last * p);
616 
617       if ((phi_cur > (fun(theta) + c1 * alpha_cur * fgradalpha0))
618           || ((phi_cur >= phi_last) && (i > 0))) {
619         T alphastar = zoom(fun, alpha_last, alpha_cur, theta, p);
620         return alphastar;
621       }
622 
623       T fgradalpha_cur = gradfdifls(fun, alpha_cur, theta, p);
624       if (std::fabs(fgradalpha_cur) <= -1 * c2 * fgradalpha0)
625         return alpha_cur;
626 
627       if ( fgradalpha_cur >= (T) 0.0) {
628         T alphastar = zoom(fun, alpha_cur, alpha_last, theta, p);
629         return alphastar;
630       }
631 
632       alpha_last = alpha_cur;
633       // runif stuff below is probably not correc KQ 12/08/2006
634       // I think it should work now DBP 01/02/2007
635       alpha_cur = runif() * (alpha_max - alpha_cur) + alpha_cur;
636     }
637 
638     return 0.001;
639   }
640 
641    /*! \brief Find minimum of a function once bracketed.
642     *
643     * This function finds the minimum of a function, once bracketed.
644     *
645     * \param fun The function to minimize.  This function should take
646     * one Matrix (vector) argument of type T and return a single value
647     * of type T.
648     * \param alpha_lo The lower bracket.
649     * \param alpha_hi The upper bracket.
650     * \param theta A column vector of parameter values that anchor the
651     * 1-dimensional function.
652     * \param p A direction vector that creates the 1-dimensional
653     *
654     * \see linesearch1(FUNCTOR fun, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p)
655     * \see linesearch2(FUNCTOR fun, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p, rng<RNGTYPE>& runif)
656     * \see BFGS(FUNCTOR fun, const Matrix<T,PO,PS>& theta, rng<RNGTYPE>& runif, unsigned int maxit, T tolerance, bool trace = false)
657     *
658     * \throw scythe_dimension_error (Level 1)
659     *
660     * \note
661     * Users will typically wish to implement \a fun in terms of a
662     * functor.  Using a functor provides a generic way in which to
663     * evaluate functions with more than one parameter.  Furthermore,
664     * although one can pass a function pointer to this routine,
665     * the compiler cannot inline and fully optimize code
666     * referenced by function pointers.
667     *
668     */
669   template <typename T, matrix_order PO1, matrix_style PS1,
670             matrix_order PO2, matrix_style PS2, typename FUNCTOR>
zoom(FUNCTOR fun,T alpha_lo,T alpha_hi,const Matrix<T,PO1,PS1> & theta,const Matrix<T,PO2,PS2> & p)671   T zoom (FUNCTOR fun, T alpha_lo, T alpha_hi,
672           const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p)
673   {
674     SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
675         "Theta not column vector");
676     SCYTHE_CHECK_10(! p.isColVector(), scythe_dimension_error,
677         "p not column vector");
678 
679     T alpha_j = (alpha_lo + alpha_hi) / 2.0;
680     T phi_0 = fun(theta);
681     T c1 = (T) 1e-4;
682     T c2 = (T) 0.5;
683     T fgrad0 = gradfdifls(fun, (T) 0, theta, p);
684 
685     unsigned int count = 0;
686     unsigned int maxit = 20;
687     while(count < maxit) {
688       T phi_j = fun(theta + alpha_j * p);
689       T phi_lo = fun(theta + alpha_lo * p);
690 
691       if ((phi_j > (phi_0 + c1 * alpha_j * fgrad0))
692           || (phi_j >= phi_lo)){
693         alpha_hi = alpha_j;
694       } else {
695         T fgradj = gradfdifls(fun, alpha_j, theta, p);
696         if (std::fabs(fgradj) <= -1 * c2 * fgrad0){
697           return alpha_j;
698         }
699         if ( fgradj * (alpha_hi - alpha_lo) >= 0){
700           alpha_hi = alpha_lo;
701         }
702         alpha_lo = alpha_j;
703       }
704       ++count;
705     }
706 
707     return alpha_j;
708   }
709 
710 
711    /*! \brief Find function minimum using the BFGS algorithm.
712     *
713     * Numerically find the minimum of a function using the BFGS
714     * algorithm.
715     *
716     * \param fun The function to minimize.  This function should take
717     * one Matrix (vector) argument of type T and return a single value
718     * of type T.
719     * \param theta A column vector of parameter values that anchor the
720     * 1-dimensional function.
721     * \param runif A random uniform number generator function object
722     * (an object that returns a random uniform variate on (0,1) when
723     * its () operator is invoked).
724     * \param maxit The maximum number of iterations.
725     * \param tolerance The convergence tolerance.
726     * \param trace Boolean value determining whether BFGS should print
727     *              to stdout (defaults to false).
728     *
729     * \see linesearch1(FUNCTOR fun, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p)
730     * \see linesearch2(FUNCTOR fun, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p, rng<RNGTYPE>& runif)
731     * \see zoom(FUNCTOR fun, T alpha_lo, T alpha_hi, const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p)
732     *
733     * \throw scythe_dimension_error (Level 1)
734     * \throw scythe_convergence_error (Level 0)
735     *
736     * \note
737     * Users will typically wish to implement \a fun in terms of a
738     * functor.  Using a functor provides a generic way in which to
739     * evaluate functions with more than one parameter.  Furthermore,
740     * although one can pass a function pointer to this routine,
741     * the compiler cannot inline and fully optimize code
742     * referenced by function pointers.
743     */
744   // there were 2 versions of linesearch1-- the latter was what we
745   // had been calling linesearch2
746   template <matrix_order RO, matrix_style RS, typename T,
747             matrix_order PO, matrix_style PS,
748             typename FUNCTOR, typename RNGTYPE>
749   Matrix<T,RO,RS>
750   BFGS (FUNCTOR fun, const Matrix<T,PO,PS>& theta, rng<RNGTYPE>& runif,
751         unsigned int maxit, T tolerance, bool trace = false)
752   {
753     SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
754         "Theta not column vector");
755 
756     unsigned int n = theta.size();
757 
758     // H is initial inverse hessian
759     Matrix<T,RO> H = inv(hesscdif(fun, theta));
760 
761     // gradient at starting values
762     Matrix<T,RO> fgrad = gradfdif(fun, theta);
763     Matrix<T,RO> thetamin = theta;
764     Matrix<T,RO> fgrad_new = fgrad;
765     Matrix<T,RO> I = eye<T,RO>(n);
766     Matrix<T,RO> s;
767     Matrix<T,RO> y;
768 
769     unsigned int count = 0;
770     while( (t(fgrad_new)*fgrad_new)[0] > tolerance) {
771       Matrix<T> p = -1.0 * H * fgrad;
772       //std::cout << "initial H * fgrad = " << H * fgrad << "\n";
773       //std::cout << "initial p = " << p << "\n";
774 
775       T alpha = linesearch2(fun, thetamin, p, runif);
776       //T alpha = linesearch1(fun, thetamin, p);
777 
778       //std::cout << "after linesearch p = " << p << "\n";
779 
780 
781       Matrix<T> thetamin_new = thetamin + alpha * p;
782       fgrad_new = gradfdif(fun, thetamin_new);
783       s = thetamin_new - thetamin;
784       y = fgrad_new - fgrad;
785       T rho = 1.0 / (t(y) * s)[0];
786       H = (I - rho * s * t(y)) * H *(I - rho * y * t(s))
787         + rho * s * t(s);
788 
789       thetamin = thetamin_new;
790       fgrad = fgrad_new;
791       ++count;
792 
793 #ifndef SCYTHE_RPACK
794       if (trace) {
795         std::cout << "BFGS iteration = " << count << std::endl;
796         std::cout << "thetamin = " << (t(thetamin)) ;
797         std::cout << "gradient = " << (t(fgrad)) ;
798         std::cout << "t(gradient) * gradient = " << (t(fgrad) * fgrad) ;
799         std::cout << "function value = " << fun(thetamin) <<
800         std::endl << std::endl;
801       }
802 #endif
803       //std::cout << "Hessian = " << hesscdif(fun, theta) << "\n";
804       //std::cout << "H = " << H << "\n";
805       //std::cout << "alpha = " << alpha << std::endl;
806       //std::cout << "p = " << p << "\n";
807       //std::cout << "-1 * H * fgrad = " << -1.0 * H * fgrad << "\n";
808 
809       SCYTHE_CHECK(count > maxit, scythe_convergence_error,
810           "Failed to converge.  Try better starting values");
811     }
812 
813     return thetamin;
814   }
815 
816   // Default template
817   template <typename T, matrix_order O, matrix_style S,
818             typename FUNCTOR, typename RNGTYPE>
819   Matrix<T,O,Concrete>
820   BFGS (FUNCTOR fun, const Matrix<T,O,S>& theta, rng<RNGTYPE>& runif,
821         unsigned int maxit, T tolerance, bool trace = false)
822   {
823     return BFGS<O,Concrete> (fun, theta, runif, maxit, tolerance, trace);
824   }
825 
826 
827   /* Solves a system of n nonlinear equations in n unknowns of the form
828    * fun(thetastar) = 0 for thetastar given the function, starting
829    * value theta, max number of iterations, and tolerance.
830    * Uses Broyden's method.
831    */
832    /*! \brief Solve a system of nonlinear equations.
833     *
834     * Solves a system of n nonlinear equations in n unknowns of the form
835     * \f$fun(\theta^*) = 0\f$ for \f$\theta^*\f$.
836     *
837     * \param fun The function to solve.  The function should both take
838     * and return a Matrix of type T.
839     * \param theta A column vector of parameter values at which to
840     * start the solve procedure.
841     * \param maxit The maximum number of iterations.
842     * \param tolerance The convergence tolerance.
843     *
844     * \throw scythe_dimension_error (Level 1)
845     * \throw scythe_convergence_error (Level 1)
846     *
847     * \note
848     * Users will typically wish to implement \a fun in terms of a
849     * functor.  Using a functor provides a generic way in which to
850     * evaluate functions with more than one parameter.  Furthermore,
851     * although one can pass a function pointer to this routine,
852     * the compiler cannot inline and fully optimize code
853     * referenced by function pointers.
854     */
855   template <matrix_order RO, matrix_style RS, typename T,
856             matrix_order PO, matrix_style PS, typename FUNCTOR>
857   Matrix<T,RO,RS>
858   nls_broyden(FUNCTOR fun, const Matrix<T,PO,PS>& theta,
859               unsigned int maxit = 5000, T tolerance = 1e-6)
860   {
861     SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
862         "Theta not column vector");
863 
864 
865     Matrix<T,RO> thetastar = theta;
866     Matrix<T,RO> B = jacfdif(fun, thetastar);
867 
868     Matrix<T,RO> fthetastar;
869     Matrix<T,RO> p;
870     Matrix<T,RO> thetastar_new;
871     Matrix<T,RO> fthetastar_new;
872     Matrix<T,RO> s;
873     Matrix<T,RO> y;
874 
875     for (unsigned int i = 0; i < maxit; ++i) {
876       fthetastar = fun(thetastar);
877       p = lu_solve(B, -1 * fthetastar);
878       T alpha = (T) 1.0;
879       thetastar_new = thetastar + alpha*p;
880       fthetastar_new = fun(thetastar_new);
881       s = thetastar_new - thetastar;
882       y = fthetastar_new - fthetastar;
883       B = B + ((y - B * s) * t(s)) / (t(s) * s);
884       thetastar = thetastar_new;
885       if (max(fabs(fthetastar_new)) < tolerance)
886         return thetastar;
887     }
888 
889     SCYTHE_THROW_10(scythe_convergence_error,  "Failed to converge.  Try better starting values or increase maxit");
890 
891     return thetastar;
892   }
893 
894   // default template
895   template <typename T, matrix_order O, matrix_style S,
896             typename FUNCTOR>
897   Matrix<T,O,Concrete>
898   nls_broyden (FUNCTOR fun, const Matrix<T,O,S>& theta,
899                unsigned int maxit = 5000, T tolerance = 1e-6)
900   {
901     return nls_broyden<O,Concrete>(fun, theta, maxit, tolerance);
902   }
903 
904 } // namespace scythe
905 
906 #endif /* SCYTHE_OPTIMIZE_H */
907