1 /*
2  *      Limited memory BFGS (L-BFGS).
3  *
4  * Copyright (c) 1990, Jorge Nocedal
5  * Copyright (c) 2007-2010 Naoaki Okazaki
6  * All rights reserved.
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a copy
9  * of this software and associated documentation files (the "Software"), to deal
10  * in the Software without restriction, including without limitation the rights
11  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12  * copies of the Software, and to permit persons to whom the Software is
13  * furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice shall be included in
16  * all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24  * THE SOFTWARE.
25  */
26 
27 /* $Id: lbfgs.c 65 2010-01-29 12:19:16Z naoaki $ */
28 
29 /*
This library is a C port of the FORTRAN implementation of the Limited-memory
Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) method written by Jorge Nocedal.
32 The original FORTRAN source code is available at:
33 http://www.ece.northwestern.edu/~nocedal/lbfgs.html
34 
35 The L-BFGS algorithm is described in:
36     - Jorge Nocedal.
37       Updating Quasi-Newton Matrices with Limited Storage.
38       <i>Mathematics of Computation</i>, Vol. 35, No. 151, pp. 773--782, 1980.
39     - Dong C. Liu and Jorge Nocedal.
40       On the limited memory BFGS method for large scale optimization.
41       <i>Mathematical Programming</i> B, Vol. 45, No. 3, pp. 503-528, 1989.
42 
43 The line search algorithms used in this implementation are described in:
44     - John E. Dennis and Robert B. Schnabel.
45       <i>Numerical Methods for Unconstrained Optimization and Nonlinear
46       Equations</i>, Englewood Cliffs, 1983.
47     - Jorge J. More and David J. Thuente.
48       Line search algorithm with guaranteed sufficient decrease.
49       <i>ACM Transactions on Mathematical Software (TOMS)</i>, Vol. 20, No. 3,
50       pp. 286-307, 1994.
51 
This library also implements the Orthant-Wise Limited-memory Quasi-Newton
(OWL-QN) method presented in:
54     - Galen Andrew and Jianfeng Gao.
55       Scalable training of L1-regularized log-linear models.
56       In <i>Proceedings of the 24th International Conference on Machine
57       Learning (ICML 2007)</i>, pp. 33-40, 2007.
58 
I would like to thank the original author, Jorge Nocedal, who has been
distributing his efficient and instructive implementation under an open-source
license.
62 */
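/*
A minimal, illustrative usage sketch (not part of the library): the caller
implements an objective/gradient callback matching lbfgs_evaluate_t declared in
lbfgs.h and then invokes lbfgs().  The quadratic objective, the names
sample_evaluate() and sample_run(), and the problem size are made up for this
example.

    static lbfgsfloatval_t sample_evaluate(
        void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g,
        const int n, const lbfgsfloatval_t step)
    {
        lbfgsfloatval_t fx = 0.;
        int i;
        for (i = 0;i < n;++i) {
            fx += (x[i] - 1.) * (x[i] - 1.);
            g[i] = 2. * (x[i] - 1.);
        }
        return fx;
    }

    static int sample_run(void)
    {
        int i, ret;
        lbfgsfloatval_t fx;
        lbfgsfloatval_t *x = lbfgs_malloc(100);
        lbfgs_parameter_t param;

        for (i = 0;i < 100;++i) x[i] = 0.;
        lbfgs_parameter_init(&param);
        ret = lbfgs(100, x, &fx, sample_evaluate, NULL, NULL, &param);
        lbfgs_free(x);
        return ret;
    }
*/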
63 
64 #ifdef  HAVE_CONFIG_H
65 #include "config.h"
66 #endif/*HAVE_CONFIG_H*/
67 
68 #ifndef _MSC_VER
69 #include <stdint.h>
70 #endif
71 
72 #include <stdio.h>
73 #include <stdlib.h>
#include <math.h>
#include <string.h>     /* memcpy() used in lbfgs_parameter_init() */
75 
76 #include "lbfgs.h"
77 
78 #ifdef  _MSC_VER
79 #define inline  __inline
80 typedef unsigned int uint32_t;
81 #endif/*_MSC_VER*/
82 
83 #if     defined(USE_SSE) && defined(__SSE2__) && LBFGS_FLOAT == 64
84 /* Use SSE2 optimization for 64bit double precision. */
85 #include "arithmetic_sse_double.h"
86 
87 #elif   defined(USE_SSE) && defined(__SSE__) && LBFGS_FLOAT == 32
88 /* Use SSE optimization for 32bit float precision. */
89 #include "arithmetic_sse_float.h"
90 
91 #else
92 /* No CPU specific optimization. */
93 #include "arithmetic_ansi.h"
94 
95 #endif
96 
97 #define min2(a, b)      ((a) <= (b) ? (a) : (b))
98 #define max2(a, b)      ((a) >= (b) ? (a) : (b))
#define max3(a, b, c)   max2(max2((a), (b)), (c))
100 
101 #define is_aligned(p, bytes) \
102 	(((uintptr_t)(const void*)(p)) % (bytes) == 0)
103 
104 struct tag_callback_data {
105     int n;
106     void *instance;
107     lbfgs_evaluate_t proc_evaluate;
108     lbfgs_progress_t proc_progress;
109 };
110 typedef struct tag_callback_data callback_data_t;
111 
112 struct tag_iteration_data {
113     lbfgsfloatval_t alpha;
114     lbfgsfloatval_t *s;     /* [n] */
115     lbfgsfloatval_t *y;     /* [n] */
116     lbfgsfloatval_t ys;     /* vecdot(y, s) */
117 };
118 typedef struct tag_iteration_data iteration_data_t;
119 
120 static const lbfgs_parameter_t _defparam = {
121     6, 1e-5, 0, 1e-5,
122     0, LBFGS_LINESEARCH_DEFAULT, 40,
123     1e-20, 1e20, 1e-4, 0.9, 0.9, 1.0e-16,
124     0.0, 0, -1,
125 };
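/*
    The positional initializers above follow the field order of lbfgs_parameter_t
    (as declared in lbfgs.h): m = 6, epsilon = 1e-5, past = 0, delta = 1e-5,
    max_iterations = 0, linesearch = LBFGS_LINESEARCH_DEFAULT, max_linesearch = 40,
    min_step = 1e-20, max_step = 1e20, ftol = 1e-4, wolfe = 0.9, gtol = 0.9,
    xtol = 1e-16, orthantwise_c = 0.0, orthantwise_start = 0, orthantwise_end = -1.
 */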
126 
127 /* Forward function declarations. */
128 
129 typedef int (*line_search_proc)(
130     int n,
131     lbfgsfloatval_t *x,
132     lbfgsfloatval_t *f,
133     lbfgsfloatval_t *g,
134     lbfgsfloatval_t *s,
135     lbfgsfloatval_t *stp,
136     const lbfgsfloatval_t* xp,
137     const lbfgsfloatval_t* gp,
138     lbfgsfloatval_t *wa,
139     callback_data_t *cd,
140     const lbfgs_parameter_t *param
141     );
142 
143 static int line_search_backtracking(
144     int n,
145     lbfgsfloatval_t *x,
146     lbfgsfloatval_t *f,
147     lbfgsfloatval_t *g,
148     lbfgsfloatval_t *s,
149     lbfgsfloatval_t *stp,
150     const lbfgsfloatval_t* xp,
151     const lbfgsfloatval_t* gp,
152     lbfgsfloatval_t *wa,
153     callback_data_t *cd,
154     const lbfgs_parameter_t *param
155     );
156 
157 static int line_search_backtracking_owlqn(
158     int n,
159     lbfgsfloatval_t *x,
160     lbfgsfloatval_t *f,
161     lbfgsfloatval_t *g,
162     lbfgsfloatval_t *s,
163     lbfgsfloatval_t *stp,
164     const lbfgsfloatval_t* xp,
165     const lbfgsfloatval_t* gp,
166     lbfgsfloatval_t *wp,
167     callback_data_t *cd,
168     const lbfgs_parameter_t *param
169     );
170 
171 static int line_search_morethuente(
172     int n,
173     lbfgsfloatval_t *x,
174     lbfgsfloatval_t *f,
175     lbfgsfloatval_t *g,
176     lbfgsfloatval_t *s,
177     lbfgsfloatval_t *stp,
178     const lbfgsfloatval_t* xp,
179     const lbfgsfloatval_t* gp,
180     lbfgsfloatval_t *wa,
181     callback_data_t *cd,
182     const lbfgs_parameter_t *param
183     );
184 
185 static int update_trial_interval(
186     lbfgsfloatval_t *x,
187     lbfgsfloatval_t *fx,
188     lbfgsfloatval_t *dx,
189     lbfgsfloatval_t *y,
190     lbfgsfloatval_t *fy,
191     lbfgsfloatval_t *dy,
192     lbfgsfloatval_t *t,
193     lbfgsfloatval_t *ft,
194     lbfgsfloatval_t *dt,
195     const lbfgsfloatval_t tmin,
196     const lbfgsfloatval_t tmax,
197     int *brackt
198     );
199 
200 static lbfgsfloatval_t owlqn_x1norm(
201     const lbfgsfloatval_t* x,
202     const int start,
203     const int n
204     );
205 
206 static void owlqn_pseudo_gradient(
207     lbfgsfloatval_t* pg,
208     const lbfgsfloatval_t* x,
209     const lbfgsfloatval_t* g,
210     const int n,
211     const lbfgsfloatval_t c,
212     const int start,
213     const int end
214     );
215 
216 static void owlqn_project(
217     lbfgsfloatval_t* d,
218     const lbfgsfloatval_t* sign,
219     const int start,
220     const int end
221     );
222 
223 
224 #if     defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
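/*
    Round the number of variables up to a multiple of 8 so that the SSE-optimized
    vector routines always operate on full, aligned blocks.
 */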
static int round_out_variables(int n)
226 {
227     n += 7;
228     n /= 8;
229     n *= 8;
230     return n;
231 }
232 #endif/*defined(USE_SSE)*/
233 
lbfgsfloatval_t* lbfgs_malloc(int n)
235 {
236 #if     defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
237     n = round_out_variables(n);
238 #endif/*defined(USE_SSE)*/
239     return (lbfgsfloatval_t*)vecalloc(sizeof(lbfgsfloatval_t) * (size_t) n);
240 }
241 
void lbfgs_free(lbfgsfloatval_t *x)
243 {
244     vecfree(x);
245 }
246 
void lbfgs_parameter_init(lbfgs_parameter_t *param)
248 {
249     memcpy(param, &_defparam, sizeof(*param));
250 }
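/*
    Illustrative caller-side snippet (hypothetical, not part of the library):
    overriding a few defaults before calling lbfgs().  Note that a nonzero
    orthantwise_c (OWL-QN) requires the backtracking line search, as enforced by
    the parameter checks in lbfgs() below.

        lbfgs_parameter_t param;
        lbfgs_parameter_init(&param);
        param.epsilon = 1e-6;
        param.linesearch = LBFGS_LINESEARCH_BACKTRACKING;
        param.orthantwise_c = 1.0;
*/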
251 
int lbfgs(
253     int n,
254     lbfgsfloatval_t *x,
255     lbfgsfloatval_t *ptr_fx,
256     lbfgs_evaluate_t proc_evaluate,
257     lbfgs_progress_t proc_progress,
258     void *instance,
259     lbfgs_parameter_t *_param
260     )
261 {
262     int ret;
263     int i, j, k, ls, end, bound;
264     lbfgsfloatval_t step;
265 
266     /* Constant parameters and their default values. */
267     lbfgs_parameter_t param = (_param != NULL) ? (*_param) : _defparam;
268     const int m = param.m;
269 
270     lbfgsfloatval_t *xp = NULL;
271     lbfgsfloatval_t *g = NULL, *gp = NULL, *pg = NULL;
272     lbfgsfloatval_t *d = NULL, *w = NULL, *pf = NULL;
273     iteration_data_t *lm = NULL, *it = NULL;
274     lbfgsfloatval_t ys, yy;
275     lbfgsfloatval_t xnorm, gnorm, beta;
276     lbfgsfloatval_t fx = 0.;
277     lbfgsfloatval_t rate = 0.;
278     line_search_proc linesearch = line_search_morethuente;
279 
    /* Construct the callback data. */
281     callback_data_t cd;
282     cd.n = n;
283     cd.instance = instance;
284     cd.proc_evaluate = proc_evaluate;
285     cd.proc_progress = proc_progress;
286 
287 #if     defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
288     /* Round out the number of variables. */
289     n = round_out_variables(n);
290 #endif/*defined(USE_SSE)*/
291 
292     /* Check the input parameters for errors. */
293     if (n <= 0) {
294         return LBFGSERR_INVALID_N;
295     }
296 #if     defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
297     if (n % 8 != 0) {
298         return LBFGSERR_INVALID_N_SSE;
299     }
300     if (!is_aligned(x, 16)) {
301         return LBFGSERR_INVALID_X_SSE;
302     }
303 #endif/*defined(USE_SSE)*/
304     if (param.epsilon < 0.) {
305         return LBFGSERR_INVALID_EPSILON;
306     }
307     if (param.past < 0) {
308         return LBFGSERR_INVALID_TESTPERIOD;
309     }
310     if (param.delta < 0.) {
311         return LBFGSERR_INVALID_DELTA;
312     }
313     if (param.min_step < 0.) {
314         return LBFGSERR_INVALID_MINSTEP;
315     }
316     if (param.max_step < param.min_step) {
317         return LBFGSERR_INVALID_MAXSTEP;
318     }
319     if (param.ftol < 0.) {
320         return LBFGSERR_INVALID_FTOL;
321     }
322     if (param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE ||
323         param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE) {
324         if (param.wolfe <= param.ftol || 1. <= param.wolfe) {
325             return LBFGSERR_INVALID_WOLFE;
326         }
327     }
328     if (param.gtol < 0.) {
329         return LBFGSERR_INVALID_GTOL;
330     }
331     if (param.xtol < 0.) {
332         return LBFGSERR_INVALID_XTOL;
333     }
334     if (param.max_linesearch <= 0) {
335         return LBFGSERR_INVALID_MAXLINESEARCH;
336     }
337     if (param.orthantwise_c < 0.) {
338         return LBFGSERR_INVALID_ORTHANTWISE;
339     }
340     if (param.orthantwise_start < 0 || n < param.orthantwise_start) {
341         return LBFGSERR_INVALID_ORTHANTWISE_START;
342     }
343     if (param.orthantwise_end < 0) {
344         param.orthantwise_end = n;
345     }
346     if (n < param.orthantwise_end) {
347         return LBFGSERR_INVALID_ORTHANTWISE_END;
348     }
349     if (param.orthantwise_c != 0.) {
350         switch (param.linesearch) {
351         case LBFGS_LINESEARCH_BACKTRACKING:
352             linesearch = line_search_backtracking_owlqn;
353             break;
354         default:
355             /* Only the backtracking method is available. */
356             return LBFGSERR_INVALID_LINESEARCH;
357         }
358     } else {
359         switch (param.linesearch) {
360         case LBFGS_LINESEARCH_MORETHUENTE:
361             linesearch = line_search_morethuente;
362             break;
363         case LBFGS_LINESEARCH_BACKTRACKING_ARMIJO:
364         case LBFGS_LINESEARCH_BACKTRACKING_WOLFE:
365         case LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE:
366             linesearch = line_search_backtracking;
367             break;
368         default:
369             return LBFGSERR_INVALID_LINESEARCH;
370         }
371     }
372 
373     /* Allocate working space. */
374     xp = (lbfgsfloatval_t*)vecalloc((size_t) n * sizeof(lbfgsfloatval_t));
375     g = (lbfgsfloatval_t*)vecalloc((size_t) n * sizeof(lbfgsfloatval_t));
376     gp = (lbfgsfloatval_t*)vecalloc((size_t) n * sizeof(lbfgsfloatval_t));
377     d = (lbfgsfloatval_t*)vecalloc((size_t) n * sizeof(lbfgsfloatval_t));
378     w = (lbfgsfloatval_t*)vecalloc((size_t) n * sizeof(lbfgsfloatval_t));
379     if (xp == NULL || g == NULL || gp == NULL || d == NULL || w == NULL) {
380         ret = LBFGSERR_OUTOFMEMORY;
381         goto lbfgs_exit;
382     }
383 
384     if (param.orthantwise_c != 0.) {
        /* Allocate working space for OWL-QN. */
386         pg = (lbfgsfloatval_t*)vecalloc((size_t) n * sizeof(lbfgsfloatval_t));
387         if (pg == NULL) {
388             ret = LBFGSERR_OUTOFMEMORY;
389             goto lbfgs_exit;
390         }
391     }
392 
393     /* Allocate limited memory storage. */
394     lm = (iteration_data_t*)vecalloc((size_t) m * sizeof(iteration_data_t));
395     if (lm == NULL) {
396         ret = LBFGSERR_OUTOFMEMORY;
397         goto lbfgs_exit;
398     }
399 
400     /* Initialize the limited memory. */
401     for (i = 0;i < m;++i) {
402         it = &lm[i];
403         it->alpha = 0;
404         it->ys = 0;
405         it->s = (lbfgsfloatval_t*)vecalloc((size_t) n * sizeof(lbfgsfloatval_t));
406         it->y = (lbfgsfloatval_t*)vecalloc((size_t) n * sizeof(lbfgsfloatval_t));
407         if (it->s == NULL || it->y == NULL) {
408             ret = LBFGSERR_OUTOFMEMORY;
409             goto lbfgs_exit;
410         }
411     }
412 
413     /* Allocate an array for storing previous values of the objective function. */
414     if (0 < param.past) {
415         pf = (lbfgsfloatval_t*)vecalloc((size_t) param.past * sizeof(lbfgsfloatval_t));
416     }
417 
418     /* Evaluate the function value and its gradient. */
419     fx = cd.proc_evaluate(cd.instance, x, g, cd.n, 0);
420     if (0. != param.orthantwise_c) {
        /* Compute the L1 norm of the variables and add it to the objective value. */
422         xnorm = owlqn_x1norm(x, param.orthantwise_start, param.orthantwise_end);
423         fx += xnorm * param.orthantwise_c;
424         owlqn_pseudo_gradient(
425             pg, x, g, n,
426             param.orthantwise_c, param.orthantwise_start, param.orthantwise_end
427             );
428     }
429 
430     /* Store the initial value of the objective function. */
431     if (pf != NULL) {
432         pf[0] = fx;
433     }
434 
    /*
        Compute the direction;
        we assume that the initial Hessian matrix H_0 is the identity matrix.
     */
439     if (param.orthantwise_c == 0.) {
440         vecncpy(d, g, n);
441     } else {
442         vecncpy(d, pg, n);
443     }
444 
445     /*
446        Make sure that the initial variables are not a minimizer.
447      */
448     vec2norm(&xnorm, x, n);
449     if (param.orthantwise_c == 0.) {
450         vec2norm(&gnorm, g, n);
451     } else {
452         vec2norm(&gnorm, pg, n);
453     }
454     if (xnorm < 1.0) xnorm = 1.0;
455     if (gnorm / xnorm <= param.epsilon) {
456         ret = LBFGS_ALREADY_MINIMIZED;
457         goto lbfgs_exit;
458     }
459 
460     /* Compute the initial step:
461         step = 1.0 / sqrt(vecdot(d, d, n))
462      */
463     vec2norminv(&step, d, n);
464 
465     k = 1;
466     end = 0;
467     for (;;) {
468         /* Store the current position and gradient vectors. */
469         veccpy(xp, x, n);
470         veccpy(gp, g, n);
471 
472         /* Search for an optimal step. */
473         if (param.orthantwise_c == 0.) {
474             ls = linesearch(n, x, &fx, g, d, &step, xp, gp, w, &cd, &param);
475         } else {
476             ls = linesearch(n, x, &fx, g, d, &step, xp, pg, w, &cd, &param);
477             owlqn_pseudo_gradient(
478                 pg, x, g, n,
479                 param.orthantwise_c, param.orthantwise_start, param.orthantwise_end
480                 );
481         }
482         if (ls < 0) {
483             /* Revert to the previous point. */
484             veccpy(x, xp, n);
485             veccpy(g, gp, n);
486             ret = ls;
487             goto lbfgs_exit;
488         }
489 
490         /* Compute x and g norms. */
491         vec2norm(&xnorm, x, n);
492         if (param.orthantwise_c == 0.) {
493             vec2norm(&gnorm, g, n);
494         } else {
495             vec2norm(&gnorm, pg, n);
496         }
497 
498         /* Report the progress. */
499         if (cd.proc_progress) {
500             if ((ret = cd.proc_progress(cd.instance, x, g, fx, xnorm, gnorm, step, cd.n, k, ls))) {
501                 goto lbfgs_exit;
502             }
503         }
504 
505         /*
506             Convergence test.
507             The criterion is given by the following formula:
508                 |g(x)| / \max(1, |x|) < \epsilon
509          */
510         if (xnorm < 1.0) xnorm = 1.0;
511         if (gnorm / xnorm <= param.epsilon) {
512             /* Convergence. */
513             ret = LBFGS_SUCCESS;
514             break;
515         }
516 
517         /*
518             Test for stopping criterion.
519             The criterion is given by the following formula:
520                 (f(past_x) - f(x)) / f(x) < \delta
521          */
522         if (pf != NULL) {
523             /* We don't test the stopping criterion while k < past. */
524             if (param.past <= k) {
525                 /* Compute the relative improvement from the past. */
526                 rate = (pf[k % param.past] - fx) / fx;
527 
528                 /* The stopping criterion. */
529                 if (rate < param.delta) {
530                     ret = LBFGS_STOP;
531                     break;
532                 }
533             }
534 
535             /* Store the current value of the objective function. */
536             pf[k % param.past] = fx;
537         }
538 
539         if (param.max_iterations != 0 && param.max_iterations < k+1) {
540             /* Maximum number of iterations. */
541             ret = LBFGSERR_MAXIMUMITERATION;
542             break;
543         }
544 
545         /*
546             Update vectors s and y:
547                 s_{k+1} = x_{k+1} - x_{k} = \step * d_{k}.
548                 y_{k+1} = g_{k+1} - g_{k}.
549          */
550         it = &lm[end];
551         vecdiff(it->s, x, xp, n);
552         vecdiff(it->y, g, gp, n);
553 
554         /*
555             Compute scalars ys and yy:
556                 ys = y^t \cdot s = 1 / \rho.
557                 yy = y^t \cdot y.
            Notice that yy is used for scaling the Hessian matrix H_0 (Cholesky factor).
559          */
560         vecdot(&ys, it->y, it->s, n);
561         vecdot(&yy, it->y, it->y, n);
562         it->ys = ys;
563 
564         /*
565             Recursive formula to compute dir = -(H \cdot g).
566                 This is described in page 779 of:
567                 Jorge Nocedal.
568                 Updating Quasi-Newton Matrices with Limited Storage.
569                 Mathematics of Computation, Vol. 35, No. 151,
570                 pp. 773--782, 1980.
571          */
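        /* Use at most min(m, k) correction pairs in the two-loop recursion. */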
572         bound = (m <= k) ? m : k;
573         ++k;
574         end = (end + 1) % m;
575 
576         /* Compute the steepest direction. */
577         if (param.orthantwise_c == 0.) {
578             /* Compute the negative of gradients. */
579             vecncpy(d, g, n);
580         } else {
581             vecncpy(d, pg, n);
582         }
583 
584         j = end;
585         for (i = 0;i < bound;++i) {
586             j = (j + m - 1) % m;    /* if (--j == -1) j = m-1; */
587             it = &lm[j];
588             /* \alpha_{j} = \rho_{j} s^{t}_{j} \cdot q_{k+1}. */
589             vecdot(&it->alpha, it->s, d, n);
590             it->alpha /= it->ys;
591             /* q_{i} = q_{i+1} - \alpha_{i} y_{i}. */
592             vecadd(d, it->y, -it->alpha, n);
593         }
594 
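        /*
            Scale the direction by gamma = (y^t s) / (y^t y), which amounts to
            taking the initial Hessian approximation H_0 as gamma * I.
         */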
595         vecscale(d, ys / yy, n);
596 
597         for (i = 0;i < bound;++i) {
598             it = &lm[j];
599             /* \beta_{j} = \rho_{j} y^t_{j} \cdot \gamma_{i}. */
600             vecdot(&beta, it->y, d, n);
601             beta /= it->ys;
602             /* \gamma_{i+1} = \gamma_{i} + (\alpha_{j} - \beta_{j}) s_{j}. */
603             vecadd(d, it->s, it->alpha - beta, n);
604             j = (j + 1) % m;        /* if (++j == m) j = 0; */
605         }
606 
607         /*
608             Constrain the search direction for orthant-wise updates.
609          */
610         if (param.orthantwise_c != 0.) {
611             for (i = param.orthantwise_start;i < param.orthantwise_end;++i) {
612                 if (d[i] * pg[i] >= 0) {
613                     d[i] = 0;
614                 }
615             }
616         }
617 
618         /*
619             Now the search direction d is ready. We try step = 1 first.
620          */
621         step = 1.0;
622     }
623 
624 lbfgs_exit:
625     /* Return the final value of the objective function. */
626     if (ptr_fx != NULL) {
627         *ptr_fx = fx;
628     }
629 
630     vecfree(pf);
631 
632     /* Free memory blocks used by this function. */
633     if (lm != NULL) {
634         for (i = 0;i < m;++i) {
635             vecfree(lm[i].s);
636             vecfree(lm[i].y);
637         }
638         vecfree(lm);
639     }
640     vecfree(pg);
641     vecfree(w);
642     vecfree(d);
643     vecfree(gp);
644     vecfree(g);
645     vecfree(xp);
646 
647     return ret;
648 }
649 
650 
651 
static int line_search_backtracking(
653     int n,
654     lbfgsfloatval_t *x,
655     lbfgsfloatval_t *f,
656     lbfgsfloatval_t *g,
657     lbfgsfloatval_t *s,
658     lbfgsfloatval_t *stp,
659     const lbfgsfloatval_t* xp,
660     const lbfgsfloatval_t* gp,
661     lbfgsfloatval_t *wp,
662     callback_data_t *cd,
663     const lbfgs_parameter_t *param
664     )
665 {
666     int count = 0;
667     lbfgsfloatval_t width, dg;
668     lbfgsfloatval_t finit, dginit = 0., dgtest;
669     const lbfgsfloatval_t dec = 0.5, inc = 2.1;
670 
671     /* Check the input parameters for errors. */
672     if (*stp <= 0.) {
673         return LBFGSERR_INVALIDPARAMETERS;
674     }
675 
676     /* Compute the initial gradient in the search direction. */
677     vecdot(&dginit, g, s, n);
678 
679     /* Make sure that s points to a descent direction. */
680     if (0 < dginit) {
681         return LBFGSERR_INCREASEGRADIENT;
682     }
683 
684     /* The initial value of the objective function. */
685     finit = *f;
686     dgtest = param->ftol * dginit;
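    /*
        The loop below enforces the sufficient decrease (Armijo) condition:
        accept a step stp only if f(x + stp * s) <= f(x) + ftol * stp * dginit.
     */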
687 
688     for (;;) {
689         veccpy(x, xp, n);
690         vecadd(x, s, *stp, n);
691 
692         /* Evaluate the function and gradient values. */
693         *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp);
694 
695         ++count;
696 
        if (*f > finit + *stp * dgtest) {
            width = dec;
        } else {
            /* The sufficient decrease condition (Armijo condition). */
            if (param->linesearch == LBFGS_LINESEARCH_BACKTRACKING_ARMIJO) {
                /* Exit with the Armijo condition. */
                return count;
            }

            /* Check the Wolfe condition. */
            vecdot(&dg, g, s, n);
            if (dg < param->wolfe * dginit) {
                width = inc;
            } else {
                if (param->linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE) {
                    /* Exit with the regular Wolfe condition. */
                    return count;
                }

                /* Check the strong Wolfe condition. */
                if (dg > -param->wolfe * dginit) {
                    width = dec;
                } else {
                    /* Exit with the strong Wolfe condition. */
                    return count;
                }
            }
        }
725 
726         if (*stp < param->min_step) {
727             /* The step is the minimum value. */
728             return LBFGSERR_MINIMUMSTEP;
729         }
730         if (*stp > param->max_step) {
731             /* The step is the maximum value. */
732             return LBFGSERR_MAXIMUMSTEP;
733         }
734         if (param->max_linesearch <= count) {
            /* Maximum number of line search iterations reached. */
736             return LBFGSERR_MAXIMUMLINESEARCH;
737         }
738 
739         (*stp) *= width;
740     }
741 }
742 
743 
744 
static int line_search_backtracking_owlqn(
746     int n,
747     lbfgsfloatval_t *x,
748     lbfgsfloatval_t *f,
749     lbfgsfloatval_t *g,
750     lbfgsfloatval_t *s,
751     lbfgsfloatval_t *stp,
752     const lbfgsfloatval_t* xp,
753     const lbfgsfloatval_t* gp,
754     lbfgsfloatval_t *wp,
755     callback_data_t *cd,
756     const lbfgs_parameter_t *param
757     )
758 {
759     int i, count = 0;
760     lbfgsfloatval_t width = 0.5, norm = 0.;
761     lbfgsfloatval_t finit = *f, dgtest;
762 
763     /* Check the input parameters for errors. */
764     if (*stp <= 0.) {
765         return LBFGSERR_INVALIDPARAMETERS;
766     }
767 
768     /* Choose the orthant for the new point. */
769     for (i = 0;i < n;++i) {
770         wp[i] = (xp[i] == 0.) ? -gp[i] : xp[i];
771     }
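    /*
        wp[] now encodes the orthant for the line search: the sign of xp[i] where
        it is nonzero, and the sign of -gp[i] (the negative pseudo-gradient passed
        in by the caller) where xp[i] is zero.  owlqn_project() keeps every trial
        point inside this orthant.
     */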
772 
773     for (;;) {
774         /* Update the current point. */
775         veccpy(x, xp, n);
776         vecadd(x, s, *stp, n);
777 
778         /* The current point is projected onto the orthant. */
779         owlqn_project(x, wp, param->orthantwise_start, param->orthantwise_end);
780 
781         /* Evaluate the function and gradient values. */
782         *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp);
783 
        /* Compute the L1 norm of the variables and add it to the objective value. */
785         norm = owlqn_x1norm(x, param->orthantwise_start, param->orthantwise_end);
786         *f += norm * param->orthantwise_c;
787 
788         ++count;
789 
790         dgtest = 0.;
791         for (i = 0;i < n;++i) {
792             dgtest += (x[i] - xp[i]) * gp[i];
793         }
794 
795         if (*f <= finit + param->ftol * dgtest) {
796             /* The sufficient decrease condition. */
797             return count;
798         }
799 
800         if (*stp < param->min_step) {
801             /* The step is the minimum value. */
802             return LBFGSERR_MINIMUMSTEP;
803         }
804         if (*stp > param->max_step) {
805             /* The step is the maximum value. */
806             return LBFGSERR_MAXIMUMSTEP;
807         }
808         if (param->max_linesearch <= count) {
            /* Maximum number of line search iterations reached. */
810             return LBFGSERR_MAXIMUMLINESEARCH;
811         }
812 
813         (*stp) *= width;
814     }
815 }
816 
817 
818 
static int line_search_morethuente(
820     int n,
821     lbfgsfloatval_t *x,
822     lbfgsfloatval_t *f,
823     lbfgsfloatval_t *g,
824     lbfgsfloatval_t *s,
825     lbfgsfloatval_t *stp,
826     const lbfgsfloatval_t* xp,
827     const lbfgsfloatval_t* gp,
828     lbfgsfloatval_t *wa,
829     callback_data_t *cd,
830     const lbfgs_parameter_t *param
831     )
832 {
833     int count = 0;
834     int brackt, stage1, uinfo = 0;
835     lbfgsfloatval_t dg;
836     lbfgsfloatval_t stx, fx, dgx;
837     lbfgsfloatval_t sty, fy, dgy;
838     lbfgsfloatval_t fxm, dgxm, fym, dgym, fm, dgm;
839     lbfgsfloatval_t finit, ftest1, dginit, dgtest;
840     lbfgsfloatval_t width, prev_width;
841     lbfgsfloatval_t stmin, stmax;
842 
843     /* Check the input parameters for errors. */
844     if (*stp <= 0.) {
845         return LBFGSERR_INVALIDPARAMETERS;
846     }
847 
848     /* Compute the initial gradient in the search direction. */
849     vecdot(&dginit, g, s, n);
850 
851     /* Make sure that s points to a descent direction. */
852     if (0 < dginit) {
853         return LBFGSERR_INCREASEGRADIENT;
854     }
855 
856     /* Initialize local variables. */
857     brackt = 0;
858     stage1 = 1;
859     finit = *f;
860     dgtest = param->ftol * dginit;
861     width = param->max_step - param->min_step;
862     prev_width = 2.0 * width;
863 
864     /*
865         The variables stx, fx, dgx contain the values of the step,
866         function, and directional derivative at the best step.
867         The variables sty, fy, dgy contain the value of the step,
868         function, and derivative at the other endpoint of
869         the interval of uncertainty.
870         The variables stp, f, dg contain the values of the step,
871         function, and derivative at the current step.
872     */
873     stx = sty = 0.;
874     fx = fy = finit;
875     dgx = dgy = dginit;
876 
877     for (;;) {
878         /*
879             Set the minimum and maximum steps to correspond to the
880             present interval of uncertainty.
881          */
882         if (brackt) {
883             stmin = min2(stx, sty);
884             stmax = max2(stx, sty);
885         } else {
886             stmin = stx;
887             stmax = *stp + 4.0 * (*stp - stx);
888         }
889 
890         /* Clip the step in the range of [stpmin, stpmax]. */
891         if (*stp < param->min_step) *stp = param->min_step;
892         if (param->max_step < *stp) *stp = param->max_step;
893 
894         /*
895             If an unusual termination is to occur then let
896             stp be the lowest point obtained so far.
897          */
898         if ((brackt && ((*stp <= stmin || stmax <= *stp) || param->max_linesearch <= count + 1 || uinfo != 0)) || (brackt && (stmax - stmin <= param->xtol * stmax))) {
899             *stp = stx;
900         }
901 
902         /*
903             Compute the current value of x:
904                 x <- x + (*stp) * s.
905          */
906         veccpy(x, xp, n);
907         vecadd(x, s, *stp, n);
908 
909         /* Evaluate the function and gradient values. */
910         *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp);
911         vecdot(&dg, g, s, n);
912 
913         ftest1 = finit + *stp * dgtest;
914         ++count;
915 
916         /* Test for errors and convergence. */
917         if (brackt && ((*stp <= stmin || stmax <= *stp) || uinfo != 0)) {
918             /* Rounding errors prevent further progress. */
919             return LBFGSERR_ROUNDING_ERROR;
920         }
921         if (*stp == param->max_step && *f <= ftest1 && dg <= dgtest) {
922             /* The step is the maximum value. */
923             return LBFGSERR_MAXIMUMSTEP;
924         }
925         if (*stp == param->min_step && (ftest1 < *f || dgtest <= dg)) {
926             /* The step is the minimum value. */
927             return LBFGSERR_MINIMUMSTEP;
928         }
929         if (brackt && (stmax - stmin) <= param->xtol * stmax) {
930             /* Relative width of the interval of uncertainty is at most xtol. */
931             return LBFGSERR_WIDTHTOOSMALL;
932         }
933         if (param->max_linesearch <= count) {
            /* Maximum number of line search iterations reached. */
935             return LBFGSERR_MAXIMUMLINESEARCH;
936         }
937         if (*f <= ftest1 && fabs(dg) <= param->gtol * (-dginit)) {
938             /* The sufficient decrease condition and the directional derivative condition hold. */
939             return count;
940         }
941 
942         /*
943             In the first stage we seek a step for which the modified
944             function has a nonpositive value and nonnegative derivative.
945          */
946         if (stage1 && *f <= ftest1 && min2(param->ftol, param->gtol) * dginit <= dg) {
947             stage1 = 0;
948         }
949 
950         /*
951             A modified function is used to predict the step only if
952             we have not obtained a step for which the modified
953             function has a nonpositive function value and nonnegative
954             derivative, and if a lower function value has been
955             obtained but the decrease is not sufficient.
956          */
957         if (stage1 && ftest1 < *f && *f <= fx) {
958             /* Define the modified function and derivative values. */
959             fm = *f - *stp * dgtest;
960             fxm = fx - stx * dgtest;
961             fym = fy - sty * dgtest;
962             dgm = dg - dgtest;
963             dgxm = dgx - dgtest;
964             dgym = dgy - dgtest;
965 
966             /*
967                 Call update_trial_interval() to update the interval of
968                 uncertainty and to compute the new step.
969              */
970             uinfo = update_trial_interval(
971                 &stx, &fxm, &dgxm,
972                 &sty, &fym, &dgym,
973                 stp, &fm, &dgm,
974                 stmin, stmax, &brackt
975                 );
976 
977             /* Reset the function and gradient values for f. */
978             fx = fxm + stx * dgtest;
979             fy = fym + sty * dgtest;
980             dgx = dgxm + dgtest;
981             dgy = dgym + dgtest;
982         } else {
983             /*
984                 Call update_trial_interval() to update the interval of
985                 uncertainty and to compute the new step.
986              */
987             uinfo = update_trial_interval(
988                 &stx, &fx, &dgx,
989                 &sty, &fy, &dgy,
990                 stp, f, &dg,
991                 stmin, stmax, &brackt
992                 );
993         }
994 
995         /*
996             Force a sufficient decrease in the interval of uncertainty.
997          */
998         if (brackt) {
999             if (0.66 * prev_width <= fabs(sty - stx)) {
1000                 *stp = stx + 0.5 * (sty - stx);
1001             }
1002             prev_width = width;
1003             width = fabs(sty - stx);
1004         }
1005     }
1006 
1007     return LBFGSERR_LOGICERROR;
1008 }
1009 
1010 
1011 
1012 /**
1013  * Define the local variables for computing minimizers.
1014  */
1015 #define USES_MINIMIZER \
1016     lbfgsfloatval_t a, d, gamma, theta, p, q, r, s;
1017 
1018 /**
1019  * Find a minimizer of an interpolated cubic function.
1020  *  @param  cm      The minimizer of the interpolated cubic.
1021  *  @param  u       The value of one point, u.
1022  *  @param  fu      The value of f(u).
1023  *  @param  du      The value of f'(u).
1024  *  @param  v       The value of another point, v.
1025  *  @param  fv      The value of f(v).
 *  @param  dv      The value of f'(v).
1027  */
1028 #define CUBIC_MINIMIZER(cm, u, fu, du, v, fv, dv) \
1029     d = (v) - (u); \
1030     theta = ((fu) - (fv)) * 3 / d + (du) + (dv); \
1031     p = fabs(theta); \
1032     q = fabs(du); \
1033     r = fabs(dv); \
1034     s = max3(p, q, r); \
1035     /* gamma = s*sqrt((theta/s)**2 - (du/s) * (dv/s)) */ \
1036     a = theta / s; \
1037     gamma = s * sqrt(a * a - ((du) / s) * ((dv) / s)); \
1038     if ((v) < (u)) gamma = -gamma; \
1039     p = gamma - (du) + theta; \
1040     q = gamma - (du) + gamma + (dv); \
1041     r = p / q; \
1042     (cm) = (u) + r * d;
1043 
1044 /**
1045  * Find a minimizer of an interpolated cubic function.
1046  *  @param  cm      The minimizer of the interpolated cubic.
1047  *  @param  u       The value of one point, u.
1048  *  @param  fu      The value of f(u).
1049  *  @param  du      The value of f'(u).
1050  *  @param  v       The value of another point, v.
1051  *  @param  fv      The value of f(v).
 *  @param  dv      The value of f'(v).
 *  @param  xmin    The minimum value.
 *  @param  xmax    The maximum value.
1055  */
1056 #define CUBIC_MINIMIZER2(cm, u, fu, du, v, fv, dv, xmin, xmax) \
1057     d = (v) - (u); \
1058     theta = ((fu) - (fv)) * 3 / d + (du) + (dv); \
1059     p = fabs(theta); \
1060     q = fabs(du); \
1061     r = fabs(dv); \
1062     s = max3(p, q, r); \
1063     /* gamma = s*sqrt((theta/s)**2 - (du/s) * (dv/s)) */ \
1064     a = theta / s; \
1065     gamma = s * sqrt(max2(0, a * a - ((du) / s) * ((dv) / s))); \
1066     if ((u) < (v)) gamma = -gamma; \
1067     p = gamma - (dv) + theta; \
1068     q = gamma - (dv) + gamma + (du); \
1069     r = p / q; \
1070     if (r < 0. && gamma != 0.) { \
1071         (cm) = (v) - r * d; \
1072     } else if (a < 0) { \
1073         (cm) = (xmax); \
1074     } else { \
1075         (cm) = (xmin); \
1076     }
1077 
1078 /**
1079  * Find a minimizer of an interpolated quadratic function.
1080  *  @param  qm      The minimizer of the interpolated quadratic.
1081  *  @param  u       The value of one point, u.
1082  *  @param  fu      The value of f(u).
1083  *  @param  du      The value of f'(u).
1084  *  @param  v       The value of another point, v.
1085  *  @param  fv      The value of f(v).
1086  */
1087 #define QUARD_MINIMIZER(qm, u, fu, du, v, fv) \
1088     a = (v) - (u); \
1089     (qm) = (u) + (du) / (((fu) - (fv)) / a + (du)) / 2 * a;
1090 
1091 /**
1092  * Find a minimizer of an interpolated quadratic function.
1093  *  @param  qm      The minimizer of the interpolated quadratic.
1094  *  @param  u       The value of one point, u.
1095  *  @param  du      The value of f'(u).
1096  *  @param  v       The value of another point, v.
1097  *  @param  dv      The value of f'(v).
1098  */
1099 #define QUARD_MINIMIZER2(qm, u, du, v, dv) \
1100     a = (u) - (v); \
1101     (qm) = (v) + (dv) / ((dv) - (du)) * a;
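/*
 * QUARD_MINIMIZER2 is the secant step: it interpolates only the two derivative
 * values du and dv, ignoring the function values.
 */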
1102 
1103 /**
1104  * Update a safeguarded trial value and interval for line search.
1105  *
 *  The parameter x represents the step with the least function value.
 *  The parameter t represents the current step. This function assumes
 *  that the derivative at x is negative in the direction of the step.
 *  If brackt is set to true, the minimizer has been bracketed in an
 *  interval of uncertainty with endpoints between x and y.
1111  *
1112  *  @param  x       The pointer to the value of one endpoint.
1113  *  @param  fx      The pointer to the value of f(x).
1114  *  @param  dx      The pointer to the value of f'(x).
1115  *  @param  y       The pointer to the value of another endpoint.
1116  *  @param  fy      The pointer to the value of f(y).
1117  *  @param  dy      The pointer to the value of f'(y).
1118  *  @param  t       The pointer to the value of the trial value, t.
1119  *  @param  ft      The pointer to the value of f(t).
1120  *  @param  dt      The pointer to the value of f'(t).
1121  *  @param  tmin    The minimum value for the trial value, t.
1122  *  @param  tmax    The maximum value for the trial value, t.
1123  *  @param  brackt  The pointer to the predicate if the trial value is
1124  *                  bracketed.
1125  *  @retval int     Status value. Zero indicates a normal termination.
1126  *
1127  *  @see
1128  *      Jorge J. More and David J. Thuente. Line search algorithm with
1129  *      guaranteed sufficient decrease. ACM Transactions on Mathematical
1130  *      Software (TOMS), Vol 20, No 3, pp. 286-307, 1994.
1131  */
static int update_trial_interval(
1133     lbfgsfloatval_t *x,
1134     lbfgsfloatval_t *fx,
1135     lbfgsfloatval_t *dx,
1136     lbfgsfloatval_t *y,
1137     lbfgsfloatval_t *fy,
1138     lbfgsfloatval_t *dy,
1139     lbfgsfloatval_t *t,
1140     lbfgsfloatval_t *ft,
1141     lbfgsfloatval_t *dt,
1142     const lbfgsfloatval_t tmin,
1143     const lbfgsfloatval_t tmax,
1144     int *brackt
1145     )
1146 {
1147     int bound;
1148     int dsign = fsigndiff(dt, dx);
1149     lbfgsfloatval_t mc; /* minimizer of an interpolated cubic. */
1150     lbfgsfloatval_t mq; /* minimizer of an interpolated quadratic. */
1151     lbfgsfloatval_t newt;   /* new trial value. */
1152     USES_MINIMIZER;     /* for CUBIC_MINIMIZER and QUARD_MINIMIZER. */
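    /* dsign is nonzero when the directional derivatives dt and dx have opposite signs. */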
1153 
1154     /* Check the input parameters for errors. */
1155     if (*brackt) {
1156         if (*t <= min2(*x, *y) || max2(*x, *y) <= *t) {
            /* The trial value t is out of the interval. */
1158             return LBFGSERR_OUTOFINTERVAL;
1159         }
1160         if (0. <= *dx * (*t - *x)) {
1161             /* The function must decrease from x. */
1162             return LBFGSERR_INCREASEGRADIENT;
1163         }
1164         if (tmax < tmin) {
1165             /* Incorrect tmin and tmax specified. */
1166             return LBFGSERR_INCORRECT_TMINMAX;
1167         }
1168     }
1169 
1170     /*
1171         Trial value selection.
1172      */
1173     if (*fx < *ft) {
1174         /*
1175             Case 1: a higher function value.
            The minimum is bracketed. If the cubic minimizer is closer
1177             to x than the quadratic one, the cubic one is taken, else
1178             the average of the minimizers is taken.
1179          */
1180         *brackt = 1;
1181         bound = 1;
1182         CUBIC_MINIMIZER(mc, *x, *fx, *dx, *t, *ft, *dt);
1183         QUARD_MINIMIZER(mq, *x, *fx, *dx, *t, *ft);
1184         if (fabs(mc - *x) < fabs(mq - *x)) {
1185             newt = mc;
1186         } else {
1187             newt = mc + 0.5 * (mq - mc);
1188         }
1189     } else if (dsign) {
1190         /*
1191             Case 2: a lower function value and derivatives of
            opposite sign. The minimum is bracketed. If the cubic
1193             minimizer is closer to x than the quadratic (secant) one,
1194             the cubic one is taken, else the quadratic one is taken.
1195          */
1196         *brackt = 1;
1197         bound = 0;
1198         CUBIC_MINIMIZER(mc, *x, *fx, *dx, *t, *ft, *dt);
1199         QUARD_MINIMIZER2(mq, *x, *dx, *t, *dt);
1200         if (fabs(mc - *t) > fabs(mq - *t)) {
1201             newt = mc;
1202         } else {
1203             newt = mq;
1204         }
1205     } else if (fabs(*dt) < fabs(*dx)) {
1206         /*
1207             Case 3: a lower function value, derivatives of the
1208             same sign, and the magnitude of the derivative decreases.
1209             The cubic minimizer is only used if the cubic tends to
1210             infinity in the direction of the minimizer or if the minimum
1211             of the cubic is beyond t. Otherwise the cubic minimizer is
1212             defined to be either tmin or tmax. The quadratic (secant)
            minimizer is also computed and if the minimum is bracketed
            then the minimizer closest to x is taken, else the one
1215             farthest away is taken.
1216          */
1217         bound = 1;
1218         CUBIC_MINIMIZER2(mc, *x, *fx, *dx, *t, *ft, *dt, tmin, tmax);
1219         QUARD_MINIMIZER2(mq, *x, *dx, *t, *dt);
1220         if (*brackt) {
1221             if (fabs(*t - mc) < fabs(*t - mq)) {
1222                 newt = mc;
1223             } else {
1224                 newt = mq;
1225             }
1226         } else {
1227             if (fabs(*t - mc) > fabs(*t - mq)) {
1228                 newt = mc;
1229             } else {
1230                 newt = mq;
1231             }
1232         }
1233     } else {
1234         /*
1235             Case 4: a lower function value, derivatives of the
1236             same sign, and the magnitude of the derivative does
            not decrease. If the minimum is not bracketed, the step
1238             is either tmin or tmax, else the cubic minimizer is taken.
1239          */
1240         bound = 0;
1241         if (*brackt) {
1242             CUBIC_MINIMIZER(newt, *t, *ft, *dt, *y, *fy, *dy);
1243         } else if (*x < *t) {
1244             newt = tmax;
1245         } else {
1246             newt = tmin;
1247         }
1248     }
1249 
1250     /*
1251         Update the interval of uncertainty. This update does not
1252         depend on the new step or the case analysis above.
1253 
1254         - Case a: if f(x) < f(t),
1255             x <- x, y <- t.
1256         - Case b: if f(t) <= f(x) && f'(t)*f'(x) > 0,
1257             x <- t, y <- y.
1258         - Case c: if f(t) <= f(x) && f'(t)*f'(x) < 0,
1259             x <- t, y <- x.
1260      */
1261     if (*fx < *ft) {
1262         /* Case a */
1263         *y = *t;
1264         *fy = *ft;
1265         *dy = *dt;
1266     } else {
1267         /* Case c */
1268         if (dsign) {
1269             *y = *x;
1270             *fy = *fx;
1271             *dy = *dx;
1272         }
1273         /* Cases b and c */
1274         *x = *t;
1275         *fx = *ft;
1276         *dx = *dt;
1277     }
1278 
1279     /* Clip the new trial value in [tmin, tmax]. */
1280     if (tmax < newt) newt = tmax;
1281     if (newt < tmin) newt = tmin;
1282 
1283     /*
1284         Redefine the new trial value if it is close to the upper bound
1285         of the interval.
1286      */
1287     if (*brackt && bound) {
1288         mq = *x + 0.66 * (*y - *x);
1289         if (*x < *y) {
1290             if (mq < newt) newt = mq;
1291         } else {
1292             if (newt < mq) newt = mq;
1293         }
1294     }
1295 
1296     /* Return the new trial value. */
1297     *t = newt;
1298     return 0;
1299 }
1300 
1301 
1302 
1303 
1304 
static lbfgsfloatval_t owlqn_x1norm(
1306     const lbfgsfloatval_t* x,
1307     const int start,
1308     const int n
1309     )
1310 {
1311     int i;
1312     lbfgsfloatval_t norm = 0.;
1313 
1314     for (i = start;i < n;++i) {
1315         norm += fabs(x[i]);
1316     }
1317 
1318     return norm;
1319 }
1320 
static void owlqn_pseudo_gradient(
1322     lbfgsfloatval_t* pg,
1323     const lbfgsfloatval_t* x,
1324     const lbfgsfloatval_t* g,
1325     const int n,
1326     const lbfgsfloatval_t c,
1327     const int start,
1328     const int end
1329     )
1330 {
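    /*
        Compute the pseudo-gradient of f(x) + c * |x|_1 used by OWL-QN.  Outside
        the range [start, end) the L1 term does not apply, and at x[i] == 0 the
        element is the one-sided partial derivative of smaller magnitude, or zero
        when the interval [g[i] - c, g[i] + c] contains zero.
     */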
1331     int i;
1332 
    /* Use the plain gradient for the variables before the orthant-wise range. */
1334     for (i = 0;i < start;++i) {
1335         pg[i] = g[i];
1336     }
1337 
    /* Compute the pseudo-gradients. */
1339     for (i = start;i < end;++i) {
1340         if (x[i] < 0.) {
1341             /* Differentiable. */
1342             pg[i] = g[i] - c;
1343         } else if (0. < x[i]) {
1344             /* Differentiable. */
1345             pg[i] = g[i] + c;
1346         } else {
1347             if (g[i] < -c) {
1348                 /* Take the right partial derivative. */
1349                 pg[i] = g[i] + c;
1350             } else if (c < g[i]) {
1351                 /* Take the left partial derivative. */
1352                 pg[i] = g[i] - c;
1353             } else {
1354                 pg[i] = 0.;
1355             }
1356         }
1357     }
1358 
1359     for (i = end;i < n;++i) {
1360         pg[i] = g[i];
1361     }
1362 }
1363 
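/*
    Project the direction d onto the orthant defined by sign: any component whose
    sign disagrees with the corresponding entry of sign is set to zero.
 */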
static void owlqn_project(
1365     lbfgsfloatval_t* d,
1366     const lbfgsfloatval_t* sign,
1367     const int start,
1368     const int end
1369     )
1370 {
1371     int i;
1372 
1373     for (i = start;i < end;++i) {
1374         if (d[i] * sign[i] <= 0) {
1375             d[i] = 0;
1376         }
1377     }
1378 }
1379