1 /*
2 * Limited memory BFGS (L-BFGS).
3 *
4 * Copyright (c) 1990, Jorge Nocedal
5 * Copyright (c) 2007-2010 Naoaki Okazaki
6 * All rights reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 * THE SOFTWARE.
25 */
26
27 /* $Id: lbfgs.c 65 2010-01-29 12:19:16Z naoaki $ */
28
29 /*
30 This library is a C port of the FORTRAN implementation of Limited-memory
31 Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) method written by Jorge Nocedal.
32 The original FORTRAN source code is available at:
33 http://www.ece.northwestern.edu/~nocedal/lbfgs.html
34
35 The L-BFGS algorithm is described in:
36 - Jorge Nocedal.
37 Updating Quasi-Newton Matrices with Limited Storage.
38 <i>Mathematics of Computation</i>, Vol. 35, No. 151, pp. 773--782, 1980.
39 - Dong C. Liu and Jorge Nocedal.
40 On the limited memory BFGS method for large scale optimization.
41 <i>Mathematical Programming</i> B, Vol. 45, No. 3, pp. 503-528, 1989.
42
43 The line search algorithms used in this implementation are described in:
44 - John E. Dennis and Robert B. Schnabel.
45 <i>Numerical Methods for Unconstrained Optimization and Nonlinear
46 Equations</i>, Englewood Cliffs, 1983.
47 - Jorge J. More and David J. Thuente.
48 Line search algorithm with guaranteed sufficient decrease.
49 <i>ACM Transactions on Mathematical Software (TOMS)</i>, Vol. 20, No. 3,
50 pp. 286-307, 1994.
51
52 This library also implements Orthant-Wise Limited-memory Quasi-Newton (OWL-QN)
53 method presented in:
54 - Galen Andrew and Jianfeng Gao.
55 Scalable training of L1-regularized log-linear models.
56 In <i>Proceedings of the 24th International Conference on Machine
57 Learning (ICML 2007)</i>, pp. 33-40, 2007.
58
  I would like to thank the original author, Jorge Nocedal, who has been
  distributing the efficient and explanatory implementation under an open
  source license.
62 */
63
64 #ifdef HAVE_CONFIG_H
65 #include "config.h"
66 #endif/*HAVE_CONFIG_H*/
67
68 #ifndef _MSC_VER
69 #include <stdint.h>
70 #endif
71
72 #include <stdio.h>
73 #include <stdlib.h>
74 #include <math.h>
75
76 #include "lbfgs.h"
77
78 #ifdef _MSC_VER
79 #define inline __inline
80 typedef unsigned int uint32_t;
81 #endif/*_MSC_VER*/
82
83 #if defined(USE_SSE) && defined(__SSE2__) && LBFGS_FLOAT == 64
84 /* Use SSE2 optimization for 64bit double precision. */
85 #include "arithmetic_sse_double.h"
86
87 #elif defined(USE_SSE) && defined(__SSE__) && LBFGS_FLOAT == 32
88 /* Use SSE optimization for 32bit float precision. */
89 #include "arithmetic_sse_float.h"
90
91 #else
92 /* No CPU specific optimization. */
93 #include "arithmetic_ansi.h"
94
95 #endif
96
/* Minimum/maximum of two values.  Arguments may be evaluated more than
   once, so do not pass expressions with side effects. */
#define min2(a, b)      ((a) <= (b) ? (a) : (b))
#define max2(a, b)      ((a) >= (b) ? (a) : (b))
/* Maximum of three values.  The original definition carried a stray
   trailing semicolon, which made the macro unusable inside a larger
   expression; it is removed here.  Existing statement-style uses such as
   `s = max3(p, q, r);` are unaffected. */
#define max3(a, b, c)   max2(max2((a), (b)), (c))
100
/* Nonzero iff pointer p lies on a `bytes`-byte boundary (used to verify
   16-byte alignment before enabling the SSE code paths). */
#define is_aligned(p, bytes) \
    (((uintptr_t)(const void*)(p)) % (bytes) == 0)
103
/* Bundles the caller-supplied callbacks and their context so that the
   line-search routines can re-evaluate the objective function. */
struct tag_callback_data {
    int n;                          /* Number of variables, as given by the caller. */
    void *instance;                 /* Opaque user pointer forwarded to the callbacks. */
    lbfgs_evaluate_t proc_evaluate; /* Objective value / gradient evaluation callback. */
    lbfgs_progress_t proc_progress; /* Optional per-iteration progress callback (may be NULL). */
};
typedef struct tag_callback_data callback_data_t;
111
/* One slot of the limited-memory correction history: the (s, y) pair from
   a single iteration plus scratch storage for the two-loop recursion. */
struct tag_iteration_data {
    lbfgsfloatval_t alpha;  /* Scratch: alpha_j computed in the first recursion loop. */
    lbfgsfloatval_t *s;     /* [n] s_k = x_{k+1} - x_k. */
    lbfgsfloatval_t *y;     /* [n] y_k = g_{k+1} - g_k. */
    lbfgsfloatval_t ys;     /* vecdot(y, s) = 1/rho_k. */
};
typedef struct tag_iteration_data iteration_data_t;
119
/* Default optimization parameters.  NOTE(review): the value-to-field
   mapping below is inferred from the parameter checks in lbfgs();
   confirm against the field order of lbfgs_parameter_t in lbfgs.h. */
static const lbfgs_parameter_t _defparam = {
    6, 1e-5, 0, 1e-5,                      /* m, epsilon, past, delta */
    0, LBFGS_LINESEARCH_DEFAULT, 40,       /* max_iterations, linesearch, max_linesearch */
    1e-20, 1e20, 1e-4, 0.9, 0.9, 1.0e-16,  /* min_step, max_step, ftol, wolfe, gtol, xtol */
    0.0, 0, -1,                            /* orthantwise_c, orthantwise_start, orthantwise_end */
};
126
127 /* Forward function declarations. */
128
129 typedef int (*line_search_proc)(
130 int n,
131 lbfgsfloatval_t *x,
132 lbfgsfloatval_t *f,
133 lbfgsfloatval_t *g,
134 lbfgsfloatval_t *s,
135 lbfgsfloatval_t *stp,
136 const lbfgsfloatval_t* xp,
137 const lbfgsfloatval_t* gp,
138 lbfgsfloatval_t *wa,
139 callback_data_t *cd,
140 const lbfgs_parameter_t *param
141 );
142
143 static int line_search_backtracking(
144 int n,
145 lbfgsfloatval_t *x,
146 lbfgsfloatval_t *f,
147 lbfgsfloatval_t *g,
148 lbfgsfloatval_t *s,
149 lbfgsfloatval_t *stp,
150 const lbfgsfloatval_t* xp,
151 const lbfgsfloatval_t* gp,
152 lbfgsfloatval_t *wa,
153 callback_data_t *cd,
154 const lbfgs_parameter_t *param
155 );
156
157 static int line_search_backtracking_owlqn(
158 int n,
159 lbfgsfloatval_t *x,
160 lbfgsfloatval_t *f,
161 lbfgsfloatval_t *g,
162 lbfgsfloatval_t *s,
163 lbfgsfloatval_t *stp,
164 const lbfgsfloatval_t* xp,
165 const lbfgsfloatval_t* gp,
166 lbfgsfloatval_t *wp,
167 callback_data_t *cd,
168 const lbfgs_parameter_t *param
169 );
170
171 static int line_search_morethuente(
172 int n,
173 lbfgsfloatval_t *x,
174 lbfgsfloatval_t *f,
175 lbfgsfloatval_t *g,
176 lbfgsfloatval_t *s,
177 lbfgsfloatval_t *stp,
178 const lbfgsfloatval_t* xp,
179 const lbfgsfloatval_t* gp,
180 lbfgsfloatval_t *wa,
181 callback_data_t *cd,
182 const lbfgs_parameter_t *param
183 );
184
185 static int update_trial_interval(
186 lbfgsfloatval_t *x,
187 lbfgsfloatval_t *fx,
188 lbfgsfloatval_t *dx,
189 lbfgsfloatval_t *y,
190 lbfgsfloatval_t *fy,
191 lbfgsfloatval_t *dy,
192 lbfgsfloatval_t *t,
193 lbfgsfloatval_t *ft,
194 lbfgsfloatval_t *dt,
195 const lbfgsfloatval_t tmin,
196 const lbfgsfloatval_t tmax,
197 int *brackt
198 );
199
200 static lbfgsfloatval_t owlqn_x1norm(
201 const lbfgsfloatval_t* x,
202 const int start,
203 const int n
204 );
205
206 static void owlqn_pseudo_gradient(
207 lbfgsfloatval_t* pg,
208 const lbfgsfloatval_t* x,
209 const lbfgsfloatval_t* g,
210 const int n,
211 const lbfgsfloatval_t c,
212 const int start,
213 const int end
214 );
215
216 static void owlqn_project(
217 lbfgsfloatval_t* d,
218 const lbfgsfloatval_t* sign,
219 const int start,
220 const int end
221 );
222
223
224 #if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
/* Round n up to the next multiple of 8 so the SSE kernels can process
   the vectors in full-width chunks. */
static int round_out_variables(int n)
{
    return (n + 7) / 8 * 8;
}
232 #endif/*defined(USE_SSE)*/
233
lbfgs_malloc(int n)234 lbfgsfloatval_t* lbfgs_malloc(int n)
235 {
236 #if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
237 n = round_out_variables(n);
238 #endif/*defined(USE_SSE)*/
239 return (lbfgsfloatval_t*)vecalloc(sizeof(lbfgsfloatval_t) * (size_t) n);
240 }
241
/* Release an array obtained from lbfgs_malloc().  Safe for NULL per the
   underlying vecfree()/free() contract. */
void lbfgs_free(lbfgsfloatval_t *x)
{
    vecfree(x);
}
246
/* Initialize *param with the library defaults (_defparam).
 *
 * Plain struct assignment replaces the original memcpy(): it is
 * type-checked by the compiler and does not depend on <string.h>,
 * which this file never includes directly (memcpy was only declared
 * transitively, if at all). */
void lbfgs_parameter_init(lbfgs_parameter_t *param)
{
    *param = _defparam;
}
251
/*
 * Minimize the objective reported by proc_evaluate using L-BFGS, or
 * OWL-QN when param->orthantwise_c != 0.  On return, x holds the final
 * variables and *ptr_fx (if non-NULL) the final objective value.
 * Returns LBFGS_SUCCESS / LBFGS_STOP / LBFGS_ALREADY_MINIMIZED or a
 * negative LBFGSERR_* code.
 */
int lbfgs(
    int n,
    lbfgsfloatval_t *x,
    lbfgsfloatval_t *ptr_fx,
    lbfgs_evaluate_t proc_evaluate,
    lbfgs_progress_t proc_progress,
    void *instance,
    lbfgs_parameter_t *_param
    )
{
    int ret;
    int i, j, k, ls, end, bound;
    lbfgsfloatval_t step;

    /* Constant parameters and their default values. */
    lbfgs_parameter_t param = (_param != NULL) ? (*_param) : _defparam;
    const int m = param.m;

    lbfgsfloatval_t *xp = NULL;
    lbfgsfloatval_t *g = NULL, *gp = NULL, *pg = NULL;
    lbfgsfloatval_t *d = NULL, *w = NULL, *pf = NULL;
    iteration_data_t *lm = NULL, *it = NULL;
    lbfgsfloatval_t ys, yy;
    lbfgsfloatval_t xnorm, gnorm, beta;
    lbfgsfloatval_t fx = 0.;
    lbfgsfloatval_t rate = 0.;
    line_search_proc linesearch = line_search_morethuente;

    /* Construct a callback data. */
    callback_data_t cd;
    cd.n = n;
    cd.instance = instance;
    cd.proc_evaluate = proc_evaluate;
    cd.proc_progress = proc_progress;

#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
    /* Round out the number of variables. */
    n = round_out_variables(n);
#endif/*defined(USE_SSE)*/

    /* Check the input parameters for errors. */
    if (n <= 0) {
        return LBFGSERR_INVALID_N;
    }
#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
    if (n % 8 != 0) {
        return LBFGSERR_INVALID_N_SSE;
    }
    if (!is_aligned(x, 16)) {
        return LBFGSERR_INVALID_X_SSE;
    }
#endif/*defined(USE_SSE)*/
    if (param.epsilon < 0.) {
        return LBFGSERR_INVALID_EPSILON;
    }
    if (param.past < 0) {
        return LBFGSERR_INVALID_TESTPERIOD;
    }
    if (param.delta < 0.) {
        return LBFGSERR_INVALID_DELTA;
    }
    if (param.min_step < 0.) {
        return LBFGSERR_INVALID_MINSTEP;
    }
    if (param.max_step < param.min_step) {
        return LBFGSERR_INVALID_MAXSTEP;
    }
    if (param.ftol < 0.) {
        return LBFGSERR_INVALID_FTOL;
    }
    if (param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE ||
        param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE) {
        /* Wolfe coefficient must lie strictly between ftol and 1. */
        if (param.wolfe <= param.ftol || 1. <= param.wolfe) {
            return LBFGSERR_INVALID_WOLFE;
        }
    }
    if (param.gtol < 0.) {
        return LBFGSERR_INVALID_GTOL;
    }
    if (param.xtol < 0.) {
        return LBFGSERR_INVALID_XTOL;
    }
    if (param.max_linesearch <= 0) {
        return LBFGSERR_INVALID_MAXLINESEARCH;
    }
    if (param.orthantwise_c < 0.) {
        return LBFGSERR_INVALID_ORTHANTWISE;
    }
    if (param.orthantwise_start < 0 || n < param.orthantwise_start) {
        return LBFGSERR_INVALID_ORTHANTWISE_START;
    }
    if (param.orthantwise_end < 0) {
        /* A negative end index means "regularize through the last variable". */
        param.orthantwise_end = n;
    }
    if (n < param.orthantwise_end) {
        return LBFGSERR_INVALID_ORTHANTWISE_END;
    }
    if (param.orthantwise_c != 0.) {
        switch (param.linesearch) {
        case LBFGS_LINESEARCH_BACKTRACKING:
            linesearch = line_search_backtracking_owlqn;
            break;
        default:
            /* Only the backtracking method is available. */
            return LBFGSERR_INVALID_LINESEARCH;
        }
    } else {
        switch (param.linesearch) {
        case LBFGS_LINESEARCH_MORETHUENTE:
            linesearch = line_search_morethuente;
            break;
        case LBFGS_LINESEARCH_BACKTRACKING_ARMIJO:
        case LBFGS_LINESEARCH_BACKTRACKING_WOLFE:
        case LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE:
            linesearch = line_search_backtracking;
            break;
        default:
            return LBFGSERR_INVALID_LINESEARCH;
        }
    }

    /* Allocate working space. */
    xp = (lbfgsfloatval_t*)vecalloc((size_t) n * sizeof(lbfgsfloatval_t));
    g = (lbfgsfloatval_t*)vecalloc((size_t) n * sizeof(lbfgsfloatval_t));
    gp = (lbfgsfloatval_t*)vecalloc((size_t) n * sizeof(lbfgsfloatval_t));
    d = (lbfgsfloatval_t*)vecalloc((size_t) n * sizeof(lbfgsfloatval_t));
    w = (lbfgsfloatval_t*)vecalloc((size_t) n * sizeof(lbfgsfloatval_t));
    if (xp == NULL || g == NULL || gp == NULL || d == NULL || w == NULL) {
        ret = LBFGSERR_OUTOFMEMORY;
        goto lbfgs_exit;
    }

    if (param.orthantwise_c != 0.) {
        /* Allocate working space for OW-LQN (pseudo-gradient). */
        pg = (lbfgsfloatval_t*)vecalloc((size_t) n * sizeof(lbfgsfloatval_t));
        if (pg == NULL) {
            ret = LBFGSERR_OUTOFMEMORY;
            goto lbfgs_exit;
        }
    }

    /* Allocate limited memory storage. */
    lm = (iteration_data_t*)vecalloc((size_t) m * sizeof(iteration_data_t));
    if (lm == NULL) {
        ret = LBFGSERR_OUTOFMEMORY;
        goto lbfgs_exit;
    }

    /* Initialize the limited memory. */
    for (i = 0;i < m;++i) {
        it = &lm[i];
        it->alpha = 0;
        it->ys = 0;
        it->s = (lbfgsfloatval_t*)vecalloc((size_t) n * sizeof(lbfgsfloatval_t));
        it->y = (lbfgsfloatval_t*)vecalloc((size_t) n * sizeof(lbfgsfloatval_t));
        if (it->s == NULL || it->y == NULL) {
            ret = LBFGSERR_OUTOFMEMORY;
            goto lbfgs_exit;
        }
    }

    /* Allocate an array for storing previous values of the objective function
       (circular buffer of length param.past, used by the delta-based test). */
    if (0 < param.past) {
        pf = (lbfgsfloatval_t*)vecalloc((size_t) param.past * sizeof(lbfgsfloatval_t));
    }

    /* Evaluate the function value and its gradient. */
    fx = cd.proc_evaluate(cd.instance, x, g, cd.n, 0);
    if (0. != param.orthantwise_c) {
        /* Compute the L1 norm of the variable and add it to the object value. */
        xnorm = owlqn_x1norm(x, param.orthantwise_start, param.orthantwise_end);
        fx += xnorm * param.orthantwise_c;
        owlqn_pseudo_gradient(
            pg, x, g, n,
            param.orthantwise_c, param.orthantwise_start, param.orthantwise_end
            );
    }

    /* Store the initial value of the objective function. */
    if (pf != NULL) {
        pf[0] = fx;
    }

    /*
        Compute the direction;
        we assume the initial hessian matrix H_0 as the identity matrix.
     */
    if (param.orthantwise_c == 0.) {
        vecncpy(d, g, n);
    } else {
        vecncpy(d, pg, n);
    }

    /*
        Make sure that the initial variables are not a minimizer.
     */
    vec2norm(&xnorm, x, n);
    if (param.orthantwise_c == 0.) {
        vec2norm(&gnorm, g, n);
    } else {
        vec2norm(&gnorm, pg, n);
    }
    if (xnorm < 1.0) xnorm = 1.0;
    if (gnorm / xnorm <= param.epsilon) {
        ret = LBFGS_ALREADY_MINIMIZED;
        goto lbfgs_exit;
    }

    /* Compute the initial step:
        step = 1.0 / sqrt(vecdot(d, d, n))
     */
    vec2norminv(&step, d, n);

    k = 1;
    end = 0;   /* index of the next free slot in the circular history lm[]. */
    for (;;) {
        /* Store the current position and gradient vectors. */
        veccpy(xp, x, n);
        veccpy(gp, g, n);

        /* Search for an optimal step. */
        if (param.orthantwise_c == 0.) {
            ls = linesearch(n, x, &fx, g, d, &step, xp, gp, w, &cd, &param);
        } else {
            ls = linesearch(n, x, &fx, g, d, &step, xp, pg, w, &cd, &param);
            owlqn_pseudo_gradient(
                pg, x, g, n,
                param.orthantwise_c, param.orthantwise_start, param.orthantwise_end
                );
        }
        if (ls < 0) {
            /* Revert to the previous point. */
            veccpy(x, xp, n);
            veccpy(g, gp, n);
            ret = ls;
            goto lbfgs_exit;
        }

        /* Compute x and g norms. */
        vec2norm(&xnorm, x, n);
        if (param.orthantwise_c == 0.) {
            vec2norm(&gnorm, g, n);
        } else {
            vec2norm(&gnorm, pg, n);
        }

        /* Report the progress. */
        if (cd.proc_progress) {
            if ((ret = cd.proc_progress(cd.instance, x, g, fx, xnorm, gnorm, step, cd.n, k, ls))) {
                goto lbfgs_exit;
            }
        }

        /*
            Convergence test.
            The criterion is given by the following formula:
                |g(x)| / \max(1, |x|) < \epsilon
         */
        if (xnorm < 1.0) xnorm = 1.0;
        if (gnorm / xnorm <= param.epsilon) {
            /* Convergence. */
            ret = LBFGS_SUCCESS;
            break;
        }

        /*
            Test for stopping criterion.
            The criterion is given by the following formula:
                (f(past_x) - f(x)) / f(x) < \delta
         */
        if (pf != NULL) {
            /* We don't test the stopping criterion while k < past. */
            if (param.past <= k) {
                /* Compute the relative improvement from the past. */
                rate = (pf[k % param.past] - fx) / fx;

                /* The stopping criterion. */
                if (rate < param.delta) {
                    ret = LBFGS_STOP;
                    break;
                }
            }

            /* Store the current value of the objective function. */
            pf[k % param.past] = fx;
        }

        if (param.max_iterations != 0 && param.max_iterations < k+1) {
            /* Maximum number of iterations. */
            ret = LBFGSERR_MAXIMUMITERATION;
            break;
        }

        /*
            Update vectors s and y:
                s_{k+1} = x_{k+1} - x_{k} = \step * d_{k}.
                y_{k+1} = g_{k+1} - g_{k}.
         */
        it = &lm[end];
        vecdiff(it->s, x, xp, n);
        vecdiff(it->y, g, gp, n);

        /*
            Compute scalars ys and yy:
                ys = y^t \cdot s = 1 / \rho.
                yy = y^t \cdot y.
            Notice that yy is used for scaling the hessian matrix H_0 (Cholesky factor).
         */
        vecdot(&ys, it->y, it->s, n);
        vecdot(&yy, it->y, it->y, n);
        it->ys = ys;

        /*
            Recursive formula to compute dir = -(H \cdot g).
            This is described in page 779 of:
                Jorge Nocedal.
                Updating Quasi-Newton Matrices with Limited Storage.
                Mathematics of Computation, Vol. 35, No. 151,
                pp. 773--782, 1980.
         */
        bound = (m <= k) ? m : k;   /* number of stored correction pairs usable. */
        ++k;
        end = (end + 1) % m;        /* advance the circular-buffer cursor. */

        /* Compute the steepest direction. */
        if (param.orthantwise_c == 0.) {
            /* Compute the negative of gradients. */
            vecncpy(d, g, n);
        } else {
            vecncpy(d, pg, n);
        }

        /* First loop of the two-loop recursion: newest to oldest pair. */
        j = end;
        for (i = 0;i < bound;++i) {
            j = (j + m - 1) % m;    /* if (--j == -1) j = m-1; */
            it = &lm[j];
            /* \alpha_{j} = \rho_{j} s^{t}_{j} \cdot q_{k+1}. */
            vecdot(&it->alpha, it->s, d, n);
            it->alpha /= it->ys;
            /* q_{i} = q_{i+1} - \alpha_{i} y_{i}. */
            vecadd(d, it->y, -it->alpha, n);
        }

        /* Scale by gamma = ys/yy, the standard H_0 scaling. */
        vecscale(d, ys / yy, n);

        /* Second loop of the recursion: oldest to newest pair. */
        for (i = 0;i < bound;++i) {
            it = &lm[j];
            /* \beta_{j} = \rho_{j} y^t_{j} \cdot \gamma_{i}. */
            vecdot(&beta, it->y, d, n);
            beta /= it->ys;
            /* \gamma_{i+1} = \gamma_{i} + (\alpha_{j} - \beta_{j}) s_{j}. */
            vecadd(d, it->s, it->alpha - beta, n);
            j = (j + 1) % m;        /* if (++j == m) j = 0; */
        }

        /*
            Constrain the search direction for orthant-wise updates:
            zero any component that does not descend w.r.t. the pseudo-gradient.
         */
        if (param.orthantwise_c != 0.) {
            for (i = param.orthantwise_start;i < param.orthantwise_end;++i) {
                if (d[i] * pg[i] >= 0) {
                    d[i] = 0;
                }
            }
        }

        /*
            Now the search direction d is ready. We try step = 1 first.
         */
        step = 1.0;
    }

lbfgs_exit:
    /* Return the final value of the objective function. */
    if (ptr_fx != NULL) {
        *ptr_fx = fx;
    }

    vecfree(pf);

    /* Free memory blocks used by this function. */
    if (lm != NULL) {
        for (i = 0;i < m;++i) {
            vecfree(lm[i].s);
            vecfree(lm[i].y);
        }
        vecfree(lm);
    }
    vecfree(pg);
    vecfree(w);
    vecfree(d);
    vecfree(gp);
    vecfree(g);
    vecfree(xp);

    return ret;
}
649
650
651
/*
 * Backtracking line search.  Starting from *stp, the step is repeatedly
 * scaled (shrunk by `shrink`, or grown by `grow` when the curvature test
 * fails) until the condition selected by param->linesearch holds:
 * Armijo, regular Wolfe, or strong Wolfe.  Returns the number of
 * function evaluations on success, or a negative LBFGSERR_* code.
 * (wp is unused here; it is part of the common line_search_proc signature.)
 */
static int line_search_backtracking(
    int n,
    lbfgsfloatval_t *x,
    lbfgsfloatval_t *f,
    lbfgsfloatval_t *g,
    lbfgsfloatval_t *s,
    lbfgsfloatval_t *stp,
    const lbfgsfloatval_t* xp,
    const lbfgsfloatval_t* gp,
    lbfgsfloatval_t *wp,
    callback_data_t *cd,
    const lbfgs_parameter_t *param
    )
{
    int trials = 0;
    lbfgsfloatval_t scale, dg;
    lbfgsfloatval_t f0, dg0 = 0., suffdec;
    const lbfgsfloatval_t shrink = 0.5, grow = 2.1;

    /* The initial step must be positive. */
    if (*stp <= 0.) {
        return LBFGSERR_INVALIDPARAMETERS;
    }

    /* Directional derivative at the starting point; s must be a descent
       direction. */
    vecdot(&dg0, g, s, n);
    if (0 < dg0) {
        return LBFGSERR_INCREASEGRADIENT;
    }

    f0 = *f;
    suffdec = param->ftol * dg0;

    for (;;) {
        /* Trial point: x = xp + (*stp) * s, then re-evaluate f and g. */
        veccpy(x, xp, n);
        vecadd(x, s, *stp, n);
        *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp);
        ++trials;

        if (*f > f0 + *stp * suffdec) {
            /* Sufficient decrease (Armijo) fails: shrink the step. */
            scale = shrink;
        } else if (param->linesearch == LBFGS_LINESEARCH_BACKTRACKING_ARMIJO) {
            /* Armijo condition holds and is all that was requested. */
            return trials;
        } else {
            /* Check the (strong) Wolfe curvature condition. */
            vecdot(&dg, g, s, n);
            if (dg < param->wolfe * dg0) {
                /* Curvature condition fails: the step is too short; grow it. */
                scale = grow;
            } else if (param->linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE) {
                /* Regular Wolfe conditions hold. */
                return trials;
            } else if (dg > -param->wolfe * dg0) {
                /* Strong Wolfe fails on the upper side: shrink. */
                scale = shrink;
            } else {
                /* Strong Wolfe conditions hold. */
                return trials;
            }
        }

        if (*stp < param->min_step) {
            /* The step became smaller than the permitted minimum. */
            return LBFGSERR_MINIMUMSTEP;
        }
        if (*stp > param->max_step) {
            /* The step became larger than the permitted maximum. */
            return LBFGSERR_MAXIMUMSTEP;
        }
        if (param->max_linesearch <= trials) {
            /* Evaluation budget exhausted. */
            return LBFGSERR_MAXIMUMLINESEARCH;
        }

        (*stp) *= scale;
    }
}
742
743
744
/*
 * Backtracking line search for OWL-QN.  Each trial point is projected
 * back onto the orthant chosen at the previous iterate (stored in wp),
 * the L1 penalty is added to the objective, and a sufficient-decrease
 * test against the projected directional derivative decides acceptance.
 * Returns the evaluation count on success or a negative LBFGSERR_* code.
 * Note: here gp is expected to hold the pseudo-gradient (see the caller).
 */
static int line_search_backtracking_owlqn(
    int n,
    lbfgsfloatval_t *x,
    lbfgsfloatval_t *f,
    lbfgsfloatval_t *g,
    lbfgsfloatval_t *s,
    lbfgsfloatval_t *stp,
    const lbfgsfloatval_t* xp,
    const lbfgsfloatval_t* gp,
    lbfgsfloatval_t *wp,
    callback_data_t *cd,
    const lbfgs_parameter_t *param
    )
{
    int i, evals = 0;
    lbfgsfloatval_t shrink = 0.5, l1 = 0.;
    lbfgsfloatval_t f0 = *f, proj_dg;

    /* The initial step must be positive. */
    if (*stp <= 0.) {
        return LBFGSERR_INVALIDPARAMETERS;
    }

    /* Choose the orthant for the new point: where x is zero, take the
       sign of the negative pseudo-gradient instead. */
    for (i = 0;i < n;++i) {
        wp[i] = (xp[i] == 0.) ? -gp[i] : xp[i];
    }

    for (;;) {
        /* Trial point x = xp + (*stp) * s, projected onto the orthant. */
        veccpy(x, xp, n);
        vecadd(x, s, *stp, n);
        owlqn_project(x, wp, param->orthantwise_start, param->orthantwise_end);

        /* Evaluate the smooth part, then add the L1 penalty. */
        *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp);
        l1 = owlqn_x1norm(x, param->orthantwise_start, param->orthantwise_end);
        *f += l1 * param->orthantwise_c;

        ++evals;

        /* Directional derivative of the actual (projected) move. */
        proj_dg = 0.;
        for (i = 0;i < n;++i) {
            proj_dg += (x[i] - xp[i]) * gp[i];
        }

        if (*f <= f0 + param->ftol * proj_dg) {
            /* The sufficient decrease condition holds. */
            return evals;
        }

        if (*stp < param->min_step) {
            /* The step became smaller than the permitted minimum. */
            return LBFGSERR_MINIMUMSTEP;
        }
        if (*stp > param->max_step) {
            /* The step became larger than the permitted maximum. */
            return LBFGSERR_MAXIMUMSTEP;
        }
        if (param->max_linesearch <= evals) {
            /* Evaluation budget exhausted. */
            return LBFGSERR_MAXIMUMLINESEARCH;
        }

        (*stp) *= shrink;
    }
}
816
817
818
/*
 * More-Thuente line search: finds a step satisfying the sufficient
 * decrease (ftol) and directional derivative (gtol) conditions by
 * maintaining an interval of uncertainty [stx, sty] and repeatedly
 * calling update_trial_interval().  Returns the number of function
 * evaluations on success, or a negative LBFGSERR_* code.
 */
static int line_search_morethuente(
    int n,
    lbfgsfloatval_t *x,
    lbfgsfloatval_t *f,
    lbfgsfloatval_t *g,
    lbfgsfloatval_t *s,
    lbfgsfloatval_t *stp,
    const lbfgsfloatval_t* xp,
    const lbfgsfloatval_t* gp,
    lbfgsfloatval_t *wa,
    callback_data_t *cd,
    const lbfgs_parameter_t *param
    )
{
    int count = 0;
    int brackt, stage1, uinfo = 0;
    lbfgsfloatval_t dg;
    lbfgsfloatval_t stx, fx, dgx;
    lbfgsfloatval_t sty, fy, dgy;
    lbfgsfloatval_t fxm, dgxm, fym, dgym, fm, dgm;
    lbfgsfloatval_t finit, ftest1, dginit, dgtest;
    lbfgsfloatval_t width, prev_width;
    lbfgsfloatval_t stmin, stmax;

    /* Check the input parameters for errors. */
    if (*stp <= 0.) {
        return LBFGSERR_INVALIDPARAMETERS;
    }

    /* Compute the initial gradient in the search direction. */
    vecdot(&dginit, g, s, n);

    /* Make sure that s points to a descent direction. */
    if (0 < dginit) {
        return LBFGSERR_INCREASEGRADIENT;
    }

    /* Initialize local variables. */
    brackt = 0;             /* Nonzero once the minimizer is bracketed. */
    stage1 = 1;             /* Nonzero while using the modified function. */
    finit = *f;
    dgtest = param->ftol * dginit;
    width = param->max_step - param->min_step;
    prev_width = 2.0 * width;

    /*
        The variables stx, fx, dgx contain the values of the step,
        function, and directional derivative at the best step.
        The variables sty, fy, dgy contain the value of the step,
        function, and derivative at the other endpoint of
        the interval of uncertainty.
        The variables stp, f, dg contain the values of the step,
        function, and derivative at the current step.
     */
    stx = sty = 0.;
    fx = fy = finit;
    dgx = dgy = dginit;

    for (;;) {
        /*
            Set the minimum and maximum steps to correspond to the
            present interval of uncertainty.
         */
        if (brackt) {
            stmin = min2(stx, sty);
            stmax = max2(stx, sty);
        } else {
            stmin = stx;
            stmax = *stp + 4.0 * (*stp - stx);
        }

        /* Clip the step in the range of [stpmin, stpmax]. */
        if (*stp < param->min_step) *stp = param->min_step;
        if (param->max_step < *stp) *stp = param->max_step;

        /*
            If an unusual termination is to occur then let
            stp be the lowest point obtained so far.
         */
        if ((brackt && ((*stp <= stmin || stmax <= *stp) || param->max_linesearch <= count + 1 || uinfo != 0)) || (brackt && (stmax - stmin <= param->xtol * stmax))) {
            *stp = stx;
        }

        /*
            Compute the current value of x:
                x <- x + (*stp) * s.
         */
        veccpy(x, xp, n);
        vecadd(x, s, *stp, n);

        /* Evaluate the function and gradient values. */
        *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp);
        vecdot(&dg, g, s, n);

        /* ftest1 is the sufficient-decrease (Armijo) threshold at *stp. */
        ftest1 = finit + *stp * dgtest;
        ++count;

        /* Test for errors and convergence. */
        if (brackt && ((*stp <= stmin || stmax <= *stp) || uinfo != 0)) {
            /* Rounding errors prevent further progress. */
            return LBFGSERR_ROUNDING_ERROR;
        }
        if (*stp == param->max_step && *f <= ftest1 && dg <= dgtest) {
            /* The step is the maximum value. */
            return LBFGSERR_MAXIMUMSTEP;
        }
        if (*stp == param->min_step && (ftest1 < *f || dgtest <= dg)) {
            /* The step is the minimum value. */
            return LBFGSERR_MINIMUMSTEP;
        }
        if (brackt && (stmax - stmin) <= param->xtol * stmax) {
            /* Relative width of the interval of uncertainty is at most xtol. */
            return LBFGSERR_WIDTHTOOSMALL;
        }
        if (param->max_linesearch <= count) {
            /* Maximum number of iteration. */
            return LBFGSERR_MAXIMUMLINESEARCH;
        }
        if (*f <= ftest1 && fabs(dg) <= param->gtol * (-dginit)) {
            /* The sufficient decrease condition and the directional derivative condition hold. */
            return count;
        }

        /*
            In the first stage we seek a step for which the modified
            function has a nonpositive value and nonnegative derivative.
         */
        if (stage1 && *f <= ftest1 && min2(param->ftol, param->gtol) * dginit <= dg) {
            stage1 = 0;
        }

        /*
            A modified function is used to predict the step only if
            we have not obtained a step for which the modified
            function has a nonpositive function value and nonnegative
            derivative, and if a lower function value has been
            obtained but the decrease is not sufficient.
         */
        if (stage1 && ftest1 < *f && *f <= fx) {
            /* Define the modified function and derivative values
               (psi(stp) = f(stp) - f(0) - stp * dgtest). */
            fm = *f - *stp * dgtest;
            fxm = fx - stx * dgtest;
            fym = fy - sty * dgtest;
            dgm = dg - dgtest;
            dgxm = dgx - dgtest;
            dgym = dgy - dgtest;

            /*
                Call update_trial_interval() to update the interval of
                uncertainty and to compute the new step.
             */
            uinfo = update_trial_interval(
                &stx, &fxm, &dgxm,
                &sty, &fym, &dgym,
                stp, &fm, &dgm,
                stmin, stmax, &brackt
                );

            /* Reset the function and gradient values for f. */
            fx = fxm + stx * dgtest;
            fy = fym + sty * dgtest;
            dgx = dgxm + dgtest;
            dgy = dgym + dgtest;
        } else {
            /*
                Call update_trial_interval() to update the interval of
                uncertainty and to compute the new step.
             */
            uinfo = update_trial_interval(
                &stx, &fx, &dgx,
                &sty, &fy, &dgy,
                stp, f, &dg,
                stmin, stmax, &brackt
                );
        }

        /*
            Force a sufficient decrease in the interval of uncertainty:
            if the interval did not shrink enough over two steps, bisect.
         */
        if (brackt) {
            if (0.66 * prev_width <= fabs(sty - stx)) {
                *stp = stx + 0.5 * (sty - stx);
            }
            prev_width = width;
            width = fabs(sty - stx);
        }
    }

    /* Unreachable: the loop always returns. */
    return LBFGSERR_LOGICERROR;
}
1009
1010
1011
1012 /**
1013 * Define the local variables for computing minimizers.
1014 */
/* Declares the scratch locals required by the *_MINIMIZER macros below;
   must appear in the declaration part of any function that uses them. */
#define USES_MINIMIZER \
    lbfgsfloatval_t a, d, gamma, theta, p, q, r, s;
1017
1018 /**
1019 * Find a minimizer of an interpolated cubic function.
1020 * @param cm The minimizer of the interpolated cubic.
1021 * @param u The value of one point, u.
1022 * @param fu The value of f(u).
1023 * @param du The value of f'(u).
1024 * @param v The value of another point, v.
1025 * @param fv The value of f(v).
 * @param dv The value of f'(v).
1027 */
/* Requires USES_MINIMIZER locals in scope; arguments are evaluated more
   than once, so pass only simple variables. */
#define CUBIC_MINIMIZER(cm, u, fu, du, v, fv, dv) \
    d = (v) - (u); \
    theta = ((fu) - (fv)) * 3 / d + (du) + (dv); \
    p = fabs(theta); \
    q = fabs(du); \
    r = fabs(dv); \
    s = max3(p, q, r); \
    /* gamma = s*sqrt((theta/s)**2 - (du/s) * (dv/s)) */ \
    a = theta / s; \
    gamma = s * sqrt(a * a - ((du) / s) * ((dv) / s)); \
    if ((v) < (u)) gamma = -gamma; \
    p = gamma - (du) + theta; \
    q = gamma - (du) + gamma + (dv); \
    r = p / q; \
    (cm) = (u) + r * d;
1043
1044 /**
1045 * Find a minimizer of an interpolated cubic function.
1046 * @param cm The minimizer of the interpolated cubic.
1047 * @param u The value of one point, u.
1048 * @param fu The value of f(u).
1049 * @param du The value of f'(u).
1050 * @param v The value of another point, v.
1051 * @param fv The value of f(v).
 * @param dv The value of f'(v).
 * @param xmax The maximum value.
 * @param xmin The minimum value.
1055 */
/* Variant of CUBIC_MINIMIZER clamped to [xmin, xmax]; the max2(0, ...)
   guard keeps the sqrt argument non-negative under rounding error.
   Requires USES_MINIMIZER locals in scope. */
#define CUBIC_MINIMIZER2(cm, u, fu, du, v, fv, dv, xmin, xmax) \
    d = (v) - (u); \
    theta = ((fu) - (fv)) * 3 / d + (du) + (dv); \
    p = fabs(theta); \
    q = fabs(du); \
    r = fabs(dv); \
    s = max3(p, q, r); \
    /* gamma = s*sqrt((theta/s)**2 - (du/s) * (dv/s)) */ \
    a = theta / s; \
    gamma = s * sqrt(max2(0, a * a - ((du) / s) * ((dv) / s))); \
    if ((u) < (v)) gamma = -gamma; \
    p = gamma - (dv) + theta; \
    q = gamma - (dv) + gamma + (du); \
    r = p / q; \
    if (r < 0. && gamma != 0.) { \
        (cm) = (v) - r * d; \
    } else if (a < 0) { \
        (cm) = (xmax); \
    } else { \
        (cm) = (xmin); \
    }
1077
1078 /**
1079 * Find a minimizer of an interpolated quadratic function.
1080 * @param qm The minimizer of the interpolated quadratic.
1081 * @param u The value of one point, u.
1082 * @param fu The value of f(u).
1083 * @param du The value of f'(u).
1084 * @param v The value of another point, v.
1085 * @param fv The value of f(v).
1086 */
/* Quadratic interpolation through (u, fu, du) and (v, fv).  The name
   "QUARD" is a historical misspelling of "QUAD", kept as-is since the
   *_MINIMIZER macros are used by name elsewhere in this file.
   Requires USES_MINIMIZER locals in scope. */
#define QUARD_MINIMIZER(qm, u, fu, du, v, fv) \
    a = (v) - (u); \
    (qm) = (u) + (du) / (((fu) - (fv)) / a + (du)) / 2 * a;
1090
1091 /**
1092 * Find a minimizer of an interpolated quadratic function.
1093 * @param qm The minimizer of the interpolated quadratic.
1094 * @param u The value of one point, u.
1095 * @param du The value of f'(u).
1096 * @param v The value of another point, v.
1097 * @param dv The value of f'(v).
1098 */
/* Secant (derivative-only) quadratic interpolation through (u, du) and
   (v, dv).  Requires USES_MINIMIZER locals in scope. */
#define QUARD_MINIMIZER2(qm, u, du, v, dv) \
    a = (u) - (v); \
    (qm) = (v) + (dv) / ((dv) - (du)) * a;
1102
/**
 * Update a safeguarded trial value and interval for line search.
 *
 *  The parameter x represents the step with the least function value.
 *  The parameter t represents the current step. This function assumes
 *  that the derivative at the point of x is negative in the direction
 *  of the step, i.e., that the function decreases from x along the step.
 *  If the bracket is set to true, the minimizer has been bracketed in
 *  an interval of uncertainty with endpoints between x and y.
 *
 *  @param  x       The pointer to the value of one endpoint.
 *  @param  fx      The pointer to the value of f(x).
 *  @param  dx      The pointer to the value of f'(x).
 *  @param  y       The pointer to the value of another endpoint.
 *  @param  fy      The pointer to the value of f(y).
 *  @param  dy      The pointer to the value of f'(y).
 *  @param  t       The pointer to the value of the trial value, t.
 *  @param  ft      The pointer to the value of f(t).
 *  @param  dt      The pointer to the value of f'(t).
 *  @param  tmin    The minimum value for the trial value, t.
 *  @param  tmax    The maximum value for the trial value, t.
 *  @param  brackt  The pointer to the predicate if the trial value is
 *                  bracketed.
 *  @retval int     Status value. Zero indicates a normal termination.
 *
 *  @see
 *      Jorge J. More and David J. Thuente. Line search algorithm with
 *      guaranteed sufficient decrease. ACM Transactions on Mathematical
 *      Software (TOMS), Vol 20, No 3, pp. 286-307, 1994.
 */
static int update_trial_interval(
    lbfgsfloatval_t *x,
    lbfgsfloatval_t *fx,
    lbfgsfloatval_t *dx,
    lbfgsfloatval_t *y,
    lbfgsfloatval_t *fy,
    lbfgsfloatval_t *dy,
    lbfgsfloatval_t *t,
    lbfgsfloatval_t *ft,
    lbfgsfloatval_t *dt,
    const lbfgsfloatval_t tmin,
    const lbfgsfloatval_t tmax,
    int *brackt
    )
{
    int bound;
    /* NOTE(review): fsigndiff is defined elsewhere; presumably nonzero
       when f'(t) and f'(x) have opposite signs -- the case analysis
       below relies on that reading. */
    int dsign = fsigndiff(dt, dx);
    lbfgsfloatval_t mc; /* minimizer of an interpolated cubic. */
    lbfgsfloatval_t mq; /* minimizer of an interpolated quadratic. */
    lbfgsfloatval_t newt;   /* new trial value. */
    USES_MINIMIZER;     /* for CUBIC_MINIMIZER and QUARD_MINIMIZER. */

    /* Check the input parameters for errors. */
    if (*brackt) {
        if (*t <= min2(*x, *y) || max2(*x, *y) <= *t) {
            /* The trial value t is out of the interval. */
            return LBFGSERR_OUTOFINTERVAL;
        }
        if (0. <= *dx * (*t - *x)) {
            /* The function must decrease from x. */
            return LBFGSERR_INCREASEGRADIENT;
        }
        if (tmax < tmin) {
            /* Incorrect tmin and tmax specified. */
            return LBFGSERR_INCORRECT_TMINMAX;
        }
    }

    /*
        Trial value selection.
     */
    if (*fx < *ft) {
        /*
            Case 1: a higher function value.
            The minimum is brackt. If the cubic minimizer is closer
            to x than the quadratic one, the cubic one is taken, else
            the average of the minimizers is taken.
         */
        *brackt = 1;
        bound = 1;
        CUBIC_MINIMIZER(mc, *x, *fx, *dx, *t, *ft, *dt);
        QUARD_MINIMIZER(mq, *x, *fx, *dx, *t, *ft);
        if (fabs(mc - *x) < fabs(mq - *x)) {
            newt = mc;
        } else {
            newt = mc + 0.5 * (mq - mc);
        }
    } else if (dsign) {
        /*
            Case 2: a lower function value and derivatives of
            opposite sign. The minimum is brackt. If the cubic
            minimizer is closer to x than the quadratic (secant) one,
            the cubic one is taken, else the quadratic one is taken.
         */
        *brackt = 1;
        bound = 0;
        CUBIC_MINIMIZER(mc, *x, *fx, *dx, *t, *ft, *dt);
        QUARD_MINIMIZER2(mq, *x, *dx, *t, *dt);
        if (fabs(mc - *t) > fabs(mq - *t)) {
            newt = mc;
        } else {
            newt = mq;
        }
    } else if (fabs(*dt) < fabs(*dx)) {
        /*
            Case 3: a lower function value, derivatives of the
            same sign, and the magnitude of the derivative decreases.
            The cubic minimizer is only used if the cubic tends to
            infinity in the direction of the minimizer or if the minimum
            of the cubic is beyond t. Otherwise the cubic minimizer is
            defined to be either tmin or tmax. The quadratic (secant)
            minimizer is also computed and if the minimum is brackt
            then the minimizer closest to x is taken, else the one
            farthest away is taken.
         */
        bound = 1;
        CUBIC_MINIMIZER2(mc, *x, *fx, *dx, *t, *ft, *dt, tmin, tmax);
        QUARD_MINIMIZER2(mq, *x, *dx, *t, *dt);
        if (*brackt) {
            if (fabs(*t - mc) < fabs(*t - mq)) {
                newt = mc;
            } else {
                newt = mq;
            }
        } else {
            if (fabs(*t - mc) > fabs(*t - mq)) {
                newt = mc;
            } else {
                newt = mq;
            }
        }
    } else {
        /*
            Case 4: a lower function value, derivatives of the
            same sign, and the magnitude of the derivative does
            not decrease. If the minimum is not brackt, the step
            is either tmin or tmax, else the cubic minimizer is taken.
         */
        bound = 0;
        if (*brackt) {
            CUBIC_MINIMIZER(newt, *t, *ft, *dt, *y, *fy, *dy);
        } else if (*x < *t) {
            newt = tmax;
        } else {
            newt = tmin;
        }
    }

    /*
        Update the interval of uncertainty. This update does not
        depend on the new step or the case analysis above.

        - Case a: if f(x) < f(t),
            x <- x, y <- t.
        - Case b: if f(t) <= f(x) && f'(t)*f'(x) > 0,
            x <- t, y <- y.
        - Case c: if f(t) <= f(x) && f'(t)*f'(x) < 0, 
            x <- t, y <- x.
     */
    if (*fx < *ft) {
        /* Case a */
        *y = *t;
        *fy = *ft;
        *dy = *dt;
    } else {
        /* Case c */
        if (dsign) {
            *y = *x;
            *fy = *fx;
            *dy = *dx;
        }
        /* Cases b and c */
        *x = *t;
        *fx = *ft;
        *dx = *dt;
    }

    /* Clip the new trial value in [tmin, tmax]. */
    if (tmax < newt) newt = tmax;
    if (newt < tmin) newt = tmin;

    /*
        Redefine the new trial value if it is close to the upper bound
        of the interval.
     */
    if (*brackt && bound) {
        mq = *x + 0.66 * (*y - *x);
        if (*x < *y) {
            if (mq < newt) newt = mq;
        } else {
            if (newt < mq) newt = mq;
        }
    }

    /* Return the new trial value. */
    *t = newt;
    return 0;
}
1300
1301
1302
1303
1304
/* Return the L1 norm of x restricted to indices [start, n). */
static lbfgsfloatval_t owlqn_x1norm(
    const lbfgsfloatval_t* x,
    const int start,
    const int n
    )
{
    lbfgsfloatval_t sum = 0.;
    int j = start;

    while (j < n) {
        sum += fabs(x[j]);
        ++j;
    }

    return sum;
}
1320
/*
 * Compute the pseudo-gradient of f(x) + c * |x|_1 into pg.
 *
 * The L1 penalty applies only to components in [start, end); outside
 * that range the plain gradient g is copied through unchanged.
 */
static void owlqn_pseudo_gradient(
    lbfgsfloatval_t* pg,
    const lbfgsfloatval_t* x,
    const lbfgsfloatval_t* g,
    const int n,
    const lbfgsfloatval_t c,
    const int start,
    const int end
    )
{
    int i;

    /* Unpenalized prefix: pass the gradient through. */
    for (i = 0; i < start; ++i) {
        pg[i] = g[i];
    }

    /* Penalized range: |x| is differentiable away from zero; at zero,
       pick the one-sided derivative that points downhill, or zero when
       the subgradient interval [-c, c] contains the gradient. */
    for (i = start; i < end; ++i) {
        if (x[i] < 0.) {
            pg[i] = g[i] - c;   /* differentiable, d|x|/dx = -1 */
        } else if (x[i] > 0.) {
            pg[i] = g[i] + c;   /* differentiable, d|x|/dx = +1 */
        } else if (g[i] < -c) {
            pg[i] = g[i] + c;   /* right partial derivative */
        } else if (g[i] > c) {
            pg[i] = g[i] - c;   /* left partial derivative */
        } else {
            pg[i] = 0.;
        }
    }

    /* Unpenalized suffix: pass the gradient through. */
    for (i = end; i < n; ++i) {
        pg[i] = g[i];
    }
}
1363
/*
 * Project the direction d onto the orthant indicated by sign:
 * any component whose sign disagrees with sign[i] is zeroed so the
 * step stays within the current orthant.
 */
static void owlqn_project(
    lbfgsfloatval_t* d,
    const lbfgsfloatval_t* sign,
    const int start,
    const int end
    )
{
    int k;

    for (k = start; k < end; ++k) {
        const lbfgsfloatval_t agreement = d[k] * sign[k];
        if (agreement <= 0) {
            d[k] = 0;
        }
    }
}
1379