1 /*
2  *
3  * lbfgsb_new.cpp
4  * HAL_HAS
5  *
6  * CSIRO Open Source Software License Agreement (GPLv3)
7  * Copyright (c) 2014, Commonwealth Scientific and Industrial Research Organisation (CSIRO) ABN 41 687 119 230.
8  * All rights reserved. CSIRO is willing to grant you a license to HAL-HAS on the terms of the GNU General Public
9  * License version 3 as published by the Free Software Foundation (http://www.gnu.org/licenses/gpl.html), except
10  * where otherwise indicated for third party material.
11  * The following additional terms apply under clause 7 of that license:
12  * EXCEPT AS EXPRESSLY STATED IN THIS AGREEMENT AND TO THE FULL EXTENT PERMITTED BY APPLICABLE LAW, THE SOFTWARE
13  * IS PROVIDED "AS-IS". CSIRO MAKES NO REPRESENTATIONS, WARRANTIES OR CONDITIONS OF ANY KIND, EXPRESS OR IMPLIED,
14  * INCLUDING BUT NOT LIMITED TO ANY REPRESENTATIONS, WARRANTIES OR CONDITIONS REGARDING THE CONTENTS OR ACCURACY
15  * OF THE SOFTWARE, OR OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, THE ABSENCE
16  * OF LATENT OR OTHER DEFECTS, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT DISCOVERABLE.
17  * TO THE FULL EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT SHALL CSIRO BE LIABLE ON ANY LEGAL THEORY (INCLUDING,
18  * WITHOUT LIMITATION, IN AN ACTION FOR BREACH OF CONTRACT, NEGLIGENCE OR OTHERWISE) FOR ANY CLAIM, LOSS, DAMAGES
19  * OR OTHER LIABILITY HOWSOEVER INCURRED.  WITHOUT LIMITING THE SCOPE OF THE PREVIOUS SENTENCE THE EXCLUSION OF
20  * LIABILITY SHALL INCLUDE: LOSS OF PRODUCTION OR OPERATION TIME, LOSS, DAMAGE OR CORRUPTION OF DATA OR RECORDS;
21  * OR LOSS OF ANTICIPATED SAVINGS, OPPORTUNITY, REVENUE, PROFIT OR GOODWILL, OR OTHER ECONOMIC LOSS; OR ANY SPECIAL,
22  * INCIDENTAL, INDIRECT, CONSEQUENTIAL, PUNITIVE OR EXEMPLARY DAMAGES, ARISING OUT OF OR IN CONNECTION WITH THIS
23  * AGREEMENT, ACCESS OF THE SOFTWARE OR ANY OTHER DEALINGS WITH THE SOFTWARE, EVEN IF CSIRO HAS BEEN ADVISED OF
24  * THE POSSIBILITY OF SUCH CLAIM, LOSS, DAMAGES OR OTHER LIABILITY.
25  * APPLICABLE LEGISLATION SUCH AS THE AUSTRALIAN CONSUMER LAW MAY APPLY REPRESENTATIONS, WARRANTIES, OR CONDITIONS,
26  * OR IMPOSES OBLIGATIONS OR LIABILITY ON CSIRO THAT CANNOT BE EXCLUDED, RESTRICTED OR MODIFIED TO THE FULL EXTENT
27  * SET OUT IN THE EXPRESS TERMS OF THIS CLAUSE ABOVE "CONSUMER GUARANTEES".  TO THE EXTENT THAT SUCH CONSUMER
28  * GUARANTEES CONTINUE TO APPLY, THEN TO THE FULL EXTENT PERMITTED BY THE APPLICABLE LEGISLATION, THE LIABILITY
29  * OF CSIRO UNDER THE RELEVANT CONSUMER GUARANTEE IS LIMITED (WHERE PERMITTED AT CSIRO’S OPTION) TO ONE OF FOLLOWING
30  * REMEDIES OR SUBSTANTIALLY EQUIVALENT REMEDIES:
31  * (a)               THE REPLACEMENT OF THE SOFTWARE, THE SUPPLY OF EQUIVALENT SOFTWARE, OR SUPPLYING RELEVANT
32  *                   SERVICES AGAIN;
33  * (b)               THE REPAIR OF THE SOFTWARE;
34  * (c)               THE PAYMENT OF THE COST OF REPLACING THE SOFTWARE, OF ACQUIRING EQUIVALENT SOFTWARE, HAVING THE
35  *                   RELEVANT SERVICES SUPPLIED AGAIN, OR HAVING THE SOFTWARE REPAIRED.
36  * IN THIS CLAUSE, CSIRO INCLUDES ANY THIRD PARTY AUTHOR OR OWNER OF ANY PART OF THE SOFTWARE OR MATERIAL DISTRIBUTED
37  * WITH IT.  CSIRO MAY ENFORCE ANY RIGHTS ON BEHALF OF THE RELEVANT THIRD PARTY.
38  * Third Party Components
39  * The following third party components are distributed with the Software.  You agree to comply with the license
40  * terms for these components as part of accessing the Software.  Other third party software may also be identified
41  * in separate files distributed with the Software.
42  * ___________________________________________________________________
43  *
44  * R : A Computer Language for Statistical Data Analysis version 3.0.1 (http://cran.r-project.org/src/base/R-3/R-3.0.1.tar.gz)
45  * Copyright (C) 2000-2004 The R Core Team
46  * This software is licensed under GNU GPL
47  *
48  * JACOBI_EIGENVALUE.C (http://people.sc.fsu.edu/~jburkardt/c_src/jacobi_eigenvalue/jacobi_eigenvalue.c)
49  * Copyright (C) 2003-2013 John Burkardt
50  * This software is licensed under GNU LGPL (http://www.gnu.org/licenses/lgpl.html)
51  * ___________________________________________________________________
52  */
53 
54 
55 #include "lbfgsb_new.h"
56 #include <algorithm>
57 //using namespace std;
58 
59 static int c__1 = 1;
60 static int c__11 = 11;
61 
62 #if 0
63 
64 // Function to access the L-BFGS-B function
65 // 1. int n : The number of the variables
66 // 2. double* x : initial values of the variables
67 // 3. double* l : lower bounds of the variables
68 // 4. int maxit : max # of iterations
69 // 5. void* ex  : the wrapped variables for objective function
70 // After the function is invoked, the values of x will be updated
71 void lbfgsb_R(int n, double* x, double* l, int maxit, void* ex) {
72 	int i;
73 	double Fmin;
74 	int fail;
75 	int fncount;
76 	int grcount;
77 	char msg[100];
78 
79 	int m = 5;          // number of BFGS updates retained in the "L-BFGS-B" method. It defaults to 5.
80 
81 	double *u = NULL;   // upper bounds of the variables;
82 
83 	int *nbd;           // 0: unbounded; 1: lower bounded; 2: both lower & upper; 3: upper bounded
84 	nbd = new int[n];
85 	for (i=0; i<n; i++)
86 		nbd[i] = 1;
87 
88 	double factr = 1e7; // control the convergence of the "L-BFGS-B" method.
89 	// Convergence occurs when the reduction in the object is within this factor
90 	// of the machine tolerance.
91 	// Default is 1e7, that is a tolerance of about 1e-8
92 
93 	double pgtol = 0;   // helps control the convergence of the "L-BFGS-B" method.
94 	// It is a tolerance on the projected gradient in the current search direction.
95 	// Default is zero, when the check is suppressed
96 
97 	int trace = 0;      // non-negative integer.
98 	// If positive, tracing information on the progress of the optimization is produced.
99 	// Higher values may produce more tracing information.
100 
101 	int nREPORT = 10;   // The frequency of reports for the "L-BFGS-B" methods if "trace" is positive.
102 	// Defaults to every 10 iterations.
103 
104 /*#ifdef USE_OLD_PARAM
105 	lbfgsb(n, m, x, l, u, nbd, &Fmin, fn, gr1, &fail, ex,
106 			factr, pgtol, &fncount, &grcount, maxit, msg, trace, nREPORT);
107 #else*/
108 	lbfgsb(n, m, x, l, u, nbd, &Fmin, fn, gr2, &fail, ex,
109 			factr, pgtol, &fncount, &grcount, maxit, msg, trace, nREPORT);
110 //#endif
111 
112 	delete[] nbd;
113 }
114 
115 // Function to access the L-BFGS-B function
116 // 1. int n : The number of the variables
117 // 2. double* x : initial values of the variables
118 // 3. double* l : lower bounds of the variables
119 // 4. double* u : upper bounds of the variables
120 // 5. int maxit : max # of iterations
121 // 6. void* ex  : the wrapped variables for objective function
122 // After the function is invoked, the values of x will be updated
123 void lbfgsb_R2(int n, double* x, double* l, double* u, int maxit, void* ex) {
124 	int i;
125 	double Fmin;
126 	int fail;
127 	int fncount;
128 	int grcount;
129 	char msg[100];
130 
131 	int m = 5;          // number of BFGS updates retained in the "L-BFGS-B" method. It defaults to 5.
132 
133 	int *nbd;           // 0: unbounded; 1: lower bounded; 2: both lower & upper; 3: upper bounded
134 	nbd = new int[n];
135 	for (i=0; i<n; i++)
136 		nbd[i] = 2;
137 
138 	double factr = 1e7; // control the convergence of the "L-BFGS-B" method.
139 	// Convergence occurs when the reduction in the object is within this factor
140 	// of the machine tolerance.
141 	// Default is 1e7, that is a tolerance of about 1e-8
142 
143 	double pgtol = 0;   // helps control the convergence of the "L-BFGS-B" method.
144 	// It is a tolerance on the projected gradient in the current search direction.
145 	// Default is zero, when the check is suppressed
146 
147 	int trace = 0;      // non-negative integer.
148 	// If positive, tracing information on the progress of the optimization is produced.
149 	// Higher values may produce more tracing information.
150 
151 	int nREPORT = 10;   // The frequency of reports for the "L-BFGS-B" methods if "trace" is positive.
152 	// Defaults to every 10 iterations.
153 
154 /*#ifdef USE_OLD_PARAM
155 	lbfgsb(n, m, x, l, u, nbd, &Fmin, fn, gr1, &fail, ex,
156 			factr, pgtol, &fncount, &grcount, maxit, msg, trace, nREPORT);
157 #else*/
158 	lbfgsb(n, m, x, l, u, nbd, &Fmin, fn, gr2, &fail, ex,
159 			factr, pgtol, &fncount, &grcount, maxit, msg, trace, nREPORT);
160 //#endif
161 
162 	delete[] nbd;
163 }
164 
165 #endif
166 
167 // ========================================================= //
168 // FUNCTIONS converted from R v.3.0.1
169 // ========================================================= //
170 
lbfgsb(int n,int m,double * x,double * l,double * u,int * nbd,double * Fmin,optimfn fminfn,optimgr fmingr,int * fail,void * ex,double factr,double pgtol,int * fncount,int * grcount,int maxit,char * msg,int trace,int nREPORT)171 void lbfgsb(int n, int m, double *x, double *l, double *u, int *nbd,
172 		double *Fmin, optimfn fminfn, optimgr fmingr, int *fail,
173 		void *ex, double factr, double pgtol,
174 		int *fncount, int *grcount, int maxit, char *msg,
175 		int trace, int nREPORT)
176 {
177 	char task[60];
178 	double f, *g, dsave[29], *wa;
179 	int tr = -1, iter = 0, *iwa, isave[44], lsave[4];
180 
181 	/* shut up gcc -Wall in 4.6.x */
182 
183 	for(int i = 0; i < 4; i++) lsave[i] = 0;
184 
185 	if(n == 0) { /* not handled in setulb */
186 		*fncount = 1;
187 		*grcount = 0;
188 		*Fmin = fminfn(n, u, ex);
189 		strcpy(msg, "NOTHING TO DO");
190 		*fail = 0;
191 		return;
192 	}
193 	if (nREPORT <= 0) {
194 		cerr << "REPORT must be > 0 (method = \"L-BFGS-B\")" << endl;
195 		exit(1);
196 	}
197 	switch(trace) {
198 	case 2: tr = 0; break;
199 	case 3: tr = nREPORT; break;
200 	case 4: tr = 99; break;
201 	case 5: tr = 100; break;
202 	case 6: tr = 101; break;
203 	default: tr = -1; break;
204 	}
205 
206 	*fail = 0;
207 	g = (double*) malloc (n * sizeof(double));
208 	/* this needs to be zeroed for snd in mainlb to be zeroed */
209 	wa = (double *) malloc((2*m*n+4*n+11*m*m+8*m) * sizeof(double));
210 	iwa = (int *) malloc(3*n * sizeof(int));
211 	strcpy(task, "START");
212 	while(1) {
213 		/* Main workhorse setulb() from ../appl/lbfgsb.c : */
214 		setulb(n, m, x, l, u, nbd, &f, g, factr, &pgtol, wa, iwa, task,
215 				tr, lsave, isave, dsave);
216 		/*    Rprintf("in lbfgsb - %s\n", task);*/
217 		if (strncmp(task, "FG", 2) == 0) {
218 			f = fminfn(n, x, ex);
219 			if (!isfinite(f)) {
220 				cerr << "L-BFGS-B needs finite values of 'fn'" << endl;
221 				exit(1);
222 			}
223 			fmingr(n, x, g, ex);
224 		} else if (strncmp(task, "NEW_X", 5) == 0) {
225 			iter++;
226 			if(trace == 1 && (iter % nREPORT == 0)) {
227 				cout << "iter " << iter << " value " << f << endl;
228 			}
229 			if (iter > maxit) {
230 				*fail = 1;
231 				break;
232 			}
233 		} else if (strncmp(task, "WARN", 4) == 0) {
234 			*fail = 51;
235 			break;
236 		} else if (strncmp(task, "CONV", 4) == 0) {
237 			break;
238 		} else if (strncmp(task, "ERROR", 5) == 0) {
239 			*fail = 52;
240 			break;
241 		} else { /* some other condition that is not supposed to happen */
242 			*fail = 52;
243 			break;
244 		}
245 	}
246 	*Fmin = f;
247 	*fncount = *grcount = isave[33];
248 	if (trace) {
249 		cout << "final value " << *Fmin << endl;
250 		if (iter < maxit && *fail == 0)
251 			cout << "converged" << endl;
252 		else
253 			cout << "stopped after " << iter << " iterations\n";
254 	}
255 	strcpy(msg, task);
256 	free(g);
257 	free(wa);
258 	free(iwa);
259 }
260 
setulb(int n,int m,double * x,double * l,double * u,int * nbd,double * f,double * g,double factr,double * pgtol,double * wa,int * iwa,char * task,int iprint,int * lsave,int * isave,double * dsave)261 void setulb(int n, int m, double *x, double *l, double *u, int *nbd,
262 		double *f, double *g, double factr, double *pgtol,
263 		double *wa, int * iwa, char *task, int iprint,
264 		int *lsave, int *isave, double *dsave)
265 {
266 	/*     ************
267 
268 	 Subroutine setulb
269 
270 	 This subroutine partitions the working arrays wa and iwa, and
271 	 then uses the limited memory BFGS method to solve the bound
272 	 constrained optimization problem by calling mainlb.
273 	 (The direct method will be used in the subspace minimization.)
274 
275 	 n is an integer variable.
276 	 On entry n is the dimension of the problem.
277 	 On exit n is unchanged.
278 
279 	 m is an integer variable.
280 	 On entry m is the maximum number of variable metric corrections
281 	 used to define the limited memory matrix.
282 	 On exit m is unchanged.
283 
284 	 x is a double precision array of dimension n.
285 	 On entry x is an approximation to the solution.
286 	 On exit x is the current approximation.
287 
288 	 l is a double precision array of dimension n.
289 	 On entry l is the lower bound on x.
290 	 On exit l is unchanged.
291 
292 	 u is a double precision array of dimension n.
293 	 On entry u is the upper bound on x.
294 	 On exit u is unchanged.
295 
296 	 nbd is an integer array of dimension n.
297 	 On entry nbd represents the type of bounds imposed on the
298 	 variables, and must be specified as follows:
299 	 nbd(i)=0 if x(i) is unbounded,
300 	 1 if x(i) has only a lower bound,
301 	 2 if x(i) has both lower and upper bounds, and
302 	 3 if x(i) has only an upper bound.
303 	 On exit nbd is unchanged.
304 
305 	 f is a double precision variable.
306 	 On first entry f is unspecified.
307 	 On final exit f is the value of the function at x.
308 
309 	 g is a double precision array of dimension n.
310 	 On first entry g is unspecified.
311 	 On final exit g is the value of the gradient at x.
312 
313 	 factr is a double precision variable.
314 	 On entry factr >= 0 is specified by the user.    The iteration
315 	 will stop when
316 
317 	 (f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr*epsmch
318 
319 	 where epsmch is the machine precision, which is automatically
320 	 generated by the code. Typical values for factr: 1.d+12 for
321 	 low accuracy; 1.d+7 for moderate accuracy; 1.d+1 for extremely
322 	 high accuracy.
323 	 On exit factr is unchanged.
324 
325 	 pgtol is a double precision variable.
326 	 On entry pgtol >= 0 is specified by the user.    The iteration
327 	 will stop when
328 
329 	 max{|proj g_i | i = 1, ..., n} <= pgtol
330 
331 	 where pg_i is the ith component of the projected gradient.
332 	 On exit pgtol is unchanged.
333 
334 	 wa is a double precision working array of length
335 	 (2mmax + 4)nmax + 11mmax^2 + 8mmax.
336 
337 	 iwa is an integer working array of length 3nmax.
338 
339 	 task is a working string of characters of length 60 indicating
340 	 the current job when entering and quitting this subroutine.
341 
342 	 iprint is an integer variable that must be set by the user.
343 	 It controls the frequency and type of output generated:
344 	 iprint<0    no output is generated;
345 	 iprint=0    print only one line at the last iteration;
346 	 0<iprint<99 print also f and |proj g| every iprint iterations;
347 	 iprint=99   print details of every iteration except n-vectors;
348 	 iprint=100  print also the changes of active set and final x;
349 	 iprint>100  print details of every iteration including x and g;
350 	 When iprint > 0, the file iterate.dat will be created to
351 	 summarize the iteration.
352 
353 	 csave is a working string of characters of length 60.
354 
355 	 lsave is a logical working array of dimension 4.
356 	 On exit with 'task' = NEW_X, the following information is
357 	 available:
358 	 If lsave(1) = .true. then  the initial X has been replaced by
359 	 its projection in the feasible set;
360 	 If lsave(2) = .true. then  the problem is constrained;
361 	 If lsave(3) = .true. then  each variable has upper and lower
362 	 bounds;
363 
364 	 isave is an integer working array of dimension 44.
365 	 On exit with 'task' = NEW_X, the following information is
366 	 available:
367 	 isave(22) = the total number of intervals explored in the
368 	 search of Cauchy points;
369 	 isave(26) = the total number of skipped BFGS updates before
370 	 the current iteration;
371 	 isave(30) = the number of current iteration;
372 	 isave(31) = the total number of BFGS updates prior the current
373 	 iteration;
374 	 isave(33) = the number of intervals explored in the search of
375 	 Cauchy point in the current iteration;
376 	 isave(34) = the total number of function and gradient
377 	 evaluations;
378 	 isave(36) = the number of function value or gradient
379 	 evaluations in the current iteration;
380 	 if isave(37) = 0  then the subspace argmin is within the box;
381 	 if isave(37) = 1  then the subspace argmin is beyond the box;
382 	 isave(38) = the number of free variables in the current
383 	 iteration;
384 	 isave(39) = the number of active constraints in the current
385 	 iteration;
386 	 n + 1 - isave(40) = the number of variables leaving the set of
387 	 active constraints in the current iteration;
388 	 isave(41) = the number of variables entering the set of active
389 	 constraints in the current iteration.
390 
391 	 dsave is a double precision working array of dimension 29.
392 	 On exit with 'task' = NEW_X, the following information is
393 	 available:
394 	 dsave(1) = current 'theta' in the BFGS matrix;
395 	 dsave(2) = f(x) in the previous iteration;
396 	 dsave(3) = factr*epsmch;
397 	 dsave(4) = 2-norm of the line search direction vector;
398 	 dsave(5) = the machine precision epsmch generated by the code;
399 	 dsave(7) = the accumulated time spent on searching for
400 	 Cauchy points;
401 	 dsave(8) = the accumulated time spent on
402 	 subspace minimization;
403 	 dsave(9) = the accumulated time spent on line search;
404 	 dsave(11) = the slope of the line search function at
405 	 the current point of line search;
406 	 dsave(12) = the maximum relative step length imposed in
407 	 line search;
408 	 dsave(13) = the infinity norm of the projected gradient;
409 	 dsave(14) = the relative step length in the line search;
410 	 dsave(15) = the slope of the line search function at
411 	 the starting point of the line search;
412 	 dsave(16) = the square of the 2-norm of the line search
413 	 direction vector.
414 
415 	 Subprograms called:
416 
417 	 L-BFGS-B Library ... mainlb.
418 
419 
420 	 References:
421 
422 	 [1] R. H. Byrd, P. Lu, J. Nocedal and C. Zhu, ``A limited
423 	 memory algorithm for bound constrained optimization'',
424 	 SIAM J. Scientific Computing 16 (1995), no. 5, pp. 1190--1208.
425 
426 	 [2] C. Zhu, R.H. Byrd, P. Lu, J. Nocedal, ``L-BFGS-B: a
427 	 limited memory FORTRAN code for solving bound constrained
428 	 optimization problems'', Tech. Report, NAM-11, EECS Department,
429 	 Northwestern University, 1994.
430 
431 	 (Postscript files of these papers are available via anonymous
432 	 ftp to ece.nwu.edu in the directory pub/lbfgs/lbfgs_bcm.)
433 
434 	 [Aug 2000: via http://www.ece.nwu.edu/~nocedal/lbfgsb.html]
435 
436 	 *    *  *
437 
438 	 NEOS, November 1994. (Latest revision April 1997.)
439 	 Optimization Technology Center.
440 	 Argonne National Laboratory and Northwestern University.
441 	 Written by
442 	 Ciyou Zhu
443 	 in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
444 
445 	 ************
446 	 */
447 
448 	char csave[60];
449 
450 	/* Local variables */
451 	int lsnd, ld, lr, lt;
452 	int lz, lwa, lwn, lss, lws, lwt, lsy, lwy;
453 
454 	/* make sure csave is initialized */
455 	csave[0] = '\0';
456 
457 	/* Parameter adjustments */
458 	--wa;
459 	--isave;
460 
461 	/* Function Body */
462 	if (strncmp(task, "START", 5) == 0) {
463 		isave[1] = m * n;
464 		isave[2] = m * m;
465 		isave[3] = m * m << 2;
466 		isave[4] = 1;
467 		isave[5] = isave[4] + isave[1];
468 		isave[6] = isave[5] + isave[1];
469 		isave[7] = isave[6] + isave[2];
470 		isave[8] = isave[7] + isave[2];
471 		isave[9] = isave[8];
472 		isave[10] = isave[9] + isave[2];
473 		isave[11] = isave[10] + isave[3];
474 		isave[12] = isave[11] + isave[3];
475 		isave[13] = isave[12] + n;
476 		isave[14] = isave[13] + n;
477 		isave[15] = isave[14] + n;
478 		isave[16] = isave[15] + n;
479 	}
480 	lws = isave[4];
481 	lwy = isave[5];
482 	lsy = isave[6];
483 	lss = isave[7];
484 	lwt = isave[9];
485 	lwn = isave[10];
486 	lsnd = isave[11];
487 	lz = isave[12];
488 	lr = isave[13];
489 	ld = isave[14];
490 	lt = isave[15];
491 	lwa = isave[16];
492 	mainlb(n, m, x, l, u, nbd, f, g, factr, pgtol,
493 			&wa[lws], &wa[lwy], &wa[lsy],&wa[lss], &wa[lwt],&wa[lwn],
494 			&wa[lsnd], &wa[lz], &wa[lr], &wa[ld], &wa[lt], &wa[lwa],
495 			iwa, &iwa[n], &iwa[n << 1], task, iprint,
496 			csave, lsave, &isave[22], dsave);
497 	return;
498 } /* setulb */
499 
500 
mainlb(int n,int m,double * x,double * l,double * u,int * nbd,double * f,double * g,double factr,double * pgtol,double * ws,double * wy,double * sy,double * ss,double * wt,double * wn,double * snd,double * z,double * r,double * d,double * t,double * wa,int * indx,int * iwhere,int * indx2,char * task,int iprint,char * csave,int * lsave,int * isave,double * dsave)501 void mainlb(int n, int m, double *x,
502 		double *l, double *u, int *nbd, double *f, double *g,
503 		double factr, double *pgtol, double *ws, double * wy,
504 		double *sy, double *ss, double *wt, double *wn,
505 		double *snd, double *z, double *r, double *d,
506 		double *t, double *wa, int *indx, int *iwhere,
507 		int *indx2, char *task, int iprint,
508 		char *csave, int *lsave, int *isave, double *dsave)
509 {
510 	/*     ************
511 	 Subroutine mainlb
512 
513 	 This subroutine solves bound constrained optimization problems by
514 	 using the compact formula of the limited memory BFGS updates.
515 
516 	 n is an integer variable.
517 	 On entry n is the number of variables.
518 	 On exit n is unchanged.
519 
520 	 m is an integer variable.
521 	 On entry m is the maximum number of variable metric
522 	 corrections allowed in the limited memory matrix.
523 	 On exit m is unchanged.
524 
525 	 x is a double precision array of dimension n.
526 	 On entry x is an approximation to the solution.
527 	 On exit x is the current approximation.
528 
529 	 l is a double precision array of dimension n.
530 	 On entry l is the lower bound of x.
531 	 On exit l is unchanged.
532 
533 	 u is a double precision array of dimension n.
534 	 On entry u is the upper bound of x.
535 	 On exit u is unchanged.
536 
537 	 nbd is an integer array of dimension n.
538 	 On entry nbd represents the type of bounds imposed on the
539 	 variables, and must be specified as follows:
540 	 nbd(i)=0 if x(i) is unbounded,
541 	 1 if x(i) has only a lower bound,
542 	 2 if x(i) has both lower and upper bounds,
543 	 3 if x(i) has only an upper bound.
544 	 On exit nbd is unchanged.
545 
546 	 f is a double precision variable.
547 	 On first entry f is unspecified.
548 	 On final exit f is the value of the function at x.
549 
550 	 g is a double precision array of dimension n.
551 	 On first entry g is unspecified.
552 	 On final exit g is the value of the gradient at x.
553 
554 	 factr is a double precision variable.
555 	 On entry factr >= 0 is specified by the user.    The iteration
556 	 will stop when
557 
558 	 (f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr*epsmch
559 
560 	 where epsmch is the machine precision, which is automatically
561 	 generated by the code.
562 	 On exit factr is unchanged.
563 
564 	 pgtol is a double precision variable.
565 	 On entry pgtol >= 0 is specified by the user.    The iteration
566 	 will stop when
567 
568 	 max{|proj g_i | i = 1, ..., n} <= pgtol
569 
570 	 where pg_i is the ith component of the projected gradient.
571 	 On exit pgtol is unchanged.
572 
573 	 ws, wy, sy, and wt are double precision working arrays used to
574 	 store the following information defining the limited memory
575 	 BFGS matrix:
576 	 ws, of dimension n x m, stores S, the matrix of s-vectors;
577 	 wy, of dimension n x m, stores Y, the matrix of y-vectors;
578 	 sy, of dimension m x m, stores S'Y;
579 	 ss, of dimension m x m, stores S'S;
580 	 wt, of dimension m x m, stores the Cholesky factorization
581 	 of (theta*S'S+LD^(-1)L'); see eq.
582 	 (2.26) in [3].
583 
584 	 wn is a double precision working array of dimension 2m x 2m
585 	 used to store the LEL^T factorization of the indefinite matrix
586 	 K = [-D -Y'ZZ'Y/theta     L_a'-R_z'    ]
587 	 [L_a -R_z       theta*S'AA'S ]
588 
589 	 where       E = [-I  0]
590 	 [ 0  I]
591 
592 	 snd is a double precision working array of dimension 2m x 2m
593 	 used to store the lower triangular part of
594 	 N = [Y' ZZ'Y      L_a'+R_z']
595 	 [L_a +R_z  S'AA'S   ]
596 
597 	 z(n),r(n),d(n),t(n),wa(8*m) are double precision working arrays.
598 	 z is used at different times to store the Cauchy point and
599 	 the Newton point.
600 
601 
602 	 indx is an integer working array of dimension n.
603 	 In subroutine freev, indx is used to store the free and fixed
604 	 variables at the Generalized Cauchy Point (GCP).
605 
606 	 iwhere is an integer working array of dimension n used to record
607 	 the status of the vector x for GCP computation.
608 	 iwhere(i)=0 or -3 if x(i) is free and has bounds,
609 	 1       if x(i) is fixed at l(i), and l(i) .ne. u(i)
610 	 2       if x(i) is fixed at u(i), and u(i) .ne. l(i)
611 	 3       if x(i) is always fixed, i.e.,  u(i)=x(i)=l(i)
612 	 -1       if x(i) is always free, i.e., no bounds on it.
613 
614 	 indx2 is an integer working array of dimension n.
615 	 Within subroutine cauchy, indx2 corresponds to the array iorder.
616 	 In subroutine freev, a list of variables entering and leaving
617 	 the free set is stored in indx2, and it is passed on to
618 	 subroutine formk with this information.
619 
620 	 task is a working string of characters of length 60 indicating
621 	 the current job when entering and leaving this subroutine.
622 
623 	 iprint is an INTEGER variable that must be set by the user.
624 	 It controls the frequency and type of output generated:
625 	 iprint<0    no output is generated;
626 	 iprint=0    print only one line at the last iteration;
627 	 0<iprint<99 print also f and |proj g| every iprint iterations;
628 	 iprint=99   print details of every iteration except n-vectors;
629 	 iprint=100  print also the changes of active set and final x;
630 	 iprint>100  print details of every iteration including x and g;
631 	 When iprint > 0, the file iterate.dat will be created to
632 	 summarize the iteration.
633 
634 	 csave is a working string of characters of length 60.
635 
636 	 lsave is a logical working array of dimension 4.
637 
638 	 isave is an integer working array of dimension 23.
639 
640 	 dsave is a double precision working array of dimension 29.
641 
642 
643 	 Subprograms called
644 
645 	 L-BFGS-B Library ... cauchy, subsm, lnsrlb, formk,
646 
647 	 errclb, prn1lb, prn2lb, prn3lb, active, projgr,
648 
649 	 freev, cmprlb, matupd, formt.
650 
651 	 Minpack2 Library ... timer, dpmeps.
652 
653 	 Linpack Library ... dcopy, ddot.
654 
655 
656 	 References:
657 
658 	 [1] R. H. Byrd, P. Lu, J. Nocedal and C. Zhu, ``A limited
659 	 memory algorithm for bound constrained optimization'',
660 	 SIAM J. Scientific Computing 16 (1995), no. 5, pp. 1190--1208.
661 
662 	 [2] C. Zhu, R.H. Byrd, P. Lu, J. Nocedal, ``L-BFGS-B: FORTRAN
663 	 Subroutines for Large Scale Bound Constrained Optimization''
664 	 Tech. Report, NAM-11, EECS Department, Northwestern University,
665 	 1994.
666 
667 	 [3] R. Byrd, J. Nocedal and R. Schnabel "Representations of
668 	 Quasi-Newton Matrices and their use in Limited Memory Methods'',
669 	 Mathematical Programming 63 (1994), no. 4, pp. 129-156.
670 
671 	 (Postscript files of these papers are available via anonymous
672 	 ftp to ece.nwu.edu in the directory pub/lbfgs/lbfgs_bcm.)
673 
674 	 *  *     *
675 	 */
676 
677 	/*
678 	 NEOS, November 1994. (Latest revision April 1997.)
679 	 Optimization Technology Center.
680 	 Argonne National Laboratory and Northwestern University.
681 	 Written by
682 	 Ciyou Zhu
683 	 in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
684 
685 	 ************
686 	 */
687 
688 
689 	/* System generated locals */
690 	int ws_offset=0, wy_offset=0, sy_offset=0, ss_offset=0, wt_offset=0,
691 			wn_offset=0, snd_offset=0, i__1;
692 	double d__1, d__2;
693 
694 	/* Local variables */
695 	int head;
696 	double fold;
697 	int nact;
698 	double ddum;
699 	int info;
700 	int nfgv, ifun, iter, nint;
701 	char word[4]; /* allow for terminator */
702 	int i, iback, k = 0; /* -Wall */
703 	double gdold;
704 	int nfree;
705 	int boxed;
706 	int itail;
707 	double theta;
708 	double dnorm;
709 	int nskip, iword;
710 	double xstep = 0.0, stpmx; /* xstep is printed before being used */
711 	double gd, dr, rr;
712 	int ileave;
713 	int itfile;
714 	double cachyt, epsmch;
715 	int updatd;
716 	double sbtime;
717 	int prjctd;
718 	int iupdat;
719 	int cnstnd;
720 	double sbgnrm;
721 	int nenter;
722 	double lnscht;
723 	int nintol;
724 	double dtd;
725 	int col;
726 	double tol;
727 	int wrk;
728 	double stp, cpu1, cpu2;
729 
730 	/* Parameter adjustments */
731 	--indx2;
732 	--iwhere;
733 	--indx;
734 	--t;
735 	--d;
736 	--r;
737 	--z;
738 	--g;
739 	--nbd;
740 	--u;
741 	--l;
742 	--x;
743 	--wa;
744 	--lsave;
745 	--isave;
746 	--dsave;
747 
748 	/* Function Body */
749 	if (strncmp(task, "START", 5) == 0) {
750 		/*      Generate the current machine precision. */
751 		epsmch = DBL_EPSILON;
752 		fold = 0.;
753 		dnorm = 0.;
754 		cpu1 = 0.;
755 		gd = 0.;
756 		sbgnrm = 0.;
757 		stp = 0.;
758 		xstep = 0.;
759 		stpmx = 0.;
760 		gdold = 0.;
761 		dtd = 0.;
762 		/*      Initialize counters and scalars when task='START'. */
763 		/*         for the limited memory BFGS matrices: */
764 		col = 0;
765 		head = 1;
766 		theta = 1.;
767 		iupdat = 0;
768 		updatd = 0;
769 		iback = 0;
770 		itail = 0;
771 		ifun = 0;
772 		iword = 0;
773 		nact = 0;
774 		ileave = 0;
775 		nenter = 0;
776 		/*         for operation counts: */
777 		iter = 0;
778 		nfgv = 0;
779 		nint = 0;
780 		nintol = 0;
781 		nskip = 0;
782 		nfree = n;
783 		/*         for stopping tolerance: */
784 		tol = factr * epsmch;
785 		/*         for measuring running time: */
786 		cachyt = 0.;
787 		sbtime = 0.;
788 		lnscht = 0.;
789 		/*         'word' records the status of subspace solutions. */
790 		strcpy(word, "---");
791 		/*         'info' records the termination information. */
792 		info = 0;
793 		itfile = 0;
794 		/*      Check the input arguments for errors. */
795 		errclb(n, m, factr, &l[1], &u[1], &nbd[1], task, &info, &k);
796 		if (strncmp(task, "ERROR", 5) == 0) {
797 			prn3lb(n, x+1, f, task, iprint, info,
798 					iter, nfgv, nintol, nskip, nact, sbgnrm,
799 					nint, word, iback, stp, xstep, k);
800 			return;
801 		}
802 
803 		prn1lb(n, m, l+1, u+1, x+1, iprint, epsmch);
804 
805 		/*      Initialize iwhere & project x onto the feasible set. */
806 		active(n, &l[1], &u[1], &nbd[1], &x[1], &iwhere[1], iprint, &prjctd,
807 				&cnstnd, &boxed);
808 		/*      The end of the initialization. */
809 	} else {
810 		/*        restore local variables. */
811 		prjctd = lsave[1];
812 		cnstnd = lsave[2];
813 		boxed = lsave[3];
814 		updatd = lsave[4];
815 
816 		nintol = isave[1];
817 		itfile = isave[3];
818 		iback = isave[4];
819 		nskip = isave[5];
820 		head = isave[6];
821 		col = isave[7];
822 		itail = isave[8];
823 		iter = isave[9];
824 		iupdat = isave[10];
825 		nint = isave[12];
826 		nfgv = isave[13];
827 		info = isave[14];
828 		ifun = isave[15];
829 		iword = isave[16];
830 		nfree = isave[17];
831 		nact = isave[18];
832 		ileave = isave[19];
833 		nenter = isave[20];
834 
835 		theta = dsave[1];
836 		fold = dsave[2];
837 		tol = dsave[3];
838 		dnorm = dsave[4];
839 		epsmch = dsave[5];
840 		cpu1 = dsave[6];
841 		cachyt = dsave[7];
842 		sbtime = dsave[8];
843 		lnscht = dsave[9];
844 		gd = dsave[11];
845 		stpmx = dsave[12];
846 		sbgnrm = dsave[13];
847 		stp = dsave[14];
848 		gdold = dsave[15];
849 		dtd = dsave[16];
850 		/*    After returning from the driver go to the point where execution */
851 		/*    is to resume. */
852 		if (strncmp(task, "FG_LN", 5) == 0)    goto L666;
853 		if (strncmp(task, "NEW_X", 5) == 0)     goto L777;
854 		if (strncmp(task, "FG_ST", 5) == 0)     goto L111;
855 
856 		if (strncmp(task, "STOP", 4) == 0) {
857 			if (strncmp(task + 6, "CPU", 3) == 0) {
858 				// restore the previous iterate.
859 				dcopy(&n, &t[1], &c__1, &x[1], &c__1);
860 				dcopy(&n, &r[1], &c__1, &g[1], &c__1);
861 				*f = fold;
862 			}
863 			goto L999;
864 		}
865 	}
866 	/*     Compute f0 and g0. */
867 	strcpy(task, "FG_START");
868 	/*        return to the driver to calculate f and g; reenter at 111. */
869 	goto L1000;
870 	L111:
871 	nfgv = 1;
872 	/*     Compute the infinity norm of the (-) projected gradient. */
873 	projgr(n, &l[1], &u[1], &nbd[1], &x[1], &g[1], &sbgnrm);
874 
875 	if (iprint >= 1)
876 		cout << "At iterate " << iter << " f= " << *f << " |proj g|= " << sbgnrm << endl;
877 
878 	if (sbgnrm <= *pgtol) {
879 		/*                  terminate the algorithm. */
880 		strcpy(task, "CONVERGENCE: NORM OF PROJECTED GRADIENT <= PGTOL");
881 		goto L999;
882 	}
883 	/* ----------------- the beginning of the loop -------------------------- */
884 	L222:
885 	if (iprint >= 99)
886 		cout << "Iteration " << iter << endl;
887 	iword = -1;
888 
889 	if (! cnstnd && col > 0) {
890 		/*                          skip the search for GCP. */
891 		dcopy(&n, &x[1], &c__1, &z[1], &c__1);
892 		wrk = updatd;
893 		nint = 0;
894 		goto L333;
895 	}
896 	/* ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc */
897 
898 	/*     Compute the Generalized Cauchy Point (GCP). */
899 
900 	/* ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc */
901 	timer(&cpu1);
902 	cauchy(n, &x[1], &l[1], &u[1], &nbd[1], &g[1], &indx2[1], &iwhere[1], &t[
903 																			 1], &d[1], &z[1], m, &wy[wy_offset], &ws[ws_offset], &sy[
904 																																	  sy_offset], &wt[wt_offset], &theta, &col, &head, &wa[1], &wa[(m
905 																																			  << 1) + 1], &wa[(m << 2) + 1], &wa[m * 6 + 1], &nint, iprint, &
906 																																			  sbgnrm, &info, &epsmch);
907 	if (info != 0) {
908 		/*       singular triangular system detected; refresh the lbfgs memory. */
909 		if (iprint >= 1) {
910 			cout << "Singular triangular system detected;" << endl;
911 			cout << "   refresh the lbfgs memory and restart the iteration." << endl;
912 		}
913 		info = 0;
914 		col = 0;
915 		head = 1;
916 		theta = 1.;
917 		iupdat = 0;
918 		updatd = 0;
919 		timer(&cpu2);
920 		cachyt = cachyt + cpu2 - cpu1;
921 		goto L222;
922 	}
923 	timer(&cpu2);
924 	cachyt = cachyt + cpu2 - cpu1;
925 	nintol += nint;
926 	/*     Count the entering and leaving variables for iter > 0; */
927 	/*     find the index set of free and active variables at the GCP. */
928 	freev(n, &nfree, &indx[1], &nenter, &ileave, &indx2[1], &iwhere[1], &
929 			wrk, &updatd, &cnstnd, iprint, &iter);
930 	nact = n - nfree;
931 	L333:
932 	/*     If there are no free variables or B=theta*I, then */
933 	/*                      skip the subspace minimization. */
934 	if (nfree == 0 || col == 0) {
935 		goto L555;
936 	}
937 	/* ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc */
938 
939 	/*     Subspace minimization. */
940 
941 	/* ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc */
942 	timer(&cpu1);
943 	/*     Form  the LEL^T factorization of the indefinite */
944 	/*     matrix       K = [-D -Y'ZZ'Y/theta     L_a'-R_z'    ] */
945 	/*               [L_a -R_z       theta*S'AA'S ] */
946 	/*     where       E = [-I  0] */
947 	/*               [ 0  I] */
948 	if (wrk) {
949 		formk(n, &nfree, &indx[1], &nenter, &ileave, &indx2[1], &iupdat, &
950 				updatd, &wn[wn_offset], &snd[snd_offset], m, &ws[ws_offset], &
951 				wy[wy_offset], &sy[sy_offset], &theta, &col, &head, &info);
952 	}
953 	if (info != 0) {
954 		/*        nonpositive definiteness in Cholesky factorization; */
955 		/*        refresh the lbfgs memory and restart the iteration. */
956 		if (iprint >= 0) {
957 			cout << "Nonpositive definiteness in Cholesky factorization in formk;" << endl;
958 			cout << "   refresh the lbfgs memory and restart the iteration." << endl;
959 		}
960 		info = 0;
961 		col = 0;
962 		head = 1;
963 		theta = 1.;
964 		iupdat = 0;
965 		updatd = 0;
966 		timer(&cpu2);
967 		sbtime = sbtime + cpu2 - cpu1;
968 		goto L222;
969 	}
970 	/*      compute r=-Z'B(xcp-xk)-Z'g (using wa(2m+1)=W'(xcp-x) */
971 	/*                             from 'cauchy'). */
972 	cmprlb(n, m, &x[1], &g[1], &ws[ws_offset], &wy[wy_offset], &sy[sy_offset]
973 																   , &wt[wt_offset], &z[1], &r[1], &wa[1], &indx[1], &theta, &
974 																   col, &head, &nfree, &cnstnd, &info);
975 	if (info != 0) {
976 		goto L444;
977 	}
978 	/*     call the direct method. */
979 	subsm(n, m, &nfree, &indx[1], &l[1], &u[1], &nbd[1], &z[1], &r[1], &
980 			ws[ws_offset], &wy[wy_offset], &theta, &col, &head, &iword, &wa[1]
981 																			, &wn[wn_offset], iprint, &info);
982 	L444:
983 	if (info != 0) {
984 		/*        singular triangular system detected; */
985 		/*        refresh the lbfgs memory and restart the iteration. */
986 		if (iprint >= 1) {
987 			cout << "Singular triangular system detected;" << endl;
988 			cout << "   refresh the lbfgs memory and restart the iteration." << endl;
989 		}
990 		info = 0;
991 		col = 0;
992 		head = 1;
993 		theta = 1.;
994 		iupdat = 0;
995 		updatd = 0;
996 		timer(&cpu2);
997 		sbtime = sbtime + cpu2 - cpu1;
998 		goto L222;
999 	}
1000 	timer(&cpu2);
1001 	sbtime = sbtime + cpu2 - cpu1;
1002 	L555:
1003 	/* ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc */
1004 
1005 	/*     Line search and optimality tests. */
1006 
1007 	/* ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc */
1008 	/*     Generate the search direction d:=z-x. */
1009 	i__1 = n;
1010 	for (i = 1; i <= i__1; ++i) {
1011 		d[i] = z[i] - x[i];
1012 		/* L40: */
1013 	}
1014 	timer(&cpu1);
1015 	L666:
1016 	lnsrlb(n, &l[1], &u[1], &nbd[1], &x[1], f, &fold, &gd, &gdold, &g[1], &
1017 			d[1], &r[1], &t[1], &z[1], &stp, &dnorm, &dtd, &xstep, &
1018 			stpmx, &iter, &ifun, &iback, &nfgv, &info, task, &boxed, &cnstnd,
1019 			csave, &isave[22], &dsave[17]);
1020 	if (info != 0 || iback >= 20) {
1021 		/*        restore the previous iterate. */
1022 		dcopy(&n, &t[1], &c__1, &x[1], &c__1);
1023 		dcopy(&n, &r[1], &c__1, &g[1], &c__1);
1024 		*f = fold;
1025 		if (col == 0) {
1026 			/*           abnormal termination. */
1027 			if (info == 0) {
1028 				info = -9;
1029 				/*          restore the actual number of f and g evaluations etc. */
1030 				--nfgv;
1031 				--ifun;
1032 				--iback;
1033 			}
1034 			strcpy(task, "WARNING: ABNORMAL_TERMINATION_IN_LNSRCH");
1035 			++iter;
1036 			goto L999;
1037 		} else {
1038 			/*           refresh the lbfgs memory and restart the iteration. */
1039 			if (iprint >= 1) {
1040 				cout << "Bad direction in the line search;" << endl;
1041 				cout << "   refresh the lbfgs memory and restart the iteration." << endl;
1042 			}
1043 			if (info == 0) {
1044 				--nfgv;
1045 			}
1046 			info = 0;
1047 			col = 0;
1048 			head = 1;
1049 			theta = 1.;
1050 			iupdat = 0;
1051 			updatd = 0;
1052 			strcpy(task, "RESTART_FROM_LNSRCH");
1053 			timer(&cpu2);
1054 			lnscht = lnscht + cpu2 - cpu1;
1055 			goto L222;
1056 		}
1057 	} else if (strncmp(task, "FG_LN", 5) == 0) {
1058 		/*        return to the driver for calculating f and g; reenter at 666. */
1059 		goto L1000;
1060 	} else {
1061 		/*        calculate and print out the quantities related to the new X. */
1062 		timer(&cpu2);
1063 		lnscht = lnscht + cpu2 - cpu1;
1064 		++iter;
1065 		/*      Compute the infinity norm of the projected (-)gradient. */
1066 		projgr(n, &l[1], &u[1], &nbd[1], &x[1], &g[1], &sbgnrm);
1067 		/*      Print iteration information. */
1068 		prn2lb(n, x+1, f, g+1, iprint, iter, nfgv, nact,
1069 				sbgnrm, nint, word, iword, iback, stp, xstep);
1070 		goto L1000;
1071 	}
1072 	L777:
1073 	/*     Test for termination. */
1074 	if (sbgnrm <= *pgtol) {
1075 		/*                  terminate the algorithm. */
1076 		strcpy(task, "CONVERGENCE: NORM OF PROJECTED GRADIENT <= PGTOL");
1077 		goto L999;
1078 	}
1079 	/* Computing MAX */
1080 	d__1 = fabs(fold), d__2 = fabs(*f), d__1 = max(d__1,d__2);
1081 	ddum = max((double)d__1,(double)1.);
1082 	if (fold - *f <= tol * ddum) {
1083 		/*                      terminate the algorithm. */
1084 		strcpy(task, "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH");
1085 		if (iback >= 10) info = -5;
1086 		/*         i.e., to issue a warning if iback>10 in the line search. */
1087 		goto L999;
1088 	}
1089 	/*     Compute d=newx-oldx, r=newg-oldg, rr=y'y and dr=y's. */
1090 	i__1 = n;
1091 	for (i = 1; i <= i__1; ++i) {
1092 		r[i] = g[i] - r[i];
1093 		/* L42: */
1094 	}
1095 	rr = ddot(&n, &r[1], &c__1, &r[1], &c__1);
1096 	if (stp == 1.) {
1097 		dr = gd - gdold;
1098 		ddum = -gdold;
1099 	} else {
1100 		dr = (gd - gdold) * stp;
1101 		dscal(&n, &stp, &d[1], &c__1);
1102 		ddum = -gdold * stp;
1103 	}
1104 	if (dr <= epsmch * ddum) {
1105 		/*                  skip the L-BFGS update. */
1106 		++nskip;
1107 		updatd = 0;
1108 		if (iprint >= 1) {
1109 			cout << "ys=" << dr << "   -gs=" << ddum << ", BFGS update SKIPPED" << endl;
1110 		}
1111 		goto L888;
1112 	}
1113 	/* ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc */
1114 
1115 	/*     Update the L-BFGS matrix. */
1116 
1117 	/* ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc */
1118 	updatd = 1;
1119 	++iupdat;
1120 	/*     Update matrices WS and WY and form the middle matrix in B. */
1121 	matupd(n, m, &ws[ws_offset], &wy[wy_offset], &sy[sy_offset], &ss[
1122 																	 ss_offset], &d[1], &r[1], &itail, &iupdat, &col, &head, &
1123 																	 theta, &rr, &dr, &stp, &dtd);
1124 	/*     Form the upper half of the pds T = theta*SS + L*D^(-1)*L'; */
1125 	/*      Store T in the upper triangular of the array wt; */
1126 	/*      Cholesky factorize T to J*J' with */
1127 	/*         J' stored in the upper triangular of wt. */
1128 	formt(m, &wt[wt_offset], &sy[sy_offset], &ss[ss_offset], &col, &theta, &
1129 			info);
1130 	if (info != 0) {
1131 		/*        nonpositive definiteness in Cholesky factorization; */
1132 		/*        refresh the lbfgs memory and restart the iteration. */
1133 		if (iprint >= 0) {
1134 			cout << "Nonpositive definiteness in Cholesky factorization in formk;" << endl;
1135 			cout << "   refresh the lbfgs memory and restart the iteration." << endl;
1136 		}
1137 		info = 0;
1138 		col = 0;
1139 		head = 1;
1140 		theta = 1.;
1141 		iupdat = 0;
1142 		updatd = 0;
1143 		goto L222;
1144 	}
1145 	/*     Now the inverse of the middle matrix in B is */
1146 	/*     [  D^(1/2)     O ] [ -D^(1/2)     D^(-1/2)*L' ] */
1147 	/*     [ -L*D^(-1/2)     J ] [    0     J'         ] */
1148 	L888:
1149 	/* -------------------- the end of the loop ----------------------------- */
1150 	goto L222;
1151 	L999:
1152 	L1000:
1153 	/*     Save local variables. */
1154 	lsave[1] = prjctd;
1155 	lsave[2] = cnstnd;
1156 	lsave[3] = boxed;
1157 	lsave[4] = updatd;
1158 	isave[1] = nintol;
1159 	isave[3] = itfile;
1160 	isave[4] = iback;
1161 	isave[5] = nskip;
1162 	isave[6] = head;
1163 	isave[7] = col;
1164 	isave[8] = itail;
1165 	isave[9] = iter;
1166 	isave[10] = iupdat;
1167 	isave[12] = nint;
1168 	isave[13] = nfgv;
1169 	isave[14] = info;
1170 	isave[15] = ifun;
1171 	isave[16] = iword;
1172 	isave[17] = nfree;
1173 	isave[18] = nact;
1174 	isave[19] = ileave;
1175 	isave[20] = nenter;
1176 	dsave[1] = theta;
1177 	dsave[2] = fold;
1178 	dsave[3] = tol;
1179 	dsave[4] = dnorm;
1180 	dsave[5] = epsmch;
1181 	dsave[6] = cpu1;
1182 	dsave[7] = cachyt;
1183 	dsave[8] = sbtime;
1184 	dsave[9] = lnscht;
1185 	dsave[11] = gd;
1186 	dsave[12] = stpmx;
1187 	dsave[13] = sbgnrm;
1188 	dsave[14] = stp;
1189 	dsave[15] = gdold;
1190 	dsave[16] = dtd;
1191 	prn3lb(n, x+1, f, task, iprint, info,
1192 			iter, nfgv, nintol, nskip, nact, sbgnrm,
1193 			nint, word, iback, stp, xstep, k);
1194 	return;
1195 } /* mainlb */
1196 /* ======================= The end of mainlb ============================= */
1197 
errclb(int n,int m,double factr,double * l,double * u,int * nbd,char * task,int * info,int * k)1198 void errclb(int n, int m, double factr, double *l, double *u,
1199 		int *nbd, char *task, int *info, int *k)
1200 {
1201 	/*    ************
1202 	 Subroutine errclb
1203 
1204 	 This subroutine checks the validity of the input data.
1205 
1206 	 *     *  *
1207 
1208 	 NEOS, November 1994. (Latest revision April 1997.)
1209 	 Optimization Technology Center.
1210 	 Argonne National Laboratory and Northwestern University.
1211 	 Written by
1212 	 Ciyou Zhu
1213 	 in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
1214 
1215 	 ************
1216 	 */
1217 
1218 	/* Local variables */
1219 	int i;
1220 
1221 	/* Parameter adjustments */
1222 	--nbd;
1223 	--u;
1224 	--l;
1225 
1226 	/* Function Body */
1227 	/*     Check the input arguments for errors. */
1228 	if (n <= 0)
1229 		strcpy(task, "ERROR: N .LE. 0");
1230 	if (m <= 0)
1231 		strcpy(task, "ERROR: M .LE. 0");
1232 	if (factr < 0.)
1233 		strcpy(task, "ERROR: FACTR .LT. 0");
1234 
1235 	/*     Check the validity of the arrays nbd(i), u(i), and l(i). */
1236 	for (i = 1; i <= n; ++i) {
1237 		if (nbd[i] < 0 || nbd[i] > 3) {
1238 			/*                             return */
1239 			strcpy(task, "ERROR: INVALID NBD");
1240 			*info = -6;
1241 			*k = i;
1242 		}
1243 		if (nbd[i] == 2) {
1244 			if (l[i] > u[i]) {
1245 				/*                      return */
1246 				strcpy(task, "ERROR: NO FEASIBLE SOLUTION");
1247 				*info = -7;
1248 				*k = i;
1249 			}
1250 		}
1251 	}
1252 	return;
1253 } /* errclb */
1254 /* ======================= The end of errclb ============================= */
1255 
active(int n,double * l,double * u,int * nbd,double * x,int * iwhere,int iprint,int * prjctd,int * cnstnd,int * boxed)1256 void active(int n, double *l, double *u,
1257 		int *nbd, double *x, int *iwhere, int iprint,
1258 		int *prjctd, int *cnstnd, int *boxed)
1259 {
1260 	/*    ************
1261 
1262 	 Subroutine active
1263 
1264 	 This subroutine initializes iwhere and projects the initial x to
1265 	 the feasible set if necessary.
1266 
1267 	 iwhere is an integer array of dimension n.
1268 	 On entry iwhere is unspecified.
1269 	 On exit iwhere(i)=-1    if x(i) has no bounds
1270 	 3    if l(i)=u(i)
1271 	 0    otherwise.
1272 	 In cauchy, iwhere is given finer gradations.
1273 
1274 
1275 	 *     *  *
1276 
1277 	 NEOS, November 1994. (Latest revision June 1996.)
1278 	 Optimization Technology Center.
1279 	 Argonne National Laboratory and Northwestern University.
1280 	 Written by
1281 	 Ciyou Zhu
1282 	 in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
1283 
1284 	 ************
1285 	 */
1286 
1287 	/* Local variables */
1288 	int nbdd, i;
1289 
1290 	/* Parameter adjustments */
1291 	--iwhere;
1292 	--x;
1293 	--nbd;
1294 	--u;
1295 	--l;
1296 
1297 	/* Function Body */
1298 
1299 	/*Initialize nbdd, prjctd, cnstnd and boxed. */
1300 	nbdd = 0;
1301 	*prjctd = 0;
1302 	*cnstnd = 0;
1303 	*boxed = 1;
1304 	/*     Project the initial x to the easible set if necessary. */
1305 	for (i = 1; i <= n; ++i) {
1306 		if (nbd[i] > 0) {
1307 			if (nbd[i] <= 2 && x[i] <= l[i]) {
1308 				if (x[i] < l[i]) {
1309 					*prjctd = 1;
1310 					x[i] = l[i];
1311 				}
1312 				++nbdd;
1313 			} else if (nbd[i] >= 2 && x[i] >= u[i]) {
1314 				if (x[i] > u[i]) {
1315 					*prjctd = 1;
1316 					x[i] = u[i];
1317 				}
1318 				++nbdd;
1319 			}
1320 		}
1321 	}
1322 
1323 	/*     Initialize iwhere and assign values to cnstnd and boxed. */
1324 	for (i = 1; i <= n; ++i) {
1325 		if (nbd[i] != 2) {
1326 			*boxed = 0;
1327 		}
1328 		if (nbd[i] == 0) {
1329 			/*                  this variable is always free */
1330 			iwhere[i] = -1;
1331 			/*         otherwise set x(i)=mid(x(i), u(i), l(i)). */
1332 		} else {
1333 			*cnstnd = 1;
1334 			if (nbd[i] == 2 && u[i] - l[i] <= 0.) {
1335 				/*             this variable is always fixed */
1336 				iwhere[i] = 3;
1337 			} else {
1338 				iwhere[i] = 0;
1339 			}
1340 		}
1341 	}
1342 	if (iprint >= 0) {
1343 		if (*prjctd)
1344 			cout << "The initial X is infeasible.  Restart with its projection." << endl;
1345 		if (!*cnstnd)
1346 			cout << "This problem is unconstrained." << endl;
1347 	}
1348 	if (iprint > 0)
1349 		cout << "At X0, " << nbdd << " variables are exactly at the bounds" << endl;
1350 
1351 	return;
1352 } /* active */
1353 /* ======================= The end of active ============================= */
1354 
cauchy(int n,double * x,double * l,double * u,int * nbd,double * g,int * iorder,int * iwhere,double * t,double * d,double * xcp,int m,double * wy,double * ws,double * sy,double * wt,double * theta,int * col,int * head,double * p,double * c,double * wbp,double * v,int * nint,int iprint,double * sbgnrm,int * info,double * epsmch)1355 void cauchy(int n, double *x, double *l, double *u, int *nbd,
1356 		double *g, int *iorder, int * iwhere, double *t,
1357 		double *d, double *xcp, int m,
1358 		double *wy, double *ws, double *sy, double *wt,
1359 		double *theta, int *col, int *head, double *p,
1360 		double *c, double *wbp, double *v, int *nint,
1361 		int iprint, double *sbgnrm, int *info, double * epsmch)
1362 {
1363 	/*     ************
1364 	 Subroutine cauchy
1365 
1366 	 For given x, l, u, g (with sbgnrm > 0), and a limited memory
1367 	 BFGS matrix B defined in terms of matrices WY, WS, WT, and
1368 	 scalars head, col, and theta, this subroutine computes the
1369 	 generalized Cauchy point (GCP), defined as the first local
1370 	 minimizer of the quadratic
1371 
1372 	 Q(x + s) = g's + 1/2 s'Bs
1373 
1374 	 along the projected gradient direction P(x-tg,l,u).
1375 	 The routine returns the GCP in xcp.
1376 
1377 	 n is an integer variable.
1378 	 On entry n is the dimension of the problem.
1379 	 On exit n is unchanged.
1380 
1381 	 x is a double precision array of dimension n.
1382 	 On entry x is the starting point for the GCP computation.
1383 	 On exit x is unchanged.
1384 
1385 	 l is a double precision array of dimension n.
1386 	 On entry l is the lower bound of x.
1387 	 On exit l is unchanged.
1388 
1389 	 u is a double precision array of dimension n.
1390 	 On entry u is the upper bound of x.
1391 	 On exit u is unchanged.
1392 
1393 	 nbd is an integer array of dimension n.
1394 	 On entry nbd represents the type of bounds imposed on the
1395 	 variables, and must be specified as follows:
1396 	 nbd(i)=0 if x(i) is unbounded,
1397 	 1 if x(i) has only a lower bound,
1398 	 2 if x(i) has both lower and upper bounds, and
1399 	 3 if x(i) has only an upper bound.
1400 	 On exit nbd is unchanged.
1401 
1402 	 g is a double precision array of dimension n.
1403 	 On entry g is the gradient of f(x).  g must be a nonzero vector.
1404 	 On exit g is unchanged.
1405 
1406 	 iorder is an integer working array of dimension n.
1407 	 iorder will be used to store the breakpoints in the piecewise
1408 	 linear path and free variables encountered. On exit,
1409 	 iorder(1),...,iorder(nleft) are indices of breakpoints
1410 	 which have not been encountered;
1411 	 iorder(nleft+1),...,iorder(nbreak) are indices of
1412 	 encountered breakpoints; and
1413 	 iorder(nfree),...,iorder(n) are indices of variables which
1414 	 have no bound constraits along the search direction.
1415 
1416 	 iwhere is an integer array of dimension n.
1417 	 On entry iwhere indicates only the permanently fixed (iwhere=3)
1418 	 or free (iwhere= -1) components of x.
1419 	 On exit iwhere records the status of the current x variables.
1420 	 iwhere(i)=-3  if x(i) is free and has bounds, but is not moved
1421 	 0   if x(i) is free and has bounds, and is moved
1422 	 1   if x(i) is fixed at l(i), and l(i) .ne. u(i)
1423 	 2   if x(i) is fixed at u(i), and u(i) .ne. l(i)
1424 	 3   if x(i) is always fixed, i.e.,  u(i)=x(i)=l(i)
1425 	 -1  if x(i) is always free, i.e., it has no bounds.
1426 
1427 	 t is a double precision working array of dimension n.
1428 	 t will be used to store the break points.
1429 
1430 	 d is a double precision array of dimension n used to store
1431 	 the Cauchy direction P(x-tg)-x.
1432 
1433 	 xcp is a double precision array of dimension n used to return the
1434 	 GCP on exit.
1435 
1436 	 m is an integer variable.
1437 	 On entry m is the maximum number of variable metric corrections
1438 	 used to define the limited memory matrix.
1439 	 On exit m is unchanged.
1440 
1441 	 ws, wy, sy, and wt are double precision arrays.
1442 	 On entry they store information that defines the
1443 	 limited memory BFGS matrix:
1444 	 ws(n,m) stores S, a set of s-vectors;
1445 	 wy(n,m) stores Y, a set of y-vectors;
1446 	 sy(m,m) stores S'Y;
1447 	 wt(m,m) stores the
1448 	 Cholesky factorization of (theta*S'S+LD^(-1)L').
1449 	 On exit these arrays are unchanged.
1450 
1451 	 theta is a double precision variable.
1452 	 On entry theta is the scaling factor specifying B_0 = theta I.
1453 	 On exit theta is unchanged.
1454 
1455 	 col is an integer variable.
1456 	 On entry col is the actual number of variable metric
1457 	 corrections stored so far.
1458 	 On exit col is unchanged.
1459 
1460 	 head is an integer variable.
1461 	 On entry head is the location of the first s-vector
1462 	 (or y-vector) in S (or Y).
1463 	 On exit col is unchanged.
1464 
1465 	 p is a double precision working array of dimension 2m.
1466 	 p will be used to store the vector p = W^(T)d.
1467 
1468 	 c is a double precision working array of dimension 2m.
1469 	 c will be used to store the vector c = W^(T)(xcp-x).
1470 
1471 	 wbp is a double precision working array of dimension 2m.
1472 	 wbp will be used to store the row of W corresponding
1473 	 to a breakpoint.
1474 
1475 	 v is a double precision working array of dimension 2m.
1476 
1477 	 nint is an integer variable.
1478 	 On exit nint records the number of quadratic segments explored
1479 	 in searching for the GCP.
1480 
1481 	 iprint is an INTEGER variable that must be set by the user.
1482 	 It controls the frequency and type of output generated:
1483 	 iprint<0    no output is generated;
1484 	 iprint=0    print only one line at the last iteration;
1485 	 0<iprint<99 print also f and |proj g| every iprint iterations;
1486 	 iprint=99   print details of every iteration except n-vectors;
1487 	 iprint=100  print also the changes of active set and final x;
1488 	 iprint>100  print details of every iteration including x and g;
1489 	 When iprint > 0, the file iterate.dat will be created to
1490 	 summarize the iteration.
1491 
1492 	 sbgnrm is a double precision variable.
1493 	 On entry sbgnrm is the norm of the projected gradient at x.
1494 	 On exit sbgnrm is unchanged.
1495 
1496 	 info is an integer variable.
1497 	 On entry info is 0.
1498 	 On exit info = 0    for normal return,
1499 	 = nonzero for abnormal return when the the system
1500 	 used in routine bmv is singular.
1501 
1502 	 Subprograms called:
1503 
1504 	 L-BFGS-B Library ... hpsolb, bmv.
1505 
1506 	 Linpack ... dscal dcopy, daxpy.
1507 
1508 
1509 	 References:
1510 
1511 	 [1] R. H. Byrd, P. Lu, J. Nocedal and C. Zhu, ``A limited
1512 	 memory algorithm for bound constrained optimization'',
1513 	 SIAM J. Scientific Computing 16 (1995), no. 5, pp. 1190--1208.
1514 
1515 	 [2] C. Zhu, R.H. Byrd, P. Lu, J. Nocedal, ``L-BFGS-B: FORTRAN
1516 	 Subroutines for Large Scale Bound Constrained Optimization''
1517 	 Tech. Report, NAM-11, EECS Department, Northwestern University, 1994.
1518 
1519 	 (Postscript files of these papers are available via anonymous
1520 	 ftp to ece.nwu.edu in the directory pub/lbfgs/lbfgs_bcm.)
1521 
1522 	 *    *  *
1523 
1524 	 NEOS, November 1994. (Latest revision April 1997.)
1525 	 Optimization Technology Center.
1526 	 Argonne National Laboratory and Northwestern University.
1527 	 Written by
1528 	 Ciyou Zhu
1529 	 in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
1530 
1531 	 ************
1532 	 */
1533 
1534 	/* System generated locals */
1535 	int wy_dim1, wy_offset, ws_dim1, ws_offset, sy_dim1, sy_offset,
1536 	wt_dim1, wt_offset, i__2;
1537 	double d__1;
1538 
1539 	/* Local variables */
1540 	double bkmin, dibp, dibp2, zibp, neggi, tsum;
1541 	double f1, f2, f2_org__, dt, tj, tj0, tl= 0.0, tu=0.0, dtm, wmc, wmp, wmw;
1542 
1543 	int i, j, ibp, iter, bnded, nfree, nleft, nbreak, ibkmin, pointr;
1544 	int xlower, xupper, col2;
1545 
1546 	/* Parameter adjustments */
1547 	--xcp;
1548 	--d;
1549 	--t;
1550 	--iwhere;
1551 	--iorder;
1552 	--g;
1553 	--nbd;
1554 	--u;
1555 	--l;
1556 	--x;
1557 	--v;
1558 	--wbp;
1559 	--c;
1560 	--p;
1561 	wt_dim1 = m;    wt_offset = 1 + wt_dim1 * 1;    wt -= wt_offset;
1562 	sy_dim1 = m;    sy_offset = 1 + sy_dim1 * 1;    sy -= sy_offset;
1563 	ws_dim1 = n;    ws_offset = 1 + ws_dim1 * 1;    ws -= ws_offset;
1564 	wy_dim1 = n;    wy_offset = 1 + wy_dim1 * 1;    wy -= wy_offset;
1565 
1566 	/* Function Body */
1567 
1568 	/*     Check the status of the variables, reset iwhere(i) if necessary;
1569 	 *     compute the Cauchy direction d and the breakpoints t; initialize
1570 	 *     the derivative f1 and the vector p = W'd (for theta = 1).
1571 	 */
1572 
1573 	if (*sbgnrm <= 0.) {
1574 		if (iprint >= 0) cout << "Subgnorm = 0.  GCP = X.\n";
1575 		dcopy(&n, &x[1], &c__1, &xcp[1], &c__1);
1576 		return;
1577 	}
1578 	bnded = 1;
1579 	nfree = n + 1;
1580 	nbreak = 0;
1581 	ibkmin = 0;
1582 	bkmin = 0.;
1583 	col2 = *col << 1;
1584 	f1 = 0.;
1585 	if (iprint >= 99)
1586 		cout << "\n---------------- CAUCHY entered-------------------\n\n";
1587 
1588 	/*     We set p to zero and build it up as we determine d. */
1589 	for (i = 1; i <= col2; ++i)
1590 		p[i] = 0.;
1591 
1592 	/*     In the following loop we determine for each variable its bound */
1593 	/*      status and its breakpoint, and update p accordingly. */
1594 	/*      Smallest breakpoint is identified. */
1595 
1596 	for (i = 1; i <= n; ++i) {
1597 		neggi = -g[i];
1598 		if (iwhere[i] != 3 && iwhere[i] != -1) {
1599 			/*           if x(i) is not a constant and has bounds, */
1600 			/*           compute the difference between x(i) and its bounds. */
1601 			if (nbd[i] <= 2) {
1602 				tl = x[i] - l[i];
1603 			}
1604 			if (nbd[i] >= 2) {
1605 				tu = u[i] - x[i];
1606 			}
1607 			/*         If a variable is close enough to a bound */
1608 			/*           we treat it as at bound. */
1609 			xlower = nbd[i] <= 2 && tl <= 0.;
1610 			xupper = nbd[i] >= 2 && tu <= 0.;
1611 			/*        reset iwhere(i). */
1612 			iwhere[i] = 0;
1613 			if (xlower) {
1614 				if (neggi <= 0.) {
1615 					iwhere[i] = 1;
1616 				}
1617 			} else if (xupper) {
1618 				if (neggi >= 0.) {
1619 					iwhere[i] = 2;
1620 				}
1621 			} else {
1622 				if (fabs(neggi) <= 0.) {
1623 					iwhere[i] = -3;
1624 				}
1625 			}
1626 		}
1627 		pointr = *head;
1628 		if (iwhere[i] != 0 && iwhere[i] != -1) {
1629 			d[i] = 0.;
1630 		} else {
1631 			d[i] = neggi;
1632 			f1 -= neggi * neggi;
1633 			/*           calculate p := p - W'e_i* (g_i). */
1634 			i__2 = *col;
1635 			for (j = 1; j <= i__2; ++j) {
1636 				p[j] += wy[i + pointr * wy_dim1] * neggi;
1637 				p[*col + j] += ws[i + pointr * ws_dim1] * neggi;
1638 				pointr = pointr % m + 1;
1639 			}
1640 			if (nbd[i] <= 2 && nbd[i] != 0 && neggi < 0.) {
1641 				/*                   x(i) + d(i) is bounded; compute t(i). */
1642 				++nbreak;
1643 				iorder[nbreak] = i;
1644 				t[nbreak] = tl / (-neggi);
1645 				if (nbreak == 1 || t[nbreak] < bkmin) {
1646 					bkmin = t[nbreak];
1647 					ibkmin = nbreak;
1648 				}
1649 			} else if (nbd[i] >= 2 && neggi > 0.) {
1650 				/*                   x(i) + d(i) is bounded; compute t(i). */
1651 				++nbreak;
1652 				iorder[nbreak] = i;
1653 				t[nbreak] = tu / neggi;
1654 				if (nbreak == 1 || t[nbreak] < bkmin) {
1655 					bkmin = t[nbreak];
1656 					ibkmin = nbreak;
1657 				}
1658 			} else {/*          x(i) + d(i) is not bounded. */
1659 				--nfree;
1660 				iorder[nfree] = i;
1661 				if (fabs(neggi) > 0.)
1662 					bnded = 0;
1663 			}
1664 		}
1665 		/* L50: */
1666 	} /* for(i = 1:n) */
1667 
1668 	/*     The indices of the nonzero components of d are now stored */
1669 	/*     in iorder(1),...,iorder(nbreak) and iorder(nfree),...,iorder(n). */
1670 	/*     The smallest of the nbreak breakpoints is in t(ibkmin)=bkmin. */
1671 	if (*theta != 1.) {
1672 		/*             complete the initialization of p for theta not= one. */
1673 		dscal(col, theta, &p[*col + 1], &c__1);
1674 	}
1675 	/*     Initialize GCP xcp = x. */
1676 	dcopy(&n, &x[1], &c__1, &xcp[1], &c__1);
1677 	if (nbreak == 0 && nfree == n + 1) {
1678 		/*            is a zero vector, return with the initial xcp as GCP. */
1679 		if (iprint > 100) {
1680 			cout << "Cauchy X =  ";
1681 			for(i = 1; i <= n; i++) cout << xcp[i] << " ";
1682 			cout << "\n";
1683 		}
1684 		return;
1685 	}
1686 	/*     Initialize c = W'(xcp - x) = 0. */
1687 	for (j = 1; j <= col2; ++j)
1688 		c[j] = 0.;
1689 
1690 	/*     Initialize derivative f2. */
1691 	f2 = -(*theta) * f1;
1692 	f2_org__ = f2;
1693 	if (*col > 0) {
1694 		bmv(m, &sy[sy_offset], &wt[wt_offset], col, &p[1], &v[1], info);
1695 		if (*info != 0) {
1696 			return;
1697 		}
1698 		f2 -= ddot(&col2, &v[1], &c__1, &p[1], &c__1);
1699 	}
1700 	dtm = -f1 / f2;
1701 	tsum = 0.;
1702 	*nint = 1;
1703 	if (iprint >= 99) cout << "There are " << nbreak << " breakpoints\n";
1704 
1705 	/*     If there are no breakpoints, locate the GCP and return. */
1706 	if (nbreak == 0) {
1707 		goto L888;
1708 	}
1709 	nleft = nbreak;
1710 	iter = 1;
1711 	tj = 0.;
1712 	/* ------------------- the beginning of the loop ------------------------- */
1713 	L777:
1714 	/*     Find the next smallest breakpoint; */
1715 	/*     compute dt = t(nleft) - t(nleft + 1). */
1716 	tj0 = tj;
1717 	if (iter == 1) {
1718 		/*       Since we already have the smallest breakpoint we need not do */
1719 		/*       heapsort yet. Often only one breakpoint is used and the */
1720 		/*       cost of heapsort is avoided. */
1721 		tj = bkmin;
1722 		ibp = iorder[ibkmin];
1723 	} else {
1724 		if (iter == 2) {
1725 			/* Replace the already used smallest breakpoint with the */
1726 			/* breakpoint numbered nbreak > nlast, before heapsort call. */
1727 			if (ibkmin != nbreak) {
1728 				t[ibkmin] = t[nbreak];
1729 				iorder[ibkmin] = iorder[nbreak];
1730 			}
1731 		}
1732 		/* Update heap structure of breakpoints */
1733 		/* (if iter=2, initialize heap). */
1734 		hpsolb(nleft, &t[1], &iorder[1], iter - 2);
1735 		tj = t[nleft];
1736 		ibp = iorder[nleft];
1737 	}
1738 	dt = tj - tj0;
1739 
1740 	if (dt != 0 && iprint >=  100) {
1741 		cout << "\nPiece    " << *nint << " f1, f2 at start point " << f1 << " " << f2 << "\n",
1742 				cout << "Distance to the next break point =  " << dt << "\n";
1743 		cout << "Distance to the stationary point =  " << dtm << "\n";
1744 	}
1745 
1746 	/*     If a minimizer is within this interval, */
1747 	/*     locate the GCP and return. */
1748 	if (dtm < dt) {
1749 		goto L888;
1750 	}
1751 	/*     Otherwise fix one variable and */
1752 	/*     reset the corresponding component of d to zero. */
1753 	tsum += dt;
1754 	--nleft;
1755 	++iter;
1756 	dibp = d[ibp];
1757 	d[ibp] = 0.;
1758 	if (dibp > 0.) {
1759 		zibp = u[ibp] - x[ibp];
1760 		xcp[ibp] = u[ibp];
1761 		iwhere[ibp] = 2;
1762 	} else {
1763 		zibp = l[ibp] - x[ibp];
1764 		xcp[ibp] = l[ibp];
1765 		iwhere[ibp] = 1;
1766 	}
1767 	if (iprint >= 100) cout << "Variable  " << ibp << "  is fixed.\n";
1768 	if (nleft == 0 && nbreak == n) {
1769 		/*                           all n variables are fixed, */
1770 		/*                          return with xcp as GCP. */
1771 		dtm = dt;
1772 		goto L999;
1773 	}
1774 	/*     Update the derivative information. */
1775 	++(*nint);
1776 	dibp2 = dibp * dibp;
1777 	/*     Update f1 and f2. */
1778 	/*      temporarily set f1 and f2 for col=0. */
1779 	f1 += dt * f2 + dibp2 - *theta * dibp * zibp;
1780 	f2 -= *theta * dibp2;
1781 	if (*col > 0) {
1782 		/*                update c = c + dt*p. */
1783 		daxpy(&col2, &dt, &p[1], &c__1, &c[1], &c__1);
1784 		/*         choose wbp, */
1785 		/*         the row of W corresponding to the breakpoint encountered. */
1786 		pointr = *head;
1787 		for (j = 1; j <= *col; ++j) {
1788 			wbp[j] = wy[ibp + pointr * wy_dim1];
1789 			wbp[*col + j] = *theta * ws[ibp + pointr * ws_dim1];
1790 			pointr = pointr % m + 1;
1791 		}
1792 		/*         compute (wbp)Mc, (wbp)Mp, and (wbp)M(wbp)'. */
1793 		bmv(m, &sy[sy_offset], &wt[wt_offset], col, &wbp[1], &v[1], info);
1794 		if (*info != 0) {
1795 			return;
1796 		}
1797 		wmc = ddot(&col2,  &c[1], &c__1, &v[1], &c__1);
1798 		wmp = ddot(&col2,  &p[1], &c__1, &v[1], &c__1);
1799 		wmw = ddot(&col2,&wbp[1], &c__1, &v[1], &c__1);
1800 		/*         update p = p - dibp*wbp. */
1801 		d__1 = -dibp;
1802 		daxpy(&col2, &d__1, &wbp[1], &c__1, &p[1], &c__1);
1803 		/*         complete updating f1 and f2 while col > 0. */
1804 		f1 += dibp * wmc;
1805 		f2 += (2. * dibp * wmp - dibp2 * wmw);
1806 	}
1807 	if(f2 < (d__1 = *epsmch * f2_org__)) f2 = d__1;
1808 	if (nleft > 0) {
1809 		dtm = -f1 / f2;
1810 		goto L777;
1811 		/*           to repeat the loop for unsearched intervals. */
1812 	} else if (bnded) {
1813 		f1 = 0.;
1814 		f2 = 0.;
1815 		dtm = 0.;
1816 	} else {
1817 		dtm = -f1 / f2;
1818 	}
1819 	/* ------------------- the end of the loop ------------------------------- */
1820 	L888:
1821 	if (iprint >= 99) {
1822 		cout << "\nGCP found in this segment\n";
1823 		cout << "Piece    " << *nint << " f1, f2 at start point " << f1 << " " << f2 << "\n";
1824 		cout << "Distance to the stationary point =  " << dtm << "\n";
1825 	}
1826 
1827 	if (dtm <= 0.) {
1828 		dtm = 0.;
1829 	}
1830 	tsum += dtm;
1831 	/*     Move free variables (i.e., the ones w/o breakpoints) and */
1832 	/*     the variables whose breakpoints haven't been reached. */
1833 	daxpy(&n, &tsum, &d[1], &c__1, &xcp[1], &c__1);
1834 	L999:
1835 	/*     Update c = c + dtm*p = W'(x^c - x) */
1836 	/*     which will be used in computing r = Z'(B(x^c - x) + g). */
1837 	if (*col > 0) {
1838 		daxpy(&col2, &dtm, &p[1], &c__1, &c[1], &c__1);
1839 	}
1840 	if (iprint >= 100) {
1841 		cout << "Cauchy X =  ";
1842 		for(i = 1; i <= n; i++) cout << xcp[i] << " ";
1843 		cout << "\n";
1844 	}
1845 
1846 	if (iprint >= 99)
1847 		cout << "\n---------------- exit CAUCHY----------------------\n\n";
1848 	return;
1849 } /* cauchy */
1850 /* ====================== The end of cauchy ============================== */
1851 
freev(int n,int * nfree,int * indx,int * nenter,int * ileave,int * indx2,int * iwhere,int * wrk,int * updatd,int * cnstnd,int iprint,int * iter)1852 void freev(int n, int *nfree, int *indx,
1853 		int *nenter, int *ileave, int *indx2, int *iwhere,
1854 		int *wrk, int *updatd, int *cnstnd, int iprint,
1855 		int *iter)
1856 {
1857 	/*    ************
1858 
1859 	 Subroutine freev
1860 
1861 	 This subroutine counts the entering and leaving variables when
1862 	 iter > 0, and finds the index set of free and active variables
1863 	 at the GCP.
1864 
1865 	 cnstnd is a int variable indicating whether bounds are present
1866 
1867 	 indx is an int array of dimension n
1868 	 for i=1,...,nfree, indx(i) are the indices of free variables
1869 	 for i=nfree+1,...,n, indx(i) are the indices of bound variables
1870 	 On entry after the first iteration, indx gives
1871 	 the free variables at the previous iteration.
1872 	 On exit it gives the free variables based on the determination
1873 	 in cauchy using the array iwhere.
1874 
1875 	 indx2 is an int array of dimension n
1876 	 On entry indx2 is unspecified.
1877 	 On exit with iter>0, indx2 indicates which variables
1878 	 have changed status since the previous iteration.
1879 	 For i= 1,...,nenter, indx2(i) have changed from bound to free.
1880 	 For i= ileave+1,...,n, indx2(i) have changed from free to bound.
1881 
1882 
1883 	 *     *  *
1884 
1885 	 NEOS, November 1994. (Latest revision June 1996.)
1886 	 Optimization Technology Center.
1887 	 Argonne National Laboratory and Northwestern University.
1888 	 Written by
1889 	 Ciyou Zhu
1890 	 in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
1891 
1892 	 ************
1893 	 */
1894 
1895 	/* System generated locals */
1896 	int i__1;
1897 
1898 	/* Local variables */
1899 	int iact, i, k;
1900 
1901 	/* Parameter adjustments */
1902 	--iwhere;
1903 	--indx2;
1904 	--indx;
1905 
1906 	/* Function Body */
1907 	*nenter = 0;
1908 	*ileave = n + 1;
1909 	if (*iter > 0 && *cnstnd) {/* count the entering and leaving variables. */
1910 		i__1 = *nfree;
1911 		for (i = 1; i <= i__1; ++i) {
1912 			k = indx[i];
1913 			if (iwhere[k] > 0) {
1914 				--(*ileave);
1915 				indx2[*ileave] = k;
1916 				if (iprint >= 100)
1917 					cout << "Variable " << k << " leaves the set of free variables\n";
1918 			}
1919 			/* L20: */
1920 		}
1921 		for (i = *nfree + 1; i <= n; ++i) {
1922 			k = indx[i];
1923 			if (iwhere[k] <= 0) {
1924 				++(*nenter);
1925 				indx2[*nenter] = k;
1926 				if (iprint >= 100)
1927 					cout << "Variable " << k << " enters the set of free variables\n";
1928 			}
1929 			/* L22: */
1930 			if (iprint >= 100)
1931 				cout << n + 1 - *ileave << " variables leave; " << *nenter << " variables enter\n";
1932 		}
1933 	}
1934 	*wrk = *ileave < n + 1 || *nenter > 0 || *updatd;
1935 	/*     Find the index set of free and active variables at the GCP. */
1936 	*nfree = 0;
1937 	iact = n + 1;
1938 	for (i = 1; i <= n; ++i) {
1939 		if (iwhere[i] <= 0) {
1940 			++(*nfree);
1941 			indx[*nfree] = i;
1942 		} else {
1943 			--iact;
1944 			indx[iact] = i;
1945 		}
1946 	}
1947 	if (iprint >= 99)
1948 		cout << *nfree << "  variables are free at GCP on iteration " << *iter + 1 << endl;
1949 	return;
1950 } /* freev */
1951 /* ======================= The end of freev ============================== */
1952 
formk(int n,int * nsub,int * ind,int * nenter,int * ileave,int * indx2,int * iupdat,int * updatd,double * wn,double * wn1,int m,double * ws,double * wy,double * sy,double * theta,int * col,int * head,int * info)1953 void formk(int n, int *nsub, int *ind, int * nenter, int *ileave,
1954 		int *indx2, int *iupdat, int * updatd, double *wn,
1955 		double *wn1, int m, double *ws, double *wy, double *sy,
1956 		double *theta, int *col, int *head, int *info)
1957 {
1958 	/*     ************
1959 
1960 	 Subroutine formk
1961 
1962 	 This subroutine forms  the LEL^T factorization of the indefinite
1963 
1964 	 matrix     K = [-D -Y'ZZ'Y/theta       L_a'-R_z'  ]
1965 	 [L_a -R_z         theta*S'AA'S ]
1966 	 where E = [-I  0]
1967 	 [ 0  I]
1968 	 The matrix K can be shown to be equal to the matrix M^[-1]N
1969 	 occurring in section 5.1 of [1], as well as to the matrix
1970 	 Mbar^[-1] Nbar in section 5.3.
1971 
1972 	 n is an integer variable.
1973 	 On entry n is the dimension of the problem.
1974 	 On exit n is unchanged.
1975 
1976 	 nsub is an integer variable
1977 	 On entry nsub is the number of subspace variables in free set.
1978 	 On exit nsub is not changed.
1979 
1980 	 ind is an integer array of dimension nsub.
1981 	 On entry ind specifies the indices of subspace variables.
1982 	 On exit ind is unchanged.
1983 
1984 	 nenter is an integer variable.
1985 	 On entry nenter is the number of variables entering the
1986 	 free set.
1987 	 On exit nenter is unchanged.
1988 
1989 	 ileave is an integer variable.
1990 	 On entry indx2(ileave),...,indx2(n) are the variables leaving
1991 	 the free set.
1992 	 On exit ileave is unchanged.
1993 
1994 	 indx2 is an integer array of dimension n.
1995 	 On entry indx2(1),...,indx2(nenter) are the variables entering
1996 	 the free set, while indx2(ileave),...,indx2(n) are the
1997 	 variables leaving the free set.
1998 	 On exit indx2 is unchanged.
1999 	 p
2000 	 iupdat is an integer variable.
2001 	 On entry iupdat is the total number of BFGS updates made so far.
2002 	 On exit iupdat is unchanged.
2003 
2004 	 updatd is a logical variable.
2005 	 On entry 'updatd' is true if the L-BFGS matrix is updatd.
2006 	 On exit 'updatd' is unchanged.
2007 
2008 	 wn is a double precision array of dimension 2m x 2m.
2009 	 On entry wn is unspecified.
2010 	 On exit the upper triangle of wn stores the LEL^T factorization
2011 	 of the 2*col x 2*col indefinite matrix
2012 	 [-D -Y'ZZ'Y/theta       L_a'-R_z'  ]
2013 	 [L_a -R_z         theta*S'AA'S ]
2014 
2015 	 wn1 is a double precision array of dimension 2m x 2m.
2016 	 On entry wn1 stores the lower triangular part of
2017 	 [Y' ZZ'Y    L_a'+R_z']
2018 	 [L_a+R_z    S'AA'S     ]
2019 	 in the previous iteration.
2020 	 On exit wn1 stores the corresponding updated matrices.
2021 	 The purpose of wn1 is just to store these inner products
2022 	 so they can be easily updated and inserted into wn.
2023 
2024 	 m is an integer variable.
2025 	 On entry m is the maximum number of variable metric corrections
2026 	 used to define the limited memory matrix.
2027 	 On exit m is unchanged.
2028 
2029 	 ws, wy, sy, and wtyy are double precision arrays;
2030 	 theta is a double precision variable;
2031 	 col is an integer variable;
2032 	 head is an integer variable.
2033 	 On entry they store the information defining the
2034 	 limited memory BFGS matrix:
2035 	 ws(n,m) stores S, a set of s-vectors;
2036 	 wy(n,m) stores Y, a set of y-vectors;
2037 	 sy(m,m) stores S'Y;
2038 	 wtyy(m,m) stores the Cholesky factorization
2039 	 of (theta*S'S+LD^(-1)L')
2040 	 theta is the scaling factor specifying B_0 = theta I;
2041 	 col is the number of variable metric corrections stored;
2042 	 head is the location of the 1st s- (or y-) vector in S (or Y).
2043 	 On exit they are unchanged.
2044 
2045 	 info is an integer variable.
2046 	 On entry info is unspecified.
2047 	 On exit info =  0 for normal return;
2048 	 = -1 when the 1st Cholesky factorization failed;
2049 	 = -2 when the 2st Cholesky factorization failed.
2050 
2051 	 Subprograms called:
2052 
2053 	 Linpack ... dcopy, dpofa, dtrsl.
2054 
2055 
2056 	 References:
2057 	 [1] R. H. Byrd, P. Lu, J. Nocedal and C. Zhu, ``A limited
2058 	 memory algorithm for bound constrained optimization'',
2059 	 SIAM J. Scientific Computing 16 (1995), no. 5, pp. 1190--1208.
2060 
2061 	 [2] C. Zhu, R.H. Byrd, P. Lu, J. Nocedal, ``L-BFGS-B: a
2062 	 limited memory FORTRAN code for solving bound constrained
2063 	 optimization problems'', Tech. Report, NAM-11, EECS Department,
2064 	 Northwestern University, 1994.
2065 
2066 	 (Postscript files of these papers are available via anonymous
2067 	 ftp to ece.nwu.edu in the directory pub/lbfgs/lbfgs_bcm.)
2068 
2069 	 *  *     *
2070 
2071 	 NEOS, November 1994. (Latest revision April 1997.)
2072 	 Optimization Technology Center.
2073 	 Argonne National Laboratory and Northwestern University.
2074 	 Written by
2075 	 Ciyou Zhu
2076 	 in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
2077 
2078 	 ************
2079 	 */
2080 
2081 	/* System generated locals */
2082 	int wn_dim1, wn_offset, wn1_dim1, wn1_offset, ws_dim1, ws_offset,
2083 	wy_dim1, wy_offset, sy_dim1, sy_offset, i__1, i__2;
2084 
2085 	/* Local variables */
2086 	int dend, pend;
2087 	int upcl;
2088 	double temp1, temp2, temp3, temp4;
2089 	int i, k;
2090 	int ipntr, jpntr, k1, m2, dbegin, is, js, iy, jy, pbegin, is1, js1,
2091 	col2;
2092 
2093 	/* Parameter adjustments */
2094 	--indx2;
2095 	--ind;
2096 	sy_dim1 = m;
2097 	sy_offset = 1 + sy_dim1 * 1;
2098 	sy -= sy_offset;
2099 	wy_dim1 = n;
2100 	wy_offset = 1 + wy_dim1 * 1;
2101 	wy -= wy_offset;
2102 	ws_dim1 = n;
2103 	ws_offset = 1 + ws_dim1 * 1;
2104 	ws -= ws_offset;
2105 	wn1_dim1 = 2 * m;
2106 	wn1_offset = 1 + wn1_dim1 * 1;
2107 	wn1 -= wn1_offset;
2108 	wn_dim1 = 2 * m;
2109 	wn_offset = 1 + wn_dim1 * 1;
2110 	wn -= wn_offset;
2111 
2112 	/* Function Body */
2113 
2114 	/*     Form the lower triangular part of */
2115 	/*         WN1 = [Y' ZZ'Y      L_a'+R_z'] */
2116 	/*               [L_a+R_z      S'AA'S   ] */
2117 	/*      where L_a is the strictly lower triangular part of S'AA'Y */
2118 	/*        R_z is the upper triangular part of S'ZZ'Y. */
2119 
2120 	if (*updatd) {
2121 		if (*iupdat > m) {/*        shift old part of WN1. */
2122 			i__1 = m - 1;
2123 			for (jy = 1; jy <= i__1; ++jy) {
2124 				js = m + jy;
2125 				i__2 = m - jy;
2126 				dcopy(&i__2, &wn1[jy + 1 + (jy + 1)* wn1_dim1], &c__1,
2127 						&wn1[jy + jy * wn1_dim1], &c__1);
2128 				dcopy(&i__2, &wn1[js + 1 + (js + 1)* wn1_dim1], &c__1,
2129 						&wn1[js + js * wn1_dim1], &c__1);
2130 				i__2 = m - 1;
2131 				dcopy(&i__2, &wn1[m + 2 + (jy + 1) * wn1_dim1], &c__1,
2132 						&wn1[m + 1 + jy * wn1_dim1], &c__1);
2133 				/* L10: */
2134 			}
2135 		}
2136 		/*        put new rows in blocks (1,1), (2,1) and (2,2). */
2137 		pbegin = 1;
2138 		pend = *nsub;
2139 		dbegin = *nsub + 1;
2140 		dend = n;
2141 		iy = *col;
2142 		is = m + *col;
2143 		ipntr = *head + *col - 1;
2144 		if (ipntr > m) {
2145 			ipntr -= m;
2146 		}
2147 		jpntr = *head;
2148 		i__1 = *col;
2149 		for (jy = 1; jy <= i__1; ++jy) {
2150 			js = m + jy;
2151 			temp1 = 0.;
2152 			temp2 = 0.;
2153 			temp3 = 0.;
2154 			/*           compute element jy of row 'col' of Y'ZZ'Y */
2155 			for (k = pbegin; k <= pend; ++k) {
2156 				k1 = ind[k];
2157 				temp1 += wy[k1 + ipntr * wy_dim1] * wy[k1 + jpntr * wy_dim1];
2158 			}
2159 			/*           compute elements jy of row 'col' of L_a and S'AA'S */
2160 			for (k = dbegin; k <= dend; ++k) {
2161 				k1 = ind[k];
2162 				temp2 += ws[k1 + ipntr * ws_dim1] * ws[k1 + jpntr * ws_dim1];
2163 				temp3 += ws[k1 + ipntr * ws_dim1] * wy[k1 + jpntr * wy_dim1];
2164 			}
2165 			wn1[iy + jy * wn1_dim1] = temp1;
2166 			wn1[is + js * wn1_dim1] = temp2;
2167 			wn1[is + jy * wn1_dim1] = temp3;
2168 			jpntr = jpntr % m + 1;
2169 			/* L20: */
2170 		}
2171 		/*        put new column in block (2,1). */
2172 		jy = *col;
2173 		jpntr = *head + *col - 1;
2174 		if (jpntr > m) {
2175 			jpntr -= m;
2176 		}
2177 		ipntr = *head;
2178 		i__1 = *col;
2179 		for (i = 1; i <= i__1; ++i) {
2180 			is = m + i;
2181 			temp3 = 0.;
2182 			/*           compute element i of column 'col' of R_z */
2183 			for (k = pbegin; k <= pend; ++k) {
2184 				k1 = ind[k];
2185 				temp3 += ws[k1 + ipntr * ws_dim1] * wy[k1 + jpntr * wy_dim1];
2186 			}
2187 			ipntr = ipntr % m + 1;
2188 			wn1[is + jy * wn1_dim1] = temp3;
2189 			/* L30: */
2190 		}
2191 		upcl = *col - 1;
2192 	} else {
2193 		upcl = *col;
2194 	}
2195 	/*     modify the old parts in blocks (1,1) and (2,2) due to changes */
2196 	/*     in the set of free variables. */
2197 	ipntr = *head;
2198 	for (iy = 1; iy <= upcl; ++iy) {
2199 		is = m + iy;
2200 		jpntr = *head;
2201 		for (jy = 1; jy <= iy; ++jy) {
2202 			js = m + jy;
2203 			temp1 = 0.;
2204 			temp2 = 0.;
2205 			temp3 = 0.;
2206 			temp4 = 0.;
2207 			for (k = 1; k <= *nenter; ++k) {
2208 				k1 = indx2[k];
2209 				temp1 += wy[k1 + ipntr * wy_dim1] * wy[k1 + jpntr * wy_dim1];
2210 				temp2 += ws[k1 + ipntr * ws_dim1] * ws[k1 + jpntr * ws_dim1];
2211 			}
2212 			for (k = *ileave; k <= n; ++k) {
2213 				k1 = indx2[k];
2214 				temp3 += wy[k1 + ipntr * wy_dim1] * wy[k1 + jpntr * wy_dim1];
2215 				temp4 += ws[k1 + ipntr * ws_dim1] * ws[k1 + jpntr * ws_dim1];
2216 			}
2217 			wn1[iy + jy * wn1_dim1] = wn1[iy + jy * wn1_dim1] + temp1 - temp3;
2218 			wn1[is + js * wn1_dim1] = wn1[is + js * wn1_dim1] - temp2 + temp4;
2219 			jpntr = jpntr % m + 1;
2220 			/* L40: */
2221 		}
2222 		ipntr = ipntr % m + 1;
2223 		/* L45: */
2224 	}
2225 	/*     modify the old parts in block (2,1). */
2226 	ipntr = *head;
2227 	for (is = m + 1; is <= m + upcl; ++is) {
2228 		jpntr = *head;
2229 		for (jy = 1; jy <= upcl; ++jy) {
2230 			temp1 = 0.;
2231 			temp3 = 0.;
2232 			for (k = 1; k <= *nenter; ++k) {
2233 				k1 = indx2[k];
2234 				temp1 += ws[k1 + ipntr * ws_dim1] * wy[k1 + jpntr * wy_dim1];
2235 			}
2236 			for (k = *ileave; k <= n; ++k) {
2237 				k1 = indx2[k];
2238 				temp3 += ws[k1 + ipntr * ws_dim1] * wy[k1 + jpntr * wy_dim1];
2239 			}
2240 			if (is <= jy + m) {
2241 				wn1[is + jy * wn1_dim1] +=  temp1 - temp3;
2242 			} else {
2243 				wn1[is + jy * wn1_dim1] += -temp1 + temp3;
2244 			}
2245 			jpntr = jpntr % m + 1;
2246 			/* L55: */
2247 		}
2248 		ipntr = ipntr % m + 1;
2249 		/* L60: */
2250 	}
2251 	/*     Form the upper triangle of WN = [D+Y' ZZ'Y/theta      -L_a'+R_z' ] */
2252 	/*                       [-L_a +R_z     S'AA'S*theta] */
2253 	m2 = m << 1;
2254 	i__1 = *col;
2255 	for (iy = 1; iy <= i__1; ++iy) {
2256 		is = *col + iy;
2257 		is1 = m + iy;
2258 		i__2 = iy;
2259 		for (jy = 1; jy <= i__2; ++jy) {
2260 			js = *col + jy;
2261 			js1 = m + jy;
2262 			wn[jy + iy * wn_dim1] = wn1[iy + jy * wn1_dim1] / *theta;
2263 			wn[js + is * wn_dim1] = wn1[is1 + js1 * wn1_dim1] * *theta;
2264 			/* L65: */
2265 		}
2266 		i__2 = iy - 1;
2267 		for (jy = 1; jy <= i__2; ++jy) {
2268 			wn[jy + is * wn_dim1] = -wn1[is1 + jy * wn1_dim1];
2269 		}
2270 		i__2 = *col;
2271 		for (jy = iy; jy <= i__2; ++jy) {
2272 			wn[jy + is * wn_dim1] = wn1[is1 + jy * wn1_dim1];
2273 		}
2274 		wn[iy + iy * wn_dim1] += sy[iy + iy * sy_dim1];
2275 		/* L70: */
2276 	}
2277 	/*     Form the upper triangle of */
2278 	/*        WN= [  LL'          L^-1(-L_a'+R_z')] */
2279 	/*        [(-L_a +R_z)L'^-1   S'AA'S*theta  ] */
2280 	/*      first Cholesky factor (1,1) block of wn to get LL' */
2281 	/*                with L' stored in the upper triangle of wn. */
2282 	dpofa(&wn[wn_offset], &m2, col, info);
2283 	if (*info != 0) {
2284 		*info = -1;
2285 		return;
2286 	}
2287 	/*      then form L^-1(-L_a'+R_z') in the (1,2) block. */
2288 	col2 = *col << 1;
2289 	for (js = *col + 1; js <= col2; ++js) {
2290 		dtrsl(&wn[wn_offset], &m2, col,
2291 				&wn[js * wn_dim1 + 1], &c__11, info);
2292 	}
2293 	/*     Form S'AA'S*theta + (L^-1(-L_a'+R_z'))'L^-1(-L_a'+R_z') in the */
2294 	/*      upper triangle of (2,2) block of wn. */
2295 	for (is = *col + 1; is <= col2; ++is) {
2296 		for (js = is; js <= col2; ++js) {
2297 			wn[is + js * wn_dim1] +=
2298 					ddot(col, &wn[is * wn_dim1 + 1], &c__1,
2299 							&wn[js * wn_dim1 + 1], &c__1);
2300 		}
2301 		/* L72: */
2302 	}
2303 	/*     Cholesky factorization of (2,2) block of wn. */
2304 	dpofa(&wn[*col + 1 + (*col + 1) * wn_dim1], &m2, col, info);
2305 	if (*info != 0) {
2306 		*info = -2;
2307 		return;
2308 	}
2309 	return;
2310 } /* formk */
2311 /* ======================= The end of formk ============================== */
2312 
cmprlb(int n,int m,double * x,double * g,double * ws,double * wy,double * sy,double * wt,double * z,double * r,double * wa,int * indx,double * theta,int * col,int * head,int * nfree,int * cnstnd,int * info)2313 void cmprlb(int n, int m, double *x,
2314 		double *g, double *ws, double *wy, double *sy,
2315 		double *wt, double *z, double *r, double *wa,
2316 		int *indx, double *theta, int *col, int *head,
2317 		int *nfree, int *cnstnd, int *info)
2318 {
2319 	/*    ************
2320 
2321 	 Subroutine cmprlb
2322 
2323 	 This subroutine computes r=-Z'B(xcp-xk)-Z'g by using
2324 	 wa(2m+1)=W'(xcp-x) from subroutine cauchy.
2325 
2326 	 Subprograms called:
2327 
2328 	 L-BFGS-B Library ... bmv.
2329 
2330 
2331 	 *     *  *
2332 
2333 	 NEOS, November 1994. (Latest revision June 1996.)
2334 	 Optimization Technology Center.
2335 	 Argonne National Laboratory and Northwestern University.
2336 	 Written by
2337 	 Ciyou Zhu
2338 	 in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
2339 
2340 	 ************
2341 	 */
2342 
2343 	/* System generated locals */
2344 	int ws_dim1, ws_offset, wy_dim1, wy_offset, sy_dim1, sy_offset,
2345 	wt_dim1, wt_offset, Col, n_f;
2346 
2347 	/* Local variables */
2348 	int i, j, k;
2349 	double a1, a2;
2350 	int pointr;
2351 
2352 	/* Parameter adjustments */
2353 	--indx;
2354 	--r;
2355 	--z;
2356 	--g;
2357 	--x;
2358 	--wa;
2359 	wt_dim1 = m;
2360 	wt_offset = 1 + wt_dim1 * 1;
2361 	wt -= wt_offset;
2362 	sy_dim1 = m;
2363 	sy_offset = 1 + sy_dim1 * 1;
2364 	sy -= sy_offset;
2365 	wy_dim1 = n;
2366 	wy_offset = 1 + wy_dim1 * 1;
2367 	wy -= wy_offset;
2368 	ws_dim1 = n;
2369 	ws_offset = 1 + ws_dim1 * 1;
2370 	ws -= ws_offset;
2371 
2372 	/* Function Body */
2373 	Col = *col;
2374 	if (! (*cnstnd) && Col > 0) {
2375 		for (i = 1; i <= n; ++i)
2376 			r[i] = -g[i];
2377 	}
2378 	else {
2379 		n_f = *nfree;
2380 		for (i = 1; i <= n_f; ++i) {
2381 			k = indx[i];
2382 			r[i] = -(*theta) * (z[k] - x[k]) - g[k];
2383 		}
2384 		bmv(m, &sy[sy_offset], &wt[wt_offset], col,
2385 				&wa[(m << 1) + 1], &wa[1], info);
2386 		if (*info != 0) {
2387 			*info = -8;
2388 			return;
2389 		}
2390 		pointr = *head;
2391 		for (j = 1; j <= Col; ++j) {
2392 			a1 = wa[j];
2393 			a2 = *theta * wa[Col + j];
2394 			for (i = 1; i <= n_f; ++i) {
2395 				k = indx[i];
2396 				r[i] += wy[k + pointr * wy_dim1] * a1 +
2397 						ws[k + pointr * ws_dim1] * a2;
2398 			}
2399 			pointr = pointr % m + 1;
2400 		}
2401 	}
2402 	return;
2403 } /* cmprlb */
2404 /* ======================= The end of cmprlb ============================= */
2405 
2406 
subsm(int n,int m,int * nsub,int * ind,double * l,double * u,int * nbd,double * x,double * d,double * ws,double * wy,double * theta,int * col,int * head,int * iword,double * wv,double * wn,int iprint,int * info)2407 void subsm(int n, int m, int *nsub, int *ind,
2408 		double *l, double *u, int *nbd, double *x,
2409 		double *d, double *ws, double *wy, double *theta,
2410 		int *col, int *head, int *iword, double *wv,
2411 		double *wn, int iprint, int *info)
2412 {
2413 	/*    ************
2414 
2415 	 Subroutine subsm
2416 
2417 	 Given xcp, l, u, r, an index set that specifies
2418 	 the active set at xcp, and an l-BFGS matrix B
2419 	 (in terms of WY, WS, SY, WT, head, col, and theta),
2420 	 this subroutine computes an approximate solution
2421 	 of the subspace problem
2422 
2423 	 (P)   min Q(x) = r'(x-xcp) + 1/2 (x-xcp)' B (x-xcp)
2424 
2425 	 subject to l <= x <= u
2426 	 x_i = xcp_i   for all i in A(xcp)
2427 
2428 	 along the subspace unconstrained Newton direction
2429 
2430 	 d = -(Z'BZ)^(-1) r.
2431 
2432 	 The formula for the Newton direction, given the L-BFGS matrix
2433 	 and the Sherman-Morrison formula, is
2434 
2435 	 d = (1/theta)r + (1/theta*2) Z'WK^(-1)W'Z r.
2436 
2437 	 where
2438 	 K = [-D -Y'ZZ'Y/theta        L_a'-R_z'  ]
2439 	 [L_a -R_z          theta*S'AA'S ]
2440 
2441 	 Note that this procedure for computing d differs
2442 	 from that described in [1]. One can show that the matrix K is
2443 	 equal to the matrix M^[-1]N in that paper.
2444 
2445 	 n is an integer variable.
2446 	 On entry n is the dimension of the problem.
2447 	 On exit n is unchanged.
2448 
2449 	 m is an integer variable.
2450 	 On entry m is the maximum number of variable metric corrections
2451 	 used to define the limited memory matrix.
2452 	 On exit m is unchanged.
2453 
2454 	 nsub is an integer variable.
2455 	 On entry nsub is the number of free variables.
2456 	 On exit nsub is unchanged.
2457 
2458 	 ind is an integer array of dimension nsub.
2459 	 On entry ind specifies the coordinate indices of free variables.
2460 	 On exit ind is unchanged.
2461 
2462 	 l is a double precision array of dimension n.
2463 	 On entry l is the lower bound of x.
2464 	 On exit l is unchanged.
2465 
2466 	 u is a double precision array of dimension n.
2467 	 On entry u is the upper bound of x.
2468 	 On exit u is unchanged.
2469 
2470 	 nbd is a integer array of dimension n.
2471 	 On entry nbd represents the type of bounds imposed on the
2472 	 variables, and must be specified as follows:
2473 	 nbd(i)=0 if x(i) is unbounded,
2474 	 1 if x(i) has only a lower bound,
2475 	 2 if x(i) has both lower and upper bounds, and
2476 	 3 if x(i) has only an upper bound.
2477 	 On exit nbd is unchanged.
2478 
2479 	 x is a double precision array of dimension n.
2480 	 On entry x specifies the Cauchy point xcp.
2481 	 On exit x(i) is the minimizer of Q over the subspace of
2482 	 free variables.
2483 
2484 	 d is a double precision array of dimension n.
2485 	 On entry d is the reduced gradient of Q at xcp.
2486 	 On exit d is the Newton direction of Q.
2487 
2488 	 ws and wy are double precision arrays;
2489 	 theta is a double precision variable;
2490 	 col is an integer variable;
2491 	 head is an integer variable.
2492 	 On entry they store the information defining the
2493 	 limited memory BFGS matrix:
2494 	 ws(n,m) stores S, a set of s-vectors;
2495 	 wy(n,m) stores Y, a set of y-vectors;
2496 	 theta is the scaling factor specifying B_0 = theta I;
2497 	 col is the number of variable metric corrections stored;
2498 	 head is the location of the 1st s- (or y-) vector in S (or Y).
2499 	 On exit they are unchanged.
2500 
2501 	 iword is an integer variable.
2502 	 On entry iword is unspecified.
2503 	 On exit iword specifies the status of the subspace solution.
2504 	 iword = 0 if the solution is in the box,
2505 	 1 if some bound is encountered.
2506 
2507 	 wv is a double precision working array of dimension 2m.
2508 
2509 	 wn is a double precision array of dimension 2m x 2m.
2510 	 On entry the upper triangle of wn stores the LEL^T factorization
2511 	 of the indefinite matrix
2512 
2513 	 K = [-D -Y'ZZ'Y/theta     L_a'-R_z'  ]
2514 	 [L_a -R_z           theta*S'AA'S ]
2515 	 where E = [-I  0]
2516 	 [ 0  I]
2517 	 On exit wn is unchanged.
2518 
2519 	 iprint is an INTEGER variable that must be set by the user.
2520 	 It controls the frequency and type of output generated:
2521 	 iprint<0    no output is generated;
2522 	 iprint=0    print only one line at the last iteration;
2523 	 0<iprint<99 print also f and |proj g| every iprint iterations;
2524 	 iprint=99   print details of every iteration except n-vectors;
2525 	 iprint=100  print also the changes of active set and final x;
2526 	 iprint>100  print details of every iteration including x and g;
2527 	 When iprint > 0, the file iterate.dat will be created to
2528 	 summarize the iteration.
2529 
2530 	 info is an integer variable.
2531 	 On entry info is unspecified.
2532 	 On exit info = 0       for normal return,
2533 	 = nonzero for abnormal return
2534 	 when the matrix K is ill-conditioned.
2535 
2536 	 Subprograms called:
2537 
2538 	 Linpack dtrsl.
2539 
2540 
2541 	 References:
2542 
2543 	 [1] R. H. Byrd, P. Lu, J. Nocedal and C. Zhu, ``A limited
2544 	 memory algorithm for bound constrained optimization'',
2545 	 SIAM J. Scientific Computing 16 (1995), no. 5, pp. 1190--1208.
2546 
2547 
2548 
2549 	 *  *  *
2550 
2551 	 NEOS, November 1994. (Latest revision June 1996.)
2552 	 Optimization Technology Center.
2553 	 Argonne National Laboratory and Northwestern University.
2554 	 Written by
2555 	 Ciyou Zhu
2556 	 in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
2557 
2558 	 ************
2559 	 */
2560 
2561 	/* System generated locals */
2562 	int ws_offset, wn_dim1, wn_offset;
2563 
2564 	/* Local variables */
2565 	double alpha, dk, temp1, temp2;
2566 	int i, j, k, m2, js, jy, pointr, ibd = 0, col2, ns;
2567 
2568 	/* Parameter adjustments */
2569 	--d;
2570 	--u;
2571 	--l;
2572 	--x;
2573 	--ind;
2574 	--nbd;
2575 	--wv;
2576 	wn_dim1 = 2 * m;
2577 	wn_offset = 1 + wn_dim1 * 1;
2578 	wn -= wn_offset;
2579 	/* ws[] and wy[] are both  [n x m ] :*/
2580 	ws_offset = 1 + n * 1;
2581 	ws -= ws_offset;
2582 	wy -= ws_offset;
2583 
2584 	ns = *nsub;
2585 	if (ns <= 0)
2586 		return;
2587 
2588 	/*     Compute wv = W'Zd. */
2589 	pointr = *head;
2590 	for (i = 1; i <= *col; ++i) {
2591 		temp1 = 0.;
2592 		temp2 = 0.;
2593 		for (j = 1; j <= ns; ++j) {
2594 			k = ind[j];
2595 			temp1 += wy[k + pointr * n] * d[j];
2596 			temp2 += ws[k + pointr * n] * d[j];
2597 		}
2598 		wv[i] = temp1;
2599 		wv[*col + i] = *theta * temp2;
2600 		pointr = pointr % m + 1;
2601 		/* L20: */
2602 	}
2603 	/*     Compute wv:=K^(-1)wv. */
2604 	m2 = m << 1;
2605 	col2 = *col << 1;
2606 	dtrsl(&wn[wn_offset], &m2, &col2, &wv[1], &c__11, info);
2607 	if (*info != 0) {
2608 		return;
2609 	}
2610 	for (i = 1; i <= *col; ++i)
2611 		wv[i] = -wv[i];
2612 
2613 	dtrsl(&wn[wn_offset], &m2, &col2, &wv[1], &c__1, info);
2614 	if (*info != 0) {
2615 		return;
2616 	}
2617 	/*     Compute d = (1/theta)d + (1/theta**2)Z'W wv. */
2618 	pointr = *head;
2619 	for (jy = 1; jy <= *col; ++jy) {
2620 		js = *col + jy;
2621 		for (i = 1; i <= ns; ++i) {
2622 			k = ind[i];
2623 			d[i] += (wy[k + pointr * n] * wv[jy] / *theta +
2624 					ws[k + pointr * n] * wv[js]);
2625 		}
2626 		pointr = pointr % m + 1;
2627 		/* L40: */
2628 	}
2629 
2630 	for (i = 1; i <= ns; ++i)
2631 		d[i] /= *theta;
2632 
2633 	/*     Backtrack to the feasible region. */
2634 	alpha = 1.;
2635 	temp1 = alpha;
2636 	for (i = 1; i <= ns; ++i) {
2637 		k = ind[i];
2638 		dk = d[i];
2639 		if (nbd[k] != 0) {
2640 			if (dk < 0. && nbd[k] <= 2) {
2641 				temp2 = l[k] - x[k];
2642 				if (temp2 >= 0.) {
2643 					temp1 = 0.;
2644 				} else if (dk * alpha < temp2) {
2645 					temp1 = temp2 / dk;
2646 				}
2647 			} else if (dk > 0. && nbd[k] >= 2) {
2648 				temp2 = u[k] - x[k];
2649 				if (temp2 <= 0.) {
2650 					temp1 = 0.;
2651 				} else if (dk * alpha > temp2) {
2652 					temp1 = temp2 / dk;
2653 				}
2654 			}
2655 			if (temp1 < alpha) {
2656 				alpha = temp1;
2657 				ibd = i;
2658 			}
2659 		}
2660 		/* L60: */
2661 	}
2662 	if (alpha < 1.) {
2663 		dk = d[ibd];
2664 		k = ind[ibd];
2665 		if (dk > 0.) {
2666 			x[k] = u[k];
2667 			d[ibd] = 0.;
2668 		} else if (dk < 0.) {
2669 			x[k] = l[k];
2670 			d[ibd] = 0.;
2671 		}
2672 	}
2673 	for (i = 1; i <= ns; ++i)
2674 		x[ind[i]] += alpha * d[i];
2675 
2676 	*iword = (alpha < 1.) ? 1 : 0;
2677 
2678 	return;
2679 } /* subsm */
2680 /* ====================== The end of subsm =============================== */
2681 
lnsrlb(int n,double * l,double * u,int * nbd,double * x,double * f,double * fold,double * gd,double * gdold,double * g,double * d,double * r,double * t,double * z,double * stp,double * dnorm,double * dtd,double * xstep,double * stpmx,int * iter,int * ifun,int * iback,int * nfgv,int * info,char * task,int * boxed,int * cnstnd,char * csave,int * isave,double * dsave)2682 void lnsrlb(int n, double *l, double *u,
2683 		int *nbd, double *x, double *f, double *fold,
2684 		double *gd, double *gdold, double *g, double *d,
2685 		double *r, double *t, double *z, double *stp,
2686 		double *dnorm, double *dtd, double *xstep,
2687 		double *stpmx, int *iter, int *ifun, int *iback, int *nfgv,
2688 		int *info, char *task, int *boxed, int *cnstnd,
2689 		char *csave, int *isave, double *dsave)
2690 {
2691 	/*     **********
2692 
2693 	 Subroutine lnsrlb
2694 
2695 	 This subroutine calls subroutine dcsrch from the Minpack2 library
2696 	 to perform the line search.  Subroutine dscrch is safeguarded so
2697 	 that all trial points lie within the feasible region.
2698 
2699 	 Subprograms called:
2700 
2701 	 Minpack2 Library ... dcsrch.
2702 
2703 	 Linpack ... dtrsl, ddot.
2704 
2705 
2706 	 *     *  *
2707 
2708 	 NEOS, November 1994. (Latest revision June 1996.)
2709 	 Optimization Technology Center.
2710 	 Argonne National Laboratory and Northwestern University.
2711 	 Written by
2712 	 Ciyou Zhu
2713 	 in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
2714 
2715 	 **********
2716 	 */
2717 
2718 	/* For dcsrch(): */
2719 	const double stpmin = 0.;
2720 	const double ftol = .001;
2721 	const double gtol = .9;
2722 	const double xtol = .1;
2723 
2724 	/* System generated locals */
2725 	double d1;
2726 
2727 	/* Local variables */
2728 	int i;
2729 	double a1, a2;
2730 
2731 	/* Parameter adjustments */
2732 	--z;
2733 	--t;
2734 	--r;
2735 	--d;
2736 	--g;
2737 	--x;
2738 	--nbd;
2739 	--u;
2740 	--l;
2741 
2742 	/* Function Body */
2743 	if (strncmp(task, "FG_LN", 5) == 0) {
2744 		goto L556;
2745 	}
2746 	*dtd = ddot(&n, &d[1], &c__1, &d[1], &c__1);
2747 	*dnorm = sqrt(*dtd);
2748 	/*     Determine the maximum step length. */
2749 	*stpmx = 1e10;
2750 	if (*cnstnd) {
2751 		if (*iter == 0) {
2752 			*stpmx = 1.;
2753 		} else {
2754 			for (i = 1; i <= n; ++i) {
2755 				a1 = d[i];
2756 				if (nbd[i] != 0) {
2757 					if (a1 < 0. && nbd[i] <= 2) {
2758 						a2 = l[i] - x[i];
2759 						if (a2 >= 0.) {
2760 							*stpmx = 0.;
2761 						} else if (a1 * *stpmx < a2) {
2762 							*stpmx = a2 / a1;
2763 						}
2764 					} else if (a1 > 0. && nbd[i] >= 2) {
2765 						a2 = u[i] - x[i];
2766 						if (a2 <= 0.) {
2767 							*stpmx = 0.;
2768 						} else if (a1 * *stpmx > a2) {
2769 							*stpmx = a2 / a1;
2770 						}
2771 					}
2772 				}
2773 				/* L43: */
2774 			}
2775 		}
2776 	}
2777 	if (*iter == 0 && ! (*boxed)) {
2778 		d1 = 1. / *dnorm;
2779 		*stp = min(d1,*stpmx);
2780 	} else {
2781 		*stp = 1.;
2782 	}
2783 	dcopy(&n, &x[1], &c__1, &t[1], &c__1);
2784 	dcopy(&n, &g[1], &c__1, &r[1], &c__1);
2785 	*fold = *f;
2786 	*ifun = 0;
2787 	*iback = 0;
2788 	strcpy(csave, "START");
2789 	L556:
2790 	*gd = ddot(&n, &g[1], &c__1, &d[1], &c__1);
2791 	if (*ifun == 0) {
2792 		*gdold = *gd;
2793 		if (*gd >= 0.) {
2794 			/*                 the directional derivative >=0. */
2795 			/*                 Line search is impossible. */
2796 			*info = -4;
2797 			return;
2798 		}
2799 	}
2800 	dcsrch(f, gd, stp,
2801 			ftol, gtol, xtol,
2802 			stpmin, *stpmx,
2803 			csave, isave, dsave);
2804 	*xstep = *stp * *dnorm;
2805 	if (strncmp(csave, "CONV", 4) != 0 && strncmp(csave, "WARN", 4) != 0) {
2806 		strcpy(task, "FG_LNSRCH");
2807 		++(*ifun);
2808 		++(*nfgv);
2809 		*iback = *ifun - 1;
2810 		if (*stp == 1.) {
2811 			dcopy(&n, &z[1], &c__1, &x[1], &c__1);
2812 		} else {
2813 			for (i = 1; i <= n; ++i) {
2814 				x[i] = *stp * d[i] + t[i];
2815 			}
2816 		}
2817 	} else {
2818 		strcpy(task, "NEW_X");
2819 	}
2820 	return;
2821 } /* lnsrlb */
2822 /* ======================= The end of lnsrlb ============================= */
2823 
matupd(int n,int m,double * ws,double * wy,double * sy,double * ss,double * d,double * r,int * itail,int * iupdat,int * col,int * head,double * theta,double * rr,double * dr,double * stp,double * dtd)2824 void matupd(int n, int m, double *ws,
2825 		double *wy, double *sy, double *ss, double *d,
2826 		double *r, int *itail, int *iupdat, int *col,
2827 		int *head, double *theta, double *rr, double *dr,
2828 		double *stp, double *dtd)
2829 {
2830 	/*    ************
2831 
2832 	 Subroutine matupd
2833 
2834 	 This subroutine updates matrices WS and WY, and forms the
2835 	 middle matrix in B.
2836 
2837 	 Subprograms called:
2838 
2839 	 Linpack ... dcopy, ddot.
2840 
2841 
2842 	 *     *  *
2843 
2844 	 NEOS, November 1994. (Latest revision June 1996.)
2845 	 Optimization Technology Center.
2846 	 Argonne National Laboratory and Northwestern University.
2847 	 Written by
2848 	 Ciyou Zhu
2849 	 in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
2850 
2851 	 ************
2852 	 */
2853 
2854 	/* System generated locals */
2855 	int ws_dim1, ws_offset, wy_dim1, wy_offset, sy_dim1, sy_offset,
2856 	ss_dim1, ss_offset, i__1, i__2;
2857 
2858 	/* Local variables */
2859 	int j;
2860 	int pointr;
2861 
2862 	/* Parameter adjustments */
2863 	--r;
2864 	--d;
2865 	ss_dim1 = m;
2866 	ss_offset = 1 + ss_dim1 * 1;
2867 	ss -= ss_offset;
2868 	sy_dim1 = m;
2869 	sy_offset = 1 + sy_dim1 * 1;
2870 	sy -= sy_offset;
2871 	wy_dim1 = n;
2872 	wy_offset = 1 + wy_dim1 * 1;
2873 	wy -= wy_offset;
2874 	ws_dim1 = n;
2875 	ws_offset = 1 + ws_dim1 * 1;
2876 	ws -= ws_offset;
2877 
2878 	/* Function Body */
2879 
2880 	/*     Set pointers for matrices WS and WY. */
2881 	if (*iupdat <= m) {
2882 		*col = *iupdat;
2883 		*itail = (*head + *iupdat - 2) % m + 1;
2884 	} else {
2885 		*itail = *itail % m + 1;
2886 		*head = *head % m + 1;
2887 	}
2888 	/*     Update matrices WS and WY. */
2889 	dcopy(&n, &d[1], &c__1, &ws[*itail * ws_dim1 + 1], &c__1);
2890 	dcopy(&n, &r[1], &c__1, &wy[*itail * wy_dim1 + 1], &c__1);
2891 	/*     Set theta=yy/ys. */
2892 	*theta = *rr / *dr;
2893 	/*     Form the middle matrix in B. */
2894 	/*      update the upper triangle of SS, */
2895 	/*                       and the lower triangle of SY: */
2896 	if (*iupdat > m) {
2897 		/*                move old information */
2898 		i__1 = *col - 1;
2899 		for (j = 1; j <= i__1; ++j) {
2900 			dcopy(&j, &ss[(j + 1) * ss_dim1 + 2], &c__1,
2901 					&ss[j * ss_dim1 + 1], &c__1);
2902 			i__2 = *col - j;
2903 			dcopy(&i__2, &sy[j + 1 + (j + 1) * sy_dim1], &c__1,
2904 					&sy[j + j * sy_dim1], &c__1);
2905 			/* L50: */
2906 		}
2907 	}
2908 	/*      add new information: the last row of SY */
2909 	/*                           and the last column of SS: */
2910 	pointr = *head;
2911 	i__1 = *col - 1;
2912 	for (j = 1; j <= i__1; ++j) {
2913 		sy[*col + j * sy_dim1] =
2914 				ddot(&n, &d[1], &c__1, &wy[pointr * wy_dim1 + 1], &c__1);
2915 		ss[j + *col * ss_dim1] =
2916 				ddot(&n, &ws[pointr * ws_dim1 + 1], &c__1, &d[1], &c__1);
2917 		pointr = pointr % m + 1;
2918 		/* L51: */
2919 	}
2920 	if (*stp == 1.) {
2921 		ss[*col + *col * ss_dim1] = *dtd;
2922 	} else {
2923 		ss[*col + *col * ss_dim1] = *stp * *stp * *dtd;
2924 	}
2925 	sy[*col + *col * sy_dim1] = *dr;
2926 	return;
2927 } /* matupd */
2928 /* ======================= The end of matupd ============================= */
2929 
formt(int m,double * wt,double * sy,double * ss,int * col,double * theta,int * info)2930 void formt(int m, double *wt, double *sy, double *ss,
2931 		int *col, double *theta, int *info)
2932 {
2933 	/*     ************
2934 
2935 	 Subroutine formt
2936 
2937 	 This subroutine forms the upper half of the pos. def. and symm.
2938 	 T = theta*SS + L*D^(-1)*L', stores T in the upper triangle
2939 	 of the array wt, and performs the Cholesky factorization of T
2940 	 to produce J*J', with J' stored in the upper triangle of wt.
2941 
2942 	 Subprograms called:
2943 
2944 	 Linpack ... dpofa.
2945 
2946 
2947 	 *  *     *
2948 
2949 	 NEOS, November 1994. (Latest revision June 1996.)
2950 	 Optimization Technology Center.
2951 	 Argonne National Laboratory and Northwestern University.
2952 	 Written by
2953 	 Ciyou Zhu
2954 	 in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
2955 
2956 	 ************
2957 	 */
2958 
2959 	/* System generated locals */
2960 	int wt_dim1, wt_offset, sy_dim1, sy_offset, ss_dim1, ss_offset, i__1;
2961 
2962 	/* Local variables */
2963 	double ddum;
2964 	int i, j, k;
2965 	int k1;
2966 
2967 	/* Parameter adjustments */
2968 	ss_dim1 = m;
2969 	ss_offset = 1 + ss_dim1 * 1;
2970 	ss -= ss_offset;
2971 	sy_dim1 = m;
2972 	sy_offset = 1 + sy_dim1 * 1;
2973 	sy -= sy_offset;
2974 	wt_dim1 = m;
2975 	wt_offset = 1 + wt_dim1 * 1;
2976 	wt -= wt_offset;
2977 
2978 	/* Function Body */
2979 
2980 	/*     Form the upper half of  T = theta*SS + L*D^(-1)*L', */
2981 	/*      store T in the upper triangle of the array wt. */
2982 	i__1 = *col;
2983 	for (j = 1; j <= i__1; ++j) {
2984 		wt[j * wt_dim1 + 1] = *theta * ss[j * ss_dim1 + 1];
2985 	}
2986 	for (i = 2; i <= i__1; ++i) {
2987 		for (j = i; j <= i__1; ++j) {
2988 			k1 = min(i,j) - 1;
2989 			ddum = 0.;
2990 			for (k = 1; k <= k1; ++k) {
2991 				ddum += sy[i + k * sy_dim1] * sy[j + k * sy_dim1] / sy[k +
2992 																	   k * sy_dim1];
2993 			}
2994 			wt[i + j * wt_dim1] = ddum + *theta * ss[i + j * ss_dim1];
2995 		}
2996 		/* L55: */
2997 	}
2998 	/*     Cholesky factorize T to J*J' with */
2999 	/*      J' stored in the upper triangle of wt. */
3000 	dpofa(&wt[wt_offset], &m, col, info);
3001 	if (*info != 0) {
3002 		*info = -3;
3003 	}
3004 	return;
3005 } /* formt */
3006 
3007 /* ======================= The end of formt ============================== */
3008 
bmv(int m,double * sy,double * wt,int * col,double * v,double * p,int * info)3009 void bmv(int m, double *sy, double *wt,
3010 		int *col, double *v, double *p, int *info)
3011 {
3012 	/*     ************
3013 
3014 	 *     Subroutine bmv
3015 
3016 	 *     This subroutine computes the product of the 2m x 2m middle matrix
3017 	 *     in the compact L-BFGS formula of B and a 2m vector v;
3018 	 *     it returns the product in p.
3019 
3020 	 *     m is an integer variable.
3021 	 *     On entry m is the maximum number of variable metric corrections
3022 	 *       used to define the limited memory matrix.
3023 	 *     On exit m is unchanged.
3024 
3025 	 *     sy is a double precision array of dimension m x m.
3026 	 *     On entry sy specifies the matrix S'Y.
3027 	 *     On exit sy is unchanged.
3028 
3029 	 *     wt is a double precision array of dimension m x m.
3030 	 *     On entry wt specifies the upper triangular matrix J' which is
3031 	 *       the Cholesky factor of (thetaS'S+LD^(-1)L').
3032 	 *     On exit wt is unchanged.
3033 
3034 	 *     col is an integer variable.
3035 	 *     On entry col specifies the number of s-vectors (or y-vectors)
3036 	 *       stored in the compact L-BFGS formula.
3037 	 *     On exit col is unchanged.
3038 
3039 	 *     v is a double precision array of dimension 2col.
3040 	 *     On entry v specifies vector v.
3041 	 *     On exit v is unchanged.
3042 
3043 	 *     p is a double precision array of dimension 2col.
3044 	 *     On entry p is unspecified.
3045 	 *     On exit p is the product Mv.
3046 
3047 	 *     info is an integer variable.
3048 	 *     On entry info is unspecified.
3049 	 *     On exit info = 0    for normal return,
3050 	 *              = nonzero for abnormal return when the system
3051 	 *                  to be solved by dtrsl is singular.
3052 
3053 	 *     Subprograms called:
3054 
3055 	 *     Linpack ... dtrsl.
3056 
3057 
3058 	 *                 *    *  *
3059 
3060 	 *     NEOS, November 1994. (Latest revision June 1996.)
3061 	 *     Optimization Technology Center.
3062 	 *     Argonne National Laboratory and Northwestern University.
3063 	 *     Written by
3064 	 *              Ciyou Zhu
3065 	 *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
3066 
3067 	 *     ************
3068 	 */
3069 
3070 	/* System generated locals */
3071 	int sy_dim1, sy_offset, wt_dim1, wt_offset, Col;
3072 
3073 
3074 	/* Local variables */
3075 	int i, k;
3076 	int i2;
3077 	double sum;
3078 
3079 	/* Parameter adjustments */
3080 	wt_dim1 = m;
3081 	wt_offset = 1 + wt_dim1 * 1;
3082 	wt -= wt_offset;
3083 	sy_dim1 = m;
3084 	sy_offset = 1 + sy_dim1 * 1;
3085 	sy -= sy_offset;
3086 	--p;
3087 	--v;
3088 
3089 	/* Function Body */
3090 	if (*col == 0) {
3091 		return;
3092 	}
3093 	/*    PART I: solve [     D^(1/2)      O ] [ p1 ] = [ v1 ]
3094 	 *              [ -L*D^(-1/2)   J ] [ p2 ]   [ v2 ].
3095 	 *    solve Jp2=v2+LD^(-1)v1.
3096 	 */
3097 	Col = *col;
3098 	p[*col + 1] = v[*col + 1];
3099 	for (i = 2; i <= Col; ++i) {
3100 		i2 = *col + i;
3101 		sum = 0.;
3102 		for (k = 1; k <= i - 1; ++k) {
3103 			sum += sy[i + k * sy_dim1] * v[k] / sy[k + k * sy_dim1];
3104 		}
3105 		p[i2] = v[i2] + sum;
3106 		/* L20: */
3107 	}
3108 	/*     Solve the triangular system */
3109 	dtrsl(&wt[wt_offset], &m, col, &p[*col + 1], &c__11, info);
3110 	if (*info != 0) {
3111 		return;
3112 	}
3113 	/*     solve D^(1/2)p1=v1. */
3114 	for (i = 1; i <= Col; ++i) {
3115 		p[i] = v[i] / sqrt(sy[i + i * sy_dim1]);
3116 	}
3117 
3118 	/*    PART II: solve [ -D^(1/2)   D^(-1/2)*L'     ] [ p1 ] = [ p1 ]
3119 	 *               [  0        J'         ] [ p2 ]   [ p2 ].
3120 	 *    solve J^Tp2=p2.
3121 	 */
3122 	dtrsl(&wt[wt_offset], &m, col, &p[*col + 1], &c__1, info);
3123 	if (*info != 0) {
3124 		return;
3125 	}
3126 	/*     compute p1=-D^(-1/2)(p1-D^(-1/2)L'p2) */
3127 	/*           =-D^(-1/2)p1 + D^(-1)L'p2. */
3128 	for (i = 1; i <= Col; ++i) {
3129 		p[i] = -p[i] / sqrt(sy[i + i * sy_dim1]);
3130 	}
3131 	for (i = 1; i <= Col; ++i) {
3132 		sum = 0.;
3133 		for (k = i + 1; k <= Col; ++k) {
3134 			sum += sy[k + i * sy_dim1] * p[*col + k] / sy[i + i * sy_dim1];
3135 		}
3136 		p[i] += sum;
3137 		/* L60: */
3138 	}
3139 	return;
3140 } /* bmv */
3141 /* ======================== The end of bmv =============================== */
3142 
hpsolb(int n,double * t,int * iorder,int iheap)3143 void hpsolb(int n, double *t, int *iorder, int iheap)
3144 {
3145 	/*    ************
3146 
3147 	 Subroutine hpsolb
3148 
3149 	 This subroutine sorts out the least element of t, and puts the
3150 	 remaining elements of t in a heap.
3151 
3152 	 n is an int variable.
3153 	 On entry n is the dimension of the arrays t and iorder.
3154 	 On exit n is unchanged.
3155 
3156 	 t is a double precision array of dimension n.
3157 	 On entry t stores the elements to be sorted,
3158 	 On exit t(n) stores the least elements of t, and t(1) to t(n-1)
3159 	 stores the remaining elements in the form of a heap.
3160 
3161 	 iorder is an int array of dimension n.
3162 	 On entry iorder(i) is the index of t(i).
3163 	 On exit iorder(i) is still the index of t(i), but iorder may be
3164 	 permuted in accordance with t.
3165 
3166 	 iheap is an int variable specifying the task.
3167 	 On entry iheap should be set as follows:
3168 	 iheap .eq. 0 if t(1) to t(n) is not in the form of a heap,
3169 	 iheap .ne. 0 if otherwise.
3170 	 On exit iheap is unchanged.
3171 
3172 
3173 	 References:
3174 	 Algorithm 232 of CACM (J. W. J. Williams): HEAPSORT.
3175 
3176 	 *     *  *
3177 
3178 	 NEOS, November 1994. (Latest revision June 1996.)
3179 	 Optimization Technology Center.
3180 	 Argonne National Laboratory and Northwestern University.
3181 	 Written by
3182 	 Ciyou Zhu
3183 	 in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
3184 
3185 	 ************
3186 	 */
3187 
3188 	/* Local variables */
3189 	double ddum;
3190 	int i, j, k, indxin, indxou;
3191 	double out;
3192 
3193 	/* Parameter adjustments */
3194 	--iorder;
3195 	--t;
3196 
3197 	/* Function Body */
3198 	if (iheap == 0) {
3199 		/*      Rearrange the elements t(1) to t(n) to form a heap. */
3200 		for (k = 2; k <= n; ++k) {
3201 			ddum = t[k];
3202 			indxin = iorder[k];
3203 			/*         Add ddum to the heap. */
3204 			i = k;
3205 			h_loop:
3206 			if (i > 1) {
3207 				j = i / 2;
3208 				if (ddum < t[j]) {
3209 					t[i] = t[j];
3210 					iorder[i] = iorder[j];
3211 					i = j;
3212 					goto h_loop;
3213 				}
3214 			}
3215 			t[i] = ddum;
3216 			iorder[i] = indxin;
3217 			/* L20: */
3218 		}
3219 	}
3220 	/*     Assign to 'out' the value of t(1), the least member of the heap, */
3221 	/*      and rearrange the remaining members to form a heap as */
3222 	/*      elements 1 to n-1 of t. */
3223 	if (n > 1) {
3224 		i = 1;
3225 		out = t[1];
3226 		indxou = iorder[1];
3227 		ddum = t[n];
3228 		indxin = iorder[n];
3229 		/*      Restore the heap */
3230 		Loop:
3231 		j = i + i;
3232 		if (j <= n - 1) {
3233 			if (t[j + 1] < t[j]) {
3234 				++j;
3235 			}
3236 			if (t[j] < ddum) {
3237 				t[i] = t[j];
3238 				iorder[i] = iorder[j];
3239 				i = j;
3240 				goto Loop;
3241 			}
3242 		}
3243 		t[i] = ddum;
3244 		iorder[i] = indxin;
3245 		/*     Put the least member in t(n). */
3246 		t[n] = out;
3247 		iorder[n] = indxou;
3248 	}
3249 	return;
3250 } /* hpsolb */
3251 /* ====================== The end of hpsolb ============================== */
3252 
dcsrch(double * f,double * g,double * stp,double ftol,double gtol,double xtol,double stpmin,double stpmax,char * task,int * isave,double * dsave)3253 void dcsrch(double *f, double *g, double *stp,
3254 		/*Chgd: the next five are no longer pointers:*/
3255 		double ftol, double gtol, double xtol,
3256 		double stpmin, double stpmax,
3257 		char *task, int *isave, double *dsave)
3258 {
3259 	/*    **********
3260 
3261 	 Subroutine dcsrch
3262 
3263 	 This subroutine finds a step that satisfies a sufficient
3264 	 decrease condition and a curvature condition.
3265 
3266 	 Each call of the subroutine updates an interval with
3267 	 endpoints stx and sty. The interval is initially chosen
3268 	 so that it contains a minimizer of the modified function
3269 
3270 	 psi(stp) = f(stp) - f(0) - ftol*stp*f'(0).
3271 
3272 	 If psi(stp) <= 0 and f'(stp) >= 0 for some step, then the
3273 	 interval is chosen so that it contains a minimizer of f.
3274 
3275 	 The algorithm is designed to find a step that satisfies
3276 	 the sufficient decrease condition
3277 
3278 	 f(stp) <= f(0) + ftol*stp*f'(0),
3279 
3280 	 and the curvature condition
3281 
3282 	 abs(f'(stp)) <= gtol*abs(f'(0)).
3283 
3284 	 If ftol is less than gtol and if, for example, the function
3285 	 is bounded below, then there is always a step which satisfies
3286 	 both conditions.
3287 
3288 	 If no step can be found that satisfies both conditions, then
3289 	 the algorithm stops with a warning. In this case stp only
3290 	 satisfies the sufficient decrease condition.
3291 
3292 	 A typical invocation of dcsrch has the following outline:
3293 
3294 	 task = 'START'
3295 	 10 continue
3296 	 call dcsrch( ... )
3297 	 if (task .eq. 'FG') then
3298 	 Evaluate the function and the gradient at stp
3299 	 goto 10
3300 	 end if
3301 
3302 	 NOTE: The user must no alter work arrays between calls.
3303 
3304 	 The subroutine statement is
3305 
3306 	 subroutine dcsrch(f,g,stp,ftol,gtol,xtol,stpmin,stpmax,
3307 	 task,isave,dsave)
3308 	 where
3309 
3310 	 f is a double precision variable.
3311 	 On initial entry f is the value of the function at 0.
3312 	 On subsequent entries f is the value of the
3313 	 function at stp.
3314 	 On exit f is the value of the function at stp.
3315 
3316 	 g is a double precision variable.
3317 	 On initial entry g is the derivative of the function at 0.
3318 	 On subsequent entries g is the derivative of the
3319 	 function at stp.
3320 	 On exit g is the derivative of the function at stp.
3321 
3322 	 stp is a double precision variable.
3323 	 On entry stp is the current estimate of a satisfactory
3324 	 step. On initial entry, a positive initial estimate
3325 	 must be provided.
3326 	 On exit stp is the current estimate of a satisfactory step
3327 	 if task = 'FG'. If task = 'CONV' then stp satisfies
3328 	 the sufficient decrease and curvature condition.
3329 
3330 	 ftol is a double precision variable.
3331 	 On entry ftol specifies a nonnegative tolerance for the
3332 	 sufficient decrease condition.
3333 	 On exit ftol is unchanged.
3334 
3335 	 gtol is a double precision variable.
3336 	 On entry gtol specifies a nonnegative tolerance for the
3337 	 curvature condition.
3338 	 On exit gtol is unchanged.
3339 
3340 	 xtol is a double precision variable.
3341 	 On entry xtol specifies a nonnegative relative tolerance
3342 	 for an acceptable step. The subroutine exits with a
3343 	 warning if the relative difference between sty and stx
3344 	 is less than xtol.
3345 	 On exit xtol is unchanged.
3346 
3347 	 stpmin is a double precision variable.
3348 	 On entry stpmin is a nonnegative lower bound for the step.
3349 	 On exit stpmin is unchanged.
3350 
3351 	 stpmax is a double precision variable.
3352 	 On entry stpmax is a nonnegative upper bound for the step.
3353 	 On exit stpmax is unchanged.
3354 
3355 	 task is a character variable of length at least 60.
3356 	 On initial entry task must be set to 'START'.
3357 	 On exit task indicates the required action:
3358 
3359 	 If task(1:2) = 'FG' then evaluate the function and
3360 	 derivative at stp and call dcsrch again.
3361 
3362 	 If task(1:4) = 'CONV' then the search is successful.
3363 
3364 	 If task(1:4) = 'WARN' then the subroutine is not able
3365 	 to satisfy the convergence conditions. The exit value of
3366 	 stp contains the best point found during the search.
3367 
3368 	 If task(1:5) = 'ERROR' then there is an error in the
3369 	 input arguments.
3370 
3371 	 On exit with convergence, a warning or an error, the
3372 	 variable task contains additional information.
3373 
3374 	 isave is an integer work array of dimension 2.
3375 
3376 	 dsave is a double precision work array of dimension 13.
3377 
3378 	 Subprograms called
3379 
3380 	 MINPACK-2 ... dcstep
3381 
3382 
3383 	 MINPACK-1 Project. June 1983.
3384 	 Argonne National Laboratory.
3385 	 Jorge J. More' and David J. Thuente.
3386 
3387 	 MINPACK-2 Project. October 1993.
3388 	 Argonne National Laboratory and University of Minnesota.
3389 	 Brett M. Averick, Richard G. Carter, and Jorge J. More'.
3390 
3391 	 **********
3392 	 */
3393 
3394 	/* Local variables */
3395 	int stage;
3396 	double finit, ginit, width, ftest, gtest, stmin, stmax, width1, fm,
3397 	gm, fx, fy, gx, gy;
3398 	int brackt;
3399 	double fxm, fym, gxm, gym, stx, sty;
3400 
3401 	/* Parameter adjustments */
3402 	--dsave;
3403 	--isave;
3404 
3405 	/* Function Body */
3406 
3407 	/*     Initialization block. */
3408 	if (strncmp(task, "START", 5) == 0) {
3409 		/*      Check the input arguments for errors. */
3410 		if (*stp < stpmin)    strcpy(task, "ERROR: STP .LT. STPMIN");
3411 		if (*stp > stpmax)    strcpy(task, "ERROR: STP .GT. STPMAX");
3412 		if (*g >= 0.)        strcpy(task, "ERROR: INITIAL G .GE. ZERO");
3413 		if (ftol < 0.)        strcpy(task, "ERROR: FTOL .LT. ZERO");
3414 		if (gtol < 0.)        strcpy(task, "ERROR: GTOL .LT. ZERO");
3415 		if (xtol < 0.)        strcpy(task, "ERROR: XTOL .LT. ZERO");
3416 		if (stpmin < 0.)    strcpy(task, "ERROR: STPMIN .LT. ZERO");
3417 		if (stpmax < stpmin)    strcpy(task, "ERROR: STPMAX .LT. STPMIN");
3418 
3419 		/*      Exit if there are errors on input. */
3420 		if (strncmp(task, "ERROR", 5) == 0) {
3421 			return;
3422 		}
3423 		/*      Initialize local variables. */
3424 		brackt = 0;
3425 		stage = 1;
3426 		finit = *f;
3427 		ginit = *g;
3428 		gtest = ftol * ginit;
3429 		width = stpmax - stpmin;
3430 		width1 = width / .5;
3431 		/*      The variables stx, fx, gx contain the values of the step, */
3432 		/*      function, and derivative at the best step. */
3433 		/*      The variables sty, fy, gy contain the value of the step, */
3434 		/*      function, and derivative at sty. */
3435 		/*      The variables stp, f, g contain the values of the step, */
3436 		/*      function, and derivative at stp. */
3437 		stx = 0.;    fx = finit;    gx = ginit;
3438 		sty = 0.;    fy = finit;    gy = ginit;
3439 		stmin = 0.;
3440 		stmax = *stp + *stp * 4.;
3441 		strcpy(task, "FG");
3442 		goto L1000;
3443 	} else {
3444 		/*      Restore local variables. */
3445 		if (isave[1] == 1) {
3446 			brackt = 1;
3447 		} else {
3448 			brackt = 0;
3449 		}
3450 		stage = isave[2];
3451 		ginit = dsave[1];
3452 		gtest = dsave[2];
3453 		gx = dsave[3];
3454 		gy = dsave[4];
3455 		finit = dsave[5];
3456 		fx = dsave[6];
3457 		fy = dsave[7];
3458 		stx = dsave[8];
3459 		sty = dsave[9];
3460 		stmin = dsave[10];
3461 		stmax = dsave[11];
3462 		width = dsave[12];
3463 		width1 = dsave[13];
3464 	}
3465 	/*      If psi(stp) <= 0 and f'(stp) >= 0 for some step, then the */
3466 	/*      algorithm enters the second stage. */
3467 	ftest = finit + *stp * gtest;
3468 	if (stage == 1 && *f <= ftest && *g >= 0.) {
3469 		stage = 2;
3470 	}
3471 	/*    Test for warnings. */
3472 	if (brackt && (*stp <= stmin || *stp >= stmax))
3473 		strcpy(task, "WARNING: ROUNDING ERRORS PREVENT PROGRESS");
3474 	if (brackt && stmax - stmin <= xtol * stmax)
3475 		strcpy(task, "WARNING: XTOL TEST SATISFIED");
3476 	if (*stp == stpmax && *f <= ftest && *g <= gtest)
3477 		strcpy(task, "WARNING: STP = STPMAX");
3478 	if (*stp == stpmin && (*f > ftest || *g >= gtest))
3479 		strcpy(task, "WARNING: STP = STPMIN");
3480 	/*    Test for convergence. */
3481 	if (*f <= ftest && fabs(*g) <= gtol * (-ginit))
3482 		strcpy(task, "CONVERGENCE");
3483 	/*    Test for termination. */
3484 	if (strncmp(task, "WARN", 4) == 0 || strncmp(task, "CONV", 4) == 0)
3485 		goto L1000;
3486 
3487 	/*     A modified function is used to predict the step during the */
3488 	/*     first stage if a lower function value has been obtained but */
3489 	/*     the decrease is not sufficient. */
3490 	if (stage == 1 && *f <= fx && *f > ftest) {
3491 		/*      Define the modified function and derivative values. */
3492 		fm = *f - *stp * gtest;
3493 		fxm = fx - stx * gtest;
3494 		fym = fy - sty * gtest;
3495 		gm = *g - gtest;
3496 		gxm = gx - gtest;
3497 		gym = gy - gtest;
3498 		/*      Call dcstep to update stx, sty, and to compute the new step. */
3499 		dcstep(&stx, &fxm, &gxm, &sty, &fym, &gym, stp, &fm, &gm, &brackt, &
3500 				stmin, &stmax);
3501 		/*      Reset the function and derivative values for f. */
3502 		fx = fxm + stx * gtest;
3503 		fy = fym + sty * gtest;
3504 		gx = gxm + gtest;
3505 		gy = gym + gtest;
3506 	} else {
3507 		/*     Call dcstep to update stx, sty, and to compute the new step. */
3508 		dcstep(&stx, &fx, &gx, &sty, &fy, &gy, stp, f, g, &brackt, &stmin, &
3509 				stmax);
3510 	}
3511 	/*     Decide if a bisection step is needed. */
3512 	if (brackt) {
3513 		if (fabs(sty - stx) >= width1 * .66) {
3514 			*stp = stx + (sty - stx) * .5;
3515 		}
3516 		width1 = width;
3517 		width = fabs(sty - stx);
3518 	}
3519 	/*     Set the minimum and maximum steps allowed for stp. */
3520 	if (brackt) {
3521 		stmin = min(stx,sty);
3522 		stmax = max(stx,sty);
3523 	} else {
3524 		stmin = *stp + (*stp - stx) * 1.1;
3525 		stmax = *stp + (*stp - stx) * 4.;
3526 	}
3527 	/*     Force the step to be within the bounds stpmax and stpmin. */
3528 	if(*stp < stpmin) *stp = stpmin;
3529 	if(*stp > stpmax) *stp = stpmax;
3530 
3531 	/*     If further progress is not possible, let stp be the best */
3532 	/*     point obtained during the search. */
3533 	if ((brackt && (*stp <= stmin || *stp >= stmax)) ||
3534 			(brackt && (stmax - stmin <= xtol * stmax))) {
3535 		*stp = stx;
3536 	}
3537 	/*     Obtain another function and derivative. */
3538 	strcpy(task, "FG");
3539 	L1000:
3540 	/*     Save local variables. */
3541 	if (brackt) {
3542 		isave[1] = 1;
3543 	} else {
3544 		isave[1] = 0;
3545 	}
3546 	isave[2] = stage;
3547 	dsave[1] = ginit;
3548 	dsave[2] = gtest;
3549 	dsave[3] = gx;
3550 	dsave[4] = gy;
3551 	dsave[5] = finit;
3552 	dsave[6] = fx;
3553 	dsave[7] = fy;
3554 	dsave[8] = stx;
3555 	dsave[9] = sty;
3556 	dsave[10] = stmin;
3557 	dsave[11] = stmax;
3558 	dsave[12] = width;
3559 	dsave[13] = width1;
3560 	return;
3561 } /* dcsrch */
3562 /* ====================== The end of dcsrch ============================== */
3563 
3564 
dcstep(double * stx,double * fx,double * dx,double * sty,double * fy,double * dy,double * stp,double * fp,double * dp,int * brackt,double * stpmin,double * stpmax)3565 void dcstep(double *stx, double *fx, double *dx,
3566 		double *sty, double *fy, double *dy, double *stp,
3567 		double *fp, double *dp, int *brackt, double *stpmin,
3568 		double *stpmax)
3569 {
3570 	/*    **********
3571 
3572 	 Subroutine dcstep
3573 
3574 	 This subroutine computes a safeguarded step for a search
3575 	 procedure and updates an interval that contains a step that
3576 	 satisfies a sufficient decrease and a curvature condition.
3577 
3578 	 The parameter stx contains the step with the least function
3579 	 value. If brackt is set to .true. then a minimizer has
3580 	 been bracketed in an interval with endpoints stx and sty.
3581 	 The parameter stp contains the current step.
3582 	 The subroutine assumes that if brackt is set to .true. then
3583 
3584 	 min(stx,sty) < stp < max(stx,sty),
3585 
3586 	 and that the derivative at stx is negative in the direction
3587 	 of the step.
3588 
3589 	 The subroutine statement is
3590 
3591 	 subroutine dcstep(stx,fx,dx,sty,fy,dy,stp,fp,dp,brackt,
3592 	 stpmin,stpmax)
3593 
3594 	 where
3595 
3596 	 stx is a double precision variable.
3597 	 On entry stx is the best step obtained so far and is an
3598 	 endpoint of the interval that contains the minimizer.
3599 	 On exit stx is the updated best step.
3600 
3601 	 fx is a double precision variable.
3602 	 On entry fx is the function at stx.
3603 	 On exit fx is the function at stx.
3604 
3605 	 dx is a double precision variable.
3606 	 On entry dx is the derivative of the function at
3607 	 stx. The derivative must be negative in the direction of
3608 	 the step, that is, dx and stp - stx must have opposite
3609 	 signs.
3610 	 On exit dx is the derivative of the function at stx.
3611 
3612 	 sty is a double precision variable.
3613 	 On entry sty is the second endpoint of the interval that
3614 	 contains the minimizer.
3615 	 On exit sty is the updated endpoint of the interval that
3616 	 contains the minimizer.
3617 
3618 	 fy is a double precision variable.
3619 	 On entry fy is the function at sty.
3620 	 On exit fy is the function at sty.
3621 
3622 	 dy is a double precision variable.
3623 	 On entry dy is the derivative of the function at sty.
3624 	 On exit dy is the derivative of the function at the exit sty.
3625 
3626 	 stp is a double precision variable.
3627 	 On entry stp is the current step. If brackt is set to .true.
3628 	 then on input stp must be between stx and sty.
3629 	 On exit stp is a new trial step.
3630 
3631 	 fp is a double precision variable.
3632 	 On entry fp is the function at stp
3633 	 On exit fp is unchanged.
3634 
3635 	 dp is a double precision variable.
3636 	 On entry dp is the the derivative of the function at stp.
3637 	 On exit dp is unchanged.
3638 
3639 	 brackt is an logical variable.
3640 	 On entry brackt specifies if a minimizer has been bracketed.
3641 	 Initially brackt must be set to .false.
3642 	 On exit brackt specifies if a minimizer has been bracketed.
3643 	 When a minimizer is bracketed brackt is set to .true.
3644 
3645 	 stpmin is a double precision variable.
3646 	 On entry stpmin is a lower bound for the step.
3647 	 On exit stpmin is unchanged.
3648 
3649 	 stpmax is a double precision variable.
3650 	 On entry stpmax is an upper bound for the step.
3651 	 On exit stpmax is unchanged.
3652 
3653 	 MINPACK-1 Project. June 1983
3654 	 Argonne National Laboratory.
3655 	 Jorge J. More' and David J. Thuente.
3656 
3657 	 MINPACK-2 Project. October 1993.
3658 	 Argonne National Laboratory and University of Minnesota.
3659 	 Brett M. Averick and Jorge J. More'.
3660 
3661 	 **********
3662 	 */
3663 
3664 	/* System generated locals */
3665 	double d__1, d__2;
3666 
3667 	/* Local variables */
3668 	double sgnd, stpc, stpf, stpq, p, q, gamm, r__, s, theta;
3669 
3670 	sgnd = *dp * (*dx / fabs(*dx));
3671 	/*     First case: A higher function value. The minimum is bracketed. */
3672 	/*     If the cubic step is closer to stx than the quadratic step, the */
3673 	/*     cubic step is taken, otherwise the average of the cubic and */
3674 	/*     quadratic steps is taken. */
3675 	if (*fp > *fx) {
3676 		theta = (*fx - *fp) * 3. / (*stp - *stx) + *dx + *dp;
3677 		/* Computing MAX */
3678 		d__1 = fabs(theta), d__2 = fabs(*dx),
3679 				d__1 = max(d__1,d__2), d__2 = fabs(*dp);
3680 		s = max(d__1,d__2);
3681 		/* Computing 2nd power */
3682 		d__1 = theta / s;
3683 		gamm = s * sqrt(d__1 * d__1 - *dx / s * (*dp / s));
3684 		if (*stp < *stx) {
3685 			gamm = -gamm;
3686 		}
3687 		p = gamm - *dx + theta;
3688 		q = gamm - *dx + gamm + *dp;
3689 		r__ = p / q;
3690 		stpc = *stx + r__ * (*stp - *stx);
3691 		stpq = *stx + *dx / ((*fx - *fp) / (*stp - *stx) + *dx) / 2. * (*stp
3692 				- *stx);
3693 		if (fabs(stpc - *stx) < fabs(stpq - *stx)) {
3694 			stpf = stpc;
3695 		} else {
3696 			stpf = stpc + (stpq - stpc) / 2.;
3697 		}
3698 		*brackt = 1;
3699 		/*     Second case: A lower function value and derivatives of opposite */
3700 		/*     sign. The minimum is bracketed. If the cubic step is farther from */
3701 		/*     stp than the secant step, the cubic step is taken, otherwise the */
3702 		/*     secant step is taken. */
3703 	} else if (sgnd < 0.) {
3704 		theta = (*fx - *fp) * 3. / (*stp - *stx) + *dx + *dp;
3705 		/* Computing MAX */
3706 		d__1 = fabs(theta), d__2 = fabs(*dx),
3707 				d__1 = max(d__1,d__2), d__2 = fabs(*dp);
3708 		s = max(d__1,d__2);
3709 		/* Computing 2nd power */
3710 		d__1 = theta / s;
3711 		gamm = s * sqrt(d__1 * d__1 - *dx / s * (*dp / s));
3712 		if (*stp > *stx) {
3713 			gamm = -gamm;
3714 		}
3715 		p = gamm - *dp + theta;
3716 		q = gamm - *dp + gamm + *dx;
3717 		r__ = p / q;
3718 		stpc = *stp + r__ * (*stx - *stp);
3719 		stpq = *stp + *dp / (*dp - *dx) * (*stx - *stp);
3720 		if (fabs(stpc - *stp) > fabs(stpq - *stp)) {
3721 			stpf = stpc;
3722 		} else {
3723 			stpf = stpq;
3724 		}
3725 		*brackt = 1;
3726 		/*     Third case: A lower function value, derivatives of the same sign, */
3727 		/*     and the magnitude of the derivative decreases. */
3728 	} else if (fabs(*dp) < fabs(*dx)) {
3729 		/*      The cubic step is computed only if the cubic tends to infinity */
3730 		/*      in the direction of the step or if the minimum of the cubic */
3731 		/*      is beyond stp. Otherwise the cubic step is defined to be the */
3732 		/*      secant step. */
3733 		theta = (*fx - *fp) * 3. / (*stp - *stx) + *dx + *dp;
3734 		/* Computing MAX */
3735 		d__1 = fabs(theta), d__2 = fabs(*dx),
3736 				d__1 = max(d__1,d__2), d__2 = fabs(*dp);
3737 		s = max(d__1,d__2);
3738 		/*      The case gamm = 0 only arises if the cubic does not tend */
3739 		/*      to infinity in the direction of the step. */
3740 		/* Computing MAX */
3741 		/* Computing 2nd power */
3742 		d__1 = theta / s;
3743 		d__1 = d__1 * d__1 - *dx / s * (*dp / s);
3744 		gamm = d__1 < 0 ? 0. : s * sqrt(d__1);
3745 		if (*stp > *stx) {
3746 			gamm = -gamm;
3747 		}
3748 		p = gamm - *dp + theta;
3749 		q = gamm + (*dx - *dp) + gamm;
3750 		r__ = p / q;
3751 		if (r__ < 0. && gamm != 0.) {
3752 			stpc = *stp + r__ * (*stx - *stp);
3753 		} else if (*stp > *stx) {
3754 			stpc = *stpmax;
3755 		} else {
3756 			stpc = *stpmin;
3757 		}
3758 		stpq = *stp + *dp / (*dp - *dx) * (*stx - *stp);
3759 		if (*brackt) {
3760 			/*         A minimizer has been bracketed. If the cubic step is */
3761 			/*         closer to stp than the secant step, the cubic step is */
3762 			/*         taken, otherwise the secant step is taken. */
3763 			if (fabs(stpc - *stp) < fabs(stpq - *stp)) {
3764 				stpf = stpc;
3765 			} else {
3766 				stpf = stpq;
3767 			}
3768 			d__1 = *stp + (*sty - *stp) * .66;
3769 			if (*stp > *stx) {
3770 				stpf = min(d__1,stpf);
3771 			} else {
3772 				stpf = max(d__1,stpf);
3773 			}
3774 		} else {
3775 			/*         A minimizer has not been bracketed. If the cubic step is */
3776 			/*         farther from stp than the secant step, the cubic step is */
3777 			/*         taken, otherwise the secant step is taken. */
3778 			if (fabs(stpc - *stp) > fabs(stpq - *stp)) {
3779 				stpf = stpc;
3780 			} else {
3781 				stpf = stpq;
3782 			}
3783 			stpf = min(*stpmax,stpf);
3784 			stpf = max(*stpmin,stpf);
3785 		}
3786 		/*     Fourth case: A lower function value, derivatives of the */
3787 		/*     same sign, and the magnitude of the derivative does not */
3788 		/*     decrease. If the minimum is not bracketed, the step is either */
3789 		/*     stpmin or stpmax, otherwise the cubic step is taken. */
3790 	} else {
3791 		if (*brackt) {
3792 			theta = (*fp - *fy) * 3. / (*sty - *stp) + *dy + *dp;
3793 			/* Computing MAX */
3794 			d__1 = fabs(theta), d__2 = fabs(*dy), d__1 = max(d__1,d__2), d__2 =
3795 					fabs(*dp);
3796 			s = max(d__1,d__2);
3797 			/* Computing 2nd power */
3798 			d__1 = theta / s;
3799 			gamm = s * sqrt(d__1 * d__1 - *dy / s * (*dp / s));
3800 			if (*stp > *sty) {
3801 				gamm = -gamm;
3802 			}
3803 			p = gamm - *dp + theta;
3804 			q = gamm - *dp + gamm + *dy;
3805 			r__ = p / q;
3806 			stpc = *stp + r__ * (*sty - *stp);
3807 			stpf = stpc;
3808 		} else if (*stp > *stx) {
3809 			stpf = *stpmax;
3810 		} else {
3811 			stpf = *stpmin;
3812 		}
3813 	}
3814 	/*     Update the interval which contains a minimizer. */
3815 	if (*fp > *fx) {
3816 		*sty = *stp;
3817 		*fy = *fp;
3818 		*dy = *dp;
3819 	} else {
3820 		if (sgnd < 0.) {
3821 			*sty = *stx;
3822 			*fy = *fx;
3823 			*dy = *dx;
3824 		}
3825 		*stx = *stp;
3826 		*fx = *fp;
3827 		*dx = *dp;
3828 	}
3829 	/*     Compute the new step. */
3830 	*stp = stpf;
3831 	return;
3832 } /* dcstep */
3833 /* ====================== The end of dcstep ============================== */
3834 
3835 
prn3lb(int n,double * x,double * f,char * task,int iprint,int info,int iter,int nfgv,int nintol,int nskip,int nact,double sbgnrm,int nint,char * word,int iback,double stp,double xstep,int k)3836 void prn3lb(int n, double *x, double *f, char *task, int iprint,
3837 		int info, int iter, int nfgv, int nintol, int nskip,
3838 		int nact, double sbgnrm, int nint,
3839 		char *word, int iback, double stp, double xstep,
3840 		int k)
3841 {
3842 	if(strncmp(task, "CONV", 4) == 0) {
3843 		if (iprint >= 0) {
3844 			cout << endl;
3845 			cout << "iterations " << iter << endl;
3846 			cout << "function evaluations " << nfgv << endl;
3847 			cout << "segments explored during Cauchy searches " << nintol << endl;
3848 			cout << "BFGS updates skipped " << nskip << endl;
3849 			cout << "active bounds at final generalized Cauchy point " << nact << endl;
3850 			cout << "norm of the final projected gradient " << sbgnrm << endl;
3851 			cout << "inal function value " << *f << endl;
3852 			cout << endl;
3853 		}
3854 		if (iprint >= 100) pvector((char*)"X =", x, n);
3855 		if (iprint >= 1)
3856 			cout << "F = " << *f << endl;
3857 	}
3858 	if (iprint >= 0) {
3859 		switch(info) {
3860 		case -1:
3861 			cout << "Matrix in 1st Cholesky factorization in formk is not Pos. Def.";
3862 			break;
3863 		case -2:
3864 			cout << "Matrix in 2st Cholesky factorization in formk is not Pos. Def.";
3865 			break;
3866 		case -3:
3867 			cout << "Matrix in the Cholesky factorization in formt is not Pos. Def.";
3868 			break;
3869 		case -4:
3870 			cout << "Derivative >= 0, backtracking line search impossible.";
3871 			break;
3872 		case -5:
3873 			cout << "l(" << k << ") > u(" << k << ").  No feasible solution";
3874 			break;
3875 		case -6:
3876 			cout << "Input nbd(" << k << ") is invalid";
3877 			break;
3878 		case -7:
3879 			cout << "Warning:  more than 10 function and gradient evaluations" << endl;
3880 			cout << "   in the last line search" << endl;
3881 			break;
3882 		case -8:
3883 			cout << "The triangular system is singular." << endl;
3884 			break;
3885 		case -9:
3886 			cout << "Line search cannot locate an adequate point after 20 function" << endl;
3887 			cout << "and gradient evaluations" << endl;
3888 			break;
3889 		default:
3890 			break;
3891 		}
3892 	}
3893 }
3894 
prn1lb(int n,int m,double * l,double * u,double * x,int iprint,double epsmch)3895 void prn1lb(int n, int m, double *l, double *u, double *x,
3896 		int iprint, double epsmch)
3897 {
3898 	if (iprint >=  0) {
3899 		cout << "N = " << n << ", M = " << m << " machine precision = " << epsmch << endl;
3900 		if (iprint >= 100){
3901 			pvector((char*)"L =", l, n);
3902 			pvector((char*)"X0 =",x, n);
3903 			pvector((char*)"U =", u, n);
3904 		}
3905 	}
3906 }
3907 
projgr(int n,double * l,double * u,int * nbd,double * x,double * g,double * sbgnrm)3908 void projgr(int n, double *l, double *u,
3909 		int *nbd, double *x, double *g, double *sbgnrm)
3910 {
3911 	/*    ************
3912 
3913 	 Subroutine projgr
3914 
3915 	 This subroutine computes the infinity norm of the projected gradient.
3916 
3917 
3918 	 *     *  *
3919 
3920 	 NEOS, November 1994. (Latest revision April 1997.)
3921 	 Optimization Technology Center.
3922 	 Argonne National Laboratory and Northwestern University.
3923 	 Written by
3924 	 Ciyou Zhu
3925 	 in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
3926 
3927 	 ************
3928 	 */
3929 	int i;
3930 	double gi, d__1;
3931 
3932 	*sbgnrm = 0.;
3933 	for (i = 0; i < n; ++i) {
3934 		gi = g[i];
3935 		if (nbd[i] != 0) {
3936 			if (gi < 0.) {
3937 				if (nbd[i] >= 2) {
3938 					if(gi < (d__1 = x[i] - u[i]))
3939 						gi = d__1;
3940 				}
3941 			} else {
3942 				if (nbd[i] <= 2) {
3943 					if(gi > (d__1 = x[i] - l[i]))
3944 						gi = d__1;
3945 				}
3946 			}
3947 		}
3948 		if(*sbgnrm < (d__1 = fabs(gi))) *sbgnrm = d__1;
3949 	}
3950 	return;
3951 } /* projgr */
3952 
prn2lb(int n,double * x,double * f,double * g,int iprint,int iter,int nfgv,int nact,double sbgnrm,int nint,char * word,int iword,int iback,double stp,double xstep)3953 void prn2lb(int n, double *x, double *f, double *g, int iprint,
3954 		int iter, int nfgv, int nact, double sbgnrm,
3955 		int nint, char *word, int iword, int iback,
3956 		double stp, double xstep)
3957 {
3958 	if (iprint >=  99) {
3959 		cout << "LINE SEARCH " << iback << " times; norm of step = " << xstep << "\n";
3960 		if (iprint > 100) {
3961 			pvector((char*)"X =", x, n);
3962 			pvector((char*)"G =", g, n);
3963 		}
3964 	} else if (iprint > 0 && iter%iprint == 0) {
3965 		cout << "At iterate " << iter << "  f = " << *f << "  |proj g|=  " << sbgnrm << "\n";
3966 	}
3967 }
3968 
pvector(char * title,double * x,int n)3969 void pvector(char *title, double *x, int n)
3970 {
3971 	int i;
3972 	cout << title;
3973 	for (i = 0; i < n; i++) cout << x[i] << " ";
3974 	cout << endl;
3975 }
3976 
3977 
dcopy(int * n,double * dx,int * incx,double * dy,int * incy)3978 int dcopy(int *n, double *dx, int *incx,
3979 		double *dy, int *incy)
3980 {
3981 
3982 
3983 	/* System generated locals */
3984 	// int i__1;
3985 
3986 	/* Local variables */
3987 	int i, m, ix, iy, mp1;
3988 
3989 
3990 	/*     copies a vector, x, to a vector, y.
3991 	 uses unrolled loops for increments equal to one.
3992 	 jack dongarra, linpack, 3/11/78.
3993 	 modified 12/3/93, array(1) declarations changed to array(*)
3994 
3995 
3996 
3997 	 Parameter adjustments
3998 	 Function Body */
3999 
4000 
4001 	if (*n <= 0) {
4002 		return 0;
4003 	}
4004 	if (*incx == 1 && *incy == 1) {
4005 		goto L20;
4006 	}
4007 
4008 	/*        code for unequal increments or equal increments
4009 	 not equal to 1 */
4010 
4011 	ix = 1;
4012 	iy = 1;
4013 	if (*incx < 0) {
4014 		ix = (-(*n) + 1) * *incx + 1;
4015 	}
4016 	if (*incy < 0) {
4017 		iy = (-(*n) + 1) * *incy + 1;
4018 	}
4019 	// i__1 = *n;
4020 	for (i = 1; i <= *n; ++i) {
4021 		DY(iy) = DX(ix);
4022 		ix += *incx;
4023 		iy += *incy;
4024 		/* L10: */
4025 	}
4026 	return 0;
4027 
4028 	/*        code for both increments equal to 1
4029 
4030 
4031 	 clean-up loop */
4032 
4033 	L20:
4034 	m = *n % 7;
4035 	if (m == 0) {
4036 		goto L40;
4037 	}
4038 	// i__1 = m;
4039 	for (i = 1; i <= m; ++i) {
4040 		DY(i) = DX(i);
4041 		/* L30: */
4042 	}
4043 	if (*n < 7) {
4044 		return 0;
4045 	}
4046 	L40:
4047 	mp1 = m + 1;
4048 	// i__1 = *n;
4049 	for (i = mp1; i <= *n; i += 7) {
4050 		DY(i) = DX(i);
4051 		DY(i + 1) = DX(i + 1);
4052 		DY(i + 2) = DX(i + 2);
4053 		DY(i + 3) = DX(i + 3);
4054 		DY(i + 4) = DX(i + 4);
4055 		DY(i + 5) = DX(i + 5);
4056 		DY(i + 6) = DX(i + 6);
4057 		/* L50: */
4058 	}
4059 	return 0;
4060 } /* dcopy */
4061 
timer(double * ttime)4062 void timer(double * ttime)
4063 {
4064 	*ttime = 0.0;
4065 }
4066 
dscal(int * n,double * da,double * dx,int * incx)4067 int dscal(int *n, double *da, double *dx,
4068 		int *incx)
4069 {
4070 
4071 
4072 	/* System generated locals */
4073 	// int i__1, i__2;
4074 
4075 	/* Local variables */
4076 	int i, m, nincx, mp1;
4077 
4078 
4079 	/*     scales a vector by a constant.
4080 	 uses unrolled loops for increment equal to one.
4081 	 jack dongarra, linpack, 3/11/78.
4082 	 modified 3/93 to return if incx .le. 0.
4083 	 modified 12/3/93, array(1) declarations changed to array(*)
4084 
4085 
4086 
4087 	 Parameter adjustments
4088 	 Function Body */
4089 
4090 
4091 	if (*n <= 0 || *incx <= 0) {
4092 		return 0;
4093 	}
4094 	if (*incx == 1) {
4095 		goto L20;
4096 	}
4097 
4098 	/*        code for increment not equal to 1 */
4099 
4100 	nincx = *n * *incx;
4101 	// i__1 = nincx;
4102 	// i__2 = *incx;
4103 	for (i = 1; *incx < 0 ? i >= nincx : i <= nincx; i += *incx) {
4104 		DX(i) = *da * DX(i);
4105 		/* L10: */
4106 	}
4107 	return 0;
4108 
4109 	/*        code for increment equal to 1
4110 
4111 
4112 	 clean-up loop */
4113 
4114 	L20:
4115 	m = *n % 5;
4116 	if (m == 0) {
4117 		goto L40;
4118 	}
4119 	// i__2 = m;
4120 	for (i = 1; i <= m; ++i) {
4121 		DX(i) = *da * DX(i);
4122 		/* L30: */
4123 	}
4124 	if (*n < 5) {
4125 		return 0;
4126 	}
4127 	L40:
4128 	mp1 = m + 1;
4129 	// i__2 = *n;
4130 	for (i = mp1; i <= *n; i += 5) {
4131 		DX(i) = *da * DX(i);
4132 		DX(i + 1) = *da * DX(i + 1);
4133 		DX(i + 2) = *da * DX(i + 2);
4134 		DX(i + 3) = *da * DX(i + 3);
4135 		DX(i + 4) = *da * DX(i + 4);
4136 		/* L50: */
4137 	}
4138 	return 0;
4139 } /* dscal */
4140 
ddot(int * n,double * dx,int * incx,double * dy,int * incy)4141 double ddot(int *n, double *dx, int *incx, double *dy,
4142 		int *incy)
4143 {
4144 
4145 
4146 	/* System generated locals */
4147 	// int i__1;
4148 	double ret_val;
4149 
4150 	/* Local variables */
4151 	int i, m;
4152 	double dtemp;
4153 	int ix, iy, mp1;
4154 
4155 
4156 	/*     forms the dot product of two vectors.
4157 	 uses unrolled loops for increments equal to one.
4158 	 jack dongarra, linpack, 3/11/78.
4159 	 modified 12/3/93, array(1) declarations changed to array(*)
4160 
4161 
4162 
4163 	 Parameter adjustments
4164 	 Function Body */
4165 
4166 
4167 	ret_val = 0.;
4168 	dtemp = 0.;
4169 	if (*n <= 0) {
4170 		return ret_val;
4171 	}
4172 	if (*incx == 1 && *incy == 1) {
4173 		goto L20;
4174 	}
4175 
4176 	/*        code for unequal increments or equal increments
4177 	 not equal to 1 */
4178 
4179 	ix = 1;
4180 	iy = 1;
4181 	if (*incx < 0) {
4182 		ix = (-(*n) + 1) * *incx + 1;
4183 	}
4184 	if (*incy < 0) {
4185 		iy = (-(*n) + 1) * *incy + 1;
4186 	}
4187 	// i__1 = *n;
4188 	for (i = 1; i <= *n; ++i) {
4189 		dtemp += DX(ix) * DY(iy);
4190 		ix += *incx;
4191 		iy += *incy;
4192 		/* L10: */
4193 	}
4194 	ret_val = dtemp;
4195 	return ret_val;
4196 
4197 	/*        code for both increments equal to 1
4198 
4199 
4200 	 clean-up loop */
4201 
4202 	L20:
4203 	m = *n % 5;
4204 	if (m == 0) {
4205 		goto L40;
4206 	}
4207 	// i__1 = m;
4208 	for (i = 1; i <= m; ++i) {
4209 		dtemp += DX(i) * DY(i);
4210 		/* L30: */
4211 	}
4212 	if (*n < 5) {
4213 		goto L60;
4214 	}
4215 	L40:
4216 	mp1 = m + 1;
4217 	// i__1 = *n;
4218 	for (i = mp1; i <= *n; i += 5) {
4219 		dtemp = dtemp + DX(i) * DY(i) + DX(i + 1) * DY(i + 1) + DX(i + 2) *
4220 				DY(i + 2) + DX(i + 3) * DY(i + 3) + DX(i + 4) * DY(i + 4);
4221 		/* L50: */
4222 	}
4223 	L60:
4224 	ret_val = dtemp;
4225 	return ret_val;
4226 } /* ddot */
4227 
daxpy(int * n,double * da,double * dx,int * incx,double * dy,int * incy)4228 int daxpy(int *n, double *da, double *dx,
4229 		int *incx, double *dy, int *incy)
4230 {
4231 
4232 
4233 	/* System generated locals */
4234 	// int i__1;
4235 
4236 	/* Local variables */
4237 	int i, m, ix, iy, mp1;
4238 
4239 
4240 	/*     constant times a vector plus a vector.
4241 	 uses unrolled loops for increments equal to one.
4242 	 jack dongarra, linpack, 3/11/78.
4243 	 modified 12/3/93, array(1) declarations changed to array(*)
4244 
4245 
4246 
4247 	 Parameter adjustments
4248 	 Function Body */
4249 
4250 	if (*n <= 0) {
4251 		return 0;
4252 	}
4253 	if (*da == 0.) {
4254 		return 0;
4255 	}
4256 	if (*incx == 1 && *incy == 1) {
4257 		goto L20;
4258 	}
4259 
4260 	/*        code for unequal increments or equal increments
4261 	 not equal to 1 */
4262 
4263 	ix = 1;
4264 	iy = 1;
4265 	if (*incx < 0) {
4266 		ix = (-(*n) + 1) * *incx + 1;
4267 	}
4268 	if (*incy < 0) {
4269 		iy = (-(*n) + 1) * *incy + 1;
4270 	}
4271 	// i__1 = *n;
4272 	for (i = 1; i <= *n; ++i) {
4273 		DY(iy) += *da * DX(ix);
4274 		ix += *incx;
4275 		iy += *incy;
4276 		/* L10: */
4277 	}
4278 	return 0;
4279 
4280 	/*        code for both increments equal to 1
4281 
4282 
4283 	 clean-up loop */
4284 
4285 	L20:
4286 	m = *n % 4;
4287 	if (m == 0) {
4288 		goto L40;
4289 	}
4290 	// i__1 = m;
4291 	for (i = 1; i <= m; ++i) {
4292 		DY(i) += *da * DX(i);
4293 		/* L30: */
4294 	}
4295 	if (*n < 4) {
4296 		return 0;
4297 	}
4298 	L40:
4299 	mp1 = m + 1;
4300 	// i__1 = *n;
4301 	for (i = mp1; i <= *n; i += 4) {
4302 		DY(i) += *da * DX(i);
4303 		DY(i + 1) += *da * DX(i + 1);
4304 		DY(i + 2) += *da * DX(i + 2);
4305 		DY(i + 3) += *da * DX(i + 3);
4306 		/* L50: */
4307 	}
4308 	return 0;
4309 } /* daxpy */
4310 
dpofa(double * a,int * lda,int * n,int * info)4311 int dpofa(double *a, int *lda, int *n, int *info)
4312 {
4313 	/* System generated locals */
4314 	int a_dim1, a_offset, i__1, i__2, i__3;
4315 
4316 	/* Local variables */
4317 	int j, k;
4318 	double s, t;
4319 	int jm1;
4320 
4321 	/*<       integer lda,n,info >*/
4322 	/*<       double precision a(lda,1) >*/
4323 
4324 	/*     dpofa factors a double precision symmetric positive definite */
4325 	/*     matrix. */
4326 
4327 	/*     dpofa is usually called by dpoco, but it can be called */
4328 	/*     directly with a saving in time if  rcond  is not needed. */
4329 	/*     (time for dpoco) = (1 + 18/n)*(time for dpofa) . */
4330 
4331 	/*     on entry */
4332 
4333 	/*        a       double precision(lda, n) */
4334 	/*                the symmetric matrix to be factored.  only the */
4335 	/*                diagonal and upper triangle are used. */
4336 
4337 	/*        lda     integer */
4338 	/*                the leading dimension of the array  a . */
4339 
4340 	/*        n       integer */
4341 	/*                the order of the matrix  a . */
4342 
4343 	/*     on return */
4344 
4345 	/*        a       an upper triangular matrix  r  so that  a = trans(r)*r */
4346 	/*                where  trans(r)  is the transpose. */
4347 	/*                the strict lower triangle is unaltered. */
4348 	/*                if  info .ne. 0 , the factorization is not complete. */
4349 
4350 	/*        info    integer */
4351 	/*                = 0  for normal return. */
4352 	/*                = k  signals an error condition.  the leading minor */
4353 	/*                     of order  k  is not positive definite. */
4354 
4355 	/*     linpack.  this version dated 08/14/78 . */
4356 	/*     cleve moler, university of new mexico, argonne national lab. */
4357 
4358 	/*     subroutines and functions */
4359 
4360 	/*     blas ddot */
4361 	/*     fortran dsqrt */
4362 
4363 	/*     internal variables */
4364 
4365 	/*<       double precision ddot,t >*/
4366 	/*<       double precision s >*/
4367 	/*<       integer j,jm1,k >*/
4368 	/*     begin block with ...exits to 40 */
4369 
4370 
4371 	/*<          do 30 j = 1, n >*/
4372 	/* Parameter adjustments */
4373 	a_dim1 = *lda;
4374 	a_offset = 1 + a_dim1;
4375 	a -= a_offset;
4376 
4377 	/* Function Body */
4378 	i__1 = *n;
4379 	for (j = 1; j <= i__1; ++j) {
4380 		/*<             info = j >*/
4381 		*info = j;
4382 		/*<             s = 0.0d0 >*/
4383 		s = 0.;
4384 		/*<             jm1 = j - 1 >*/
4385 		jm1 = j - 1;
4386 		/*<             if (jm1 .lt. 1) go to 20 >*/
4387 		if (jm1 < 1) {
4388 			goto L20;
4389 		}
4390 		/*<             do 10 k = 1, jm1 >*/
4391 		i__2 = jm1;
4392 		for (k = 1; k <= i__2; ++k) {
4393 			/*<                t = a(k,j) - ddot(k-1,a(1,k),1,a(1,j),1) >*/
4394 			i__3 = k - 1;
4395 			t = a[k + j * a_dim1] - ddot(&i__3, &a[k * a_dim1 + 1], &c__1, &
4396 					a[j * a_dim1 + 1], &c__1);
4397 			/*<                t = t/a(k,k) >*/
4398 			t /= a[k + k * a_dim1];
4399 			/*<                a(k,j) = t >*/
4400 			a[k + j * a_dim1] = t;
4401 			/*<                s = s + t*t >*/
4402 			s += t * t;
4403 			/*<    10       continue >*/
4404 			/* L10: */
4405 		}
4406 		/*<    20       continue >*/
4407 		L20:
4408 		/*<             s = a(j,j) - s >*/
4409 		s = a[j + j * a_dim1] - s;
4410 		/*     ......exit */
4411 		/*<             if (s .le. 0.0d0) go to 40 >*/
4412 		if (s <= 0.) {
4413 			goto L40;
4414 		}
4415 		/*<             a(j,j) = dsqrt(s) >*/
4416 		a[j + j * a_dim1] = sqrt(s);
4417 		/*<    30    continue >*/
4418 		/* L30: */
4419 	}
4420 	/*<          info = 0 >*/
4421 	*info = 0;
4422 	/*<    40 continue >*/
4423 	L40:
4424 	/*<       return >*/
4425 	return 0;
4426 	/*<       end >*/
4427 } /* dpofa */
4428 
dtrsl(double * t,int * ldt,int * n,double * b,int * job,int * info)4429 int dtrsl(double *t, int *ldt, int *n,
4430 		double *b, int *job, int *info)
4431 {
4432 	/* System generated locals */
4433 	int t_dim1, t_offset, i__1, i__2;
4434 
4435 	/* Local variables */
4436 	int j, jj, case__;
4437 	double temp;
4438 
4439 
4440 	/*     dtrsl solves systems of the form */
4441 
4442 	/*                   t * x = b */
4443 	/*     or */
4444 	/*                   trans(t) * x = b */
4445 
4446 	/*     where t is a triangular matrix of order n. here trans(t) */
4447 	/*     denotes the transpose of the matrix t. */
4448 
4449 	/*     on entry */
4450 
4451 	/*         t         double precision(ldt,n) */
4452 	/*                   t contains the matrix of the system. the zero */
4453 	/*                   elements of the matrix are not referenced, and */
4454 	/*                   the corresponding elements of the array can be */
4455 	/*                   used to store other information. */
4456 
4457 	/*         ldt       integer */
4458 	/*                   ldt is the leading dimension of the array t. */
4459 
4460 	/*         n         integer */
4461 	/*                   n is the order of the system. */
4462 
4463 	/*         b         double precision(n). */
4464 	/*                   b contains the right hand side of the system. */
4465 
4466 	/*         job       integer */
4467 	/*                   job specifies what kind of system is to be solved. */
4468 	/*                   if job is */
4469 
4470 	/*                        00   solve t*x=b, t lower triangular, */
4471 	/*                        01   solve t*x=b, t upper triangular, */
4472 	/*                        10   solve trans(t)*x=b, t lower triangular, */
4473 	/*                        11   solve trans(t)*x=b, t upper triangular. */
4474 
4475 	/*     on return */
4476 
4477 	/*         b         b contains the solution, if info .eq. 0. */
4478 	/*                   otherwise b is unaltered. */
4479 
4480 	/*         info      integer */
4481 	/*                   info contains zero if the system is nonsingular. */
4482 	/*                   otherwise info contains the index of */
4483 	/*                   the first zero diagonal element of t. */
4484 
4485 	/*     linpack. this version dated 08/14/78 . */
4486 	/*     g. w. stewart, university of maryland, argonne national lab. */
4487 
4488 	/*     subroutines and functions */
4489 
4490 	/*     blas daxpy,ddot */
4491 	/*     fortran mod */
4492 
4493 	/*     internal variables */
4494 
4495 
4496 	/*     begin block permitting ...exits to 150 */
4497 
4498 	/*        check for zero diagonal elements. */
4499 
4500 	/* Parameter adjustments */
4501 	t_dim1 = *ldt;
4502 	t_offset = 1 + t_dim1;
4503 	t -= t_offset;
4504 	--b;
4505 
4506 	/* Function Body */
4507 	i__1 = *n;
4508 	for (*info = 1; *info <= i__1; ++(*info)) {
4509 		/*     ......exit */
4510 		if (t[*info + *info * t_dim1] == 0.) {
4511 			goto L150;
4512 		}
4513 		/* L10: */
4514 	}
4515 	*info = 0;
4516 
4517 	/*        determine the task and go to it. */
4518 
4519 	case__ = 1;
4520 	if (*job % 10 != 0) {
4521 		case__ = 2;
4522 	}
4523 	if (*job % 100 / 10 != 0) {
4524 		case__ += 2;
4525 	}
4526 	switch (case__) {
4527 	case 1:  goto L20;
4528 	case 2:  goto L50;
4529 	case 3:  goto L80;
4530 	case 4:  goto L110;
4531 	}
4532 
4533 	/*        solve t*x=b for t lower triangular */
4534 
4535 	L20:
4536 	b[1] /= t[t_dim1 + 1];
4537 	if (*n < 2) {
4538 		goto L40;
4539 	}
4540 	i__1 = *n;
4541 	for (j = 2; j <= i__1; ++j) {
4542 		temp = -b[j - 1];
4543 		i__2 = *n - j + 1;
4544 		daxpy(&i__2, &temp, &t[j + (j - 1) * t_dim1], &c__1, &b[j], &c__1);
4545 		b[j] /= t[j + j * t_dim1];
4546 		/* L30: */
4547 	}
4548 	L40:
4549 	goto L140;
4550 
4551 	/*        solve t*x=b for t upper triangular. */
4552 
4553 	L50:
4554 	b[*n] /= t[*n + *n * t_dim1];
4555 	if (*n < 2) {
4556 		goto L70;
4557 	}
4558 	i__1 = *n;
4559 	for (jj = 2; jj <= i__1; ++jj) {
4560 		j = *n - jj + 1;
4561 		temp = -b[j + 1];
4562 		daxpy(&j, &temp, &t[(j + 1) * t_dim1 + 1], &c__1, &b[1], &c__1);
4563 		b[j] /= t[j + j * t_dim1];
4564 		/* L60: */
4565 	}
4566 	L70:
4567 	goto L140;
4568 
4569 	/*        solve trans(t)*x=b for t lower triangular. */
4570 
4571 	L80:
4572 	b[*n] /= t[*n + *n * t_dim1];
4573 	if (*n < 2) {
4574 		goto L100;
4575 	}
4576 	i__1 = *n;
4577 	for (jj = 2; jj <= i__1; ++jj) {
4578 		j = *n - jj + 1;
4579 		i__2 = jj - 1;
4580 		b[j] -= ddot(&i__2, &t[j + 1 + j * t_dim1], &c__1, &b[j + 1], &c__1);
4581 		b[j] /= t[j + j * t_dim1];
4582 		/* L90: */
4583 	}
4584 	L100:
4585 	goto L140;
4586 
4587 	/*        solve trans(t)*x=b for t upper triangular. */
4588 
4589 	L110:
4590 	b[1] /= t[t_dim1 + 1];
4591 	if (*n < 2) {
4592 		goto L130;
4593 	}
4594 	i__1 = *n;
4595 	for (j = 2; j <= i__1; ++j) {
4596 		i__2 = j - 1;
4597 		b[j] -= ddot(&i__2, &t[j * t_dim1 + 1], &c__1, &b[1], &c__1);
4598 		b[j] /= t[j + j * t_dim1];
4599 		/* L120: */
4600 	}
4601 	L130:
4602 	L140:
4603 	L150:
4604 	return 0;
4605 } /* dtrsl */
4606