1 /*
2 * $Id: dgels.c,v 1.1 2005-09-18 22:04:43 dhmunro Exp $
3 * LAPACK routines to solve (in least squares sense) a matrix
4 * by QR or LQ decomposition.
5 */
6 /* Copyright (c) 2005, The Regents of the University of California.
7 * All rights reserved.
8 * This file is part of yorick (http://yorick.sourceforge.net).
9 * Read the accompanying LICENSE file for details.
10 */
11
12 #include "dg.h"
13
14
15 /*---blas routines---*/
16 /* dcopy dnrm2 dscal dger dgemv dgemm dtrmv dtrmm dtrsm */
17
18
19
20 /*---converted nutty string switches to single characters (lower case)---*/
21 #define lsame(x,y) ((x)==(y))
22
23
24 /*-----Fortran intrinsics converted-----*/
25 #define abs(x) ((x)>=0?(x):-(x))
26 extern double sqrt(double);
27 #define dble(x) ((double)x)
28 #define sign(x,y) ((((x)<0)!=((y)<0))?-(x):(x))
29 #define min(x,y) ((x)<(y)?(x):(y))
30 #define max(x,y) ((x)>(y)?(x):(y))
31 /*-----end of Fortran intrinsics-----*/
32
33
34
dgels(char trans,long m,long n,long nrhs,double a[],long lda,double b[],long ldb,double work[],long lwork,long * info)35 void dgels( char trans, long m, long n, long nrhs,
36 double a[], long lda, double b[], long ldb,
37 double work[], long lwork,long *info )
38 {
39 /**
40 * -- LAPACK driver routine (version 1.1) --
41 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
42 * Courant Institute, Argonne National Lab, and Rice University
43 * March 31, 1993
44 *
45 * .. Scalar Arguments ..*/
46 /** ..
47 * .. Array Arguments ..*/
48 #undef work_1
49 #define work_1(a1) work[a1-1]
50 #undef b_2
51 #define b_2(a1,a2) b[a1-1+ldb*(a2-1)]
52 #undef a_2
53 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
54 /** ..
55 *
56 * Purpose
57 * =======
58 *
59 * DGELS solves overdetermined or underdetermined real linear systems
60 * involving an M-by-N matrix A, or its transpose, using a QR or LQ
61 * factorization of A. It is assumed that A has full rank.
62 *
63 * The following options are provided:
64 *
65 * 1. If TRANS = 'N' and m >= n: find the least squares solution of
66 * an overdetermined system, i.e., solve the least squares problem
67 * minimize || B - A*X ||.
68 *
69 * 2. If TRANS = 'N' and m < n: find the minimum norm solution of
70 * an underdetermined system A * X = B.
71 *
72 * 3. If TRANS = 'T' and m >= n: find the minimum norm solution of
73 * an undetermined system A**T * X = B.
74 *
75 * 4. If TRANS = 'T' and m < n: find the least squares solution of
76 * an overdetermined system, i.e., solve the least squares problem
77 * minimize || B - A**T * X ||.
78 *
79 * Several right hand side vectors b and solution vectors x can be
80 * handled in a single call; they are stored as the columns of the
81 * M-by-NRHS right hand side matrix B and the N-by-NRHS solution
82 * matrix X.
83 *
84 * Arguments
85 * =========
86 *
87 * TRANS (input) CHARACTER
88 * = 'N': the linear system involves A;
89 * = 'T': the linear system involves A**T.
90 *
91 * M (input) INTEGER
92 * The number of rows of the matrix A. M >= 0.
93 *
94 * N (input) INTEGER
95 * The number of columns of the matrix A. N >= 0.
96 *
97 * NRHS (input) INTEGER
98 * The number of right hand sides, i.e., the number of
99 * columns of the matrices B and X. NRHS >=0.
100 *
101 * A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
102 * On entry, the M-by-N matrix A.
103 * On exit,
104 * if M >= N, A is overwritten by details of its QR
105 * factorization as returned by DGEQRF;
106 * if M < N, A is overwritten by details of its LQ
107 * factorization as returned by DGELQF.
108 *
109 * LDA (input) INTEGER
110 * The leading dimension of the array A. LDA >= max(1,M).
111 *
112 * B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
113 * On entry, the matrix B of right hand side vectors, stored
114 * columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS
115 * if TRANS = 'T'.
116 * On exit, B is overwritten by the solution vectors, stored
117 * columnwise: if TRANS = 'N' and m >= n, rows 1 to n of B
118 * contain the least squares solution vectors; the residual
119 * sum of squares for the solution in each column is given by
120 * the sum of squares of elements N+1 to M in that column;
121 * if TRANS = 'N' and m < n, rows 1 to N of B contain the
122 * minimum norm solution vectors;
123 * if TRANS = 'T' and m >= n, rows 1 to M of B contain the
124 * minimum norm solution vectors;
125 * if TRANS = 'T' and m < n, rows 1 to M of B contain the
126 * least squares solution vectors; the residual sum of squares
127 * for the solution in each column is given by the sum of
128 * squares of elements M+1 to N in that column.
129 *
130 * LDB (input) INTEGER
131 * The leading dimension of the array B. LDB >= MAX(1,M,N).
132 *
133 * WORK_1 (workspace) DOUBLE PRECISION array, dimension (LWORK)
134 * On exit, if INFO = 0, WORK_1(1) returns the optimal LWORK.
135 *
136 * LWORK (input) INTEGER
137 * The dimension of the array WORK.
138 * LWORK >= min(M,N) + MAX(1,M,N,NRHS).
139 * For optimal performance,
140 * LWORK >= min(M,N) + MAX(1,M,N,NRHS) * NB
141 * where NB is the optimum block size.
142 *
143 * INFO (output) INTEGER
144 * = 0: successful exit
145 * < 0: if INFO = -i, the i-th argument had an illegal value
146 *
147 * =====================================================================
148 *
149 * .. Parameters ..*/
150 #undef zero
151 #define zero 0.0e0
152 #undef one
153 #define one 1.0e0
154 /** ..
155 * .. Local Scalars ..*/
156 int tpsd=0;
157 long brow, i, iascl, ibscl, j, mn, nb, scllen, wsize=0;
158 double anrm, bignum, bnrm, smlnum;
159 /** ..
160 * .. Local Arrays ..*/
161 double rwork[1];
162 #undef rwork_1
163 #define rwork_1(a1) [a1-1]
164 /** ..
165 * .. Intrinsic Functions ..*/
166 /* intrinsic dble, max, min;*/
167 /** ..
168 * .. Executable Statements ..
169 *
170 * Test the input arguments.
171 **/
172 /*-----implicit-declarations-----*/
173 /*-----end-of-declarations-----*/
174 *info = 0;
175 mn = min( m, n );
176 if( !( lsame( trans, 'n' ) || lsame( trans, 't' ) ) ) {
177 *info = -1;
178 } else if( m<0 ) {
179 *info = -2;
180 } else if( n<0 ) {
181 *info = -3;
182 } else if( nrhs<0 ) {
183 *info = -4;
184 } else if( lda<max( 1, m ) ) {
185 *info = -6;
186 } else if( ldb<max( max(1, m), n ) ) {
187 *info = -8;
188 } else if( lwork<max( 1, mn+max( max(m, n), nrhs ) ) ) {
189 *info = -10;
190 }
191 /**
192 * Figure out optimal block size
193 **/
194 if( *info==0 || *info==-10 ) {
195
196 tpsd = 1;
197 if( lsame( trans, 'n' ) )
198 tpsd = 0;
199
200 if( m>=n ) {
201 nb = ilaenv( 1, "dgeqrf", " ", m, n, -1, -1 );
202 if( tpsd ) {
203 nb = max( nb, ilaenv( 1, "dormqr", "ln", m, nrhs, n,
204 -1 ) );
205 } else {
206 nb = max( nb, ilaenv( 1, "dormqr", "lt", m, nrhs, n,
207 -1 ) );
208 }
209 } else {
210 nb = ilaenv( 1, "dgelqf", " ", m, n, -1, -1 );
211 if( tpsd ) {
212 nb = max( nb, ilaenv( 1, "dormlq", "lt", n, nrhs, m,
213 -1 ) );
214 } else {
215 nb = max( nb, ilaenv( 1, "dormlq", "ln", n, nrhs, m,
216 -1 ) );
217 }
218 }
219
220 wsize = mn + max( max(m, n), nrhs )*nb;
221 work_1( 1 ) = dble( wsize );
222
223 }
224
225 if( *info!=0 ) {
226 xerbla( "dgels ", -*info );
227 return;
228 }
229 /**
230 * Quick return if possible
231 **/
232 if( min( min(m, n), nrhs )==0 ) {
233 dlaset( 'f'/*ull*/, max( m, n ), nrhs, zero, zero, b, ldb );
234 return;
235 }
236 /**
237 * Get machine parameters
238 **/
239 smlnum = dlamch( 's' ) / dlamch( 'p' );
240 bignum = one / smlnum;
241 dlabad( &smlnum, &bignum );
242 /**
243 * Scale A, B if max entry outside range [SMLNUM,BIGNUM]
244 **/
245 anrm = dlange( 'm', m, n, a, lda, rwork );
246 iascl = 0;
247 if( anrm>zero && anrm<smlnum ) {
248 /**
249 * Scale matrix norm up to SMLNUM
250 **/
251 dlascl( 'g', 0, 0, anrm, smlnum, m, n, a, lda, info );
252 iascl = 1;
253 } else if( anrm>bignum ) {
254 /**
255 * Scale matrix norm down to BIGNUM
256 **/
257 dlascl( 'g', 0, 0, anrm, bignum, m, n, a, lda, info );
258 iascl = 2;
259 } else if( anrm==zero ) {
260 /**
261 * Matrix all zero. Return zero solution.
262 **/
263 dlaset( 'f', max( m, n ), nrhs, zero, zero, b, ldb );
264 goto L_50;
265 }
266
267 brow = m;
268 if( tpsd )
269 brow = n;
270 bnrm = dlange( 'm', brow, nrhs, b, ldb, rwork );
271 ibscl = 0;
272 if( bnrm>zero && bnrm<smlnum ) {
273 /**
274 * Scale matrix norm up to SMLNUM
275 **/
276 dlascl( 'g', 0, 0, bnrm, smlnum, brow, nrhs, b, ldb,
277 info );
278 ibscl = 1;
279 } else if( bnrm>bignum ) {
280 /**
281 * Scale matrix norm down to BIGNUM
282 **/
283 dlascl( 'g', 0, 0, bnrm, bignum, brow, nrhs, b, ldb,
284 info );
285 ibscl = 2;
286 }
287
288 if( m>=n ) {
289 /**
290 * compute QR factorization of A
291 **/
292 dgeqrf( m, n, a, lda, &work_1( 1 ), &work_1( mn+1 ), lwork-mn,
293 info );
294 /**
295 * workspace at least N, optimally N*NB
296 **/
297 if( !tpsd ) {
298 /**
299 * Least-Squares Problem min || A * X - B ||
300 *
301 * B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS)
302 **/
303 dormqr( 'l'/*eft*/, 't'/*ranspose*/, m, nrhs, n, a, lda,
304 &work_1( 1 ), b, ldb, &work_1( mn+1 ), lwork-mn,
305 info );
306 /**
307 * workspace at least NRHS, optimally NRHS*NB
308 *
309 * B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS)
310 **/
311 cblas_dtrsm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans,
312 CblasNonUnit, n, nrhs, one, a, lda, b, ldb );
313
314 scllen = n;
315
316 } else {
317 /**
318 * Overdetermined system of equations A' * X = B
319 *
320 * B(1:N,1:NRHS) := inv(R') * B(1:N,1:NRHS)
321 **/
322 cblas_dtrsm(CblasColMajor, CblasLeft, CblasUpper, CblasTrans,
323 CblasNonUnit, n, nrhs, one, a, lda, b, ldb );
324 /**
325 * B(N+1:M,1:NRHS) = ZERO
326 **/
327 for (j=1 ; j<=nrhs ; j+=1) {
328 for (i=n + 1 ; i<=m ; i+=1) {
329 b_2( i, j ) = zero;
330 }
331 }
332 /**
333 * B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS)
334 **/
335 dormqr( 'l'/*eft*/, 'n'/*o transpose*/, m, nrhs, n, a, lda,
336 &work_1( 1 ), b, ldb, &work_1( mn+1 ), lwork-mn,
337 info );
338 /**
339 * workspace at least NRHS, optimally NRHS*NB
340 **/
341 scllen = m;
342
343 }
344
345 } else {
346 /**
347 * Compute LQ factorization of A
348 **/
349 dgelqf( m, n, a, lda, &work_1( 1 ), &work_1( mn+1 ), lwork-mn,
350 info );
351 /**
352 * workspace at least M, optimally M*NB.
353 **/
354 if( !tpsd ) {
355 /**
356 * underdetermined system of equations A * X = B
357 *
358 * B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS)
359 **/
360 cblas_dtrsm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans,
361 CblasNonUnit, m, nrhs, one, a, lda, b, ldb );
362 /**
363 * B(M+1:N,1:NRHS) = 0
364 **/
365 for (j=1 ; j<=nrhs ; j+=1) {
366 for (i=m + 1 ; i<=n ; i+=1) {
367 b_2( i, j ) = zero;
368 }
369 }
370 /**
371 * B(1:N,1:NRHS) := Q(1:N,:)' * B(1:M,1:NRHS)
372 **/
373 dormlq( 'l'/*eft*/, 't'/*ranspose*/, n, nrhs, m, a, lda,
374 &work_1( 1 ), b, ldb, &work_1( mn+1 ), lwork-mn,
375 info );
376 /**
377 * workspace at least NRHS, optimally NRHS*NB
378 **/
379 scllen = n;
380
381 } else {
382 /**
383 * overdetermined system min || A' * X - B ||
384 *
385 * B(1:N,1:NRHS) := Q * B(1:N,1:NRHS)
386 **/
387 dormlq( 'l'/*eft*/, 'n'/*o transpose*/, n, nrhs, m, a, lda,
388 &work_1( 1 ), b, ldb, &work_1( mn+1 ), lwork-mn,
389 info );
390 /**
391 * workspace at least NRHS, optimally NRHS*NB
392 *
393 * B(1:M,1:NRHS) := inv(L') * B(1:M,1:NRHS)
394 **/
395 cblas_dtrsm(CblasColMajor, CblasLeft, CblasLower, CblasTrans,
396 CblasNonUnit, m, nrhs, one, a, lda, b, ldb );
397
398 scllen = m;
399
400 }
401
402 }
403 /**
404 * Undo scaling
405 **/
406 if( iascl==1 ) {
407 dlascl( 'g', 0, 0, anrm, smlnum, scllen, nrhs, b, ldb,
408 info );
409 } else if( iascl==2 ) {
410 dlascl( 'g', 0, 0, anrm, bignum, scllen, nrhs, b, ldb,
411 info );
412 }
413 if( ibscl==1 ) {
414 dlascl( 'g', 0, 0, smlnum, bnrm, scllen, nrhs, b, ldb,
415 info );
416 } else if( ibscl==2 ) {
417 dlascl( 'g', 0, 0, bignum, bnrm, scllen, nrhs, b, ldb,
418 info );
419 }
420
421 L_50:
422 work_1( 1 ) = dble( wsize );
423
424 return;
425 /**
426 * End of DGELS
427 **/
428 }
429
430
431
dormlq(char side,char trans,long m,long n,long k,double a[],long lda,double tau[],double c[],long ldc,double work[],long lwork,long * info)432 void dormlq( char side, char trans, long m, long n, long k,
433 double a[], long lda, double tau[], double c[], long ldc,
434 double work[], long lwork, long *info )
435 {
436 /**
437 * -- LAPACK routine (version 1.1) --
438 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
439 * Courant Institute, Argonne National Lab, and Rice University
440 * March 31, 1993
441 *
442 * .. Scalar Arguments ..*/
443 /** ..
444 * .. Array Arguments ..*/
445 #undef work_1
446 #define work_1(a1) work[a1-1]
447 #undef tau_1
448 #define tau_1(a1) tau[a1-1]
449 #undef c_2
450 #define c_2(a1,a2) c[a1-1+ldc*(a2-1)]
451 #undef a_2
452 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
453 /** ..
454 *
455 * Purpose
456 * =======
457 *
458 * DORMLQ overwrites the general real M-by-N matrix C with
459 *
460 * SIDE = 'L' SIDE = 'R'
461 * TRANS = 'N': Q * C C * Q
462 * TRANS = 'T': Q**T * C C * Q**T
463 *
464 * where Q is a real orthogonal matrix defined as the product of k
465 * elementary reflectors
466 *
467 * Q = H(k) . . . H(2) H(1)
468 *
469 * as returned by DGELQF. Q is of order M if SIDE = 'L' and of order N
470 * if SIDE = 'R'.
471 *
472 * Arguments
473 * =========
474 *
475 * SIDE (input) CHARACTER*1
476 * = 'L': apply Q or Q**T from the Left;
477 * = 'R': apply Q or Q**T from the Right.
478 *
479 * TRANS (input) CHARACTER*1
480 * = 'N': No transpose, apply Q;
481 * = 'T': Transpose, apply Q**T.
482 *
483 * M (input) INTEGER
484 * The number of rows of the matrix C. M >= 0.
485 *
486 * N (input) INTEGER
487 * The number of columns of the matrix C. N >= 0.
488 *
489 * K (input) INTEGER
490 * The number of elementary reflectors whose product defines
491 * the matrix Q.
492 * If SIDE = 'L', M >= K >= 0;
493 * if SIDE = 'R', N >= K >= 0.
494 *
495 * A (input) DOUBLE PRECISION array, dimension
496 * (LDA,M) if SIDE = 'L',
497 * (LDA,N) if SIDE = 'R'
498 * The i-th row must contain the vector which defines the
499 * elementary reflector H(i), for i = 1,2,...,k, as returned by
500 * DGELQF in the first k rows of its array argument A.
501 * A is modified by the routine but restored on exit.
502 *
503 * LDA (input) INTEGER
504 * The leading dimension of the array A. LDA >= max(1,K).
505 *
506 * TAU (input) DOUBLE PRECISION array, dimension (K)
507 * TAU(i) must contain the scalar factor of the elementary
508 * reflector H(i), as returned by DGELQF.
509 *
510 * C (input/output) DOUBLE PRECISION array, dimension (LDC,N)
511 * On entry, the M-by-N matrix C.
512 * On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
513 *
514 * LDC (input) INTEGER
515 * The leading dimension of the array C. LDC >= max(1,M).
516 *
517 * WORK (workspace) DOUBLE PRECISION array, dimension (LWORK)
518 * On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
519 *
520 * LWORK (input) INTEGER
521 * The dimension of the array WORK.
522 * If SIDE = 'L', LWORK >= max(1,N);
523 * if SIDE = 'R', LWORK >= max(1,M).
524 * For optimum performance LWORK >= N*NB if SIDE = 'L', and
525 * LWORK >= M*NB if SIDE = 'R', where NB is the optimal
526 * blocksize.
527 *
528 * INFO (output) INTEGER
529 * = 0: successful exit
530 * < 0: if INFO = -i, the i-th argument had an illegal value
531 *
532 * =====================================================================
533 *
534 * .. Parameters ..*/
535 #undef nbmax
536 #define nbmax 64
537 #undef ldt
538 #define ldt (nbmax+1)
539 /** ..
540 * .. Local Scalars ..*/
541 int left, notran;
542 char transt, side_trans[3];
543 long i, i1, i2, i3, ib, ic=0, iinfo, iws, jc=0, ldwork,
544 mi=0, nb, nbmin, ni=0, nq, nw;
545 /** ..
546 * .. Local Arrays ..*/
547 double t[nbmax][ldt];
548 #undef t_2
549 #define t_2(a1,a2) t[a2-1][a1-1]
550 /** ..
551 * .. Intrinsic Functions ..*/
552 /* intrinsic max, min;*/
553 /** ..
554 * .. Executable Statements ..
555 *
556 * Test the input arguments
557 **/
558 /*-----implicit-declarations-----*/
559 /*-----end-of-declarations-----*/
560 *info = 0;
561 left = lsame( side, 'l' );
562 notran = lsame( trans, 'n' );
563 /**
564 * NQ is the order of Q and NW is the minimum dimension of WORK
565 **/
566 if( left ) {
567 nq = m;
568 nw = n;
569 } else {
570 nq = n;
571 nw = m;
572 }
573 if( !left && !lsame( side, 'r' ) ) {
574 *info = -1;
575 } else if( !notran && !lsame( trans, 't' ) ) {
576 *info = -2;
577 } else if( m<0 ) {
578 *info = -3;
579 } else if( n<0 ) {
580 *info = -4;
581 } else if( k<0 || k>nq ) {
582 *info = -5;
583 } else if( lda<max( 1, k ) ) {
584 *info = -7;
585 } else if( ldc<max( 1, m ) ) {
586 *info = -10;
587 } else if( lwork<max( 1, nw ) ) {
588 *info = -12;
589 }
590 if( *info!=0 ) {
591 xerbla( "dormlq", -*info );
592 return;
593 }
594 /**
595 * Quick return if possible
596 **/
597 if( m==0 || n==0 || k==0 ) {
598 work_1( 1 ) = 1;
599 return;
600 }
601 /**
602 * Determine the block size. NB may be at most NBMAX, where NBMAX
603 * is used to define the local array T.
604 **/
605 side_trans[0]= side;
606 side_trans[1]= trans;
607 side_trans[2]= '\0';
608 nb = min( nbmax, ilaenv( 1, "dormlq", side_trans, m, n, k,
609 -1 ) );
610 nbmin = 2;
611 ldwork = nw;
612 if( nb>1 && nb<k ) {
613 iws = nw*nb;
614 if( lwork<iws ) {
615 nb = lwork / ldwork;
616 nbmin = max( 2, ilaenv( 2, "dormlq", side_trans, m, n, k,
617 -1 ) );
618 }
619 } else {
620 iws = nw;
621 }
622
623 if( nb<nbmin || nb>=k ) {
624 /**
625 * Use unblocked code
626 **/
627 dorml2( side, trans, m, n, k, a, lda, tau, c, ldc, work,
628 &iinfo );
629 } else {
630 /**
631 * Use blocked code
632 **/
633 if( ( left && notran ) ||
634 ( !left && !notran ) ) {
635 i1 = 1;
636 i2 = k;
637 i3 = nb;
638 } else {
639 i1 = ( ( k-1 ) / nb )*nb + 1;
640 i2 = 1;
641 i3 = -nb;
642 }
643
644 if( left ) {
645 ni = n;
646 jc = 1;
647 } else {
648 mi = m;
649 ic = 1;
650 }
651
652 if( notran ) {
653 transt = 't';
654 } else {
655 transt = 'n';
656 }
657
658 for (i=i1 ; i3>0?i<=i2:i>=i2 ; i+=i3) {
659 ib = min( nb, k-i+1 );
660 /**
661 * Form the triangular factor of the block reflector
662 * H = H(i) H(i+1) . . . H(i+ib-1)
663 **/
664 dlarft( 'f'/*orward*/, 'r'/*owwise*/, nq-i+1, ib, &a_2( i, i ),
665 lda, &tau_1( i ), (double *)t, ldt );
666 if( left ) {
667 /**
668 * H or H' is applied to C(i:m,1:n)
669 **/
670 mi = m - i + 1;
671 ic = i;
672 } else {
673 /**
674 * H or H' is applied to C(1:m,i:n)
675 **/
676 ni = n - i + 1;
677 jc = i;
678 }
679 /**
680 * Apply H or H'
681 **/
682 dlarfb( side, transt, 'f'/*orward*/, 'r'/*owwise*/, mi, ni, ib,
683 &a_2( i, i ), lda, (double *)t, ldt, &c_2( ic, jc ), ldc, work,
684 ldwork );
685 }
686 }
687 work_1( 1 ) = iws;
688 return;
689 /**
690 * End of DORMLQ
691 **/
692 }
693
694
695
dorml2(char side,char trans,long m,long n,long k,double a[],long lda,double tau[],double c[],long ldc,double work[],long * info)696 void dorml2( char side, char trans, long m, long n, long k,
697 double a[], long lda, double tau[], double c[], long ldc,
698 double work[], long *info )
699 {
700 /**
701 * -- LAPACK routine (version 1.1) --
702 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
703 * Courant Institute, Argonne National Lab, and Rice University
704 * February 29, 1992
705 *
706 * .. Scalar Arguments ..*/
707 /** ..
708 * .. Array Arguments ..*/
709 #undef work_1
710 #define work_1(a1) work[a1-1]
711 #undef tau_1
712 #define tau_1(a1) tau[a1-1]
713 #undef c_2
714 #define c_2(a1,a2) c[a1-1+ldc*(a2-1)]
715 #undef a_2
716 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
717 /** ..
718 *
719 * Purpose
720 * =======
721 *
722 * DORML2 overwrites the general real m by n matrix C with
723 *
724 * Q * C if SIDE = 'L' and TRANS = 'N', or
725 *
726 * Q'* C if SIDE = 'L' and TRANS = 'T', or
727 *
728 * C * Q if SIDE = 'R' and TRANS = 'N', or
729 *
730 * C * Q' if SIDE = 'R' and TRANS = 'T',
731 *
732 * where Q is a real orthogonal matrix defined as the product of k
733 * elementary reflectors
734 *
735 * Q = H(k) . . . H(2) H(1)
736 *
737 * as returned by DGELQF. Q is of order m if SIDE = 'L' and of order n
738 * if SIDE = 'R'.
739 *
740 * Arguments
741 * =========
742 *
743 * SIDE (input) CHARACTER*1
744 * = 'L': apply Q or Q' from the Left
745 * = 'R': apply Q or Q' from the Right
746 *
747 * TRANS (input) CHARACTER*1
748 * = 'N': apply Q (No transpose)
749 * = 'T': apply Q' (Transpose)
750 *
751 * M (input) INTEGER
752 * The number of rows of the matrix C. M >= 0.
753 *
754 * N (input) INTEGER
755 * The number of columns of the matrix C. N >= 0.
756 *
757 * K (input) INTEGER
758 * The number of elementary reflectors whose product defines
759 * the matrix Q.
760 * If SIDE = 'L', M >= K >= 0;
761 * if SIDE = 'R', N >= K >= 0.
762 *
763 * A (input) DOUBLE PRECISION array, dimension
764 * (LDA,M) if SIDE = 'L',
765 * (LDA,N) if SIDE = 'R'
766 * The i-th row must contain the vector which defines the
767 * elementary reflector H(i), for i = 1,2,...,k, as returned by
768 * DGELQF in the first k rows of its array argument A.
769 * A is modified by the routine but restored on exit.
770 *
771 * LDA (input) INTEGER
772 * The leading dimension of the array A. LDA >= max(1,K).
773 *
774 * TAU (input) DOUBLE PRECISION array, dimension (K)
775 * TAU(i) must contain the scalar factor of the elementary
776 * reflector H(i), as returned by DGELQF.
777 *
778 * C (input/output) DOUBLE PRECISION array, dimension (LDC,N)
779 * On entry, the m by n matrix C.
780 * On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
781 *
782 * LDC (input) INTEGER
783 * The leading dimension of the array C. LDC >= max(1,M).
784 *
785 * WORK (workspace) DOUBLE PRECISION array, dimension
786 * (N) if SIDE = 'L',
787 * (M) if SIDE = 'R'
788 *
789 * INFO (output) INTEGER
790 * = 0: successful exit
791 * < 0: if INFO = -i, the i-th argument had an illegal value
792 *
793 * =====================================================================
794 *
795 * .. Parameters ..*/
796 #undef one
797 #define one 1.0e+0
798 /** ..
799 * .. Local Scalars ..*/
800 int left, notran;
801 long i, i1, i2, i3, ic=0, jc=0, mi=0, ni=0, nq;
802 double aii;
803 /** ..
804 * .. Intrinsic Functions ..*/
805 /* intrinsic max;*/
806 /** ..
807 * .. Executable Statements ..
808 *
809 * Test the input arguments
810 **/
811 /*-----implicit-declarations-----*/
812 /*-----end-of-declarations-----*/
813 *info = 0;
814 left = lsame( side, 'l' );
815 notran = lsame( trans, 'n' );
816 /**
817 * NQ is the order of Q
818 **/
819 if( left ) {
820 nq = m;
821 } else {
822 nq = n;
823 }
824 if( !left && !lsame( side, 'r' ) ) {
825 *info = -1;
826 } else if( !notran && !lsame( trans, 't' ) ) {
827 *info = -2;
828 } else if( m<0 ) {
829 *info = -3;
830 } else if( n<0 ) {
831 *info = -4;
832 } else if( k<0 || k>nq ) {
833 *info = -5;
834 } else if( lda<max( 1, k ) ) {
835 *info = -7;
836 } else if( ldc<max( 1, m ) ) {
837 *info = -10;
838 }
839 if( *info!=0 ) {
840 xerbla( "dorml2", -*info );
841 return;
842 }
843 /**
844 * Quick return if possible
845 **/
846 if( m==0 || n==0 || k==0 )
847 return;
848
849 if( ( left && notran ) || ( !left && !notran ) )
850 {
851 i1 = 1;
852 i2 = k;
853 i3 = 1;
854 } else {
855 i1 = k;
856 i2 = 1;
857 i3 = -1;
858 }
859
860 if( left ) {
861 ni = n;
862 jc = 1;
863 } else {
864 mi = m;
865 ic = 1;
866 }
867
868 for (i=i1 ; i3>0?i<=i2:i>=i2 ; i+=i3) {
869 if( left ) {
870 /**
871 * H(i) is applied to C(i:m,1:n)
872 **/
873 mi = m - i + 1;
874 ic = i;
875 } else {
876 /**
877 * H(i) is applied to C(1:m,i:n)
878 **/
879 ni = n - i + 1;
880 jc = i;
881 }
882 /**
883 * Apply H(i)
884 **/
885 aii = a_2( i, i );
886 a_2( i, i ) = one;
887 dlarf( side, mi, ni, &a_2( i, i ), lda, tau_1( i ),
888 &c_2( ic, jc ), ldc, work );
889 a_2( i, i ) = aii;
890 }
891 return;
892 /**
893 * End of DORML2
894 **/
895 }
896
897
898
dgelqf(long m,long n,double a[],long lda,double tau[],double work[],long lwork,long * info)899 void dgelqf( long m, long n, double a[], long lda, double tau[],
900 double work[], long lwork, long *info )
901 {
902 /**
903 * -- LAPACK routine (version 1.1) --
904 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
905 * Courant Institute, Argonne National Lab, and Rice University
906 * March 31, 1993
907 *
908 * .. Scalar Arguments ..*/
909 /** ..
910 * .. Array Arguments ..*/
911 #undef work_1
912 #define work_1(a1) work[a1-1]
913 #undef tau_1
914 #define tau_1(a1) tau[a1-1]
915 #undef a_2
916 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
917 /** ..
918 *
919 * Purpose
920 * =======
921 *
922 * DGELQF computes an LQ factorization of a real M-by-N matrix A:
923 * A = L * Q.
924 *
925 * Arguments
926 * =========
927 *
928 * M (input) INTEGER
929 * The number of rows of the matrix A. M >= 0.
930 *
931 * N (input) INTEGER
932 * The number of columns of the matrix A. N >= 0.
933 *
934 * A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
935 * On entry, the M-by-N matrix A.
936 * On exit, the elements on and below the diagonal of the array
937 * contain the m-by-min(m,n) lower trapezoidal matrix L (L is
938 * lower triangular if m <= n); the elements above the diagonal,
939 * with the array TAU, represent the orthogonal matrix Q as a
940 * product of elementary reflectors (see Further Details).
941 *
942 * LDA (input) INTEGER
943 * The leading dimension of the array A. LDA >= max(1,M).
944 *
945 * TAU (output) DOUBLE PRECISION array, dimension (min(M,N))
946 * The scalar factors of the elementary reflectors (see Further
947 * Details).
948 *
949 * WORK (workspace) DOUBLE PRECISION array, dimension (LWORK)
950 * On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
951 *
952 * LWORK (input) INTEGER
953 * The dimension of the array WORK. LWORK >= max(1,M).
954 * For optimum performance LWORK >= M*NB, where NB is the
955 * optimal blocksize.
956 *
957 * INFO (output) INTEGER
958 * = 0: successful exit
959 * < 0: if INFO = -i, the i-th argument had an illegal value
960 *
961 * Further Details
962 * ===============
963 *
964 * The matrix Q is represented as a product of elementary reflectors
965 *
966 * Q = H(k) . . . H(2) H(1), where k = min(m,n).
967 *
968 * Each H(i) has the form
969 *
970 * H(i) = I - tau * v * v'
971 *
972 * where tau is a real scalar, and v is a real vector with
973 * v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
974 * and tau in TAU(i).
975 *
976 * =====================================================================
977 *
978 * .. Local Scalars ..*/
979 long i, ib, iinfo, iws, k, ldwork=0, nb, nbmin, nx;
980 /** ..
981 * .. Intrinsic Functions ..*/
982 /* intrinsic max, min;*/
983 /** ..
984 * .. Executable Statements ..
985 *
986 * Test the input arguments
987 **/
988 /*-----implicit-declarations-----*/
989 /*-----end-of-declarations-----*/
990 *info = 0;
991 if( m<0 ) {
992 *info = -1;
993 } else if( n<0 ) {
994 *info = -2;
995 } else if( lda<max( 1, m ) ) {
996 *info = -4;
997 } else if( lwork<max( 1, m ) ) {
998 *info = -7;
999 }
1000 if( *info!=0 ) {
1001 xerbla( "dgelqf", -*info );
1002 return;
1003 }
1004 /**
1005 * Quick return if possible
1006 **/
1007 k = min( m, n );
1008 if( k==0 ) {
1009 work_1( 1 ) = 1;
1010 return;
1011 }
1012 /**
1013 * Determine the block size.
1014 **/
1015 nb = ilaenv( 1, "dgelqf", " ", m, n, -1, -1 );
1016 nbmin = 2;
1017 nx = 0;
1018 iws = m;
1019 if( nb>1 && nb<k ) {
1020 /**
1021 * Determine when to cross over from blocked to unblocked code.
1022 **/
1023 nx = max( 0, ilaenv( 3, "dgelqf", " ", m, n, -1, -1 ) );
1024 if( nx<k ) {
1025 /**
1026 * Determine if workspace is large enough for blocked code.
1027 **/
1028 ldwork = m;
1029 iws = ldwork*nb;
1030 if( lwork<iws ) {
1031 /**
1032 * Not enough workspace to use optimal NB: reduce NB and
1033 * determine the minimum value of NB.
1034 **/
1035 nb = lwork / ldwork;
1036 nbmin = max( 2, ilaenv( 2, "dgelqf", " ", m, n, -1,
1037 -1 ) );
1038 }
1039 }
1040 }
1041
1042 if( nb>=nbmin && nb<k && nx<k ) {
1043 /**
1044 * Use blocked code initially
1045 **/
1046 for (i=1 ; nb>0?i<=k - nx:i>=k - nx ; i+=nb) {
1047 ib = min( k-i+1, nb );
1048 /**
1049 * Compute the LQ factorization of the current block
1050 * A(i:i+ib-1,i:n)
1051 **/
1052 dgelq2( ib, n-i+1, &a_2( i, i ), lda, &tau_1( i ), work,
1053 &iinfo );
1054 if( i+ib<=m ) {
1055 /**
1056 * Form the triangular factor of the block reflector
1057 * H = H(i) H(i+1) . . . H(i+ib-1)
1058 **/
1059 dlarft( 'f'/*orward*/, 'r'/*owwise*/, n-i+1, ib, &a_2( i, i ),
1060 lda, &tau_1( i ), work, ldwork );
1061 /**
1062 * Apply H to A(i+ib:m,i:n) from the right
1063 **/
1064 dlarfb( 'r'/*ight*/, 'n'/*o transpose*/, 'f'/*orward*/,
1065 'r'/*owwise*/, m-i-ib+1, n-i+1, ib, &a_2( i, i ),
1066 lda, work, ldwork, &a_2( i+ib, i ), lda,
1067 &work_1( ib+1 ), ldwork );
1068 }
1069 }
1070 } else {
1071 i = 1;
1072 }
1073 /**
1074 * Use unblocked code to factor the last or only block.
1075 **/
1076 if( i<=k )
1077 dgelq2( m-i+1, n-i+1, &a_2( i, i ), lda, &tau_1( i ), work,
1078 &iinfo );
1079
1080 work_1( 1 ) = iws;
1081 return;
1082 /**
1083 * End of DGELQF
1084 **/
1085 }
1086
1087
1088
dgelq2(long m,long n,double a[],long lda,double tau[],double work[],long * info)1089 void dgelq2( long m, long n, double a[], long lda, double tau[], double work[], long *info )
1090 {
1091 /**
1092 * -- LAPACK routine (version 1.1) --
1093 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
1094 * Courant Institute, Argonne National Lab, and Rice University
1095 * February 29, 1992
1096 *
1097 * .. Scalar Arguments ..*/
1098 /** ..
1099 * .. Array Arguments ..*/
1100 #undef work_1
1101 #define work_1(a1) work[a1-1]
1102 #undef tau_1
1103 #define tau_1(a1) tau[a1-1]
1104 #undef a_2
1105 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
1106 /** ..
1107 *
1108 * Purpose
1109 * =======
1110 *
1111 * DGELQ2 computes an LQ factorization of a real m by n matrix A:
1112 * A = L * Q.
1113 *
1114 * Arguments
1115 * =========
1116 *
1117 * M (input) INTEGER
1118 * The number of rows of the matrix A. M >= 0.
1119 *
1120 * N (input) INTEGER
1121 * The number of columns of the matrix A. N >= 0.
1122 *
1123 * A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
1124 * On entry, the m by n matrix A.
1125 * On exit, the elements on and below the diagonal of the array
1126 * contain the m by min(m,n) lower trapezoidal matrix L (L is
1127 * lower triangular if m <= n); the elements above the diagonal,
1128 * with the array TAU, represent the orthogonal matrix Q as a
1129 * product of elementary reflectors (see Further Details).
1130 *
1131 * LDA (input) INTEGER
1132 * The leading dimension of the array A. LDA >= max(1,M).
1133 *
1134 * TAU (output) DOUBLE PRECISION array, dimension (min(M,N))
1135 * The scalar factors of the elementary reflectors (see Further
1136 * Details).
1137 *
1138 * WORK (workspace) DOUBLE PRECISION array, dimension (M)
1139 *
1140 * INFO (output) INTEGER
1141 * = 0: successful exit
1142 * < 0: if INFO = -i, the i-th argument had an illegal value
1143 *
1144 * Further Details
1145 * ===============
1146 *
1147 * The matrix Q is represented as a product of elementary reflectors
1148 *
1149 * Q = H(k) . . . H(2) H(1), where k = min(m,n).
1150 *
1151 * Each H(i) has the form
1152 *
1153 * H(i) = I - tau * v * v'
1154 *
1155 * where tau is a real scalar, and v is a real vector with
1156 * v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
1157 * and tau in TAU(i).
1158 *
1159 * =====================================================================
1160 *
1161 * .. Parameters ..*/
1162 #undef one
1163 #define one 1.0e+0
1164 /** ..
1165 * .. Local Scalars ..*/
1166 long i, k;
1167 double aii;
1168 /** ..
1169 * .. Intrinsic Functions ..*/
1170 /* intrinsic max, min;*/
1171 /** ..
1172 * .. Executable Statements ..
1173 *
1174 * Test the input arguments
1175 **/
1176 /*-----implicit-declarations-----*/
1177 /*-----end-of-declarations-----*/
1178 *info = 0;
1179 if( m<0 ) {
1180 *info = -1;
1181 } else if( n<0 ) {
1182 *info = -2;
1183 } else if( lda<max( 1, m ) ) {
1184 *info = -4;
1185 }
1186 if( *info!=0 ) {
1187 xerbla( "dgelq2", -*info );
1188 return;
1189 }
1190
1191 k = min( m, n );
1192
1193 for (i=1 ; i<=k ; i+=1) {
1194 /**
1195 * Generate elementary reflector H(i) to annihilate A(i,i+1:n)
1196 **/
1197 dlarfg( n-i+1, &a_2( i, i ), &a_2( i, min( i+1, n ) ), lda,
1198 &tau_1( i ) );
1199 if( i<m ) {
1200 /**
1201 * Apply H(i) to A(i+1:m,i:n) from the right
1202 **/
1203 aii = a_2( i, i );
1204 a_2( i, i ) = one;
1205 dlarf( 'r'/*ight*/, m-i, n-i+1, &a_2( i, i ), lda, tau_1( i ),
1206 &a_2( i+1, i ), lda, work );
1207 a_2( i, i ) = aii;
1208 }
1209 }
1210 return;
1211 /**
1212 * End of DGELQ2
1213 **/
1214 }
1215
1216
1217
dormqr(char side,char trans,long m,long n,long k,double a[],long lda,double tau[],double c[],long ldc,double work[],long lwork,long * info)1218 void dormqr( char side, char trans, long m, long n, long k, double a[], long lda, double tau[], double c[], long ldc,double work[], long lwork, long *info )
1219 {
1220 /**
1221 * -- LAPACK routine (version 1.1) --
1222 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
1223 * Courant Institute, Argonne National Lab, and Rice University
1224 * March 31, 1993
1225 *
1226 * .. Scalar Arguments ..*/
1227 /** ..
1228 * .. Array Arguments ..*/
1229 #undef work_1
1230 #define work_1(a1) work[a1-1]
1231 #undef tau_1
1232 #define tau_1(a1) tau[a1-1]
1233 #undef c_2
1234 #define c_2(a1,a2) c[a1-1+ldc*(a2-1)]
1235 #undef a_2
1236 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
1237 /** ..
1238 *
1239 * Purpose
1240 * =======
1241 *
1242 * DORMQR overwrites the general real M-by-N matrix C with
1243 *
1244 * SIDE = 'L' SIDE = 'R'
1245 * TRANS = 'N': Q * C C * Q
1246 * TRANS = 'T': Q**T * C C * Q**T
1247 *
1248 * where Q is a real orthogonal matrix defined as the product of k
1249 * elementary reflectors
1250 *
1251 * Q = H(1) H(2) . . . H(k)
1252 *
1253 * as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N
1254 * if SIDE = 'R'.
1255 *
1256 * Arguments
1257 * =========
1258 *
1259 * SIDE (input) CHARACTER*1
1260 * = 'L': apply Q or Q**T from the Left;
1261 * = 'R': apply Q or Q**T from the Right.
1262 *
1263 * TRANS (input) CHARACTER*1
1264 * = 'N': No transpose, apply Q;
1265 * = 'T': Transpose, apply Q**T.
1266 *
1267 * M (input) INTEGER
1268 * The number of rows of the matrix C. M >= 0.
1269 *
1270 * N (input) INTEGER
1271 * The number of columns of the matrix C. N >= 0.
1272 *
1273 * K (input) INTEGER
1274 * The number of elementary reflectors whose product defines
1275 * the matrix Q.
1276 * If SIDE = 'L', M >= K >= 0;
1277 * if SIDE = 'R', N >= K >= 0.
1278 *
1279 * A (input) DOUBLE PRECISION array, dimension (LDA,K)
1280 * The i-th column must contain the vector which defines the
1281 * elementary reflector H(i), for i = 1,2,...,k, as returned by
1282 * DGEQRF in the first k columns of its array argument A.
1283 * A is modified by the routine but restored on exit.
1284 *
1285 * LDA (input) INTEGER
1286 * The leading dimension of the array A.
1287 * If SIDE = 'L', LDA >= max(1,M);
1288 * if SIDE = 'R', LDA >= max(1,N).
1289 *
1290 * TAU (input) DOUBLE PRECISION array, dimension (K)
1291 * TAU(i) must contain the scalar factor of the elementary
1292 * reflector H(i), as returned by DGEQRF.
1293 *
1294 * C (input/output) DOUBLE PRECISION array, dimension (LDC,N)
1295 * On entry, the M-by-N matrix C.
1296 * On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
1297 *
1298 * LDC (input) INTEGER
1299 * The leading dimension of the array C. LDC >= max(1,M).
1300 *
1301 * WORK (workspace) DOUBLE PRECISION array, dimension (LWORK)
1302 * On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
1303 *
1304 * LWORK (input) INTEGER
1305 * The dimension of the array WORK.
1306 * If SIDE = 'L', LWORK >= max(1,N);
1307 * if SIDE = 'R', LWORK >= max(1,M).
1308 * For optimum performance LWORK >= N*NB if SIDE = 'L', and
1309 * LWORK >= M*NB if SIDE = 'R', where NB is the optimal
1310 * blocksize.
1311 *
1312 * INFO (output) INTEGER
1313 * = 0: successful exit
1314 * < 0: if INFO = -i, the i-th argument had an illegal value
1315 *
1316 * =====================================================================
1317 *
1318 * .. Parameters ..*/
1319 #undef nbmax
1320 #define nbmax 64
1321 #undef ldt
1322 #define ldt (nbmax+1)
1323 /** ..
1324 * .. Local Scalars ..*/
1325 int left, notran;
1326 long i, i1, i2, i3, ib, ic=0, iinfo, iws, jc=0, ldwork,
1327 mi=0, nb, nbmin, ni=0, nq, nw;
1328 char side_trans[3];
1329 /** ..
1330 * .. Local Arrays ..*/
1331 double t[nbmax][ldt];
1332 #undef t_2
1333 #define t_2(a1,a2) t[a2-1][a1-1]
1334 /** ..
1335 * .. Intrinsic Functions ..*/
1336 /* intrinsic max, min;*/
1337 /** ..
1338 * .. Executable Statements ..
1339 *
1340 * Test the input arguments
1341 **/
1342 /*-----implicit-declarations-----*/
1343 /*-----end-of-declarations-----*/
1344 *info = 0;
1345 left = lsame( side, 'l' );
1346 notran = lsame( trans, 'n' );
1347 /**
1348 * NQ is the order of Q and NW is the minimum dimension of WORK
1349 **/
1350 if( left ) {
1351 nq = m;
1352 nw = n;
1353 } else {
1354 nq = n;
1355 nw = m;
1356 }
1357 if( !left && !lsame( side, 'r' ) ) {
1358 *info = -1;
1359 } else if( !notran && !lsame( trans, 't' ) ) {
1360 *info = -2;
1361 } else if( m<0 ) {
1362 *info = -3;
1363 } else if( n<0 ) {
1364 *info = -4;
1365 } else if( k<0 || k>nq ) {
1366 *info = -5;
1367 } else if( lda<max( 1, nq ) ) {
1368 *info = -7;
1369 } else if( ldc<max( 1, m ) ) {
1370 *info = -10;
1371 } else if( lwork<max( 1, nw ) ) {
1372 *info = -12;
1373 }
1374 if( *info!=0 ) {
1375 xerbla( "dormqr", -*info );
1376 return;
1377 }
1378 /**
1379 * Quick return if possible
1380 **/
1381 if( m==0 || n==0 || k==0 ) {
1382 work_1( 1 ) = 1;
1383 return;
1384 }
1385 /**
1386 * Determine the block size. NB may be at most NBMAX, where NBMAX
1387 * is used to define the local array T.
1388 **/
1389 side_trans[0]= side;
1390 side_trans[1]= trans;
1391 side_trans[2]= '\0';
1392 nb = min( nbmax, ilaenv( 1, "dormqr", side_trans, m, n, k,
1393 -1 ) );
1394 nbmin = 2;
1395 ldwork = nw;
1396 if( nb>1 && nb<k ) {
1397 iws = nw*nb;
1398 if( lwork<iws ) {
1399 nb = lwork / ldwork;
1400 nbmin = max( 2, ilaenv( 2, "dormqr", side_trans, m, n, k,
1401 -1 ) );
1402 }
1403 } else {
1404 iws = nw;
1405 }
1406
1407 if( nb<nbmin || nb>=k ) {
1408 /**
1409 * Use unblocked code
1410 **/
1411 dorm2r( side, trans, m, n, k, a, lda, tau, c, ldc, work,
1412 &iinfo );
1413 } else {
1414 /**
1415 * Use blocked code
1416 **/
1417 if( ( left && !notran ) ||
1418 ( !left && notran ) ) {
1419 i1 = 1;
1420 i2 = k;
1421 i3 = nb;
1422 } else {
1423 i1 = ( ( k-1 ) / nb )*nb + 1;
1424 i2 = 1;
1425 i3 = -nb;
1426 }
1427
1428 if( left ) {
1429 ni = n;
1430 jc = 1;
1431 } else {
1432 mi = m;
1433 ic = 1;
1434 }
1435
1436 for (i=i1 ; i3>0?i<=i2:i>=i2 ; i+=i3) {
1437 ib = min( nb, k-i+1 );
1438 /**
1439 * Form the triangular factor of the block reflector
1440 * H = H(i) H(i+1) . . . H(i+ib-1)
1441 **/
1442 dlarft( 'f'/*orward*/, 'c'/*olumnwise*/, nq-i+1, ib, &a_2( i, i ),
1443 lda, &tau_1( i ), (double *)t, ldt );
1444 if( left ) {
1445 /**
1446 * H or H' is applied to C(i:m,1:n)
1447 **/
1448 mi = m - i + 1;
1449 ic = i;
1450 } else {
1451 /**
1452 * H or H' is applied to C(1:m,i:n)
1453 **/
1454 ni = n - i + 1;
1455 jc = i;
1456 }
1457 /**
1458 * Apply H or H'
1459 **/
1460 dlarfb( side, trans, 'f'/*orward*/, 'c'/*olumnwise*/, mi, ni,
1461 ib, &a_2( i, i ), lda, (double *)t, ldt, &c_2( ic, jc ), ldc,
1462 work, ldwork );
1463 }
1464 }
1465 work_1( 1 ) = iws;
1466 return;
1467 /**
1468 * End of DORMQR
1469 **/
1470 #undef nbmax
1471 #undef ldt
1472 }
1473
1474
1475
dorm2r(char side,char trans,long m,long n,long k,double a[],long lda,double tau[],double c[],long ldc,double work[],long * info)1476 void dorm2r( char side, char trans, long m, long n, long k, double a[], long lda, double tau[], double c[], long ldc,double work[], long *info )
1477 {
1478 /**
1479 * -- LAPACK routine (version 1.1) --
1480 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
1481 * Courant Institute, Argonne National Lab, and Rice University
1482 * February 29, 1992
1483 *
1484 * .. Scalar Arguments ..*/
1485 /** ..
1486 * .. Array Arguments ..*/
1487 #undef work_1
1488 #define work_1(a1) work[a1-1]
1489 #undef tau_1
1490 #define tau_1(a1) tau[a1-1]
1491 #undef c_2
1492 #define c_2(a1,a2) c[a1-1+ldc*(a2-1)]
1493 #undef a_2
1494 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
1495 /** ..
1496 *
1497 * Purpose
1498 * =======
1499 *
1500 * DORM2R overwrites the general real m by n matrix C with
1501 *
1502 * Q * C if SIDE = 'L' and TRANS = 'N', or
1503 *
1504 * Q'* C if SIDE = 'L' and TRANS = 'T', or
1505 *
1506 * C * Q if SIDE = 'R' and TRANS = 'N', or
1507 *
1508 * C * Q' if SIDE = 'R' and TRANS = 'T',
1509 *
1510 * where Q is a real orthogonal matrix defined as the product of k
1511 * elementary reflectors
1512 *
1513 * Q = H(1) H(2) . . . H(k)
1514 *
1515 * as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n
1516 * if SIDE = 'R'.
1517 *
1518 * Arguments
1519 * =========
1520 *
1521 * SIDE (input) CHARACTER*1
1522 * = 'L': apply Q or Q' from the Left
1523 * = 'R': apply Q or Q' from the Right
1524 *
1525 * TRANS (input) CHARACTER*1
1526 * = 'N': apply Q (No transpose)
1527 * = 'T': apply Q' (Transpose)
1528 *
1529 * M (input) INTEGER
1530 * The number of rows of the matrix C. M >= 0.
1531 *
1532 * N (input) INTEGER
1533 * The number of columns of the matrix C. N >= 0.
1534 *
1535 * K (input) INTEGER
1536 * The number of elementary reflectors whose product defines
1537 * the matrix Q.
1538 * If SIDE = 'L', M >= K >= 0;
1539 * if SIDE = 'R', N >= K >= 0.
1540 *
1541 * A (input) DOUBLE PRECISION array, dimension (LDA,K)
1542 * The i-th column must contain the vector which defines the
1543 * elementary reflector H(i), for i = 1,2,...,k, as returned by
1544 * DGEQRF in the first k columns of its array argument A.
1545 * A is modified by the routine but restored on exit.
1546 *
1547 * LDA (input) INTEGER
1548 * The leading dimension of the array A.
1549 * If SIDE = 'L', LDA >= max(1,M);
1550 * if SIDE = 'R', LDA >= max(1,N).
1551 *
1552 * TAU (input) DOUBLE PRECISION array, dimension (K)
1553 * TAU(i) must contain the scalar factor of the elementary
1554 * reflector H(i), as returned by DGEQRF.
1555 *
1556 * C (input/output) DOUBLE PRECISION array, dimension (LDC,N)
1557 * On entry, the m by n matrix C.
1558 * On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
1559 *
1560 * LDC (input) INTEGER
1561 * The leading dimension of the array C. LDC >= max(1,M).
1562 *
1563 * WORK (workspace) DOUBLE PRECISION array, dimension
1564 * (N) if SIDE = 'L',
1565 * (M) if SIDE = 'R'
1566 *
1567 * INFO (output) INTEGER
1568 * = 0: successful exit
1569 * < 0: if INFO = -i, the i-th argument had an illegal value
1570 *
1571 * =====================================================================
1572 *
1573 * .. Parameters ..*/
1574 #undef one
1575 #define one 1.0e+0
1576 /** ..
1577 * .. Local Scalars ..*/
1578 int left, notran;
1579 long i, i1, i2, i3, ic=0, jc=0, mi=0, ni=0, nq;
1580 double aii;
1581 /** ..
1582 * .. Intrinsic Functions ..*/
1583 /* intrinsic max;*/
1584 /** ..
1585 * .. Executable Statements ..
1586 *
1587 * Test the input arguments
1588 **/
1589 /*-----implicit-declarations-----*/
1590 /*-----end-of-declarations-----*/
1591 *info = 0;
1592 left = lsame( side, 'l' );
1593 notran = lsame( trans, 'n' );
1594 /**
1595 * NQ is the order of Q
1596 **/
1597 if( left ) {
1598 nq = m;
1599 } else {
1600 nq = n;
1601 }
1602 if( !left && !lsame( side, 'r' ) ) {
1603 *info = -1;
1604 } else if( !notran && !lsame( trans, 't' ) ) {
1605 *info = -2;
1606 } else if( m<0 ) {
1607 *info = -3;
1608 } else if( n<0 ) {
1609 *info = -4;
1610 } else if( k<0 || k>nq ) {
1611 *info = -5;
1612 } else if( lda<max( 1, nq ) ) {
1613 *info = -7;
1614 } else if( ldc<max( 1, m ) ) {
1615 *info = -10;
1616 }
1617 if( *info!=0 ) {
1618 xerbla( "dorm2r", -*info );
1619 return;
1620 }
1621 /**
1622 * Quick return if possible
1623 **/
1624 if( m==0 || n==0 || k==0 )
1625 return;
1626
1627 if( ( left && !notran ) || ( !left && notran ) )
1628 {
1629 i1 = 1;
1630 i2 = k;
1631 i3 = 1;
1632 } else {
1633 i1 = k;
1634 i2 = 1;
1635 i3 = -1;
1636 }
1637
1638 if( left ) {
1639 ni = n;
1640 jc = 1;
1641 } else {
1642 mi = m;
1643 ic = 1;
1644 }
1645
1646 for (i=i1 ; i3>0?i<=i2:i>=i2 ; i+=i3) {
1647 if( left ) {
1648 /**
1649 * H(i) is applied to C(i:m,1:n)
1650 **/
1651 mi = m - i + 1;
1652 ic = i;
1653 } else {
1654 /**
1655 * H(i) is applied to C(1:m,i:n)
1656 **/
1657 ni = n - i + 1;
1658 jc = i;
1659 }
1660 /**
1661 * Apply H(i)
1662 **/
1663 aii = a_2( i, i );
1664 a_2( i, i ) = one;
1665 dlarf( side, mi, ni, &a_2( i, i ), 1, tau_1( i ), &c_2( ic, jc ),
1666 ldc, work );
1667 a_2( i, i ) = aii;
1668 }
1669 return;
1670 /**
1671 * End of DORM2R
1672 **/
1673 }
1674
1675
1676
dgeqrf(long m,long n,double a[],long lda,double tau[],double work[],long lwork,long * info)1677 void dgeqrf( long m, long n, double a[], long lda, double tau[], double work[], long lwork, long *info )
1678 {
1679 /**
1680 * -- LAPACK routine (version 1.1) --
1681 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
1682 * Courant Institute, Argonne National Lab, and Rice University
1683 * March 31, 1993
1684 *
1685 * .. Scalar Arguments ..*/
1686 /** ..
1687 * .. Array Arguments ..*/
1688 #undef work_1
1689 #define work_1(a1) work[a1-1]
1690 #undef tau_1
1691 #define tau_1(a1) tau[a1-1]
1692 #undef a_2
1693 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
1694 /** ..
1695 *
1696 * Purpose
1697 * =======
1698 *
1699 * DGEQRF computes a QR factorization of a real M-by-N matrix A:
1700 * A = Q * R.
1701 *
1702 * Arguments
1703 * =========
1704 *
1705 * M (input) INTEGER
1706 * The number of rows of the matrix A. M >= 0.
1707 *
1708 * N (input) INTEGER
1709 * The number of columns of the matrix A. N >= 0.
1710 *
1711 * A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
1712 * On entry, the M-by-N matrix A.
1713 * On exit, the elements on and above the diagonal of the array
1714 * contain the min(M,N)-by-N upper trapezoidal matrix R (R is
1715 * upper triangular if m >= n); the elements below the diagonal,
1716 * with the array TAU, represent the orthogonal matrix Q as a
1717 * product of min(m,n) elementary reflectors (see Further
1718 * Details).
1719 *
1720 * LDA (input) INTEGER
1721 * The leading dimension of the array A. LDA >= max(1,M).
1722 *
1723 * TAU (output) DOUBLE PRECISION array, dimension (min(M,N))
1724 * The scalar factors of the elementary reflectors (see Further
1725 * Details).
1726 *
1727 * WORK (workspace) DOUBLE PRECISION array, dimension (LWORK)
1728 * On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
1729 *
1730 * LWORK (input) INTEGER
1731 * The dimension of the array WORK. LWORK >= max(1,N).
1732 * For optimum performance LWORK >= N*NB, where NB is
1733 * the optimal blocksize.
1734 *
1735 * INFO (output) INTEGER
1736 * = 0: successful exit
1737 * < 0: if INFO = -i, the i-th argument had an illegal value
1738 *
1739 * Further Details
1740 * ===============
1741 *
1742 * The matrix Q is represented as a product of elementary reflectors
1743 *
1744 * Q = H(1) H(2) . . . H(k), where k = min(m,n).
1745 *
1746 * Each H(i) has the form
1747 *
1748 * H(i) = I - tau * v * v'
1749 *
1750 * where tau is a real scalar, and v is a real vector with
1751 * v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
1752 * and tau in TAU(i).
1753 *
1754 * =====================================================================
1755 *
1756 * .. Local Scalars ..*/
1757 long i, ib, iinfo, iws, k, ldwork=0, nb, nbmin, nx;
1758 /** ..
1759 * .. Intrinsic Functions ..*/
1760 /* intrinsic max, min;*/
1761 /** ..
1762 * .. Executable Statements ..
1763 *
1764 * Test the input arguments
1765 **/
1766 /*-----implicit-declarations-----*/
1767 /*-----end-of-declarations-----*/
1768 *info = 0;
1769 if( m<0 ) {
1770 *info = -1;
1771 } else if( n<0 ) {
1772 *info = -2;
1773 } else if( lda<max( 1, m ) ) {
1774 *info = -4;
1775 } else if( lwork<max( 1, n ) ) {
1776 *info = -7;
1777 }
1778 if( *info!=0 ) {
1779 xerbla( "dgeqrf", -*info );
1780 return;
1781 }
1782 /**
1783 * Quick return if possible
1784 **/
1785 k = min( m, n );
1786 if( k==0 ) {
1787 work_1( 1 ) = 1;
1788 return;
1789 }
1790 /**
1791 * Determine the block size.
1792 **/
1793 nb = ilaenv( 1, "dgeqrf", " ", m, n, -1, -1 );
1794 nbmin = 2;
1795 nx = 0;
1796 iws = n;
1797 if( nb>1 && nb<k ) {
1798 /**
1799 * Determine when to cross over from blocked to unblocked code.
1800 **/
1801 nx = max( 0, ilaenv( 3, "dgeqrf", " ", m, n, -1, -1 ) );
1802 if( nx<k ) {
1803 /**
1804 * Determine if workspace is large enough for blocked code.
1805 **/
1806 ldwork = n;
1807 iws = ldwork*nb;
1808 if( lwork<iws ) {
1809 /**
1810 * Not enough workspace to use optimal NB: reduce NB and
1811 * determine the minimum value of NB.
1812 **/
1813 nb = lwork / ldwork;
1814 nbmin = max( 2, ilaenv( 2, "dgeqrf", " ", m, n, -1,
1815 -1 ) );
1816 }
1817 }
1818 }
1819
1820 if( nb>=nbmin && nb<k && nx<k ) {
1821 /**
1822 * Use blocked code initially
1823 **/
1824 for (i=1 ; nb>0?i<=k - nx:i>=k - nx ; i+=nb) {
1825 ib = min( k-i+1, nb );
1826 /**
1827 * Compute the QR factorization of the current block
1828 * A(i:m,i:i+ib-1)
1829 **/
1830 dgeqr2( m-i+1, ib, &a_2( i, i ), lda, &tau_1( i ), work,
1831 &iinfo );
1832 if( i+ib<=n ) {
1833 /**
1834 * Form the triangular factor of the block reflector
1835 * H = H(i) H(i+1) . . . H(i+ib-1)
1836 **/
1837 dlarft( 'f'/*orward*/, 'c'/*olumnwise*/, m-i+1, ib,
1838 &a_2( i, i ), lda, &tau_1( i ), work, ldwork );
1839 /**
1840 * Apply H' to A(i:m,i+ib:n) from the left
1841 **/
1842 dlarfb( 'l'/*eft*/, 't'/*ranspose*/, 'f'/*orward*/,
1843 'c'/*olumnwise*/, m-i+1, n-i-ib+1, ib,
1844 &a_2( i, i ), lda, work, ldwork, &a_2( i, i+ib ),
1845 lda, &work_1( ib+1 ), ldwork );
1846 }
1847 }
1848 } else {
1849 i = 1;
1850 }
1851 /**
1852 * Use unblocked code to factor the last or only block.
1853 **/
1854 if( i<=k )
1855 dgeqr2( m-i+1, n-i+1, &a_2( i, i ), lda, &tau_1( i ), work,
1856 &iinfo );
1857
1858 work_1( 1 ) = iws;
1859 return;
1860 /**
1861 * End of DGEQRF
1862 **/
1863 }
1864
1865
1866
dlarfb(char side,char trans,char direct,char storev,long m,long n,long k,double v[],long ldv,double t[],long ldt,double c[],long ldc,double work[],long ldwork)1867 void dlarfb( char side, char trans, char direct, char storev,
1868 long m, long n, long k, double v[], long ldv,
1869 double t[], long ldt, double c[], long ldc,
1870 double work[], long ldwork )
1871 {
1872 /**
1873 * -- LAPACK auxiliary routine (version 1.1) --
1874 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
1875 * Courant Institute, Argonne National Lab, and Rice University
1876 * February 29, 1992
1877 *
1878 * .. Scalar Arguments ..*/
1879 /** ..
1880 * .. Array Arguments ..*/
1881 #undef work_2
1882 #define work_2(a1,a2) work[a1-1+ldwork*(a2-1)]
1883 #undef v_2
1884 #define v_2(a1,a2) v[a1-1+ldv*(a2-1)]
1885 #undef t_2
1886 #define t_2(a1,a2) t[a1-1+ldt*(a2-1)]
1887 #undef c_2
1888 #define c_2(a1,a2) c[a1-1+ldc*(a2-1)]
1889 /** ..
1890 *
1891 * Purpose
1892 * =======
1893 *
1894 * DLARFB applies a real block reflector H or its transpose H' to a
1895 * real m by n matrix C, from either the left or the right.
1896 *
1897 * Arguments
1898 * =========
1899 *
1900 * SIDE (input) CHARACTER*1
1901 * = 'L': apply H or H' from the Left
1902 * = 'R': apply H or H' from the Right
1903 *
1904 * TRANS (input) CHARACTER*1
1905 * = 'N': apply H (No transpose)
1906 * = 'T': apply H' (Transpose)
1907 *
1908 * DIRECT (input) CHARACTER*1
1909 * Indicates how H is formed from a product of elementary
1910 * reflectors
1911 * = 'F': H = H(1) H(2) . . . H(k) (Forward)
1912 * = 'B': H = H(k) . . . H(2) H(1) (Backward)
1913 *
1914 * STOREV (input) CHARACTER*1
1915 * Indicates how the vectors which define the elementary
1916 * reflectors are stored:
1917 * = 'C': Columnwise
1918 * = 'R': Rowwise
1919 *
1920 * M (input) INTEGER
1921 * The number of rows of the matrix C.
1922 *
1923 * N (input) INTEGER
1924 * The number of columns of the matrix C.
1925 *
1926 * K (input) INTEGER
1927 * The order of the matrix T (= the number of elementary
1928 * reflectors whose product defines the block reflector).
1929 *
1930 * V (input) DOUBLE PRECISION array, dimension
1931 * (LDV,K) if STOREV = 'C'
1932 * (LDV,M) if STOREV = 'R' and SIDE = 'L'
1933 * (LDV,N) if STOREV = 'R' and SIDE = 'R'
1934 * The matrix V. See further details.
1935 *
1936 * LDV (input) INTEGER
1937 * The leading dimension of the array V.
1938 * If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M);
1939 * if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N);
1940 * if STOREV = 'R', LDV >= K.
1941 *
1942 * T (input) DOUBLE PRECISION array, dimension (LDT,K)
1943 * The triangular k by k matrix T in the representation of the
1944 * block reflector.
1945 *
1946 * LDT (input) INTEGER
1947 * The leading dimension of the array T. LDT >= K.
1948 *
1949 * C (input/output) DOUBLE PRECISION array, dimension (LDC,N)
1950 * On entry, the m by n matrix C.
1951 * On exit, C is overwritten by H*C or H'*C or C*H or C*H'.
1952 *
1953 * LDC (input) INTEGER
1954 * The leading dimension of the array C. LDA >= max(1,M).
1955 *
1956 * WORK (workspace) DOUBLE PRECISION array, dimension (LDWORK,K)
1957 *
1958 * LDWORK (input) INTEGER
1959 * The leading dimension of the array WORK.
1960 * If SIDE = 'L', LDWORK >= max(1,N);
1961 * if SIDE = 'R', LDWORK >= max(1,M).
1962 *
1963 * =====================================================================
1964 *
1965 * .. Parameters ..*/
1966 #undef one
1967 #define one 1.0e+0
1968 /** ..
1969 * .. Local Scalars ..*/
1970 enum CBLAS_TRANSPOSE transt, transf;
1971 long i, j;
1972 /** ..
1973 * .. Executable Statements ..
1974 *
1975 * Quick return if possible
1976 **/
1977 /*-----implicit-declarations-----*/
1978 /*-----end-of-declarations-----*/
1979 if( m<=0 || n<=0 )
1980 return;
1981
1982 if( lsame( trans, 'n' ) ) {
1983 transf = CblasNoTrans;
1984 transt = CblasTrans;
1985 } else {
1986 transf = CblasTrans;
1987 transt = CblasNoTrans;
1988 }
1989
1990 if( lsame( storev, 'c' ) ) {
1991
1992 if( lsame( direct, 'f' ) ) {
1993 /**
1994 * Let V = ( V1 ) (first K rows)
1995 * ( V2 )
1996 * where V1 is unit lower triangular.
1997 **/
1998 if( lsame( side, 'l' ) ) {
1999 /**
2000 * Form H * C or H' * C where C = ( C1 )
2001 * ( C2 )
2002 *
2003 * W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK)
2004 *
2005 * W := C1'
2006 **/
2007 for (j=1 ; j<=k ; j+=1) {
2008 cblas_dcopy( n, &c_2( j, 1 ), ldc, &work_2( 1, j ), 1 );
2009 }
2010 /**
2011 * W := W * V1
2012 **/
2013 cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans,
2014 CblasUnit, n, k, one, v, ldv, work, ldwork );
2015 if( m>k ) {
2016 /**
2017 * W := W + C2'*V2
2018 **/
2019 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, n, k, m-k,
2020 one, &c_2( k+1, 1 ), ldc, &v_2( k+1, 1 ), ldv,
2021 one, work, ldwork );
2022 }
2023 /**
2024 * W := W * T' or W * T
2025 **/
2026 cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, transt,
2027 CblasNonUnit, n, k, one, t, ldt, work, ldwork );
2028 /**
2029 * C := C - V * W'
2030 **/
2031 if( m>k ) {
2032 /**
2033 * C2 := C2 - V2 * W'
2034 **/
2035 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, m-k, n, k,
2036 -one, &v_2( k+1, 1 ), ldv, work, ldwork, one,
2037 &c_2( k+1, 1 ), ldc );
2038 }
2039 /**
2040 * W := W * V1'
2041 **/
2042 cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, CblasTrans,
2043 CblasUnit, n, k, one, v, ldv, work, ldwork );
2044 /**
2045 * C1 := C1 - W'
2046 **/
2047 for (j=1 ; j<=k ; j+=1) {
2048 for (i=1 ; i<=n ; i+=1) {
2049 c_2( j, i ) = c_2( j, i ) - work_2( i, j );
2050 }
2051 }
2052
2053 } else if( lsame( side, 'r' ) ) {
2054 /**
2055 * Form C * H or C * H' where C = ( C1 C2 )
2056 *
2057 * W := C * V = (C1*V1 + C2*V2) (stored in WORK)
2058 *
2059 * W := C1
2060 **/
2061 for (j=1 ; j<=k ; j+=1) {
2062 cblas_dcopy( m, &c_2( 1, j ), 1, &work_2( 1, j ), 1 );
2063 }
2064 /**
2065 * W := W * V1
2066 **/
2067 cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans,
2068 CblasUnit, m, k, one, v, ldv, work, ldwork );
2069 if( n>k ) {
2070 /**
2071 * W := W + C2 * V2
2072 **/
2073 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, k, n-k,
2074 one, &c_2( 1, k+1 ), ldc, &v_2( k+1, 1 ), ldv,
2075 one, work, ldwork );
2076 }
2077 /**
2078 * W := W * T or W * T'
2079 **/
2080 cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, transf,
2081 CblasNonUnit, m, k, one, t, ldt, work, ldwork );
2082 /**
2083 * C := C - W * V'
2084 **/
2085 if( n>k ) {
2086 /**
2087 * C2 := C2 - W * V2'
2088 **/
2089 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, m, n-k, k,
2090 -one, work, ldwork, &v_2( k+1, 1 ), ldv, one,
2091 &c_2( 1, k+1 ), ldc );
2092 }
2093 /**
2094 * W := W * V1'
2095 **/
2096 cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, CblasTrans,
2097 CblasUnit, m, k, one, v, ldv, work, ldwork );
2098 /**
2099 * C1 := C1 - W
2100 **/
2101 for (j=1 ; j<=k ; j+=1) {
2102 for (i=1 ; i<=m ; i+=1) {
2103 c_2( i, j ) = c_2( i, j ) - work_2( i, j );
2104 }
2105 }
2106 }
2107
2108 } else {
2109 /**
2110 * Let V = ( V1 )
2111 * ( V2 ) (last K rows)
2112 * where V2 is unit upper triangular.
2113 **/
2114 if( lsame( side, 'l' ) ) {
2115 /**
2116 * Form H * C or H' * C where C = ( C1 )
2117 * ( C2 )
2118 *
2119 * W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK)
2120 *
2121 * W := C2'
2122 **/
2123 for (j=1 ; j<=k ; j+=1) {
2124 cblas_dcopy( n, &c_2( m-k+j, 1 ), ldc, &work_2( 1, j ), 1 );
2125 }
2126 /**
2127 * W := W * V2
2128 **/
2129 cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasNoTrans,
2130 CblasUnit, n, k, one, &v_2( m-k+1, 1 ), ldv,
2131 work, ldwork );
2132 if( m>k ) {
2133 /**
2134 * W := W + C1'*V1
2135 **/
2136 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, n, k, m-k,
2137 one, c, ldc, v, ldv, one, work, ldwork );
2138 }
2139 /**
2140 * W := W * T' or W * T
2141 **/
2142 cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, transt,
2143 CblasNonUnit, n, k, one, t, ldt, work, ldwork );
2144 /**
2145 * C := C - V * W'
2146 **/
2147 if( m>k ) {
2148 /**
2149 * C1 := C1 - V1 * W'
2150 **/
2151 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, m-k, n, k,
2152 -one, v, ldv, work, ldwork, one, c, ldc );
2153 }
2154 /**
2155 * W := W * V2'
2156 **/
2157 cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasTrans,
2158 CblasUnit, n, k, one, &v_2( m-k+1, 1 ), ldv,
2159 work, ldwork );
2160 /**
2161 * C2 := C2 - W'
2162 **/
2163 for (j=1 ; j<=k ; j+=1) {
2164 for (i=1 ; i<=n ; i+=1) {
2165 c_2( m-k+j, i ) = c_2( m-k+j, i ) - work_2( i, j );
2166 }
2167 }
2168
2169 } else if( lsame( side, 'r' ) ) {
2170 /**
2171 * Form C * H or C * H' where C = ( C1 C2 )
2172 *
2173 * W := C * V = (C1*V1 + C2*V2) (stored in WORK)
2174 *
2175 * W := C2
2176 **/
2177 for (j=1 ; j<=k ; j+=1) {
2178 cblas_dcopy( m, &c_2( 1, n-k+j ), 1, &work_2( 1, j ), 1 );
2179 }
2180 /**
2181 * W := W * V2
2182 **/
2183 cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasNoTrans,
2184 CblasUnit, m, k, one, &v_2( n-k+1, 1 ), ldv,
2185 work, ldwork );
2186 if( n>k ) {
2187 /**
2188 * W := W + C1 * V1
2189 **/
2190 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, k, n-k,
2191 one, c, ldc, v, ldv, one, work, ldwork );
2192 }
2193 /**
2194 * W := W * T or W * T'
2195 **/
2196 cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, transf,
2197 CblasNonUnit, m, k, one, t, ldt, work, ldwork );
2198 /**
2199 * C := C - W * V'
2200 **/
2201 if( n>k ) {
2202 /**
2203 * C1 := C1 - W * V1'
2204 **/
2205 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, m, n-k, k,
2206 -one, work, ldwork, v, ldv, one, c, ldc );
2207 }
2208 /**
2209 * W := W * V2'
2210 **/
2211 cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasTrans,
2212 CblasUnit, m, k, one, &v_2( n-k+1, 1 ), ldv,
2213 work, ldwork );
2214 /**
2215 * C2 := C2 - W
2216 **/
2217 for (j=1 ; j<=k ; j+=1) {
2218 for (i=1 ; i<=m ; i+=1) {
2219 c_2( i, n-k+j ) = c_2( i, n-k+j ) - work_2( i, j );
2220 }
2221 }
2222 }
2223 }
2224
2225 } else if( lsame( storev, 'r' ) ) {
2226
2227 if( lsame( direct, 'f' ) ) {
2228 /**
2229 * Let V = ( V1 V2 ) (V1: first K columns)
2230 * where V1 is unit upper triangular.
2231 **/
2232 if( lsame( side, 'l' ) ) {
2233 /**
2234 * Form H * C or H' * C where C = ( C1 )
2235 * ( C2 )
2236 *
2237 * W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK)
2238 *
2239 * W := C1'
2240 **/
2241 for (j=1 ; j<=k ; j+=1) {
2242 cblas_dcopy( n, &c_2( j, 1 ), ldc, &work_2( 1, j ), 1 );
2243 }
2244 /**
2245 * W := W * V1'
2246 **/
2247 cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasTrans,
2248 CblasUnit, n, k, one, v, ldv, work, ldwork );
2249 if( m>k ) {
2250 /**
2251 * W := W + C2'*V2'
2252 **/
2253 cblas_dgemm(CblasColMajor, CblasTrans, CblasTrans, n, k, m-k, one,
2254 &c_2( k+1, 1 ), ldc, &v_2( 1, k+1 ), ldv, one,
2255 work, ldwork );
2256 }
2257 /**
2258 * W := W * T' or W * T
2259 **/
2260 cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, transt,
2261 CblasNonUnit, n, k, one, t, ldt, work, ldwork );
2262 /**
2263 * C := C - V' * W'
2264 **/
2265 if( m>k ) {
2266 /**
2267 * C2 := C2 - V2' * W'
2268 **/
2269 cblas_dgemm(CblasColMajor, CblasTrans, CblasTrans, m-k, n, k, -one,
2270 &v_2( 1, k+1 ), ldv, work, ldwork, one,
2271 &c_2( k+1, 1 ), ldc );
2272 }
2273 /**
2274 * W := W * V1
2275 **/
2276 cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasNoTrans,
2277 CblasUnit, n, k, one, v, ldv, work, ldwork );
2278 /**
2279 * C1 := C1 - W'
2280 **/
2281 for (j=1 ; j<=k ; j+=1) {
2282 for (i=1 ; i<=n ; i+=1) {
2283 c_2( j, i ) = c_2( j, i ) - work_2( i, j );
2284 }
2285 }
2286
2287 } else if( lsame( side, 'r' ) ) {
2288 /**
2289 * Form C * H or C * H' where C = ( C1 C2 )
2290 *
2291 * W := C * V' = (C1*V1' + C2*V2') (stored in WORK)
2292 *
2293 * W := C1
2294 **/
2295 for (j=1 ; j<=k ; j+=1) {
2296 cblas_dcopy( m, &c_2( 1, j ), 1, &work_2( 1, j ), 1 );
2297 }
2298 /**
2299 * W := W * V1'
2300 **/
2301 cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasTrans,
2302 CblasUnit, m, k, one, v, ldv, work, ldwork );
2303 if( n>k ) {
2304 /**
2305 * W := W + C2 * V2'
2306 **/
2307 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, m, k, n-k,
2308 one, &c_2( 1, k+1 ), ldc, &v_2( 1, k+1 ), ldv,
2309 one, work, ldwork );
2310 }
2311 /**
2312 * W := W * T or W * T'
2313 **/
2314 cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, transf,
2315 CblasNonUnit, m, k, one, t, ldt, work, ldwork );
2316 /**
2317 * C := C - W * V
2318 **/
2319 if( n>k ) {
2320 /**
2321 * C2 := C2 - W * V2
2322 **/
2323 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n-k, k,
2324 -one, work, ldwork, &v_2( 1, k+1 ), ldv, one,
2325 &c_2( 1, k+1 ), ldc );
2326 }
2327 /**
2328 * W := W * V1
2329 **/
2330 cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasNoTrans,
2331 CblasUnit, m, k, one, v, ldv, work, ldwork );
2332 /**
2333 * C1 := C1 - W
2334 **/
2335 for (j=1 ; j<=k ; j+=1) {
2336 for (i=1 ; i<=m ; i+=1) {
2337 c_2( i, j ) = c_2( i, j ) - work_2( i, j );
2338 }
2339 }
2340
2341 }
2342
2343 } else {
2344 /**
2345 * Let V = ( V1 V2 ) (V2: last K columns)
2346 * where V2 is unit lower triangular.
2347 **/
2348 if( lsame( side, 'l' ) ) {
2349 /**
2350 * Form H * C or H' * C where C = ( C1 )
2351 * ( C2 )
2352 *
2353 * W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK)
2354 *
2355 * W := C2'
2356 **/
2357 for (j=1 ; j<=k ; j+=1) {
2358 cblas_dcopy( n, &c_2( m-k+j, 1 ), ldc, &work_2( 1, j ), 1 );
2359 }
2360 /**
2361 * W := W * V2'
2362 **/
2363 cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, CblasTrans,
2364 CblasUnit, n, k, one, &v_2( 1, m-k+1 ), ldv,
2365 work, ldwork );
2366 if( m>k ) {
2367 /**
2368 * W := W + C1'*V1'
2369 **/
2370 cblas_dgemm(CblasColMajor, CblasTrans, CblasTrans, n, k, m-k, one,
2371 c, ldc, v, ldv, one, work, ldwork );
2372 }
2373 /**
2374 * W := W * T' or W * T
2375 **/
2376 cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, transt,
2377 CblasNonUnit, n, k, one, t, ldt, work, ldwork );
2378 /**
2379 * C := C - V' * W'
2380 **/
2381 if( m>k ) {
2382 /**
2383 * C1 := C1 - V1' * W'
2384 **/
2385 cblas_dgemm(CblasColMajor, CblasTrans, CblasTrans, m-k, n, k, -one,
2386 v, ldv, work, ldwork, one, c, ldc );
2387 }
2388 /**
2389 * W := W * V2
2390 **/
2391 cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans,
2392 CblasUnit, n, k, one, &v_2( 1, m-k+1 ), ldv,
2393 work, ldwork );
2394 /**
2395 * C2 := C2 - W'
2396 **/
2397 for (j=1 ; j<=k ; j+=1) {
2398 for (i=1 ; i<=n ; i+=1) {
2399 c_2( m-k+j, i ) = c_2( m-k+j, i ) - work_2( i, j );
2400 }
2401 }
2402
2403 } else if( lsame( side, 'r' ) ) {
2404 /**
2405 * Form C * H or C * H' where C = ( C1 C2 )
2406 *
2407 * W := C * V' = (C1*V1' + C2*V2') (stored in WORK)
2408 *
2409 * W := C2
2410 **/
2411 for (j=1 ; j<=k ; j+=1) {
2412 cblas_dcopy( m, &c_2( 1, n-k+j ), 1, &work_2( 1, j ), 1 );
2413 }
2414 /**
2415 * W := W * V2'
2416 **/
2417 cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, CblasTrans,
2418 CblasUnit, m, k, one, &v_2( 1, n-k+1 ), ldv,
2419 work, ldwork );
2420 if( n>k ) {
2421 /**
2422 * W := W + C1 * V1'
2423 **/
2424 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, m, k, n-k,
2425 one, c, ldc, v, ldv, one, work, ldwork );
2426 }
2427 /**
2428 * W := W * T or W * T'
2429 **/
2430 cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, transf,
2431 CblasNonUnit, m, k, one, t, ldt, work, ldwork );
2432 /**
2433 * C := C - W * V
2434 **/
2435 if( n>k ) {
2436 /**
2437 * C1 := C1 - W * V1
2438 **/
2439 cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n-k, k,
2440 -one, work, ldwork, v, ldv, one, c, ldc );
2441 }
2442 /**
2443 * W := W * V2
2444 **/
2445 cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans,
2446 CblasUnit, m, k, one, &v_2( 1, n-k+1 ), ldv,
2447 work, ldwork );
2448 /**
2449 * C1 := C1 - W
2450 **/
2451 for (j=1 ; j<=k ; j+=1) {
2452 for (i=1 ; i<=m ; i+=1) {
2453 c_2( i, n-k+j ) = c_2( i, n-k+j ) - work_2( i, j );
2454 }
2455 }
2456
2457 }
2458
2459 }
2460 }
2461
2462 return;
2463 /**
2464 * End of DLARFB
2465 **/
2466 }
2467
2468
2469
dlarft(char direct,char storev,long n,long k,double v[],long ldv,double tau[],double t[],long ldt)2470 void dlarft( char direct, char storev, long n, long k,
2471 double v[], long ldv, double tau[], double t[], long ldt )
2472 {
2473 /**
2474 * -- LAPACK auxiliary routine (version 1.1) --
2475 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
2476 * Courant Institute, Argonne National Lab, and Rice University
2477 * February 29, 1992
2478 *
2479 * .. Scalar Arguments ..*/
2480 /** ..
2481 * .. Array Arguments ..*/
2482 #undef v_2
2483 #define v_2(a1,a2) v[a1-1+ldv*(a2-1)]
2484 #undef tau_1
2485 #define tau_1(a1) tau[a1-1]
2486 #undef t_2
2487 #define t_2(a1,a2) t[a1-1+ldt*(a2-1)]
2488 /** ..
2489 *
2490 * Purpose
2491 * =======
2492 *
2493 * DLARFT forms the triangular factor T of a real block reflector H
2494 * of order n, which is defined as a product of k elementary reflectors.
2495 *
2496 * If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
2497 *
2498 * If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
2499 *
2500 * If STOREV = 'C', the vector which defines the elementary reflector
2501 * H(i) is stored in the i-th column of the array V, and
2502 *
2503 * H = I - V * T * V'
2504 *
2505 * If STOREV = 'R', the vector which defines the elementary reflector
2506 * H(i) is stored in the i-th row of the array V, and
2507 *
2508 * H = I - V' * T * V
2509 *
2510 * Arguments
2511 * =========
2512 *
2513 * DIRECT (input) CHARACTER*1
2514 * Specifies the order in which the elementary reflectors are
2515 * multiplied to form the block reflector:
2516 * = 'F': H = H(1) H(2) . . . H(k) (Forward)
2517 * = 'B': H = H(k) . . . H(2) H(1) (Backward)
2518 *
2519 * STOREV (input) CHARACTER*1
2520 * Specifies how the vectors which define the elementary
2521 * reflectors are stored (see also Further Details):
2522 * = 'C': columnwise
2523 * = 'R': rowwise
2524 *
2525 * N (input) INTEGER
2526 * The order of the block reflector H. N >= 0.
2527 *
2528 * K (input) INTEGER
2529 * The order of the triangular factor T (= the number of
2530 * elementary reflectors). K >= 1.
2531 *
2532 * V (input/output) DOUBLE PRECISION array, dimension
2533 * (LDV,K) if STOREV = 'C'
2534 * (LDV,N) if STOREV = 'R'
2535 * The matrix V. See further details.
2536 *
2537 * LDV (input) INTEGER
2538 * The leading dimension of the array V.
2539 * If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
2540 *
2541 * TAU (input) DOUBLE PRECISION array, dimension (K)
2542 * TAU(i) must contain the scalar factor of the elementary
2543 * reflector H(i).
2544 *
2545 * T (output) DOUBLE PRECISION array, dimension (LDT,K)
2546 * The k by k triangular factor T of the block reflector.
2547 * If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
2548 * lower triangular. The rest of the array is not used.
2549 *
2550 * LDT (input) INTEGER
2551 * The leading dimension of the array T. LDT >= K.
2552 *
2553 * Further Details
2554 * ===============
2555 *
2556 * The shape of the matrix V and the storage of the vectors which define
2557 * the H(i) is best illustrated by the following example with n = 5 and
2558 * k = 3. The elements equal to 1 are not stored; the corresponding
2559 * array elements are modified but restored on exit. The rest of the
2560 * array is not used.
2561 *
2562 * DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R':
2563 *
2564 * V = ( 1 ) V = ( 1 v1 v1 v1 v1 )
2565 * ( v1 1 ) ( 1 v2 v2 v2 )
2566 * ( v1 v2 1 ) ( 1 v3 v3 )
2567 * ( v1 v2 v3 )
2568 * ( v1 v2 v3 )
2569 *
2570 * DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R':
2571 *
2572 * V = ( v1 v2 v3 ) V = ( v1 v1 1 )
2573 * ( v1 v2 v3 ) ( v2 v2 v2 1 )
2574 * ( 1 v2 v3 ) ( v3 v3 v3 v3 1 )
2575 * ( 1 v3 )
2576 * ( 1 )
2577 *
2578 * =====================================================================
2579 *
2580 * .. Parameters ..*/
2581 #undef one
2582 #define one 1.0e+0
2583 #undef zero
2584 #define zero 0.0e+0
2585 /** ..
2586 * .. Local Scalars ..*/
2587 long i, j;
2588 double vii;
2589 /** ..
2590 * .. Executable Statements ..
2591 *
2592 * Quick return if possible
2593 **/
2594 /*-----implicit-declarations-----*/
2595 /*-----end-of-declarations-----*/
2596 if( n==0 )
2597 return;
2598
2599 if( lsame( direct, 'f' ) ) {
2600 for (i=1 ; i<=k ; i+=1) {
2601 if( tau_1( i )==zero ) {
2602 /**
2603 * H(i) = I
2604 **/
2605 for (j=1 ; j<=i ; j+=1) {
2606 t_2( j, i ) = zero;
2607 }
2608 } else {
2609 /**
2610 * general case
2611 **/
2612 vii = v_2( i, i );
2613 v_2( i, i ) = one;
2614 if( lsame( storev, 'c' ) ) {
2615 /**
2616 * T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i)
2617 **/
2618 cblas_dgemv(CblasColMajor, CblasTrans, n-i+1, i-1, -tau_1( i ),
2619 &v_2( i, 1 ), ldv, &v_2( i, i ), 1, zero,
2620 &t_2( 1, i ), 1 );
2621 } else {
2622 /**
2623 * T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)'
2624 **/
2625 cblas_dgemv(CblasColMajor, CblasNoTrans, i-1, n-i+1,
2626 -tau_1( i ), &v_2( 1, i ), ldv, &v_2( i, i ), ldv, zero,
2627 &t_2( 1, i ), 1 );
2628 }
2629 v_2( i, i ) = vii;
2630 /**
2631 * T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
2632 **/
2633 cblas_dtrmv(CblasColMajor, CblasUpper, CblasNoTrans, CblasNonUnit,
2634 i-1, t, ldt, &t_2( 1, i ), 1 );
2635 t_2( i, i ) = tau_1( i );
2636 }
2637 }
2638 } else {
2639 for (i=k ; i>=1 ; i+=-1) {
2640 if( tau_1( i )==zero ) {
2641 /**
2642 * H(i) = I
2643 **/
2644 for (j=i ; j<=k ; j+=1) {
2645 t_2( j, i ) = zero;
2646 }
2647 } else {
2648 /**
2649 * general case
2650 **/
2651 if( i<k ) {
2652 if( lsame( storev, 'c' ) ) {
2653 vii = v_2( n-k+i, i );
2654 v_2( n-k+i, i ) = one;
2655 /**
2656 * T(i+1:k,i) :=
2657 * - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i)
2658 **/
2659 cblas_dgemv(CblasColMajor, CblasTrans, n-k+i, k-i,
2660 -tau_1( i ), &v_2( 1, i+1 ), ldv, &v_2( 1, i ),
2661 1, zero, &t_2( i+1, i ), 1 );
2662 v_2( n-k+i, i ) = vii;
2663 } else {
2664 vii = v_2( i, n-k+i );
2665 v_2( i, n-k+i ) = one;
2666 /**
2667 * T(i+1:k,i) :=
2668 * - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)'
2669 **/
2670 cblas_dgemv(CblasColMajor, CblasNoTrans, k-i, n-k+i,
2671 -tau_1( i ), &v_2( i+1, 1 ), ldv, &v_2( i, 1 ), ldv,
2672 zero, &t_2( i+1, i ), 1 );
2673 v_2( i, n-k+i ) = vii;
2674 }
2675 /**
2676 * T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
2677 **/
2678 cblas_dtrmv(CblasColMajor, CblasLower, CblasNoTrans, CblasNonUnit,
2679 k-i, &t_2( i+1, i+1 ), ldt, &t_2( i+1, i ), 1 );
2680 }
2681 t_2( i, i ) = tau_1( i );
2682 }
2683 }
2684 }
2685 return;
2686 /**
2687 * End of DLARFT
2688 **/
2689 }
2690
2691
2692
dgeqr2(long m,long n,double a[],long lda,double tau[],double work[],long * info)2693 void dgeqr2( long m, long n, double a[], long lda, double tau[],
2694 double work[], long *info )
2695 {
2696 /**
2697 * -- LAPACK routine (version 1.1) --
2698 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
2699 * Courant Institute, Argonne National Lab, and Rice University
2700 * February 29, 1992
2701 *
2702 * .. Scalar Arguments ..*/
2703 /** ..
2704 * .. Array Arguments ..*/
2705 #undef work_1
2706 #define work_1(a1) work[a1-1]
2707 #undef tau_1
2708 #define tau_1(a1) tau[a1-1]
2709 #undef a_2
2710 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
2711 /** ..
2712 *
2713 * Purpose
2714 * =======
2715 *
2716 * DGEQR2 computes a QR factorization of a real m by n matrix A:
2717 * A = Q * R.
2718 *
2719 * Arguments
2720 * =========
2721 *
2722 * M (input) INTEGER
2723 * The number of rows of the matrix A. M >= 0.
2724 *
2725 * N (input) INTEGER
2726 * The number of columns of the matrix A. N >= 0.
2727 *
2728 * A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
2729 * On entry, the m by n matrix A.
2730 * On exit, the elements on and above the diagonal of the array
2731 * contain the min(m,n) by n upper trapezoidal matrix R (R is
2732 * upper triangular if m >= n); the elements below the diagonal,
2733 * with the array TAU, represent the orthogonal matrix Q as a
2734 * product of elementary reflectors (see Further Details).
2735 *
2736 * LDA (input) INTEGER
2737 * The leading dimension of the array A. LDA >= max(1,M).
2738 *
2739 * TAU (output) DOUBLE PRECISION array, dimension (min(M,N))
2740 * The scalar factors of the elementary reflectors (see Further
2741 * Details).
2742 *
2743 * WORK (workspace) DOUBLE PRECISION array, dimension (N)
2744 *
2745 * INFO (output) INTEGER
2746 * = 0: successful exit
2747 * < 0: if INFO = -i, the i-th argument had an illegal value
2748 *
2749 * Further Details
2750 * ===============
2751 *
2752 * The matrix Q is represented as a product of elementary reflectors
2753 *
2754 * Q = H(1) H(2) . . . H(k), where k = min(m,n).
2755 *
2756 * Each H(i) has the form
2757 *
2758 * H(i) = I - tau * v * v'
2759 *
2760 * where tau is a real scalar, and v is a real vector with
2761 * v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
2762 * and tau in TAU(i).
2763 *
2764 * =====================================================================
2765 *
2766 * .. Parameters ..*/
2767 #undef one
2768 #define one 1.0e+0
2769 /** ..
2770 * .. Local Scalars ..*/
2771 long i, k;
2772 double aii;
2773 /** ..
2774 * .. Intrinsic Functions ..*/
2775 /* intrinsic max, min;*/
2776 /** ..
2777 * .. Executable Statements ..
2778 *
2779 * Test the input arguments
2780 **/
2781 /*-----implicit-declarations-----*/
2782 /*-----end-of-declarations-----*/
2783 *info = 0;
2784 if( m<0 ) {
2785 *info = -1;
2786 } else if( n<0 ) {
2787 *info = -2;
2788 } else if( lda<max( 1, m ) ) {
2789 *info = -4;
2790 }
2791 if( *info!=0 ) {
2792 xerbla( "dgeqr2", -*info );
2793 return;
2794 }
2795
2796 k = min( m, n );
2797
2798 for (i=1 ; i<=k ; i+=1) {
2799 /**
2800 * Generate elementary reflector H(i) to annihilate A(i+1:m,i)
2801 **/
2802 dlarfg( m-i+1, &a_2( i, i ), &a_2( min( i+1, m ), i ), 1,
2803 &tau_1( i ) );
2804 if( i<n ) {
2805 /**
2806 * Apply H(i) to A(i:m,i+1:n) from the left
2807 **/
2808 aii = a_2( i, i );
2809 a_2( i, i ) = one;
2810 dlarf( 'l'/*eft*/, m-i+1, n-i, &a_2( i, i ), 1, tau_1( i ),
2811 &a_2( i, i+1 ), lda, work );
2812 a_2( i, i ) = aii;
2813 }
2814 }
2815 return;
2816 /**
2817 * End of DGEQR2
2818 **/
2819 }
2820
2821
2822
dlarf(char side,long m,long n,double v[],long incv,double tau,double c[],long ldc,double work[])2823 void dlarf( char side, long m, long n, double v[], long incv,
2824 double tau, double c[], long ldc, double work[] )
2825 {
2826 /**
2827 * -- LAPACK auxiliary routine (version 1.1) --
2828 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
2829 * Courant Institute, Argonne National Lab, and Rice University
2830 * February 29, 1992
2831 *
2832 * .. Scalar Arguments ..*/
2833 /** ..
2834 * .. Array Arguments ..*/
2835 #undef work_1
2836 #define work_1(a1) work[a1-1]
2837 #undef v_1
2838 #define v_1(a1) v[a1-1]
2839 #undef c_2
2840 #define c_2(a1,a2) c[a1-1+ldc*(a2-1)]
2841 /** ..
2842 *
2843 * Purpose
2844 * =======
2845 *
2846 * DLARF applies a real elementary reflector H to a real m by n matrix
2847 * C, from either the left or the right. H is represented in the form
2848 *
2849 * H = I - tau * v * v'
2850 *
2851 * where tau is a real scalar and v is a real vector.
2852 *
2853 * If tau = 0, then H is taken to be the unit matrix.
2854 *
2855 * Arguments
2856 * =========
2857 *
2858 * SIDE (input) CHARACTER*1
2859 * = 'L': form H * C
2860 * = 'R': form C * H
2861 *
2862 * M (input) INTEGER
2863 * The number of rows of the matrix C.
2864 *
2865 * N (input) INTEGER
2866 * The number of columns of the matrix C.
2867 *
2868 * V (input) DOUBLE PRECISION array, dimension
2869 * (1 + (M-1)*abs(INCV)) if SIDE = 'L'
2870 * or (1 + (N-1)*abs(INCV)) if SIDE = 'R'
2871 * The vector v in the representation of H. V is not used if
2872 * TAU = 0.
2873 *
2874 * INCV (input) INTEGER
2875 * The increment between elements of v. INCV <> 0.
2876 *
2877 * TAU (input) DOUBLE PRECISION
2878 * The value tau in the representation of H.
2879 *
2880 * C (input/output) DOUBLE PRECISION array, dimension (LDC,N)
2881 * On entry, the m by n matrix C.
2882 * On exit, C is overwritten by the matrix H * C if SIDE = 'L',
2883 * or C * H if SIDE = 'R'.
2884 *
2885 * LDC (input) INTEGER
2886 * The leading dimension of the array C. LDC >= max(1,M).
2887 *
2888 * WORK (workspace) DOUBLE PRECISION array, dimension
2889 * (N) if SIDE = 'L'
2890 * or (M) if SIDE = 'R'
2891 *
2892 * =====================================================================
2893 *
2894 * .. Parameters ..*/
2895 #undef one
2896 #define one 1.0e+0
2897 #undef zero
2898 #define zero 0.0e+0
2899 /** ..
2900 * .. Executable Statements ..
2901 **/
2902 /*-----implicit-declarations-----*/
2903 /*-----end-of-declarations-----*/
2904 if( lsame( side, 'l' ) ) {
2905 /**
2906 * Form H * C
2907 **/
2908 if( tau!=zero ) {
2909 /**
2910 * w := C' * v
2911 **/
2912 cblas_dgemv(CblasColMajor, CblasTrans, m, n, one, c, ldc,
2913 v, incv, zero, work, 1 );
2914 /**
2915 * C := C - v * w'
2916 **/
2917 cblas_dger(CblasColMajor, m, n, -tau, v, incv, work, 1, c, ldc );
2918 }
2919 } else {
2920 /**
2921 * Form C * H
2922 **/
2923 if( tau!=zero ) {
2924 /**
2925 * w := C * v
2926 **/
2927 cblas_dgemv(CblasColMajor, CblasNoTrans, m, n, one, c, ldc,
2928 v, incv, zero, work, 1 );
2929 /**
2930 * C := C - w * v'
2931 **/
2932 cblas_dger(CblasColMajor, m, n, -tau, work, 1, v, incv, c, ldc );
2933 }
2934 }
2935 return;
2936 /**
2937 * End of DLARF
2938 **/
2939 }
2940
2941
2942
dlarfg(long n,double * alpha,double x[],long incx,double * tau)2943 void dlarfg( long n, double *alpha, double x[], long incx, double *tau )
2944 {
2945 /**
2946 * -- LAPACK auxiliary routine (version 1.1) --
2947 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
2948 * Courant Institute, Argonne National Lab, and Rice University
2949 * February 29, 1992
2950 *
2951 * .. Scalar Arguments ..*/
2952 /** ..
2953 * .. Array Arguments ..*/
2954 #undef x_1
2955 #define x_1(a1) x[a1-1]
2956 /** ..
2957 *
2958 * Purpose
2959 * =======
2960 *
2961 * DLARFG generates a real elementary reflector H of order n, such
2962 * that
2963 *
2964 * H * ( alpha ) = ( beta ), H' * H = I.
2965 * ( x ) ( 0 )
2966 *
2967 * where alpha and beta are scalars, and x is an (n-1)-element real
2968 * vector. H is represented in the form
2969 *
2970 * H = I - tau * ( 1 ) * ( 1 v' ) ,
2971 * ( v )
2972 *
2973 * where tau is a real scalar and v is a real (n-1)-element
2974 * vector.
2975 *
2976 * If the elements of x are all zero, then tau = 0 and H is taken to be
2977 * the unit matrix.
2978 *
2979 * Otherwise 1 <= tau <= 2.
2980 *
2981 * Arguments
2982 * =========
2983 *
2984 * N (input) INTEGER
2985 * The order of the elementary reflector.
2986 *
2987 * ALPHA (input/output) DOUBLE PRECISION
2988 * On entry, the value alpha.
2989 * On exit, it is overwritten with the value beta.
2990 *
2991 * X (input/output) DOUBLE PRECISION array, dimension
2992 * (1+(N-2)*abs(INCX))
2993 * On entry, the vector x.
2994 * On exit, it is overwritten with the vector v.
2995 *
2996 * INCX (input) INTEGER
2997 * The increment between elements of X. INCX <> 0.
2998 *
2999 * TAU (output) DOUBLE PRECISION
3000 * The value tau.
3001 *
3002 * =====================================================================
3003 *
3004 * .. Parameters ..*/
3005 #undef one
3006 #define one 1.0e+0
3007 #undef zero
3008 #define zero 0.0e+0
3009 /** ..
3010 * .. Local Scalars ..*/
3011 long j, knt;
3012 double beta, rsafmn, safmin, xnorm;
3013 /** ..
3014 * .. Intrinsic Functions ..*/
3015 /* intrinsic abs, sign;*/
3016 /** ..
3017 * .. Executable Statements ..
3018 **/
3019 /*-----implicit-declarations-----*/
3020 /*-----end-of-declarations-----*/
3021 if( n<=1 ) {
3022 *tau = zero;
3023 return;
3024 }
3025
3026 xnorm = cblas_dnrm2( n-1, x, incx );
3027
3028 if( xnorm==zero ) {
3029 /**
3030 * H = I
3031 **/
3032 *tau = zero;
3033 } else {
3034 /**
3035 * general case
3036 **/
3037 beta = -sign( dlapy2( *alpha, xnorm ), *alpha );
3038 safmin = dlamch( 's' );
3039 if( abs( beta )<safmin ) {
3040 /**
3041 * XNORM, BETA may be inaccurate; scale X and recompute them
3042 **/
3043 rsafmn = one / safmin;
3044 knt = 0;
3045 L_10:
3046 knt = knt + 1;
3047 cblas_dscal( n-1, rsafmn, x, incx );
3048 beta = beta*rsafmn;
3049 *alpha = *alpha*rsafmn;
3050 if( abs( beta )<safmin )
3051 goto L_10;
3052 /**
3053 * New BETA is at most 1, at least SAFMIN
3054 **/
3055 xnorm = cblas_dnrm2( n-1, x, incx );
3056 beta = -sign( dlapy2( *alpha, xnorm ), *alpha );
3057 *tau = ( beta-*alpha ) / beta;
3058 cblas_dscal( n-1, one / ( *alpha-beta ), x, incx );
3059 /**
3060 * If ALPHA is subnormal, it may lose relative accuracy
3061 **/
3062 *alpha = beta;
3063 for (j=1 ; j<=knt ; j+=1) {
3064 *alpha = *alpha*safmin;
3065 }
3066 } else {
3067 *tau = ( beta-*alpha ) / beta;
3068 cblas_dscal( n-1, one / ( *alpha-beta ), x, incx );
3069 *alpha = beta;
3070 }
3071 }
3072
3073 return;
3074 /**
3075 * End of DLARFG
3076 **/
3077 }
3078
3079
3080
dlapy2(double x,double y)3081 double dlapy2( double x, double y )
3082 {
3083 /**
3084 * -- LAPACK auxiliary routine (version 1.1) --
3085 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
3086 * Courant Institute, Argonne National Lab, and Rice University
3087 * October 31, 1992
3088 *
3089 * .. Scalar Arguments ..*/
3090 double dlapy2_R;
3091 /** ..
3092 *
3093 * Purpose
3094 * =======
3095 *
3096 * DLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary
3097 * overflow.
3098 *
3099 * Arguments
3100 * =========
3101 *
3102 * X (input) DOUBLE PRECISION
3103 * Y (input) DOUBLE PRECISION
3104 * X and Y specify the values x and y.
3105 *
3106 * =====================================================================
3107 *
3108 * .. Parameters ..*/
3109 #undef zero
3110 #define zero 0.0e0
3111 #undef one
3112 #define one 1.0e0
3113 /** ..
3114 * .. Local Scalars ..*/
3115 double w, xabs, yabs, z;
3116 /** ..
3117 * .. Intrinsic Functions ..*/
3118 /* intrinsic abs, max, min, sqrt;*/
3119 /** ..
3120 * .. Executable Statements ..
3121 **/
3122 /*-----implicit-declarations-----*/
3123 /*-----end-of-declarations-----*/
3124 xabs = abs( x );
3125 yabs = abs( y );
3126 w = max( xabs, yabs );
3127 z = min( xabs, yabs );
3128 if( z==zero ) {
3129 dlapy2_R = w;
3130 } else {
3131 dlapy2_R = w*sqrt( one+( z / w )*( z / w ) );
3132 }
3133 return dlapy2_R;
3134 /**
3135 * End of DLAPY2
3136 **/
3137 }
3138
3139
3140
dlascl(char type,long kl,long ku,double cfrom,double cto,long m,long n,double a[],long lda,long * info)3141 void dlascl( char type, long kl, long ku, double cfrom, double cto,
3142 long m, long n, double a[], long lda, long *info )
3143 {
3144 /**
3145 * -- LAPACK auxiliary routine (version 1.1) --
3146 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
3147 * Courant Institute, Argonne National Lab, and Rice University
3148 * February 29, 1992
3149 *
3150 * .. Scalar Arguments ..*/
3151 /** ..
3152 * .. Array Arguments ..*/
3153 #undef a_2
3154 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
3155 /** ..
3156 *
3157 * Purpose
3158 * =======
3159 *
3160 * DLASCL multiplies the M by N real matrix A by the real scalar
3161 * CTO/CFROM. This is done without over/underflow as long as the final
3162 * result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that
3163 * A may be full, upper triangular, lower triangular, upper Hessenberg,
3164 * or banded.
3165 *
3166 * Arguments
3167 * =========
3168 *
3169 * TYPE (input) CHARACTER*1
3170 * TYPE indices the storage type of the input matrix.
3171 * = 'G': A is a full matrix.
3172 * = 'L': A is a lower triangular matrix.
3173 * = 'U': A is an upper triangular matrix.
3174 * = 'H': A is an upper Hessenberg matrix.
3175 * = 'B': A is a symmetric band matrix with lower bandwidth KL
3176 * and upper bandwidth KU and with the only the lower
3177 * half stored.
3178 * = 'Q': A is a symmetric band matrix with lower bandwidth KL
3179 * and upper bandwidth KU and with the only the upper
3180 * half stored.
3181 * = 'Z': A is a band matrix with lower bandwidth KL and upper
3182 * bandwidth KU.
3183 *
3184 * KL (input) INTEGER
3185 * The lower bandwidth of A. Referenced only if TYPE = 'B',
3186 * 'Q' or 'Z'.
3187 *
3188 * KU (input) INTEGER
3189 * The upper bandwidth of A. Referenced only if TYPE = 'B',
3190 * 'Q' or 'Z'.
3191 *
3192 * CFROM (input) DOUBLE PRECISION
3193 * CTO (input) DOUBLE PRECISION
3194 * The matrix A is multiplied by CTO/CFROM. A(I,J) is computed
3195 * without over/underflow if the final result CTO*A(I,J)/CFROM
3196 * can be represented without over/underflow. CFROM must be
3197 * nonzero.
3198 *
3199 * M (input) INTEGER
3200 * The number of rows of the matrix A. M >= 0.
3201 *
3202 * N (input) INTEGER
3203 * The number of columns of the matrix A. N >= 0.
3204 *
3205 * A (input/output) DOUBLE PRECISION array, dimension (LDA,M)
3206 * The matrix to be multiplied by CTO/CFROM. See TYPE for the
3207 * storage type.
3208 *
3209 * LDA (input) INTEGER
3210 * The leading dimension of the array A. LDA >= max(1,M).
3211 *
3212 * INFO (output) INTEGER
3213 * 0 - successful exit
3214 * <0 - if INFO = -i, the i-th argument had an illegal value.
3215 *
3216 * =====================================================================
3217 *
3218 * .. Parameters ..*/
3219 #undef zero
3220 #define zero 0.0e0
3221 #undef one
3222 #define one 1.0e0
3223 /** ..
3224 * .. Local Scalars ..*/
3225 int done;
3226 long i, itype, j, k1, k2, k3, k4;
3227 double bignum, cfrom1, cfromc, cto1, ctoc, mul, smlnum;
3228 /** ..
3229 * .. Intrinsic Functions ..*/
3230 /* intrinsic abs, max, min;*/
3231 /** ..
3232 * .. Executable Statements ..
3233 *
3234 * Test the input arguments
3235 **/
3236 /*-----implicit-declarations-----*/
3237 /*-----end-of-declarations-----*/
3238 *info = 0;
3239
3240 if( lsame( type, 'g' ) ) {
3241 itype = 0;
3242 } else if( lsame( type, 'l' ) ) {
3243 itype = 1;
3244 } else if( lsame( type, 'u' ) ) {
3245 itype = 2;
3246 } else if( lsame( type, 'h' ) ) {
3247 itype = 3;
3248 } else if( lsame( type, 'b' ) ) {
3249 itype = 4;
3250 } else if( lsame( type, 'q' ) ) {
3251 itype = 5;
3252 } else if( lsame( type, 'z' ) ) {
3253 itype = 6;
3254 } else {
3255 itype = -1;
3256 }
3257
3258 if( itype==-1 ) {
3259 *info = -1;
3260 } else if( cfrom==zero ) {
3261 *info = -4;
3262 } else if( m<0 ) {
3263 *info = -6;
3264 } else if( n<0 || ( itype==4 && n!=m ) ||
3265 ( itype==5 && n!=m ) ) {
3266 *info = -7;
3267 } else if( itype<=3 && lda<max( 1, m ) ) {
3268 *info = -9;
3269 } else if( itype>=4 ) {
3270 if( kl<0 || kl>max( m-1, 0 ) ) {
3271 *info = -2;
3272 } else if( ku<0 || ku>max( n-1, 0 ) ||
3273 ( ( itype==4 || itype==5 ) && kl!=ku ) )
3274 {
3275 *info = -3;
3276 } else if( ( itype==4 && lda<kl+1 ) ||
3277 ( itype==5 && lda<ku+1 ) ||
3278 ( itype==6 && lda<2*kl+ku+1 ) ) {
3279 *info = -9;
3280 }
3281 }
3282
3283 if( *info!=0 ) {
3284 xerbla( "dlascl", -*info );
3285 return;
3286 }
3287 /**
3288 * Quick return if possible
3289 **/
3290 if( n==0 || m==0 )
3291 return;
3292 /**
3293 * Get machine parameters
3294 **/
3295 smlnum = dlamch( 's' );
3296 bignum = one / smlnum;
3297
3298 cfromc = cfrom;
3299 ctoc = cto;
3300
3301 L_10:
3302 cfrom1 = cfromc*smlnum;
3303 cto1 = ctoc / bignum;
3304 if( abs( cfrom1 )>abs( ctoc ) && ctoc!=zero ) {
3305 mul = smlnum;
3306 done = 0;
3307 cfromc = cfrom1;
3308 } else if( abs( cto1 )>abs( cfromc ) ) {
3309 mul = bignum;
3310 done = 0;
3311 ctoc = cto1;
3312 } else {
3313 mul = ctoc / cfromc;
3314 done = 1;
3315 }
3316
3317 if( itype==0 ) {
3318 /**
3319 * Full matrix
3320 **/
3321 for (j=1 ; j<=n ; j+=1) {
3322 for (i=1 ; i<=m ; i+=1) {
3323 a_2( i, j ) = a_2( i, j )*mul;
3324 }
3325 }
3326
3327 } else if( itype==1 ) {
3328 /**
3329 * Lower triangular matrix
3330 **/
3331 for (j=1 ; j<=n ; j+=1) {
3332 for (i=j ; i<=m ; i+=1) {
3333 a_2( i, j ) = a_2( i, j )*mul;
3334 }
3335 }
3336
3337 } else if( itype==2 ) {
3338 /**
3339 * Upper triangular matrix
3340 **/
3341 for (j=1 ; j<=n ; j+=1) {
3342 for (i=1 ; i<=min( j, m ) ; i+=1) {
3343 a_2( i, j ) = a_2( i, j )*mul;
3344 }
3345 }
3346
3347 } else if( itype==3 ) {
3348 /**
3349 * Upper Hessenberg matrix
3350 **/
3351 for (j=1 ; j<=n ; j+=1) {
3352 for (i=1 ; i<=min( j+1, m ) ; i+=1) {
3353 a_2( i, j ) = a_2( i, j )*mul;
3354 }
3355 }
3356
3357 } else if( itype==4 ) {
3358 /**
3359 * Lower half of a symmetric band matrix
3360 **/
3361 k3 = kl + 1;
3362 k4 = n + 1;
3363 for (j=1 ; j<=n ; j+=1) {
3364 for (i=1 ; i<=min( k3, k4-j ) ; i+=1) {
3365 a_2( i, j ) = a_2( i, j )*mul;
3366 }
3367 }
3368
3369 } else if( itype==5 ) {
3370 /**
3371 * Upper half of a symmetric band matrix
3372 **/
3373 k1 = ku + 2;
3374 k3 = ku + 1;
3375 for (j=1 ; j<=n ; j+=1) {
3376 for (i=max( k1-j, 1 ) ; i<=k3 ; i+=1) {
3377 a_2( i, j ) = a_2( i, j )*mul;
3378 }
3379 }
3380
3381 } else if( itype==6 ) {
3382 /**
3383 * Band matrix
3384 **/
3385 k1 = kl + ku + 2;
3386 k2 = kl + 1;
3387 k3 = 2*kl + ku + 1;
3388 k4 = kl + ku + 1 + m;
3389 for (j=1 ; j<=n ; j+=1) {
3390 for (i=max( k1-j, k2 ) ; i<=min( k3, k4-j ) ; i+=1) {
3391 a_2( i, j ) = a_2( i, j )*mul;
3392 }
3393 }
3394
3395 }
3396
3397 if( !done )
3398 goto L_10;
3399
3400 return;
3401 /**
3402 * End of DLASCL
3403 **/
3404 }
3405 /**
3406 ************************************************************************
3407 **/
3408
3409
3410
dlange(char norm,long m,long n,double a[],long lda,double work[])3411 double dlange( char norm, long m, long n, double a[], long lda, double work[] )
3412 {
3413 /**
3414 * -- LAPACK auxiliary routine (version 1.1) --
3415 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
3416 * Courant Institute, Argonne National Lab, and Rice University
3417 * October 31, 1992
3418 *
3419 * .. Scalar Arguments ..*/
3420 double dlange_R;
3421 /** ..
3422 * .. Array Arguments ..*/
3423 #undef work_1
3424 #define work_1(a1) work[a1-1]
3425 #undef a_2
3426 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
3427 /** ..
3428 *
3429 * Purpose
3430 * =======
3431 *
3432 * DLANGE returns the value of the one norm, or the Frobenius norm, or
3433 * the infinity norm, or the element of largest absolute value of a
3434 * real matrix A.
3435 *
3436 * Description
3437 * ===========
3438 *
3439 * DLANGE returns the value
3440 *
3441 * DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'
3442 * (
3443 * ( norm1(A), NORM = '1', 'O' or 'o'
3444 * (
3445 * ( normI(A), NORM = 'I' or 'i'
3446 * (
3447 * ( normF(A), NORM = 'F', 'f', 'E' or 'e'
3448 *
3449 * where norm1 denotes the one norm of a matrix (maximum column sum),
3450 * normI denotes the infinity norm of a matrix (maximum row sum) and
3451 * normF denotes the Frobenius norm of a matrix (square root of sum of
3452 * squares). Note that max(abs(A(i,j))) is not a matrix norm.
3453 *
3454 * Arguments
3455 * =========
3456 *
3457 * NORM (input) CHARACTER*1
3458 * Specifies the value to be returned in DLANGE as described
3459 * above.
3460 *
3461 * M (input) INTEGER
3462 * The number of rows of the matrix A. M >= 0. When M = 0,
3463 * DLANGE is set to zero.
3464 *
3465 * N (input) INTEGER
3466 * The number of columns of the matrix A. N >= 0. When N = 0,
3467 * DLANGE is set to zero.
3468 *
3469 * A (input) DOUBLE PRECISION array, dimension (LDA,N)
3470 * The m by n matrix A.
3471 *
3472 * LDA (input) INTEGER
3473 * The leading dimension of the array A. LDA >= max(M,1).
3474 *
3475 * WORK (workspace) DOUBLE PRECISION array, dimension (LWORK),
3476 * where LWORK >= M when NORM = 'I'; otherwise, WORK is not
3477 * referenced.
3478 *
3479 * =====================================================================
3480 *
3481 * .. Parameters ..*/
3482 #undef one
3483 #define one 1.0e+0
3484 #undef zero
3485 #define zero 0.0e+0
3486 /** ..
3487 * .. Local Scalars ..*/
3488 long i, j;
3489 double scale, sum, value=0.0;
3490 /** ..
3491 * .. Intrinsic Functions ..*/
3492 /* intrinsic abs, max, min, sqrt;*/
3493 /** ..
3494 * .. Executable Statements ..
3495 **/
3496 /*-----implicit-declarations-----*/
3497 /*-----end-of-declarations-----*/
3498 if( min( m, n )==0 ) {
3499 value = zero;
3500 } else if( lsame( norm, 'm' ) ) {
3501 /**
3502 * Find max(abs(A(i,j))).
3503 **/
3504 value = zero;
3505 for (j=1 ; j<=n ; j+=1) {
3506 for (i=1 ; i<=m ; i+=1) {
3507 value = max( value, abs( a_2( i, j ) ) );
3508 }
3509 }
3510 } else if( ( lsame( norm, 'o' ) ) || ( norm=='1' ) ) {
3511 /**
3512 * Find norm1(A).
3513 **/
3514 value = zero;
3515 for (j=1 ; j<=n ; j+=1) {
3516 sum = zero;
3517 for (i=1 ; i<=m ; i+=1) {
3518 sum = sum + abs( a_2( i, j ) );
3519 }
3520 value = max( value, sum );
3521 }
3522 } else if( lsame( norm, 'i' ) ) {
3523 /**
3524 * Find normI(A).
3525 **/
3526 for (i=1 ; i<=m ; i+=1) {
3527 work_1( i ) = zero;
3528 }
3529 for (j=1 ; j<=n ; j+=1) {
3530 for (i=1 ; i<=m ; i+=1) {
3531 work_1( i ) = work_1( i ) + abs( a_2( i, j ) );
3532 }
3533 }
3534 value = zero;
3535 for (i=1 ; i<=m ; i+=1) {
3536 value = max( value, work_1( i ) );
3537 }
3538 } else if( ( lsame( norm, 'f' ) ) || ( lsame( norm, 'e' ) ) ) {
3539 /**
3540 * Find normF(A).
3541 **/
3542 scale = zero;
3543 sum = one;
3544 for (j=1 ; j<=n ; j+=1) {
3545 dlassq( m, &a_2( 1, j ), 1, &scale, &sum );
3546 }
3547 value = scale*sqrt( sum );
3548 }
3549
3550 dlange_R = value;
3551 return dlange_R;
3552 /**
3553 * End of DLANGE
3554 **/
3555 }
3556
3557
3558
dlassq(long n,double x[],long incx,double * scale,double * sumsq)3559 void dlassq( long n, double x[], long incx, double *scale, double *sumsq )
3560 {
3561 /**
3562 * -- LAPACK auxiliary routine (version 1.1) --
3563 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
3564 * Courant Institute, Argonne National Lab, and Rice University
3565 * October 31, 1992
3566 *
3567 * .. Scalar Arguments ..*/
3568 /** ..
3569 * .. Array Arguments ..*/
3570 #undef x_1
3571 #define x_1(a1) x[a1-1]
3572 /** ..
3573 *
3574 * Purpose
3575 * =======
3576 *
3577 * DLASSQ returns the values scl and smsq such that
3578 *
3579 * ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq,
3580 *
3581 * where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is
3582 * assumed to be non-negative and scl returns the value
3583 *
3584 * scl = max( scale, abs( x( i ) ) ).
3585 *
3586 * scale and sumsq must be supplied in SCALE and SUMSQ and
3587 * scl and smsq are overwritten on SCALE and SUMSQ respectively.
3588 *
3589 * The routine makes only one pass through the vector x.
3590 *
3591 * Arguments
3592 * =========
3593 *
3594 * N (input) INTEGER
3595 * The number of elements to be used from the vector X.
3596 *
3597 * X (input) DOUBLE PRECISION
3598 * The vector for which a scaled sum of squares is computed.
3599 * x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n.
3600 *
3601 * INCX (input) INTEGER
3602 * The increment between successive values of the vector X.
3603 * INCX > 0.
3604 *
3605 * SCALE (input/output) DOUBLE PRECISION
3606 * On entry, the value scale in the equation above.
3607 * On exit, SCALE is overwritten with scl , the scaling factor
3608 * for the sum of squares.
3609 *
3610 * SUMSQ (input/output) DOUBLE PRECISION
3611 * On entry, the value sumsq in the equation above.
3612 * On exit, SUMSQ is overwritten with smsq , the basic sum of
3613 * squares from which scl has been factored out.
3614 *
3615 * =====================================================================
3616 *
3617 * .. Parameters ..*/
3618 #undef zero
3619 #define zero 0.0e+0
3620 /** ..
3621 * .. Local Scalars ..*/
3622 long ix;
3623 double absxi;
3624 /** ..
3625 * .. Intrinsic Functions ..*/
3626 /* intrinsic abs;*/
3627 /** ..
3628 * .. Executable Statements ..
3629 **/
3630 /*-----implicit-declarations-----*/
3631 /*-----end-of-declarations-----*/
3632 if( n>0 ) {
3633 for (ix=1 ; incx>0?ix<=1 + ( n-1 )*incx:ix>=1 + ( n-1 )*incx ; ix+=incx) {
3634 if( x_1( ix )!=zero ) {
3635 absxi = abs( x_1( ix ) );
3636 if( *scale<absxi ) {
3637 *sumsq = 1 + *sumsq*( *scale / absxi )*( *scale / absxi );
3638 *scale = absxi;
3639 } else {
3640 *sumsq = *sumsq + ( absxi / *scale )*( absxi / *scale );
3641 }
3642 }
3643 }
3644 }
3645 return;
3646 /**
3647 * End of DLASSQ
3648 **/
3649 }
3650
3651
3652
dlaset(char uplo,long m,long n,double alpha,double beta,double a[],long lda)3653 void dlaset( char uplo, long m, long n, double alpha, double beta,
3654 double a[], long lda )
3655 {
3656 /**
3657 * -- LAPACK auxiliary routine (version 1.1) --
3658 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
3659 * Courant Institute, Argonne National Lab, and Rice University
3660 * October 31, 1992
3661 *
3662 * .. Scalar Arguments ..*/
3663 /** ..
3664 * .. Array Arguments ..*/
3665 #undef a_2
3666 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
3667 /** ..
3668 *
3669 * Purpose
3670 * =======
3671 *
3672 * DLASET initializes an m-by-n matrix A to BETA on the diagonal and
3673 * ALPHA on the offdiagonals.
3674 *
3675 * Arguments
3676 * =========
3677 *
3678 * UPLO (input) CHARACTER*1
3679 * Specifies the part of the matrix A to be set.
3680 * = 'U': Upper triangular part is set; the strictly lower
3681 * triangular part of A is not changed.
3682 * = 'L': Lower triangular part is set; the strictly upper
3683 * triangular part of A is not changed.
3684 * Otherwise: All of the matrix A is set.
3685 *
3686 * M (input) INTEGER
3687 * The number of rows of the matrix A. M >= 0.
3688 *
3689 * N (input) INTEGER
3690 * The number of columns of the matrix A. N >= 0.
3691 *
3692 * ALPHA (input) DOUBLE PRECISION
3693 * The constant to which the offdiagonal elements are to be set.
3694 *
3695 * BETA (input) DOUBLE PRECISION
3696 * The constant to which the diagonal elements are to be set.
3697 *
3698 * A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
3699 * On exit, the leading m-by-n submatrix of A is set as follows:
3700 *
3701 * if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n,
3702 * if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n,
3703 * otherwise, A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j,
3704 *
3705 * and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n).
3706 *
3707 * LDA (input) INTEGER
3708 * The leading dimension of the array A. LDA >= max(1,M).
3709 *
3710 * =====================================================================
3711 *
3712 * .. Local Scalars ..*/
3713 long i, j;
3714 /** ..
3715 * .. Intrinsic Functions ..*/
3716 /* intrinsic min;*/
3717 /** ..
3718 * .. Executable Statements ..
3719 **/
3720 /*-----implicit-declarations-----*/
3721 /*-----end-of-declarations-----*/
3722 if( lsame( uplo, 'u' ) ) {
3723 /**
3724 * Set the strictly upper triangular or trapezoidal part of the
3725 * array to ALPHA.
3726 **/
3727 for (j=2 ; j<=n ; j+=1) {
3728 for (i=1 ; i<=min( j-1, m ) ; i+=1) {
3729 a_2( i, j ) = alpha;
3730 }
3731 }
3732
3733 } else if( lsame( uplo, 'l' ) ) {
3734 /**
3735 * Set the strictly lower triangular or trapezoidal part of the
3736 * array to ALPHA.
3737 **/
3738 for (j=1 ; j<=min( m, n ) ; j+=1) {
3739 for (i=j + 1 ; i<=m ; i+=1) {
3740 a_2( i, j ) = alpha;
3741 }
3742 }
3743
3744 } else {
3745 /**
3746 * Set the leading m-by-n submatrix to ALPHA.
3747 **/
3748 for (j=1 ; j<=n ; j+=1) {
3749 for (i=1 ; i<=m ; i+=1) {
3750 a_2( i, j ) = alpha;
3751 }
3752 }
3753 }
3754 /**
3755 * Set the first min(M,N) diagonal elements to BETA.
3756 **/
3757 for (i=1 ; i<=min( m, n ) ; i+=1) {
3758 a_2( i, i ) = beta;
3759 }
3760
3761 return;
3762 /**
3763 * End of DLASET
3764 **/
3765 }
3766