1 /*
2 * $Id: dgelss.c,v 1.1 2005-09-18 22:04:43 dhmunro Exp $
3 * LAPACK matrix solver using SVD.
4 */
5 /* Copyright (c) 2005, The Regents of the University of California.
6 * All rights reserved.
7 * This file is part of yorick (http://yorick.sourceforge.net).
8 * Read the accompanying LICENSE file for details.
9 */
10
11 #include "dg.h"
12
13
14 /*---blas routines---*/
15 /* dcopy dgemv dgemm */
16
17
18 /*-----Fortran intrinsics converted-----*/
19 #define min(x,y) ((x)<(y)?(x):(y))
20 #define max(x,y) ((x)>(y)?(x):(y))
21 /*-----end of Fortran intrinsics-----*/
22
23
24
dgelss(long m,long n,long nrhs,double a[],long lda,double b[],long ldb,double s[],double rcond,long * rank,double work[],long lwork,long * info)25 void dgelss( long m, long n, long nrhs, double a[], long lda,
26 double b[], long ldb, double s[], double rcond,
27 long *rank,double work[], long lwork, long *info )
28 {
29 /**
30 * -- LAPACK driver routine (version 1.1) --
31 * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
32 * Courant Institute, Argonne National Lab, and Rice University
33 * March 31, 1993
34 *
35 * .. Scalar Arguments ..*/
36 /** ..
37 * .. Array Arguments ..*/
38 #undef work_1
39 #define work_1(a1) work[a1-1]
40 #undef s_1
41 #define s_1(a1) s[a1-1]
42 #undef b_2
43 #define b_2(a1,a2) b[a1-1+ldb*(a2-1)]
44 #undef a_2
45 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
46 /** ..
47 *
48 * Purpose
49 * =======
50 *
51 * DGELSS computes the minimum norm solution to a real linear least
52 * squares problem:
53 *
54 * Minimize 2-norm(| b - A*x |).
55 *
56 * using the singular value decomposition (SVD) of A. A is an M-by-N
57 * matrix which may be rank-deficient.
58 *
59 * Several right hand side vectors b and solution vectors x can be
60 * handled in a single call; they are stored as the columns of the
61 * M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
62 * X.
63 *
64 * The effective rank of A is determined by treating as zero those
65 * singular values which are less than RCOND times the largest singular
66 * value.
67 *
68 * Arguments
69 * =========
70 *
71 * M (input) INTEGER
72 * The number of rows of the matrix A. M >= 0.
73 *
74 * N (input) INTEGER
75 * The number of columns of the matrix A. N >= 0.
76 *
77 * NRHS (input) INTEGER
78 * The number of right hand sides, i.e., the number of columns
79 * of the matrices B and X. NRHS >= 0.
80 *
81 * A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
82 * On entry, the M-by-N matrix A.
83 * On exit, the first min(m,n) rows of A are overwritten with
84 * its right singular vectors, stored rowwise.
85 *
86 * LDA (input) INTEGER
87 * The leading dimension of the array A. LDA >= max(1,M).
88 *
89 * B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
90 * On entry, the M-by-NRHS right hand side matrix B.
91 * On exit, B is overwritten by the N-by-NRHS solution
92 * matrix X. If m >= n and RANK = n, the residual
93 * sum-of-squares for the solution in the i-th column is given
94 * by the sum of squares of elements n+1:m in that column.
95 *
96 * LDB (input) INTEGER
97 * The leading dimension of the array B. LDB >= max(1,MAX(M,N)).
98 *
99 * S (output) DOUBLE PRECISION array, dimension (min(M,N))
100 * The singular values of A in decreasing order.
101 * The condition number of A in the 2-norm = S(1)/S(min(m,n)).
102 *
103 * RCOND (input) DOUBLE PRECISION
104 * RCOND is used to determine the effective rank of A.
105 * Singular values S(i) <= RCOND*S(1) are treated as zero.
106 * If RCOND $<$ 0, machine precision is used instead.
107 *
108 * RANK (output) INTEGER
109 * The effective rank of A, i.e., the number of singular values
110 * which are greater than RCOND*S(1).
111 *
112 * WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
113 * On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
114 *
115 * LWORK (input) INTEGER
116 * The dimension of the array WORK. LWORK >= 1, and also:
117 * LWORK >= 3*N+MAX(2*N,NRHS,M) if M >= N,
118 * LWORK >= 3*M+MAX(2*M,NRHS,N) if M < N.
119 * For good performance, LWORK should generally be larger.
120 *
121 * INFO (output) INTEGER
122 * = 0: successful exit
123 * < 0: if INFO = -i, the i-th argument had an illegal value.
124 * > 0: the algorithm for computing the SVD failed to converge;
125 * if INFO = i, i off-diagonal elements of an intermediate
126 * bidiagonal form did not converge to zero.
127 *
128 * =====================================================================
129 *
130 * .. Parameters ..*/
131 #undef zero
132 #define zero 0.0e0
133 #undef one
134 #define one 1.0e0
135 /** ..
136 * .. Local Scalars ..*/
137 long bl, chunk, i, iascl, ibscl, ie, il, itau,
138 itaup, itauq, iwork, ldwork, maxmn, maxwrk=0,
139 minmn, minwrk, mm, mnthr;
140 double anrm, bignum, bnrm, eps, sfmin, smlnum, thr;
141 /** ..
142 * .. Local Arrays ..*/
143 double vdum[1];
144 #undef vdum_1
145 #define vdum_1(a1) [a1-1]
146 /** ..
147 * .. Intrinsic Functions ..*/
148 /* intrinsic max, min;*/
149 /** ..
150 * .. Executable Statements ..
151 *
152 * Test the input arguments
153 **/
154 /*-----implicit-declarations-----*/
155 /*-----end-of-declarations-----*/
156 *info = 0;
157 minmn = min( m, n );
158 maxmn = max( m, n );
159 mnthr = ilaenv( 6, "dgelss", " ", m, n, nrhs, -1 );
160 if( m<0 ) {
161 *info = -1;
162 } else if( n<0 ) {
163 *info = -2;
164 } else if( nrhs<0 ) {
165 *info = -3;
166 } else if( lda<max( 1, m ) ) {
167 *info = -5;
168 } else if( ldb<max( 1, maxmn ) ) {
169 *info = -7;
170 }
171 /**
172 * Compute workspace
173 * (Note: Comments in the code beginning "Workspace:" describe the
174 * minimal amount of workspace needed at that point in the code,
175 * as well as the preferred amount for good performance.
176 * NB refers to the optimal block size for the immediately
177 * following subroutine, as returned by ILAENV.)
178 **/
179 minwrk = 1;
180 if( *info==0 && lwork>=1 ) {
181 maxwrk = 0;
182 mm = m;
183 if( m>=n && m>=mnthr ) {
184 /**
185 * Path 1a - overdetermined, with many more rows than columns
186 **/
187 mm = n;
188 maxwrk = max( maxwrk, n+n*ilaenv( 1, "dgeqrf", " ", m, n,
189 -1, -1 ) );
190 maxwrk = max( maxwrk, n+nrhs*
191 ilaenv( 1, "dormqr", "lt", m, nrhs, n, -1 ) );
192 }
193 if( m>=n ) {
194 /**
195 * Path 1 - overdetermined or exactly determined
196 **/
197 maxwrk = max( maxwrk, 3*n+( mm+n )*
198 ilaenv( 1, "dgebrd", " ", mm, n, -1, -1 ) );
199 maxwrk = max( maxwrk, 3*n+nrhs*
200 ilaenv( 1, "dormbr", "qlt", mm, nrhs, n, -1 ) );
201 maxwrk = max( maxwrk, 3*n+( n-1 )*
202 ilaenv( 1, "dorgbr", "p", n, n, n, -1 ) );
203 maxwrk = max( maxwrk, 5*n-4 );
204 maxwrk = max( maxwrk, n*nrhs );
205 minwrk = max( max(3*n+mm, 3*n+nrhs), 5*n-4 );
206 }
207 if( n>m ) {
208 minwrk = max( max(3*m+nrhs, 3*m+n), 5*m-4 );
209 if( n>=mnthr ) {
210 /**
211 * Path 2a - underdetermined, with many more columns
212 * than rows
213 **/
214 maxwrk = m + m*ilaenv( 1, "dgelqf", " ", m, n, -1, -1 );
215 maxwrk = max( maxwrk, m*m+4*m+2*m*
216 ilaenv( 1, "dgebrd", " ", m, m, -1, -1 ) );
217 maxwrk = max( maxwrk, m*m+4*m+nrhs*
218 ilaenv( 1, "dormbr", "qlt", m, nrhs, m, -1 ) );
219 maxwrk = max( maxwrk, m*m+4*m+( m-1 )*
220 ilaenv( 1, "dorgbr", "p", m, m, m, -1 ) );
221 maxwrk = max( maxwrk, m*m+6*m-4 );
222 if( nrhs>1 ) {
223 maxwrk = max( maxwrk, m*m+m+m*nrhs );
224 } else {
225 maxwrk = max( maxwrk, m*m+2*m );
226 }
227 maxwrk = max( maxwrk, m+nrhs*
228 ilaenv( 1, "dormlq", "lt", n, nrhs, m, -1 ) );
229 } else {
230 /**
231 * Path 2 - underdetermined
232 **/
233 maxwrk = 3*m + ( n+m )*ilaenv( 1, "dgebrd", " ", m, n,
234 -1, -1 );
235 maxwrk = max( maxwrk, 3*m+nrhs*
236 ilaenv( 1, "dormbr", "qlt", m, nrhs, m, -1 ) );
237 maxwrk = max( maxwrk, 3*m+m*
238 ilaenv( 1, "dorgbr", "p", m, n, m, -1 ) );
239 maxwrk = max( maxwrk, 5*m-4 );
240 maxwrk = max( maxwrk, n*nrhs );
241 }
242 }
243 minwrk = min( minwrk, maxwrk );
244 work_1( 1 ) = maxwrk;
245 }
246
247 minwrk = max( minwrk, 1 );
248 if( lwork<minwrk )
249 *info = -12;
250 if( *info!=0 ) {
251 xerbla( "dgelss", -*info );
252 return;
253 }
254 /**
255 * Quick return if possible
256 **/
257 if( m==0 || n==0 ) {
258 *rank = 0;
259 return;
260 }
261 /**
262 * Get machine parameters
263 **/
264 eps = dlamch( 'p' );
265 sfmin = dlamch( 's' );
266 smlnum = sfmin / eps;
267 bignum = one / smlnum;
268 dlabad( &smlnum, &bignum );
269 /**
270 * Scale A if max entry outside range [SMLNUM,BIGNUM]
271 **/
272 anrm = dlange( 'm', m, n, a, lda, work );
273 iascl = 0;
274 if( anrm>zero && anrm<smlnum ) {
275 /**
276 * Scale matrix norm up to SMLNUM
277 **/
278 dlascl( 'g', 0, 0, anrm, smlnum, m, n, a, lda, info );
279 iascl = 1;
280 } else if( anrm>bignum ) {
281 /**
282 * Scale matrix norm down to BIGNUM
283 **/
284 dlascl( 'g', 0, 0, anrm, bignum, m, n, a, lda, info );
285 iascl = 2;
286 } else if( anrm==zero ) {
287 /**
288 * Matrix all zero. Return zero solution.
289 **/
290 dlaset( 'f', max( m, n ), nrhs, zero, zero, b, ldb );
291 dlaset( 'f', minmn, 1, zero, zero, s, 1 );
292 *rank = 0;
293 goto L_70;
294 }
295 /**
296 * Scale B if max entry outside range [SMLNUM,BIGNUM]
297 **/
298 bnrm = dlange( 'm', m, nrhs, b, ldb, work );
299 ibscl = 0;
300 if( bnrm>zero && bnrm<smlnum ) {
301 /**
302 * Scale matrix norm up to SMLNUM
303 **/
304 dlascl( 'g', 0, 0, bnrm, smlnum, m, nrhs, b, ldb, info );
305 ibscl = 1;
306 } else if( bnrm>bignum ) {
307 /**
308 * Scale matrix norm down to BIGNUM
309 **/
310 dlascl( 'g', 0, 0, bnrm, bignum, m, nrhs, b, ldb, info );
311 ibscl = 2;
312 }
313 /**
314 * Overdetermined case
315 **/
316 if( m>=n ) {
317 /**
318 * Path 1 - overdetermined or exactly determined
319 **/
320 mm = m;
321 if( m>=mnthr ) {
322 /**
323 * Path 1a - overdetermined, with many more rows than columns
324 **/
325 mm = n;
326 itau = 1;
327 iwork = itau + n;
328 /**
329 * Compute A=Q*R
330 * (Workspace: need 2*N, prefer N+N*NB)
331 **/
332 dgeqrf( m, n, a, lda, &work_1( itau ), &work_1( iwork ),
333 lwork-iwork+1, info );
334 /**
335 * Multiply B by transpose(Q)
336 * (Workspace: need N+NRHS, prefer N+NRHS*NB)
337 **/
338 dormqr( 'l', 't', m, nrhs, n, a, lda, &work_1( itau ), b,
339 ldb, &work_1( iwork ), lwork-iwork+1, info );
340 /**
341 * Zero out below R
342 **/
343 if( n>1 )
344 dlaset( 'l', n-1, n-1, zero, zero, &a_2( 2, 1 ), lda );
345 }
346
347 ie = 1;
348 itauq = ie + n;
349 itaup = itauq + n;
350 iwork = itaup + n;
351 /**
352 * Bidiagonalize R in A
353 * (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB)
354 **/
355 dgebrd( mm, n, a, lda, s, &work_1( ie ), &work_1( itauq ),
356 &work_1( itaup ), &work_1( iwork ), lwork-iwork+1,
357 info );
358 /**
359 * Multiply B by transpose of left bidiagonalizing vectors of R
360 * (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB)
361 **/
362 dormbr( 'q', 'l', 't', mm, nrhs, n, a, lda, &work_1( itauq ),
363 b, ldb, &work_1( iwork ), lwork-iwork+1, info );
364 /**
365 * Generate right bidiagonalizing vectors of R in A
366 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
367 **/
368 dorgbr( 'p', n, n, n, a, lda, &work_1( itaup ),
369 &work_1( iwork ), lwork-iwork+1, info );
370 iwork = ie + n;
371 /**
372 * Perform bidiagonal QR iteration
373 * multiply B by transpose of left singular vectors
374 * compute right singular vectors in A
375 * (Workspace: need 5*N-4)
376 **/
377 dbdsqr( 'u', n, n, 0, nrhs, s, &work_1( ie ), a, lda, vdum,
378 1, b, ldb, &work_1( iwork ), info );
379 if( *info!=0 )
380 goto L_70;
381 /**
382 * Multiply B by reciprocals of singular values
383 **/
384 thr = max( rcond*s_1( 1 ), sfmin );
385 if( thr<zero )
386 thr = max( eps*s_1( 1 ), sfmin );
387 *rank = 0;
388 for (i=1 ; i<=n ; i+=1) {
389 if( s_1( i )>thr ) {
390 drscl( nrhs, s_1( i ), &b_2( i, 1 ), ldb );
391 *rank = *rank + 1;
392 } else {
393 dlaset( 'f', 1, nrhs, zero, zero, &b_2( i, 1 ), ldb );
394 }
395 }
396 /**
397 * Multiply B by right singular vectors
398 * (Workspace: need N, prefer N*NRHS)
399 **/
400 if( lwork>=ldb*nrhs && nrhs>1 ) {
401 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, n, nrhs, n,
402 one, a, lda, b, ldb, zero, work, ldb );
403 dlacpy( 'g', n, nrhs, work, ldb, b, ldb );
404 } else if( nrhs>1 ) {
405 chunk = lwork / n;
406 for (i=1 ; chunk>0?i<=nrhs:i>=nrhs ; i+=chunk) {
407 bl = min( nrhs-i+1, chunk );
408 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, n, bl, n,
409 one, a, lda, b, ldb, zero, work, n );
410 dlacpy( 'g', n, bl, work, n, b, ldb );
411 }
412 } else {
413 cblas_dgemv(CblasColMajor, CblasTrans, n, n, one, a, lda, b, 1, zero,
414 work, 1 );
415 cblas_dcopy( n, work, 1, b, 1 );
416 }
417
418 } else if( n>=mnthr && lwork>=4*m+m*m+
419 max( max(m, 2*m-4), max(nrhs, n-3*m) ) ) {
420 /**
421 * Path 2a - underdetermined, with many more columns than rows
422 * and sufficient workspace for an efficient algorithm
423 **/
424 ldwork = m;
425 if( lwork>=max( 4*m+m*lda+max( max(m, 2*m-4), max(nrhs, n-3*m) ),
426 m*lda+m+m*nrhs ) )ldwork = lda;
427 itau = 1;
428 iwork = m + 1;
429 /**
430 * Compute A=L*Q
431 * (Workspace: need 2*M, prefer M+M*NB)
432 **/
433 dgelqf( m, n, a, lda, &work_1( itau ), &work_1( iwork ),
434 lwork-iwork+1, info );
435 il = iwork;
436 /**
437 * Copy L to WORK(IL), zeroing out above it
438 **/
439 dlacpy( 'l', m, m, a, lda, &work_1( il ), ldwork );
440 dlaset( 'u', m-1, m-1, zero, zero, &work_1( il+ldwork ),
441 ldwork );
442 ie = il + ldwork*m;
443 itauq = ie + m;
444 itaup = itauq + m;
445 iwork = itaup + m;
446 /**
447 * Bidiagonalize L in WORK(IL)
448 * (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB)
449 **/
450 dgebrd( m, m, &work_1( il ), ldwork, s, &work_1( ie ),
451 &work_1( itauq ), &work_1( itaup ), &work_1( iwork ),
452 lwork-iwork+1, info );
453 /**
454 * Multiply B by transpose of left bidiagonalizing vectors of L
455 * (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
456 **/
457 dormbr( 'q', 'l', 't', m, nrhs, m, &work_1( il ), ldwork,
458 &work_1( itauq ), b, ldb, &work_1( iwork ),
459 lwork-iwork+1, info );
460 /**
461 * Generate right bidiagonalizing vectors of R in WORK(IL)
462 * (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB)
463 **/
464 dorgbr( 'p', m, m, m, &work_1( il ), ldwork, &work_1( itaup ),
465 &work_1( iwork ), lwork-iwork+1, info );
466 iwork = ie + m;
467 /**
468 * Perform bidiagonal QR iteration,
469 * computing right singular vectors of L in WORK(IL) and
470 * multiplying B by transpose of left singular vectors
471 * (Workspace: need M*M+6*M-4)
472 **/
473 dbdsqr( 'u', m, m, 0, nrhs, s, &work_1( ie ), &work_1( il ),
474 ldwork, a, lda, b, ldb, &work_1( iwork ), info );
475 if( *info!=0 )
476 goto L_70;
477 /**
478 * Multiply B by reciprocals of singular values
479 **/
480 thr = max( rcond*s_1( 1 ), sfmin );
481 if( thr<zero )
482 thr = max( eps*s_1( 1 ), sfmin );
483 *rank = 0;
484 for (i=1 ; i<=m ; i+=1) {
485 if( s_1( i )>thr ) {
486 drscl( nrhs, s_1( i ), &b_2( i, 1 ), ldb );
487 *rank = *rank + 1;
488 } else {
489 dlaset( 'f', 1, nrhs, zero, zero, &b_2( i, 1 ), ldb );
490 }
491 }
492 iwork = ie;
493 /**
494 * Multiply B by right singular vectors of L in WORK(IL)
495 * (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS)
496 **/
497 if( lwork>=ldb*nrhs+iwork-1 && nrhs>1 ) {
498 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, m, nrhs, m,
499 one, &work_1( il ), ldwork, b, ldb, zero,
500 &work_1( iwork ), ldb );
501 dlacpy( 'g', m, nrhs, &work_1( iwork ), ldb, b, ldb );
502 } else if( nrhs>1 ) {
503 chunk = ( lwork-iwork+1 ) / m;
504 for (i=1 ; chunk>0?i<=nrhs:i>=nrhs ; i+=chunk) {
505 bl = min( nrhs-i+1, chunk );
506 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, m, bl, m,
507 one, &work_1( il ), ldwork, &b_2( 1, i ), ldb, zero,
508 &work_1( iwork ), n );
509 dlacpy( 'g', m, bl, &work_1( iwork ), n, b, ldb );
510 }
511 } else {
512 cblas_dgemv(CblasColMajor, CblasTrans, m, m, one, &work_1( il ),
513 ldwork, &b_2( 1, 1 ), 1, zero, &work_1( iwork ), 1 );
514 cblas_dcopy( m, &work_1( iwork ), 1, &b_2( 1, 1 ), 1 );
515 }
516 /**
517 * Zero out below first M rows of B
518 **/
519 dlaset( 'f', n-m, nrhs, zero, zero, &b_2( m+1, 1 ), ldb );
520 iwork = itau + m;
521 /**
522 * Multiply transpose(Q) by B
523 * (Workspace: need M+NRHS, prefer M+NRHS*NB)
524 **/
525 dormlq( 'l', 't', n, nrhs, m, a, lda, &work_1( itau ), b,
526 ldb, &work_1( iwork ), lwork-iwork+1, info );
527
528 } else {
529 /**
530 * Path 2 - remaining underdetermined cases
531 **/
532 ie = 1;
533 itauq = ie + m;
534 itaup = itauq + m;
535 iwork = itaup + m;
536 /**
537 * Bidiagonalize A
538 * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
539 **/
540 dgebrd( m, n, a, lda, s, &work_1( ie ), &work_1( itauq ),
541 &work_1( itaup ), &work_1( iwork ), lwork-iwork+1,
542 info );
543 /**
544 * Multiply B by transpose of left bidiagonalizing vectors
545 * (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB)
546 **/
547 dormbr( 'q', 'l', 't', m, nrhs, n, a, lda, &work_1( itauq ),
548 b, ldb, &work_1( iwork ), lwork-iwork+1, info );
549 /**
550 * Generate right bidiagonalizing vectors in A
551 * (Workspace: need 4*M, prefer 3*M+M*NB)
552 **/
553 dorgbr( 'p', m, n, m, a, lda, &work_1( itaup ),
554 &work_1( iwork ), lwork-iwork+1, info );
555 iwork = ie + m;
556 /**
557 * Perform bidiagonal QR iteration,
558 * computing right singular vectors of A in A and
559 * multiplying B by transpose of left singular vectors
560 * (Workspace: need 5*M-4)
561 **/
562 dbdsqr( 'l', m, n, 0, nrhs, s, &work_1( ie ), a, lda, vdum,
563 1, b, ldb, &work_1( iwork ), info );
564 if( *info!=0 )
565 goto L_70;
566 /**
567 * Multiply B by reciprocals of singular values
568 **/
569 thr = max( rcond*s_1( 1 ), sfmin );
570 if( thr<zero )
571 thr = max( eps*s_1( 1 ), sfmin );
572 *rank = 0;
573 for (i=1 ; i<=m ; i+=1) {
574 if( s_1( i )>thr ) {
575 drscl( nrhs, s_1( i ), &b_2( i, 1 ), ldb );
576 *rank = *rank + 1;
577 } else {
578 dlaset( 'f', 1, nrhs, zero, zero, &b_2( i, 1 ), ldb );
579 }
580 }
581 /**
582 * Multiply B by right singular vectors of A
583 * (Workspace: need N, prefer N*NRHS)
584 **/
585 if( lwork>=ldb*nrhs && nrhs>1 ) {
586 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, n, nrhs, m,
587 one, a, lda, b, ldb, zero, work, ldb );
588 dlacpy( 'f', n, nrhs, work, ldb, b, ldb );
589 } else if( nrhs>1 ) {
590 chunk = lwork / n;
591 for (i=1 ; chunk>0?i<=nrhs:i>=nrhs ; i+=chunk) {
592 bl = min( nrhs-i+1, chunk );
593 cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, n, bl, m, one,
594 a, lda, &b_2( 1, i ), ldb, zero, work, n );
595 dlacpy( 'f', n, bl, work, n, &b_2( 1, i ), ldb );
596 }
597 } else {
598 cblas_dgemv(CblasColMajor, CblasTrans, m, n, one, a, lda, b, 1, zero,
599 work, 1 );
600 cblas_dcopy( n, work, 1, b, 1 );
601 }
602 }
603 /**
604 * Undo scaling
605 **/
606 if( iascl==1 ) {
607 dlascl( 'g', 0, 0, anrm, smlnum, n, nrhs, b, ldb, info );
608 dlascl( 'g', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
609 info );
610 } else if( iascl==2 ) {
611 dlascl( 'g', 0, 0, anrm, bignum, n, nrhs, b, ldb, info );
612 dlascl( 'g', 0, 0, bignum, anrm, minmn, 1, s, minmn,
613 info );
614 }
615 if( ibscl==1 ) {
616 dlascl( 'g', 0, 0, smlnum, bnrm, n, nrhs, b, ldb, info );
617 } else if( ibscl==2 ) {
618 dlascl( 'g', 0, 0, bignum, bnrm, n, nrhs, b, ldb, info );
619 }
620
621 L_70:
622 work_1( 1 ) = maxwrk;
623 return;
624 /**
625 * End of DGELSS
626 **/
627 }
628