1 /*
2  * $Id: dgels.c,v 1.1 2005-09-18 22:04:43 dhmunro Exp $
3  * LAPACK routines to solve (in least squares sense) a matrix
4  * by QR or LQ decomposition.
5  */
6 /* Copyright (c) 2005, The Regents of the University of California.
7  * All rights reserved.
8  * This file is part of yorick (http://yorick.sourceforge.net).
9  * Read the accompanying LICENSE file for details.
10  */
11 
12 #include "dg.h"
13 
14 
15 /*---blas routines---*/
16 /* dcopy dnrm2 dscal dger dgemv dgemm dtrmv dtrmm dtrsm */
17 
18 
19 
20 /*---converted nutty string switches to single characters (lower case)---*/
21 #define lsame(x,y) ((x)==(y))
22 
23 
24 /*-----Fortran intrinsics converted-----*/
25 #define abs(x) ((x)>=0?(x):-(x))
26 extern double sqrt(double);
27 #define dble(x) ((double)x)
28 #define sign(x,y) ((((x)<0)!=((y)<0))?-(x):(x))
29 #define min(x,y) ((x)<(y)?(x):(y))
30 #define max(x,y) ((x)>(y)?(x):(y))
31 /*-----end of Fortran intrinsics-----*/
32 
33 
34 
dgels(char trans,long m,long n,long nrhs,double a[],long lda,double b[],long ldb,double work[],long lwork,long * info)35 void dgels( char trans, long m, long n, long nrhs,
36            double a[], long lda, double b[], long ldb,
37            double work[], long lwork,long *info )
38 {
39   /**
40    *  -- LAPACK driver routine (version 1.1) --
41    *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
42    *     Courant Institute, Argonne National Lab, and Rice University
43    *     March 31, 1993
44    *
45    *     .. Scalar Arguments ..*/
46   /**     ..
47    *     .. Array Arguments ..*/
48 #undef work_1
49 #define work_1(a1) work[a1-1]
50 #undef b_2
51 #define b_2(a1,a2) b[a1-1+ldb*(a2-1)]
52 #undef a_2
53 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
54   /**     ..
55    *
56    *  Purpose
57    *  =======
58    *
59    *  DGELS solves overdetermined or underdetermined real linear systems
60    *  involving an M-by-N matrix A, or its transpose, using a QR or LQ
61    *  factorization of A.  It is assumed that A has full rank.
62    *
63    *  The following options are provided:
64    *
65    *  1. If TRANS = 'N' and m >= n:  find the least squares solution of
66    *     an overdetermined system, i.e., solve the least squares problem
67    *                  minimize || B - A*X ||.
68    *
69    *  2. If TRANS = 'N' and m < n:  find the minimum norm solution of
70    *     an underdetermined system A * X = B.
71    *
72    *  3. If TRANS = 'T' and m >= n:  find the minimum norm solution of
73    *     an undetermined system A**T * X = B.
74    *
75    *  4. If TRANS = 'T' and m < n:  find the least squares solution of
76    *     an overdetermined system, i.e., solve the least squares problem
77    *                  minimize || B - A**T * X ||.
78    *
79    *  Several right hand side vectors b and solution vectors x can be
80    *  handled in a single call; they are stored as the columns of the
81    *  M-by-NRHS right hand side matrix B and the N-by-NRHS solution
82    *  matrix X.
83    *
84    *  Arguments
85    *  =========
86    *
87    *  TRANS   (input) CHARACTER
88    *          = 'N': the linear system involves A;
89    *          = 'T': the linear system involves A**T.
90    *
91    *  M       (input) INTEGER
92    *          The number of rows of the matrix A.  M >= 0.
93    *
94    *  N       (input) INTEGER
95    *          The number of columns of the matrix A.  N >= 0.
96    *
97    *  NRHS    (input) INTEGER
98    *          The number of right hand sides, i.e., the number of
99    *          columns of the matrices B and X. NRHS >=0.
100    *
101    *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
102    *          On entry, the M-by-N matrix A.
103    *          On exit,
104    *            if M >= N, A is overwritten by details of its QR
105    *                       factorization as returned by DGEQRF;
106    *            if M <  N, A is overwritten by details of its LQ
107    *                       factorization as returned by DGELQF.
108    *
109    *  LDA     (input) INTEGER
110    *          The leading dimension of the array A.  LDA >= max(1,M).
111    *
112    *  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
113    *          On entry, the matrix B of right hand side vectors, stored
114    *          columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS
115    *          if TRANS = 'T'.
116    *          On exit, B is overwritten by the solution vectors, stored
117    *          columnwise:  if TRANS = 'N' and m >= n, rows 1 to n of B
118    *          contain the least squares solution vectors; the residual
119    *          sum of squares for the solution in each column is given by
120    *          the sum of squares of elements N+1 to M in that column;
121    *          if TRANS = 'N' and m < n, rows 1 to N of B contain the
122    *          minimum norm solution vectors;
123    *          if TRANS = 'T' and m >= n, rows 1 to M of B contain the
124    *          minimum norm solution vectors;
125    *          if TRANS = 'T' and m < n, rows 1 to M of B contain the
126    *          least squares solution vectors; the residual sum of squares
127    *          for the solution in each column is given by the sum of
128    *          squares of elements M+1 to N in that column.
129    *
130    *  LDB     (input) INTEGER
131    *          The leading dimension of the array B. LDB >= MAX(1,M,N).
132    *
133    *  WORK_1    (workspace) DOUBLE PRECISION array, dimension (LWORK)
134    *          On exit, if INFO = 0, WORK_1(1) returns the optimal LWORK.
135    *
136    *  LWORK   (input) INTEGER
137    *          The dimension of the array WORK.
138    *          LWORK >= min(M,N) + MAX(1,M,N,NRHS).
139    *          For optimal performance,
140    *          LWORK >= min(M,N) + MAX(1,M,N,NRHS) * NB
141    *          where NB is the optimum block size.
142    *
143    *  INFO    (output) INTEGER
144    *          = 0:  successful exit
145    *          < 0:  if INFO = -i, the i-th argument had an illegal value
146    *
147    *  =====================================================================
148    *
149    *     .. Parameters ..*/
150 #undef zero
151 #define zero 0.0e0
152 #undef one
153 #define one 1.0e0
154   /**     ..
155    *     .. Local Scalars ..*/
156   int            tpsd=0;
157   long            brow, i, iascl, ibscl, j, mn, nb, scllen, wsize=0;
158   double    anrm, bignum, bnrm, smlnum;
159   /**     ..
160    *     .. Local Arrays ..*/
161   double    rwork[1];
162 #undef rwork_1
163 #define rwork_1(a1) [a1-1]
164   /**     ..
165    *     .. Intrinsic Functions ..*/
166   /*      intrinsic          dble, max, min;*/
167   /**     ..
168    *     .. Executable Statements ..
169    *
170    *     Test the input arguments.
171    **/
172   /*-----implicit-declarations-----*/
173   /*-----end-of-declarations-----*/
174   *info = 0;
175   mn = min( m, n );
176   if( !( lsame( trans, 'n' ) || lsame( trans, 't' ) ) ) {
177     *info = -1;
178   } else if( m<0 ) {
179     *info = -2;
180   } else if( n<0 ) {
181     *info = -3;
182   } else if( nrhs<0 ) {
183     *info = -4;
184   } else if( lda<max( 1, m ) ) {
185     *info = -6;
186   } else if( ldb<max( max(1, m), n ) ) {
187     *info = -8;
188   } else if( lwork<max( 1, mn+max( max(m, n), nrhs ) ) ) {
189     *info = -10;
190   }
191   /**
192    *     Figure out optimal block size
193    **/
194   if( *info==0 || *info==-10 ) {
195 
196     tpsd = 1;
197     if( lsame( trans, 'n' ) )
198       tpsd = 0;
199 
200     if( m>=n ) {
201       nb = ilaenv( 1, "dgeqrf", " ", m, n, -1, -1 );
202       if( tpsd ) {
203         nb = max( nb, ilaenv( 1, "dormqr", "ln", m, nrhs, n,
204                              -1 ) );
205       } else {
206         nb = max( nb, ilaenv( 1, "dormqr", "lt", m, nrhs, n,
207                              -1 ) );
208       }
209     } else {
210       nb = ilaenv( 1, "dgelqf", " ", m, n, -1, -1 );
211       if( tpsd ) {
212         nb = max( nb, ilaenv( 1, "dormlq", "lt", n, nrhs, m,
213                              -1 ) );
214       } else {
215         nb = max( nb, ilaenv( 1, "dormlq", "ln", n, nrhs, m,
216                              -1 ) );
217       }
218     }
219 
220     wsize = mn + max( max(m, n), nrhs )*nb;
221     work_1( 1 ) = dble( wsize );
222 
223   }
224 
225   if( *info!=0 ) {
226     xerbla( "dgels ", -*info );
227     return;
228   }
229   /**
230    *     Quick return if possible
231    **/
232   if( min( min(m, n), nrhs )==0 ) {
233     dlaset( 'f'/*ull*/, max( m, n ), nrhs, zero, zero, b, ldb );
234     return;
235   }
236   /**
237    *     Get machine parameters
238    **/
239   smlnum = dlamch( 's' ) / dlamch( 'p' );
240   bignum = one / smlnum;
241   dlabad( &smlnum, &bignum );
242   /**
243    *     Scale A, B if max entry outside range [SMLNUM,BIGNUM]
244    **/
245   anrm = dlange( 'm', m, n, a, lda, rwork );
246   iascl = 0;
247   if( anrm>zero && anrm<smlnum ) {
248     /**
249      *        Scale matrix norm up to SMLNUM
250      **/
251     dlascl( 'g', 0, 0, anrm, smlnum, m, n, a, lda, info );
252     iascl = 1;
253   } else if( anrm>bignum ) {
254     /**
255      *        Scale matrix norm down to BIGNUM
256      **/
257     dlascl( 'g', 0, 0, anrm, bignum, m, n, a, lda, info );
258     iascl = 2;
259   } else if( anrm==zero ) {
260     /**
261      *        Matrix all zero. Return zero solution.
262      **/
263     dlaset( 'f', max( m, n ), nrhs, zero, zero, b, ldb );
264     goto L_50;
265   }
266 
267   brow = m;
268   if( tpsd )
269     brow = n;
270   bnrm = dlange( 'm', brow, nrhs, b, ldb, rwork );
271   ibscl = 0;
272   if( bnrm>zero && bnrm<smlnum ) {
273     /**
274      *        Scale matrix norm up to SMLNUM
275      **/
276     dlascl( 'g', 0, 0, bnrm, smlnum, brow, nrhs, b, ldb,
277            info );
278     ibscl = 1;
279   } else if( bnrm>bignum ) {
280     /**
281      *        Scale matrix norm down to BIGNUM
282      **/
283     dlascl( 'g', 0, 0, bnrm, bignum, brow, nrhs, b, ldb,
284            info );
285     ibscl = 2;
286   }
287 
288   if( m>=n ) {
289     /**
290      *        compute QR factorization of A
291      **/
292     dgeqrf( m, n, a, lda, &work_1( 1 ), &work_1( mn+1 ), lwork-mn,
293            info );
294     /**
295      *        workspace at least N, optimally N*NB
296      **/
297     if( !tpsd ) {
298       /**
299        *           Least-Squares Problem min || A * X - B ||
300        *
301        *           B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS)
302        **/
303       dormqr( 'l'/*eft*/, 't'/*ranspose*/, m, nrhs, n, a, lda,
304              &work_1( 1 ), b, ldb, &work_1( mn+1 ), lwork-mn,
305              info );
306       /**
307        *           workspace at least NRHS, optimally NRHS*NB
308        *
309        *           B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS)
310        **/
311       cblas_dtrsm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans,
312                   CblasNonUnit, n, nrhs, one, a, lda, b, ldb );
313 
314       scllen = n;
315 
316     } else {
317       /**
318        *           Overdetermined system of equations A' * X = B
319        *
320        *           B(1:N,1:NRHS) := inv(R') * B(1:N,1:NRHS)
321        **/
322       cblas_dtrsm(CblasColMajor, CblasLeft, CblasUpper, CblasTrans,
323                   CblasNonUnit, n, nrhs, one, a, lda, b, ldb );
324       /**
325        *           B(N+1:M,1:NRHS) = ZERO
326        **/
327       for (j=1 ; j<=nrhs ; j+=1) {
328         for (i=n + 1 ; i<=m ; i+=1) {
329           b_2( i, j ) = zero;
330         }
331       }
332       /**
333        *           B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS)
334        **/
335       dormqr( 'l'/*eft*/, 'n'/*o transpose*/, m, nrhs, n, a, lda,
336              &work_1( 1 ), b, ldb, &work_1( mn+1 ), lwork-mn,
337              info );
338       /**
339        *           workspace at least NRHS, optimally NRHS*NB
340        **/
341       scllen = m;
342 
343     }
344 
345   } else {
346     /**
347      *        Compute LQ factorization of A
348      **/
349     dgelqf( m, n, a, lda, &work_1( 1 ), &work_1( mn+1 ), lwork-mn,
350            info );
351     /**
352      *        workspace at least M, optimally M*NB.
353      **/
354     if( !tpsd ) {
355       /**
356        *           underdetermined system of equations A * X = B
357        *
358        *           B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS)
359        **/
360       cblas_dtrsm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans,
361                   CblasNonUnit, m, nrhs, one, a, lda, b, ldb );
362       /**
363        *           B(M+1:N,1:NRHS) = 0
364        **/
365       for (j=1 ; j<=nrhs ; j+=1) {
366         for (i=m + 1 ; i<=n ; i+=1) {
367           b_2( i, j ) = zero;
368         }
369       }
370       /**
371        *           B(1:N,1:NRHS) := Q(1:N,:)' * B(1:M,1:NRHS)
372        **/
373       dormlq( 'l'/*eft*/, 't'/*ranspose*/, n, nrhs, m, a, lda,
374              &work_1( 1 ), b, ldb, &work_1( mn+1 ), lwork-mn,
375              info );
376       /**
377        *           workspace at least NRHS, optimally NRHS*NB
378        **/
379       scllen = n;
380 
381     } else {
382       /**
383        *           overdetermined system min || A' * X - B ||
384        *
385        *           B(1:N,1:NRHS) := Q * B(1:N,1:NRHS)
386        **/
387       dormlq( 'l'/*eft*/, 'n'/*o transpose*/, n, nrhs, m, a, lda,
388              &work_1( 1 ), b, ldb, &work_1( mn+1 ), lwork-mn,
389              info );
390       /**
391        *           workspace at least NRHS, optimally NRHS*NB
392        *
393        *           B(1:M,1:NRHS) := inv(L') * B(1:M,1:NRHS)
394        **/
395       cblas_dtrsm(CblasColMajor, CblasLeft, CblasLower, CblasTrans,
396                   CblasNonUnit, m, nrhs, one, a, lda, b, ldb );
397 
398       scllen = m;
399 
400     }
401 
402   }
403   /**
404    *     Undo scaling
405    **/
406   if( iascl==1 ) {
407     dlascl( 'g', 0, 0, anrm, smlnum, scllen, nrhs, b, ldb,
408            info );
409   } else if( iascl==2 ) {
410     dlascl( 'g', 0, 0, anrm, bignum, scllen, nrhs, b, ldb,
411            info );
412   }
413   if( ibscl==1 ) {
414     dlascl( 'g', 0, 0, smlnum, bnrm, scllen, nrhs, b, ldb,
415            info );
416   } else if( ibscl==2 ) {
417     dlascl( 'g', 0, 0, bignum, bnrm, scllen, nrhs, b, ldb,
418            info );
419   }
420 
421  L_50:
422   work_1( 1 ) = dble( wsize );
423 
424   return;
425   /**
426    *     End of DGELS
427    **/
428 }
429 
430 
431 
dormlq(char side,char trans,long m,long n,long k,double a[],long lda,double tau[],double c[],long ldc,double work[],long lwork,long * info)432 void dormlq( char side, char trans, long m, long n, long k,
433             double a[], long lda, double tau[], double c[], long ldc,
434             double work[], long lwork, long *info )
435 {
436   /**
437    *  -- LAPACK routine (version 1.1) --
438    *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
439    *     Courant Institute, Argonne National Lab, and Rice University
440    *     March 31, 1993
441    *
442    *     .. Scalar Arguments ..*/
443   /**     ..
444    *     .. Array Arguments ..*/
445 #undef work_1
446 #define work_1(a1) work[a1-1]
447 #undef tau_1
448 #define tau_1(a1) tau[a1-1]
449 #undef c_2
450 #define c_2(a1,a2) c[a1-1+ldc*(a2-1)]
451 #undef a_2
452 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
453   /**     ..
454    *
455    *  Purpose
456    *  =======
457    *
458    *  DORMLQ overwrites the general real M-by-N matrix C with
459    *
460    *                  SIDE = 'L'     SIDE = 'R'
461    *  TRANS = 'N':      Q * C          C * Q
462    *  TRANS = 'T':      Q**T * C       C * Q**T
463    *
464    *  where Q is a real orthogonal matrix defined as the product of k
465    *  elementary reflectors
466    *
467    *        Q = H(k) . . . H(2) H(1)
468    *
469    *  as returned by DGELQF. Q is of order M if SIDE = 'L' and of order N
470    *  if SIDE = 'R'.
471    *
472    *  Arguments
473    *  =========
474    *
475    *  SIDE    (input) CHARACTER*1
476    *          = 'L': apply Q or Q**T from the Left;
477    *          = 'R': apply Q or Q**T from the Right.
478    *
479    *  TRANS   (input) CHARACTER*1
480    *          = 'N':  No transpose, apply Q;
481    *          = 'T':  Transpose, apply Q**T.
482    *
483    *  M       (input) INTEGER
484    *          The number of rows of the matrix C. M >= 0.
485    *
486    *  N       (input) INTEGER
487    *          The number of columns of the matrix C. N >= 0.
488    *
489    *  K       (input) INTEGER
490    *          The number of elementary reflectors whose product defines
491    *          the matrix Q.
492    *          If SIDE = 'L', M >= K >= 0;
493    *          if SIDE = 'R', N >= K >= 0.
494    *
495    *  A       (input) DOUBLE PRECISION array, dimension
496    *                               (LDA,M) if SIDE = 'L',
497    *                               (LDA,N) if SIDE = 'R'
498    *          The i-th row must contain the vector which defines the
499    *          elementary reflector H(i), for i = 1,2,...,k, as returned by
500    *          DGELQF in the first k rows of its array argument A.
501    *          A is modified by the routine but restored on exit.
502    *
503    *  LDA     (input) INTEGER
504    *          The leading dimension of the array A. LDA >= max(1,K).
505    *
506    *  TAU     (input) DOUBLE PRECISION array, dimension (K)
507    *          TAU(i) must contain the scalar factor of the elementary
508    *          reflector H(i), as returned by DGELQF.
509    *
510    *  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
511    *          On entry, the M-by-N matrix C.
512    *          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
513    *
514    *  LDC     (input) INTEGER
515    *          The leading dimension of the array C. LDC >= max(1,M).
516    *
517    *  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK)
518    *          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
519    *
520    *  LWORK   (input) INTEGER
521    *          The dimension of the array WORK.
522    *          If SIDE = 'L', LWORK >= max(1,N);
523    *          if SIDE = 'R', LWORK >= max(1,M).
524    *          For optimum performance LWORK >= N*NB if SIDE = 'L', and
525    *          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
526    *          blocksize.
527    *
528    *  INFO    (output) INTEGER
529    *          = 0:  successful exit
530    *          < 0:  if INFO = -i, the i-th argument had an illegal value
531    *
532    *  =====================================================================
533    *
534    *     .. Parameters ..*/
535 #undef nbmax
536 #define nbmax 64
537 #undef ldt
538 #define ldt (nbmax+1)
539   /**     ..
540    *     .. Local Scalars ..*/
541   int            left, notran;
542   char          transt, side_trans[3];
543   long    i, i1, i2, i3, ib, ic=0, iinfo, iws, jc=0, ldwork,
544           mi=0, nb, nbmin, ni=0, nq, nw;
545   /**     ..
546    *     .. Local Arrays ..*/
547   double    t[nbmax][ldt];
548 #undef t_2
549 #define t_2(a1,a2) t[a2-1][a1-1]
550   /**     ..
551    *     .. Intrinsic Functions ..*/
552   /*      intrinsic          max, min;*/
553   /**     ..
554    *     .. Executable Statements ..
555    *
556    *     Test the input arguments
557    **/
558   /*-----implicit-declarations-----*/
559   /*-----end-of-declarations-----*/
560   *info = 0;
561   left = lsame( side, 'l' );
562   notran = lsame( trans, 'n' );
563   /**
564    *     NQ is the order of Q and NW is the minimum dimension of WORK
565    **/
566   if( left ) {
567     nq = m;
568     nw = n;
569   } else {
570     nq = n;
571     nw = m;
572   }
573   if( !left && !lsame( side, 'r' ) ) {
574     *info = -1;
575   } else if( !notran && !lsame( trans, 't' ) ) {
576     *info = -2;
577   } else if( m<0 ) {
578     *info = -3;
579   } else if( n<0 ) {
580     *info = -4;
581   } else if( k<0 || k>nq ) {
582     *info = -5;
583   } else if( lda<max( 1, k ) ) {
584     *info = -7;
585   } else if( ldc<max( 1, m ) ) {
586     *info = -10;
587   } else if( lwork<max( 1, nw ) ) {
588     *info = -12;
589   }
590   if( *info!=0 ) {
591     xerbla( "dormlq", -*info );
592     return;
593   }
594   /**
595    *     Quick return if possible
596    **/
597   if( m==0 || n==0 || k==0 ) {
598     work_1( 1 ) = 1;
599     return;
600   }
601   /**
602    *     Determine the block size.  NB may be at most NBMAX, where NBMAX
603    *     is used to define the local array T.
604    **/
605   side_trans[0]= side;
606   side_trans[1]= trans;
607   side_trans[2]= '\0';
608   nb = min( nbmax, ilaenv( 1, "dormlq", side_trans, m, n, k,
609                           -1 ) );
610   nbmin = 2;
611   ldwork = nw;
612   if( nb>1 && nb<k ) {
613     iws = nw*nb;
614     if( lwork<iws ) {
615       nb = lwork / ldwork;
616       nbmin = max( 2, ilaenv( 2, "dormlq", side_trans, m, n, k,
617                              -1 ) );
618     }
619   } else {
620     iws = nw;
621   }
622 
623   if( nb<nbmin || nb>=k ) {
624     /**
625      *        Use unblocked code
626      **/
627     dorml2( side, trans, m, n, k, a, lda, tau, c, ldc, work,
628            &iinfo );
629   } else {
630     /**
631      *        Use blocked code
632      **/
633     if( ( left && notran ) ||
634        ( !left && !notran ) ) {
635       i1 = 1;
636       i2 = k;
637       i3 = nb;
638     } else {
639       i1 = ( ( k-1 ) / nb )*nb + 1;
640       i2 = 1;
641       i3 = -nb;
642     }
643 
644     if( left ) {
645       ni = n;
646       jc = 1;
647     } else {
648       mi = m;
649       ic = 1;
650     }
651 
652     if( notran ) {
653       transt = 't';
654     } else {
655       transt = 'n';
656     }
657 
658     for (i=i1 ; i3>0?i<=i2:i>=i2 ; i+=i3) {
659       ib = min( nb, k-i+1 );
660       /**
661        *           Form the triangular factor of the block reflector
662        *           H = H(i) H(i+1) . . . H(i+ib-1)
663        **/
664       dlarft( 'f'/*orward*/, 'r'/*owwise*/, nq-i+1, ib, &a_2( i, i ),
665              lda, &tau_1( i ), (double *)t, ldt );
666       if( left ) {
667         /**
668          *              H or H' is applied to C(i:m,1:n)
669          **/
670         mi = m - i + 1;
671         ic = i;
672       } else {
673         /**
674          *              H or H' is applied to C(1:m,i:n)
675          **/
676         ni = n - i + 1;
677         jc = i;
678       }
679       /**
680        *           Apply H or H'
681        **/
682       dlarfb( side, transt, 'f'/*orward*/, 'r'/*owwise*/, mi, ni, ib,
683              &a_2( i, i ), lda, (double *)t, ldt, &c_2( ic, jc ), ldc, work,
684              ldwork );
685     }
686   }
687   work_1( 1 ) = iws;
688   return;
689   /**
690    *     End of DORMLQ
691    **/
692 }
693 
694 
695 
dorml2(char side,char trans,long m,long n,long k,double a[],long lda,double tau[],double c[],long ldc,double work[],long * info)696 void dorml2( char side, char trans, long m, long n, long k,
697             double a[], long lda, double tau[], double c[], long ldc,
698             double work[], long *info )
699 {
700   /**
701    *  -- LAPACK routine (version 1.1) --
702    *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
703    *     Courant Institute, Argonne National Lab, and Rice University
704    *     February 29, 1992
705    *
706    *     .. Scalar Arguments ..*/
707   /**     ..
708    *     .. Array Arguments ..*/
709 #undef work_1
710 #define work_1(a1) work[a1-1]
711 #undef tau_1
712 #define tau_1(a1) tau[a1-1]
713 #undef c_2
714 #define c_2(a1,a2) c[a1-1+ldc*(a2-1)]
715 #undef a_2
716 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
717   /**     ..
718    *
719    *  Purpose
720    *  =======
721    *
722    *  DORML2 overwrites the general real m by n matrix C with
723    *
724    *        Q * C  if SIDE = 'L' and TRANS = 'N', or
725    *
726    *        Q'* C  if SIDE = 'L' and TRANS = 'T', or
727    *
728    *        C * Q  if SIDE = 'R' and TRANS = 'N', or
729    *
730    *        C * Q' if SIDE = 'R' and TRANS = 'T',
731    *
732    *  where Q is a real orthogonal matrix defined as the product of k
733    *  elementary reflectors
734    *
735    *        Q = H(k) . . . H(2) H(1)
736    *
737    *  as returned by DGELQF. Q is of order m if SIDE = 'L' and of order n
738    *  if SIDE = 'R'.
739    *
740    *  Arguments
741    *  =========
742    *
743    *  SIDE    (input) CHARACTER*1
744    *          = 'L': apply Q or Q' from the Left
745    *          = 'R': apply Q or Q' from the Right
746    *
747    *  TRANS   (input) CHARACTER*1
748    *          = 'N': apply Q  (No transpose)
749    *          = 'T': apply Q' (Transpose)
750    *
751    *  M       (input) INTEGER
752    *          The number of rows of the matrix C. M >= 0.
753    *
754    *  N       (input) INTEGER
755    *          The number of columns of the matrix C. N >= 0.
756    *
757    *  K       (input) INTEGER
758    *          The number of elementary reflectors whose product defines
759    *          the matrix Q.
760    *          If SIDE = 'L', M >= K >= 0;
761    *          if SIDE = 'R', N >= K >= 0.
762    *
763    *  A       (input) DOUBLE PRECISION array, dimension
764    *                               (LDA,M) if SIDE = 'L',
765    *                               (LDA,N) if SIDE = 'R'
766    *          The i-th row must contain the vector which defines the
767    *          elementary reflector H(i), for i = 1,2,...,k, as returned by
768    *          DGELQF in the first k rows of its array argument A.
769    *          A is modified by the routine but restored on exit.
770    *
771    *  LDA     (input) INTEGER
772    *          The leading dimension of the array A. LDA >= max(1,K).
773    *
774    *  TAU     (input) DOUBLE PRECISION array, dimension (K)
775    *          TAU(i) must contain the scalar factor of the elementary
776    *          reflector H(i), as returned by DGELQF.
777    *
778    *  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
779    *          On entry, the m by n matrix C.
780    *          On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
781    *
782    *  LDC     (input) INTEGER
783    *          The leading dimension of the array C. LDC >= max(1,M).
784    *
785    *  WORK    (workspace) DOUBLE PRECISION array, dimension
786    *                                   (N) if SIDE = 'L',
787    *                                   (M) if SIDE = 'R'
788    *
789    *  INFO    (output) INTEGER
790    *          = 0: successful exit
791    *          < 0: if INFO = -i, the i-th argument had an illegal value
792    *
793    *  =====================================================================
794    *
795    *     .. Parameters ..*/
796 #undef one
797 #define one 1.0e+0
798   /**     ..
799    *     .. Local Scalars ..*/
800   int            left, notran;
801   long            i, i1, i2, i3, ic=0, jc=0, mi=0, ni=0, nq;
802   double    aii;
803   /**     ..
804    *     .. Intrinsic Functions ..*/
805   /*      intrinsic          max;*/
806   /**     ..
807    *     .. Executable Statements ..
808    *
809    *     Test the input arguments
810    **/
811   /*-----implicit-declarations-----*/
812   /*-----end-of-declarations-----*/
813   *info = 0;
814   left = lsame( side, 'l' );
815   notran = lsame( trans, 'n' );
816   /**
817    *     NQ is the order of Q
818    **/
819   if( left ) {
820     nq = m;
821   } else {
822     nq = n;
823   }
824   if( !left && !lsame( side, 'r' ) ) {
825     *info = -1;
826   } else if( !notran && !lsame( trans, 't' ) ) {
827     *info = -2;
828   } else if( m<0 ) {
829     *info = -3;
830   } else if( n<0 ) {
831     *info = -4;
832   } else if( k<0 || k>nq ) {
833     *info = -5;
834   } else if( lda<max( 1, k ) ) {
835     *info = -7;
836   } else if( ldc<max( 1, m ) ) {
837     *info = -10;
838   }
839   if( *info!=0 ) {
840     xerbla( "dorml2", -*info );
841     return;
842   }
843   /**
844    *     Quick return if possible
845    **/
846   if( m==0 || n==0 || k==0 )
847     return;
848 
849   if( ( left && notran ) || ( !left && !notran ) )
850     {
851       i1 = 1;
852       i2 = k;
853       i3 = 1;
854     } else {
855       i1 = k;
856       i2 = 1;
857       i3 = -1;
858     }
859 
860   if( left ) {
861     ni = n;
862     jc = 1;
863   } else {
864     mi = m;
865     ic = 1;
866   }
867 
868   for (i=i1 ; i3>0?i<=i2:i>=i2 ; i+=i3) {
869     if( left ) {
870       /**
871        *           H(i) is applied to C(i:m,1:n)
872        **/
873       mi = m - i + 1;
874       ic = i;
875     } else {
876       /**
877        *           H(i) is applied to C(1:m,i:n)
878        **/
879       ni = n - i + 1;
880       jc = i;
881     }
882     /**
883      *        Apply H(i)
884      **/
885     aii = a_2( i, i );
886     a_2( i, i ) = one;
887     dlarf( side, mi, ni, &a_2( i, i ), lda, tau_1( i ),
888           &c_2( ic, jc ), ldc, work );
889     a_2( i, i ) = aii;
890   }
891   return;
892   /**
893    *     End of DORML2
894    **/
895 }
896 
897 
898 
dgelqf(long m,long n,double a[],long lda,double tau[],double work[],long lwork,long * info)899 void dgelqf( long m, long n, double a[], long lda, double tau[],
900             double work[], long lwork, long *info )
901 {
902   /**
903    *  -- LAPACK routine (version 1.1) --
904    *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
905    *     Courant Institute, Argonne National Lab, and Rice University
906    *     March 31, 1993
907    *
908    *     .. Scalar Arguments ..*/
909   /**     ..
910    *     .. Array Arguments ..*/
911 #undef work_1
912 #define work_1(a1) work[a1-1]
913 #undef tau_1
914 #define tau_1(a1) tau[a1-1]
915 #undef a_2
916 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
917   /**     ..
918    *
919    *  Purpose
920    *  =======
921    *
922    *  DGELQF computes an LQ factorization of a real M-by-N matrix A:
923    *  A = L * Q.
924    *
925    *  Arguments
926    *  =========
927    *
928    *  M       (input) INTEGER
929    *          The number of rows of the matrix A.  M >= 0.
930    *
931    *  N       (input) INTEGER
932    *          The number of columns of the matrix A.  N >= 0.
933    *
934    *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
935    *          On entry, the M-by-N matrix A.
936    *          On exit, the elements on and below the diagonal of the array
937    *          contain the m-by-min(m,n) lower trapezoidal matrix L (L is
938    *          lower triangular if m <= n); the elements above the diagonal,
939    *          with the array TAU, represent the orthogonal matrix Q as a
940    *          product of elementary reflectors (see Further Details).
941    *
942    *  LDA     (input) INTEGER
943    *          The leading dimension of the array A.  LDA >= max(1,M).
944    *
945    *  TAU     (output) DOUBLE PRECISION array, dimension (min(M,N))
946    *          The scalar factors of the elementary reflectors (see Further
947    *          Details).
948    *
949    *  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK)
950    *          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
951    *
952    *  LWORK   (input) INTEGER
953    *          The dimension of the array WORK.  LWORK >= max(1,M).
954    *          For optimum performance LWORK >= M*NB, where NB is the
955    *          optimal blocksize.
956    *
957    *  INFO    (output) INTEGER
958    *          = 0:  successful exit
959    *          < 0:  if INFO = -i, the i-th argument had an illegal value
960    *
961    *  Further Details
962    *  ===============
963    *
964    *  The matrix Q is represented as a product of elementary reflectors
965    *
966    *     Q = H(k) . . . H(2) H(1), where k = min(m,n).
967    *
968    *  Each H(i) has the form
969    *
970    *     H(i) = I - tau * v * v'
971    *
972    *  where tau is a real scalar, and v is a real vector with
973    *  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
974    *  and tau in TAU(i).
975    *
976    *  =====================================================================
977    *
978    *     .. Local Scalars ..*/
979   long            i, ib, iinfo, iws, k, ldwork=0, nb, nbmin, nx;
980   /**     ..
981    *     .. Intrinsic Functions ..*/
982   /*      intrinsic          max, min;*/
983   /**     ..
984    *     .. Executable Statements ..
985    *
986    *     Test the input arguments
987    **/
988   /*-----implicit-declarations-----*/
989   /*-----end-of-declarations-----*/
990   *info = 0;
991   if( m<0 ) {
992     *info = -1;
993   } else if( n<0 ) {
994     *info = -2;
995   } else if( lda<max( 1, m ) ) {
996     *info = -4;
997   } else if( lwork<max( 1, m ) ) {
998     *info = -7;
999   }
1000   if( *info!=0 ) {
1001     xerbla( "dgelqf", -*info );
1002     return;
1003   }
1004   /**
1005    *     Quick return if possible
1006    **/
1007   k = min( m, n );
1008   if( k==0 ) {
1009     work_1( 1 ) = 1;
1010     return;
1011   }
1012   /**
1013    *     Determine the block size.
1014    **/
1015   nb = ilaenv( 1, "dgelqf", " ", m, n, -1, -1 );
1016   nbmin = 2;
1017   nx = 0;
1018   iws = m;
1019   if( nb>1 && nb<k ) {
1020     /**
1021      *        Determine when to cross over from blocked to unblocked code.
1022      **/
1023     nx = max( 0, ilaenv( 3, "dgelqf", " ", m, n, -1, -1 ) );
1024     if( nx<k ) {
1025       /**
1026        *           Determine if workspace is large enough for blocked code.
1027        **/
1028       ldwork = m;
1029       iws = ldwork*nb;
1030       if( lwork<iws ) {
1031         /**
1032          *              Not enough workspace to use optimal NB:  reduce NB and
1033          *              determine the minimum value of NB.
1034          **/
1035         nb = lwork / ldwork;
1036         nbmin = max( 2, ilaenv( 2, "dgelqf", " ", m, n, -1,
1037                                -1 ) );
1038       }
1039     }
1040   }
1041 
1042   if( nb>=nbmin && nb<k && nx<k ) {
1043     /**
1044      *        Use blocked code initially
1045      **/
1046     for (i=1 ; nb>0?i<=k - nx:i>=k - nx ; i+=nb) {
1047       ib = min( k-i+1, nb );
1048       /**
1049        *           Compute the LQ factorization of the current block
1050        *           A(i:i+ib-1,i:n)
1051        **/
1052       dgelq2( ib, n-i+1, &a_2( i, i ), lda, &tau_1( i ), work,
1053              &iinfo );
1054       if( i+ib<=m ) {
1055         /**
1056          *              Form the triangular factor of the block reflector
1057          *              H = H(i) H(i+1) . . . H(i+ib-1)
1058          **/
1059         dlarft( 'f'/*orward*/, 'r'/*owwise*/, n-i+1, ib, &a_2( i, i ),
1060                lda, &tau_1( i ), work, ldwork );
1061         /**
1062          *              Apply H to A(i+ib:m,i:n) from the right
1063          **/
1064         dlarfb( 'r'/*ight*/, 'n'/*o transpose*/, 'f'/*orward*/,
1065                'r'/*owwise*/, m-i-ib+1, n-i+1, ib, &a_2( i, i ),
1066                lda, work, ldwork, &a_2( i+ib, i ), lda,
1067                &work_1( ib+1 ), ldwork );
1068       }
1069     }
1070   } else {
1071     i = 1;
1072   }
1073   /**
1074    *     Use unblocked code to factor the last or only block.
1075    **/
1076   if( i<=k )
1077     dgelq2( m-i+1, n-i+1, &a_2( i, i ), lda, &tau_1( i ), work,
1078            &iinfo );
1079 
1080   work_1( 1 ) = iws;
1081   return;
1082   /**
1083    *     End of DGELQF
1084    **/
1085 }
1086 
1087 
1088 
dgelq2(long m,long n,double a[],long lda,double tau[],double work[],long * info)1089 void dgelq2( long m, long n, double a[], long lda, double tau[], double work[], long *info )
1090 {
1091   /**
1092    *  -- LAPACK routine (version 1.1) --
1093    *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
1094    *     Courant Institute, Argonne National Lab, and Rice University
1095    *     February 29, 1992
1096    *
1097    *     .. Scalar Arguments ..*/
1098   /**     ..
1099    *     .. Array Arguments ..*/
1100 #undef work_1
1101 #define work_1(a1) work[a1-1]
1102 #undef tau_1
1103 #define tau_1(a1) tau[a1-1]
1104 #undef a_2
1105 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
1106   /**     ..
1107    *
1108    *  Purpose
1109    *  =======
1110    *
1111    *  DGELQ2 computes an LQ factorization of a real m by n matrix A:
1112    *  A = L * Q.
1113    *
1114    *  Arguments
1115    *  =========
1116    *
1117    *  M       (input) INTEGER
1118    *          The number of rows of the matrix A.  M >= 0.
1119    *
1120    *  N       (input) INTEGER
1121    *          The number of columns of the matrix A.  N >= 0.
1122    *
1123    *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
1124    *          On entry, the m by n matrix A.
1125    *          On exit, the elements on and below the diagonal of the array
1126    *          contain the m by min(m,n) lower trapezoidal matrix L (L is
1127    *          lower triangular if m <= n); the elements above the diagonal,
1128    *          with the array TAU, represent the orthogonal matrix Q as a
1129    *          product of elementary reflectors (see Further Details).
1130    *
1131    *  LDA     (input) INTEGER
1132    *          The leading dimension of the array A.  LDA >= max(1,M).
1133    *
1134    *  TAU     (output) DOUBLE PRECISION array, dimension (min(M,N))
1135    *          The scalar factors of the elementary reflectors (see Further
1136    *          Details).
1137    *
1138    *  WORK    (workspace) DOUBLE PRECISION array, dimension (M)
1139    *
1140    *  INFO    (output) INTEGER
1141    *          = 0: successful exit
1142    *          < 0: if INFO = -i, the i-th argument had an illegal value
1143    *
1144    *  Further Details
1145    *  ===============
1146    *
1147    *  The matrix Q is represented as a product of elementary reflectors
1148    *
1149    *     Q = H(k) . . . H(2) H(1), where k = min(m,n).
1150    *
1151    *  Each H(i) has the form
1152    *
1153    *     H(i) = I - tau * v * v'
1154    *
1155    *  where tau is a real scalar, and v is a real vector with
1156    *  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
1157    *  and tau in TAU(i).
1158    *
1159    *  =====================================================================
1160    *
1161    *     .. Parameters ..*/
1162 #undef one
1163 #define one 1.0e+0
1164   /**     ..
1165    *     .. Local Scalars ..*/
1166   long            i, k;
1167   double    aii;
1168   /**     ..
1169    *     .. Intrinsic Functions ..*/
1170   /*      intrinsic          max, min;*/
1171   /**     ..
1172    *     .. Executable Statements ..
1173    *
1174    *     Test the input arguments
1175    **/
1176   /*-----implicit-declarations-----*/
1177   /*-----end-of-declarations-----*/
1178   *info = 0;
1179   if( m<0 ) {
1180     *info = -1;
1181   } else if( n<0 ) {
1182     *info = -2;
1183   } else if( lda<max( 1, m ) ) {
1184     *info = -4;
1185   }
1186   if( *info!=0 ) {
1187     xerbla( "dgelq2", -*info );
1188     return;
1189   }
1190 
1191   k = min( m, n );
1192 
1193   for (i=1 ; i<=k ; i+=1) {
1194     /**
1195      *        Generate elementary reflector H(i) to annihilate A(i,i+1:n)
1196      **/
1197     dlarfg( n-i+1, &a_2( i, i ), &a_2( i, min( i+1, n ) ), lda,
1198            &tau_1( i ) );
1199     if( i<m ) {
1200       /**
1201        *           Apply H(i) to A(i+1:m,i:n) from the right
1202        **/
1203       aii = a_2( i, i );
1204       a_2( i, i ) = one;
1205       dlarf( 'r'/*ight*/, m-i, n-i+1, &a_2( i, i ), lda, tau_1( i ),
1206             &a_2( i+1, i ), lda, work );
1207       a_2( i, i ) = aii;
1208     }
1209   }
1210   return;
1211   /**
1212    *     End of DGELQ2
1213    **/
1214 }
1215 
1216 
1217 
dormqr(char side,char trans,long m,long n,long k,double a[],long lda,double tau[],double c[],long ldc,double work[],long lwork,long * info)1218 void dormqr( char side, char trans, long m, long n, long k, double a[], long lda, double tau[], double c[], long ldc,double work[], long lwork, long *info )
1219 {
1220   /**
1221    *  -- LAPACK routine (version 1.1) --
1222    *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
1223    *     Courant Institute, Argonne National Lab, and Rice University
1224    *     March 31, 1993
1225    *
1226    *     .. Scalar Arguments ..*/
1227   /**     ..
1228    *     .. Array Arguments ..*/
1229 #undef work_1
1230 #define work_1(a1) work[a1-1]
1231 #undef tau_1
1232 #define tau_1(a1) tau[a1-1]
1233 #undef c_2
1234 #define c_2(a1,a2) c[a1-1+ldc*(a2-1)]
1235 #undef a_2
1236 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
1237   /**     ..
1238    *
1239    *  Purpose
1240    *  =======
1241    *
1242    *  DORMQR overwrites the general real M-by-N matrix C with
1243    *
1244    *                  SIDE = 'L'     SIDE = 'R'
1245    *  TRANS = 'N':      Q * C          C * Q
1246    *  TRANS = 'T':      Q**T * C       C * Q**T
1247    *
1248    *  where Q is a real orthogonal matrix defined as the product of k
1249    *  elementary reflectors
1250    *
1251    *        Q = H(1) H(2) . . . H(k)
1252    *
1253    *  as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N
1254    *  if SIDE = 'R'.
1255    *
1256    *  Arguments
1257    *  =========
1258    *
1259    *  SIDE    (input) CHARACTER*1
1260    *          = 'L': apply Q or Q**T from the Left;
1261    *          = 'R': apply Q or Q**T from the Right.
1262    *
1263    *  TRANS   (input) CHARACTER*1
1264    *          = 'N':  No transpose, apply Q;
1265    *          = 'T':  Transpose, apply Q**T.
1266    *
1267    *  M       (input) INTEGER
1268    *          The number of rows of the matrix C. M >= 0.
1269    *
1270    *  N       (input) INTEGER
1271    *          The number of columns of the matrix C. N >= 0.
1272    *
1273    *  K       (input) INTEGER
1274    *          The number of elementary reflectors whose product defines
1275    *          the matrix Q.
1276    *          If SIDE = 'L', M >= K >= 0;
1277    *          if SIDE = 'R', N >= K >= 0.
1278    *
1279    *  A       (input) DOUBLE PRECISION array, dimension (LDA,K)
1280    *          The i-th column must contain the vector which defines the
1281    *          elementary reflector H(i), for i = 1,2,...,k, as returned by
1282    *          DGEQRF in the first k columns of its array argument A.
1283    *          A is modified by the routine but restored on exit.
1284    *
1285    *  LDA     (input) INTEGER
1286    *          The leading dimension of the array A.
1287    *          If SIDE = 'L', LDA >= max(1,M);
1288    *          if SIDE = 'R', LDA >= max(1,N).
1289    *
1290    *  TAU     (input) DOUBLE PRECISION array, dimension (K)
1291    *          TAU(i) must contain the scalar factor of the elementary
1292    *          reflector H(i), as returned by DGEQRF.
1293    *
1294    *  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
1295    *          On entry, the M-by-N matrix C.
1296    *          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
1297    *
1298    *  LDC     (input) INTEGER
1299    *          The leading dimension of the array C. LDC >= max(1,M).
1300    *
1301    *  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK)
1302    *          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
1303    *
1304    *  LWORK   (input) INTEGER
1305    *          The dimension of the array WORK.
1306    *          If SIDE = 'L', LWORK >= max(1,N);
1307    *          if SIDE = 'R', LWORK >= max(1,M).
1308    *          For optimum performance LWORK >= N*NB if SIDE = 'L', and
1309    *          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
1310    *          blocksize.
1311    *
1312    *  INFO    (output) INTEGER
1313    *          = 0:  successful exit
1314    *          < 0:  if INFO = -i, the i-th argument had an illegal value
1315    *
1316    *  =====================================================================
1317    *
1318    *     .. Parameters ..*/
1319 #undef nbmax
1320 #define nbmax 64
1321 #undef ldt
1322 #define ldt (nbmax+1)
1323   /**     ..
1324    *     .. Local Scalars ..*/
1325   int            left, notran;
1326   long    i, i1, i2, i3, ib, ic=0, iinfo, iws, jc=0, ldwork,
1327           mi=0, nb, nbmin, ni=0, nq, nw;
1328   char side_trans[3];
1329   /**     ..
1330    *     .. Local Arrays ..*/
1331   double    t[nbmax][ldt];
1332 #undef t_2
1333 #define t_2(a1,a2) t[a2-1][a1-1]
1334   /**     ..
1335    *     .. Intrinsic Functions ..*/
1336   /*      intrinsic          max, min;*/
1337   /**     ..
1338    *     .. Executable Statements ..
1339    *
1340    *     Test the input arguments
1341    **/
1342   /*-----implicit-declarations-----*/
1343   /*-----end-of-declarations-----*/
1344   *info = 0;
1345   left = lsame( side, 'l' );
1346   notran = lsame( trans, 'n' );
1347   /**
1348    *     NQ is the order of Q and NW is the minimum dimension of WORK
1349    **/
1350   if( left ) {
1351     nq = m;
1352     nw = n;
1353   } else {
1354     nq = n;
1355     nw = m;
1356   }
1357   if( !left && !lsame( side, 'r' ) ) {
1358     *info = -1;
1359   } else if( !notran && !lsame( trans, 't' ) ) {
1360     *info = -2;
1361   } else if( m<0 ) {
1362     *info = -3;
1363   } else if( n<0 ) {
1364     *info = -4;
1365   } else if( k<0 || k>nq ) {
1366     *info = -5;
1367   } else if( lda<max( 1, nq ) ) {
1368     *info = -7;
1369   } else if( ldc<max( 1, m ) ) {
1370     *info = -10;
1371   } else if( lwork<max( 1, nw ) ) {
1372     *info = -12;
1373   }
1374   if( *info!=0 ) {
1375     xerbla( "dormqr", -*info );
1376     return;
1377   }
1378   /**
1379    *     Quick return if possible
1380    **/
1381   if( m==0 || n==0 || k==0 ) {
1382     work_1( 1 ) = 1;
1383     return;
1384   }
1385   /**
1386    *     Determine the block size.  NB may be at most NBMAX, where NBMAX
1387    *     is used to define the local array T.
1388    **/
1389   side_trans[0]= side;
1390   side_trans[1]= trans;
1391   side_trans[2]= '\0';
1392   nb = min( nbmax, ilaenv( 1, "dormqr", side_trans, m, n, k,
1393                           -1 ) );
1394   nbmin = 2;
1395   ldwork = nw;
1396   if( nb>1 && nb<k ) {
1397     iws = nw*nb;
1398     if( lwork<iws ) {
1399       nb = lwork / ldwork;
1400       nbmin = max( 2, ilaenv( 2, "dormqr", side_trans, m, n, k,
1401                              -1 ) );
1402     }
1403   } else {
1404     iws = nw;
1405   }
1406 
1407   if( nb<nbmin || nb>=k ) {
1408     /**
1409      *        Use unblocked code
1410      **/
1411     dorm2r( side, trans, m, n, k, a, lda, tau, c, ldc, work,
1412            &iinfo );
1413   } else {
1414     /**
1415      *        Use blocked code
1416      **/
1417     if( ( left && !notran ) ||
1418        ( !left && notran ) ) {
1419       i1 = 1;
1420       i2 = k;
1421       i3 = nb;
1422     } else {
1423       i1 = ( ( k-1 ) / nb )*nb + 1;
1424       i2 = 1;
1425       i3 = -nb;
1426     }
1427 
1428     if( left ) {
1429       ni = n;
1430       jc = 1;
1431     } else {
1432       mi = m;
1433       ic = 1;
1434     }
1435 
1436     for (i=i1 ; i3>0?i<=i2:i>=i2 ; i+=i3) {
1437       ib = min( nb, k-i+1 );
1438       /**
1439        *           Form the triangular factor of the block reflector
1440        *           H = H(i) H(i+1) . . . H(i+ib-1)
1441        **/
1442       dlarft( 'f'/*orward*/, 'c'/*olumnwise*/, nq-i+1, ib, &a_2( i, i ),
1443              lda, &tau_1( i ), (double *)t, ldt );
1444       if( left ) {
1445         /**
1446          *              H or H' is applied to C(i:m,1:n)
1447          **/
1448         mi = m - i + 1;
1449         ic = i;
1450       } else {
1451         /**
1452          *              H or H' is applied to C(1:m,i:n)
1453          **/
1454         ni = n - i + 1;
1455         jc = i;
1456       }
1457       /**
1458        *           Apply H or H'
1459        **/
1460       dlarfb( side, trans, 'f'/*orward*/, 'c'/*olumnwise*/, mi, ni,
1461              ib, &a_2( i, i ), lda, (double *)t, ldt, &c_2( ic, jc ), ldc,
1462              work, ldwork );
1463     }
1464   }
1465   work_1( 1 ) = iws;
1466   return;
1467   /**
1468    *     End of DORMQR
1469    **/
1470 #undef nbmax
1471 #undef ldt
1472 }
1473 
1474 
1475 
dorm2r(char side,char trans,long m,long n,long k,double a[],long lda,double tau[],double c[],long ldc,double work[],long * info)1476 void dorm2r( char side, char trans, long m, long n, long k, double a[], long lda, double tau[], double c[], long ldc,double work[], long *info )
1477 {
1478   /**
1479    *  -- LAPACK routine (version 1.1) --
1480    *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
1481    *     Courant Institute, Argonne National Lab, and Rice University
1482    *     February 29, 1992
1483    *
1484    *     .. Scalar Arguments ..*/
1485   /**     ..
1486    *     .. Array Arguments ..*/
1487 #undef work_1
1488 #define work_1(a1) work[a1-1]
1489 #undef tau_1
1490 #define tau_1(a1) tau[a1-1]
1491 #undef c_2
1492 #define c_2(a1,a2) c[a1-1+ldc*(a2-1)]
1493 #undef a_2
1494 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
1495   /**     ..
1496    *
1497    *  Purpose
1498    *  =======
1499    *
1500    *  DORM2R overwrites the general real m by n matrix C with
1501    *
1502    *        Q * C  if SIDE = 'L' and TRANS = 'N', or
1503    *
1504    *        Q'* C  if SIDE = 'L' and TRANS = 'T', or
1505    *
1506    *        C * Q  if SIDE = 'R' and TRANS = 'N', or
1507    *
1508    *        C * Q' if SIDE = 'R' and TRANS = 'T',
1509    *
1510    *  where Q is a real orthogonal matrix defined as the product of k
1511    *  elementary reflectors
1512    *
1513    *        Q = H(1) H(2) . . . H(k)
1514    *
1515    *  as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n
1516    *  if SIDE = 'R'.
1517    *
1518    *  Arguments
1519    *  =========
1520    *
1521    *  SIDE    (input) CHARACTER*1
1522    *          = 'L': apply Q or Q' from the Left
1523    *          = 'R': apply Q or Q' from the Right
1524    *
1525    *  TRANS   (input) CHARACTER*1
1526    *          = 'N': apply Q  (No transpose)
1527    *          = 'T': apply Q' (Transpose)
1528    *
1529    *  M       (input) INTEGER
1530    *          The number of rows of the matrix C. M >= 0.
1531    *
1532    *  N       (input) INTEGER
1533    *          The number of columns of the matrix C. N >= 0.
1534    *
1535    *  K       (input) INTEGER
1536    *          The number of elementary reflectors whose product defines
1537    *          the matrix Q.
1538    *          If SIDE = 'L', M >= K >= 0;
1539    *          if SIDE = 'R', N >= K >= 0.
1540    *
1541    *  A       (input) DOUBLE PRECISION array, dimension (LDA,K)
1542    *          The i-th column must contain the vector which defines the
1543    *          elementary reflector H(i), for i = 1,2,...,k, as returned by
1544    *          DGEQRF in the first k columns of its array argument A.
1545    *          A is modified by the routine but restored on exit.
1546    *
1547    *  LDA     (input) INTEGER
1548    *          The leading dimension of the array A.
1549    *          If SIDE = 'L', LDA >= max(1,M);
1550    *          if SIDE = 'R', LDA >= max(1,N).
1551    *
1552    *  TAU     (input) DOUBLE PRECISION array, dimension (K)
1553    *          TAU(i) must contain the scalar factor of the elementary
1554    *          reflector H(i), as returned by DGEQRF.
1555    *
1556    *  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
1557    *          On entry, the m by n matrix C.
1558    *          On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
1559    *
1560    *  LDC     (input) INTEGER
1561    *          The leading dimension of the array C. LDC >= max(1,M).
1562    *
1563    *  WORK    (workspace) DOUBLE PRECISION array, dimension
1564    *                                   (N) if SIDE = 'L',
1565    *                                   (M) if SIDE = 'R'
1566    *
1567    *  INFO    (output) INTEGER
1568    *          = 0: successful exit
1569    *          < 0: if INFO = -i, the i-th argument had an illegal value
1570    *
1571    *  =====================================================================
1572    *
1573    *     .. Parameters ..*/
1574 #undef one
1575 #define one 1.0e+0
1576   /**     ..
1577    *     .. Local Scalars ..*/
1578   int            left, notran;
1579   long            i, i1, i2, i3, ic=0, jc=0, mi=0, ni=0, nq;
1580   double    aii;
1581   /**     ..
1582    *     .. Intrinsic Functions ..*/
1583   /*      intrinsic          max;*/
1584   /**     ..
1585    *     .. Executable Statements ..
1586    *
1587    *     Test the input arguments
1588    **/
1589   /*-----implicit-declarations-----*/
1590   /*-----end-of-declarations-----*/
1591   *info = 0;
1592   left = lsame( side, 'l' );
1593   notran = lsame( trans, 'n' );
1594   /**
1595    *     NQ is the order of Q
1596    **/
1597   if( left ) {
1598     nq = m;
1599   } else {
1600     nq = n;
1601   }
1602   if( !left && !lsame( side, 'r' ) ) {
1603     *info = -1;
1604   } else if( !notran && !lsame( trans, 't' ) ) {
1605     *info = -2;
1606   } else if( m<0 ) {
1607     *info = -3;
1608   } else if( n<0 ) {
1609     *info = -4;
1610   } else if( k<0 || k>nq ) {
1611     *info = -5;
1612   } else if( lda<max( 1, nq ) ) {
1613     *info = -7;
1614   } else if( ldc<max( 1, m ) ) {
1615     *info = -10;
1616   }
1617   if( *info!=0 ) {
1618     xerbla( "dorm2r", -*info );
1619     return;
1620   }
1621   /**
1622    *     Quick return if possible
1623    **/
1624   if( m==0 || n==0 || k==0 )
1625     return;
1626 
1627   if( ( left && !notran ) || ( !left && notran ) )
1628     {
1629       i1 = 1;
1630       i2 = k;
1631       i3 = 1;
1632     } else {
1633       i1 = k;
1634       i2 = 1;
1635       i3 = -1;
1636     }
1637 
1638   if( left ) {
1639     ni = n;
1640     jc = 1;
1641   } else {
1642     mi = m;
1643     ic = 1;
1644   }
1645 
1646   for (i=i1 ; i3>0?i<=i2:i>=i2 ; i+=i3) {
1647     if( left ) {
1648       /**
1649        *           H(i) is applied to C(i:m,1:n)
1650        **/
1651       mi = m - i + 1;
1652       ic = i;
1653     } else {
1654       /**
1655        *           H(i) is applied to C(1:m,i:n)
1656        **/
1657       ni = n - i + 1;
1658       jc = i;
1659     }
1660     /**
1661      *        Apply H(i)
1662      **/
1663     aii = a_2( i, i );
1664     a_2( i, i ) = one;
1665     dlarf( side, mi, ni, &a_2( i, i ), 1, tau_1( i ), &c_2( ic, jc ),
1666           ldc, work );
1667     a_2( i, i ) = aii;
1668   }
1669   return;
1670   /**
1671    *     End of DORM2R
1672    **/
1673 }
1674 
1675 
1676 
dgeqrf(long m,long n,double a[],long lda,double tau[],double work[],long lwork,long * info)1677 void dgeqrf( long m, long n, double a[], long lda, double tau[], double work[], long lwork, long *info )
1678 {
1679   /**
1680    *  -- LAPACK routine (version 1.1) --
1681    *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
1682    *     Courant Institute, Argonne National Lab, and Rice University
1683    *     March 31, 1993
1684    *
1685    *     .. Scalar Arguments ..*/
1686   /**     ..
1687    *     .. Array Arguments ..*/
1688 #undef work_1
1689 #define work_1(a1) work[a1-1]
1690 #undef tau_1
1691 #define tau_1(a1) tau[a1-1]
1692 #undef a_2
1693 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
1694   /**     ..
1695    *
1696    *  Purpose
1697    *  =======
1698    *
1699    *  DGEQRF computes a QR factorization of a real M-by-N matrix A:
1700    *  A = Q * R.
1701    *
1702    *  Arguments
1703    *  =========
1704    *
1705    *  M       (input) INTEGER
1706    *          The number of rows of the matrix A.  M >= 0.
1707    *
1708    *  N       (input) INTEGER
1709    *          The number of columns of the matrix A.  N >= 0.
1710    *
1711    *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
1712    *          On entry, the M-by-N matrix A.
1713    *          On exit, the elements on and above the diagonal of the array
1714    *          contain the min(M,N)-by-N upper trapezoidal matrix R (R is
1715    *          upper triangular if m >= n); the elements below the diagonal,
1716    *          with the array TAU, represent the orthogonal matrix Q as a
1717    *          product of min(m,n) elementary reflectors (see Further
1718    *          Details).
1719    *
1720    *  LDA     (input) INTEGER
1721    *          The leading dimension of the array A.  LDA >= max(1,M).
1722    *
1723    *  TAU     (output) DOUBLE PRECISION array, dimension (min(M,N))
1724    *          The scalar factors of the elementary reflectors (see Further
1725    *          Details).
1726    *
1727    *  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK)
1728    *          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
1729    *
1730    *  LWORK   (input) INTEGER
1731    *          The dimension of the array WORK.  LWORK >= max(1,N).
1732    *          For optimum performance LWORK >= N*NB, where NB is
1733    *          the optimal blocksize.
1734    *
1735    *  INFO    (output) INTEGER
1736    *          = 0:  successful exit
1737    *          < 0:  if INFO = -i, the i-th argument had an illegal value
1738    *
1739    *  Further Details
1740    *  ===============
1741    *
1742    *  The matrix Q is represented as a product of elementary reflectors
1743    *
1744    *     Q = H(1) H(2) . . . H(k), where k = min(m,n).
1745    *
1746    *  Each H(i) has the form
1747    *
1748    *     H(i) = I - tau * v * v'
1749    *
1750    *  where tau is a real scalar, and v is a real vector with
1751    *  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
1752    *  and tau in TAU(i).
1753    *
1754    *  =====================================================================
1755    *
1756    *     .. Local Scalars ..*/
1757   long            i, ib, iinfo, iws, k, ldwork=0, nb, nbmin, nx;
1758   /**     ..
1759    *     .. Intrinsic Functions ..*/
1760   /*      intrinsic          max, min;*/
1761   /**     ..
1762    *     .. Executable Statements ..
1763    *
1764    *     Test the input arguments
1765    **/
1766   /*-----implicit-declarations-----*/
1767   /*-----end-of-declarations-----*/
1768   *info = 0;
1769   if( m<0 ) {
1770     *info = -1;
1771   } else if( n<0 ) {
1772     *info = -2;
1773   } else if( lda<max( 1, m ) ) {
1774     *info = -4;
1775   } else if( lwork<max( 1, n ) ) {
1776     *info = -7;
1777   }
1778   if( *info!=0 ) {
1779     xerbla( "dgeqrf", -*info );
1780     return;
1781   }
1782   /**
1783    *     Quick return if possible
1784    **/
1785   k = min( m, n );
1786   if( k==0 ) {
1787     work_1( 1 ) = 1;
1788     return;
1789   }
1790   /**
1791    *     Determine the block size.
1792    **/
1793   nb = ilaenv( 1, "dgeqrf", " ", m, n, -1, -1 );
1794   nbmin = 2;
1795   nx = 0;
1796   iws = n;
1797   if( nb>1 && nb<k ) {
1798     /**
1799      *        Determine when to cross over from blocked to unblocked code.
1800      **/
1801     nx = max( 0, ilaenv( 3, "dgeqrf", " ", m, n, -1, -1 ) );
1802     if( nx<k ) {
1803       /**
1804        *           Determine if workspace is large enough for blocked code.
1805        **/
1806       ldwork = n;
1807       iws = ldwork*nb;
1808       if( lwork<iws ) {
1809         /**
1810          *              Not enough workspace to use optimal NB:  reduce NB and
1811          *              determine the minimum value of NB.
1812          **/
1813         nb = lwork / ldwork;
1814         nbmin = max( 2, ilaenv( 2, "dgeqrf", " ", m, n, -1,
1815                                -1 ) );
1816       }
1817     }
1818   }
1819 
1820   if( nb>=nbmin && nb<k && nx<k ) {
1821     /**
1822      *        Use blocked code initially
1823      **/
1824     for (i=1 ; nb>0?i<=k - nx:i>=k - nx ; i+=nb) {
1825       ib = min( k-i+1, nb );
1826       /**
1827        *           Compute the QR factorization of the current block
1828        *           A(i:m,i:i+ib-1)
1829        **/
1830       dgeqr2( m-i+1, ib, &a_2( i, i ), lda, &tau_1( i ), work,
1831              &iinfo );
1832       if( i+ib<=n ) {
1833         /**
1834          *              Form the triangular factor of the block reflector
1835          *              H = H(i) H(i+1) . . . H(i+ib-1)
1836          **/
1837         dlarft( 'f'/*orward*/, 'c'/*olumnwise*/, m-i+1, ib,
1838                &a_2( i, i ), lda, &tau_1( i ), work, ldwork );
1839         /**
1840          *              Apply H' to A(i:m,i+ib:n) from the left
1841          **/
1842         dlarfb( 'l'/*eft*/, 't'/*ranspose*/, 'f'/*orward*/,
1843                'c'/*olumnwise*/, m-i+1, n-i-ib+1, ib,
1844                &a_2( i, i ), lda, work, ldwork, &a_2( i, i+ib ),
1845                lda, &work_1( ib+1 ), ldwork );
1846       }
1847     }
1848   } else {
1849     i = 1;
1850   }
1851   /**
1852    *     Use unblocked code to factor the last or only block.
1853    **/
1854   if( i<=k )
1855     dgeqr2( m-i+1, n-i+1, &a_2( i, i ), lda, &tau_1( i ), work,
1856            &iinfo );
1857 
1858   work_1( 1 ) = iws;
1859   return;
1860   /**
1861    *     End of DGEQRF
1862    **/
1863 }
1864 
1865 
1866 
dlarfb(char side,char trans,char direct,char storev,long m,long n,long k,double v[],long ldv,double t[],long ldt,double c[],long ldc,double work[],long ldwork)1867 void dlarfb( char side, char trans, char direct, char storev,
1868             long m, long n, long k, double v[], long ldv,
1869             double t[], long ldt, double c[], long ldc,
1870             double work[], long ldwork )
1871 {
1872   /**
1873    *  -- LAPACK auxiliary routine (version 1.1) --
1874    *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
1875    *     Courant Institute, Argonne National Lab, and Rice University
1876    *     February 29, 1992
1877    *
1878    *     .. Scalar Arguments ..*/
1879   /**     ..
1880    *     .. Array Arguments ..*/
1881 #undef work_2
1882 #define work_2(a1,a2) work[a1-1+ldwork*(a2-1)]
1883 #undef v_2
1884 #define v_2(a1,a2) v[a1-1+ldv*(a2-1)]
1885 #undef t_2
1886 #define t_2(a1,a2) t[a1-1+ldt*(a2-1)]
1887 #undef c_2
1888 #define c_2(a1,a2) c[a1-1+ldc*(a2-1)]
1889   /**     ..
1890    *
1891    *  Purpose
1892    *  =======
1893    *
1894    *  DLARFB applies a real block reflector H or its transpose H' to a
1895    *  real m by n matrix C, from either the left or the right.
1896    *
1897    *  Arguments
1898    *  =========
1899    *
1900    *  SIDE    (input) CHARACTER*1
1901    *          = 'L': apply H or H' from the Left
1902    *          = 'R': apply H or H' from the Right
1903    *
1904    *  TRANS   (input) CHARACTER*1
1905    *          = 'N': apply H (No transpose)
1906    *          = 'T': apply H' (Transpose)
1907    *
1908    *  DIRECT  (input) CHARACTER*1
1909    *          Indicates how H is formed from a product of elementary
1910    *          reflectors
1911    *          = 'F': H = H(1) H(2) . . . H(k) (Forward)
1912    *          = 'B': H = H(k) . . . H(2) H(1) (Backward)
1913    *
1914    *  STOREV  (input) CHARACTER*1
1915    *          Indicates how the vectors which define the elementary
1916    *          reflectors are stored:
1917    *          = 'C': Columnwise
1918    *          = 'R': Rowwise
1919    *
1920    *  M       (input) INTEGER
1921    *          The number of rows of the matrix C.
1922    *
1923    *  N       (input) INTEGER
1924    *          The number of columns of the matrix C.
1925    *
1926    *  K       (input) INTEGER
1927    *          The order of the matrix T (= the number of elementary
1928    *          reflectors whose product defines the block reflector).
1929    *
1930    *  V       (input) DOUBLE PRECISION array, dimension
1931    *                                (LDV,K) if STOREV = 'C'
1932    *                                (LDV,M) if STOREV = 'R' and SIDE = 'L'
1933    *                                (LDV,N) if STOREV = 'R' and SIDE = 'R'
1934    *          The matrix V. See further details.
1935    *
1936    *  LDV     (input) INTEGER
1937    *          The leading dimension of the array V.
1938    *          If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M);
1939    *          if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N);
1940    *          if STOREV = 'R', LDV >= K.
1941    *
1942    *  T       (input) DOUBLE PRECISION array, dimension (LDT,K)
1943    *          The triangular k by k matrix T in the representation of the
1944    *          block reflector.
1945    *
1946    *  LDT     (input) INTEGER
1947    *          The leading dimension of the array T. LDT >= K.
1948    *
1949    *  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
1950    *          On entry, the m by n matrix C.
1951    *          On exit, C is overwritten by H*C or H'*C or C*H or C*H'.
1952    *
1953    *  LDC     (input) INTEGER
1954    *          The leading dimension of the array C. LDA >= max(1,M).
1955    *
1956    *  WORK    (workspace) DOUBLE PRECISION array, dimension (LDWORK,K)
1957    *
1958    *  LDWORK  (input) INTEGER
1959    *          The leading dimension of the array WORK.
1960    *          If SIDE = 'L', LDWORK >= max(1,N);
1961    *          if SIDE = 'R', LDWORK >= max(1,M).
1962    *
1963    *  =====================================================================
1964    *
1965    *     .. Parameters ..*/
1966 #undef one
1967 #define one 1.0e+0
1968   /**     ..
1969    *     .. Local Scalars ..*/
1970   enum CBLAS_TRANSPOSE transt, transf;
1971   long            i, j;
1972   /**     ..
1973    *     .. Executable Statements ..
1974    *
1975    *     Quick return if possible
1976    **/
1977   /*-----implicit-declarations-----*/
1978   /*-----end-of-declarations-----*/
1979   if( m<=0 || n<=0 )
1980     return;
1981 
1982   if( lsame( trans, 'n' ) ) {
1983     transf = CblasNoTrans;
1984     transt = CblasTrans;
1985   } else {
1986     transf = CblasTrans;
1987     transt = CblasNoTrans;
1988   }
1989 
1990   if( lsame( storev, 'c' ) ) {
1991 
1992     if( lsame( direct, 'f' ) ) {
1993       /**
1994        *           Let  V =  ( V1 )    (first K rows)
1995        *                     ( V2 )
1996        *           where  V1  is unit lower triangular.
1997        **/
1998       if( lsame( side, 'l' ) ) {
1999         /**
2000          *              Form  H * C  or  H' * C  where  C = ( C1 )
2001          *                                                  ( C2 )
2002          *
2003          *              W := C' * V  =  (C1'*V1 + C2'*V2)  (stored in WORK)
2004          *
2005          *              W := C1'
2006          **/
2007         for (j=1 ; j<=k ; j+=1) {
2008           cblas_dcopy( n, &c_2( j, 1 ), ldc, &work_2( 1, j ), 1 );
2009         }
2010         /**
2011          *              W := W * V1
2012          **/
2013         cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans,
2014                     CblasUnit, n, k, one, v, ldv, work, ldwork );
2015         if( m>k ) {
2016           /**
2017            *                 W := W + C2'*V2
2018            **/
2019           cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, n, k, m-k,
2020                       one, &c_2( k+1, 1 ), ldc, &v_2( k+1, 1 ), ldv,
2021                       one, work, ldwork );
2022         }
2023         /**
2024          *              W := W * T'  or  W * T
2025          **/
2026         cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, transt,
2027                     CblasNonUnit, n, k, one, t, ldt, work, ldwork );
2028         /**
2029          *              C := C - V * W'
2030          **/
2031         if( m>k ) {
2032           /**
2033            *                 C2 := C2 - V2 * W'
2034            **/
2035           cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, m-k, n, k,
2036                       -one, &v_2( k+1, 1 ), ldv, work, ldwork, one,
2037                       &c_2( k+1, 1 ), ldc );
2038         }
2039         /**
2040          *              W := W * V1'
2041          **/
2042         cblas_dtrmm(CblasColMajor, CblasRight, CblasLower,  CblasTrans,
2043                     CblasUnit, n, k, one, v, ldv, work, ldwork );
2044         /**
2045          *              C1 := C1 - W'
2046          **/
2047         for (j=1 ; j<=k ; j+=1) {
2048           for (i=1 ; i<=n ; i+=1) {
2049             c_2( j, i ) = c_2( j, i ) - work_2( i, j );
2050           }
2051         }
2052 
2053       } else if( lsame( side, 'r' ) ) {
2054         /**
2055          *              Form  C * H  or  C * H'  where  C = ( C1  C2 )
2056          *
2057          *              W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK)
2058          *
2059          *              W := C1
2060          **/
2061         for (j=1 ; j<=k ; j+=1) {
2062           cblas_dcopy( m, &c_2( 1, j ), 1, &work_2( 1, j ), 1 );
2063         }
2064         /**
2065          *              W := W * V1
2066          **/
2067         cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans,
2068                     CblasUnit, m, k, one, v, ldv, work, ldwork );
2069         if( n>k ) {
2070           /**
2071            *                 W := W + C2 * V2
2072            **/
2073           cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, k, n-k,
2074                       one, &c_2( 1, k+1 ), ldc, &v_2( k+1, 1 ), ldv,
2075                       one, work, ldwork );
2076         }
2077         /**
2078          *              W := W * T  or  W * T'
2079          **/
2080         cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, transf,
2081                     CblasNonUnit, m, k, one, t, ldt, work, ldwork );
2082         /**
2083          *              C := C - W * V'
2084          **/
2085         if( n>k ) {
2086           /**
2087            *                 C2 := C2 - W * V2'
2088            **/
2089           cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, m, n-k, k,
2090                       -one, work, ldwork, &v_2( k+1, 1 ), ldv, one,
2091                       &c_2( 1, k+1 ), ldc );
2092         }
2093         /**
2094          *              W := W * V1'
2095          **/
2096         cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, CblasTrans,
2097                     CblasUnit, m, k, one, v, ldv, work, ldwork );
2098         /**
2099          *              C1 := C1 - W
2100          **/
2101         for (j=1 ; j<=k ; j+=1) {
2102           for (i=1 ; i<=m ; i+=1) {
2103             c_2( i, j ) = c_2( i, j ) - work_2( i, j );
2104           }
2105         }
2106       }
2107 
2108     } else {
2109       /**
2110        *           Let  V =  ( V1 )
2111        *                     ( V2 )    (last K rows)
2112        *           where  V2  is unit upper triangular.
2113        **/
2114       if( lsame( side, 'l' ) ) {
2115         /**
2116          *              Form  H * C  or  H' * C  where  C = ( C1 )
2117          *                                                  ( C2 )
2118          *
2119          *              W := C' * V  =  (C1'*V1 + C2'*V2)  (stored in WORK)
2120          *
2121          *              W := C2'
2122          **/
2123         for (j=1 ; j<=k ; j+=1) {
2124           cblas_dcopy( n, &c_2( m-k+j, 1 ), ldc, &work_2( 1, j ), 1 );
2125         }
2126         /**
2127          *              W := W * V2
2128          **/
2129         cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasNoTrans,
2130                     CblasUnit, n, k, one, &v_2( m-k+1, 1 ), ldv,
2131                     work, ldwork );
2132         if( m>k ) {
2133           /**
2134            *                 W := W + C1'*V1
2135            **/
2136           cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, n, k, m-k,
2137                       one, c, ldc, v, ldv, one, work, ldwork );
2138         }
2139         /**
2140          *              W := W * T'  or  W * T
2141          **/
2142         cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, transt,
2143                     CblasNonUnit, n, k, one, t, ldt, work, ldwork );
2144         /**
2145          *              C := C - V * W'
2146          **/
2147         if( m>k ) {
2148           /**
2149            *                 C1 := C1 - V1 * W'
2150            **/
2151           cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, m-k, n, k,
2152                       -one, v, ldv, work, ldwork, one, c, ldc );
2153         }
2154         /**
2155          *              W := W * V2'
2156          **/
2157         cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasTrans,
2158                     CblasUnit, n, k, one, &v_2( m-k+1, 1 ), ldv,
2159                     work, ldwork );
2160         /**
2161          *              C2 := C2 - W'
2162          **/
2163         for (j=1 ; j<=k ; j+=1) {
2164           for (i=1 ; i<=n ; i+=1) {
2165             c_2( m-k+j, i ) = c_2( m-k+j, i ) - work_2( i, j );
2166           }
2167         }
2168 
2169       } else if( lsame( side, 'r' ) ) {
2170         /**
2171          *              Form  C * H  or  C * H'  where  C = ( C1  C2 )
2172          *
2173          *              W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK)
2174          *
2175          *              W := C2
2176          **/
2177         for (j=1 ; j<=k ; j+=1) {
2178           cblas_dcopy( m, &c_2( 1, n-k+j ), 1, &work_2( 1, j ), 1 );
2179         }
2180         /**
2181          *              W := W * V2
2182          **/
2183         cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasNoTrans,
2184                     CblasUnit, m, k, one, &v_2( n-k+1, 1 ), ldv,
2185                     work, ldwork );
2186         if( n>k ) {
2187           /**
2188            *                 W := W + C1 * V1
2189            **/
2190           cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, k, n-k,
2191                       one, c, ldc, v, ldv, one, work, ldwork );
2192         }
2193         /**
2194          *              W := W * T  or  W * T'
2195          **/
2196         cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, transf,
2197                     CblasNonUnit, m, k, one, t, ldt, work, ldwork );
2198         /**
2199          *              C := C - W * V'
2200          **/
2201         if( n>k ) {
2202           /**
2203            *                 C1 := C1 - W * V1'
2204            **/
2205           cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, m, n-k, k,
2206                       -one, work, ldwork, v, ldv, one, c, ldc );
2207         }
2208         /**
2209          *              W := W * V2'
2210          **/
2211         cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasTrans,
2212                     CblasUnit, m, k, one, &v_2( n-k+1, 1 ), ldv,
2213                     work, ldwork );
2214         /**
2215          *              C2 := C2 - W
2216          **/
2217         for (j=1 ; j<=k ; j+=1) {
2218           for (i=1 ; i<=m ; i+=1) {
2219             c_2( i, n-k+j ) = c_2( i, n-k+j ) - work_2( i, j );
2220           }
2221         }
2222       }
2223     }
2224 
2225   } else if( lsame( storev, 'r' ) ) {
2226 
2227     if( lsame( direct, 'f' ) ) {
2228       /**
2229        *           Let  V =  ( V1  V2 )    (V1: first K columns)
2230        *           where  V1  is unit upper triangular.
2231        **/
2232       if( lsame( side, 'l' ) ) {
2233         /**
2234          *              Form  H * C  or  H' * C  where  C = ( C1 )
2235          *                                                  ( C2 )
2236          *
2237          *              W := C' * V'  =  (C1'*V1' + C2'*V2') (stored in WORK)
2238          *
2239          *              W := C1'
2240          **/
2241         for (j=1 ; j<=k ; j+=1) {
2242           cblas_dcopy( n, &c_2( j, 1 ), ldc, &work_2( 1, j ), 1 );
2243         }
2244         /**
2245          *              W := W * V1'
2246          **/
2247         cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasTrans,
2248                     CblasUnit, n, k, one, v, ldv, work, ldwork );
2249         if( m>k ) {
2250           /**
2251            *                 W := W + C2'*V2'
2252            **/
2253           cblas_dgemm(CblasColMajor, CblasTrans, CblasTrans, n, k, m-k, one,
2254                       &c_2( k+1, 1 ), ldc, &v_2( 1, k+1 ), ldv, one,
2255                       work, ldwork );
2256         }
2257         /**
2258          *              W := W * T'  or  W * T
2259          **/
2260         cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, transt,
2261                     CblasNonUnit, n, k, one, t, ldt, work, ldwork );
2262         /**
2263          *              C := C - V' * W'
2264          **/
2265         if( m>k ) {
2266           /**
2267            *                 C2 := C2 - V2' * W'
2268            **/
2269           cblas_dgemm(CblasColMajor, CblasTrans, CblasTrans, m-k, n, k, -one,
2270                       &v_2( 1, k+1 ), ldv, work, ldwork, one,
2271                       &c_2( k+1, 1 ), ldc );
2272         }
2273         /**
2274          *              W := W * V1
2275          **/
2276         cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasNoTrans,
2277                     CblasUnit, n, k, one, v, ldv, work, ldwork );
2278         /**
2279          *              C1 := C1 - W'
2280          **/
2281         for (j=1 ; j<=k ; j+=1) {
2282           for (i=1 ; i<=n ; i+=1) {
2283             c_2( j, i ) = c_2( j, i ) - work_2( i, j );
2284           }
2285         }
2286 
2287       } else if( lsame( side, 'r' ) ) {
2288         /**
2289          *              Form  C * H  or  C * H'  where  C = ( C1  C2 )
2290          *
2291          *              W := C * V'  =  (C1*V1' + C2*V2')  (stored in WORK)
2292          *
2293          *              W := C1
2294          **/
2295         for (j=1 ; j<=k ; j+=1) {
2296           cblas_dcopy( m, &c_2( 1, j ), 1, &work_2( 1, j ), 1 );
2297         }
2298         /**
2299          *              W := W * V1'
2300          **/
2301         cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasTrans,
2302                     CblasUnit, m, k, one, v, ldv, work, ldwork );
2303         if( n>k ) {
2304           /**
2305            *                 W := W + C2 * V2'
2306            **/
2307           cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, m, k, n-k,
2308                       one, &c_2( 1, k+1 ), ldc, &v_2( 1, k+1 ), ldv,
2309                       one, work, ldwork );
2310         }
2311         /**
2312          *              W := W * T  or  W * T'
2313          **/
2314         cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, transf,
2315                     CblasNonUnit, m, k, one, t, ldt, work, ldwork );
2316         /**
2317          *              C := C - W * V
2318          **/
2319         if( n>k ) {
2320           /**
2321            *                 C2 := C2 - W * V2
2322            **/
2323           cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n-k, k,
2324                       -one, work, ldwork, &v_2( 1, k+1 ), ldv, one,
2325                       &c_2( 1, k+1 ), ldc );
2326         }
2327         /**
2328          *              W := W * V1
2329          **/
2330         cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasNoTrans,
2331                     CblasUnit, m, k, one, v, ldv, work, ldwork );
2332         /**
2333          *              C1 := C1 - W
2334          **/
2335         for (j=1 ; j<=k ; j+=1) {
2336           for (i=1 ; i<=m ; i+=1) {
2337             c_2( i, j ) = c_2( i, j ) - work_2( i, j );
2338           }
2339         }
2340 
2341       }
2342 
2343     } else {
2344       /**
2345        *           Let  V =  ( V1  V2 )    (V2: last K columns)
2346        *           where  V2  is unit lower triangular.
2347        **/
2348       if( lsame( side, 'l' ) ) {
2349         /**
2350          *              Form  H * C  or  H' * C  where  C = ( C1 )
2351          *                                                  ( C2 )
2352          *
2353          *              W := C' * V'  =  (C1'*V1' + C2'*V2') (stored in WORK)
2354          *
2355          *              W := C2'
2356          **/
2357         for (j=1 ; j<=k ; j+=1) {
2358           cblas_dcopy( n, &c_2( m-k+j, 1 ), ldc, &work_2( 1, j ), 1 );
2359         }
2360         /**
2361          *              W := W * V2'
2362          **/
2363         cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, CblasTrans,
2364                     CblasUnit, n, k, one, &v_2( 1, m-k+1 ), ldv,
2365                     work, ldwork );
2366         if( m>k ) {
2367           /**
2368            *                 W := W + C1'*V1'
2369            **/
2370           cblas_dgemm(CblasColMajor, CblasTrans, CblasTrans, n, k, m-k, one,
2371                       c, ldc, v, ldv, one, work, ldwork );
2372         }
2373         /**
2374          *              W := W * T'  or  W * T
2375          **/
2376         cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, transt,
2377                     CblasNonUnit, n, k, one, t, ldt, work, ldwork );
2378         /**
2379          *              C := C - V' * W'
2380          **/
2381         if( m>k ) {
2382           /**
2383            *                 C1 := C1 - V1' * W'
2384            **/
2385           cblas_dgemm(CblasColMajor, CblasTrans, CblasTrans, m-k, n, k, -one,
2386                       v, ldv, work, ldwork, one, c, ldc );
2387         }
2388         /**
2389          *              W := W * V2
2390          **/
2391         cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans,
2392                     CblasUnit, n, k, one, &v_2( 1, m-k+1 ), ldv,
2393                     work, ldwork );
2394         /**
2395          *              C2 := C2 - W'
2396          **/
2397         for (j=1 ; j<=k ; j+=1) {
2398           for (i=1 ; i<=n ; i+=1) {
2399             c_2( m-k+j, i ) = c_2( m-k+j, i ) - work_2( i, j );
2400           }
2401         }
2402 
2403       } else if( lsame( side, 'r' ) ) {
2404         /**
2405          *              Form  C * H  or  C * H'  where  C = ( C1  C2 )
2406          *
2407          *              W := C * V'  =  (C1*V1' + C2*V2')  (stored in WORK)
2408          *
2409          *              W := C2
2410          **/
2411         for (j=1 ; j<=k ; j+=1) {
2412           cblas_dcopy( m, &c_2( 1, n-k+j ), 1, &work_2( 1, j ), 1 );
2413         }
2414         /**
2415          *              W := W * V2'
2416          **/
2417         cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, CblasTrans,
2418                     CblasUnit, m, k, one, &v_2( 1, n-k+1 ), ldv,
2419                     work, ldwork );
2420         if( n>k ) {
2421           /**
2422            *                 W := W + C1 * V1'
2423            **/
2424           cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, m, k, n-k,
2425                       one, c, ldc, v, ldv, one, work, ldwork );
2426         }
2427         /**
2428          *              W := W * T  or  W * T'
2429          **/
2430         cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, transf,
2431                     CblasNonUnit, m, k, one, t, ldt, work, ldwork );
2432         /**
2433          *              C := C - W * V
2434          **/
2435         if( n>k ) {
2436           /**
2437            *                 C1 := C1 - W * V1
2438            **/
2439           cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n-k, k,
2440                       -one, work, ldwork, v, ldv, one, c, ldc );
2441         }
2442         /**
2443          *              W := W * V2
2444          **/
2445         cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans,
2446                     CblasUnit, m, k, one, &v_2( 1, n-k+1 ), ldv,
2447                     work, ldwork );
2448         /**
2449          *              C1 := C1 - W
2450          **/
2451         for (j=1 ; j<=k ; j+=1) {
2452           for (i=1 ; i<=m ; i+=1) {
2453             c_2( i, n-k+j ) = c_2( i, n-k+j ) - work_2( i, j );
2454           }
2455         }
2456 
2457       }
2458 
2459     }
2460   }
2461 
2462   return;
2463   /**
2464    *     End of DLARFB
2465    **/
2466 }
2467 
2468 
2469 
dlarft(char direct,char storev,long n,long k,double v[],long ldv,double tau[],double t[],long ldt)2470 void dlarft( char direct, char storev, long n, long k,
2471             double v[], long ldv, double tau[], double t[], long ldt )
2472 {
2473   /**
2474    *  -- LAPACK auxiliary routine (version 1.1) --
2475    *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
2476    *     Courant Institute, Argonne National Lab, and Rice University
2477    *     February 29, 1992
2478    *
2479    *     .. Scalar Arguments ..*/
2480   /**     ..
2481    *     .. Array Arguments ..*/
2482 #undef v_2
2483 #define v_2(a1,a2) v[a1-1+ldv*(a2-1)]
2484 #undef tau_1
2485 #define tau_1(a1) tau[a1-1]
2486 #undef t_2
2487 #define t_2(a1,a2) t[a1-1+ldt*(a2-1)]
2488   /**     ..
2489    *
2490    *  Purpose
2491    *  =======
2492    *
2493    *  DLARFT forms the triangular factor T of a real block reflector H
2494    *  of order n, which is defined as a product of k elementary reflectors.
2495    *
2496    *  If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
2497    *
2498    *  If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
2499    *
2500    *  If STOREV = 'C', the vector which defines the elementary reflector
2501    *  H(i) is stored in the i-th column of the array V, and
2502    *
2503    *     H  =  I - V * T * V'
2504    *
2505    *  If STOREV = 'R', the vector which defines the elementary reflector
2506    *  H(i) is stored in the i-th row of the array V, and
2507    *
2508    *     H  =  I - V' * T * V
2509    *
2510    *  Arguments
2511    *  =========
2512    *
2513    *  DIRECT  (input) CHARACTER*1
2514    *          Specifies the order in which the elementary reflectors are
2515    *          multiplied to form the block reflector:
2516    *          = 'F': H = H(1) H(2) . . . H(k) (Forward)
2517    *          = 'B': H = H(k) . . . H(2) H(1) (Backward)
2518    *
2519    *  STOREV  (input) CHARACTER*1
2520    *          Specifies how the vectors which define the elementary
2521    *          reflectors are stored (see also Further Details):
2522    *          = 'C': columnwise
2523    *          = 'R': rowwise
2524    *
2525    *  N       (input) INTEGER
2526    *          The order of the block reflector H. N >= 0.
2527    *
2528    *  K       (input) INTEGER
2529    *          The order of the triangular factor T (= the number of
2530    *          elementary reflectors). K >= 1.
2531    *
2532    *  V       (input/output) DOUBLE PRECISION array, dimension
2533    *                               (LDV,K) if STOREV = 'C'
2534    *                               (LDV,N) if STOREV = 'R'
2535    *          The matrix V. See further details.
2536    *
2537    *  LDV     (input) INTEGER
2538    *          The leading dimension of the array V.
2539    *          If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
2540    *
2541    *  TAU     (input) DOUBLE PRECISION array, dimension (K)
2542    *          TAU(i) must contain the scalar factor of the elementary
2543    *          reflector H(i).
2544    *
2545    *  T       (output) DOUBLE PRECISION array, dimension (LDT,K)
2546    *          The k by k triangular factor T of the block reflector.
2547    *          If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
2548    *          lower triangular. The rest of the array is not used.
2549    *
2550    *  LDT     (input) INTEGER
2551    *          The leading dimension of the array T. LDT >= K.
2552    *
2553    *  Further Details
2554    *  ===============
2555    *
2556    *  The shape of the matrix V and the storage of the vectors which define
2557    *  the H(i) is best illustrated by the following example with n = 5 and
2558    *  k = 3. The elements equal to 1 are not stored; the corresponding
2559    *  array elements are modified but restored on exit. The rest of the
2560    *  array is not used.
2561    *
2562    *  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R':
2563    *
2564    *               V = (  1       )                 V = (  1 v1 v1 v1 v1 )
2565    *                   ( v1  1    )                     (     1 v2 v2 v2 )
2566    *                   ( v1 v2  1 )                     (        1 v3 v3 )
2567    *                   ( v1 v2 v3 )
2568    *                   ( v1 v2 v3 )
2569    *
2570    *  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R':
2571    *
2572    *               V = ( v1 v2 v3 )                 V = ( v1 v1  1       )
2573    *                   ( v1 v2 v3 )                     ( v2 v2 v2  1    )
2574    *                   (  1 v2 v3 )                     ( v3 v3 v3 v3  1 )
2575    *                   (     1 v3 )
2576    *                   (        1 )
2577    *
2578    *  =====================================================================
2579    *
2580    *     .. Parameters ..*/
2581 #undef one
2582 #define one 1.0e+0
2583 #undef zero
2584 #define zero 0.0e+0
2585   /**     ..
2586    *     .. Local Scalars ..*/
2587   long            i, j;
2588   double    vii;
2589   /**     ..
2590    *     .. Executable Statements ..
2591    *
2592    *     Quick return if possible
2593    **/
2594   /*-----implicit-declarations-----*/
2595   /*-----end-of-declarations-----*/
2596   if( n==0 )
2597     return;
2598 
2599   if( lsame( direct, 'f' ) ) {
2600     for (i=1 ; i<=k ; i+=1) {
2601       if( tau_1( i )==zero ) {
2602         /**
2603          *              H(i)  =  I
2604          **/
2605         for (j=1 ; j<=i ; j+=1) {
2606           t_2( j, i ) = zero;
2607         }
2608       } else {
2609         /**
2610          *              general case
2611          **/
2612         vii = v_2( i, i );
2613         v_2( i, i ) = one;
2614         if( lsame( storev, 'c' ) ) {
2615           /**
2616            *                 T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i)
2617            **/
2618           cblas_dgemv(CblasColMajor, CblasTrans, n-i+1, i-1, -tau_1( i ),
2619                       &v_2( i, 1 ), ldv, &v_2( i, i ), 1, zero,
2620                       &t_2( 1, i ), 1 );
2621         } else {
2622           /**
2623            *                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)'
2624            **/
2625           cblas_dgemv(CblasColMajor, CblasNoTrans, i-1, n-i+1,
2626                       -tau_1( i ), &v_2( 1, i ), ldv, &v_2( i, i ), ldv, zero,
2627                       &t_2( 1, i ), 1 );
2628         }
2629         v_2( i, i ) = vii;
2630         /**
2631          *              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
2632          **/
2633         cblas_dtrmv(CblasColMajor, CblasUpper, CblasNoTrans, CblasNonUnit,
2634                     i-1, t, ldt, &t_2( 1, i ), 1 );
2635         t_2( i, i ) = tau_1( i );
2636       }
2637     }
2638   } else {
2639     for (i=k ; i>=1 ; i+=-1) {
2640       if( tau_1( i )==zero ) {
2641         /**
2642          *              H(i)  =  I
2643          **/
2644         for (j=i ; j<=k ; j+=1) {
2645           t_2( j, i ) = zero;
2646         }
2647       } else {
2648         /**
2649          *              general case
2650          **/
2651         if( i<k ) {
2652           if( lsame( storev, 'c' ) ) {
2653             vii = v_2( n-k+i, i );
2654             v_2( n-k+i, i ) = one;
2655             /**
2656              *             T(i+1:k,i) :=
2657              *                   - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i)
2658              **/
2659             cblas_dgemv(CblasColMajor, CblasTrans, n-k+i, k-i,
2660                         -tau_1( i ), &v_2( 1, i+1 ), ldv, &v_2( 1, i ),
2661                         1, zero, &t_2( i+1, i ), 1 );
2662             v_2( n-k+i, i ) = vii;
2663           } else {
2664             vii = v_2( i, n-k+i );
2665             v_2( i, n-k+i ) = one;
2666             /**
2667              *         T(i+1:k,i) :=
2668              *                  - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)'
2669              **/
2670             cblas_dgemv(CblasColMajor, CblasNoTrans, k-i, n-k+i,
2671                         -tau_1( i ), &v_2( i+1, 1 ), ldv, &v_2( i, 1 ), ldv,
2672                         zero, &t_2( i+1, i ), 1 );
2673             v_2( i, n-k+i ) = vii;
2674           }
2675           /**
2676            *                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
2677            **/
2678           cblas_dtrmv(CblasColMajor, CblasLower, CblasNoTrans, CblasNonUnit,
2679                       k-i, &t_2( i+1, i+1 ), ldt, &t_2( i+1, i ), 1 );
2680         }
2681         t_2( i, i ) = tau_1( i );
2682       }
2683     }
2684   }
2685   return;
2686   /**
2687    *     End of DLARFT
2688    **/
2689 }
2690 
2691 
2692 
dgeqr2(long m,long n,double a[],long lda,double tau[],double work[],long * info)2693 void dgeqr2( long m, long n, double a[], long lda, double tau[],
2694             double work[], long *info )
2695 {
2696   /**
2697    *  -- LAPACK routine (version 1.1) --
2698    *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
2699    *     Courant Institute, Argonne National Lab, and Rice University
2700    *     February 29, 1992
2701    *
2702    *     .. Scalar Arguments ..*/
2703   /**     ..
2704    *     .. Array Arguments ..*/
2705 #undef work_1
2706 #define work_1(a1) work[a1-1]
2707 #undef tau_1
2708 #define tau_1(a1) tau[a1-1]
2709 #undef a_2
2710 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
2711   /**     ..
2712    *
2713    *  Purpose
2714    *  =======
2715    *
2716    *  DGEQR2 computes a QR factorization of a real m by n matrix A:
2717    *  A = Q * R.
2718    *
2719    *  Arguments
2720    *  =========
2721    *
2722    *  M       (input) INTEGER
2723    *          The number of rows of the matrix A.  M >= 0.
2724    *
2725    *  N       (input) INTEGER
2726    *          The number of columns of the matrix A.  N >= 0.
2727    *
2728    *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
2729    *          On entry, the m by n matrix A.
2730    *          On exit, the elements on and above the diagonal of the array
2731    *          contain the min(m,n) by n upper trapezoidal matrix R (R is
2732    *          upper triangular if m >= n); the elements below the diagonal,
2733    *          with the array TAU, represent the orthogonal matrix Q as a
2734    *          product of elementary reflectors (see Further Details).
2735    *
2736    *  LDA     (input) INTEGER
2737    *          The leading dimension of the array A.  LDA >= max(1,M).
2738    *
2739    *  TAU     (output) DOUBLE PRECISION array, dimension (min(M,N))
2740    *          The scalar factors of the elementary reflectors (see Further
2741    *          Details).
2742    *
2743    *  WORK    (workspace) DOUBLE PRECISION array, dimension (N)
2744    *
2745    *  INFO    (output) INTEGER
2746    *          = 0: successful exit
2747    *          < 0: if INFO = -i, the i-th argument had an illegal value
2748    *
2749    *  Further Details
2750    *  ===============
2751    *
2752    *  The matrix Q is represented as a product of elementary reflectors
2753    *
2754    *     Q = H(1) H(2) . . . H(k), where k = min(m,n).
2755    *
2756    *  Each H(i) has the form
2757    *
2758    *     H(i) = I - tau * v * v'
2759    *
2760    *  where tau is a real scalar, and v is a real vector with
2761    *  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
2762    *  and tau in TAU(i).
2763    *
2764    *  =====================================================================
2765    *
2766    *     .. Parameters ..*/
2767 #undef one
2768 #define one 1.0e+0
2769   /**     ..
2770    *     .. Local Scalars ..*/
2771   long            i, k;
2772   double    aii;
2773   /**     ..
2774    *     .. Intrinsic Functions ..*/
2775   /*      intrinsic          max, min;*/
2776   /**     ..
2777    *     .. Executable Statements ..
2778    *
2779    *     Test the input arguments
2780    **/
2781   /*-----implicit-declarations-----*/
2782   /*-----end-of-declarations-----*/
2783   *info = 0;
2784   if( m<0 ) {
2785     *info = -1;
2786   } else if( n<0 ) {
2787     *info = -2;
2788   } else if( lda<max( 1, m ) ) {
2789     *info = -4;
2790   }
2791   if( *info!=0 ) {
2792     xerbla( "dgeqr2", -*info );
2793     return;
2794   }
2795 
2796   k = min( m, n );
2797 
2798   for (i=1 ; i<=k ; i+=1) {
2799     /**
2800      *        Generate elementary reflector H(i) to annihilate A(i+1:m,i)
2801      **/
2802     dlarfg( m-i+1, &a_2( i, i ), &a_2( min( i+1, m ), i ), 1,
2803            &tau_1( i ) );
2804     if( i<n ) {
2805       /**
2806        *           Apply H(i) to A(i:m,i+1:n) from the left
2807        **/
2808       aii = a_2( i, i );
2809       a_2( i, i ) = one;
2810       dlarf( 'l'/*eft*/, m-i+1, n-i, &a_2( i, i ), 1, tau_1( i ),
2811             &a_2( i, i+1 ), lda, work );
2812       a_2( i, i ) = aii;
2813     }
2814   }
2815   return;
2816   /**
2817    *     End of DGEQR2
2818    **/
2819 }
2820 
2821 
2822 
dlarf(char side,long m,long n,double v[],long incv,double tau,double c[],long ldc,double work[])2823 void dlarf( char side, long m, long n, double v[], long incv,
2824            double tau, double c[], long ldc, double work[] )
2825 {
2826   /**
2827    *  -- LAPACK auxiliary routine (version 1.1) --
2828    *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
2829    *     Courant Institute, Argonne National Lab, and Rice University
2830    *     February 29, 1992
2831    *
2832    *     .. Scalar Arguments ..*/
2833   /**     ..
2834    *     .. Array Arguments ..*/
2835 #undef work_1
2836 #define work_1(a1) work[a1-1]
2837 #undef v_1
2838 #define v_1(a1) v[a1-1]
2839 #undef c_2
2840 #define c_2(a1,a2) c[a1-1+ldc*(a2-1)]
2841   /**     ..
2842    *
2843    *  Purpose
2844    *  =======
2845    *
2846    *  DLARF applies a real elementary reflector H to a real m by n matrix
2847    *  C, from either the left or the right. H is represented in the form
2848    *
2849    *        H = I - tau * v * v'
2850    *
2851    *  where tau is a real scalar and v is a real vector.
2852    *
2853    *  If tau = 0, then H is taken to be the unit matrix.
2854    *
2855    *  Arguments
2856    *  =========
2857    *
2858    *  SIDE    (input) CHARACTER*1
2859    *          = 'L': form  H * C
2860    *          = 'R': form  C * H
2861    *
2862    *  M       (input) INTEGER
2863    *          The number of rows of the matrix C.
2864    *
2865    *  N       (input) INTEGER
2866    *          The number of columns of the matrix C.
2867    *
2868    *  V       (input) DOUBLE PRECISION array, dimension
2869    *                     (1 + (M-1)*abs(INCV)) if SIDE = 'L'
2870    *                  or (1 + (N-1)*abs(INCV)) if SIDE = 'R'
2871    *          The vector v in the representation of H. V is not used if
2872    *          TAU = 0.
2873    *
2874    *  INCV    (input) INTEGER
2875    *          The increment between elements of v. INCV <> 0.
2876    *
2877    *  TAU     (input) DOUBLE PRECISION
2878    *          The value tau in the representation of H.
2879    *
2880    *  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
2881    *          On entry, the m by n matrix C.
2882    *          On exit, C is overwritten by the matrix H * C if SIDE = 'L',
2883    *          or C * H if SIDE = 'R'.
2884    *
2885    *  LDC     (input) INTEGER
2886    *          The leading dimension of the array C. LDC >= max(1,M).
2887    *
2888    *  WORK    (workspace) DOUBLE PRECISION array, dimension
2889    *                         (N) if SIDE = 'L'
2890    *                      or (M) if SIDE = 'R'
2891    *
2892    *  =====================================================================
2893    *
2894    *     .. Parameters ..*/
2895 #undef one
2896 #define one 1.0e+0
2897 #undef zero
2898 #define zero 0.0e+0
2899   /**     ..
2900    *     .. Executable Statements ..
2901    **/
2902   /*-----implicit-declarations-----*/
2903   /*-----end-of-declarations-----*/
2904   if( lsame( side, 'l' ) ) {
2905     /**
2906      *        Form  H * C
2907      **/
2908     if( tau!=zero ) {
2909       /**
2910        *           w := C' * v
2911        **/
2912       cblas_dgemv(CblasColMajor, CblasTrans, m, n, one, c, ldc,
2913                   v, incv, zero, work, 1 );
2914       /**
2915        *           C := C - v * w'
2916        **/
2917       cblas_dger(CblasColMajor, m, n, -tau, v, incv, work, 1, c, ldc );
2918     }
2919   } else {
2920     /**
2921      *        Form  C * H
2922      **/
2923     if( tau!=zero ) {
2924       /**
2925        *           w := C * v
2926        **/
2927       cblas_dgemv(CblasColMajor, CblasNoTrans, m, n, one, c, ldc,
2928                   v, incv, zero, work, 1 );
2929       /**
2930        *           C := C - w * v'
2931        **/
2932       cblas_dger(CblasColMajor, m, n, -tau, work, 1, v, incv, c, ldc );
2933     }
2934   }
2935   return;
2936   /**
2937    *     End of DLARF
2938    **/
2939 }
2940 
2941 
2942 
dlarfg(long n,double * alpha,double x[],long incx,double * tau)2943 void dlarfg( long n, double *alpha, double x[], long incx, double *tau )
2944 {
2945   /**
2946    *  -- LAPACK auxiliary routine (version 1.1) --
2947    *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
2948    *     Courant Institute, Argonne National Lab, and Rice University
2949    *     February 29, 1992
2950    *
2951    *     .. Scalar Arguments ..*/
2952   /**     ..
2953    *     .. Array Arguments ..*/
2954 #undef x_1
2955 #define x_1(a1) x[a1-1]
2956   /**     ..
2957    *
2958    *  Purpose
2959    *  =======
2960    *
2961    *  DLARFG generates a real elementary reflector H of order n, such
2962    *  that
2963    *
2964    *        H * ( alpha ) = ( beta ),   H' * H = I.
2965    *            (   x   )   (   0  )
2966    *
2967    *  where alpha and beta are scalars, and x is an (n-1)-element real
2968    *  vector. H is represented in the form
2969    *
2970    *        H = I - tau * ( 1 ) * ( 1 v' ) ,
2971    *                      ( v )
2972    *
2973    *  where tau is a real scalar and v is a real (n-1)-element
2974    *  vector.
2975    *
2976    *  If the elements of x are all zero, then tau = 0 and H is taken to be
2977    *  the unit matrix.
2978    *
2979    *  Otherwise  1 <= tau <= 2.
2980    *
2981    *  Arguments
2982    *  =========
2983    *
2984    *  N       (input) INTEGER
2985    *          The order of the elementary reflector.
2986    *
2987    *  ALPHA   (input/output) DOUBLE PRECISION
2988    *          On entry, the value alpha.
2989    *          On exit, it is overwritten with the value beta.
2990    *
2991    *  X       (input/output) DOUBLE PRECISION array, dimension
2992    *                         (1+(N-2)*abs(INCX))
2993    *          On entry, the vector x.
2994    *          On exit, it is overwritten with the vector v.
2995    *
2996    *  INCX    (input) INTEGER
2997    *          The increment between elements of X. INCX <> 0.
2998    *
2999    *  TAU     (output) DOUBLE PRECISION
3000    *          The value tau.
3001    *
3002    *  =====================================================================
3003    *
3004    *     .. Parameters ..*/
3005 #undef one
3006 #define one 1.0e+0
3007 #undef zero
3008 #define zero 0.0e+0
3009   /**     ..
3010    *     .. Local Scalars ..*/
3011   long            j, knt;
3012   double    beta, rsafmn, safmin, xnorm;
3013   /**     ..
3014    *     .. Intrinsic Functions ..*/
3015   /*      intrinsic          abs, sign;*/
3016   /**     ..
3017    *     .. Executable Statements ..
3018    **/
3019   /*-----implicit-declarations-----*/
3020   /*-----end-of-declarations-----*/
3021   if( n<=1 ) {
3022     *tau = zero;
3023     return;
3024   }
3025 
3026   xnorm = cblas_dnrm2( n-1, x, incx );
3027 
3028   if( xnorm==zero ) {
3029     /**
3030      *        H  =  I
3031      **/
3032     *tau = zero;
3033   } else {
3034     /**
3035      *        general case
3036      **/
3037     beta = -sign( dlapy2( *alpha, xnorm ), *alpha );
3038     safmin = dlamch( 's' );
3039     if( abs( beta )<safmin ) {
3040       /**
3041        *           XNORM, BETA may be inaccurate; scale X and recompute them
3042        **/
3043       rsafmn = one / safmin;
3044       knt = 0;
3045     L_10:
3046       knt = knt + 1;
3047       cblas_dscal( n-1, rsafmn, x, incx );
3048       beta = beta*rsafmn;
3049       *alpha = *alpha*rsafmn;
3050       if( abs( beta )<safmin )
3051         goto L_10;
3052       /**
3053        *           New BETA is at most 1, at least SAFMIN
3054        **/
3055       xnorm = cblas_dnrm2( n-1, x, incx );
3056       beta = -sign( dlapy2( *alpha, xnorm ), *alpha );
3057       *tau = ( beta-*alpha ) / beta;
3058       cblas_dscal( n-1, one / ( *alpha-beta ), x, incx );
3059       /**
3060        *           If ALPHA is subnormal, it may lose relative accuracy
3061        **/
3062       *alpha = beta;
3063       for (j=1 ; j<=knt ; j+=1) {
3064         *alpha = *alpha*safmin;
3065       }
3066     } else {
3067       *tau = ( beta-*alpha ) / beta;
3068       cblas_dscal( n-1, one / ( *alpha-beta ), x, incx );
3069       *alpha = beta;
3070     }
3071   }
3072 
3073   return;
3074   /**
3075    *     End of DLARFG
3076    **/
3077 }
3078 
3079 
3080 
dlapy2(double x,double y)3081 double   dlapy2( double x, double y )
3082 {
3083   /**
3084    *  -- LAPACK auxiliary routine (version 1.1) --
3085    *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
3086    *     Courant Institute, Argonne National Lab, and Rice University
3087    *     October 31, 1992
3088    *
3089    *     .. Scalar Arguments ..*/
3090   double dlapy2_R;
3091   /**     ..
3092    *
3093    *  Purpose
3094    *  =======
3095    *
3096    *  DLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary
3097    *  overflow.
3098    *
3099    *  Arguments
3100    *  =========
3101    *
3102    *  X       (input) DOUBLE PRECISION
3103    *  Y       (input) DOUBLE PRECISION
3104    *          X and Y specify the values x and y.
3105    *
3106    *  =====================================================================
3107    *
3108    *     .. Parameters ..*/
3109 #undef zero
3110 #define zero 0.0e0
3111 #undef one
3112 #define one 1.0e0
3113   /**     ..
3114    *     .. Local Scalars ..*/
3115   double    w, xabs, yabs, z;
3116   /**     ..
3117    *     .. Intrinsic Functions ..*/
3118   /*      intrinsic          abs, max, min, sqrt;*/
3119   /**     ..
3120    *     .. Executable Statements ..
3121    **/
3122   /*-----implicit-declarations-----*/
3123   /*-----end-of-declarations-----*/
3124   xabs = abs( x );
3125   yabs = abs( y );
3126   w = max( xabs, yabs );
3127   z = min( xabs, yabs );
3128   if( z==zero ) {
3129     dlapy2_R = w;
3130   } else {
3131     dlapy2_R = w*sqrt( one+( z / w )*( z / w ) );
3132   }
3133   return dlapy2_R;
3134   /**
3135    *     End of DLAPY2
3136    **/
3137 }
3138 
3139 
3140 
dlascl(char type,long kl,long ku,double cfrom,double cto,long m,long n,double a[],long lda,long * info)3141 void dlascl( char type, long kl, long ku, double cfrom, double cto,
3142             long m, long n, double a[], long lda, long *info )
3143 {
3144   /**
3145    *  -- LAPACK auxiliary routine (version 1.1) --
3146    *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
3147    *     Courant Institute, Argonne National Lab, and Rice University
3148    *     February 29, 1992
3149    *
3150    *     .. Scalar Arguments ..*/
3151   /**     ..
3152    *     .. Array Arguments ..*/
3153 #undef a_2
3154 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
3155   /**     ..
3156    *
3157    *  Purpose
3158    *  =======
3159    *
3160    *  DLASCL multiplies the M by N real matrix A by the real scalar
3161    *  CTO/CFROM.  This is done without over/underflow as long as the final
3162    *  result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that
3163    *  A may be full, upper triangular, lower triangular, upper Hessenberg,
3164    *  or banded.
3165    *
3166    *  Arguments
3167    *  =========
3168    *
3169    *  TYPE    (input) CHARACTER*1
3170    *          TYPE indices the storage type of the input matrix.
3171    *          = 'G':  A is a full matrix.
3172    *          = 'L':  A is a lower triangular matrix.
3173    *          = 'U':  A is an upper triangular matrix.
3174    *          = 'H':  A is an upper Hessenberg matrix.
3175    *          = 'B':  A is a symmetric band matrix with lower bandwidth KL
3176    *                  and upper bandwidth KU and with the only the lower
3177    *                  half stored.
3178    *          = 'Q':  A is a symmetric band matrix with lower bandwidth KL
3179    *                  and upper bandwidth KU and with the only the upper
3180    *                  half stored.
3181    *          = 'Z':  A is a band matrix with lower bandwidth KL and upper
3182    *                  bandwidth KU.
3183    *
3184    *  KL      (input) INTEGER
3185    *          The lower bandwidth of A.  Referenced only if TYPE = 'B',
3186    *          'Q' or 'Z'.
3187    *
3188    *  KU      (input) INTEGER
3189    *          The upper bandwidth of A.  Referenced only if TYPE = 'B',
3190    *          'Q' or 'Z'.
3191    *
3192    *  CFROM   (input) DOUBLE PRECISION
3193    *  CTO     (input) DOUBLE PRECISION
3194    *          The matrix A is multiplied by CTO/CFROM. A(I,J) is computed
3195    *          without over/underflow if the final result CTO*A(I,J)/CFROM
3196    *          can be represented without over/underflow.  CFROM must be
3197    *          nonzero.
3198    *
3199    *  M       (input) INTEGER
3200    *          The number of rows of the matrix A.  M >= 0.
3201    *
3202    *  N       (input) INTEGER
3203    *          The number of columns of the matrix A.  N >= 0.
3204    *
3205    *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,M)
3206    *          The matrix to be multiplied by CTO/CFROM.  See TYPE for the
3207    *          storage type.
3208    *
3209    *  LDA     (input) INTEGER
3210    *          The leading dimension of the array A.  LDA >= max(1,M).
3211    *
3212    *  INFO    (output) INTEGER
3213    *          0  - successful exit
3214    *          <0 - if INFO = -i, the i-th argument had an illegal value.
3215    *
3216    *  =====================================================================
3217    *
3218    *     .. Parameters ..*/
3219 #undef zero
3220 #define zero 0.0e0
3221 #undef one
3222 #define one 1.0e0
3223   /**     ..
3224    *     .. Local Scalars ..*/
3225   int            done;
3226   long            i, itype, j, k1, k2, k3, k4;
3227   double    bignum, cfrom1, cfromc, cto1, ctoc, mul, smlnum;
3228   /**     ..
3229    *     .. Intrinsic Functions ..*/
3230   /*      intrinsic          abs, max, min;*/
3231   /**     ..
3232    *     .. Executable Statements ..
3233    *
3234    *     Test the input arguments
3235    **/
3236   /*-----implicit-declarations-----*/
3237   /*-----end-of-declarations-----*/
3238   *info = 0;
3239 
3240   if( lsame( type, 'g' ) ) {
3241     itype = 0;
3242   } else if( lsame( type, 'l' ) ) {
3243     itype = 1;
3244   } else if( lsame( type, 'u' ) ) {
3245     itype = 2;
3246   } else if( lsame( type, 'h' ) ) {
3247     itype = 3;
3248   } else if( lsame( type, 'b' ) ) {
3249     itype = 4;
3250   } else if( lsame( type, 'q' ) ) {
3251     itype = 5;
3252   } else if( lsame( type, 'z' ) ) {
3253     itype = 6;
3254   } else {
3255     itype = -1;
3256   }
3257 
3258   if( itype==-1 ) {
3259     *info = -1;
3260   } else if( cfrom==zero ) {
3261     *info = -4;
3262   } else if( m<0 ) {
3263     *info = -6;
3264   } else if( n<0 || ( itype==4 && n!=m ) ||
3265             ( itype==5 && n!=m ) ) {
3266     *info = -7;
3267   } else if( itype<=3 && lda<max( 1, m ) ) {
3268     *info = -9;
3269   } else if( itype>=4 ) {
3270     if( kl<0 || kl>max( m-1, 0 ) ) {
3271       *info = -2;
3272     } else if( ku<0 || ku>max( n-1, 0 ) ||
3273               ( ( itype==4 || itype==5 ) && kl!=ku ) )
3274       {
3275         *info = -3;
3276       } else if( ( itype==4 && lda<kl+1 ) ||
3277                 ( itype==5 && lda<ku+1 ) ||
3278                 ( itype==6 && lda<2*kl+ku+1 ) ) {
3279         *info = -9;
3280       }
3281   }
3282 
3283   if( *info!=0 ) {
3284     xerbla( "dlascl", -*info );
3285     return;
3286   }
3287   /**
3288    *     Quick return if possible
3289    **/
3290   if( n==0 || m==0 )
3291     return;
3292   /**
3293    *     Get machine parameters
3294    **/
3295   smlnum = dlamch( 's' );
3296   bignum = one / smlnum;
3297 
3298   cfromc = cfrom;
3299   ctoc = cto;
3300 
3301  L_10:
3302   cfrom1 = cfromc*smlnum;
3303   cto1 = ctoc / bignum;
3304   if( abs( cfrom1 )>abs( ctoc ) && ctoc!=zero ) {
3305     mul = smlnum;
3306     done = 0;
3307     cfromc = cfrom1;
3308   } else if( abs( cto1 )>abs( cfromc ) ) {
3309     mul = bignum;
3310     done = 0;
3311     ctoc = cto1;
3312   } else {
3313     mul = ctoc / cfromc;
3314     done = 1;
3315   }
3316 
3317   if( itype==0 ) {
3318     /**
3319      *        Full matrix
3320      **/
3321     for (j=1 ; j<=n ; j+=1) {
3322       for (i=1 ; i<=m ; i+=1) {
3323         a_2( i, j ) = a_2( i, j )*mul;
3324       }
3325     }
3326 
3327   } else if( itype==1 ) {
3328     /**
3329      *        Lower triangular matrix
3330      **/
3331     for (j=1 ; j<=n ; j+=1) {
3332       for (i=j ; i<=m ; i+=1) {
3333         a_2( i, j ) = a_2( i, j )*mul;
3334       }
3335     }
3336 
3337   } else if( itype==2 ) {
3338     /**
3339      *        Upper triangular matrix
3340      **/
3341     for (j=1 ; j<=n ; j+=1) {
3342       for (i=1 ; i<=min( j, m ) ; i+=1) {
3343         a_2( i, j ) = a_2( i, j )*mul;
3344       }
3345     }
3346 
3347   } else if( itype==3 ) {
3348     /**
3349      *        Upper Hessenberg matrix
3350      **/
3351     for (j=1 ; j<=n ; j+=1) {
3352       for (i=1 ; i<=min( j+1, m ) ; i+=1) {
3353         a_2( i, j ) = a_2( i, j )*mul;
3354       }
3355     }
3356 
3357   } else if( itype==4 ) {
3358     /**
3359      *        Lower half of a symmetric band matrix
3360      **/
3361     k3 = kl + 1;
3362     k4 = n + 1;
3363     for (j=1 ; j<=n ; j+=1) {
3364       for (i=1 ; i<=min( k3, k4-j ) ; i+=1) {
3365         a_2( i, j ) = a_2( i, j )*mul;
3366       }
3367     }
3368 
3369   } else if( itype==5 ) {
3370     /**
3371      *        Upper half of a symmetric band matrix
3372      **/
3373     k1 = ku + 2;
3374     k3 = ku + 1;
3375     for (j=1 ; j<=n ; j+=1) {
3376       for (i=max( k1-j, 1 ) ; i<=k3 ; i+=1) {
3377         a_2( i, j ) = a_2( i, j )*mul;
3378       }
3379     }
3380 
3381   } else if( itype==6 ) {
3382     /**
3383      *        Band matrix
3384      **/
3385     k1 = kl + ku + 2;
3386     k2 = kl + 1;
3387     k3 = 2*kl + ku + 1;
3388     k4 = kl + ku + 1 + m;
3389     for (j=1 ; j<=n ; j+=1) {
3390       for (i=max( k1-j, k2 ) ; i<=min( k3, k4-j ) ; i+=1) {
3391         a_2( i, j ) = a_2( i, j )*mul;
3392       }
3393     }
3394 
3395   }
3396 
3397   if( !done )
3398     goto L_10;
3399 
3400   return;
3401   /**
3402    *     End of DLASCL
3403    **/
3404 }
3405 /**
3406 ************************************************************************
3407 **/
3408 
3409 
3410 
dlange(char norm,long m,long n,double a[],long lda,double work[])3411 double   dlange( char norm, long m, long n, double a[], long lda, double work[] )
3412 {
3413   /**
3414    *  -- LAPACK auxiliary routine (version 1.1) --
3415    *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
3416    *     Courant Institute, Argonne National Lab, and Rice University
3417    *     October 31, 1992
3418    *
3419    *     .. Scalar Arguments ..*/
3420   double dlange_R;
3421   /**     ..
3422    *     .. Array Arguments ..*/
3423 #undef work_1
3424 #define work_1(a1) work[a1-1]
3425 #undef a_2
3426 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
3427   /**     ..
3428    *
3429    *  Purpose
3430    *  =======
3431    *
3432    *  DLANGE  returns the value of the one norm,  or the Frobenius norm, or
3433    *  the  infinity norm,  or the  element of  largest absolute value  of a
3434    *  real matrix A.
3435    *
3436    *  Description
3437    *  ===========
3438    *
3439    *  DLANGE returns the value
3440    *
3441    *     DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'
3442    *              (
3443    *              ( norm1(A),         NORM = '1', 'O' or 'o'
3444    *              (
3445    *              ( normI(A),         NORM = 'I' or 'i'
3446    *              (
3447    *              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
3448    *
3449    *  where  norm1  denotes the  one norm of a matrix (maximum column sum),
3450    *  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
3451    *  normF  denotes the  Frobenius norm of a matrix (square root of sum of
3452    *  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
3453    *
3454    *  Arguments
3455    *  =========
3456    *
3457    *  NORM    (input) CHARACTER*1
3458    *          Specifies the value to be returned in DLANGE as described
3459    *          above.
3460    *
3461    *  M       (input) INTEGER
3462    *          The number of rows of the matrix A.  M >= 0.  When M = 0,
3463    *          DLANGE is set to zero.
3464    *
3465    *  N       (input) INTEGER
3466    *          The number of columns of the matrix A.  N >= 0.  When N = 0,
3467    *          DLANGE is set to zero.
3468    *
3469    *  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
3470    *          The m by n matrix A.
3471    *
3472    *  LDA     (input) INTEGER
3473    *          The leading dimension of the array A.  LDA >= max(M,1).
3474    *
3475    *  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
3476    *          where LWORK >= M when NORM = 'I'; otherwise, WORK is not
3477    *          referenced.
3478    *
3479    * =====================================================================
3480    *
3481    *     .. Parameters ..*/
3482 #undef one
3483 #define one 1.0e+0
3484 #undef zero
3485 #define zero 0.0e+0
3486   /**     ..
3487    *     .. Local Scalars ..*/
3488   long            i, j;
3489   double    scale, sum, value=0.0;
3490   /**     ..
3491    *     .. Intrinsic Functions ..*/
3492   /*      intrinsic          abs, max, min, sqrt;*/
3493   /**     ..
3494    *     .. Executable Statements ..
3495    **/
3496   /*-----implicit-declarations-----*/
3497   /*-----end-of-declarations-----*/
3498   if( min( m, n )==0 ) {
3499     value = zero;
3500   } else if( lsame( norm, 'm' ) ) {
3501     /**
3502      *        Find max(abs(A(i,j))).
3503      **/
3504     value = zero;
3505     for (j=1 ; j<=n ; j+=1) {
3506       for (i=1 ; i<=m ; i+=1) {
3507         value = max( value, abs( a_2( i, j ) ) );
3508       }
3509     }
3510   } else if( ( lsame( norm, 'o' ) ) || ( norm=='1' ) ) {
3511     /**
3512      *        Find norm1(A).
3513      **/
3514     value = zero;
3515     for (j=1 ; j<=n ; j+=1) {
3516       sum = zero;
3517       for (i=1 ; i<=m ; i+=1) {
3518         sum = sum + abs( a_2( i, j ) );
3519       }
3520       value = max( value, sum );
3521     }
3522   } else if( lsame( norm, 'i' ) ) {
3523     /**
3524      *        Find normI(A).
3525      **/
3526     for (i=1 ; i<=m ; i+=1) {
3527       work_1( i ) = zero;
3528     }
3529     for (j=1 ; j<=n ; j+=1) {
3530       for (i=1 ; i<=m ; i+=1) {
3531         work_1( i ) = work_1( i ) + abs( a_2( i, j ) );
3532       }
3533     }
3534     value = zero;
3535     for (i=1 ; i<=m ; i+=1) {
3536       value = max( value, work_1( i ) );
3537     }
3538   } else if( ( lsame( norm, 'f' ) ) || ( lsame( norm, 'e' ) ) ) {
3539     /**
3540      *        Find normF(A).
3541      **/
3542     scale = zero;
3543     sum = one;
3544     for (j=1 ; j<=n ; j+=1) {
3545       dlassq( m, &a_2( 1, j ), 1, &scale, &sum );
3546     }
3547     value = scale*sqrt( sum );
3548   }
3549 
3550   dlange_R = value;
3551   return dlange_R;
3552   /**
3553    *     End of DLANGE
3554    **/
3555 }
3556 
3557 
3558 
dlassq(long n,double x[],long incx,double * scale,double * sumsq)3559 void dlassq( long n, double x[], long incx, double *scale, double *sumsq )
3560 {
3561   /**
3562    *  -- LAPACK auxiliary routine (version 1.1) --
3563    *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
3564    *     Courant Institute, Argonne National Lab, and Rice University
3565    *     October 31, 1992
3566    *
3567    *     .. Scalar Arguments ..*/
3568   /**     ..
3569    *     .. Array Arguments ..*/
3570 #undef x_1
3571 #define x_1(a1) x[a1-1]
3572   /**     ..
3573    *
3574    *  Purpose
3575    *  =======
3576    *
3577    *  DLASSQ  returns the values  scl  and  smsq  such that
3578    *
3579    *     ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq,
3580    *
3581    *  where  x( i ) = X( 1 + ( i - 1 )*INCX ). The value of  sumsq  is
3582    *  assumed to be non-negative and  scl  returns the value
3583    *
3584    *     scl = max( scale, abs( x( i ) ) ).
3585    *
3586    *  scale and sumsq must be supplied in SCALE and SUMSQ and
3587    *  scl and smsq are overwritten on SCALE and SUMSQ respectively.
3588    *
3589    *  The routine makes only one pass through the vector x.
3590    *
3591    *  Arguments
3592    *  =========
3593    *
3594    *  N       (input) INTEGER
3595    *          The number of elements to be used from the vector X.
3596    *
3597    *  X       (input) DOUBLE PRECISION
3598    *          The vector for which a scaled sum of squares is computed.
3599    *             x( i )  = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n.
3600    *
3601    *  INCX    (input) INTEGER
3602    *          The increment between successive values of the vector X.
3603    *          INCX > 0.
3604    *
3605    *  SCALE   (input/output) DOUBLE PRECISION
3606    *          On entry, the value  scale  in the equation above.
3607    *          On exit, SCALE is overwritten with  scl , the scaling factor
3608    *          for the sum of squares.
3609    *
3610    *  SUMSQ   (input/output) DOUBLE PRECISION
3611    *          On entry, the value  sumsq  in the equation above.
3612    *          On exit, SUMSQ is overwritten with  smsq , the basic sum of
3613    *          squares from which  scl  has been factored out.
3614    *
3615    * =====================================================================
3616    *
3617    *     .. Parameters ..*/
3618 #undef zero
3619 #define zero 0.0e+0
3620   /**     ..
3621    *     .. Local Scalars ..*/
3622   long            ix;
3623   double    absxi;
3624   /**     ..
3625    *     .. Intrinsic Functions ..*/
3626   /*      intrinsic          abs;*/
3627   /**     ..
3628    *     .. Executable Statements ..
3629    **/
3630   /*-----implicit-declarations-----*/
3631   /*-----end-of-declarations-----*/
3632   if( n>0 ) {
3633     for (ix=1 ; incx>0?ix<=1 + ( n-1 )*incx:ix>=1 + ( n-1 )*incx ; ix+=incx) {
3634       if( x_1( ix )!=zero ) {
3635         absxi = abs( x_1( ix ) );
3636         if( *scale<absxi ) {
3637           *sumsq = 1 + *sumsq*( *scale / absxi )*( *scale / absxi );
3638           *scale = absxi;
3639         } else {
3640           *sumsq = *sumsq + ( absxi / *scale )*( absxi / *scale );
3641         }
3642       }
3643     }
3644   }
3645   return;
3646   /**
3647    *     End of DLASSQ
3648    **/
3649 }
3650 
3651 
3652 
dlaset(char uplo,long m,long n,double alpha,double beta,double a[],long lda)3653 void dlaset( char uplo, long m, long n, double alpha, double beta,
3654             double a[], long lda )
3655 {
3656   /**
3657    *  -- LAPACK auxiliary routine (version 1.1) --
3658    *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
3659    *     Courant Institute, Argonne National Lab, and Rice University
3660    *     October 31, 1992
3661    *
3662    *     .. Scalar Arguments ..*/
3663   /**     ..
3664    *     .. Array Arguments ..*/
3665 #undef a_2
3666 #define a_2(a1,a2) a[a1-1+lda*(a2-1)]
3667   /**     ..
3668    *
3669    *  Purpose
3670    *  =======
3671    *
3672    *  DLASET initializes an m-by-n matrix A to BETA on the diagonal and
3673    *  ALPHA on the offdiagonals.
3674    *
3675    *  Arguments
3676    *  =========
3677    *
3678    *  UPLO    (input) CHARACTER*1
3679    *          Specifies the part of the matrix A to be set.
3680    *          = 'U':      Upper triangular part is set; the strictly lower
3681    *                      triangular part of A is not changed.
3682    *          = 'L':      Lower triangular part is set; the strictly upper
3683    *                      triangular part of A is not changed.
3684    *          Otherwise:  All of the matrix A is set.
3685    *
3686    *  M       (input) INTEGER
3687    *          The number of rows of the matrix A.  M >= 0.
3688    *
3689    *  N       (input) INTEGER
3690    *          The number of columns of the matrix A.  N >= 0.
3691    *
3692    *  ALPHA   (input) DOUBLE PRECISION
3693    *          The constant to which the offdiagonal elements are to be set.
3694    *
3695    *  BETA    (input) DOUBLE PRECISION
3696    *          The constant to which the diagonal elements are to be set.
3697    *
3698    *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
3699    *          On exit, the leading m-by-n submatrix of A is set as follows:
3700    *
3701    *          if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n,
3702    *          if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n,
3703    *          otherwise,     A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j,
3704    *
3705    *          and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n).
3706    *
3707    *  LDA     (input) INTEGER
3708    *          The leading dimension of the array A.  LDA >= max(1,M).
3709    *
3710    * =====================================================================
3711    *
3712    *     .. Local Scalars ..*/
3713   long            i, j;
3714   /**     ..
3715    *     .. Intrinsic Functions ..*/
3716   /*      intrinsic          min;*/
3717   /**     ..
3718    *     .. Executable Statements ..
3719    **/
3720   /*-----implicit-declarations-----*/
3721   /*-----end-of-declarations-----*/
3722   if( lsame( uplo, 'u' ) ) {
3723     /**
3724      *        Set the strictly upper triangular or trapezoidal part of the
3725      *        array to ALPHA.
3726      **/
3727     for (j=2 ; j<=n ; j+=1) {
3728       for (i=1 ; i<=min( j-1, m ) ; i+=1) {
3729         a_2( i, j ) = alpha;
3730       }
3731     }
3732 
3733   } else if( lsame( uplo, 'l' ) ) {
3734     /**
3735      *        Set the strictly lower triangular or trapezoidal part of the
3736      *        array to ALPHA.
3737      **/
3738     for (j=1 ; j<=min( m, n ) ; j+=1) {
3739       for (i=j + 1 ; i<=m ; i+=1) {
3740         a_2( i, j ) = alpha;
3741       }
3742     }
3743 
3744   } else {
3745     /**
3746      *        Set the leading m-by-n submatrix to ALPHA.
3747      **/
3748     for (j=1 ; j<=n ; j+=1) {
3749       for (i=1 ; i<=m ; i+=1) {
3750         a_2( i, j ) = alpha;
3751       }
3752     }
3753   }
3754   /**
3755    *     Set the first min(M,N) diagonal elements to BETA.
3756    **/
3757   for (i=1 ; i<=min( m, n ) ; i+=1) {
3758     a_2( i, i ) = beta;
3759   }
3760 
3761   return;
3762   /**
3763    *     End of DLASET
3764    **/
3765 }
3766