1      SUBROUTINE CDBTRF( M, N, KL, KU, AB, LDAB, INFO )
2*
3*  -- ScaLAPACK auxiliary routine (version 2.0) --
4*     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
5*
6*     Written by Andrew J. Cleary, University of Tennessee.
7*     August, 1996.
8*     Modified from CGBTRF:
9*  -- LAPACK routine (preliminary version) --
10*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
11*     Courant Institute, Argonne National Lab, and Rice University
12*     August 6, 1991
13*
14*     .. Scalar Arguments ..
15      INTEGER            INFO, KL, KU, LDAB, M, N
16*     ..
17*     .. Array Arguments ..
18      COMPLEX            AB( LDAB, * )
19*     ..
20*
21*  Purpose
22*  =======
23*
24*  Cdbtrf computes an LU factorization of a real m-by-n band matrix A
25*  without using partial pivoting or row interchanges.
26*
27*  This is the blocked version of the algorithm, calling Level 3 BLAS.
28*
29*  Arguments
30*  =========
31*
32*  M       (input) INTEGER
33*          The number of rows of the matrix A.  M >= 0.
34*
35*  N       (input) INTEGER
36*          The number of columns of the matrix A.  N >= 0.
37*
38*  KL      (input) INTEGER
39*          The number of subdiagonals within the band of A.  KL >= 0.
40*
41*  KU      (input) INTEGER
42*          The number of superdiagonals within the band of A.  KU >= 0.
43*
44*  AB      (input/output) REAL array, dimension (LDAB,N)
45*          On entry, the matrix A in band storage, in rows KL+1 to
46*          2*KL+KU+1; rows 1 to KL of the array need not be set.
47*          The j-th column of A is stored in the j-th column of the
48*          array AB as follows:
49*          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
50*
51*          On exit, details of the factorization: U is stored as an
52*          upper triangular band matrix with KL+KU superdiagonals in
53*          rows 1 to KL+KU+1, and the multipliers used during the
54*          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
55*          See below for further details.
56*
57*  LDAB    (input) INTEGER
58*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
59*
60*  INFO    (output) INTEGER
61*          = 0: successful exit
62*          < 0: if INFO = -i, the i-th argument had an illegal value
63*          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
64*               has been completed, but the factor U is exactly
65*               singular, and division by zero will occur if it is used
66*               to solve a system of equations.
67*
68*  Further Details
69*  ===============
70*
71*  The band storage scheme is illustrated by the following example, when
72*  M = N = 6, KL = 2, KU = 1:
73*
74*  On entry:                       On exit:
75*
76*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
77*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
78*     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
79*     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
80*
81*  Array elements marked * are not used by the routine.
82*
83*  =====================================================================
84*
85*     .. Parameters ..
86      REAL               ONE, ZERO
87      PARAMETER          ( ONE = 1.0E+0 )
88      PARAMETER          ( ZERO = 0.0E+0 )
89      COMPLEX            CONE, CZERO
90      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ) )
91      PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ) )
92      INTEGER            NBMAX, LDWORK
93      PARAMETER          ( NBMAX = 64, LDWORK = NBMAX+1 )
94*     ..
95*     .. Local Scalars ..
96      INTEGER            I, I2, I3, II, J, J2, J3, JB, JJ, JM, JP,
97     $                   JU, KM, KV, NB, NW
98*     ..
99*     .. Local Arrays ..
100      COMPLEX              WORK13( LDWORK, NBMAX ),
101     $                   WORK31( LDWORK, NBMAX )
102*     ..
103*     .. External Functions ..
104      INTEGER            ILAENV, ISAMAX
105      EXTERNAL           ILAENV, ISAMAX
106*     ..
107*     .. External Subroutines ..
108      EXTERNAL           CCOPY, CDBTF2, CGEMM, CGERU, CSCAL,
109     $                   CSWAP, CTRSM, XERBLA
110*     ..
111*     .. Intrinsic Functions ..
112      INTRINSIC          MAX, MIN
113*     ..
114*     .. Executable Statements ..
115*
116*     KV is the number of superdiagonals in the factor U
117*
118      KV = KU
119*
120*     Test the input parameters.
121*
122      INFO = 0
123      IF( M.LT.0 ) THEN
124         INFO = -1
125      ELSE IF( N.LT.0 ) THEN
126         INFO = -2
127      ELSE IF( KL.LT.0 ) THEN
128         INFO = -3
129      ELSE IF( KU.LT.0 ) THEN
130         INFO = -4
131      ELSE IF( LDAB.LT.MIN( MIN( KL+KV+1,M ),N ) ) THEN
132         INFO = -6
133      END IF
134      IF( INFO.NE.0 ) THEN
135         CALL XERBLA( 'CDBTRF', -INFO )
136         RETURN
137      END IF
138*
139*     Quick return if possible
140*
141      IF( M.EQ.0 .OR. N.EQ.0 )
142     $     RETURN
143*
144*     Determine the block size for this environment
145*
146      NB = ILAENV( 1, 'CDBTRF', ' ', M, N, KL, KU )
147*
148*     The block size must not exceed the limit set by the size of the
149*     local arrays WORK13 and WORK31.
150*
151      NB = MIN( NB, NBMAX )
152*
153      IF( NB.LE.1 .OR. NB.GT.KL ) THEN
154*
155*        Use unblocked code
156*
157         CALL CDBTF2( M, N, KL, KU, AB, LDAB, INFO )
158      ELSE
159*
160*        Use blocked code
161*
162*        Zero the superdiagonal elements of the work array WORK13
163*
164         DO 20 J = 1, NB
165            DO 10 I = 1, J - 1
166               WORK13( I, J ) = ZERO
167   10       CONTINUE
168   20    CONTINUE
169*
170*        Zero the subdiagonal elements of the work array WORK31
171*
172         DO 40 J = 1, NB
173            DO 30 I = J + 1, NB
174               WORK31( I, J ) = ZERO
175   30       CONTINUE
176   40    CONTINUE
177*
178*        JU is the index of the last column affected by the current
179*        stage of the factorization
180*
181         JU = 1
182*
183         DO 180 J = 1, MIN( M, N ), NB
184            JB = MIN( NB, MIN( M, N )-J+1 )
185*
186*           The active part of the matrix is partitioned
187*
188*              A11   A12   A13
189*              A21   A22   A23
190*              A31   A32   A33
191*
192*           Here A11, A21 and A31 denote the current block of JB columns
193*           which is about to be factorized. The number of rows in the
194*           partitioning are JB, I2, I3 respectively, and the numbers
195*           of columns are JB, J2, J3. The superdiagonal elements of A13
196*           and the subdiagonal elements of A31 lie outside the band.
197*
198            I2 = MIN( KL-JB, M-J-JB+1 )
199            I3 = MIN( JB, M-J-KL+1 )
200*
201*           J2 and J3 are computed after JU has been updated.
202*
203*           Factorize the current block of JB columns
204*
205            DO 80 JJ = J, J + JB - 1
206*
207*              Find pivot and test for singularity. KM is the number of
208*              subdiagonal elements in the current column.
209*
210               KM = MIN( KL, M-JJ )
211               JP = 1
212               IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
213                  JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
214*
215*                 Compute multipliers
216*
217                  CALL CSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
218     $                 1 )
219*
220*                 Update trailing submatrix within the band and within
221*                 the current block. JM is the index of the last column
222*                 which needs to be updated.
223*
224                  JM = MIN( JU, J+JB-1 )
225                  IF( JM.GT.JJ ) THEN
226                     CALL CGERU( KM, JM-JJ, -CONE, AB( KV+2, JJ ), 1,
227     $                          AB( KV, JJ+1 ), LDAB-1,
228     $                          AB( KV+1, JJ+1 ), LDAB-1 )
229                  END IF
230               END IF
231*
232*              Copy current column of A31 into the work array WORK31
233*
234               NW = MIN( JJ-J+1, I3 )
235               IF( NW.GT.0 )
236     $            CALL CCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
237     $                        WORK31( 1, JJ-J+1 ), 1 )
238   80       CONTINUE
239            IF( J+JB.LE.N ) THEN
240*
241*              Apply the row interchanges to the other blocks.
242*
243               J2 = MIN( JU-J+1, KV ) - JB
244               J3 = MAX( 0, JU-J-KV+1 )
245*
246*              Update the relevant part of the trailing submatrix
247*
248               IF( J2.GT.0 ) THEN
249*
250*                 Update A12
251*
252                  CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
253     $                        JB, J2, CONE, AB( KV+1, J ), LDAB-1,
254     $                        AB( KV+1-JB, J+JB ), LDAB-1 )
255*
256                  IF( I2.GT.0 ) THEN
257*
258*                    Update A22
259*
260                     CALL CGEMM( 'No transpose', 'No transpose', I2, J2,
261     $                           JB, -CONE, AB( KV+1+JB, J ), LDAB-1,
262     $                           AB( KV+1-JB, J+JB ), LDAB-1, CONE,
263     $                           AB( KV+1, J+JB ), LDAB-1 )
264                  END IF
265*
266                  IF( I3.GT.0 ) THEN
267*
268*                    Update A32
269*
270                     CALL CGEMM( 'No transpose', 'No transpose', I3, J2,
271     $                           JB, -CONE, WORK31, LDWORK,
272     $                           AB( KV+1-JB, J+JB ), LDAB-1, CONE,
273     $                           AB( KV+KL+1-JB, J+JB ), LDAB-1 )
274                  END IF
275               END IF
276*
277               IF( J3.GT.0 ) THEN
278*
279*                 Copy the lower triangle of A13 into the work array
280*                 WORK13
281*
282                  DO 130 JJ = 1, J3
283                     DO 120 II = JJ, JB
284                        WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
285  120                CONTINUE
286  130             CONTINUE
287*
288*                 Update A13 in the work array
289*
290                  CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
291     $                        JB, J3, CONE, AB( KV+1, J ), LDAB-1,
292     $                        WORK13, LDWORK )
293*
294                  IF( I2.GT.0 ) THEN
295*
296*                    Update A23
297*
298                     CALL CGEMM( 'No transpose', 'No transpose', I2, J3,
299     $                           JB, -CONE, AB( KV+1+JB, J ), LDAB-1,
300     $                           WORK13, LDWORK, CONE, AB( 1+JB, J+KV ),
301     $                           LDAB-1 )
302                  END IF
303*
304                  IF( I3.GT.0 ) THEN
305*
306*                    Update A33
307*
308                     CALL CGEMM( 'No transpose', 'No transpose', I3, J3,
309     $                         JB, -CONE, WORK31, LDWORK, WORK13,
310     $                         LDWORK, CONE, AB( 1+KL, J+KV ), LDAB-1 )
311                  END IF
312*
313*                 Copy the lower triangle of A13 back into place
314*
315                  DO 150 JJ = 1, J3
316                     DO 140 II = JJ, JB
317                        AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
318  140                CONTINUE
319  150             CONTINUE
320               END IF
321            ELSE
322            END IF
323*
324*           copy the upper triangle of A31 back into place
325*
326            DO 170 JJ = J + JB - 1, J, -1
327*
328*              Copy the current column of A31 back into place
329*
330               NW = MIN( I3, JJ-J+1 )
331               IF( NW.GT.0 )
332     $            CALL CCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
333     $                        AB( KV+KL+1-JJ+J, JJ ), 1 )
334  170       CONTINUE
335  180    CONTINUE
336      END IF
337*
338      RETURN
339*
340*     End of CDBTRF
341*
342      END
343