1 /* ./src_f77/cgbtrf.f -- translated by f2c (version 20030320).
2 You must link the resulting object file with the libraries:
3 -lf2c -lm (in that order)
4 */
5
6 #include <punc/vf2c.h>
7
8 /* Table of constant values */
9
10 static complex c_b1 = {1.f,0.f};
11 static integer c__1 = 1;
12 static integer c__65 = 65;
13
cgbtrf_(integer * m,integer * n,integer * kl,integer * ku,complex * ab,integer * ldab,integer * ipiv,integer * info)14 /* Subroutine */ int cgbtrf_(integer *m, integer *n, integer *kl, integer *ku,
15 complex *ab, integer *ldab, integer *ipiv, integer *info)
16 {
17 /* System generated locals */
18 integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6;
19 complex q__1;
20
21 /* Builtin functions */
22 void c_div(complex *, complex *, complex *);
23
24 /* Local variables */
25 static integer i__, j, i2, i3, j2, j3, k2, jb, nb, ii, jj, jm, ip, jp, km,
26 ju, kv, nw;
27 static complex temp;
28 extern /* Subroutine */ int cscal_(integer *, complex *, complex *,
29 integer *), cgemm_(char *, char *, integer *, integer *, integer *
30 , complex *, complex *, integer *, complex *, integer *, complex *
31 , complex *, integer *, ftnlen, ftnlen), cgeru_(integer *,
32 integer *, complex *, complex *, integer *, complex *, integer *,
33 complex *, integer *), ccopy_(integer *, complex *, integer *,
34 complex *, integer *), cswap_(integer *, complex *, integer *,
35 complex *, integer *);
36 static complex work13[4160] /* was [65][64] */, work31[4160] /*
37 was [65][64] */;
38 extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *,
39 integer *, integer *, complex *, complex *, integer *, complex *,
40 integer *, ftnlen, ftnlen, ftnlen, ftnlen), cgbtf2_(integer *,
41 integer *, integer *, integer *, complex *, integer *, integer *,
42 integer *);
43 extern integer icamax_(integer *, complex *, integer *);
44 extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
45 extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
46 integer *, integer *, ftnlen, ftnlen);
47 extern /* Subroutine */ int claswp_(integer *, complex *, integer *,
48 integer *, integer *, integer *, integer *);
49
50
51 /* -- LAPACK routine (version 3.0) -- */
52 /* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */
53 /* Courant Institute, Argonne National Lab, and Rice University */
54 /* September 30, 1994 */
55
56 /* .. Scalar Arguments .. */
57 /* .. */
58 /* .. Array Arguments .. */
59 /* .. */
60
61 /* Purpose */
62 /* ======= */
63
64 /* CGBTRF computes an LU factorization of a complex m-by-n band matrix A */
65 /* using partial pivoting with row interchanges. */
66
67 /* This is the blocked version of the algorithm, calling Level 3 BLAS. */
68
69 /* Arguments */
70 /* ========= */
71
72 /* M (input) INTEGER */
73 /* The number of rows of the matrix A. M >= 0. */
74
75 /* N (input) INTEGER */
76 /* The number of columns of the matrix A. N >= 0. */
77
78 /* KL (input) INTEGER */
79 /* The number of subdiagonals within the band of A. KL >= 0. */
80
81 /* KU (input) INTEGER */
82 /* The number of superdiagonals within the band of A. KU >= 0. */
83
84 /* AB (input/output) COMPLEX array, dimension (LDAB,N) */
85 /* On entry, the matrix A in band storage, in rows KL+1 to */
86 /* 2*KL+KU+1; rows 1 to KL of the array need not be set. */
87 /* The j-th column of A is stored in the j-th column of the */
88 /* array AB as follows: */
89 /* AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) */
90
91 /* On exit, details of the factorization: U is stored as an */
92 /* upper triangular band matrix with KL+KU superdiagonals in */
93 /* rows 1 to KL+KU+1, and the multipliers used during the */
94 /* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. */
95 /* See below for further details. */
96
97 /* LDAB (input) INTEGER */
98 /* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. */
99
100 /* IPIV (output) INTEGER array, dimension (min(M,N)) */
101 /* The pivot indices; for 1 <= i <= min(M,N), row i of the */
102 /* matrix was interchanged with row IPIV(i). */
103
104 /* INFO (output) INTEGER */
105 /* = 0: successful exit */
106 /* < 0: if INFO = -i, the i-th argument had an illegal value */
107 /* > 0: if INFO = +i, U(i,i) is exactly zero. The factorization */
108 /* has been completed, but the factor U is exactly */
109 /* singular, and division by zero will occur if it is used */
110 /* to solve a system of equations. */
111
112 /* Further Details */
113 /* =============== */
114
115 /* The band storage scheme is illustrated by the following example, when */
116 /* M = N = 6, KL = 2, KU = 1: */
117
118 /* On entry: On exit: */
119
120 /* * * * + + + * * * u14 u25 u36 */
121 /* * * + + + + * * u13 u24 u35 u46 */
122 /* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 */
123 /* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 */
124 /* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * */
125 /* a31 a42 a53 a64 * * m31 m42 m53 m64 * * */
126
127 /* Array elements marked * are not used by the routine; elements marked */
128 /* + need not be set on entry, but are required by the routine to store */
129 /* elements of U because of fill-in resulting from the row interchanges. */
130
131 /* ===================================================================== */
132
133 /* .. Parameters .. */
134 /* .. */
135 /* .. Local Scalars .. */
136 /* .. */
137 /* .. Local Arrays .. */
138 /* .. */
139 /* .. External Functions .. */
140 /* .. */
141 /* .. External Subroutines .. */
142 /* .. */
143 /* .. Intrinsic Functions .. */
144 /* .. */
145 /* .. Executable Statements .. */
146
147 /* KV is the number of superdiagonals in the factor U, allowing for */
148 /* fill-in */
149
150 /* Parameter adjustments */
151 ab_dim1 = *ldab;
152 ab_offset = 1 + ab_dim1;
153 ab -= ab_offset;
154 --ipiv;
155
156 /* Function Body */
157 kv = *ku + *kl;
158
159 /* Test the input parameters. */
160
161 *info = 0;
162 if (*m < 0) {
163 *info = -1;
164 } else if (*n < 0) {
165 *info = -2;
166 } else if (*kl < 0) {
167 *info = -3;
168 } else if (*ku < 0) {
169 *info = -4;
170 } else if (*ldab < *kl + kv + 1) {
171 *info = -6;
172 }
173 if (*info != 0) {
174 i__1 = -(*info);
175 xerbla_("CGBTRF", &i__1, (ftnlen)6);
176 return 0;
177 }
178
179 /* Quick return if possible */
180
181 if (*m == 0 || *n == 0) {
182 return 0;
183 }
184
185 /* Determine the block size for this environment */
186
187 nb = ilaenv_(&c__1, "CGBTRF", " ", m, n, kl, ku, (ftnlen)6, (ftnlen)1);
188
189 /* The block size must not exceed the limit set by the size of the */
190 /* local arrays WORK13 and WORK31. */
191
192 nb = min(nb,64);
193
194 if (nb <= 1 || nb > *kl) {
195
196 /* Use unblocked code */
197
198 cgbtf2_(m, n, kl, ku, &ab[ab_offset], ldab, &ipiv[1], info);
199 } else {
200
201 /* Use blocked code */
202
203 /* Zero the superdiagonal elements of the work array WORK13 */
204
205 i__1 = nb;
206 for (j = 1; j <= i__1; ++j) {
207 i__2 = j - 1;
208 for (i__ = 1; i__ <= i__2; ++i__) {
209 i__3 = i__ + j * 65 - 66;
210 work13[i__3].r = 0.f, work13[i__3].i = 0.f;
211 /* L10: */
212 }
213 /* L20: */
214 }
215
216 /* Zero the subdiagonal elements of the work array WORK31 */
217
218 i__1 = nb;
219 for (j = 1; j <= i__1; ++j) {
220 i__2 = nb;
221 for (i__ = j + 1; i__ <= i__2; ++i__) {
222 i__3 = i__ + j * 65 - 66;
223 work31[i__3].r = 0.f, work31[i__3].i = 0.f;
224 /* L30: */
225 }
226 /* L40: */
227 }
228
229 /* Gaussian elimination with partial pivoting */
230
231 /* Set fill-in elements in columns KU+2 to KV to zero */
232
233 i__1 = min(kv,*n);
234 for (j = *ku + 2; j <= i__1; ++j) {
235 i__2 = *kl;
236 for (i__ = kv - j + 2; i__ <= i__2; ++i__) {
237 i__3 = i__ + j * ab_dim1;
238 ab[i__3].r = 0.f, ab[i__3].i = 0.f;
239 /* L50: */
240 }
241 /* L60: */
242 }
243
244 /* JU is the index of the last column affected by the current */
245 /* stage of the factorization */
246
247 ju = 1;
248
249 i__1 = min(*m,*n);
250 i__2 = nb;
251 for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
252 /* Computing MIN */
253 i__3 = nb, i__4 = min(*m,*n) - j + 1;
254 jb = min(i__3,i__4);
255
256 /* The active part of the matrix is partitioned */
257
258 /* A11 A12 A13 */
259 /* A21 A22 A23 */
260 /* A31 A32 A33 */
261
262 /* Here A11, A21 and A31 denote the current block of JB columns */
263 /* which is about to be factorized. The number of rows in the */
264 /* partitioning are JB, I2, I3 respectively, and the numbers */
265 /* of columns are JB, J2, J3. The superdiagonal elements of A13 */
266 /* and the subdiagonal elements of A31 lie outside the band. */
267
268 /* Computing MIN */
269 i__3 = *kl - jb, i__4 = *m - j - jb + 1;
270 i2 = min(i__3,i__4);
271 /* Computing MIN */
272 i__3 = jb, i__4 = *m - j - *kl + 1;
273 i3 = min(i__3,i__4);
274
275 /* J2 and J3 are computed after JU has been updated. */
276
277 /* Factorize the current block of JB columns */
278
279 i__3 = j + jb - 1;
280 for (jj = j; jj <= i__3; ++jj) {
281
282 /* Set fill-in elements in column JJ+KV to zero */
283
284 if (jj + kv <= *n) {
285 i__4 = *kl;
286 for (i__ = 1; i__ <= i__4; ++i__) {
287 i__5 = i__ + (jj + kv) * ab_dim1;
288 ab[i__5].r = 0.f, ab[i__5].i = 0.f;
289 /* L70: */
290 }
291 }
292
293 /* Find pivot and test for singularity. KM is the number of */
294 /* subdiagonal elements in the current column. */
295
296 /* Computing MIN */
297 i__4 = *kl, i__5 = *m - jj;
298 km = min(i__4,i__5);
299 i__4 = km + 1;
300 jp = icamax_(&i__4, &ab[kv + 1 + jj * ab_dim1], &c__1);
301 ipiv[jj] = jp + jj - j;
302 i__4 = kv + jp + jj * ab_dim1;
303 if (ab[i__4].r != 0.f || ab[i__4].i != 0.f) {
304 /* Computing MAX */
305 /* Computing MIN */
306 i__6 = jj + *ku + jp - 1;
307 i__4 = ju, i__5 = min(i__6,*n);
308 ju = max(i__4,i__5);
309 if (jp != 1) {
310
311 /* Apply interchange to columns J to J+JB-1 */
312
313 if (jp + jj - 1 < j + *kl) {
314
315 i__4 = *ldab - 1;
316 i__5 = *ldab - 1;
317 cswap_(&jb, &ab[kv + 1 + jj - j + j * ab_dim1], &
318 i__4, &ab[kv + jp + jj - j + j * ab_dim1],
319 &i__5);
320 } else {
321
322 /* The interchange affects columns J to JJ-1 of A31 */
323 /* which are stored in the work array WORK31 */
324
325 i__4 = jj - j;
326 i__5 = *ldab - 1;
327 cswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1],
328 &i__5, &work31[jp + jj - j - *kl - 1], &
329 c__65);
330 i__4 = j + jb - jj;
331 i__5 = *ldab - 1;
332 i__6 = *ldab - 1;
333 cswap_(&i__4, &ab[kv + 1 + jj * ab_dim1], &i__5, &
334 ab[kv + jp + jj * ab_dim1], &i__6);
335 }
336 }
337
338 /* Compute multipliers */
339
340 c_div(&q__1, &c_b1, &ab[kv + 1 + jj * ab_dim1]);
341 cscal_(&km, &q__1, &ab[kv + 2 + jj * ab_dim1], &c__1);
342
343 /* Update trailing submatrix within the band and within */
344 /* the current block. JM is the index of the last column */
345 /* which needs to be updated. */
346
347 /* Computing MIN */
348 i__4 = ju, i__5 = j + jb - 1;
349 jm = min(i__4,i__5);
350 if (jm > jj) {
351 i__4 = jm - jj;
352 q__1.r = -1.f, q__1.i = -0.f;
353 i__5 = *ldab - 1;
354 i__6 = *ldab - 1;
355 cgeru_(&km, &i__4, &q__1, &ab[kv + 2 + jj * ab_dim1],
356 &c__1, &ab[kv + (jj + 1) * ab_dim1], &i__5, &
357 ab[kv + 1 + (jj + 1) * ab_dim1], &i__6);
358 }
359 } else {
360
361 /* If pivot is zero, set INFO to the index of the pivot */
362 /* unless a zero pivot has already been found. */
363
364 if (*info == 0) {
365 *info = jj;
366 }
367 }
368
369 /* Copy current column of A31 into the work array WORK31 */
370
371 /* Computing MIN */
372 i__4 = jj - j + 1;
373 nw = min(i__4,i3);
374 if (nw > 0) {
375 ccopy_(&nw, &ab[kv + *kl + 1 - jj + j + jj * ab_dim1], &
376 c__1, &work31[(jj - j + 1) * 65 - 65], &c__1);
377 }
378 /* L80: */
379 }
380 if (j + jb <= *n) {
381
382 /* Apply the row interchanges to the other blocks. */
383
384 /* Computing MIN */
385 i__3 = ju - j + 1;
386 j2 = min(i__3,kv) - jb;
387 /* Computing MAX */
388 i__3 = 0, i__4 = ju - j - kv + 1;
389 j3 = max(i__3,i__4);
390
391 /* Use CLASWP to apply the row interchanges to A12, A22, and */
392 /* A32. */
393
394 i__3 = *ldab - 1;
395 claswp_(&j2, &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__3, &
396 c__1, &jb, &ipiv[j], &c__1);
397
398 /* Adjust the pivot indices. */
399
400 i__3 = j + jb - 1;
401 for (i__ = j; i__ <= i__3; ++i__) {
402 ipiv[i__] = ipiv[i__] + j - 1;
403 /* L90: */
404 }
405
406 /* Apply the row interchanges to A13, A23, and A33 */
407 /* columnwise. */
408
409 k2 = j - 1 + jb + j2;
410 i__3 = j3;
411 for (i__ = 1; i__ <= i__3; ++i__) {
412 jj = k2 + i__;
413 i__4 = j + jb - 1;
414 for (ii = j + i__ - 1; ii <= i__4; ++ii) {
415 ip = ipiv[ii];
416 if (ip != ii) {
417 i__5 = kv + 1 + ii - jj + jj * ab_dim1;
418 temp.r = ab[i__5].r, temp.i = ab[i__5].i;
419 i__5 = kv + 1 + ii - jj + jj * ab_dim1;
420 i__6 = kv + 1 + ip - jj + jj * ab_dim1;
421 ab[i__5].r = ab[i__6].r, ab[i__5].i = ab[i__6].i;
422 i__5 = kv + 1 + ip - jj + jj * ab_dim1;
423 ab[i__5].r = temp.r, ab[i__5].i = temp.i;
424 }
425 /* L100: */
426 }
427 /* L110: */
428 }
429
430 /* Update the relevant part of the trailing submatrix */
431
432 if (j2 > 0) {
433
434 /* Update A12 */
435
436 i__3 = *ldab - 1;
437 i__4 = *ldab - 1;
438 ctrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j2,
439 &c_b1, &ab[kv + 1 + j * ab_dim1], &i__3, &ab[kv +
440 1 - jb + (j + jb) * ab_dim1], &i__4, (ftnlen)4, (
441 ftnlen)5, (ftnlen)12, (ftnlen)4);
442
443 if (i2 > 0) {
444
445 /* Update A22 */
446
447 q__1.r = -1.f, q__1.i = -0.f;
448 i__3 = *ldab - 1;
449 i__4 = *ldab - 1;
450 i__5 = *ldab - 1;
451 cgemm_("No transpose", "No transpose", &i2, &j2, &jb,
452 &q__1, &ab[kv + 1 + jb + j * ab_dim1], &i__3,
453 &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__4,
454 &c_b1, &ab[kv + 1 + (j + jb) * ab_dim1], &
455 i__5, (ftnlen)12, (ftnlen)12);
456 }
457
458 if (i3 > 0) {
459
460 /* Update A32 */
461
462 q__1.r = -1.f, q__1.i = -0.f;
463 i__3 = *ldab - 1;
464 i__4 = *ldab - 1;
465 cgemm_("No transpose", "No transpose", &i3, &j2, &jb,
466 &q__1, work31, &c__65, &ab[kv + 1 - jb + (j +
467 jb) * ab_dim1], &i__3, &c_b1, &ab[kv + *kl +
468 1 - jb + (j + jb) * ab_dim1], &i__4, (ftnlen)
469 12, (ftnlen)12);
470 }
471 }
472
473 if (j3 > 0) {
474
475 /* Copy the lower triangle of A13 into the work array */
476 /* WORK13 */
477
478 i__3 = j3;
479 for (jj = 1; jj <= i__3; ++jj) {
480 i__4 = jb;
481 for (ii = jj; ii <= i__4; ++ii) {
482 i__5 = ii + jj * 65 - 66;
483 i__6 = ii - jj + 1 + (jj + j + kv - 1) * ab_dim1;
484 work13[i__5].r = ab[i__6].r, work13[i__5].i = ab[
485 i__6].i;
486 /* L120: */
487 }
488 /* L130: */
489 }
490
491 /* Update A13 in the work array */
492
493 i__3 = *ldab - 1;
494 ctrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j3,
495 &c_b1, &ab[kv + 1 + j * ab_dim1], &i__3, work13, &
496 c__65, (ftnlen)4, (ftnlen)5, (ftnlen)12, (ftnlen)
497 4);
498
499 if (i2 > 0) {
500
501 /* Update A23 */
502
503 q__1.r = -1.f, q__1.i = -0.f;
504 i__3 = *ldab - 1;
505 i__4 = *ldab - 1;
506 cgemm_("No transpose", "No transpose", &i2, &j3, &jb,
507 &q__1, &ab[kv + 1 + jb + j * ab_dim1], &i__3,
508 work13, &c__65, &c_b1, &ab[jb + 1 + (j + kv) *
509 ab_dim1], &i__4, (ftnlen)12, (ftnlen)12);
510 }
511
512 if (i3 > 0) {
513
514 /* Update A33 */
515
516 q__1.r = -1.f, q__1.i = -0.f;
517 i__3 = *ldab - 1;
518 cgemm_("No transpose", "No transpose", &i3, &j3, &jb,
519 &q__1, work31, &c__65, work13, &c__65, &c_b1,
520 &ab[*kl + 1 + (j + kv) * ab_dim1], &i__3, (
521 ftnlen)12, (ftnlen)12);
522 }
523
524 /* Copy the lower triangle of A13 back into place */
525
526 i__3 = j3;
527 for (jj = 1; jj <= i__3; ++jj) {
528 i__4 = jb;
529 for (ii = jj; ii <= i__4; ++ii) {
530 i__5 = ii - jj + 1 + (jj + j + kv - 1) * ab_dim1;
531 i__6 = ii + jj * 65 - 66;
532 ab[i__5].r = work13[i__6].r, ab[i__5].i = work13[
533 i__6].i;
534 /* L140: */
535 }
536 /* L150: */
537 }
538 }
539 } else {
540
541 /* Adjust the pivot indices. */
542
543 i__3 = j + jb - 1;
544 for (i__ = j; i__ <= i__3; ++i__) {
545 ipiv[i__] = ipiv[i__] + j - 1;
546 /* L160: */
547 }
548 }
549
550 /* Partially undo the interchanges in the current block to */
551 /* restore the upper triangular form of A31 and copy the upper */
552 /* triangle of A31 back into place */
553
554 i__3 = j;
555 for (jj = j + jb - 1; jj >= i__3; --jj) {
556 jp = ipiv[jj] - jj + 1;
557 if (jp != 1) {
558
559 /* Apply interchange to columns J to JJ-1 */
560
561 if (jp + jj - 1 < j + *kl) {
562
563 /* The interchange does not affect A31 */
564
565 i__4 = jj - j;
566 i__5 = *ldab - 1;
567 i__6 = *ldab - 1;
568 cswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], &
569 i__5, &ab[kv + jp + jj - j + j * ab_dim1], &
570 i__6);
571 } else {
572
573 /* The interchange does affect A31 */
574
575 i__4 = jj - j;
576 i__5 = *ldab - 1;
577 cswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], &
578 i__5, &work31[jp + jj - j - *kl - 1], &c__65);
579 }
580 }
581
582 /* Copy the current column of A31 back into place */
583
584 /* Computing MIN */
585 i__4 = i3, i__5 = jj - j + 1;
586 nw = min(i__4,i__5);
587 if (nw > 0) {
588 ccopy_(&nw, &work31[(jj - j + 1) * 65 - 65], &c__1, &ab[
589 kv + *kl + 1 - jj + j + jj * ab_dim1], &c__1);
590 }
591 /* L170: */
592 }
593 /* L180: */
594 }
595 }
596
597 return 0;
598
599 /* End of CGBTRF */
600
601 } /* cgbtrf_ */
602
603