1 /* ./src_f77/cgbtrf.f -- translated by f2c (version 20030320).
2    You must link the resulting object file with the libraries:
3 	-lf2c -lm   (in that order)
4 */
5 
6 #include <punc/vf2c.h>
7 
8 /* Table of constant values */
9 
10 static complex c_b1 = {1.f,0.f};
11 static integer c__1 = 1;
12 static integer c__65 = 65;
13 
cgbtrf_(integer * m,integer * n,integer * kl,integer * ku,complex * ab,integer * ldab,integer * ipiv,integer * info)14 /* Subroutine */ int cgbtrf_(integer *m, integer *n, integer *kl, integer *ku,
15 	 complex *ab, integer *ldab, integer *ipiv, integer *info)
16 {
17     /* System generated locals */
18     integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6;
19     complex q__1;
20 
21     /* Builtin functions */
22     void c_div(complex *, complex *, complex *);
23 
24     /* Local variables */
25     static integer i__, j, i2, i3, j2, j3, k2, jb, nb, ii, jj, jm, ip, jp, km,
26 	     ju, kv, nw;
27     static complex temp;
28     extern /* Subroutine */ int cscal_(integer *, complex *, complex *,
29 	    integer *), cgemm_(char *, char *, integer *, integer *, integer *
30 	    , complex *, complex *, integer *, complex *, integer *, complex *
31 	    , complex *, integer *, ftnlen, ftnlen), cgeru_(integer *,
32 	    integer *, complex *, complex *, integer *, complex *, integer *,
33 	    complex *, integer *), ccopy_(integer *, complex *, integer *,
34 	    complex *, integer *), cswap_(integer *, complex *, integer *,
35 	    complex *, integer *);
36     static complex work13[4160]	/* was [65][64] */, work31[4160]	/*
37 	    was [65][64] */;
38     extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *,
39 	    integer *, integer *, complex *, complex *, integer *, complex *,
40 	    integer *, ftnlen, ftnlen, ftnlen, ftnlen), cgbtf2_(integer *,
41 	    integer *, integer *, integer *, complex *, integer *, integer *,
42 	    integer *);
43     extern integer icamax_(integer *, complex *, integer *);
44     extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
45     extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
46 	    integer *, integer *, ftnlen, ftnlen);
47     extern /* Subroutine */ int claswp_(integer *, complex *, integer *,
48 	    integer *, integer *, integer *, integer *);
49 
50 
51 /*  -- LAPACK routine (version 3.0) -- */
52 /*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */
53 /*     Courant Institute, Argonne National Lab, and Rice University */
54 /*     September 30, 1994 */
55 
56 /*     .. Scalar Arguments .. */
57 /*     .. */
58 /*     .. Array Arguments .. */
59 /*     .. */
60 
61 /*  Purpose */
62 /*  ======= */
63 
64 /*  CGBTRF computes an LU factorization of a complex m-by-n band matrix A */
65 /*  using partial pivoting with row interchanges. */
66 
67 /*  This is the blocked version of the algorithm, calling Level 3 BLAS. */
68 
69 /*  Arguments */
70 /*  ========= */
71 
72 /*  M       (input) INTEGER */
73 /*          The number of rows of the matrix A.  M >= 0. */
74 
75 /*  N       (input) INTEGER */
76 /*          The number of columns of the matrix A.  N >= 0. */
77 
78 /*  KL      (input) INTEGER */
79 /*          The number of subdiagonals within the band of A.  KL >= 0. */
80 
81 /*  KU      (input) INTEGER */
82 /*          The number of superdiagonals within the band of A.  KU >= 0. */
83 
84 /*  AB      (input/output) COMPLEX array, dimension (LDAB,N) */
85 /*          On entry, the matrix A in band storage, in rows KL+1 to */
86 /*          2*KL+KU+1; rows 1 to KL of the array need not be set. */
87 /*          The j-th column of A is stored in the j-th column of the */
88 /*          array AB as follows: */
89 /*          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) */
90 
91 /*          On exit, details of the factorization: U is stored as an */
92 /*          upper triangular band matrix with KL+KU superdiagonals in */
93 /*          rows 1 to KL+KU+1, and the multipliers used during the */
94 /*          factorization are stored in rows KL+KU+2 to 2*KL+KU+1. */
95 /*          See below for further details. */
96 
97 /*  LDAB    (input) INTEGER */
98 /*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1. */
99 
100 /*  IPIV    (output) INTEGER array, dimension (min(M,N)) */
101 /*          The pivot indices; for 1 <= i <= min(M,N), row i of the */
102 /*          matrix was interchanged with row IPIV(i). */
103 
104 /*  INFO    (output) INTEGER */
105 /*          = 0: successful exit */
106 /*          < 0: if INFO = -i, the i-th argument had an illegal value */
107 /*          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization */
108 /*               has been completed, but the factor U is exactly */
109 /*               singular, and division by zero will occur if it is used */
110 /*               to solve a system of equations. */
111 
112 /*  Further Details */
113 /*  =============== */
114 
115 /*  The band storage scheme is illustrated by the following example, when */
116 /*  M = N = 6, KL = 2, KU = 1: */
117 
118 /*  On entry:                       On exit: */
119 
120 /*      *    *    *    +    +    +       *    *    *   u14  u25  u36 */
121 /*      *    *    +    +    +    +       *    *   u13  u24  u35  u46 */
122 /*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56 */
123 /*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66 */
124 /*     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   * */
125 /*     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    * */
126 
127 /*  Array elements marked * are not used by the routine; elements marked */
128 /*  + need not be set on entry, but are required by the routine to store */
129 /*  elements of U because of fill-in resulting from the row interchanges. */
130 
131 /*  ===================================================================== */
132 
133 /*     .. Parameters .. */
134 /*     .. */
135 /*     .. Local Scalars .. */
136 /*     .. */
137 /*     .. Local Arrays .. */
138 /*     .. */
139 /*     .. External Functions .. */
140 /*     .. */
141 /*     .. External Subroutines .. */
142 /*     .. */
143 /*     .. Intrinsic Functions .. */
144 /*     .. */
145 /*     .. Executable Statements .. */
146 
147 /*     KV is the number of superdiagonals in the factor U, allowing for */
148 /*     fill-in */
149 
150     /* Parameter adjustments */
151     ab_dim1 = *ldab;
152     ab_offset = 1 + ab_dim1;
153     ab -= ab_offset;
154     --ipiv;
155 
156     /* Function Body */
157     kv = *ku + *kl;
158 
159 /*     Test the input parameters. */
160 
161     *info = 0;
162     if (*m < 0) {
163 	*info = -1;
164     } else if (*n < 0) {
165 	*info = -2;
166     } else if (*kl < 0) {
167 	*info = -3;
168     } else if (*ku < 0) {
169 	*info = -4;
170     } else if (*ldab < *kl + kv + 1) {
171 	*info = -6;
172     }
173     if (*info != 0) {
174 	i__1 = -(*info);
175 	xerbla_("CGBTRF", &i__1, (ftnlen)6);
176 	return 0;
177     }
178 
179 /*     Quick return if possible */
180 
181     if (*m == 0 || *n == 0) {
182 	return 0;
183     }
184 
185 /*     Determine the block size for this environment */
186 
187     nb = ilaenv_(&c__1, "CGBTRF", " ", m, n, kl, ku, (ftnlen)6, (ftnlen)1);
188 
189 /*     The block size must not exceed the limit set by the size of the */
190 /*     local arrays WORK13 and WORK31. */
191 
192     nb = min(nb,64);
193 
194     if (nb <= 1 || nb > *kl) {
195 
196 /*        Use unblocked code */
197 
198 	cgbtf2_(m, n, kl, ku, &ab[ab_offset], ldab, &ipiv[1], info);
199     } else {
200 
201 /*        Use blocked code */
202 
203 /*        Zero the superdiagonal elements of the work array WORK13 */
204 
205 	i__1 = nb;
206 	for (j = 1; j <= i__1; ++j) {
207 	    i__2 = j - 1;
208 	    for (i__ = 1; i__ <= i__2; ++i__) {
209 		i__3 = i__ + j * 65 - 66;
210 		work13[i__3].r = 0.f, work13[i__3].i = 0.f;
211 /* L10: */
212 	    }
213 /* L20: */
214 	}
215 
216 /*        Zero the subdiagonal elements of the work array WORK31 */
217 
218 	i__1 = nb;
219 	for (j = 1; j <= i__1; ++j) {
220 	    i__2 = nb;
221 	    for (i__ = j + 1; i__ <= i__2; ++i__) {
222 		i__3 = i__ + j * 65 - 66;
223 		work31[i__3].r = 0.f, work31[i__3].i = 0.f;
224 /* L30: */
225 	    }
226 /* L40: */
227 	}
228 
229 /*        Gaussian elimination with partial pivoting */
230 
231 /*        Set fill-in elements in columns KU+2 to KV to zero */
232 
233 	i__1 = min(kv,*n);
234 	for (j = *ku + 2; j <= i__1; ++j) {
235 	    i__2 = *kl;
236 	    for (i__ = kv - j + 2; i__ <= i__2; ++i__) {
237 		i__3 = i__ + j * ab_dim1;
238 		ab[i__3].r = 0.f, ab[i__3].i = 0.f;
239 /* L50: */
240 	    }
241 /* L60: */
242 	}
243 
244 /*        JU is the index of the last column affected by the current */
245 /*        stage of the factorization */
246 
247 	ju = 1;
248 
249 	i__1 = min(*m,*n);
250 	i__2 = nb;
251 	for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
252 /* Computing MIN */
253 	    i__3 = nb, i__4 = min(*m,*n) - j + 1;
254 	    jb = min(i__3,i__4);
255 
256 /*           The active part of the matrix is partitioned */
257 
258 /*              A11   A12   A13 */
259 /*              A21   A22   A23 */
260 /*              A31   A32   A33 */
261 
262 /*           Here A11, A21 and A31 denote the current block of JB columns */
263 /*           which is about to be factorized. The number of rows in the */
264 /*           partitioning are JB, I2, I3 respectively, and the numbers */
265 /*           of columns are JB, J2, J3. The superdiagonal elements of A13 */
266 /*           and the subdiagonal elements of A31 lie outside the band. */
267 
268 /* Computing MIN */
269 	    i__3 = *kl - jb, i__4 = *m - j - jb + 1;
270 	    i2 = min(i__3,i__4);
271 /* Computing MIN */
272 	    i__3 = jb, i__4 = *m - j - *kl + 1;
273 	    i3 = min(i__3,i__4);
274 
275 /*           J2 and J3 are computed after JU has been updated. */
276 
277 /*           Factorize the current block of JB columns */
278 
279 	    i__3 = j + jb - 1;
280 	    for (jj = j; jj <= i__3; ++jj) {
281 
282 /*              Set fill-in elements in column JJ+KV to zero */
283 
284 		if (jj + kv <= *n) {
285 		    i__4 = *kl;
286 		    for (i__ = 1; i__ <= i__4; ++i__) {
287 			i__5 = i__ + (jj + kv) * ab_dim1;
288 			ab[i__5].r = 0.f, ab[i__5].i = 0.f;
289 /* L70: */
290 		    }
291 		}
292 
293 /*              Find pivot and test for singularity. KM is the number of */
294 /*              subdiagonal elements in the current column. */
295 
296 /* Computing MIN */
297 		i__4 = *kl, i__5 = *m - jj;
298 		km = min(i__4,i__5);
299 		i__4 = km + 1;
300 		jp = icamax_(&i__4, &ab[kv + 1 + jj * ab_dim1], &c__1);
301 		ipiv[jj] = jp + jj - j;
302 		i__4 = kv + jp + jj * ab_dim1;
303 		if (ab[i__4].r != 0.f || ab[i__4].i != 0.f) {
304 /* Computing MAX */
305 /* Computing MIN */
306 		    i__6 = jj + *ku + jp - 1;
307 		    i__4 = ju, i__5 = min(i__6,*n);
308 		    ju = max(i__4,i__5);
309 		    if (jp != 1) {
310 
311 /*                    Apply interchange to columns J to J+JB-1 */
312 
313 			if (jp + jj - 1 < j + *kl) {
314 
315 			    i__4 = *ldab - 1;
316 			    i__5 = *ldab - 1;
317 			    cswap_(&jb, &ab[kv + 1 + jj - j + j * ab_dim1], &
318 				    i__4, &ab[kv + jp + jj - j + j * ab_dim1],
319 				     &i__5);
320 			} else {
321 
322 /*                       The interchange affects columns J to JJ-1 of A31 */
323 /*                       which are stored in the work array WORK31 */
324 
325 			    i__4 = jj - j;
326 			    i__5 = *ldab - 1;
327 			    cswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1],
328 				    &i__5, &work31[jp + jj - j - *kl - 1], &
329 				    c__65);
330 			    i__4 = j + jb - jj;
331 			    i__5 = *ldab - 1;
332 			    i__6 = *ldab - 1;
333 			    cswap_(&i__4, &ab[kv + 1 + jj * ab_dim1], &i__5, &
334 				    ab[kv + jp + jj * ab_dim1], &i__6);
335 			}
336 		    }
337 
338 /*                 Compute multipliers */
339 
340 		    c_div(&q__1, &c_b1, &ab[kv + 1 + jj * ab_dim1]);
341 		    cscal_(&km, &q__1, &ab[kv + 2 + jj * ab_dim1], &c__1);
342 
343 /*                 Update trailing submatrix within the band and within */
344 /*                 the current block. JM is the index of the last column */
345 /*                 which needs to be updated. */
346 
347 /* Computing MIN */
348 		    i__4 = ju, i__5 = j + jb - 1;
349 		    jm = min(i__4,i__5);
350 		    if (jm > jj) {
351 			i__4 = jm - jj;
352 			q__1.r = -1.f, q__1.i = -0.f;
353 			i__5 = *ldab - 1;
354 			i__6 = *ldab - 1;
355 			cgeru_(&km, &i__4, &q__1, &ab[kv + 2 + jj * ab_dim1],
356 				&c__1, &ab[kv + (jj + 1) * ab_dim1], &i__5, &
357 				ab[kv + 1 + (jj + 1) * ab_dim1], &i__6);
358 		    }
359 		} else {
360 
361 /*                 If pivot is zero, set INFO to the index of the pivot */
362 /*                 unless a zero pivot has already been found. */
363 
364 		    if (*info == 0) {
365 			*info = jj;
366 		    }
367 		}
368 
369 /*              Copy current column of A31 into the work array WORK31 */
370 
371 /* Computing MIN */
372 		i__4 = jj - j + 1;
373 		nw = min(i__4,i3);
374 		if (nw > 0) {
375 		    ccopy_(&nw, &ab[kv + *kl + 1 - jj + j + jj * ab_dim1], &
376 			    c__1, &work31[(jj - j + 1) * 65 - 65], &c__1);
377 		}
378 /* L80: */
379 	    }
380 	    if (j + jb <= *n) {
381 
382 /*              Apply the row interchanges to the other blocks. */
383 
384 /* Computing MIN */
385 		i__3 = ju - j + 1;
386 		j2 = min(i__3,kv) - jb;
387 /* Computing MAX */
388 		i__3 = 0, i__4 = ju - j - kv + 1;
389 		j3 = max(i__3,i__4);
390 
391 /*              Use CLASWP to apply the row interchanges to A12, A22, and */
392 /*              A32. */
393 
394 		i__3 = *ldab - 1;
395 		claswp_(&j2, &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__3, &
396 			c__1, &jb, &ipiv[j], &c__1);
397 
398 /*              Adjust the pivot indices. */
399 
400 		i__3 = j + jb - 1;
401 		for (i__ = j; i__ <= i__3; ++i__) {
402 		    ipiv[i__] = ipiv[i__] + j - 1;
403 /* L90: */
404 		}
405 
406 /*              Apply the row interchanges to A13, A23, and A33 */
407 /*              columnwise. */
408 
409 		k2 = j - 1 + jb + j2;
410 		i__3 = j3;
411 		for (i__ = 1; i__ <= i__3; ++i__) {
412 		    jj = k2 + i__;
413 		    i__4 = j + jb - 1;
414 		    for (ii = j + i__ - 1; ii <= i__4; ++ii) {
415 			ip = ipiv[ii];
416 			if (ip != ii) {
417 			    i__5 = kv + 1 + ii - jj + jj * ab_dim1;
418 			    temp.r = ab[i__5].r, temp.i = ab[i__5].i;
419 			    i__5 = kv + 1 + ii - jj + jj * ab_dim1;
420 			    i__6 = kv + 1 + ip - jj + jj * ab_dim1;
421 			    ab[i__5].r = ab[i__6].r, ab[i__5].i = ab[i__6].i;
422 			    i__5 = kv + 1 + ip - jj + jj * ab_dim1;
423 			    ab[i__5].r = temp.r, ab[i__5].i = temp.i;
424 			}
425 /* L100: */
426 		    }
427 /* L110: */
428 		}
429 
430 /*              Update the relevant part of the trailing submatrix */
431 
432 		if (j2 > 0) {
433 
434 /*                 Update A12 */
435 
436 		    i__3 = *ldab - 1;
437 		    i__4 = *ldab - 1;
438 		    ctrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j2,
439 			    &c_b1, &ab[kv + 1 + j * ab_dim1], &i__3, &ab[kv +
440 			    1 - jb + (j + jb) * ab_dim1], &i__4, (ftnlen)4, (
441 			    ftnlen)5, (ftnlen)12, (ftnlen)4);
442 
443 		    if (i2 > 0) {
444 
445 /*                    Update A22 */
446 
447 			q__1.r = -1.f, q__1.i = -0.f;
448 			i__3 = *ldab - 1;
449 			i__4 = *ldab - 1;
450 			i__5 = *ldab - 1;
451 			cgemm_("No transpose", "No transpose", &i2, &j2, &jb,
452 				&q__1, &ab[kv + 1 + jb + j * ab_dim1], &i__3,
453 				&ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__4,
454 				&c_b1, &ab[kv + 1 + (j + jb) * ab_dim1], &
455 				i__5, (ftnlen)12, (ftnlen)12);
456 		    }
457 
458 		    if (i3 > 0) {
459 
460 /*                    Update A32 */
461 
462 			q__1.r = -1.f, q__1.i = -0.f;
463 			i__3 = *ldab - 1;
464 			i__4 = *ldab - 1;
465 			cgemm_("No transpose", "No transpose", &i3, &j2, &jb,
466 				&q__1, work31, &c__65, &ab[kv + 1 - jb + (j +
467 				jb) * ab_dim1], &i__3, &c_b1, &ab[kv + *kl +
468 				1 - jb + (j + jb) * ab_dim1], &i__4, (ftnlen)
469 				12, (ftnlen)12);
470 		    }
471 		}
472 
473 		if (j3 > 0) {
474 
475 /*                 Copy the lower triangle of A13 into the work array */
476 /*                 WORK13 */
477 
478 		    i__3 = j3;
479 		    for (jj = 1; jj <= i__3; ++jj) {
480 			i__4 = jb;
481 			for (ii = jj; ii <= i__4; ++ii) {
482 			    i__5 = ii + jj * 65 - 66;
483 			    i__6 = ii - jj + 1 + (jj + j + kv - 1) * ab_dim1;
484 			    work13[i__5].r = ab[i__6].r, work13[i__5].i = ab[
485 				    i__6].i;
486 /* L120: */
487 			}
488 /* L130: */
489 		    }
490 
491 /*                 Update A13 in the work array */
492 
493 		    i__3 = *ldab - 1;
494 		    ctrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j3,
495 			    &c_b1, &ab[kv + 1 + j * ab_dim1], &i__3, work13, &
496 			    c__65, (ftnlen)4, (ftnlen)5, (ftnlen)12, (ftnlen)
497 			    4);
498 
499 		    if (i2 > 0) {
500 
501 /*                    Update A23 */
502 
503 			q__1.r = -1.f, q__1.i = -0.f;
504 			i__3 = *ldab - 1;
505 			i__4 = *ldab - 1;
506 			cgemm_("No transpose", "No transpose", &i2, &j3, &jb,
507 				&q__1, &ab[kv + 1 + jb + j * ab_dim1], &i__3,
508 				work13, &c__65, &c_b1, &ab[jb + 1 + (j + kv) *
509 				 ab_dim1], &i__4, (ftnlen)12, (ftnlen)12);
510 		    }
511 
512 		    if (i3 > 0) {
513 
514 /*                    Update A33 */
515 
516 			q__1.r = -1.f, q__1.i = -0.f;
517 			i__3 = *ldab - 1;
518 			cgemm_("No transpose", "No transpose", &i3, &j3, &jb,
519 				&q__1, work31, &c__65, work13, &c__65, &c_b1,
520 				&ab[*kl + 1 + (j + kv) * ab_dim1], &i__3, (
521 				ftnlen)12, (ftnlen)12);
522 		    }
523 
524 /*                 Copy the lower triangle of A13 back into place */
525 
526 		    i__3 = j3;
527 		    for (jj = 1; jj <= i__3; ++jj) {
528 			i__4 = jb;
529 			for (ii = jj; ii <= i__4; ++ii) {
530 			    i__5 = ii - jj + 1 + (jj + j + kv - 1) * ab_dim1;
531 			    i__6 = ii + jj * 65 - 66;
532 			    ab[i__5].r = work13[i__6].r, ab[i__5].i = work13[
533 				    i__6].i;
534 /* L140: */
535 			}
536 /* L150: */
537 		    }
538 		}
539 	    } else {
540 
541 /*              Adjust the pivot indices. */
542 
543 		i__3 = j + jb - 1;
544 		for (i__ = j; i__ <= i__3; ++i__) {
545 		    ipiv[i__] = ipiv[i__] + j - 1;
546 /* L160: */
547 		}
548 	    }
549 
550 /*           Partially undo the interchanges in the current block to */
551 /*           restore the upper triangular form of A31 and copy the upper */
552 /*           triangle of A31 back into place */
553 
554 	    i__3 = j;
555 	    for (jj = j + jb - 1; jj >= i__3; --jj) {
556 		jp = ipiv[jj] - jj + 1;
557 		if (jp != 1) {
558 
559 /*                 Apply interchange to columns J to JJ-1 */
560 
561 		    if (jp + jj - 1 < j + *kl) {
562 
563 /*                    The interchange does not affect A31 */
564 
565 			i__4 = jj - j;
566 			i__5 = *ldab - 1;
567 			i__6 = *ldab - 1;
568 			cswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], &
569 				i__5, &ab[kv + jp + jj - j + j * ab_dim1], &
570 				i__6);
571 		    } else {
572 
573 /*                    The interchange does affect A31 */
574 
575 			i__4 = jj - j;
576 			i__5 = *ldab - 1;
577 			cswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], &
578 				i__5, &work31[jp + jj - j - *kl - 1], &c__65);
579 		    }
580 		}
581 
582 /*              Copy the current column of A31 back into place */
583 
584 /* Computing MIN */
585 		i__4 = i3, i__5 = jj - j + 1;
586 		nw = min(i__4,i__5);
587 		if (nw > 0) {
588 		    ccopy_(&nw, &work31[(jj - j + 1) * 65 - 65], &c__1, &ab[
589 			    kv + *kl + 1 - jj + j + jj * ab_dim1], &c__1);
590 		}
591 /* L170: */
592 	    }
593 /* L180: */
594 	}
595     }
596 
597     return 0;
598 
599 /*     End of CGBTRF */
600 
601 } /* cgbtrf_ */
602 
603