1
2 /*! @file dsp_blas2.c
3 * \brief Sparse BLAS 2, using some dense BLAS 2 operations
4 *
5 * <pre>
6 * -- SuperLU routine (version 3.0) --
7 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
8 * and Lawrence Berkeley National Lab.
9 * October 15, 2003
10 * </pre>
11 */
12 /*
13 * File name: dsp_blas2.c
14 * Purpose: Sparse BLAS 2, using some dense BLAS 2 operations.
15 */
16
17 #include "slu_ddefs.h"
18
19 /*
20 * Function prototypes
21 */
22 void dusolve(int, int, double*, double*);
23 void dlsolve(int, int, double*, double*);
24 void dmatvec(int, int, int, double*, double*, double*);
25
26 /*! \brief Solves one of the systems of equations A*x = b, or A'*x = b
27 *
28 * <pre>
29 * Purpose
30 * =======
31 *
32 * sp_dtrsv() solves one of the systems of equations
33 * A*x = b, or A'*x = b,
34 * where b and x are n element vectors and A is a sparse unit , or
35 * non-unit, upper or lower triangular matrix.
36 * No test for singularity or near-singularity is included in this
37 * routine. Such tests must be performed before calling this routine.
38 *
39 * Parameters
40 * ==========
41 *
42 * uplo - (input) char*
43 * On entry, uplo specifies whether the matrix is an upper or
44 * lower triangular matrix as follows:
45 * uplo = 'U' or 'u' A is an upper triangular matrix.
46 * uplo = 'L' or 'l' A is a lower triangular matrix.
47 *
48 * trans - (input) char*
49 * On entry, trans specifies the equations to be solved as
50 * follows:
51 * trans = 'N' or 'n' A*x = b.
52 * trans = 'T' or 't' A'*x = b.
53 * trans = 'C' or 'c' A'*x = b.
54 *
55 * diag - (input) char*
56 * On entry, diag specifies whether or not A is unit
57 * triangular as follows:
58 * diag = 'U' or 'u' A is assumed to be unit triangular.
59 * diag = 'N' or 'n' A is not assumed to be unit
60 * triangular.
61 *
62 * L - (input) SuperMatrix*
63 * The factor L from the factorization Pr*A*Pc=L*U. Use
64 * compressed row subscripts storage for supernodes,
65 * i.e., L has types: Stype = SC, Dtype = SLU_D, Mtype = TRLU.
66 *
67 * U - (input) SuperMatrix*
68 * The factor U from the factorization Pr*A*Pc=L*U.
69 * U has types: Stype = NC, Dtype = SLU_D, Mtype = TRU.
70 *
71 * x - (input/output) double*
72 * Before entry, the incremented array X must contain the n
73 * element right-hand side vector b. On exit, X is overwritten
74 * with the solution vector x.
75 *
76 * info - (output) int*
77 * If *info = -i, the i-th argument had an illegal value.
78 * </pre>
79 */
80 int
sp_dtrsv(char * uplo,char * trans,char * diag,SuperMatrix * L,SuperMatrix * U,double * x,SuperLUStat_t * stat,int * info)81 sp_dtrsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
82 SuperMatrix *U, double *x, SuperLUStat_t *stat, int *info)
83 {
84 #ifdef _CRAY
85 _fcd ftcs1 = _cptofcd("L", strlen("L")),
86 ftcs2 = _cptofcd("N", strlen("N")),
87 ftcs3 = _cptofcd("U", strlen("U"));
88 #endif
89 SCformat *Lstore;
90 NCformat *Ustore;
91 double *Lval, *Uval;
92 int incx = 1, incy = 1;
93 double alpha = 1.0, beta = 1.0;
94 int nrow;
95 int fsupc, nsupr, nsupc, luptr, istart, irow;
96 int i, k, iptr, jcol;
97 double *work;
98 flops_t solve_ops;
99
100 /* Test the input parameters */
101 *info = 0;
102 if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
103 else if ( !lsame_(trans, "N") && !lsame_(trans, "T") &&
104 !lsame_(trans, "C")) *info = -2;
105 else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
106 else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
107 else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
108 if ( *info ) {
109 i = -(*info);
110 xerbla_("sp_dtrsv", &i);
111 return 0;
112 }
113
114 Lstore = L->Store;
115 Lval = Lstore->nzval;
116 Ustore = U->Store;
117 Uval = Ustore->nzval;
118 solve_ops = 0;
119
120 if ( !(work = doubleCalloc(L->nrow)) )
121 ABORT("Malloc fails for work in sp_dtrsv().");
122
123 if ( lsame_(trans, "N") ) { /* Form x := inv(A)*x. */
124
125 if ( lsame_(uplo, "L") ) {
126 /* Form x := inv(L)*x */
127 if ( L->nrow == 0 ) return 0; /* Quick return */
128
129 for (k = 0; k <= Lstore->nsuper; k++) {
130 fsupc = L_FST_SUPC(k);
131 istart = L_SUB_START(fsupc);
132 nsupr = L_SUB_START(fsupc+1) - istart;
133 nsupc = L_FST_SUPC(k+1) - fsupc;
134 luptr = L_NZ_START(fsupc);
135 nrow = nsupr - nsupc;
136
137 solve_ops += nsupc * (nsupc - 1);
138 solve_ops += 2 * nrow * nsupc;
139
140 if ( nsupc == 1 ) {
141 for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
142 irow = L_SUB(iptr);
143 ++luptr;
144 x[irow] -= x[fsupc] * Lval[luptr];
145 }
146 } else {
147 #ifdef USE_VENDOR_BLAS
148 #ifdef _CRAY
149 STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
150 &x[fsupc], &incx);
151
152 SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
153 &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
154 #else
155 dtrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
156 &x[fsupc], &incx);
157
158 dgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
159 &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
160 #endif
161 #else
162 dlsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
163
164 dmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
165 &x[fsupc], &work[0] );
166 #endif
167
168 iptr = istart + nsupc;
169 for (i = 0; i < nrow; ++i, ++iptr) {
170 irow = L_SUB(iptr);
171 x[irow] -= work[i]; /* Scatter */
172 work[i] = 0.0;
173
174 }
175 }
176 } /* for k ... */
177
178 } else {
179 /* Form x := inv(U)*x */
180
181 if ( U->nrow == 0 ) return 0; /* Quick return */
182
183 for (k = Lstore->nsuper; k >= 0; k--) {
184 fsupc = L_FST_SUPC(k);
185 nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
186 nsupc = L_FST_SUPC(k+1) - fsupc;
187 luptr = L_NZ_START(fsupc);
188
189 solve_ops += nsupc * (nsupc + 1);
190
191 if ( nsupc == 1 ) {
192 x[fsupc] /= Lval[luptr];
193 for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
194 irow = U_SUB(i);
195 x[irow] -= x[fsupc] * Uval[i];
196 }
197 } else {
198 #ifdef USE_VENDOR_BLAS
199 #ifdef _CRAY
200 STRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
201 &x[fsupc], &incx);
202 #else
203 dtrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
204 &x[fsupc], &incx);
205 #endif
206 #else
207 dusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
208 #endif
209
210 for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
211 solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
212 for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1);
213 i++) {
214 irow = U_SUB(i);
215 x[irow] -= x[jcol] * Uval[i];
216 }
217 }
218 }
219 } /* for k ... */
220
221 }
222 } else { /* Form x := inv(A')*x */
223
224 if ( lsame_(uplo, "L") ) {
225 /* Form x := inv(L')*x */
226 if ( L->nrow == 0 ) return 0; /* Quick return */
227
228 for (k = Lstore->nsuper; k >= 0; --k) {
229 fsupc = L_FST_SUPC(k);
230 istart = L_SUB_START(fsupc);
231 nsupr = L_SUB_START(fsupc+1) - istart;
232 nsupc = L_FST_SUPC(k+1) - fsupc;
233 luptr = L_NZ_START(fsupc);
234
235 solve_ops += 2 * (nsupr - nsupc) * nsupc;
236
237 for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
238 iptr = istart + nsupc;
239 for (i = L_NZ_START(jcol) + nsupc;
240 i < L_NZ_START(jcol+1); i++) {
241 irow = L_SUB(iptr);
242 x[jcol] -= x[irow] * Lval[i];
243 iptr++;
244 }
245 }
246
247 if ( nsupc > 1 ) {
248 solve_ops += nsupc * (nsupc - 1);
249 #ifdef _CRAY
250 ftcs1 = _cptofcd("L", strlen("L"));
251 ftcs2 = _cptofcd("T", strlen("T"));
252 ftcs3 = _cptofcd("U", strlen("U"));
253 STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
254 &x[fsupc], &incx);
255 #else
256 dtrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
257 &x[fsupc], &incx);
258 #endif
259 }
260 }
261 } else {
262 /* Form x := inv(U')*x */
263 if ( U->nrow == 0 ) return 0; /* Quick return */
264
265 for (k = 0; k <= Lstore->nsuper; k++) {
266 fsupc = L_FST_SUPC(k);
267 nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
268 nsupc = L_FST_SUPC(k+1) - fsupc;
269 luptr = L_NZ_START(fsupc);
270
271 for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
272 solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
273 for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
274 irow = U_SUB(i);
275 x[jcol] -= x[irow] * Uval[i];
276 }
277 }
278
279 solve_ops += nsupc * (nsupc + 1);
280
281 if ( nsupc == 1 ) {
282 x[fsupc] /= Lval[luptr];
283 } else {
284 #ifdef _CRAY
285 ftcs1 = _cptofcd("U", strlen("U"));
286 ftcs2 = _cptofcd("T", strlen("T"));
287 ftcs3 = _cptofcd("N", strlen("N"));
288 STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
289 &x[fsupc], &incx);
290 #else
291 dtrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
292 &x[fsupc], &incx);
293 #endif
294 }
295 } /* for k ... */
296 }
297 }
298
299 stat->ops[SOLVE] += solve_ops;
300 SUPERLU_FREE(work);
301 return 0;
302 }
303
304
305
306 /*! \brief Performs one of the matrix-vector operations y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y,
307 *
308 * <pre>
309 * Purpose
310 * =======
311 *
312 * sp_dgemv() performs one of the matrix-vector operations
313 * y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y,
314 * where alpha and beta are scalars, x and y are vectors and A is a
315 * sparse A->nrow by A->ncol matrix.
316 *
317 * Parameters
318 * ==========
319 *
320 * TRANS - (input) char*
321 * On entry, TRANS specifies the operation to be performed as
322 * follows:
323 * TRANS = 'N' or 'n' y := alpha*A*x + beta*y.
324 * TRANS = 'T' or 't' y := alpha*A'*x + beta*y.
325 * TRANS = 'C' or 'c' y := alpha*A'*x + beta*y.
326 *
327 * ALPHA - (input) double
328 * On entry, ALPHA specifies the scalar alpha.
329 *
330 * A - (input) SuperMatrix*
331 * Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
332 * Currently, the type of A can be:
333 * Stype = NC or NCP; Dtype = SLU_D; Mtype = GE.
334 * In the future, more general A can be handled.
335 *
336 * X - (input) double*, array of DIMENSION at least
337 * ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
338 * and at least
339 * ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
340 * Before entry, the incremented array X must contain the
341 * vector x.
342 *
343 * INCX - (input) int
344 * On entry, INCX specifies the increment for the elements of
345 * X. INCX must not be zero.
346 *
347 * BETA - (input) double
348 * On entry, BETA specifies the scalar beta. When BETA is
349 * supplied as zero then Y need not be set on input.
350 *
351 * Y - (output) double*, array of DIMENSION at least
352 * ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
353 * and at least
354 * ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
355 * Before entry with BETA non-zero, the incremented array Y
356 * must contain the vector y. On exit, Y is overwritten by the
357 * updated vector y.
358 *
359 * INCY - (input) int
360 * On entry, INCY specifies the increment for the elements of
361 * Y. INCY must not be zero.
362 *
363 * ==== Sparse Level 2 Blas routine.
364 * </pre>
365 */
366
367 int
sp_dgemv(char * trans,double alpha,SuperMatrix * A,double * x,int incx,double beta,double * y,int incy)368 sp_dgemv(char *trans, double alpha, SuperMatrix *A, double *x,
369 int incx, double beta, double *y, int incy)
370 {
371 /* Local variables */
372 NCformat *Astore;
373 double *Aval;
374 int info;
375 double temp;
376 int lenx, leny, i, j, irow;
377 int iy, jx, jy, kx, ky;
378 int notran;
379
380 notran = lsame_(trans, "N");
381 Astore = A->Store;
382 Aval = Astore->nzval;
383
384 /* Test the input parameters */
385 info = 0;
386 if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
387 else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
388 else if (incx == 0) info = 5;
389 else if (incy == 0) info = 8;
390 if (info != 0) {
391 xerbla_("sp_dgemv ", &info);
392 return 0;
393 }
394
395 /* Quick return if possible. */
396 if (A->nrow == 0 || A->ncol == 0 || (alpha == 0. && beta == 1.))
397 return 0;
398
399 /* Set LENX and LENY, the lengths of the vectors x and y, and set
400 up the start points in X and Y. */
401 if (lsame_(trans, "N")) {
402 lenx = A->ncol;
403 leny = A->nrow;
404 } else {
405 lenx = A->nrow;
406 leny = A->ncol;
407 }
408 if (incx > 0) kx = 0;
409 else kx = - (lenx - 1) * incx;
410 if (incy > 0) ky = 0;
411 else ky = - (leny - 1) * incy;
412
413 /* Start the operations. In this version the elements of A are
414 accessed sequentially with one pass through A. */
415 /* First form y := beta*y. */
416 if (beta != 1.) {
417 if (incy == 1) {
418 if (beta == 0.)
419 for (i = 0; i < leny; ++i) y[i] = 0.;
420 else
421 for (i = 0; i < leny; ++i) y[i] = beta * y[i];
422 } else {
423 iy = ky;
424 if (beta == 0.)
425 for (i = 0; i < leny; ++i) {
426 y[iy] = 0.;
427 iy += incy;
428 }
429 else
430 for (i = 0; i < leny; ++i) {
431 y[iy] = beta * y[iy];
432 iy += incy;
433 }
434 }
435 }
436
437 if (alpha == 0.) return 0;
438
439 if ( notran ) {
440 /* Form y := alpha*A*x + y. */
441 jx = kx;
442 if (incy == 1) {
443 for (j = 0; j < A->ncol; ++j) {
444 if (x[jx] != 0.) {
445 temp = alpha * x[jx];
446 for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
447 irow = Astore->rowind[i];
448 y[irow] += temp * Aval[i];
449 }
450 }
451 jx += incx;
452 }
453 } else {
454 ABORT("Not implemented.");
455 }
456 } else {
457 /* Form y := alpha*A'*x + y. */
458 jy = ky;
459 if (incx == 1) {
460 for (j = 0; j < A->ncol; ++j) {
461 temp = 0.;
462 for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
463 irow = Astore->rowind[i];
464 temp += Aval[i] * x[irow];
465 }
466 y[jy] += alpha * temp;
467 jy += incy;
468 }
469 } else {
470 ABORT("Not implemented.");
471 }
472 }
473 return 0;
474 } /* sp_dgemv */
475
476
477
478