1 
2 /*! @file dsp_blas2.c
3  * \brief Sparse BLAS 2, using some dense BLAS 2 operations
4  *
5  * <pre>
6  * -- SuperLU routine (version 3.0) --
7  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
8  * and Lawrence Berkeley National Lab.
9  * October 15, 2003
10  * </pre>
11  */
12 /*
13  * File name:		dsp_blas2.c
14  * Purpose:		Sparse BLAS 2, using some dense BLAS 2 operations.
15  */
16 
17 #include "slu_ddefs.h"
18 
19 /*
20  * Function prototypes
21  */
22 void dusolve(int, int, double*, double*);
23 void dlsolve(int, int, double*, double*);
24 void dmatvec(int, int, int, double*, double*, double*);
25 
26 /*! \brief Solves one of the systems of equations A*x = b,   or   A'*x = b
27  *
28  * <pre>
29  *   Purpose
30  *   =======
31  *
32  *   sp_dtrsv() solves one of the systems of equations
33  *       A*x = b,   or   A'*x = b,
34  *   where b and x are n element vectors and A is a sparse unit , or
35  *   non-unit, upper or lower triangular matrix.
36  *   No test for singularity or near-singularity is included in this
37  *   routine. Such tests must be performed before calling this routine.
38  *
39  *   Parameters
40  *   ==========
41  *
42  *   uplo   - (input) char*
43  *            On entry, uplo specifies whether the matrix is an upper or
44  *             lower triangular matrix as follows:
45  *                uplo = 'U' or 'u'   A is an upper triangular matrix.
46  *                uplo = 'L' or 'l'   A is a lower triangular matrix.
47  *
48  *   trans  - (input) char*
49  *             On entry, trans specifies the equations to be solved as
50  *             follows:
51  *                trans = 'N' or 'n'   A*x = b.
52  *                trans = 'T' or 't'   A'*x = b.
53  *                trans = 'C' or 'c'   A'*x = b.
54  *
55  *   diag   - (input) char*
56  *             On entry, diag specifies whether or not A is unit
57  *             triangular as follows:
58  *                diag = 'U' or 'u'   A is assumed to be unit triangular.
59  *                diag = 'N' or 'n'   A is not assumed to be unit
60  *                                    triangular.
61  *
62  *   L       - (input) SuperMatrix*
63  *	       The factor L from the factorization Pr*A*Pc=L*U. Use
64  *             compressed row subscripts storage for supernodes,
65  *             i.e., L has types: Stype = SC, Dtype = SLU_D, Mtype = TRLU.
66  *
67  *   U       - (input) SuperMatrix*
68  *	        The factor U from the factorization Pr*A*Pc=L*U.
69  *	        U has types: Stype = NC, Dtype = SLU_D, Mtype = TRU.
70  *
71  *   x       - (input/output) double*
72  *             Before entry, the incremented array X must contain the n
73  *             element right-hand side vector b. On exit, X is overwritten
74  *             with the solution vector x.
75  *
76  *   info    - (output) int*
77  *             If *info = -i, the i-th argument had an illegal value.
78  * </pre>
79  */
80 int
sp_dtrsv(char * uplo,char * trans,char * diag,SuperMatrix * L,SuperMatrix * U,double * x,SuperLUStat_t * stat,int * info)81 sp_dtrsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
82          SuperMatrix *U, double *x, SuperLUStat_t *stat, int *info)
83 {
84 #ifdef _CRAY
85     _fcd ftcs1 = _cptofcd("L", strlen("L")),
86 	 ftcs2 = _cptofcd("N", strlen("N")),
87 	 ftcs3 = _cptofcd("U", strlen("U"));
88 #endif
89     SCformat *Lstore;
90     NCformat *Ustore;
91     double   *Lval, *Uval;
92     int incx = 1, incy = 1;
93     double alpha = 1.0, beta = 1.0;
94     int nrow;
95     int fsupc, nsupr, nsupc, luptr, istart, irow;
96     int i, k, iptr, jcol;
97     double *work;
98     flops_t solve_ops;
99 
100     /* Test the input parameters */
101     *info = 0;
102     if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
103     else if ( !lsame_(trans, "N") && !lsame_(trans, "T") &&
104               !lsame_(trans, "C")) *info = -2;
105     else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
106     else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
107     else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
108     if ( *info ) {
109 	i = -(*info);
110 	xerbla_("sp_dtrsv", &i);
111 	return 0;
112     }
113 
114     Lstore = L->Store;
115     Lval = Lstore->nzval;
116     Ustore = U->Store;
117     Uval = Ustore->nzval;
118     solve_ops = 0;
119 
120     if ( !(work = doubleCalloc(L->nrow)) )
121 	ABORT("Malloc fails for work in sp_dtrsv().");
122 
123     if ( lsame_(trans, "N") ) {	/* Form x := inv(A)*x. */
124 
125 	if ( lsame_(uplo, "L") ) {
126 	    /* Form x := inv(L)*x */
127     	    if ( L->nrow == 0 ) return 0; /* Quick return */
128 
129 	    for (k = 0; k <= Lstore->nsuper; k++) {
130 		fsupc = L_FST_SUPC(k);
131 		istart = L_SUB_START(fsupc);
132 		nsupr = L_SUB_START(fsupc+1) - istart;
133 		nsupc = L_FST_SUPC(k+1) - fsupc;
134 		luptr = L_NZ_START(fsupc);
135 		nrow = nsupr - nsupc;
136 
137 	        solve_ops += nsupc * (nsupc - 1);
138 	        solve_ops += 2 * nrow * nsupc;
139 
140 		if ( nsupc == 1 ) {
141 		    for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
142 			irow = L_SUB(iptr);
143 			++luptr;
144 			x[irow] -= x[fsupc] * Lval[luptr];
145 		    }
146 		} else {
147 #ifdef USE_VENDOR_BLAS
148 #ifdef _CRAY
149 		    STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
150 		       	&x[fsupc], &incx);
151 
152 		    SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
153 		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
154 #else
155 		    dtrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
156 		       	&x[fsupc], &incx);
157 
158 		    dgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
159 		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
160 #endif
161 #else
162 		    dlsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
163 
164 		    dmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
165                              &x[fsupc], &work[0] );
166 #endif
167 
168 		    iptr = istart + nsupc;
169 		    for (i = 0; i < nrow; ++i, ++iptr) {
170 			irow = L_SUB(iptr);
171 			x[irow] -= work[i];	/* Scatter */
172 			work[i] = 0.0;
173 
174 		    }
175 	 	}
176 	    } /* for k ... */
177 
178 	} else {
179 	    /* Form x := inv(U)*x */
180 
181 	    if ( U->nrow == 0 ) return 0; /* Quick return */
182 
183 	    for (k = Lstore->nsuper; k >= 0; k--) {
184 	    	fsupc = L_FST_SUPC(k);
185 	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
186 	    	nsupc = L_FST_SUPC(k+1) - fsupc;
187 	    	luptr = L_NZ_START(fsupc);
188 
189     	        solve_ops += nsupc * (nsupc + 1);
190 
191 		if ( nsupc == 1 ) {
192 		    x[fsupc] /= Lval[luptr];
193 		    for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
194 			irow = U_SUB(i);
195 			x[irow] -= x[fsupc] * Uval[i];
196 		    }
197 		} else {
198 #ifdef USE_VENDOR_BLAS
199 #ifdef _CRAY
200 		    STRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
201 		       &x[fsupc], &incx);
202 #else
203 		    dtrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
204                            &x[fsupc], &incx);
205 #endif
206 #else
207 		    dusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
208 #endif
209 
210 		    for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
211 		        solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
212 		    	for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1);
213 				i++) {
214 			    irow = U_SUB(i);
215 			    x[irow] -= x[jcol] * Uval[i];
216 		    	}
217                     }
218 		}
219 	    } /* for k ... */
220 
221 	}
222     } else { /* Form x := inv(A')*x */
223 
224 	if ( lsame_(uplo, "L") ) {
225 	    /* Form x := inv(L')*x */
226     	    if ( L->nrow == 0 ) return 0; /* Quick return */
227 
228 	    for (k = Lstore->nsuper; k >= 0; --k) {
229 	    	fsupc = L_FST_SUPC(k);
230 	    	istart = L_SUB_START(fsupc);
231 	    	nsupr = L_SUB_START(fsupc+1) - istart;
232 	    	nsupc = L_FST_SUPC(k+1) - fsupc;
233 	    	luptr = L_NZ_START(fsupc);
234 
235 		solve_ops += 2 * (nsupr - nsupc) * nsupc;
236 
237 		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
238 		    iptr = istart + nsupc;
239 		    for (i = L_NZ_START(jcol) + nsupc;
240 				i < L_NZ_START(jcol+1); i++) {
241 			irow = L_SUB(iptr);
242 			x[jcol] -= x[irow] * Lval[i];
243 			iptr++;
244 		    }
245 		}
246 
247 		if ( nsupc > 1 ) {
248 		    solve_ops += nsupc * (nsupc - 1);
249 #ifdef _CRAY
250                     ftcs1 = _cptofcd("L", strlen("L"));
251                     ftcs2 = _cptofcd("T", strlen("T"));
252                     ftcs3 = _cptofcd("U", strlen("U"));
253 		    STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
254 			&x[fsupc], &incx);
255 #else
256 		    dtrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
257 			&x[fsupc], &incx);
258 #endif
259 		}
260 	    }
261 	} else {
262 	    /* Form x := inv(U')*x */
263 	    if ( U->nrow == 0 ) return 0; /* Quick return */
264 
265 	    for (k = 0; k <= Lstore->nsuper; k++) {
266 	    	fsupc = L_FST_SUPC(k);
267 	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
268 	    	nsupc = L_FST_SUPC(k+1) - fsupc;
269 	    	luptr = L_NZ_START(fsupc);
270 
271 		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
272 		    solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
273 		    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
274 			irow = U_SUB(i);
275 			x[jcol] -= x[irow] * Uval[i];
276 		    }
277 		}
278 
279 		solve_ops += nsupc * (nsupc + 1);
280 
281 		if ( nsupc == 1 ) {
282 		    x[fsupc] /= Lval[luptr];
283 		} else {
284 #ifdef _CRAY
285                     ftcs1 = _cptofcd("U", strlen("U"));
286                     ftcs2 = _cptofcd("T", strlen("T"));
287                     ftcs3 = _cptofcd("N", strlen("N"));
288 		    STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
289 			    &x[fsupc], &incx);
290 #else
291 		    dtrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
292 			    &x[fsupc], &incx);
293 #endif
294 		}
295 	    } /* for k ... */
296 	}
297     }
298 
299     stat->ops[SOLVE] += solve_ops;
300     SUPERLU_FREE(work);
301     return 0;
302 }
303 
304 
305 
306 /*! \brief Performs one of the matrix-vector operations y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,
307  *
308  * <pre>
309  *   Purpose
310  *   =======
311  *
312  *   sp_dgemv()  performs one of the matrix-vector operations
313  *      y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,
314  *   where alpha and beta are scalars, x and y are vectors and A is a
315  *   sparse A->nrow by A->ncol matrix.
316  *
317  *   Parameters
318  *   ==========
319  *
320  *   TRANS  - (input) char*
321  *            On entry, TRANS specifies the operation to be performed as
322  *            follows:
323  *               TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
324  *               TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.
325  *               TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.
326  *
327  *   ALPHA  - (input) double
328  *            On entry, ALPHA specifies the scalar alpha.
329  *
330  *   A      - (input) SuperMatrix*
331  *            Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
332  *            Currently, the type of A can be:
333  *                Stype = NC or NCP; Dtype = SLU_D; Mtype = GE.
334  *            In the future, more general A can be handled.
335  *
336  *   X      - (input) double*, array of DIMENSION at least
337  *            ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
338  *            and at least
339  *            ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
340  *            Before entry, the incremented array X must contain the
341  *            vector x.
342  *
343  *   INCX   - (input) int
344  *            On entry, INCX specifies the increment for the elements of
345  *            X. INCX must not be zero.
346  *
347  *   BETA   - (input) double
348  *            On entry, BETA specifies the scalar beta. When BETA is
349  *            supplied as zero then Y need not be set on input.
350  *
351  *   Y      - (output) double*,  array of DIMENSION at least
352  *            ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
353  *            and at least
354  *            ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
355  *            Before entry with BETA non-zero, the incremented array Y
356  *            must contain the vector y. On exit, Y is overwritten by the
357  *            updated vector y.
358  *
359  *   INCY   - (input) int
360  *            On entry, INCY specifies the increment for the elements of
361  *            Y. INCY must not be zero.
362  *
363  *   ==== Sparse Level 2 Blas routine.
364  * </pre>
365  */
366 
367 int
sp_dgemv(char * trans,double alpha,SuperMatrix * A,double * x,int incx,double beta,double * y,int incy)368 sp_dgemv(char *trans, double alpha, SuperMatrix *A, double *x,
369 	 int incx, double beta, double *y, int incy)
370 {
371     /* Local variables */
372     NCformat *Astore;
373     double   *Aval;
374     int info;
375     double temp;
376     int lenx, leny, i, j, irow;
377     int iy, jx, jy, kx, ky;
378     int notran;
379 
380     notran = lsame_(trans, "N");
381     Astore = A->Store;
382     Aval = Astore->nzval;
383 
384     /* Test the input parameters */
385     info = 0;
386     if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
387     else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
388     else if (incx == 0) info = 5;
389     else if (incy == 0)	info = 8;
390     if (info != 0) {
391 	xerbla_("sp_dgemv ", &info);
392 	return 0;
393     }
394 
395     /* Quick return if possible. */
396     if (A->nrow == 0 || A->ncol == 0 || (alpha == 0. && beta == 1.))
397 	return 0;
398 
399     /* Set  LENX  and  LENY, the lengths of the vectors x and y, and set
400        up the start points in  X  and  Y. */
401     if (lsame_(trans, "N")) {
402 	lenx = A->ncol;
403 	leny = A->nrow;
404     } else {
405 	lenx = A->nrow;
406 	leny = A->ncol;
407     }
408     if (incx > 0) kx = 0;
409     else kx =  - (lenx - 1) * incx;
410     if (incy > 0) ky = 0;
411     else ky =  - (leny - 1) * incy;
412 
413     /* Start the operations. In this version the elements of A are
414        accessed sequentially with one pass through A. */
415     /* First form  y := beta*y. */
416     if (beta != 1.) {
417 	if (incy == 1) {
418 	    if (beta == 0.)
419 		for (i = 0; i < leny; ++i) y[i] = 0.;
420 	    else
421 		for (i = 0; i < leny; ++i) y[i] = beta * y[i];
422 	} else {
423 	    iy = ky;
424 	    if (beta == 0.)
425 		for (i = 0; i < leny; ++i) {
426 		    y[iy] = 0.;
427 		    iy += incy;
428 		}
429 	    else
430 		for (i = 0; i < leny; ++i) {
431 		    y[iy] = beta * y[iy];
432 		    iy += incy;
433 		}
434 	}
435     }
436 
437     if (alpha == 0.) return 0;
438 
439     if ( notran ) {
440 	/* Form  y := alpha*A*x + y. */
441 	jx = kx;
442 	if (incy == 1) {
443 	    for (j = 0; j < A->ncol; ++j) {
444 		if (x[jx] != 0.) {
445 		    temp = alpha * x[jx];
446 		    for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
447 			irow = Astore->rowind[i];
448 			y[irow] += temp * Aval[i];
449 		    }
450 		}
451 		jx += incx;
452 	    }
453 	} else {
454 	    ABORT("Not implemented.");
455 	}
456     } else {
457 	/* Form  y := alpha*A'*x + y. */
458 	jy = ky;
459 	if (incx == 1) {
460 	    for (j = 0; j < A->ncol; ++j) {
461 		temp = 0.;
462 		for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
463 		    irow = Astore->rowind[i];
464 		    temp += Aval[i] * x[irow];
465 		}
466 		y[jy] += alpha * temp;
467 		jy += incy;
468 	    }
469 	} else {
470 	    ABORT("Not implemented.");
471 	}
472     }
473     return 0;
474 } /* sp_dgemv */
475 
476 
477 
478