1 
2 /*! @file sutil.c
3  * \brief Matrix utility functions
4  *
5  * <pre>
6  * -- SuperLU routine (version 3.1) --
7  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
8  * and Lawrence Berkeley National Lab.
9  * August 1, 2008
10  *
11  * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
12  *
13  * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
14  * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
15  *
16  * Permission is hereby granted to use or copy this program for any
17  * purpose, provided the above notices are retained on all copies.
18  * Permission to modify the code and to distribute modified code is
19  * granted, provided the above notices are retained, and a notice that
20  * the code was modified is included with the above copyright notice.
21  * </pre>
22  */
23 
24 
25 #include <math.h>
26 #include "slu_sdefs.h"
27 
28 void
sCreate_CompCol_Matrix(SuperMatrix * A,int m,int n,int nnz,float * nzval,int * rowind,int * colptr,Stype_t stype,Dtype_t dtype,Mtype_t mtype)29 sCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
30 		       float *nzval, int *rowind, int *colptr,
31 		       Stype_t stype, Dtype_t dtype, Mtype_t mtype)
32 {
33     NCformat *Astore;
34 
35     A->Stype = stype;
36     A->Dtype = dtype;
37     A->Mtype = mtype;
38     A->nrow = m;
39     A->ncol = n;
40     A->Store = (void *) SUPERLU_MALLOC( sizeof(NCformat) );
41     if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
42     Astore = A->Store;
43     Astore->nnz = nnz;
44     Astore->nzval = nzval;
45     Astore->rowind = rowind;
46     Astore->colptr = colptr;
47 }
48 
49 void
sCreate_CompRow_Matrix(SuperMatrix * A,int m,int n,int nnz,float * nzval,int * colind,int * rowptr,Stype_t stype,Dtype_t dtype,Mtype_t mtype)50 sCreate_CompRow_Matrix(SuperMatrix *A, int m, int n, int nnz,
51 		       float *nzval, int *colind, int *rowptr,
52 		       Stype_t stype, Dtype_t dtype, Mtype_t mtype)
53 {
54     NRformat *Astore;
55 
56     A->Stype = stype;
57     A->Dtype = dtype;
58     A->Mtype = mtype;
59     A->nrow = m;
60     A->ncol = n;
61     A->Store = (void *) SUPERLU_MALLOC( sizeof(NRformat) );
62     if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
63     Astore = A->Store;
64     Astore->nnz = nnz;
65     Astore->nzval = nzval;
66     Astore->colind = colind;
67     Astore->rowptr = rowptr;
68 }
69 
70 /*! \brief Copy matrix A into matrix B. */
71 void
sCopy_CompCol_Matrix(SuperMatrix * A,SuperMatrix * B)72 sCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B)
73 {
74     NCformat *Astore, *Bstore;
75     int      ncol, nnz, i;
76 
77     B->Stype = A->Stype;
78     B->Dtype = A->Dtype;
79     B->Mtype = A->Mtype;
80     B->nrow  = A->nrow;;
81     B->ncol  = ncol = A->ncol;
82     Astore   = (NCformat *) A->Store;
83     Bstore   = (NCformat *) B->Store;
84     Bstore->nnz = nnz = Astore->nnz;
85     for (i = 0; i < nnz; ++i)
86 	((float *)Bstore->nzval)[i] = ((float *)Astore->nzval)[i];
87     for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i];
88     for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i];
89 }
90 
91 
92 void
sCreate_Dense_Matrix(SuperMatrix * X,int m,int n,float * x,int ldx,Stype_t stype,Dtype_t dtype,Mtype_t mtype)93 sCreate_Dense_Matrix(SuperMatrix *X, int m, int n, float *x, int ldx,
94 		    Stype_t stype, Dtype_t dtype, Mtype_t mtype)
95 {
96     DNformat    *Xstore;
97 
98     X->Stype = stype;
99     X->Dtype = dtype;
100     X->Mtype = mtype;
101     X->nrow = m;
102     X->ncol = n;
103     X->Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
104     if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store");
105     Xstore = (DNformat *) X->Store;
106     Xstore->lda = ldx;
107     Xstore->nzval = (float *) x;
108 }
109 
110 void
sCopy_Dense_Matrix(int M,int N,float * X,int ldx,float * Y,int ldy)111 sCopy_Dense_Matrix(int M, int N, float *X, int ldx,
112 			float *Y, int ldy)
113 {
114 /*! \brief Copies a two-dimensional matrix X to another matrix Y.
115  */
116     int    i, j;
117 
118     for (j = 0; j < N; ++j)
119         for (i = 0; i < M; ++i)
120             Y[i + j*ldy] = X[i + j*ldx];
121 }
122 
123 void
sCreate_SuperNode_Matrix(SuperMatrix * L,int m,int n,int nnz,float * nzval,int * nzval_colptr,int * rowind,int * rowind_colptr,int * col_to_sup,int * sup_to_col,Stype_t stype,Dtype_t dtype,Mtype_t mtype)124 sCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz,
125 			float *nzval, int *nzval_colptr, int *rowind,
126 			int *rowind_colptr, int *col_to_sup, int *sup_to_col,
127 			Stype_t stype, Dtype_t dtype, Mtype_t mtype)
128 {
129     SCformat *Lstore;
130 
131     L->Stype = stype;
132     L->Dtype = dtype;
133     L->Mtype = mtype;
134     L->nrow = m;
135     L->ncol = n;
136     L->Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) );
137     if ( !(L->Store) ) ABORT("SUPERLU_MALLOC fails for L->Store");
138     Lstore = L->Store;
139     Lstore->nnz = nnz;
140     Lstore->nsuper = col_to_sup[n];
141     Lstore->nzval = nzval;
142     Lstore->nzval_colptr = nzval_colptr;
143     Lstore->rowind = rowind;
144     Lstore->rowind_colptr = rowind_colptr;
145     Lstore->col_to_sup = col_to_sup;
146     Lstore->sup_to_col = sup_to_col;
147 
148 }
149 
150 
151 /*! \brief Convert a row compressed storage into a column compressed storage.
152  */
153 void
sCompRow_to_CompCol(int m,int n,int nnz,float * a,int * colind,int * rowptr,float ** at,int ** rowind,int ** colptr)154 sCompRow_to_CompCol(int m, int n, int nnz,
155 		    float *a, int *colind, int *rowptr,
156 		    float **at, int **rowind, int **colptr)
157 {
158     register int i, j, col, relpos;
159     int *marker;
160 
161     /* Allocate storage for another copy of the matrix. */
162     *at = (float *) floatMalloc(nnz);
163     *rowind = (int *) intMalloc(nnz);
164     *colptr = (int *) intMalloc(n+1);
165     marker = (int *) intCalloc(n);
166 
167     /* Get counts of each column of A, and set up column pointers */
168     for (i = 0; i < m; ++i)
169 	for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]];
170     (*colptr)[0] = 0;
171     for (j = 0; j < n; ++j) {
172 	(*colptr)[j+1] = (*colptr)[j] + marker[j];
173 	marker[j] = (*colptr)[j];
174     }
175 
176     /* Transfer the matrix into the compressed column storage. */
177     for (i = 0; i < m; ++i) {
178 	for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
179 	    col = colind[j];
180 	    relpos = marker[col];
181 	    (*rowind)[relpos] = i;
182 	    (*at)[relpos] = a[j];
183 	    ++marker[col];
184 	}
185     }
186 
187     SUPERLU_FREE(marker);
188 }
189 
190 
191 void
sPrint_CompCol_Matrix(char * what,SuperMatrix * A)192 sPrint_CompCol_Matrix(char *what, SuperMatrix *A)
193 {
194     NCformat     *Astore;
195     register int i,n;
196     float       *dp;
197 
198     printf("\nCompCol matrix %s:\n", what);
199     printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
200     n = A->ncol;
201     Astore = (NCformat *) A->Store;
202     dp = (float *) Astore->nzval;
203     printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz);
204     printf("nzval: ");
205     for (i = 0; i < Astore->colptr[n]; ++i) printf("%f  ", dp[i]);
206     printf("\nrowind: ");
207     for (i = 0; i < Astore->colptr[n]; ++i) printf("%d  ", Astore->rowind[i]);
208     printf("\ncolptr: ");
209     for (i = 0; i <= n; ++i) printf("%d  ", Astore->colptr[i]);
210     printf("\n");
211     fflush(stdout);
212 }
213 
214 void
sPrint_SuperNode_Matrix(char * what,SuperMatrix * A)215 sPrint_SuperNode_Matrix(char *what, SuperMatrix *A)
216 {
217     SCformat     *Astore;
218     register int i, j, k, c, d, n, nsup;
219     float       *dp;
220     int *col_to_sup, *sup_to_col, *rowind, *rowind_colptr;
221 
222     printf("\nSuperNode matrix %s:\n", what);
223     printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
224     n = A->ncol;
225     Astore = (SCformat *) A->Store;
226     dp = (float *) Astore->nzval;
227     col_to_sup = Astore->col_to_sup;
228     sup_to_col = Astore->sup_to_col;
229     rowind_colptr = Astore->rowind_colptr;
230     rowind = Astore->rowind;
231     printf("nrow %d, ncol %d, nnz %d, nsuper %d\n",
232 	   A->nrow,A->ncol,Astore->nnz,Astore->nsuper);
233     printf("nzval:\n");
234     for (k = 0; k <= Astore->nsuper; ++k) {
235       c = sup_to_col[k];
236       nsup = sup_to_col[k+1] - c;
237       for (j = c; j < c + nsup; ++j) {
238 	d = Astore->nzval_colptr[j];
239 	for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) {
240 	  printf("%d\t%d\t%e\n", rowind[i], j, dp[d++]);
241 	}
242       }
243     }
244 #if 0
245     for (i = 0; i < Astore->nzval_colptr[n]; ++i) printf("%f  ", dp[i]);
246 #endif
247     printf("\nnzval_colptr: ");
248     for (i = 0; i <= n; ++i) printf("%d  ", Astore->nzval_colptr[i]);
249     printf("\nrowind: ");
250     for (i = 0; i < Astore->rowind_colptr[n]; ++i)
251         printf("%d  ", Astore->rowind[i]);
252     printf("\nrowind_colptr: ");
253     for (i = 0; i <= n; ++i) printf("%d  ", Astore->rowind_colptr[i]);
254     printf("\ncol_to_sup: ");
255     for (i = 0; i < n; ++i) printf("%d  ", col_to_sup[i]);
256     printf("\nsup_to_col: ");
257     for (i = 0; i <= Astore->nsuper+1; ++i)
258         printf("%d  ", sup_to_col[i]);
259     printf("\n");
260     fflush(stdout);
261 }
262 
263 void
sPrint_Dense_Matrix(char * what,SuperMatrix * A)264 sPrint_Dense_Matrix(char *what, SuperMatrix *A)
265 {
266     DNformat     *Astore = (DNformat *) A->Store;
267     register int i, j, lda = Astore->lda;
268     float       *dp;
269 
270     printf("\nDense matrix %s:\n", what);
271     printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
272     dp = (float *) Astore->nzval;
273     printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,lda);
274     printf("\nnzval: ");
275     for (j = 0; j < A->ncol; ++j) {
276         for (i = 0; i < A->nrow; ++i) printf("%f  ", dp[i + j*lda]);
277         printf("\n");
278     }
279     printf("\n");
280     fflush(stdout);
281 }
282 
283 /*! \brief Diagnostic print of column "jcol" in the U/L factor.
284  */
285 void
sprint_lu_col(char * msg,int jcol,int pivrow,int * xprune,GlobalLU_t * Glu)286 sprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu)
287 {
288     int     i, k, fsupc;
289     int     *xsup, *supno;
290     int     *xlsub, *lsub;
291     float  *lusup;
292     int     *xlusup;
293     float  *ucol;
294     int     *usub, *xusub;
295 
296     xsup    = Glu->xsup;
297     supno   = Glu->supno;
298     lsub    = Glu->lsub;
299     xlsub   = Glu->xlsub;
300     lusup   = Glu->lusup;
301     xlusup  = Glu->xlusup;
302     ucol    = Glu->ucol;
303     usub    = Glu->usub;
304     xusub   = Glu->xusub;
305 
306     printf("%s", msg);
307     printf("col %d: pivrow %d, supno %d, xprune %d\n",
308 	   jcol, pivrow, supno[jcol], xprune[jcol]);
309 
310     printf("\tU-col:\n");
311     for (i = xusub[jcol]; i < xusub[jcol+1]; i++)
312 	printf("\t%d%10.4f\n", usub[i], ucol[i]);
313     printf("\tL-col in rectangular snode:\n");
314     fsupc = xsup[supno[jcol]];	/* first col of the snode */
315     i = xlsub[fsupc];
316     k = xlusup[jcol];
317     while ( i < xlsub[fsupc+1] && k < xlusup[jcol+1] ) {
318 	printf("\t%d\t%10.4f\n", lsub[i], lusup[k]);
319 	i++; k++;
320     }
321     fflush(stdout);
322 }
323 
324 
325 /*! \brief Check whether tempv[] == 0. This should be true before and after calling any numeric routines, i.e., "panel_bmod" and "column_bmod".
326  */
scheck_tempv(int n,float * tempv)327 void scheck_tempv(int n, float *tempv)
328 {
329     int i;
330 
331     for (i = 0; i < n; i++) {
332 	if (tempv[i] != 0.0)
333 	{
334 	    fprintf(stderr,"tempv[%d] = %f\n", i,tempv[i]);
335 	    ABORT("scheck_tempv");
336 	}
337     }
338 }
339 
340 
341 void
sGenXtrue(int n,int nrhs,float * x,int ldx)342 sGenXtrue(int n, int nrhs, float *x, int ldx)
343 {
344     int  i, j;
345     for (j = 0; j < nrhs; ++j)
346 	for (i = 0; i < n; ++i) {
347 	    x[i + j*ldx] = 1.0;/* + (float)(i+1.)/n;*/
348 	}
349 }
350 
351 /*! \brief Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's
352  */
353 void
sFillRHS(trans_t trans,int nrhs,float * x,int ldx,SuperMatrix * A,SuperMatrix * B)354 sFillRHS(trans_t trans, int nrhs, float *x, int ldx,
355          SuperMatrix *A, SuperMatrix *B)
356 {
357     NCformat *Astore;
358     float   *Aval;
359     DNformat *Bstore;
360     float   *rhs;
361     float one = 1.0;
362     float zero = 0.0;
363     int      ldc;
364     char transc[1];
365 
366     Astore = A->Store;
367     Aval   = (float *) Astore->nzval;
368     Bstore = B->Store;
369     rhs    = Bstore->nzval;
370     ldc    = Bstore->lda;
371 
372     if ( trans == NOTRANS ) *(unsigned char *)transc = 'N';
373     else *(unsigned char *)transc = 'T';
374 
375     sp_sgemm(transc, "N", A->nrow, nrhs, A->ncol, one, A,
376 	     x, ldx, zero, rhs, ldc);
377 
378 }
379 
380 /*! \brief Fills a float precision array with a given value.
381  */
382 void
sfill(float * a,int alen,float dval)383 sfill(float *a, int alen, float dval)
384 {
385     register int i;
386     for (i = 0; i < alen; i++) a[i] = dval;
387 }
388 
389 
390 
391 /*! \brief Check the inf-norm of the error vector
392  */
sinf_norm_error(int nrhs,SuperMatrix * X,float * xtrue)393 void sinf_norm_error(int nrhs, SuperMatrix *X, float *xtrue)
394 {
395     DNformat *Xstore;
396     float err, xnorm;
397     float *Xmat, *soln_work;
398     int i, j;
399 
400     Xstore = X->Store;
401     Xmat = Xstore->nzval;
402 
403     for (j = 0; j < nrhs; j++) {
404       soln_work = &Xmat[j*Xstore->lda];
405       err = xnorm = 0.0;
406       for (i = 0; i < X->nrow; i++) {
407 	err = SUPERLU_MAX(err, fabs(soln_work[i] - xtrue[i]));
408 	xnorm = SUPERLU_MAX(xnorm, fabs(soln_work[i]));
409       }
410       err = err / xnorm;
411       printf("||X - Xtrue||/||X|| = %e\n", err);
412     }
413 }
414 
415 
416 
417 /*! \brief Print performance of the code. */
418 void
sPrintPerf(SuperMatrix * L,SuperMatrix * U,mem_usage_t * mem_usage,float rpg,float rcond,float * ferr,float * berr,char * equed,SuperLUStat_t * stat)419 sPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage,
420            float rpg, float rcond, float *ferr,
421            float *berr, char *equed, SuperLUStat_t *stat)
422 {
423     SCformat *Lstore;
424     NCformat *Ustore;
425     double   *utime;
426     flops_t  *ops;
427 
428     utime = stat->utime;
429     ops   = stat->ops;
430 
431     if ( utime[FACT] != 0. )
432 	printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT],
433 	       ops[FACT]*1e-6/utime[FACT]);
434     printf("Identify relaxed snodes	= %8.2f\n", utime[RELAX]);
435     if ( utime[SOLVE] != 0. )
436 	printf("Solve flops = %.0f, Mflops = %8.2f\n", ops[SOLVE],
437 	       ops[SOLVE]*1e-6/utime[SOLVE]);
438 
439     Lstore = (SCformat *) L->Store;
440     Ustore = (NCformat *) U->Store;
441     printf("\tNo of nonzeros in factor L = %d\n", Lstore->nnz);
442     printf("\tNo of nonzeros in factor U = %d\n", Ustore->nnz);
443     printf("\tNo of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz);
444 
445     printf("L\\U MB %.3f\ttotal MB needed %.3f\n",
446 	   mem_usage->for_lu/1e6, mem_usage->total_needed/1e6);
447     printf("Number of memory expansions: %d\n", stat->expansions);
448 
449     printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n");
450     printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n",
451 	   utime[FACT], ops[FACT]*1e-6/utime[FACT],
452 	   utime[SOLVE], ops[SOLVE]*1e-6/utime[SOLVE],
453 	   utime[ETREE], utime[EQUIL], utime[RCOND], utime[REFINE]);
454 
455     printf("\tRpg\t\tRcond\t\tFerr\t\tBerr\t\tEquil?\n");
456     printf("NUM:\t%e\t%e\t%e\t%e\t%s\n",
457 	   rpg, rcond, ferr[0], berr[0], equed);
458 
459 }
460 
461 
462 
463 
print_float_vec(char * what,int n,float * vec)464 print_float_vec(char *what, int n, float *vec)
465 {
466     int i;
467     printf("%s: n %d\n", what, n);
468     for (i = 0; i < n; ++i) printf("%d\t%f\n", i, vec[i]);
469     return 0;
470 }
471 
472