1 
2 
3 /*
4  * -- SuperLU routine (version 2.0) --
5  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
6  * and Lawrence Berkeley National Lab.
7  * November 15, 1997
8  *
9  */
10 /*
11   Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
12 
13   THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
14   EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
15 
16   Permission is hereby granted to use or copy this program for any
17   purpose, provided the above notices are retained on all copies.
18   Permission to modify the code and to distribute modified code is
19   granted, provided the above notices are retained, and a notice that
20   the code was modified is included with the above copyright notice.
21 */
22 
23 #include <math.h>
24 #include "dsp_defs.h"
25 #include "util.h"
26 
27 void
dCreate_CompCol_Matrix(SuperMatrix * A,int m,int n,int nnz,double * nzval,int * rowind,int * colptr,Stype_t stype,Dtype_t dtype,Mtype_t mtype)28 dCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
29 		       double *nzval, int *rowind, int *colptr,
30 		       Stype_t stype, Dtype_t dtype, Mtype_t mtype)
31 {
32     NCformat *Astore;
33 
34     A->Stype = stype;
35     A->Dtype = dtype;
36     A->Mtype = mtype;
37     A->nrow = m;
38     A->ncol = n;
39     A->Store = (void *) SUPERLU_MALLOC( sizeof(NCformat) );
40     if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
41     Astore = A->Store;
42     Astore->nnz = nnz;
43     Astore->nzval = nzval;
44     Astore->rowind = rowind;
45     Astore->colptr = colptr;
46 }
47 
48 void
dCreate_CompRow_Matrix(SuperMatrix * A,int m,int n,int nnz,double * nzval,int * colind,int * rowptr,Stype_t stype,Dtype_t dtype,Mtype_t mtype)49 dCreate_CompRow_Matrix(SuperMatrix *A, int m, int n, int nnz,
50 		       double *nzval, int *colind, int *rowptr,
51 		       Stype_t stype, Dtype_t dtype, Mtype_t mtype)
52 {
53     NRformat *Astore;
54 
55     A->Stype = stype;
56     A->Dtype = dtype;
57     A->Mtype = mtype;
58     A->nrow = m;
59     A->ncol = n;
60     A->Store = (void *) SUPERLU_MALLOC( sizeof(NRformat) );
61     if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
62     Astore = A->Store;
63     Astore->nnz = nnz;
64     Astore->nzval = nzval;
65     Astore->colind = colind;
66     Astore->rowptr = rowptr;
67 }
68 
69 /* Copy matrix A into matrix B. */
70 void
dCopy_CompCol_Matrix(SuperMatrix * A,SuperMatrix * B)71 dCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B)
72 {
73     NCformat *Astore, *Bstore;
74     int      ncol, nnz, i;
75 
76     B->Stype = A->Stype;
77     B->Dtype = A->Dtype;
78     B->Mtype = A->Mtype;
79     B->nrow  = A->nrow;;
80     B->ncol  = ncol = A->ncol;
81     Astore   = (NCformat *) A->Store;
82     Bstore   = (NCformat *) B->Store;
83     Bstore->nnz = nnz = Astore->nnz;
84     for (i = 0; i < nnz; ++i)
85 	((double *)Bstore->nzval)[i] = ((double *)Astore->nzval)[i];
86     for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i];
87     for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i];
88 }
89 
90 
91 void
dCreate_Dense_Matrix(SuperMatrix * X,int m,int n,double * x,int ldx,Stype_t stype,Dtype_t dtype,Mtype_t mtype)92 dCreate_Dense_Matrix(SuperMatrix *X, int m, int n, double *x, int ldx,
93 		    Stype_t stype, Dtype_t dtype, Mtype_t mtype)
94 {
95     DNformat    *Xstore;
96 
97     X->Stype = stype;
98     X->Dtype = dtype;
99     X->Mtype = mtype;
100     X->nrow = m;
101     X->ncol = n;
102     X->Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
103     if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store");
104     Xstore = (DNformat *) X->Store;
105     Xstore->lda = ldx;
106     Xstore->nzval = (double *) x;
107 }
108 
109 void
dCopy_Dense_Matrix(int M,int N,double * X,int ldx,double * Y,int ldy)110 dCopy_Dense_Matrix(int M, int N, double *X, int ldx,
111 			double *Y, int ldy)
112 {
113 /*
114  *
115  *  Purpose
116  *  =======
117  *
118  *  Copies a two-dimensional matrix X to another matrix Y.
119  */
120     int    i, j;
121 
122     for (j = 0; j < N; ++j)
123         for (i = 0; i < M; ++i)
124             Y[i + j*ldy] = X[i + j*ldx];
125 }
126 
127 void
dCreate_SuperNode_Matrix(SuperMatrix * L,int m,int n,int nnz,double * nzval,int * nzval_colptr,int * rowind,int * rowind_colptr,int * col_to_sup,int * sup_to_col,Stype_t stype,Dtype_t dtype,Mtype_t mtype)128 dCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz,
129 			double *nzval, int *nzval_colptr, int *rowind,
130 			int *rowind_colptr, int *col_to_sup, int *sup_to_col,
131 			Stype_t stype, Dtype_t dtype, Mtype_t mtype)
132 {
133     SCformat *Lstore;
134 
135     L->Stype = stype;
136     L->Dtype = dtype;
137     L->Mtype = mtype;
138     L->nrow = m;
139     L->ncol = n;
140     L->Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) );
141     if ( !(L->Store) ) ABORT("SUPERLU_MALLOC fails for L->Store");
142     Lstore = L->Store;
143     Lstore->nnz = nnz;
144     Lstore->nsuper = col_to_sup[n];
145     Lstore->nzval = nzval;
146     Lstore->nzval_colptr = nzval_colptr;
147     Lstore->rowind = rowind;
148     Lstore->rowind_colptr = rowind_colptr;
149     Lstore->col_to_sup = col_to_sup;
150     Lstore->sup_to_col = sup_to_col;
151 
152 }
153 
154 
155 /*
156  * Convert a row compressed storage into a column compressed storage.
157  */
158 void
dCompRow_to_CompCol(int m,int n,int nnz,double * a,int * colind,int * rowptr,double ** at,int ** rowind,int ** colptr)159 dCompRow_to_CompCol(int m, int n, int nnz,
160 		    double *a, int *colind, int *rowptr,
161 		    double **at, int **rowind, int **colptr)
162 {
163     register int i, j, col, relpos;
164     int *marker;
165 
166     /* Allocate storage for another copy of the matrix. */
167     *at = (double *) doubleMalloc(nnz);
168     *rowind = (int *) intMalloc(nnz);
169     *colptr = (int *) intMalloc(n+1);
170     marker = (int *) intCalloc(n);
171 
172     /* Get counts of each column of A, and set up column pointers */
173     for (i = 0; i < m; ++i)
174 	for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]];
175     (*colptr)[0] = 0;
176     for (j = 0; j < n; ++j) {
177 	(*colptr)[j+1] = (*colptr)[j] + marker[j];
178 	marker[j] = (*colptr)[j];
179     }
180 
181     /* Transfer the matrix into the compressed column storage. */
182     for (i = 0; i < m; ++i) {
183 	for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
184 	    col = colind[j];
185 	    relpos = marker[col];
186 	    (*rowind)[relpos] = i;
187 	    (*at)[relpos] = a[j];
188 	    ++marker[col];
189 	}
190     }
191 
192     SUPERLU_FREE(marker);
193 }
194 
195 
196 void
dPrint_CompCol_Matrix(char * what,SuperMatrix * A)197 dPrint_CompCol_Matrix(char *what, SuperMatrix *A)
198 {
199     NCformat     *Astore;
200     register int i,n;
201     double       *dp;
202 
203     printf("\nCompCol matrix %s:\n", what);
204     printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
205     n = A->ncol;
206     Astore = (NCformat *) A->Store;
207     dp = (double *) Astore->nzval;
208     printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz);
209     printf("nzval: ");
210     for (i = 0; i < Astore->colptr[n]; ++i) printf("%f  ", dp[i]);
211     printf("\nrowind: ");
212     for (i = 0; i < Astore->colptr[n]; ++i) printf("%d  ", Astore->rowind[i]);
213     printf("\ncolptr: ");
214     for (i = 0; i <= n; ++i) printf("%d  ", Astore->colptr[i]);
215     printf("\n");
216     fflush(stdout);
217 }
218 
219 void
dPrint_SuperNode_Matrix(char * what,SuperMatrix * A)220 dPrint_SuperNode_Matrix(char *what, SuperMatrix *A)
221 {
222     SCformat     *Astore;
223     register int i, j, k, c, d, n, nsup;
224     double       *dp;
225     int *col_to_sup, *sup_to_col, *rowind, *rowind_colptr;
226 
227     printf("\nSuperNode matrix %s:\n", what);
228     printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
229     n = A->ncol;
230     Astore = (SCformat *) A->Store;
231     dp = (double *) Astore->nzval;
232     col_to_sup = Astore->col_to_sup;
233     sup_to_col = Astore->sup_to_col;
234     rowind_colptr = Astore->rowind_colptr;
235     rowind = Astore->rowind;
236     printf("nrow %d, ncol %d, nnz %d, nsuper %d\n",
237 	   A->nrow,A->ncol,Astore->nnz,Astore->nsuper);
238     printf("nzval:\n");
239     for (k = 0; k <= Astore->nsuper+1; ++k) {
240       c = sup_to_col[k];
241       nsup = sup_to_col[k+1] - c;
242       for (j = c; j < c + nsup; ++j) {
243 	d = Astore->nzval_colptr[j];
244 	for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) {
245 	  printf("%d\t%d\t%e\n", rowind[i], j, dp[d++]);
246 	}
247       }
248     }
249 #if 0
250     for (i = 0; i < Astore->nzval_colptr[n]; ++i) printf("%f  ", dp[i]);
251 #endif
252     printf("\nnzval_colptr: ");
253     for (i = 0; i <= n; ++i) printf("%d  ", Astore->nzval_colptr[i]);
254     printf("\nrowind: ");
255     for (i = 0; i < Astore->rowind_colptr[n]; ++i)
256         printf("%d  ", Astore->rowind[i]);
257     printf("\nrowind_colptr: ");
258     for (i = 0; i <= n; ++i) printf("%d  ", Astore->rowind_colptr[i]);
259     printf("\ncol_to_sup: ");
260     for (i = 0; i < n; ++i) printf("%d  ", col_to_sup[i]);
261     printf("\nsup_to_col: ");
262     for (i = 0; i <= Astore->nsuper+1; ++i)
263         printf("%d  ", sup_to_col[i]);
264     printf("\n");
265     fflush(stdout);
266 }
267 
268 void
dPrint_Dense_Matrix(char * what,SuperMatrix * A)269 dPrint_Dense_Matrix(char *what, SuperMatrix *A)
270 {
271     DNformat     *Astore;
272     register int i;
273     double       *dp;
274 
275     printf("\nDense matrix %s:\n", what);
276     printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
277     Astore = (DNformat *) A->Store;
278     dp = (double *) Astore->nzval;
279     printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda);
280     printf("\nnzval: ");
281     for (i = 0; i < A->nrow; ++i) printf("%f  ", dp[i]);
282     printf("\n");
283     fflush(stdout);
284 }
285 
286 /*
287  * Diagnostic print of column "jcol" in the U/L factor.
288  */
289 void
dprint_lu_col(char * msg,int jcol,int pivrow,int * xprune,GlobalLU_t * Glu)290 dprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu)
291 {
292     int     i, k, fsupc;
293     int     *xsup, *supno;
294     int     *xlsub, *lsub;
295     double  *lusup;
296     int     *xlusup;
297     double  *ucol;
298     int     *usub, *xusub;
299 
300     xsup    = Glu->xsup;
301     supno   = Glu->supno;
302     lsub    = Glu->lsub;
303     xlsub   = Glu->xlsub;
304     lusup   = Glu->lusup;
305     xlusup  = Glu->xlusup;
306     ucol    = Glu->ucol;
307     usub    = Glu->usub;
308     xusub   = Glu->xusub;
309 
310     printf("%s", msg);
311     printf("col %d: pivrow %d, supno %d, xprune %d\n",
312 	   jcol, pivrow, supno[jcol], xprune[jcol]);
313 
314     printf("\tU-col:\n");
315     for (i = xusub[jcol]; i < xusub[jcol+1]; i++)
316 	printf("\t%d%10.4f\n", usub[i], ucol[i]);
317     printf("\tL-col in rectangular snode:\n");
318     fsupc = xsup[supno[jcol]];	/* first col of the snode */
319     i = xlsub[fsupc];
320     k = xlusup[jcol];
321     while ( i < xlsub[fsupc+1] && k < xlusup[jcol+1] ) {
322 	printf("\t%d\t%10.4f\n", lsub[i], lusup[k]);
323 	i++; k++;
324     }
325     fflush(stdout);
326 }
327 
328 
329 /*
330  * Check whether tempv[] == 0. This should be true before and after
331  * calling any numeric routines, i.e., "panel_bmod" and "column_bmod".
332  */
dcheck_tempv(int n,double * tempv)333 void dcheck_tempv(int n, double *tempv)
334 {
335     int i;
336 
337     for (i = 0; i < n; i++) {
338 	if (tempv[i] != 0.0)
339 	{
340 	    fprintf(stderr,"tempv[%d] = %f\n", i,tempv[i]);
341 	    ABORT("dcheck_tempv");
342 	}
343     }
344 }
345 
346 
347 void
dGenXtrue(int n,int nrhs,double * x,int ldx)348 dGenXtrue(int n, int nrhs, double *x, int ldx)
349 {
350     int  i, j;
351     for (j = 0; j < nrhs; ++j)
352 	for (i = 0; i < n; ++i) {
353 	    x[i + j*ldx] = 1.0;/* + (double)(i+1.)/n;*/
354 	}
355 }
356 
357 /*
358  * Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's
359  */
360 void
dFillRHS(char * trans,int nrhs,double * x,int ldx,SuperMatrix * A,SuperMatrix * B)361 dFillRHS(char *trans, int nrhs, double *x, int ldx,
362 		SuperMatrix *A, SuperMatrix *B)
363 {
364     NCformat *Astore;
365     double   *Aval;
366     DNformat *Bstore;
367     double   *rhs;
368     double one = 1.0;
369     double zero = 0.0;
370     int      ldc;
371 
372     Astore = A->Store;
373     Aval   = (double *) Astore->nzval;
374     Bstore = B->Store;
375     rhs    = Bstore->nzval;
376     ldc    = Bstore->lda;
377 
378     sp_dgemm(trans, "N", A->nrow, nrhs, A->ncol, one, A,
379 	     x, ldx, zero, rhs, ldc);
380 
381 }
382 
383 /*
384  * Fills a double precision array with a given value.
385  */
386 void
dfill(double * a,int alen,double dval)387 dfill(double *a, int alen, double dval)
388 {
389     register int i;
390     for (i = 0; i < alen; i++) a[i] = dval;
391 }
392 
393 
394 
395 /*
396  * Check the inf-norm of the error vector
397  */
dinf_norm_error(int nrhs,SuperMatrix * X,double * xtrue)398 void dinf_norm_error(int nrhs, SuperMatrix *X, double *xtrue)
399 {
400     DNformat *Xstore;
401     double err, xnorm;
402     double *Xmat, *soln_work;
403     int i, j;
404 
405     Xstore = X->Store;
406     Xmat = Xstore->nzval;
407 
408     for (j = 0; j < nrhs; j++) {
409       soln_work = &Xmat[j*Xstore->lda];
410       err = xnorm = 0.0;
411       for (i = 0; i < X->nrow; i++) {
412 	err = SUPERLU_MAX(err, fabs(soln_work[i] - xtrue[i]));
413 	xnorm = SUPERLU_MAX(xnorm, fabs(soln_work[i]));
414       }
415       err = err / xnorm;
416       printf("||X - Xtrue||/||X|| = %e\n", err);
417     }
418 }
419 
420 
421 
422 /* Print performance of the code. */
423 void
dPrintPerf(SuperMatrix * L,SuperMatrix * U,mem_usage_t * mem_usage,double rpg,double rcond,double * ferr,double * berr,char * equed)424 dPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage,
425 	       double rpg, double rcond, double *ferr,
426 	       double *berr, char *equed)
427 {
428     SCformat *Lstore;
429     NCformat *Ustore;
430     extern SuperLUStat_t SuperLUStat;
431     double   *utime;
432     flops_t  *ops;
433 
434     utime = SuperLUStat.utime;
435     ops   = SuperLUStat.ops;
436 
437     if ( utime[FACT] != 0. )
438 	printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT],
439 	       ops[FACT]*1e-6/utime[FACT]);
440     printf("Identify relaxed snodes	= %8.2f\n", utime[RELAX]);
441     if ( utime[SOLVE] != 0. )
442 	printf("Solve flops = %.0f, Mflops = %8.2f\n", ops[SOLVE],
443 	       ops[SOLVE]*1e-6/utime[SOLVE]);
444 
445     Lstore = (SCformat *) L->Store;
446     Ustore = (NCformat *) U->Store;
447     printf("\tNo of nonzeros in factor L = %d\n", Lstore->nnz);
448     printf("\tNo of nonzeros in factor U = %d\n", Ustore->nnz);
449     printf("\tNo of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz);
450 
451     printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
452 	   mem_usage->for_lu/1e6, mem_usage->total_needed/1e6,
453 	   mem_usage->expansions);
454 
455     printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n");
456     printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n",
457 	   utime[FACT], ops[FACT]*1e-6/utime[FACT],
458 	   utime[SOLVE], ops[SOLVE]*1e-6/utime[SOLVE],
459 	   utime[ETREE], utime[EQUIL], utime[RCOND], utime[REFINE]);
460 
461     printf("\tRpg\t\tRcond\t\tFerr\t\tBerr\t\tEquil?\n");
462     printf("NUM:\t%e\t%e\t%e\t%e\t%s\n",
463 	   rpg, rcond, ferr[0], berr[0], equed);
464 
465 }
466 
467 
468 
469 
print_double_vec(char * what,int n,double * vec)470 print_double_vec(char *what, int n, double *vec)
471 {
472     int i;
473     printf("%s: n %d\n", what, n);
474     for (i = 0; i < n; ++i) printf("%d\t%f\n", i, vec[i]);
475     return 0;
476 }
477 
478