1 /** @file slu_util.h
2  * \brief Utility header file
3  *
4  * -- SuperLU routine (version 4.1) --
5  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
6  * and Lawrence Berkeley National Lab.
7  * November, 2010
8  *
9  */
10 
11 #ifndef __SUPERLU_UTIL /* allow multiple inclusions */
12 #define __SUPERLU_UTIL
13 
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 /*
18 #ifndef __STDC__
19 #include <malloc.h>
20 #endif
21 */
22 #include <assert.h>
23 #include "superlu_enum_consts.h"
24 
25 /***********************************************************************
26  * Macros
27  ***********************************************************************/
28 #define FIRSTCOL_OF_SNODE(i)	(xsup[i])
29 /* No of marker arrays used in the symbolic factorization,
30    each of size n */
31 #define NO_MARKER     3
32 #define NUM_TEMPV(m,w,t,b)  ( SUPERLU_MAX(m, (t + b)*w) )
33 
34 #ifndef USER_ABORT
35 #define USER_ABORT(msg) superlu_abort_and_exit(msg)
36 #endif
37 
38 #define ABORT(err_msg) \
39  { char msg[256];\
40    sprintf(msg,"%s at line %d in file %s\n",err_msg,__LINE__, __FILE__);\
41    USER_ABORT(msg); }
42 
43 
44 #ifndef USER_MALLOC
45 #if 1
46 #define USER_MALLOC(size) superlu_malloc(size)
47 #else
48 /* The following may check out some uninitialized data */
49 #define USER_MALLOC(size) memset (superlu_malloc(size), '\x0F', size)
50 #endif
51 #endif
52 
53 #define SUPERLU_MALLOC(size) USER_MALLOC(size)
54 
55 #ifndef USER_FREE
56 #define USER_FREE(addr) superlu_free(addr)
57 #endif
58 
59 #define SUPERLU_FREE(addr) USER_FREE(addr)
60 
61 #define CHECK_MALLOC(where) {                 \
62     extern int superlu_malloc_total;        \
63     printf("%s: malloc_total %d Bytes\n",     \
64 	   where, superlu_malloc_total); \
65 }
66 
67 #define SUPERLU_MAX(x, y) 	( (x) > (y) ? (x) : (y) )
68 #define SUPERLU_MIN(x, y) 	( (x) < (y) ? (x) : (y) )
69 
70 /*********************************************************
71  * Macros used for easy access of sparse matrix entries. *
72  *********************************************************/
73 #define L_SUB_START(col)     ( Lstore->rowind_colptr[col] )
74 #define L_SUB(ptr)           ( Lstore->rowind[ptr] )
75 #define L_NZ_START(col)      ( Lstore->nzval_colptr[col] )
76 #define L_FST_SUPC(superno)  ( Lstore->sup_to_col[superno] )
77 #define U_NZ_START(col)      ( Ustore->colptr[col] )
78 #define U_SUB(ptr)           ( Ustore->rowind[ptr] )
79 
80 
81 /***********************************************************************
82  * Constants
83  ***********************************************************************/
84 #define EMPTY	(-1)
85 /*#define NO	(-1)*/
86 #define FALSE	0
87 #define TRUE	1
88 
89 #define NO_MEMTYPE  4      /* 0: lusup;
90 			      1: ucol;
91 			      2: lsub;
92 			      3: usub */
93 
94 #define GluIntArray(n)   (5 * (n) + 5)
95 
96 /* Dropping rules */
97 #define  NODROP	        ( 0x0000 )
98 #define	 DROP_BASIC	( 0x0001 )  /* ILU(tau) */
99 #define  DROP_PROWS	( 0x0002 )  /* ILUTP: keep p maximum rows */
100 #define  DROP_COLUMN	( 0x0004 )  /* ILUTP: for j-th column,
101 				              p = gamma * nnz(A(:,j)) */
102 #define  DROP_AREA 	( 0x0008 )  /* ILUTP: for j-th column, use
103  		 			      nnz(F(:,1:j)) / nnz(A(:,1:j))
104 					      to limit memory growth  */
105 #define  DROP_SECONDARY	( 0x000E )  /* PROWS | COLUMN | AREA */
106 #define  DROP_DYNAMIC	( 0x0010 )  /* adaptive tau */
107 #define  DROP_INTERP	( 0x0100 )  /* use interpolation */
108 
109 
110 #if 1
111 #define MILU_ALPHA (1.0e-2) /* multiple of drop_sum to be added to diagonal */
112 #else
113 #define MILU_ALPHA  1.0 /* multiple of drop_sum to be added to diagonal */
114 #endif
115 
116 
117 /***********************************************************************
118  * Type definitions
119  ***********************************************************************/
120 typedef float    flops_t;
121 typedef unsigned char Logical;
122 
123 /*
124  *-- This contains the options used to control the solution process.
125  *
126  * Fact   (fact_t)
127  *        Specifies whether or not the factored form of the matrix
128  *        A is supplied on entry, and if not, how the matrix A should
129  *        be factorizaed.
130  *        = DOFACT: The matrix A will be factorized from scratch, and the
131  *             factors will be stored in L and U.
132  *        = SamePattern: The matrix A will be factorized assuming
133  *             that a factorization of a matrix with the same sparsity
134  *             pattern was performed prior to this one. Therefore, this
135  *             factorization will reuse column permutation vector
136  *             ScalePermstruct->perm_c and the column elimination tree
137  *             LUstruct->etree.
138  *        = SamePattern_SameRowPerm: The matrix A will be factorized
139  *             assuming that a factorization of a matrix with the same
140  *             sparsity	pattern and similar numerical values was performed
141  *             prior to this one. Therefore, this factorization will reuse
142  *             both row and column scaling factors R and C, both row and
143  *             column permutation vectors perm_r and perm_c, and the
144  *             data structure set up from the previous symbolic factorization.
145  *        = FACTORED: On entry, L, U, perm_r and perm_c contain the
146  *              factored form of A. If DiagScale is not NOEQUIL, the matrix
147  *              A has been equilibrated with scaling factors R and C.
148  *
149  * Equil  (yes_no_t)
150  *        Specifies whether to equilibrate the system (scale A's row and
151  *        columns to have unit norm).
152  *
153  * ColPerm (colperm_t)
154  *        Specifies what type of column permutation to use to reduce fill.
155  *        = NATURAL: use the natural ordering
156  *        = MMD_ATA: use minimum degree ordering on structure of A'*A
157  *        = MMD_AT_PLUS_A: use minimum degree ordering on structure of A'+A
158  *        = COLAMD: use approximate minimum degree column ordering
159  *        = MY_PERMC: use the ordering specified by the user
160  *
161  * Trans  (trans_t)
162  *        Specifies the form of the system of equations:
163  *        = NOTRANS: A * X = B        (No transpose)
164  *        = TRANS:   A**T * X = B     (Transpose)
165  *        = CONJ:    A**H * X = B     (Transpose)
166  *
167  * IterRefine (IterRefine_t)
168  *        Specifies whether to perform iterative refinement.
169  *        = NO: no iterative refinement
170  *        = SINGLE: perform iterative refinement in single precision
171  *        = DOUBLE: perform iterative refinement in double precision
172  *        = EXTRA: perform iterative refinement in extra precision
173  *
174  * DiagPivotThresh (double, in [0.0, 1.0]) (only for sequential SuperLU)
175  *        Specifies the threshold used for a diagonal entry to be an
176  *        acceptable pivot.
177  *
178  * SymmetricMode (yest_no_t)
179  *        Specifies whether to use symmetric mode. Symmetric mode gives
180  *        preference to diagonal pivots, and uses an (A'+A)-based column
181  *        permutation algorithm.
182  *
183  * PivotGrowth (yes_no_t)
184  *        Specifies whether to compute the reciprocal pivot growth.
185  *
186  * ConditionNumber (ues_no_t)
187  *        Specifies whether to compute the reciprocal condition number.
188  *
189  * RowPerm (rowperm_t) (only for SuperLU_DIST or ILU)
190  *        Specifies whether to permute rows of the original matrix.
191  *        = NO: not to permute the rows
192  *        = LargeDiag: make the diagonal large relative to the off-diagonal
193  *        = MY_PERMR: use the permutation given by the user
194  *
195  * ILU_DropRule (int)
196  *        Specifies the dropping rule:
197  *	  = DROP_BASIC:   Basic dropping rule, supernodal based ILUTP(tau).
198  *	  = DROP_PROWS:   Supernodal based ILUTP(p,tau), p = gamma * nnz(A)/n.
199  *	  = DROP_COLUMN:  Variant of ILUTP(p,tau), for j-th column,
200  *			      p = gamma * nnz(A(:,j)).
201  *	  = DROP_AREA:    Variation of ILUTP, for j-th column, use
202  *			      nnz(F(:,1:j)) / nnz(A(:,1:j)) to control memory.
203  *	  = DROP_DYNAMIC: Modify the threshold tau during factorizaion:
204  *			  If nnz(L(:,1:j)) / nnz(A(:,1:j)) > gamma
205  *				  tau_L(j) := MIN(tau_0, tau_L(j-1) * 2);
206  *			  Otherwise
207  *				  tau_L(j) := MAX(tau_0, tau_L(j-1) / 2);
208  *			  tau_U(j) uses the similar rule.
209  *			  NOTE: the thresholds used by L and U are separate.
210  *	  = DROP_INTERP:  Compute the second dropping threshold by
211  *	                  interpolation instead of sorting (default).
212  *  		          In this case, the actual fill ratio is not
213  *			  guaranteed to be smaller than gamma.
214  *   	  Note: DROP_PROWS, DROP_COLUMN and DROP_AREA are mutually exclusive.
215  *	  ( Default: DROP_BASIC | DROP_AREA )
216  *
217  * ILU_DropTol (double)
218  *        numerical threshold for dropping.
219  *
220  * ILU_FillFactor (double)
221  *        Gamma in the secondary dropping.
222  *
223  * ILU_Norm (norm_t)
224  *        Specify which norm to use to measure the row size in a
225  *        supernode: infinity-norm, 1-norm, or 2-norm.
226  *
227  * ILU_FillTol (double)
228  *        numerical threshold for zero pivot perturbation.
229  *
230  * ILU_MILU (milu_t)
231  *        Specifies which version of MILU to use.
232  *
233  * ILU_MILU_Dim (double)
234  *        Dimension of the PDE if available.
235  *
236  * ReplaceTinyPivot (yes_no_t) (only for SuperLU_DIST)
237  *        Specifies whether to replace the tiny diagonals by
238  *        sqrt(epsilon)*||A|| during LU factorization.
239  *
240  * SolveInitialized (yes_no_t) (only for SuperLU_DIST)
241  *        Specifies whether the initialization has been performed to the
242  *        triangular solve.
243  *
244  * RefineInitialized (yes_no_t) (only for SuperLU_DIST)
245  *        Specifies whether the initialization has been performed to the
246  *        sparse matrix-vector multiplication routine needed in iterative
247  *        refinement.
248  *
249  * PrintStat (yes_no_t)
250  *        Specifies whether to print the solver's statistics.
251  */
252 typedef struct {
253     fact_t        Fact;
254     yes_no_t      Equil;
255     colperm_t     ColPerm;
256     trans_t       Trans;
257     IterRefine_t  IterRefine;
258     double        DiagPivotThresh;
259     yes_no_t      SymmetricMode;
260     yes_no_t      PivotGrowth;
261     yes_no_t      ConditionNumber;
262     rowperm_t     RowPerm;
263     int 	  ILU_DropRule;
264     double	  ILU_DropTol;    /* threshold for dropping */
265     double	  ILU_FillFactor; /* gamma in the secondary dropping */
266     norm_t	  ILU_Norm;       /* infinity-norm, 1-norm, or 2-norm */
267     double	  ILU_FillTol;    /* threshold for zero pivot perturbation */
268     milu_t	  ILU_MILU;
269     double	  ILU_MILU_Dim;   /* Dimension of PDE (if available) */
270     yes_no_t      ParSymbFact;
271     yes_no_t      ReplaceTinyPivot; /* used in SuperLU_DIST */
272     yes_no_t      SolveInitialized;
273     yes_no_t      RefineInitialized;
274     yes_no_t      PrintStat;
275 } superlu_options_t;
276 
277 /*! \brief Headers for 4 types of dynamatically managed memory */
278 typedef struct e_node {
279     int size;      /* length of the memory that has been used */
280     void *mem;     /* pointer to the new malloc'd store */
281 } ExpHeader;
282 
283 typedef struct {
284     int  size;
285     int  used;
286     int  top1;  /* grow upward, relative to &array[0] */
287     int  top2;  /* grow downward */
288     void *array;
289 } LU_stack_t;
290 
291 typedef struct {
292     int     *panel_histo; /* histogram of panel size distribution */
293     double  *utime;       /* running time at various phases */
294     flops_t *ops;         /* operation count at various phases */
295     int     TinyPivots;   /* number of tiny pivots */
296     int     RefineSteps;  /* number of iterative refinement steps */
297     int     expansions;   /* number of memory expansions */
298 } SuperLUStat_t;
299 
300 typedef struct {
301     float for_lu;
302     float total_needed;
303 } mem_usage_t;
304 
305 
306 /***********************************************************************
307  * Prototypes
308  ***********************************************************************/
309 #ifdef __cplusplus
310 extern "C" {
311 #endif
312 
313 extern void    Destroy_SuperMatrix_Store(SuperMatrix *);
314 extern void    Destroy_CompCol_Matrix(SuperMatrix *);
315 extern void    Destroy_CompRow_Matrix(SuperMatrix *);
316 extern void    Destroy_SuperNode_Matrix(SuperMatrix *);
317 extern void    Destroy_CompCol_Permuted(SuperMatrix *);
318 extern void    Destroy_Dense_Matrix(SuperMatrix *);
319 extern void    get_perm_c(int, SuperMatrix *, int *);
320 extern void    set_default_options(superlu_options_t *options);
321 extern void    ilu_set_default_options(superlu_options_t *options);
322 extern void    sp_preorder (superlu_options_t *, SuperMatrix*, int*, int*,
323 			    SuperMatrix*);
324 extern void    superlu_abort_and_exit(char*);
325 extern void    *superlu_malloc (size_t);
326 extern int     *intMalloc (int);
327 extern int     *intCalloc (int);
328 extern void    superlu_free (void*);
329 extern void    SetIWork (int, int, int, int *, int **, int **, int **,
330                          int **, int **, int **, int **);
331 extern int     sp_coletree (int *, int *, int *, int, int, int *);
332 extern void    relax_snode (const int, int *, const int, int *, int *);
333 extern void    heap_relax_snode (const int, int *, const int, int *, int *);
334 extern int     mark_relax(int, int *, int *, int *, int *, int *, int *);
335 extern void    ilu_relax_snode (const int, int *, const int, int *,
336 				int *, int *);
337 extern void    ilu_heap_relax_snode (const int, int *, const int, int *,
338 				     int *, int*);
339 extern void    resetrep_col (const int, const int *, int *);
340 extern int     spcoletree (int *, int *, int *, int, int, int *);
341 extern int     *TreePostorder (int, int *);
342 extern double  SuperLU_timer_ ();
343 extern int     sp_ienv (int);
344 extern int     lsame_ (char *, char *);
345 extern int     xerbla_ (char *, int *);
346 extern void    ifill (int *, int, int);
347 extern void    snode_profile (int, int *);
348 extern void    super_stats (int, int *);
349 extern void    check_repfnz(int, int, int, int *);
350 extern void    PrintSumm (char *, int, int, int);
351 extern void    StatInit(SuperLUStat_t *);
352 extern void    StatPrint (SuperLUStat_t *);
353 extern void    StatFree(SuperLUStat_t *);
354 extern void    print_panel_seg(int, int, int, int, int *, int *);
355 extern int     print_int_vec(char *,int, int *);
356 extern int     slu_PrintInt10(char *, int, int *);
357 
358 #ifdef __cplusplus
359   }
360 #endif
361 
362 #endif /* __SUPERLU_UTIL */
363