1 /** @file slu_util.h 2 * \brief Utility header file 3 * 4 * -- SuperLU routine (version 4.1) -- 5 * Univ. of California Berkeley, Xerox Palo Alto Research Center, 6 * and Lawrence Berkeley National Lab. 7 * November, 2010 8 * 9 */ 10 11 #ifndef __SUPERLU_UTIL /* allow multiple inclusions */ 12 #define __SUPERLU_UTIL 13 14 #include <stdio.h> 15 #include <stdlib.h> 16 #include <string.h> 17 /* 18 #ifndef __STDC__ 19 #include <malloc.h> 20 #endif 21 */ 22 #include <assert.h> 23 #include "superlu_enum_consts.h" 24 25 /*********************************************************************** 26 * Macros 27 ***********************************************************************/ 28 #define FIRSTCOL_OF_SNODE(i) (xsup[i]) 29 /* No of marker arrays used in the symbolic factorization, 30 each of size n */ 31 #define NO_MARKER 3 32 #define NUM_TEMPV(m,w,t,b) ( SUPERLU_MAX(m, (t + b)*w) ) 33 34 #ifndef USER_ABORT 35 #define USER_ABORT(msg) superlu_abort_and_exit(msg) 36 #endif 37 38 #define ABORT(err_msg) \ 39 { char msg[256];\ 40 sprintf(msg,"%s at line %d in file %s\n",err_msg,__LINE__, __FILE__);\ 41 USER_ABORT(msg); } 42 43 44 #ifndef USER_MALLOC 45 #if 1 46 #define USER_MALLOC(size) superlu_malloc(size) 47 #else 48 /* The following may check out some uninitialized data */ 49 #define USER_MALLOC(size) memset (superlu_malloc(size), '\x0F', size) 50 #endif 51 #endif 52 53 #define SUPERLU_MALLOC(size) USER_MALLOC(size) 54 55 #ifndef USER_FREE 56 #define USER_FREE(addr) superlu_free(addr) 57 #endif 58 59 #define SUPERLU_FREE(addr) USER_FREE(addr) 60 61 #define CHECK_MALLOC(where) { \ 62 extern int superlu_malloc_total; \ 63 printf("%s: malloc_total %d Bytes\n", \ 64 where, superlu_malloc_total); \ 65 } 66 67 #define SUPERLU_MAX(x, y) ( (x) > (y) ? (x) : (y) ) 68 #define SUPERLU_MIN(x, y) ( (x) < (y) ? (x) : (y) ) 69 70 /********************************************************* 71 * Macros used for easy access of sparse matrix entries. * 72 *********************************************************/ 73 #define L_SUB_START(col) ( Lstore->rowind_colptr[col] ) 74 #define L_SUB(ptr) ( Lstore->rowind[ptr] ) 75 #define L_NZ_START(col) ( Lstore->nzval_colptr[col] ) 76 #define L_FST_SUPC(superno) ( Lstore->sup_to_col[superno] ) 77 #define U_NZ_START(col) ( Ustore->colptr[col] ) 78 #define U_SUB(ptr) ( Ustore->rowind[ptr] ) 79 80 81 /*********************************************************************** 82 * Constants 83 ***********************************************************************/ 84 #define EMPTY (-1) 85 /*#define NO (-1)*/ 86 #define FALSE 0 87 #define TRUE 1 88 89 #define NO_MEMTYPE 4 /* 0: lusup; 90 1: ucol; 91 2: lsub; 92 3: usub */ 93 94 #define GluIntArray(n) (5 * (n) + 5) 95 96 /* Dropping rules */ 97 #define NODROP ( 0x0000 ) 98 #define DROP_BASIC ( 0x0001 ) /* ILU(tau) */ 99 #define DROP_PROWS ( 0x0002 ) /* ILUTP: keep p maximum rows */ 100 #define DROP_COLUMN ( 0x0004 ) /* ILUTP: for j-th column, 101 p = gamma * nnz(A(:,j)) */ 102 #define DROP_AREA ( 0x0008 ) /* ILUTP: for j-th column, use 103 nnz(F(:,1:j)) / nnz(A(:,1:j)) 104 to limit memory growth */ 105 #define DROP_SECONDARY ( 0x000E ) /* PROWS | COLUMN | AREA */ 106 #define DROP_DYNAMIC ( 0x0010 ) /* adaptive tau */ 107 #define DROP_INTERP ( 0x0100 ) /* use interpolation */ 108 109 110 #if 1 111 #define MILU_ALPHA (1.0e-2) /* multiple of drop_sum to be added to diagonal */ 112 #else 113 #define MILU_ALPHA 1.0 /* multiple of drop_sum to be added to diagonal */ 114 #endif 115 116 117 /*********************************************************************** 118 * Type definitions 119 ***********************************************************************/ 120 typedef float flops_t; 121 typedef unsigned char Logical; 122 123 /* 124 *-- This contains the options used to control the solution process. 125 * 126 * Fact (fact_t) 127 * Specifies whether or not the factored form of the matrix 128 * A is supplied on entry, and if not, how the matrix A should 129 * be factorizaed. 130 * = DOFACT: The matrix A will be factorized from scratch, and the 131 * factors will be stored in L and U. 132 * = SamePattern: The matrix A will be factorized assuming 133 * that a factorization of a matrix with the same sparsity 134 * pattern was performed prior to this one. Therefore, this 135 * factorization will reuse column permutation vector 136 * ScalePermstruct->perm_c and the column elimination tree 137 * LUstruct->etree. 138 * = SamePattern_SameRowPerm: The matrix A will be factorized 139 * assuming that a factorization of a matrix with the same 140 * sparsity pattern and similar numerical values was performed 141 * prior to this one. Therefore, this factorization will reuse 142 * both row and column scaling factors R and C, both row and 143 * column permutation vectors perm_r and perm_c, and the 144 * data structure set up from the previous symbolic factorization. 145 * = FACTORED: On entry, L, U, perm_r and perm_c contain the 146 * factored form of A. If DiagScale is not NOEQUIL, the matrix 147 * A has been equilibrated with scaling factors R and C. 148 * 149 * Equil (yes_no_t) 150 * Specifies whether to equilibrate the system (scale A's row and 151 * columns to have unit norm). 152 * 153 * ColPerm (colperm_t) 154 * Specifies what type of column permutation to use to reduce fill. 155 * = NATURAL: use the natural ordering 156 * = MMD_ATA: use minimum degree ordering on structure of A'*A 157 * = MMD_AT_PLUS_A: use minimum degree ordering on structure of A'+A 158 * = COLAMD: use approximate minimum degree column ordering 159 * = MY_PERMC: use the ordering specified by the user 160 * 161 * Trans (trans_t) 162 * Specifies the form of the system of equations: 163 * = NOTRANS: A * X = B (No transpose) 164 * = TRANS: A**T * X = B (Transpose) 165 * = CONJ: A**H * X = B (Transpose) 166 * 167 * IterRefine (IterRefine_t) 168 * Specifies whether to perform iterative refinement. 169 * = NO: no iterative refinement 170 * = SINGLE: perform iterative refinement in single precision 171 * = DOUBLE: perform iterative refinement in double precision 172 * = EXTRA: perform iterative refinement in extra precision 173 * 174 * DiagPivotThresh (double, in [0.0, 1.0]) (only for sequential SuperLU) 175 * Specifies the threshold used for a diagonal entry to be an 176 * acceptable pivot. 177 * 178 * SymmetricMode (yest_no_t) 179 * Specifies whether to use symmetric mode. Symmetric mode gives 180 * preference to diagonal pivots, and uses an (A'+A)-based column 181 * permutation algorithm. 182 * 183 * PivotGrowth (yes_no_t) 184 * Specifies whether to compute the reciprocal pivot growth. 185 * 186 * ConditionNumber (ues_no_t) 187 * Specifies whether to compute the reciprocal condition number. 188 * 189 * RowPerm (rowperm_t) (only for SuperLU_DIST or ILU) 190 * Specifies whether to permute rows of the original matrix. 191 * = NO: not to permute the rows 192 * = LargeDiag: make the diagonal large relative to the off-diagonal 193 * = MY_PERMR: use the permutation given by the user 194 * 195 * ILU_DropRule (int) 196 * Specifies the dropping rule: 197 * = DROP_BASIC: Basic dropping rule, supernodal based ILUTP(tau). 198 * = DROP_PROWS: Supernodal based ILUTP(p,tau), p = gamma * nnz(A)/n. 199 * = DROP_COLUMN: Variant of ILUTP(p,tau), for j-th column, 200 * p = gamma * nnz(A(:,j)). 201 * = DROP_AREA: Variation of ILUTP, for j-th column, use 202 * nnz(F(:,1:j)) / nnz(A(:,1:j)) to control memory. 203 * = DROP_DYNAMIC: Modify the threshold tau during factorizaion: 204 * If nnz(L(:,1:j)) / nnz(A(:,1:j)) > gamma 205 * tau_L(j) := MIN(tau_0, tau_L(j-1) * 2); 206 * Otherwise 207 * tau_L(j) := MAX(tau_0, tau_L(j-1) / 2); 208 * tau_U(j) uses the similar rule. 209 * NOTE: the thresholds used by L and U are separate. 210 * = DROP_INTERP: Compute the second dropping threshold by 211 * interpolation instead of sorting (default). 212 * In this case, the actual fill ratio is not 213 * guaranteed to be smaller than gamma. 214 * Note: DROP_PROWS, DROP_COLUMN and DROP_AREA are mutually exclusive. 215 * ( Default: DROP_BASIC | DROP_AREA ) 216 * 217 * ILU_DropTol (double) 218 * numerical threshold for dropping. 219 * 220 * ILU_FillFactor (double) 221 * Gamma in the secondary dropping. 222 * 223 * ILU_Norm (norm_t) 224 * Specify which norm to use to measure the row size in a 225 * supernode: infinity-norm, 1-norm, or 2-norm. 226 * 227 * ILU_FillTol (double) 228 * numerical threshold for zero pivot perturbation. 229 * 230 * ILU_MILU (milu_t) 231 * Specifies which version of MILU to use. 232 * 233 * ILU_MILU_Dim (double) 234 * Dimension of the PDE if available. 235 * 236 * ReplaceTinyPivot (yes_no_t) (only for SuperLU_DIST) 237 * Specifies whether to replace the tiny diagonals by 238 * sqrt(epsilon)*||A|| during LU factorization. 239 * 240 * SolveInitialized (yes_no_t) (only for SuperLU_DIST) 241 * Specifies whether the initialization has been performed to the 242 * triangular solve. 243 * 244 * RefineInitialized (yes_no_t) (only for SuperLU_DIST) 245 * Specifies whether the initialization has been performed to the 246 * sparse matrix-vector multiplication routine needed in iterative 247 * refinement. 248 * 249 * PrintStat (yes_no_t) 250 * Specifies whether to print the solver's statistics. 251 */ 252 typedef struct { 253 fact_t Fact; 254 yes_no_t Equil; 255 colperm_t ColPerm; 256 trans_t Trans; 257 IterRefine_t IterRefine; 258 double DiagPivotThresh; 259 yes_no_t SymmetricMode; 260 yes_no_t PivotGrowth; 261 yes_no_t ConditionNumber; 262 rowperm_t RowPerm; 263 int ILU_DropRule; 264 double ILU_DropTol; /* threshold for dropping */ 265 double ILU_FillFactor; /* gamma in the secondary dropping */ 266 norm_t ILU_Norm; /* infinity-norm, 1-norm, or 2-norm */ 267 double ILU_FillTol; /* threshold for zero pivot perturbation */ 268 milu_t ILU_MILU; 269 double ILU_MILU_Dim; /* Dimension of PDE (if available) */ 270 yes_no_t ParSymbFact; 271 yes_no_t ReplaceTinyPivot; /* used in SuperLU_DIST */ 272 yes_no_t SolveInitialized; 273 yes_no_t RefineInitialized; 274 yes_no_t PrintStat; 275 } superlu_options_t; 276 277 /*! \brief Headers for 4 types of dynamatically managed memory */ 278 typedef struct e_node { 279 int size; /* length of the memory that has been used */ 280 void *mem; /* pointer to the new malloc'd store */ 281 } ExpHeader; 282 283 typedef struct { 284 int size; 285 int used; 286 int top1; /* grow upward, relative to &array[0] */ 287 int top2; /* grow downward */ 288 void *array; 289 } LU_stack_t; 290 291 typedef struct { 292 int *panel_histo; /* histogram of panel size distribution */ 293 double *utime; /* running time at various phases */ 294 flops_t *ops; /* operation count at various phases */ 295 int TinyPivots; /* number of tiny pivots */ 296 int RefineSteps; /* number of iterative refinement steps */ 297 int expansions; /* number of memory expansions */ 298 } SuperLUStat_t; 299 300 typedef struct { 301 float for_lu; 302 float total_needed; 303 } mem_usage_t; 304 305 306 /*********************************************************************** 307 * Prototypes 308 ***********************************************************************/ 309 #ifdef __cplusplus 310 extern "C" { 311 #endif 312 313 extern void Destroy_SuperMatrix_Store(SuperMatrix *); 314 extern void Destroy_CompCol_Matrix(SuperMatrix *); 315 extern void Destroy_CompRow_Matrix(SuperMatrix *); 316 extern void Destroy_SuperNode_Matrix(SuperMatrix *); 317 extern void Destroy_CompCol_Permuted(SuperMatrix *); 318 extern void Destroy_Dense_Matrix(SuperMatrix *); 319 extern void get_perm_c(int, SuperMatrix *, int *); 320 extern void set_default_options(superlu_options_t *options); 321 extern void ilu_set_default_options(superlu_options_t *options); 322 extern void sp_preorder (superlu_options_t *, SuperMatrix*, int*, int*, 323 SuperMatrix*); 324 extern void superlu_abort_and_exit(char*); 325 extern void *superlu_malloc (size_t); 326 extern int *intMalloc (int); 327 extern int *intCalloc (int); 328 extern void superlu_free (void*); 329 extern void SetIWork (int, int, int, int *, int **, int **, int **, 330 int **, int **, int **, int **); 331 extern int sp_coletree (int *, int *, int *, int, int, int *); 332 extern void relax_snode (const int, int *, const int, int *, int *); 333 extern void heap_relax_snode (const int, int *, const int, int *, int *); 334 extern int mark_relax(int, int *, int *, int *, int *, int *, int *); 335 extern void ilu_relax_snode (const int, int *, const int, int *, 336 int *, int *); 337 extern void ilu_heap_relax_snode (const int, int *, const int, int *, 338 int *, int*); 339 extern void resetrep_col (const int, const int *, int *); 340 extern int spcoletree (int *, int *, int *, int, int, int *); 341 extern int *TreePostorder (int, int *); 342 extern double SuperLU_timer_ (); 343 extern int sp_ienv (int); 344 extern int lsame_ (char *, char *); 345 extern int xerbla_ (char *, int *); 346 extern void ifill (int *, int, int); 347 extern void snode_profile (int, int *); 348 extern void super_stats (int, int *); 349 extern void check_repfnz(int, int, int, int *); 350 extern void PrintSumm (char *, int, int, int); 351 extern void StatInit(SuperLUStat_t *); 352 extern void StatPrint (SuperLUStat_t *); 353 extern void StatFree(SuperLUStat_t *); 354 extern void print_panel_seg(int, int, int, int, int *, int *); 355 extern int print_int_vec(char *,int, int *); 356 extern int slu_PrintInt10(char *, int, int *); 357 358 #ifdef __cplusplus 359 } 360 #endif 361 362 #endif /* __SUPERLU_UTIL */ 363