1 /******************************************************************************
2 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4 *
5 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6 ******************************************************************************/
7
8 /*--------------------------------------------------------------------------
9 * Test driver for unstructured matrix interface (IJ_matrix interface).
10 * Do `driver -help' for usage info.
11 * This driver started from the driver for parcsr_linear_solvers, and it
12 * works by first building a parcsr matrix as before and then "copying"
13 * that matrix row-by-row into the IJMatrix interface. AJC 7/99.
14 *--------------------------------------------------------------------------*/
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <math.h>
18
19 #include "_hypre_utilities.h"
20 #include "HYPRE.h"
21 #include "HYPRE_parcsr_mv.h"
22
23 #include "HYPRE_IJ_mv.h"
24 #include "_hypre_IJ_mv.h"
25 #include "HYPRE_parcsr_ls.h"
26 #include "_hypre_parcsr_mv.h"
27 #include "HYPRE_krylov.h"
28
29 #if defined(HYPRE_USING_UMPIRE)
30 #include "umpire/interface/umpire.h"
31 #endif
32
33 /* begin lobpcg */
34
35 #define NO_SOLVER -9198
36
37 #include <time.h>
38
39 #include "HYPRE_lobpcg.h"
40
41 /* max dt */
42 #define DT_INF 1.0e30
/* Forward declaration only; the definition is not in this part of the file.
 * Unlike the other Build* helpers declared in the extern "C" block of this
 * file, this prototype takes no arg_index parameter and sits outside that
 * linkage block. */
43 HYPRE_Int
44 BuildParIsoLaplacian( HYPRE_Int argc, char** argv, HYPRE_ParCSRMatrix *A_ptr );
45
46 /* end lobpcg */
47
/*--------------------------------------------------------------------------
 * Forward declarations of helper routines. They get C linkage when this
 * file is compiled as C++ (see the __cplusplus guards). None of the
 * definitions are visible in this part of the file.
 *
 * The Build* and ReadParVectorFromFile prototypes all receive the command
 * line (argc, argv) and, except where noted by their signature, an
 * arg_index. NOTE(review): arg_index presumably marks where the helper's
 * own options start in argv, and the trailing pointer arguments (A_ptr,
 * b_ptr, ...) presumably receive the result - confirm against the
 * definitions.
 *--------------------------------------------------------------------------*/
48 #ifdef __cplusplus
49 extern "C" {
50 #endif
51
/* Matrix / vector input from files */
52 HYPRE_Int BuildParFromFile (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix *A_ptr );
53 HYPRE_Int ReadParVectorFromFile (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParVector *b_ptr );
54
/* Matrix builders and file-based builders for matrices, function arrays,
 * right-hand sides and index arrays */
55 HYPRE_Int BuildParLaplacian (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix *A_ptr );
56 HYPRE_Int BuildParSysLaplacian (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix *A_ptr );
57 HYPRE_Int BuildParDifConv (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix *A_ptr);
58 HYPRE_Int BuildParFromOneFile (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_Int num_functions , HYPRE_ParCSRMatrix *A_ptr );
59 HYPRE_Int BuildFuncsFromFiles (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix A , HYPRE_Int **dof_func_ptr );
60 HYPRE_Int BuildFuncsFromOneFile (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix A , HYPRE_Int **dof_func_ptr );
61 HYPRE_Int BuildRhsParFromOneFile (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix A , HYPRE_ParVector *b_ptr );
62 HYPRE_Int BuildBigArrayFromOneFile (HYPRE_Int argc , char *argv [] , const char *array_name , HYPRE_Int arg_index , HYPRE_BigInt *partitioning , HYPRE_Int *size , HYPRE_BigInt **array_ptr);
63 HYPRE_Int BuildParLaplacian9pt (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix *A_ptr );
64 HYPRE_Int BuildParLaplacian27pt (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix *A_ptr );
65 HYPRE_Int BuildParRotate7pt (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix *A_ptr );
66 HYPRE_Int BuildParVarDifConv (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix *A_ptr , HYPRE_ParVector *rhs_ptr );
/* The Generate* routines take a communicator and grid/process parameters
 * instead of the command line, and return the HYPRE_ParCSRMatrix directly. */
67 HYPRE_ParCSRMatrix GenerateSysLaplacian (MPI_Comm comm, HYPRE_BigInt nx, HYPRE_BigInt ny, HYPRE_BigInt nz,
68 HYPRE_Int P, HYPRE_Int Q, HYPRE_Int R, HYPRE_Int p, HYPRE_Int q, HYPRE_Int r,
69 HYPRE_Int num_fun, HYPRE_Real *mtrx, HYPRE_Real *value);
70 HYPRE_ParCSRMatrix GenerateSysLaplacianVCoef (MPI_Comm comm, HYPRE_BigInt nx, HYPRE_BigInt ny, HYPRE_BigInt nz,
71 HYPRE_Int P, HYPRE_Int Q, HYPRE_Int R, HYPRE_Int p, HYPRE_Int q, HYPRE_Int r,
72 HYPRE_Int num_fun, HYPRE_Real *mtrx, HYPRE_Real *value);
73 HYPRE_Int SetSysVcoefValues(HYPRE_Int num_fun, HYPRE_BigInt nx, HYPRE_BigInt ny, HYPRE_BigInt nz, HYPRE_Real vcx, HYPRE_Real vcy, HYPRE_Real vcz, HYPRE_Int mtx_entry, HYPRE_Real *values);
74
/* Coordinates are handed back as single-precision float, not HYPRE_Real */
75 HYPRE_Int BuildParCoordinates (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_Int *coorddim_ptr , float **coord_ptr );
76
/* FlexGMRES preconditioner-modification hooks, declared extern here.
 * Both share the signature (precond_data, iteration count, relative
 * residual norm). */
77 extern HYPRE_Int hypre_FlexGMRESModifyPCAMGExample(void *precond_data, HYPRE_Int iterations,
78 HYPRE_Real rel_residual_norm);
79
80 extern HYPRE_Int hypre_FlexGMRESModifyPCDefault(void *precond_data, HYPRE_Int iteration,
81 HYPRE_Real rel_residual_norm);
82 #ifdef __cplusplus
83 }
84 #endif
85 #define SECOND_TIME 0
86
87 hypre_int
main(hypre_int argc,char * argv[])88 main( hypre_int argc,
89 char *argv[] )
90 {
91 HYPRE_Int arg_index;
92 HYPRE_Int print_usage;
93 HYPRE_Int sparsity_known = 0;
94 HYPRE_Int add = 0;
95 HYPRE_Int check_constant = 0;
96 HYPRE_Int off_proc = 0;
97 HYPRE_Int chunk = 0;
98 HYPRE_Int omp_flag = 0;
99 HYPRE_Int build_matrix_type;
100 HYPRE_Int build_matrix_arg_index;
101 HYPRE_Int build_rhs_type;
102 HYPRE_Int build_rhs_arg_index;
103 HYPRE_Int build_src_type;
104 HYPRE_Int build_src_arg_index;
105 HYPRE_Int build_x0_type;
106 HYPRE_Int build_x0_arg_index;
107 HYPRE_Int build_funcs_type;
108 HYPRE_Int build_funcs_arg_index;
109 HYPRE_Int build_fpt_arg_index;
110 HYPRE_Int build_sfpt_arg_index;
111 HYPRE_Int build_cpt_arg_index;
112 HYPRE_Int solver_id;
113 HYPRE_Int solver_type = 1;
114 HYPRE_Int recompute_res = 0; /* What should be the default here? */
115 HYPRE_Int ioutdat;
116 HYPRE_Int poutdat;
117 HYPRE_Int debug_flag;
118 HYPRE_Int ierr = 0;
119 HYPRE_Int i,j;
120 HYPRE_Int max_levels = 25;
121 HYPRE_Int num_iterations;
122 HYPRE_Int pcg_num_its, dscg_num_its;
123 HYPRE_Int max_iter = 1000;
124 HYPRE_Int mg_max_iter = 100;
125 HYPRE_Int nodal = 0;
126 HYPRE_Int nodal_diag = 0;
127 HYPRE_Int keep_same_sign = 0;
128 HYPRE_Real cf_tol = 0.9;
129 HYPRE_Real norm;
130 HYPRE_Real final_res_norm;
131 void *object;
132
133 HYPRE_IJMatrix ij_A = NULL;
134 HYPRE_IJVector ij_b = NULL;
135 HYPRE_IJVector ij_x = NULL;
136 HYPRE_IJVector *ij_rbm;
137
138 HYPRE_ParCSRMatrix parcsr_A = NULL;
139 HYPRE_ParVector b = NULL;
140 HYPRE_ParVector x = NULL;
141 HYPRE_ParVector *interp_vecs = NULL;
142 HYPRE_ParVector residual = NULL;
143 HYPRE_ParVector x0_save = NULL;
144
145 HYPRE_Solver amg_solver;
146 HYPRE_Solver amgdd_solver;
147 HYPRE_Solver pcg_solver;
148 HYPRE_Solver amg_precond=NULL;
149 HYPRE_Solver pcg_precond=NULL;
150 HYPRE_Solver pcg_precond_gotten;
151
152 HYPRE_Int check_residual = 0;
153 HYPRE_Int num_procs, myid;
154 HYPRE_Int local_row;
155 HYPRE_Int *row_sizes;
156 HYPRE_Int *diag_sizes;
157 HYPRE_Int *offdiag_sizes;
158 HYPRE_BigInt *rows;
159 HYPRE_Int size;
160 HYPRE_Int *ncols;
161 HYPRE_BigInt *col_inds;
162 HYPRE_Int *dof_func;
163 HYPRE_Int num_functions = 1;
164 HYPRE_Int num_paths = 1;
165 HYPRE_Int agg_num_levels = 0;
166 HYPRE_Int ns_coarse = 1, ns_down = -1, ns_up = -1;
167
168 HYPRE_Int time_index;
169 MPI_Comm comm = hypre_MPI_COMM_WORLD;
170 HYPRE_BigInt M, N, big_i;
171 HYPRE_Int local_num_rows, local_num_cols;
172 HYPRE_BigInt first_local_row, last_local_row;
173 HYPRE_BigInt first_local_col, last_local_col;
174 HYPRE_BigInt *partitioning = NULL;
175 HYPRE_Int variant, overlap, domain_type;
176 HYPRE_Real schwarz_rlx_weight;
177 HYPRE_Real *values, val;
178
179 HYPRE_Int use_nonsymm_schwarz = 0;
180 HYPRE_Int test_ij = 0;
181 HYPRE_Int build_rbm = 0;
182 HYPRE_Int build_rbm_index = 0;
183 HYPRE_Int num_interp_vecs = 0;
184 HYPRE_Int interp_vec_variant = 0;
185 HYPRE_Int Q_max = 0;
186 HYPRE_Real Q_trunc = 0;
187
188 const HYPRE_Real dt_inf = DT_INF;
189 HYPRE_Real dt = dt_inf;
190
191 /* solve -Ax = b, for testing SND matrices */
192 HYPRE_Int negA = 0;
193
194 /* parameters for BoomerAMG */
195 HYPRE_Real A_drop_tol = 0.0;
196 HYPRE_Int A_drop_type = -1;
197 HYPRE_Int coarsen_cut_factor = 0;
198 HYPRE_Real strong_threshold;
199 HYPRE_Real strong_thresholdR;
200 HYPRE_Real filter_thresholdR;
201 HYPRE_Real trunc_factor;
202 HYPRE_Real jacobi_trunc_threshold;
203 HYPRE_Real S_commpkg_switch = 1.0;
204 HYPRE_Real CR_rate = 0.7;
205 HYPRE_Real CR_strong_th = 0.0;
206 HYPRE_Int CR_use_CG = 0;
207 HYPRE_Int P_max_elmts = 4;
208 HYPRE_Int cycle_type;
209 HYPRE_Int fcycle;
210 HYPRE_Int coarsen_type = 10;
211 HYPRE_Int measure_type = 0;
212 HYPRE_Int num_sweeps = 1;
213 HYPRE_Int IS_type;
214 HYPRE_Int num_CR_relax_steps = 2;
215 HYPRE_Int relax_type = -1;
216 HYPRE_Int add_relax_type = 18;
217 HYPRE_Int relax_coarse = -1;
218 HYPRE_Int relax_up = -1;
219 HYPRE_Int relax_down = -1;
220 HYPRE_Int relax_order = 0;
221 HYPRE_Int level_w = -1;
222 HYPRE_Int level_ow = -1;
223 /* HYPRE_Int smooth_lev; */
224 /* HYPRE_Int smooth_rlx = 8; */
225 HYPRE_Int smooth_type = 6;
226 HYPRE_Int smooth_num_levels = 0;
227 HYPRE_Int smooth_num_sweeps = 1;
228 HYPRE_Int coarse_threshold = 9;
229 HYPRE_Int min_coarse_size = 0;
230 /* redundant coarse grid solve */
231 HYPRE_Int seq_threshold = 0;
232 HYPRE_Int redundant = 0;
233 /* additive versions */
234 HYPRE_Int additive = -1;
235 HYPRE_Int mult_add = -1;
236 HYPRE_Int simple = -1;
237 HYPRE_Int add_last_lvl = -1;
238 HYPRE_Int add_P_max_elmts = 0;
239 HYPRE_Real add_trunc_factor = 0;
240 HYPRE_Int rap2 = 0;
241 HYPRE_Int mod_rap2 = 0;
242 HYPRE_Int keepTranspose = 0;
243 #ifdef HYPRE_USING_DSUPERLU
244 HYPRE_Int dslu_threshold = -1;
245 #endif
246 HYPRE_Real relax_wt;
247 HYPRE_Real add_relax_wt = 1.0;
248 HYPRE_Real relax_wt_level;
249 HYPRE_Real outer_wt;
250 HYPRE_Real outer_wt_level;
251 HYPRE_Real tol = 1.e-8, pc_tol = 0.;
252 HYPRE_Real atol = 0.0;
253 HYPRE_Real max_row_sum = 1.;
254 HYPRE_Int converge_type = 0;
255
256 HYPRE_Int cheby_order = 2;
257 HYPRE_Int cheby_eig_est = 10;
258 HYPRE_Int cheby_variant = 0;
259 HYPRE_Int cheby_scale = 1;
260 HYPRE_Real cheby_fraction = .3;
261
262 #if defined(HYPRE_USING_GPU)
263 keepTranspose = 1;
264 coarsen_type = 8;
265 mod_rap2 = 1;
266 HYPRE_Int spgemm_use_cusparse = 0;
267 HYPRE_Int use_curand = 1;
268 #if defined(HYPRE_USING_HIP)
269 spgemm_use_cusparse = 1;
270 #endif
271 HYPRE_Int spgemm_alg = 1;
272 HYPRE_Int spgemm_rowest_mtd = 3;
273 HYPRE_Int spgemm_rowest_nsamples = 32;
274 HYPRE_Real spgemm_rowest_mult = 1.5;
275 char spgemm_hash_type = 'L';
276 #endif
277
278 /* for CGC BM Aug 25, 2006 */
279 HYPRE_Int cgcits = 1;
280 /* for coordinate plotting BM Oct 24, 2006 */
281 HYPRE_Int plot_grids = 0;
282 HYPRE_Int coord_dim = 3;
283 float *coordinates = NULL;
284 char plot_file_name[256];
285
286 /* parameters for ParaSAILS */
287 HYPRE_Real sai_threshold = 0.1;
288 HYPRE_Real sai_filter = 0.1;
289
290 /* parameters for PILUT */
291 HYPRE_Real drop_tol = -1;
292 HYPRE_Int nonzeros_to_keep = -1;
293
294 /* parameters for Euclid or ILU smoother in AMG */
295 HYPRE_Real eu_ilut = 0.0;
296 HYPRE_Real eu_sparse_A = 0.0;
297 HYPRE_Int eu_bj = 0;
298 HYPRE_Int eu_level = -1;
299 HYPRE_Int eu_stats = 0;
300 HYPRE_Int eu_mem = 0;
301 HYPRE_Int eu_row_scale = 0; /* Euclid only */
302
303 /* parameters for GMRES */
304 HYPRE_Int k_dim;
305 /* parameters for COGMRES */
306 HYPRE_Int cgs = 1;
307 HYPRE_Int unroll = 0;
308 /* parameters for LGMRES */
309 HYPRE_Int aug_dim;
310 /* parameters for GSMG */
311 HYPRE_Int gsmg_samples = 5;
312 /* interpolation */
313 HYPRE_Int interp_type = 6; /* default value */
314 HYPRE_Int post_interp_type = 0; /* default value */
315 /* RL: restriction */
316 HYPRE_Int restri_type = 0;
317 /* aggressive coarsening */
318 HYPRE_Int agg_interp_type = 4; /* default value */
319 HYPRE_Int agg_P_max_elmts = 0; /* default value */
320 HYPRE_Int agg_P12_max_elmts = 0; /* default value */
321 HYPRE_Real agg_trunc_factor = 0; /* default value */
322 HYPRE_Real agg_P12_trunc_factor = 0; /* default value */
323
324 HYPRE_Int print_system = 0;
325 HYPRE_Int rel_change = 0;
326
327 /* begin lobpcg */
328 HYPRE_Int hybrid = 1;
329 HYPRE_Int num_sweep = 1;
330 HYPRE_Int relax_default = 3;
331
332 HYPRE_Int lobpcgFlag = 0;
333 HYPRE_Int lobpcgGen = 0;
334 HYPRE_Int constrained = 0;
335 HYPRE_Int vFromFileFlag = 0;
336 HYPRE_Int lobpcgSeed = 0;
337 HYPRE_Int blockSize = 1;
338 HYPRE_Int verbosity = 1;
339 HYPRE_Int iterations;
340 HYPRE_Int maxIterations = 100;
341 HYPRE_Int checkOrtho = 0;
342 HYPRE_Int printLevel = 0; /* also c.f. poutdat */
343 HYPRE_Int two_norm = 1;
344 HYPRE_Int pcgIterations = 0;
345 HYPRE_Int pcgMode = 1;
346 HYPRE_Real pcgTol = 1e-2;
347 HYPRE_Real nonOrthF;
348
349 FILE* filePtr;
350
351 mv_MultiVectorPtr eigenvectors = NULL;
352 mv_MultiVectorPtr constraints = NULL;
353 mv_MultiVectorPtr workspace = NULL;
354
355 HYPRE_Real* eigenvalues = NULL;
356
357 HYPRE_Real* residuals;
358 utilities_FortranMatrix* residualNorms;
359 utilities_FortranMatrix* residualNormsHistory;
360 utilities_FortranMatrix* eigenvaluesHistory;
361 utilities_FortranMatrix* printBuffer;
362 utilities_FortranMatrix* gramXX;
363 utilities_FortranMatrix* identity;
364
365 HYPRE_Solver lobpcg_solver;
366
367 mv_InterfaceInterpreter* interpreter;
368 HYPRE_MatvecFunctions matvec_fn;
369
370 HYPRE_IJMatrix ij_B;
371 HYPRE_ParCSRMatrix parcsr_B;
372
373 /* end lobpcg */
374
375 /* mgr options */
376 HYPRE_Int mgr_bsize = 1;
377 HYPRE_Int mgr_nlevels = 0;
378 HYPRE_Int mgr_num_reserved_nodes = 0;
379 HYPRE_Int mgr_non_c_to_f = 1;
380 HYPRE_Int mgr_frelax_method = 0;
381 HYPRE_Int *mgr_num_cindexes = NULL;
382 HYPRE_Int **mgr_cindexes = NULL;
383 HYPRE_BigInt *mgr_reserved_coarse_indexes = NULL;
384 HYPRE_Int mgr_relax_type = 0;
385 HYPRE_Int mgr_num_relax_sweeps = 2;
386 HYPRE_Int mgr_interp_type = 2;
387 HYPRE_Int mgr_num_interp_sweeps = 2;
388 HYPRE_Int mgr_gsmooth_type = 0;
389 HYPRE_Int mgr_num_gsmooth_sweeps = 1;
390 HYPRE_Int mgr_restrict_type = 0;
391 HYPRE_Int mgr_num_restrict_sweeps = 0;
392 /* end mgr options */
393
394 /* hypre_ILU options */
395 HYPRE_Int ilu_type = 0;
396 HYPRE_Int ilu_lfil = 0;
397 HYPRE_Int ilu_sm_max_iter = 1;
398 HYPRE_Real ilu_droptol = 1.0e-02;
399 HYPRE_Int ilu_max_row_nnz = 1000;
400 HYPRE_Int ilu_schur_max_iter = 3;
401 HYPRE_Real ilu_nsh_droptol = 1.0e-02;
402 /* end hypre ILU options */
403
404 HYPRE_Real *nongalerk_tol = NULL;
405 HYPRE_Int nongalerk_num_tol = 0;
406
407 /* coarsening data */
408 HYPRE_Int num_cpt = 0;
409 HYPRE_Int num_fpt = 0;
410 HYPRE_Int num_isolated_fpt = 0;
411 HYPRE_BigInt *cpt_index = NULL;
412 HYPRE_BigInt *fpt_index = NULL;
413 HYPRE_BigInt *isolated_fpt_index = NULL;
414
415 HYPRE_BigInt *row_nums = NULL;
416 HYPRE_Int *num_cols = NULL;
417 HYPRE_BigInt *col_nums = NULL;
418 HYPRE_Int i_indx, j_indx, num_rows;
419 HYPRE_Real *data = NULL;
420
421 HYPRE_Int air = 0;
422 HYPRE_Int **grid_relax_points = NULL;
423
424 /* amg-dd options */
425 HYPRE_Int amgdd_start_level = 0;
426 HYPRE_Int amgdd_padding = 1;
427 HYPRE_Int amgdd_fac_num_relax = 1;
428 HYPRE_Int amgdd_num_comp_cycles = 2;
429 HYPRE_Int amgdd_fac_relax_type = 3;
430 HYPRE_Int amgdd_fac_cycle_type = 1;
431 HYPRE_Int amgdd_num_ghost_layers = 1;
432
433 /* default execution policy and memory space */
434 HYPRE_ExecutionPolicy default_exec_policy = HYPRE_EXEC_DEVICE;
435 HYPRE_MemoryLocation memory_location = HYPRE_MEMORY_DEVICE;
436
437 #ifdef HYPRE_USING_DEVICE_POOL
438 /* device pool allocator */
439 hypre_uint mempool_bin_growth = 8,
440 mempool_min_bin = 3,
441 mempool_max_bin = 9;
442 size_t mempool_max_cached_bytes = 2000LL * 1024 * 1024;
443 #endif
444
445 /* Initialize MPI */
446 hypre_MPI_Init(&argc, &argv);
447
448 hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
449 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
450
451 /*-----------------------------------------------------------
452 * Set defaults
453 *-----------------------------------------------------------*/
454 build_matrix_type = 2;
455 build_matrix_arg_index = argc;
456 build_rhs_type = 2;
457 build_rhs_arg_index = argc;
458 build_src_type = -1;
459 build_src_arg_index = argc;
460 build_x0_type = -1;
461 build_x0_arg_index = argc;
462 build_funcs_type = 0;
463 build_funcs_arg_index = argc;
464 build_fpt_arg_index = 0;
465 build_sfpt_arg_index = 0;
466 build_cpt_arg_index = 0;
467 IS_type = 1;
468 debug_flag = 0;
469 solver_id = 0;
470 ioutdat = 3;
471 poutdat = 1;
472 hypre_sprintf (plot_file_name,"AMGgrids.CF.dat");
473
474 /*-----------------------------------------------------------
475 * Parse command line
476 *-----------------------------------------------------------*/
477
478 print_usage = 0;
479 arg_index = 1;
480
481 while ( (arg_index < argc) && (!print_usage) )
482 {
483 if ( strcmp(argv[arg_index], "-fromfile") == 0 )
484 {
485 arg_index++;
486 build_matrix_type = -1;
487 build_matrix_arg_index = arg_index;
488 }
489 else if ( strcmp(argv[arg_index], "-fromparcsrfile") == 0 )
490 {
491 arg_index++;
492 build_matrix_type = 0;
493 build_matrix_arg_index = arg_index;
494 }
495 else if ( strcmp(argv[arg_index], "-fromonecsrfile") == 0 )
496 {
497 arg_index++;
498 build_matrix_type = 1;
499 build_matrix_arg_index = arg_index;
500 }
501 else if ( strcmp(argv[arg_index], "-laplacian") == 0 )
502 {
503 arg_index++;
504 build_matrix_type = 2;
505 build_matrix_arg_index = arg_index;
506 }
507 else if ( strcmp(argv[arg_index], "-9pt") == 0 )
508 {
509 arg_index++;
510 build_matrix_type = 3;
511 build_matrix_arg_index = arg_index;
512 }
513 else if ( strcmp(argv[arg_index], "-27pt") == 0 )
514 {
515 arg_index++;
516 build_matrix_type = 4;
517 build_matrix_arg_index = arg_index;
518 }
519 else if ( strcmp(argv[arg_index], "-difconv") == 0 )
520 {
521 arg_index++;
522 build_matrix_type = 5;
523 build_matrix_arg_index = arg_index;
524 }
525 else if ( strcmp(argv[arg_index], "-vardifconv") == 0 )
526 {
527 arg_index++;
528 build_matrix_type = 6;
529 build_matrix_arg_index = arg_index;
530 }
531 else if ( strcmp(argv[arg_index], "-rotate") == 0 )
532 {
533 arg_index++;
534 build_matrix_type = 7;
535 build_matrix_arg_index = arg_index;
536 }
537 else if ( strcmp(argv[arg_index], "-test_ij") == 0 )
538 {
539 arg_index++;
540 test_ij = 1;
541 }
542 else if ( strcmp(argv[arg_index], "-funcsfromonefile") == 0 )
543 {
544 arg_index++;
545 build_funcs_type = 1;
546 build_funcs_arg_index = arg_index;
547 }
548 else if ( strcmp(argv[arg_index], "-funcsfromfile") == 0 )
549 {
550 arg_index++;
551 build_funcs_type = 2;
552 build_funcs_arg_index = arg_index;
553 }
554 else if ( strcmp(argv[arg_index], "-exact_size") == 0 )
555 {
556 arg_index++;
557 sparsity_known = 1;
558 }
559 else if ( strcmp(argv[arg_index], "-storage_low") == 0 )
560 {
561 arg_index++;
562 sparsity_known = 2;
563 }
564 else if ( strcmp(argv[arg_index], "-add") == 0 )
565 {
566 arg_index++;
567 add = atoi(argv[arg_index++]);
568 }
569 else if ( strcmp(argv[arg_index], "-chunk") == 0 )
570 {
571 arg_index++;
572 chunk = atoi(argv[arg_index++]);
573 }
574 else if ( strcmp(argv[arg_index], "-off_proc") == 0 )
575 {
576 arg_index++;
577 off_proc = atoi(argv[arg_index++]);
578 }
579 else if ( strcmp(argv[arg_index], "-omp") == 0 )
580 {
581 arg_index++;
582 omp_flag = atoi(argv[arg_index++]);
583 }
584 else if ( strcmp(argv[arg_index], "-check_constant") == 0 )
585 {
586 arg_index++;
587 check_constant = atoi(argv[arg_index++]);
588 }
589 else if ( strcmp(argv[arg_index], "-concrete_parcsr") == 0 )
590 {
591 arg_index++;
592 build_matrix_arg_index = arg_index;
593 }
594 else if ( strcmp(argv[arg_index], "-solver") == 0 )
595 {
596 arg_index++;
597 /* begin lobpcg */
598 if ( strcmp(argv[arg_index], "none") == 0 ) {
599 solver_id = NO_SOLVER;
600 arg_index++;
601 }
602 else /* end lobpcg */
603 {
604 solver_id = atoi(argv[arg_index++]);
605 }
606 }
607 else if ( strcmp(argv[arg_index], "-rbm") == 0 )
608 {
609 arg_index++;
610 build_rbm = 1;
611 num_interp_vecs = atoi(argv[arg_index++]);
612 build_rbm_index = arg_index;
613 }
614 else if ( strcmp(argv[arg_index], "-rhsfromfile") == 0 )
615 {
616 arg_index++;
617 build_rhs_type = 0;
618 build_rhs_arg_index = arg_index;
619 }
620 else if ( strcmp(argv[arg_index], "-rhsfromonefile") == 0 )
621 {
622 arg_index++;
623 build_rhs_type = 1;
624 build_rhs_arg_index = arg_index;
625 }
626 else if ( strcmp(argv[arg_index], "-rhsparcsrfile") == 0 )
627 {
628 arg_index++;
629 build_rhs_type = 7;
630 build_rhs_arg_index = arg_index;
631 }
632 else if ( strcmp(argv[arg_index], "-rhsisone") == 0 )
633 {
634 arg_index++;
635 build_rhs_type = 2;
636 build_rhs_arg_index = arg_index;
637 }
638 else if ( strcmp(argv[arg_index], "-rhsrand") == 0 )
639 {
640 arg_index++;
641 build_rhs_type = 3;
642 build_rhs_arg_index = arg_index;
643 }
644 else if ( strcmp(argv[arg_index], "-xisone") == 0 )
645 {
646 arg_index++;
647 build_rhs_type = 4;
648 build_rhs_arg_index = arg_index;
649 }
650 else if ( strcmp(argv[arg_index], "-rhszero") == 0 )
651 {
652 arg_index++;
653 build_rhs_type = 5;
654 build_rhs_arg_index = arg_index;
655 }
656 else if ( strcmp(argv[arg_index], "-srcfromfile") == 0 )
657 {
658 arg_index++;
659 build_src_type = 0;
660 build_rhs_type = -1;
661 build_src_arg_index = arg_index;
662 }
663 else if ( strcmp(argv[arg_index], "-srcfromonefile") == 0 )
664 {
665 arg_index++;
666 build_src_type = 1;
667 build_rhs_type = -1;
668 build_src_arg_index = arg_index;
669 }
670 else if ( strcmp(argv[arg_index], "-srcisone") == 0 )
671 {
672 arg_index++;
673 build_src_type = 2;
674 build_rhs_type = -1;
675 build_src_arg_index = arg_index;
676 }
677 else if ( strcmp(argv[arg_index], "-srcrand") == 0 )
678 {
679 arg_index++;
680 build_src_type = 3;
681 build_rhs_type = -1;
682 build_src_arg_index = arg_index;
683 }
684 else if ( strcmp(argv[arg_index], "-srczero") == 0 )
685 {
686 arg_index++;
687 build_src_type = 4;
688 build_rhs_type = -1;
689 build_src_arg_index = arg_index;
690 }
691 else if ( strcmp(argv[arg_index], "-x0fromfile") == 0 )
692 {
693 arg_index++;
694 build_x0_type = 0;
695 build_x0_arg_index = arg_index;
696 }
697 else if ( strcmp(argv[arg_index], "-x0parcsrfile") == 0 )
698 {
699 arg_index++;
700 build_x0_type = 7;
701 build_x0_arg_index = arg_index;
702 }
703 else if ( strcmp(argv[arg_index], "-x0rand") == 0 )
704 {
705 arg_index++;
706 build_x0_type = 1;
707 build_x0_arg_index = arg_index;
708 }
709 else if ( strcmp(argv[arg_index], "-CFfromfile") == 0 )
710 {
711 arg_index++;
712 coarsen_type = 999;
713 }
714 else if ( strcmp(argv[arg_index], "-Ffromonefile") == 0 )
715 {
716 arg_index++;
717 build_fpt_arg_index = arg_index;
718 }
719 else if ( strcmp(argv[arg_index], "-SFfromonefile") == 0 )
720 {
721 arg_index++;
722 build_sfpt_arg_index = arg_index;
723 }
724 else if ( strcmp(argv[arg_index], "-Cfromonefile") == 0 )
725 {
726 arg_index++;
727 build_cpt_arg_index = arg_index;
728 }
729 else if ( strcmp(argv[arg_index], "-cljp") == 0 )
730 {
731 arg_index++;
732 coarsen_type = 0;
733 }
734 else if ( strcmp(argv[arg_index], "-cljp1") == 0 )
735 {
736 arg_index++;
737 coarsen_type = 7;
738 }
739 else if ( strcmp(argv[arg_index], "-cgc") == 0 )
740 {
741 arg_index++;
742 coarsen_type = 21;
743 cgcits = 200;
744 }
745 else if ( strcmp(argv[arg_index], "-cgce") == 0 )
746 {
747 arg_index++;
748 coarsen_type = 22;
749 cgcits = 200;
750 }
751 else if ( strcmp(argv[arg_index], "-pmis") == 0 )
752 {
753 arg_index++;
754 coarsen_type = 8;
755 }
756 else if ( strcmp(argv[arg_index], "-pmis1") == 0 )
757 {
758 arg_index++;
759 coarsen_type = 9;
760 }
761 else if ( strcmp(argv[arg_index], "-cr1") == 0 )
762 {
763 arg_index++;
764 coarsen_type = 98;
765 }
766 else if ( strcmp(argv[arg_index], "-cr") == 0 )
767 {
768 arg_index++;
769 coarsen_type = 99;
770 }
771 else if ( strcmp(argv[arg_index], "-crcg") == 0 )
772 {
773 arg_index++;
774 CR_use_CG = atoi(argv[arg_index++]);
775 }
776 else if ( strcmp(argv[arg_index], "-hmis") == 0 )
777 {
778 arg_index++;
779 coarsen_type = 10;
780 }
781 else if ( strcmp(argv[arg_index], "-ruge") == 0 )
782 {
783 arg_index++;
784 coarsen_type = 1;
785 }
786 else if ( strcmp(argv[arg_index], "-ruge1p") == 0 )
787 {
788 arg_index++;
789 coarsen_type = 11;
790 }
791 else if ( strcmp(argv[arg_index], "-ruge2b") == 0 )
792 {
793 arg_index++;
794 coarsen_type = 2;
795 }
796 else if ( strcmp(argv[arg_index], "-ruge3") == 0 )
797 {
798 arg_index++;
799 coarsen_type = 3;
800 }
801 else if ( strcmp(argv[arg_index], "-ruge3c") == 0 )
802 {
803 arg_index++;
804 coarsen_type = 4;
805 }
806 else if ( strcmp(argv[arg_index], "-rugerlx") == 0 )
807 {
808 arg_index++;
809 coarsen_type = 5;
810 }
811 else if ( strcmp(argv[arg_index], "-falgout") == 0 )
812 {
813 arg_index++;
814 coarsen_type = 6;
815 }
816 else if ( strcmp(argv[arg_index], "-gm") == 0 )
817 {
818 arg_index++;
819 measure_type = 1;
820 }
821 else if ( strcmp(argv[arg_index], "-is") == 0 )
822 {
823 arg_index++;
824 IS_type = atoi(argv[arg_index++]);
825 }
826 else if ( strcmp(argv[arg_index], "-ncr") == 0 )
827 {
828 arg_index++;
829 num_CR_relax_steps = atoi(argv[arg_index++]);
830 }
831 else if ( strcmp(argv[arg_index], "-crth") == 0 )
832 {
833 arg_index++;
834 CR_rate = atof(argv[arg_index++]);
835 }
836 else if ( strcmp(argv[arg_index], "-crst") == 0 )
837 {
838 arg_index++;
839 CR_strong_th = atof(argv[arg_index++]);
840 }
841 else if ( strcmp(argv[arg_index], "-rlx") == 0 )
842 {
843 arg_index++;
844 relax_type = atoi(argv[arg_index++]);
845 }
846 else if ( strcmp(argv[arg_index], "-rlx_coarse") == 0 )
847 {
848 arg_index++;
849 relax_coarse = atoi(argv[arg_index++]);
850 }
851 else if ( strcmp(argv[arg_index], "-rlx_down") == 0 )
852 {
853 arg_index++;
854 relax_down = atoi(argv[arg_index++]);
855 }
856 else if ( strcmp(argv[arg_index], "-rlx_up") == 0 )
857 {
858 arg_index++;
859 relax_up = atoi(argv[arg_index++]);
860 }
861 else if ( strcmp(argv[arg_index], "-smtype") == 0 )
862 {
863 arg_index++;
864 smooth_type = atoi(argv[arg_index++]);
865 }
866 else if ( strcmp(argv[arg_index], "-smlv") == 0 )
867 {
868 arg_index++;
869 smooth_num_levels = atoi(argv[arg_index++]);
870 }
871 else if ( strcmp(argv[arg_index], "-mxl") == 0 )
872 {
873 arg_index++;
874 max_levels = atoi(argv[arg_index++]);
875 }
876 else if ( strcmp(argv[arg_index], "-dbg") == 0 )
877 {
878 arg_index++;
879 debug_flag = atoi(argv[arg_index++]);
880 }
881 else if ( strcmp(argv[arg_index], "-nf") == 0 )
882 {
883 arg_index++;
884 num_functions = atoi(argv[arg_index++]);
885 }
886 else if ( strcmp(argv[arg_index], "-agg_nl") == 0 )
887 {
888 arg_index++;
889 agg_num_levels = atoi(argv[arg_index++]);
890 }
891 else if ( strcmp(argv[arg_index], "-npaths") == 0 )
892 {
893 arg_index++;
894 num_paths = atoi(argv[arg_index++]);
895 }
896 else if ( strcmp(argv[arg_index], "-ns") == 0 )
897 {
898 arg_index++;
899 num_sweeps = atoi(argv[arg_index++]);
900 }
901 else if ( strcmp(argv[arg_index], "-ns_coarse") == 0 )
902 {
903 arg_index++;
904 ns_coarse = atoi(argv[arg_index++]);
905 }
906 else if ( strcmp(argv[arg_index], "-ns_down") == 0 )
907 {
908 arg_index++;
909 ns_down = atoi(argv[arg_index++]);
910 }
911 else if ( strcmp(argv[arg_index], "-ns_up") == 0 )
912 {
913 arg_index++;
914 ns_up = atoi(argv[arg_index++]);
915 }
916 else if ( strcmp(argv[arg_index], "-sns") == 0 )
917 {
918 arg_index++;
919 smooth_num_sweeps = atoi(argv[arg_index++]);
920 }
921 else if ( strcmp(argv[arg_index], "-max_iter") == 0 )
922 {
923 arg_index++;
924 max_iter = atoi(argv[arg_index++]);
925 }
926 else if ( strcmp(argv[arg_index], "-mg_max_iter") == 0 )
927 {
928 arg_index++;
929 mg_max_iter = atoi(argv[arg_index++]);
930 }
931
932 else if ( strcmp(argv[arg_index], "-dt") == 0 )
933 {
934 arg_index++;
935 dt = atof(argv[arg_index++]);
936 build_rhs_type = -1;
937 if ( build_src_type == -1 ) build_src_type = 2;
938 }
939 else if ( strcmp(argv[arg_index], "-restritype") == 0 )
940 {
941 arg_index++;
942 restri_type = atoi(argv[arg_index++]);
943 }
944 else if ( strcmp(argv[arg_index], "-help") == 0 )
945 {
946 print_usage = 1;
947 }
948 /* begin lobpcg */
949 else if ( strcmp(argv[arg_index], "-lobpcg") == 0 )
950 { /* use lobpcg */
951 arg_index++;
952 lobpcgFlag = 1;
953 }
954 else if ( strcmp(argv[arg_index], "-gen") == 0 )
955 { /* generalized evp */
956 arg_index++;
957 lobpcgGen = 1;
958 }
959 else if ( strcmp(argv[arg_index], "-con") == 0 )
960 { /* constrained evp */
961 arg_index++;
962 constrained = 1;
963 }
964 else if ( strcmp(argv[arg_index], "-orthchk") == 0 )
965 { /* lobpcg: check orthonormality */
966 arg_index++;
967 checkOrtho = 1;
968 }
969 else if ( strcmp(argv[arg_index], "-vfromfile") == 0 )
970 { /* lobpcg: get initial vectors from file */
971 arg_index++;
972 vFromFileFlag = 1;
973 }
974 else if ( strcmp(argv[arg_index], "-vrand") == 0 )
975 { /* lobpcg: block size */
976 arg_index++;
977 blockSize = atoi(argv[arg_index++]);
978 }
979 else if ( strcmp(argv[arg_index], "-seed") == 0 )
980 { /* lobpcg: seed for srand */
981 arg_index++;
982 lobpcgSeed = atoi(argv[arg_index++]);
983 }
984 else if ( strcmp(argv[arg_index], "-itr") == 0 )
985 { /* lobpcg: max # of iterations */
986 arg_index++;
987 maxIterations = atoi(argv[arg_index++]);
988 }
989 else if ( strcmp(argv[arg_index], "-verb") == 0 )
990 { /* lobpcg: verbosity level */
991 arg_index++;
992 verbosity = atoi(argv[arg_index++]);
993 }
994 else if ( strcmp(argv[arg_index], "-vout") == 0 )
995 { /* lobpcg: print level */
996 arg_index++;
997 printLevel = atoi(argv[arg_index++]);
998 }
999 else if ( strcmp(argv[arg_index], "-pcgitr") == 0 )
1000 { /* lobpcg: inner pcg iterations */
1001 arg_index++;
1002 pcgIterations = atoi(argv[arg_index++]);
1003 }
1004 else if ( strcmp(argv[arg_index], "-pcgtol") == 0 )
1005 { /* lobpcg: inner pcg tolerance */
1006 arg_index++;
1007 pcgTol = atof(argv[arg_index++]);
1008 }
1009 else if ( strcmp(argv[arg_index], "-pcgmode") == 0 )
1010 { /* lobpcg: initial guess for inner pcg */
1011 arg_index++; /* 0: zero, otherwise rhs */
1012 pcgMode = atoi(argv[arg_index++]);
1013 }
1014 /* end lobpcg */
1015 /* begin mgr options*/
1016 else if ( strcmp(argv[arg_index], "-mgr_bsize") == 0 )
1017 { /* mgr block size */
1018 arg_index++;
1019 mgr_bsize = atoi(argv[arg_index++]);
1020 }
1021 else if ( strcmp(argv[arg_index], "-mgr_nlevels") == 0 )
1022 { /* mgr number of coarsening levels */
1023 arg_index++;
1024 mgr_nlevels = atoi(argv[arg_index++]);
1025 }
1026 else if ( strcmp(argv[arg_index], "-mgr_non_c_to_f") == 0 )
1027 { /* mgr intermediate coarse grid strategy */
1028 arg_index++;
1029 mgr_non_c_to_f = atoi(argv[arg_index++]);
1030 }
1031 else if ( strcmp(argv[arg_index], "-mgr_num_reserved_nodes") == 0 )
1032 { /* mgr number of reserved nodes to be put on coarsest grid */
1033 arg_index++;
1034 mgr_num_reserved_nodes = atoi(argv[arg_index++]);
1035 }
1036 else if ( strcmp(argv[arg_index], "-mgr_frelax_method") == 0 )
1037 { /* mgr F-relaxation strategy: single/ multi level */
1038 arg_index++;
1039 mgr_frelax_method = atoi(argv[arg_index++]);
1040 }
1041 else if ( strcmp(argv[arg_index], "-mgr_relax_type") == 0 )
1042 { /* relax type for "single level" F-relaxation */
1043 arg_index++;
1044 mgr_relax_type = atoi(argv[arg_index++]);
1045 }
1046 else if ( strcmp(argv[arg_index], "-mgr_relax_sweeps") == 0 )
1047 { /* number of relaxation sweeps */
1048 arg_index++;
1049 mgr_num_relax_sweeps = atoi(argv[arg_index++]);
1050 }
1051 else if ( strcmp(argv[arg_index], "-mgr_interp_type") == 0 )
1052 { /* interpolation type */
1053 arg_index++;
1054 mgr_interp_type = atoi(argv[arg_index++]);
1055 }
1056 else if ( strcmp(argv[arg_index], "-mgr_interp_sweeps") == 0 )
1057 { /* number of interpolation sweeps*/
1058 arg_index++;
1059 mgr_num_interp_sweeps = atoi(argv[arg_index++]);
1060 }
1061 else if ( strcmp(argv[arg_index], "-mgr_gsmooth_type") == 0 )
1062 { /* global smoother type */
1063 arg_index++;
1064 mgr_gsmooth_type = atoi(argv[arg_index++]);
1065 }
1066 else if ( strcmp(argv[arg_index], "-mgr_gsmooth_sweeps") == 0 )
1067 { /* number of global smooth sweeps*/
1068 arg_index++;
1069 mgr_num_gsmooth_sweeps = atoi(argv[arg_index++]);
1070 }
1071 else if ( strcmp(argv[arg_index], "-mgr_restrict_type") == 0 )
1072 { /* restriction type */
1073 arg_index++;
1074 mgr_restrict_type = atoi(argv[arg_index++]);
1075 }
1076 else if ( strcmp(argv[arg_index], "-mgr_restrict_sweeps") == 0 )
1077 { /* number of restriction sweeps*/
1078 arg_index++;
1079 mgr_num_restrict_sweeps = atoi(argv[arg_index++]);
1080 }
1081 /* end mgr options */
1082 /* begin ilu options*/
1083 else if ( strcmp(argv[arg_index], "-ilu_type") == 0 )
1084 { /* ilu_type */
1085 arg_index++;
1086 ilu_type = atoi(argv[arg_index++]);
1087 }
1088 else if ( strcmp(argv[arg_index], "-ilu_sm_max_iter") == 0 )
1089 { /* number of iteration when applied as a smoother */
1090 arg_index++;
1091 ilu_sm_max_iter = atoi(argv[arg_index++]);
1092 }
1093 else if ( strcmp(argv[arg_index], "-ilu_lfil") == 0 )
1094 { /* level of fill */
1095 arg_index++;
1096 ilu_lfil = atoi(argv[arg_index++]);
1097 }
1098 else if ( strcmp(argv[arg_index], "-ilu_droptol") == 0 )
1099 { /* drop tolerance */
1100 arg_index++;
1101 ilu_droptol = atof(argv[arg_index++]);
1102 }
1103 else if ( strcmp(argv[arg_index], "-ilu_max_row_nnz") == 0 )
1104 { /* Max number of nonzeros to keep per row */
1105 arg_index++;
1106 ilu_max_row_nnz = atoi(argv[arg_index++]);
1107 }
1108 else if ( strcmp(argv[arg_index], "-ilu_schur_max_iter") == 0 )
1109 { /* Max number of iterations for schur system solver */
1110 arg_index++;
1111 ilu_schur_max_iter = atoi(argv[arg_index++]);
1112 }
1113 else if ( strcmp(argv[arg_index], "-ilu_nsh_droptol") == 0 )
1114 { /* drop tolerance for NSH */
1115 arg_index++;
1116 ilu_nsh_droptol = atof(argv[arg_index++]);
1117 }
1118 /* end ilu options */
1119 #if defined(HYPRE_USING_GPU)
1120 else if ( strcmp(argv[arg_index], "-exec_host") == 0 )
1121 {
1122 arg_index++;
1123 default_exec_policy = HYPRE_EXEC_HOST;
1124 }
1125 else if ( strcmp(argv[arg_index], "-exec_device") == 0 )
1126 {
1127 arg_index++;
1128 default_exec_policy = HYPRE_EXEC_DEVICE;
1129 }
1130 else if ( strcmp(argv[arg_index], "-mm_cusparse") == 0 )
1131 {
1132 arg_index++;
1133 spgemm_use_cusparse = atoi(argv[arg_index++]);
1134 }
1135 else if ( strcmp(argv[arg_index], "-spgemm_alg") == 0 )
1136 {
1137 arg_index++;
1138 spgemm_alg = atoi(argv[arg_index++]);
1139 }
1140 else if ( strcmp(argv[arg_index], "-spgemm_rowest") == 0 )
1141 {
1142 arg_index++;
1143 spgemm_rowest_mtd = atoi(argv[arg_index++]);
1144 }
1145 else if ( strcmp(argv[arg_index], "-spgemm_rowestmult") == 0 )
1146 {
1147 arg_index++;
1148 spgemm_rowest_mult = atof(argv[arg_index++]);
1149 }
1150 else if ( strcmp(argv[arg_index], "-spgemm_rowestnsamples") == 0 )
1151 {
1152 arg_index++;
1153 spgemm_rowest_nsamples = atoi(argv[arg_index++]);
1154 }
1155 else if ( strcmp(argv[arg_index], "-spgemm_hash") == 0 )
1156 {
1157 arg_index++;
1158 spgemm_hash_type = argv[arg_index++][0];
1159 }
1160 else if ( strcmp(argv[arg_index], "-use_curand") == 0 )
1161 {
1162 arg_index++;
1163 use_curand = atoi(argv[arg_index++]);
1164 }
1165 #endif
1166 #ifdef HYPRE_USING_DEVICE_POOL
1167 else if ( strcmp(argv[arg_index], "-mempool_growth") == 0 )
1168 {
1169 arg_index++;
1170 mempool_bin_growth = atoi(argv[arg_index++]);
1171 }
1172 else if ( strcmp(argv[arg_index], "-mempool_minbin") == 0 )
1173 {
1174 arg_index++;
1175 mempool_min_bin = atoi(argv[arg_index++]);
1176 }
1177 else if ( strcmp(argv[arg_index], "-mempool_maxbin") == 0 )
1178 {
1179 arg_index++;
1180 mempool_max_bin = atoi(argv[arg_index++]);
1181 }
1182 else if ( strcmp(argv[arg_index], "-mempool_maxcached") == 0 )
1183 {
1184 // Give maximum cached in Mbytes.
1185 arg_index++;
1186 mempool_max_cached_bytes = atoi(argv[arg_index++])*1024LL*1024LL;
1187 }
1188 #endif
1189 else if ( strcmp(argv[arg_index], "-negA") == 0 )
1190 {
1191 arg_index++;
1192 negA = atoi(argv[arg_index++]);
1193 }
1194 else
1195 {
1196 arg_index++;
1197 }
1198 }
1199
1200 /* begin CGC BM Aug 25, 2006 */
1201 if (coarsen_type == 21 || coarsen_type == 22) {
1202 arg_index = 0;
1203 while ( (arg_index < argc) && (!print_usage) )
1204 {
1205 if ( strcmp(argv[arg_index], "-cgcits") == 0 )
1206 {
1207 arg_index++;
1208 cgcits = atoi(argv[arg_index++]);
1209 }
1210 else
1211 {
1212 arg_index++;
1213 }
1214 }
1215 }
1216
1217 /* begin lobpcg */
1218
1219 if ( solver_id == 0 && lobpcgFlag )
1220 {
1221 solver_id = 1;
1222 }
1223
1224 /* end lobpcg */
1225
1226 if (solver_id == 8 || solver_id == 18)
1227 {
1228 max_levels = 1;
1229 }
1230
1231 /* defaults for BoomerAMG */
1232 if (solver_id == 0 || solver_id == 1 || solver_id == 3 || solver_id == 5
1233 || solver_id == 9 || solver_id == 13 || solver_id == 14
1234 || solver_id == 15 || solver_id == 20 || solver_id == 51 || solver_id == 61
1235 || solver_id == 16
1236 || solver_id == 70 || solver_id == 71 || solver_id == 72
1237 || solver_id == 90 || solver_id == 91)
1238 {
1239 strong_threshold = 0.25;
1240 strong_thresholdR = 0.25;
1241 filter_thresholdR = 0.00;
1242 trunc_factor = 0.;
1243 jacobi_trunc_threshold = 0.01;
1244 cycle_type = 1;
1245 fcycle = 0;
1246 relax_wt = 1.;
1247 outer_wt = 1.;
1248
1249 /* for CGNR preconditioned with Boomeramg, only relaxation scheme 0 is
1250 implemented, i.e. Jacobi relaxation, and needs to be used without CF
1251 ordering */
1252 if (solver_id == 5)
1253 {
1254 relax_type = 0;
1255 relax_order = 0;
1256 }
1257 }
1258
1259 /* defaults for Schwarz */
1260 variant = 0; /* multiplicative */
1261 overlap = 1; /* 1 layer overlap */
1262 domain_type = 2; /* through agglomeration */
1263 schwarz_rlx_weight = 1.;
1264
1265 /* defaults for GMRES */
1266 k_dim = 5;
1267 cgs = 1;
1268 unroll = 0;
1269
1270 /* defaults for LGMRES - should use a larger k_dim, though*/
1271 aug_dim = 2;
1272
1273 arg_index = 0;
1274 while (arg_index < argc)
1275 {
1276 if ( strcmp(argv[arg_index], "-k") == 0 )
1277 {
1278 arg_index++;
1279 k_dim = atoi(argv[arg_index++]);
1280 }
1281 else if ( strcmp(argv[arg_index], "-cgs") == 0 )
1282 {
1283 arg_index++;
1284 cgs = atoi(argv[arg_index++]);
1285 }
1286 else if ( strcmp(argv[arg_index], "-unroll") == 0 )
1287 {
1288 arg_index++;
1289 unroll = atoi(argv[arg_index++]);
1290 }
1291 else if ( strcmp(argv[arg_index], "-check_residual") == 0 )
1292 {
1293 arg_index++;
1294 check_residual = 1;
1295 }
1296 else if ( strcmp(argv[arg_index], "-aug") == 0 )
1297 {
1298 arg_index++;
1299 aug_dim = atoi(argv[arg_index++]);
1300 }
1301 else if ( strcmp(argv[arg_index], "-w") == 0 )
1302 {
1303 arg_index++;
1304 relax_wt = atof(argv[arg_index++]);
1305 }
1306 else if ( strcmp(argv[arg_index], "-wl") == 0 )
1307 {
1308 arg_index++;
1309 relax_wt_level = atof(argv[arg_index++]);
1310 level_w = atoi(argv[arg_index++]);
1311 }
1312 else if ( strcmp(argv[arg_index], "-ow") == 0 )
1313 {
1314 arg_index++;
1315 outer_wt = atof(argv[arg_index++]);
1316 }
1317 else if ( strcmp(argv[arg_index], "-owl") == 0 )
1318 {
1319 arg_index++;
1320 outer_wt_level = atof(argv[arg_index++]);
1321 level_ow = atoi(argv[arg_index++]);
1322 }
1323 else if ( strcmp(argv[arg_index], "-sw") == 0 )
1324 {
1325 arg_index++;
1326 schwarz_rlx_weight = atof(argv[arg_index++]);
1327 }
1328 else if ( strcmp(argv[arg_index], "-coarse_th") == 0 )
1329 {
1330 arg_index++;
1331 coarse_threshold = atof(argv[arg_index++]);
1332 }
1333 else if ( strcmp(argv[arg_index], "-adroptol") == 0 )
1334 {
1335 arg_index++;
1336 A_drop_tol = atof(argv[arg_index++]);
1337 }
1338 else if ( strcmp(argv[arg_index], "-adroptype") == 0 )
1339 {
1340 arg_index++;
1341 A_drop_type = atoi(argv[arg_index++]);
1342 }
1343 else if ( strcmp(argv[arg_index], "-min_cs") == 0 )
1344 {
1345 arg_index++;
1346 min_coarse_size = atof(argv[arg_index++]);
1347 }
1348 else if ( strcmp(argv[arg_index], "-seq_th") == 0 )
1349 {
1350 arg_index++;
1351 seq_threshold = atof(argv[arg_index++]);
1352 }
1353 else if ( strcmp(argv[arg_index], "-red") == 0 )
1354 {
1355 arg_index++;
1356 redundant = atof(argv[arg_index++]);
1357 }
1358 else if ( strcmp(argv[arg_index], "-cutf") == 0 )
1359 {
1360 arg_index++;
1361 coarsen_cut_factor = atoi(argv[arg_index++]);
1362 }
1363 else if ( strcmp(argv[arg_index], "-th") == 0 )
1364 {
1365 arg_index++;
1366 strong_threshold = atof(argv[arg_index++]);
1367 }
1368 else if ( strcmp(argv[arg_index], "-thR") == 0 )
1369 {
1370 arg_index++;
1371 strong_thresholdR = atof(argv[arg_index++]);
1372 }
1373 else if ( strcmp(argv[arg_index], "-fltr_thR") == 0 )
1374 {
1375 arg_index++;
1376 filter_thresholdR = atof(argv[arg_index++]);
1377 }
1378 else if ( strcmp(argv[arg_index], "-CF") == 0 )
1379 {
1380 arg_index++;
1381 relax_order = atoi(argv[arg_index++]);
1382 }
1383 else if ( strcmp(argv[arg_index], "-cf") == 0 )
1384 {
1385 arg_index++;
1386 cf_tol = atof(argv[arg_index++]);
1387 }
1388 else if ( strcmp(argv[arg_index], "-tol") == 0 )
1389 {
1390 arg_index++;
1391 tol = atof(argv[arg_index++]);
1392 }
1393 else if ( strcmp(argv[arg_index], "-conv_type") == 0 )
1394 {
1395 arg_index++;
1396 converge_type = atoi(argv[arg_index++]);
1397 }
1398 else if ( strcmp(argv[arg_index], "-atol") == 0 )
1399 {
1400 arg_index++;
1401 atol = atof(argv[arg_index++]);
1402 }
1403 else if ( strcmp(argv[arg_index], "-mxrs") == 0 )
1404 {
1405 arg_index++;
1406 max_row_sum = atof(argv[arg_index++]);
1407 }
1408 else if ( strcmp(argv[arg_index], "-sai_th") == 0 )
1409 {
1410 arg_index++;
1411 sai_threshold = atof(argv[arg_index++]);
1412 }
1413 else if ( strcmp(argv[arg_index], "-sai_filt") == 0 )
1414 {
1415 arg_index++;
1416 sai_filter = atof(argv[arg_index++]);
1417 }
1418 else if ( strcmp(argv[arg_index], "-drop_tol") == 0 )
1419 {
1420 arg_index++;
1421 drop_tol = atof(argv[arg_index++]);
1422 }
1423 else if ( strcmp(argv[arg_index], "-nonzeros_to_keep") == 0 )
1424 {
1425 arg_index++;
1426 nonzeros_to_keep = atoi(argv[arg_index++]);
1427 }
1428 else if ( strcmp(argv[arg_index], "-ilut") == 0 )
1429 {
1430 arg_index++;
1431 eu_ilut = atof(argv[arg_index++]);
1432 }
1433 else if ( strcmp(argv[arg_index], "-sparseA") == 0 )
1434 {
1435 arg_index++;
1436 eu_sparse_A = atof(argv[arg_index++]);
1437 }
1438 else if ( strcmp(argv[arg_index], "-rowScale") == 0 )
1439 {
1440 arg_index++;
1441 eu_row_scale = 1;
1442 }
1443 else if ( strcmp(argv[arg_index], "-level") == 0 )
1444 {
1445 arg_index++;
1446 eu_level = atoi(argv[arg_index++]);
1447 }
1448 else if ( strcmp(argv[arg_index], "-bj") == 0 )
1449 {
1450 arg_index++;
1451 eu_bj = 1;
1452 }
1453 else if ( strcmp(argv[arg_index], "-eu_stats") == 0 )
1454 {
1455 arg_index++;
1456 eu_stats = 1;
1457 }
1458 else if ( strcmp(argv[arg_index], "-eu_mem") == 0 )
1459 {
1460 arg_index++;
1461 eu_mem = 1;
1462 }
1463 else if ( strcmp(argv[arg_index], "-tr") == 0 )
1464 {
1465 arg_index++;
1466 trunc_factor = atof(argv[arg_index++]);
1467 }
1468 else if ( strcmp(argv[arg_index], "-Pmx") == 0 )
1469 {
1470 arg_index++;
1471 P_max_elmts = atoi(argv[arg_index++]);
1472 }
1473 else if ( strcmp(argv[arg_index], "-interpvecvar") == 0 )
1474 {
1475 arg_index++;
1476 interp_vec_variant = atoi(argv[arg_index++]);
1477 }
1478 else if ( strcmp(argv[arg_index], "-Qtr") == 0 )
1479 {
1480 arg_index++;
1481 Q_trunc = atof(argv[arg_index++]);
1482 }
1483 else if ( strcmp(argv[arg_index], "-Qmx") == 0 )
1484 {
1485 arg_index++;
1486 Q_max = atoi(argv[arg_index++]);
1487 }
1488 else if ( strcmp(argv[arg_index], "-jtr") == 0 )
1489 {
1490 arg_index++;
1491 jacobi_trunc_threshold = atof(argv[arg_index++]);
1492 }
1493 else if ( strcmp(argv[arg_index], "-Ssw") == 0 )
1494 {
1495 arg_index++;
1496 S_commpkg_switch = atof(argv[arg_index++]);
1497 }
1498 else if ( strcmp(argv[arg_index], "-solver_type") == 0 )
1499 {
1500 arg_index++;
1501 solver_type = atoi(argv[arg_index++]);
1502 }
1503 else if ( strcmp(argv[arg_index], "-recompute") == 0 )
1504 {
1505 arg_index++;
1506 recompute_res = atoi(argv[arg_index++]);
1507 }
1508 else if ( strcmp(argv[arg_index], "-iout") == 0 )
1509 {
1510 arg_index++;
1511 ioutdat = atoi(argv[arg_index++]);
1512 }
1513 else if ( strcmp(argv[arg_index], "-pout") == 0 )
1514 {
1515 arg_index++;
1516 poutdat = atoi(argv[arg_index++]);
1517 }
1518 else if ( strcmp(argv[arg_index], "-var") == 0 )
1519 {
1520 arg_index++;
1521 variant = atoi(argv[arg_index++]);
1522 }
1523 else if ( strcmp(argv[arg_index], "-use_ns") == 0 )
1524 {
1525 arg_index++;
1526 use_nonsymm_schwarz = 1;
1527 }
1528 else if ( strcmp(argv[arg_index], "-ov") == 0 )
1529 {
1530 arg_index++;
1531 overlap = atoi(argv[arg_index++]);
1532 }
1533 else if ( strcmp(argv[arg_index], "-dom") == 0 )
1534 {
1535 arg_index++;
1536 domain_type = atoi(argv[arg_index++]);
1537 }
1538 else if ( strcmp(argv[arg_index], "-blk_sm") == 0 )
1539 {
1540 arg_index++;
1541 smooth_num_levels = atoi(argv[arg_index++]);
1542 overlap = 0;
1543 smooth_type = 6;
1544 domain_type = 1;
1545 }
1546 else if ( strcmp(argv[arg_index], "-mu") == 0 )
1547 {
1548 arg_index++;
1549 cycle_type = atoi(argv[arg_index++]);
1550 }
1551 else if ( strcmp(argv[arg_index], "-fmg") == 0 )
1552 {
1553 arg_index++;
1554 fcycle = 1;
1555 }
1556 else if ( strcmp(argv[arg_index], "-numsamp") == 0 )
1557 {
1558 arg_index++;
1559 gsmg_samples = atoi(argv[arg_index++]);
1560 }
1561 else if ( strcmp(argv[arg_index], "-interptype") == 0 )
1562 {
1563 arg_index++;
1564 interp_type = atoi(argv[arg_index++]);
1565 }
1566 else if ( strcmp(argv[arg_index], "-agg_interp") == 0 )
1567 {
1568 arg_index++;
1569 agg_interp_type = atoi(argv[arg_index++]);
1570 }
1571 else if ( strcmp(argv[arg_index], "-agg_Pmx") == 0 )
1572 {
1573 arg_index++;
1574 agg_P_max_elmts = atoi(argv[arg_index++]);
1575 }
1576 else if ( strcmp(argv[arg_index], "-agg_P12_mx") == 0 )
1577 {
1578 arg_index++;
1579 agg_P12_max_elmts = atoi(argv[arg_index++]);
1580 }
1581 else if ( strcmp(argv[arg_index], "-agg_tr") == 0 )
1582 {
1583 arg_index++;
1584 agg_trunc_factor = atof(argv[arg_index++]);
1585 }
1586 else if ( strcmp(argv[arg_index], "-agg_P12_tr") == 0 )
1587 {
1588 arg_index++;
1589 agg_P12_trunc_factor = atof(argv[arg_index++]);
1590 }
1591 else if ( strcmp(argv[arg_index], "-postinterptype") == 0 )
1592 {
1593 arg_index++;
1594 post_interp_type = atoi(argv[arg_index++]);
1595 }
1596 else if ( strcmp(argv[arg_index], "-nodal") == 0 )
1597 {
1598 arg_index++;
1599 nodal = atoi(argv[arg_index++]);
1600 }
1601 else if ( strcmp(argv[arg_index], "-rel_change") == 0 )
1602 {
1603 arg_index++;
1604 rel_change = 1;
1605 }
1606 else if ( strcmp(argv[arg_index], "-nodal_diag") == 0 )
1607 {
1608 arg_index++;
1609 nodal_diag = atoi(argv[arg_index++]);
1610 }
1611 else if ( strcmp(argv[arg_index], "-keepSS") == 0 )
1612 {
1613 arg_index++;
1614 keep_same_sign = atoi(argv[arg_index++]);
1615 }
1616 else if ( strcmp(argv[arg_index], "-cheby_order") == 0 )
1617 {
1618 arg_index++;
1619 cheby_order = atoi(argv[arg_index++]);
1620 }
1621 else if ( strcmp(argv[arg_index], "-cheby_eig_est") == 0 )
1622 {
1623 arg_index++;
1624 cheby_eig_est = atoi(argv[arg_index++]);
1625 }
1626 else if ( strcmp(argv[arg_index], "-cheby_variant") == 0 )
1627 {
1628 arg_index++;
1629 cheby_variant = atoi(argv[arg_index++]);
1630 }
1631 else if ( strcmp(argv[arg_index], "-cheby_scale") == 0 )
1632 {
1633 arg_index++;
1634 cheby_scale = atoi(argv[arg_index++]);
1635 }
1636 else if ( strcmp(argv[arg_index], "-cheby_fraction") == 0 )
1637 {
1638 arg_index++;
1639 cheby_fraction = atof(argv[arg_index++]);
1640 }
1641 else if ( strcmp(argv[arg_index], "-additive") == 0 )
1642 {
1643 arg_index++;
1644 additive = atoi(argv[arg_index++]);
1645 }
1646 else if ( strcmp(argv[arg_index], "-mult_add") == 0 )
1647 {
1648 arg_index++;
1649 mult_add = atoi(argv[arg_index++]);
1650 }
1651 else if ( strcmp(argv[arg_index], "-simple") == 0 )
1652 {
1653 arg_index++;
1654 simple = atoi(argv[arg_index++]);
1655 }
1656 else if ( strcmp(argv[arg_index], "-add_end") == 0 )
1657 {
1658 arg_index++;
1659 add_last_lvl = atoi(argv[arg_index++]);
1660 }
1661 else if ( strcmp(argv[arg_index], "-add_Pmx") == 0 )
1662 {
1663 arg_index++;
1664 add_P_max_elmts = atoi(argv[arg_index++]);
1665 }
1666 else if ( strcmp(argv[arg_index], "-add_tr") == 0 )
1667 {
1668 arg_index++;
1669 add_trunc_factor = atof(argv[arg_index++]);
1670 }
1671 else if ( strcmp(argv[arg_index], "-add_rlx") == 0 )
1672 {
1673 arg_index++;
1674 add_relax_type = atoi(argv[arg_index++]);
1675 }
1676 else if ( strcmp(argv[arg_index], "-add_w") == 0 )
1677 {
1678 arg_index++;
1679 add_relax_wt = atof(argv[arg_index++]);
1680 }
1681 else if ( strcmp(argv[arg_index], "-rap") == 0 )
1682 {
1683 arg_index++;
1684 rap2 = atoi(argv[arg_index++]);
1685 }
1686 else if ( strcmp(argv[arg_index], "-mod_rap2") == 0 )
1687 {
1688 arg_index++;
1689 mod_rap2 = atoi(argv[arg_index++]);
1690 }
1691 else if ( strcmp(argv[arg_index], "-keepT") == 0 )
1692 {
1693 arg_index++;
1694 keepTranspose = atoi(argv[arg_index++]);
1695 }
1696 #ifdef HYPRE_USING_DSUPERLU
1697 else if ( strcmp(argv[arg_index], "-dslu_th") == 0 )
1698 {
1699 arg_index++;
1700 dslu_threshold = atoi(argv[arg_index++]);
1701 }
1702 #endif
1703 else if ( strcmp(argv[arg_index], "-nongalerk_tol") == 0 )
1704 {
1705 arg_index++;
1706 nongalerk_num_tol = atoi(argv[arg_index++]);
1707 nongalerk_tol = hypre_CTAlloc(HYPRE_Real, nongalerk_num_tol, HYPRE_MEMORY_HOST);
1708 for (i = 0; i < nongalerk_num_tol; i++)
1709 {
1710 nongalerk_tol[i] = atof(argv[arg_index++]);
1711 }
1712 }
1713 else if ( strcmp(argv[arg_index], "-print") == 0 )
1714 {
1715 arg_index++;
1716 print_system = 1;
1717 }
1718 /* BM Oct 23, 2006 */
1719 else if ( strcmp(argv[arg_index], "-plot_grids") == 0 )
1720 {
1721 arg_index++;
1722 plot_grids = 1;
1723 }
1724 else if ( strcmp(argv[arg_index], "-plot_file_name") == 0 )
1725 {
1726 arg_index++;
1727 hypre_sprintf (plot_file_name,"%s",argv[arg_index++]);
1728 }
1729 else if ( strcmp(argv[arg_index], "-AIR") == 0 )
1730 {
1731 arg_index++;
1732 air = atoi(argv[arg_index++]);
1733 }
1734 else if ( strcmp(argv[arg_index], "-amgdd_start_level") == 0 )
1735 {
1736 arg_index++;
1737 amgdd_start_level = atoi(argv[arg_index++]);
1738 }
1739 else if ( strcmp(argv[arg_index], "-amgdd_padding") == 0 )
1740 {
1741 arg_index++;
1742 amgdd_padding = atoi(argv[arg_index++]);
1743 }
1744 else if ( strcmp(argv[arg_index], "-amgdd_fac_num_relax") == 0 )
1745 {
1746 arg_index++;
1747 amgdd_fac_num_relax = atoi(argv[arg_index++]);
1748 }
1749 else if ( strcmp(argv[arg_index], "-amgdd_num_comp_cycles") == 0 )
1750 {
1751 arg_index++;
1752 amgdd_num_comp_cycles = atoi(argv[arg_index++]);
1753 }
1754 else if ( strcmp(argv[arg_index], "-amgdd_fac_relax_type") == 0 )
1755 {
1756 arg_index++;
1757 amgdd_fac_relax_type = atoi(argv[arg_index++]);
1758 }
1759 else if ( strcmp(argv[arg_index], "-amgdd_fac_cycle_type") == 0 )
1760 {
1761 arg_index++;
1762 amgdd_fac_cycle_type = atoi(argv[arg_index++]);
1763 }
1764 else if ( strcmp(argv[arg_index], "-amgdd_num_ghost_layers") == 0 )
1765 {
1766 arg_index++;
1767 amgdd_num_ghost_layers = atoi(argv[arg_index++]);
1768 }
1769 else
1770 {
1771 arg_index++;
1772 }
1773 }
1774
1775 /* default settings for AIR alg. */
1776 if (air)
1777 {
1778 restri_type = air; /* Set Restriction to be AIR */
1779 interp_type = 100; /* 1-pt Interp */
1780 #if defined(HYPRE_USING_GPU)
1781 relax_type = 7;
1782 #else
1783 relax_type = 0;
1784 #endif
1785 ns_down = 0;
1786 ns_up = 3;
1787 /* this is a 2-D 4-by-k array using Double pointers */
1788 grid_relax_points = hypre_CTAlloc(HYPRE_Int*, 4, HYPRE_MEMORY_HOST);
1789 grid_relax_points[0] = NULL;
1790 grid_relax_points[1] = hypre_CTAlloc(HYPRE_Int, ns_down, HYPRE_MEMORY_HOST);
1791 grid_relax_points[2] = hypre_CTAlloc(HYPRE_Int, ns_up, HYPRE_MEMORY_HOST);
1792 grid_relax_points[3] = hypre_CTAlloc(HYPRE_Int, ns_coarse, HYPRE_MEMORY_HOST);
1793 /* down cycle: C */
1794 for (i=0; i<ns_down; i++)
1795 {
1796 grid_relax_points[1][i] = 0;//1;
1797 }
1798 /* up cycle: F */
1799 //for (i=0; i<ns_up; i++)
1800 //{
1801 if (ns_up == 3)
1802 {
1803 grid_relax_points[2][0] = -1; // F
1804 grid_relax_points[2][1] = -1; // F
1805 grid_relax_points[2][2] = 1; // C
1806 }
1807 else if (ns_up == 2)
1808 {
1809 grid_relax_points[2][0] = -1; // F
1810 grid_relax_points[2][1] = -1; // F
1811 }
1812 //}
1813 /* coarse: all */
1814 for (i=0; i<ns_coarse; i++)
1815 {
1816 grid_relax_points[3][i] = 0;
1817 }
1818 coarse_threshold = 20;
1819 /* does not support aggressive coarsening */
1820 agg_num_levels = 0;
1821 }
1822 /*-----------------------------------------------------------
1823 * Print usage info
1824 *-----------------------------------------------------------*/
1825
1826 if ( print_usage )
1827 {
1828 if ( myid == 0 )
1829 {
1830 hypre_printf("\n");
1831 hypre_printf("Usage: %s [<options>]\n", argv[0]);
1832 hypre_printf("\n");
1833 hypre_printf(" -fromfile <filename> : ");
1834 hypre_printf("matrix read from multiple files (IJ format)\n");
1835 hypre_printf(" -fromparcsrfile <filename> : ");
1836 hypre_printf("matrix read from multiple files (ParCSR format)\n");
1837 hypre_printf(" -fromonecsrfile <filename> : ");
1838 hypre_printf("matrix read from a single file (CSR format)\n");
1839 hypre_printf("\n");
1840 hypre_printf(" -laplacian [<options>] : build 5pt 2D laplacian problem (default) \n");
1841 hypre_printf(" -sysL <num functions> : build SYSTEMS laplacian 7pt operator\n");
1842 hypre_printf(" -9pt [<opts>] : build 9pt 2D laplacian problem\n");
1843 hypre_printf(" -27pt [<opts>] : build 27pt 3D laplacian problem\n");
1844 hypre_printf(" -difconv [<opts>] : build convection-diffusion problem\n");
1845 hypre_printf(" -n <nx> <ny> <nz> : total problem size \n");
1846 hypre_printf(" -P <Px> <Py> <Pz> : processor topology\n");
1847 hypre_printf(" -c <cx> <cy> <cz> : diffusion coefficients\n");
1848 hypre_printf(" -a <ax> <ay> <az> : convection coefficients\n");
1849 hypre_printf(" -atype <type> : FD scheme for convection \n");
1850 hypre_printf(" 0=Forward (default) 1=Backward\n");
1851 hypre_printf(" 2=Centered 3=Upwind\n");
1852 hypre_printf("\n");
1853 hypre_printf(" -exact_size : inserts immediately into ParCSR structure\n");
1854 hypre_printf(" -storage_low : allocates not enough storage for aux struct\n");
1855 hypre_printf(" -concrete_parcsr : use parcsr matrix type as concrete type\n");
1856 hypre_printf("\n");
1857 hypre_printf(" -rhsfromfile : ");
1858 hypre_printf("rhs read from multiple files (IJ format)\n");
1859 hypre_printf(" -rhsfromonefile : ");
1860 hypre_printf("rhs read from a single file (CSR format)\n");
1861 hypre_printf(" -rhsparcsrfile : ");
1862 hypre_printf("rhs read from multiple files (ParCSR format)\n");
1863 hypre_printf(" -Ffromonefile : ");
1864 hypre_printf("list of F points from a single file\n");
1865 hypre_printf(" -SFfromonefile : ");
1866 hypre_printf("list of isolated F points from a single file\n");
1867 hypre_printf(" -rhsrand : rhs is random vector\n");
1868 hypre_printf(" -rhsisone : rhs is vector with unit components (default)\n");
1869 hypre_printf(" -xisone : solution of all ones\n");
1870 hypre_printf(" -rhszero : rhs is zero vector\n");
1871 hypre_printf("\n");
1872 hypre_printf(" -dt <val> : specify finite backward Euler time step\n");
1873 hypre_printf(" : -rhsfromfile, -rhsfromonefile, -rhsrand,\n");
1874 hypre_printf(" : -rhsrand, or -xisone will be ignored\n");
1875 hypre_printf(" -srcfromfile : ");
1876 hypre_printf("backward Euler source read from multiple files (IJ format)\n");
1877 hypre_printf(" -srcfromonefile : ");
1878 hypre_printf("backward Euler source read from a single file (IJ format)\n");
1879 hypre_printf(" -srcrand : ");
1880 hypre_printf("backward Euler source is random vector with components in range 0 - 1\n");
1881 hypre_printf(" -srcisone : ");
1882 hypre_printf("backward Euler source is vector with unit components (default)\n");
1883 hypre_printf(" -srczero : ");
1884 hypre_printf("backward Euler source is zero-vector\n");
1885 hypre_printf(" -x0fromfile : ");
1886 hypre_printf("initial guess x0 read from multiple files (IJ format)\n");
1887 hypre_printf("\n");
1888 hypre_printf(" -solver <ID> : solver ID\n");
1889 hypre_printf(" 0=AMG 1=AMG-PCG \n");
1890 hypre_printf(" 2=DS-PCG 3=AMG-GMRES \n");
1891 hypre_printf(" 4=DS-GMRES 5=AMG-CGNR \n");
1892 hypre_printf(" 6=DS-CGNR 7=PILUT-GMRES \n");
1893 hypre_printf(" 8=ParaSails-PCG 9=AMG-BiCGSTAB \n");
1894 hypre_printf(" 10=DS-BiCGSTAB 11=PILUT-BiCGSTAB \n");
1895 hypre_printf(" 12=Schwarz-PCG 13=GSMG \n");
1896 hypre_printf(" 14=GSMG-PCG 15=GSMG-GMRES\n");
1897 hypre_printf(" 16=AMG-COGMRES 17=DIAG-COGMRES\n");
1898 hypre_printf(" 18=ParaSails-GMRES\n");
1899 hypre_printf(" 20=Hybrid solver/ DiagScale, AMG \n");
1900 hypre_printf(" 43=Euclid-PCG 44=Euclid-GMRES \n");
1901 hypre_printf(" 45=Euclid-BICGSTAB 46=Euclid-COGMRES\n");
1902 hypre_printf(" 47=Euclid-FlexGMRES\n");
1903 hypre_printf(" 50=DS-LGMRES 51=AMG-LGMRES \n");
1904 hypre_printf(" 60=DS-FlexGMRES 61=AMG-FlexGMRES \n");
1905 hypre_printf(" 70=MGR 71=MGR-PCG \n");
1906 hypre_printf(" 72=MGR-FlexGMRES 73=MGR-BICGSTAB \n");
1907 hypre_printf(" 74=MGR-COGMRES \n");
1908 hypre_printf(" 80=ILU 81=ILU-GMRES \n");
1909 hypre_printf(" 82=ILU-FlexGMRES \n");
1910 hypre_printf(" 90=AMG-DD 91=AMG-DD-GMRES \n");
1911 hypre_printf("\n");
1912 hypre_printf(" -cljp : CLJP coarsening \n");
1913 hypre_printf(" -cljp1 : CLJP coarsening, fixed random \n");
1914 hypre_printf(" -cgc : CGC coarsening \n");
1915 hypre_printf(" -cgce : CGC-E coarsening \n");
1916 hypre_printf(" -pmis : PMIS coarsening \n");
1917 hypre_printf(" -pmis1 : PMIS coarsening, fixed random \n");
1918 hypre_printf(" -hmis : HMIS coarsening (default)\n");
1919 hypre_printf(" -ruge : Ruge-Stueben coarsening (local)\n");
1920 hypre_printf(" -ruge1p : Ruge-Stueben coarsening 1st pass only(local)\n");
1921 hypre_printf(" -ruge3 : third pass on boundary\n");
1922 hypre_printf(" -ruge3c : third pass on boundary, keep c-points\n");
1923 hypre_printf(" -falgout : local Ruge_Stueben followed by CLJP\n");
1924 hypre_printf(" -gm : use global measures\n");
1925 hypre_printf("\n");
1926 hypre_printf(" -interptype <val> : set interpolation type\n");
1927 hypre_printf(" 0=Classical modified interpolation \n");
1928 hypre_printf(" 1=least squares interpolation (for GSMG only) \n");
1929 hypre_printf(" 0=Classical modified interpolation for hyperbolic PDEs \n");
1930 hypre_printf(" 3=direct interpolation with separation of weights \n");
1931 hypre_printf(" 15=direct interpolation\n");
1932 hypre_printf(" 4=multipass interpolation \n");
1933 hypre_printf(" 5=multipass interpolation with separation of weights \n");
1934 hypre_printf(" 6=extended classical modified interpolation (default) \n");
1935 hypre_printf(" 7=extended (only if no common C neighbor) interpolation \n");
1936 hypre_printf(" 8=standard interpolation \n");
1937 hypre_printf(" 9=standard interpolation with separation of weights \n");
1938 hypre_printf(" 12=FF interpolation \n");
1939 hypre_printf(" 13=FF1 interpolation \n");
1940
1941 hypre_printf(" 16=use modified unknown interpolation for a system (w/unknown or hybrid approach) \n");
1942 hypre_printf(" 17=use non-systems interp = 6 for a system (w/unknown or hybrid approach) \n");
1943 hypre_printf(" 18=use non-systems interp = 8 for a system (w/unknown or hybrid approach) \n");
1944 hypre_printf(" 19=use non-systems interp = 0 for a system (w/unknown or hybrid approach) \n");
1945
1946 hypre_printf(" 10=classical block interpolation for nodal systems AMG\n");
1947 hypre_printf(" 11=classical block interpolation with diagonal blocks for nodal systems AMG\n");
1948 hypre_printf(" 20=same as 10, but don't add weak connect. to diag \n");
1949 hypre_printf(" 21=same as 11, but don't add weak connect. to diag \n");
1950 hypre_printf(" 22=classical block interpolation w/Ruge's variant for nodal systems AMG \n");
1951 hypre_printf(" 23=same as 22, but use row sums for diag scaling matrices,for nodal systems AMG \n");
1952 hypre_printf(" 24=direct block interpolation for nodal systems AMG\n");
1953 hypre_printf(" 100=One point interpolation [a Boolean matrix]\n");
1954 hypre_printf("\n");
1955
1956 /* RL */
1957 hypre_printf(" -restritype <val> : set restriction type\n");
1958 hypre_printf(" 0=transpose of the interpolation \n");
1959 hypre_printf(" k=local approximate ideal restriction (AIR-k) \n");
1960 hypre_printf("\n");
1961
1962 hypre_printf(" -rlx <val> : relaxation type\n");
1963 hypre_printf(" 0=Weighted Jacobi \n");
1964 hypre_printf(" 1=Gauss-Seidel (very slow!) \n");
1965 hypre_printf(" 3=Hybrid Gauss-Seidel \n");
1966 hypre_printf(" 4=Hybrid backward Gauss-Seidel \n");
1967 hypre_printf(" 6=Hybrid symmetric Gauss-Seidel \n");
1968 hypre_printf(" 8= symmetric L1-Gauss-Seidel \n");
1969 hypre_printf(" 13= forward L1-Gauss-Seidel \n");
1970 hypre_printf(" 14= backward L1-Gauss-Seidel \n");
1971 hypre_printf(" 15=CG \n");
1972 hypre_printf(" 16=Chebyshev \n");
1973 hypre_printf(" 17=FCF-Jacobi \n");
1974 hypre_printf(" 18=L1-Jacobi (may be used with -CF) \n");
1975 hypre_printf(" 9=Gauss elimination (use for coarsest grid only) \n");
1976 hypre_printf(" 99=Gauss elimination with pivoting (use for coarsest grid only) \n");
1977 hypre_printf(" 20= Nodal Weighted Jacobi (for systems only) \n");
1978 hypre_printf(" 23= Nodal Hybrid Jacobi/Gauss-Seidel (for systems only) \n");
1979 hypre_printf(" 26= Nodal Hybrid Symmetric Gauss-Seidel (for systems only)\n");
1980 hypre_printf(" 29= Nodal Gauss elimination (use for coarsest grid only) \n");
1981 hypre_printf(" -rlx_coarse <val> : set relaxation type for coarsest grid\n");
1982 hypre_printf(" -rlx_down <val> : set relaxation type for down cycle\n");
1983 hypre_printf(" -rlx_up <val> : set relaxation type for up cycle\n");
1984 hypre_printf(" -cheby_order <val> : set order (1-4) for Chebyshev poly. smoother (default is 2)\n");
1985 hypre_printf(" -cheby_fraction <val> : fraction of the spectrum for Chebyshev poly. smoother (default is .3)\n");
1986 hypre_printf(" -nodal <val> : nodal system type\n");
1987 hypre_printf(" 0 = Unknown approach \n");
1988 hypre_printf(" 1 = Frobenius norm \n");
1989 hypre_printf(" 2 = Sum of Abs.value of elements \n");
1990 hypre_printf(" 3 = Largest magnitude element (includes its sign) \n");
1991 hypre_printf(" 4 = Inf. norm \n");
1992 hypre_printf(" 5 = One norm (note: use with block version only) \n");
1993 hypre_printf(" 6 = Sum of all elements in block \n");
1994 hypre_printf(" -nodal_diag <val> :how to treat diag elements\n");
1995 hypre_printf(" 0 = no special treatment \n");
1996 hypre_printf(" 1 = make diag = neg.sum of the off_diag \n");
1997 hypre_printf(" 2 = make diag = neg. of diag \n");
1998 hypre_printf(" -ns <val> : Use <val> sweeps on each level\n");
1999 hypre_printf(" (default C/F down, F/C up, F/C fine\n");
2000 hypre_printf(" -ns_coarse <val> : set no. of sweeps for coarsest grid\n");
2001 /* RL restore these */
2002 hypre_printf(" -ns_down <val> : set no. of sweeps for down cycle\n");
2003 hypre_printf(" -ns_up <val> : set no. of sweeps for up cycle\n");
2004 hypre_printf("\n");
2005 hypre_printf(" -mu <val> : set AMG cycles (1=V, 2=W, etc.)\n");
2006 hypre_printf(" -cutf <val> : set coarsening cut factor for dense rows\n");
2007 hypre_printf(" -th <val> : set AMG threshold Theta = val \n");
2008 hypre_printf(" -tr <val> : set AMG interpolation truncation factor = val \n");
2009 hypre_printf(" -Pmx <val> : set maximal no. of elmts per row for AMG interpolation (default: 4)\n");
2010 hypre_printf(" -jtr <val> : set truncation threshold for Jacobi interpolation = val \n");
2011 hypre_printf(" -Ssw <val> : set S-commpkg-switch = val \n");
2012 hypre_printf(" -mxrs <val> : set AMG maximum row sum threshold for dependency weakening \n");
2013 hypre_printf(" -nf <val> : set number of functions for systems AMG\n");
2014 hypre_printf(" -numsamp <val> : set number of sample vectors for GSMG\n");
2015
2016 hypre_printf(" -postinterptype <val> : invokes <val> no. of Jacobi interpolation steps after main interpolation\n");
2017 hypre_printf("\n");
2018 hypre_printf(" -cgcitr <val> : set maximal number of coarsening iterations for CGC\n");
2019 hypre_printf(" -solver_type <val> : sets solver within Hybrid solver\n");
2020 hypre_printf(" : 1 PCG (default)\n");
2021 hypre_printf(" : 2 GMRES\n");
2022 hypre_printf(" : 3 BiCGSTAB\n");
2023
2024 hypre_printf(" -w <val> : set Jacobi relax weight = val\n");
2025 hypre_printf(" -k <val> : dimension Krylov space for GMRES\n");
2026 hypre_printf(" -aug <val> : number of augmentation vectors for LGMRES (-k indicates total approx space size)\n");
2027
2028 hypre_printf(" -mxl <val> : maximum number of levels (AMG, ParaSAILS)\n");
2029 hypre_printf(" -tol <val> : set solver convergence tolerance = val\n");
2030 hypre_printf(" -atol <val> : set solver absolute convergence tolerance = val\n");
2031 hypre_printf(" -max_iter <val> : set max iterations\n");
2032 hypre_printf(" -mg_max_iter <val> : set max iterations for mg solvers\n");
2033 hypre_printf(" -agg_nl <val> : set number of aggressive coarsening levels (default:0)\n");
2034 hypre_printf(" -np <val> : set number of paths of length 2 for aggr. coarsening\n");
2035 hypre_printf("\n");
2036 hypre_printf(" -sai_th <val> : set ParaSAILS threshold = val \n");
2037 hypre_printf(" -sai_filt <val> : set ParaSAILS filter = val \n");
2038 hypre_printf("\n");
2039 hypre_printf(" -level <val> : set k in ILU(k) for Euclid \n");
2040 hypre_printf(" -bj <val> : enable block Jacobi ILU for Euclid \n");
2041 hypre_printf(" -ilut <val> : set drop tolerance for ILUT in Euclid\n");
2042 hypre_printf(" Note ILUT is sequential only!\n");
2043 hypre_printf(" -sparseA <val> : set drop tolerance in ILU(k) for Euclid \n");
2044 hypre_printf(" -rowScale <val> : enable row scaling in Euclid \n");
2045 hypre_printf("\n");
2046 hypre_printf(" -drop_tol <val> : set threshold for dropping in PILUT\n");
2047 hypre_printf(" -nonzeros_to_keep <val>: number of nonzeros in each row to keep\n");
2048 hypre_printf("\n");
2049 hypre_printf(" -iout <val> : set output flag\n");
2050 hypre_printf(" 0=no output 1=matrix stats\n");
2051 hypre_printf(" 2=cycle stats 3=matrix & cycle stats\n");
2052 hypre_printf("\n");
2053 hypre_printf(" -dbg <val> : set debug flag\n");
2054 hypre_printf(" 0=no debugging\n 1=internal timing\n 2=interpolation truncation\n 3=more detailed timing in coarsening routine\n");
2055 hypre_printf("\n");
2056 hypre_printf(" -print : print out the system\n");
2057 hypre_printf("\n");
2058 /* begin lobpcg */
2059
2060 hypre_printf("LOBPCG options:\n");
2061 hypre_printf("\n");
2062 hypre_printf(" -lobpcg : run LOBPCG instead of PCG\n");
2063 hypre_printf("\n");
2064 hypre_printf(" -gen : solve generalized EVP with B = Laplacian\n");
2065 hypre_printf("\n");
2066 hypre_printf(" -con : solve constrained EVP using 'vectors.*.*'\n");
2067 hypre_printf(" as constraints (see -vout 1 below)\n");
2068 hypre_printf("\n");
2069 hypre_printf(" -solver none : no HYPRE preconditioner is used\n");
2070 hypre_printf("\n");
2071 hypre_printf(" -itr <val> : maximal number of LOBPCG iterations\n");
2072 hypre_printf(" (default 100);\n");
2073 hypre_printf("\n");
2074 hypre_printf(" -vrand <val> : compute <val> eigenpairs using random\n");
2075 hypre_printf(" initial vectors (default 1)\n");
2076 hypre_printf("\n");
2077 hypre_printf(" -seed <val> : use <val> as the seed for the random\n");
2078 hypre_printf(" number generator(default seed is based\n");
2079 hypre_printf(" on the time of the run)\n");
2080 hypre_printf("\n");
2081 hypre_printf(" -vfromfile : read initial vectors from files\n");
2082 hypre_printf(" vectors.i.j where i is vector number\n");
2083 hypre_printf(" and j is processor number\n");
2084 hypre_printf("\n");
2085 hypre_printf(" -orthchk : check eigenvectors for orthonormality\n");
2086 hypre_printf("\n");
2087 hypre_printf(" -verb <val> : verbosity level\n");
2088 hypre_printf(" -verb 0 : no print\n");
2089 hypre_printf(" -verb 1 : print initial eigenvalues and residuals,\n");
2090 hypre_printf(" the iteration number, the number of\n");
2091 hypre_printf(" non-convergent eigenpairs and final\n");
2092 hypre_printf(" eigenvalues and residuals (default)\n");
2093 hypre_printf(" -verb 2 : print eigenvalues and residuals on each\n");
2094 hypre_printf(" iteration\n");
2095 hypre_printf("\n");
2096 hypre_printf(" -pcgitr <val> : maximal number of inner PCG iterations\n");
2097 hypre_printf(" for preconditioning (default 1);\n");
2098 hypre_printf(" if <val> = 0 then the preconditioner\n");
2099 hypre_printf(" is applied directly\n");
2100 hypre_printf("\n");
2101 hypre_printf(" -pcgtol <val> : residual tolerance for inner iterations\n");
2102 hypre_printf(" (default 0.01)\n");
2103 hypre_printf("\n");
2104 hypre_printf(" -vout <val> : file output level\n");
2105 hypre_printf(" -vout 0 : no files created (default)\n");
2106 hypre_printf(" -vout 1 : write eigenvalues to values.txt, residuals\n");
2107 hypre_printf(" to residuals.txt and eigenvectors to \n");
2108 hypre_printf(" vectors.i.j where i is vector number\n");
2109 hypre_printf(" and j is processor number\n");
2110 hypre_printf(" -vout 2 : in addition to the above, write the\n");
2111 hypre_printf(" eigenvalues history (the matrix whose\n");
2112 hypre_printf(" i-th column contains eigenvalues at\n");
2113 hypre_printf(" (i+1)-th iteration) to val_hist.txt and\n");
2114 hypre_printf(" residuals history to res_hist.txt\n");
2115 hypre_printf("\nNOTE: in this test driver LOBPCG only works with solvers 1, 2, 8, 12, 14 and 43\n");
2116 hypre_printf("\ndefault solver is 1\n");
2117 hypre_printf("\n");
2118
2119 /* end lobpcg */
2120
2121 hypre_printf(" -plot_grids : print out information for plotting the grids\n");
2122 hypre_printf(" -plot_file_name <val> : file name for plotting output\n");
2123 hypre_printf("\n");
2124 hypre_printf(" -smtype <val> :smooth type\n");
2125 hypre_printf(" -smlv <val> :smooth num levels\n");
2126 hypre_printf(" -ov <val> :over lap:\n");
2127 hypre_printf(" -dom <val> :domain type\n");
2128 hypre_printf(" -use_ns : use non-symm schwarz smoother\n");
2129 hypre_printf(" -var <val> : schwarz smoother variant (0-3) \n");
2130 hypre_printf(" -blk_sm <val> : same as '-smtype 6 -ov 0 -dom 1 -smlv <val>'\n");
2131 hypre_printf(" -nongalerk_tol <val> <list> : specify the NonGalerkin drop tolerance\n");
2132 hypre_printf(" and list contains the values, where last value\n");
2133 hypre_printf(" in list is repeated if val < num_levels in AMG\n");
2134
2135 /* MGR options */
2136 hypre_printf(" -mgr_bsize <val> : set block size = val\n");
2137 hypre_printf(" -mgr_nlevels <val> : set number of coarsening levels = val\n");
2138 hypre_printf(" -mgr_num_reserved_nodes <val> : set number of reserved nodes \n");
2139 hypre_printf(" to be kept till the coarsest grid = val\n");
2140 hypre_printf(" -mgr_non_c_to_f <val> : set strategy for intermediate coarse grid \n");
2141 hypre_printf(" -mgr_non_c_to_f 0 : Allow some non Cpoints to be labeled \n");
2142 hypre_printf(" Cpoints on intermediate grid \n");
2143 hypre_printf(" -mgr_non_c_to_f 1 : set non Cpoints strictly to Fpoints \n");
2144 hypre_printf(" -mgr_frelax_method <val> : set F-relaxation strategy \n");
2145 hypre_printf(" -mgr_frelax_method 0 : Use 'single-level smoother' strategy \n");
2146 hypre_printf(" for F-relaxation \n");
2147 hypre_printf(" -mgr_frelax_method 1 : Use a 'multi-level smoother' strategy \n");
2148 hypre_printf(" for F-relaxation \n");
2149 /* end MGR options */
2150 /* hypre ILU options */
2151 hypre_printf(" -ilu_type <val> : set ILU factorization type = val\n");
2152 hypre_printf(" -ilu_type 0 : Block Jacobi with ILU(k) variants \n");
2153 hypre_printf(" -ilu_type 1 : Block Jacobi with ILUT \n");
2154 hypre_printf(" -ilu_type 10 : GMRES with ILU(k) variants \n");
2155 hypre_printf(" -ilu_type 11 : GMRES with ILUT \n");
2156 hypre_printf(" -ilu_type 20 : NSH with ILU(k) variants \n");
2157 hypre_printf(" -ilu_type 21 : NSH with ILUT \n");
2158 hypre_printf(" -ilu_type 30 : RAS with ILU(k) variants \n");
2159 hypre_printf(" -ilu_type 31 : RAS with ILUT \n");
2160 hypre_printf(" -ilu_type 40 : ddPQ + GMRES with ILU(k) variants \n");
2161 hypre_printf(" -ilu_type 41 : ddPQ + GMRES with ILUT \n");
2162 hypre_printf(" -ilu_type 50 : GMRES with ILU(0): RAP variant with MILU(0) \n");
2163 hypre_printf(" -ilu_lfil <val> : set level of fill (k) for ILU(k) = val\n");
2164 hypre_printf(" -ilu_droptol <val> : set drop tolerance threshold for ILUT = val \n");
2165 hypre_printf(" -ilu_max_row_nnz <val> : set max. num of nonzeros to keep per row = val \n");
2166 hypre_printf(" -ilu_schur_max_iter <val> : set max. num of iteration for GMRES/NSH Schur = val \n");
2167 hypre_printf(" -ilu_nsh_droptol <val> : set drop tolerance threshold for NSH = val \n");
2168 hypre_printf(" -ilu_sm_max_iter <val> : set number of iterations when applied as a smmother in AMG = val \n");
2169 /* end ILU options */
2170 /* hypre AMG-DD options */
2171 hypre_printf(" -amgdd_start_level <val> : set AMG-DD start level = val\n");
2172 hypre_printf(" -amgdd_padding <val> : set AMG-DD padding = val\n");
2173 hypre_printf(" -amgdd_num_ghost_layers <val> : set AMG-DD number of ghost layers = val\n");
2174 hypre_printf(" -amgdd_fac_num_relax <val> : set AMG-DD FAC cycle number of pre/post-relaxations = val\n");
2175 hypre_printf(" -amgdd_num_comp_cycles <val> : set AMG-DD number of inner FAC cycles = val\n");
2176 hypre_printf(" -amgdd_fac_relax_type <val> : set AMG-DD FAC relaxation type = val\n");
2177 hypre_printf(" 0=Weighted Jacobi \n");
2178 hypre_printf(" 1=Gauss-Seidel \n");
2179 hypre_printf(" 2=Ordered Gauss-Seidel \n");
2180 hypre_printf(" 3=CFL1 Jacobi \n");
2181 hypre_printf(" -amgdd_fac_cycle_type <val> : set AMG-DD FAC cycle type = val\n");
2182 hypre_printf(" 1=V-cycle \n");
2183 hypre_printf(" 2=W-cycle \n");
2184 hypre_printf(" 3=F-cycle \n");
2185 /* end AMG-DD options */
2186 }
2187
2188 goto final;
2189 }
2190
2191 /*-----------------------------------------------------------
2192 * Print driver parameters
2193 *-----------------------------------------------------------*/
2194
2195 if (myid == 0)
2196 {
2197 #ifdef HYPRE_DEVELOP_STRING
2198 #ifdef HYPRE_DEVELOP_BRANCH
2199 hypre_printf("\nUsing HYPRE_DEVELOP_STRING: %s (main development branch %s)\n\n",
2200 HYPRE_DEVELOP_STRING, HYPRE_DEVELOP_BRANCH);
2201 #else
2202 hypre_printf("\nUsing HYPRE_DEVELOP_STRING: %s (not main development branch)\n\n",
2203 HYPRE_DEVELOP_STRING);
2204 #endif
2205 #endif
2206 hypre_printf("Running with these driver parameters:\n");
2207 hypre_printf(" solver ID = %d\n\n", solver_id);
2208 }
2209
2210 /*-----------------------------------------------------------------
2211 * GPU Device binding
2212 * Must be done before HYPRE_Init() and should not be changed after
2213 *-----------------------------------------------------------------*/
2214 hypre_bind_device(myid, num_procs, hypre_MPI_COMM_WORLD);
2215
2216 time_index = hypre_InitializeTiming("Hypre init");
2217 hypre_BeginTiming(time_index);
2218
2219 /*-----------------------------------------------------------
2220 * Initialize : must be the first HYPRE function to call
2221 *-----------------------------------------------------------*/
2222 HYPRE_Init();
2223
2224 hypre_EndTiming(time_index);
2225 hypre_PrintTiming("Hypre init times", hypre_MPI_COMM_WORLD);
2226 hypre_FinalizeTiming(time_index);
2227 hypre_ClearTiming();
2228
2229 #ifdef HYPRE_USING_DEVICE_POOL
2230 /* To be effective, HYPRE_SetGPUMemoryPoolSize must immediately follow HYPRE_Init */
2231 HYPRE_SetGPUMemoryPoolSize( mempool_bin_growth, mempool_min_bin,
2232 mempool_max_bin, mempool_max_cached_bytes );
2233 #endif
2234
2235 #if defined(HYPRE_USING_UMPIRE)
2236 /* Setup Umpire pools */
2237 HYPRE_SetUmpireDevicePoolName("HYPRE_DEVICE_POOL_TEST");
2238 HYPRE_SetUmpireUMPoolName("HYPRE_UM_POOL_TEST");
2239 HYPRE_SetUmpireHostPoolName("HYPRE_HOST_POOL_TEST");
2240 HYPRE_SetUmpirePinnedPoolName("HYPRE_PINNED_POOL_TEST");
2241 HYPRE_SetUmpireDevicePoolSize(4LL * 1024 * 1024 * 1024);
2242 HYPRE_SetUmpireUMPoolSize(4LL * 1024 * 1024 * 1024);
2243 HYPRE_SetUmpireHostPoolSize(4LL * 1024 * 1024 * 1024);
2244 HYPRE_SetUmpirePinnedPoolSize(4LL * 1024 * 1024 * 1024);
2245 #endif
2246
2247 /* default memory location */
2248 HYPRE_SetMemoryLocation(memory_location);
2249
2250 /* default execution policy */
2251 HYPRE_SetExecutionPolicy(default_exec_policy);
2252
2253 #if defined(HYPRE_USING_GPU)
2254 /* use cuSPARSE for SpGEMM */
2255 ierr = HYPRE_SetSpGemmUseCusparse(spgemm_use_cusparse); hypre_assert(ierr == 0);
2256 ierr = hypre_SetSpGemmAlgorithm(spgemm_alg); hypre_assert(ierr == 0);
2257 ierr = hypre_SetSpGemmRownnzEstimateMethod(spgemm_rowest_mtd); hypre_assert(ierr == 0);
2258 ierr = hypre_SetSpGemmRownnzEstimateNSamples(spgemm_rowest_nsamples); hypre_assert(ierr == 0);
2259 ierr = hypre_SetSpGemmRownnzEstimateMultFactor(spgemm_rowest_mult); hypre_assert(ierr == 0);
2260 ierr = hypre_SetSpGemmHashType(spgemm_hash_type); hypre_assert(ierr == 0);
2261 /* use cuRand for PMIS */
2262 HYPRE_SetUseGpuRand(use_curand);
2263 #endif
2264
2265 /*-----------------------------------------------------------
2266 * Set up matrix
2267 *-----------------------------------------------------------*/
2268
2269 if ( myid == 0 && dt != dt_inf)
2270 {
2271 hypre_printf(" Backward Euler time step with dt = %e\n", dt);
2272 hypre_printf(" Dirichlet 0 BCs are implicit in the spatial operator\n");
2273 }
2274
2275 time_index = hypre_InitializeTiming("Spatial Operator");
2276 hypre_BeginTiming(time_index);
2277 if ( build_matrix_type == -1 )
2278 {
2279 ierr = HYPRE_IJMatrixRead( argv[build_matrix_arg_index], comm,
2280 HYPRE_PARCSR, &ij_A );
2281 if (ierr)
2282 {
2283 hypre_printf("ERROR: Problem reading in the system matrix!\n");
2284 exit(1);
2285 }
2286 }
2287 else if ( build_matrix_type == 0 )
2288 {
2289 BuildParFromFile(argc, argv, build_matrix_arg_index, &parcsr_A);
2290 }
2291 else if ( build_matrix_type == 1 )
2292 {
2293 BuildParFromOneFile(argc, argv, build_matrix_arg_index, num_functions,
2294 &parcsr_A);
2295 }
2296 else if ( build_matrix_type == 2 )
2297 {
2298 BuildParLaplacian(argc, argv, build_matrix_arg_index, &parcsr_A);
2299 }
2300 else if ( build_matrix_type == 3 )
2301 {
2302 BuildParLaplacian9pt(argc, argv, build_matrix_arg_index, &parcsr_A);
2303 }
2304 else if ( build_matrix_type == 4 )
2305 {
2306 BuildParLaplacian27pt(argc, argv, build_matrix_arg_index, &parcsr_A);
2307
2308 hypre_CSRMatrixGpuSpMVAnalysis(hypre_ParCSRMatrixDiag(parcsr_A));
2309 }
2310 else if ( build_matrix_type == 5 )
2311 {
2312 BuildParDifConv(argc, argv, build_matrix_arg_index, &parcsr_A);
2313 }
2314 else if ( build_matrix_type == 6 )
2315 {
2316 BuildParVarDifConv(argc, argv, build_matrix_arg_index, &parcsr_A, &b);
2317 build_rhs_type = 6;
2318 build_src_type = 5;
2319 }
2320 else if ( build_matrix_type == 7 )
2321 {
2322 BuildParRotate7pt(argc, argv, build_matrix_arg_index, &parcsr_A);
2323 }
2324
2325 else
2326 {
2327 hypre_printf("You have asked for an unsupported problem with\n");
2328 hypre_printf("build_matrix_type = %d.\n", build_matrix_type);
2329 return(-1);
2330 }
2331 /* BM Oct 23, 2006 */
2332 if (plot_grids)
2333 {
2334 if (build_matrix_type > 1 && build_matrix_type < 8)
2335 BuildParCoordinates (argc, argv, build_matrix_arg_index,
2336 &coord_dim, &coordinates);
2337 else
2338 {
2339 hypre_printf("Warning: coordinates are not yet printed for build_matrix_type = %d.\n", build_matrix_type);
2340 }
2341 }
2342
2343 if (build_matrix_type < 0)
2344 {
2345 ierr = HYPRE_IJMatrixGetLocalRange( ij_A,
2346 &first_local_row, &last_local_row ,
2347 &first_local_col, &last_local_col );
2348
2349 local_num_rows = (HYPRE_Int)(last_local_row - first_local_row + 1);
2350 local_num_cols = (HYPRE_Int)(last_local_col - first_local_col + 1);
2351 ierr += HYPRE_IJMatrixGetObject( ij_A, &object);
2352 parcsr_A = (HYPRE_ParCSRMatrix) object;
2353 }
2354 else
2355 {
2356 /*-----------------------------------------------------------
2357 * Copy the parcsr matrix into the IJMatrix through interface calls
2358 *-----------------------------------------------------------*/
2359 ierr = HYPRE_ParCSRMatrixGetLocalRange( parcsr_A,
2360 &first_local_row, &last_local_row ,
2361 &first_local_col, &last_local_col );
2362
2363 local_num_rows = (HYPRE_Int)(last_local_row - first_local_row + 1);
2364 local_num_cols = (HYPRE_Int)(last_local_col - first_local_col + 1);
2365 }
2366 hypre_EndTiming(time_index);
2367 hypre_PrintTiming("Generate Matrix", hypre_MPI_COMM_WORLD);
2368 hypre_FinalizeTiming(time_index);
2369 hypre_ClearTiming();
2370
2371 /* Check the ij interface - not necessary if one just wants to test solvers */
2372 if (test_ij && build_matrix_type > -1)
2373 {
2374 hypre_ParCSRMatrixMigrate(parcsr_A, HYPRE_MEMORY_HOST);
2375
2376 HYPRE_Int mx_size = 5;
2377 time_index = hypre_InitializeTiming("Generate IJ matrix");
2378 hypre_BeginTiming(time_index);
2379
2380 ierr += HYPRE_ParCSRMatrixGetDims( parcsr_A, &M, &N );
2381
2382 ierr += HYPRE_IJMatrixCreate( comm, first_local_row, last_local_row,
2383 first_local_col, last_local_col, &ij_A );
2384
2385 ierr += HYPRE_IJMatrixSetObjectType( ij_A, HYPRE_PARCSR );
2386 num_rows = local_num_rows;
2387 if (off_proc)
2388 {
2389 if (myid != num_procs-1)
2390 {
2391 num_rows++;
2392 }
2393 if (myid)
2394 {
2395 num_rows++;
2396 }
2397 }
2398 /* The following shows how to build an IJMatrix if one has only an
2399 estimate for the row sizes */
2400 row_nums = hypre_CTAlloc(HYPRE_BigInt, num_rows, HYPRE_MEMORY_HOST);
2401 num_cols = hypre_CTAlloc(HYPRE_Int, num_rows, HYPRE_MEMORY_HOST);
2402 if (sparsity_known == 1)
2403 {
2404 diag_sizes = hypre_CTAlloc(HYPRE_Int, local_num_rows, HYPRE_MEMORY_HOST);
2405 offdiag_sizes = hypre_CTAlloc(HYPRE_Int, local_num_rows, HYPRE_MEMORY_HOST);
2406 }
2407 else
2408 {
2409 size = 5;
2410 if (sparsity_known == 0)
2411 {
2412 if (build_matrix_type == 2)
2413 {
2414 size = 7;
2415 }
2416 if (build_matrix_type == 3)
2417 {
2418 size = 9;
2419 }
2420 if (build_matrix_type == 4)
2421 {
2422 size = 27;
2423 }
2424 }
2425 row_sizes = hypre_CTAlloc(HYPRE_Int, num_rows, HYPRE_MEMORY_HOST);
2426 for (i = 0; i < num_rows; i++)
2427 {
2428 row_sizes[i] = size;
2429 }
2430 }
2431 local_row = 0;
2432 if (build_matrix_type == 2)
2433 {
2434 mx_size = 7;
2435 }
2436 if (build_matrix_type == 3)
2437 {
2438 mx_size = 9;
2439 }
2440 if (build_matrix_type == 4)
2441 {
2442 mx_size = 27;
2443 }
2444 col_nums = hypre_CTAlloc(HYPRE_BigInt, mx_size*num_rows, HYPRE_MEMORY_HOST);
2445 data = hypre_CTAlloc(HYPRE_Real, mx_size*num_rows, HYPRE_MEMORY_HOST);
2446 i_indx = 0;
2447 j_indx = 0;
2448
2449 if (off_proc && myid)
2450 {
2451 num_cols[i_indx] = 2;
2452 row_nums[i_indx++] = first_local_row - 1;
2453 col_nums[j_indx] = first_local_row - 1;
2454 data[j_indx++] = 6.0;
2455 col_nums[j_indx] = first_local_row - 2;
2456 data[j_indx++] = -1.0;
2457 }
2458 for (i = 0; i < local_num_rows; i++)
2459 {
2460 row_nums[i_indx] = first_local_row + i;
2461 ierr += HYPRE_ParCSRMatrixGetRow(parcsr_A, first_local_row+i, &size, &col_inds, &values);
2462 num_cols[i_indx++] = size;
2463 hypre_TMemcpy(&col_nums[j_indx], &col_inds[0], HYPRE_BigInt, size, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST);
2464 hypre_TMemcpy(&data[j_indx], &values[0], HYPRE_Real, size, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST);
2465 if (sparsity_known == 1)
2466 {
2467 for (j = 0; j < size; j++)
2468 {
2469 if (col_nums[j_indx+j] < first_local_row || col_nums[j_indx+j] > last_local_row)
2470 {
2471 offdiag_sizes[local_row]++;
2472 }
2473 else
2474 {
2475 diag_sizes[local_row]++;
2476 }
2477 }
2478 }
2479 j_indx += size;
2480 local_row++;
2481 ierr += HYPRE_ParCSRMatrixRestoreRow(parcsr_A, first_local_row+i, &size, &col_inds, &values);
2482 }
2483
2484 if (off_proc && myid != num_procs-1)
2485 {
2486 num_cols[i_indx] = 2;
2487 row_nums[i_indx++] = last_local_row + 1;
2488 col_nums[j_indx] = last_local_row + 2;
2489 data[j_indx++] = -1.0;
2490 col_nums[j_indx] = last_local_row + 1;
2491 data[j_indx++] = 6.0;
2492 }
2493
2494 if (sparsity_known == 1)
2495 {
2496 ierr += HYPRE_IJMatrixSetDiagOffdSizes( ij_A, (const HYPRE_Int *) diag_sizes,
2497 (const HYPRE_Int *) offdiag_sizes );
2498 }
2499 else
2500 {
2501 ierr = HYPRE_IJMatrixSetRowSizes ( ij_A, (const HYPRE_Int *) row_sizes );
2502 }
2503
2504 ierr += HYPRE_IJMatrixInitialize_v2( ij_A, memory_location );
2505
2506 if (omp_flag)
2507 {
2508 HYPRE_IJMatrixSetOMPFlag(ij_A, 1);
2509 }
2510
2511 /* move arrays to `memory_location' */
2512 HYPRE_Int *num_cols_h = num_cols;
2513 HYPRE_BigInt *row_nums_h = row_nums;
2514 HYPRE_BigInt *col_nums_h = col_nums;
2515 HYPRE_Real *data_h = data;
2516 if (hypre_GetActualMemLocation(memory_location) != hypre_MEMORY_HOST)
2517 {
2518 num_cols = hypre_TAlloc(HYPRE_Int, num_rows, memory_location);
2519 row_nums = hypre_TAlloc(HYPRE_BigInt, num_rows, memory_location);
2520 col_nums = hypre_TAlloc(HYPRE_BigInt, mx_size*num_rows, memory_location);
2521 data = hypre_TAlloc(HYPRE_Real, mx_size*num_rows, memory_location);
2522
2523 hypre_TMemcpy(num_cols, num_cols_h, HYPRE_Int, num_rows, memory_location, HYPRE_MEMORY_HOST);
2524 hypre_TMemcpy(row_nums, row_nums_h, HYPRE_BigInt, num_rows, memory_location, HYPRE_MEMORY_HOST);
2525 hypre_TMemcpy(col_nums, col_nums_h, HYPRE_BigInt, mx_size*num_rows, memory_location, HYPRE_MEMORY_HOST);
2526 hypre_TMemcpy(data, data_h, HYPRE_Real, mx_size*num_rows, memory_location, HYPRE_MEMORY_HOST);
2527 }
2528
2529 if (chunk)
2530 {
2531 if (add)
2532 {
2533 ierr += HYPRE_IJMatrixAddToValues(ij_A, num_rows, num_cols, row_nums,
2534 (const HYPRE_BigInt *) col_nums,
2535 (const HYPRE_Real *) data);
2536 }
2537 else
2538 {
2539 ierr += HYPRE_IJMatrixSetValues(ij_A, num_rows, num_cols, row_nums,
2540 (const HYPRE_BigInt *) col_nums,
2541 (const HYPRE_Real *) data);
2542 }
2543 }
2544 else
2545 {
2546 j_indx = 0;
2547 for (i=0; i < num_rows; i++)
2548 {
2549 if (add)
2550 {
2551 ierr += HYPRE_IJMatrixAddToValues( ij_A, 1, &num_cols[i], &row_nums[i],
2552 (const HYPRE_BigInt *) &col_nums[j_indx],
2553 (const HYPRE_Real *) &data[j_indx] );
2554 }
2555 else
2556 {
2557 ierr += HYPRE_IJMatrixSetValues( ij_A, 1, &num_cols[i], &row_nums[i],
2558 (const HYPRE_BigInt *) &col_nums[j_indx],
2559 (const HYPRE_Real *) &data[j_indx] );
2560 }
2561 j_indx += num_cols_h[i];
2562 }
2563 }
2564 hypre_TFree(num_cols_h, HYPRE_MEMORY_HOST);
2565 hypre_TFree(row_nums_h, HYPRE_MEMORY_HOST);
2566 hypre_TFree(col_nums_h, HYPRE_MEMORY_HOST);
2567 hypre_TFree(data_h, HYPRE_MEMORY_HOST);
2568 if (hypre_GetActualMemLocation(memory_location) != hypre_MEMORY_HOST)
2569 {
2570 hypre_TFree(col_nums, memory_location);
2571 hypre_TFree(data, memory_location);
2572 hypre_TFree(row_nums, memory_location);
2573 hypre_TFree(num_cols, memory_location);
2574 }
2575
2576 if (sparsity_known == 1)
2577 {
2578 hypre_TFree(diag_sizes, HYPRE_MEMORY_HOST);
2579 hypre_TFree(offdiag_sizes, HYPRE_MEMORY_HOST);
2580 }
2581 else
2582 {
2583 hypre_TFree(row_sizes, HYPRE_MEMORY_HOST);
2584 }
2585
2586 ierr += HYPRE_IJMatrixAssemble( ij_A );
2587
2588 hypre_EndTiming(time_index);
2589 hypre_PrintTiming("IJ Matrix Setup", hypre_MPI_COMM_WORLD);
2590 hypre_FinalizeTiming(time_index);
2591 hypre_ClearTiming();
2592
2593 if (ierr)
2594 {
2595 hypre_printf("Error in driver building IJMatrix from parcsr matrix. \n");
2596 return(-1);
2597 }
2598
2599 /* This is to emphasize that one can IJMatrixAddToValues after an
2600 IJMatrixRead or an IJMatrixAssemble. After an IJMatrixRead,
2601 assembly is unnecessary if the sparsity pattern of the matrix is
2602 not changed somehow. If one has not used IJMatrixRead, one has
2603 the opportunity to IJMatrixAddTo before an IJMatrixAssemble.
2604 When check_constant is set, this first sets all matrix coefficients to -1
2605 and then adds 7.0 to the diagonal to restore the original matrix. */
2606
2607 if (check_constant)
2608 {
2609 ierr += HYPRE_IJMatrixSetConstantValues( ij_A, -1.0 );
2610 }
2611
2612 ncols = hypre_TAlloc(HYPRE_Int, last_local_row - first_local_row + 1, HYPRE_MEMORY_HOST);
2613 rows = hypre_TAlloc(HYPRE_BigInt, last_local_row - first_local_row + 1, HYPRE_MEMORY_HOST);
2614 col_inds = hypre_TAlloc(HYPRE_BigInt, last_local_row - first_local_row + 1, HYPRE_MEMORY_HOST);
2615 values = hypre_TAlloc(HYPRE_Real, last_local_row - first_local_row + 1, HYPRE_MEMORY_HOST);
2616
2617 val = 0.0;
2618
2619 if (check_constant)
2620 {
2621 val = 7.0;
2622 }
2623 if (dt < dt_inf)
2624 {
2625 val += 1./dt;
2626 }
2627 else
2628 {
2629 val += 0.0; /* Use zero to avoid unintentional loss of significance */
2630 }
2631
2632 for (big_i = first_local_row; big_i <= last_local_row; big_i++)
2633 {
2634 j = (HYPRE_Int) (big_i - first_local_row);
2635 ncols[j] = 1;
2636 rows[j] = big_i;
2637 col_inds[j] = big_i;
2638 values[j] = val;
2639 }
2640
2641 if (hypre_GetActualMemLocation(memory_location) != hypre_MEMORY_HOST)
2642 {
2643 HYPRE_Int *ncols_h = ncols;
2644 HYPRE_BigInt *rows_h = rows;
2645 HYPRE_BigInt *col_inds_h = col_inds;
2646 HYPRE_Real *values_h = values;
2647
2648 ncols = hypre_TAlloc(HYPRE_Int, last_local_row - first_local_row + 1, memory_location);
2649 rows = hypre_TAlloc(HYPRE_BigInt, last_local_row - first_local_row + 1, memory_location);
2650 col_inds = hypre_TAlloc(HYPRE_BigInt, last_local_row - first_local_row + 1, memory_location);
2651 values = hypre_TAlloc(HYPRE_Real, last_local_row - first_local_row + 1, memory_location);
2652
2653 hypre_TMemcpy(ncols, ncols_h, HYPRE_Int, last_local_row - first_local_row + 1, memory_location, HYPRE_MEMORY_HOST);
2654 hypre_TMemcpy(rows, rows_h, HYPRE_BigInt, last_local_row - first_local_row + 1, memory_location, HYPRE_MEMORY_HOST);
2655 hypre_TMemcpy(col_inds, col_inds_h, HYPRE_BigInt, last_local_row - first_local_row + 1, memory_location, HYPRE_MEMORY_HOST);
2656 hypre_TMemcpy(values, values_h, HYPRE_Real, last_local_row - first_local_row + 1, memory_location, HYPRE_MEMORY_HOST);
2657
2658 hypre_TFree(ncols_h, HYPRE_MEMORY_HOST);
2659 hypre_TFree(rows_h, HYPRE_MEMORY_HOST);
2660 hypre_TFree(col_inds_h, HYPRE_MEMORY_HOST);
2661 hypre_TFree(values_h, HYPRE_MEMORY_HOST);
2662 }
2663
2664 ierr += HYPRE_IJMatrixAddToValues( ij_A,
2665 local_num_rows,
2666 /* this is to show one can use NULL if ncols contains all ones */
2667 NULL, /* ncols, */
2668 rows,
2669 (const HYPRE_BigInt *) col_inds,
2670 (const HYPRE_Real *) values );
2671
2672 hypre_TFree(ncols, memory_location);
2673 hypre_TFree(rows, memory_location);
2674 hypre_TFree(col_inds, memory_location);
2675 hypre_TFree(values, memory_location);
2676
2677 /* If sparsity pattern is not changed since last IJMatrixAssemble call,
2678 this should be a no-op */
2679
2680 ierr += HYPRE_IJMatrixAssemble( ij_A );
2681
2682 /*-----------------------------------------------------------
2683 * Fetch the resulting underlying matrix out
2684 *-----------------------------------------------------------*/
2685 ierr += HYPRE_ParCSRMatrixDestroy(parcsr_A);
2686
2687 ierr += HYPRE_IJMatrixGetObject( ij_A, &object);
2688 parcsr_A = (HYPRE_ParCSRMatrix) object;
2689 }
2690
2691 /*-----------------------------------------------------------
2692 * Set up the interp vector
2693 *-----------------------------------------------------------*/
2694 if ( build_rbm)
2695 {
2696 char new_file_name[80];
2697 /* interpolation vectors, one file per vector */
2698 interp_vecs = hypre_CTAlloc(HYPRE_ParVector, num_interp_vecs, HYPRE_MEMORY_HOST);
2699 ij_rbm = hypre_CTAlloc(HYPRE_IJVector, num_interp_vecs, HYPRE_MEMORY_HOST);
2700 for (i=0; i < num_interp_vecs; i++)
2701 {
2702 hypre_sprintf(new_file_name, "%s.%d", argv[build_rbm_index],i);
2703 ierr = HYPRE_IJVectorRead( new_file_name, hypre_MPI_COMM_WORLD,
2704 HYPRE_PARCSR, &ij_rbm[i] );
2705 ierr = HYPRE_IJVectorGetObject( ij_rbm[i], &object );
2706 interp_vecs[i] = (HYPRE_ParVector) object;
2707 }
2708 if (ierr)
2709 {
2710 hypre_printf("ERROR: Problem reading in rbm!\n");
2711 exit(1);
2712 }
2713 }
2714
2715 /*-----------------------------------------------------------
2716 * Set up coarsening data
2717 *-----------------------------------------------------------*/
2718 if (build_fpt_arg_index || build_sfpt_arg_index || build_cpt_arg_index)
2719 {
2720 HYPRE_ParCSRMatrixGetGlobalRowPartitioning(parcsr_A, 0, &partitioning);
2721
2722 if (build_fpt_arg_index)
2723 {
2724 BuildBigArrayFromOneFile(argc, argv, "Fine points", build_fpt_arg_index,
2725 partitioning, &num_fpt, &fpt_index);
2726 }
2727
2728 if (build_sfpt_arg_index)
2729 {
2730 BuildBigArrayFromOneFile(argc, argv, "Isolated Fine points", build_sfpt_arg_index,
2731 partitioning, &num_isolated_fpt, &isolated_fpt_index);
2732 }
2733
2734 if (build_cpt_arg_index)
2735 {
2736 BuildBigArrayFromOneFile(argc, argv, "Coarse points", build_cpt_arg_index,
2737 partitioning, &num_cpt, &cpt_index);
2738 }
2739
2740 if (partitioning)
2741 {
2742 hypre_TFree(partitioning, HYPRE_MEMORY_HOST);
2743 }
2744 }
2745
2746 /*-----------------------------------------------------------
2747 * Set up the RHS and initial guess
2748 *-----------------------------------------------------------*/
2749 time_index = hypre_InitializeTiming("RHS and Initial Guess");
2750 hypre_BeginTiming(time_index);
2751
2752 if ( build_rhs_type == 0 )
2753 {
2754 if (myid == 0)
2755 {
2756 hypre_printf(" RHS vector read from file %s\n", argv[build_rhs_arg_index]);
2757 hypre_printf(" Initial guess is 0\n");
2758 }
2759
2760 /* RHS */
2761 ierr = HYPRE_IJVectorRead( argv[build_rhs_arg_index], hypre_MPI_COMM_WORLD,
2762 HYPRE_PARCSR, &ij_b );
2763 if (ierr)
2764 {
2765 hypre_printf("ERROR: Problem reading in the right-hand-side!\n");
2766 exit(1);
2767 }
2768 ierr = HYPRE_IJVectorGetObject( ij_b, &object );
2769 b = (HYPRE_ParVector) object;
2770
2771 /* Initial guess */
2772 HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, first_local_col, last_local_col, &ij_x);
2773 HYPRE_IJVectorSetObjectType(ij_x, HYPRE_PARCSR);
2774 HYPRE_IJVectorInitialize(ij_x);
2775 HYPRE_IJVectorAssemble(ij_x);
2776
2777 ierr = HYPRE_IJVectorGetObject( ij_x, &object );
2778 x = (HYPRE_ParVector) object;
2779 }
2780 else if ( build_rhs_type == 1 )
2781 {
2782 if (myid == 0)
2783 {
2784 hypre_printf(" RHS vector read from file %s\n", argv[build_rhs_arg_index]);
2785 hypre_printf(" Initial guess is 0\n");
2786 }
2787
2788 ij_b = NULL;
2789 BuildRhsParFromOneFile(argc, argv, build_rhs_arg_index, parcsr_A, &b);
2790
2791 /* initial guess */
2792 HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, first_local_col, last_local_col, &ij_x);
2793 HYPRE_IJVectorSetObjectType(ij_x, HYPRE_PARCSR);
2794 HYPRE_IJVectorInitialize(ij_x);
2795 HYPRE_IJVectorAssemble(ij_x);
2796
2797 ierr = HYPRE_IJVectorGetObject( ij_x, &object );
2798 x = (HYPRE_ParVector) object;
2799 }
2800 else if (build_rhs_type == 7)
2801 {
2802 if (myid == 0)
2803 {
2804 hypre_printf(" RHS vector read from file %s\n", argv[build_rhs_arg_index]);
2805 hypre_printf(" Initial guess is 0\n");
2806 }
2807
2808 ij_b = NULL;
2809 ReadParVectorFromFile(argc, argv, build_rhs_arg_index, &b);
2810
2811 /* initial guess */
2812 HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, first_local_col, last_local_col, &ij_x);
2813 HYPRE_IJVectorSetObjectType(ij_x, HYPRE_PARCSR);
2814 HYPRE_IJVectorInitialize(ij_x);
2815 HYPRE_IJVectorAssemble(ij_x);
2816
2817 ierr = HYPRE_IJVectorGetObject( ij_x, &object );
2818 x = (HYPRE_ParVector) object;
2819 }
2820 else if ( build_rhs_type == 2 )
2821 {
2822 if (myid == 0)
2823 {
2824 hypre_printf(" RHS vector has unit components\n");
2825 hypre_printf(" Initial guess is 0\n");
2826 }
2827
2828 HYPRE_Real *values_h = hypre_CTAlloc(HYPRE_Real, local_num_rows, HYPRE_MEMORY_HOST);
2829 HYPRE_Real *values_d = hypre_CTAlloc(HYPRE_Real, local_num_rows, memory_location);
2830 for (i = 0; i < local_num_rows; i++)
2831 {
2832 values_h[i] = 1.0;
2833 }
2834 hypre_TMemcpy(values_d, values_h, HYPRE_Real, local_num_rows, memory_location, HYPRE_MEMORY_HOST);
2835
2836 /* RHS */
2837 HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, first_local_row, last_local_row, &ij_b);
2838 HYPRE_IJVectorSetObjectType(ij_b, HYPRE_PARCSR);
2839 HYPRE_IJVectorInitialize_v2(ij_b, memory_location);
2840 HYPRE_IJVectorSetValues(ij_b, local_num_rows, NULL, values_d);
2841 HYPRE_IJVectorAssemble(ij_b);
2842 ierr = HYPRE_IJVectorGetObject( ij_b, &object );
2843 b = (HYPRE_ParVector) object;
2844
2845 hypre_Memset(values_d, 0, local_num_rows*sizeof(HYPRE_Real), HYPRE_MEMORY_DEVICE);
2846 /* Initial guess */
2847 HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, first_local_col, last_local_col, &ij_x);
2848 HYPRE_IJVectorSetObjectType(ij_x, HYPRE_PARCSR);
2849 HYPRE_IJVectorInitialize_v2(ij_x, memory_location);
2850 HYPRE_IJVectorSetValues(ij_x, local_num_cols, NULL, values_d);
2851 HYPRE_IJVectorAssemble(ij_x);
2852 ierr = HYPRE_IJVectorGetObject( ij_x, &object );
2853 x = (HYPRE_ParVector) object;
2854
2855 hypre_TFree(values_h, HYPRE_MEMORY_HOST);
2856 hypre_TFree(values_d, memory_location);
2857 }
2858 else if ( build_rhs_type == 3 )
2859 {
2860 if (myid == 0)
2861 {
2862 hypre_printf(" RHS vector has random components and unit 2-norm\n");
2863 hypre_printf(" Initial guess is 0\n");
2864 }
2865
2866 /* RHS */
2867 HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, first_local_row, last_local_row, &ij_b);
2868 HYPRE_IJVectorSetObjectType(ij_b, HYPRE_PARCSR);
2869 HYPRE_IJVectorInitialize(ij_b);
2870 ierr = HYPRE_IJVectorGetObject( ij_b, &object );
2871 b = (HYPRE_ParVector) object;
2872
/* For purposes of this test, HYPRE_ParVector functions are used to fill
   the right-hand side, but these are not necessary. For a clean use of
   the interface, the user "should" modify components of ij_b by using
   the functions HYPRE_IJVectorSetValues or HYPRE_IJVectorAddToValues */
2877
2878 HYPRE_ParVectorSetRandomValues(b, 22775);
2879 HYPRE_ParVectorInnerProd(b,b,&norm);
2880 norm = 1./sqrt(norm);
2881 ierr = HYPRE_ParVectorScale(norm, b);
2882
2883 /* Initial guess */
2884 HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, first_local_col, last_local_col, &ij_x);
2885 HYPRE_IJVectorSetObjectType(ij_x, HYPRE_PARCSR);
2886 HYPRE_IJVectorInitialize(ij_x);
2887 HYPRE_IJVectorAssemble(ij_x);
2888
2889 ierr = HYPRE_IJVectorGetObject( ij_x, &object );
2890 x = (HYPRE_ParVector) object;
2891 }
2892 else if ( build_rhs_type == 4 )
2893 {
2894 if (myid == 0)
2895 {
2896 hypre_printf(" RHS vector set for solution with unit components\n");
2897 hypre_printf(" Initial guess is 0\n");
2898 }
2899
2900 /* Temporary use of solution vector */
2901 HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, first_local_col, last_local_col, &ij_x);
2902 HYPRE_IJVectorSetObjectType(ij_x, HYPRE_PARCSR);
2903 HYPRE_IJVectorInitialize(ij_x);
2904
2905 HYPRE_Real *values_h = hypre_CTAlloc(HYPRE_Real, local_num_cols, HYPRE_MEMORY_HOST);
2906 HYPRE_Real *values_d = hypre_CTAlloc(HYPRE_Real, local_num_cols, memory_location);
2907 for (i = 0; i < local_num_cols; i++)
2908 {
2909 values_h[i] = 1.;
2910 }
2911 hypre_TMemcpy(values_d, values_h, HYPRE_Real, local_num_cols, memory_location, HYPRE_MEMORY_HOST);
2912
2913 HYPRE_IJVectorSetValues(ij_x, local_num_cols, NULL, values_d);
2914 HYPRE_IJVectorAssemble(ij_x);
2915 hypre_TFree(values_h, HYPRE_MEMORY_HOST);
2916 hypre_TFree(values_d, memory_location);
2917
2918 ierr = HYPRE_IJVectorGetObject( ij_x, &object );
2919 x = (HYPRE_ParVector) object;
2920
2921 /* RHS */
2922 HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, first_local_row, last_local_row, &ij_b);
2923 HYPRE_IJVectorSetObjectType(ij_b, HYPRE_PARCSR);
2924 HYPRE_IJVectorInitialize(ij_b);
2925 ierr = HYPRE_IJVectorGetObject( ij_b, &object );
2926 b = (HYPRE_ParVector) object;
2927
2928 HYPRE_ParCSRMatrixMatvec(1.0, parcsr_A, x, 0.0, b);
2929
2930 /* Zero initial guess */
2931 hypre_IJVectorZeroValues(ij_x);
2932 }
2933 else if ( build_rhs_type == 5 )
2934 {
2935 if (myid == 0)
2936 {
2937 hypre_printf(" RHS vector is 0\n");
2938 hypre_printf(" Initial guess has unit components\n");
2939 }
2940
2941 /* RHS */
2942 HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, first_local_row, last_local_row, &ij_b);
2943 HYPRE_IJVectorSetObjectType(ij_b, HYPRE_PARCSR);
2944 HYPRE_IJVectorInitialize(ij_b);
2945 HYPRE_IJVectorAssemble(ij_b);
2946
2947 ierr = HYPRE_IJVectorGetObject( ij_b, &object );
2948 b = (HYPRE_ParVector) object;
2949
2950 /* Initial guess */
2951 HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, first_local_col, last_local_col, &ij_x);
2952 HYPRE_IJVectorSetObjectType(ij_x, HYPRE_PARCSR);
2953 HYPRE_IJVectorInitialize(ij_x);
2954
2955 HYPRE_Real *values_h = hypre_CTAlloc(HYPRE_Real, local_num_cols, HYPRE_MEMORY_HOST);
2956 HYPRE_Real *values_d = hypre_CTAlloc(HYPRE_Real, local_num_cols, memory_location);
2957 for (i = 0; i < local_num_cols; i++)
2958 {
2959 values_h[i] = 1.;
2960 }
2961 hypre_TMemcpy(values_d, values_h, HYPRE_Real, local_num_cols, memory_location, HYPRE_MEMORY_HOST);
2962
2963 HYPRE_IJVectorSetValues(ij_x, local_num_cols, NULL, values_d);
2964 HYPRE_IJVectorAssemble(ij_x);
2965 hypre_TFree(values_h, HYPRE_MEMORY_HOST);
2966 hypre_TFree(values_d, memory_location);
2967
2968 ierr = HYPRE_IJVectorGetObject( ij_x, &object );
2969 x = (HYPRE_ParVector) object;
2970 }
2971 else if ( build_rhs_type == 6)
2972 {
2973 ij_b = NULL;
2974 }
2975
2976 if ( build_src_type == 0)
2977 {
2978 if (myid == 0)
2979 {
2980 hypre_printf(" Source vector read from file %s\n", argv[build_src_arg_index]);
2981 hypre_printf(" Initial unknown vector in evolution is 0\n");
2982 }
2983
2984 ierr = HYPRE_IJVectorRead( argv[build_src_arg_index], hypre_MPI_COMM_WORLD,
2985 HYPRE_PARCSR, &ij_b );
2986 if (ierr)
2987 {
2988 hypre_printf("ERROR: Problem reading in the right-hand-side!\n");
2989 exit(1);
2990 }
2991 ierr = HYPRE_IJVectorGetObject( ij_b, &object );
2992 b = (HYPRE_ParVector) object;
2993
2994 /* Initial unknown vector */
2995 HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, first_local_col, last_local_col, &ij_x);
2996 HYPRE_IJVectorSetObjectType(ij_x, HYPRE_PARCSR);
2997 HYPRE_IJVectorInitialize(ij_x);
2998 HYPRE_IJVectorAssemble(ij_x);
2999
3000 ierr = HYPRE_IJVectorGetObject( ij_x, &object );
3001 x = (HYPRE_ParVector) object;
3002 }
3003 else if (build_src_type == 1)
3004 {
3005 BuildRhsParFromOneFile(argc, argv, build_src_arg_index, parcsr_A, &b);
3006 ij_b = NULL;
3007
3008 /* Initial unknown vector */
3009 HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, first_local_col, last_local_col, &ij_x);
3010 HYPRE_IJVectorSetObjectType(ij_x, HYPRE_PARCSR);
3011 HYPRE_IJVectorInitialize(ij_x);
3012 HYPRE_IJVectorAssemble(ij_x);
3013
3014 ierr = HYPRE_IJVectorGetObject( ij_x, &object );
3015 x = (HYPRE_ParVector) object;
3016 }
3017 else if ( build_src_type == 2 )
3018 {
3019 if (myid == 0)
3020 {
3021 hypre_printf(" Source vector has unit components\n");
3022 hypre_printf(" Initial unknown vector is 0\n");
3023 }
3024
3025 /* RHS */
3026 HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, first_local_row, last_local_row, &ij_b);
3027 HYPRE_IJVectorSetObjectType(ij_b, HYPRE_PARCSR);
3028 HYPRE_IJVectorInitialize(ij_b);
3029
3030 HYPRE_Real *values_h = hypre_CTAlloc(HYPRE_Real, local_num_rows, HYPRE_MEMORY_HOST);
3031 HYPRE_Real *values_d = hypre_CTAlloc(HYPRE_Real, local_num_rows, memory_location);
3032 for (i = 0; i < local_num_rows; i++)
3033 {
3034 values_h[i] = 1.;
3035 }
3036 hypre_TMemcpy(values_d, values_h, HYPRE_Real, local_num_rows, memory_location, HYPRE_MEMORY_HOST);
3037
3038 HYPRE_IJVectorSetValues(ij_b, local_num_rows, NULL, values_d);
3039 HYPRE_IJVectorAssemble(ij_b);
3040 hypre_TFree(values_h, HYPRE_MEMORY_HOST);
3041 hypre_TFree(values_d, memory_location);
3042
3043 ierr = HYPRE_IJVectorGetObject( ij_b, &object );
3044 b = (HYPRE_ParVector) object;
3045
3046 /* Initial guess */
3047 HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, first_local_col, last_local_col, &ij_x);
3048 HYPRE_IJVectorSetObjectType(ij_x, HYPRE_PARCSR);
3049 HYPRE_IJVectorInitialize(ij_x);
3050
3051 /* For backward Euler the previous backward Euler iterate (assumed
3052 0 here) is usually used as the initial guess */
3053 HYPRE_IJVectorAssemble(ij_x);
3054
3055 ierr = HYPRE_IJVectorGetObject( ij_x, &object );
3056 x = (HYPRE_ParVector) object;
3057 }
3058 else if ( build_src_type == 3 )
3059 {
3060 if (myid == 0)
3061 {
3062 hypre_printf(" Source vector has random components in range 0 - 1\n");
3063 hypre_printf(" Initial unknown vector is 0\n");
3064 }
3065
3066 /* RHS */
3067 HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, first_local_row, last_local_row, &ij_b);
3068 HYPRE_IJVectorSetObjectType(ij_b, HYPRE_PARCSR);
3069 HYPRE_IJVectorInitialize(ij_b);
3070
3071 HYPRE_Real *values_h = hypre_CTAlloc(HYPRE_Real, local_num_rows, HYPRE_MEMORY_HOST);
3072 HYPRE_Real *values_d = hypre_CTAlloc(HYPRE_Real, local_num_rows, memory_location);
3073 hypre_SeedRand(myid);
3074 for (i = 0; i < local_num_rows; i++)
3075 {
3076 values_h[i] = hypre_Rand();
3077 }
3078 hypre_TMemcpy(values_d, values_h, HYPRE_Real, local_num_rows, memory_location, HYPRE_MEMORY_HOST);
3079
3080 HYPRE_IJVectorSetValues(ij_b, local_num_rows, NULL, values_d);
3081 HYPRE_IJVectorAssemble(ij_b);
3082 hypre_TFree(values_h, HYPRE_MEMORY_HOST);
3083 hypre_TFree(values_d, memory_location);
3084
3085 ierr = HYPRE_IJVectorGetObject( ij_b, &object );
3086 b = (HYPRE_ParVector) object;
3087
3088 /* Initial guess */
3089 HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, first_local_col, last_local_col, &ij_x);
3090 HYPRE_IJVectorSetObjectType(ij_x, HYPRE_PARCSR);
3091 HYPRE_IJVectorInitialize(ij_x);
3092
3093 /* For backward Euler the previous backward Euler iterate (assumed
3094 0 here) is usually used as the initial guess */
3095 HYPRE_IJVectorAssemble(ij_x);
3096
3097 ierr = HYPRE_IJVectorGetObject( ij_x, &object );
3098 x = (HYPRE_ParVector) object;
3099 }
3100 else if ( build_src_type == 4 )
3101 {
3102 if (myid == 0)
3103 {
3104 hypre_printf(" Source vector is 0 \n");
3105 hypre_printf(" Initial unknown vector has random components in range 0 - 1\n");
3106 }
3107
3108 /* RHS */
3109 HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, first_local_row, last_local_row, &ij_b);
3110 HYPRE_IJVectorSetObjectType(ij_b, HYPRE_PARCSR);
3111 HYPRE_IJVectorInitialize(ij_b);
3112
3113 HYPRE_Real *values_h = hypre_CTAlloc(HYPRE_Real, local_num_rows, HYPRE_MEMORY_HOST);
3114 HYPRE_Real *values_d = hypre_CTAlloc(HYPRE_Real, local_num_rows, memory_location);
3115 hypre_SeedRand(myid);
3116 for (i = 0; i < local_num_rows; i++)
3117 {
3118 values_h[i] = hypre_Rand()/dt;
3119 }
3120 hypre_TMemcpy(values_d, values_h, HYPRE_Real, local_num_rows, memory_location, HYPRE_MEMORY_HOST);
3121
3122 HYPRE_IJVectorSetValues(ij_b, local_num_rows, NULL, values_d);
3123 HYPRE_IJVectorAssemble(ij_b);
3124 hypre_TFree(values_h, HYPRE_MEMORY_HOST);
3125 hypre_TFree(values_d, memory_location);
3126
3127 ierr = HYPRE_IJVectorGetObject( ij_b, &object );
3128 b = (HYPRE_ParVector) object;
3129
3130 /* Initial guess */
3131 HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, first_local_col, last_local_col, &ij_x);
3132 HYPRE_IJVectorSetObjectType(ij_x, HYPRE_PARCSR);
3133 HYPRE_IJVectorInitialize(ij_x);
3134
3135 /* For backward Euler the previous backward Euler iterate (assumed
3136 random in 0 - 1 here) is usually used as the initial guess */
3137 values_h = hypre_CTAlloc(HYPRE_Real, local_num_cols, HYPRE_MEMORY_HOST);
3138 values_d = hypre_CTAlloc(HYPRE_Real, local_num_cols, memory_location);
3139 hypre_SeedRand(myid);
3140 for (i = 0; i < local_num_cols; i++)
3141 {
3142 values_h[i] = hypre_Rand();
3143 }
3144 hypre_TMemcpy(values_d, values_h, HYPRE_Real, local_num_cols, memory_location, HYPRE_MEMORY_HOST);
3145
3146 HYPRE_IJVectorSetValues(ij_x, local_num_cols, NULL, values_d);
3147 HYPRE_IJVectorAssemble(ij_x);
3148 hypre_TFree(values_h, HYPRE_MEMORY_HOST);
3149 hypre_TFree(values_d, memory_location);
3150
3151 ierr = HYPRE_IJVectorGetObject( ij_x, &object );
3152 x = (HYPRE_ParVector) object;
3153 }
3154 else if ( build_src_type == 5 )
3155 {
3156 if (myid == 0)
3157 {
3158 hypre_printf(" Initial guess is random \n");
3159 }
3160
3161 /* Initial guess */
3162 HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, first_local_col, last_local_col, &ij_x);
3163 HYPRE_IJVectorSetObjectType(ij_x, HYPRE_PARCSR);
3164 HYPRE_IJVectorInitialize(ij_x);
3165
3166 /* For backward Euler the previous backward Euler iterate (assumed
3167 random in 0 - 1 here) is usually used as the initial guess */
3168 HYPRE_Real *values_h = hypre_CTAlloc(HYPRE_Real, local_num_cols, HYPRE_MEMORY_HOST);
3169 HYPRE_Real *values_d = hypre_CTAlloc(HYPRE_Real, local_num_cols, memory_location);
3170 hypre_SeedRand(myid+2747);
3171 hypre_SeedRand(myid);
3172 for (i = 0; i < local_num_cols; i++)
3173 {
3174 values_h[i] = hypre_Rand();
3175 }
3176 hypre_TMemcpy(values_d, values_h, HYPRE_Real, local_num_cols, memory_location, HYPRE_MEMORY_HOST);
3177
3178 HYPRE_IJVectorSetValues(ij_x, local_num_cols, NULL, values_d);
3179 HYPRE_IJVectorAssemble(ij_x);
3180 hypre_TFree(values_h, HYPRE_MEMORY_HOST);
3181 hypre_TFree(values_d, memory_location);
3182
3183 ierr = HYPRE_IJVectorGetObject( ij_x, &object );
3184 x = (HYPRE_ParVector) object;
3185 }
3186
3187 /* initial guess */
3188 if ( build_x0_type == 0 )
3189 {
3190 /* from file */
3191 if (myid == 0)
3192 {
3193 hypre_printf(" Initial guess vector read from file %s\n", argv[build_x0_arg_index]);
3194 }
3195 /* x0 */
3196 if (ij_x)
3197 {
3198 HYPRE_IJVectorDestroy(ij_x);
3199 }
3200 ierr = HYPRE_IJVectorRead( argv[build_x0_arg_index], hypre_MPI_COMM_WORLD,
3201 HYPRE_PARCSR, &ij_x );
3202 if (ierr)
3203 {
3204 hypre_printf("ERROR: Problem reading in x0!\n");
3205 exit(1);
3206 }
3207 ierr = HYPRE_IJVectorGetObject( ij_x, &object );
3208 x = (HYPRE_ParVector) object;
3209 }
3210 else if (build_x0_type == 1)
3211 {
3212 /* random */
3213 if (myid == 0)
3214 {
3215 hypre_printf(" Initial guess is random \n");
3216 }
3217
3218 if (ij_x)
3219 {
3220 HYPRE_IJVectorDestroy(ij_x);
3221 }
3222
3223 /* Initial guess */
3224 HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, first_local_col, last_local_col, &ij_x);
3225 HYPRE_IJVectorSetObjectType(ij_x, HYPRE_PARCSR);
3226 HYPRE_IJVectorInitialize(ij_x);
3227
3228 /* For backward Euler the previous backward Euler iterate (assumed
3229 random in 0 - 1 here) is usually used as the initial guess */
3230 HYPRE_Real *values_h = hypre_CTAlloc(HYPRE_Real, local_num_cols, HYPRE_MEMORY_HOST);
3231 HYPRE_Real *values_d = hypre_CTAlloc(HYPRE_Real, local_num_cols, memory_location);
3232 hypre_SeedRand(myid);
3233 for (i = 0; i < local_num_cols; i++)
3234 {
3235 values_h[i] = hypre_Rand();
3236 }
3237 hypre_TMemcpy(values_d, values_h, HYPRE_Real, local_num_cols, memory_location, HYPRE_MEMORY_HOST);
3238
3239 HYPRE_IJVectorSetValues(ij_x, local_num_cols, NULL, values_d);
3240 HYPRE_IJVectorAssemble(ij_x);
3241 hypre_TFree(values_h, HYPRE_MEMORY_HOST);
3242 hypre_TFree(values_d, memory_location);
3243
3244 ierr = HYPRE_IJVectorGetObject( ij_x, &object );
3245 x = (HYPRE_ParVector) object;
3246 }
3247
3248 hypre_EndTiming(time_index);
3249 hypre_PrintTiming("IJ Vector Setup", hypre_MPI_COMM_WORLD);
3250 hypre_FinalizeTiming(time_index);
3251 hypre_ClearTiming();
3252
3253 if (num_functions > 1)
3254 {
3255 dof_func = NULL;
3256 if (build_funcs_type == 1)
3257 {
3258 hypre_printf("calling BuildFuncsFromOneFile\n");
3259 BuildFuncsFromOneFile(argc, argv, build_funcs_arg_index, parcsr_A, &dof_func);
3260 }
3261 else if (build_funcs_type == 2)
3262 {
3263 hypre_printf("calling BuildFuncsFromOneFiles\n");
3264 BuildFuncsFromFiles(argc, argv, build_funcs_arg_index, parcsr_A, &dof_func);
3265 }
3266 else
3267 {
3268 if (myid == 0)
3269 {
3270 hypre_printf (" Number of functions = %d \n", num_functions);
3271 }
3272 }
3273 }
3274
3275 /*-----------------------------------------------------------
3276 * Print out the system and initial guess
3277 *-----------------------------------------------------------*/
3278
3279 if (negA)
3280 {
3281 hypre_ParCSRMatrixScale(parcsr_A, -1);
3282 }
3283
3284 if (print_system)
3285 {
3286 if (ij_A)
3287 {
3288 HYPRE_IJMatrixPrint(ij_A, "IJ.out.A");
3289 }
3290 else if (parcsr_A)
3291 {
3292 hypre_ParCSRMatrixPrintIJ(parcsr_A, 0, 0, "IJ.out.A");
3293 }
3294 if (ij_b)
3295 {
3296 HYPRE_IJVectorPrint(ij_b, "IJ.out.b");
3297 }
3298 else if (b)
3299 {
3300 HYPRE_ParVectorPrint(b, "ParVec.out.b");
3301 }
3302 HYPRE_IJVectorPrint(ij_x, "IJ.out.x0");
3303 }
3304
3305 /*-----------------------------------------------------------
3306 * Migrate the system to the wanted memory space
3307 *-----------------------------------------------------------*/
3308 hypre_ParCSRMatrixMigrate(parcsr_A, hypre_HandleMemoryLocation(hypre_handle()));
3309 hypre_ParVectorMigrate(b, hypre_HandleMemoryLocation(hypre_handle()));
3310 hypre_ParVectorMigrate(x, hypre_HandleMemoryLocation(hypre_handle()));
3311
3312 /* save the initial guess for the 2nd time */
3313 #if SECOND_TIME
3314 x0_save = hypre_ParVectorCloneDeep_v2(x, hypre_ParVectorMemoryLocation(x));
3315 #endif
3316
3317 /*-----------------------------------------------------------
3318 * Solve the system using the hybrid solver
3319 *-----------------------------------------------------------*/
3320
3321 if (solver_id == -1)
3322 {
3323 HYPRE_Int nmv = 100;
3324 HYPRE_Int num_threads = hypre_NumThreads();
3325
3326 if (myid == 0)
3327 {
3328 hypre_printf("Running %d matvecs with A\n", nmv);
3329 hypre_printf("\n\n Num MPI tasks = %d\n\n",num_procs);
3330 hypre_printf(" Num OpenMP threads = %d\n\n",num_threads);
3331 }
3332
3333 HYPRE_Real tt = hypre_MPI_Wtime();
3334
3335 time_index = hypre_InitializeTiming("MatVec Test");
3336 hypre_BeginTiming(time_index);
3337
3338 for (i = 0; i < nmv; i++)
3339 {
3340 HYPRE_ParCSRMatrixMatvec(1., parcsr_A, x, 0., b);
3341 }
3342
3343 #if defined(HYPRE_USING_GPU)
3344 hypre_SyncCudaDevice(hypre_handle());
3345 #endif
3346
3347 hypre_EndTiming(time_index);
3348 hypre_PrintTiming("MatVec Test", hypre_MPI_COMM_WORLD);
3349 hypre_FinalizeTiming(time_index);
3350 hypre_ClearTiming();
3351
3352 tt = hypre_MPI_Wtime() - tt;
3353
3354 if (myid == 0)
3355 {
3356 hypre_printf("Matvec time %.2f (ms)\n", tt*1000.0);
3357 }
3358
3359 goto final;
3360 }
3361
3362 if (solver_id == 20)
3363 {
3364 if (myid == 0) hypre_printf("Solver: AMG\n");
3365 time_index = hypre_InitializeTiming("AMG_hybrid Setup");
3366 hypre_BeginTiming(time_index);
3367
3368 HYPRE_ParCSRHybridCreate(&amg_solver);
3369 HYPRE_ParCSRHybridSetTol(amg_solver, tol);
3370 HYPRE_ParCSRHybridSetAbsoluteTol(amg_solver, atol);
3371 HYPRE_ParCSRHybridSetConvergenceTol(amg_solver, cf_tol);
3372 HYPRE_ParCSRHybridSetSolverType(amg_solver, solver_type);
3373 HYPRE_ParCSRHybridSetRecomputeResidual(amg_solver, recompute_res);
3374 HYPRE_ParCSRHybridSetLogging(amg_solver, ioutdat);
3375 HYPRE_ParCSRHybridSetPrintLevel(amg_solver, poutdat);
3376 HYPRE_ParCSRHybridSetDSCGMaxIter(amg_solver, max_iter);
3377 HYPRE_ParCSRHybridSetPCGMaxIter(amg_solver, mg_max_iter);
3378 HYPRE_ParCSRHybridSetCoarsenType(amg_solver, coarsen_type);
3379 HYPRE_ParCSRHybridSetStrongThreshold(amg_solver, strong_threshold);
3380 HYPRE_ParCSRHybridSetTruncFactor(amg_solver, trunc_factor);
3381 HYPRE_ParCSRHybridSetPMaxElmts(amg_solver, P_max_elmts);
3382 HYPRE_ParCSRHybridSetMaxLevels(amg_solver, max_levels);
3383 HYPRE_ParCSRHybridSetMaxRowSum(amg_solver, max_row_sum);
3384 HYPRE_ParCSRHybridSetNumSweeps(amg_solver, num_sweeps);
3385 HYPRE_ParCSRHybridSetInterpType(amg_solver, interp_type);
3386
3387 if (relax_type > -1) HYPRE_ParCSRHybridSetRelaxType(amg_solver, relax_type);
3388 HYPRE_ParCSRHybridSetAggNumLevels(amg_solver, agg_num_levels);
3389 HYPRE_ParCSRHybridSetAggInterpType(amg_solver, agg_interp_type);
3390 HYPRE_ParCSRHybridSetNumPaths(amg_solver, num_paths);
3391 HYPRE_ParCSRHybridSetNumFunctions(amg_solver, num_functions);
3392 HYPRE_ParCSRHybridSetNodal(amg_solver, nodal);
3393 if (relax_down > -1)
3394 HYPRE_ParCSRHybridSetCycleRelaxType(amg_solver, relax_down, 1);
3395 if (relax_up > -1)
3396 HYPRE_ParCSRHybridSetCycleRelaxType(amg_solver, relax_up, 2);
3397 if (relax_coarse > -1)
3398 HYPRE_ParCSRHybridSetCycleRelaxType(amg_solver, relax_coarse, 3);
3399 HYPRE_ParCSRHybridSetRelaxOrder(amg_solver, relax_order);
3400 HYPRE_ParCSRHybridSetKeepTranspose(amg_solver, keepTranspose);
3401 HYPRE_ParCSRHybridSetMaxCoarseSize(amg_solver, coarse_threshold);
3402 HYPRE_ParCSRHybridSetMinCoarseSize(amg_solver, min_coarse_size);
3403 HYPRE_ParCSRHybridSetSeqThreshold(amg_solver, seq_threshold);
3404 HYPRE_ParCSRHybridSetRelaxWt(amg_solver, relax_wt);
3405 HYPRE_ParCSRHybridSetOuterWt(amg_solver, outer_wt);
3406 if (level_w > -1)
3407 HYPRE_ParCSRHybridSetLevelRelaxWt(amg_solver, relax_wt_level, level_w);
3408 if (level_ow > -1)
3409 HYPRE_ParCSRHybridSetLevelOuterWt(amg_solver, outer_wt_level, level_ow);
3410 HYPRE_ParCSRHybridSetNonGalerkinTol(amg_solver, nongalerk_num_tol, nongalerk_tol);
3411
3412 HYPRE_ParCSRHybridSetup(amg_solver, parcsr_A, b, x);
3413
3414 hypre_EndTiming(time_index);
3415 hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
3416 hypre_FinalizeTiming(time_index);
3417 hypre_ClearTiming();
3418
3419 time_index = hypre_InitializeTiming("ParCSR Hybrid Solve");
3420 hypre_BeginTiming(time_index);
3421
3422 HYPRE_ParCSRHybridSolve(amg_solver, parcsr_A, b, x);
3423
3424 hypre_EndTiming(time_index);
3425 hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
3426 hypre_FinalizeTiming(time_index);
3427 hypre_ClearTiming();
3428
3429 HYPRE_ParCSRHybridGetNumIterations(amg_solver, &num_iterations);
3430 HYPRE_ParCSRHybridGetPCGNumIterations(amg_solver, &pcg_num_its);
3431 HYPRE_ParCSRHybridGetDSCGNumIterations(amg_solver, &dscg_num_its);
3432 HYPRE_ParCSRHybridGetFinalRelativeResidualNorm(amg_solver,
3433 &final_res_norm);
3434
3435 if (myid == 0)
3436 {
3437 hypre_printf("\n");
3438 hypre_printf("Iterations = %d\n", num_iterations);
3439 hypre_printf("PCG_Iterations = %d\n", pcg_num_its);
3440 hypre_printf("DSCG_Iterations = %d\n", dscg_num_its);
3441 hypre_printf("Final Relative Residual Norm = %e\n", final_res_norm);
3442 hypre_printf("\n");
3443 }
3444
3445 #if SECOND_TIME
3446 /* run a second time to check for memory leaks */
3447 hypre_ParVectorCopy(x0_save, x);
3448 HYPRE_ParCSRHybridSetup(amg_solver, parcsr_A, b, x);
3449 HYPRE_ParCSRHybridSolve(amg_solver, parcsr_A, b, x);
3450
3451 HYPRE_ParCSRHybridGetNumIterations(amg_solver, &num_iterations);
3452 HYPRE_ParCSRHybridGetPCGNumIterations(amg_solver, &pcg_num_its);
3453 HYPRE_ParCSRHybridGetDSCGNumIterations(amg_solver, &dscg_num_its);
3454 HYPRE_ParCSRHybridGetFinalRelativeResidualNorm(amg_solver,
3455 &final_res_norm);
3456 if (myid == 0)
3457 {
3458 hypre_printf("\n");
3459 hypre_printf("Iterations = %d\n", num_iterations);
3460 hypre_printf("PCG_Iterations = %d\n", pcg_num_its);
3461 hypre_printf("DSCG_Iterations = %d\n", dscg_num_its);
3462 hypre_printf("Final Relative Residual Norm = %e\n", final_res_norm);
3463 hypre_printf("\n");
3464 }
3465
3466 HYPRE_Real time[4];
3467 HYPRE_ParCSRHybridGetSetupSolveTime(amg_solver, time);
3468 if (myid == 0)
3469 {
3470 printf("ParCSRHybrid: Setup-Time1 %f Solve-Time1 %f Setup-Time2 %f Solve-Time2 %f\n",
3471 time[0], time[1], time[2], time[3]);
3472 }
3473 #endif
3474
3475 HYPRE_ParCSRHybridDestroy(amg_solver);
3476 }
3477 /*-----------------------------------------------------------
3478 * Solve the system using AMG
3479 *-----------------------------------------------------------*/
3480
3481 if (solver_id == 0 || solver_id == 90)
3482 {
3483 if (solver_id == 0)
3484 {
3485 if (myid == 0) hypre_printf("Solver: AMG\n");
3486 time_index = hypre_InitializeTiming("BoomerAMG Setup");
3487 hypre_BeginTiming(time_index);
3488
3489 HYPRE_BoomerAMGCreate(&amg_solver);
3490 }
3491 else if (solver_id == 90)
3492 {
3493 if (myid == 0) hypre_printf("Solver: AMG-DD\n");
3494 time_index = hypre_InitializeTiming("BoomerAMGDD Setup");
3495 hypre_BeginTiming(time_index);
3496
3497 HYPRE_BoomerAMGDDCreate(&amgdd_solver);
3498 HYPRE_BoomerAMGDDGetAMG(amgdd_solver, &amg_solver);
3499
3500 /* AMG-DD options */
3501 HYPRE_BoomerAMGDDSetStartLevel(amgdd_solver, amgdd_start_level);
3502 HYPRE_BoomerAMGDDSetPadding(amgdd_solver, amgdd_padding);
3503 HYPRE_BoomerAMGDDSetFACNumRelax(amgdd_solver, amgdd_fac_num_relax);
3504 HYPRE_BoomerAMGDDSetFACNumCycles(amgdd_solver, amgdd_num_comp_cycles);
3505 HYPRE_BoomerAMGDDSetFACRelaxType(amgdd_solver, amgdd_fac_relax_type);
3506 HYPRE_BoomerAMGDDSetFACCycleType(amgdd_solver, amgdd_fac_cycle_type);
3507 HYPRE_BoomerAMGDDSetNumGhostLayers(amgdd_solver, amgdd_num_ghost_layers);
3508 }
3509
3510 if (air)
3511 {
3512 /* RL: specify restriction */
3513 hypre_assert(restri_type >= 0);
3514 HYPRE_BoomerAMGSetRestriction(amg_solver, restri_type); /* 0: P^T, 1: AIR, 2: AIR-2 */
3515 HYPRE_BoomerAMGSetGridRelaxPoints(amg_solver, grid_relax_points);
3516 HYPRE_BoomerAMGSetStrongThresholdR(amg_solver, strong_thresholdR);
3517 HYPRE_BoomerAMGSetFilterThresholdR(amg_solver, filter_thresholdR);
3518 }
3519
3520 /* RL */
3521 HYPRE_BoomerAMGSetADropTol(amg_solver, A_drop_tol);
3522 HYPRE_BoomerAMGSetADropType(amg_solver, A_drop_type);
3523 /* BM Aug 25, 2006 */
3524 HYPRE_BoomerAMGSetCGCIts(amg_solver, cgcits);
3525 HYPRE_BoomerAMGSetInterpType(amg_solver, interp_type);
3526 HYPRE_BoomerAMGSetRestriction(amg_solver, restri_type); /* 0: P^T, 1: AIR, 2: AIR-2 */
3527 HYPRE_BoomerAMGSetPostInterpType(amg_solver, post_interp_type);
3528 HYPRE_BoomerAMGSetNumSamples(amg_solver, gsmg_samples);
3529 HYPRE_BoomerAMGSetCoarsenType(amg_solver, coarsen_type);
3530 HYPRE_BoomerAMGSetCoarsenCutFactor(amg_solver, coarsen_cut_factor);
3531 HYPRE_BoomerAMGSetCPoints(amg_solver, max_levels, num_cpt, cpt_index);
3532 HYPRE_BoomerAMGSetFPoints(amg_solver, num_fpt, fpt_index);
3533 HYPRE_BoomerAMGSetIsolatedFPoints(amg_solver, num_isolated_fpt, isolated_fpt_index);
3534 HYPRE_BoomerAMGSetMeasureType(amg_solver, measure_type);
3535 HYPRE_BoomerAMGSetConvergeType(amg_solver, converge_type);
3536 HYPRE_BoomerAMGSetTol(amg_solver, tol);
3537 HYPRE_BoomerAMGSetStrongThreshold(amg_solver, strong_threshold);
3538 HYPRE_BoomerAMGSetSeqThreshold(amg_solver, seq_threshold);
3539 HYPRE_BoomerAMGSetRedundant(amg_solver, redundant);
3540 HYPRE_BoomerAMGSetMaxCoarseSize(amg_solver, coarse_threshold);
3541 HYPRE_BoomerAMGSetMinCoarseSize(amg_solver, min_coarse_size);
3542 HYPRE_BoomerAMGSetTruncFactor(amg_solver, trunc_factor);
3543 HYPRE_BoomerAMGSetPMaxElmts(amg_solver, P_max_elmts);
3544 HYPRE_BoomerAMGSetJacobiTruncThreshold(amg_solver, jacobi_trunc_threshold);
3545 HYPRE_BoomerAMGSetSCommPkgSwitch(amg_solver, S_commpkg_switch);
3546 /* note: log is written to standard output, not to file */
3547 HYPRE_BoomerAMGSetPrintLevel(amg_solver, 3);
3548 //HYPRE_BoomerAMGSetLogging(amg_solver, 2);
3549 HYPRE_BoomerAMGSetPrintFileName(amg_solver, "driver.out.log");
3550 HYPRE_BoomerAMGSetCycleType(amg_solver, cycle_type);
3551 HYPRE_BoomerAMGSetFCycle(amg_solver, fcycle);
3552 HYPRE_BoomerAMGSetNumSweeps(amg_solver, num_sweeps);
3553 HYPRE_BoomerAMGSetISType(amg_solver, IS_type);
3554 HYPRE_BoomerAMGSetNumCRRelaxSteps(amg_solver, num_CR_relax_steps);
3555 HYPRE_BoomerAMGSetCRRate(amg_solver, CR_rate);
3556 HYPRE_BoomerAMGSetCRStrongTh(amg_solver, CR_strong_th);
3557 HYPRE_BoomerAMGSetCRUseCG(amg_solver, CR_use_CG);
3558 if (relax_type > -1) HYPRE_BoomerAMGSetRelaxType(amg_solver, relax_type);
3559 if (relax_down > -1)
3560 HYPRE_BoomerAMGSetCycleRelaxType(amg_solver, relax_down, 1);
3561 if (relax_up > -1)
3562 HYPRE_BoomerAMGSetCycleRelaxType(amg_solver, relax_up, 2);
3563 if (relax_coarse > -1)
3564 HYPRE_BoomerAMGSetCycleRelaxType(amg_solver, relax_coarse, 3);
3565 HYPRE_BoomerAMGSetAddRelaxType(amg_solver, add_relax_type);
3566 HYPRE_BoomerAMGSetAddRelaxWt(amg_solver, add_relax_wt);
3567 HYPRE_BoomerAMGSetChebyOrder(amg_solver, cheby_order);
3568 HYPRE_BoomerAMGSetChebyFraction(amg_solver, cheby_fraction);
3569 HYPRE_BoomerAMGSetChebyEigEst(amg_solver, cheby_eig_est);
3570 HYPRE_BoomerAMGSetChebyVariant(amg_solver, cheby_variant);
3571 HYPRE_BoomerAMGSetChebyScale(amg_solver, cheby_scale);
3572 HYPRE_BoomerAMGSetRelaxOrder(amg_solver, relax_order);
3573 HYPRE_BoomerAMGSetRelaxWt(amg_solver, relax_wt);
3574 HYPRE_BoomerAMGSetOuterWt(amg_solver, outer_wt);
3575 HYPRE_BoomerAMGSetMaxLevels(amg_solver, max_levels);
3576 if (level_w > -1)
3577 HYPRE_BoomerAMGSetLevelRelaxWt(amg_solver, relax_wt_level, level_w);
3578 if (level_ow > -1)
3579 HYPRE_BoomerAMGSetLevelOuterWt(amg_solver, outer_wt_level, level_ow);
3580 HYPRE_BoomerAMGSetSmoothType(amg_solver, smooth_type);
3581 HYPRE_BoomerAMGSetSmoothNumSweeps(amg_solver, smooth_num_sweeps);
3582 HYPRE_BoomerAMGSetSmoothNumLevels(amg_solver, smooth_num_levels);
3583 HYPRE_BoomerAMGSetMaxRowSum(amg_solver, max_row_sum);
3584 HYPRE_BoomerAMGSetDebugFlag(amg_solver, debug_flag);
3585 HYPRE_BoomerAMGSetVariant(amg_solver, variant);
3586 HYPRE_BoomerAMGSetOverlap(amg_solver, overlap);
3587 HYPRE_BoomerAMGSetDomainType(amg_solver, domain_type);
3588 HYPRE_BoomerAMGSetSchwarzUseNonSymm(amg_solver, use_nonsymm_schwarz);
3589
3590 HYPRE_BoomerAMGSetSchwarzRlxWeight(amg_solver, schwarz_rlx_weight);
3591 if (eu_level < 0) eu_level = 0;
3592 HYPRE_BoomerAMGSetEuLevel(amg_solver, eu_level);
3593 HYPRE_BoomerAMGSetEuBJ(amg_solver, eu_bj);
3594 HYPRE_BoomerAMGSetEuSparseA(amg_solver, eu_sparse_A);
3595 HYPRE_BoomerAMGSetILUType(amg_solver, ilu_type);
3596 HYPRE_BoomerAMGSetILULevel(amg_solver, ilu_lfil);
3597 HYPRE_BoomerAMGSetILUDroptol(amg_solver, ilu_droptol);
3598 HYPRE_BoomerAMGSetILUMaxRowNnz(amg_solver, ilu_max_row_nnz);
3599 HYPRE_BoomerAMGSetILUMaxIter(amg_solver, ilu_sm_max_iter);
3600
3601 HYPRE_BoomerAMGSetNumFunctions(amg_solver, num_functions);
3602 HYPRE_BoomerAMGSetAggNumLevels(amg_solver, agg_num_levels);
3603 HYPRE_BoomerAMGSetAggInterpType(amg_solver, agg_interp_type);
3604 HYPRE_BoomerAMGSetAggTruncFactor(amg_solver, agg_trunc_factor);
3605 HYPRE_BoomerAMGSetAggP12TruncFactor(amg_solver, agg_P12_trunc_factor);
3606 HYPRE_BoomerAMGSetAggPMaxElmts(amg_solver, agg_P_max_elmts);
3607 HYPRE_BoomerAMGSetAggP12MaxElmts(amg_solver, agg_P12_max_elmts);
3608 HYPRE_BoomerAMGSetNumPaths(amg_solver, num_paths);
3609 HYPRE_BoomerAMGSetNodal(amg_solver, nodal);
3610 HYPRE_BoomerAMGSetNodalDiag(amg_solver, nodal_diag);
3611 HYPRE_BoomerAMGSetKeepSameSign(amg_solver, keep_same_sign);
3612 HYPRE_BoomerAMGSetCycleNumSweeps(amg_solver, ns_coarse, 3);
3613 if (ns_down > -1)
3614 {
3615 HYPRE_BoomerAMGSetCycleNumSweeps(amg_solver, ns_down, 1);
3616 }
3617 if (ns_up > -1)
3618 {
3619 HYPRE_BoomerAMGSetCycleNumSweeps(amg_solver, ns_up, 2);
3620 }
3621 if (num_functions > 1)
3622 HYPRE_BoomerAMGSetDofFunc(amg_solver, dof_func);
3623 HYPRE_BoomerAMGSetAdditive(amg_solver, additive);
3624 HYPRE_BoomerAMGSetMultAdditive(amg_solver, mult_add);
3625 HYPRE_BoomerAMGSetSimple(amg_solver, simple);
3626 HYPRE_BoomerAMGSetAddLastLvl(amg_solver, add_last_lvl);
3627 HYPRE_BoomerAMGSetMultAddPMaxElmts(amg_solver, add_P_max_elmts);
3628 HYPRE_BoomerAMGSetMultAddTruncFactor(amg_solver, add_trunc_factor);
3629
3630 HYPRE_BoomerAMGSetMaxIter(amg_solver, mg_max_iter);
3631 HYPRE_BoomerAMGSetRAP2(amg_solver, rap2);
3632 HYPRE_BoomerAMGSetModuleRAP2(amg_solver, mod_rap2);
3633 HYPRE_BoomerAMGSetKeepTranspose(amg_solver, keepTranspose);
3634 #ifdef HYPRE_USING_DSUPERLU
3635 HYPRE_BoomerAMGSetDSLUThreshold(amg_solver, dslu_threshold);
3636 #endif
3637 /*HYPRE_BoomerAMGSetNonGalerkTol(amg_solver, nongalerk_num_tol, nongalerk_tol);*/
3638 if (nongalerk_tol)
3639 {
3640 HYPRE_BoomerAMGSetNonGalerkinTol(amg_solver, nongalerk_tol[nongalerk_num_tol-1]);
3641 for (i=0; i < nongalerk_num_tol-1; i++)
3642 HYPRE_BoomerAMGSetLevelNonGalerkinTol(amg_solver, nongalerk_tol[i], i);
3643 }
3644 if (build_rbm)
3645 {
3646 HYPRE_BoomerAMGSetInterpVectors(amg_solver, num_interp_vecs, interp_vecs);
3647 HYPRE_BoomerAMGSetInterpVecVariant(amg_solver, interp_vec_variant);
3648 HYPRE_BoomerAMGSetInterpVecQMax(amg_solver, Q_max);
3649 HYPRE_BoomerAMGSetInterpVecAbsQTrunc(amg_solver, Q_trunc);
3650 }
3651
3652 /* BM Oct 23, 2006 */
3653 if (plot_grids) {
3654 HYPRE_BoomerAMGSetPlotGrids (amg_solver, 1);
3655 HYPRE_BoomerAMGSetPlotFileName (amg_solver, plot_file_name);
3656 HYPRE_BoomerAMGSetCoordDim (amg_solver, coord_dim);
3657 HYPRE_BoomerAMGSetCoordinates (amg_solver, coordinates);
3658 }
3659
3660 //cudaProfilerStart();
3661
3662 #if defined(HYPRE_USING_NVTX)
3663 hypre_GpuProfilingPushRange("AMG-Setup-1");
3664 #endif
3665 if (solver_id == 0)
3666 {
3667 HYPRE_BoomerAMGSetup(amg_solver, parcsr_A, b, x);
3668 }
3669 else if (solver_id == 90)
3670 {
3671 HYPRE_BoomerAMGDDSetup(amgdd_solver, parcsr_A, b, x);
3672 }
3673
3674 #if defined(HYPRE_USING_NVTX)
3675 hypre_GpuProfilingPopRange();
3676 #endif
3677
3678 #if defined(HYPRE_USING_GPU)
3679 hypre_SyncCudaDevice(hypre_handle());
3680 #endif
3681
3682 hypre_EndTiming(time_index);
3683 hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
3684 hypre_FinalizeTiming(time_index);
3685 hypre_ClearTiming();
3686
3687 if (solver_id == 0)
3688 {
3689 time_index = hypre_InitializeTiming("BoomerAMG Solve");
3690 }
3691 else if (solver_id == 90)
3692 {
3693 time_index = hypre_InitializeTiming("BoomerAMG-DD Solve");
3694 }
3695 hypre_BeginTiming(time_index);
3696
3697 #if defined(HYPRE_USING_NVTX)
3698 hypre_GpuProfilingPushRange("AMG-Solve-1");
3699 #endif
3700
3701 //cudaProfilerStart();
3702 if (solver_id == 0)
3703 {
3704 HYPRE_BoomerAMGSolve(amg_solver, parcsr_A, b, x);
3705 }
3706 else if (solver_id == 90)
3707 {
3708 HYPRE_BoomerAMGDDSolve(amgdd_solver, parcsr_A, b, x);
3709 }
3710 //cudaProfilerStop();
3711
3712 #if defined(HYPRE_USING_NVTX)
3713 hypre_GpuProfilingPopRange();
3714 #endif
3715
3716 #if defined(HYPRE_USING_GPU)
3717 hypre_SyncCudaDevice(hypre_handle());
3718 #endif
3719
3720 hypre_EndTiming(time_index);
3721 hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
3722 hypre_FinalizeTiming(time_index);
3723 hypre_ClearTiming();
3724
3725 if (solver_id == 0)
3726 {
3727 HYPRE_BoomerAMGGetNumIterations(amg_solver, &num_iterations);
3728 HYPRE_BoomerAMGGetFinalRelativeResidualNorm(amg_solver, &final_res_norm);
3729 }
3730 else if (solver_id == 90)
3731 {
3732 HYPRE_BoomerAMGDDGetNumIterations(amgdd_solver, &num_iterations);
3733 HYPRE_BoomerAMGDDGetFinalRelativeResidualNorm(amgdd_solver, &final_res_norm);
3734 }
3735
3736 if (myid == 0)
3737 {
3738 hypre_printf("\n");
3739 if (solver_id == 0)
3740 {
3741 hypre_printf("BoomerAMG Iterations = %d\n", num_iterations);
3742 }
3743 else if (solver_id == 90)
3744 {
3745 hypre_printf("BoomerAMG-DD Iterations = %d\n", num_iterations);
3746 }
3747 hypre_printf("Final Relative Residual Norm = %e\n", final_res_norm);
3748 hypre_printf("\n");
3749 }
3750
3751 #if SECOND_TIME
3752 /* run a second time to check for memory leaks */
3753 //HYPRE_ParVectorSetRandomValues(x, 775);
3754 hypre_ParVectorCopy(x0_save, x);
3755
3756 HYPRE_Real tt, maxtt = 0.0, tset = 0.0, tsol = 0.0;
3757
3758 tt = hypre_MPI_Wtime();
3759
3760 #if defined(HYPRE_USING_NVTX)
3761 hypre_GpuProfilingPushRange("AMG-Setup-2");
3762 #endif
3763
3764 if (solver_id == 0)
3765 {
3766 HYPRE_BoomerAMGSetup(amg_solver, parcsr_A, b, x);
3767 }
3768 else if (solver_id == 90)
3769 {
3770 HYPRE_BoomerAMGDDSetup(amgdd_solver, parcsr_A, b, x);
3771 }
3772
3773 #if defined(HYPRE_USING_NVTX)
3774 hypre_GpuProfilingPopRange();
3775 #endif
3776
3777 #if defined(HYPRE_USING_GPU)
3778 hypre_SyncCudaDevice(hypre_handle());
3779 #endif
3780
3781 tt = hypre_MPI_Wtime() - tt;
3782
3783 hypre_MPI_Reduce(&tt, &maxtt, 1, hypre_MPI_REAL, hypre_MPI_MAX, 0, hypre_MPI_COMM_WORLD);
3784
3785 if (myid == 0)
3786 {
3787 tset = maxtt;
3788 }
3789
3790 tt = hypre_MPI_Wtime();
3791
3792 #if defined(HYPRE_USING_NVTX)
3793 hypre_GpuProfilingPushRange("AMG-Solve-2");
3794 #endif
3795
3796 if (solver_id == 0)
3797 {
3798 HYPRE_BoomerAMGSolve(amg_solver, parcsr_A, b, x);
3799 }
3800 else if (solver_id == 90)
3801 {
3802 HYPRE_BoomerAMGDDSolve(amgdd_solver, parcsr_A, b, x);
3803 }
3804
3805 #if defined(HYPRE_USING_NVTX)
3806 hypre_GpuProfilingPopRange();
3807 #endif
3808
3809 #if defined(HYPRE_USING_GPU)
3810 hypre_SyncCudaDevice(hypre_handle());
3811 #endif
3812
3813 tt = hypre_MPI_Wtime() - tt;
3814
3815 hypre_MPI_Reduce(&tt, &maxtt, 1, hypre_MPI_REAL, hypre_MPI_MAX, 0, hypre_MPI_COMM_WORLD);
3816
3817 if (myid == 0)
3818 {
3819 tsol = maxtt;
3820 hypre_printf("AMG Setup time %.2f (s)\n", tset);
3821 hypre_printf("AMG Solve time %.2f (s)\n", tsol);
3822 }
3823 #endif // SECOND_TIME
3824
3825 //cudaProfilerStop();
3826
3827 if (solver_id == 0)
3828 {
3829 HYPRE_BoomerAMGDestroy(amg_solver);
3830 }
3831 else if (solver_id == 90)
3832 {
3833 HYPRE_BoomerAMGDDDestroy(amgdd_solver);
3834 }
3835 }
3836
3837 /*-----------------------------------------------------------
3838 * Solve the system using GSMG
3839 *-----------------------------------------------------------*/
3840
3841 if (solver_id == 13)
3842 {
3843 /* reset some smoother parameters */
3844
3845 relax_order = 0;
3846
3847 if (myid == 0) hypre_printf("Solver: GSMG\n");
3848 time_index = hypre_InitializeTiming("BoomerAMG Setup");
3849 hypre_BeginTiming(time_index);
3850
3851 HYPRE_BoomerAMGCreate(&amg_solver);
3852 HYPRE_BoomerAMGSetGSMG(amg_solver, 4); /* specify GSMG */
3853 /* BM Aug 25, 2006 */
3854 HYPRE_BoomerAMGSetCGCIts(amg_solver, cgcits);
3855 HYPRE_BoomerAMGSetInterpType(amg_solver, interp_type);
3856 HYPRE_BoomerAMGSetPostInterpType(amg_solver, post_interp_type);
3857 HYPRE_BoomerAMGSetNumSamples(amg_solver, gsmg_samples);
3858 HYPRE_BoomerAMGSetCoarsenType(amg_solver, coarsen_type);
3859 HYPRE_BoomerAMGSetCoarsenCutFactor(amg_solver, coarsen_cut_factor);
3860 HYPRE_BoomerAMGSetCPoints(amg_solver, max_levels, num_cpt, cpt_index);
3861 HYPRE_BoomerAMGSetFPoints(amg_solver, num_fpt, fpt_index);
3862 HYPRE_BoomerAMGSetIsolatedFPoints(amg_solver, num_isolated_fpt, isolated_fpt_index);
3863 HYPRE_BoomerAMGSetMeasureType(amg_solver, measure_type);
3864 HYPRE_BoomerAMGSetTol(amg_solver, tol);
3865 HYPRE_BoomerAMGSetStrongThreshold(amg_solver, strong_threshold);
3866 HYPRE_BoomerAMGSetSeqThreshold(amg_solver, seq_threshold);
3867 HYPRE_BoomerAMGSetRedundant(amg_solver, redundant);
3868 HYPRE_BoomerAMGSetMaxCoarseSize(amg_solver, coarse_threshold);
3869 HYPRE_BoomerAMGSetMinCoarseSize(amg_solver, min_coarse_size);
3870 HYPRE_BoomerAMGSetTruncFactor(amg_solver, trunc_factor);
3871 HYPRE_BoomerAMGSetPMaxElmts(amg_solver, P_max_elmts);
3872 HYPRE_BoomerAMGSetJacobiTruncThreshold(amg_solver, jacobi_trunc_threshold);
3873 HYPRE_BoomerAMGSetSCommPkgSwitch(amg_solver, S_commpkg_switch);
3874 /* note: log is written to standard output, not to file */
3875 HYPRE_BoomerAMGSetPrintLevel(amg_solver, 3);
3876 HYPRE_BoomerAMGSetPrintFileName(amg_solver, "driver.out.log");
3877 HYPRE_BoomerAMGSetMaxIter(amg_solver, mg_max_iter);
3878 HYPRE_BoomerAMGSetCycleType(amg_solver, cycle_type);
3879 HYPRE_BoomerAMGSetFCycle(amg_solver, fcycle);
3880 HYPRE_BoomerAMGSetNumSweeps(amg_solver, num_sweeps);
3881 if (relax_type > -1) HYPRE_BoomerAMGSetRelaxType(amg_solver, relax_type);
3882 if (relax_down > -1)
3883 HYPRE_BoomerAMGSetCycleRelaxType(amg_solver, relax_down, 1);
3884 if (relax_up > -1)
3885 HYPRE_BoomerAMGSetCycleRelaxType(amg_solver, relax_up, 2);
3886 if (relax_coarse > -1)
3887 HYPRE_BoomerAMGSetCycleRelaxType(amg_solver, relax_coarse, 3);
3888 HYPRE_BoomerAMGSetAddRelaxType(amg_solver, add_relax_type);
3889 HYPRE_BoomerAMGSetAddRelaxWt(amg_solver, add_relax_wt);
3890 HYPRE_BoomerAMGSetChebyOrder(amg_solver, cheby_order);
3891 HYPRE_BoomerAMGSetChebyFraction(amg_solver, cheby_fraction);
3892 HYPRE_BoomerAMGSetChebyEigEst(amg_solver, cheby_eig_est);
3893 HYPRE_BoomerAMGSetChebyVariant(amg_solver, cheby_variant);
3894 HYPRE_BoomerAMGSetChebyScale(amg_solver, cheby_scale);
3895 HYPRE_BoomerAMGSetRelaxOrder(amg_solver, relax_order);
3896 HYPRE_BoomerAMGSetRelaxWt(amg_solver, relax_wt);
3897 HYPRE_BoomerAMGSetOuterWt(amg_solver, outer_wt);
3898 if (level_w > -1)
3899 HYPRE_BoomerAMGSetLevelRelaxWt(amg_solver, relax_wt_level, level_w);
3900 if (level_ow > -1)
3901 HYPRE_BoomerAMGSetLevelOuterWt(amg_solver, outer_wt_level, level_ow);
3902 HYPRE_BoomerAMGSetSmoothType(amg_solver, smooth_type);
3903 HYPRE_BoomerAMGSetSmoothNumSweeps(amg_solver, smooth_num_sweeps);
3904 HYPRE_BoomerAMGSetSmoothNumLevels(amg_solver, smooth_num_levels);
3905 HYPRE_BoomerAMGSetMaxLevels(amg_solver, max_levels);
3906 HYPRE_BoomerAMGSetMaxRowSum(amg_solver, max_row_sum);
3907 HYPRE_BoomerAMGSetDebugFlag(amg_solver, debug_flag);
3908 HYPRE_BoomerAMGSetVariant(amg_solver, variant);
3909 HYPRE_BoomerAMGSetOverlap(amg_solver, overlap);
3910 HYPRE_BoomerAMGSetDomainType(amg_solver, domain_type);
3911 HYPRE_BoomerAMGSetSchwarzUseNonSymm(amg_solver, use_nonsymm_schwarz);
3912 HYPRE_BoomerAMGSetSchwarzRlxWeight(amg_solver, schwarz_rlx_weight);
3913 if (eu_level < 0) eu_level = 0;
3914 HYPRE_BoomerAMGSetEuLevel(amg_solver, eu_level);
3915 HYPRE_BoomerAMGSetEuBJ(amg_solver, eu_bj);
3916 HYPRE_BoomerAMGSetEuSparseA(amg_solver, eu_sparse_A);
3917 HYPRE_BoomerAMGSetNumFunctions(amg_solver, num_functions);
3918 HYPRE_BoomerAMGSetAggNumLevels(amg_solver, agg_num_levels);
3919 HYPRE_BoomerAMGSetAggInterpType(amg_solver, agg_interp_type);
3920 HYPRE_BoomerAMGSetAggTruncFactor(amg_solver, agg_trunc_factor);
3921 HYPRE_BoomerAMGSetAggP12TruncFactor(amg_solver, agg_P12_trunc_factor);
3922 HYPRE_BoomerAMGSetAggPMaxElmts(amg_solver, agg_P_max_elmts);
3923 HYPRE_BoomerAMGSetAggP12MaxElmts(amg_solver, agg_P12_max_elmts);
3924 HYPRE_BoomerAMGSetNumPaths(amg_solver, num_paths);
3925 HYPRE_BoomerAMGSetNodal(amg_solver, nodal);
3926 HYPRE_BoomerAMGSetNodalDiag(amg_solver, nodal_diag);
3927 if (num_functions > 1)
3928 HYPRE_BoomerAMGSetDofFunc(amg_solver, dof_func);
3929 HYPRE_BoomerAMGSetAdditive(amg_solver, additive);
3930 HYPRE_BoomerAMGSetMultAdditive(amg_solver, mult_add);
3931 HYPRE_BoomerAMGSetSimple(amg_solver, simple);
3932 HYPRE_BoomerAMGSetAddLastLvl(amg_solver, add_last_lvl);
3933 HYPRE_BoomerAMGSetMultAddPMaxElmts(amg_solver, add_P_max_elmts);
3934 HYPRE_BoomerAMGSetMultAddTruncFactor(amg_solver, add_trunc_factor);
3935 HYPRE_BoomerAMGSetRAP2(amg_solver, rap2);
3936 HYPRE_BoomerAMGSetModuleRAP2(amg_solver, mod_rap2);
3937 HYPRE_BoomerAMGSetKeepTranspose(amg_solver, keepTranspose);
3938 if (nongalerk_tol)
3939 {
3940 HYPRE_BoomerAMGSetNonGalerkinTol(amg_solver, nongalerk_tol[nongalerk_num_tol-1]);
3941 for (i=0; i < nongalerk_num_tol-1; i++)
3942 HYPRE_BoomerAMGSetLevelNonGalerkinTol(amg_solver, nongalerk_tol[i], i);
3943 }
3944
3945 HYPRE_BoomerAMGSetup(amg_solver, parcsr_A, b, x);
3946
3947 hypre_EndTiming(time_index);
3948 hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
3949 hypre_FinalizeTiming(time_index);
3950 hypre_ClearTiming();
3951
3952 time_index = hypre_InitializeTiming("BoomerAMG Solve");
3953 hypre_BeginTiming(time_index);
3954
3955 HYPRE_BoomerAMGSolve(amg_solver, parcsr_A, b, x);
3956
3957 hypre_EndTiming(time_index);
3958 hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
3959 hypre_FinalizeTiming(time_index);
3960 hypre_ClearTiming();
3961
3962 #if SECOND_TIME
3963 /* run a second time to check for memory leaks */
3964 HYPRE_ParVectorSetRandomValues(x, 775);
3965 HYPRE_BoomerAMGSetup(amg_solver, parcsr_A, b, x);
3966 HYPRE_BoomerAMGSolve(amg_solver, parcsr_A, b, x);
3967 #endif
3968
3969 HYPRE_BoomerAMGDestroy(amg_solver);
3970 }
3971
3972 if (solver_id == 999)
3973 {
3974 HYPRE_IJMatrix ij_M;
3975 HYPRE_ParCSRMatrix parcsr_mat;
3976
3977 /* use ParaSails preconditioner */
3978 if (myid == 0) hypre_printf("Test ParaSails Build IJMatrix\n");
3979
3980 HYPRE_IJMatrixPrint(ij_A, "parasails.in");
3981
3982 HYPRE_ParaSailsCreate(hypre_MPI_COMM_WORLD, &pcg_precond);
3983 HYPRE_ParaSailsSetParams(pcg_precond, 0., 0);
3984 HYPRE_ParaSailsSetFilter(pcg_precond, 0.);
3985 HYPRE_ParaSailsSetLogging(pcg_precond, ioutdat);
3986
3987 HYPRE_IJMatrixGetObject( ij_A, &object);
3988 parcsr_mat = (HYPRE_ParCSRMatrix) object;
3989
3990 HYPRE_ParaSailsSetup(pcg_precond, parcsr_mat, NULL, NULL);
3991 HYPRE_ParaSailsBuildIJMatrix(pcg_precond, &ij_M);
3992 HYPRE_IJMatrixPrint(ij_M, "parasails.out");
3993
3994 if (myid == 0) hypre_printf("Printed to parasails.out.\n");
3995 exit(0);
3996 }
3997
3998 /*-----------------------------------------------------------
3999 * Solve the system using PCG
4000 *-----------------------------------------------------------*/
4001
4002 /* begin lobpcg */
4003 if (!lobpcgFlag && (solver_id == 1 || solver_id == 2 || solver_id == 8 ||
4004 solver_id == 12 || solver_id == 14 || solver_id == 43 || solver_id == 71))
4005 /*end lobpcg */
4006 {
4007 time_index = hypre_InitializeTiming("PCG Setup");
4008 hypre_BeginTiming(time_index);
4009
4010 HYPRE_ParCSRPCGCreate(hypre_MPI_COMM_WORLD, &pcg_solver);
4011 HYPRE_PCGSetMaxIter(pcg_solver, max_iter);
4012 HYPRE_PCGSetTol(pcg_solver, tol);
4013 HYPRE_PCGSetTwoNorm(pcg_solver, 1);
4014 HYPRE_PCGSetRelChange(pcg_solver, rel_change);
4015 HYPRE_PCGSetPrintLevel(pcg_solver, ioutdat);
4016 HYPRE_PCGSetAbsoluteTol(pcg_solver, atol);
4017 HYPRE_PCGSetRecomputeResidual(pcg_solver, recompute_res);
4018
4019 if (solver_id == 1)
4020 {
4021 /* use BoomerAMG as preconditioner */
4022 if (myid == 0) hypre_printf("Solver: AMG-PCG\n");
4023 HYPRE_BoomerAMGCreate(&pcg_precond);
4024 /* BM Aug 25, 2006 */
4025 HYPRE_BoomerAMGSetCGCIts(pcg_precond, cgcits);
4026 HYPRE_BoomerAMGSetInterpType(pcg_precond, interp_type);
4027 HYPRE_BoomerAMGSetPostInterpType(pcg_precond, post_interp_type);
4028 HYPRE_BoomerAMGSetNumSamples(pcg_precond, gsmg_samples);
4029 HYPRE_BoomerAMGSetTol(pcg_precond, pc_tol);
4030 HYPRE_BoomerAMGSetCoarsenType(pcg_precond, coarsen_type);
4031 HYPRE_BoomerAMGSetCoarsenCutFactor(pcg_precond, coarsen_cut_factor);
4032 HYPRE_BoomerAMGSetMeasureType(pcg_precond, measure_type);
4033 HYPRE_BoomerAMGSetCPoints(pcg_precond, max_levels, num_cpt, cpt_index);
4034 HYPRE_BoomerAMGSetFPoints(pcg_precond, num_fpt, fpt_index);
4035 HYPRE_BoomerAMGSetIsolatedFPoints(pcg_precond, num_isolated_fpt, isolated_fpt_index);
4036 HYPRE_BoomerAMGSetStrongThreshold(pcg_precond, strong_threshold);
4037 HYPRE_BoomerAMGSetSeqThreshold(pcg_precond, seq_threshold);
4038 HYPRE_BoomerAMGSetRedundant(pcg_precond, redundant);
4039 HYPRE_BoomerAMGSetMaxCoarseSize(pcg_precond, coarse_threshold);
4040 HYPRE_BoomerAMGSetMinCoarseSize(pcg_precond, min_coarse_size);
4041 HYPRE_BoomerAMGSetTruncFactor(pcg_precond, trunc_factor);
4042 HYPRE_BoomerAMGSetPMaxElmts(pcg_precond, P_max_elmts);
4043 HYPRE_BoomerAMGSetJacobiTruncThreshold(pcg_precond, jacobi_trunc_threshold);
4044 HYPRE_BoomerAMGSetSCommPkgSwitch(pcg_precond, S_commpkg_switch);
4045 HYPRE_BoomerAMGSetPrintLevel(pcg_precond, poutdat);
4046 HYPRE_BoomerAMGSetPrintFileName(pcg_precond, "driver.out.log");
4047 HYPRE_BoomerAMGSetMaxIter(pcg_precond, 1);
4048 HYPRE_BoomerAMGSetCycleType(pcg_precond, cycle_type);
4049 HYPRE_BoomerAMGSetFCycle(pcg_precond, fcycle);
4050 HYPRE_BoomerAMGSetNumSweeps(pcg_precond, num_sweeps);
4051 HYPRE_BoomerAMGSetISType(pcg_precond, IS_type);
4052 HYPRE_BoomerAMGSetNumCRRelaxSteps(pcg_precond, num_CR_relax_steps);
4053 HYPRE_BoomerAMGSetCRRate(pcg_precond, CR_rate);
4054 HYPRE_BoomerAMGSetCRStrongTh(pcg_precond, CR_strong_th);
4055 HYPRE_BoomerAMGSetCRUseCG(pcg_precond, CR_use_CG);
4056 if (relax_type > -1) HYPRE_BoomerAMGSetRelaxType(pcg_precond, relax_type);
4057 if (relax_down > -1)
4058 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_down, 1);
4059 if (relax_up > -1)
4060 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_up, 2);
4061 if (relax_coarse > -1)
4062 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_coarse, 3);
4063 HYPRE_BoomerAMGSetAddRelaxType(pcg_precond, add_relax_type);
4064 HYPRE_BoomerAMGSetAddRelaxWt(pcg_precond, add_relax_wt);
4065 HYPRE_BoomerAMGSetChebyOrder(pcg_precond, cheby_order);
4066 HYPRE_BoomerAMGSetChebyFraction(pcg_precond, cheby_fraction);
4067 HYPRE_BoomerAMGSetChebyEigEst(pcg_precond, cheby_eig_est);
4068 HYPRE_BoomerAMGSetChebyVariant(pcg_precond, cheby_variant);
4069 HYPRE_BoomerAMGSetChebyScale(pcg_precond, cheby_scale);
4070 HYPRE_BoomerAMGSetRelaxOrder(pcg_precond, relax_order);
4071 HYPRE_BoomerAMGSetRelaxWt(pcg_precond, relax_wt);
4072 HYPRE_BoomerAMGSetOuterWt(pcg_precond, outer_wt);
4073 if (level_w > -1)
4074 HYPRE_BoomerAMGSetLevelRelaxWt(pcg_precond, relax_wt_level,level_w);
4075 if (level_ow > -1)
4076 HYPRE_BoomerAMGSetLevelOuterWt(pcg_precond,outer_wt_level,level_ow);
4077 HYPRE_BoomerAMGSetSmoothType(pcg_precond, smooth_type);
4078 HYPRE_BoomerAMGSetSmoothNumLevels(pcg_precond, smooth_num_levels);
4079 HYPRE_BoomerAMGSetSmoothNumSweeps(pcg_precond, smooth_num_sweeps);
4080 HYPRE_BoomerAMGSetMaxLevels(pcg_precond, max_levels);
4081 HYPRE_BoomerAMGSetMaxRowSum(pcg_precond, max_row_sum);
4082 HYPRE_BoomerAMGSetDebugFlag(pcg_precond, debug_flag);
4083 HYPRE_BoomerAMGSetNumFunctions(pcg_precond, num_functions);
4084 HYPRE_BoomerAMGSetAggNumLevels(pcg_precond, agg_num_levels);
4085 HYPRE_BoomerAMGSetAggInterpType(pcg_precond, agg_interp_type);
4086 HYPRE_BoomerAMGSetAggTruncFactor(pcg_precond, agg_trunc_factor);
4087 HYPRE_BoomerAMGSetAggP12TruncFactor(pcg_precond, agg_P12_trunc_factor);
4088 HYPRE_BoomerAMGSetAggPMaxElmts(pcg_precond, agg_P_max_elmts);
4089 HYPRE_BoomerAMGSetAggP12MaxElmts(pcg_precond, agg_P12_max_elmts);
4090 HYPRE_BoomerAMGSetNumPaths(pcg_precond, num_paths);
4091 HYPRE_BoomerAMGSetNodal(pcg_precond, nodal);
4092 HYPRE_BoomerAMGSetNodalDiag(pcg_precond, nodal_diag);
4093 HYPRE_BoomerAMGSetKeepSameSign(pcg_precond, keep_same_sign);
4094 HYPRE_BoomerAMGSetVariant(pcg_precond, variant);
4095 HYPRE_BoomerAMGSetOverlap(pcg_precond, overlap);
4096 HYPRE_BoomerAMGSetDomainType(pcg_precond, domain_type);
4097 HYPRE_BoomerAMGSetSchwarzUseNonSymm(pcg_precond, use_nonsymm_schwarz);
4098 HYPRE_BoomerAMGSetSchwarzRlxWeight(pcg_precond, schwarz_rlx_weight);
4099 if (eu_level < 0) eu_level = 0;
4100 HYPRE_BoomerAMGSetEuLevel(pcg_precond, eu_level);
4101 HYPRE_BoomerAMGSetEuBJ(pcg_precond, eu_bj);
4102 HYPRE_BoomerAMGSetEuSparseA(pcg_precond, eu_sparse_A);
4103 HYPRE_BoomerAMGSetCycleNumSweeps(pcg_precond, ns_coarse, 3);
4104 if (num_functions > 1)
4105 HYPRE_BoomerAMGSetDofFunc(pcg_precond, dof_func);
4106 HYPRE_BoomerAMGSetAdditive(pcg_precond, additive);
4107 HYPRE_BoomerAMGSetMultAdditive(pcg_precond, mult_add);
4108 HYPRE_BoomerAMGSetSimple(pcg_precond, simple);
4109 HYPRE_BoomerAMGSetAddLastLvl(pcg_precond, add_last_lvl);
4110 HYPRE_BoomerAMGSetMultAddPMaxElmts(pcg_precond, add_P_max_elmts);
4111 HYPRE_BoomerAMGSetMultAddTruncFactor(pcg_precond, add_trunc_factor);
4112 HYPRE_BoomerAMGSetRAP2(pcg_precond, rap2);
4113 HYPRE_BoomerAMGSetModuleRAP2(pcg_precond, mod_rap2);
4114 HYPRE_BoomerAMGSetKeepTranspose(pcg_precond, keepTranspose);
4115 #ifdef HYPRE_USING_DSUPERLU
4116 HYPRE_BoomerAMGSetDSLUThreshold(pcg_precond, dslu_threshold);
4117 #endif
4118 if (nongalerk_tol)
4119 {
4120 HYPRE_BoomerAMGSetNonGalerkinTol(pcg_precond, nongalerk_tol[nongalerk_num_tol-1]);
4121 for (i=0; i < nongalerk_num_tol-1; i++)
4122 HYPRE_BoomerAMGSetLevelNonGalerkinTol(pcg_precond, nongalerk_tol[i], i);
4123 }
4124 if (build_rbm)
4125 {
4126 HYPRE_BoomerAMGSetInterpVectors(pcg_precond, num_interp_vecs, interp_vecs);
4127 HYPRE_BoomerAMGSetInterpVecVariant(pcg_precond, interp_vec_variant);
4128 HYPRE_BoomerAMGSetInterpVecQMax(pcg_precond, Q_max);
4129 HYPRE_BoomerAMGSetInterpVecAbsQTrunc(pcg_precond, Q_trunc);
4130 }
4131 HYPRE_PCGSetMaxIter(pcg_solver, mg_max_iter);
4132 HYPRE_PCGSetPrecond(pcg_solver,
4133 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve,
4134 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup,
4135 pcg_precond);
4136 }
4137 else if (solver_id == 2)
4138 {
4139
4140 /* use diagonal scaling as preconditioner */
4141 if (myid == 0) hypre_printf("Solver: DS-PCG\n");
4142 pcg_precond = NULL;
4143
4144 HYPRE_PCGSetPrecond(pcg_solver,
4145 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRDiagScale,
4146 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRDiagScaleSetup,
4147 pcg_precond);
4148 }
4149 else if (solver_id == 8)
4150 {
4151 /* use ParaSails preconditioner */
4152 if (myid == 0) hypre_printf("Solver: ParaSails-PCG\n");
4153
4154 HYPRE_ParaSailsCreate(hypre_MPI_COMM_WORLD, &pcg_precond);
4155 HYPRE_ParaSailsSetParams(pcg_precond, sai_threshold, max_levels);
4156 HYPRE_ParaSailsSetFilter(pcg_precond, sai_filter);
4157 HYPRE_ParaSailsSetLogging(pcg_precond, poutdat);
4158
4159 HYPRE_PCGSetPrecond(pcg_solver,
4160 (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSolve,
4161 (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSetup,
4162 pcg_precond);
4163 }
4164 else if (solver_id == 12)
4165 {
4166 /* use Schwarz preconditioner */
4167 if (myid == 0) hypre_printf("Solver: Schwarz-PCG\n");
4168
4169 HYPRE_SchwarzCreate(&pcg_precond);
4170 HYPRE_SchwarzSetVariant(pcg_precond, variant);
4171 HYPRE_SchwarzSetOverlap(pcg_precond, overlap);
4172 HYPRE_SchwarzSetDomainType(pcg_precond, domain_type);
4173 HYPRE_SchwarzSetRelaxWeight(pcg_precond, schwarz_rlx_weight);
4174 HYPRE_SchwarzSetNonSymm(pcg_precond, use_nonsymm_schwarz);
4175 HYPRE_PCGSetPrecond(pcg_solver,
4176 (HYPRE_PtrToSolverFcn) HYPRE_SchwarzSolve,
4177 (HYPRE_PtrToSolverFcn) HYPRE_SchwarzSetup,
4178 pcg_precond);
4179 }
4180 else if (solver_id == 14)
4181 {
4182 /* use GSMG as preconditioner */
4183
4184 /* reset some smoother parameters */
4185
4186 /* fine grid */
4187 relax_order = 0;
4188
4189 if (myid == 0) hypre_printf("Solver: GSMG-PCG\n");
4190 HYPRE_BoomerAMGCreate(&pcg_precond);
4191 HYPRE_BoomerAMGSetGSMG(pcg_precond, 4);
4192 /* BM Aug 25, 2006 */
4193 HYPRE_BoomerAMGSetCGCIts(pcg_precond, cgcits);
4194 HYPRE_BoomerAMGSetInterpType(pcg_precond, interp_type);
4195 HYPRE_BoomerAMGSetPostInterpType(pcg_precond, post_interp_type);
4196 HYPRE_BoomerAMGSetNumSamples(pcg_precond, gsmg_samples);
4197 HYPRE_BoomerAMGSetTol(pcg_precond, pc_tol);
4198 HYPRE_BoomerAMGSetCoarsenType(pcg_precond, coarsen_type);
4199 HYPRE_BoomerAMGSetCoarsenCutFactor(pcg_precond, coarsen_cut_factor);
4200 HYPRE_BoomerAMGSetMeasureType(pcg_precond, measure_type);
4201 HYPRE_BoomerAMGSetCPoints(pcg_precond, max_levels, num_cpt, cpt_index);
4202 HYPRE_BoomerAMGSetFPoints(pcg_precond, num_fpt, fpt_index);
4203 HYPRE_BoomerAMGSetIsolatedFPoints(pcg_precond, num_isolated_fpt, isolated_fpt_index);
4204 HYPRE_BoomerAMGSetStrongThreshold(pcg_precond, strong_threshold);
4205 HYPRE_BoomerAMGSetSeqThreshold(pcg_precond, seq_threshold);
4206 HYPRE_BoomerAMGSetRedundant(pcg_precond, redundant);
4207 HYPRE_BoomerAMGSetMaxCoarseSize(pcg_precond, coarse_threshold);
4208 HYPRE_BoomerAMGSetMinCoarseSize(pcg_precond, min_coarse_size);
4209 HYPRE_BoomerAMGSetTruncFactor(pcg_precond, trunc_factor);
4210 HYPRE_BoomerAMGSetPMaxElmts(pcg_precond, P_max_elmts);
4211 HYPRE_BoomerAMGSetJacobiTruncThreshold(pcg_precond, jacobi_trunc_threshold);
4212 HYPRE_BoomerAMGSetSCommPkgSwitch(pcg_precond, S_commpkg_switch);
4213 HYPRE_BoomerAMGSetPrintLevel(pcg_precond, poutdat);
4214 HYPRE_BoomerAMGSetPrintFileName(pcg_precond, "driver.out.log");
4215 HYPRE_BoomerAMGSetMaxIter(pcg_precond, 1);
4216 HYPRE_BoomerAMGSetCycleType(pcg_precond, cycle_type);
4217 HYPRE_BoomerAMGSetFCycle(pcg_precond, fcycle);
4218 HYPRE_BoomerAMGSetNumSweeps(pcg_precond, num_sweeps);
4219 HYPRE_BoomerAMGSetISType(pcg_precond, IS_type);
4220 HYPRE_BoomerAMGSetNumCRRelaxSteps(pcg_precond, num_CR_relax_steps);
4221 HYPRE_BoomerAMGSetCRRate(pcg_precond, CR_rate);
4222 HYPRE_BoomerAMGSetCRStrongTh(pcg_precond, CR_strong_th);
4223 HYPRE_BoomerAMGSetCRUseCG(pcg_precond, CR_use_CG);
4224 if (relax_type > -1) HYPRE_BoomerAMGSetRelaxType(pcg_precond, relax_type);
4225 if (relax_down > -1)
4226 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_down, 1);
4227 if (relax_up > -1)
4228 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_up, 2);
4229 if (relax_coarse > -1)
4230 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_coarse, 3);
4231 HYPRE_BoomerAMGSetAddRelaxType(pcg_precond, add_relax_type);
4232 HYPRE_BoomerAMGSetAddRelaxWt(pcg_precond, add_relax_wt);
4233 HYPRE_BoomerAMGSetRelaxOrder(pcg_precond, relax_order);
4234 HYPRE_BoomerAMGSetChebyOrder(pcg_precond, cheby_order);
4235 HYPRE_BoomerAMGSetChebyFraction(pcg_precond, cheby_fraction);
4236 HYPRE_BoomerAMGSetChebyEigEst(pcg_precond, cheby_eig_est);
4237 HYPRE_BoomerAMGSetChebyVariant(pcg_precond, cheby_variant);
4238 HYPRE_BoomerAMGSetChebyScale(pcg_precond, cheby_scale);
4239 HYPRE_BoomerAMGSetRelaxWt(pcg_precond, relax_wt);
4240 HYPRE_BoomerAMGSetOuterWt(pcg_precond, outer_wt);
4241 if (level_w > -1)
4242 HYPRE_BoomerAMGSetLevelRelaxWt(pcg_precond, relax_wt_level,level_w);
4243 if (level_ow > -1)
4244 HYPRE_BoomerAMGSetLevelOuterWt(pcg_precond,outer_wt_level,level_ow);
4245 HYPRE_BoomerAMGSetSmoothType(pcg_precond, smooth_type);
4246 HYPRE_BoomerAMGSetSmoothNumLevels(pcg_precond, smooth_num_levels);
4247 HYPRE_BoomerAMGSetSmoothNumSweeps(pcg_precond, smooth_num_sweeps);
4248 HYPRE_BoomerAMGSetVariant(pcg_precond, variant);
4249 HYPRE_BoomerAMGSetOverlap(pcg_precond, overlap);
4250 HYPRE_BoomerAMGSetDomainType(pcg_precond, domain_type);
4251 HYPRE_BoomerAMGSetSchwarzRlxWeight(pcg_precond, schwarz_rlx_weight);
4252 if (eu_level < 0) eu_level = 0;
4253 HYPRE_BoomerAMGSetEuLevel(pcg_precond, eu_level);
4254 HYPRE_BoomerAMGSetEuBJ(pcg_precond, eu_bj);
4255 HYPRE_BoomerAMGSetEuSparseA(pcg_precond, eu_sparse_A);
4256 HYPRE_BoomerAMGSetMaxLevels(pcg_precond, max_levels);
4257 HYPRE_BoomerAMGSetMaxRowSum(pcg_precond, max_row_sum);
4258 HYPRE_BoomerAMGSetDebugFlag(pcg_precond, debug_flag);
4259 HYPRE_BoomerAMGSetNumFunctions(pcg_precond, num_functions);
4260 HYPRE_BoomerAMGSetAggNumLevels(pcg_precond, agg_num_levels);
4261 HYPRE_BoomerAMGSetAggInterpType(pcg_precond, agg_interp_type);
4262 HYPRE_BoomerAMGSetAggTruncFactor(pcg_precond, agg_trunc_factor);
4263 HYPRE_BoomerAMGSetAggP12TruncFactor(pcg_precond, agg_P12_trunc_factor);
4264 HYPRE_BoomerAMGSetAggPMaxElmts(pcg_precond, agg_P_max_elmts);
4265 HYPRE_BoomerAMGSetAggP12MaxElmts(pcg_precond, agg_P12_max_elmts);
4266 HYPRE_BoomerAMGSetNumPaths(pcg_precond, num_paths);
4267 HYPRE_BoomerAMGSetNodal(pcg_precond, nodal);
4268 HYPRE_BoomerAMGSetNodalDiag(pcg_precond, nodal_diag);
4269 HYPRE_BoomerAMGSetCycleNumSweeps(pcg_precond, ns_coarse, 3);
4270 if (num_functions > 1)
4271 HYPRE_BoomerAMGSetDofFunc(pcg_precond, dof_func);
4272 HYPRE_BoomerAMGSetAdditive(pcg_precond, additive);
4273 HYPRE_BoomerAMGSetMultAdditive(pcg_precond, mult_add);
4274 HYPRE_BoomerAMGSetSimple(pcg_precond, simple);
4275 HYPRE_BoomerAMGSetAddLastLvl(pcg_precond, add_last_lvl);
4276 HYPRE_BoomerAMGSetMultAddPMaxElmts(pcg_precond, add_P_max_elmts);
4277 HYPRE_BoomerAMGSetMultAddTruncFactor(pcg_precond, add_trunc_factor);
4278 HYPRE_BoomerAMGSetRAP2(pcg_precond, rap2);
4279 HYPRE_BoomerAMGSetModuleRAP2(pcg_precond, mod_rap2);
4280 HYPRE_BoomerAMGSetKeepTranspose(pcg_precond, keepTranspose);
4281 #ifdef HYPRE_USING_DSUPERLU
4282 HYPRE_BoomerAMGSetDSLUThreshold(pcg_precond, dslu_threshold);
4283 #endif
4284 if (nongalerk_tol)
4285 {
4286 HYPRE_BoomerAMGSetNonGalerkinTol(pcg_precond, nongalerk_tol[nongalerk_num_tol-1]);
4287 for (i=0; i < nongalerk_num_tol-1; i++)
4288 HYPRE_BoomerAMGSetLevelNonGalerkinTol(pcg_precond, nongalerk_tol[i], i);
4289 }
4290 HYPRE_PCGSetMaxIter(pcg_solver, mg_max_iter);
4291 HYPRE_PCGSetPrecond(pcg_solver,
4292 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve,
4293 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup,
4294 pcg_precond);
4295 }
4296 else if (solver_id == 43)
4297 {
4298 /* use Euclid preconditioning */
4299 if (myid == 0) hypre_printf("Solver: Euclid-PCG\n");
4300
4301 HYPRE_EuclidCreate(hypre_MPI_COMM_WORLD, &pcg_precond);
4302
4303 /* note: There are three methods of setting run-time
4304 parameters for Euclid (see HYPRE_parcsr_ls.h); here
4305 we'll use what I think is simplest: let Euclid internally
4306 parse the command line.
4307 */
4308 if (eu_level > -1) HYPRE_EuclidSetLevel(pcg_precond, eu_level);
4309 if (eu_ilut) HYPRE_EuclidSetILUT(pcg_precond, eu_ilut);
4310 if (eu_sparse_A) HYPRE_EuclidSetSparseA(pcg_precond, eu_sparse_A);
4311 if (eu_row_scale) HYPRE_EuclidSetRowScale(pcg_precond, eu_row_scale);
4312 if (eu_bj) HYPRE_EuclidSetBJ(pcg_precond, eu_bj);
4313 HYPRE_EuclidSetStats(pcg_precond, eu_stats);
4314 HYPRE_EuclidSetMem(pcg_precond, eu_mem);
4315
4316 /*HYPRE_EuclidSetParams(pcg_precond, argc, argv);*/
4317
4318 HYPRE_PCGSetPrecond(pcg_solver,
4319 (HYPRE_PtrToSolverFcn) HYPRE_EuclidSolve,
4320 (HYPRE_PtrToSolverFcn) HYPRE_EuclidSetup,
4321 pcg_precond);
4322 }
4323 else if( solver_id == 71 )
4324 {
4325 /* use MGR preconditioning */
4326 if (myid == 0) hypre_printf("Solver: MGR-PCG\n");
4327
4328 HYPRE_MGRCreate(&pcg_precond);
4329
4330 mgr_num_cindexes = hypre_CTAlloc(HYPRE_Int, mgr_nlevels, HYPRE_MEMORY_HOST);
4331 for(i=0; i<mgr_nlevels; i++)
4332 { /* assume 1 coarse index per level */
4333 mgr_num_cindexes[i] = 1;
4334 }
4335 mgr_cindexes = hypre_CTAlloc(HYPRE_Int*, mgr_nlevels, HYPRE_MEMORY_HOST);
4336 for(i=0; i<mgr_nlevels; i++)
4337 {
4338 mgr_cindexes[i] = hypre_CTAlloc(HYPRE_Int, mgr_num_cindexes[i], HYPRE_MEMORY_HOST);
4339 }
4340 for(i=0; i<mgr_nlevels; i++)
4341 { /* assume coarse point is at index 0 */
4342 mgr_cindexes[i][0] = 0;
4343 }
4344 mgr_reserved_coarse_indexes = hypre_CTAlloc(HYPRE_BigInt, mgr_num_reserved_nodes, HYPRE_MEMORY_HOST);
4345 for(i=0; i<mgr_num_reserved_nodes; i++)
4346 { /* generate artificial reserved nodes */
4347 mgr_reserved_coarse_indexes[i] = last_local_row - (HYPRE_BigInt) i; //2*i+1;
4348 }
4349
4350 /* set MGR data by block */
4351 HYPRE_MGRSetCpointsByBlock( pcg_precond, mgr_bsize, mgr_nlevels, mgr_num_cindexes,mgr_cindexes);
4352 /* set reserved coarse nodes */
4353 if(mgr_num_reserved_nodes)HYPRE_MGRSetReservedCoarseNodes(pcg_precond, mgr_num_reserved_nodes, mgr_reserved_coarse_indexes);
4354
4355 /* set intermediate coarse grid strategy */
4356 HYPRE_MGRSetNonCpointsToFpoints(pcg_precond, mgr_non_c_to_f);
4357 /* set F relaxation strategy */
4358 HYPRE_MGRSetFRelaxMethod(pcg_precond, mgr_frelax_method);
4359 /* set relax type for single level F-relaxation and post-relaxation */
4360 HYPRE_MGRSetRelaxType(pcg_precond, 0);
4361 HYPRE_MGRSetNumRelaxSweeps(pcg_precond, 2);
4362 /* set interpolation type */
4363 HYPRE_MGRSetInterpType(pcg_precond, 2);
4364 HYPRE_MGRSetNumInterpSweeps(pcg_precond, 2);
4365 /* set print level */
4366 HYPRE_MGRSetPrintLevel(pcg_precond, 1);
4367 /* set max iterations */
4368 HYPRE_MGRSetMaxIter(pcg_precond, 1);
4369 HYPRE_MGRSetTol(pcg_precond, pc_tol);
4370
4371 /* create AMG coarse grid solver */
4372
4373 HYPRE_BoomerAMGCreate(&amg_solver);
4374 /* BM Aug 25, 2006 */
4375 HYPRE_BoomerAMGSetCGCIts(amg_solver, cgcits);
4376 HYPRE_BoomerAMGSetInterpType(amg_solver, 0);
4377 HYPRE_BoomerAMGSetPostInterpType(amg_solver, post_interp_type);
4378 HYPRE_BoomerAMGSetCoarsenType(amg_solver, 6);
4379 HYPRE_BoomerAMGSetPMaxElmts(amg_solver, 0);
4380 /* note: log is written to standard output, not to file */
4381 HYPRE_BoomerAMGSetPrintLevel(amg_solver, 1);
4382 HYPRE_BoomerAMGSetCycleType(amg_solver, cycle_type);
4383 HYPRE_BoomerAMGSetFCycle(amg_solver, fcycle);
4384 HYPRE_BoomerAMGSetNumSweeps(amg_solver, num_sweeps);
4385 HYPRE_BoomerAMGSetRelaxType(amg_solver, 3);
4386 if (relax_down > -1)
4387 HYPRE_BoomerAMGSetCycleRelaxType(amg_solver, relax_down, 1);
4388 if (relax_up > -1)
4389 HYPRE_BoomerAMGSetCycleRelaxType(amg_solver, relax_up, 2);
4390 if (relax_coarse > -1)
4391 HYPRE_BoomerAMGSetCycleRelaxType(amg_solver, relax_coarse, 3);
4392 HYPRE_BoomerAMGSetRelaxOrder(amg_solver, 1);
4393 HYPRE_BoomerAMGSetMaxLevels(amg_solver, max_levels);
4394 HYPRE_BoomerAMGSetSmoothType(amg_solver, smooth_type);
4395 HYPRE_BoomerAMGSetSmoothNumSweeps(amg_solver, smooth_num_sweeps);
4396 HYPRE_BoomerAMGSetMaxIter(amg_solver, 1);
4397 HYPRE_BoomerAMGSetTol(amg_solver, 0.0);
4398
4399 /* set the MGR coarse solver. Comment out to use default CG solver in MGR */
4400 HYPRE_MGRSetCoarseSolver( pcg_precond, HYPRE_BoomerAMGSolve, HYPRE_BoomerAMGSetup, amg_solver);
4401
4402 /* setup MGR-PCG solver */
4403 HYPRE_PCGSetPrecond(pcg_solver,
4404 (HYPRE_PtrToSolverFcn) HYPRE_MGRSolve,
4405 (HYPRE_PtrToSolverFcn) HYPRE_MGRSetup,
4406 pcg_precond);
4407
4408 }
4409
4410 HYPRE_PCGGetPrecond(pcg_solver, &pcg_precond_gotten);
4411 if (pcg_precond_gotten != pcg_precond)
4412 {
4413 hypre_printf("HYPRE_ParCSRPCGGetPrecond got bad precond\n");
4414 return(-1);
4415 }
4416 else
4417 if (myid == 0)
4418 hypre_printf("HYPRE_ParCSRPCGGetPrecond got good precond\n");
4419
4420 HYPRE_PCGSetup(pcg_solver, (HYPRE_Matrix)parcsr_A,
4421 (HYPRE_Vector)b, (HYPRE_Vector)x);
4422 hypre_EndTiming(time_index);
4423 hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
4424 hypre_FinalizeTiming(time_index);
4425 hypre_ClearTiming();
4426
4427 time_index = hypre_InitializeTiming("PCG Solve");
4428 hypre_BeginTiming(time_index);
4429
4430 HYPRE_PCGSolve(pcg_solver, (HYPRE_Matrix)parcsr_A,
4431 (HYPRE_Vector)b, (HYPRE_Vector)x);
4432
4433 hypre_EndTiming(time_index);
4434 hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
4435 hypre_FinalizeTiming(time_index);
4436 hypre_ClearTiming();
4437
4438 HYPRE_PCGGetNumIterations(pcg_solver, &num_iterations);
4439 HYPRE_PCGGetFinalRelativeResidualNorm(pcg_solver, &final_res_norm);
4440
4441 #if SECOND_TIME
4442 /* run a second time to check for memory leaks */
4443 HYPRE_ParVectorSetRandomValues(x, 775);
4444 time_index = hypre_InitializeTiming("PCG Setup");
4445 hypre_BeginTiming(time_index);
4446
4447 HYPRE_PCGSetup(pcg_solver, (HYPRE_Matrix)parcsr_A,
4448 (HYPRE_Vector)b, (HYPRE_Vector)x);
4449
4450 hypre_EndTiming(time_index);
4451 hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
4452 hypre_FinalizeTiming(time_index);
4453 hypre_ClearTiming();
4454
4455 time_index = hypre_InitializeTiming("PCG Solve");
4456 hypre_BeginTiming(time_index);
4457
4458 HYPRE_PCGSolve(pcg_solver, (HYPRE_Matrix)parcsr_A,
4459 (HYPRE_Vector)b, (HYPRE_Vector)x);
4460
4461 hypre_EndTiming(time_index);
4462 hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
4463 hypre_FinalizeTiming(time_index);
4464 hypre_ClearTiming();
4465 #endif
4466
4467 HYPRE_ParCSRPCGDestroy(pcg_solver);
4468
4469 if (solver_id == 1)
4470 {
4471 HYPRE_BoomerAMGDestroy(pcg_precond);
4472 }
4473 else if (solver_id == 8)
4474 {
4475 HYPRE_ParaSailsDestroy(pcg_precond);
4476 }
4477 else if (solver_id == 12)
4478 {
4479 HYPRE_SchwarzDestroy(pcg_precond);
4480 }
4481 else if (solver_id == 14)
4482 {
4483 HYPRE_BoomerAMGDestroy(pcg_precond);
4484 }
4485 else if (solver_id == 43)
4486 {
4487 HYPRE_EuclidDestroy(pcg_precond);
4488 }
4489 else if(solver_id == 71)
4490 {
4491 /* free memory */
4492 if(mgr_num_cindexes)
4493 hypre_TFree(mgr_num_cindexes, HYPRE_MEMORY_HOST);
4494 mgr_num_cindexes = NULL;
4495
4496 if(mgr_reserved_coarse_indexes)
4497 hypre_TFree(mgr_reserved_coarse_indexes, HYPRE_MEMORY_HOST);
4498 mgr_reserved_coarse_indexes = NULL;
4499
4500 if(mgr_cindexes)
4501 {
4502 for( i=0; i<mgr_nlevels; i++)
4503 {
4504 if(mgr_cindexes[i])
4505 hypre_TFree(mgr_cindexes[i], HYPRE_MEMORY_HOST);
4506 }
4507 hypre_TFree(mgr_cindexes, HYPRE_MEMORY_HOST);
4508 mgr_cindexes = NULL;
4509 }
4510
4511 HYPRE_BoomerAMGDestroy(amg_solver);
4512 HYPRE_MGRDestroy(pcg_precond);
4513 }
4514
4515 if (myid == 0)
4516 {
4517 hypre_printf("\n");
4518 hypre_printf("Iterations = %d\n", num_iterations);
4519 hypre_printf("Final Relative Residual Norm = %e\n", final_res_norm);
4520 hypre_printf("\n");
4521 }
4522
4523 }
4524
4525 /* begin lobpcg */
4526
4527 /*-----------------------------------------------------------
4528 * Solve the eigenvalue problem using LOBPCG
4529 *-----------------------------------------------------------*/
4530 if ( lobpcgFlag )
4531 {
4532 interpreter = hypre_CTAlloc(mv_InterfaceInterpreter, 1, HYPRE_MEMORY_HOST);
4533
4534 HYPRE_ParCSRSetupInterpreter( interpreter );
4535 HYPRE_ParCSRSetupMatvec(&matvec_fn);
4536
4537 if (myid != 0)
4538 verbosity = 0;
4539
4540 if ( lobpcgGen ) {
4541 BuildParIsoLaplacian(argc, argv, &parcsr_B);
4542
4543 ierr = HYPRE_ParCSRMatrixGetLocalRange( parcsr_B,
4544 &first_local_row, &last_local_row ,
4545 &first_local_col, &last_local_col );
4546
4547 local_num_rows = (HYPRE_Int)(last_local_row - first_local_row + 1);
4548 local_num_cols = (HYPRE_Int)(last_local_col - first_local_col + 1);
4549 ierr += HYPRE_ParCSRMatrixGetDims( parcsr_B, &M, &N );
4550
4551 ierr += HYPRE_IJMatrixCreate( comm, first_local_row, last_local_row,
4552 first_local_col, last_local_col,
4553 &ij_B );
4554
4555 ierr += HYPRE_IJMatrixSetObjectType( ij_B, HYPRE_PARCSR );
4556
4557 if (sparsity_known == 1) {
4558 diag_sizes = hypre_CTAlloc(HYPRE_Int, local_num_rows, HYPRE_MEMORY_HOST);
4559 offdiag_sizes = hypre_CTAlloc(HYPRE_Int, local_num_rows, HYPRE_MEMORY_HOST);
4560 local_row = 0;
4561 for (big_i=first_local_row; big_i<= last_local_row; big_i++) {
4562 ierr += HYPRE_ParCSRMatrixGetRow( parcsr_B, big_i, &size,
4563 &col_inds, &values );
4564 for (j=0; j < size; j++)
4565 {
4566 if (col_inds[j] < first_local_row || col_inds[j] > last_local_row)
4567 offdiag_sizes[local_row]++;
4568 else
4569 diag_sizes[local_row]++;
4570 }
4571 local_row++;
4572 ierr += HYPRE_ParCSRMatrixRestoreRow( parcsr_B, big_i, &size,
4573 &col_inds, &values );
4574 }
4575 ierr += HYPRE_IJMatrixSetDiagOffdSizes( ij_B,
4576 (const HYPRE_Int *) diag_sizes,
4577 (const HYPRE_Int *) offdiag_sizes );
4578 hypre_TFree(diag_sizes, HYPRE_MEMORY_HOST);
4579 hypre_TFree(offdiag_sizes, HYPRE_MEMORY_HOST);
4580
4581 ierr = HYPRE_IJMatrixInitialize( ij_B );
4582
4583 for (big_i=first_local_row; big_i<= last_local_row; big_i++)
4584 {
4585 ierr += HYPRE_ParCSRMatrixGetRow( parcsr_B, big_i, &size,
4586 &col_inds, &values );
4587
4588 ierr += HYPRE_IJMatrixSetValues( ij_B, 1, &size, &big_i,
4589 (const HYPRE_BigInt *) col_inds,
4590 (const HYPRE_Real *) values );
4591
4592 ierr += HYPRE_ParCSRMatrixRestoreRow( parcsr_B, big_i, &size,
4593 &col_inds, &values );
4594 }
4595 }
4596 else
4597 {
4598 row_sizes = hypre_CTAlloc(HYPRE_Int, local_num_rows, HYPRE_MEMORY_HOST);
4599
4600 size = 5; /* this is in general too low, and supposed to test
4601 the capability of the reallocation of the interface */
4602
4603 if (sparsity_known == 0) /* tries a more accurate estimate of the
4604 storage */
4605 {
4606 if (build_matrix_type == 2) size = 7;
4607 if (build_matrix_type == 3) size = 9;
4608 if (build_matrix_type == 4) size = 27;
4609 }
4610
4611 for (i=0; i < local_num_rows; i++)
4612 row_sizes[i] = size;
4613
4614 ierr = HYPRE_IJMatrixSetRowSizes ( ij_B, (const HYPRE_Int *) row_sizes );
4615
4616 hypre_TFree(row_sizes, HYPRE_MEMORY_HOST);
4617
4618 ierr = HYPRE_IJMatrixInitialize( ij_B );
4619
4620 /* Loop through all locally stored rows and insert them into ij_matrix */
4621 for (big_i=first_local_row; big_i<= last_local_row; big_i++)
4622 {
4623 ierr += HYPRE_ParCSRMatrixGetRow( parcsr_B, big_i, &size,
4624 &col_inds, &values );
4625
4626 ierr += HYPRE_IJMatrixSetValues( ij_B, 1, &size, &big_i,
4627 (const HYPRE_BigInt *) col_inds,
4628 (const HYPRE_Real *) values );
4629
4630 ierr += HYPRE_ParCSRMatrixRestoreRow( parcsr_B, big_i, &size,
4631 &col_inds, &values );
4632 }
4633 }
4634
4635 ierr += HYPRE_IJMatrixAssemble( ij_B );
4636
4637 ierr += HYPRE_ParCSRMatrixDestroy(parcsr_B);
4638
4639 ierr += HYPRE_IJMatrixGetObject( ij_B, &object);
4640 parcsr_B = (HYPRE_ParCSRMatrix) object;
4641
4642 } /* if ( lobpcgGen ) */
4643
4644
4645 if ( pcgIterations > 0 ) /* do inner pcg iterations */
4646 {
4647 time_index = hypre_InitializeTiming("PCG Setup");
4648 hypre_BeginTiming(time_index);
4649
4650 HYPRE_ParCSRPCGCreate(hypre_MPI_COMM_WORLD, &pcg_solver);
4651 HYPRE_PCGSetMaxIter(pcg_solver, pcgIterations);
4652 HYPRE_PCGSetTol(pcg_solver, pcgTol);
4653 HYPRE_PCGSetTwoNorm(pcg_solver, two_norm);
4654 HYPRE_PCGSetRelChange(pcg_solver, 0);
4655 HYPRE_PCGSetPrintLevel(pcg_solver, 0);
4656 HYPRE_PCGSetRecomputeResidual(pcg_solver, recompute_res);
4657
4658 HYPRE_PCGGetPrecond(pcg_solver, &pcg_precond);
4659
4660 if (solver_id == 1)
4661 {
4662 /* use BoomerAMG as preconditioner */
4663 if (myid == 0) hypre_printf("Solver: AMG-PCG\n");
4664 HYPRE_BoomerAMGCreate(&pcg_precond);
4665 /* BM Aug 25, 2006 */
4666 HYPRE_BoomerAMGSetCGCIts(pcg_precond, cgcits);
4667 HYPRE_BoomerAMGSetInterpType(pcg_precond, interp_type);
4668 HYPRE_BoomerAMGSetNumSamples(pcg_precond, gsmg_samples);
4669 HYPRE_BoomerAMGSetTol(pcg_precond, pc_tol);
4670 HYPRE_BoomerAMGSetCoarsenType(pcg_precond, (hybrid*coarsen_type));
4671 HYPRE_BoomerAMGSetCoarsenCutFactor(pcg_precond, coarsen_cut_factor);
4672 HYPRE_BoomerAMGSetCPoints(pcg_precond, max_levels, num_cpt, cpt_index);
4673 HYPRE_BoomerAMGSetFPoints(pcg_precond, num_fpt, fpt_index);
4674 HYPRE_BoomerAMGSetIsolatedFPoints(pcg_precond, num_isolated_fpt, isolated_fpt_index);
4675 HYPRE_BoomerAMGSetMeasureType(pcg_precond, measure_type);
4676 HYPRE_BoomerAMGSetStrongThreshold(pcg_precond, strong_threshold);
4677 HYPRE_BoomerAMGSetTruncFactor(pcg_precond, trunc_factor);
4678 HYPRE_BoomerAMGSetPMaxElmts(pcg_precond, P_max_elmts);
4679 HYPRE_BoomerAMGSetPostInterpType(pcg_precond, post_interp_type);
4680 HYPRE_BoomerAMGSetJacobiTruncThreshold(pcg_precond, jacobi_trunc_threshold);
4681 HYPRE_BoomerAMGSetPrintLevel(pcg_precond, poutdat);
4682 HYPRE_BoomerAMGSetPrintFileName(pcg_precond, "driver.out.log");
4683 HYPRE_BoomerAMGSetMaxIter(pcg_precond, 1);
4684 HYPRE_BoomerAMGSetCycleType(pcg_precond, cycle_type);
4685 HYPRE_BoomerAMGSetFCycle(pcg_precond, fcycle);
4686 HYPRE_BoomerAMGSetNumSweeps(pcg_precond, num_sweeps);
4687 if (relax_type > -1) HYPRE_BoomerAMGSetRelaxType(pcg_precond, relax_type);
4688 if (relax_down > -1)
4689 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_down, 1);
4690 if (relax_up > -1)
4691 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_up, 2);
4692 if (relax_coarse > -1)
4693 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_coarse, 3);
4694 HYPRE_BoomerAMGSetAddRelaxType(pcg_precond, add_relax_type);
4695 HYPRE_BoomerAMGSetAddRelaxWt(pcg_precond, add_relax_wt);
4696 HYPRE_BoomerAMGSetRelaxWt(pcg_precond, relax_wt);
4697 HYPRE_BoomerAMGSetOuterWt(pcg_precond, outer_wt);
4698 HYPRE_BoomerAMGSetSmoothType(pcg_precond, smooth_type);
4699 HYPRE_BoomerAMGSetSmoothNumLevels(pcg_precond, smooth_num_levels);
4700 HYPRE_BoomerAMGSetSmoothNumSweeps(pcg_precond, smooth_num_sweeps);
4701 HYPRE_BoomerAMGSetRelaxOrder(pcg_precond, relax_order);
4702 HYPRE_BoomerAMGSetMaxLevels(pcg_precond, max_levels);
4703 HYPRE_BoomerAMGSetMaxRowSum(pcg_precond, max_row_sum);
4704 HYPRE_BoomerAMGSetDebugFlag(pcg_precond, debug_flag);
4705 HYPRE_BoomerAMGSetNumFunctions(pcg_precond, num_functions);
4706 HYPRE_BoomerAMGSetNumPaths(pcg_precond, num_paths);
4707 HYPRE_BoomerAMGSetAggNumLevels(pcg_precond, agg_num_levels);
4708 HYPRE_BoomerAMGSetVariant(pcg_precond, variant);
4709 HYPRE_BoomerAMGSetOverlap(pcg_precond, overlap);
4710 HYPRE_BoomerAMGSetDomainType(pcg_precond, domain_type);
4711 HYPRE_BoomerAMGSetSchwarzRlxWeight(pcg_precond, schwarz_rlx_weight);
4712 if (num_functions > 1)
4713 HYPRE_BoomerAMGSetDofFunc(pcg_precond, dof_func);
4714 HYPRE_PCGSetPrecond(pcg_solver,
4715 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve,
4716 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup,
4717 pcg_precond);
4718 }
4719 else if (solver_id == 2)
4720 {
4721 /* use diagonal scaling as preconditioner */
4722 if (myid == 0) hypre_printf("Solver: DS-PCG\n");
4723 pcg_precond = NULL;
4724
4725 HYPRE_PCGSetPrecond(pcg_solver,
4726 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRDiagScale,
4727 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRDiagScaleSetup,
4728 pcg_precond);
4729 }
4730 else if (solver_id == 8)
4731 {
4732 /* use ParaSails preconditioner */
4733 if (myid == 0) hypre_printf("Solver: ParaSails-PCG\n");
4734
4735 HYPRE_ParaSailsCreate(hypre_MPI_COMM_WORLD, &pcg_precond);
4736 HYPRE_ParaSailsSetParams(pcg_precond, sai_threshold, max_levels);
4737 HYPRE_ParaSailsSetFilter(pcg_precond, sai_filter);
4738 HYPRE_ParaSailsSetLogging(pcg_precond, poutdat);
4739
4740 HYPRE_PCGSetPrecond(pcg_solver,
4741 (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSolve,
4742 (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSetup,
4743 pcg_precond);
4744 }
4745 else if (solver_id == 12)
4746 {
4747 /* use Schwarz preconditioner */
4748 if (myid == 0) hypre_printf("Solver: Schwarz-PCG\n");
4749
4750 HYPRE_SchwarzCreate(&pcg_precond);
4751 HYPRE_SchwarzSetVariant(pcg_precond, variant);
4752 HYPRE_SchwarzSetOverlap(pcg_precond, overlap);
4753 HYPRE_SchwarzSetDomainType(pcg_precond, domain_type);
4754 HYPRE_SchwarzSetRelaxWeight(pcg_precond, schwarz_rlx_weight);
4755
4756 HYPRE_PCGSetPrecond(pcg_solver,
4757 (HYPRE_PtrToSolverFcn) HYPRE_SchwarzSolve,
4758 (HYPRE_PtrToSolverFcn) HYPRE_SchwarzSetup,
4759 pcg_precond);
4760 }
4761 else if (solver_id == 14)
4762 {
4763 /* use GSMG as preconditioner */
4764
4765 /* reset some smoother parameters */
4766
4767 num_sweeps = num_sweep;
4768 relax_type = relax_default;
4769 relax_order = 0;
4770
4771 if (myid == 0) hypre_printf("Solver: GSMG-PCG\n");
4772 HYPRE_BoomerAMGCreate(&pcg_precond);
4773 /* BM Aug 25, 2006 */
4774 HYPRE_BoomerAMGSetCGCIts(pcg_precond, cgcits);
4775 HYPRE_BoomerAMGSetGSMG(pcg_precond, 4);
4776 HYPRE_BoomerAMGSetInterpType(pcg_precond, interp_type);
4777 HYPRE_BoomerAMGSetNumSamples(pcg_precond, gsmg_samples);
4778 HYPRE_BoomerAMGSetTol(pcg_precond, pc_tol);
4779 HYPRE_BoomerAMGSetCoarsenType(pcg_precond, (hybrid*coarsen_type));
4780 HYPRE_BoomerAMGSetCoarsenCutFactor(pcg_precond, coarsen_cut_factor);
4781 HYPRE_BoomerAMGSetCPoints(pcg_precond, max_levels, num_cpt, cpt_index);
4782 HYPRE_BoomerAMGSetFPoints(pcg_precond, num_fpt, fpt_index);
4783 HYPRE_BoomerAMGSetIsolatedFPoints(pcg_precond, num_isolated_fpt, isolated_fpt_index);
4784 HYPRE_BoomerAMGSetMeasureType(pcg_precond, measure_type);
4785 HYPRE_BoomerAMGSetStrongThreshold(pcg_precond, strong_threshold);
4786 HYPRE_BoomerAMGSetTruncFactor(pcg_precond, trunc_factor);
4787 HYPRE_BoomerAMGSetPMaxElmts(pcg_precond, P_max_elmts);
4788 HYPRE_BoomerAMGSetPostInterpType(pcg_precond, post_interp_type);
4789 HYPRE_BoomerAMGSetJacobiTruncThreshold(pcg_precond, jacobi_trunc_threshold);
4790 HYPRE_BoomerAMGSetPrintLevel(pcg_precond, poutdat);
4791 HYPRE_BoomerAMGSetPrintFileName(pcg_precond, "driver.out.log");
4792 HYPRE_BoomerAMGSetMaxIter(pcg_precond, 1);
4793 HYPRE_BoomerAMGSetCycleType(pcg_precond, cycle_type);
4794 HYPRE_BoomerAMGSetFCycle(pcg_precond, fcycle);
4795 HYPRE_BoomerAMGSetNumSweeps(pcg_precond, num_sweeps);
4796 HYPRE_BoomerAMGSetRelaxType(pcg_precond, relax_type);
4797 HYPRE_BoomerAMGSetRelaxWt(pcg_precond, relax_wt);
4798 HYPRE_BoomerAMGSetAddRelaxType(pcg_precond, add_relax_type);
4799 HYPRE_BoomerAMGSetAddRelaxWt(pcg_precond, add_relax_wt);
4800 HYPRE_BoomerAMGSetOuterWt(pcg_precond, outer_wt);
4801 HYPRE_BoomerAMGSetSmoothType(pcg_precond, smooth_type);
4802 HYPRE_BoomerAMGSetSmoothNumLevels(pcg_precond, smooth_num_levels);
4803 HYPRE_BoomerAMGSetSmoothNumSweeps(pcg_precond, smooth_num_sweeps);
4804 HYPRE_BoomerAMGSetVariant(pcg_precond, variant);
4805 HYPRE_BoomerAMGSetOverlap(pcg_precond, overlap);
4806 HYPRE_BoomerAMGSetDomainType(pcg_precond, domain_type);
4807 HYPRE_BoomerAMGSetSchwarzRlxWeight(pcg_precond, schwarz_rlx_weight);
4808 HYPRE_BoomerAMGSetRelaxOrder(pcg_precond, relax_order);
4809 HYPRE_BoomerAMGSetMaxLevels(pcg_precond, max_levels);
4810 HYPRE_BoomerAMGSetMaxRowSum(pcg_precond, max_row_sum);
4811 HYPRE_BoomerAMGSetDebugFlag(pcg_precond, debug_flag);
4812 HYPRE_BoomerAMGSetNumFunctions(pcg_precond, num_functions);
4813 HYPRE_BoomerAMGSetNumPaths(pcg_precond, num_paths);
4814 HYPRE_BoomerAMGSetAggNumLevels(pcg_precond, agg_num_levels);
4815 if (num_functions > 1)
4816 HYPRE_BoomerAMGSetDofFunc(pcg_precond, dof_func);
4817 HYPRE_PCGSetPrecond(pcg_solver,
4818 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve,
4819 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup,
4820 pcg_precond);
4821 }
4822 else if (solver_id == 43)
4823 {
4824 /* use Euclid preconditioning */
4825 if (myid == 0) hypre_printf("Solver: Euclid-PCG\n");
4826
4827 HYPRE_EuclidCreate(hypre_MPI_COMM_WORLD, &pcg_precond);
4828
4829 /* note: There are three methods of setting run-time
4830 * parameters for Euclid: (see HYPRE_parcsr_ls.h); here
4831 * we'll use what I think is simplest: let Euclid internally
4832 * parse the command line.
4833 * */
4834 HYPRE_EuclidSetParams(pcg_precond, argc, argv);
4835
4836 HYPRE_PCGSetPrecond(pcg_solver,
4837 (HYPRE_PtrToSolverFcn) HYPRE_EuclidSolve,
4838 (HYPRE_PtrToSolverFcn) HYPRE_EuclidSetup,
4839 pcg_precond);
4840 }
4841 else if (solver_id != NO_SOLVER )
4842 {
4843 if ( verbosity )
4844 hypre_printf("Solver ID not recognized - running inner PCG iterations without preconditioner\n\n");
4845 }
4846
4847 HYPRE_PCGGetPrecond(pcg_solver, &pcg_precond_gotten);
4848 if (pcg_precond_gotten != pcg_precond)
4849 {
4850 hypre_printf("HYPRE_ParCSRPCGGetPrecond got bad precond\n");
4851 return(-1);
4852 }
4853 else
4854 if (myid == 0)
4855 hypre_printf("HYPRE_ParCSRPCGGetPrecond got good precond\n");
4856
4857 /* HYPRE_PCGSetup(pcg_solver, (HYPRE_Matrix)parcsr_A,
4858 * (HYPRE_Vector)b, (HYPRE_Vector)x); */
4859
4860 hypre_EndTiming(time_index);
4861 hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
4862 hypre_FinalizeTiming(time_index);
4863 hypre_ClearTiming();
4864
4865 HYPRE_LOBPCGCreate(interpreter, &matvec_fn, &lobpcg_solver);
4866
4867 HYPRE_LOBPCGSetMaxIter(lobpcg_solver, maxIterations);
4868 HYPRE_LOBPCGSetPrecondUsageMode(lobpcg_solver, pcgMode);
4869 HYPRE_LOBPCGSetTol(lobpcg_solver, tol);
4870 HYPRE_LOBPCGSetPrintLevel(lobpcg_solver, verbosity);
4871
4872 HYPRE_LOBPCGSetPrecond(lobpcg_solver,
4873 (HYPRE_PtrToSolverFcn) HYPRE_PCGSolve,
4874 (HYPRE_PtrToSolverFcn) HYPRE_PCGSetup,
4875 pcg_solver);
4876
4877 HYPRE_LOBPCGSetupT(lobpcg_solver, (HYPRE_Matrix)parcsr_A,
4878 (HYPRE_Vector)x);
4879
4880 HYPRE_LOBPCGSetup(lobpcg_solver, (HYPRE_Matrix)parcsr_A,
4881 (HYPRE_Vector)b, (HYPRE_Vector)x);
4882
4883 if ( lobpcgGen )
4884 HYPRE_LOBPCGSetupB(lobpcg_solver, (HYPRE_Matrix)parcsr_B,
4885 (HYPRE_Vector)x);
4886
4887 if ( vFromFileFlag ) {
4888 eigenvectors = mv_MultiVectorWrap( interpreter,
4889 HYPRE_ParCSRMultiVectorRead(hypre_MPI_COMM_WORLD,
4890 interpreter,
4891 "vectors" ),1);
4892 hypre_assert( eigenvectors != NULL );
4893 blockSize = mv_MultiVectorWidth( eigenvectors );
4894 }
4895 else {
4896 eigenvectors = mv_MultiVectorCreateFromSampleVector( interpreter,
4897 blockSize,
4898 x );
4899 if ( lobpcgSeed )
4900 mv_MultiVectorSetRandom( eigenvectors, lobpcgSeed );
4901 else
4902 mv_MultiVectorSetRandom( eigenvectors, (HYPRE_Int)time(0) );
4903 }
4904
4905 if ( constrained ) {
4906 constraints = mv_MultiVectorWrap( interpreter,
4907 HYPRE_ParCSRMultiVectorRead(hypre_MPI_COMM_WORLD,
4908 interpreter,
4909 "vectors" ),1);
4910 hypre_assert( constraints != NULL );
4911 }
4912
4913 eigenvalues = hypre_CTAlloc(HYPRE_Real, blockSize, HYPRE_MEMORY_HOST);
4914
4915 time_index = hypre_InitializeTiming("LOBPCG Solve");
4916 hypre_BeginTiming(time_index);
4917
4918 HYPRE_LOBPCGSolve(lobpcg_solver, constraints, eigenvectors, eigenvalues );
4919
4920 hypre_EndTiming(time_index);
4921 hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
4922 hypre_FinalizeTiming(time_index);
4923 hypre_ClearTiming();
4924
4925
4926 if ( checkOrtho ) {
4927
4928 gramXX = utilities_FortranMatrixCreate();
4929 identity = utilities_FortranMatrixCreate();
4930
4931 utilities_FortranMatrixAllocateData( blockSize, blockSize, gramXX );
4932 utilities_FortranMatrixAllocateData( blockSize, blockSize, identity );
4933
4934 if ( lobpcgGen ) {
4935 workspace = mv_MultiVectorCreateCopy( eigenvectors, 0 );
4936 hypre_LOBPCGMultiOperatorB( lobpcg_solver,
4937 mv_MultiVectorGetData(eigenvectors),
4938 mv_MultiVectorGetData(workspace) );
4939 lobpcg_MultiVectorByMultiVector( eigenvectors, workspace, gramXX );
4940 }
4941 else
4942 lobpcg_MultiVectorByMultiVector( eigenvectors, eigenvectors, gramXX );
4943
4944 utilities_FortranMatrixSetToIdentity( identity );
4945 utilities_FortranMatrixAdd( -1, identity, gramXX, gramXX );
4946 nonOrthF = utilities_FortranMatrixFNorm( gramXX );
4947 if ( myid == 0 )
4948 hypre_printf("Non-orthonormality of eigenvectors: %12.5e\n", nonOrthF);
4949
4950 utilities_FortranMatrixDestroy( gramXX );
4951 utilities_FortranMatrixDestroy( identity );
4952
4953 }
4954
4955 if ( printLevel ) {
4956 HYPRE_ParCSRMultiVectorPrint( mv_MultiVectorGetData(eigenvectors), "vectors" );
4957
4958 if ( myid == 0 ) {
4959 if ( (filePtr = fopen("values.txt", "w")) ) {
4960 hypre_fprintf(filePtr, "%d\n", blockSize);
4961 for ( i = 0; i < blockSize; i++ )
4962 hypre_fprintf(filePtr, "%22.14e\n", eigenvalues[i]);
4963 fclose(filePtr);
4964 }
4965
4966 if ( (filePtr = fopen("residuals.txt", "w")) ) {
4967 residualNorms = HYPRE_LOBPCGResidualNorms( lobpcg_solver );
4968 residuals = utilities_FortranMatrixValues( residualNorms );
4969 hypre_fprintf(filePtr, "%d\n", blockSize);
4970 for ( i = 0; i < blockSize; i++ )
4971 hypre_fprintf(filePtr, "%22.14e\n", residuals[i]);
4972 fclose(filePtr);
4973 }
4974
4975 if ( printLevel > 1 ) {
4976
4977 printBuffer = utilities_FortranMatrixCreate();
4978
4979 iterations = HYPRE_LOBPCGIterations( lobpcg_solver );
4980
4981 eigenvaluesHistory = HYPRE_LOBPCGEigenvaluesHistory( lobpcg_solver );
4982 utilities_FortranMatrixSelectBlock( eigenvaluesHistory,
4983 1, blockSize, 1, iterations + 1, printBuffer );
4984 utilities_FortranMatrixPrint( printBuffer, "val_hist.txt" );
4985 residualNormsHistory = HYPRE_LOBPCGResidualNormsHistory( lobpcg_solver );
4986 utilities_FortranMatrixSelectBlock(residualNormsHistory,
4987 1, blockSize, 1, iterations + 1, printBuffer );
4988 utilities_FortranMatrixPrint( printBuffer, "res_hist.txt" );
4989
4990 utilities_FortranMatrixDestroy( printBuffer );
4991 }
4992 }
4993 }
4994
4995 HYPRE_LOBPCGDestroy(lobpcg_solver);
4996 mv_MultiVectorDestroy( eigenvectors );
4997 if ( constrained )
4998 mv_MultiVectorDestroy( constraints );
4999 if ( lobpcgGen )
5000 mv_MultiVectorDestroy( workspace );
5001 hypre_TFree(eigenvalues, HYPRE_MEMORY_HOST);
5002
5003 HYPRE_ParCSRPCGDestroy(pcg_solver);
5004
5005 if (solver_id == 1)
5006 {
5007 HYPRE_BoomerAMGDestroy(pcg_precond);
5008 }
5009 else if (solver_id == 8)
5010 {
5011 HYPRE_ParaSailsDestroy(pcg_precond);
5012 }
5013 else if (solver_id == 12)
5014 {
5015 HYPRE_SchwarzDestroy(pcg_precond);
5016 }
5017 else if (solver_id == 14)
5018 {
5019 HYPRE_BoomerAMGDestroy(pcg_precond);
5020 }
5021 else if (solver_id == 43)
5022 {
5023 HYPRE_EuclidDestroy(pcg_precond);
5024 }
5025
5026 }
5027 else { /* pcgIterations <= 0 --> use the preconditioner directly */
5028
5029 time_index = hypre_InitializeTiming("LOBPCG Setup");
5030 hypre_BeginTiming(time_index);
5031 if (myid != 0)
5032 verbosity = 0;
5033 HYPRE_LOBPCGCreate(interpreter, &matvec_fn, &pcg_solver);
5034 HYPRE_LOBPCGSetMaxIter(pcg_solver, maxIterations);
5035 HYPRE_LOBPCGSetTol(pcg_solver, tol);
5036 HYPRE_LOBPCGSetPrintLevel(pcg_solver, verbosity);
5037
5038 HYPRE_LOBPCGGetPrecond(pcg_solver, &pcg_precond);
5039
5040 if (solver_id == 1)
5041 {
5042 /* use BoomerAMG as preconditioner */
5043 if (myid == 0)
5044 hypre_printf("Solver: AMG-PCG\n");
5045
5046 HYPRE_BoomerAMGCreate(&pcg_precond);
5047 /* BM Aug 25, 2006 */
5048 HYPRE_BoomerAMGSetCGCIts(pcg_precond, cgcits);
5049 HYPRE_BoomerAMGSetInterpType(pcg_precond, interp_type);
5050 HYPRE_BoomerAMGSetNumSamples(pcg_precond, gsmg_samples);
5051 HYPRE_BoomerAMGSetTol(pcg_precond, pc_tol);
5052 HYPRE_BoomerAMGSetCoarsenType(pcg_precond, (hybrid*coarsen_type));
5053 HYPRE_BoomerAMGSetCoarsenCutFactor(pcg_precond, coarsen_cut_factor);
5054 HYPRE_BoomerAMGSetCPoints(pcg_precond, max_levels, num_cpt, cpt_index);
5055 HYPRE_BoomerAMGSetFPoints(pcg_precond, num_fpt, fpt_index);
5056 HYPRE_BoomerAMGSetIsolatedFPoints(pcg_precond, num_isolated_fpt, isolated_fpt_index);
5057 HYPRE_BoomerAMGSetMeasureType(pcg_precond, measure_type);
5058 HYPRE_BoomerAMGSetStrongThreshold(pcg_precond, strong_threshold);
5059 HYPRE_BoomerAMGSetTruncFactor(pcg_precond, trunc_factor);
5060 HYPRE_BoomerAMGSetPMaxElmts(pcg_precond, P_max_elmts);
5061 HYPRE_BoomerAMGSetPostInterpType(pcg_precond, post_interp_type);
5062 HYPRE_BoomerAMGSetJacobiTruncThreshold(pcg_precond, jacobi_trunc_threshold);
5063 HYPRE_BoomerAMGSetPrintLevel(pcg_precond, poutdat);
5064 HYPRE_BoomerAMGSetPrintFileName(pcg_precond, "driver.out.log");
5065 HYPRE_BoomerAMGSetMaxIter(pcg_precond, 1);
5066 HYPRE_BoomerAMGSetCycleType(pcg_precond, cycle_type);
5067 HYPRE_BoomerAMGSetFCycle(pcg_precond, fcycle);
5068 HYPRE_BoomerAMGSetNumSweeps(pcg_precond, num_sweeps);
5069 if (relax_type > -1) HYPRE_BoomerAMGSetRelaxType(pcg_precond, relax_type);
5070 if (relax_down > -1)
5071 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_down, 1);
5072 if (relax_up > -1)
5073 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_up, 2);
5074 if (relax_coarse > -1)
5075 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_coarse, 3);
5076 HYPRE_BoomerAMGSetAddRelaxType(pcg_precond, add_relax_type);
5077 HYPRE_BoomerAMGSetAddRelaxWt(pcg_precond, add_relax_wt);
5078 HYPRE_BoomerAMGSetRelaxWt(pcg_precond, relax_wt);
5079 HYPRE_BoomerAMGSetOuterWt(pcg_precond, outer_wt);
5080 HYPRE_BoomerAMGSetSmoothType(pcg_precond, smooth_type);
5081 HYPRE_BoomerAMGSetSmoothNumLevels(pcg_precond, smooth_num_levels);
5082 HYPRE_BoomerAMGSetSmoothNumSweeps(pcg_precond, smooth_num_sweeps);
5083 HYPRE_BoomerAMGSetRelaxOrder(pcg_precond, relax_order);
5084 HYPRE_BoomerAMGSetMaxLevels(pcg_precond, max_levels);
5085 HYPRE_BoomerAMGSetMaxRowSum(pcg_precond, max_row_sum);
5086 HYPRE_BoomerAMGSetDebugFlag(pcg_precond, debug_flag);
5087 HYPRE_BoomerAMGSetNumFunctions(pcg_precond, num_functions);
5088 HYPRE_BoomerAMGSetNumPaths(pcg_precond, num_paths);
5089 HYPRE_BoomerAMGSetAggNumLevels(pcg_precond, agg_num_levels);
5090 HYPRE_BoomerAMGSetVariant(pcg_precond, variant);
5091 HYPRE_BoomerAMGSetOverlap(pcg_precond, overlap);
5092 HYPRE_BoomerAMGSetDomainType(pcg_precond, domain_type);
5093 HYPRE_BoomerAMGSetSchwarzRlxWeight(pcg_precond, schwarz_rlx_weight);
5094 if (num_functions > 1)
5095 HYPRE_BoomerAMGSetDofFunc(pcg_precond, dof_func);
5096 HYPRE_LOBPCGSetPrecond(pcg_solver,
5097 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve,
5098 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup,
5099 pcg_precond);
5100 }
5101 else if (solver_id == 2)
5102 {
5103
5104 /* use diagonal scaling as preconditioner */
5105 if (myid == 0)
5106 hypre_printf("Solver: DS-PCG\n");
5107
5108 pcg_precond = NULL;
5109
5110 HYPRE_LOBPCGSetPrecond(pcg_solver,
5111 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRDiagScale,
5112 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRDiagScaleSetup,
5113 pcg_precond);
5114 }
5115 else if (solver_id == 8)
5116 {
5117 /* use ParaSails preconditioner */
5118 if (myid == 0)
5119 hypre_printf("Solver: ParaSails-PCG\n");
5120
5121 HYPRE_ParaSailsCreate(hypre_MPI_COMM_WORLD, &pcg_precond);
5122 HYPRE_ParaSailsSetParams(pcg_precond, sai_threshold, max_levels);
5123 HYPRE_ParaSailsSetFilter(pcg_precond, sai_filter);
5124 HYPRE_ParaSailsSetLogging(pcg_precond, poutdat);
5125
5126 HYPRE_LOBPCGSetPrecond(pcg_solver,
5127 (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSolve,
5128 (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSetup,
5129 pcg_precond);
5130 }
5131 else if (solver_id == 12)
5132 {
5133 /* use Schwarz preconditioner */
5134 if (myid == 0)
5135 hypre_printf("Solver: Schwarz-PCG\n");
5136
5137 HYPRE_SchwarzCreate(&pcg_precond);
5138 HYPRE_SchwarzSetVariant(pcg_precond, variant);
5139 HYPRE_SchwarzSetOverlap(pcg_precond, overlap);
5140 HYPRE_SchwarzSetDomainType(pcg_precond, domain_type);
5141 HYPRE_SchwarzSetRelaxWeight(pcg_precond, schwarz_rlx_weight);
5142
5143 HYPRE_LOBPCGSetPrecond(pcg_solver,
5144 (HYPRE_PtrToSolverFcn) HYPRE_SchwarzSolve,
5145 (HYPRE_PtrToSolverFcn) HYPRE_SchwarzSetup,
5146 pcg_precond);
5147 }
5148 else if (solver_id == 14)
5149 {
5150 /* use GSMG as preconditioner */
5151
5152 /* reset some smoother parameters */
5153
5154 num_sweeps = num_sweep;
5155 relax_type = relax_default;
5156 relax_order = 0;
5157
5158 if (myid == 0) hypre_printf("Solver: GSMG-PCG\n");
5159 HYPRE_BoomerAMGCreate(&pcg_precond);
5160 /* BM Aug 25, 2006 */
5161 HYPRE_BoomerAMGSetCGCIts(pcg_precond, cgcits);
5162 HYPRE_BoomerAMGSetGSMG(pcg_precond, 4);
5163 HYPRE_BoomerAMGSetInterpType(pcg_precond, interp_type);
5164 HYPRE_BoomerAMGSetNumSamples(pcg_precond, gsmg_samples);
5165 HYPRE_BoomerAMGSetTol(pcg_precond, pc_tol);
5166 HYPRE_BoomerAMGSetCoarsenType(pcg_precond, (hybrid*coarsen_type));
5167 HYPRE_BoomerAMGSetCoarsenCutFactor(pcg_precond, coarsen_cut_factor);
5168 HYPRE_BoomerAMGSetCPoints(pcg_precond, max_levels, num_cpt, cpt_index);
5169 HYPRE_BoomerAMGSetFPoints(pcg_precond, num_fpt, fpt_index);
5170 HYPRE_BoomerAMGSetIsolatedFPoints(pcg_precond, num_isolated_fpt, isolated_fpt_index);
5171 HYPRE_BoomerAMGSetMeasureType(pcg_precond, measure_type);
5172 HYPRE_BoomerAMGSetStrongThreshold(pcg_precond, strong_threshold);
5173 HYPRE_BoomerAMGSetTruncFactor(pcg_precond, trunc_factor);
5174 HYPRE_BoomerAMGSetPMaxElmts(pcg_precond, P_max_elmts);
5175 HYPRE_BoomerAMGSetPostInterpType(pcg_precond, post_interp_type);
5176 HYPRE_BoomerAMGSetJacobiTruncThreshold(pcg_precond, jacobi_trunc_threshold);
5177 HYPRE_BoomerAMGSetPrintLevel(pcg_precond, poutdat);
5178 HYPRE_BoomerAMGSetPrintFileName(pcg_precond, "driver.out.log");
5179 HYPRE_BoomerAMGSetMaxIter(pcg_precond, 1);
5180 HYPRE_BoomerAMGSetCycleType(pcg_precond, cycle_type);
5181 HYPRE_BoomerAMGSetFCycle(pcg_precond, fcycle);
5182 HYPRE_BoomerAMGSetNumSweeps(pcg_precond, num_sweeps);
5183 HYPRE_BoomerAMGSetRelaxType(pcg_precond, relax_type);
5184 HYPRE_BoomerAMGSetAddRelaxType(pcg_precond, add_relax_type);
5185 HYPRE_BoomerAMGSetAddRelaxWt(pcg_precond, add_relax_wt);
5186 HYPRE_BoomerAMGSetRelaxWt(pcg_precond, relax_wt);
5187 HYPRE_BoomerAMGSetOuterWt(pcg_precond, outer_wt);
5188 HYPRE_BoomerAMGSetSmoothType(pcg_precond, smooth_type);
5189 HYPRE_BoomerAMGSetSmoothNumLevels(pcg_precond, smooth_num_levels);
5190 HYPRE_BoomerAMGSetSmoothNumSweeps(pcg_precond, smooth_num_sweeps);
5191 HYPRE_BoomerAMGSetVariant(pcg_precond, variant);
5192 HYPRE_BoomerAMGSetOverlap(pcg_precond, overlap);
5193 HYPRE_BoomerAMGSetDomainType(pcg_precond, domain_type);
5194 HYPRE_BoomerAMGSetSchwarzRlxWeight(pcg_precond, schwarz_rlx_weight);
5195 HYPRE_BoomerAMGSetRelaxOrder(pcg_precond, relax_order);
5196 HYPRE_BoomerAMGSetMaxLevels(pcg_precond, max_levels);
5197 HYPRE_BoomerAMGSetMaxRowSum(pcg_precond, max_row_sum);
5198 HYPRE_BoomerAMGSetDebugFlag(pcg_precond, debug_flag);
5199 HYPRE_BoomerAMGSetNumFunctions(pcg_precond, num_functions);
5200 HYPRE_BoomerAMGSetNumPaths(pcg_precond, num_paths);
5201 HYPRE_BoomerAMGSetAggNumLevels(pcg_precond, agg_num_levels);
5202 if (num_functions > 1)
5203 HYPRE_BoomerAMGSetDofFunc(pcg_precond, dof_func);
5204
5205 HYPRE_LOBPCGSetPrecond(pcg_solver,
5206 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve,
5207 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup,
5208 pcg_precond);
5209 }
5210 else if (solver_id == 43)
5211 {
5212 /* use Euclid preconditioning */
5213 if (myid == 0)
5214 hypre_printf("Solver: Euclid-PCG\n");
5215
5216 HYPRE_EuclidCreate(hypre_MPI_COMM_WORLD, &pcg_precond);
5217
5218 /* note: There are three methods of setting run-time
5219 * parameters for Euclid: (see HYPRE_parcsr_ls.h); here we'll
5220 * use what I think is simplest: let Euclid internally parse
5221 * the command line. */
5222 HYPRE_EuclidSetParams(pcg_precond, argc, argv);
5223
5224 HYPRE_LOBPCGSetPrecond(pcg_solver,
5225 (HYPRE_PtrToSolverFcn) HYPRE_EuclidSolve,
5226 (HYPRE_PtrToSolverFcn) HYPRE_EuclidSetup,
5227 pcg_precond);
5228 }
5229 else if (solver_id != NO_SOLVER )
5230 {
5231 if ( verbosity )
5232 hypre_printf("Solver ID not recognized - running LOBPCG without preconditioner\n\n");
5233 }
5234
5235 HYPRE_LOBPCGGetPrecond(pcg_solver, &pcg_precond_gotten);
5236 if (pcg_precond_gotten != pcg_precond && pcgIterations)
5237 {
5238 hypre_printf("HYPRE_ParCSRLOBPCGGetPrecond got bad precond\n");
5239 return(-1);
5240 }
5241 else
5242 if (myid == 0)
5243 hypre_printf("HYPRE_ParCSRLOBPCGGetPrecond got good precond\n");
5244
5245 HYPRE_LOBPCGSetup(pcg_solver, (HYPRE_Matrix)parcsr_A,
5246 (HYPRE_Vector)b, (HYPRE_Vector)x);
5247
5248 if ( lobpcgGen )
5249 HYPRE_LOBPCGSetupB(pcg_solver, (HYPRE_Matrix)parcsr_B,
5250 (HYPRE_Vector)x);
5251
5252 hypre_EndTiming(time_index);
5253 hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
5254 hypre_FinalizeTiming(time_index);
5255 hypre_ClearTiming();
5256
5257 if ( vFromFileFlag ) {
5258 eigenvectors = mv_MultiVectorWrap( interpreter,
5259 HYPRE_ParCSRMultiVectorRead(hypre_MPI_COMM_WORLD,
5260 interpreter,
5261 "vectors" ),1);
5262 hypre_assert( eigenvectors != NULL );
5263 blockSize = mv_MultiVectorWidth( eigenvectors );
5264 }
5265 else {
5266 eigenvectors = mv_MultiVectorCreateFromSampleVector( interpreter,
5267 blockSize,
5268 x );
5269 if ( lobpcgSeed )
5270 mv_MultiVectorSetRandom( eigenvectors, lobpcgSeed );
5271 else
5272 mv_MultiVectorSetRandom( eigenvectors, (HYPRE_Int)time(0) );
5273 }
5274
5275 if ( constrained ) {
5276 constraints = mv_MultiVectorWrap( interpreter,
5277 HYPRE_ParCSRMultiVectorRead(hypre_MPI_COMM_WORLD,
5278 interpreter,
5279 "vectors" ),1);
5280 hypre_assert( constraints != NULL );
5281 }
5282
5283 eigenvalues = hypre_CTAlloc(HYPRE_Real, blockSize, HYPRE_MEMORY_HOST);
5284
5285 time_index = hypre_InitializeTiming("LOBPCG Solve");
5286 hypre_BeginTiming(time_index);
5287
5288 HYPRE_LOBPCGSolve(pcg_solver, constraints, eigenvectors, eigenvalues);
5289
5290 hypre_EndTiming(time_index);
5291 hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
5292 hypre_FinalizeTiming(time_index);
5293 hypre_ClearTiming();
5294
5295 if ( checkOrtho ) {
5296
5297 gramXX = utilities_FortranMatrixCreate();
5298 identity = utilities_FortranMatrixCreate();
5299
5300 utilities_FortranMatrixAllocateData( blockSize, blockSize, gramXX );
5301 utilities_FortranMatrixAllocateData( blockSize, blockSize, identity );
5302
5303 if ( lobpcgGen ) {
5304 workspace = mv_MultiVectorCreateCopy( eigenvectors, 0 );
5305 hypre_LOBPCGMultiOperatorB( pcg_solver,
5306 mv_MultiVectorGetData(eigenvectors),
5307 mv_MultiVectorGetData(workspace) );
5308 lobpcg_MultiVectorByMultiVector( eigenvectors, workspace, gramXX );
5309 }
5310 else
5311 lobpcg_MultiVectorByMultiVector( eigenvectors, eigenvectors, gramXX );
5312
5313 utilities_FortranMatrixSetToIdentity( identity );
5314 utilities_FortranMatrixAdd( -1, identity, gramXX, gramXX );
5315 nonOrthF = utilities_FortranMatrixFNorm( gramXX );
5316 if ( myid == 0 )
5317 hypre_printf("Non-orthonormality of eigenvectors: %12.5e\n", nonOrthF);
5318
5319 utilities_FortranMatrixDestroy( gramXX );
5320 utilities_FortranMatrixDestroy( identity );
5321
5322 }
5323
5324 if ( printLevel ) {
5325 HYPRE_ParCSRMultiVectorPrint( mv_MultiVectorGetData(eigenvectors), "vectors" );
5326
5327 if ( myid == 0 ) {
5328 if ( (filePtr = fopen("values.txt", "w")) ) {
5329 hypre_fprintf(filePtr, "%d\n", blockSize);
5330 for ( i = 0; i < blockSize; i++ )
5331 hypre_fprintf(filePtr, "%22.14e\n", eigenvalues[i]);
5332 fclose(filePtr);
5333 }
5334
5335 if ( (filePtr = fopen("residuals.txt", "w")) ) {
5336 residualNorms = HYPRE_LOBPCGResidualNorms( pcg_solver );
5337 residuals = utilities_FortranMatrixValues( residualNorms );
5338 hypre_fprintf(filePtr, "%d\n", blockSize);
5339 for ( i = 0; i < blockSize; i++ )
5340 hypre_fprintf(filePtr, "%22.14e\n", residuals[i]);
5341 fclose(filePtr);
5342 }
5343
5344 if ( printLevel > 1 ) {
5345
5346 printBuffer = utilities_FortranMatrixCreate();
5347
5348 iterations = HYPRE_LOBPCGIterations( pcg_solver );
5349
5350 eigenvaluesHistory = HYPRE_LOBPCGEigenvaluesHistory( pcg_solver );
5351 utilities_FortranMatrixSelectBlock( eigenvaluesHistory,
5352 1, blockSize, 1, iterations + 1, printBuffer );
5353 utilities_FortranMatrixPrint( printBuffer, "val_hist.txt" );
5354
5355 residualNormsHistory = HYPRE_LOBPCGResidualNormsHistory( pcg_solver );
5356 utilities_FortranMatrixSelectBlock(residualNormsHistory,
5357 1, blockSize, 1, iterations + 1, printBuffer );
5358 utilities_FortranMatrixPrint( printBuffer, "res_hist.txt" );
5359
5360 utilities_FortranMatrixDestroy( printBuffer );
5361 }
5362 }
5363 }
5364
5365 #if SECOND_TIME
5366 /* run a second time to check for memory leaks */
5367 mv_MultiVectorSetRandom( eigenvectors, 775 );
5368 HYPRE_LOBPCGSetup(pcg_solver, (HYPRE_Matrix)parcsr_A,
5369 (HYPRE_Vector)b, (HYPRE_Vector)x);
5370 HYPRE_LOBPCGSolve(pcg_solver, constraints, eigenvectors, eigenvalues );
5371 #endif
5372
5373 HYPRE_LOBPCGDestroy(pcg_solver);
5374
5375 if (solver_id == 1)
5376 {
5377 HYPRE_BoomerAMGDestroy(pcg_precond);
5378 }
5379 else if (solver_id == 8)
5380 {
5381 HYPRE_ParaSailsDestroy(pcg_precond);
5382 }
5383 else if (solver_id == 12)
5384 {
5385 HYPRE_SchwarzDestroy(pcg_precond);
5386 }
5387 else if (solver_id == 14)
5388 {
5389 HYPRE_BoomerAMGDestroy(pcg_precond);
5390 }
5391 else if (solver_id == 43)
5392 {
5393 HYPRE_EuclidDestroy(pcg_precond);
5394 }
5395
5396 mv_MultiVectorDestroy( eigenvectors );
5397 if ( constrained )
5398 mv_MultiVectorDestroy( constraints );
5399 if ( lobpcgGen )
5400 mv_MultiVectorDestroy( workspace );
5401 hypre_TFree(eigenvalues, HYPRE_MEMORY_HOST);
5402 } /* if ( pcgIterations > 0 ) */
5403
5404 hypre_TFree( interpreter , HYPRE_MEMORY_HOST);
5405
5406 if ( lobpcgGen )
5407 HYPRE_IJMatrixDestroy(ij_B);
5408
5409 } /* if ( lobpcgFlag ) */
5410
5411 /* end lobpcg */
5412
5413 /*-----------------------------------------------------------
5414 * Solve the system using GMRES
5415 *-----------------------------------------------------------*/
5416
5417 if (solver_id == 3 || solver_id == 4 || solver_id == 7 ||
5418 solver_id == 15 || solver_id == 18 || solver_id == 44 ||
5419 solver_id == 81 || solver_id == 91)
5420 {
5421 time_index = hypre_InitializeTiming("GMRES Setup");
5422 hypre_BeginTiming(time_index);
5423
5424 HYPRE_ParCSRGMRESCreate(hypre_MPI_COMM_WORLD, &pcg_solver);
5425 HYPRE_GMRESSetKDim(pcg_solver, k_dim);
5426 HYPRE_GMRESSetMaxIter(pcg_solver, max_iter);
5427 HYPRE_GMRESSetTol(pcg_solver, tol);
5428 HYPRE_GMRESSetAbsoluteTol(pcg_solver, atol);
5429 HYPRE_GMRESSetLogging(pcg_solver, 1);
5430 HYPRE_GMRESSetPrintLevel(pcg_solver, ioutdat);
5431 HYPRE_GMRESSetRelChange(pcg_solver, rel_change);
5432
5433 if (solver_id == 3 || solver_id == 91)
5434 {
5435 if (solver_id == 3)
5436 {
5437 /* use BoomerAMG as preconditioner */
5438 if (myid == 0) hypre_printf("Solver: AMG-GMRES\n");
5439 HYPRE_BoomerAMGCreate(&amg_precond);
5440 }
5441 else
5442 {
5443 /* use BoomerAMG-DD as preconditioner */
5444 if (myid == 0) hypre_printf("Solver: AMG-DD-GMRES\n");
5445
5446 HYPRE_BoomerAMGDDCreate(&pcg_precond);
5447 HYPRE_BoomerAMGDDGetAMG(pcg_precond, &amg_precond);
5448
5449 /* AMG-DD options */
5450 HYPRE_BoomerAMGDDSetStartLevel(pcg_precond, amgdd_start_level);
5451 HYPRE_BoomerAMGDDSetPadding(pcg_precond, amgdd_padding);
5452 HYPRE_BoomerAMGDDSetFACNumRelax(pcg_precond, amgdd_fac_num_relax);
5453 HYPRE_BoomerAMGDDSetFACNumCycles(pcg_precond, amgdd_num_comp_cycles);
5454 HYPRE_BoomerAMGDDSetFACRelaxType(pcg_precond, amgdd_fac_relax_type);
5455 HYPRE_BoomerAMGDDSetFACCycleType(pcg_precond, amgdd_fac_cycle_type);
5456 HYPRE_BoomerAMGDDSetNumGhostLayers(pcg_precond, amgdd_num_ghost_layers);
5457 }
5458
5459 if (air)
5460 {
5461 /* RL: specify restriction */
5462 hypre_assert(restri_type >= 0);
5463 HYPRE_BoomerAMGSetRestriction(amg_precond, restri_type); /* 0: P^T, 1: AIR, 2: AIR-2 */
5464 HYPRE_BoomerAMGSetGridRelaxPoints(amg_precond, grid_relax_points);
5465 HYPRE_BoomerAMGSetStrongThresholdR(amg_precond, strong_thresholdR);
5466 HYPRE_BoomerAMGSetFilterThresholdR(amg_precond, filter_thresholdR);
5467 }
5468
5469 HYPRE_BoomerAMGSetCGCIts(amg_precond, cgcits);
5470 HYPRE_BoomerAMGSetInterpType(amg_precond, interp_type);
5471 HYPRE_BoomerAMGSetRestriction(amg_precond, restri_type); /* 0: P^T, 1: AIR, 2: AIR-2 */
5472 HYPRE_BoomerAMGSetPostInterpType(amg_precond, post_interp_type);
5473 HYPRE_BoomerAMGSetNumSamples(amg_precond, gsmg_samples);
5474 HYPRE_BoomerAMGSetTol(amg_precond, pc_tol);
5475 HYPRE_BoomerAMGSetCoarsenType(amg_precond, coarsen_type);
5476 HYPRE_BoomerAMGSetCoarsenCutFactor(amg_precond, coarsen_cut_factor);
5477 HYPRE_BoomerAMGSetCPoints(amg_precond, max_levels, num_cpt, cpt_index);
5478 HYPRE_BoomerAMGSetFPoints(amg_precond, num_fpt, fpt_index);
5479 HYPRE_BoomerAMGSetIsolatedFPoints(amg_precond, num_isolated_fpt, isolated_fpt_index);
5480 HYPRE_BoomerAMGSetMeasureType(amg_precond, measure_type);
5481 HYPRE_BoomerAMGSetStrongThreshold(amg_precond, strong_threshold);
5482 HYPRE_BoomerAMGSetSeqThreshold(amg_precond, seq_threshold);
5483 HYPRE_BoomerAMGSetRedundant(amg_precond, redundant);
5484 HYPRE_BoomerAMGSetMaxCoarseSize(amg_precond, coarse_threshold);
5485 HYPRE_BoomerAMGSetMinCoarseSize(amg_precond, min_coarse_size);
5486 HYPRE_BoomerAMGSetTruncFactor(amg_precond, trunc_factor);
5487 HYPRE_BoomerAMGSetPMaxElmts(amg_precond, P_max_elmts);
5488 HYPRE_BoomerAMGSetJacobiTruncThreshold(amg_precond, jacobi_trunc_threshold);
5489 HYPRE_BoomerAMGSetSCommPkgSwitch(amg_precond, S_commpkg_switch);
5490 HYPRE_BoomerAMGSetPrintLevel(amg_precond, poutdat);
5491 HYPRE_BoomerAMGSetPrintFileName(amg_precond, "driver.out.log");
5492 HYPRE_BoomerAMGSetMaxIter(amg_precond, 1);
5493 HYPRE_BoomerAMGSetCycleType(amg_precond, cycle_type);
5494 HYPRE_BoomerAMGSetFCycle(amg_precond, fcycle);
5495 HYPRE_BoomerAMGSetNumSweeps(amg_precond, num_sweeps);
5496 HYPRE_BoomerAMGSetISType(amg_precond, IS_type);
5497 HYPRE_BoomerAMGSetNumCRRelaxSteps(amg_precond, num_CR_relax_steps);
5498 HYPRE_BoomerAMGSetCRRate(amg_precond, CR_rate);
5499 HYPRE_BoomerAMGSetCRStrongTh(amg_precond, CR_strong_th);
5500 HYPRE_BoomerAMGSetCRUseCG(amg_precond, CR_use_CG);
5501 if (relax_type > -1) HYPRE_BoomerAMGSetRelaxType(amg_precond, relax_type);
5502 if (relax_down > -1)
5503 HYPRE_BoomerAMGSetCycleRelaxType(amg_precond, relax_down, 1);
5504 if (relax_up > -1)
5505 HYPRE_BoomerAMGSetCycleRelaxType(amg_precond, relax_up, 2);
5506 if (relax_coarse > -1)
5507 HYPRE_BoomerAMGSetCycleRelaxType(amg_precond, relax_coarse, 3);
5508 HYPRE_BoomerAMGSetAddRelaxType(amg_precond, add_relax_type);
5509 HYPRE_BoomerAMGSetAddRelaxWt(amg_precond, add_relax_wt);
5510 HYPRE_BoomerAMGSetChebyOrder(amg_precond, cheby_order);
5511 HYPRE_BoomerAMGSetChebyFraction(amg_precond, cheby_fraction);
5512 HYPRE_BoomerAMGSetChebyEigEst(amg_precond, cheby_eig_est);
5513 HYPRE_BoomerAMGSetChebyVariant(amg_precond, cheby_variant);
5514 HYPRE_BoomerAMGSetChebyScale(amg_precond, cheby_scale);
5515 HYPRE_BoomerAMGSetRelaxOrder(amg_precond, relax_order);
5516 HYPRE_BoomerAMGSetRelaxWt(amg_precond, relax_wt);
5517 HYPRE_BoomerAMGSetOuterWt(amg_precond, outer_wt);
5518 if (level_w > -1)
5519 HYPRE_BoomerAMGSetLevelRelaxWt(amg_precond, relax_wt_level,level_w);
5520 if (level_ow > -1)
5521 HYPRE_BoomerAMGSetLevelOuterWt(amg_precond,outer_wt_level,level_ow);
5522 HYPRE_BoomerAMGSetSmoothType(amg_precond, smooth_type);
5523 HYPRE_BoomerAMGSetSmoothNumLevels(amg_precond, smooth_num_levels);
5524 HYPRE_BoomerAMGSetSmoothNumSweeps(amg_precond, smooth_num_sweeps);
5525 HYPRE_BoomerAMGSetMaxLevels(amg_precond, max_levels);
5526 HYPRE_BoomerAMGSetMaxRowSum(amg_precond, max_row_sum);
5527 HYPRE_BoomerAMGSetDebugFlag(amg_precond, debug_flag);
5528 HYPRE_BoomerAMGSetNumFunctions(amg_precond, num_functions);
5529 HYPRE_BoomerAMGSetAggNumLevels(amg_precond, agg_num_levels);
5530 HYPRE_BoomerAMGSetAggInterpType(amg_precond, agg_interp_type);
5531 HYPRE_BoomerAMGSetAggTruncFactor(amg_precond, agg_trunc_factor);
5532 HYPRE_BoomerAMGSetAggP12TruncFactor(amg_precond, agg_P12_trunc_factor);
5533 HYPRE_BoomerAMGSetAggPMaxElmts(amg_precond, agg_P_max_elmts);
5534 HYPRE_BoomerAMGSetAggP12MaxElmts(amg_precond, agg_P12_max_elmts);
5535 HYPRE_BoomerAMGSetNumPaths(amg_precond, num_paths);
5536 HYPRE_BoomerAMGSetNodal(amg_precond, nodal);
5537 HYPRE_BoomerAMGSetNodalDiag(amg_precond, nodal_diag);
5538 HYPRE_BoomerAMGSetVariant(amg_precond, variant);
5539 HYPRE_BoomerAMGSetOverlap(amg_precond, overlap);
5540 HYPRE_BoomerAMGSetDomainType(amg_precond, domain_type);
5541 HYPRE_BoomerAMGSetSchwarzUseNonSymm(amg_precond, use_nonsymm_schwarz);
5542 HYPRE_BoomerAMGSetSchwarzRlxWeight(amg_precond, schwarz_rlx_weight);
5543 if (eu_level < 0) eu_level = 0;
5544 HYPRE_BoomerAMGSetEuLevel(amg_precond, eu_level);
5545 HYPRE_BoomerAMGSetEuBJ(amg_precond, eu_bj);
5546 HYPRE_BoomerAMGSetEuSparseA(amg_precond, eu_sparse_A);
5547 HYPRE_BoomerAMGSetCycleNumSweeps(amg_precond, ns_coarse, 3);
5548 if (ns_down > -1)
5549 {
5550 HYPRE_BoomerAMGSetCycleNumSweeps(amg_precond, ns_down, 1);
5551 }
5552 if (ns_up > -1)
5553 {
5554 HYPRE_BoomerAMGSetCycleNumSweeps(amg_precond, ns_up, 2);
5555 }
5556 if (num_functions > 1)
5557 HYPRE_BoomerAMGSetDofFunc(amg_precond, dof_func);
5558 HYPRE_BoomerAMGSetAdditive(amg_precond, additive);
5559 HYPRE_BoomerAMGSetMultAdditive(amg_precond, mult_add);
5560 HYPRE_BoomerAMGSetSimple(amg_precond, simple);
5561 HYPRE_BoomerAMGSetAddLastLvl(amg_precond, add_last_lvl);
5562 HYPRE_BoomerAMGSetMultAddPMaxElmts(amg_precond, add_P_max_elmts);
5563 HYPRE_BoomerAMGSetMultAddTruncFactor(amg_precond, add_trunc_factor);
5564 HYPRE_BoomerAMGSetRAP2(amg_precond, rap2);
5565 HYPRE_BoomerAMGSetModuleRAP2(amg_precond, mod_rap2);
5566 HYPRE_BoomerAMGSetKeepTranspose(amg_precond, keepTranspose);
5567 #ifdef HYPRE_USING_DSUPERLU
5568 HYPRE_BoomerAMGSetDSLUThreshold(amg_precond, dslu_threshold);
5569 #endif
5570 if (nongalerk_tol)
5571 {
5572 HYPRE_BoomerAMGSetNonGalerkinTol(amg_precond, nongalerk_tol[nongalerk_num_tol-1]);
5573 for (i=0; i < nongalerk_num_tol-1; i++)
5574 HYPRE_BoomerAMGSetLevelNonGalerkinTol(amg_precond, nongalerk_tol[i], i);
5575 }
5576 if (build_rbm)
5577 {
5578 HYPRE_BoomerAMGSetInterpVectors(amg_precond, 1, interp_vecs);
5579 HYPRE_BoomerAMGSetInterpVecVariant(amg_precond, interp_vec_variant);
5580 HYPRE_BoomerAMGSetInterpVecQMax(amg_precond, Q_max);
5581 HYPRE_BoomerAMGSetInterpVecAbsQTrunc(amg_precond, Q_trunc);
5582 }
5583
5584 HYPRE_GMRESSetMaxIter(pcg_solver, mg_max_iter);
5585
5586 if (solver_id == 3)
5587 {
5588 HYPRE_GMRESSetPrecond(pcg_solver,
5589 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve,
5590 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup,
5591 amg_precond);
5592 }
5593 else if (solver_id == 91)
5594 {
5595 HYPRE_GMRESSetPrecond(pcg_solver,
5596 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGDDSolve,
5597 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGDDSetup,
5598 pcg_precond);
5599 }
5600 }
5601 else if (solver_id == 4)
5602 {
5603 /* use diagonal scaling as preconditioner */
5604 if (myid == 0) hypre_printf("Solver: DS-GMRES\n");
5605 pcg_precond = NULL;
5606
5607 HYPRE_GMRESSetPrecond(pcg_solver,
5608 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRDiagScale,
5609 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRDiagScaleSetup,
5610 pcg_precond);
5611 }
5612 else if (solver_id == 7)
5613 {
5614 /* use PILUT as preconditioner */
5615 if (myid == 0) hypre_printf("Solver: PILUT-GMRES\n");
5616
5617 ierr = HYPRE_ParCSRPilutCreate( hypre_MPI_COMM_WORLD, &pcg_precond );
5618 if (ierr) {
5619 hypre_printf("Error in ParPilutCreate\n");
5620 }
5621
5622 HYPRE_GMRESSetPrecond(pcg_solver,
5623 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRPilutSolve,
5624 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRPilutSetup,
5625 pcg_precond);
5626
5627 if (drop_tol >= 0 )
5628 HYPRE_ParCSRPilutSetDropTolerance( pcg_precond,
5629 drop_tol );
5630
5631 if (nonzeros_to_keep >= 0 )
5632 HYPRE_ParCSRPilutSetFactorRowSize( pcg_precond,
5633 nonzeros_to_keep );
5634 }
5635 else if (solver_id == 15)
5636 {
5637 /* use GSMG as preconditioner */
5638
5639 /* reset some smoother parameters */
5640
5641 relax_order = 0;
5642
5643 if (myid == 0) hypre_printf("Solver: GSMG-GMRES\n");
5644 HYPRE_BoomerAMGCreate(&pcg_precond);
5645 HYPRE_BoomerAMGSetGSMG(pcg_precond, 4);
5646 HYPRE_BoomerAMGSetCGCIts(pcg_precond, cgcits);
5647 HYPRE_BoomerAMGSetInterpType(pcg_precond, interp_type);
5648 HYPRE_BoomerAMGSetPostInterpType(pcg_precond, post_interp_type);
5649 HYPRE_BoomerAMGSetNumSamples(pcg_precond, gsmg_samples);
5650 HYPRE_BoomerAMGSetTol(pcg_precond, pc_tol);
5651 HYPRE_BoomerAMGSetCoarsenType(pcg_precond, coarsen_type);
5652 HYPRE_BoomerAMGSetCoarsenCutFactor(pcg_precond, coarsen_cut_factor);
5653 HYPRE_BoomerAMGSetCPoints(pcg_precond, max_levels, num_cpt, cpt_index);
5654 HYPRE_BoomerAMGSetFPoints(pcg_precond, num_fpt, fpt_index);
5655 HYPRE_BoomerAMGSetIsolatedFPoints(pcg_precond, num_isolated_fpt, isolated_fpt_index);
5656 HYPRE_BoomerAMGSetMeasureType(pcg_precond, measure_type);
5657 HYPRE_BoomerAMGSetStrongThreshold(pcg_precond, strong_threshold);
5658 HYPRE_BoomerAMGSetSeqThreshold(pcg_precond, seq_threshold);
5659 HYPRE_BoomerAMGSetRedundant(pcg_precond, redundant);
5660 HYPRE_BoomerAMGSetMaxCoarseSize(pcg_precond, coarse_threshold);
5661 HYPRE_BoomerAMGSetMinCoarseSize(pcg_precond, min_coarse_size);
5662 HYPRE_BoomerAMGSetTruncFactor(pcg_precond, trunc_factor);
5663 HYPRE_BoomerAMGSetPMaxElmts(pcg_precond, P_max_elmts);
5664 HYPRE_BoomerAMGSetJacobiTruncThreshold(pcg_precond, jacobi_trunc_threshold);
5665 HYPRE_BoomerAMGSetSCommPkgSwitch(pcg_precond, S_commpkg_switch);
5666 HYPRE_BoomerAMGSetPrintLevel(pcg_precond, poutdat);
5667 HYPRE_BoomerAMGSetPrintFileName(pcg_precond, "driver.out.log");
5668 HYPRE_BoomerAMGSetMaxIter(pcg_precond, 1);
5669 HYPRE_BoomerAMGSetCycleType(pcg_precond, cycle_type);
5670 HYPRE_BoomerAMGSetFCycle(pcg_precond, fcycle);
5671 HYPRE_BoomerAMGSetNumSweeps(pcg_precond, num_sweeps);
5672 HYPRE_BoomerAMGSetISType(pcg_precond, IS_type);
5673 HYPRE_BoomerAMGSetNumCRRelaxSteps(pcg_precond, num_CR_relax_steps);
5674 HYPRE_BoomerAMGSetCRRate(pcg_precond, CR_rate);
5675 HYPRE_BoomerAMGSetCRStrongTh(pcg_precond, CR_strong_th);
5676 HYPRE_BoomerAMGSetCRUseCG(pcg_precond, CR_use_CG);
5677 if (relax_type > -1) HYPRE_BoomerAMGSetRelaxType(pcg_precond, relax_type);
5678 if (relax_down > -1)
5679 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_down, 1);
5680 if (relax_up > -1)
5681 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_up, 2);
5682 if (relax_coarse > -1)
5683 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_coarse, 3);
5684 HYPRE_BoomerAMGSetAddRelaxType(pcg_precond, add_relax_type);
5685 HYPRE_BoomerAMGSetAddRelaxWt(pcg_precond, add_relax_wt);
5686 HYPRE_BoomerAMGSetChebyOrder(pcg_precond, cheby_order);
5687 HYPRE_BoomerAMGSetChebyFraction(pcg_precond, cheby_fraction);
5688 HYPRE_BoomerAMGSetChebyEigEst(pcg_precond, cheby_eig_est);
5689 HYPRE_BoomerAMGSetChebyVariant(pcg_precond, cheby_variant);
5690 HYPRE_BoomerAMGSetChebyScale(pcg_precond, cheby_scale);
5691 HYPRE_BoomerAMGSetRelaxOrder(pcg_precond, relax_order);
5692 HYPRE_BoomerAMGSetRelaxWt(pcg_precond, relax_wt);
5693 HYPRE_BoomerAMGSetOuterWt(pcg_precond, outer_wt);
5694 if (level_w > -1)
5695 HYPRE_BoomerAMGSetLevelRelaxWt(pcg_precond, relax_wt_level,level_w);
5696 if (level_ow > -1)
5697 HYPRE_BoomerAMGSetLevelOuterWt(pcg_precond,outer_wt_level,level_ow);
5698 HYPRE_BoomerAMGSetSmoothType(pcg_precond, smooth_type);
5699 HYPRE_BoomerAMGSetSmoothNumLevels(pcg_precond, smooth_num_levels);
5700 HYPRE_BoomerAMGSetSmoothNumSweeps(pcg_precond, smooth_num_sweeps);
5701 HYPRE_BoomerAMGSetVariant(pcg_precond, variant);
5702 HYPRE_BoomerAMGSetOverlap(pcg_precond, overlap);
5703 HYPRE_BoomerAMGSetDomainType(pcg_precond, domain_type);
5704 HYPRE_BoomerAMGSetSchwarzUseNonSymm(pcg_precond, use_nonsymm_schwarz);
5705 HYPRE_BoomerAMGSetSchwarzRlxWeight(pcg_precond, schwarz_rlx_weight);
5706 if (eu_level < 0) eu_level = 0;
5707 HYPRE_BoomerAMGSetEuLevel(pcg_precond, eu_level);
5708 HYPRE_BoomerAMGSetEuBJ(pcg_precond, eu_bj);
5709 HYPRE_BoomerAMGSetEuSparseA(pcg_precond, eu_sparse_A);
5710 HYPRE_BoomerAMGSetMaxLevels(pcg_precond, max_levels);
5711 HYPRE_BoomerAMGSetMaxRowSum(pcg_precond, max_row_sum);
5712 HYPRE_BoomerAMGSetDebugFlag(pcg_precond, debug_flag);
5713 HYPRE_BoomerAMGSetNumFunctions(pcg_precond, num_functions);
5714 HYPRE_BoomerAMGSetAggNumLevels(pcg_precond, agg_num_levels);
5715 HYPRE_BoomerAMGSetAggInterpType(pcg_precond, agg_interp_type);
5716 HYPRE_BoomerAMGSetAggTruncFactor(pcg_precond, agg_trunc_factor);
5717 HYPRE_BoomerAMGSetAggP12TruncFactor(pcg_precond, agg_P12_trunc_factor);
5718 HYPRE_BoomerAMGSetAggPMaxElmts(pcg_precond, agg_P_max_elmts);
5719 HYPRE_BoomerAMGSetAggP12MaxElmts(pcg_precond, agg_P12_max_elmts);
5720 HYPRE_BoomerAMGSetNumPaths(pcg_precond, num_paths);
5721 HYPRE_BoomerAMGSetNodal(pcg_precond, nodal);
5722 HYPRE_BoomerAMGSetNodalDiag(pcg_precond, nodal_diag);
5723 HYPRE_BoomerAMGSetCycleNumSweeps(pcg_precond, ns_coarse, 3);
5724 if (num_functions > 1)
5725 HYPRE_BoomerAMGSetDofFunc(pcg_precond, dof_func);
5726 HYPRE_BoomerAMGSetAdditive(pcg_precond, additive);
5727 HYPRE_BoomerAMGSetMultAdditive(pcg_precond, mult_add);
5728 HYPRE_BoomerAMGSetSimple(pcg_precond, simple);
5729 HYPRE_BoomerAMGSetAddLastLvl(pcg_precond, add_last_lvl);
5730 HYPRE_BoomerAMGSetMultAddPMaxElmts(pcg_precond, add_P_max_elmts);
5731 HYPRE_BoomerAMGSetMultAddTruncFactor(pcg_precond, add_trunc_factor);
5732 HYPRE_BoomerAMGSetRAP2(pcg_precond, rap2);
5733 HYPRE_BoomerAMGSetModuleRAP2(pcg_precond, mod_rap2);
5734 HYPRE_BoomerAMGSetKeepTranspose(pcg_precond, keepTranspose);
5735 #ifdef HYPRE_USING_DSUPERLU
5736 HYPRE_BoomerAMGSetDSLUThreshold(pcg_precond, dslu_threshold);
5737 #endif
5738 if (nongalerk_tol)
5739 {
5740 HYPRE_BoomerAMGSetNonGalerkinTol(pcg_precond, nongalerk_tol[nongalerk_num_tol-1]);
5741 for (i=0; i < nongalerk_num_tol-1; i++)
5742 HYPRE_BoomerAMGSetLevelNonGalerkinTol(pcg_precond, nongalerk_tol[i], i);
5743 }
5744 HYPRE_GMRESSetMaxIter(pcg_solver, mg_max_iter);
5745 HYPRE_GMRESSetPrecond(pcg_solver,
5746 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve,
5747 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup,
5748 pcg_precond);
5749 }
5750 else if (solver_id == 18)
5751 {
5752 /* use ParaSails preconditioner */
5753 if (myid == 0) hypre_printf("Solver: ParaSails-GMRES\n");
5754
5755 HYPRE_ParaSailsCreate(hypre_MPI_COMM_WORLD, &pcg_precond);
5756 HYPRE_ParaSailsSetParams(pcg_precond, sai_threshold, max_levels);
5757 HYPRE_ParaSailsSetFilter(pcg_precond, sai_filter);
5758 HYPRE_ParaSailsSetLogging(pcg_precond, poutdat);
5759 HYPRE_ParaSailsSetSym(pcg_precond, 0);
5760
5761 HYPRE_GMRESSetPrecond(pcg_solver,
5762 (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSolve,
5763 (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSetup,
5764 pcg_precond);
5765 }
5766 else if (solver_id == 44)
5767 {
5768 /* use Euclid preconditioning */
5769 if (myid == 0) hypre_printf("Solver: Euclid-GMRES\n");
5770
5771 HYPRE_EuclidCreate(hypre_MPI_COMM_WORLD, &pcg_precond);
5772
5773 if (eu_level > -1) HYPRE_EuclidSetLevel(pcg_precond, eu_level);
5774 if (eu_ilut) HYPRE_EuclidSetILUT(pcg_precond, eu_ilut);
5775 if (eu_sparse_A) HYPRE_EuclidSetSparseA(pcg_precond, eu_sparse_A);
5776 if (eu_row_scale) HYPRE_EuclidSetRowScale(pcg_precond, eu_row_scale);
5777 if (eu_bj) HYPRE_EuclidSetBJ(pcg_precond, eu_bj);
5778 HYPRE_EuclidSetStats(pcg_precond, eu_stats);
5779 HYPRE_EuclidSetMem(pcg_precond, eu_mem);
5780 /*HYPRE_EuclidSetParams(pcg_precond, argc, argv);*/
5781
5782 HYPRE_GMRESSetPrecond (pcg_solver,
5783 (HYPRE_PtrToSolverFcn) HYPRE_EuclidSolve,
5784 (HYPRE_PtrToSolverFcn) HYPRE_EuclidSetup,
5785 pcg_precond);
5786 }
5787 else if (solver_id == 81)
5788 {
5789 /* use hypre_ILU preconditioning */
5790 if (myid == 0) hypre_printf("Solver: ILU-GMRES\n");
5791
5792 /* create the ILU preconditioner */
5793 HYPRE_ILUCreate(&pcg_precond);
5794 HYPRE_ILUSetType(pcg_precond, ilu_type);
5795 HYPRE_ILUSetLevelOfFill(pcg_precond, ilu_lfil);
5796 /* set print level */
5797 HYPRE_ILUSetPrintLevel(pcg_precond, 1);
5798 /* set max iterations */
5799 HYPRE_ILUSetMaxIter(pcg_precond, 1);
5800 HYPRE_ILUSetTol(pcg_precond, pc_tol);
5801 /* set max number of nonzeros per row */
5802 HYPRE_ILUSetMaxNnzPerRow(pcg_precond,ilu_max_row_nnz);
5803 /* set the droptol */
5804 HYPRE_ILUSetDropThreshold(pcg_precond,ilu_droptol);
5805 /* set max iterations for Schur system solve */
5806 HYPRE_ILUSetSchurMaxIter( pcg_precond, ilu_schur_max_iter );
5807 if(ilu_type == 20 || ilu_type == 21)
5808 {
5809 HYPRE_ILUSetNSHDropThreshold( pcg_precond, ilu_nsh_droptol);
5810 }
5811
5812 /* setup ILU-GMRES solver */
5813 HYPRE_GMRESSetMaxIter(pcg_solver, mg_max_iter);
5814 HYPRE_GMRESSetPrecond(pcg_solver,
5815 (HYPRE_PtrToSolverFcn) HYPRE_ILUSolve,
5816 (HYPRE_PtrToSolverFcn) HYPRE_ILUSetup,
5817 pcg_precond);
5818 }
5819
5820 HYPRE_GMRESGetPrecond(pcg_solver, &pcg_precond_gotten);
5821 if (pcg_precond_gotten != ((solver_id == 3) ? amg_precond : pcg_precond))
5822 {
5823 hypre_printf("HYPRE_GMRESGetPrecond got bad precond\n");
5824 return(-1);
5825 }
5826 else
5827 {
5828 if (myid == 0)
5829 {
5830 hypre_printf("HYPRE_GMRESGetPrecond got good precond\n");
5831 }
5832 }
5833 HYPRE_GMRESSetup(pcg_solver, (HYPRE_Matrix)parcsr_A, (HYPRE_Vector)b, (HYPRE_Vector)x);
5834
5835 hypre_EndTiming(time_index);
5836 hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
5837 hypre_FinalizeTiming(time_index);
5838 hypre_ClearTiming();
5839
5840 time_index = hypre_InitializeTiming("GMRES Solve");
5841 hypre_BeginTiming(time_index);
5842
5843 HYPRE_GMRESSolve(pcg_solver, (HYPRE_Matrix)parcsr_A, (HYPRE_Vector)b, (HYPRE_Vector)x);
5844
5845 hypre_EndTiming(time_index);
5846 hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
5847 hypre_FinalizeTiming(time_index);
5848 hypre_ClearTiming();
5849
5850 HYPRE_GMRESGetNumIterations(pcg_solver, &num_iterations);
5851 HYPRE_GMRESGetFinalRelativeResidualNorm(pcg_solver,&final_res_norm);
5852
5853 if (check_residual)
5854 {
5855 HYPRE_BigInt *indices_h, *indices_d;
5856 HYPRE_Complex *values_h, *values_d;
5857 HYPRE_Int num_values = 20;
5858 HYPRE_ParCSRGMRESGetResidual(pcg_solver, &residual);
5859 HYPRE_ParCSRMatrixGetLocalRange( parcsr_A,
5860 &first_local_row, &last_local_row ,
5861 &first_local_col, &last_local_col );
5862 local_num_rows = (HYPRE_Int)(last_local_row - first_local_row + 1);
5863 if (local_num_rows < 20)
5864 {
5865 num_values = local_num_rows;
5866 }
5867 indices_h = hypre_TAlloc(HYPRE_BigInt, num_values, HYPRE_MEMORY_HOST);
5868 values_h = hypre_TAlloc(HYPRE_Complex, num_values, HYPRE_MEMORY_HOST);
5869 indices_d = hypre_TAlloc(HYPRE_BigInt, num_values, HYPRE_MEMORY_DEVICE);
5870 values_d = hypre_TAlloc(HYPRE_Complex, num_values, HYPRE_MEMORY_DEVICE);
5871 for (i = 0; i < num_values; i++)
5872 {
5873 indices_h[i] = first_local_row + i;
5874 }
5875 hypre_TMemcpy(indices_d, indices_h, HYPRE_BigInt, num_values, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_HOST);
5876
5877 HYPRE_ParVectorGetValues((HYPRE_ParVector) residual, num_values, indices_d, values_d);
5878
5879 hypre_TMemcpy(values_h, values_d, HYPRE_Complex, num_values, HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE);
5880
5881 for (i = 0; i < num_values; i++)
5882 {
5883 if (myid == 0)
5884 {
5885 hypre_printf("index %d value %e\n", i, values_h[i]);
5886 }
5887 }
5888 hypre_TFree(indices_h, HYPRE_MEMORY_HOST);
5889 hypre_TFree(values_h, HYPRE_MEMORY_HOST);
5890 hypre_TFree(indices_d, HYPRE_MEMORY_DEVICE);
5891 hypre_TFree(values_d, HYPRE_MEMORY_DEVICE);
5892 }
5893
5894 #if SECOND_TIME
5895 /* run a second time to check for memory leaks */
5896 HYPRE_ParVectorSetRandomValues(x, 775);
5897 HYPRE_GMRESSetup(pcg_solver, (HYPRE_Matrix)parcsr_A, (HYPRE_Vector)b,
5898 (HYPRE_Vector)x);
5899 HYPRE_GMRESSolve(pcg_solver, (HYPRE_Matrix)parcsr_A, (HYPRE_Vector)b,
5900 (HYPRE_Vector)x);
5901 #endif
5902
5903 HYPRE_ParCSRGMRESDestroy(pcg_solver);
5904
5905 if (solver_id == 3)
5906 {
5907 HYPRE_BoomerAMGDestroy(amg_precond);
5908 }
5909 else if (solver_id == 15)
5910 {
5911 HYPRE_BoomerAMGDestroy(pcg_precond);
5912 }
5913 else if (solver_id == 7)
5914 {
5915 HYPRE_ParCSRPilutDestroy(pcg_precond);
5916 }
5917 else if (solver_id == 18)
5918 {
5919 HYPRE_ParaSailsDestroy(pcg_precond);
5920 }
5921 else if (solver_id == 44)
5922 {
5923 HYPRE_EuclidDestroy(pcg_precond);
5924 }
5925 else if (solver_id == 81)
5926 {
5927 HYPRE_ILUDestroy(pcg_precond);
5928 }
5929 else if (solver_id == 91)
5930 {
5931 HYPRE_BoomerAMGDDDestroy(pcg_precond);
5932 }
5933
5934 if (myid == 0)
5935 {
5936 hypre_printf("\n");
5937 hypre_printf("GMRES Iterations = %d\n", num_iterations);
5938 hypre_printf("Final GMRES Relative Residual Norm = %e\n", final_res_norm);
5939 hypre_printf("\n");
5940 }
5941 }
5942 /*-----------------------------------------------------------
5943 * Solve the system using LGMRES
5944 *-----------------------------------------------------------*/
5945
5946 if (solver_id == 50 || solver_id == 51 )
5947 {
5948 time_index = hypre_InitializeTiming("LGMRES Setup");
5949 hypre_BeginTiming(time_index);
5950
5951 HYPRE_ParCSRLGMRESCreate(hypre_MPI_COMM_WORLD, &pcg_solver);
5952 HYPRE_LGMRESSetKDim(pcg_solver, k_dim);
5953 HYPRE_LGMRESSetAugDim(pcg_solver, aug_dim);
5954 HYPRE_LGMRESSetMaxIter(pcg_solver, max_iter);
5955 HYPRE_LGMRESSetTol(pcg_solver, tol);
5956 HYPRE_LGMRESSetAbsoluteTol(pcg_solver, atol);
5957 HYPRE_LGMRESSetLogging(pcg_solver, 1);
5958 HYPRE_LGMRESSetPrintLevel(pcg_solver, ioutdat);
5959
5960 if (solver_id == 51)
5961 {
5962 /* use BoomerAMG as preconditioner */
5963 if (myid == 0) hypre_printf("Solver: AMG-LGMRES\n");
5964
5965 HYPRE_BoomerAMGCreate(&pcg_precond);
5966 HYPRE_BoomerAMGSetCGCIts(pcg_precond, cgcits);
5967 HYPRE_BoomerAMGSetInterpType(pcg_precond, interp_type);
5968 HYPRE_BoomerAMGSetPostInterpType(pcg_precond, post_interp_type);
5969 HYPRE_BoomerAMGSetNumSamples(pcg_precond, gsmg_samples);
5970 HYPRE_BoomerAMGSetTol(pcg_precond, pc_tol);
5971 HYPRE_BoomerAMGSetCoarsenType(pcg_precond, coarsen_type);
5972 HYPRE_BoomerAMGSetCoarsenCutFactor(pcg_precond, coarsen_cut_factor);
5973 HYPRE_BoomerAMGSetCPoints(pcg_precond, max_levels, num_cpt, cpt_index);
5974 HYPRE_BoomerAMGSetFPoints(pcg_precond, num_fpt, fpt_index);
5975 HYPRE_BoomerAMGSetIsolatedFPoints(pcg_precond, num_isolated_fpt, isolated_fpt_index);
5976 HYPRE_BoomerAMGSetMeasureType(pcg_precond, measure_type);
5977 HYPRE_BoomerAMGSetStrongThreshold(pcg_precond, strong_threshold);
5978 HYPRE_BoomerAMGSetSeqThreshold(pcg_precond, seq_threshold);
5979 HYPRE_BoomerAMGSetRedundant(pcg_precond, redundant);
5980 HYPRE_BoomerAMGSetMaxCoarseSize(pcg_precond, coarse_threshold);
5981 HYPRE_BoomerAMGSetMinCoarseSize(pcg_precond, min_coarse_size);
5982 HYPRE_BoomerAMGSetTruncFactor(pcg_precond, trunc_factor);
5983 HYPRE_BoomerAMGSetPMaxElmts(pcg_precond, P_max_elmts);
5984 HYPRE_BoomerAMGSetJacobiTruncThreshold(pcg_precond, jacobi_trunc_threshold);
5985 HYPRE_BoomerAMGSetSCommPkgSwitch(pcg_precond, S_commpkg_switch);
5986 HYPRE_BoomerAMGSetPrintLevel(pcg_precond, poutdat);
5987 HYPRE_BoomerAMGSetPrintFileName(pcg_precond, "driver.out.log");
5988 HYPRE_BoomerAMGSetMaxIter(pcg_precond, 1);
5989 HYPRE_BoomerAMGSetCycleType(pcg_precond, cycle_type);
5990 HYPRE_BoomerAMGSetFCycle(pcg_precond, fcycle);
5991 HYPRE_BoomerAMGSetNumSweeps(pcg_precond, num_sweeps);
5992 HYPRE_BoomerAMGSetISType(pcg_precond, IS_type);
5993 HYPRE_BoomerAMGSetNumCRRelaxSteps(pcg_precond, num_CR_relax_steps);
5994 HYPRE_BoomerAMGSetCRRate(pcg_precond, CR_rate);
5995 HYPRE_BoomerAMGSetCRStrongTh(pcg_precond, CR_strong_th);
5996 HYPRE_BoomerAMGSetCRUseCG(pcg_precond, CR_use_CG);
5997 if (relax_type > -1) HYPRE_BoomerAMGSetRelaxType(pcg_precond, relax_type);
5998 if (relax_down > -1)
5999 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_down, 1);
6000 if (relax_up > -1)
6001 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_up, 2);
6002 if (relax_coarse > -1)
6003 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_coarse, 3);
6004 HYPRE_BoomerAMGSetAddRelaxType(pcg_precond, add_relax_type);
6005 HYPRE_BoomerAMGSetAddRelaxWt(pcg_precond, add_relax_wt);
6006 HYPRE_BoomerAMGSetChebyOrder(pcg_precond, cheby_order);
6007 HYPRE_BoomerAMGSetChebyFraction(pcg_precond, cheby_fraction);
6008 HYPRE_BoomerAMGSetChebyEigEst(pcg_precond, cheby_eig_est);
6009 HYPRE_BoomerAMGSetChebyVariant(pcg_precond, cheby_variant);
6010 HYPRE_BoomerAMGSetChebyScale(pcg_precond, cheby_scale);
6011 HYPRE_BoomerAMGSetRelaxOrder(pcg_precond, relax_order);
6012 HYPRE_BoomerAMGSetRelaxWt(pcg_precond, relax_wt);
6013 HYPRE_BoomerAMGSetOuterWt(pcg_precond, outer_wt);
6014 if (level_w > -1)
6015 HYPRE_BoomerAMGSetLevelRelaxWt(pcg_precond, relax_wt_level,level_w);
6016 if (level_ow > -1)
6017 HYPRE_BoomerAMGSetLevelOuterWt(pcg_precond,outer_wt_level,level_ow);
6018 HYPRE_BoomerAMGSetSmoothType(pcg_precond, smooth_type);
6019 HYPRE_BoomerAMGSetSmoothNumLevels(pcg_precond, smooth_num_levels);
6020 HYPRE_BoomerAMGSetSmoothNumSweeps(pcg_precond, smooth_num_sweeps);
6021 HYPRE_BoomerAMGSetMaxLevels(pcg_precond, max_levels);
6022 HYPRE_BoomerAMGSetMaxRowSum(pcg_precond, max_row_sum);
6023 HYPRE_BoomerAMGSetDebugFlag(pcg_precond, debug_flag);
6024 HYPRE_BoomerAMGSetNumFunctions(pcg_precond, num_functions);
6025 HYPRE_BoomerAMGSetAggNumLevels(pcg_precond, agg_num_levels);
6026 HYPRE_BoomerAMGSetAggInterpType(pcg_precond, agg_interp_type);
6027 HYPRE_BoomerAMGSetAggTruncFactor(pcg_precond, agg_trunc_factor);
6028 HYPRE_BoomerAMGSetAggP12TruncFactor(pcg_precond, agg_P12_trunc_factor);
6029 HYPRE_BoomerAMGSetAggPMaxElmts(pcg_precond, agg_P_max_elmts);
6030 HYPRE_BoomerAMGSetAggP12MaxElmts(pcg_precond, agg_P12_max_elmts);
6031 HYPRE_BoomerAMGSetNumPaths(pcg_precond, num_paths);
6032 HYPRE_BoomerAMGSetNodal(pcg_precond, nodal);
6033 HYPRE_BoomerAMGSetNodalDiag(pcg_precond, nodal_diag);
6034 HYPRE_BoomerAMGSetVariant(pcg_precond, variant);
6035 HYPRE_BoomerAMGSetOverlap(pcg_precond, overlap);
6036 HYPRE_BoomerAMGSetDomainType(pcg_precond, domain_type);
6037 HYPRE_BoomerAMGSetSchwarzUseNonSymm(pcg_precond, use_nonsymm_schwarz);
6038 HYPRE_BoomerAMGSetSchwarzRlxWeight(pcg_precond, schwarz_rlx_weight);
6039 if (eu_level < 0) eu_level = 0;
6040 HYPRE_BoomerAMGSetEuLevel(pcg_precond, eu_level);
6041 HYPRE_BoomerAMGSetEuBJ(pcg_precond, eu_bj);
6042 HYPRE_BoomerAMGSetEuSparseA(pcg_precond, eu_sparse_A);
6043 HYPRE_BoomerAMGSetCycleNumSweeps(pcg_precond, ns_coarse, 3);
6044 if (num_functions > 1)
6045 HYPRE_BoomerAMGSetDofFunc(pcg_precond, dof_func);
6046 HYPRE_BoomerAMGSetAdditive(pcg_precond, additive);
6047 HYPRE_BoomerAMGSetMultAdditive(pcg_precond, mult_add);
6048 HYPRE_BoomerAMGSetSimple(pcg_precond, simple);
6049 HYPRE_BoomerAMGSetAddLastLvl(pcg_precond, add_last_lvl);
6050 HYPRE_BoomerAMGSetMultAddPMaxElmts(pcg_precond, add_P_max_elmts);
6051 HYPRE_BoomerAMGSetMultAddTruncFactor(pcg_precond, add_trunc_factor);
6052 HYPRE_BoomerAMGSetRAP2(pcg_precond, rap2);
6053 HYPRE_BoomerAMGSetModuleRAP2(pcg_precond, mod_rap2);
6054 HYPRE_BoomerAMGSetKeepTranspose(pcg_precond, keepTranspose);
6055 #ifdef HYPRE_USING_DSUPERLU
6056 HYPRE_BoomerAMGSetDSLUThreshold(pcg_precond, dslu_threshold);
6057 #endif
6058 if (nongalerk_tol)
6059 {
6060 HYPRE_BoomerAMGSetNonGalerkinTol(pcg_precond, nongalerk_tol[nongalerk_num_tol-1]);
6061 for (i=0; i < nongalerk_num_tol-1; i++)
6062 HYPRE_BoomerAMGSetLevelNonGalerkinTol(pcg_precond, nongalerk_tol[i], i);
6063 }
6064 HYPRE_LGMRESSetMaxIter(pcg_solver, mg_max_iter);
6065 HYPRE_LGMRESSetPrecond(pcg_solver,
6066 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve,
6067 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup,
6068 pcg_precond);
6069 }
6070 else if (solver_id == 50)
6071 {
6072 /* use diagonal scaling as preconditioner */
6073 if (myid == 0) hypre_printf("Solver: DS-LGMRES\n");
6074 pcg_precond = NULL;
6075
6076 HYPRE_LGMRESSetPrecond(pcg_solver,
6077 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRDiagScale,
6078 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRDiagScaleSetup,
6079 pcg_precond);
6080 }
6081
6082 HYPRE_LGMRESGetPrecond(pcg_solver, &pcg_precond_gotten);
6083 if (pcg_precond_gotten != pcg_precond)
6084 {
6085 hypre_printf("HYPRE_LGMRESGetPrecond got bad precond\n");
6086 return(-1);
6087 }
6088 else
6089 if (myid == 0)
6090 hypre_printf("HYPRE_LGMRESGetPrecond got good precond\n");
6091 HYPRE_LGMRESSetup
6092 (pcg_solver, (HYPRE_Matrix)parcsr_A, (HYPRE_Vector)b, (HYPRE_Vector)x);
6093
6094 hypre_EndTiming(time_index);
6095 hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
6096 hypre_FinalizeTiming(time_index);
6097 hypre_ClearTiming();
6098
6099 time_index = hypre_InitializeTiming("LGMRES Solve");
6100 hypre_BeginTiming(time_index);
6101
6102 HYPRE_LGMRESSolve
6103 (pcg_solver, (HYPRE_Matrix)parcsr_A, (HYPRE_Vector)b, (HYPRE_Vector)x);
6104
6105 hypre_EndTiming(time_index);
6106 hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
6107 hypre_FinalizeTiming(time_index);
6108 hypre_ClearTiming();
6109
6110 HYPRE_LGMRESGetNumIterations(pcg_solver, &num_iterations);
6111 HYPRE_LGMRESGetFinalRelativeResidualNorm(pcg_solver,&final_res_norm);
6112
6113 HYPRE_ParCSRLGMRESDestroy(pcg_solver);
6114
6115 if (solver_id == 51)
6116 {
6117 HYPRE_BoomerAMGDestroy(pcg_precond);
6118 }
6119
6120 if (myid == 0)
6121 {
6122 hypre_printf("\n");
6123 hypre_printf("LGMRES Iterations = %d\n", num_iterations);
6124 hypre_printf("Final LGMRES Relative Residual Norm = %e\n", final_res_norm);
6125 hypre_printf("\n");
6126 }
6127 }
6128
6129 /*-----------------------------------------------------------
6130 * Solve the system using FlexGMRES
6131 *-----------------------------------------------------------*/
6132
6133 if (solver_id == 60 || solver_id == 61 || solver_id == 72 || solver_id == 82 || solver_id == 47)
6134 {
6135 time_index = hypre_InitializeTiming("FlexGMRES Setup");
6136 hypre_BeginTiming(time_index);
6137
6138 HYPRE_ParCSRFlexGMRESCreate(hypre_MPI_COMM_WORLD, &pcg_solver);
6139 HYPRE_FlexGMRESSetKDim(pcg_solver, k_dim);
6140 HYPRE_FlexGMRESSetMaxIter(pcg_solver, max_iter);
6141 HYPRE_FlexGMRESSetTol(pcg_solver, tol);
6142 HYPRE_FlexGMRESSetAbsoluteTol(pcg_solver, atol);
6143 HYPRE_FlexGMRESSetLogging(pcg_solver, 1);
6144 HYPRE_FlexGMRESSetPrintLevel(pcg_solver, ioutdat);
6145
6146 if (solver_id == 61)
6147 {
6148 /* use BoomerAMG as preconditioner */
6149 if (myid == 0) hypre_printf("Solver: AMG-FlexGMRES\n");
6150
6151 HYPRE_BoomerAMGCreate(&pcg_precond);
6152 HYPRE_BoomerAMGSetCGCIts(pcg_precond, cgcits);
6153 HYPRE_BoomerAMGSetInterpType(pcg_precond, interp_type);
6154 HYPRE_BoomerAMGSetPostInterpType(pcg_precond, post_interp_type);
6155 HYPRE_BoomerAMGSetNumSamples(pcg_precond, gsmg_samples);
6156 HYPRE_BoomerAMGSetTol(pcg_precond, pc_tol);
6157 HYPRE_BoomerAMGSetCoarsenType(pcg_precond, coarsen_type);
6158 HYPRE_BoomerAMGSetCoarsenCutFactor(pcg_precond, coarsen_cut_factor);
6159 HYPRE_BoomerAMGSetCPoints(pcg_precond, max_levels, num_cpt, cpt_index);
6160 HYPRE_BoomerAMGSetFPoints(pcg_precond, num_fpt, fpt_index);
6161 HYPRE_BoomerAMGSetIsolatedFPoints(pcg_precond, num_isolated_fpt, isolated_fpt_index);
6162 HYPRE_BoomerAMGSetMeasureType(pcg_precond, measure_type);
6163 HYPRE_BoomerAMGSetStrongThreshold(pcg_precond, strong_threshold);
6164 HYPRE_BoomerAMGSetSeqThreshold(pcg_precond, seq_threshold);
6165 HYPRE_BoomerAMGSetRedundant(pcg_precond, redundant);
6166 HYPRE_BoomerAMGSetMaxCoarseSize(pcg_precond, coarse_threshold);
6167 HYPRE_BoomerAMGSetMinCoarseSize(pcg_precond, min_coarse_size);
6168 HYPRE_BoomerAMGSetTruncFactor(pcg_precond, trunc_factor);
6169 HYPRE_BoomerAMGSetPMaxElmts(pcg_precond, P_max_elmts);
6170 HYPRE_BoomerAMGSetJacobiTruncThreshold(pcg_precond, jacobi_trunc_threshold);
6171 HYPRE_BoomerAMGSetSCommPkgSwitch(pcg_precond, S_commpkg_switch);
6172 HYPRE_BoomerAMGSetPrintLevel(pcg_precond, poutdat);
6173 HYPRE_BoomerAMGSetPrintFileName(pcg_precond, "driver.out.log");
6174 HYPRE_BoomerAMGSetMaxIter(pcg_precond, 1);
6175 HYPRE_BoomerAMGSetCycleType(pcg_precond, cycle_type);
6176 HYPRE_BoomerAMGSetFCycle(pcg_precond, fcycle);
6177 HYPRE_BoomerAMGSetNumSweeps(pcg_precond, num_sweeps);
6178 HYPRE_BoomerAMGSetISType(pcg_precond, IS_type);
6179 HYPRE_BoomerAMGSetNumCRRelaxSteps(pcg_precond, num_CR_relax_steps);
6180 HYPRE_BoomerAMGSetCRRate(pcg_precond, CR_rate);
6181 HYPRE_BoomerAMGSetCRStrongTh(pcg_precond, CR_strong_th);
6182 HYPRE_BoomerAMGSetCRUseCG(pcg_precond, CR_use_CG);
6183 if (relax_type > -1) HYPRE_BoomerAMGSetRelaxType(pcg_precond, relax_type);
6184 if (relax_down > -1)
6185 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_down, 1);
6186 if (relax_up > -1)
6187 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_up, 2);
6188 if (relax_coarse > -1)
6189 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_coarse, 3);
6190 HYPRE_BoomerAMGSetAddRelaxType(pcg_precond, add_relax_type);
6191 HYPRE_BoomerAMGSetAddRelaxWt(pcg_precond, add_relax_wt);
6192 HYPRE_BoomerAMGSetChebyOrder(pcg_precond, cheby_order);
6193 HYPRE_BoomerAMGSetChebyFraction(pcg_precond, cheby_fraction);
6194 HYPRE_BoomerAMGSetChebyEigEst(pcg_precond, cheby_eig_est);
6195 HYPRE_BoomerAMGSetChebyVariant(pcg_precond, cheby_variant);
6196 HYPRE_BoomerAMGSetChebyScale(pcg_precond, cheby_scale);
6197 HYPRE_BoomerAMGSetRelaxOrder(pcg_precond, relax_order);
6198 HYPRE_BoomerAMGSetRelaxWt(pcg_precond, relax_wt);
6199 HYPRE_BoomerAMGSetOuterWt(pcg_precond, outer_wt);
6200 if (level_w > -1)
6201 HYPRE_BoomerAMGSetLevelRelaxWt(pcg_precond, relax_wt_level,level_w);
6202 if (level_ow > -1)
6203 HYPRE_BoomerAMGSetLevelOuterWt(pcg_precond,outer_wt_level,level_ow);
6204 HYPRE_BoomerAMGSetSmoothType(pcg_precond, smooth_type);
6205 HYPRE_BoomerAMGSetSmoothNumLevels(pcg_precond, smooth_num_levels);
6206 HYPRE_BoomerAMGSetSmoothNumSweeps(pcg_precond, smooth_num_sweeps);
6207 HYPRE_BoomerAMGSetMaxLevels(pcg_precond, max_levels);
6208 HYPRE_BoomerAMGSetMaxRowSum(pcg_precond, max_row_sum);
6209 HYPRE_BoomerAMGSetDebugFlag(pcg_precond, debug_flag);
6210 HYPRE_BoomerAMGSetNumFunctions(pcg_precond, num_functions);
6211 HYPRE_BoomerAMGSetAggNumLevels(pcg_precond, agg_num_levels);
6212 HYPRE_BoomerAMGSetAggInterpType(pcg_precond, agg_interp_type);
6213 HYPRE_BoomerAMGSetAggTruncFactor(pcg_precond, agg_trunc_factor);
6214 HYPRE_BoomerAMGSetAggP12TruncFactor(pcg_precond, agg_P12_trunc_factor);
6215 HYPRE_BoomerAMGSetAggPMaxElmts(pcg_precond, agg_P_max_elmts);
6216 HYPRE_BoomerAMGSetAggP12MaxElmts(pcg_precond, agg_P12_max_elmts);
6217 HYPRE_BoomerAMGSetNumPaths(pcg_precond, num_paths);
6218 HYPRE_BoomerAMGSetNodal(pcg_precond, nodal);
6219 HYPRE_BoomerAMGSetNodalDiag(pcg_precond, nodal_diag);
6220 HYPRE_BoomerAMGSetVariant(pcg_precond, variant);
6221 HYPRE_BoomerAMGSetOverlap(pcg_precond, overlap);
6222 HYPRE_BoomerAMGSetDomainType(pcg_precond, domain_type);
6223 HYPRE_BoomerAMGSetSchwarzUseNonSymm(pcg_precond, use_nonsymm_schwarz);
6224 HYPRE_BoomerAMGSetSchwarzRlxWeight(pcg_precond, schwarz_rlx_weight);
6225 if (eu_level < 0) eu_level = 0;
6226 HYPRE_BoomerAMGSetEuLevel(pcg_precond, eu_level);
6227 HYPRE_BoomerAMGSetEuBJ(pcg_precond, eu_bj);
6228 HYPRE_BoomerAMGSetEuSparseA(pcg_precond, eu_sparse_A);
6229 HYPRE_BoomerAMGSetCycleNumSweeps(pcg_precond, ns_coarse, 3);
6230 if (num_functions > 1)
6231 HYPRE_BoomerAMGSetDofFunc(pcg_precond, dof_func);
6232 HYPRE_BoomerAMGSetAdditive(pcg_precond, additive);
6233 HYPRE_BoomerAMGSetMultAdditive(pcg_precond, mult_add);
6234 HYPRE_BoomerAMGSetSimple(pcg_precond, simple);
6235 HYPRE_BoomerAMGSetAddLastLvl(pcg_precond, add_last_lvl);
6236 HYPRE_BoomerAMGSetMultAddPMaxElmts(pcg_precond, add_P_max_elmts);
6237 HYPRE_BoomerAMGSetMultAddTruncFactor(pcg_precond, add_trunc_factor);
6238 HYPRE_BoomerAMGSetRAP2(pcg_precond, rap2);
6239 HYPRE_BoomerAMGSetModuleRAP2(pcg_precond, mod_rap2);
6240 HYPRE_BoomerAMGSetKeepTranspose(pcg_precond, keepTranspose);
6241 #ifdef HYPRE_USING_DSUPERLU
6242 HYPRE_BoomerAMGSetDSLUThreshold(pcg_precond, dslu_threshold);
6243 #endif
6244 if (nongalerk_tol)
6245 {
6246 HYPRE_BoomerAMGSetNonGalerkinTol(pcg_precond, nongalerk_tol[nongalerk_num_tol-1]);
6247 for (i=0; i < nongalerk_num_tol-1; i++)
6248 HYPRE_BoomerAMGSetLevelNonGalerkinTol(pcg_precond, nongalerk_tol[i], i);
6249 }
6250 HYPRE_FlexGMRESSetMaxIter(pcg_solver, mg_max_iter);
6251 HYPRE_FlexGMRESSetPrecond(pcg_solver,
6252 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve,
6253 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup,
6254 pcg_precond);
6255 }
6256 else if( solver_id == 72 )
6257 {
6258 /* use MGR preconditioning */
6259 if (myid == 0) hypre_printf("Solver: MGR-FlexGMRES\n");
6260
6261 HYPRE_MGRCreate(&pcg_precond);
6262
6263 mgr_num_cindexes = hypre_CTAlloc(HYPRE_Int, mgr_nlevels, HYPRE_MEMORY_HOST);
6264 for(i=0; i<mgr_nlevels; i++)
6265 { /* assume 1 coarse index per level */
6266 mgr_num_cindexes[i] = 1;
6267 }
6268 mgr_cindexes = hypre_CTAlloc(HYPRE_Int*, mgr_nlevels, HYPRE_MEMORY_HOST);
6269 for(i=0; i<mgr_nlevels; i++)
6270 {
6271 mgr_cindexes[i] = hypre_CTAlloc(HYPRE_Int, mgr_num_cindexes[i], HYPRE_MEMORY_HOST);
6272 }
6273 for(i=0; i<mgr_nlevels; i++)
6274 { /* assume coarse point is at index 0 */
6275 mgr_cindexes[i][0] = 0;
6276 }
6277 mgr_reserved_coarse_indexes = hypre_CTAlloc(HYPRE_BigInt, mgr_num_reserved_nodes, HYPRE_MEMORY_HOST);
6278 for(i=0; i<mgr_num_reserved_nodes; i++)
6279 { /* generate artificial reserved nodes */
6280 mgr_reserved_coarse_indexes[i] = last_local_row - (HYPRE_BigInt) i; //2*i+1;
6281 }
6282
6283 /* set MGR data by block */
6284 HYPRE_MGRSetCpointsByBlock( pcg_precond, mgr_bsize, mgr_nlevels, mgr_num_cindexes,mgr_cindexes);
6285 /* set reserved coarse nodes */
6286 if(mgr_num_reserved_nodes)HYPRE_MGRSetReservedCoarseNodes(pcg_precond, mgr_num_reserved_nodes, mgr_reserved_coarse_indexes);
6287
6288 /* set intermediate coarse grid strategy */
6289 HYPRE_MGRSetNonCpointsToFpoints(pcg_precond, mgr_non_c_to_f);
6290 /* set F relaxation strategy */
6291 HYPRE_MGRSetFRelaxMethod(pcg_precond, mgr_frelax_method);
6292 /* set relax type for single level F-relaxation and post-relaxation */
6293 HYPRE_MGRSetRelaxType(pcg_precond, mgr_relax_type);
6294 HYPRE_MGRSetNumRelaxSweeps(pcg_precond, mgr_num_relax_sweeps);
6295 /* set interpolation type */
6296 HYPRE_MGRSetInterpType(pcg_precond, mgr_interp_type);
6297 HYPRE_MGRSetNumInterpSweeps(pcg_precond, mgr_num_interp_sweeps);
6298 /* set print level */
6299 HYPRE_MGRSetPrintLevel(pcg_precond, 1);
6300 /* set max iterations */
6301 HYPRE_MGRSetMaxIter(pcg_precond, 1);
6302 HYPRE_MGRSetTol(pcg_precond, pc_tol);
6303
6304 HYPRE_MGRSetGlobalsmoothType(pcg_precond, mgr_gsmooth_type);
6305 HYPRE_MGRSetMaxGlobalsmoothIters( pcg_precond, mgr_num_gsmooth_sweeps );
6306
6307 /* create AMG coarse grid solver */
6308
6309 HYPRE_BoomerAMGCreate(&amg_solver);
6310 /* BM Aug 25, 2006 */
6311 HYPRE_BoomerAMGSetCGCIts(amg_solver, cgcits);
6312 HYPRE_BoomerAMGSetInterpType(amg_solver, 0);
6313 HYPRE_BoomerAMGSetPostInterpType(amg_solver, post_interp_type);
6314 HYPRE_BoomerAMGSetCoarsenType(amg_solver, 6);
6315 HYPRE_BoomerAMGSetPMaxElmts(amg_solver, 0);
6316 /* note: log is written to standard output, not to file */
6317 HYPRE_BoomerAMGSetPrintLevel(amg_solver, 1);
6318 HYPRE_BoomerAMGSetCycleType(amg_solver, cycle_type);
6319 HYPRE_BoomerAMGSetFCycle(amg_solver, fcycle);
6320 HYPRE_BoomerAMGSetNumSweeps(amg_solver, num_sweeps);
6321 HYPRE_BoomerAMGSetRelaxType(amg_solver, 3);
6322 if (relax_down > -1)
6323 HYPRE_BoomerAMGSetCycleRelaxType(amg_solver, relax_down, 1);
6324 if (relax_up > -1)
6325 HYPRE_BoomerAMGSetCycleRelaxType(amg_solver, relax_up, 2);
6326 if (relax_coarse > -1)
6327 HYPRE_BoomerAMGSetCycleRelaxType(amg_solver, relax_coarse, 3);
6328 HYPRE_BoomerAMGSetRelaxOrder(amg_solver, 1);
6329 HYPRE_BoomerAMGSetMaxLevels(amg_solver, max_levels);
6330 HYPRE_BoomerAMGSetSmoothType(amg_solver, smooth_type);
6331 HYPRE_BoomerAMGSetSmoothNumSweeps(amg_solver, smooth_num_sweeps);
6332 HYPRE_BoomerAMGSetMaxIter(amg_solver, 1);
6333 HYPRE_BoomerAMGSetTol(amg_solver, 0.0);
6334
6335 /* set the MGR coarse solver. Comment out to use default CG solver in MGR */
6336 HYPRE_MGRSetCoarseSolver( pcg_precond, HYPRE_BoomerAMGSolve, HYPRE_BoomerAMGSetup, amg_solver);
6337
/* set up the MGR-FlexGMRES solver */
6339 HYPRE_FlexGMRESSetMaxIter(pcg_solver, mg_max_iter);
6340 HYPRE_FlexGMRESSetPrecond(pcg_solver,
6341 (HYPRE_PtrToSolverFcn) HYPRE_MGRSolve,
6342 (HYPRE_PtrToSolverFcn) HYPRE_MGRSetup,
6343 pcg_precond);
6344 }
6345 else if (solver_id == 47)
6346 {
6347 /* use Euclid preconditioning */
6348 if (myid == 0) hypre_printf("Solver: Euclid-FlexGMRES\n");
6349
6350 HYPRE_EuclidCreate(hypre_MPI_COMM_WORLD, &pcg_precond);
6351
6352 if (eu_level > -1) HYPRE_EuclidSetLevel(pcg_precond, eu_level);
6353 if (eu_ilut) HYPRE_EuclidSetILUT(pcg_precond, eu_ilut);
6354 if (eu_sparse_A) HYPRE_EuclidSetSparseA(pcg_precond, eu_sparse_A);
6355 if (eu_row_scale) HYPRE_EuclidSetRowScale(pcg_precond, eu_row_scale);
6356 if (eu_bj) HYPRE_EuclidSetBJ(pcg_precond, eu_bj);
6357 HYPRE_EuclidSetStats(pcg_precond, eu_stats);
6358 HYPRE_EuclidSetMem(pcg_precond, eu_mem);
6359 /*HYPRE_EuclidSetParams(pcg_precond, argc, argv);*/
6360
/* set up the Euclid-FlexGMRES solver */
6362 HYPRE_FlexGMRESSetMaxIter(pcg_solver, mg_max_iter);
6363 HYPRE_FlexGMRESSetPrecond(pcg_solver,
6364 (HYPRE_PtrToSolverFcn) HYPRE_EuclidSolve,
6365 (HYPRE_PtrToSolverFcn) HYPRE_EuclidSetup,
6366 pcg_precond);
6367 }
6368 else if (solver_id == 82)
6369 {
6370 /* use hypre_ILU preconditioning */
6371 if (myid == 0) hypre_printf("Solver: ILU-FlexGMRES\n");
6372
6373 /* create precon */
6374 HYPRE_ILUCreate(&pcg_precond);
6375 HYPRE_ILUSetType(pcg_precond, ilu_type);
6376 HYPRE_ILUSetLevelOfFill(pcg_precond, ilu_lfil);
6377 /* set print level */
6378 HYPRE_ILUSetPrintLevel(pcg_precond, 1);
6379 /* set max iterations */
6380 HYPRE_ILUSetMaxIter(pcg_precond, 1);
6381 HYPRE_ILUSetTol(pcg_precond, pc_tol);
6382 /* set max number of nonzeros per row */
6383 HYPRE_ILUSetMaxNnzPerRow(pcg_precond,ilu_max_row_nnz);
6384 /* set the droptol */
6385 HYPRE_ILUSetDropThreshold(pcg_precond,ilu_droptol);
6386 /* set max iterations for Schur system solve */
6387 HYPRE_ILUSetSchurMaxIter( pcg_precond, ilu_schur_max_iter );
6388 HYPRE_ILUSetNSHDropThreshold( pcg_precond, ilu_nsh_droptol);
6389
/* set up the ILU-FlexGMRES solver */
6391 HYPRE_FlexGMRESSetMaxIter(pcg_solver, mg_max_iter);
6392 HYPRE_FlexGMRESSetPrecond(pcg_solver,
6393 (HYPRE_PtrToSolverFcn) HYPRE_ILUSolve,
6394 (HYPRE_PtrToSolverFcn) HYPRE_ILUSetup,
6395 pcg_precond);
6396 }
6397 else if (solver_id == 60)
6398 {
6399 /* use diagonal scaling as preconditioner */
6400 if (myid == 0) hypre_printf("Solver: DS-FlexGMRES\n");
6401 pcg_precond = NULL;
6402
6403 HYPRE_FlexGMRESSetPrecond(pcg_solver,
6404 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRDiagScale,
6405 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRDiagScaleSetup,
6406 pcg_precond);
6407 }
6408
6409 HYPRE_FlexGMRESGetPrecond(pcg_solver, &pcg_precond_gotten);
6410 if (pcg_precond_gotten != pcg_precond)
6411 {
6412 hypre_printf("HYPRE_FlexGMRESGetPrecond got bad precond\n");
6413 return(-1);
6414 }
6415 else
6416 if (myid == 0)
6417 hypre_printf("HYPRE_FlexGMRESGetPrecond got good precond\n");
6418
6419
6420 /* this is optional - could be a user defined one instead (see ex5.c)*/
6421 HYPRE_FlexGMRESSetModifyPC( pcg_solver,
6422 (HYPRE_PtrToModifyPCFcn) hypre_FlexGMRESModifyPCDefault);
6423
6424
6425 HYPRE_FlexGMRESSetup
6426 (pcg_solver, (HYPRE_Matrix)parcsr_A, (HYPRE_Vector)b, (HYPRE_Vector)x);
6427
6428 hypre_EndTiming(time_index);
6429 hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
6430 hypre_FinalizeTiming(time_index);
6431 hypre_ClearTiming();
6432
6433 time_index = hypre_InitializeTiming("FlexGMRES Solve");
6434 hypre_BeginTiming(time_index);
6435
6436 HYPRE_FlexGMRESSolve
6437 (pcg_solver, (HYPRE_Matrix)parcsr_A, (HYPRE_Vector)b, (HYPRE_Vector)x);
6438
6439 hypre_EndTiming(time_index);
6440 hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
6441 hypre_FinalizeTiming(time_index);
6442 hypre_ClearTiming();
6443
6444 HYPRE_FlexGMRESGetNumIterations(pcg_solver, &num_iterations);
6445 HYPRE_FlexGMRESGetFinalRelativeResidualNorm(pcg_solver,&final_res_norm);
6446
6447 HYPRE_ParCSRFlexGMRESDestroy(pcg_solver);
6448
6449 if (solver_id == 61)
6450 {
6451 HYPRE_BoomerAMGDestroy(pcg_precond);
6452 }
6453 else if(solver_id == 72)
6454 {
6455 /* free memory */
6456 if(mgr_num_cindexes)
6457 hypre_TFree(mgr_num_cindexes, HYPRE_MEMORY_HOST);
6458 mgr_num_cindexes = NULL;
6459
6460 if(mgr_reserved_coarse_indexes)
6461 hypre_TFree(mgr_reserved_coarse_indexes, HYPRE_MEMORY_HOST);
6462 mgr_reserved_coarse_indexes = NULL;
6463
6464 if(mgr_cindexes)
6465 {
6466 for( i=0; i<mgr_nlevels; i++)
6467 {
6468 if(mgr_cindexes[i])
6469 hypre_TFree(mgr_cindexes[i], HYPRE_MEMORY_HOST);
6470 }
6471 hypre_TFree(mgr_cindexes, HYPRE_MEMORY_HOST);
6472 mgr_cindexes = NULL;
6473 }
6474
6475 HYPRE_BoomerAMGDestroy(amg_solver);
6476 HYPRE_MGRDestroy(pcg_precond);
6477 }
6478 else if (solver_id == 47)
6479 {
6480 HYPRE_EuclidDestroy(pcg_precond);
6481 }
6482 else if(solver_id == 82)
6483 {
6484 HYPRE_ILUDestroy(pcg_precond);
6485 }
6486 if (myid == 0)
6487 {
6488 hypre_printf("\n");
6489 hypre_printf("FlexGMRES Iterations = %d\n", num_iterations);
6490 hypre_printf("Final FlexGMRES Relative Residual Norm = %e\n", final_res_norm);
6491 hypre_printf("\n");
6492 }
6493 }
6494
6495 /*-----------------------------------------------------------
6496 * Solve the system using BiCGSTAB
6497 *-----------------------------------------------------------*/
6498
6499 if (solver_id == 9 || solver_id == 10 || solver_id == 11 || solver_id == 45 || solver_id == 73)
6500 {
6501 time_index = hypre_InitializeTiming("BiCGSTAB Setup");
6502 hypre_BeginTiming(time_index);
6503
6504 HYPRE_ParCSRBiCGSTABCreate(hypre_MPI_COMM_WORLD, &pcg_solver);
6505 HYPRE_BiCGSTABSetMaxIter(pcg_solver, max_iter);
6506 HYPRE_BiCGSTABSetTol(pcg_solver, tol);
6507 HYPRE_BiCGSTABSetAbsoluteTol(pcg_solver, atol);
6508 HYPRE_BiCGSTABSetLogging(pcg_solver, ioutdat);
6509 HYPRE_BiCGSTABSetPrintLevel(pcg_solver, ioutdat);
6510
6511 if (solver_id == 9)
6512 {
6513 /* use BoomerAMG as preconditioner */
6514 if (myid == 0) hypre_printf("Solver: AMG-BiCGSTAB\n");
6515 HYPRE_BoomerAMGCreate(&pcg_precond);
6516 HYPRE_BoomerAMGSetCGCIts(pcg_precond, cgcits);
6517 HYPRE_BoomerAMGSetInterpType(pcg_precond, interp_type);
6518 HYPRE_BoomerAMGSetPostInterpType(pcg_precond, post_interp_type);
6519 HYPRE_BoomerAMGSetNumSamples(pcg_precond, gsmg_samples);
6520 HYPRE_BoomerAMGSetTol(pcg_precond, pc_tol);
6521 HYPRE_BoomerAMGSetCoarsenType(pcg_precond, coarsen_type);
6522 HYPRE_BoomerAMGSetCoarsenCutFactor(pcg_precond, coarsen_cut_factor);
6523 HYPRE_BoomerAMGSetCPoints(pcg_precond, max_levels, num_cpt, cpt_index);
6524 HYPRE_BoomerAMGSetFPoints(pcg_precond, num_fpt, fpt_index);
6525 HYPRE_BoomerAMGSetIsolatedFPoints(pcg_precond, num_isolated_fpt, isolated_fpt_index);
6526 HYPRE_BoomerAMGSetMeasureType(pcg_precond, measure_type);
6527 HYPRE_BoomerAMGSetStrongThreshold(pcg_precond, strong_threshold);
6528 HYPRE_BoomerAMGSetSeqThreshold(pcg_precond, seq_threshold);
6529 HYPRE_BoomerAMGSetRedundant(pcg_precond, redundant);
6530 HYPRE_BoomerAMGSetMaxCoarseSize(pcg_precond, coarse_threshold);
6531 HYPRE_BoomerAMGSetMinCoarseSize(pcg_precond, min_coarse_size);
6532 HYPRE_BoomerAMGSetTruncFactor(pcg_precond, trunc_factor);
6533 HYPRE_BoomerAMGSetPMaxElmts(pcg_precond, P_max_elmts);
6534 HYPRE_BoomerAMGSetJacobiTruncThreshold(pcg_precond, jacobi_trunc_threshold);
6535 HYPRE_BoomerAMGSetSCommPkgSwitch(pcg_precond, S_commpkg_switch);
6536 HYPRE_BoomerAMGSetPrintLevel(pcg_precond, poutdat);
6537 HYPRE_BoomerAMGSetPrintFileName(pcg_precond, "driver.out.log");
6538 HYPRE_BoomerAMGSetMaxIter(pcg_precond, 1);
6539 HYPRE_BoomerAMGSetCycleType(pcg_precond, cycle_type);
6540 HYPRE_BoomerAMGSetFCycle(pcg_precond, fcycle);
6541 HYPRE_BoomerAMGSetNumSweeps(pcg_precond, num_sweeps);
6542 HYPRE_BoomerAMGSetISType(pcg_precond, IS_type);
6543 HYPRE_BoomerAMGSetNumCRRelaxSteps(pcg_precond, num_CR_relax_steps);
6544 HYPRE_BoomerAMGSetCRRate(pcg_precond, CR_rate);
6545 HYPRE_BoomerAMGSetCRStrongTh(pcg_precond, CR_strong_th);
6546 HYPRE_BoomerAMGSetCRUseCG(pcg_precond, CR_use_CG);
6547 if (relax_type > -1) HYPRE_BoomerAMGSetRelaxType(pcg_precond, relax_type);
6548 if (relax_down > -1)
6549 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_down, 1);
6550 if (relax_up > -1)
6551 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_up, 2);
6552 if (relax_coarse > -1)
6553 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_coarse, 3);
6554 HYPRE_BoomerAMGSetAddRelaxType(pcg_precond, add_relax_type);
6555 HYPRE_BoomerAMGSetAddRelaxWt(pcg_precond, add_relax_wt);
6556 HYPRE_BoomerAMGSetChebyOrder(pcg_precond, cheby_order);
6557 HYPRE_BoomerAMGSetChebyFraction(pcg_precond, cheby_fraction);
6558 HYPRE_BoomerAMGSetChebyEigEst(pcg_precond, cheby_eig_est);
6559 HYPRE_BoomerAMGSetChebyVariant(pcg_precond, cheby_variant);
6560 HYPRE_BoomerAMGSetChebyScale(pcg_precond, cheby_scale);
6561 HYPRE_BoomerAMGSetRelaxOrder(pcg_precond, relax_order);
6562 HYPRE_BoomerAMGSetRelaxWt(pcg_precond, relax_wt);
6563 HYPRE_BoomerAMGSetOuterWt(pcg_precond, outer_wt);
6564 if (level_w > -1)
6565 HYPRE_BoomerAMGSetLevelRelaxWt(pcg_precond, relax_wt_level,level_w);
6566 if (level_ow > -1)
6567 HYPRE_BoomerAMGSetLevelOuterWt(pcg_precond,outer_wt_level,level_ow);
6568 HYPRE_BoomerAMGSetSmoothType(pcg_precond, smooth_type);
6569 HYPRE_BoomerAMGSetSmoothNumLevels(pcg_precond, smooth_num_levels);
6570 HYPRE_BoomerAMGSetSmoothNumSweeps(pcg_precond, smooth_num_sweeps);
6571 HYPRE_BoomerAMGSetMaxLevels(pcg_precond, max_levels);
6572 HYPRE_BoomerAMGSetMaxRowSum(pcg_precond, max_row_sum);
6573 HYPRE_BoomerAMGSetDebugFlag(pcg_precond, debug_flag);
6574 HYPRE_BoomerAMGSetNumFunctions(pcg_precond, num_functions);
6575 HYPRE_BoomerAMGSetAggNumLevels(pcg_precond, agg_num_levels);
6576 HYPRE_BoomerAMGSetAggInterpType(pcg_precond, agg_interp_type);
6577 HYPRE_BoomerAMGSetAggTruncFactor(pcg_precond, agg_trunc_factor);
6578 HYPRE_BoomerAMGSetAggP12TruncFactor(pcg_precond, agg_P12_trunc_factor);
6579 HYPRE_BoomerAMGSetAggPMaxElmts(pcg_precond, agg_P_max_elmts);
6580 HYPRE_BoomerAMGSetAggP12MaxElmts(pcg_precond, agg_P12_max_elmts);
6581 HYPRE_BoomerAMGSetNumPaths(pcg_precond, num_paths);
6582 HYPRE_BoomerAMGSetNodal(pcg_precond, nodal);
6583 HYPRE_BoomerAMGSetNodalDiag(pcg_precond, nodal_diag);
6584 HYPRE_BoomerAMGSetVariant(pcg_precond, variant);
6585 HYPRE_BoomerAMGSetOverlap(pcg_precond, overlap);
6586 HYPRE_BoomerAMGSetDomainType(pcg_precond, domain_type);
6587 HYPRE_BoomerAMGSetSchwarzUseNonSymm(pcg_precond, use_nonsymm_schwarz);
6588
6589 HYPRE_BoomerAMGSetSchwarzRlxWeight(pcg_precond, schwarz_rlx_weight);
6590 if (eu_level < 0) eu_level = 0;
6591 HYPRE_BoomerAMGSetEuLevel(pcg_precond, eu_level);
6592 HYPRE_BoomerAMGSetEuBJ(pcg_precond, eu_bj);
6593 HYPRE_BoomerAMGSetEuSparseA(pcg_precond, eu_sparse_A);
6594 HYPRE_BoomerAMGSetCycleNumSweeps(pcg_precond, ns_coarse, 3);
6595 if (num_functions > 1)
6596 HYPRE_BoomerAMGSetDofFunc(pcg_precond, dof_func);
6597 HYPRE_BoomerAMGSetAdditive(pcg_precond, additive);
6598 HYPRE_BoomerAMGSetMultAdditive(pcg_precond, mult_add);
6599 HYPRE_BoomerAMGSetSimple(pcg_precond, simple);
6600 HYPRE_BoomerAMGSetAddLastLvl(pcg_precond, add_last_lvl);
6601 HYPRE_BoomerAMGSetMultAddPMaxElmts(pcg_precond, add_P_max_elmts);
6602 HYPRE_BoomerAMGSetMultAddTruncFactor(pcg_precond, add_trunc_factor);
6603 HYPRE_BoomerAMGSetRAP2(pcg_precond, rap2);
6604 HYPRE_BoomerAMGSetModuleRAP2(pcg_precond, mod_rap2);
6605 HYPRE_BoomerAMGSetKeepTranspose(pcg_precond, keepTranspose);
6606 #ifdef HYPRE_USING_DSUPERLU
6607 HYPRE_BoomerAMGSetDSLUThreshold(pcg_precond, dslu_threshold);
6608 #endif
6609 if (nongalerk_tol)
6610 {
6611 HYPRE_BoomerAMGSetNonGalerkinTol(pcg_precond, nongalerk_tol[nongalerk_num_tol-1]);
6612 for (i=0; i < nongalerk_num_tol-1; i++)
6613 HYPRE_BoomerAMGSetLevelNonGalerkinTol(pcg_precond, nongalerk_tol[i], i);
6614 }
6615 HYPRE_BiCGSTABSetMaxIter(pcg_solver, mg_max_iter);
6616 HYPRE_BiCGSTABSetPrecond(pcg_solver,
6617 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve,
6618 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup,
6619 pcg_precond);
6620 }
6621 else if (solver_id == 10)
6622 {
6623 /* use diagonal scaling as preconditioner */
6624 if (myid == 0) hypre_printf("Solver: DS-BiCGSTAB\n");
6625 pcg_precond = NULL;
6626
6627 HYPRE_BiCGSTABSetPrecond(pcg_solver,
6628 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRDiagScale,
6629 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRDiagScaleSetup,
6630 pcg_precond);
6631 }
6632 else if (solver_id == 11)
6633 {
6634 /* use PILUT as preconditioner */
6635 if (myid == 0) hypre_printf("Solver: PILUT-BiCGSTAB\n");
6636
6637 ierr = HYPRE_ParCSRPilutCreate( hypre_MPI_COMM_WORLD, &pcg_precond );
6638 if (ierr) {
6639 hypre_printf("Error in ParPilutCreate\n");
6640 }
6641
6642 HYPRE_BiCGSTABSetPrecond(pcg_solver,
6643 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRPilutSolve,
6644 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRPilutSetup,
6645 pcg_precond);
6646
6647 HYPRE_ParCSRPilutSetLogging(pcg_precond, 0);
6648
6649 if (drop_tol >= 0 )
6650 HYPRE_ParCSRPilutSetDropTolerance( pcg_precond,
6651 drop_tol );
6652
6653 if (nonzeros_to_keep >= 0 )
6654 HYPRE_ParCSRPilutSetFactorRowSize( pcg_precond,
6655 nonzeros_to_keep );
6656 }
6657 else if (solver_id == 45)
6658 {
6659 /* use Euclid preconditioning */
6660 if (myid == 0) hypre_printf("Solver: Euclid-BICGSTAB\n");
6661
6662 HYPRE_EuclidCreate(hypre_MPI_COMM_WORLD, &pcg_precond);
6663
/* note: There are three methods of setting run-time
   parameters for Euclid (see HYPRE_parcsr_ls.h); here the
   individual HYPRE_EuclidSet* routines are called explicitly.
   Letting Euclid parse the command line internally
   (HYPRE_EuclidSetParams) is left disabled below.
*/
6669 if (eu_level > -1) HYPRE_EuclidSetLevel(pcg_precond, eu_level);
6670 if (eu_ilut) HYPRE_EuclidSetILUT(pcg_precond, eu_ilut);
6671 if (eu_sparse_A) HYPRE_EuclidSetSparseA(pcg_precond, eu_sparse_A);
6672 if (eu_row_scale) HYPRE_EuclidSetRowScale(pcg_precond, eu_row_scale);
6673 if (eu_bj) HYPRE_EuclidSetBJ(pcg_precond, eu_bj);
6674 HYPRE_EuclidSetStats(pcg_precond, eu_stats);
6675 HYPRE_EuclidSetMem(pcg_precond, eu_mem);
6676
6677 /*HYPRE_EuclidSetParams(pcg_precond, argc, argv);*/
6678
6679 HYPRE_BiCGSTABSetPrecond(pcg_solver,
6680 (HYPRE_PtrToSolverFcn) HYPRE_EuclidSolve,
6681 (HYPRE_PtrToSolverFcn) HYPRE_EuclidSetup,
6682 pcg_precond);
6683 }
6684 else if (solver_id == 73)
6685 {
6686 /* use MGR preconditioning */
6687 if (myid == 0) hypre_printf("Solver: MGR-BICGSTAB\n");
6688
6689 HYPRE_MGRCreate(&pcg_precond);
6690
6691 mgr_num_cindexes = hypre_CTAlloc(HYPRE_Int, mgr_nlevels, HYPRE_MEMORY_HOST);
6692 for(i=0; i<mgr_nlevels; i++)
6693 { /* assume 1 coarse index per level */
6694 mgr_num_cindexes[i] = 1;
6695 }
6696 mgr_cindexes = hypre_CTAlloc(HYPRE_Int*, mgr_nlevels, HYPRE_MEMORY_HOST);
6697 for(i=0; i<mgr_nlevels; i++)
6698 {
6699 mgr_cindexes[i] = hypre_CTAlloc(HYPRE_Int, mgr_num_cindexes[i], HYPRE_MEMORY_HOST);
6700 }
6701 for(i=0; i<mgr_nlevels; i++)
{ /* the single coarse index on every level is set to 2 */
6703 mgr_cindexes[i][0] = 2;
6704 }
6705
6706 mgr_reserved_coarse_indexes = hypre_CTAlloc(HYPRE_BigInt, mgr_num_reserved_nodes, HYPRE_MEMORY_HOST);
6707 for(i=0; i<mgr_num_reserved_nodes; i++)
6708 {
6709 /* Generate 'artificial' reserved nodes. Assumes these are ordered last in the system */
6710 mgr_reserved_coarse_indexes[i] = last_local_row - (HYPRE_BigInt) i; //2*i+1;
6711 // hypre_printf("mgr_reserved_coarse_indexes[i] = %b \n", mgr_reserved_coarse_indexes[i]);
6712 }
6713
6714 /* set MGR data by block */
6715 HYPRE_MGRSetCpointsByBlock( pcg_precond, mgr_bsize, mgr_nlevels, mgr_num_cindexes,mgr_cindexes);
6716 /* set reserved coarse nodes */
6717 if(mgr_num_reserved_nodes)HYPRE_MGRSetReservedCoarseNodes(pcg_precond, mgr_num_reserved_nodes, mgr_reserved_coarse_indexes);
6718
6719 /* set intermediate coarse grid strategy */
6720 HYPRE_MGRSetNonCpointsToFpoints(pcg_precond, mgr_non_c_to_f);
6721 /* set F relaxation strategy */
6722 HYPRE_MGRSetFRelaxMethod(pcg_precond, mgr_frelax_method);
6723 /* set relax type for single level F-relaxation and post-relaxation */
6724 HYPRE_MGRSetRelaxType(pcg_precond, mgr_relax_type);
6725 HYPRE_MGRSetNumRelaxSweeps(pcg_precond, mgr_num_relax_sweeps);
/* set restriction and interpolation types */
6727 HYPRE_MGRSetRestrictType(pcg_precond, mgr_restrict_type);
6728 HYPRE_MGRSetNumRestrictSweeps(pcg_precond, mgr_num_restrict_sweeps);
6729 HYPRE_MGRSetInterpType(pcg_precond, mgr_interp_type);
6730 HYPRE_MGRSetNumInterpSweeps(pcg_precond, mgr_num_interp_sweeps);
6731 /* set print level */
6732 HYPRE_MGRSetPrintLevel(pcg_precond, 1);
6733 /* set max iterations */
6734 HYPRE_MGRSetMaxIter(pcg_precond, 1);
6735 HYPRE_MGRSetTol(pcg_precond, pc_tol);
6736
6737 HYPRE_MGRSetGlobalsmoothType(pcg_precond, mgr_gsmooth_type);
6738 HYPRE_MGRSetMaxGlobalsmoothIters( pcg_precond, mgr_num_gsmooth_sweeps );
6739
6740 /* create AMG coarse grid solver */
6741
6742 HYPRE_BoomerAMGCreate(&amg_solver);
6743 HYPRE_BoomerAMGSetTol(amg_solver, pc_tol);
6744 HYPRE_BoomerAMGSetPrintLevel(amg_solver, 1);
6745
6746 HYPRE_BoomerAMGSetMaxIter(amg_solver, 1);
6747
6748 HYPRE_BoomerAMGSetCycleType(amg_solver, 1);
6749 HYPRE_BoomerAMGSetNumSweeps(amg_solver, 1);
6750 HYPRE_BoomerAMGSetCycleRelaxType(amg_solver, 14, 1);
6751 HYPRE_BoomerAMGSetCycleRelaxType(amg_solver, 14, 2);
6752 HYPRE_BoomerAMGSetCycleRelaxType(amg_solver, 9, 3);
6753 /* set the MGR coarse solver. Comment out to use default CG solver in MGR */
6754 HYPRE_MGRSetCoarseSolver( pcg_precond, HYPRE_BoomerAMGSolve, HYPRE_BoomerAMGSetup, amg_solver);
6755
6756
6757 /* setup MGR-BiCGSTAB solver */
6758 HYPRE_BiCGSTABSetMaxIter(pcg_solver, mg_max_iter);
6759 HYPRE_BiCGSTABSetPrecond(pcg_solver,
6760 (HYPRE_PtrToSolverFcn) HYPRE_MGRSolve,
6761 (HYPRE_PtrToSolverFcn) HYPRE_MGRSetup,
6762 pcg_precond);
6763 }
6764 HYPRE_BiCGSTABSetup(pcg_solver, (HYPRE_Matrix)parcsr_A,
6765 (HYPRE_Vector)b, (HYPRE_Vector)x);
6766
6767 hypre_EndTiming(time_index);
6768 hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
6769 hypre_FinalizeTiming(time_index);
6770 hypre_ClearTiming();
6771
6772 time_index = hypre_InitializeTiming("BiCGSTAB Solve");
6773 hypre_BeginTiming(time_index);
6774
6775 HYPRE_BiCGSTABSolve(pcg_solver, (HYPRE_Matrix)parcsr_A,
6776 (HYPRE_Vector)b, (HYPRE_Vector)x);
6777
6778 hypre_EndTiming(time_index);
6779 hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
6780 hypre_FinalizeTiming(time_index);
6781 hypre_ClearTiming();
6782
6783 HYPRE_BiCGSTABGetNumIterations(pcg_solver, &num_iterations);
6784 HYPRE_BiCGSTABGetFinalRelativeResidualNorm(pcg_solver,&final_res_norm);
6785 #if SECOND_TIME
6786 /* run a second time to check for memory leaks */
6787 HYPRE_ParVectorSetRandomValues(x, 775);
6788 HYPRE_BiCGSTABSetup(pcg_solver, (HYPRE_Matrix)parcsr_A,
6789 (HYPRE_Vector)b, (HYPRE_Vector)x);
6790 HYPRE_BiCGSTABSolve(pcg_solver, (HYPRE_Matrix)parcsr_A,
6791 (HYPRE_Vector)b, (HYPRE_Vector)x);
6792 #endif
6793
6794 HYPRE_ParCSRBiCGSTABDestroy(pcg_solver);
6795
6796 if (solver_id == 9)
6797 {
6798 HYPRE_BoomerAMGDestroy(pcg_precond);
6799 }
6800
6801 if (solver_id == 11)
6802 {
6803 HYPRE_ParCSRPilutDestroy(pcg_precond);
6804 }
6805 else if (solver_id == 45)
6806 {
6807 HYPRE_EuclidDestroy(pcg_precond);
6808 }
6809 else if(solver_id == 73)
6810 {
6811 /* free memory */
6812 if(mgr_num_cindexes)
6813 hypre_TFree(mgr_num_cindexes, HYPRE_MEMORY_HOST);
6814 mgr_num_cindexes = NULL;
6815
6816 if(mgr_reserved_coarse_indexes)
6817 hypre_TFree(mgr_reserved_coarse_indexes, HYPRE_MEMORY_HOST);
6818 mgr_reserved_coarse_indexes = NULL;
6819
6820 if(mgr_cindexes)
6821 {
6822 for( i=0; i<mgr_nlevels; i++)
6823 {
6824 if(mgr_cindexes[i])
6825 hypre_TFree(mgr_cindexes[i], HYPRE_MEMORY_HOST);
6826 }
6827 hypre_TFree(mgr_cindexes, HYPRE_MEMORY_HOST);
6828 mgr_cindexes = NULL;
6829 }
6830
6831 HYPRE_BoomerAMGDestroy(amg_solver);
6832 HYPRE_MGRDestroy(pcg_precond);
6833 }
6834 if (myid == 0)
6835 {
6836 hypre_printf("\n");
6837 hypre_printf("BiCGSTAB Iterations = %d\n", num_iterations);
6838 hypre_printf("Final BiCGSTAB Relative Residual Norm = %e\n", final_res_norm);
6839 hypre_printf("\n");
6840 }
6841 }
6842
6843 /*-----------------------------------------------------------
6844 * Solve the system using COGMRES
6845 *-----------------------------------------------------------*/
6846
6847 if (solver_id == 16 || solver_id == 17 || solver_id == 46 || solver_id == 74)
6848 {
6849 time_index = hypre_InitializeTiming("COGMRES Setup");
6850 hypre_BeginTiming(time_index);
6851
6852 HYPRE_ParCSRCOGMRESCreate(hypre_MPI_COMM_WORLD, &pcg_solver);
6853 HYPRE_COGMRESSetKDim(pcg_solver, k_dim);
6854 HYPRE_COGMRESSetUnroll(pcg_solver, unroll);
6855 HYPRE_COGMRESSetCGS(pcg_solver, cgs);
6856 HYPRE_COGMRESSetMaxIter(pcg_solver, max_iter);
6857 HYPRE_COGMRESSetTol(pcg_solver, tol);
6858 HYPRE_COGMRESSetAbsoluteTol(pcg_solver, atol);
6859 HYPRE_COGMRESSetLogging(pcg_solver, ioutdat);
6860 HYPRE_COGMRESSetPrintLevel(pcg_solver, ioutdat);
6861
6862 if (solver_id == 16)
6863 {
6864 /* use BoomerAMG as preconditioner */
6865 if (myid == 0) hypre_printf("Solver: AMG-COGMRES\n");
6866 HYPRE_BoomerAMGCreate(&pcg_precond);
6867 HYPRE_BoomerAMGSetCGCIts(pcg_precond, cgcits);
6868 HYPRE_BoomerAMGSetInterpType(pcg_precond, interp_type);
6869 HYPRE_BoomerAMGSetPostInterpType(pcg_precond, post_interp_type);
6870 HYPRE_BoomerAMGSetNumSamples(pcg_precond, gsmg_samples);
6871 HYPRE_BoomerAMGSetTol(pcg_precond, pc_tol);
6872 HYPRE_BoomerAMGSetCoarsenType(pcg_precond, coarsen_type);
6873 HYPRE_BoomerAMGSetCoarsenCutFactor(pcg_precond, coarsen_cut_factor);
6874 HYPRE_BoomerAMGSetCPoints(pcg_precond, max_levels, num_cpt, cpt_index);
6875 HYPRE_BoomerAMGSetFPoints(pcg_precond, num_fpt, fpt_index);
6876 HYPRE_BoomerAMGSetIsolatedFPoints(pcg_precond, num_isolated_fpt, isolated_fpt_index);
6877 HYPRE_BoomerAMGSetMeasureType(pcg_precond, measure_type);
6878 HYPRE_BoomerAMGSetStrongThreshold(pcg_precond, strong_threshold);
6879 HYPRE_BoomerAMGSetSeqThreshold(pcg_precond, seq_threshold);
6880 HYPRE_BoomerAMGSetRedundant(pcg_precond, redundant);
6881 HYPRE_BoomerAMGSetMaxCoarseSize(pcg_precond, coarse_threshold);
6882 HYPRE_BoomerAMGSetMinCoarseSize(pcg_precond, min_coarse_size);
6883 HYPRE_BoomerAMGSetTruncFactor(pcg_precond, trunc_factor);
6884 HYPRE_BoomerAMGSetPMaxElmts(pcg_precond, P_max_elmts);
6885 HYPRE_BoomerAMGSetJacobiTruncThreshold(pcg_precond, jacobi_trunc_threshold);
6886 HYPRE_BoomerAMGSetSCommPkgSwitch(pcg_precond, S_commpkg_switch);
6887 HYPRE_BoomerAMGSetPrintLevel(pcg_precond, poutdat);
6888 HYPRE_BoomerAMGSetPrintFileName(pcg_precond, "driver.out.log");
6889 HYPRE_BoomerAMGSetMaxIter(pcg_precond, 1);
6890 HYPRE_BoomerAMGSetCycleType(pcg_precond, cycle_type);
6891 HYPRE_BoomerAMGSetFCycle(pcg_precond, fcycle);
6892 HYPRE_BoomerAMGSetNumSweeps(pcg_precond, num_sweeps);
6893 HYPRE_BoomerAMGSetISType(pcg_precond, IS_type);
6894 HYPRE_BoomerAMGSetNumCRRelaxSteps(pcg_precond, num_CR_relax_steps);
6895 HYPRE_BoomerAMGSetCRRate(pcg_precond, CR_rate);
6896 HYPRE_BoomerAMGSetCRStrongTh(pcg_precond, CR_strong_th);
6897 HYPRE_BoomerAMGSetCRUseCG(pcg_precond, CR_use_CG);
6898 if (relax_type > -1) HYPRE_BoomerAMGSetRelaxType(pcg_precond, relax_type);
6899 if (relax_down > -1)
6900 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_down, 1);
6901 if (relax_up > -1)
6902 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_up, 2);
6903 if (relax_coarse > -1)
6904 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_coarse, 3);
6905 HYPRE_BoomerAMGSetAddRelaxType(pcg_precond, add_relax_type);
6906 HYPRE_BoomerAMGSetAddRelaxWt(pcg_precond, add_relax_wt);
6907 HYPRE_BoomerAMGSetChebyOrder(pcg_precond, cheby_order);
6908 HYPRE_BoomerAMGSetChebyFraction(pcg_precond, cheby_fraction);
6909 HYPRE_BoomerAMGSetChebyEigEst(pcg_precond, cheby_eig_est);
6910 HYPRE_BoomerAMGSetChebyVariant(pcg_precond, cheby_variant);
6911 HYPRE_BoomerAMGSetChebyScale(pcg_precond, cheby_scale);
6912 HYPRE_BoomerAMGSetRelaxOrder(pcg_precond, relax_order);
6913 HYPRE_BoomerAMGSetRelaxWt(pcg_precond, relax_wt);
6914 HYPRE_BoomerAMGSetOuterWt(pcg_precond, outer_wt);
6915 if (level_w > -1)
6916 HYPRE_BoomerAMGSetLevelRelaxWt(pcg_precond, relax_wt_level,level_w);
6917 if (level_ow > -1)
6918 HYPRE_BoomerAMGSetLevelOuterWt(pcg_precond,outer_wt_level,level_ow);
6919 HYPRE_BoomerAMGSetSmoothType(pcg_precond, smooth_type);
6920 HYPRE_BoomerAMGSetSmoothNumLevels(pcg_precond, smooth_num_levels);
6921 HYPRE_BoomerAMGSetSmoothNumSweeps(pcg_precond, smooth_num_sweeps);
6922 HYPRE_BoomerAMGSetMaxLevels(pcg_precond, max_levels);
6923 HYPRE_BoomerAMGSetMaxRowSum(pcg_precond, max_row_sum);
6924 HYPRE_BoomerAMGSetDebugFlag(pcg_precond, debug_flag);
6925 HYPRE_BoomerAMGSetNumFunctions(pcg_precond, num_functions);
6926 HYPRE_BoomerAMGSetAggNumLevels(pcg_precond, agg_num_levels);
6927 HYPRE_BoomerAMGSetAggInterpType(pcg_precond, agg_interp_type);
6928 HYPRE_BoomerAMGSetAggTruncFactor(pcg_precond, agg_trunc_factor);
6929 HYPRE_BoomerAMGSetAggP12TruncFactor(pcg_precond, agg_P12_trunc_factor);
6930 HYPRE_BoomerAMGSetAggPMaxElmts(pcg_precond, agg_P_max_elmts);
6931 HYPRE_BoomerAMGSetAggP12MaxElmts(pcg_precond, agg_P12_max_elmts);
6932 HYPRE_BoomerAMGSetNumPaths(pcg_precond, num_paths);
6933 HYPRE_BoomerAMGSetNodal(pcg_precond, nodal);
6934 HYPRE_BoomerAMGSetNodalDiag(pcg_precond, nodal_diag);
6935 HYPRE_BoomerAMGSetVariant(pcg_precond, variant);
6936 HYPRE_BoomerAMGSetOverlap(pcg_precond, overlap);
6937 HYPRE_BoomerAMGSetDomainType(pcg_precond, domain_type);
6938 HYPRE_BoomerAMGSetSchwarzUseNonSymm(pcg_precond, use_nonsymm_schwarz);
6939
6940 HYPRE_BoomerAMGSetSchwarzRlxWeight(pcg_precond, schwarz_rlx_weight);
6941 if (eu_level < 0) eu_level = 0;
6942 HYPRE_BoomerAMGSetEuLevel(pcg_precond, eu_level);
6943 HYPRE_BoomerAMGSetEuBJ(pcg_precond, eu_bj);
6944 HYPRE_BoomerAMGSetEuSparseA(pcg_precond, eu_sparse_A);
6945 HYPRE_BoomerAMGSetCycleNumSweeps(pcg_precond, ns_coarse, 3);
6946 if (num_functions > 1)
6947 HYPRE_BoomerAMGSetDofFunc(pcg_precond, dof_func);
6948 HYPRE_BoomerAMGSetAdditive(pcg_precond, additive);
6949 HYPRE_BoomerAMGSetMultAdditive(pcg_precond, mult_add);
6950 HYPRE_BoomerAMGSetSimple(pcg_precond, simple);
6951 HYPRE_BoomerAMGSetAddLastLvl(pcg_precond, add_last_lvl);
6952 HYPRE_BoomerAMGSetMultAddPMaxElmts(pcg_precond, add_P_max_elmts);
6953 HYPRE_BoomerAMGSetMultAddTruncFactor(pcg_precond, add_trunc_factor);
6954 HYPRE_BoomerAMGSetRAP2(pcg_precond, rap2);
6955 HYPRE_BoomerAMGSetModuleRAP2(pcg_precond, mod_rap2);
6956 HYPRE_BoomerAMGSetKeepTranspose(pcg_precond, keepTranspose);
6957 #ifdef HYPRE_USING_DSUPERLU
6958 HYPRE_BoomerAMGSetDSLUThreshold(pcg_precond, dslu_threshold);
6959 #endif
6960 if (nongalerk_tol)
6961 {
6962 HYPRE_BoomerAMGSetNonGalerkinTol(pcg_precond, nongalerk_tol[nongalerk_num_tol-1]);
6963 for (i=0; i < nongalerk_num_tol-1; i++)
6964 HYPRE_BoomerAMGSetLevelNonGalerkinTol(pcg_precond, nongalerk_tol[i], i);
6965 }
6966 HYPRE_COGMRESSetMaxIter(pcg_solver, mg_max_iter);
6967 HYPRE_COGMRESSetPrecond(pcg_solver,
6968 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve,
6969 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup,
6970 pcg_precond);
6971 }
6972 else if (solver_id == 17)
6973 {
6974 /* use diagonal scaling as preconditioner */
6975 if (myid == 0) hypre_printf("Solver: DS-COGMRES\n");
6976 pcg_precond = NULL;
6977
6978 HYPRE_COGMRESSetPrecond(pcg_solver,
6979 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRDiagScale,
6980 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRDiagScaleSetup,
6981 pcg_precond);
6982 }
6983 else if (solver_id == 46)
6984 {
6985 /* use Euclid preconditioning */
6986 if (myid == 0) hypre_printf("Solver: Euclid-BICGSTAB\n");
6987
6988 HYPRE_EuclidCreate(hypre_MPI_COMM_WORLD, &pcg_precond);
6989
6990 /* note: There are three three methods of setting run-time
6991 parameters for Euclid: (see HYPRE_parcsr_ls.h); here
6992 we'll use what I think is simplest: let Euclid internally
6993 parse the command line.
6994 */
6995 if (eu_level > -1) HYPRE_EuclidSetLevel(pcg_precond, eu_level);
6996 if (eu_ilut) HYPRE_EuclidSetILUT(pcg_precond, eu_ilut);
6997 if (eu_sparse_A) HYPRE_EuclidSetSparseA(pcg_precond, eu_sparse_A);
6998 if (eu_row_scale) HYPRE_EuclidSetRowScale(pcg_precond, eu_row_scale);
6999 if (eu_bj) HYPRE_EuclidSetBJ(pcg_precond, eu_bj);
7000 HYPRE_EuclidSetStats(pcg_precond, eu_stats);
7001 HYPRE_EuclidSetMem(pcg_precond, eu_mem);
7002
7003 /*HYPRE_EuclidSetParams(pcg_precond, argc, argv);*/
7004
7005 HYPRE_COGMRESSetPrecond(pcg_solver,
7006 (HYPRE_PtrToSolverFcn) HYPRE_EuclidSolve,
7007 (HYPRE_PtrToSolverFcn) HYPRE_EuclidSetup,
7008 pcg_precond);
7009 }
7010 else if (solver_id == 74)
7011 {
7012 /* use MGR preconditioning */
7013 if (myid == 0) hypre_printf("Solver: MGR-BICGSTAB\n");
7014
7015 HYPRE_MGRCreate(&pcg_precond);
7016
7017 mgr_num_cindexes = hypre_CTAlloc(HYPRE_Int, mgr_nlevels, HYPRE_MEMORY_HOST);
7018 for(i=0; i<mgr_nlevels; i++)
7019 { /* assume 1 coarse index per level */
7020 mgr_num_cindexes[i] = 1;
7021 }
7022 mgr_cindexes = hypre_CTAlloc(HYPRE_Int*, mgr_nlevels, HYPRE_MEMORY_HOST);
7023 for(i=0; i<mgr_nlevels; i++)
7024 {
7025 mgr_cindexes[i] = hypre_CTAlloc(HYPRE_Int, mgr_num_cindexes[i], HYPRE_MEMORY_HOST);
7026 }
7027 for(i=0; i<mgr_nlevels; i++)
7028 { /* assume coarse point is at index 0 */
7029 mgr_cindexes[i][0] = 2;
7030 }
7031
7032 mgr_reserved_coarse_indexes = hypre_CTAlloc(HYPRE_BigInt, mgr_num_reserved_nodes, HYPRE_MEMORY_HOST);
7033 for(i=0; i<mgr_num_reserved_nodes; i++)
7034 {
7035 /* Generate 'artificial' reserved nodes. Assumes these are ordered last in the system */
7036 mgr_reserved_coarse_indexes[i] = last_local_row - (HYPRE_BigInt) i; //2*i+1;
7037 // hypre_printf("mgr_reserved_coarse_indexes[i] = %b \n", mgr_reserved_coarse_indexes[i]);
7038 }
7039
7040 /* set MGR data by block */
7041 HYPRE_MGRSetCpointsByBlock( pcg_precond, mgr_bsize, mgr_nlevels, mgr_num_cindexes,mgr_cindexes);
7042 /* set reserved coarse nodes */
7043 if(mgr_num_reserved_nodes)HYPRE_MGRSetReservedCoarseNodes(pcg_precond, mgr_num_reserved_nodes, mgr_reserved_coarse_indexes);
7044
7045 /* set intermediate coarse grid strategy */
7046 HYPRE_MGRSetNonCpointsToFpoints(pcg_precond, mgr_non_c_to_f);
7047 /* set F relaxation strategy */
7048 HYPRE_MGRSetFRelaxMethod(pcg_precond, mgr_frelax_method);
7049 /* set relax type for single level F-relaxation and post-relaxation */
7050 HYPRE_MGRSetRelaxType(pcg_precond, mgr_relax_type);
7051 HYPRE_MGRSetNumRelaxSweeps(pcg_precond, mgr_num_relax_sweeps);
7052 /* set interpolation type */
7053 HYPRE_MGRSetRestrictType(pcg_precond, mgr_restrict_type);
7054 HYPRE_MGRSetNumRestrictSweeps(pcg_precond, mgr_num_restrict_sweeps);
7055 HYPRE_MGRSetInterpType(pcg_precond, mgr_interp_type);
7056 HYPRE_MGRSetNumInterpSweeps(pcg_precond, mgr_num_interp_sweeps);
7057 /* set print level */
7058 HYPRE_MGRSetPrintLevel(pcg_precond, 1);
7059 /* set max iterations */
7060 HYPRE_MGRSetMaxIter(pcg_precond, 1);
7061 HYPRE_MGRSetTol(pcg_precond, pc_tol);
7062
7063 HYPRE_MGRSetGlobalsmoothType(pcg_precond, mgr_gsmooth_type);
7064 HYPRE_MGRSetMaxGlobalsmoothIters( pcg_precond, mgr_num_gsmooth_sweeps );
7065
7066 /* create AMG coarse grid solver */
7067
7068 HYPRE_BoomerAMGCreate(&amg_solver);
7069 HYPRE_BoomerAMGSetTol(amg_solver, pc_tol);
7070 HYPRE_BoomerAMGSetPrintLevel(amg_solver, 1);
7071
7072 HYPRE_BoomerAMGSetMaxIter(amg_solver, 1);
7073
7074 HYPRE_BoomerAMGSetCycleType(amg_solver, 1);
7075 HYPRE_BoomerAMGSetNumSweeps(amg_solver, 1);
7076 HYPRE_BoomerAMGSetCycleRelaxType(amg_solver, 14, 1);
7077 HYPRE_BoomerAMGSetCycleRelaxType(amg_solver, 14, 2);
7078 HYPRE_BoomerAMGSetCycleRelaxType(amg_solver, 9, 3);
7079 /* set the MGR coarse solver. Comment out to use default CG solver in MGR */
7080 HYPRE_MGRSetCoarseSolver( pcg_precond, HYPRE_BoomerAMGSolve, HYPRE_BoomerAMGSetup, amg_solver);
7081
7082
7083 /* setup MGR-COGMRES solver */
7084 HYPRE_COGMRESSetMaxIter(pcg_solver, mg_max_iter);
7085 HYPRE_COGMRESSetPrecond(pcg_solver,
7086 (HYPRE_PtrToSolverFcn) HYPRE_MGRSolve,
7087 (HYPRE_PtrToSolverFcn) HYPRE_MGRSetup,
7088 pcg_precond);
7089 }
7090 HYPRE_COGMRESSetup(pcg_solver, (HYPRE_Matrix)parcsr_A,
7091 (HYPRE_Vector)b, (HYPRE_Vector)x);
7092
7093 hypre_EndTiming(time_index);
7094 hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
7095 hypre_FinalizeTiming(time_index);
7096 hypre_ClearTiming();
7097
7098 time_index = hypre_InitializeTiming("COGMRES Solve");
7099 hypre_BeginTiming(time_index);
7100
7101 HYPRE_COGMRESSolve(pcg_solver, (HYPRE_Matrix)parcsr_A,
7102 (HYPRE_Vector)b, (HYPRE_Vector)x);
7103
7104 hypre_EndTiming(time_index);
7105 hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
7106 hypre_FinalizeTiming(time_index);
7107 hypre_ClearTiming();
7108
7109 HYPRE_COGMRESGetNumIterations(pcg_solver, &num_iterations);
7110 HYPRE_COGMRESGetFinalRelativeResidualNorm(pcg_solver,&final_res_norm);
7111 #if SECOND_TIME
7112 /* run a second time to check for memory leaks */
7113 HYPRE_ParVectorSetRandomValues(x, 775);
7114 HYPRE_COGMRESSetup(pcg_solver, (HYPRE_Matrix)parcsr_A,
7115 (HYPRE_Vector)b, (HYPRE_Vector)x);
7116 HYPRE_COGMRESSolve(pcg_solver, (HYPRE_Matrix)parcsr_A,
7117 (HYPRE_Vector)b, (HYPRE_Vector)x);
7118 #endif
7119
7120 HYPRE_ParCSRCOGMRESDestroy(pcg_solver);
7121
7122 if (solver_id == 16)
7123 {
7124 HYPRE_BoomerAMGDestroy(pcg_precond);
7125 }
7126 else if (solver_id == 46)
7127 {
7128 HYPRE_EuclidDestroy(pcg_precond);
7129 }
7130 else if(solver_id == 74)
7131 {
7132 /* free memory */
7133 if(mgr_num_cindexes)
7134 hypre_TFree(mgr_num_cindexes, HYPRE_MEMORY_HOST);
7135 mgr_num_cindexes = NULL;
7136
7137 if(mgr_reserved_coarse_indexes)
7138 hypre_TFree(mgr_reserved_coarse_indexes, HYPRE_MEMORY_HOST);
7139 mgr_reserved_coarse_indexes = NULL;
7140
7141 if(mgr_cindexes)
7142 {
7143 for( i=0; i<mgr_nlevels; i++)
7144 {
7145 if(mgr_cindexes[i])
7146 hypre_TFree(mgr_cindexes[i], HYPRE_MEMORY_HOST);
7147 }
7148 hypre_TFree(mgr_cindexes, HYPRE_MEMORY_HOST);
7149 mgr_cindexes = NULL;
7150 }
7151
7152 HYPRE_BoomerAMGDestroy(amg_solver);
7153 HYPRE_MGRDestroy(pcg_precond);
7154 }
7155 if (myid == 0)
7156 {
7157 hypre_printf("\n");
7158 hypre_printf("COGMRES Iterations = %d\n", num_iterations);
7159 hypre_printf("Final COGMRES Relative Residual Norm = %e\n", final_res_norm);
7160 hypre_printf("\n");
7161 }
7162 }
7163 /*-----------------------------------------------------------
7164 * Solve the system using CGNR
7165 *-----------------------------------------------------------*/
7166
7167 if (solver_id == 5 || solver_id == 6)
7168 {
7169 time_index = hypre_InitializeTiming("CGNR Setup");
7170 hypre_BeginTiming(time_index);
7171
7172 HYPRE_ParCSRCGNRCreate(hypre_MPI_COMM_WORLD, &pcg_solver);
7173 HYPRE_CGNRSetMaxIter(pcg_solver, max_iter);
7174 HYPRE_CGNRSetTol(pcg_solver, tol);
7175 HYPRE_CGNRSetLogging(pcg_solver, ioutdat);
7176
7177 if (solver_id == 5)
7178 {
7179 /* use BoomerAMG as preconditioner */
7180 if (myid == 0) hypre_printf("Solver: AMG-CGNR\n");
7181 HYPRE_BoomerAMGCreate(&pcg_precond);
7182 HYPRE_BoomerAMGSetCGCIts(pcg_precond, cgcits);
7183 HYPRE_BoomerAMGSetInterpType(pcg_precond, interp_type);
7184 HYPRE_BoomerAMGSetPostInterpType(pcg_precond, post_interp_type);
7185 HYPRE_BoomerAMGSetNumSamples(pcg_precond, gsmg_samples);
7186 HYPRE_BoomerAMGSetTol(pcg_precond, pc_tol);
7187 HYPRE_BoomerAMGSetCoarsenType(pcg_precond, coarsen_type);
7188 HYPRE_BoomerAMGSetCoarsenCutFactor(pcg_precond, coarsen_cut_factor);
7189 HYPRE_BoomerAMGSetCPoints(pcg_precond, max_levels, num_cpt, cpt_index);
7190 HYPRE_BoomerAMGSetFPoints(pcg_precond, num_fpt, fpt_index);
7191 HYPRE_BoomerAMGSetIsolatedFPoints(pcg_precond, num_isolated_fpt, isolated_fpt_index);
7192 HYPRE_BoomerAMGSetMeasureType(pcg_precond, measure_type);
7193 HYPRE_BoomerAMGSetStrongThreshold(pcg_precond, strong_threshold);
7194 HYPRE_BoomerAMGSetSeqThreshold(pcg_precond, seq_threshold);
7195 HYPRE_BoomerAMGSetRedundant(pcg_precond, redundant);
7196 HYPRE_BoomerAMGSetMaxCoarseSize(pcg_precond, coarse_threshold);
7197 HYPRE_BoomerAMGSetMinCoarseSize(pcg_precond, min_coarse_size);
7198 HYPRE_BoomerAMGSetTruncFactor(pcg_precond, trunc_factor);
7199 HYPRE_BoomerAMGSetPMaxElmts(pcg_precond, P_max_elmts);
7200 HYPRE_BoomerAMGSetJacobiTruncThreshold(pcg_precond, jacobi_trunc_threshold);
7201 HYPRE_BoomerAMGSetSCommPkgSwitch(pcg_precond, S_commpkg_switch);
7202 HYPRE_BoomerAMGSetPrintLevel(pcg_precond, poutdat);
7203 HYPRE_BoomerAMGSetPrintFileName(pcg_precond, "driver.out.log");
7204 HYPRE_BoomerAMGSetMaxIter(pcg_precond, 1);
7205 HYPRE_BoomerAMGSetCycleType(pcg_precond, cycle_type);
7206 HYPRE_BoomerAMGSetFCycle(pcg_precond, fcycle);
7207 HYPRE_BoomerAMGSetNumSweeps(pcg_precond, num_sweeps);
7208 if (relax_type > -1) HYPRE_BoomerAMGSetRelaxType(pcg_precond, relax_type);
7209 if (relax_down > -1)
7210 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_down, 1);
7211 if (relax_up > -1)
7212 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_up, 2);
7213 if (relax_coarse > -1)
7214 HYPRE_BoomerAMGSetCycleRelaxType(pcg_precond, relax_coarse, 3);
7215 HYPRE_BoomerAMGSetAddRelaxType(pcg_precond, add_relax_type);
7216 HYPRE_BoomerAMGSetAddRelaxWt(pcg_precond, add_relax_wt);
7217 HYPRE_BoomerAMGSetChebyOrder(pcg_precond, cheby_order);
7218 HYPRE_BoomerAMGSetChebyFraction(pcg_precond, cheby_fraction);
7219 HYPRE_BoomerAMGSetChebyEigEst(pcg_precond, cheby_eig_est);
7220 HYPRE_BoomerAMGSetChebyVariant(pcg_precond, cheby_variant);
7221 HYPRE_BoomerAMGSetChebyScale(pcg_precond, cheby_scale);
7222 HYPRE_BoomerAMGSetRelaxOrder(pcg_precond, relax_order);
7223 HYPRE_BoomerAMGSetRelaxWt(pcg_precond, relax_wt);
7224 HYPRE_BoomerAMGSetOuterWt(pcg_precond, outer_wt);
7225 if (level_w > -1)
7226 HYPRE_BoomerAMGSetLevelRelaxWt(pcg_precond, relax_wt_level,level_w);
7227 if (level_ow > -1)
7228 HYPRE_BoomerAMGSetLevelOuterWt(pcg_precond,outer_wt_level,level_ow);
7229 HYPRE_BoomerAMGSetSmoothType(pcg_precond, smooth_type);
7230 HYPRE_BoomerAMGSetSmoothNumLevels(pcg_precond, smooth_num_levels);
7231 HYPRE_BoomerAMGSetSmoothNumSweeps(pcg_precond, smooth_num_sweeps);
7232 HYPRE_BoomerAMGSetMaxLevels(pcg_precond, max_levels);
7233 HYPRE_BoomerAMGSetMaxRowSum(pcg_precond, max_row_sum);
7234 HYPRE_BoomerAMGSetDebugFlag(pcg_precond, debug_flag);
7235 HYPRE_BoomerAMGSetNumFunctions(pcg_precond, num_functions);
7236 HYPRE_BoomerAMGSetAggNumLevels(pcg_precond, agg_num_levels);
7237 HYPRE_BoomerAMGSetAggInterpType(pcg_precond, agg_interp_type);
7238 HYPRE_BoomerAMGSetAggTruncFactor(pcg_precond, agg_trunc_factor);
7239 HYPRE_BoomerAMGSetAggP12TruncFactor(pcg_precond, agg_P12_trunc_factor);
7240 HYPRE_BoomerAMGSetAggPMaxElmts(pcg_precond, agg_P_max_elmts);
7241 HYPRE_BoomerAMGSetAggP12MaxElmts(pcg_precond, agg_P12_max_elmts);
7242 HYPRE_BoomerAMGSetNumPaths(pcg_precond, num_paths);
7243 HYPRE_BoomerAMGSetNodal(pcg_precond, nodal);
7244 HYPRE_BoomerAMGSetNodalDiag(pcg_precond, nodal_diag);
7245 HYPRE_BoomerAMGSetVariant(pcg_precond, variant);
7246 HYPRE_BoomerAMGSetOverlap(pcg_precond, overlap);
7247 HYPRE_BoomerAMGSetDomainType(pcg_precond, domain_type);
7248 if (num_functions > 1)
7249 HYPRE_BoomerAMGSetDofFunc(pcg_precond, dof_func);
7250 HYPRE_BoomerAMGSetAdditive(pcg_precond, additive);
7251 HYPRE_BoomerAMGSetMultAdditive(pcg_precond, mult_add);
7252 HYPRE_BoomerAMGSetSimple(pcg_precond, simple);
7253 HYPRE_BoomerAMGSetAddLastLvl(pcg_precond, add_last_lvl);
7254 HYPRE_BoomerAMGSetMultAddPMaxElmts(pcg_precond, add_P_max_elmts);
7255 HYPRE_BoomerAMGSetMultAddTruncFactor(pcg_precond, add_trunc_factor);
7256 HYPRE_BoomerAMGSetRAP2(pcg_precond, rap2);
7257 HYPRE_BoomerAMGSetModuleRAP2(pcg_precond, mod_rap2);
7258 HYPRE_BoomerAMGSetKeepTranspose(pcg_precond, keepTranspose);
7259 #ifdef HYPRE_USING_DSUPERLU
7260 HYPRE_BoomerAMGSetDSLUThreshold(pcg_precond, dslu_threshold);
7261 #endif
7262 if (nongalerk_tol)
7263 {
7264 HYPRE_BoomerAMGSetNonGalerkinTol(pcg_precond, nongalerk_tol[nongalerk_num_tol-1]);
7265 for (i=0; i < nongalerk_num_tol-1; i++)
7266 HYPRE_BoomerAMGSetLevelNonGalerkinTol(pcg_precond, nongalerk_tol[i], i);
7267 }
7268 HYPRE_CGNRSetMaxIter(pcg_solver, mg_max_iter);
7269 HYPRE_CGNRSetPrecond(pcg_solver,
7270 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve,
7271 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolveT,
7272 (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup,
7273 pcg_precond);
7274 }
7275 else if (solver_id == 6)
7276 {
7277 /* use diagonal scaling as preconditioner */
7278 if (myid == 0) hypre_printf("Solver: DS-CGNR\n");
7279 pcg_precond = NULL;
7280
7281 HYPRE_CGNRSetPrecond(pcg_solver,
7282 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRDiagScale,
7283 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRDiagScale,
7284 (HYPRE_PtrToSolverFcn) HYPRE_ParCSRDiagScaleSetup,
7285 pcg_precond);
7286 }
7287
7288 HYPRE_CGNRGetPrecond(pcg_solver, &pcg_precond_gotten);
7289 if (pcg_precond_gotten != pcg_precond)
7290 {
7291 hypre_printf("HYPRE_ParCSRCGNRGetPrecond got bad precond\n");
7292 return(-1);
7293 }
7294 else
7295 if (myid == 0)
7296 hypre_printf("HYPRE_ParCSRCGNRGetPrecond got good precond\n");
7297 HYPRE_CGNRSetup(pcg_solver, (HYPRE_Matrix)parcsr_A, (HYPRE_Vector)b,
7298 (HYPRE_Vector)x);
7299
7300 hypre_EndTiming(time_index);
7301 hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
7302 hypre_FinalizeTiming(time_index);
7303 hypre_ClearTiming();
7304
7305 time_index = hypre_InitializeTiming("CGNR Solve");
7306 hypre_BeginTiming(time_index);
7307
7308 HYPRE_CGNRSolve(pcg_solver, (HYPRE_Matrix)parcsr_A, (HYPRE_Vector)b,
7309 (HYPRE_Vector)x);
7310
7311 hypre_EndTiming(time_index);
7312 hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
7313 hypre_FinalizeTiming(time_index);
7314 hypre_ClearTiming();
7315
7316 HYPRE_CGNRGetNumIterations(pcg_solver, &num_iterations);
7317 HYPRE_CGNRGetFinalRelativeResidualNorm(pcg_solver,&final_res_norm);
7318
7319 #if SECOND_TIME
7320 /* run a second time to check for memory leaks */
7321 HYPRE_ParVectorSetRandomValues(x, 775);
7322 HYPRE_CGNRSetup(pcg_solver, (HYPRE_Matrix)parcsr_A, (HYPRE_Vector)b,
7323 (HYPRE_Vector)x);
7324 HYPRE_CGNRSolve(pcg_solver, (HYPRE_Matrix)parcsr_A, (HYPRE_Vector)b,
7325 (HYPRE_Vector)x);
7326 #endif
7327
7328 HYPRE_ParCSRCGNRDestroy(pcg_solver);
7329
7330 if (solver_id == 5)
7331 {
7332 HYPRE_BoomerAMGDestroy(pcg_precond);
7333 }
7334 if (myid == 0 /* begin lobpcg */ && !lobpcgFlag /* end lobpcg */)
7335 {
7336 hypre_printf("\n");
7337 hypre_printf("Iterations = %d\n", num_iterations);
7338 hypre_printf("Final Relative Residual Norm = %e\n", final_res_norm);
7339 hypre_printf("\n");
7340 }
7341 }
7342
7343 /*-----------------------------------------------------------
7344 * Solve the system using MGR
7345 *-----------------------------------------------------------*/
7346
7347 if (solver_id == 70)
7348 {
7349 if (myid == 0) hypre_printf("Solver: MGR\n");
7350 time_index = hypre_InitializeTiming("MGR Setup");
7351 hypre_BeginTiming(time_index);
7352
7353 HYPRE_Solver mgr_solver;
7354 HYPRE_MGRCreate(&mgr_solver);
7355
7356 mgr_num_cindexes = hypre_CTAlloc(HYPRE_Int, mgr_nlevels, HYPRE_MEMORY_HOST);
7357 for(i=0; i<mgr_nlevels; i++)
7358 { /* assume 1 coarse index per level */
7359 mgr_num_cindexes[i] = 1;
7360 }
7361 mgr_cindexes = hypre_CTAlloc(HYPRE_Int*, mgr_nlevels, HYPRE_MEMORY_HOST);
7362 for(i=0; i<mgr_nlevels; i++)
7363 {
7364 mgr_cindexes[i] = hypre_CTAlloc(HYPRE_Int, mgr_num_cindexes[i], HYPRE_MEMORY_HOST);
7365 }
7366 for(i=0; i<mgr_nlevels; i++)
7367 { /* assume coarse point is at index 0 */
7368 mgr_cindexes[i][0] = 0;
7369 }
7370 mgr_reserved_coarse_indexes = hypre_CTAlloc(HYPRE_BigInt, mgr_num_reserved_nodes, HYPRE_MEMORY_HOST);
7371 for(i=0; i<mgr_num_reserved_nodes; i++)
7372 { /* generate artificial reserved nodes */
7373 mgr_reserved_coarse_indexes[i] = last_local_row - (HYPRE_BigInt) i; //2*i+1;
7374 }
7375
7376 /* set MGR data by block */
7377 HYPRE_MGRSetCpointsByBlock( mgr_solver, mgr_bsize, mgr_nlevels, mgr_num_cindexes,mgr_cindexes);
7378 /* set reserved coarse nodes */
7379 if(mgr_num_reserved_nodes)HYPRE_MGRSetReservedCoarseNodes(mgr_solver, mgr_num_reserved_nodes, mgr_reserved_coarse_indexes);
7380
7381 /* set intermediate coarse grid strategy */
7382 HYPRE_MGRSetNonCpointsToFpoints(mgr_solver, mgr_non_c_to_f);
7383 /* set F relaxation strategy */
7384 HYPRE_MGRSetFRelaxMethod(mgr_solver, mgr_frelax_method);
7385 /* set relax type for single level F-relaxation and post-relaxation */
7386 HYPRE_MGRSetRelaxType(mgr_solver, mgr_relax_type);
7387 HYPRE_MGRSetNumRelaxSweeps(mgr_solver, mgr_num_relax_sweeps);
7388 /* set interpolation type */
7389 HYPRE_MGRSetRestrictType(mgr_solver, mgr_restrict_type);
7390 HYPRE_MGRSetNumRestrictSweeps(mgr_solver, mgr_num_restrict_sweeps);
7391 HYPRE_MGRSetInterpType(mgr_solver, mgr_interp_type);
7392 HYPRE_MGRSetNumInterpSweeps(mgr_solver, mgr_num_interp_sweeps);
7393 /* set print level */
7394 HYPRE_MGRSetPrintLevel(mgr_solver, 3);
7395 /* set max iterations */
7396 HYPRE_MGRSetMaxIter(mgr_solver, max_iter);
7397 HYPRE_MGRSetTol(mgr_solver, tol);
7398
7399 HYPRE_MGRSetGlobalsmoothType(mgr_solver, mgr_gsmooth_type);
7400 HYPRE_MGRSetMaxGlobalsmoothIters( mgr_solver, mgr_num_gsmooth_sweeps );
7401
7402 /* create AMG coarse grid solver */
7403
7404 HYPRE_BoomerAMGCreate(&amg_solver);
7405 /* BM Aug 25, 2006 */
7406 HYPRE_BoomerAMGSetCGCIts(amg_solver, cgcits);
7407 HYPRE_BoomerAMGSetInterpType(amg_solver, 0);
7408 HYPRE_BoomerAMGSetPostInterpType(amg_solver, post_interp_type);
7409 HYPRE_BoomerAMGSetCoarsenType(amg_solver, 6);
7410 HYPRE_BoomerAMGSetTol(amg_solver, tol);
7411 HYPRE_BoomerAMGSetPMaxElmts(amg_solver, 0);
7412 HYPRE_BoomerAMGSetCycleType(amg_solver, cycle_type);
7413 HYPRE_BoomerAMGSetFCycle(amg_solver, fcycle);
7414 HYPRE_BoomerAMGSetNumSweeps(amg_solver, num_sweeps);
7415 HYPRE_BoomerAMGSetRelaxType(amg_solver, 3);
7416 if (relax_down > -1)
7417 HYPRE_BoomerAMGSetCycleRelaxType(amg_solver, relax_down, 1);
7418 if (relax_up > -1)
7419 HYPRE_BoomerAMGSetCycleRelaxType(amg_solver, relax_up, 2);
7420 if (relax_coarse > -1)
7421 HYPRE_BoomerAMGSetCycleRelaxType(amg_solver, relax_coarse, 3);
7422 HYPRE_BoomerAMGSetRelaxOrder(amg_solver, 1);
7423 HYPRE_BoomerAMGSetMaxLevels(amg_solver, max_levels);
7424 HYPRE_BoomerAMGSetSmoothType(amg_solver, smooth_type);
7425 HYPRE_BoomerAMGSetSmoothNumSweeps(amg_solver, smooth_num_sweeps);
7426 if(mgr_nlevels < 1 || mgr_bsize < 2)
7427 {
7428 HYPRE_BoomerAMGSetMaxIter(amg_solver, max_iter);
7429 HYPRE_BoomerAMGSetPrintLevel(amg_solver, 3);
7430 }
7431 else
7432 {
7433 HYPRE_BoomerAMGSetMaxIter(amg_solver, 1);
7434 HYPRE_BoomerAMGSetTol(amg_solver, 0.0);
7435 HYPRE_BoomerAMGSetPrintLevel(amg_solver, 1);
7436 }
7437 /* set the MGR coarse solver. Comment out to use default CG solver in MGR */
7438 HYPRE_MGRSetCoarseSolver( mgr_solver, HYPRE_BoomerAMGSolve, HYPRE_BoomerAMGSetup, amg_solver);
7439
7440 /* setup MGR solver */
7441 HYPRE_MGRSetup(mgr_solver, parcsr_A, b, x);
7442
7443 hypre_EndTiming(time_index);
7444 hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
7445 hypre_FinalizeTiming(time_index);
7446 hypre_ClearTiming();
7447
7448 time_index = hypre_InitializeTiming("MGR Solve");
7449 hypre_BeginTiming(time_index);
7450
7451 /* MGR solve */
7452 HYPRE_MGRSolve(mgr_solver, parcsr_A, b, x);
7453
7454 hypre_EndTiming(time_index);
7455 hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
7456 hypre_FinalizeTiming(time_index);
7457 hypre_ClearTiming();
7458
7459 HYPRE_MGRGetNumIterations(mgr_solver, &num_iterations);
7460 HYPRE_MGRGetFinalRelativeResidualNorm(mgr_solver, &final_res_norm);
7461
7462 if (myid == 0)
7463 {
7464 hypre_printf("\n");
7465 hypre_printf("MGR Iterations = %d\n", num_iterations);
7466 hypre_printf("Final Relative Residual Norm = %e\n", final_res_norm);
7467 hypre_printf("\n");
7468 }
7469
7470 #if SECOND_TIME
7471 /* run a second time to check for memory leaks */
7472 HYPRE_ParVectorSetRandomValues(x, 775);
7473 HYPRE_MGRSetup(mgr_solver, parcsr_A, b, x);
7474 HYPRE_MGRSolve(mgr_solver, parcsr_A, b, x);
7475 #endif
7476
7477 /* free memory */
7478 if(mgr_num_cindexes)
7479 hypre_TFree(mgr_num_cindexes, HYPRE_MEMORY_HOST);
7480 mgr_num_cindexes = NULL;
7481
7482 if(mgr_reserved_coarse_indexes)
7483 hypre_TFree(mgr_reserved_coarse_indexes, HYPRE_MEMORY_HOST);
7484 mgr_reserved_coarse_indexes = NULL;
7485
7486 if(mgr_cindexes)
7487 {
7488 for( i=0; i<mgr_nlevels; i++)
7489 {
7490 if(mgr_cindexes[i])
7491 hypre_TFree(mgr_cindexes[i], HYPRE_MEMORY_HOST);
7492 }
7493 hypre_TFree(mgr_cindexes, HYPRE_MEMORY_HOST);
7494 mgr_cindexes = NULL;
7495 }
7496
7497 HYPRE_BoomerAMGDestroy(amg_solver);
7498 HYPRE_MGRDestroy(mgr_solver);
7499 }
7500
7501 /*-----------------------------------------------------------
7502 * Solve the system using hypre_ILU
7503 *-----------------------------------------------------------*/
7504
7505 if (solver_id == 80)
7506 {
7507 if (myid == 0) hypre_printf("Solver: hypre_ILU\n");
7508 time_index = hypre_InitializeTiming("hypre_ILU Setup");
7509 hypre_BeginTiming(time_index);
7510
7511 HYPRE_Solver ilu_solver;
7512 HYPRE_ILUCreate(&ilu_solver);
7513
7514 /* set ilu type */
7515 HYPRE_ILUSetType(ilu_solver, ilu_type);
7516 /* set level of fill */
7517 HYPRE_ILUSetLevelOfFill(ilu_solver, ilu_lfil);
7518 /* set print level */
7519 HYPRE_ILUSetPrintLevel(ilu_solver, 2);
7520 /* set max iterations */
7521 HYPRE_ILUSetMaxIter(ilu_solver, max_iter);
7522 /* set max number of nonzeros per row */
7523 HYPRE_ILUSetMaxNnzPerRow(ilu_solver,ilu_max_row_nnz);
7524 /* set the droptol */
7525 HYPRE_ILUSetDropThreshold(ilu_solver,ilu_droptol);
7526 HYPRE_ILUSetTol(ilu_solver, tol);
7527 /* set max iterations for Schur system solve */
7528 HYPRE_ILUSetSchurMaxIter( ilu_solver, ilu_schur_max_iter );
7529
7530 /* setting for NSH */
7531 if(ilu_type == 20 || ilu_type == 21)
7532 {
7533 HYPRE_ILUSetNSHDropThreshold( ilu_solver, ilu_nsh_droptol);
7534 }
7535
7536
7537 /* setup hypre_ILU solver */
7538 HYPRE_ILUSetup(ilu_solver, parcsr_A, b, x);
7539
7540 hypre_EndTiming(time_index);
7541 hypre_PrintTiming("Setup phase times", hypre_MPI_COMM_WORLD);
7542 hypre_FinalizeTiming(time_index);
7543 hypre_ClearTiming();
7544
7545 time_index = hypre_InitializeTiming("hypre_ILU Solve");
7546 hypre_BeginTiming(time_index);
7547
7548 /* hypre_ILU solve */
7549 HYPRE_ILUSolve(ilu_solver, parcsr_A, b, x);
7550
7551 hypre_EndTiming(time_index);
7552 hypre_PrintTiming("Solve phase times", hypre_MPI_COMM_WORLD);
7553 hypre_FinalizeTiming(time_index);
7554 hypre_ClearTiming();
7555
7556 HYPRE_ILUGetNumIterations(ilu_solver, &num_iterations);
7557 HYPRE_ILUGetFinalRelativeResidualNorm(ilu_solver, &final_res_norm);
7558
7559 if (myid == 0)
7560 {
7561 hypre_printf("\n");
7562 hypre_printf("hypre_ILU Iterations = %d\n", num_iterations);
7563 hypre_printf("Final Relative Residual Norm = %e\n", final_res_norm);
7564 hypre_printf("\n");
7565 }
7566
7567 #if SECOND_TIME
7568 /* run a second time to check for memory leaks */
7569 HYPRE_ParVectorSetRandomValues(x, 775);
7570 HYPRE_ILUSetup(ilu_solver, parcsr_A, b, x);
7571 HYPRE_ILUSolve(ilu_solver, parcsr_A, b, x);
7572 #endif
7573
7574 /* free memory */
7575 HYPRE_ILUDestroy(ilu_solver);
7576 }
7577
7578 /*-----------------------------------------------------------
7579 * Print the solution and other info
7580 *-----------------------------------------------------------*/
7581
7582 if (print_system)
7583 {
7584 HYPRE_IJVectorPrint(ij_x, "IJ.out.x");
7585 }
7586
7587 /*-----------------------------------------------------------
7588 * Finalize things
7589 *-----------------------------------------------------------*/
7590
7591 final:
7592
7593 HYPRE_ParVectorDestroy(x0_save);
7594
7595 if (test_ij || build_matrix_type == -1)
7596 {
7597 HYPRE_IJMatrixDestroy(ij_A);
7598 }
7599 else
7600 {
7601 HYPRE_ParCSRMatrixDestroy(parcsr_A);
7602 }
7603
7604 /* for build_rhs_type = 1, 6 or 7, we did not create ij_b - just b*/
7605 if (build_rhs_type == 1 || build_rhs_type == 6 || build_rhs_type == 7)
7606 {
7607 HYPRE_ParVectorDestroy(b);
7608 }
7609 else
7610 {
7611 HYPRE_IJVectorDestroy(ij_b);
7612 }
7613
7614 HYPRE_IJVectorDestroy(ij_x);
7615
7616 if (build_rbm)
7617 {
7618 for (i = 0; i < num_interp_vecs; i++)
7619 {
7620 HYPRE_IJVectorDestroy(ij_rbm[i]);
7621 }
7622 hypre_TFree(ij_rbm, HYPRE_MEMORY_HOST);
7623 hypre_TFree(interp_vecs, HYPRE_MEMORY_HOST);
7624 }
7625 if (nongalerk_tol)
7626 {
7627 hypre_TFree(nongalerk_tol, HYPRE_MEMORY_HOST);
7628 }
7629
7630 if (cpt_index)
7631 {
7632 hypre_TFree(cpt_index, HYPRE_MEMORY_HOST);
7633 }
7634 if (fpt_index)
7635 {
7636 hypre_TFree(fpt_index, HYPRE_MEMORY_HOST);
7637 }
7638 if (isolated_fpt_index)
7639 {
7640 hypre_TFree(isolated_fpt_index, HYPRE_MEMORY_HOST);
7641 }
7642
7643 /*
7644 hypre_FinalizeMemoryDebug();
7645 */
7646
7647 //hypre_PrintMemoryTracker();
7648
7649 /* Finalize Hypre */
7650 HYPRE_Finalize();
7651
7652 /* Finalize MPI */
7653 hypre_MPI_Finalize();
7654
7655 /* when using cuda-memcheck --leak-check full, uncomment this */
7656 #if defined(HYPRE_USING_GPU)
7657 hypre_ResetCudaDevice(hypre_handle());
7658 #endif
7659
7660 return (0);
7661 }
7662
7663 /*----------------------------------------------------------------------
7664 * Build matrix from file. Expects three files on each processor.
7665 * filename.D.n contains the diagonal part, filename.O.n contains
7666 * the offdiagonal part and filename.INFO.n contains global row
7667 * and column numbers, number of columns of offdiagonal matrix
7668 * and the mapping of offdiagonal column numbers to global column numbers.
7669 * Parameters given in command line.
7670 *----------------------------------------------------------------------*/
7671
7672 HYPRE_Int
BuildParFromFile(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParCSRMatrix * A_ptr)7673 BuildParFromFile( HYPRE_Int argc,
7674 char *argv[],
7675 HYPRE_Int arg_index,
7676 HYPRE_ParCSRMatrix *A_ptr )
7677 {
7678 char *filename;
7679
7680 HYPRE_ParCSRMatrix A;
7681
7682 HYPRE_Int myid;
7683
7684 /*-----------------------------------------------------------
7685 * Initialize some stuff
7686 *-----------------------------------------------------------*/
7687
7688 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
7689
7690 /*-----------------------------------------------------------
7691 * Parse command line
7692 *-----------------------------------------------------------*/
7693
7694 if (arg_index < argc)
7695 {
7696 filename = argv[arg_index];
7697 }
7698 else
7699 {
7700 hypre_printf("Error: No filename specified \n");
7701 exit(1);
7702 }
7703
7704 /*-----------------------------------------------------------
7705 * Print driver parameters
7706 *-----------------------------------------------------------*/
7707
7708 if (myid == 0)
7709 {
7710 hypre_printf(" FromFile: %s\n", filename);
7711 }
7712
7713 /*-----------------------------------------------------------
7714 * Generate the matrix
7715 *-----------------------------------------------------------*/
7716
7717 HYPRE_ParCSRMatrixRead(hypre_MPI_COMM_WORLD, filename,&A);
7718
7719 *A_ptr = A;
7720
7721 return (0);
7722 }
7723
7724
7725 /*----------------------------------------------------------------------
7726 * Build rhs from file. Expects two files on each processor.
7727 * filename.n contains the data and
7728 * and filename.INFO.n contains global row
7729 * numbers
7730 *----------------------------------------------------------------------*/
7731
7732 HYPRE_Int
ReadParVectorFromFile(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParVector * b_ptr)7733 ReadParVectorFromFile( HYPRE_Int argc,
7734 char *argv[],
7735 HYPRE_Int arg_index,
7736 HYPRE_ParVector *b_ptr )
7737 {
7738 char *filename;
7739
7740 HYPRE_ParVector b;
7741
7742 HYPRE_Int myid;
7743
7744 /*-----------------------------------------------------------
7745 * Initialize some stuff
7746 *-----------------------------------------------------------*/
7747
7748 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
7749
7750 /*-----------------------------------------------------------
7751 * Parse command line
7752 *-----------------------------------------------------------*/
7753
7754 if (arg_index < argc)
7755 {
7756 filename = argv[arg_index];
7757 }
7758 else
7759 {
7760 hypre_printf("Error: No filename specified \n");
7761 exit(1);
7762 }
7763
7764 /*-----------------------------------------------------------
7765 * Print driver parameters
7766 *-----------------------------------------------------------*/
7767
7768 if (myid == 0)
7769 {
7770 hypre_printf(" From ParFile: %s\n", filename);
7771 }
7772
7773 /*-----------------------------------------------------------
7774 * Generate the matrix
7775 *-----------------------------------------------------------*/
7776
7777 HYPRE_ParVectorRead(hypre_MPI_COMM_WORLD, filename, &b);
7778
7779 *b_ptr = b;
7780
7781 return (0);
7782 }
7783
7784
7785
7786
7787 /*----------------------------------------------------------------------
7788 * Build standard 7-point laplacian in 3D with grid and anisotropy.
7789 * Parameters given in command line.
7790 *----------------------------------------------------------------------*/
7791
7792 HYPRE_Int
BuildParLaplacian(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParCSRMatrix * A_ptr)7793 BuildParLaplacian( HYPRE_Int argc,
7794 char *argv[],
7795 HYPRE_Int arg_index,
7796 HYPRE_ParCSRMatrix *A_ptr )
7797 {
7798 HYPRE_BigInt nx, ny, nz;
7799 HYPRE_Int P, Q, R;
7800 HYPRE_Real cx, cy, cz;
7801
7802 HYPRE_ParCSRMatrix A;
7803
7804 HYPRE_Int num_procs, myid;
7805 HYPRE_Int p, q, r;
7806 HYPRE_Int num_fun = 1;
7807 HYPRE_Real *values;
7808 HYPRE_Real *mtrx;
7809
7810 HYPRE_Real ep = .1;
7811
7812 HYPRE_Int system_vcoef = 0;
7813 HYPRE_Int sys_opt = 0;
7814 HYPRE_Int vcoef_opt = 0;
7815
7816
7817 /*-----------------------------------------------------------
7818 * Initialize some stuff
7819 *-----------------------------------------------------------*/
7820
7821 hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
7822 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
7823
7824 /*-----------------------------------------------------------
7825 * Set defaults
7826 *-----------------------------------------------------------*/
7827
7828 nx = 10;
7829 ny = 10;
7830 nz = 10;
7831
7832 P = 1;
7833 Q = num_procs;
7834 R = 1;
7835
7836 cx = 1.;
7837 cy = 1.;
7838 cz = 1.;
7839
7840 /*-----------------------------------------------------------
7841 * Parse command line
7842 *-----------------------------------------------------------*/
7843 arg_index = 0;
7844 while (arg_index < argc)
7845 {
7846 if ( strcmp(argv[arg_index], "-n") == 0 )
7847 {
7848 arg_index++;
7849 nx = atoi(argv[arg_index++]);
7850 ny = atoi(argv[arg_index++]);
7851 nz = atoi(argv[arg_index++]);
7852 }
7853 else if ( strcmp(argv[arg_index], "-P") == 0 )
7854 {
7855 arg_index++;
7856 P = atoi(argv[arg_index++]);
7857 Q = atoi(argv[arg_index++]);
7858 R = atoi(argv[arg_index++]);
7859 }
7860 else if ( strcmp(argv[arg_index], "-c") == 0 )
7861 {
7862 arg_index++;
7863 cx = atof(argv[arg_index++]);
7864 cy = atof(argv[arg_index++]);
7865 cz = atof(argv[arg_index++]);
7866 }
7867 else if ( strcmp(argv[arg_index], "-sysL") == 0 )
7868 {
7869 arg_index++;
7870 num_fun = atoi(argv[arg_index++]);
7871 }
7872 else if ( strcmp(argv[arg_index], "-sysL_opt") == 0 )
7873 {
7874 arg_index++;
7875 sys_opt = atoi(argv[arg_index++]);
7876 }
7877 else if ( strcmp(argv[arg_index], "-sys_vcoef") == 0 )
7878 {
7879 /* have to use -sysL for this to */
7880 arg_index++;
7881 system_vcoef = 1;
7882 }
7883 else if ( strcmp(argv[arg_index], "-sys_vcoef_opt") == 0 )
7884 {
7885 arg_index++;
7886 vcoef_opt = atoi(argv[arg_index++]);
7887 }
7888 else if ( strcmp(argv[arg_index], "-ep") == 0 )
7889 {
7890 arg_index++;
7891 ep = atof(argv[arg_index++]);
7892 }
7893 else
7894 {
7895 arg_index++;
7896 }
7897 }
7898
7899 /*-----------------------------------------------------------
7900 * Check a few things
7901 *-----------------------------------------------------------*/
7902
7903 if ((P*Q*R) != num_procs)
7904 {
7905 hypre_printf("Error: Invalid number of processors or processor topology \n");
7906 exit(1);
7907 }
7908
7909 /*-----------------------------------------------------------
7910 * Print driver parameters
7911 *-----------------------------------------------------------*/
7912
7913 if (myid == 0)
7914 {
7915 hypre_printf(" Laplacian: num_fun = %d\n", num_fun);
7916 hypre_printf(" (nx, ny, nz) = (%b, %b, %b)\n", nx, ny, nz);
7917 hypre_printf(" (Px, Py, Pz) = (%d, %d, %d)\n", P, Q, R);
7918 hypre_printf(" (cx, cy, cz) = (%f, %f, %f)\n\n", cx, cy, cz);
7919 }
7920
7921 /*-----------------------------------------------------------
7922 * Set up the grid structure
7923 *-----------------------------------------------------------*/
7924
7925 /* compute p,q,r from P,Q,R and myid */
7926 p = myid % P;
7927 q = (( myid - p)/P) % Q;
7928 r = ( myid - p - P*q)/( P*Q );
7929
7930 /*-----------------------------------------------------------
7931 * Generate the matrix
7932 *-----------------------------------------------------------*/
7933
7934 values = hypre_CTAlloc(HYPRE_Real, 4, HYPRE_MEMORY_HOST);
7935
7936 values[1] = -cx;
7937 values[2] = -cy;
7938 values[3] = -cz;
7939
7940 values[0] = 0.;
7941 if (nx > 1)
7942 {
7943 values[0] += 2.0*cx;
7944 }
7945 if (ny > 1)
7946 {
7947 values[0] += 2.0*cy;
7948 }
7949 if (nz > 1)
7950 {
7951 values[0] += 2.0*cz;
7952 }
7953
7954 if (num_fun == 1)
7955 A = (HYPRE_ParCSRMatrix) GenerateLaplacian(hypre_MPI_COMM_WORLD,
7956 nx, ny, nz, P, Q, R, p, q, r, values);
7957 else
7958 {
7959 mtrx = hypre_CTAlloc(HYPRE_Real, num_fun*num_fun, HYPRE_MEMORY_HOST);
7960
7961 if (num_fun == 2)
7962 {
7963 if (sys_opt ==1) /* identity */
7964 {
7965 mtrx[0] = 1.0;
7966 mtrx[1] = 0.0;
7967 mtrx[2] = 0.0;
7968 mtrx[3] = 1.0;
7969 }
7970 else if (sys_opt ==2)
7971 {
7972 mtrx[0] = 1.0;
7973 mtrx[1] = 0.0;
7974 mtrx[2] = 0.0;
7975 mtrx[3] = 20.0;
7976 }
7977 else if (sys_opt ==3) /* similar to barry's talk - ex1 */
7978 {
7979 mtrx[0] = 1.0;
7980 mtrx[1] = 2.0;
7981 mtrx[2] = 2.0;
7982 mtrx[3] = 1.0;
7983 }
7984 else if (sys_opt ==4) /* can use with vcoef to get barry's ex*/
7985 {
7986 mtrx[0] = 1.0;
7987 mtrx[1] = 1.0;
7988 mtrx[2] = 1.0;
7989 mtrx[3] = 1.0;
7990 }
7991 else if (sys_opt ==5) /* barry's talk - ex1 */
7992 {
7993 mtrx[0] = 1.0;
7994 mtrx[1] = 1.1;
7995 mtrx[2] = 1.1;
7996 mtrx[3] = 1.0;
7997 }
7998 else if (sys_opt ==6) /* */
7999 {
8000 mtrx[0] = 1.1;
8001 mtrx[1] = 1.0;
8002 mtrx[2] = 1.0;
8003 mtrx[3] = 1.1;
8004 }
8005
8006 else /* == 0 */
8007 {
8008 mtrx[0] = 2;
8009 mtrx[1] = 1;
8010 mtrx[2] = 1;
8011 mtrx[3] = 2;
8012 }
8013 }
8014 else if (num_fun == 3)
8015 {
8016 if (sys_opt ==1)
8017 {
8018 mtrx[0] = 1.0;
8019 mtrx[1] = 0.0;
8020 mtrx[2] = 0.0;
8021 mtrx[3] = 0.0;
8022 mtrx[4] = 1.0;
8023 mtrx[5] = 0.0;
8024 mtrx[6] = 0.0;
8025 mtrx[7] = 0.0;
8026 mtrx[8] = 1.0;
8027 }
8028 else if (sys_opt ==2)
8029 {
8030 mtrx[0] = 1.0;
8031 mtrx[1] = 0.0;
8032 mtrx[2] = 0.0;
8033 mtrx[3] = 0.0;
8034 mtrx[4] = 20.0;
8035 mtrx[5] = 0.0;
8036 mtrx[6] = 0.0;
8037 mtrx[7] = 0.0;
8038 mtrx[8] =.01;
8039 }
8040 else if (sys_opt ==3)
8041 {
8042 mtrx[0] = 1.01;
8043 mtrx[1] = 1;
8044 mtrx[2] = 0.0;
8045 mtrx[3] = 1;
8046 mtrx[4] = 2;
8047 mtrx[5] = 1;
8048 mtrx[6] = 0.0;
8049 mtrx[7] = 1;
8050 mtrx[8] = 1.01;
8051 }
8052 else if (sys_opt ==4) /* barry ex4 */
8053 {
8054 mtrx[0] = 3;
8055 mtrx[1] = 1;
8056 mtrx[2] = 0.0;
8057 mtrx[3] = 1;
8058 mtrx[4] = 4;
8059 mtrx[5] = 2;
8060 mtrx[6] = 0.0;
8061 mtrx[7] = 2;
8062 mtrx[8] = .25;
8063 }
8064 else /* == 0 */
8065 {
8066 mtrx[0] = 2.0;
8067 mtrx[1] = 1.0;
8068 mtrx[2] = 0.0;
8069 mtrx[3] = 1.0;
8070 mtrx[4] = 2.0;
8071 mtrx[5] = 1.0;
8072 mtrx[6] = 0.0;
8073 mtrx[7] = 1.0;
8074 mtrx[8] = 2.0;
8075 }
8076
8077 }
8078 else if (num_fun == 4)
8079 {
8080 mtrx[0] = 1.01;
8081 mtrx[1] = 1;
8082 mtrx[2] = 0.0;
8083 mtrx[3] = 0.0;
8084 mtrx[4] = 1;
8085 mtrx[5] = 2;
8086 mtrx[6] = 1;
8087 mtrx[7] = 0.0;
8088 mtrx[8] = 0.0;
8089 mtrx[9] = 1;
8090 mtrx[10] = 1.01;
8091 mtrx[11] = 0.0;
8092 mtrx[12] = 2;
8093 mtrx[13] = 1;
8094 mtrx[14] = 0.0;
8095 mtrx[15] = 1;
8096 }
8097
8098 if (!system_vcoef)
8099 {
8100 A = (HYPRE_ParCSRMatrix) GenerateSysLaplacian(hypre_MPI_COMM_WORLD,
8101 nx, ny, nz, P, Q,
8102 R, p, q, r, num_fun, mtrx, values);
8103 }
8104 else
8105 {
8106 HYPRE_Real *mtrx_values;
8107
8108 mtrx_values = hypre_CTAlloc(HYPRE_Real, num_fun*num_fun*4, HYPRE_MEMORY_HOST);
8109
8110 if (num_fun == 2)
8111 {
8112 if (vcoef_opt == 1)
8113 {
8114 /* Barry's talk * - must also have sys_opt = 4, all fail */
8115 mtrx[0] = 1.0;
8116 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, .10, 1.0, 0, mtrx_values);
8117
8118 mtrx[1] = 1.0;
8119 SetSysVcoefValues(num_fun, nx, ny, nz, .1, 1.0, 1.0, 1, mtrx_values);
8120
8121 mtrx[2] = 1.0;
8122 SetSysVcoefValues(num_fun, nx, ny, nz, .01, 1.0, 1.0, 2, mtrx_values);
8123
8124 mtrx[3] = 1.0;
8125 SetSysVcoefValues(num_fun, nx, ny, nz, 2.0, .02, 1.0, 3, mtrx_values);
8126 }
8127 else if (vcoef_opt == 2)
8128 {
8129 /* Barry's talk * - ex2 - if have sys-opt = 4*/
8130 mtrx[0] = 1.0;
8131 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, .010, 1.0, 0, mtrx_values);
8132
8133 mtrx[1] = 200.0;
8134 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, 1.0, 1.0, 1, mtrx_values);
8135
8136 mtrx[2] = 200.0;
8137 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, 1.0, 1.0, 2, mtrx_values);
8138
8139 mtrx[3] = 1.0;
8140 SetSysVcoefValues(num_fun, nx, ny, nz, 2.0, .02, 1.0, 3, mtrx_values);
8141 }
8142 else if (vcoef_opt == 3) /* use with default sys_opt - ulrike ex 3*/
8143 {
8144
8145 /* mtrx[0] */
8146 SetSysVcoefValues(num_fun, nx, ny, nz, ep*1.0, 1.0, 1.0, 0, mtrx_values);
8147
8148 /* mtrx[1] */
8149 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, 1.0, 1.0, 1, mtrx_values);
8150
8151 /* mtrx[2] */
8152 SetSysVcoefValues(num_fun, nx, ny, nz, ep*1.0, 1.0, 1.0, 2, mtrx_values);
8153
8154 /* mtrx[3] */
8155 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, 1.0, 1.0, 3, mtrx_values);
8156 }
8157 else if (vcoef_opt == 4) /* use with default sys_opt - ulrike ex 4*/
8158 {
8159 HYPRE_Real ep2 = ep;
8160
8161 /* mtrx[0] */
8162 SetSysVcoefValues(num_fun, nx, ny, nz, ep*1.0, 1.0, 1.0, 0, mtrx_values);
8163
8164 /* mtrx[1] */
8165 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, ep*1.0, 1.0, 1, mtrx_values);
8166
8167 /* mtrx[2] */
8168 SetSysVcoefValues(num_fun, nx, ny, nz, ep*1.0, 1.0, 1.0, 2, mtrx_values);
8169
8170 /* mtrx[3] */
8171 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, ep2*1.0, 1.0, 3, mtrx_values);
8172 }
8173 else if (vcoef_opt == 5) /* use with default sys_opt - */
8174 {
8175 HYPRE_Real alp, beta;
8176 alp = .001;
8177 beta = 10;
8178
8179 /* mtrx[0] */
8180 SetSysVcoefValues(num_fun, nx, ny, nz, alp*1.0, 1.0, 1.0, 0, mtrx_values);
8181
8182 /* mtrx[1] */
8183 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, beta*1.0, 1.0, 1, mtrx_values);
8184
8185 /* mtrx[2] */
8186 SetSysVcoefValues(num_fun, nx, ny, nz, alp*1.0, 1.0, 1.0, 2, mtrx_values);
8187
8188 /* mtrx[3] */
8189 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, beta*1.0, 1.0, 3, mtrx_values);
8190 }
8191 else /* = 0 */
8192 {
8193 /* mtrx[0] */
8194 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, 1.0, 1.0, 0, mtrx_values);
8195
8196 /* mtrx[1] */
8197 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, 2.0, 1.0, 1, mtrx_values);
8198
8199 /* mtrx[2] */
8200 SetSysVcoefValues(num_fun, nx, ny, nz, 2.0, 1.0, 0.0, 2, mtrx_values);
8201
8202 /* mtrx[3] */
8203 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, 3.0, 1.0, 3, mtrx_values);
8204 }
8205 }
8206 else if (num_fun == 3)
8207 {
8208 mtrx[0] = 1;
8209 SetSysVcoefValues(num_fun, nx, ny, nz, 1, .01, 1, 0, mtrx_values);
8210
8211 mtrx[1] = 1;
8212 SetSysVcoefValues(num_fun, nx, ny, nz, 1, 1, 1, 1, mtrx_values);
8213
8214 mtrx[2] = 0.0;
8215 SetSysVcoefValues(num_fun, nx, ny, nz, 1, 1, 1, 2, mtrx_values);
8216
8217 mtrx[3] = 1;
8218 SetSysVcoefValues(num_fun, nx, ny, nz, 1, 1, 1, 3, mtrx_values);
8219
8220 mtrx[4] = 1;
8221 SetSysVcoefValues(num_fun, nx, ny, nz, 2, .02, 1, 4, mtrx_values);
8222
8223 mtrx[5] = 2;
8224 SetSysVcoefValues(num_fun, nx, ny, nz, 1, 1, 1, 5, mtrx_values);
8225
8226 mtrx[6] = 0.0;
8227 SetSysVcoefValues(num_fun, nx, ny, nz, 1, 1, 1, 6, mtrx_values);
8228
8229 mtrx[7] = 2;
8230 SetSysVcoefValues(num_fun, nx, ny, nz, 1, 1, 1, 7, mtrx_values);
8231
8232 mtrx[8] = 1;
8233 SetSysVcoefValues(num_fun, nx, ny, nz, 1.5, .04, 1, 8, mtrx_values);
8234 }
8235
8236 A = (HYPRE_ParCSRMatrix) GenerateSysLaplacianVCoef(hypre_MPI_COMM_WORLD,
8237 nx, ny, nz, P, Q,
8238 R, p, q, r, num_fun, mtrx, mtrx_values);
8239
8240 hypre_TFree(mtrx_values, HYPRE_MEMORY_HOST);
8241 }
8242
8243 hypre_TFree(mtrx, HYPRE_MEMORY_HOST);
8244 }
8245
8246 hypre_TFree(values, HYPRE_MEMORY_HOST);
8247
8248 *A_ptr = A;
8249
8250 return (0);
8251 }
8252
8253 /*----------------------------------------------------------------------
8254 * returns the sign of a real number
8255 * 1 : positive
8256 * 0 : zero
8257 * -1 : negative
8258 *----------------------------------------------------------------------*/
sign_double(HYPRE_Real a)8259 static inline HYPRE_Int sign_double(HYPRE_Real a)
8260 {
8261 return ( (0.0 < a) - (0.0 > a) );
8262 }
8263
8264 /*----------------------------------------------------------------------
8265 * Build standard 7-point convection-diffusion operator
8266 * Parameters given in command line.
8267 * Operator:
8268 *
8269 * -cx Dxx - cy Dyy - cz Dzz + ax Dx + ay Dy + az Dz = f
8270 *
8271 *----------------------------------------------------------------------*/
8272
8273 HYPRE_Int
BuildParDifConv(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParCSRMatrix * A_ptr)8274 BuildParDifConv( HYPRE_Int argc,
8275 char *argv[],
8276 HYPRE_Int arg_index,
8277 HYPRE_ParCSRMatrix *A_ptr)
8278 {
8279 HYPRE_BigInt nx, ny, nz;
8280 HYPRE_Int P, Q, R;
8281 HYPRE_Real cx, cy, cz;
8282 HYPRE_Real ax, ay, az, atype;
8283 HYPRE_Real hinx,hiny,hinz;
8284 HYPRE_Int sign_prod;
8285
8286 HYPRE_ParCSRMatrix A;
8287
8288 HYPRE_Int num_procs, myid;
8289 HYPRE_Int p, q, r;
8290 HYPRE_Real *values;
8291
8292 /*-----------------------------------------------------------
8293 * Initialize some stuff
8294 *-----------------------------------------------------------*/
8295
8296 hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
8297 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
8298
8299 /*-----------------------------------------------------------
8300 * Set defaults
8301 *-----------------------------------------------------------*/
8302
8303 nx = 10;
8304 ny = 10;
8305 nz = 10;
8306
8307 P = 1;
8308 Q = num_procs;
8309 R = 1;
8310
8311 cx = 1.;
8312 cy = 1.;
8313 cz = 1.;
8314
8315 ax = 1.;
8316 ay = 1.;
8317 az = 1.;
8318
8319 atype = 0;
8320
8321 /*-----------------------------------------------------------
8322 * Parse command line
8323 *-----------------------------------------------------------*/
8324 arg_index = 0;
8325 while (arg_index < argc)
8326 {
8327 if ( strcmp(argv[arg_index], "-n") == 0 )
8328 {
8329 arg_index++;
8330 nx = atoi(argv[arg_index++]);
8331 ny = atoi(argv[arg_index++]);
8332 nz = atoi(argv[arg_index++]);
8333 }
8334 else if ( strcmp(argv[arg_index], "-P") == 0 )
8335 {
8336 arg_index++;
8337 P = atoi(argv[arg_index++]);
8338 Q = atoi(argv[arg_index++]);
8339 R = atoi(argv[arg_index++]);
8340 }
8341 else if ( strcmp(argv[arg_index], "-c") == 0 )
8342 {
8343 arg_index++;
8344 cx = atof(argv[arg_index++]);
8345 cy = atof(argv[arg_index++]);
8346 cz = atof(argv[arg_index++]);
8347 }
8348 else if ( strcmp(argv[arg_index], "-a") == 0 )
8349 {
8350 arg_index++;
8351 ax = atof(argv[arg_index++]);
8352 ay = atof(argv[arg_index++]);
8353 az = atof(argv[arg_index++]);
8354 }
8355 else if ( strcmp(argv[arg_index], "-atype") == 0 )
8356 {
8357 arg_index++;
8358 atype = atoi(argv[arg_index++]);
8359 }
8360 else
8361 {
8362 arg_index++;
8363 }
8364 }
8365
8366 /*-----------------------------------------------------------
8367 * Check a few things
8368 *-----------------------------------------------------------*/
8369
8370 if ((P*Q*R) != num_procs)
8371 {
8372 hypre_printf("Error: Invalid number of processors or processor topology \n");
8373 exit(1);
8374 }
8375
8376 /*-----------------------------------------------------------
8377 * Print driver parameters
8378 *-----------------------------------------------------------*/
8379
8380 if (myid == 0)
8381 {
8382 hypre_printf(" Convection-Diffusion: \n");
8383 hypre_printf(" -cx Dxx - cy Dyy - cz Dzz + ax Dx + ay Dy + az Dz = f\n");
8384 hypre_printf(" (nx, ny, nz) = (%b, %b, %b)\n", nx, ny, nz);
8385 hypre_printf(" (Px, Py, Pz) = (%d, %d, %d)\n", P, Q, R);
8386 hypre_printf(" (cx, cy, cz) = (%f, %f, %f)\n", cx, cy, cz);
8387 hypre_printf(" (ax, ay, az) = (%f, %f, %f)\n\n", ax, ay, az);
8388 }
8389
8390 /*-----------------------------------------------------------
8391 * Set up the grid structure
8392 *-----------------------------------------------------------*/
8393
8394 /* compute p,q,r from P,Q,R and myid */
8395 p = myid % P;
8396 q = (( myid - p)/P) % Q;
8397 r = ( myid - p - P*q)/( P*Q );
8398
8399 hinx = 1./(HYPRE_Real)(nx+1);
8400 hiny = 1./(HYPRE_Real)(ny+1);
8401 hinz = 1./(HYPRE_Real)(nz+1);
8402
8403 /*-----------------------------------------------------------
8404 * Generate the matrix
8405 *-----------------------------------------------------------*/
8406 /* values[7]:
8407 * [0]: center
8408 * [1]: X-
8409 * [2]: Y-
8410 * [3]: Z-
8411 * [4]: X+
8412 * [5]: Y+
8413 * [6]: Z+
8414 */
8415 values = hypre_CTAlloc(HYPRE_Real, 7, HYPRE_MEMORY_HOST);
8416
8417 values[0] = 0.;
8418
8419 if (0 == atype) /* forward scheme for conv */
8420 {
8421 values[1] = -cx/(hinx*hinx);
8422 values[2] = -cy/(hiny*hiny);
8423 values[3] = -cz/(hinz*hinz);
8424 values[4] = -cx/(hinx*hinx) + ax/hinx;
8425 values[5] = -cy/(hiny*hiny) + ay/hiny;
8426 values[6] = -cz/(hinz*hinz) + az/hinz;
8427
8428 if (nx > 1)
8429 {
8430 values[0] += 2.0*cx/(hinx*hinx) - 1.*ax/hinx;
8431 }
8432 if (ny > 1)
8433 {
8434 values[0] += 2.0*cy/(hiny*hiny) - 1.*ay/hiny;
8435 }
8436 if (nz > 1)
8437 {
8438 values[0] += 2.0*cz/(hinz*hinz) - 1.*az/hinz;
8439 }
8440 }
8441 else if (1 == atype) /* backward scheme for conv */
8442 {
8443 values[1] = -cx/(hinx*hinx) - ax/hinx;
8444 values[2] = -cy/(hiny*hiny) - ay/hiny;
8445 values[3] = -cz/(hinz*hinz) - az/hinz;
8446 values[4] = -cx/(hinx*hinx);
8447 values[5] = -cy/(hiny*hiny);
8448 values[6] = -cz/(hinz*hinz);
8449
8450 if (nx > 1)
8451 {
8452 values[0] += 2.0*cx/(hinx*hinx) + 1.*ax/hinx;
8453 }
8454 if (ny > 1)
8455 {
8456 values[0] += 2.0*cy/(hiny*hiny) + 1.*ay/hiny;
8457 }
8458 if (nz > 1)
8459 {
8460 values[0] += 2.0*cz/(hinz*hinz) + 1.*az/hinz;
8461 }
8462 }
8463 else if (3 == atype) /* upwind scheme */
8464 {
8465 sign_prod = sign_double(cx) * sign_double(ax);
8466 if (sign_prod == 1) /* same sign use back scheme */
8467 {
8468 values[1] = -cx/(hinx*hinx) - ax/hinx;
8469 values[4] = -cx/(hinx*hinx);
8470 if (nx > 1)
8471 {
8472 values[0] += 2.0*cx/(hinx*hinx) + 1.*ax/hinx;
8473 }
8474 }
8475 else /* diff sign use forward scheme */
8476 {
8477 values[1] = -cx/(hinx*hinx);
8478 values[4] = -cx/(hinx*hinx) + ax/hinx;
8479 if (nx > 1)
8480 {
8481 values[0] += 2.0*cx/(hinx*hinx) - 1.*ax/hinx;
8482 }
8483 }
8484
8485 sign_prod = sign_double(cy) * sign_double(ay);
8486 if (sign_prod == 1) /* same sign use back scheme */
8487 {
8488 values[2] = -cy/(hiny*hiny) - ay/hiny;
8489 values[5] = -cy/(hiny*hiny);
8490 if (ny > 1)
8491 {
8492 values[0] += 2.0*cy/(hiny*hiny) + 1.*ay/hiny;
8493 }
8494 }
8495 else /* diff sign use forward scheme */
8496 {
8497 values[2] = -cy/(hiny*hiny);
8498 values[5] = -cy/(hiny*hiny) + ay/hiny;
8499 if (ny > 1)
8500 {
8501 values[0] += 2.0*cy/(hiny*hiny) - 1.*ay/hiny;
8502 }
8503 }
8504
8505 sign_prod = sign_double(cz) * sign_double(az);
8506 if (sign_prod == 1) /* same sign use back scheme */
8507 {
8508 values[3] = -cz/(hinz*hinz) - az/hinz;
8509 values[6] = -cz/(hinz*hinz);
8510 if (nz > 1)
8511 {
8512 values[0] += 2.0*cz/(hinz*hinz) + 1.*az/hinz;
8513 }
8514 }
8515 else /* diff sign use forward scheme */
8516 {
8517 values[3] = -cz/(hinz*hinz);
8518 values[6] = -cz/(hinz*hinz) + az/hinz;
8519 if (nz > 1)
8520 {
8521 values[0] += 2.0*cz/(hinz*hinz) - 1.*az/hinz;
8522 }
8523 }
8524 }
8525 else /* centered difference scheme */
8526 {
8527 values[1] = -cx/(hinx*hinx) - ax/(2.*hinx);
8528 values[2] = -cy/(hiny*hiny) - ay/(2.*hiny);
8529 values[3] = -cz/(hinz*hinz) - az/(2.*hinz);
8530 values[4] = -cx/(hinx*hinx) + ax/(2.*hinx);
8531 values[5] = -cy/(hiny*hiny) + ay/(2.*hiny);
8532 values[6] = -cz/(hinz*hinz) + az/(2.*hinz);
8533
8534 if (nx > 1)
8535 {
8536 values[0] += 2.0*cx/(hinx*hinx);
8537 }
8538 if (ny > 1)
8539 {
8540 values[0] += 2.0*cy/(hiny*hiny);
8541 }
8542 if (nz > 1)
8543 {
8544 values[0] += 2.0*cz/(hinz*hinz);
8545 }
8546 }
8547
8548 A = (HYPRE_ParCSRMatrix) GenerateDifConv(hypre_MPI_COMM_WORLD,
8549 nx, ny, nz, P, Q, R, p, q, r, values);
8550
8551 hypre_TFree(values, HYPRE_MEMORY_HOST);
8552
8553 *A_ptr = A;
8554
8555 return (0);
8556 }
8557
8558 /*----------------------------------------------------------------------
8559 * Build matrix from one file on Proc. 0. Expects matrix to be in
8560 * CSR format. Distributes matrix across processors giving each about
8561 * the same number of rows.
8562 * Parameters given in command line.
8563 *----------------------------------------------------------------------*/
8564
8565 HYPRE_Int
BuildParFromOneFile(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_Int num_functions,HYPRE_ParCSRMatrix * A_ptr)8566 BuildParFromOneFile( HYPRE_Int argc,
8567 char *argv[],
8568 HYPRE_Int arg_index,
8569 HYPRE_Int num_functions,
8570 HYPRE_ParCSRMatrix *A_ptr )
8571 {
8572 char *filename;
8573
8574 HYPRE_CSRMatrix A_CSR = NULL;
8575 HYPRE_BigInt *row_part = NULL;
8576 HYPRE_BigInt *col_part = NULL;
8577
8578 HYPRE_Int myid, numprocs;
8579 HYPRE_Int i, rest, size, num_nodes, num_dofs;
8580
8581 /*-----------------------------------------------------------
8582 * Initialize some stuff
8583 *-----------------------------------------------------------*/
8584
8585 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
8586 hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &numprocs );
8587
8588 /*-----------------------------------------------------------
8589 * Parse command line
8590 *-----------------------------------------------------------*/
8591
8592 if (arg_index < argc)
8593 {
8594 filename = argv[arg_index];
8595 }
8596 else
8597 {
8598 hypre_printf("Error: No filename specified \n");
8599 exit(1);
8600 }
8601
8602 /*-----------------------------------------------------------
8603 * Print driver parameters
8604 *-----------------------------------------------------------*/
8605
8606 if (myid == 0)
8607 {
8608 hypre_printf(" FromFile: %s\n", filename);
8609
8610 /*-----------------------------------------------------------
8611 * Generate the matrix
8612 *-----------------------------------------------------------*/
8613
8614 A_CSR = HYPRE_CSRMatrixRead(filename);
8615 }
8616
8617 if (myid == 0 && num_functions > 1)
8618 {
8619 HYPRE_CSRMatrixGetNumRows(A_CSR, &num_dofs);
8620 num_nodes = num_dofs/num_functions;
8621 if (num_dofs == num_functions*num_nodes)
8622 {
8623 row_part = hypre_CTAlloc(HYPRE_BigInt, numprocs+1, HYPRE_MEMORY_HOST);
8624
8625 row_part[0] = 0;
8626 size = num_nodes/numprocs;
8627 rest = num_nodes-size*numprocs;
8628 for (i = 0; i < rest; i++)
8629 {
8630 row_part[i+1] = row_part[i] + (size + 1)*num_functions;
8631 }
8632 for (i = rest; i < numprocs; i++)
8633 {
8634 row_part[i+1] = row_part[i]+size*num_functions;
8635 }
8636
8637 col_part = row_part;
8638 }
8639 }
8640
8641 HYPRE_CSRMatrixToParCSRMatrix(hypre_MPI_COMM_WORLD, A_CSR, row_part, col_part, A_ptr);
8642
8643 if (myid == 0)
8644 {
8645 HYPRE_CSRMatrixDestroy(A_CSR);
8646 }
8647
8648 return (0);
8649 }
8650
8651 /*----------------------------------------------------------------------
8652 * Build Function array from files on different processors
8653 *----------------------------------------------------------------------*/
8654
8655 HYPRE_Int
BuildFuncsFromFiles(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParCSRMatrix parcsr_A,HYPRE_Int ** dof_func_ptr)8656 BuildFuncsFromFiles( HYPRE_Int argc,
8657 char *argv[],
8658 HYPRE_Int arg_index,
8659 HYPRE_ParCSRMatrix parcsr_A,
8660 HYPRE_Int **dof_func_ptr )
8661 {
8662 /*----------------------------------------------------------------------
8663 * Build Function array from files on different processors
8664 *----------------------------------------------------------------------*/
8665
8666 hypre_printf (" Feature is not implemented yet!\n");
8667 return(0);
8668
8669 }
8670
8671 /*----------------------------------------------------------------------
8672 * Build Function array from file on master process
8673 *----------------------------------------------------------------------*/
8674
8675 HYPRE_Int
BuildFuncsFromOneFile(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParCSRMatrix parcsr_A,HYPRE_Int ** dof_func_ptr)8676 BuildFuncsFromOneFile( HYPRE_Int argc,
8677 char *argv[],
8678 HYPRE_Int arg_index,
8679 HYPRE_ParCSRMatrix parcsr_A,
8680 HYPRE_Int **dof_func_ptr )
8681 {
8682 char *filename;
8683
8684 HYPRE_Int myid, num_procs;
8685 HYPRE_Int first_row_index;
8686 HYPRE_Int last_row_index;
8687 HYPRE_BigInt *partitioning;
8688 HYPRE_Int *dof_func;
8689 HYPRE_Int *dof_func_local;
8690 HYPRE_Int i, j;
8691 HYPRE_Int local_size;
8692 HYPRE_Int global_size;
8693 hypre_MPI_Request *requests;
8694 hypre_MPI_Status *status, status0;
8695 MPI_Comm comm;
8696
8697 /*-----------------------------------------------------------
8698 * Initialize some stuff
8699 *-----------------------------------------------------------*/
8700
8701 comm = hypre_MPI_COMM_WORLD;
8702 hypre_MPI_Comm_rank(comm, &myid );
8703 hypre_MPI_Comm_size(comm, &num_procs );
8704
8705 /*-----------------------------------------------------------
8706 * Parse command line
8707 *-----------------------------------------------------------*/
8708
8709 if (arg_index < argc)
8710 {
8711 filename = argv[arg_index];
8712 }
8713 else
8714 {
8715 hypre_printf("Error: No filename specified \n");
8716 exit(1);
8717 }
8718
8719 /*-----------------------------------------------------------
8720 * Print driver parameters
8721 *-----------------------------------------------------------*/
8722
8723 if (myid == 0)
8724 {
8725 FILE *fp;
8726 hypre_printf(" Funcs FromFile: %s\n", filename);
8727
8728 /*-----------------------------------------------------------
8729 * read in the data
8730 *-----------------------------------------------------------*/
8731 fp = fopen(filename, "r");
8732
8733 hypre_fscanf(fp, "%d", &global_size);
8734 dof_func = hypre_CTAlloc(HYPRE_Int, global_size, HYPRE_MEMORY_HOST);
8735
8736 for (j = 0; j < global_size; j++)
8737 {
8738 hypre_fscanf(fp, "%d", &dof_func[j]);
8739 }
8740
8741 fclose(fp);
8742 }
8743
8744 HYPRE_ParCSRMatrixGetGlobalRowPartitioning(parcsr_A, 0, &partitioning);
8745 first_row_index = hypre_ParCSRMatrixFirstRowIndex(parcsr_A);
8746 last_row_index = hypre_ParCSRMatrixLastRowIndex(parcsr_A);
8747 local_size = last_row_index - first_row_index + 1;
8748 dof_func_local = hypre_CTAlloc(HYPRE_Int, local_size, HYPRE_MEMORY_HOST);
8749 if (myid == 0)
8750 {
8751 requests = hypre_CTAlloc(hypre_MPI_Request, num_procs-1, HYPRE_MEMORY_HOST);
8752 status = hypre_CTAlloc(hypre_MPI_Status, num_procs-1, HYPRE_MEMORY_HOST);
8753 for (i=1; i < num_procs; i++)
8754 {
8755 hypre_MPI_Isend(&dof_func[partitioning[i]],
8756 (partitioning[i+1] - partitioning[i]),
8757 HYPRE_MPI_INT, i, 0, comm, &requests[i-1]);
8758 }
8759 for (i=0; i < local_size; i++)
8760 {
8761 dof_func_local[i] = dof_func[i];
8762 }
8763 hypre_MPI_Waitall(num_procs-1,requests, status);
8764 hypre_TFree(requests, HYPRE_MEMORY_HOST);
8765 hypre_TFree(status, HYPRE_MEMORY_HOST);
8766 }
8767 else
8768 {
8769 hypre_MPI_Recv(dof_func_local,local_size,HYPRE_MPI_INT,0,0,comm,&status0);
8770 }
8771
8772 *dof_func_ptr = dof_func_local;
8773
8774 if (myid == 0) hypre_TFree(dof_func, HYPRE_MEMORY_HOST);
8775
8776 if (partitioning) hypre_TFree(partitioning, HYPRE_MEMORY_HOST);
8777
8778 return (0);
8779 }
8780
8781 /*----------------------------------------------------------------------
8782 * Build Rhs from one file on Proc. 0. Distributes vector across processors
8783 * giving each about using the distribution of the matrix A.
8784 *----------------------------------------------------------------------*/
8785
8786 HYPRE_Int
BuildRhsParFromOneFile(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParCSRMatrix parcsr_A,HYPRE_ParVector * b_ptr)8787 BuildRhsParFromOneFile( HYPRE_Int argc,
8788 char *argv[],
8789 HYPRE_Int arg_index,
8790 HYPRE_ParCSRMatrix parcsr_A,
8791 HYPRE_ParVector *b_ptr )
8792 {
8793 char *filename;
8794 HYPRE_Int myid;
8795 HYPRE_BigInt *partitioning;
8796 HYPRE_ParVector b;
8797 HYPRE_Vector b_CSR=NULL;
8798
8799 /*-----------------------------------------------------------
8800 * Initialize some stuff
8801 *-----------------------------------------------------------*/
8802
8803 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
8804 partitioning = hypre_ParCSRMatrixRowStarts(parcsr_A);
8805
8806 /*-----------------------------------------------------------
8807 * Parse command line
8808 *-----------------------------------------------------------*/
8809
8810 if (arg_index < argc)
8811 {
8812 filename = argv[arg_index];
8813 }
8814 else
8815 {
8816 hypre_printf("Error: No filename specified \n");
8817 exit(1);
8818 }
8819
8820 /*-----------------------------------------------------------
8821 * Print driver parameters
8822 *-----------------------------------------------------------*/
8823
8824 if (myid == 0)
8825 {
8826 hypre_printf(" Rhs FromFile: %s\n", filename);
8827
8828 /*-----------------------------------------------------------
8829 * Generate the matrix
8830 *-----------------------------------------------------------*/
8831
8832 b_CSR = HYPRE_VectorRead(filename);
8833 }
8834 HYPRE_VectorToParVector(hypre_MPI_COMM_WORLD, b_CSR, partitioning,&b);
8835
8836 *b_ptr = b;
8837
8838 HYPRE_VectorDestroy(b_CSR);
8839
8840 return (0);
8841 }
8842
8843 /*----------------------------------------------------------------------
8844 * Build Rhs from one file on Proc. 0. Distributes vector across processors
8845 * giving each about using the distribution of the matrix A.
8846 *----------------------------------------------------------------------*/
8847
8848 HYPRE_Int
BuildBigArrayFromOneFile(HYPRE_Int argc,char * argv[],const char * array_name,HYPRE_Int arg_index,HYPRE_BigInt * partitioning,HYPRE_Int * size,HYPRE_BigInt ** array_ptr)8849 BuildBigArrayFromOneFile( HYPRE_Int argc,
8850 char *argv[],
8851 const char *array_name,
8852 HYPRE_Int arg_index,
8853 HYPRE_BigInt *partitioning,
8854 HYPRE_Int *size,
8855 HYPRE_BigInt **array_ptr )
8856 {
8857 MPI_Comm comm = hypre_MPI_COMM_WORLD;
8858 char *filename;
8859 FILE *fp;
8860 HYPRE_Int myid;
8861 HYPRE_Int num_procs;
8862 HYPRE_Int global_size;
8863 HYPRE_BigInt *global_array;
8864 HYPRE_BigInt *array;
8865 HYPRE_BigInt *send_buffer;
8866 HYPRE_Int *send_counts = NULL;
8867 HYPRE_Int *displs;
8868 HYPRE_Int *array_procs;
8869 HYPRE_Int j, jj, proc;
8870
8871 /*-----------------------------------------------------------
8872 * Initialize some stuff
8873 *-----------------------------------------------------------*/
8874 hypre_MPI_Comm_rank(comm, &myid);
8875 hypre_MPI_Comm_size(comm, &num_procs);
8876
8877 /*-----------------------------------------------------------
8878 * Parse command line
8879 *-----------------------------------------------------------*/
8880 if (arg_index < argc)
8881 {
8882 filename = argv[arg_index];
8883 }
8884 else
8885 {
8886 if (myid == 0)
8887 {
8888 hypre_printf("Error: No filename specified \n");
8889 }
8890 hypre_MPI_Abort(comm, 1);
8891 }
8892
8893 /*-----------------------------------------------------------
8894 * Print driver parameters
8895 *-----------------------------------------------------------*/
8896 if (myid == 0)
8897 {
8898 hypre_printf(" %s array FromFile: %s\n", array_name, filename);
8899
8900 /*-----------------------------------------------------------
8901 * Read data
8902 *-----------------------------------------------------------*/
8903 fp = fopen(filename, "r");
8904
8905 hypre_fscanf(fp, "%d", &global_size);
8906 global_array = hypre_CTAlloc(HYPRE_BigInt, global_size, HYPRE_MEMORY_HOST);
8907 for (j = 0; j < global_size; j++)
8908 {
8909 hypre_fscanf(fp, "%d", &global_array[j]);
8910 }
8911
8912 fclose(fp);
8913 }
8914
8915 /*-----------------------------------------------------------
8916 * Distribute data
8917 *-----------------------------------------------------------*/
8918 if (myid == 0)
8919 {
8920 send_counts = hypre_CTAlloc(HYPRE_Int, num_procs, HYPRE_MEMORY_HOST);
8921 displs = hypre_CTAlloc(HYPRE_Int, num_procs, HYPRE_MEMORY_HOST);
8922 array_procs = hypre_CTAlloc(HYPRE_Int, global_size, HYPRE_MEMORY_HOST);
8923 send_buffer = hypre_CTAlloc(HYPRE_BigInt, global_size, HYPRE_MEMORY_HOST);
8924 for (j = 0; j < global_size; j++)
8925 {
8926 for (proc = 0; proc < (num_procs + 1); proc++)
8927 {
8928 if (global_array[j] < partitioning[proc])
8929 {
8930 proc--; break;
8931 }
8932 }
8933
8934 if (proc < num_procs)
8935 {
8936 send_counts[proc]++;
8937 array_procs[j] = proc;
8938 }
8939 else
8940 {
8941 array_procs[j] = -1; // Not found
8942 }
8943 }
8944
8945 for (proc = 0; proc < (num_procs - 1); proc++)
8946 {
8947 displs[proc+1] = displs[proc] + send_counts[proc];
8948 }
8949 }
8950 hypre_MPI_Scatter(send_counts, 1, HYPRE_MPI_INT, size, 1, HYPRE_MPI_INT, 0, comm);
8951
8952 if (myid == 0)
8953 {
8954 for (proc = 0; proc < num_procs; proc++)
8955 {
8956 send_counts[proc] = 0;
8957 }
8958
8959 for (j = 0; j < global_size; j++)
8960 {
8961 proc = array_procs[j];
8962 if (proc > -1)
8963 {
8964 jj = displs[proc] + send_counts[proc];
8965 send_buffer[jj] = global_array[j];
8966 send_counts[proc]++;
8967 }
8968 }
8969 }
8970
8971 array = hypre_CTAlloc(HYPRE_BigInt, *size, HYPRE_MEMORY_HOST);
8972 hypre_MPI_Scatterv(send_buffer, send_counts, displs, HYPRE_MPI_BIG_INT,
8973 array, *size, HYPRE_MPI_BIG_INT, 0, comm);
8974 *array_ptr = array;
8975
8976 /* Free memory */
8977 if (myid == 0)
8978 {
8979 hypre_TFree(send_counts, HYPRE_MEMORY_HOST);
8980 hypre_TFree(send_buffer, HYPRE_MEMORY_HOST);
8981 hypre_TFree(displs, HYPRE_MEMORY_HOST);
8982 hypre_TFree(array_procs, HYPRE_MEMORY_HOST);
8983 hypre_TFree(global_array, HYPRE_MEMORY_HOST);
8984 }
8985
8986 return 0;
8987 }
8988
8989 /*----------------------------------------------------------------------
8990 * Build standard 9-point laplacian in 2D with grid and anisotropy.
8991 * Parameters given in command line.
8992 *----------------------------------------------------------------------*/
8993
8994 HYPRE_Int
BuildParLaplacian9pt(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParCSRMatrix * A_ptr)8995 BuildParLaplacian9pt( HYPRE_Int argc,
8996 char *argv[],
8997 HYPRE_Int arg_index,
8998 HYPRE_ParCSRMatrix *A_ptr )
8999 {
9000 HYPRE_BigInt nx, ny;
9001 HYPRE_Int P, Q;
9002
9003 HYPRE_ParCSRMatrix A;
9004
9005 HYPRE_Int num_procs, myid;
9006 HYPRE_Int p, q;
9007 HYPRE_Real *values;
9008
9009 /*-----------------------------------------------------------
9010 * Initialize some stuff
9011 *-----------------------------------------------------------*/
9012
9013 hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
9014 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
9015
9016 /*-----------------------------------------------------------
9017 * Set defaults
9018 *-----------------------------------------------------------*/
9019
9020 nx = 10;
9021 ny = 10;
9022
9023 P = 1;
9024 Q = num_procs;
9025
9026 /*-----------------------------------------------------------
9027 * Parse command line
9028 *-----------------------------------------------------------*/
9029 arg_index = 0;
9030 while (arg_index < argc)
9031 {
9032 if ( strcmp(argv[arg_index], "-n") == 0 )
9033 {
9034 arg_index++;
9035 nx = atoi(argv[arg_index++]);
9036 ny = atoi(argv[arg_index++]);
9037 }
9038 else if ( strcmp(argv[arg_index], "-P") == 0 )
9039 {
9040 arg_index++;
9041 P = atoi(argv[arg_index++]);
9042 Q = atoi(argv[arg_index++]);
9043 }
9044 else
9045 {
9046 arg_index++;
9047 }
9048 }
9049
9050 /*-----------------------------------------------------------
9051 * Check a few things
9052 *-----------------------------------------------------------*/
9053
9054 if ((P*Q) != num_procs)
9055 {
9056 hypre_printf("Error: Invalid number of processors or processor topology \n");
9057 exit(1);
9058 }
9059
9060 /*-----------------------------------------------------------
9061 * Print driver parameters
9062 *-----------------------------------------------------------*/
9063
9064 if (myid == 0)
9065 {
9066 hypre_printf(" Laplacian 9pt:\n");
9067 hypre_printf(" (nx, ny) = (%b, %b)\n", nx, ny);
9068 hypre_printf(" (Px, Py) = (%d, %d)\n\n", P, Q);
9069 }
9070
9071 /*-----------------------------------------------------------
9072 * Set up the grid structure
9073 *-----------------------------------------------------------*/
9074
9075 /* compute p,q from P,Q and myid */
9076 p = myid % P;
9077 q = ( myid - p)/P;
9078
9079 /*-----------------------------------------------------------
9080 * Generate the matrix
9081 *-----------------------------------------------------------*/
9082
9083 values = hypre_CTAlloc(HYPRE_Real, 2, HYPRE_MEMORY_HOST);
9084
9085 values[1] = -1.;
9086
9087 values[0] = 0.;
9088 if (nx > 1)
9089 {
9090 values[0] += 2.0;
9091 }
9092 if (ny > 1)
9093 {
9094 values[0] += 2.0;
9095 }
9096 if (nx > 1 && ny > 1)
9097 {
9098 values[0] += 4.0;
9099 }
9100
9101 A = (HYPRE_ParCSRMatrix) GenerateLaplacian9pt(hypre_MPI_COMM_WORLD,
9102 nx, ny, P, Q, p, q, values);
9103
9104 hypre_TFree(values, HYPRE_MEMORY_HOST);
9105
9106 *A_ptr = A;
9107
9108 return (0);
9109 }
9110 /*----------------------------------------------------------------------
9111 * Build 27-point laplacian in 3D,
9112 * Parameters given in command line.
9113 *----------------------------------------------------------------------*/
9114
9115 HYPRE_Int
BuildParLaplacian27pt(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParCSRMatrix * A_ptr)9116 BuildParLaplacian27pt( HYPRE_Int argc,
9117 char *argv[],
9118 HYPRE_Int arg_index,
9119 HYPRE_ParCSRMatrix *A_ptr )
9120 {
9121 HYPRE_BigInt nx, ny, nz;
9122 HYPRE_Int P, Q, R;
9123
9124 HYPRE_ParCSRMatrix A;
9125
9126 HYPRE_Int num_procs, myid;
9127 HYPRE_Int p, q, r;
9128 HYPRE_Real *values;
9129
9130 /*-----------------------------------------------------------
9131 * Initialize some stuff
9132 *-----------------------------------------------------------*/
9133
9134 hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
9135 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
9136
9137 /*-----------------------------------------------------------
9138 * Set defaults
9139 *-----------------------------------------------------------*/
9140
9141 nx = 10;
9142 ny = 10;
9143 nz = 10;
9144
9145 P = 1;
9146 Q = num_procs;
9147 R = 1;
9148
9149 /*-----------------------------------------------------------
9150 * Parse command line
9151 *-----------------------------------------------------------*/
9152 arg_index = 0;
9153 while (arg_index < argc)
9154 {
9155 if ( strcmp(argv[arg_index], "-n") == 0 )
9156 {
9157 arg_index++;
9158 nx = atoi(argv[arg_index++]);
9159 ny = atoi(argv[arg_index++]);
9160 nz = atoi(argv[arg_index++]);
9161 }
9162 else if ( strcmp(argv[arg_index], "-P") == 0 )
9163 {
9164 arg_index++;
9165 P = atoi(argv[arg_index++]);
9166 Q = atoi(argv[arg_index++]);
9167 R = atoi(argv[arg_index++]);
9168 }
9169 else
9170 {
9171 arg_index++;
9172 }
9173 }
9174
9175 /*-----------------------------------------------------------
9176 * Check a few things
9177 *-----------------------------------------------------------*/
9178
9179 if ((P*Q*R) != num_procs)
9180 {
9181 hypre_printf("Error: Invalid number of processors or processor topology \n");
9182 exit(1);
9183 }
9184
9185 /*-----------------------------------------------------------
9186 * Print driver parameters
9187 *-----------------------------------------------------------*/
9188
9189 if (myid == 0)
9190 {
9191 hypre_printf(" Laplacian_27pt:\n");
9192 hypre_printf(" (nx, ny, nz) = (%b, %b, %b)\n", nx, ny, nz);
9193 hypre_printf(" (Px, Py, Pz) = (%d, %d, %d)\n\n", P, Q, R);
9194 }
9195
9196 /*-----------------------------------------------------------
9197 * Set up the grid structure
9198 *-----------------------------------------------------------*/
9199
9200 /* compute p,q,r from P,Q,R and myid */
9201 p = myid % P;
9202 q = (( myid - p)/P) % Q;
9203 r = ( myid - p - P*q)/( P*Q );
9204
9205 /*-----------------------------------------------------------
9206 * Generate the matrix
9207 *-----------------------------------------------------------*/
9208
9209 values = hypre_CTAlloc(HYPRE_Real, 2, HYPRE_MEMORY_HOST);
9210
9211 values[0] = 26.0;
9212 if (nx == 1 || ny == 1 || nz == 1)
9213 values[0] = 8.0;
9214 if (nx*ny == 1 || nx*nz == 1 || ny*nz == 1)
9215 values[0] = 2.0;
9216 values[1] = -1.;
9217
9218 A = (HYPRE_ParCSRMatrix) GenerateLaplacian27pt(hypre_MPI_COMM_WORLD,
9219 nx, ny, nz, P, Q, R, p, q, r, values);
9220
9221 hypre_TFree(values, HYPRE_MEMORY_HOST);
9222
9223 *A_ptr = A;
9224
9225 return (0);
9226 }
9227
9228
9229 /*----------------------------------------------------------------------
9230 * Build 7-point in 2D
9231 * Parameters given in command line.
9232 *----------------------------------------------------------------------*/
9233
9234 HYPRE_Int
BuildParRotate7pt(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParCSRMatrix * A_ptr)9235 BuildParRotate7pt( HYPRE_Int argc,
9236 char *argv[],
9237 HYPRE_Int arg_index,
9238 HYPRE_ParCSRMatrix *A_ptr )
9239 {
9240 HYPRE_BigInt nx, ny;
9241 HYPRE_Int P, Q;
9242
9243 HYPRE_ParCSRMatrix A;
9244
9245 HYPRE_Int num_procs, myid;
9246 HYPRE_Int p, q;
9247 HYPRE_Real eps, alpha;
9248
9249 /*-----------------------------------------------------------
9250 * Initialize some stuff
9251 *-----------------------------------------------------------*/
9252
9253 hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
9254 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
9255
9256 /*-----------------------------------------------------------
9257 * Set defaults
9258 *-----------------------------------------------------------*/
9259
9260 nx = 10;
9261 ny = 10;
9262
9263 P = 1;
9264 Q = num_procs;
9265
9266 /*-----------------------------------------------------------
9267 * Parse command line
9268 *-----------------------------------------------------------*/
9269 arg_index = 0;
9270 while (arg_index < argc)
9271 {
9272 if ( strcmp(argv[arg_index], "-n") == 0 )
9273 {
9274 arg_index++;
9275 nx = atoi(argv[arg_index++]);
9276 ny = atoi(argv[arg_index++]);
9277 }
9278 else if ( strcmp(argv[arg_index], "-P") == 0 )
9279 {
9280 arg_index++;
9281 P = atoi(argv[arg_index++]);
9282 Q = atoi(argv[arg_index++]);
9283 }
9284 else if ( strcmp(argv[arg_index], "-alpha") == 0 )
9285 {
9286 arg_index++;
9287 alpha = atof(argv[arg_index++]);
9288 }
9289 else if ( strcmp(argv[arg_index], "-eps") == 0 )
9290 {
9291 arg_index++;
9292 eps = atof(argv[arg_index++]);
9293 }
9294 else
9295 {
9296 arg_index++;
9297 }
9298 }
9299
9300 /*-----------------------------------------------------------
9301 * Check a few things
9302 *-----------------------------------------------------------*/
9303
9304 if ((P*Q) != num_procs)
9305 {
9306 hypre_printf("Error: Invalid number of processors or processor topology \n");
9307 exit(1);
9308 }
9309
9310 /*-----------------------------------------------------------
9311 * Print driver parameters
9312 *-----------------------------------------------------------*/
9313
9314 if (myid == 0)
9315 {
9316 hypre_printf(" Rotate 7pt:\n");
9317 hypre_printf(" alpha = %f, eps = %f\n", alpha,eps);
9318 hypre_printf(" (nx, ny) = (%b, %b)\n", nx, ny);
9319 hypre_printf(" (Px, Py) = (%d, %d)\n", P, Q);
9320 }
9321
9322 /*-----------------------------------------------------------
9323 * Set up the grid structure
9324 *-----------------------------------------------------------*/
9325
9326 /* compute p,q from P,Q and myid */
9327 p = myid % P;
9328 q = ( myid - p)/P;
9329
9330 /*-----------------------------------------------------------
9331 * Generate the matrix
9332 *-----------------------------------------------------------*/
9333
9334 A = (HYPRE_ParCSRMatrix) GenerateRotate7pt(hypre_MPI_COMM_WORLD,
9335 nx, ny, P, Q, p, q, alpha, eps);
9336
9337 *A_ptr = A;
9338
9339 return (0);
9340 }
9341
9342 /*----------------------------------------------------------------------
9343 * Build standard 7-point difference operator using centered differences
9344 *
9345 * eps*(a(x,y,z) ux)x + (b(x,y,z) uy)y + (c(x,y,z) uz)z
9346 * d(x,y,z) ux + e(x,y,z) uy + f(x,y,z) uz + g(x,y,z) u
9347 *
9348 * functions a,b,c,d,e,f,g need to be defined inside par_vardifconv.c
9349 *
9350 *----------------------------------------------------------------------*/
9351
9352 HYPRE_Int
BuildParVarDifConv(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParCSRMatrix * A_ptr,HYPRE_ParVector * rhs_ptr)9353 BuildParVarDifConv( HYPRE_Int argc,
9354 char *argv[],
9355 HYPRE_Int arg_index,
9356 HYPRE_ParCSRMatrix *A_ptr,
9357 HYPRE_ParVector *rhs_ptr )
9358 {
9359 HYPRE_BigInt nx, ny, nz;
9360 HYPRE_Int P, Q, R;
9361
9362 HYPRE_ParCSRMatrix A;
9363 HYPRE_ParVector rhs;
9364
9365 HYPRE_Int num_procs, myid;
9366 HYPRE_Int p, q, r;
9367 HYPRE_Int type;
9368 HYPRE_Real eps;
9369
9370 /*-----------------------------------------------------------
9371 * Initialize some stuff
9372 *-----------------------------------------------------------*/
9373
9374 hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
9375 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
9376
9377 /*-----------------------------------------------------------
9378 * Set defaults
9379 *-----------------------------------------------------------*/
9380
9381 nx = 10;
9382 ny = 10;
9383 nz = 10;
9384 P = 1;
9385 Q = num_procs;
9386 R = 1;
9387 eps = 1.0;
9388
9389 /* type: 0 : default FD;
9390 * 1-3 : FD and examples 1-3 in Ruge-Stuben paper */
9391 type = 0;
9392
9393 /*-----------------------------------------------------------
9394 * Parse command line
9395 *-----------------------------------------------------------*/
9396 arg_index = 0;
9397 while (arg_index < argc)
9398 {
9399 if ( strcmp(argv[arg_index], "-n") == 0 )
9400 {
9401 arg_index++;
9402 nx = atoi(argv[arg_index++]);
9403 ny = atoi(argv[arg_index++]);
9404 nz = atoi(argv[arg_index++]);
9405 }
9406 else if ( strcmp(argv[arg_index], "-P") == 0 )
9407 {
9408 arg_index++;
9409 P = atoi(argv[arg_index++]);
9410 Q = atoi(argv[arg_index++]);
9411 R = atoi(argv[arg_index++]);
9412 }
9413 else if ( strcmp(argv[arg_index], "-eps") == 0 )
9414 {
9415 arg_index++;
9416 eps = atof(argv[arg_index++]);
9417 }
9418 else if ( strcmp(argv[arg_index], "-vardifconvRS") == 0 )
9419 {
9420 arg_index++;
9421 type = atoi(argv[arg_index++]);
9422 }
9423 else
9424 {
9425 arg_index++;
9426 }
9427 }
9428
9429 /*-----------------------------------------------------------
9430 * Check a few things
9431 *-----------------------------------------------------------*/
9432
9433 if ((P*Q*R) != num_procs)
9434 {
9435 hypre_printf("Error: Invalid number of processors or processor topology \n");
9436 exit(1);
9437 }
9438
9439 /*-----------------------------------------------------------
9440 * Print driver parameters
9441 *-----------------------------------------------------------*/
9442
9443 if (myid == 0)
9444 {
9445 hypre_printf(" ell PDE: eps = %f\n", eps);
9446 hypre_printf(" Dx(aDxu) + Dy(bDyu) + Dz(cDzu) + d Dxu + e Dyu + f Dzu + g u= f\n");
9447 hypre_printf(" (nx, ny, nz) = (%b, %b, %b)\n", nx, ny, nz);
9448 hypre_printf(" (Px, Py, Pz) = (%d, %d, %d)\n", P, Q, R);
9449 }
9450 /*-----------------------------------------------------------
9451 * Set up the grid structure
9452 *-----------------------------------------------------------*/
9453
9454 /* compute p,q,r from P,Q,R and myid */
9455 p = myid % P;
9456 q = (( myid - p)/P) % Q;
9457 r = ( myid - p - P*q)/( P*Q );
9458
9459 /*-----------------------------------------------------------
9460 * Generate the matrix
9461 *-----------------------------------------------------------*/
9462
9463 if (0 == type)
9464 {
9465 A = (HYPRE_ParCSRMatrix) GenerateVarDifConv(hypre_MPI_COMM_WORLD,
9466 nx, ny, nz, P, Q, R, p, q, r, eps, &rhs);
9467 }
9468 else
9469 {
9470 A = (HYPRE_ParCSRMatrix) GenerateRSVarDifConv(hypre_MPI_COMM_WORLD,
9471 nx, ny, nz, P, Q, R, p, q, r, eps, &rhs,
9472 type);
9473 }
9474
9475 *A_ptr = A;
9476 *rhs_ptr = rhs;
9477
9478 return (0);
9479 }
9480
9481 /**************************************************************************/
9482
/* Fill one matrix entry of a block-coefficient array.
 *
 * values is addressed as values[k*num_fun*num_fun + mtx_entry] for
 * k = 0..3. Slots 1, 2 and 3 receive -vcx, -vcy and -vcz. Slot 0
 * receives the sum of 2*vc? over the directions whose grid size is
 * greater than 1 (0.0 if none).
 *
 * Always returns 0. */
HYPRE_Int SetSysVcoefValues(HYPRE_Int num_fun, HYPRE_BigInt nx, HYPRE_BigInt ny, HYPRE_BigInt nz, HYPRE_Real vcx,
                            HYPRE_Real vcy, HYPRE_Real vcz, HYPRE_Int mtx_entry, HYPRE_Real *values)
{
   const HYPRE_Int  block  = num_fun*num_fun;     /* distance between slots */
   HYPRE_Real      *entry  = values + mtx_entry;  /* slot 0 of this entry   */
   HYPRE_Real       center = 0.0;

   /* Slots 1..3 */
   entry[1*block] = -vcx;
   entry[2*block] = -vcy;
   entry[3*block] = -vcz;

   /* Accumulate slot 0 in x, y, z order */
   if (nx > 1)
   {
      center += 2.0*vcx;
   }
   if (ny > 1)
   {
      center += 2.0*vcy;
   }
   if (nz > 1)
   {
      center += 2.0*vcz;
   }

   /* Stored last so it wins if the slots coincide (block == 0). */
   entry[0] = center;

   return 0;
}
9511
9512 /*----------------------------------------------------------------------
9513 * Build coordinates for 1D/2D/3D
9514 *----------------------------------------------------------------------*/
9515
9516 HYPRE_Int
BuildParCoordinates(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_Int * coorddim_ptr,float ** coord_ptr)9517 BuildParCoordinates( HYPRE_Int argc,
9518 char *argv[],
9519 HYPRE_Int arg_index,
9520 HYPRE_Int *coorddim_ptr,
9521 float **coord_ptr )
9522 {
9523 HYPRE_BigInt nx, ny, nz;
9524 HYPRE_Int P, Q, R;
9525
9526 HYPRE_Int num_procs, myid;
9527 HYPRE_Int p, q, r;
9528
9529 HYPRE_Int coorddim;
9530 float *coordinates;
9531
9532 /*-----------------------------------------------------------
9533 * Initialize some stuff
9534 *-----------------------------------------------------------*/
9535
9536 hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
9537 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
9538
9539 /*-----------------------------------------------------------
9540 * Set defaults
9541 *-----------------------------------------------------------*/
9542
9543 nx = 10;
9544 ny = 10;
9545 nz = 10;
9546
9547 P = 1;
9548 Q = num_procs;
9549 R = 1;
9550
9551 /*-----------------------------------------------------------
9552 * Parse command line
9553 *-----------------------------------------------------------*/
9554 arg_index = 0;
9555 while (arg_index < argc)
9556 {
9557 if ( strcmp(argv[arg_index], "-n") == 0 )
9558 {
9559 arg_index++;
9560 nx = atoi(argv[arg_index++]);
9561 ny = atoi(argv[arg_index++]);
9562 nz = atoi(argv[arg_index++]);
9563 }
9564 else if ( strcmp(argv[arg_index], "-P") == 0 )
9565 {
9566 arg_index++;
9567 P = atoi(argv[arg_index++]);
9568 Q = atoi(argv[arg_index++]);
9569 R = atoi(argv[arg_index++]);
9570 }
9571 else
9572 {
9573 arg_index++;
9574 }
9575 }
9576
9577 /* compute p,q,r from P,Q,R and myid */
9578 p = myid % P;
9579 q = (( myid - p)/P) % Q;
9580 r = ( myid - p - P*q)/( P*Q );
9581
9582 /*-----------------------------------------------------------
9583 * Generate the coordinates
9584 *-----------------------------------------------------------*/
9585
9586 coorddim = 3;
9587 if (nx<2) coorddim--;
9588 if (ny<2) coorddim--;
9589 if (nz<2) coorddim--;
9590
9591 if (coorddim>0)
9592 coordinates = GenerateCoordinates (hypre_MPI_COMM_WORLD,
9593 nx, ny, nz, P, Q, R, p, q, r, coorddim);
9594 else
9595 coordinates=NULL;
9596
9597 *coorddim_ptr = coorddim;
9598 *coord_ptr = coordinates;
9599 return (0);
9600 }
9601
9602
9603 /* begin lobpcg */
9604
9605 /*----------------------------------------------------------------------
9606 * Build standard 7-point laplacian in 3D.
9607 *----------------------------------------------------------------------*/
9608
9609 HYPRE_Int
BuildParIsoLaplacian(HYPRE_Int argc,char ** argv,HYPRE_ParCSRMatrix * A_ptr)9610 BuildParIsoLaplacian( HYPRE_Int argc, char** argv, HYPRE_ParCSRMatrix *A_ptr )
9611 {
9612
9613 HYPRE_BigInt nx, ny, nz;
9614 HYPRE_Real cx, cy, cz;
9615
9616 HYPRE_Int P, Q, R;
9617
9618 HYPRE_ParCSRMatrix A;
9619
9620 HYPRE_Int num_procs, myid;
9621 HYPRE_Int p, q, r;
9622 HYPRE_Real *values;
9623
9624 HYPRE_Int arg_index;
9625
9626 /*-----------------------------------------------------------
9627 * Initialize some stuff
9628 *-----------------------------------------------------------*/
9629
9630 hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
9631 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
9632
9633 /*-----------------------------------------------------------
9634 * Set defaults
9635 *-----------------------------------------------------------*/
9636
9637 P = 1;
9638 Q = num_procs;
9639 R = 1;
9640
9641 nx = 10;
9642 ny = 10;
9643 nz = 10;
9644
9645 cx = 1.0;
9646 cy = 1.0;
9647 cz = 1.0;
9648
9649
9650 arg_index = 0;
9651 while (arg_index < argc)
9652 {
9653 if ( strcmp(argv[arg_index], "-n") == 0 )
9654 {
9655 arg_index++;
9656 nx = atoi(argv[arg_index++]);
9657 ny = atoi(argv[arg_index++]);
9658 nz = atoi(argv[arg_index++]);
9659 }
9660 else
9661 {
9662 arg_index++;
9663 }
9664 }
9665
9666 /*-----------------------------------------------------------
9667 * Print driver parameters
9668 *-----------------------------------------------------------*/
9669
9670 if (myid == 0)
9671 {
9672 hypre_printf(" Laplacian:\n");
9673 hypre_printf(" (nx, ny, nz) = (%b, %b, %b)\n", nx, ny, nz);
9674 hypre_printf(" (Px, Py, Pz) = (%d, %d, %d)\n", P, Q, R);
9675 hypre_printf(" (cx, cy, cz) = (%f, %f, %f)\n\n", cx, cy, cz);
9676 }
9677
9678 /*-----------------------------------------------------------
9679 * Set up the grid structure
9680 *-----------------------------------------------------------*/
9681
9682 /* compute p,q,r from P,Q,R and myid */
9683 p = myid % P;
9684 q = (( myid - p)/P) % Q;
9685 r = ( myid - p - P*q)/( P*Q );
9686
9687 /*-----------------------------------------------------------
9688 * Generate the matrix
9689 *-----------------------------------------------------------*/
9690
9691 values = hypre_CTAlloc(HYPRE_Real, 4, HYPRE_MEMORY_HOST);
9692
9693 values[1] = -cx;
9694 values[2] = -cy;
9695 values[3] = -cz;
9696
9697 values[0] = 0.;
9698 if (nx > 1)
9699 {
9700 values[0] += 2.0*cx;
9701 }
9702 if (ny > 1)
9703 {
9704 values[0] += 2.0*cy;
9705 }
9706 if (nz > 1)
9707 {
9708 values[0] += 2.0*cz;
9709 }
9710
9711 A = (HYPRE_ParCSRMatrix) GenerateLaplacian(hypre_MPI_COMM_WORLD,
9712 nx, ny, nz, P, Q, R, p, q, r, values);
9713
9714 hypre_TFree(values, HYPRE_MEMORY_HOST);
9715
9716 *A_ptr = A;
9717
9718 return (0);
9719 }
9720
9721 /* end lobpcg */
9722