1 /******************************************************************************
2 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4 *
5 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6 ******************************************************************************/
7
8 /*--------------------------------------------------------------------------
9 * Test driver for unstructured matrix interface (IJ_matrix interface).
10 * Do `driver -help' for usage info.
11 * This driver started from the driver for parcsr_linear_solvers, and it
12 * works by first building a parcsr matrix as before and then "copying"
13 * that matrix row-by-row into the IJMatrix interface. AJC 7/99.
14 *--------------------------------------------------------------------------*/
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <math.h>
18
19 #include "_hypre_utilities.h"
20 #include "HYPRE.h"
21 #include "HYPRE_parcsr_mv.h"
22
23 #include "HYPRE_IJ_mv.h"
24 #include "HYPRE_parcsr_ls.h"
25 #include "_hypre_parcsr_ls.h"
26 #include "_hypre_parcsr_mv.h"
27 #include "HYPRE_krylov.h"
28
29 #include "cuda_profiler_api.h"
30
31 #ifdef __cplusplus
32 extern "C" {
33 #endif
34
35
36
37 HYPRE_Int BuildParFromFile (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix *A_ptr );
38 HYPRE_Int BuildParRhsFromFile (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParVector *b_ptr );
39
40 HYPRE_Int BuildParLaplacian (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix *A_ptr );
41 HYPRE_Int BuildParSysLaplacian (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix *A_ptr );
42 HYPRE_Int BuildParDifConv (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix *A_ptr);
43 HYPRE_Int BuildFuncsFromFiles (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix A , HYPRE_Int **dof_func_ptr );
44 HYPRE_Int BuildFuncsFromOneFile (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix A , HYPRE_Int **dof_func_ptr );
45 HYPRE_Int BuildParLaplacian9pt (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix *A_ptr );
46 HYPRE_Int BuildParLaplacian27pt (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix *A_ptr );
47 HYPRE_Int BuildParRotate7pt (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix *A_ptr );
48 HYPRE_Int BuildParVarDifConv (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix *A_ptr , HYPRE_ParVector *rhs_ptr );
49 HYPRE_ParCSRMatrix GenerateSysLaplacian (MPI_Comm comm, HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int nz,
50 HYPRE_Int P, HYPRE_Int Q, HYPRE_Int R, HYPRE_Int p, HYPRE_Int q, HYPRE_Int r,
51 HYPRE_Int num_fun, HYPRE_Real *mtrx, HYPRE_Real *value);
52 HYPRE_ParCSRMatrix GenerateSysLaplacianVCoef (MPI_Comm comm, HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int nz,
53 HYPRE_Int P, HYPRE_Int Q, HYPRE_Int R, HYPRE_Int p, HYPRE_Int q, HYPRE_Int r,
54 HYPRE_Int num_fun, HYPRE_Real *mtrx, HYPRE_Real *value);
55 HYPRE_Int SetSysVcoefValues(HYPRE_Int num_fun, HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int nz, HYPRE_Real vcx, HYPRE_Real vcy, HYPRE_Real vcz, HYPRE_Int mtx_entry, HYPRE_Real *values);
56
57 HYPRE_Int BuildParCoordinates (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_Int *coorddim_ptr , float **coord_ptr );
58
59 void testPMIS(HYPRE_ParCSRMatrix parcsr_A);
60 void testPMIS2(HYPRE_ParCSRMatrix parcsr_A);
61 void testPMIS3(HYPRE_ParCSRMatrix parcsr_A);
62 void testTranspose(HYPRE_ParCSRMatrix parcsr_A);
63 void testAdd(HYPRE_ParCSRMatrix parcsr_A);
64 void testFFFC(HYPRE_ParCSRMatrix parcsr_A);
65
66 HYPRE_Int CompareParCSRDH(HYPRE_ParCSRMatrix hmat, HYPRE_ParCSRMatrix dmat, HYPRE_Real tol);
67
68 #ifdef __cplusplus
69 }
70 #endif
71
72 hypre_int
main(hypre_int argc,char * argv[])73 main( hypre_int argc,
74 char *argv[] )
75 {
76 HYPRE_Int arg_index;
77 HYPRE_Int print_usage;
78 HYPRE_Int build_matrix_type;
79 HYPRE_Int build_matrix_arg_index;
80 HYPRE_Int ierr = 0;
81 void *object;
82
83 HYPRE_IJMatrix ij_A = NULL;
84 HYPRE_ParCSRMatrix parcsr_A = NULL;
85
86 HYPRE_Int time_index;
87 MPI_Comm comm = hypre_MPI_COMM_WORLD;
88 HYPRE_Int test = 1;
89 //HYPRE_Int i;
90 //HYPRE_Real *data;
91
92 HYPRE_Int myid, num_procs;
93
94 /*-----------------------------------------------------------
95 * Initialize some stuff
96 *-----------------------------------------------------------*/
97 /* Initialize MPI */
98 hypre_MPI_Init(&argc, &argv);
99
100 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid);
101 hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
102
103 /*-----------------------------------------------------------------
104 * GPU Device binding
105 * Must be done before HYPRE_Init() and should not be changed after
106 *-----------------------------------------------------------------*/
107 hypre_bind_device(myid, num_procs, hypre_MPI_COMM_WORLD);
108
109 /*-----------------------------------------------------------
110 * Initialize : must be the first HYPRE function to call
111 *-----------------------------------------------------------*/
112 HYPRE_Init();
113
114 hypre_SetNumThreads(5);
115 hypre_printf("CPU #OMP THREADS %d\n", hypre_NumThreads());
116
117 /*-----------------------------------------------------------
118 * Set defaults
119 *-----------------------------------------------------------*/
120 build_matrix_type = 2;
121 build_matrix_arg_index = argc;
122
123 /*-----------------------------------------------------------
124 * Parse command line
125 *-----------------------------------------------------------*/
126
127 print_usage = 0;
128 arg_index = 1;
129
130 while ( (arg_index < argc) && (!print_usage) )
131 {
132 if ( strcmp(argv[arg_index], "-fromfile") == 0 )
133 {
134 arg_index++;
135 build_matrix_type = -1;
136 build_matrix_arg_index = arg_index;
137 }
138 else if ( strcmp(argv[arg_index], "-fromparcsrfile") == 0 )
139 {
140 arg_index++;
141 build_matrix_type = 0;
142 build_matrix_arg_index = arg_index;
143 }
144 else if ( strcmp(argv[arg_index], "-fromonecsrfile") == 0 )
145 {
146 arg_index++;
147 build_matrix_type = 1;
148 build_matrix_arg_index = arg_index;
149 }
150 else if ( strcmp(argv[arg_index], "-laplacian") == 0 )
151 {
152 arg_index++;
153 build_matrix_type = 2;
154 build_matrix_arg_index = arg_index;
155 }
156 else if ( strcmp(argv[arg_index], "-9pt") == 0 )
157 {
158 arg_index++;
159 build_matrix_type = 3;
160 build_matrix_arg_index = arg_index;
161 }
162 else if ( strcmp(argv[arg_index], "-27pt") == 0 )
163 {
164 arg_index++;
165 build_matrix_type = 4;
166 build_matrix_arg_index = arg_index;
167 }
168 else if ( strcmp(argv[arg_index], "-difconv") == 0 )
169 {
170 arg_index++;
171 build_matrix_type = 5;
172 build_matrix_arg_index = arg_index;
173 }
174 else if ( strcmp(argv[arg_index], "-vardifconv") == 0 )
175 {
176 arg_index++;
177 build_matrix_type = 6;
178 build_matrix_arg_index = arg_index;
179 }
180 else if ( strcmp(argv[arg_index], "-rotate") == 0 )
181 {
182 arg_index++;
183 build_matrix_type = 7;
184 build_matrix_arg_index = arg_index;
185 }
186 else if ( strcmp(argv[arg_index], "-test") == 0 )
187 {
188 arg_index++;
189 test = atoi(argv[arg_index++]);
190 }
191 else if ( strcmp(argv[arg_index], "-help") == 0 )
192 {
193 print_usage = 1;
194 }
195 else
196 {
197 arg_index++;
198 }
199 }
200
201 /*-----------------------------------------------------------
202 * Print usage info
203 *-----------------------------------------------------------*/
204 if ( print_usage )
205 {
206 goto final;
207 }
208
209 /*-----------------------------------------------------------
210 * Print driver parameters
211 *-----------------------------------------------------------*/
212
213 HYPRE_SetSpGemmUseCusparse(0);
214 /* use cuRand for PMIS */
215 HYPRE_SetUseGpuRand(1);
216
217 /*-----------------------------------------------------------
218 * Set up matrix
219 *-----------------------------------------------------------*/
220 time_index = hypre_InitializeTiming("Generate Matrix");
221 hypre_BeginTiming(time_index);
222 if ( build_matrix_type == -1 )
223 {
224 ierr = HYPRE_IJMatrixRead( argv[build_matrix_arg_index], comm,
225 HYPRE_PARCSR, &ij_A );
226 if (ierr)
227 {
228 hypre_printf("ERROR: Problem reading in the system matrix!\n");
229 exit(1);
230 }
231 }
232 else if ( build_matrix_type == 0 )
233 {
234 BuildParFromFile(argc, argv, build_matrix_arg_index, &parcsr_A);
235 }
236 else if ( build_matrix_type == 2 )
237 {
238 BuildParLaplacian(argc, argv, build_matrix_arg_index, &parcsr_A);
239 }
240 else if ( build_matrix_type == 3 )
241 {
242 BuildParLaplacian9pt(argc, argv, build_matrix_arg_index, &parcsr_A);
243 }
244 else if ( build_matrix_type == 4 )
245 {
246 BuildParLaplacian27pt(argc, argv, build_matrix_arg_index, &parcsr_A);
247 }
248 else if ( build_matrix_type == 5 )
249 {
250 BuildParDifConv(argc, argv, build_matrix_arg_index, &parcsr_A);
251 }
252 else if ( build_matrix_type == 7 )
253 {
254 BuildParRotate7pt(argc, argv, build_matrix_arg_index, &parcsr_A);
255 }
256 else
257 {
258 hypre_printf("You have asked for an unsupported problem with\n");
259 hypre_printf("build_matrix_type = %d.\n", build_matrix_type);
260 return(-1);
261 }
262
263 if (build_matrix_type < 0)
264 {
265 ierr += HYPRE_IJMatrixGetObject( ij_A, &object);
266 parcsr_A = (HYPRE_ParCSRMatrix) object;
267 }
268
269 hypre_EndTiming(time_index);
270 hypre_PrintTiming("Generate Matrix", hypre_MPI_COMM_WORLD);
271 hypre_FinalizeTiming(time_index);
272 hypre_ClearTiming();
273
274 hypre_ParCSRMatrixMigrate(parcsr_A, hypre_HandleMemoryLocation(hypre_handle()));
275
276 /*
277 * TESTS
278 */
279 if (test == 1)
280 {
281 testPMIS(parcsr_A);
282 }
283 else if (test == 2)
284 {
285 testPMIS2(parcsr_A);
286 }
287 else if (test == 3)
288 {
289 testPMIS3(parcsr_A);
290 }
291 else if (test == 4)
292 {
293 testTranspose(parcsr_A);
294 }
295 else if (test == 5)
296 {
297 testAdd(parcsr_A);
298 }
299
300 //testFFFC(parcsr_A);
301
302 /*-----------------------------------------------------------
303 * Finalize things
304 *-----------------------------------------------------------*/
305
306 if (build_matrix_type == -1)
307 {
308 HYPRE_IJMatrixDestroy(ij_A);
309 }
310 else
311 {
312 HYPRE_ParCSRMatrixDestroy(parcsr_A);
313 }
314
315 final:
316
317 /* Finalize Hypre */
318 HYPRE_Finalize();
319
320 /* Finalize MPI */
321 hypre_MPI_Finalize();
322
323 /* when using cuda-memcheck --leak-check full, uncomment this */
324 #if defined(HYPRE_USING_GPU)
325 hypre_ResetCudaDevice(hypre_handle());
326 #endif
327
328 return (0);
329 }
330
331 /*----------------------------------------------------------------------
332 * Build matrix from file. Expects three files on each processor.
333 * filename.D.n contains the diagonal part, filename.O.n contains
334 * the offdiagonal part and filename.INFO.n contains global row
335 * and column numbers, number of columns of offdiagonal matrix
336 * and the mapping of offdiagonal column numbers to global column numbers.
337 * Parameters given in command line.
338 *----------------------------------------------------------------------*/
339
340 HYPRE_Int
BuildParFromFile(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParCSRMatrix * A_ptr)341 BuildParFromFile( HYPRE_Int argc,
342 char *argv[],
343 HYPRE_Int arg_index,
344 HYPRE_ParCSRMatrix *A_ptr )
345 {
346 char *filename;
347
348 HYPRE_ParCSRMatrix A;
349
350 HYPRE_Int myid;
351
352 /*-----------------------------------------------------------
353 * Initialize some stuff
354 *-----------------------------------------------------------*/
355
356 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
357
358 /*-----------------------------------------------------------
359 * Parse command line
360 *-----------------------------------------------------------*/
361
362 if (arg_index < argc)
363 {
364 filename = argv[arg_index];
365 }
366 else
367 {
368 hypre_printf("Error: No filename specified \n");
369 exit(1);
370 }
371
372 /*-----------------------------------------------------------
373 * Print driver parameters
374 *-----------------------------------------------------------*/
375
376 if (myid == 0)
377 {
378 hypre_printf(" FromFile: %s\n", filename);
379 }
380
381 /*-----------------------------------------------------------
382 * Generate the matrix
383 *-----------------------------------------------------------*/
384
385 HYPRE_ParCSRMatrixRead(hypre_MPI_COMM_WORLD, filename,&A);
386
387 *A_ptr = A;
388
389 return (0);
390 }
391
392
393 /*----------------------------------------------------------------------
394 * Build rhs from file. Expects two files on each processor.
395 * filename.n contains the data and
396 * and filename.INFO.n contains global row
397 * numbers
398 *----------------------------------------------------------------------*/
399
400 HYPRE_Int
BuildParRhsFromFile(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParVector * b_ptr)401 BuildParRhsFromFile( HYPRE_Int argc,
402 char *argv[],
403 HYPRE_Int arg_index,
404 HYPRE_ParVector *b_ptr )
405 {
406 char *filename;
407
408 HYPRE_ParVector b;
409
410 HYPRE_Int myid;
411
412 /*-----------------------------------------------------------
413 * Initialize some stuff
414 *-----------------------------------------------------------*/
415
416 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
417
418 /*-----------------------------------------------------------
419 * Parse command line
420 *-----------------------------------------------------------*/
421
422 if (arg_index < argc)
423 {
424 filename = argv[arg_index];
425 }
426 else
427 {
428 hypre_printf("Error: No filename specified \n");
429 exit(1);
430 }
431
432 /*-----------------------------------------------------------
433 * Print driver parameters
434 *-----------------------------------------------------------*/
435
436 if (myid == 0)
437 {
438 hypre_printf(" RhsFromParFile: %s\n", filename);
439 }
440
441 /*-----------------------------------------------------------
442 * Generate the matrix
443 *-----------------------------------------------------------*/
444
445 HYPRE_ParVectorRead(hypre_MPI_COMM_WORLD, filename,&b);
446
447 *b_ptr = b;
448
449 return (0);
450 }
451
452
453
454
455 /*----------------------------------------------------------------------
456 * Build standard 7-point laplacian in 3D with grid and anisotropy.
457 * Parameters given in command line.
458 *----------------------------------------------------------------------*/
459
460 HYPRE_Int
BuildParLaplacian(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParCSRMatrix * A_ptr)461 BuildParLaplacian( HYPRE_Int argc,
462 char *argv[],
463 HYPRE_Int arg_index,
464 HYPRE_ParCSRMatrix *A_ptr )
465 {
466 HYPRE_Int nx, ny, nz;
467 HYPRE_Int P, Q, R;
468 HYPRE_Real cx, cy, cz;
469
470 HYPRE_ParCSRMatrix A;
471
472 HYPRE_Int num_procs, myid;
473 HYPRE_Int p, q, r;
474 HYPRE_Int num_fun = 1;
475 HYPRE_Real *values;
476 HYPRE_Real *mtrx;
477
478 HYPRE_Real ep = .1;
479
480 HYPRE_Int system_vcoef = 0;
481 HYPRE_Int sys_opt = 0;
482 HYPRE_Int vcoef_opt = 0;
483
484
485 /*-----------------------------------------------------------
486 * Initialize some stuff
487 *-----------------------------------------------------------*/
488
489 hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
490 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
491
492 /*-----------------------------------------------------------
493 * Set defaults
494 *-----------------------------------------------------------*/
495
496 nx = 10;
497 ny = 10;
498 nz = 10;
499
500 P = 1;
501 Q = num_procs;
502 R = 1;
503
504 cx = 1.;
505 cy = 1.;
506 cz = 1.;
507
508 /*-----------------------------------------------------------
509 * Parse command line
510 *-----------------------------------------------------------*/
511 arg_index = 0;
512 while (arg_index < argc)
513 {
514 if ( strcmp(argv[arg_index], "-n") == 0 )
515 {
516 arg_index++;
517 nx = atoi(argv[arg_index++]);
518 ny = atoi(argv[arg_index++]);
519 nz = atoi(argv[arg_index++]);
520 }
521 else if ( strcmp(argv[arg_index], "-P") == 0 )
522 {
523 arg_index++;
524 P = atoi(argv[arg_index++]);
525 Q = atoi(argv[arg_index++]);
526 R = atoi(argv[arg_index++]);
527 }
528 else if ( strcmp(argv[arg_index], "-c") == 0 )
529 {
530 arg_index++;
531 cx = atof(argv[arg_index++]);
532 cy = atof(argv[arg_index++]);
533 cz = atof(argv[arg_index++]);
534 }
535 else if ( strcmp(argv[arg_index], "-sysL") == 0 )
536 {
537 arg_index++;
538 num_fun = atoi(argv[arg_index++]);
539 }
540 else if ( strcmp(argv[arg_index], "-sysL_opt") == 0 )
541 {
542 arg_index++;
543 sys_opt = atoi(argv[arg_index++]);
544 }
545 else if ( strcmp(argv[arg_index], "-sys_vcoef") == 0 )
546 {
547 /* have to use -sysL for this to */
548 arg_index++;
549 system_vcoef = 1;
550 }
551 else if ( strcmp(argv[arg_index], "-sys_vcoef_opt") == 0 )
552 {
553 arg_index++;
554 vcoef_opt = atoi(argv[arg_index++]);
555 }
556 else if ( strcmp(argv[arg_index], "-ep") == 0 )
557 {
558 arg_index++;
559 ep = atof(argv[arg_index++]);
560 }
561 else
562 {
563 arg_index++;
564 }
565 }
566
567 /*-----------------------------------------------------------
568 * Check a few things
569 *-----------------------------------------------------------*/
570
571 if ((P*Q*R) != num_procs)
572 {
573 hypre_printf("Error: Invalid number of processors or processor topology \n");
574 exit(1);
575 }
576
577 /*-----------------------------------------------------------
578 * Print driver parameters
579 *-----------------------------------------------------------*/
580
581 if (myid == 0)
582 {
583 hypre_printf(" Laplacian: num_fun = %d\n", num_fun);
584 hypre_printf(" (nx, ny, nz) = (%d, %d, %d)\n", nx, ny, nz);
585 hypre_printf(" (Px, Py, Pz) = (%d, %d, %d)\n", P, Q, R);
586 hypre_printf(" (cx, cy, cz) = (%f, %f, %f)\n\n", cx, cy, cz);
587 }
588
589 /*-----------------------------------------------------------
590 * Set up the grid structure
591 *-----------------------------------------------------------*/
592
593 /* compute p,q,r from P,Q,R and myid */
594 p = myid % P;
595 q = (( myid - p)/P) % Q;
596 r = ( myid - p - P*q)/( P*Q );
597
598 /*-----------------------------------------------------------
599 * Generate the matrix
600 *-----------------------------------------------------------*/
601
602 values = hypre_CTAlloc(HYPRE_Real, 4, HYPRE_MEMORY_HOST);
603
604 values[1] = -cx;
605 values[2] = -cy;
606 values[3] = -cz;
607
608 values[0] = 0.;
609 if (nx > 1)
610 {
611 values[0] += 2.0*cx;
612 }
613 if (ny > 1)
614 {
615 values[0] += 2.0*cy;
616 }
617 if (nz > 1)
618 {
619 values[0] += 2.0*cz;
620 }
621
622 if (num_fun == 1)
623 A = (HYPRE_ParCSRMatrix) GenerateLaplacian(hypre_MPI_COMM_WORLD,
624 nx, ny, nz, P, Q, R, p, q, r, values);
625 else
626 {
627 mtrx = hypre_CTAlloc(HYPRE_Real, num_fun*num_fun, HYPRE_MEMORY_HOST);
628
629 if (num_fun == 2)
630 {
631 if (sys_opt ==1) /* identity */
632 {
633 mtrx[0] = 1.0;
634 mtrx[1] = 0.0;
635 mtrx[2] = 0.0;
636 mtrx[3] = 1.0;
637 }
638 else if (sys_opt ==2)
639 {
640 mtrx[0] = 1.0;
641 mtrx[1] = 0.0;
642 mtrx[2] = 0.0;
643 mtrx[3] = 20.0;
644 }
645 else if (sys_opt ==3) /* similar to barry's talk - ex1 */
646 {
647 mtrx[0] = 1.0;
648 mtrx[1] = 2.0;
649 mtrx[2] = 2.0;
650 mtrx[3] = 1.0;
651 }
652 else if (sys_opt ==4) /* can use with vcoef to get barry's ex*/
653 {
654 mtrx[0] = 1.0;
655 mtrx[1] = 1.0;
656 mtrx[2] = 1.0;
657 mtrx[3] = 1.0;
658 }
659 else if (sys_opt ==5) /* barry's talk - ex1 */
660 {
661 mtrx[0] = 1.0;
662 mtrx[1] = 1.1;
663 mtrx[2] = 1.1;
664 mtrx[3] = 1.0;
665 }
666 else if (sys_opt ==6) /* */
667 {
668 mtrx[0] = 1.1;
669 mtrx[1] = 1.0;
670 mtrx[2] = 1.0;
671 mtrx[3] = 1.1;
672 }
673
674 else /* == 0 */
675 {
676 mtrx[0] = 2;
677 mtrx[1] = 1;
678 mtrx[2] = 1;
679 mtrx[3] = 2;
680 }
681 }
682 else if (num_fun == 3)
683 {
684 if (sys_opt ==1)
685 {
686 mtrx[0] = 1.0;
687 mtrx[1] = 0.0;
688 mtrx[2] = 0.0;
689 mtrx[3] = 0.0;
690 mtrx[4] = 1.0;
691 mtrx[5] = 0.0;
692 mtrx[6] = 0.0;
693 mtrx[7] = 0.0;
694 mtrx[8] = 1.0;
695 }
696 else if (sys_opt ==2)
697 {
698 mtrx[0] = 1.0;
699 mtrx[1] = 0.0;
700 mtrx[2] = 0.0;
701 mtrx[3] = 0.0;
702 mtrx[4] = 20.0;
703 mtrx[5] = 0.0;
704 mtrx[6] = 0.0;
705 mtrx[7] = 0.0;
706 mtrx[8] =.01;
707 }
708 else if (sys_opt ==3)
709 {
710 mtrx[0] = 1.01;
711 mtrx[1] = 1;
712 mtrx[2] = 0.0;
713 mtrx[3] = 1;
714 mtrx[4] = 2;
715 mtrx[5] = 1;
716 mtrx[6] = 0.0;
717 mtrx[7] = 1;
718 mtrx[8] = 1.01;
719 }
720 else if (sys_opt ==4) /* barry ex4 */
721 {
722 mtrx[0] = 3;
723 mtrx[1] = 1;
724 mtrx[2] = 0.0;
725 mtrx[3] = 1;
726 mtrx[4] = 4;
727 mtrx[5] = 2;
728 mtrx[6] = 0.0;
729 mtrx[7] = 2;
730 mtrx[8] = .25;
731 }
732 else /* == 0 */
733 {
734 mtrx[0] = 2.0;
735 mtrx[1] = 1.0;
736 mtrx[2] = 0.0;
737 mtrx[3] = 1.0;
738 mtrx[4] = 2.0;
739 mtrx[5] = 1.0;
740 mtrx[6] = 0.0;
741 mtrx[7] = 1.0;
742 mtrx[8] = 2.0;
743 }
744
745 }
746 else if (num_fun == 4)
747 {
748 mtrx[0] = 1.01;
749 mtrx[1] = 1;
750 mtrx[2] = 0.0;
751 mtrx[3] = 0.0;
752 mtrx[4] = 1;
753 mtrx[5] = 2;
754 mtrx[6] = 1;
755 mtrx[7] = 0.0;
756 mtrx[8] = 0.0;
757 mtrx[9] = 1;
758 mtrx[10] = 1.01;
759 mtrx[11] = 0.0;
760 mtrx[12] = 2;
761 mtrx[13] = 1;
762 mtrx[14] = 0.0;
763 mtrx[15] = 1;
764 }
765
766
767
768
769 if (!system_vcoef)
770 {
771 A = (HYPRE_ParCSRMatrix) GenerateSysLaplacian(hypre_MPI_COMM_WORLD,
772 nx, ny, nz, P, Q,
773 R, p, q, r, num_fun, mtrx, values);
774 }
775 else
776 {
777
778
779 HYPRE_Real *mtrx_values;
780
781 mtrx_values = hypre_CTAlloc(HYPRE_Real, num_fun*num_fun*4, HYPRE_MEMORY_HOST);
782
783 if (num_fun == 2)
784 {
785 if (vcoef_opt == 1)
786 {
787 /* Barry's talk * - must also have sys_opt = 4, all fail */
788 mtrx[0] = 1.0;
789 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, .10, 1.0, 0, mtrx_values);
790
791 mtrx[1] = 1.0;
792 SetSysVcoefValues(num_fun, nx, ny, nz, .1, 1.0, 1.0, 1, mtrx_values);
793
794 mtrx[2] = 1.0;
795 SetSysVcoefValues(num_fun, nx, ny, nz, .01, 1.0, 1.0, 2, mtrx_values);
796
797 mtrx[3] = 1.0;
798 SetSysVcoefValues(num_fun, nx, ny, nz, 2.0, .02, 1.0, 3, mtrx_values);
799
800 }
801 else if (vcoef_opt == 2)
802 {
803 /* Barry's talk * - ex2 - if have sys-opt = 4*/
804 mtrx[0] = 1.0;
805 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, .010, 1.0, 0, mtrx_values);
806
807 mtrx[1] = 200.0;
808 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, 1.0, 1.0, 1, mtrx_values);
809
810 mtrx[2] = 200.0;
811 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, 1.0, 1.0, 2, mtrx_values);
812
813 mtrx[3] = 1.0;
814 SetSysVcoefValues(num_fun, nx, ny, nz, 2.0, .02, 1.0, 3, mtrx_values);
815
816 }
817 else if (vcoef_opt == 3) /* use with default sys_opt - ulrike ex 3*/
818 {
819
820 /* mtrx[0] */
821 SetSysVcoefValues(num_fun, nx, ny, nz, ep*1.0, 1.0, 1.0, 0, mtrx_values);
822
823 /* mtrx[1] */
824 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, 1.0, 1.0, 1, mtrx_values);
825
826 /* mtrx[2] */
827 SetSysVcoefValues(num_fun, nx, ny, nz, ep*1.0, 1.0, 1.0, 2, mtrx_values);
828
829 /* mtrx[3] */
830 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, 1.0, 1.0, 3, mtrx_values);
831 }
832 else if (vcoef_opt == 4) /* use with default sys_opt - ulrike ex 4*/
833 {
834 HYPRE_Real ep2 = ep;
835
836 /* mtrx[0] */
837 SetSysVcoefValues(num_fun, nx, ny, nz, ep*1.0, 1.0, 1.0, 0, mtrx_values);
838
839 /* mtrx[1] */
840 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, ep*1.0, 1.0, 1, mtrx_values);
841
842 /* mtrx[2] */
843 SetSysVcoefValues(num_fun, nx, ny, nz, ep*1.0, 1.0, 1.0, 2, mtrx_values);
844
845 /* mtrx[3] */
846 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, ep2*1.0, 1.0, 3, mtrx_values);
847 }
848 else if (vcoef_opt == 5) /* use with default sys_opt - */
849 {
850 HYPRE_Real alp, beta;
851 alp = .001;
852 beta = 10;
853
854 /* mtrx[0] */
855 SetSysVcoefValues(num_fun, nx, ny, nz, alp*1.0, 1.0, 1.0, 0, mtrx_values);
856
857 /* mtrx[1] */
858 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, beta*1.0, 1.0, 1, mtrx_values);
859
860 /* mtrx[2] */
861 SetSysVcoefValues(num_fun, nx, ny, nz, alp*1.0, 1.0, 1.0, 2, mtrx_values);
862
863 /* mtrx[3] */
864 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, beta*1.0, 1.0, 3, mtrx_values);
865 }
866 else /* = 0 */
867 {
868 /* mtrx[0] */
869 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, 1.0, 1.0, 0, mtrx_values);
870
871 /* mtrx[1] */
872 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, 2.0, 1.0, 1, mtrx_values);
873
874 /* mtrx[2] */
875 SetSysVcoefValues(num_fun, nx, ny, nz, 2.0, 1.0, 0.0, 2, mtrx_values);
876
877 /* mtrx[3] */
878 SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, 3.0, 1.0, 3, mtrx_values);
879 }
880
881 }
882 else if (num_fun == 3)
883 {
884 mtrx[0] = 1;
885 SetSysVcoefValues(num_fun, nx, ny, nz, 1, .01, 1, 0, mtrx_values);
886
887 mtrx[1] = 1;
888 SetSysVcoefValues(num_fun, nx, ny, nz, 1, 1, 1, 1, mtrx_values);
889
890 mtrx[2] = 0.0;
891 SetSysVcoefValues(num_fun, nx, ny, nz, 1, 1, 1, 2, mtrx_values);
892
893 mtrx[3] = 1;
894 SetSysVcoefValues(num_fun, nx, ny, nz, 1, 1, 1, 3, mtrx_values);
895
896 mtrx[4] = 1;
897 SetSysVcoefValues(num_fun, nx, ny, nz, 2, .02, 1, 4, mtrx_values);
898
899 mtrx[5] = 2;
900 SetSysVcoefValues(num_fun, nx, ny, nz, 1, 1, 1, 5, mtrx_values);
901
902 mtrx[6] = 0.0;
903 SetSysVcoefValues(num_fun, nx, ny, nz, 1, 1, 1, 6, mtrx_values);
904
905 mtrx[7] = 2;
906 SetSysVcoefValues(num_fun, nx, ny, nz, 1, 1, 1, 7, mtrx_values);
907
908 mtrx[8] = 1;
909 SetSysVcoefValues(num_fun, nx, ny, nz, 1.5, .04, 1, 8, mtrx_values);
910
911 }
912
913 A = (HYPRE_ParCSRMatrix) GenerateSysLaplacianVCoef(hypre_MPI_COMM_WORLD,
914 nx, ny, nz, P, Q,
915 R, p, q, r, num_fun, mtrx, mtrx_values);
916
917
918
919
920
921 hypre_TFree(mtrx_values, HYPRE_MEMORY_HOST);
922 }
923
924 hypre_TFree(mtrx, HYPRE_MEMORY_HOST);
925 }
926
927 hypre_TFree(values, HYPRE_MEMORY_HOST);
928
929 *A_ptr = A;
930
931 return (0);
932 }
933
934 /*----------------------------------------------------------------------
935 * returns the sign of a real number
936 * 1 : positive
937 * 0 : zero
938 * -1 : negative
939 *----------------------------------------------------------------------*/
sign_double(HYPRE_Real a)940 static inline HYPRE_Int sign_double(HYPRE_Real a)
941 {
942 return ( (0.0 < a) - (0.0 > a) );
943 }
944
945 /*----------------------------------------------------------------------
946 * Build standard 7-point convection-diffusion operator
947 * Parameters given in command line.
948 * Operator:
949 *
950 * -cx Dxx - cy Dyy - cz Dzz + ax Dx + ay Dy + az Dz = f
951 *
952 *----------------------------------------------------------------------*/
953
954 HYPRE_Int
BuildParDifConv(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParCSRMatrix * A_ptr)955 BuildParDifConv( HYPRE_Int argc,
956 char *argv[],
957 HYPRE_Int arg_index,
958 HYPRE_ParCSRMatrix *A_ptr)
959 {
960 HYPRE_Int nx, ny, nz;
961 HYPRE_Int P, Q, R;
962 HYPRE_Real cx, cy, cz;
963 HYPRE_Real ax, ay, az, atype;
964 HYPRE_Real hinx,hiny,hinz;
965 HYPRE_Int sign_prod;
966
967 HYPRE_ParCSRMatrix A;
968
969 HYPRE_Int num_procs, myid;
970 HYPRE_Int p, q, r;
971 HYPRE_Real *values;
972
973 /*-----------------------------------------------------------
974 * Initialize some stuff
975 *-----------------------------------------------------------*/
976
977 hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
978 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
979
980 /*-----------------------------------------------------------
981 * Set defaults
982 *-----------------------------------------------------------*/
983
984 nx = 10;
985 ny = 10;
986 nz = 10;
987
988 P = 1;
989 Q = num_procs;
990 R = 1;
991
992 cx = 1.;
993 cy = 1.;
994 cz = 1.;
995
996 ax = 1.;
997 ay = 1.;
998 az = 1.;
999
1000 atype = 0;
1001
1002 /*-----------------------------------------------------------
1003 * Parse command line
1004 *-----------------------------------------------------------*/
1005 arg_index = 0;
1006 while (arg_index < argc)
1007 {
1008 if ( strcmp(argv[arg_index], "-n") == 0 )
1009 {
1010 arg_index++;
1011 nx = atoi(argv[arg_index++]);
1012 ny = atoi(argv[arg_index++]);
1013 nz = atoi(argv[arg_index++]);
1014 }
1015 else if ( strcmp(argv[arg_index], "-P") == 0 )
1016 {
1017 arg_index++;
1018 P = atoi(argv[arg_index++]);
1019 Q = atoi(argv[arg_index++]);
1020 R = atoi(argv[arg_index++]);
1021 }
1022 else if ( strcmp(argv[arg_index], "-c") == 0 )
1023 {
1024 arg_index++;
1025 cx = atof(argv[arg_index++]);
1026 cy = atof(argv[arg_index++]);
1027 cz = atof(argv[arg_index++]);
1028 }
1029 else if ( strcmp(argv[arg_index], "-a") == 0 )
1030 {
1031 arg_index++;
1032 ax = atof(argv[arg_index++]);
1033 ay = atof(argv[arg_index++]);
1034 az = atof(argv[arg_index++]);
1035 }
1036 else if ( strcmp(argv[arg_index], "-atype") == 0 )
1037 {
1038 arg_index++;
1039 atype = atoi(argv[arg_index++]);
1040 }
1041 else
1042 {
1043 arg_index++;
1044 }
1045 }
1046
1047 /*-----------------------------------------------------------
1048 * Check a few things
1049 *-----------------------------------------------------------*/
1050
1051 if ((P*Q*R) != num_procs)
1052 {
1053 hypre_printf("Error: Invalid number of processors or processor topology \n");
1054 exit(1);
1055 }
1056
1057 /*-----------------------------------------------------------
1058 * Print driver parameters
1059 *-----------------------------------------------------------*/
1060
1061 if (myid == 0)
1062 {
1063 hypre_printf(" Convection-Diffusion: \n");
1064 hypre_printf(" -cx Dxx - cy Dyy - cz Dzz + ax Dx + ay Dy + az Dz = f\n");
1065 hypre_printf(" (nx, ny, nz) = (%d, %d, %d)\n", nx, ny, nz);
1066 hypre_printf(" (Px, Py, Pz) = (%d, %d, %d)\n", P, Q, R);
1067 hypre_printf(" (cx, cy, cz) = (%f, %f, %f)\n", cx, cy, cz);
1068 hypre_printf(" (ax, ay, az) = (%f, %f, %f)\n\n", ax, ay, az);
1069 }
1070
1071 /*-----------------------------------------------------------
1072 * Set up the grid structure
1073 *-----------------------------------------------------------*/
1074
1075 /* compute p,q,r from P,Q,R and myid */
1076 p = myid % P;
1077 q = (( myid - p)/P) % Q;
1078 r = ( myid - p - P*q)/( P*Q );
1079
1080 hinx = 1./(nx+1);
1081 hiny = 1./(ny+1);
1082 hinz = 1./(nz+1);
1083
1084 /*-----------------------------------------------------------
1085 * Generate the matrix
1086 *-----------------------------------------------------------*/
1087 /* values[7]:
1088 * [0]: center
1089 * [1]: X-
1090 * [2]: Y-
1091 * [3]: Z-
1092 * [4]: X+
1093 * [5]: Y+
1094 * [6]: Z+
1095 */
1096 values = hypre_CTAlloc(HYPRE_Real, 7, HYPRE_MEMORY_HOST);
1097
1098 values[0] = 0.;
1099
1100 if (0 == atype) /* forward scheme for conv */
1101 {
1102 values[1] = -cx/(hinx*hinx);
1103 values[2] = -cy/(hiny*hiny);
1104 values[3] = -cz/(hinz*hinz);
1105 values[4] = -cx/(hinx*hinx) + ax/hinx;
1106 values[5] = -cy/(hiny*hiny) + ay/hiny;
1107 values[6] = -cz/(hinz*hinz) + az/hinz;
1108
1109 if (nx > 1)
1110 {
1111 values[0] += 2.0*cx/(hinx*hinx) - 1.*ax/hinx;
1112 }
1113 if (ny > 1)
1114 {
1115 values[0] += 2.0*cy/(hiny*hiny) - 1.*ay/hiny;
1116 }
1117 if (nz > 1)
1118 {
1119 values[0] += 2.0*cz/(hinz*hinz) - 1.*az/hinz;
1120 }
1121 }
1122 else if (1 == atype) /* backward scheme for conv */
1123 {
1124 values[1] = -cx/(hinx*hinx) - ax/hinx;
1125 values[2] = -cy/(hiny*hiny) - ay/hiny;
1126 values[3] = -cz/(hinz*hinz) - az/hinz;
1127 values[4] = -cx/(hinx*hinx);
1128 values[5] = -cy/(hiny*hiny);
1129 values[6] = -cz/(hinz*hinz);
1130
1131 if (nx > 1)
1132 {
1133 values[0] += 2.0*cx/(hinx*hinx) + 1.*ax/hinx;
1134 }
1135 if (ny > 1)
1136 {
1137 values[0] += 2.0*cy/(hiny*hiny) + 1.*ay/hiny;
1138 }
1139 if (nz > 1)
1140 {
1141 values[0] += 2.0*cz/(hinz*hinz) + 1.*az/hinz;
1142 }
1143 }
1144 else if (3 == atype) /* upwind scheme */
1145 {
1146 sign_prod = sign_double(cx) * sign_double(ax);
1147 if (sign_prod == 1) /* same sign use back scheme */
1148 {
1149 values[1] = -cx/(hinx*hinx) - ax/hinx;
1150 values[4] = -cx/(hinx*hinx);
1151 if (nx > 1)
1152 {
1153 values[0] += 2.0*cx/(hinx*hinx) + 1.*ax/hinx;
1154 }
1155 }
1156 else /* diff sign use forward scheme */
1157 {
1158 values[1] = -cx/(hinx*hinx);
1159 values[4] = -cx/(hinx*hinx) + ax/hinx;
1160 if (nx > 1)
1161 {
1162 values[0] += 2.0*cx/(hinx*hinx) - 1.*ax/hinx;
1163 }
1164 }
1165
1166 sign_prod = sign_double(cy) * sign_double(ay);
1167 if (sign_prod == 1) /* same sign use back scheme */
1168 {
1169 values[2] = -cy/(hiny*hiny) - ay/hiny;
1170 values[5] = -cy/(hiny*hiny);
1171 if (ny > 1)
1172 {
1173 values[0] += 2.0*cy/(hiny*hiny) + 1.*ay/hiny;
1174 }
1175 }
1176 else /* diff sign use forward scheme */
1177 {
1178 values[2] = -cy/(hiny*hiny);
1179 values[5] = -cy/(hiny*hiny) + ay/hiny;
1180 if (ny > 1)
1181 {
1182 values[0] += 2.0*cy/(hiny*hiny) - 1.*ay/hiny;
1183 }
1184 }
1185
1186 sign_prod = sign_double(cz) * sign_double(az);
1187 if (sign_prod == 1) /* same sign use back scheme */
1188 {
1189 values[3] = -cz/(hinz*hinz) - az/hinz;
1190 values[6] = -cz/(hinz*hinz);
1191 if (nz > 1)
1192 {
1193 values[0] += 2.0*cz/(hinz*hinz) + 1.*az/hinz;
1194 }
1195 }
1196 else /* diff sign use forward scheme */
1197 {
1198 values[3] = -cz/(hinz*hinz);
1199 values[6] = -cz/(hinz*hinz) + az/hinz;
1200 if (nz > 1)
1201 {
1202 values[0] += 2.0*cz/(hinz*hinz) - 1.*az/hinz;
1203 }
1204 }
1205 }
1206 else /* centered difference scheme */
1207 {
1208 values[1] = -cx/(hinx*hinx) - ax/(2.*hinx);
1209 values[2] = -cy/(hiny*hiny) - ay/(2.*hiny);
1210 values[3] = -cz/(hinz*hinz) - az/(2.*hinz);
1211 values[4] = -cx/(hinx*hinx) + ax/(2.*hinx);
1212 values[5] = -cy/(hiny*hiny) + ay/(2.*hiny);
1213 values[6] = -cz/(hinz*hinz) + az/(2.*hinz);
1214
1215 if (nx > 1)
1216 {
1217 values[0] += 2.0*cx/(hinx*hinx);
1218 }
1219 if (ny > 1)
1220 {
1221 values[0] += 2.0*cy/(hiny*hiny);
1222 }
1223 if (nz > 1)
1224 {
1225 values[0] += 2.0*cz/(hinz*hinz);
1226 }
1227 }
1228
1229 A = (HYPRE_ParCSRMatrix) GenerateDifConv(hypre_MPI_COMM_WORLD,
1230 nx, ny, nz, P, Q, R, p, q, r, values);
1231
1232 hypre_TFree(values, HYPRE_MEMORY_HOST);
1233
1234 *A_ptr = A;
1235
1236 return (0);
1237 }
1238
1239 /*----------------------------------------------------------------------
1240 * Build standard 9-point laplacian in 2D with grid and anisotropy.
1241 * Parameters given in command line.
1242 *----------------------------------------------------------------------*/
1243
1244 HYPRE_Int
BuildParLaplacian9pt(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParCSRMatrix * A_ptr)1245 BuildParLaplacian9pt( HYPRE_Int argc,
1246 char *argv[],
1247 HYPRE_Int arg_index,
1248 HYPRE_ParCSRMatrix *A_ptr )
1249 {
1250 HYPRE_Int nx, ny;
1251 HYPRE_Int P, Q;
1252
1253 HYPRE_ParCSRMatrix A;
1254
1255 HYPRE_Int num_procs, myid;
1256 HYPRE_Int p, q;
1257 HYPRE_Real *values;
1258
1259 /*-----------------------------------------------------------
1260 * Initialize some stuff
1261 *-----------------------------------------------------------*/
1262
1263 hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
1264 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
1265
1266 /*-----------------------------------------------------------
1267 * Set defaults
1268 *-----------------------------------------------------------*/
1269
1270 nx = 10;
1271 ny = 10;
1272
1273 P = 1;
1274 Q = num_procs;
1275
1276 /*-----------------------------------------------------------
1277 * Parse command line
1278 *-----------------------------------------------------------*/
1279 arg_index = 0;
1280 while (arg_index < argc)
1281 {
1282 if ( strcmp(argv[arg_index], "-n") == 0 )
1283 {
1284 arg_index++;
1285 nx = atoi(argv[arg_index++]);
1286 ny = atoi(argv[arg_index++]);
1287 }
1288 else if ( strcmp(argv[arg_index], "-P") == 0 )
1289 {
1290 arg_index++;
1291 P = atoi(argv[arg_index++]);
1292 Q = atoi(argv[arg_index++]);
1293 }
1294 else
1295 {
1296 arg_index++;
1297 }
1298 }
1299
1300 /*-----------------------------------------------------------
1301 * Check a few things
1302 *-----------------------------------------------------------*/
1303
1304 if ((P*Q) != num_procs)
1305 {
1306 hypre_printf("Error: Invalid number of processors or processor topology \n");
1307 exit(1);
1308 }
1309
1310 /*-----------------------------------------------------------
1311 * Print driver parameters
1312 *-----------------------------------------------------------*/
1313
1314 if (myid == 0)
1315 {
1316 hypre_printf(" Laplacian 9pt:\n");
1317 hypre_printf(" (nx, ny) = (%d, %d)\n", nx, ny);
1318 hypre_printf(" (Px, Py) = (%d, %d)\n\n", P, Q);
1319 }
1320
1321 /*-----------------------------------------------------------
1322 * Set up the grid structure
1323 *-----------------------------------------------------------*/
1324
1325 /* compute p,q from P,Q and myid */
1326 p = myid % P;
1327 q = ( myid - p)/P;
1328
1329 /*-----------------------------------------------------------
1330 * Generate the matrix
1331 *-----------------------------------------------------------*/
1332
1333 values = hypre_CTAlloc(HYPRE_Real, 2, HYPRE_MEMORY_HOST);
1334
1335 values[1] = -1.;
1336
1337 values[0] = 0.;
1338 if (nx > 1)
1339 {
1340 values[0] += 2.0;
1341 }
1342 if (ny > 1)
1343 {
1344 values[0] += 2.0;
1345 }
1346 if (nx > 1 && ny > 1)
1347 {
1348 values[0] += 4.0;
1349 }
1350
1351 A = (HYPRE_ParCSRMatrix) GenerateLaplacian9pt(hypre_MPI_COMM_WORLD,
1352 nx, ny, P, Q, p, q, values);
1353
1354 hypre_TFree(values, HYPRE_MEMORY_HOST);
1355
1356 *A_ptr = A;
1357
1358 return (0);
1359 }
1360 /*----------------------------------------------------------------------
1361 * Build 27-point laplacian in 3D,
1362 * Parameters given in command line.
1363 *----------------------------------------------------------------------*/
1364
1365 HYPRE_Int
BuildParLaplacian27pt(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParCSRMatrix * A_ptr)1366 BuildParLaplacian27pt( HYPRE_Int argc,
1367 char *argv[],
1368 HYPRE_Int arg_index,
1369 HYPRE_ParCSRMatrix *A_ptr )
1370 {
1371 HYPRE_Int nx, ny, nz;
1372 HYPRE_Int P, Q, R;
1373
1374 HYPRE_ParCSRMatrix A;
1375
1376 HYPRE_Int num_procs, myid;
1377 HYPRE_Int p, q, r;
1378 HYPRE_Real *values;
1379
1380 /*-----------------------------------------------------------
1381 * Initialize some stuff
1382 *-----------------------------------------------------------*/
1383
1384 hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
1385 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
1386
1387 /*-----------------------------------------------------------
1388 * Set defaults
1389 *-----------------------------------------------------------*/
1390
1391 nx = 10;
1392 ny = 10;
1393 nz = 10;
1394
1395 P = 1;
1396 Q = num_procs;
1397 R = 1;
1398
1399 /*-----------------------------------------------------------
1400 * Parse command line
1401 *-----------------------------------------------------------*/
1402 arg_index = 0;
1403 while (arg_index < argc)
1404 {
1405 if ( strcmp(argv[arg_index], "-n") == 0 )
1406 {
1407 arg_index++;
1408 nx = atoi(argv[arg_index++]);
1409 ny = atoi(argv[arg_index++]);
1410 nz = atoi(argv[arg_index++]);
1411 }
1412 else if ( strcmp(argv[arg_index], "-P") == 0 )
1413 {
1414 arg_index++;
1415 P = atoi(argv[arg_index++]);
1416 Q = atoi(argv[arg_index++]);
1417 R = atoi(argv[arg_index++]);
1418 }
1419 else
1420 {
1421 arg_index++;
1422 }
1423 }
1424
1425 /*-----------------------------------------------------------
1426 * Check a few things
1427 *-----------------------------------------------------------*/
1428
1429 if ((P*Q*R) != num_procs)
1430 {
1431 hypre_printf("Error: Invalid number of processors or processor topology \n");
1432 exit(1);
1433 }
1434
1435 /*-----------------------------------------------------------
1436 * Print driver parameters
1437 *-----------------------------------------------------------*/
1438
1439 if (myid == 0)
1440 {
1441 hypre_printf(" Laplacian_27pt:\n");
1442 hypre_printf(" (nx, ny, nz) = (%d, %d, %d)\n", nx, ny, nz);
1443 hypre_printf(" (Px, Py, Pz) = (%d, %d, %d)\n\n", P, Q, R);
1444 }
1445
1446 /*-----------------------------------------------------------
1447 * Set up the grid structure
1448 *-----------------------------------------------------------*/
1449
1450 /* compute p,q,r from P,Q,R and myid */
1451 p = myid % P;
1452 q = (( myid - p)/P) % Q;
1453 r = ( myid - p - P*q)/( P*Q );
1454
1455 /*-----------------------------------------------------------
1456 * Generate the matrix
1457 *-----------------------------------------------------------*/
1458
1459 values = hypre_CTAlloc(HYPRE_Real, 2, HYPRE_MEMORY_HOST);
1460
1461 values[0] = 26.0;
1462 if (nx == 1 || ny == 1 || nz == 1)
1463 values[0] = 8.0;
1464 if (nx*ny == 1 || nx*nz == 1 || ny*nz == 1)
1465 values[0] = 2.0;
1466 values[1] = -1.;
1467
1468 A = (HYPRE_ParCSRMatrix) GenerateLaplacian27pt(hypre_MPI_COMM_WORLD,
1469 nx, ny, nz, P, Q, R, p, q, r, values);
1470
1471 hypre_TFree(values, HYPRE_MEMORY_HOST);
1472
1473 *A_ptr = A;
1474
1475 return (0);
1476 }
1477
1478
1479 /*----------------------------------------------------------------------
1480 * Build 7-point in 2D
1481 * Parameters given in command line.
1482 *----------------------------------------------------------------------*/
1483
1484 HYPRE_Int
BuildParRotate7pt(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParCSRMatrix * A_ptr)1485 BuildParRotate7pt( HYPRE_Int argc,
1486 char *argv[],
1487 HYPRE_Int arg_index,
1488 HYPRE_ParCSRMatrix *A_ptr )
1489 {
1490 HYPRE_Int nx, ny;
1491 HYPRE_Int P, Q;
1492
1493 HYPRE_ParCSRMatrix A;
1494
1495 HYPRE_Int num_procs, myid;
1496 HYPRE_Int p, q;
1497 HYPRE_Real eps, alpha;
1498
1499 /*-----------------------------------------------------------
1500 * Initialize some stuff
1501 *-----------------------------------------------------------*/
1502
1503 hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
1504 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
1505
1506 /*-----------------------------------------------------------
1507 * Set defaults
1508 *-----------------------------------------------------------*/
1509
1510 nx = 10;
1511 ny = 10;
1512
1513 P = 1;
1514 Q = num_procs;
1515
1516 /*-----------------------------------------------------------
1517 * Parse command line
1518 *-----------------------------------------------------------*/
1519 arg_index = 0;
1520 while (arg_index < argc)
1521 {
1522 if ( strcmp(argv[arg_index], "-n") == 0 )
1523 {
1524 arg_index++;
1525 nx = atoi(argv[arg_index++]);
1526 ny = atoi(argv[arg_index++]);
1527 }
1528 else if ( strcmp(argv[arg_index], "-P") == 0 )
1529 {
1530 arg_index++;
1531 P = atoi(argv[arg_index++]);
1532 Q = atoi(argv[arg_index++]);
1533 }
1534 else if ( strcmp(argv[arg_index], "-alpha") == 0 )
1535 {
1536 arg_index++;
1537 alpha = atof(argv[arg_index++]);
1538 }
1539 else if ( strcmp(argv[arg_index], "-eps") == 0 )
1540 {
1541 arg_index++;
1542 eps = atof(argv[arg_index++]);
1543 }
1544 else
1545 {
1546 arg_index++;
1547 }
1548 }
1549
1550 /*-----------------------------------------------------------
1551 * Check a few things
1552 *-----------------------------------------------------------*/
1553
1554 if ((P*Q) != num_procs)
1555 {
1556 hypre_printf("Error: Invalid number of processors or processor topology \n");
1557 exit(1);
1558 }
1559
1560 /*-----------------------------------------------------------
1561 * Print driver parameters
1562 *-----------------------------------------------------------*/
1563
1564 if (myid == 0)
1565 {
1566 hypre_printf(" Rotate 7pt:\n");
1567 hypre_printf(" alpha = %f, eps = %f\n", alpha,eps);
1568 hypre_printf(" (nx, ny) = (%d, %d)\n", nx, ny);
1569 hypre_printf(" (Px, Py) = (%d, %d)\n", P, Q);
1570 }
1571
1572 /*-----------------------------------------------------------
1573 * Set up the grid structure
1574 *-----------------------------------------------------------*/
1575
1576 /* compute p,q from P,Q and myid */
1577 p = myid % P;
1578 q = ( myid - p)/P;
1579
1580 /*-----------------------------------------------------------
1581 * Generate the matrix
1582 *-----------------------------------------------------------*/
1583
1584 A = (HYPRE_ParCSRMatrix) GenerateRotate7pt(hypre_MPI_COMM_WORLD,
1585 nx, ny, P, Q, p, q, alpha, eps);
1586
1587 *A_ptr = A;
1588
1589 return (0);
1590 }
1591
1592 /*----------------------------------------------------------------------
1593 * Build standard 7-point difference operator using centered differences
1594 *
1595 * eps*(a(x,y,z) ux)x + (b(x,y,z) uy)y + (c(x,y,z) uz)z
1596 * d(x,y,z) ux + e(x,y,z) uy + f(x,y,z) uz + g(x,y,z) u
1597 *
1598 * functions a,b,c,d,e,f,g need to be defined inside par_vardifconv.c
1599 *
1600 *----------------------------------------------------------------------*/
1601
1602 HYPRE_Int
BuildParVarDifConv(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParCSRMatrix * A_ptr,HYPRE_ParVector * rhs_ptr)1603 BuildParVarDifConv( HYPRE_Int argc,
1604 char *argv[],
1605 HYPRE_Int arg_index,
1606 HYPRE_ParCSRMatrix *A_ptr ,
1607 HYPRE_ParVector *rhs_ptr )
1608 {
1609 HYPRE_Int nx, ny, nz;
1610 HYPRE_Int P, Q, R;
1611
1612 HYPRE_ParCSRMatrix A;
1613 HYPRE_ParVector rhs;
1614
1615 HYPRE_Int num_procs, myid;
1616 HYPRE_Int p, q, r;
1617 HYPRE_Int type;
1618 HYPRE_Real eps;
1619
1620 /*-----------------------------------------------------------
1621 * Initialize some stuff
1622 *-----------------------------------------------------------*/
1623
1624 hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
1625 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
1626
1627 /*-----------------------------------------------------------
1628 * Set defaults
1629 *-----------------------------------------------------------*/
1630
1631 nx = 10;
1632 ny = 10;
1633 nz = 10;
1634 P = 1;
1635 Q = num_procs;
1636 R = 1;
1637 eps = 1.0;
1638
1639 /* type: 0 : default FD;
1640 * 1-3 : FD and examples 1-3 in Ruge-Stuben paper */
1641 type = 0;
1642
1643 /*-----------------------------------------------------------
1644 * Parse command line
1645 *-----------------------------------------------------------*/
1646 arg_index = 0;
1647 while (arg_index < argc)
1648 {
1649 if ( strcmp(argv[arg_index], "-n") == 0 )
1650 {
1651 arg_index++;
1652 nx = atoi(argv[arg_index++]);
1653 ny = atoi(argv[arg_index++]);
1654 nz = atoi(argv[arg_index++]);
1655 }
1656 else if ( strcmp(argv[arg_index], "-P") == 0 )
1657 {
1658 arg_index++;
1659 P = atoi(argv[arg_index++]);
1660 Q = atoi(argv[arg_index++]);
1661 R = atoi(argv[arg_index++]);
1662 }
1663 else if ( strcmp(argv[arg_index], "-eps") == 0 )
1664 {
1665 arg_index++;
1666 eps = atof(argv[arg_index++]);
1667 }
1668 else if ( strcmp(argv[arg_index], "-vardifconvRS") == 0 )
1669 {
1670 arg_index++;
1671 type = atoi(argv[arg_index++]);
1672 }
1673 else
1674 {
1675 arg_index++;
1676 }
1677 }
1678
1679 /*-----------------------------------------------------------
1680 * Check a few things
1681 *-----------------------------------------------------------*/
1682
1683 if ((P*Q*R) != num_procs)
1684 {
1685 hypre_printf("Error: Invalid number of processors or processor topology \n");
1686 exit(1);
1687 }
1688
1689 /*-----------------------------------------------------------
1690 * Print driver parameters
1691 *-----------------------------------------------------------*/
1692
1693 if (myid == 0)
1694 {
1695 hypre_printf(" ell PDE: eps = %f\n", eps);
1696 hypre_printf(" Dx(aDxu) + Dy(bDyu) + Dz(cDzu) + d Dxu + e Dyu + f Dzu + g u= f\n");
1697 hypre_printf(" (nx, ny, nz) = (%d, %d, %d)\n", nx, ny, nz);
1698 hypre_printf(" (Px, Py, Pz) = (%d, %d, %d)\n", P, Q, R);
1699 }
1700 /*-----------------------------------------------------------
1701 * Set up the grid structure
1702 *-----------------------------------------------------------*/
1703
1704 /* compute p,q,r from P,Q,R and myid */
1705 p = myid % P;
1706 q = (( myid - p)/P) % Q;
1707 r = ( myid - p - P*q)/( P*Q );
1708
1709 /*-----------------------------------------------------------
1710 * Generate the matrix
1711 *-----------------------------------------------------------*/
1712
1713 if (0 == type)
1714 {
1715 A = (HYPRE_ParCSRMatrix) GenerateVarDifConv(hypre_MPI_COMM_WORLD,
1716 nx, ny, nz, P, Q, R, p, q, r, eps, &rhs);
1717 }
1718 else
1719 {
1720 A = (HYPRE_ParCSRMatrix) GenerateRSVarDifConv(hypre_MPI_COMM_WORLD,
1721 nx, ny, nz, P, Q, R, p, q, r, eps, &rhs,
1722 type);
1723 }
1724
1725 *A_ptr = A;
1726 *rhs_ptr = rhs;
1727
1728 return (0);
1729 }
1730
1731 /**************************************************************************/
1732
1733
SetSysVcoefValues(HYPRE_Int num_fun,HYPRE_Int nx,HYPRE_Int ny,HYPRE_Int nz,HYPRE_Real vcx,HYPRE_Real vcy,HYPRE_Real vcz,HYPRE_Int mtx_entry,HYPRE_Real * values)1734 HYPRE_Int SetSysVcoefValues(HYPRE_Int num_fun, HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int nz, HYPRE_Real vcx,
1735 HYPRE_Real vcy, HYPRE_Real vcz, HYPRE_Int mtx_entry, HYPRE_Real *values)
1736 {
1737
1738
1739 HYPRE_Int sz = num_fun*num_fun;
1740
1741 values[1*sz + mtx_entry] = -vcx;
1742 values[2*sz + mtx_entry] = -vcy;
1743 values[3*sz + mtx_entry] = -vcz;
1744 values[0*sz + mtx_entry] = 0.0;
1745
1746 if (nx > 1)
1747 {
1748 values[0*sz + mtx_entry] += 2.0*vcx;
1749 }
1750 if (ny > 1)
1751 {
1752 values[0*sz + mtx_entry] += 2.0*vcy;
1753 }
1754 if (nz > 1)
1755 {
1756 values[0*sz + mtx_entry] += 2.0*vcz;
1757 }
1758
1759 return 0;
1760
1761 }
1762
1763 /*----------------------------------------------------------------------
1764 * Build coordinates for 1D/2D/3D
1765 *----------------------------------------------------------------------*/
1766
1767 HYPRE_Int
BuildParCoordinates(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_Int * coorddim_ptr,float ** coord_ptr)1768 BuildParCoordinates( HYPRE_Int argc,
1769 char *argv[],
1770 HYPRE_Int arg_index,
1771 HYPRE_Int *coorddim_ptr,
1772 float **coord_ptr )
1773 {
1774 HYPRE_Int nx, ny, nz;
1775 HYPRE_Int P, Q, R;
1776
1777 HYPRE_Int num_procs, myid;
1778 HYPRE_Int p, q, r;
1779
1780 HYPRE_Int coorddim;
1781 float *coordinates;
1782
1783 /*-----------------------------------------------------------
1784 * Initialize some stuff
1785 *-----------------------------------------------------------*/
1786
1787 hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
1788 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
1789
1790 /*-----------------------------------------------------------
1791 * Set defaults
1792 *-----------------------------------------------------------*/
1793
1794 nx = 10;
1795 ny = 10;
1796 nz = 10;
1797
1798 P = 1;
1799 Q = num_procs;
1800 R = 1;
1801
1802 /*-----------------------------------------------------------
1803 * Parse command line
1804 *-----------------------------------------------------------*/
1805 arg_index = 0;
1806 while (arg_index < argc)
1807 {
1808 if ( strcmp(argv[arg_index], "-n") == 0 )
1809 {
1810 arg_index++;
1811 nx = atoi(argv[arg_index++]);
1812 ny = atoi(argv[arg_index++]);
1813 nz = atoi(argv[arg_index++]);
1814 }
1815 else if ( strcmp(argv[arg_index], "-P") == 0 )
1816 {
1817 arg_index++;
1818 P = atoi(argv[arg_index++]);
1819 Q = atoi(argv[arg_index++]);
1820 R = atoi(argv[arg_index++]);
1821 }
1822 else
1823 {
1824 arg_index++;
1825 }
1826 }
1827
1828 /* compute p,q,r from P,Q,R and myid */
1829 p = myid % P;
1830 q = (( myid - p)/P) % Q;
1831 r = ( myid - p - P*q)/( P*Q );
1832
1833 /*-----------------------------------------------------------
1834 * Generate the coordinates
1835 *-----------------------------------------------------------*/
1836
1837 coorddim = 3;
1838 if (nx<2) coorddim--;
1839 if (ny<2) coorddim--;
1840 if (nz<2) coorddim--;
1841
1842 if (coorddim>0)
1843 coordinates = GenerateCoordinates (hypre_MPI_COMM_WORLD,
1844 nx, ny, nz, P, Q, R, p, q, r, coorddim);
1845 else
1846 coordinates=NULL;
1847
1848 *coorddim_ptr = coorddim;
1849 *coord_ptr = coordinates;
1850 return (0);
1851 }
1852
1853
1854 void
testPMIS(HYPRE_ParCSRMatrix parcsr_A)1855 testPMIS(HYPRE_ParCSRMatrix parcsr_A)
1856 {
1857 HYPRE_Int nC1 = 0, nC2 = 0, i;
1858 HYPRE_Int time_index;
1859 hypre_IntArray *h_CF_marker = NULL;
1860 hypre_IntArray *h_CF_marker2 = NULL;
1861 HYPRE_Int *d_CF_marker = NULL;
1862 HYPRE_Real max_row_sum = 1.0;
1863 HYPRE_Int num_functions = 1;
1864 HYPRE_Real strong_threshold = 0.25;
1865 HYPRE_Int debug_flag = 0;
1866 HYPRE_Int local_num_rows;
1867 HYPRE_Int first_local_row, last_local_row;
1868 HYPRE_Int first_local_col, last_local_col;
1869 MPI_Comm comm = hypre_MPI_COMM_WORLD;
1870 HYPRE_Int num_procs, myid;
1871
1872 hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
1873 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
1874
1875 HYPRE_ParCSRMatrixGetLocalRange( parcsr_A,
1876 &first_local_row, &last_local_row ,
1877 &first_local_col, &last_local_col );
1878
1879 local_num_rows = last_local_row - first_local_row + 1;
1880 //local_num_cols = last_local_col - first_local_col + 1;
1881
1882 /* Soc on HOST */
1883 HYPRE_ParCSRMatrix parcsr_S = NULL;
1884
1885 hypre_BoomerAMGCreateSHost(parcsr_A, strong_threshold, max_row_sum,
1886 num_functions, NULL, &parcsr_S);
1887
1888 /* PMIS on HOST */
1889 time_index = hypre_InitializeTiming("Host PMIS");
1890 hypre_BeginTiming(time_index);
1891
1892 hypre_BoomerAMGCoarsenPMISHost(parcsr_S, parcsr_A, 2, debug_flag, &h_CF_marker);
1893
1894 hypre_EndTiming(time_index);
1895 hypre_PrintTiming("Host PMIS", hypre_MPI_COMM_WORLD);
1896 hypre_FinalizeTiming(time_index);
1897 hypre_ClearTiming();
1898
1899 /* Soc on DEVICE */
1900 HYPRE_ParCSRMatrix parcsr_S_device = NULL;
1901 hypre_BoomerAMGCreateSDevice(parcsr_A, strong_threshold, max_row_sum,
1902 num_functions, NULL, &parcsr_S_device);
1903 /* PMIS on DEVICE */
1904 time_index = hypre_InitializeTiming("Device PMIS");
1905 hypre_BeginTiming(time_index);
1906
1907 h_CF_marker2 = hypre_IntArrayCreate(local_num_rows);
1908 hypre_IntArrayInitialize(h_CF_marker2);
1909 hypre_BoomerAMGCoarsenPMISDevice(parcsr_S_device, parcsr_A, 2, debug_flag, &h_CF_marker2);
1910
1911 hypre_EndTiming(time_index);
1912 hypre_PrintTiming("Device PMIS", hypre_MPI_COMM_WORLD);
1913 hypre_FinalizeTiming(time_index);
1914 hypre_ClearTiming();
1915
1916 //h_CF_marker2 = hypre_TAlloc(HYPRE_Int, local_num_rows, HYPRE_MEMORY_HOST);
1917 //hypre_TMemcpy(h_CF_marker2, d_CF_marker, HYPRE_Int, local_num_rows, HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE);
1918
1919 for (i = 0; i < local_num_rows; i++)
1920 {
1921 if (hypre_IntArrayData(h_CF_marker)[i] > 0)
1922 {
1923 nC1++;
1924 }
1925
1926 hypre_assert(hypre_IntArrayData(h_CF_marker2)[i] == 1 || hypre_IntArrayData(h_CF_marker2)[i] == -1 || hypre_IntArrayData(h_CF_marker2)[i] == -3);
1927
1928 //hypre_assert(h_CF_marker[i] == h_CF_marker2[i]);
1929
1930 if (hypre_IntArrayData(h_CF_marker2)[i] > 0)
1931 {
1932 nC2++;
1933 }
1934 }
1935
1936 HYPRE_Int allnC1, allnC2;
1937 hypre_MPI_Allreduce(&nC1, &allnC1, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);
1938 hypre_MPI_Allreduce(&nC2, &allnC2, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);
1939 if (myid == 0)
1940 {
1941 printf("nC1 %d nC2 %d\n", allnC1, allnC2);
1942 }
1943
1944 hypre_ParCSRMatrixDestroy(parcsr_S);
1945 hypre_ParCSRMatrixDestroy(parcsr_S_device);
1946 hypre_IntArrayDestroy(h_CF_marker);
1947 hypre_IntArrayDestroy(h_CF_marker2);
1948 hypre_TFree(d_CF_marker, HYPRE_MEMORY_DEVICE);
1949 }
1950
1951 void
testPMIS3(HYPRE_ParCSRMatrix parcsr_A)1952 testPMIS3(HYPRE_ParCSRMatrix parcsr_A)
1953 {
1954 HYPRE_Int nC2 = 0, i;
1955 hypre_IntArray *h_CF_marker2 = NULL;
1956 HYPRE_Real max_row_sum = 1.0;
1957 HYPRE_Int num_functions = 1;
1958 HYPRE_Real strong_threshold = 0.25;
1959 HYPRE_Int debug_flag = 0;
1960 HYPRE_Int local_num_rows;
1961 HYPRE_Int first_local_row, last_local_row;
1962 HYPRE_Int first_local_col, last_local_col;
1963 MPI_Comm comm = hypre_MPI_COMM_WORLD;
1964 HYPRE_Int num_procs, myid;
1965
1966 hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
1967 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
1968
1969 HYPRE_ParCSRMatrixGetLocalRange( parcsr_A,
1970 &first_local_row, &last_local_row ,
1971 &first_local_col, &last_local_col );
1972
1973 local_num_rows = last_local_row - first_local_row + 1;
1974 //local_num_cols = last_local_col - first_local_col + 1;
1975
1976 /* Soc on DEVICE */
1977 HYPRE_ParCSRMatrix parcsr_S_device = NULL;
1978 hypre_BoomerAMGCreateSDevice(parcsr_A, strong_threshold, max_row_sum,
1979 num_functions, NULL, &parcsr_S_device);
1980 /* PMIS on DEVICE */
1981 h_CF_marker2 = hypre_IntArrayCreate(local_num_rows);
1982 hypre_IntArrayInitialize(h_CF_marker2);
1983 hypre_BoomerAMGCoarsenPMISDevice(parcsr_S_device, parcsr_A, 2, debug_flag, &h_CF_marker2);
1984
1985 for (i = 0; i < local_num_rows; i++)
1986 {
1987 hypre_assert(hypre_IntArrayData(h_CF_marker2)[i] == 1 || hypre_IntArrayData(h_CF_marker2)[i] == -1 || hypre_IntArrayData(h_CF_marker2)[i] == -3);
1988
1989 if (hypre_IntArrayData(h_CF_marker2)[i] > 0)
1990 {
1991 nC2++;
1992 }
1993 }
1994
1995 HYPRE_Int allnC2;
1996 hypre_MPI_Allreduce(&nC2, &allnC2, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);
1997 if (myid == 0)
1998 {
1999 printf("nC2 %d\n", allnC2);
2000 }
2001
2002 hypre_ParCSRMatrixDestroy(parcsr_S_device);
2003 hypre_IntArrayDestroy(h_CF_marker2);
2004 }
2005
2006 void
testPMIS2(HYPRE_ParCSRMatrix parcsr_A)2007 testPMIS2(HYPRE_ParCSRMatrix parcsr_A)
2008 {
2009 HYPRE_Int nC2 = 0, i;
2010 hypre_IntArray *h_CF_marker = NULL;
2011 hypre_IntArray *h_CF_marker2 = NULL;
2012 HYPRE_Real max_row_sum = 1.0;
2013 HYPRE_Int num_functions = 1;
2014 HYPRE_Real strong_threshold = 0.25;
2015 HYPRE_Int debug_flag = 0;
2016 HYPRE_Int local_num_rows;
2017 HYPRE_Int first_local_row, last_local_row;
2018 HYPRE_Int first_local_col, last_local_col;
2019 MPI_Comm comm = hypre_ParCSRMatrixComm(parcsr_A);
2020 HYPRE_Int num_procs, myid;
2021
2022 hypre_MPI_Comm_size(comm, &num_procs );
2023 hypre_MPI_Comm_rank(comm, &myid );
2024
2025 HYPRE_ParCSRMatrixGetLocalRange( parcsr_A,
2026 &first_local_row, &last_local_row ,
2027 &first_local_col, &last_local_col );
2028
2029 local_num_rows = last_local_row - first_local_row + 1;
2030 //local_num_cols = last_local_col - first_local_col + 1;
2031
2032 /* Soc on DEVICE */
2033 HYPRE_ParCSRMatrix parcsr_S_device = NULL;
2034 hypre_BoomerAMGCreateSDevice(parcsr_A, strong_threshold, max_row_sum,
2035 num_functions, NULL, &parcsr_S_device);
2036 /* PMIS on DEVICE */
2037 h_CF_marker = hypre_IntArrayCreate(local_num_rows);
2038 hypre_IntArrayInitialize(h_CF_marker);
2039 hypre_BoomerAMGCoarsenPMISDevice(parcsr_S_device, parcsr_A, 2, debug_flag, &h_CF_marker);
2040
2041 HYPRE_Int *coarse_pnts_global = NULL;
2042 hypre_BoomerAMGCoarseParms(comm, local_num_rows, 1, NULL, h_CF_marker, NULL,
2043 &coarse_pnts_global);
2044
2045 /* interp */
2046 hypre_ParCSRMatrix *P;
2047 hypre_BoomerAMGBuildDirInterpDevice(parcsr_A, hypre_IntArrayData(h_CF_marker), parcsr_S_device,
2048 coarse_pnts_global, 1, NULL,
2049 debug_flag, 0.0, 0, 3, &P);
2050
2051 hypre_ParCSRMatrix *AH = hypre_ParCSRMatrixRAPKTDevice(P, parcsr_A, P, 1);
2052 hypre_ParCSRMatrixSetNumNonzeros(AH);
2053
2054 //printf("AH %d, %d\n", hypre_ParCSRMatrixGlobalNumRows(AH), hypre_ParCSRMatrixNumNonzeros(AH));
2055
2056 hypre_ParCSRMatrixPrintIJ(AH, 0, 0, "AH");
2057
2058 HYPRE_Int local_num_rows2 = hypre_ParCSRMatrixNumRows(AH);
2059
2060 hypre_ParCSRMatrix *S2;
2061 hypre_BoomerAMGCreateSDevice(AH, strong_threshold, max_row_sum,
2062 num_functions, NULL, &S2);
2063
2064 h_CF_marker2 = hypre_IntArrayCreate(local_num_rows2);
2065 hypre_IntArrayInitialize(h_CF_marker2);
2066
2067 hypre_BoomerAMGCoarsenPMISDevice(S2, AH, 2, debug_flag, &h_CF_marker2);
2068
2069 for (i = 0; i < local_num_rows2; i++)
2070 {
2071 hypre_assert(hypre_IntArrayData(h_CF_marker2)[i] == 1 || hypre_IntArrayData(h_CF_marker2)[i] == -1 || hypre_IntArrayData(h_CF_marker2)[i] == -3);
2072
2073 if (hypre_IntArrayData(h_CF_marker2)[i] > 0)
2074 {
2075 nC2++;
2076 }
2077 }
2078
2079 HYPRE_Int allnC2;
2080 hypre_MPI_Allreduce(&nC2, &allnC2, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);
2081 if (myid == 0)
2082 {
2083 printf("nC2 %d\n", allnC2);
2084 }
2085
2086 hypre_ParCSRMatrixDestroy(parcsr_S_device);
2087 hypre_ParCSRMatrixDestroy(S2);
2088 hypre_IntArrayDestroy(h_CF_marker);
2089 hypre_IntArrayDestroy(h_CF_marker2);
2090 }
2091
2092 void
testTranspose(HYPRE_ParCSRMatrix parcsr_A)2093 testTranspose(HYPRE_ParCSRMatrix parcsr_A)
2094 {
2095 HYPRE_Int myid;
2096 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid);
2097
2098 HYPRE_Real tol = 0.0;
2099 HYPRE_Int ierr = 0;
2100
2101 HYPRE_ParCSRMatrix parcsr_AT;
2102 hypre_ParCSRMatrixTransposeDevice(parcsr_A, &parcsr_AT, 1);
2103
2104 HYPRE_ParCSRMatrix parcsr_AT_h;
2105 HYPRE_ParCSRMatrix parcsr_A_h = hypre_ParCSRMatrixClone_v2(parcsr_A, 1, HYPRE_MEMORY_HOST);
2106 hypre_ParCSRMatrixTransposeHost(parcsr_A_h, &parcsr_AT_h, 1);
2107
2108 ierr += CompareParCSRDH(parcsr_AT_h, parcsr_AT, tol); hypre_assert(!ierr);
2109
2110 hypre_ParCSRMatrixDestroy(parcsr_AT);
2111 hypre_ParCSRMatrixDestroy(parcsr_AT_h);
2112
2113 //
2114 hypre_ParCSRMatrixTransposeDevice(parcsr_A, &parcsr_AT, 0);
2115 hypre_ParCSRMatrixTransposeHost(parcsr_A_h, &parcsr_AT_h, 0);
2116
2117 hypre_ParCSRMatrixSetConstantValues(parcsr_AT, 1.0);
2118 hypre_ParCSRMatrixSetConstantValues(parcsr_AT_h, 1.0);
2119
2120 ierr += CompareParCSRDH(parcsr_AT_h, parcsr_AT, tol); hypre_assert(!ierr);
2121
2122 hypre_ParCSRMatrixDestroy(parcsr_AT);
2123 hypre_ParCSRMatrixDestroy(parcsr_AT_h);
2124 hypre_ParCSRMatrixDestroy(parcsr_A_h);
2125
2126 //
2127 HYPRE_Int first_local_row, last_local_row;
2128 HYPRE_Int first_local_col, last_local_col;
2129 HYPRE_ParCSRMatrixGetLocalRange( parcsr_A,
2130 &first_local_row, &last_local_row ,
2131 &first_local_col, &last_local_col );
2132 HYPRE_Int local_num_rows = last_local_row - first_local_row + 1;
2133 HYPRE_ParCSRMatrix parcsr_S_device = NULL;
2134 hypre_BoomerAMGCreateSDevice(parcsr_A, 0.25, 1.0, 1, NULL, &parcsr_S_device);
2135 hypre_IntArray *h_CF_marker = hypre_IntArrayCreate(local_num_rows);
2136 hypre_IntArrayInitialize(h_CF_marker);
2137 hypre_BoomerAMGCoarsenPMISDevice(parcsr_S_device, parcsr_A, 2, 0, &h_CF_marker);
2138 hypre_ParCSRMatrix *P, *PT, *P_h, *PT_h, *P2;
2139 HYPRE_Int *coarse_pnts_global = NULL;
2140 MPI_Comm comm = hypre_ParCSRMatrixComm(parcsr_A);
2141 hypre_BoomerAMGCoarseParms(comm, local_num_rows, 1, NULL, h_CF_marker, NULL, &coarse_pnts_global);
2142 hypre_BoomerAMGBuildDirInterpDevice(parcsr_A, hypre_IntArrayData(h_CF_marker), parcsr_S_device,
2143 coarse_pnts_global, 1, NULL,
2144 0, 0.0, 0, 3, &P);
2145 P_h = hypre_ParCSRMatrixClone_v2(P, 1, HYPRE_MEMORY_HOST);
2146
2147 hypre_ParCSRMatrixTransposeDevice(P, &PT, 1);
2148 hypre_ParCSRMatrixTransposeHost(P_h, &PT_h, 1);
2149 hypre_ParCSRMatrixTransposeDevice(PT, &P2, 1);
2150
2151 ierr += CompareParCSRDH(PT_h, PT, tol); hypre_assert(!ierr);
2152 ierr += CompareParCSRDH(P_h, P2, tol); hypre_assert(!ierr);
2153
2154 if (myid == 0 && !ierr)
2155 {
2156 printf("[hypre_ParCSRMatrixTranspose] All Tests were OK ...\n");
2157 }
2158
2159 hypre_ParCSRMatrixDestroy(P);
2160 hypre_ParCSRMatrixDestroy(PT);
2161 hypre_ParCSRMatrixDestroy(P_h);
2162 hypre_ParCSRMatrixDestroy(PT_h);
2163 hypre_ParCSRMatrixDestroy(P2);
2164 hypre_IntArrayDestroy(h_CF_marker);
2165 }
2166
2167 void
testAdd(HYPRE_ParCSRMatrix parcsr_A)2168 testAdd(HYPRE_ParCSRMatrix parcsr_A)
2169 {
2170 HYPRE_Int myid;
2171 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid);
2172
2173 HYPRE_Real tol = 1e-14;
2174 HYPRE_Int ierr = 0;
2175 HYPRE_Real alpha = 3.141592654, beta = 2.718281828*9.9;
2176
2177 HYPRE_ParCSRMatrix parcsr_A2 = hypre_ParCSRMatMatDevice(parcsr_A, parcsr_A);
2178 HYPRE_ParCSRMatrix parcsr_C;
2179 hypre_ParCSRMatrixAddDevice(alpha, parcsr_A, beta, parcsr_A2, &parcsr_C);
2180
2181 HYPRE_ParCSRMatrix parcsr_A_h = hypre_ParCSRMatrixClone_v2(parcsr_A, 1, HYPRE_MEMORY_HOST);
2182 HYPRE_ParCSRMatrix parcsr_A2_h = hypre_ParCSRMatrixClone_v2(parcsr_A2, 1, HYPRE_MEMORY_HOST);
2183 HYPRE_ParCSRMatrix parcsr_C_h;
2184 hypre_ParCSRMatrixAddHost(alpha, parcsr_A_h, beta, parcsr_A2_h, &parcsr_C_h);
2185 ierr += CompareParCSRDH(parcsr_C_h, parcsr_C, tol); hypre_assert(!ierr);
2186
2187 hypre_ParCSRMatrixDestroy(parcsr_A2);
2188 hypre_ParCSRMatrixDestroy(parcsr_C);
2189 hypre_ParCSRMatrixDestroy(parcsr_A_h);
2190 hypre_ParCSRMatrixDestroy(parcsr_A2_h);
2191 hypre_ParCSRMatrixDestroy(parcsr_C_h);
2192
2193 //
2194 HYPRE_Int first_local_row, last_local_row;
2195 HYPRE_Int first_local_col, last_local_col;
2196 HYPRE_ParCSRMatrixGetLocalRange( parcsr_A,
2197 &first_local_row, &last_local_row ,
2198 &first_local_col, &last_local_col );
2199 HYPRE_Int local_num_rows = last_local_row - first_local_row + 1;
2200 HYPRE_ParCSRMatrix parcsr_S_device = NULL;
2201 hypre_BoomerAMGCreateSDevice(parcsr_A, 0.25, 1.0, 1, NULL, &parcsr_S_device);
2202 hypre_IntArray *h_CF_marker = hypre_IntArrayCreate(local_num_rows);
2203 hypre_IntArrayInitialize(h_CF_marker);
2204 hypre_BoomerAMGCoarsenPMISDevice(parcsr_S_device, parcsr_A, 2, 0, &h_CF_marker);
2205 hypre_ParCSRMatrix *P, *AP, *P_h, *AP_h;
2206 HYPRE_Int *coarse_pnts_global = NULL;
2207 MPI_Comm comm = hypre_ParCSRMatrixComm(parcsr_A);
2208 hypre_BoomerAMGCoarseParms(comm, local_num_rows, 1, NULL, h_CF_marker, NULL, &coarse_pnts_global);
2209 hypre_BoomerAMGBuildDirInterpDevice(parcsr_A, hypre_IntArrayData(h_CF_marker), parcsr_S_device,
2210 coarse_pnts_global, 1, NULL,
2211 0, 0.0, 0, 3, &P);
2212 AP = hypre_ParCSRMatMatDevice(parcsr_A, P);
2213 P_h = hypre_ParCSRMatrixClone_v2(P, 1, HYPRE_MEMORY_HOST);
2214 AP_h = hypre_ParCSRMatrixClone_v2(AP, 1, HYPRE_MEMORY_HOST);
2215 hypre_ParCSRMatrixAddDevice(alpha, P, beta, AP, &parcsr_C);
2216 hypre_ParCSRMatrixAddHost(alpha, P_h, beta, AP_h, &parcsr_C_h);
2217
2218 ierr += CompareParCSRDH(parcsr_C_h, parcsr_C, tol); hypre_assert(!ierr);
2219
2220 hypre_ParCSRMatrixDestroy(P);
2221 hypre_ParCSRMatrixDestroy(AP);
2222 hypre_ParCSRMatrixDestroy(P_h);
2223 hypre_ParCSRMatrixDestroy(AP_h);
2224 hypre_ParCSRMatrixDestroy(parcsr_C);
2225 hypre_ParCSRMatrixDestroy(parcsr_C_h);
2226 hypre_ParCSRMatrixDestroy(parcsr_S_device);
2227 hypre_IntArrayDestroy(h_CF_marker);
2228
2229 if (myid == 0 && !ierr)
2230 {
2231 printf("[hypre_ParCSRMatrixAdd] All Tests were OK ...\n");
2232 }
2233 }
2234
2235 void
testFFFC(HYPRE_ParCSRMatrix parcsr_A)2236 testFFFC(HYPRE_ParCSRMatrix parcsr_A)
2237 {
2238 HYPRE_Real max_row_sum = 1.0;
2239 HYPRE_Real strong_threshold = 0.25;
2240 HYPRE_Int debug_flag = 0;
2241 HYPRE_Int num_functions = 1;
2242 hypre_IntArray *h_CF_marker = NULL;
2243 HYPRE_Int local_num_rows;
2244 HYPRE_Int first_local_row, last_local_row;
2245 HYPRE_Int first_local_col, last_local_col;
2246 HYPRE_Int *coarse_dof_func = NULL;
2247 HYPRE_BigInt *coarse_pnts_global = NULL;
2248 HYPRE_Int ierr = 0;
2249 HYPRE_Int myid;
2250
2251 hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid);
2252 HYPRE_ParCSRMatrix parcsr_A_h;
2253 HYPRE_ParCSRMatrix AFF, AFC, W;
2254 HYPRE_ParCSRMatrix AFF_h, AFC_h, W_h;
2255 HYPRE_ParCSRMatrix parcsr_S_h, parcsr_S_device;
2256
2257 HYPRE_ParCSRMatrixGetLocalRange( parcsr_A,
2258 &first_local_row, &last_local_row ,
2259 &first_local_col, &last_local_col );
2260
2261 local_num_rows = last_local_row - first_local_row + 1;
2262
2263 /* Soc on DEVICE */
2264 hypre_BoomerAMGCreateSDevice(parcsr_A, strong_threshold, max_row_sum, num_functions, NULL, &parcsr_S_device);
2265 h_CF_marker = hypre_IntArrayCreate(local_num_rows);
2266 hypre_IntArrayInitialize(h_CF_marker);
2267 hypre_BoomerAMGCoarsenPMISDevice(parcsr_S_device, parcsr_A, 0, debug_flag, &h_CF_marker);
2268 hypre_BoomerAMGCoarseParms(hypre_ParCSRMatrixComm(parcsr_A), local_num_rows, num_functions, NULL,
2269 h_CF_marker, &coarse_dof_func, &coarse_pnts_global);
2270
2271 /* FFFC on Device */
2272 hypre_ParCSRMatrixGenerateFFFCDevice(parcsr_A, hypre_IntArrayData(h_CF_marker), coarse_pnts_global, parcsr_S_device, &AFC, &AFF);
2273
2274 /* FFFC on Host */
2275 parcsr_A_h = hypre_ParCSRMatrixClone_v2(parcsr_A, 1, HYPRE_MEMORY_HOST);
2276 parcsr_S_h = hypre_ParCSRMatrixClone_v2(parcsr_S_device, 0, HYPRE_MEMORY_HOST);
2277 hypre_MatvecCommPkgCreate(parcsr_A_h);
2278 hypre_ParCSRMatrixGenerateFFFC(parcsr_A_h, hypre_IntArrayData(h_CF_marker), coarse_pnts_global, parcsr_S_h, &AFC_h, &AFF_h);
2279
2280 /* AFF * AFC */
2281 W_h = hypre_ParCSRMatMatHost(AFF_h, AFC_h);
2282 W = hypre_ParCSRMatMatDevice(AFF, AFC);
2283
2284 /* check */
2285 HYPRE_Real tol = 1e-15;
2286 ierr += CompareParCSRDH(AFF_h, AFF, tol); hypre_assert(!ierr);
2287 ierr += CompareParCSRDH(AFC_h, AFC, tol); hypre_assert(!ierr);
2288 ierr += CompareParCSRDH(W_h, W, tol); hypre_assert(!ierr);
2289
2290 if (myid == 0 && !ierr)
2291 {
2292 printf("All Tests were OK ...\n");
2293 }
2294
2295 /* done */
2296 hypre_TFree(h_CF_marker, HYPRE_MEMORY_HOST);
2297 hypre_IntArrayDestroy(h_CF_marker);
2298 hypre_ParCSRMatrixDestroy(parcsr_A_h);
2299 hypre_ParCSRMatrixDestroy(AFF);
2300 hypre_ParCSRMatrixDestroy(AFC);
2301 hypre_ParCSRMatrixDestroy(W);
2302 hypre_ParCSRMatrixDestroy(AFF_h);
2303 hypre_ParCSRMatrixDestroy(AFC_h);
2304 hypre_ParCSRMatrixDestroy(W_h);
2305 }
2306
2307 HYPRE_Int
CompareParCSRDH(HYPRE_ParCSRMatrix hmat,HYPRE_ParCSRMatrix dmat,HYPRE_Real tol)2308 CompareParCSRDH(HYPRE_ParCSRMatrix hmat, HYPRE_ParCSRMatrix dmat, HYPRE_Real tol)
2309 {
2310 HYPRE_ParCSRMatrix hmat2, emat;
2311 HYPRE_Real enorm, fnorm, rnorm;
2312 HYPRE_Int i, ecode = 0, ecode_total = 0;
2313
2314 hmat2 = hypre_ParCSRMatrixClone_v2(dmat, 1, HYPRE_MEMORY_HOST);
2315
2316 if (hypre_ParCSRMatrixNumRows(hmat) != hypre_ParCSRMatrixNumRows(hmat2))
2317 {
2318 ecode ++;
2319 }
2320 if (hypre_ParCSRMatrixNumCols(hmat) != hypre_ParCSRMatrixNumCols(hmat2))
2321 {
2322 ecode ++;
2323 }
2324 if (hypre_CSRMatrixNumRows(hypre_ParCSRMatrixOffd(hmat)) != hypre_CSRMatrixNumRows(hypre_ParCSRMatrixOffd(hmat2)))
2325 {
2326 ecode ++;
2327 }
2328 if (hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(hmat)) != hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(hmat2)))
2329 {
2330 ecode ++;
2331 }
2332 for (i = 0; i < hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(hmat)); i++)
2333 {
2334 if (hypre_ParCSRMatrixColMapOffd(hmat)[i] != hypre_ParCSRMatrixColMapOffd(hmat2)[i])
2335 {
2336 ecode++;
2337 break;
2338 }
2339 }
2340 if (hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixDiag(hmat)) != hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixDiag(hmat2)))
2341 {
2342 ecode ++;
2343 }
2344 if (hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(hmat)) != hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(hmat2)))
2345 {
2346 ecode ++;
2347 }
2348
2349 hypre_MPI_Allreduce(&ecode, &ecode_total, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hypre_MPI_COMM_WORLD);
2350
2351 hypre_ParCSRMatrixAdd(1.0, hmat, -1.0, hmat2, &emat);
2352 enorm = hypre_ParCSRMatrixFnorm(emat);
2353
2354 fnorm = hypre_ParCSRMatrixFnorm(hmat);
2355 rnorm = fnorm > 0 ? enorm / fnorm : enorm;
2356 if ( rnorm > tol )
2357 {
2358 ecode_total ++;
2359 }
2360
2361 printf("relative error %e = %e / %e\n", rnorm, enorm, fnorm);
2362
2363 hypre_ParCSRMatrixDestroy(hmat2);
2364 hypre_ParCSRMatrixDestroy(emat);
2365
2366 return ecode_total;
2367 }
2368