1 /******************************************************************************
2  * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3  * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4  *
5  * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6  ******************************************************************************/
7 
8 /*--------------------------------------------------------------------------
9  * Test driver for unstructured matrix interface (IJ_matrix interface).
10  * Do `driver -help' for usage info.
11  * This driver started from the driver for parcsr_linear_solvers, and it
12  * works by first building a parcsr matrix as before and then "copying"
13  * that matrix row-by-row into the IJMatrix interface. AJC 7/99.
14  *--------------------------------------------------------------------------*/
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <math.h>
18 
19 #include "_hypre_utilities.h"
20 #include "HYPRE.h"
21 #include "HYPRE_parcsr_mv.h"
22 
23 #include "HYPRE_IJ_mv.h"
24 #include "HYPRE_parcsr_ls.h"
25 #include "_hypre_parcsr_ls.h"
26 #include "_hypre_parcsr_mv.h"
27 #include "HYPRE_krylov.h"
28 
29 #include "cuda_profiler_api.h"
30 
31 #ifdef __cplusplus
32 extern "C" {
33 #endif
34 
35 
36 
37 HYPRE_Int BuildParFromFile (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix *A_ptr );
38 HYPRE_Int BuildParRhsFromFile (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParVector *b_ptr );
39 
40 HYPRE_Int BuildParLaplacian (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix *A_ptr );
41 HYPRE_Int BuildParSysLaplacian (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix *A_ptr );
42 HYPRE_Int BuildParDifConv (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix *A_ptr);
43 HYPRE_Int BuildFuncsFromFiles (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix A , HYPRE_Int **dof_func_ptr );
44 HYPRE_Int BuildFuncsFromOneFile (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix A , HYPRE_Int **dof_func_ptr );
45 HYPRE_Int BuildParLaplacian9pt (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix *A_ptr );
46 HYPRE_Int BuildParLaplacian27pt (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix *A_ptr );
47 HYPRE_Int BuildParRotate7pt (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix *A_ptr );
48 HYPRE_Int BuildParVarDifConv (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_ParCSRMatrix *A_ptr , HYPRE_ParVector *rhs_ptr );
49 HYPRE_ParCSRMatrix GenerateSysLaplacian (MPI_Comm comm, HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int nz,
50                                          HYPRE_Int P, HYPRE_Int Q, HYPRE_Int R, HYPRE_Int p, HYPRE_Int q, HYPRE_Int r,
51                                          HYPRE_Int num_fun, HYPRE_Real *mtrx, HYPRE_Real *value);
52 HYPRE_ParCSRMatrix GenerateSysLaplacianVCoef (MPI_Comm comm, HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int nz,
53                                               HYPRE_Int P, HYPRE_Int Q, HYPRE_Int R, HYPRE_Int p, HYPRE_Int q, HYPRE_Int r,
54                                               HYPRE_Int num_fun, HYPRE_Real *mtrx, HYPRE_Real *value);
55 HYPRE_Int SetSysVcoefValues(HYPRE_Int num_fun, HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int nz, HYPRE_Real vcx, HYPRE_Real vcy, HYPRE_Real vcz, HYPRE_Int mtx_entry, HYPRE_Real *values);
56 
57 HYPRE_Int BuildParCoordinates (HYPRE_Int argc , char *argv [], HYPRE_Int arg_index , HYPRE_Int *coorddim_ptr , float **coord_ptr );
58 
59 void testPMIS(HYPRE_ParCSRMatrix parcsr_A);
60 void testPMIS2(HYPRE_ParCSRMatrix parcsr_A);
61 void testPMIS3(HYPRE_ParCSRMatrix parcsr_A);
62 void testTranspose(HYPRE_ParCSRMatrix parcsr_A);
63 void testAdd(HYPRE_ParCSRMatrix parcsr_A);
64 void testFFFC(HYPRE_ParCSRMatrix parcsr_A);
65 
66 HYPRE_Int CompareParCSRDH(HYPRE_ParCSRMatrix hmat, HYPRE_ParCSRMatrix dmat, HYPRE_Real tol);
67 
68 #ifdef __cplusplus
69 }
70 #endif
71 
72 hypre_int
main(hypre_int argc,char * argv[])73 main( hypre_int argc,
74       char *argv[] )
75 {
76    HYPRE_Int           arg_index;
77    HYPRE_Int           print_usage;
78    HYPRE_Int           build_matrix_type;
79    HYPRE_Int           build_matrix_arg_index;
80    HYPRE_Int           ierr = 0;
81    void               *object;
82 
83    HYPRE_IJMatrix     ij_A = NULL;
84    HYPRE_ParCSRMatrix parcsr_A   = NULL;
85 
86    HYPRE_Int       time_index;
87    MPI_Comm        comm = hypre_MPI_COMM_WORLD;
88    HYPRE_Int       test = 1;
89    //HYPRE_Int       i;
90    //HYPRE_Real      *data;
91 
92    HYPRE_Int myid, num_procs;
93 
94    /*-----------------------------------------------------------
95     * Initialize some stuff
96     *-----------------------------------------------------------*/
97    /* Initialize MPI */
98    hypre_MPI_Init(&argc, &argv);
99 
100    hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid);
101    hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
102 
103    /*-----------------------------------------------------------------
104     * GPU Device binding
105     * Must be done before HYPRE_Init() and should not be changed after
106     *-----------------------------------------------------------------*/
107    hypre_bind_device(myid, num_procs, hypre_MPI_COMM_WORLD);
108 
109    /*-----------------------------------------------------------
110     * Initialize : must be the first HYPRE function to call
111     *-----------------------------------------------------------*/
112    HYPRE_Init();
113 
114    hypre_SetNumThreads(5);
115    hypre_printf("CPU #OMP THREADS %d\n", hypre_NumThreads());
116 
117    /*-----------------------------------------------------------
118     * Set defaults
119     *-----------------------------------------------------------*/
120    build_matrix_type = 2;
121    build_matrix_arg_index = argc;
122 
123    /*-----------------------------------------------------------
124     * Parse command line
125     *-----------------------------------------------------------*/
126 
127    print_usage = 0;
128    arg_index = 1;
129 
130    while ( (arg_index < argc) && (!print_usage) )
131    {
132       if ( strcmp(argv[arg_index], "-fromfile") == 0 )
133       {
134          arg_index++;
135          build_matrix_type      = -1;
136          build_matrix_arg_index = arg_index;
137       }
138       else if ( strcmp(argv[arg_index], "-fromparcsrfile") == 0 )
139       {
140          arg_index++;
141          build_matrix_type      = 0;
142          build_matrix_arg_index = arg_index;
143       }
144       else if ( strcmp(argv[arg_index], "-fromonecsrfile") == 0 )
145       {
146          arg_index++;
147          build_matrix_type      = 1;
148          build_matrix_arg_index = arg_index;
149       }
150       else if ( strcmp(argv[arg_index], "-laplacian") == 0 )
151       {
152          arg_index++;
153          build_matrix_type      = 2;
154          build_matrix_arg_index = arg_index;
155       }
156       else if ( strcmp(argv[arg_index], "-9pt") == 0 )
157       {
158          arg_index++;
159          build_matrix_type      = 3;
160          build_matrix_arg_index = arg_index;
161       }
162       else if ( strcmp(argv[arg_index], "-27pt") == 0 )
163       {
164          arg_index++;
165          build_matrix_type      = 4;
166          build_matrix_arg_index = arg_index;
167       }
168       else if ( strcmp(argv[arg_index], "-difconv") == 0 )
169       {
170          arg_index++;
171          build_matrix_type      = 5;
172          build_matrix_arg_index = arg_index;
173       }
174       else if ( strcmp(argv[arg_index], "-vardifconv") == 0 )
175       {
176          arg_index++;
177          build_matrix_type      = 6;
178          build_matrix_arg_index = arg_index;
179       }
180       else if ( strcmp(argv[arg_index], "-rotate") == 0 )
181       {
182          arg_index++;
183          build_matrix_type      = 7;
184          build_matrix_arg_index = arg_index;
185       }
186       else if ( strcmp(argv[arg_index], "-test") == 0 )
187       {
188          arg_index++;
189          test = atoi(argv[arg_index++]);
190       }
191       else if ( strcmp(argv[arg_index], "-help") == 0 )
192       {
193          print_usage = 1;
194       }
195       else
196       {
197          arg_index++;
198       }
199    }
200 
201    /*-----------------------------------------------------------
202     * Print usage info
203     *-----------------------------------------------------------*/
204    if ( print_usage )
205    {
206       goto final;
207    }
208 
209    /*-----------------------------------------------------------
210     * Print driver parameters
211     *-----------------------------------------------------------*/
212 
213    HYPRE_SetSpGemmUseCusparse(0);
214    /* use cuRand for PMIS */
215    HYPRE_SetUseGpuRand(1);
216 
217    /*-----------------------------------------------------------
218     * Set up matrix
219     *-----------------------------------------------------------*/
220    time_index = hypre_InitializeTiming("Generate Matrix");
221    hypre_BeginTiming(time_index);
222    if ( build_matrix_type == -1 )
223    {
224       ierr = HYPRE_IJMatrixRead( argv[build_matrix_arg_index], comm,
225                                  HYPRE_PARCSR, &ij_A );
226       if (ierr)
227       {
228          hypre_printf("ERROR: Problem reading in the system matrix!\n");
229          exit(1);
230       }
231    }
232    else if ( build_matrix_type == 0 )
233    {
234       BuildParFromFile(argc, argv, build_matrix_arg_index, &parcsr_A);
235    }
236    else if ( build_matrix_type == 2 )
237    {
238       BuildParLaplacian(argc, argv, build_matrix_arg_index, &parcsr_A);
239    }
240    else if ( build_matrix_type == 3 )
241    {
242       BuildParLaplacian9pt(argc, argv, build_matrix_arg_index, &parcsr_A);
243    }
244    else if ( build_matrix_type == 4 )
245    {
246       BuildParLaplacian27pt(argc, argv, build_matrix_arg_index, &parcsr_A);
247    }
248    else if ( build_matrix_type == 5 )
249    {
250       BuildParDifConv(argc, argv, build_matrix_arg_index, &parcsr_A);
251    }
252    else if ( build_matrix_type == 7 )
253    {
254       BuildParRotate7pt(argc, argv, build_matrix_arg_index, &parcsr_A);
255    }
256    else
257    {
258       hypre_printf("You have asked for an unsupported problem with\n");
259       hypre_printf("build_matrix_type = %d.\n", build_matrix_type);
260       return(-1);
261    }
262 
263    if (build_matrix_type < 0)
264    {
265       ierr += HYPRE_IJMatrixGetObject( ij_A, &object);
266       parcsr_A = (HYPRE_ParCSRMatrix) object;
267    }
268 
269    hypre_EndTiming(time_index);
270    hypre_PrintTiming("Generate Matrix", hypre_MPI_COMM_WORLD);
271    hypre_FinalizeTiming(time_index);
272    hypre_ClearTiming();
273 
274    hypre_ParCSRMatrixMigrate(parcsr_A, hypre_HandleMemoryLocation(hypre_handle()));
275 
276    /*
277     * TESTS
278     */
279    if (test == 1)
280    {
281       testPMIS(parcsr_A);
282    }
283    else if (test == 2)
284    {
285       testPMIS2(parcsr_A);
286    }
287    else if (test == 3)
288    {
289       testPMIS3(parcsr_A);
290    }
291    else if (test == 4)
292    {
293       testTranspose(parcsr_A);
294    }
295    else if (test == 5)
296    {
297       testAdd(parcsr_A);
298    }
299 
300    //testFFFC(parcsr_A);
301 
302    /*-----------------------------------------------------------
303     * Finalize things
304     *-----------------------------------------------------------*/
305 
306    if (build_matrix_type == -1)
307    {
308       HYPRE_IJMatrixDestroy(ij_A);
309    }
310    else
311    {
312       HYPRE_ParCSRMatrixDestroy(parcsr_A);
313    }
314 
315  final:
316 
317    /* Finalize Hypre */
318    HYPRE_Finalize();
319 
320    /* Finalize MPI */
321    hypre_MPI_Finalize();
322 
323    /* when using cuda-memcheck --leak-check full, uncomment this */
324 #if defined(HYPRE_USING_GPU)
325    hypre_ResetCudaDevice(hypre_handle());
326 #endif
327 
328    return (0);
329 }
330 
331 /*----------------------------------------------------------------------
332  * Build matrix from file. Expects three files on each processor.
333  * filename.D.n contains the diagonal part, filename.O.n contains
334  * the offdiagonal part and filename.INFO.n contains global row
335  * and column numbers, number of columns of offdiagonal matrix
336  * and the mapping of offdiagonal column numbers to global column numbers.
337  * Parameters given in command line.
338  *----------------------------------------------------------------------*/
339 
340 HYPRE_Int
BuildParFromFile(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParCSRMatrix * A_ptr)341 BuildParFromFile( HYPRE_Int            argc,
342                   char                *argv[],
343                   HYPRE_Int            arg_index,
344                   HYPRE_ParCSRMatrix  *A_ptr     )
345 {
346    char               *filename;
347 
348    HYPRE_ParCSRMatrix A;
349 
350    HYPRE_Int                 myid;
351 
352    /*-----------------------------------------------------------
353     * Initialize some stuff
354     *-----------------------------------------------------------*/
355 
356    hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
357 
358    /*-----------------------------------------------------------
359     * Parse command line
360     *-----------------------------------------------------------*/
361 
362    if (arg_index < argc)
363    {
364       filename = argv[arg_index];
365    }
366    else
367    {
368       hypre_printf("Error: No filename specified \n");
369       exit(1);
370    }
371 
372    /*-----------------------------------------------------------
373     * Print driver parameters
374     *-----------------------------------------------------------*/
375 
376    if (myid == 0)
377    {
378       hypre_printf("  FromFile: %s\n", filename);
379    }
380 
381    /*-----------------------------------------------------------
382     * Generate the matrix
383     *-----------------------------------------------------------*/
384 
385    HYPRE_ParCSRMatrixRead(hypre_MPI_COMM_WORLD, filename,&A);
386 
387    *A_ptr = A;
388 
389    return (0);
390 }
391 
392 
393 /*----------------------------------------------------------------------
394  * Build rhs from file. Expects two files on each processor.
395  * filename.n contains the data and
396  * and filename.INFO.n contains global row
397  * numbers
398  *----------------------------------------------------------------------*/
399 
400 HYPRE_Int
BuildParRhsFromFile(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParVector * b_ptr)401 BuildParRhsFromFile( HYPRE_Int            argc,
402                      char                *argv[],
403                      HYPRE_Int            arg_index,
404                      HYPRE_ParVector      *b_ptr     )
405 {
406    char               *filename;
407 
408    HYPRE_ParVector b;
409 
410    HYPRE_Int                 myid;
411 
412    /*-----------------------------------------------------------
413     * Initialize some stuff
414     *-----------------------------------------------------------*/
415 
416    hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
417 
418    /*-----------------------------------------------------------
419     * Parse command line
420     *-----------------------------------------------------------*/
421 
422    if (arg_index < argc)
423    {
424       filename = argv[arg_index];
425    }
426    else
427    {
428       hypre_printf("Error: No filename specified \n");
429       exit(1);
430    }
431 
432    /*-----------------------------------------------------------
433     * Print driver parameters
434     *-----------------------------------------------------------*/
435 
436    if (myid == 0)
437    {
438       hypre_printf("  RhsFromParFile: %s\n", filename);
439    }
440 
441    /*-----------------------------------------------------------
442     * Generate the matrix
443     *-----------------------------------------------------------*/
444 
445    HYPRE_ParVectorRead(hypre_MPI_COMM_WORLD, filename,&b);
446 
447    *b_ptr = b;
448 
449    return (0);
450 }
451 
452 
453 
454 
455 /*----------------------------------------------------------------------
456  * Build standard 7-point laplacian in 3D with grid and anisotropy.
457  * Parameters given in command line.
458  *----------------------------------------------------------------------*/
459 
460 HYPRE_Int
BuildParLaplacian(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParCSRMatrix * A_ptr)461 BuildParLaplacian( HYPRE_Int            argc,
462                    char                *argv[],
463                    HYPRE_Int            arg_index,
464                    HYPRE_ParCSRMatrix  *A_ptr     )
465 {
466    HYPRE_Int                 nx, ny, nz;
467    HYPRE_Int                 P, Q, R;
468    HYPRE_Real          cx, cy, cz;
469 
470    HYPRE_ParCSRMatrix  A;
471 
472    HYPRE_Int                 num_procs, myid;
473    HYPRE_Int                 p, q, r;
474    HYPRE_Int                 num_fun = 1;
475    HYPRE_Real         *values;
476    HYPRE_Real         *mtrx;
477 
478    HYPRE_Real          ep = .1;
479 
480    HYPRE_Int                 system_vcoef = 0;
481    HYPRE_Int                 sys_opt = 0;
482    HYPRE_Int                 vcoef_opt = 0;
483 
484 
485    /*-----------------------------------------------------------
486     * Initialize some stuff
487     *-----------------------------------------------------------*/
488 
489    hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
490    hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
491 
492    /*-----------------------------------------------------------
493     * Set defaults
494     *-----------------------------------------------------------*/
495 
496    nx = 10;
497    ny = 10;
498    nz = 10;
499 
500    P  = 1;
501    Q  = num_procs;
502    R  = 1;
503 
504    cx = 1.;
505    cy = 1.;
506    cz = 1.;
507 
508    /*-----------------------------------------------------------
509     * Parse command line
510     *-----------------------------------------------------------*/
511    arg_index = 0;
512    while (arg_index < argc)
513    {
514       if ( strcmp(argv[arg_index], "-n") == 0 )
515       {
516          arg_index++;
517          nx = atoi(argv[arg_index++]);
518          ny = atoi(argv[arg_index++]);
519          nz = atoi(argv[arg_index++]);
520       }
521       else if ( strcmp(argv[arg_index], "-P") == 0 )
522       {
523          arg_index++;
524          P  = atoi(argv[arg_index++]);
525          Q  = atoi(argv[arg_index++]);
526          R  = atoi(argv[arg_index++]);
527       }
528       else if ( strcmp(argv[arg_index], "-c") == 0 )
529       {
530          arg_index++;
531          cx = atof(argv[arg_index++]);
532          cy = atof(argv[arg_index++]);
533          cz = atof(argv[arg_index++]);
534       }
535       else if ( strcmp(argv[arg_index], "-sysL") == 0 )
536       {
537          arg_index++;
538          num_fun = atoi(argv[arg_index++]);
539       }
540       else if ( strcmp(argv[arg_index], "-sysL_opt") == 0 )
541       {
542          arg_index++;
543          sys_opt = atoi(argv[arg_index++]);
544       }
545       else if ( strcmp(argv[arg_index], "-sys_vcoef") == 0 )
546       {
547          /* have to use -sysL for this to */
548          arg_index++;
549          system_vcoef = 1;
550       }
551       else if ( strcmp(argv[arg_index], "-sys_vcoef_opt") == 0 )
552       {
553          arg_index++;
554          vcoef_opt = atoi(argv[arg_index++]);
555       }
556       else if ( strcmp(argv[arg_index], "-ep") == 0 )
557       {
558          arg_index++;
559          ep = atof(argv[arg_index++]);
560       }
561       else
562       {
563          arg_index++;
564       }
565    }
566 
567    /*-----------------------------------------------------------
568     * Check a few things
569     *-----------------------------------------------------------*/
570 
571    if ((P*Q*R) != num_procs)
572    {
573       hypre_printf("Error: Invalid number of processors or processor topology \n");
574       exit(1);
575    }
576 
577    /*-----------------------------------------------------------
578     * Print driver parameters
579     *-----------------------------------------------------------*/
580 
581    if (myid == 0)
582    {
583       hypre_printf("  Laplacian:   num_fun = %d\n", num_fun);
584       hypre_printf("    (nx, ny, nz) = (%d, %d, %d)\n", nx, ny, nz);
585       hypre_printf("    (Px, Py, Pz) = (%d, %d, %d)\n", P,  Q,  R);
586       hypre_printf("    (cx, cy, cz) = (%f, %f, %f)\n\n", cx, cy, cz);
587    }
588 
589    /*-----------------------------------------------------------
590     * Set up the grid structure
591     *-----------------------------------------------------------*/
592 
593    /* compute p,q,r from P,Q,R and myid */
594    p = myid % P;
595    q = (( myid - p)/P) % Q;
596    r = ( myid - p - P*q)/( P*Q );
597 
598    /*-----------------------------------------------------------
599     * Generate the matrix
600     *-----------------------------------------------------------*/
601 
602    values = hypre_CTAlloc(HYPRE_Real,  4, HYPRE_MEMORY_HOST);
603 
604    values[1] = -cx;
605    values[2] = -cy;
606    values[3] = -cz;
607 
608    values[0] = 0.;
609    if (nx > 1)
610    {
611       values[0] += 2.0*cx;
612    }
613    if (ny > 1)
614    {
615       values[0] += 2.0*cy;
616    }
617    if (nz > 1)
618    {
619       values[0] += 2.0*cz;
620    }
621 
622    if (num_fun == 1)
623       A = (HYPRE_ParCSRMatrix) GenerateLaplacian(hypre_MPI_COMM_WORLD,
624                                                  nx, ny, nz, P, Q, R, p, q, r, values);
625    else
626    {
627       mtrx = hypre_CTAlloc(HYPRE_Real,  num_fun*num_fun, HYPRE_MEMORY_HOST);
628 
629       if (num_fun == 2)
630       {
631          if (sys_opt ==1) /* identity  */
632          {
633             mtrx[0] = 1.0;
634             mtrx[1] = 0.0;
635             mtrx[2] = 0.0;
636             mtrx[3] = 1.0;
637          }
638          else if (sys_opt ==2)
639          {
640             mtrx[0] = 1.0;
641             mtrx[1] = 0.0;
642             mtrx[2] = 0.0;
643             mtrx[3] = 20.0;
644          }
645          else if (sys_opt ==3) /* similar to barry's talk - ex1 */
646          {
647             mtrx[0] = 1.0;
648             mtrx[1] = 2.0;
649             mtrx[2] = 2.0;
650             mtrx[3] = 1.0;
651          }
652          else if (sys_opt ==4) /* can use with vcoef to get barry's ex*/
653          {
654             mtrx[0] = 1.0;
655             mtrx[1] = 1.0;
656             mtrx[2] = 1.0;
657             mtrx[3] = 1.0;
658          }
659          else if (sys_opt ==5) /* barry's talk - ex1 */
660          {
661             mtrx[0] = 1.0;
662             mtrx[1] = 1.1;
663             mtrx[2] = 1.1;
664             mtrx[3] = 1.0;
665          }
666          else if (sys_opt ==6) /*  */
667          {
668             mtrx[0] = 1.1;
669             mtrx[1] = 1.0;
670             mtrx[2] = 1.0;
671             mtrx[3] = 1.1;
672          }
673 
674          else /* == 0 */
675          {
676             mtrx[0] = 2;
677             mtrx[1] = 1;
678             mtrx[2] = 1;
679             mtrx[3] = 2;
680          }
681       }
682       else if (num_fun == 3)
683       {
684          if (sys_opt ==1)
685          {
686             mtrx[0] = 1.0;
687             mtrx[1] = 0.0;
688             mtrx[2] = 0.0;
689             mtrx[3] = 0.0;
690             mtrx[4] = 1.0;
691             mtrx[5] = 0.0;
692             mtrx[6] = 0.0;
693             mtrx[7] = 0.0;
694             mtrx[8] = 1.0;
695          }
696          else if (sys_opt ==2)
697          {
698             mtrx[0] = 1.0;
699             mtrx[1] = 0.0;
700             mtrx[2] = 0.0;
701             mtrx[3] = 0.0;
702             mtrx[4] = 20.0;
703             mtrx[5] = 0.0;
704             mtrx[6] = 0.0;
705             mtrx[7] = 0.0;
706             mtrx[8] =.01;
707          }
708          else if (sys_opt ==3)
709          {
710             mtrx[0] = 1.01;
711             mtrx[1] = 1;
712             mtrx[2] = 0.0;
713             mtrx[3] = 1;
714             mtrx[4] = 2;
715             mtrx[5] = 1;
716             mtrx[6] = 0.0;
717             mtrx[7] = 1;
718             mtrx[8] = 1.01;
719          }
720          else if (sys_opt ==4) /* barry ex4 */
721          {
722             mtrx[0] = 3;
723             mtrx[1] = 1;
724             mtrx[2] = 0.0;
725             mtrx[3] = 1;
726             mtrx[4] = 4;
727             mtrx[5] = 2;
728             mtrx[6] = 0.0;
729             mtrx[7] = 2;
730             mtrx[8] = .25;
731          }
732          else /* == 0 */
733          {
734             mtrx[0] = 2.0;
735             mtrx[1] = 1.0;
736             mtrx[2] = 0.0;
737             mtrx[3] = 1.0;
738             mtrx[4] = 2.0;
739             mtrx[5] = 1.0;
740             mtrx[6] = 0.0;
741             mtrx[7] = 1.0;
742             mtrx[8] = 2.0;
743          }
744 
745       }
746       else if (num_fun == 4)
747       {
748          mtrx[0] = 1.01;
749          mtrx[1] = 1;
750          mtrx[2] = 0.0;
751          mtrx[3] = 0.0;
752          mtrx[4] = 1;
753          mtrx[5] = 2;
754          mtrx[6] = 1;
755          mtrx[7] = 0.0;
756          mtrx[8] = 0.0;
757          mtrx[9] = 1;
758          mtrx[10] = 1.01;
759          mtrx[11] = 0.0;
760          mtrx[12] = 2;
761          mtrx[13] = 1;
762          mtrx[14] = 0.0;
763          mtrx[15] = 1;
764       }
765 
766 
767 
768 
769       if (!system_vcoef)
770       {
771          A = (HYPRE_ParCSRMatrix) GenerateSysLaplacian(hypre_MPI_COMM_WORLD,
772                                                        nx, ny, nz, P, Q,
773                                                        R, p, q, r, num_fun, mtrx, values);
774       }
775       else
776       {
777 
778 
779          HYPRE_Real *mtrx_values;
780 
781          mtrx_values = hypre_CTAlloc(HYPRE_Real,  num_fun*num_fun*4, HYPRE_MEMORY_HOST);
782 
783          if (num_fun == 2)
784          {
785             if (vcoef_opt == 1)
786             {
787                /* Barry's talk * - must also have sys_opt = 4, all fail */
788                mtrx[0] = 1.0;
789                SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, .10, 1.0, 0, mtrx_values);
790 
791                mtrx[1]  = 1.0;
792                SetSysVcoefValues(num_fun, nx, ny, nz, .1, 1.0, 1.0, 1, mtrx_values);
793 
794                mtrx[2] = 1.0;
795                SetSysVcoefValues(num_fun, nx, ny, nz, .01, 1.0, 1.0, 2, mtrx_values);
796 
797                mtrx[3] = 1.0;
798                SetSysVcoefValues(num_fun, nx, ny, nz, 2.0, .02, 1.0, 3, mtrx_values);
799 
800             }
801             else if (vcoef_opt == 2)
802             {
803                /* Barry's talk * - ex2 - if have sys-opt = 4*/
804                mtrx[0] = 1.0;
805                SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, .010, 1.0, 0, mtrx_values);
806 
807                mtrx[1]  = 200.0;
808                SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, 1.0, 1.0, 1, mtrx_values);
809 
810                mtrx[2] = 200.0;
811                SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, 1.0, 1.0, 2, mtrx_values);
812 
813                mtrx[3] = 1.0;
814                SetSysVcoefValues(num_fun, nx, ny, nz, 2.0, .02, 1.0, 3, mtrx_values);
815 
816             }
817             else if (vcoef_opt == 3) /* use with default sys_opt  - ulrike ex 3*/
818             {
819 
820                /* mtrx[0] */
821                SetSysVcoefValues(num_fun, nx, ny, nz, ep*1.0, 1.0, 1.0, 0, mtrx_values);
822 
823                /* mtrx[1] */
824                SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, 1.0, 1.0, 1, mtrx_values);
825 
826                /* mtrx[2] */
827                SetSysVcoefValues(num_fun, nx, ny, nz, ep*1.0, 1.0, 1.0, 2, mtrx_values);
828 
829                /* mtrx[3] */
830                SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, 1.0, 1.0, 3, mtrx_values);
831             }
832             else if (vcoef_opt == 4) /* use with default sys_opt  - ulrike ex 4*/
833             {
834                HYPRE_Real ep2 = ep;
835 
836                /* mtrx[0] */
837                SetSysVcoefValues(num_fun, nx, ny, nz, ep*1.0, 1.0, 1.0, 0, mtrx_values);
838 
839                /* mtrx[1] */
840                SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, ep*1.0, 1.0, 1, mtrx_values);
841 
842                /* mtrx[2] */
843                SetSysVcoefValues(num_fun, nx, ny, nz, ep*1.0, 1.0, 1.0, 2, mtrx_values);
844 
845                /* mtrx[3] */
846                SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, ep2*1.0, 1.0, 3, mtrx_values);
847             }
848             else if (vcoef_opt == 5) /* use with default sys_opt  - */
849             {
850                HYPRE_Real  alp, beta;
851                alp = .001;
852                beta = 10;
853 
854                /* mtrx[0] */
855                SetSysVcoefValues(num_fun, nx, ny, nz, alp*1.0, 1.0, 1.0, 0, mtrx_values);
856 
857                /* mtrx[1] */
858                SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, beta*1.0, 1.0, 1, mtrx_values);
859 
860                /* mtrx[2] */
861                SetSysVcoefValues(num_fun, nx, ny, nz, alp*1.0, 1.0, 1.0, 2, mtrx_values);
862 
863                /* mtrx[3] */
864                SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, beta*1.0, 1.0, 3, mtrx_values);
865             }
866             else  /* = 0 */
867             {
868                /* mtrx[0] */
869                SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, 1.0, 1.0, 0, mtrx_values);
870 
871                /* mtrx[1] */
872                SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, 2.0, 1.0, 1, mtrx_values);
873 
874                /* mtrx[2] */
875                SetSysVcoefValues(num_fun, nx, ny, nz, 2.0, 1.0, 0.0, 2, mtrx_values);
876 
877                /* mtrx[3] */
878                SetSysVcoefValues(num_fun, nx, ny, nz, 1.0, 3.0, 1.0, 3, mtrx_values);
879             }
880 
881          }
882          else if (num_fun == 3)
883          {
884             mtrx[0] = 1;
885             SetSysVcoefValues(num_fun, nx, ny, nz, 1, .01, 1, 0, mtrx_values);
886 
887             mtrx[1] = 1;
888             SetSysVcoefValues(num_fun, nx, ny, nz, 1, 1, 1, 1, mtrx_values);
889 
890             mtrx[2] = 0.0;
891             SetSysVcoefValues(num_fun, nx, ny, nz, 1, 1, 1, 2, mtrx_values);
892 
893             mtrx[3] = 1;
894             SetSysVcoefValues(num_fun, nx, ny, nz, 1, 1, 1, 3, mtrx_values);
895 
896             mtrx[4] = 1;
897             SetSysVcoefValues(num_fun, nx, ny, nz, 2, .02, 1, 4, mtrx_values);
898 
899             mtrx[5] = 2;
900             SetSysVcoefValues(num_fun, nx, ny, nz, 1, 1, 1, 5, mtrx_values);
901 
902             mtrx[6] = 0.0;
903             SetSysVcoefValues(num_fun, nx, ny, nz, 1, 1, 1, 6, mtrx_values);
904 
905             mtrx[7] = 2;
906             SetSysVcoefValues(num_fun, nx, ny, nz, 1, 1, 1, 7, mtrx_values);
907 
908             mtrx[8] = 1;
909             SetSysVcoefValues(num_fun, nx, ny, nz, 1.5, .04, 1, 8, mtrx_values);
910 
911          }
912 
913          A = (HYPRE_ParCSRMatrix) GenerateSysLaplacianVCoef(hypre_MPI_COMM_WORLD,
914                                                             nx, ny, nz, P, Q,
915                                                             R, p, q, r, num_fun, mtrx, mtrx_values);
916 
917 
918 
919 
920 
921          hypre_TFree(mtrx_values, HYPRE_MEMORY_HOST);
922       }
923 
924       hypre_TFree(mtrx, HYPRE_MEMORY_HOST);
925    }
926 
927    hypre_TFree(values, HYPRE_MEMORY_HOST);
928 
929    *A_ptr = A;
930 
931    return (0);
932 }
933 
934 /*----------------------------------------------------------------------
935  * returns the sign of a real number
936  *  1 : positive
937  *  0 : zero
938  * -1 : negative
939  *----------------------------------------------------------------------*/
sign_double(HYPRE_Real a)940 static inline HYPRE_Int sign_double(HYPRE_Real a)
941 {
942    return ( (0.0 < a) - (0.0 > a) );
943 }
944 
945 /*----------------------------------------------------------------------
946  * Build standard 7-point convection-diffusion operator
947  * Parameters given in command line.
948  * Operator:
949  *
950  *  -cx Dxx - cy Dyy - cz Dzz + ax Dx + ay Dy + az Dz = f
951  *
952  *----------------------------------------------------------------------*/
953 
954 HYPRE_Int
BuildParDifConv(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParCSRMatrix * A_ptr)955 BuildParDifConv( HYPRE_Int            argc,
956                  char                *argv[],
957                  HYPRE_Int            arg_index,
958                  HYPRE_ParCSRMatrix  *A_ptr)
959 {
960    HYPRE_Int           nx, ny, nz;
961    HYPRE_Int           P, Q, R;
962    HYPRE_Real          cx, cy, cz;
963    HYPRE_Real          ax, ay, az, atype;
964    HYPRE_Real          hinx,hiny,hinz;
965    HYPRE_Int           sign_prod;
966 
967    HYPRE_ParCSRMatrix  A;
968 
969    HYPRE_Int           num_procs, myid;
970    HYPRE_Int           p, q, r;
971    HYPRE_Real         *values;
972 
973    /*-----------------------------------------------------------
974     * Initialize some stuff
975     *-----------------------------------------------------------*/
976 
977    hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
978    hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
979 
980    /*-----------------------------------------------------------
981     * Set defaults
982     *-----------------------------------------------------------*/
983 
984    nx = 10;
985    ny = 10;
986    nz = 10;
987 
988    P  = 1;
989    Q  = num_procs;
990    R  = 1;
991 
992    cx = 1.;
993    cy = 1.;
994    cz = 1.;
995 
996    ax = 1.;
997    ay = 1.;
998    az = 1.;
999 
1000    atype = 0;
1001 
1002    /*-----------------------------------------------------------
1003     * Parse command line
1004     *-----------------------------------------------------------*/
1005    arg_index = 0;
1006    while (arg_index < argc)
1007    {
1008       if ( strcmp(argv[arg_index], "-n") == 0 )
1009       {
1010          arg_index++;
1011          nx = atoi(argv[arg_index++]);
1012          ny = atoi(argv[arg_index++]);
1013          nz = atoi(argv[arg_index++]);
1014       }
1015       else if ( strcmp(argv[arg_index], "-P") == 0 )
1016       {
1017          arg_index++;
1018          P  = atoi(argv[arg_index++]);
1019          Q  = atoi(argv[arg_index++]);
1020          R  = atoi(argv[arg_index++]);
1021       }
1022       else if ( strcmp(argv[arg_index], "-c") == 0 )
1023       {
1024          arg_index++;
1025          cx = atof(argv[arg_index++]);
1026          cy = atof(argv[arg_index++]);
1027          cz = atof(argv[arg_index++]);
1028       }
1029       else if ( strcmp(argv[arg_index], "-a") == 0 )
1030       {
1031          arg_index++;
1032          ax = atof(argv[arg_index++]);
1033          ay = atof(argv[arg_index++]);
1034          az = atof(argv[arg_index++]);
1035       }
1036       else if ( strcmp(argv[arg_index], "-atype") == 0 )
1037       {
1038          arg_index++;
1039          atype = atoi(argv[arg_index++]);
1040       }
1041       else
1042       {
1043          arg_index++;
1044       }
1045    }
1046 
1047    /*-----------------------------------------------------------
1048     * Check a few things
1049     *-----------------------------------------------------------*/
1050 
1051    if ((P*Q*R) != num_procs)
1052    {
1053       hypre_printf("Error: Invalid number of processors or processor topology \n");
1054       exit(1);
1055    }
1056 
1057    /*-----------------------------------------------------------
1058     * Print driver parameters
1059     *-----------------------------------------------------------*/
1060 
1061    if (myid == 0)
1062    {
1063       hypre_printf("  Convection-Diffusion: \n");
1064       hypre_printf("    -cx Dxx - cy Dyy - cz Dzz + ax Dx + ay Dy + az Dz = f\n");
1065       hypre_printf("    (nx, ny, nz) = (%d, %d, %d)\n", nx, ny, nz);
1066       hypre_printf("    (Px, Py, Pz) = (%d, %d, %d)\n", P,  Q,  R);
1067       hypre_printf("    (cx, cy, cz) = (%f, %f, %f)\n", cx, cy, cz);
1068       hypre_printf("    (ax, ay, az) = (%f, %f, %f)\n\n", ax, ay, az);
1069    }
1070 
1071    /*-----------------------------------------------------------
1072     * Set up the grid structure
1073     *-----------------------------------------------------------*/
1074 
1075    /* compute p,q,r from P,Q,R and myid */
1076    p = myid % P;
1077    q = (( myid - p)/P) % Q;
1078    r = ( myid - p - P*q)/( P*Q );
1079 
1080    hinx = 1./(nx+1);
1081    hiny = 1./(ny+1);
1082    hinz = 1./(nz+1);
1083 
1084    /*-----------------------------------------------------------
1085     * Generate the matrix
1086     *-----------------------------------------------------------*/
1087    /* values[7]:
1088     *    [0]: center
1089     *    [1]: X-
1090     *    [2]: Y-
1091     *    [3]: Z-
1092     *    [4]: X+
1093     *    [5]: Y+
1094     *    [6]: Z+
1095     */
1096    values = hypre_CTAlloc(HYPRE_Real,  7, HYPRE_MEMORY_HOST);
1097 
1098    values[0] = 0.;
1099 
1100    if (0 == atype) /* forward scheme for conv */
1101    {
1102       values[1] = -cx/(hinx*hinx);
1103       values[2] = -cy/(hiny*hiny);
1104       values[3] = -cz/(hinz*hinz);
1105       values[4] = -cx/(hinx*hinx) + ax/hinx;
1106       values[5] = -cy/(hiny*hiny) + ay/hiny;
1107       values[6] = -cz/(hinz*hinz) + az/hinz;
1108 
1109       if (nx > 1)
1110       {
1111          values[0] += 2.0*cx/(hinx*hinx) - 1.*ax/hinx;
1112       }
1113       if (ny > 1)
1114       {
1115          values[0] += 2.0*cy/(hiny*hiny) - 1.*ay/hiny;
1116       }
1117       if (nz > 1)
1118       {
1119          values[0] += 2.0*cz/(hinz*hinz) - 1.*az/hinz;
1120       }
1121    }
1122    else if (1 == atype) /* backward scheme for conv */
1123    {
1124       values[1] = -cx/(hinx*hinx) - ax/hinx;
1125       values[2] = -cy/(hiny*hiny) - ay/hiny;
1126       values[3] = -cz/(hinz*hinz) - az/hinz;
1127       values[4] = -cx/(hinx*hinx);
1128       values[5] = -cy/(hiny*hiny);
1129       values[6] = -cz/(hinz*hinz);
1130 
1131       if (nx > 1)
1132       {
1133          values[0] += 2.0*cx/(hinx*hinx) + 1.*ax/hinx;
1134       }
1135       if (ny > 1)
1136       {
1137          values[0] += 2.0*cy/(hiny*hiny) + 1.*ay/hiny;
1138       }
1139       if (nz > 1)
1140       {
1141          values[0] += 2.0*cz/(hinz*hinz) + 1.*az/hinz;
1142       }
1143    }
1144    else if (3 == atype) /* upwind scheme */
1145    {
1146       sign_prod = sign_double(cx) * sign_double(ax);
1147       if (sign_prod == 1) /* same sign use back scheme */
1148       {
1149          values[1] = -cx/(hinx*hinx) - ax/hinx;
1150          values[4] = -cx/(hinx*hinx);
1151          if (nx > 1)
1152          {
1153             values[0] += 2.0*cx/(hinx*hinx) + 1.*ax/hinx;
1154          }
1155       }
1156       else /* diff sign use forward scheme */
1157       {
1158          values[1] = -cx/(hinx*hinx);
1159          values[4] = -cx/(hinx*hinx) + ax/hinx;
1160          if (nx > 1)
1161          {
1162             values[0] += 2.0*cx/(hinx*hinx) - 1.*ax/hinx;
1163          }
1164       }
1165 
1166       sign_prod = sign_double(cy) * sign_double(ay);
1167       if (sign_prod == 1) /* same sign use back scheme */
1168       {
1169          values[2] = -cy/(hiny*hiny) - ay/hiny;
1170          values[5] = -cy/(hiny*hiny);
1171          if (ny > 1)
1172          {
1173             values[0] += 2.0*cy/(hiny*hiny) + 1.*ay/hiny;
1174          }
1175       }
1176       else /* diff sign use forward scheme */
1177       {
1178          values[2] = -cy/(hiny*hiny);
1179          values[5] = -cy/(hiny*hiny) + ay/hiny;
1180          if (ny > 1)
1181          {
1182             values[0] += 2.0*cy/(hiny*hiny) - 1.*ay/hiny;
1183          }
1184       }
1185 
1186       sign_prod = sign_double(cz) * sign_double(az);
1187       if (sign_prod == 1) /* same sign use back scheme */
1188       {
1189          values[3] = -cz/(hinz*hinz) - az/hinz;
1190          values[6] = -cz/(hinz*hinz);
1191          if (nz > 1)
1192          {
1193             values[0] += 2.0*cz/(hinz*hinz) + 1.*az/hinz;
1194          }
1195       }
1196       else /* diff sign use forward scheme */
1197       {
1198          values[3] = -cz/(hinz*hinz);
1199          values[6] = -cz/(hinz*hinz) + az/hinz;
1200          if (nz > 1)
1201          {
1202             values[0] += 2.0*cz/(hinz*hinz) - 1.*az/hinz;
1203          }
1204       }
1205    }
1206    else /* centered difference scheme */
1207    {
1208       values[1] = -cx/(hinx*hinx) - ax/(2.*hinx);
1209       values[2] = -cy/(hiny*hiny) - ay/(2.*hiny);
1210       values[3] = -cz/(hinz*hinz) - az/(2.*hinz);
1211       values[4] = -cx/(hinx*hinx) + ax/(2.*hinx);
1212       values[5] = -cy/(hiny*hiny) + ay/(2.*hiny);
1213       values[6] = -cz/(hinz*hinz) + az/(2.*hinz);
1214 
1215       if (nx > 1)
1216       {
1217          values[0] += 2.0*cx/(hinx*hinx);
1218       }
1219       if (ny > 1)
1220       {
1221          values[0] += 2.0*cy/(hiny*hiny);
1222       }
1223       if (nz > 1)
1224       {
1225          values[0] += 2.0*cz/(hinz*hinz);
1226       }
1227    }
1228 
1229    A = (HYPRE_ParCSRMatrix) GenerateDifConv(hypre_MPI_COMM_WORLD,
1230                                             nx, ny, nz, P, Q, R, p, q, r, values);
1231 
1232    hypre_TFree(values, HYPRE_MEMORY_HOST);
1233 
1234    *A_ptr = A;
1235 
1236    return (0);
1237 }
1238 
1239 /*----------------------------------------------------------------------
1240  * Build standard 9-point laplacian in 2D with grid and anisotropy.
1241  * Parameters given in command line.
1242  *----------------------------------------------------------------------*/
1243 
1244 HYPRE_Int
BuildParLaplacian9pt(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParCSRMatrix * A_ptr)1245 BuildParLaplacian9pt( HYPRE_Int                  argc,
1246                       char                *argv[],
1247                       HYPRE_Int                  arg_index,
1248                       HYPRE_ParCSRMatrix  *A_ptr     )
1249 {
1250    HYPRE_Int                 nx, ny;
1251    HYPRE_Int                 P, Q;
1252 
1253    HYPRE_ParCSRMatrix  A;
1254 
1255    HYPRE_Int                 num_procs, myid;
1256    HYPRE_Int                 p, q;
1257    HYPRE_Real         *values;
1258 
1259    /*-----------------------------------------------------------
1260     * Initialize some stuff
1261     *-----------------------------------------------------------*/
1262 
1263    hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
1264    hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
1265 
1266    /*-----------------------------------------------------------
1267     * Set defaults
1268     *-----------------------------------------------------------*/
1269 
1270    nx = 10;
1271    ny = 10;
1272 
1273    P  = 1;
1274    Q  = num_procs;
1275 
1276    /*-----------------------------------------------------------
1277     * Parse command line
1278     *-----------------------------------------------------------*/
1279    arg_index = 0;
1280    while (arg_index < argc)
1281    {
1282       if ( strcmp(argv[arg_index], "-n") == 0 )
1283       {
1284          arg_index++;
1285          nx = atoi(argv[arg_index++]);
1286          ny = atoi(argv[arg_index++]);
1287       }
1288       else if ( strcmp(argv[arg_index], "-P") == 0 )
1289       {
1290          arg_index++;
1291          P  = atoi(argv[arg_index++]);
1292          Q  = atoi(argv[arg_index++]);
1293       }
1294       else
1295       {
1296          arg_index++;
1297       }
1298    }
1299 
1300    /*-----------------------------------------------------------
1301     * Check a few things
1302     *-----------------------------------------------------------*/
1303 
1304    if ((P*Q) != num_procs)
1305    {
1306       hypre_printf("Error: Invalid number of processors or processor topology \n");
1307       exit(1);
1308    }
1309 
1310    /*-----------------------------------------------------------
1311     * Print driver parameters
1312     *-----------------------------------------------------------*/
1313 
1314    if (myid == 0)
1315    {
1316       hypre_printf("  Laplacian 9pt:\n");
1317       hypre_printf("    (nx, ny) = (%d, %d)\n", nx, ny);
1318       hypre_printf("    (Px, Py) = (%d, %d)\n\n", P,  Q);
1319    }
1320 
1321    /*-----------------------------------------------------------
1322     * Set up the grid structure
1323     *-----------------------------------------------------------*/
1324 
1325    /* compute p,q from P,Q and myid */
1326    p = myid % P;
1327    q = ( myid - p)/P;
1328 
1329    /*-----------------------------------------------------------
1330     * Generate the matrix
1331     *-----------------------------------------------------------*/
1332 
1333    values = hypre_CTAlloc(HYPRE_Real,  2, HYPRE_MEMORY_HOST);
1334 
1335    values[1] = -1.;
1336 
1337    values[0] = 0.;
1338    if (nx > 1)
1339    {
1340       values[0] += 2.0;
1341    }
1342    if (ny > 1)
1343    {
1344       values[0] += 2.0;
1345    }
1346    if (nx > 1 && ny > 1)
1347    {
1348       values[0] += 4.0;
1349    }
1350 
1351    A = (HYPRE_ParCSRMatrix) GenerateLaplacian9pt(hypre_MPI_COMM_WORLD,
1352                                                  nx, ny, P, Q, p, q, values);
1353 
1354    hypre_TFree(values, HYPRE_MEMORY_HOST);
1355 
1356    *A_ptr = A;
1357 
1358    return (0);
1359 }
1360 /*----------------------------------------------------------------------
1361  * Build 27-point laplacian in 3D,
1362  * Parameters given in command line.
1363  *----------------------------------------------------------------------*/
1364 
1365 HYPRE_Int
BuildParLaplacian27pt(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParCSRMatrix * A_ptr)1366 BuildParLaplacian27pt( HYPRE_Int                  argc,
1367                        char                *argv[],
1368                        HYPRE_Int                  arg_index,
1369                        HYPRE_ParCSRMatrix  *A_ptr     )
1370 {
1371    HYPRE_Int                 nx, ny, nz;
1372    HYPRE_Int                 P, Q, R;
1373 
1374    HYPRE_ParCSRMatrix  A;
1375 
1376    HYPRE_Int                 num_procs, myid;
1377    HYPRE_Int                 p, q, r;
1378    HYPRE_Real         *values;
1379 
1380    /*-----------------------------------------------------------
1381     * Initialize some stuff
1382     *-----------------------------------------------------------*/
1383 
1384    hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
1385    hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
1386 
1387    /*-----------------------------------------------------------
1388     * Set defaults
1389     *-----------------------------------------------------------*/
1390 
1391    nx = 10;
1392    ny = 10;
1393    nz = 10;
1394 
1395    P  = 1;
1396    Q  = num_procs;
1397    R  = 1;
1398 
1399    /*-----------------------------------------------------------
1400     * Parse command line
1401     *-----------------------------------------------------------*/
1402    arg_index = 0;
1403    while (arg_index < argc)
1404    {
1405       if ( strcmp(argv[arg_index], "-n") == 0 )
1406       {
1407          arg_index++;
1408          nx = atoi(argv[arg_index++]);
1409          ny = atoi(argv[arg_index++]);
1410          nz = atoi(argv[arg_index++]);
1411       }
1412       else if ( strcmp(argv[arg_index], "-P") == 0 )
1413       {
1414          arg_index++;
1415          P  = atoi(argv[arg_index++]);
1416          Q  = atoi(argv[arg_index++]);
1417          R  = atoi(argv[arg_index++]);
1418       }
1419       else
1420       {
1421          arg_index++;
1422       }
1423    }
1424 
1425    /*-----------------------------------------------------------
1426     * Check a few things
1427     *-----------------------------------------------------------*/
1428 
1429    if ((P*Q*R) != num_procs)
1430    {
1431       hypre_printf("Error: Invalid number of processors or processor topology \n");
1432       exit(1);
1433    }
1434 
1435    /*-----------------------------------------------------------
1436     * Print driver parameters
1437     *-----------------------------------------------------------*/
1438 
1439    if (myid == 0)
1440    {
1441       hypre_printf("  Laplacian_27pt:\n");
1442       hypre_printf("    (nx, ny, nz) = (%d, %d, %d)\n", nx, ny, nz);
1443       hypre_printf("    (Px, Py, Pz) = (%d, %d, %d)\n\n", P,  Q,  R);
1444    }
1445 
1446    /*-----------------------------------------------------------
1447     * Set up the grid structure
1448     *-----------------------------------------------------------*/
1449 
1450    /* compute p,q,r from P,Q,R and myid */
1451    p = myid % P;
1452    q = (( myid - p)/P) % Q;
1453    r = ( myid - p - P*q)/( P*Q );
1454 
1455    /*-----------------------------------------------------------
1456     * Generate the matrix
1457     *-----------------------------------------------------------*/
1458 
1459    values = hypre_CTAlloc(HYPRE_Real,  2, HYPRE_MEMORY_HOST);
1460 
1461    values[0] = 26.0;
1462    if (nx == 1 || ny == 1 || nz == 1)
1463       values[0] = 8.0;
1464    if (nx*ny == 1 || nx*nz == 1 || ny*nz == 1)
1465       values[0] = 2.0;
1466    values[1] = -1.;
1467 
1468    A = (HYPRE_ParCSRMatrix) GenerateLaplacian27pt(hypre_MPI_COMM_WORLD,
1469                                                   nx, ny, nz, P, Q, R, p, q, r, values);
1470 
1471    hypre_TFree(values, HYPRE_MEMORY_HOST);
1472 
1473    *A_ptr = A;
1474 
1475    return (0);
1476 }
1477 
1478 
1479 /*----------------------------------------------------------------------
1480  * Build 7-point in 2D
1481  * Parameters given in command line.
1482  *----------------------------------------------------------------------*/
1483 
1484 HYPRE_Int
BuildParRotate7pt(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParCSRMatrix * A_ptr)1485 BuildParRotate7pt( HYPRE_Int                  argc,
1486                    char                *argv[],
1487                    HYPRE_Int                  arg_index,
1488                    HYPRE_ParCSRMatrix  *A_ptr     )
1489 {
1490    HYPRE_Int                 nx, ny;
1491    HYPRE_Int                 P, Q;
1492 
1493    HYPRE_ParCSRMatrix  A;
1494 
1495    HYPRE_Int                 num_procs, myid;
1496    HYPRE_Int                 p, q;
1497    HYPRE_Real          eps, alpha;
1498 
1499    /*-----------------------------------------------------------
1500     * Initialize some stuff
1501     *-----------------------------------------------------------*/
1502 
1503    hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
1504    hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
1505 
1506    /*-----------------------------------------------------------
1507     * Set defaults
1508     *-----------------------------------------------------------*/
1509 
1510    nx = 10;
1511    ny = 10;
1512 
1513    P  = 1;
1514    Q  = num_procs;
1515 
1516    /*-----------------------------------------------------------
1517     * Parse command line
1518     *-----------------------------------------------------------*/
1519    arg_index = 0;
1520    while (arg_index < argc)
1521    {
1522       if ( strcmp(argv[arg_index], "-n") == 0 )
1523       {
1524          arg_index++;
1525          nx = atoi(argv[arg_index++]);
1526          ny = atoi(argv[arg_index++]);
1527       }
1528       else if ( strcmp(argv[arg_index], "-P") == 0 )
1529       {
1530          arg_index++;
1531          P  = atoi(argv[arg_index++]);
1532          Q  = atoi(argv[arg_index++]);
1533       }
1534       else if ( strcmp(argv[arg_index], "-alpha") == 0 )
1535       {
1536          arg_index++;
1537          alpha  = atof(argv[arg_index++]);
1538       }
1539       else if ( strcmp(argv[arg_index], "-eps") == 0 )
1540       {
1541          arg_index++;
1542          eps  = atof(argv[arg_index++]);
1543       }
1544       else
1545       {
1546          arg_index++;
1547       }
1548    }
1549 
1550    /*-----------------------------------------------------------
1551     * Check a few things
1552     *-----------------------------------------------------------*/
1553 
1554    if ((P*Q) != num_procs)
1555    {
1556       hypre_printf("Error: Invalid number of processors or processor topology \n");
1557       exit(1);
1558    }
1559 
1560    /*-----------------------------------------------------------
1561     * Print driver parameters
1562     *-----------------------------------------------------------*/
1563 
1564    if (myid == 0)
1565    {
1566       hypre_printf("  Rotate 7pt:\n");
1567       hypre_printf("    alpha = %f, eps = %f\n", alpha,eps);
1568       hypre_printf("    (nx, ny) = (%d, %d)\n", nx, ny);
1569       hypre_printf("    (Px, Py) = (%d, %d)\n", P,  Q);
1570    }
1571 
1572    /*-----------------------------------------------------------
1573     * Set up the grid structure
1574     *-----------------------------------------------------------*/
1575 
1576    /* compute p,q from P,Q and myid */
1577    p = myid % P;
1578    q = ( myid - p)/P;
1579 
1580    /*-----------------------------------------------------------
1581     * Generate the matrix
1582     *-----------------------------------------------------------*/
1583 
1584    A = (HYPRE_ParCSRMatrix) GenerateRotate7pt(hypre_MPI_COMM_WORLD,
1585                                               nx, ny, P, Q, p, q, alpha, eps);
1586 
1587    *A_ptr = A;
1588 
1589    return (0);
1590 }
1591 
1592 /*----------------------------------------------------------------------
1593  * Build standard 7-point difference operator using centered differences
1594  *
1595  *  eps*(a(x,y,z) ux)x + (b(x,y,z) uy)y + (c(x,y,z) uz)z
1596  *  d(x,y,z) ux + e(x,y,z) uy + f(x,y,z) uz + g(x,y,z) u
1597  *
1598  *  functions a,b,c,d,e,f,g need to be defined inside par_vardifconv.c
1599  *
1600  *----------------------------------------------------------------------*/
1601 
1602 HYPRE_Int
BuildParVarDifConv(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_ParCSRMatrix * A_ptr,HYPRE_ParVector * rhs_ptr)1603 BuildParVarDifConv( HYPRE_Int                  argc,
1604                     char                *argv[],
1605                     HYPRE_Int                  arg_index,
1606                     HYPRE_ParCSRMatrix  *A_ptr    ,
1607                     HYPRE_ParVector  *rhs_ptr     )
1608 {
1609    HYPRE_Int                 nx, ny, nz;
1610    HYPRE_Int                 P, Q, R;
1611 
1612    HYPRE_ParCSRMatrix  A;
1613    HYPRE_ParVector  rhs;
1614 
1615    HYPRE_Int           num_procs, myid;
1616    HYPRE_Int           p, q, r;
1617    HYPRE_Int           type;
1618    HYPRE_Real          eps;
1619 
1620    /*-----------------------------------------------------------
1621     * Initialize some stuff
1622     *-----------------------------------------------------------*/
1623 
1624    hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
1625    hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
1626 
1627    /*-----------------------------------------------------------
1628     * Set defaults
1629     *-----------------------------------------------------------*/
1630 
1631    nx = 10;
1632    ny = 10;
1633    nz = 10;
1634    P  = 1;
1635    Q  = num_procs;
1636    R  = 1;
1637    eps = 1.0;
1638 
1639    /* type: 0   : default FD;
1640     *       1-3 : FD and examples 1-3 in Ruge-Stuben paper */
1641    type = 0;
1642 
1643    /*-----------------------------------------------------------
1644     * Parse command line
1645     *-----------------------------------------------------------*/
1646    arg_index = 0;
1647    while (arg_index < argc)
1648    {
1649       if ( strcmp(argv[arg_index], "-n") == 0 )
1650       {
1651          arg_index++;
1652          nx = atoi(argv[arg_index++]);
1653          ny = atoi(argv[arg_index++]);
1654          nz = atoi(argv[arg_index++]);
1655       }
1656       else if ( strcmp(argv[arg_index], "-P") == 0 )
1657       {
1658          arg_index++;
1659          P  = atoi(argv[arg_index++]);
1660          Q  = atoi(argv[arg_index++]);
1661          R  = atoi(argv[arg_index++]);
1662       }
1663       else if ( strcmp(argv[arg_index], "-eps") == 0 )
1664       {
1665          arg_index++;
1666          eps  = atof(argv[arg_index++]);
1667       }
1668       else if ( strcmp(argv[arg_index], "-vardifconvRS") == 0 )
1669       {
1670          arg_index++;
1671          type = atoi(argv[arg_index++]);
1672       }
1673       else
1674       {
1675          arg_index++;
1676       }
1677    }
1678 
1679    /*-----------------------------------------------------------
1680     * Check a few things
1681     *-----------------------------------------------------------*/
1682 
1683    if ((P*Q*R) != num_procs)
1684    {
1685       hypre_printf("Error: Invalid number of processors or processor topology \n");
1686       exit(1);
1687    }
1688 
1689    /*-----------------------------------------------------------
1690     * Print driver parameters
1691     *-----------------------------------------------------------*/
1692 
1693    if (myid == 0)
1694    {
1695       hypre_printf("  ell PDE: eps = %f\n", eps);
1696       hypre_printf("    Dx(aDxu) + Dy(bDyu) + Dz(cDzu) + d Dxu + e Dyu + f Dzu  + g u= f\n");
1697       hypre_printf("    (nx, ny, nz) = (%d, %d, %d)\n", nx, ny, nz);
1698       hypre_printf("    (Px, Py, Pz) = (%d, %d, %d)\n", P,  Q,  R);
1699    }
1700    /*-----------------------------------------------------------
1701     * Set up the grid structure
1702     *-----------------------------------------------------------*/
1703 
1704    /* compute p,q,r from P,Q,R and myid */
1705    p = myid % P;
1706    q = (( myid - p)/P) % Q;
1707    r = ( myid - p - P*q)/( P*Q );
1708 
1709    /*-----------------------------------------------------------
1710     * Generate the matrix
1711     *-----------------------------------------------------------*/
1712 
1713    if (0 == type)
1714    {
1715       A = (HYPRE_ParCSRMatrix) GenerateVarDifConv(hypre_MPI_COMM_WORLD,
1716                                                   nx, ny, nz, P, Q, R, p, q, r, eps, &rhs);
1717    }
1718    else
1719    {
1720       A = (HYPRE_ParCSRMatrix) GenerateRSVarDifConv(hypre_MPI_COMM_WORLD,
1721                                                     nx, ny, nz, P, Q, R, p, q, r, eps, &rhs,
1722                                                     type);
1723    }
1724 
1725    *A_ptr = A;
1726    *rhs_ptr = rhs;
1727 
1728    return (0);
1729 }
1730 
1731 /**************************************************************************/
1732 
1733 
SetSysVcoefValues(HYPRE_Int num_fun,HYPRE_Int nx,HYPRE_Int ny,HYPRE_Int nz,HYPRE_Real vcx,HYPRE_Real vcy,HYPRE_Real vcz,HYPRE_Int mtx_entry,HYPRE_Real * values)1734 HYPRE_Int SetSysVcoefValues(HYPRE_Int num_fun, HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int nz, HYPRE_Real vcx,
1735                             HYPRE_Real vcy, HYPRE_Real vcz, HYPRE_Int mtx_entry, HYPRE_Real *values)
1736 {
1737 
1738 
1739    HYPRE_Int sz = num_fun*num_fun;
1740 
1741    values[1*sz + mtx_entry] = -vcx;
1742    values[2*sz + mtx_entry] = -vcy;
1743    values[3*sz + mtx_entry] = -vcz;
1744    values[0*sz + mtx_entry] = 0.0;
1745 
1746    if (nx > 1)
1747    {
1748       values[0*sz + mtx_entry] += 2.0*vcx;
1749    }
1750    if (ny > 1)
1751    {
1752       values[0*sz + mtx_entry] += 2.0*vcy;
1753    }
1754    if (nz > 1)
1755    {
1756       values[0*sz + mtx_entry] += 2.0*vcz;
1757    }
1758 
1759    return 0;
1760 
1761 }
1762 
1763 /*----------------------------------------------------------------------
1764  * Build coordinates for 1D/2D/3D
1765  *----------------------------------------------------------------------*/
1766 
1767 HYPRE_Int
BuildParCoordinates(HYPRE_Int argc,char * argv[],HYPRE_Int arg_index,HYPRE_Int * coorddim_ptr,float ** coord_ptr)1768 BuildParCoordinates( HYPRE_Int                  argc,
1769                      char                *argv[],
1770                      HYPRE_Int                  arg_index,
1771                      HYPRE_Int                 *coorddim_ptr,
1772                      float               **coord_ptr     )
1773 {
1774    HYPRE_Int                 nx, ny, nz;
1775    HYPRE_Int                 P, Q, R;
1776 
1777    HYPRE_Int                 num_procs, myid;
1778    HYPRE_Int                 p, q, r;
1779 
1780    HYPRE_Int                 coorddim;
1781    float               *coordinates;
1782 
1783    /*-----------------------------------------------------------
1784     * Initialize some stuff
1785     *-----------------------------------------------------------*/
1786 
1787    hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
1788    hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
1789 
1790    /*-----------------------------------------------------------
1791     * Set defaults
1792     *-----------------------------------------------------------*/
1793 
1794    nx = 10;
1795    ny = 10;
1796    nz = 10;
1797 
1798    P  = 1;
1799    Q  = num_procs;
1800    R  = 1;
1801 
1802    /*-----------------------------------------------------------
1803     * Parse command line
1804     *-----------------------------------------------------------*/
1805    arg_index = 0;
1806    while (arg_index < argc)
1807    {
1808       if ( strcmp(argv[arg_index], "-n") == 0 )
1809       {
1810          arg_index++;
1811          nx = atoi(argv[arg_index++]);
1812          ny = atoi(argv[arg_index++]);
1813          nz = atoi(argv[arg_index++]);
1814       }
1815       else if ( strcmp(argv[arg_index], "-P") == 0 )
1816       {
1817          arg_index++;
1818          P  = atoi(argv[arg_index++]);
1819          Q  = atoi(argv[arg_index++]);
1820          R  = atoi(argv[arg_index++]);
1821       }
1822       else
1823       {
1824          arg_index++;
1825       }
1826    }
1827 
1828    /* compute p,q,r from P,Q,R and myid */
1829    p = myid % P;
1830    q = (( myid - p)/P) % Q;
1831    r = ( myid - p - P*q)/( P*Q );
1832 
1833    /*-----------------------------------------------------------
1834     * Generate the coordinates
1835     *-----------------------------------------------------------*/
1836 
1837    coorddim = 3;
1838    if (nx<2) coorddim--;
1839    if (ny<2) coorddim--;
1840    if (nz<2) coorddim--;
1841 
1842    if (coorddim>0)
1843       coordinates = GenerateCoordinates (hypre_MPI_COMM_WORLD,
1844                                          nx, ny, nz, P, Q, R, p, q, r, coorddim);
1845    else
1846       coordinates=NULL;
1847 
1848    *coorddim_ptr = coorddim;
1849    *coord_ptr = coordinates;
1850    return (0);
1851 }
1852 
1853 
1854 void
testPMIS(HYPRE_ParCSRMatrix parcsr_A)1855 testPMIS(HYPRE_ParCSRMatrix parcsr_A)
1856 {
1857    HYPRE_Int         nC1 = 0, nC2 = 0, i;
1858    HYPRE_Int         time_index;
1859    hypre_IntArray   *h_CF_marker  = NULL;
1860    hypre_IntArray   *h_CF_marker2 = NULL;
1861    HYPRE_Int        *d_CF_marker  = NULL;
1862    HYPRE_Real        max_row_sum = 1.0;
1863    HYPRE_Int         num_functions = 1;
1864    HYPRE_Real        strong_threshold = 0.25;
1865    HYPRE_Int         debug_flag = 0;
1866    HYPRE_Int         local_num_rows;
1867    HYPRE_Int         first_local_row, last_local_row;
1868    HYPRE_Int         first_local_col, last_local_col;
1869    MPI_Comm          comm = hypre_MPI_COMM_WORLD;
1870    HYPRE_Int         num_procs, myid;
1871 
1872    hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
1873    hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
1874 
1875    HYPRE_ParCSRMatrixGetLocalRange( parcsr_A,
1876                                     &first_local_row, &last_local_row ,
1877                                     &first_local_col, &last_local_col );
1878 
1879    local_num_rows = last_local_row - first_local_row + 1;
1880    //local_num_cols = last_local_col - first_local_col + 1;
1881 
1882    /* Soc on HOST */
1883    HYPRE_ParCSRMatrix parcsr_S   = NULL;
1884 
1885    hypre_BoomerAMGCreateSHost(parcsr_A, strong_threshold, max_row_sum,
1886                               num_functions, NULL, &parcsr_S);
1887 
1888    /* PMIS on HOST */
1889    time_index = hypre_InitializeTiming("Host PMIS");
1890    hypre_BeginTiming(time_index);
1891 
1892    hypre_BoomerAMGCoarsenPMISHost(parcsr_S, parcsr_A, 2, debug_flag, &h_CF_marker);
1893 
1894    hypre_EndTiming(time_index);
1895    hypre_PrintTiming("Host PMIS", hypre_MPI_COMM_WORLD);
1896    hypre_FinalizeTiming(time_index);
1897    hypre_ClearTiming();
1898 
1899    /* Soc on DEVICE */
1900    HYPRE_ParCSRMatrix parcsr_S_device  = NULL;
1901    hypre_BoomerAMGCreateSDevice(parcsr_A, strong_threshold, max_row_sum,
1902                                 num_functions, NULL, &parcsr_S_device);
1903    /* PMIS on DEVICE */
1904    time_index = hypre_InitializeTiming("Device PMIS");
1905    hypre_BeginTiming(time_index);
1906 
1907    h_CF_marker2 = hypre_IntArrayCreate(local_num_rows);
1908    hypre_IntArrayInitialize(h_CF_marker2);
1909    hypre_BoomerAMGCoarsenPMISDevice(parcsr_S_device, parcsr_A, 2, debug_flag, &h_CF_marker2);
1910 
1911    hypre_EndTiming(time_index);
1912    hypre_PrintTiming("Device PMIS", hypre_MPI_COMM_WORLD);
1913    hypre_FinalizeTiming(time_index);
1914    hypre_ClearTiming();
1915 
1916    //h_CF_marker2 = hypre_TAlloc(HYPRE_Int, local_num_rows, HYPRE_MEMORY_HOST);
1917    //hypre_TMemcpy(h_CF_marker2, d_CF_marker, HYPRE_Int, local_num_rows, HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE);
1918 
1919    for (i = 0; i < local_num_rows; i++)
1920    {
1921       if (hypre_IntArrayData(h_CF_marker)[i] > 0)
1922       {
1923          nC1++;
1924       }
1925 
1926       hypre_assert(hypre_IntArrayData(h_CF_marker2)[i] == 1 || hypre_IntArrayData(h_CF_marker2)[i] == -1 || hypre_IntArrayData(h_CF_marker2)[i] == -3);
1927 
1928       //hypre_assert(h_CF_marker[i] == h_CF_marker2[i]);
1929 
1930       if (hypre_IntArrayData(h_CF_marker2)[i] > 0)
1931       {
1932          nC2++;
1933       }
1934    }
1935 
1936    HYPRE_Int allnC1, allnC2;
1937    hypre_MPI_Allreduce(&nC1, &allnC1, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);
1938    hypre_MPI_Allreduce(&nC2, &allnC2, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);
1939    if (myid == 0)
1940    {
1941       printf("nC1 %d nC2 %d\n", allnC1, allnC2);
1942    }
1943 
1944    hypre_ParCSRMatrixDestroy(parcsr_S);
1945    hypre_ParCSRMatrixDestroy(parcsr_S_device);
1946    hypre_IntArrayDestroy(h_CF_marker);
1947    hypre_IntArrayDestroy(h_CF_marker2);
1948    hypre_TFree(d_CF_marker,  HYPRE_MEMORY_DEVICE);
1949 }
1950 
1951 void
testPMIS3(HYPRE_ParCSRMatrix parcsr_A)1952 testPMIS3(HYPRE_ParCSRMatrix parcsr_A)
1953 {
1954    HYPRE_Int         nC2 = 0, i;
1955    hypre_IntArray   *h_CF_marker2 = NULL;
1956    HYPRE_Real        max_row_sum = 1.0;
1957    HYPRE_Int         num_functions = 1;
1958    HYPRE_Real        strong_threshold = 0.25;
1959    HYPRE_Int         debug_flag = 0;
1960    HYPRE_Int         local_num_rows;
1961    HYPRE_Int         first_local_row, last_local_row;
1962    HYPRE_Int         first_local_col, last_local_col;
1963    MPI_Comm          comm = hypre_MPI_COMM_WORLD;
1964    HYPRE_Int         num_procs, myid;
1965 
1966    hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
1967    hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );
1968 
1969    HYPRE_ParCSRMatrixGetLocalRange( parcsr_A,
1970                                     &first_local_row, &last_local_row ,
1971                                     &first_local_col, &last_local_col );
1972 
1973    local_num_rows = last_local_row - first_local_row + 1;
1974    //local_num_cols = last_local_col - first_local_col + 1;
1975 
1976    /* Soc on DEVICE */
1977    HYPRE_ParCSRMatrix parcsr_S_device  = NULL;
1978    hypre_BoomerAMGCreateSDevice(parcsr_A, strong_threshold, max_row_sum,
1979                                 num_functions, NULL, &parcsr_S_device);
1980    /* PMIS on DEVICE */
1981    h_CF_marker2 = hypre_IntArrayCreate(local_num_rows);
1982    hypre_IntArrayInitialize(h_CF_marker2);
1983    hypre_BoomerAMGCoarsenPMISDevice(parcsr_S_device, parcsr_A, 2, debug_flag, &h_CF_marker2);
1984 
1985    for (i = 0; i < local_num_rows; i++)
1986    {
1987       hypre_assert(hypre_IntArrayData(h_CF_marker2)[i] == 1 || hypre_IntArrayData(h_CF_marker2)[i] == -1 || hypre_IntArrayData(h_CF_marker2)[i] == -3);
1988 
1989       if (hypre_IntArrayData(h_CF_marker2)[i] > 0)
1990       {
1991          nC2++;
1992       }
1993    }
1994 
1995    HYPRE_Int allnC2;
1996    hypre_MPI_Allreduce(&nC2, &allnC2, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);
1997    if (myid == 0)
1998    {
1999       printf("nC2 %d\n", allnC2);
2000    }
2001 
2002    hypre_ParCSRMatrixDestroy(parcsr_S_device);
2003    hypre_IntArrayDestroy(h_CF_marker2);
2004 }
2005 
2006 void
testPMIS2(HYPRE_ParCSRMatrix parcsr_A)2007 testPMIS2(HYPRE_ParCSRMatrix parcsr_A)
2008 {
2009    HYPRE_Int         nC2 = 0, i;
2010    hypre_IntArray   *h_CF_marker  = NULL;
2011    hypre_IntArray   *h_CF_marker2 = NULL;
2012    HYPRE_Real        max_row_sum = 1.0;
2013    HYPRE_Int         num_functions = 1;
2014    HYPRE_Real        strong_threshold = 0.25;
2015    HYPRE_Int         debug_flag = 0;
2016    HYPRE_Int         local_num_rows;
2017    HYPRE_Int         first_local_row, last_local_row;
2018    HYPRE_Int         first_local_col, last_local_col;
2019    MPI_Comm          comm = hypre_ParCSRMatrixComm(parcsr_A);
2020    HYPRE_Int         num_procs, myid;
2021 
2022    hypre_MPI_Comm_size(comm, &num_procs );
2023    hypre_MPI_Comm_rank(comm, &myid );
2024 
2025    HYPRE_ParCSRMatrixGetLocalRange( parcsr_A,
2026                                     &first_local_row, &last_local_row ,
2027                                     &first_local_col, &last_local_col );
2028 
2029    local_num_rows = last_local_row - first_local_row + 1;
2030    //local_num_cols = last_local_col - first_local_col + 1;
2031 
2032    /* Soc on DEVICE */
2033    HYPRE_ParCSRMatrix parcsr_S_device  = NULL;
2034    hypre_BoomerAMGCreateSDevice(parcsr_A, strong_threshold, max_row_sum,
2035                                 num_functions, NULL, &parcsr_S_device);
2036    /* PMIS on DEVICE */
2037    h_CF_marker = hypre_IntArrayCreate(local_num_rows);
2038    hypre_IntArrayInitialize(h_CF_marker);
2039    hypre_BoomerAMGCoarsenPMISDevice(parcsr_S_device, parcsr_A, 2, debug_flag, &h_CF_marker);
2040 
2041    HYPRE_Int *coarse_pnts_global = NULL;
2042    hypre_BoomerAMGCoarseParms(comm, local_num_rows, 1, NULL, h_CF_marker, NULL,
2043          &coarse_pnts_global);
2044 
2045    /* interp */
2046    hypre_ParCSRMatrix *P;
2047    hypre_BoomerAMGBuildDirInterpDevice(parcsr_A, hypre_IntArrayData(h_CF_marker), parcsr_S_device,
2048                                        coarse_pnts_global, 1, NULL,
2049                                        debug_flag, 0.0, 0, 3, &P);
2050 
2051    hypre_ParCSRMatrix *AH = hypre_ParCSRMatrixRAPKTDevice(P, parcsr_A, P, 1);
2052    hypre_ParCSRMatrixSetNumNonzeros(AH);
2053 
2054    //printf("AH %d, %d\n", hypre_ParCSRMatrixGlobalNumRows(AH), hypre_ParCSRMatrixNumNonzeros(AH));
2055 
2056    hypre_ParCSRMatrixPrintIJ(AH, 0, 0, "AH");
2057 
2058    HYPRE_Int local_num_rows2 = hypre_ParCSRMatrixNumRows(AH);
2059 
2060    hypre_ParCSRMatrix *S2;
2061    hypre_BoomerAMGCreateSDevice(AH, strong_threshold, max_row_sum,
2062                                 num_functions, NULL, &S2);
2063 
2064    h_CF_marker2 = hypre_IntArrayCreate(local_num_rows2);
2065    hypre_IntArrayInitialize(h_CF_marker2);
2066 
2067    hypre_BoomerAMGCoarsenPMISDevice(S2, AH, 2, debug_flag, &h_CF_marker2);
2068 
2069    for (i = 0; i < local_num_rows2; i++)
2070    {
2071       hypre_assert(hypre_IntArrayData(h_CF_marker2)[i] == 1 || hypre_IntArrayData(h_CF_marker2)[i] == -1 || hypre_IntArrayData(h_CF_marker2)[i] == -3);
2072 
2073       if (hypre_IntArrayData(h_CF_marker2)[i] > 0)
2074       {
2075          nC2++;
2076       }
2077    }
2078 
2079    HYPRE_Int allnC2;
2080    hypre_MPI_Allreduce(&nC2, &allnC2, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);
2081    if (myid == 0)
2082    {
2083       printf("nC2 %d\n", allnC2);
2084    }
2085 
2086    hypre_ParCSRMatrixDestroy(parcsr_S_device);
2087    hypre_ParCSRMatrixDestroy(S2);
2088    hypre_IntArrayDestroy(h_CF_marker);
2089    hypre_IntArrayDestroy(h_CF_marker2);
2090 }
2091 
2092 void
testTranspose(HYPRE_ParCSRMatrix parcsr_A)2093 testTranspose(HYPRE_ParCSRMatrix parcsr_A)
2094 {
2095    HYPRE_Int    myid;
2096    hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid);
2097 
2098    HYPRE_Real tol = 0.0;
2099    HYPRE_Int  ierr = 0;
2100 
2101    HYPRE_ParCSRMatrix parcsr_AT;
2102    hypre_ParCSRMatrixTransposeDevice(parcsr_A, &parcsr_AT, 1);
2103 
2104    HYPRE_ParCSRMatrix parcsr_AT_h;
2105    HYPRE_ParCSRMatrix parcsr_A_h = hypre_ParCSRMatrixClone_v2(parcsr_A, 1, HYPRE_MEMORY_HOST);
2106    hypre_ParCSRMatrixTransposeHost(parcsr_A_h, &parcsr_AT_h, 1);
2107 
2108    ierr += CompareParCSRDH(parcsr_AT_h, parcsr_AT, tol); hypre_assert(!ierr);
2109 
2110    hypre_ParCSRMatrixDestroy(parcsr_AT);
2111    hypre_ParCSRMatrixDestroy(parcsr_AT_h);
2112 
2113    //
2114    hypre_ParCSRMatrixTransposeDevice(parcsr_A, &parcsr_AT, 0);
2115    hypre_ParCSRMatrixTransposeHost(parcsr_A_h, &parcsr_AT_h, 0);
2116 
2117    hypre_ParCSRMatrixSetConstantValues(parcsr_AT, 1.0);
2118    hypre_ParCSRMatrixSetConstantValues(parcsr_AT_h, 1.0);
2119 
2120    ierr += CompareParCSRDH(parcsr_AT_h, parcsr_AT, tol); hypre_assert(!ierr);
2121 
2122    hypre_ParCSRMatrixDestroy(parcsr_AT);
2123    hypre_ParCSRMatrixDestroy(parcsr_AT_h);
2124    hypre_ParCSRMatrixDestroy(parcsr_A_h);
2125 
2126    //
2127    HYPRE_Int    first_local_row, last_local_row;
2128    HYPRE_Int    first_local_col, last_local_col;
2129    HYPRE_ParCSRMatrixGetLocalRange( parcsr_A,
2130                                     &first_local_row, &last_local_row ,
2131                                     &first_local_col, &last_local_col );
2132    HYPRE_Int local_num_rows = last_local_row - first_local_row + 1;
2133    HYPRE_ParCSRMatrix parcsr_S_device  = NULL;
2134    hypre_BoomerAMGCreateSDevice(parcsr_A, 0.25, 1.0, 1, NULL, &parcsr_S_device);
2135    hypre_IntArray *h_CF_marker = hypre_IntArrayCreate(local_num_rows);
2136    hypre_IntArrayInitialize(h_CF_marker);
2137    hypre_BoomerAMGCoarsenPMISDevice(parcsr_S_device, parcsr_A, 2, 0, &h_CF_marker);
2138    hypre_ParCSRMatrix *P, *PT, *P_h, *PT_h, *P2;
2139    HYPRE_Int *coarse_pnts_global = NULL;
2140    MPI_Comm comm = hypre_ParCSRMatrixComm(parcsr_A);
2141    hypre_BoomerAMGCoarseParms(comm, local_num_rows, 1, NULL, h_CF_marker, NULL, &coarse_pnts_global);
2142    hypre_BoomerAMGBuildDirInterpDevice(parcsr_A, hypre_IntArrayData(h_CF_marker), parcsr_S_device,
2143                                        coarse_pnts_global, 1, NULL,
2144                                        0, 0.0, 0, 3, &P);
2145    P_h = hypre_ParCSRMatrixClone_v2(P, 1, HYPRE_MEMORY_HOST);
2146 
2147    hypre_ParCSRMatrixTransposeDevice(P, &PT, 1);
2148    hypre_ParCSRMatrixTransposeHost(P_h, &PT_h, 1);
2149    hypre_ParCSRMatrixTransposeDevice(PT, &P2, 1);
2150 
2151    ierr += CompareParCSRDH(PT_h, PT, tol); hypre_assert(!ierr);
2152    ierr += CompareParCSRDH(P_h, P2, tol); hypre_assert(!ierr);
2153 
2154    if (myid == 0 && !ierr)
2155    {
2156       printf("[hypre_ParCSRMatrixTranspose] All Tests were OK ...\n");
2157    }
2158 
2159    hypre_ParCSRMatrixDestroy(P);
2160    hypre_ParCSRMatrixDestroy(PT);
2161    hypre_ParCSRMatrixDestroy(P_h);
2162    hypre_ParCSRMatrixDestroy(PT_h);
2163    hypre_ParCSRMatrixDestroy(P2);
2164    hypre_IntArrayDestroy(h_CF_marker);
2165 }
2166 
2167 void
testAdd(HYPRE_ParCSRMatrix parcsr_A)2168 testAdd(HYPRE_ParCSRMatrix parcsr_A)
2169 {
2170    HYPRE_Int    myid;
2171    hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid);
2172 
2173    HYPRE_Real tol = 1e-14;
2174    HYPRE_Int  ierr = 0;
2175    HYPRE_Real alpha = 3.141592654, beta = 2.718281828*9.9;
2176 
2177    HYPRE_ParCSRMatrix parcsr_A2 = hypre_ParCSRMatMatDevice(parcsr_A, parcsr_A);
2178    HYPRE_ParCSRMatrix parcsr_C;
2179    hypre_ParCSRMatrixAddDevice(alpha, parcsr_A, beta, parcsr_A2, &parcsr_C);
2180 
2181    HYPRE_ParCSRMatrix parcsr_A_h = hypre_ParCSRMatrixClone_v2(parcsr_A, 1, HYPRE_MEMORY_HOST);
2182    HYPRE_ParCSRMatrix parcsr_A2_h = hypre_ParCSRMatrixClone_v2(parcsr_A2, 1, HYPRE_MEMORY_HOST);
2183    HYPRE_ParCSRMatrix parcsr_C_h;
2184    hypre_ParCSRMatrixAddHost(alpha, parcsr_A_h, beta, parcsr_A2_h, &parcsr_C_h);
2185    ierr += CompareParCSRDH(parcsr_C_h, parcsr_C, tol); hypre_assert(!ierr);
2186 
2187    hypre_ParCSRMatrixDestroy(parcsr_A2);
2188    hypre_ParCSRMatrixDestroy(parcsr_C);
2189    hypre_ParCSRMatrixDestroy(parcsr_A_h);
2190    hypre_ParCSRMatrixDestroy(parcsr_A2_h);
2191    hypre_ParCSRMatrixDestroy(parcsr_C_h);
2192 
2193    //
2194    HYPRE_Int    first_local_row, last_local_row;
2195    HYPRE_Int    first_local_col, last_local_col;
2196    HYPRE_ParCSRMatrixGetLocalRange( parcsr_A,
2197                                     &first_local_row, &last_local_row ,
2198                                     &first_local_col, &last_local_col );
2199    HYPRE_Int local_num_rows = last_local_row - first_local_row + 1;
2200    HYPRE_ParCSRMatrix parcsr_S_device  = NULL;
2201    hypre_BoomerAMGCreateSDevice(parcsr_A, 0.25, 1.0, 1, NULL, &parcsr_S_device);
2202    hypre_IntArray *h_CF_marker = hypre_IntArrayCreate(local_num_rows);
2203    hypre_IntArrayInitialize(h_CF_marker);
2204    hypre_BoomerAMGCoarsenPMISDevice(parcsr_S_device, parcsr_A, 2, 0, &h_CF_marker);
2205    hypre_ParCSRMatrix *P, *AP, *P_h, *AP_h;
2206    HYPRE_Int *coarse_pnts_global = NULL;
2207    MPI_Comm comm = hypre_ParCSRMatrixComm(parcsr_A);
2208    hypre_BoomerAMGCoarseParms(comm, local_num_rows, 1, NULL, h_CF_marker, NULL, &coarse_pnts_global);
2209    hypre_BoomerAMGBuildDirInterpDevice(parcsr_A, hypre_IntArrayData(h_CF_marker), parcsr_S_device,
2210                                        coarse_pnts_global, 1, NULL,
2211                                        0, 0.0, 0, 3, &P);
2212    AP = hypre_ParCSRMatMatDevice(parcsr_A, P);
2213    P_h = hypre_ParCSRMatrixClone_v2(P, 1, HYPRE_MEMORY_HOST);
2214    AP_h = hypre_ParCSRMatrixClone_v2(AP, 1, HYPRE_MEMORY_HOST);
2215    hypre_ParCSRMatrixAddDevice(alpha, P, beta, AP, &parcsr_C);
2216    hypre_ParCSRMatrixAddHost(alpha, P_h, beta, AP_h, &parcsr_C_h);
2217 
2218    ierr += CompareParCSRDH(parcsr_C_h, parcsr_C, tol); hypre_assert(!ierr);
2219 
2220    hypre_ParCSRMatrixDestroy(P);
2221    hypre_ParCSRMatrixDestroy(AP);
2222    hypre_ParCSRMatrixDestroy(P_h);
2223    hypre_ParCSRMatrixDestroy(AP_h);
2224    hypre_ParCSRMatrixDestroy(parcsr_C);
2225    hypre_ParCSRMatrixDestroy(parcsr_C_h);
2226    hypre_ParCSRMatrixDestroy(parcsr_S_device);
2227    hypre_IntArrayDestroy(h_CF_marker);
2228 
2229    if (myid == 0 && !ierr)
2230    {
2231       printf("[hypre_ParCSRMatrixAdd] All Tests were OK ...\n");
2232    }
2233 }
2234 
2235 void
testFFFC(HYPRE_ParCSRMatrix parcsr_A)2236 testFFFC(HYPRE_ParCSRMatrix parcsr_A)
2237 {
2238    HYPRE_Real        max_row_sum = 1.0;
2239    HYPRE_Real        strong_threshold = 0.25;
2240    HYPRE_Int         debug_flag = 0;
2241    HYPRE_Int         num_functions = 1;
2242    hypre_IntArray   *h_CF_marker  = NULL;
2243    HYPRE_Int         local_num_rows;
2244    HYPRE_Int         first_local_row, last_local_row;
2245    HYPRE_Int         first_local_col, last_local_col;
2246    HYPRE_Int         *coarse_dof_func = NULL;
2247    HYPRE_BigInt      *coarse_pnts_global = NULL;
2248    HYPRE_Int         ierr = 0;
2249    HYPRE_Int         myid;
2250 
2251    hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid);
2252    HYPRE_ParCSRMatrix parcsr_A_h;
2253    HYPRE_ParCSRMatrix AFF, AFC, W;
2254    HYPRE_ParCSRMatrix AFF_h, AFC_h, W_h;
2255    HYPRE_ParCSRMatrix parcsr_S_h, parcsr_S_device;
2256 
2257    HYPRE_ParCSRMatrixGetLocalRange( parcsr_A,
2258                                     &first_local_row, &last_local_row ,
2259                                     &first_local_col, &last_local_col );
2260 
2261    local_num_rows = last_local_row - first_local_row + 1;
2262 
2263    /* Soc on DEVICE */
2264    hypre_BoomerAMGCreateSDevice(parcsr_A, strong_threshold, max_row_sum, num_functions, NULL, &parcsr_S_device);
2265    h_CF_marker = hypre_IntArrayCreate(local_num_rows);
2266    hypre_IntArrayInitialize(h_CF_marker);
2267    hypre_BoomerAMGCoarsenPMISDevice(parcsr_S_device, parcsr_A, 0, debug_flag, &h_CF_marker);
2268    hypre_BoomerAMGCoarseParms(hypre_ParCSRMatrixComm(parcsr_A), local_num_rows, num_functions, NULL,
2269                               h_CF_marker, &coarse_dof_func, &coarse_pnts_global);
2270 
2271    /* FFFC on Device */
2272    hypre_ParCSRMatrixGenerateFFFCDevice(parcsr_A, hypre_IntArrayData(h_CF_marker), coarse_pnts_global, parcsr_S_device, &AFC, &AFF);
2273 
2274    /* FFFC on Host */
2275    parcsr_A_h = hypre_ParCSRMatrixClone_v2(parcsr_A, 1, HYPRE_MEMORY_HOST);
2276    parcsr_S_h = hypre_ParCSRMatrixClone_v2(parcsr_S_device, 0, HYPRE_MEMORY_HOST);
2277    hypre_MatvecCommPkgCreate(parcsr_A_h);
2278    hypre_ParCSRMatrixGenerateFFFC(parcsr_A_h, hypre_IntArrayData(h_CF_marker), coarse_pnts_global, parcsr_S_h, &AFC_h, &AFF_h);
2279 
2280    /* AFF * AFC */
2281    W_h = hypre_ParCSRMatMatHost(AFF_h, AFC_h);
2282    W   = hypre_ParCSRMatMatDevice(AFF, AFC);
2283 
2284    /* check */
2285    HYPRE_Real tol = 1e-15;
2286    ierr += CompareParCSRDH(AFF_h, AFF, tol); hypre_assert(!ierr);
2287    ierr += CompareParCSRDH(AFC_h, AFC, tol); hypre_assert(!ierr);
2288    ierr += CompareParCSRDH(W_h,     W, tol); hypre_assert(!ierr);
2289 
2290    if (myid == 0 && !ierr)
2291    {
2292       printf("All Tests were OK ...\n");
2293    }
2294 
2295    /* done */
2296    hypre_TFree(h_CF_marker, HYPRE_MEMORY_HOST);
2297    hypre_IntArrayDestroy(h_CF_marker);
2298    hypre_ParCSRMatrixDestroy(parcsr_A_h);
2299    hypre_ParCSRMatrixDestroy(AFF);
2300    hypre_ParCSRMatrixDestroy(AFC);
2301    hypre_ParCSRMatrixDestroy(W);
2302    hypre_ParCSRMatrixDestroy(AFF_h);
2303    hypre_ParCSRMatrixDestroy(AFC_h);
2304    hypre_ParCSRMatrixDestroy(W_h);
2305 }
2306 
2307 HYPRE_Int
CompareParCSRDH(HYPRE_ParCSRMatrix hmat,HYPRE_ParCSRMatrix dmat,HYPRE_Real tol)2308 CompareParCSRDH(HYPRE_ParCSRMatrix hmat, HYPRE_ParCSRMatrix dmat, HYPRE_Real tol)
2309 {
2310    HYPRE_ParCSRMatrix hmat2, emat;
2311    HYPRE_Real enorm, fnorm, rnorm;
2312    HYPRE_Int i, ecode = 0, ecode_total = 0;
2313 
2314    hmat2 = hypre_ParCSRMatrixClone_v2(dmat, 1, HYPRE_MEMORY_HOST);
2315 
2316    if (hypre_ParCSRMatrixNumRows(hmat) != hypre_ParCSRMatrixNumRows(hmat2))
2317    {
2318       ecode ++;
2319    }
2320    if (hypre_ParCSRMatrixNumCols(hmat) != hypre_ParCSRMatrixNumCols(hmat2))
2321    {
2322       ecode ++;
2323    }
2324    if (hypre_CSRMatrixNumRows(hypre_ParCSRMatrixOffd(hmat)) != hypre_CSRMatrixNumRows(hypre_ParCSRMatrixOffd(hmat2)))
2325    {
2326       ecode ++;
2327    }
2328    if (hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(hmat)) != hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(hmat2)))
2329    {
2330       ecode ++;
2331    }
2332    for (i = 0; i < hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(hmat)); i++)
2333    {
2334       if (hypre_ParCSRMatrixColMapOffd(hmat)[i] != hypre_ParCSRMatrixColMapOffd(hmat2)[i])
2335       {
2336          ecode++;
2337          break;
2338       }
2339    }
2340    if (hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixDiag(hmat)) != hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixDiag(hmat2)))
2341    {
2342       ecode ++;
2343    }
2344    if (hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(hmat)) != hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(hmat2)))
2345    {
2346       ecode ++;
2347    }
2348 
2349    hypre_MPI_Allreduce(&ecode, &ecode_total, 1, HYPRE_MPI_INT, hypre_MPI_SUM, hypre_MPI_COMM_WORLD);
2350 
2351    hypre_ParCSRMatrixAdd(1.0, hmat, -1.0, hmat2, &emat);
2352    enorm = hypre_ParCSRMatrixFnorm(emat);
2353 
2354    fnorm = hypre_ParCSRMatrixFnorm(hmat);
2355    rnorm = fnorm > 0 ? enorm / fnorm : enorm;
2356    if ( rnorm > tol )
2357    {
2358       ecode_total ++;
2359    }
2360 
2361    printf("relative error %e = %e / %e\n", rnorm, enorm, fnorm);
2362 
2363    hypre_ParCSRMatrixDestroy(hmat2);
2364    hypre_ParCSRMatrixDestroy(emat);
2365 
2366    return ecode_total;
2367 }
2368