1 /******************************************************************************
2  * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3  * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4  *
5  * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6  ******************************************************************************/
7 
8 /*
9    Example 15
10 
11    Interface:      Semi-Structured interface (SStruct)
12 
13    Compile with:   make ex15
14 
15    Sample run:     mpirun -np 8 ex15 -n 10
16 
17    To see options: ex15 -help
18 
19    Description:    This code solves a 3D electromagnetic diffusion (definite
20                    curl-curl) problem using the lowest order Nedelec, or "edge"
21                    finite element discretization on a uniform hexahedral meshing
22                    of the unit cube.  The right-hand-side corresponds to a unit
23                    vector force and we use uniform zero Dirichlet boundary
24                    conditions.  The overall problem reads:
25                                 curl alpha curl E + beta E = 1,
26                    with E x n = 0 on the boundary, where alpha and beta are
27                    piecewise-constant material coefficients.
28 
29                    The linear system is split in parallel using the SStruct
30                    interface with an n x n x n grid on each processors, and
31                    similar N x N x N processor grid.  Therefore, the number of
32                    processors should be a perfect cube.
33 
34                    This example code is mainly meant as an illustration of using
35                    the Auxiliary-space Maxwell Solver (AMS) through the SStruct
36                    interface.  It is also an example of setting up a finite
37                    element discretization in the SStruct interface, and we
38                    recommend viewing Example 13 and Example 14 before viewing
39                    this example.
40 */
41 
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <math.h>
46 #include "HYPRE_sstruct_mv.h"
47 #include "HYPRE_sstruct_ls.h"
48 #include "HYPRE.h"
49 #include "ex.h"
50 
51 #ifdef HYPRE_EXVIS
52 #include "vis.c"
53 #endif
54 
55 int optionAlpha, optionBeta;
56 
57 /* Curl-curl coefficient alpha = mu^{-1} */
alpha(double x,double y,double z)58 double alpha(double x, double y, double z)
59 {
60    switch (optionAlpha)
61    {
62       case 0: /* uniform coefficient */
63          return 1.0;
64       case 1: /* smooth coefficient */
65          return x*x+exp(y)+sin(z);
66       case 2: /* small outside of an interior cube */
67          if ((fabs(x-0.5) < 0.25) && (fabs(y-0.5) < 0.25) && (fabs(z-0.5) < 0.25))
68             return 1.0;
69          else
70             return 1.0e-6;
71       case 3: /* small outside of an interior ball */
72          if (((x-0.5)*(x-0.5)+(y-0.5)*(y-0.5)+(z-0.5)*(z-0.5)) < 0.0625)
73             return 1.0;
74          else
75             return 1.0e-6;
76       case 4: /* random coefficient */
77          return ((double)rand()/RAND_MAX);
78       default:
79          return 1.0;
80    }
81 }
82 
83 /* Mass coefficient beta = sigma */
beta(double x,double y,double z)84 double beta(double x, double y, double z)
85 {
86    switch (optionBeta)
87    {
88       case 0: /* uniform coefficient */
89          return 1.0;
90       case 1: /* smooth coefficient */
91          return x*x+exp(y)+sin(z);
92       case 2:/* small outside of interior cube */
93          if ((fabs(x-0.5) < 0.25) && (fabs(y-0.5) < 0.25) && (fabs(z-0.5) < 0.25))
94             return 1.0;
95          else
96             return 1.0e-6;
97       case 3: /* small outside of an interior ball */
98          if (((x-0.5)*(x-0.5)+(y-0.5)*(y-0.5)+(z-0.5)*(z-0.5)) < 0.0625)
99             return 1.0;
100          else
101             return 1.0e-6;
102       case 4: /* random coefficient */
103          return ((double)rand()/RAND_MAX);
104       default:
105          return 1.0;
106    }
107 }
108 
109 /*
110    This routine computes the lowest order Nedelec, or "edge" finite element
111    stiffness matrix and load vector on a cube of size h.  The 12 edges {e_i}
112    are numbered in terms of the vertices as follows:
113 
114            [7]------[6]
115            /|       /|     e_0 = 01, e_1 = 12, e_2  = 32, e_3  = 03,
116           / |      / |     e_4 = 45, e_5 = 56, e_6  = 76, e_7  = 47,
117         [4]------[5] |     e_8 = 04, e_9 = 15, e_10 = 26, e_11 = 37.
118          | [3]----|-[2]
119          | /      | /      The edges are oriented from first to the
120          |/       |/       second vertex, e.g. e_0 is from [0] to [1].
121         [0]------[1]
122 
123    We allow for different scaling of the curl-curl and the mass parts of the
124    matrix with coefficients alpha and beta respectively:
125 
126          S_ij = alpha (curl phi_i,curl phi_j) + beta (phi_i, phi_j).
127 
128    The load vector corresponding to a right-hand side of {1,1,1} is
129 
130                         F_j = (1,phi_j) = h^2/4.
131 */
ComputeFEMND1(double ** S,double F[12],double x,double y,double z,double h)132 void ComputeFEMND1(double **S, double F[12],
133                    double x, double y, double z, double h)
134 {
135    int i, j;
136 
137    double h2_4 = h*h/4;
138 
139    double cS1 = alpha(x,y,z)/(6.0*h), cS2 = 2*cS1, cS4 = 2*cS2;
140    double cM1 = beta(x,y,z)*h/36.0,   cM2 = 2*cM1, cM4 = 2*cM2;
141 
142    S[ 0][ 0] =  cS4 + cM4;   S[ 0][ 1] =  cS2;         S[ 0][ 2] = -cS1 + cM2;
143    S[ 0][ 3] = -cS2;         S[ 0][ 4] = -cS1 + cM2;   S[ 0][ 5] =  cS1;
144    S[ 0][ 6] = -cS2 + cM1;   S[ 0][ 7] = -cS1;         S[ 0][ 8] = -cS2;
145    S[ 0][ 9] =  cS2;         S[ 0][10] =  cS1;         S[ 0][11] = -cS1;
146 
147    S[ 1][ 1] =  cS4 + cM4;   S[ 1][ 2] = -cS2;         S[ 1][ 3] = -cS1 + cM2;
148    S[ 1][ 4] =  cS1;         S[ 1][ 5] = -cS1 + cM2;   S[ 1][ 6] = -cS1;
149    S[ 1][ 7] = -cS2 + cM1;   S[ 1][ 8] = -cS1;         S[ 1][ 9] = -cS2;
150    S[ 1][10] =  cS2;         S[ 1][11] =  cS1;
151 
152    S[ 2][ 2] =  cS4 + cM4;   S[ 2][ 3] =  cS2;         S[ 2][ 4] = -cS2 + cM1;
153    S[ 2][ 5] = -cS1;         S[ 2][ 6] = -cS1 + cM2;   S[ 2][ 7] =  cS1;
154    S[ 2][ 8] = -cS1;         S[ 2][ 9] =  cS1;         S[ 2][10] =  cS2;
155    S[ 2][11] = -cS2;
156 
157    S[ 3][ 3] =  cS4 + cM4;   S[ 3][ 4] = -cS1;         S[ 3][ 5] = -cS2 + cM1;
158    S[ 3][ 6] =  cS1;         S[ 3][ 7] = -cS1 + cM2;   S[ 3][ 8] = -cS2;
159    S[ 3][ 9] = -cS1;         S[ 3][10] =  cS1;         S[ 3][11] =  cS2;
160 
161    S[ 4][ 4] =  cS4 + cM4;   S[ 4][ 5] =  cS2;         S[ 4][ 6] = -cS1 + cM2;
162    S[ 4][ 7] = -cS2;         S[ 4][ 8] =  cS2;         S[ 4][ 9] = -cS2;
163    S[ 4][10] = -cS1;         S[ 4][11] =  cS1;
164 
165    S[ 5][ 5] =  cS4 + cM4;   S[ 5][ 6] = -cS2;         S[ 5][ 7] = -cS1 + cM2;
166    S[ 5][ 8] =  cS1;         S[ 5][ 9] =  cS2;         S[ 5][10] = -cS2;
167    S[ 5][11] = -cS1;
168 
169    S[ 6][ 6] =  cS4 + cM4;   S[ 6][ 7] =  cS2;         S[ 6][ 8] =  cS1;
170    S[ 6][ 9] = -cS1;         S[ 6][10] = -cS2;         S[ 6][11] =  cS2;
171 
172    S[ 7][ 7] =  cS4 + cM4;   S[ 7][ 8] =  cS2;         S[ 7][ 9] =  cS1;
173    S[ 7][10] = -cS1;         S[ 7][11] = -cS2;
174 
175    S[ 8][ 8] =  cS4 + cM4;   S[ 8][ 9] = -cS1 + cM2;   S[ 8][10] = -cS2 + cM1;
176    S[ 8][11] = -cS1 + cM2;
177 
178    S[ 9][ 9] =  cS4 + cM4;   S[ 9][10] = -cS1 + cM2;   S[ 9][11] = -cS2 + cM1;
179 
180    S[10][10] =  cS4 + cM4;   S[10][11] = -cS1 + cM2;
181 
182    S[11][11] =  cS4 + cM4;
183 
184    /* The stiffness matrix is symmetric */
185    for (i = 1; i < 12; i++)
186       for (j = 0; j < i; j++)
187          S[i][j] = S[j][i];
188 
189    for (i = 0; i < 12; i++)
190       F[i] = h2_4;
191 }
192 
193 
main(int argc,char * argv[])194 int main (int argc, char *argv[])
195 {
196    int myid, num_procs;
197    int n, N, pi, pj, pk;
198    double h;
199    int vis;
200 
201    double tol, theta;
202    int maxit, cycle_type;
203    int rlx_type, rlx_sweeps, rlx_weight, rlx_omega;
204    int amg_coarsen_type, amg_agg_levels, amg_rlx_type;
205    int amg_interp_type, amg_Pmax;
206    int singular_problem ;
207 
208    double mytime = 0.0;
209    double walltime = 0.0;
210 
211    HYPRE_SStructGrid     edge_grid;
212    HYPRE_SStructGraph    A_graph;
213    HYPRE_SStructMatrix   A;
214    HYPRE_SStructVector   b;
215    HYPRE_SStructVector   x;
216    HYPRE_SStructGrid     node_grid;
217    HYPRE_SStructGraph    G_graph;
218    HYPRE_SStructStencil  G_stencil[3];
219    HYPRE_SStructMatrix   G;
220    HYPRE_SStructVector   xcoord, ycoord, zcoord;
221 
222    HYPRE_Solver          solver, precond;
223 
224    /* Initialize MPI */
225    MPI_Init(&argc, &argv);
226    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
227    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
228 
229    /* Initialize HYPRE */
230    HYPRE_Init();
231 
232    /* Print GPU info */
233    /* HYPRE_PrintDeviceInfo(); */
234 
235    /* Set default parameters */
236    n                = 10;
237    vis              = 0;
238    optionAlpha      = 0;
239    optionBeta       = 0;
240    maxit            = 100;
241    tol              = 1e-6;
242    cycle_type       = 13;
243    rlx_type         = 2;
244    rlx_sweeps       = 1;
245    rlx_weight       = 1.0;
246    rlx_omega        = 1.0;
247    amg_coarsen_type = 10;
248    amg_agg_levels   = 1;
249    amg_rlx_type     = 6;
250    theta            = 0.25;
251    amg_interp_type  = 6;
252    amg_Pmax         = 4;
253    singular_problem = 0;
254 
255    /* Parse command line */
256    {
257       int arg_index = 0;
258       int print_usage = 0;
259 
260       while (arg_index < argc)
261       {
262          if ( strcmp(argv[arg_index], "-n") == 0 )
263          {
264             arg_index++;
265             n = atoi(argv[arg_index++]);
266          }
267          else if ( strcmp(argv[arg_index], "-a") == 0 )
268          {
269             arg_index++;
270             optionAlpha = atoi(argv[arg_index++]);
271          }
272          else if ( strcmp(argv[arg_index], "-b") == 0 )
273          {
274             arg_index++;
275             optionBeta = atoi(argv[arg_index++]);
276          }
277          else if ( strcmp(argv[arg_index], "-vis") == 0 )
278          {
279             arg_index++;
280             vis = 1;
281          }
282          else if ( strcmp(argv[arg_index], "-maxit") == 0 )
283          {
284             arg_index++;
285             maxit = atoi(argv[arg_index++]);
286          }
287          else if ( strcmp(argv[arg_index], "-tol") == 0 )
288          {
289             arg_index++;
290             tol = atof(argv[arg_index++]);
291          }
292          else if ( strcmp(argv[arg_index], "-type") == 0 )
293          {
294             arg_index++;
295             cycle_type = atoi(argv[arg_index++]);
296          }
297          else if ( strcmp(argv[arg_index], "-rlx") == 0 )
298          {
299             arg_index++;
300             rlx_type = atoi(argv[arg_index++]);
301          }
302          else if ( strcmp(argv[arg_index], "-rlxn") == 0 )
303          {
304             arg_index++;
305             rlx_sweeps = atoi(argv[arg_index++]);
306          }
307          else if ( strcmp(argv[arg_index], "-rlxw") == 0 )
308          {
309             arg_index++;
310             rlx_weight = atof(argv[arg_index++]);
311          }
312          else if ( strcmp(argv[arg_index], "-rlxo") == 0 )
313          {
314             arg_index++;
315             rlx_omega = atof(argv[arg_index++]);
316          }
317          else if ( strcmp(argv[arg_index], "-ctype") == 0 )
318          {
319             arg_index++;
320             amg_coarsen_type = atoi(argv[arg_index++]);
321          }
322          else if ( strcmp(argv[arg_index], "-amgrlx") == 0 )
323          {
324             arg_index++;
325             amg_rlx_type = atoi(argv[arg_index++]);
326          }
327          else if ( strcmp(argv[arg_index], "-agg") == 0 )
328          {
329             arg_index++;
330             amg_agg_levels = atoi(argv[arg_index++]);
331          }
332          else if ( strcmp(argv[arg_index], "-itype") == 0 )
333          {
334             arg_index++;
335             amg_interp_type = atoi(argv[arg_index++]);
336          }
337          else if ( strcmp(argv[arg_index], "-pmax") == 0 )
338          {
339             arg_index++;
340             amg_Pmax = atoi(argv[arg_index++]);
341          }
342          else if ( strcmp(argv[arg_index], "-sing") == 0 )
343          {
344             arg_index++;
345             singular_problem = 1;
346          }
347          else if ( strcmp(argv[arg_index], "-theta") == 0 )
348          {
349             arg_index++;
350             theta = atof(argv[arg_index++]);
351          }
352 
353          else if ( strcmp(argv[arg_index], "-help") == 0 )
354          {
355             print_usage = 1;
356             break;
357          }
358          else
359          {
360             arg_index++;
361          }
362       }
363 
364       if ((print_usage) && (myid == 0))
365       {
366          printf("\n");
367          printf("Usage: %s [<options>]\n", argv[0]);
368          printf("\n");
369          printf("  -n <n>              : problem size per processor (default: 10)\n");
370          printf("  -a <alpha_opt>      : choice for the curl-curl coefficient (default: 1)\n");
371          printf("  -b <beta_opt>       : choice for the mass coefficient (default: 1)\n");
372          printf("  -vis                : save the solution for GLVis visualization\n");
373          printf("\n");
374          printf("PCG-AMS solver options:                                     \n");
375          printf("  -maxit <num>        : maximum number of iterations (100)  \n");
376          printf("  -tol <num>          : convergence tolerance (1e-6)        \n");
377          printf("  -type <num>         : 3-level cycle type (0-8, 11-14)     \n");
378          printf("  -theta <num>        : BoomerAMG threshold (0.25)          \n");
379          printf("  -ctype <num>        : BoomerAMG coarsening type           \n");
380          printf("  -agg <num>          : Levels of BoomerAMG agg. coarsening \n");
381          printf("  -amgrlx <num>       : BoomerAMG relaxation type           \n");
382          printf("  -itype <num>        : BoomerAMG interpolation type        \n");
383          printf("  -pmax <num>         : BoomerAMG interpolation truncation  \n");
384          printf("  -rlx <num>          : relaxation type                     \n");
385          printf("  -rlxn <num>         : number of relaxation sweeps         \n");
386          printf("  -rlxw <num>         : damping parameter (usually <=1)     \n");
387          printf("  -rlxo <num>         : SOR parameter (usually in (0,2))    \n");
388          printf("  -sing               : curl-curl only (singular) problem   \n");
389          printf("\n");
390          printf("\n");
391       }
392 
393       if (print_usage)
394       {
395          MPI_Finalize();
396          return (0);
397       }
398    }
399 
400    /* Figure out the processor grid (N x N x N).  The local problem size is n^3,
401       while pi, pj and pk indicate the position in the processor grid. */
402    N  = pow(num_procs,1.0/3.0) + 0.5;
403    if (num_procs != N*N*N)
404    {
405       if (myid == 0) printf("Can't run on %d processors, try %d.\n",
406                             num_procs, N*N*N);
407       MPI_Finalize();
408       exit(1);
409    }
410    h  = 1.0 / (N*n);
411    pk = myid / (N*N);
412    pj = myid/N - pk*N;
413    pi = myid - pj*N - pk*N*N;
414 
415    /* Start timing */
416    mytime -= MPI_Wtime();
417 
418    /* 1. Set up the edge and nodal grids.  Note that we do this simultaneously
419          to make sure that they have the same extents.  For simplicity we use
420          only one part to represent the unit cube. */
421    {
422       int ndim = 3;
423       int nparts = 1;
424 
425       /* Create empty 2D grid objects */
426       HYPRE_SStructGridCreate(MPI_COMM_WORLD, ndim, nparts, &node_grid);
427       HYPRE_SStructGridCreate(MPI_COMM_WORLD, ndim, nparts, &edge_grid);
428 
429       /* Set the extents of the grid - each processor sets its grid boxes. */
430       {
431          int part = 0;
432          int ilower[3] = {1 + pi*n, 1 + pj*n, 1 + pk*n};
433          int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
434 
435          HYPRE_SStructGridSetExtents(node_grid, part, ilower, iupper);
436          HYPRE_SStructGridSetExtents(edge_grid, part, ilower, iupper);
437       }
438 
439       /* Set the variable type and number of variables on each grid. */
440       {
441          int i;
442          int nnodevars = 1;
443          int nedgevars = 3;
444 
445          HYPRE_SStructVariable nodevars[1] = {HYPRE_SSTRUCT_VARIABLE_NODE};
446          HYPRE_SStructVariable edgevars[3] = {HYPRE_SSTRUCT_VARIABLE_XEDGE,
447                                               HYPRE_SSTRUCT_VARIABLE_YEDGE,
448                                               HYPRE_SSTRUCT_VARIABLE_ZEDGE};
449          for (i = 0; i < nparts; i++)
450          {
451             HYPRE_SStructGridSetVariables(node_grid, i, nnodevars, nodevars);
452             HYPRE_SStructGridSetVariables(edge_grid, i, nedgevars, edgevars);
453          }
454       }
455 
456       /* Since there is only one part, there is no need to call the
457          SetNeighborPart or SetSharedPart functions, which determine the spatial
458          relation between the parts.  See Examples 12, 13 and 14 for
459          illustrations of these calls. */
460 
461       /* Now the grids are ready to be used */
462       HYPRE_SStructGridAssemble(node_grid);
463       HYPRE_SStructGridAssemble(edge_grid);
464    }
465 
466    /* 2. Create the finite element stiffness matrix A and load vector b. */
467    {
468       int part = 0; /* this problem has only one part */
469 
470       /* Set the ordering of the variables in the finite element problem.  This
471          is done by listing the variable offset directions relative to the
472          element's center.  See the Reference Manual for more details. */
473       {
474          int ordering[48] =       { 0,  0, -1, -1,    /* x-edge [0]-[1] */
475                                     1, +1,  0, -1,    /* y-edge [1]-[2] */
476          /*     [7]------[6]  */    0,  0, +1, -1,    /* x-edge [3]-[2] */
477          /*     /|       /|   */    1, -1,  0, -1,    /* y-edge [0]-[3] */
478          /*    / |      / |   */    0,  0, -1, +1,    /* x-edge [4]-[5] */
479          /*  [4]------[5] |   */    1, +1,  0, +1,    /* y-edge [5]-[6] */
480          /*   | [3]----|-[2]  */    0,  0, +1, +1,    /* x-edge [7]-[6] */
481          /*   | /      | /    */    1, -1,  0, +1,    /* y-edge [4]-[7] */
482          /*   |/       |/     */    2, -1, -1,  0,    /* z-edge [0]-[4] */
483          /*  [0]------[1]     */    2, +1, -1,  0,    /* z-edge [1]-[5] */
484                                     2, +1, +1,  0,    /* z-edge [2]-[6] */
485                                     2, -1, +1,  0 };  /* z-edge [3]-[7] */
486 
487          HYPRE_SStructGridSetFEMOrdering(edge_grid, part, ordering);
488       }
489 
490       /* Set up the Graph - this determines the non-zero structure of the
491          matrix. */
492       {
493          int part = 0;
494 
495          /* Create the graph object */
496          HYPRE_SStructGraphCreate(MPI_COMM_WORLD, edge_grid, &A_graph);
497 
498          /* See MatrixSetObjectType below */
499          HYPRE_SStructGraphSetObjectType(A_graph, HYPRE_PARCSR);
500 
501          /* Indicate that this problem uses finite element stiffness matrices and
502             load vectors, instead of stencils. */
503          HYPRE_SStructGraphSetFEM(A_graph, part);
504 
505          /* The edge finite element matrix is full, so there is no need to call the
506             HYPRE_SStructGraphSetFEMSparsity() function. */
507 
508          /* Assemble the graph */
509          HYPRE_SStructGraphAssemble(A_graph);
510       }
511 
512       /* Set up the SStruct Matrix and right-hand side vector */
513       {
514          /* Create the matrix object */
515          HYPRE_SStructMatrixCreate(MPI_COMM_WORLD, A_graph, &A);
516          /* Use a ParCSR storage */
517          HYPRE_SStructMatrixSetObjectType(A, HYPRE_PARCSR);
518          /* Indicate that the matrix coefficients are ready to be set */
519          HYPRE_SStructMatrixInitialize(A);
520 
521          /* Create an empty vector object */
522          HYPRE_SStructVectorCreate(MPI_COMM_WORLD, edge_grid, &b);
523          /* Use a ParCSR storage */
524          HYPRE_SStructVectorSetObjectType(b, HYPRE_PARCSR);
525          /* Indicate that the vector coefficients are ready to be set */
526          HYPRE_SStructVectorInitialize(b);
527       }
528 
529       /* Set the matrix and vector entries by finite element assembly */
530       {
531          /* local stiffness matrix and load vector */
532          /* OK to use constant-length arrays for CPUs */
533          /* double S[12][12], F[12]; */
534          double *F = (double *) malloc(12*sizeof(double));
535          double *S_flat = (double *) malloc(12*12*sizeof(double));
536          double *S[12];
537 
538          int i, j, k;
539          int index[3];
540 
541          for (i = 0; i < 12; i++)
542          {
543             S[i] = &S_flat[i*12];
544          }
545 
546          for (i = 1; i <= n; i++)
547          {
548             for (j = 1; j <= n; j++)
549             {
550                for (k = 1; k <= n; k++)
551                {
552                   /* Compute the FEM matrix and r.h.s. for cell (i,j,k) with
553                      coefficients evaluated at the cell center. */
554                   index[0] = i + pi*n; index[1] = j + pj*n; index[2] = k + pk*n;
555                   ComputeFEMND1(S,F,(pi*n+i)*h-h/2,(pj*n+j)*h-h/2,(pk*n+k)*h-h/2,h);
556 
557                   /* Eliminate boundary conditions on x = 0 */
558                   if (index[0] == 1)
559                   {
560                      int ii, jj, bc_edges[4] = { 3, 11, 7, 8 };
561                      for (ii = 0; ii < 4; ii++)
562                      {
563                         for (jj = 0; jj < 12; jj++)
564                            S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
565                         S[bc_edges[ii]][bc_edges[ii]] = 1.0;
566                         F[bc_edges[ii]] = 0.0;
567                      }
568                   }
569                   /* Eliminate boundary conditions on y = 0 */
570                   if (index[1] == 1)
571                   {
572                      int ii, jj, bc_edges[4] = { 0, 9, 4, 8 };
573                      for (ii = 0; ii < 4; ii++)
574                      {
575                         for (jj = 0; jj < 12; jj++)
576                            S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
577                         S[bc_edges[ii]][bc_edges[ii]] = 1.0;
578                         F[bc_edges[ii]] = 0.0;
579                      }
580                   }
581                   /* Eliminate boundary conditions on z = 0 */
582                   if (index[2] == 1)
583                   {
584                      int ii, jj, bc_edges[4] = { 0, 1, 2, 3 };
585                      for (ii = 0; ii < 4; ii++)
586                      {
587                         for (jj = 0; jj < 12; jj++)
588                            S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
589                         S[bc_edges[ii]][bc_edges[ii]] = 1.0;
590                         F[bc_edges[ii]] = 0.0;
591                      }
592                   }
593                   /* Eliminate boundary conditions on x = 1 */
594                   if (index[0] == N*n)
595                   {
596                      int ii, jj, bc_edges[4] = { 1, 10, 5, 9 };
597                      for (ii = 0; ii < 4; ii++)
598                      {
599                         for (jj = 0; jj < 12; jj++)
600                            S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
601                         S[bc_edges[ii]][bc_edges[ii]] = 1.0;
602                         F[bc_edges[ii]] = 0.0;
603                      }
604                   }
605                   /* Eliminate boundary conditions on y = 1 */
606                   if (index[1] == N*n)
607                   {
608                      int ii, jj, bc_edges[4] = { 2, 10, 6, 11 };
609                      for (ii = 0; ii < 4; ii++)
610                      {
611                         for (jj = 0; jj < 12; jj++)
612                            S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
613                         S[bc_edges[ii]][bc_edges[ii]] = 1.0;
614                         F[bc_edges[ii]] = 0.0;
615                      }
616                   }
617                   /* Eliminate boundary conditions on z = 1 */
618                   if (index[2] == N*n)
619                   {
620                      int ii, jj, bc_edges[4] = { 4, 5, 6, 7 };
621                      for (ii = 0; ii < 4; ii++)
622                      {
623                         for (jj = 0; jj < 12; jj++)
624                            S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
625                         S[bc_edges[ii]][bc_edges[ii]] = 1.0;
626                         F[bc_edges[ii]] = 0.0;
627                      }
628                   }
629 
630                   /* Assemble the matrix */
631                   HYPRE_SStructMatrixAddFEMValues(A, part, index, &S[0][0]);
632 
633                   /* Assemble the vector */
634                   HYPRE_SStructVectorAddFEMValues(b, part, index, F);
635                }
636             }
637          }
638          free(F);
639          free(S_flat);
640       }
641 
642       /* Collective calls finalizing the matrix and vector assembly */
643       HYPRE_SStructMatrixAssemble(A);
644       HYPRE_SStructVectorAssemble(b);
645    }
646 
647    /* 3. Create the discrete gradient matrix G, which is needed in AMS. */
648    {
649       int part = 0;
650       int stencil_size = 2;
651 
652       /* Define the discretization stencil relating the edges and nodes of the
653          grid. */
654       {
655          int ndim = 3;
656          int entry;
657          int var = 0; /* the node variable */
658 
659          /* The discrete gradient stencils connect edge to node variables. */
660          int Gx_offsets[2][3] = {{-1,0,0},{0,0,0}};  /* x-edge [7]-[6] */
661          int Gy_offsets[2][3] = {{0,-1,0},{0,0,0}};  /* y-edge [5]-[6] */
662          int Gz_offsets[2][3] = {{0,0,-1},{0,0,0}};  /* z-edge [2]-[6] */
663 
664          HYPRE_SStructStencilCreate(ndim, stencil_size, &G_stencil[0]);
665          HYPRE_SStructStencilCreate(ndim, stencil_size, &G_stencil[1]);
666          HYPRE_SStructStencilCreate(ndim, stencil_size, &G_stencil[2]);
667 
668          for (entry = 0; entry < stencil_size; entry++)
669          {
670             HYPRE_SStructStencilSetEntry(G_stencil[0], entry, Gx_offsets[entry], var);
671             HYPRE_SStructStencilSetEntry(G_stencil[1], entry, Gy_offsets[entry], var);
672             HYPRE_SStructStencilSetEntry(G_stencil[2], entry, Gz_offsets[entry], var);
673          }
674       }
675 
676       /* Set up the Graph - this determines the non-zero structure of the
677          matrix. */
678       {
679          int nvars = 3;
680          int var; /* the edge variables */
681 
682          /* Create the discrete gradient graph object */
683          HYPRE_SStructGraphCreate(MPI_COMM_WORLD, edge_grid, &G_graph);
684 
685          /* See MatrixSetObjectType below */
686          HYPRE_SStructGraphSetObjectType(G_graph, HYPRE_PARCSR);
687 
688          /* Since the discrete gradient relates edge and nodal variables (it is a
689             rectangular matrix), we have to specify the domain (column) grid. */
690          HYPRE_SStructGraphSetDomainGrid(G_graph, node_grid);
691 
692          /* Tell the graph which stencil to use for each edge variable on each
693             part (we only have one part). */
694          for (var = 0; var < nvars; var++)
695             HYPRE_SStructGraphSetStencil(G_graph, part, var, G_stencil[var]);
696 
697          /* Assemble the graph */
698          HYPRE_SStructGraphAssemble(G_graph);
699       }
700 
701       /* Set up the SStruct Matrix */
702       {
703          /* Create the matrix object */
704          HYPRE_SStructMatrixCreate(MPI_COMM_WORLD, G_graph, &G);
705          /* Use a ParCSR storage */
706          HYPRE_SStructMatrixSetObjectType(G, HYPRE_PARCSR);
707          /* Indicate that the matrix coefficients are ready to be set */
708          HYPRE_SStructMatrixInitialize(G);
709       }
710 
711       /* Set the discrete gradient values, assuming a "natural" orientation of
712          the edges (i.e. one in agreement with the coordinate directions). */
713       {
714          int i;
715          int nedges = n*(n+1)*(n+1);
716          double *values;
717          int stencil_indices[2] = {0,1}; /* the nodes of each edge */
718 
719          values = (double*) calloc(2*nedges, sizeof(double));
720 
721          /* The edge orientation is fixed: from first to second node */
722          for (i = 0; i < nedges; i++)
723          {
724             values[2*i]   = -1.0;
725             values[2*i+1] =  1.0;
726          }
727 
728          /* Set the values in the discrete gradient x-edges */
729          {
730             int var = 0;
731             int ilower[3] = {1 + pi*n, 0 + pj*n, 0 + pk*n};
732             int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
733             HYPRE_SStructMatrixSetBoxValues(G, part, ilower, iupper, var,
734                                             stencil_size, stencil_indices,
735                                             values);
736          }
737          /* Set the values in the discrete gradient y-edges */
738          {
739             int var = 1;
740             int ilower[3] = {0 + pi*n, 1 + pj*n, 0 + pk*n};
741             int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
742             HYPRE_SStructMatrixSetBoxValues(G, part, ilower, iupper, var,
743                                             stencil_size, stencil_indices,
744                                             values);
745          }
746          /* Set the values in the discrete gradient z-edges */
747          {
748             int var = 2;
749             int ilower[3] = {0 + pi*n, 0 + pj*n, 1 + pk*n};
750             int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
751             HYPRE_SStructMatrixSetBoxValues(G, part, ilower, iupper, var,
752                                             stencil_size, stencil_indices,
753                                             values);
754          }
755 
756          free(values);
757       }
758 
759       /* Finalize the matrix assembly */
760       HYPRE_SStructMatrixAssemble(G);
761    }
762 
763    /* 4. Create the vectors of nodal coordinates xcoord, ycoord and zcoord,
764          which are needed in AMS. */
765    {
766       int i, j, k;
767       int part = 0;
768       int var = 0; /* the node variable */
769       int index[3];
770       double *xyzval = (double *) malloc(3*sizeof(double));
771 
772       /* Create empty vector objects */
773       HYPRE_SStructVectorCreate(MPI_COMM_WORLD, node_grid, &xcoord);
774       HYPRE_SStructVectorCreate(MPI_COMM_WORLD, node_grid, &ycoord);
775       HYPRE_SStructVectorCreate(MPI_COMM_WORLD, node_grid, &zcoord);
776       /* Set the object type to ParCSR */
777       HYPRE_SStructVectorSetObjectType(xcoord, HYPRE_PARCSR);
778       HYPRE_SStructVectorSetObjectType(ycoord, HYPRE_PARCSR);
779       HYPRE_SStructVectorSetObjectType(zcoord, HYPRE_PARCSR);
780       /* Indicate that the vector coefficients are ready to be set */
781       HYPRE_SStructVectorInitialize(xcoord);
782       HYPRE_SStructVectorInitialize(ycoord);
783       HYPRE_SStructVectorInitialize(zcoord);
784 
785       /* Compute and set the coordinates of the nodes */
786       for (i = 0; i <= n; i++)
787          for (j = 0; j <= n; j++)
788             for (k = 0; k <= n; k++)
789             {
790                index[0] = i + pi*n; index[1] = j + pj*n; index[2] = k + pk*n;
791 
792                xyzval[0] = index[0]*h;
793                xyzval[1] = index[1]*h;
794                xyzval[2] = index[2]*h;
795 
796                HYPRE_SStructVectorSetValues(xcoord, part, index, var, &xyzval[0]);
797                HYPRE_SStructVectorSetValues(ycoord, part, index, var, &xyzval[1]);
798                HYPRE_SStructVectorSetValues(zcoord, part, index, var, &xyzval[2]);
799             }
800 
801       /* Finalize the vector assembly */
802       HYPRE_SStructVectorAssemble(xcoord);
803       HYPRE_SStructVectorAssemble(ycoord);
804       HYPRE_SStructVectorAssemble(zcoord);
805       free(xyzval);
806    }
807 
808    /* 5. Set up a SStruct Vector for the solution vector x */
809    {
810       int part = 0;
811       int nvalues = n*(n+1)*(n+1);
812       double *values;
813 
814       values = (double*) calloc(nvalues, sizeof(double));
815 
816       /* Create an empty vector object */
817       HYPRE_SStructVectorCreate(MPI_COMM_WORLD, edge_grid, &x);
818       /* Set the object type to ParCSR */
819       HYPRE_SStructVectorSetObjectType(x, HYPRE_PARCSR);
820       /* Indicate that the vector coefficients are ready to be set */
821       HYPRE_SStructVectorInitialize(x);
822 
823       /* Set the values for the initial guess x-edge */
824       {
825          int var = 0;
826          int ilower[3] = {1 + pi*n, 0 + pj*n, 0 + pk*n};
827          int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
828          HYPRE_SStructVectorSetBoxValues(x, part, ilower, iupper, var, values);
829       }
830       /* Set the values for the initial guess y-edge */
831       {
832          int var = 1;
833          int ilower[3] = {0 + pi*n, 1 + pj*n, 0 + pk*n};
834          int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
835          HYPRE_SStructVectorSetBoxValues(x, part, ilower, iupper, var, values);
836       }
837       /* Set the values for the initial guess z-edge */
838       {
839          int var = 2;
840          int ilower[3] = {0 + pi*n, 0 + pj*n, 1 + pk*n};
841          int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
842          HYPRE_SStructVectorSetBoxValues(x, part, ilower, iupper, var, values);
843       }
844 
845       free(values);
846 
847       /* Finalize the vector assembly */
848       HYPRE_SStructVectorAssemble(x);
849    }
850 
851    /* Finalize current timing */
852    mytime += MPI_Wtime();
853    MPI_Allreduce(&mytime, &walltime, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
854    if (myid == 0)
855    {
856       printf("\nSStruct Setup time = %f seconds\n\n", walltime);
857    }
858 
859    /* 6. Set up and call the PCG-AMS solver (Solver options can be found in the
860          Reference Manual.) */
861    {
862       double final_res_norm;
863       int its;
864 
865       HYPRE_ParCSRMatrix    par_A;
866       HYPRE_ParVector       par_b;
867       HYPRE_ParVector       par_x;
868 
869       HYPRE_ParCSRMatrix    par_G;
870       HYPRE_ParVector       par_xcoord;
871       HYPRE_ParVector       par_ycoord;
872       HYPRE_ParVector       par_zcoord;
873 
874       /* Extract the ParCSR objects needed in the solver */
875       HYPRE_SStructMatrixGetObject(A, (void **) &par_A);
876       HYPRE_SStructVectorGetObject(b, (void **) &par_b);
877       HYPRE_SStructVectorGetObject(x, (void **) &par_x);
878       HYPRE_SStructMatrixGetObject(G, (void **) &par_G);
879       HYPRE_SStructVectorGetObject(xcoord, (void **) &par_xcoord);
880       HYPRE_SStructVectorGetObject(ycoord, (void **) &par_ycoord);
881       HYPRE_SStructVectorGetObject(zcoord, (void **) &par_zcoord);
882 
883       if (myid == 0)
884       {
885          HYPRE_Int numrows, numcols;
886          HYPRE_ParCSRMatrixGetDims(par_A, &numrows, &numcols);
887          printf("Problem size: %d\n\n", numrows);
888       }
889 
890       /* Start timing */
891       mytime -= MPI_Wtime();
892 
893       /* Create solver */
894       HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver);
895 
896       /* Set some parameters (See Reference Manual for more parameters) */
897       HYPRE_PCGSetMaxIter(solver, maxit); /* max iterations */
898       HYPRE_PCGSetTol(solver, tol); /* conv. tolerance */
899       HYPRE_PCGSetTwoNorm(solver, 0); /* use the two norm as the stopping criteria */
900       HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */
901       HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */
902 
903       /* Create AMS preconditioner */
904       HYPRE_AMSCreate(&precond);
905 
906       /* Set AMS parameters */
907       HYPRE_AMSSetMaxIter(precond, 1);
908       HYPRE_AMSSetTol(precond, 0.0);
909       HYPRE_AMSSetCycleType(precond, cycle_type);
910       HYPRE_AMSSetPrintLevel(precond, 1);
911 
912       /* Set discrete gradient */
913       HYPRE_AMSSetDiscreteGradient(precond, par_G);
914 
915       /* Set vertex coordinates */
916       HYPRE_AMSSetCoordinateVectors(precond,
917                                     par_xcoord, par_ycoord, par_zcoord);
918 
919       if (singular_problem)
920          HYPRE_AMSSetBetaPoissonMatrix(precond, NULL);
921 
922       /* Smoothing and AMG options */
923       HYPRE_AMSSetSmoothingOptions(precond,
924                                    rlx_type, rlx_sweeps,
925                                    rlx_weight, rlx_omega);
926       HYPRE_AMSSetAlphaAMGOptions(precond,
927                                   amg_coarsen_type, amg_agg_levels,
928                                   amg_rlx_type, theta, amg_interp_type,
929                                   amg_Pmax);
930       HYPRE_AMSSetBetaAMGOptions(precond,
931                                  amg_coarsen_type, amg_agg_levels,
932                                  amg_rlx_type, theta, amg_interp_type,
933                                  amg_Pmax);
934 
935       /* Set the PCG preconditioner */
936       HYPRE_PCGSetPrecond(solver,
937                           (HYPRE_PtrToSolverFcn) HYPRE_AMSSolve,
938                           (HYPRE_PtrToSolverFcn) HYPRE_AMSSetup,
939                           precond);
940 
941       /* Call the setup */
942       HYPRE_ParCSRPCGSetup(solver, par_A, par_b, par_x);
943 
944       /* Finalize current timing */
945       mytime += MPI_Wtime();
946       MPI_Allreduce(&mytime, &walltime, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
947       if (myid == 0)
948       {
949          printf("\nAMS Setup time = %f seconds\n\n", walltime);
950       }
951 
952       /* Start timing again */
953       mytime -= MPI_Wtime();
954 
955       /* Call the solve */
956       HYPRE_ParCSRPCGSolve(solver, par_A, par_b, par_x);
957 
958       /* Finalize current timing */
959       mytime += MPI_Wtime();
960       MPI_Allreduce(&mytime, &walltime, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
961       if (myid == 0)
962       {
963          printf("\nAMS Solve time = %f seconds\n\n", walltime);
964       }
965 
966       /* Get some info */
967       HYPRE_PCGGetNumIterations(solver, &its);
968       HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm);
969 
970       /* Clean up */
971       HYPRE_AMSDestroy(precond);
972       HYPRE_ParCSRPCGDestroy(solver);
973 
974       /* Gather the solution vector */
975       HYPRE_SStructVectorGather(x);
976 
977       /* Save the solution for GLVis visualization, see vis/glvis-ex15.sh */
978       if (vis)
979       {
980 #ifdef HYPRE_EXVIS
981          FILE *file;
982          char  filename[255];
983 
984          int part = 0;
985          int nvalues = n*(n+1)*(n+1);
986          double *xvalues, *yvalues, *zvalues;
987 
988          xvalues = (double*) calloc(nvalues, sizeof(double));
989          yvalues = (double*) calloc(nvalues, sizeof(double));
990          zvalues = (double*) calloc(nvalues, sizeof(double));
991 
992          /* Get local solution in the x-edges */
993          {
994             int var = 0;
995             int ilower[3] = {1 + pi*n, 0 + pj*n, 0 + pk*n};
996             int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
997             HYPRE_SStructVectorGetBoxValues(x, part, ilower, iupper,
998                                             var, xvalues);
999          }
1000          /* Get local solution in the y-edges */
1001          {
1002             int var = 1;
1003             int ilower[3] = {0 + pi*n, 1 + pj*n, 0 + pk*n};
1004             int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
1005             HYPRE_SStructVectorGetBoxValues(x, part, ilower, iupper,
1006                                             var, yvalues);
1007          }
1008          /* Get local solution in the z-edges */
1009          {
1010             int var = 2;
1011             int ilower[3] = {0 + pi*n, 0 + pj*n, 1 + pk*n};
1012             int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
1013             HYPRE_SStructVectorGetBoxValues(x, part, ilower, iupper,
1014                                             var, zvalues);
1015          }
1016 
1017          sprintf(filename, "%s.%06d", "vis/ex15.sol", myid);
1018          if ((file = fopen(filename, "w")) == NULL)
1019          {
1020             printf("Error: can't open output file %s\n", filename);
1021             MPI_Finalize();
1022             exit(1);
1023          }
1024 
1025          /* Finite element space header */
1026          fprintf(file, "FiniteElementSpace\n");
1027          fprintf(file, "FiniteElementCollection: Local_Hex_ND1\n");
1028          fprintf(file, "VDim: 1\n");
1029          fprintf(file, "Ordering: 0\n\n");
1030 
1031          /* Save solution with replicated shared data, i.e., element by element,
1032             using the same numbering as the local finite element unknowns. */
1033          {
1034             int i, j, k, s;
1035 
1036             /* Initial x-, y- and z-edge indices in the values arrays */
1037             int oi[4] = { 0, n, n*(n+1), n*(n+1)+n }; /* e_0, e_2,  e_4,  e_6 */
1038             int oj[4] = { 0, 1, n*(n+1), n*(n+1)+1 }; /* e_3, e_1,  e_7,  e_5 */
1039             int ok[4] = { 0, 1,     n+1,       n+2 }; /* e_8, e_9, e_11, e_10 */
1040             /* Loop over the cells while updating the above offsets */
1041             for (k = 0; k < n; k++)
1042             {
1043                for (j = 0; j < n; j++)
1044                {
1045                   for (i = 0; i < n; i++)
1046                   {
1047                      fprintf(file,
1048                              "%.14e\n%.14e\n%.14e\n%.14e\n"
1049                              "%.14e\n%.14e\n%.14e\n%.14e\n"
1050                              "%.14e\n%.14e\n%.14e\n%.14e\n",
1051                              xvalues[oi[0]], yvalues[oj[1]], xvalues[oi[1]], yvalues[oj[0]],
1052                              xvalues[oi[2]], yvalues[oj[3]], xvalues[oi[3]], yvalues[oj[2]],
1053                              zvalues[ok[0]], zvalues[ok[1]], zvalues[ok[3]], zvalues[ok[2]]);
1054 
1055                      for (s=0; s<4; s++) oi[s]++, oj[s]++, ok[s]++;
1056                   }
1057                   for (s=0; s<4; s++) oj[s]++, ok[s]++;
1058                }
1059                for (s=0; s<4; s++) oi[s]+=n, ok[s]+=n+1;
1060             }
1061          }
1062 
1063          fflush(file);
1064          fclose(file);
1065          free(xvalues);
1066          free(yvalues);
1067          free(zvalues);
1068 
1069          /* Save local finite element mesh */
1070          GLVis_PrintLocalCubicMesh("vis/ex15.mesh", n, n, n, h,
1071                                    pi*h*n, pj*h*n, pk*h*n, myid);
1072 
1073          /* Additional visualization data */
1074          GLVis_PrintData("vis/ex15.data", myid, num_procs);
1075 #endif
1076       }
1077 
1078       if (myid == 0)
1079       {
1080          printf("\n");
1081          printf("Iterations = %d\n", its);
1082          printf("Final Relative Residual Norm = %g\n", final_res_norm);
1083          printf("\n");
1084       }
1085    }
1086 
1087    /* Free memory */
1088    HYPRE_SStructGridDestroy(edge_grid);
1089    HYPRE_SStructGraphDestroy(A_graph);
1090    HYPRE_SStructMatrixDestroy(A);
1091    HYPRE_SStructVectorDestroy(b);
1092    HYPRE_SStructVectorDestroy(x);
1093    HYPRE_SStructGridDestroy(node_grid);
1094    HYPRE_SStructGraphDestroy(G_graph);
1095    HYPRE_SStructStencilDestroy(G_stencil[0]);
1096    HYPRE_SStructStencilDestroy(G_stencil[1]);
1097    HYPRE_SStructStencilDestroy(G_stencil[2]);
1098    HYPRE_SStructMatrixDestroy(G);
1099    HYPRE_SStructVectorDestroy(xcoord);
1100    HYPRE_SStructVectorDestroy(ycoord);
1101    HYPRE_SStructVectorDestroy(zcoord);
1102 
1103    /* Finalize HYPRE */
1104    HYPRE_Finalize();
1105 
1106    /* Finalize MPI */
1107    MPI_Finalize();
1108 
1109    return 0;
1110 }
1111