1 /******************************************************************************
2 * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other
3 * HYPRE Project Developers. See the top-level COPYRIGHT file for details.
4 *
5 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
6 ******************************************************************************/
7
8 /*
9 Example 15
10
11 Interface: Semi-Structured interface (SStruct)
12
13 Compile with: make ex15
14
15 Sample run: mpirun -np 8 ex15 -n 10
16
17 To see options: ex15 -help
18
19 Description: This code solves a 3D electromagnetic diffusion (definite
20 curl-curl) problem using the lowest order Nedelec, or "edge"
21 finite element discretization on a uniform hexahedral meshing
22 of the unit cube. The right-hand-side corresponds to a unit
23 vector force and we use uniform zero Dirichlet boundary
24 conditions. The overall problem reads:
25 curl alpha curl E + beta E = 1,
26 with E x n = 0 on the boundary, where alpha and beta are
27 piecewise-constant material coefficients.
28
29 The linear system is split in parallel using the SStruct
30 interface with an n x n x n grid on each processors, and
31 similar N x N x N processor grid. Therefore, the number of
32 processors should be a perfect cube.
33
34 This example code is mainly meant as an illustration of using
35 the Auxiliary-space Maxwell Solver (AMS) through the SStruct
36 interface. It is also an example of setting up a finite
37 element discretization in the SStruct interface, and we
38 recommend viewing Example 13 and Example 14 before viewing
39 this example.
40 */
41
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <math.h>
46 #include "HYPRE_sstruct_mv.h"
47 #include "HYPRE_sstruct_ls.h"
48 #include "HYPRE.h"
49 #include "ex.h"
50
51 #ifdef HYPRE_EXVIS
52 #include "vis.c"
53 #endif
54
55 int optionAlpha, optionBeta;
56
57 /* Curl-curl coefficient alpha = mu^{-1} */
alpha(double x,double y,double z)58 double alpha(double x, double y, double z)
59 {
60 switch (optionAlpha)
61 {
62 case 0: /* uniform coefficient */
63 return 1.0;
64 case 1: /* smooth coefficient */
65 return x*x+exp(y)+sin(z);
66 case 2: /* small outside of an interior cube */
67 if ((fabs(x-0.5) < 0.25) && (fabs(y-0.5) < 0.25) && (fabs(z-0.5) < 0.25))
68 return 1.0;
69 else
70 return 1.0e-6;
71 case 3: /* small outside of an interior ball */
72 if (((x-0.5)*(x-0.5)+(y-0.5)*(y-0.5)+(z-0.5)*(z-0.5)) < 0.0625)
73 return 1.0;
74 else
75 return 1.0e-6;
76 case 4: /* random coefficient */
77 return ((double)rand()/RAND_MAX);
78 default:
79 return 1.0;
80 }
81 }
82
83 /* Mass coefficient beta = sigma */
beta(double x,double y,double z)84 double beta(double x, double y, double z)
85 {
86 switch (optionBeta)
87 {
88 case 0: /* uniform coefficient */
89 return 1.0;
90 case 1: /* smooth coefficient */
91 return x*x+exp(y)+sin(z);
92 case 2:/* small outside of interior cube */
93 if ((fabs(x-0.5) < 0.25) && (fabs(y-0.5) < 0.25) && (fabs(z-0.5) < 0.25))
94 return 1.0;
95 else
96 return 1.0e-6;
97 case 3: /* small outside of an interior ball */
98 if (((x-0.5)*(x-0.5)+(y-0.5)*(y-0.5)+(z-0.5)*(z-0.5)) < 0.0625)
99 return 1.0;
100 else
101 return 1.0e-6;
102 case 4: /* random coefficient */
103 return ((double)rand()/RAND_MAX);
104 default:
105 return 1.0;
106 }
107 }
108
109 /*
110 This routine computes the lowest order Nedelec, or "edge" finite element
111 stiffness matrix and load vector on a cube of size h. The 12 edges {e_i}
112 are numbered in terms of the vertices as follows:
113
114 [7]------[6]
115 /| /| e_0 = 01, e_1 = 12, e_2 = 32, e_3 = 03,
116 / | / | e_4 = 45, e_5 = 56, e_6 = 76, e_7 = 47,
117 [4]------[5] | e_8 = 04, e_9 = 15, e_10 = 26, e_11 = 37.
118 | [3]----|-[2]
119 | / | / The edges are oriented from first to the
120 |/ |/ second vertex, e.g. e_0 is from [0] to [1].
121 [0]------[1]
122
123 We allow for different scaling of the curl-curl and the mass parts of the
124 matrix with coefficients alpha and beta respectively:
125
126 S_ij = alpha (curl phi_i,curl phi_j) + beta (phi_i, phi_j).
127
128 The load vector corresponding to a right-hand side of {1,1,1} is
129
130 F_j = (1,phi_j) = h^2/4.
131 */
ComputeFEMND1(double ** S,double F[12],double x,double y,double z,double h)132 void ComputeFEMND1(double **S, double F[12],
133 double x, double y, double z, double h)
134 {
135 int i, j;
136
137 double h2_4 = h*h/4;
138
139 double cS1 = alpha(x,y,z)/(6.0*h), cS2 = 2*cS1, cS4 = 2*cS2;
140 double cM1 = beta(x,y,z)*h/36.0, cM2 = 2*cM1, cM4 = 2*cM2;
141
142 S[ 0][ 0] = cS4 + cM4; S[ 0][ 1] = cS2; S[ 0][ 2] = -cS1 + cM2;
143 S[ 0][ 3] = -cS2; S[ 0][ 4] = -cS1 + cM2; S[ 0][ 5] = cS1;
144 S[ 0][ 6] = -cS2 + cM1; S[ 0][ 7] = -cS1; S[ 0][ 8] = -cS2;
145 S[ 0][ 9] = cS2; S[ 0][10] = cS1; S[ 0][11] = -cS1;
146
147 S[ 1][ 1] = cS4 + cM4; S[ 1][ 2] = -cS2; S[ 1][ 3] = -cS1 + cM2;
148 S[ 1][ 4] = cS1; S[ 1][ 5] = -cS1 + cM2; S[ 1][ 6] = -cS1;
149 S[ 1][ 7] = -cS2 + cM1; S[ 1][ 8] = -cS1; S[ 1][ 9] = -cS2;
150 S[ 1][10] = cS2; S[ 1][11] = cS1;
151
152 S[ 2][ 2] = cS4 + cM4; S[ 2][ 3] = cS2; S[ 2][ 4] = -cS2 + cM1;
153 S[ 2][ 5] = -cS1; S[ 2][ 6] = -cS1 + cM2; S[ 2][ 7] = cS1;
154 S[ 2][ 8] = -cS1; S[ 2][ 9] = cS1; S[ 2][10] = cS2;
155 S[ 2][11] = -cS2;
156
157 S[ 3][ 3] = cS4 + cM4; S[ 3][ 4] = -cS1; S[ 3][ 5] = -cS2 + cM1;
158 S[ 3][ 6] = cS1; S[ 3][ 7] = -cS1 + cM2; S[ 3][ 8] = -cS2;
159 S[ 3][ 9] = -cS1; S[ 3][10] = cS1; S[ 3][11] = cS2;
160
161 S[ 4][ 4] = cS4 + cM4; S[ 4][ 5] = cS2; S[ 4][ 6] = -cS1 + cM2;
162 S[ 4][ 7] = -cS2; S[ 4][ 8] = cS2; S[ 4][ 9] = -cS2;
163 S[ 4][10] = -cS1; S[ 4][11] = cS1;
164
165 S[ 5][ 5] = cS4 + cM4; S[ 5][ 6] = -cS2; S[ 5][ 7] = -cS1 + cM2;
166 S[ 5][ 8] = cS1; S[ 5][ 9] = cS2; S[ 5][10] = -cS2;
167 S[ 5][11] = -cS1;
168
169 S[ 6][ 6] = cS4 + cM4; S[ 6][ 7] = cS2; S[ 6][ 8] = cS1;
170 S[ 6][ 9] = -cS1; S[ 6][10] = -cS2; S[ 6][11] = cS2;
171
172 S[ 7][ 7] = cS4 + cM4; S[ 7][ 8] = cS2; S[ 7][ 9] = cS1;
173 S[ 7][10] = -cS1; S[ 7][11] = -cS2;
174
175 S[ 8][ 8] = cS4 + cM4; S[ 8][ 9] = -cS1 + cM2; S[ 8][10] = -cS2 + cM1;
176 S[ 8][11] = -cS1 + cM2;
177
178 S[ 9][ 9] = cS4 + cM4; S[ 9][10] = -cS1 + cM2; S[ 9][11] = -cS2 + cM1;
179
180 S[10][10] = cS4 + cM4; S[10][11] = -cS1 + cM2;
181
182 S[11][11] = cS4 + cM4;
183
184 /* The stiffness matrix is symmetric */
185 for (i = 1; i < 12; i++)
186 for (j = 0; j < i; j++)
187 S[i][j] = S[j][i];
188
189 for (i = 0; i < 12; i++)
190 F[i] = h2_4;
191 }
192
193
main(int argc,char * argv[])194 int main (int argc, char *argv[])
195 {
196 int myid, num_procs;
197 int n, N, pi, pj, pk;
198 double h;
199 int vis;
200
201 double tol, theta;
202 int maxit, cycle_type;
203 int rlx_type, rlx_sweeps, rlx_weight, rlx_omega;
204 int amg_coarsen_type, amg_agg_levels, amg_rlx_type;
205 int amg_interp_type, amg_Pmax;
206 int singular_problem ;
207
208 double mytime = 0.0;
209 double walltime = 0.0;
210
211 HYPRE_SStructGrid edge_grid;
212 HYPRE_SStructGraph A_graph;
213 HYPRE_SStructMatrix A;
214 HYPRE_SStructVector b;
215 HYPRE_SStructVector x;
216 HYPRE_SStructGrid node_grid;
217 HYPRE_SStructGraph G_graph;
218 HYPRE_SStructStencil G_stencil[3];
219 HYPRE_SStructMatrix G;
220 HYPRE_SStructVector xcoord, ycoord, zcoord;
221
222 HYPRE_Solver solver, precond;
223
224 /* Initialize MPI */
225 MPI_Init(&argc, &argv);
226 MPI_Comm_rank(MPI_COMM_WORLD, &myid);
227 MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
228
229 /* Initialize HYPRE */
230 HYPRE_Init();
231
232 /* Print GPU info */
233 /* HYPRE_PrintDeviceInfo(); */
234
235 /* Set default parameters */
236 n = 10;
237 vis = 0;
238 optionAlpha = 0;
239 optionBeta = 0;
240 maxit = 100;
241 tol = 1e-6;
242 cycle_type = 13;
243 rlx_type = 2;
244 rlx_sweeps = 1;
245 rlx_weight = 1.0;
246 rlx_omega = 1.0;
247 amg_coarsen_type = 10;
248 amg_agg_levels = 1;
249 amg_rlx_type = 6;
250 theta = 0.25;
251 amg_interp_type = 6;
252 amg_Pmax = 4;
253 singular_problem = 0;
254
255 /* Parse command line */
256 {
257 int arg_index = 0;
258 int print_usage = 0;
259
260 while (arg_index < argc)
261 {
262 if ( strcmp(argv[arg_index], "-n") == 0 )
263 {
264 arg_index++;
265 n = atoi(argv[arg_index++]);
266 }
267 else if ( strcmp(argv[arg_index], "-a") == 0 )
268 {
269 arg_index++;
270 optionAlpha = atoi(argv[arg_index++]);
271 }
272 else if ( strcmp(argv[arg_index], "-b") == 0 )
273 {
274 arg_index++;
275 optionBeta = atoi(argv[arg_index++]);
276 }
277 else if ( strcmp(argv[arg_index], "-vis") == 0 )
278 {
279 arg_index++;
280 vis = 1;
281 }
282 else if ( strcmp(argv[arg_index], "-maxit") == 0 )
283 {
284 arg_index++;
285 maxit = atoi(argv[arg_index++]);
286 }
287 else if ( strcmp(argv[arg_index], "-tol") == 0 )
288 {
289 arg_index++;
290 tol = atof(argv[arg_index++]);
291 }
292 else if ( strcmp(argv[arg_index], "-type") == 0 )
293 {
294 arg_index++;
295 cycle_type = atoi(argv[arg_index++]);
296 }
297 else if ( strcmp(argv[arg_index], "-rlx") == 0 )
298 {
299 arg_index++;
300 rlx_type = atoi(argv[arg_index++]);
301 }
302 else if ( strcmp(argv[arg_index], "-rlxn") == 0 )
303 {
304 arg_index++;
305 rlx_sweeps = atoi(argv[arg_index++]);
306 }
307 else if ( strcmp(argv[arg_index], "-rlxw") == 0 )
308 {
309 arg_index++;
310 rlx_weight = atof(argv[arg_index++]);
311 }
312 else if ( strcmp(argv[arg_index], "-rlxo") == 0 )
313 {
314 arg_index++;
315 rlx_omega = atof(argv[arg_index++]);
316 }
317 else if ( strcmp(argv[arg_index], "-ctype") == 0 )
318 {
319 arg_index++;
320 amg_coarsen_type = atoi(argv[arg_index++]);
321 }
322 else if ( strcmp(argv[arg_index], "-amgrlx") == 0 )
323 {
324 arg_index++;
325 amg_rlx_type = atoi(argv[arg_index++]);
326 }
327 else if ( strcmp(argv[arg_index], "-agg") == 0 )
328 {
329 arg_index++;
330 amg_agg_levels = atoi(argv[arg_index++]);
331 }
332 else if ( strcmp(argv[arg_index], "-itype") == 0 )
333 {
334 arg_index++;
335 amg_interp_type = atoi(argv[arg_index++]);
336 }
337 else if ( strcmp(argv[arg_index], "-pmax") == 0 )
338 {
339 arg_index++;
340 amg_Pmax = atoi(argv[arg_index++]);
341 }
342 else if ( strcmp(argv[arg_index], "-sing") == 0 )
343 {
344 arg_index++;
345 singular_problem = 1;
346 }
347 else if ( strcmp(argv[arg_index], "-theta") == 0 )
348 {
349 arg_index++;
350 theta = atof(argv[arg_index++]);
351 }
352
353 else if ( strcmp(argv[arg_index], "-help") == 0 )
354 {
355 print_usage = 1;
356 break;
357 }
358 else
359 {
360 arg_index++;
361 }
362 }
363
364 if ((print_usage) && (myid == 0))
365 {
366 printf("\n");
367 printf("Usage: %s [<options>]\n", argv[0]);
368 printf("\n");
369 printf(" -n <n> : problem size per processor (default: 10)\n");
370 printf(" -a <alpha_opt> : choice for the curl-curl coefficient (default: 1)\n");
371 printf(" -b <beta_opt> : choice for the mass coefficient (default: 1)\n");
372 printf(" -vis : save the solution for GLVis visualization\n");
373 printf("\n");
374 printf("PCG-AMS solver options: \n");
375 printf(" -maxit <num> : maximum number of iterations (100) \n");
376 printf(" -tol <num> : convergence tolerance (1e-6) \n");
377 printf(" -type <num> : 3-level cycle type (0-8, 11-14) \n");
378 printf(" -theta <num> : BoomerAMG threshold (0.25) \n");
379 printf(" -ctype <num> : BoomerAMG coarsening type \n");
380 printf(" -agg <num> : Levels of BoomerAMG agg. coarsening \n");
381 printf(" -amgrlx <num> : BoomerAMG relaxation type \n");
382 printf(" -itype <num> : BoomerAMG interpolation type \n");
383 printf(" -pmax <num> : BoomerAMG interpolation truncation \n");
384 printf(" -rlx <num> : relaxation type \n");
385 printf(" -rlxn <num> : number of relaxation sweeps \n");
386 printf(" -rlxw <num> : damping parameter (usually <=1) \n");
387 printf(" -rlxo <num> : SOR parameter (usually in (0,2)) \n");
388 printf(" -sing : curl-curl only (singular) problem \n");
389 printf("\n");
390 printf("\n");
391 }
392
393 if (print_usage)
394 {
395 MPI_Finalize();
396 return (0);
397 }
398 }
399
400 /* Figure out the processor grid (N x N x N). The local problem size is n^3,
401 while pi, pj and pk indicate the position in the processor grid. */
402 N = pow(num_procs,1.0/3.0) + 0.5;
403 if (num_procs != N*N*N)
404 {
405 if (myid == 0) printf("Can't run on %d processors, try %d.\n",
406 num_procs, N*N*N);
407 MPI_Finalize();
408 exit(1);
409 }
410 h = 1.0 / (N*n);
411 pk = myid / (N*N);
412 pj = myid/N - pk*N;
413 pi = myid - pj*N - pk*N*N;
414
415 /* Start timing */
416 mytime -= MPI_Wtime();
417
418 /* 1. Set up the edge and nodal grids. Note that we do this simultaneously
419 to make sure that they have the same extents. For simplicity we use
420 only one part to represent the unit cube. */
421 {
422 int ndim = 3;
423 int nparts = 1;
424
425 /* Create empty 2D grid objects */
426 HYPRE_SStructGridCreate(MPI_COMM_WORLD, ndim, nparts, &node_grid);
427 HYPRE_SStructGridCreate(MPI_COMM_WORLD, ndim, nparts, &edge_grid);
428
429 /* Set the extents of the grid - each processor sets its grid boxes. */
430 {
431 int part = 0;
432 int ilower[3] = {1 + pi*n, 1 + pj*n, 1 + pk*n};
433 int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
434
435 HYPRE_SStructGridSetExtents(node_grid, part, ilower, iupper);
436 HYPRE_SStructGridSetExtents(edge_grid, part, ilower, iupper);
437 }
438
439 /* Set the variable type and number of variables on each grid. */
440 {
441 int i;
442 int nnodevars = 1;
443 int nedgevars = 3;
444
445 HYPRE_SStructVariable nodevars[1] = {HYPRE_SSTRUCT_VARIABLE_NODE};
446 HYPRE_SStructVariable edgevars[3] = {HYPRE_SSTRUCT_VARIABLE_XEDGE,
447 HYPRE_SSTRUCT_VARIABLE_YEDGE,
448 HYPRE_SSTRUCT_VARIABLE_ZEDGE};
449 for (i = 0; i < nparts; i++)
450 {
451 HYPRE_SStructGridSetVariables(node_grid, i, nnodevars, nodevars);
452 HYPRE_SStructGridSetVariables(edge_grid, i, nedgevars, edgevars);
453 }
454 }
455
456 /* Since there is only one part, there is no need to call the
457 SetNeighborPart or SetSharedPart functions, which determine the spatial
458 relation between the parts. See Examples 12, 13 and 14 for
459 illustrations of these calls. */
460
461 /* Now the grids are ready to be used */
462 HYPRE_SStructGridAssemble(node_grid);
463 HYPRE_SStructGridAssemble(edge_grid);
464 }
465
466 /* 2. Create the finite element stiffness matrix A and load vector b. */
467 {
468 int part = 0; /* this problem has only one part */
469
470 /* Set the ordering of the variables in the finite element problem. This
471 is done by listing the variable offset directions relative to the
472 element's center. See the Reference Manual for more details. */
473 {
474 int ordering[48] = { 0, 0, -1, -1, /* x-edge [0]-[1] */
475 1, +1, 0, -1, /* y-edge [1]-[2] */
476 /* [7]------[6] */ 0, 0, +1, -1, /* x-edge [3]-[2] */
477 /* /| /| */ 1, -1, 0, -1, /* y-edge [0]-[3] */
478 /* / | / | */ 0, 0, -1, +1, /* x-edge [4]-[5] */
479 /* [4]------[5] | */ 1, +1, 0, +1, /* y-edge [5]-[6] */
480 /* | [3]----|-[2] */ 0, 0, +1, +1, /* x-edge [7]-[6] */
481 /* | / | / */ 1, -1, 0, +1, /* y-edge [4]-[7] */
482 /* |/ |/ */ 2, -1, -1, 0, /* z-edge [0]-[4] */
483 /* [0]------[1] */ 2, +1, -1, 0, /* z-edge [1]-[5] */
484 2, +1, +1, 0, /* z-edge [2]-[6] */
485 2, -1, +1, 0 }; /* z-edge [3]-[7] */
486
487 HYPRE_SStructGridSetFEMOrdering(edge_grid, part, ordering);
488 }
489
490 /* Set up the Graph - this determines the non-zero structure of the
491 matrix. */
492 {
493 int part = 0;
494
495 /* Create the graph object */
496 HYPRE_SStructGraphCreate(MPI_COMM_WORLD, edge_grid, &A_graph);
497
498 /* See MatrixSetObjectType below */
499 HYPRE_SStructGraphSetObjectType(A_graph, HYPRE_PARCSR);
500
501 /* Indicate that this problem uses finite element stiffness matrices and
502 load vectors, instead of stencils. */
503 HYPRE_SStructGraphSetFEM(A_graph, part);
504
505 /* The edge finite element matrix is full, so there is no need to call the
506 HYPRE_SStructGraphSetFEMSparsity() function. */
507
508 /* Assemble the graph */
509 HYPRE_SStructGraphAssemble(A_graph);
510 }
511
512 /* Set up the SStruct Matrix and right-hand side vector */
513 {
514 /* Create the matrix object */
515 HYPRE_SStructMatrixCreate(MPI_COMM_WORLD, A_graph, &A);
516 /* Use a ParCSR storage */
517 HYPRE_SStructMatrixSetObjectType(A, HYPRE_PARCSR);
518 /* Indicate that the matrix coefficients are ready to be set */
519 HYPRE_SStructMatrixInitialize(A);
520
521 /* Create an empty vector object */
522 HYPRE_SStructVectorCreate(MPI_COMM_WORLD, edge_grid, &b);
523 /* Use a ParCSR storage */
524 HYPRE_SStructVectorSetObjectType(b, HYPRE_PARCSR);
525 /* Indicate that the vector coefficients are ready to be set */
526 HYPRE_SStructVectorInitialize(b);
527 }
528
529 /* Set the matrix and vector entries by finite element assembly */
530 {
531 /* local stiffness matrix and load vector */
532 /* OK to use constant-length arrays for CPUs */
533 /* double S[12][12], F[12]; */
534 double *F = (double *) malloc(12*sizeof(double));
535 double *S_flat = (double *) malloc(12*12*sizeof(double));
536 double *S[12];
537
538 int i, j, k;
539 int index[3];
540
541 for (i = 0; i < 12; i++)
542 {
543 S[i] = &S_flat[i*12];
544 }
545
546 for (i = 1; i <= n; i++)
547 {
548 for (j = 1; j <= n; j++)
549 {
550 for (k = 1; k <= n; k++)
551 {
552 /* Compute the FEM matrix and r.h.s. for cell (i,j,k) with
553 coefficients evaluated at the cell center. */
554 index[0] = i + pi*n; index[1] = j + pj*n; index[2] = k + pk*n;
555 ComputeFEMND1(S,F,(pi*n+i)*h-h/2,(pj*n+j)*h-h/2,(pk*n+k)*h-h/2,h);
556
557 /* Eliminate boundary conditions on x = 0 */
558 if (index[0] == 1)
559 {
560 int ii, jj, bc_edges[4] = { 3, 11, 7, 8 };
561 for (ii = 0; ii < 4; ii++)
562 {
563 for (jj = 0; jj < 12; jj++)
564 S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
565 S[bc_edges[ii]][bc_edges[ii]] = 1.0;
566 F[bc_edges[ii]] = 0.0;
567 }
568 }
569 /* Eliminate boundary conditions on y = 0 */
570 if (index[1] == 1)
571 {
572 int ii, jj, bc_edges[4] = { 0, 9, 4, 8 };
573 for (ii = 0; ii < 4; ii++)
574 {
575 for (jj = 0; jj < 12; jj++)
576 S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
577 S[bc_edges[ii]][bc_edges[ii]] = 1.0;
578 F[bc_edges[ii]] = 0.0;
579 }
580 }
581 /* Eliminate boundary conditions on z = 0 */
582 if (index[2] == 1)
583 {
584 int ii, jj, bc_edges[4] = { 0, 1, 2, 3 };
585 for (ii = 0; ii < 4; ii++)
586 {
587 for (jj = 0; jj < 12; jj++)
588 S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
589 S[bc_edges[ii]][bc_edges[ii]] = 1.0;
590 F[bc_edges[ii]] = 0.0;
591 }
592 }
593 /* Eliminate boundary conditions on x = 1 */
594 if (index[0] == N*n)
595 {
596 int ii, jj, bc_edges[4] = { 1, 10, 5, 9 };
597 for (ii = 0; ii < 4; ii++)
598 {
599 for (jj = 0; jj < 12; jj++)
600 S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
601 S[bc_edges[ii]][bc_edges[ii]] = 1.0;
602 F[bc_edges[ii]] = 0.0;
603 }
604 }
605 /* Eliminate boundary conditions on y = 1 */
606 if (index[1] == N*n)
607 {
608 int ii, jj, bc_edges[4] = { 2, 10, 6, 11 };
609 for (ii = 0; ii < 4; ii++)
610 {
611 for (jj = 0; jj < 12; jj++)
612 S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
613 S[bc_edges[ii]][bc_edges[ii]] = 1.0;
614 F[bc_edges[ii]] = 0.0;
615 }
616 }
617 /* Eliminate boundary conditions on z = 1 */
618 if (index[2] == N*n)
619 {
620 int ii, jj, bc_edges[4] = { 4, 5, 6, 7 };
621 for (ii = 0; ii < 4; ii++)
622 {
623 for (jj = 0; jj < 12; jj++)
624 S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
625 S[bc_edges[ii]][bc_edges[ii]] = 1.0;
626 F[bc_edges[ii]] = 0.0;
627 }
628 }
629
630 /* Assemble the matrix */
631 HYPRE_SStructMatrixAddFEMValues(A, part, index, &S[0][0]);
632
633 /* Assemble the vector */
634 HYPRE_SStructVectorAddFEMValues(b, part, index, F);
635 }
636 }
637 }
638 free(F);
639 free(S_flat);
640 }
641
642 /* Collective calls finalizing the matrix and vector assembly */
643 HYPRE_SStructMatrixAssemble(A);
644 HYPRE_SStructVectorAssemble(b);
645 }
646
647 /* 3. Create the discrete gradient matrix G, which is needed in AMS. */
648 {
649 int part = 0;
650 int stencil_size = 2;
651
652 /* Define the discretization stencil relating the edges and nodes of the
653 grid. */
654 {
655 int ndim = 3;
656 int entry;
657 int var = 0; /* the node variable */
658
659 /* The discrete gradient stencils connect edge to node variables. */
660 int Gx_offsets[2][3] = {{-1,0,0},{0,0,0}}; /* x-edge [7]-[6] */
661 int Gy_offsets[2][3] = {{0,-1,0},{0,0,0}}; /* y-edge [5]-[6] */
662 int Gz_offsets[2][3] = {{0,0,-1},{0,0,0}}; /* z-edge [2]-[6] */
663
664 HYPRE_SStructStencilCreate(ndim, stencil_size, &G_stencil[0]);
665 HYPRE_SStructStencilCreate(ndim, stencil_size, &G_stencil[1]);
666 HYPRE_SStructStencilCreate(ndim, stencil_size, &G_stencil[2]);
667
668 for (entry = 0; entry < stencil_size; entry++)
669 {
670 HYPRE_SStructStencilSetEntry(G_stencil[0], entry, Gx_offsets[entry], var);
671 HYPRE_SStructStencilSetEntry(G_stencil[1], entry, Gy_offsets[entry], var);
672 HYPRE_SStructStencilSetEntry(G_stencil[2], entry, Gz_offsets[entry], var);
673 }
674 }
675
676 /* Set up the Graph - this determines the non-zero structure of the
677 matrix. */
678 {
679 int nvars = 3;
680 int var; /* the edge variables */
681
682 /* Create the discrete gradient graph object */
683 HYPRE_SStructGraphCreate(MPI_COMM_WORLD, edge_grid, &G_graph);
684
685 /* See MatrixSetObjectType below */
686 HYPRE_SStructGraphSetObjectType(G_graph, HYPRE_PARCSR);
687
688 /* Since the discrete gradient relates edge and nodal variables (it is a
689 rectangular matrix), we have to specify the domain (column) grid. */
690 HYPRE_SStructGraphSetDomainGrid(G_graph, node_grid);
691
692 /* Tell the graph which stencil to use for each edge variable on each
693 part (we only have one part). */
694 for (var = 0; var < nvars; var++)
695 HYPRE_SStructGraphSetStencil(G_graph, part, var, G_stencil[var]);
696
697 /* Assemble the graph */
698 HYPRE_SStructGraphAssemble(G_graph);
699 }
700
701 /* Set up the SStruct Matrix */
702 {
703 /* Create the matrix object */
704 HYPRE_SStructMatrixCreate(MPI_COMM_WORLD, G_graph, &G);
705 /* Use a ParCSR storage */
706 HYPRE_SStructMatrixSetObjectType(G, HYPRE_PARCSR);
707 /* Indicate that the matrix coefficients are ready to be set */
708 HYPRE_SStructMatrixInitialize(G);
709 }
710
711 /* Set the discrete gradient values, assuming a "natural" orientation of
712 the edges (i.e. one in agreement with the coordinate directions). */
713 {
714 int i;
715 int nedges = n*(n+1)*(n+1);
716 double *values;
717 int stencil_indices[2] = {0,1}; /* the nodes of each edge */
718
719 values = (double*) calloc(2*nedges, sizeof(double));
720
721 /* The edge orientation is fixed: from first to second node */
722 for (i = 0; i < nedges; i++)
723 {
724 values[2*i] = -1.0;
725 values[2*i+1] = 1.0;
726 }
727
728 /* Set the values in the discrete gradient x-edges */
729 {
730 int var = 0;
731 int ilower[3] = {1 + pi*n, 0 + pj*n, 0 + pk*n};
732 int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
733 HYPRE_SStructMatrixSetBoxValues(G, part, ilower, iupper, var,
734 stencil_size, stencil_indices,
735 values);
736 }
737 /* Set the values in the discrete gradient y-edges */
738 {
739 int var = 1;
740 int ilower[3] = {0 + pi*n, 1 + pj*n, 0 + pk*n};
741 int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
742 HYPRE_SStructMatrixSetBoxValues(G, part, ilower, iupper, var,
743 stencil_size, stencil_indices,
744 values);
745 }
746 /* Set the values in the discrete gradient z-edges */
747 {
748 int var = 2;
749 int ilower[3] = {0 + pi*n, 0 + pj*n, 1 + pk*n};
750 int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
751 HYPRE_SStructMatrixSetBoxValues(G, part, ilower, iupper, var,
752 stencil_size, stencil_indices,
753 values);
754 }
755
756 free(values);
757 }
758
759 /* Finalize the matrix assembly */
760 HYPRE_SStructMatrixAssemble(G);
761 }
762
763 /* 4. Create the vectors of nodal coordinates xcoord, ycoord and zcoord,
764 which are needed in AMS. */
765 {
766 int i, j, k;
767 int part = 0;
768 int var = 0; /* the node variable */
769 int index[3];
770 double *xyzval = (double *) malloc(3*sizeof(double));
771
772 /* Create empty vector objects */
773 HYPRE_SStructVectorCreate(MPI_COMM_WORLD, node_grid, &xcoord);
774 HYPRE_SStructVectorCreate(MPI_COMM_WORLD, node_grid, &ycoord);
775 HYPRE_SStructVectorCreate(MPI_COMM_WORLD, node_grid, &zcoord);
776 /* Set the object type to ParCSR */
777 HYPRE_SStructVectorSetObjectType(xcoord, HYPRE_PARCSR);
778 HYPRE_SStructVectorSetObjectType(ycoord, HYPRE_PARCSR);
779 HYPRE_SStructVectorSetObjectType(zcoord, HYPRE_PARCSR);
780 /* Indicate that the vector coefficients are ready to be set */
781 HYPRE_SStructVectorInitialize(xcoord);
782 HYPRE_SStructVectorInitialize(ycoord);
783 HYPRE_SStructVectorInitialize(zcoord);
784
785 /* Compute and set the coordinates of the nodes */
786 for (i = 0; i <= n; i++)
787 for (j = 0; j <= n; j++)
788 for (k = 0; k <= n; k++)
789 {
790 index[0] = i + pi*n; index[1] = j + pj*n; index[2] = k + pk*n;
791
792 xyzval[0] = index[0]*h;
793 xyzval[1] = index[1]*h;
794 xyzval[2] = index[2]*h;
795
796 HYPRE_SStructVectorSetValues(xcoord, part, index, var, &xyzval[0]);
797 HYPRE_SStructVectorSetValues(ycoord, part, index, var, &xyzval[1]);
798 HYPRE_SStructVectorSetValues(zcoord, part, index, var, &xyzval[2]);
799 }
800
801 /* Finalize the vector assembly */
802 HYPRE_SStructVectorAssemble(xcoord);
803 HYPRE_SStructVectorAssemble(ycoord);
804 HYPRE_SStructVectorAssemble(zcoord);
805 free(xyzval);
806 }
807
808 /* 5. Set up a SStruct Vector for the solution vector x */
809 {
810 int part = 0;
811 int nvalues = n*(n+1)*(n+1);
812 double *values;
813
814 values = (double*) calloc(nvalues, sizeof(double));
815
816 /* Create an empty vector object */
817 HYPRE_SStructVectorCreate(MPI_COMM_WORLD, edge_grid, &x);
818 /* Set the object type to ParCSR */
819 HYPRE_SStructVectorSetObjectType(x, HYPRE_PARCSR);
820 /* Indicate that the vector coefficients are ready to be set */
821 HYPRE_SStructVectorInitialize(x);
822
823 /* Set the values for the initial guess x-edge */
824 {
825 int var = 0;
826 int ilower[3] = {1 + pi*n, 0 + pj*n, 0 + pk*n};
827 int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
828 HYPRE_SStructVectorSetBoxValues(x, part, ilower, iupper, var, values);
829 }
830 /* Set the values for the initial guess y-edge */
831 {
832 int var = 1;
833 int ilower[3] = {0 + pi*n, 1 + pj*n, 0 + pk*n};
834 int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
835 HYPRE_SStructVectorSetBoxValues(x, part, ilower, iupper, var, values);
836 }
837 /* Set the values for the initial guess z-edge */
838 {
839 int var = 2;
840 int ilower[3] = {0 + pi*n, 0 + pj*n, 1 + pk*n};
841 int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
842 HYPRE_SStructVectorSetBoxValues(x, part, ilower, iupper, var, values);
843 }
844
845 free(values);
846
847 /* Finalize the vector assembly */
848 HYPRE_SStructVectorAssemble(x);
849 }
850
851 /* Finalize current timing */
852 mytime += MPI_Wtime();
853 MPI_Allreduce(&mytime, &walltime, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
854 if (myid == 0)
855 {
856 printf("\nSStruct Setup time = %f seconds\n\n", walltime);
857 }
858
859 /* 6. Set up and call the PCG-AMS solver (Solver options can be found in the
860 Reference Manual.) */
861 {
862 double final_res_norm;
863 int its;
864
865 HYPRE_ParCSRMatrix par_A;
866 HYPRE_ParVector par_b;
867 HYPRE_ParVector par_x;
868
869 HYPRE_ParCSRMatrix par_G;
870 HYPRE_ParVector par_xcoord;
871 HYPRE_ParVector par_ycoord;
872 HYPRE_ParVector par_zcoord;
873
874 /* Extract the ParCSR objects needed in the solver */
875 HYPRE_SStructMatrixGetObject(A, (void **) &par_A);
876 HYPRE_SStructVectorGetObject(b, (void **) &par_b);
877 HYPRE_SStructVectorGetObject(x, (void **) &par_x);
878 HYPRE_SStructMatrixGetObject(G, (void **) &par_G);
879 HYPRE_SStructVectorGetObject(xcoord, (void **) &par_xcoord);
880 HYPRE_SStructVectorGetObject(ycoord, (void **) &par_ycoord);
881 HYPRE_SStructVectorGetObject(zcoord, (void **) &par_zcoord);
882
883 if (myid == 0)
884 {
885 HYPRE_Int numrows, numcols;
886 HYPRE_ParCSRMatrixGetDims(par_A, &numrows, &numcols);
887 printf("Problem size: %d\n\n", numrows);
888 }
889
890 /* Start timing */
891 mytime -= MPI_Wtime();
892
893 /* Create solver */
894 HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver);
895
896 /* Set some parameters (See Reference Manual for more parameters) */
897 HYPRE_PCGSetMaxIter(solver, maxit); /* max iterations */
898 HYPRE_PCGSetTol(solver, tol); /* conv. tolerance */
899 HYPRE_PCGSetTwoNorm(solver, 0); /* use the two norm as the stopping criteria */
900 HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */
901 HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */
902
903 /* Create AMS preconditioner */
904 HYPRE_AMSCreate(&precond);
905
906 /* Set AMS parameters */
907 HYPRE_AMSSetMaxIter(precond, 1);
908 HYPRE_AMSSetTol(precond, 0.0);
909 HYPRE_AMSSetCycleType(precond, cycle_type);
910 HYPRE_AMSSetPrintLevel(precond, 1);
911
912 /* Set discrete gradient */
913 HYPRE_AMSSetDiscreteGradient(precond, par_G);
914
915 /* Set vertex coordinates */
916 HYPRE_AMSSetCoordinateVectors(precond,
917 par_xcoord, par_ycoord, par_zcoord);
918
919 if (singular_problem)
920 HYPRE_AMSSetBetaPoissonMatrix(precond, NULL);
921
922 /* Smoothing and AMG options */
923 HYPRE_AMSSetSmoothingOptions(precond,
924 rlx_type, rlx_sweeps,
925 rlx_weight, rlx_omega);
926 HYPRE_AMSSetAlphaAMGOptions(precond,
927 amg_coarsen_type, amg_agg_levels,
928 amg_rlx_type, theta, amg_interp_type,
929 amg_Pmax);
930 HYPRE_AMSSetBetaAMGOptions(precond,
931 amg_coarsen_type, amg_agg_levels,
932 amg_rlx_type, theta, amg_interp_type,
933 amg_Pmax);
934
935 /* Set the PCG preconditioner */
936 HYPRE_PCGSetPrecond(solver,
937 (HYPRE_PtrToSolverFcn) HYPRE_AMSSolve,
938 (HYPRE_PtrToSolverFcn) HYPRE_AMSSetup,
939 precond);
940
941 /* Call the setup */
942 HYPRE_ParCSRPCGSetup(solver, par_A, par_b, par_x);
943
944 /* Finalize current timing */
945 mytime += MPI_Wtime();
946 MPI_Allreduce(&mytime, &walltime, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
947 if (myid == 0)
948 {
949 printf("\nAMS Setup time = %f seconds\n\n", walltime);
950 }
951
952 /* Start timing again */
953 mytime -= MPI_Wtime();
954
955 /* Call the solve */
956 HYPRE_ParCSRPCGSolve(solver, par_A, par_b, par_x);
957
958 /* Finalize current timing */
959 mytime += MPI_Wtime();
960 MPI_Allreduce(&mytime, &walltime, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
961 if (myid == 0)
962 {
963 printf("\nAMS Solve time = %f seconds\n\n", walltime);
964 }
965
966 /* Get some info */
967 HYPRE_PCGGetNumIterations(solver, &its);
968 HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm);
969
970 /* Clean up */
971 HYPRE_AMSDestroy(precond);
972 HYPRE_ParCSRPCGDestroy(solver);
973
974 /* Gather the solution vector */
975 HYPRE_SStructVectorGather(x);
976
977 /* Save the solution for GLVis visualization, see vis/glvis-ex15.sh */
978 if (vis)
979 {
980 #ifdef HYPRE_EXVIS
981 FILE *file;
982 char filename[255];
983
984 int part = 0;
985 int nvalues = n*(n+1)*(n+1);
986 double *xvalues, *yvalues, *zvalues;
987
988 xvalues = (double*) calloc(nvalues, sizeof(double));
989 yvalues = (double*) calloc(nvalues, sizeof(double));
990 zvalues = (double*) calloc(nvalues, sizeof(double));
991
992 /* Get local solution in the x-edges */
993 {
994 int var = 0;
995 int ilower[3] = {1 + pi*n, 0 + pj*n, 0 + pk*n};
996 int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
997 HYPRE_SStructVectorGetBoxValues(x, part, ilower, iupper,
998 var, xvalues);
999 }
1000 /* Get local solution in the y-edges */
1001 {
1002 int var = 1;
1003 int ilower[3] = {0 + pi*n, 1 + pj*n, 0 + pk*n};
1004 int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
1005 HYPRE_SStructVectorGetBoxValues(x, part, ilower, iupper,
1006 var, yvalues);
1007 }
1008 /* Get local solution in the z-edges */
1009 {
1010 int var = 2;
1011 int ilower[3] = {0 + pi*n, 0 + pj*n, 1 + pk*n};
1012 int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
1013 HYPRE_SStructVectorGetBoxValues(x, part, ilower, iupper,
1014 var, zvalues);
1015 }
1016
1017 sprintf(filename, "%s.%06d", "vis/ex15.sol", myid);
1018 if ((file = fopen(filename, "w")) == NULL)
1019 {
1020 printf("Error: can't open output file %s\n", filename);
1021 MPI_Finalize();
1022 exit(1);
1023 }
1024
1025 /* Finite element space header */
1026 fprintf(file, "FiniteElementSpace\n");
1027 fprintf(file, "FiniteElementCollection: Local_Hex_ND1\n");
1028 fprintf(file, "VDim: 1\n");
1029 fprintf(file, "Ordering: 0\n\n");
1030
1031 /* Save solution with replicated shared data, i.e., element by element,
1032 using the same numbering as the local finite element unknowns. */
1033 {
1034 int i, j, k, s;
1035
1036 /* Initial x-, y- and z-edge indices in the values arrays */
1037 int oi[4] = { 0, n, n*(n+1), n*(n+1)+n }; /* e_0, e_2, e_4, e_6 */
1038 int oj[4] = { 0, 1, n*(n+1), n*(n+1)+1 }; /* e_3, e_1, e_7, e_5 */
1039 int ok[4] = { 0, 1, n+1, n+2 }; /* e_8, e_9, e_11, e_10 */
1040 /* Loop over the cells while updating the above offsets */
1041 for (k = 0; k < n; k++)
1042 {
1043 for (j = 0; j < n; j++)
1044 {
1045 for (i = 0; i < n; i++)
1046 {
1047 fprintf(file,
1048 "%.14e\n%.14e\n%.14e\n%.14e\n"
1049 "%.14e\n%.14e\n%.14e\n%.14e\n"
1050 "%.14e\n%.14e\n%.14e\n%.14e\n",
1051 xvalues[oi[0]], yvalues[oj[1]], xvalues[oi[1]], yvalues[oj[0]],
1052 xvalues[oi[2]], yvalues[oj[3]], xvalues[oi[3]], yvalues[oj[2]],
1053 zvalues[ok[0]], zvalues[ok[1]], zvalues[ok[3]], zvalues[ok[2]]);
1054
1055 for (s=0; s<4; s++) oi[s]++, oj[s]++, ok[s]++;
1056 }
1057 for (s=0; s<4; s++) oj[s]++, ok[s]++;
1058 }
1059 for (s=0; s<4; s++) oi[s]+=n, ok[s]+=n+1;
1060 }
1061 }
1062
1063 fflush(file);
1064 fclose(file);
1065 free(xvalues);
1066 free(yvalues);
1067 free(zvalues);
1068
1069 /* Save local finite element mesh */
1070 GLVis_PrintLocalCubicMesh("vis/ex15.mesh", n, n, n, h,
1071 pi*h*n, pj*h*n, pk*h*n, myid);
1072
1073 /* Additional visualization data */
1074 GLVis_PrintData("vis/ex15.data", myid, num_procs);
1075 #endif
1076 }
1077
1078 if (myid == 0)
1079 {
1080 printf("\n");
1081 printf("Iterations = %d\n", its);
1082 printf("Final Relative Residual Norm = %g\n", final_res_norm);
1083 printf("\n");
1084 }
1085 }
1086
1087 /* Free memory */
1088 HYPRE_SStructGridDestroy(edge_grid);
1089 HYPRE_SStructGraphDestroy(A_graph);
1090 HYPRE_SStructMatrixDestroy(A);
1091 HYPRE_SStructVectorDestroy(b);
1092 HYPRE_SStructVectorDestroy(x);
1093 HYPRE_SStructGridDestroy(node_grid);
1094 HYPRE_SStructGraphDestroy(G_graph);
1095 HYPRE_SStructStencilDestroy(G_stencil[0]);
1096 HYPRE_SStructStencilDestroy(G_stencil[1]);
1097 HYPRE_SStructStencilDestroy(G_stencil[2]);
1098 HYPRE_SStructMatrixDestroy(G);
1099 HYPRE_SStructVectorDestroy(xcoord);
1100 HYPRE_SStructVectorDestroy(ycoord);
1101 HYPRE_SStructVectorDestroy(zcoord);
1102
1103 /* Finalize HYPRE */
1104 HYPRE_Finalize();
1105
1106 /* Finalize MPI */
1107 MPI_Finalize();
1108
1109 return 0;
1110 }
1111