/*
 * -----------------------------------------------------------------
 * Programmer(s): Daniel R. Reynolds @ SMU
 *                Allan Taylor, Alan Hindmarsh and Radu Serban @ LLNL
 * -----------------------------------------------------------------
 * SUNDIALS Copyright Start
 * Copyright (c) 2002-2021, Lawrence Livermore National Security
 * and Southern Methodist University.
 * All rights reserved.
 *
 * See the top-level LICENSE and NOTICE files for details.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 * SUNDIALS Copyright End
 * -----------------------------------------------------------------
 * Example problem for IDA: 2D heat equation, parallel, GMRES.
 *
 * This example solves a discretized 2D heat equation problem.
 * This version uses the Krylov solver SUNLinSol_SPGMR.
 *
 * The DAE system solved is a spatial discretization of the PDE
 *          du/dt = d^2u/dx^2 + d^2u/dy^2
 * on the unit square. The boundary condition is u = 0 on all edges.
 * Initial conditions are given by u = 16 x (1 - x) y (1 - y).
 * The PDE is treated with central differences on a uniform MX x MY
 * grid. The values of u at the interior points satisfy ODEs, and
 * equations u = 0 at the boundaries are appended, to form a DAE
 * system of size N = MX * MY. Here MX = MY = 10.
 *
 * The system is actually implemented on submeshes, processor by
 * processor, with an MXSUB by MYSUB mesh on each of NPEX * NPEY
 * processors.
 *
 * The system is solved with IDA using the Krylov linear solver
 * SUNLinSol_SPGMR. The preconditioner uses the diagonal elements of
 * the Jacobian only. Routines for preconditioning, required by
 * SUNLinSol_SPGMR, are supplied here. The constraints u >= 0 are
 * posed for all components. Local error testing on the boundary
 * values is suppressed. Output is taken at t = 0, .01, .02, .04,
 * ..., 10.24.
 * -----------------------------------------------------------------
 */
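
/* A minimal build-and-run sketch (library names, paths, and the executable
 * name are assumptions; adjust them to match your SUNDIALS installation):
 *
 *   mpicc idaHeat2D_kry_p.c -o idaHeat2D_kry_p \
 *         -lsundials_ida -lsundials_nvecparallel -lm
 *   mpirun -np 4 ./idaHeat2D_kry_p
 *
 * The number of MPI processes must equal NPEX*NPEY (4 with the settings
 * below); main() checks this and exits otherwise.
 */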

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include <ida/ida.h>
#include <sunlinsol/sunlinsol_spgmr.h>
#include <nvector/nvector_parallel.h>
#include <sundials/sundials_types.h>

#include <mpi.h>

#define ZERO  RCONST(0.0)
#define ONE   RCONST(1.0)
#define TWO   RCONST(2.0)

#define NOUT  11    /* Number of output times */

#define NPEX  2     /* No. PEs in x direction of PE array */
#define NPEY  2     /* No. PEs in y direction of PE array */
                    /* Total no. PEs = NPEX*NPEY */
#define MXSUB 5     /* No. x points per subgrid */
#define MYSUB 5     /* No. y points per subgrid */

/* Global spatial mesh is MX x MY = (NPEX x MXSUB) x (NPEY x MYSUB) */

typedef struct {
  int thispe, npex, npey, ixsub, jysub;
  sunindextype mx, my, mxsub, mysub;
  realtype dx, dy, coeffx, coeffy, coeffxy;
  realtype *uext;
  realtype *send_buff;
  realtype *recv_buff;
  N_Vector pp;    /* vector of diagonal preconditioner elements */
  MPI_Comm comm;
} *UserData;
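
/* Notes on the work arrays above: uext holds the local subgrid plus a
 * one-point ghost layer, i.e. (mxsub+2)*(mysub+2) entries stored row-major
 * with row length mxsub+2; send_buff and recv_buff each hold the four
 * subgrid edges, 2*(mxsub+mysub) entries in total (see AllocUserData). */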

/* User-supplied residual function and supporting routines */

int resHeat(realtype tt, N_Vector uu, N_Vector up,
            N_Vector rr, void *user_data);

static int rescomm(N_Vector uu, N_Vector up, UserData data);

static int reslocal(realtype tt, N_Vector uu, N_Vector up,
                    N_Vector res, UserData data);

static int BSend(MPI_Comm comm, int thispe,
                 int ixsub, int jysub, int npex, int npey,
                 sunindextype mxsub, sunindextype mysub,
                 const realtype uarray[], realtype *send_buffer);

static int BRecvPost(MPI_Comm comm, MPI_Request request[], int thispe,
                     int ixsub, int jysub, int npex, int npey,
                     sunindextype mxsub, sunindextype mysub,
                     realtype recv_buff[]);

static int BRecvWait(MPI_Request request[],
                     int ixsub, int jysub, int npex, int npey,
                     sunindextype mxsub, sunindextype mysub,
                     realtype uext[], const realtype recv_buff[]);

/* User-supplied preconditioner routines */

int PsolveHeat(realtype tt, N_Vector uu, N_Vector up, N_Vector rr,
               N_Vector rvec, N_Vector zvec, realtype c_j,
               realtype delta, void *user_data);

int PsetupHeat(realtype tt, N_Vector yy, N_Vector yp, N_Vector rr,
               realtype c_j, void *user_data);

/* Private helper functions, including a routine to check function
   return values */

static int InitUserData(int thispe, MPI_Comm comm, UserData data);

static int AllocUserData(int thispe, MPI_Comm comm, N_Vector uu, UserData data);

static int DeleteUserData(UserData data);

static int SetInitialProfile(N_Vector uu, N_Vector up, N_Vector id,
                             N_Vector res, UserData data);

static void PrintHeader(realtype rtol, realtype atol, UserData data);

static void PrintOutput(int id, void *ida_mem, realtype t, N_Vector uu);

static void PrintFinalStats(void *ida_mem);

static int check_retval(void *returnvalue, const char *funcname, int opt, int id);

/*
 *--------------------------------------------------------------------
 * MAIN PROGRAM
 *--------------------------------------------------------------------
 */

int main(int argc, char *argv[])
{
  MPI_Comm comm;
  void *ida_mem;
  SUNLinearSolver LS;
  UserData data;
  int iout, thispe, retval, npes;
  sunindextype Neq, local_N;
  realtype rtol, atol, t0, t1, tout, tret;
  N_Vector uu, up, constraints, id, res;

  ida_mem = NULL;
  LS = NULL;
  data = NULL;
  uu = up = constraints = id = res = NULL;

  /* Get processor number and total number of PEs. */

  MPI_Init(&argc, &argv);
  comm = MPI_COMM_WORLD;
  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &thispe);

  /* Allocate and initialize the data structure */
  data = (UserData) malloc(sizeof *data);
  if(check_retval((void *)data, "malloc", 2, thispe))
    MPI_Abort(comm, 1);

  InitUserData(thispe, comm, data);

  /* Check if the number of MPI processes matches the number of subgrids */
  if (npes != (data->npex * data->npey)) {
    if (thispe == 0)
      fprintf(stderr,
              "\nMPI_ERROR(0): npes = %d is not equal to NPEX*NPEY = %d\n",
              npes, data->npex * data->npey);
    free(data);
    MPI_Finalize();
    return(1);
  }

  /* Set local length local_N and global length Neq. */

  local_N = data->mxsub * data->mysub;
  Neq     = data->mx * data->my;

  /* Allocate and initialize N-vectors. */

  uu = N_VNew_Parallel(comm, local_N, Neq);
  if(check_retval((void *)uu, "N_VNew_Parallel", 0, thispe))
    MPI_Abort(comm, 1);

  up = N_VClone(uu);
  if(check_retval((void *)up, "N_VClone", 0, thispe))
    MPI_Abort(comm, 1);

  res = N_VClone(uu);
  if(check_retval((void *)res, "N_VClone", 0, thispe))
    MPI_Abort(comm, 1);

  constraints = N_VClone(uu);
  if(check_retval((void *)constraints, "N_VClone", 0, thispe))
    MPI_Abort(comm, 1);

  id = N_VClone(uu);
  if(check_retval((void *)id, "N_VClone", 0, thispe))
    MPI_Abort(comm, 1);

  /* Allocate user data extended vector and MPI buffers */
  retval = AllocUserData(thispe, comm, uu, data);
  if(check_retval(&retval, "AllocUserData", 1, thispe)) MPI_Abort(comm, 1);

  /* Initialize the uu, up, id, and res profiles. */
  SetInitialProfile(uu, up, id, res, data);

  /* Set constraints to all 1's for nonnegative solution values. */
  N_VConst(ONE, constraints);

  t0 = ZERO; t1 = RCONST(0.01);

  /* Scalar relative and absolute tolerance. */
  rtol = ZERO;
  atol = RCONST(1.0e-3);
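
  /* With rtol = 0 the error test is purely absolute: each solution
     component is held to an error of roughly atol. */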

  /* Call IDACreate and IDAInit to initialize IDA memory. */

  ida_mem = IDACreate();
  if(check_retval((void *)ida_mem, "IDACreate", 0, thispe)) MPI_Abort(comm, 1);

  retval = IDASetUserData(ida_mem, data);
  if(check_retval(&retval, "IDASetUserData", 1, thispe)) MPI_Abort(comm, 1);

  retval = IDASetSuppressAlg(ida_mem, SUNTRUE);
  if(check_retval(&retval, "IDASetSuppressAlg", 1, thispe)) MPI_Abort(comm, 1);

  retval = IDASetId(ida_mem, id);
  if(check_retval(&retval, "IDASetId", 1, thispe)) MPI_Abort(comm, 1);

  retval = IDASetConstraints(ida_mem, constraints);
  if(check_retval(&retval, "IDASetConstraints", 1, thispe)) MPI_Abort(comm, 1);
  N_VDestroy(constraints);

  retval = IDAInit(ida_mem, resHeat, t0, uu, up);
  if(check_retval(&retval, "IDAInit", 1, thispe)) MPI_Abort(comm, 1);

  retval = IDASStolerances(ida_mem, rtol, atol);
  if(check_retval(&retval, "IDASStolerances", 1, thispe)) MPI_Abort(comm, 1);

  /* Call SUNLinSol_SPGMR and IDASetLinearSolver to specify the linear solver. */

  LS = SUNLinSol_SPGMR(uu, PREC_LEFT, 0);  /* use default maxl */
  if(check_retval((void *)LS, "SUNLinSol_SPGMR", 0, thispe)) MPI_Abort(comm, 1);
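
  /* Passing 0 for maxl above selects the default Krylov subspace
     dimension (typically 5 in SUNDIALS releases of this vintage). */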

  retval = IDASetLinearSolver(ida_mem, LS, NULL);
  if(check_retval(&retval, "IDASetLinearSolver", 1, thispe)) MPI_Abort(comm, 1);

  retval = IDASetPreconditioner(ida_mem, PsetupHeat, PsolveHeat);
  if(check_retval(&retval, "IDASetPreconditioner", 1, thispe)) MPI_Abort(comm, 1);

  /* Print output heading (on processor 0 only) and initial solution */

  if (thispe == 0) PrintHeader(rtol, atol, data);
  PrintOutput(thispe, ida_mem, t0, uu);

  /* Loop over tout, call IDASolve, print output. */
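  /* tout doubles each pass, so with NOUT = 11 the output times are
     0.01, 0.02, 0.04, ..., 0.01*2^10 = 10.24, as stated in the header. */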

  for (tout = t1, iout = 1; iout <= NOUT; iout++, tout *= TWO) {

    retval = IDASolve(ida_mem, tout, &tret, uu, up, IDA_NORMAL);
    if(check_retval(&retval, "IDASolve", 1, thispe)) MPI_Abort(comm, 1);

    PrintOutput(thispe, ida_mem, tret, uu);

  }

  /* Print remaining counters. */

  if (thispe == 0) PrintFinalStats(ida_mem);

  /* Free memory */

  IDAFree(&ida_mem);
  SUNLinSolFree(LS);

  N_VDestroy(id);
  N_VDestroy(res);
  N_VDestroy(up);
  N_VDestroy(uu);

  DeleteUserData(data);
  free(data);

  MPI_Finalize();

  return(0);

}

/*
 *--------------------------------------------------------------------
 * FUNCTIONS CALLED BY IDA
 *--------------------------------------------------------------------
 */

/*
 * resHeat: heat equation system residual function
 * This uses 5-point central differencing on the interior points, and
 * includes algebraic equations for the boundary values.
 * So for each interior point, the residual component has the form
 *    res_i = u'_i - (central difference)_i
 * while for each boundary point, it is res_i = u_i.
 *
 * This parallel implementation uses several supporting routines.
 * First a call is made to rescomm to do communication of subgrid boundary
 * data into array uext. Then reslocal is called to compute the residual
 * on individual processors and their corresponding domains. The routines
 * BSend, BRecvPost, and BRecvWait handle interprocessor communication
 * of uu required to calculate the residual.
 */
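
/*
 * Written out for an interior grid point (i,j), the residual component
 * assembled in reslocal is
 *   res(i,j) = u'(i,j) - [ (u(i-1,j) - 2u(i,j) + u(i+1,j))/dx^2
 *                        + (u(i,j-1) - 2u(i,j) + u(i,j+1))/dy^2 ],
 * built from coeffx = 1/dx^2, coeffy = 1/dy^2, and coeffxy = 2/dx^2 + 2/dy^2.
 */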

int resHeat(realtype tt, N_Vector uu, N_Vector up, N_Vector rr,
            void *user_data)
{
  int retval = 0;

  /* Call rescomm to do inter-processor communication. */
  retval = rescomm(uu, up, user_data);

  /* Call reslocal to calculate res. */
  retval = reslocal(tt, uu, up, rr, user_data);

  return(retval);

}

/*
 * PsetupHeat: setup for the diagonal preconditioner for the 2D heat equation.
 *
 * The optional user-supplied functions PsetupHeat and
 * PsolveHeat together must define the left preconditioner
 * matrix P approximating the system Jacobian matrix
 *                   J = dF/du + cj*dF/du'
 * (where the DAE system is F(t,u,u') = 0), and solve the linear
 * systems P z = r. This is done in this case by keeping only
 * the diagonal elements of the J matrix above, storing them as
 * inverses in a vector pp, when computed in PsetupHeat, for
 * subsequent use in PsolveHeat.
 *
 * In this instance, only cj and data (the user data structure, with
 * pp etc.) are used from the PsetupHeat argument list.
 *
 */
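
/*
 * Why pelinv = 1/(c_j + coeffxy) below: at an interior point the residual is
 * F = u' - (Laplacian u), so dF/du = 2/dx^2 + 2/dy^2 = coeffxy and
 * dF/du' = 1, giving a diagonal Jacobian entry of c_j + coeffxy.  At
 * boundary points F = u, so the diagonal entry is 1, which is why pp is
 * initialized to one everywhere before the interior loop.
 */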

int PsetupHeat(realtype tt, N_Vector yy, N_Vector yp, N_Vector rr,
               realtype c_j, void *user_data)
{
  sunindextype lx, ly, ixbegin, ixend, jybegin, jyend, locu;

  /* Unwrap the user data */
  UserData data = (UserData) user_data;
  const int ixsub = data->ixsub;
  const int jysub = data->jysub;
  const int npex  = data->npex;
  const int npey  = data->npey;
  const sunindextype mxsub = data->mxsub;
  const sunindextype mysub = data->mysub;
  realtype *ppv = N_VGetArrayPointer(data->pp);

  /* Calculate the value for the inverse of the diagonal preconditioner */
  const realtype pelinv = ONE/(c_j + data->coeffxy);

  /* Initially set all pp elements to one. */
  N_VConst(ONE, data->pp);

  /* Prepare to loop over subgrid. */
  ixbegin = 0;
  ixend   = mxsub-1;
  jybegin = 0;
  jyend   = mysub-1;
  if (ixsub == 0) ixbegin++;
  if (ixsub == npex-1) ixend--;
  if (jysub == 0) jybegin++;
  if (jysub == npey-1) jyend--;

  /* Load the inverse of the preconditioner diagonal elements
     in a loop over the local subgrid. */

  for (ly = jybegin; ly <= jyend; ly++) {
    for (lx = ixbegin; lx <= ixend; lx++) {
      locu = lx + ly*mxsub;
      ppv[locu] = pelinv;
    }
  }

  return(0);

}

/*
 * PsolveHeat: solve preconditioner linear system.
 * This routine multiplies the input vector rvec by the vector pp
 * containing the inverse diagonal Jacobian elements (previously
 * computed in PsetupHeat), returning the result in zvec.
 */

int PsolveHeat(realtype tt, N_Vector uu, N_Vector up,
               N_Vector rr, N_Vector rvec, N_Vector zvec,
               realtype c_j, realtype delta, void *user_data)
{
  UserData data = (UserData) user_data;

  N_VProd(data->pp, rvec, zvec);

  return(0);

}

/*
 *--------------------------------------------------------------------
 * SUPPORTING FUNCTIONS
 *--------------------------------------------------------------------
 */


/*
 * rescomm routine. This routine performs all inter-processor
 * communication of data in u needed to calculate the residual.
 */

static int rescomm(N_Vector uu, N_Vector up, UserData data)
{
  MPI_Request request[4];

  /* Get comm, thispe, subgrid indices, data sizes */
  MPI_Comm comm = data->comm;
  const int thispe = data->thispe;
  const int ixsub = data->ixsub;
  const int jysub = data->jysub;
  const int npex  = data->npex;
  const int npey  = data->npey;
  const sunindextype mxsub = data->mxsub;
  const sunindextype mysub = data->mysub;

  /* Get solution vector data, buffers, extended array uext. */
  const realtype *uarray = N_VGetArrayPointer(uu);
  realtype *uext = data->uext;
  realtype *send_buffer = data->send_buff;
  realtype *recv_buff = data->recv_buff;

  /* Start receiving boundary data from neighboring PEs. */
  BRecvPost(comm, request, thispe, ixsub, jysub, npex, npey, mxsub, mysub, recv_buff);

  /* Send data from boundary of local grid to neighboring PEs. */
  BSend(comm, thispe, ixsub, jysub, npex, npey, mxsub, mysub, uarray, send_buffer);

  /* Finish receiving boundary data from neighboring PEs. */
  BRecvWait(request, ixsub, jysub, npex, npey, mxsub, mysub, uext, recv_buff);

  return(0);

}

/*
 * reslocal routine. Compute res = F(t, uu, up). This routine assumes
 * that all inter-processor communication of data needed to calculate F
 * has already been done, and that this data is in the work array uext.
 */

static int reslocal(realtype tt, N_Vector uu, N_Vector up, N_Vector rr,
                    UserData data)
{
  realtype termx, termy, termctr;
  sunindextype lx, ly;
  sunindextype locu, locue;
  sunindextype ixbegin, ixend, jybegin, jyend;

  /* Get subgrid indices, array sizes */
  const int ixsub = data->ixsub;
  const int jysub = data->jysub;
  const int npex  = data->npex;
  const int npey  = data->npey;
  const sunindextype mxsub  = data->mxsub;
  const sunindextype mxsub2 = data->mxsub + 2;
  const sunindextype mysub  = data->mysub;

  /* Vector data arrays, extended work array uext. */
  const realtype *uuv = N_VGetArrayPointer(uu);
  const realtype *upv = N_VGetArrayPointer(up);
  realtype *resv = N_VGetArrayPointer(rr);
  realtype *uext = data->uext;

  /* Initialize all elements of rr to uu. This sets the boundary
     elements simply without indexing hassles. */

  N_VScale(ONE, uu, rr);

  /* Copy local segment of u vector into the working extended array uext.
     This completes uext prior to the computation of the rr vector. */

  for (ly = 0; ly < mysub; ly++) {
    for (lx = 0; lx < mxsub; lx++) {
      uext[mxsub2*(ly+1) + (lx+1)] = uuv[mxsub*ly + lx];
    }
  }

  /* Set loop limits for the interior of the local subgrid. */

  ixbegin = 0;
  ixend   = mxsub-1;
  jybegin = 0;
  jyend   = mysub-1;
  if (ixsub == 0) ixbegin++;
  if (ixsub == npex-1) ixend--;
  if (jysub == 0) jybegin++;
  if (jysub == npey-1) jyend--;

  /* Loop over all grid points in local subgrid. */

  for (ly = jybegin; ly <= jyend; ly++) {
    for (lx = ixbegin; lx <= ixend; lx++) {
      locu  = lx + ly*mxsub;
      locue = (lx+1) + (ly+1)*mxsub2;
      termx = data->coeffx *(uext[locue-1]      + uext[locue+1]);
      termy = data->coeffy *(uext[locue-mxsub2] + uext[locue+mxsub2]);
      termctr = data->coeffxy*uext[locue];
      resv[locu] = upv[locu] - (termx + termy - termctr);
    }
  }
  return(0);

}

/*
 * Routine to send boundary data to neighboring PEs.
 */

static int BSend(MPI_Comm comm, int thispe,
                 int ixsub, int jysub, int npex, int npey,
                 sunindextype mxsub, sunindextype mysub,
                 const realtype uarray[], realtype *send_buffer)
{
  sunindextype lx, ly;
  /* Have left, right, top and bottom buffers use the same send_buffer. */
  realtype *bufleft   = send_buffer;
  realtype *bufright  = send_buffer + mysub;
  realtype *buftop    = send_buffer + 2*mysub;
  realtype *bufbottom = send_buffer + 2*mysub + mxsub;
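
  /* Packing order: [ left (mysub) | right (mysub) | top (mxsub) | bottom (mxsub) ],
     matching the 2*(mxsub+mysub) entries allocated in AllocUserData. */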

  /* If jysub > 0, send data from the bottom x-line of u (via bufbottom). */

  if (jysub != 0) {
    for (lx = 0; lx < mxsub; ++lx) {
      bufbottom[lx] = uarray[lx];
    }
    MPI_Send(bufbottom, (int) mxsub, MPI_SUNREALTYPE, thispe-npex, 0, comm);
  }

  /* If jysub < NPEY-1, send data from the top x-line of u (via buftop). */

  if (jysub != npey-1) {
    for (lx = 0; lx < mxsub; ++lx) {
      buftop[lx] = uarray[(mysub-1)*mxsub + lx];
    }
    MPI_Send(buftop, (int) mxsub, MPI_SUNREALTYPE, thispe+npex, 0, comm);
  }

  /* If ixsub > 0, send data from the left y-line of u (via bufleft). */

  if (ixsub != 0) {
    for (ly = 0; ly < mysub; ly++) {
      bufleft[ly] = uarray[ly*mxsub];
    }
    MPI_Send(bufleft, (int) mysub, MPI_SUNREALTYPE, thispe-1, 0, comm);
  }

  /* If ixsub < NPEX-1, send data from the right y-line of u (via bufright). */

  if (ixsub != npex-1) {
    for (ly = 0; ly < mysub; ly++) {
      bufright[ly] = uarray[ly*mxsub + (mxsub-1)];
    }
    MPI_Send(bufright, (int) mysub, MPI_SUNREALTYPE, thispe+1, 0, comm);
  }

  return(0);

}

/*
 * Routine to start receiving boundary data from neighboring PEs.
 * Notes:
 *   1) recv_buff should be able to hold 2*(MXSUB+MYSUB) realtype entries,
 *      should be passed to both the BRecvPost and BRecvWait functions, and
 *      should not be manipulated between the two calls.
 *   2) request should have 4 entries, and should be passed in
 *      both calls also.
 */

static int BRecvPost(MPI_Comm comm, MPI_Request request[], int thispe,
                     int ixsub, int jysub, int npex, int npey,
                     sunindextype mxsub, sunindextype mysub,
                     realtype recv_buff[])
{
  /* Have left, right, top and bottom buffers use the same recv_buff. */
  realtype *bufleft   = recv_buff;
  realtype *bufright  = recv_buff + mysub;
  realtype *buftop    = recv_buff + 2*mysub;
  realtype *bufbottom = recv_buff + 2*mysub + mxsub;

  /* If jysub > 0, receive data for the bottom x-line of uext. */
  if (jysub != 0) {
    MPI_Irecv(bufbottom, (int) mxsub, MPI_SUNREALTYPE,
              thispe-npex, 0, comm, &request[0]);
  }

  /* If jysub < NPEY-1, receive data for the top x-line of uext. */
  if (jysub != npey-1) {
    MPI_Irecv(buftop, (int) mxsub, MPI_SUNREALTYPE,
              thispe+npex, 0, comm, &request[1]);
  }

  /* If ixsub > 0, receive data for the left y-line of uext (via bufleft). */
  if (ixsub != 0) {
    MPI_Irecv(&bufleft[0], (int) mysub, MPI_SUNREALTYPE,
              thispe-1, 0, comm, &request[2]);
  }

  /* If ixsub < NPEX-1, receive data for the right y-line of uext (via bufright). */
  if (ixsub != npex-1) {
    MPI_Irecv(&bufright[0], (int) mysub, MPI_SUNREALTYPE,
              thispe+1, 0, comm, &request[3]);
  }

  return(0);

}

/*
 * Routine to finish receiving boundary data from neighboring PEs.
 * Notes:
 *   1) recv_buff should be able to hold 2*(MXSUB+MYSUB) realtype entries,
 *      should be passed to both the BRecvPost and BRecvWait functions, and
 *      should not be manipulated between the two calls.
 *   2) request should have four entries, and should be passed in both
 *      calls also.
 */

static int BRecvWait(MPI_Request request[], int ixsub, int jysub,
                     int npex, int npey,
                     sunindextype mxsub, sunindextype mysub,
                     realtype uext[], const realtype recv_buff[])
{
  MPI_Status status;
  const realtype *bufleft   = recv_buff;
  const realtype *bufright  = recv_buff + mysub;
  const realtype *buftop    = recv_buff + 2*mysub;
  const realtype *bufbottom = recv_buff + 2*mysub + mxsub;
  sunindextype ly, lx, offsetue;
  const sunindextype mxsub2 = mxsub + 2;
  const sunindextype mysub1 = mysub + 1;

  /* If jysub > 0, receive data for the bottom x-line of uext. */
  if (jysub != 0) {
    MPI_Wait(&request[0], &status);

    /* Copy the recv_buff to uext. */
    for (lx = 0; lx < mxsub; lx++) {
      offsetue = 1 + lx;
      uext[offsetue] = bufbottom[lx];
    }
  }

  /* If jysub < NPEY-1, receive data for the top x-line of uext. */
  if (jysub != npey-1) {
    MPI_Wait(&request[1], &status);

    /* Copy the recv_buff to uext. */
    for (lx = 0; lx < mxsub; lx++) {
      offsetue = (1 + mysub1*mxsub2) + lx;
      uext[offsetue] = buftop[lx];
    }
  }

  /* If ixsub > 0, receive data for the left y-line of uext (via bufleft). */
  if (ixsub != 0) {
    MPI_Wait(&request[2], &status);

    /* Copy the recv_buff to uext. */
    for (ly = 0; ly < mysub; ly++) {
      offsetue = (ly+1)*mxsub2;
      uext[offsetue] = bufleft[ly];
    }
  }

  /* If ixsub < NPEX-1, receive data for the right y-line of uext (via bufright). */
  if (ixsub != npex-1) {
    MPI_Wait(&request[3], &status);

    /* Copy the recv_buff to uext. */
    for (ly = 0; ly < mysub; ly++) {
      offsetue = (ly+2)*mxsub2 - 1;
      uext[offsetue] = bufright[ly];
    }
  }

  return(0);

}

/*
 *--------------------------------------------------------------------
 * PRIVATE FUNCTIONS
 *--------------------------------------------------------------------
 */

/*
 * InitUserData initializes the user's data block data.
 */

static int InitUserData(int thispe, MPI_Comm comm, UserData data)
{
  data->comm    = comm;
  data->thispe  = thispe;
  data->npex    = NPEX;   /* Number of subgrids in x-direction */
  data->npey    = NPEY;   /* Number of subgrids in y-direction */
  data->mxsub   = MXSUB;  /* Number of subgrid mesh points in x-direction */
  data->mysub   = MYSUB;  /* Number of subgrid mesh points in y-direction */
  data->jysub   = thispe/data->npex;
  data->ixsub   = thispe - (data->jysub * data->npex);
  data->mx      = data->npex * data->mxsub;  /* Mesh size in x-direction */
  data->my      = data->npey * data->mysub;  /* Mesh size in y-direction */
  data->dx      = ONE/(data->mx-ONE);  /* Assumes a [0,1] interval in x. */
  data->dy      = ONE/(data->my-ONE);  /* Assumes a [0,1] interval in y. */
  data->coeffx  = ONE/(data->dx * data->dx);
  data->coeffy  = ONE/(data->dy * data->dy);
  data->coeffxy = TWO/(data->dx * data->dx) + TWO/(data->dy * data->dy);
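
  /* With the default NPEX = NPEY = 2 and MXSUB = MYSUB = 5 this gives
     mx = my = 10, dx = dy = 1/9, coeffx = coeffy = 81, coeffxy = 324. */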

  data->uext      = NULL;
  data->send_buff = NULL;
  data->recv_buff = NULL;

  return(0);
}


/*
 * AllocUserData allocates memory for the extended vector uext
 * and MPI communication buffers.
 */

static int AllocUserData(int thispe, MPI_Comm comm, N_Vector uu, UserData data)
{
  /* An N-vector to hold the preconditioner diagonal. */
  data->pp = N_VClone(uu);
  if(data->pp == NULL) {
    MPI_Abort(comm, 1);
    return -1;
  }

  /* Allocate local extended vector (includes ghost nodes) */
  data->uext = (realtype*) malloc((data->mxsub + 2)*(data->mysub + 2)*sizeof(realtype));
  if(data->uext == NULL) {
    N_VDestroy(data->pp);
    MPI_Abort(comm, 1);
    return -1;
  }

  /* Allocate local host send buffer */
  data->send_buff = (realtype*) malloc(2*(data->mxsub + data->mysub)*sizeof(realtype));
  if(data->send_buff == NULL) {
    N_VDestroy(data->pp);
    free(data->uext);
    MPI_Abort(comm, 1);
    return -1;
  }

  /* Allocate local host receive buffer */
  data->recv_buff = (realtype*) malloc(2*(data->mxsub + data->mysub)*sizeof(realtype));
  if(data->recv_buff == NULL) {
    N_VDestroy(data->pp);
    free(data->uext);
    free(data->send_buff);
    MPI_Abort(comm, 1);
    return -1;
  }

  return 0;
}


/*
 * DeleteUserData frees the preconditioner vector, the extended vector,
 * and the MPI communication buffers allocated in AllocUserData.
 */

static int DeleteUserData(UserData data)
{
  if (data->pp != NULL)
    N_VDestroy(data->pp);
  if (data->uext != NULL)
    free(data->uext);
  if (data->send_buff != NULL)
    free(data->send_buff);
  if (data->recv_buff != NULL)
    free(data->recv_buff);
  return 0;
}

/*
 * SetInitialProfile sets the initial values for the problem.
 */

static int SetInitialProfile(N_Vector uu, N_Vector up, N_Vector id,
                             N_Vector res, UserData data)
{
  sunindextype i, iloc, j, jloc, loc;
  realtype xfact, yfact;

  /* Initialize uu. */

  realtype *uudata = N_VGetArrayPointer(uu);
  realtype *iddata = N_VGetArrayPointer(id);

  /* Set mesh spacings and subgrid indices for this PE. */
  const realtype dx = data->dx;
  const realtype dy = data->dy;
  const int ixsub = data->ixsub;
  const int jysub = data->jysub;

  /* Set beginning and ending locations in the global array corresponding
     to the portion of that array assigned to this processor. */
  const sunindextype mxsub   = data->mxsub;
  const sunindextype mysub   = data->mysub;
  const sunindextype ixbegin = mxsub*ixsub;
  const sunindextype ixend   = mxsub*(ixsub+1) - 1;
  const sunindextype jybegin = mysub*jysub;
  const sunindextype jyend   = mysub*(jysub+1) - 1;

  /* Loop over the local array, computing the initial profile value.
     The global indices are (i,j) and the local indices are (iloc,jloc).
     Also set the id vector to zero for boundary points, one otherwise. */

  N_VConst(ONE, id);
  for (j = jybegin, jloc = 0; j <= jyend; j++, jloc++) {
    yfact = dy*j;
    for (i = ixbegin, iloc = 0; i <= ixend; i++, iloc++) {
      xfact = dx*i;
      loc = iloc + jloc*mxsub;
      uudata[loc] = RCONST(16.0) * xfact * (ONE - xfact) * yfact * (ONE - yfact);

      if (i == 0 || i == data->mx - 1 || j == 0 || j == data->my - 1)
        iddata[loc] = ZERO;
    }
  }

  /* Initialize up. */

  N_VConst(ZERO, up);    /* Initially set up = 0. */

  /* resHeat sets res to negative of ODE RHS values at interior points. */
  resHeat(ZERO, uu, up, res, data);

  /* Copy -res into up to get correct initial up values. */
  N_VScale(-ONE, res, up);
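
  /* Since up was zero, res now equals -(discrete Laplacian of u) at interior
     points and uu (= 0) on the boundary, so up = -res gives u' = Laplacian u
     in the interior and u' = 0 on the boundary: a consistent initial slope. */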

  return(0);
}

/*
 * Print first lines of output and table heading
 */

static void PrintHeader(realtype rtol, realtype atol, UserData data)
{
  printf("\nidaHeat2D_kry_p: Heat equation, parallel example problem for IDA\n");
  printf(" Discretized heat equation on 2D unit square.\n");
  printf(" Zero boundary conditions,");
  printf(" polynomial initial conditions.\n");
  printf(" Mesh dimensions: %d x %d", (int) data->mx, (int) data->my);
  printf(" Total system size: %ld\n\n", (long) (data->mx * data->my));
  printf("Subgrid dimensions: %d x %d", (int) data->mxsub, (int) data->mysub);
  printf(" Processor array: %d x %d\n", (int) data->npex, (int) data->npey);
#if defined(SUNDIALS_EXTENDED_PRECISION)
  printf("Tolerance parameters: rtol = %Lg atol = %Lg\n", rtol, atol);
#elif defined(SUNDIALS_DOUBLE_PRECISION)
  printf("Tolerance parameters: rtol = %g atol = %g\n", rtol, atol);
#else
  printf("Tolerance parameters: rtol = %g atol = %g\n", rtol, atol);
#endif
  printf("Constraints set to force all solution components >= 0. \n");
  printf("SUPPRESSALG = SUNTRUE to suppress local error testing on ");
  printf("all boundary components. \n");
  printf("Linear solver: SUNLinSol_SPGMR ");
  printf("Preconditioner: diagonal elements only.\n");

  /* Print output table heading and initial line of table. */
  printf("\n Output Summary (umax = max-norm of solution) \n\n");
  printf(" time umax k nst nni nli nre nreLS h npe nps\n");
  printf("----------------------------------------------------------------------\n");
}

/*
 * PrintOutput: print max norm of solution and current solver statistics
 */

static void PrintOutput(int id, void *ida_mem, realtype t, N_Vector uu)
{
  realtype hused, umax;
  long int nst, nni, nje, nre, nreLS, nli, npe, nps;
  int kused, retval;

  umax = N_VMaxNorm(uu);

  if (id == 0) {

    retval = IDAGetLastOrder(ida_mem, &kused);
    check_retval(&retval, "IDAGetLastOrder", 1, id);
    retval = IDAGetNumSteps(ida_mem, &nst);
    check_retval(&retval, "IDAGetNumSteps", 1, id);
    retval = IDAGetNumNonlinSolvIters(ida_mem, &nni);
    check_retval(&retval, "IDAGetNumNonlinSolvIters", 1, id);
    retval = IDAGetNumResEvals(ida_mem, &nre);
    check_retval(&retval, "IDAGetNumResEvals", 1, id);
    retval = IDAGetLastStep(ida_mem, &hused);
    check_retval(&retval, "IDAGetLastStep", 1, id);
    retval = IDAGetNumJtimesEvals(ida_mem, &nje);
    check_retval(&retval, "IDAGetNumJtimesEvals", 1, id);
    retval = IDAGetNumLinIters(ida_mem, &nli);
    check_retval(&retval, "IDAGetNumLinIters", 1, id);
    retval = IDAGetNumLinResEvals(ida_mem, &nreLS);
    check_retval(&retval, "IDAGetNumLinResEvals", 1, id);
    retval = IDAGetNumPrecEvals(ida_mem, &npe);
    check_retval(&retval, "IDAGetNumPrecEvals", 1, id);
    retval = IDAGetNumPrecSolves(ida_mem, &nps);
    check_retval(&retval, "IDAGetNumPrecSolves", 1, id);

#if defined(SUNDIALS_EXTENDED_PRECISION)
    printf(" %5.2Lf %13.5Le %d %3ld %3ld %3ld %4ld %4ld %9.2Le %3ld %3ld\n",
           t, umax, kused, nst, nni, nje, nre, nreLS, hused, npe, nps);
#elif defined(SUNDIALS_DOUBLE_PRECISION)
    printf(" %5.2f %13.5e %d %3ld %3ld %3ld %4ld %4ld %9.2e %3ld %3ld\n",
           t, umax, kused, nst, nni, nje, nre, nreLS, hused, npe, nps);
#else
    printf(" %5.2f %13.5e %d %3ld %3ld %3ld %4ld %4ld %9.2e %3ld %3ld\n",
           t, umax, kused, nst, nni, nje, nre, nreLS, hused, npe, nps);
#endif

  }
}

/*
 * Print some final integrator statistics
 */

static void PrintFinalStats(void *ida_mem)
{
  long int netf, ncfn, ncfl;

  IDAGetNumErrTestFails(ida_mem, &netf);
  IDAGetNumNonlinSolvConvFails(ida_mem, &ncfn);
  IDAGetNumLinConvFails(ida_mem, &ncfl);

  printf("\nError test failures = %ld\n", netf);
  printf("Nonlinear convergence failures = %ld\n", ncfn);
  printf("Linear convergence failures = %ld\n", ncfl);
}

/*
 * Check function return value...
 *   opt == 0 means SUNDIALS function allocates memory so check if
 *            returned NULL pointer
 *   opt == 1 means SUNDIALS function returns an integer value so check if
 *            retval < 0
 *   opt == 2 means function allocates memory so check if returned
 *            NULL pointer
 */

static int check_retval(void *returnvalue, const char *funcname, int opt, int id)
{
  int *retval;

  if (opt == 0 && returnvalue == NULL) {
    /* Check if SUNDIALS function returned NULL pointer - no memory allocated */
    fprintf(stderr,
            "\nSUNDIALS_ERROR(%d): %s() failed - returned NULL pointer\n\n",
            id, funcname);
    return(1);
  } else if (opt == 1) {
    /* Check if retval < 0 */
    retval = (int *) returnvalue;
    if (*retval < 0) {
      fprintf(stderr,
              "\nSUNDIALS_ERROR(%d): %s() failed with retval = %d\n\n",
              id, funcname, *retval);
      return(1);
    }
  } else if (opt == 2 && returnvalue == NULL) {
    /* Check if function returned NULL pointer - no memory allocated */
    fprintf(stderr,
            "\nMEMORY_ERROR(%d): %s() failed - returned NULL pointer\n\n",
            id, funcname);
    return(1);
  }

  return(0);
}