/*
 * -----------------------------------------------------------------
 * Programmer(s): Daniel R. Reynolds @ SMU
 *         Allan Taylor, Alan Hindmarsh and Radu Serban @ LLNL
 * -----------------------------------------------------------------
 * SUNDIALS Copyright Start
 * Copyright (c) 2002-2021, Lawrence Livermore National Security
 * and Southern Methodist University.
 * All rights reserved.
 *
 * See the top-level LICENSE and NOTICE files for details.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 * SUNDIALS Copyright End
 * -----------------------------------------------------------------
 * Example problem for IDA: 2D heat equation, parallel, GMRES.
 *
 * This example solves a discretized 2D heat equation problem.
 * This version uses the Krylov solver SUNLinSol_SPGMR.
 *
 * The DAE system solved is a spatial discretization of the PDE
 *          du/dt = d^2u/dx^2 + d^2u/dy^2
 * on the unit square. The boundary condition is u = 0 on all edges.
 * Initial conditions are given by u = 16 x (1 - x) y (1 - y).
 * The PDE is treated with central differences on a uniform MX x MY
 * grid. The values of u at the interior points satisfy ODEs, and
 * equations u = 0 at the boundaries are appended, to form a DAE
 * system of size N = MX * MY. Here MX = MY = 10.
 *
 * The system is actually implemented on submeshes, processor by
 * processor, with an MXSUB by MYSUB mesh on each of NPEX * NPEY
 * processors.
 *
 * The system is solved with IDA using the Krylov linear solver
 * SUNLinSol_SPGMR. The preconditioner uses the diagonal elements of the
 * Jacobian only. Routines for preconditioning, required by
 * SUNLinSol_SPGMR, are supplied here. The constraints u >= 0 are posed
 * for all components. Local error testing on the boundary values
 * is suppressed. Output is taken at t = 0, .01, .02, .04,
 * ..., 10.24.
 * -----------------------------------------------------------------
 */
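
/*
 * Note: main() below requires the number of MPI processes to equal
 * NPEX*NPEY = 4, so a typical invocation (assuming a standard MPI
 * launcher and the default executable name) would look like
 *     mpirun -np 4 ./idaHeat2D_kry_p
 */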

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include <ida/ida.h>
#include <sunlinsol/sunlinsol_spgmr.h>
#include <nvector/nvector_parallel.h>
#include <sundials/sundials_types.h>

#include <mpi.h>

#define ZERO  RCONST(0.0)
#define ONE   RCONST(1.0)
#define TWO   RCONST(2.0)

#define NOUT         11    /* Number of output times */

#define NPEX         2     /* No. PEs in x direction of PE array */
#define NPEY         2     /* No. PEs in y direction of PE array */
                           /* Total no. PEs = NPEX*NPEY */
#define MXSUB        5     /* No. x points per subgrid */
#define MYSUB        5     /* No. y points per subgrid */

/* Global spatial mesh is MX x MY = (NPEX x MXSUB) x (NPEY x MYSUB) */

typedef struct {
  int thispe, npex, npey, ixsub, jysub;
  sunindextype mx, my, mxsub, mysub;
  realtype     dx, dy, coeffx, coeffy, coeffxy;
  realtype    *uext;
  realtype    *send_buff;
  realtype    *recv_buff;
  N_Vector     pp;    /* vector of diagonal preconditioner elements */
  MPI_Comm     comm;
} *UserData;
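
/* Layout notes (see AllocUserData, BSend and BRecvPost):
 *  - uext holds the local solution extended by one layer of ghost points
 *    on each side, i.e. (MXSUB+2) x (MYSUB+2) entries stored row-wise.
 *  - send_buff and recv_buff each hold 2*(MXSUB+MYSUB) entries,
 *    partitioned as [left | right | top | bottom] segments of lengths
 *    MYSUB, MYSUB, MXSUB and MXSUB, respectively.
 */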

/* User-supplied residual function and supporting routines */

int resHeat(realtype tt, N_Vector uu, N_Vector up,
            N_Vector rr, void *user_data);

static int rescomm(N_Vector uu, N_Vector up, UserData data);

static int reslocal(realtype tt, N_Vector uu, N_Vector up,
                    N_Vector res,  UserData data);

static int BSend(MPI_Comm comm, int thispe,
                 int ixsub, int jysub, int npex, int npey,
                 sunindextype mxsub, sunindextype mysub,
                 const realtype uarray[], realtype* send_buffer);

static int BRecvPost(MPI_Comm comm, MPI_Request request[], int thispe,
                     int ixsub, int jysub, int npex, int npey,
                     sunindextype mxsub, sunindextype mysub,
                     realtype recv_buff[]);

static int BRecvWait(MPI_Request request[],
                     int ixsub, int jysub, int npex, int npey,
                     sunindextype mxsub, sunindextype mysub,
                     realtype uext[], const realtype recv_buff[]);

/* User-supplied preconditioner routines */

int PsolveHeat(realtype tt, N_Vector uu, N_Vector up, N_Vector rr,
               N_Vector rvec, N_Vector zvec, realtype c_j,
               realtype delta, void *user_data);

int PsetupHeat(realtype tt, N_Vector yy, N_Vector yp, N_Vector rr,
               realtype c_j, void *user_data);

/* Private helper functions */

static int InitUserData(int thispe, MPI_Comm comm, UserData data);

static int AllocUserData(int thispe, MPI_Comm comm, N_Vector uu, UserData data);

static int DeleteUserData(UserData data);

static int SetInitialProfile(N_Vector uu, N_Vector up, N_Vector id,
                             N_Vector res, UserData data);

static void PrintHeader(realtype rtol, realtype atol, UserData data);

static void PrintOutput(int id, void *ida_mem, realtype t, N_Vector uu);

static void PrintFinalStats(void *ida_mem);

static int check_retval(void *returnvalue, const char *funcname, int opt, int id);

/*
 *--------------------------------------------------------------------
 * MAIN PROGRAM
 *--------------------------------------------------------------------
 */

int main(int argc, char *argv[])
{
  MPI_Comm comm;
  void *ida_mem;
  SUNLinearSolver LS;
  UserData data;
  int iout, thispe, retval, npes;
  sunindextype Neq, local_N;
  realtype rtol, atol, t0, t1, tout, tret;
  N_Vector uu, up, constraints, id, res;

  ida_mem = NULL;
  LS = NULL;
  data = NULL;
  uu = up = constraints = id = res = NULL;

  /* Get processor number and total number of PEs. */

  MPI_Init(&argc, &argv);
  comm = MPI_COMM_WORLD;
  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &thispe);

  /* Allocate and initialize the data structure */
  data = (UserData) malloc(sizeof *data);
  if(check_retval((void *)data, "malloc", 2, thispe))
    MPI_Abort(comm, 1);

  InitUserData(thispe, comm, data);

  /* Check if the number of MPI processes matches the number of subgrids */
  if (npes != (data->npex * data->npey)) {
    if (thispe == 0)
      fprintf(stderr,
              "\nMPI_ERROR(0): npes = %d is not equal to NPEX*NPEY = %d\n",
              npes, data->npex * data->npey);
    free(data);
    MPI_Finalize();
    return(1);
  }

  /* Set local length local_N and global length Neq. */

  local_N = data->mxsub * data->mysub;
  Neq     = data->mx * data->my;

  /* Allocate and initialize N-vectors. */

  uu = N_VNew_Parallel(comm, local_N, Neq);
  if(check_retval((void *)uu, "N_VNew_Parallel", 0, thispe))
    MPI_Abort(comm, 1);

  up = N_VClone(uu);
  if(check_retval((void *)up, "N_VClone", 0, thispe))
    MPI_Abort(comm, 1);

  res = N_VClone(uu);
  if(check_retval((void *)res, "N_VClone", 0, thispe))
    MPI_Abort(comm, 1);

  constraints = N_VClone(uu);
  if(check_retval((void *)constraints, "N_VClone", 0, thispe))
    MPI_Abort(comm, 1);

  id = N_VClone(uu);
  if(check_retval((void *)id, "N_VClone", 0, thispe))
    MPI_Abort(comm, 1);

  /* Allocate user data extended vector and MPI buffers */
  retval = AllocUserData(thispe, comm, uu, data);
  if(check_retval(&retval, "AllocUserData", 1, thispe)) MPI_Abort(comm, 1);

  /* Initialize the uu, up, id, and res profiles. */
  SetInitialProfile(uu, up, id, res, data);

  /* Set constraints to all 1's for nonnegative solution values. */
  N_VConst(ONE, constraints);

  t0 = ZERO; t1 = RCONST(0.01);

  /* Scalar relative and absolute tolerance. */
  rtol = ZERO;
  atol = RCONST(1.0e-3);
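
  /* Note: with rtol = 0 the error test is purely absolute; IDA's error
     weights 1/(rtol*|u_i| + atol) reduce to 1/atol for every component. */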

  /* Call IDACreate and IDAInit to initialize solution. */

  ida_mem = IDACreate();
  if(check_retval((void *)ida_mem, "IDACreate", 0, thispe)) MPI_Abort(comm, 1);

  retval = IDASetUserData(ida_mem, data);
  if(check_retval(&retval, "IDASetUserData", 1, thispe)) MPI_Abort(comm, 1);

  retval = IDASetSuppressAlg(ida_mem, SUNTRUE);
  if(check_retval(&retval, "IDASetSuppressAlg", 1, thispe)) MPI_Abort(comm, 1);

  retval = IDASetId(ida_mem, id);
  if(check_retval(&retval, "IDASetId", 1, thispe)) MPI_Abort(comm, 1);

  retval = IDASetConstraints(ida_mem, constraints);
  if(check_retval(&retval, "IDASetConstraints", 1, thispe)) MPI_Abort(comm, 1);
  N_VDestroy(constraints);

  retval = IDAInit(ida_mem, resHeat, t0, uu, up);
  if(check_retval(&retval, "IDAInit", 1, thispe)) MPI_Abort(comm, 1);

  retval = IDASStolerances(ida_mem, rtol, atol);
  if(check_retval(&retval, "IDASStolerances", 1, thispe)) MPI_Abort(comm, 1);

  /* Call SUNLinSol_SPGMR and IDASetLinearSolver to specify the linear solver. */

  LS = SUNLinSol_SPGMR(uu, PREC_LEFT, 0);  /* use default maxl */
  if(check_retval((void *)LS, "SUNLinSol_SPGMR", 0, thispe)) MPI_Abort(comm, 1);

  retval = IDASetLinearSolver(ida_mem, LS, NULL);
  if(check_retval(&retval, "IDASetLinearSolver", 1, thispe)) MPI_Abort(comm, 1);

  retval = IDASetPreconditioner(ida_mem, PsetupHeat, PsolveHeat);
  if(check_retval(&retval, "IDASetPreconditioner", 1, thispe)) MPI_Abort(comm, 1);

  /* Print output heading (on processor 0 only) and initial solution */

  if (thispe == 0) PrintHeader(rtol, atol, data);
  PrintOutput(thispe, ida_mem, t0, uu);

  /* Loop over tout, call IDASolve, print output. */

  for (tout = t1, iout = 1; iout <= NOUT; iout++, tout *= TWO) {

    retval = IDASolve(ida_mem, tout, &tret, uu, up, IDA_NORMAL);
    if(check_retval(&retval, "IDASolve", 1, thispe)) MPI_Abort(comm, 1);

    PrintOutput(thispe, ida_mem, tret, uu);

  }

  /* Print remaining counters. */

  if (thispe == 0) PrintFinalStats(ida_mem);

  /* Free memory */

  IDAFree(&ida_mem);
  SUNLinSolFree(LS);

  N_VDestroy(id);
  N_VDestroy(res);
  N_VDestroy(up);
  N_VDestroy(uu);

  DeleteUserData(data);
  free(data);

  MPI_Finalize();

  return(0);

}

/*
 *--------------------------------------------------------------------
 * FUNCTIONS CALLED BY IDA
 *--------------------------------------------------------------------
 */

/*
 * resHeat: heat equation system residual function
 * This uses 5-point central differencing on the interior points, and
 * includes algebraic equations for the boundary values.
 * So for each interior point, the residual component has the form
 *    res_i = u'_i - (central difference)_i
 * while for each boundary point, it is res_i = u_i.
 *
 * This parallel implementation uses several supporting routines.
 * First a call is made to rescomm to do communication of subgrid boundary
 * data into array uext.  Then reslocal is called to compute the residual
 * on each processor's local subdomain.  The routines
 * BSend, BRecvPost, and BRecvWait handle the interprocessor communication
 * of uu required to calculate the residual.
 */
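
/*
 * Concretely, with dx = 1/(MX-1), dy = 1/(MY-1) and the coefficients
 * coeffx = 1/dx^2, coeffy = 1/dy^2, coeffxy = 2/dx^2 + 2/dy^2 set in
 * InitUserData, the interior residual computed by reslocal is
 *
 *   res_{i,j} = u'_{i,j} - [ coeffx*(u_{i-1,j} + u_{i+1,j})
 *                          + coeffy*(u_{i,j-1} + u_{i,j+1})
 *                          - coeffxy*u_{i,j} ].
 */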

int resHeat(realtype tt, N_Vector uu, N_Vector up, N_Vector rr,
            void *user_data)
{
  int retval = 0;

  /* Call rescomm to do inter-processor communication. */
  retval = rescomm(uu, up, user_data);

  /* Call reslocal to calculate res. */
  retval = reslocal(tt, uu, up, rr, user_data);

  return(retval);

}

/*
 * PsetupHeat: setup for the diagonal preconditioner for the 2D heat
 * equation problem.
 *
 * The optional user-supplied functions PsetupHeat and
 * PsolveHeat together must define the left preconditioner
 * matrix P approximating the system Jacobian matrix
 *                   J = dF/du + cj*dF/du'
 * (where the DAE system is F(t,u,u') = 0), and solve the linear
 * systems P z = r.  This is done here by keeping only the diagonal
 * elements of the J matrix above, storing their inverses in a vector
 * pp in PsetupHeat for subsequent use in PsolveHeat.
 *
 * In this instance, only cj and data (the user data structure, with
 * pp etc.) are used from the PsetupHeat argument list.
 */
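
/*
 * For an interior point, dF/du contributes coeffxy and cj*dF/du'
 * contributes cj on the diagonal, so J_ii = cj + coeffxy and the stored
 * inverse is pelinv = 1/(cj + coeffxy).  Boundary components satisfy
 * F = u, so their diagonal entries are left at 1.
 */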

int PsetupHeat(realtype tt, N_Vector yy, N_Vector yp, N_Vector rr,
               realtype c_j, void *user_data)
{
  sunindextype lx, ly, ixbegin, ixend, jybegin, jyend, locu;

  /* Unwrap the user data */
  UserData data = (UserData) user_data;
  const int ixsub = data->ixsub;
  const int jysub = data->jysub;
  const int npex  = data->npex;
  const int npey  = data->npey;
  const sunindextype mxsub = data->mxsub;
  const sunindextype mysub = data->mysub;
  realtype *ppv = N_VGetArrayPointer(data->pp);

  /* Calculate the value for the inverse of the diagonal preconditioner */
  const realtype pelinv = ONE/(c_j + data->coeffxy);

  /* Initially set all pp elements to one. */
  N_VConst(ONE, data->pp);

  /* Prepare to loop over subgrid. */
  ixbegin = 0;
  ixend   = mxsub-1;
  jybegin = 0;
  jyend   = mysub-1;
  if (ixsub == 0) ixbegin++;
  if (ixsub == npex-1) ixend--;
  if (jysub == 0) jybegin++;
  if (jysub == npey-1) jyend--;

  /* Load the inverse of the preconditioner diagonal elements
     in loop over all the local subgrid. */

  for (ly = jybegin; ly <= jyend; ly++) {
    for (lx = ixbegin; lx <= ixend; lx++) {
      locu  = lx + ly*mxsub;
      ppv[locu] = pelinv;
    }
  }

  return(0);

}

/*
 * PsolveHeat: solve preconditioner linear system.
 * This routine multiplies the input vector rvec by the vector pp
 * containing the inverse diagonal Jacobian elements (previously
 * computed in PsetupHeat), returning the result in zvec.
 */
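
/*
 * Since P is diagonal, the solve P z = r is exact and reduces to the
 * elementwise product z = pp .* r performed below by N_VProd.
 */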

int PsolveHeat(realtype tt, N_Vector uu, N_Vector up,
               N_Vector rr, N_Vector rvec, N_Vector zvec,
               realtype c_j, realtype delta, void *user_data)
{
  UserData data = (UserData) user_data;

  N_VProd(data->pp, rvec, zvec);

  return(0);

}

/*
 *--------------------------------------------------------------------
 * SUPPORTING FUNCTIONS
 *--------------------------------------------------------------------
 */


/*
 * rescomm routine.  This routine performs all inter-processor
 * communication of data in u needed to calculate the residual.
 */

static int rescomm(N_Vector uu, N_Vector up, UserData data)
{
  MPI_Request request[4];

  /* Get comm, thispe, subgrid indices, data sizes */
  MPI_Comm comm = data->comm;
  const int thispe = data->thispe;
  const int ixsub = data->ixsub;
  const int jysub = data->jysub;
  const int npex = data->npex;
  const int npey = data->npey;
  const sunindextype mxsub = data->mxsub;
  const sunindextype mysub = data->mysub;

  /* Get solution vector data, buffers, extended array uext. */
  const realtype *uarray = N_VGetArrayPointer(uu);
  realtype *uext = data->uext;
  realtype *send_buffer = data->send_buff;
  realtype *recv_buff = data->recv_buff;

  /* Start receiving boundary data from neighboring PEs. */
  BRecvPost(comm, request, thispe, ixsub, jysub, npex, npey, mxsub, mysub, recv_buff);

  /* Send data from boundary of local grid to neighboring PEs. */
  BSend(comm, thispe, ixsub, jysub, npex, npey, mxsub, mysub, uarray, send_buffer);

  /* Finish receiving boundary data from neighboring PEs. */
  BRecvWait(request, ixsub, jysub, npex, npey, mxsub, mysub, uext, recv_buff);

  return(0);

}

/*
 * reslocal routine.  Compute res = F(t, uu, up).  This routine assumes
 * that all inter-processor communication of data needed to calculate F
 * has already been done, and that this data is in the work array uext.
 */

static int reslocal(realtype tt, N_Vector uu, N_Vector up, N_Vector rr,
                    UserData data)
{
  realtype termx, termy, termctr;
  sunindextype lx, ly;
  sunindextype locu, locue;
  sunindextype ixbegin, ixend, jybegin, jyend;

  /* Get subgrid indices, array sizes */
  const int ixsub = data->ixsub;
  const int jysub = data->jysub;
  const int npex = data->npex;
  const int npey = data->npey;
  const sunindextype mxsub = data->mxsub;
  const sunindextype mxsub2 = data->mxsub + 2;
  const sunindextype mysub = data->mysub;

  /* Vector data arrays, extended work array uext. */
  const realtype *uuv = N_VGetArrayPointer(uu);
  const realtype *upv = N_VGetArrayPointer(up);
  realtype *resv = N_VGetArrayPointer(rr);
  realtype *uext = data->uext;

  /* Initialize all elements of rr to uu. This sets the boundary
     elements simply without indexing hassles. */

  N_VScale(ONE, uu, rr);

  /* Copy local segment of u vector into the working extended array uext.
     This completes uext prior to the computation of the rr vector.     */

  for (ly = 0; ly < mysub; ly++) {
    for (lx = 0; lx < mxsub; lx++) {
      uext[mxsub2*(ly+1) + (lx+1)] = uuv[mxsub*ly + lx];
    }
  }

  /* Set loop limits for the interior of the local subgrid. */

  ixbegin = 0;
  ixend   = mxsub-1;
  jybegin = 0;
  jyend   = mysub-1;
  if (ixsub == 0) ixbegin++;
  if (ixsub == npex-1) ixend--;
  if (jysub == 0) jybegin++;
  if (jysub == npey-1) jyend--;

  /* Loop over all grid points in local subgrid. */

  for (ly = jybegin; ly <= jyend; ly++) {
    for (lx = ixbegin; lx <= ixend; lx++) {
      locu  = lx + ly*mxsub;
      locue = (lx+1) + (ly+1)*mxsub2;
      termx = data->coeffx *(uext[locue-1]      + uext[locue+1]);
      termy = data->coeffy *(uext[locue-mxsub2] + uext[locue+mxsub2]);
      termctr = data->coeffxy*uext[locue];
      resv[locu] = upv[locu] - (termx + termy - termctr);
    }
  }
  return(0);

}

/*
 * Routine to send boundary data to neighboring PEs.
 */

static int BSend(MPI_Comm comm, int thispe,
                 int ixsub, int jysub, int npex, int npey,
                 sunindextype mxsub, sunindextype mysub,
                 const realtype uarray[], realtype *send_buffer)
{
  sunindextype lx, ly;
  /* Have left, right, top and bottom buffers use the same send_buffer. */
  realtype *bufleft   = send_buffer;
  realtype *bufright  = send_buffer + mysub;
  realtype *buftop    = send_buffer + 2*mysub;
  realtype *bufbottom = send_buffer + 2*mysub + mxsub;
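
  /* Together the four segments occupy 2*(mxsub + mysub) entries,
     matching the send_buff allocation in AllocUserData. */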

  /* If jysub > 0, send data from bottom x-line of u.  (via bufbottom) */

  if (jysub != 0) {
    for (lx = 0; lx < mxsub; ++lx) {
      bufbottom[lx] = uarray[lx];
    }
    MPI_Send(bufbottom, (int) mxsub, MPI_SUNREALTYPE, thispe-npex, 0, comm);
  }

  /* If jysub < NPEY-1, send data from top x-line of u. (via buftop) */

  if (jysub != npey-1) {
    for (lx = 0; lx < mxsub; ++lx) {
      buftop[lx] = uarray[(mysub-1)*mxsub + lx];
    }
    MPI_Send(buftop, (int) mxsub, MPI_SUNREALTYPE, thispe+npex, 0, comm);
  }

  /* If ixsub > 0, send data from left y-line of u (via bufleft). */

  if (ixsub != 0) {
    for (ly = 0; ly < mysub; ly++) {
      bufleft[ly] = uarray[ly*mxsub];
    }
    MPI_Send(bufleft, (int) mysub, MPI_SUNREALTYPE, thispe-1, 0, comm);
  }

  /* If ixsub < NPEX-1, send data from right y-line of u (via bufright). */

  if (ixsub != npex-1) {
    for (ly = 0; ly < mysub; ly++) {
      bufright[ly] = uarray[ly*mxsub + (mxsub-1)];
    }
    MPI_Send(bufright, (int) mysub, MPI_SUNREALTYPE, thispe+1, 0, comm);
  }

  return(0);

}

/*
 * Routine to start receiving boundary data from neighboring PEs.
 * Notes:
 *   1) buffer should be able to hold 2*MXSUB + 2*MYSUB realtype entries,
 *      should be passed to both the BRecvPost and BRecvWait functions,
 *      and should not be manipulated between the two calls.
 *   2) request should have 4 entries, and should be passed in
 *      both calls also.
 */

static int BRecvPost(MPI_Comm comm, MPI_Request request[], int thispe,
                     int ixsub, int jysub, int npex, int npey,
                     sunindextype mxsub, sunindextype mysub,
                     realtype recv_buff[])
{
  /* Have left, right, top and bottom buffers use the same recv_buff. */
  realtype *bufleft   = recv_buff;
  realtype *bufright  = recv_buff + mysub;
  realtype *buftop    = recv_buff + 2*mysub;
  realtype *bufbottom = recv_buff + 2*mysub + mxsub;

  /* If jysub > 0, receive data for bottom x-line of uext. */
  if (jysub != 0) {
    MPI_Irecv(bufbottom, (int) mxsub, MPI_SUNREALTYPE,
              thispe-npex, 0, comm, &request[0]);
  }

  /* If jysub < NPEY-1, receive data for top x-line of uext. */
  if (jysub != npey-1) {
    MPI_Irecv(buftop, (int) mxsub, MPI_SUNREALTYPE,
              thispe+npex, 0, comm, &request[1]);
  }

  /* If ixsub > 0, receive data for left y-line of uext (via bufleft). */
  if (ixsub != 0) {
    MPI_Irecv(&bufleft[0], (int) mysub, MPI_SUNREALTYPE,
              thispe-1, 0, comm, &request[2]);
  }

  /* If ixsub < NPEX-1, receive data for right y-line of uext (via bufright). */
  if (ixsub != npex-1) {
    MPI_Irecv(&bufright[0], (int) mysub, MPI_SUNREALTYPE,
              thispe+1, 0, comm, &request[3]);
  }

  return(0);

}

/*
 * Routine to finish receiving boundary data from neighboring PEs.
 * Notes:
 *   1) buffer should be able to hold 2*MXSUB + 2*MYSUB realtype entries,
 *      should be passed to both the BRecvPost and BRecvWait functions,
 *      and should not be manipulated between the two calls.
 *   2) request should have four entries, and should be passed in both
 *      calls also.
 */

static int BRecvWait(MPI_Request request[], int ixsub, int jysub,
                     int npex, int npey,
                     sunindextype mxsub, sunindextype mysub,
                     realtype uext[], const realtype recv_buff[])
{
  MPI_Status status;
  const realtype *bufleft   = recv_buff;
  const realtype *bufright  = recv_buff + mysub;
  const realtype *buftop    = recv_buff + 2*mysub;
  const realtype *bufbottom = recv_buff + 2*mysub + mxsub;
  sunindextype ly, lx, offsetue;
  const sunindextype mxsub2 = mxsub + 2;
  const sunindextype mysub1 = mysub + 1;
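
  /* uext is stored row-wise with row length mxsub2 = mxsub+2.  The ghost
     entries filled below are the bottom row (indices 1..mxsub), the top
     row (starting at 1 + mysub1*mxsub2), the left column (entries
     (ly+1)*mxsub2) and the right column (entries (ly+2)*mxsub2 - 1). */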

  /* If jysub > 0, receive data for bottom x-line of uext. */
  if (jysub != 0) {
    MPI_Wait(&request[0], &status);

    /* Copy the recv_buff to uext. */
    for (lx = 0; lx < mxsub; lx++) {
      offsetue = 1 + lx;
      uext[offsetue] = bufbottom[lx];
    }
  }

  /* If jysub < NPEY-1, receive data for top x-line of uext. */
  if (jysub != npey-1) {
    MPI_Wait(&request[1], &status);

    /* Copy the recv_buff to uext. */
    for (lx = 0; lx < mxsub; lx++) {
      offsetue = (1 + mysub1*mxsub2) + lx;
      uext[offsetue] = buftop[lx];
    }
  }

  /* If ixsub > 0, receive data for left y-line of uext (via bufleft). */
  if (ixsub != 0) {
    MPI_Wait(&request[2], &status);

    /* Copy the recv_buff to uext. */
    for (ly = 0; ly < mysub; ly++) {
      offsetue = (ly+1)*mxsub2;
      uext[offsetue] = bufleft[ly];
    }
  }

  /* If ixsub < NPEX-1, receive data for right y-line of uext (via bufright). */
  if (ixsub != npex-1) {
    MPI_Wait(&request[3], &status);

    /* Copy the recv_buff to uext */
    for (ly = 0; ly < mysub; ly++) {
      offsetue = (ly+2)*mxsub2 - 1;
      uext[offsetue] = bufright[ly];
    }
  }

  return(0);

}

/*
 *--------------------------------------------------------------------
 * PRIVATE FUNCTIONS
 *--------------------------------------------------------------------
 */

/*
 * InitUserData initializes the user's data block data.
 */

static int InitUserData(int thispe, MPI_Comm comm, UserData data)
{
  data->comm    = comm;
  data->thispe  = thispe;
  data->npex    = NPEX;  /* Number of subgrids in x-direction */
  data->npey    = NPEY;  /* Number of subgrids in y-direction */
  data->mxsub   = MXSUB; /* Number of subgrid mesh points in x-direction */
  data->mysub   = MYSUB; /* Number of subgrid mesh points in y-direction */
  data->jysub   = thispe/data->npex;
  data->ixsub   = thispe - (data->jysub * data->npex);
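
  /* Example: with NPEX = NPEY = 2, process 3 gets jysub = 3/2 = 1 and
     ixsub = 3 - 1*2 = 1, i.e. the upper-right MXSUB x MYSUB subgrid. */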
  data->mx      = data->npex * data->mxsub;  /* Mesh size in x-direction */
  data->my      = data->npey * data->mysub;  /* Mesh size in y-direction */
  data->dx      = ONE/(data->mx-ONE); /* Assumes a [0,1] interval in x. */
  data->dy      = ONE/(data->my-ONE); /* Assumes a [0,1] interval in y. */
  data->coeffx  = ONE/(data->dx * data->dx);
  data->coeffy  = ONE/(data->dy * data->dy);
  data->coeffxy = TWO/(data->dx * data->dx) + TWO/(data->dy * data->dy);

  data->uext      = NULL;
  data->send_buff = NULL;
  data->recv_buff = NULL;

  return(0);
}


/*
 * AllocUserData allocates memory for the extended vector uext
 * and MPI communication buffers.
 */

static int AllocUserData(int thispe, MPI_Comm comm, N_Vector uu, UserData data)
{
  /* An N-vector to hold preconditioner. */
  data->pp = N_VClone(uu);
  if(data->pp == NULL) {
    MPI_Abort(comm, 1);
    return -1;
  }

  /* Allocate local extended vector (includes ghost nodes) */
  data->uext = (realtype*) malloc((data->mxsub + 2)*(data->mysub +2)*sizeof(realtype));
  if(data->uext == NULL) {
    N_VDestroy(data->pp);
    MPI_Abort(comm, 1);
    return -1;
  }

  /* Allocate local host send buffer */
  data->send_buff = (realtype*) malloc(2*(data->mxsub + data->mysub)*sizeof(realtype));
  if(data->send_buff == NULL) {
    N_VDestroy(data->pp);
    free(data->uext);
    MPI_Abort(comm, 1);
    return -1;
  }

  data->recv_buff = (realtype*) malloc(2*(data->mxsub + data->mysub)*sizeof(realtype));
  if(data->recv_buff == NULL) {
    N_VDestroy(data->pp);
    free(data->uext);
    free(data->send_buff);
    MPI_Abort(comm, 1);
    return -1;
  }

  return 0;
}


static int DeleteUserData(UserData data)
{
  if (data->pp != NULL)
    N_VDestroy(data->pp);
  if (data->uext != NULL)
    free(data->uext);
  if (data->send_buff != NULL)
    free(data->send_buff);
  if (data->recv_buff != NULL)
    free(data->recv_buff);
  return 0;
}

/*
 * SetInitialProfile sets the initial values for the problem.
 */

static int SetInitialProfile(N_Vector uu, N_Vector up,  N_Vector id,
                             N_Vector res, UserData data)
{
  sunindextype i, iloc, j, jloc, loc;
  realtype xfact, yfact;

  /* Initialize uu. */

  realtype *uudata = N_VGetArrayPointer(uu);
  realtype *iddata = N_VGetArrayPointer(id);

  /* Set mesh spacings and subgrid indices for this PE. */
  const realtype dx = data->dx;
  const realtype dy = data->dy;
  const int ixsub = data->ixsub;
  const int jysub = data->jysub;

  /* Set beginning and ending locations in the global array corresponding
     to the portion of that array assigned to this processor. */
  const sunindextype mxsub   = data->mxsub;
  const sunindextype mysub   = data->mysub;
  const sunindextype ixbegin = mxsub*ixsub;
  const sunindextype ixend   = mxsub*(ixsub+1) - 1;
  const sunindextype jybegin = mysub*jysub;
  const sunindextype jyend   = mysub*(jysub+1) - 1;

  /* Loop over the local array, computing the initial profile value.
     The global indices are (i,j) and the local indices are (iloc,jloc).
     Also set the id vector to zero for boundary points, one otherwise. */

  N_VConst(ONE, id);
  for (j = jybegin, jloc = 0; j <= jyend; j++, jloc++) {
    yfact = dy*j;
    for (i = ixbegin, iloc = 0; i <= ixend; i++, iloc++) {
      xfact = dx*i;
      loc = iloc + jloc*mxsub;
      uudata[loc] = RCONST(16.0) * xfact * (ONE - xfact) * yfact * (ONE - yfact);

      if (i == 0 || i == data->mx - 1 || j == 0 || j == data->my - 1)
        iddata[loc] = ZERO;
    }
  }

  /* Initialize up. */

  N_VConst(ZERO, up);    /* Initially set up = 0. */

  /* resHeat sets res to negative of ODE RHS values at interior points. */
  resHeat(ZERO, uu, up, res, data);

  /* Copy -res into up to get correct initial up values. */
  N_VScale(-ONE, res, up);

  return(0);
}

/*
 * Print first lines of output and table heading
 */

static void PrintHeader(realtype rtol, realtype atol, UserData data)
{
  printf("\nidaHeat2D_kry_p: Heat equation, parallel example problem for IDA\n");
  printf("            Discretized heat equation on 2D unit square.\n");
  printf("            Zero boundary conditions,");
  printf(" polynomial initial conditions.\n");
  printf("            Mesh dimensions: %d x %d", (int) data->mx, (int) data->my);
  printf("        Total system size: %ld\n\n", (long) (data->mx * data->my));
  printf("Subgrid dimensions: %d x %d", (int) data->mxsub, (int) data->mysub);
  printf("        Processor array: %d x %d\n", (int) data->npex, (int) data->npey);
#if defined(SUNDIALS_EXTENDED_PRECISION)
  printf("Tolerance parameters:  rtol = %Lg   atol = %Lg\n", rtol, atol);
#elif defined(SUNDIALS_DOUBLE_PRECISION)
  printf("Tolerance parameters:  rtol = %g   atol = %g\n", rtol, atol);
#else
  printf("Tolerance parameters:  rtol = %g   atol = %g\n", rtol, atol);
#endif
  printf("Constraints set to force all solution components >= 0. \n");
  printf("SUPPRESSALG = SUNTRUE to suppress local error testing on ");
  printf("all boundary components. \n");
  printf("Linear solver: SUNLinSol_SPGMR  ");
  printf("Preconditioner: diagonal elements only.\n");

  /* Print output table heading and initial line of table. */
  printf("\n   Output Summary (umax = max-norm of solution) \n\n");
  printf("  time     umax       k  nst  nni  nli   nre   nreLS    h      npe nps\n");
  printf("----------------------------------------------------------------------\n");
}

/*
 * PrintOutput: print max norm of solution and current solver statistics
 */

static void PrintOutput(int id, void *ida_mem, realtype t, N_Vector uu)
{
  realtype hused, umax;
  long int nst, nni, nje, nre, nreLS, nli, npe, nps;
  int kused, retval;

  umax = N_VMaxNorm(uu);

  if (id == 0) {

    retval = IDAGetLastOrder(ida_mem, &kused);
    check_retval(&retval, "IDAGetLastOrder", 1, id);
    retval = IDAGetNumSteps(ida_mem, &nst);
    check_retval(&retval, "IDAGetNumSteps", 1, id);
    retval = IDAGetNumNonlinSolvIters(ida_mem, &nni);
    check_retval(&retval, "IDAGetNumNonlinSolvIters", 1, id);
    retval = IDAGetNumResEvals(ida_mem, &nre);
    check_retval(&retval, "IDAGetNumResEvals", 1, id);
    retval = IDAGetLastStep(ida_mem, &hused);
    check_retval(&retval, "IDAGetLastStep", 1, id);
    retval = IDAGetNumJtimesEvals(ida_mem, &nje);
    check_retval(&retval, "IDAGetNumJtimesEvals", 1, id);
    retval = IDAGetNumLinIters(ida_mem, &nli);
    check_retval(&retval, "IDAGetNumLinIters", 1, id);
    retval = IDAGetNumLinResEvals(ida_mem, &nreLS);
    check_retval(&retval, "IDAGetNumLinResEvals", 1, id);
    retval = IDAGetNumPrecEvals(ida_mem, &npe);
    check_retval(&retval, "IDAGetNumPrecEvals", 1, id);
    retval = IDAGetNumPrecSolves(ida_mem, &nps);
    check_retval(&retval, "IDAGetNumPrecSolves", 1, id);

#if defined(SUNDIALS_EXTENDED_PRECISION)
    printf(" %5.2Lf %13.5Le  %d  %3ld  %3ld  %3ld  %4ld  %4ld  %9.2Le  %3ld %3ld\n",
           t, umax, kused, nst, nni, nje, nre, nreLS, hused, npe, nps);
#elif defined(SUNDIALS_DOUBLE_PRECISION)
    printf(" %5.2f %13.5e  %d  %3ld  %3ld  %3ld  %4ld  %4ld  %9.2e  %3ld %3ld\n",
           t, umax, kused, nst, nni, nje, nre, nreLS, hused, npe, nps);
#else
    printf(" %5.2f %13.5e  %d  %3ld  %3ld  %3ld  %4ld  %4ld  %9.2e  %3ld %3ld\n",
           t, umax, kused, nst, nni, nje, nre, nreLS, hused, npe, nps);
#endif

  }
}

/*
 * Print some final integrator statistics
 */

static void PrintFinalStats(void *ida_mem)
{
  long int netf, ncfn, ncfl;

  IDAGetNumErrTestFails(ida_mem, &netf);
  IDAGetNumNonlinSolvConvFails(ida_mem, &ncfn);
  IDAGetNumLinConvFails(ida_mem, &ncfl);

  printf("\nError test failures            = %ld\n", netf);
  printf("Nonlinear convergence failures = %ld\n", ncfn);
  printf("Linear convergence failures    = %ld\n", ncfl);
}

/*
 * Check function return value...
 *   opt == 0 means SUNDIALS function allocates memory so check if
 *            returned NULL pointer
 *   opt == 1 means SUNDIALS function returns an integer value so check if
 *            retval < 0
 *   opt == 2 means function allocates memory so check if returned
 *            NULL pointer
 */

static int check_retval(void *returnvalue, const char *funcname, int opt, int id)
{
  int *retval;

  if (opt == 0 && returnvalue == NULL) {
    /* Check if SUNDIALS function returned NULL pointer - no memory allocated */
    fprintf(stderr,
            "\nSUNDIALS_ERROR(%d): %s() failed - returned NULL pointer\n\n",
            id, funcname);
    return(1);
  } else if (opt == 1) {
    /* Check if retval < 0 */
    retval = (int *) returnvalue;
    if (*retval < 0) {
      fprintf(stderr,
              "\nSUNDIALS_ERROR(%d): %s() failed with retval = %d\n\n",
              id, funcname, *retval);
      return(1);
    }
  } else if (opt == 2 && returnvalue == NULL) {
    /* Check if function returned NULL pointer - no memory allocated */
    fprintf(stderr,
            "\nMEMORY_ERROR(%d): %s() failed - returned NULL pointer\n\n",
            id, funcname);
    return(1);
  }

  return(0);
}