/*
 * -----------------------------------------------------------------
 * Programmer(s): Daniel R. Reynolds @ SMU
 *         Allan Taylor, Alan Hindmarsh and Radu Serban @ LLNL
 * -----------------------------------------------------------------
 * SUNDIALS Copyright Start
 * Copyright (c) 2002-2021, Lawrence Livermore National Security
 * and Southern Methodist University.
 * All rights reserved.
 *
 * See the top-level LICENSE and NOTICE files for details.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 * SUNDIALS Copyright End
 * -----------------------------------------------------------------
 * Example problem for IDA: 2D heat equation, parallel, GMRES,
 * IDABBDPRE.
 *
 * This example solves a discretized 2D heat equation problem.
 * This version uses the Krylov solver SUNLinSol_SPGMR and BBD
 * preconditioning.
 *
 * The DAE system solved is a spatial discretization of the PDE
 *          du/dt = d^2u/dx^2 + d^2u/dy^2
 * on the unit square. The boundary condition is u = 0 on all edges.
 * Initial conditions are given by u = 16 x (1 - x) y (1 - y). The
 * PDE is treated with central differences on a uniform MX x MY
 * grid. The values of u at the interior points satisfy ODEs, and
 * equations u = 0 at the boundaries are appended, to form a DAE
 * system of size N = MX * MY. Here MX = MY = 10.
 *
 * The system is actually implemented on submeshes, processor by
 * processor, with an MXSUB by MYSUB mesh on each of NPEX * NPEY
 * processors.
 *
 * The system is solved with IDA using the Krylov linear solver
 * SUNLinSol_SPGMR in conjunction with the preconditioner module IDABBDPRE.
 * The preconditioner uses a tridiagonal approximation
 * (half-bandwidths = 1). The constraints u >= 0 are posed for all
 * components. Local error testing on the boundary values is
 * suppressed. Output is taken at t = .01, .02, .04, ..., 10.24.
 * -----------------------------------------------------------------
 */
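
/*
 * Build/run note (illustrative; the executable name assumes the default
 * SUNDIALS examples build, and the launcher syntax is MPI-implementation
 * dependent): the program must be run on exactly NPEX*NPEY = 4 MPI tasks,
 * e.g.
 *
 *   mpirun -np 4 ./idaHeat2D_kry_bbd_p
 */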

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include <ida/ida.h>
#include <ida/ida_bbdpre.h>
#include <sunlinsol/sunlinsol_spgmr.h>
#include <nvector/nvector_parallel.h>
#include <sundials/sundials_types.h>

#include <mpi.h>

#define ZERO  RCONST(0.0)
#define ONE   RCONST(1.0)
#define TWO   RCONST(2.0)

#define NOUT         11             /* Number of output times */

#define NPEX         2              /* No. PEs in x direction of PE array */
#define NPEY         2              /* No. PEs in y direction of PE array */
                                    /* Total no. PEs = NPEX*NPEY */
#define MXSUB        5              /* No. x points per subgrid */
#define MYSUB        5              /* No. y points per subgrid */

#define MX           (NPEX*MXSUB)   /* MX = number of x mesh points */
#define MY           (NPEY*MYSUB)   /* MY = number of y mesh points */
                                    /* Spatial mesh is MX by MY */

typedef struct {
  int thispe, mx, my, ixsub, jysub, npex, npey, mxsub, mysub;
  sunindextype n_local;
  realtype dx, dy, coeffx, coeffy, coeffxy;
  realtype uext[(MXSUB+2)*(MYSUB+2)];
  MPI_Comm comm;
} *UserData;
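
/*
 * uext holds this PE's MXSUB x MYSUB subgrid extended by a one-point
 * halo on all four sides, stored row-wise with row length MXSUB+2.
 * The halo entries are filled with neighbor data by rescomm; on
 * physical boundaries they are left untouched and never read.
 */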

/* Prototypes of user-supplied and supporting functions */

static int heatres(realtype tres, N_Vector uu, N_Vector up,
                   N_Vector res, void *user_data);

static int rescomm(sunindextype Nlocal, realtype tt,
                   N_Vector uu, N_Vector up, void *user_data);

static int reslocal(sunindextype Nlocal, realtype tres,
                    N_Vector uu, N_Vector up, N_Vector res,
                    void *user_data);

static int BSend(MPI_Comm comm, int thispe, int ixsub,
                 int jysub, int dsizex, int dsizey,
                 realtype uarray[]);

static int BRecvPost(MPI_Comm comm, MPI_Request request[], int thispe,
                     int ixsub, int jysub,
                     int dsizex, int dsizey,
                     realtype uext[], realtype buffer[]);

static int BRecvWait(MPI_Request request[], int ixsub, int jysub,
                     int dsizex, realtype uext[], realtype buffer[]);

/* Prototypes of private functions */

static int InitUserData(int thispe, MPI_Comm comm, UserData data);

static int SetInitialProfile(N_Vector uu, N_Vector up, N_Vector id,
                             N_Vector res, UserData data);

static void PrintHeader(sunindextype Neq, realtype rtol, realtype atol);

static void PrintCase(int case_number, sunindextype mudq, sunindextype mukeep);

static void PrintOutput(int id, void *ida_mem, realtype t, N_Vector uu);

static void PrintFinalStats(void *ida_mem);

static int check_retval(void *returnvalue, const char *funcname, int opt, int id);

/*
 *--------------------------------------------------------------------
 * MAIN PROGRAM
 *--------------------------------------------------------------------
 */

int main(int argc, char *argv[])
{
  MPI_Comm comm;
  void *ida_mem;
  SUNLinearSolver LS;
  UserData data;
  int thispe, iout, retval, npes;
  sunindextype Neq, local_N, mudq, mldq, mukeep, mlkeep;
  realtype rtol, atol, t0, t1, tout, tret;
  N_Vector uu, up, constraints, id, res;

  ida_mem = NULL;
  LS = NULL;
  data = NULL;
  uu = up = constraints = id = res = NULL;

  /* Get this processor's number and the total number of PEs. */

  MPI_Init(&argc, &argv);
  comm = MPI_COMM_WORLD;
  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &thispe);

  if (npes != NPEX*NPEY) {
    if (thispe == 0)
      fprintf(stderr,
              "\nMPI_ERROR(0): npes = %d is not equal to NPEX*NPEY = %d\n",
              npes, NPEX*NPEY);
    MPI_Finalize();
    return(1);
  }

  /* Set local length local_N and global length Neq. */

  local_N = MXSUB*MYSUB;
  Neq     = MX*MY;

  /* Allocate N-vectors. */

  uu = N_VNew_Parallel(comm, local_N, Neq);
  if(check_retval((void *)uu, "N_VNew_Parallel", 0, thispe)) MPI_Abort(comm, 1);

  up = N_VNew_Parallel(comm, local_N, Neq);
  if(check_retval((void *)up, "N_VNew_Parallel", 0, thispe)) MPI_Abort(comm, 1);

  res = N_VNew_Parallel(comm, local_N, Neq);
  if(check_retval((void *)res, "N_VNew_Parallel", 0, thispe)) MPI_Abort(comm, 1);

  constraints = N_VNew_Parallel(comm, local_N, Neq);
  if(check_retval((void *)constraints, "N_VNew_Parallel", 0, thispe)) MPI_Abort(comm, 1);

  id = N_VNew_Parallel(comm, local_N, Neq);
  if(check_retval((void *)id, "N_VNew_Parallel", 0, thispe)) MPI_Abort(comm, 1);

  /* Allocate and initialize the data structure. */

  data = (UserData) malloc(sizeof *data);
  if(check_retval((void *)data, "malloc", 2, thispe)) MPI_Abort(comm, 1);

  InitUserData(thispe, comm, data);

  /* Initialize the uu, up, id, and constraints profiles. */

  SetInitialProfile(uu, up, id, res, data);
  N_VConst(ONE, constraints);

  t0 = ZERO; t1 = RCONST(0.01);

  /* Scalar relative and absolute tolerances. */

  rtol = ZERO;
  atol = RCONST(1.0e-3);
  /* Call IDACreate and IDAInit to initialize the IDA solver */

  ida_mem = IDACreate();
  if(check_retval((void *)ida_mem, "IDACreate", 0, thispe)) MPI_Abort(comm, 1);

  retval = IDASetUserData(ida_mem, data);
  if(check_retval(&retval, "IDASetUserData", 1, thispe)) MPI_Abort(comm, 1);

  retval = IDASetSuppressAlg(ida_mem, SUNTRUE);
  if(check_retval(&retval, "IDASetSuppressAlg", 1, thispe)) MPI_Abort(comm, 1);

  retval = IDASetId(ida_mem, id);
  if(check_retval(&retval, "IDASetId", 1, thispe)) MPI_Abort(comm, 1);

  retval = IDASetConstraints(ida_mem, constraints);
  if(check_retval(&retval, "IDASetConstraints", 1, thispe)) MPI_Abort(comm, 1);
  N_VDestroy(constraints);

  retval = IDAInit(ida_mem, heatres, t0, uu, up);
  if(check_retval(&retval, "IDAInit", 1, thispe)) MPI_Abort(comm, 1);

  retval = IDASStolerances(ida_mem, rtol, atol);
  if(check_retval(&retval, "IDASStolerances", 1, thispe)) MPI_Abort(comm, 1);

  mudq = MXSUB;
  mldq = MXSUB;
  mukeep = 1;
  mlkeep = 1;
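
  /* mudq/mldq are the upper/lower half-bandwidths used by IDABBDPRE's
     difference-quotient approximation of the local Jacobian blocks;
     mukeep/mlkeep are the half-bandwidths of the banded matrix actually
     retained (here tridiagonal). */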

  /* Print problem description */

  if (thispe == 0) PrintHeader(Neq, rtol, atol);

  /*
   * -----------------------------
   * Case 1 -- mldq = mudq = MXSUB
   * -----------------------------
   */

  /* Call SUNLinSol_SPGMR and IDASetLinearSolver to specify the linear solver. */
  LS = SUNLinSol_SPGMR(uu, PREC_LEFT, 0);  /* IDA recommends left-preconditioning only;
                                              0 indicates to use default maxl value */
  if(check_retval((void *)LS, "SUNLinSol_SPGMR", 0, thispe)) MPI_Abort(comm, 1);

  retval = SUNLinSol_SPGMRSetMaxRestarts(LS, 5);  /* IDA recommends allowing up to 5 restarts */
  if(check_retval(&retval, "SUNLinSol_SPGMRSetMaxRestarts", 1, thispe)) MPI_Abort(comm, 1);

  retval = IDASetLinearSolver(ida_mem, LS, NULL);
  if(check_retval(&retval, "IDASetLinearSolver", 1, thispe)) MPI_Abort(comm, 1);

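  /* Note: the ZERO below selects the default difference-quotient relative
     increment, and the NULL Gcomm argument tells IDABBDPRE that all
     inter-process communication needed by reslocal was already done in
     the preceding call to heatres. */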
  /* Call IDABBDPrecInit to initialize the BBD preconditioner. */
  retval = IDABBDPrecInit(ida_mem, local_N, mudq, mldq, mukeep, mlkeep,
                          ZERO, reslocal, NULL);
  if(check_retval(&retval, "IDABBDPrecInit", 1, thispe)) MPI_Abort(comm, 1);

  /* Print output heading (on processor 0 only). */
  if (thispe == 0) PrintCase(1, mudq, mukeep);

  /* Loop over tout, call IDASolve, print output. */
  for (tout = t1, iout = 1; iout <= NOUT; iout++, tout *= TWO) {

    retval = IDASolve(ida_mem, tout, &tret, uu, up, IDA_NORMAL);
    if(check_retval(&retval, "IDASolve", 1, thispe)) MPI_Abort(comm, 1);

    PrintOutput(thispe, ida_mem, tret, uu);

  }

  /* Print final statistics */
  if (thispe == 0) PrintFinalStats(ida_mem);

  /*
   * -----------------------------
   * Case 2 -- mldq = mudq = 1
   * -----------------------------
   */

  mudq = 1;
  mldq = 1;

  /* Re-initialize the uu and up profiles. */
  SetInitialProfile(uu, up, id, res, data);

  /* Call IDAReInit to re-initialize IDA. */
  retval = IDAReInit(ida_mem, t0, uu, up);
  if(check_retval(&retval, "IDAReInit", 1, thispe)) MPI_Abort(comm, 1);

  /* Call IDABBDPrecReInit to re-initialize the BBD preconditioner. */
  retval = IDABBDPrecReInit(ida_mem, mudq, mldq, ZERO);
  if(check_retval(&retval, "IDABBDPrecReInit", 1, thispe)) MPI_Abort(comm, 1);

  /* Print output heading (on processor 0 only). */
  if (thispe == 0) PrintCase(2, mudq, mukeep);

  /* Loop over tout, call IDASolve, print output. */
  for (tout = t1, iout = 1; iout <= NOUT; iout++, tout *= TWO) {

    retval = IDASolve(ida_mem, tout, &tret, uu, up, IDA_NORMAL);
    if(check_retval(&retval, "IDASolve", 1, thispe)) MPI_Abort(comm, 1);

    PrintOutput(thispe, ida_mem, tret, uu);

  }

  /* Print final statistics */
  if (thispe == 0) PrintFinalStats(ida_mem);

  /* Free memory */
  IDAFree(&ida_mem);
  SUNLinSolFree(LS);
  free(data);
  N_VDestroy(id);
  N_VDestroy(res);
  N_VDestroy(up);
  N_VDestroy(uu);

  MPI_Finalize();

  return(0);

}
/*
 *--------------------------------------------------------------------
 * FUNCTIONS CALLED BY IDA
 *--------------------------------------------------------------------
 */

/*
 * heatres: heat equation system residual function
 * This uses 5-point central differencing on the interior points, and
 * includes algebraic equations for the boundary values.
 * So for each interior point, the residual component has the form
 *    res_i = u'_i - (central difference)_i
 * while for each boundary point, it is res_i = u_i.
 *
 * This parallel implementation uses several supporting routines.
 * First a call is made to rescomm to do communication of subgrid boundary
 * data into array uext.  Then reslocal is called to compute the residual
 * on individual processors and their corresponding domains.  The routines
 * BSend, BRecvPost, and BRecvWait handle interprocessor communication
 * of uu required to calculate the residual.
 */

static int heatres(realtype tres, N_Vector uu, N_Vector up,
                   N_Vector res, void *user_data)
{
  int retval;
  UserData data;
  sunindextype Nlocal;

  data = (UserData) user_data;

  Nlocal = data->n_local;

  /* Call rescomm to do inter-processor communication. */
  retval = rescomm(Nlocal, tres, uu, up, data);
  if (retval != 0) return(retval);

  /* Call reslocal to calculate res. */
  retval = reslocal(Nlocal, tres, uu, up, res, data);

  return(retval);

}

/*
 * rescomm routine.  This routine performs all inter-processor
 * communication of data in u needed to calculate G.
 */

static int rescomm(sunindextype Nlocal, realtype tt,
                   N_Vector uu, N_Vector up, void *user_data)
{
  UserData data;
  realtype *uarray, *uext, buffer[2*MYSUB];
  MPI_Comm comm;
  int thispe, ixsub, jysub, mxsub, mysub;
  MPI_Request request[4];

  data = (UserData) user_data;
  uarray = N_VGetArrayPointer(uu);

  /* Get comm, thispe, subgrid indices, data sizes, extended array uext. */
  comm = data->comm;  thispe = data->thispe;
  ixsub = data->ixsub;   jysub = data->jysub;
  mxsub = data->mxsub;   mysub = data->mysub;
  uext = data->uext;

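  /* Nonblocking receives are posted before any sends so that the
     exchange cannot deadlock, regardless of MPI_Send buffering. */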
  /* Start receiving boundary data from neighboring PEs. */
  BRecvPost(comm, request, thispe, ixsub, jysub, mxsub, mysub, uext, buffer);

  /* Send data from boundary of local grid to neighboring PEs. */
  BSend(comm, thispe, ixsub, jysub, mxsub, mysub, uarray);

  /* Finish receiving boundary data from neighboring PEs. */
  BRecvWait(request, ixsub, jysub, mxsub, uext, buffer);

  return(0);

}

/*
 * reslocal routine.  Compute res = F(t, uu, up).  This routine assumes
 * that all inter-processor communication of data needed to calculate F
 * has already been done, and that this data is in the work array uext.
 */

static int reslocal(sunindextype Nlocal, realtype tres,
                    N_Vector uu, N_Vector up, N_Vector res,
                    void *user_data)
{
  realtype *uext, *uuv, *upv, *resv;
  realtype termx, termy, termctr;
  int lx, ly, offsetu, offsetue, locu, locue;
  int ixsub, jysub, mxsub, mxsub2, mysub, npex, npey;
  int ixbegin, ixend, jybegin, jyend;
  UserData data;

  /* Get subgrid indices, array sizes, extended work array uext. */

  data = (UserData) user_data;
  uext = data->uext;
  uuv = N_VGetArrayPointer(uu);
  upv = N_VGetArrayPointer(up);
  resv = N_VGetArrayPointer(res);
  ixsub = data->ixsub; jysub = data->jysub;
  mxsub = data->mxsub; mxsub2 = data->mxsub + 2;
  mysub = data->mysub; npex = data->npex; npey = data->npey;

  /* Initialize all elements of res to uu. This sets the boundary
     elements simply without indexing hassles. */

  N_VScale(ONE, uu, res);

  /* Copy local segment of u vector into the working extended array uext.
     This completes uext prior to the computation of the res vector.     */

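  /* The starting offset mxsub2 + 1 skips uext's first (halo) row and the
     halo entry at the start of the second row, so local point (0,0) lands
     at extended-grid point (1,1). */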
  offsetu = 0;
  offsetue = mxsub2 + 1;
  for (ly = 0; ly < mysub; ly++) {
    for (lx = 0; lx < mxsub; lx++) uext[offsetue+lx] = uuv[offsetu+lx];
    offsetu = offsetu + mxsub;
    offsetue = offsetue + mxsub2;
  }

  /* Set loop limits for the interior of the local subgrid. */

  ixbegin = 0;
  ixend   = mxsub-1;
  jybegin = 0;
  jyend   = mysub-1;
  if (ixsub == 0) ixbegin++;
  if (ixsub == npex-1) ixend--;
  if (jysub == 0) jybegin++;
  if (jysub == npey-1) jyend--;
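
  /* Points on the physical boundary are excluded here: their algebraic
     residual res = u was already set by the N_VScale call above. */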

  /* Loop over all grid points in the interior of the local subgrid. */

  for (ly = jybegin; ly <= jyend; ly++) {
    for (lx = ixbegin; lx <= ixend; lx++) {
      locu  = lx + ly*mxsub;
      locue = (lx+1) + (ly+1)*mxsub2;
      termx = data->coeffx *(uext[locue-1]      + uext[locue+1]);
      termy = data->coeffy *(uext[locue-mxsub2] + uext[locue+mxsub2]);
      termctr = data->coeffxy*uext[locue];
      resv[locu] = upv[locu] - (termx + termy - termctr);
    }
  }
  return(0);

}

/*
 * Routine to send boundary data to neighboring PEs.
 */

static int BSend(MPI_Comm comm, int thispe, int ixsub,
                 int jysub, int dsizex, int dsizey,
                 realtype uarray[])
{
  int ly, offsetu;
  realtype bufleft[MYSUB], bufright[MYSUB];

  /* If jysub > 0, send data from bottom x-line of u. */

  if (jysub != 0)
    MPI_Send(&uarray[0], dsizex, MPI_SUNREALTYPE, thispe-NPEX, 0, comm);

  /* If jysub < NPEY-1, send data from top x-line of u. */

  if (jysub != NPEY-1) {
    offsetu = (MYSUB-1)*dsizex;
    MPI_Send(&uarray[offsetu], dsizex, MPI_SUNREALTYPE,
             thispe+NPEX, 0, comm);
  }

  /* If ixsub > 0, send data from left y-line of u (via bufleft). */

  if (ixsub != 0) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetu = ly*dsizex;
      bufleft[ly] = uarray[offsetu];
    }
    MPI_Send(&bufleft[0], dsizey, MPI_SUNREALTYPE, thispe-1, 0, comm);
  }

  /* If ixsub < NPEX-1, send data from right y-line of u (via bufright). */

  if (ixsub != NPEX-1) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetu = ly*dsizex + (dsizex-1);
      bufright[ly] = uarray[offsetu];
    }
    MPI_Send(&bufright[0], dsizey, MPI_SUNREALTYPE, thispe+1, 0, comm);
  }

  return(0);

}

/*
 * Routine to start receiving boundary data from neighboring PEs.
 * Notes:
 *   1) buffer should be able to hold 2*MYSUB realtype entries, should be
 *      passed to both the BRecvPost and BRecvWait functions, and should not
 *      be manipulated between the two calls.
 *   2) request should have 4 entries, and should be passed in
 *      both calls also.
 */

static int BRecvPost(MPI_Comm comm, MPI_Request request[], int thispe,
                     int ixsub, int jysub,
                     int dsizex, int dsizey,
                     realtype uext[], realtype buffer[])
{
  int offsetue;
  /* Have bufleft and bufright use the same buffer. */
  realtype *bufleft = buffer, *bufright = buffer+MYSUB;

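  /* The two x-line messages land directly in uext (rows 0 and MYSUB+1 of
     the extended grid, interior columns only), since those entries are
     contiguous.  The y-line messages are staged through buffer and
     scattered into uext by BRecvWait, because a column of uext is not
     contiguous in memory. */
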
  /* If jysub > 0, receive data for bottom x-line of uext. */
  if (jysub != 0)
    MPI_Irecv(&uext[1], dsizex, MPI_SUNREALTYPE,
              thispe-NPEX, 0, comm, &request[0]);

  /* If jysub < NPEY-1, receive data for top x-line of uext. */
  if (jysub != NPEY-1) {
    offsetue = (1 + (MYSUB+1)*(MXSUB+2));
    MPI_Irecv(&uext[offsetue], dsizex, MPI_SUNREALTYPE,
              thispe+NPEX, 0, comm, &request[1]);
  }

  /* If ixsub > 0, receive data for left y-line of uext (via bufleft). */
  if (ixsub != 0) {
    MPI_Irecv(&bufleft[0], dsizey, MPI_SUNREALTYPE,
              thispe-1, 0, comm, &request[2]);
  }

  /* If ixsub < NPEX-1, receive data for right y-line of uext (via bufright). */
  if (ixsub != NPEX-1) {
    MPI_Irecv(&bufright[0], dsizey, MPI_SUNREALTYPE,
              thispe+1, 0, comm, &request[3]);
  }

  return(0);

}

/*
 * Routine to finish receiving boundary data from neighboring PEs.
 * Notes:
 *   1) buffer should be able to hold 2*MYSUB realtype entries, should be
 *      passed to both the BRecvPost and BRecvWait functions, and should not
 *      be manipulated between the two calls.
 *   2) request should have four entries, and should be passed in both
 *      calls also.
 */

static int BRecvWait(MPI_Request request[], int ixsub,
                     int jysub, int dsizex, realtype uext[],
                     realtype buffer[])
{
  int ly, dsizex2, offsetue;
  realtype *bufleft = buffer, *bufright = buffer+MYSUB;
  MPI_Status status;

  dsizex2 = dsizex + 2;

  /* If jysub > 0, wait for the bottom x-line of uext to arrive. */
  if (jysub != 0)
    MPI_Wait(&request[0], &status);

  /* If jysub < NPEY-1, wait for the top x-line of uext to arrive. */
  if (jysub != NPEY-1)
    MPI_Wait(&request[1], &status);

  /* If ixsub > 0, wait for the left y-line of uext (via bufleft). */
  if (ixsub != 0) {
    MPI_Wait(&request[2], &status);

    /* Copy the buffer to uext. */
    for (ly = 0; ly < MYSUB; ly++) {
      offsetue = (ly+1)*dsizex2;
      uext[offsetue] = bufleft[ly];
    }
  }

  /* If ixsub < NPEX-1, wait for the right y-line of uext (via bufright). */
  if (ixsub != NPEX-1) {
    MPI_Wait(&request[3], &status);

    /* Copy the buffer to uext. */
    for (ly = 0; ly < MYSUB; ly++) {
      offsetue = (ly+2)*dsizex2 - 1;
      uext[offsetue] = bufright[ly];
    }
  }

  return(0);
}

/*
 *--------------------------------------------------------------------
 * PRIVATE FUNCTIONS
 *--------------------------------------------------------------------
 */

/*
 * InitUserData initializes the user's data block data.
 */

static int InitUserData(int thispe, MPI_Comm comm, UserData data)
{
  data->thispe = thispe;
  data->dx = ONE/(MX-ONE);       /* Assumes a [0,1] interval in x. */
  data->dy = ONE/(MY-ONE);       /* Assumes a [0,1] interval in y. */
  data->coeffx  = ONE/(data->dx * data->dx);
  data->coeffy  = ONE/(data->dy * data->dy);
  data->coeffxy = TWO/(data->dx * data->dx) + TWO/(data->dy * data->dy);
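
  /* Processors are numbered row-wise across the x direction first,
     so rank = jysub*NPEX + ixsub. */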
  data->jysub   = thispe/NPEX;
  data->ixsub   = thispe - data->jysub * NPEX;
  data->npex    = NPEX;
  data->npey    = NPEY;
  data->mx      = MX;
  data->my      = MY;
  data->mxsub   = MXSUB;
  data->mysub   = MYSUB;
  data->comm    = comm;
  data->n_local = MXSUB*MYSUB;
  return(0);

}

/*
 * SetInitialProfile sets the initial values for the problem.
 */

static int SetInitialProfile(N_Vector uu, N_Vector up, N_Vector id,
                             N_Vector res, UserData data)
{
  int i, iloc, j, jloc, offset, loc, ixsub, jysub;
  int ixbegin, ixend, jybegin, jyend;
  realtype xfact, yfact, *udata, *iddata;

  /* Initialize uu. */

  udata = N_VGetArrayPointer(uu);
  iddata = N_VGetArrayPointer(id);

  /* Set the subgrid indices for this PE. */
  ixsub = data->ixsub;
  jysub = data->jysub;

  /* Set beginning and ending locations in the global array corresponding
     to the portion of that array assigned to this processor. */
  ixbegin = MXSUB*ixsub;
  ixend   = MXSUB*(ixsub+1) - 1;
  jybegin = MYSUB*jysub;
  jyend   = MYSUB*(jysub+1) - 1;

  /* Loop over the local array, computing the initial profile value.
     The global indices are (i,j) and the local indices are (iloc,jloc).
     Also set the id vector to zero for boundary points, one otherwise. */

  N_VConst(ONE, id);
  for (j = jybegin, jloc = 0; j <= jyend; j++, jloc++) {
    yfact = data->dy*j;
    offset = jloc*MXSUB;
    for (i = ixbegin, iloc = 0; i <= ixend; i++, iloc++) {
      xfact = data->dx * i;
      loc = offset + iloc;
      udata[loc] = RCONST(16.0) * xfact * (ONE - xfact) * yfact * (ONE - yfact);
      if (i == 0 || i == MX-1 || j == 0 || j == MY-1) iddata[loc] = ZERO;
    }
  }

  /* Initialize up. */

  N_VConst(ZERO, up);    /* Initially set up = 0. */

  /* heatres sets res to the negative of the ODE RHS values at interior points. */
  heatres(ZERO, uu, up, res, data);

  /* Copy -res into up to get correct initial up values. */
  N_VScale(-ONE, res, up);
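
  /* At physical-boundary points the residual is res = u, and the initial
     profile vanishes there, so up = -res = 0 is also consistent with the
     algebraic boundary equations. */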

  return(0);

}

/*
 * Print first lines of output (problem description)
 * and table heading
 */

static void PrintHeader(sunindextype Neq, realtype rtol, realtype atol)
{
  printf("\nidaHeat2D_kry_bbd_p: Heat equation, parallel example problem for IDA\n");
  printf("                Discretized heat equation on 2D unit square.\n");
  printf("                Zero boundary conditions,");
  printf(" polynomial initial conditions.\n");
  printf("                Mesh dimensions: %d x %d", MX, MY);
  printf("        Total system size: %ld\n\n", (long int) Neq);

  printf("Subgrid dimensions: %d x %d", MXSUB, MYSUB);
  printf("         Processor array: %d x %d\n", NPEX, NPEY);
#if defined(SUNDIALS_EXTENDED_PRECISION)
  printf("Tolerance parameters:  rtol = %Lg   atol = %Lg\n", rtol, atol);
#elif defined(SUNDIALS_DOUBLE_PRECISION)
  printf("Tolerance parameters:  rtol = %g   atol = %g\n", rtol, atol);
#else
  printf("Tolerance parameters:  rtol = %g   atol = %g\n", rtol, atol);
#endif
  printf("Constraints set to force all solution components >= 0. \n");
  printf("SUPPRESSALG = SUNTRUE to suppress local error testing on");
  printf(" all boundary components. \n");
  printf("Linear solver: SUNLinSol_SPGMR.    ");
  printf("Preconditioner: IDABBDPRE - Banded-block-diagonal.\n");

}
static void PrintCase(int case_number, sunindextype mudq, sunindextype mukeep)
{
  printf("\n\nCase %1d. \n", case_number);
  printf("   Difference quotient half-bandwidths = %ld", (long int) mudq);
  printf("   Retained matrix half-bandwidths = %ld \n", (long int) mukeep);

  /* Print output table heading and initial line of table. */
  printf("\n   Output Summary (umax = max-norm of solution) \n\n");
  printf("  time     umax       k  nst  nni  nli   nre nreLS nge     h      npe nps\n");
  printf(" .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .\n");
}

/*
 * Print integrator statistics and max-norm of solution
 */

static void PrintOutput(int id, void *ida_mem, realtype t, N_Vector uu)
{
  realtype umax, hused;
  int kused, retval;
  long int nst, nni, nre, nli, npe, nps, nreLS, nge;

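  /* N_VMaxNorm is a global reduction, so every PE must call it, even
     though only PE 0 prints the result. */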
  umax = N_VMaxNorm(uu);

  if (id == 0) {

    retval = IDAGetLastOrder(ida_mem, &kused);
    check_retval(&retval, "IDAGetLastOrder", 1, id);
    retval = IDAGetNumSteps(ida_mem, &nst);
    check_retval(&retval, "IDAGetNumSteps", 1, id);
    retval = IDAGetNumNonlinSolvIters(ida_mem, &nni);
    check_retval(&retval, "IDAGetNumNonlinSolvIters", 1, id);
    retval = IDAGetNumResEvals(ida_mem, &nre);
    check_retval(&retval, "IDAGetNumResEvals", 1, id);
    retval = IDAGetLastStep(ida_mem, &hused);
    check_retval(&retval, "IDAGetLastStep", 1, id);
    retval = IDAGetNumLinIters(ida_mem, &nli);
    check_retval(&retval, "IDAGetNumLinIters", 1, id);
    retval = IDAGetNumLinResEvals(ida_mem, &nreLS);
    check_retval(&retval, "IDAGetNumLinResEvals", 1, id);
    retval = IDABBDPrecGetNumGfnEvals(ida_mem, &nge);
    check_retval(&retval, "IDABBDPrecGetNumGfnEvals", 1, id);
    retval = IDAGetNumPrecEvals(ida_mem, &npe);
    check_retval(&retval, "IDAGetNumPrecEvals", 1, id);
    retval = IDAGetNumPrecSolves(ida_mem, &nps);
    check_retval(&retval, "IDAGetNumPrecSolves", 1, id);

#if defined(SUNDIALS_EXTENDED_PRECISION)
    printf(" %5.2Lf %13.5Le  %d  %3ld  %3ld  %3ld  %4ld %4ld %4ld %9.2Le  %3ld %3ld\n",
           t, umax, kused, nst, nni, nli, nre, nreLS, nge, hused, npe, nps);
#elif defined(SUNDIALS_DOUBLE_PRECISION)
    printf(" %5.2f %13.5e  %d  %3ld  %3ld  %3ld  %4ld %4ld %4ld %9.2e  %3ld %3ld\n",
           t, umax, kused, nst, nni, nli, nre, nreLS, nge, hused, npe, nps);
#else
    printf(" %5.2f %13.5e  %d  %3ld  %3ld  %3ld  %4ld %4ld %4ld %9.2e  %3ld %3ld\n",
           t, umax, kused, nst, nni, nli, nre, nreLS, nge, hused, npe, nps);
#endif

  }
}

/*
 * Print some final integrator statistics
 */

static void PrintFinalStats(void *ida_mem)
{
  long int netf, ncfn, ncfl;

  IDAGetNumErrTestFails(ida_mem, &netf);
  IDAGetNumNonlinSolvConvFails(ida_mem, &ncfn);
  IDAGetNumLinConvFails(ida_mem, &ncfl);

  printf("\nError test failures            = %ld\n", netf);
  printf("Nonlinear convergence failures = %ld\n", ncfn);
  printf("Linear convergence failures    = %ld\n", ncfl);
}

/*
 * Check function return value...
 *   opt == 0 means SUNDIALS function allocates memory so check if
 *            returned NULL pointer
 *   opt == 1 means SUNDIALS function returns an integer value so check if
 *            retval < 0
 *   opt == 2 means function allocates memory so check if returned
 *            NULL pointer
 */

static int check_retval(void *returnvalue, const char *funcname, int opt, int id)
{
  int *retval;

  /* Check if SUNDIALS function returned NULL pointer - no memory allocated */
  if (opt == 0 && returnvalue == NULL) {
    fprintf(stderr, "\nSUNDIALS_ERROR(%d): %s() failed - returned NULL pointer\n\n", id, funcname);
    return(1);
  }

  /* Check if retval < 0 */
  else if (opt == 1) {
    retval = (int *) returnvalue;
    if (*retval < 0) {
      fprintf(stderr, "\nSUNDIALS_ERROR(%d): %s() failed with retval = %d\n\n", id, funcname, *retval);
      return(1);
    }
  }

  /* Check if function returned NULL pointer - no memory allocated */
  else if (opt == 2 && returnvalue == NULL) {
    fprintf(stderr, "\nMEMORY_ERROR(%d): %s() failed - returned NULL pointer\n\n", id, funcname);
    return(1);
  }

  return(0);
}