/*
 * -----------------------------------------------------------------
 * Programmer(s): S. D. Cohen, A. C. Hindmarsh, M. R. Wittman, and
 *                Radu Serban  @ LLNL
 * -----------------------------------------------------------------
 * SUNDIALS Copyright Start
 * Copyright (c) 2002-2021, Lawrence Livermore National Security
 * and Southern Methodist University.
 * All rights reserved.
 *
 * See the top-level LICENSE and NOTICE files for details.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 * SUNDIALS Copyright End
 * -----------------------------------------------------------------
 * Example problem:
 *
 * An ODE system is generated from the following 2-species diurnal
 * kinetics advection-diffusion PDE system in 2 space dimensions:
 *
 * dc(i)/dt = Kh*(d/dx)^2 c(i) + V*dc(i)/dx + (d/dy)(Kv(y)*dc(i)/dy)
 *                 + Ri(c1,c2,t)      for i = 1,2,   where
 *   R1(c1,c2,t) = -q1*c1*c3 - q2*c1*c2 + 2*q3(t)*c3 + q4(t)*c2 ,
 *   R2(c1,c2,t) =  q1*c1*c3 - q2*c1*c2 - q4(t)*c2 ,
 *   Kv(y) = Kv0*exp(y/5) ,
 * Kh, V, Kv0, q1, q2, and c3 are constants, and q3(t) and q4(t)
 * vary diurnally. The problem is posed on the square
 *   0 <= x <= 20,    30 <= y <= 50   (all in km),
 * with homogeneous Neumann boundary conditions, and for time t in
 *   0 <= t <= 86400 sec (1 day).
 * The PDE system is treated by central differences on a uniform
 * mesh, with simple polynomial initial profiles.
 *
 * The problem is solved by CVODE on NPE processors, treated
 * as a rectangular process grid of size NPEX by NPEY, with
 * NPE = NPEX*NPEY. Each processor contains a subgrid of size MXSUB
 * by MYSUB of the (x,y) mesh. Thus the actual mesh sizes are
 * MX = MXSUB*NPEX and MY = MYSUB*NPEY, and the ODE system size is
 * neq = 2*MX*MY.
 *
 * The solution is done with the BDF/GMRES method (i.e. using the
 * SUNLinSol_SPGMR linear solver) and a block-diagonal matrix with
 * banded blocks as a preconditioner, using the CVBBDPRE module.
 * Each block is generated using difference quotients, with
 * half-bandwidths mudq = mldq = 2*MXSUB, but the retained banded
 * blocks have half-bandwidths mukeep = mlkeep = 2.
 * A copy of the approximate Jacobian is saved and conditionally
 * reused within the preconditioner routine.
 *
 * The problem is solved twice -- with left and right preconditioning.
 *
 * Performance data and sampled solution values are printed at
 * selected output times, and all performance counters are printed
 * on completion.
 *
 * This version uses MPI for user routines.
 * Execute with number of processors = NPEX*NPEY (see constants below).
 * --------------------------------------------------------------------
 */
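
/* Example invocation (a sketch only -- the launcher and executable name depend
 * on the local MPI installation and build; "cvDiurnal_kry_bbd_p" is the
 * assumed name of this example's binary):
 *
 *    mpiexec -n 4 ./cvDiurnal_kry_bbd_p
 *
 * The process count must equal NPEX*NPEY (= 4 with the settings below).
 */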

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include <cvode/cvode.h>               /* prototypes for CVODE fcts., consts.  */
#include <nvector/nvector_parallel.h>  /* access to MPI-parallel N_Vector      */
#include <sunlinsol/sunlinsol_spgmr.h> /* access to SPGMR SUNLinearSolver      */
#include <cvode/cvode_bbdpre.h>        /* access to CVBBDPRE module            */
#include <sundials/sundials_types.h>   /* definitions of realtype, booleantype */

#include <mpi.h> /* MPI constants and types */

/* helpful macros */

#ifndef SQR
#define SQR(A) ((A)*(A))
#endif

/* Problem Constants */

#define ZERO         RCONST(0.0)

#define NVARS        2                 /* number of species         */
#define KH           RCONST(4.0e-6)    /* horizontal diffusivity Kh */
#define VEL          RCONST(0.001)     /* advection velocity V      */
#define KV0          RCONST(1.0e-8)    /* coefficient in Kv(y)      */
#define Q1           RCONST(1.63e-16)  /* coefficients q1, q2, c3   */
#define Q2           RCONST(4.66e-16)
#define C3           RCONST(3.7e16)
#define A3           RCONST(22.62)     /* coefficient in expression for q3(t) */
#define A4           RCONST(7.601)     /* coefficient in expression for q4(t) */
#define C1_SCALE     RCONST(1.0e6)     /* coefficients in initial profiles    */
#define C2_SCALE     RCONST(1.0e12)

#define T0           ZERO                /* initial time */
#define NOUT         12                  /* number of output times */
#define TWOHR        RCONST(7200.0)      /* number of seconds in two hours  */
#define HALFDAY      RCONST(4.32e4)      /* number of seconds in a half day */
#define PI       RCONST(3.1415926535898) /* pi */

#define XMIN         ZERO                /* grid boundaries in x  */
#define XMAX         RCONST(20.0)
#define YMIN         RCONST(30.0)        /* grid boundaries in y  */
#define YMAX         RCONST(50.0)

#define NPEX         2              /* no. PEs in x direction of PE array */
#define NPEY         2              /* no. PEs in y direction of PE array */
                                    /* Total no. PEs = NPEX*NPEY */
#define MXSUB        5              /* no. x points per subgrid */
#define MYSUB        5              /* no. y points per subgrid */

#define MX           (NPEX*MXSUB)   /* MX = number of x mesh points */
#define MY           (NPEY*MYSUB)   /* MY = number of y mesh points */
                                    /* Spatial mesh is MX by MY */

/* CVodeInit Constants */

#define RTOL    RCONST(1.0e-5)    /* scalar relative tolerance */
#define FLOOR   RCONST(100.0)     /* value of C1 or C2 at which tolerances */
                                  /* change from relative to absolute      */
#define ATOL    (RTOL*FLOOR)      /* scalar absolute tolerance */
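
/* Note: with scalar tolerances CVODE weights each solution component by
   roughly 1/(RTOL*|c| + ATOL), so the error test is relative for
   concentrations well above FLOOR and effectively absolute below it. */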

/* Type : UserData
   contains problem constants, extended dependent variable array,
   grid constants, processor indices, MPI communicator */

typedef struct {
  realtype q4, om, dx, dy, hdco, haco, vdco;
  realtype uext[NVARS*(MXSUB+2)*(MYSUB+2)];
  int my_pe, isubx, isuby;
  sunindextype nvmxsub, nvmxsub2, Nlocal;
  MPI_Comm comm;
} *UserData;
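
/* uext holds the local MXSUB x MYSUB portion of u surrounded by one ring of
   ghost points, i.e. (MXSUB+2) x (MYSUB+2) mesh points with NVARS species
   each.  The ghost lines are filled from neighboring PEs in fucomm(), or from
   the first interior line of u on physical boundaries (see flocal()). */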

/* Prototypes of private helper functions */

static void InitUserData(int my_pe, sunindextype local_N, MPI_Comm comm,
                         UserData data);
static void SetInitialProfiles(N_Vector u, UserData data);
static void PrintIntro(int npes, sunindextype mudq, sunindextype mldq,
                       sunindextype mukeep, sunindextype mlkeep);
static void PrintOutput(void *cvode_mem, int my_pe, MPI_Comm comm,
                        N_Vector u, realtype t);
static void PrintFinalStats(void *cvode_mem);
static void BSend(MPI_Comm comm,
                  int my_pe, int isubx, int isuby,
                  sunindextype dsizex, sunindextype dsizey,
                  realtype uarray[]);
static void BRecvPost(MPI_Comm comm, MPI_Request request[],
                      int my_pe, int isubx, int isuby,
                      sunindextype dsizex, sunindextype dsizey,
                      realtype uext[], realtype buffer[]);
static void BRecvWait(MPI_Request request[],
                      int isubx, int isuby,
                      sunindextype dsizex, realtype uext[],
                      realtype buffer[]);

static void fucomm(realtype t, N_Vector u, void *user_data);

/* Prototype of function called by the solver */

static int f(realtype t, N_Vector u, N_Vector udot, void *user_data);

/* Prototype of functions called by the CVBBDPRE module */

static int flocal(sunindextype Nlocal, realtype t, N_Vector u,
                  N_Vector udot, void *user_data);

/* Private function to check function return values */

static int check_retval(void *returnvalue, const char *funcname, int opt, int id);

/***************************** Main Program ******************************/

int main(int argc, char *argv[])
{
  UserData data;
  SUNLinearSolver LS;
  void *cvode_mem;
  realtype abstol, reltol, t, tout;
  N_Vector u;
  int iout, my_pe, npes, retval, jpre;
  sunindextype neq, local_N, mudq, mldq, mukeep, mlkeep;
  MPI_Comm comm;

  data = NULL;
  LS = NULL;
  cvode_mem = NULL;
  u = NULL;

  /* Set problem size neq */
  neq = NVARS*MX*MY;

  /* Get processor number and total number of pe's */
  MPI_Init(&argc, &argv);
  comm = MPI_COMM_WORLD;
  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &my_pe);

  if (npes != NPEX*NPEY) {
    if (my_pe == 0)
      fprintf(stderr, "\nMPI_ERROR(0): npes = %d is not equal to NPEX*NPEY = %d\n\n",
              npes, NPEX*NPEY);
    MPI_Finalize();
    return(1);
  }

  /* Set local length */
  local_N = NVARS*MXSUB*MYSUB;

  /* Allocate and load user data block */
  data = (UserData) malloc(sizeof *data);
  if(check_retval((void *)data, "malloc", 2, my_pe)) MPI_Abort(comm, 1);
  InitUserData(my_pe, local_N, comm, data);

  /* Allocate and initialize u, and set tolerances */
  u = N_VNew_Parallel(comm, local_N, neq);
  if(check_retval((void *)u, "N_VNew_Parallel", 0, my_pe)) MPI_Abort(comm, 1);
  SetInitialProfiles(u, data);
  abstol = ATOL;
  reltol = RTOL;

  /* Call CVodeCreate to create the solver memory and specify the
   * Backward Differentiation Formula */
  cvode_mem = CVodeCreate(CV_BDF);
  if(check_retval((void *)cvode_mem, "CVodeCreate", 0, my_pe)) MPI_Abort(comm, 1);

  /* Set the pointer to user-defined data */
  retval = CVodeSetUserData(cvode_mem, data);
  if(check_retval(&retval, "CVodeSetUserData", 1, my_pe)) MPI_Abort(comm, 1);

  /* Call CVodeInit to initialize the integrator memory and specify the
   * user's right hand side function in u'=f(t,u), the initial time T0, and
   * the initial dependent variable vector u. */
  retval = CVodeInit(cvode_mem, f, T0, u);
  if(check_retval(&retval, "CVodeInit", 1, my_pe)) return(1);

  /* Call CVodeSStolerances to specify the scalar relative tolerance
   * and scalar absolute tolerances */
  retval = CVodeSStolerances(cvode_mem, reltol, abstol);
  if (check_retval(&retval, "CVodeSStolerances", 1, my_pe)) return(1);

  /* Create SPGMR solver structure -- use left preconditioning
     and the default Krylov dimension maxl */
  LS = SUNLinSol_SPGMR(u, PREC_LEFT, 0);
  if (check_retval((void *)LS, "SUNLinSol_SPGMR", 0, my_pe)) MPI_Abort(comm, 1);

  /* Attach SPGMR solver structure to CVode interface */
  retval = CVodeSetLinearSolver(cvode_mem, LS, NULL);
  if (check_retval(&retval, "CVodeSetLinearSolver", 1, my_pe)) MPI_Abort(comm, 1);

  /* Initialize BBD preconditioner */
  mudq = mldq = NVARS*MXSUB;
  mukeep = mlkeep = NVARS;
  retval = CVBBDPrecInit(cvode_mem, local_N, mudq, mldq,
                         mukeep, mlkeep, ZERO, flocal, NULL);
  if(check_retval(&retval, "CVBBDPrecInit", 1, my_pe)) MPI_Abort(comm, 1);
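
  /* Note on the call above: the last argument to CVBBDPrecInit is the optional
     inter-process communication function (cfn).  NULL is passed here because
     f() already gathers all boundary data via fucomm() before flocal() needs
     it, so no separate communication routine is required. */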

  /* Print heading */
  if (my_pe == 0) PrintIntro(npes, mudq, mldq, mukeep, mlkeep);

  /* Loop over jpre (= PREC_LEFT, PREC_RIGHT), and solve the problem */
  for (jpre = PREC_LEFT; jpre <= PREC_RIGHT; jpre++) {

  /* On second run, re-initialize u, the integrator, CVBBDPRE, and SPGMR */

  if (jpre == PREC_RIGHT) {

    SetInitialProfiles(u, data);

    retval = CVodeReInit(cvode_mem, T0, u);
    if(check_retval(&retval, "CVodeReInit", 1, my_pe)) MPI_Abort(comm, 1);

    retval = CVBBDPrecReInit(cvode_mem, mudq, mldq, ZERO);
    if(check_retval(&retval, "CVBBDPrecReInit", 1, my_pe)) MPI_Abort(comm, 1);

    retval = SUNLinSol_SPGMRSetPrecType(LS, PREC_RIGHT);
    if(check_retval(&retval, "SUNLinSol_SPGMRSetPrecType", 1, my_pe)) MPI_Abort(comm, 1);

    if (my_pe == 0) {
      printf("\n\n-------------------------------------------------------");
      printf("------------\n");
    }

  }

  if (my_pe == 0) {
    printf("\n\nPreconditioner type is:  jpre = %s\n\n",
           (jpre == PREC_LEFT) ? "PREC_LEFT" : "PREC_RIGHT");
  }

  /* In loop over output points, call CVode, print results, test for error */

  for (iout = 1, tout = TWOHR; iout <= NOUT; iout++, tout += TWOHR) {
    retval = CVode(cvode_mem, tout, u, &t, CV_NORMAL);
    if(check_retval(&retval, "CVode", 1, my_pe)) break;
    PrintOutput(cvode_mem, my_pe, comm, u, t);
  }

  /* Print final statistics */

  if (my_pe == 0) PrintFinalStats(cvode_mem);

  } /* End of jpre loop */

  /* Free memory */
  N_VDestroy(u);
  free(data);
  CVodeFree(&cvode_mem);
  SUNLinSolFree(LS);

  MPI_Finalize();

  return(0);
}

/*********************** Private Helper Functions ************************/

/* Load constants in data */

static void InitUserData(int my_pe, sunindextype local_N, MPI_Comm comm,
                         UserData data)
{
  int isubx, isuby;

  /* Set problem constants */
  data->om = PI/HALFDAY;
  data->dx = (XMAX-XMIN)/((realtype)(MX-1));
  data->dy = (YMAX-YMIN)/((realtype)(MY-1));
  data->hdco = KH/SQR(data->dx);
  data->haco = VEL/(RCONST(2.0)*data->dx);
  data->vdco = (RCONST(1.0)/SQR(data->dy))*KV0;

  /* Set machine-related constants */
  data->comm = comm;
  data->my_pe = my_pe;
  data->Nlocal = local_N;
  /* isubx and isuby are the PE grid indices corresponding to my_pe */
  isuby = my_pe/NPEX;
  isubx = my_pe - isuby*NPEX;
  data->isubx = isubx;
  data->isuby = isuby;
  /* Set the sizes of a boundary x-line in u and uext */
  data->nvmxsub = NVARS*MXSUB;
  data->nvmxsub2 = NVARS*(MXSUB+2);
}

/* Set initial conditions in u */

static void SetInitialProfiles(N_Vector u, UserData data)
{
  int isubx, isuby;
  int lx, ly, jx, jy;
  sunindextype offset;
  realtype dx, dy, x, y, cx, cy, xmid, ymid;
  realtype *uarray;

  /* Set pointer to data array in vector u */

  uarray = N_VGetArrayPointer(u);

  /* Get mesh spacings, and subgrid indices for this PE */

  dx = data->dx;         dy = data->dy;
  isubx = data->isubx;   isuby = data->isuby;

  /* Load initial profiles of c1 and c2 into local u vector.
  Here lx and ly are local mesh point indices on the local subgrid,
  and jx and jy are the global mesh point indices. */

  offset = 0;
  xmid = RCONST(0.5)*(XMIN + XMAX);
  ymid = RCONST(0.5)*(YMIN + YMAX);
  for (ly = 0; ly < MYSUB; ly++) {
    jy = ly + isuby*MYSUB;
    y = YMIN + jy*dy;
    cy = SQR(RCONST(0.1)*(y - ymid));
    cy = RCONST(1.0) - cy + RCONST(0.5)*SQR(cy);
    for (lx = 0; lx < MXSUB; lx++) {
      jx = lx + isubx*MXSUB;
      x = XMIN + jx*dx;
      cx = SQR(RCONST(0.1)*(x - xmid));
      cx = RCONST(1.0) - cx + RCONST(0.5)*SQR(cx);
      uarray[offset  ] = C1_SCALE*cx*cy;
      uarray[offset+1] = C2_SCALE*cx*cy;
      offset = offset + 2;
    }
  }
}

/* Print problem introduction */

static void PrintIntro(int npes, sunindextype mudq, sunindextype mldq,
                       sunindextype mukeep, sunindextype mlkeep)
{
  printf("\n2-species diurnal advection-diffusion problem\n");
  printf("  %d by %d mesh on %d processors\n", MX, MY, npes);
  printf("  Using CVBBDPRE preconditioner module\n");
  printf("    Difference-quotient half-bandwidths are");
  printf(" mudq = %ld,  mldq = %ld\n", (long int) mudq, (long int) mldq);
  printf("    Retained band block half-bandwidths are");
  printf(" mukeep = %ld,  mlkeep = %ld", (long int) mukeep, (long int) mlkeep);

  return;
}

/* Print current t, step count, order, stepsize, and sampled c1,c2 values */

static void PrintOutput(void *cvode_mem, int my_pe, MPI_Comm comm,
                        N_Vector u, realtype t)
{
  int qu, retval, npelast;
  long int nst;
  sunindextype i0, i1;
  realtype hu, *uarray, tempu[2];
  MPI_Status status;

  npelast = NPEX*NPEY - 1;
  uarray = N_VGetArrayPointer(u);

  /* Send c1,c2 at top right mesh point to PE 0 */
  if (my_pe == npelast) {
    i0 = NVARS*MXSUB*MYSUB - 2;
    i1 = i0 + 1;
    if (npelast != 0)
      MPI_Send(&uarray[i0], 2, MPI_SUNREALTYPE, 0, 0, comm);
    else {
      tempu[0] = uarray[i0];
      tempu[1] = uarray[i1];
    }
  }

  /* On PE 0, receive c1,c2 at top right, then print performance data
     and sampled solution values */
  if (my_pe == 0) {
    if (npelast != 0)
      MPI_Recv(&tempu[0], 2, MPI_SUNREALTYPE, npelast, 0, comm, &status);
    retval = CVodeGetNumSteps(cvode_mem, &nst);
    check_retval(&retval, "CVodeGetNumSteps", 1, my_pe);
    retval = CVodeGetLastOrder(cvode_mem, &qu);
    check_retval(&retval, "CVodeGetLastOrder", 1, my_pe);
    retval = CVodeGetLastStep(cvode_mem, &hu);
    check_retval(&retval, "CVodeGetLastStep", 1, my_pe);
#if defined(SUNDIALS_EXTENDED_PRECISION)
    printf("t = %.2Le   no. steps = %ld   order = %d   stepsize = %.2Le\n",
           t, nst, qu, hu);
    printf("At bottom left:  c1, c2 = %12.3Le %12.3Le \n", uarray[0], uarray[1]);
    printf("At top right:    c1, c2 = %12.3Le %12.3Le \n\n", tempu[0], tempu[1]);
#elif defined(SUNDIALS_DOUBLE_PRECISION)
    printf("t = %.2e   no. steps = %ld   order = %d   stepsize = %.2e\n",
           t, nst, qu, hu);
    printf("At bottom left:  c1, c2 = %12.3e %12.3e \n", uarray[0], uarray[1]);
    printf("At top right:    c1, c2 = %12.3e %12.3e \n\n", tempu[0], tempu[1]);
#else
    printf("t = %.2e   no. steps = %ld   order = %d   stepsize = %.2e\n",
           t, nst, qu, hu);
    printf("At bottom left:  c1, c2 = %12.3e %12.3e \n", uarray[0], uarray[1]);
    printf("At top right:    c1, c2 = %12.3e %12.3e \n\n", tempu[0], tempu[1]);
#endif
  }
}
/* Print final statistics */

static void PrintFinalStats(void *cvode_mem)
{
  long int lenrw, leniw;
  long int lenrwLS, leniwLS;
  long int lenrwBBDP, leniwBBDP;
  long int nst, nfe, nsetups, nni, ncfn, netf, ngevalsBBDP;
  long int nli, npe, nps, ncfl, nfeLS;
  int retval;

  retval = CVodeGetWorkSpace(cvode_mem, &lenrw, &leniw);
  check_retval(&retval, "CVodeGetWorkSpace", 1, 0);
  retval = CVodeGetNumSteps(cvode_mem, &nst);
  check_retval(&retval, "CVodeGetNumSteps", 1, 0);
  retval = CVodeGetNumRhsEvals(cvode_mem, &nfe);
  check_retval(&retval, "CVodeGetNumRhsEvals", 1, 0);
  retval = CVodeGetNumLinSolvSetups(cvode_mem, &nsetups);
  check_retval(&retval, "CVodeGetNumLinSolvSetups", 1, 0);
  retval = CVodeGetNumErrTestFails(cvode_mem, &netf);
  check_retval(&retval, "CVodeGetNumErrTestFails", 1, 0);
  retval = CVodeGetNumNonlinSolvIters(cvode_mem, &nni);
  check_retval(&retval, "CVodeGetNumNonlinSolvIters", 1, 0);
  retval = CVodeGetNumNonlinSolvConvFails(cvode_mem, &ncfn);
  check_retval(&retval, "CVodeGetNumNonlinSolvConvFails", 1, 0);

  retval = CVodeGetLinWorkSpace(cvode_mem, &lenrwLS, &leniwLS);
  check_retval(&retval, "CVodeGetLinWorkSpace", 1, 0);
  retval = CVodeGetNumLinIters(cvode_mem, &nli);
  check_retval(&retval, "CVodeGetNumLinIters", 1, 0);
  retval = CVodeGetNumPrecEvals(cvode_mem, &npe);
  check_retval(&retval, "CVodeGetNumPrecEvals", 1, 0);
  retval = CVodeGetNumPrecSolves(cvode_mem, &nps);
  check_retval(&retval, "CVodeGetNumPrecSolves", 1, 0);
  retval = CVodeGetNumLinConvFails(cvode_mem, &ncfl);
  check_retval(&retval, "CVodeGetNumLinConvFails", 1, 0);
  retval = CVodeGetNumLinRhsEvals(cvode_mem, &nfeLS);
  check_retval(&retval, "CVodeGetNumLinRhsEvals", 1, 0);

  printf("\nFinal Statistics: \n\n");
  printf("lenrw   = %5ld     leniw   = %5ld\n"  , lenrw, leniw);
  printf("lenrwls = %5ld     leniwls = %5ld\n"  , lenrwLS, leniwLS);
  printf("nst     = %5ld\n"                     , nst);
  printf("nfe     = %5ld     nfels   = %5ld\n"  , nfe, nfeLS);
  printf("nni     = %5ld     nli     = %5ld\n"  , nni, nli);
  printf("nsetups = %5ld     netf    = %5ld\n"  , nsetups, netf);
  printf("npe     = %5ld     nps     = %5ld\n"  , npe, nps);
  printf("ncfn    = %5ld     ncfl    = %5ld\n\n", ncfn, ncfl);

  retval = CVBBDPrecGetWorkSpace(cvode_mem, &lenrwBBDP, &leniwBBDP);
  check_retval(&retval, "CVBBDPrecGetWorkSpace", 1, 0);
  retval = CVBBDPrecGetNumGfnEvals(cvode_mem, &ngevalsBBDP);
  check_retval(&retval, "CVBBDPrecGetNumGfnEvals", 1, 0);
  printf("In CVBBDPRE: real/integer local work space sizes = %ld, %ld\n",
         lenrwBBDP, leniwBBDP);
  printf("             no. flocal evals. = %ld\n",ngevalsBBDP);
}

/* Routine to send boundary data to neighboring PEs */

static void BSend(MPI_Comm comm,
                  int my_pe, int isubx, int isuby,
                  sunindextype dsizex, sunindextype dsizey,
                  realtype uarray[])
{
  int i, ly;
  sunindextype offsetu, offsetbuf;
  realtype bufleft[NVARS*MYSUB], bufright[NVARS*MYSUB];

  /* If isuby > 0, send data from bottom x-line of u */

  if (isuby != 0)
    MPI_Send(&uarray[0], (int) dsizex, MPI_SUNREALTYPE, my_pe-NPEX, 0, comm);

  /* If isuby < NPEY-1, send data from top x-line of u */

  if (isuby != NPEY-1) {
    offsetu = (MYSUB-1)*dsizex;
    MPI_Send(&uarray[offsetu], (int) dsizex, MPI_SUNREALTYPE, my_pe+NPEX, 0, comm);
  }

  /* If isubx > 0, send data from left y-line of u (via bufleft) */

  if (isubx != 0) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetbuf = ly*NVARS;
      offsetu = ly*dsizex;
      for (i = 0; i < NVARS; i++)
        bufleft[offsetbuf+i] = uarray[offsetu+i];
    }
    MPI_Send(&bufleft[0], (int) dsizey, MPI_SUNREALTYPE, my_pe-1, 0, comm);
  }

  /* If isubx < NPEX-1, send data from right y-line of u (via bufright) */

  if (isubx != NPEX-1) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetbuf = ly*NVARS;
      offsetu = offsetbuf*MXSUB + (MXSUB-1)*NVARS;
      for (i = 0; i < NVARS; i++)
        bufright[offsetbuf+i] = uarray[offsetu+i];
    }
    MPI_Send(&bufright[0], (int) dsizey, MPI_SUNREALTYPE, my_pe+1, 0, comm);
  }

}

/* Routine to start receiving boundary data from neighboring PEs.
   Notes:
   1) buffer should be able to hold 2*NVARS*MYSUB realtype entries, should be
   passed to both the BRecvPost and BRecvWait functions, and should not
   be manipulated between the two calls.
   2) request should have 4 entries, and should be passed in both calls also. */

static void BRecvPost(MPI_Comm comm, MPI_Request request[],
                      int my_pe, int isubx, int isuby,
                      sunindextype dsizex, sunindextype dsizey,
                      realtype uext[], realtype buffer[])
{
  sunindextype offsetue;
  /* Have bufleft and bufright use the same buffer */
  realtype *bufleft = buffer, *bufright = buffer+NVARS*MYSUB;

  /* If isuby > 0, receive data for bottom x-line of uext */
  if (isuby != 0)
    MPI_Irecv(&uext[NVARS], (int) dsizex, MPI_SUNREALTYPE,
              my_pe-NPEX, 0, comm, &request[0]);

  /* If isuby < NPEY-1, receive data for top x-line of uext */
  if (isuby != NPEY-1) {
    offsetue = NVARS*(1 + (MYSUB+1)*(MXSUB+2));
    MPI_Irecv(&uext[offsetue], (int) dsizex, MPI_SUNREALTYPE,
              my_pe+NPEX, 0, comm, &request[1]);
  }

  /* If isubx > 0, receive data for left y-line of uext (via bufleft) */
  if (isubx != 0) {
    MPI_Irecv(&bufleft[0], (int) dsizey, MPI_SUNREALTYPE,
              my_pe-1, 0, comm, &request[2]);
  }

  /* If isubx < NPEX-1, receive data for right y-line of uext (via bufright) */
  if (isubx != NPEX-1) {
    MPI_Irecv(&bufright[0], (int) dsizey, MPI_SUNREALTYPE,
              my_pe+1, 0, comm, &request[3]);
  }

}

/* Routine to finish receiving boundary data from neighboring PEs.
   Notes:
   1) buffer should be able to hold 2*NVARS*MYSUB realtype entries, should be
   passed to both the BRecvPost and BRecvWait functions, and should not
   be manipulated between the two calls.
   2) request should have 4 entries, and should be passed in both calls also. */

static void BRecvWait(MPI_Request request[],
                      int isubx, int isuby,
                      sunindextype dsizex, realtype uext[],
                      realtype buffer[])
{
  int i, ly;
  sunindextype dsizex2, offsetue, offsetbuf;
  realtype *bufleft = buffer, *bufright = buffer+NVARS*MYSUB;
  MPI_Status status;

  dsizex2 = dsizex + 2*NVARS;

  /* If isuby > 0, receive data for bottom x-line of uext */
  if (isuby != 0)
    MPI_Wait(&request[0],&status);

  /* If isuby < NPEY-1, receive data for top x-line of uext */
  if (isuby != NPEY-1)
    MPI_Wait(&request[1],&status);

  /* If isubx > 0, receive data for left y-line of uext (via bufleft) */
  if (isubx != 0) {
    MPI_Wait(&request[2],&status);

    /* Copy the buffer to uext */
    for (ly = 0; ly < MYSUB; ly++) {
      offsetbuf = ly*NVARS;
      offsetue = (ly+1)*dsizex2;
      for (i = 0; i < NVARS; i++)
        uext[offsetue+i] = bufleft[offsetbuf+i];
    }
  }

  /* If isubx < NPEX-1, receive data for right y-line of uext (via bufright) */
  if (isubx != NPEX-1) {
    MPI_Wait(&request[3],&status);

    /* Copy the buffer to uext */
    for (ly = 0; ly < MYSUB; ly++) {
      offsetbuf = ly*NVARS;
      offsetue = (ly+2)*dsizex2 - NVARS;
      for (i = 0; i < NVARS; i++)
        uext[offsetue+i] = bufright[offsetbuf+i];
    }
  }
}

/* fucomm routine.  This routine performs all inter-processor
   communication of data in u needed to calculate f.         */

static void fucomm(realtype t, N_Vector u, void *user_data)
{
  UserData data;
  realtype *uarray, *uext, buffer[2*NVARS*MYSUB];
  MPI_Comm comm;
  int my_pe, isubx, isuby;
  sunindextype nvmxsub, nvmysub;
  MPI_Request request[4];

  data = (UserData) user_data;
  uarray = N_VGetArrayPointer(u);

  /* Get comm, my_pe, subgrid indices, data sizes, extended array uext */

  comm = data->comm;  my_pe = data->my_pe;
  isubx = data->isubx;   isuby = data->isuby;
  nvmxsub = data->nvmxsub;
  nvmysub = NVARS*MYSUB;
  uext = data->uext;

  /* Start receiving boundary data from neighboring PEs */

  BRecvPost(comm, request, my_pe, isubx, isuby, nvmxsub, nvmysub, uext, buffer);

  /* Send data from boundary of local grid to neighboring PEs */

  BSend(comm, my_pe, isubx, isuby, nvmxsub, nvmysub, uarray);

  /* Finish receiving boundary data from neighboring PEs */

  BRecvWait(request, isubx, isuby, nvmxsub, uext, buffer);
}

/***************** Function called by the solver **************************/

/* f routine.  Evaluate f(t,y).  First call fucomm to do communication of
   subgrid boundary data into uext.  Then calculate f by a call to flocal. */

static int f(realtype t, N_Vector u, N_Vector udot, void *user_data)
{
  UserData data;

  data = (UserData) user_data;

  /* Call fucomm to do inter-processor communication */

  fucomm (t, u, user_data);

  /* Call flocal to calculate all right-hand sides */

  flocal (data->Nlocal, t, u, udot, user_data);

  return(0);
}

/***************** Functions called by the CVBBDPRE module ****************/

/* flocal routine.  Compute f(t,y).  This routine assumes that all
   inter-processor communication of data needed to calculate f has already
   been done, and this data is in the work array uext.                    */

static int flocal(sunindextype Nlocal, realtype t, N_Vector u,
                  N_Vector udot, void *user_data)
{
  realtype *uext;
  realtype q3, c1, c2, c1dn, c2dn, c1up, c2up, c1lt, c2lt;
  realtype c1rt, c2rt, cydn, cyup, hord1, hord2, horad1, horad2;
  realtype qq1, qq2, qq3, qq4, rkin1, rkin2, s, vertd1, vertd2, ydn, yup;
  realtype q4coef, dely, verdco, hordco, horaco;
  int i, lx, ly, jy;
  int isubx, isuby;
  sunindextype nvmxsub, nvmxsub2, offsetu, offsetue;
  UserData data;
  realtype *uarray, *duarray;

  uarray = N_VGetArrayPointer(u);
  duarray = N_VGetArrayPointer(udot);

  /* Get subgrid indices, array sizes, extended work array uext */

  data = (UserData) user_data;
  isubx = data->isubx;   isuby = data->isuby;
  nvmxsub = data->nvmxsub; nvmxsub2 = data->nvmxsub2;
  uext = data->uext;

  /* Copy local segment of u vector into the working extended array uext */

  offsetu = 0;
  offsetue = nvmxsub2 + NVARS;
  for (ly = 0; ly < MYSUB; ly++) {
    for (i = 0; i < nvmxsub; i++) uext[offsetue+i] = uarray[offsetu+i];
    offsetu = offsetu + nvmxsub;
    offsetue = offsetue + nvmxsub2;
  }

  /* To facilitate homogeneous Neumann boundary conditions, when this is
  a boundary PE, copy data from the first interior mesh line of u to uext */

  /* If isuby = 0, copy x-line 2 of u to uext */
  if (isuby == 0) {
    for (i = 0; i < nvmxsub; i++) uext[NVARS+i] = uarray[nvmxsub+i];
  }

  /* If isuby = NPEY-1, copy x-line MYSUB-1 of u to uext */
  if (isuby == NPEY-1) {
    offsetu = (MYSUB-2)*nvmxsub;
    offsetue = (MYSUB+1)*nvmxsub2 + NVARS;
    for (i = 0; i < nvmxsub; i++) uext[offsetue+i] = uarray[offsetu+i];
  }

  /* If isubx = 0, copy y-line 2 of u to uext */
  if (isubx == 0) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetu = ly*nvmxsub + NVARS;
      offsetue = (ly+1)*nvmxsub2;
      for (i = 0; i < NVARS; i++) uext[offsetue+i] = uarray[offsetu+i];
    }
  }

  /* If isubx = NPEX-1, copy y-line MXSUB-1 of u to uext */
  if (isubx == NPEX-1) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetu = (ly+1)*nvmxsub - 2*NVARS;
      offsetue = (ly+2)*nvmxsub2 - NVARS;
      for (i = 0; i < NVARS; i++) uext[offsetue+i] = uarray[offsetu+i];
    }
  }

  /* Make local copies of problem variables, for efficiency */

  dely = data->dy;
  verdco = data->vdco;
  hordco = data->hdco;
  horaco = data->haco;

  /* Set diurnal rate coefficients as functions of t, and save q4 in
  data block for use by preconditioner evaluation routine            */

  s = sin((data->om)*t);
  if (s > ZERO) {
    q3 = exp(-A3/s);
    q4coef = exp(-A4/s);
  } else {
    q3 = ZERO;
    q4coef = ZERO;
  }
  data->q4 = q4coef;

  /* Loop over all grid points in local subgrid */

  for (ly = 0; ly < MYSUB; ly++) {

    jy = ly + isuby*MYSUB;

    /* Set vertical diffusion coefficients at jy +- 1/2 */

    ydn = YMIN + (jy - RCONST(0.5))*dely;
    yup = ydn + dely;
    cydn = verdco*exp(RCONST(0.2)*ydn);
    cyup = verdco*exp(RCONST(0.2)*yup);
    for (lx = 0; lx < MXSUB; lx++) {

      /* Extract c1 and c2, and set kinetic rate terms */

      offsetue = (lx+1)*NVARS + (ly+1)*nvmxsub2;
      c1 = uext[offsetue];
      c2 = uext[offsetue+1];
      qq1 = Q1*c1*C3;
      qq2 = Q2*c1*c2;
      qq3 = q3*C3;
      qq4 = q4coef*c2;
      rkin1 = -qq1 - qq2 + RCONST(2.0)*qq3 + qq4;
      rkin2 = qq1 - qq2 - qq4;

      /* Set vertical diffusion terms */

      c1dn = uext[offsetue-nvmxsub2];
      c2dn = uext[offsetue-nvmxsub2+1];
      c1up = uext[offsetue+nvmxsub2];
      c2up = uext[offsetue+nvmxsub2+1];
      vertd1 = cyup*(c1up - c1) - cydn*(c1 - c1dn);
      vertd2 = cyup*(c2up - c2) - cydn*(c2 - c2dn);

      /* Set horizontal diffusion and advection terms */

      c1lt = uext[offsetue-2];
      c2lt = uext[offsetue-1];
      c1rt = uext[offsetue+2];
      c2rt = uext[offsetue+3];
      hord1 = hordco*(c1rt - RCONST(2.0)*c1 + c1lt);
      hord2 = hordco*(c2rt - RCONST(2.0)*c2 + c2lt);
      horad1 = horaco*(c1rt - c1lt);
      horad2 = horaco*(c2rt - c2lt);

      /* Load all terms into duarray */

      offsetu = lx*NVARS + ly*nvmxsub;
      duarray[offsetu]   = vertd1 + hord1 + horad1 + rkin1;
      duarray[offsetu+1] = vertd2 + hord2 + horad2 + rkin2;
    }
  }

  return(0);
}

/* Check function return value...
     opt == 0 means SUNDIALS function allocates memory so check if
              returned NULL pointer
     opt == 1 means SUNDIALS function returns an integer value so check if
              retval < 0
     opt == 2 means function allocates memory so check if returned
              NULL pointer */

static int check_retval(void *returnvalue, const char *funcname, int opt, int id)
{
  int *retval;

  /* Check if SUNDIALS function returned NULL pointer - no memory allocated */
  if (opt == 0 && returnvalue == NULL) {
    fprintf(stderr, "\nSUNDIALS_ERROR(%d): %s() failed - returned NULL pointer\n\n",
            id, funcname);
    return(1); }

  /* Check if retval < 0 */
  else if (opt == 1) {
    retval = (int *) returnvalue;
    if (*retval < 0) {
      fprintf(stderr, "\nSUNDIALS_ERROR(%d): %s() failed with retval = %d\n\n",
              id, funcname, *retval);
      return(1); }}

  /* Check if function returned NULL pointer - no memory allocated */
  else if (opt == 2 && returnvalue == NULL) {
    fprintf(stderr, "\nMEMORY_ERROR(%d): %s() failed - returned NULL pointer\n\n",
            id, funcname);
    return(1); }

  return(0);
}