/*
 * -----------------------------------------------------------------
 * Programmer(s): Daniel R. Reynolds @ SMU
 *                S. D. Cohen, A. C. Hindmarsh, M. R. Wittman, and
 *                Radu Serban  @ LLNL
 * -----------------------------------------------------------------
 * SUNDIALS Copyright Start
 * Copyright (c) 2002-2021, Lawrence Livermore National Security
 * and Southern Methodist University.
 * All rights reserved.
 *
 * See the top-level LICENSE and NOTICE files for details.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 * SUNDIALS Copyright End
 * -----------------------------------------------------------------
 * Example problem:
 *
 * An ODE system is generated from the following 2-species diurnal
 * kinetics advection-diffusion PDE system in 2 space dimensions:
 *
 * dc(i)/dt = Kh*(d/dx)^2 c(i) + V*dc(i)/dx + (d/dy)(Kv(y)*dc(i)/dy)
 *                 + Ri(c1,c2,t)      for i = 1,2,   where
 *   R1(c1,c2,t) = -q1*c1*c3 - q2*c1*c2 + 2*q3(t)*c3 + q4(t)*c2 ,
 *   R2(c1,c2,t) =  q1*c1*c3 - q2*c1*c2 - q4(t)*c2 ,
 *   Kv(y) = Kv0*exp(y/5) ,
 * Kh, V, Kv0, q1, q2, and c3 are constants, and q3(t) and q4(t)
 * vary diurnally. The problem is posed on the square
 *   0 <= x <= 20,    30 <= y <= 50   (all in km),
 * with homogeneous Neumann boundary conditions, and for time t in
 *   0 <= t <= 86400 sec (1 day).
 * The PDE system is treated by central differences on a uniform
 * mesh, with simple polynomial initial profiles.
 *
 * The problem is solved by CVODES on NPE processors, treated
 * as a rectangular process grid of size NPEX by NPEY, with
 * NPE = NPEX*NPEY. Each processor contains a subgrid of size MXSUB
 * by MYSUB of the (x,y) mesh. Thus the actual mesh sizes are
 * MX = MXSUB*NPEX and MY = MYSUB*NPEY, and the ODE system size is
 * neq = 2*MX*MY.
 *
 * The solution is done with the BDF/GMRES method (i.e. using the
 * SUNLinSol_SPGMR linear solver) and a block-diagonal matrix with banded
 * blocks as a preconditioner, using the CVBBDPRE module.
 * Each block is generated using difference quotients, with
 * half-bandwidths mudq = mldq = 2*MXSUB, but the retained banded
 * blocks have half-bandwidths mukeep = mlkeep = 2.
 * A copy of the approximate Jacobian is saved and conditionally
 * reused within the preconditioner routine.
 *
 * The problem is solved twice -- with left and right preconditioning.
 *
 * Performance data and sampled solution values are printed at
 * selected output times, and all performance counters are printed
 * on completion.
 *
 * This version uses MPI for user routines.
 * Execute with number of processors = NPEX*NPEY (see constants below).
 * --------------------------------------------------------------------
 */
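
/* A typical launch for the default 2 x 2 process grid might be
 *     mpiexec -n 4 ./cvsDiurnal_kry_bbd_p
 * (the executable name is only an assumption; use whatever name the
 * build system produces). */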

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include <cvodes/cvodes.h>              /* prototypes for CVODE fcts. */
#include <cvodes/cvodes_bbdpre.h>       /* prototypes for CVBBDPRE module */
#include <sunlinsol/sunlinsol_spgmr.h>  /* prototypes and constants for SUNLinSol_SPGMR solver */
#include <nvector/nvector_parallel.h>   /* definition of N_Vector */
#include <sundials/sundials_types.h>    /* definitions of realtype, booleantype */

#include <mpi.h>                        /* MPI constants and types */

/* helpful macros */

#ifndef SQR
#define SQR(A) ((A)*(A))
#endif

/* Problem Constants */

#define ZERO         RCONST(0.0)

#define NVARS        2                 /* number of species         */
#define KH           RCONST(4.0e-6)    /* horizontal diffusivity Kh */
#define VEL          RCONST(0.001)     /* advection velocity V      */
#define KV0          RCONST(1.0e-8)    /* coefficient in Kv(y)      */
#define Q1           RCONST(1.63e-16)  /* coefficients q1, q2, c3   */
#define Q2           RCONST(4.66e-16)
#define C3           RCONST(3.7e16)
#define A3           RCONST(22.62)     /* coefficient in expression for q3(t) */
#define A4           RCONST(7.601)     /* coefficient in expression for q4(t) */
#define C1_SCALE     RCONST(1.0e6)     /* coefficients in initial profiles    */
#define C2_SCALE     RCONST(1.0e12)

#define T0           ZERO                /* initial time */
#define NOUT         12                  /* number of output times */
#define TWOHR        RCONST(7200.0)      /* number of seconds in two hours  */
#define HALFDAY      RCONST(4.32e4)      /* number of seconds in a half day */
#define PI       RCONST(3.1415926535898) /* pi */

#define XMIN         ZERO                /* grid boundaries in x  */
#define XMAX         RCONST(20.0)
#define YMIN         RCONST(30.0)        /* grid boundaries in y  */
#define YMAX         RCONST(50.0)

#define NPEX         2              /* no. PEs in x direction of PE array */
#define NPEY         2              /* no. PEs in y direction of PE array */
                                    /* Total no. PEs = NPEX*NPEY */
#define MXSUB        5              /* no. x points per subgrid */
#define MYSUB        5              /* no. y points per subgrid */

#define MX           (NPEX*MXSUB)   /* MX = number of x mesh points */
#define MY           (NPEY*MYSUB)   /* MY = number of y mesh points */
                                    /* Spatial mesh is MX by MY */
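
/* With the default NPEX = NPEY = 2 and MXSUB = MYSUB = 5, the global mesh
   is 10 by 10, so neq = NVARS*MX*MY = 200 and each PE owns
   local_N = NVARS*MXSUB*MYSUB = 50 equations. */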
/* CVodeInit Constants */

#define RTOL    RCONST(1.0e-5)    /* scalar relative tolerance */
#define FLOOR   RCONST(100.0)     /* value of C1 or C2 at which tolerances */
                                  /* change from relative to absolute      */
#define ATOL    (RTOL*FLOOR)      /* scalar absolute tolerance */

/* Type : UserData
   contains problem constants, extended dependent variable array,
   grid constants, processor indices, MPI communicator */

typedef struct {
  realtype q4, om, dx, dy, hdco, haco, vdco;
  realtype uext[NVARS*(MXSUB+2)*(MYSUB+2)];
  int my_pe, isubx, isuby;
  sunindextype nvmxsub, nvmxsub2, Nlocal;
  MPI_Comm comm;
} *UserData;
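
/* The uext member of UserData holds the local solution extended by one
   ghost line on each side: NVARS species at (MXSUB+2) by (MYSUB+2) mesh
   points.  Ghost entries are filled with neighbor data in fucomm, or with
   copies of the first interior line at physical boundaries (homogeneous
   Neumann) in flocal. */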

/* Prototypes of private helper functions */

static void InitUserData(int my_pe, sunindextype local_N, MPI_Comm comm,
                         UserData data);
static void SetInitialProfiles(N_Vector u, UserData data);
static void PrintIntro(int npes, sunindextype mudq, sunindextype mldq,
                       sunindextype mukeep, sunindextype mlkeep);
static void PrintOutput(void *cvode_mem, int my_pe, MPI_Comm comm,
                        N_Vector u, realtype t);
static void PrintFinalStats(void *cvode_mem);
static void BSend(MPI_Comm comm,
                  int my_pe, int isubx, int isuby,
                  sunindextype dsizex, sunindextype dsizey,
                  realtype uarray[]);
static void BRecvPost(MPI_Comm comm, MPI_Request request[],
                      int my_pe, int isubx, int isuby,
                      sunindextype dsizex, sunindextype dsizey,
                      realtype uext[], realtype buffer[]);
static void BRecvWait(MPI_Request request[],
                      int isubx, int isuby,
                      sunindextype dsizex, realtype uext[],
                      realtype buffer[]);

static void fucomm(realtype t, N_Vector u, void *user_data);

/* Prototype of function called by the solver */

static int f(realtype t, N_Vector u, N_Vector udot, void *user_data);

/* Prototype of functions called by the CVBBDPRE module */

static int flocal(sunindextype Nlocal, realtype t, N_Vector u,
                  N_Vector udot, void *user_data);

/* Private function to check function return values */

static int check_retval(void *returnvalue, const char *funcname, int opt, int id);

/***************************** Main Program ******************************/

int main(int argc, char *argv[])
{
  UserData data;
  SUNLinearSolver LS;
  void *cvode_mem;
  realtype abstol, reltol, t, tout;
  N_Vector u;
  int iout, my_pe, npes, retval, jpre;
  sunindextype neq, local_N, mudq, mldq, mukeep, mlkeep;
  MPI_Comm comm;

  data = NULL;
  LS = NULL;
  cvode_mem = NULL;
  u = NULL;

  /* Set problem size neq */
  neq = NVARS*MX*MY;

  /* Get processor number and total number of pe's */
  MPI_Init(&argc, &argv);
  comm = MPI_COMM_WORLD;
  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &my_pe);

  if (npes != NPEX*NPEY) {
    if (my_pe == 0)
      fprintf(stderr, "\nMPI_ERROR(0): npes = %d is not equal to NPEX*NPEY = %d\n\n",
              npes, NPEX*NPEY);
    MPI_Finalize();
    return(1);
  }

  /* Set local length */
  local_N = NVARS*MXSUB*MYSUB;

  /* Allocate and load user data block */
  data = (UserData) malloc(sizeof *data);
  if(check_retval((void *)data, "malloc", 2, my_pe)) MPI_Abort(comm, 1);
  InitUserData(my_pe, local_N, comm, data);

  /* Allocate and initialize u, and set tolerances */
  u = N_VNew_Parallel(comm, local_N, neq);
  if(check_retval((void *)u, "N_VNew_Parallel", 0, my_pe)) MPI_Abort(comm, 1);
  SetInitialProfiles(u, data);
  abstol = ATOL;
  reltol = RTOL;

  /* Call CVodeCreate to create the solver memory and specify the
   * Backward Differentiation Formula */
  cvode_mem = CVodeCreate(CV_BDF);
  if(check_retval((void *)cvode_mem, "CVodeCreate", 0, my_pe)) MPI_Abort(comm, 1);

  /* Set the pointer to user-defined data */
  retval = CVodeSetUserData(cvode_mem, data);
  if(check_retval(&retval, "CVodeSetUserData", 1, my_pe)) MPI_Abort(comm, 1);

  /* Call CVodeInit to initialize the integrator memory and specify the
   * user's right hand side function in u'=f(t,u), the initial time T0, and
   * the initial dependent variable vector u. */
  retval = CVodeInit(cvode_mem, f, T0, u);
  if(check_retval(&retval, "CVodeInit", 1, my_pe)) MPI_Abort(comm, 1);

  /* Call CVodeSStolerances to specify the scalar relative tolerance
   * and scalar absolute tolerances */
  retval = CVodeSStolerances(cvode_mem, reltol, abstol);
  if (check_retval(&retval, "CVodeSStolerances", 1, my_pe)) MPI_Abort(comm, 1);

  /* Create SPGMR solver structure -- use left preconditioning
     and the default Krylov dimension maxl */
  LS = SUNLinSol_SPGMR(u, PREC_LEFT, 0);
  if (check_retval((void *)LS, "SUNLinSol_SPGMR", 0, my_pe)) MPI_Abort(comm, 1);

  /* Attach the linear solver */
  retval = CVodeSetLinearSolver(cvode_mem, LS, NULL);
  if(check_retval(&retval, "CVodeSetLinearSolver", 1, my_pe)) MPI_Abort(comm, 1);

  /* Initialize BBD preconditioner */
  mudq = mldq = NVARS*MXSUB;
  mukeep = mlkeep = NVARS;
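  /* These values give difference-quotient half-bandwidths
     mudq = mldq = 2*MXSUB and retained banded blocks with half-bandwidths
     mukeep = mlkeep = 2, matching the description in the header above. */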
  retval = CVBBDPrecInit(cvode_mem, local_N, mudq, mldq,
                         mukeep, mlkeep, ZERO, flocal, NULL);
  if(check_retval(&retval, "CVBBDPrecInit", 1, my_pe)) MPI_Abort(comm, 1);

  /* Print heading */
  if (my_pe == 0) PrintIntro(npes, mudq, mldq, mukeep, mlkeep);

  /* Loop over jpre (= PREC_LEFT, PREC_RIGHT), and solve the problem */
  for (jpre = PREC_LEFT; jpre <= PREC_RIGHT; jpre++) {

  /* On second run, re-initialize u, the integrator, CVBBDPRE,
     and preconditioning type */

  if (jpre == PREC_RIGHT) {

    SetInitialProfiles(u, data);

    retval = CVodeReInit(cvode_mem, T0, u);
    if(check_retval(&retval, "CVodeReInit", 1, my_pe)) MPI_Abort(comm, 1);

    retval = CVBBDPrecReInit(cvode_mem, mudq, mldq, ZERO);
    if(check_retval(&retval, "CVBBDPrecReInit", 1, my_pe)) MPI_Abort(comm, 1);

    retval = SUNLinSol_SPGMRSetPrecType(LS, PREC_RIGHT);
    if(check_retval(&retval, "SUNLinSol_SPGMRSetPrecType", 1, my_pe)) MPI_Abort(comm, 1);

    if (my_pe == 0) {
      printf("\n\n-------------------------------------------------------");
      printf("------------\n");
    }

  }


  if (my_pe == 0) {
    printf("\n\nPreconditioner type is:  jpre = %s\n\n",
           (jpre == PREC_LEFT) ? "PREC_LEFT" : "PREC_RIGHT");
  }

  /* In loop over output points, call CVode, print results, test for error */

  for (iout = 1, tout = TWOHR; iout <= NOUT; iout++, tout += TWOHR) {
    retval = CVode(cvode_mem, tout, u, &t, CV_NORMAL);
    if(check_retval(&retval, "CVode", 1, my_pe)) break;
    PrintOutput(cvode_mem, my_pe, comm, u, t);
  }

  /* Print final statistics */

  if (my_pe == 0) PrintFinalStats(cvode_mem);

  } /* End of jpre loop */

  /* Free memory */
  N_VDestroy_Parallel(u);
  free(data);
  CVodeFree(&cvode_mem);
  SUNLinSolFree(LS);

  MPI_Finalize();

  return(0);
}

/*********************** Private Helper Functions ************************/

/* Load constants in data */

static void InitUserData(int my_pe, sunindextype local_N, MPI_Comm comm,
                         UserData data)
{
  int isubx, isuby;

  /* Set problem constants */
  data->om = PI/HALFDAY;
  data->dx = (XMAX-XMIN)/((realtype)(MX-1));
  data->dy = (YMAX-YMIN)/((realtype)(MY-1));
  data->hdco = KH/SQR(data->dx);
  data->haco = VEL/(RCONST(2.0)*data->dx);
  data->vdco = (RCONST(1.0)/SQR(data->dy))*KV0;

  /* Set machine-related constants */
  data->comm = comm;
  data->my_pe = my_pe;
  data->Nlocal = local_N;
  /* isubx and isuby are the PE grid indices corresponding to my_pe */
  isuby = my_pe/NPEX;
  isubx = my_pe - isuby*NPEX;
  data->isubx = isubx;
  data->isuby = isuby;
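  /* For example, with NPEX = NPEY = 2, ranks 0,1,2,3 correspond to
     (isubx,isuby) = (0,0), (1,0), (0,1), (1,1), respectively. */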
  /* Set the sizes of a boundary x-line in u and uext */
  data->nvmxsub = NVARS*MXSUB;
  data->nvmxsub2 = NVARS*(MXSUB+2);
}

/* Set initial conditions in u */

static void SetInitialProfiles(N_Vector u, UserData data)
{
  int isubx, isuby;
  int lx, ly, jx, jy;
  sunindextype offset;
  realtype dx, dy, x, y, cx, cy, xmid, ymid;
  realtype *uarray;

  /* Set pointer to data array in vector u */

  uarray = N_VGetArrayPointer_Parallel(u);

  /* Get mesh spacings, and subgrid indices for this PE */

  dx = data->dx;         dy = data->dy;
  isubx = data->isubx;   isuby = data->isuby;

  /* Load initial profiles of c1 and c2 into local u vector.
  Here lx and ly are local mesh point indices on the local subgrid,
  and jx and jy are the global mesh point indices. */
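  /* These are the "simple polynomial initial profiles" from the header:
     c1 = C1_SCALE*p(x)*p(y) and c2 = C2_SCALE*p(x)*p(y), where
     p(s) = 1 - a + a^2/2 with a = (0.1*(s - smid))^2 and smid the
     midpoint of the corresponding interval. */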

  offset = 0;
  xmid = RCONST(0.5)*(XMIN + XMAX);
  ymid = RCONST(0.5)*(YMIN + YMAX);
  for (ly = 0; ly < MYSUB; ly++) {
    jy = ly + isuby*MYSUB;
    y = YMIN + jy*dy;
    cy = SQR(RCONST(0.1)*(y - ymid));
    cy = RCONST(1.0) - cy + RCONST(0.5)*SQR(cy);
    for (lx = 0; lx < MXSUB; lx++) {
      jx = lx + isubx*MXSUB;
      x = XMIN + jx*dx;
      cx = SQR(RCONST(0.1)*(x - xmid));
      cx = RCONST(1.0) - cx + RCONST(0.5)*SQR(cx);
      uarray[offset  ] = C1_SCALE*cx*cy;
      uarray[offset+1] = C2_SCALE*cx*cy;
      offset = offset + 2;
    }
  }
}

/* Print problem introduction */

static void PrintIntro(int npes, sunindextype mudq, sunindextype mldq,
                       sunindextype mukeep, sunindextype mlkeep)
{
  printf("\n2-species diurnal advection-diffusion problem\n");
  printf("  %d by %d mesh on %d processors\n", MX, MY, npes);
  printf("  Using CVBBDPRE preconditioner module\n");
  printf("    Difference-quotient half-bandwidths are");
  printf(" mudq = %ld,  mldq = %ld\n", (long int) mudq, (long int) mldq);
  printf("    Retained band block half-bandwidths are");
  printf(" mukeep = %ld,  mlkeep = %ld", (long int) mukeep, (long int) mlkeep);

  return;
}

/* Print current t, step count, order, stepsize, and sampled c1,c2 values */

static void PrintOutput(void *cvode_mem, int my_pe, MPI_Comm comm,
                        N_Vector u, realtype t)
{
  int qu, retval, npelast;
  sunindextype i0, i1;
  long int nst;
  realtype hu, *uarray, tempu[2];
  MPI_Status status;

  npelast = NPEX*NPEY - 1;
  uarray = N_VGetArrayPointer_Parallel(u);
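
  /* PE 0 owns the bottom-left corner of the global mesh, so uarray[0] and
     uarray[1] hold c1 and c2 there; the last PE owns the top-right corner,
     whose c1 and c2 occupy the last NVARS entries of its local data. */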

  /* Send c1,c2 at top right mesh point to PE 0 */
  if (my_pe == npelast) {
    i0 = NVARS*MXSUB*MYSUB - 2;
    i1 = i0 + 1;
    if (npelast != 0)
      MPI_Send(&uarray[i0], 2, MPI_SUNREALTYPE, 0, 0, comm);
    else {
      tempu[0] = uarray[i0];
      tempu[1] = uarray[i1];
    }
  }

  /* On PE 0, receive c1,c2 at top right, then print performance data
     and sampled solution values */
  if (my_pe == 0) {
    if (npelast != 0)
      MPI_Recv(&tempu[0], 2, MPI_SUNREALTYPE, npelast, 0, comm, &status);
    retval = CVodeGetNumSteps(cvode_mem, &nst);
    check_retval(&retval, "CVodeGetNumSteps", 1, my_pe);
    retval = CVodeGetLastOrder(cvode_mem, &qu);
    check_retval(&retval, "CVodeGetLastOrder", 1, my_pe);
    retval = CVodeGetLastStep(cvode_mem, &hu);
    check_retval(&retval, "CVodeGetLastStep", 1, my_pe);
#if defined(SUNDIALS_EXTENDED_PRECISION)
    printf("t = %.2Le   no. steps = %ld   order = %d   stepsize = %.2Le\n",
           t, nst, qu, hu);
    printf("At bottom left:  c1, c2 = %12.3Le %12.3Le \n", uarray[0], uarray[1]);
    printf("At top right:    c1, c2 = %12.3Le %12.3Le \n\n", tempu[0], tempu[1]);
#elif defined(SUNDIALS_DOUBLE_PRECISION)
    printf("t = %.2e   no. steps = %ld   order = %d   stepsize = %.2e\n",
           t, nst, qu, hu);
    printf("At bottom left:  c1, c2 = %12.3e %12.3e \n", uarray[0], uarray[1]);
    printf("At top right:    c1, c2 = %12.3e %12.3e \n\n", tempu[0], tempu[1]);
#else
    printf("t = %.2e   no. steps = %ld   order = %d   stepsize = %.2e\n",
           t, nst, qu, hu);
    printf("At bottom left:  c1, c2 = %12.3e %12.3e \n", uarray[0], uarray[1]);
    printf("At top right:    c1, c2 = %12.3e %12.3e \n\n", tempu[0], tempu[1]);
#endif
  }
}

/* Print final statistics contained in iopt */

static void PrintFinalStats(void *cvode_mem)
{
  long int lenrw, leniw ;
  long int lenrwLS, leniwLS;
  long int lenrwBBDP, leniwBBDP;
  long int nst, nfe, nsetups, nni, ncfn, netf;
  long int nli, npe, nps, ncfl, nfeLS, ngevalsBBDP;
  int retval;

  retval = CVodeGetWorkSpace(cvode_mem, &lenrw, &leniw);
  check_retval(&retval, "CVodeGetWorkSpace", 1, 0);
  retval = CVodeGetNumSteps(cvode_mem, &nst);
  check_retval(&retval, "CVodeGetNumSteps", 1, 0);
  retval = CVodeGetNumRhsEvals(cvode_mem, &nfe);
  check_retval(&retval, "CVodeGetNumRhsEvals", 1, 0);
  retval = CVodeGetNumLinSolvSetups(cvode_mem, &nsetups);
  check_retval(&retval, "CVodeGetNumLinSolvSetups", 1, 0);
  retval = CVodeGetNumErrTestFails(cvode_mem, &netf);
  check_retval(&retval, "CVodeGetNumErrTestFails", 1, 0);
  retval = CVodeGetNumNonlinSolvIters(cvode_mem, &nni);
  check_retval(&retval, "CVodeGetNumNonlinSolvIters", 1, 0);
  retval = CVodeGetNumNonlinSolvConvFails(cvode_mem, &ncfn);
  check_retval(&retval, "CVodeGetNumNonlinSolvConvFails", 1, 0);

  retval = CVodeGetLinWorkSpace(cvode_mem, &lenrwLS, &leniwLS);
  check_retval(&retval, "CVodeGetLinWorkSpace", 1, 0);
  retval = CVodeGetNumLinIters(cvode_mem, &nli);
  check_retval(&retval, "CVodeGetNumLinIters", 1, 0);
  retval = CVodeGetNumPrecEvals(cvode_mem, &npe);
  check_retval(&retval, "CVodeGetNumPrecEvals", 1, 0);
  retval = CVodeGetNumPrecSolves(cvode_mem, &nps);
  check_retval(&retval, "CVodeGetNumPrecSolves", 1, 0);
  retval = CVodeGetNumLinConvFails(cvode_mem, &ncfl);
  check_retval(&retval, "CVodeGetNumLinConvFails", 1, 0);
  retval = CVodeGetNumLinRhsEvals(cvode_mem, &nfeLS);
  check_retval(&retval, "CVodeGetNumLinRhsEvals", 1, 0);

  printf("\nFinal Statistics: \n\n");
  printf("lenrw   = %5ld     leniw   = %5ld\n"  , lenrw, leniw);
  printf("lenrwls = %5ld     leniwls = %5ld\n"  , lenrwLS, leniwLS);
  printf("nst     = %5ld\n"                     , nst);
  printf("nfe     = %5ld     nfels   = %5ld\n"  , nfe, nfeLS);
  printf("nni     = %5ld     nli     = %5ld\n"  , nni, nli);
  printf("nsetups = %5ld     netf    = %5ld\n"  , nsetups, netf);
  printf("npe     = %5ld     nps     = %5ld\n"  , npe, nps);
  printf("ncfn    = %5ld     ncfl    = %5ld\n\n", ncfn, ncfl);

  retval = CVBBDPrecGetWorkSpace(cvode_mem, &lenrwBBDP, &leniwBBDP);
  check_retval(&retval, "CVBBDPrecGetWorkSpace", 1, 0);
  retval = CVBBDPrecGetNumGfnEvals(cvode_mem, &ngevalsBBDP);
  check_retval(&retval, "CVBBDPrecGetNumGfnEvals", 1, 0);
  printf("In CVBBDPRE: real/integer local work space sizes = %ld, %ld\n",
         lenrwBBDP, leniwBBDP);
  printf("             no. flocal evals. = %ld\n",ngevalsBBDP);
}

/* Routine to send boundary data to neighboring PEs */

static void BSend(MPI_Comm comm,
                  int my_pe, int isubx, int isuby,
                  sunindextype dsizex, sunindextype dsizey,
                  realtype uarray[])
{
  int i, ly;
  sunindextype offsetu, offsetbuf;
  realtype bufleft[NVARS*MYSUB], bufright[NVARS*MYSUB];

  /* If isuby > 0, send data from bottom x-line of u */

  if (isuby != 0)
    MPI_Send(&uarray[0], (int) dsizex, MPI_SUNREALTYPE, my_pe-NPEX, 0, comm);

  /* If isuby < NPEY-1, send data from top x-line of u */

  if (isuby != NPEY-1) {
    offsetu = (MYSUB-1)*dsizex;
    MPI_Send(&uarray[offsetu], (int) dsizex, MPI_SUNREALTYPE, my_pe+NPEX, 0, comm);
  }

  /* If isubx > 0, send data from left y-line of u (via bufleft) */

  if (isubx != 0) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetbuf = ly*NVARS;
      offsetu = ly*dsizex;
      for (i = 0; i < NVARS; i++)
        bufleft[offsetbuf+i] = uarray[offsetu+i];
    }
    MPI_Send(&bufleft[0], (int) dsizey, MPI_SUNREALTYPE, my_pe-1, 0, comm);
  }

  /* If isubx < NPEX-1, send data from right y-line of u (via bufright) */

  if (isubx != NPEX-1) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetbuf = ly*NVARS;
      offsetu = offsetbuf*MXSUB + (MXSUB-1)*NVARS;
      for (i = 0; i < NVARS; i++)
        bufright[offsetbuf+i] = uarray[offsetu+i];
    }
    MPI_Send(&bufright[0], (int) dsizey, MPI_SUNREALTYPE, my_pe+1, 0, comm);
  }

}

/* Routine to start receiving boundary data from neighboring PEs.
   Notes:
   1) buffer should be able to hold 2*NVARS*MYSUB realtype entries, should be
   passed to both the BRecvPost and BRecvWait functions, and should not
   be manipulated between the two calls.
   2) request should have 4 entries, and should be passed in both calls also. */

static void BRecvPost(MPI_Comm comm, MPI_Request request[],
                      int my_pe, int isubx, int isuby,
                      sunindextype dsizex, sunindextype dsizey,
                      realtype uext[], realtype buffer[])
{
  sunindextype offsetue;
  /* Have bufleft and bufright use the same buffer */
  realtype *bufleft = buffer, *bufright = buffer+NVARS*MYSUB;

  /* If isuby > 0, receive data for bottom x-line of uext */
  if (isuby != 0)
    MPI_Irecv(&uext[NVARS], (int) dsizex, MPI_SUNREALTYPE,
              my_pe-NPEX, 0, comm, &request[0]);

  /* If isuby < NPEY-1, receive data for top x-line of uext */
  if (isuby != NPEY-1) {
    offsetue = NVARS*(1 + (MYSUB+1)*(MXSUB+2));
    MPI_Irecv(&uext[offsetue], (int) dsizex, MPI_SUNREALTYPE,
              my_pe+NPEX, 0, comm, &request[1]);
  }

  /* If isubx > 0, receive data for left y-line of uext (via bufleft) */
  if (isubx != 0) {
    MPI_Irecv(&bufleft[0], (int) dsizey, MPI_SUNREALTYPE,
              my_pe-1, 0, comm, &request[2]);
  }

  /* If isubx < NPEX-1, receive data for right y-line of uext (via bufright) */
  if (isubx != NPEX-1) {
    MPI_Irecv(&bufright[0], (int) dsizey, MPI_SUNREALTYPE,
              my_pe+1, 0, comm, &request[3]);
  }

}

/* Routine to finish receiving boundary data from neighboring PEs.
   Notes:
   1) buffer should be able to hold 2*NVARS*MYSUB realtype entries, should be
   passed to both the BRecvPost and BRecvWait functions, and should not
   be manipulated between the two calls.
   2) request should have 4 entries, and should be passed in both calls also. */

static void BRecvWait(MPI_Request request[],
                      int isubx, int isuby,
                      sunindextype dsizex, realtype uext[],
                      realtype buffer[])
{
  int i, ly;
  sunindextype dsizex2, offsetue, offsetbuf;
  realtype *bufleft = buffer, *bufright = buffer+NVARS*MYSUB;
  MPI_Status status;

  dsizex2 = dsizex + 2*NVARS;

  /* If isuby > 0, receive data for bottom x-line of uext */
  if (isuby != 0)
    MPI_Wait(&request[0],&status);

  /* If isuby < NPEY-1, receive data for top x-line of uext */
  if (isuby != NPEY-1)
    MPI_Wait(&request[1],&status);

  /* If isubx > 0, receive data for left y-line of uext (via bufleft) */
  if (isubx != 0) {
    MPI_Wait(&request[2],&status);

    /* Copy the buffer to uext */
    for (ly = 0; ly < MYSUB; ly++) {
      offsetbuf = ly*NVARS;
      offsetue = (ly+1)*dsizex2;
      for (i = 0; i < NVARS; i++)
        uext[offsetue+i] = bufleft[offsetbuf+i];
    }
  }

  /* If isubx < NPEX-1, receive data for right y-line of uext (via bufright) */
  if (isubx != NPEX-1) {
    MPI_Wait(&request[3],&status);

    /* Copy the buffer to uext */
    for (ly = 0; ly < MYSUB; ly++) {
      offsetbuf = ly*NVARS;
      offsetue = (ly+2)*dsizex2 - NVARS;
      for (i = 0; i < NVARS; i++)
        uext[offsetue+i] = bufright[offsetbuf+i];
    }
  }
}

/* fucomm routine.  This routine performs all inter-processor
   communication of data in u needed to calculate f.         */

static void fucomm(realtype t, N_Vector u, void *user_data)
{
  UserData data;
  realtype *uarray, *uext, buffer[2*NVARS*MYSUB];
  MPI_Comm comm;
  int my_pe, isubx, isuby;
  sunindextype nvmxsub, nvmysub;
  MPI_Request request[4];

  data = (UserData) user_data;
  uarray = N_VGetArrayPointer_Parallel(u);

  /* Get comm, my_pe, subgrid indices, data sizes, extended array uext */

  comm = data->comm;  my_pe = data->my_pe;
  isubx = data->isubx;   isuby = data->isuby;
  nvmxsub = data->nvmxsub;
  nvmysub = NVARS*MYSUB;
  uext = data->uext;
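
  /* Exchange pattern: post the non-blocking receives first, then issue the
     sends, then wait.  Boundary data from neighboring PEs ends up in the
     ghost lines of uext. */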

  /* Start receiving boundary data from neighboring PEs */

  BRecvPost(comm, request, my_pe, isubx, isuby, nvmxsub, nvmysub, uext, buffer);

  /* Send data from boundary of local grid to neighboring PEs */

  BSend(comm, my_pe, isubx, isuby, nvmxsub, nvmysub, uarray);

  /* Finish receiving boundary data from neighboring PEs */

  BRecvWait(request, isubx, isuby, nvmxsub, uext, buffer);
}

/***************** Function called by the solver **************************/

/* f routine.  Evaluate f(t,y).  First call fucomm to do communication of
   subgrid boundary data into uext.  Then calculate f by a call to flocal. */

static int f(realtype t, N_Vector u, N_Vector udot, void *user_data)
{
  UserData data;

  data = (UserData) user_data;

  /* Call fucomm to do inter-processor communication */

  fucomm (t, u, user_data);

  /* Call flocal to calculate all right-hand sides */

  flocal (data->Nlocal, t, u, udot, user_data);

  return(0);
}

/***************** Functions called by the CVBBDPRE module ****************/

/* flocal routine.  Compute f(t,y).  This routine assumes that all
   inter-processor communication of data needed to calculate f has already
   been done, and this data is in the work array uext.                    */

static int flocal(sunindextype Nlocal, realtype t, N_Vector u,
                  N_Vector udot, void *user_data)
{
  realtype *uext;
  realtype q3, c1, c2, c1dn, c2dn, c1up, c2up, c1lt, c2lt;
  realtype c1rt, c2rt, cydn, cyup, hord1, hord2, horad1, horad2;
  realtype qq1, qq2, qq3, qq4, rkin1, rkin2, s, vertd1, vertd2, ydn, yup;
  realtype q4coef, dely, verdco, hordco, horaco;
  int i, lx, ly, jy;
  int isubx, isuby;
  sunindextype nvmxsub, nvmxsub2, offsetu, offsetue;
  UserData data;
  realtype *uarray, *duarray;

  uarray = N_VGetArrayPointer_Parallel(u);
  duarray = N_VGetArrayPointer_Parallel(udot);

  /* Get subgrid indices, array sizes, extended work array uext */

  data = (UserData) user_data;
  isubx = data->isubx;   isuby = data->isuby;
  nvmxsub = data->nvmxsub; nvmxsub2 = data->nvmxsub2;
  uext = data->uext;

  /* Copy local segment of u vector into the working extended array uext */
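  /* uext stores one extended x-line of nvmxsub2 = NVARS*(MXSUB+2) values per
     mesh row, with (MYSUB+2) such rows.  The interior starts at offset
     nvmxsub2 + NVARS, which skips the bottom ghost row and the left ghost
     point of the first interior row. */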

  offsetu = 0;
  offsetue = nvmxsub2 + NVARS;
  for (ly = 0; ly < MYSUB; ly++) {
    for (i = 0; i < nvmxsub; i++) uext[offsetue+i] = uarray[offsetu+i];
    offsetu = offsetu + nvmxsub;
    offsetue = offsetue + nvmxsub2;
  }

  /* To facilitate homogeneous Neumann boundary conditions, when this is
  a boundary PE, copy data from the first interior mesh line of u to uext */

  /* If isuby = 0, copy x-line 2 of u to uext */
  if (isuby == 0) {
    for (i = 0; i < nvmxsub; i++) uext[NVARS+i] = uarray[nvmxsub+i];
  }

  /* If isuby = NPEY-1, copy x-line MYSUB-1 of u to uext */
  if (isuby == NPEY-1) {
    offsetu = (MYSUB-2)*nvmxsub;
    offsetue = (MYSUB+1)*nvmxsub2 + NVARS;
    for (i = 0; i < nvmxsub; i++) uext[offsetue+i] = uarray[offsetu+i];
  }

  /* If isubx = 0, copy y-line 2 of u to uext */
  if (isubx == 0) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetu = ly*nvmxsub + NVARS;
      offsetue = (ly+1)*nvmxsub2;
      for (i = 0; i < NVARS; i++) uext[offsetue+i] = uarray[offsetu+i];
    }
  }

  /* If isubx = NPEX-1, copy y-line MXSUB-1 of u to uext */
  if (isubx == NPEX-1) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetu = (ly+1)*nvmxsub - 2*NVARS;
      offsetue = (ly+2)*nvmxsub2 - NVARS;
      for (i = 0; i < NVARS; i++) uext[offsetue+i] = uarray[offsetu+i];
    }
  }

  /* Make local copies of problem variables, for efficiency */

  dely = data->dy;
  verdco = data->vdco;
  hordco = data->hdco;
  horaco = data->haco;

  /* Set diurnal rate coefficients as functions of t, and save q4 in
  data block for use by preconditioner evaluation routine            */

  s = sin((data->om)*t);
  if (s > ZERO) {
    q3 = exp(-A3/s);
    q4coef = exp(-A4/s);
  } else {
    q3 = ZERO;
    q4coef = ZERO;
  }
  data->q4 = q4coef;
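  /* The diurnally varying coefficients q3(t) and q4(t) are nonzero only
     while sin(om*t) > 0 (daytime); both are switched off at night. */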


  /* Loop over all grid points in local subgrid */

  for (ly = 0; ly < MYSUB; ly++) {

    jy = ly + isuby*MYSUB;

    /* Set vertical diffusion coefficients at jy +- 1/2 */

    ydn = YMIN + (jy - RCONST(0.5))*dely;
    yup = ydn + dely;
    cydn = verdco*exp(RCONST(0.2)*ydn);
    cyup = verdco*exp(RCONST(0.2)*yup);
    for (lx = 0; lx < MXSUB; lx++) {

      /* Extract c1 and c2, and set kinetic rate terms */

      offsetue = (lx+1)*NVARS + (ly+1)*nvmxsub2;
      c1 = uext[offsetue];
      c2 = uext[offsetue+1];
      qq1 = Q1*c1*C3;
      qq2 = Q2*c1*c2;
      qq3 = q3*C3;
      qq4 = q4coef*c2;
      rkin1 = -qq1 - qq2 + RCONST(2.0)*qq3 + qq4;
      rkin2 = qq1 - qq2 - qq4;

      /* Set vertical diffusion terms */

      c1dn = uext[offsetue-nvmxsub2];
      c2dn = uext[offsetue-nvmxsub2+1];
      c1up = uext[offsetue+nvmxsub2];
      c2up = uext[offsetue+nvmxsub2+1];
      vertd1 = cyup*(c1up - c1) - cydn*(c1 - c1dn);
      vertd2 = cyup*(c2up - c2) - cydn*(c2 - c2dn);

      /* Set horizontal diffusion and advection terms */

      c1lt = uext[offsetue-2];
      c2lt = uext[offsetue-1];
      c1rt = uext[offsetue+2];
      c2rt = uext[offsetue+3];
      hord1 = hordco*(c1rt - RCONST(2.0)*c1 + c1lt);
      hord2 = hordco*(c2rt - RCONST(2.0)*c2 + c2lt);
      horad1 = horaco*(c1rt - c1lt);
      horad2 = horaco*(c2rt - c2lt);

      /* Load all terms into duarray */

      offsetu = lx*NVARS + ly*nvmxsub;
      duarray[offsetu]   = vertd1 + hord1 + horad1 + rkin1;
      duarray[offsetu+1] = vertd2 + hord2 + horad2 + rkin2;
    }
  }

  return(0);
}

/* Check function return value...
     opt == 0 means SUNDIALS function allocates memory so check if
              returned NULL pointer
     opt == 1 means SUNDIALS function returns an integer value so check if
              retval < 0
     opt == 2 means function allocates memory so check if returned
              NULL pointer */

static int check_retval(void *returnvalue, const char *funcname, int opt, int id)
{
  int *retval;

  /* Check if SUNDIALS function returned NULL pointer - no memory allocated */
  if (opt == 0 && returnvalue == NULL) {
    fprintf(stderr, "\nSUNDIALS_ERROR(%d): %s() failed - returned NULL pointer\n\n",
            id, funcname);
    return(1); }

  /* Check if retval < 0 */
  else if (opt == 1) {
    retval = (int *) returnvalue;
    if (*retval < 0) {
      fprintf(stderr, "\nSUNDIALS_ERROR(%d): %s() failed with retval = %d\n\n",
              id, funcname, *retval);
      return(1); }}

  /* Check if function returned NULL pointer - no memory allocated */
  else if (opt == 2 && returnvalue == NULL) {
    fprintf(stderr, "\nMEMORY_ERROR(%d): %s() failed - returned NULL pointer\n\n",
            id, funcname);
    return(1); }

  return(0);
}