/*
 * -----------------------------------------------------------------
 * Programmer(s): S. D. Cohen, A. C. Hindmarsh, M. R. Wittman, and
 *                Radu Serban @ LLNL
 * -----------------------------------------------------------------
 * SUNDIALS Copyright Start
 * Copyright (c) 2002-2021, Lawrence Livermore National Security
 * and Southern Methodist University.
 * All rights reserved.
 *
 * See the top-level LICENSE and NOTICE files for details.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 * SUNDIALS Copyright End
 * -----------------------------------------------------------------
 * Example problem:
 *
 * An ODE system is generated from the following 2-species diurnal
 * kinetics advection-diffusion PDE system in 2 space dimensions:
 *
 * dc(i)/dt = Kh*(d/dx)^2 c(i) + V*dc(i)/dx + (d/dy)(Kv(y)*dc(i)/dy)
 *                 + Ri(c1,c2,t)      for i = 1,2,   where
 *   R1(c1,c2,t) = -q1*c1*c3 - q2*c1*c2 + 2*q3(t)*c3 + q4(t)*c2 ,
 *   R2(c1,c2,t) =  q1*c1*c3 - q2*c1*c2 - q4(t)*c2 ,
 *   Kv(y) = Kv0*exp(y/5) ,
 * Kh, V, Kv0, q1, q2, and c3 are constants, and q3(t) and q4(t)
 * vary diurnally. The problem is posed on the square
 *   0 <= x <= 20,    30 <= y <= 50   (all in km),
 * with homogeneous Neumann boundary conditions, and for time t in
 *   0 <= t <= 86400 sec (1 day).
 * The PDE system is treated by central differences on a uniform
 * mesh, with simple polynomial initial profiles.
 *
 * The problem is solved by CVODE on NPE processors, treated
 * as a rectangular process grid of size NPEX by NPEY, with
 * NPE = NPEX*NPEY. Each processor contains a subgrid of size MXSUB
 * by MYSUB of the (x,y) mesh. Thus the actual mesh sizes are
 * MX = MXSUB*NPEX and MY = MYSUB*NPEY, and the ODE system size is
 * neq = 2*MX*MY.
 *
 * The solution is done with the BDF/GMRES method (i.e. using the
 * SUNLinSol_SPGMR linear solver) and a block-diagonal matrix with
 * banded blocks as a preconditioner, using the CVBBDPRE module.
 * Each block is generated using difference quotients, with
 * half-bandwidths mudq = mldq = 2*MXSUB, but the retained banded
 * blocks have half-bandwidths mukeep = mlkeep = 2.
 * A copy of the approximate Jacobian is saved and conditionally
 * reused within the preconditioner routine.
 *
 * The problem is solved twice -- with left and right preconditioning.
 *
 * Performance data and sampled solution values are printed at
 * selected output times, and all performance counters are printed
 * on completion.
 *
 * This version uses MPI for user routines.
 * Execute with number of processors = NPEX*NPEY (see constants below).
 * --------------------------------------------------------------------
 */
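
/* Example launch (assuming the built executable is named
 * cvDiurnal_kry_bbd_p -- adjust the name to your build):
 *   mpirun -np 4 ./cvDiurnal_kry_bbd_p
 * where 4 = NPEX*NPEY for the default 2 x 2 process grid. */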

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include <cvode/cvode.h>               /* prototypes for CVODE fcts., consts.  */
#include <nvector/nvector_parallel.h>  /* access to MPI-parallel N_Vector      */
#include <sunlinsol/sunlinsol_spgmr.h> /* access to SPGMR SUNLinearSolver      */
#include <cvode/cvode_bbdpre.h>        /* access to CVBBDPRE module            */
#include <sundials/sundials_types.h>   /* definitions of realtype, booleantype */

#include <mpi.h>                       /* MPI constants and types */

/* helpful macros */

#ifndef SQR
#define SQR(A) ((A)*(A))
#endif

/* Problem Constants */

#define ZERO         RCONST(0.0)

#define NVARS        2                 /* number of species         */
#define KH           RCONST(4.0e-6)    /* horizontal diffusivity Kh */
#define VEL          RCONST(0.001)     /* advection velocity V      */
#define KV0          RCONST(1.0e-8)    /* coefficient in Kv(y)      */
#define Q1           RCONST(1.63e-16)  /* coefficients q1, q2, c3   */
#define Q2           RCONST(4.66e-16)
#define C3           RCONST(3.7e16)
#define A3           RCONST(22.62)     /* coefficient in expression for q3(t) */
#define A4           RCONST(7.601)     /* coefficient in expression for q4(t) */
#define C1_SCALE     RCONST(1.0e6)     /* coefficients in initial profiles    */
#define C2_SCALE     RCONST(1.0e12)

#define T0           ZERO              /* initial time                    */
#define NOUT         12                /* number of output times          */
#define TWOHR        RCONST(7200.0)    /* number of seconds in two hours  */
#define HALFDAY      RCONST(4.32e4)    /* number of seconds in a half day */
#define PI           RCONST(3.1415926535898)  /* pi */

#define XMIN         ZERO              /* grid boundaries in x */
#define XMAX         RCONST(20.0)
#define YMIN         RCONST(30.0)      /* grid boundaries in y */
#define YMAX         RCONST(50.0)

#define NPEX         2                 /* no. PEs in x direction of PE array */
#define NPEY         2                 /* no. PEs in y direction of PE array */
                                       /* Total no. PEs = NPEX*NPEY          */
#define MXSUB        5                 /* no. x points per subgrid */
#define MYSUB        5                 /* no. y points per subgrid */

#define MX           (NPEX*MXSUB)      /* MX = number of x mesh points */
#define MY           (NPEY*MYSUB)      /* MY = number of y mesh points */
                                       /* Spatial mesh is MX by MY     */

/* CVodeInit Constants */

#define RTOL         RCONST(1.0e-5)    /* scalar relative tolerance */
#define FLOOR        RCONST(100.0)     /* value of C1 or C2 at which tolerances */
                                       /* change from relative to absolute      */
#define ATOL         (RTOL*FLOOR)      /* scalar absolute tolerance */
/* Type : UserData
   contains problem constants, extended dependent variable array,
   grid constants, processor indices, MPI communicator */

typedef struct {
  realtype q4, om, dx, dy, hdco, haco, vdco;
  realtype uext[NVARS*(MXSUB+2)*(MYSUB+2)];
  int my_pe, isubx, isuby;
  sunindextype nvmxsub, nvmxsub2, Nlocal;
  MPI_Comm comm;
} *UserData;
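
/* Note on uext: it holds the local MXSUB x MYSUB subgrid plus a halo of
   width one mesh point on all four sides, hence (MXSUB+2)*(MYSUB+2) points
   with NVARS species each. Interior point (lx,ly) of the subgrid is stored
   at index NVARS*((lx+1) + (ly+1)*(MXSUB+2)), i.e. offset by one row and
   one column relative to the local u array. */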

/* Prototypes of private helper functions */

static void InitUserData(int my_pe, sunindextype local_N, MPI_Comm comm,
                         UserData data);
static void SetInitialProfiles(N_Vector u, UserData data);
static void PrintIntro(int npes, sunindextype mudq, sunindextype mldq,
                       sunindextype mukeep, sunindextype mlkeep);
static void PrintOutput(void *cvode_mem, int my_pe, MPI_Comm comm,
                        N_Vector u, realtype t);
static void PrintFinalStats(void *cvode_mem);
static void BSend(MPI_Comm comm,
                  int my_pe, int isubx, int isuby,
                  sunindextype dsizex, sunindextype dsizey,
                  realtype uarray[]);
static void BRecvPost(MPI_Comm comm, MPI_Request request[],
                      int my_pe, int isubx, int isuby,
                      sunindextype dsizex, sunindextype dsizey,
                      realtype uext[], realtype buffer[]);
static void BRecvWait(MPI_Request request[],
                      int isubx, int isuby,
                      sunindextype dsizex, realtype uext[],
                      realtype buffer[]);

static void fucomm(realtype t, N_Vector u, void *user_data);

/* Prototype of function called by the solver */

static int f(realtype t, N_Vector u, N_Vector udot, void *user_data);

/* Prototype of functions called by the CVBBDPRE module */

static int flocal(sunindextype Nlocal, realtype t, N_Vector u,
                  N_Vector udot, void *user_data);

/* Private function to check function return values */

static int check_retval(void *returnvalue, const char *funcname, int opt, int id);

/***************************** Main Program ******************************/

int main(int argc, char *argv[])
{
  UserData data;
  SUNLinearSolver LS;
  void *cvode_mem;
  realtype abstol, reltol, t, tout;
  N_Vector u;
  int iout, my_pe, npes, retval, jpre;
  sunindextype neq, local_N, mudq, mldq, mukeep, mlkeep;
  MPI_Comm comm;

  data = NULL;
  LS = NULL;
  cvode_mem = NULL;
  u = NULL;

  /* Set problem size neq */
  neq = NVARS*MX*MY;

  /* Get processor number and total number of pe's */
  MPI_Init(&argc, &argv);
  comm = MPI_COMM_WORLD;
  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &my_pe);

  if (npes != NPEX*NPEY) {
    if (my_pe == 0)
      fprintf(stderr, "\nMPI_ERROR(0): npes = %d is not equal to NPEX*NPEY = %d\n\n",
              npes, NPEX*NPEY);
    MPI_Finalize();
    return(1);
  }

  /* Set local length */
  local_N = NVARS*MXSUB*MYSUB;

  /* Allocate and load user data block */
  data = (UserData) malloc(sizeof *data);
  if(check_retval((void *)data, "malloc", 2, my_pe)) MPI_Abort(comm, 1);
  InitUserData(my_pe, local_N, comm, data);

  /* Allocate and initialize u, and set tolerances */
  u = N_VNew_Parallel(comm, local_N, neq);
  if(check_retval((void *)u, "N_VNew_Parallel", 0, my_pe)) MPI_Abort(comm, 1);
  SetInitialProfiles(u, data);
  abstol = ATOL;
  reltol = RTOL;

  /* Call CVodeCreate to create the solver memory and specify the
   * Backward Differentiation Formula */
  cvode_mem = CVodeCreate(CV_BDF);
  if(check_retval((void *)cvode_mem, "CVodeCreate", 0, my_pe)) MPI_Abort(comm, 1);

  /* Set the pointer to user-defined data */
  retval = CVodeSetUserData(cvode_mem, data);
  if(check_retval(&retval, "CVodeSetUserData", 1, my_pe)) MPI_Abort(comm, 1);

  /* Call CVodeInit to initialize the integrator memory and specify the
   * user's right hand side function in u'=f(t,u), the initial time T0, and
   * the initial dependent variable vector u. */
  retval = CVodeInit(cvode_mem, f, T0, u);
  if(check_retval(&retval, "CVodeInit", 1, my_pe)) return(1);

  /* Call CVodeSStolerances to specify the scalar relative tolerance
   * and scalar absolute tolerances */
  retval = CVodeSStolerances(cvode_mem, reltol, abstol);
  if (check_retval(&retval, "CVodeSStolerances", 1, my_pe)) return(1);

  /* Create SPGMR solver structure -- use left preconditioning
     and the default Krylov dimension maxl */
  LS = SUNLinSol_SPGMR(u, PREC_LEFT, 0);
  if (check_retval((void *)LS, "SUNLinSol_SPGMR", 0, my_pe)) MPI_Abort(comm, 1);

  /* Attach SPGMR solver structure to CVode interface */
  retval = CVodeSetLinearSolver(cvode_mem, LS, NULL);
  if (check_retval(&retval, "CVodeSetLinearSolver", 1, my_pe)) MPI_Abort(comm, 1);

  /* Initialize BBD preconditioner */
  mudq = mldq = NVARS*MXSUB;
  mukeep = mlkeep = NVARS;
  retval = CVBBDPrecInit(cvode_mem, local_N, mudq, mldq,
                         mukeep, mlkeep, ZERO, flocal, NULL);
  if(check_retval(&retval, "CVBBDPrecInit", 1, my_pe)) MPI_Abort(comm, 1);
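
  /* Note: the final NULL argument to CVBBDPrecInit is the optional
     inter-process communication function. It can be NULL here because f()
     already performs all communication needed by flocal(), via fucomm(),
     before flocal() is used by the preconditioner. */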

  /* Print heading */
  if (my_pe == 0) PrintIntro(npes, mudq, mldq, mukeep, mlkeep);

  /* Loop over jpre (= PREC_LEFT, PREC_RIGHT), and solve the problem */
  for (jpre = PREC_LEFT; jpre <= PREC_RIGHT; jpre++) {

    /* On second run, re-initialize u, the integrator, CVBBDPRE, and SPGMR */

    if (jpre == PREC_RIGHT) {

      SetInitialProfiles(u, data);

      retval = CVodeReInit(cvode_mem, T0, u);
      if(check_retval(&retval, "CVodeReInit", 1, my_pe)) MPI_Abort(comm, 1);

      retval = CVBBDPrecReInit(cvode_mem, mudq, mldq, ZERO);
      if(check_retval(&retval, "CVBBDPrecReInit", 1, my_pe)) MPI_Abort(comm, 1);

      retval = SUNLinSol_SPGMRSetPrecType(LS, PREC_RIGHT);
      if(check_retval(&retval, "SUNLinSol_SPGMRSetPrecType", 1, my_pe)) MPI_Abort(comm, 1);

      if (my_pe == 0) {
        printf("\n\n-------------------------------------------------------");
        printf("------------\n");
      }

    }

    if (my_pe == 0) {
      printf("\n\nPreconditioner type is: jpre = %s\n\n",
             (jpre == PREC_LEFT) ? "PREC_LEFT" : "PREC_RIGHT");
    }

    /* In loop over output points, call CVode, print results, test for error */

    for (iout = 1, tout = TWOHR; iout <= NOUT; iout++, tout += TWOHR) {
      retval = CVode(cvode_mem, tout, u, &t, CV_NORMAL);
      if(check_retval(&retval, "CVode", 1, my_pe)) break;
      PrintOutput(cvode_mem, my_pe, comm, u, t);
    }

    /* Print final statistics */

    if (my_pe == 0) PrintFinalStats(cvode_mem);

  } /* End of jpre loop */

  /* Free memory */
  N_VDestroy(u);
  free(data);
  CVodeFree(&cvode_mem);
  SUNLinSolFree(LS);

  MPI_Finalize();

  return(0);
}

/*********************** Private Helper Functions ************************/

/* Load constants in data */

static void InitUserData(int my_pe, sunindextype local_N, MPI_Comm comm,
                         UserData data)
{
  int isubx, isuby;

  /* Set problem constants */
  data->om = PI/HALFDAY;
  data->dx = (XMAX-XMIN)/((realtype)(MX-1));
  data->dy = (YMAX-YMIN)/((realtype)(MY-1));
  data->hdco = KH/SQR(data->dx);
  data->haco = VEL/(RCONST(2.0)*data->dx);
  data->vdco = (RCONST(1.0)/SQR(data->dy))*KV0;

  /* Set machine-related constants */
  data->comm = comm;
  data->my_pe = my_pe;
  data->Nlocal = local_N;
  /* isubx and isuby are the PE grid indices corresponding to my_pe */
  isuby = my_pe/NPEX;
  isubx = my_pe - isuby*NPEX;
  data->isubx = isubx;
  data->isuby = isuby;
  /* Set the sizes of a boundary x-line in u and uext */
  data->nvmxsub = NVARS*MXSUB;
  data->nvmxsub2 = NVARS*(MXSUB+2);
}

/* Set initial conditions in u */

static void SetInitialProfiles(N_Vector u, UserData data)
{
  int isubx, isuby;
  int lx, ly, jx, jy;
  sunindextype offset;
  realtype dx, dy, x, y, cx, cy, xmid, ymid;
  realtype *uarray;

  /* Set pointer to data array in vector u */

  uarray = N_VGetArrayPointer(u);

  /* Get mesh spacings, and subgrid indices for this PE */

  dx = data->dx;         dy = data->dy;
  isubx = data->isubx;   isuby = data->isuby;

  /* Load initial profiles of c1 and c2 into local u vector.
     Here lx and ly are local mesh point indices on the local subgrid,
     and jx and jy are the global mesh point indices. */

  offset = 0;
  xmid = RCONST(0.5)*(XMIN + XMAX);
  ymid = RCONST(0.5)*(YMIN + YMAX);
  for (ly = 0; ly < MYSUB; ly++) {
    jy = ly + isuby*MYSUB;
    y = YMIN + jy*dy;
    cy = SQR(RCONST(0.1)*(y - ymid));
    cy = RCONST(1.0) - cy + RCONST(0.5)*SQR(cy);
    for (lx = 0; lx < MXSUB; lx++) {
      jx = lx + isubx*MXSUB;
      x = XMIN + jx*dx;
      cx = SQR(RCONST(0.1)*(x - xmid));
      cx = RCONST(1.0) - cx + RCONST(0.5)*SQR(cx);
      uarray[offset  ] = C1_SCALE*cx*cy;
      uarray[offset+1] = C2_SCALE*cx*cy;
      offset = offset + 2;
    }
  }
}

/* Print problem introduction */

static void PrintIntro(int npes, sunindextype mudq, sunindextype mldq,
                       sunindextype mukeep, sunindextype mlkeep)
{
  printf("\n2-species diurnal advection-diffusion problem\n");
  printf(" %d by %d mesh on %d processors\n", MX, MY, npes);
  printf(" Using CVBBDPRE preconditioner module\n");
  printf(" Difference-quotient half-bandwidths are");
  printf(" mudq = %ld, mldq = %ld\n", (long int) mudq, (long int) mldq);
  printf(" Retained band block half-bandwidths are");
  printf(" mukeep = %ld, mlkeep = %ld", (long int) mukeep, (long int) mlkeep);

  return;
}

/* Print current t, step count, order, stepsize, and sampled c1,c2 values */

static void PrintOutput(void *cvode_mem, int my_pe, MPI_Comm comm,
                        N_Vector u, realtype t)
{
  int qu, retval, npelast;
  long int nst;
  sunindextype i0, i1;
  realtype hu, *uarray, tempu[2];
  MPI_Status status;

  npelast = NPEX*NPEY - 1;
  uarray = N_VGetArrayPointer(u);

  /* Send c1,c2 at top right mesh point to PE 0 */
  if (my_pe == npelast) {
    i0 = NVARS*MXSUB*MYSUB - 2;
    i1 = i0 + 1;
    if (npelast != 0)
      MPI_Send(&uarray[i0], 2, MPI_SUNREALTYPE, 0, 0, comm);
    else {
      tempu[0] = uarray[i0];
      tempu[1] = uarray[i1];
    }
  }

  /* On PE 0, receive c1,c2 at top right, then print performance data
     and sampled solution values */
  if (my_pe == 0) {
    if (npelast != 0)
      MPI_Recv(&tempu[0], 2, MPI_SUNREALTYPE, npelast, 0, comm, &status);
    retval = CVodeGetNumSteps(cvode_mem, &nst);
    check_retval(&retval, "CVodeGetNumSteps", 1, my_pe);
    retval = CVodeGetLastOrder(cvode_mem, &qu);
    check_retval(&retval, "CVodeGetLastOrder", 1, my_pe);
    retval = CVodeGetLastStep(cvode_mem, &hu);
    check_retval(&retval, "CVodeGetLastStep", 1, my_pe);
#if defined(SUNDIALS_EXTENDED_PRECISION)
    printf("t = %.2Le no. steps = %ld order = %d stepsize = %.2Le\n",
           t, nst, qu, hu);
    printf("At bottom left: c1, c2 = %12.3Le %12.3Le \n", uarray[0], uarray[1]);
    printf("At top right: c1, c2 = %12.3Le %12.3Le \n\n", tempu[0], tempu[1]);
#elif defined(SUNDIALS_DOUBLE_PRECISION)
    printf("t = %.2e no. steps = %ld order = %d stepsize = %.2e\n",
           t, nst, qu, hu);
    printf("At bottom left: c1, c2 = %12.3e %12.3e \n", uarray[0], uarray[1]);
    printf("At top right: c1, c2 = %12.3e %12.3e \n\n", tempu[0], tempu[1]);
#else
    printf("t = %.2e no. steps = %ld order = %d stepsize = %.2e\n",
           t, nst, qu, hu);
    printf("At bottom left: c1, c2 = %12.3e %12.3e \n", uarray[0], uarray[1]);
    printf("At top right: c1, c2 = %12.3e %12.3e \n\n", tempu[0], tempu[1]);
#endif
  }
}

/* Print final statistics contained in iopt */

static void PrintFinalStats(void *cvode_mem)
{
  long int lenrw, leniw;
  long int lenrwLS, leniwLS;
  long int lenrwBBDP, leniwBBDP;
  long int nst, nfe, nsetups, nni, ncfn, netf, ngevalsBBDP;
  long int nli, npe, nps, ncfl, nfeLS;
  int retval;

  retval = CVodeGetWorkSpace(cvode_mem, &lenrw, &leniw);
  check_retval(&retval, "CVodeGetWorkSpace", 1, 0);
  retval = CVodeGetNumSteps(cvode_mem, &nst);
  check_retval(&retval, "CVodeGetNumSteps", 1, 0);
  retval = CVodeGetNumRhsEvals(cvode_mem, &nfe);
  check_retval(&retval, "CVodeGetNumRhsEvals", 1, 0);
  retval = CVodeGetNumLinSolvSetups(cvode_mem, &nsetups);
  check_retval(&retval, "CVodeGetNumLinSolvSetups", 1, 0);
  retval = CVodeGetNumErrTestFails(cvode_mem, &netf);
  check_retval(&retval, "CVodeGetNumErrTestFails", 1, 0);
  retval = CVodeGetNumNonlinSolvIters(cvode_mem, &nni);
  check_retval(&retval, "CVodeGetNumNonlinSolvIters", 1, 0);
  retval = CVodeGetNumNonlinSolvConvFails(cvode_mem, &ncfn);
  check_retval(&retval, "CVodeGetNumNonlinSolvConvFails", 1, 0);

  retval = CVodeGetLinWorkSpace(cvode_mem, &lenrwLS, &leniwLS);
  check_retval(&retval, "CVodeGetLinWorkSpace", 1, 0);
  retval = CVodeGetNumLinIters(cvode_mem, &nli);
  check_retval(&retval, "CVodeGetNumLinIters", 1, 0);
  retval = CVodeGetNumPrecEvals(cvode_mem, &npe);
  check_retval(&retval, "CVodeGetNumPrecEvals", 1, 0);
  retval = CVodeGetNumPrecSolves(cvode_mem, &nps);
  check_retval(&retval, "CVodeGetNumPrecSolves", 1, 0);
  retval = CVodeGetNumLinConvFails(cvode_mem, &ncfl);
  check_retval(&retval, "CVodeGetNumLinConvFails", 1, 0);
  retval = CVodeGetNumLinRhsEvals(cvode_mem, &nfeLS);
  check_retval(&retval, "CVodeGetNumLinRhsEvals", 1, 0);

  printf("\nFinal Statistics: \n\n");
  printf("lenrw = %5ld leniw = %5ld\n", lenrw, leniw);
  printf("lenrwls = %5ld leniwls = %5ld\n", lenrwLS, leniwLS);
  printf("nst = %5ld\n", nst);
  printf("nfe = %5ld nfels = %5ld\n", nfe, nfeLS);
  printf("nni = %5ld nli = %5ld\n", nni, nli);
  printf("nsetups = %5ld netf = %5ld\n", nsetups, netf);
  printf("npe = %5ld nps = %5ld\n", npe, nps);
  printf("ncfn = %5ld ncfl = %5ld\n\n", ncfn, ncfl);

  retval = CVBBDPrecGetWorkSpace(cvode_mem, &lenrwBBDP, &leniwBBDP);
  check_retval(&retval, "CVBBDPrecGetWorkSpace", 1, 0);
  retval = CVBBDPrecGetNumGfnEvals(cvode_mem, &ngevalsBBDP);
  check_retval(&retval, "CVBBDPrecGetNumGfnEvals", 1, 0);
  printf("In CVBBDPRE: real/integer local work space sizes = %ld, %ld\n",
         lenrwBBDP, leniwBBDP);
  printf(" no. flocal evals. = %ld\n", ngevalsBBDP);
}

/* Routine to send boundary data to neighboring PEs */

static void BSend(MPI_Comm comm,
                  int my_pe, int isubx, int isuby,
                  sunindextype dsizex, sunindextype dsizey,
                  realtype uarray[])
{
  int i, ly;
  sunindextype offsetu, offsetbuf;
  realtype bufleft[NVARS*MYSUB], bufright[NVARS*MYSUB];

  /* If isuby > 0, send data from bottom x-line of u */

  if (isuby != 0)
    MPI_Send(&uarray[0], (int) dsizex, MPI_SUNREALTYPE, my_pe-NPEX, 0, comm);

  /* If isuby < NPEY-1, send data from top x-line of u */

  if (isuby != NPEY-1) {
    offsetu = (MYSUB-1)*dsizex;
    MPI_Send(&uarray[offsetu], (int) dsizex, MPI_SUNREALTYPE, my_pe+NPEX, 0, comm);
  }

  /* If isubx > 0, send data from left y-line of u (via bufleft) */

  if (isubx != 0) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetbuf = ly*NVARS;
      offsetu = ly*dsizex;
      for (i = 0; i < NVARS; i++)
        bufleft[offsetbuf+i] = uarray[offsetu+i];
    }
    MPI_Send(&bufleft[0], (int) dsizey, MPI_SUNREALTYPE, my_pe-1, 0, comm);
  }

  /* If isubx < NPEX-1, send data from right y-line of u (via bufright) */

  if (isubx != NPEX-1) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetbuf = ly*NVARS;
      offsetu = offsetbuf*MXSUB + (MXSUB-1)*NVARS;
      for (i = 0; i < NVARS; i++)
        bufright[offsetbuf+i] = uarray[offsetu+i];
    }
    MPI_Send(&bufright[0], (int) dsizey, MPI_SUNREALTYPE, my_pe+1, 0, comm);
  }

}

/* Routine to start receiving boundary data from neighboring PEs.
   Notes:
   1) buffer should be able to hold 2*NVARS*MYSUB realtype entries, should be
      passed to both the BRecvPost and BRecvWait functions, and should not
      be manipulated between the two calls.
   2) request should have 4 entries, and should be passed in both calls also. */

static void BRecvPost(MPI_Comm comm, MPI_Request request[],
                      int my_pe, int isubx, int isuby,
                      sunindextype dsizex, sunindextype dsizey,
                      realtype uext[], realtype buffer[])
{
  sunindextype offsetue;
  /* Have bufleft and bufright use the same buffer */
  realtype *bufleft = buffer, *bufright = buffer+NVARS*MYSUB;

  /* If isuby > 0, receive data for bottom x-line of uext */
  if (isuby != 0)
    MPI_Irecv(&uext[NVARS], (int) dsizex, MPI_SUNREALTYPE,
              my_pe-NPEX, 0, comm, &request[0]);

  /* If isuby < NPEY-1, receive data for top x-line of uext */
  if (isuby != NPEY-1) {
    offsetue = NVARS*(1 + (MYSUB+1)*(MXSUB+2));
    MPI_Irecv(&uext[offsetue], (int) dsizex, MPI_SUNREALTYPE,
              my_pe+NPEX, 0, comm, &request[1]);
  }

  /* If isubx > 0, receive data for left y-line of uext (via bufleft) */
  if (isubx != 0) {
    MPI_Irecv(&bufleft[0], (int) dsizey, MPI_SUNREALTYPE,
              my_pe-1, 0, comm, &request[2]);
  }

  /* If isubx < NPEX-1, receive data for right y-line of uext (via bufright) */
  if (isubx != NPEX-1) {
    MPI_Irecv(&bufright[0], (int) dsizey, MPI_SUNREALTYPE,
              my_pe+1, 0, comm, &request[3]);
  }

}

/* Routine to finish receiving boundary data from neighboring PEs.
   Notes:
   1) buffer should be able to hold 2*NVARS*MYSUB realtype entries, should be
      passed to both the BRecvPost and BRecvWait functions, and should not
      be manipulated between the two calls.
   2) request should have 4 entries, and should be passed in both calls also. */

static void BRecvWait(MPI_Request request[],
                      int isubx, int isuby,
                      sunindextype dsizex, realtype uext[],
                      realtype buffer[])
{
  int i, ly;
  sunindextype dsizex2, offsetue, offsetbuf;
  realtype *bufleft = buffer, *bufright = buffer+NVARS*MYSUB;
  MPI_Status status;

  dsizex2 = dsizex + 2*NVARS;

  /* If isuby > 0, receive data for bottom x-line of uext */
  if (isuby != 0)
    MPI_Wait(&request[0], &status);

  /* If isuby < NPEY-1, receive data for top x-line of uext */
  if (isuby != NPEY-1)
    MPI_Wait(&request[1], &status);

  /* If isubx > 0, receive data for left y-line of uext (via bufleft) */
  if (isubx != 0) {
    MPI_Wait(&request[2], &status);

    /* Copy the buffer to uext */
    for (ly = 0; ly < MYSUB; ly++) {
      offsetbuf = ly*NVARS;
      offsetue = (ly+1)*dsizex2;
      for (i = 0; i < NVARS; i++)
        uext[offsetue+i] = bufleft[offsetbuf+i];
    }
  }

  /* If isubx < NPEX-1, receive data for right y-line of uext (via bufright) */
  if (isubx != NPEX-1) {
    MPI_Wait(&request[3], &status);

    /* Copy the buffer to uext */
    for (ly = 0; ly < MYSUB; ly++) {
      offsetbuf = ly*NVARS;
      offsetue = (ly+2)*dsizex2 - NVARS;
      for (i = 0; i < NVARS; i++)
        uext[offsetue+i] = bufright[offsetbuf+i];
    }
  }
}

/* fucomm routine. This routine performs all inter-processor
   communication of data in u needed to calculate f. */

static void fucomm(realtype t, N_Vector u, void *user_data)
{
  UserData data;
  realtype *uarray, *uext, buffer[2*NVARS*MYSUB];
  MPI_Comm comm;
  int my_pe, isubx, isuby;
  sunindextype nvmxsub, nvmysub;
  MPI_Request request[4];

  data = (UserData) user_data;
  uarray = N_VGetArrayPointer(u);

  /* Get comm, my_pe, subgrid indices, data sizes, extended array uext */

  comm = data->comm;     my_pe = data->my_pe;
  isubx = data->isubx;   isuby = data->isuby;
  nvmxsub = data->nvmxsub;
  nvmysub = NVARS*MYSUB;
  uext = data->uext;

  /* Start receiving boundary data from neighboring PEs */

  BRecvPost(comm, request, my_pe, isubx, isuby, nvmxsub, nvmysub, uext, buffer);

  /* Send data from boundary of local grid to neighboring PEs */

  BSend(comm, my_pe, isubx, isuby, nvmxsub, nvmysub, uarray);

  /* Finish receiving boundary data from neighboring PEs */

  BRecvWait(request, isubx, isuby, nvmxsub, uext, buffer);
}

/***************** Function called by the solver **************************/

/* f routine. Evaluate f(t,y). First call fucomm to do communication of
   subgrid boundary data into uext. Then calculate f by a call to flocal. */

static int f(realtype t, N_Vector u, N_Vector udot, void *user_data)
{
  UserData data;

  data = (UserData) user_data;

  /* Call fucomm to do inter-processor communication */

  fucomm(t, u, user_data);

  /* Call flocal to calculate all right-hand sides */

  flocal(data->Nlocal, t, u, udot, user_data);

  return(0);
}

/***************** Functions called by the CVBBDPRE module ****************/

/* flocal routine. Compute f(t,y). This routine assumes that all
   inter-processor communication of data needed to calculate f has already
   been done, and this data is in the work array uext. */

static int flocal(sunindextype Nlocal, realtype t, N_Vector u,
                  N_Vector udot, void *user_data)
{
  realtype *uext;
  realtype q3, c1, c2, c1dn, c2dn, c1up, c2up, c1lt, c2lt;
  realtype c1rt, c2rt, cydn, cyup, hord1, hord2, horad1, horad2;
  realtype qq1, qq2, qq3, qq4, rkin1, rkin2, s, vertd1, vertd2, ydn, yup;
  realtype q4coef, dely, verdco, hordco, horaco;
  int i, lx, ly, jy;
  int isubx, isuby;
  sunindextype nvmxsub, nvmxsub2, offsetu, offsetue;
  UserData data;
  realtype *uarray, *duarray;

  uarray = N_VGetArrayPointer(u);
  duarray = N_VGetArrayPointer(udot);

  /* Get subgrid indices, array sizes, extended work array uext */

  data = (UserData) user_data;
  isubx = data->isubx;       isuby = data->isuby;
  nvmxsub = data->nvmxsub;   nvmxsub2 = data->nvmxsub2;
  uext = data->uext;

  /* Copy local segment of u vector into the working extended array uext */

  offsetu = 0;
  offsetue = nvmxsub2 + NVARS;
  for (ly = 0; ly < MYSUB; ly++) {
    for (i = 0; i < nvmxsub; i++) uext[offsetue+i] = uarray[offsetu+i];
    offsetu = offsetu + nvmxsub;
    offsetue = offsetue + nvmxsub2;
  }

  /* To facilitate homogeneous Neumann boundary conditions, when this is
     a boundary PE, copy data from the first interior mesh line of u to uext */

  /* If isuby = 0, copy x-line 2 of u to uext */
  if (isuby == 0) {
    for (i = 0; i < nvmxsub; i++) uext[NVARS+i] = uarray[nvmxsub+i];
  }

  /* If isuby = NPEY-1, copy x-line MYSUB-1 of u to uext */
  if (isuby == NPEY-1) {
    offsetu = (MYSUB-2)*nvmxsub;
    offsetue = (MYSUB+1)*nvmxsub2 + NVARS;
    for (i = 0; i < nvmxsub; i++) uext[offsetue+i] = uarray[offsetu+i];
  }

  /* If isubx = 0, copy y-line 2 of u to uext */
  if (isubx == 0) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetu = ly*nvmxsub + NVARS;
      offsetue = (ly+1)*nvmxsub2;
      for (i = 0; i < NVARS; i++) uext[offsetue+i] = uarray[offsetu+i];
    }
  }

  /* If isubx = NPEX-1, copy y-line MXSUB-1 of u to uext */
  if (isubx == NPEX-1) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetu = (ly+1)*nvmxsub - 2*NVARS;
      offsetue = (ly+2)*nvmxsub2 - NVARS;
      for (i = 0; i < NVARS; i++) uext[offsetue+i] = uarray[offsetu+i];
    }
  }

  /* Make local copies of problem variables, for efficiency */

  dely = data->dy;
  verdco = data->vdco;
  hordco = data->hdco;
  horaco = data->haco;

  /* Set diurnal rate coefficients as functions of t, and save q4 in
     data block for use by preconditioner evaluation routine */

  s = sin((data->om)*t);
  if (s > ZERO) {
    q3 = exp(-A3/s);
    q4coef = exp(-A4/s);
  } else {
    q3 = ZERO;
    q4coef = ZERO;
  }
  data->q4 = q4coef;

  /* Loop over all grid points in local subgrid */

  for (ly = 0; ly < MYSUB; ly++) {

    jy = ly + isuby*MYSUB;

    /* Set vertical diffusion coefficients at jy +- 1/2 */

    ydn = YMIN + (jy - RCONST(0.5))*dely;
    yup = ydn + dely;
    cydn = verdco*exp(RCONST(0.2)*ydn);
    cyup = verdco*exp(RCONST(0.2)*yup);
    for (lx = 0; lx < MXSUB; lx++) {

      /* Extract c1 and c2, and set kinetic rate terms */

      offsetue = (lx+1)*NVARS + (ly+1)*nvmxsub2;
      c1 = uext[offsetue];
      c2 = uext[offsetue+1];
      qq1 = Q1*c1*C3;
      qq2 = Q2*c1*c2;
      qq3 = q3*C3;
      qq4 = q4coef*c2;
      rkin1 = -qq1 - qq2 + RCONST(2.0)*qq3 + qq4;
      rkin2 = qq1 - qq2 - qq4;

      /* Set vertical diffusion terms */

      c1dn = uext[offsetue-nvmxsub2];
      c2dn = uext[offsetue-nvmxsub2+1];
      c1up = uext[offsetue+nvmxsub2];
      c2up = uext[offsetue+nvmxsub2+1];
      vertd1 = cyup*(c1up - c1) - cydn*(c1 - c1dn);
      vertd2 = cyup*(c2up - c2) - cydn*(c2 - c2dn);

      /* Set horizontal diffusion and advection terms */

      c1lt = uext[offsetue-2];
      c2lt = uext[offsetue-1];
      c1rt = uext[offsetue+2];
      c2rt = uext[offsetue+3];
      hord1 = hordco*(c1rt - RCONST(2.0)*c1 + c1lt);
      hord2 = hordco*(c2rt - RCONST(2.0)*c2 + c2lt);
      horad1 = horaco*(c1rt - c1lt);
      horad2 = horaco*(c2rt - c2lt);

      /* Load all terms into duarray */

      offsetu = lx*NVARS + ly*nvmxsub;
      duarray[offsetu]   = vertd1 + hord1 + horad1 + rkin1;
      duarray[offsetu+1] = vertd2 + hord2 + horad2 + rkin2;
    }
  }

  return(0);
}

/* Check function return value...
     opt == 0 means SUNDIALS function allocates memory so check if
              returned NULL pointer
     opt == 1 means SUNDIALS function returns an integer value so check if
              retval < 0
     opt == 2 means function allocates memory so check if returned
              NULL pointer */

static int check_retval(void *returnvalue, const char *funcname, int opt, int id)
{
  int *retval;

  /* Check if SUNDIALS function returned NULL pointer - no memory allocated */
  if (opt == 0 && returnvalue == NULL) {
    fprintf(stderr, "\nSUNDIALS_ERROR(%d): %s() failed - returned NULL pointer\n\n",
            id, funcname);
    return(1);
  }

  /* Check if retval < 0 */
  else if (opt == 1) {
    retval = (int *) returnvalue;
    if (*retval < 0) {
      fprintf(stderr, "\nSUNDIALS_ERROR(%d): %s() failed with retval = %d\n\n",
              id, funcname, *retval);
      return(1);
    }
  }

  /* Check if function returned NULL pointer - no memory allocated */
  else if (opt == 2 && returnvalue == NULL) {
    fprintf(stderr, "\nMEMORY_ERROR(%d): %s() failed - returned NULL pointer\n\n",
            id, funcname);
    return(1);
  }

  return(0);
}