/*
 * -----------------------------------------------------------------
 * Programmer(s): Daniel R. Reynolds @ SMU
 *                S. D. Cohen, A. C. Hindmarsh, M. R. Wittman, and
 *                Radu Serban @ LLNL
 * -----------------------------------------------------------------
 * SUNDIALS Copyright Start
 * Copyright (c) 2002-2021, Lawrence Livermore National Security
 * and Southern Methodist University.
 * All rights reserved.
 *
 * See the top-level LICENSE and NOTICE files for details.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 * SUNDIALS Copyright End
 * -----------------------------------------------------------------
 * Example problem:
 *
 * An ODE system is generated from the following 2-species diurnal
 * kinetics advection-diffusion PDE system in 2 space dimensions:
 *
 *   dc(i)/dt = Kh*(d/dx)^2 c(i) + V*dc(i)/dx + (d/dy)(Kv(y)*dc(i)/dy)
 *                 + Ri(c1,c2,t)      for i = 1,2,   where
 *   R1(c1,c2,t) = -q1*c1*c3 - q2*c1*c2 + 2*q3(t)*c3 + q4(t)*c2,
 *   R2(c1,c2,t) =  q1*c1*c3 - q2*c1*c2 - q4(t)*c2,
 *   Kv(y) = Kv0*exp(y/5),
 * Kh, V, Kv0, q1, q2, and c3 are constants, and q3(t) and q4(t)
 * vary diurnally. The problem is posed on the square
 *   0 <= x <= 20,   30 <= y <= 50   (all in km),
 * with homogeneous Neumann boundary conditions, and for time t in
 *   0 <= t <= 86400 sec (1 day).
 * The PDE system is treated by central differences on a uniform
 * mesh, with simple polynomial initial profiles.
 *
 * The problem is solved by CVODES on NPE processors, treated
 * as a rectangular process grid of size NPEX by NPEY, with
 * NPE = NPEX*NPEY. Each processor contains a subgrid of size MXSUB
 * by MYSUB of the (x,y) mesh. Thus the actual mesh sizes are
 * MX = MXSUB*NPEX and MY = MYSUB*NPEY, and the ODE system size is
 * neq = 2*MX*MY.
 *
 * The solution is done with the BDF/GMRES method (i.e. using the
 * SUNLinSol_SPGMR linear solver) and a block-diagonal matrix with
 * banded blocks as a preconditioner, using the CVBBDPRE module.
 * Each block is generated using difference quotients, with
 * half-bandwidths mudq = mldq = 2*MXSUB, but the retained banded
 * blocks have half-bandwidths mukeep = mlkeep = 2.
 * A copy of the approximate Jacobian is saved and conditionally
 * reused within the preconditioner routine.
 *
 * The problem is solved twice -- with left and right preconditioning.
 *
 * Performance data and sampled solution values are printed at
 * selected output times, and all performance counters are printed
 * on completion.
 *
 * This version uses MPI for user routines.
 * Execute with number of processors = NPEX*NPEY (see constants below).
 * --------------------------------------------------------------------
 */
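
/* Example usage (a sketch; the executable name is illustrative and depends on
   how this example is built in your SUNDIALS installation):
       mpiexec -n 4 ./cvsDiurnal_kry_bbd_p
   where 4 = NPEX*NPEY with the constants defined below. */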

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include <cvodes/cvodes.h>             /* prototypes for CVODES functions      */
#include <cvodes/cvodes_bbdpre.h>      /* prototypes for CVBBDPRE module       */
#include <sunlinsol/sunlinsol_spgmr.h> /* prototypes and constants for SUNLinSol_SPGMR */
#include <nvector/nvector_parallel.h>  /* definition of N_Vector               */
#include <sundials/sundials_types.h>   /* definitions of realtype, booleantype */

#include <mpi.h>                       /* MPI constants and types              */

/* helpful macros */

#ifndef SQR
#define SQR(A) ((A)*(A))
#endif

/* Problem Constants */

#define ZERO         RCONST(0.0)

#define NVARS        2                 /* number of species                   */
#define KH           RCONST(4.0e-6)    /* horizontal diffusivity Kh           */
#define VEL          RCONST(0.001)     /* advection velocity V                */
#define KV0          RCONST(1.0e-8)    /* coefficient in Kv(y)                */
#define Q1           RCONST(1.63e-16)  /* coefficients q1, q2, c3             */
#define Q2           RCONST(4.66e-16)
#define C3           RCONST(3.7e16)
#define A3           RCONST(22.62)     /* coefficient in expression for q3(t) */
#define A4           RCONST(7.601)     /* coefficient in expression for q4(t) */
#define C1_SCALE     RCONST(1.0e6)     /* coefficients in initial profiles    */
#define C2_SCALE     RCONST(1.0e12)

#define T0           ZERO              /* initial time                        */
#define NOUT         12                /* number of output times              */
#define TWOHR        RCONST(7200.0)    /* number of seconds in two hours      */
#define HALFDAY      RCONST(4.32e4)    /* number of seconds in a half day     */
#define PI           RCONST(3.1415926535898)  /* pi                           */

#define XMIN         ZERO              /* grid boundaries in x                */
#define XMAX         RCONST(20.0)
#define YMIN         RCONST(30.0)      /* grid boundaries in y                */
#define YMAX         RCONST(50.0)

#define NPEX         2                 /* no. PEs in x direction of PE array  */
#define NPEY         2                 /* no. PEs in y direction of PE array  */
                                       /* Total no. PEs = NPEX*NPEY           */
#define MXSUB        5                 /* no. x points per subgrid            */
#define MYSUB        5                 /* no. y points per subgrid            */

#define MX           (NPEX*MXSUB)      /* MX = number of x mesh points        */
#define MY           (NPEY*MYSUB)      /* MY = number of y mesh points        */
                                       /* Spatial mesh is MX by MY            */
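
/* Note: with the values above, MX = MY = 10, so the global problem size is
   neq = NVARS*MX*MY = 200. */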

/* CVodeInit Constants */

#define RTOL         RCONST(1.0e-5)    /* scalar relative tolerance             */
#define FLOOR        RCONST(100.0)     /* value of C1 or C2 at which tolerances
                                          change from relative to absolute      */
#define ATOL         (RTOL*FLOOR)      /* scalar absolute tolerance             */

/* Type : UserData
   contains problem constants, extended dependent variable array,
   grid constants, processor indices, MPI communicator */

typedef struct {
  realtype q4, om, dx, dy, hdco, haco, vdco;
  realtype uext[NVARS*(MXSUB+2)*(MYSUB+2)];
  int my_pe, isubx, isuby;
  sunindextype nvmxsub, nvmxsub2, Nlocal;
  MPI_Comm comm;
} *UserData;
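
/* Note on uext: it holds this processor's MXSUB-by-MYSUB block of the solution
   surrounded by one line of "ghost" points on each side, hence the
   (MXSUB+2)*(MYSUB+2) sizing.  nvmxsub = NVARS*MXSUB is the length of one
   x-line of the local block, and nvmxsub2 = NVARS*(MXSUB+2) is the length of
   one x-line of uext. */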

/* Prototypes of private helper functions */

static void InitUserData(int my_pe, sunindextype local_N, MPI_Comm comm,
                         UserData data);
static void SetInitialProfiles(N_Vector u, UserData data);
static void PrintIntro(int npes, sunindextype mudq, sunindextype mldq,
                       sunindextype mukeep, sunindextype mlkeep);
static void PrintOutput(void *cvode_mem, int my_pe, MPI_Comm comm,
                        N_Vector u, realtype t);
static void PrintFinalStats(void *cvode_mem);
static void BSend(MPI_Comm comm,
                  int my_pe, int isubx, int isuby,
                  sunindextype dsizex, sunindextype dsizey,
                  realtype uarray[]);
static void BRecvPost(MPI_Comm comm, MPI_Request request[],
                      int my_pe, int isubx, int isuby,
                      sunindextype dsizex, sunindextype dsizey,
                      realtype uext[], realtype buffer[]);
static void BRecvWait(MPI_Request request[],
                      int isubx, int isuby,
                      sunindextype dsizex, realtype uext[],
                      realtype buffer[]);

static void fucomm(realtype t, N_Vector u, void *user_data);

/* Prototype of function called by the solver */

static int f(realtype t, N_Vector u, N_Vector udot, void *user_data);

/* Prototype of function called by the CVBBDPRE module */

static int flocal(sunindextype Nlocal, realtype t, N_Vector u,
                  N_Vector udot, void *user_data);

/* Private function to check function return values */

static int check_retval(void *returnvalue, const char *funcname, int opt, int id);

/***************************** Main Program ******************************/

int main(int argc, char *argv[])
{
  UserData data;
  SUNLinearSolver LS;
  void *cvode_mem;
  realtype abstol, reltol, t, tout;
  N_Vector u;
  int iout, my_pe, npes, retval, jpre;
  sunindextype neq, local_N, mudq, mldq, mukeep, mlkeep;
  MPI_Comm comm;

  data = NULL;
  LS = NULL;
  cvode_mem = NULL;
  u = NULL;

  /* Set problem size neq */
  neq = NVARS*MX*MY;

  /* Get processor number and total number of pe's */
  MPI_Init(&argc, &argv);
  comm = MPI_COMM_WORLD;
  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &my_pe);

  if (npes != NPEX*NPEY) {
    if (my_pe == 0)
      fprintf(stderr, "\nMPI_ERROR(0): npes = %d is not equal to NPEX*NPEY = %d\n\n",
              npes, NPEX*NPEY);
    MPI_Finalize();
    return(1);
  }

  /* Set local length */
  local_N = NVARS*MXSUB*MYSUB;

  /* Allocate and load user data block */
  data = (UserData) malloc(sizeof *data);
  if(check_retval((void *)data, "malloc", 2, my_pe)) MPI_Abort(comm, 1);
  InitUserData(my_pe, local_N, comm, data);

  /* Allocate and initialize u, and set tolerances */
  u = N_VNew_Parallel(comm, local_N, neq);
  if(check_retval((void *)u, "N_VNew_Parallel", 0, my_pe)) MPI_Abort(comm, 1);
  SetInitialProfiles(u, data);
  abstol = ATOL;
  reltol = RTOL;

  /* Call CVodeCreate to create the solver memory and specify the
   * Backward Differentiation Formula */
  cvode_mem = CVodeCreate(CV_BDF);
  if(check_retval((void *)cvode_mem, "CVodeCreate", 0, my_pe)) MPI_Abort(comm, 1);

  /* Set the pointer to user-defined data */
  retval = CVodeSetUserData(cvode_mem, data);
  if(check_retval(&retval, "CVodeSetUserData", 1, my_pe)) MPI_Abort(comm, 1);

  /* Call CVodeInit to initialize the integrator memory and specify the
   * user's right hand side function in u'=f(t,u), the initial time T0, and
   * the initial dependent variable vector u. */
  retval = CVodeInit(cvode_mem, f, T0, u);
  if(check_retval(&retval, "CVodeInit", 1, my_pe)) MPI_Abort(comm, 1);

  /* Call CVodeSStolerances to specify the scalar relative tolerance
   * and scalar absolute tolerance */
  retval = CVodeSStolerances(cvode_mem, reltol, abstol);
  if (check_retval(&retval, "CVodeSStolerances", 1, my_pe)) MPI_Abort(comm, 1);

  /* Create SPGMR solver structure -- use left preconditioning
     and the default Krylov dimension maxl */
  LS = SUNLinSol_SPGMR(u, PREC_LEFT, 0);
  if (check_retval((void *)LS, "SUNLinSol_SPGMR", 0, my_pe)) MPI_Abort(comm, 1);

  /* Attach the linear solver */
  retval = CVodeSetLinearSolver(cvode_mem, LS, NULL);
  if(check_retval(&retval, "CVodeSetLinearSolver", 1, my_pe)) MPI_Abort(comm, 1);

  /* Initialize BBD preconditioner */
  mudq = mldq = NVARS*MXSUB;
  mukeep = mlkeep = NVARS;
  retval = CVBBDPrecInit(cvode_mem, local_N, mudq, mldq,
                         mukeep, mlkeep, ZERO, flocal, NULL);
  if(check_retval(&retval, "CVBBDPrecInit", 1, my_pe)) MPI_Abort(comm, 1);
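
  /* With NVARS = 2 and MXSUB = 5, the difference-quotient half-bandwidths are
     mudq = mldq = 2*MXSUB = 10, and the retained banded blocks have
     half-bandwidths mukeep = mlkeep = 2, matching the header comment. */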

  /* Print heading */
  if (my_pe == 0) PrintIntro(npes, mudq, mldq, mukeep, mlkeep);

  /* Loop over jpre (= PREC_LEFT, PREC_RIGHT), and solve the problem */
  for (jpre = PREC_LEFT; jpre <= PREC_RIGHT; jpre++) {

    /* On second run, re-initialize u, the integrator, CVBBDPRE,
       and preconditioning type */

    if (jpre == PREC_RIGHT) {

      SetInitialProfiles(u, data);

      retval = CVodeReInit(cvode_mem, T0, u);
      if(check_retval(&retval, "CVodeReInit", 1, my_pe)) MPI_Abort(comm, 1);

      retval = CVBBDPrecReInit(cvode_mem, mudq, mldq, ZERO);
      if(check_retval(&retval, "CVBBDPrecReInit", 1, my_pe)) MPI_Abort(comm, 1);

      retval = SUNLinSol_SPGMRSetPrecType(LS, PREC_RIGHT);
      if(check_retval(&retval, "SUNLinSol_SPGMRSetPrecType", 1, my_pe)) MPI_Abort(comm, 1);

      if (my_pe == 0) {
        printf("\n\n-------------------------------------------------------");
        printf("------------\n");
      }

    }

    if (my_pe == 0) {
      printf("\n\nPreconditioner type is: jpre = %s\n\n",
             (jpre == PREC_LEFT) ? "PREC_LEFT" : "PREC_RIGHT");
    }

    /* In loop over output points, call CVode, print results, test for error */

    for (iout = 1, tout = TWOHR; iout <= NOUT; iout++, tout += TWOHR) {
      retval = CVode(cvode_mem, tout, u, &t, CV_NORMAL);
      if(check_retval(&retval, "CVode", 1, my_pe)) break;
      PrintOutput(cvode_mem, my_pe, comm, u, t);
    }

    /* Print final statistics */

    if (my_pe == 0) PrintFinalStats(cvode_mem);

  } /* End of jpre loop */

  /* Free memory */
  N_VDestroy_Parallel(u);
  free(data);
  CVodeFree(&cvode_mem);
  SUNLinSolFree(LS);

  MPI_Finalize();

  return(0);
}

/*********************** Private Helper Functions ************************/

/* Load constants in data */

static void InitUserData(int my_pe, sunindextype local_N, MPI_Comm comm,
                         UserData data)
{
  int isubx, isuby;

  /* Set problem constants */
  data->om = PI/HALFDAY;
  data->dx = (XMAX-XMIN)/((realtype)(MX-1));
  data->dy = (YMAX-YMIN)/((realtype)(MY-1));
  data->hdco = KH/SQR(data->dx);
  data->haco = VEL/(RCONST(2.0)*data->dx);
  data->vdco = (RCONST(1.0)/SQR(data->dy))*KV0;

  /* Set machine-related constants */
  data->comm = comm;
  data->my_pe = my_pe;
  data->Nlocal = local_N;
  /* isubx and isuby are the PE grid indices corresponding to my_pe */
  isuby = my_pe/NPEX;
  isubx = my_pe - isuby*NPEX;
  data->isubx = isubx;
  data->isuby = isuby;
  /* Set the sizes of a boundary x-line in u and uext */
  data->nvmxsub = NVARS*MXSUB;
  data->nvmxsub2 = NVARS*(MXSUB+2);
}
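
/* For example, with NPEX = NPEY = 2 the mapping above places MPI ranks
   0, 1, 2, 3 at process-grid positions (isubx,isuby) = (0,0), (1,0),
   (0,1), (1,1), i.e. rank 0 owns the bottom-left subgrid and rank 3 the
   top-right one. */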

/* Set initial conditions in u */

static void SetInitialProfiles(N_Vector u, UserData data)
{
  int isubx, isuby;
  int lx, ly, jx, jy;
  sunindextype offset;
  realtype dx, dy, x, y, cx, cy, xmid, ymid;
  realtype *uarray;

  /* Set pointer to data array in vector u */

  uarray = N_VGetArrayPointer_Parallel(u);

  /* Get mesh spacings, and subgrid indices for this PE */

  dx = data->dx;        dy = data->dy;
  isubx = data->isubx;  isuby = data->isuby;

  /* Load initial profiles of c1 and c2 into local u vector.
     Here lx and ly are local mesh point indices on the local subgrid,
     and jx and jy are the global mesh point indices. */

  offset = 0;
  xmid = RCONST(0.5)*(XMIN + XMAX);
  ymid = RCONST(0.5)*(YMIN + YMAX);
  for (ly = 0; ly < MYSUB; ly++) {
    jy = ly + isuby*MYSUB;
    y = YMIN + jy*dy;
    cy = SQR(RCONST(0.1)*(y - ymid));
    cy = RCONST(1.0) - cy + RCONST(0.5)*SQR(cy);
    for (lx = 0; lx < MXSUB; lx++) {
      jx = lx + isubx*MXSUB;
      x = XMIN + jx*dx;
      cx = SQR(RCONST(0.1)*(x - xmid));
      cx = RCONST(1.0) - cx + RCONST(0.5)*SQR(cx);
      uarray[offset  ] = C1_SCALE*cx*cy;
      uarray[offset+1] = C2_SCALE*cx*cy;
      offset = offset + 2;
    }
  }
}

/* Print problem introduction */

static void PrintIntro(int npes, sunindextype mudq, sunindextype mldq,
                       sunindextype mukeep, sunindextype mlkeep)
{
  printf("\n2-species diurnal advection-diffusion problem\n");
  printf("  %d by %d mesh on %d processors\n", MX, MY, npes);
  printf("  Using CVBBDPRE preconditioner module\n");
  printf("    Difference-quotient half-bandwidths are");
  printf(" mudq = %ld,  mldq = %ld\n", (long int) mudq, (long int) mldq);
  printf("    Retained band block half-bandwidths are");
  printf(" mukeep = %ld,  mlkeep = %ld", (long int) mukeep, (long int) mlkeep);

  return;
}

/* Print current t, step count, order, stepsize, and sampled c1,c2 values */

static void PrintOutput(void *cvode_mem, int my_pe, MPI_Comm comm,
                        N_Vector u, realtype t)
{
  int qu, retval, npelast;
  sunindextype i0, i1;
  long int nst;
  realtype hu, *uarray, tempu[2];
  MPI_Status status;

  npelast = NPEX*NPEY - 1;
  uarray = N_VGetArrayPointer_Parallel(u);

  /* Send c1,c2 at top right mesh point to PE 0 */
  if (my_pe == npelast) {
    i0 = NVARS*MXSUB*MYSUB - 2;
    i1 = i0 + 1;
    if (npelast != 0)
      MPI_Send(&uarray[i0], 2, MPI_SUNREALTYPE, 0, 0, comm);
    else {
      tempu[0] = uarray[i0];
      tempu[1] = uarray[i1];
    }
  }

  /* On PE 0, receive c1,c2 at top right, then print performance data
     and sampled solution values */
  if (my_pe == 0) {
    if (npelast != 0)
      MPI_Recv(&tempu[0], 2, MPI_SUNREALTYPE, npelast, 0, comm, &status);
    retval = CVodeGetNumSteps(cvode_mem, &nst);
    check_retval(&retval, "CVodeGetNumSteps", 1, my_pe);
    retval = CVodeGetLastOrder(cvode_mem, &qu);
    check_retval(&retval, "CVodeGetLastOrder", 1, my_pe);
    retval = CVodeGetLastStep(cvode_mem, &hu);
    check_retval(&retval, "CVodeGetLastStep", 1, my_pe);
#if defined(SUNDIALS_EXTENDED_PRECISION)
    printf("t = %.2Le   no. steps = %ld   order = %d   stepsize = %.2Le\n",
           t, nst, qu, hu);
    printf("At bottom left:  c1, c2 = %12.3Le %12.3Le \n", uarray[0], uarray[1]);
    printf("At top right:    c1, c2 = %12.3Le %12.3Le \n\n", tempu[0], tempu[1]);
#elif defined(SUNDIALS_DOUBLE_PRECISION)
    printf("t = %.2e   no. steps = %ld   order = %d   stepsize = %.2e\n",
           t, nst, qu, hu);
    printf("At bottom left:  c1, c2 = %12.3e %12.3e \n", uarray[0], uarray[1]);
    printf("At top right:    c1, c2 = %12.3e %12.3e \n\n", tempu[0], tempu[1]);
#else
    printf("t = %.2e   no. steps = %ld   order = %d   stepsize = %.2e\n",
           t, nst, qu, hu);
    printf("At bottom left:  c1, c2 = %12.3e %12.3e \n", uarray[0], uarray[1]);
    printf("At top right:    c1, c2 = %12.3e %12.3e \n\n", tempu[0], tempu[1]);
#endif
  }
}

/* Print final statistics contained in iopt */

static void PrintFinalStats(void *cvode_mem)
{
  long int lenrw, leniw;
  long int lenrwLS, leniwLS;
  long int lenrwBBDP, leniwBBDP;
  long int nst, nfe, nsetups, nni, ncfn, netf;
  long int nli, npe, nps, ncfl, nfeLS, ngevalsBBDP;
  int retval;

  retval = CVodeGetWorkSpace(cvode_mem, &lenrw, &leniw);
  check_retval(&retval, "CVodeGetWorkSpace", 1, 0);
  retval = CVodeGetNumSteps(cvode_mem, &nst);
  check_retval(&retval, "CVodeGetNumSteps", 1, 0);
  retval = CVodeGetNumRhsEvals(cvode_mem, &nfe);
  check_retval(&retval, "CVodeGetNumRhsEvals", 1, 0);
  retval = CVodeGetNumLinSolvSetups(cvode_mem, &nsetups);
  check_retval(&retval, "CVodeGetNumLinSolvSetups", 1, 0);
  retval = CVodeGetNumErrTestFails(cvode_mem, &netf);
  check_retval(&retval, "CVodeGetNumErrTestFails", 1, 0);
  retval = CVodeGetNumNonlinSolvIters(cvode_mem, &nni);
  check_retval(&retval, "CVodeGetNumNonlinSolvIters", 1, 0);
  retval = CVodeGetNumNonlinSolvConvFails(cvode_mem, &ncfn);
  check_retval(&retval, "CVodeGetNumNonlinSolvConvFails", 1, 0);

  retval = CVodeGetLinWorkSpace(cvode_mem, &lenrwLS, &leniwLS);
  check_retval(&retval, "CVodeGetLinWorkSpace", 1, 0);
  retval = CVodeGetNumLinIters(cvode_mem, &nli);
  check_retval(&retval, "CVodeGetNumLinIters", 1, 0);
  retval = CVodeGetNumPrecEvals(cvode_mem, &npe);
  check_retval(&retval, "CVodeGetNumPrecEvals", 1, 0);
  retval = CVodeGetNumPrecSolves(cvode_mem, &nps);
  check_retval(&retval, "CVodeGetNumPrecSolves", 1, 0);
  retval = CVodeGetNumLinConvFails(cvode_mem, &ncfl);
  check_retval(&retval, "CVodeGetNumLinConvFails", 1, 0);
  retval = CVodeGetNumLinRhsEvals(cvode_mem, &nfeLS);
  check_retval(&retval, "CVodeGetNumLinRhsEvals", 1, 0);

  printf("\nFinal Statistics: \n\n");
  printf("lenrw   = %5ld     leniw   = %5ld\n", lenrw, leniw);
  printf("lenrwls = %5ld     leniwls = %5ld\n", lenrwLS, leniwLS);
  printf("nst     = %5ld\n", nst);
  printf("nfe     = %5ld     nfels   = %5ld\n", nfe, nfeLS);
  printf("nni     = %5ld     nli     = %5ld\n", nni, nli);
  printf("nsetups = %5ld     netf    = %5ld\n", nsetups, netf);
  printf("npe     = %5ld     nps     = %5ld\n", npe, nps);
  printf("ncfn    = %5ld     ncfl    = %5ld\n\n", ncfn, ncfl);

  retval = CVBBDPrecGetWorkSpace(cvode_mem, &lenrwBBDP, &leniwBBDP);
  check_retval(&retval, "CVBBDPrecGetWorkSpace", 1, 0);
  retval = CVBBDPrecGetNumGfnEvals(cvode_mem, &ngevalsBBDP);
  check_retval(&retval, "CVBBDPrecGetNumGfnEvals", 1, 0);
  printf("In CVBBDPRE: real/integer local work space sizes = %ld, %ld\n",
         lenrwBBDP, leniwBBDP);
  printf("             no. flocal evals. = %ld\n", ngevalsBBDP);
}

/* Routine to send boundary data to neighboring PEs */

static void BSend(MPI_Comm comm,
                  int my_pe, int isubx, int isuby,
                  sunindextype dsizex, sunindextype dsizey,
                  realtype uarray[])
{
  int i, ly;
  sunindextype offsetu, offsetbuf;
  realtype bufleft[NVARS*MYSUB], bufright[NVARS*MYSUB];

  /* If isuby > 0, send data from bottom x-line of u */

  if (isuby != 0)
    MPI_Send(&uarray[0], (int) dsizex, MPI_SUNREALTYPE, my_pe-NPEX, 0, comm);

  /* If isuby < NPEY-1, send data from top x-line of u */

  if (isuby != NPEY-1) {
    offsetu = (MYSUB-1)*dsizex;
    MPI_Send(&uarray[offsetu], (int) dsizex, MPI_SUNREALTYPE, my_pe+NPEX, 0, comm);
  }

  /* If isubx > 0, send data from left y-line of u (via bufleft) */

  if (isubx != 0) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetbuf = ly*NVARS;
      offsetu = ly*dsizex;
      for (i = 0; i < NVARS; i++)
        bufleft[offsetbuf+i] = uarray[offsetu+i];
    }
    MPI_Send(&bufleft[0], (int) dsizey, MPI_SUNREALTYPE, my_pe-1, 0, comm);
  }

  /* If isubx < NPEX-1, send data from right y-line of u (via bufright) */

  if (isubx != NPEX-1) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetbuf = ly*NVARS;
      offsetu = offsetbuf*MXSUB + (MXSUB-1)*NVARS;
      for (i = 0; i < NVARS; i++)
        bufright[offsetbuf+i] = uarray[offsetu+i];
    }
    MPI_Send(&bufright[0], (int) dsizey, MPI_SUNREALTYPE, my_pe+1, 0, comm);
  }

}

/* Routine to start receiving boundary data from neighboring PEs.
   Notes:
   1) buffer should be able to hold 2*NVARS*MYSUB realtype entries, should be
      passed to both the BRecvPost and BRecvWait functions, and should not
      be manipulated between the two calls.
   2) request should have 4 entries, and should be passed in both calls also. */

static void BRecvPost(MPI_Comm comm, MPI_Request request[],
                      int my_pe, int isubx, int isuby,
                      sunindextype dsizex, sunindextype dsizey,
                      realtype uext[], realtype buffer[])
{
  sunindextype offsetue;
  /* Have bufleft and bufright use the same buffer */
  realtype *bufleft = buffer, *bufright = buffer+NVARS*MYSUB;

  /* If isuby > 0, receive data for bottom x-line of uext */
  if (isuby != 0)
    MPI_Irecv(&uext[NVARS], (int) dsizex, MPI_SUNREALTYPE,
              my_pe-NPEX, 0, comm, &request[0]);

  /* If isuby < NPEY-1, receive data for top x-line of uext */
  if (isuby != NPEY-1) {
    offsetue = NVARS*(1 + (MYSUB+1)*(MXSUB+2));
    MPI_Irecv(&uext[offsetue], (int) dsizex, MPI_SUNREALTYPE,
              my_pe+NPEX, 0, comm, &request[1]);
  }

  /* If isubx > 0, receive data for left y-line of uext (via bufleft) */
  if (isubx != 0) {
    MPI_Irecv(&bufleft[0], (int) dsizey, MPI_SUNREALTYPE,
              my_pe-1, 0, comm, &request[2]);
  }

  /* If isubx < NPEX-1, receive data for right y-line of uext (via bufright) */
  if (isubx != NPEX-1) {
    MPI_Irecv(&bufright[0], (int) dsizey, MPI_SUNREALTYPE,
              my_pe+1, 0, comm, &request[3]);
  }

}
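
/* Note: only the four edge lines of uext are exchanged; the corner entries of
   uext are never written or read, since the 5-point stencil in flocal has no
   diagonal neighbors. */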

/* Routine to finish receiving boundary data from neighboring PEs.
   Notes:
   1) buffer should be able to hold 2*NVARS*MYSUB realtype entries, should be
      passed to both the BRecvPost and BRecvWait functions, and should not
      be manipulated between the two calls.
   2) request should have 4 entries, and should be passed in both calls also. */

static void BRecvWait(MPI_Request request[],
                      int isubx, int isuby,
                      sunindextype dsizex, realtype uext[],
                      realtype buffer[])
{
  int i, ly;
  sunindextype dsizex2, offsetue, offsetbuf;
  realtype *bufleft = buffer, *bufright = buffer+NVARS*MYSUB;
  MPI_Status status;

  dsizex2 = dsizex + 2*NVARS;

  /* If isuby > 0, receive data for bottom x-line of uext */
  if (isuby != 0)
    MPI_Wait(&request[0], &status);

  /* If isuby < NPEY-1, receive data for top x-line of uext */
  if (isuby != NPEY-1)
    MPI_Wait(&request[1], &status);

  /* If isubx > 0, receive data for left y-line of uext (via bufleft) */
  if (isubx != 0) {
    MPI_Wait(&request[2], &status);

    /* Copy the buffer to uext */
    for (ly = 0; ly < MYSUB; ly++) {
      offsetbuf = ly*NVARS;
      offsetue = (ly+1)*dsizex2;
      for (i = 0; i < NVARS; i++)
        uext[offsetue+i] = bufleft[offsetbuf+i];
    }
  }

  /* If isubx < NPEX-1, receive data for right y-line of uext (via bufright) */
  if (isubx != NPEX-1) {
    MPI_Wait(&request[3], &status);

    /* Copy the buffer to uext */
    for (ly = 0; ly < MYSUB; ly++) {
      offsetbuf = ly*NVARS;
      offsetue = (ly+2)*dsizex2 - NVARS;
      for (i = 0; i < NVARS; i++)
        uext[offsetue+i] = bufright[offsetbuf+i];
    }
  }
}

/* fucomm routine.  This routine performs all inter-processor
   communication of data in u needed to calculate f. */

static void fucomm(realtype t, N_Vector u, void *user_data)
{
  UserData data;
  realtype *uarray, *uext, buffer[2*NVARS*MYSUB];
  MPI_Comm comm;
  int my_pe, isubx, isuby;
  sunindextype nvmxsub, nvmysub;
  MPI_Request request[4];

  data = (UserData) user_data;
  uarray = N_VGetArrayPointer_Parallel(u);

  /* Get comm, my_pe, subgrid indices, data sizes, extended array uext */

  comm = data->comm;    my_pe = data->my_pe;
  isubx = data->isubx;  isuby = data->isuby;
  nvmxsub = data->nvmxsub;
  nvmysub = NVARS*MYSUB;
  uext = data->uext;

  /* Start receiving boundary data from neighboring PEs */

  BRecvPost(comm, request, my_pe, isubx, isuby, nvmxsub, nvmysub, uext, buffer);

  /* Send data from boundary of local grid to neighboring PEs */

  BSend(comm, my_pe, isubx, isuby, nvmxsub, nvmysub, uarray);

  /* Finish receiving boundary data from neighboring PEs */

  BRecvWait(request, isubx, isuby, nvmxsub, uext, buffer);
}

/***************** Function called by the solver **************************/

/* f routine.  Evaluate f(t,y).  First call fucomm to do communication of
   subgrid boundary data into uext.  Then calculate f by a call to flocal. */

static int f(realtype t, N_Vector u, N_Vector udot, void *user_data)
{
  UserData data;

  data = (UserData) user_data;

  /* Call fucomm to do inter-processor communication */

  fucomm(t, u, user_data);

  /* Call flocal to calculate all right-hand sides */

  flocal(data->Nlocal, t, u, udot, user_data);

  return(0);
}
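
/* Note: the RHS is thus split into fucomm (ghost-cell exchange) plus flocal
   (purely local finite-difference evaluation).  Since CVBBDPrecInit above was
   given NULL for its optional communication function, the preconditioner's
   difference-quotient calls to flocal reuse the uext ghost data filled during
   the most recent f() call. */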

/***************** Functions called by the CVBBDPRE module ****************/

/* flocal routine.  Compute f(t,y).  This routine assumes that all
   inter-processor communication of data needed to calculate f has already
   been done, and this data is in the work array uext. */

static int flocal(sunindextype Nlocal, realtype t, N_Vector u,
                  N_Vector udot, void *user_data)
{
  realtype *uext;
  realtype q3, c1, c2, c1dn, c2dn, c1up, c2up, c1lt, c2lt;
  realtype c1rt, c2rt, cydn, cyup, hord1, hord2, horad1, horad2;
  realtype qq1, qq2, qq3, qq4, rkin1, rkin2, s, vertd1, vertd2, ydn, yup;
  realtype q4coef, dely, verdco, hordco, horaco;
  int i, lx, ly, jy;
  int isubx, isuby;
  sunindextype nvmxsub, nvmxsub2, offsetu, offsetue;
  UserData data;
  realtype *uarray, *duarray;

  uarray = N_VGetArrayPointer_Parallel(u);
  duarray = N_VGetArrayPointer_Parallel(udot);

  /* Get subgrid indices, array sizes, extended work array uext */

  data = (UserData) user_data;
  isubx = data->isubx;      isuby = data->isuby;
  nvmxsub = data->nvmxsub;  nvmxsub2 = data->nvmxsub2;
  uext = data->uext;

  /* Copy local segment of u vector into the working extended array uext */

  offsetu = 0;
  offsetue = nvmxsub2 + NVARS;
  for (ly = 0; ly < MYSUB; ly++) {
    for (i = 0; i < nvmxsub; i++) uext[offsetue+i] = uarray[offsetu+i];
    offsetu = offsetu + nvmxsub;
    offsetue = offsetue + nvmxsub2;
  }

  /* To facilitate homogeneous Neumann boundary conditions, when this is
     a boundary PE, copy data from the first interior mesh line of u to uext */

  /* If isuby = 0, copy x-line 2 of u to uext */
  if (isuby == 0) {
    for (i = 0; i < nvmxsub; i++) uext[NVARS+i] = uarray[nvmxsub+i];
  }

  /* If isuby = NPEY-1, copy x-line MYSUB-1 of u to uext */
  if (isuby == NPEY-1) {
    offsetu = (MYSUB-2)*nvmxsub;
    offsetue = (MYSUB+1)*nvmxsub2 + NVARS;
    for (i = 0; i < nvmxsub; i++) uext[offsetue+i] = uarray[offsetu+i];
  }

  /* If isubx = 0, copy y-line 2 of u to uext */
  if (isubx == 0) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetu = ly*nvmxsub + NVARS;
      offsetue = (ly+1)*nvmxsub2;
      for (i = 0; i < NVARS; i++) uext[offsetue+i] = uarray[offsetu+i];
    }
  }

  /* If isubx = NPEX-1, copy y-line MXSUB-1 of u to uext */
  if (isubx == NPEX-1) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetu = (ly+1)*nvmxsub - 2*NVARS;
      offsetue = (ly+2)*nvmxsub2 - NVARS;
      for (i = 0; i < NVARS; i++) uext[offsetue+i] = uarray[offsetu+i];
    }
  }

  /* Make local copies of problem variables, for efficiency */

  dely = data->dy;
  verdco = data->vdco;
  hordco = data->hdco;
  horaco = data->haco;

  /* Set diurnal rate coefficients as functions of t, and save q4 in
     data block for use by preconditioner evaluation routine */

  s = sin((data->om)*t);
  if (s > ZERO) {
    q3 = exp(-A3/s);
    q4coef = exp(-A4/s);
  } else {
    q3 = ZERO;
    q4coef = ZERO;
  }
  data->q4 = q4coef;

  /* Loop over all grid points in local subgrid */

  for (ly = 0; ly < MYSUB; ly++) {

    jy = ly + isuby*MYSUB;

    /* Set vertical diffusion coefficients at jy +- 1/2 */

    ydn = YMIN + (jy - RCONST(0.5))*dely;
    yup = ydn + dely;
    cydn = verdco*exp(RCONST(0.2)*ydn);
    cyup = verdco*exp(RCONST(0.2)*yup);
    for (lx = 0; lx < MXSUB; lx++) {

      /* Extract c1 and c2, and set kinetic rate terms */

      offsetue = (lx+1)*NVARS + (ly+1)*nvmxsub2;
      c1 = uext[offsetue];
      c2 = uext[offsetue+1];
      qq1 = Q1*c1*C3;
      qq2 = Q2*c1*c2;
      qq3 = q3*C3;
      qq4 = q4coef*c2;
      rkin1 = -qq1 - qq2 + RCONST(2.0)*qq3 + qq4;
      rkin2 = qq1 - qq2 - qq4;

      /* Set vertical diffusion terms */

      c1dn = uext[offsetue-nvmxsub2];
      c2dn = uext[offsetue-nvmxsub2+1];
      c1up = uext[offsetue+nvmxsub2];
      c2up = uext[offsetue+nvmxsub2+1];
      vertd1 = cyup*(c1up - c1) - cydn*(c1 - c1dn);
      vertd2 = cyup*(c2up - c2) - cydn*(c2 - c2dn);

      /* Set horizontal diffusion and advection terms */

      c1lt = uext[offsetue-2];
      c2lt = uext[offsetue-1];
      c1rt = uext[offsetue+2];
      c2rt = uext[offsetue+3];
      hord1 = hordco*(c1rt - RCONST(2.0)*c1 + c1lt);
      hord2 = hordco*(c2rt - RCONST(2.0)*c2 + c2lt);
      horad1 = horaco*(c1rt - c1lt);
      horad2 = horaco*(c2rt - c2lt);

      /* Load all terms into duarray */

      offsetu = lx*NVARS + ly*nvmxsub;
      duarray[offsetu]   = vertd1 + hord1 + horad1 + rkin1;
      duarray[offsetu+1] = vertd2 + hord2 + horad2 + rkin2;
    }
  }

  return(0);
}

/* Check function return value...
     opt == 0 means SUNDIALS function allocates memory so check if
              returned NULL pointer
     opt == 1 means SUNDIALS function returns an integer value so check if
              retval < 0
     opt == 2 means function allocates memory so check if returned
              NULL pointer */

static int check_retval(void *returnvalue, const char *funcname, int opt, int id)
{
  int *retval;

  /* Check if SUNDIALS function returned NULL pointer - no memory allocated */
  if (opt == 0 && returnvalue == NULL) {
    fprintf(stderr, "\nSUNDIALS_ERROR(%d): %s() failed - returned NULL pointer\n\n",
            id, funcname);
    return(1);
  }

  /* Check if retval < 0 */
  else if (opt == 1) {
    retval = (int *) returnvalue;
    if (*retval < 0) {
      fprintf(stderr, "\nSUNDIALS_ERROR(%d): %s() failed with retval = %d\n\n",
              id, funcname, *retval);
      return(1);
    }
  }

  /* Check if function returned NULL pointer - no memory allocated */
  else if (opt == 2 && returnvalue == NULL) {
    fprintf(stderr, "\nMEMORY_ERROR(%d): %s() failed - returned NULL pointer\n\n",
            id, funcname);
    return(1);
  }

  return(0);
}