/*-----------------------------------------------------------------
 * Programmer(s): Daniel R. Reynolds @ SMU
 *---------------------------------------------------------------
 * SUNDIALS Copyright Start
 * Copyright (c) 2002-2020, Lawrence Livermore National Security
 * and Southern Methodist University.
 * All rights reserved.
 *
 * See the top-level LICENSE and NOTICE files for details.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 * SUNDIALS Copyright End
 *---------------------------------------------------------------
 * Example problem:
 *
 * An ODE system is generated from the following 2-species diurnal
 * kinetics advection-diffusion PDE system in 2 space dimensions:
 *
 * dc(i)/dt = Kh*(d/dx)^2 c(i) + V*dc(i)/dx + (d/dy)(Kv(y)*dc(i)/dy)
 *                 + Ri(c1,c2,t)   for i = 1,2,   where
 *   R1(c1,c2,t) = -q1*c1*c3 - q2*c1*c2 + 2*q3(t)*c3 + q4(t)*c2 ,
 *   R2(c1,c2,t) =  q1*c1*c3 - q2*c1*c2 - q4(t)*c2 ,
 *   Kv(y) = Kv0*exp(y/5) ,
 * Kh, V, Kv0, q1, q2, and c3 are constants, and q3(t) and q4(t)
 * vary diurnally. The problem is posed on the square
 *   0 <= x <= 20,   30 <= y <= 50   (all in km),
 * with homogeneous Neumann boundary conditions, and for time t in
 *   0 <= t <= 86400 sec (1 day).
 * The PDE system is treated by central differences on a uniform
 * mesh, with simple polynomial initial profiles.
 *
 * The problem is solved by ARKODE on NPE processors, treated
 * as a rectangular process grid of size NPEX by NPEY, with
 * NPE = NPEX*NPEY. Each processor contains a subgrid of size MXSUB
 * by MYSUB of the (x,y) mesh. Thus the actual mesh sizes are
 * MX = MXSUB*NPEX and MY = MYSUB*NPEY, and the ODE system size is
 * neq = 2*MX*MY.
 *
 * The solution is done with the DIRK/GMRES method (i.e. using the
 * SUNLinSol_SPGMR linear solver) and the block-diagonal part of the
 * Newton matrix as a left preconditioner. A copy of the
 * block-diagonal part of the Jacobian is saved and conditionally
 * reused within the preconditioner routine.
 *
 * Performance data and sampled solution values are printed at
 * selected output times, and all performance counters are printed
 * on completion.
 *
 * This version uses MPI for user routines.
 *
 * Execution: mpiexec -n N ark_diurnal_kry_p   with N = NPEX*NPEY
 *            (see constants below).
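 *            e.g., mpiexec -n 4 ark_diurnal_kry_p for the default NPEX = NPEY = 2.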
 *---------------------------------------------------------------*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <arkode/arkode_arkstep.h>     /* prototypes for ARKStep fcts., consts */
#include <nvector/nvector_parallel.h>  /* access to MPI-parallel N_Vector      */
#include <sunlinsol/sunlinsol_spgmr.h> /* access to SPGMR SUNLinearSolver      */
#include <sundials/sundials_dense.h>   /* prototypes for small dense fcts.     */
#include <sundials/sundials_types.h>   /* SUNDIALS type definitions            */
#include <mpi.h>                       /* MPI constants and types              */

/* helpful macros */

#ifndef SQR
#define SQR(A) ((A)*(A))
#endif

/* Problem Constants */
#define NVARS        2                    /* number of species                   */
#define KH           RCONST(4.0e-6)       /* horizontal diffusivity Kh           */
#define VEL          RCONST(0.001)        /* advection velocity V                */
#define KV0          RCONST(1.0e-8)       /* coefficient in Kv(y)                */
#define Q1           RCONST(1.63e-16)     /* coefficients q1, q2, c3             */
#define Q2           RCONST(4.66e-16)
#define C3           RCONST(3.7e16)
#define A3           RCONST(22.62)        /* coefficient in expression for q3(t) */
#define A4           RCONST(7.601)        /* coefficient in expression for q4(t) */
#define C1_SCALE     RCONST(1.0e6)        /* coefficients in initial profiles    */
#define C2_SCALE     RCONST(1.0e12)

#define T0           RCONST(0.0)          /* initial time                        */
#define NOUT         12                   /* number of output times              */
#define TWOHR        RCONST(7200.0)       /* number of seconds in two hours      */
#define HALFDAY      RCONST(4.32e4)       /* number of seconds in a half day     */
#define PI           RCONST(3.1415926535898)  /* pi */

#define XMIN         RCONST(0.0)          /* grid boundaries in x                */
#define XMAX         RCONST(20.0)
#define YMIN         RCONST(30.0)         /* grid boundaries in y                */
#define YMAX         RCONST(50.0)

#define NPEX         2                    /* no. PEs in x direction of PE array  */
#define NPEY         2                    /* no. PEs in y direction of PE array  */
                                          /* Total no. PEs = NPEX*NPEY           */
#define MXSUB        5                    /* no. x points per subgrid            */
#define MYSUB        5                    /* no. y points per subgrid            */

#define MX           (NPEX*MXSUB)         /* MX = number of x mesh points        */
#define MY           (NPEY*MYSUB)         /* MY = number of y mesh points        */
                                          /* Spatial mesh is MX by MY            */
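/* With the default NPEX = NPEY = 2 and MXSUB = MYSUB = 5, the global mesh is
   MX = MY = 10, so the ODE system size is neq = NVARS*MX*MY = 200. */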

/* initialization constants */
#define RTOL         RCONST(1.0e-5)       /* scalar relative tolerance             */
#define FLOOR        RCONST(100.0)        /* value of C1 or C2 at which tolerances */
                                          /* change from relative to absolute      */
#define ATOL         (RTOL*FLOOR)         /* scalar absolute tolerance             */
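/* Note: with RTOL = 1.0e-5 and FLOOR = 100.0, the resulting ATOL is 1.0e-3 */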


/* User-defined matrix accessor macro: IJth

   IJth is defined in order to write code which indexes into dense
   matrices with a (row,column) pair, where 1 <= row,column <= NVARS.

   IJth(a,i,j) references the (i,j)th entry of the small matrix realtype **a,
   where 1 <= i,j <= NVARS. The small matrix routines in sundials_dense.h
   work with matrices stored by column in a 2-dimensional array. In C,
   arrays are indexed starting at 0, not 1. */
#define IJth(a,i,j) (a[j-1][i-1])
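/* For example, IJth(a,1,2) expands to a[1][0]: row 1, column 2 of a 2x2 block
   in column-major storage. */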

/* Type : UserData
   contains problem constants, preconditioner blocks, pivot arrays,
   grid constants, and processor indices, as well as data needed
   for the preconditioner */
typedef struct {

  realtype q4, om, dx, dy, hdco, haco, vdco;   /* q4(t), pi/halfday, mesh spacings,
                                                  diffusion/advection coefficients */
  realtype uext[NVARS*(MXSUB+2)*(MYSUB+2)];    /* local subgrid extended by one
                                                  ghost x- and y-line on each side */
  int my_pe, isubx, isuby;                     /* PE number and PE grid indices    */
  int nvmxsub, nvmxsub2;                       /* x-line lengths of u and uext     */
  MPI_Comm comm;

  /* For preconditioner */
  realtype **P[MXSUB][MYSUB], **Jbd[MXSUB][MYSUB];
  sunindextype *pivot[MXSUB][MYSUB];

} *UserData;

/* Private Helper Functions */
static void InitUserData(int my_pe, MPI_Comm comm, UserData data);
static void FreeUserData(UserData data);
static void SetInitialProfiles(N_Vector u, UserData data);
static void PrintOutput(void *arkode_mem, int my_pe, MPI_Comm comm,
                        N_Vector u, realtype t);
static void PrintFinalStats(void *arkode_mem);
static void BSend(MPI_Comm comm,
                  int my_pe, int isubx, int isuby,
                  sunindextype dsizex, sunindextype dsizey,
                  realtype udata[]);
static void BRecvPost(MPI_Comm comm, MPI_Request request[],
                      int my_pe, int isubx, int isuby,
                      sunindextype dsizex, sunindextype dsizey,
                      realtype uext[], realtype buffer[]);
static void BRecvWait(MPI_Request request[],
                      int isubx, int isuby,
                      sunindextype dsizex, realtype uext[],
                      realtype buffer[]);
static void ucomm(realtype t, N_Vector u, UserData data);
static void fcalc(realtype t, realtype udata[], realtype dudata[],
                  UserData data);


/* Functions Called by the Solver */
static int f(realtype t, N_Vector u, N_Vector udot, void *user_data);
static int Precond(realtype tn, N_Vector u, N_Vector fu,
                   booleantype jok, booleantype *jcurPtr,
                   realtype gamma, void *user_data);
static int PSolve(realtype tn, N_Vector u, N_Vector fu,
                  N_Vector r, N_Vector z, realtype gamma,
                  realtype delta, int lr, void *user_data);

/* Private function to check function return values */
static int check_flag(void *flagvalue, const char *funcname, int opt, int id);


/***************************** Main Program ******************************/
int main(int argc, char *argv[])
{
  realtype abstol, reltol, t, tout;
  N_Vector u;
  UserData data;
  SUNLinearSolver LS;
  void *arkode_mem;
  int iout, flag, my_pe, npes;
  sunindextype neq, local_N;
  MPI_Comm comm;

  u = NULL;
  data = NULL;
  LS = NULL;
  arkode_mem = NULL;

  /* Set problem size neq */
  neq = NVARS*MX*MY;

  /* Get processor number and total number of pe's */
  MPI_Init(&argc, &argv);
  comm = MPI_COMM_WORLD;
  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &my_pe);

  if (npes != NPEX*NPEY) {
    if (my_pe == 0)
      fprintf(stderr, "\nMPI_ERROR(0): npes = %d is not equal to NPEX*NPEY = %d\n\n",
              npes, NPEX*NPEY);
    MPI_Finalize();
    return(1);
  }

  /* Set local length */
  local_N = NVARS*MXSUB*MYSUB;

  /* Allocate and load user data block; allocate preconditioner block */
  data = (UserData) malloc(sizeof *data);
  if (check_flag((void *)data, "malloc", 2, my_pe)) MPI_Abort(comm, 1);
  InitUserData(my_pe, comm, data);

  /* Allocate u, and set initial values and tolerances */
  u = N_VNew_Parallel(comm, local_N, neq);
  if (check_flag((void *)u, "N_VNew_Parallel", 0, my_pe)) MPI_Abort(comm, 1);
  SetInitialProfiles(u, data);
  abstol = ATOL; reltol = RTOL;

  /* Create SPGMR solver structure -- use left preconditioning
     and the default Krylov dimension maxl */
  LS = SUNLinSol_SPGMR(u, PREC_LEFT, 0);
  if (check_flag((void *)LS, "SUNLinSol_SPGMR", 0, my_pe)) MPI_Abort(comm, 1);

  /* Call ARKStepCreate to initialize the integrator memory and specify the
     right-hand side function in u' = fi(t,u) [the explicit part fe is NULL],
     the initial time T0, and the initial dependent variable vector u. */
  arkode_mem = ARKStepCreate(NULL, f, T0, u);
  if (check_flag((void *)arkode_mem, "ARKStepCreate", 0, my_pe)) MPI_Abort(comm, 1);

  /* Set the pointer to user-defined data */
  flag = ARKStepSetUserData(arkode_mem, data);
  if (check_flag(&flag, "ARKStepSetUserData", 1, my_pe)) MPI_Abort(comm, 1);

  /* Call ARKStepSetMaxNumSteps to increase default */
  flag = ARKStepSetMaxNumSteps(arkode_mem, 10000);
  if (check_flag(&flag, "ARKStepSetMaxNumSteps", 1, my_pe)) return(1);

  /* Call ARKStepSStolerances to specify the scalar relative tolerance
     and scalar absolute tolerances */
  flag = ARKStepSStolerances(arkode_mem, reltol, abstol);
  if (check_flag(&flag, "ARKStepSStolerances", 1, my_pe)) return(1);

  /* Attach SPGMR solver structure to ARKStep interface */
  flag = ARKStepSetLinearSolver(arkode_mem, LS, NULL);
  if (check_flag(&flag, "ARKStepSetLinearSolver", 1, my_pe)) MPI_Abort(comm, 1);

  /* Set the preconditioner setup and solve routines Precond and PSolve
     (both receive the user-defined data block through user_data) */
  flag = ARKStepSetPreconditioner(arkode_mem, Precond, PSolve);
  if (check_flag(&flag, "ARKStepSetPreconditioner", 1, my_pe)) MPI_Abort(comm, 1);

  /* Print heading */
  if (my_pe == 0)
    printf("\n2-species diurnal advection-diffusion problem\n\n");

  /* In loop over output points, call ARKStepEvolve, print results, test for error */
  for (iout=1, tout=TWOHR; iout<=NOUT; iout++, tout+=TWOHR) {
    flag = ARKStepEvolve(arkode_mem, tout, u, &t, ARK_NORMAL);
    if (check_flag(&flag, "ARKStepEvolve", 1, my_pe)) break;
    PrintOutput(arkode_mem, my_pe, comm, u, t);
  }

  /* Print final statistics */
  if (my_pe == 0) PrintFinalStats(arkode_mem);

  /* Free memory */
  FreeUserData(data);
  ARKStepFree(&arkode_mem);
  SUNLinSolFree(LS);
  N_VDestroy(u);
  MPI_Finalize();
  return(0);
}

/*********************** Private Helper Functions ************************/

/* Load constants in data */
static void InitUserData(int my_pe, MPI_Comm comm, UserData data)
{
  int isubx, isuby;
  int lx, ly;

  /* Set problem constants */
  data->om = PI/HALFDAY;
  data->dx = (XMAX-XMIN)/((realtype)(MX-1));
  data->dy = (YMAX-YMIN)/((realtype)(MY-1));
  data->hdco = KH/SQR(data->dx);
  data->haco = VEL/(RCONST(2.0)*data->dx);
  data->vdco = (RCONST(1.0)/SQR(data->dy))*KV0;

  /* Set machine-related constants */
  data->comm = comm;
  data->my_pe = my_pe;

  /* isubx and isuby are the PE grid indices corresponding to my_pe */
  isuby = my_pe/NPEX;
  isubx = my_pe - isuby*NPEX;
  data->isubx = isubx;
  data->isuby = isuby;

  /* Set the sizes of a boundary x-line in u and uext */
  data->nvmxsub = NVARS*MXSUB;
  data->nvmxsub2 = NVARS*(MXSUB+2);

  /* Preconditioner-related fields */
  for (lx = 0; lx < MXSUB; lx++) {
    for (ly = 0; ly < MYSUB; ly++) {
      (data->P)[lx][ly] = newDenseMat(NVARS, NVARS);
      (data->Jbd)[lx][ly] = newDenseMat(NVARS, NVARS);
      (data->pivot)[lx][ly] = newIndexArray(NVARS);
    }
  }
}

/* Free user data memory */
static void FreeUserData(UserData data)
{
  int lx, ly;
  for (lx = 0; lx < MXSUB; lx++) {
    for (ly = 0; ly < MYSUB; ly++) {
      destroyMat((data->P)[lx][ly]);
      destroyMat((data->Jbd)[lx][ly]);
      destroyArray((data->pivot)[lx][ly]);
    }
  }
  free(data);
}

/* Set initial conditions in u */
static void SetInitialProfiles(N_Vector u, UserData data)
{
  int isubx, isuby, lx, ly, jx, jy;
  sunindextype offset;
  realtype dx, dy, x, y, cx, cy, xmid, ymid;
  realtype *udata;

  /* Set pointer to data array in vector u */
  udata = N_VGetArrayPointer(u);

  /* Get mesh spacings, and subgrid indices for this PE */
  dx = data->dx; dy = data->dy;
  isubx = data->isubx; isuby = data->isuby;

  /* Load initial profiles of c1 and c2 into local u vector.
     Here lx and ly are local mesh point indices on the local subgrid,
     and jx and jy are the global mesh point indices. */
  offset = 0;
  xmid = RCONST(0.5)*(XMIN + XMAX);
  ymid = RCONST(0.5)*(YMIN + YMAX);
  for (ly = 0; ly < MYSUB; ly++) {
    jy = ly + isuby*MYSUB;
    y = YMIN + jy*dy;
    cy = SQR(RCONST(0.1)*(y - ymid));
    cy = RCONST(1.0) - cy + RCONST(0.5)*SQR(cy);
    for (lx = 0; lx < MXSUB; lx++) {
      jx = lx + isubx*MXSUB;
      x = XMIN + jx*dx;
      cx = SQR(RCONST(0.1)*(x - xmid));
      cx = RCONST(1.0) - cx + RCONST(0.5)*SQR(cx);
      udata[offset  ] = C1_SCALE*cx*cy;
      udata[offset+1] = C2_SCALE*cx*cy;
      offset = offset + 2;
    }
  }
}

/* Print current t, step count, stepsize, and sampled c1,c2 values */
static void PrintOutput(void *arkode_mem, int my_pe, MPI_Comm comm,
                        N_Vector u, realtype t)
{
  int flag;
  realtype hu, *udata, tempu[2];
  int npelast;
  sunindextype i0, i1;
  long int nst;
  MPI_Status status;

  npelast = NPEX*NPEY - 1;
  udata = N_VGetArrayPointer(u);

  /* Send c1,c2 at top right mesh point to PE 0 */
  if (my_pe == npelast) {
    i0 = NVARS*MXSUB*MYSUB - 2;
    i1 = i0 + 1;
    if (npelast != 0)
      MPI_Send(&udata[i0], 2, MPI_SUNREALTYPE, 0, 0, comm);
    else {
      tempu[0] = udata[i0];
      tempu[1] = udata[i1];
    }
  }

  /* On PE 0, receive c1,c2 at top right, then print performance data
     and sampled solution values */
  if (my_pe == 0) {
    if (npelast != 0)
      MPI_Recv(&tempu[0], 2, MPI_SUNREALTYPE, npelast, 0, comm, &status);
    flag = ARKStepGetNumSteps(arkode_mem, &nst);
    check_flag(&flag, "ARKStepGetNumSteps", 1, my_pe);
    flag = ARKStepGetLastStep(arkode_mem, &hu);
    check_flag(&flag, "ARKStepGetLastStep", 1, my_pe);

#if defined(SUNDIALS_EXTENDED_PRECISION)
    printf("t = %.2Le   no. steps = %ld   stepsize = %.2Le\n",
           t, nst, hu);
    printf("At bottom left:  c1, c2 = %12.3Le %12.3Le \n", udata[0], udata[1]);
    printf("At top right:    c1, c2 = %12.3Le %12.3Le \n\n", tempu[0], tempu[1]);
#elif defined(SUNDIALS_DOUBLE_PRECISION)
    printf("t = %.2e   no. steps = %ld   stepsize = %.2e\n",
           t, nst, hu);
    printf("At bottom left:  c1, c2 = %12.3e %12.3e \n", udata[0], udata[1]);
    printf("At top right:    c1, c2 = %12.3e %12.3e \n\n", tempu[0], tempu[1]);
#else
    printf("t = %.2e   no. steps = %ld   stepsize = %.2e\n",
           t, nst, hu);
    printf("At bottom left:  c1, c2 = %12.3e %12.3e \n", udata[0], udata[1]);
    printf("At top right:    c1, c2 = %12.3e %12.3e \n\n", tempu[0], tempu[1]);
#endif
  }
}

/* Print final statistics */
static void PrintFinalStats(void *arkode_mem)
{
  long int lenrw, leniw;
  long int lenrwLS, leniwLS;
  long int nst, nfe, nfi, nsetups, nni, ncfn, netf;
  long int nli, npe, nps, ncfl, nfeLS;
  int flag;

  flag = ARKStepGetWorkSpace(arkode_mem, &lenrw, &leniw);
  check_flag(&flag, "ARKStepGetWorkSpace", 1, 0);
  flag = ARKStepGetNumSteps(arkode_mem, &nst);
  check_flag(&flag, "ARKStepGetNumSteps", 1, 0);
  flag = ARKStepGetNumRhsEvals(arkode_mem, &nfe, &nfi);
  check_flag(&flag, "ARKStepGetNumRhsEvals", 1, 0);
  flag = ARKStepGetNumLinSolvSetups(arkode_mem, &nsetups);
  check_flag(&flag, "ARKStepGetNumLinSolvSetups", 1, 0);
  flag = ARKStepGetNumErrTestFails(arkode_mem, &netf);
  check_flag(&flag, "ARKStepGetNumErrTestFails", 1, 0);
  flag = ARKStepGetNumNonlinSolvIters(arkode_mem, &nni);
  check_flag(&flag, "ARKStepGetNumNonlinSolvIters", 1, 0);
  flag = ARKStepGetNumNonlinSolvConvFails(arkode_mem, &ncfn);
  check_flag(&flag, "ARKStepGetNumNonlinSolvConvFails", 1, 0);

  flag = ARKStepGetLinWorkSpace(arkode_mem, &lenrwLS, &leniwLS);
  check_flag(&flag, "ARKStepGetLinWorkSpace", 1, 0);
  flag = ARKStepGetNumLinIters(arkode_mem, &nli);
  check_flag(&flag, "ARKStepGetNumLinIters", 1, 0);
  flag = ARKStepGetNumPrecEvals(arkode_mem, &npe);
  check_flag(&flag, "ARKStepGetNumPrecEvals", 1, 0);
  flag = ARKStepGetNumPrecSolves(arkode_mem, &nps);
  check_flag(&flag, "ARKStepGetNumPrecSolves", 1, 0);
  flag = ARKStepGetNumLinConvFails(arkode_mem, &ncfl);
  check_flag(&flag, "ARKStepGetNumLinConvFails", 1, 0);
  flag = ARKStepGetNumLinRhsEvals(arkode_mem, &nfeLS);
  check_flag(&flag, "ARKStepGetNumLinRhsEvals", 1, 0);

  printf("\nFinal Statistics: \n\n");
  printf("lenrw   = %5ld     leniw   = %5ld\n", lenrw, leniw);
  printf("lenrwls = %5ld     leniwls = %5ld\n", lenrwLS, leniwLS);
  printf("nst     = %5ld     nfe     = %5ld\n", nst, nfe);
  printf("nfi     = %5ld     nfels   = %5ld\n", nfi, nfeLS);
  printf("nni     = %5ld     nli     = %5ld\n", nni, nli);
  printf("nsetups = %5ld     netf    = %5ld\n", nsetups, netf);
  printf("npe     = %5ld     nps     = %5ld\n", npe, nps);
  printf("ncfn    = %5ld     ncfl    = %5ld\n\n", ncfn, ncfl);
}

/* Routine to send boundary data to neighboring PEs */
static void BSend(MPI_Comm comm, int my_pe, int isubx,
                  int isuby, sunindextype dsizex,
                  sunindextype dsizey, realtype udata[])
{
  int i, ly;
  sunindextype offsetu, offsetbuf;
  realtype bufleft[NVARS*MYSUB], bufright[NVARS*MYSUB];

  /* If isuby > 0, send data from bottom x-line of u */
  if (isuby != 0)
    MPI_Send(&udata[0], (int) dsizex, MPI_SUNREALTYPE, my_pe-NPEX, 0, comm);

  /* If isuby < NPEY-1, send data from top x-line of u */
  if (isuby != NPEY-1) {
    offsetu = (MYSUB-1)*dsizex;
    MPI_Send(&udata[offsetu], (int) dsizex, MPI_SUNREALTYPE, my_pe+NPEX, 0, comm);
  }

  /* If isubx > 0, send data from left y-line of u (via bufleft) */
  if (isubx != 0) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetbuf = ly*NVARS;
      offsetu = ly*dsizex;
      for (i = 0; i < NVARS; i++)
        bufleft[offsetbuf+i] = udata[offsetu+i];
    }
    MPI_Send(&bufleft[0], (int) dsizey, MPI_SUNREALTYPE, my_pe-1, 0, comm);
  }

  /* If isubx < NPEX-1, send data from right y-line of u (via bufright) */
  if (isubx != NPEX-1) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetbuf = ly*NVARS;
      offsetu = offsetbuf*MXSUB + (MXSUB-1)*NVARS;
      for (i = 0; i < NVARS; i++)
        bufright[offsetbuf+i] = udata[offsetu+i];
    }
    MPI_Send(&bufright[0], (int) dsizey, MPI_SUNREALTYPE, my_pe+1, 0, comm);
  }
}

/* Routine to start receiving boundary data from neighboring PEs.
   Notes:
   1) buffer should be able to hold 2*NVARS*MYSUB realtype entries, should be
      passed to both the BRecvPost and BRecvWait functions, and should not
      be manipulated between the two calls.
   2) request should have 4 entries, and should be passed in both calls also. */

static void BRecvPost(MPI_Comm comm, MPI_Request request[],
                      int my_pe, int isubx, int isuby,
                      sunindextype dsizex, sunindextype dsizey,
                      realtype uext[], realtype buffer[])
{
  sunindextype offsetue;
  /* Have bufleft and bufright use the same buffer */
  realtype *bufleft = buffer, *bufright = buffer+NVARS*MYSUB;

  /* If isuby > 0, receive data for bottom x-line of uext */
  if (isuby != 0)
    MPI_Irecv(&uext[NVARS], (int) dsizex, MPI_SUNREALTYPE,
              my_pe-NPEX, 0, comm, &request[0]);

  /* If isuby < NPEY-1, receive data for top x-line of uext */
  if (isuby != NPEY-1) {
    offsetue = NVARS*(1 + (MYSUB+1)*(MXSUB+2));
    MPI_Irecv(&uext[offsetue], (int) dsizex, MPI_SUNREALTYPE,
              my_pe+NPEX, 0, comm, &request[1]);
  }

  /* If isubx > 0, receive data for left y-line of uext (via bufleft) */
  if (isubx != 0) {
    MPI_Irecv(&bufleft[0], (int) dsizey, MPI_SUNREALTYPE,
              my_pe-1, 0, comm, &request[2]);
  }

  /* If isubx < NPEX-1, receive data for right y-line of uext (via bufright) */
  if (isubx != NPEX-1) {
    MPI_Irecv(&bufright[0], (int) dsizey, MPI_SUNREALTYPE,
              my_pe+1, 0, comm, &request[3]);
  }
}

/* Routine to finish receiving boundary data from neighboring PEs.
   Notes:
   1) buffer should be able to hold 2*NVARS*MYSUB realtype entries, should be
      passed to both the BRecvPost and BRecvWait functions, and should not
      be manipulated between the two calls.
   2) request should have 4 entries, and should be passed in both calls also. */

static void BRecvWait(MPI_Request request[],
                      int isubx, int isuby,
                      sunindextype dsizex, realtype uext[],
                      realtype buffer[])
{
  int i, ly;
  sunindextype dsizex2, offsetue, offsetbuf;
  realtype *bufleft = buffer, *bufright = buffer+NVARS*MYSUB;
  MPI_Status status;

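  /* dsizex2 is the length of an x-line in uext, i.e. dsizex plus two ghost cells */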
  dsizex2 = dsizex + 2*NVARS;

  /* If isuby > 0, receive data for bottom x-line of uext */
  if (isuby != 0)
    MPI_Wait(&request[0], &status);

  /* If isuby < NPEY-1, receive data for top x-line of uext */
  if (isuby != NPEY-1)
    MPI_Wait(&request[1], &status);

  /* If isubx > 0, receive data for left y-line of uext (via bufleft) */
  if (isubx != 0) {
    MPI_Wait(&request[2], &status);

    /* Copy the buffer to uext */
    for (ly = 0; ly < MYSUB; ly++) {
      offsetbuf = ly*NVARS;
      offsetue = (ly+1)*dsizex2;
      for (i = 0; i < NVARS; i++)
        uext[offsetue+i] = bufleft[offsetbuf+i];
    }
  }

  /* If isubx < NPEX-1, receive data for right y-line of uext (via bufright) */
  if (isubx != NPEX-1) {
    MPI_Wait(&request[3], &status);

    /* Copy the buffer to uext */
    for (ly = 0; ly < MYSUB; ly++) {
      offsetbuf = ly*NVARS;
      offsetue = (ly+2)*dsizex2 - NVARS;
      for (i = 0; i < NVARS; i++)
        uext[offsetue+i] = bufright[offsetbuf+i];
    }
  }
}

/* ucomm routine. This routine performs all communication
   between processors of data needed to calculate f. */

static void ucomm(realtype t, N_Vector u, UserData data)
{

  realtype *udata, *uext, buffer[2*NVARS*MYSUB];
  MPI_Comm comm;
  int my_pe, isubx, isuby;
  sunindextype nvmxsub, nvmysub;
  MPI_Request request[4];

  udata = N_VGetArrayPointer(u);

  /* Get comm, my_pe, subgrid indices, data sizes, extended array uext */
  comm = data->comm;  my_pe = data->my_pe;
  isubx = data->isubx;  isuby = data->isuby;
  nvmxsub = data->nvmxsub;
  nvmysub = NVARS*MYSUB;
  uext = data->uext;

  /* Start receiving boundary data from neighboring PEs */
  BRecvPost(comm, request, my_pe, isubx, isuby, nvmxsub, nvmysub, uext, buffer);

  /* Send data from boundary of local grid to neighboring PEs */
  BSend(comm, my_pe, isubx, isuby, nvmxsub, nvmysub, udata);

  /* Finish receiving boundary data from neighboring PEs */
  BRecvWait(request, isubx, isuby, nvmxsub, uext, buffer);
}


/* fcalc routine. Compute f(t,y). This routine assumes that communication
   between processors of data needed to calculate f has already been done,
   and this data is in the work array uext. */

static void fcalc(realtype t, realtype udata[],
                  realtype dudata[], UserData data)
{
  realtype *uext;
  realtype q3, c1, c2, c1dn, c2dn, c1up, c2up, c1lt, c2lt;
  realtype c1rt, c2rt, cydn, cyup, hord1, hord2, horad1, horad2;
  realtype qq1, qq2, qq3, qq4, rkin1, rkin2, s, vertd1, vertd2, ydn, yup;
  realtype q4coef, dely, verdco, hordco, horaco;
  int i, lx, ly, jy;
  int isubx, isuby;
  sunindextype nvmxsub, nvmxsub2, offsetu, offsetue;

  /* Get subgrid indices, data sizes, extended work array uext */
  isubx = data->isubx;  isuby = data->isuby;
  nvmxsub = data->nvmxsub;  nvmxsub2 = data->nvmxsub2;
  uext = data->uext;

  /* Copy local segment of u vector into the working extended array uext */
  offsetu = 0;
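  /* The copy target starts at uext(1,1): skip the bottom ghost x-line
     (nvmxsub2 entries) plus the left ghost cell (NVARS entries) */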
  offsetue = nvmxsub2 + NVARS;
  for (ly = 0; ly < MYSUB; ly++) {
    for (i = 0; i < nvmxsub; i++) uext[offsetue+i] = udata[offsetu+i];
    offsetu = offsetu + nvmxsub;
    offsetue = offsetue + nvmxsub2;
  }

  /* To facilitate homogeneous Neumann boundary conditions, when this is
     a boundary PE, copy data from the first interior mesh line of u to uext */

  /* If isuby = 0, copy x-line 2 of u to uext */
  if (isuby == 0) {
    for (i = 0; i < nvmxsub; i++) uext[NVARS+i] = udata[nvmxsub+i];
  }

  /* If isuby = NPEY-1, copy x-line MYSUB-1 of u to uext */
  if (isuby == NPEY-1) {
    offsetu = (MYSUB-2)*nvmxsub;
    offsetue = (MYSUB+1)*nvmxsub2 + NVARS;
    for (i = 0; i < nvmxsub; i++) uext[offsetue+i] = udata[offsetu+i];
  }

  /* If isubx = 0, copy y-line 2 of u to uext */
  if (isubx == 0) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetu = ly*nvmxsub + NVARS;
      offsetue = (ly+1)*nvmxsub2;
      for (i = 0; i < NVARS; i++) uext[offsetue+i] = udata[offsetu+i];
    }
  }

  /* If isubx = NPEX-1, copy y-line MXSUB-1 of u to uext */
  if (isubx == NPEX-1) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetu = (ly+1)*nvmxsub - 2*NVARS;
      offsetue = (ly+2)*nvmxsub2 - NVARS;
      for (i = 0; i < NVARS; i++) uext[offsetue+i] = udata[offsetu+i];
    }
  }

  /* Make local copies of problem variables, for efficiency */
  dely = data->dy;
  verdco = data->vdco;
  hordco = data->hdco;
  horaco = data->haco;

  /* Set diurnal rate coefficients as functions of t, and save q4 in
     data block for use by preconditioner evaluation routine */
  s = sin((data->om)*t);
  if (s > RCONST(0.0)) {
    q3 = exp(-A3/s);
    q4coef = exp(-A4/s);
  } else {
    q3 = RCONST(0.0);
    q4coef = RCONST(0.0);
  }
  data->q4 = q4coef;

  /* Loop over all grid points in local subgrid */
  for (ly = 0; ly < MYSUB; ly++) {

    jy = ly + isuby*MYSUB;

    /* Set vertical diffusion coefficients at jy +- 1/2 */
    ydn = YMIN + (jy - RCONST(0.5))*dely;
    yup = ydn + dely;
    cydn = verdco*exp(RCONST(0.2)*ydn);
    cyup = verdco*exp(RCONST(0.2)*yup);
    for (lx = 0; lx < MXSUB; lx++) {

      /* Extract c1 and c2, and set kinetic rate terms */
      offsetue = (lx+1)*NVARS + (ly+1)*nvmxsub2;
      c1 = uext[offsetue];
      c2 = uext[offsetue+1];
      qq1 = Q1*c1*C3;
      qq2 = Q2*c1*c2;
      qq3 = q3*C3;
      qq4 = q4coef*c2;
      rkin1 = -qq1 - qq2 + RCONST(2.0)*qq3 + qq4;
      rkin2 = qq1 - qq2 - qq4;

      /* Set vertical diffusion terms */
      c1dn = uext[offsetue-nvmxsub2];
      c2dn = uext[offsetue-nvmxsub2+1];
      c1up = uext[offsetue+nvmxsub2];
      c2up = uext[offsetue+nvmxsub2+1];
      vertd1 = cyup*(c1up - c1) - cydn*(c1 - c1dn);
      vertd2 = cyup*(c2up - c2) - cydn*(c2 - c2dn);

      /* Set horizontal diffusion and advection terms */
      c1lt = uext[offsetue-2];
      c2lt = uext[offsetue-1];
      c1rt = uext[offsetue+2];
      c2rt = uext[offsetue+3];
      hord1 = hordco*(c1rt - RCONST(2.0)*c1 + c1lt);
      hord2 = hordco*(c2rt - RCONST(2.0)*c2 + c2lt);
      horad1 = horaco*(c1rt - c1lt);
      horad2 = horaco*(c2rt - c2lt);

      /* Load all terms into dudata */
      offsetu = lx*NVARS + ly*nvmxsub;
      dudata[offsetu]   = vertd1 + hord1 + horad1 + rkin1;
      dudata[offsetu+1] = vertd2 + hord2 + horad2 + rkin2;
    }
  }
}


/***************** Functions Called by the Solver *************************/

/* f routine. Evaluate f(t,y). First call ucomm to do communication of
   subgrid boundary data into uext. Then calculate f by a call to fcalc. */

static int f(realtype t, N_Vector u, N_Vector udot, void *user_data)
{
  realtype *udata, *dudata;
  UserData data;

  udata = N_VGetArrayPointer(u);
  dudata = N_VGetArrayPointer(udot);
  data = (UserData) user_data;

  /* Call ucomm to do inter-processor communication */
  ucomm(t, u, data);

  /* Call fcalc to calculate all right-hand sides */
  fcalc(t, udata, dudata, data);

  return(0);
}


/* Preconditioner setup routine. Generate and preprocess P. */
static int Precond(realtype tn, N_Vector u, N_Vector fu,
                   booleantype jok, booleantype *jcurPtr,
                   realtype gamma, void *user_data)
{
  realtype c1, c2, cydn, cyup, diag, ydn, yup, q4coef, dely, verdco, hordco;
  realtype **(*P)[MYSUB], **(*Jbd)[MYSUB];
  int nvmxsub, offset;
  sunindextype ier;
  sunindextype *(*pivot)[MYSUB];
  int lx, ly, jy, isuby;
  realtype *udata, **a, **j;
  UserData data;

  /* Make local copies of pointers in user_data, pointer to u's data,
     and PE index pair */
  data = (UserData) user_data;
  P = data->P;
  Jbd = data->Jbd;
  pivot = data->pivot;
  udata = N_VGetArrayPointer(u);
  isuby = data->isuby;
  nvmxsub = data->nvmxsub;

  if (jok) {
    /* jok = SUNTRUE: Copy Jbd to P */
    for (ly = 0; ly < MYSUB; ly++)
      for (lx = 0; lx < MXSUB; lx++)
        denseCopy(Jbd[lx][ly], P[lx][ly], NVARS, NVARS);

    *jcurPtr = SUNFALSE;

  }

  else {

    /* jok = SUNFALSE: Generate Jbd from scratch and copy to P */

    /* Make local copies of problem variables, for efficiency */
    q4coef = data->q4;
    dely = data->dy;
    verdco = data->vdco;
    hordco = data->hdco;

    /* Compute 2x2 diagonal Jacobian blocks (using q4 values
       computed on the last f call). Load into P. */
    for (ly = 0; ly < MYSUB; ly++) {
      jy = ly + isuby*MYSUB;
      ydn = YMIN + (jy - RCONST(0.5))*dely;
      yup = ydn + dely;
      cydn = verdco*exp(RCONST(0.2)*ydn);
      cyup = verdco*exp(RCONST(0.2)*yup);
      diag = -(cydn + cyup + RCONST(2.0)*hordco);
      for (lx = 0; lx < MXSUB; lx++) {
        offset = lx*NVARS + ly*nvmxsub;
        c1 = udata[offset];
        c2 = udata[offset+1];
        j = Jbd[lx][ly];
        a = P[lx][ly];
        IJth(j,1,1) = (-Q1*C3 - Q2*c2) + diag;
        IJth(j,1,2) = -Q2*c1 + q4coef;
        IJth(j,2,1) = Q1*C3 - Q2*c2;
        IJth(j,2,2) = (-Q2*c1 - q4coef) + diag;
        denseCopy(j, a, NVARS, NVARS);
      }
    }

    *jcurPtr = SUNTRUE;

  }

  /* Scale by -gamma */
  for (ly = 0; ly < MYSUB; ly++)
    for (lx = 0; lx < MXSUB; lx++)
      denseScale(-gamma, P[lx][ly], NVARS, NVARS);

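  /* Each 2x2 block of P now holds -gamma*J; adding the identity below yields the
     block-diagonal approximation of the Newton matrix I - gamma*J */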
  /* Add identity matrix and do LU decompositions on blocks in place */
  for (lx = 0; lx < MXSUB; lx++) {
    for (ly = 0; ly < MYSUB; ly++) {
      denseAddIdentity(P[lx][ly], NVARS);
      ier = denseGETRF(P[lx][ly], NVARS, NVARS, pivot[lx][ly]);
      if (ier != 0) return(1);
    }
  }

  return(0);
}


/* Preconditioner solve routine */
static int PSolve(realtype tn, N_Vector u, N_Vector fu,
                  N_Vector r, N_Vector z,
                  realtype gamma, realtype delta,
                  int lr, void *user_data)
{
  realtype **(*P)[MYSUB];
  int nvmxsub;
  sunindextype *(*pivot)[MYSUB];
  int lx, ly;
  realtype *zdata, *v;
  UserData data;

  /* Extract the P and pivot arrays from user_data */
  data = (UserData) user_data;
  P = data->P;
  pivot = data->pivot;

  /* Solve the block-diagonal system Px = r using LU factors stored
     in P and pivot data in pivot, and return the solution in z.
     First copy vector r to z. */
  N_VScale(RCONST(1.0), r, z);
  nvmxsub = data->nvmxsub;
  zdata = N_VGetArrayPointer(z);

  for (lx = 0; lx < MXSUB; lx++) {
    for (ly = 0; ly < MYSUB; ly++) {
      v = &(zdata[lx*NVARS + ly*nvmxsub]);
      denseGETRS(P[lx][ly], NVARS, pivot[lx][ly], v);
    }
  }

  return(0);
}


/*********************** Private Helper Function ************************/

/* Check function return value...
     opt == 0 means SUNDIALS function allocates memory so check if
              returned NULL pointer
     opt == 1 means SUNDIALS function returns a flag so check if
              flag >= 0
     opt == 2 means function allocates memory so check if returned
              NULL pointer */

static int check_flag(void *flagvalue, const char *funcname, int opt, int id)
{
  int *errflag;

  /* Check if SUNDIALS function returned NULL pointer - no memory allocated */
  if (opt == 0 && flagvalue == NULL) {
    fprintf(stderr, "\nSUNDIALS_ERROR(%d): %s() failed - returned NULL pointer\n\n",
            id, funcname);
    return(1);
  } else if (opt == 1) { /* Check if flag < 0 */
    errflag = (int *) flagvalue;
    if (*errflag < 0) {
      fprintf(stderr, "\nSUNDIALS_ERROR(%d): %s() failed with flag = %d\n\n",
              id, funcname, *errflag);
      return(1);
    }
  } else if (opt == 2 && flagvalue == NULL) { /* Check if function returned NULL pointer - no memory allocated */
    fprintf(stderr, "\nMEMORY_ERROR(%d): %s() failed - returned NULL pointer\n\n",
            id, funcname);
    return(1);
  }

  return(0);
}