/*-----------------------------------------------------------------
 * Programmer(s): Daniel R. Reynolds @ SMU
 *---------------------------------------------------------------
 * SUNDIALS Copyright Start
 * Copyright (c) 2002-2020, Lawrence Livermore National Security
 * and Southern Methodist University.
 * All rights reserved.
 *
 * See the top-level LICENSE and NOTICE files for details.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 * SUNDIALS Copyright End
 *---------------------------------------------------------------
 * Example problem:
 *
 * An ODE system is generated from the following 2-species diurnal
 * kinetics advection-diffusion PDE system in 2 space dimensions:
 *
 * dc(i)/dt = Kh*(d/dx)^2 c(i) + V*dc(i)/dx + (d/dy)(Kv(y)*dc(i)/dy)
 *                 + Ri(c1,c2,t)      for i = 1,2,   where
 *   R1(c1,c2,t) = -q1*c1*c3 - q2*c1*c2 + 2*q3(t)*c3 + q4(t)*c2 ,
 *   R2(c1,c2,t) =  q1*c1*c3 - q2*c1*c2 - q4(t)*c2 ,
 *   Kv(y) = Kv0*exp(y/5) ,
 * Kh, V, Kv0, q1, q2, and c3 are constants, and q3(t) and q4(t)
 * vary diurnally. The problem is posed on the square
 *   0 <= x <= 20,    30 <= y <= 50   (all in km),
 * with homogeneous Neumann boundary conditions, and for time t in
 *   0 <= t <= 86400 sec (1 day).
 * The PDE system is treated by central differences on a uniform
 * mesh, with simple polynomial initial profiles.
 *
 * The problem is solved by ARKODE on NPE processors, treated
 * as a rectangular process grid of size NPEX by NPEY, with
 * NPE = NPEX*NPEY. Each processor contains a subgrid of size MXSUB
 * by MYSUB of the (x,y) mesh.  Thus the actual mesh sizes are
 * MX = MXSUB*NPEX and MY = MYSUB*NPEY, and the ODE system size is
 * neq = 2*MX*MY.
 *
 * The solution is done with the DIRK/GMRES method (i.e. using the
 * SUNLinSol_SPGMR linear solver) and the block-diagonal part of the
 * Newton matrix as a left preconditioner. A copy of the
 * block-diagonal part of the Jacobian is saved and conditionally
 * reused within the preconditioner routine.
 *
 * Performance data and sampled solution values are printed at
 * selected output times, and all performance counters are printed
 * on completion.
 *
 * This version uses MPI for user routines.
 *
 * Execution: mpiexec -n N ark_diurnal_kry_p   with N = NPEX*NPEY
 * (see constants below).
 *---------------------------------------------------------------*/
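/* For example, with the default process grid NPEX = NPEY = 2 defined below,
   the code expects exactly 4 MPI tasks:
       mpiexec -n 4 ./ark_diurnal_kry_p                                      */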
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <arkode/arkode_arkstep.h>       /* prototypes for ARKStep fcts., consts */
#include <nvector/nvector_parallel.h>    /* access to MPI-parallel N_Vector  */
#include <sunlinsol/sunlinsol_spgmr.h>   /* access to SPGMR SUNLinearSolver  */
#include <sundials/sundials_dense.h>     /* prototypes for small dense fcts. */
#include <sundials/sundials_types.h>     /* SUNDIALS type definitions        */
#include <mpi.h>                         /* MPI constants and types          */

/* helpful macros */

#ifndef SQR
#define SQR(A) ((A)*(A))
#endif

/* Problem Constants */
#define NVARS        2                    /* number of species         */
#define KH           RCONST(4.0e-6)       /* horizontal diffusivity Kh */
#define VEL          RCONST(0.001)        /* advection velocity V      */
#define KV0          RCONST(1.0e-8)       /* coefficient in Kv(y)      */
#define Q1           RCONST(1.63e-16)     /* coefficients q1, q2, c3   */
#define Q2           RCONST(4.66e-16)
#define C3           RCONST(3.7e16)
#define A3           RCONST(22.62)        /* coefficient in expression for q3(t) */
#define A4           RCONST(7.601)        /* coefficient in expression for q4(t) */
#define C1_SCALE     RCONST(1.0e6)        /* coefficients in initial profiles    */
#define C2_SCALE     RCONST(1.0e12)

#define T0           RCONST(0.0)          /* initial time */
#define NOUT         12                   /* number of output times */
#define TWOHR        RCONST(7200.0)       /* number of seconds in two hours  */
#define HALFDAY      RCONST(4.32e4)       /* number of seconds in a half day */
#define PI       RCONST(3.1415926535898)  /* pi */

#define XMIN         RCONST(0.0)          /* grid boundaries in x  */
#define XMAX         RCONST(20.0)
#define YMIN         RCONST(30.0)         /* grid boundaries in y  */
#define YMAX         RCONST(50.0)

#define NPEX         2                    /* no. PEs in x direction of PE array */
#define NPEY         2                    /* no. PEs in y direction of PE array */
                                          /* Total no. PEs = NPEX*NPEY */
#define MXSUB        5                    /* no. x points per subgrid */
#define MYSUB        5                    /* no. y points per subgrid */

#define MX           (NPEX*MXSUB)         /* MX = number of x mesh points */
#define MY           (NPEY*MYSUB)         /* MY = number of y mesh points */
                                          /* Spatial mesh is MX by MY */

/* initialization constants */
#define RTOL    RCONST(1.0e-5)            /* scalar relative tolerance */
#define FLOOR   RCONST(100.0)             /* value of C1 or C2 at which tolerances */
                                          /* change from relative to absolute      */
#define ATOL    (RTOL*FLOOR)              /* scalar absolute tolerance */


/* User-defined matrix accessor macro: IJth

   IJth is defined in order to write code which indexes into dense
   matrices with a (row,column) pair, where 1 <= row,column <= NVARS.

   IJth(a,i,j) references the (i,j)th entry of the small matrix realtype **a,
   where 1 <= i,j <= NVARS. The small matrix routines in sundials_dense.h
   work with matrices stored by column in a 2-dimensional array. In C,
   arrays are indexed starting at 0, not 1. */
#define IJth(a,i,j) (a[j-1][i-1])
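/* For example, IJth(a,1,2) denotes row 1, column 2 of the 2x2 block a, which
   with the column-wise, 0-based storage described above is the C entry
   a[1][0]. */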

/* Type : UserData
   contains problem constants, preconditioner blocks, pivot arrays,
   grid constants, and processor indices, as well as data needed
   for the preconditioner */
typedef struct {

  realtype q4, om, dx, dy, hdco, haco, vdco;
  realtype uext[NVARS*(MXSUB+2)*(MYSUB+2)];
  int my_pe, isubx, isuby;
  int nvmxsub, nvmxsub2;
  MPI_Comm comm;

  /* For preconditioner */
  realtype **P[MXSUB][MYSUB], **Jbd[MXSUB][MYSUB];
  sunindextype *pivot[MXSUB][MYSUB];

} *UserData;
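/* Note on the fields above: uext holds the local subgrid extended by one
   ghost line on each side (hence NVARS*(MXSUB+2)*(MYSUB+2) values), stored
   with the NVARS species contiguous at each point and x varying fastest.
   P and Jbd hold one NVARS x NVARS dense block per local mesh point: Jbd is
   the saved Jacobian block that Precond may reuse when jok is true, and P is
   the LU-factored preconditioner block used in PSolve. */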

/* Private Helper Functions */
static void InitUserData(int my_pe, MPI_Comm comm, UserData data);
static void FreeUserData(UserData data);
static void SetInitialProfiles(N_Vector u, UserData data);
static void PrintOutput(void *arkode_mem, int my_pe, MPI_Comm comm,
                        N_Vector u, realtype t);
static void PrintFinalStats(void *arkode_mem);
static void BSend(MPI_Comm comm,
                  int my_pe, int isubx, int isuby,
                  sunindextype dsizex, sunindextype dsizey,
                  realtype udata[]);
static void BRecvPost(MPI_Comm comm, MPI_Request request[],
                      int my_pe, int isubx, int isuby,
                      sunindextype dsizex, sunindextype dsizey,
                      realtype uext[], realtype buffer[]);
static void BRecvWait(MPI_Request request[],
                      int isubx, int isuby,
                      sunindextype dsizex, realtype uext[],
                      realtype buffer[]);
static void ucomm(realtype t, N_Vector u, UserData data);
static void fcalc(realtype t, realtype udata[], realtype dudata[],
                  UserData data);


/* Functions Called by the Solver */
static int f(realtype t, N_Vector u, N_Vector udot, void *user_data);
static int Precond(realtype tn, N_Vector u, N_Vector fu,
                   booleantype jok, booleantype *jcurPtr,
                   realtype gamma, void *user_data);
static int PSolve(realtype tn, N_Vector u, N_Vector fu,
                  N_Vector r, N_Vector z, realtype gamma,
                  realtype delta, int lr, void *user_data);

/* Private function to check function return values */
static int check_flag(void *flagvalue, const char *funcname, int opt, int id);


/***************************** Main Program ******************************/
int main(int argc, char *argv[])
{
  realtype abstol, reltol, t, tout;
  N_Vector u;
  UserData data;
  SUNLinearSolver LS;
  void *arkode_mem;
  int iout, flag, my_pe, npes;
  sunindextype neq, local_N;
  MPI_Comm comm;

  u = NULL;
  data = NULL;
  LS = NULL;
  arkode_mem = NULL;

  /* Set problem size neq */
  neq = NVARS*MX*MY;

  /* Get processor number and total number of pe's */
  MPI_Init(&argc, &argv);
  comm = MPI_COMM_WORLD;
  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &my_pe);

  if (npes != NPEX*NPEY) {
    if (my_pe == 0)
      fprintf(stderr, "\nMPI_ERROR(0): npes = %d is not equal to NPEX*NPEY = %d\n\n",
              npes,NPEX*NPEY);
    MPI_Finalize();
    return(1);
  }

  /* Set local length */
  local_N = NVARS*MXSUB*MYSUB;

  /* Allocate and load user data block; allocate preconditioner block */
  data = (UserData) malloc(sizeof *data);
  if (check_flag((void *)data, "malloc", 2, my_pe)) MPI_Abort(comm, 1);
  InitUserData(my_pe, comm, data);

  /* Allocate u, and set initial values and tolerances */
  u = N_VNew_Parallel(comm, local_N, neq);
  if (check_flag((void *)u, "N_VNew_Parallel", 0, my_pe)) MPI_Abort(comm, 1);
  SetInitialProfiles(u, data);
  abstol = ATOL; reltol = RTOL;

  /* Create SPGMR solver structure -- use left preconditioning
     and the default Krylov dimension maxl */
  LS = SUNLinSol_SPGMR(u, PREC_LEFT, 0);
  if (check_flag((void *)LS, "SUNLinSol_SPGMR", 0, my_pe)) MPI_Abort(comm, 1);

  /* Call ARKStepCreate to initialize the integrator memory and specify the
     user's right-hand side function in u'=fi(t,u) [here fe is NULL],
     the initial time T0, and the initial dependent variable vector u. */
  arkode_mem = ARKStepCreate(NULL, f, T0, u);
  if (check_flag((void *)arkode_mem, "ARKStepCreate", 0, my_pe)) MPI_Abort(comm, 1);

  /* Set the pointer to user-defined data */
  flag = ARKStepSetUserData(arkode_mem, data);
  if (check_flag(&flag, "ARKStepSetUserData", 1, my_pe)) MPI_Abort(comm, 1);

  /* Call ARKStepSetMaxNumSteps to increase default */
  flag = ARKStepSetMaxNumSteps(arkode_mem, 10000);
  if (check_flag(&flag, "ARKStepSetMaxNumSteps", 1, my_pe)) return(1);

  /* Call ARKStepSStolerances to specify the scalar relative tolerance
     and scalar absolute tolerances */
  flag = ARKStepSStolerances(arkode_mem, reltol, abstol);
  if (check_flag(&flag, "ARKStepSStolerances", 1, my_pe)) return(1);

  /* Attach SPGMR solver structure to ARKStep interface */
  flag = ARKStepSetLinearSolver(arkode_mem, LS, NULL);
  if (check_flag(&flag, "ARKStepSetLinearSolver", 1, my_pe)) MPI_Abort(comm, 1);

  /* Set preconditioner setup and solve routines Precond and PSolve,
     and the pointer to the user-defined block data */
  flag = ARKStepSetPreconditioner(arkode_mem, Precond, PSolve);
  if (check_flag(&flag, "ARKStepSetPreconditioner", 1, my_pe)) MPI_Abort(comm, 1);

  /* Print heading */
  if (my_pe == 0)
    printf("\n2-species diurnal advection-diffusion problem\n\n");

  /* In loop over output points, call ARKStepEvolve, print results, test for error */
  for (iout=1, tout=TWOHR; iout<=NOUT; iout++, tout+=TWOHR) {
    flag = ARKStepEvolve(arkode_mem, tout, u, &t, ARK_NORMAL);
    if (check_flag(&flag, "ARKStepEvolve", 1, my_pe)) break;
    PrintOutput(arkode_mem, my_pe, comm, u, t);
  }

  /* Print final statistics */
  if (my_pe == 0) PrintFinalStats(arkode_mem);

  /* Free memory */
  FreeUserData(data);
  ARKStepFree(&arkode_mem);
  SUNLinSolFree(LS);
  N_VDestroy(u);
  MPI_Finalize();
  return(0);
}

/*********************** Private Helper Functions ************************/

/* Load constants in data */
static void InitUserData(int my_pe, MPI_Comm comm, UserData data)
{
  int isubx, isuby;
  int lx, ly;

  /* Set problem constants */
  data->om = PI/HALFDAY;
  data->dx = (XMAX-XMIN)/((realtype)(MX-1));
  data->dy = (YMAX-YMIN)/((realtype)(MY-1));
  data->hdco = KH/SQR(data->dx);
  data->haco = VEL/(RCONST(2.0)*data->dx);
  data->vdco = (RCONST(1.0)/SQR(data->dy))*KV0;

  /* Set machine-related constants */
  data->comm = comm;
  data->my_pe = my_pe;

  /* isubx and isuby are the PE grid indices corresponding to my_pe */
  isuby = my_pe/NPEX;
  isubx = my_pe - isuby*NPEX;
  data->isubx = isubx;
  data->isuby = isuby;

  /* Set the sizes of a boundary x-line in u and uext */
  data->nvmxsub = NVARS*MXSUB;
  data->nvmxsub2 = NVARS*(MXSUB+2);

  /* Preconditioner-related fields */
  for (lx = 0; lx < MXSUB; lx++) {
    for (ly = 0; ly < MYSUB; ly++) {
      (data->P)[lx][ly] = newDenseMat(NVARS, NVARS);
      (data->Jbd)[lx][ly] = newDenseMat(NVARS, NVARS);
      (data->pivot)[lx][ly] = newIndexArray(NVARS);
    }
  }
}
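/* Note: the decomposition above places process my_pe at column
   isubx = my_pe % NPEX and row isuby = my_pe / NPEX of the NPEX x NPEY
   process grid, so x-neighbors differ by 1 in rank and y-neighbors by NPEX
   (as used in BSend/BRecvPost below).  The newDenseMat and newIndexArray
   calls are the small dense-matrix helpers made available through the
   sundials_dense.h header included above. */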

/* Free user data memory */
static void FreeUserData(UserData data)
{
  int lx, ly;
  for (lx = 0; lx < MXSUB; lx++) {
    for (ly = 0; ly < MYSUB; ly++) {
      destroyMat((data->P)[lx][ly]);
      destroyMat((data->Jbd)[lx][ly]);
      destroyArray((data->pivot)[lx][ly]);
    }
  }
  free(data);
}

/* Set initial conditions in u */
static void SetInitialProfiles(N_Vector u, UserData data)
{
  int isubx, isuby, lx, ly, jx, jy;
  sunindextype offset;
  realtype dx, dy, x, y, cx, cy, xmid, ymid;
  realtype *udata;

  /* Set pointer to data array in vector u */
  udata = N_VGetArrayPointer(u);

  /* Get mesh spacings, and subgrid indices for this PE */
  dx = data->dx;         dy = data->dy;
  isubx = data->isubx;   isuby = data->isuby;

  /* Load initial profiles of c1 and c2 into local u vector.
  Here lx and ly are local mesh point indices on the local subgrid,
  and jx and jy are the global mesh point indices. */
  offset = 0;
  xmid = RCONST(0.5)*(XMIN + XMAX);
  ymid = RCONST(0.5)*(YMIN + YMAX);
  for (ly = 0; ly < MYSUB; ly++) {
    jy = ly + isuby*MYSUB;
    y = YMIN + jy*dy;
    cy = SQR(RCONST(0.1)*(y - ymid));
    cy = RCONST(1.0) - cy + RCONST(0.5)*SQR(cy);
    for (lx = 0; lx < MXSUB; lx++) {
      jx = lx + isubx*MXSUB;
      x = XMIN + jx*dx;
      cx = SQR(RCONST(0.1)*(x - xmid));
      cx = RCONST(1.0) - cx + RCONST(0.5)*SQR(cx);
      udata[offset  ] = C1_SCALE*cx*cy;
      udata[offset+1] = C2_SCALE*cx*cy;
      offset = offset + 2;
    }
  }
}
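/* The profiles set above have the product form
     c1(x,y) = C1_SCALE*alpha(x)*alpha(y),  c2(x,y) = C2_SCALE*alpha(x)*alpha(y),
   where alpha(s) = 1 - z + z^2/2 with z = (0.1*(s - smid))^2, i.e. a smooth
   bump equal to 1 at the domain midpoint and 0.5 at the boundaries. */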

/* Print current t, step count, stepsize, and sampled c1,c2 values */
static void PrintOutput(void *arkode_mem, int my_pe, MPI_Comm comm,
                        N_Vector u, realtype t)
{
  int flag;
  realtype hu, *udata, tempu[2];
  int npelast;
  sunindextype i0, i1;
  long int nst;
  MPI_Status status;

  npelast = NPEX*NPEY - 1;
  udata = N_VGetArrayPointer(u);

  /* Send c1,c2 at top right mesh point to PE 0 */
  if (my_pe == npelast) {
    i0 = NVARS*MXSUB*MYSUB - 2;
    i1 = i0 + 1;
    if (npelast != 0)
      MPI_Send(&udata[i0], 2, MPI_SUNREALTYPE, 0, 0, comm);
    else {
      tempu[0] = udata[i0];
      tempu[1] = udata[i1];
    }
  }

  /* On PE 0, receive c1,c2 at top right, then print performance data
     and sampled solution values */
  if (my_pe == 0) {
    if (npelast != 0)
      MPI_Recv(&tempu[0], 2, MPI_SUNREALTYPE, npelast, 0, comm, &status);
    flag = ARKStepGetNumSteps(arkode_mem, &nst);
    check_flag(&flag, "ARKStepGetNumSteps", 1, my_pe);
    flag = ARKStepGetLastStep(arkode_mem, &hu);
    check_flag(&flag, "ARKStepGetLastStep", 1, my_pe);

#if defined(SUNDIALS_EXTENDED_PRECISION)
    printf("t = %.2Le   no. steps = %ld   stepsize = %.2Le\n",
           t, nst, hu);
    printf("At bottom left:  c1, c2 = %12.3Le %12.3Le \n", udata[0], udata[1]);
    printf("At top right:    c1, c2 = %12.3Le %12.3Le \n\n", tempu[0], tempu[1]);
#elif defined(SUNDIALS_DOUBLE_PRECISION)
    printf("t = %.2e   no. steps = %ld   stepsize = %.2e\n",
           t, nst, hu);
    printf("At bottom left:  c1, c2 = %12.3e %12.3e \n", udata[0], udata[1]);
    printf("At top right:    c1, c2 = %12.3e %12.3e \n\n", tempu[0], tempu[1]);
#else
    printf("t = %.2e   no. steps = %ld   stepsize = %.2e\n",
           t, nst, hu);
    printf("At bottom left:  c1, c2 = %12.3e %12.3e \n", udata[0], udata[1]);
    printf("At top right:    c1, c2 = %12.3e %12.3e \n\n", tempu[0], tempu[1]);
#endif
  }
}

/* Print final statistics */
static void PrintFinalStats(void *arkode_mem)
{
  long int lenrw, leniw;
  long int lenrwLS, leniwLS;
  long int nst, nfe, nfi, nsetups, nni, ncfn, netf;
  long int nli, npe, nps, ncfl, nfeLS;
  int flag;

  flag = ARKStepGetWorkSpace(arkode_mem, &lenrw, &leniw);
  check_flag(&flag, "ARKStepGetWorkSpace", 1, 0);
  flag = ARKStepGetNumSteps(arkode_mem, &nst);
  check_flag(&flag, "ARKStepGetNumSteps", 1, 0);
  flag = ARKStepGetNumRhsEvals(arkode_mem, &nfe, &nfi);
  check_flag(&flag, "ARKStepGetNumRhsEvals", 1, 0);
  flag = ARKStepGetNumLinSolvSetups(arkode_mem, &nsetups);
  check_flag(&flag, "ARKStepGetNumLinSolvSetups", 1, 0);
  flag = ARKStepGetNumErrTestFails(arkode_mem, &netf);
  check_flag(&flag, "ARKStepGetNumErrTestFails", 1, 0);
  flag = ARKStepGetNumNonlinSolvIters(arkode_mem, &nni);
  check_flag(&flag, "ARKStepGetNumNonlinSolvIters", 1, 0);
  flag = ARKStepGetNumNonlinSolvConvFails(arkode_mem, &ncfn);
  check_flag(&flag, "ARKStepGetNumNonlinSolvConvFails", 1, 0);

  flag = ARKStepGetLinWorkSpace(arkode_mem, &lenrwLS, &leniwLS);
  check_flag(&flag, "ARKStepGetLinWorkSpace", 1, 0);
  flag = ARKStepGetNumLinIters(arkode_mem, &nli);
  check_flag(&flag, "ARKStepGetNumLinIters", 1, 0);
  flag = ARKStepGetNumPrecEvals(arkode_mem, &npe);
  check_flag(&flag, "ARKStepGetNumPrecEvals", 1, 0);
  flag = ARKStepGetNumPrecSolves(arkode_mem, &nps);
  check_flag(&flag, "ARKStepGetNumPrecSolves", 1, 0);
  flag = ARKStepGetNumLinConvFails(arkode_mem, &ncfl);
  check_flag(&flag, "ARKStepGetNumLinConvFails", 1, 0);
  flag = ARKStepGetNumLinRhsEvals(arkode_mem, &nfeLS);
  check_flag(&flag, "ARKStepGetNumLinRhsEvals", 1, 0);

  printf("\nFinal Statistics: \n\n");
  printf("lenrw   = %5ld     leniw   = %5ld\n", lenrw, leniw);
  printf("lenrwls = %5ld     leniwls = %5ld\n", lenrwLS, leniwLS);
  printf("nst     = %5ld     nfe     = %5ld\n", nst, nfe);
  printf("nfi     = %5ld     nfels   = %5ld\n", nfi, nfeLS);
  printf("nni     = %5ld     nli     = %5ld\n", nni, nli);
  printf("nsetups = %5ld     netf    = %5ld\n", nsetups, netf);
  printf("npe     = %5ld     nps     = %5ld\n", npe, nps);
  printf("ncfn    = %5ld     ncfl    = %5ld\n\n", ncfn, ncfl);
}

/* Routine to send boundary data to neighboring PEs */
static void BSend(MPI_Comm comm, int my_pe, int isubx,
                  int isuby, sunindextype dsizex,
                  sunindextype dsizey, realtype udata[])
{
  int i, ly;
  sunindextype offsetu, offsetbuf;
  realtype bufleft[NVARS*MYSUB], bufright[NVARS*MYSUB];

  /* If isuby > 0, send data from bottom x-line of u */
  if (isuby != 0)
    MPI_Send(&udata[0], (int) dsizex, MPI_SUNREALTYPE, my_pe-NPEX, 0, comm);

  /* If isuby < NPEY-1, send data from top x-line of u */
  if (isuby != NPEY-1) {
    offsetu = (MYSUB-1)*dsizex;
    MPI_Send(&udata[offsetu], (int) dsizex, MPI_SUNREALTYPE, my_pe+NPEX, 0, comm);
  }

  /* If isubx > 0, send data from left y-line of u (via bufleft) */
  if (isubx != 0) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetbuf = ly*NVARS;
      offsetu = ly*dsizex;
      for (i = 0; i < NVARS; i++)
        bufleft[offsetbuf+i] = udata[offsetu+i];
    }
    MPI_Send(&bufleft[0], (int) dsizey, MPI_SUNREALTYPE, my_pe-1, 0, comm);
  }

  /* If isubx < NPEX-1, send data from right y-line of u (via bufright) */
  if (isubx != NPEX-1) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetbuf = ly*NVARS;
      offsetu = offsetbuf*MXSUB + (MXSUB-1)*NVARS;
      for (i = 0; i < NVARS; i++)
        bufright[offsetbuf+i] = udata[offsetu+i];
    }
    MPI_Send(&bufright[0], (int) dsizey, MPI_SUNREALTYPE, my_pe+1, 0, comm);
  }
}
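/* Note: in BSend the data for an x-line neighbor (above/below) is contiguous
   in udata and is sent directly, while the data for a y-line neighbor
   (left/right) is strided -- one NVARS group per x-row -- so it is packed
   into bufleft/bufright before sending. */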

/* Routine to start receiving boundary data from neighboring PEs.
   Notes:
   1) buffer should be able to hold 2*NVARS*MYSUB realtype entries, should be
   passed to both the BRecvPost and BRecvWait functions, and should not
   be manipulated between the two calls.
   2) request should have 4 entries, and should be passed in both calls also. */

static void BRecvPost(MPI_Comm comm, MPI_Request request[],
                      int my_pe, int isubx, int isuby,
                      sunindextype dsizex, sunindextype dsizey,
                      realtype uext[], realtype buffer[])
{
  sunindextype offsetue;
  /* Have bufleft and bufright use the same buffer */
  realtype *bufleft = buffer, *bufright = buffer+NVARS*MYSUB;

  /* If isuby > 0, receive data for bottom x-line of uext */
  if (isuby != 0)
    MPI_Irecv(&uext[NVARS], (int) dsizex, MPI_SUNREALTYPE,
              my_pe-NPEX, 0, comm, &request[0]);

  /* If isuby < NPEY-1, receive data for top x-line of uext */
  if (isuby != NPEY-1) {
    offsetue = NVARS*(1 + (MYSUB+1)*(MXSUB+2));
    MPI_Irecv(&uext[offsetue], (int) dsizex, MPI_SUNREALTYPE,
              my_pe+NPEX, 0, comm, &request[1]);
  }

  /* If isubx > 0, receive data for left y-line of uext (via bufleft) */
  if (isubx != 0) {
    MPI_Irecv(&bufleft[0], (int) dsizey, MPI_SUNREALTYPE,
              my_pe-1, 0, comm, &request[2]);
  }

  /* If isubx < NPEX-1, receive data for right y-line of uext (via bufright) */
  if (isubx != NPEX-1) {
    MPI_Irecv(&bufright[0], (int) dsizey, MPI_SUNREALTYPE,
              my_pe+1, 0, comm, &request[3]);
  }
}

/* Routine to finish receiving boundary data from neighboring PEs.
   Notes:
   1) buffer should be able to hold 2*NVARS*MYSUB realtype entries, should be
   passed to both the BRecvPost and BRecvWait functions, and should not
   be manipulated between the two calls.
   2) request should have 4 entries, and should be passed in both calls also. */

static void BRecvWait(MPI_Request request[],
                      int isubx, int isuby,
                      sunindextype dsizex, realtype uext[],
                      realtype buffer[])
{
  int i, ly;
  sunindextype dsizex2, offsetue, offsetbuf;
  realtype *bufleft = buffer, *bufright = buffer+NVARS*MYSUB;
  MPI_Status status;

  dsizex2 = dsizex + 2*NVARS;

  /* If isuby > 0, receive data for bottom x-line of uext */
  if (isuby != 0)
    MPI_Wait(&request[0],&status);

  /* If isuby < NPEY-1, receive data for top x-line of uext */
  if (isuby != NPEY-1)
    MPI_Wait(&request[1],&status);

  /* If isubx > 0, receive data for left y-line of uext (via bufleft) */
  if (isubx != 0) {
    MPI_Wait(&request[2],&status);

    /* Copy the buffer to uext */
    for (ly = 0; ly < MYSUB; ly++) {
      offsetbuf = ly*NVARS;
      offsetue = (ly+1)*dsizex2;
      for (i = 0; i < NVARS; i++)
        uext[offsetue+i] = bufleft[offsetbuf+i];
    }
  }

  /* If isubx < NPEX-1, receive data for right y-line of uext (via bufright) */
  if (isubx != NPEX-1) {
    MPI_Wait(&request[3],&status);

    /* Copy the buffer to uext */
    for (ly = 0; ly < MYSUB; ly++) {
      offsetbuf = ly*NVARS;
      offsetue = (ly+2)*dsizex2 - NVARS;
      for (i = 0; i < NVARS; i++)
        uext[offsetue+i] = bufright[offsetbuf+i];
    }
  }
}

/* ucomm routine.  This routine performs all communication
   between processors of data needed to calculate f. */

static void ucomm(realtype t, N_Vector u, UserData data)
{

  realtype *udata, *uext, buffer[2*NVARS*MYSUB];
  MPI_Comm comm;
  int my_pe, isubx, isuby;
  sunindextype nvmxsub, nvmysub;
  MPI_Request request[4];

  udata = N_VGetArrayPointer(u);

  /* Get comm, my_pe, subgrid indices, data sizes, extended array uext */
  comm = data->comm;  my_pe = data->my_pe;
  isubx = data->isubx;   isuby = data->isuby;
  nvmxsub = data->nvmxsub;
  nvmysub = NVARS*MYSUB;
  uext = data->uext;

  /* Start receiving boundary data from neighboring PEs */
  BRecvPost(comm, request, my_pe, isubx, isuby, nvmxsub, nvmysub, uext, buffer);

  /* Send data from boundary of local grid to neighboring PEs */
  BSend(comm, my_pe, isubx, isuby, nvmxsub, nvmysub, udata);

  /* Finish receiving boundary data from neighboring PEs */
  BRecvWait(request, isubx, isuby, nvmxsub, uext, buffer);
}
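/* Note: ucomm posts all nonblocking receives before issuing the blocking
   sends, so each neighbor's matching receive gets posted without waiting on
   that neighbor's own sends; the boundary exchange therefore cannot deadlock
   regardless of how much internal buffering the MPI implementation provides. */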


/* fcalc routine. Compute f(t,y).  This routine assumes that communication
   between processors of data needed to calculate f has already been done,
   and this data is in the work array uext. */

static void fcalc(realtype t, realtype udata[],
                  realtype dudata[], UserData data)
{
  realtype *uext;
  realtype q3, c1, c2, c1dn, c2dn, c1up, c2up, c1lt, c2lt;
  realtype c1rt, c2rt, cydn, cyup, hord1, hord2, horad1, horad2;
  realtype qq1, qq2, qq3, qq4, rkin1, rkin2, s, vertd1, vertd2, ydn, yup;
  realtype q4coef, dely, verdco, hordco, horaco;
  int i, lx, ly, jy;
  int isubx, isuby;
  sunindextype nvmxsub, nvmxsub2, offsetu, offsetue;

  /* Get subgrid indices, data sizes, extended work array uext */
  isubx = data->isubx;   isuby = data->isuby;
  nvmxsub = data->nvmxsub; nvmxsub2 = data->nvmxsub2;
  uext = data->uext;

  /* Copy local segment of u vector into the working extended array uext */
  offsetu = 0;
  offsetue = nvmxsub2 + NVARS;
  for (ly = 0; ly < MYSUB; ly++) {
    for (i = 0; i < nvmxsub; i++) uext[offsetue+i] = udata[offsetu+i];
    offsetu = offsetu + nvmxsub;
    offsetue = offsetue + nvmxsub2;
  }

  /* To facilitate homogeneous Neumann boundary conditions, when this is
  a boundary PE, copy data from the first interior mesh line of u to uext */

  /* If isuby = 0, copy x-line 2 of u to uext */
  if (isuby == 0) {
    for (i = 0; i < nvmxsub; i++) uext[NVARS+i] = udata[nvmxsub+i];
  }

  /* If isuby = NPEY-1, copy x-line MYSUB-1 of u to uext */
  if (isuby == NPEY-1) {
    offsetu = (MYSUB-2)*nvmxsub;
    offsetue = (MYSUB+1)*nvmxsub2 + NVARS;
    for (i = 0; i < nvmxsub; i++) uext[offsetue+i] = udata[offsetu+i];
  }

  /* If isubx = 0, copy y-line 2 of u to uext */
  if (isubx == 0) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetu = ly*nvmxsub + NVARS;
      offsetue = (ly+1)*nvmxsub2;
      for (i = 0; i < NVARS; i++) uext[offsetue+i] = udata[offsetu+i];
    }
  }

  /* If isubx = NPEX-1, copy y-line MXSUB-1 of u to uext */
  if (isubx == NPEX-1) {
    for (ly = 0; ly < MYSUB; ly++) {
      offsetu = (ly+1)*nvmxsub - 2*NVARS;
      offsetue = (ly+2)*nvmxsub2 - NVARS;
      for (i = 0; i < NVARS; i++) uext[offsetue+i] = udata[offsetu+i];
    }
  }

  /* Make local copies of problem variables, for efficiency */
  dely = data->dy;
  verdco = data->vdco;
  hordco  = data->hdco;
  horaco  = data->haco;

  /* Set diurnal rate coefficients as functions of t, and save q4 in
  data block for use by preconditioner evaluation routine */
  s = sin((data->om)*t);
  if (s > RCONST(0.0)) {
    q3 = exp(-A3/s);
    q4coef = exp(-A4/s);
  } else {
    q3 = RCONST(0.0);
    q4coef = RCONST(0.0);
  }
  data->q4 = q4coef;
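  /* In terms of the problem description, the rates just computed are
     q3(t) = exp(-A3/sin(om*t)) and q4(t) = exp(-A4/sin(om*t)) while the sun
     is up (sin(om*t) > 0), and both are zero at night. */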

  /* Loop over all grid points in local subgrid */
  for (ly = 0; ly < MYSUB; ly++) {

    jy = ly + isuby*MYSUB;

    /* Set vertical diffusion coefficients at jy +- 1/2 */
    ydn = YMIN + (jy - RCONST(0.5))*dely;
    yup = ydn + dely;
    cydn = verdco*exp(RCONST(0.2)*ydn);
    cyup = verdco*exp(RCONST(0.2)*yup);
    for (lx = 0; lx < MXSUB; lx++) {

      /* Extract c1 and c2, and set kinetic rate terms */
      offsetue = (lx+1)*NVARS + (ly+1)*nvmxsub2;
      c1 = uext[offsetue];
      c2 = uext[offsetue+1];
      qq1 = Q1*c1*C3;
      qq2 = Q2*c1*c2;
      qq3 = q3*C3;
      qq4 = q4coef*c2;
      rkin1 = -qq1 - qq2 + RCONST(2.0)*qq3 + qq4;
      rkin2 = qq1 - qq2 - qq4;

      /* Set vertical diffusion terms */
      c1dn = uext[offsetue-nvmxsub2];
      c2dn = uext[offsetue-nvmxsub2+1];
      c1up = uext[offsetue+nvmxsub2];
      c2up = uext[offsetue+nvmxsub2+1];
      vertd1 = cyup*(c1up - c1) - cydn*(c1 - c1dn);
      vertd2 = cyup*(c2up - c2) - cydn*(c2 - c2dn);

      /* Set horizontal diffusion and advection terms */
      c1lt = uext[offsetue-2];
      c2lt = uext[offsetue-1];
      c1rt = uext[offsetue+2];
      c2rt = uext[offsetue+3];
      hord1 = hordco*(c1rt - RCONST(2.0)*c1 + c1lt);
      hord2 = hordco*(c2rt - RCONST(2.0)*c2 + c2lt);
      horad1 = horaco*(c1rt - c1lt);
      horad2 = horaco*(c2rt - c2lt);

      /* Load all terms into dudata */
      offsetu = lx*NVARS + ly*nvmxsub;
      dudata[offsetu]   = vertd1 + hord1 + horad1 + rkin1;
      dudata[offsetu+1] = vertd2 + hord2 + horad2 + rkin2;
    }
  }
}
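/* For reference, the discrete operators assembled in fcalc at each interior
   point are, per species c (with the 1/dy^2 factor folded into vdco):
     vertical diffusion:    Kv(y+dy/2)*(c_up - c) - Kv(y-dy/2)*(c - c_dn)
     horizontal diffusion:  (Kh/dx^2)*(c_rt - 2*c + c_lt)
     advection:             (V/(2*dx))*(c_rt - c_lt)
   which is the central differencing described in the file header. */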


/***************** Functions Called by the Solver *************************/

/* f routine.  Evaluate f(t,y).  First call ucomm to do communication of
   subgrid boundary data into uext.  Then calculate f by a call to fcalc. */

static int f(realtype t, N_Vector u, N_Vector udot, void *user_data)
{
  realtype *udata, *dudata;
  UserData data;

  udata = N_VGetArrayPointer(u);
  dudata = N_VGetArrayPointer(udot);
  data = (UserData) user_data;

  /* Call ucomm to do inter-processor communication */
  ucomm(t, u, data);

  /* Call fcalc to calculate all right-hand sides */
  fcalc(t, udata, dudata, data);

  return(0);
}


/* Preconditioner setup routine. Generate and preprocess P. */
static int Precond(realtype tn, N_Vector u, N_Vector fu,
                   booleantype jok, booleantype *jcurPtr,
                   realtype gamma, void *user_data)
{
  realtype c1, c2, cydn, cyup, diag, ydn, yup, q4coef, dely, verdco, hordco;
  realtype **(*P)[MYSUB], **(*Jbd)[MYSUB];
  int nvmxsub, offset;
  sunindextype ier;
  sunindextype *(*pivot)[MYSUB];
  int lx, ly, jy, isuby;
  realtype *udata, **a, **j;
  UserData data;

  /* Make local copies of pointers in user_data, pointer to u's data,
     and PE index pair */
  data = (UserData) user_data;
  P = data->P;
  Jbd = data->Jbd;
  pivot = data->pivot;
  udata = N_VGetArrayPointer(u);
  isuby = data->isuby;
  nvmxsub = data->nvmxsub;

  if (jok) {

    /* jok = SUNTRUE: Copy Jbd to P */
    for (ly = 0; ly < MYSUB; ly++)
      for (lx = 0; lx < MXSUB; lx++)
        denseCopy(Jbd[lx][ly], P[lx][ly], NVARS, NVARS);

    *jcurPtr = SUNFALSE;

  }

  else {

    /* jok = SUNFALSE: Generate Jbd from scratch and copy to P */

    /* Make local copies of problem variables, for efficiency */
    q4coef = data->q4;
    dely = data->dy;
    verdco = data->vdco;
    hordco  = data->hdco;

    /* Compute 2x2 diagonal Jacobian blocks (using q4 values
       computed on the last f call).  Load into P. */
    for (ly = 0; ly < MYSUB; ly++) {
      jy = ly + isuby*MYSUB;
      ydn = YMIN + (jy - RCONST(0.5))*dely;
      yup = ydn + dely;
      cydn = verdco*exp(RCONST(0.2)*ydn);
      cyup = verdco*exp(RCONST(0.2)*yup);
      diag = -(cydn + cyup + RCONST(2.0)*hordco);
      for (lx = 0; lx < MXSUB; lx++) {
        offset = lx*NVARS + ly*nvmxsub;
        c1 = udata[offset];
        c2 = udata[offset+1];
        j = Jbd[lx][ly];
        a = P[lx][ly];
        IJth(j,1,1) = (-Q1*C3 - Q2*c2) + diag;
        IJth(j,1,2) = -Q2*c1 + q4coef;
        IJth(j,2,1) = Q1*C3 - Q2*c2;
        IJth(j,2,2) = (-Q2*c1 - q4coef) + diag;
        denseCopy(j, a, NVARS, NVARS);
      }
    }

    *jcurPtr = SUNTRUE;

  }

  /* Scale by -gamma */
  for (ly = 0; ly < MYSUB; ly++)
    for (lx = 0; lx < MXSUB; lx++)
      denseScale(-gamma, P[lx][ly], NVARS, NVARS);

  /* Add identity matrix and do LU decompositions on blocks in place */
  for (lx = 0; lx < MXSUB; lx++) {
    for (ly = 0; ly < MYSUB; ly++) {
      denseAddIdentity(P[lx][ly], NVARS);
      ier = denseGETRF(P[lx][ly], NVARS, NVARS, pivot[lx][ly]);
      if (ier != 0) return(1);
    }
  }

  return(0);
}
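/* The preconditioner assembled above is the block-diagonal matrix
   P = I - gamma*J_bd, where J_bd is the 2x2 block of the Jacobian at each
   local mesh point (reaction terms plus the diagonal diffusion contribution).
   Each block is LU-factored in place here and back-solved in PSolve. */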


/* Preconditioner solve routine */
static int PSolve(realtype tn, N_Vector u, N_Vector fu,
                  N_Vector r, N_Vector z,
                  realtype gamma, realtype delta,
                  int lr, void *user_data)
{
  realtype **(*P)[MYSUB];
  int nvmxsub;
  sunindextype *(*pivot)[MYSUB];
  int lx, ly;
  realtype *zdata, *v;
  UserData data;

  /* Extract the P and pivot arrays from user_data */
  data = (UserData) user_data;
  P = data->P;
  pivot = data->pivot;

  /* Solve the block-diagonal system Px = r using LU factors stored
     in P and pivot data in pivot, and return the solution in z.
     First copy vector r to z. */
  N_VScale(RCONST(1.0), r, z);
  nvmxsub = data->nvmxsub;
  zdata = N_VGetArrayPointer(z);

  for (lx = 0; lx < MXSUB; lx++) {
    for (ly = 0; ly < MYSUB; ly++) {
      v = &(zdata[lx*NVARS + ly*nvmxsub]);
      denseGETRS(P[lx][ly], NVARS, pivot[lx][ly], v);
    }
  }

  return(0);
}


/*********************** Private Helper Function ************************/

/* Check function return value...
     opt == 0 means SUNDIALS function allocates memory so check if
              returned NULL pointer
     opt == 1 means SUNDIALS function returns a flag so check if
              flag >= 0
     opt == 2 means function allocates memory so check if returned
              NULL pointer */

static int check_flag(void *flagvalue, const char *funcname, int opt, int id)
{
  int *errflag;

  /* Check if SUNDIALS function returned NULL pointer - no memory allocated */
  if (opt == 0 && flagvalue == NULL) {
    fprintf(stderr, "\nSUNDIALS_ERROR(%d): %s() failed - returned NULL pointer\n\n",
            id, funcname);
    return(1);
  } else if (opt == 1) { /* Check if flag < 0 */
    errflag = (int *) flagvalue;
    if (*errflag < 0) {
      fprintf(stderr, "\nSUNDIALS_ERROR(%d): %s() failed with flag = %d\n\n",
              id, funcname, *errflag);
      return(1);
    }
  } else if (opt == 2 && flagvalue == NULL) { /* Check if function returned NULL pointer - no memory allocated */
    fprintf(stderr, "\nMEMORY_ERROR(%d): %s() failed - returned NULL pointer\n\n",
            id, funcname);
    return(1);
  }

  return(0);
}