1 /*
2 * -----------------------------------------------------------------
3 * Programmer(s): Daniel R. Reynolds @ SMU
4 * Allan Taylor, Alan Hindmarsh and Radu Serban @ LLNL
5 * -----------------------------------------------------------------
6 * SUNDIALS Copyright Start
7 * Copyright (c) 2002-2020, Lawrence Livermore National Security
8 * and Southern Methodist University.
9 * All rights reserved.
10 *
11 * See the top-level LICENSE and NOTICE files for details.
12 *
13 * SPDX-License-Identifier: BSD-3-Clause
14 * SUNDIALS Copyright End
15 * -----------------------------------------------------------------
16 * Example program for IDA: Food web, parallel, GMRES, IDABBD
17 * preconditioner.
18 *
19 * This example program for IDA uses SUNLinSol_SPGMR as the linear solver.
20 * It is written for a parallel computer system and uses the
21 * IDABBDPRE band-block-diagonal preconditioner module for the
22 * SUNLinSol_SPGMR package.
23 *
24 * The mathematical problem solved in this example is a DAE system
25 * that arises from a system of partial differential equations after
26 * spatial discretization. The PDE system is a food web population
27 * model, with predator-prey interaction and diffusion on the unit
28 * square in two dimensions. The dependent variable vector is:
29 *
30 * 1 2 ns
31 * c = (c , c , ..., c ) , ns = 2 * np
32 *
33 * and the PDE's are as follows:
34 *
35 * i i i
36 * dc /dt = d(i)*(c + c ) + R (x,y,c) (i = 1,...,np)
37 * xx yy i
38 *
39 * i i
40 * 0 = d(i)*(c + c ) + R (x,y,c) (i = np+1,...,ns)
41 * xx yy i
42 *
43 * where the reaction terms R are:
44 *
45 * i ns j
46 * R (x,y,c) = c * (b(i) + sum a(i,j)*c )
47 * i j=1
48 *
49 * The number of species is ns = 2 * np, with the first np being
50 * prey and the last np being predators. The coefficients a(i,j),
51 * b(i), d(i) are:
52 *
53 * a(i,i) = -AA (all i)
54 * a(i,j) = -GG (i <= np , j > np)
55 * a(i,j) = EE (i > np, j <= np)
56 * all other a(i,j) = 0
57 * b(i) = BB*(1+ alpha * x*y + beta*sin(4 pi x)*sin(4 pi y)) (i <= np)
58 * b(i) =-BB*(1+ alpha * x*y + beta*sin(4 pi x)*sin(4 pi y)) (i > np)
59 * d(i) = DPREY (i <= np)
60 * d(i) = DPRED (i > np)
61 *
62 * Note: The above equations are written in 1-based indices,
63 * whereas the code has 0-based indices, being written in C.
64 *
65 * The various scalar parameters required are set using '#define'
66 * statements or directly in routine InitUserData. In this program,
67 * np = 1, ns = 2. The boundary conditions are homogeneous Neumann:
68 * normal derivative = 0.
69 *
70 * A polynomial in x and y is used to set the initial values of the
71 * first np variables (the prey variables) at each x,y location,
72 * while initial values for the remaining (predator) variables are
73 * set to a flat value, which is corrected by IDACalcIC.
74 *
75 * The PDEs are discretized by central differencing on a MX by MY
76 * mesh, and so the system size Neq is the product
77 * MX * MY * NUM_SPECIES. The system is actually implemented on
78 * submeshes, processor by processor, with an MXSUB by MYSUB mesh
79 * on each of NPEX * NPEY processors.
80 *
81 * The DAE system is solved by IDA using the SUNLinSol_SPGMR linear solver,
82 * in conjunction with the preconditioner module IDABBDPRE. The
83 * preconditioner uses a 5-diagonal band-block-diagonal
84 * approximation (half-bandwidths = 2). Output is printed at
85 * t = 0, .001, .01, .1, .4, .7, 1.
86 * -----------------------------------------------------------------
87 * References:
88 * [1] Peter N. Brown and Alan C. Hindmarsh,
89 * Reduced Storage Matrix Methods in Stiff ODE systems,
90 * Journal of Applied Mathematics and Computation, Vol. 31
91 * (May 1989), pp. 40-91.
92 *
93 * [2] Peter N. Brown, Alan C. Hindmarsh, and Linda R. Petzold,
94 * Using Krylov Methods in the Solution of Large-Scale
95 * Differential-Algebraic Systems, SIAM J. Sci. Comput., 15
96 * (1994), pp. 1467-1488.
97 *
98 * [3] Peter N. Brown, Alan C. Hindmarsh, and Linda R. Petzold,
99 * Consistent Initial Condition Calculation for Differential-
100 * Algebraic Systems, SIAM J. Sci. Comput., 19 (1998),
101 * pp. 1495-1512.
102 * -----------------------------------------------------------------
103 */
104
105 #include <stdio.h>
106 #include <stdlib.h>
107 #include <math.h>
108
109 #include <ida/ida.h>
110 #include <ida/ida_bbdpre.h>
111 #include <sunlinsol/sunlinsol_spgmr.h>
112 #include <nvector/nvector_parallel.h>
113 #include <sundials/sundials_types.h>
114 #include <sundials/sundials_dense.h>
115
116 #include <mpi.h>
117
118 /* Problem Constants */
119
120 #define NPREY 1 /* Number of prey (= number of predators). */
121 #define NUM_SPECIES 2*NPREY
122
123 #define PI RCONST(3.1415926535898) /* pi */
124 #define FOURPI (RCONST(4.0)*PI) /* 4 pi */
125
126 #define MXSUB 10 /* Number of x mesh points per processor subgrid */
127 #define MYSUB 10 /* Number of y mesh points per processor subgrid */
128 #define NPEX 2 /* Number of subgrids in the x direction */
129 #define NPEY 2 /* Number of subgrids in the y direction */
130 #define MX (MXSUB*NPEX) /* MX = number of x mesh points */
131 #define MY (MYSUB*NPEY) /* MY = number of y mesh points */
132 #define NSMXSUB (NUM_SPECIES * MXSUB)
133 #define NEQ (NUM_SPECIES*MX*MY) /* Number of equations in system */
134 #define AA RCONST(1.0) /* Coefficient in above eqns. for a */
135 #define EE RCONST(10000.) /* Coefficient in above eqns. for a */
136 #define GG RCONST(0.5e-6) /* Coefficient in above eqns. for a */
137 #define BB RCONST(1.0) /* Coefficient in above eqns. for b */
138 #define DPREY RCONST(1.0) /* Coefficient in above eqns. for d */
139 #define DPRED RCONST(0.05) /* Coefficient in above eqns. for d */
140 #define ALPHA RCONST(50.) /* Coefficient alpha in above eqns. */
141 #define BETA RCONST(1000.) /* Coefficient beta in above eqns. */
142 #define AX RCONST(1.0) /* Total range of x variable */
143 #define AY RCONST(1.0) /* Total range of y variable */
144 #define RTOL RCONST(1.e-5) /* rtol tolerance */
145 #define ATOL RCONST(1.e-5) /* atol tolerance */
146 #define ZERO RCONST(0.) /* 0. */
147 #define ONE RCONST(1.0) /* 1. */
148 #define NOUT 6
149 #define TMULT RCONST(10.0) /* Multiplier for tout values */
150 #define TADD RCONST(0.3) /* Increment for tout values */
151
152 /* User-defined vector accessor macro IJ_Vptr. */
153
154 /*
155 * IJ_Vptr is defined in order to express the underlying 3-d structure of the
156 * dependent variable vector from its underlying 1-d storage (an N_Vector).
157 * IJ_Vptr(vv,i,j) returns a pointer to the location in vv corresponding to
158 * species index is = 0, x-index ix = i, and y-index jy = j.
159 */
160
161 #define IJ_Vptr(vv,i,j) (&NV_Ith_P(vv, (i)*NUM_SPECIES + (j)*NSMXSUB ))
162
163 /* Type: UserData. Contains problem constants, preconditioner data, etc. */
164
165 typedef struct {
166 int ns, np, thispe, npes, ixsub, jysub, npex, npey;
167 int mxsub, mysub, nsmxsub, nsmxsub2;
168 realtype dx, dy, **acoef;
169 realtype cox[NUM_SPECIES], coy[NUM_SPECIES], bcoef[NUM_SPECIES],
170 rhs[NUM_SPECIES], cext[(MXSUB+2)*(MYSUB+2)*NUM_SPECIES];
171 MPI_Comm comm;
172 N_Vector rates;
173 sunindextype n_local;
174 } *UserData;
175
176 /* Prototypes for functions called by the IDA Solver. */
177
178 static int resweb(realtype tt, N_Vector cc, N_Vector cp,
179 N_Vector rr, void *user_data);
180
181 static int reslocal(sunindextype Nlocal, realtype tt,
182 N_Vector cc, N_Vector cp, N_Vector res,
183 void *user_data);
184
185 static int rescomm(sunindextype Nlocal, realtype tt,
186 N_Vector cc, N_Vector cp,
187 void *user_data);
188
189 /* Prototypes for supporting functions */
190
191 static void BSend(MPI_Comm comm, int thispe, int ixsub, int jysub,
192 int dsizex, int dsizey, realtype carray[]);
193
194 static void BRecvPost(MPI_Comm comm, MPI_Request request[], int thispe,
195 int ixsub, int jysub,
196 int dsizex, int dsizey,
197 realtype cext[], realtype buffer[]);
198
199 static void BRecvWait(MPI_Request request[], int ixsub, int jysub,
200 int dsizex, realtype cext[], realtype buffer[]);
201
202 static void WebRates(realtype xx, realtype yy, realtype *cxy, realtype *ratesxy,
203 UserData webdata);
204
205 static realtype dotprod(int size, realtype *x1, realtype *x2);
206
207 /* Prototypes for private functions */
208
209 static void InitUserData(UserData webdata, int thispe, int npes,
210 MPI_Comm comm);
211
212 static void SetInitialProfiles(N_Vector cc, N_Vector cp, N_Vector id,
213 N_Vector scrtch, UserData webdata);
214
215 static void PrintHeader(sunindextype SystemSize, int maxl,
216 sunindextype mudq, sunindextype mldq,
217 sunindextype mukeep, sunindextype mlkeep,
218 realtype rtol, realtype atol);
219
220 static void PrintOutput(void *ida_mem, N_Vector cc, realtype time,
221 UserData webdata, MPI_Comm comm);
222
223 static void PrintFinalStats(void *ida_mem);
224
225 static int check_retval(void *returnvalue, const char *funcname, int opt, int id);
226
227 /*
228 *--------------------------------------------------------------------
229 * MAIN PROGRAM
230 *--------------------------------------------------------------------
231 */
232
main(int argc,char * argv[])233 int main(int argc, char *argv[])
234 {
235 MPI_Comm comm;
236 void *ida_mem;
237 SUNLinearSolver LS;
238 UserData webdata;
239 sunindextype SystemSize, local_N, mudq, mldq, mukeep, mlkeep;
240 realtype rtol, atol, t0, tout, tret;
241 N_Vector cc, cp, res, id;
242 int thispe, npes, maxl, iout, retval;
243
244 cc = cp = res = id = NULL;
245 webdata = NULL;
246 LS = NULL;
247 ida_mem = NULL;
248
249 /* Set communicator, and get processor number and total number of PE's. */
250
251 MPI_Init(&argc, &argv);
252 comm = MPI_COMM_WORLD;
253 MPI_Comm_rank(comm, &thispe);
254 MPI_Comm_size(comm, &npes);
255
256 if (npes != NPEX*NPEY) {
257 if (thispe == 0)
258 fprintf(stderr,
259 "\nMPI_ERROR(0): npes = %d not equal to NPEX*NPEY = %d\n",
260 npes, NPEX*NPEY);
261 MPI_Finalize();
262 return(1);
263 }
264
265 /* Set local length (local_N) and global length (SystemSize). */
266
267 local_N = MXSUB*MYSUB*NUM_SPECIES;
268 SystemSize = NEQ;
269
270 /* Set up user data block webdata. */
271
272 webdata = (UserData) malloc(sizeof *webdata);
273 webdata->rates = N_VNew_Parallel(comm, local_N, SystemSize);
274 webdata->acoef = newDenseMat(NUM_SPECIES, NUM_SPECIES);
275
276 InitUserData(webdata, thispe, npes, comm);
277
278 /* Create needed vectors, and load initial values.
279 The vector res is used temporarily only. */
280
281 cc = N_VNew_Parallel(comm, local_N, SystemSize);
282 if(check_retval((void *)cc, "N_VNew_Parallel", 0, thispe)) MPI_Abort(comm, 1);
283
284 cp = N_VNew_Parallel(comm, local_N, SystemSize);
285 if(check_retval((void *)cp, "N_VNew_Parallel", 0, thispe)) MPI_Abort(comm, 1);
286
287 res = N_VNew_Parallel(comm, local_N, SystemSize);
288 if(check_retval((void *)res, "N_VNew_Parallel", 0, thispe)) MPI_Abort(comm, 1);
289
290 id = N_VNew_Parallel(comm, local_N, SystemSize);
291 if(check_retval((void *)id, "N_VNew_Parallel", 0, thispe)) MPI_Abort(comm, 1);
292
293 SetInitialProfiles(cc, cp, id, res, webdata);
294
295 N_VDestroy(res);
296
297 /* Set remaining inputs to IDAMalloc. */
298
299 t0 = ZERO;
300 rtol = RTOL;
301 atol = ATOL;
302
303 /* Call IDACreate and IDAMalloc to initialize solution */
304
305 ida_mem = IDACreate();
306 if(check_retval((void *)ida_mem, "IDACreate", 0, thispe)) MPI_Abort(comm, 1);
307
308 retval = IDASetUserData(ida_mem, webdata);
309 if(check_retval(&retval, "IDASetUserData", 1, thispe)) MPI_Abort(comm, 1);
310
311 retval = IDASetId(ida_mem, id);
312 if(check_retval(&retval, "IDASetId", 1, thispe)) MPI_Abort(comm, 1);
313
314 retval = IDAInit(ida_mem, resweb, t0, cc, cp);
315 if(check_retval(&retval, "IDAInit", 1, thispe)) MPI_Abort(comm, 1);
316
317 retval = IDASStolerances(ida_mem, rtol, atol);
318 if(check_retval(&retval, "IDASStolerances", 1, thispe)) MPI_Abort(comm, 1);
319
320 /* Call SUNLinSol_SPGMR and IDASetLinearSolver to specify the linear solver */
321
322 maxl = 16;
323 LS = SUNLinSol_SPGMR(cc, PREC_LEFT, maxl);
324 if(check_retval((void *)LS, "SUNLinSol_SPGMR", 0, thispe)) MPI_Abort(comm, 1);
325 retval = IDASetLinearSolver(ida_mem, LS, NULL);
326 if(check_retval(&retval, "IDASetLinearSolver", 1, thispe)) MPI_Abort(comm, 1);
327
328 /* Call IDABBDPrecInit to initialize the band-block-diagonal preconditioner.
329 The half-bandwidths for the difference quotient evaluation are exact
330 for the system Jacobian, but only a 5-diagonal band matrix is retained. */
331
332 mudq = mldq = NSMXSUB;
333 mukeep = mlkeep = 2;
334 retval = IDABBDPrecInit(ida_mem, local_N, mudq, mldq, mukeep, mlkeep,
335 ZERO, reslocal, NULL);
336 if(check_retval(&retval, "IDABBDPrecInit", 1, thispe)) MPI_Abort(comm, 1);
337
338 /* Call IDACalcIC (with default options) to correct the initial values. */
339
340 tout = RCONST(0.001);
341 retval = IDACalcIC(ida_mem, IDA_YA_YDP_INIT, tout);
342 if(check_retval(&retval, "IDACalcIC", 1, thispe)) MPI_Abort(comm, 1);
343
344 /* On PE 0, print heading, basic parameters, initial values. */
345
346 if (thispe == 0) PrintHeader(SystemSize, maxl,
347 mudq, mldq, mukeep, mlkeep,
348 rtol, atol);
349 PrintOutput(ida_mem, cc, t0, webdata, comm);
350
351 /* Call IDA in tout loop, normal mode, and print selected output. */
352
353 for (iout = 1; iout <= NOUT; iout++) {
354
355 retval = IDASolve(ida_mem, tout, &tret, cc, cp, IDA_NORMAL);
356 if(check_retval(&retval, "IDASolve", 1, thispe)) MPI_Abort(comm, 1);
357
358 PrintOutput(ida_mem, cc, tret, webdata, comm);
359
360 if (iout < 3) tout *= TMULT;
361 else tout += TADD;
362
363 }
364
365 /* On PE 0, print final set of statistics. */
366
367 if (thispe == 0) PrintFinalStats(ida_mem);
368
369 /* Free memory. */
370
371 N_VDestroy(cc);
372 N_VDestroy(cp);
373 N_VDestroy(id);
374
375 IDAFree(&ida_mem);
376 SUNLinSolFree(LS);
377
378 destroyMat(webdata->acoef);
379 N_VDestroy(webdata->rates);
380 free(webdata);
381
382 MPI_Finalize();
383
384 return(0);
385 }
386
387 /*
388 *--------------------------------------------------------------------
389 * PRIVATE FUNCTIONS
390 *--------------------------------------------------------------------
391 */
392
393 /*
394 * InitUserData: Load problem constants in webdata (of type UserData).
395 */
396
InitUserData(UserData webdata,int thispe,int npes,MPI_Comm comm)397 static void InitUserData(UserData webdata, int thispe, int npes,
398 MPI_Comm comm)
399 {
400 int i, j, np;
401 realtype *a1,*a2, *a3, *a4, dx2, dy2, **acoef, *bcoef, *cox, *coy;
402
403 webdata->jysub = thispe / NPEX;
404 webdata->ixsub = thispe - (webdata->jysub)*NPEX;
405 webdata->mxsub = MXSUB;
406 webdata->mysub = MYSUB;
407 webdata->npex = NPEX;
408 webdata->npey = NPEY;
409 webdata->ns = NUM_SPECIES;
410 webdata->np = NPREY;
411 webdata->dx = AX/(MX-1);
412 webdata->dy = AY/(MY-1);
413 webdata->thispe = thispe;
414 webdata->npes = npes;
415 webdata->nsmxsub = MXSUB * NUM_SPECIES;
416 webdata->nsmxsub2 = (MXSUB+2)*NUM_SPECIES;
417 webdata->comm = comm;
418 webdata->n_local = MXSUB*MYSUB*NUM_SPECIES;
419
420 /* Set up the coefficients a and b plus others found in the equations. */
421
422 np = webdata->np;
423 dx2 = (webdata->dx)*(webdata->dx);
424 dy2 = (webdata->dy)*(webdata->dy);
425
426 acoef = webdata->acoef;
427 bcoef = webdata->bcoef;
428 cox = webdata->cox;
429 coy = webdata->coy;
430
431 for (i = 0; i < np; i++) {
432 a1 = &(acoef[i][np]);
433 a2 = &(acoef[i+np][0]);
434 a3 = &(acoef[i][0]);
435 a4 = &(acoef[i+np][np]);
436 /* Fill in the portion of acoef in the four quadrants, row by row. */
437 for (j = 0; j < np; j++) {
438 *a1++ = -GG;
439 *a2++ = EE;
440 *a3++ = ZERO;
441 *a4++ = ZERO;
442 }
443
444 /* Reset the diagonal elements of acoef to -AA. */
445 acoef[i][i] = -AA; acoef[i+np][i+np] = -AA;
446
447 /* Set coefficients for b and diffusion terms. */
448 bcoef[i] = BB; bcoef[i+np] = -BB;
449 cox[i] = DPREY/dx2; cox[i+np] = DPRED/dx2;
450 coy[i] = DPREY/dy2; coy[i+np] = DPRED/dy2;
451 }
452
453 }
454
455 /*
456 * SetInitialProfiles: Set initial conditions in cc, cp, and id.
457 * A polynomial profile is used for the prey cc values, and a constant
458 * (1.0e5) is loaded as the initial guess for the predator cc values.
459 * The id values are set to 1 for the prey and 0 for the predators.
460 * The prey cp values are set according to the given system, and
461 * the predator cp values are set to zero.
462 */
463
SetInitialProfiles(N_Vector cc,N_Vector cp,N_Vector id,N_Vector res,UserData webdata)464 static void SetInitialProfiles(N_Vector cc, N_Vector cp, N_Vector id,
465 N_Vector res, UserData webdata)
466 {
467 int ixsub, jysub, mxsub, mysub, np, ix, jy, is;
468 realtype *cxy, *idxy, *cpxy, dx, dy, xx, yy, xyfactor;
469
470 ixsub = webdata->ixsub;
471 jysub = webdata->jysub;
472 mxsub = webdata->mxsub;
473 mysub = webdata->mxsub;
474 dx = webdata->dx;
475 dy = webdata->dy;
476 np = webdata->np;
477
478 /* Loop over grid, load cc values and id values. */
479 for (jy = 0; jy < mysub; jy++) {
480 yy = (jy + jysub*mysub) * dy;
481 for (ix = 0; ix < mxsub; ix++) {
482 xx = (ix + ixsub*mxsub) * dx;
483 xyfactor = 16.*xx*(1. - xx)*yy*(1. - yy);
484 xyfactor *= xyfactor;
485
486 cxy = IJ_Vptr(cc,ix,jy);
487 idxy = IJ_Vptr(id,ix,jy);
488 for (is = 0; is < NUM_SPECIES; is++) {
489 if (is < np)
490 {cxy[is] = RCONST(10.0)+(realtype)(is+1)*xyfactor; idxy[is] = ONE;}
491 else { cxy[is] = 1.0e5; idxy[is] = ZERO; }
492 }
493 }
494 }
495
496 /* Set c' for the prey by calling the residual function with cp = 0. */
497
498 N_VConst(ZERO, cp);
499 resweb(ZERO, cc, cp, res, webdata);
500 N_VScale(-ONE, res, cp);
501
502 /* Set c' for predators to 0. */
503
504 for (jy = 0; jy < mysub; jy++) {
505 for (ix = 0; ix < mxsub; ix++) {
506 cpxy = IJ_Vptr(cp,ix,jy);
507 for (is = np; is < NUM_SPECIES; is++) cpxy[is] = ZERO;
508 }
509 }
510 }
511
512 /*
513 * Print first lines of output (problem description)
514 * and table headerr
515 */
516
PrintHeader(sunindextype SystemSize,int maxl,sunindextype mudq,sunindextype mldq,sunindextype mukeep,sunindextype mlkeep,realtype rtol,realtype atol)517 static void PrintHeader(sunindextype SystemSize, int maxl,
518 sunindextype mudq, sunindextype mldq,
519 sunindextype mukeep, sunindextype mlkeep,
520 realtype rtol, realtype atol)
521 {
522 printf("\nidaFoodWeb_kry_bbd_p: Predator-prey DAE parallel example problem for IDA \n\n");
523 printf("Number of species ns: %d", NUM_SPECIES);
524 printf(" Mesh dimensions: %d x %d", MX, MY);
525 printf(" Total system size: %ld\n",(long int) SystemSize);
526 printf("Subgrid dimensions: %d x %d", MXSUB, MYSUB);
527 printf(" Processor array: %d x %d\n", NPEX, NPEY);
528 #if defined(SUNDIALS_EXTENDED_PRECISION)
529 printf("Tolerance parameters: rtol = %Lg atol = %Lg\n", rtol, atol);
530 #elif defined(SUNDIALS_DOUBLE_PRECISION)
531 printf("Tolerance parameters: rtol = %g atol = %g\n", rtol, atol);
532 #else
533 printf("Tolerance parameters: rtol = %g atol = %g\n", rtol, atol);
534 #endif
535 printf("Linear solver: SUNLinSol_SPGMR Max. Krylov dimension maxl: %d\n", maxl);
536 printf("Preconditioner: band-block-diagonal (IDABBDPRE), with parameters\n");
537 printf(" mudq = %ld, mldq = %ld, mukeep = %ld, mlkeep = %ld\n",
538 (long int) mudq, (long int) mldq, (long int) mukeep, (long int) mlkeep);
539 printf("CalcIC called to correct initial predator concentrations \n\n");
540 printf("-----------------------------------------------------------\n");
541 printf(" t bottom-left top-right");
542 printf(" | nst k h\n");
543 printf("-----------------------------------------------------------\n\n");
544 }
545
546
547 /*
548 * PrintOutput: Print output values at output time t = tt.
549 * Selected run statistics are printed. Then values of c1 and c2
550 * are printed for the bottom left and top right grid points only.
551 */
552
PrintOutput(void * ida_mem,N_Vector cc,realtype tt,UserData webdata,MPI_Comm comm)553 static void PrintOutput(void *ida_mem, N_Vector cc, realtype tt,
554 UserData webdata, MPI_Comm comm)
555 {
556 MPI_Status status;
557 realtype *cdata, clast[2], hused;
558 long int nst;
559 int i, kused, retval, thispe, npelast, ilast;;
560
561 thispe = webdata->thispe;
562 npelast = webdata->npes - 1;
563 cdata = N_VGetArrayPointer(cc);
564
565 /* Send conc. at top right mesh point from PE npes-1 to PE 0. */
566 if (thispe == npelast) {
567 ilast = NUM_SPECIES*MXSUB*MYSUB - 2;
568 if (npelast != 0)
569 MPI_Send(&cdata[ilast], 2, MPI_SUNREALTYPE, 0, 0, comm);
570 else { clast[0] = cdata[ilast]; clast[1] = cdata[ilast+1]; }
571 }
572
573 /* On PE 0, receive conc. at top right from PE npes - 1.
574 Then print performance data and sampled solution values. */
575
576 if (thispe == 0) {
577
578 if (npelast != 0)
579 MPI_Recv(&clast[0], 2, MPI_SUNREALTYPE, npelast, 0, comm, &status);
580
581 retval = IDAGetLastOrder(ida_mem, &kused);
582 check_retval(&retval, "IDAGetLastOrder", 1, thispe);
583 retval = IDAGetNumSteps(ida_mem, &nst);
584 check_retval(&retval, "IDAGetNumSteps", 1, thispe);
585 retval = IDAGetLastStep(ida_mem, &hused);
586 check_retval(&retval, "IDAGetLastStep", 1, thispe);
587
588 #if defined(SUNDIALS_EXTENDED_PRECISION)
589 printf("%8.2Le %12.4Le %12.4Le | %3ld %1d %12.4Le\n",
590 tt, cdata[0], clast[0], nst, kused, hused);
591 for (i=1;i<NUM_SPECIES;i++)
592 printf(" %12.4Le %12.4Le |\n",cdata[i],clast[i]);
593 #elif defined(SUNDIALS_DOUBLE_PRECISION)
594 printf("%8.2e %12.4e %12.4e | %3ld %1d %12.4e\n",
595 tt, cdata[0], clast[0], nst, kused, hused);
596 for (i=1;i<NUM_SPECIES;i++)
597 printf(" %12.4e %12.4e |\n",cdata[i],clast[i]);
598 #else
599 printf("%8.2e %12.4e %12.4e | %3ld %1d %12.4e\n",
600 tt, cdata[0], clast[0], nst, kused, hused);
601 for (i=1;i<NUM_SPECIES;i++)
602 printf(" %12.4e %12.4e |\n",cdata[i],clast[i]);
603 #endif
604 printf("\n");
605
606 }
607
608 }
609
610 /*
611 * PrintFinalStats: Print final run data contained in iopt.
612 */
613
PrintFinalStats(void * ida_mem)614 static void PrintFinalStats(void *ida_mem)
615 {
616 long int nst, nre, nreLS, netf, ncfn, nni, ncfl, nli, npe, nps, nge;
617 int retval;
618
619 retval = IDAGetNumSteps(ida_mem, &nst);
620 check_retval(&retval, "IDAGetNumSteps", 1, 0);
621 retval = IDAGetNumResEvals(ida_mem, &nre);
622 check_retval(&retval, "IDAGetNumResEvals", 1, 0);
623 retval = IDAGetNumErrTestFails(ida_mem, &netf);
624 check_retval(&retval, "IDAGetNumErrTestFails", 1, 0);
625 retval = IDAGetNumNonlinSolvConvFails(ida_mem, &ncfn);
626 check_retval(&retval, "IDAGetNumNonlinSolvConvFails", 1, 0);
627 retval = IDAGetNumNonlinSolvIters(ida_mem, &nni);
628 check_retval(&retval, "IDAGetNumNonlinSolvIters", 1, 0);
629
630 retval = IDAGetNumLinConvFails(ida_mem, &ncfl);
631 check_retval(&retval, "IDAGetNumLinConvFails", 1, 0);
632 retval = IDAGetNumLinIters(ida_mem, &nli);
633 check_retval(&retval, "IDAGetNumLinIters", 1, 0);
634 retval = IDAGetNumPrecEvals(ida_mem, &npe);
635 check_retval(&retval, "IDAGetNumPrecEvals", 1, 0);
636 retval = IDAGetNumPrecSolves(ida_mem, &nps);
637 check_retval(&retval, "IDAGetNumPrecSolves", 1, 0);
638 retval = IDAGetNumLinResEvals(ida_mem, &nreLS);
639 check_retval(&retval, "IDAGetNumLinResEvals", 1, 0);
640
641 retval = IDABBDPrecGetNumGfnEvals(ida_mem, &nge);
642 check_retval(&retval, "IDABBDPrecGetNumGfnEvals", 1, 0);
643
644 printf("-----------------------------------------------------------\n");
645 printf("\nFinal statistics: \n\n");
646
647 printf("Number of steps = %ld\n", nst);
648 printf("Number of residual evaluations = %ld\n", nre+nreLS);
649 printf("Number of nonlinear iterations = %ld\n", nni);
650 printf("Number of error test failures = %ld\n", netf);
651 printf("Number of nonlinear conv. failures = %ld\n\n", ncfn);
652
653 printf("Number of linear iterations = %ld\n", nli);
654 printf("Number of linear conv. failures = %ld\n\n", ncfl);
655
656 printf("Number of preconditioner setups = %ld\n", npe);
657 printf("Number of preconditioner solves = %ld\n", nps);
658 printf("Number of local residual evals. = %ld\n", nge);
659
660 }
661
662 /*
663 * Check function return value...
664 * opt == 0 means SUNDIALS function allocates memory so check if
665 * returned NULL pointer
666 * opt == 1 means SUNDIALS function returns an integer value so check if
667 * retval < 0
668 * opt == 2 means function allocates memory so check if returned
669 * NULL pointer
670 */
671
check_retval(void * returnvalue,const char * funcname,int opt,int id)672 static int check_retval(void *returnvalue, const char *funcname, int opt, int id)
673 {
674 int *retval;
675
676 if (opt == 0 && returnvalue == NULL) {
677 /* Check if SUNDIALS function returned NULL pointer - no memory allocated */
678 fprintf(stderr,
679 "\nSUNDIALS_ERROR(%d): %s() failed - returned NULL pointer\n\n",
680 id, funcname);
681 return(1);
682 } else if (opt == 1) {
683 /* Check if retval < 0 */
684 retval = (int *) returnvalue;
685 if (*retval < 0) {
686 fprintf(stderr,
687 "\nSUNDIALS_ERROR(%d): %s() failed with retval = %d\n\n",
688 id, funcname, *retval);
689 return(1);
690 }
691 } else if (opt == 2 && returnvalue == NULL) {
692 /* Check if function returned NULL pointer - no memory allocated */
693 fprintf(stderr,
694 "\nMEMORY_ERROR(%d): %s() failed - returned NULL pointer\n\n",
695 id, funcname);
696 return(1);
697 }
698
699 return(0);
700 }
701
702 /*
703 *--------------------------------------------------------------------
704 * FUNCTIONS CALLED BY IDA & SUPPORTING FUNCTIONS
705 *--------------------------------------------------------------------
706 */
707
708 /*
709 * resweb: System residual function for predator-prey system.
710 * To compute the residual function F, this routine calls:
711 * rescomm, for needed communication, and then
712 * reslocal, for computation of the residuals on this processor.
713 */
714
resweb(realtype tt,N_Vector cc,N_Vector cp,N_Vector rr,void * user_data)715 static int resweb(realtype tt,
716 N_Vector cc, N_Vector cp, N_Vector rr,
717 void *user_data)
718 {
719 int retval;
720 UserData webdata;
721 sunindextype Nlocal;
722
723 webdata = (UserData) user_data;
724
725 Nlocal = webdata->n_local;
726
727 /* Call rescomm to do inter-processor communication. */
728 retval = rescomm(Nlocal, tt, cc, cp, user_data);
729
730 /* Call reslocal to calculate the local portion of residual vector. */
731 retval = reslocal(Nlocal, tt, cc, cp, rr, user_data);
732
733 return(retval);
734 }
735
736 /*
737 * rescomm: Communication routine in support of resweb.
738 * This routine performs all inter-processor communication of components
739 * of the cc vector needed to calculate F, namely the components at all
740 * interior subgrid boundaries (ghost cell data). It loads this data
741 * into a work array cext (the local portion of c, extended).
742 * The message-passing uses blocking sends, non-blocking receives,
743 * and receive-waiting, in routines BRecvPost, BSend, BRecvWait.
744 */
745
rescomm(sunindextype Nlocal,realtype tt,N_Vector cc,N_Vector cp,void * user_data)746 static int rescomm(sunindextype Nlocal, realtype tt,
747 N_Vector cc, N_Vector cp,
748 void *user_data)
749 {
750
751 UserData webdata;
752 realtype *cdata, *cext, buffer[2*NUM_SPECIES*MYSUB];
753 int thispe, ixsub, jysub, nsmxsub, nsmysub;
754 MPI_Comm comm;
755 MPI_Request request[4];
756
757 webdata = (UserData) user_data;
758 cdata = N_VGetArrayPointer(cc);
759
760 /* Get comm, thispe, subgrid indices, data sizes, extended array cext. */
761
762 comm = webdata->comm;
763 thispe = webdata->thispe;
764
765 ixsub = webdata->ixsub;
766 jysub = webdata->jysub;
767 cext = webdata->cext;
768 nsmxsub = webdata->nsmxsub;
769 nsmysub = (webdata->ns)*(webdata->mysub);
770
771 /* Start receiving boundary data from neighboring PEs. */
772
773 BRecvPost(comm, request, thispe, ixsub, jysub, nsmxsub, nsmysub,
774 cext, buffer);
775
776 /* Send data from boundary of local grid to neighboring PEs. */
777
778 BSend(comm, thispe, ixsub, jysub, nsmxsub, nsmysub, cdata);
779
780 /* Finish receiving boundary data from neighboring PEs. */
781
782 BRecvWait(request, ixsub, jysub, nsmxsub, cext, buffer);
783
784 return(0);
785 }
786
787 /*
788 * BRecvPost: Start receiving boundary data from neighboring PEs.
789 * (1) buffer should be able to hold 2*NUM_SPECIES*MYSUB realtype entries,
790 * should be passed to both the BRecvPost and BRecvWait functions, and
791 * should not be manipulated between the two calls.
792 * (2) request should have 4 entries, and is also passed in both calls.
793 */
794
BRecvPost(MPI_Comm comm,MPI_Request request[],int my_pe,int ixsub,int jysub,int dsizex,int dsizey,realtype cext[],realtype buffer[])795 static void BRecvPost(MPI_Comm comm, MPI_Request request[], int my_pe,
796 int ixsub, int jysub,
797 int dsizex, int dsizey,
798 realtype cext[], realtype buffer[])
799 {
800 int offsetce;
801 /* Have bufleft and bufright use the same buffer. */
802 realtype *bufleft = buffer, *bufright = buffer+NUM_SPECIES*MYSUB;
803
804 /* If jysub > 0, receive data for bottom x-line of cext. */
805 if (jysub != 0)
806 MPI_Irecv(&cext[NUM_SPECIES], dsizex, MPI_SUNREALTYPE,
807 my_pe-NPEX, 0, comm, &request[0]);
808
809 /* If jysub < NPEY-1, receive data for top x-line of cext. */
810 if (jysub != NPEY-1) {
811 offsetce = NUM_SPECIES*(1 + (MYSUB+1)*(MXSUB+2));
812 MPI_Irecv(&cext[offsetce], dsizex, MPI_SUNREALTYPE,
813 my_pe+NPEX, 0, comm, &request[1]);
814 }
815
816 /* If ixsub > 0, receive data for left y-line of cext (via bufleft). */
817 if (ixsub != 0) {
818 MPI_Irecv(&bufleft[0], dsizey, MPI_SUNREALTYPE,
819 my_pe-1, 0, comm, &request[2]);
820 }
821
822 /* If ixsub < NPEX-1, receive data for right y-line of cext (via bufright). */
823 if (ixsub != NPEX-1) {
824 MPI_Irecv(&bufright[0], dsizey, MPI_SUNREALTYPE,
825 my_pe+1, 0, comm, &request[3]);
826 }
827
828 }
829
830 /*
831 * BRecvWait: Finish receiving boundary data from neighboring PEs.
832 * (1) buffer should be able to hold 2*NUM_SPECIES*MYSUB realtype entries,
833 * should be passed to both the BRecvPost and BRecvWait functions, and
834 * should not be manipulated between the two calls.
835 * (2) request should have 4 entries, and is also passed in both calls.
836 */
837
BRecvWait(MPI_Request request[],int ixsub,int jysub,int dsizex,realtype cext[],realtype buffer[])838 static void BRecvWait(MPI_Request request[], int ixsub, int jysub,
839 int dsizex, realtype cext[], realtype buffer[])
840 {
841 int i;
842 int ly, dsizex2, offsetce, offsetbuf;
843 realtype *bufleft = buffer, *bufright = buffer+NUM_SPECIES*MYSUB;
844 MPI_Status status;
845
846 dsizex2 = dsizex + 2*NUM_SPECIES;
847
848 /* If jysub > 0, receive data for bottom x-line of cext. */
849 if (jysub != 0)
850 MPI_Wait(&request[0],&status);
851
852 /* If jysub < NPEY-1, receive data for top x-line of cext. */
853 if (jysub != NPEY-1)
854 MPI_Wait(&request[1],&status);
855
856 /* If ixsub > 0, receive data for left y-line of cext (via bufleft). */
857 if (ixsub != 0) {
858 MPI_Wait(&request[2],&status);
859
860 /* Copy the buffer to cext */
861 for (ly = 0; ly < MYSUB; ly++) {
862 offsetbuf = ly*NUM_SPECIES;
863 offsetce = (ly+1)*dsizex2;
864 for (i = 0; i < NUM_SPECIES; i++)
865 cext[offsetce+i] = bufleft[offsetbuf+i];
866 }
867 }
868
869 /* If ixsub < NPEX-1, receive data for right y-line of cext (via bufright). */
870 if (ixsub != NPEX-1) {
871 MPI_Wait(&request[3],&status);
872
873 /* Copy the buffer to cext */
874 for (ly = 0; ly < MYSUB; ly++) {
875 offsetbuf = ly*NUM_SPECIES;
876 offsetce = (ly+2)*dsizex2 - NUM_SPECIES;
877 for (i = 0; i < NUM_SPECIES; i++)
878 cext[offsetce+i] = bufright[offsetbuf+i];
879 }
880 }
881 }
882
883 /*
884 * BSend: Send boundary data to neighboring PEs.
885 * This routine sends components of cc from internal subgrid boundaries
886 * to the appropriate neighbor PEs.
887 */
888
BSend(MPI_Comm comm,int my_pe,int ixsub,int jysub,int dsizex,int dsizey,realtype cdata[])889 static void BSend(MPI_Comm comm, int my_pe, int ixsub, int jysub,
890 int dsizex, int dsizey, realtype cdata[])
891 {
892 int i;
893 int ly, offsetc, offsetbuf;
894 realtype bufleft[NUM_SPECIES*MYSUB], bufright[NUM_SPECIES*MYSUB];
895
896 /* If jysub > 0, send data from bottom x-line of cc. */
897
898 if (jysub != 0)
899 MPI_Send(&cdata[0], dsizex, MPI_SUNREALTYPE, my_pe-NPEX, 0, comm);
900
901 /* If jysub < NPEY-1, send data from top x-line of cc. */
902
903 if (jysub != NPEY-1) {
904 offsetc = (MYSUB-1)*dsizex;
905 MPI_Send(&cdata[offsetc], dsizex, MPI_SUNREALTYPE, my_pe+NPEX, 0, comm);
906 }
907
908 /* If ixsub > 0, send data from left y-line of cc (via bufleft). */
909
910 if (ixsub != 0) {
911 for (ly = 0; ly < MYSUB; ly++) {
912 offsetbuf = ly*NUM_SPECIES;
913 offsetc = ly*dsizex;
914 for (i = 0; i < NUM_SPECIES; i++)
915 bufleft[offsetbuf+i] = cdata[offsetc+i];
916 }
917 MPI_Send(&bufleft[0], dsizey, MPI_SUNREALTYPE, my_pe-1, 0, comm);
918 }
919
920 /* If ixsub < NPEX-1, send data from right y-line of cc (via bufright). */
921
922 if (ixsub != NPEX-1) {
923 for (ly = 0; ly < MYSUB; ly++) {
924 offsetbuf = ly*NUM_SPECIES;
925 offsetc = offsetbuf*MXSUB + (MXSUB-1)*NUM_SPECIES;
926 for (i = 0; i < NUM_SPECIES; i++)
927 bufright[offsetbuf+i] = cdata[offsetc+i];
928 }
929 MPI_Send(&bufright[0], dsizey, MPI_SUNREALTYPE, my_pe+1, 0, comm);
930 }
931 }
932
933 /* Define lines are for ease of readability in the following functions. */
934
935 #define mxsub (webdata->mxsub)
936 #define mysub (webdata->mysub)
937 #define npex (webdata->npex)
938 #define npey (webdata->npey)
939 #define ixsub (webdata->ixsub)
940 #define jysub (webdata->jysub)
941 #define nsmxsub (webdata->nsmxsub)
942 #define nsmxsub2 (webdata->nsmxsub2)
943 #define np (webdata->np)
944 #define dx (webdata->dx)
945 #define dy (webdata->dy)
946 #define cox (webdata->cox)
947 #define coy (webdata->coy)
948 #define rhs (webdata->rhs)
949 #define cext (webdata->cext)
950 #define rates (webdata->rates)
951 #define ns (webdata->ns)
952 #define acoef (webdata->acoef)
953 #define bcoef (webdata->bcoef)
954
955 /*
956 * reslocal: Compute res = F(t,cc,cp).
957 * This routine assumes that all inter-processor communication of data
958 * needed to calculate F has already been done. Components at interior
959 * subgrid boundaries are assumed to be in the work array cext.
960 * The local portion of the cc vector is first copied into cext.
961 * The exterior Neumann boundary conditions are explicitly handled here
962 * by copying data from the first interior mesh line to the ghost cell
963 * locations in cext. Then the reaction and diffusion terms are
964 * evaluated in terms of the cext array, and the residuals are formed.
965 * The reaction terms are saved separately in the vector webdata->rates
966 * for use by the preconditioner setup routine.
967 */
968
reslocal(sunindextype Nlocal,realtype tt,N_Vector cc,N_Vector cp,N_Vector rr,void * user_data)969 static int reslocal(sunindextype Nlocal, realtype tt,
970 N_Vector cc, N_Vector cp, N_Vector rr,
971 void *user_data)
972 {
973 realtype *cdata, *ratesxy, *cpxy, *resxy,
974 xx, yy, dcyli, dcyui, dcxli, dcxui;
975 int ix, jy, is, i, locc, ylocce, locce;
976 UserData webdata;
977
978 webdata = (UserData) user_data;
979
980 /* Get data pointers, subgrid data, array sizes, work array cext. */
981
982 cdata = N_VGetArrayPointer(cc);
983
984 /* Copy local segment of cc vector into the working extended array cext. */
985
986 locc = 0;
987 locce = nsmxsub2 + NUM_SPECIES;
988 for (jy = 0; jy < mysub; jy++) {
989 for (i = 0; i < nsmxsub; i++) cext[locce+i] = cdata[locc+i];
990 locc = locc + nsmxsub;
991 locce = locce + nsmxsub2;
992 }
993
994 /* To facilitate homogeneous Neumann boundary conditions, when this is
995 a boundary PE, copy data from the first interior mesh line of cc to cext. */
996
997 /* If jysub = 0, copy x-line 2 of cc to cext. */
998 if (jysub == 0)
999 { for (i = 0; i < nsmxsub; i++) cext[NUM_SPECIES+i] = cdata[nsmxsub+i]; }
1000
1001 /* If jysub = npey-1, copy x-line mysub-1 of cc to cext. */
1002 if (jysub == npey-1) {
1003 locc = (mysub-2)*nsmxsub;
1004 locce = (mysub+1)*nsmxsub2 + NUM_SPECIES;
1005 for (i = 0; i < nsmxsub; i++) cext[locce+i] = cdata[locc+i];
1006 }
1007
1008 /* If ixsub = 0, copy y-line 2 of cc to cext. */
1009 if (ixsub == 0) {
1010 for (jy = 0; jy < mysub; jy++) {
1011 locc = jy*nsmxsub + NUM_SPECIES;
1012 locce = (jy+1)*nsmxsub2;
1013 for (i = 0; i < NUM_SPECIES; i++) cext[locce+i] = cdata[locc+i];
1014 }
1015 }
1016
1017 /* If ixsub = npex-1, copy y-line mxsub-1 of cc to cext. */
1018 if (ixsub == npex-1) {
1019 for (jy = 0; jy < mysub; jy++) {
1020 locc = (jy+1)*nsmxsub - 2*NUM_SPECIES;
1021 locce = (jy+2)*nsmxsub2 - NUM_SPECIES;
1022 for (i = 0; i < NUM_SPECIES; i++) cext[locce+i] = cdata[locc+i];
1023 }
1024 }
1025
1026 /* Loop over all grid points, setting local array rates to right-hand sides.
1027 Then set rr values appropriately for prey/predator components of F. */
1028
1029 for (jy = 0; jy < mysub; jy++) {
1030 ylocce = (jy+1)*nsmxsub2;
1031 yy = (jy+jysub*mysub)*dy;
1032
1033 for (ix = 0; ix < mxsub; ix++) {
1034 locce = ylocce + (ix+1)*NUM_SPECIES;
1035 xx = (ix + ixsub*mxsub)*dx;
1036
1037 ratesxy = IJ_Vptr(rates,ix,jy);
1038 WebRates(xx, yy, &(cext[locce]), ratesxy, webdata);
1039
1040 resxy = IJ_Vptr(rr,ix,jy);
1041 cpxy = IJ_Vptr(cp,ix,jy);
1042
1043 for (is = 0; is < NUM_SPECIES; is++) {
1044 dcyli = cext[locce+is] - cext[locce+is-nsmxsub2];
1045 dcyui = cext[locce+is+nsmxsub2] - cext[locce+is];
1046
1047 dcxli = cext[locce+is] - cext[locce+is-NUM_SPECIES];
1048 dcxui = cext[locce+is+NUM_SPECIES] - cext[locce+is];
1049
1050 rhs[is] = cox[is]*(dcxui-dcxli) + coy[is]*(dcyui-dcyli) + ratesxy[is];
1051
1052 if (is < np) resxy[is] = cpxy[is] - rhs[is];
1053 else resxy[is] = - rhs[is];
1054
1055 }
1056 }
1057 }
1058
1059 return(0);
1060 }
1061
1062 /*
1063 * WebRates: Evaluate reaction rates at a given spatial point.
1064 * At a given (x,y), evaluate the array of ns reaction terms R.
1065 */
1066
WebRates(realtype xx,realtype yy,realtype * cxy,realtype * ratesxy,UserData webdata)1067 static void WebRates(realtype xx, realtype yy, realtype *cxy, realtype *ratesxy,
1068 UserData webdata)
1069 {
1070 int is;
1071 realtype fac;
1072
1073 for (is = 0; is < NUM_SPECIES; is++)
1074 ratesxy[is] = dotprod(NUM_SPECIES, cxy, acoef[is]);
1075
1076 fac = ONE + ALPHA*xx*yy + BETA*sin(FOURPI*xx)*sin(FOURPI*yy);
1077
1078 for (is = 0; is < NUM_SPECIES; is++)
1079 ratesxy[is] = cxy[is]*( bcoef[is]*fac + ratesxy[is] );
1080
1081 }
1082
1083 /*
1084 * dotprod: dot product routine for realtype arrays, for use by WebRates.
1085 */
1086
dotprod(int size,realtype * x1,realtype * x2)1087 static realtype dotprod(int size, realtype *x1, realtype *x2)
1088 {
1089 int i;
1090 realtype *xx1, *xx2, temp = ZERO;
1091
1092 xx1 = x1;
1093 xx2 = x2;
1094 for (i = 0; i < size; i++)
1095 temp += (*xx1++) * (*xx2++);
1096
1097 return(temp);
1098 }
1099