1 /*
2 * -----------------------------------------------------------------
3 * Programmer(s): Daniel R. Reynolds @ SMU
4 * Allan Taylor, Alan Hindmarsh and Radu Serban @ LLNL
5 * -----------------------------------------------------------------
6 * SUNDIALS Copyright Start
7 * Copyright (c) 2002-2021, Lawrence Livermore National Security
8 * and Southern Methodist University.
9 * All rights reserved.
10 *
11 * See the top-level LICENSE and NOTICE files for details.
12 *
13 * SPDX-License-Identifier: BSD-3-Clause
14 * SUNDIALS Copyright End
15 * -----------------------------------------------------------------
16 * Example program for IDA: Food web, parallel, GMRES, user
17 * preconditioner.
18 *
19 * This example program for IDA uses SUNLinSol_SPGMR as the linear solver.
20 * It is written for a parallel computer system and uses a
21 * block-diagonal preconditioner (setup and solve routines) for the
22 * SUNLinSol_SPGMR package.
23 *
24 * The mathematical problem solved in this example is a DAE system
25 * that arises from a system of partial differential equations after
26 * spatial discretization. The PDE system is a food web population
27 * model, with predator-prey interaction and diffusion on the unit
28 * square in two dimensions. The dependent variable vector is:
29 *
30 * 1 2 ns
31 * c = (c , c , ..., c ) , ns = 2 * np
32 *
33 * and the PDE's are as follows:
34 *
35 * i i i
36 * dc /dt = d(i)*(c + c ) + R (x,y,c) (i = 1,...,np)
37 * xx yy i
38 *
39 * i i
40 * 0 = d(i)*(c + c ) + R (x,y,c) (i = np+1,...,ns)
41 * xx yy i
42 *
43 * where the reaction terms R are:
44 *
45 * i ns j
46 * R (x,y,c) = c * (b(i) + sum a(i,j)*c )
47 * i j=1
48 *
49 * The number of species is ns = 2 * np, with the first np being
50 * prey and the last np being predators. The coefficients a(i,j),
51 * b(i), d(i) are:
52 *
53 * a(i,i) = -AA (all i)
54 * a(i,j) = -GG (i <= np , j > np)
55 * a(i,j) = EE (i > np, j <= np)
56 * all other a(i,j) = 0
57 * b(i) = BB*(1+ alpha * x*y + beta*sin(4 pi x)*sin(4 pi y)) (i <= np)
58 * b(i) =-BB*(1+ alpha * x*y + beta*sin(4 pi x)*sin(4 pi y)) (i > np)
59 * d(i) = DPREY (i <= np)
60 * d(i) = DPRED (i > np)
61 *
62 * Note: The above equations are written in 1-based indices,
63 * whereas the code has 0-based indices, being written in C.
64 *
65 * The various scalar parameters required are set using '#define'
66 * statements or directly in routine InitUserData. In this program,
67 * np = 1, ns = 2. The boundary conditions are homogeneous Neumann:
68 * normal derivative = 0.
69 *
70 * A polynomial in x and y is used to set the initial values of the
71 * first np variables (the prey variables) at each x,y location,
72 * while initial values for the remaining (predator) variables are
73 * set to a flat value, which is corrected by IDACalcIC.
74 *
75 * The PDEs are discretized by central differencing on a MX by MY
76 * mesh, and so the system size Neq is the product
77 * MX * MY * NUM_SPECIES. The system is actually implemented on
78 * submeshes, processor by processor, with an MXSUB by MYSUB mesh
79 * on each of NPEX * NPEY processors.
80 *
81 * The DAE system is solved by IDA using the SUNLinSol_SPGMR linear
82 * solver, which uses the preconditioned GMRES iterative method to
83 * solve linear systems. The precondtioner supplied to SUNLinSol_SPGMR is
84 * the block-diagonal part of the Jacobian with ns by ns blocks
85 * arising from the reaction terms only. Output is printed at
86 * t = 0, .001, .01, .1, .4, .7, 1.
87 * -----------------------------------------------------------------
88 * References:
89 * [1] Peter N. Brown and Alan C. Hindmarsh,
90 * Reduced Storage Matrix Methods in Stiff ODE systems,
91 * Journal of Applied Mathematics and Computation, Vol. 31
92 * (May 1989), pp. 40-91.
93 *
94 * [2] Peter N. Brown, Alan C. Hindmarsh, and Linda R. Petzold,
95 * Using Krylov Methods in the Solution of Large-Scale
96 * Differential-Algebraic Systems, SIAM J. Sci. Comput., 15
97 * (1994), pp. 1467-1488.
98 *
99 * [3] Peter N. Brown, Alan C. Hindmarsh, and Linda R. Petzold,
100 * Consistent Initial Condition Calculation for Differential-
101 * Algebraic Systems, SIAM J. Sci. Comput., 19 (1998),
102 * pp. 1495-1512.
103 * -----------------------------------------------------------------
104 */
105
106 #include <stdio.h>
107 #include <stdlib.h>
108 #include <math.h>
109
110 #include <ida/ida.h>
111 #include <sunlinsol/sunlinsol_spgmr.h>
112 #include <nvector/nvector_parallel.h>
113 #include <sundials/sundials_dense.h>
114 #include <sundials/sundials_types.h>
115
116 #include <mpi.h>
117
118 /* helpful macros */
119
120 #ifndef MAX
121 #define MAX(A, B) ((A) > (B) ? (A) : (B))
122 #endif
123
124 /* Problem Constants. */
125
126 #define NPREY 1 /* Number of prey (= number of predators). */
127 #define NUM_SPECIES 2*NPREY
128
129 #define PI RCONST(3.1415926535898) /* pi */
130 #define FOURPI (RCONST(4.0)*PI) /* 4 pi */
131
132 #define MXSUB 10 /* Number of x mesh points per processor subgrid */
133 #define MYSUB 10 /* Number of y mesh points per processor subgrid */
134 #define NPEX 2 /* Number of subgrids in the x direction */
135 #define NPEY 2 /* Number of subgrids in the y direction */
136 #define MX (MXSUB*NPEX) /* MX = number of x mesh points */
137 #define MY (MYSUB*NPEY) /* MY = number of y mesh points */
138 #define NSMXSUB (NUM_SPECIES * MXSUB)
139 #define NEQ (NUM_SPECIES*MX*MY) /* Number of equations in system */
140 #define AA RCONST(1.0) /* Coefficient in above eqns. for a */
141 #define EE RCONST(10000.) /* Coefficient in above eqns. for a */
142 #define GG RCONST(0.5e-6) /* Coefficient in above eqns. for a */
143 #define BB RCONST(1.0) /* Coefficient in above eqns. for b */
144 #define DPREY RCONST(1.0) /* Coefficient in above eqns. for d */
145 #define DPRED RCONST(0.05) /* Coefficient in above eqns. for d */
146 #define ALPHA RCONST(50.) /* Coefficient alpha in above eqns. */
147 #define BETA RCONST(1000.) /* Coefficient beta in above eqns. */
148 #define AX RCONST(1.0) /* Total range of x variable */
149 #define AY RCONST(1.0) /* Total range of y variable */
150 #define RTOL RCONST(1.e-5) /* rtol tolerance */
151 #define ATOL RCONST(1.e-5) /* atol tolerance */
152 #define ZERO RCONST(0.) /* 0. */
153 #define ONE RCONST(1.0) /* 1. */
154 #define NOUT 6
155 #define TMULT RCONST(10.0) /* Multiplier for tout values */
156 #define TADD RCONST(0.3) /* Increment for tout values */
157
158
159 /* User-defined vector accessor macro IJ_Vptr. */
160
161 /* IJ_Vptr is defined in order to express the underlying 3-d structure of the
162 dependent variable vector from its underlying 1-d storage (an N_Vector).
163 IJ_Vptr(vv,i,j) returns a pointer to the location in vv corresponding to
164 species index is = 0, x-index ix = i, and y-index jy = j. */
165
166 #define IJ_Vptr(vv,i,j) (&NV_Ith_P(vv, (i)*NUM_SPECIES + (j)*NSMXSUB ))
167
168 /* Type: UserData. Contains problem constants, preconditioner data, etc. */
169
170 typedef struct {
171 sunindextype ns;
172 int np, thispe, npes, ixsub, jysub, npex, npey;
173 int mxsub, mysub, nsmxsub, nsmxsub2;
174 realtype dx, dy, **acoef;
175 realtype cox[NUM_SPECIES], coy[NUM_SPECIES], bcoef[NUM_SPECIES],
176 rhs[NUM_SPECIES], cext[(MXSUB+2)*(MYSUB+2)*NUM_SPECIES];
177 MPI_Comm comm;
178 N_Vector rates;
179 realtype **PP[MXSUB][MYSUB];
180 sunindextype *pivot[MXSUB][MYSUB];
181 N_Vector ewt;
182 void *ida_mem;
183 } *UserData;
184
185
186 /* Prototypes for user-supplied and supporting functions. */
187
188 static int resweb(realtype time,
189 N_Vector cc, N_Vector cp, N_Vector resval,
190 void *user_data);
191
192 static int Precondbd(realtype tt, N_Vector cc, N_Vector cp,
193 N_Vector rr, realtype cj, void *user_data);
194
195 static int PSolvebd(realtype tt, N_Vector cc, N_Vector cp,
196 N_Vector rr, N_Vector rvec, N_Vector zvec,
197 realtype cj, realtype delta, void *user_data);
198
199 static int rescomm(N_Vector cc, N_Vector cp, void *user_data);
200
201 static void BSend(MPI_Comm comm, int thispe, int ixsub, int jysub,
202 int dsizex, int dsizey, realtype carray[]);
203
204 static void BRecvPost(MPI_Comm comm, MPI_Request request[], int thispe,
205 int ixsub, int jysub,
206 int dsizex, int dsizey,
207 realtype cext[], realtype buffer[]);
208
209 static void BRecvWait(MPI_Request request[], int ixsub, int jysub,
210 int dsizex, realtype cext[], realtype buffer[]);
211
212 static int reslocal(realtype tt, N_Vector cc, N_Vector cp, N_Vector res,
213 void *user_data);
214
215 static void WebRates(realtype xx, realtype yy, realtype *cxy, realtype *ratesxy,
216 UserData webdata);
217
218 static realtype dotprod(int size, realtype *x1, realtype *x2);
219
220 /* Prototypes for private Helper Functions. */
221
222 static UserData AllocUserData(MPI_Comm comm, sunindextype local_N,
223 sunindextype SystemSize);
224
225 static void InitUserData(UserData webdata, int thispe, int npes,
226 MPI_Comm comm);
227
228 static void FreeUserData(UserData webdata);
229
230 static void SetInitialProfiles(N_Vector cc, N_Vector cp, N_Vector id,
231 N_Vector scrtch, UserData webdata);
232
233 static void PrintHeader(sunindextype SystemSize, int maxl,
234 realtype rtol, realtype atol);
235
236 static void PrintOutput(void *ida_mem, N_Vector cc, realtype time,
237 UserData webdata, MPI_Comm comm);
238
239 static void PrintFinalStats(void *ida_mem);
240
241 static int check_retval(void *returnvalue, const char *funcname, int opt, int id);
242
243 /*
244 *--------------------------------------------------------------------
245 * MAIN PROGRAM
246 *--------------------------------------------------------------------
247 */
248
main(int argc,char * argv[])249 int main(int argc, char *argv[])
250 {
251 MPI_Comm comm;
252 void *ida_mem;
253 SUNLinearSolver LS;
254 UserData webdata;
255 sunindextype SystemSize, local_N;
256 realtype rtol, atol, t0, tout, tret;
257 N_Vector cc, cp, res, id;
258 int thispe, npes, maxl, iout, retval;
259
260 cc = cp = res = id = NULL;
261 webdata = NULL;
262 LS = NULL;
263 ida_mem = NULL;
264
265 /* Set communicator, and get processor number and total number of PE's. */
266
267 MPI_Init(&argc, &argv);
268 comm = MPI_COMM_WORLD;
269 MPI_Comm_rank(comm, &thispe);
270 MPI_Comm_size(comm, &npes);
271
272 if (npes != NPEX*NPEY) {
273 if (thispe == 0)
274 fprintf(stderr,
275 "\nMPI_ERROR(0): npes = %d not equal to NPEX*NPEY = %d\n",
276 npes, NPEX*NPEY);
277 MPI_Finalize();
278 return(1);
279 }
280
281 /* Set local length (local_N) and global length (SystemSize). */
282
283 local_N = MXSUB*MYSUB*NUM_SPECIES;
284 SystemSize = NEQ;
285
286 /* Set up user data block webdata. */
287
288 webdata = AllocUserData(comm, local_N, SystemSize);
289 if (check_retval((void *)webdata, "AllocUserData", 0, thispe)) MPI_Abort(comm, 1);
290
291 InitUserData(webdata, thispe, npes, comm);
292
293 /* Create needed vectors, and load initial values.
294 The vector res is used temporarily only. */
295
296 cc = N_VNew_Parallel(comm, local_N, SystemSize);
297 if (check_retval((void *)cc, "N_VNew_Parallel", 0, thispe)) MPI_Abort(comm, 1);
298
299 cp = N_VNew_Parallel(comm, local_N, SystemSize);
300 if (check_retval((void *)cp, "N_VNew_Parallel", 0, thispe)) MPI_Abort(comm, 1);
301
302 res = N_VNew_Parallel(comm, local_N, SystemSize);
303 if (check_retval((void *)res, "N_VNew_Parallel", 0, thispe)) MPI_Abort(comm, 1);
304
305 id = N_VNew_Parallel(comm, local_N, SystemSize);
306 if (check_retval((void *)id, "N_VNew_Parallel", 0, thispe)) MPI_Abort(comm, 1);
307
308 SetInitialProfiles(cc, cp, id, res, webdata);
309
310 N_VDestroy(res);
311
312 /* Set remaining inputs to IDAMalloc. */
313
314 t0 = ZERO;
315 rtol = RTOL;
316 atol = ATOL;
317
318 /* Call IDACreate and IDAMalloc to initialize IDA.
319 A pointer to IDA problem memory is returned and stored in idamem. */
320
321 ida_mem = IDACreate();
322 if (check_retval((void *)ida_mem, "IDACreate", 0, thispe)) MPI_Abort(comm, 1);
323
324 retval = IDASetUserData(ida_mem, webdata);
325 if (check_retval(&retval, "IDASetUserData", 1, thispe)) MPI_Abort(comm, 1);
326
327 retval = IDASetId(ida_mem, id);
328 if (check_retval(&retval, "IDASetId", 1, thispe)) MPI_Abort(comm, 1);
329
330 retval = IDAInit(ida_mem, resweb, t0, cc, cp);
331 if (check_retval(&retval, "IDAinit", 1, thispe)) MPI_Abort(comm, 1);
332
333 retval = IDASStolerances(ida_mem, rtol, atol);
334 if (check_retval(&retval, "IDASStolerances", 1, thispe)) MPI_Abort(comm, 1);
335
336 webdata->ida_mem = ida_mem;
337
338 /* Call SUNLinSol_SPGMR and IDASetLinearSolver to specify the linear solver
339 to IDA, and specify the supplied [left] preconditioner routines
340 (Precondbd & PSolvebd). maxl (Krylov subspace dim.) is set to 16. */
341
342 maxl = 16;
343 LS = SUNLinSol_SPGMR(cc, PREC_LEFT, maxl);
344 if (check_retval((void *)LS, "SUNLinSol_SPGMR", 0, thispe)) MPI_Abort(comm, 1);
345
346 retval = SUNLinSol_SPGMRSetMaxRestarts(LS, 5); /* IDA recommends allowing up to 5 restarts */
347 if(check_retval(&retval, "SUNLinSol_SPGMRSetMaxRestarts", 1, thispe)) MPI_Abort(comm, 1);
348
349 retval = IDASetLinearSolver(ida_mem, LS, NULL);
350 if (check_retval(&retval, "IDASetLinearSolver", 1, thispe))
351 MPI_Abort(comm, 1);
352
353 retval = IDASetPreconditioner(ida_mem, Precondbd, PSolvebd);
354 if (check_retval(&retval, "IDASetPreconditioner", 1, thispe))
355 MPI_Abort(comm, 1);
356
357 /* Call IDACalcIC (with default options) to correct the initial values. */
358
359 tout = RCONST(0.001);
360 retval = IDACalcIC(ida_mem, IDA_YA_YDP_INIT, tout);
361 if (check_retval(&retval, "IDACalcIC", 1, thispe))
362 MPI_Abort(comm, 1);
363
364 /* On PE 0, print heading, basic parameters, initial values. */
365
366 if (thispe == 0) PrintHeader(SystemSize, maxl, rtol, atol);
367 PrintOutput(ida_mem, cc, t0, webdata, comm);
368
369 /* Loop over iout, call IDASolve (normal mode), print selected output. */
370
371 for (iout = 1; iout <= NOUT; iout++) {
372
373 retval = IDASolve(ida_mem, tout, &tret, cc, cp, IDA_NORMAL);
374 if (check_retval(&retval, "IDASolve", 1, thispe)) MPI_Abort(comm, 1);
375
376 PrintOutput(ida_mem, cc, tret, webdata, comm);
377
378 if (iout < 3) tout *= TMULT;
379 else tout += TADD;
380
381 }
382
383 /* On PE 0, print final set of statistics. */
384 if (thispe == 0) PrintFinalStats(ida_mem);
385
386 /* Free memory. */
387
388 N_VDestroy(cc);
389 N_VDestroy(cp);
390 N_VDestroy(id);
391
392 IDAFree(&ida_mem);
393 SUNLinSolFree(LS);
394
395 FreeUserData(webdata);
396
397 MPI_Finalize();
398
399 return(0);
400
401 }
402
403 /*
404 *--------------------------------------------------------------------
405 * PRIVATE FUNCTIONS
406 *--------------------------------------------------------------------
407 */
408
409 /*
410 * AllocUserData: Allocate memory for data structure of type UserData.
411 */
412
AllocUserData(MPI_Comm comm,sunindextype local_N,sunindextype SystemSize)413 static UserData AllocUserData(MPI_Comm comm, sunindextype local_N, sunindextype SystemSize)
414 {
415 int ix, jy;
416 UserData webdata;
417
418 webdata = (UserData) malloc(sizeof *webdata);
419
420 webdata->rates = N_VNew_Parallel(comm, local_N, SystemSize);
421
422 for (ix = 0; ix < MXSUB; ix++) {
423 for (jy = 0; jy < MYSUB; jy++) {
424 (webdata->PP)[ix][jy] = newDenseMat(NUM_SPECIES, NUM_SPECIES);
425 (webdata->pivot)[ix][jy] = newIndexArray(NUM_SPECIES);
426 }
427 }
428
429 webdata->acoef = newDenseMat(NUM_SPECIES, NUM_SPECIES);
430 webdata->ewt = N_VNew_Parallel(comm, local_N, SystemSize);
431 return(webdata);
432
433 }
434
435 /*
436 * InitUserData: Load problem constants in webdata (of type UserData).
437 */
438
InitUserData(UserData webdata,int thispe,int npes,MPI_Comm comm)439 static void InitUserData(UserData webdata, int thispe, int npes,
440 MPI_Comm comm)
441 {
442 int i, j, np;
443 realtype *a1,*a2, *a3, *a4, dx2, dy2, **acoef, *bcoef, *cox, *coy;
444
445 webdata->jysub = thispe / NPEX;
446 webdata->ixsub = thispe - (webdata->jysub)*NPEX;
447 webdata->mxsub = MXSUB;
448 webdata->mysub = MYSUB;
449 webdata->npex = NPEX;
450 webdata->npey = NPEY;
451 webdata->ns = NUM_SPECIES;
452 webdata->np = NPREY;
453 webdata->dx = AX/(MX-1);
454 webdata->dy = AY/(MY-1);
455 webdata->thispe = thispe;
456 webdata->npes = npes;
457 webdata->nsmxsub = MXSUB * NUM_SPECIES;
458 webdata->nsmxsub2 = (MXSUB+2)*NUM_SPECIES;
459 webdata->comm = comm;
460
461 /* Set up the coefficients a and b plus others found in the equations. */
462 np = webdata->np;
463 dx2 = (webdata->dx)*(webdata->dx); dy2 = (webdata->dy)*(webdata->dy);
464
465 acoef = webdata->acoef;
466 bcoef = webdata->bcoef;
467 cox = webdata->cox;
468 coy = webdata->coy;
469
470 for (i = 0; i < np; i++) {
471 a1 = &(acoef[i][np]);
472 a2 = &(acoef[i+np][0]);
473 a3 = &(acoef[i][0]);
474 a4 = &(acoef[i+np][np]);
475
476 /* Fill in the portion of acoef in the four quadrants, row by row. */
477 for (j = 0; j < np; j++) {
478 *a1++ = -GG;
479 *a2++ = EE;
480 *a3++ = ZERO;
481 *a4++ = ZERO;
482 }
483
484 /* Reset the diagonal elements of acoef to -AA. */
485 acoef[i][i] = -AA; acoef[i+np][i+np] = -AA;
486
487 /* Set coefficients for b and diffusion terms. */
488 bcoef[i] = BB; bcoef[i+np] = -BB;
489 cox[i] = DPREY/dx2; cox[i+np] = DPRED/dx2;
490 coy[i] = DPREY/dy2; coy[i+np] = DPRED/dy2;
491 }
492
493 }
494
495 /*
496 * FreeUserData: Free webdata memory.
497 */
498
FreeUserData(UserData webdata)499 static void FreeUserData(UserData webdata)
500 {
501 int ix, jy;
502
503 for (ix = 0; ix < MXSUB; ix++) {
504 for (jy = 0; jy < MYSUB; jy++) {
505 destroyMat((webdata->PP)[ix][jy]);
506 destroyArray((webdata->pivot)[ix][jy]);
507 }
508 }
509
510 destroyMat(webdata->acoef);
511 N_VDestroy(webdata->rates);
512 N_VDestroy(webdata->ewt);
513 free(webdata);
514
515 }
516
517 /*
518 * SetInitialProfiles: Set initial conditions in cc, cp, and id.
519 * A polynomial profile is used for the prey cc values, and a constant
520 * (1.0e5) is loaded as the initial guess for the predator cc values.
521 * The id values are set to 1 for the prey and 0 for the predators.
522 * The prey cp values are set according to the given system, and
523 * the predator cp values are set to zero.
524 */
525
SetInitialProfiles(N_Vector cc,N_Vector cp,N_Vector id,N_Vector res,UserData webdata)526 static void SetInitialProfiles(N_Vector cc, N_Vector cp, N_Vector id,
527 N_Vector res, UserData webdata)
528 {
529 int ixsub, jysub, mxsub, mysub, np, ix, jy, is;
530 realtype *cxy, *idxy, *cpxy, dx, dy, xx, yy, xyfactor;
531
532 ixsub = webdata->ixsub;
533 jysub = webdata->jysub;
534 mxsub = webdata->mxsub;
535 mysub = webdata->mxsub;
536 dx = webdata->dx;
537 dy = webdata->dy;
538 np = webdata->np;
539
540 /* Loop over grid, load cc values and id values. */
541 for (jy = 0; jy < mysub; jy++) {
542 yy = (jy + jysub*mysub) * dy;
543 for (ix = 0; ix < mxsub; ix++) {
544 xx = (ix + ixsub*mxsub) * dx;
545 xyfactor = RCONST(16.0)*xx*(ONE - xx)*yy*(ONE - yy);
546 xyfactor *= xyfactor;
547
548 cxy = IJ_Vptr(cc,ix,jy);
549 idxy = IJ_Vptr(id,ix,jy);
550 for (is = 0; is < NUM_SPECIES; is++) {
551 if (is < np) { cxy[is] = RCONST(10.0) + (realtype)(is+1)*xyfactor; idxy[is] = ONE; }
552 else { cxy[is] = 1.0e5; idxy[is] = ZERO; }
553 }
554 }
555 }
556
557 /* Set c' for the prey by calling the residual function with cp = 0. */
558 N_VConst(ZERO, cp);
559 resweb(ZERO, cc, cp, res, webdata);
560 N_VScale(-ONE, res, cp);
561
562 /* Set c' for predators to 0. */
563 for (jy = 0; jy < mysub; jy++) {
564 for (ix = 0; ix < mxsub; ix++) {
565 cpxy = IJ_Vptr(cp,ix,jy);
566 for (is = np; is < NUM_SPECIES; is++) cpxy[is] = ZERO;
567 }
568 }
569 }
570
571 /*
572 * Print first lines of output (problem description)
573 */
574
PrintHeader(sunindextype SystemSize,int maxl,realtype rtol,realtype atol)575 static void PrintHeader(sunindextype SystemSize, int maxl,
576 realtype rtol, realtype atol)
577 {
578 printf("\nidaFoodWeb_kry_p: Predator-prey DAE parallel example problem for IDA \n\n");
579 printf("Number of species ns: %d", NUM_SPECIES);
580 printf(" Mesh dimensions: %d x %d", MX, MY);
581 printf(" Total system size: %ld\n", (long int) SystemSize);
582 printf("Subgrid dimensions: %d x %d", MXSUB, MYSUB);
583 printf(" Processor array: %d x %d\n", NPEX, NPEY);
584 #if defined(SUNDIALS_EXTENDED_PRECISION)
585 printf("Tolerance parameters: rtol = %Lg atol = %Lg\n", rtol, atol);
586 #elif defined(SUNDIALS_DOUBLE_PRECISION)
587 printf("Tolerance parameters: rtol = %g atol = %g\n", rtol, atol);
588 #else
589 printf("Tolerance parameters: rtol = %g atol = %g\n", rtol, atol);
590 #endif
591 printf("Linear solver: SUNLinSol_SPGMR Max. Krylov dimension maxl: %d\n", maxl);
592 printf("Preconditioner: block diagonal, block size ns,");
593 printf(" via difference quotients\n");
594 printf("CalcIC called to correct initial predator concentrations \n\n");
595
596 printf("-----------------------------------------------------------\n");
597 printf(" t bottom-left top-right");
598 printf(" | nst k h\n");
599 printf("-----------------------------------------------------------\n\n");
600 }
601
602 /*
603 * PrintOutput: Print output values at output time t = tt.
604 * Selected run statistics are printed. Then values of c1 and c2
605 * are printed for the bottom left and top right grid points only.
606 * (NOTE: This routine is specific to the case NUM_SPECIES = 2.)
607 */
608
PrintOutput(void * ida_mem,N_Vector cc,realtype tt,UserData webdata,MPI_Comm comm)609 static void PrintOutput(void *ida_mem, N_Vector cc, realtype tt,
610 UserData webdata, MPI_Comm comm)
611 {
612 MPI_Status status;
613 realtype *cdata, clast[2], hused;
614 long int nst;
615 int i, kused, retval, thispe, npelast, ilast;;
616
617 thispe = webdata->thispe;
618 npelast = webdata->npes - 1;
619 cdata = N_VGetArrayPointer(cc);
620
621 /* Send conc. at top right mesh point from PE npes-1 to PE 0. */
622 if (thispe == npelast) {
623 ilast = NUM_SPECIES*MXSUB*MYSUB - 2;
624 if (npelast != 0)
625 MPI_Send(&cdata[ilast], 2, MPI_SUNREALTYPE, 0, 0, comm);
626 else { clast[0] = cdata[ilast]; clast[1] = cdata[ilast+1]; }
627 }
628
629 /* On PE 0, receive conc. at top right from PE npes - 1.
630 Then print performance data and sampled solution values. */
631
632 if (thispe == 0) {
633
634 if (npelast != 0)
635 MPI_Recv(&clast[0], 2, MPI_SUNREALTYPE, npelast, 0, comm, &status);
636
637 retval = IDAGetLastOrder(ida_mem, &kused);
638 check_retval(&retval, "IDAGetLastOrder", 1, thispe);
639 retval = IDAGetNumSteps(ida_mem, &nst);
640 check_retval(&retval, "IDAGetNumSteps", 1, thispe);
641 retval = IDAGetLastStep(ida_mem, &hused);
642 check_retval(&retval, "IDAGetLastStep", 1, thispe);
643
644 #if defined(SUNDIALS_EXTENDED_PRECISION)
645 printf("%8.2Le %12.4Le %12.4Le | %3ld %1d %12.4Le\n",
646 tt, cdata[0], clast[0], nst, kused, hused);
647 for (i=1;i<NUM_SPECIES;i++)
648 printf(" %12.4Le %12.4Le |\n",cdata[i],clast[i]);
649 #elif defined(SUNDIALS_DOUBLE_PRECISION)
650 printf("%8.2e %12.4e %12.4e | %3ld %1d %12.4e\n",
651 tt, cdata[0], clast[0], nst, kused, hused);
652 for (i=1;i<NUM_SPECIES;i++)
653 printf(" %12.4e %12.4e |\n",cdata[i],clast[i]);
654 #else
655 printf("%8.2e %12.4e %12.4e | %3ld %1d %12.4e\n",
656 tt, cdata[0], clast[0], nst, kused, hused);
657 for (i=1;i<NUM_SPECIES;i++)
658 printf(" %12.4e %12.4e |\n",cdata[i],clast[i]);
659 #endif
660 printf("\n");
661
662 }
663 }
664
665 /*
666 * PrintFinalStats: Print final run data contained in iopt.
667 */
668
PrintFinalStats(void * ida_mem)669 static void PrintFinalStats(void *ida_mem)
670 {
671 long int nst, nre, nreLS, netf, ncfn, nni, ncfl, nli, npe, nps;
672 int retval;
673
674 retval = IDAGetNumSteps(ida_mem, &nst);
675 check_retval(&retval, "IDAGetNumSteps", 1, 0);
676 retval = IDAGetNumResEvals(ida_mem, &nre);
677 check_retval(&retval, "IDAGetNumResEvals", 1, 0);
678 retval = IDAGetNumErrTestFails(ida_mem, &netf);
679 check_retval(&retval, "IDAGetNumErrTestFails", 1, 0);
680 retval = IDAGetNumNonlinSolvConvFails(ida_mem, &ncfn);
681 check_retval(&retval, "IDAGetNumNonlinSolvConvFails", 1, 0);
682 retval = IDAGetNumNonlinSolvIters(ida_mem, &nni);
683 check_retval(&retval, "IDAGetNumNonlinSolvIters", 1, 0);
684
685 retval = IDAGetNumLinConvFails(ida_mem, &ncfl);
686 check_retval(&retval, "IDAGetNumLinConvFails", 1, 0);
687 retval = IDAGetNumLinIters(ida_mem, &nli);
688 check_retval(&retval, "IDAGetNumLinIters", 1, 0);
689 retval = IDAGetNumPrecEvals(ida_mem, &npe);
690 check_retval(&retval, "IDAGetNumPrecEvals", 1, 0);
691 retval = IDAGetNumPrecSolves(ida_mem, &nps);
692 check_retval(&retval, "IDAGetNumPrecSolves", 1, 0);
693 retval = IDAGetNumLinResEvals(ida_mem, &nreLS);
694 check_retval(&retval, "IDAGetNumLinResEvals", 1, 0);
695
696 printf("-----------------------------------------------------------\n");
697 printf("\nFinal statistics: \n\n");
698
699 printf("Number of steps = %ld\n", nst);
700 printf("Number of residual evaluations = %ld\n", nre+nreLS);
701 printf("Number of nonlinear iterations = %ld\n", nni);
702 printf("Number of error test failures = %ld\n", netf);
703 printf("Number of nonlinear conv. failures = %ld\n\n", ncfn);
704
705 printf("Number of linear iterations = %ld\n", nli);
706 printf("Number of linear conv. failures = %ld\n\n", ncfl);
707
708 printf("Number of preconditioner setups = %ld\n", npe);
709 printf("Number of preconditioner solves = %ld\n", nps);
710
711 }
712
713 /*
714 * Check function return value...
715 * opt == 0 means SUNDIALS function allocates memory so check if
716 * returned NULL pointer
717 * opt == 1 means SUNDIALS function returns an integer value so check if
718 * retval < 0
719 * opt == 2 means function allocates memory so check if returned
720 * NULL pointer
721 */
722
check_retval(void * returnvalue,const char * funcname,int opt,int id)723 static int check_retval(void *returnvalue, const char *funcname, int opt, int id)
724 {
725 int *retval;
726
727 if (opt == 0 && returnvalue == NULL) {
728 /* Check if SUNDIALS function returned NULL pointer - no memory allocated */
729 fprintf(stderr,
730 "\nSUNDIALS_ERROR(%d): %s() failed - returned NULL pointer\n\n",
731 id, funcname);
732 return(1);
733 } else if (opt == 1) {
734 /* Check if retval < 0 */
735 retval = (int *) returnvalue;
736 if (*retval < 0) {
737 fprintf(stderr,
738 "\nSUNDIALS_ERROR(%d): %s() failed with retval = %d\n\n",
739 id, funcname, *retval);
740 return(1);
741 }
742 } else if (opt == 2 && returnvalue == NULL) {
743 /* Check if function returned NULL pointer - no memory allocated */
744 fprintf(stderr,
745 "\nMEMORY_ERROR(%d): %s() failed - returned NULL pointer\n\n",
746 id, funcname);
747 return(1);
748 }
749
750 return(0);
751 }
752
753 /*
754 *--------------------------------------------------------------------
755 * FUNCTIONS CALLED BY IDA & SUPPORTING FUNCTIONS
756 *--------------------------------------------------------------------
757 */
758
759 /*
760 * resweb: System residual function for predator-prey system.
761 * To compute the residual function F, this routine calls:
762 * rescomm, for needed communication, and then
763 * reslocal, for computation of the residuals on this processor.
764 */
765
resweb(realtype tt,N_Vector cc,N_Vector cp,N_Vector res,void * user_data)766 static int resweb(realtype tt, N_Vector cc, N_Vector cp,
767 N_Vector res, void *user_data)
768 {
769 int retval;
770 UserData webdata;
771
772 webdata = (UserData)user_data;
773
774 /* Call rescomm to do inter-processor communication. */
775 retval = rescomm(cc, cp, webdata);
776
777 /* Call reslocal to calculate the local portion of residual vector. */
778 retval = reslocal(tt, cc, cp, res, webdata);
779
780 return(retval);
781
782 }
783
784 /*
785 * rescomm: Communication routine in support of resweb.
786 * This routine performs all inter-processor communication of components
787 * of the cc vector needed to calculate F, namely the components at all
788 * interior subgrid boundaries (ghost cell data). It loads this data
789 * into a work array cext (the local portion of c, extended).
790 * The message-passing uses blocking sends, non-blocking receives,
791 * and receive-waiting, in routines BRecvPost, BSend, BRecvWait.
792 */
793
rescomm(N_Vector cc,N_Vector cp,void * user_data)794 static int rescomm(N_Vector cc, N_Vector cp, void *user_data)
795 {
796
797 UserData webdata;
798 realtype *cdata, *cext, buffer[2*NUM_SPECIES*MYSUB];
799 int thispe, ixsub, jysub, nsmxsub, nsmysub;
800 MPI_Comm comm;
801 MPI_Request request[4];
802
803 webdata = (UserData) user_data;
804 cdata = N_VGetArrayPointer(cc);
805
806 /* Get comm, thispe, subgrid indices, data sizes, extended array cext. */
807 comm = webdata->comm;
808 thispe = webdata->thispe;
809 ixsub = webdata->ixsub;
810 jysub = webdata->jysub;
811 cext = webdata->cext;
812 nsmxsub = webdata->nsmxsub;
813 nsmysub = ((int)(webdata->ns))*(webdata->mysub);
814
815 /* Start receiving boundary data from neighboring PEs. */
816 BRecvPost(comm, request, thispe, ixsub, jysub, nsmxsub, nsmysub,
817 cext, buffer);
818
819 /* Send data from boundary of local grid to neighboring PEs. */
820 BSend(comm, thispe, ixsub, jysub, nsmxsub, nsmysub, cdata);
821
822 /* Finish receiving boundary data from neighboring PEs. */
823 BRecvWait(request, ixsub, jysub, nsmxsub, cext, buffer);
824
825 return(0);
826
827 }
828
829 /*
830 * BSend: Send boundary data to neighboring PEs.
831 * This routine sends components of cc from internal subgrid boundaries
832 * to the appropriate neighbor PEs.
833 */
834
BSend(MPI_Comm comm,int my_pe,int ixsub,int jysub,int dsizex,int dsizey,realtype cdata[])835 static void BSend(MPI_Comm comm, int my_pe, int ixsub, int jysub,
836 int dsizex, int dsizey, realtype cdata[])
837 {
838 int i;
839 int ly, offsetc, offsetbuf;
840 realtype bufleft[NUM_SPECIES*MYSUB], bufright[NUM_SPECIES*MYSUB];
841
842 /* If jysub > 0, send data from bottom x-line of cc. */
843 if (jysub != 0)
844 MPI_Send(&cdata[0], dsizex, MPI_SUNREALTYPE, my_pe-NPEX, 0, comm);
845
846 /* If jysub < NPEY-1, send data from top x-line of cc. */
847 if (jysub != NPEY-1) {
848 offsetc = (MYSUB-1)*dsizex;
849 MPI_Send(&cdata[offsetc], dsizex, MPI_SUNREALTYPE, my_pe+NPEX, 0, comm);
850 }
851
852 /* If ixsub > 0, send data from left y-line of cc (via bufleft). */
853 if (ixsub != 0) {
854 for (ly = 0; ly < MYSUB; ly++) {
855 offsetbuf = ly*NUM_SPECIES;
856 offsetc = ly*dsizex;
857 for (i = 0; i < NUM_SPECIES; i++)
858 bufleft[offsetbuf+i] = cdata[offsetc+i];
859 }
860 MPI_Send(&bufleft[0], dsizey, MPI_SUNREALTYPE, my_pe-1, 0, comm);
861 }
862
863 /* If ixsub < NPEX-1, send data from right y-line of cc (via bufright). */
864 if (ixsub != NPEX-1) {
865 for (ly = 0; ly < MYSUB; ly++) {
866 offsetbuf = ly*NUM_SPECIES;
867 offsetc = offsetbuf*MXSUB + (MXSUB-1)*NUM_SPECIES;
868 for (i = 0; i < NUM_SPECIES; i++)
869 bufright[offsetbuf+i] = cdata[offsetc+i];
870 }
871 MPI_Send(&bufright[0], dsizey, MPI_SUNREALTYPE, my_pe+1, 0, comm);
872 }
873
874 }
875
876 /*
877 * BRecvPost: Start receiving boundary data from neighboring PEs.
878 * (1) buffer should be able to hold 2*NUM_SPECIES*MYSUB realtype entries,
879 * should be passed to both the BRecvPost and BRecvWait functions, and
880 * should not be manipulated between the two calls.
881 * (2) request should have 4 entries, and is also passed in both calls.
882 */
883
BRecvPost(MPI_Comm comm,MPI_Request request[],int my_pe,int ixsub,int jysub,int dsizex,int dsizey,realtype cext[],realtype buffer[])884 static void BRecvPost(MPI_Comm comm, MPI_Request request[], int my_pe,
885 int ixsub, int jysub,
886 int dsizex, int dsizey,
887 realtype cext[], realtype buffer[])
888 {
889 int offsetce;
890 /* Have bufleft and bufright use the same buffer. */
891 realtype *bufleft = buffer, *bufright = buffer+NUM_SPECIES*MYSUB;
892
893 /* If jysub > 0, receive data for bottom x-line of cext. */
894 if (jysub != 0)
895 MPI_Irecv(&cext[NUM_SPECIES], dsizex, MPI_SUNREALTYPE,
896 my_pe-NPEX, 0, comm, &request[0]);
897
898 /* If jysub < NPEY-1, receive data for top x-line of cext. */
899 if (jysub != NPEY-1) {
900 offsetce = NUM_SPECIES*(1 + (MYSUB+1)*(MXSUB+2));
901 MPI_Irecv(&cext[offsetce], dsizex, MPI_SUNREALTYPE,
902 my_pe+NPEX, 0, comm, &request[1]);
903 }
904
905 /* If ixsub > 0, receive data for left y-line of cext (via bufleft). */
906 if (ixsub != 0) {
907 MPI_Irecv(&bufleft[0], dsizey, MPI_SUNREALTYPE,
908 my_pe-1, 0, comm, &request[2]);
909 }
910
911 /* If ixsub < NPEX-1, receive data for right y-line of cext (via bufright). */
912 if (ixsub != NPEX-1) {
913 MPI_Irecv(&bufright[0], dsizey, MPI_SUNREALTYPE,
914 my_pe+1, 0, comm, &request[3]);
915 }
916
917 }
918
919 /*
920 * BRecvWait: Finish receiving boundary data from neighboring PEs.
921 * (1) buffer should be able to hold 2*NUM_SPECIES*MYSUB realtype entries,
922 * should be passed to both the BRecvPost and BRecvWait functions, and
923 * should not be manipulated between the two calls.
924 * (2) request should have 4 entries, and is also passed in both calls.
925 */
926
BRecvWait(MPI_Request request[],int ixsub,int jysub,int dsizex,realtype cext[],realtype buffer[])927 static void BRecvWait(MPI_Request request[], int ixsub, int jysub,
928 int dsizex, realtype cext[], realtype buffer[])
929 {
930 int i;
931 int ly, dsizex2, offsetce, offsetbuf;
932 realtype *bufleft = buffer, *bufright = buffer+NUM_SPECIES*MYSUB;
933 MPI_Status status;
934
935 dsizex2 = dsizex + 2*NUM_SPECIES;
936
937 /* If jysub > 0, receive data for bottom x-line of cext. */
938 if (jysub != 0)
939 MPI_Wait(&request[0],&status);
940
941 /* If jysub < NPEY-1, receive data for top x-line of cext. */
942 if (jysub != NPEY-1)
943 MPI_Wait(&request[1],&status);
944
945 /* If ixsub > 0, receive data for left y-line of cext (via bufleft). */
946 if (ixsub != 0) {
947 MPI_Wait(&request[2],&status);
948
949 /* Copy the buffer to cext */
950 for (ly = 0; ly < MYSUB; ly++) {
951 offsetbuf = ly*NUM_SPECIES;
952 offsetce = (ly+1)*dsizex2;
953 for (i = 0; i < NUM_SPECIES; i++)
954 cext[offsetce+i] = bufleft[offsetbuf+i];
955 }
956 }
957
958 /* If ixsub < NPEX-1, receive data for right y-line of cext (via bufright). */
959 if (ixsub != NPEX-1) {
960 MPI_Wait(&request[3],&status);
961
962 /* Copy the buffer to cext */
963 for (ly = 0; ly < MYSUB; ly++) {
964 offsetbuf = ly*NUM_SPECIES;
965 offsetce = (ly+2)*dsizex2 - NUM_SPECIES;
966 for (i = 0; i < NUM_SPECIES; i++)
967 cext[offsetce+i] = bufright[offsetbuf+i];
968 }
969 }
970 }
971
972 /* Define lines are for ease of readability in the following functions. */
973
974 #define mxsub (webdata->mxsub)
975 #define mysub (webdata->mysub)
976 #define npex (webdata->npex)
977 #define npey (webdata->npey)
978 #define ixsub (webdata->ixsub)
979 #define jysub (webdata->jysub)
980 #define nsmxsub (webdata->nsmxsub)
981 #define nsmxsub2 (webdata->nsmxsub2)
982 #define np (webdata->np)
983 #define dx (webdata->dx)
984 #define dy (webdata->dy)
985 #define cox (webdata->cox)
986 #define coy (webdata->coy)
987 #define rhs (webdata->rhs)
988 #define cext (webdata->cext)
989 #define rates (webdata->rates)
990 #define ns (webdata->ns)
991 #define acoef (webdata->acoef)
992 #define bcoef (webdata->bcoef)
993
994 /*
995 * reslocal: Compute res = F(t,cc,cp).
996 * This routine assumes that all inter-processor communication of data
997 * needed to calculate F has already been done. Components at interior
998 * subgrid boundaries are assumed to be in the work array cext.
999 * The local portion of the cc vector is first copied into cext.
1000 * The exterior Neumann boundary conditions are explicitly handled here
1001 * by copying data from the first interior mesh line to the ghost cell
1002 * locations in cext. Then the reaction and diffusion terms are
1003 * evaluated in terms of the cext array, and the residuals are formed.
1004 * The reaction terms are saved separately in the vector webdata->rates
1005 * for use by the preconditioner setup routine.
1006 */
1007
reslocal(realtype tt,N_Vector cc,N_Vector cp,N_Vector res,void * user_data)1008 static int reslocal(realtype tt, N_Vector cc, N_Vector cp, N_Vector res,
1009 void *user_data)
1010 {
1011 realtype *cdata, *ratesxy, *cpxy, *resxy,
1012 xx, yy, dcyli, dcyui, dcxli, dcxui;
1013 int ix, jy, is, i, locc, ylocce, locce;
1014 UserData webdata;
1015
1016 webdata = (UserData) user_data;
1017
1018 /* Get data pointers, subgrid data, array sizes, work array cext. */
1019 cdata = N_VGetArrayPointer(cc);
1020
1021 /* Copy local segment of cc vector into the working extended array cext. */
1022 locc = 0;
1023 locce = nsmxsub2 + NUM_SPECIES;
1024 for (jy = 0; jy < mysub; jy++) {
1025 for (i = 0; i < nsmxsub; i++) cext[locce+i] = cdata[locc+i];
1026 locc = locc + nsmxsub;
1027 locce = locce + nsmxsub2;
1028 }
1029
1030 /* To facilitate homogeneous Neumann boundary conditions, when this is
1031 a boundary PE, copy data from the first interior mesh line of cc to cext. */
1032
1033 /* If jysub = 0, copy x-line 2 of cc to cext. */
1034 if (jysub == 0)
1035 { for (i = 0; i < nsmxsub; i++) cext[NUM_SPECIES+i] = cdata[nsmxsub+i]; }
1036
1037 /* If jysub = npey-1, copy x-line mysub-1 of cc to cext. */
1038 if (jysub == npey-1) {
1039 locc = (mysub-2)*nsmxsub;
1040 locce = (mysub+1)*nsmxsub2 + NUM_SPECIES;
1041 for (i = 0; i < nsmxsub; i++) cext[locce+i] = cdata[locc+i];
1042 }
1043
1044 /* If ixsub = 0, copy y-line 2 of cc to cext. */
1045 if (ixsub == 0) {
1046 for (jy = 0; jy < mysub; jy++) {
1047 locc = jy*nsmxsub + NUM_SPECIES;
1048 locce = (jy+1)*nsmxsub2;
1049 for (i = 0; i < NUM_SPECIES; i++) cext[locce+i] = cdata[locc+i];
1050 }
1051 }
1052
1053 /* If ixsub = npex-1, copy y-line mxsub-1 of cc to cext. */
1054 if (ixsub == npex-1) {
1055 for (jy = 0; jy < mysub; jy++) {
1056 locc = (jy+1)*nsmxsub - 2*NUM_SPECIES;
1057 locce = (jy+2)*nsmxsub2 - NUM_SPECIES;
1058 for (i = 0; i < NUM_SPECIES; i++) cext[locce+i] = cdata[locc+i];
1059 }
1060 }
1061
1062 /* Loop over all grid points, setting local array rates to right-hand sides.
1063 Then set res values appropriately for prey/predator components of F. */
1064 for (jy = 0; jy < mysub; jy++) {
1065 ylocce = (jy+1)*nsmxsub2;
1066 yy = (jy+jysub*mysub)*dy;
1067
1068 for (ix = 0; ix < mxsub; ix++) {
1069 locce = ylocce + (ix+1)*NUM_SPECIES;
1070 xx = (ix + ixsub*mxsub)*dx;
1071
1072 ratesxy = IJ_Vptr(rates,ix,jy);
1073 WebRates(xx, yy, &(cext[locce]), ratesxy, webdata);
1074
1075 resxy = IJ_Vptr(res,ix,jy);
1076 cpxy = IJ_Vptr(cp,ix,jy);
1077
1078 for (is = 0; is < NUM_SPECIES; is++) {
1079 dcyli = cext[locce+is] - cext[locce+is-nsmxsub2];
1080 dcyui = cext[locce+is+nsmxsub2] - cext[locce+is];
1081
1082 dcxli = cext[locce+is] - cext[locce+is-NUM_SPECIES];
1083 dcxui = cext[locce+is+NUM_SPECIES] - cext[locce+is];
1084
1085 rhs[is] = cox[is]*(dcxui-dcxli) + coy[is]*(dcyui-dcyli) + ratesxy[is];
1086
1087 if (is < np) resxy[is] = cpxy[is] - rhs[is];
1088 else resxy[is] = - rhs[is];
1089
1090 } /* End of is (species) loop. */
1091 } /* End of ix loop. */
1092 } /* End of jy loop. */
1093
1094 return(0);
1095
1096 }
1097
1098 /*
1099 * WebRates: Evaluate reaction rates at a given spatial point.
1100 * At a given (x,y), evaluate the array of ns reaction terms R.
1101 */
1102
WebRates(realtype xx,realtype yy,realtype * cxy,realtype * ratesxy,UserData webdata)1103 static void WebRates(realtype xx, realtype yy, realtype *cxy, realtype *ratesxy,
1104 UserData webdata)
1105 {
1106 int is;
1107 realtype fac;
1108
1109 for (is = 0; is < NUM_SPECIES; is++)
1110 ratesxy[is] = dotprod(NUM_SPECIES, cxy, acoef[is]);
1111
1112 fac = ONE + ALPHA*xx*yy + BETA*sin(FOURPI*xx)*sin(FOURPI*yy);
1113
1114 for (is = 0; is < NUM_SPECIES; is++)
1115 ratesxy[is] = cxy[is]*( bcoef[is]*fac + ratesxy[is] );
1116
1117 }
1118
1119 /*
1120 * dotprod: dot product routine for realtype arrays, for use by WebRates.
1121 */
1122
dotprod(int size,realtype * x1,realtype * x2)1123 static realtype dotprod(int size, realtype *x1, realtype *x2)
1124 {
1125 int i;
1126 realtype *xx1, *xx2, temp = ZERO;
1127
1128 xx1 = x1; xx2 = x2;
1129 for (i = 0; i < size; i++) temp += (*xx1++) * (*xx2++);
1130 return(temp);
1131
1132 }
1133
1134 /*
1135 * Preconbd: Preconditioner setup routine.
1136 * This routine generates and preprocesses the block-diagonal
1137 * preconditoner PP. At each spatial point, a block of PP is computed
1138 * by way of difference quotients on the reaction rates R.
1139 * The base value of R are taken from webdata->rates, as set by webres.
1140 * Each block is LU-factored, for later solution of the linear systems.
1141 */
1142
Precondbd(realtype tt,N_Vector cc,N_Vector cp,N_Vector rr,realtype cj,void * user_data)1143 static int Precondbd(realtype tt, N_Vector cc, N_Vector cp,
1144 N_Vector rr, realtype cj, void *user_data)
1145 {
1146 int retval, thispe;
1147 sunindextype ret;
1148 realtype uround;
1149 realtype xx, yy, *cxy, *ewtxy, cctemp, **Pxy, *ratesxy, *Pxycol, *cpxy;
1150 realtype inc, sqru, fac, perturb_rates[NUM_SPECIES];
1151 int is, js, ix, jy;
1152 UserData webdata;
1153 void *ida_mem;
1154 N_Vector ewt;
1155 realtype hh;
1156
1157 webdata = (UserData)user_data;
1158 uround = UNIT_ROUNDOFF;
1159 sqru = sqrt(uround);
1160 thispe = webdata->thispe;
1161
1162 ida_mem = webdata->ida_mem;
1163 ewt = webdata->ewt;
1164 retval = IDAGetErrWeights(ida_mem, ewt);
1165 check_retval(&retval, "IDAGetErrWeights", 1, thispe);
1166 retval = IDAGetCurrentStep(ida_mem, &hh);
1167 check_retval(&retval, "IDAGetCurrentStep", 1, thispe);
1168
1169 for (jy = 0; jy < mysub; jy++) {
1170 yy = (jy + jysub*mysub)*dy;
1171
1172 for (ix = 0; ix < mxsub; ix++) {
1173 xx = (ix+ ixsub*mxsub)*dx;
1174 Pxy = (webdata->PP)[ix][jy];
1175 cxy = IJ_Vptr(cc,ix,jy);
1176 cpxy = IJ_Vptr(cp,ix,jy);
1177 ewtxy= IJ_Vptr(ewt,ix,jy);
1178 ratesxy = IJ_Vptr(rates,ix,jy);
1179
1180 for (js = 0; js < ns; js++) {
1181 inc = sqru*(MAX(fabs(cxy[js]), MAX(hh*fabs(cpxy[js]), ONE/ewtxy[js])));
1182 cctemp = cxy[js]; /* Save the (js,ix,jy) element of cc. */
1183 cxy[js] += inc; /* Perturb the (js,ix,jy) element of cc. */
1184 fac = -ONE/inc;
1185
1186 WebRates(xx, yy, cxy, perturb_rates, webdata);
1187
1188 Pxycol = Pxy[js];
1189
1190 for (is = 0; is < ns; is++)
1191 Pxycol[is] = (perturb_rates[is] - ratesxy[is])*fac;
1192
1193 if (js < np) Pxycol[js] += cj; /* Add partial with respect to cp. */
1194
1195 cxy[js] = cctemp; /* Restore (js,ix,jy) element of cc. */
1196
1197 } /* End of js loop. */
1198
1199 /* Do LU decomposition of matrix block for grid point (ix,jy). */
1200 ret = denseGETRF(Pxy, ns, ns, (webdata->pivot)[ix][jy]);
1201
1202 if (ret != 0) return(1);
1203
1204 } /* End of ix loop. */
1205 } /* End of jy loop. */
1206
1207 return(0);
1208
1209 }
1210
1211 /*
1212 * PSolvebd: Preconditioner solve routine.
1213 * This routine applies the LU factorization of the blocks of the
1214 * preconditioner PP, to compute the solution of PP * zvec = rvec.
1215 */
1216
PSolvebd(realtype tt,N_Vector cc,N_Vector cp,N_Vector rr,N_Vector rvec,N_Vector zvec,realtype cj,realtype delta,void * user_data)1217 static int PSolvebd(realtype tt, N_Vector cc, N_Vector cp,
1218 N_Vector rr, N_Vector rvec, N_Vector zvec,
1219 realtype cj, realtype delta, void *user_data)
1220 {
1221 realtype **Pxy, *zxy;
1222 sunindextype *pivot, ix, jy;
1223 UserData webdata;
1224
1225 webdata = (UserData)user_data;
1226
1227 N_VScale(ONE, rvec, zvec);
1228
1229 /* Loop through subgrid and apply preconditioner factors at each point. */
1230 for (ix = 0; ix < mxsub; ix++) {
1231 for (jy = 0; jy < mysub; jy++) {
1232
1233 /* For grid point (ix,jy), do backsolve on local vector.
1234 zxy is the address of the local portion of zvec, and
1235 Pxy is the address of the corresponding block of PP. */
1236 zxy = IJ_Vptr(zvec,ix,jy);
1237 Pxy = (webdata->PP)[ix][jy];
1238 pivot = (webdata->pivot)[ix][jy];
1239 denseGETRS(Pxy, ns, pivot, zxy);
1240
1241 } /* End of jy loop. */
1242 } /* End of ix loop. */
1243
1244 return(0);
1245
1246 }
1247