1 /*
2  * -----------------------------------------------------------------
3  * Programmer(s): Daniel R. Reynolds @ SMU
4  *         Allan Taylor, Alan Hindmarsh and Radu Serban @ LLNL
5  * -----------------------------------------------------------------
6  * SUNDIALS Copyright Start
7  * Copyright (c) 2002-2021, Lawrence Livermore National Security
8  * and Southern Methodist University.
9  * All rights reserved.
10  *
11  * See the top-level LICENSE and NOTICE files for details.
12  *
13  * SPDX-License-Identifier: BSD-3-Clause
14  * SUNDIALS Copyright End
15  * -----------------------------------------------------------------
16  * Example program for IDA: Food web, parallel, GMRES, user
17  * preconditioner.
18  *
19  * This example program for IDA uses SUNLinSol_SPGMR as the linear solver.
20  * It is written for a parallel computer system and uses a
21  * block-diagonal preconditioner (setup and solve routines) for the
22  * SUNLinSol_SPGMR package.
23  *
24  * The mathematical problem solved in this example is a DAE system
25  * that arises from a system of partial differential equations after
26  * spatial discretization. The PDE system is a food web population
27  * model, with predator-prey interaction and diffusion on the unit
28  * square in two dimensions. The dependent variable vector is:
29  *
30  *         1   2         ns
31  *   c = (c , c ,  ..., c  ) , ns = 2 * np
32  *
33  * and the PDE's are as follows:
34  *
35  *     i             i      i
36  *   dc /dt = d(i)*(c    + c  )  +  R (x,y,c)   (i = 1,...,np)
37  *                   xx     yy       i
38  *
39  *              i      i
40  *   0 = d(i)*(c   +  c  )  +  R  (x,y,c)   (i = np+1,...,ns)
41  *              xx     yy       i
42  *
43  *   where the reaction terms R are:
44  *
45  *                   i             ns         j
46  *   R  (x,y,c)  =  c  * (b(i)  + sum a(i,j)*c )
47  *    i                           j=1
48  *
49  * The number of species is ns = 2 * np, with the first np being
50  * prey and the last np being predators. The coefficients a(i,j),
51  * b(i), d(i) are:
52  *
53  *   a(i,i) = -AA  (all i)
54  *   a(i,j) = -GG  (i <= np , j >  np)
55  *   a(i,j) =  EE  (i >  np,  j <= np)
56  *   all other a(i,j) = 0
57  *   b(i) = BB*(1+ alpha * x*y + beta*sin(4 pi x)*sin(4 pi y))  (i <= np)
58  *   b(i) =-BB*(1+ alpha * x*y + beta*sin(4 pi x)*sin(4 pi y))  (i  > np)
59  *   d(i) = DPREY  (i <= np)
60  *   d(i) = DPRED  (i > np)
61  *
62  * Note: The above equations are written in 1-based indices,
63  * whereas the code has 0-based indices, being written in C.
64  *
65  * The various scalar parameters required are set using '#define'
66  * statements or directly in routine InitUserData. In this program,
67  * np = 1, ns = 2. The boundary conditions are homogeneous Neumann:
68  * normal derivative  =  0.
69  *
70  * A polynomial in x and y is used to set the initial values of the
71  * first np variables (the prey variables) at each x,y location,
72  * while initial values for the remaining (predator) variables are
73  * set to a flat value, which is corrected by IDACalcIC.
74  *
75  * The PDEs are discretized by central differencing on a MX by MY
76  * mesh, and so the system size Neq is the product
77  * MX * MY * NUM_SPECIES. The system is actually implemented on
78  * submeshes, processor by processor, with an MXSUB by MYSUB mesh
79  * on each of NPEX * NPEY processors.
80  *
81  * The DAE system is solved by IDA using the SUNLinSol_SPGMR linear
82  * solver, which uses the preconditioned GMRES iterative method to
83  * solve linear systems. The precondtioner supplied to SUNLinSol_SPGMR is
84  * the block-diagonal part of the Jacobian with ns by ns blocks
85  * arising from the reaction terms only. Output is printed at
86  * t = 0, .001, .01, .1, .4, .7, 1.
87  * -----------------------------------------------------------------
88  * References:
89  * [1] Peter N. Brown and Alan C. Hindmarsh,
90  *     Reduced Storage Matrix Methods in Stiff ODE systems,
91  *     Journal of Applied Mathematics and Computation, Vol. 31
92  *     (May 1989), pp. 40-91.
93  *
94  * [2] Peter N. Brown, Alan C. Hindmarsh, and Linda R. Petzold,
95  *     Using Krylov Methods in the Solution of Large-Scale
96  *     Differential-Algebraic Systems, SIAM J. Sci. Comput., 15
97  *     (1994), pp. 1467-1488.
98  *
99  * [3] Peter N. Brown, Alan C. Hindmarsh, and Linda R. Petzold,
100  *     Consistent Initial Condition Calculation for Differential-
101  *     Algebraic Systems, SIAM J. Sci. Comput., 19 (1998),
102  *     pp. 1495-1512.
103  * -----------------------------------------------------------------
104  */
105 
106 #include <stdio.h>
107 #include <stdlib.h>
108 #include <math.h>
109 
110 #include <ida/ida.h>
111 #include <sunlinsol/sunlinsol_spgmr.h>
112 #include <nvector/nvector_parallel.h>
113 #include <sundials/sundials_dense.h>
114 #include <sundials/sundials_types.h>
115 
116 #include <mpi.h>
117 
118 /* helpful macros */
119 
120 #ifndef MAX
121 #define MAX(A, B) ((A) > (B) ? (A) : (B))
122 #endif
123 
124 /* Problem Constants. */
125 
126 #define NPREY       1        /* Number of prey (= number of predators). */
127 #define NUM_SPECIES 2*NPREY
128 
129 #define PI          RCONST(3.1415926535898)   /* pi */
130 #define FOURPI      (RCONST(4.0)*PI)          /* 4 pi */
131 
132 #define MXSUB       10    /* Number of x mesh points per processor subgrid */
133 #define MYSUB       10    /* Number of y mesh points per processor subgrid */
134 #define NPEX        2     /* Number of subgrids in the x direction */
135 #define NPEY        2     /* Number of subgrids in the y direction */
136 #define MX          (MXSUB*NPEX)      /* MX = number of x mesh points */
137 #define MY          (MYSUB*NPEY)      /* MY = number of y mesh points */
138 #define NSMXSUB     (NUM_SPECIES * MXSUB)
139 #define NEQ         (NUM_SPECIES*MX*MY) /* Number of equations in system */
140 #define AA          RCONST(1.0)    /* Coefficient in above eqns. for a */
141 #define EE          RCONST(10000.) /* Coefficient in above eqns. for a */
142 #define GG          RCONST(0.5e-6) /* Coefficient in above eqns. for a */
143 #define BB          RCONST(1.0)    /* Coefficient in above eqns. for b */
144 #define DPREY       RCONST(1.0)    /* Coefficient in above eqns. for d */
145 #define DPRED       RCONST(0.05)   /* Coefficient in above eqns. for d */
146 #define ALPHA       RCONST(50.)    /* Coefficient alpha in above eqns. */
147 #define BETA        RCONST(1000.)  /* Coefficient beta in above eqns. */
148 #define AX          RCONST(1.0)    /* Total range of x variable */
149 #define AY          RCONST(1.0)    /* Total range of y variable */
150 #define RTOL        RCONST(1.e-5)  /*  rtol tolerance */
151 #define ATOL        RCONST(1.e-5)  /*  atol tolerance */
152 #define ZERO        RCONST(0.)     /* 0. */
153 #define ONE         RCONST(1.0)    /* 1. */
154 #define NOUT        6
155 #define TMULT       RCONST(10.0)   /* Multiplier for tout values */
156 #define TADD        RCONST(0.3)    /* Increment for tout values */
157 
158 
159 /* User-defined vector accessor macro IJ_Vptr. */
160 
161 /* IJ_Vptr is defined in order to express the underlying 3-d structure of the
162    dependent variable vector from its underlying 1-d storage (an N_Vector).
163    IJ_Vptr(vv,i,j) returns a pointer to the location in vv corresponding to
164    species index is = 0, x-index ix = i, and y-index jy = j.                */
165 
166 #define IJ_Vptr(vv,i,j) (&NV_Ith_P(vv, (i)*NUM_SPECIES + (j)*NSMXSUB ))
167 
168 /* Type: UserData.  Contains problem constants, preconditioner data, etc. */
169 
170 typedef struct {
171   sunindextype ns;
172   int np, thispe, npes, ixsub, jysub, npex, npey;
173   int mxsub, mysub, nsmxsub, nsmxsub2;
174   realtype dx, dy, **acoef;
175   realtype cox[NUM_SPECIES], coy[NUM_SPECIES], bcoef[NUM_SPECIES],
176     rhs[NUM_SPECIES], cext[(MXSUB+2)*(MYSUB+2)*NUM_SPECIES];
177   MPI_Comm comm;
178   N_Vector rates;
179   realtype **PP[MXSUB][MYSUB];
180   sunindextype *pivot[MXSUB][MYSUB];
181   N_Vector ewt;
182   void *ida_mem;
183 } *UserData;
184 
185 
186 /* Prototypes for user-supplied and supporting functions. */
187 
188 static int resweb(realtype time,
189                   N_Vector cc, N_Vector cp, N_Vector resval,
190                   void *user_data);
191 
192 static int Precondbd(realtype tt, N_Vector cc, N_Vector cp,
193                      N_Vector rr, realtype cj, void *user_data);
194 
195 static int PSolvebd(realtype tt, N_Vector cc, N_Vector cp,
196                     N_Vector rr, N_Vector rvec, N_Vector zvec,
197                     realtype cj, realtype delta, void *user_data);
198 
199 static int rescomm(N_Vector cc, N_Vector cp, void *user_data);
200 
201 static void BSend(MPI_Comm comm, int thispe, int ixsub, int jysub,
202                   int dsizex, int dsizey, realtype carray[]);
203 
204 static void BRecvPost(MPI_Comm comm, MPI_Request request[], int thispe,
205                       int ixsub, int jysub,
206                       int dsizex, int dsizey,
207                       realtype cext[], realtype buffer[]);
208 
209 static void BRecvWait(MPI_Request request[], int ixsub, int jysub,
210                       int dsizex, realtype cext[], realtype buffer[]);
211 
212 static int reslocal(realtype tt, N_Vector cc, N_Vector cp, N_Vector res,
213                     void *user_data);
214 
215 static void WebRates(realtype xx, realtype yy, realtype *cxy, realtype *ratesxy,
216                      UserData webdata);
217 
218 static realtype dotprod(int size, realtype *x1, realtype *x2);
219 
220 /* Prototypes for private Helper Functions. */
221 
222 static UserData AllocUserData(MPI_Comm comm, sunindextype local_N,
223                               sunindextype SystemSize);
224 
225 static void InitUserData(UserData webdata, int thispe, int npes,
226                          MPI_Comm comm);
227 
228 static void FreeUserData(UserData webdata);
229 
230 static void SetInitialProfiles(N_Vector cc, N_Vector cp, N_Vector id,
231                                N_Vector scrtch, UserData webdata);
232 
233 static void PrintHeader(sunindextype SystemSize, int maxl,
234                         realtype rtol, realtype atol);
235 
236 static void PrintOutput(void *ida_mem, N_Vector cc, realtype time,
237                         UserData webdata, MPI_Comm comm);
238 
239 static void PrintFinalStats(void *ida_mem);
240 
241 static int check_retval(void *returnvalue, const char *funcname, int opt, int id);
242 
243 /*
244  *--------------------------------------------------------------------
245  * MAIN PROGRAM
246  *--------------------------------------------------------------------
247  */
248 
main(int argc,char * argv[])249 int main(int argc, char *argv[])
250 {
251   MPI_Comm comm;
252   void *ida_mem;
253   SUNLinearSolver LS;
254   UserData webdata;
255   sunindextype SystemSize, local_N;
256   realtype rtol, atol, t0, tout, tret;
257   N_Vector cc, cp, res, id;
258   int thispe, npes, maxl, iout, retval;
259 
260   cc = cp = res = id = NULL;
261   webdata = NULL;
262   LS = NULL;
263   ida_mem = NULL;
264 
265   /* Set communicator, and get processor number and total number of PE's. */
266 
267   MPI_Init(&argc, &argv);
268   comm = MPI_COMM_WORLD;
269   MPI_Comm_rank(comm, &thispe);
270   MPI_Comm_size(comm, &npes);
271 
272   if (npes != NPEX*NPEY) {
273     if (thispe == 0)
274       fprintf(stderr,
275               "\nMPI_ERROR(0): npes = %d not equal to NPEX*NPEY = %d\n",
276 	      npes, NPEX*NPEY);
277     MPI_Finalize();
278     return(1);
279   }
280 
281   /* Set local length (local_N) and global length (SystemSize). */
282 
283   local_N = MXSUB*MYSUB*NUM_SPECIES;
284   SystemSize = NEQ;
285 
286   /* Set up user data block webdata. */
287 
288   webdata = AllocUserData(comm, local_N, SystemSize);
289   if (check_retval((void *)webdata, "AllocUserData", 0, thispe)) MPI_Abort(comm, 1);
290 
291   InitUserData(webdata, thispe, npes, comm);
292 
293   /* Create needed vectors, and load initial values.
294      The vector res is used temporarily only.        */
295 
296   cc  = N_VNew_Parallel(comm, local_N, SystemSize);
297   if (check_retval((void *)cc, "N_VNew_Parallel", 0, thispe)) MPI_Abort(comm, 1);
298 
299   cp  = N_VNew_Parallel(comm, local_N, SystemSize);
300   if (check_retval((void *)cp, "N_VNew_Parallel", 0, thispe)) MPI_Abort(comm, 1);
301 
302   res = N_VNew_Parallel(comm, local_N, SystemSize);
303   if (check_retval((void *)res, "N_VNew_Parallel", 0, thispe)) MPI_Abort(comm, 1);
304 
305   id  = N_VNew_Parallel(comm, local_N, SystemSize);
306   if (check_retval((void *)id, "N_VNew_Parallel", 0, thispe)) MPI_Abort(comm, 1);
307 
308   SetInitialProfiles(cc, cp, id, res, webdata);
309 
310   N_VDestroy(res);
311 
312   /* Set remaining inputs to IDAMalloc. */
313 
314   t0 = ZERO;
315   rtol = RTOL;
316   atol = ATOL;
317 
318   /* Call IDACreate and IDAMalloc to initialize IDA.
319      A pointer to IDA problem memory is returned and stored in idamem. */
320 
321   ida_mem = IDACreate();
322   if (check_retval((void *)ida_mem, "IDACreate", 0, thispe)) MPI_Abort(comm, 1);
323 
324   retval = IDASetUserData(ida_mem, webdata);
325   if (check_retval(&retval, "IDASetUserData", 1, thispe)) MPI_Abort(comm, 1);
326 
327   retval = IDASetId(ida_mem, id);
328   if (check_retval(&retval, "IDASetId", 1, thispe)) MPI_Abort(comm, 1);
329 
330   retval = IDAInit(ida_mem, resweb, t0, cc, cp);
331   if (check_retval(&retval, "IDAinit", 1, thispe)) MPI_Abort(comm, 1);
332 
333   retval = IDASStolerances(ida_mem, rtol, atol);
334   if (check_retval(&retval, "IDASStolerances", 1, thispe)) MPI_Abort(comm, 1);
335 
336   webdata->ida_mem = ida_mem;
337 
338   /* Call SUNLinSol_SPGMR and IDASetLinearSolver to specify the linear solver
339      to IDA, and specify the supplied [left] preconditioner routines
340      (Precondbd & PSolvebd).  maxl (Krylov subspace dim.) is set to 16. */
341 
342   maxl = 16;
343   LS = SUNLinSol_SPGMR(cc, PREC_LEFT, maxl);
344   if (check_retval((void *)LS, "SUNLinSol_SPGMR", 0, thispe)) MPI_Abort(comm, 1);
345 
346   retval = SUNLinSol_SPGMRSetMaxRestarts(LS, 5);  /* IDA recommends allowing up to 5 restarts */
347   if(check_retval(&retval, "SUNLinSol_SPGMRSetMaxRestarts", 1, thispe)) MPI_Abort(comm, 1);
348 
349   retval = IDASetLinearSolver(ida_mem, LS, NULL);
350   if (check_retval(&retval, "IDASetLinearSolver", 1, thispe))
351     MPI_Abort(comm, 1);
352 
353   retval = IDASetPreconditioner(ida_mem, Precondbd, PSolvebd);
354   if (check_retval(&retval, "IDASetPreconditioner", 1, thispe))
355     MPI_Abort(comm, 1);
356 
357   /* Call IDACalcIC (with default options) to correct the initial values. */
358 
359   tout = RCONST(0.001);
360   retval = IDACalcIC(ida_mem, IDA_YA_YDP_INIT, tout);
361   if (check_retval(&retval, "IDACalcIC", 1, thispe))
362     MPI_Abort(comm, 1);
363 
364   /* On PE 0, print heading, basic parameters, initial values. */
365 
366   if (thispe == 0) PrintHeader(SystemSize, maxl, rtol, atol);
367   PrintOutput(ida_mem, cc, t0, webdata, comm);
368 
369   /* Loop over iout, call IDASolve (normal mode), print selected output. */
370 
371   for (iout = 1; iout <= NOUT; iout++) {
372 
373     retval = IDASolve(ida_mem, tout, &tret, cc, cp, IDA_NORMAL);
374     if (check_retval(&retval, "IDASolve", 1, thispe)) MPI_Abort(comm, 1);
375 
376     PrintOutput(ida_mem, cc, tret, webdata, comm);
377 
378     if (iout < 3) tout *= TMULT;
379     else          tout += TADD;
380 
381   }
382 
383   /* On PE 0, print final set of statistics. */
384   if (thispe == 0) PrintFinalStats(ida_mem);
385 
386   /* Free memory. */
387 
388   N_VDestroy(cc);
389   N_VDestroy(cp);
390   N_VDestroy(id);
391 
392   IDAFree(&ida_mem);
393   SUNLinSolFree(LS);
394 
395   FreeUserData(webdata);
396 
397   MPI_Finalize();
398 
399   return(0);
400 
401 }
402 
403 /*
404  *--------------------------------------------------------------------
405  * PRIVATE FUNCTIONS
406  *--------------------------------------------------------------------
407  */
408 
409 /*
410  * AllocUserData: Allocate memory for data structure of type UserData.
411  */
412 
AllocUserData(MPI_Comm comm,sunindextype local_N,sunindextype SystemSize)413 static UserData AllocUserData(MPI_Comm comm, sunindextype local_N, sunindextype SystemSize)
414 {
415   int ix, jy;
416   UserData webdata;
417 
418   webdata = (UserData) malloc(sizeof *webdata);
419 
420   webdata->rates = N_VNew_Parallel(comm, local_N, SystemSize);
421 
422   for (ix = 0; ix < MXSUB; ix++) {
423     for (jy = 0; jy < MYSUB; jy++) {
424       (webdata->PP)[ix][jy] = newDenseMat(NUM_SPECIES, NUM_SPECIES);
425       (webdata->pivot)[ix][jy] = newIndexArray(NUM_SPECIES);
426     }
427   }
428 
429   webdata->acoef = newDenseMat(NUM_SPECIES, NUM_SPECIES);
430   webdata->ewt = N_VNew_Parallel(comm, local_N, SystemSize);
431   return(webdata);
432 
433 }
434 
435 /*
436  * InitUserData: Load problem constants in webdata (of type UserData).
437  */
438 
InitUserData(UserData webdata,int thispe,int npes,MPI_Comm comm)439 static void InitUserData(UserData webdata, int thispe, int npes,
440                          MPI_Comm comm)
441 {
442   int i, j, np;
443   realtype *a1,*a2, *a3, *a4, dx2, dy2, **acoef, *bcoef, *cox, *coy;
444 
445   webdata->jysub = thispe / NPEX;
446   webdata->ixsub = thispe - (webdata->jysub)*NPEX;
447   webdata->mxsub = MXSUB;
448   webdata->mysub = MYSUB;
449   webdata->npex = NPEX;
450   webdata->npey = NPEY;
451   webdata->ns = NUM_SPECIES;
452   webdata->np = NPREY;
453   webdata->dx = AX/(MX-1);
454   webdata->dy = AY/(MY-1);
455   webdata->thispe = thispe;
456   webdata->npes   = npes;
457   webdata->nsmxsub = MXSUB * NUM_SPECIES;
458   webdata->nsmxsub2 = (MXSUB+2)*NUM_SPECIES;
459   webdata->comm = comm;
460 
461   /* Set up the coefficients a and b plus others found in the equations. */
462   np = webdata->np;
463   dx2 = (webdata->dx)*(webdata->dx); dy2 = (webdata->dy)*(webdata->dy);
464 
465   acoef = webdata->acoef;
466   bcoef = webdata->bcoef;
467   cox = webdata->cox;
468   coy = webdata->coy;
469 
470   for (i = 0; i < np; i++) {
471     a1 = &(acoef[i][np]);
472     a2 = &(acoef[i+np][0]);
473     a3 = &(acoef[i][0]);
474     a4 = &(acoef[i+np][np]);
475 
476     /*  Fill in the portion of acoef in the four quadrants, row by row. */
477     for (j = 0; j < np; j++) {
478       *a1++ =  -GG;
479       *a2++ =   EE;
480       *a3++ = ZERO;
481       *a4++ = ZERO;
482     }
483 
484     /* Reset the diagonal elements of acoef to -AA. */
485     acoef[i][i] = -AA; acoef[i+np][i+np] = -AA;
486 
487     /* Set coefficients for b and diffusion terms. */
488     bcoef[i] = BB; bcoef[i+np] = -BB;
489     cox[i] = DPREY/dx2; cox[i+np] = DPRED/dx2;
490     coy[i] = DPREY/dy2; coy[i+np] = DPRED/dy2;
491   }
492 
493 }
494 
495 /*
496  * FreeUserData: Free webdata memory.
497  */
498 
FreeUserData(UserData webdata)499 static void FreeUserData(UserData webdata)
500 {
501   int ix, jy;
502 
503   for (ix = 0; ix < MXSUB; ix++) {
504     for (jy = 0; jy < MYSUB; jy++) {
505       destroyMat((webdata->PP)[ix][jy]);
506       destroyArray((webdata->pivot)[ix][jy]);
507     }
508   }
509 
510   destroyMat(webdata->acoef);
511   N_VDestroy(webdata->rates);
512   N_VDestroy(webdata->ewt);
513   free(webdata);
514 
515 }
516 
517 /*
518  * SetInitialProfiles: Set initial conditions in cc, cp, and id.
519  * A polynomial profile is used for the prey cc values, and a constant
520  * (1.0e5) is loaded as the initial guess for the predator cc values.
521  * The id values are set to 1 for the prey and 0 for the predators.
522  * The prey cp values are set according to the given system, and
523  * the predator cp values are set to zero.
524  */
525 
SetInitialProfiles(N_Vector cc,N_Vector cp,N_Vector id,N_Vector res,UserData webdata)526 static void SetInitialProfiles(N_Vector cc, N_Vector cp, N_Vector id,
527                                N_Vector res, UserData webdata)
528 {
529   int ixsub, jysub, mxsub, mysub, np, ix, jy, is;
530   realtype *cxy, *idxy, *cpxy, dx, dy, xx, yy, xyfactor;
531 
532   ixsub = webdata->ixsub;
533   jysub = webdata->jysub;
534   mxsub = webdata->mxsub;
535   mysub = webdata->mxsub;
536   dx = webdata->dx;
537   dy = webdata->dy;
538   np = webdata->np;
539 
540   /* Loop over grid, load cc values and id values. */
541   for (jy = 0; jy < mysub; jy++) {
542     yy = (jy + jysub*mysub) * dy;
543     for (ix = 0; ix < mxsub; ix++) {
544       xx = (ix + ixsub*mxsub) * dx;
545       xyfactor = RCONST(16.0)*xx*(ONE - xx)*yy*(ONE - yy);
546       xyfactor *= xyfactor;
547 
548       cxy = IJ_Vptr(cc,ix,jy);
549       idxy = IJ_Vptr(id,ix,jy);
550       for (is = 0; is < NUM_SPECIES; is++) {
551 	if (is < np) { cxy[is] = RCONST(10.0) + (realtype)(is+1)*xyfactor; idxy[is] = ONE; }
552         else { cxy[is] = 1.0e5; idxy[is] = ZERO; }
553       }
554     }
555   }
556 
557   /* Set c' for the prey by calling the residual function with cp = 0. */
558   N_VConst(ZERO, cp);
559   resweb(ZERO, cc, cp, res, webdata);
560   N_VScale(-ONE, res, cp);
561 
562   /* Set c' for predators to 0. */
563   for (jy = 0; jy < mysub; jy++) {
564     for (ix = 0; ix < mxsub; ix++) {
565       cpxy = IJ_Vptr(cp,ix,jy);
566       for (is = np; is < NUM_SPECIES; is++) cpxy[is] = ZERO;
567     }
568   }
569 }
570 
571 /*
572  * Print first lines of output (problem description)
573  */
574 
PrintHeader(sunindextype SystemSize,int maxl,realtype rtol,realtype atol)575 static void PrintHeader(sunindextype SystemSize, int maxl,
576                         realtype rtol, realtype atol)
577 {
578   printf("\nidaFoodWeb_kry_p: Predator-prey DAE parallel example problem for IDA \n\n");
579   printf("Number of species ns: %d", NUM_SPECIES);
580   printf("     Mesh dimensions: %d x %d", MX, MY);
581   printf("     Total system size: %ld\n", (long int) SystemSize);
582   printf("Subgrid dimensions: %d x %d", MXSUB, MYSUB);
583   printf("     Processor array: %d x %d\n", NPEX, NPEY);
584 #if defined(SUNDIALS_EXTENDED_PRECISION)
585   printf("Tolerance parameters:  rtol = %Lg   atol = %Lg\n", rtol, atol);
586 #elif defined(SUNDIALS_DOUBLE_PRECISION)
587   printf("Tolerance parameters:  rtol = %g   atol = %g\n", rtol, atol);
588 #else
589   printf("Tolerance parameters:  rtol = %g   atol = %g\n", rtol, atol);
590 #endif
591   printf("Linear solver: SUNLinSol_SPGMR     Max. Krylov dimension maxl: %d\n", maxl);
592   printf("Preconditioner: block diagonal, block size ns,");
593   printf(" via difference quotients\n");
594   printf("CalcIC called to correct initial predator concentrations \n\n");
595 
596   printf("-----------------------------------------------------------\n");
597   printf("  t        bottom-left  top-right");
598   printf("    | nst  k      h\n");
599   printf("-----------------------------------------------------------\n\n");
600 }
601 
602 /*
603  * PrintOutput: Print output values at output time t = tt.
604  * Selected run statistics are printed.  Then values of c1 and c2
605  * are printed for the bottom left and top right grid points only.
606  * (NOTE: This routine is specific to the case NUM_SPECIES = 2.)
607  */
608 
PrintOutput(void * ida_mem,N_Vector cc,realtype tt,UserData webdata,MPI_Comm comm)609 static void PrintOutput(void *ida_mem, N_Vector cc, realtype tt,
610                         UserData webdata, MPI_Comm comm)
611 {
612   MPI_Status status;
613   realtype *cdata, clast[2], hused;
614   long int nst;
615   int i, kused, retval, thispe, npelast, ilast;;
616 
617   thispe = webdata->thispe;
618   npelast = webdata->npes - 1;
619   cdata = N_VGetArrayPointer(cc);
620 
621   /* Send conc. at top right mesh point from PE npes-1 to PE 0. */
622   if (thispe == npelast) {
623     ilast = NUM_SPECIES*MXSUB*MYSUB - 2;
624     if (npelast != 0)
625       MPI_Send(&cdata[ilast], 2, MPI_SUNREALTYPE, 0, 0, comm);
626     else { clast[0] = cdata[ilast]; clast[1] = cdata[ilast+1]; }
627   }
628 
629   /* On PE 0, receive conc. at top right from PE npes - 1.
630      Then print performance data and sampled solution values. */
631 
632   if (thispe == 0) {
633 
634     if (npelast != 0)
635       MPI_Recv(&clast[0], 2, MPI_SUNREALTYPE, npelast, 0, comm, &status);
636 
637     retval = IDAGetLastOrder(ida_mem, &kused);
638     check_retval(&retval, "IDAGetLastOrder", 1, thispe);
639     retval = IDAGetNumSteps(ida_mem, &nst);
640     check_retval(&retval, "IDAGetNumSteps", 1, thispe);
641     retval = IDAGetLastStep(ida_mem, &hused);
642     check_retval(&retval, "IDAGetLastStep", 1, thispe);
643 
644 #if defined(SUNDIALS_EXTENDED_PRECISION)
645     printf("%8.2Le %12.4Le %12.4Le   | %3ld  %1d %12.4Le\n",
646          tt, cdata[0], clast[0], nst, kused, hused);
647     for (i=1;i<NUM_SPECIES;i++)
648       printf("         %12.4Le %12.4Le   |\n",cdata[i],clast[i]);
649 #elif defined(SUNDIALS_DOUBLE_PRECISION)
650     printf("%8.2e %12.4e %12.4e   | %3ld  %1d %12.4e\n",
651          tt, cdata[0], clast[0], nst, kused, hused);
652     for (i=1;i<NUM_SPECIES;i++)
653       printf("         %12.4e %12.4e   |\n",cdata[i],clast[i]);
654 #else
655     printf("%8.2e %12.4e %12.4e   | %3ld  %1d %12.4e\n",
656          tt, cdata[0], clast[0], nst, kused, hused);
657     for (i=1;i<NUM_SPECIES;i++)
658       printf("         %12.4e %12.4e   |\n",cdata[i],clast[i]);
659 #endif
660     printf("\n");
661 
662   }
663 }
664 
665 /*
666  * PrintFinalStats: Print final run data contained in iopt.
667  */
668 
PrintFinalStats(void * ida_mem)669 static void PrintFinalStats(void *ida_mem)
670 {
671   long int nst, nre, nreLS, netf, ncfn, nni, ncfl, nli, npe, nps;
672   int retval;
673 
674   retval = IDAGetNumSteps(ida_mem, &nst);
675   check_retval(&retval, "IDAGetNumSteps", 1, 0);
676   retval = IDAGetNumResEvals(ida_mem, &nre);
677   check_retval(&retval, "IDAGetNumResEvals", 1, 0);
678   retval = IDAGetNumErrTestFails(ida_mem, &netf);
679   check_retval(&retval, "IDAGetNumErrTestFails", 1, 0);
680   retval = IDAGetNumNonlinSolvConvFails(ida_mem, &ncfn);
681   check_retval(&retval, "IDAGetNumNonlinSolvConvFails", 1, 0);
682   retval = IDAGetNumNonlinSolvIters(ida_mem, &nni);
683   check_retval(&retval, "IDAGetNumNonlinSolvIters", 1, 0);
684 
685   retval = IDAGetNumLinConvFails(ida_mem, &ncfl);
686   check_retval(&retval, "IDAGetNumLinConvFails", 1, 0);
687   retval = IDAGetNumLinIters(ida_mem, &nli);
688   check_retval(&retval, "IDAGetNumLinIters", 1, 0);
689   retval = IDAGetNumPrecEvals(ida_mem, &npe);
690   check_retval(&retval, "IDAGetNumPrecEvals", 1, 0);
691   retval = IDAGetNumPrecSolves(ida_mem, &nps);
692   check_retval(&retval, "IDAGetNumPrecSolves", 1, 0);
693   retval = IDAGetNumLinResEvals(ida_mem, &nreLS);
694   check_retval(&retval, "IDAGetNumLinResEvals", 1, 0);
695 
696   printf("-----------------------------------------------------------\n");
697   printf("\nFinal statistics: \n\n");
698 
699   printf("Number of steps                    = %ld\n", nst);
700   printf("Number of residual evaluations     = %ld\n", nre+nreLS);
701   printf("Number of nonlinear iterations     = %ld\n", nni);
702   printf("Number of error test failures      = %ld\n", netf);
703   printf("Number of nonlinear conv. failures = %ld\n\n", ncfn);
704 
705   printf("Number of linear iterations        = %ld\n", nli);
706   printf("Number of linear conv. failures    = %ld\n\n", ncfl);
707 
708   printf("Number of preconditioner setups    = %ld\n", npe);
709   printf("Number of preconditioner solves    = %ld\n", nps);
710 
711 }
712 
713 /*
714  * Check function return value...
715  *   opt == 0 means SUNDIALS function allocates memory so check if
716  *            returned NULL pointer
717  *   opt == 1 means SUNDIALS function returns an integer value so check if
718  *            retval < 0
719  *   opt == 2 means function allocates memory so check if returned
720  *            NULL pointer
721  */
722 
check_retval(void * returnvalue,const char * funcname,int opt,int id)723 static int check_retval(void *returnvalue, const char *funcname, int opt, int id)
724 {
725   int *retval;
726 
727   if (opt == 0 && returnvalue == NULL) {
728     /* Check if SUNDIALS function returned NULL pointer - no memory allocated */
729     fprintf(stderr,
730             "\nSUNDIALS_ERROR(%d): %s() failed - returned NULL pointer\n\n",
731             id, funcname);
732     return(1);
733   } else if (opt == 1) {
734     /* Check if retval < 0 */
735     retval = (int *) returnvalue;
736     if (*retval < 0) {
737       fprintf(stderr,
738               "\nSUNDIALS_ERROR(%d): %s() failed with retval = %d\n\n",
739               id, funcname, *retval);
740       return(1);
741     }
742   } else if (opt == 2 && returnvalue == NULL) {
743     /* Check if function returned NULL pointer - no memory allocated */
744     fprintf(stderr,
745             "\nMEMORY_ERROR(%d): %s() failed - returned NULL pointer\n\n",
746             id, funcname);
747     return(1);
748   }
749 
750   return(0);
751 }
752 
753 /*
754  *--------------------------------------------------------------------
755  * FUNCTIONS CALLED BY IDA & SUPPORTING FUNCTIONS
756  *--------------------------------------------------------------------
757  */
758 
759 /*
760  * resweb: System residual function for predator-prey system.
761  * To compute the residual function F, this routine calls:
762  *    rescomm, for needed communication, and then
763  *    reslocal, for computation of the residuals on this processor.
764  */
765 
resweb(realtype tt,N_Vector cc,N_Vector cp,N_Vector res,void * user_data)766 static int resweb(realtype tt, N_Vector cc, N_Vector cp,
767                   N_Vector res,  void *user_data)
768 {
769   int retval;
770   UserData webdata;
771 
772   webdata = (UserData)user_data;
773 
774   /* Call rescomm to do inter-processor communication. */
775   retval = rescomm(cc, cp, webdata);
776 
777   /* Call reslocal to calculate the local portion of residual vector. */
778   retval = reslocal(tt, cc, cp, res, webdata);
779 
780   return(retval);
781 
782 }
783 
784 /*
785  * rescomm: Communication routine in support of resweb.
786  * This routine performs all inter-processor communication of components
787  * of the cc vector needed to calculate F, namely the components at all
788  * interior subgrid boundaries (ghost cell data).  It loads this data
789  * into a work array cext (the local portion of c, extended).
790  * The message-passing uses blocking sends, non-blocking receives,
791  * and receive-waiting, in routines BRecvPost, BSend, BRecvWait.
792  */
793 
rescomm(N_Vector cc,N_Vector cp,void * user_data)794 static int rescomm(N_Vector cc, N_Vector cp, void *user_data)
795 {
796 
797   UserData webdata;
798   realtype *cdata, *cext, buffer[2*NUM_SPECIES*MYSUB];
799   int thispe, ixsub, jysub, nsmxsub, nsmysub;
800   MPI_Comm comm;
801   MPI_Request request[4];
802 
803   webdata = (UserData) user_data;
804   cdata = N_VGetArrayPointer(cc);
805 
806   /* Get comm, thispe, subgrid indices, data sizes, extended array cext. */
807   comm    = webdata->comm;
808   thispe  = webdata->thispe;
809   ixsub   = webdata->ixsub;
810   jysub   = webdata->jysub;
811   cext    = webdata->cext;
812   nsmxsub = webdata->nsmxsub;
813   nsmysub = ((int)(webdata->ns))*(webdata->mysub);
814 
815   /* Start receiving boundary data from neighboring PEs. */
816   BRecvPost(comm, request, thispe, ixsub, jysub, nsmxsub, nsmysub,
817             cext, buffer);
818 
819   /* Send data from boundary of local grid to neighboring PEs. */
820   BSend(comm, thispe, ixsub, jysub, nsmxsub, nsmysub, cdata);
821 
822   /* Finish receiving boundary data from neighboring PEs. */
823   BRecvWait(request, ixsub, jysub, nsmxsub, cext, buffer);
824 
825   return(0);
826 
827 }
828 
829 /*
830  * BSend: Send boundary data to neighboring PEs.
831  * This routine sends components of cc from internal subgrid boundaries
832  * to the appropriate neighbor PEs.
833  */
834 
BSend(MPI_Comm comm,int my_pe,int ixsub,int jysub,int dsizex,int dsizey,realtype cdata[])835 static void BSend(MPI_Comm comm, int my_pe, int ixsub, int jysub,
836                   int dsizex, int dsizey, realtype cdata[])
837 {
838   int i;
839   int ly, offsetc, offsetbuf;
840   realtype bufleft[NUM_SPECIES*MYSUB], bufright[NUM_SPECIES*MYSUB];
841 
842   /* If jysub > 0, send data from bottom x-line of cc. */
843   if (jysub != 0)
844     MPI_Send(&cdata[0], dsizex, MPI_SUNREALTYPE, my_pe-NPEX, 0, comm);
845 
846   /* If jysub < NPEY-1, send data from top x-line of cc. */
847   if (jysub != NPEY-1) {
848     offsetc = (MYSUB-1)*dsizex;
849     MPI_Send(&cdata[offsetc], dsizex, MPI_SUNREALTYPE, my_pe+NPEX, 0, comm);
850   }
851 
852   /* If ixsub > 0, send data from left y-line of cc (via bufleft). */
853   if (ixsub != 0) {
854     for (ly = 0; ly < MYSUB; ly++) {
855       offsetbuf = ly*NUM_SPECIES;
856       offsetc = ly*dsizex;
857       for (i = 0; i < NUM_SPECIES; i++)
858         bufleft[offsetbuf+i] = cdata[offsetc+i];
859     }
860     MPI_Send(&bufleft[0], dsizey, MPI_SUNREALTYPE, my_pe-1, 0, comm);
861   }
862 
863   /* If ixsub < NPEX-1, send data from right y-line of cc (via bufright). */
864   if (ixsub != NPEX-1) {
865     for (ly = 0; ly < MYSUB; ly++) {
866       offsetbuf = ly*NUM_SPECIES;
867       offsetc = offsetbuf*MXSUB + (MXSUB-1)*NUM_SPECIES;
868       for (i = 0; i < NUM_SPECIES; i++)
869         bufright[offsetbuf+i] = cdata[offsetc+i];
870     }
871     MPI_Send(&bufright[0], dsizey, MPI_SUNREALTYPE, my_pe+1, 0, comm);
872   }
873 
874 }
875 
876 /*
877  * BRecvPost: Start receiving boundary data from neighboring PEs.
878  * (1) buffer should be able to hold 2*NUM_SPECIES*MYSUB realtype entries,
879  *     should be passed to both the BRecvPost and BRecvWait functions, and
880  *     should not be manipulated between the two calls.
881  * (2) request should have 4 entries, and is also passed in both calls.
882  */
883 
BRecvPost(MPI_Comm comm,MPI_Request request[],int my_pe,int ixsub,int jysub,int dsizex,int dsizey,realtype cext[],realtype buffer[])884 static void BRecvPost(MPI_Comm comm, MPI_Request request[], int my_pe,
885                       int ixsub, int jysub,
886                       int dsizex, int dsizey,
887                       realtype cext[], realtype buffer[])
888 {
889   int offsetce;
890   /* Have bufleft and bufright use the same buffer. */
891   realtype *bufleft = buffer, *bufright = buffer+NUM_SPECIES*MYSUB;
892 
893   /* If jysub > 0, receive data for bottom x-line of cext. */
894   if (jysub != 0)
895     MPI_Irecv(&cext[NUM_SPECIES], dsizex, MPI_SUNREALTYPE,
896               my_pe-NPEX, 0, comm, &request[0]);
897 
898   /* If jysub < NPEY-1, receive data for top x-line of cext. */
899   if (jysub != NPEY-1) {
900     offsetce = NUM_SPECIES*(1 + (MYSUB+1)*(MXSUB+2));
901     MPI_Irecv(&cext[offsetce], dsizex, MPI_SUNREALTYPE,
902               my_pe+NPEX, 0, comm, &request[1]);
903   }
904 
905   /* If ixsub > 0, receive data for left y-line of cext (via bufleft). */
906   if (ixsub != 0) {
907     MPI_Irecv(&bufleft[0], dsizey, MPI_SUNREALTYPE,
908               my_pe-1, 0, comm, &request[2]);
909   }
910 
911   /* If ixsub < NPEX-1, receive data for right y-line of cext (via bufright). */
912   if (ixsub != NPEX-1) {
913     MPI_Irecv(&bufright[0], dsizey, MPI_SUNREALTYPE,
914               my_pe+1, 0, comm, &request[3]);
915   }
916 
917 }
918 
919 /*
920  * BRecvWait: Finish receiving boundary data from neighboring PEs.
921  * (1) buffer should be able to hold 2*NUM_SPECIES*MYSUB realtype entries,
922  *     should be passed to both the BRecvPost and BRecvWait functions, and
923  *     should not be manipulated between the two calls.
924  * (2) request should have 4 entries, and is also passed in both calls.
925  */
926 
BRecvWait(MPI_Request request[],int ixsub,int jysub,int dsizex,realtype cext[],realtype buffer[])927 static void BRecvWait(MPI_Request request[], int ixsub, int jysub,
928                       int dsizex, realtype cext[], realtype buffer[])
929 {
930   int i;
931   int ly, dsizex2, offsetce, offsetbuf;
932   realtype *bufleft = buffer, *bufright = buffer+NUM_SPECIES*MYSUB;
933   MPI_Status status;
934 
935   dsizex2 = dsizex + 2*NUM_SPECIES;
936 
937   /* If jysub > 0, receive data for bottom x-line of cext. */
938   if (jysub != 0)
939     MPI_Wait(&request[0],&status);
940 
941   /* If jysub < NPEY-1, receive data for top x-line of cext. */
942   if (jysub != NPEY-1)
943     MPI_Wait(&request[1],&status);
944 
945   /* If ixsub > 0, receive data for left y-line of cext (via bufleft). */
946   if (ixsub != 0) {
947     MPI_Wait(&request[2],&status);
948 
949     /* Copy the buffer to cext */
950     for (ly = 0; ly < MYSUB; ly++) {
951       offsetbuf = ly*NUM_SPECIES;
952       offsetce = (ly+1)*dsizex2;
953       for (i = 0; i < NUM_SPECIES; i++)
954         cext[offsetce+i] = bufleft[offsetbuf+i];
955     }
956   }
957 
958   /* If ixsub < NPEX-1, receive data for right y-line of cext (via bufright). */
959   if (ixsub != NPEX-1) {
960     MPI_Wait(&request[3],&status);
961 
962     /* Copy the buffer to cext */
963     for (ly = 0; ly < MYSUB; ly++) {
964       offsetbuf = ly*NUM_SPECIES;
965       offsetce = (ly+2)*dsizex2 - NUM_SPECIES;
966       for (i = 0; i < NUM_SPECIES; i++)
967         cext[offsetce+i] = bufright[offsetbuf+i];
968     }
969   }
970 }
971 
972 /* Define lines are for ease of readability in the following functions. */
973 
974 #define mxsub      (webdata->mxsub)
975 #define mysub      (webdata->mysub)
976 #define npex       (webdata->npex)
977 #define npey       (webdata->npey)
978 #define ixsub      (webdata->ixsub)
979 #define jysub      (webdata->jysub)
980 #define nsmxsub    (webdata->nsmxsub)
981 #define nsmxsub2   (webdata->nsmxsub2)
982 #define np         (webdata->np)
983 #define dx         (webdata->dx)
984 #define dy         (webdata->dy)
985 #define cox        (webdata->cox)
986 #define coy        (webdata->coy)
987 #define rhs        (webdata->rhs)
988 #define cext       (webdata->cext)
989 #define rates      (webdata->rates)
990 #define ns         (webdata->ns)
991 #define acoef      (webdata->acoef)
992 #define bcoef      (webdata->bcoef)
993 
994 /*
995  * reslocal: Compute res = F(t,cc,cp).
996  * This routine assumes that all inter-processor communication of data
997  * needed to calculate F has already been done.  Components at interior
998  * subgrid boundaries are assumed to be in the work array cext.
999  * The local portion of the cc vector is first copied into cext.
1000  * The exterior Neumann boundary conditions are explicitly handled here
1001  * by copying data from the first interior mesh line to the ghost cell
1002  * locations in cext.  Then the reaction and diffusion terms are
1003  * evaluated in terms of the cext array, and the residuals are formed.
1004  * The reaction terms are saved separately in the vector webdata->rates
1005  * for use by the preconditioner setup routine.
1006  */
1007 
reslocal(realtype tt,N_Vector cc,N_Vector cp,N_Vector res,void * user_data)1008 static int reslocal(realtype tt, N_Vector cc, N_Vector cp, N_Vector res,
1009                     void *user_data)
1010 {
1011   realtype *cdata, *ratesxy, *cpxy, *resxy,
1012     xx, yy, dcyli, dcyui, dcxli, dcxui;
1013   int ix, jy, is, i, locc, ylocce, locce;
1014   UserData webdata;
1015 
1016   webdata = (UserData) user_data;
1017 
1018   /* Get data pointers, subgrid data, array sizes, work array cext. */
1019   cdata = N_VGetArrayPointer(cc);
1020 
1021   /* Copy local segment of cc vector into the working extended array cext. */
1022   locc = 0;
1023   locce = nsmxsub2 + NUM_SPECIES;
1024   for (jy = 0; jy < mysub; jy++) {
1025     for (i = 0; i < nsmxsub; i++) cext[locce+i] = cdata[locc+i];
1026     locc = locc + nsmxsub;
1027     locce = locce + nsmxsub2;
1028   }
1029 
1030   /* To facilitate homogeneous Neumann boundary conditions, when this is
1031   a boundary PE, copy data from the first interior mesh line of cc to cext. */
1032 
1033   /* If jysub = 0, copy x-line 2 of cc to cext. */
1034   if (jysub == 0)
1035     { for (i = 0; i < nsmxsub; i++) cext[NUM_SPECIES+i] = cdata[nsmxsub+i]; }
1036 
1037   /* If jysub = npey-1, copy x-line mysub-1 of cc to cext. */
1038   if (jysub == npey-1) {
1039     locc = (mysub-2)*nsmxsub;
1040     locce = (mysub+1)*nsmxsub2 + NUM_SPECIES;
1041     for (i = 0; i < nsmxsub; i++) cext[locce+i] = cdata[locc+i];
1042   }
1043 
1044   /* If ixsub = 0, copy y-line 2 of cc to cext. */
1045   if (ixsub == 0) {
1046     for (jy = 0; jy < mysub; jy++) {
1047       locc = jy*nsmxsub + NUM_SPECIES;
1048       locce = (jy+1)*nsmxsub2;
1049       for (i = 0; i < NUM_SPECIES; i++) cext[locce+i] = cdata[locc+i];
1050     }
1051   }
1052 
1053   /* If ixsub = npex-1, copy y-line mxsub-1 of cc to cext. */
1054   if (ixsub == npex-1) {
1055     for (jy = 0; jy < mysub; jy++) {
1056       locc  = (jy+1)*nsmxsub - 2*NUM_SPECIES;
1057       locce = (jy+2)*nsmxsub2 - NUM_SPECIES;
1058       for (i = 0; i < NUM_SPECIES; i++) cext[locce+i] = cdata[locc+i];
1059     }
1060   }
1061 
1062   /* Loop over all grid points, setting local array rates to right-hand sides.
1063      Then set res values appropriately for prey/predator components of F. */
1064   for (jy = 0; jy < mysub; jy++) {
1065     ylocce = (jy+1)*nsmxsub2;
1066     yy     = (jy+jysub*mysub)*dy;
1067 
1068     for (ix = 0; ix < mxsub; ix++) {
1069       locce = ylocce + (ix+1)*NUM_SPECIES;
1070       xx = (ix + ixsub*mxsub)*dx;
1071 
1072       ratesxy = IJ_Vptr(rates,ix,jy);
1073       WebRates(xx, yy, &(cext[locce]), ratesxy, webdata);
1074 
1075       resxy = IJ_Vptr(res,ix,jy);
1076       cpxy = IJ_Vptr(cp,ix,jy);
1077 
1078       for (is = 0; is < NUM_SPECIES; is++) {
1079         dcyli = cext[locce+is]          - cext[locce+is-nsmxsub2];
1080         dcyui = cext[locce+is+nsmxsub2] - cext[locce+is];
1081 
1082         dcxli = cext[locce+is]             - cext[locce+is-NUM_SPECIES];
1083         dcxui = cext[locce+is+NUM_SPECIES] - cext[locce+is];
1084 
1085         rhs[is] = cox[is]*(dcxui-dcxli) + coy[is]*(dcyui-dcyli) + ratesxy[is];
1086 
1087         if (is < np) resxy[is] = cpxy[is] - rhs[is];
1088         else         resxy[is] =          - rhs[is];
1089 
1090       } /* End of is (species) loop. */
1091     } /* End of ix loop. */
1092   } /* End of jy loop. */
1093 
1094   return(0);
1095 
1096 }
1097 
1098 /*
1099  * WebRates: Evaluate reaction rates at a given spatial point.
1100  * At a given (x,y), evaluate the array of ns reaction terms R.
1101  */
1102 
WebRates(realtype xx,realtype yy,realtype * cxy,realtype * ratesxy,UserData webdata)1103 static void WebRates(realtype xx, realtype yy, realtype *cxy, realtype *ratesxy,
1104                      UserData webdata)
1105 {
1106   int is;
1107   realtype fac;
1108 
1109   for (is = 0; is < NUM_SPECIES; is++)
1110     ratesxy[is] = dotprod(NUM_SPECIES, cxy, acoef[is]);
1111 
1112   fac = ONE + ALPHA*xx*yy + BETA*sin(FOURPI*xx)*sin(FOURPI*yy);
1113 
1114   for (is = 0; is < NUM_SPECIES; is++)
1115     ratesxy[is] = cxy[is]*( bcoef[is]*fac + ratesxy[is] );
1116 
1117 }
1118 
1119 /*
1120  * dotprod: dot product routine for realtype arrays, for use by WebRates.
1121  */
1122 
dotprod(int size,realtype * x1,realtype * x2)1123 static realtype dotprod(int size, realtype *x1, realtype *x2)
1124 {
1125   int i;
1126   realtype *xx1, *xx2, temp = ZERO;
1127 
1128   xx1 = x1; xx2 = x2;
1129   for (i = 0; i < size; i++) temp += (*xx1++) * (*xx2++);
1130   return(temp);
1131 
1132 }
1133 
1134 /*
1135  * Preconbd: Preconditioner setup routine.
1136  * This routine generates and preprocesses the block-diagonal
1137  * preconditoner PP.  At each spatial point, a block of PP is computed
1138  * by way of difference quotients on the reaction rates R.
1139  * The base value of R are taken from webdata->rates, as set by webres.
1140  * Each block is LU-factored, for later solution of the linear systems.
1141  */
1142 
Precondbd(realtype tt,N_Vector cc,N_Vector cp,N_Vector rr,realtype cj,void * user_data)1143 static int Precondbd(realtype tt, N_Vector cc, N_Vector cp,
1144                      N_Vector rr, realtype cj, void *user_data)
1145 {
1146   int retval, thispe;
1147   sunindextype ret;
1148   realtype uround;
1149   realtype xx, yy, *cxy, *ewtxy, cctemp, **Pxy, *ratesxy, *Pxycol, *cpxy;
1150   realtype inc, sqru, fac, perturb_rates[NUM_SPECIES];
1151   int is, js, ix, jy;
1152   UserData webdata;
1153   void *ida_mem;
1154   N_Vector ewt;
1155   realtype hh;
1156 
1157   webdata = (UserData)user_data;
1158   uround = UNIT_ROUNDOFF;
1159   sqru = sqrt(uround);
1160   thispe = webdata->thispe;
1161 
1162   ida_mem = webdata->ida_mem;
1163   ewt = webdata->ewt;
1164   retval = IDAGetErrWeights(ida_mem, ewt);
1165   check_retval(&retval, "IDAGetErrWeights", 1, thispe);
1166   retval = IDAGetCurrentStep(ida_mem, &hh);
1167   check_retval(&retval, "IDAGetCurrentStep", 1, thispe);
1168 
1169   for (jy = 0; jy < mysub; jy++) {
1170     yy = (jy + jysub*mysub)*dy;
1171 
1172     for (ix = 0; ix < mxsub; ix++) {
1173       xx = (ix+ ixsub*mxsub)*dx;
1174       Pxy = (webdata->PP)[ix][jy];
1175       cxy = IJ_Vptr(cc,ix,jy);
1176       cpxy = IJ_Vptr(cp,ix,jy);
1177       ewtxy= IJ_Vptr(ewt,ix,jy);
1178       ratesxy = IJ_Vptr(rates,ix,jy);
1179 
1180       for (js = 0; js < ns; js++) {
1181         inc = sqru*(MAX(fabs(cxy[js]), MAX(hh*fabs(cpxy[js]), ONE/ewtxy[js])));
1182         cctemp = cxy[js];  /* Save the (js,ix,jy) element of cc. */
1183         cxy[js] += inc;    /* Perturb the (js,ix,jy) element of cc. */
1184         fac = -ONE/inc;
1185 
1186         WebRates(xx, yy, cxy, perturb_rates, webdata);
1187 
1188         Pxycol = Pxy[js];
1189 
1190         for (is = 0; is < ns; is++)
1191           Pxycol[is] = (perturb_rates[is] - ratesxy[is])*fac;
1192 
1193         if (js < np) Pxycol[js] += cj; /* Add partial with respect to cp. */
1194 
1195         cxy[js] = cctemp; /* Restore (js,ix,jy) element of cc. */
1196 
1197       } /* End of js loop. */
1198 
1199       /* Do LU decomposition of matrix block for grid point (ix,jy). */
1200       ret = denseGETRF(Pxy, ns, ns, (webdata->pivot)[ix][jy]);
1201 
1202       if (ret != 0) return(1);
1203 
1204     } /* End of ix loop. */
1205   } /* End of jy loop. */
1206 
1207   return(0);
1208 
1209 }
1210 
1211 /*
1212  * PSolvebd: Preconditioner solve routine.
1213  * This routine applies the LU factorization of the blocks of the
1214  * preconditioner PP, to compute the solution of PP * zvec = rvec.
1215  */
1216 
PSolvebd(realtype tt,N_Vector cc,N_Vector cp,N_Vector rr,N_Vector rvec,N_Vector zvec,realtype cj,realtype delta,void * user_data)1217 static int PSolvebd(realtype tt, N_Vector cc, N_Vector cp,
1218                     N_Vector rr, N_Vector rvec, N_Vector zvec,
1219                     realtype cj, realtype delta, void *user_data)
1220 {
1221   realtype **Pxy, *zxy;
1222   sunindextype *pivot, ix, jy;
1223   UserData webdata;
1224 
1225   webdata = (UserData)user_data;
1226 
1227   N_VScale(ONE, rvec, zvec);
1228 
1229   /* Loop through subgrid and apply preconditioner factors at each point. */
1230   for (ix = 0; ix < mxsub; ix++) {
1231     for (jy = 0; jy < mysub; jy++) {
1232 
1233       /* For grid point (ix,jy), do backsolve on local vector.
1234          zxy is the address of the local portion of zvec, and
1235          Pxy is the address of the corresponding block of PP.  */
1236       zxy = IJ_Vptr(zvec,ix,jy);
1237       Pxy = (webdata->PP)[ix][jy];
1238       pivot = (webdata->pivot)[ix][jy];
1239       denseGETRS(Pxy, ns, pivot, zxy);
1240 
1241     } /* End of jy loop. */
1242   } /* End of ix loop. */
1243 
1244   return(0);
1245 
1246 }
1247