1 /* -----------------------------------------------------------------
2  * Programmer(s): Allan Taylor, Alan Hindmarsh and
3  *                Radu Serban @ LLNL
4  * -----------------------------------------------------------------
5  * SUNDIALS Copyright Start
6  * Copyright (c) 2002-2021, Lawrence Livermore National Security
7  * and Southern Methodist University.
8  * All rights reserved.
9  *
10  * See the top-level LICENSE and NOTICE files for details.
11  *
12  * SPDX-License-Identifier: BSD-3-Clause
13  * SUNDIALS Copyright End
14  * -----------------------------------------------------------------
15  * Example problem for KINSOL (parallel machine case) using the BBD
16  * preconditioner.
17  *
18  * This example solves a nonlinear system that arises from a system
19  * of partial differential equations. The PDE system is a food web
20  * population model, with predator-prey interaction and diffusion on
21  * the unit square in two dimensions. The dependent variable vector
22  * is the following:
23  *
24  *       1   2         ns
25  * c = (c , c ,  ..., c  )     (denoted by the variable cc)
26  *
27  * and the PDE's are as follows:
28  *
29  *                    i       i
30  *         0 = d(i)*(c     + c    )  +  f  (x,y,c)   (i=1,...,ns)
31  *                    xx      yy         i
32  *
33  *   where
34  *
35  *                   i             ns         j
36  *   f  (x,y,c)  =  c  * (b(i)  + sum a(i,j)*c )
37  *    i                           j=1
38  *
39  * The number of species is ns = 2 * np, with the first np being
40  * prey and the last np being predators. The number np is both the
41  * number of prey and predator species. The coefficients a(i,j),
42  * b(i), d(i) are:
43  *
44  *   a(i,i) = -AA   (all i)
45  *   a(i,j) = -GG   (i <= np , j >  np)
46  *   a(i,j) =  EE   (i >  np,  j <= np)
47  *   b(i) = BB * (1 + alpha * x * y)   (i <= np)
48  *   b(i) =-BB * (1 + alpha * x * y)   (i >  np)
49  *   d(i) = DPREY   (i <= np)
50  *   d(i) = DPRED   ( i > np)
51  *
52  * The various scalar parameters are set using define's or in
53  * routine InitUserData.
54  *
55  * The boundary conditions are: normal derivative = 0, and the
56  * initial guess is constant in x and y, but the final solution
57  * is not.
58  *
59  * The PDEs are discretized by central differencing on an MX by
60  * MY mesh.
61  *
62  * The nonlinear system is solved by KINSOL using the method
63  * specified in the local variable globalstrat.
64  *
65  * The preconditioner matrix is a band-block-diagonal matrix
66  * using the KINBBDPRE module. The half-bandwidths are as follows:
67  *
68  *   Difference quotient half-bandwidths mldq = mudq = 2*ns - 1
69  *   Retained banded blocks have half-bandwidths mlkeep = mukeep = ns.
70  *
71  * -----------------------------------------------------------------
72  * References:
73  *
74  * 1. Peter N. Brown and Youcef Saad,
75  *    Hybrid Krylov Methods for Nonlinear Systems of Equations
76  *    LLNL report UCRL-97645, November 1987.
77  *
78  * 2. Peter N. Brown and Alan C. Hindmarsh,
79  *    Reduced Storage Matrix Methods in Stiff ODE systems,
80  *    Lawrence Livermore National Laboratory Report  UCRL-95088,
81  *    Rev. 1, June 1987, and  Journal of Applied Mathematics and
82  *    Computation, Vol. 31 (May 1989), pp. 40-91. (Presents a
83  *    description of the time-dependent version of this test
84  *    problem.)
85  * --------------------------------------------------------------------------
86  *  Run command line: mpirun -np N -machinefile machines kinFoodWeb_kry_bbd_p
87  *  where N = NPEX * NPEY is the number of processors.
88  * --------------------------------------------------------------------------
89  */
90 
91 #include <stdio.h>
92 #include <stdlib.h>
93 #include <math.h>
94 
95 #include <kinsol/kinsol.h>             /* access to KINSOL func., consts.      */
96 #include <nvector/nvector_parallel.h>  /* access to MPI parallel N_Vector      */
97 #include <sunlinsol/sunlinsol_spgmr.h> /* access to SPGMR SUNLinearSolver      */
98 #include <kinsol/kinsol_bbdpre.h>      /* access to BBD preconditioner         */
99 #include <sundials/sundials_dense.h>   /* use generic dense solver in precond. */
100 #include <sundials/sundials_types.h>   /* defs. of realtype, sunindextype      */
101 #include <sundials/sundials_math.h>    /* access to SUNMAX, SUNRabs, SUNRsqrt  */
102 
103 #include <mpi.h>
104 
105 /* Problem Constants */
106 
107 #define NUM_SPECIES     6  /* must equal 2*(number of prey or predators)
108                               number of prey = number of predators       */
109 
110 #define PI       RCONST(3.1415926535898)   /* pi */
111 
112 #define NPEX        2            /* number of processors in the x-direction  */
113 #define NPEY        2            /* number of processors in the y-direction  */
114 #define MXSUB       10           /* number of x mesh points per subgrid      */
115 #define MYSUB       10           /* number of y mesh points per subgrid      */
116 #define MX          (NPEX*MXSUB) /* number of mesh points in the x-direction */
117 #define MY          (NPEY*MYSUB) /* number of mesh points in the y-direction */
118 #define NSMXSUB     (NUM_SPECIES * MXSUB)
119 #define NSMXSUB2    (NUM_SPECIES * (MXSUB+2))
120 #define NEQ         (NUM_SPECIES*MX*MY)  /* number of equations in the system */
121 #define AA          RCONST(1.0)    /* value of coefficient AA in above eqns */
122 #define EE          RCONST(10000.) /* value of coefficient EE in above eqns */
123 #define GG          RCONST(0.5e-6) /* value of coefficient GG in above eqns */
124 #define BB          RCONST(1.0)    /* value of coefficient BB in above eqns */
125 #define DPREY       RCONST(1.0)    /* value of coefficient dprey above */
126 #define DPRED       RCONST(0.5)    /* value of coefficient dpred above */
127 #define ALPHA       RCONST(1.0)    /* value of coefficient alpha above */
128 #define AX          RCONST(1.0)    /* total range of x variable */
129 #define AY          RCONST(1.0)    /* total range of y variable */
130 #define FTOL        RCONST(1.e-7)  /* ftol tolerance */
131 #define STOL        RCONST(1.e-13) /* stol tolerance */
132 #define THOUSAND    RCONST(1000.0) /* one thousand */
133 #define ZERO        RCONST(0.0)    /* 0. */
134 #define ONE         RCONST(1.0)    /* 1. */
135 #define PREYIN      RCONST(1.0)    /* initial guess for prey concentrations. */
136 #define PREDIN      RCONST(30000.0)/* initial guess for predator concs.      */
137 
138 /* User-defined vector access macro: IJ_Vptr */
139 
140 /* IJ_Vptr is defined in order to translate from the underlying 3D structure
141    of the dependent variable vector to the 1D storage scheme for an N-vector.
142    IJ_Vptr(vv,i,j) returns a pointer to the location in vv corresponding to
143    indices is = 0, jx = i, jy = j.    */
144 
145 #define IJ_Vptr(vv,i,j)   (&NV_Ith_P(vv, i*NUM_SPECIES + j*NSMXSUB))
146 
147 /* Type : UserData
148    contains problem constants and extended array */
149 
150 typedef struct {
151   realtype **acoef, *bcoef;
152   N_Vector rates;
153   realtype *cox, *coy;
154   realtype ax, ay, dx, dy;
155   sunindextype Nlocal;
156   int mx, my, ns, np;
157   realtype cext[NUM_SPECIES * (MXSUB+2)*(MYSUB+2)];
158   int my_pe, isubx, isuby, nsmxsub, nsmxsub2;
159   MPI_Comm comm;
160 } *UserData;
161 
162 /* Functions called by the KINSOL Solver */
163 
164 static int func(N_Vector cc, N_Vector fval, void *user_data);
165 
166 static int ccomm(sunindextype Nlocal, N_Vector cc, void *data);
167 
168 static int func_local(sunindextype Nlocal, N_Vector cc, N_Vector fval, void *user_data);
169 
170 /* Private Helper Functions */
171 
172 static UserData AllocUserData(void);
173 static void InitUserData(int my_pe, sunindextype Nlocal, MPI_Comm comm, UserData data);
174 static void FreeUserData(UserData data);
175 static void SetInitialProfiles(N_Vector cc, N_Vector sc);
176 static void PrintHeader(int globalstrategy, int maxl, int maxlrst,
177                         sunindextype mudq, sunindextype mldq,
178                         sunindextype mukeep, sunindextype mlkeep,
179                         realtype fnormtol, realtype scsteptol);
180 static void PrintOutput(int my_pe, MPI_Comm comm, N_Vector cc);
181 static void PrintFinalStats(void *kmem);
182 static void WebRate(realtype xx, realtype yy, realtype *cxy, realtype *ratesxy,
183                     void *user_data);
184 static realtype DotProd(int size, realtype *x1, realtype *x2);
185 static void BSend(MPI_Comm comm, int my_pe, int isubx,
186                   int isuby, int dsizex, int dsizey,
187                   realtype *cdata);
188 static void BRecvPost(MPI_Comm comm, MPI_Request request[], int my_pe,
189                       int isubx, int isuby,
190                       int dsizex, int dsizey,
191                       realtype *cext, realtype *buffer);
192 static void BRecvWait(MPI_Request request[], int isubx,
193                       int isuby, int dsizex, realtype *cext,
194                       realtype *buffer);
195 static int check_flag(void *flagvalue, const char *funcname, int opt, int id);
196 
197 /*
198  *--------------------------------------------------------------------
199  * MAIN PROGRAM
200  *--------------------------------------------------------------------
201  */
202 
main(int argc,char * argv[])203 int main(int argc, char *argv[])
204 {
205   int globalstrategy;
206   sunindextype Nlocal;
207   realtype fnormtol, scsteptol, dq_rel_uu;
208   N_Vector cc, sc, constraints;
209   UserData data;
210   int flag, maxl, maxlrst;
211   int my_pe, npes, npelast = NPEX*NPEY-1;
212   void *kmem;
213   SUNLinearSolver LS;
214   sunindextype mudq, mldq, mukeep, mlkeep;
215   MPI_Comm comm;
216 
217   cc = sc = constraints = NULL;
218   kmem = NULL;
219   LS = NULL;
220   data = NULL;
221 
222   /* Get processor number and total number of pe's */
223   MPI_Init(&argc, &argv);
224   comm = MPI_COMM_WORLD;
225   MPI_Comm_size(comm, &npes);
226   MPI_Comm_rank(comm, &my_pe);
227 
228   if (npes != NPEX*NPEY) {
229     if (my_pe == 0)
230       fprintf(stderr, "\nMPI_ERROR(0): npes = %d is not equal to NPEX*NPEY = %d\n",
231 	          npes,NPEX*NPEY);
232     MPI_Finalize();
233     return(1);
234   }
235 
236   /* Allocate memory, and set problem data, initial values, tolerances */
237 
238   /* Set local vector length */
239   Nlocal = NUM_SPECIES*MXSUB*MYSUB;
240 
241   /* Allocate and initialize user data block */
242   data = AllocUserData();
243   if (check_flag((void *)data, "AllocUserData", 2, my_pe)) MPI_Abort(comm, 1);
244   InitUserData(my_pe, Nlocal, comm, data);
245 
246   /* Set global strategy flag */
247   globalstrategy = KIN_NONE;
248 
249   /* Allocate and initialize vectors */
250   cc = N_VNew_Parallel(comm, Nlocal, NEQ);
251   if (check_flag((void *)cc, "N_VNew_Parallel", 0, my_pe)) MPI_Abort(comm, 1);
252   sc = N_VNew_Parallel(comm, Nlocal, NEQ);
253   if (check_flag((void *)sc, "N_VNew_Parallel", 0, my_pe)) MPI_Abort(comm, 1);
254   data->rates = N_VNew_Parallel(comm, Nlocal, NEQ);
255   if (check_flag((void *)data->rates, "N_VNew_Parallel", 0, my_pe)) MPI_Abort(comm, 1);
256   constraints = N_VNew_Parallel(comm, Nlocal, NEQ);
257   if (check_flag((void *)constraints, "N_VNew_Parallel", 0, my_pe)) MPI_Abort(comm, 1);
258   N_VConst(ZERO, constraints);
259 
260   SetInitialProfiles(cc, sc);
261 
262   fnormtol = FTOL; scsteptol = STOL;
263 
264   /* Call KINCreate/KINInit to initialize KINSOL:
265      nvSpec  points to machine environment data
266      A pointer to KINSOL problem memory is returned and stored in kmem. */
267   kmem = KINCreate();
268   if (check_flag((void *)kmem, "KINCreate", 0, my_pe)) MPI_Abort(comm, 1);
269 
270   /* Vector cc passed as template vector. */
271   flag = KINInit(kmem, func, cc);
272   if (check_flag(&flag, "KINInit", 1, my_pe)) MPI_Abort(comm, 1);
273   flag = KINSetUserData(kmem, data);
274   if (check_flag(&flag, "KINSetUserData", 1, my_pe)) MPI_Abort(comm, 1);
275   flag = KINSetConstraints(kmem, constraints);
276   if (check_flag(&flag, "KINSetConstraints", 1, my_pe)) MPI_Abort(comm, 1);
277   flag = KINSetFuncNormTol(kmem, fnormtol);
278   if (check_flag(&flag, "KINSetFuncNormTol", 1, my_pe)) MPI_Abort(comm, 1);
279   flag = KINSetScaledStepTol(kmem, scsteptol);
280   if (check_flag(&flag, "KINSetScaledStepTol", 1, my_pe)) MPI_Abort(comm, 1);
281 
282   /* We no longer need the constraints vector since KINSetConstraints
283      creates a private copy for KINSOL to use. */
284   N_VDestroy(constraints);
285 
286   /* Create SUNLinSol_SPGMR object with right preconditioning and the
287      maximum Krylov dimension maxl */
288   maxl = 20;
289   LS = SUNLinSol_SPGMR(cc, PREC_RIGHT, maxl);
290   if(check_flag((void *)LS, "SUNLinSol_SPGMR", 0, my_pe)) MPI_Abort(comm, 1);
291 
292   /* Attach the linear solver to KINSOL */
293   flag = KINSetLinearSolver(kmem, LS, NULL);
294   if (check_flag(&flag, "KINSetLinearSolver", 1, my_pe)) MPI_Abort(comm, 1);
295 
296   /* Set the maximum number of restarts */
297   maxlrst = 2;
298   flag = SUNLinSol_SPGMRSetMaxRestarts(LS, maxlrst);
299   if (check_flag(&flag, "SUNLinSol_SPGMRSetMaxRestarts", 1, my_pe)) MPI_Abort(comm, 1);
300 
301   /* Call KINBBDPrecInit to initialize and allocate memory for the
302      band-block-diagonal preconditioner, and specify the local and
303      communication functions func_local and gcomm=NULL (all communication
304      needed for the func_local is already done in func). */
305   dq_rel_uu = ZERO;
306   mudq = mldq = 2*NUM_SPECIES - 1;
307   mukeep = mlkeep = NUM_SPECIES;
308 
309   /* Initialize BBD preconditioner */
310   flag = KINBBDPrecInit(kmem, Nlocal, mudq, mldq, mukeep, mlkeep,
311                         dq_rel_uu, func_local, NULL);
312   if (check_flag(&flag, "KINBBDPrecInit", 1, my_pe)) MPI_Abort(comm, 1);
313 
314   /* Print out the problem size, solution parameters, initial guess. */
315   if (my_pe == 0)
316     PrintHeader(globalstrategy, maxl, maxlrst, mudq, mldq, mukeep,
317 		mlkeep, fnormtol, scsteptol);
318 
319   /* Call KINSol and print output concentration profile */
320   flag = KINSol(kmem,           /* KINSol memory block */
321                 cc,             /* initial guess on input; solution vector */
322                 globalstrategy, /* global strategy choice */
323                 sc,             /* scaling vector for the variable cc */
324                 sc);            /* scaling vector for function values fval */
325   if (check_flag(&flag, "KINSol", 1, my_pe)) MPI_Abort(comm, 1);
326 
327   if (my_pe == 0) printf("\n\nComputed equilibrium species concentrations:\n");
328   if (my_pe == 0 || my_pe == npelast) PrintOutput(my_pe, comm, cc);
329 
330   /* Print final statistics and free memory */
331   if (my_pe == 0) PrintFinalStats(kmem);
332 
333   N_VDestroy(cc);
334   N_VDestroy(sc);
335   KINFree(&kmem);
336   SUNLinSolFree(LS);
337   FreeUserData(data);
338 
339   MPI_Finalize();
340 
341   return(0);
342 }
343 
344 /* Readability definitions used in other routines below */
345 
346 #define acoef  (data->acoef)
347 #define bcoef  (data->bcoef)
348 #define cox    (data->cox)
349 #define coy    (data->coy)
350 
351 /*
352  *--------------------------------------------------------------------
353  * FUNCTIONS CALLED BY KINSOL
354  *--------------------------------------------------------------------
355  */
356 
357 /*
358  * ccomm routine.  This routine performs all communication
359  * between processors of data needed to calculate f.
360  */
361 
ccomm(sunindextype Nlocal,N_Vector cc,void * userdata)362 static int ccomm(sunindextype Nlocal, N_Vector cc, void *userdata)
363 {
364 
365   realtype *cdata, *cext, buffer[2*NUM_SPECIES*MYSUB];
366   UserData data;
367   MPI_Comm comm;
368   int my_pe, isubx, isuby, nsmxsub, nsmysub;
369   MPI_Request request[4];
370 
371   /* Get comm, my_pe, subgrid indices, data sizes, extended array cext */
372   data = (UserData) userdata;
373   comm = data->comm;  my_pe = data->my_pe;
374   isubx = data->isubx;   isuby = data->isuby;
375   nsmxsub = data->nsmxsub;
376   nsmysub = NUM_SPECIES*MYSUB;
377   cext = data->cext;
378 
379   cdata = N_VGetArrayPointer(cc);
380 
381   /* Start receiving boundary data from neighboring PEs */
382   BRecvPost(comm, request, my_pe, isubx, isuby, nsmxsub, nsmysub, cext, buffer);
383 
384   /* Send data from boundary of local grid to neighboring PEs */
385   BSend(comm, my_pe, isubx, isuby, nsmxsub, nsmysub, cdata);
386 
387   /* Finish receiving boundary data from neighboring PEs */
388   BRecvWait(request, isubx, isuby, nsmxsub, cext, buffer);
389 
390   return(0);
391 }
392 
393 /*
394  * System function for predator-prey system - calculation part
395  */
396 
func_local(sunindextype Nlocal,N_Vector cc,N_Vector fval,void * user_data)397 static int func_local(sunindextype Nlocal, N_Vector cc, N_Vector fval, void *user_data)
398 {
399   realtype xx, yy, *cxy, *rxy, *fxy, dcydi, dcyui, dcxli, dcxri;
400   realtype *cext, dely, delx, *cdata;
401   int i, jx, jy, is, ly;
402   int isubx, isuby, nsmxsub, nsmxsub2;
403   int shifty, offsetc, offsetce, offsetcl, offsetcr, offsetcd, offsetcu;
404   UserData data;
405 
406   data = (UserData)user_data;
407   cdata = N_VGetArrayPointer(cc);
408 
409   /* Get subgrid indices, data sizes, extended work array cext */
410   isubx = data->isubx;   isuby = data->isuby;
411   nsmxsub = data->nsmxsub; nsmxsub2 = data->nsmxsub2;
412   cext = data->cext;
413 
414   /* Copy local segment of cc vector into the working extended array cext */
415   offsetc = 0;
416   offsetce = nsmxsub2 + NUM_SPECIES;
417   for (ly = 0; ly < MYSUB; ly++) {
418     for (i = 0; i < nsmxsub; i++) cext[offsetce+i] = cdata[offsetc+i];
419     offsetc = offsetc + nsmxsub;
420     offsetce = offsetce + nsmxsub2;
421   }
422 
423   /* To facilitate homogeneous Neumann boundary conditions, when this is a
424      boundary PE, copy data from the first interior mesh line of cc to cext */
425 
426   /* If isuby = 0, copy x-line 2 of cc to cext */
427   if (isuby == 0) {
428     for (i = 0; i < nsmxsub; i++) cext[NUM_SPECIES+i] = cdata[nsmxsub+i];
429   }
430 
431   /* If isuby = NPEY-1, copy x-line MYSUB-1 of cc to cext */
432   if (isuby == NPEY-1) {
433     offsetc = (MYSUB-2)*nsmxsub;
434     offsetce = (MYSUB+1)*nsmxsub2 + NUM_SPECIES;
435     for (i = 0; i < nsmxsub; i++) cext[offsetce+i] = cdata[offsetc+i];
436   }
437 
438   /* If isubx = 0, copy y-line 2 of cc to cext */
439   if (isubx == 0) {
440     for (ly = 0; ly < MYSUB; ly++) {
441       offsetc = ly*nsmxsub + NUM_SPECIES;
442       offsetce = (ly+1)*nsmxsub2;
443       for (i = 0; i < NUM_SPECIES; i++) cext[offsetce+i] = cdata[offsetc+i];
444     }
445   }
446 
447   /* If isubx = NPEX-1, copy y-line MXSUB-1 of cc to cext */
448   if (isubx == NPEX-1) {
449     for (ly = 0; ly < MYSUB; ly++) {
450       offsetc = (ly+1)*nsmxsub - 2*NUM_SPECIES;
451       offsetce = (ly+2)*nsmxsub2 - NUM_SPECIES;
452       for (i = 0; i < NUM_SPECIES; i++) cext[offsetce+i] = cdata[offsetc+i];
453     }
454   }
455 
456   /* Loop over all mesh points, evaluating rate arra at each point */
457   delx = data->dx;
458   dely = data->dy;
459   shifty = (MXSUB+2)*NUM_SPECIES;
460 
461   for (jy = 0; jy < MYSUB; jy++) {
462 
463     yy = dely*(jy + isuby * MYSUB);
464 
465     for (jx = 0; jx < MXSUB; jx++) {
466 
467       xx = delx * (jx + isubx * MXSUB);
468       cxy = IJ_Vptr(cc,jx,jy);
469       rxy = IJ_Vptr(data->rates,jx,jy);
470       fxy = IJ_Vptr(fval,jx,jy);
471 
472       WebRate(xx, yy, cxy, rxy, user_data);
473 
474       offsetc = (jx+1)*NUM_SPECIES + (jy+1)*NSMXSUB2;
475       offsetcd = offsetc - shifty;
476       offsetcu = offsetc + shifty;
477       offsetcl = offsetc - NUM_SPECIES;
478       offsetcr = offsetc + NUM_SPECIES;
479 
480       for (is = 0; is < NUM_SPECIES; is++) {
481 
482         /* differencing in x */
483         dcydi = cext[offsetc+is]  - cext[offsetcd+is];
484         dcyui = cext[offsetcu+is] - cext[offsetc+is];
485 
486         /* differencing in y */
487         dcxli = cext[offsetc+is]  - cext[offsetcl+is];
488         dcxri = cext[offsetcr+is] - cext[offsetc+is];
489 
490         /* compute the value at xx , yy */
491         fxy[is] = (coy)[is] * (dcyui - dcydi) +
492           (cox)[is] * (dcxri - dcxli) + rxy[is];
493 
494       } /* end of is loop */
495 
496     } /* end of jx loop */
497 
498   } /* end of jy loop */
499 
500   return(0);
501 }
502 
503 /*
504  * System function routine.  Evaluate f(cc).  First call ccomm to do
505  * communication of subgrid boundary data into cext.  Then calculate f
506  * by a call to func_local.
507  */
508 
func(N_Vector cc,N_Vector fval,void * user_data)509 static int func(N_Vector cc, N_Vector fval, void *user_data)
510 {
511   UserData data;
512 
513   data = (UserData) user_data;
514 
515   /* Call ccomm to do inter-processor communicaiton */
516   ccomm(data->Nlocal, cc, data);
517 
518   /* Call func_local to calculate all right-hand sides */
519   func_local(data->Nlocal, cc, fval, data);
520 
521   return(0);
522 }
523 
524 /*
525  * Interaction rate function routine
526  */
527 
WebRate(realtype xx,realtype yy,realtype * cxy,realtype * ratesxy,void * user_data)528 static void WebRate(realtype xx, realtype yy, realtype *cxy, realtype *ratesxy,
529                     void *user_data)
530 {
531   int i;
532   realtype fac;
533   UserData data;
534 
535   data = (UserData)user_data;
536 
537   for (i = 0; i<NUM_SPECIES; i++)
538     ratesxy[i] = DotProd(NUM_SPECIES, cxy, acoef[i]);
539 
540   fac = ONE + ALPHA * xx * yy;
541 
542   for (i = 0; i < NUM_SPECIES; i++)
543     ratesxy[i] = cxy[i] * ( bcoef[i] * fac + ratesxy[i] );
544 }
545 
546 /*
547  * Dot product routine for realtype arrays
548  */
549 
DotProd(int size,realtype * x1,realtype * x2)550 static realtype DotProd(int size, realtype *x1, realtype *x2)
551 {
552   int i;
553   realtype *xx1, *xx2, temp = ZERO;
554 
555   xx1 = x1; xx2 = x2;
556   for (i = 0; i < size; i++) temp += (*xx1++) * (*xx2++);
557 
558   return(temp);
559 }
560 
561 /*
562  *--------------------------------------------------------------------
563  * PRIVATE FUNCTIONS
564  *--------------------------------------------------------------------
565  */
566 
567 /*
568  * Allocate memory for data structure of type UserData
569  */
570 
AllocUserData(void)571 static UserData AllocUserData(void)
572 {
573   UserData data;
574 
575   data = (UserData) malloc(sizeof *data);
576 
577   acoef = newDenseMat(NUM_SPECIES, NUM_SPECIES);
578   bcoef = (realtype *)malloc(NUM_SPECIES * sizeof(realtype));
579   cox   = (realtype *)malloc(NUM_SPECIES * sizeof(realtype));
580   coy   = (realtype *)malloc(NUM_SPECIES * sizeof(realtype));
581 
582   return(data);
583 }
584 
585 /*
586  * Load problem constants in data
587  */
588 
InitUserData(int my_pe,sunindextype Nlocal,MPI_Comm comm,UserData data)589 static void InitUserData(int my_pe, sunindextype Nlocal, MPI_Comm comm, UserData data)
590 {
591   int i, j, np;
592   realtype *a1,*a2, *a3, *a4, dx2, dy2;
593 
594   data->mx = MX;
595   data->my = MY;
596   data->ns = NUM_SPECIES;
597   data->np = NUM_SPECIES/2;
598   data->ax = AX;
599   data->ay = AY;
600   data->dx = (data->ax)/(MX-1);
601   data->dy = (data->ay)/(MY-1);
602   data->my_pe = my_pe;
603   data->Nlocal = Nlocal;
604   data->comm = comm;
605   data->isuby = my_pe/NPEX;
606   data->isubx = my_pe - data->isuby*NPEX;
607   data->nsmxsub = NUM_SPECIES * MXSUB;
608   data->nsmxsub2 = NUM_SPECIES * (MXSUB+2);
609 
610   /* Set up the coefficients a and b plus others found in the equations */
611   np = data->np;
612 
613   dx2=(data->dx)*(data->dx); dy2=(data->dy)*(data->dy);
614 
615   for (i = 0; i < np; i++) {
616     a1= &(acoef[i][np]);
617     a2= &(acoef[i+np][0]);
618     a3= &(acoef[i][0]);
619     a4= &(acoef[i+np][np]);
620 
621     /*  Fill in the portion of acoef in the four quadrants, row by row */
622     for (j = 0; j < np; j++) {
623       *a1++ =  -GG;
624       *a2++ =   EE;
625       *a3++ = ZERO;
626       *a4++ = ZERO;
627     }
628 
629     /* and then change the diagonal elements of acoef to -AA */
630     acoef[i][i]=-AA;
631     acoef[i+np][i+np] = -AA;
632 
633     bcoef[i] = BB;
634     bcoef[i+np] = -BB;
635 
636     cox[i]=DPREY/dx2;
637     cox[i+np]=DPRED/dx2;
638 
639     coy[i]=DPREY/dy2;
640     coy[i+np]=DPRED/dy2;
641   }
642 }
643 
644 /*
645  * Free data memory
646  */
647 
FreeUserData(UserData data)648 static void FreeUserData(UserData data)
649 {
650 
651   destroyMat(acoef);
652   free(bcoef);
653   free(cox); free(coy);
654   N_VDestroy(data->rates);
655 
656   free(data);
657 
658 }
659 
660 /*
661  * Set initial conditions in cc
662  */
663 
SetInitialProfiles(N_Vector cc,N_Vector sc)664 static void SetInitialProfiles(N_Vector cc, N_Vector sc)
665 {
666   int i, jx, jy;
667   realtype *cloc, *sloc;
668   realtype  ctemp[NUM_SPECIES], stemp[NUM_SPECIES];
669 
670   /* Initialize arrays ctemp and stemp used in the loading process */
671   for (i = 0; i < NUM_SPECIES/2; i++) {
672     ctemp[i] = PREYIN;
673     stemp[i] = ONE;
674   }
675   for (i = NUM_SPECIES/2; i < NUM_SPECIES; i++) {
676     ctemp[i] = PREDIN;
677     stemp[i] = RCONST(0.00001);
678   }
679 
680   /* Load initial profiles into cc and sc vector from ctemp and stemp. */
681   for (jy = 0; jy < MYSUB; jy++) {
682     for (jx=0; jx < MXSUB; jx++) {
683       cloc = IJ_Vptr(cc,jx,jy);
684       sloc = IJ_Vptr(sc,jx,jy);
685       for (i = 0; i < NUM_SPECIES; i++){
686         cloc[i] = ctemp[i];
687         sloc[i] = stemp[i];
688       }
689     }
690   }
691 
692 }
693 
694 /*
695  * Print first lines of output (problem description)
696  */
697 
PrintHeader(int globalstrategy,int maxl,int maxlrst,sunindextype mudq,sunindextype mldq,sunindextype mukeep,sunindextype mlkeep,realtype fnormtol,realtype scsteptol)698 static void PrintHeader(int globalstrategy, int maxl, int maxlrst,
699                         sunindextype mudq, sunindextype mldq,
700 			sunindextype mukeep, sunindextype mlkeep,
701                         realtype fnormtol, realtype scsteptol)
702 {
703     printf("\nPredator-prey test problem--  KINSol (parallel-BBD version)\n\n");
704     printf("Mesh dimensions = %d X %d\n", MX, MY);
705     printf("Number of species = %d\n", NUM_SPECIES);
706     printf("Total system size = %d\n\n", NEQ);
707     printf("Subgrid dimensions = %d X %d\n", MXSUB, MYSUB);
708     printf("Processor array is %d X %d\n\n", NPEX, NPEY);
709     printf("Flag globalstrategy = %d (0 = None, 1 = Linesearch)\n",
710            globalstrategy);
711     printf("Linear solver is SPGMR with maxl = %d, maxlrst = %d\n",
712            maxl, maxlrst);
713     printf("Preconditioning uses band-block-diagonal matrix from KINBBDPRE\n");
714     printf("  Difference quotient half-bandwidths: mudq = %ld, mldq = %ld\n",
715 	   (long int) mudq, (long int) mldq);
716     printf("  Retained band block half-bandwidths: mukeep = %ld, mlkeep = %ld\n",
717 	   (long int) mukeep, (long int) mlkeep);
718 #if defined(SUNDIALS_EXTENDED_PRECISION)
719     printf("Tolerance parameters:  fnormtol = %Lg   scsteptol = %Lg\n",
720            fnormtol, scsteptol);
721 #elif defined(SUNDIALS_DOUBLE_PRECISION)
722     printf("Tolerance parameters:  fnormtol = %g   scsteptol = %g\n",
723            fnormtol, scsteptol);
724 #else
725     printf("Tolerance parameters:  fnormtol = %g   scsteptol = %g\n",
726            fnormtol, scsteptol);
727 #endif
728 
729     printf("\nInitial profile of concentration\n");
730 #if defined(SUNDIALS_EXTENDED_PRECISION)
731     printf("At all mesh points:  %Lg %Lg %Lg   %Lg %Lg %Lg\n",
732            PREYIN,PREYIN,PREYIN, PREDIN,PREDIN,PREDIN);
733 #elif defined(SUNDIALS_DOUBLE_PRECISION)
734     printf("At all mesh points:  %g %g %g   %g %g %g\n",
735            PREYIN,PREYIN,PREYIN, PREDIN,PREDIN,PREDIN);
736 #else
737     printf("At all mesh points:  %g %g %g   %g %g %g\n", PREYIN,PREYIN,PREYIN,
738            PREDIN,PREDIN,PREDIN);
739 #endif
740 }
741 
742 /*
743  * Print sample of current cc values
744  */
745 
PrintOutput(int my_pe,MPI_Comm comm,N_Vector cc)746 static void PrintOutput(int my_pe, MPI_Comm comm, N_Vector cc)
747 {
748   int is, i0, npelast;
749   realtype  *ct, tempc[NUM_SPECIES];
750   MPI_Status status;
751 
752   npelast = NPEX*NPEY - 1;
753 
754   ct = N_VGetArrayPointer(cc);
755 
756   /* Send the cc values (for all species) at the top right mesh point to PE 0 */
757   if (my_pe == npelast) {
758     i0 = NUM_SPECIES*(MXSUB*MYSUB-1);
759     if (npelast!=0)
760       MPI_Send(&ct[i0],NUM_SPECIES,MPI_SUNREALTYPE,0,0,comm);
761     else  /* single processor case */
762       for (is = 0; is < NUM_SPECIES; is++) tempc[is]=ct[i0+is];
763   }
764 
765   /* On PE 0, receive the cc values at top right, then print performance data
766      and sampled solution values */
767   if (my_pe == 0) {
768 
769     if (npelast != 0)
770       MPI_Recv(&tempc[0],NUM_SPECIES,MPI_SUNREALTYPE,npelast,0,comm,&status);
771 
772     printf("\nAt bottom left:");
773     for (is = 0; is < NUM_SPECIES; is++){
774       if ((is%6)*6== is) printf("\n");
775 #if defined(SUNDIALS_EXTENDED_PRECISION)
776       printf(" %Lg",ct[is]);
777 #elif defined(SUNDIALS_DOUBLE_PRECISION)
778       printf(" %g",ct[is]);
779 #else
780       printf(" %g",ct[is]);
781 #endif
782     }
783 
784     printf("\n\nAt top right:");
785     for (is = 0; is < NUM_SPECIES; is++) {
786       if ((is%6)*6 == is) printf("\n");
787 #if defined(SUNDIALS_EXTENDED_PRECISION)
788       printf(" %Lg",tempc[is]);
789 #elif defined(SUNDIALS_DOUBLE_PRECISION)
790       printf(" %g",tempc[is]);
791 #else
792       printf(" %g",tempc[is]);
793 #endif
794     }
795     printf("\n\n");
796   }
797 }
798 
799 /*
800  * Print final statistics contained in iopt
801  */
802 
PrintFinalStats(void * kmem)803 static void PrintFinalStats(void *kmem)
804 {
805   long int nni, nfe, nli, npe, nps, ncfl, nfeSG;
806   int flag;
807 
808   flag = KINGetNumNonlinSolvIters(kmem, &nni);
809   check_flag(&flag, "KINGetNumNonlinSolvIters", 1, 0);
810   flag = KINGetNumFuncEvals(kmem, &nfe);
811   check_flag(&flag, "KINGetNumFuncEvals", 1, 0);
812   flag = KINGetNumLinIters(kmem, &nli);
813   check_flag(&flag, "KINGetNumLinIters", 1, 0);
814   flag = KINGetNumPrecEvals(kmem, &npe);
815   check_flag(&flag, "KINGetNumPrecEvals", 1, 0);
816   flag = KINGetNumPrecSolves(kmem, &nps);
817   check_flag(&flag, "KINGetNumPrecSolves", 1, 0);
818   flag = KINGetNumLinConvFails(kmem, &ncfl);
819   check_flag(&flag, "KINGetNumLinConvFails", 1, 0);
820   flag = KINGetNumLinFuncEvals(kmem, &nfeSG);
821   check_flag(&flag, "KINGetNumLinFuncEvals", 1, 0);
822 
823   printf("Final Statistics.. \n");
824   printf("nni    = %5ld    nli   = %5ld\n", nni, nli);
825   printf("nfe    = %5ld    nfeSG = %5ld\n", nfe, nfeSG);
826   printf("nps    = %5ld    npe   = %5ld     ncfl  = %5ld\n", nps, npe, ncfl);
827 
828 }
829 
830 /*
831  * Routine to send boundary data to neighboring PEs
832  */
833 
BSend(MPI_Comm comm,int my_pe,int isubx,int isuby,int dsizex,int dsizey,realtype * cdata)834 static void BSend(MPI_Comm comm, int my_pe,
835                   int isubx, int isuby,
836                   int dsizex, int dsizey, realtype *cdata)
837 {
838   int i, ly;
839   int offsetc, offsetbuf;
840   realtype bufleft[NUM_SPECIES*MYSUB], bufright[NUM_SPECIES*MYSUB];
841 
842   /* If isuby > 0, send data from bottom x-line of u */
843   if (isuby != 0)
844     MPI_Send(&cdata[0], dsizex, MPI_SUNREALTYPE, my_pe-NPEX, 0, comm);
845 
846   /* If isuby < NPEY-1, send data from top x-line of u */
847   if (isuby != NPEY-1) {
848     offsetc = (MYSUB-1)*dsizex;
849     MPI_Send(&cdata[offsetc], dsizex, MPI_SUNREALTYPE, my_pe+NPEX, 0, comm);
850   }
851 
852   /* If isubx > 0, send data from left y-line of u (via bufleft) */
853   if (isubx != 0) {
854     for (ly = 0; ly < MYSUB; ly++) {
855       offsetbuf = ly*NUM_SPECIES;
856       offsetc = ly*dsizex;
857       for (i = 0; i < NUM_SPECIES; i++)
858         bufleft[offsetbuf+i] = cdata[offsetc+i];
859     }
860     MPI_Send(&bufleft[0], dsizey, MPI_SUNREALTYPE, my_pe-1, 0, comm);
861   }
862 
863   /* If isubx < NPEX-1, send data from right y-line of u (via bufright) */
864   if (isubx != NPEX-1) {
865     for (ly = 0; ly < MYSUB; ly++) {
866       offsetbuf = ly*NUM_SPECIES;
867       offsetc = offsetbuf*MXSUB + (MXSUB-1)*NUM_SPECIES;
868       for (i = 0; i < NUM_SPECIES; i++)
869         bufright[offsetbuf+i] = cdata[offsetc+i];
870     }
871     MPI_Send(&bufright[0], dsizey, MPI_SUNREALTYPE, my_pe+1, 0, comm);
872   }
873 }
874 
875 /*
876  * Routine to start receiving boundary data from neighboring PEs.
877  *  Notes:
878  *  1) buffer should be able to hold 2*NUM_SPECIES*MYSUB realtype entries,
879  *     should be passed to both the BRecvPost and BRecvWait functions, and
880  *     should not be manipulated between the two calls.
881  *  2) request should have 4 entries, and should be passed in both calls also.
882  */
883 
BRecvPost(MPI_Comm comm,MPI_Request request[],int my_pe,int isubx,int isuby,int dsizex,int dsizey,realtype * cext,realtype * buffer)884 static void BRecvPost(MPI_Comm comm, MPI_Request request[], int my_pe,
885                       int isubx, int isuby,
886                       int dsizex, int dsizey,
887                       realtype *cext, realtype *buffer)
888 {
889   int offsetce;
890 
891   /* Have bufleft and bufright use the same buffer */
892   realtype *bufleft = buffer, *bufright = buffer+NUM_SPECIES*MYSUB;
893 
894   /* If isuby > 0, receive data for bottom x-line of cext */
895   if (isuby != 0)
896     MPI_Irecv(&cext[NUM_SPECIES], dsizex, MPI_SUNREALTYPE,
897               my_pe-NPEX, 0, comm, &request[0]);
898 
899   /* If isuby < NPEY-1, receive data for top x-line of cext */
900   if (isuby != NPEY-1) {
901     offsetce = NUM_SPECIES*(1 + (MYSUB+1)*(MXSUB+2));
902     MPI_Irecv(&cext[offsetce], dsizex, MPI_SUNREALTYPE,
903               my_pe+NPEX, 0, comm, &request[1]);
904   }
905 
906   /* If isubx > 0, receive data for left y-line of cext (via bufleft) */
907   if (isubx != 0) {
908     MPI_Irecv(&bufleft[0], dsizey, MPI_SUNREALTYPE,
909               my_pe-1, 0, comm, &request[2]);
910   }
911 
912   /* If isubx < NPEX-1, receive data for right y-line of cext (via bufright) */
913   if (isubx != NPEX-1) {
914     MPI_Irecv(&bufright[0], dsizey, MPI_SUNREALTYPE,
915               my_pe+1, 0, comm, &request[3]);
916   }
917 }
918 
919 /*
920  * Routine to finish receiving boundary data from neighboring PEs.
921  *  Notes:
922  *  1) buffer should be able to hold 2*NUM_SPECIES*MYSUB realtype entries,
923  *     should be passed to both the BRecvPost and BRecvWait functions, and
924  *     should not be manipulated between the two calls.
925  *  2) request should have 4 entries, and should be passed in both calls also.
926  */
927 
BRecvWait(MPI_Request request[],int isubx,int isuby,int dsizex,realtype * cext,realtype * buffer)928 static void BRecvWait(MPI_Request request[], int isubx,
929                       int isuby, int dsizex, realtype *cext,
930                       realtype *buffer)
931 {
932   int i, ly;
933   int dsizex2, offsetce, offsetbuf;
934   realtype *bufleft = buffer, *bufright = buffer+NUM_SPECIES*MYSUB;
935   MPI_Status status;
936 
937   dsizex2 = dsizex + 2*NUM_SPECIES;
938 
939   /* If isuby > 0, receive data for bottom x-line of cext */
940   if (isuby != 0)
941     MPI_Wait(&request[0],&status);
942 
943   /* If isuby < NPEY-1, receive data for top x-line of cext */
944   if (isuby != NPEY-1)
945     MPI_Wait(&request[1],&status);
946 
947   /* If isubx > 0, receive data for left y-line of cext (via bufleft) */
948   if (isubx != 0) {
949     MPI_Wait(&request[2],&status);
950 
951     /* Copy the buffer to cext */
952     for (ly = 0; ly < MYSUB; ly++) {
953       offsetbuf = ly*NUM_SPECIES;
954       offsetce = (ly+1)*dsizex2;
955       for (i = 0; i < NUM_SPECIES; i++)
956         cext[offsetce+i] = bufleft[offsetbuf+i];
957     }
958   }
959 
960   /* If isubx < NPEX-1, receive data for right y-line of cext (via bufright) */
961   if (isubx != NPEX-1) {
962     MPI_Wait(&request[3],&status);
963 
964     /* Copy the buffer to cext */
965     for (ly = 0; ly < MYSUB; ly++) {
966       offsetbuf = ly*NUM_SPECIES;
967       offsetce = (ly+2)*dsizex2 - NUM_SPECIES;
968       for (i = 0; i < NUM_SPECIES; i++)
969         cext[offsetce+i] = bufright[offsetbuf+i];
970     }
971   }
972 }
973 /*
974  * Check function return value...
975  *    opt == 0 means SUNDIALS function allocates memory so check if
976  *             returned NULL pointer
977  *    opt == 1 means SUNDIALS function returns a flag so check if
978  *             flag >= 0
979  *    opt == 2 means function allocates memory so check if returned
980  *             NULL pointer
981  */
982 
check_flag(void * flagvalue,const char * funcname,int opt,int id)983 static int check_flag(void *flagvalue, const char *funcname, int opt, int id)
984 {
985   int *errflag;
986 
987   /* Check if SUNDIALS function returned NULL pointer - no memory allocated */
988   if (opt == 0 && flagvalue == NULL) {
989     fprintf(stderr,
990             "\nSUNDIALS_ERROR(%d): %s() failed - returned NULL pointer\n\n",
991 	    id, funcname);
992     return(1);
993   }
994 
995   /* Check if flag < 0 */
996   else if (opt == 1) {
997     errflag = (int *) flagvalue;
998     if (*errflag < 0) {
999       fprintf(stderr,
1000               "\nSUNDIALS_ERROR(%d): %s() failed with flag = %d\n\n",
1001 	      id, funcname, *errflag);
1002       return(1);
1003     }
1004   }
1005 
1006   /* Check if function returned NULL pointer - no memory allocated */
1007   else if (opt == 2 && flagvalue == NULL) {
1008     fprintf(stderr,
1009             "\nMEMORY_ERROR(%d): %s() failed - returned NULL pointer\n\n",
1010 	    id, funcname);
1011     return(1);
1012   }
1013 
1014   return(0);
1015 }
1016