1 /* -----------------------------------------------------------------
2 * Programmer(s): Allan Taylor, Alan Hindmarsh and
3 * Radu Serban @ LLNL
4 * -----------------------------------------------------------------
5 * SUNDIALS Copyright Start
6 * Copyright (c) 2002-2021, Lawrence Livermore National Security
7 * and Southern Methodist University.
8 * All rights reserved.
9 *
10 * See the top-level LICENSE and NOTICE files for details.
11 *
12 * SPDX-License-Identifier: BSD-3-Clause
13 * SUNDIALS Copyright End
14 * -----------------------------------------------------------------
15 * Example problem for KINSOL (parallel machine case) using the BBD
16 * preconditioner.
17 *
18 * This example solves a nonlinear system that arises from a system
19 * of partial differential equations. The PDE system is a food web
20 * population model, with predator-prey interaction and diffusion on
21 * the unit square in two dimensions. The dependent variable vector
22 * is the following:
23 *
24 * 1 2 ns
25 * c = (c , c , ..., c ) (denoted by the variable cc)
26 *
27 * and the PDE's are as follows:
28 *
29 * i i
30 * 0 = d(i)*(c + c ) + f (x,y,c) (i=1,...,ns)
31 * xx yy i
32 *
33 * where
34 *
35 * i ns j
36 * f (x,y,c) = c * (b(i) + sum a(i,j)*c )
37 * i j=1
38 *
39 * The number of species is ns = 2 * np, with the first np being
40 * prey and the last np being predators. The number np is both the
41 * number of prey and predator species. The coefficients a(i,j),
42 * b(i), d(i) are:
43 *
44 * a(i,i) = -AA (all i)
45 * a(i,j) = -GG (i <= np , j > np)
46 * a(i,j) = EE (i > np, j <= np)
47 * b(i) = BB * (1 + alpha * x * y) (i <= np)
48 * b(i) =-BB * (1 + alpha * x * y) (i > np)
49 * d(i) = DPREY (i <= np)
50 * d(i) = DPRED ( i > np)
51 *
52 * The various scalar parameters are set using define's or in
53 * routine InitUserData.
54 *
55 * The boundary conditions are: normal derivative = 0, and the
56 * initial guess is constant in x and y, but the final solution
57 * is not.
58 *
59 * The PDEs are discretized by central differencing on an MX by
60 * MY mesh.
61 *
62 * The nonlinear system is solved by KINSOL using the method
63 * specified in the local variable globalstrat.
64 *
65 * The preconditioner matrix is a band-block-diagonal matrix
66 * using the KINBBDPRE module. The half-bandwidths are as follows:
67 *
68 * Difference quotient half-bandwidths mldq = mudq = 2*ns - 1
69 * Retained banded blocks have half-bandwidths mlkeep = mukeep = ns.
70 *
71 * -----------------------------------------------------------------
72 * References:
73 *
74 * 1. Peter N. Brown and Youcef Saad,
75 * Hybrid Krylov Methods for Nonlinear Systems of Equations
76 * LLNL report UCRL-97645, November 1987.
77 *
78 * 2. Peter N. Brown and Alan C. Hindmarsh,
79 * Reduced Storage Matrix Methods in Stiff ODE systems,
80 * Lawrence Livermore National Laboratory Report UCRL-95088,
81 * Rev. 1, June 1987, and Journal of Applied Mathematics and
82 * Computation, Vol. 31 (May 1989), pp. 40-91. (Presents a
83 * description of the time-dependent version of this test
84 * problem.)
85 * --------------------------------------------------------------------------
86 * Run command line: mpirun -np N -machinefile machines kinFoodWeb_kry_bbd_p
87 * where N = NPEX * NPEY is the number of processors.
88 * --------------------------------------------------------------------------
89 */
90
91 #include <stdio.h>
92 #include <stdlib.h>
93 #include <math.h>
94
95 #include <kinsol/kinsol.h> /* access to KINSOL func., consts. */
96 #include <nvector/nvector_parallel.h> /* access to MPI parallel N_Vector */
97 #include <sunlinsol/sunlinsol_spgmr.h> /* access to SPGMR SUNLinearSolver */
98 #include <kinsol/kinsol_bbdpre.h> /* access to BBD preconditioner */
99 #include <sundials/sundials_dense.h> /* use generic dense solver in precond. */
100 #include <sundials/sundials_types.h> /* defs. of realtype, sunindextype */
101 #include <sundials/sundials_math.h> /* access to SUNMAX, SUNRabs, SUNRsqrt */
102
103 #include <mpi.h>
104
105 /* Problem Constants */
106
107 #define NUM_SPECIES 6 /* must equal 2*(number of prey or predators)
108 number of prey = number of predators */
109
110 #define PI RCONST(3.1415926535898) /* pi */
111
112 #define NPEX 2 /* number of processors in the x-direction */
113 #define NPEY 2 /* number of processors in the y-direction */
114 #define MXSUB 10 /* number of x mesh points per subgrid */
115 #define MYSUB 10 /* number of y mesh points per subgrid */
116 #define MX (NPEX*MXSUB) /* number of mesh points in the x-direction */
117 #define MY (NPEY*MYSUB) /* number of mesh points in the y-direction */
118 #define NSMXSUB (NUM_SPECIES * MXSUB)
119 #define NSMXSUB2 (NUM_SPECIES * (MXSUB+2))
120 #define NEQ (NUM_SPECIES*MX*MY) /* number of equations in the system */
121 #define AA RCONST(1.0) /* value of coefficient AA in above eqns */
122 #define EE RCONST(10000.) /* value of coefficient EE in above eqns */
123 #define GG RCONST(0.5e-6) /* value of coefficient GG in above eqns */
124 #define BB RCONST(1.0) /* value of coefficient BB in above eqns */
125 #define DPREY RCONST(1.0) /* value of coefficient dprey above */
126 #define DPRED RCONST(0.5) /* value of coefficient dpred above */
127 #define ALPHA RCONST(1.0) /* value of coefficient alpha above */
128 #define AX RCONST(1.0) /* total range of x variable */
129 #define AY RCONST(1.0) /* total range of y variable */
130 #define FTOL RCONST(1.e-7) /* ftol tolerance */
131 #define STOL RCONST(1.e-13) /* stol tolerance */
132 #define THOUSAND RCONST(1000.0) /* one thousand */
133 #define ZERO RCONST(0.0) /* 0. */
134 #define ONE RCONST(1.0) /* 1. */
135 #define PREYIN RCONST(1.0) /* initial guess for prey concentrations. */
136 #define PREDIN RCONST(30000.0)/* initial guess for predator concs. */
137
138 /* User-defined vector access macro: IJ_Vptr */
139
140 /* IJ_Vptr is defined in order to translate from the underlying 3D structure
141 of the dependent variable vector to the 1D storage scheme for an N-vector.
142 IJ_Vptr(vv,i,j) returns a pointer to the location in vv corresponding to
143 indices is = 0, jx = i, jy = j. */
144
145 #define IJ_Vptr(vv,i,j) (&NV_Ith_P(vv, i*NUM_SPECIES + j*NSMXSUB))
146
147 /* Type : UserData
148 contains problem constants and extended array */
149
150 typedef struct {
151 realtype **acoef, *bcoef;
152 N_Vector rates;
153 realtype *cox, *coy;
154 realtype ax, ay, dx, dy;
155 sunindextype Nlocal;
156 int mx, my, ns, np;
157 realtype cext[NUM_SPECIES * (MXSUB+2)*(MYSUB+2)];
158 int my_pe, isubx, isuby, nsmxsub, nsmxsub2;
159 MPI_Comm comm;
160 } *UserData;
161
162 /* Functions called by the KINSOL Solver */
163
164 static int func(N_Vector cc, N_Vector fval, void *user_data);
165
166 static int ccomm(sunindextype Nlocal, N_Vector cc, void *data);
167
168 static int func_local(sunindextype Nlocal, N_Vector cc, N_Vector fval, void *user_data);
169
170 /* Private Helper Functions */
171
172 static UserData AllocUserData(void);
173 static void InitUserData(int my_pe, sunindextype Nlocal, MPI_Comm comm, UserData data);
174 static void FreeUserData(UserData data);
175 static void SetInitialProfiles(N_Vector cc, N_Vector sc);
176 static void PrintHeader(int globalstrategy, int maxl, int maxlrst,
177 sunindextype mudq, sunindextype mldq,
178 sunindextype mukeep, sunindextype mlkeep,
179 realtype fnormtol, realtype scsteptol);
180 static void PrintOutput(int my_pe, MPI_Comm comm, N_Vector cc);
181 static void PrintFinalStats(void *kmem);
182 static void WebRate(realtype xx, realtype yy, realtype *cxy, realtype *ratesxy,
183 void *user_data);
184 static realtype DotProd(int size, realtype *x1, realtype *x2);
185 static void BSend(MPI_Comm comm, int my_pe, int isubx,
186 int isuby, int dsizex, int dsizey,
187 realtype *cdata);
188 static void BRecvPost(MPI_Comm comm, MPI_Request request[], int my_pe,
189 int isubx, int isuby,
190 int dsizex, int dsizey,
191 realtype *cext, realtype *buffer);
192 static void BRecvWait(MPI_Request request[], int isubx,
193 int isuby, int dsizex, realtype *cext,
194 realtype *buffer);
195 static int check_flag(void *flagvalue, const char *funcname, int opt, int id);
196
197 /*
198 *--------------------------------------------------------------------
199 * MAIN PROGRAM
200 *--------------------------------------------------------------------
201 */
202
main(int argc,char * argv[])203 int main(int argc, char *argv[])
204 {
205 int globalstrategy;
206 sunindextype Nlocal;
207 realtype fnormtol, scsteptol, dq_rel_uu;
208 N_Vector cc, sc, constraints;
209 UserData data;
210 int flag, maxl, maxlrst;
211 int my_pe, npes, npelast = NPEX*NPEY-1;
212 void *kmem;
213 SUNLinearSolver LS;
214 sunindextype mudq, mldq, mukeep, mlkeep;
215 MPI_Comm comm;
216
217 cc = sc = constraints = NULL;
218 kmem = NULL;
219 LS = NULL;
220 data = NULL;
221
222 /* Get processor number and total number of pe's */
223 MPI_Init(&argc, &argv);
224 comm = MPI_COMM_WORLD;
225 MPI_Comm_size(comm, &npes);
226 MPI_Comm_rank(comm, &my_pe);
227
228 if (npes != NPEX*NPEY) {
229 if (my_pe == 0)
230 fprintf(stderr, "\nMPI_ERROR(0): npes = %d is not equal to NPEX*NPEY = %d\n",
231 npes,NPEX*NPEY);
232 MPI_Finalize();
233 return(1);
234 }
235
236 /* Allocate memory, and set problem data, initial values, tolerances */
237
238 /* Set local vector length */
239 Nlocal = NUM_SPECIES*MXSUB*MYSUB;
240
241 /* Allocate and initialize user data block */
242 data = AllocUserData();
243 if (check_flag((void *)data, "AllocUserData", 2, my_pe)) MPI_Abort(comm, 1);
244 InitUserData(my_pe, Nlocal, comm, data);
245
246 /* Set global strategy flag */
247 globalstrategy = KIN_NONE;
248
249 /* Allocate and initialize vectors */
250 cc = N_VNew_Parallel(comm, Nlocal, NEQ);
251 if (check_flag((void *)cc, "N_VNew_Parallel", 0, my_pe)) MPI_Abort(comm, 1);
252 sc = N_VNew_Parallel(comm, Nlocal, NEQ);
253 if (check_flag((void *)sc, "N_VNew_Parallel", 0, my_pe)) MPI_Abort(comm, 1);
254 data->rates = N_VNew_Parallel(comm, Nlocal, NEQ);
255 if (check_flag((void *)data->rates, "N_VNew_Parallel", 0, my_pe)) MPI_Abort(comm, 1);
256 constraints = N_VNew_Parallel(comm, Nlocal, NEQ);
257 if (check_flag((void *)constraints, "N_VNew_Parallel", 0, my_pe)) MPI_Abort(comm, 1);
258 N_VConst(ZERO, constraints);
259
260 SetInitialProfiles(cc, sc);
261
262 fnormtol = FTOL; scsteptol = STOL;
263
264 /* Call KINCreate/KINInit to initialize KINSOL:
265 nvSpec points to machine environment data
266 A pointer to KINSOL problem memory is returned and stored in kmem. */
267 kmem = KINCreate();
268 if (check_flag((void *)kmem, "KINCreate", 0, my_pe)) MPI_Abort(comm, 1);
269
270 /* Vector cc passed as template vector. */
271 flag = KINInit(kmem, func, cc);
272 if (check_flag(&flag, "KINInit", 1, my_pe)) MPI_Abort(comm, 1);
273 flag = KINSetUserData(kmem, data);
274 if (check_flag(&flag, "KINSetUserData", 1, my_pe)) MPI_Abort(comm, 1);
275 flag = KINSetConstraints(kmem, constraints);
276 if (check_flag(&flag, "KINSetConstraints", 1, my_pe)) MPI_Abort(comm, 1);
277 flag = KINSetFuncNormTol(kmem, fnormtol);
278 if (check_flag(&flag, "KINSetFuncNormTol", 1, my_pe)) MPI_Abort(comm, 1);
279 flag = KINSetScaledStepTol(kmem, scsteptol);
280 if (check_flag(&flag, "KINSetScaledStepTol", 1, my_pe)) MPI_Abort(comm, 1);
281
282 /* We no longer need the constraints vector since KINSetConstraints
283 creates a private copy for KINSOL to use. */
284 N_VDestroy(constraints);
285
286 /* Create SUNLinSol_SPGMR object with right preconditioning and the
287 maximum Krylov dimension maxl */
288 maxl = 20;
289 LS = SUNLinSol_SPGMR(cc, PREC_RIGHT, maxl);
290 if(check_flag((void *)LS, "SUNLinSol_SPGMR", 0, my_pe)) MPI_Abort(comm, 1);
291
292 /* Attach the linear solver to KINSOL */
293 flag = KINSetLinearSolver(kmem, LS, NULL);
294 if (check_flag(&flag, "KINSetLinearSolver", 1, my_pe)) MPI_Abort(comm, 1);
295
296 /* Set the maximum number of restarts */
297 maxlrst = 2;
298 flag = SUNLinSol_SPGMRSetMaxRestarts(LS, maxlrst);
299 if (check_flag(&flag, "SUNLinSol_SPGMRSetMaxRestarts", 1, my_pe)) MPI_Abort(comm, 1);
300
301 /* Call KINBBDPrecInit to initialize and allocate memory for the
302 band-block-diagonal preconditioner, and specify the local and
303 communication functions func_local and gcomm=NULL (all communication
304 needed for the func_local is already done in func). */
305 dq_rel_uu = ZERO;
306 mudq = mldq = 2*NUM_SPECIES - 1;
307 mukeep = mlkeep = NUM_SPECIES;
308
309 /* Initialize BBD preconditioner */
310 flag = KINBBDPrecInit(kmem, Nlocal, mudq, mldq, mukeep, mlkeep,
311 dq_rel_uu, func_local, NULL);
312 if (check_flag(&flag, "KINBBDPrecInit", 1, my_pe)) MPI_Abort(comm, 1);
313
314 /* Print out the problem size, solution parameters, initial guess. */
315 if (my_pe == 0)
316 PrintHeader(globalstrategy, maxl, maxlrst, mudq, mldq, mukeep,
317 mlkeep, fnormtol, scsteptol);
318
319 /* Call KINSol and print output concentration profile */
320 flag = KINSol(kmem, /* KINSol memory block */
321 cc, /* initial guess on input; solution vector */
322 globalstrategy, /* global strategy choice */
323 sc, /* scaling vector for the variable cc */
324 sc); /* scaling vector for function values fval */
325 if (check_flag(&flag, "KINSol", 1, my_pe)) MPI_Abort(comm, 1);
326
327 if (my_pe == 0) printf("\n\nComputed equilibrium species concentrations:\n");
328 if (my_pe == 0 || my_pe == npelast) PrintOutput(my_pe, comm, cc);
329
330 /* Print final statistics and free memory */
331 if (my_pe == 0) PrintFinalStats(kmem);
332
333 N_VDestroy(cc);
334 N_VDestroy(sc);
335 KINFree(&kmem);
336 SUNLinSolFree(LS);
337 FreeUserData(data);
338
339 MPI_Finalize();
340
341 return(0);
342 }
343
344 /* Readability definitions used in other routines below */
345
346 #define acoef (data->acoef)
347 #define bcoef (data->bcoef)
348 #define cox (data->cox)
349 #define coy (data->coy)
350
351 /*
352 *--------------------------------------------------------------------
353 * FUNCTIONS CALLED BY KINSOL
354 *--------------------------------------------------------------------
355 */
356
357 /*
358 * ccomm routine. This routine performs all communication
359 * between processors of data needed to calculate f.
360 */
361
ccomm(sunindextype Nlocal,N_Vector cc,void * userdata)362 static int ccomm(sunindextype Nlocal, N_Vector cc, void *userdata)
363 {
364
365 realtype *cdata, *cext, buffer[2*NUM_SPECIES*MYSUB];
366 UserData data;
367 MPI_Comm comm;
368 int my_pe, isubx, isuby, nsmxsub, nsmysub;
369 MPI_Request request[4];
370
371 /* Get comm, my_pe, subgrid indices, data sizes, extended array cext */
372 data = (UserData) userdata;
373 comm = data->comm; my_pe = data->my_pe;
374 isubx = data->isubx; isuby = data->isuby;
375 nsmxsub = data->nsmxsub;
376 nsmysub = NUM_SPECIES*MYSUB;
377 cext = data->cext;
378
379 cdata = N_VGetArrayPointer(cc);
380
381 /* Start receiving boundary data from neighboring PEs */
382 BRecvPost(comm, request, my_pe, isubx, isuby, nsmxsub, nsmysub, cext, buffer);
383
384 /* Send data from boundary of local grid to neighboring PEs */
385 BSend(comm, my_pe, isubx, isuby, nsmxsub, nsmysub, cdata);
386
387 /* Finish receiving boundary data from neighboring PEs */
388 BRecvWait(request, isubx, isuby, nsmxsub, cext, buffer);
389
390 return(0);
391 }
392
393 /*
394 * System function for predator-prey system - calculation part
395 */
396
func_local(sunindextype Nlocal,N_Vector cc,N_Vector fval,void * user_data)397 static int func_local(sunindextype Nlocal, N_Vector cc, N_Vector fval, void *user_data)
398 {
399 realtype xx, yy, *cxy, *rxy, *fxy, dcydi, dcyui, dcxli, dcxri;
400 realtype *cext, dely, delx, *cdata;
401 int i, jx, jy, is, ly;
402 int isubx, isuby, nsmxsub, nsmxsub2;
403 int shifty, offsetc, offsetce, offsetcl, offsetcr, offsetcd, offsetcu;
404 UserData data;
405
406 data = (UserData)user_data;
407 cdata = N_VGetArrayPointer(cc);
408
409 /* Get subgrid indices, data sizes, extended work array cext */
410 isubx = data->isubx; isuby = data->isuby;
411 nsmxsub = data->nsmxsub; nsmxsub2 = data->nsmxsub2;
412 cext = data->cext;
413
414 /* Copy local segment of cc vector into the working extended array cext */
415 offsetc = 0;
416 offsetce = nsmxsub2 + NUM_SPECIES;
417 for (ly = 0; ly < MYSUB; ly++) {
418 for (i = 0; i < nsmxsub; i++) cext[offsetce+i] = cdata[offsetc+i];
419 offsetc = offsetc + nsmxsub;
420 offsetce = offsetce + nsmxsub2;
421 }
422
423 /* To facilitate homogeneous Neumann boundary conditions, when this is a
424 boundary PE, copy data from the first interior mesh line of cc to cext */
425
426 /* If isuby = 0, copy x-line 2 of cc to cext */
427 if (isuby == 0) {
428 for (i = 0; i < nsmxsub; i++) cext[NUM_SPECIES+i] = cdata[nsmxsub+i];
429 }
430
431 /* If isuby = NPEY-1, copy x-line MYSUB-1 of cc to cext */
432 if (isuby == NPEY-1) {
433 offsetc = (MYSUB-2)*nsmxsub;
434 offsetce = (MYSUB+1)*nsmxsub2 + NUM_SPECIES;
435 for (i = 0; i < nsmxsub; i++) cext[offsetce+i] = cdata[offsetc+i];
436 }
437
438 /* If isubx = 0, copy y-line 2 of cc to cext */
439 if (isubx == 0) {
440 for (ly = 0; ly < MYSUB; ly++) {
441 offsetc = ly*nsmxsub + NUM_SPECIES;
442 offsetce = (ly+1)*nsmxsub2;
443 for (i = 0; i < NUM_SPECIES; i++) cext[offsetce+i] = cdata[offsetc+i];
444 }
445 }
446
447 /* If isubx = NPEX-1, copy y-line MXSUB-1 of cc to cext */
448 if (isubx == NPEX-1) {
449 for (ly = 0; ly < MYSUB; ly++) {
450 offsetc = (ly+1)*nsmxsub - 2*NUM_SPECIES;
451 offsetce = (ly+2)*nsmxsub2 - NUM_SPECIES;
452 for (i = 0; i < NUM_SPECIES; i++) cext[offsetce+i] = cdata[offsetc+i];
453 }
454 }
455
456 /* Loop over all mesh points, evaluating rate arra at each point */
457 delx = data->dx;
458 dely = data->dy;
459 shifty = (MXSUB+2)*NUM_SPECIES;
460
461 for (jy = 0; jy < MYSUB; jy++) {
462
463 yy = dely*(jy + isuby * MYSUB);
464
465 for (jx = 0; jx < MXSUB; jx++) {
466
467 xx = delx * (jx + isubx * MXSUB);
468 cxy = IJ_Vptr(cc,jx,jy);
469 rxy = IJ_Vptr(data->rates,jx,jy);
470 fxy = IJ_Vptr(fval,jx,jy);
471
472 WebRate(xx, yy, cxy, rxy, user_data);
473
474 offsetc = (jx+1)*NUM_SPECIES + (jy+1)*NSMXSUB2;
475 offsetcd = offsetc - shifty;
476 offsetcu = offsetc + shifty;
477 offsetcl = offsetc - NUM_SPECIES;
478 offsetcr = offsetc + NUM_SPECIES;
479
480 for (is = 0; is < NUM_SPECIES; is++) {
481
482 /* differencing in x */
483 dcydi = cext[offsetc+is] - cext[offsetcd+is];
484 dcyui = cext[offsetcu+is] - cext[offsetc+is];
485
486 /* differencing in y */
487 dcxli = cext[offsetc+is] - cext[offsetcl+is];
488 dcxri = cext[offsetcr+is] - cext[offsetc+is];
489
490 /* compute the value at xx , yy */
491 fxy[is] = (coy)[is] * (dcyui - dcydi) +
492 (cox)[is] * (dcxri - dcxli) + rxy[is];
493
494 } /* end of is loop */
495
496 } /* end of jx loop */
497
498 } /* end of jy loop */
499
500 return(0);
501 }
502
503 /*
504 * System function routine. Evaluate f(cc). First call ccomm to do
505 * communication of subgrid boundary data into cext. Then calculate f
506 * by a call to func_local.
507 */
508
func(N_Vector cc,N_Vector fval,void * user_data)509 static int func(N_Vector cc, N_Vector fval, void *user_data)
510 {
511 UserData data;
512
513 data = (UserData) user_data;
514
515 /* Call ccomm to do inter-processor communicaiton */
516 ccomm(data->Nlocal, cc, data);
517
518 /* Call func_local to calculate all right-hand sides */
519 func_local(data->Nlocal, cc, fval, data);
520
521 return(0);
522 }
523
524 /*
525 * Interaction rate function routine
526 */
527
WebRate(realtype xx,realtype yy,realtype * cxy,realtype * ratesxy,void * user_data)528 static void WebRate(realtype xx, realtype yy, realtype *cxy, realtype *ratesxy,
529 void *user_data)
530 {
531 int i;
532 realtype fac;
533 UserData data;
534
535 data = (UserData)user_data;
536
537 for (i = 0; i<NUM_SPECIES; i++)
538 ratesxy[i] = DotProd(NUM_SPECIES, cxy, acoef[i]);
539
540 fac = ONE + ALPHA * xx * yy;
541
542 for (i = 0; i < NUM_SPECIES; i++)
543 ratesxy[i] = cxy[i] * ( bcoef[i] * fac + ratesxy[i] );
544 }
545
546 /*
547 * Dot product routine for realtype arrays
548 */
549
DotProd(int size,realtype * x1,realtype * x2)550 static realtype DotProd(int size, realtype *x1, realtype *x2)
551 {
552 int i;
553 realtype *xx1, *xx2, temp = ZERO;
554
555 xx1 = x1; xx2 = x2;
556 for (i = 0; i < size; i++) temp += (*xx1++) * (*xx2++);
557
558 return(temp);
559 }
560
561 /*
562 *--------------------------------------------------------------------
563 * PRIVATE FUNCTIONS
564 *--------------------------------------------------------------------
565 */
566
567 /*
568 * Allocate memory for data structure of type UserData
569 */
570
AllocUserData(void)571 static UserData AllocUserData(void)
572 {
573 UserData data;
574
575 data = (UserData) malloc(sizeof *data);
576
577 acoef = newDenseMat(NUM_SPECIES, NUM_SPECIES);
578 bcoef = (realtype *)malloc(NUM_SPECIES * sizeof(realtype));
579 cox = (realtype *)malloc(NUM_SPECIES * sizeof(realtype));
580 coy = (realtype *)malloc(NUM_SPECIES * sizeof(realtype));
581
582 return(data);
583 }
584
585 /*
586 * Load problem constants in data
587 */
588
InitUserData(int my_pe,sunindextype Nlocal,MPI_Comm comm,UserData data)589 static void InitUserData(int my_pe, sunindextype Nlocal, MPI_Comm comm, UserData data)
590 {
591 int i, j, np;
592 realtype *a1,*a2, *a3, *a4, dx2, dy2;
593
594 data->mx = MX;
595 data->my = MY;
596 data->ns = NUM_SPECIES;
597 data->np = NUM_SPECIES/2;
598 data->ax = AX;
599 data->ay = AY;
600 data->dx = (data->ax)/(MX-1);
601 data->dy = (data->ay)/(MY-1);
602 data->my_pe = my_pe;
603 data->Nlocal = Nlocal;
604 data->comm = comm;
605 data->isuby = my_pe/NPEX;
606 data->isubx = my_pe - data->isuby*NPEX;
607 data->nsmxsub = NUM_SPECIES * MXSUB;
608 data->nsmxsub2 = NUM_SPECIES * (MXSUB+2);
609
610 /* Set up the coefficients a and b plus others found in the equations */
611 np = data->np;
612
613 dx2=(data->dx)*(data->dx); dy2=(data->dy)*(data->dy);
614
615 for (i = 0; i < np; i++) {
616 a1= &(acoef[i][np]);
617 a2= &(acoef[i+np][0]);
618 a3= &(acoef[i][0]);
619 a4= &(acoef[i+np][np]);
620
621 /* Fill in the portion of acoef in the four quadrants, row by row */
622 for (j = 0; j < np; j++) {
623 *a1++ = -GG;
624 *a2++ = EE;
625 *a3++ = ZERO;
626 *a4++ = ZERO;
627 }
628
629 /* and then change the diagonal elements of acoef to -AA */
630 acoef[i][i]=-AA;
631 acoef[i+np][i+np] = -AA;
632
633 bcoef[i] = BB;
634 bcoef[i+np] = -BB;
635
636 cox[i]=DPREY/dx2;
637 cox[i+np]=DPRED/dx2;
638
639 coy[i]=DPREY/dy2;
640 coy[i+np]=DPRED/dy2;
641 }
642 }
643
644 /*
645 * Free data memory
646 */
647
FreeUserData(UserData data)648 static void FreeUserData(UserData data)
649 {
650
651 destroyMat(acoef);
652 free(bcoef);
653 free(cox); free(coy);
654 N_VDestroy(data->rates);
655
656 free(data);
657
658 }
659
660 /*
661 * Set initial conditions in cc
662 */
663
SetInitialProfiles(N_Vector cc,N_Vector sc)664 static void SetInitialProfiles(N_Vector cc, N_Vector sc)
665 {
666 int i, jx, jy;
667 realtype *cloc, *sloc;
668 realtype ctemp[NUM_SPECIES], stemp[NUM_SPECIES];
669
670 /* Initialize arrays ctemp and stemp used in the loading process */
671 for (i = 0; i < NUM_SPECIES/2; i++) {
672 ctemp[i] = PREYIN;
673 stemp[i] = ONE;
674 }
675 for (i = NUM_SPECIES/2; i < NUM_SPECIES; i++) {
676 ctemp[i] = PREDIN;
677 stemp[i] = RCONST(0.00001);
678 }
679
680 /* Load initial profiles into cc and sc vector from ctemp and stemp. */
681 for (jy = 0; jy < MYSUB; jy++) {
682 for (jx=0; jx < MXSUB; jx++) {
683 cloc = IJ_Vptr(cc,jx,jy);
684 sloc = IJ_Vptr(sc,jx,jy);
685 for (i = 0; i < NUM_SPECIES; i++){
686 cloc[i] = ctemp[i];
687 sloc[i] = stemp[i];
688 }
689 }
690 }
691
692 }
693
694 /*
695 * Print first lines of output (problem description)
696 */
697
PrintHeader(int globalstrategy,int maxl,int maxlrst,sunindextype mudq,sunindextype mldq,sunindextype mukeep,sunindextype mlkeep,realtype fnormtol,realtype scsteptol)698 static void PrintHeader(int globalstrategy, int maxl, int maxlrst,
699 sunindextype mudq, sunindextype mldq,
700 sunindextype mukeep, sunindextype mlkeep,
701 realtype fnormtol, realtype scsteptol)
702 {
703 printf("\nPredator-prey test problem-- KINSol (parallel-BBD version)\n\n");
704 printf("Mesh dimensions = %d X %d\n", MX, MY);
705 printf("Number of species = %d\n", NUM_SPECIES);
706 printf("Total system size = %d\n\n", NEQ);
707 printf("Subgrid dimensions = %d X %d\n", MXSUB, MYSUB);
708 printf("Processor array is %d X %d\n\n", NPEX, NPEY);
709 printf("Flag globalstrategy = %d (0 = None, 1 = Linesearch)\n",
710 globalstrategy);
711 printf("Linear solver is SPGMR with maxl = %d, maxlrst = %d\n",
712 maxl, maxlrst);
713 printf("Preconditioning uses band-block-diagonal matrix from KINBBDPRE\n");
714 printf(" Difference quotient half-bandwidths: mudq = %ld, mldq = %ld\n",
715 (long int) mudq, (long int) mldq);
716 printf(" Retained band block half-bandwidths: mukeep = %ld, mlkeep = %ld\n",
717 (long int) mukeep, (long int) mlkeep);
718 #if defined(SUNDIALS_EXTENDED_PRECISION)
719 printf("Tolerance parameters: fnormtol = %Lg scsteptol = %Lg\n",
720 fnormtol, scsteptol);
721 #elif defined(SUNDIALS_DOUBLE_PRECISION)
722 printf("Tolerance parameters: fnormtol = %g scsteptol = %g\n",
723 fnormtol, scsteptol);
724 #else
725 printf("Tolerance parameters: fnormtol = %g scsteptol = %g\n",
726 fnormtol, scsteptol);
727 #endif
728
729 printf("\nInitial profile of concentration\n");
730 #if defined(SUNDIALS_EXTENDED_PRECISION)
731 printf("At all mesh points: %Lg %Lg %Lg %Lg %Lg %Lg\n",
732 PREYIN,PREYIN,PREYIN, PREDIN,PREDIN,PREDIN);
733 #elif defined(SUNDIALS_DOUBLE_PRECISION)
734 printf("At all mesh points: %g %g %g %g %g %g\n",
735 PREYIN,PREYIN,PREYIN, PREDIN,PREDIN,PREDIN);
736 #else
737 printf("At all mesh points: %g %g %g %g %g %g\n", PREYIN,PREYIN,PREYIN,
738 PREDIN,PREDIN,PREDIN);
739 #endif
740 }
741
742 /*
743 * Print sample of current cc values
744 */
745
PrintOutput(int my_pe,MPI_Comm comm,N_Vector cc)746 static void PrintOutput(int my_pe, MPI_Comm comm, N_Vector cc)
747 {
748 int is, i0, npelast;
749 realtype *ct, tempc[NUM_SPECIES];
750 MPI_Status status;
751
752 npelast = NPEX*NPEY - 1;
753
754 ct = N_VGetArrayPointer(cc);
755
756 /* Send the cc values (for all species) at the top right mesh point to PE 0 */
757 if (my_pe == npelast) {
758 i0 = NUM_SPECIES*(MXSUB*MYSUB-1);
759 if (npelast!=0)
760 MPI_Send(&ct[i0],NUM_SPECIES,MPI_SUNREALTYPE,0,0,comm);
761 else /* single processor case */
762 for (is = 0; is < NUM_SPECIES; is++) tempc[is]=ct[i0+is];
763 }
764
765 /* On PE 0, receive the cc values at top right, then print performance data
766 and sampled solution values */
767 if (my_pe == 0) {
768
769 if (npelast != 0)
770 MPI_Recv(&tempc[0],NUM_SPECIES,MPI_SUNREALTYPE,npelast,0,comm,&status);
771
772 printf("\nAt bottom left:");
773 for (is = 0; is < NUM_SPECIES; is++){
774 if ((is%6)*6== is) printf("\n");
775 #if defined(SUNDIALS_EXTENDED_PRECISION)
776 printf(" %Lg",ct[is]);
777 #elif defined(SUNDIALS_DOUBLE_PRECISION)
778 printf(" %g",ct[is]);
779 #else
780 printf(" %g",ct[is]);
781 #endif
782 }
783
784 printf("\n\nAt top right:");
785 for (is = 0; is < NUM_SPECIES; is++) {
786 if ((is%6)*6 == is) printf("\n");
787 #if defined(SUNDIALS_EXTENDED_PRECISION)
788 printf(" %Lg",tempc[is]);
789 #elif defined(SUNDIALS_DOUBLE_PRECISION)
790 printf(" %g",tempc[is]);
791 #else
792 printf(" %g",tempc[is]);
793 #endif
794 }
795 printf("\n\n");
796 }
797 }
798
799 /*
800 * Print final statistics contained in iopt
801 */
802
PrintFinalStats(void * kmem)803 static void PrintFinalStats(void *kmem)
804 {
805 long int nni, nfe, nli, npe, nps, ncfl, nfeSG;
806 int flag;
807
808 flag = KINGetNumNonlinSolvIters(kmem, &nni);
809 check_flag(&flag, "KINGetNumNonlinSolvIters", 1, 0);
810 flag = KINGetNumFuncEvals(kmem, &nfe);
811 check_flag(&flag, "KINGetNumFuncEvals", 1, 0);
812 flag = KINGetNumLinIters(kmem, &nli);
813 check_flag(&flag, "KINGetNumLinIters", 1, 0);
814 flag = KINGetNumPrecEvals(kmem, &npe);
815 check_flag(&flag, "KINGetNumPrecEvals", 1, 0);
816 flag = KINGetNumPrecSolves(kmem, &nps);
817 check_flag(&flag, "KINGetNumPrecSolves", 1, 0);
818 flag = KINGetNumLinConvFails(kmem, &ncfl);
819 check_flag(&flag, "KINGetNumLinConvFails", 1, 0);
820 flag = KINGetNumLinFuncEvals(kmem, &nfeSG);
821 check_flag(&flag, "KINGetNumLinFuncEvals", 1, 0);
822
823 printf("Final Statistics.. \n");
824 printf("nni = %5ld nli = %5ld\n", nni, nli);
825 printf("nfe = %5ld nfeSG = %5ld\n", nfe, nfeSG);
826 printf("nps = %5ld npe = %5ld ncfl = %5ld\n", nps, npe, ncfl);
827
828 }
829
830 /*
831 * Routine to send boundary data to neighboring PEs
832 */
833
BSend(MPI_Comm comm,int my_pe,int isubx,int isuby,int dsizex,int dsizey,realtype * cdata)834 static void BSend(MPI_Comm comm, int my_pe,
835 int isubx, int isuby,
836 int dsizex, int dsizey, realtype *cdata)
837 {
838 int i, ly;
839 int offsetc, offsetbuf;
840 realtype bufleft[NUM_SPECIES*MYSUB], bufright[NUM_SPECIES*MYSUB];
841
842 /* If isuby > 0, send data from bottom x-line of u */
843 if (isuby != 0)
844 MPI_Send(&cdata[0], dsizex, MPI_SUNREALTYPE, my_pe-NPEX, 0, comm);
845
846 /* If isuby < NPEY-1, send data from top x-line of u */
847 if (isuby != NPEY-1) {
848 offsetc = (MYSUB-1)*dsizex;
849 MPI_Send(&cdata[offsetc], dsizex, MPI_SUNREALTYPE, my_pe+NPEX, 0, comm);
850 }
851
852 /* If isubx > 0, send data from left y-line of u (via bufleft) */
853 if (isubx != 0) {
854 for (ly = 0; ly < MYSUB; ly++) {
855 offsetbuf = ly*NUM_SPECIES;
856 offsetc = ly*dsizex;
857 for (i = 0; i < NUM_SPECIES; i++)
858 bufleft[offsetbuf+i] = cdata[offsetc+i];
859 }
860 MPI_Send(&bufleft[0], dsizey, MPI_SUNREALTYPE, my_pe-1, 0, comm);
861 }
862
863 /* If isubx < NPEX-1, send data from right y-line of u (via bufright) */
864 if (isubx != NPEX-1) {
865 for (ly = 0; ly < MYSUB; ly++) {
866 offsetbuf = ly*NUM_SPECIES;
867 offsetc = offsetbuf*MXSUB + (MXSUB-1)*NUM_SPECIES;
868 for (i = 0; i < NUM_SPECIES; i++)
869 bufright[offsetbuf+i] = cdata[offsetc+i];
870 }
871 MPI_Send(&bufright[0], dsizey, MPI_SUNREALTYPE, my_pe+1, 0, comm);
872 }
873 }
874
875 /*
876 * Routine to start receiving boundary data from neighboring PEs.
877 * Notes:
878 * 1) buffer should be able to hold 2*NUM_SPECIES*MYSUB realtype entries,
879 * should be passed to both the BRecvPost and BRecvWait functions, and
880 * should not be manipulated between the two calls.
881 * 2) request should have 4 entries, and should be passed in both calls also.
882 */
883
BRecvPost(MPI_Comm comm,MPI_Request request[],int my_pe,int isubx,int isuby,int dsizex,int dsizey,realtype * cext,realtype * buffer)884 static void BRecvPost(MPI_Comm comm, MPI_Request request[], int my_pe,
885 int isubx, int isuby,
886 int dsizex, int dsizey,
887 realtype *cext, realtype *buffer)
888 {
889 int offsetce;
890
891 /* Have bufleft and bufright use the same buffer */
892 realtype *bufleft = buffer, *bufright = buffer+NUM_SPECIES*MYSUB;
893
894 /* If isuby > 0, receive data for bottom x-line of cext */
895 if (isuby != 0)
896 MPI_Irecv(&cext[NUM_SPECIES], dsizex, MPI_SUNREALTYPE,
897 my_pe-NPEX, 0, comm, &request[0]);
898
899 /* If isuby < NPEY-1, receive data for top x-line of cext */
900 if (isuby != NPEY-1) {
901 offsetce = NUM_SPECIES*(1 + (MYSUB+1)*(MXSUB+2));
902 MPI_Irecv(&cext[offsetce], dsizex, MPI_SUNREALTYPE,
903 my_pe+NPEX, 0, comm, &request[1]);
904 }
905
906 /* If isubx > 0, receive data for left y-line of cext (via bufleft) */
907 if (isubx != 0) {
908 MPI_Irecv(&bufleft[0], dsizey, MPI_SUNREALTYPE,
909 my_pe-1, 0, comm, &request[2]);
910 }
911
912 /* If isubx < NPEX-1, receive data for right y-line of cext (via bufright) */
913 if (isubx != NPEX-1) {
914 MPI_Irecv(&bufright[0], dsizey, MPI_SUNREALTYPE,
915 my_pe+1, 0, comm, &request[3]);
916 }
917 }
918
919 /*
920 * Routine to finish receiving boundary data from neighboring PEs.
921 * Notes:
922 * 1) buffer should be able to hold 2*NUM_SPECIES*MYSUB realtype entries,
923 * should be passed to both the BRecvPost and BRecvWait functions, and
924 * should not be manipulated between the two calls.
925 * 2) request should have 4 entries, and should be passed in both calls also.
926 */
927
BRecvWait(MPI_Request request[],int isubx,int isuby,int dsizex,realtype * cext,realtype * buffer)928 static void BRecvWait(MPI_Request request[], int isubx,
929 int isuby, int dsizex, realtype *cext,
930 realtype *buffer)
931 {
932 int i, ly;
933 int dsizex2, offsetce, offsetbuf;
934 realtype *bufleft = buffer, *bufright = buffer+NUM_SPECIES*MYSUB;
935 MPI_Status status;
936
937 dsizex2 = dsizex + 2*NUM_SPECIES;
938
939 /* If isuby > 0, receive data for bottom x-line of cext */
940 if (isuby != 0)
941 MPI_Wait(&request[0],&status);
942
943 /* If isuby < NPEY-1, receive data for top x-line of cext */
944 if (isuby != NPEY-1)
945 MPI_Wait(&request[1],&status);
946
947 /* If isubx > 0, receive data for left y-line of cext (via bufleft) */
948 if (isubx != 0) {
949 MPI_Wait(&request[2],&status);
950
951 /* Copy the buffer to cext */
952 for (ly = 0; ly < MYSUB; ly++) {
953 offsetbuf = ly*NUM_SPECIES;
954 offsetce = (ly+1)*dsizex2;
955 for (i = 0; i < NUM_SPECIES; i++)
956 cext[offsetce+i] = bufleft[offsetbuf+i];
957 }
958 }
959
960 /* If isubx < NPEX-1, receive data for right y-line of cext (via bufright) */
961 if (isubx != NPEX-1) {
962 MPI_Wait(&request[3],&status);
963
964 /* Copy the buffer to cext */
965 for (ly = 0; ly < MYSUB; ly++) {
966 offsetbuf = ly*NUM_SPECIES;
967 offsetce = (ly+2)*dsizex2 - NUM_SPECIES;
968 for (i = 0; i < NUM_SPECIES; i++)
969 cext[offsetce+i] = bufright[offsetbuf+i];
970 }
971 }
972 }
973 /*
974 * Check function return value...
975 * opt == 0 means SUNDIALS function allocates memory so check if
976 * returned NULL pointer
977 * opt == 1 means SUNDIALS function returns a flag so check if
978 * flag >= 0
979 * opt == 2 means function allocates memory so check if returned
980 * NULL pointer
981 */
982
check_flag(void * flagvalue,const char * funcname,int opt,int id)983 static int check_flag(void *flagvalue, const char *funcname, int opt, int id)
984 {
985 int *errflag;
986
987 /* Check if SUNDIALS function returned NULL pointer - no memory allocated */
988 if (opt == 0 && flagvalue == NULL) {
989 fprintf(stderr,
990 "\nSUNDIALS_ERROR(%d): %s() failed - returned NULL pointer\n\n",
991 id, funcname);
992 return(1);
993 }
994
995 /* Check if flag < 0 */
996 else if (opt == 1) {
997 errflag = (int *) flagvalue;
998 if (*errflag < 0) {
999 fprintf(stderr,
1000 "\nSUNDIALS_ERROR(%d): %s() failed with flag = %d\n\n",
1001 id, funcname, *errflag);
1002 return(1);
1003 }
1004 }
1005
1006 /* Check if function returned NULL pointer - no memory allocated */
1007 else if (opt == 2 && flagvalue == NULL) {
1008 fprintf(stderr,
1009 "\nMEMORY_ERROR(%d): %s() failed - returned NULL pointer\n\n",
1010 id, funcname);
1011 return(1);
1012 }
1013
1014 return(0);
1015 }
1016