1 /* -----------------------------------------------------------------
2  * Programmer(s): Slaven Peles @ LLNL
3  * -----------------------------------------------------------------
4  * Acknowledgements: This example is based on idaHeat2D_kry_p
5  *                   example by Daniel R. Reynolds @ SMU and
6  *                   Allan Taylor, Alan Hindmarsh and
7  *                   Radu Serban @ LLNL
8  * -----------------------------------------------------------------
9  * Example problem for IDA: 2D heat equation, parallel, GMRES.
10  *
11  * This example solves a discretized 2D heat equation problem.
12  * This version uses the Trilinos (Tpetra) N_Vector and the SUNSPGMR Krylov solver.
13  *
14  * The DAE system solved is a spatial discretization of the PDE
15  *          du/dt = d^2u/dx^2 + d^2u/dy^2
16  * on the unit square. The boundary condition is u = 0 on all edges.
17  * Initial conditions are given by u = 16 x (1 - x) y (1 - y).
18  * The PDE is treated with central differences on a uniform MX x MY
19  * grid. The values of u at the interior points satisfy ODEs, and
20  * equations u = 0 at the boundaries are appended, to form a DAE
21  * system of size N = MX * MY. Here MX = MY = 10.
22  *
23  * The system is actually implemented on submeshes, processor by
24  * processor, with an MXSUB by MYSUB mesh on each of NPEX * NPEY
25  * processors.
26  *
27  * The system is solved with IDA using the Krylov linear solver
28  * SUNSPGMR. The preconditioner uses the diagonal elements of the
29  * Jacobian only. Routines for preconditioning, required by
30  * SUNSPGMR, are supplied here. The constraints u >= 0 are posed
31  * for all components. Local error testing on the boundary values
32  * is suppressed. Output is taken at t = 0, .01, .02, .04,
33  * ..., 10.24.
34  * -----------------------------------------------------------------
35  * When running this example for unit testing, set the environment
36  * variables OMP_PROC_BIND=false and OMP_NUM_THREADS=2 to run
37  * without thread binding and with two OpenMP threads.
38  * -----------------------------------------------------------------
39  */
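
/* -----------------------------------------------------------------
 * Discretization sketch (a summary of the residual assembled below in
 * reslocal(), written out for reference): with mesh spacings dx and dy,
 * an interior point (i,j) contributes
 *
 *   res_ij = u'_ij - [ coeffx*(u_{i-1,j} + u_{i+1,j})
 *                    + coeffy*(u_{i,j-1} + u_{i,j+1})
 *                    - coeffxy*u_ij ]
 *
 * with coeffx = 1/dx^2, coeffy = 1/dy^2 and coeffxy = 2/dx^2 + 2/dy^2 as
 * set in InitUserData(), while boundary points keep the algebraic
 * residual res_ij = u_ij.
 * -----------------------------------------------------------------
 */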
40 
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <cmath>
44 #include <string.h>
45 #include <vector>
46 
47 #include <Tpetra_Core.hpp>
48 #include <Tpetra_Vector.hpp>
49 #include <Tpetra_Version.hpp>
50 
51 #include <ida/ida.h>
52 #include <nvector/trilinos/SundialsTpetraVectorInterface.hpp>
53 #include <nvector/nvector_trilinos.h>
54 #include <sunlinsol/sunlinsol_spgmr.h>
55 #include <sundials/sundials_types.h>
56 #include <sundials/sundials_mpi_types.h>
57 
58 #define ZERO  RCONST(0.0)
59 #define ONE   RCONST(1.0)
60 #define TWO   RCONST(2.0)
61 
62 #define NOUT         11    /* Number of output times */
63 
64 #define NPEX         2     /* No. PEs in x direction of PE array */
65 #define NPEY         2     /* No. PEs in y direction of PE array */
66                            /* Total no. PEs = NPEX*NPEY */
67 #define MXSUB        5     /* No. x points per subgrid */
68 #define MYSUB        5     /* No. y points per subgrid */
69 
70 /* Global spatial mesh is MX x MY = (NPEX x MXSUB) x (NPEY x MYSUB) */
71 
72 /* type definitions */
73 typedef Sundials::TpetraVectorInterface::vector_type vector_type;
74 typedef vector_type::scalar_type scalar_type;
75 typedef vector_type::mag_type mag_type;
76 typedef vector_type::global_ordinal_type global_ordinal_type;
77 typedef vector_type::local_ordinal_type local_ordinal_type;
78 typedef vector_type::node_type::memory_space memory_space;
79 typedef vector_type::node_type::execution_space execution_space;
80 typedef vector_type::map_type map_type;
81 
82 /* User data structure */
83 struct UserData {
84   /* user data destructor */
85   ~UserData()
86   {
87     if (pp != NULL)
88       N_VDestroy(pp);
89   }
90 
91   Teuchos::RCP<const Teuchos::Comm<int> > comm;
92   int thispe, npex, npey, ixsub, jysub;
93   sunindextype mx, my, mxsub, mysub;
94   realtype dx, dy, coeffx, coeffy, coeffxy;
95   Kokkos::View<scalar_type*, memory_space> uext; /* device array */
96 
97   Kokkos::View<scalar_type*, memory_space> send_buff_top;
98   Kokkos::View<scalar_type*, memory_space> send_buff_bottom;
99   Kokkos::View<scalar_type*, memory_space> send_buff_left;
100   Kokkos::View<scalar_type*, memory_space> send_buff_right;
101   Kokkos::View<scalar_type*, memory_space> recv_buff_top;
102   Kokkos::View<scalar_type*, memory_space> recv_buff_bottom;
103   Kokkos::View<scalar_type*, memory_space> recv_buff_left;
104   Kokkos::View<scalar_type*, memory_space> recv_buff_right;
105 
106   typename Kokkos::View<scalar_type*, memory_space>::HostMirror h_send_buff_top;
107   typename Kokkos::View<scalar_type*, memory_space>::HostMirror h_send_buff_bottom;
108   typename Kokkos::View<scalar_type*, memory_space>::HostMirror h_send_buff_left;
109   typename Kokkos::View<scalar_type*, memory_space>::HostMirror h_send_buff_right;
110   typename Kokkos::View<scalar_type*, memory_space>::HostMirror h_recv_buff_top;
111   typename Kokkos::View<scalar_type*, memory_space>::HostMirror h_recv_buff_bottom;
112   typename Kokkos::View<scalar_type*, memory_space>::HostMirror h_recv_buff_left;
113   typename Kokkos::View<scalar_type*, memory_space>::HostMirror h_recv_buff_right;
114 
115   N_Vector pp;    /* vector of diagonal preconditioner elements */
116 };
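
/*
 * Illustrative helper (a sketch; not used by the example itself): the
 * extended work array uext stores the local mxsub x mysub subgrid plus a
 * one-cell ghost layer in row-major order, so local point (i,j), with
 * 0 <= i < mxsub and 0 <= j < mysub, maps to the flat index computed here.
 */
inline sunindextype uext_index(sunindextype i, sunindextype j,
                               sunindextype mxsub)
{
  return (i + 1) + (j + 1)*(mxsub + 2);  /* skip one ghost column and row */
}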
117 
118 
119 /* User-supplied residual and supporting functions */
120 
121 int resHeat(realtype tt, N_Vector uu, N_Vector up,
122             N_Vector rr, void *user_data);
123 
124 static int rescomm(N_Vector uu, N_Vector up, void *user_data);
125 
126 static int reslocal(realtype tt, N_Vector uu, N_Vector up,
127                     N_Vector res,  void *user_data);
128 
129 static int BSend(N_Vector uu, void *user_data);
130 
131 static int BRecvPost(MPI_Request request[], void *user_data);
132 
133 static int BRecvWait(MPI_Request request[], void *user_data);
134 
135 
136 /* User-supplied preconditioner functions */
137 
138 int PsolveHeat(realtype tt, N_Vector uu, N_Vector up, N_Vector rr,
139                N_Vector rvec, N_Vector zvec, realtype c_j,
140                realtype delta, void *user_data);
141 
142 int PsetupHeat(realtype tt, N_Vector yy, N_Vector yp, N_Vector rr,
143                realtype c_j, void *user_data);
144 
145 /* Private function to allocate memory, initialize problem and print output */
146 
147 static int InitUserData(int thispe, const Teuchos::RCP<const Teuchos::Comm<int> > comm, UserData *data);
148 
149 static int AllocUserData(int thispe, N_Vector uu, UserData *data);
150 
151 static int SetInitialProfile(N_Vector uu, N_Vector up, N_Vector id,
152                              N_Vector res, UserData *data);
153 
154 static void PrintHeader(realtype rtol, realtype atol, UserData *data);
155 
156 static void PrintOutput(int id, void *ida_mem, realtype t, N_Vector uu);
157 
158 static void PrintFinalStats(void *ida_mem);
159 
160 static int check_retval(void *flagvalue, const char *funcname, int opt, int id);
161 
162 /*
163  *--------------------------------------------------------------------
164  * MAIN PROGRAM
165  *--------------------------------------------------------------------
166  */
167 
168 int main(int argc, char *argv[])
169 {
170   void *ida_mem = NULL;
171   SUNLinearSolver LS = NULL;
172   int iout, retval;
173   realtype rtol, atol, t0, t1, tout, tret;
174   N_Vector uu = NULL;
175   N_Vector up = NULL;
176   N_Vector constraints = NULL;
177   N_Vector id = NULL;
178   N_Vector res = NULL;
179 
180   /* Start an MPI session */
181   Tpetra::ScopeGuard tpetraScope(&argc, &argv);
182 
183   /* Create Tpetra communicator */
184   auto comm = Tpetra::getDefaultComm();
185   const int thispe = comm->getRank();
186   const int npes   = comm->getSize();
187 
188   /* Allocate and initialize the data structure */
189   UserData *data = new UserData();
190   if(check_retval((void *)data, "malloc", 2, thispe))
191     return -1;
192 
193   /* Initialize user data */
194   InitUserData(thispe, comm, data);
195 
196   /* Set global solution vector lengths. */
197   const sunindextype global_length = data->mx * data->my;
198 
199   /* Choose zero-based (C-style) indexing. */
200   const sunindextype index_base = 0;
201 
202   /* Construct an MPI-distributed Tpetra Map */
203   Teuchos::RCP<const map_type> testMap =
204     Teuchos::rcp(new map_type (global_length, index_base, comm,
205                                Tpetra::GloballyDistributed));
206 
207   /* Check if the number of MPI processes matches the number of subgrids */
208   if (npes != (data->npex * data->npey)) {
209     if (thispe == 0)
210       fprintf(stderr,
211               "\nMPI_ERROR(0): npes = %d is not equal to NPEX*NPEY = %d\n",
212               npes, data->npex * data->npey);
213     delete data;
214     return 1;
215   }
216 
217   /* Construct a Tpetra vector and return a reference-counting pointer to it. */
218   Teuchos::RCP<vector_type> rcpuu =
219     Teuchos::rcp(new vector_type(testMap));
220 
221 
222 
223   /* Allocate and initialize N-vectors. */
224 
225   uu = N_VMake_Trilinos(rcpuu);
226   if(check_retval((void *)uu, "N_VMake_Trilinos", 0, thispe))
227     return -1;
228 
229   up = N_VClone(uu);
230   if(check_retval((void *)up, "N_VClone", 0, thispe))
231     return -1;
232 
233   res = N_VClone(uu);
234   if(check_retval((void *)res, "N_VClone", 0, thispe))
235     return -1;
236 
237   constraints = N_VClone(uu);
238   if(check_retval((void *)constraints, "N_VClone", 0, thispe))
239     return -1;
240 
241   id = N_VClone(uu);
242   if(check_retval((void *)id, "N_VClone", 0, thispe))
243     return -1;
244 
245   /* Allocate user data extended vector and MPI buffers */
246   retval = AllocUserData(thispe, uu, data);
247   if(check_retval(&retval, "AllocUserData", 1, thispe)) return -1;
248 
249 
250   /* Initialize the uu, up, id, and res profiles. */
251   SetInitialProfile(uu, up, id, res, data);
252 
253   /* Set constraints to all 1's for nonnegative solution values. */
254   N_VConst(ONE, constraints);
255 
256   t0 = ZERO; t1 = RCONST(0.01);
257 
258   /* Scalar relative and absolute tolerance. */
259   rtol = ZERO;
260   atol = RCONST(1.0e-3);
261 
262   /* Call IDACreate and IDAInit to initialize the solution. */
263 
264   ida_mem = IDACreate();
265   if(check_retval((void *)ida_mem, "IDACreate", 0, thispe))
266     return -1;
267 
268   retval = IDASetUserData(ida_mem, data);
269   if(check_retval(&retval, "IDASetUserData", 1, thispe))
270     return -1;
271 
272   retval = IDASetSuppressAlg(ida_mem, SUNTRUE);
273   if(check_retval(&retval, "IDASetSuppressAlg", 1, thispe))
274     return -1;
275 
276   retval = IDASetId(ida_mem, id);
277   if(check_retval(&retval, "IDASetId", 1, thispe))
278     return -1;
279 
280   retval = IDASetConstraints(ida_mem, constraints);
281   if(check_retval(&retval, "IDASetConstraints", 1, thispe))
282     return -1;
283   N_VDestroy(constraints);
284 
285   retval = IDAInit(ida_mem, resHeat, t0, uu, up);
286   if(check_retval(&retval, "IDAInit", 1, thispe))
287     return -1;
288 
289   retval = IDASStolerances(ida_mem, rtol, atol);
290   if(check_retval(&retval, "IDASStolerances", 1, thispe))
291     return -1;
292 
293   /* Call SUNLinSol_SPGMR and IDASetLinearSolver to specify the linear solver. */
294 
295   LS = SUNLinSol_SPGMR(uu, PREC_LEFT, 0);  /* use default maxl */
296   if(check_retval((void *)LS, "SUNLinSol_SPGMR", 0, thispe))
297     return -1;
298 
299   retval = IDASetLinearSolver(ida_mem, LS, NULL);
300   if(check_retval(&retval, "IDASetLinearSolver", 1, thispe))
301     return -1;
302 
303   retval = IDASetPreconditioner(ida_mem, PsetupHeat, PsolveHeat);
304   if(check_retval(&retval, "IDASetPreconditioner", 1, thispe))
305     return -1;
306 
307   /* Print output heading (on processor 0 only) and initial solution */
308 
309   if (thispe == 0) PrintHeader(rtol, atol, data);
310   PrintOutput(thispe, ida_mem, t0, uu);
311 
312   /* Loop over tout, call IDASolve, print output. */
313 
314   for (tout = t1, iout = 1; iout <= NOUT; iout++, tout *= TWO) {
315 
316     retval = IDASolve(ida_mem, tout, &tret, uu, up, IDA_NORMAL);
317     if(check_retval(&retval, "IDASolve", 1, thispe))
318       return -1;
319 
320     PrintOutput(thispe, ida_mem, tret, uu);
321 
322   }
323 
324   /* Print remaining counters. */
325 
326   if (thispe == 0) PrintFinalStats(ida_mem);
327 
328   /* Free memory */
329 
330   IDAFree(&ida_mem);
331   SUNLinSolFree(LS);
332 
333   N_VDestroy(id);
334   N_VDestroy(res);
335   N_VDestroy(up);
336   N_VDestroy(uu);
337 
338   /* Delete template Tpetra vector */
339   rcpuu = Teuchos::null;
340 
341   delete data;
342 
343   return(0);
344 }
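
/*
 * Usage sketch (the executable name depends on the build system and is an
 * assumption here): the example expects exactly NPEX*NPEY = 4 MPI ranks,
 * e.g.
 *
 *   mpiexec -n 4 ./idaHeat2D_kry_p_tpetra
 *
 * Any other rank count fails the npes check in main() and exits.
 */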
345 
346 /*
347  *--------------------------------------------------------------------
348  * FUNCTIONS CALLED BY IDA
349  *--------------------------------------------------------------------
350  */
351 
352 /*
353  * resHeat: heat equation system residual function
354  * This uses 5-point central differencing on the interior points, and
355  * includes algebraic equations for the boundary values.
356  * So for each interior point, the residual component has the form
357  *    res_i = u'_i - (central difference)_i
358  * while for each boundary point, it is res_i = u_i.
359  *
360  * This parallel implementation uses several supporting routines.
361  * First a call is made to rescomm to do communication of subgrid boundary
362  * data into array uext.  Then reslocal is called to compute the residual
363  * on individual processors and their corresponding domains.  The routines
364  * BSend, BRecvPost, and BRecvWait handle interprocessor communication
365  * of uu required to calculate the residual.
366  */
367 int resHeat(realtype tt, N_Vector uu, N_Vector up, N_Vector rr,
368             void *user_data)
369 {
370   int retval = 0;
371 
372   /* Call rescomm to do inter-processor communication. */
373   retval = rescomm(uu, up, user_data);
374 
375   /* Call reslocal to calculate res. */
376   retval = reslocal(tt, uu, up, rr, user_data);
377 
378   return(retval);
379 }
380 
381 
382 /*
383  * PsetupHeat: setup for the diagonal preconditioner for the 2D heat equation.
384  *
385  * The optional user-supplied functions PsetupHeat and
386  * PsolveHeat together must define the left preconditioner
387  * matrix P approximating the system Jacobian matrix
388  *                   J = dF/du + cj*dF/du'
389  * (where the DAE system is F(t,u,u') = 0), and solve the linear
390  * systems P z = r.   This is done in this case by keeping only
391  * the diagonal elements of the J matrix above, storing them as
392  * inverses in a vector pp, when computed in PsetupHeat, for
393  * subsequent use in PsolveHeat.
394  *
395  * In this instance, only cj and data (user data structure, with
396  * pp etc.) are used from the PsetupHeat argument list.
397  *
398  */
399 int PsetupHeat(realtype tt, N_Vector yy, N_Vector yp, N_Vector rr,
400                realtype c_j, void *user_data)
401 {
402   /* Unwrap the user data */
403   UserData* data = reinterpret_cast<UserData*>(user_data);
404   const int ixsub = data->ixsub;
405   const int jysub = data->jysub;
406   const int npex  = data->npex;
407   const int npey  = data->npey;
408   const sunindextype mxsub = data->mxsub;
409   const sunindextype mysub = data->mysub;
410 
411   sunindextype ibc, i0, jbc, j0;
412 
413   /* Initially set all pp elements on the device to one. */
414   N_VConst(ONE, data->pp);
415 
416   /* Get access to local data */
417   Teuchos::RCP<vector_type> pptp = N_VGetVector_Trilinos(data->pp);
418   auto pp_2d = pptp->getLocalView<memory_space>();
419   auto pp_1d = Kokkos::subview (pp_2d, Kokkos::ALL(), 0);
420 
421   /* Calculate the value for the inverse element of the diagonal preconditioner */
422   const realtype pelinv = ONE/(c_j + data->coeffxy);
423 
424   ibc = (ixsub == 0) || (ixsub == npex-1);
425   i0  = (ixsub == 0);
426   jbc = (jysub == 0) || (jysub == npey-1);
427   j0  = (jysub == 0);
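  /* ibc/jbc are 1 when this subgrid touches a physical x/y boundary, and
     i0/j0 skip the left/bottom boundary line, so the kernel below updates
     only the points that carry differential (non-boundary) equations.
     (This reasoning assumes npex, npey >= 2, as in this example.) */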
428 
429   /* Set the inverse diagonal elements; pp_1d must reside on the device */
430   Kokkos::parallel_for (Kokkos::RangePolicy<execution_space>(0, (mxsub - ibc)*(mysub - jbc)),
431     KOKKOS_LAMBDA (const local_ordinal_type &tid)
432     {
433       sunindextype j = tid / (mxsub - ibc) + j0;
434       sunindextype i = tid % (mxsub - ibc) + i0;
435 
436       pp_1d(i + j*mxsub) = pelinv;
437     }
438   );
439 
440   return(0);
441 }
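
/*
 * Why pelinv = 1/(c_j + coeffxy): at an interior point the residual is
 *   res_ij = u'_ij - (termx + termy - coeffxy*u_ij),
 * so the diagonal of J = dF/du + c_j*dF/du' is coeffxy + c_j, and pp holds
 * its inverse. Boundary points satisfy res_ij = u_ij, so their diagonal is
 * one, which is exactly the value left by N_VConst(ONE, pp) above.
 * (A sketch of the reasoning, consistent with reslocal() below.)
 */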
442 
443 
444 /*
445  * PsolveHeat: solve preconditioner linear system.
446  * This routine multiplies the input vector rvec by the vector pp
447  * containing the inverse diagonal Jacobian elements (previously
448  * computed in PsetupHeat), returning the result in zvec.
449  */
450 int PsolveHeat(realtype tt, N_Vector uu, N_Vector up,
451                N_Vector rr, N_Vector rvec, N_Vector zvec,
452                realtype c_j, realtype delta, void *user_data)
453 {
454   UserData* data = reinterpret_cast<UserData*>(user_data);
455 
456   N_VProd(data->pp, rvec, zvec);
457 
458   return(0);
459 }
460 
461 /*
462  *--------------------------------------------------------------------
463  * SUPPORTING FUNCTIONS
464  *--------------------------------------------------------------------
465  */
466 
467 
468 /*
469  * rescomm routine.  This routine performs all inter-processor
470  * communication of data in u needed to calculate G.
471  */
472 static int rescomm(N_Vector uu, N_Vector up, void *user_data)
473 {
474   /* Set array of raw MPI requests */
475   MPI_Request request[4];
476 
477   /* Start receiving boundary data from neighboring PEs. */
478   BRecvPost(request, user_data);
479 
480   /* Send data from boundary of local grid to neighboring PEs. */
481   BSend(uu, user_data);
482 
483   /* Finish receiving boundary data from neighboring PEs. */
484   BRecvWait(request, user_data);
485 
486   return(0);
487 }
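
/*
 * Ordering note: the nonblocking receives are posted first (BRecvPost),
 * so each blocking MPI_Send in BSend finds a matching receive already
 * posted and the exchange cannot deadlock; BRecvWait then completes the
 * receives and unpacks the ghost data into uext.
 */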
488 
489 /*
490  * reslocal routine.  Compute res = F(t, uu, up).  This routine assumes
491  * that all inter-processor communication of data needed to calculate F
492  * has already been done, and that this data is in the work array uext.
493  */
494 static int reslocal(realtype tt, N_Vector uu, N_Vector up, N_Vector rr,
495                     void *user_data)
496 {
497   UserData* data = reinterpret_cast<UserData*>(user_data);
498 
499   /* Get subgrid indices, array sizes, and grid coefficients. */
500   const int ixsub = data->ixsub;
501   const int jysub = data->jysub;
502   const int npex  = data->npex;
503   const int npey  = data->npey;
504   const sunindextype mxsub  = data->mxsub;
505   const sunindextype mxsub2 = data->mxsub + 2;
506   const sunindextype mysub  = data->mysub;
507   const realtype coeffx  = data->coeffx;
508   const realtype coeffy  = data->coeffy;
509   const realtype coeffxy = data->coeffxy;
510 
511   sunindextype ibc, i0, jbc, j0;
512 
513   /* Initialize all elements of rr to uu. This sets the boundary
514      elements simply without indexing hassles. */
515   N_VScale(ONE, uu, rr);
516 
517   /* Get vector data arrays, extended work array uext. */
518   Teuchos::RCP<vector_type> uuv = N_VGetVector_Trilinos(uu);
519   Teuchos::RCP<vector_type> upv = N_VGetVector_Trilinos(up);
520   Teuchos::RCP<vector_type> rrv = N_VGetVector_Trilinos(rr);
521 
522   const auto uu_2d = uuv->getLocalView<memory_space>();
523   const auto uu_1d = Kokkos::subview (uu_2d, Kokkos::ALL(), 0);
524   const auto up_2d = upv->getLocalView<memory_space>();
525   const auto up_1d = Kokkos::subview (up_2d, Kokkos::ALL(), 0);
526   auto rr_2d = rrv->getLocalView<memory_space>();
527   auto rr_1d = Kokkos::subview (rr_2d, Kokkos::ALL(), 0);
528   Kokkos::View<realtype*, memory_space> uext_1d = data->uext;
529 
530   /* Copy local segment of u vector into the working extended array uext.
531      This completes uext prior to the computation of the rr vector.
532      uext and uuv must be on the device.     */
533   Kokkos::parallel_for (Kokkos::RangePolicy<execution_space>(0, mxsub*mysub),
534     KOKKOS_LAMBDA (const local_ordinal_type &tid)
535     {
536       sunindextype j = tid/mxsub;
537       sunindextype i = tid%mxsub;
538 
539       uext_1d((i+1) + (j+1)*mxsub2) = uu_1d(i + j*mxsub);
540     }
541   );
542 
543   /* Set loop limits for the interior of the local subgrid. */
544 
545   /* Prepare to loop over subgrid. */
546   ibc = (ixsub == 0) || (ixsub == npex-1);
547   i0  = (ixsub == 0);
548   jbc = (jysub == 0) || (jysub == npey-1);
549   j0  = (jysub == 0);
550 
551   /* Compute the local residual; uext_1d, up_1d, and rr_1d must be on the device */
552   Kokkos::parallel_for (Kokkos::RangePolicy<execution_space>(0, (mxsub - ibc)*(mysub - jbc)),
553     KOKKOS_LAMBDA (const local_ordinal_type &tid)
554     {
555       sunindextype j = tid/(mxsub - ibc) + j0;
556       sunindextype i = tid%(mxsub - ibc) + i0;
557       sunindextype locu  = i + j*mxsub;
558       sunindextype locue = (i+1) + (j+1)*mxsub2;
559 
560       realtype termx   = coeffx * (uext_1d(locue-1)      + uext_1d(locue+1));
561       realtype termy   = coeffy * (uext_1d(locue-mxsub2) + uext_1d(locue+mxsub2));
562       realtype termctr = coeffxy * uext_1d(locue);
563       rr_1d(locu) = up_1d(locu) - (termx + termy - termctr);
564     }
565   );
566 
567   return(0);
568 }
569 
570 
571 /*
572  * Routine to send boundary data to neighboring PEs.
573  */
574 static int BSend(N_Vector uu, void *user_data)
575 {
576   UserData* data = reinterpret_cast<UserData*>(user_data);
577 
578   /* Get comm, thispe, subgrid indices, data sizes */
579   auto comm = Teuchos::rcp_dynamic_cast<const Teuchos::MpiComm<int> >(data->comm);
580   auto rawComm = comm->getRawMpiComm();
581 
582   const int thispe = data->thispe;
583   const int ixsub  = data->ixsub;
584   const int jysub  = data->jysub;
585   const int npex   = data->npex;
586   const int npey   = data->npey;
587 
588   /* Type conversion from sunindextype to int (MPI_Send expects an int count) */
589   const int mxsub = data->mxsub;
590   const int mysub = data->mysub;
591 
592   /* Get pointers to buffers and extended solution vector data array uext. */
593   Kokkos::View<realtype*, memory_space> bufleft   = data->send_buff_left;
594   Kokkos::View<realtype*, memory_space> bufright  = data->send_buff_right;
595   Kokkos::View<realtype*, memory_space> buftop    = data->send_buff_top;
596   Kokkos::View<realtype*, memory_space> bufbottom = data->send_buff_bottom;
597   typename Kokkos::View<realtype*>::HostMirror h_bufleft   =  data->h_send_buff_left;
598   typename Kokkos::View<realtype*>::HostMirror h_bufright  =  data->h_send_buff_right;
599   typename Kokkos::View<realtype*>::HostMirror h_buftop    =  data->h_send_buff_top;
600   typename Kokkos::View<realtype*>::HostMirror h_bufbottom =  data->h_send_buff_bottom;
601 
602   /* Get solution vector data. */
603   Teuchos::RCP<vector_type> uuv = N_VGetVector_Trilinos(uu);
604   const auto uu_2d = uuv->getLocalView<memory_space>();
605   const auto uu_1d = Kokkos::subview (uu_2d, Kokkos::ALL(), 0);
606 
607   /* If jysub > 0, send data from bottom x-line of u.  (via bufbottom) */
608 
609   if (jysub != 0) {
610     /* Device kernel here to copy from uarray to the buffer on the device */
611     Kokkos::parallel_for (Kokkos::RangePolicy<execution_space>(0, mxsub),
612       KOKKOS_LAMBDA (const local_ordinal_type &lx)
613       {
614         bufbottom(lx) = uu_1d(lx);
615       }
616     );
617     /* Copy device buffer to the corresponding host buffer */
618     Kokkos::deep_copy(h_bufbottom, bufbottom);
619     /* MPI send buffer */
620     MPI_Send(h_bufbottom.data(), mxsub, MPI_SUNREALTYPE, thispe-npex, 0, *rawComm);
621   }
622 
623   /* If jysub < NPEY-1, send data from top x-line of u. (via buftop) */
624 
625   if (jysub != npey-1) {
626     /* Device kernel here to copy from uarray to the buffer on the device */
627     Kokkos::parallel_for (Kokkos::RangePolicy<execution_space>(0, mxsub),
628       KOKKOS_LAMBDA (const local_ordinal_type &lx)
629       {
630         buftop(lx) = uu_1d((mysub-1)*mxsub + lx);
631       }
632     );
633     /* Copy buffer to the host */
634     Kokkos::deep_copy(h_buftop, buftop);
635     /* MPI send buffer */
636     MPI_Send(h_buftop.data(), mxsub, MPI_SUNREALTYPE, thispe+npex, 0, *rawComm);
637   }
638 
639   /* If ixsub > 0, send data from left y-line of u (via bufleft). */
640 
641   if (ixsub != 0) {
642     /* Device kernel here to copy from uarray to the buffer on the device */
643     Kokkos::parallel_for (Kokkos::RangePolicy<execution_space>(0, mysub),
644       KOKKOS_LAMBDA (const local_ordinal_type &ly)
645       {
646         bufleft(ly) = uu_1d(ly*mxsub);
647       }
648     );
649     /* Copy buffer to the host */
650     Kokkos::deep_copy(h_bufleft, bufleft);
651     /* MPI send buffer */
652     MPI_Send(h_bufleft.data(), mysub, MPI_SUNREALTYPE, thispe-1, 0, *rawComm);
653   }
654 
655   /* If ixsub < NPEX-1, send data from right y-line of u (via bufright). */
656 
657   if (ixsub != npex-1) {
658     /* Device kernel here to copy from uarray to the buffer on the device */
659     Kokkos::parallel_for (Kokkos::RangePolicy<execution_space>(0, mysub),
660       KOKKOS_LAMBDA (const local_ordinal_type &ly)
661       {
662         bufright(ly) = uu_1d(ly*mxsub + (mxsub-1));
663       }
664     );
665     /* Copy buffer to the host */
666     Kokkos::deep_copy(h_bufright, bufright);
667     /* MPI send buffer */
668     MPI_Send(h_bufright.data(), mysub, MPI_SUNREALTYPE, thispe+1, 0, *rawComm);
669   }
670 
671   return(0);
672 
673 }
674 
675 
676 /*
677  * Routine to start receiving boundary data from neighboring PEs.
678  * Notes:
679  *   1) The receive buffers must hold MXSUB realtype entries (top/bottom) or
680  *      MYSUB entries (left/right), are shared by BRecvPost and BRecvWait,
681  *      and must not be modified between the two calls.
682  *   2) request should have 4 entries, and should be passed in
683  *      both calls also.
684  */
685 static int BRecvPost(MPI_Request request[], void *user_data)
686 {
687   UserData* data = reinterpret_cast<UserData*>(user_data);
688 
689   /* Get comm, thispe, subgrid indices, data sizes */
690   auto comm = Teuchos::rcp_dynamic_cast<const Teuchos::MpiComm<int> >(data->comm);
691   auto rawComm = comm->getRawMpiComm();
692 
693   const int thispe = data->thispe;
694   const int ixsub  = data->ixsub;
695   const int jysub  = data->jysub;
696   const int npex   = data->npex;
697   const int npey   = data->npey;
698   const sunindextype mxsub = data->mxsub;
699   const sunindextype mysub = data->mysub;
700 
701   /* Get left, right, top and bottom host buffers. */
702   typename Kokkos::View<realtype*>::HostMirror h_bufleft   = data->h_recv_buff_left;
703   typename Kokkos::View<realtype*>::HostMirror h_bufright  = data->h_recv_buff_right;
704   typename Kokkos::View<realtype*>::HostMirror h_buftop    = data->h_recv_buff_top;
705   typename Kokkos::View<realtype*>::HostMirror h_bufbottom = data->h_recv_buff_bottom;
706 
707   /* If jysub > 0, receive data for bottom x-line of uext. */
708   if (jysub != 0) {
709     MPI_Irecv(h_bufbottom.data(), mxsub, MPI_SUNREALTYPE,
710               thispe-npex, 0, *rawComm, &request[0]);
711   }
712 
713   /* If jysub < NPEY-1, receive data for top x-line of uext. */
714   if (jysub != npey-1) {
715     MPI_Irecv(h_buftop.data(), mxsub, MPI_SUNREALTYPE,
716               thispe+npex, 0, *rawComm, &request[1]);
717   }
718 
719   /* If ixsub > 0, receive data for left y-line of uext (via bufleft). */
720   if (ixsub != 0) {
721     MPI_Irecv(h_bufleft.data(), mysub, MPI_SUNREALTYPE,
722               thispe-1, 0, *rawComm, &request[2]);
723   }
724 
725   /* If ixsub < NPEX-1, receive data for right y-line of uext (via bufright). */
726   if (ixsub != npex-1) {
727     MPI_Irecv(h_bufright.data(), mysub, MPI_SUNREALTYPE,
728               thispe+1, 0, *rawComm, &request[3]);
729   }
730 
731   return(0);
732 
733 }
734 
735 /*
736  * Routine to finish receiving boundary data from neighboring PEs.
737  * Notes:
738  *   1) The receive buffers must hold MXSUB realtype entries (top/bottom) or
739  *      MYSUB entries (left/right), are shared by BRecvPost and BRecvWait,
740  *      and must not be modified between the two calls.
741  *   2) request should have four entries, and should be passed in both
742  *      calls also.
743  */
744 static int BRecvWait(MPI_Request request[], void *user_data)
745 {
746   MPI_Status status;
747   UserData* data = reinterpret_cast<UserData*>(user_data);
748 
749   /* Get thispe, subgrid indices, data sizes */
750   const int ixsub  = data->ixsub;
751   const int jysub  = data->jysub;
752   const int npex   = data->npex;
753   const int npey   = data->npey;
754   const sunindextype mxsub = data->mxsub;
755   const sunindextype mysub = data->mysub;
756 
757   /* Get pointers to buffers and extended solution vector data array uext. */
758   typename Kokkos::View<realtype*>::HostMirror h_bufleft   = data->h_recv_buff_left;
759   typename Kokkos::View<realtype*>::HostMirror h_bufright  = data->h_recv_buff_right;
760   typename Kokkos::View<realtype*>::HostMirror h_buftop    = data->h_recv_buff_top;
761   typename Kokkos::View<realtype*>::HostMirror h_bufbottom = data->h_recv_buff_bottom;
762 
763   Kokkos::View<realtype*, memory_space> bufleft   = data->recv_buff_left;
764   Kokkos::View<realtype*, memory_space> bufright  = data->recv_buff_right;
765   Kokkos::View<realtype*, memory_space> buftop    = data->recv_buff_top;
766   Kokkos::View<realtype*, memory_space> bufbottom = data->recv_buff_bottom;
767 
768   Kokkos::View<realtype*, memory_space> uext_1d = data->uext;
769 
770   const sunindextype mxsub2 = mxsub + 2;
771   const sunindextype mysub1 = mysub + 1;
772 
773   /* If jysub > 0, receive data for bottom x-line of uext. */
774   if (jysub != 0) {
775     MPI_Wait(&request[0], &status);
776     /* Copy the buffer from the host to the device */
777     Kokkos::deep_copy(bufbottom, h_bufbottom);
778     /* Copy the bottom dev_recv_buff to uext. */
779     Kokkos::parallel_for (Kokkos::RangePolicy<execution_space>(0, mxsub),
780       KOKKOS_LAMBDA (const local_ordinal_type &lx)
781       {
782         uext_1d(1 + lx) = bufbottom(lx);
783       }
784     );
785   }
786 
787   /* If jysub < NPEY-1, receive data for top x-line of uext. */
788   if (jysub != npey-1) {
789     MPI_Wait(&request[1], &status);
790     /* Copy the buffer from the host to the device */
791     Kokkos::deep_copy(buftop, h_buftop);
792     /* Copy the top dev_recv_buff to uext. */
793     Kokkos::parallel_for (Kokkos::RangePolicy<execution_space>(0, mxsub),
794       KOKKOS_LAMBDA (const local_ordinal_type &lx)
795       {
796         uext_1d((1 + mysub1*mxsub2) + lx) = buftop(lx);
797       }
798     );
799   }
800 
801   /* If ixsub > 0, receive data for left y-line of uext (via bufleft). */
802   if (ixsub != 0) {
803     MPI_Wait(&request[2], &status);
804     /* Copy the buffer from the host to the device */
805     Kokkos::deep_copy(bufleft, h_bufleft);
806     /* Copy the left dev_recv_buff to uext. */
807     Kokkos::parallel_for (Kokkos::RangePolicy<execution_space>(0, mysub),
808       KOKKOS_LAMBDA (const local_ordinal_type &ly)
809       {
810         uext_1d((ly + 1)*mxsub2) = bufleft(ly);
811       }
812     );
813   }
814 
815   /* If ixsub < NPEX-1, receive data for right y-line of uext (via bufright). */
816   if (ixsub != npex-1) {
817     MPI_Wait(&request[3], &status);
818     /* Copy the buffer from the host to the device */
819     Kokkos::deep_copy(bufright, h_bufright);
820     /* Copy the right dev_recv_buff to uext. */
821     Kokkos::parallel_for (Kokkos::RangePolicy<execution_space>(0, mysub),
822       KOKKOS_LAMBDA (const local_ordinal_type &ly)
823       {
824         uext_1d((ly + 2)*mxsub2 - 1) = bufright(ly);
825       }
826     );
827   }
828 
829   return(0);
830 }
831 
832 /*
833  *--------------------------------------------------------------------
834  * PRIVATE FUNCTIONS
835  *--------------------------------------------------------------------
836  */
837 
838 /*
839  * InitUserData initializes the user's data block data.
840  */
841 static int InitUserData(int thispe, const Teuchos::RCP<const Teuchos::Comm<int> > comm, UserData *data)
842 {
843   data->comm    = comm;
844   data->thispe  = thispe;
845   data->npex    = NPEX;  /* Number of subgrids in x-direction */
846   data->npey    = NPEY;  /* Number of subgrids in y-direction */
847   data->mxsub   = MXSUB; /* Number of subgrid mesh points in x-direction */
848   data->mysub   = MYSUB; /* Number of subgrid mesh points in y-direction */
849   data->jysub   = thispe/data->npex;
850   data->ixsub   = thispe - (data->jysub * data->npex);
851   data->mx      = data->npex * data->mxsub;  /* Mesh size in x-direction */
852   data->my      = data->npey * data->mysub;  /* Mesh size in y-direction */
853   data->dx      = ONE/(data->mx-ONE); /* Assumes a [0,1] interval in x. */
854   data->dy      = ONE/(data->my-ONE); /* Assumes a [0,1] interval in y. */
855   data->coeffx  = ONE/(data->dx * data->dx);
856   data->coeffy  = ONE/(data->dy * data->dy);
857   data->coeffxy = TWO/(data->dx * data->dx) + TWO/(data->dy * data->dy);
858 
859   return(0);
860 }
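
/*
 * For example, with NPEX = NPEY = 2 and MXSUB = MYSUB = 5, process 3 gets
 * jysub = 3/2 = 1 and ixsub = 3 - 1*2 = 1 (the top-right 5x5 subgrid), and
 * the global mesh is mx = my = 10 with dx = dy = 1/9.
 */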
861 
862 
863 /*
864  * AllocUserData allocates memory for the extended vector uext
865  * and MPI communication buffers.
866  */
867 static int AllocUserData(int thispe, N_Vector uu, UserData *data)
868 {
869   sunindextype mxsub = data->mxsub;
870   sunindextype mysub = data->mysub;
871 
872   /* Allocate local extended vector (includes ghost nodes) */
873   Kokkos::resize(data->uext, (mxsub + 2)*(mysub +2));
874 
875   /* Allocate local send buffers */
876   Kokkos::resize(data->send_buff_left,   mysub);
877   Kokkos::resize(data->send_buff_right,  mysub);
878   Kokkos::resize(data->send_buff_top,    mxsub);
879   Kokkos::resize(data->send_buff_bottom, mxsub);
880 
881   data->h_send_buff_left = create_mirror_view(data->send_buff_left);
882   data->h_send_buff_right = create_mirror_view(data->send_buff_right);
883   data->h_send_buff_top = create_mirror_view(data->send_buff_top);
884   data->h_send_buff_bottom = create_mirror_view(data->send_buff_bottom);
885 
886   /* Allocate local receive buffers */
887   Kokkos::resize(data->recv_buff_left,   mysub);
888   Kokkos::resize(data->recv_buff_right,  mysub);
889   Kokkos::resize(data->recv_buff_top,    mxsub);
890   Kokkos::resize(data->recv_buff_bottom, mxsub);
891 
892   data->h_recv_buff_left = create_mirror_view(data->recv_buff_left);
893   data->h_recv_buff_right = create_mirror_view(data->recv_buff_right);
894   data->h_recv_buff_top = create_mirror_view(data->recv_buff_top);
895   data->h_recv_buff_bottom = create_mirror_view(data->recv_buff_bottom);
896 
897   /* An N_Vector to hold the diagonal preconditioner elements. */
898   data->pp = N_VClone(uu);
899   if(data->pp == NULL) {
900     return -1;
901   }
902 
903   return 0;
904 }
905 
906 
907 /*
908  * SetInitialProfile sets the initial values for the problem.
909  */
910 static int SetInitialProfile(N_Vector uu, N_Vector up, N_Vector id,
911                              N_Vector res, UserData *data)
912 {
913   sunindextype i, iloc, j, jloc, loc;
914   realtype xfact, yfact;
915 
916   /* Initialize uu. */
917 
918   /* Get Tpetra vectors */
919   Teuchos::RCP<vector_type> tu = N_VGetVector_Trilinos(uu);
920   Teuchos::RCP<vector_type> ti = N_VGetVector_Trilinos(id);
921 
922   /* Get access to local uu and id data, sync the host with the device */
923   tu->sync<Kokkos::HostSpace>();
924   auto u_2d = tu->getLocalView<Kokkos::HostSpace>();
925   auto u_1d = Kokkos::subview(u_2d, Kokkos::ALL(), 0);
926   tu->modify<Kokkos::HostSpace>();
927 
928   ti->sync<Kokkos::HostSpace>();
929   auto id_2d = ti->getLocalView<Kokkos::HostSpace>();
930   auto id_1d = Kokkos::subview(id_2d, Kokkos::ALL(), 0);
931   ti->modify<Kokkos::HostSpace>();
932 
933   /* Set mesh spacings and subgrid indices for this PE. */
934   const realtype dx = data->dx;
935   const realtype dy = data->dy;
936   const int ixsub = data->ixsub;
937   const int jysub = data->jysub;
938 
939   /* Set beginning and ending locations in the global array corresponding
940      to the portion of that array assigned to this processor. */
941   const sunindextype mxsub   = data->mxsub;
942   const sunindextype mysub   = data->mysub;
943   const sunindextype ixbegin = mxsub*ixsub;
944   const sunindextype ixend   = mxsub*(ixsub+1) - 1;
945   const sunindextype jybegin = mysub*jysub;
946   const sunindextype jyend   = mysub*(jysub+1) - 1;
947 
948   /* Loop over the local array, computing the initial profile value.
949      The global indices are (i,j) and the local indices are (iloc,jloc).
950      Also set the id vector to zero for boundary points, one otherwise. */
951 
952   for (j = jybegin, jloc = 0; j <= jyend; j++, jloc++) {
953     yfact = dy*j;
954     for (i = ixbegin, iloc = 0; i <= ixend; i++, iloc++) {
955       xfact = dx*i;
956       loc = iloc + jloc*mxsub;
957       u_1d(loc) = RCONST(16.0) * xfact * (ONE - xfact) * yfact * (ONE - yfact);
958 
959       if (i == 0 || i == data->mx - 1 || j == 0 || j == data->my - 1)
960         id_1d(loc) = ZERO;
961       else
962         id_1d(loc) = ONE;
963     }
964   }
965 
966   /* Sync the device with the host */
967   tu->sync<memory_space>();
968   ti->sync<memory_space>();
969 
970   /* Initialize up. */
971 
972   N_VConst(ZERO, up);    /* Initially set up = 0. */
973 
974   /* resHeat sets res to negative of ODE RHS values at interior points. */
975   resHeat(ZERO, uu, up, res, data);
976 
977   /* Copy -res into up to get correct initial up values on the device only! */
978   N_VScale(-ONE, res, up);
979 
980   return(0);
981 }
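
/*
 * Worked check of the initial profile: u = 16 x (1-x) y (1-y) peaks at the
 * domain center with value 16*(1/2)^2*(1/2)^2 = 1; on the 10x10 mesh
 * (dx = dy = 1/9) the largest nodal value is 16*(4/9)*(5/9)*(4/9)*(5/9)
 * ~= 0.975, which should match the max-norm printed at t = 0.
 */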
982 
983 
984 /*
985  * Print first lines of output and table heading
986  */
987 static void PrintHeader(realtype rtol, realtype atol, UserData *data)
988 {
989   printf("\nidaHeat2D_kry_p: Heat equation, parallel example problem for IDA\n");
990   printf("            Discretized heat equation on 2D unit square.\n");
991   printf("            Zero boundary conditions,");
992   printf(" polynomial initial conditions.\n");
993   printf("            Mesh dimensions: %d x %d", (int) data->mx, (int) data->my);
994   printf("        Total system size: %ld\n\n", (long) data->mx * data->my);
995   printf("Subgrid dimensions: %d x %d", (int) data->mxsub, (int) data->mysub);
996   printf("        Processor array: %d x %d\n", (int) data->npex, (int) data->npey);
997 #if defined(SUNDIALS_EXTENDED_PRECISION)
998   printf("Tolerance parameters:  rtol = %Lg   atol = %Lg\n", rtol, atol);
999 #elif defined(SUNDIALS_DOUBLE_PRECISION)
1000   printf("Tolerance parameters:  rtol = %g   atol = %g\n", rtol, atol);
1001 #else
1002   printf("Tolerance parameters:  rtol = %g   atol = %g\n", rtol, atol);
1003 #endif
1004   printf("Constraints set to force all solution components >= 0. \n");
1005   printf("SUPPRESSALG = SUNTRUE to suppress local error testing on ");
1006   printf("all boundary components. \n");
1007   printf("Linear solver: SUNSPGMR  ");
1008   printf("Preconditioner: diagonal elements only.\n");
1009 
1010   /* Print output table heading and initial line of table. */
1011   printf("\n   Output Summary (umax = max-norm of solution) \n\n");
1012   printf("  time     umax       k  nst  nni  nli   nre   nreLS    h      npe nps\n");
1013   printf("----------------------------------------------------------------------\n");
1014 }
1015 
1016 
1017 /*
1018  * PrintOutput: print max norm of solution and current solver statistics
1019  */
1020 static void PrintOutput(int id, void *ida_mem, realtype t, N_Vector uu)
1021 {
1022   realtype hused, umax;
1023   long int nst, nni, nje, nre, nreLS, nli, npe, nps;
1024   int kused, retval;
1025 
1026   umax = N_VMaxNorm(uu);
1027 
1028   if (id == 0) {
1029 
1030     retval = IDAGetLastOrder(ida_mem, &kused);
1031     check_retval(&retval, "IDAGetLastOrder", 1, id);
1032     retval = IDAGetNumSteps(ida_mem, &nst);
1033     check_retval(&retval, "IDAGetNumSteps", 1, id);
1034     retval = IDAGetNumNonlinSolvIters(ida_mem, &nni);
1035     check_retval(&retval, "IDAGetNumNonlinSolvIters", 1, id);
1036     retval = IDAGetNumResEvals(ida_mem, &nre);
1037     check_retval(&retval, "IDAGetNumResEvals", 1, id);
1038     retval = IDAGetLastStep(ida_mem, &hused);
1039     check_retval(&retval, "IDAGetLastStep", 1, id);
1040     retval = IDAGetNumJtimesEvals(ida_mem, &nje);
1041     check_retval(&retval, "IDAGetNumJtimesEvals", 1, id);
1042     retval = IDAGetNumLinIters(ida_mem, &nli);
1043     check_retval(&retval, "IDAGetNumLinIters", 1, id);
1044     retval = IDAGetNumLinResEvals(ida_mem, &nreLS);
1045     check_retval(&retval, "IDAGetNumLinResEvals", 1, id);
1046     retval = IDAGetNumPrecEvals(ida_mem, &npe);
1047     check_retval(&retval, "IDAGetNumPrecEvals", 1, id);
1048     retval = IDAGetNumPrecSolves(ida_mem, &nps);
1049     check_retval(&retval, "IDAGetNumPrecSolves", 1, id);
1050 
1051 #if defined(SUNDIALS_EXTENDED_PRECISION)
1052     printf(" %5.2Lf %13.5Le  %d  %3ld  %3ld  %3ld  %4ld  %4ld  %9.2Le  %3ld %3ld\n",
1053            t, umax, kused, nst, nni, nje, nre, nreLS, hused, npe, nps);
1054 #elif defined(SUNDIALS_DOUBLE_PRECISION)
1055     printf(" %5.2f %13.5e  %d  %3ld  %3ld  %3ld  %4ld  %4ld  %9.2e  %3ld %3ld\n",
1056            t, umax, kused, nst, nni, nje, nre, nreLS, hused, npe, nps);
1057 #else
1058     printf(" %5.2f %13.5e  %d  %3ld  %3ld  %3ld  %4ld  %4ld  %9.2e  %3ld %3ld\n",
1059            t, umax, kused, nst, nni, nje, nre, nreLS, hused, npe, nps);
1060 #endif
1061 
1062   }
1063 }
1064 
1065 
1066 /*
1067  * Print some final integrator statistics
1068  */
1069 static void PrintFinalStats(void *ida_mem)
1070 {
1071   long int netf, ncfn, ncfl;
1072 
1073   IDAGetNumErrTestFails(ida_mem, &netf);
1074   IDAGetNumNonlinSolvConvFails(ida_mem, &ncfn);
1075   IDAGetNumLinConvFails(ida_mem, &ncfl);
1076 
1077   printf("\nError test failures            = %ld\n", netf);
1078   printf("Nonlinear convergence failures = %ld\n", ncfn);
1079   printf("Linear convergence failures    = %ld\n", ncfl);
1080 }
1081 
1082 /*
1083  * Check function return value...
1084  *   opt == 0 means SUNDIALS function allocates memory so check if
1085  *            returned NULL pointer
1086  *   opt == 1 means SUNDIALS function returns a flag so check if
1087  *            flag >= 0
1088  *   opt == 2 means function allocates memory so check if returned
1089  *            NULL pointer
1090  */
1091 static int check_retval(void *flagvalue, const char *funcname, int opt, int id)
1092 {
1093   int *errflag;
1094 
1095   if (opt == 0 && flagvalue == NULL) {
1096     /* Check if SUNDIALS function returned NULL pointer - no memory allocated */
1097     fprintf(stderr,
1098             "\nSUNDIALS_ERROR(%d): %s() failed - returned NULL pointer\n\n",
1099             id, funcname);
1100     return(1);
1101   } else if (opt == 1) {
1102     /* Check if flag < 0 */
1103     errflag = (int *) flagvalue;
1104     if (*errflag < 0) {
1105       fprintf(stderr,
1106               "\nSUNDIALS_ERROR(%d): %s() failed with flag = %d\n\n",
1107               id, funcname, *errflag);
1108       return(1);
1109     }
1110   } else if (opt == 2 && flagvalue == NULL) {
1111     /* Check if function returned NULL pointer - no memory allocated */
1112     fprintf(stderr,
1113             "\nMEMORY_ERROR(%d): %s() failed - returned NULL pointer\n\n",
1114             id, funcname);
1115     return(1);
1116   }
1117 
1118   return(0);
1119 }
1120