1 /* -----------------------------------------------------------------
2  * Programmer(s): Slaven Peles and Cody J. Balos @ LLNL
3  * -----------------------------------------------------------------
4  * SUNDIALS Copyright Start
5  * Copyright (c) 2002-2021, Lawrence Livermore National Security
6  * and Southern Methodist University.
7  * All rights reserved.
8  *
9  * See the top-level LICENSE and NOTICE files for details.
10  *
11  * SPDX-License-Identifier: BSD-3-Clause
12  * SUNDIALS Copyright End
13  * -----------------------------------------------------------------
14  * This is the header file for the CUDA implementation of the
15  * NVECTOR module.
16  * -----------------------------------------------------------------*/
17 
18 #ifndef _NVECTOR_CUDA_H
19 #define _NVECTOR_CUDA_H
20 
21 #include <cuda_runtime.h>
22 #include <stdio.h>
23 
24 #include <sundials/sundials_cuda_policies.hpp>
25 #include <sundials/sundials_nvector.h>
26 #include <sundials/sundials_config.h>
27 #include <sunmemory/sunmemory_cuda.h>
28 
29 #ifdef __cplusplus  /* wrapper to enable C++ usage */
30 extern "C" {
31 #endif
32 
33 /*
34  * -----------------------------------------------------------------
35  * CUDA implementation of N_Vector
36  * -----------------------------------------------------------------
37  */
38 
39 struct _N_VectorContent_Cuda
40 {
41   sunindextype       length;
42   booleantype        own_exec;
43   booleantype        own_helper;
44   SUNMemory          host_data;
45   SUNMemory          device_data;
46   SUNCudaExecPolicy* stream_exec_policy;
47   SUNCudaExecPolicy* reduce_exec_policy;
48   SUNMemoryHelper    mem_helper;
49   void*              priv; /* 'private' data */
50 };
51 
52 typedef struct _N_VectorContent_Cuda *N_VectorContent_Cuda;
53 
54 /*
55  * -----------------------------------------------------------------
56  * NVECTOR_CUDA implementation specific functions
57  * -----------------------------------------------------------------
58  */
59 
60 SUNDIALS_EXPORT N_Vector N_VNewEmpty_Cuda();
61 SUNDIALS_EXPORT N_Vector N_VNew_Cuda(sunindextype length);
62 SUNDIALS_EXPORT N_Vector N_VNewManaged_Cuda(sunindextype length);
63 SUNDIALS_EXPORT N_Vector N_VNewWithMemHelp_Cuda(sunindextype length,
64                                                 booleantype use_managed_mem,
65                                                 SUNMemoryHelper helper);
66 SUNDIALS_EXPORT N_Vector N_VMake_Cuda(sunindextype length,
67                                       realtype *h_vdata,
68                                       realtype *d_vdata);
69 SUNDIALS_EXPORT N_Vector N_VMakeManaged_Cuda(sunindextype length,
70                                              realtype *vdata);
71 /* DEPRECATION NOTICE:
72    In SUNDIALS v6, this function will be removed.
73    Use N_VNewWithMemHelp_Cuda instead.
74  */
75 SUNDIALS_DEPRECATED_EXPORT
76 N_Vector N_VMakeWithManagedAllocator_Cuda(sunindextype length,
77                                           void* (*allocfn)(size_t),
78                                           void (*freefn)(void*));
79 SUNDIALS_EXPORT void N_VSetHostArrayPointer_Cuda(realtype* h_vdata, N_Vector v);
80 SUNDIALS_EXPORT void N_VSetDeviceArrayPointer_Cuda(realtype* d_vdata, N_Vector v);
81 SUNDIALS_EXPORT booleantype N_VIsManagedMemory_Cuda(N_Vector x);
82 SUNDIALS_EXPORT int N_VSetKernelExecPolicy_Cuda(N_Vector x,
83                                                 SUNCudaExecPolicy* stream_exec_policy,
84                                                 SUNCudaExecPolicy* reduce_exec_policy);
85 SUNDIALS_EXPORT void N_VCopyToDevice_Cuda(N_Vector v);
86 SUNDIALS_EXPORT void N_VCopyFromDevice_Cuda(N_Vector v);
87  /* DEPRECATED (to be removed in SUNDIALS v6): use N_VSetKerrnelExecPolicy_Cuda instead */
88 SUNDIALS_DEPRECATED_EXPORT void N_VSetCudaStream_Cuda(N_Vector x, cudaStream_t *stream);
89 
90 SUNDIALS_STATIC_INLINE
N_VGetLength_Cuda(N_Vector x)91 sunindextype N_VGetLength_Cuda(N_Vector x)
92 {
93   N_VectorContent_Cuda content = (N_VectorContent_Cuda)x->content;
94   return content->length;
95 }
96 
97 SUNDIALS_STATIC_INLINE
N_VGetHostArrayPointer_Cuda(N_Vector x)98 realtype *N_VGetHostArrayPointer_Cuda(N_Vector x)
99 {
100   N_VectorContent_Cuda content = (N_VectorContent_Cuda)x->content;
101   return(content->host_data == NULL ? NULL : (realtype*)content->host_data->ptr);
102 }
103 
104 SUNDIALS_STATIC_INLINE
N_VGetDeviceArrayPointer_Cuda(N_Vector x)105 realtype *N_VGetDeviceArrayPointer_Cuda(N_Vector x)
106 {
107   N_VectorContent_Cuda content = (N_VectorContent_Cuda)x->content;
108   return(content->device_data == NULL ? NULL : (realtype*)content->device_data->ptr);
109 }
110 
111 /*
112  * -----------------------------------------------------------------
113  * NVECTOR API functions
114  * -----------------------------------------------------------------
115  */
116 
117 SUNDIALS_STATIC_INLINE
N_VGetVectorID_Cuda(N_Vector v)118 N_Vector_ID N_VGetVectorID_Cuda(N_Vector v)
119 {
120   return SUNDIALS_NVEC_CUDA;
121 }
122 
123 SUNDIALS_EXPORT N_Vector N_VCloneEmpty_Cuda(N_Vector w);
124 SUNDIALS_EXPORT N_Vector N_VClone_Cuda(N_Vector w);
125 SUNDIALS_EXPORT void N_VDestroy_Cuda(N_Vector v);
126 SUNDIALS_EXPORT void N_VSpace_Cuda(N_Vector v, sunindextype *lrw, sunindextype *liw);
127 
128 /* standard vector operations */
129 SUNDIALS_EXPORT void N_VLinearSum_Cuda(realtype a, N_Vector x, realtype b, N_Vector y, N_Vector z);
130 SUNDIALS_EXPORT void N_VConst_Cuda(realtype c, N_Vector z);
131 SUNDIALS_EXPORT void N_VProd_Cuda(N_Vector x, N_Vector y, N_Vector z);
132 SUNDIALS_EXPORT void N_VDiv_Cuda(N_Vector x, N_Vector y, N_Vector z);
133 SUNDIALS_EXPORT void N_VScale_Cuda(realtype c, N_Vector x, N_Vector z);
134 SUNDIALS_EXPORT void N_VAbs_Cuda(N_Vector x, N_Vector z);
135 SUNDIALS_EXPORT void N_VInv_Cuda(N_Vector x, N_Vector z);
136 SUNDIALS_EXPORT void N_VAddConst_Cuda(N_Vector x, realtype b, N_Vector z);
137 SUNDIALS_EXPORT realtype N_VDotProd_Cuda(N_Vector x, N_Vector y);
138 SUNDIALS_EXPORT realtype N_VMaxNorm_Cuda(N_Vector x);
139 SUNDIALS_EXPORT realtype N_VWrmsNorm_Cuda(N_Vector x, N_Vector w);
140 SUNDIALS_EXPORT realtype N_VWrmsNormMask_Cuda(N_Vector x, N_Vector w, N_Vector id);
141 SUNDIALS_EXPORT realtype N_VMin_Cuda(N_Vector x);
142 SUNDIALS_EXPORT realtype N_VWL2Norm_Cuda(N_Vector x, N_Vector w);
143 SUNDIALS_EXPORT realtype N_VL1Norm_Cuda(N_Vector x);
144 SUNDIALS_EXPORT void N_VCompare_Cuda(realtype c, N_Vector x, N_Vector z);
145 SUNDIALS_EXPORT booleantype N_VInvTest_Cuda(N_Vector x, N_Vector z);
146 SUNDIALS_EXPORT booleantype N_VConstrMask_Cuda(N_Vector c, N_Vector x, N_Vector m);
147 SUNDIALS_EXPORT realtype N_VMinQuotient_Cuda(N_Vector num, N_Vector denom);
148 
149 /* fused vector operations */
150 SUNDIALS_EXPORT int N_VLinearCombination_Cuda(int nvec, realtype* c, N_Vector* X,
151                                               N_Vector Z);
152 SUNDIALS_EXPORT int N_VScaleAddMulti_Cuda(int nvec, realtype* c, N_Vector X,
153                                           N_Vector* Y, N_Vector* Z);
154 SUNDIALS_EXPORT int N_VDotProdMulti_Cuda(int nvec, N_Vector x, N_Vector* Y,
155                                          realtype* dotprods);
156 
157 /* vector array operations */
158 SUNDIALS_EXPORT int N_VLinearSumVectorArray_Cuda(int nvec,
159                                                  realtype a, N_Vector* X,
160                                                  realtype b, N_Vector* Y,
161                                                  N_Vector* Z);
162 SUNDIALS_EXPORT int N_VScaleVectorArray_Cuda(int nvec, realtype* c, N_Vector* X,
163                                              N_Vector* Z);
164 SUNDIALS_EXPORT int N_VConstVectorArray_Cuda(int nvec, realtype c, N_Vector* Z);
165 SUNDIALS_EXPORT int N_VScaleAddMultiVectorArray_Cuda(int nvec, int nsum,
166                                                      realtype* a, N_Vector* X,
167                                                      N_Vector** Y, N_Vector** Z);
168 SUNDIALS_EXPORT int N_VLinearCombinationVectorArray_Cuda(int nvec, int nsum,
169                                                          realtype* c,
170                                                          N_Vector** X,
171                                                          N_Vector* Z);
172 SUNDIALS_EXPORT int N_VWrmsNormVectorArray_Cuda(int nvec, N_Vector* X,
173                                                 N_Vector* W, realtype* nrm);
174 SUNDIALS_EXPORT int N_VWrmsNormMaskVectorArray_Cuda(int nvec, N_Vector* X,
175                                                     N_Vector* W, N_Vector id,
176                                                     realtype* nrm);
177 
178 /* OPTIONAL local reduction kernels (no parallel communication) */
179 SUNDIALS_EXPORT realtype N_VWSqrSumLocal_Cuda(N_Vector x, N_Vector w);
180 SUNDIALS_EXPORT realtype N_VWSqrSumMaskLocal_Cuda(N_Vector x, N_Vector w, N_Vector id);
181 
182 /* OPTIONAL XBraid interface operations */
183 SUNDIALS_EXPORT int N_VBufSize_Cuda(N_Vector x, sunindextype *size);
184 SUNDIALS_EXPORT int N_VBufPack_Cuda(N_Vector x, void *buf);
185 SUNDIALS_EXPORT int N_VBufUnpack_Cuda(N_Vector x, void *buf);
186 
187 /* OPTIONAL operations for debugging */
188 SUNDIALS_EXPORT void N_VPrint_Cuda(N_Vector v);
189 SUNDIALS_EXPORT void N_VPrintFile_Cuda(N_Vector v, FILE *outfile);
190 
191 
192 /*
193  * -----------------------------------------------------------------
194  * Enable / disable fused vector operations
195  * -----------------------------------------------------------------
196  */
197 
198 SUNDIALS_EXPORT int N_VEnableFusedOps_Cuda(N_Vector v, booleantype tf);
199 
200 SUNDIALS_EXPORT int N_VEnableLinearCombination_Cuda(N_Vector v, booleantype tf);
201 SUNDIALS_EXPORT int N_VEnableScaleAddMulti_Cuda(N_Vector v, booleantype tf);
202 SUNDIALS_EXPORT int N_VEnableDotProdMulti_Cuda(N_Vector v, booleantype tf);
203 
204 SUNDIALS_EXPORT int N_VEnableLinearSumVectorArray_Cuda(N_Vector v, booleantype tf);
205 SUNDIALS_EXPORT int N_VEnableScaleVectorArray_Cuda(N_Vector v, booleantype tf);
206 SUNDIALS_EXPORT int N_VEnableConstVectorArray_Cuda(N_Vector v, booleantype tf);
207 SUNDIALS_EXPORT int N_VEnableWrmsNormVectorArray_Cuda(N_Vector v, booleantype tf);
208 SUNDIALS_EXPORT int N_VEnableWrmsNormMaskVectorArray_Cuda(N_Vector v, booleantype tf);
209 SUNDIALS_EXPORT int N_VEnableScaleAddMultiVectorArray_Cuda(N_Vector v, booleantype tf);
210 SUNDIALS_EXPORT int N_VEnableLinearCombinationVectorArray_Cuda(N_Vector v, booleantype tf);
211 
212 #ifdef __cplusplus
213 }
214 #endif
215 
216 #endif
217