1 /* -----------------------------------------------------------------
2 * Programmer(s): Slaven Peles and Cody J. Balos @ LLNL
3 * -----------------------------------------------------------------
4 * SUNDIALS Copyright Start
5 * Copyright (c) 2002-2021, Lawrence Livermore National Security
6 * and Southern Methodist University.
7 * All rights reserved.
8 *
9 * See the top-level LICENSE and NOTICE files for details.
10 *
11 * SPDX-License-Identifier: BSD-3-Clause
12 * SUNDIALS Copyright End
13 * -----------------------------------------------------------------
14 * This is the header file for the CUDA implementation of the
15 * NVECTOR module.
16 * -----------------------------------------------------------------*/
17
18 #ifndef _NVECTOR_CUDA_H
19 #define _NVECTOR_CUDA_H
20
21 #include <cuda_runtime.h>
22 #include <stdio.h>
23
24 #include <sundials/sundials_cuda_policies.hpp>
25 #include <sundials/sundials_nvector.h>
26 #include <sundials/sundials_config.h>
27 #include <sunmemory/sunmemory_cuda.h>
28
29 #ifdef __cplusplus /* wrapper to enable C++ usage */
30 extern "C" {
31 #endif
32
33 /*
34 * -----------------------------------------------------------------
35 * CUDA implementation of N_Vector
36 * -----------------------------------------------------------------
37 */
38
39 struct _N_VectorContent_Cuda
40 {
41 sunindextype length;
42 booleantype own_exec;
43 booleantype own_helper;
44 SUNMemory host_data;
45 SUNMemory device_data;
46 SUNCudaExecPolicy* stream_exec_policy;
47 SUNCudaExecPolicy* reduce_exec_policy;
48 SUNMemoryHelper mem_helper;
49 void* priv; /* 'private' data */
50 };
51
52 typedef struct _N_VectorContent_Cuda *N_VectorContent_Cuda;
53
54 /*
55 * -----------------------------------------------------------------
56 * NVECTOR_CUDA implementation specific functions
57 * -----------------------------------------------------------------
58 */
59
60 SUNDIALS_EXPORT N_Vector N_VNewEmpty_Cuda();
61 SUNDIALS_EXPORT N_Vector N_VNew_Cuda(sunindextype length);
62 SUNDIALS_EXPORT N_Vector N_VNewManaged_Cuda(sunindextype length);
63 SUNDIALS_EXPORT N_Vector N_VNewWithMemHelp_Cuda(sunindextype length,
64 booleantype use_managed_mem,
65 SUNMemoryHelper helper);
66 SUNDIALS_EXPORT N_Vector N_VMake_Cuda(sunindextype length,
67 realtype *h_vdata,
68 realtype *d_vdata);
69 SUNDIALS_EXPORT N_Vector N_VMakeManaged_Cuda(sunindextype length,
70 realtype *vdata);
71 /* DEPRECATION NOTICE:
72 In SUNDIALS v6, this function will be removed.
73 Use N_VNewWithMemHelp_Cuda instead.
74 */
75 SUNDIALS_DEPRECATED_EXPORT
76 N_Vector N_VMakeWithManagedAllocator_Cuda(sunindextype length,
77 void* (*allocfn)(size_t),
78 void (*freefn)(void*));
79 SUNDIALS_EXPORT void N_VSetHostArrayPointer_Cuda(realtype* h_vdata, N_Vector v);
80 SUNDIALS_EXPORT void N_VSetDeviceArrayPointer_Cuda(realtype* d_vdata, N_Vector v);
81 SUNDIALS_EXPORT booleantype N_VIsManagedMemory_Cuda(N_Vector x);
82 SUNDIALS_EXPORT int N_VSetKernelExecPolicy_Cuda(N_Vector x,
83 SUNCudaExecPolicy* stream_exec_policy,
84 SUNCudaExecPolicy* reduce_exec_policy);
85 SUNDIALS_EXPORT void N_VCopyToDevice_Cuda(N_Vector v);
86 SUNDIALS_EXPORT void N_VCopyFromDevice_Cuda(N_Vector v);
87 /* DEPRECATED (to be removed in SUNDIALS v6): use N_VSetKerrnelExecPolicy_Cuda instead */
88 SUNDIALS_DEPRECATED_EXPORT void N_VSetCudaStream_Cuda(N_Vector x, cudaStream_t *stream);
89
90 SUNDIALS_STATIC_INLINE
N_VGetLength_Cuda(N_Vector x)91 sunindextype N_VGetLength_Cuda(N_Vector x)
92 {
93 N_VectorContent_Cuda content = (N_VectorContent_Cuda)x->content;
94 return content->length;
95 }
96
97 SUNDIALS_STATIC_INLINE
N_VGetHostArrayPointer_Cuda(N_Vector x)98 realtype *N_VGetHostArrayPointer_Cuda(N_Vector x)
99 {
100 N_VectorContent_Cuda content = (N_VectorContent_Cuda)x->content;
101 return(content->host_data == NULL ? NULL : (realtype*)content->host_data->ptr);
102 }
103
104 SUNDIALS_STATIC_INLINE
N_VGetDeviceArrayPointer_Cuda(N_Vector x)105 realtype *N_VGetDeviceArrayPointer_Cuda(N_Vector x)
106 {
107 N_VectorContent_Cuda content = (N_VectorContent_Cuda)x->content;
108 return(content->device_data == NULL ? NULL : (realtype*)content->device_data->ptr);
109 }
110
111 /*
112 * -----------------------------------------------------------------
113 * NVECTOR API functions
114 * -----------------------------------------------------------------
115 */
116
117 SUNDIALS_STATIC_INLINE
N_VGetVectorID_Cuda(N_Vector v)118 N_Vector_ID N_VGetVectorID_Cuda(N_Vector v)
119 {
120 return SUNDIALS_NVEC_CUDA;
121 }
122
123 SUNDIALS_EXPORT N_Vector N_VCloneEmpty_Cuda(N_Vector w);
124 SUNDIALS_EXPORT N_Vector N_VClone_Cuda(N_Vector w);
125 SUNDIALS_EXPORT void N_VDestroy_Cuda(N_Vector v);
126 SUNDIALS_EXPORT void N_VSpace_Cuda(N_Vector v, sunindextype *lrw, sunindextype *liw);
127
128 /* standard vector operations */
129 SUNDIALS_EXPORT void N_VLinearSum_Cuda(realtype a, N_Vector x, realtype b, N_Vector y, N_Vector z);
130 SUNDIALS_EXPORT void N_VConst_Cuda(realtype c, N_Vector z);
131 SUNDIALS_EXPORT void N_VProd_Cuda(N_Vector x, N_Vector y, N_Vector z);
132 SUNDIALS_EXPORT void N_VDiv_Cuda(N_Vector x, N_Vector y, N_Vector z);
133 SUNDIALS_EXPORT void N_VScale_Cuda(realtype c, N_Vector x, N_Vector z);
134 SUNDIALS_EXPORT void N_VAbs_Cuda(N_Vector x, N_Vector z);
135 SUNDIALS_EXPORT void N_VInv_Cuda(N_Vector x, N_Vector z);
136 SUNDIALS_EXPORT void N_VAddConst_Cuda(N_Vector x, realtype b, N_Vector z);
137 SUNDIALS_EXPORT realtype N_VDotProd_Cuda(N_Vector x, N_Vector y);
138 SUNDIALS_EXPORT realtype N_VMaxNorm_Cuda(N_Vector x);
139 SUNDIALS_EXPORT realtype N_VWrmsNorm_Cuda(N_Vector x, N_Vector w);
140 SUNDIALS_EXPORT realtype N_VWrmsNormMask_Cuda(N_Vector x, N_Vector w, N_Vector id);
141 SUNDIALS_EXPORT realtype N_VMin_Cuda(N_Vector x);
142 SUNDIALS_EXPORT realtype N_VWL2Norm_Cuda(N_Vector x, N_Vector w);
143 SUNDIALS_EXPORT realtype N_VL1Norm_Cuda(N_Vector x);
144 SUNDIALS_EXPORT void N_VCompare_Cuda(realtype c, N_Vector x, N_Vector z);
145 SUNDIALS_EXPORT booleantype N_VInvTest_Cuda(N_Vector x, N_Vector z);
146 SUNDIALS_EXPORT booleantype N_VConstrMask_Cuda(N_Vector c, N_Vector x, N_Vector m);
147 SUNDIALS_EXPORT realtype N_VMinQuotient_Cuda(N_Vector num, N_Vector denom);
148
149 /* fused vector operations */
150 SUNDIALS_EXPORT int N_VLinearCombination_Cuda(int nvec, realtype* c, N_Vector* X,
151 N_Vector Z);
152 SUNDIALS_EXPORT int N_VScaleAddMulti_Cuda(int nvec, realtype* c, N_Vector X,
153 N_Vector* Y, N_Vector* Z);
154 SUNDIALS_EXPORT int N_VDotProdMulti_Cuda(int nvec, N_Vector x, N_Vector* Y,
155 realtype* dotprods);
156
157 /* vector array operations */
158 SUNDIALS_EXPORT int N_VLinearSumVectorArray_Cuda(int nvec,
159 realtype a, N_Vector* X,
160 realtype b, N_Vector* Y,
161 N_Vector* Z);
162 SUNDIALS_EXPORT int N_VScaleVectorArray_Cuda(int nvec, realtype* c, N_Vector* X,
163 N_Vector* Z);
164 SUNDIALS_EXPORT int N_VConstVectorArray_Cuda(int nvec, realtype c, N_Vector* Z);
165 SUNDIALS_EXPORT int N_VScaleAddMultiVectorArray_Cuda(int nvec, int nsum,
166 realtype* a, N_Vector* X,
167 N_Vector** Y, N_Vector** Z);
168 SUNDIALS_EXPORT int N_VLinearCombinationVectorArray_Cuda(int nvec, int nsum,
169 realtype* c,
170 N_Vector** X,
171 N_Vector* Z);
172 SUNDIALS_EXPORT int N_VWrmsNormVectorArray_Cuda(int nvec, N_Vector* X,
173 N_Vector* W, realtype* nrm);
174 SUNDIALS_EXPORT int N_VWrmsNormMaskVectorArray_Cuda(int nvec, N_Vector* X,
175 N_Vector* W, N_Vector id,
176 realtype* nrm);
177
178 /* OPTIONAL local reduction kernels (no parallel communication) */
179 SUNDIALS_EXPORT realtype N_VWSqrSumLocal_Cuda(N_Vector x, N_Vector w);
180 SUNDIALS_EXPORT realtype N_VWSqrSumMaskLocal_Cuda(N_Vector x, N_Vector w, N_Vector id);
181
182 /* OPTIONAL XBraid interface operations */
183 SUNDIALS_EXPORT int N_VBufSize_Cuda(N_Vector x, sunindextype *size);
184 SUNDIALS_EXPORT int N_VBufPack_Cuda(N_Vector x, void *buf);
185 SUNDIALS_EXPORT int N_VBufUnpack_Cuda(N_Vector x, void *buf);
186
187 /* OPTIONAL operations for debugging */
188 SUNDIALS_EXPORT void N_VPrint_Cuda(N_Vector v);
189 SUNDIALS_EXPORT void N_VPrintFile_Cuda(N_Vector v, FILE *outfile);
190
191
192 /*
193 * -----------------------------------------------------------------
194 * Enable / disable fused vector operations
195 * -----------------------------------------------------------------
196 */
197
198 SUNDIALS_EXPORT int N_VEnableFusedOps_Cuda(N_Vector v, booleantype tf);
199
200 SUNDIALS_EXPORT int N_VEnableLinearCombination_Cuda(N_Vector v, booleantype tf);
201 SUNDIALS_EXPORT int N_VEnableScaleAddMulti_Cuda(N_Vector v, booleantype tf);
202 SUNDIALS_EXPORT int N_VEnableDotProdMulti_Cuda(N_Vector v, booleantype tf);
203
204 SUNDIALS_EXPORT int N_VEnableLinearSumVectorArray_Cuda(N_Vector v, booleantype tf);
205 SUNDIALS_EXPORT int N_VEnableScaleVectorArray_Cuda(N_Vector v, booleantype tf);
206 SUNDIALS_EXPORT int N_VEnableConstVectorArray_Cuda(N_Vector v, booleantype tf);
207 SUNDIALS_EXPORT int N_VEnableWrmsNormVectorArray_Cuda(N_Vector v, booleantype tf);
208 SUNDIALS_EXPORT int N_VEnableWrmsNormMaskVectorArray_Cuda(N_Vector v, booleantype tf);
209 SUNDIALS_EXPORT int N_VEnableScaleAddMultiVectorArray_Cuda(N_Vector v, booleantype tf);
210 SUNDIALS_EXPORT int N_VEnableLinearCombinationVectorArray_Cuda(N_Vector v, booleantype tf);
211
212 #ifdef __cplusplus
213 }
214 #endif
215
216 #endif
217