1 //------------------------------------------------------------------------------
2 // GB_cuda_gateway.h: definitions for interface to GB_cuda_* functions
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // CUDA gateway functions (DRAFT: in progress)
11
12 // This file can be #include'd into any GraphBLAS/Source file that needs to
13 // call a CUDA gateway function, or use the typedef defined below. It is also
14 // #include'd in GraphBLAS/CUDA/GB_cuda.h, for use by the CUDA/GB_cuda_*.cu
15 // gateway functions.
16
17 // If GBCUDA is defined in GraphBLAS/CMakeLists.txt, then GraphBLAS can call
18 // the C-callable gateway functions defined in GraphBLAS/CUDA/*.cu source
19 // files. If GBCUDA is not defined, then these functions are not called. The
20 // typedef always appears, since it is part of the GB_Global struct, whether
21 // or not CUDA is used.
22
23 #ifndef GB_CUDA_GATEWAY_H
24 #define GB_CUDA_GATEWAY_H
25
// upper bound on the number of GPUs
#define GB_CUDA_MAX_GPUS 32

// GB_GPU_CHUNK_DEFAULT is the default value of the GxB_GPU_CHUNK parameter.
// A GPU is only used when the work to do is at least one chunk in size; see
// GB_ngpus_to_use.
#define GB_GPU_CHUNK_DEFAULT (1024 * 1024)
31
32 #if defined ( GB_NVCC )
33 extern "C" {
34 #endif
35
36 //------------------------------------------------------------------------------
37 // GB_cuda_device: properties of each GPU in the system
38 //------------------------------------------------------------------------------
39
// One GB_cuda_device record holds the properties of a single GPU.  It is
// filled in via the *prop argument of GB_cuda_get_device_properties.  The
// typedef is declared whether or not CUDA is in use.
typedef struct
{
    char   name [256] ;                 // name of the device
    size_t total_global_memory ;        // global memory on the device
                                        // (presumably in bytes; confirm)
    int    number_of_sms ;              // presumably the number of streaming
                                        // multiprocessors; confirm
    int    compute_capability_major ;   // compute capability, major part
    int    compute_capability_minor ;   // compute capability, minor part
    bool   use_memory_pool ;            // presumably true if a memory pool is
                                        // used for this device; confirm
    int    pool_size ;                  // TODO: should this be size_t?
    int    max_pool_size ;              // TODO: should this be size_t?
    void  *memory_resource ;            // opaque handle; the concrete type is
                                        // not visible in this header
}
GB_cuda_device ;
53
54 //------------------------------------------------------------------------------
55 // GB_ngpus_to_use: determine # of GPUs to use for the next computation
56 //------------------------------------------------------------------------------
57
GB_ngpus_to_use(double work)58 static inline int GB_ngpus_to_use
59 (
60 double work // total work to do
61 )
62 {
63 // get the current GxB_GPU_CONTROL setting
64 GrB_Desc_Value gpu_control = GB_Global_gpu_control_get ( ) ;
65 int gpu_count = GB_Global_gpu_count_get ( ) ;
66 if (gpu_control == GxB_GPU_NEVER || gpu_count == 0)
67 {
68 // never use the GPU(s)
69 return (0) ;
70 }
71 else if (gpu_control == GxB_GPU_ALWAYS)
72 {
73 // always use all available GPU(s)
74 return (gpu_count) ;
75 }
76 else
77 {
78 // use no more than max_gpus_to_use
79 double gpu_chunk = GB_Global_gpu_chunk_get ( ) ;
80 double max_gpus_to_use = floor (work / gpu_chunk) ;
81 // but use no more than the # of GPUs available
82 if (max_gpus_to_use > gpu_count) return (gpu_count) ;
83 return ((int) max_gpus_to_use) ;
84 }
85 }
86
87
88 //------------------------------------------------------------------------------
89 // GB_cuda_* gateway functions
90 //------------------------------------------------------------------------------
91
// Device query and selection.  GB_cuda_get_device_count returns true if OK,
// false on failure; the other functions below also return bool, presumably
// with the same convention (confirm against the CUDA/GB_cuda_*.cu sources).

// return the # of GPUs in the system in *gpu_count
bool GB_cuda_get_device_count (int *gpu_count) ;

bool GB_cuda_warmup
(
    int device                  // the GPU to warm up
) ;

bool GB_cuda_get_device
(
    int *device                 // presumably returns the current GPU; confirm
) ;

bool GB_cuda_set_device
(
    int device                  // presumably the GPU to make current; confirm
) ;
102
103 bool GB_cuda_get_device_properties
104 (
105 int device,
106 GB_cuda_device *prop
107 ) ;
108
// Memory management gateways, with the same signatures as the standard
// malloc, free, and calloc.  CUDA does not have a realloc function, so there
// is no GB_cuda_realloc.

// standard malloc signature
void *GB_cuda_malloc (size_t size) ;

// standard free signature
void GB_cuda_free (void *p) ;

// standard calloc signature
void *GB_cuda_calloc (size_t n, size_t size) ;
114
115 GrB_Info GB_cuda_red__plus_int64
116 (
117 int64_t *result,
118 int64_t *Ax,
119 int64_t anz,
120 int64_t *restrict W, // array of size ntasks
121 int64_t worksize,
122 int ntasks,
123 int nthreads,
124 int blocksize
125 ) ;
126
127 GrB_Info GB_AxB_dot3_cuda // C<M> = A'*B using dot product method
128 (
129 GrB_Matrix C, // output matrix, static header
130 const GrB_Matrix M, // mask matrix
131 const bool Mask_struct, // if true, use the only structure of M
132 const GrB_Matrix A, // input matrix
133 const GrB_Matrix B, // input matrix
134 const GrB_Semiring semiring, // semiring that defines C=A*B
135 const bool flipxy, // if true, do z=fmult(b,a) vs fmult(a,b)
136 GB_Context Context
137 ) ;
138
139 #if defined ( GB_NVCC )
140 }
141 #endif
142
143 #endif
144
145