1 //------------------------------------------------------------------------------
2 // GB_cuda_gateway.h: definitions for interface to GB_cuda_* functions
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // CUDA gateway functions (DRAFT: in progress)
11 
12 // This file can be #include'd into any GraphBLAS/Source file that needs to
13 // call a CUDA gateway function, or use the typedef defined below.  It is also
14 // #include'd in GraphBLAS/CUDA/GB_cuda.h, for use by the CUDA/GB_cuda_*.cu
15 // gateway functions.
16 
17 // If GBCUDA is defined in GraphBLAS/CMakeLists.txt, then GraphBLAS can call
18 // the C-callable gateway functions defined in GraphBLAS/CUDA/*.cu source
19 // files.  If GBCUDA is not defined, then these functions are not called.  The
20 // typedef always appears, since it is part of the GB_Global struct, whether
21 // or not CUDA is used.
22 
23 #ifndef GB_CUDA_GATEWAY_H
24 #define GB_CUDA_GATEWAY_H
25 
// GB_CUDA_MAX_GPUS: presumably the largest number of GPUs that GraphBLAS keeps
// track of; the code that uses this limit is not in this file.
#define GB_CUDA_MAX_GPUS 32

// GB_GPU_CHUNK_DEFAULT: default value of the GxB_GPU_CHUNK parameter.  The GPU
// is only used if the work is larger than GxB_GPU_CHUNK.
#define GB_GPU_CHUNK_DEFAULT (1024 * 1024)
31 
32 #if defined ( GB_NVCC )
33 extern "C" {
34 #endif
35 
//------------------------------------------------------------------------------
// GB_cuda_device: properties of each GPU in the system
//------------------------------------------------------------------------------

// One GB_cuda_device holds the properties of a single GPU.  A pointer to this
// struct is passed to GB_cuda_get_device_properties, which presumably fills it
// in (that function is defined outside this file).  The typedef is always
// visible, whether or not CUDA is in use.

typedef struct
{
    char name [256] ;               // name of the device (fixed-size buffer)
    size_t total_global_memory ;    // global memory size (presumably in bytes)
    int number_of_sms ;             // # of streaming multiprocessors
    int compute_capability_major ;  // compute capability, major part
    int compute_capability_minor ;  // compute capability, minor part
    bool use_memory_pool ;          // whether a memory pool is used
    int pool_size ;                 // TODO: should this be size_t?
    int max_pool_size ;             // TODO: should this be size_t?
    void *memory_resource ;         // opaque handle; its concrete type is not
                                    // visible in this file
}
GB_cuda_device ;
53 
54 //------------------------------------------------------------------------------
55 // GB_ngpus_to_use: determine # of GPUs to use for the next computation
56 //------------------------------------------------------------------------------
57 
GB_ngpus_to_use(double work)58 static inline int GB_ngpus_to_use
59 (
60     double work                 // total work to do
61 )
62 {
63     // get the current GxB_GPU_CONTROL setting
64     GrB_Desc_Value gpu_control = GB_Global_gpu_control_get ( ) ;
65     int gpu_count = GB_Global_gpu_count_get ( ) ;
66     if (gpu_control == GxB_GPU_NEVER || gpu_count == 0)
67     {
68         // never use the GPU(s)
69         return (0) ;
70     }
71     else if (gpu_control == GxB_GPU_ALWAYS)
72     {
73         // always use all available GPU(s)
74         return (gpu_count) ;
75     }
76     else
77     {
78         // use no more than max_gpus_to_use
79         double gpu_chunk = GB_Global_gpu_chunk_get ( ) ;
80         double max_gpus_to_use = floor (work / gpu_chunk) ;
81         // but use no more than the # of GPUs available
82         if (max_gpus_to_use > gpu_count) return (gpu_count) ;
83         return ((int) max_gpus_to_use) ;
84     }
85 }
86 
87 
//------------------------------------------------------------------------------
// GB_cuda_* gateway functions
//------------------------------------------------------------------------------

// GB_cuda_get_device_count: get the # of GPUs in the system.
// Returns true if OK, false if failure.

bool GB_cuda_get_device_count
(
    int *gpu_count              // output: # of GPUs in the system
) ;
96 
// Device selection and warmup.  Each function returns a bool; presumably true
// if OK and false if failure, like GB_cuda_get_device_count (TODO: confirm
// against the definitions in GraphBLAS/CUDA).

bool GB_cuda_warmup
(
    int device                  // the GPU to warm up
) ;

bool GB_cuda_get_device
(
    int *device                 // output: presumably the GPU currently in use
) ;

bool GB_cuda_set_device
(
    int device                  // the GPU to switch to
) ;
102 
103 bool GB_cuda_get_device_properties
104 (
105     int device,
106     GB_cuda_device *prop
107 ) ;
108 
// Memory management.  These three functions have the same signatures as the
// standard malloc, free, and calloc.  CUDA does not have a realloc function,
// so there is no GB_cuda_realloc.

void *GB_cuda_malloc            // standard malloc signature
(
    size_t size
) ;

void GB_cuda_free               // standard free signature
(
    void *p
) ;

void *GB_cuda_calloc            // standard calloc signature
(
    size_t n,
    size_t size
) ;
114 
115 GrB_Info GB_cuda_red__plus_int64
116 (
117     int64_t *result,
118     int64_t *Ax,
119     int64_t anz,
120     int64_t *restrict W,      // array of size ntasks
121     int64_t worksize,
122     int ntasks,
123     int nthreads,
124     int blocksize
125 ) ;
126 
127 GrB_Info GB_AxB_dot3_cuda           // C<M> = A'*B using dot product method
128 (
129     GrB_Matrix C,                   // output matrix, static header
130     const GrB_Matrix M,             // mask matrix
131     const bool Mask_struct,         // if true, use the only structure of M
132     const GrB_Matrix A,             // input matrix
133     const GrB_Matrix B,             // input matrix
134     const GrB_Semiring semiring,    // semiring that defines C=A*B
135     const bool flipxy,              // if true, do z=fmult(b,a) vs fmult(a,b)
136     GB_Context Context
137 ) ;
138 
139 #if defined ( GB_NVCC )
140 }
141 #endif
142 
143 #endif
144 
145