// Decide branch direction for GPU use for the dot-product MxM
extern "C"
{
#include "GB_mxm.h"
}
#include "GB_cuda.h"

GB_AxB_dot3_cuda_branch(const GrB_Matrix M,const bool Mask_struct,const GrB_Matrix A,const GrB_Matrix B,const GrB_Semiring semiring,const bool flipxy,GB_Context Context)9 bool GB_AxB_dot3_cuda_branch
10 (
11 const GrB_Matrix M, // mask matrix
12 const bool Mask_struct, // if true, use the only structure of M
13 const GrB_Matrix A, // input matrix
14 const GrB_Matrix B, // input matrix
15 const GrB_Semiring semiring, // semiring that defines C=A*B
16 const bool flipxy, // if true, do z=fmult(b,a) vs fmult(a,b)
17 GB_Context Context
18 )
19 {
20 // very rough estimate of the work to do
21 double adeg = ((double) GB_NNZ (A)) / ((double) GB_IMAX (1, A->nvec)) ;
22 double bdeg = ((double) GB_NNZ (B)) / ((double) GB_IMAX (1, B->nvec)) ;
23 double work = GB_NNZ (M) * GB_IMIN (adeg, bdeg) ;
24
25 // TODO if A or B are not accessed (first, 2nd, or pair ops)
26 // then the type if A can be user-defined here, for CUDA.
27
28 // TODO: the test for a built-in semiring needs to be
29 // removed, to allow for the generation of CUDA kernels for non-
30 // built-in semirings. The code generation process currently does not
31 // support user-defined types and operators, but this needs to be
32 // handled. In addition, CUDA kernels could be built for semirings
33 // that are not built-in, but consist solely of built-in types and
34 // operators (such as BOR_BSHIFT on INT32 inputs).
35
36 int ngpus_to_use = GB_ngpus_to_use (work) ;
37 GBURBLE (" work:%g gpus:%d ", work, ngpus_to_use) ;
38 if (ngpus_to_use > 0
39 && (semiring->header_size == 0) // semiring is built-in
40 && (A->type->code != GB_UDT_code)
41 && (B->type->code != GB_UDT_code))
42 {
43 return true;
44 }
45 else
46 {
47 return false;
48 }
49
50 }