1 
2 // Decide branch direction for GPU use for the dot-product MxM
3 extern "C"
4 {
5   #include "GB_mxm.h"
6 }
7 #include "GB_cuda.h"
8 
GB_AxB_dot3_cuda_branch(const GrB_Matrix M,const bool Mask_struct,const GrB_Matrix A,const GrB_Matrix B,const GrB_Semiring semiring,const bool flipxy,GB_Context Context)9 bool GB_AxB_dot3_cuda_branch
10 (
11     const GrB_Matrix M,             // mask matrix
12     const bool Mask_struct,         // if true, use the only structure of M
13     const GrB_Matrix A,             // input matrix
14     const GrB_Matrix B,             // input matrix
15     const GrB_Semiring semiring,    // semiring that defines C=A*B
16     const bool flipxy,              // if true, do z=fmult(b,a) vs fmult(a,b)
17     GB_Context Context
18 )
19 {
20         // very rough estimate of the work to do
21         double adeg = ((double) GB_NNZ (A)) / ((double) GB_IMAX (1, A->nvec)) ;
22         double bdeg = ((double) GB_NNZ (B)) / ((double) GB_IMAX (1, B->nvec)) ;
23         double work = GB_NNZ (M) * GB_IMIN (adeg, bdeg) ;
24 
25         // TODO if A or B are not accessed (first, 2nd, or pair ops)
26         // then the type if A can be user-defined here, for CUDA.
27 
28         // TODO: the test for a built-in semiring needs to be
29         // removed, to allow for the generation of CUDA kernels for non-
30         // built-in semirings.  The code generation process currently does not
31         // support user-defined types and operators, but this needs to be
32         // handled.  In addition, CUDA kernels could be built for semirings
33         // that are not built-in, but consist solely of built-in types and
34         // operators (such as BOR_BSHIFT on INT32 inputs).
35 
36         int ngpus_to_use = GB_ngpus_to_use (work) ;
37         GBURBLE (" work:%g gpus:%d ", work, ngpus_to_use) ;
38         if (ngpus_to_use > 0
39             && (semiring->header_size == 0)     // semiring is built-in
40             && (A->type->code != GB_UDT_code)
41             && (B->type->code != GB_UDT_code))
42         {
43             return true;
44         }
45         else
46         {
47             return false;
48         }
49 
50 }
51