// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------
// GB_cuda_buckets.h: definitions for buckets used for dot3
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
// http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.

//------------------------------------------------------------------------------

// This file is #include'd only in the GraphBLAS/CUDA/GB_cuda*.cu source files.

#ifndef GB_CUDA_BUCKETS_H
#define GB_CUDA_BUCKETS_H

// nvcc chokes on the 'restrict' keyword, so define it to the empty string
// for compiling the *.cu files.  Any earlier definition is dropped first so
// that the redefinition cannot trigger a macro-redefinition diagnostic; the
// final state (restrict expands to nothing) is the same either way.
#undef restrict
#define restrict

// nvcc also chokes on fpclassify (an ANSI C11 construct that does not appear
// in C++11, it seems).  It also issues spurious warnings about compiler
// pragmas.  Source/GB.h avoids these constructs if GB_NVCC is defined.
// The guard keeps a definition that was already supplied (for example on the
// compiler command line) instead of redefining it.
#ifndef GB_NVCC
#define GB_NVCC
#endif

// 12 buckets (codes 1 to 12 below; code 0 marks a zombie and is not a bucket):
// computed by up to 11 kernel launches (zombies need no work...), using 5
// different kernels (with different configurations depending on the bucket).
// Every enumerator is given its numeric value explicitly.
typedef enum
{
    // bring out your dead:
    GB_BUCKET_ZOMBIE = 0,       // C(i,j) is a zombie (not a bucket)

    // dot3:  C<M>=A'B, M is sparse or hyper, C is sparse or hyper
    // 32 kernels A,B: (hyper,sparse,bitmap,full)^2 x M is (sparse/hyper)

    // a full/full kernel:
        // CUDA kernel: dndn, handles a single bucket:
        // both A(:,i) and B(:,j) are dense
        GB_BUCKET_DNDN = 1,

    // two full/(sparse,hyper) kernels:
        // CUDA kernel: spdn, handles 4 buckets:
        // A(:,i) is dense and B(:,j) is very sparse (< 256 entries)
        GB_BUCKET_DNVS = 2,
        // A(:,i) is dense and B(:,j) is sparse (>= 256 entries)
        GB_BUCKET_DNSP = 3,

    // a sparse/full kernel
        // A(:,i) is very sparse (< 256 entries) and B(:,j) is dense
        GB_BUCKET_VSDN = 4,
        // A(:,i) is sparse (>= 256 entries) and B(:,j) is dense
        GB_BUCKET_SPDN = 5,

    // a sparse/bitmap kernel
    // a bitmap/bitmap kernel
    // a bitmap/sparse kernel
    // ...

    // sparse/sparse:
        // CUDA kernel: vssp, handles 1 bucket, uses binary search:
        // A(:,i) is very sparse compared to B(:,j), or vice versa
        GB_BUCKET_VSSP = 6,

        // CUDA kernel: vsvs, handles 4 buckets:
        // let len = nnz (A (:,i)) + nnz (B (:,j)), then:
        GB_BUCKET_VSVS_4 = 7,       // len <= 4
        GB_BUCKET_VSVS_16 = 8,      // len <= 16
        GB_BUCKET_VSVS_64 = 9,      // len <= 64
        GB_BUCKET_VSVS_256 = 10,    // len <= 256

        // CUDA kernel: mp, use the merge-path method:
        GB_BUCKET_MERGEPATH = 11,

        // CUDA kernel: warpix, use the warp-intersect method, unused so far:
        GB_BUCKET_WARP_IX = 12
}
GB_bucket_code ;

#endif