1 //------------------------------------------------------------------------------
2 // GB_cuda_cumsum: cumlative sum of an array using GPU acceleration
3 //------------------------------------------------------------------------------
4 
5 // SPDX-License-Identifier: Apache-2.0
6 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
7 // http://suitesparse.com   See GraphBLAS/Doc/License.txt for license.
8 
9 //------------------------------------------------------------------------------
10 
11 // Compute the cumulative sum of an array count[0:n], of size n+1
12 // in pseudo-MATLAB notation:
13 
14 //      k = sum (count [0:n-1] != 0) ;
15 
16 //      count = cumsum ([0 count[0:n-1]]) ;
17 
18 // That is, count [j] on input is overwritten with the value of
19 // sum (count [0..j-1]).  count [n] is implicitly zero on input.
20 // On output, count [n] is the total sum.
21 
22 #include "GB_cuda.h"
23 #include <local_cub/device/device_scan.cuh>
24 
GB_cuda_cumsum(int64_t * restrict count,const int64_t n)25 GrB_Info GB_cuda_cumsum             // compute the cumulative sum of an array
26 (
27     int64_t *restrict count,    // size n+1, input/output
28     const int64_t n
29 )
30 {
31     //--------------------------------------------------------------------------
32     // check inputs
33     //--------------------------------------------------------------------------
34 
35     ASSERT (count != NULL) ;
36     ASSERT (n >= 0) ;
37 
38     //--------------------------------------------------------------------------
39     // count = cumsum ([0 count[0:n-1]]) ;
40     //--------------------------------------------------------------------------
41     void *d_temp_storage = NULL;
42     size_t temp_storage_bytes;
43     cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, count, count, (int)n);
44     size_t size ;
45     d_temp_storage  = GB_malloc_memory( temp_storage_bytes, 1, &size);
46     if ( d_temp_storage == NULL){
47        return GrB_OUT_OF_MEMORY;
48     }
49 
50     // Run
51     CubDebugExit(cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, count, count, n));
52 
53     // Check for correctness (and display results, if specified)
54     #ifdef GB_DEBUG
55     int compare = CompareDeviceResults(h_reference, count, num_items, true, g_verbose);
56     ASSERT( compare == 0);
57     #endif
58 
59     // Cleanup
60     GB_dealloc_memory (&d_temp_storage, &size) ;
61 
62     return GrB_SUCCESS;
63 }
64 
65