1 //------------------------------------------------------------------------------
2 // GB_cuda_cumsum: cumlative sum of an array using GPU acceleration
3 //------------------------------------------------------------------------------
4
5 // SPDX-License-Identifier: Apache-2.0
6 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2019, All Rights Reserved.
7 // http://suitesparse.com See GraphBLAS/Doc/License.txt for license.
8
9 //------------------------------------------------------------------------------
10
11 // Compute the cumulative sum of an array count[0:n], of size n+1
12 // in pseudo-MATLAB notation:
13
14 // k = sum (count [0:n-1] != 0) ;
15
16 // count = cumsum ([0 count[0:n-1]]) ;
17
18 // That is, count [j] on input is overwritten with the value of
19 // sum (count [0..j-1]). count [n] is implicitly zero on input.
20 // On output, count [n] is the total sum.
21
22 #include "GB_cuda.h"
23 #include <local_cub/device/device_scan.cuh>
24
GB_cuda_cumsum(int64_t * restrict count,const int64_t n)25 GrB_Info GB_cuda_cumsum // compute the cumulative sum of an array
26 (
27 int64_t *restrict count, // size n+1, input/output
28 const int64_t n
29 )
30 {
31 //--------------------------------------------------------------------------
32 // check inputs
33 //--------------------------------------------------------------------------
34
35 ASSERT (count != NULL) ;
36 ASSERT (n >= 0) ;
37
38 //--------------------------------------------------------------------------
39 // count = cumsum ([0 count[0:n-1]]) ;
40 //--------------------------------------------------------------------------
41 void *d_temp_storage = NULL;
42 size_t temp_storage_bytes;
43 cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, count, count, (int)n);
44 size_t size ;
45 d_temp_storage = GB_malloc_memory( temp_storage_bytes, 1, &size);
46 if ( d_temp_storage == NULL){
47 return GrB_OUT_OF_MEMORY;
48 }
49
50 // Run
51 CubDebugExit(cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, count, count, n));
52
53 // Check for correctness (and display results, if specified)
54 #ifdef GB_DEBUG
55 int compare = CompareDeviceResults(h_reference, count, num_items, true, g_verbose);
56 ASSERT( compare == 0);
57 #endif
58
59 // Cleanup
60 GB_dealloc_memory (&d_temp_storage, &size) ;
61
62 return GrB_SUCCESS;
63 }
64
65