1 //------------------------------------------------------------------------------
2 // GB_task_cumsum: cumulative sum of Cp and fine tasks in TaskList
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // Cp is never NULL. C is created as sparse or hypersparse.
11
12 #include "GB.h"
13
// GB_task_cumsum: finalize Cp and the fine-task offsets in TaskList.
//
// On input, Cp [k] holds the count of entries in C(:,k), and each fine task
// holds its local entry count in TaskList [taskid].pC.  On output, Cp is the
// cumulative sum (so Cp [k] is the start of C(:,k) in C), Cnvec_nonempty is
// the number of non-empty vectors of C, and TaskList [taskid].pC is the
// absolute starting position in C for each fine task.  TaskList [ntasks].pC
// is set to Cp [Cnvec] so the last task always knows its pC_end.

void GB_task_cumsum
(
    int64_t *Cp,                        // size Cnvec+1
    const int64_t Cnvec,
    int64_t *Cnvec_nonempty,            // # of non-empty vectors in C
    GB_task_struct *restrict TaskList,  // array of structs
    const int ntasks,                   // # of tasks
    const int nthreads,                 // # of threads
    GB_Context Context
)
{

    //--------------------------------------------------------------------------
    // check inputs
    //--------------------------------------------------------------------------

    ASSERT (Cp != NULL) ;
    ASSERT (Cnvec >= 0) ;
    ASSERT (Cnvec_nonempty != NULL) ;
    ASSERT (TaskList != NULL) ;
    ASSERT (ntasks >= 0) ;
    ASSERT (nthreads >= 1) ;

    //--------------------------------------------------------------------------
    // local cumulative sum of the fine tasks
    //--------------------------------------------------------------------------

    for (int taskid = 0 ; taskid < ntasks ; taskid++)
    {
        int64_t k = TaskList [taskid].kfirst ;
        if (TaskList [taskid].klast < 0)
        {
            // Compute the sum of all fine tasks for vector k, in Cp [k]. Also
            // compute the cumulative sum of TaskList [taskid].pC, for the
            // tasks that work on vector k. The first fine task for vector k
            // starts with TaskList [taskid].pC = 0, which is an offset from
            // the final Cp [k]. A subsequent fine task t for a vector k
            // starts on offset of TaskList [t].pC from the start of C(:,k).
            // Cp [k] has not been cumsum'd across all of Cp. It is just the
            // count of the entries in C(:,k). The final Cp [k] is added to
            // each fine task below, after the GB_cumsum of Cp.
            int64_t pC = Cp [k] ;
            Cp [k] += TaskList [taskid].pC ;
            TaskList [taskid].pC = pC ;
        }
    }

    //--------------------------------------------------------------------------
    // replace Cp with its cumulative sum
    //--------------------------------------------------------------------------

    GB_cumsum (Cp, Cnvec, Cnvec_nonempty, nthreads, Context) ;

    //--------------------------------------------------------------------------
    // shift the cumulative sum of the fine tasks
    //--------------------------------------------------------------------------

    for (int taskid = 0 ; taskid < ntasks ; taskid++)
    {
        int64_t k = TaskList [taskid].kfirst ;
        if (TaskList [taskid].klast < 0)
        {
            // TaskList [taskid].pC is currently an offset for this task into
            // C(:,k). The first fine task for vector k has an offset of zero,
            // the 2nd fine task has an offset equal to the # of entries
            // computed by the first task, and so on. Cp [k] needs to be added
            // to all offsets to get the final starting position for each fine
            // task in C.
            TaskList [taskid].pC += Cp [k] ;
        }
        else
        {
            // The last fine task to operate on vector k needs to know its own
            // pC_end, which is Cp [k+1]. Suppose that task is taskid-1. If
            // this taskid is the first fine task for vector k, then TaskList
            // [taskid].pC is set to Cp [k] above. If all coarse tasks are
            // also given TaskList [taskid].pC = Cp [k], then taskid-1 will
            // always know its pC_end, which is TaskList [taskid].pC,
            // regardless of whether taskid is a fine or coarse task.
            TaskList [taskid].pC = Cp [k] ;
        }
    }

    // The last task is ntasks-1. It may be a fine task, in which case it
    // computes the entries in C from TaskList [ntasks-1].pC to
    // TaskList [ntasks].pC-1, inclusive.
    TaskList [ntasks].pC = Cp [Cnvec] ;

    //--------------------------------------------------------------------------
    // check result
    //--------------------------------------------------------------------------

    #ifdef GB_DEBUG
    for (int t = 0 ; t < ntasks ; t++)
    {
        int64_t k = TaskList [t].kfirst ;
        int64_t klast = TaskList [t].klast ;
        if (klast < 0)
        {
            // this is a fine task for vector k
            int64_t pA = TaskList [t].pA ;
            int64_t pA_end = TaskList [t].pA_end ;
            int64_t pB = TaskList [t].pB ;
            int64_t pB_end = TaskList [t].pB_end ;
            int64_t pC = TaskList [t].pC ;
            int64_t pC_end = TaskList [t+1].pC ;
            int64_t pM = TaskList [t].pM ;
            int64_t pM_end = TaskList [t].pM_end ;
            ASSERT (k >= 0 && k < Cnvec) ;
            // pA:(pA_end-1) must reside inside A(:,j), and pB:(pB_end-1) must
            // reside inside B(:,j), but these cannot be checked here since A
            // and B are not available. These basic checks can be done:
            ASSERT (pA == -1 || (0 <= pA && pA <= pA_end)) ;
            ASSERT (pB == -1 || (0 <= pB && pB <= pB_end)) ;
            ASSERT (pM == -1 || (0 <= pM && pM <= pM_end)) ;
            // pC and pC_end can be checked exactly. This task t computes
            // entries pC:(pC_end-1) of C, inclusive.
            ASSERT (Cp [k] <= pC && pC <= pC_end && pC_end <= Cp [k+1]) ;
        }
        else
        {
            // this is a coarse task for vectors k:klast, inclusive
            ASSERT (k >= 0 && k < Cnvec) ;
            ASSERT (klast >= 0 && klast <= Cnvec) ;
            ASSERT (k <= klast) ;
        }
    }
    #endif
}
144
145