1 //------------------------------------------------------------------------------
2 // GB_task_cumsum: cumulative sum of Cp and fine tasks in TaskList
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // Cp is never NULL.  C is created as sparse or hypersparse.
11 
12 #include "GB.h"
13 
GB_task_cumsum(int64_t * Cp,const int64_t Cnvec,int64_t * Cnvec_nonempty,GB_task_struct * restrict TaskList,const int ntasks,const int nthreads,GB_Context Context)14 void GB_task_cumsum
15 (
16     int64_t *Cp,                        // size Cnvec+1
17     const int64_t Cnvec,
18     int64_t *Cnvec_nonempty,            // # of non-empty vectors in C
19     GB_task_struct *restrict TaskList,  // array of structs
20     const int ntasks,                   // # of tasks
21     const int nthreads,                 // # of threads
22     GB_Context Context
23 )
24 {
25 
26     //--------------------------------------------------------------------------
27     // check inputs
28     //--------------------------------------------------------------------------
29 
30     ASSERT (Cp != NULL) ;
31     ASSERT (Cnvec >= 0) ;
32     ASSERT (Cnvec_nonempty != NULL) ;
33     ASSERT (TaskList != NULL) ;
34     ASSERT (ntasks >= 0) ;
35     ASSERT (nthreads >= 1) ;
36 
37     //--------------------------------------------------------------------------
38     // local cumulative sum of the fine tasks
39     //--------------------------------------------------------------------------
40 
41     for (int taskid = 0 ; taskid < ntasks ; taskid++)
42     {
43         int64_t k = TaskList [taskid].kfirst ;
44         if (TaskList [taskid].klast < 0)
45         {
46             // Compute the sum of all fine tasks for vector k, in Cp [k].  Also
47             // compute the cumulative sum of TaskList [taskid].pC, for the
48             // tasks that work on vector k.  The first fine task for vector k
49             // starts with TaskList [taskid].pC = 0, which is an offset from
50             // the final Cp [k].  A subsequent fine task t for a vector k
51             // starts on offset of TaskList [t].pC from the start of C(:,k).
52             // Cp [k] has not been cumsum'd across all of Cp.  It is just the
53             // count of the entries in C(:,k).  The final Cp [k] is added to
54             // each fine task below, after the GB_cumsum of Cp.
55             int64_t pC = Cp [k] ;
56             Cp [k] += TaskList [taskid].pC ;
57             TaskList [taskid].pC = pC ;
58         }
59     }
60 
61     //--------------------------------------------------------------------------
62     // replace Cp with its cumulative sum
63     //--------------------------------------------------------------------------
64 
65     GB_cumsum (Cp, Cnvec, Cnvec_nonempty, nthreads, Context) ;
66 
67     //--------------------------------------------------------------------------
68     // shift the cumulative sum of the fine tasks
69     //--------------------------------------------------------------------------
70 
71     for (int taskid = 0 ; taskid < ntasks ; taskid++)
72     {
73         int64_t k = TaskList [taskid].kfirst ;
74         if (TaskList [taskid].klast < 0)
75         {
76             // TaskList [taskid].pC is currently an offset for this task into
77             // C(:,k).  The first fine task for vector k has an offset of zero,
78             // the 2nd fine task has an offset equal to the # of entries
79             // computed by the first task, and so on.  Cp [k] needs to be added
80             // to all offsets to get the final starting position for each fine
81             // task in C.
82             TaskList [taskid].pC += Cp [k] ;
83         }
84         else
85         {
86             // The last fine task to operate on vector k needs know its own
87             // pC_end, which is Cp [k+1].  Suppose that task is taskid-1.  If
88             // this taskid is the first fine task for vector k, then TaskList
89             // [taskid].pC is set to Cp [k] above.  If all coarse tasks are
90             // also given TaskList [taskid].pC = Cp [k], then taskid-1 will
91             // always know its pC_end, which is TaskList [taskid].pC,
92             // regardless of whether taskid is a fine or coarse task.
93             TaskList [taskid].pC = Cp [k] ;
94         }
95     }
96 
97     // The last task is ntasks-1.  It may be a fine task, in which case it
98     // computes the entries in C from TaskList [ntasks-1].pC to
99     // TaskList [ntasks].pC-1, inclusive.
100     TaskList [ntasks].pC = Cp [Cnvec] ;
101 
102     //--------------------------------------------------------------------------
103     // check result
104     //--------------------------------------------------------------------------
105 
106     #ifdef GB_DEBUG
107     // nthreads, ntasks, Cnvec) ;
108     for (int t = 0 ; t < ntasks ; t++)
109     {
110         int64_t k = TaskList [t].kfirst ;
111         int64_t klast = TaskList [t].klast ;
112         if (klast < 0)
113         {
114             // this is a fine task for vector k
115             int64_t pA     = TaskList [t].pA ;
116             int64_t pA_end = TaskList [t].pA_end ;
117             int64_t pB     = TaskList [t].pB ;
118             int64_t pB_end = TaskList [t].pB_end ;
119             int64_t pC     = TaskList [t].pC ;
120             int64_t pC_end = TaskList [t+1].pC ;
121             int64_t pM     = TaskList [t].pM ;
122             int64_t pM_end = TaskList [t].pM_end ;
123             ASSERT (k >= 0 && k < Cnvec) ;
124             // pA:(pA_end-1) must reside inside A(:,j), and pB:(pB_end-1) must
125             // reside inside B(:,j), but these cannot be checked here since A
126             // and B are not available.  These basic checks can be done:
127             ASSERT (pA == -1 || (0 <= pA && pA <= pA_end)) ;
128             ASSERT (pB == -1 || (0 <= pB && pB <= pB_end)) ;
129             ASSERT (pM == -1 || (0 <= pM && pM <= pM_end)) ;
130             // pC and pC_end can be checked exactly.  This task t computes
131             // entries pC:(pC_end-1) of C, inclusive.
132             ASSERT (Cp [k] <= pC && pC <= pC_end && pC_end <= Cp [k+1]) ;
133         }
134         else
135         {
136             // this is a coarse task for vectors k:klast, inclusive
137             ASSERT (k >= 0 && k < Cnvec) ;
138             ASSERT (klast >= 0 && klast <= Cnvec) ;
139             ASSERT (k <= klast) ;
140         }
141     }
142     #endif
143 }
144 
145