1 //------------------------------------------------------------------------------
2 // GB_AxB_saxpy3_cumsum: finalize nnz(C(:,j)) and find cumulative sum of Cp
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // phase3: fine tasks finalize their computation nnz(C(:,j))
11 // phase4: cumulative sum of C->p
12
13 #include "GB_AxB_saxpy3.h"
14
GB_AxB_saxpy3_cumsum(GrB_Matrix C,GB_saxpy3task_struct * SaxpyTasks,int nfine,double chunk,int nthreads,GB_Context Context)15 void GB_AxB_saxpy3_cumsum
16 (
17 GrB_Matrix C, // finalize C->p
18 GB_saxpy3task_struct *SaxpyTasks, // list of tasks, and workspace
19 int nfine, // number of fine tasks
20 double chunk, // chunk size
21 int nthreads, // number of threads
22 GB_Context Context
23 )
24 {
25
26 //--------------------------------------------------------------------------
27 // get C
28 //--------------------------------------------------------------------------
29
30 ASSERT (!GB_IS_BITMAP (C)) ;
31 ASSERT (!GB_IS_FULL (C)) ;
32 int64_t *restrict Cp = C->p ;
33 const int64_t cvlen = C->vlen ;
34 const int64_t cnvec = C->nvec ;
35
36 //==========================================================================
37 // phase3: count nnz(C(:,j)) for fine tasks
38 //==========================================================================
39
40 int taskid ;
41 #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1)
42 for (taskid = 0 ; taskid < nfine ; taskid++)
43 {
44
45 //----------------------------------------------------------------------
46 // get the task descriptor
47 //----------------------------------------------------------------------
48
49 // int64_t kk = SaxpyTasks [taskid].vector ;
50 int64_t hash_size = SaxpyTasks [taskid].hsize ;
51 bool use_Gustavson = (hash_size == cvlen) ;
52 int team_size = SaxpyTasks [taskid].team_size ;
53 int leader = SaxpyTasks [taskid].leader ;
54 int my_teamid = taskid - leader ;
55 int64_t my_cjnz = 0 ;
56
57 if (use_Gustavson)
58 {
59
60 //------------------------------------------------------------------
61 // phase3: fine Gustavson task, C=A*B, C<M>=A*B, or C<!M>=A*B
62 //------------------------------------------------------------------
63
64 // Hf [i] == 2 if C(i,j) is an entry in C(:,j)
65
66 int8_t *restrict Hf ;
67 Hf = (int8_t *restrict) SaxpyTasks [taskid].Hf ;
68 int64_t istart, iend ;
69 GB_PARTITION (istart, iend, cvlen, my_teamid, team_size) ;
70 for (int64_t i = istart ; i < iend ; i++)
71 {
72 if (Hf [i] == 2)
73 {
74 my_cjnz++ ;
75 }
76 }
77
78 }
79 else
80 {
81
82 //------------------------------------------------------------------
83 // phase3: fine hash task, C=A*B, C<M>=A*B, or C<!M>=A*B
84 //------------------------------------------------------------------
85
86 // (Hf [hash] & 3) == 2 if C(i,j) is an entry in C(:,j),
87 // and the index i of the entry is (Hf [hash] >> 2) - 1.
88
89 int64_t *restrict Hf = (int64_t *restrict) SaxpyTasks [taskid].Hf ;
90 int64_t mystart, myend ;
91 GB_PARTITION (mystart, myend, hash_size, my_teamid, team_size) ;
92 for (int64_t hash = mystart ; hash < myend ; hash++)
93 {
94 if ((Hf [hash] & 3) == 2)
95 {
96 my_cjnz++ ;
97 }
98 }
99 }
100
101 SaxpyTasks [taskid].my_cjnz = my_cjnz ; // count my nnz(C(:,j))
102 }
103
104 //==========================================================================
105 // phase4: compute Cp with cumulative sum
106 //==========================================================================
107
108 //--------------------------------------------------------------------------
109 // sum nnz (C (:,j)) for fine tasks
110 //--------------------------------------------------------------------------
111
112 // SaxpyTasks [taskid].my_cjnz is the # of unique entries found in C(:,j) by
113 // that task. Sum these terms to compute total # of entries in C(:,j).
114
115 for (taskid = 0 ; taskid < nfine ; taskid++)
116 {
117 int64_t kk = SaxpyTasks [taskid].vector ;
118 Cp [kk] = 0 ;
119 }
120
121 for (taskid = 0 ; taskid < nfine ; taskid++)
122 {
123 int64_t kk = SaxpyTasks [taskid].vector ;
124 int64_t my_cjnz = SaxpyTasks [taskid].my_cjnz ;
125 Cp [kk] += my_cjnz ;
126 ASSERT (my_cjnz <= cvlen) ;
127 }
128
129 //--------------------------------------------------------------------------
130 // cumulative sum for Cp (fine and coarse tasks)
131 //--------------------------------------------------------------------------
132
133 // Cp [kk] is now nnz (C (:,j)), for all vectors j, whether computed by
134 // fine tasks or coarse tasks, and where j == GBH (Bh, kk)
135
136 int nth = GB_nthreads (cnvec, chunk, nthreads) ;
137 GB_cumsum (Cp, cnvec, &(C->nvec_nonempty), nth, Context) ;
138
139 //--------------------------------------------------------------------------
140 // cumulative sum of nnz (C (:,j)) for each team of fine tasks
141 //--------------------------------------------------------------------------
142
143 int64_t cjnz_sum = 0 ;
144 for (taskid = 0 ; taskid < nfine ; taskid++)
145 {
146 if (taskid == SaxpyTasks [taskid].leader)
147 {
148 cjnz_sum = 0 ;
149 // also find the max (C (:,j)) for any fine hash tasks
150 int64_t hash_size = SaxpyTasks [taskid].hsize ;
151 bool use_Gustavson = (hash_size == cvlen) ;
152 if (!use_Gustavson)
153 {
154 int64_t kk = SaxpyTasks [taskid].vector ;
155 int64_t cjnz = Cp [kk+1] - Cp [kk] ;
156 }
157 }
158 int64_t my_cjnz = SaxpyTasks [taskid].my_cjnz ;
159 SaxpyTasks [taskid].my_cjnz = cjnz_sum ;
160 cjnz_sum += my_cjnz ;
161 }
162 }
163
164