1 //------------------------------------------------------------------------------
2 // GB_subassign_07: C(I,J)<M> += scalar ; no S
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // Method 07: C(I,J)<M> += scalar ; no S
11
12 // M: present
13 // Mask_comp: false
14 // C_replace: false
15 // accum: present
16 // A: scalar
17 // S: none
18
19 // C: not bitmap
20 // M: any sparsity
21
22 #include "GB_subassign_methods.h"
23
GB_subassign_07(GrB_Matrix C,const GrB_Index * I,const int64_t nI,const int Ikind,const int64_t Icolon[3],const GrB_Index * J,const int64_t nJ,const int Jkind,const int64_t Jcolon[3],const GrB_Matrix M,const bool Mask_struct,const GrB_BinaryOp accum,const void * scalar,const GrB_Type atype,GB_Context Context)24 GrB_Info GB_subassign_07
25 (
26 GrB_Matrix C,
27 // input:
28 const GrB_Index *I,
29 const int64_t nI,
30 const int Ikind,
31 const int64_t Icolon [3],
32 const GrB_Index *J,
33 const int64_t nJ,
34 const int Jkind,
35 const int64_t Jcolon [3],
36 const GrB_Matrix M,
37 const bool Mask_struct,
38 const GrB_BinaryOp accum,
39 const void *scalar,
40 const GrB_Type atype,
41 GB_Context Context
42 )
43 {
44
45 //--------------------------------------------------------------------------
46 // check inputs
47 //--------------------------------------------------------------------------
48
49 ASSERT (!GB_IS_BITMAP (C)) ;
50 ASSERT (!GB_aliased (C, M)) ; // NO ALIAS of C==M
51
52 //--------------------------------------------------------------------------
53 // get inputs
54 //--------------------------------------------------------------------------
55
56 GB_EMPTY_TASKLIST ;
57 GB_MATRIX_WAIT_IF_JUMBLED (C) ;
58 GB_MATRIX_WAIT_IF_JUMBLED (M) ;
59
60 GB_GET_C ; // C must not be bitmap
61 int64_t zorig = C->nzombies ;
62 const int64_t *restrict Ch = C->h ;
63 const int64_t *restrict Cp = C->p ;
64 const bool C_is_hyper = (Ch != NULL) ;
65 const int64_t Cnvec = C->nvec ;
66 GB_GET_MASK ;
67 GB_GET_ACCUM_SCALAR ;
68
69 //--------------------------------------------------------------------------
70 // Method 07: C(I,J)<M> += scalar ; no S
71 //--------------------------------------------------------------------------
72
73 // Time: Close to Optimal: same as Method 05.
74
75 // Method 05 and Method 07 are very similar. Also compare with Method 06n.
76
77 //--------------------------------------------------------------------------
78 // Parallel: slice M into coarse/fine tasks (Method 05, 06n, 07)
79 //--------------------------------------------------------------------------
80
81 GB_SUBASSIGN_ONE_SLICE (M) ; // M cannot be jumbled
82
83 //--------------------------------------------------------------------------
84 // phase 1: undelete zombies, update entries, and count pending tuples
85 //--------------------------------------------------------------------------
86
87 #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
88 reduction(+:nzombies)
89 for (taskid = 0 ; taskid < ntasks ; taskid++)
90 {
91
92 //----------------------------------------------------------------------
93 // get the task descriptor
94 //----------------------------------------------------------------------
95
96 GB_GET_TASK_DESCRIPTOR_PHASE1 ;
97
98 //----------------------------------------------------------------------
99 // compute all vectors in this task
100 //----------------------------------------------------------------------
101
102 for (int64_t k = kfirst ; k <= klast ; k++)
103 {
104
105 //------------------------------------------------------------------
106 // get j, the kth vector of M
107 //------------------------------------------------------------------
108
109 int64_t j = GBH (Mh, k) ;
110 GB_GET_VECTOR (pM, pM_end, pA, pA_end, Mp, k, Mvlen) ;
111 int64_t mjnz = pM_end - pM ;
112 if (mjnz == 0) continue ;
113
114 //------------------------------------------------------------------
115 // get jC, the corresponding vector of C
116 //------------------------------------------------------------------
117
118 GB_GET_jC ;
119 int64_t cjnz = pC_end - pC_start ;
120 bool cjdense = (cjnz == Cvlen) ;
121
122 //------------------------------------------------------------------
123 // C(I,jC)<M(:,j)> += scalar ; no S
124 //------------------------------------------------------------------
125
126 if (cjdense)
127 {
128
129 //--------------------------------------------------------------
130 // C(:,jC) is dense so the binary search of C is not needed
131 //--------------------------------------------------------------
132
133 for ( ; pM < pM_end ; pM++)
134 {
135
136 //----------------------------------------------------------
137 // update C(iC,jC), but only if M(iA,j) allows it
138 //----------------------------------------------------------
139
140 bool mij = GBB (Mb, pM) && GB_mcast (Mx, pM, msize) ;
141 if (mij)
142 {
143 int64_t iA = GBI (Mi, pM, Mvlen) ;
144 GB_iC_DENSE_LOOKUP ;
145
146 // ----[C A 1] or [X A 1]-------------------------------
147 // [C A 1]: action: ( =C+A ): apply accum
148 // [X A 1]: action: ( undelete ): zombie lives
149 GB_withaccum_C_A_1_scalar ;
150 }
151 }
152
153 }
154 else
155 {
156
157 //--------------------------------------------------------------
158 // C(:,jC) is sparse; use binary search for C
159 //--------------------------------------------------------------
160
161 for ( ; pM < pM_end ; pM++)
162 {
163
164 //----------------------------------------------------------
165 // update C(iC,jC), but only if M(iA,j) allows it
166 //----------------------------------------------------------
167
168 bool mij = GBB (Mb, pM) && GB_mcast (Mx, pM, msize) ;
169 if (mij)
170 {
171 int64_t iA = GBI (Mi, pM, Mvlen) ;
172
173 // find C(iC,jC) in C(:,jC)
174 GB_iC_BINARY_SEARCH ;
175 if (cij_found)
176 {
177 // ----[C A 1] or [X A 1]---------------------------
178 // [C A 1]: action: ( =C+A ): apply accum
179 // [X A 1]: action: ( undelete ): zombie lives
180 GB_withaccum_C_A_1_scalar ;
181 }
182 else
183 {
184 // ----[. A 1]--------------------------------------
185 // [. A 1]: action: ( insert )
186 task_pending++ ;
187 }
188 }
189 }
190 }
191 }
192
193 GB_PHASE1_TASK_WRAPUP ;
194 }
195
196 //--------------------------------------------------------------------------
197 // phase 2: insert pending tuples
198 //--------------------------------------------------------------------------
199
200 GB_PENDING_CUMSUM ;
201 zorig = C->nzombies ;
202
203 #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
204 reduction(&&:pending_sorted)
205 for (taskid = 0 ; taskid < ntasks ; taskid++)
206 {
207
208 //----------------------------------------------------------------------
209 // get the task descriptor
210 //----------------------------------------------------------------------
211
212 GB_GET_TASK_DESCRIPTOR_PHASE2 ;
213
214 //----------------------------------------------------------------------
215 // compute all vectors in this task
216 //----------------------------------------------------------------------
217
218 for (int64_t k = kfirst ; k <= klast ; k++)
219 {
220
221 //------------------------------------------------------------------
222 // get j, the kth vector of M
223 //------------------------------------------------------------------
224
225 int64_t j = GBH (Mh, k) ;
226 GB_GET_VECTOR (pM, pM_end, pA, pA_end, Mp, k, Mvlen) ;
227 int64_t mjnz = pM_end - pM ;
228 if (mjnz == 0) continue ;
229
230 //------------------------------------------------------------------
231 // get jC, the corresponding vector of C
232 //------------------------------------------------------------------
233
234 GB_GET_jC ;
235 bool cjdense = ((pC_end - pC_start) == Cvlen) ;
236
237 //------------------------------------------------------------------
238 // C(I,jC)<M(:,j)> += scalar ; no S
239 //------------------------------------------------------------------
240
241 if (!cjdense)
242 {
243
244 //--------------------------------------------------------------
245 // C(:,jC) is sparse; use binary search for C
246 //--------------------------------------------------------------
247
248 for ( ; pM < pM_end ; pM++)
249 {
250
251 //----------------------------------------------------------
252 // update C(iC,jC), but only if M(iA,j) allows it
253 //----------------------------------------------------------
254
255 bool mij = GBB (Mb, pM) && GB_mcast (Mx, pM, msize) ;
256 if (mij)
257 {
258 int64_t iA = GBI (Mi, pM, Mvlen) ;
259
260 // find C(iC,jC) in C(:,jC)
261 GB_iC_BINARY_SEARCH ;
262 if (!cij_found)
263 {
264 // ----[. A 1]--------------------------------------
265 // [. A 1]: action: ( insert )
266 GB_PENDING_INSERT (scalar) ;
267 }
268 }
269 }
270 }
271 }
272
273 GB_PHASE2_TASK_WRAPUP ;
274 }
275
276 //--------------------------------------------------------------------------
277 // finalize the matrix and return result
278 //--------------------------------------------------------------------------
279
280 GB_SUBASSIGN_WRAPUP ;
281 }
282
283