1 //------------------------------------------------------------------------------
2 // GB_subassign_08n: C(I,J)<M> += A ; no S
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // Method 08n: C(I,J)<M> += A ; no S
11
12 // M: present
13 // Mask_comp: false
14 // C_replace: false
15 // accum: present
16 // A: matrix
17 // S: none
18
19 // C not bitmap; C can be full since no zombies are inserted in that case.
20 // If C is bitmap, then GB_bitmap_assign_M_accum is used instead.
21 // M, A: not bitmap; Method 08s is used instead if M or A are bitmap.
22
23 #include "GB_subassign_methods.h"
24
25 //------------------------------------------------------------------------------
26 // GB_PHASE1_ACTION
27 //------------------------------------------------------------------------------
28
29 // action to take for phase 1 when A(i,j) exists and M(i,j)=1
// action to take for phase 1 when A(i,j) exists and M(i,j)=1:
// either update the existing entry C(iC,jC) in place via the accum operator
// (reviving it if it is a zombie), or — when the entry does not yet exist —
// just count it, so phase 2 can insert it as a pending tuple.  Relies on
// cjdense, cij_found, task_pending, and the GB_iC_* lookup state being in
// scope at the expansion site.
#define GB_PHASE1_ACTION                                                    \
{                                                                           \
    if (cjdense)                                                            \
    {                                                                       \
        /* C(:,jC) is dense: direct lookup of C(iC,jC), no search needed */ \
        GB_iC_DENSE_LOOKUP ;                                                \
        /* ----[C A 1] or [X A 1]------------------------------- */        \
        /* [C A 1]: action: ( =C+A ): apply accum */                        \
        /* [X A 1]: action: ( undelete ): zombie lives */                   \
        GB_withaccum_C_A_1_matrix ;                                         \
    }                                                                       \
    else                                                                    \
    {                                                                       \
        /* binary search for C(iC,jC) in C(:,jC) */                         \
        GB_iC_BINARY_SEARCH ;                                               \
        if (cij_found)                                                      \
        {                                                                   \
            /* ----[C A 1] or [X A 1]--------------------------- */        \
            /* [C A 1]: action: ( =C+A ): apply accum */                    \
            /* [X A 1]: action: ( undelete ): zombie lives */               \
            GB_withaccum_C_A_1_matrix ;                                     \
        }                                                                   \
        else                                                                \
        {                                                                   \
            /* ----[. A 1]-------------------------------------- */        \
            /* [. A 1]: action: ( insert ) */                               \
            /* do not modify C here; phase 2 inserts the tuple */           \
            task_pending++ ;                                                \
        }                                                                   \
    }                                                                       \
}
60
61 //------------------------------------------------------------------------------
62 // GB_PHASE2_ACTION
63 //------------------------------------------------------------------------------
64
65 // action to take for phase 2 when A(i,j) exists and M(i,j)=1
// action to take for phase 2 when A(i,j) exists and M(i,j)=1:
// insert the pending tuple that phase 1 counted.  Phase 2 is only reached
// for sparse C(:,jC) vectors: a dense C(:,jC) has no pending tuples since
// phase 1 updated all of its entries in place (hence the ASSERT below, and
// the "if (cjdense) continue" in the phase-2 loop of GB_subassign_08n).
#define GB_PHASE2_ACTION                                                    \
{                                                                           \
    ASSERT (!cjdense) ;                                                     \
    {                                                                       \
        /* binary search for C(iC,jC) in C(:,jC) */                         \
        GB_iC_BINARY_SEARCH ;                                               \
        if (!cij_found)                                                     \
        {                                                                   \
            /* ----[. A 1]-------------------------------------- */        \
            /* [. A 1]: action: ( insert ) */                               \
            /* queue the value of A(i,j), at Ax [pA*asize], as a */         \
            /* pending tuple for C(iC,jC) */                                \
            GB_PENDING_INSERT (Ax +(pA*asize)) ;                            \
        }                                                                   \
    }                                                                       \
}
80
81 //------------------------------------------------------------------------------
82 // GB_subassign_08n: C(I,J)<M> += A ; no S
83 //------------------------------------------------------------------------------
84
GrB_Info GB_subassign_08n       // C(I,J)<M> += A ; no S matrix constructed
(
    GrB_Matrix C,               // input/output: modified in place
    // input:
    const GrB_Index *I,         // row index list for the submatrix C(I,J)
    const int64_t nI,           // length of I
    const int Ikind,            // encoding of the index list I
    const int64_t Icolon [3],   // colon form of I (when Ikind denotes one)
    const GrB_Index *J,         // column index list for the submatrix C(I,J)
    const int64_t nJ,           // length of J
    const int Jkind,            // encoding of the index list J
    const int64_t Jcolon [3],   // colon form of J (when Jkind denotes one)
    const GrB_Matrix M,         // mask matrix; always present for Method 08n
    const bool Mask_struct,     // if true, use only the pattern of M
    const GrB_BinaryOp accum,   // accum operator; always present for 08n
    const GrB_Matrix A,         // matrix assigned into C(I,J)
    GB_Context Context
)
{

    //--------------------------------------------------------------------------
    // check inputs
    //--------------------------------------------------------------------------

    ASSERT (!GB_IS_BITMAP (C)) ;
    ASSERT (!GB_IS_BITMAP (M)) ;    // Method 08s is used if M is bitmap
    ASSERT (!GB_IS_BITMAP (A)) ;    // Method 08s is used if A is bitmap
    ASSERT (!GB_aliased (C, M)) ;   // NO ALIAS of C==M
    ASSERT (!GB_aliased (C, A)) ;   // NO ALIAS of C==A

    //--------------------------------------------------------------------------
    // get inputs
    //--------------------------------------------------------------------------

    // GB_EMPTY_TASKLIST declares the task-list state used throughout this
    // method (TaskList, TaskList_size, ntasks, nthreads, taskid, Z_to_A,
    // Z_to_M, ...): these names are used below but declared nowhere else
    // in this function.  GB_OK presumably jumps to its error cleanup.
    GB_EMPTY_TASKLIST ;
    // the binary searches and linear merges below require sorted vectors
    GB_MATRIX_WAIT_IF_JUMBLED (C) ;
    GB_MATRIX_WAIT_IF_JUMBLED (M) ;
    GB_MATRIX_WAIT_IF_JUMBLED (A) ;

    GB_GET_C ;      // C must not be bitmap
    int64_t zorig = C->nzombies ;   // zombie count on entry, before phase 1
    const int64_t Cnvec = C->nvec ;
    const int64_t *restrict Ch = C->h ;
    const int64_t *restrict Cp = C->p ;
    const bool C_is_hyper = (Ch != NULL) ;  // hypersparse iff C->h present
    GB_GET_MASK ;
    GB_GET_A ;
    const int64_t *restrict Ah = A->h ;
    GB_GET_ACCUM ;

    //--------------------------------------------------------------------------
    // Method 08n: C(I,J)<M> += A ; no S
    //--------------------------------------------------------------------------

    // Time: Close to optimal. Omega (sum_j (min (nnz (A(:,j)), nnz (M(:,j)))),
    // since only the intersection of A.*M needs to be considered.  If either
    // M(:,j) or A(:,j) are very sparse compared to the other, then the shorter
    // is traversed with a linear-time scan and a binary search is used for the
    // other.  If the number of nonzeros is comparable, a linear-time scan is
    // used for both.  Once two entries M(i,j)=1 and A(i,j) are found with the
    // same index i, the entry A(i,j) is accumulated or inserted into C.

    // The algorithm is very much like the eWise multiplication of A.*M, so the
    // parallel scheduling relies on GB_emult_01_phase0 and GB_ewise_slice.

    //--------------------------------------------------------------------------
    // Parallel: slice the eWiseMult of Z=A.*M (Method 08n only)
    //--------------------------------------------------------------------------

    // Method 08n only.  If C is sparse, it is sliced for a fine task, so that
    // it can do a binary search via GB_iC_BINARY_SEARCH.  But if C(:,jC) is
    // dense, C(:,jC) is not sliced, so the fine task must do a direct lookup
    // via GB_iC_DENSE_LOOKUP.  Otherwise a race condition will occur.
    // The Z matrix is not constructed, except for its hyperlist (Zh_shallow)
    // and mapping to A and M.

    // No matrix (C, M, or A) can be bitmap.  C, M, A can be sparse/hyper/full,
    // in any combination.

    int64_t Znvec ;
    const int64_t *restrict Zh_shallow = NULL ;
    GB_OK (GB_subassign_emult_slice (
        &TaskList, &TaskList_size, &ntasks, &nthreads,
        &Znvec, &Zh_shallow, &Z_to_A, &Z_to_A_size, &Z_to_M, &Z_to_M_size,
        C, I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,
        A, M, Context)) ;
    GB_ALLOCATE_NPENDING_WERK ;

    //--------------------------------------------------------------------------
    // phase 1: undelete zombies, update entries, and count pending tuples
    //--------------------------------------------------------------------------

    // each task accumulates its zombie delta into the shared nzombies count
    #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
        reduction(+:nzombies)
    for (taskid = 0 ; taskid < ntasks ; taskid++)
    {

        //----------------------------------------------------------------------
        // get the task descriptor
        //----------------------------------------------------------------------

        GB_GET_TASK_DESCRIPTOR_PHASE1 ;

        //----------------------------------------------------------------------
        // compute all vectors in this task
        //----------------------------------------------------------------------

        for (int64_t k = kfirst ; k <= klast ; k++)
        {

            //------------------------------------------------------------------
            // get A(:,j) and M(:,j)
            //------------------------------------------------------------------

            int64_t j = GBH (Zh_shallow, k) ;
            GB_GET_EVEC (pA, pA_end, pA, pA_end, Ap, Ah, j, k, Z_to_A, Avlen) ;
            GB_GET_EVEC (pM, pM_end, pB, pB_end, Mp, Mh, j, k, Z_to_M, Mvlen) ;

            //------------------------------------------------------------------
            // quick checks for empty intersection of A(:,j) and M(:,j)
            //------------------------------------------------------------------

            int64_t ajnz = pA_end - pA ;
            int64_t mjnz = pM_end - pM ;
            // nothing to do if either vector is empty
            if (ajnz == 0 || mjnz == 0) continue ;
            int64_t iA_first = GBI (Ai, pA, Avlen) ;
            int64_t iA_last  = GBI (Ai, pA_end-1, Avlen) ;
            int64_t iM_first = GBI (Mi, pM, Mvlen) ;
            int64_t iM_last  = GBI (Mi, pM_end-1, Mvlen) ;
            // index ranges are disjoint: the intersection is empty
            if (iA_last < iM_first || iM_last < iA_first) continue ;
            // save the start of M(:,j); presumably consumed by the
            // GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP macro below
            int64_t pM_start = pM ;

            //------------------------------------------------------------------
            // get jC, the corresponding vector of C
            //------------------------------------------------------------------

            GB_GET_jC ;
            // C(:,jC) is "dense" if it holds all Cvlen possible entries
            bool cjdense = (pC_end - pC_start == Cvlen) ;

            //------------------------------------------------------------------
            // C(I,jC)<M(:,j)> += A(:,j) ; no S
            //------------------------------------------------------------------

            // 32x is the heuristic cutoff between "scan the shorter vector
            // and binary-search the longer" vs a two-finger linear merge
            if (ajnz > 32 * mjnz)
            {

                //--------------------------------------------------------------
                // A(:,j) is much denser than M(:,j)
                //--------------------------------------------------------------

                // scan M(:,j); binary search each masked index in A(:,j)
                for ( ; pM < pM_end ; pM++)
                {
                    if (GB_mcast (Mx, pM, msize))
                    {
                        // row index of this M(i,j) entry, sought in A(:,j)
                        int64_t iA = GBI (Mi, pM, Mvlen) ;
                        // find iA in A(:,j)
                        int64_t pright = pA_end - 1 ;
                        bool found ;
                        // FUTURE::: exploit dense A(:,j)
                        GB_BINARY_SEARCH (iA, Ai, pA, pright, found) ;
                        if (found) GB_PHASE1_ACTION ;
                    }
                }

            }
            else if (mjnz > 32 * ajnz)
            {

                //--------------------------------------------------------------
                // M(:,j) is much denser than A(:,j)
                //--------------------------------------------------------------

                // FUTURE::: exploit dense mask
                bool mjdense = false ;

                // scan A(:,j); look up each index in the mask M(:,j)
                for ( ; pA < pA_end ; pA++)
                {
                    int64_t iA = GBI (Ai, pA, Avlen) ;
                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                    if (mij) GB_PHASE1_ACTION ;
                }

            }
            else
            {

                //----------------------------------------------------------
                // A(:,j) and M(:,j) have about the same # of entries
                //----------------------------------------------------------

                // linear-time scan of A(:,j) and M(:,j)

                while (pA < pA_end && pM < pM_end)
                {
                    int64_t iA = GBI (Ai, pA, Avlen) ;
                    int64_t iM = GBI (Mi, pM, Mvlen) ;
                    if (iA < iM)
                    {
                        // A(i,j) exists but not M(i,j)
                        GB_NEXT (A) ;
                    }
                    else if (iM < iA)
                    {
                        // M(i,j) exists but not A(i,j)
                        GB_NEXT (M) ;
                    }
                    else
                    {
                        // both A(i,j) and M(i,j) exist; act only if the
                        // mask value casts to true
                        if (GB_mcast (Mx, pM, msize)) GB_PHASE1_ACTION ;
                        GB_NEXT (A) ;
                        GB_NEXT (M) ;
                    }
                }
            }
        }

        GB_PHASE1_TASK_WRAPUP ;
    }

    //--------------------------------------------------------------------------
    // phase 2: insert pending tuples
    //--------------------------------------------------------------------------

    GB_PENDING_CUMSUM ;
    // refresh the zombie snapshot: phase 1 may have revived zombies
    zorig = C->nzombies ;

    #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
        reduction(&&:pending_sorted)
    for (taskid = 0 ; taskid < ntasks ; taskid++)
    {

        //----------------------------------------------------------------------
        // get the task descriptor
        //----------------------------------------------------------------------

        GB_GET_TASK_DESCRIPTOR_PHASE2 ;

        //----------------------------------------------------------------------
        // compute all vectors in this task
        //----------------------------------------------------------------------

        // this loop mirrors phase 1 exactly, except that (a) dense C(:,jC)
        // vectors are skipped (phase 1 already updated them in place), and
        // (b) the action taken on each match is GB_PHASE2_ACTION (insert a
        // pending tuple) instead of GB_PHASE1_ACTION
        for (int64_t k = kfirst ; k <= klast ; k++)
        {

            //------------------------------------------------------------------
            // get A(:,j) and M(:,j)
            //------------------------------------------------------------------

            int64_t j = GBH (Zh_shallow, k) ;
            GB_GET_EVEC (pA, pA_end, pA, pA_end, Ap, Ah, j, k, Z_to_A, Avlen) ;
            GB_GET_EVEC (pM, pM_end, pB, pB_end, Mp, Mh, j, k, Z_to_M, Mvlen) ;

            //------------------------------------------------------------------
            // quick checks for empty intersection of A(:,j) and M(:,j)
            //------------------------------------------------------------------

            int64_t ajnz = pA_end - pA ;
            int64_t mjnz = pM_end - pM ;
            if (ajnz == 0 || mjnz == 0) continue ;
            int64_t iA_first = GBI (Ai, pA, Avlen) ;
            int64_t iA_last  = GBI (Ai, pA_end-1, Avlen) ;
            int64_t iM_first = GBI (Mi, pM, Mvlen) ;
            int64_t iM_last  = GBI (Mi, pM_end-1, Mvlen) ;
            if (iA_last < iM_first || iM_last < iA_first) continue ;
            int64_t pM_start = pM ;

            //------------------------------------------------------------------
            // get jC, the corresponding vector of C
            //------------------------------------------------------------------

            GB_GET_jC ;
            bool cjdense = (pC_end - pC_start == Cvlen) ;
            // a dense C(:,jC) generated no pending tuples in phase 1
            if (cjdense) continue ;

            //------------------------------------------------------------------
            // C(I,jC)<M(:,j)> += A(:,j) ; no S
            //------------------------------------------------------------------

            if (ajnz > 32 * mjnz)
            {

                //--------------------------------------------------------------
                // A(:,j) is much denser than M(:,j)
                //--------------------------------------------------------------

                for ( ; pM < pM_end ; pM++)
                {
                    if (GB_mcast (Mx, pM, msize))
                    {
                        int64_t iA = GBI (Mi, pM, Mvlen) ;
                        // find iA in A(:,j)
                        int64_t pright = pA_end - 1 ;
                        bool found ;
                        // FUTURE::: exploit dense A(:,j)
                        GB_BINARY_SEARCH (iA, Ai, pA, pright, found) ;
                        if (found) GB_PHASE2_ACTION ;
                    }
                }

            }
            else if (mjnz > 32 * ajnz)
            {

                //--------------------------------------------------------------
                // M(:,j) is much denser than A(:,j)
                //--------------------------------------------------------------

                // FUTURE::: exploit dense mask
                bool mjdense = false ;

                for ( ; pA < pA_end ; pA++)
                {
                    int64_t iA = GBI (Ai, pA, Avlen) ;
                    GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
                    if (mij) GB_PHASE2_ACTION ;
                }

            }
            else
            {

                //----------------------------------------------------------
                // A(:,j) and M(:,j) have about the same # of entries
                //----------------------------------------------------------

                // linear-time scan of A(:,j) and M(:,j)

                while (pA < pA_end && pM < pM_end)
                {
                    int64_t iA = GBI (Ai, pA, Avlen) ;
                    int64_t iM = GBI (Mi, pM, Mvlen) ;
                    if (iA < iM)
                    {
                        // A(i,j) exists but not M(i,j)
                        GB_NEXT (A) ;
                    }
                    else if (iM < iA)
                    {
                        // M(i,j) exists but not A(i,j)
                        GB_NEXT (M) ;
                    }
                    else
                    {
                        // both A(i,j) and M(i,j) exist
                        if (GB_mcast (Mx, pM, msize)) GB_PHASE2_ACTION ;
                        GB_NEXT (A) ;
                        GB_NEXT (M) ;
                    }
                }
            }
        }

        GB_PHASE2_TASK_WRAPUP ;
    }

    //--------------------------------------------------------------------------
    // finalize the matrix and return result
    //--------------------------------------------------------------------------

    // NOTE(review): GB_SUBASSIGN_WRAPUP presumably frees the task list and
    // workspace, records the pending tuples/zombies in C, and returns the
    // GrB_Info result — confirm against GB_subassign_methods.h
    GB_SUBASSIGN_WRAPUP ;
}
447
448