1 //------------------------------------------------------------------------------
2 // GB_subassign_08s_and_16: C(I,J)<M or !M> += A ; using S
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // Method 08s: C(I,J)<M> += A ; using S
11 // Method 16: C(I,J)<!M> += A ; using S
12
13 // M: present
14 // Mask_comp: true or false
15 // C_replace: false
16 // accum: present
17 // A: matrix
18 // S: constructed
19
20 // C: not bitmap: use GB_bitmap_assign instead
21 // M, A: any sparsity structure.
22
23 #include "GB_subassign_methods.h"
24
GB_subassign_08s_and_16(GrB_Matrix C,const GrB_Index * I,const int64_t ni,const int64_t nI,const int Ikind,const int64_t Icolon[3],const GrB_Index * J,const int64_t nj,const int64_t nJ,const int Jkind,const int64_t Jcolon[3],const GrB_Matrix M,const bool Mask_struct,const bool Mask_comp,const GrB_BinaryOp accum,const GrB_Matrix A,GB_Context Context)25 GrB_Info GB_subassign_08s_and_16
26 (
27 GrB_Matrix C,
28 // input:
29 const GrB_Index *I,
30 const int64_t ni,
31 const int64_t nI,
32 const int Ikind,
33 const int64_t Icolon [3],
34 const GrB_Index *J,
35 const int64_t nj,
36 const int64_t nJ,
37 const int Jkind,
38 const int64_t Jcolon [3],
39 const GrB_Matrix M,
40 const bool Mask_struct, // if true, use the only structure of M
41 const bool Mask_comp, // if true, !M, else use M
42 const GrB_BinaryOp accum,
43 const GrB_Matrix A,
44 GB_Context Context
45 )
46 {
47
48 //--------------------------------------------------------------------------
49 // check inputs
50 //--------------------------------------------------------------------------
51
52 ASSERT (!GB_IS_BITMAP (C)) ;
53 ASSERT (!GB_aliased (C, M)) ; // NO ALIAS of C==M
54 ASSERT (!GB_aliased (C, A)) ; // NO ALIAS of C==A
55
56 //--------------------------------------------------------------------------
57 // S = C(I,J)
58 //--------------------------------------------------------------------------
59
60 GB_EMPTY_TASKLIST ;
61 GB_OK (GB_subassign_symbolic (S, C, I, ni, J, nj, true, Context)) ;
62
63 //--------------------------------------------------------------------------
64 // get inputs
65 //--------------------------------------------------------------------------
66
67 GB_MATRIX_WAIT_IF_JUMBLED (M) ;
68 GB_MATRIX_WAIT_IF_JUMBLED (A) ;
69
70 GB_GET_C ; // C must not be bitmap
71 GB_GET_MASK ;
72 GB_GET_A ;
73 GB_GET_S ;
74 GB_GET_ACCUM ;
75
76 //--------------------------------------------------------------------------
77 // Method 16: C(I,J)<!M> += A ; using S
78 //--------------------------------------------------------------------------
79
80 // Time: Close to optimal. All entries in A+S must be traversed.
81
82 //--------------------------------------------------------------------------
83 // Method 08s: C(I,J)<M> += A ; using S
84 //--------------------------------------------------------------------------
85
86 // Time: Only entries in A must be traversed, and the corresponding entries
87 // in C located. This method constructs S and traverses all of it in the
88 // worst case. Compare with method 08n, which does not construct S but
89 // instead uses a binary search for entries in C, but it only traverses
90 // entries in A.*M.
91
92 //--------------------------------------------------------------------------
93 // Parallel: A+S (Methods 02, 04, 09, 10, 11, 12, 14, 16, 18, 20)
94 //--------------------------------------------------------------------------
95
96 if (A_is_bitmap)
97 {
98 // all of IxJ must be examined
99 GB_SUBASSIGN_IXJ_SLICE ;
100 }
101 else
102 {
103 // traverse all A+S
104 GB_SUBASSIGN_TWO_SLICE (A, S) ;
105 }
106
107 //--------------------------------------------------------------------------
108 // phase 1: create zombies, update entries, and count pending tuples
109 //--------------------------------------------------------------------------
110
111 if (A_is_bitmap)
112 {
113
114 //----------------------------------------------------------------------
115 // phase1: A is bitmap
116 //----------------------------------------------------------------------
117
118 #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
119 reduction(+:nzombies)
120 for (taskid = 0 ; taskid < ntasks ; taskid++)
121 {
122
123 //------------------------------------------------------------------
124 // get the task descriptor
125 //------------------------------------------------------------------
126
127 GB_GET_IXJ_TASK_DESCRIPTOR_PHASE1 (iA_start, iA_end) ;
128
129 //------------------------------------------------------------------
130 // compute all vectors in this task
131 //------------------------------------------------------------------
132
133 for (int64_t j = kfirst ; j <= klast ; j++)
134 {
135
136 //--------------------------------------------------------------
137 // get S(iA_start:iA_end,j)
138 //--------------------------------------------------------------
139
140 GB_GET_VECTOR_FOR_IXJ (S, iA_start) ;
141 int64_t pA_start = j * Avlen ;
142
143 //--------------------------------------------------------------
144 // get M(:,j)
145 //--------------------------------------------------------------
146
147 int64_t pM_start, pM_end ;
148 GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
149 bool mjdense = (pM_end - pM_start) == Mvlen ;
150
151 //--------------------------------------------------------------
152 // do a 2-way merge of S(iA_start:iA_end,j) and A(ditto,j)
153 //--------------------------------------------------------------
154
155 for (int64_t iA = iA_start ; iA < iA_end ; iA++)
156 {
157 int64_t pA = pA_start + iA ;
158 bool Sfound = (pS < pS_end) && (GBI (Si, pS, Svlen) == iA) ;
159 bool Afound = Ab [pA] ;
160
161 if (Sfound && !Afound)
162 {
163 // S (i,j) is present but A (i,j) is not
164 // ----[C . 1] or [X . 1]-------------------------------
165 // [C . 1]: action: ( C ): no change, with accum
166 // [X . 1]: action: ( X ): still a zombie
167 // ----[C . 0] or [X . 0]-------------------------------
168 // [C . 0]: action: ( C ): no change, with accum
169 // [X . 0]: action: ( X ): still a zombie
170 GB_NEXT (S) ;
171 }
172 else if (!Sfound && Afound)
173 {
174 // S (i,j) is not present, A (i,j) is present
175 GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
176 if (Mask_comp) mij = !mij ;
177 if (mij)
178 {
179 // ----[. A 1]--------------------------------------
180 // [. A 1]: action: ( insert )
181 task_pending++ ;
182 }
183 }
184 else if (Sfound && Afound)
185 {
186 // both S (i,j) and A (i,j) present
187 GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
188 if (Mask_comp) mij = !mij ;
189 if (mij)
190 {
191 // ----[C A 1] or [X A 1]---------------------------
192 // [C A 1]: action: ( =A ): A to C no accum
193 // [C A 1]: action: ( =C+A ): apply accum
194 // [X A 1]: action: ( undelete ): zombie lives
195 GB_C_S_LOOKUP ;
196 GB_withaccum_C_A_1_matrix ;
197 }
198 GB_NEXT (S) ;
199 }
200 }
201 }
202 GB_PHASE1_TASK_WRAPUP ;
203 }
204
205 }
206 else
207 {
208
209 //----------------------------------------------------------------------
210 // phase1: A is hypersparse, sparse, or full
211 //----------------------------------------------------------------------
212
213 #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
214 reduction(+:nzombies)
215 for (taskid = 0 ; taskid < ntasks ; taskid++)
216 {
217
218 //------------------------------------------------------------------
219 // get the task descriptor
220 //------------------------------------------------------------------
221
222 GB_GET_TASK_DESCRIPTOR_PHASE1 ;
223
224 //------------------------------------------------------------------
225 // compute all vectors in this task
226 //------------------------------------------------------------------
227
228 for (int64_t k = kfirst ; k <= klast ; k++)
229 {
230
231 //--------------------------------------------------------------
232 // get A(:,j) and S(:,j)
233 //--------------------------------------------------------------
234
235 int64_t j = GBH (Zh, k) ;
236 GB_GET_MAPPED (pA, pA_end, pA, pA_end, Ap, j, k, Z_to_X, Avlen);
237 GB_GET_MAPPED (pS, pS_end, pB, pB_end, Sp, j, k, Z_to_S, Svlen);
238
239 //--------------------------------------------------------------
240 // get M(:,j)
241 //--------------------------------------------------------------
242
243 int64_t pM_start, pM_end ;
244 GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
245 bool mjdense = (pM_end - pM_start) == Mvlen ;
246
247 //--------------------------------------------------------------
248 // do a 2-way merge of S(:,j) and A(:,j)
249 //--------------------------------------------------------------
250
251 // jC = J [j] ; or J is a colon expression
252 // int64_t jC = GB_ijlist (J, j, Jkind, Jcolon) ;
253
254 // while both list S (:,j) and A (:,j) have entries
255 while (pS < pS_end && pA < pA_end)
256 {
257 int64_t iS = GBI (Si, pS, Svlen) ;
258 int64_t iA = GBI (Ai, pA, Avlen) ;
259
260 if (iS < iA)
261 {
262 // S (i,j) is present but A (i,j) is not
263 // ----[C . 1] or [X . 1]-------------------------------
264 // [C . 1]: action: ( C ): no change, with accum
265 // [X . 1]: action: ( X ): still a zombie
266 // ----[C . 0] or [X . 0]-------------------------------
267 // [C . 0]: action: ( C ): no change, with accum
268 // [X . 0]: action: ( X ): still a zombie
269 GB_NEXT (S) ;
270 }
271 else if (iA < iS)
272 {
273 // S (i,j) is not present, A (i,j) is present
274 GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
275 if (Mask_comp) mij = !mij ;
276 if (mij)
277 {
278 // ----[. A 1]--------------------------------------
279 // [. A 1]: action: ( insert )
280 task_pending++ ;
281 }
282 GB_NEXT (A) ;
283 }
284 else
285 {
286 // both S (i,j) and A (i,j) present
287 GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
288 if (Mask_comp) mij = !mij ;
289 if (mij)
290 {
291 // ----[C A 1] or [X A 1]---------------------------
292 // [C A 1]: action: ( =A ): A to C no accum
293 // [C A 1]: action: ( =C+A ): apply accum
294 // [X A 1]: action: ( undelete ): zombie lives
295 GB_C_S_LOOKUP ;
296 GB_withaccum_C_A_1_matrix ;
297 }
298 GB_NEXT (S) ;
299 GB_NEXT (A) ;
300 }
301 }
302
303 // ignore the remainder of S(:,j)
304
305 // while list A (:,j) has entries. List S (:,j) exhausted.
306 while (pA < pA_end)
307 {
308 // S (i,j) is not present, A (i,j) is present
309 int64_t iA = GBI (Ai, pA, Avlen) ;
310 GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
311 if (Mask_comp) mij = !mij ;
312 if (mij)
313 {
314 // ----[. A 1]------------------------------------------
315 // [. A 1]: action: ( insert )
316 task_pending++ ;
317 }
318 GB_NEXT (A) ;
319 }
320 }
321
322 GB_PHASE1_TASK_WRAPUP ;
323 }
324 }
325
326 //--------------------------------------------------------------------------
327 // phase 2: insert pending tuples
328 //--------------------------------------------------------------------------
329
330 GB_PENDING_CUMSUM ;
331
332 if (A_is_bitmap)
333 {
334
335 //----------------------------------------------------------------------
336 // phase2: A is bitmap
337 //----------------------------------------------------------------------
338
339 #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
340 reduction(&&:pending_sorted)
341 for (taskid = 0 ; taskid < ntasks ; taskid++)
342 {
343
344 //------------------------------------------------------------------
345 // get the task descriptor
346 //------------------------------------------------------------------
347
348 GB_GET_IXJ_TASK_DESCRIPTOR_PHASE2 (iA_start, iA_end) ;
349
350 //------------------------------------------------------------------
351 // compute all vectors in this task
352 //------------------------------------------------------------------
353
354 for (int64_t j = kfirst ; j <= klast ; j++)
355 {
356
357 //--------------------------------------------------------------
358 // get S(iA_start:iA_end,j)
359 //--------------------------------------------------------------
360
361 GB_GET_VECTOR_FOR_IXJ (S, iA_start) ;
362 int64_t pA_start = j * Avlen ;
363
364 //--------------------------------------------------------------
365 // get M(:,j)
366 //--------------------------------------------------------------
367
368 int64_t pM_start, pM_end ;
369 GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
370 bool mjdense = (pM_end - pM_start) == Mvlen ;
371
372 //--------------------------------------------------------------
373 // do a 2-way merge of S(iA_start:iA_end,j) and A(ditto,j)
374 //--------------------------------------------------------------
375
376 // jC = J [j] ; or J is a colon expression
377 int64_t jC = GB_ijlist (J, j, Jkind, Jcolon) ;
378
379 for (int64_t iA = iA_start ; iA < iA_end ; iA++)
380 {
381 int64_t pA = pA_start + iA ;
382 bool Sfound = (pS < pS_end) && (GBI (Si, pS, Svlen) == iA) ;
383 bool Afound = Ab [pA] ;
384 if (!Sfound && Afound)
385 {
386 // S (i,j) is not present, A (i,j) is present
387 GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
388 if (Mask_comp) mij = !mij ;
389 if (mij)
390 {
391 // ----[. A 1]--------------------------------------
392 // [. A 1]: action: ( insert )
393 int64_t iC = GB_ijlist (I, iA, Ikind, Icolon) ;
394 GB_PENDING_INSERT (Ax +(pA*asize)) ;
395 }
396 }
397 else if (Sfound)
398 {
399 // S (i,j) present
400 GB_NEXT (S) ;
401 }
402 }
403 }
404 GB_PHASE2_TASK_WRAPUP ;
405 }
406
407 }
408 else
409 {
410
411 //----------------------------------------------------------------------
412 // phase2: A is hypersparse, sparse, or full
413 //----------------------------------------------------------------------
414
415 #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
416 reduction(&&:pending_sorted)
417 for (taskid = 0 ; taskid < ntasks ; taskid++)
418 {
419
420 //------------------------------------------------------------------
421 // get the task descriptor
422 //------------------------------------------------------------------
423
424 GB_GET_TASK_DESCRIPTOR_PHASE2 ;
425
426 //------------------------------------------------------------------
427 // compute all vectors in this task
428 //------------------------------------------------------------------
429
430 for (int64_t k = kfirst ; k <= klast ; k++)
431 {
432
433 //--------------------------------------------------------------
434 // get A(:,j) and S(:,j)
435 //--------------------------------------------------------------
436
437 int64_t j = GBH (Zh, k) ;
438 GB_GET_MAPPED (pA, pA_end, pA, pA_end, Ap, j, k, Z_to_X, Avlen);
439 GB_GET_MAPPED (pS, pS_end, pB, pB_end, Sp, j, k, Z_to_S, Svlen);
440
441 //--------------------------------------------------------------
442 // get M(:,j)
443 //--------------------------------------------------------------
444
445 int64_t pM_start, pM_end ;
446 GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
447 bool mjdense = (pM_end - pM_start) == Mvlen ;
448
449 //--------------------------------------------------------------
450 // do a 2-way merge of S(:,j) and A(:,j)
451 //--------------------------------------------------------------
452
453 // jC = J [j] ; or J is a colon expression
454 int64_t jC = GB_ijlist (J, j, Jkind, Jcolon) ;
455
456 // while both list S (:,j) and A (:,j) have entries
457 while (pS < pS_end && pA < pA_end)
458 {
459 int64_t iS = GBI (Si, pS, Svlen) ;
460 int64_t iA = GBI (Ai, pA, Avlen) ;
461
462 if (iS < iA)
463 {
464 // S (i,j) is present but A (i,j) is not
465 GB_NEXT (S) ;
466 }
467 else if (iA < iS)
468 {
469 // S (i,j) is not present, A (i,j) is present
470 GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
471 if (Mask_comp) mij = !mij ;
472 if (mij)
473 {
474 // ----[. A 1]--------------------------------------
475 // [. A 1]: action: ( insert )
476 int64_t iC = GB_ijlist (I, iA, Ikind, Icolon) ;
477 GB_PENDING_INSERT (Ax +(pA*asize)) ;
478 }
479 GB_NEXT (A) ;
480 }
481 else
482 {
483 // both S (i,j) and A (i,j) present
484 GB_NEXT (S) ;
485 GB_NEXT (A) ;
486 }
487 }
488
489 // while list A (:,j) has entries. List S (:,j) exhausted.
490 while (pA < pA_end)
491 {
492 // S (i,j) is not present, A (i,j) is present
493 int64_t iA = GBI (Ai, pA, Avlen) ;
494 GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
495 if (Mask_comp) mij = !mij ;
496 if (mij)
497 {
498 // ----[. A 1]------------------------------------------
499 // [. A 1]: action: ( insert )
500 int64_t iC = GB_ijlist (I, iA, Ikind, Icolon) ;
501 GB_PENDING_INSERT (Ax +(pA*asize)) ;
502 }
503 GB_NEXT (A) ;
504 }
505 }
506
507 GB_PHASE2_TASK_WRAPUP ;
508 }
509 }
510
511 //--------------------------------------------------------------------------
512 // finalize the matrix and return result
513 //--------------------------------------------------------------------------
514
515 GB_SUBASSIGN_WRAPUP ;
516 }
517
518