1 //------------------------------------------------------------------------------
2 // GB_subassign_12_and_20: C(I,J)<M or !M,repl> += A ; using S
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // Method 12: C(I,J)<M,repl> += A ; using S
11 // Method 20: C(I,J)<!M,repl> += A ; using S
12
13 // M: present
14 // Mask_comp: true or false
15 // C_replace: true
16 // accum: present
17 // A: matrix
18 // S: constructed
19
20 // C: not bitmap: use GB_bitmap_assign instead
21 // M, A: any sparsity structure.
22
23 #include "GB_subassign_methods.h"
24
GB_subassign_12_and_20(GrB_Matrix C,const GrB_Index * I,const int64_t ni,const int64_t nI,const int Ikind,const int64_t Icolon[3],const GrB_Index * J,const int64_t nj,const int64_t nJ,const int Jkind,const int64_t Jcolon[3],const GrB_Matrix M,const bool Mask_struct,const bool Mask_comp,const GrB_BinaryOp accum,const GrB_Matrix A,GB_Context Context)25 GrB_Info GB_subassign_12_and_20
26 (
27 GrB_Matrix C,
28 // input:
29 const GrB_Index *I,
30 const int64_t ni,
31 const int64_t nI,
32 const int Ikind,
33 const int64_t Icolon [3],
34 const GrB_Index *J,
35 const int64_t nj,
36 const int64_t nJ,
37 const int Jkind,
38 const int64_t Jcolon [3],
39 const GrB_Matrix M,
40 const bool Mask_struct, // if true, use the only structure of M
41 const bool Mask_comp, // if true, !M, else use M
42 const GrB_BinaryOp accum,
43 const GrB_Matrix A,
44 GB_Context Context
45 )
46 {
47
48 //--------------------------------------------------------------------------
49 // check inputs
50 //--------------------------------------------------------------------------
51
52 ASSERT (!GB_IS_BITMAP (C)) ; ASSERT (!GB_IS_FULL (C)) ;
53 ASSERT (!GB_aliased (C, M)) ; // NO ALIAS of C==M
54 ASSERT (!GB_aliased (C, A)) ; // NO ALIAS of C==A
55
56 //--------------------------------------------------------------------------
57 // S = C(I,J)
58 //--------------------------------------------------------------------------
59
60 GB_EMPTY_TASKLIST ;
61 GB_OK (GB_subassign_symbolic (S, C, I, ni, J, nj, true, Context)) ;
62
63 //--------------------------------------------------------------------------
64 // get inputs
65 //--------------------------------------------------------------------------
66
67 GB_MATRIX_WAIT_IF_JUMBLED (M) ;
68 GB_MATRIX_WAIT_IF_JUMBLED (A) ;
69
70 GB_GET_C ; // C must not be bitmap
71 GB_GET_MASK ;
72 GB_GET_A ;
73 GB_GET_S ;
74 GB_GET_ACCUM ;
75
76 //--------------------------------------------------------------------------
77 // Method 12: C(I,J)<M,repl> += A ; using S
78 // Method 20: C(I,J)<!M,repl> += A ; using S
79 //--------------------------------------------------------------------------
80
81 // Time: all entries in S+A must be traversed, so Omega(nnz(S)+nnz(A)) is
82 // required. All cases of the mask (0, 1, or not present) must be
83 // considered, because of the C_replace descriptor being true.
84
85 //--------------------------------------------------------------------------
86 // Parallel: A+S (Methods 02, 04, 09, 10, 11, 12, 14, 16, 18, 20)
87 //--------------------------------------------------------------------------
88
89 if (A_is_bitmap)
90 {
91 // all of IxJ must be examined
92 GB_SUBASSIGN_IXJ_SLICE ;
93 }
94 else
95 {
96 // traverse all A+S
97 GB_SUBASSIGN_TWO_SLICE (A, S) ;
98 }
99
100 //--------------------------------------------------------------------------
101 // phase 1: create zombies, update entries, and count pending tuples
102 //--------------------------------------------------------------------------
103
104 if (A_is_bitmap)
105 {
106
107 //----------------------------------------------------------------------
108 // phase1: A is bitmap
109 //----------------------------------------------------------------------
110
111 #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
112 reduction(+:nzombies)
113 for (taskid = 0 ; taskid < ntasks ; taskid++)
114 {
115
116 //------------------------------------------------------------------
117 // get the task descriptor
118 //------------------------------------------------------------------
119
120 GB_GET_IXJ_TASK_DESCRIPTOR_PHASE1 (iA_start, iA_end) ;
121
122 //------------------------------------------------------------------
123 // compute all vectors in this task
124 //------------------------------------------------------------------
125
126 for (int64_t j = kfirst ; j <= klast ; j++)
127 {
128
129 //--------------------------------------------------------------
130 // get S(iA_start:iA_end,j)
131 //--------------------------------------------------------------
132
133 GB_GET_VECTOR_FOR_IXJ (S, iA_start) ;
134 int64_t pA_start = j * Avlen ;
135
136 //--------------------------------------------------------------
137 // get M(:,j)
138 //--------------------------------------------------------------
139
140 int64_t pM_start, pM_end ;
141 GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
142 bool mjdense = (pM_end - pM_start) == Mvlen ;
143
144 //--------------------------------------------------------------
145 // do a 2-way merge of S(iA_start:iA_end,j) and A(ditto,j)
146 //--------------------------------------------------------------
147
148 for (int64_t iA = iA_start ; iA < iA_end ; iA++)
149 {
150 int64_t pA = pA_start + iA ;
151 bool Sfound = (pS < pS_end) && (GBI (Si, pS, Svlen) == iA) ;
152 bool Afound = Ab [pA] ;
153
154 if (Sfound && !Afound)
155 {
156 // S (i,j) is present but A (i,j) is not
157 GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
158 if (Mask_comp) mij = !mij ;
159 if (!mij)
160 {
161 // ----[C . 0] or [X . 0]---------------------------
162 // [X . 0]: action: ( X ): still a zombie
163 // [C . 0]: C_repl: action: ( delete ): now zombie
164 GB_C_S_LOOKUP ;
165 GB_DELETE_ENTRY ;
166 }
167 GB_NEXT (S) ;
168 }
169 else if (!Sfound && Afound)
170 {
171 // S (i,j) is not present, A (i,j) is present
172 GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
173 if (Mask_comp) mij = !mij ;
174 if (mij)
175 {
176 // ----[. A 1]--------------------------------------
177 // [. A 1]: action: ( insert )
178 task_pending++ ;
179 }
180 }
181 else if (Sfound && Afound)
182 {
183 // both S (i,j) and A (i,j) present
184 GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
185 if (Mask_comp) mij = !mij ;
186 GB_C_S_LOOKUP ;
187 if (mij)
188 {
189 // ----[C A 1] or [X A 1]---------------------------
190 // [C A 1]: action: ( =A ): A to C no accum
191 // [C A 1]: action: ( =C+A ): apply accum
192 // [X A 1]: action: ( undelete ): zombie lives
193 GB_withaccum_C_A_1_matrix ;
194 }
195 else
196 {
197 // ----[C A 0] or [X A 0]---------------------------
198 // [X A 0]: action: ( X ): still a zombie
199 // [C A 0]: C_repl: action: ( delete ): now zombie
200 GB_DELETE_ENTRY ;
201 }
202 GB_NEXT (S) ;
203 }
204 }
205 }
206 GB_PHASE1_TASK_WRAPUP ;
207 }
208
209 }
210 else
211 {
212
213 //----------------------------------------------------------------------
214 // phase1: A is hypersparse, sparse, or full
215 //----------------------------------------------------------------------
216
217 #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
218 reduction(+:nzombies)
219 for (taskid = 0 ; taskid < ntasks ; taskid++)
220 {
221
222 //------------------------------------------------------------------
223 // get the task descriptor
224 //------------------------------------------------------------------
225
226 GB_GET_TASK_DESCRIPTOR_PHASE1 ;
227
228 //------------------------------------------------------------------
229 // compute all vectors in this task
230 //------------------------------------------------------------------
231
232 for (int64_t k = kfirst ; k <= klast ; k++)
233 {
234
235 //--------------------------------------------------------------
236 // get A(:,j) and S(:,j)
237 //--------------------------------------------------------------
238
239 int64_t j = GBH (Zh, k) ;
240 GB_GET_MAPPED (pA, pA_end, pA, pA_end, Ap, j, k, Z_to_X, Avlen);
241 GB_GET_MAPPED (pS, pS_end, pB, pB_end, Sp, j, k, Z_to_S, Svlen);
242
243 //--------------------------------------------------------------
244 // get M(:,j)
245 //--------------------------------------------------------------
246
247 int64_t pM_start, pM_end ;
248 GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
249 bool mjdense = (pM_end - pM_start) == Mvlen ;
250
251 //--------------------------------------------------------------
252 // do a 2-way merge of S(:,j) and A(:,j)
253 //--------------------------------------------------------------
254
255 // jC = J [j] ; or J is a colon expression
256 // int64_t jC = GB_ijlist (J, j, Jkind, Jcolon) ;
257
258 // while both list S (:,j) and A (:,j) have entries
259 while (pS < pS_end && pA < pA_end)
260 {
261 int64_t iS = GBI (Si, pS, Svlen) ;
262 int64_t iA = GBI (Ai, pA, Avlen) ;
263
264 if (iS < iA)
265 {
266 // S (i,j) is present but A (i,j) is not
267 GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iS) ;
268 if (Mask_comp) mij = !mij ;
269 if (!mij)
270 {
271 // ----[C . 0] or [X . 0]---------------------------
272 // [X . 0]: action: ( X ): still a zombie
273 // [C . 0]: C_repl: action: ( delete ): now zombie
274 GB_C_S_LOOKUP ;
275 GB_DELETE_ENTRY ;
276 }
277 GB_NEXT (S) ;
278 }
279 else if (iA < iS)
280 {
281 // S (i,j) is not present, A (i,j) is present
282 GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
283 if (Mask_comp) mij = !mij ;
284 if (mij)
285 {
286 // ----[. A 1]--------------------------------------
287 // [. A 1]: action: ( insert )
288 task_pending++ ;
289 }
290 GB_NEXT (A) ;
291 }
292 else
293 {
294 // both S (i,j) and A (i,j) present
295 GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
296 if (Mask_comp) mij = !mij ;
297 GB_C_S_LOOKUP ;
298 if (mij)
299 {
300 // ----[C A 1] or [X A 1]---------------------------
301 // [C A 1]: action: ( =A ): A to C no accum
302 // [C A 1]: action: ( =C+A ): apply accum
303 // [X A 1]: action: ( undelete ): zombie lives
304 GB_withaccum_C_A_1_matrix ;
305 }
306 else
307 {
308 // ----[C A 0] or [X A 0]---------------------------
309 // [X A 0]: action: ( X ): still a zombie
310 // [C A 0]: C_repl: action: ( delete ): now zombie
311 GB_DELETE_ENTRY ;
312 }
313 GB_NEXT (S) ;
314 GB_NEXT (A) ;
315 }
316 }
317
318 // while list S (:,j) has entries. List A (:,j) exhausted.
319 while (pS < pS_end)
320 {
321 int64_t iS = GBI (Si, pS, Svlen) ;
322 GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iS) ;
323 if (Mask_comp) mij = !mij ;
324 if (!mij)
325 {
326 // ----[C . 0] or [X . 0]-------------------------------
327 // [X . 0]: action: ( X ): still a zombie
328 // [C . 0]: C_repl: action: ( delete ): becomes zombie
329 GB_C_S_LOOKUP ;
330 GB_DELETE_ENTRY ;
331 }
332 GB_NEXT (S) ;
333 }
334
335 // while list A (:,j) has entries. List S (:,j) exhausted.
336 while (pA < pA_end)
337 {
338 // S (i,j) is not present, A (i,j) is present
339 int64_t iA = GBI (Ai, pA, Avlen) ;
340 GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
341 if (Mask_comp) mij = !mij ;
342 if (mij)
343 {
344 // ----[. A 1]------------------------------------------
345 // [. A 1]: action: ( insert )
346 task_pending++ ;
347 }
348 GB_NEXT (A) ;
349 }
350 }
351
352 GB_PHASE1_TASK_WRAPUP ;
353 }
354 }
355
356 //--------------------------------------------------------------------------
357 // phase 2: insert pending tuples
358 //--------------------------------------------------------------------------
359
360 GB_PENDING_CUMSUM ;
361
362 if (A_is_bitmap)
363 {
364
365 //----------------------------------------------------------------------
366 // phase2: A is bitmap
367 //----------------------------------------------------------------------
368
369 #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
370 reduction(&&:pending_sorted)
371 for (taskid = 0 ; taskid < ntasks ; taskid++)
372 {
373
374 //------------------------------------------------------------------
375 // get the task descriptor
376 //------------------------------------------------------------------
377
378 GB_GET_IXJ_TASK_DESCRIPTOR_PHASE2 (iA_start, iA_end) ;
379
380 //------------------------------------------------------------------
381 // compute all vectors in this task
382 //------------------------------------------------------------------
383
384 for (int64_t j = kfirst ; j <= klast ; j++)
385 {
386
387 //--------------------------------------------------------------
388 // get S(iA_start:iA_end,j)
389 //--------------------------------------------------------------
390
391 GB_GET_VECTOR_FOR_IXJ (S, iA_start) ;
392 int64_t pA_start = j * Avlen ;
393
394 //--------------------------------------------------------------
395 // get M(:,j)
396 //--------------------------------------------------------------
397
398 int64_t pM_start, pM_end ;
399 GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
400 bool mjdense = (pM_end - pM_start) == Mvlen ;
401
402 //--------------------------------------------------------------
403 // do a 2-way merge of S(iA_start:iA_end,j) and A(ditto,j)
404 //--------------------------------------------------------------
405
406 // jC = J [j] ; or J is a colon expression
407 int64_t jC = GB_ijlist (J, j, Jkind, Jcolon) ;
408
409 for (int64_t iA = iA_start ; iA < iA_end ; iA++)
410 {
411 int64_t pA = pA_start + iA ;
412 bool Sfound = (pS < pS_end) && (GBI (Si, pS, Svlen) == iA) ;
413 bool Afound = Ab [pA] ;
414 if (!Sfound && Afound)
415 {
416 // S (i,j) is not present, A (i,j) is present
417 GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
418 if (Mask_comp) mij = !mij ;
419 if (mij)
420 {
421 // ----[. A 1]--------------------------------------
422 // [. A 1]: action: ( insert )
423 int64_t iC = GB_ijlist (I, iA, Ikind, Icolon) ;
424 GB_PENDING_INSERT (Ax +(pA*asize)) ;
425 }
426 }
427 else if (Sfound)
428 {
429 // S (i,j) present
430 GB_NEXT (S) ;
431 }
432 }
433 }
434 GB_PHASE2_TASK_WRAPUP ;
435 }
436
437 }
438 else
439 {
440
441 //----------------------------------------------------------------------
442 // phase2: A is hypersparse, sparse, or full
443 //----------------------------------------------------------------------
444
445 #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
446 reduction(&&:pending_sorted)
447 for (taskid = 0 ; taskid < ntasks ; taskid++)
448 {
449
450 //------------------------------------------------------------------
451 // get the task descriptor
452 //------------------------------------------------------------------
453
454 GB_GET_TASK_DESCRIPTOR_PHASE2 ;
455
456 //------------------------------------------------------------------
457 // compute all vectors in this task
458 //------------------------------------------------------------------
459
460 for (int64_t k = kfirst ; k <= klast ; k++)
461 {
462
463 //--------------------------------------------------------------
464 // get A(:,j) and S(:,j)
465 //--------------------------------------------------------------
466
467 int64_t j = GBH (Zh, k) ;
468 GB_GET_MAPPED (pA, pA_end, pA, pA_end, Ap, j, k, Z_to_X, Avlen);
469 GB_GET_MAPPED (pS, pS_end, pB, pB_end, Sp, j, k, Z_to_S, Svlen);
470
471 //--------------------------------------------------------------
472 // get M(:,j)
473 //--------------------------------------------------------------
474
475 int64_t pM_start, pM_end ;
476 GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
477 bool mjdense = (pM_end - pM_start) == Mvlen ;
478
479 //--------------------------------------------------------------
480 // do a 2-way merge of S(:,j) and A(:,j)
481 //--------------------------------------------------------------
482
483 // jC = J [j] ; or J is a colon expression
484 int64_t jC = GB_ijlist (J, j, Jkind, Jcolon) ;
485
486 // while both list S (:,j) and A (:,j) have entries
487 while (pS < pS_end && pA < pA_end)
488 {
489 int64_t iS = GBI (Si, pS, Svlen) ;
490 int64_t iA = GBI (Ai, pA, Avlen) ;
491
492 if (iS < iA)
493 {
494 // S (i,j) is present but A (i,j) is not
495 GB_NEXT (S) ;
496 }
497 else if (iA < iS)
498 {
499 // S (i,j) is not present, A (i,j) is present
500 GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
501 if (Mask_comp) mij = !mij ;
502 if (mij)
503 {
504 // ----[. A 1]--------------------------------------
505 // [. A 1]: action: ( insert )
506 int64_t iC = GB_ijlist (I, iA, Ikind, Icolon) ;
507 GB_PENDING_INSERT (Ax +(pA*asize)) ;
508 }
509 GB_NEXT (A) ;
510 }
511 else
512 {
513 // both S (i,j) and A (i,j) present
514 GB_NEXT (S) ;
515 GB_NEXT (A) ;
516 }
517 }
518
519 // while list A (:,j) has entries. List S (:,j) exhausted.
520 while (pA < pA_end)
521 {
522 // S (i,j) is not present, A (i,j) is present
523 int64_t iA = GBI (Ai, pA, Avlen) ;
524 GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
525 if (Mask_comp) mij = !mij ;
526 if (mij)
527 {
528 // ----[. A 1]------------------------------------------
529 // [. A 1]: action: ( insert )
530 int64_t iC = GB_ijlist (I, iA, Ikind, Icolon) ;
531 GB_PENDING_INSERT (Ax +(pA*asize)) ;
532 }
533 GB_NEXT (A) ;
534 }
535 }
536
537 GB_PHASE2_TASK_WRAPUP ;
538 }
539 }
540
541 //--------------------------------------------------------------------------
542 // finalize the matrix and return result
543 //--------------------------------------------------------------------------
544
545 GB_SUBASSIGN_WRAPUP ;
546 }
547
548