1 //------------------------------------------------------------------------------
2 // GB_subassign_12_and_20: C(I,J)<M or !M,repl> += A ; using S
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // Method 12: C(I,J)<M,repl> += A ; using S
11 // Method 20: C(I,J)<!M,repl> += A ; using S
12 
13 // M:           present
14 // Mask_comp:   true or false
15 // C_replace:   true
16 // accum:       present
17 // A:           matrix
18 // S:           constructed
19 
20 // C: not bitmap: use GB_bitmap_assign instead
21 // M, A: any sparsity structure.
22 
23 #include "GB_subassign_methods.h"
24 
GB_subassign_12_and_20(GrB_Matrix C,const GrB_Index * I,const int64_t ni,const int64_t nI,const int Ikind,const int64_t Icolon[3],const GrB_Index * J,const int64_t nj,const int64_t nJ,const int Jkind,const int64_t Jcolon[3],const GrB_Matrix M,const bool Mask_struct,const bool Mask_comp,const GrB_BinaryOp accum,const GrB_Matrix A,GB_Context Context)25 GrB_Info GB_subassign_12_and_20
26 (
27     GrB_Matrix C,
28     // input:
29     const GrB_Index *I,
30     const int64_t ni,
31     const int64_t nI,
32     const int Ikind,
33     const int64_t Icolon [3],
34     const GrB_Index *J,
35     const int64_t nj,
36     const int64_t nJ,
37     const int Jkind,
38     const int64_t Jcolon [3],
39     const GrB_Matrix M,
40     const bool Mask_struct,         // if true, use the only structure of M
41     const bool Mask_comp,           // if true, !M, else use M
42     const GrB_BinaryOp accum,
43     const GrB_Matrix A,
44     GB_Context Context
45 )
46 {
47 
48     //--------------------------------------------------------------------------
49     // check inputs
50     //--------------------------------------------------------------------------
51 
52     ASSERT (!GB_IS_BITMAP (C)) ; ASSERT (!GB_IS_FULL (C)) ;
53     ASSERT (!GB_aliased (C, M)) ;   // NO ALIAS of C==M
54     ASSERT (!GB_aliased (C, A)) ;   // NO ALIAS of C==A
55 
56     //--------------------------------------------------------------------------
57     // S = C(I,J)
58     //--------------------------------------------------------------------------
59 
60     GB_EMPTY_TASKLIST ;
61     GB_OK (GB_subassign_symbolic (S, C, I, ni, J, nj, true, Context)) ;
62 
63     //--------------------------------------------------------------------------
64     // get inputs
65     //--------------------------------------------------------------------------
66 
67     GB_MATRIX_WAIT_IF_JUMBLED (M) ;
68     GB_MATRIX_WAIT_IF_JUMBLED (A) ;
69 
70     GB_GET_C ;      // C must not be bitmap
71     GB_GET_MASK ;
72     GB_GET_A ;
73     GB_GET_S ;
74     GB_GET_ACCUM ;
75 
76     //--------------------------------------------------------------------------
77     // Method 12: C(I,J)<M,repl> += A ; using S
78     // Method 20: C(I,J)<!M,repl> += A ; using S
79     //--------------------------------------------------------------------------
80 
81     // Time: all entries in S+A must be traversed, so Omega(nnz(S)+nnz(A)) is
82     // required.  All cases of the mask (0, 1, or not present) must be
83     // considered, because of the C_replace descriptor being true.
84 
85     //--------------------------------------------------------------------------
86     // Parallel: A+S (Methods 02, 04, 09, 10, 11, 12, 14, 16, 18, 20)
87     //--------------------------------------------------------------------------
88 
89     if (A_is_bitmap)
90     {
91         // all of IxJ must be examined
92         GB_SUBASSIGN_IXJ_SLICE ;
93     }
94     else
95     {
96         // traverse all A+S
97         GB_SUBASSIGN_TWO_SLICE (A, S) ;
98     }
99 
100     //--------------------------------------------------------------------------
101     // phase 1: create zombies, update entries, and count pending tuples
102     //--------------------------------------------------------------------------
103 
104     if (A_is_bitmap)
105     {
106 
107         //----------------------------------------------------------------------
108         // phase1: A is bitmap
109         //----------------------------------------------------------------------
110 
111         #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
112             reduction(+:nzombies)
113         for (taskid = 0 ; taskid < ntasks ; taskid++)
114         {
115 
116             //------------------------------------------------------------------
117             // get the task descriptor
118             //------------------------------------------------------------------
119 
120             GB_GET_IXJ_TASK_DESCRIPTOR_PHASE1 (iA_start, iA_end) ;
121 
122             //------------------------------------------------------------------
123             // compute all vectors in this task
124             //------------------------------------------------------------------
125 
126             for (int64_t j = kfirst ; j <= klast ; j++)
127             {
128 
129                 //--------------------------------------------------------------
130                 // get S(iA_start:iA_end,j)
131                 //--------------------------------------------------------------
132 
133                 GB_GET_VECTOR_FOR_IXJ (S, iA_start) ;
134                 int64_t pA_start = j * Avlen ;
135 
136                 //--------------------------------------------------------------
137                 // get M(:,j)
138                 //--------------------------------------------------------------
139 
140                 int64_t pM_start, pM_end ;
141                 GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
142                 bool mjdense = (pM_end - pM_start) == Mvlen ;
143 
144                 //--------------------------------------------------------------
145                 // do a 2-way merge of S(iA_start:iA_end,j) and A(ditto,j)
146                 //--------------------------------------------------------------
147 
148                 for (int64_t iA = iA_start ; iA < iA_end ; iA++)
149                 {
150                     int64_t pA = pA_start + iA ;
151                     bool Sfound = (pS < pS_end) && (GBI (Si, pS, Svlen) == iA) ;
152                     bool Afound = Ab [pA] ;
153 
154                     if (Sfound && !Afound)
155                     {
156                         // S (i,j) is present but A (i,j) is not
157                         GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
158                         if (Mask_comp) mij = !mij ;
159                         if (!mij)
160                         {
161                             // ----[C . 0] or [X . 0]---------------------------
162                             // [X . 0]: action: ( X ): still a zombie
163                             // [C . 0]: C_repl: action: ( delete ): now zombie
164                             GB_C_S_LOOKUP ;
165                             GB_DELETE_ENTRY ;
166                         }
167                         GB_NEXT (S) ;
168                     }
169                     else if (!Sfound && Afound)
170                     {
171                         // S (i,j) is not present, A (i,j) is present
172                         GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
173                         if (Mask_comp) mij = !mij ;
174                         if (mij)
175                         {
176                             // ----[. A 1]--------------------------------------
177                             // [. A 1]: action: ( insert )
178                             task_pending++ ;
179                         }
180                     }
181                     else if (Sfound && Afound)
182                     {
183                         // both S (i,j) and A (i,j) present
184                         GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
185                         if (Mask_comp) mij = !mij ;
186                         GB_C_S_LOOKUP ;
187                         if (mij)
188                         {
189                             // ----[C A 1] or [X A 1]---------------------------
190                             // [C A 1]: action: ( =A ): A to C no accum
191                             // [C A 1]: action: ( =C+A ): apply accum
192                             // [X A 1]: action: ( undelete ): zombie lives
193                             GB_withaccum_C_A_1_matrix ;
194                         }
195                         else
196                         {
197                             // ----[C A 0] or [X A 0]---------------------------
198                             // [X A 0]: action: ( X ): still a zombie
199                             // [C A 0]: C_repl: action: ( delete ): now zombie
200                             GB_DELETE_ENTRY ;
201                         }
202                         GB_NEXT (S) ;
203                     }
204                 }
205             }
206             GB_PHASE1_TASK_WRAPUP ;
207         }
208 
209     }
210     else
211     {
212 
213         //----------------------------------------------------------------------
214         // phase1: A is hypersparse, sparse, or full
215         //----------------------------------------------------------------------
216 
217         #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
218             reduction(+:nzombies)
219         for (taskid = 0 ; taskid < ntasks ; taskid++)
220         {
221 
222             //------------------------------------------------------------------
223             // get the task descriptor
224             //------------------------------------------------------------------
225 
226             GB_GET_TASK_DESCRIPTOR_PHASE1 ;
227 
228             //------------------------------------------------------------------
229             // compute all vectors in this task
230             //------------------------------------------------------------------
231 
232             for (int64_t k = kfirst ; k <= klast ; k++)
233             {
234 
235                 //--------------------------------------------------------------
236                 // get A(:,j) and S(:,j)
237                 //--------------------------------------------------------------
238 
239                 int64_t j = GBH (Zh, k) ;
240                 GB_GET_MAPPED (pA, pA_end, pA, pA_end, Ap, j, k, Z_to_X, Avlen);
241                 GB_GET_MAPPED (pS, pS_end, pB, pB_end, Sp, j, k, Z_to_S, Svlen);
242 
243                 //--------------------------------------------------------------
244                 // get M(:,j)
245                 //--------------------------------------------------------------
246 
247                 int64_t pM_start, pM_end ;
248                 GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
249                 bool mjdense = (pM_end - pM_start) == Mvlen ;
250 
251                 //--------------------------------------------------------------
252                 // do a 2-way merge of S(:,j) and A(:,j)
253                 //--------------------------------------------------------------
254 
255                 // jC = J [j] ; or J is a colon expression
256                 // int64_t jC = GB_ijlist (J, j, Jkind, Jcolon) ;
257 
258                 // while both list S (:,j) and A (:,j) have entries
259                 while (pS < pS_end && pA < pA_end)
260                 {
261                     int64_t iS = GBI (Si, pS, Svlen) ;
262                     int64_t iA = GBI (Ai, pA, Avlen) ;
263 
264                     if (iS < iA)
265                     {
266                         // S (i,j) is present but A (i,j) is not
267                         GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iS) ;
268                         if (Mask_comp) mij = !mij ;
269                         if (!mij)
270                         {
271                             // ----[C . 0] or [X . 0]---------------------------
272                             // [X . 0]: action: ( X ): still a zombie
273                             // [C . 0]: C_repl: action: ( delete ): now zombie
274                             GB_C_S_LOOKUP ;
275                             GB_DELETE_ENTRY ;
276                         }
277                         GB_NEXT (S) ;
278                     }
279                     else if (iA < iS)
280                     {
281                         // S (i,j) is not present, A (i,j) is present
282                         GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
283                         if (Mask_comp) mij = !mij ;
284                         if (mij)
285                         {
286                             // ----[. A 1]--------------------------------------
287                             // [. A 1]: action: ( insert )
288                             task_pending++ ;
289                         }
290                         GB_NEXT (A) ;
291                     }
292                     else
293                     {
294                         // both S (i,j) and A (i,j) present
295                         GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
296                         if (Mask_comp) mij = !mij ;
297                         GB_C_S_LOOKUP ;
298                         if (mij)
299                         {
300                             // ----[C A 1] or [X A 1]---------------------------
301                             // [C A 1]: action: ( =A ): A to C no accum
302                             // [C A 1]: action: ( =C+A ): apply accum
303                             // [X A 1]: action: ( undelete ): zombie lives
304                             GB_withaccum_C_A_1_matrix ;
305                         }
306                         else
307                         {
308                             // ----[C A 0] or [X A 0]---------------------------
309                             // [X A 0]: action: ( X ): still a zombie
310                             // [C A 0]: C_repl: action: ( delete ): now zombie
311                             GB_DELETE_ENTRY ;
312                         }
313                         GB_NEXT (S) ;
314                         GB_NEXT (A) ;
315                     }
316                 }
317 
318                 // while list S (:,j) has entries.  List A (:,j) exhausted.
319                 while (pS < pS_end)
320                 {
321                     int64_t iS = GBI (Si, pS, Svlen) ;
322                     GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iS) ;
323                     if (Mask_comp) mij = !mij ;
324                     if (!mij)
325                     {
326                         // ----[C . 0] or [X . 0]-------------------------------
327                         // [X . 0]: action: ( X ): still a zombie
328                         // [C . 0]: C_repl: action: ( delete ): becomes zombie
329                         GB_C_S_LOOKUP ;
330                         GB_DELETE_ENTRY ;
331                     }
332                     GB_NEXT (S) ;
333                 }
334 
335                 // while list A (:,j) has entries.  List S (:,j) exhausted.
336                 while (pA < pA_end)
337                 {
338                     // S (i,j) is not present, A (i,j) is present
339                     int64_t iA = GBI (Ai, pA, Avlen) ;
340                     GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
341                     if (Mask_comp) mij = !mij ;
342                     if (mij)
343                     {
344                         // ----[. A 1]------------------------------------------
345                         // [. A 1]: action: ( insert )
346                         task_pending++ ;
347                     }
348                     GB_NEXT (A) ;
349                 }
350             }
351 
352             GB_PHASE1_TASK_WRAPUP ;
353         }
354     }
355 
356     //--------------------------------------------------------------------------
357     // phase 2: insert pending tuples
358     //--------------------------------------------------------------------------
359 
360     GB_PENDING_CUMSUM ;
361 
362     if (A_is_bitmap)
363     {
364 
365         //----------------------------------------------------------------------
366         // phase2: A is bitmap
367         //----------------------------------------------------------------------
368 
369         #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
370             reduction(&&:pending_sorted)
371         for (taskid = 0 ; taskid < ntasks ; taskid++)
372         {
373 
374             //------------------------------------------------------------------
375             // get the task descriptor
376             //------------------------------------------------------------------
377 
378             GB_GET_IXJ_TASK_DESCRIPTOR_PHASE2 (iA_start, iA_end) ;
379 
380             //------------------------------------------------------------------
381             // compute all vectors in this task
382             //------------------------------------------------------------------
383 
384             for (int64_t j = kfirst ; j <= klast ; j++)
385             {
386 
387                 //--------------------------------------------------------------
388                 // get S(iA_start:iA_end,j)
389                 //--------------------------------------------------------------
390 
391                 GB_GET_VECTOR_FOR_IXJ (S, iA_start) ;
392                 int64_t pA_start = j * Avlen ;
393 
394                 //--------------------------------------------------------------
395                 // get M(:,j)
396                 //--------------------------------------------------------------
397 
398                 int64_t pM_start, pM_end ;
399                 GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
400                 bool mjdense = (pM_end - pM_start) == Mvlen ;
401 
402                 //--------------------------------------------------------------
403                 // do a 2-way merge of S(iA_start:iA_end,j) and A(ditto,j)
404                 //--------------------------------------------------------------
405 
406                 // jC = J [j] ; or J is a colon expression
407                 int64_t jC = GB_ijlist (J, j, Jkind, Jcolon) ;
408 
409                 for (int64_t iA = iA_start ; iA < iA_end ; iA++)
410                 {
411                     int64_t pA = pA_start + iA ;
412                     bool Sfound = (pS < pS_end) && (GBI (Si, pS, Svlen) == iA) ;
413                     bool Afound = Ab [pA] ;
414                     if (!Sfound && Afound)
415                     {
416                         // S (i,j) is not present, A (i,j) is present
417                         GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
418                         if (Mask_comp) mij = !mij ;
419                         if (mij)
420                         {
421                             // ----[. A 1]--------------------------------------
422                             // [. A 1]: action: ( insert )
423                             int64_t iC = GB_ijlist (I, iA, Ikind, Icolon) ;
424                             GB_PENDING_INSERT (Ax +(pA*asize)) ;
425                         }
426                     }
427                     else if (Sfound)
428                     {
429                         // S (i,j) present
430                         GB_NEXT (S) ;
431                     }
432                 }
433             }
434             GB_PHASE2_TASK_WRAPUP ;
435         }
436 
437     }
438     else
439     {
440 
441         //----------------------------------------------------------------------
442         // phase2: A is hypersparse, sparse, or full
443         //----------------------------------------------------------------------
444 
445         #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
446             reduction(&&:pending_sorted)
447         for (taskid = 0 ; taskid < ntasks ; taskid++)
448         {
449 
450             //------------------------------------------------------------------
451             // get the task descriptor
452             //------------------------------------------------------------------
453 
454             GB_GET_TASK_DESCRIPTOR_PHASE2 ;
455 
456             //------------------------------------------------------------------
457             // compute all vectors in this task
458             //------------------------------------------------------------------
459 
460             for (int64_t k = kfirst ; k <= klast ; k++)
461             {
462 
463                 //--------------------------------------------------------------
464                 // get A(:,j) and S(:,j)
465                 //--------------------------------------------------------------
466 
467                 int64_t j = GBH (Zh, k) ;
468                 GB_GET_MAPPED (pA, pA_end, pA, pA_end, Ap, j, k, Z_to_X, Avlen);
469                 GB_GET_MAPPED (pS, pS_end, pB, pB_end, Sp, j, k, Z_to_S, Svlen);
470 
471                 //--------------------------------------------------------------
472                 // get M(:,j)
473                 //--------------------------------------------------------------
474 
475                 int64_t pM_start, pM_end ;
476                 GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
477                 bool mjdense = (pM_end - pM_start) == Mvlen ;
478 
479                 //--------------------------------------------------------------
480                 // do a 2-way merge of S(:,j) and A(:,j)
481                 //--------------------------------------------------------------
482 
483                 // jC = J [j] ; or J is a colon expression
484                 int64_t jC = GB_ijlist (J, j, Jkind, Jcolon) ;
485 
486                 // while both list S (:,j) and A (:,j) have entries
487                 while (pS < pS_end && pA < pA_end)
488                 {
489                     int64_t iS = GBI (Si, pS, Svlen) ;
490                     int64_t iA = GBI (Ai, pA, Avlen) ;
491 
492                     if (iS < iA)
493                     {
494                         // S (i,j) is present but A (i,j) is not
495                         GB_NEXT (S) ;
496                     }
497                     else if (iA < iS)
498                     {
499                         // S (i,j) is not present, A (i,j) is present
500                         GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
501                         if (Mask_comp) mij = !mij ;
502                         if (mij)
503                         {
504                             // ----[. A 1]--------------------------------------
505                             // [. A 1]: action: ( insert )
506                             int64_t iC = GB_ijlist (I, iA, Ikind, Icolon) ;
507                             GB_PENDING_INSERT (Ax +(pA*asize)) ;
508                         }
509                         GB_NEXT (A) ;
510                     }
511                     else
512                     {
513                         // both S (i,j) and A (i,j) present
514                         GB_NEXT (S) ;
515                         GB_NEXT (A) ;
516                     }
517                 }
518 
519                 // while list A (:,j) has entries.  List S (:,j) exhausted.
520                 while (pA < pA_end)
521                 {
522                     // S (i,j) is not present, A (i,j) is present
523                     int64_t iA = GBI (Ai, pA, Avlen) ;
524                     GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
525                     if (Mask_comp) mij = !mij ;
526                     if (mij)
527                     {
528                         // ----[. A 1]------------------------------------------
529                         // [. A 1]: action: ( insert )
530                         int64_t iC = GB_ijlist (I, iA, Ikind, Icolon) ;
531                         GB_PENDING_INSERT (Ax +(pA*asize)) ;
532                     }
533                     GB_NEXT (A) ;
534                 }
535             }
536 
537             GB_PHASE2_TASK_WRAPUP ;
538         }
539     }
540 
541     //--------------------------------------------------------------------------
542     // finalize the matrix and return result
543     //--------------------------------------------------------------------------
544 
545     GB_SUBASSIGN_WRAPUP ;
546 }
547 
548