1 //------------------------------------------------------------------------------
2 // GB_subassign_08s_and_16: C(I,J)<M or !M> += A ; using S
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // Method 08s: C(I,J)<M> += A ; using S
11 // Method 16: C(I,J)<!M> += A ; using S
12 
13 // M:           present
14 // Mask_comp:   true or false
15 // C_replace:   false
16 // accum:       present
17 // A:           matrix
18 // S:           constructed
19 
20 // C: not bitmap: use GB_bitmap_assign instead
21 // M, A: any sparsity structure.
22 
23 #include "GB_subassign_methods.h"
24 
GB_subassign_08s_and_16(GrB_Matrix C,const GrB_Index * I,const int64_t ni,const int64_t nI,const int Ikind,const int64_t Icolon[3],const GrB_Index * J,const int64_t nj,const int64_t nJ,const int Jkind,const int64_t Jcolon[3],const GrB_Matrix M,const bool Mask_struct,const bool Mask_comp,const GrB_BinaryOp accum,const GrB_Matrix A,GB_Context Context)25 GrB_Info GB_subassign_08s_and_16
26 (
27     GrB_Matrix C,
28     // input:
29     const GrB_Index *I,
30     const int64_t ni,
31     const int64_t nI,
32     const int Ikind,
33     const int64_t Icolon [3],
34     const GrB_Index *J,
35     const int64_t nj,
36     const int64_t nJ,
37     const int Jkind,
38     const int64_t Jcolon [3],
39     const GrB_Matrix M,
40     const bool Mask_struct,         // if true, use the only structure of M
41     const bool Mask_comp,           // if true, !M, else use M
42     const GrB_BinaryOp accum,
43     const GrB_Matrix A,
44     GB_Context Context
45 )
46 {
47 
48     //--------------------------------------------------------------------------
49     // check inputs
50     //--------------------------------------------------------------------------
51 
52     ASSERT (!GB_IS_BITMAP (C)) ;
53     ASSERT (!GB_aliased (C, M)) ;   // NO ALIAS of C==M
54     ASSERT (!GB_aliased (C, A)) ;   // NO ALIAS of C==A
55 
56     //--------------------------------------------------------------------------
57     // S = C(I,J)
58     //--------------------------------------------------------------------------
59 
60     GB_EMPTY_TASKLIST ;
61     GB_OK (GB_subassign_symbolic (S, C, I, ni, J, nj, true, Context)) ;
62 
63     //--------------------------------------------------------------------------
64     // get inputs
65     //--------------------------------------------------------------------------
66 
67     GB_MATRIX_WAIT_IF_JUMBLED (M) ;
68     GB_MATRIX_WAIT_IF_JUMBLED (A) ;
69 
70     GB_GET_C ;      // C must not be bitmap
71     GB_GET_MASK ;
72     GB_GET_A ;
73     GB_GET_S ;
74     GB_GET_ACCUM ;
75 
76     //--------------------------------------------------------------------------
77     // Method 16:  C(I,J)<!M> += A ; using S
78     //--------------------------------------------------------------------------
79 
80     // Time: Close to optimal.  All entries in A+S must be traversed.
81 
82     //--------------------------------------------------------------------------
83     // Method 08s: C(I,J)<M> += A ; using S
84     //--------------------------------------------------------------------------
85 
86     // Time: Only entries in A must be traversed, and the corresponding entries
87     // in C located.  This method constructs S and traverses all of it in the
88     // worst case.  Compare with method 08n, which does not construct S but
89     // instead uses a binary search for entries in C, but it only traverses
90     // entries in A.*M.
91 
92     //--------------------------------------------------------------------------
93     // Parallel: A+S (Methods 02, 04, 09, 10, 11, 12, 14, 16, 18, 20)
94     //--------------------------------------------------------------------------
95 
96     if (A_is_bitmap)
97     {
98         // all of IxJ must be examined
99         GB_SUBASSIGN_IXJ_SLICE ;
100     }
101     else
102     {
103         // traverse all A+S
104         GB_SUBASSIGN_TWO_SLICE (A, S) ;
105     }
106 
107     //--------------------------------------------------------------------------
108     // phase 1: create zombies, update entries, and count pending tuples
109     //--------------------------------------------------------------------------
110 
111     if (A_is_bitmap)
112     {
113 
114         //----------------------------------------------------------------------
115         // phase1: A is bitmap
116         //----------------------------------------------------------------------
117 
118         #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
119             reduction(+:nzombies)
120         for (taskid = 0 ; taskid < ntasks ; taskid++)
121         {
122 
123             //------------------------------------------------------------------
124             // get the task descriptor
125             //------------------------------------------------------------------
126 
127             GB_GET_IXJ_TASK_DESCRIPTOR_PHASE1 (iA_start, iA_end) ;
128 
129             //------------------------------------------------------------------
130             // compute all vectors in this task
131             //------------------------------------------------------------------
132 
133             for (int64_t j = kfirst ; j <= klast ; j++)
134             {
135 
136                 //--------------------------------------------------------------
137                 // get S(iA_start:iA_end,j)
138                 //--------------------------------------------------------------
139 
140                 GB_GET_VECTOR_FOR_IXJ (S, iA_start) ;
141                 int64_t pA_start = j * Avlen ;
142 
143                 //--------------------------------------------------------------
144                 // get M(:,j)
145                 //--------------------------------------------------------------
146 
147                 int64_t pM_start, pM_end ;
148                 GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
149                 bool mjdense = (pM_end - pM_start) == Mvlen ;
150 
151                 //--------------------------------------------------------------
152                 // do a 2-way merge of S(iA_start:iA_end,j) and A(ditto,j)
153                 //--------------------------------------------------------------
154 
155                 for (int64_t iA = iA_start ; iA < iA_end ; iA++)
156                 {
157                     int64_t pA = pA_start + iA ;
158                     bool Sfound = (pS < pS_end) && (GBI (Si, pS, Svlen) == iA) ;
159                     bool Afound = Ab [pA] ;
160 
161                     if (Sfound && !Afound)
162                     {
163                         // S (i,j) is present but A (i,j) is not
164                         // ----[C . 1] or [X . 1]-------------------------------
165                         // [C . 1]: action: ( C ): no change, with accum
166                         // [X . 1]: action: ( X ): still a zombie
167                         // ----[C . 0] or [X . 0]-------------------------------
168                         // [C . 0]: action: ( C ): no change, with accum
169                         // [X . 0]: action: ( X ): still a zombie
170                         GB_NEXT (S) ;
171                     }
172                     else if (!Sfound && Afound)
173                     {
174                         // S (i,j) is not present, A (i,j) is present
175                         GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
176                         if (Mask_comp) mij = !mij ;
177                         if (mij)
178                         {
179                             // ----[. A 1]--------------------------------------
180                             // [. A 1]: action: ( insert )
181                             task_pending++ ;
182                         }
183                     }
184                     else if (Sfound && Afound)
185                     {
186                         // both S (i,j) and A (i,j) present
187                         GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
188                         if (Mask_comp) mij = !mij ;
189                         if (mij)
190                         {
191                             // ----[C A 1] or [X A 1]---------------------------
192                             // [C A 1]: action: ( =A ): A to C no accum
193                             // [C A 1]: action: ( =C+A ): apply accum
194                             // [X A 1]: action: ( undelete ): zombie lives
195                             GB_C_S_LOOKUP ;
196                             GB_withaccum_C_A_1_matrix ;
197                         }
198                         GB_NEXT (S) ;
199                     }
200                 }
201             }
202             GB_PHASE1_TASK_WRAPUP ;
203         }
204 
205     }
206     else
207     {
208 
209         //----------------------------------------------------------------------
210         // phase1: A is hypersparse, sparse, or full
211         //----------------------------------------------------------------------
212 
213         #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
214             reduction(+:nzombies)
215         for (taskid = 0 ; taskid < ntasks ; taskid++)
216         {
217 
218             //------------------------------------------------------------------
219             // get the task descriptor
220             //------------------------------------------------------------------
221 
222             GB_GET_TASK_DESCRIPTOR_PHASE1 ;
223 
224             //------------------------------------------------------------------
225             // compute all vectors in this task
226             //------------------------------------------------------------------
227 
228             for (int64_t k = kfirst ; k <= klast ; k++)
229             {
230 
231                 //--------------------------------------------------------------
232                 // get A(:,j) and S(:,j)
233                 //--------------------------------------------------------------
234 
235                 int64_t j = GBH (Zh, k) ;
236                 GB_GET_MAPPED (pA, pA_end, pA, pA_end, Ap, j, k, Z_to_X, Avlen);
237                 GB_GET_MAPPED (pS, pS_end, pB, pB_end, Sp, j, k, Z_to_S, Svlen);
238 
239                 //--------------------------------------------------------------
240                 // get M(:,j)
241                 //--------------------------------------------------------------
242 
243                 int64_t pM_start, pM_end ;
244                 GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
245                 bool mjdense = (pM_end - pM_start) == Mvlen ;
246 
247                 //--------------------------------------------------------------
248                 // do a 2-way merge of S(:,j) and A(:,j)
249                 //--------------------------------------------------------------
250 
251                 // jC = J [j] ; or J is a colon expression
252                 // int64_t jC = GB_ijlist (J, j, Jkind, Jcolon) ;
253 
254                 // while both list S (:,j) and A (:,j) have entries
255                 while (pS < pS_end && pA < pA_end)
256                 {
257                     int64_t iS = GBI (Si, pS, Svlen) ;
258                     int64_t iA = GBI (Ai, pA, Avlen) ;
259 
260                     if (iS < iA)
261                     {
262                         // S (i,j) is present but A (i,j) is not
263                         // ----[C . 1] or [X . 1]-------------------------------
264                         // [C . 1]: action: ( C ): no change, with accum
265                         // [X . 1]: action: ( X ): still a zombie
266                         // ----[C . 0] or [X . 0]-------------------------------
267                         // [C . 0]: action: ( C ): no change, with accum
268                         // [X . 0]: action: ( X ): still a zombie
269                         GB_NEXT (S) ;
270                     }
271                     else if (iA < iS)
272                     {
273                         // S (i,j) is not present, A (i,j) is present
274                         GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
275                         if (Mask_comp) mij = !mij ;
276                         if (mij)
277                         {
278                             // ----[. A 1]--------------------------------------
279                             // [. A 1]: action: ( insert )
280                             task_pending++ ;
281                         }
282                         GB_NEXT (A) ;
283                     }
284                     else
285                     {
286                         // both S (i,j) and A (i,j) present
287                         GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
288                         if (Mask_comp) mij = !mij ;
289                         if (mij)
290                         {
291                             // ----[C A 1] or [X A 1]---------------------------
292                             // [C A 1]: action: ( =A ): A to C no accum
293                             // [C A 1]: action: ( =C+A ): apply accum
294                             // [X A 1]: action: ( undelete ): zombie lives
295                             GB_C_S_LOOKUP ;
296                             GB_withaccum_C_A_1_matrix ;
297                         }
298                         GB_NEXT (S) ;
299                         GB_NEXT (A) ;
300                     }
301                 }
302 
303                 // ignore the remainder of S(:,j)
304 
305                 // while list A (:,j) has entries.  List S (:,j) exhausted.
306                 while (pA < pA_end)
307                 {
308                     // S (i,j) is not present, A (i,j) is present
309                     int64_t iA = GBI (Ai, pA, Avlen) ;
310                     GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
311                     if (Mask_comp) mij = !mij ;
312                     if (mij)
313                     {
314                         // ----[. A 1]------------------------------------------
315                         // [. A 1]: action: ( insert )
316                         task_pending++ ;
317                     }
318                     GB_NEXT (A) ;
319                 }
320             }
321 
322             GB_PHASE1_TASK_WRAPUP ;
323         }
324     }
325 
326     //--------------------------------------------------------------------------
327     // phase 2: insert pending tuples
328     //--------------------------------------------------------------------------
329 
330     GB_PENDING_CUMSUM ;
331 
332     if (A_is_bitmap)
333     {
334 
335         //----------------------------------------------------------------------
336         // phase2: A is bitmap
337         //----------------------------------------------------------------------
338 
339         #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
340             reduction(&&:pending_sorted)
341         for (taskid = 0 ; taskid < ntasks ; taskid++)
342         {
343 
344             //------------------------------------------------------------------
345             // get the task descriptor
346             //------------------------------------------------------------------
347 
348             GB_GET_IXJ_TASK_DESCRIPTOR_PHASE2 (iA_start, iA_end) ;
349 
350             //------------------------------------------------------------------
351             // compute all vectors in this task
352             //------------------------------------------------------------------
353 
354             for (int64_t j = kfirst ; j <= klast ; j++)
355             {
356 
357                 //--------------------------------------------------------------
358                 // get S(iA_start:iA_end,j)
359                 //--------------------------------------------------------------
360 
361                 GB_GET_VECTOR_FOR_IXJ (S, iA_start) ;
362                 int64_t pA_start = j * Avlen ;
363 
364                 //--------------------------------------------------------------
365                 // get M(:,j)
366                 //--------------------------------------------------------------
367 
368                 int64_t pM_start, pM_end ;
369                 GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
370                 bool mjdense = (pM_end - pM_start) == Mvlen ;
371 
372                 //--------------------------------------------------------------
373                 // do a 2-way merge of S(iA_start:iA_end,j) and A(ditto,j)
374                 //--------------------------------------------------------------
375 
376                 // jC = J [j] ; or J is a colon expression
377                 int64_t jC = GB_ijlist (J, j, Jkind, Jcolon) ;
378 
379                 for (int64_t iA = iA_start ; iA < iA_end ; iA++)
380                 {
381                     int64_t pA = pA_start + iA ;
382                     bool Sfound = (pS < pS_end) && (GBI (Si, pS, Svlen) == iA) ;
383                     bool Afound = Ab [pA] ;
384                     if (!Sfound && Afound)
385                     {
386                         // S (i,j) is not present, A (i,j) is present
387                         GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
388                         if (Mask_comp) mij = !mij ;
389                         if (mij)
390                         {
391                             // ----[. A 1]--------------------------------------
392                             // [. A 1]: action: ( insert )
393                             int64_t iC = GB_ijlist (I, iA, Ikind, Icolon) ;
394                             GB_PENDING_INSERT (Ax +(pA*asize)) ;
395                         }
396                     }
397                     else if (Sfound)
398                     {
399                         // S (i,j) present
400                         GB_NEXT (S) ;
401                     }
402                 }
403             }
404             GB_PHASE2_TASK_WRAPUP ;
405         }
406 
407     }
408     else
409     {
410 
411         //----------------------------------------------------------------------
412         // phase2: A is hypersparse, sparse, or full
413         //----------------------------------------------------------------------
414 
415         #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
416             reduction(&&:pending_sorted)
417         for (taskid = 0 ; taskid < ntasks ; taskid++)
418         {
419 
420             //------------------------------------------------------------------
421             // get the task descriptor
422             //------------------------------------------------------------------
423 
424             GB_GET_TASK_DESCRIPTOR_PHASE2 ;
425 
426             //------------------------------------------------------------------
427             // compute all vectors in this task
428             //------------------------------------------------------------------
429 
430             for (int64_t k = kfirst ; k <= klast ; k++)
431             {
432 
433                 //--------------------------------------------------------------
434                 // get A(:,j) and S(:,j)
435                 //--------------------------------------------------------------
436 
437                 int64_t j = GBH (Zh, k) ;
438                 GB_GET_MAPPED (pA, pA_end, pA, pA_end, Ap, j, k, Z_to_X, Avlen);
439                 GB_GET_MAPPED (pS, pS_end, pB, pB_end, Sp, j, k, Z_to_S, Svlen);
440 
441                 //--------------------------------------------------------------
442                 // get M(:,j)
443                 //--------------------------------------------------------------
444 
445                 int64_t pM_start, pM_end ;
446                 GB_VECTOR_LOOKUP (pM_start, pM_end, M, j) ;
447                 bool mjdense = (pM_end - pM_start) == Mvlen ;
448 
449                 //--------------------------------------------------------------
450                 // do a 2-way merge of S(:,j) and A(:,j)
451                 //--------------------------------------------------------------
452 
453                 // jC = J [j] ; or J is a colon expression
454                 int64_t jC = GB_ijlist (J, j, Jkind, Jcolon) ;
455 
456                 // while both list S (:,j) and A (:,j) have entries
457                 while (pS < pS_end && pA < pA_end)
458                 {
459                     int64_t iS = GBI (Si, pS, Svlen) ;
460                     int64_t iA = GBI (Ai, pA, Avlen) ;
461 
462                     if (iS < iA)
463                     {
464                         // S (i,j) is present but A (i,j) is not
465                         GB_NEXT (S) ;
466                     }
467                     else if (iA < iS)
468                     {
469                         // S (i,j) is not present, A (i,j) is present
470                         GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
471                         if (Mask_comp) mij = !mij ;
472                         if (mij)
473                         {
474                             // ----[. A 1]--------------------------------------
475                             // [. A 1]: action: ( insert )
476                             int64_t iC = GB_ijlist (I, iA, Ikind, Icolon) ;
477                             GB_PENDING_INSERT (Ax +(pA*asize)) ;
478                         }
479                         GB_NEXT (A) ;
480                     }
481                     else
482                     {
483                         // both S (i,j) and A (i,j) present
484                         GB_NEXT (S) ;
485                         GB_NEXT (A) ;
486                     }
487                 }
488 
489                 // while list A (:,j) has entries.  List S (:,j) exhausted.
490                 while (pA < pA_end)
491                 {
492                     // S (i,j) is not present, A (i,j) is present
493                     int64_t iA = GBI (Ai, pA, Avlen) ;
494                     GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
495                     if (Mask_comp) mij = !mij ;
496                     if (mij)
497                     {
498                         // ----[. A 1]------------------------------------------
499                         // [. A 1]: action: ( insert )
500                         int64_t iC = GB_ijlist (I, iA, Ikind, Icolon) ;
501                         GB_PENDING_INSERT (Ax +(pA*asize)) ;
502                     }
503                     GB_NEXT (A) ;
504                 }
505             }
506 
507             GB_PHASE2_TASK_WRAPUP ;
508         }
509     }
510 
511     //--------------------------------------------------------------------------
512     // finalize the matrix and return result
513     //--------------------------------------------------------------------------
514 
515     GB_SUBASSIGN_WRAPUP ;
516 }
517 
518