1 //------------------------------------------------------------------------------
2 // GB_subassign_08n: C(I,J)<M> += A ; no S
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // Method 08n: C(I,J)<M> += A ; no S
11 
12 // M:           present
13 // Mask_comp:   false
14 // C_replace:   false
15 // accum:       present
16 // A:           matrix
17 // S:           none
18 
19 // C not bitmap; C can be full since no zombies are inserted in that case.
20 // If C is bitmap, then GB_bitmap_assign_M_accum is used instead.
21 // M, A: not bitmap; Method 08s is used instead if M or A are bitmap.
22 
23 #include "GB_subassign_methods.h"
24 
25 //------------------------------------------------------------------------------
26 // GB_PHASE1_ACTION
27 //------------------------------------------------------------------------------
28 
29 // action to take for phase 1 when A(i,j) exists and M(i,j)=1
30 #define GB_PHASE1_ACTION                                                    \
31 {                                                                           \
32     if (cjdense)                                                            \
33     {                                                                       \
34         /* direct lookup of C(iC,jC) */                                     \
35         GB_iC_DENSE_LOOKUP ;                                                \
36         /* ----[C A 1] or [X A 1]------------------------------- */         \
37         /* [C A 1]: action: ( =C+A ): apply accum                */         \
38         /* [X A 1]: action: ( undelete ): zombie lives           */         \
39         GB_withaccum_C_A_1_matrix ;                                         \
40     }                                                                       \
41     else                                                                    \
42     {                                                                       \
43         /* binary search for C(iC,jC) in C(:,jC) */                         \
44         GB_iC_BINARY_SEARCH ;                                               \
45         if (cij_found)                                                      \
46         {                                                                   \
47             /* ----[C A 1] or [X A 1]--------------------------- */         \
48             /* [C A 1]: action: ( =C+A ): apply accum            */         \
49             /* [X A 1]: action: ( undelete ): zombie lives       */         \
50             GB_withaccum_C_A_1_matrix ;                                     \
51         }                                                                   \
52         else                                                                \
53         {                                                                   \
54             /* ----[. A 1]-------------------------------------- */         \
55             /* [. A 1]: action: ( insert )                       */         \
56             task_pending++ ;                                                \
57         }                                                                   \
58     }                                                                       \
59 }
60 
61 //------------------------------------------------------------------------------
62 // GB_PHASE2_ACTION
63 //------------------------------------------------------------------------------
64 
65 // action to take for phase 2 when A(i,j) exists and M(i,j)=1
66 #define GB_PHASE2_ACTION                                                    \
67 {                                                                           \
68     ASSERT (!cjdense) ;                                                     \
69     {                                                                       \
70         /* binary search for C(iC,jC) in C(:,jC) */                         \
71         GB_iC_BINARY_SEARCH ;                                               \
72         if (!cij_found)                                                     \
73         {                                                                   \
74             /* ----[. A 1]-------------------------------------- */         \
75             /* [. A 1]: action: ( insert )                       */         \
76             GB_PENDING_INSERT (Ax +(pA*asize)) ;                            \
77         }                                                                   \
78     }                                                                       \
79 }
80 
81 //------------------------------------------------------------------------------
82 // GB_subassign_08n: C(I,J)<M> += A ; no S
83 //------------------------------------------------------------------------------
84 
GB_subassign_08n(GrB_Matrix C,const GrB_Index * I,const int64_t nI,const int Ikind,const int64_t Icolon[3],const GrB_Index * J,const int64_t nJ,const int Jkind,const int64_t Jcolon[3],const GrB_Matrix M,const bool Mask_struct,const GrB_BinaryOp accum,const GrB_Matrix A,GB_Context Context)85 GrB_Info GB_subassign_08n
86 (
87     GrB_Matrix C,
88     // input:
89     const GrB_Index *I,
90     const int64_t nI,
91     const int Ikind,
92     const int64_t Icolon [3],
93     const GrB_Index *J,
94     const int64_t nJ,
95     const int Jkind,
96     const int64_t Jcolon [3],
97     const GrB_Matrix M,
98     const bool Mask_struct,
99     const GrB_BinaryOp accum,
100     const GrB_Matrix A,
101     GB_Context Context
102 )
103 {
104 
105     //--------------------------------------------------------------------------
106     // check inputs
107     //--------------------------------------------------------------------------
108 
109     ASSERT (!GB_IS_BITMAP (C)) ;
110     ASSERT (!GB_IS_BITMAP (M)) ;    // Method 08s is used if M is bitmap
111     ASSERT (!GB_IS_BITMAP (A)) ;    // Method 08s is used if A is bitmap
112     ASSERT (!GB_aliased (C, M)) ;   // NO ALIAS of C==M
113     ASSERT (!GB_aliased (C, A)) ;   // NO ALIAS of C==A
114 
115     //--------------------------------------------------------------------------
116     // get inputs
117     //--------------------------------------------------------------------------
118 
119     GB_EMPTY_TASKLIST ;
120     GB_MATRIX_WAIT_IF_JUMBLED (C) ;
121     GB_MATRIX_WAIT_IF_JUMBLED (M) ;
122     GB_MATRIX_WAIT_IF_JUMBLED (A) ;
123 
124     GB_GET_C ;      // C must not be bitmap
125     int64_t zorig = C->nzombies ;
126     const int64_t Cnvec = C->nvec ;
127     const int64_t *restrict Ch = C->h ;
128     const int64_t *restrict Cp = C->p ;
129     const bool C_is_hyper = (Ch != NULL) ;
130     GB_GET_MASK ;
131     GB_GET_A ;
132     const int64_t *restrict Ah = A->h ;
133     GB_GET_ACCUM ;
134 
135     //--------------------------------------------------------------------------
136     // Method 08n: C(I,J)<M> += A ; no S
137     //--------------------------------------------------------------------------
138 
139     // Time: Close to optimal. Omega (sum_j (min (nnz (A(:,j)), nnz (M(:,j)))),
140     // since only the intersection of A.*M needs to be considered.  If either
141     // M(:,j) or A(:,j) are very sparse compared to the other, then the shorter
142     // is traversed with a linear-time scan and a binary search is used for the
143     // other.  If the number of nonzeros is comparable, a linear-time scan is
144     // used for both.  Once two entries M(i,j)=1 and A(i,j) are found with the
145     // same index i, the entry A(i,j) is accumulated or inserted into C.
146 
147     // The algorithm is very much like the eWise multiplication of A.*M, so the
148     // parallel scheduling relies on GB_emult_01_phase0 and GB_ewise_slice.
149 
150     //--------------------------------------------------------------------------
151     // Parallel: slice the eWiseMult of Z=A.*M (Method 08n only)
152     //--------------------------------------------------------------------------
153 
154     // Method 08n only.  If C is sparse, it is sliced for a fine task, so that
155     // it can do a binary search via GB_iC_BINARY_SEARCH.  But if C(:,jC) is
156     // dense, C(:,jC) is not sliced, so the fine task must do a direct lookup
157     // via GB_iC_DENSE_LOOKUP.  Otherwise a race condition will occur.
158     // The Z matrix is not constructed, except for its hyperlist (Zh_shallow)
159     // and mapping to A and M.
160 
161     // No matrix (C, M, or A) can be bitmap.  C, M, A can be sparse/hyper/full,
162     // in any combination.
163 
164     int64_t Znvec ;
165     const int64_t *restrict Zh_shallow = NULL ;
166     GB_OK (GB_subassign_emult_slice (
167         &TaskList, &TaskList_size, &ntasks, &nthreads,
168         &Znvec, &Zh_shallow, &Z_to_A, &Z_to_A_size, &Z_to_M, &Z_to_M_size,
169         C, I, nI, Ikind, Icolon, J, nJ, Jkind, Jcolon,
170         A, M, Context)) ;
171     GB_ALLOCATE_NPENDING_WERK ;
172 
173     //--------------------------------------------------------------------------
174     // phase 1: undelete zombies, update entries, and count pending tuples
175     //--------------------------------------------------------------------------
176 
177     #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
178         reduction(+:nzombies)
179     for (taskid = 0 ; taskid < ntasks ; taskid++)
180     {
181 
182         //----------------------------------------------------------------------
183         // get the task descriptor
184         //----------------------------------------------------------------------
185 
186         GB_GET_TASK_DESCRIPTOR_PHASE1 ;
187 
188         //----------------------------------------------------------------------
189         // compute all vectors in this task
190         //----------------------------------------------------------------------
191 
192         for (int64_t k = kfirst ; k <= klast ; k++)
193         {
194 
195             //------------------------------------------------------------------
196             // get A(:,j) and M(:,j)
197             //------------------------------------------------------------------
198 
199             int64_t j = GBH (Zh_shallow, k) ;
200             GB_GET_EVEC (pA, pA_end, pA, pA_end, Ap, Ah, j, k, Z_to_A, Avlen) ;
201             GB_GET_EVEC (pM, pM_end, pB, pB_end, Mp, Mh, j, k, Z_to_M, Mvlen) ;
202 
203             //------------------------------------------------------------------
204             // quick checks for empty intersection of A(:,j) and M(:,j)
205             //------------------------------------------------------------------
206 
207             int64_t ajnz = pA_end - pA ;
208             int64_t mjnz = pM_end - pM ;
209             if (ajnz == 0 || mjnz == 0) continue ;
210             int64_t iA_first = GBI (Ai, pA, Avlen) ;
211             int64_t iA_last  = GBI (Ai, pA_end-1, Avlen) ;
212             int64_t iM_first = GBI (Mi, pM, Mvlen) ;
213             int64_t iM_last  = GBI (Mi, pM_end-1, Mvlen) ;
214             if (iA_last < iM_first || iM_last < iA_first) continue ;
215             int64_t pM_start = pM ;
216 
217             //------------------------------------------------------------------
218             // get jC, the corresponding vector of C
219             //------------------------------------------------------------------
220 
221             GB_GET_jC ;
222             bool cjdense = (pC_end - pC_start == Cvlen) ;
223 
224             //------------------------------------------------------------------
225             // C(I,jC)<M(:,j)> += A(:,j) ; no S
226             //------------------------------------------------------------------
227 
228             if (ajnz > 32 * mjnz)
229             {
230 
231                 //--------------------------------------------------------------
232                 // A(:,j) is much denser than M(:,j)
233                 //--------------------------------------------------------------
234 
235                 for ( ; pM < pM_end ; pM++)
236                 {
237                     if (GB_mcast (Mx, pM, msize))
238                     {
239                         int64_t iA = GBI (Mi, pM, Mvlen) ;
240                         // find iA in A(:,j)
241                         int64_t pright = pA_end - 1 ;
242                         bool found ;
243                         // FUTURE::: exploit dense A(:,j)
244                         GB_BINARY_SEARCH (iA, Ai, pA, pright, found) ;
245                         if (found) GB_PHASE1_ACTION ;
246                     }
247                 }
248 
249             }
250             else if (mjnz > 32 * ajnz)
251             {
252 
253                 //--------------------------------------------------------------
254                 // M(:,j) is much denser than A(:,j)
255                 //--------------------------------------------------------------
256 
257                 // FUTURE::: exploit dense mask
258                 bool mjdense = false ;
259 
260                 for ( ; pA < pA_end ; pA++)
261                 {
262                     int64_t iA = GBI (Ai, pA, Avlen) ;
263                     GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
264                     if (mij) GB_PHASE1_ACTION ;
265                 }
266 
267             }
268             else
269             {
270 
271                 //----------------------------------------------------------
272                 // A(:,j) and M(:,j) have about the same # of entries
273                 //----------------------------------------------------------
274 
275                 // linear-time scan of A(:,j) and M(:,j)
276 
277                 while (pA < pA_end && pM < pM_end)
278                 {
279                     int64_t iA = GBI (Ai, pA, Avlen) ;
280                     int64_t iM = GBI (Mi, pM, Mvlen) ;
281                     if (iA < iM)
282                     {
283                         // A(i,j) exists but not M(i,j)
284                         GB_NEXT (A) ;
285                     }
286                     else if (iM < iA)
287                     {
288                         // M(i,j) exists but not A(i,j)
289                         GB_NEXT (M) ;
290                     }
291                     else
292                     {
293                         // both A(i,j) and M(i,j) exist
294                         if (GB_mcast (Mx, pM, msize)) GB_PHASE1_ACTION ;
295                         GB_NEXT (A) ;
296                         GB_NEXT (M) ;
297                     }
298                 }
299             }
300         }
301 
302         GB_PHASE1_TASK_WRAPUP ;
303     }
304 
305     //--------------------------------------------------------------------------
306     // phase 2: insert pending tuples
307     //--------------------------------------------------------------------------
308 
309     GB_PENDING_CUMSUM ;
310     zorig = C->nzombies ;
311 
312     #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
313         reduction(&&:pending_sorted)
314     for (taskid = 0 ; taskid < ntasks ; taskid++)
315     {
316 
317         //----------------------------------------------------------------------
318         // get the task descriptor
319         //----------------------------------------------------------------------
320 
321         GB_GET_TASK_DESCRIPTOR_PHASE2 ;
322 
323         //----------------------------------------------------------------------
324         // compute all vectors in this task
325         //----------------------------------------------------------------------
326 
327         for (int64_t k = kfirst ; k <= klast ; k++)
328         {
329 
330             //------------------------------------------------------------------
331             // get A(:,j) and M(:,j)
332             //------------------------------------------------------------------
333 
334             int64_t j = GBH (Zh_shallow, k) ;
335             GB_GET_EVEC (pA, pA_end, pA, pA_end, Ap, Ah, j, k, Z_to_A, Avlen) ;
336             GB_GET_EVEC (pM, pM_end, pB, pB_end, Mp, Mh, j, k, Z_to_M, Mvlen) ;
337 
338             //------------------------------------------------------------------
339             // quick checks for empty intersection of A(:,j) and M(:,j)
340             //------------------------------------------------------------------
341 
342             int64_t ajnz = pA_end - pA ;
343             int64_t mjnz = pM_end - pM ;
344             if (ajnz == 0 || mjnz == 0) continue ;
345             int64_t iA_first = GBI (Ai, pA, Avlen) ;
346             int64_t iA_last  = GBI (Ai, pA_end-1, Avlen) ;
347             int64_t iM_first = GBI (Mi, pM, Mvlen) ;
348             int64_t iM_last  = GBI (Mi, pM_end-1, Mvlen) ;
349             if (iA_last < iM_first || iM_last < iA_first) continue ;
350             int64_t pM_start = pM ;
351 
352             //------------------------------------------------------------------
353             // get jC, the corresponding vector of C
354             //------------------------------------------------------------------
355 
356             GB_GET_jC ;
357             bool cjdense = (pC_end - pC_start == Cvlen) ;
358             if (cjdense) continue ;
359 
360             //------------------------------------------------------------------
361             // C(I,jC)<M(:,j)> += A(:,j) ; no S
362             //------------------------------------------------------------------
363 
364             if (ajnz > 32 * mjnz)
365             {
366 
367                 //--------------------------------------------------------------
368                 // A(:,j) is much denser than M(:,j)
369                 //--------------------------------------------------------------
370 
371                 for ( ; pM < pM_end ; pM++)
372                 {
373                     if (GB_mcast (Mx, pM, msize))
374                     {
375                         int64_t iA = GBI (Mi, pM, Mvlen) ;
376                         // find iA in A(:,j)
377                         int64_t pright = pA_end - 1 ;
378                         bool found ;
379                         // FUTURE::: exploit dense A(:,j)
380                         GB_BINARY_SEARCH (iA, Ai, pA, pright, found) ;
381                         if (found) GB_PHASE2_ACTION ;
382                     }
383                 }
384 
385             }
386             else if (mjnz > 32 * ajnz)
387             {
388 
389                 //--------------------------------------------------------------
390                 // M(:,j) is much denser than A(:,j)
391                 //--------------------------------------------------------------
392 
393                 // FUTURE::: exploit dense mask
394                 bool mjdense = false ;
395 
396                 for ( ; pA < pA_end ; pA++)
397                 {
398                     int64_t iA = GBI (Ai, pA, Avlen) ;
399                     GB_MIJ_BINARY_SEARCH_OR_DENSE_LOOKUP (iA) ;
400                     if (mij) GB_PHASE2_ACTION ;
401                 }
402 
403             }
404             else
405             {
406 
407                 //----------------------------------------------------------
408                 // A(:,j) and M(:,j) have about the same # of entries
409                 //----------------------------------------------------------
410 
411                 // linear-time scan of A(:,j) and M(:,j)
412 
413                 while (pA < pA_end && pM < pM_end)
414                 {
415                     int64_t iA = GBI (Ai, pA, Avlen) ;
416                     int64_t iM = GBI (Mi, pM, Mvlen) ;
417                     if (iA < iM)
418                     {
419                         // A(i,j) exists but not M(i,j)
420                         GB_NEXT (A) ;
421                     }
422                     else if (iM < iA)
423                     {
424                         // M(i,j) exists but not A(i,j)
425                         GB_NEXT (M) ;
426                     }
427                     else
428                     {
429                         // both A(i,j) and M(i,j) exist
430                         if (GB_mcast (Mx, pM, msize)) GB_PHASE2_ACTION ;
431                         GB_NEXT (A) ;
432                         GB_NEXT (M) ;
433                     }
434                 }
435             }
436         }
437 
438         GB_PHASE2_TASK_WRAPUP ;
439     }
440 
441     //--------------------------------------------------------------------------
442     // finalize the matrix and return result
443     //--------------------------------------------------------------------------
444 
445     GB_SUBASSIGN_WRAPUP ;
446 }
447 
448