1 //------------------------------------------------------------------------------
2 // GB_subassign_07: C(I,J)<M> += scalar ; no S
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // Method 07: C(I,J)<M> += scalar ; no S
11 
12 // M:           present
13 // Mask_comp:   false
14 // C_replace:   false
15 // accum:       present
16 // A:           scalar
17 // S:           none
18 
19 // C: not bitmap
20 // M: any sparsity
21 
22 #include "GB_subassign_methods.h"
23 
GB_subassign_07(GrB_Matrix C,const GrB_Index * I,const int64_t nI,const int Ikind,const int64_t Icolon[3],const GrB_Index * J,const int64_t nJ,const int Jkind,const int64_t Jcolon[3],const GrB_Matrix M,const bool Mask_struct,const GrB_BinaryOp accum,const void * scalar,const GrB_Type atype,GB_Context Context)24 GrB_Info GB_subassign_07
25 (
26     GrB_Matrix C,
27     // input:
28     const GrB_Index *I,
29     const int64_t nI,
30     const int Ikind,
31     const int64_t Icolon [3],
32     const GrB_Index *J,
33     const int64_t nJ,
34     const int Jkind,
35     const int64_t Jcolon [3],
36     const GrB_Matrix M,
37     const bool Mask_struct,
38     const GrB_BinaryOp accum,
39     const void *scalar,
40     const GrB_Type atype,
41     GB_Context Context
42 )
43 {
44 
45     //--------------------------------------------------------------------------
46     // check inputs
47     //--------------------------------------------------------------------------
48 
49     ASSERT (!GB_IS_BITMAP (C)) ;
50     ASSERT (!GB_aliased (C, M)) ;   // NO ALIAS of C==M
51 
52     //--------------------------------------------------------------------------
53     // get inputs
54     //--------------------------------------------------------------------------
55 
56     GB_EMPTY_TASKLIST ;
57     GB_MATRIX_WAIT_IF_JUMBLED (C) ;
58     GB_MATRIX_WAIT_IF_JUMBLED (M) ;
59 
60     GB_GET_C ;      // C must not be bitmap
61     int64_t zorig = C->nzombies ;
62     const int64_t *restrict Ch = C->h ;
63     const int64_t *restrict Cp = C->p ;
64     const bool C_is_hyper = (Ch != NULL) ;
65     const int64_t Cnvec = C->nvec ;
66     GB_GET_MASK ;
67     GB_GET_ACCUM_SCALAR ;
68 
69     //--------------------------------------------------------------------------
70     // Method 07: C(I,J)<M> += scalar ; no S
71     //--------------------------------------------------------------------------
72 
73     // Time: Close to Optimal:  same as Method 05.
74 
75     // Method 05 and Method 07 are very similar.  Also compare with Method 06n.
76 
77     //--------------------------------------------------------------------------
78     // Parallel: slice M into coarse/fine tasks (Method 05, 06n, 07)
79     //--------------------------------------------------------------------------
80 
81     GB_SUBASSIGN_ONE_SLICE (M) ;    // M cannot be jumbled
82 
83     //--------------------------------------------------------------------------
84     // phase 1: undelete zombies, update entries, and count pending tuples
85     //--------------------------------------------------------------------------
86 
87     #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
88         reduction(+:nzombies)
89     for (taskid = 0 ; taskid < ntasks ; taskid++)
90     {
91 
92         //----------------------------------------------------------------------
93         // get the task descriptor
94         //----------------------------------------------------------------------
95 
96         GB_GET_TASK_DESCRIPTOR_PHASE1 ;
97 
98         //----------------------------------------------------------------------
99         // compute all vectors in this task
100         //----------------------------------------------------------------------
101 
102         for (int64_t k = kfirst ; k <= klast ; k++)
103         {
104 
105             //------------------------------------------------------------------
106             // get j, the kth vector of M
107             //------------------------------------------------------------------
108 
109             int64_t j = GBH (Mh, k) ;
110             GB_GET_VECTOR (pM, pM_end, pA, pA_end, Mp, k, Mvlen) ;
111             int64_t mjnz = pM_end - pM ;
112             if (mjnz == 0) continue ;
113 
114             //------------------------------------------------------------------
115             // get jC, the corresponding vector of C
116             //------------------------------------------------------------------
117 
118             GB_GET_jC ;
119             int64_t cjnz = pC_end - pC_start ;
120             bool cjdense = (cjnz == Cvlen) ;
121 
122             //------------------------------------------------------------------
123             // C(I,jC)<M(:,j)> += scalar ; no S
124             //------------------------------------------------------------------
125 
126             if (cjdense)
127             {
128 
129                 //--------------------------------------------------------------
130                 // C(:,jC) is dense so the binary search of C is not needed
131                 //--------------------------------------------------------------
132 
133                 for ( ; pM < pM_end ; pM++)
134                 {
135 
136                     //----------------------------------------------------------
137                     // update C(iC,jC), but only if M(iA,j) allows it
138                     //----------------------------------------------------------
139 
140                     bool mij = GBB (Mb, pM) && GB_mcast (Mx, pM, msize) ;
141                     if (mij)
142                     {
143                         int64_t iA = GBI (Mi, pM, Mvlen) ;
144                         GB_iC_DENSE_LOOKUP ;
145 
146                         // ----[C A 1] or [X A 1]-------------------------------
147                         // [C A 1]: action: ( =C+A ): apply accum
148                         // [X A 1]: action: ( undelete ): zombie lives
149                         GB_withaccum_C_A_1_scalar ;
150                     }
151                 }
152 
153             }
154             else
155             {
156 
157                 //--------------------------------------------------------------
158                 // C(:,jC) is sparse; use binary search for C
159                 //--------------------------------------------------------------
160 
161                 for ( ; pM < pM_end ; pM++)
162                 {
163 
164                     //----------------------------------------------------------
165                     // update C(iC,jC), but only if M(iA,j) allows it
166                     //----------------------------------------------------------
167 
168                     bool mij = GBB (Mb, pM) && GB_mcast (Mx, pM, msize) ;
169                     if (mij)
170                     {
171                         int64_t iA = GBI (Mi, pM, Mvlen) ;
172 
173                         // find C(iC,jC) in C(:,jC)
174                         GB_iC_BINARY_SEARCH ;
175                         if (cij_found)
176                         {
177                             // ----[C A 1] or [X A 1]---------------------------
178                             // [C A 1]: action: ( =C+A ): apply accum
179                             // [X A 1]: action: ( undelete ): zombie lives
180                             GB_withaccum_C_A_1_scalar ;
181                         }
182                         else
183                         {
184                             // ----[. A 1]--------------------------------------
185                             // [. A 1]: action: ( insert )
186                             task_pending++ ;
187                         }
188                     }
189                 }
190             }
191         }
192 
193         GB_PHASE1_TASK_WRAPUP ;
194     }
195 
196     //--------------------------------------------------------------------------
197     // phase 2: insert pending tuples
198     //--------------------------------------------------------------------------
199 
200     GB_PENDING_CUMSUM ;
201     zorig = C->nzombies ;
202 
203     #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
204         reduction(&&:pending_sorted)
205     for (taskid = 0 ; taskid < ntasks ; taskid++)
206     {
207 
208         //----------------------------------------------------------------------
209         // get the task descriptor
210         //----------------------------------------------------------------------
211 
212         GB_GET_TASK_DESCRIPTOR_PHASE2 ;
213 
214         //----------------------------------------------------------------------
215         // compute all vectors in this task
216         //----------------------------------------------------------------------
217 
218         for (int64_t k = kfirst ; k <= klast ; k++)
219         {
220 
221             //------------------------------------------------------------------
222             // get j, the kth vector of M
223             //------------------------------------------------------------------
224 
225             int64_t j = GBH (Mh, k) ;
226             GB_GET_VECTOR (pM, pM_end, pA, pA_end, Mp, k, Mvlen) ;
227             int64_t mjnz = pM_end - pM ;
228             if (mjnz == 0) continue ;
229 
230             //------------------------------------------------------------------
231             // get jC, the corresponding vector of C
232             //------------------------------------------------------------------
233 
234             GB_GET_jC ;
235             bool cjdense = ((pC_end - pC_start) == Cvlen) ;
236 
237             //------------------------------------------------------------------
238             // C(I,jC)<M(:,j)> += scalar ; no S
239             //------------------------------------------------------------------
240 
241             if (!cjdense)
242             {
243 
244                 //--------------------------------------------------------------
245                 // C(:,jC) is sparse; use binary search for C
246                 //--------------------------------------------------------------
247 
248                 for ( ; pM < pM_end ; pM++)
249                 {
250 
251                     //----------------------------------------------------------
252                     // update C(iC,jC), but only if M(iA,j) allows it
253                     //----------------------------------------------------------
254 
255                     bool mij = GBB (Mb, pM) && GB_mcast (Mx, pM, msize) ;
256                     if (mij)
257                     {
258                         int64_t iA = GBI (Mi, pM, Mvlen) ;
259 
260                         // find C(iC,jC) in C(:,jC)
261                         GB_iC_BINARY_SEARCH ;
262                         if (!cij_found)
263                         {
264                             // ----[. A 1]--------------------------------------
265                             // [. A 1]: action: ( insert )
266                             GB_PENDING_INSERT (scalar) ;
267                         }
268                     }
269                 }
270             }
271         }
272 
273         GB_PHASE2_TASK_WRAPUP ;
274     }
275 
276     //--------------------------------------------------------------------------
277     // finalize the matrix and return result
278     //--------------------------------------------------------------------------
279 
280     GB_SUBASSIGN_WRAPUP ;
281 }
282 
283