1 //------------------------------------------------------------------------------
2 // GB_subassign_05: C(I,J)<M> = scalar ; no S
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // Method 05: C(I,J)<M> = scalar ; no S
11 
12 // M:           present
13 // Mask_comp:   false
14 // C_replace:   false
15 // accum:       NULL
16 // A:           scalar
17 // S:           none
18 
19 // C: not bitmap
20 // M: any sparsity
21 
22 #include "GB_subassign_methods.h"
23 
GB_subassign_05(GrB_Matrix C,const GrB_Index * I,const int64_t nI,const int Ikind,const int64_t Icolon[3],const GrB_Index * J,const int64_t nJ,const int Jkind,const int64_t Jcolon[3],const GrB_Matrix M,const bool Mask_struct,const void * scalar,const GrB_Type atype,GB_Context Context)24 GrB_Info GB_subassign_05
25 (
26     GrB_Matrix C,
27     // input:
28     const GrB_Index *I,
29     const int64_t nI,
30     const int Ikind,
31     const int64_t Icolon [3],
32     const GrB_Index *J,
33     const int64_t nJ,
34     const int Jkind,
35     const int64_t Jcolon [3],
36     const GrB_Matrix M,
37     const bool Mask_struct,
38     const void *scalar,
39     const GrB_Type atype,
40     GB_Context Context
41 )
42 {
43 
44     //--------------------------------------------------------------------------
45     // check inputs
46     //--------------------------------------------------------------------------
47 
48     ASSERT (!GB_IS_BITMAP (C)) ;
49     ASSERT (!GB_aliased (C, M)) ;   // NO ALIAS of C==M
50 
51     //--------------------------------------------------------------------------
52     // get inputs
53     //--------------------------------------------------------------------------
54 
55     GB_EMPTY_TASKLIST ;
56     GB_MATRIX_WAIT_IF_JUMBLED (C) ;
57     GB_MATRIX_WAIT_IF_JUMBLED (M) ;
58 
59     GB_GET_C ;      // C must not be bitmap
60     int64_t zorig = C->nzombies ;
61     const int64_t *restrict Ch = C->h ;
62     const int64_t *restrict Cp = C->p ;
63     const bool C_is_hyper = (Ch != NULL) ;
64     const int64_t Cnvec = C->nvec ;
65     GB_GET_MASK ;
66     GB_GET_SCALAR ;
67     GrB_BinaryOp accum = NULL ;
68 
69     //--------------------------------------------------------------------------
70     // Method 05: C(I,J)<M> = scalar ; no S
71     //--------------------------------------------------------------------------
72 
73     // Time: Close to Optimal:  the method must iterate over all entries in M,
74     // so the time is Omega(nnz(M)).  For each entry M(i,j)=1, the
75     // corresponding entry in C must be found and updated (inserted or
76     // modified).  This method does this with a binary search of C(:,jC) or a
77     // direct lookup if C(:,jC) is dense.  The time is thus O(nnz(M)*log(n)) in
78     // the worst case, usually less than that since C(:,jC) often has O(1)
79     // entries.  An additional time of O(|J|*log(Cnvec)) is added if C is
80     // hypersparse.  There is no equivalent method that computes
81     // C(I,J)<M>=scalar using the matrix S.
82 
83     // Method 05 and Method 07 are very similar.  Also compare with Method 06n.
84 
85     //--------------------------------------------------------------------------
86     // Parallel: slice M into coarse/fine tasks (Method 05, 06n, 07)
87     //--------------------------------------------------------------------------
88 
89     GB_SUBASSIGN_ONE_SLICE (M) ;    // M cannot be jumbled
90 
91     //--------------------------------------------------------------------------
92     // phase 1: undelete zombies, update entries, and count pending tuples
93     //--------------------------------------------------------------------------
94 
95     #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
96         reduction(+:nzombies)
97     for (taskid = 0 ; taskid < ntasks ; taskid++)
98     {
99 
100         //----------------------------------------------------------------------
101         // get the task descriptor
102         //----------------------------------------------------------------------
103 
104         GB_GET_TASK_DESCRIPTOR_PHASE1 ;
105 
106         //----------------------------------------------------------------------
107         // compute all vectors in this task
108         //----------------------------------------------------------------------
109 
110         for (int64_t k = kfirst ; k <= klast ; k++)
111         {
112 
113             //------------------------------------------------------------------
114             // get j, the kth vector of M
115             //------------------------------------------------------------------
116 
117             int64_t j = GBH (Mh, k) ;
118             GB_GET_VECTOR (pM, pM_end, pA, pA_end, Mp, k, Mvlen) ;
119             int64_t mjnz = pM_end - pM ;
120             if (mjnz == 0) continue ;
121 
122             //------------------------------------------------------------------
123             // get jC, the corresponding vector of C
124             //------------------------------------------------------------------
125 
126             GB_GET_jC ;
127             int64_t cjnz = pC_end - pC_start ;
128             bool cjdense = (cjnz == Cvlen) ;
129 
130             //------------------------------------------------------------------
131             // C(I,jC)<M(:,j)> = scalar ; no S
132             //------------------------------------------------------------------
133 
134             if (cjdense)
135             {
136 
137                 //--------------------------------------------------------------
138                 // C(:,jC) is dense so the binary search of C is not needed
139                 //--------------------------------------------------------------
140 
141                 for ( ; pM < pM_end ; pM++)
142                 {
143 
144                     //----------------------------------------------------------
145                     // update C(iC,jC), but only if M(iA,j) allows it
146                     //----------------------------------------------------------
147 
148                     bool mij = GBB (Mb, pM) && GB_mcast (Mx, pM, msize) ;
149                     if (mij)
150                     {
151                         int64_t iA = GBI (Mi, pM, Mvlen) ;
152                         GB_iC_DENSE_LOOKUP ;
153 
154                         // ----[C A 1] or [X A 1]-------------------------------
155                         // [C A 1]: action: ( =A ): copy A into C, no accum
156                         // [X A 1]: action: ( undelete ): zombie lives
157                         GB_noaccum_C_A_1_scalar ;
158                     }
159                 }
160 
161             }
162             else
163             {
164 
165                 //--------------------------------------------------------------
166                 // C(:,jC) is sparse; use binary search for C
167                 //--------------------------------------------------------------
168 
169                 for ( ; pM < pM_end ; pM++)
170                 {
171 
172                     //----------------------------------------------------------
173                     // update C(iC,jC), but only if M(iA,j) allows it
174                     //----------------------------------------------------------
175 
176                     bool mij = GBB (Mb, pM) && GB_mcast (Mx, pM, msize) ;
177                     if (mij)
178                     {
179                         int64_t iA = GBI (Mi, pM, Mvlen) ;
180 
181                         // find C(iC,jC) in C(:,jC)
182                         GB_iC_BINARY_SEARCH ;
183                         if (cij_found)
184                         {
185                             // ----[C A 1] or [X A 1]---------------------------
186                             // [C A 1]: action: ( =A ): copy A into C, no accum
187                             // [X A 1]: action: ( undelete ): zombie lives
188                             GB_noaccum_C_A_1_scalar ;
189                         }
190                         else
191                         {
192                             // ----[. A 1]--------------------------------------
193                             // [. A 1]: action: ( insert )
194                             task_pending++ ;
195                         }
196                     }
197                 }
198             }
199         }
200 
201         GB_PHASE1_TASK_WRAPUP ;
202     }
203 
204     //--------------------------------------------------------------------------
205     // phase 2: insert pending tuples
206     //--------------------------------------------------------------------------
207 
208     GB_PENDING_CUMSUM ;
209     zorig = C->nzombies ;
210 
211     #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
212         reduction(&&:pending_sorted)
213     for (taskid = 0 ; taskid < ntasks ; taskid++)
214     {
215 
216         //----------------------------------------------------------------------
217         // get the task descriptor
218         //----------------------------------------------------------------------
219 
220         GB_GET_TASK_DESCRIPTOR_PHASE2 ;
221 
222         //----------------------------------------------------------------------
223         // compute all vectors in this task
224         //----------------------------------------------------------------------
225 
226         for (int64_t k = kfirst ; k <= klast ; k++)
227         {
228 
229             //------------------------------------------------------------------
230             // get j, the kth vector of M
231             //------------------------------------------------------------------
232 
233             int64_t j = GBH (Mh, k) ;
234             GB_GET_VECTOR (pM, pM_end, pA, pA_end, Mp, k, Mvlen) ;
235             int64_t mjnz = pM_end - pM ;
236             if (mjnz == 0) continue ;
237 
238             //------------------------------------------------------------------
239             // get jC, the corresponding vector of C
240             //------------------------------------------------------------------
241 
242             GB_GET_jC ;
243             bool cjdense = ((pC_end - pC_start) == Cvlen) ;
244 
245             //------------------------------------------------------------------
246             // C(I,jC)<M(:,j)> = scalar ; no S
247             //------------------------------------------------------------------
248 
249             if (!cjdense)
250             {
251 
252                 //--------------------------------------------------------------
253                 // C(:,jC) is sparse; use binary search for C
254                 //--------------------------------------------------------------
255 
256                 for ( ; pM < pM_end ; pM++)
257                 {
258 
259                     //----------------------------------------------------------
260                     // update C(iC,jC), but only if M(iA,j) allows it
261                     //----------------------------------------------------------
262 
263                     bool mij = GBB (Mb, pM) && GB_mcast (Mx, pM, msize) ;
264                     if (mij)
265                     {
266                         int64_t iA = GBI (Mi, pM, Mvlen) ;
267 
268                         // find C(iC,jC) in C(:,jC)
269                         GB_iC_BINARY_SEARCH ;
270                         if (!cij_found)
271                         {
272                             // ----[. A 1]--------------------------------------
273                             // [. A 1]: action: ( insert )
274                             GB_PENDING_INSERT (scalar) ;
275                         }
276                     }
277                 }
278             }
279         }
280 
281         GB_PHASE2_TASK_WRAPUP ;
282     }
283 
284     //--------------------------------------------------------------------------
285     // finalize the matrix and return result
286     //--------------------------------------------------------------------------
287 
288     GB_SUBASSIGN_WRAPUP ;
289 }
290 
291