1 //------------------------------------------------------------------------------
2 // GB_subassign_06n: C(I,J)<M> = A ; no S
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // Method 06n: C(I,J)<M> = A ; no S
11
12 // M: present
13 // Mask_comp: false
14 // C_replace: false
15 // accum: NULL
16 // A: matrix
17 // S: none (see also GB_subassign_06s)
18
19 // FULL: if A and C are dense, then C remains dense.
20
21 // If A is sparse and C dense, C will likely become sparse, except if M(i,j)=0
22 // wherever A(i,j) is not present. So if M==A is aliased and A is sparse, then
23 // C remains dense. Need C(I,J)<A,struct>=A kernel. Then in that case, if C
24 // is dense it remains dense, even if A is sparse. If that change is made,
25 // this kernel can start with converting C to sparse if A is sparse.
26
27 // C is not bitmap: GB_bitmap_assign is used if C is bitmap.
28 // M and A are not bitmap: 06s is used instead, if M or A are bitmap.
29
30 #include "GB_subassign_methods.h"
31
GB_subassign_06n(GrB_Matrix C,const GrB_Index * I,const int64_t nI,const int Ikind,const int64_t Icolon[3],const GrB_Index * J,const int64_t nJ,const int Jkind,const int64_t Jcolon[3],const GrB_Matrix M,const bool Mask_struct,const GrB_Matrix A,GB_Context Context)32 GrB_Info GB_subassign_06n
33 (
34 GrB_Matrix C,
35 // input:
36 const GrB_Index *I,
37 const int64_t nI,
38 const int Ikind,
39 const int64_t Icolon [3],
40 const GrB_Index *J,
41 const int64_t nJ,
42 const int Jkind,
43 const int64_t Jcolon [3],
44 const GrB_Matrix M,
45 const bool Mask_struct,
46 const GrB_Matrix A,
47 GB_Context Context
48 )
49 {
50
51 //--------------------------------------------------------------------------
52 // check inputs
53 //--------------------------------------------------------------------------
54
55 ASSERT (!GB_IS_BITMAP (C)) ; ASSERT (!GB_IS_FULL (C)) ;
56 ASSERT (!GB_IS_BITMAP (M)) ; // Method 06n is not used for M bitmap
57 ASSERT (!GB_IS_BITMAP (A)) ; // Method 06n is not used for A bitmap
58 ASSERT (!GB_aliased (C, M)) ; // NO ALIAS of C==M
59 ASSERT (!GB_aliased (C, A)) ; // NO ALIAS of C==A
60
61 ASSERT_MATRIX_OK (C, "C input for 06n", GB0) ;
62 ASSERT_MATRIX_OK (M, "M input for 06n", GB0) ;
63 ASSERT_MATRIX_OK (A, "A input for 06n", GB0) ;
64
65 //--------------------------------------------------------------------------
66 // get inputs
67 //--------------------------------------------------------------------------
68
69 GB_EMPTY_TASKLIST ;
70 GB_MATRIX_WAIT_IF_JUMBLED (C) ;
71 GB_MATRIX_WAIT_IF_JUMBLED (M) ;
72 GB_MATRIX_WAIT_IF_JUMBLED (A) ;
73
74 GB_GET_C ; // C must not be bitmap
75 int64_t zorig = C->nzombies ;
76 const int64_t Cnvec = C->nvec ;
77 const int64_t *restrict Ch = C->h ;
78 const int64_t *restrict Cp = C->p ;
79 const bool C_is_hyper = (Ch != NULL) ;
80 GB_GET_MASK ;
81 GB_GET_A ;
82 const int64_t *restrict Ah = A->h ;
83 const int64_t Anvec = A->nvec ;
84 const bool A_is_hyper = (Ah != NULL) ;
85 GrB_BinaryOp accum = NULL ;
86
87 //--------------------------------------------------------------------------
88 // Method 06n: C(I,J)<M> = A ; no S
89 //--------------------------------------------------------------------------
90
91 // Time: O(nnz(M)*(log(a)+log(c)), where a and c are the # of entries in a
92 // vector of A and C, respectively. The entries in the intersection of M
93 // (where the entries are true) and the matrix addition C(I,J)+A must be
94 // examined. This method scans M, and searches for entries in A and C(I,J)
95 // using two binary searches. If M is very dense, this method can be
96 // slower than Method 06s. This method is selected if nnz (A) >= nnz (M).
97
98 // Compare with Methods 05 and 07, which use a similar algorithmic outline
99 // and parallelization strategy.
100
101 //--------------------------------------------------------------------------
102 // Parallel: slice M into coarse/fine tasks (Method 05, 06n, 07)
103 //--------------------------------------------------------------------------
104
105 GB_SUBASSIGN_ONE_SLICE (M) ; // M cannot be jumbled
106
107 //--------------------------------------------------------------------------
108 // phase 1: create zombies, update entries, and count pending tuples
109 //--------------------------------------------------------------------------
110
111 #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
112 reduction(+:nzombies)
113 for (taskid = 0 ; taskid < ntasks ; taskid++)
114 {
115
116 //----------------------------------------------------------------------
117 // get the task descriptor
118 //----------------------------------------------------------------------
119
120 GB_GET_TASK_DESCRIPTOR_PHASE1 ;
121
122 //----------------------------------------------------------------------
123 // compute all vectors in this task
124 //----------------------------------------------------------------------
125
126 for (int64_t k = kfirst ; k <= klast ; k++)
127 {
128
129 //------------------------------------------------------------------
130 // get j, the kth vector of M
131 //------------------------------------------------------------------
132
133 int64_t j = GBH (Mh, k) ;
134 GB_GET_VECTOR (pM, pM_end, pA, pA_end, Mp, k, Mvlen) ;
135 int64_t mjnz = pM_end - pM ;
136 if (mjnz == 0) continue ;
137
138 //------------------------------------------------------------------
139 // get A(:,j)
140 //------------------------------------------------------------------
141
142 int64_t pA, pA_end ;
143 GB_VECTOR_LOOKUP (pA, pA_end, A, j) ;
144 int64_t ajnz = pA_end - pA ;
145 bool ajdense = (ajnz == Avlen) ;
146 int64_t pA_start = pA ;
147
148 //------------------------------------------------------------------
149 // get jC, the corresponding vector of C
150 //------------------------------------------------------------------
151
152 GB_GET_jC ;
153 int64_t cjnz = pC_end - pC_start ;
154 if (cjnz == 0 && ajnz == 0) continue ;
155 bool cjdense = (cjnz == Cvlen) ;
156
157 //------------------------------------------------------------------
158 // C(I,jC)<M(:,j)> = A(:,j) ; no S
159 //------------------------------------------------------------------
160
161 if (cjdense && ajdense)
162 {
163
164 //--------------------------------------------------------------
165 // C(:,jC) and A(:,j) are both dense
166 //--------------------------------------------------------------
167
168 for ( ; pM < pM_end ; pM++)
169 {
170
171 //----------------------------------------------------------
172 // update C(iC,jC), but only if M(iA,j) allows it
173 //----------------------------------------------------------
174
175 if (GB_mcast (Mx, pM, msize))
176 {
177 int64_t iA = GBI (Mi, pM, Mvlen) ;
178 GB_iC_DENSE_LOOKUP ;
179
180 // find iA in A(:,j)
181 // A(:,j) is dense; no need for binary search
182 pA = pA_start + iA ;
183 ASSERT (GBI (Ai, pA, Avlen) == iA) ;
184 // ----[C A 1] or [X A 1]-----------------------
185 // [C A 1]: action: ( =A ): copy A to C, no acc
186 // [X A 1]: action: ( undelete ): zombie lives
187 GB_noaccum_C_A_1_matrix ;
188 }
189 }
190
191 }
192 else if (cjdense)
193 {
194
195 //--------------------------------------------------------------
196 // C(:,jC) is dense, A(:,j) is sparse
197 //--------------------------------------------------------------
198
199 for ( ; pM < pM_end ; pM++)
200 {
201
202 //----------------------------------------------------------
203 // update C(iC,jC), but only if M(iA,j) allows it
204 //----------------------------------------------------------
205
206 if (GB_mcast (Mx, pM, msize))
207 {
208 int64_t iA = GBI (Mi, pM, Mvlen) ;
209 GB_iC_DENSE_LOOKUP ;
210
211 // find iA in A(:,j)
212 bool aij_found ;
213 int64_t apright = pA_end - 1 ;
214 GB_BINARY_SEARCH (iA, Ai, pA, apright, aij_found) ;
215
216 if (!aij_found)
217 {
218 // C (iC,jC) is present but A (i,j) is not
219 // ----[C . 1] or [X . 1]---------------------------
220 // [C . 1]: action: ( delete ): becomes zombie
221 // [X . 1]: action: ( X ): still zombie
222 GB_DELETE_ENTRY ;
223 }
224 else
225 {
226 // ----[C A 1] or [X A 1]---------------------------
227 // [C A 1]: action: ( =A ): copy A to C, no accum
228 // [X A 1]: action: ( undelete ): zombie lives
229 GB_noaccum_C_A_1_matrix ;
230 }
231 }
232 }
233
234 }
235 else if (ajdense)
236 {
237
238 //--------------------------------------------------------------
239 // C(:,jC) is sparse, A(:,j) is dense
240 //--------------------------------------------------------------
241
242 for ( ; pM < pM_end ; pM++)
243 {
244
245 //----------------------------------------------------------
246 // update C(iC,jC), but only if M(iA,j) allows it
247 //----------------------------------------------------------
248
249 if (GB_mcast (Mx, pM, msize))
250 {
251 int64_t iA = GBI (Mi, pM, Mvlen) ;
252
253 // find C(iC,jC) in C(:,jC)
254 GB_iC_BINARY_SEARCH ;
255
256 // lookup iA in A(:,j)
257 pA = pA_start + iA ;
258 ASSERT (GBI (Ai, pA, Avlen) == iA) ;
259
260 if (cij_found)
261 {
262 // ----[C A 1] or [X A 1]---------------------------
263 // [C A 1]: action: ( =A ): copy A into C, no accum
264 // [X A 1]: action: ( undelete ): zombie lives
265 GB_noaccum_C_A_1_matrix ;
266 }
267 else
268 {
269 // C (iC,jC) is not present, A (i,j) is present
270 // ----[. A 1]--------------------------------------
271 // [. A 1]: action: ( insert )
272 task_pending++ ;
273 }
274 }
275 }
276
277 }
278 else
279 {
280
281 //--------------------------------------------------------------
282 // C(:,jC) and A(:,j) are both sparse
283 //--------------------------------------------------------------
284
285 for ( ; pM < pM_end ; pM++)
286 {
287
288 //----------------------------------------------------------
289 // update C(iC,jC), but only if M(iA,j) allows it
290 //----------------------------------------------------------
291
292 if (GB_mcast (Mx, pM, msize))
293 {
294 int64_t iA = GBI (Mi, pM, Mvlen) ;
295
296 // find C(iC,jC) in C(:,jC)
297 GB_iC_BINARY_SEARCH ;
298
299 // find iA in A(:,j)
300 bool aij_found ;
301 int64_t apright = pA_end - 1 ;
302 GB_BINARY_SEARCH (iA, Ai, pA, apright, aij_found) ;
303
304 if (cij_found && aij_found)
305 {
306 // ----[C A 1] or [X A 1]---------------------------
307 // [C A 1]: action: ( =A ): copy A into C, no accum
308 // [X A 1]: action: ( undelete ): zombie lives
309 GB_noaccum_C_A_1_matrix ;
310 }
311 else if (!cij_found && aij_found)
312 {
313 // C (iC,jC) is not present, A (i,j) is present
314 // ----[. A 1]--------------------------------------
315 // [. A 1]: action: ( insert )
316 task_pending++ ;
317 }
318 else if (cij_found && !aij_found)
319 {
320 // C (iC,jC) is present but A (i,j) is not
321 // ----[C . 1] or [X . 1]---------------------------
322 // [C . 1]: action: ( delete ): becomes zombie
323 // [X . 1]: action: ( X ): still zombie
324 GB_DELETE_ENTRY ;
325 }
326 }
327 }
328 }
329 }
330
331 GB_PHASE1_TASK_WRAPUP ;
332 }
333
334 //--------------------------------------------------------------------------
335 // phase 2: insert pending tuples
336 //--------------------------------------------------------------------------
337
338 GB_PENDING_CUMSUM ;
339 zorig = C->nzombies ;
340
341 #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
342 reduction(&&:pending_sorted)
343 for (taskid = 0 ; taskid < ntasks ; taskid++)
344 {
345
346 //----------------------------------------------------------------------
347 // get the task descriptor
348 //----------------------------------------------------------------------
349
350 GB_GET_TASK_DESCRIPTOR_PHASE2 ;
351
352 //----------------------------------------------------------------------
353 // compute all vectors in this task
354 //----------------------------------------------------------------------
355
356 for (int64_t k = kfirst ; k <= klast ; k++)
357 {
358
359 //------------------------------------------------------------------
360 // get j, the kth vector of M
361 //------------------------------------------------------------------
362
363 int64_t j = GBH (Mh, k) ;
364 GB_GET_VECTOR (pM, pM_end, pA, pA_end, Mp, k, Mvlen) ;
365 int64_t mjnz = pM_end - pM ;
366 if (mjnz == 0) continue ;
367
368 //------------------------------------------------------------------
369 // get A(:,j)
370 //------------------------------------------------------------------
371
372 int64_t pA, pA_end ;
373 GB_VECTOR_LOOKUP (pA, pA_end, A, j) ;
374 int64_t ajnz = pA_end - pA ;
375 if (ajnz == 0) continue ;
376 bool ajdense = (ajnz == Avlen) ;
377 int64_t pA_start = pA ;
378
379 //------------------------------------------------------------------
380 // get jC, the corresponding vector of C
381 //------------------------------------------------------------------
382
383 GB_GET_jC ;
384 bool cjdense = ((pC_end - pC_start) == Cvlen) ;
385
386 //------------------------------------------------------------------
387 // C(I,jC)<M(:,j)> = A(:,j)
388 //------------------------------------------------------------------
389
390 if (!cjdense)
391 {
392
393 //--------------------------------------------------------------
394 // C(:,jC) is sparse; use binary search for C
395 //--------------------------------------------------------------
396
397 for ( ; pM < pM_end ; pM++)
398 {
399
400 //----------------------------------------------------------
401 // update C(iC,jC), but only if M(iA,j) allows it
402 //----------------------------------------------------------
403
404 if (GB_mcast (Mx, pM, msize))
405 {
406 int64_t iA = GBI (Mi, pM, Mvlen) ;
407
408 // find iA in A(:,j)
409 if (ajdense)
410 {
411 // A(:,j) is dense; no need for binary search
412 pA = pA_start + iA ;
413 ASSERT (GBI (Ai, pA, Avlen) == iA) ;
414 }
415 else
416 {
417 // A(:,j) is sparse; use binary search
418 int64_t apright = pA_end - 1 ;
419 bool aij_found ;
420 GB_BINARY_SEARCH (iA, Ai, pA, apright, aij_found) ;
421 if (!aij_found) continue ;
422 }
423
424 // find C(iC,jC) in C(:,jC)
425 GB_iC_BINARY_SEARCH ;
426 if (!cij_found)
427 {
428 // C (iC,jC) is not present, A (i,j) is present
429 // ----[. A 1]--------------------------------------
430 // [. A 1]: action: ( insert )
431 GB_PENDING_INSERT (Ax +(pA*asize)) ;
432 }
433 }
434 }
435 }
436 }
437
438 GB_PHASE2_TASK_WRAPUP ;
439 }
440
441 //--------------------------------------------------------------------------
442 // finalize the matrix and return result
443 //--------------------------------------------------------------------------
444
445 GB_SUBASSIGN_WRAPUP ;
446 }
447
448