1 //------------------------------------------------------------------------------
2 // GB_subassign_11: C(I,J)<M,repl> += scalar ; using S
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // Method 11: C(I,J)<M,repl> += scalar ; using S
11
12 // M: present
13 // Mask_comp: false
14 // C_replace: true
15 // accum: present
16 // A: scalar
17 // S: constructed
18
19 // C, M: not bitmap
20
21 #include "GB_unused.h"
22 #include "GB_subassign_methods.h"
23
GB_subassign_11(GrB_Matrix C,const GrB_Index * I,const int64_t ni,const int64_t nI,const int Ikind,const int64_t Icolon[3],const GrB_Index * J,const int64_t nj,const int64_t nJ,const int Jkind,const int64_t Jcolon[3],const GrB_Matrix M,const bool Mask_struct,const GrB_BinaryOp accum,const void * scalar,const GrB_Type atype,GB_Context Context)24 GrB_Info GB_subassign_11
25 (
26 GrB_Matrix C,
27 // input:
28 const GrB_Index *I,
29 const int64_t ni,
30 const int64_t nI,
31 const int Ikind,
32 const int64_t Icolon [3],
33 const GrB_Index *J,
34 const int64_t nj,
35 const int64_t nJ,
36 const int Jkind,
37 const int64_t Jcolon [3],
38 const GrB_Matrix M,
39 const bool Mask_struct,
40 const GrB_BinaryOp accum,
41 const void *scalar,
42 const GrB_Type atype,
43 GB_Context Context
44 )
45 {
46
47 //--------------------------------------------------------------------------
48 // check inputs
49 //--------------------------------------------------------------------------
50
51 ASSERT (!GB_IS_BITMAP (C)) ; ASSERT (!GB_IS_FULL (C)) ;
52 ASSERT (!GB_aliased (C, M)) ; // NO ALIAS of C==M
53
54 //--------------------------------------------------------------------------
55 // S = C(I,J)
56 //--------------------------------------------------------------------------
57
58 GB_EMPTY_TASKLIST ;
59 GB_OK (GB_subassign_symbolic (S, C, I, ni, J, nj, true, Context)) ;
60
61 //--------------------------------------------------------------------------
62 // get inputs
63 //--------------------------------------------------------------------------
64
65 GB_MATRIX_WAIT_IF_JUMBLED (M) ;
66
67 GB_GET_C ; // C must not be bitmap
68 GB_GET_MASK ;
69 GB_GET_ACCUM_SCALAR ;
70 GB_GET_S ;
71
72 //--------------------------------------------------------------------------
73 // Method 11: C(I,J)<M,repl> += scalar ; using S
74 //--------------------------------------------------------------------------
75
76 // Time: Optimal. All entries in M+S must be examined. All entries in S
77 // are modified: if M(i,j)=1 then S(i,j) is used to write to the
78 // corresponding entry in C. If M(i,j) is not present, or zero, then the
79 // entry in C is cleared (because of C_replace). If S(i,j) is not present,
80 // and M(i,j)=1, then the scalar is inserted into C. The only case that
81 // can be skipped is if neither S nor M is present. As a result, this
82 // method need not traverse all of IxJ. It can limit its traversal to the
83 // pattern of M+S.
84
85 // Method 09 and Method 11 are very similar.
86
87 //--------------------------------------------------------------------------
88 // Parallel: M+S (Methods 02, 04, 09, 10, 11, 12, 14, 16, 18, 20)
89 //--------------------------------------------------------------------------
90
91 if (M_is_bitmap)
92 {
93 // all of IxJ must be examined
94 GB_SUBASSIGN_IXJ_SLICE ;
95 }
96 else
97 {
98 // traverse all M+S
99 GB_SUBASSIGN_TWO_SLICE (M, S) ;
100 }
101
102 //--------------------------------------------------------------------------
103 // phase 1: create zombies, update entries, and count pending tuples
104 //--------------------------------------------------------------------------
105
106 if (M_is_bitmap)
107 {
108
109 //----------------------------------------------------------------------
110 // phase1: M is bitmap
111 //----------------------------------------------------------------------
112
113 #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
114 reduction(+:nzombies)
115 for (taskid = 0 ; taskid < ntasks ; taskid++)
116 {
117
118 //------------------------------------------------------------------
119 // get the task descriptor
120 //------------------------------------------------------------------
121
122 GB_GET_IXJ_TASK_DESCRIPTOR_PHASE1 (iM_start, iM_end) ;
123
124 //------------------------------------------------------------------
125 // compute all vectors in this task
126 //------------------------------------------------------------------
127
128 for (int64_t j = kfirst ; j <= klast ; j++)
129 {
130
131 //--------------------------------------------------------------
132 // get S(iM_start:iM_end,j)
133 //--------------------------------------------------------------
134
135 GB_GET_VECTOR_FOR_IXJ (S, iM_start) ;
136 int64_t pM_start = j * Mvlen ;
137
138 //--------------------------------------------------------------
139 // do a 2-way merge of S(iM_start:iM_end,j) and M(ditto,j)
140 //--------------------------------------------------------------
141
142 for (int64_t iM = iM_start ; iM < iM_end ; iM++)
143 {
144
145 int64_t pM = pM_start + iM ;
146 bool Sfound = (pS < pS_end) && (GBI (Si, pS, Svlen) == iM) ;
147 bool mij = Mb [pM] && GB_mcast (Mx, pM, msize) ;
148
149 if (Sfound && !mij)
150 {
151 // S (i,j) is present but M (i,j) is false
152 // ----[C A 0] or [X A 0]-------------------------------
153 // [X A 0]: action: ( X ): still a zombie
154 // [C A 0]: C_repl: action: ( delete ): becomes zombie
155 GB_C_S_LOOKUP ;
156 GB_DELETE_ENTRY ;
157 GB_NEXT (S) ;
158 }
159 else if (!Sfound && mij)
160 {
161 // S (i,j) is not present, M (i,j) is true
162 // ----[. A 1]------------------------------------------
163 // [. A 1]: action: ( insert )
164 task_pending++ ;
165 }
166 else if (Sfound && mij)
167 {
168 // S (i,j) present and M (i,j) is true
169 GB_C_S_LOOKUP ;
170 // ----[C A 1] or [X A 1]-------------------------------
171 // [C A 1]: action: ( =C+A ): apply accum
172 // [X A 1]: action: ( undelete ): zombie lives
173 GB_withaccum_C_A_1_scalar ;
174 GB_NEXT (S) ;
175 }
176 }
177 }
178 GB_PHASE1_TASK_WRAPUP ;
179 }
180
181 }
182 else
183 {
184
185 //----------------------------------------------------------------------
186 // phase1: M is hypersparse, sparse, or full
187 //----------------------------------------------------------------------
188
189 #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
190 reduction(+:nzombies)
191 for (taskid = 0 ; taskid < ntasks ; taskid++)
192 {
193
194 //------------------------------------------------------------------
195 // get the task descriptor
196 //------------------------------------------------------------------
197
198 GB_GET_TASK_DESCRIPTOR_PHASE1 ;
199
200 //------------------------------------------------------------------
201 // compute all vectors in this task
202 //------------------------------------------------------------------
203
204 for (int64_t k = kfirst ; k <= klast ; k++)
205 {
206
207 //--------------------------------------------------------------
208 // get S(:,j) and M(:,j)
209 //--------------------------------------------------------------
210
211 int64_t j = GBH (Zh, k) ;
212 GB_GET_MAPPED (pM, pM_end, pA, pA_end, Mp, j, k, Z_to_X, Mvlen);
213 GB_GET_MAPPED (pS, pS_end, pB, pB_end, Sp, j, k, Z_to_S, Svlen);
214
215 //--------------------------------------------------------------
216 // do a 2-way merge of S(:,j) and M(:,j)
217 //--------------------------------------------------------------
218
219 // jC = J [j] ; or J is a colon expression
220 // int64_t jC = GB_ijlist (J, j, Jkind, Jcolon) ;
221
222 // while both list S (:,j) and M (:,j) have entries
223 while (pS < pS_end && pM < pM_end)
224 {
225 int64_t iS = GBI (Si, pS, Svlen) ;
226 int64_t iM = GBI (Mi, pM, Mvlen) ;
227
228 if (iS < iM)
229 {
230 // S (i,j) is present but M (i,j) is not
231 // ----[C A 0] or [X A 0]-------------------------------
232 // [X A 0]: action: ( X ): still a zombie
233 // [C A 0]: C_repl: action: ( delete ): becomes zombie
234 GB_C_S_LOOKUP ;
235 GB_DELETE_ENTRY ;
236 GB_NEXT (S) ;
237 }
238 else if (iM < iS)
239 {
240 // S (i,j) is not present, M (i,j) is present
241 if (GB_mcast (Mx, pM, msize))
242 {
243 // ----[. A 1]--------------------------------------
244 // [. A 1]: action: ( insert )
245 task_pending++ ;
246 }
247 GB_NEXT (M) ;
248 }
249 else
250 {
251 // both S (i,j) and M (i,j) present
252 GB_C_S_LOOKUP ;
253 if (GB_mcast (Mx, pM, msize))
254 {
255 // ----[C A 1] or [X A 1]---------------------------
256 // [C A 1]: action: ( =C+A ): apply accum
257 // [X A 1]: action: ( undelete ): zombie lives
258 GB_withaccum_C_A_1_scalar ;
259 }
260 else
261 {
262 // ----[C A 0] or [X A 0]---------------------------
263 // [X A 0]: action: ( X ): still a zombie
264 // [C A 0]: C_repl: action: ( delete ): now zombie
265 GB_DELETE_ENTRY ;
266 }
267 GB_NEXT (S) ;
268 GB_NEXT (M) ;
269 }
270 }
271
272 // while list S (:,j) has entries. List M (:,j) exhausted.
273 while (pS < pS_end)
274 {
275 // S (i,j) is present but M (i,j) is not
276 // ----[C A 0] or [X A 0]-----------------------------------
277 // [X A 0]: action: ( X ): still a zombie
278 // [C A 0]: C_repl: action: ( delete ): becomes zombie
279 GB_C_S_LOOKUP ;
280 GB_DELETE_ENTRY ;
281 GB_NEXT (S) ;
282 }
283
284 // while list M (:,j) has entries. List S (:,j) exhausted.
285 while (pM < pM_end)
286 {
287 // S (i,j) is not present, M (i,j) is present
288 if (GB_mcast (Mx, pM, msize))
289 {
290 // ----[. A 1]------------------------------------------
291 // [. A 1]: action: ( insert )
292 task_pending++ ;
293 }
294 GB_NEXT (M) ;
295 }
296 }
297
298 GB_PHASE1_TASK_WRAPUP ;
299 }
300 }
301
302 //--------------------------------------------------------------------------
303 // phase 2: insert pending tuples
304 //--------------------------------------------------------------------------
305
306 GB_PENDING_CUMSUM ;
307
308 if (M_is_bitmap)
309 {
310
311 //----------------------------------------------------------------------
312 // phase2: M is bitmap
313 //----------------------------------------------------------------------
314
315 #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
316 reduction(&&:pending_sorted)
317 for (taskid = 0 ; taskid < ntasks ; taskid++)
318 {
319
320 //------------------------------------------------------------------
321 // get the task descriptor
322 //------------------------------------------------------------------
323
324 GB_GET_IXJ_TASK_DESCRIPTOR_PHASE2 (iM_start, iM_end) ;
325
326 //------------------------------------------------------------------
327 // compute all vectors in this task
328 //------------------------------------------------------------------
329
330 for (int64_t j = kfirst ; j <= klast ; j++)
331 {
332
333 //--------------------------------------------------------------
334 // get S(iM_start:iM_end,j)
335 //--------------------------------------------------------------
336
337 GB_GET_VECTOR_FOR_IXJ (S, iM_start) ;
338 int64_t pM_start = j * Mvlen ;
339
340 //--------------------------------------------------------------
341 // do a 2-way merge of S(iM_start:iM_end,j) and M(ditto,j)
342 //--------------------------------------------------------------
343
344 // jC = J [j] ; or J is a colon expression
345 int64_t jC = GB_ijlist (J, j, Jkind, Jcolon) ;
346
347 for (int64_t iM = iM_start ; iM < iM_end ; iM++)
348 {
349 int64_t pM = pM_start + iM ;
350 bool Sfound = (pS < pS_end) && (GBI (Si, pS, Svlen) == iM) ;
351 bool mij = Mb [pM] && GB_mcast (Mx, pM, msize) ;
352
353 if (!Sfound && mij)
354 {
355 // S (i,j) is not present, M (i,j) is true
356 // ----[. A 1]------------------------------------------
357 // [. A 1]: action: ( insert )
358 int64_t iC = GB_ijlist (I, iM, Ikind, Icolon) ;
359 GB_PENDING_INSERT (scalar) ;
360 }
361 else if (Sfound)
362 {
363 // S (i,j) present
364 GB_NEXT (S) ;
365 }
366 }
367 }
368 GB_PHASE2_TASK_WRAPUP ;
369 }
370
371 }
372 else
373 {
374
375 //----------------------------------------------------------------------
376 // phase2: M is hypersparse, sparse, or full
377 //----------------------------------------------------------------------
378
379 #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
380 reduction(&&:pending_sorted)
381 for (taskid = 0 ; taskid < ntasks ; taskid++)
382 {
383
384 //------------------------------------------------------------------
385 // get the task descriptor
386 //------------------------------------------------------------------
387
388 GB_GET_TASK_DESCRIPTOR_PHASE2 ;
389
390 //------------------------------------------------------------------
391 // compute all vectors in this task
392 //------------------------------------------------------------------
393
394 for (int64_t k = kfirst ; k <= klast ; k++)
395 {
396
397 //--------------------------------------------------------------
398 // get S(:,j) and M(:,j)
399 //--------------------------------------------------------------
400
401 int64_t j = GBH (Zh, k) ;
402 GB_GET_MAPPED (pM, pM_end, pA, pA_end, Mp, j, k, Z_to_X, Mvlen);
403 GB_GET_MAPPED (pS, pS_end, pB, pB_end, Sp, j, k, Z_to_S, Svlen);
404
405 //--------------------------------------------------------------
406 // do a 2-way merge of S(:,j) and M(:,j)
407 //--------------------------------------------------------------
408
409 // jC = J [j] ; or J is a colon expression
410 int64_t jC = GB_ijlist (J, j, Jkind, Jcolon) ;
411
412 // while both list S (:,j) and M (:,j) have entries
413 while (pS < pS_end && pM < pM_end)
414 {
415 int64_t iS = GBI (Si, pS, Svlen) ;
416 int64_t iM = GBI (Mi, pM, Mvlen) ;
417
418 if (iS < iM)
419 {
420 // S (i,j) is present but M (i,j) is not
421 GB_NEXT (S) ;
422 }
423 else if (iM < iS)
424 {
425 // S (i,j) is not present, M (i,j) is present
426 if (GB_mcast (Mx, pM, msize))
427 {
428 // ----[. A 1]--------------------------------------
429 // [. A 1]: action: ( insert )
430 int64_t iC = GB_ijlist (I, iM, Ikind, Icolon) ;
431 GB_PENDING_INSERT (scalar) ;
432 }
433 GB_NEXT (M) ;
434 }
435 else
436 {
437 // both S (i,j) and M (i,j) present
438 GB_NEXT (S) ;
439 GB_NEXT (M) ;
440 }
441 }
442
443 // while list M (:,j) has entries. List S (:,j) exhausted.
444 while (pM < pM_end)
445 {
446 // S (i,j) is not present, M (i,j) is present
447 if (GB_mcast (Mx, pM, msize))
448 {
449 // ----[. A 1]------------------------------------------
450 // [. A 1]: action: ( insert )
451 int64_t iM = GBI (Mi, pM, Mvlen) ;
452 int64_t iC = GB_ijlist (I, iM, Ikind, Icolon) ;
453 GB_PENDING_INSERT (scalar) ;
454 }
455 GB_NEXT (M) ;
456 }
457 }
458
459 GB_PHASE2_TASK_WRAPUP ;
460 }
461 }
462
463 //--------------------------------------------------------------------------
464 // finalize the matrix and return result
465 //--------------------------------------------------------------------------
466
467 GB_SUBASSIGN_WRAPUP ;
468 }
469
470