1 //------------------------------------------------------------------------------
2 // GB_subassign_02: C(I,J) = A ; using S
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // Method 02: C(I,J) = A ; using S
11
12 // M: NULL
13 // Mask_comp: false
14 // C_replace: false
15 // accum: NULL
16 // A: matrix
17 // S: constructed
18
19 // C: not bitmap or full: use GB_bitmap_assign instead
20 // A: any sparsity structure.
21
22 #include "GB_subassign_methods.h"
23
GB_subassign_02(GrB_Matrix C,const GrB_Index * I,const int64_t ni,const int64_t nI,const int Ikind,const int64_t Icolon[3],const GrB_Index * J,const int64_t nj,const int64_t nJ,const int Jkind,const int64_t Jcolon[3],const GrB_Matrix A,GB_Context Context)24 GrB_Info GB_subassign_02
25 (
26 GrB_Matrix C,
27 // input:
28 const GrB_Index *I,
29 const int64_t ni,
30 const int64_t nI,
31 const int Ikind,
32 const int64_t Icolon [3],
33 const GrB_Index *J,
34 const int64_t nj,
35 const int64_t nJ,
36 const int Jkind,
37 const int64_t Jcolon [3],
38 const GrB_Matrix A,
39 GB_Context Context
40 )
41 {
42
43 //--------------------------------------------------------------------------
44 // check inputs
45 //--------------------------------------------------------------------------
46
47 ASSERT (!GB_IS_BITMAP (C)) ; ASSERT (!GB_IS_FULL (C)) ;
48 ASSERT (!GB_aliased (C, A)) ; // NO ALIAS of C==A
49
50 //--------------------------------------------------------------------------
51 // S = C(I,J)
52 //--------------------------------------------------------------------------
53
54 GB_EMPTY_TASKLIST ;
55 GB_OK (GB_subassign_symbolic (S, C, I, ni, J, nj, true, Context)) ;
56
57 //--------------------------------------------------------------------------
58 // get inputs
59 //--------------------------------------------------------------------------
60
61 GB_MATRIX_WAIT_IF_JUMBLED (A) ;
62
63 GB_GET_C ; // C must not be bitmap
64 GB_GET_A ;
65 GB_GET_S ;
66 GrB_BinaryOp accum = NULL ;
67
68 //--------------------------------------------------------------------------
69 // Method 02: C(I,J) = A ; using S
70 //--------------------------------------------------------------------------
71
72 // Time: Optimal. All entries in A+S must be examined, so the work is
73 // Omega (nnz(A)+nnz(S)).
74
75 // Method 02 and Method 04 are somewhat similar. They differ on how C is
76 // modified when the entry is present in S but not A.
77
78 // TODO: phase2 of Method 02 and 04 are identical and could be
79 // done in a single function.
80
81 //--------------------------------------------------------------------------
82 // Parallel: A+S (Methods 02, 04, 09, 10, 11, 12, 14, 16, 18, 20)
83 //--------------------------------------------------------------------------
84
85 if (A_is_bitmap)
86 {
87 // all of IxJ must be examined
88 GB_SUBASSIGN_IXJ_SLICE ;
89 }
90 else
91 {
92 // traverse all A+S
93 GB_SUBASSIGN_TWO_SLICE (A, S) ;
94 }
95
96 //--------------------------------------------------------------------------
97 // phase 1: create zombies, update entries, and count pending tuples
98 //--------------------------------------------------------------------------
99
100 if (A_is_bitmap)
101 {
102
103 //----------------------------------------------------------------------
104 // phase1: A is bitmap
105 //----------------------------------------------------------------------
106
107 #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
108 reduction(+:nzombies)
109 for (taskid = 0 ; taskid < ntasks ; taskid++)
110 {
111
112 //------------------------------------------------------------------
113 // get the task descriptor
114 //------------------------------------------------------------------
115
116 GB_GET_IXJ_TASK_DESCRIPTOR_PHASE1 (iA_start, iA_end) ;
117
118 //------------------------------------------------------------------
119 // compute all vectors in this task
120 //------------------------------------------------------------------
121
122 for (int64_t j = kfirst ; j <= klast ; j++)
123 {
124
125 //--------------------------------------------------------------
126 // get S(iA_start:iA_end,j)
127 //--------------------------------------------------------------
128
129 GB_GET_VECTOR_FOR_IXJ (S, iA_start) ;
130 int64_t pA_start = j * Avlen ;
131
132 //--------------------------------------------------------------
133 // do a 2-way merge of S(iA_start:iA_end,j) and A(ditto,j)
134 //--------------------------------------------------------------
135
136 for (int64_t iA = iA_start ; iA < iA_end ; iA++)
137 {
138 int64_t pA = pA_start + iA ;
139 bool Sfound = (pS < pS_end) && (GBI (Si, pS, Svlen) == iA) ;
140 bool Afound = Ab [pA] ;
141 if (Sfound && !Afound)
142 {
143 // ----[C . 1] or [X . 1]-------------------------------
144 // S (i,j) is present but A (i,j) is not
145 // [C . 1]: action: ( delete ): becomes zombie
146 // [X . 1]: action: ( X ): still a zombie
147 GB_C_S_LOOKUP ;
148 GB_DELETE_ENTRY ;
149 GB_NEXT (S) ;
150 }
151 else if (!Sfound && Afound)
152 {
153 // ----[. A 1]------------------------------------------
154 // S (i,j) is not present, A (i,j) is present
155 // [. A 1]: action: ( insert )
156 task_pending++ ;
157 }
158 else if (Sfound && Afound)
159 {
160 // ----[C A 1] or [X A 1]-------------------------------
161 // both S (i,j) and A (i,j) present
162 // [C A 1]: action: ( =A ): copy A into C, no accum
163 // [X A 1]: action: ( undelete ): zombie lives
164 GB_C_S_LOOKUP ;
165 GB_noaccum_C_A_1_matrix ;
166 GB_NEXT (S) ;
167 }
168 }
169 }
170 GB_PHASE1_TASK_WRAPUP ;
171 }
172
173 }
174 else
175 {
176
177 //----------------------------------------------------------------------
178 // phase1: A is hypersparse, sparse, or full
179 //----------------------------------------------------------------------
180
181 #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
182 reduction(+:nzombies)
183 for (taskid = 0 ; taskid < ntasks ; taskid++)
184 {
185
186 //------------------------------------------------------------------
187 // get the task descriptor
188 //------------------------------------------------------------------
189
190 GB_GET_TASK_DESCRIPTOR_PHASE1 ;
191
192 //------------------------------------------------------------------
193 // compute all vectors in this task
194 //------------------------------------------------------------------
195
196 for (int64_t k = kfirst ; k <= klast ; k++)
197 {
198
199 //--------------------------------------------------------------
200 // get A(:,j) and S(:,j)
201 //--------------------------------------------------------------
202
203 int64_t j = GBH (Zh, k) ;
204 GB_GET_MAPPED (pA, pA_end, pA, pA_end, Ap, j, k, Z_to_X, Avlen);
205 GB_GET_MAPPED (pS, pS_end, pB, pB_end, Sp, j, k, Z_to_S, Svlen);
206
207 //--------------------------------------------------------------
208 // do a 2-way merge of S(:,j) and A(:,j)
209 //--------------------------------------------------------------
210
211 // jC = J [j] ; or J is a colon expression
212 // int64_t jC = GB_ijlist (J, j, Jkind, Jcolon) ;
213
214 // while both list S (:,j) and A (:,j) have entries
215 while (pS < pS_end && pA < pA_end)
216 {
217 int64_t iS = GBI (Si, pS, Svlen) ;
218 int64_t iA = GBI (Ai, pA, Avlen) ;
219
220 if (iS < iA)
221 {
222 // ----[C . 1] or [X . 1]-------------------------------
223 // S (i,j) is present but A (i,j) is not
224 // [C . 1]: action: ( delete ): becomes zombie
225 // [X . 1]: action: ( X ): still a zombie
226 GB_C_S_LOOKUP ;
227 GB_DELETE_ENTRY ;
228 GB_NEXT (S) ;
229 }
230 else if (iA < iS)
231 {
232 // ----[. A 1]------------------------------------------
233 // S (i,j) is not present, A (i,j) is present
234 // [. A 1]: action: ( insert )
235 task_pending++ ;
236 GB_NEXT (A) ;
237 }
238 else
239 {
240 // ----[C A 1] or [X A 1]-------------------------------
241 // both S (i,j) and A (i,j) present
242 // [C A 1]: action: ( =A ): copy A into C, no accum
243 // [X A 1]: action: ( undelete ): zombie lives
244 GB_C_S_LOOKUP ;
245 GB_noaccum_C_A_1_matrix ;
246 GB_NEXT (S) ;
247 GB_NEXT (A) ;
248 }
249 }
250
251 // while list S (:,j) has entries. List A (:,j) exhausted.
252 while (pS < pS_end)
253 {
254 // ----[C . 1] or [X . 1]-----------------------------------
255 // S (i,j) is present but A (i,j) is not
256 // [C . 1]: action: ( delete ): becomes zombie
257 // [X . 1]: action: ( X ): still a zombie
258 GB_C_S_LOOKUP ;
259 GB_DELETE_ENTRY ;
260 GB_NEXT (S) ;
261 }
262
263 // List A (:,j) has entries. List S (:,j) exhausted.
264 task_pending += (pA_end - pA) ;
265 }
266
267 GB_PHASE1_TASK_WRAPUP ;
268 }
269 }
270
271 //--------------------------------------------------------------------------
272 // phase 2: insert pending tuples
273 //--------------------------------------------------------------------------
274
275 GB_PENDING_CUMSUM ;
276
277 if (A_is_bitmap)
278 {
279
280 //----------------------------------------------------------------------
281 // phase2: A is bitmap
282 //----------------------------------------------------------------------
283
284 #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
285 reduction(&&:pending_sorted)
286 for (taskid = 0 ; taskid < ntasks ; taskid++)
287 {
288
289 //------------------------------------------------------------------
290 // get the task descriptor
291 //------------------------------------------------------------------
292
293 GB_GET_IXJ_TASK_DESCRIPTOR_PHASE2 (iA_start, iA_end) ;
294
295 //------------------------------------------------------------------
296 // compute all vectors in this task
297 //------------------------------------------------------------------
298
299 for (int64_t j = kfirst ; j <= klast ; j++)
300 {
301
302 //--------------------------------------------------------------
303 // get S(iA_start:iA_end,j)
304 //--------------------------------------------------------------
305
306 GB_GET_VECTOR_FOR_IXJ (S, iA_start) ;
307 int64_t pA_start = j * Avlen ;
308
309 //--------------------------------------------------------------
310 // do a 2-way merge of S(iA_start:iA_end,j) and A(ditto,j)
311 //--------------------------------------------------------------
312
313 // jC = J [j] ; or J is a colon expression
314 int64_t jC = GB_ijlist (J, j, Jkind, Jcolon) ;
315
316 for (int64_t iA = iA_start ; iA < iA_end ; iA++)
317 {
318 int64_t pA = pA_start + iA ;
319 bool Sfound = (pS < pS_end) && (GBI (Si, pS, Svlen) == iA) ;
320 bool Afound = Ab [pA] ;
321 if (!Sfound && Afound)
322 {
323 // ----[. A 1]------------------------------------------
324 // S (i,j) is not present, A (i,j) is present
325 // [. A 1]: action: ( insert )
326 int64_t iC = GB_ijlist (I, iA, Ikind, Icolon) ;
327 GB_PENDING_INSERT (Ax +(pA*asize)) ;
328 GB_NEXT (A) ;
329 }
330 else if (Sfound)
331 {
332 // S (i,j) present
333 GB_NEXT (S) ;
334 }
335 }
336 }
337 GB_PHASE2_TASK_WRAPUP ;
338 }
339
340 }
341 else
342 {
343
344 //----------------------------------------------------------------------
345 // phase2: A is hypersparse, sparse, or full
346 //----------------------------------------------------------------------
347
348 #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
349 reduction(&&:pending_sorted)
350 for (taskid = 0 ; taskid < ntasks ; taskid++)
351 {
352
353 //------------------------------------------------------------------
354 // get the task descriptor
355 //------------------------------------------------------------------
356
357 GB_GET_TASK_DESCRIPTOR_PHASE2 ;
358
359 //------------------------------------------------------------------
360 // compute all vectors in this task
361 //------------------------------------------------------------------
362
363 for (int64_t k = kfirst ; k <= klast ; k++)
364 {
365
366 //--------------------------------------------------------------
367 // get A(:,j) and S(:,j)
368 //--------------------------------------------------------------
369
370 int64_t j = GBH (Zh, k) ;
371 GB_GET_MAPPED (pA, pA_end, pA, pA_end, Ap, j, k, Z_to_X, Avlen);
372 GB_GET_MAPPED (pS, pS_end, pB, pB_end, Sp, j, k, Z_to_S, Svlen);
373
374 //--------------------------------------------------------------
375 // do a 2-way merge of S(:,j) and A(:,j)
376 //--------------------------------------------------------------
377
378 // jC = J [j] ; or J is a colon expression
379 int64_t jC = GB_ijlist (J, j, Jkind, Jcolon) ;
380
381 // while both list S (:,j) and A (:,j) have entries
382 while (pS < pS_end && pA < pA_end)
383 {
384 int64_t iS = GBI (Si, pS, Svlen) ;
385 int64_t iA = GBI (Ai, pA, Avlen) ;
386
387 if (iS < iA)
388 {
389 GB_NEXT (S) ;
390 }
391 else if (iA < iS)
392 {
393 // ----[. A 1]------------------------------------------
394 // S (i,j) is not present, A (i,j) is present
395 // [. A 1]: action: ( insert )
396 int64_t iC = GB_ijlist (I, iA, Ikind, Icolon) ;
397 GB_PENDING_INSERT (Ax +(pA*asize)) ;
398 GB_NEXT (A) ;
399 }
400 else
401 {
402 GB_NEXT (S) ;
403 GB_NEXT (A) ;
404 }
405 }
406
407 // ignore the remainder of S (:,j)
408
409 // while list A (:,j) has entries. List S (:,j) exhausted.
410 while (pA < pA_end)
411 {
412 // ----[. A 1]----------------------------------------------
413 // S (i,j) is not present, A (i,j) is present
414 // [. A 1]: action: ( insert )
415 int64_t iA = GBI (Ai, pA, Avlen) ;
416 int64_t iC = GB_ijlist (I, iA, Ikind, Icolon) ;
417 GB_PENDING_INSERT (Ax +(pA*asize)) ;
418 GB_NEXT (A) ;
419 }
420 }
421 GB_PHASE2_TASK_WRAPUP ;
422 }
423 }
424
425 //--------------------------------------------------------------------------
426 // finalize the matrix and return result
427 //--------------------------------------------------------------------------
428
429 GB_SUBASSIGN_WRAPUP ;
430 }
431
432