1 //------------------------------------------------------------------------------
2 // GB_subassign_02: C(I,J) = A ; using S
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // Method 02: C(I,J) = A ; using S
11 
12 // M:           NULL
13 // Mask_comp:   false
14 // C_replace:   false
15 // accum:       NULL
16 // A:           matrix
17 // S:           constructed
18 
19 // C: not bitmap or full: use GB_bitmap_assign instead
20 // A: any sparsity structure.
21 
22 #include "GB_subassign_methods.h"
23 
GB_subassign_02(GrB_Matrix C,const GrB_Index * I,const int64_t ni,const int64_t nI,const int Ikind,const int64_t Icolon[3],const GrB_Index * J,const int64_t nj,const int64_t nJ,const int Jkind,const int64_t Jcolon[3],const GrB_Matrix A,GB_Context Context)24 GrB_Info GB_subassign_02
25 (
26     GrB_Matrix C,
27     // input:
28     const GrB_Index *I,
29     const int64_t ni,
30     const int64_t nI,
31     const int Ikind,
32     const int64_t Icolon [3],
33     const GrB_Index *J,
34     const int64_t nj,
35     const int64_t nJ,
36     const int Jkind,
37     const int64_t Jcolon [3],
38     const GrB_Matrix A,
39     GB_Context Context
40 )
41 {
42 
43     //--------------------------------------------------------------------------
44     // check inputs
45     //--------------------------------------------------------------------------
46 
47     ASSERT (!GB_IS_BITMAP (C)) ; ASSERT (!GB_IS_FULL (C)) ;
48     ASSERT (!GB_aliased (C, A)) ;   // NO ALIAS of C==A
49 
50     //--------------------------------------------------------------------------
51     // S = C(I,J)
52     //--------------------------------------------------------------------------
53 
54     GB_EMPTY_TASKLIST ;
55     GB_OK (GB_subassign_symbolic (S, C, I, ni, J, nj, true, Context)) ;
56 
57     //--------------------------------------------------------------------------
58     // get inputs
59     //--------------------------------------------------------------------------
60 
61     GB_MATRIX_WAIT_IF_JUMBLED (A) ;
62 
63     GB_GET_C ;      // C must not be bitmap
64     GB_GET_A ;
65     GB_GET_S ;
66     GrB_BinaryOp accum = NULL ;
67 
68     //--------------------------------------------------------------------------
69     // Method 02: C(I,J) = A ; using S
70     //--------------------------------------------------------------------------
71 
72     // Time: Optimal.  All entries in A+S must be examined, so the work is
73     // Omega (nnz(A)+nnz(S)).
74 
75     // Method 02 and Method 04 are somewhat similar.  They differ on how C is
76     // modified when the entry is present in S but not A.
77 
78     // TODO: phase2 of Method 02 and 04 are identical and could be
79     // done in a single function.
80 
81     //--------------------------------------------------------------------------
82     // Parallel: A+S (Methods 02, 04, 09, 10, 11, 12, 14, 16, 18, 20)
83     //--------------------------------------------------------------------------
84 
85     if (A_is_bitmap)
86     {
87         // all of IxJ must be examined
88         GB_SUBASSIGN_IXJ_SLICE ;
89     }
90     else
91     {
92         // traverse all A+S
93         GB_SUBASSIGN_TWO_SLICE (A, S) ;
94     }
95 
96     //--------------------------------------------------------------------------
97     // phase 1: create zombies, update entries, and count pending tuples
98     //--------------------------------------------------------------------------
99 
100     if (A_is_bitmap)
101     {
102 
103         //----------------------------------------------------------------------
104         // phase1: A is bitmap
105         //----------------------------------------------------------------------
106 
107         #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
108             reduction(+:nzombies)
109         for (taskid = 0 ; taskid < ntasks ; taskid++)
110         {
111 
112             //------------------------------------------------------------------
113             // get the task descriptor
114             //------------------------------------------------------------------
115 
116             GB_GET_IXJ_TASK_DESCRIPTOR_PHASE1 (iA_start, iA_end) ;
117 
118             //------------------------------------------------------------------
119             // compute all vectors in this task
120             //------------------------------------------------------------------
121 
122             for (int64_t j = kfirst ; j <= klast ; j++)
123             {
124 
125                 //--------------------------------------------------------------
126                 // get S(iA_start:iA_end,j)
127                 //--------------------------------------------------------------
128 
129                 GB_GET_VECTOR_FOR_IXJ (S, iA_start) ;
130                 int64_t pA_start = j * Avlen ;
131 
132                 //--------------------------------------------------------------
133                 // do a 2-way merge of S(iA_start:iA_end,j) and A(ditto,j)
134                 //--------------------------------------------------------------
135 
136                 for (int64_t iA = iA_start ; iA < iA_end ; iA++)
137                 {
138                     int64_t pA = pA_start + iA ;
139                     bool Sfound = (pS < pS_end) && (GBI (Si, pS, Svlen) == iA) ;
140                     bool Afound = Ab [pA] ;
141                     if (Sfound && !Afound)
142                     {
143                         // ----[C . 1] or [X . 1]-------------------------------
144                         // S (i,j) is present but A (i,j) is not
145                         // [C . 1]: action: ( delete ): becomes zombie
146                         // [X . 1]: action: ( X ): still a zombie
147                         GB_C_S_LOOKUP ;
148                         GB_DELETE_ENTRY ;
149                         GB_NEXT (S) ;
150                     }
151                     else if (!Sfound && Afound)
152                     {
153                         // ----[. A 1]------------------------------------------
154                         // S (i,j) is not present, A (i,j) is present
155                         // [. A 1]: action: ( insert )
156                         task_pending++ ;
157                     }
158                     else if (Sfound && Afound)
159                     {
160                         // ----[C A 1] or [X A 1]-------------------------------
161                         // both S (i,j) and A (i,j) present
162                         // [C A 1]: action: ( =A ): copy A into C, no accum
163                         // [X A 1]: action: ( undelete ): zombie lives
164                         GB_C_S_LOOKUP ;
165                         GB_noaccum_C_A_1_matrix ;
166                         GB_NEXT (S) ;
167                     }
168                 }
169             }
170             GB_PHASE1_TASK_WRAPUP ;
171         }
172 
173     }
174     else
175     {
176 
177         //----------------------------------------------------------------------
178         // phase1: A is hypersparse, sparse, or full
179         //----------------------------------------------------------------------
180 
181         #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
182             reduction(+:nzombies)
183         for (taskid = 0 ; taskid < ntasks ; taskid++)
184         {
185 
186             //------------------------------------------------------------------
187             // get the task descriptor
188             //------------------------------------------------------------------
189 
190             GB_GET_TASK_DESCRIPTOR_PHASE1 ;
191 
192             //------------------------------------------------------------------
193             // compute all vectors in this task
194             //------------------------------------------------------------------
195 
196             for (int64_t k = kfirst ; k <= klast ; k++)
197             {
198 
199                 //--------------------------------------------------------------
200                 // get A(:,j) and S(:,j)
201                 //--------------------------------------------------------------
202 
203                 int64_t j = GBH (Zh, k) ;
204                 GB_GET_MAPPED (pA, pA_end, pA, pA_end, Ap, j, k, Z_to_X, Avlen);
205                 GB_GET_MAPPED (pS, pS_end, pB, pB_end, Sp, j, k, Z_to_S, Svlen);
206 
207                 //--------------------------------------------------------------
208                 // do a 2-way merge of S(:,j) and A(:,j)
209                 //--------------------------------------------------------------
210 
211                 // jC = J [j] ; or J is a colon expression
212                 // int64_t jC = GB_ijlist (J, j, Jkind, Jcolon) ;
213 
214                 // while both list S (:,j) and A (:,j) have entries
215                 while (pS < pS_end && pA < pA_end)
216                 {
217                     int64_t iS = GBI (Si, pS, Svlen) ;
218                     int64_t iA = GBI (Ai, pA, Avlen) ;
219 
220                     if (iS < iA)
221                     {
222                         // ----[C . 1] or [X . 1]-------------------------------
223                         // S (i,j) is present but A (i,j) is not
224                         // [C . 1]: action: ( delete ): becomes zombie
225                         // [X . 1]: action: ( X ): still a zombie
226                         GB_C_S_LOOKUP ;
227                         GB_DELETE_ENTRY ;
228                         GB_NEXT (S) ;
229                     }
230                     else if (iA < iS)
231                     {
232                         // ----[. A 1]------------------------------------------
233                         // S (i,j) is not present, A (i,j) is present
234                         // [. A 1]: action: ( insert )
235                         task_pending++ ;
236                         GB_NEXT (A) ;
237                     }
238                     else
239                     {
240                         // ----[C A 1] or [X A 1]-------------------------------
241                         // both S (i,j) and A (i,j) present
242                         // [C A 1]: action: ( =A ): copy A into C, no accum
243                         // [X A 1]: action: ( undelete ): zombie lives
244                         GB_C_S_LOOKUP ;
245                         GB_noaccum_C_A_1_matrix ;
246                         GB_NEXT (S) ;
247                         GB_NEXT (A) ;
248                     }
249                 }
250 
251                 // while list S (:,j) has entries.  List A (:,j) exhausted.
252                 while (pS < pS_end)
253                 {
254                     // ----[C . 1] or [X . 1]-----------------------------------
255                     // S (i,j) is present but A (i,j) is not
256                     // [C . 1]: action: ( delete ): becomes zombie
257                     // [X . 1]: action: ( X ): still a zombie
258                     GB_C_S_LOOKUP ;
259                     GB_DELETE_ENTRY ;
260                     GB_NEXT (S) ;
261                 }
262 
263                 // List A (:,j) has entries.  List S (:,j) exhausted.
264                 task_pending += (pA_end - pA) ;
265             }
266 
267             GB_PHASE1_TASK_WRAPUP ;
268         }
269     }
270 
271     //--------------------------------------------------------------------------
272     // phase 2: insert pending tuples
273     //--------------------------------------------------------------------------
274 
275     GB_PENDING_CUMSUM ;
276 
277     if (A_is_bitmap)
278     {
279 
280         //----------------------------------------------------------------------
281         // phase2: A is bitmap
282         //----------------------------------------------------------------------
283 
284         #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
285             reduction(&&:pending_sorted)
286         for (taskid = 0 ; taskid < ntasks ; taskid++)
287         {
288 
289             //------------------------------------------------------------------
290             // get the task descriptor
291             //------------------------------------------------------------------
292 
293             GB_GET_IXJ_TASK_DESCRIPTOR_PHASE2 (iA_start, iA_end) ;
294 
295             //------------------------------------------------------------------
296             // compute all vectors in this task
297             //------------------------------------------------------------------
298 
299             for (int64_t j = kfirst ; j <= klast ; j++)
300             {
301 
302                 //--------------------------------------------------------------
303                 // get S(iA_start:iA_end,j)
304                 //--------------------------------------------------------------
305 
306                 GB_GET_VECTOR_FOR_IXJ (S, iA_start) ;
307                 int64_t pA_start = j * Avlen ;
308 
309                 //--------------------------------------------------------------
310                 // do a 2-way merge of S(iA_start:iA_end,j) and A(ditto,j)
311                 //--------------------------------------------------------------
312 
313                 // jC = J [j] ; or J is a colon expression
314                 int64_t jC = GB_ijlist (J, j, Jkind, Jcolon) ;
315 
316                 for (int64_t iA = iA_start ; iA < iA_end ; iA++)
317                 {
318                     int64_t pA = pA_start + iA ;
319                     bool Sfound = (pS < pS_end) && (GBI (Si, pS, Svlen) == iA) ;
320                     bool Afound = Ab [pA] ;
321                     if (!Sfound && Afound)
322                     {
323                         // ----[. A 1]------------------------------------------
324                         // S (i,j) is not present, A (i,j) is present
325                         // [. A 1]: action: ( insert )
326                         int64_t iC = GB_ijlist (I, iA, Ikind, Icolon) ;
327                         GB_PENDING_INSERT (Ax +(pA*asize)) ;
328                         GB_NEXT (A) ;
329                     }
330                     else if (Sfound)
331                     {
332                         // S (i,j) present
333                         GB_NEXT (S) ;
334                     }
335                 }
336             }
337             GB_PHASE2_TASK_WRAPUP ;
338         }
339 
340     }
341     else
342     {
343 
344         //----------------------------------------------------------------------
345         // phase2: A is hypersparse, sparse, or full
346         //----------------------------------------------------------------------
347 
348         #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
349             reduction(&&:pending_sorted)
350         for (taskid = 0 ; taskid < ntasks ; taskid++)
351         {
352 
353             //------------------------------------------------------------------
354             // get the task descriptor
355             //------------------------------------------------------------------
356 
357             GB_GET_TASK_DESCRIPTOR_PHASE2 ;
358 
359             //------------------------------------------------------------------
360             // compute all vectors in this task
361             //------------------------------------------------------------------
362 
363             for (int64_t k = kfirst ; k <= klast ; k++)
364             {
365 
366                 //--------------------------------------------------------------
367                 // get A(:,j) and S(:,j)
368                 //--------------------------------------------------------------
369 
370                 int64_t j = GBH (Zh, k) ;
371                 GB_GET_MAPPED (pA, pA_end, pA, pA_end, Ap, j, k, Z_to_X, Avlen);
372                 GB_GET_MAPPED (pS, pS_end, pB, pB_end, Sp, j, k, Z_to_S, Svlen);
373 
374                 //--------------------------------------------------------------
375                 // do a 2-way merge of S(:,j) and A(:,j)
376                 //--------------------------------------------------------------
377 
378                 // jC = J [j] ; or J is a colon expression
379                 int64_t jC = GB_ijlist (J, j, Jkind, Jcolon) ;
380 
381                 // while both list S (:,j) and A (:,j) have entries
382                 while (pS < pS_end && pA < pA_end)
383                 {
384                     int64_t iS = GBI (Si, pS, Svlen) ;
385                     int64_t iA = GBI (Ai, pA, Avlen) ;
386 
387                     if (iS < iA)
388                     {
389                         GB_NEXT (S) ;
390                     }
391                     else if (iA < iS)
392                     {
393                         // ----[. A 1]------------------------------------------
394                         // S (i,j) is not present, A (i,j) is present
395                         // [. A 1]: action: ( insert )
396                         int64_t iC = GB_ijlist (I, iA, Ikind, Icolon) ;
397                         GB_PENDING_INSERT (Ax +(pA*asize)) ;
398                         GB_NEXT (A) ;
399                     }
400                     else
401                     {
402                         GB_NEXT (S) ;
403                         GB_NEXT (A) ;
404                     }
405                 }
406 
407                 // ignore the remainder of S (:,j)
408 
409                 // while list A (:,j) has entries.  List S (:,j) exhausted.
410                 while (pA < pA_end)
411                 {
412                     // ----[. A 1]----------------------------------------------
413                     // S (i,j) is not present, A (i,j) is present
414                     // [. A 1]: action: ( insert )
415                     int64_t iA = GBI (Ai, pA, Avlen) ;
416                     int64_t iC = GB_ijlist (I, iA, Ikind, Icolon) ;
417                     GB_PENDING_INSERT (Ax +(pA*asize)) ;
418                     GB_NEXT (A) ;
419                 }
420             }
421             GB_PHASE2_TASK_WRAPUP ;
422         }
423     }
424 
425     //--------------------------------------------------------------------------
426     // finalize the matrix and return result
427     //--------------------------------------------------------------------------
428 
429     GB_SUBASSIGN_WRAPUP ;
430 }
431 
432