1 //------------------------------------------------------------------------------
2 // GB_dense_subassign_06d_template: C<A> = A where C is dense or bitmap
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
{

    // Template body: computes C<A> = A, where A is both the mask and the
    // value source.  C must be full (all entries present) or bitmap on input.
    // NOTE(review): Mask_struct, A_nthreads, A_ntasks, A_ek_slicing, asize,
    // GB_ATYPE, GB_CTYPE, GB_COPY_A_TO_C, and GB_AX_MASK are presumed to be
    // defined by the file that #include's this template — confirm at the
    // inclusion site.

    //--------------------------------------------------------------------------
    // get C and A
    //--------------------------------------------------------------------------

    // A may be jumbled (unsorted indices within a vector), but must have no
    // zombies and no pending tuples.
    ASSERT (!GB_ZOMBIES (A)) ;
    ASSERT (GB_JUMBLED_OK (A)) ;
    ASSERT (!GB_PENDING (A)) ;

    const int64_t  *restrict Ap = A->p ;    // vector pointers (sparse/hyper)
    const int64_t  *restrict Ah = A->h ;    // hyperlist (NULL if not hyper)
    const int64_t  *restrict Ai = A->i ;    // row indices (sparse/hyper)
    const int8_t   *restrict Ab = A->b ;    // bitmap of A (NULL if not bitmap)
    const GB_ATYPE *restrict Ax = (GB_ATYPE *) A->x ;   // values of A
    const int64_t avlen = A->vlen ;
    const bool A_is_bitmap = GB_IS_BITMAP (A) ;
    const bool A_is_dense = GB_as_if_full (A) ;     // all entries present
    const int64_t anz = GB_NNZ_HELD (A) ;   // # entries held, incl. bitmap zeros

    GB_CTYPE *restrict Cx = (GB_CTYPE *) C->x ;
    int8_t   *restrict Cb = C->b ;          // bitmap of C (NULL if C not bitmap)
    const int64_t cvlen = C->vlen ;
    const bool C_is_bitmap = GB_IS_BITMAP (C) ;

    //--------------------------------------------------------------------------
    // C<A> = A
    //--------------------------------------------------------------------------

    int64_t cnvals = C->nvals ;     // for C bitmap; written back at the end

    if (A_is_dense)
    {

        //----------------------------------------------------------------------
        // A is dense: all entries present
        //----------------------------------------------------------------------

        if (C_is_bitmap)
        {

            //------------------------------------------------------------------
            // C is bitmap, A is dense
            //------------------------------------------------------------------

            if (Mask_struct)
            {
                // C<A,struct>=A with C bitmap, A dense.  Every entry of A is
                // present, so every entry of C is written and Cb becomes all 1.
                int64_t p ;
                #pragma omp parallel for num_threads(A_nthreads) \
                    schedule(static)
                for (p = 0 ; p < anz ; p++)
                {
                    // Cx [p] = Ax [p]
                    GB_COPY_A_TO_C (Cx, p, Ax, p) ;
                }
                GB_memset (Cb, 1, anz, A_nthreads) ;
                cnvals = anz ;
            }
            else
            {
                // C<A>=A with C bitmap, A dense, valued mask: only copy
                // entries where Ax [p] passes the mask test.  One static
                // partition of [0,anz) per thread; each thread counts the
                // entries it turns on, summed via the OpenMP reduction.
                int tid ;
                #pragma omp parallel for num_threads(A_nthreads) \
                    schedule(static) reduction(+:cnvals)
                for (tid = 0 ; tid < A_nthreads ; tid++)
                {
                    int64_t pA_start, pA_end, task_cnvals = 0 ;
                    GB_PARTITION (pA_start, pA_end, anz, tid, A_nthreads) ;
                    for (int64_t p = pA_start ; p < pA_end ; p++)
                    {
                        if (GB_AX_MASK (Ax, p, asize))
                        {
                            // Cx [p] = Ax [p]
                            GB_COPY_A_TO_C (Cx, p, Ax, p) ;
                            // count the entry only if it was not already in C
                            task_cnvals += (Cb [p] == 0) ;
                            Cb [p] = 1 ;
                        }
                    }
                    cnvals += task_cnvals ;
                }
            }

        }
        else
        {

            //------------------------------------------------------------------
            // C is hypersparse, sparse, or full, with all entries present
            //------------------------------------------------------------------

            if (Mask_struct)
            {
                // C<A,struct>=A with C sparse/hyper/full: both C and A have
                // all entries present, so this is a flat elementwise copy.
                int64_t p ;
                #pragma omp parallel for num_threads(A_nthreads) \
                    schedule(static)
                for (p = 0 ; p < anz ; p++)
                {
                    // Cx [p] = Ax [p]
                    GB_COPY_A_TO_C (Cx, p, Ax, p) ;
                }
            }
            else
            {
                // C<A>=A with C sparse/hyper/full, valued mask: copy only
                // where the mask test passes.  No pattern bookkeeping is
                // needed since C keeps all its entries.
                int64_t p ;
                #pragma omp parallel for num_threads(A_nthreads) \
                    schedule(static)
                for (p = 0 ; p < anz ; p++)
                {
                    if (GB_AX_MASK (Ax, p, asize))
                    {
                        // Cx [p] = Ax [p]
                        GB_COPY_A_TO_C (Cx, p, Ax, p) ;
                    }
                }
            }
        }

    }
    else if (A_is_bitmap)
    {
        //----------------------------------------------------------------------
        // A is bitmap
        //----------------------------------------------------------------------

        if (C_is_bitmap)
        {

            //------------------------------------------------------------------
            // C is bitmap, A is bitmap
            //------------------------------------------------------------------

            if (Mask_struct)
            {
                // C<A,struct>=A with A and C bitmap: copy entries where A's
                // bitmap is set; each thread counts entries newly added to C.
                int tid ;
                #pragma omp parallel for num_threads(A_nthreads) \
                    schedule(static) reduction(+:cnvals)
                for (tid = 0 ; tid < A_nthreads ; tid++)
                {
                    int64_t pA_start, pA_end, task_cnvals = 0 ;
                    GB_PARTITION (pA_start, pA_end, anz, tid, A_nthreads) ;
                    for (int64_t p = pA_start ; p < pA_end ; p++)
                    {
                        if (Ab [p])
                        {
                            // Cx [p] = Ax [p]
                            GB_COPY_A_TO_C (Cx, p, Ax, p) ;
                            task_cnvals += (Cb [p] == 0) ;
                            Cb [p] = 1 ;
                        }
                    }
                    cnvals += task_cnvals ;
                }

            }
            else
            {
                // C<A>=A with A and C bitmap, valued mask: the entry must be
                // present in A's bitmap AND pass the mask value test.
                int tid ;
                #pragma omp parallel for num_threads(A_nthreads) \
                    schedule(static) reduction(+:cnvals)
                for (tid = 0 ; tid < A_nthreads ; tid++)
                {
                    int64_t pA_start, pA_end, task_cnvals = 0 ;
                    GB_PARTITION (pA_start, pA_end, anz, tid, A_nthreads) ;
                    for (int64_t p = pA_start ; p < pA_end ; p++)
                    {
                        if (Ab [p] && GB_AX_MASK (Ax, p, asize))
                        {
                            // Cx [p] = Ax [p]
                            GB_COPY_A_TO_C (Cx, p, Ax, p) ;
                            task_cnvals += (Cb [p] == 0) ;
                            Cb [p] = 1 ;
                        }
                    }
                    cnvals += task_cnvals ;
                }
            }

        }
        else
        {

            //------------------------------------------------------------------
            // C is hypersparse, sparse, or full, with all entries present
            //------------------------------------------------------------------

            if (Mask_struct)
            {
                // C<A,struct>=A with A bitmap, and C hyper/sparse/full
                // this method is used by LAGraph_bfs_parent when q is
                // a bitmap and pi is full.
                int64_t p ;
                #pragma omp parallel for num_threads(A_nthreads) \
                    schedule(static)
                for (p = 0 ; p < anz ; p++)
                {
                    // Cx [p] = Ax [p]
                    if (Ab [p])
                    {
                        GB_COPY_A_TO_C (Cx, p, Ax, p) ;
                    }
                }
            }
            else
            {
                // C<A>=A with A bitmap, and C hyper/sparse/full, valued
                // mask: entry must be present in A AND pass the mask test.
                int64_t p ;
                #pragma omp parallel for num_threads(A_nthreads) \
                    schedule(static)
                for (p = 0 ; p < anz ; p++)
                {
                    if (Ab [p] && GB_AX_MASK (Ax, p, asize))
                    {
                        // Cx [p] = Ax [p]
                        GB_COPY_A_TO_C (Cx, p, Ax, p) ;
                    }
                }
            }
        }

    }
    else
    {

        //----------------------------------------------------------------------
        // A is hypersparse or sparse; C is dense or a bitmap
        //----------------------------------------------------------------------

        // A_ek_slicing partitions the entries/vectors of A into A_ntasks
        // balanced tasks; it holds three arrays of size A_ntasks each:
        // kfirst/klast (vector range per task) and pstart (entry offsets).
        const int64_t *restrict kfirst_Aslice = A_ek_slicing ;
        const int64_t *restrict klast_Aslice  = A_ek_slicing + A_ntasks ;
        const int64_t *restrict pstart_Aslice = A_ek_slicing + A_ntasks * 2 ;
        int taskid ;

        if (Mask_struct)
        {
            if (C_is_bitmap)
            {

                //--------------------------------------------------------------
                // C is bitmap, mask is structural
                //--------------------------------------------------------------

                #pragma omp parallel for num_threads(A_nthreads) \
                    schedule(dynamic,1) reduction(+:cnvals)
                for (taskid = 0 ; taskid < A_ntasks ; taskid++)
                {
                    // if kfirst > klast then taskid does no work at all
                    int64_t kfirst = kfirst_Aslice [taskid] ;
                    int64_t klast  = klast_Aslice  [taskid] ;
                    int64_t task_cnvals = 0 ;
                    // C<A(:,kfirst:klast)> = A(:,kfirst:klast)
                    for (int64_t k = kfirst ; k <= klast ; k++)
                    {
                        // get A(:,j), the kth vector of A
                        int64_t j = GBH (Ah, k) ;
                        int64_t pA_start, pA_end ;
                        GB_get_pA (&pA_start, &pA_end, taskid, k,
                            kfirst, klast, pstart_Aslice, Ap, avlen) ;
                        // pC is the start of C(:,j); C is held by column with
                        // cvlen entries per vector
                        int64_t pC = j * cvlen ;
                        // C<A(:,j),struct>=A(:,j) with C bitmap, A sparse
                        GB_PRAGMA_SIMD_REDUCTION (+,task_cnvals)
                        for (int64_t pA = pA_start ; pA < pA_end ; pA++)
                        {
                            // scatter: p is the position of C(i,j)
                            int64_t p = pC + Ai [pA] ;
                            // Cx [p] = Ax [pA]
                            GB_COPY_A_TO_C (Cx, p, Ax, pA) ;
                            task_cnvals += (Cb [p] == 0) ;
                            Cb [p] = 1 ;
                        }
                    }
                    cnvals += task_cnvals ;
                }

            }
            else
            {

                //--------------------------------------------------------------
                // C is full, mask is structural
                //--------------------------------------------------------------

                #pragma omp parallel for num_threads(A_nthreads) \
                    schedule(dynamic,1)
                for (taskid = 0 ; taskid < A_ntasks ; taskid++)
                {
                    // if kfirst > klast then taskid does no work at all
                    int64_t kfirst = kfirst_Aslice [taskid] ;
                    int64_t klast  = klast_Aslice  [taskid] ;
                    // C<A(:,kfirst:klast)> = A(:,kfirst:klast)
                    for (int64_t k = kfirst ; k <= klast ; k++)
                    {
                        // get A(:,j), the kth vector of A
                        int64_t j = GBH (Ah, k) ;
                        int64_t pA_start, pA_end ;
                        GB_get_pA (&pA_start, &pA_end, taskid, k,
                            kfirst, klast, pstart_Aslice, Ap, avlen) ;
                        // pC is the start of C(:,j)
                        int64_t pC = j * cvlen ;
                        // C<A(:,j),struct>=A(:,j) with C full, A sparse
                        GB_PRAGMA_SIMD_VECTORIZE
                        for (int64_t pA = pA_start ; pA < pA_end ; pA++)
                        {
                            int64_t p = pC + Ai [pA] ;
                            // Cx [p] = Ax [pA]
                            GB_COPY_A_TO_C (Cx, p, Ax, pA) ;
                        }
                    }
                }
            }

        }
        else
        {
            if (C_is_bitmap)
            {

                //--------------------------------------------------------------
                // C is bitmap, mask is valued
                //--------------------------------------------------------------

                #pragma omp parallel for num_threads(A_nthreads) \
                    schedule(dynamic,1) reduction(+:cnvals)
                for (taskid = 0 ; taskid < A_ntasks ; taskid++)
                {
                    // if kfirst > klast then taskid does no work at all
                    int64_t kfirst = kfirst_Aslice [taskid] ;
                    int64_t klast  = klast_Aslice  [taskid] ;
                    int64_t task_cnvals = 0 ;
                    // C<A(:,kfirst:klast)> = A(:,kfirst:klast)
                    for (int64_t k = kfirst ; k <= klast ; k++)
                    {
                        // get A(:,j), the kth vector of A
                        int64_t j = GBH (Ah, k) ;
                        int64_t pA_start, pA_end ;
                        GB_get_pA (&pA_start, &pA_end, taskid, k,
                            kfirst, klast, pstart_Aslice, Ap, avlen) ;
                        // pC is the start of C(:,j)
                        int64_t pC = j * cvlen ;
                        // C<A(:,j)>=A(:,j), valued mask, C bitmap, A sparse
                        GB_PRAGMA_SIMD_REDUCTION (+,task_cnvals)
                        for (int64_t pA = pA_start ; pA < pA_end ; pA++)
                        {
                            if (GB_AX_MASK (Ax, pA, asize))
                            {
                                int64_t p = pC + Ai [pA] ;
                                // Cx [p] = Ax [pA]
                                GB_COPY_A_TO_C (Cx, p, Ax, pA) ;
                                task_cnvals += (Cb [p] == 0) ;
                                Cb [p] = 1 ;
                            }
                        }
                    }
                    cnvals += task_cnvals ;
                }

            }
            else
            {

                //--------------------------------------------------------------
                // C is full, mask is valued
                //--------------------------------------------------------------

                // NOTE(review): the reduction(+:cnvals) clause below is
                // vestigial — cnvals is never modified in this loop since C
                // is full and keeps no bitmap count.  Harmless but could be
                // removed.
                #pragma omp parallel for num_threads(A_nthreads) \
                    schedule(dynamic,1) reduction(+:cnvals)
                for (taskid = 0 ; taskid < A_ntasks ; taskid++)
                {
                    // if kfirst > klast then taskid does no work at all
                    int64_t kfirst = kfirst_Aslice [taskid] ;
                    int64_t klast  = klast_Aslice  [taskid] ;
                    // C<A(:,kfirst:klast)> = A(:,kfirst:klast)
                    for (int64_t k = kfirst ; k <= klast ; k++)
                    {
                        // get A(:,j), the kth vector of A
                        int64_t j = GBH (Ah, k) ;
                        int64_t pA_start, pA_end ;
                        GB_get_pA (&pA_start, &pA_end, taskid, k,
                            kfirst, klast, pstart_Aslice, Ap, avlen) ;
                        // pC is the start of C(:,j)
                        int64_t pC = j * cvlen ;
                        // C<A(:,j)>=A(:,j), valued mask, C dense, A sparse
                        GB_PRAGMA_SIMD_VECTORIZE
                        for (int64_t pA = pA_start ; pA < pA_end ; pA++)
                        {
                            if (GB_AX_MASK (Ax, pA, asize))
                            {
                                int64_t p = pC + Ai [pA] ;
                                // Cx [p] = Ax [pA]
                                GB_COPY_A_TO_C (Cx, p, Ax, pA) ;
                            }
                        }
                    }
                }
            }
        }
    }

    //--------------------------------------------------------------------------
    // log the number of entries in the C bitmap
    //--------------------------------------------------------------------------

    if (C_is_bitmap)
    {
        C->nvals = cnvals ;
    }
}
421 
422