1 //------------------------------------------------------------------------------
2 // GB_dense_subassign_25_template: C<M> = A where C is empty and A is dense
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // C<M> = A where C starts as empty, M is structural, and A is dense.  The
11 // pattern of C is an exact copy of M.  A is full, dense, or bitmap.
12 // M is sparse or hypersparse, and C is constructed with the same pattern as M.
13 
14 {
15 
16     //--------------------------------------------------------------------------
17     // get C, M, and A
18     //--------------------------------------------------------------------------
19 
20     ASSERT (GB_sparsity (M) == GB_sparsity (C)) ;
21     GB_CTYPE *restrict Cx = (GB_CTYPE *) C->x ;
22     int64_t *restrict Ci = C->i ;
23 
24     ASSERT (GB_IS_SPARSE (M) || GB_IS_HYPERSPARSE (M)) ;
25     ASSERT (GB_JUMBLED_OK (M)) ;
26     const int64_t *restrict Mp = M->p ;
27     const int64_t *restrict Mh = M->h ;
28     const int64_t *restrict Mi = M->i ;
29     const int64_t mvlen = M->vlen ;
30 
31     const bool A_is_bitmap = GB_IS_BITMAP (A) ;
32     const GB_ATYPE *restrict Ax = (GB_ATYPE *) A->x ;
33     const int8_t   *restrict Ab = A->b ;
34     const int64_t avlen = A->vlen ;
35 
36     const int64_t *restrict kfirst_Mslice = M_ek_slicing ;
37     const int64_t *restrict klast_Mslice  = M_ek_slicing + M_ntasks ;
38     const int64_t *restrict pstart_Mslice = M_ek_slicing + M_ntasks * 2 ;
39 
40     //--------------------------------------------------------------------------
41     // C<M> = A
42     //--------------------------------------------------------------------------
43 
44     if (A_is_bitmap)
45     {
46 
47         //----------------------------------------------------------------------
48         // A is bitmap, so zombies can be created in C
49         //----------------------------------------------------------------------
50 
51         int64_t nzombies = 0 ;
52 
53         int tid ;
54         #pragma omp parallel for num_threads(M_nthreads) schedule(dynamic,1) \
55             reduction(+:nzombies)
56         for (tid = 0 ; tid < M_ntasks ; tid++)
57         {
58 
59             // if kfirst > klast then task tid does no work at all
60             int64_t kfirst = kfirst_Mslice [tid] ;
61             int64_t klast  = klast_Mslice  [tid] ;
62             int64_t task_nzombies = 0 ;
63 
64             //------------------------------------------------------------------
65             // C<M(:,kfirst:klast)> = A(:,kfirst:klast)
66             //------------------------------------------------------------------
67 
68             for (int64_t k = kfirst ; k <= klast ; k++)
69             {
70 
71                 //--------------------------------------------------------------
72                 // find the part of M(:,k) to be operated on by this task
73                 //--------------------------------------------------------------
74 
75                 int64_t j = GBH (Mh, k) ;
76                 int64_t pM_start, pM_end ;
77                 GB_get_pA (&pM_start, &pM_end, tid, k,
78                     kfirst, klast, pstart_Mslice, Mp, mvlen) ;
79 
80                 //--------------------------------------------------------------
81                 // C<M(:,j)> = A(:,j)
82                 //--------------------------------------------------------------
83 
84                 // M is hypersparse or sparse.  C is the same as M.
85                 // pA points to the start of A(:,j) since A is dense
86                 int64_t pA = j * avlen ;
87                 for (int64_t pM = pM_start ; pM < pM_end ; pM++)
88                 {
89                     int64_t i = Mi [pM] ;
90                     int64_t p = pA + i ;
91                     if (Ab [p])
92                     {
93                         // C(i,j) = A(i,j)
94                         GB_COPY_A_TO_C (Cx, pM, Ax, p) ;    // Cx [pM] = Ax [p]
95                     }
96                     else
97                     {
98                         // C(i,j) becomes a zombie
99                         task_nzombies++ ;
100                         Ci [pM] = GB_FLIP (i) ;
101                     }
102                 }
103             }
104             nzombies += task_nzombies ;
105         }
106         C->nzombies = nzombies ;
107 
108     }
109     else
110     {
111 
112         //----------------------------------------------------------------------
113         // A is full, so no zombies will appear in C
114         //----------------------------------------------------------------------
115 
116         int tid ;
117         #pragma omp parallel for num_threads(M_nthreads) schedule(dynamic,1)
118         for (tid = 0 ; tid < M_ntasks ; tid++)
119         {
120 
121             // if kfirst > klast then task tid does no work at all
122             int64_t kfirst = kfirst_Mslice [tid] ;
123             int64_t klast  = klast_Mslice  [tid] ;
124 
125             //------------------------------------------------------------------
126             // C<M(:,kfirst:klast)> = A(:,kfirst:klast)
127             //------------------------------------------------------------------
128 
129             for (int64_t k = kfirst ; k <= klast ; k++)
130             {
131 
132                 //--------------------------------------------------------------
133                 // find the part of M(:,k) to be operated on by this task
134                 //--------------------------------------------------------------
135 
136                 int64_t j = GBH (Mh, k) ;
137                 int64_t pM_start, pM_end ;
138                 GB_get_pA (&pM_start, &pM_end, tid, k,
139                     kfirst, klast, pstart_Mslice, Mp, mvlen) ;
140 
141                 //--------------------------------------------------------------
142                 // C<M(:,j)> = A(:,j)
143                 //--------------------------------------------------------------
144 
145                 // M is hypersparse or sparse.  C is the same as M.
146                 // pA points to the start of A(:,j) since A is dense
147                 int64_t pA = j * avlen ;
148                 GB_PRAGMA_SIMD_VECTORIZE
149                 for (int64_t pM = pM_start ; pM < pM_end ; pM++)
150                 {
151                     int64_t p = pA + GBI (Mi, pM, mvlen) ;
152                     GB_COPY_A_TO_C (Cx, pM, Ax, p) ;    // Cx [pM] = Ax [p]
153                 }
154             }
155         }
156     }
157 }
158 
159