1 //------------------------------------------------------------------------------
2 // GB_bitmap_AxB_saxpy_A_bitmap_B_bitmap: C<#M>+=A*B, C bitmap, M any format
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 {
11 
12     int64_t tid ;
13     #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \
14         reduction(+:cnvals)
15     for (tid = 0 ; tid < ntasks ; tid++)
16     {
17 
18         //----------------------------------------------------------------------
19         // get the task to compute C (I,J)
20         //----------------------------------------------------------------------
21 
22         int64_t I_tid = tid / nI_tasks ;
23         int64_t J_tid = tid % nI_tasks ;
24 
25         // I = istart:iend-1
26         int64_t istart = I_tid * GB_TILE_SIZE ;
27         int64_t iend   = GB_IMIN (avlen, istart + GB_TILE_SIZE) ;
28 
29         // J = jstart:jend-1
30         int64_t jstart = J_tid * GB_TILE_SIZE ;
31         int64_t jend   = GB_IMIN (bvdim, jstart + GB_TILE_SIZE) ;
32 
33         int64_t task_cnvals = 0 ;
34 
35         //----------------------------------------------------------------------
36         // check if any entry in the M(I,J) mask permits any change to C(I,J)
37         //----------------------------------------------------------------------
38 
39         #if GB_MASK_IS_SPARSE_OR_HYPER || GB_MASK_IS_BITMAP_OR_FULL
40 
41             bool any_update_allowed = false ;
42 
43             for (int64_t j = jstart ; j < jend && !any_update_allowed ; j++)
44             {
45                 for (int64_t i = istart ; i < iend && !any_update_allowed ; i++)
46                 {
47 
48                     //----------------------------------------------------------
49                     // get pointer to C(i,j) and M(i,j)
50                     //----------------------------------------------------------
51 
52                     int64_t pC = j * avlen + i ;
53 
54                     //----------------------------------------------------------
55                     // check M(i,j)
56                     //----------------------------------------------------------
57 
58                     #if GB_MASK_IS_SPARSE_OR_HYPER
59 
60                         // M is sparse or hypersparse
61                         int8_t cb = Cb [pC] ;
62                         bool mij = (cb & 2) ;
63 
64                     #elif GB_MASK_IS_BITMAP_OR_FULL
65 
66                         // M is bitmap or full
67                         GB_GET_M_ij (pC) ;
68 
69                     #endif
70 
71                     if (Mask_comp) mij = !mij ;
72                     if (!mij) continue ;
73                     any_update_allowed = true ;
74                 }
75             }
76 
77             if (!any_update_allowed)
78             {
79                 // C(I,J) cannot be modified at all; skip it
80                 continue ;
81             }
82 
83         #endif
84 
85         //----------------------------------------------------------------------
86         // declare local storage for this task
87         //----------------------------------------------------------------------
88 
89 //      GB_ATYPE Ax_cache [GB_TILE_SIZE * GB_KTILE_SIZE] ;
90 //      int8_t Ab_cache [GB_TILE_SIZE * GB_KTILE_SIZE] ;
91         bool Ab_any_in_row [GB_TILE_SIZE] ;
92 
93         //----------------------------------------------------------------------
94         // C<#M>(I,J) += A(I,:) * B(:,J)
95         //----------------------------------------------------------------------
96 
97         for (int64_t kstart = 0 ; kstart < avdim ; kstart += GB_KTILE_SIZE)
98         {
99             // K = kstart:kend-1
100             int64_t kend = GB_IMIN (avdim, kstart + GB_KTILE_SIZE) ;
101 
102             //------------------------------------------------------------------
103             // TODO: load A(I,K) into local storage
104             //------------------------------------------------------------------
105 
106             // For built-in semirings, load A(I,K) into local storage of size
107             // GB_TILE_SIZE * GB_KTILE_SIZE and transpose it.  Load in the
108             // bitmap Ab if not NULL, and Ax if not NULL.
109 
110 #if 0
111             for (int64_t k = kstart ; k < kend ; k++)
112             {
113                 for (int64_t i = istart ; i < iend ; i++)
114                 {
115                     int64_t pA = i + k * avlen ;
116                     int8_t ab = GBB (Ab, pA) ;
117                     i_local = i - istart ;
118                     k_local = k - kstart ;
119                     Ab_cache [i_local * GB_KTILE_SIZE ...
120                 }
121             }
122 #endif
123 
124             //------------------------------------------------------------------
125             // Check for entries in each row of A(I,K)
126             //------------------------------------------------------------------
127 
128             if (A_is_bitmap)
129             {
130                 for (int i = 0 ; i < GB_TILE_SIZE ; i++)
131                 {
132                     Ab_any_in_row [i] = false ;
133                 }
134                 for (int64_t k = kstart ; k < kend ; k++)
135                 {
136                     for (int64_t i = istart ; i < iend ; i++)
137                     {
138                         int64_t pA = i + k * avlen ;    // get pointer to A(i,k)
139                         int8_t  ab = Ab [pA] ;
140                         // Ab_cache [(i-istart) * GB_KTILE_SIZE + (k-kstart)]
141                         //      = ab ;
142                         Ab_any_in_row [i-istart] |= ab ;
143                     }
144                 }
145             }
146 
147             //------------------------------------------------------------------
148             // C<#M>(I,J) += A(I,K) * B(K,J)
149             //------------------------------------------------------------------
150 
151             for (int64_t j = jstart ; j < jend ; j++)
152             {
153 
154                 //--------------------------------------------------------------
155                 // B is bitmap or full: check if any B(K,j) entry exists
156                 //--------------------------------------------------------------
157 
158                 if (B_is_bitmap)
159                 {
160                     int b = 0 ;
161                     for (int64_t k = kstart ; k < kend ; k++)
162                     {
163                         int64_t pB = k + j * bvlen ;    // pointer to B(k,j)
164                         b += Bb [pB] ;
165                     }
166                     if (b == 0)
167                     {
168                         // no entry exists in B(K,j)
169                         continue ;
170                     }
171                 }
172 
173                 //--------------------------------------------------------------
174                 // C<#M>(I,j) += A(I,K) * B(K,j)
175                 //--------------------------------------------------------------
176 
177                 GB_GET_T_FOR_SECONDJ ;
178 
179                 for (int64_t i = istart ; i < iend ; i++)
180                 {
181 
182                     //----------------------------------------------------------
183                     // skip if A(i,K) has no entries
184                     //----------------------------------------------------------
185 
186                     if (A_is_bitmap && !Ab_any_in_row [i - istart])
187                     {
188                         continue ;
189                     }
190 
191                     //----------------------------------------------------------
192                     // get C(i,j)
193                     //----------------------------------------------------------
194 
195                     int64_t pC = i + j * avlen ;
196 
197                     //----------------------------------------------------------
198                     // check M(i,j)
199                     //----------------------------------------------------------
200 
201                     #if GB_MASK_IS_SPARSE_OR_HYPER
202 
203                         // M is sparse or hypersparse
204                         int8_t cb = Cb [pC] ;
205                         bool mij = (cb & 2) ;
206                         if (Mask_comp) mij = !mij ;
207                         if (!mij) continue ;
208                         cb = (cb & 1) ;
209 
210                     #elif GB_MASK_IS_BITMAP_OR_FULL
211 
212                         // M is bitmap or full
213                         GB_GET_M_ij (pC) ;
214                         if (Mask_comp) mij = !mij ;
215                         if (!mij) continue ;
216                         int8_t cb = Cb [pC] ;
217 
218                     #else
219 
220                         // no mask
221                         int8_t cb = Cb [pC] ;
222 
223                     #endif
224 
225                     //----------------------------------------------------------
226                     // C(i,j) += A(i,K) * B(K,j)
227                     //----------------------------------------------------------
228 
229                     if (cb == 0)
230                     {
231 
232                         //------------------------------------------------------
233                         // C(i,j) does not yet exist
234                         //------------------------------------------------------
235 
236                         for (int64_t k = kstart ; k < kend ; k++)
237                         {
238                             int64_t pA = i + k * avlen ;    // pointer to A(i,k)
239                             int64_t pB = k + j * bvlen ;    // pointer to B(k,j)
240                             if (!GBB (Ab, pA)) continue ;
241                             if (!GBB (Bb, pB)) continue ;
242                             GB_GET_B_kj ;                   // get B(k,j)
243                             GB_MULT_A_ik_B_kj ;             // t = A(i,k)*B(k,j)
244                             if (cb == 0)
245                             {
246                                 // C(i,j) = A(i,k) * B(k,j)
247                                 GB_CIJ_WRITE (pC, t) ;
248                                 Cb [pC] = keep ;
249                                 cb = keep ;
250                                 task_cnvals++ ;
251                             }
252                             else
253                             {
254                                 // C(i,j) += A(i,k) * B(k,j)
255                                 GB_CIJ_UPDATE (pC, t) ;
256                             }
257                         }
258 
259                     }
260                     else
261                     {
262 
263                         //------------------------------------------------------
264                         // C(i,j) already exists
265                         //------------------------------------------------------
266 
267                         #if !GB_IS_ANY_PAIR_SEMIRING
268                         for (int64_t k = kstart ; k < kend ; k++)
269                         {
270                             int64_t pA = i + k * avlen ;    // pointer to A(i,k)
271                             int64_t pB = k + j * bvlen ;    // pointer to B(k,j)
272                             if (!GBB (Ab, pA)) continue ;
273                             if (!GBB (Bb, pB)) continue ;
274                             GB_GET_B_kj ;                   // get B(k,j)
275                             GB_MULT_A_ik_B_kj ;             // t = A(i,k)*B(k,j)
276                             // C(i,j) += A(i,k) * B(k,j)
277                             GB_CIJ_UPDATE (pC, t) ;
278                         }
279                         #endif
280                     }
281                 }
282             }
283         }
284         cnvals += task_cnvals ;
285     }
286 }
287 
288