1 //------------------------------------------------------------------------------ 2 // GB_bitmap_AxB_saxpy_A_bitmap_B_bitmap: C<#M>+=A*B, C bitmap, M any format 3 //------------------------------------------------------------------------------ 4 5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved. 6 // SPDX-License-Identifier: Apache-2.0 7 8 //------------------------------------------------------------------------------ 9 10 { 11 12 int64_t tid ; 13 #pragma omp parallel for num_threads(nthreads) schedule(dynamic,1) \ 14 reduction(+:cnvals) 15 for (tid = 0 ; tid < ntasks ; tid++) 16 { 17 18 //---------------------------------------------------------------------- 19 // get the task to compute C (I,J) 20 //---------------------------------------------------------------------- 21 22 int64_t I_tid = tid / nI_tasks ; 23 int64_t J_tid = tid % nI_tasks ; 24 25 // I = istart:iend-1 26 int64_t istart = I_tid * GB_TILE_SIZE ; 27 int64_t iend = GB_IMIN (avlen, istart + GB_TILE_SIZE) ; 28 29 // J = jstart:jend-1 30 int64_t jstart = J_tid * GB_TILE_SIZE ; 31 int64_t jend = GB_IMIN (bvdim, jstart + GB_TILE_SIZE) ; 32 33 int64_t task_cnvals = 0 ; 34 35 //---------------------------------------------------------------------- 36 // check if any entry in the M(I,J) mask permits any change to C(I,J) 37 //---------------------------------------------------------------------- 38 39 #if GB_MASK_IS_SPARSE_OR_HYPER || GB_MASK_IS_BITMAP_OR_FULL 40 41 bool any_update_allowed = false ; 42 43 for (int64_t j = jstart ; j < jend && !any_update_allowed ; j++) 44 { 45 for (int64_t i = istart ; i < iend && !any_update_allowed ; i++) 46 { 47 48 //---------------------------------------------------------- 49 // get pointer to C(i,j) and M(i,j) 50 //---------------------------------------------------------- 51 52 int64_t pC = j * avlen + i ; 53 54 //---------------------------------------------------------- 55 // check M(i,j) 56 //---------------------------------------------------------- 57 58 #if GB_MASK_IS_SPARSE_OR_HYPER 59 60 // M is sparse or hypersparse 61 int8_t cb = Cb [pC] ; 62 bool mij = (cb & 2) ; 63 64 #elif GB_MASK_IS_BITMAP_OR_FULL 65 66 // M is bitmap or full 67 GB_GET_M_ij (pC) ; 68 69 #endif 70 71 if (Mask_comp) mij = !mij ; 72 if (!mij) continue ; 73 any_update_allowed = true ; 74 } 75 } 76 77 if (!any_update_allowed) 78 { 79 // C(I,J) cannot be modified at all; skip it 80 continue ; 81 } 82 83 #endif 84 85 //---------------------------------------------------------------------- 86 // declare local storage for this task 87 //---------------------------------------------------------------------- 88 89 // GB_ATYPE Ax_cache [GB_TILE_SIZE * GB_KTILE_SIZE] ; 90 // int8_t Ab_cache [GB_TILE_SIZE * GB_KTILE_SIZE] ; 91 bool Ab_any_in_row [GB_TILE_SIZE] ; 92 93 //---------------------------------------------------------------------- 94 // C<#M>(I,J) += A(I,:) * B(:,J) 95 //---------------------------------------------------------------------- 96 97 for (int64_t kstart = 0 ; kstart < avdim ; kstart += GB_KTILE_SIZE) 98 { 99 // K = kstart:kend-1 100 int64_t kend = GB_IMIN (avdim, kstart + GB_KTILE_SIZE) ; 101 102 //------------------------------------------------------------------ 103 // TODO: load A(I,K) into local storage 104 //------------------------------------------------------------------ 105 106 // For built-in semirings, load A(I,K) into local storage of size 107 // GB_TILE_SIZE * GB_KTILE_SIZE and transpose it. Load in the 108 // bitmap Ab if not NULL, and Ax if not NULL. 109 110 #if 0 111 for (int64_t k = kstart ; k < kend ; k++) 112 { 113 for (int64_t i = istart ; i < iend ; i++) 114 { 115 int64_t pA = i + k * avlen ; 116 int8_t ab = GBB (Ab, pA) ; 117 i_local = i - istart ; 118 k_local = k - kstart ; 119 Ab_cache [i_local * GB_KTILE_SIZE ... 120 } 121 } 122 #endif 123 124 //------------------------------------------------------------------ 125 // Check for entries in each row of A(I,K) 126 //------------------------------------------------------------------ 127 128 if (A_is_bitmap) 129 { 130 for (int i = 0 ; i < GB_TILE_SIZE ; i++) 131 { 132 Ab_any_in_row [i] = false ; 133 } 134 for (int64_t k = kstart ; k < kend ; k++) 135 { 136 for (int64_t i = istart ; i < iend ; i++) 137 { 138 int64_t pA = i + k * avlen ; // get pointer to A(i,k) 139 int8_t ab = Ab [pA] ; 140 // Ab_cache [(i-istart) * GB_KTILE_SIZE + (k-kstart)] 141 // = ab ; 142 Ab_any_in_row [i-istart] |= ab ; 143 } 144 } 145 } 146 147 //------------------------------------------------------------------ 148 // C<#M>(I,J) += A(I,K) * B(K,J) 149 //------------------------------------------------------------------ 150 151 for (int64_t j = jstart ; j < jend ; j++) 152 { 153 154 //-------------------------------------------------------------- 155 // B is bitmap or full: check if any B(K,j) entry exists 156 //-------------------------------------------------------------- 157 158 if (B_is_bitmap) 159 { 160 int b = 0 ; 161 for (int64_t k = kstart ; k < kend ; k++) 162 { 163 int64_t pB = k + j * bvlen ; // pointer to B(k,j) 164 b += Bb [pB] ; 165 } 166 if (b == 0) 167 { 168 // no entry exists in B(K,j) 169 continue ; 170 } 171 } 172 173 //-------------------------------------------------------------- 174 // C<#M>(I,j) += A(I,K) * B(K,j) 175 //-------------------------------------------------------------- 176 177 GB_GET_T_FOR_SECONDJ ; 178 179 for (int64_t i = istart ; i < iend ; i++) 180 { 181 182 //---------------------------------------------------------- 183 // skip if A(i,K) has no entries 184 //---------------------------------------------------------- 185 186 if (A_is_bitmap && !Ab_any_in_row [i - istart]) 187 { 188 continue ; 189 } 190 191 //---------------------------------------------------------- 192 // get C(i,j) 193 //---------------------------------------------------------- 194 195 int64_t pC = i + j * avlen ; 196 197 //---------------------------------------------------------- 198 // check M(i,j) 199 //---------------------------------------------------------- 200 201 #if GB_MASK_IS_SPARSE_OR_HYPER 202 203 // M is sparse or hypersparse 204 int8_t cb = Cb [pC] ; 205 bool mij = (cb & 2) ; 206 if (Mask_comp) mij = !mij ; 207 if (!mij) continue ; 208 cb = (cb & 1) ; 209 210 #elif GB_MASK_IS_BITMAP_OR_FULL 211 212 // M is bitmap or full 213 GB_GET_M_ij (pC) ; 214 if (Mask_comp) mij = !mij ; 215 if (!mij) continue ; 216 int8_t cb = Cb [pC] ; 217 218 #else 219 220 // no mask 221 int8_t cb = Cb [pC] ; 222 223 #endif 224 225 //---------------------------------------------------------- 226 // C(i,j) += A(i,K) * B(K,j) 227 //---------------------------------------------------------- 228 229 if (cb == 0) 230 { 231 232 //------------------------------------------------------ 233 // C(i,j) does not yet exist 234 //------------------------------------------------------ 235 236 for (int64_t k = kstart ; k < kend ; k++) 237 { 238 int64_t pA = i + k * avlen ; // pointer to A(i,k) 239 int64_t pB = k + j * bvlen ; // pointer to B(k,j) 240 if (!GBB (Ab, pA)) continue ; 241 if (!GBB (Bb, pB)) continue ; 242 GB_GET_B_kj ; // get B(k,j) 243 GB_MULT_A_ik_B_kj ; // t = A(i,k)*B(k,j) 244 if (cb == 0) 245 { 246 // C(i,j) = A(i,k) * B(k,j) 247 GB_CIJ_WRITE (pC, t) ; 248 Cb [pC] = keep ; 249 cb = keep ; 250 task_cnvals++ ; 251 } 252 else 253 { 254 // C(i,j) += A(i,k) * B(k,j) 255 GB_CIJ_UPDATE (pC, t) ; 256 } 257 } 258 259 } 260 else 261 { 262 263 //------------------------------------------------------ 264 // C(i,j) already exists 265 //------------------------------------------------------ 266 267 #if !GB_IS_ANY_PAIR_SEMIRING 268 for (int64_t k = kstart ; k < kend ; k++) 269 { 270 int64_t pA = i + k * avlen ; // pointer to A(i,k) 271 int64_t pB = k + j * bvlen ; // pointer to B(k,j) 272 if (!GBB (Ab, pA)) continue ; 273 if (!GBB (Bb, pB)) continue ; 274 GB_GET_B_kj ; // get B(k,j) 275 GB_MULT_A_ik_B_kj ; // t = A(i,k)*B(k,j) 276 // C(i,j) += A(i,k) * B(k,j) 277 GB_CIJ_UPDATE (pC, t) ; 278 } 279 #endif 280 } 281 } 282 } 283 } 284 cnvals += task_cnvals ; 285 } 286 } 287 288