1 //------------------------------------------------------------------------------ 2 // GB_dense_subassign_06d_template: C<A> = A where C is dense or bitmap 3 //------------------------------------------------------------------------------ 4 5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved. 6 // SPDX-License-Identifier: Apache-2.0 7 8 //------------------------------------------------------------------------------ 9 10 { 11 12 //-------------------------------------------------------------------------- 13 // get C and A 14 //-------------------------------------------------------------------------- 15 16 ASSERT (!GB_ZOMBIES (A)) ; 17 ASSERT (GB_JUMBLED_OK (A)) ; 18 ASSERT (!GB_PENDING (A)) ; 19 20 const int64_t *restrict Ap = A->p ; 21 const int64_t *restrict Ah = A->h ; 22 const int64_t *restrict Ai = A->i ; 23 const int8_t *restrict Ab = A->b ; 24 const GB_ATYPE *restrict Ax = (GB_ATYPE *) A->x ; 25 const int64_t avlen = A->vlen ; 26 const bool A_is_bitmap = GB_IS_BITMAP (A) ; 27 const bool A_is_dense = GB_as_if_full (A) ; 28 const int64_t anz = GB_NNZ_HELD (A) ; 29 30 GB_CTYPE *restrict Cx = (GB_CTYPE *) C->x ; 31 int8_t *restrict Cb = C->b ; 32 const int64_t cvlen = C->vlen ; 33 const bool C_is_bitmap = GB_IS_BITMAP (C) ; 34 35 //-------------------------------------------------------------------------- 36 // C<A> = A 37 //-------------------------------------------------------------------------- 38 39 int64_t cnvals = C->nvals ; // for C bitmap 40 41 if (A_is_dense) 42 { 43 44 //---------------------------------------------------------------------- 45 // A is dense: all entries present 46 //---------------------------------------------------------------------- 47 48 if (C_is_bitmap) 49 { 50 51 //------------------------------------------------------------------ 52 // C is bitmap, A is dense 53 //------------------------------------------------------------------ 54 55 if (Mask_struct) 56 { 57 // C<A,struct>=A with C bitmap, A dense 58 int64_t p ; 59 #pragma omp parallel for num_threads(A_nthreads) \ 60 schedule(static) 61 for (p = 0 ; p < anz ; p++) 62 { 63 // Cx [p] = Ax [p] 64 GB_COPY_A_TO_C (Cx, p, Ax, p) ; 65 } 66 GB_memset (Cb, 1, anz, A_nthreads) ; 67 cnvals = anz ; 68 } 69 else 70 { 71 // C<A>=A with C bitmap, A dense 72 int tid ; 73 #pragma omp parallel for num_threads(A_nthreads) \ 74 schedule(static) reduction(+:cnvals) 75 for (tid = 0 ; tid < A_nthreads ; tid++) 76 { 77 int64_t pA_start, pA_end, task_cnvals = 0 ; 78 GB_PARTITION (pA_start, pA_end, anz, tid, A_nthreads) ; 79 for (int64_t p = pA_start ; p < pA_end ; p++) 80 { 81 if (GB_AX_MASK (Ax, p, asize)) 82 { 83 // Cx [p] = Ax [p] 84 GB_COPY_A_TO_C (Cx, p, Ax, p) ; 85 task_cnvals += (Cb [p] == 0) ; 86 Cb [p] = 1 ; 87 } 88 } 89 cnvals += task_cnvals ; 90 } 91 } 92 93 } 94 else 95 { 96 97 //------------------------------------------------------------------ 98 // C is hypersparse, sparse, or full, with all entries present 99 //------------------------------------------------------------------ 100 101 if (Mask_struct) 102 { 103 // C<A,struct>=A with C sparse/hyper/full 104 int64_t p ; 105 #pragma omp parallel for num_threads(A_nthreads) \ 106 schedule(static) 107 for (p = 0 ; p < anz ; p++) 108 { 109 // Cx [p] = Ax [p] 110 GB_COPY_A_TO_C (Cx, p, Ax, p) ; 111 } 112 } 113 else 114 { 115 // C<A>=A with C sparse/hyper/full 116 int64_t p ; 117 #pragma omp parallel for num_threads(A_nthreads) \ 118 schedule(static) 119 for (p = 0 ; p < anz ; p++) 120 { 121 if (GB_AX_MASK (Ax, p, asize)) 122 { 123 // Cx [p] = Ax [p] 124 GB_COPY_A_TO_C (Cx, p, Ax, p) ; 125 } 126 } 127 } 128 } 129 130 } 131 else if (A_is_bitmap) 132 { 133 //---------------------------------------------------------------------- 134 // A is bitmap 135 //---------------------------------------------------------------------- 136 137 if (C_is_bitmap) 138 { 139 140 //------------------------------------------------------------------ 141 // C is bitmap, A is bitmap 142 //------------------------------------------------------------------ 143 144 if (Mask_struct) 145 { 146 // C<A,struct>=A with A and C bitmap 147 int tid ; 148 #pragma omp parallel for num_threads(A_nthreads) \ 149 schedule(static) reduction(+:cnvals) 150 for (tid = 0 ; tid < A_nthreads ; tid++) 151 { 152 int64_t pA_start, pA_end, task_cnvals = 0 ; 153 GB_PARTITION (pA_start, pA_end, anz, tid, A_nthreads) ; 154 for (int64_t p = pA_start ; p < pA_end ; p++) 155 { 156 if (Ab [p]) 157 { 158 // Cx [p] = Ax [p] 159 GB_COPY_A_TO_C (Cx, p, Ax, p) ; 160 task_cnvals += (Cb [p] == 0) ; 161 Cb [p] = 1 ; 162 } 163 } 164 cnvals += task_cnvals ; 165 } 166 167 } 168 else 169 { 170 // C<A>=A with A and C bitmap 171 int tid ; 172 #pragma omp parallel for num_threads(A_nthreads) \ 173 schedule(static) reduction(+:cnvals) 174 for (tid = 0 ; tid < A_nthreads ; tid++) 175 { 176 int64_t pA_start, pA_end, task_cnvals = 0 ; 177 GB_PARTITION (pA_start, pA_end, anz, tid, A_nthreads) ; 178 for (int64_t p = pA_start ; p < pA_end ; p++) 179 { 180 if (Ab [p] && GB_AX_MASK (Ax, p, asize)) 181 { 182 // Cx [p] = Ax [p] 183 GB_COPY_A_TO_C (Cx, p, Ax, p) ; 184 task_cnvals += (Cb [p] == 0) ; 185 Cb [p] = 1 ; 186 } 187 } 188 cnvals += task_cnvals ; 189 } 190 } 191 192 } 193 else 194 { 195 196 //------------------------------------------------------------------ 197 // C is hypersparse, sparse, or full, with all entries present 198 //------------------------------------------------------------------ 199 200 if (Mask_struct) 201 { 202 // C<A,struct>=A with A bitmap, and C hyper/sparse/full 203 // this method is used by LAGraph_bfs_parent when q is 204 // a bitmap and pi is full. 205 int64_t p ; 206 #pragma omp parallel for num_threads(A_nthreads) \ 207 schedule(static) 208 for (p = 0 ; p < anz ; p++) 209 { 210 // Cx [p] = Ax [p] 211 if (Ab [p]) 212 { 213 GB_COPY_A_TO_C (Cx, p, Ax, p) ; 214 } 215 } 216 } 217 else 218 { 219 // C<A>=A with A bitmap, and C hyper/sparse/full 220 int64_t p ; 221 #pragma omp parallel for num_threads(A_nthreads) \ 222 schedule(static) 223 for (p = 0 ; p < anz ; p++) 224 { 225 if (Ab [p] && GB_AX_MASK (Ax, p, asize)) 226 { 227 // Cx [p] = Ax [p] 228 GB_COPY_A_TO_C (Cx, p, Ax, p) ; 229 } 230 } 231 } 232 } 233 234 } 235 else 236 { 237 238 //---------------------------------------------------------------------- 239 // A is hypersparse or sparse; C is dense or a bitmap 240 //---------------------------------------------------------------------- 241 242 const int64_t *restrict kfirst_Aslice = A_ek_slicing ; 243 const int64_t *restrict klast_Aslice = A_ek_slicing + A_ntasks ; 244 const int64_t *restrict pstart_Aslice = A_ek_slicing + A_ntasks * 2 ; 245 int taskid ; 246 247 if (Mask_struct) 248 { 249 if (C_is_bitmap) 250 { 251 252 //-------------------------------------------------------------- 253 // C is bitmap, mask is structural 254 //-------------------------------------------------------------- 255 256 #pragma omp parallel for num_threads(A_nthreads) \ 257 schedule(dynamic,1) reduction(+:cnvals) 258 for (taskid = 0 ; taskid < A_ntasks ; taskid++) 259 { 260 // if kfirst > klast then taskid does no work at all 261 int64_t kfirst = kfirst_Aslice [taskid] ; 262 int64_t klast = klast_Aslice [taskid] ; 263 int64_t task_cnvals = 0 ; 264 // C<A(:,kfirst:klast)> = A(:,kfirst:klast) 265 for (int64_t k = kfirst ; k <= klast ; k++) 266 { 267 // get A(:,j), the kth vector of A 268 int64_t j = GBH (Ah, k) ; 269 int64_t pA_start, pA_end ; 270 GB_get_pA (&pA_start, &pA_end, taskid, k, 271 kfirst, klast, pstart_Aslice, Ap, avlen) ; 272 // pC is the start of C(:,j) 273 int64_t pC = j * cvlen ; 274 // C<A(:,j),struct>=A(:,j) with C bitmap, A sparse 275 GB_PRAGMA_SIMD_REDUCTION (+,task_cnvals) 276 for (int64_t pA = pA_start ; pA < pA_end ; pA++) 277 { 278 int64_t p = pC + Ai [pA] ; 279 // Cx [p] = Ax [pA] 280 GB_COPY_A_TO_C (Cx, p, Ax, pA) ; 281 task_cnvals += (Cb [p] == 0) ; 282 Cb [p] = 1 ; 283 } 284 } 285 cnvals += task_cnvals ; 286 } 287 288 } 289 else 290 { 291 292 //-------------------------------------------------------------- 293 // C is full, mask is structural 294 //-------------------------------------------------------------- 295 296 #pragma omp parallel for num_threads(A_nthreads) \ 297 schedule(dynamic,1) 298 for (taskid = 0 ; taskid < A_ntasks ; taskid++) 299 { 300 // if kfirst > klast then taskid does no work at all 301 int64_t kfirst = kfirst_Aslice [taskid] ; 302 int64_t klast = klast_Aslice [taskid] ; 303 // C<A(:,kfirst:klast)> = A(:,kfirst:klast) 304 for (int64_t k = kfirst ; k <= klast ; k++) 305 { 306 // get A(:,j), the kth vector of A 307 int64_t j = GBH (Ah, k) ; 308 int64_t pA_start, pA_end ; 309 GB_get_pA (&pA_start, &pA_end, taskid, k, 310 kfirst, klast, pstart_Aslice, Ap, avlen) ; 311 // pC is the start of C(:,j) 312 int64_t pC = j * cvlen ; 313 // C<A(:,j),struct>=A(:,j) with C full, A sparse 314 GB_PRAGMA_SIMD_VECTORIZE 315 for (int64_t pA = pA_start ; pA < pA_end ; pA++) 316 { 317 int64_t p = pC + Ai [pA] ; 318 // Cx [p] = Ax [pA] 319 GB_COPY_A_TO_C (Cx, p, Ax, pA) ; 320 } 321 } 322 } 323 } 324 325 } 326 else 327 { 328 if (C_is_bitmap) 329 { 330 331 //-------------------------------------------------------------- 332 // C is bitmap, mask is valued 333 //-------------------------------------------------------------- 334 335 #pragma omp parallel for num_threads(A_nthreads) \ 336 schedule(dynamic,1) reduction(+:cnvals) 337 for (taskid = 0 ; taskid < A_ntasks ; taskid++) 338 { 339 // if kfirst > klast then taskid does no work at all 340 int64_t kfirst = kfirst_Aslice [taskid] ; 341 int64_t klast = klast_Aslice [taskid] ; 342 int64_t task_cnvals = 0 ; 343 // C<A(:,kfirst:klast)> = A(:,kfirst:klast) 344 for (int64_t k = kfirst ; k <= klast ; k++) 345 { 346 // get A(:,j), the kth vector of A 347 int64_t j = GBH (Ah, k) ; 348 int64_t pA_start, pA_end ; 349 GB_get_pA (&pA_start, &pA_end, taskid, k, 350 kfirst, klast, pstart_Aslice, Ap, avlen) ; 351 // pC is the start of C(:,j) 352 int64_t pC = j * cvlen ; 353 // C<A(:,j),struct>=A(:,j) with C bitmap, A sparse 354 GB_PRAGMA_SIMD_REDUCTION (+,task_cnvals) 355 for (int64_t pA = pA_start ; pA < pA_end ; pA++) 356 { 357 if (GB_AX_MASK (Ax, pA, asize)) 358 { 359 int64_t p = pC + Ai [pA] ; 360 // Cx [p] = Ax [pA] 361 GB_COPY_A_TO_C (Cx, p, Ax, pA) ; 362 task_cnvals += (Cb [p] == 0) ; 363 Cb [p] = 1 ; 364 } 365 } 366 } 367 cnvals += task_cnvals ; 368 } 369 370 } 371 else 372 { 373 374 //-------------------------------------------------------------- 375 // C is full, mask is valued 376 //-------------------------------------------------------------- 377 378 #pragma omp parallel for num_threads(A_nthreads) \ 379 schedule(dynamic,1) reduction(+:cnvals) 380 for (taskid = 0 ; taskid < A_ntasks ; taskid++) 381 { 382 // if kfirst > klast then taskid does no work at all 383 int64_t kfirst = kfirst_Aslice [taskid] ; 384 int64_t klast = klast_Aslice [taskid] ; 385 // C<A(:,kfirst:klast)> = A(:,kfirst:klast) 386 for (int64_t k = kfirst ; k <= klast ; k++) 387 { 388 // get A(:,j), the kth vector of A 389 int64_t j = GBH (Ah, k) ; 390 int64_t pA_start, pA_end ; 391 GB_get_pA (&pA_start, &pA_end, taskid, k, 392 kfirst, klast, pstart_Aslice, Ap, avlen) ; 393 // pC is the start of C(:,j) 394 int64_t pC = j * cvlen ; 395 // C<A(:,j),struct>=A(:,j) with C dense, A sparse 396 GB_PRAGMA_SIMD_VECTORIZE 397 for (int64_t pA = pA_start ; pA < pA_end ; pA++) 398 { 399 if (GB_AX_MASK (Ax, pA, asize)) 400 { 401 int64_t p = pC + Ai [pA] ; 402 // Cx [p] = Ax [pA] 403 GB_COPY_A_TO_C (Cx, p, Ax, pA) ; 404 } 405 } 406 } 407 } 408 } 409 } 410 } 411 412 //-------------------------------------------------------------------------- 413 // log the number of entries in the C bitmap 414 //-------------------------------------------------------------------------- 415 416 if (C_is_bitmap) 417 { 418 C->nvals = cnvals ; 419 } 420 } 421 422