1 //------------------------------------------------------------------------------ 2 // GB_bitmap_AxB_saxpy_template.c: C<#M>+=A*B when C is bitmap 3 //------------------------------------------------------------------------------ 4 5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved. 6 // SPDX-License-Identifier: Apache-2.0 7 8 //------------------------------------------------------------------------------ 9 10 // GB_AxB_saxpy_sparsity determines the sparsity structure for C<M or !M>=A*B 11 // or C=A*B, and this template is used when C is bitmap. C can be modified 12 // in-place if the accum operator is the same as the monoid. 13 14 #undef GB_FREE_ALL 15 #define GB_FREE_ALL \ 16 { \ 17 GB_FREE_WERK (&Wf, Wf_size) ; \ 18 GB_FREE_WERK (&Wax, Wax_size) ; \ 19 GB_FREE_WERK (&Wbx, Wbx_size) ; \ 20 GB_FREE_WERK (&Wcx, Wcx_size) ; \ 21 GB_WERK_POP (GH_slice, int64_t) ; \ 22 GB_WERK_POP (A_slice, int64_t) ; \ 23 GB_WERK_POP (B_slice, int64_t) ; \ 24 GB_WERK_POP (M_ek_slicing, int64_t) ; \ 25 } 26 27 { 28 29 //-------------------------------------------------------------------------- 30 // declare workspace 31 //-------------------------------------------------------------------------- 32 33 int8_t *restrict Wf = NULL ; size_t Wf_size = 0 ; 34 GB_void *restrict Wax = NULL ; size_t Wax_size = 0 ; 35 GB_void *restrict Wbx = NULL ; size_t Wbx_size = 0 ; 36 GB_void *restrict Wcx = NULL ; size_t Wcx_size = 0 ; 37 GB_WERK_DECLARE (GH_slice, int64_t) ; 38 GB_WERK_DECLARE (A_slice, int64_t) ; 39 GB_WERK_DECLARE (B_slice, int64_t) ; 40 GB_WERK_DECLARE (M_ek_slicing, int64_t) ; 41 42 //-------------------------------------------------------------------------- 43 // determine max # of threads to use 44 //-------------------------------------------------------------------------- 45 46 GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ; 47 48 //-------------------------------------------------------------------------- 49 // get C, M, A, and B 50 //-------------------------------------------------------------------------- 51 52 ASSERT (GB_IS_BITMAP (C)) ; // C is always bitmap 53 int8_t *restrict Cb = C->b ; 54 GB_CTYPE *restrict Cx = (GB_CTYPE *) C->x ; 55 const int64_t cvlen = C->vlen ; 56 ASSERT (C->vlen == A->vlen) ; 57 ASSERT (C->vdim == B->vdim) ; 58 ASSERT (A->vdim == B->vlen) ; 59 int64_t cnvals = C->nvals ; 60 61 const int64_t *restrict Bp = B->p ; 62 const int64_t *restrict Bh = B->h ; 63 const int8_t *restrict Bb = B->b ; 64 const int64_t *restrict Bi = B->i ; 65 const GB_BTYPE *restrict Bx = (GB_BTYPE *) (B_is_pattern ? NULL : B->x) ; 66 const int64_t bvlen = B->vlen ; 67 const int64_t bvdim = B->vdim ; 68 const int64_t bnvec = B->nvec ; 69 70 const bool B_jumbled = B->jumbled ; 71 const int64_t bnz = GB_NNZ_HELD (B) ; 72 73 const bool B_is_sparse = GB_IS_SPARSE (B) ; 74 const bool B_is_hyper = GB_IS_HYPERSPARSE (B) ; 75 const bool B_is_bitmap = GB_IS_BITMAP (B) ; 76 const bool B_is_sparse_or_hyper = B_is_sparse || B_is_hyper ; 77 78 const int64_t *restrict Ap = A->p ; 79 const int64_t *restrict Ah = A->h ; 80 const int8_t *restrict Ab = A->b ; 81 const int64_t *restrict Ai = A->i ; 82 const GB_ATYPE *restrict Ax = (GB_ATYPE *) (A_is_pattern ? NULL : A->x) ; 83 const int64_t anvec = A->nvec ; 84 const int64_t avlen = A->vlen ; 85 const int64_t avdim = A->vdim ; 86 87 const bool A_jumbled = A->jumbled ; 88 const int64_t anz = GB_NNZ_HELD (A) ; 89 90 const bool A_is_sparse = GB_IS_SPARSE (A) ; 91 const bool A_is_hyper = GB_IS_HYPERSPARSE (A) ; 92 const bool A_is_bitmap = GB_IS_BITMAP (A) ; 93 const bool A_is_sparse_or_hyper = A_is_sparse || A_is_hyper ; 94 95 const int64_t *restrict Mp = NULL ; 96 const int64_t *restrict Mh = NULL ; 97 const int8_t *restrict Mb = NULL ; 98 const int64_t *restrict Mi = NULL ; 99 const GB_void *restrict Mx = NULL ; 100 size_t msize = 0 ; 101 int64_t mnvec = 0 ; 102 int64_t mvlen = 0 ; 103 const bool M_is_hyper = GB_IS_HYPERSPARSE (M) ; 104 const bool M_is_sparse = GB_IS_SPARSE (M) ; 105 const bool M_is_sparse_or_hyper = M_is_hyper || M_is_sparse ; 106 const bool M_is_bitmap = GB_IS_BITMAP (M) ; 107 const bool M_is_full = GB_IS_FULL (M) ; 108 int M_nthreads = 0 ; 109 int M_ntasks = 0 ; 110 111 if (M != NULL) 112 { 113 ASSERT (C->vlen == M->vlen) ; 114 ASSERT (C->vdim == M->vdim) ; 115 Mp = M->p ; 116 Mh = M->h ; 117 Mb = M->b ; 118 Mi = M->i ; 119 Mx = (GB_void *) (Mask_struct ? NULL : (M->x)) ; 120 msize = M->type->size ; 121 mnvec = M->nvec ; 122 mvlen = M->vlen ; 123 124 GB_SLICE_MATRIX (M, 8, chunk) ; 125 126 // if M is sparse or hypersparse, scatter it into the C bitmap 127 if (M_is_sparse_or_hyper) 128 { 129 // Cb [pC] += 2 for each entry M(i,j) in the mask 130 GB_bitmap_M_scatter (C, 131 NULL, 0, GB_ALL, NULL, NULL, 0, GB_ALL, NULL, 132 M, Mask_struct, GB_ASSIGN, GB_BITMAP_M_SCATTER_PLUS_2, 133 M_ek_slicing, M_ntasks, M_nthreads, Context) ; 134 // the bitmap of C now contains: 135 // Cb (i,j) = 0: cij not present, mij zero 136 // Cb (i,j) = 1: cij present, mij zero 137 // Cb (i,j) = 2: cij not present, mij 1 138 // Cb (i,j) = 3: cij present, mij 1 139 } 140 } 141 142 //-------------------------------------------------------------------------- 143 // select the method 144 //-------------------------------------------------------------------------- 145 146 if (B_is_sparse_or_hyper) 147 { 148 149 //----------------------------------------------------- 150 // C = A * B 151 //----------------------------------------------------- 152 153 // bitmap . bitmap hyper 154 // bitmap . full hyper 155 // bitmap . bitmap sparse 156 // bitmap . full sparse 157 158 //----------------------------------------------------- 159 // C <M>= A * B 160 //----------------------------------------------------- 161 162 // bitmap any bitmap hyper 163 // bitmap any full hyper 164 // bitmap any bitmap sparse 165 // bitmap any full sparse 166 167 //----------------------------------------------------- 168 // C <!M>= A * B 169 //----------------------------------------------------- 170 171 // bitmap any bitmap hyper 172 // bitmap any full hyper 173 // bitmap any bitmap sparse 174 // bitmap any full sparse 175 176 #undef GB_PANEL_SIZE 177 #define GB_PANEL_SIZE 64 178 179 ASSERT (GB_IS_BITMAP (A) || GB_IS_FULL (A)) ; 180 double work = ((double) avlen) * ((double) bnz) ; 181 int nthreads = GB_nthreads (work, chunk, nthreads_max) ; 182 int naslice, nbslice, ntasks ; 183 184 if (nthreads == 1 || bnvec == 0) 185 { 186 // do the entire computation with a single thread 187 naslice = 1 ; 188 nbslice = 1 ; 189 } 190 else 191 { 192 // determine number of slices for A and B 193 ntasks = 2 * nthreads ; 194 int naslice_max = GB_ICEIL (avlen, GB_PANEL_SIZE) ; 195 int dtasks = floor (sqrt ((double) ntasks)) ; 196 naslice = GB_IMIN (dtasks, naslice_max) ; 197 naslice = GB_IMAX (naslice, 1) ; 198 nbslice = ntasks / naslice ; 199 nbslice = GB_IMIN (nbslice, bnvec) ; 200 if (nbslice > bnvec) 201 { 202 // too few vectors of B; recompute nbslice and naslice 203 nbslice = bnvec ; 204 naslice = ntasks / nbslice ; 205 naslice = GB_IMIN (naslice, naslice_max) ; 206 naslice = GB_IMAX (naslice, 1) ; 207 } 208 } 209 210 ntasks = naslice * nbslice ; 211 212 // slice the matrix B 213 GB_WERK_PUSH (B_slice, nbslice + 1, int64_t) ; 214 if (B_slice == NULL) 215 { 216 // out of memory 217 GB_FREE_ALL ; 218 return (GrB_OUT_OF_MEMORY) ; 219 } 220 GB_pslice (B_slice, Bp, bnvec, nbslice, false) ; 221 222 if (M == NULL) 223 { 224 225 //------------------------------------------------------------------ 226 // C = A*B, no mask, A bitmap/full, B sparse/hyper 227 //------------------------------------------------------------------ 228 229 #define GB_MASK_IS_SPARSE_OR_HYPER 0 230 #define GB_MASK_IS_BITMAP_OR_FULL 0 231 #undef keep 232 #define keep 1 233 if (A_is_bitmap) 234 { 235 // A is bitmap, B is sparse/hyper, no mask 236 #undef GB_A_IS_BITMAP 237 #define GB_A_IS_BITMAP 1 238 #include "GB_bitmap_AxB_saxpy_A_bitmap_B_sparse_template.c" 239 } 240 else 241 { 242 // A is full, B is sparse/hyper, no mask 243 #undef GB_A_IS_BITMAP 244 #define GB_A_IS_BITMAP 0 245 #include "GB_bitmap_AxB_saxpy_A_bitmap_B_sparse_template.c" 246 } 247 #undef GB_MASK_IS_SPARSE_OR_HYPER 248 #undef GB_MASK_IS_BITMAP_OR_FULL 249 250 } 251 else if (M_is_sparse_or_hyper) 252 { 253 254 //------------------------------------------------------------------ 255 // C<M> or <!M>=A*B, M sparse/hyper, A bitmap/full, B sparse/hyper 256 //------------------------------------------------------------------ 257 258 #define GB_MASK_IS_SPARSE_OR_HYPER 1 259 #define GB_MASK_IS_BITMAP_OR_FULL 0 260 #undef keep 261 const int8_t keep = (Mask_comp) ? 1 : 3 ; 262 if (A_is_bitmap) 263 { 264 // A is bitmap, M and B are sparse/hyper 265 #undef GB_A_IS_BITMAP 266 #define GB_A_IS_BITMAP 1 267 #include "GB_bitmap_AxB_saxpy_A_bitmap_B_sparse_template.c" 268 } 269 else 270 { 271 // A is full, M and B are sparse/hyper 272 #undef GB_A_IS_BITMAP 273 #define GB_A_IS_BITMAP 0 274 #include "GB_bitmap_AxB_saxpy_A_bitmap_B_sparse_template.c" 275 } 276 #undef GB_MASK_IS_SPARSE_OR_HYPER 277 #undef GB_MASK_IS_BITMAP_OR_FULL 278 279 } 280 else 281 { 282 283 //------------------------------------------------------------------ 284 // C<M> or <!M> = A*B, M bitmap/full, A bitmap/full, B sparse/hyper 285 //------------------------------------------------------------------ 286 287 #define GB_MASK_IS_SPARSE_OR_HYPER 0 288 #define GB_MASK_IS_BITMAP_OR_FULL 1 289 #undef keep 290 #define keep 1 291 if (A_is_bitmap) 292 { 293 // A is bitmap, M is bitmap/full, B is sparse/hyper 294 #undef GB_A_IS_BITMAP 295 #define GB_A_IS_BITMAP 1 296 #include "GB_bitmap_AxB_saxpy_A_bitmap_B_sparse_template.c" 297 } 298 else 299 { 300 // A is full, M is bitmap/full, B is sparse/hyper 301 #undef GB_A_IS_BITMAP 302 #define GB_A_IS_BITMAP 0 303 #include "GB_bitmap_AxB_saxpy_A_bitmap_B_sparse_template.c" 304 } 305 #undef GB_MASK_IS_SPARSE_OR_HYPER 306 #undef GB_MASK_IS_BITMAP_OR_FULL 307 } 308 309 #undef GB_PANEL_SIZE 310 #undef GB_A_IS_BITMAP 311 312 } 313 else if (A_is_sparse_or_hyper) 314 { 315 316 //----------------------------------------------------- 317 // C = A * B 318 //----------------------------------------------------- 319 320 // bitmap . hyper bitmap 321 // bitmap . sparse bitmap 322 // bitmap . hyper full 323 // bitmap . sparse full 324 325 //----------------------------------------------------- 326 // C <M>= A * B 327 //----------------------------------------------------- 328 329 // bitmap any hyper bitmap 330 // bitmap any sparse bitmap 331 // bitmap bitmap/full hyper full 332 // bitmap bitmap/full sparse full 333 334 //----------------------------------------------------- 335 // C <!M>= A * B 336 //----------------------------------------------------- 337 338 // bitmap any hyper bitmap 339 // bitmap any sparse bitmap 340 // bitmap any hyper full 341 // bitmap any sparse full 342 343 ASSERT (GB_IS_BITMAP (B) || GB_IS_FULL (B)) ; 344 double work = ((double) anz) * (double) bvdim ; 345 int nthreads = GB_nthreads (work, chunk, nthreads_max) ; 346 int nfine_tasks_per_vector = 0, ntasks ; 347 bool use_coarse_tasks, use_atomics = false ; 348 349 if (nthreads == 1 || bvdim == 0) 350 { 351 // do the entire computation with a single thread, with coarse task 352 ntasks = 1 ; 353 use_coarse_tasks = true ; 354 GBURBLE ("(coarse, threads: 1) ") ; 355 } 356 else if (nthreads <= bvdim) 357 { 358 // All tasks are coarse, and each coarse task does 1 or more 359 // whole vectors of B 360 ntasks = GB_IMIN (bvdim, 2 * nthreads) ; 361 use_coarse_tasks = true ; 362 GBURBLE ("(coarse, threads: %d, tasks %d) ", nthreads, ntasks) ; 363 } 364 else 365 { 366 // All tasks are fine. Each task does a slice of a single vector 367 // of B, and each vector of B is handled by the same # of fine 368 // tasks. 369 use_coarse_tasks = false ; 370 371 // Select between a non-atomic method with Wf/Wx workspace, 372 // and an atomic method with no workspace. 373 double cnz = ((double) cvlen) * ((double) bvdim) ; 374 double intensity = ((double) work) / fmax (cnz, 1) ; 375 double workspace = ((double) cvlen) * ((double) nthreads) ; 376 double relwspace = workspace / fmax (anz + bnz + cnz, 1) ; 377 GBURBLE ("(fine, threads: %d, relwspace: %0.3g, intensity: %0.3g", 378 nthreads, relwspace, intensity) ; 379 if ((intensity > 64 && relwspace < 0.05) || 380 (intensity > 16 && intensity <= 64 && relwspace < 0.50)) 381 { 382 // non-atomic method with workspace 383 use_atomics = false ; 384 ntasks = nthreads ; 385 GBURBLE (": non-atomic) ") ; 386 } 387 else 388 { 389 // atomic method 390 use_atomics = true ; 391 ntasks = 2 * nthreads ; 392 GBURBLE (": atomic) ") ; 393 } 394 395 nfine_tasks_per_vector = ceil ((double) ntasks / (double) bvdim) ; 396 ntasks = bvdim * nfine_tasks_per_vector ; 397 ASSERT (nfine_tasks_per_vector > 1) ; 398 399 // slice the matrix A for each team of fine tasks 400 GB_WERK_PUSH (A_slice, nfine_tasks_per_vector + 1, int64_t) ; 401 if (A_slice == NULL) 402 { 403 // out of memory 404 GB_FREE_ALL ; 405 return (GrB_OUT_OF_MEMORY) ; 406 } 407 GB_pslice (A_slice, Ap, anvec, nfine_tasks_per_vector, true) ; 408 } 409 410 if (M == NULL) 411 { 412 413 //------------------------------------------------------------------ 414 // C = A*B, no mask, A sparse/hyper, B bitmap/full 415 //------------------------------------------------------------------ 416 417 #define GB_MASK_IS_SPARSE_OR_HYPER 0 418 #define GB_MASK_IS_BITMAP_OR_FULL 0 419 #undef keep 420 #define keep 1 421 if (B_is_bitmap) 422 { 423 // A is sparse/hyper, B is bitmap, no mask 424 #undef GB_B_IS_BITMAP 425 #define GB_B_IS_BITMAP 1 426 #include "GB_bitmap_AxB_saxpy_A_sparse_B_bitmap_template.c" 427 } 428 else 429 { 430 // A is sparse/hyper, B is full, no mask 431 #undef GB_B_IS_BITMAP 432 #define GB_B_IS_BITMAP 0 433 #include "GB_bitmap_AxB_saxpy_A_sparse_B_bitmap_template.c" 434 } 435 #undef GB_MASK_IS_SPARSE_OR_HYPER 436 #undef GB_MASK_IS_BITMAP_OR_FULL 437 438 } 439 else if (M_is_sparse_or_hyper) 440 { 441 442 //------------------------------------------------------------------ 443 // C<M> or <!M> = A*B, M and A are sparse/hyper, B bitmap/full 444 //------------------------------------------------------------------ 445 446 #define GB_MASK_IS_SPARSE_OR_HYPER 1 447 #define GB_MASK_IS_BITMAP_OR_FULL 0 448 #undef keep 449 const int8_t keep = (Mask_comp) ? 1 : 3 ; 450 if (B_is_bitmap) 451 { 452 // A is sparse/hyper, B is bitmap, M is sparse/hyper 453 #undef GB_B_IS_BITMAP 454 #define GB_B_IS_BITMAP 1 455 #include "GB_bitmap_AxB_saxpy_A_sparse_B_bitmap_template.c" 456 } 457 else 458 { 459 // A is sparse/hyper, B is full, no mask 460 #undef GB_B_IS_BITMAP 461 #define GB_B_IS_BITMAP 0 462 #include "GB_bitmap_AxB_saxpy_A_sparse_B_bitmap_template.c" 463 } 464 #undef GB_MASK_IS_SPARSE_OR_HYPER 465 #undef GB_MASK_IS_BITMAP_OR_FULL 466 467 } 468 else 469 { 470 471 //------------------------------------------------------------------ 472 // C<M> or <!M> = A*B, M bitmap, A sparse, B bitmap 473 //------------------------------------------------------------------ 474 475 #define GB_MASK_IS_SPARSE_OR_HYPER 0 476 #define GB_MASK_IS_BITMAP_OR_FULL 1 477 #undef keep 478 #define keep 1 479 if (B_is_bitmap) 480 { 481 // A is sparse/hyper, B is bitmap, M is bitmap/full 482 #undef GB_B_IS_BITMAP 483 #define GB_B_IS_BITMAP 1 484 #include "GB_bitmap_AxB_saxpy_A_sparse_B_bitmap_template.c" 485 } 486 else 487 { 488 // A is sparse/hyper, B is full, M is bitmap/full 489 #undef GB_B_IS_BITMAP 490 #define GB_B_IS_BITMAP 0 491 #include "GB_bitmap_AxB_saxpy_A_sparse_B_bitmap_template.c" 492 } 493 #undef GB_MASK_IS_SPARSE_OR_HYPER 494 #undef GB_MASK_IS_BITMAP_OR_FULL 495 } 496 497 #undef GB_B_IS_BITMAP 498 499 } 500 else 501 { 502 503 //----------------------------------------------------- 504 // C = A * B 505 //----------------------------------------------------- 506 507 // bitmap . bitmap bitmap 508 // bitmap . full bitmap 509 // bitmap . bitmap full 510 // full . full full 511 512 //----------------------------------------------------- 513 // C <M>= A * B 514 //----------------------------------------------------- 515 516 // bitmap any bitmap bitmap 517 // bitmap any full bitmap 518 // bitmap bitmap/full bitmap full 519 // bitmap bitmap/full full full 520 521 //----------------------------------------------------- 522 // C <!M>= A * B 523 //----------------------------------------------------- 524 525 // bitmap any bitmap bitmap 526 // bitmap any full bitmap 527 // bitmap any bitmap full 528 // bitmap any full full 529 530 #define GB_TILE_SIZE 64 531 #define GB_KTILE_SIZE 8 532 533 double work = ((double) avlen) * ((double) bvlen) * ((double) bvdim) ; 534 int nthreads = GB_nthreads (work, chunk, nthreads_max) ; 535 int64_t nI_tasks = (bvdim == 0) ? 1 : (1 + (bvdim-1) / GB_TILE_SIZE) ; 536 int64_t nJ_tasks = (avlen == 0) ? 1 : (1 + (avlen-1) / GB_TILE_SIZE) ; 537 int64_t ntasks = nI_tasks * nJ_tasks ; 538 539 if (M == NULL) 540 { 541 542 //------------------------------------------------------------------ 543 // C = A*B, no mask, A and B bitmap/full 544 //------------------------------------------------------------------ 545 546 #define GB_MASK_IS_SPARSE_OR_HYPER 0 547 #define GB_MASK_IS_BITMAP_OR_FULL 0 548 #undef keep 549 #define keep 1 550 #include "GB_bitmap_AxB_saxpy_A_bitmap_B_bitmap_template.c" 551 #undef GB_MASK_IS_SPARSE_OR_HYPER 552 #undef GB_MASK_IS_BITMAP_OR_FULL 553 554 } 555 else if (M_is_sparse_or_hyper) 556 { 557 558 //------------------------------------------------------------------ 559 // C<M> or <!M> = A*B, M sparse/hyper, A and B bitmap/full 560 //------------------------------------------------------------------ 561 562 #define GB_MASK_IS_SPARSE_OR_HYPER 1 563 #define GB_MASK_IS_BITMAP_OR_FULL 0 564 #undef keep 565 const int8_t keep = (Mask_comp) ? 1 : 3 ; 566 #include "GB_bitmap_AxB_saxpy_A_bitmap_B_bitmap_template.c" 567 #undef GB_MASK_IS_SPARSE_OR_HYPER 568 #undef GB_MASK_IS_BITMAP_OR_FULL 569 570 } 571 else 572 { 573 574 //------------------------------------------------------------------ 575 // C<M> or <!M> = A*B, all matrices bitmap/full 576 //------------------------------------------------------------------ 577 578 #define GB_MASK_IS_SPARSE_OR_HYPER 0 579 #define GB_MASK_IS_BITMAP_OR_FULL 1 580 #undef keep 581 #define keep 1 582 #include "GB_bitmap_AxB_saxpy_A_bitmap_B_bitmap_template.c" 583 #undef GB_MASK_IS_SPARSE_OR_HYPER 584 #undef GB_MASK_IS_BITMAP_OR_FULL 585 } 586 } 587 588 C->nvals = cnvals ; 589 590 //-------------------------------------------------------------------------- 591 // if M is sparse, clear it from the C bitmap 592 //-------------------------------------------------------------------------- 593 594 if (M_is_sparse_or_hyper) 595 { 596 // Cb [pC] -= 2 for each entry M(i,j) in the mask 597 GB_bitmap_M_scatter (C, 598 NULL, 0, GB_ALL, NULL, NULL, 0, GB_ALL, NULL, 599 M, Mask_struct, GB_ASSIGN, GB_BITMAP_M_SCATTER_MINUS_2, 600 M_ek_slicing, M_ntasks, M_nthreads, Context) ; 601 } 602 603 //-------------------------------------------------------------------------- 604 // free workspace 605 //-------------------------------------------------------------------------- 606 607 GB_FREE_ALL ; 608 } 609 610 #undef GB_FREE_ALL 611 612