//------------------------------------------------------------------------------
// GB_bitmap_add_template: C = A+B, C<M>=A+B, and C<!M>=A+B, C bitmap
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// C is bitmap.  The mask M can have any sparsity structure, and is efficient
// to apply (all methods are asymptotically optimal).  All cases (no M, M, !M)
// are handled.

// NOTE(review): this is a template, #include'd into a file that must define
// the operator macros (GB_GETA, GB_GETB, GB_BINOP, GB_COPY_A_TO_C,
// GB_COPY_B_TO_C, GB_CX) and declare all variables referenced here (A, B, C,
// M, cnz, vlen, taskid, the *_nthreads/*_ntasks counts, and the
// kfirst_*slice/klast_*slice/pstart_*slice arrays) -- confirm against the
// including file.

{

    // cnvals counts the entries present in C (positions p with Cb [p] == 1).
    // Each parallel region accumulates per-task counts into task_cnvals and
    // folds them into cnvals with reduction(+:cnvals); the total is written
    // to C->nvals at the end.
    // TODO: the input C can be modified in-place, if it is also bitmap
    int64_t cnvals = 0 ;

    if (M == NULL)
    {

        //----------------------------------------------------------------------
        // M is not present
        //----------------------------------------------------------------------

        //      ------------------------------------------
        //      C       =           A       +       B
        //      ------------------------------------------
        //      bitmap  .           sparse          bitmap
        //      bitmap  .           bitmap          sparse
        //      bitmap  .           bitmap          bitmap

        ASSERT (A_is_bitmap || B_is_bitmap) ;
        ASSERT (!A_is_full) ;
        ASSERT (!B_is_full) ;

        if (A_is_bitmap && B_is_bitmap)
        {

            //------------------------------------------------------------------
            // Method21: C, A, and B are all bitmap
            //------------------------------------------------------------------

            // One static pass over all cnz positions of C; each thread owns a
            // contiguous chunk of the bitmap.
            int tid ;
            #pragma omp parallel for num_threads(C_nthreads) schedule(static) \
                reduction(+:cnvals)
            for (tid = 0 ; tid < C_nthreads ; tid++)
            {
                int64_t pstart, pend, task_cnvals = 0 ;
                GB_PARTITION (pstart, pend, cnz, tid, C_nthreads) ;
                for (int64_t p = pstart ; p < pend ; p++)
                {
                    int8_t c = 0 ;
                    if (Ab [p] && Bb [p])
                    {
                        // C (i,j) = A (i,j) + B (i,j)
                        // (row i = p % vlen, column j = p / vlen)
                        GB_GETA (aij, Ax, p) ;
                        GB_GETB (bij, Bx, p) ;
                        GB_BINOP (GB_CX (p), aij, bij, p % vlen, p / vlen) ;
                        c = 1 ;
                    }
                    else if (Bb [p])
                    {
                        // C (i,j) = B (i,j)
                        GB_COPY_B_TO_C (GB_CX (p), Bx, p) ;
                        c = 1 ;
                    }
                    else if (Ab [p])
                    {
                        // C (i,j) = A (i,j)
                        GB_COPY_A_TO_C (GB_CX (p), Ax, p) ;
                        c = 1 ;
                    }
                    Cb [p] = c ;
                    task_cnvals += c ;
                }
                cnvals += task_cnvals ;
            }

        }
        else if (A_is_bitmap)
        {

            //------------------------------------------------------------------
            // Method22: C and A are bitmap; B is sparse or hypersparse
            //------------------------------------------------------------------

            // First pass: C = A, copying A's bitmap and values into C.
            int64_t p ;
            #pragma omp parallel for num_threads(C_nthreads) schedule(static)
            for (p = 0 ; p < cnz ; p++)
            {
                // C (i,j) = A (i,j)
                int8_t a = Ab [p] ;
                if (a) GB_COPY_A_TO_C (GB_CX (p), Ax, p) ;
                Cb [p] = a ;
            }
            cnvals = A->nvals ;

            // Second pass: scatter the sparse/hypersparse B into C, applying
            // the binary op where A already contributed an entry.
            GB_SLICE_MATRIX (B, 8, chunk) ;

            #pragma omp parallel for num_threads(B_nthreads) \
                schedule(dynamic,1) reduction(+:cnvals)
            for (taskid = 0 ; taskid < B_ntasks ; taskid++)
            {
                int64_t kfirst = kfirst_Bslice [taskid] ;
                int64_t klast  = klast_Bslice  [taskid] ;
                int64_t task_cnvals = 0 ;
                for (int64_t k = kfirst ; k <= klast ; k++)
                {
                    // find the part of B(:,k) for this task
                    int64_t j = GBH (Bh, k) ;
                    int64_t pB_start, pB_end ;
                    GB_get_pA (&pB_start, &pB_end, taskid, k, kfirst,
                        klast, pstart_Bslice, Bp, vlen) ;
                    int64_t pC_start = j * vlen ;
                    // traverse over B(:,j), the kth vector of B
                    for (int64_t pB = pB_start ; pB < pB_end ; pB++)
                    {
                        int64_t i = Bi [pB] ;
                        int64_t p = pC_start + i ;
                        if (Cb [p])
                        {
                            // C (i,j) = A (i,j) + B (i,j)
                            GB_GETA (aij, Ax, p) ;
                            GB_GETB (bij, Bx, pB) ;
                            GB_BINOP (GB_CX (p), aij, bij, i, j) ;
                        }
                        else
                        {
                            // C (i,j) = B (i,j)
                            GB_COPY_B_TO_C (GB_CX (p), Bx, pB) ;
                            Cb [p] = 1 ;
                            task_cnvals++ ;
                        }
                    }
                }
                cnvals += task_cnvals ;
            }

        }
        else
        {

            //------------------------------------------------------------------
            // Method23: C and B are bitmap; A is sparse or hypersparse
            //------------------------------------------------------------------

            // Symmetric to Method22 with the roles of A and B swapped.
            // First pass: C = B, copying B's bitmap and values into C.
            int64_t p ;
            #pragma omp parallel for num_threads(C_nthreads) schedule(static)
            for (p = 0 ; p < cnz ; p++)
            {
                // C (i,j) = B (i,j)
                int8_t b = Bb [p] ;
                if (b) GB_COPY_B_TO_C (GB_CX (p), Bx, p) ;
                Cb [p] = b ;
            }
            cnvals = B->nvals ;

            // Second pass: scatter the sparse/hypersparse A into C.
            GB_SLICE_MATRIX (A, 8, chunk) ;

            #pragma omp parallel for num_threads(A_nthreads) \
                schedule(dynamic,1) reduction(+:cnvals)
            for (taskid = 0 ; taskid < A_ntasks ; taskid++)
            {
                int64_t kfirst = kfirst_Aslice [taskid] ;
                int64_t klast  = klast_Aslice  [taskid] ;
                int64_t task_cnvals = 0 ;
                for (int64_t k = kfirst ; k <= klast ; k++)
                {
                    // find the part of A(:,k) for this task
                    int64_t j = GBH (Ah, k) ;
                    int64_t pA_start, pA_end ;
                    GB_get_pA (&pA_start, &pA_end, taskid, k, kfirst,
                        klast, pstart_Aslice, Ap, vlen) ;
                    int64_t pC_start = j * vlen ;
                    // traverse over A(:,j), the kth vector of A
                    for (int64_t pA = pA_start ; pA < pA_end ; pA++)
                    {
                        int64_t i = Ai [pA] ;
                        int64_t p = pC_start + i ;
                        if (Cb [p])
                        {
                            // C (i,j) = A (i,j) + B (i,j)
                            GB_GETA (aij, Ax, pA) ;
                            GB_GETB (bij, Bx, p) ;
                            GB_BINOP (GB_CX (p), aij, bij, i, j) ;
                        }
                        else
                        {
                            // C (i,j) = A (i,j)
                            GB_COPY_A_TO_C (GB_CX (p), Ax, pA) ;
                            Cb [p] = 1 ;
                            task_cnvals++ ;
                        }
                    }
                }
                cnvals += task_cnvals ;
            }
        }

    }
    else if (M_is_sparse_or_hyper)
    {

        //----------------------------------------------------------------------
        // C is bitmap, M is sparse or hyper and complemented
        //----------------------------------------------------------------------

        //      ------------------------------------------
        //      C     <!M> =        A       +       B
        //      ------------------------------------------
        //      bitmap  sparse      sparse          bitmap
        //      bitmap  sparse      sparse          full
        //      bitmap  sparse      bitmap          sparse
        //      bitmap  sparse      bitmap          bitmap
        //      bitmap  sparse      bitmap          full
        //      bitmap  sparse      full            sparse
        //      bitmap  sparse      full            bitmap
        //      bitmap  sparse      full            full

        // M is sparse and complemented.  If M is sparse and not
        // complemented, then C is constructed as sparse, not bitmap.
        ASSERT (Mask_comp) ;

        // C(i,j) = A(i,j) + B(i,j) can only be computed where M(i,j) is
        // not present in the sparse pattern of M, or where it is present
        // but equal to zero.

        //----------------------------------------------------------------------
        // scatter M into the C bitmap
        //----------------------------------------------------------------------

        GB_SLICE_MATRIX (M, 8, chunk) ;

        #pragma omp parallel for num_threads(M_nthreads) schedule(dynamic,1)
        for (taskid = 0 ; taskid < M_ntasks ; taskid++)
        {
            int64_t kfirst = kfirst_Mslice [taskid] ;
            int64_t klast  = klast_Mslice  [taskid] ;
            for (int64_t k = kfirst ; k <= klast ; k++)
            {
                // find the part of M(:,k) for this task
                int64_t j = GBH (Mh, k) ;
                int64_t pM_start, pM_end ;
                GB_get_pA (&pM_start, &pM_end, taskid, k, kfirst,
                    klast, pstart_Mslice, Mp, vlen) ;
                int64_t pC_start = j * vlen ;
                // traverse over M(:,j), the kth vector of M
                for (int64_t pM = pM_start ; pM < pM_end ; pM++)
                {
                    // mark C(i,j) if M(i,j) is true
                    bool mij = GB_mcast (Mx, pM, msize) ;
                    if (mij)
                    {
                        int64_t i = Mi [pM] ;
                        int64_t p = pC_start + i ;
                        // Cb [p] == 2 is a temporary marker meaning
                        // "masked out"; it is distinct from 0 (no entry)
                        // and 1 (entry present), and is removed below.
                        Cb [p] = 2 ;
                    }
                }
            }
        }

        // C(i,j) has been marked, in Cb, with the value 2 where M(i,j)=1.
        // These positions will not be computed in C(i,j).  C(i,j) can only
        // be modified where Cb [p] is zero.

        //----------------------------------------------------------------------
        // compute C<!M>=A+B using the mask scattered in C
        //----------------------------------------------------------------------

        // M_cleared records whether the chosen method already reset the
        // Cb [p] == 2 markers back to 0; if not, a final pass over M does it.
        bool M_cleared = false ;

        if ((A_is_bitmap || A_is_full) && (B_is_bitmap || B_is_full))
        {

            //------------------------------------------------------------------
            // Method24(!M,sparse): C is bitmap, both A and B are bitmap or full
            //------------------------------------------------------------------

            int tid ;
            #pragma omp parallel for num_threads(C_nthreads) schedule(static) \
                reduction(+:cnvals)
            for (tid = 0 ; tid < C_nthreads ; tid++)
            {
                int64_t pstart, pend, task_cnvals = 0 ;
                GB_PARTITION (pstart, pend, cnz, tid, C_nthreads) ;
                for (int64_t p = pstart ; p < pend ; p++)
                {
                    int8_t c = Cb [p] ;
                    if (c == 0)
                    {
                        // M(i,j) is zero, so C(i,j) can be computed
                        int8_t a = GBB (Ab, p) ;
                        int8_t b = GBB (Bb, p) ;
                        if (a && b)
                        {
                            // C (i,j) = A (i,j) + B (i,j)
                            GB_GETA (aij, Ax, p) ;
                            GB_GETB (bij, Bx, p) ;
                            GB_BINOP (GB_CX (p), aij, bij, p % vlen, p / vlen) ;
                            c = 1 ;
                        }
                        else if (b)
                        {
                            // C (i,j) = B (i,j)
                            GB_COPY_B_TO_C (GB_CX (p), Bx, p) ;
                            c = 1 ;
                        }
                        else if (a)
                        {
                            // C (i,j) = A (i,j)
                            GB_COPY_A_TO_C (GB_CX (p), Ax, p) ;
                            c = 1 ;
                        }
                        Cb [p] = c ;
                        task_cnvals += c ;
                    }
                    else
                    {
                        // M(i,j) == 1, so C(i,j) is not computed; clear the
                        // marker (Cb [p] was 2) back to 0 as we go.
                        Cb [p] = 0 ;
                    }
                }
                cnvals += task_cnvals ;
            }
            M_cleared = true ;      // M has also been cleared from C

        }
        else if (A_is_bitmap || A_is_full)
        {

            //------------------------------------------------------------------
            // Method25(!M,sparse): C bitmap, A bitmap or full, B sparse/hyper
            //------------------------------------------------------------------

            // First pass: C = A wherever the mask permits (Cb [p] == 0).
            // Positions marked 2 are left in place for the second pass and
            // the final clear-M pass below.
            int tid ;
            #pragma omp parallel for num_threads(C_nthreads) schedule(static) \
                reduction(+:cnvals)
            for (tid = 0 ; tid < C_nthreads ; tid++)
            {
                int64_t pstart, pend, task_cnvals = 0 ;
                GB_PARTITION (pstart, pend, cnz, tid, C_nthreads) ;
                for (int64_t p = pstart ; p < pend ; p++)
                {
                    if (Cb [p] == 0)
                    {
                        // C (i,j) = A (i,j)
                        int8_t a = GBB (Ab, p) ;
                        if (a) GB_COPY_A_TO_C (GB_CX (p), Ax, p) ;
                        Cb [p] = a ;
                        task_cnvals += a ;
                    }
                }
                cnvals += task_cnvals ;
            }

            // Second pass: scatter the sparse/hypersparse B into C.
            GB_SLICE_MATRIX (B, 8, chunk) ;

            #pragma omp parallel for num_threads(B_nthreads) \
                schedule(dynamic,1) reduction(+:cnvals)
            for (taskid = 0 ; taskid < B_ntasks ; taskid++)
            {
                int64_t kfirst = kfirst_Bslice [taskid] ;
                int64_t klast  = klast_Bslice  [taskid] ;
                int64_t task_cnvals = 0 ;
                for (int64_t k = kfirst ; k <= klast ; k++)
                {
                    // find the part of B(:,k) for this task
                    int64_t j = GBH (Bh, k) ;
                    int64_t pB_start, pB_end ;
                    GB_get_pA (&pB_start, &pB_end, taskid, k, kfirst,
                        klast, pstart_Bslice, Bp, vlen) ;
                    int64_t pC_start = j * vlen ;
                    // traverse over B(:,j), the kth vector of B
                    for (int64_t pB = pB_start ; pB < pB_end ; pB++)
                    {
                        int64_t i = Bi [pB] ;
                        int64_t p = pC_start + i ;
                        int8_t c = Cb [p] ;
                        if (c == 1)
                        {
                            // C (i,j) = A (i,j) + B (i,j)
                            GB_GETA (aij, Ax, p) ;
                            GB_GETB (bij, Bx, pB) ;
                            GB_BINOP (GB_CX (p), aij, bij, i, j) ;
                        }
                        else if (c == 0)
                        {
                            // C (i,j) = B (i,j)
                            GB_COPY_B_TO_C (GB_CX (p), Bx, pB) ;
                            Cb [p] = 1 ;
                            task_cnvals++ ;
                        }
                        // c == 2: masked out; skipped, cleared below
                    }
                }
                cnvals += task_cnvals ;
            }

        }
        else
        {

            //------------------------------------------------------------------
            // Method26: C bitmap, A sparse or hypersparse, B bitmap or full
            //------------------------------------------------------------------

            // Symmetric to Method25 with the roles of A and B swapped.
            // First pass: C = B wherever the mask permits (Cb [p] == 0).
            int tid ;
            #pragma omp parallel for num_threads(C_nthreads) schedule(static) \
                reduction(+:cnvals)
            for (tid = 0 ; tid < C_nthreads ; tid++)
            {
                int64_t pstart, pend, task_cnvals = 0 ;
                GB_PARTITION (pstart, pend, cnz, tid, C_nthreads) ;
                for (int64_t p = pstart ; p < pend ; p++)
                {
                    if (Cb [p] == 0)
                    {
                        // C (i,j) = B (i,j)
                        int8_t b = GBB (Bb, p) ;
                        if (b) GB_COPY_B_TO_C (GB_CX (p), Bx, p) ;
                        Cb [p] = b ;
                        task_cnvals += b ;
                    }
                }
                cnvals += task_cnvals ;
            }

            // Second pass: scatter the sparse/hypersparse A into C.
            GB_SLICE_MATRIX (A, 8, chunk) ;

            #pragma omp parallel for num_threads(A_nthreads) \
                schedule(dynamic,1) reduction(+:cnvals)
            for (taskid = 0 ; taskid < A_ntasks ; taskid++)
            {
                int64_t kfirst = kfirst_Aslice [taskid] ;
                int64_t klast  = klast_Aslice  [taskid] ;
                int64_t task_cnvals = 0 ;
                for (int64_t k = kfirst ; k <= klast ; k++)
                {
                    // find the part of A(:,k) for this task
                    int64_t j = GBH (Ah, k) ;
                    int64_t pA_start, pA_end ;
                    GB_get_pA (&pA_start, &pA_end, taskid, k, kfirst,
                        klast, pstart_Aslice, Ap, vlen) ;
                    int64_t pC_start = j * vlen ;
                    // traverse over A(:,j), the kth vector of A
                    for (int64_t pA = pA_start ; pA < pA_end ; pA++)
                    {
                        int64_t i = Ai [pA] ;
                        int64_t p = pC_start + i ;
                        int8_t c = Cb [p] ;
                        if (c == 1)
                        {
                            // C (i,j) = A (i,j) + B (i,j)
                            GB_GETA (aij, Ax, pA) ;
                            GB_GETB (bij, Bx, p) ;
                            GB_BINOP (GB_CX (p), aij, bij, i, j) ;
                        }
                        else if (c == 0)
                        {
                            // C (i,j) = A (i,j)
                            GB_COPY_A_TO_C (GB_CX (p), Ax, pA) ;
                            Cb [p] = 1 ;
                            task_cnvals++ ;
                        }
                        // c == 2: masked out; skipped, cleared below
                    }
                }
                cnvals += task_cnvals ;
            }
        }

        //---------------------------------------------------------------------
        // clear M from C
        //---------------------------------------------------------------------

        if (!M_cleared)
        {
            // This step is required if either A or B are sparse/hyper (if
            // one is sparse/hyper, the other must be bitmap).  It requires
            // an extra pass over the mask M, so this might be slower than
            // postponing the application of the mask, and doing it later.

            #pragma omp parallel for num_threads(M_nthreads) schedule(dynamic,1)
            for (taskid = 0 ; taskid < M_ntasks ; taskid++)
            {
                int64_t kfirst = kfirst_Mslice [taskid] ;
                int64_t klast  = klast_Mslice  [taskid] ;
                for (int64_t k = kfirst ; k <= klast ; k++)
                {
                    // find the part of M(:,k) for this task
                    int64_t j = GBH (Mh, k) ;
                    int64_t pM_start, pM_end ;
                    GB_get_pA (&pM_start, &pM_end, taskid, k, kfirst,
                        klast, pstart_Mslice, Mp, vlen) ;
                    int64_t pC_start = j * vlen ;
                    // traverse over M(:,j), the kth vector of M
                    for (int64_t pM = pM_start ; pM < pM_end ; pM++)
                    {
                        // clear the Cb [p] == 2 marker where M(i,j) is true
                        bool mij = GB_mcast (Mx, pM, msize) ;
                        if (mij)
                        {
                            int64_t i = Mi [pM] ;
                            int64_t p = pC_start + i ;
                            Cb [p] = 0 ;
                        }
                    }
                }
            }
        }

    }
    else
    {

        //----------------------------------------------------------------------
        // C is bitmap; M is bitmap or full
        //----------------------------------------------------------------------

        //      ------------------------------------------
        //      C      <M> =        A       +       B
        //      ------------------------------------------
        //      bitmap  bitmap      sparse          bitmap
        //      bitmap  bitmap      sparse          full
        //      bitmap  bitmap      bitmap          sparse
        //      bitmap  bitmap      bitmap          bitmap
        //      bitmap  bitmap      bitmap          full
        //      bitmap  bitmap      full            sparse
        //      bitmap  bitmap      full            bitmap
        //      bitmap  bitmap      full            full

        //      ------------------------------------------
        //      C      <M> =        A       +       B
        //      ------------------------------------------
        //      bitmap  full        sparse          bitmap
        //      bitmap  full        sparse          full
        //      bitmap  full        bitmap          sparse
        //      bitmap  full        bitmap          bitmap
        //      bitmap  full        bitmap          full
        //      bitmap  full        full            sparse
        //      bitmap  full        full            bitmap
        //      bitmap  full        full            full

        //      ------------------------------------------
        //      C     <!M> =        A       +       B
        //      ------------------------------------------
        //      bitmap  bitmap      sparse          sparse
        //      bitmap  bitmap      sparse          bitmap
        //      bitmap  bitmap      sparse          full
        //      bitmap  bitmap      bitmap          sparse
        //      bitmap  bitmap      bitmap          bitmap
        //      bitmap  bitmap      bitmap          full
        //      bitmap  bitmap      full            sparse
        //      bitmap  bitmap      full            bitmap
        //      bitmap  bitmap      full            full

        //      ------------------------------------------
        //      C     <!M> =        A       +       B
        //      ------------------------------------------
        //      bitmap  full        sparse          sparse
        //      bitmap  full        sparse          bitmap
        //      bitmap  full        sparse          full
        //      bitmap  full        bitmap          sparse
        //      bitmap  full        bitmap          bitmap
        //      bitmap  full        bitmap          full
        //      bitmap  full        full            sparse
        //      bitmap  full        full            bitmap
        //      bitmap  full        full            full

        ASSERT (M_is_bitmap || M_is_full) ;
        ASSERT (A_is_bitmap || A_is_full || B_is_bitmap || B_is_full) ;

        // GB_GET_MIJ: mij = effective mask at position p.  Reads M's bitmap
        // and value, then applies the complement if Mask_comp is true, so
        // mij == true always means "C(i,j) may be computed".
        #undef  GB_GET_MIJ
        #define GB_GET_MIJ(p)                                     \
            bool mij = GBB (Mb, p) && GB_mcast (Mx, p, msize) ;   \
            if (Mask_comp) mij = !mij ;

        if ((A_is_bitmap || A_is_full) && (B_is_bitmap || B_is_full))
        {

            //------------------------------------------------------------------
            // Method27: C is bitmap; M, A, and B are bitmap or full
            //------------------------------------------------------------------

            int tid ;
            #pragma omp parallel for num_threads(C_nthreads) schedule(static) \
                reduction(+:cnvals)
            for (tid = 0 ; tid < C_nthreads ; tid++)
            {
                int64_t pstart, pend, task_cnvals = 0 ;
                GB_PARTITION (pstart, pend, cnz, tid, C_nthreads) ;
                for (int64_t p = pstart ; p < pend ; p++)
                {
                    GB_GET_MIJ (p) ;
                    if (mij)
                    {
                        // M(i,j) is true, so C(i,j) can be computed
                        int8_t a = GBB (Ab, p) ;
                        int8_t b = GBB (Bb, p) ;
                        int8_t c = 0 ;
                        if (a && b)
                        {
                            // C (i,j) = A (i,j) + B (i,j)
                            GB_GETA (aij, Ax, p) ;
                            GB_GETB (bij, Bx, p) ;
                            GB_BINOP (GB_CX (p), aij, bij, p % vlen, p / vlen) ;
                            c = 1 ;
                        }
                        else if (b)
                        {
                            // C (i,j) = B (i,j)
                            GB_COPY_B_TO_C (GB_CX (p), Bx, p) ;
                            c = 1 ;
                        }
                        else if (a)
                        {
                            // C (i,j) = A (i,j)
                            GB_COPY_A_TO_C (GB_CX (p), Ax, p) ;
                            c = 1 ;
                        }
                        Cb [p] = c ;
                        task_cnvals += c ;
                    }
                    else
                    {
                        // mij is false (after any complement), so C(i,j) is
                        // not computed
                        Cb [p] = 0 ;
                    }
                }
                cnvals += task_cnvals ;
            }

        }
        else if (A_is_bitmap || A_is_full)
        {

            //------------------------------------------------------------------
            // Method28: C bitmap; M and A bitmap or full; B sparse or hyper
            //------------------------------------------------------------------

            // First pass: C<mij> = A over the whole bitmap.
            int tid ;
            #pragma omp parallel for num_threads(C_nthreads) schedule(static) \
                reduction(+:cnvals)
            for (tid = 0 ; tid < C_nthreads ; tid++)
            {
                int64_t pstart, pend, task_cnvals = 0 ;
                GB_PARTITION (pstart, pend, cnz, tid, C_nthreads) ;
                for (int64_t p = pstart ; p < pend ; p++)
                {
                    GB_GET_MIJ (p) ;
                    if (mij)
                    {
                        // C (i,j) = A (i,j)
                        int8_t a = GBB (Ab, p) ;
                        if (a) GB_COPY_A_TO_C (GB_CX (p), Ax, p) ;
                        Cb [p] = a ;
                        task_cnvals += a ;
                    }
                    else
                    {
                        Cb [p] = 0 ;
                    }
                }
                cnvals += task_cnvals ;
            }

            // Second pass: scatter the sparse/hypersparse B into C<mij>.
            GB_SLICE_MATRIX (B, 8, chunk) ;

            #pragma omp parallel for num_threads(B_nthreads) \
                schedule(dynamic,1) reduction(+:cnvals)
            for (taskid = 0 ; taskid < B_ntasks ; taskid++)
            {
                int64_t kfirst = kfirst_Bslice [taskid] ;
                int64_t klast  = klast_Bslice  [taskid] ;
                int64_t task_cnvals = 0 ;
                for (int64_t k = kfirst ; k <= klast ; k++)
                {
                    // find the part of B(:,k) for this task
                    int64_t j = GBH (Bh, k) ;
                    int64_t pB_start, pB_end ;
                    GB_get_pA (&pB_start, &pB_end, taskid, k, kfirst,
                        klast, pstart_Bslice, Bp, vlen) ;
                    int64_t pC_start = j * vlen ;
                    // traverse over B(:,j), the kth vector of B
                    for (int64_t pB = pB_start ; pB < pB_end ; pB++)
                    {
                        int64_t i = Bi [pB] ;
                        int64_t p = pC_start + i ;
                        GB_GET_MIJ (p) ;
                        if (mij)
                        {
                            int8_t c = Cb [p] ;
                            if (c == 1)
                            {
                                // C (i,j) = A (i,j) + B (i,j)
                                GB_GETA (aij, Ax, p) ;
                                GB_GETB (bij, Bx, pB) ;
                                GB_BINOP (GB_CX (p), aij, bij, i, j) ;
                            }
                            else
                            {
                                // C (i,j) = B (i,j)
                                GB_COPY_B_TO_C (GB_CX (p), Bx, pB) ;
                                Cb [p] = 1 ;
                                task_cnvals++ ;
                            }
                        }
                    }
                }
                cnvals += task_cnvals ;
            }

        }
        else
        {

            //------------------------------------------------------------------
            // Method29: C bitmap; M and B bitmap or full; A sparse or hyper
            //------------------------------------------------------------------

            // Symmetric to Method28 with the roles of A and B swapped.
            // First pass: C<mij> = B over the whole bitmap.
            int tid ;
            #pragma omp parallel for num_threads(C_nthreads) schedule(static) \
                reduction(+:cnvals)
            for (tid = 0 ; tid < C_nthreads ; tid++)
            {
                int64_t pstart, pend, task_cnvals = 0 ;
                GB_PARTITION (pstart, pend, cnz, tid, C_nthreads) ;
                for (int64_t p = pstart ; p < pend ; p++)
                {
                    GB_GET_MIJ (p) ;
                    if (mij)
                    {
                        // C (i,j) = B (i,j)
                        int8_t b = GBB (Bb, p) ;
                        if (b) GB_COPY_B_TO_C (GB_CX (p), Bx, p) ;
                        Cb [p] = b ;
                        task_cnvals += b ;
                    }
                    else
                    {
                        Cb [p] = 0 ;
                    }
                }
                cnvals += task_cnvals ;
            }

            // Second pass: scatter the sparse/hypersparse A into C<mij>.
            GB_SLICE_MATRIX (A, 8, chunk) ;

            #pragma omp parallel for num_threads(A_nthreads) \
                schedule(dynamic,1) reduction(+:cnvals)
            for (taskid = 0 ; taskid < A_ntasks ; taskid++)
            {
                int64_t kfirst = kfirst_Aslice [taskid] ;
                int64_t klast  = klast_Aslice  [taskid] ;
                int64_t task_cnvals = 0 ;
                for (int64_t k = kfirst ; k <= klast ; k++)
                {
                    // find the part of A(:,k) for this task
                    int64_t j = GBH (Ah, k) ;
                    int64_t pA_start, pA_end ;
                    GB_get_pA (&pA_start, &pA_end, taskid, k, kfirst,
                        klast, pstart_Aslice, Ap, vlen) ;
                    int64_t pC_start = j * vlen ;
                    // traverse over A(:,j), the kth vector of A
                    for (int64_t pA = pA_start ; pA < pA_end ; pA++)
                    {
                        int64_t i = Ai [pA] ;
                        int64_t p = pC_start + i ;
                        GB_GET_MIJ (p) ;
                        if (mij)
                        {
                            int8_t c = Cb [p] ;
                            if (c == 1)
                            {
                                // C (i,j) = A (i,j) + B (i,j)
                                GB_GETA (aij, Ax, pA) ;
                                GB_GETB (bij, Bx, p) ;
                                GB_BINOP (GB_CX (p), aij, bij, i, j) ;
                            }
                            else
                            {
                                // C (i,j) = A (i,j)
                                GB_COPY_A_TO_C (GB_CX (p), Ax, pA) ;
                                Cb [p] = 1 ;
                                task_cnvals++ ;
                            }
                        }
                    }
                }
                cnvals += task_cnvals ;
            }
        }
    }

    // record the final number of entries in the bitmap C
    C->nvals = cnvals ;
}