1 //------------------------------------------------------------------------------ 2 // GB_emult_01_template: C=A.*B, C<M or !M>=A.*B when C is sparse/hyper 3 //------------------------------------------------------------------------------ 4 5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved. 6 // SPDX-License-Identifier: Apache-2.0 7 8 //------------------------------------------------------------------------------ 9 10 // Computes C=A.*B, C<M>=A.*B, or C<!M>=A.*B when C is sparse or hypersparse: 11 12 // phase1: does not compute C itself, but just counts the # of entries in each 13 // vector of C. Fine tasks compute the # of entries in their slice of a 14 // single vector of C, and the results are cumsum'd. 15 16 // phase2: computes C, using the counts computed by phase1. 17 18 // No input matrix can be jumbled, and C is constructed as unjumbled. 19 20 // The following cases are handled: 21 22 // ------------------------------------------ 23 // C = A .* B 24 // ------------------------------------------ 25 // sparse . sparse sparse (method: 01) 26 27 // ------------------------------------------ 28 // C <M>= A .* B 29 // ------------------------------------------ 30 // sparse sparse sparse sparse (method: 01) 31 // sparse bitmap sparse sparse (method: 01) 32 // sparse full sparse sparse (method: 01) 33 // sparse sparse sparse bitmap (04a or 02a) 34 // sparse sparse sparse full (04a or 02a) 35 // sparse sparse bitmap sparse (04b or 02b) 36 // sparse sparse full sparse (04b or 02b) 37 38 // ------------------------------------------ 39 // C <!M>= A .* B 40 // ------------------------------------------ 41 // sparse sparse sparse sparse (01: M later) 42 // sparse bitmap sparse sparse (method: 01) 43 // sparse full sparse sparse (method: 01) 44 45 // The 04a and 04b methods are not yet implemented. This method handles 46 // those cases. Methods 02a and 02b can be used as well, but only if M is 47 // applied later. See GB_emult_sparsity for this decision. 48 49 { 50 51 int taskid ; 52 #pragma omp parallel for num_threads(C_nthreads) schedule(dynamic,1) 53 for (taskid = 0 ; taskid < C_ntasks ; taskid++) 54 { 55 56 //---------------------------------------------------------------------- 57 // get the task descriptor 58 //---------------------------------------------------------------------- 59 60 int64_t kfirst = TaskList [taskid].kfirst ; 61 int64_t klast = TaskList [taskid].klast ; 62 bool fine_task = (klast == -1) ; 63 int64_t len ; 64 if (fine_task) 65 { 66 // a fine task operates on a slice of a single vector 67 klast = kfirst ; 68 len = TaskList [taskid].len ; 69 } 70 else 71 { 72 // a coarse task operates on one or more whole vectors 73 len = vlen ; 74 } 75 76 //---------------------------------------------------------------------- 77 // compute all vectors in this task 78 //---------------------------------------------------------------------- 79 80 for (int64_t k = kfirst ; k <= klast ; k++) 81 { 82 83 //------------------------------------------------------------------ 84 // get j, the kth vector of C 85 //------------------------------------------------------------------ 86 87 int64_t j = GBH (Ch, k) ; 88 89 #if defined ( GB_PHASE_1_OF_2 ) 90 int64_t cjnz = 0 ; 91 #else 92 int64_t pC, pC_end ; 93 if (fine_task) 94 { 95 // A fine task computes a slice of C(:,j) 96 pC = TaskList [taskid ].pC ; 97 pC_end = TaskList [taskid+1].pC ; 98 ASSERT (Cp [k] <= pC && pC <= pC_end && pC_end <= Cp [k+1]) ; 99 } 100 else 101 { 102 // The vectors of C are never sliced for a coarse task. 103 pC = Cp [k] ; 104 pC_end = Cp [k+1] ; 105 } 106 int64_t cjnz = pC_end - pC ; 107 if (cjnz == 0) continue ; 108 #endif 109 110 //------------------------------------------------------------------ 111 // get A(:,j) 112 //------------------------------------------------------------------ 113 114 int64_t pA = -1, pA_end = -1 ; 115 if (fine_task) 116 { 117 // A fine task operates on Ai,Ax [pA...pA_end-1], which is 118 // a subset of the vector A(:,j) 119 pA = TaskList [taskid].pA ; 120 pA_end = TaskList [taskid].pA_end ; 121 } 122 else 123 { 124 // A coarse task operates on the entire vector A (:,j) 125 int64_t kA = (Ch == Ah) ? k : 126 ((C_to_A == NULL) ? j : C_to_A [k]) ; 127 if (kA >= 0) 128 { 129 pA = GBP (Ap, kA, vlen) ; 130 pA_end = GBP (Ap, kA+1, vlen) ; 131 } 132 } 133 134 int64_t ajnz = pA_end - pA ; // nnz in A(:,j) for this slice 135 int64_t pA_start = pA ; 136 bool adense = (ajnz == len) ; 137 138 // get the first and last indices in A(:,j) for this vector 139 int64_t iA_first = -1 ; 140 if (ajnz > 0) 141 { 142 iA_first = GBI (Ai, pA, vlen) ; 143 } 144 #if defined ( GB_PHASE_1_OF_2 ) || defined ( GB_DEBUG ) 145 int64_t iA_last = -1 ; 146 if (ajnz > 0) 147 { 148 iA_last = GBI (Ai, pA_end-1, vlen) ; 149 } 150 #endif 151 152 //------------------------------------------------------------------ 153 // get B(:,j) 154 //------------------------------------------------------------------ 155 156 int64_t pB = -1, pB_end = -1 ; 157 if (fine_task) 158 { 159 // A fine task operates on Bi,Bx [pB...pB_end-1], which is 160 // a subset of the vector B(:,j) 161 pB = TaskList [taskid].pB ; 162 pB_end = TaskList [taskid].pB_end ; 163 } 164 else 165 { 166 // A coarse task operates on the entire vector B (:,j) 167 int64_t kB = (Ch == Bh) ? k : 168 ((C_to_B == NULL) ? j : C_to_B [k]) ; 169 if (kB >= 0) 170 { 171 pB = GBP (Bp, kB, vlen) ; 172 pB_end = GBP (Bp, kB+1, vlen) ; 173 } 174 } 175 176 int64_t bjnz = pB_end - pB ; // nnz in B(:,j) for this slice 177 int64_t pB_start = pB ; 178 bool bdense = (bjnz == len) ; 179 180 // get the first and last indices in B(:,j) for this vector 181 int64_t iB_first = -1 ; 182 if (bjnz > 0) 183 { 184 iB_first = GBI (Bi, pB, vlen) ; 185 } 186 #if defined ( GB_PHASE_1_OF_2 ) || defined ( GB_DEBUG ) 187 int64_t iB_last = -1 ; 188 if (bjnz > 0) 189 { 190 iB_last = GBI (Bi, pB_end-1, vlen) ; 191 } 192 #endif 193 194 //------------------------------------------------------------------ 195 // get M(:,j) if M is sparse or hypersparse 196 //------------------------------------------------------------------ 197 198 int64_t pM = -1 ; 199 int64_t pM_end = -1 ; 200 if (M_is_sparse_or_hyper) 201 { 202 if (fine_task) 203 { 204 // A fine task operates on Mi,Mx [pM...pM_end-1], which is 205 // a subset of the vector M(:,j) 206 pM = TaskList [taskid].pM ; 207 pM_end = TaskList [taskid].pM_end ; 208 } 209 else 210 { 211 int64_t kM = -1 ; 212 if (Ch == Mh) 213 { 214 // Ch is the same as Mh (a shallow copy), or both NULL 215 kM = k ; 216 } 217 else 218 { 219 kM = (C_to_M == NULL) ? j : C_to_M [k] ; 220 } 221 if (kM >= 0) 222 { 223 pM = GBP (Mp, kM, vlen) ; 224 pM_end = GBP (Mp, kM+1, vlen) ; 225 } 226 } 227 } 228 229 //------------------------------------------------------------------ 230 // C(:,j)<optional mask> = A (:,j) .* B (:,j) or subvector 231 //------------------------------------------------------------------ 232 233 #if defined ( GB_PHASE_1_OF_2 ) 234 235 if (ajnz == 0 || bjnz == 0) 236 { 237 238 //-------------------------------------------------------------- 239 // A(:,j) and/or B(:,j) are empty 240 //-------------------------------------------------------------- 241 242 ; 243 244 } 245 else if (iA_last < iB_first || iB_last < iA_first) 246 { 247 248 //-------------------------------------------------------------- 249 // intersection of A(:,j) and B(:,j) is empty 250 //-------------------------------------------------------------- 251 252 // the last entry of A(:,j) comes before the first entry 253 // of B(:,j), or visa versa 254 ; 255 256 } 257 else 258 259 #endif 260 261 if (M == NULL) 262 { 263 264 //-------------------------------------------------------------- 265 // C = A.*B 266 //-------------------------------------------------------------- 267 268 // ------------------------------------------ 269 // C = A .* B 270 // ------------------------------------------ 271 // sparse . sparse sparse (method: 01) 272 // sparse sparse sparse sparse (01 M later) 273 274 // both A and B are sparse/hyper 275 ASSERT (A_is_sparse || A_is_hyper) ; 276 ASSERT (B_is_sparse || B_is_hyper) ; 277 278 if (ajnz > 32 * bjnz) 279 { 280 281 //---------------------------------------------------------- 282 // A(:,j) is much denser than B(:,j) 283 //---------------------------------------------------------- 284 285 for ( ; pB < pB_end ; pB++) 286 { 287 int64_t i = Bi [pB] ; 288 // find i in A(:,j) 289 int64_t pright = pA_end - 1 ; 290 bool found ; 291 GB_BINARY_SEARCH (i, Ai, pA, pright, found) ; 292 if (found) 293 { 294 // C (i,j) = A (i,j) .* B (i,j) 295 #if defined ( GB_PHASE_1_OF_2 ) 296 cjnz++ ; 297 #else 298 ASSERT (pC < pC_end) ; 299 Ci [pC] = i ; 300 GB_GETA (aij, Ax, pA) ; 301 GB_GETB (bij, Bx, pB) ; 302 GB_BINOP (GB_CX (pC), aij, bij, i, j) ; 303 pC++ ; 304 #endif 305 } 306 } 307 #if defined ( GB_PHASE_2_OF_2 ) 308 ASSERT (pC == pC_end) ; 309 #endif 310 311 } 312 else if (bjnz > 32 * ajnz) 313 { 314 315 //---------------------------------------------------------- 316 // B(:,j) is much denser than A(:,j) 317 //---------------------------------------------------------- 318 319 for ( ; pA < pA_end ; pA++) 320 { 321 int64_t i = Ai [pA] ; 322 // find i in B(:,j) 323 int64_t pright = pB_end - 1 ; 324 bool found ; 325 GB_BINARY_SEARCH (i, Bi, pB, pright, found) ; 326 if (found) 327 { 328 // C (i,j) = A (i,j) .* B (i,j) 329 #if defined ( GB_PHASE_1_OF_2 ) 330 cjnz++ ; 331 #else 332 ASSERT (pC < pC_end) ; 333 Ci [pC] = i ; 334 GB_GETA (aij, Ax, pA) ; 335 GB_GETB (bij, Bx, pB) ; 336 GB_BINOP (GB_CX (pC), aij, bij, i, j) ; 337 pC++ ; 338 #endif 339 } 340 } 341 #if defined ( GB_PHASE_2_OF_2 ) 342 ASSERT (pC == pC_end) ; 343 #endif 344 345 } 346 else 347 { 348 349 //---------------------------------------------------------- 350 // A(:,j) and B(:,j) about the sparsity 351 //---------------------------------------------------------- 352 353 // linear-time scan of A(:,j) and B(:,j) 354 355 while (pA < pA_end && pB < pB_end) 356 { 357 int64_t iA = Ai [pA] ; 358 int64_t iB = Bi [pB] ; 359 if (iA < iB) 360 { 361 // A(i,j) exists but not B(i,j) 362 pA++ ; 363 } 364 else if (iB < iA) 365 { 366 // B(i,j) exists but not A(i,j) 367 pB++ ; 368 } 369 else 370 { 371 // both A(i,j) and B(i,j) exist 372 // C (i,j) = A (i,j) .* B (i,j) 373 #if defined ( GB_PHASE_1_OF_2 ) 374 cjnz++ ; 375 #else 376 ASSERT (pC < pC_end) ; 377 Ci [pC] = iB ; 378 GB_GETA (aij, Ax, pA) ; 379 GB_GETB (bij, Bx, pB) ; 380 GB_BINOP (GB_CX (pC), aij, bij, iB, j) ; 381 pC++ ; 382 #endif 383 pA++ ; 384 pB++ ; 385 } 386 } 387 388 #if defined ( GB_PHASE_2_OF_2 ) 389 ASSERT (pC == pC_end) ; 390 #endif 391 } 392 393 } 394 else if (M_is_sparse_or_hyper) 395 { 396 397 //-------------------------------------------------------------- 398 // C and M are sparse or hypersparse 399 //-------------------------------------------------------------- 400 401 // ------------------------------------------ 402 // C <M>= A .* B 403 // ------------------------------------------ 404 // sparse sparse sparse sparse (method: 01) 405 // sparse sparse sparse bitmap (04a or 02a) 406 // sparse sparse sparse full (04a or 02a) 407 // sparse sparse bitmap sparse (04b or 02b) 408 // sparse sparse full sparse (04b or 02b) 409 410 // Method 04 is not yet implemented; using this method 411 // (GB_emult_01) instead. 412 413 // ether A or B are sparse/hyper 414 ASSERT (A_is_sparse || A_is_hyper || B_is_sparse || B_is_hyper); 415 416 for ( ; pM < pM_end ; pM++) 417 { 418 419 //---------------------------------------------------------- 420 // get M(i,j) for A(i,j) .* B (i,j) 421 //---------------------------------------------------------- 422 423 int64_t i = GBI (Mi, pM, vlen) ; 424 bool mij = GB_mcast (Mx, pM, msize) ; 425 if (!mij) continue ; 426 427 //---------------------------------------------------------- 428 // get A(i,j) 429 //---------------------------------------------------------- 430 431 bool afound ; 432 if (adense) 433 { 434 // A(:,j) is dense, bitmap, or full; use quick lookup 435 pA = pA_start + i - iA_first ; 436 afound = GBB (Ab, pA) ; 437 } 438 else 439 { 440 // A(:,j) is sparse; use binary search for A(i,j) 441 int64_t apright = pA_end - 1 ; 442 GB_BINARY_SEARCH (i, Ai, pA, apright, afound) ; 443 } 444 if (!afound) continue ; 445 ASSERT (GBI (Ai, pA, vlen) == i) ; 446 447 //---------------------------------------------------------- 448 // get B(i,j) 449 //---------------------------------------------------------- 450 451 bool bfound ; 452 if (bdense) 453 { 454 // B(:,j) is dense; use direct lookup for B(i,j) 455 pB = pB_start + i - iB_first ; 456 bfound = GBB (Bb, pB) ; 457 } 458 else 459 { 460 // B(:,j) is sparse; use binary search for B(i,j) 461 int64_t bpright = pB_end - 1 ; 462 GB_BINARY_SEARCH (i, Bi, pB, bpright, bfound) ; 463 } 464 if (!bfound) continue ; 465 ASSERT (GBI (Bi, pB, vlen) == i) ; 466 467 //---------------------------------------------------------- 468 // C(i,j) = A(i,j) .* B(i,j) 469 //---------------------------------------------------------- 470 471 // C (i,j) = A (i,j) .* B (i,j) 472 #if defined ( GB_PHASE_1_OF_2 ) 473 cjnz++ ; 474 #else 475 Ci [pC] = i ; 476 GB_GETA (aij, Ax, pA) ; 477 GB_GETB (bij, Bx, pB) ; 478 GB_BINOP (GB_CX (pC), aij, bij, i, j) ; 479 pC++ ; 480 #endif 481 } 482 483 #if defined ( GB_PHASE_2_OF_2 ) 484 ASSERT (pC == pC_end) ; 485 #endif 486 487 } 488 else 489 { 490 491 //-------------------------------------------------------------- 492 // M is bitmap or full, for either C<M>=A.*B or C<!M>=A.*B 493 //-------------------------------------------------------------- 494 495 // ------------------------------------------ 496 // C <M>= A .* B 497 // ------------------------------------------ 498 // sparse bitmap sparse sparse (method: 01) 499 // sparse full sparse sparse (method: 01) 500 501 // ------------------------------------------ 502 // C <!M>= A .* B 503 // ------------------------------------------ 504 // sparse bitmap sparse sparse (method: 01) 505 // sparse full sparse sparse (method: 01) 506 507 // GB_GET_MIJ: get M(i,j) where M is bitmap or full 508 #undef GB_GET_MIJ 509 #define GB_GET_MIJ(i) \ 510 int64_t pM = pM_start + i ; \ 511 bool mij = GBB (Mb, pM) && GB_mcast (Mx, pM, msize) ; \ 512 if (Mask_comp) mij = !mij ; 513 514 // both A and B are sparse/hyper 515 ASSERT (A_is_sparse || A_is_hyper) ; 516 ASSERT (B_is_sparse || B_is_hyper) ; 517 518 int64_t pM_start = j * vlen ; 519 520 if (ajnz > 32 * bjnz) 521 { 522 523 //---------------------------------------------------------- 524 // A(:,j) much denser than B(:,j), M bitmap/full 525 //---------------------------------------------------------- 526 527 for ( ; pB < pB_end ; pB++) 528 { 529 int64_t i = Bi [pB] ; 530 GB_GET_MIJ (i) ; 531 if (mij) 532 { 533 // find i in A(:,j) 534 int64_t pright = pA_end - 1 ; 535 bool found ; 536 GB_BINARY_SEARCH (i, Ai, pA, pright, found) ; 537 if (found) 538 { 539 // C (i,j) = A (i,j) .* B (i,j) 540 #if defined ( GB_PHASE_1_OF_2 ) 541 cjnz++ ; 542 #else 543 ASSERT (pC < pC_end) ; 544 Ci [pC] = i ; 545 GB_GETA (aij, Ax, pA) ; 546 GB_GETB (bij, Bx, pB) ; 547 GB_BINOP (GB_CX (pC), aij, bij, i, j) ; 548 pC++ ; 549 #endif 550 } 551 } 552 } 553 554 #if defined ( GB_PHASE_2_OF_2 ) 555 ASSERT (pC == pC_end) ; 556 #endif 557 558 } 559 else if (bjnz > 32 * ajnz) 560 { 561 562 //---------------------------------------------------------- 563 // B(:,j) much denser than A(:,j), M bitmap/full 564 //---------------------------------------------------------- 565 566 for ( ; pA < pA_end ; pA++) 567 { 568 int64_t i = Ai [pA] ; 569 GB_GET_MIJ (i) ; 570 if (mij) 571 { 572 573 // find i in B(:,j) 574 int64_t pright = pB_end - 1 ; 575 bool found ; 576 GB_BINARY_SEARCH (i, Bi, pB, pright, found) ; 577 if (found) 578 { 579 // C (i,j) = A (i,j) .* B (i,j) 580 #if defined ( GB_PHASE_1_OF_2 ) 581 cjnz++ ; 582 #else 583 ASSERT (pC < pC_end) ; 584 Ci [pC] = i ; 585 GB_GETA (aij, Ax, pA) ; 586 GB_GETB (bij, Bx, pB) ; 587 GB_BINOP (GB_CX (pC), aij, bij, i, j) ; 588 pC++ ; 589 #endif 590 } 591 } 592 } 593 594 #if defined ( GB_PHASE_2_OF_2 ) 595 ASSERT (pC == pC_end) ; 596 #endif 597 598 } 599 else 600 { 601 602 //---------------------------------------------------------- 603 // A(:,j) and B(:,j) about the same, M bitmap/full 604 //---------------------------------------------------------- 605 606 // linear-time scan of A(:,j) and B(:,j) 607 608 while (pA < pA_end && pB < pB_end) 609 { 610 int64_t iA = Ai [pA] ; 611 int64_t iB = Bi [pB] ; 612 if (iA < iB) 613 { 614 // A(i,j) exists but not B(i,j) 615 pA++ ; 616 } 617 else if (iB < iA) 618 { 619 // B(i,j) exists but not A(i,j) 620 pB++ ; 621 } 622 else 623 { 624 // both A(i,j) and B(i,j) exist 625 int64_t i = iA ; 626 GB_GET_MIJ (i) ; 627 if (mij) 628 { 629 // C (i,j) = A (i,j) .* B (i,j) 630 #if defined ( GB_PHASE_1_OF_2 ) 631 cjnz++ ; 632 #else 633 ASSERT (pC < pC_end) ; 634 Ci [pC] = i ; 635 GB_GETA (aij, Ax, pA) ; 636 GB_GETB (bij, Bx, pB) ; 637 GB_BINOP (GB_CX (pC), aij, bij, iB, j) ; 638 pC++ ; 639 #endif 640 } 641 pA++ ; 642 pB++ ; 643 } 644 } 645 646 #if defined ( GB_PHASE_2_OF_2 ) 647 ASSERT (pC == pC_end) ; 648 #endif 649 } 650 } 651 652 //------------------------------------------------------------------ 653 // final count of nnz (C (:,j)) 654 //------------------------------------------------------------------ 655 656 #if defined ( GB_PHASE_1_OF_2 ) 657 if (fine_task) 658 { 659 TaskList [taskid].pC = cjnz ; 660 } 661 else 662 { 663 Cp [k] = cjnz ; 664 } 665 #endif 666 } 667 } 668 } 669 670