1 //------------------------------------------------------------------------------ 2 // GB_AxB_saxpy3_coarseGus_M_phase5: C<M>=A*B, coarse Gustavson, phase5 3 //------------------------------------------------------------------------------ 4 5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved. 6 // SPDX-License-Identifier: Apache-2.0 7 8 //------------------------------------------------------------------------------ 9 10 { 11 12 //-------------------------------------------------------------------------- 13 // phase5: coarse Gustavson task, C<M>=A*B 14 //-------------------------------------------------------------------------- 15 16 // Initially, Hf [...] < mark for all of Hf. 17 18 // Hf [i] < mark : M(i,j)=0, C(i,j) is ignored. 19 // Hf [i] == mark : M(i,j)=1, and C(i,j) not yet seen. 20 // Hf [i] == mark+1 : M(i,j)=1, and C(i,j) has been seen. 21 22 for (int64_t kk = kfirst ; kk <= klast ; kk++) 23 { 24 int64_t pC = Cp [kk] ; 25 int64_t cjnz = Cp [kk+1] - pC ; 26 if (cjnz == 0) continue ; // nothing to do 27 GB_GET_B_j ; // get B(:,j) 28 29 #ifndef GB_GENERIC 30 if (cjnz == cvlen) // C(:,j) is dense 31 { 32 // This is not used for the generic saxpy3. 33 GB_COMPUTE_DENSE_C_j ; // C(:,j) = A*B(:,j) 34 continue ; 35 } 36 #endif 37 38 GB_GET_M_j ; // get M(:,j) 39 GB_GET_M_j_RANGE (64) ; // get first and last in M(:,j) 40 mark += 2 ; 41 int64_t mark1 = mark+1 ; 42 43 // scatter M(:,j) into the Gustavson workspace 44 GB_SCATTER_M_j (pM_start, pM_end, mark) ; 45 46 if (16 * cjnz > cvlen) 47 { 48 49 //------------------------------------------------------------------ 50 // C(:,j) is not very sparse 51 //------------------------------------------------------------------ 52 53 for ( ; pB < pB_end ; pB++) // scan B(:,j) 54 { 55 GB_GET_B_kj_INDEX ; // get k of B(k,j) 56 GB_GET_A_k ; // get A(:,k) 57 if (aknz == 0) continue ; 58 GB_GET_B_kj ; // bkj = B(k,j) 59 #define GB_IKJ \ 60 { \ 61 int64_t hf = Hf [i] ; \ 62 if (hf == mark) \ 63 { \ 64 /* C(i,j) = A(i,k) * B(k,j) */ \ 65 Hf [i] = mark1 ; /* mark as seen */\ 66 GB_MULT_A_ik_B_kj ; /* t = aik*bkj */ \ 67 GB_HX_WRITE (i, t) ; /* Hx [i] = t */ \ 68 } \ 69 else if (hf == mark1) \ 70 { \ 71 /* C(i,j) += A(i,k) * B(k,j) */ \ 72 GB_MULT_A_ik_B_kj ; /* t = aik*bkj */ \ 73 GB_HX_UPDATE (i, t) ;/* Hx [i] += t */ \ 74 } \ 75 } 76 GB_SCAN_M_j_OR_A_k (A_ok_for_binary_search) ; 77 #undef GB_IKJ 78 } 79 GB_GATHER_ALL_C_j(mark1) ; // gather into C(:,j) 80 81 } 82 else 83 { 84 85 //------------------------------------------------------------------ 86 // C(:,j) is very sparse 87 //------------------------------------------------------------------ 88 89 for ( ; pB < pB_end ; pB++) // scan B(:,j) 90 { 91 GB_GET_B_kj_INDEX ; // get k of B(k,j) 92 GB_GET_A_k ; // get A(:,k) 93 if (aknz == 0) continue ; 94 GB_GET_B_kj ; // bkj = B(k,j) 95 #define GB_IKJ \ 96 { \ 97 int64_t hf = Hf [i] ; \ 98 if (hf == mark) \ 99 { \ 100 /* C(i,j) = A(i,k) * B(k,j) */ \ 101 Hf [i] = mark1 ; /* mark as seen */\ 102 GB_MULT_A_ik_B_kj ; /* t = aik*bkj */ \ 103 GB_HX_WRITE (i, t) ; /* Hx [i] = t */ \ 104 Ci [pC++] = i ; /* C(:,j) pattern */ \ 105 } \ 106 else if (hf == mark1) \ 107 { \ 108 /* C(i,j) += A(i,k) * B(k,j) */ \ 109 GB_MULT_A_ik_B_kj ; /* t = aik*bkj */ \ 110 GB_HX_UPDATE (i, t) ;/* Hx [i] += t */ \ 111 } \ 112 } 113 GB_SCAN_M_j_OR_A_k (A_ok_for_binary_search) ; 114 #undef GB_IKJ 115 } 116 GB_SORT_AND_GATHER_C_j ; // gather into C(:,j) 117 } 118 } 119 } 120 121