1 //------------------------------------------------------------------------------ 2 // GB_AxB_saxpy3_fineGus_M_phase2: C<M>=A*B, fine Gustavson, phase2 3 //------------------------------------------------------------------------------ 4 5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved. 6 // SPDX-License-Identifier: Apache-2.0 7 8 //------------------------------------------------------------------------------ 9 10 { 11 12 //-------------------------------------------------------------------------- 13 // phase2: fine Gustavson task, C(:,j)<M(:,j)>=A*B(:,j) 14 //-------------------------------------------------------------------------- 15 16 // Hf [i] is 0 if M(i,j) not present or M(i,j)=0. 17 // 0 -> 1 : has already been done in phase0 if M(i,j)=1. 18 19 // 0 -> 0 : to ignore, if M(i,j)=0 20 // 1 -> 3 : to lock, if i seen for first time 21 // 2 -> 3 : to lock, if i seen already 22 // 3 -> 2 : to unlock; now i has been seen 23 24 GB_GET_M_j ; // get M(:,j) 25 GB_GET_M_j_RANGE (16) ; // get first and last in M(:,j) 26 for ( ; pB < pB_end ; pB++) // scan B(:,j) 27 { 28 GB_GET_B_kj_INDEX ; // get index k of B(k,j) 29 GB_GET_A_k ; // get A(:,k) 30 if (aknz == 0) continue ; 31 GB_GET_B_kj ; // bkj = B(k,j) 32 33 #if GB_IS_ANY_MONOID 34 35 //------------------------------------------------------------------ 36 // C(i,j) += A(i,k)*B(k,j) ; with the ANY monoid 37 //------------------------------------------------------------------ 38 39 #define GB_IKJ \ 40 int8_t f ; \ 41 GB_ATOMIC_READ \ 42 f = Hf [i] ; /* grab the entry */ \ 43 if (f == 0 || f == 2) continue ; \ 44 GB_ATOMIC_WRITE \ 45 Hf [i] = 2 ; /* unlock the entry */ \ 46 GB_MULT_A_ik_B_kj ; /* t = A(i,k) * B(k,j) */ \ 47 GB_ATOMIC_WRITE_HX (i, t) ; /* Hx [i] = t */ 48 49 #else 50 51 //------------------------------------------------------------------ 52 // C(i,j) += A(i,k)*B(k,j) ; all other monoids 53 //------------------------------------------------------------------ 54 55 #define GB_IKJ \ 56 { \ 57 GB_MULT_A_ik_B_kj ; /* t = A(i,k) * B(k,j) */ \ 58 int8_t f ; \ 59 GB_ATOMIC_READ \ 60 f = Hf [i] ; /* grab the entry */ \ 61 if (GB_HAS_ATOMIC && (f == 2)) \ 62 { \ 63 /* C(i,j) already seen; update it */ \ 64 GB_ATOMIC_UPDATE_HX (i, t) ; /* Hx [i] += t */ \ 65 continue ; /* C(i,j) has been updated */ \ 66 } \ 67 if (f == 0) continue ; /* M(i,j)=0; ignore C(i,j)*/\ 68 do /* lock the entry */ \ 69 { \ 70 /* do this atomically: */ \ 71 /* { f = Hf [i] ; Hf [i] = 3 ; } */ \ 72 GB_ATOMIC_CAPTURE_INT8 (f, Hf [i], 3) ; \ 73 } while (f == 3) ; /* lock owner gets f=1 or 2 */ \ 74 if (f == 1) \ 75 { \ 76 /* C(i,j) is a new entry */ \ 77 GB_ATOMIC_WRITE_HX (i, t) ; /* Hx [i] = t */ \ 78 } \ 79 else /* f == 2 */ \ 80 { \ 81 /* C(i,j) already appears in C(:,j) */ \ 82 GB_ATOMIC_UPDATE_HX (i, t) ; /* Hx [i] += t */ \ 83 } \ 84 GB_ATOMIC_WRITE \ 85 Hf [i] = 2 ; /* unlock the entry */ \ 86 } 87 88 #endif 89 90 GB_SCAN_M_j_OR_A_k (A_ok_for_binary_search) ; 91 #undef GB_IKJ 92 } 93 } 94 95