1 //------------------------------------------------------------------------------ 2 // GB_AxB_saxpy3_fineHash_phase2: C=A*B (or with M packed), fine Hash, phase2 3 //------------------------------------------------------------------------------ 4 5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved. 6 // SPDX-License-Identifier: Apache-2.0 7 8 //------------------------------------------------------------------------------ 9 10 { 11 12 //-------------------------------------------------------------------------- 13 // phase2: fine hash task, C(:,j)=A*B(:,j) 14 //-------------------------------------------------------------------------- 15 16 // Given Hf [hash] split into (h,f) 17 18 // h == 0 , f == 0 : unlocked and unoccupied. 19 // h == i+1, f == 2 : unlocked, occupied by C(i,j). Hx is initialized. 20 // h == ..., f == 3 : locked. 21 22 // 0 -> 3 : to lock, if i seen for first time 23 // 2 -> 3 : to lock, if i seen already 24 // 3 -> 2 : to unlock; now i has been seen 25 26 // The mask M can be optionally checked, if it is dense and checked in 27 // place. This method is not used if M is present and sparse. 28 29 if (team_size == 1) 30 { 31 32 //---------------------------------------------------------------------- 33 // single-threaded version 34 //---------------------------------------------------------------------- 35 36 // the hash state 3 is not used, since only a single thread is 37 // doing all the work for this vector C(:,j). 38 39 // 0: if i seen for first time 40 // 2: if i seen already 41 42 for ( ; pB < pB_end ; pB++) // scan B(:,j) 43 { 44 GB_GET_B_kj_INDEX ; // get index k of B(k,j) 45 GB_GET_A_k ; // get A(:,k) 46 if (aknz == 0) continue ; 47 GB_GET_B_kj ; // bkj = B(k,j) 48 // scan A(:,k) 49 for (int64_t pA = pA_start ; pA < pA_end ; pA++) 50 { 51 GB_GET_A_ik_INDEX ; // get index i of A(i,j) 52 #ifdef GB_CHECK_MASK_ij 53 // check mask condition and skip if C(i,j) 54 // is protected by the mask 55 GB_CHECK_MASK_ij ; 56 #endif 57 GB_MULT_A_ik_B_kj ; // t = A(i,k) * B(k,j) 58 int64_t i_unlocked = ((i+1) << 2) + 2 ; // (i+1,2) 59 // find the entry i in the hash table 60 bool hf_unlocked = false ; // true if i found 61 bool hf_empty = false ; // true if empty slot found 62 int64_t hash ; 63 for (hash = GB_HASHF (i) ; ; GB_REHASH (hash,i)) 64 { 65 int64_t hf = Hf [hash] ; // grab the entry 66 hf_unlocked = (hf == i_unlocked) ; 67 hf_empty = (hf == 0) ; 68 if (hf_unlocked || hf_empty) break ; 69 } 70 if (hf_unlocked) // if true, update C(i,j) 71 { 72 // hash entry occuppied by C(i,j): update it 73 GB_HX_UPDATE (hash, t) ; // Hx [hash] += t 74 } 75 else // hf_empty: if true, load the hash entry with C(i,j) 76 { 77 // hash entry unoccuppied: fill it with C(i,j) 78 ASSERT (hf_empty) ; 79 GB_HX_WRITE (hash, t) ; // Hx [hash] = t 80 Hf [hash] = i_unlocked ; // unlock entry 81 } 82 } 83 } 84 85 } 86 else 87 { 88 89 //---------------------------------------------------------------------- 90 // multi-threaded version 91 //---------------------------------------------------------------------- 92 93 for ( ; pB < pB_end ; pB++) // scan B(:,j) 94 { 95 GB_GET_B_kj_INDEX ; // get index k of B(k,j) 96 GB_GET_A_k ; // get A(:,k) 97 if (aknz == 0) continue ; 98 GB_GET_B_kj ; // bkj = B(k,j) 99 // scan A(:,k) 100 for (int64_t pA = pA_start ; pA < pA_end ; pA++) 101 { 102 GB_GET_A_ik_INDEX ; // get index i of A(i,j) 103 #ifdef GB_CHECK_MASK_ij 104 // check mask condition and skip if C(i,j) 105 // is protected by the mask 106 GB_CHECK_MASK_ij ; 107 #endif 108 GB_MULT_A_ik_B_kj ; // t = A(i,k) * B(k,j) 109 int64_t i1 = i + 1 ; // i1 = one-based index 110 int64_t i_unlocked = (i1 << 2) + 2 ; // (i+1,2) 111 for (GB_HASH (i)) // find i in hash table 112 { 113 int64_t hf ; 114 GB_ATOMIC_READ 115 hf = Hf [hash] ; // grab the entry 116 #if GB_HAS_ATOMIC 117 if (hf == i_unlocked) // if true, update C(i,j) 118 { 119 GB_ATOMIC_UPDATE_HX (hash, t) ;// Hx [.]+=t 120 break ; // C(i,j) has been updated 121 } 122 #endif 123 int64_t h = (hf >> 2) ; 124 if (h == 0 || h == i1) 125 { 126 // h=0: unoccupied, h=i1: occupied by i 127 do // lock the entry 128 { 129 // do this atomically: 130 // { hf = Hf [hash] ; Hf [hash] |= 3 ; } 131 GB_ATOMIC_CAPTURE_INT64_OR (hf, Hf [hash], 3) ; 132 } while ((hf & 3) == 3) ; // owner: f=0 or 2 133 134 if (hf == 0) // f == 0 135 { 136 // C(i,j) is a new entry in C(:,j) 137 // Hx [hash] = t 138 GB_ATOMIC_WRITE_HX (hash, t) ; 139 GB_ATOMIC_WRITE 140 Hf [hash] = i_unlocked ; // unlock entry 141 break ; 142 } 143 if (hf == i_unlocked) // f == 2 144 { 145 // C(i,j) already appears in C(:,j) 146 // Hx [hash] += t 147 GB_ATOMIC_UPDATE_HX (hash, t) ; 148 GB_ATOMIC_WRITE 149 Hf [hash] = i_unlocked ; // unlock entry 150 break ; 151 } 152 153 // hash table occupied, but not with i 154 GB_ATOMIC_WRITE 155 Hf [hash] = hf ; // unlock with prior value 156 } 157 } 158 } 159 } 160 } 161 } 162 163